mirror of
https://github.com/simon987/od-database.git
synced 2025-04-19 18:36:44 +00:00
Fixed file duplication problem
This commit is contained in:
parent
1718bb91ca
commit
af2601ee70
@ -146,8 +146,8 @@ class RemoteDirectoryCrawler:
|
|||||||
try:
|
try:
|
||||||
path = os.path.join(file.path, file.name, "")
|
path = os.path.join(file.path, file.name, "")
|
||||||
if path not in self.crawled_paths:
|
if path not in self.crawled_paths:
|
||||||
listing = directory.list_dir(path)
|
|
||||||
self.crawled_paths.add(path)
|
self.crawled_paths.add(path)
|
||||||
|
listing = directory.list_dir(path)
|
||||||
|
|
||||||
for f in listing:
|
for f in listing:
|
||||||
if f.is_dir:
|
if f.is_dir:
|
||||||
|
@ -4,7 +4,7 @@ import json
|
|||||||
|
|
||||||
payload = json.dumps({
|
payload = json.dumps({
|
||||||
"website_id": 123,
|
"website_id": 123,
|
||||||
"url": "https://frenchy.ga/",
|
"url": "http://liminaire.fr/TEXTES/",
|
||||||
"priority": 2,
|
"priority": 2,
|
||||||
"callback_type": "",
|
"callback_type": "",
|
||||||
"callback_args": "{}"
|
"callback_args": "{}"
|
||||||
|
@ -82,7 +82,6 @@ class ElasticSearchEngine(SearchEngine):
|
|||||||
def import_json(self, in_str: str, website_id: int):
|
def import_json(self, in_str: str, website_id: int):
|
||||||
import_every = 1000
|
import_every = 1000
|
||||||
|
|
||||||
print(in_str)
|
|
||||||
docs = []
|
docs = []
|
||||||
|
|
||||||
for line in in_str.splitlines():
|
for line in in_str.splitlines():
|
||||||
|
Loading…
x
Reference in New Issue
Block a user