Fixed file duplication problem

This commit is contained in:
Simon 2018-06-12 15:55:52 -04:00
parent 1718bb91ca
commit af2601ee70
3 changed files with 2 additions and 3 deletions

View File

@@ -146,8 +146,8 @@ class RemoteDirectoryCrawler:
try:
path = os.path.join(file.path, file.name, "")
if path not in self.crawled_paths:
-listing = directory.list_dir(path)
self.crawled_paths.add(path)
+listing = directory.list_dir(path)
for f in listing:
if f.is_dir:

View File

@@ -4,7 +4,7 @@ import json
payload = json.dumps({
"website_id": 123,
-"url": "https://frenchy.ga/",
+"url": "http://liminaire.fr/TEXTES/",
"priority": 2,
"callback_type": "",
"callback_args": "{}"

View File

@@ -82,7 +82,6 @@ class ElasticSearchEngine(SearchEngine):
def import_json(self, in_str: str, website_id: int):
import_every = 1000
-print(in_str)
docs = []
for line in in_str.splitlines():