mirror of
https://github.com/simon987/od-database.git
synced 2025-04-20 02:46:45 +00:00
Fixed file duplication problem
This commit is contained in:
parent
1718bb91ca
commit
af2601ee70
@@ -146,8 +146,8 @@ class RemoteDirectoryCrawler:
|
||||
try:
|
||||
path = os.path.join(file.path, file.name, "")
|
||||
if path not in self.crawled_paths:
|
||||
listing = directory.list_dir(path)
|
||||
self.crawled_paths.add(path)
|
||||
listing = directory.list_dir(path)
|
||||
|
||||
for f in listing:
|
||||
if f.is_dir:
|
||||
|
@@ -4,7 +4,7 @@ import json
|
||||
|
||||
payload = json.dumps({
|
||||
"website_id": 123,
|
||||
"url": "https://frenchy.ga/",
|
||||
"url": "http://liminaire.fr/TEXTES/",
|
||||
"priority": 2,
|
||||
"callback_type": "",
|
||||
"callback_args": "{}"
|
||||
|
@@ -82,7 +82,6 @@ class ElasticSearchEngine(SearchEngine):
|
||||
def import_json(self, in_str: str, website_id: int):
|
||||
import_every = 1000
|
||||
|
||||
print(in_str)
|
||||
docs = []
|
||||
|
||||
for line in in_str.splitlines():
|
||||
|
Loading…
x
Reference in New Issue
Block a user