diff --git a/crawl_server/crawler.py b/crawl_server/crawler.py
index 5f4cda0..503a887 100644
--- a/crawl_server/crawler.py
+++ b/crawl_server/crawler.py
@@ -146,8 +146,8 @@ class RemoteDirectoryCrawler:
             try:
                 path = os.path.join(file.path, file.name, "")
                 if path not in self.crawled_paths:
-                    listing = directory.list_dir(path)
                     self.crawled_paths.add(path)
+                    listing = directory.list_dir(path)
 
                     for f in listing:
                         if f.is_dir:
diff --git a/debug_put.py b/debug_put.py
index 55a61b9..2ee0c76 100644
--- a/debug_put.py
+++ b/debug_put.py
@@ -4,7 +4,7 @@ import json
 
 payload = json.dumps({
     "website_id": 123,
-    "url": "https://frenchy.ga/",
+    "url": "http://liminaire.fr/TEXTES/",
     "priority": 2,
     "callback_type": "",
     "callback_args": "{}"
diff --git a/search/search.py b/search/search.py
index 101cab7..bdbd2d1 100644
--- a/search/search.py
+++ b/search/search.py
@@ -82,7 +82,6 @@ class ElasticSearchEngine(SearchEngine):
 
     def import_json(self, in_str: str, website_id: int):
         import_every = 1000
-        print(in_str)
 
         docs = []
         for line in in_str.splitlines():