mirror of
https://github.com/simon987/od-database.git
synced 2025-12-13 14:59:02 +00:00
Added filter to check if a website can be scanned from its parent directory
This commit is contained in:
@@ -92,6 +92,7 @@ class RemoteDirectoryCrawler:
|
||||
if root_listing:
|
||||
self.crawled_paths.append(path_id)
|
||||
else:
|
||||
logger.info("No files in root listing for " + self.url)
|
||||
return CrawlResult(0, "empty")
|
||||
directory.close()
|
||||
except TimeoutError:
|
||||
|
||||
@@ -105,6 +105,7 @@ class HttpDirectory(RemoteDirectory):
|
||||
path_url = urljoin(self.base_url, path, "")
|
||||
body = self._stream_body(path_url)
|
||||
if not body:
|
||||
logger.info("No body returned @ " + path_url)
|
||||
return None, None
|
||||
anchors = self._parse_links(body)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user