barebones crawl_server microservice

This commit is contained in:
Simon
2018-06-11 19:00:43 -04:00
parent 8421cc0885
commit d849227798
14 changed files with 264 additions and 220 deletions

View File

@@ -71,7 +71,7 @@ class RemoteDirectoryCrawler:
try:
directory = RemoteDirectoryFactory.get_directory(self.url)
-root_listing = directory.list_dir("/dl2/") # todo get path
+root_listing = directory.list_dir("/")
directory.close()
except TimeoutError:
return

View File

@@ -54,7 +54,7 @@ class HttpDirectory(RemoteDirectory):
if self._should_ignore(link):
continue
-file_url = urljoin(path_url, link[1])
+file_url = urljoin(path_url, link.url)
path, file_name = os.path.split(file_url[len(self.base_url) - 1:])
if self._isdir(link):