diff --git a/crawl_server/crawler.py b/crawl_server/crawler.py
index 4acb78a..358786a 100644
--- a/crawl_server/crawler.py
+++ b/crawl_server/crawler.py
@@ -20,8 +20,13 @@ class File:
         self.path = path
         self.is_dir = is_dir
 
-    def __str__(self):
-        return ("DIR " if self.is_dir else "FILE ") + self.path + "/" + self.name
+    def __bytes__(self):
+        return b"|".join([
+            self.name.encode(),
+            b"D" if self.is_dir else b"F",
+            str(self.size).encode(),
+            str(self.mtime).encode(),
+        ])
 
     def to_json(self):
         return ujson.dumps({
@@ -39,7 +44,7 @@ class RemoteDirectory:
     def __init__(self, base_url):
         self.base_url = base_url
 
-    def list_dir(self, path: str) -> list:
+    def list_dir(self, path: str):
         raise NotImplementedError
 
     def close(self):
@@ -82,8 +87,8 @@ class RemoteDirectoryCrawler:
 
         try:
             directory = RemoteDirectoryFactory.get_directory(self.url)
-            root_listing = directory.list_dir("")
-            self.crawled_paths.append("")
+            path, root_listing = directory.list_dir("")
+            self.crawled_paths.append(path)
             directory.close()
         except TimeoutError:
             return CrawlResult(0, "timeout")
@@ -136,9 +141,9 @@ class RemoteDirectoryCrawler:
                 break
 
             try:
-                if path not in self.crawled_paths:
-                    self.crawled_paths.append(path)
-                    listing = directory.list_dir(path)
+                path_id, listing = directory.list_dir(path)
+                if len(listing) > 0 and path_id not in self.crawled_paths:
+                    self.crawled_paths.append(path_id)
                     timeout_retries = RemoteDirectoryCrawler.MAX_TIMEOUT_RETRIES
 
                     for f in listing:
@@ -148,6 +153,9 @@ class RemoteDirectoryCrawler:
                             files_q.put(f)
                     import sys
                     print("LISTED " + repr(path) + "dirs:" + str(in_q.qsize()))
+                else:
+                    pass
+                    # print("SKIPPED: " + path + ", dropped " + str(len(listing)))
             except TooManyConnectionsError:
                 print("Too many connections")
                 # Kill worker and resubmit listing task
diff --git a/crawl_server/remote_ftp.py b/crawl_server/remote_ftp.py
index 3c81521..d5b500a 100644
--- a/crawl_server/remote_ftp.py
+++ b/crawl_server/remote_ftp.py
@@ -44,7 +44,7 @@ class FtpDirectory(RemoteDirectory):
                 print("Connection error; reconnecting..." + e.strerror + " " + str(e.errno))
                 time.sleep(2 * random.uniform(0.5, 1.5))
 
-    def list_dir(self, path) -> list:
+    def list_dir(self, path):
         if not self.ftp:
             # No connection - assuming that connection was dropped because too many
             raise TooManyConnectionsError()
@@ -65,7 +65,7 @@ class FtpDirectory(RemoteDirectory):
                         is_dir=is_dir,
                         path=path
                     ))
-                return results
+                return path, results
             except ftputil.error.ParserError as e:
                 print("TODO: fix parsing error: " + e.strerror + " @ " + str(e.file_name))
                 break
@@ -82,7 +82,7 @@ class FtpDirectory(RemoteDirectory):
             print(type(e))
             raise e
 
-        return []
+        return path, []
 
     def try_stat(self, path):
 
diff --git a/crawl_server/remote_http.py b/crawl_server/remote_http.py
index c8134e1..a5060bf 100644
--- a/crawl_server/remote_http.py
+++ b/crawl_server/remote_http.py
@@ -9,6 +9,7 @@ from requests.exceptions import RequestException
 from multiprocessing.pool import ThreadPool
 import config
 from dateutil.parser import parse as parse_date
+import hashlib
 
 
 class Anchor:
@@ -66,7 +67,9 @@ class HttpDirectory(RemoteDirectory):
         "?MA",
         "?SA",
         "?DA",
-        "?ND"
+        "?ND",
+        "?C=N&O=A",
+        "?C=N&O=A"
     )
 
     MAX_RETRIES = 3
@@ -79,31 +82,40 @@ class HttpDirectory(RemoteDirectory):
 
     def list_dir(self, path):
 
+        current_dir_name = path[path.rstrip("/").rfind("/") + 1: -1]
+        path_identifier = hashlib.sha1(current_dir_name.encode())
         path_url = urljoin(self.base_url, path, "")
         body = self._stream_body(path_url)
         if not body:
-            return None
+            return None, None
 
         anchors = self._parse_links(body)
 
         urls_to_request = []
+        files = []
 
         for anchor in anchors:
             if self._should_ignore(self.base_url, anchor):
                 continue
 
             if self._isdir(anchor):
-                yield File(
+
+                directory = File(
                     name=anchor.href,
-                    mtime=None,
-                    size=None,
+                    mtime=0,
+                    size=0,
                     path=path,
                     is_dir=True
                 )
+                path_identifier.update(bytes(directory))
+                files.append(directory)
             else:
                 urls_to_request.append(urljoin(path_url, anchor.href))
 
         for file in self.request_files(urls_to_request):
-            yield file
+            files.append(file)
+            path_identifier.update(bytes(file))
+
+        return path_identifier.hexdigest(), files
 
     def request_files(self, urls_to_request: list) -> list:
@@ -168,11 +180,14 @@ class HttpDirectory(RemoteDirectory):
     def _parse_links(body):
 
         parser = HTMLAnchorParser()
+        anchors = []
 
         for chunk in body:
             parser.feed(chunk)
             for anchor in parser.anchors:
-                yield anchor
+                anchors.append(anchor)
+
+        return anchors
 
     @staticmethod
     def _isdir(link: Anchor):
@@ -180,7 +195,7 @@ class HttpDirectory(RemoteDirectory):
 
     @staticmethod
     def _should_ignore(base_url, link: Anchor):
-        if link.text == "../" or link.href == "../" or link.href == "./" \
+        if link.text == "../" or link.href == "../" or link.href == "./" or link.href == "" \
                 or link.href.endswith(HttpDirectory.BLACK_LIST):
             return True
 
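Taken together, these hunks change list_dir() from a generator into a method that returns a (path identifier, listing) pair: the HTTP implementation seeds a SHA-1 digest with the current directory name and folds in each entry's File.__bytes__ serialization ("name|D-or-F|size|mtime"), and the crawler records that digest in crawled_paths, apparently so that a directory whose listing it has already seen (for example the same content reachable through a symlink or an aliased URL) is skipped on a later visit. Below is a minimal standalone sketch of that de-duplication idea; it uses plain dicts for entries and a hypothetical listing_identifier() helper rather than the classes in the patch.

import hashlib


def listing_identifier(dir_name: str, listing: list) -> str:
    # Same scheme as File.__bytes__ above: "name|D-or-F|size|mtime" per entry,
    # folded into a SHA-1 digest seeded with the directory name.
    digest = hashlib.sha1(dir_name.encode())
    for entry in listing:
        digest.update(b"|".join([
            entry["name"].encode(),
            b"D" if entry["is_dir"] else b"F",
            str(entry["size"]).encode(),
            str(entry["mtime"]).encode(),
        ]))
    return digest.hexdigest()


crawled_paths = []  # the crawler keeps listing digests instead of raw paths

listing = [
    {"name": "a.bin", "is_dir": False, "size": 1024, "mtime": 1530000000},
    {"name": "sub", "is_dir": True, "size": 0, "mtime": 0},
]

path_id = listing_identifier("files", listing)
if listing and path_id not in crawled_paths:
    crawled_paths.append(path_id)  # first sighting of this listing: crawl it
# A second URL that resolves to the same listing produces the same path_id
# and fails the membership test, which is what the new
# `path_id not in self.crawled_paths` check in crawler.py relies on.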