diff --git a/crawl_server/crawler.py b/crawl_server/crawler.py
index b103167..760827f 100644
--- a/crawl_server/crawler.py
+++ b/crawl_server/crawler.py
@@ -4,6 +4,7 @@ from urllib.parse import urlparse, urljoin
 from threading import Thread
 from queue import Queue, Empty
 from crawl_server import logger
+from pybloom_live import ScalableBloomFilter
 
 
 class TooManyConnectionsError(Exception):
@@ -21,11 +22,11 @@ class File:
         self.is_dir = is_dir
 
     def __bytes__(self):
-        return b"|".join([
+        return b"".join([
             self.name.encode(),
             b"D" if self.is_dir else b"F",
-            str(self.size).encode(),
-            str(self.mtime).encode(),
+            self.size.to_bytes(6, byteorder="little"),
+            self.mtime.to_bytes(6, byteorder="little"),
         ])
 
     def to_json(self):
@@ -81,7 +82,7 @@ class RemoteDirectoryCrawler:
     def __init__(self, url, max_threads: int):
         self.url = url
         self.max_threads = max_threads
-        self.crawled_paths = list()
+        self.crawled_paths = ScalableBloomFilter(error_rate=0.0001)
         self.status_code = "success"
 
     def crawl_directory(self, out_file: str) -> CrawlResult:
@@ -91,7 +92,7 @@ class RemoteDirectoryCrawler:
                 logger.info("Crawling directory " + self.url + " with " + str(type(directory)))
                 path_id, root_listing = directory.list_dir(urlparse(self.url).path)
                 if root_listing:
-                    self.crawled_paths.append(path_id)
+                    self.crawled_paths.add(path_id)
                 else:
                     logger.info("No files in root listing for " + self.url)
                     return CrawlResult(0, "empty")
@@ -152,7 +153,7 @@ class RemoteDirectoryCrawler:
             try:
                 path_id, listing = directory.list_dir(path)
                 if len(listing) > 0 and path_id not in self.crawled_paths:
-                    self.crawled_paths.append(path_id)
+                    self.crawled_paths.add(path_id)
 
                     for f in listing:
                         if f.is_dir:
diff --git a/crawl_server/remote_http.py b/crawl_server/remote_http.py
index 531999c..368aaac 100644
--- a/crawl_server/remote_http.py
+++ b/crawl_server/remote_http.py
@@ -102,7 +102,7 @@ class HttpDirectory(RemoteDirectory):
     def list_dir(self, path):
 
         current_dir_name = path[path.rstrip("/").rfind("/") + 1: -1]
-        path_identifier = hashlib.sha1(current_dir_name.encode())
+        path_identifier = hashlib.md5(current_dir_name.encode())
         path_url = urljoin(self.base_url, path, "")
         body = self._stream_body(path_url)
         anchors = self._parse_links(body)