commit b3e5d05262b166a3a300214a8f57a80f404d9135
Author: Simon
Date:   Thu Aug 9 15:02:14 2018 -0400

    Initial commit

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..e3bd571
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "od_database"]
+	path = od_database
+	url = https://github.com/simon987/od-database
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..d6d90c4
--- /dev/null
+++ b/config.py
@@ -0,0 +1,2 @@
+API_KEY = ""
+API_URL = "https://od-db.the-eye.eu/api/"
diff --git a/od_db_client.py b/od_db_client.py
new file mode 100644
index 0000000..f3d5e00
--- /dev/null
+++ b/od_db_client.py
@@ -0,0 +1,147 @@
+import requests
+import json
+import re
+import humanfriendly
+import time
+
+from od_database.od_util import truncate_path
+
+
+class OdDatabase:
+
+    def __init__(self, url, token):
+
+        self.url = url
+        self.token = token
+
+    @staticmethod
+    def perform_request(url, method="GET", data=None, json_data=None):
+
+        try:
+            if json_data:
+                return requests.request("POST", url, data=json_data, headers={"Content-Type": "application/json"})
+            else:
+                return requests.request(method, url, data=data)
+        except Exception as e:
+            print(e)
+            return None
+
+    def website_by_url(self, url):
+
+        r = self.perform_request(self.url + "website/by_url?token=" + self.token + "&url=" + url)
+        if not r or r.status_code != 200:
+            return None
+
+        return int(r.text)
+
+    def website_is_blacklisted(self, url):
+
+        r = self.perform_request(self.url + "website/blacklisted?token=" + self.token + "&url=" + url)
+        if not r or r.status_code != 200:
+            return False
+        return r.text == "True"
+
+    def add_website(self, url):
+
+        r = self.perform_request(self.url + "website/add?token=" + self.token + "&url=" + url)
+        if not r or r.status_code != 200:
+            return None
+        return int(r.text)
+
+    def enqueue(self, website_id=None, url=None, priority=1, callback_type="",
+                callback_args=""):
+
+        data = json.dumps({
+            "token": self.token,
+            "website_id": website_id,
+            "url": url,
+            "priority": priority,
+            "callback_type": callback_type,
+            "callback_args": callback_args
+        })
+        r = self.perform_request(self.url + "task/force_enqueue", json_data=data)
+
+        if not r or r.status_code != 200:
+            return False
+        return True
+
+    def search(self, q, p, per_page, sort_order, extensions, size_min, size_max, match_all, fields, date_min, date_max):
+
+        data = json.dumps({
+            "token": self.token,
+            "query": q,
+            "page": p,
+            "per_page": per_page,
+            "sort_order": sort_order,
+            "extensions": extensions,
+            "size_min": size_min,
+            "size_max": size_max,
+            "match_all": match_all,
+            "fields": fields,
+            "date_min": date_min,
+            "date_max": date_max
+        })
+
+        r = self.perform_request(self.url + "search", json_data=data)
+
+        if not r or r.status_code != 200:
+            return None
+        return json.loads(r.text)
+
+    @staticmethod
+    def format_search_hits(hits, query):
+
+        message = str(hits["hits"]["total"]) + " matches found in " + str(hits["took"]) + "ms for query `" + query + "`: \n\n"
+        message += "File | Size | Date \n"
+        message += ":-- | :-- | --: \n"
+
+        for hit in hits["hits"]["hits"]:
+            src = hit["_source"]
+
+            # File name highlight
+            if "name" in hit["highlight"]:
+                hl_name = format_highlight(hit["highlight"]["name"][0])
+            elif "name.nGram" in hit["highlight"]:
+                hl_name = format_highlight(hit["highlight"]["name.nGram"][0])
+            else:
+                hl_name = src["name"]
+
+            # Path highlight
+            if "path" in hit["highlight"]:
+                hl_path = format_highlight(hit["highlight"]["path"][0])
+            else:
+                hl_path = src["path"]
+            hl_path = truncate_path(hl_path, 65)
+            hl_path += "/" if hl_path else ""
+
+            message += "[" + src["website_url"] + "](https://od-db.the-eye.eu/website/" + str(src["website_id"]) + "/)" + hl_path
+            message += hl_name + ("." if src["ext"] else "") + src["ext"] + "| "
+            message += humanfriendly.format_size(src["size"]) + " | "
+            message += time.strftime("%Y-%m-%d", time.gmtime(src["mtime"])) + " \n"
+
+        message += "\n[More results for this query](https://the-eye.eu/search?q=" + query + ") |" \
+                   " [OD-Database](https://od-db.the-eye.eu/)"
+
+        return message
+
+    def get_stats(self, website_id):
+        r = self.perform_request(self.url + "../website/" + str(website_id) + "/json_chart")
+        return json.loads(r.text)
+
+
+def format_highlight(text):
+    # Convert the search backend's highlight tags into markdown bold.
+    # NOTE(review): these four patterns were recovered with empty groups "()",
+    # which would make re.sub insert '**' between every character; "<mark>"
+    # tags are assumed here - confirm against the od-database highlighter.
+
+    text = re.sub(r"(<mark>)\s+", " **", text)
+    text = re.sub(r"(<mark>)", "**", text)
+    text = re.sub(r"\s+(</mark>)", "** ", text)
+    text = re.sub(r"(</mark>)", "**", text)
+
+    return text
+
+
+
+
diff --git a/run.py b/run.py
new file mode 100644
index 0000000..6627ed4
--- /dev/null
+++ b/run.py
@@ -0,0 +1,133 @@
+import praw
+from od_database.reddit_bot import RedditBot
+import re
+import os
+import json
+from od_db_client import OdDatabase
+from od_database.od_util import get_top_directory, is_od, is_valid_url
+import shlex
+import config
+
+
+PATTERN = re.compile("[\[\]\\\()]+")
+od_db_client = OdDatabase(config.API_URL, config.API_KEY)
+
+
+def process_comment(comment, bot):
+
+    text = PATTERN.sub(" ", comment.body).strip()
+
+    if text.startswith("u/opendirectories-bot") or text.startswith("/u/opendirectories-bot"):
+        lines = shlex.split(text,)
+        if len(lines) > 1:
+            text = lines[1]
+            if text.startswith("?"):
+                process_query(comment, bot, text[1:])
+            else:
+                process_url(comment, bot, text)
+
+
+def process_query(comment, bot, query):
+
+    print("Search query '" + query + "'")
+
+    hits = od_db_client.search(
+        query, 0, 10,
+        "score", [],
+        0, 0,
+        False, ["path", "name^5", "name.nGram^2"],
+        0, 0
+    )
+    message = od_db_client.format_search_hits(hits, query) + "\n*** \n" + bot.bottom_line
+    print(message)
+    bot.reply(comment, message)
+
+
+def process_url(comment, bot, url):
+
+    url = os.path.join(url, "")  # Add trailing slash
+
+    if not is_valid_url(url):
+        print("Url is invalid")
+        handle_invalid_url(comment, bot, url)
+        return  # BUG FIX: invalid URLs previously fell through to the checks below
+
+    if od_db_client.website_is_blacklisted(url):
+        print("Website is blacklisted")
+        handle_blacklisted(comment, bot)
+        return
+
+    url = get_top_directory(url)
+    website_id = od_db_client.website_by_url(url)
+
+    if not website_id:
+        print("Website does not exist")
+
+        if not is_od(url):
+            print("Website is not an od")
+            handle_non_od_website(comment, bot, url)
+            return
+
+        handle_new_website(comment, bot, url)
+    else:
+        print("Website already exists")
+        handle_existing_website(comment, bot, website_id)
+
+
+def handle_invalid_url(comment, bot, url):
+    bot.reply(comment, "Hello, " + str(comment.author) + ". Unfortunately it seems that the link you provided: `" +
+              url + "` is not valid. Make sure that you include the `http(s)://` prefix. \n*** \n" + bot.bottom_line)
+
+
+def handle_blacklisted(comment, bot):
+    bot.reply(comment, "Hello, " + str(comment.author) + ". Unfortunately my programmer has blacklisted this website."
+              " If you think that this is an error, please "
+              "[contact him](https://old.reddit.com/message/compose?to=Hexahedr_n) \n*** \n" + bot.bottom_line)
+
+
+def handle_non_od_website(comment, bot, url):
+    bot.reply(comment, "Hello, " + str(comment.author) + ". Unfortunately it seems that the link you provided: `" +
+              url + "` does not point to an open directory. This could also mean that the website is not responding "
+              "(in which case, feel free to retry in a few minutes). If you think that this is an error, please "
+              "[contact my programmer](https://old.reddit.com/message/compose?to=Hexahedr_n) \n*** \n" +
+              bot.bottom_line)
+
+
+def handle_new_website(comment, bot, url):
+
+    website_id = od_db_client.add_website(url)
+    if website_id:
+        reply = bot.reply(comment, "Hello, " + str(comment.author) + ". This website was added to od-database and will "
+                          "be processed as soon as a crawl server is available. Thank you for your "
+                          "contribution to the database! \nI will edit this comment when the website has"
+                          " been crawled and indexed. Website id is `" + str(website_id) + "`. \n*** \n" +
+                          bot.bottom_line)
+
+        od_db_client.enqueue(website_id=website_id, url=url, priority=2, callback_type="reddit_comment",
+                             callback_args=json.dumps({
+                                 "comment_id": reply.id
+                             }))
+    else:
+        print("Could not create new website")
+
+
+def handle_existing_website(comment, bot, website_id):
+
+    stats = od_db_client.get_stats(website_id)
+    message_header = "Hello, " + str(comment.author) + ". This website was crawled and indexed by od-database at `" + \
+                     stats["report_time"] + "`. "
+
+    message = bot.get_comment(stats, website_id, message_header)
+    print(message)
+    bot.reply(comment, message)
+
+
+if __name__ == "__main__":
+    reddit = praw.Reddit('opendirectories-bot',
+                         user_agent='github.com/simon987/opendirectories-bot-new (by /u/Hexahedr_n)')
+    bot = RedditBot("processed.txt", reddit)
+    subreddit = reddit.subreddit("test")
+
+    for comment in subreddit.comments(limit=50):
+        if not bot.has_crawled(comment):
+            process_comment(comment, bot)