From 5fdf35ab560ab3fae4b62147cf506c77423cfa2e Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 6 Jun 2018 13:23:23 -0400 Subject: [PATCH] Updated link --- .gitignore | 1 + queue_reddit_links.py | 23 +++++++++++++---------- reddit_bot.py | 4 ++-- 3 files changed, 16 insertions(+), 12 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..62c8935 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.idea/ \ No newline at end of file diff --git a/queue_reddit_links.py b/queue_reddit_links.py index ea23183..9059496 100644 --- a/queue_reddit_links.py +++ b/queue_reddit_links.py @@ -9,8 +9,8 @@ pattern = re.compile("[\[\]\\\()]+") reddit = praw.Reddit('opendirectories-bot', user_agent='github.com/simon987/od-database v1.0 (by /u/Hexahedr_n)') db = Database("db.sqlite3") -subreddit = reddit.subreddit("opendirectories") -# subreddit = reddit.subreddit("test") +# subreddit = reddit.subreddit("opendirectories") +subreddit = reddit.subreddit("test") bot = RedditBot("crawled.txt", reddit) submissions = [] @@ -42,7 +42,7 @@ def handle_subdir_repost(website_id, reddit_obj): # Check comments -for comment in []: #subreddit.comments(limit=50): +for comment in subreddit.comments(limit=50): if not bot.has_crawled(comment): text = pattern.sub(" ", comment.body).strip() @@ -56,7 +56,8 @@ for comment in []: #subreddit.comments(limit=50): if website and not scanned: # in progress - pass + print("In progress") + continue if website and db.website_has_been_scanned(url): bot.log_crawl(comment.id) @@ -66,7 +67,8 @@ for comment in []: #subreddit.comments(limit=50): website_id = db.website_exists(url) if website_id and not scanned: # IN progress - pass + print("Parent in progress") + continue if website_id and db.website_has_been_scanned(url): bot.log_crawl(comment.id) handle_subdir_repost(website_id, comment) @@ -75,7 +77,7 @@ for comment in []: #subreddit.comments(limit=50): if not od_util.is_valid_url(url): print("Skipping reddit comment: Invalid url") bot.log_crawl(comment.id) - bot.reply(comment, "Hello, " + comment.author + ". Unfortunately it seems that the link you " + bot.reply(comment, "Hello, " + str(comment.author) + ". Unfortunately it seems that the link you " "provided: `" + url + "` is not valid. Make sure that you include the" "'`http(s)://` prefix. \n") continue @@ -83,15 +85,16 @@ for comment in []: #subreddit.comments(limit=50): if od_util.is_blacklisted(url): print("Skipping reddit comment: blacklisted") bot.log_crawl(comment.id) - bot.reply(comment, "Hello, " + comment.author + ". Unfortunately my programmer has blacklisted " - "this website. If you think that this is an error, please " + bot.reply(comment, "Hello, " + str(comment.author) + ". Unfortunately my programmer has " + "blacklisted this website. If you think that this is an error, please " "[contact him](https://www.reddit.com/message/compose?to=Hexahedr_n)") + continue if not od_util.is_od(url): print("Skipping reddit comment: Not an OD") print(url) bot.log_crawl(comment.id) - bot.reply(comment, "Hello, " + comment.author + ". Unfortunately it seems that the link you " + bot.reply(comment, "Hello, " + str(comment.author) + ". Unfortunately it seems that the link you " "provided: `" + url + "` does not point to an open directory. This could also" " mean that the website is not responding (in which case, feel free to retry in " "a few minutes). If you think that this is an error, please " @@ -105,7 +108,7 @@ for comment in []: #subreddit.comments(limit=50): # Check posts -for submission in subreddit.new(limit=500): +for submission in subreddit.new(limit=3): submissions.append(submission) diff --git a/reddit_bot.py b/reddit_bot.py index fe314ee..c9dfbae 100644 --- a/reddit_bot.py +++ b/reddit_bot.py @@ -60,8 +60,8 @@ class RedditBot: comment += stat + " \n" if len(stat) > 0 else "" comment += RedditBot.format_stats(stats[stat]) - comment += "[Full Report](https://simon987.net/od-database/website/" + str(website_id) + "/)" - comment += " | [Link list](https://simon987.net/od-database/website/" + str(website_id) + "/links) \n" + comment += "[Full Report](https://od-database.simon987.net/website/" + str(website_id) + "/)" + comment += " | [Link list](https://od-database.simon987.net/website/" + str(website_id) + "/links) \n" comment += "*** \n" comment += RedditBot.bottom_line