Updated link

This commit is contained in:
Simon 2018-06-06 13:23:23 -04:00
parent a92d8fe44e
commit 5fdf35ab56
3 changed files with 16 additions and 12 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
.idea/

View File

@@ -9,8 +9,8 @@ pattern = re.compile("[\[\]\\\()]+")
reddit = praw.Reddit('opendirectories-bot', reddit = praw.Reddit('opendirectories-bot',
user_agent='github.com/simon987/od-database v1.0 (by /u/Hexahedr_n)') user_agent='github.com/simon987/od-database v1.0 (by /u/Hexahedr_n)')
db = Database("db.sqlite3") db = Database("db.sqlite3")
subreddit = reddit.subreddit("opendirectories") # subreddit = reddit.subreddit("opendirectories")
# subreddit = reddit.subreddit("test") subreddit = reddit.subreddit("test")
bot = RedditBot("crawled.txt", reddit) bot = RedditBot("crawled.txt", reddit)
submissions = [] submissions = []
@@ -42,7 +42,7 @@ def handle_subdir_repost(website_id, reddit_obj):
# Check comments # Check comments
for comment in []: #subreddit.comments(limit=50): for comment in subreddit.comments(limit=50):
if not bot.has_crawled(comment): if not bot.has_crawled(comment):
text = pattern.sub(" ", comment.body).strip() text = pattern.sub(" ", comment.body).strip()
@@ -56,7 +56,8 @@ for comment in []: #subreddit.comments(limit=50):
if website and not scanned: if website and not scanned:
# in progress # in progress
pass print("In progress")
continue
if website and db.website_has_been_scanned(url): if website and db.website_has_been_scanned(url):
bot.log_crawl(comment.id) bot.log_crawl(comment.id)
@@ -66,7 +67,8 @@ for comment in []: #subreddit.comments(limit=50):
website_id = db.website_exists(url) website_id = db.website_exists(url)
if website_id and not scanned: if website_id and not scanned:
# IN progress # IN progress
pass print("Parent in progress")
continue
if website_id and db.website_has_been_scanned(url): if website_id and db.website_has_been_scanned(url):
bot.log_crawl(comment.id) bot.log_crawl(comment.id)
handle_subdir_repost(website_id, comment) handle_subdir_repost(website_id, comment)
@@ -75,7 +77,7 @@ for comment in []: #subreddit.comments(limit=50):
if not od_util.is_valid_url(url): if not od_util.is_valid_url(url):
print("Skipping reddit comment: Invalid url") print("Skipping reddit comment: Invalid url")
bot.log_crawl(comment.id) bot.log_crawl(comment.id)
bot.reply(comment, "Hello, " + comment.author + ". Unfortunately it seems that the link you " bot.reply(comment, "Hello, " + str(comment.author) + ". Unfortunately it seems that the link you "
"provided: `" + url + "` is not valid. Make sure that you include the" "provided: `" + url + "` is not valid. Make sure that you include the"
"'`http(s)://` prefix. \n") "'`http(s)://` prefix. \n")
continue continue
@@ -83,15 +85,16 @@ for comment in []: #subreddit.comments(limit=50):
if od_util.is_blacklisted(url): if od_util.is_blacklisted(url):
print("Skipping reddit comment: blacklisted") print("Skipping reddit comment: blacklisted")
bot.log_crawl(comment.id) bot.log_crawl(comment.id)
bot.reply(comment, "Hello, " + comment.author + ". Unfortunately my programmer has blacklisted " bot.reply(comment, "Hello, " + str(comment.author) + ". Unfortunately my programmer has "
"this website. If you think that this is an error, please " "blacklisted this website. If you think that this is an error, please "
"[contact him](https://www.reddit.com/message/compose?to=Hexahedr_n)") "[contact him](https://www.reddit.com/message/compose?to=Hexahedr_n)")
continue
if not od_util.is_od(url): if not od_util.is_od(url):
print("Skipping reddit comment: Not an OD") print("Skipping reddit comment: Not an OD")
print(url) print(url)
bot.log_crawl(comment.id) bot.log_crawl(comment.id)
bot.reply(comment, "Hello, " + comment.author + ". Unfortunately it seems that the link you " bot.reply(comment, "Hello, " + str(comment.author) + ". Unfortunately it seems that the link you "
"provided: `" + url + "` does not point to an open directory. This could also" "provided: `" + url + "` does not point to an open directory. This could also"
" mean that the website is not responding (in which case, feel free to retry in " " mean that the website is not responding (in which case, feel free to retry in "
"a few minutes). If you think that this is an error, please " "a few minutes). If you think that this is an error, please "
@@ -105,7 +108,7 @@ for comment in []: #subreddit.comments(limit=50):
# Check posts # Check posts
for submission in subreddit.new(limit=500): for submission in subreddit.new(limit=3):
submissions.append(submission) submissions.append(submission)

View File

@@ -60,8 +60,8 @@ class RedditBot:
comment += stat + " \n" if len(stat) > 0 else "" comment += stat + " \n" if len(stat) > 0 else ""
comment += RedditBot.format_stats(stats[stat]) comment += RedditBot.format_stats(stats[stat])
comment += "[Full Report](https://simon987.net/od-database/website/" + str(website_id) + "/)" comment += "[Full Report](https://od-database.simon987.net/website/" + str(website_id) + "/)"
comment += " | [Link list](https://simon987.net/od-database/website/" + str(website_id) + "/links) \n" comment += " | [Link list](https://od-database.simon987.net/website/" + str(website_id) + "/links) \n"
comment += "*** \n" comment += "*** \n"
comment += RedditBot.bottom_line comment += RedditBot.bottom_line