mirror of
https://github.com/simon987/od-database.git
synced 2025-10-24 03:16:53 +00:00
Updated link
This commit is contained in:
parent
a92d8fe44e
commit
5fdf35ab56
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
.idea/
|
@ -9,8 +9,8 @@ pattern = re.compile("[\[\]\\\()]+")
|
|||||||
reddit = praw.Reddit('opendirectories-bot',
|
reddit = praw.Reddit('opendirectories-bot',
|
||||||
user_agent='github.com/simon987/od-database v1.0 (by /u/Hexahedr_n)')
|
user_agent='github.com/simon987/od-database v1.0 (by /u/Hexahedr_n)')
|
||||||
db = Database("db.sqlite3")
|
db = Database("db.sqlite3")
|
||||||
subreddit = reddit.subreddit("opendirectories")
|
# subreddit = reddit.subreddit("opendirectories")
|
||||||
# subreddit = reddit.subreddit("test")
|
subreddit = reddit.subreddit("test")
|
||||||
bot = RedditBot("crawled.txt", reddit)
|
bot = RedditBot("crawled.txt", reddit)
|
||||||
|
|
||||||
submissions = []
|
submissions = []
|
||||||
@ -42,7 +42,7 @@ def handle_subdir_repost(website_id, reddit_obj):
|
|||||||
|
|
||||||
|
|
||||||
# Check comments
|
# Check comments
|
||||||
for comment in []: #subreddit.comments(limit=50):
|
for comment in subreddit.comments(limit=50):
|
||||||
|
|
||||||
if not bot.has_crawled(comment):
|
if not bot.has_crawled(comment):
|
||||||
text = pattern.sub(" ", comment.body).strip()
|
text = pattern.sub(" ", comment.body).strip()
|
||||||
@ -56,7 +56,8 @@ for comment in []: #subreddit.comments(limit=50):
|
|||||||
|
|
||||||
if website and not scanned:
|
if website and not scanned:
|
||||||
# in progress
|
# in progress
|
||||||
pass
|
print("In progress")
|
||||||
|
continue
|
||||||
|
|
||||||
if website and db.website_has_been_scanned(url):
|
if website and db.website_has_been_scanned(url):
|
||||||
bot.log_crawl(comment.id)
|
bot.log_crawl(comment.id)
|
||||||
@ -66,7 +67,8 @@ for comment in []: #subreddit.comments(limit=50):
|
|||||||
website_id = db.website_exists(url)
|
website_id = db.website_exists(url)
|
||||||
if website_id and not scanned:
|
if website_id and not scanned:
|
||||||
# IN progress
|
# IN progress
|
||||||
pass
|
print("Parent in progress")
|
||||||
|
continue
|
||||||
if website_id and db.website_has_been_scanned(url):
|
if website_id and db.website_has_been_scanned(url):
|
||||||
bot.log_crawl(comment.id)
|
bot.log_crawl(comment.id)
|
||||||
handle_subdir_repost(website_id, comment)
|
handle_subdir_repost(website_id, comment)
|
||||||
@ -75,7 +77,7 @@ for comment in []: #subreddit.comments(limit=50):
|
|||||||
if not od_util.is_valid_url(url):
|
if not od_util.is_valid_url(url):
|
||||||
print("Skipping reddit comment: Invalid url")
|
print("Skipping reddit comment: Invalid url")
|
||||||
bot.log_crawl(comment.id)
|
bot.log_crawl(comment.id)
|
||||||
bot.reply(comment, "Hello, " + comment.author + ". Unfortunately it seems that the link you "
|
bot.reply(comment, "Hello, " + str(comment.author) + ". Unfortunately it seems that the link you "
|
||||||
"provided: `" + url + "` is not valid. Make sure that you include the"
|
"provided: `" + url + "` is not valid. Make sure that you include the"
|
||||||
"'`http(s)://` prefix. \n")
|
"'`http(s)://` prefix. \n")
|
||||||
continue
|
continue
|
||||||
@ -83,15 +85,16 @@ for comment in []: #subreddit.comments(limit=50):
|
|||||||
if od_util.is_blacklisted(url):
|
if od_util.is_blacklisted(url):
|
||||||
print("Skipping reddit comment: blacklisted")
|
print("Skipping reddit comment: blacklisted")
|
||||||
bot.log_crawl(comment.id)
|
bot.log_crawl(comment.id)
|
||||||
bot.reply(comment, "Hello, " + comment.author + ". Unfortunately my programmer has blacklisted "
|
bot.reply(comment, "Hello, " + str(comment.author) + ". Unfortunately my programmer has "
|
||||||
"this website. If you think that this is an error, please "
|
"blacklisted this website. If you think that this is an error, please "
|
||||||
"[contact him](https://www.reddit.com/message/compose?to=Hexahedr_n)")
|
"[contact him](https://www.reddit.com/message/compose?to=Hexahedr_n)")
|
||||||
|
continue
|
||||||
|
|
||||||
if not od_util.is_od(url):
|
if not od_util.is_od(url):
|
||||||
print("Skipping reddit comment: Not an OD")
|
print("Skipping reddit comment: Not an OD")
|
||||||
print(url)
|
print(url)
|
||||||
bot.log_crawl(comment.id)
|
bot.log_crawl(comment.id)
|
||||||
bot.reply(comment, "Hello, " + comment.author + ". Unfortunately it seems that the link you "
|
bot.reply(comment, "Hello, " + str(comment.author) + ". Unfortunately it seems that the link you "
|
||||||
"provided: `" + url + "` does not point to an open directory. This could also"
|
"provided: `" + url + "` does not point to an open directory. This could also"
|
||||||
" mean that the website is not responding (in which case, feel free to retry in "
|
" mean that the website is not responding (in which case, feel free to retry in "
|
||||||
"a few minutes). If you think that this is an error, please "
|
"a few minutes). If you think that this is an error, please "
|
||||||
@ -105,7 +108,7 @@ for comment in []: #subreddit.comments(limit=50):
|
|||||||
|
|
||||||
|
|
||||||
# Check posts
|
# Check posts
|
||||||
for submission in subreddit.new(limit=500):
|
for submission in subreddit.new(limit=3):
|
||||||
submissions.append(submission)
|
submissions.append(submission)
|
||||||
|
|
||||||
|
|
||||||
|
@ -60,8 +60,8 @@ class RedditBot:
|
|||||||
comment += stat + " \n" if len(stat) > 0 else ""
|
comment += stat + " \n" if len(stat) > 0 else ""
|
||||||
comment += RedditBot.format_stats(stats[stat])
|
comment += RedditBot.format_stats(stats[stat])
|
||||||
|
|
||||||
comment += "[Full Report](https://simon987.net/od-database/website/" + str(website_id) + "/)"
|
comment += "[Full Report](https://od-database.simon987.net/website/" + str(website_id) + "/)"
|
||||||
comment += " | [Link list](https://simon987.net/od-database/website/" + str(website_id) + "/links) \n"
|
comment += " | [Link list](https://od-database.simon987.net/website/" + str(website_id) + "/links) \n"
|
||||||
comment += "*** \n"
|
comment += "*** \n"
|
||||||
comment += RedditBot.bottom_line
|
comment += RedditBot.bottom_line
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user