od-database/queue_reddit_links.py
2018-05-28 20:35:04 -04:00

47 lines
1.2 KiB
Python

import praw
from reddit_bot import RedditBot
from database import Database, Website
import od_util
from urllib.parse import urljoin
# Reddit API client. The first positional argument is the praw site name and
# the user agent string identifies this bot to reddit.
reddit = praw.Reddit(
    "opendirectories-bot",
    user_agent="github.com/simon987/od-database v1.0 (by /u/Hexahedr_n)",
)

# Database that the website entries and crawl tasks are written to.
db = Database("db.sqlite3")

# Fetch the 3 newest submissions of the subreddit up front so the listing is
# fully materialised before any of them is processed.
subreddit = reddit.subreddit("opendirectories")
submissions = list(subreddit.new(limit=3))

# Helper used below to ask whether a submission id was already handled.
bot = RedditBot("crawled.txt", reddit)
# Queue each new link submission for crawling unless it is a text post, was
# already handled by the bot, is already known to the database, or does not
# point at something that looks like an open directory.
for s in submissions:
    # Skip self (text) posts and posts that bot.has_crawled() reports as
    # already handled. has_crawled() is only consulted for link posts.
    if s.is_self or bot.has_crawled(s.id):
        continue

    # NOTE(review): urljoin(base, "") returns base unchanged, so this does not
    # normalise the URL (e.g. it adds no trailing slash) -- confirm the intent.
    url = urljoin(s.url, "")

    # Same URL already stored in the database.
    if db.get_website_by_url(url):
        print("Repost!")
        continue

    # presumably website_exists() matches a stored parent directory of this
    # URL -- verify against Database.website_exists.
    if db.website_exists(url):
        print("Parent dir already posted!")
        continue

    if not od_util.is_valid_url(url):
        print("Skipping reddit post: Invalid url")
        continue

    if not od_util.is_od(url):
        print(url)
        print("Skipping reddit post: Does not look like an open directory")
        continue

    # Register the website, then enqueue a crawl task tied to the submission id.
    web_id = db.insert_website(Website(url, "localhost", "reddit_bot"))
    db.enqueue(web_id, s.id, priority=2)  # Higher priority for reddit posts
    print("Queued " + str(web_id))