mirror of
https://github.com/simon987/od-database.git
synced 2025-12-13 23:09:01 +00:00
Initial commit
This commit is contained in:
46
queue_reddit_links.py
Normal file
46
queue_reddit_links.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import praw
|
||||
from reddit_bot import RedditBot
|
||||
from database import Database, Website
|
||||
import od_util
|
||||
from urllib.parse import urljoin
|
||||
|
||||
# Script body: look at the newest submissions of r/opendirectories and queue
# every link post that passes the checks below for crawling.

reddit = praw.Reddit(
    'opendirectories-bot',
    user_agent='github.com/simon987/od-database v1.0 (by /u/Hexahedr_n)',
)
db = Database("db.sqlite3")
subreddit = reddit.subreddit("opendirectories")

# Materialise the newest submissions (at most 3 per run) before processing.
submissions = list(subreddit.new(limit=3))

bot = RedditBot("crawled.txt", reddit)

for post in submissions:
    # Self posts are ignored, as are submissions for which
    # bot.has_crawled() reports true.
    if post.is_self or bot.has_crawled(post.id):
        continue

    # NOTE(review): urljoin() with an empty second argument returns the base
    # URL unchanged, so this is effectively `url = post.url` -- confirm
    # whether some normalisation (e.g. a trailing slash) was intended.
    url = urljoin(post.url, "")

    # Nothing to do when the database already returns a website for this URL.
    if db.get_website_by_url(url):
        continue

    if db.website_exists(url):
        print("Repost!")
        continue

    if not od_util.is_valid_url(url):
        # NOTE(review): this message is emitted on the invalid-URL branch;
        # the wording looks like it belongs to a different check -- confirm.
        print("Parent dir already posted!")
        continue

    # URLs rejected by od_util.is_od() are only printed, never queued.
    if not od_util.is_od(url):
        print(url)
        continue

    web_id = db.insert_website(Website(url, "localhost", "reddit_bot"))
    db.enqueue(web_id, post.id, priority=2)  # Higher priority for reddit posts
    print(f"Queued {web_id!s}")
|
||||
Reference in New Issue
Block a user