mirror of
https://github.com/simon987/od-database.git
synced 2025-04-19 10:26:44 +00:00
Duplicate website w/ reddit post + refactor
This commit is contained in:
parent
7f1e12cc3c
commit
537228444b
71
app.py
71
app.py
@ -144,46 +144,47 @@ def submit():
|
|||||||
return render_template("submit.html", queue=db.queue(), recaptcha=recaptcha)
|
return render_template("submit.html", queue=db.queue(), recaptcha=recaptcha)
|
||||||
|
|
||||||
|
|
||||||
|
def try_enqueue(url):
    """Validate a submitted url and, if it is acceptable, queue it for crawling.

    The checks run in this order and the first one that fails decides the
    result: exact duplicate, parent directory already posted, url validity,
    blacklist, open-directory check. When every check passes, the website is
    inserted (recording the current request's remote address and user agent)
    and its id is enqueued.

    :param url: url submitted by the user
    :return: ``(message, category)`` tuple. ``category`` is ``"danger"`` when
        the url is rejected and ``"success"`` once it has been enqueued.
        ``message`` may contain HTML markup.
    """
    # Add a trailing slash so equivalent urls compare equal.
    url = os.path.join(url, "")

    if db.get_website_by_url(url):
        return "Website already exists", "danger"

    if db.website_exists(url):
        return "A parent directory of this url has already been posted", "danger"

    if not od_util.is_valid_url(url):
        # The scheme goes in front of the url, so it is a prefix, not a suffix.
        return "<strong>Error:</strong> Invalid url. Make sure to include the http(s):// prefix. " \
               "FTP is not supported", "danger"

    if od_util.is_blacklisted(url):
        return "<strong>Error:</strong> " \
               "Sorry, this website has been blacklisted. If you think " \
               "this is an error, please <a href='/contribute'>contact me</a>.", "danger"

    if not od_util.is_od(url):
        # Trailing space after the closing tag keeps "Error:" from running
        # into the sentence, matching the other error messages.
        return "<strong>Error:</strong> " \
               "The anti-spam algorithm determined that the submitted url is not " \
               "an open directory or the server is not responding. If you think " \
               "this is an error, please <a href='/contribute'>contact me</a>.", "danger"

    web_id = db.insert_website(Website(url, str(request.remote_addr), str(request.user_agent)))
    db.enqueue(web_id)

    return "The website has been added to the queue", "success"
|
||||||
|
|
||||||
|
|
||||||
@app.route("/enqueue", methods=["POST"])
|
@app.route("/enqueue", methods=["POST"])
|
||||||
def enqueue():
|
def enqueue():
|
||||||
if recaptcha.verify():
|
if recaptcha.verify():
|
||||||
|
|
||||||
url = os.path.join(request.form.get("url"), "")
|
url = os.path.join(request.form.get("url"), "")
|
||||||
|
message, msg_type = try_enqueue(url)
|
||||||
website = db.get_website_by_url(url)
|
flash(message, msg_type)
|
||||||
|
|
||||||
if website:
|
|
||||||
flash("Website already exists", "danger")
|
|
||||||
return redirect("/submit")
|
|
||||||
|
|
||||||
website = db.website_exists(url)
|
|
||||||
if website:
|
|
||||||
flash("A parent directory of this url has already been posted", "danger")
|
|
||||||
return redirect("/submit")
|
|
||||||
|
|
||||||
if not od_util.is_valid_url(url):
|
|
||||||
flash("<strong>Error:</strong> "
|
|
||||||
"Invalid url. Make sure to include the http(s):// suffix. "
|
|
||||||
"FTP is not supported", "danger")
|
|
||||||
return redirect("/submit")
|
|
||||||
|
|
||||||
if od_util.is_blacklisted(url):
|
|
||||||
flash("<strong>Error:</strong> "
|
|
||||||
"Sorry, this website has been blacklisted. If you think "
|
|
||||||
"this is an error, please <a href='/contribute'>contact me</a>.", "danger")
|
|
||||||
return redirect("/submit")
|
|
||||||
|
|
||||||
if not od_util.is_od(url):
|
|
||||||
flash("<strong>Error:</strong>"
|
|
||||||
"The anti-spam algorithm determined that the submitted url is not "
|
|
||||||
"an open directory or the server is not responding. If you think "
|
|
||||||
"this is an error, please <a href='/contribute'>contact me</a>.", "danger")
|
|
||||||
|
|
||||||
return redirect("/submit")
|
|
||||||
|
|
||||||
web_id = db.insert_website(Website(url, str(request.remote_addr), str(request.user_agent)))
|
|
||||||
db.enqueue(web_id)
|
|
||||||
flash("The website has been added to the queue", "success")
|
|
||||||
|
|
||||||
return redirect("/submit")
|
return redirect("/submit")
|
||||||
else:
|
else:
|
||||||
|
@ -57,22 +57,20 @@ for comment in subreddit.comments(limit=50):
|
|||||||
|
|
||||||
website = db.get_website_by_url(url)
|
website = db.get_website_by_url(url)
|
||||||
|
|
||||||
if website and not scanned:
|
if website:
|
||||||
# in progress
|
if not scanned:
|
||||||
print(url)
|
# in progress
|
||||||
print("In progress")
|
print(url)
|
||||||
continue
|
print("In progress")
|
||||||
|
continue
|
||||||
if website and db.website_has_been_scanned(url):
|
|
||||||
handle_exact_repost(website.id, comment)
|
handle_exact_repost(website.id, comment)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
website_id = db.website_exists(url)
|
website_id = db.website_exists(url)
|
||||||
if website_id and not scanned:
|
if website_id:
|
||||||
# IN progress
|
if not scanned:
|
||||||
print("Parent in progress")
|
print("Parent in progress")
|
||||||
continue
|
continue
|
||||||
if website_id and db.website_has_been_scanned(url):
|
|
||||||
handle_subdir_repost(website_id, comment)
|
handle_subdir_repost(website_id, comment)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@ -116,16 +114,25 @@ for s in submissions:
|
|||||||
if not bot.has_crawled(s.id):
|
if not bot.has_crawled(s.id):
|
||||||
|
|
||||||
url = os.path.join(s.url, "") # add trailing slash
|
url = os.path.join(s.url, "") # add trailing slash
|
||||||
|
scanned = db.website_has_been_scanned(url)
|
||||||
|
|
||||||
website = db.get_website_by_url(url)
|
website = db.get_website_by_url(url)
|
||||||
|
|
||||||
if website:
|
if website:
|
||||||
|
if not scanned:
|
||||||
|
print(url)
|
||||||
|
print("In progress")
|
||||||
|
continue
|
||||||
handle_exact_repost(website.id, s)
|
handle_exact_repost(website.id, s)
|
||||||
|
continue
|
||||||
|
|
||||||
website_id = db.website_exists(url)
|
website_id = db.website_exists(url)
|
||||||
if website_id:
|
if website_id:
|
||||||
bot.log_crawl(s.id)
|
if not scanned:
|
||||||
|
print("Parent in progress")
|
||||||
|
continue
|
||||||
handle_subdir_repost(website_id, s)
|
handle_subdir_repost(website_id, s)
|
||||||
|
continue
|
||||||
|
|
||||||
if not od_util.is_valid_url(url):
|
if not od_util.is_valid_url(url):
|
||||||
print("Skipping reddit post: Invalid url")
|
print("Skipping reddit post: Invalid url")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user