diff --git a/app.py b/app.py index 93de8dd..04aaf91 100644 --- a/app.py +++ b/app.py @@ -144,46 +144,47 @@ def submit(): return render_template("submit.html", queue=db.queue(), recaptcha=recaptcha) +def try_enqueue(url): + + url = os.path.join(url, "") + website = db.get_website_by_url(url) + + if website: + return "Website already exists", "danger" + + website = db.website_exists(url) + if website: + return "A parent directory of this url has already been posted", "danger" + + if not od_util.is_valid_url(url): + return "Error: Invalid url. Make sure to include the http(s):// suffix. " \ + "FTP is not supported", "danger" + + if od_util.is_blacklisted(url): + + return "Error: " \ + "Sorry, this website has been blacklisted. If you think " \ + "this is an error, please contact me.", "danger" + + if not od_util.is_od(url): + return "Error:" \ + "The anti-spam algorithm determined that the submitted url is not " \ + "an open directory or the server is not responding. If you think " \ + "this is an error, please contact me.", "danger" + + web_id = db.insert_website(Website(url, str(request.remote_addr), str(request.user_agent))) + db.enqueue(web_id) + + return "The website has been added to the queue", "success" + + @app.route("/enqueue", methods=["POST"]) def enqueue(): if recaptcha.verify(): url = os.path.join(request.form.get("url"), "") - - website = db.get_website_by_url(url) - - if website: - flash("Website already exists", "danger") - return redirect("/submit") - - website = db.website_exists(url) - if website: - flash("A parent directory of this url has already been posted", "danger") - return redirect("/submit") - - if not od_util.is_valid_url(url): - flash("Error: " - "Invalid url. Make sure to include the http(s):// suffix. " - "FTP is not supported", "danger") - return redirect("/submit") - - if od_util.is_blacklisted(url): - flash("Error: " - "Sorry, this website has been blacklisted. If you think " - "this is an error, please contact me.", "danger") - return redirect("/submit") - - if not od_util.is_od(url): - flash("Error:" - "The anti-spam algorithm determined that the submitted url is not " - "an open directory or the server is not responding. If you think " - "this is an error, please contact me.", "danger") - - return redirect("/submit") - - web_id = db.insert_website(Website(url, str(request.remote_addr), str(request.user_agent))) - db.enqueue(web_id) - flash("The website has been added to the queue", "success") + message, msg_type = try_enqueue(url) + flash(message, msg_type) return redirect("/submit") else: diff --git a/queue_reddit_links.py b/queue_reddit_links.py index 6253bfc..5005d6b 100644 --- a/queue_reddit_links.py +++ b/queue_reddit_links.py @@ -57,22 +57,20 @@ for comment in subreddit.comments(limit=50): website = db.get_website_by_url(url) - if website and not scanned: - # in progress - print(url) - print("In progress") - continue - - if website and db.website_has_been_scanned(url): + if website: + if not scanned: + # in progress + print(url) + print("In progress") + continue handle_exact_repost(website.id, comment) continue website_id = db.website_exists(url) - if website_id and not scanned: - # IN progress - print("Parent in progress") - continue - if website_id and db.website_has_been_scanned(url): + if website_id: + if not scanned: + print("Parent in progress") + continue handle_subdir_repost(website_id, comment) continue @@ -116,16 +114,25 @@ for s in submissions: if not bot.has_crawled(s.id): url = os.path.join(s.url, "") # add trailing slash + scanned = db.website_has_been_scanned(url) website = db.get_website_by_url(url) if website: + if not scanned: + print(url) + print("In progress") + continue handle_exact_repost(website.id, s) + continue website_id = db.website_exists(url) if website_id: - bot.log_crawl(s.id) + if not scanned: + print("Parent in progress") + continue handle_subdir_repost(website_id, s) + continue if not od_util.is_valid_url(url): print("Skipping reddit post: Invalid url")