FTP URL validation

Simon 2018-06-10 14:32:19 -04:00
parent 0304c98a31
commit d8c16d53e6


@@ -5,6 +5,7 @@ import os
 import validators
 import re
 import mimetypes
+from ftplib import FTP
 
 def truncate_path(path, max_len):
@@ -42,7 +43,7 @@ def is_valid_url(url):
     if not url.endswith("/"):
         return False
-    if not url.startswith(("http://", "https://")):
+    if not url.startswith(("http://", "https://", "ftp://")):
         return False
     return validators.url(url)
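
With this change, is_valid_url() also whitelists the ftp:// scheme while still requiring the trailing slash. A quick illustration of the intended behaviour (a sketch only; it assumes validators.url accepts ftp URLs, which the commit itself does not check):

    is_valid_url("ftp://example.com/")    # accepted, provided validators.url agrees
    is_valid_url("ftp://example.com")     # rejected: missing trailing slash
    is_valid_url("sftp://example.com/")   # rejected: scheme not in the whitelist
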
@@ -67,29 +68,36 @@ def is_od(url):
         return False
     try:
-        r = requests.get(url, timeout=15, allow_redirects=False)
-        if r.status_code != 200:
-            print("No redirects allowed!")
-            return False
-        soup = BeautifulSoup(r.text, "lxml")
+        if url.startswith("ftp://"):
+            url = url[6:-1]  # Remove schema and trailing slash
+            ftp = FTP(url)
+            ftp.login()
+            ftp.close()
+            return True
+        else:
+            r = requests.get(url, timeout=15, allow_redirects=False)
+            if r.status_code != 200:
+                print("No redirects allowed!")
+                return False
+            soup = BeautifulSoup(r.text, "lxml")
         external_links = sum(1 if is_external_link(url, a.get("href")) else 0 for a in soup.find_all("a"))
         link_tags = len(list(soup.find_all("link")))
         script_tags = len(list(soup.find_all("script")))
         if external_links > 11:
             print("Too many external links!")
             return False
         if link_tags > 5:
             print("Too many link tags!")
             return False
         if script_tags > 7:
             print("Too many script tags!")
             return False
         return True
     except Exception as e:
         print(e)
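
The new branch in is_od() treats an ftp:// URL as an open directory when an anonymous login succeeds. A minimal standalone sketch of that check, assuming the URL has the form ftp://host/ with no path component (is_anonymous_ftp is an illustrative name, not part of the commit):

    from ftplib import FTP, error_perm

    def is_anonymous_ftp(url, timeout=15):
        # Strip the "ftp://" schema (6 chars) and the trailing slash,
        # mirroring url[6:-1] in the commit; assumes no path component.
        host = url[6:-1]
        try:
            ftp = FTP(host, timeout=timeout)
            ftp.login()   # anonymous login, as in the commit
            ftp.close()
            return True
        except (error_perm, OSError):
            return False

    is_anonymous_ftp("ftp://ftp.gnu.org/")   # True only if the server allows anonymous access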