mirror of
https://github.com/simon987/od-database.git
synced 2025-04-20 10:56:47 +00:00
FTP URL validation
This commit is contained in:
parent
0304c98a31
commit
d8c16d53e6
46
od_util.py
46
od_util.py
@ -5,6 +5,7 @@ import os
|
|||||||
import validators
|
import validators
|
||||||
import re
|
import re
|
||||||
import mimetypes
|
import mimetypes
|
||||||
|
from ftplib import FTP
|
||||||
|
|
||||||
|
|
||||||
def truncate_path(path, max_len):
|
def truncate_path(path, max_len):
|
||||||
@ -42,7 +43,7 @@ def is_valid_url(url):
|
|||||||
if not url.endswith("/"):
|
if not url.endswith("/"):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if not url.startswith(("http://", "https://")):
|
if not url.startswith(("http://", "https://", "ftp://")):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return validators.url(url)
|
return validators.url(url)
|
||||||
@ -67,29 +68,36 @@ def is_od(url):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
r = requests.get(url, timeout=15, allow_redirects=False)
|
if url.startswith("ftp://"):
|
||||||
if r.status_code != 200:
|
url = url[6:-1] # Remove scheme and trailing slash
|
||||||
print("No redirects allowed!")
|
ftp = FTP(url)
|
||||||
return False
|
ftp.login()
|
||||||
soup = BeautifulSoup(r.text, "lxml")
|
ftp.close()
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
r = requests.get(url, timeout=15, allow_redirects=False)
|
||||||
|
if r.status_code != 200:
|
||||||
|
print("No redirects allowed!")
|
||||||
|
return False
|
||||||
|
soup = BeautifulSoup(r.text, "lxml")
|
||||||
|
|
||||||
external_links = sum(1 if is_external_link(url, a.get("href")) else 0 for a in soup.find_all("a"))
|
external_links = sum(1 if is_external_link(url, a.get("href")) else 0 for a in soup.find_all("a"))
|
||||||
link_tags = len(list(soup.find_all("link")))
|
link_tags = len(list(soup.find_all("link")))
|
||||||
script_tags = len(list(soup.find_all("script")))
|
script_tags = len(list(soup.find_all("script")))
|
||||||
|
|
||||||
if external_links > 11:
|
if external_links > 11:
|
||||||
print("Too many external links!")
|
print("Too many external links!")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if link_tags > 5:
|
if link_tags > 5:
|
||||||
print("Too many link tags!")
|
print("Too many link tags!")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if script_tags > 7:
|
if script_tags > 7:
|
||||||
print("Too many script tags!")
|
print("Too many script tags!")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user