Fixes handling of link tags that have no href attribute

This commit is contained in:
simon 2018-02-14 14:23:31 -05:00
parent 56b28f534e
commit 90c5835cb5

View File

@@ -136,6 +136,7 @@ class NginxParser(PageParser):
try: try:
if PageParser.should_save_link(link.text): if PageParser.should_save_link(link.text):
target = link.get("href") target = link.get("href")
if target is not None:
short_file_name = os.path.split(target)[1] short_file_name = os.path.split(target)[1]
full_link = urljoin(base_url, target) full_link = urljoin(base_url, target)
file_type = PageParser.file_type(target) file_type = PageParser.file_type(target)
@@ -188,6 +189,7 @@ class ApacheParser(PageParser):
if PageParser.should_save_link(link.text): if PageParser.should_save_link(link.text):
target = link.get("href") target = link.get("href")
if target is not None:
short_file_name = os.path.split(target)[1] short_file_name = os.path.split(target)[1]
file_type = PageParser.file_type(target) file_type = PageParser.file_type(target)
full_link = urljoin(base_url, target) full_link = urljoin(base_url, target)
@@ -210,6 +212,7 @@ class ApacheParser(PageParser):
if PageParser.should_save_link(link.text): if PageParser.should_save_link(link.text):
target = link.get("href") target = link.get("href")
if target is not None:
short_file_name = os.path.split(target)[1] short_file_name = os.path.split(target)[1]
full_link = urljoin(base_url, target) full_link = urljoin(base_url, target)
file_type = PageParser.file_type(target) file_type = PageParser.file_type(target)