mirror of
https://github.com/simon987/od-database.git
synced 2025-04-18 01:46:46 +00:00
Improved external link detection
This commit is contained in:
parent
400abc9a3c
commit
b63c7190c3
@ -1,4 +1,4 @@
|
||||
from urllib.parse import unquote
|
||||
from urllib.parse import unquote, urljoin
|
||||
import os
|
||||
from html.parser import HTMLParser
|
||||
from itertools import repeat
|
||||
@ -177,7 +177,8 @@ class HttpDirectory(RemoteDirectory):
|
||||
return True
|
||||
|
||||
# Ignore external links
|
||||
if link.href.startswith("http") and not link.href.startswith(base_url):
|
||||
full_url = os.path.join(base_url, link.href)
|
||||
if not full_url.startswith(base_url):
|
||||
return True
|
||||
|
||||
def close(self):
|
||||
|
@ -4,9 +4,9 @@ import json
|
||||
|
||||
payload = json.dumps({
|
||||
"website_id": 123,
|
||||
# "url": "http://alphamediazone.com/data/Movies1/",
|
||||
"url": "http://liminaire.fr/TEXTES/",
|
||||
# "url": "http://localhost:8000/",
|
||||
"url": "http://ubuntu.mirrorservice.org/",
|
||||
# "url": "http://ubuntu.mirrorservice.org/",
|
||||
"priority": 2,
|
||||
"callback_type": "",
|
||||
"callback_args": "{}"
|
||||
|
Loading…
x
Reference in New Issue
Block a user