mirror of
https://github.com/simon987/od-database.git
synced 2025-04-20 10:56:47 +00:00
Improved external link detection
This commit is contained in:
parent
400abc9a3c
commit
b63c7190c3
@ -1,4 +1,4 @@
|
|||||||
from urllib.parse import unquote
|
from urllib.parse import unquote, urljoin
|
||||||
import os
|
import os
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
from itertools import repeat
|
from itertools import repeat
|
||||||
@ -177,7 +177,8 @@ class HttpDirectory(RemoteDirectory):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
# Ignore external links
|
# Ignore external links
|
||||||
if link.href.startswith("http") and not link.href.startswith(base_url):
|
full_url = os.path.join(base_url, link.href)
|
||||||
|
if not full_url.startswith(base_url):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
|
@ -4,9 +4,9 @@ import json
|
|||||||
|
|
||||||
payload = json.dumps({
|
payload = json.dumps({
|
||||||
"website_id": 123,
|
"website_id": 123,
|
||||||
# "url": "http://alphamediazone.com/data/Movies1/",
|
"url": "http://liminaire.fr/TEXTES/",
|
||||||
# "url": "http://localhost:8000/",
|
# "url": "http://localhost:8000/",
|
||||||
"url": "http://ubuntu.mirrorservice.org/",
|
# "url": "http://ubuntu.mirrorservice.org/",
|
||||||
"priority": 2,
|
"priority": 2,
|
||||||
"callback_type": "",
|
"callback_type": "",
|
||||||
"callback_args": "{}"
|
"callback_args": "{}"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user