mirror of
				https://github.com/simon987/od-database.git
				synced 2025-11-03 22:46:52 +00:00 
			
		
		
		
	Improved external link detection
This commit is contained in:
		
							parent
							
								
									400abc9a3c
								
							
						
					
					
						commit
						b63c7190c3
					
				@ -1,4 +1,4 @@
 | 
				
			|||||||
from urllib.parse import unquote
 | 
					from urllib.parse import unquote, urljoin
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
from html.parser import HTMLParser
 | 
					from html.parser import HTMLParser
 | 
				
			||||||
from itertools import repeat
 | 
					from itertools import repeat
 | 
				
			||||||
@ -177,7 +177,8 @@ class HttpDirectory(RemoteDirectory):
 | 
				
			|||||||
            return True
 | 
					            return True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Ignore external links
 | 
					        # Ignore external links
 | 
				
			||||||
        if link.href.startswith("http") and not link.href.startswith(base_url):
 | 
					        full_url = os.path.join(base_url, link.href)
 | 
				
			||||||
 | 
					        if not full_url.startswith(base_url):
 | 
				
			||||||
            return True
 | 
					            return True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def close(self):
 | 
					    def close(self):
 | 
				
			||||||
 | 
				
			|||||||
@ -4,9 +4,9 @@ import json
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
payload = json.dumps({
 | 
					payload = json.dumps({
 | 
				
			||||||
    "website_id": 123,
 | 
					    "website_id": 123,
 | 
				
			||||||
    # "url": "http://alphamediazone.com/data/Movies1/",
 | 
					    "url": "http://liminaire.fr/TEXTES/",
 | 
				
			||||||
    # "url": "http://localhost:8000/",
 | 
					    # "url": "http://localhost:8000/",
 | 
				
			||||||
    "url": "http://ubuntu.mirrorservice.org/",
 | 
					    # "url": "http://ubuntu.mirrorservice.org/",
 | 
				
			||||||
    "priority": 2,
 | 
					    "priority": 2,
 | 
				
			||||||
    "callback_type": "",
 | 
					    "callback_type": "",
 | 
				
			||||||
    "callback_args": "{}"
 | 
					    "callback_args": "{}"
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user