mirror of
				https://github.com/simon987/opendirectories-bot.git
				synced 2025-10-31 14:16:52 +00:00 
			
		
		
		
	Fixed a problematic website
This commit is contained in:
		
							parent
							
								
									88cebae047
								
							
						
					
					
						commit
						8e1f4543fd
					
				
							
								
								
									
										12
									
								
								parser.py
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								parser.py
									
									
									
									
									
								
							| @ -68,12 +68,10 @@ class PageParser: | ||||
|             return "d" | ||||
|         return "f" | ||||
| 
 | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def clean_page(text): | ||||
|         text = text.replace("<A", "<a") | ||||
|         text = text.replace("</A", "</a") | ||||
|         # text = text.replace("&", "&") | ||||
|         text = text.replace("<hr>", "") | ||||
| 
 | ||||
|         return text | ||||
| @ -108,11 +106,11 @@ class NginxParser(PageParser): | ||||
| 
 | ||||
|         soup = BeautifulSoup(text, "html.parser") | ||||
| 
 | ||||
|         for link in soup.find("pre").find_all("a"): | ||||
| 
 | ||||
|             parsed_link = self.parse_link(link, text, base_url) | ||||
|             if parsed_link is not None: | ||||
|                 links[parsed_link[0]] = parsed_link[1] | ||||
|         for pre in soup.find_all("pre"): | ||||
|             for link in pre.find_all("a"): | ||||
|                 parsed_link = self.parse_link(link, text, base_url) | ||||
|                 if parsed_link is not None: | ||||
|                     links[parsed_link[0]] = parsed_link[1] | ||||
| 
 | ||||
|         return links | ||||
| 
 | ||||
|  | ||||
| @ -7,7 +7,6 @@ https://zeus.feralhosting.com/matt07211/Anime-OST/ (root is nginx, /Flac is Apac | ||||
| https://filepursuit.com/ (recursion problem - not an OD) | ||||
| https://drive.google.com/drive/folders/0BzylFUcRnoTZflc3Z3Z0eDFuV0Q4M2ROUlg5UWNWLWs3WDBtNFZZUkdqUmxqQm9nd0VPclk (Should be excluded) | ||||
| 
 | ||||
| http://www.gamers.org/pub/archives/uwp-uml/ (?) | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| @ -24,3 +23,4 @@ https://www.datto.com/resource-downloads/ | ||||
| https://www.annmariegianni.com/wp-content/uploads/ | ||||
| http://archive.scene.org/pub/resources/docs/bbs_finland/ | ||||
| http://dl.apkhome.org | ||||
| http://www.gamers.org/pub/archives/uwp-uml/ | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user