mirror of
				https://github.com/simon987/opendirectories-bot.git
				synced 2025-10-31 06:16:51 +00:00 
			
		
		
		
	Fixed a problematic website
This commit is contained in:
		
							parent
							
								
									88cebae047
								
							
						
					
					
						commit
						8e1f4543fd
					
				
							
								
								
									
										12
									
								
								parser.py
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								parser.py
									
									
									
									
									
								
							| @ -68,12 +68,10 @@ class PageParser: | |||||||
|             return "d" |             return "d" | ||||||
|         return "f" |         return "f" | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def clean_page(text): |     def clean_page(text): | ||||||
|         text = text.replace("<A", "<a") |         text = text.replace("<A", "<a") | ||||||
|         text = text.replace("</A", "</a") |         text = text.replace("</A", "</a") | ||||||
|         # text = text.replace("&", "&") |  | ||||||
|         text = text.replace("<hr>", "") |         text = text.replace("<hr>", "") | ||||||
| 
 | 
 | ||||||
|         return text |         return text | ||||||
| @ -108,11 +106,11 @@ class NginxParser(PageParser): | |||||||
| 
 | 
 | ||||||
|         soup = BeautifulSoup(text, "html.parser") |         soup = BeautifulSoup(text, "html.parser") | ||||||
| 
 | 
 | ||||||
|         for link in soup.find("pre").find_all("a"): |         for pre in soup.find_all("pre"): | ||||||
| 
 |             for link in pre.find_all("a"): | ||||||
|             parsed_link = self.parse_link(link, text, base_url) |                 parsed_link = self.parse_link(link, text, base_url) | ||||||
|             if parsed_link is not None: |                 if parsed_link is not None: | ||||||
|                 links[parsed_link[0]] = parsed_link[1] |                     links[parsed_link[0]] = parsed_link[1] | ||||||
| 
 | 
 | ||||||
|         return links |         return links | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -7,7 +7,6 @@ https://zeus.feralhosting.com/matt07211/Anime-OST/ (root is nginx, /Flac is Apac | |||||||
| https://filepursuit.com/ (recursion problem - not an OD) | https://filepursuit.com/ (recursion problem - not an OD) | ||||||
| https://drive.google.com/drive/folders/0BzylFUcRnoTZflc3Z3Z0eDFuV0Q4M2ROUlg5UWNWLWs3WDBtNFZZUkdqUmxqQm9nd0VPclk (Should be excluded) | https://drive.google.com/drive/folders/0BzylFUcRnoTZflc3Z3Z0eDFuV0Q4M2ROUlg5UWNWLWs3WDBtNFZZUkdqUmxqQm9nd0VPclk (Should be excluded) | ||||||
| 
 | 
 | ||||||
| http://www.gamers.org/pub/archives/uwp-uml/ (?) |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -23,4 +22,5 @@ http://www.serenitystreetnews.com/videos/ | |||||||
| https://www.datto.com/resource-downloads/ | https://www.datto.com/resource-downloads/ | ||||||
| https://www.annmariegianni.com/wp-content/uploads/ | https://www.annmariegianni.com/wp-content/uploads/ | ||||||
| http://archive.scene.org/pub/resources/docs/bbs_finland/ | http://archive.scene.org/pub/resources/docs/bbs_finland/ | ||||||
| http://dl.apkhome.org | http://dl.apkhome.org | ||||||
|  | http://www.gamers.org/pub/archives/uwp-uml/ | ||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user