mirror of
				https://github.com/simon987/chan_feed.git
				synced 2025-10-26 05:06:51 +00:00 
			
		
		
		
	Add two Lynx chans, update dependencies
This commit is contained in:
		
							parent
							
								
									7ea1612b32
								
							
						
					
					
						commit
						77a053d6ee
					
				| @ -7,7 +7,7 @@ image boards and publishes serialised JSON to RabbitMQ | ||||
| Compatible image boards: 4chan, lainchan, uboachan, | ||||
| 22chan, wizchan, 1chan, 2ch.hk, endchan, 38chan, alokal, | ||||
| horochan, doushio, desuchan, tgchan, lolnada, 7chan, chanon, | ||||
| chan.org.li, hispachan, 8kun, nowere, iichan and more. | ||||
| chan.org.li, hispachan, 8kun, nowere, iichan, 2chan and more. | ||||
| 
 | ||||
| Can optionally push monitoring data to InfluxDB. Below is an | ||||
| example of Grafana being used to display it. | ||||
|  | ||||
| @ -1,4 +1,4 @@ | ||||
| from chan.json import JsonChanHelper | ||||
| from chan.chan_json import JsonChanHelper | ||||
| from post_process import get_links_from_body | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										26
									
								
								chan/chan.py
									
									
									
									
									
								
							
							
						
						
									
										26
									
								
								chan/chan.py
									
									
									
									
									
								
							| @ -9,10 +9,11 @@ from chan.endchan_html import EndchanHtmlChanHelper | ||||
| from chan.fchan_html import FChanHtmlChanHelper | ||||
| from chan.hispachan_html import HispachanHtmlHelper | ||||
| from chan.iichan_html import IichanHtmlChanHelper | ||||
| from chan.json import JsonChanHelper | ||||
| from chan.chan_json import JsonChanHelper | ||||
| from chan.json_kun import JsonKunChanHelper | ||||
| from chan.kev4_php import Kev4PhpHelper | ||||
| from chan.lolnada_html import LolNadaHtmlChanHelper | ||||
| from chan.lynx import LynxChanHelper | ||||
| from chan.mayuri import MayuriChanHelper | ||||
| from chan.nowere_html import NowereHtmlChanHelper | ||||
| from chan.plus4chan_html import Plus4ChanHelper | ||||
| @ -596,5 +597,26 @@ CHANS = { | ||||
|         ), | ||||
|         rps=1 / 3 | ||||
|     ), | ||||
|     # next is 36 | ||||
|     "waifuist": LynxChanHelper( | ||||
|         36, | ||||
|         "https://waifuist.pro/", | ||||
|         "https://waifuist.pro/", | ||||
|         "/res/", | ||||
|         "", | ||||
|         ( | ||||
|             "w", "starlet", "etc", | ||||
|         ), | ||||
|         rps=1 / 25 | ||||
|     ), | ||||
|     "cutiegarden": LynxChanHelper( | ||||
|         37, | ||||
|         "https://cutie.garden/", | ||||
|         "https://cutie.garden/", | ||||
|         "/res/", | ||||
|         "", | ||||
|         ( | ||||
|             "lg", "cozy", "meta", "test" | ||||
|         ), | ||||
|         rps=1 / 25 | ||||
|     ), | ||||
| } | ||||
|  | ||||
| @ -1,6 +1,6 @@ | ||||
| from vanwanet_scrape.scraper import Scraper | ||||
| 
 | ||||
| from chan.json import JsonChanHelper | ||||
| from chan.chan_json import JsonChanHelper | ||||
| from util import logger | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										83
									
								
								chan/lynx.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								chan/lynx.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,83 @@ | ||||
| import json | ||||
| from datetime import datetime | ||||
| from json import JSONDecodeError | ||||
| from urllib.parse import urljoin | ||||
| 
 | ||||
| import cloudscraper | ||||
| 
 | ||||
| from chan.helper import ChanHelper | ||||
| from util import logger | ||||
| 
 | ||||
| 
 | ||||
class LynxChanHelper(ChanHelper):
    """Chan helper for boards that expose the LynxChan JSON endpoints.

    See https://gitgud.io/LynxChan/LynxChan/blob/master/doc/Json.txt

    Items handled by this helper are the plain dicts decoded from the JSON
    responses: a dict carrying a ``threadId`` key is treated as a thread,
    anything else as a post.
    """

    def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps):
        """Forward all arguments to ``ChanHelper`` and install a cloudscraper getter."""
        super().__init__(db_id, base_url, image_url, thread_path, image_path, boards, rps)

        # HTTP GETs go through a cloudscraper session.
        # NOTE(review): presumably this overrides a default ``get_method``
        # set up by ChanHelper -- confirm against chan/helper.py.
        scraper = cloudscraper.create_scraper()
        self.get_method = scraper.get

    @staticmethod
    def item_id(item):
        """Return ``threadId`` for a thread item and ``postId`` for a post item."""
        if LynxChanHelper.item_type(item) == "thread":
            return item["threadId"]
        return item["postId"]

    @staticmethod
    def item_mtime(item):
        """Return the item's ``creation`` time as a POSIX timestamp (float).

        ``creation`` must be an ISO-8601 string. A trailing ``Z`` (UTC
        designator) is rewritten to ``+00:00`` first, because
        ``datetime.fromisoformat`` only accepts ``Z`` from Python 3.11 on;
        on older interpreters the unmodified string raises ``ValueError``.
        """
        creation = item["creation"]
        if creation.endswith("Z"):
            creation = creation[:-1] + "+00:00"
        return datetime.fromisoformat(creation).timestamp()

    def item_urls(self, item, board):
        """Return absolute URLs for every entry of ``item["files"]``.

        Each file's ``path`` is resolved against the helper's base URL.
        A missing, ``None`` or empty ``files`` value yields an empty list.
        ``board`` is accepted for interface compatibility and is not used.
        """
        files = item.get("files") or []
        return [urljoin(self._base_url, im["path"]) for im in files]

    @staticmethod
    def item_type(item):
        """Return ``"thread"`` if the item has a ``threadId`` key, else ``"post"``."""
        return "thread" if "threadId" in item else "post"

    def threads_url(self, board):
        """Return the URL of the first JSON index page of ``board``."""
        return "%s%s/1.json" % (self._base_url, board)

    @staticmethod
    def thread_mtime(thread):
        """Return a change marker for a thread taken from a board index page.

        The value is not a time: it is the omitted-post count plus the number
        of posts listed inline, so it changes whenever that total changes.
        The key is spelled ``ommitedPosts`` on purpose; that is the name this
        code reads from the payload.
        """
        # ``or ()`` tolerates an index entry without a usable "posts" list.
        return thread.get("ommitedPosts", 0) + len(thread.get("posts") or ())

    @staticmethod
    def parse_threads_list(r):
        """Parse one board index page.

        :param r: response object exposing ``content``, ``text`` and ``url``
        :return: ``(threads, next_page)`` where ``threads`` is the list found
            under the ``threads`` key and ``next_page`` is the URL of the
            following index page, or ``None`` when this is the last page or
            the page number cannot be determined. Undecodable or unexpected
            payloads are logged and yield ``([], None)``.
        """
        try:
            j = json.loads(r.content.decode('utf-8', 'ignore'))
        except JSONDecodeError:
            logger.warning("JSONDecodeError for %s:" % (r.url,))
            logger.warning(r.text)
            return [], None

        # Also covers an empty object, a JSON array and ``null``.
        if not isinstance(j, dict) or "threads" not in j:
            logger.warning("No threads in response for %s: %s" % (r.url, r.text,))
            return [], None

        # The page number is the file name of the URL: ".../<board>/<n>.json".
        url = r.url[:r.url.rfind("?")] if "?" in r.url else r.url
        try:
            current_page = int(url[url.rfind("/") + 1:-len(".json")])
        except ValueError:
            # URL does not end in "<n>.json"; return what we have, stop paging.
            return j["threads"], None

        next_page = None
        # A payload without "pageCount" is treated as having no further pages.
        if current_page < j.get("pageCount", 0):
            next_page = urljoin(r.url, "%d.json" % (current_page + 1))

        return j["threads"], next_page

    @staticmethod
    def parse_thread(r):
        """Parse a thread page into a flat list of items.

        Every reply is tagged with ``_parent`` (the thread's ``threadId``).
        The thread object itself, with its ``posts`` list removed, is
        appended last. Undecodable or unexpected payloads are logged and
        yield ``[]``.

        :param r: response object exposing ``content``, ``text`` and ``url``
        """
        try:
            j = json.loads(r.content.decode('utf-8', 'ignore'))
        except JSONDecodeError:
            logger.warning("JSONDecodeError for %s:" % (r.url,))
            logger.warning(r.text)
            return []

        if not isinstance(j, dict) or "threadId" not in j:
            logger.warning("No thread in response for %s: %s" % (r.url, r.text,))
            return []

        # Detach the replies so the thread item does not carry them along.
        posts = j.pop("posts", None) or []
        for post in posts:
            post["_parent"] = j["threadId"]

        all_items = list(posts)
        all_items.append(j)
        return all_items
| @ -1,4 +1,4 @@ | ||||
| from chan.json import JsonChanHelper | ||||
| from chan.chan_json import JsonChanHelper | ||||
| from post_process import get_links_from_body | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -7,4 +7,6 @@ influxdb | ||||
| pika | ||||
| bs4 | ||||
| urllib3 | ||||
| git+git://github.com/simon987/hexlib.git | ||||
| git+git://github.com/simon987/hexlib.git | ||||
| git+git://github.com/simon987/vanwanet_scrape.git | ||||
| cloudscraper | ||||
							
								
								
									
										10
									
								
								run.py
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								run.py
									
									
									
									
									
								
							| @ -102,14 +102,14 @@ class ChanState: | ||||
|             conn.commit() | ||||
| 
 | ||||
|     def mark_visited(self, item: int, helper): | ||||
|         with sqlite3.connect(self._db) as conn: | ||||
|         with sqlite3.connect(self._db, timeout=10000) as conn: | ||||
|             conn.execute( | ||||
|                 "INSERT INTO posts (post, chan) VALUES (?,?)", | ||||
|                 (item, helper.db_id) | ||||
|             ) | ||||
| 
 | ||||
|     def has_visited(self, item: int, helper): | ||||
|         with sqlite3.connect(self._db) as conn: | ||||
|         with sqlite3.connect(self._db, timeout=10000) as conn: | ||||
|             cur = conn.cursor() | ||||
|             cur.execute( | ||||
|                 "SELECT post FROM posts WHERE post=? AND chan=?", | ||||
| @ -122,7 +122,7 @@ class ChanState: | ||||
|         if mtime == -1: | ||||
|             return True | ||||
| 
 | ||||
|         with sqlite3.connect(self._db, timeout=5000) as conn: | ||||
|         with sqlite3.connect(self._db, timeout=10000) as conn: | ||||
|             cur = conn.cursor() | ||||
|             cur.execute( | ||||
|                 "SELECT last_modified, ts FROM threads WHERE thread=? AND chan=?", | ||||
| @ -134,7 +134,7 @@ class ChanState: | ||||
|             return False | ||||
| 
 | ||||
|     def mark_thread_as_visited(self, thread, helper, board): | ||||
|         with sqlite3.connect(self._db, timeout=5000) as conn: | ||||
|         with sqlite3.connect(self._db, timeout=10000) as conn: | ||||
|             conn.execute( | ||||
|                 "INSERT INTO threads (thread, last_modified, chan) " | ||||
|                 "VALUES (?,?,?) " | ||||
| @ -243,7 +243,7 @@ if __name__ == "__main__": | ||||
|     state = ChanState() | ||||
| 
 | ||||
|     publish_q = Queue() | ||||
|     for _ in range(5): | ||||
|     for _ in range(10): | ||||
|         publish_thread = Thread(target=publish_worker, args=(publish_q, chan_helper, proxy)) | ||||
|         publish_thread.setDaemon(True) | ||||
|         publish_thread.start() | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user