Assume UTF-8 encoding when decoding response bodies

This commit is contained in:
simon 2019-09-10 14:02:36 -04:00
parent 06d6762d51
commit 8f8c4f23f0
7 changed files with 21 additions and 12 deletions

View File

@@ -163,7 +163,7 @@ CHANS = {
( (
"jp", "drive" "jp", "drive"
), ),
rps=1 / 60 rps=1 / 120
), ),
"synch": SynchJsonChanHelper( "synch": SynchJsonChanHelper(
12, 12,

View File

@@ -1,5 +1,4 @@
import datetime import datetime
import _strptime
import re import re
from urllib.parse import urljoin from urllib.parse import urljoin
@@ -11,7 +10,7 @@ from chan.desuchan_html import DesuChanHtmlChanHelper
class FChanHtmlChanHelper(DesuChanHtmlChanHelper): class FChanHtmlChanHelper(DesuChanHtmlChanHelper):
def parse_threads_list(self, r): def parse_threads_list(self, r):
soup = BeautifulSoup(r.text, "html.parser") soup = BeautifulSoup(r.content.decode('utf-8', 'ignore'), "html.parser")
threads = [] threads = []

View File

@@ -58,7 +58,7 @@ class ChanHelper:
@staticmethod @staticmethod
def parse_thread(r): def parse_thread(r):
soup = BeautifulSoup(r.text, "html.parser") soup = BeautifulSoup(r.content.decode('utf-8', 'ignore'), "html.parser")
op_el = soup.find("div", attrs={"class": "innerOP"}) op_el = soup.find("div", attrs={"class": "innerOP"})
yield { yield {

View File

@@ -39,7 +39,7 @@ class JsonChanHelper(ChanHelper):
@staticmethod @staticmethod
def parse_threads_list(r): def parse_threads_list(r):
try: try:
j = json.loads(r.text) j = json.loads(r.content.decode('utf-8', 'ignore'))
if len(j) == 0 or "threads" not in j[0]: if len(j) == 0 or "threads" not in j[0]:
logger.warning("No threads in response for %s: %s" % (r.url, r.text,)) logger.warning("No threads in response for %s: %s" % (r.url, r.text,))
return [], None return [], None
@@ -56,5 +56,10 @@ class JsonChanHelper(ChanHelper):
@staticmethod @staticmethod
def parse_thread(r): def parse_thread(r):
j = json.loads(r.text) try:
j = json.loads(r.content.decode('utf-8', 'ignore'))
except JSONDecodeError:
logger.warning("JSONDecodeError for %s:" % (r.url,))
logger.warning(r.text)
return []
return j["posts"] return j["posts"]

View File

@@ -39,7 +39,7 @@ class LolNadaHtmlChanHelper(ChanHelper):
return item["time"] return item["time"]
def parse_threads_list(self, r): def parse_threads_list(self, r):
soup = BeautifulSoup(r.text, "html.parser") soup = BeautifulSoup(r.content.decode('utf-8', 'ignore'), "html.parser")
threads = [] threads = []
@@ -59,7 +59,7 @@ class LolNadaHtmlChanHelper(ChanHelper):
@staticmethod @staticmethod
def parse_thread(r): def parse_thread(r):
soup = BeautifulSoup(r.text, "html.parser") soup = BeautifulSoup(r.content.decode('utf-8', 'ignore'), "html.parser")
op_el = soup.find("div", class_="hilo") op_el = soup.find("div", class_="hilo")
for post_el in op_el.find_all("div", class_="post reply"): for post_el in op_el.find_all("div", class_="post reply"):

View File

@@ -42,7 +42,7 @@ class MayuriChanHelper(ChanHelper):
def parse_threads_list(self, r): def parse_threads_list(self, r):
try: try:
j = json.loads(r.text) j = json.loads(r.content.decode('utf-8', 'ignore'))
except JSONDecodeError: except JSONDecodeError:
logger.warning("JSONDecodeError for %s:" % (r.url,)) logger.warning("JSONDecodeError for %s:" % (r.url,))
logger.warning(r.text) logger.warning(r.text)
@@ -54,7 +54,7 @@ class MayuriChanHelper(ChanHelper):
@staticmethod @staticmethod
def parse_thread(r): def parse_thread(r):
try: try:
j = json.loads(r.text) j = json.loads(r.content.decode('utf-8', 'ignore'))
except JSONDecodeError: except JSONDecodeError:
logger.warning("JSONDecodeError for %s:" % (r.url,)) logger.warning("JSONDecodeError for %s:" % (r.url,))
logger.warning(r.text) logger.warning(r.text)

View File

@@ -19,7 +19,7 @@ class RussianJsonChanHelper(ChanHelper):
@staticmethod @staticmethod
def parse_threads_list(r): def parse_threads_list(r):
try: try:
j = json.loads(r.text) j = json.loads(r.content.decode('utf-8', 'ignore'))
except JSONDecodeError: except JSONDecodeError:
logger.warning("JSONDecodeError for %s:" % (r.url,)) logger.warning("JSONDecodeError for %s:" % (r.url,))
logger.warning(r.text) logger.warning(r.text)
@@ -28,7 +28,12 @@ class RussianJsonChanHelper(ChanHelper):
@staticmethod @staticmethod
def parse_thread(r): def parse_thread(r):
j = json.loads(r.text) try:
j = json.loads(r.content.decode('utf-8', 'ignore'))
except JSONDecodeError:
logger.warning("JSONDecodeError for %s:" % (r.url,))
logger.warning(r.text)
return []
for thread in j["threads"]: for thread in j["threads"]:
for post in thread["posts"]: for post in thread["posts"]:
yield post yield post