Assume UTF-8 encoding when decoding response bodies

This commit is contained in:
simon 2019-09-10 14:02:36 -04:00
parent 06d6762d51
commit 8f8c4f23f0
7 changed files with 21 additions and 12 deletions

View File

@ -163,7 +163,7 @@ CHANS = {
(
"jp", "drive"
),
rps=1 / 60
rps=1 / 120
),
"synch": SynchJsonChanHelper(
12,

View File

@ -1,5 +1,4 @@
import datetime
import _strptime
import re
from urllib.parse import urljoin
@ -11,7 +10,7 @@ from chan.desuchan_html import DesuChanHtmlChanHelper
class FChanHtmlChanHelper(DesuChanHtmlChanHelper):
def parse_threads_list(self, r):
soup = BeautifulSoup(r.text, "html.parser")
soup = BeautifulSoup(r.content.decode('utf-8', 'ignore'), "html.parser")
threads = []

View File

@ -58,7 +58,7 @@ class ChanHelper:
@staticmethod
def parse_thread(r):
soup = BeautifulSoup(r.text, "html.parser")
soup = BeautifulSoup(r.content.decode('utf-8', 'ignore'), "html.parser")
op_el = soup.find("div", attrs={"class": "innerOP"})
yield {

View File

@ -39,7 +39,7 @@ class JsonChanHelper(ChanHelper):
@staticmethod
def parse_threads_list(r):
try:
j = json.loads(r.text)
j = json.loads(r.content.decode('utf-8', 'ignore'))
if len(j) == 0 or "threads" not in j[0]:
logger.warning("No threads in response for %s: %s" % (r.url, r.text,))
return [], None
@ -56,5 +56,10 @@ class JsonChanHelper(ChanHelper):
@staticmethod
def parse_thread(r):
j = json.loads(r.text)
try:
j = json.loads(r.content.decode('utf-8', 'ignore'))
except JSONDecodeError:
logger.warning("JSONDecodeError for %s:" % (r.url,))
logger.warning(r.text)
return []
return j["posts"]

View File

@ -39,7 +39,7 @@ class LolNadaHtmlChanHelper(ChanHelper):
return item["time"]
def parse_threads_list(self, r):
soup = BeautifulSoup(r.text, "html.parser")
soup = BeautifulSoup(r.content.decode('utf-8', 'ignore'), "html.parser")
threads = []
@ -59,7 +59,7 @@ class LolNadaHtmlChanHelper(ChanHelper):
@staticmethod
def parse_thread(r):
soup = BeautifulSoup(r.text, "html.parser")
soup = BeautifulSoup(r.content.decode('utf-8', 'ignore'), "html.parser")
op_el = soup.find("div", class_="hilo")
for post_el in op_el.find_all("div", class_="post reply"):

View File

@ -42,7 +42,7 @@ class MayuriChanHelper(ChanHelper):
def parse_threads_list(self, r):
try:
j = json.loads(r.text)
j = json.loads(r.content.decode('utf-8', 'ignore'))
except JSONDecodeError:
logger.warning("JSONDecodeError for %s:" % (r.url,))
logger.warning(r.text)
@ -54,7 +54,7 @@ class MayuriChanHelper(ChanHelper):
@staticmethod
def parse_thread(r):
try:
j = json.loads(r.text)
j = json.loads(r.content.decode('utf-8', 'ignore'))
except JSONDecodeError:
logger.warning("JSONDecodeError for %s:" % (r.url,))
logger.warning(r.text)

View File

@ -19,7 +19,7 @@ class RussianJsonChanHelper(ChanHelper):
@staticmethod
def parse_threads_list(r):
try:
j = json.loads(r.text)
j = json.loads(r.content.decode('utf-8', 'ignore'))
except JSONDecodeError:
logger.warning("JSONDecodeError for %s:" % (r.url,))
logger.warning(r.text)
@ -28,7 +28,12 @@ class RussianJsonChanHelper(ChanHelper):
@staticmethod
def parse_thread(r):
j = json.loads(r.text)
try:
j = json.loads(r.content.decode('utf-8', 'ignore'))
except JSONDecodeError:
logger.warning("JSONDecodeError for %s:" % (r.url,))
logger.warning(r.text)
return []
for thread in j["threads"]:
for post in thread["posts"]:
yield post