mirror of
https://github.com/simon987/chan_feed.git
synced 2025-12-15 13:29:03 +00:00
Initial commit
This commit is contained in:
45
post_process.py
Normal file
45
post_process.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import re
|
||||
|
||||
# Matches an http:// or https:// URL: a host made of word characters, hyphens,
# underscores and dots that contains a dot followed by 2-4 lowercase letters,
# then any trailing characters up to whitespace, "<", "'" or '"'.
# Group 1 is the whole URL; group 2 is only the trailing part after the host.
LINK_RE = re.compile(r"(https?://[\w\-_.]+\.[a-z]{2,4}([^\s<'\"]*|$))")
|
||||
|
||||
|
||||
def post_process(thing, board, helper):
    """Annotate *thing* in place with feed metadata and a ``urls`` list.

    Sets ``v`` to 1.0, ``board`` to *board* and ``chan`` to ``helper.db_id``.
    Links are extracted from ``thing["com"]`` when it is present and truthy,
    otherwise from ``thing["sub"]``; the two are never combined. When
    ``thing["fsize"]`` is truthy, the URL built by
    ``helper.image_url(board, thing["tim"], thing["ext"])`` is appended.
    ``urls`` is always present on return, possibly empty.

    Returns the same *thing* object that was passed in.
    """
    thing["v"] = 1.0

    thing["board"] = board
    thing["chan"] = helper.db_id

    # "com" wins over "sub"; "sub" is only consulted when "com" is missing/empty.
    text = thing.get("com") or thing.get("sub")
    if text:
        thing["urls"] = get_links_from_body(text)

    if thing.get("fsize"):
        # Build the URL before touching "urls" so a failure leaves it untouched.
        attachment_url = helper.image_url(board, thing["tim"], thing["ext"])
        thing.setdefault("urls", []).append(attachment_url)

    # Guarantee the key exists even when nothing was found.
    thing.setdefault("urls", [])

    return thing
|
||||
|
||||
|
||||
def get_links_from_body(body):
    """Return the distinct external URLs found in *body*.

    Before matching, ``<wbr>`` and ``</s>`` are stripped and the literal
    text ``" dot "`` is turned into ``"."``, so URLs broken up by that markup
    or obfuscation are matched as a single URL.

    Each URL appears once, in order of first appearance. Deduplicating
    through a ``set`` would make the order depend on string hashing, which
    is randomized per process, so identical input could yield differently
    ordered output between runs.

    :param body: text to scan for links
    :return: list of unique URL strings accepted by ``is_external``
    """
    cleaned = (
        body
        .replace("<wbr>", "")
        .replace("</s>", "")
        .replace(" dot ", ".")
    )

    candidates = (match.group(1) for match in LINK_RE.finditer(cleaned))

    # dict keys keep insertion order (guaranteed since Python 3.7), which
    # gives an order-preserving, deterministic dedupe.
    return list(dict.fromkeys(url for url in candidates if is_external(url)))
|
||||
|
||||
|
||||
def is_external(url):
    """Return False when *url* begins with ``#`` or ``/``, True otherwise."""
    if url.startswith("#") or url.startswith("/"):
        return False
    return True
|
||||
Reference in New Issue
Block a user