mirror of
https://github.com/simon987/parler_feed.git
synced 2025-04-18 01:36:45 +00:00
Fix postref, add approx date
This commit is contained in:
parent
a6ede3814b
commit
11901b7d6c
16
items.py
16
items.py
@ -35,11 +35,12 @@ class ParlerProfile(ParlerItem):
|
|||||||
|
|
||||||
class ParlerFollower(ParlerItem):
|
class ParlerFollower(ParlerItem):
|
||||||
|
|
||||||
def __init__(self, user_id, follower_id):
|
def __init__(self, user_id, follower_id, approx_date):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.data = {
|
self.data = {
|
||||||
"user_id": user_id,
|
"user_id": user_id,
|
||||||
"follower_id": follower_id,
|
"follower_id": follower_id,
|
||||||
|
"approx_date": approx_date,
|
||||||
}
|
}
|
||||||
|
|
||||||
def item_type(self):
|
def item_type(self):
|
||||||
@ -51,11 +52,12 @@ class ParlerFollower(ParlerItem):
|
|||||||
|
|
||||||
class ParlerFollowee(ParlerItem):
|
class ParlerFollowee(ParlerItem):
|
||||||
|
|
||||||
def __init__(self, user_id, followee_id):
|
def __init__(self, user_id, followee_id, approx_date):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.data = {
|
self.data = {
|
||||||
"user_id": user_id,
|
"user_id": user_id,
|
||||||
"followee_id": followee_id,
|
"followee_id": followee_id,
|
||||||
|
"approx_date": approx_date
|
||||||
}
|
}
|
||||||
|
|
||||||
def item_type(self):
|
def item_type(self):
|
||||||
@ -80,15 +82,19 @@ class ParlerPost(ParlerItem):
|
|||||||
|
|
||||||
class ParlerPostRef(ParlerItem):
|
class ParlerPostRef(ParlerItem):
|
||||||
|
|
||||||
def __init__(self, data):
|
def __init__(self, post_id, user_id, approx_date):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.data = data
|
self.data = {
|
||||||
|
"post_id": post_id,
|
||||||
|
"user_id": user_id,
|
||||||
|
"approx_date": approx_date
|
||||||
|
}
|
||||||
|
|
||||||
def item_type(self):
|
def item_type(self):
|
||||||
return "postref"
|
return "postref"
|
||||||
|
|
||||||
def item_id(self):
|
def item_id(self):
|
||||||
return self.data["id"]
|
return self.data["user_id"] + self.data["post_id"]
|
||||||
|
|
||||||
|
|
||||||
class ParlerUrl(ParlerItem):
|
class ParlerUrl(ParlerItem):
|
||||||
|
35
scanner.py
35
scanner.py
@ -29,24 +29,30 @@ if not MST:
|
|||||||
|
|
||||||
class SessionDebugWrapper(requests.Session):
|
class SessionDebugWrapper(requests.Session):
|
||||||
def get(self, url, **kwargs):
|
def get(self, url, **kwargs):
|
||||||
retries = 3
|
retries = 4
|
||||||
|
|
||||||
while retries > 0:
|
while retries > 0:
|
||||||
retries -= 1
|
retries -= 1
|
||||||
try:
|
try:
|
||||||
r = super().get(url, **kwargs, timeout=15)
|
r = super().get(url, **kwargs, timeout=45)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"GET %s <%d>"
|
"GET %s <%d>"
|
||||||
% (url + "?" + (urlencode(kwargs["params"]) if "params" in kwargs else ""), r.status_code)
|
% (url + "?" + (urlencode(kwargs["params"]) if "params" in kwargs else ""), r.status_code)
|
||||||
)
|
)
|
||||||
if r.status_code == 429:
|
if r.status_code == 429:
|
||||||
sleep(1)
|
sleep(15)
|
||||||
raise Exception("rate limited")
|
raise Exception("rate limited")
|
||||||
|
if r.status_code == 502:
|
||||||
|
raise Exception("Server error")
|
||||||
return r
|
return r
|
||||||
except KeyboardInterrupt as e:
|
except KeyboardInterrupt as e:
|
||||||
raise e
|
raise e
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Error with request %s: %s" % (url, str(e)))
|
logger.warning(
|
||||||
|
"%s: %s"
|
||||||
|
% (url + "?" + (urlencode(kwargs["params"]) if "params" in kwargs else ""), str(e))
|
||||||
|
)
|
||||||
|
sleep(10)
|
||||||
raise Exception("Gave up request after maximum number of retries")
|
raise Exception("Gave up request after maximum number of retries")
|
||||||
|
|
||||||
|
|
||||||
@ -112,7 +118,7 @@ class ParlerScanner:
|
|||||||
for items_key in items_keys:
|
for items_key in items_keys:
|
||||||
if items_key in j and j[items_key]:
|
if items_key in j and j[items_key]:
|
||||||
for item in j[items_key]:
|
for item in j[items_key]:
|
||||||
yield item, items_key
|
yield item, items_key, current_key
|
||||||
|
|
||||||
self._state.set_resume_key(resume_endpoint, resume_id, current_key)
|
self._state.set_resume_key(resume_endpoint, resume_id, current_key)
|
||||||
|
|
||||||
@ -122,27 +128,27 @@ class ParlerScanner:
|
|||||||
current_key = j["next"]
|
current_key = j["next"]
|
||||||
|
|
||||||
def user_followers(self, api, user_id):
|
def user_followers(self, api, user_id):
|
||||||
for profile, key in self._iterate_endpoint(
|
for profile, key, it_index in self._iterate_endpoint(
|
||||||
func=api.user_api.get_followers_for_user_id,
|
func=api.user_api.get_followers_for_user_id,
|
||||||
params={"id": user_id},
|
params={"id": user_id},
|
||||||
resume_endpoint="followers",
|
resume_endpoint="followers",
|
||||||
resume_id=user_id,
|
resume_id=user_id,
|
||||||
items_keys=["followers"]
|
items_keys=["followers"]
|
||||||
):
|
):
|
||||||
yield ParlerFollower(user_id=user_id, follower_id=profile["id"])
|
yield ParlerFollower(user_id=user_id, follower_id=profile["id"], approx_date=it_index)
|
||||||
|
|
||||||
def user_followees(self, api, user_id):
|
def user_followees(self, api, user_id):
|
||||||
for profile, key in self._iterate_endpoint(
|
for profile, key, it_index in self._iterate_endpoint(
|
||||||
func=api.user_api.get_following_for_user_id,
|
func=api.user_api.get_following_for_user_id,
|
||||||
params={"id": user_id},
|
params={"id": user_id},
|
||||||
resume_endpoint="followees",
|
resume_endpoint="followees",
|
||||||
resume_id=user_id,
|
resume_id=user_id,
|
||||||
items_keys=["followees"]
|
items_keys=["followees"]
|
||||||
):
|
):
|
||||||
yield ParlerFollowee(user_id=user_id, followee_id=profile["id"])
|
yield ParlerFollowee(user_id=user_id, followee_id=profile["id"], approx_date=it_index)
|
||||||
|
|
||||||
def user_posts(self, api, user_id):
|
def user_posts(self, api, user_id):
|
||||||
for item, key in self._iterate_endpoint(
|
for item, key, it_index in self._iterate_endpoint(
|
||||||
func=api.feed_api.get_users_feed,
|
func=api.feed_api.get_users_feed,
|
||||||
params={"id": user_id},
|
params={"id": user_id},
|
||||||
resume_endpoint="posts",
|
resume_endpoint="posts",
|
||||||
@ -153,12 +159,12 @@ class ParlerScanner:
|
|||||||
if key == "posts":
|
if key == "posts":
|
||||||
yield ParlerPost(data=item)
|
yield ParlerPost(data=item)
|
||||||
elif key == "postRefs":
|
elif key == "postRefs":
|
||||||
yield ParlerPostRef(data=item)
|
yield ParlerPostRef(post_id=item["_id"], user_id=user_id, approx_date=it_index)
|
||||||
elif key == "urls":
|
elif key == "urls":
|
||||||
yield ParlerUrl(data=item)
|
yield ParlerUrl(data=item)
|
||||||
|
|
||||||
def post_comments(self, api, post_id):
|
def post_comments(self, api, post_id):
|
||||||
for item, key in self._iterate_endpoint(
|
for item, key, _ in self._iterate_endpoint(
|
||||||
func=api.comments_api.get_comments,
|
func=api.comments_api.get_comments,
|
||||||
params={"id": post_id, "reverse": "true"},
|
params={"id": post_id, "reverse": "true"},
|
||||||
resume_endpoint="comments",
|
resume_endpoint="comments",
|
||||||
@ -191,6 +197,8 @@ class ParlerScanner:
|
|||||||
user_id = self._get_user_id_hash(api, int_id)
|
user_id = self._get_user_id_hash(api, int_id)
|
||||||
if user_id:
|
if user_id:
|
||||||
yield user_id, int_id
|
yield user_id, int_id
|
||||||
|
else:
|
||||||
|
self._state.mark_visited_user(int_id)
|
||||||
|
|
||||||
def process_userid(self, api, user_id, int_id):
|
def process_userid(self, api, user_id, int_id):
|
||||||
profile = self.fetch_profile(api, user_id, int_id)
|
profile = self.fetch_profile(api, user_id, int_id)
|
||||||
@ -205,8 +213,7 @@ class ParlerScanner:
|
|||||||
for post in self.user_posts(api, user_id):
|
for post in self.user_posts(api, user_id):
|
||||||
yield post
|
yield post
|
||||||
|
|
||||||
if (post.item_type() == "post" or post.item_type() == "postref") \
|
if post.item_type() == "post" and not self._state.has_visited_post(post):
|
||||||
and not self._state.has_visited_post(post):
|
|
||||||
for comment in self.post_comments(api, post.item_id()):
|
for comment in self.post_comments(api, post.item_id()):
|
||||||
yield comment
|
yield comment
|
||||||
self._state.mark_visited_post(post.item_id())
|
self._state.mark_visited_post(post.item_id())
|
||||||
|
2
util.py
2
util.py
@ -3,7 +3,7 @@ import sys
|
|||||||
from logging import StreamHandler
|
from logging import StreamHandler
|
||||||
|
|
||||||
logger = logging.getLogger("default")
|
logger = logging.getLogger("default")
|
||||||
logger.setLevel(logging.DEBUG)
|
logger.setLevel(logging.INFO)
|
||||||
|
|
||||||
formatter = logging.Formatter('%(asctime)s <%(threadName)s> %(levelname)-5s %(message)s')
|
formatter = logging.Formatter('%(asctime)s <%(threadName)s> %(levelname)-5s %(message)s')
|
||||||
for h in logger.handlers:
|
for h in logger.handlers:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user