Use redis instead of rabbitmq

simon987 2020-04-10 08:10:43 -04:00
parent fcb2ce8b68
commit 04c4649af9
3 changed files with 17 additions and 33 deletions

requirements.txt

@@ -1,5 +1,5 @@
 praw
-pika
+redis
 influxdb
 psaw
 git+git://github.com/simon987/hexlib.git

retropublish.py

@@ -9,6 +9,7 @@ from run import publish, logger
 if len(sys.argv) != 3:
     print("Usage: ./retropublish.py post|comment subreddit")
+    quit(0)

 item_type = sys.argv[1]
 subreddit = sys.argv[2]
@@ -23,6 +24,5 @@ else:
 for item in gen:
     try:
         publish(item)
-        time.sleep(0.2)
     except Exception as e:
         logger.error(str(e) + ": " + traceback.format_exc())

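The hunks above only show the tail of retropublish.py. For context, a minimal sketch of how the `gen` iterated in the loop is typically built with psaw's PushshiftAPI; the actual setup lives outside this diff, so the details here are an assumption:

# Sketch only: plausible construction of `gen`, based on the
# post|comment usage string above. Not shown in this diff.
from psaw import PushshiftAPI

api = PushshiftAPI()
if item_type == "post":
    gen = api.search_submissions(subreddit=subreddit)
else:
    gen = api.search_comments(subreddit=subreddit)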
run.py

@@ -12,8 +12,8 @@ from datetime import datetime, timedelta
 from itertools import islice
 from logging import FileHandler, StreamHandler
 from queue import Queue

-import pika
+import redis
 import praw
 from hexlib.misc import buffered
 from hexlib.monitoring import Monitoring
@ -34,16 +34,14 @@ logger.setLevel(logging.DEBUG)
def connect(): def connect():
global reddit_channel global r
rabbit = pika.BlockingConnection(pika.ConnectionParameters(host="localhost")) r = redis.Redis(host='localhost', port=6379, db=0)
reddit_channel = rabbit.channel()
reddit_channel.exchange_declare(exchange="reddit", exchange_type="topic")
connect() connect()
REALTIME_DELAY = timedelta(seconds=60) REALTIME_DELAY = timedelta(seconds=60)
MONITORING = True MONITORING = False
if MONITORING: if MONITORING:
monitoring = Monitoring("reddit_feed", logger=logger, batch_size=50, flush_on_exit=True) monitoring = Monitoring("reddit_feed", logger=logger, batch_size=50, flush_on_exit=True)
@@ -105,36 +103,22 @@ def serialize(thing):
     }


-@buffered(batch_size=5000, flush_on_exit=True)
-def _publish_buffered(items):
-    buckets = defaultdict(list)
-    for item in items:
-        buckets[item[0]].append(item)
-
-    for bucket in buckets.values():
-        routing_key, _ = bucket[0]
-        body = [item[1] for item in bucket]
-
-        while True:
-            try:
-                reddit_channel.basic_publish(
-                    exchange='reddit',
-                    routing_key=routing_key,
-                    body=json.dumps(body, separators=(',', ':'), ensure_ascii=False, sort_keys=True)
-                )
-                logger.debug("RabbitMQ: published %d items (%s)" % (len(body), routing_key))
-                break
-            except Exception as e:
-                logger.error(str(e))
-                time.sleep(0.5)
-
-
 def publish(thing):
     thing_type = type(thing).__name__.lower()
     j = serialize(thing)

     post_process(j)
-    _publish_buffered([("%s.%s" % (thing_type, str(thing.subreddit).lower()), j)])
+    routing_key = "%s.%s" % (thing_type, str(thing.subreddit).lower())
+    while True:
+        try:
+            r.rpush(
+                "q.reddit." + routing_key,
+                json.dumps(j, separators=(',', ':'), ensure_ascii=False, sort_keys=True)
+            )
+            break
+        except Exception as e:
+            logger.error(str(e))
+            time.sleep(0.5)


 def publish_worker(q: Queue):
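
After this change, publish() RPUSHes each serialized item onto a Redis list keyed q.reddit.<thing_type>.<subreddit> instead of batch-publishing it to the reddit topic exchange. A minimal consumer sketch using redis-py; the key q.reddit.post.programming is hypothetical, and any key produced by publish() would work the same way:

import json
import redis

r = redis.Redis(host="localhost", port=6379, db=0)

# Hypothetical key; publish() builds keys as "q.reddit.<type>.<subreddit>".
queue_key = "q.reddit.post.programming"

while True:
    # BLPOP blocks until an item arrives and returns a (key, value) pair of bytes.
    _, raw = r.blpop(queue_key)
    item = json.loads(raw)
    print(item)

Unlike the RabbitMQ topic exchange, Redis lists have no wildcard routing: a consumer must know the exact queue keys it drains (or discover them with SCAN MATCH q.reddit.*), and each item goes to exactly one BLPOP-ing consumer rather than being fanned out to every binding.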