Compare commits

...

3 Commits

Author SHA1 Message Date
89b21884b7 Use hash in volatile state 2020-12-20 20:15:17 -05:00
6832b9edd6 remove extraneous ttl arg 2020-12-20 19:55:52 -05:00
30c9494daa add download_file, bool volatile state 2020-12-20 19:53:38 -05:00
6 changed files with 156 additions and 15 deletions

View File

@ -2,6 +2,7 @@ import base64
import sqlite3
import redis
import orjson as json
import umsgpack
class PersistentState:
@ -19,36 +20,69 @@ class PersistentState:
class VolatileState:
"""Quick and dirty volatile dict-like redis wrapper"""
def __init__(self, prefix, ttl=3600, **redis_args):
def __init__(self, prefix, **redis_args):
self.rdb = redis.Redis(**redis_args)
self.prefix = prefix
self.ttl = ttl
def __getitem__(self, table):
return RedisTable(self, table)
class VolatileBooleanState:
"""Quick and dirty volatile dict-like redis wrapper for boolean values"""
def __init__(self, prefix, **redis_args):
self.rdb = redis.Redis(**redis_args)
self.prefix = prefix
def __getitem__(self, table):
return RedisBooleanTable(self, table)
class RedisTable:
def __init__(self, state, table):
self._state = state
self._table = table
def __setitem__(self, key, value):
self._state.rdb.set(self._state.prefix + self._table + ":" + str(key), json.dumps(value), ex=self._state.ttl)
self._state.rdb.hset(self._state.prefix + self._table, str(key), umsgpack.dumps(value))
def __getitem__(self, key):
val = self._state.rdb.get(self._state.prefix + self._table + ":" + str(key))
val = self._state.rdb.hget(self._state.prefix + self._table, str(key))
if val:
return json.loads(val)
return umsgpack.loads(val)
return None
def __delitem__(self, key):
self._state.rdb.delete(self._state.prefix + self._table + ":" + str(key))
self._state.rdb.hdel(self._state.prefix + self._table, str(key))
def __iter__(self):
for key in self._state.rdb.scan_iter(self._state.prefix + self._table + "*"):
val = self._state.rdb.get(key)
yield json.loads(val) if val else None
val = self._state.rdb.hgetall(self._state.prefix + self._table)
if val:
return ((k, umsgpack.loads(v)) for k, v in
self._state.rdb.hgetall(self._state.prefix + self._table).items())
class RedisBooleanTable:
def __init__(self, state, table):
self._state = state
self._table = table
def __setitem__(self, key, value):
if value:
self._state.rdb.sadd(self._state.prefix + self._table, str(key))
else:
self.__delitem__(key)
def __getitem__(self, key):
return self._state.rdb.sismember(self._state.prefix + self._table, str(key))
def __delitem__(self, key):
self._state.rdb.srem(self._state.prefix + self._table, str(key))
def __iter__(self):
return iter(self._state.rdb.smembers(self._state.prefix + self._table))
class Table:
@ -132,7 +166,6 @@ def _deserialize(value, col_type):
def pg_fetch_cursor_all(cur, name, batch_size=1000):
while True:
cur.execute("FETCH FORWARD %d FROM %s" % (batch_size, name))
cnt = 0

View File

@ -9,6 +9,18 @@ import siphash
last_time_called = dict()
def retry(attempts, callback=None):
def decorate(func):
retries = attempts
while retries > 0:
try:
func()
except Exception as e:
if callback:
callback(e)
return decorate
def chunks(lst: list, chunk_len: int):
for i in range(0, len(lst), chunk_len):
yield lst[i:i + chunk_len]

View File

@ -1,7 +1,11 @@
import pickle
import re
import os
from datetime import datetime
from base64 import b64encode, b64decode
from http.cookiejar import Cookie
import requests
import orjson as json
from dateutil.parser import parse
from requests.cookies import RequestsCookieJar
@ -68,3 +72,35 @@ def cookiejar_filter(cj, pattern):
if re.match(pattern, c.domain):
filtered_cj.set_cookie(c)
return filtered_cj
def download_file(url, destination, session=None, headers=None, overwrite=False, retries=1, err_cb=None,
save_meta=False):
if os.path.exists(destination) and not overwrite:
return
if session is None:
session = requests.session()
while retries > 0:
try:
r = session.get(url, stream=True, headers=headers)
with open(destination + ".part", "wb") as f:
for chunk in r.iter_content(chunk_size=4096):
if chunk:
f.write(chunk)
os.rename(destination + ".part", destination)
if save_meta:
with open(destination + ".meta", "wb") as f:
f.write(json.dumps({
"headers": dict(**r.headers),
"url": url,
"timestamp": datetime.utcnow().replace(microsecond=0).isoformat()
}))
break
except Exception as e:
if err_cb:
err_cb(e)
retries -= 1

View File

@ -2,7 +2,7 @@ from setuptools import setup
setup(
name="hexlib",
version="1.17",
version="1.19",
description="Misc utility methods",
author="simon987",
author_email="me@simon987.net",
@ -12,6 +12,7 @@ setup(
"data/*"
]},
install_requires=[
"ImageHash", "influxdb", "siphash", "python-dateutil", "redis", "orjson", "zstandard"
"ImageHash", "influxdb", "siphash", "python-dateutil", "redis", "orjson", "zstandard",
"u-msgpack-python"
]
)

View File

@ -1,5 +1,5 @@
from unittest import TestCase
from hexlib.db import VolatileState
from hexlib.db import VolatileState, VolatileBooleanState
class TestVolatileState(TestCase):
@ -8,7 +8,7 @@ class TestVolatileState(TestCase):
s = VolatileState(prefix="test1")
val = {
"field1": 1,
"arr1": [1,2,3]
"arr1": [1, 2, 3]
}
s["a"]["1"] = val
@ -23,7 +23,7 @@ class TestVolatileState(TestCase):
s["b"]["3"] = 3
s["b"]["4"] = 4
self.assertEqual(sum(s["b"]), 10)
self.assertEqual(sum(v for k,v in s["b"]), 10)
def test_int_key(self):
s = VolatileState(prefix="test2")
@ -38,3 +38,33 @@ class TestVolatileState(TestCase):
del s["c"]["1"]
self.assertIsNone(s["c"]["1"])
class TestVolatileBoolState(TestCase):
def test_get_set(self):
s = VolatileBooleanState(prefix="test1")
s["a"]["1"] = True
s["a"]["2"] = True
self.assertTrue(s["a"]["1"])
self.assertTrue(s["a"]["2"])
self.assertFalse(s["a"]["3"])
def test_iter(self):
s = VolatileBooleanState(prefix="test2")
s["b"]["1"] = True
s["b"]["2"] = True
s["b"]["3"] = True
s["b"]["4"] = True
self.assertEqual(sum(int(x) for x in s["b"]), 10)
def test_delete(self):
s = VolatileBooleanState(prefix="test3")
s["c"]["1"] = True
self.assertTrue(s["c"]["1"])
del s["c"]["1"]
self.assertFalse(s["c"]["1"])

View File

@ -0,0 +1,29 @@
from unittest import TestCase
import os
from hexlib.web import download_file
class TestDownloadFile(TestCase):
def test_download_file(self):
download_file("http://ovh.net/files/10Mb.dat", "/tmp/10Mb.dat")
self.assertTrue(os.path.exists("/tmp/10Mb.dat"))
os.remove("/tmp/10Mb.dat")
def test_download_file_error(self):
exceptions = []
def cb(ex):
exceptions.append(ex)
download_file("http://thisUrlIsInvalid", "/tmp/file.txt", err_cb=cb, retries=3)
self.assertFalse(os.path.exists("/tmp/10Mb.dat"))
self.assertEqual(len(exceptions), 3)
def test_download_file_meta(self):
download_file("http://ovh.net/files/10Mb.dat", "/tmp/10Mb.dat", save_meta=True)
self.assertTrue(os.path.exists("/tmp/10Mb.dat"))
self.assertTrue(os.path.exists("/tmp/10Mb.dat.meta"))
os.remove("/tmp/10Mb.dat")
# os.remove("/tmp/10Mb.dat.meta")