mirror of
https://github.com/simon987/hexlib.git
synced 2025-04-10 14:06:43 +00:00
Compare commits
3 Commits
2d74f61553
...
89b21884b7
Author | SHA1 | Date | |
---|---|---|---|
89b21884b7 | |||
6832b9edd6 | |||
30c9494daa |
53
hexlib/db.py
53
hexlib/db.py
@ -2,6 +2,7 @@ import base64
|
|||||||
import sqlite3
|
import sqlite3
|
||||||
import redis
|
import redis
|
||||||
import orjson as json
|
import orjson as json
|
||||||
|
import umsgpack
|
||||||
|
|
||||||
|
|
||||||
class PersistentState:
|
class PersistentState:
|
||||||
@ -19,36 +20,69 @@ class PersistentState:
|
|||||||
|
|
||||||
class VolatileState:
|
class VolatileState:
|
||||||
"""Quick and dirty volatile dict-like redis wrapper"""
|
"""Quick and dirty volatile dict-like redis wrapper"""
|
||||||
def __init__(self, prefix, ttl=3600, **redis_args):
|
|
||||||
|
def __init__(self, prefix, **redis_args):
|
||||||
self.rdb = redis.Redis(**redis_args)
|
self.rdb = redis.Redis(**redis_args)
|
||||||
self.prefix = prefix
|
self.prefix = prefix
|
||||||
self.ttl = ttl
|
|
||||||
|
|
||||||
def __getitem__(self, table):
|
def __getitem__(self, table):
|
||||||
return RedisTable(self, table)
|
return RedisTable(self, table)
|
||||||
|
|
||||||
|
|
||||||
|
class VolatileBooleanState:
|
||||||
|
"""Quick and dirty volatile dict-like redis wrapper for boolean values"""
|
||||||
|
|
||||||
|
def __init__(self, prefix, **redis_args):
|
||||||
|
self.rdb = redis.Redis(**redis_args)
|
||||||
|
self.prefix = prefix
|
||||||
|
|
||||||
|
def __getitem__(self, table):
|
||||||
|
return RedisBooleanTable(self, table)
|
||||||
|
|
||||||
|
|
||||||
class RedisTable:
|
class RedisTable:
|
||||||
def __init__(self, state, table):
|
def __init__(self, state, table):
|
||||||
self._state = state
|
self._state = state
|
||||||
self._table = table
|
self._table = table
|
||||||
|
|
||||||
def __setitem__(self, key, value):
|
def __setitem__(self, key, value):
|
||||||
self._state.rdb.set(self._state.prefix + self._table + ":" + str(key), json.dumps(value), ex=self._state.ttl)
|
self._state.rdb.hset(self._state.prefix + self._table, str(key), umsgpack.dumps(value))
|
||||||
|
|
||||||
def __getitem__(self, key):
|
def __getitem__(self, key):
|
||||||
val = self._state.rdb.get(self._state.prefix + self._table + ":" + str(key))
|
val = self._state.rdb.hget(self._state.prefix + self._table, str(key))
|
||||||
if val:
|
if val:
|
||||||
return json.loads(val)
|
return umsgpack.loads(val)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def __delitem__(self, key):
|
def __delitem__(self, key):
|
||||||
self._state.rdb.delete(self._state.prefix + self._table + ":" + str(key))
|
self._state.rdb.hdel(self._state.prefix + self._table, str(key))
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
for key in self._state.rdb.scan_iter(self._state.prefix + self._table + "*"):
|
val = self._state.rdb.hgetall(self._state.prefix + self._table)
|
||||||
val = self._state.rdb.get(key)
|
if val:
|
||||||
yield json.loads(val) if val else None
|
return ((k, umsgpack.loads(v)) for k, v in
|
||||||
|
self._state.rdb.hgetall(self._state.prefix + self._table).items())
|
||||||
|
|
||||||
|
|
||||||
|
class RedisBooleanTable:
|
||||||
|
def __init__(self, state, table):
|
||||||
|
self._state = state
|
||||||
|
self._table = table
|
||||||
|
|
||||||
|
def __setitem__(self, key, value):
|
||||||
|
if value:
|
||||||
|
self._state.rdb.sadd(self._state.prefix + self._table, str(key))
|
||||||
|
else:
|
||||||
|
self.__delitem__(key)
|
||||||
|
|
||||||
|
def __getitem__(self, key):
|
||||||
|
return self._state.rdb.sismember(self._state.prefix + self._table, str(key))
|
||||||
|
|
||||||
|
def __delitem__(self, key):
|
||||||
|
self._state.rdb.srem(self._state.prefix + self._table, str(key))
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
return iter(self._state.rdb.smembers(self._state.prefix + self._table))
|
||||||
|
|
||||||
|
|
||||||
class Table:
|
class Table:
|
||||||
@ -132,7 +166,6 @@ def _deserialize(value, col_type):
|
|||||||
|
|
||||||
|
|
||||||
def pg_fetch_cursor_all(cur, name, batch_size=1000):
|
def pg_fetch_cursor_all(cur, name, batch_size=1000):
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
cur.execute("FETCH FORWARD %d FROM %s" % (batch_size, name))
|
cur.execute("FETCH FORWARD %d FROM %s" % (batch_size, name))
|
||||||
cnt = 0
|
cnt = 0
|
||||||
|
@ -9,6 +9,18 @@ import siphash
|
|||||||
last_time_called = dict()
|
last_time_called = dict()
|
||||||
|
|
||||||
|
|
||||||
|
def retry(attempts, callback=None):
|
||||||
|
def decorate(func):
|
||||||
|
retries = attempts
|
||||||
|
while retries > 0:
|
||||||
|
try:
|
||||||
|
func()
|
||||||
|
except Exception as e:
|
||||||
|
if callback:
|
||||||
|
callback(e)
|
||||||
|
return decorate
|
||||||
|
|
||||||
|
|
||||||
def chunks(lst: list, chunk_len: int):
|
def chunks(lst: list, chunk_len: int):
|
||||||
for i in range(0, len(lst), chunk_len):
|
for i in range(0, len(lst), chunk_len):
|
||||||
yield lst[i:i + chunk_len]
|
yield lst[i:i + chunk_len]
|
||||||
|
@ -1,7 +1,11 @@
|
|||||||
import pickle
|
import pickle
|
||||||
import re
|
import re
|
||||||
|
import os
|
||||||
|
from datetime import datetime
|
||||||
from base64 import b64encode, b64decode
|
from base64 import b64encode, b64decode
|
||||||
from http.cookiejar import Cookie
|
from http.cookiejar import Cookie
|
||||||
|
import requests
|
||||||
|
import orjson as json
|
||||||
|
|
||||||
from dateutil.parser import parse
|
from dateutil.parser import parse
|
||||||
from requests.cookies import RequestsCookieJar
|
from requests.cookies import RequestsCookieJar
|
||||||
@ -68,3 +72,35 @@ def cookiejar_filter(cj, pattern):
|
|||||||
if re.match(pattern, c.domain):
|
if re.match(pattern, c.domain):
|
||||||
filtered_cj.set_cookie(c)
|
filtered_cj.set_cookie(c)
|
||||||
return filtered_cj
|
return filtered_cj
|
||||||
|
|
||||||
|
|
||||||
|
def download_file(url, destination, session=None, headers=None, overwrite=False, retries=1, err_cb=None,
|
||||||
|
save_meta=False):
|
||||||
|
if os.path.exists(destination) and not overwrite:
|
||||||
|
return
|
||||||
|
|
||||||
|
if session is None:
|
||||||
|
session = requests.session()
|
||||||
|
|
||||||
|
while retries > 0:
|
||||||
|
try:
|
||||||
|
r = session.get(url, stream=True, headers=headers)
|
||||||
|
|
||||||
|
with open(destination + ".part", "wb") as f:
|
||||||
|
for chunk in r.iter_content(chunk_size=4096):
|
||||||
|
if chunk:
|
||||||
|
f.write(chunk)
|
||||||
|
os.rename(destination + ".part", destination)
|
||||||
|
|
||||||
|
if save_meta:
|
||||||
|
with open(destination + ".meta", "wb") as f:
|
||||||
|
f.write(json.dumps({
|
||||||
|
"headers": dict(**r.headers),
|
||||||
|
"url": url,
|
||||||
|
"timestamp": datetime.utcnow().replace(microsecond=0).isoformat()
|
||||||
|
}))
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
if err_cb:
|
||||||
|
err_cb(e)
|
||||||
|
retries -= 1
|
||||||
|
5
setup.py
5
setup.py
@ -2,7 +2,7 @@ from setuptools import setup
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="hexlib",
|
name="hexlib",
|
||||||
version="1.17",
|
version="1.19",
|
||||||
description="Misc utility methods",
|
description="Misc utility methods",
|
||||||
author="simon987",
|
author="simon987",
|
||||||
author_email="me@simon987.net",
|
author_email="me@simon987.net",
|
||||||
@ -12,6 +12,7 @@ setup(
|
|||||||
"data/*"
|
"data/*"
|
||||||
]},
|
]},
|
||||||
install_requires=[
|
install_requires=[
|
||||||
"ImageHash", "influxdb", "siphash", "python-dateutil", "redis", "orjson", "zstandard"
|
"ImageHash", "influxdb", "siphash", "python-dateutil", "redis", "orjson", "zstandard",
|
||||||
|
"u-msgpack-python"
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from unittest import TestCase
|
from unittest import TestCase
|
||||||
from hexlib.db import VolatileState
|
from hexlib.db import VolatileState, VolatileBooleanState
|
||||||
|
|
||||||
|
|
||||||
class TestVolatileState(TestCase):
|
class TestVolatileState(TestCase):
|
||||||
@ -8,7 +8,7 @@ class TestVolatileState(TestCase):
|
|||||||
s = VolatileState(prefix="test1")
|
s = VolatileState(prefix="test1")
|
||||||
val = {
|
val = {
|
||||||
"field1": 1,
|
"field1": 1,
|
||||||
"arr1": [1,2,3]
|
"arr1": [1, 2, 3]
|
||||||
}
|
}
|
||||||
|
|
||||||
s["a"]["1"] = val
|
s["a"]["1"] = val
|
||||||
@ -23,7 +23,7 @@ class TestVolatileState(TestCase):
|
|||||||
s["b"]["3"] = 3
|
s["b"]["3"] = 3
|
||||||
s["b"]["4"] = 4
|
s["b"]["4"] = 4
|
||||||
|
|
||||||
self.assertEqual(sum(s["b"]), 10)
|
self.assertEqual(sum(v for k,v in s["b"]), 10)
|
||||||
|
|
||||||
def test_int_key(self):
|
def test_int_key(self):
|
||||||
s = VolatileState(prefix="test2")
|
s = VolatileState(prefix="test2")
|
||||||
@ -38,3 +38,33 @@ class TestVolatileState(TestCase):
|
|||||||
del s["c"]["1"]
|
del s["c"]["1"]
|
||||||
self.assertIsNone(s["c"]["1"])
|
self.assertIsNone(s["c"]["1"])
|
||||||
|
|
||||||
|
|
||||||
|
class TestVolatileBoolState(TestCase):
|
||||||
|
|
||||||
|
def test_get_set(self):
|
||||||
|
s = VolatileBooleanState(prefix="test1")
|
||||||
|
|
||||||
|
s["a"]["1"] = True
|
||||||
|
s["a"]["2"] = True
|
||||||
|
|
||||||
|
self.assertTrue(s["a"]["1"])
|
||||||
|
self.assertTrue(s["a"]["2"])
|
||||||
|
self.assertFalse(s["a"]["3"])
|
||||||
|
|
||||||
|
def test_iter(self):
|
||||||
|
s = VolatileBooleanState(prefix="test2")
|
||||||
|
|
||||||
|
s["b"]["1"] = True
|
||||||
|
s["b"]["2"] = True
|
||||||
|
s["b"]["3"] = True
|
||||||
|
s["b"]["4"] = True
|
||||||
|
|
||||||
|
self.assertEqual(sum(int(x) for x in s["b"]), 10)
|
||||||
|
|
||||||
|
def test_delete(self):
|
||||||
|
s = VolatileBooleanState(prefix="test3")
|
||||||
|
|
||||||
|
s["c"]["1"] = True
|
||||||
|
self.assertTrue(s["c"]["1"])
|
||||||
|
del s["c"]["1"]
|
||||||
|
self.assertFalse(s["c"]["1"])
|
||||||
|
29
test/test_download_file.py
Normal file
29
test/test_download_file.py
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
from unittest import TestCase
|
||||||
|
import os
|
||||||
|
|
||||||
|
from hexlib.web import download_file
|
||||||
|
|
||||||
|
|
||||||
|
class TestDownloadFile(TestCase):
|
||||||
|
|
||||||
|
def test_download_file(self):
|
||||||
|
download_file("http://ovh.net/files/10Mb.dat", "/tmp/10Mb.dat")
|
||||||
|
self.assertTrue(os.path.exists("/tmp/10Mb.dat"))
|
||||||
|
os.remove("/tmp/10Mb.dat")
|
||||||
|
|
||||||
|
def test_download_file_error(self):
|
||||||
|
exceptions = []
|
||||||
|
|
||||||
|
def cb(ex):
|
||||||
|
exceptions.append(ex)
|
||||||
|
|
||||||
|
download_file("http://thisUrlIsInvalid", "/tmp/file.txt", err_cb=cb, retries=3)
|
||||||
|
self.assertFalse(os.path.exists("/tmp/10Mb.dat"))
|
||||||
|
self.assertEqual(len(exceptions), 3)
|
||||||
|
|
||||||
|
def test_download_file_meta(self):
|
||||||
|
download_file("http://ovh.net/files/10Mb.dat", "/tmp/10Mb.dat", save_meta=True)
|
||||||
|
self.assertTrue(os.path.exists("/tmp/10Mb.dat"))
|
||||||
|
self.assertTrue(os.path.exists("/tmp/10Mb.dat.meta"))
|
||||||
|
os.remove("/tmp/10Mb.dat")
|
||||||
|
# os.remove("/tmp/10Mb.dat.meta")
|
Loading…
x
Reference in New Issue
Block a user