mirror of
https://github.com/simon987/hexlib.git
synced 2025-04-21 18:46:42 +00:00
Compare commits
No commits in common. "89b21884b70ab7f1a3eb677fc53364b25468f594" and "2d74f61553ae401250e303fb17edae3d5a283498" have entirely different histories.
89b21884b7
...
2d74f61553
53
hexlib/db.py
53
hexlib/db.py
@ -2,7 +2,6 @@ import base64
|
|||||||
import sqlite3
|
import sqlite3
|
||||||
import redis
|
import redis
|
||||||
import orjson as json
|
import orjson as json
|
||||||
import umsgpack
|
|
||||||
|
|
||||||
|
|
||||||
class PersistentState:
|
class PersistentState:
|
||||||
@ -20,69 +19,36 @@ class PersistentState:
|
|||||||
|
|
||||||
class VolatileState:
|
class VolatileState:
|
||||||
"""Quick and dirty volatile dict-like redis wrapper"""
|
"""Quick and dirty volatile dict-like redis wrapper"""
|
||||||
|
def __init__(self, prefix, ttl=3600, **redis_args):
|
||||||
def __init__(self, prefix, **redis_args):
|
|
||||||
self.rdb = redis.Redis(**redis_args)
|
self.rdb = redis.Redis(**redis_args)
|
||||||
self.prefix = prefix
|
self.prefix = prefix
|
||||||
|
self.ttl = ttl
|
||||||
|
|
||||||
def __getitem__(self, table):
|
def __getitem__(self, table):
|
||||||
return RedisTable(self, table)
|
return RedisTable(self, table)
|
||||||
|
|
||||||
|
|
||||||
class VolatileBooleanState:
|
|
||||||
"""Quick and dirty volatile dict-like redis wrapper for boolean values"""
|
|
||||||
|
|
||||||
def __init__(self, prefix, **redis_args):
|
|
||||||
self.rdb = redis.Redis(**redis_args)
|
|
||||||
self.prefix = prefix
|
|
||||||
|
|
||||||
def __getitem__(self, table):
|
|
||||||
return RedisBooleanTable(self, table)
|
|
||||||
|
|
||||||
|
|
||||||
class RedisTable:
|
class RedisTable:
|
||||||
def __init__(self, state, table):
|
def __init__(self, state, table):
|
||||||
self._state = state
|
self._state = state
|
||||||
self._table = table
|
self._table = table
|
||||||
|
|
||||||
def __setitem__(self, key, value):
|
def __setitem__(self, key, value):
|
||||||
self._state.rdb.hset(self._state.prefix + self._table, str(key), umsgpack.dumps(value))
|
self._state.rdb.set(self._state.prefix + self._table + ":" + str(key), json.dumps(value), ex=self._state.ttl)
|
||||||
|
|
||||||
def __getitem__(self, key):
|
def __getitem__(self, key):
|
||||||
val = self._state.rdb.hget(self._state.prefix + self._table, str(key))
|
val = self._state.rdb.get(self._state.prefix + self._table + ":" + str(key))
|
||||||
if val:
|
if val:
|
||||||
return umsgpack.loads(val)
|
return json.loads(val)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def __delitem__(self, key):
|
def __delitem__(self, key):
|
||||||
self._state.rdb.hdel(self._state.prefix + self._table, str(key))
|
self._state.rdb.delete(self._state.prefix + self._table + ":" + str(key))
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
val = self._state.rdb.hgetall(self._state.prefix + self._table)
|
for key in self._state.rdb.scan_iter(self._state.prefix + self._table + "*"):
|
||||||
if val:
|
val = self._state.rdb.get(key)
|
||||||
return ((k, umsgpack.loads(v)) for k, v in
|
yield json.loads(val) if val else None
|
||||||
self._state.rdb.hgetall(self._state.prefix + self._table).items())
|
|
||||||
|
|
||||||
|
|
||||||
class RedisBooleanTable:
|
|
||||||
def __init__(self, state, table):
|
|
||||||
self._state = state
|
|
||||||
self._table = table
|
|
||||||
|
|
||||||
def __setitem__(self, key, value):
|
|
||||||
if value:
|
|
||||||
self._state.rdb.sadd(self._state.prefix + self._table, str(key))
|
|
||||||
else:
|
|
||||||
self.__delitem__(key)
|
|
||||||
|
|
||||||
def __getitem__(self, key):
|
|
||||||
return self._state.rdb.sismember(self._state.prefix + self._table, str(key))
|
|
||||||
|
|
||||||
def __delitem__(self, key):
|
|
||||||
self._state.rdb.srem(self._state.prefix + self._table, str(key))
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
return iter(self._state.rdb.smembers(self._state.prefix + self._table))
|
|
||||||
|
|
||||||
|
|
||||||
class Table:
|
class Table:
|
||||||
@ -166,6 +132,7 @@ def _deserialize(value, col_type):
|
|||||||
|
|
||||||
|
|
||||||
def pg_fetch_cursor_all(cur, name, batch_size=1000):
|
def pg_fetch_cursor_all(cur, name, batch_size=1000):
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
cur.execute("FETCH FORWARD %d FROM %s" % (batch_size, name))
|
cur.execute("FETCH FORWARD %d FROM %s" % (batch_size, name))
|
||||||
cnt = 0
|
cnt = 0
|
||||||
|
@ -9,18 +9,6 @@ import siphash
|
|||||||
last_time_called = dict()
|
last_time_called = dict()
|
||||||
|
|
||||||
|
|
||||||
def retry(attempts, callback=None):
|
|
||||||
def decorate(func):
|
|
||||||
retries = attempts
|
|
||||||
while retries > 0:
|
|
||||||
try:
|
|
||||||
func()
|
|
||||||
except Exception as e:
|
|
||||||
if callback:
|
|
||||||
callback(e)
|
|
||||||
return decorate
|
|
||||||
|
|
||||||
|
|
||||||
def chunks(lst: list, chunk_len: int):
|
def chunks(lst: list, chunk_len: int):
|
||||||
for i in range(0, len(lst), chunk_len):
|
for i in range(0, len(lst), chunk_len):
|
||||||
yield lst[i:i + chunk_len]
|
yield lst[i:i + chunk_len]
|
||||||
|
@ -1,11 +1,7 @@
|
|||||||
import pickle
|
import pickle
|
||||||
import re
|
import re
|
||||||
import os
|
|
||||||
from datetime import datetime
|
|
||||||
from base64 import b64encode, b64decode
|
from base64 import b64encode, b64decode
|
||||||
from http.cookiejar import Cookie
|
from http.cookiejar import Cookie
|
||||||
import requests
|
|
||||||
import orjson as json
|
|
||||||
|
|
||||||
from dateutil.parser import parse
|
from dateutil.parser import parse
|
||||||
from requests.cookies import RequestsCookieJar
|
from requests.cookies import RequestsCookieJar
|
||||||
@ -72,35 +68,3 @@ def cookiejar_filter(cj, pattern):
|
|||||||
if re.match(pattern, c.domain):
|
if re.match(pattern, c.domain):
|
||||||
filtered_cj.set_cookie(c)
|
filtered_cj.set_cookie(c)
|
||||||
return filtered_cj
|
return filtered_cj
|
||||||
|
|
||||||
|
|
||||||
def download_file(url, destination, session=None, headers=None, overwrite=False, retries=1, err_cb=None,
|
|
||||||
save_meta=False):
|
|
||||||
if os.path.exists(destination) and not overwrite:
|
|
||||||
return
|
|
||||||
|
|
||||||
if session is None:
|
|
||||||
session = requests.session()
|
|
||||||
|
|
||||||
while retries > 0:
|
|
||||||
try:
|
|
||||||
r = session.get(url, stream=True, headers=headers)
|
|
||||||
|
|
||||||
with open(destination + ".part", "wb") as f:
|
|
||||||
for chunk in r.iter_content(chunk_size=4096):
|
|
||||||
if chunk:
|
|
||||||
f.write(chunk)
|
|
||||||
os.rename(destination + ".part", destination)
|
|
||||||
|
|
||||||
if save_meta:
|
|
||||||
with open(destination + ".meta", "wb") as f:
|
|
||||||
f.write(json.dumps({
|
|
||||||
"headers": dict(**r.headers),
|
|
||||||
"url": url,
|
|
||||||
"timestamp": datetime.utcnow().replace(microsecond=0).isoformat()
|
|
||||||
}))
|
|
||||||
break
|
|
||||||
except Exception as e:
|
|
||||||
if err_cb:
|
|
||||||
err_cb(e)
|
|
||||||
retries -= 1
|
|
||||||
|
5
setup.py
5
setup.py
@ -2,7 +2,7 @@ from setuptools import setup
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="hexlib",
|
name="hexlib",
|
||||||
version="1.19",
|
version="1.17",
|
||||||
description="Misc utility methods",
|
description="Misc utility methods",
|
||||||
author="simon987",
|
author="simon987",
|
||||||
author_email="me@simon987.net",
|
author_email="me@simon987.net",
|
||||||
@ -12,7 +12,6 @@ setup(
|
|||||||
"data/*"
|
"data/*"
|
||||||
]},
|
]},
|
||||||
install_requires=[
|
install_requires=[
|
||||||
"ImageHash", "influxdb", "siphash", "python-dateutil", "redis", "orjson", "zstandard",
|
"ImageHash", "influxdb", "siphash", "python-dateutil", "redis", "orjson", "zstandard"
|
||||||
"u-msgpack-python"
|
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from unittest import TestCase
|
from unittest import TestCase
|
||||||
from hexlib.db import VolatileState, VolatileBooleanState
|
from hexlib.db import VolatileState
|
||||||
|
|
||||||
|
|
||||||
class TestVolatileState(TestCase):
|
class TestVolatileState(TestCase):
|
||||||
@ -23,7 +23,7 @@ class TestVolatileState(TestCase):
|
|||||||
s["b"]["3"] = 3
|
s["b"]["3"] = 3
|
||||||
s["b"]["4"] = 4
|
s["b"]["4"] = 4
|
||||||
|
|
||||||
self.assertEqual(sum(v for k,v in s["b"]), 10)
|
self.assertEqual(sum(s["b"]), 10)
|
||||||
|
|
||||||
def test_int_key(self):
|
def test_int_key(self):
|
||||||
s = VolatileState(prefix="test2")
|
s = VolatileState(prefix="test2")
|
||||||
@ -38,33 +38,3 @@ class TestVolatileState(TestCase):
|
|||||||
del s["c"]["1"]
|
del s["c"]["1"]
|
||||||
self.assertIsNone(s["c"]["1"])
|
self.assertIsNone(s["c"]["1"])
|
||||||
|
|
||||||
|
|
||||||
class TestVolatileBoolState(TestCase):
|
|
||||||
|
|
||||||
def test_get_set(self):
|
|
||||||
s = VolatileBooleanState(prefix="test1")
|
|
||||||
|
|
||||||
s["a"]["1"] = True
|
|
||||||
s["a"]["2"] = True
|
|
||||||
|
|
||||||
self.assertTrue(s["a"]["1"])
|
|
||||||
self.assertTrue(s["a"]["2"])
|
|
||||||
self.assertFalse(s["a"]["3"])
|
|
||||||
|
|
||||||
def test_iter(self):
|
|
||||||
s = VolatileBooleanState(prefix="test2")
|
|
||||||
|
|
||||||
s["b"]["1"] = True
|
|
||||||
s["b"]["2"] = True
|
|
||||||
s["b"]["3"] = True
|
|
||||||
s["b"]["4"] = True
|
|
||||||
|
|
||||||
self.assertEqual(sum(int(x) for x in s["b"]), 10)
|
|
||||||
|
|
||||||
def test_delete(self):
|
|
||||||
s = VolatileBooleanState(prefix="test3")
|
|
||||||
|
|
||||||
s["c"]["1"] = True
|
|
||||||
self.assertTrue(s["c"]["1"])
|
|
||||||
del s["c"]["1"]
|
|
||||||
self.assertFalse(s["c"]["1"])
|
|
||||||
|
@ -1,29 +0,0 @@
|
|||||||
from unittest import TestCase
|
|
||||||
import os
|
|
||||||
|
|
||||||
from hexlib.web import download_file
|
|
||||||
|
|
||||||
|
|
||||||
class TestDownloadFile(TestCase):
|
|
||||||
|
|
||||||
def test_download_file(self):
|
|
||||||
download_file("http://ovh.net/files/10Mb.dat", "/tmp/10Mb.dat")
|
|
||||||
self.assertTrue(os.path.exists("/tmp/10Mb.dat"))
|
|
||||||
os.remove("/tmp/10Mb.dat")
|
|
||||||
|
|
||||||
def test_download_file_error(self):
|
|
||||||
exceptions = []
|
|
||||||
|
|
||||||
def cb(ex):
|
|
||||||
exceptions.append(ex)
|
|
||||||
|
|
||||||
download_file("http://thisUrlIsInvalid", "/tmp/file.txt", err_cb=cb, retries=3)
|
|
||||||
self.assertFalse(os.path.exists("/tmp/10Mb.dat"))
|
|
||||||
self.assertEqual(len(exceptions), 3)
|
|
||||||
|
|
||||||
def test_download_file_meta(self):
|
|
||||||
download_file("http://ovh.net/files/10Mb.dat", "/tmp/10Mb.dat", save_meta=True)
|
|
||||||
self.assertTrue(os.path.exists("/tmp/10Mb.dat"))
|
|
||||||
self.assertTrue(os.path.exists("/tmp/10Mb.dat.meta"))
|
|
||||||
os.remove("/tmp/10Mb.dat")
|
|
||||||
# os.remove("/tmp/10Mb.dat.meta")
|
|
Loading…
x
Reference in New Issue
Block a user