mirror of
https://github.com/simon987/hexlib.git
synced 2025-04-10 14:06:43 +00:00
add download_file, bool volatile state
This commit is contained in:
parent
2d74f61553
commit
30c9494daa
35
hexlib/db.py
35
hexlib/db.py
@ -19,6 +19,7 @@ class PersistentState:
|
|||||||
|
|
||||||
class VolatileState:
|
class VolatileState:
|
||||||
"""Quick and dirty volatile dict-like redis wrapper"""
|
"""Quick and dirty volatile dict-like redis wrapper"""
|
||||||
|
|
||||||
def __init__(self, prefix, ttl=3600, **redis_args):
|
def __init__(self, prefix, ttl=3600, **redis_args):
|
||||||
self.rdb = redis.Redis(**redis_args)
|
self.rdb = redis.Redis(**redis_args)
|
||||||
self.prefix = prefix
|
self.prefix = prefix
|
||||||
@ -28,6 +29,18 @@ class VolatileState:
|
|||||||
return RedisTable(self, table)
|
return RedisTable(self, table)
|
||||||
|
|
||||||
|
|
||||||
|
class VolatileBooleanState:
|
||||||
|
"""Quick and dirty volatile dict-like redis wrapper for boolean values"""
|
||||||
|
|
||||||
|
def __init__(self, prefix, ttl=3600, **redis_args):
|
||||||
|
self.rdb = redis.Redis(**redis_args)
|
||||||
|
self.prefix = prefix
|
||||||
|
self.ttl = ttl
|
||||||
|
|
||||||
|
def __getitem__(self, table):
|
||||||
|
return RedisBooleanTable(self, table)
|
||||||
|
|
||||||
|
|
||||||
class RedisTable:
|
class RedisTable:
|
||||||
def __init__(self, state, table):
|
def __init__(self, state, table):
|
||||||
self._state = state
|
self._state = state
|
||||||
@ -51,6 +64,27 @@ class RedisTable:
|
|||||||
yield json.loads(val) if val else None
|
yield json.loads(val) if val else None
|
||||||
|
|
||||||
|
|
||||||
|
class RedisBooleanTable:
|
||||||
|
def __init__(self, state, table):
|
||||||
|
self._state = state
|
||||||
|
self._table = table
|
||||||
|
|
||||||
|
def __setitem__(self, key, value):
|
||||||
|
if value:
|
||||||
|
self._state.rdb.sadd(self._state.prefix + self._table, str(key))
|
||||||
|
else:
|
||||||
|
self.__delitem__(key)
|
||||||
|
|
||||||
|
def __getitem__(self, key):
|
||||||
|
return self._state.rdb.sismember(self._state.prefix + self._table, str(key))
|
||||||
|
|
||||||
|
def __delitem__(self, key):
|
||||||
|
self._state.rdb.srem(self._state.prefix + self._table, str(key))
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
return iter(self._state.rdb.smembers(self._state.prefix + self._table))
|
||||||
|
|
||||||
|
|
||||||
class Table:
|
class Table:
|
||||||
def __init__(self, state, table):
|
def __init__(self, state, table):
|
||||||
self._state = state
|
self._state = state
|
||||||
@ -132,7 +166,6 @@ def _deserialize(value, col_type):
|
|||||||
|
|
||||||
|
|
||||||
def pg_fetch_cursor_all(cur, name, batch_size=1000):
|
def pg_fetch_cursor_all(cur, name, batch_size=1000):
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
cur.execute("FETCH FORWARD %d FROM %s" % (batch_size, name))
|
cur.execute("FETCH FORWARD %d FROM %s" % (batch_size, name))
|
||||||
cnt = 0
|
cnt = 0
|
||||||
|
@ -9,6 +9,18 @@ import siphash
|
|||||||
last_time_called = dict()
|
last_time_called = dict()
|
||||||
|
|
||||||
|
|
||||||
|
def retry(attempts, callback=None):
|
||||||
|
def decorate(func):
|
||||||
|
retries = attempts
|
||||||
|
while retries > 0:
|
||||||
|
try:
|
||||||
|
func()
|
||||||
|
except Exception as e:
|
||||||
|
if callback:
|
||||||
|
callback(e)
|
||||||
|
return decorate
|
||||||
|
|
||||||
|
|
||||||
def chunks(lst: list, chunk_len: int):
|
def chunks(lst: list, chunk_len: int):
|
||||||
for i in range(0, len(lst), chunk_len):
|
for i in range(0, len(lst), chunk_len):
|
||||||
yield lst[i:i + chunk_len]
|
yield lst[i:i + chunk_len]
|
||||||
|
@ -1,7 +1,11 @@
|
|||||||
import pickle
|
import pickle
|
||||||
import re
|
import re
|
||||||
|
import os
|
||||||
|
from datetime import datetime
|
||||||
from base64 import b64encode, b64decode
|
from base64 import b64encode, b64decode
|
||||||
from http.cookiejar import Cookie
|
from http.cookiejar import Cookie
|
||||||
|
import requests
|
||||||
|
import orjson as json
|
||||||
|
|
||||||
from dateutil.parser import parse
|
from dateutil.parser import parse
|
||||||
from requests.cookies import RequestsCookieJar
|
from requests.cookies import RequestsCookieJar
|
||||||
@ -68,3 +72,35 @@ def cookiejar_filter(cj, pattern):
|
|||||||
if re.match(pattern, c.domain):
|
if re.match(pattern, c.domain):
|
||||||
filtered_cj.set_cookie(c)
|
filtered_cj.set_cookie(c)
|
||||||
return filtered_cj
|
return filtered_cj
|
||||||
|
|
||||||
|
|
||||||
|
def download_file(url, destination, session=None, headers=None, overwrite=False, retries=1, err_cb=None,
|
||||||
|
save_meta=False):
|
||||||
|
if os.path.exists(destination) and not overwrite:
|
||||||
|
return
|
||||||
|
|
||||||
|
if session is None:
|
||||||
|
session = requests.session()
|
||||||
|
|
||||||
|
while retries > 0:
|
||||||
|
try:
|
||||||
|
r = session.get(url, stream=True, headers=headers)
|
||||||
|
|
||||||
|
with open(destination + ".part", "wb") as f:
|
||||||
|
for chunk in r.iter_content(chunk_size=4096):
|
||||||
|
if chunk:
|
||||||
|
f.write(chunk)
|
||||||
|
os.rename(destination + ".part", destination)
|
||||||
|
|
||||||
|
if save_meta:
|
||||||
|
with open(destination + ".meta", "wb") as f:
|
||||||
|
f.write(json.dumps({
|
||||||
|
"headers": dict(**r.headers),
|
||||||
|
"url": url,
|
||||||
|
"timestamp": datetime.utcnow().replace(microsecond=0).isoformat()
|
||||||
|
}))
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
if err_cb:
|
||||||
|
err_cb(e)
|
||||||
|
retries -= 1
|
||||||
|
2
setup.py
2
setup.py
@ -2,7 +2,7 @@ from setuptools import setup
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="hexlib",
|
name="hexlib",
|
||||||
version="1.17",
|
version="1.18",
|
||||||
description="Misc utility methods",
|
description="Misc utility methods",
|
||||||
author="simon987",
|
author="simon987",
|
||||||
author_email="me@simon987.net",
|
author_email="me@simon987.net",
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from unittest import TestCase
|
from unittest import TestCase
|
||||||
from hexlib.db import VolatileState
|
from hexlib.db import VolatileState, VolatileBooleanState
|
||||||
|
|
||||||
|
|
||||||
class TestVolatileState(TestCase):
|
class TestVolatileState(TestCase):
|
||||||
@ -8,7 +8,7 @@ class TestVolatileState(TestCase):
|
|||||||
s = VolatileState(prefix="test1")
|
s = VolatileState(prefix="test1")
|
||||||
val = {
|
val = {
|
||||||
"field1": 1,
|
"field1": 1,
|
||||||
"arr1": [1,2,3]
|
"arr1": [1, 2, 3]
|
||||||
}
|
}
|
||||||
|
|
||||||
s["a"]["1"] = val
|
s["a"]["1"] = val
|
||||||
@ -38,3 +38,33 @@ class TestVolatileState(TestCase):
|
|||||||
del s["c"]["1"]
|
del s["c"]["1"]
|
||||||
self.assertIsNone(s["c"]["1"])
|
self.assertIsNone(s["c"]["1"])
|
||||||
|
|
||||||
|
|
||||||
|
class TestVolatileBoolState(TestCase):
|
||||||
|
|
||||||
|
def test_get_set(self):
|
||||||
|
s = VolatileBooleanState(prefix="test1")
|
||||||
|
|
||||||
|
s["a"]["1"] = True
|
||||||
|
s["a"]["2"] = True
|
||||||
|
|
||||||
|
self.assertTrue(s["a"]["1"])
|
||||||
|
self.assertTrue(s["a"]["2"])
|
||||||
|
self.assertFalse(s["a"]["3"])
|
||||||
|
|
||||||
|
def test_iter(self):
|
||||||
|
s = VolatileBooleanState(prefix="test2")
|
||||||
|
|
||||||
|
s["b"]["1"] = True
|
||||||
|
s["b"]["2"] = True
|
||||||
|
s["b"]["3"] = True
|
||||||
|
s["b"]["4"] = True
|
||||||
|
|
||||||
|
self.assertEqual(sum(int(x) for x in s["b"]), 10)
|
||||||
|
|
||||||
|
def test_delete(self):
|
||||||
|
s = VolatileBooleanState(prefix="test3")
|
||||||
|
|
||||||
|
s["c"]["1"] = True
|
||||||
|
self.assertTrue(s["c"]["1"])
|
||||||
|
del s["c"]["1"]
|
||||||
|
self.assertFalse(s["c"]["1"])
|
||||||
|
29
test/test_download_file.py
Normal file
29
test/test_download_file.py
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
from unittest import TestCase
|
||||||
|
import os
|
||||||
|
|
||||||
|
from hexlib.web import download_file
|
||||||
|
|
||||||
|
|
||||||
|
class TestDownloadFile(TestCase):
|
||||||
|
|
||||||
|
def test_download_file(self):
|
||||||
|
download_file("http://ovh.net/files/10Mb.dat", "/tmp/10Mb.dat")
|
||||||
|
self.assertTrue(os.path.exists("/tmp/10Mb.dat"))
|
||||||
|
os.remove("/tmp/10Mb.dat")
|
||||||
|
|
||||||
|
def test_download_file_error(self):
|
||||||
|
exceptions = []
|
||||||
|
|
||||||
|
def cb(ex):
|
||||||
|
exceptions.append(ex)
|
||||||
|
|
||||||
|
download_file("http://thisUrlIsInvalid", "/tmp/file.txt", err_cb=cb, retries=3)
|
||||||
|
self.assertFalse(os.path.exists("/tmp/10Mb.dat"))
|
||||||
|
self.assertEqual(len(exceptions), 3)
|
||||||
|
|
||||||
|
def test_download_file_meta(self):
|
||||||
|
download_file("http://ovh.net/files/10Mb.dat", "/tmp/10Mb.dat", save_meta=True)
|
||||||
|
self.assertTrue(os.path.exists("/tmp/10Mb.dat"))
|
||||||
|
self.assertTrue(os.path.exists("/tmp/10Mb.dat.meta"))
|
||||||
|
os.remove("/tmp/10Mb.dat")
|
||||||
|
# os.remove("/tmp/10Mb.dat.meta")
|
Loading…
x
Reference in New Issue
Block a user