From 30c9494daaf86d1aeae8e544258c7c660f715ef4 Mon Sep 17 00:00:00 2001 From: simon987 Date: Sun, 20 Dec 2020 19:53:38 -0500 Subject: [PATCH] add download_file, bool volatile state --- hexlib/db.py | 35 ++++++++++++++++++++++++++++++++++- hexlib/misc.py | 12 ++++++++++++ hexlib/web.py | 36 ++++++++++++++++++++++++++++++++++++ setup.py | 2 +- test/test_VolatileState.py | 34 ++++++++++++++++++++++++++++++++-- test/test_download_file.py | 29 +++++++++++++++++++++++++++++ 6 files changed, 144 insertions(+), 4 deletions(-) create mode 100644 test/test_download_file.py diff --git a/hexlib/db.py b/hexlib/db.py index ad2f781..b0bb7cd 100644 --- a/hexlib/db.py +++ b/hexlib/db.py @@ -19,6 +19,7 @@ class PersistentState: class VolatileState: """Quick and dirty volatile dict-like redis wrapper""" + def __init__(self, prefix, ttl=3600, **redis_args): self.rdb = redis.Redis(**redis_args) self.prefix = prefix @@ -28,6 +29,18 @@ class VolatileState: return RedisTable(self, table) +class VolatileBooleanState: + """Quick and dirty volatile dict-like redis wrapper for boolean values""" + + def __init__(self, prefix, ttl=3600, **redis_args): + self.rdb = redis.Redis(**redis_args) + self.prefix = prefix + self.ttl = ttl + + def __getitem__(self, table): + return RedisBooleanTable(self, table) + + class RedisTable: def __init__(self, state, table): self._state = state @@ -51,6 +64,27 @@ class RedisTable: yield json.loads(val) if val else None +class RedisBooleanTable: + def __init__(self, state, table): + self._state = state + self._table = table + + def __setitem__(self, key, value): + if value: + self._state.rdb.sadd(self._state.prefix + self._table, str(key)) + else: + self.__delitem__(key) + + def __getitem__(self, key): + return self._state.rdb.sismember(self._state.prefix + self._table, str(key)) + + def __delitem__(self, key): + self._state.rdb.srem(self._state.prefix + self._table, str(key)) + + def __iter__(self): + return iter(self._state.rdb.smembers(self._state.prefix + self._table)) + + class Table: def __init__(self, state, table): self._state = state @@ -132,7 +166,6 @@ def _deserialize(value, col_type): def pg_fetch_cursor_all(cur, name, batch_size=1000): - while True: cur.execute("FETCH FORWARD %d FROM %s" % (batch_size, name)) cnt = 0 diff --git a/hexlib/misc.py b/hexlib/misc.py index e8d463b..29667e7 100644 --- a/hexlib/misc.py +++ b/hexlib/misc.py @@ -9,6 +9,18 @@ import siphash last_time_called = dict() +def retry(attempts, callback=None): + def decorate(func): + retries = attempts + while retries > 0: + try: + func() + except Exception as e: + if callback: + callback(e) + return decorate + + def chunks(lst: list, chunk_len: int): for i in range(0, len(lst), chunk_len): yield lst[i:i + chunk_len] diff --git a/hexlib/web.py b/hexlib/web.py index d996522..74cc9cc 100644 --- a/hexlib/web.py +++ b/hexlib/web.py @@ -1,7 +1,11 @@ import pickle import re +import os +from datetime import datetime from base64 import b64encode, b64decode from http.cookiejar import Cookie +import requests +import orjson as json from dateutil.parser import parse from requests.cookies import RequestsCookieJar @@ -68,3 +72,35 @@ def cookiejar_filter(cj, pattern): if re.match(pattern, c.domain): filtered_cj.set_cookie(c) return filtered_cj + + +def download_file(url, destination, session=None, headers=None, overwrite=False, retries=1, err_cb=None, + save_meta=False): + if os.path.exists(destination) and not overwrite: + return + + if session is None: + session = requests.session() + + while retries > 0: + try: + r = session.get(url, stream=True, headers=headers) + + with open(destination + ".part", "wb") as f: + for chunk in r.iter_content(chunk_size=4096): + if chunk: + f.write(chunk) + os.rename(destination + ".part", destination) + + if save_meta: + with open(destination + ".meta", "wb") as f: + f.write(json.dumps({ + "headers": dict(**r.headers), + "url": url, + "timestamp": datetime.utcnow().replace(microsecond=0).isoformat() + })) + break + except Exception as e: + if err_cb: + err_cb(e) + retries -= 1 diff --git a/setup.py b/setup.py index 3aba8b0..2a0ba95 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup setup( name="hexlib", - version="1.17", + version="1.18", description="Misc utility methods", author="simon987", author_email="me@simon987.net", diff --git a/test/test_VolatileState.py b/test/test_VolatileState.py index 399d550..84ca5ae 100644 --- a/test/test_VolatileState.py +++ b/test/test_VolatileState.py @@ -1,5 +1,5 @@ from unittest import TestCase -from hexlib.db import VolatileState +from hexlib.db import VolatileState, VolatileBooleanState class TestVolatileState(TestCase): @@ -8,7 +8,7 @@ class TestVolatileState(TestCase): s = VolatileState(prefix="test1") val = { "field1": 1, - "arr1": [1,2,3] + "arr1": [1, 2, 3] } s["a"]["1"] = val @@ -38,3 +38,33 @@ class TestVolatileState(TestCase): del s["c"]["1"] self.assertIsNone(s["c"]["1"]) + +class TestVolatileBoolState(TestCase): + + def test_get_set(self): + s = VolatileBooleanState(prefix="test1") + + s["a"]["1"] = True + s["a"]["2"] = True + + self.assertTrue(s["a"]["1"]) + self.assertTrue(s["a"]["2"]) + self.assertFalse(s["a"]["3"]) + + def test_iter(self): + s = VolatileBooleanState(prefix="test2") + + s["b"]["1"] = True + s["b"]["2"] = True + s["b"]["3"] = True + s["b"]["4"] = True + + self.assertEqual(sum(int(x) for x in s["b"]), 10) + + def test_delete(self): + s = VolatileBooleanState(prefix="test3") + + s["c"]["1"] = True + self.assertTrue(s["c"]["1"]) + del s["c"]["1"] + self.assertFalse(s["c"]["1"]) diff --git a/test/test_download_file.py b/test/test_download_file.py new file mode 100644 index 0000000..52ab797 --- /dev/null +++ b/test/test_download_file.py @@ -0,0 +1,29 @@ +from unittest import TestCase +import os + +from hexlib.web import download_file + + +class TestDownloadFile(TestCase): + + def test_download_file(self): + download_file("http://ovh.net/files/10Mb.dat", "/tmp/10Mb.dat") + self.assertTrue(os.path.exists("/tmp/10Mb.dat")) + os.remove("/tmp/10Mb.dat") + + def test_download_file_error(self): + exceptions = [] + + def cb(ex): + exceptions.append(ex) + + download_file("http://thisUrlIsInvalid", "/tmp/file.txt", err_cb=cb, retries=3) + self.assertFalse(os.path.exists("/tmp/10Mb.dat")) + self.assertEqual(len(exceptions), 3) + + def test_download_file_meta(self): + download_file("http://ovh.net/files/10Mb.dat", "/tmp/10Mb.dat", save_meta=True) + self.assertTrue(os.path.exists("/tmp/10Mb.dat")) + self.assertTrue(os.path.exists("/tmp/10Mb.dat.meta")) + os.remove("/tmp/10Mb.dat") + # os.remove("/tmp/10Mb.dat.meta")