mirror of
https://github.com/simon987/hexlib.git
synced 2025-04-16 16:46:44 +00:00
Switch to orjson, add ndjson_iter
This commit is contained in:
parent
30854c7f8b
commit
52ad2d22b9
@ -1,7 +1,7 @@
|
|||||||
import base64
|
import base64
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import redis
|
import redis
|
||||||
import ujson as json
|
import orjson as json
|
||||||
|
|
||||||
|
|
||||||
class PersistentState:
|
class PersistentState:
|
||||||
|
@ -1,6 +1,14 @@
|
|||||||
import os
|
import os
|
||||||
from io import BytesIO
|
from io import BytesIO, BufferedReader
|
||||||
from tarfile import TarFile, TarInfo
|
from tarfile import TarFile, TarInfo
|
||||||
|
import subprocess
|
||||||
|
import gzip
|
||||||
|
import zstandard
|
||||||
|
|
||||||
|
try:
|
||||||
|
import orjson as json
|
||||||
|
except ImportError:
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
def ftw(path):
|
def ftw(path):
|
||||||
def add_buf_to_tar(tar: TarFile, filename: str, buf: BytesIO):
    """Append the full contents of an in-memory buffer to an open tar archive.

    The member's size is taken from the buffer's entire contents, so *buf*
    should be positioned at the start of the data to be archived.
    """
    member = TarInfo(name=filename)
    member.size = len(buf.getvalue())
    tar.addfile(member, buf)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_executable(fpath):
|
||||||
|
return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
|
||||||
|
|
||||||
|
|
||||||
|
def find_program(*programs):
    """Search $PATH for the given program names, in order of preference.

    Returns the full path of the first executable match, or None when no
    candidate is found in any PATH directory.
    """
    search_dirs = os.environ["PATH"].split(os.pathsep)

    for candidate in programs:
        for directory in search_dirs:
            full_path = os.path.join(directory, candidate)
            # Executable == regular file with the X bit for this user.
            if os.path.isfile(full_path) and os.access(full_path, os.X_OK):
                return full_path
|
||||||
|
|
||||||
|
|
||||||
|
def program_is_in_path(program) -> bool:
    """Return True when *program* resolves to an executable file on $PATH."""
    return any(
        os.path.isfile(candidate) and os.access(candidate, os.X_OK)
        for candidate in (
            os.path.join(directory, program)
            for directory in os.environ["PATH"].split(os.pathsep)
        )
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Accepted values for the ``compression=`` keyword of ndjson_iter().
COMPRESSION_GZIP = "gz"
COMPRESSION_ZSTD = "zstd"
|
||||||
|
|
||||||
|
|
||||||
|
def ndjson_iter(*files, compression=""):
|
||||||
|
for file in files:
|
||||||
|
cleanup = None
|
||||||
|
if compression == COMPRESSION_GZIP:
|
||||||
|
prog = find_program("pigz", "gzip")
|
||||||
|
if prog:
|
||||||
|
process = subprocess.Popen([prog, "-dc", file], stdout=subprocess.PIPE)
|
||||||
|
line_iter = process.stdout
|
||||||
|
else:
|
||||||
|
# This is much slower
|
||||||
|
line_iter = BufferedReader(gzip.open(file))
|
||||||
|
elif compression == COMPRESSION_ZSTD:
|
||||||
|
fp = open(file, "rb")
|
||||||
|
dctx = zstandard.ZstdDecompressor()
|
||||||
|
reader = dctx.stream_reader(fp)
|
||||||
|
line_iter = BufferedReader(reader)
|
||||||
|
|
||||||
|
def cleanup():
|
||||||
|
fp.close()
|
||||||
|
reader.close()
|
||||||
|
|
||||||
|
else:
|
||||||
|
line_iter = open(file)
|
||||||
|
|
||||||
|
def cleanup():
|
||||||
|
line_iter.close()
|
||||||
|
|
||||||
|
for line in line_iter:
|
||||||
|
yield json.loads(line)
|
||||||
|
if cleanup:
|
||||||
|
cleanup()
|
||||||
|
|
||||||
|
@ -2,7 +2,6 @@ import pickle
|
|||||||
import re
|
import re
|
||||||
from base64 import b64encode, b64decode
|
from base64 import b64encode, b64decode
|
||||||
from http.cookiejar import Cookie
|
from http.cookiejar import Cookie
|
||||||
from io import BytesIO
|
|
||||||
|
|
||||||
from dateutil.parser import parse
|
from dateutil.parser import parse
|
||||||
from requests.cookies import RequestsCookieJar
|
from requests.cookies import RequestsCookieJar
|
||||||
|
4
setup.py
4
setup.py
@ -2,7 +2,7 @@ from setuptools import setup
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="hexlib",
|
name="hexlib",
|
||||||
version="1.13",
|
version="1.14",
|
||||||
description="Misc utility methods",
|
description="Misc utility methods",
|
||||||
author="simon987",
|
author="simon987",
|
||||||
author_email="me@simon987.net",
|
author_email="me@simon987.net",
|
||||||
@ -12,6 +12,6 @@ setup(
|
|||||||
"data/*"
|
"data/*"
|
||||||
]},
|
]},
|
||||||
install_requires=[
|
install_requires=[
|
||||||
"ImageHash", "influxdb", "siphash", "python-dateutil", "redis", "ujson"
|
"ImageHash", "influxdb", "siphash", "python-dateutil", "redis", "orjson", "zstandard"
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user