mirror of
https://github.com/simon987/hexlib.git
synced 2025-04-16 16:46:44 +00:00
Switch to orjson, add ndjson_iter
This commit is contained in:
parent
30854c7f8b
commit
52ad2d22b9
@ -1,7 +1,7 @@
|
||||
import base64
|
||||
import sqlite3
|
||||
import redis
|
||||
import ujson as json
|
||||
import orjson as json
|
||||
|
||||
|
||||
class PersistentState:
|
||||
|
@ -1,6 +1,14 @@
|
||||
import os
|
||||
from io import BytesIO
|
||||
from io import BytesIO, BufferedReader
|
||||
from tarfile import TarFile, TarInfo
|
||||
import subprocess
|
||||
import gzip
|
||||
import zstandard
|
||||
|
||||
try:
|
||||
import orjson as json
|
||||
except ImportError:
|
||||
import json
|
||||
|
||||
|
||||
def ftw(path):
|
||||
@ -27,3 +35,62 @@ def add_buf_to_tar(tar: TarFile, filename: str, buf: BytesIO):
|
||||
info = TarInfo(name=filename)
|
||||
info.size = len(buf.getvalue())
|
||||
tar.addfile(info, buf)
|
||||
|
||||
|
||||
def _is_executable(fpath):
|
||||
return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
|
||||
|
||||
|
||||
def find_program(*programs):
    """Return the full path of the first of *programs* found on the PATH.

    Candidates are tried in the order given; for each candidate the search
    directories are scanned in order and the first executable regular file
    wins.

    :param programs: program file names to look for (e.g. "pigz", "gzip")
    :return: path of the first executable found, or None when no candidate
        is present in any search directory (or no candidate was given)
    """
    # os.get_exec_path() splits PATH on os.pathsep like the previous code did,
    # but falls back to os.defpath when PATH is unset instead of raising
    # KeyError. Computed once because it does not depend on the candidate.
    search_dirs = os.get_exec_path()

    for program in programs:
        for directory in search_dirs:
            exe_file = os.path.join(directory, program)
            if _is_executable(exe_file):
                return exe_file

    return None
|
||||
|
||||
|
||||
def program_is_in_path(program) -> bool:
    """Tell whether *program* exists as an executable file on the PATH.

    :param program: program file name to look for
    :return: True if any search directory contains an executable regular
        file with that name, False otherwise
    """
    # os.get_exec_path() falls back to os.defpath when PATH is unset, where
    # indexing os.environ["PATH"] would raise KeyError.
    return any(
        _is_executable(os.path.join(directory, program))
        for directory in os.get_exec_path()
    )
|
||||
|
||||
|
||||
# Values accepted by the `compression` argument of ndjson_iter().
COMPRESSION_GZIP = "gz"
COMPRESSION_ZSTD = "zstd"


def _ndjson_lines(file, compression):
    """Yield the raw lines of *file*, decompressing according to *compression*.

    Every resource opened here (file handle, decompressor, child process) is
    released when the generator is exhausted or closed, whichever comes first.
    """
    if compression == COMPRESSION_GZIP:
        prog = find_program("pigz", "gzip")
        if prog:
            # Leaving the `with` block closes the pipe and waits for the
            # child, so no zombie process or open descriptor is left behind.
            # NOTE(review): the child's exit status is not checked, so a
            # failed decompression still ends the stream silently.
            with subprocess.Popen([prog, "-dc", file], stdout=subprocess.PIPE) as process:
                yield from process.stdout
        else:
            # This is much slower
            with BufferedReader(gzip.open(file)) as reader:
                yield from reader
    elif compression == COMPRESSION_ZSTD:
        with open(file, "rb") as fp:
            reader = zstandard.ZstdDecompressor().stream_reader(fp)
            try:
                yield from BufferedReader(reader)
            finally:
                reader.close()
    else:
        # JSON text is UTF-8; do not depend on the locale's default encoding.
        with open(file, encoding="utf-8") as fp:
            yield from fp


def ndjson_iter(*files, compression=""):
    """Iterate over the JSON documents of one or more newline-delimited files.

    Each line of each file is parsed with ``json.loads`` and yielded, files
    being processed in the order given.

    :param files: paths of the NDJSON files to read
    :param compression: COMPRESSION_GZIP, COMPRESSION_ZSTD, or any other
        value (default "") to read the files as uncompressed text
    :return: generator of the decoded JSON values

    Open files and helper processes are released even if the consumer stops
    iterating early or a line fails to parse.
    """
    for file in files:
        lines = _ndjson_lines(file, compression)
        try:
            for line in lines:
                yield json.loads(line)
        finally:
            # Closing the inner generator runs its `with`/`finally` blocks
            # right away instead of waiting for garbage collection.
            lines.close()
|
||||
|
||||
|
@ -2,7 +2,6 @@ import pickle
|
||||
import re
|
||||
from base64 import b64encode, b64decode
|
||||
from http.cookiejar import Cookie
|
||||
from io import BytesIO
|
||||
|
||||
from dateutil.parser import parse
|
||||
from requests.cookies import RequestsCookieJar
|
||||
|
4
setup.py
4
setup.py
@ -2,7 +2,7 @@ from setuptools import setup
|
||||
|
||||
setup(
|
||||
name="hexlib",
|
||||
version="1.13",
|
||||
version="1.14",
|
||||
description="Misc utility methods",
|
||||
author="simon987",
|
||||
author_email="me@simon987.net",
|
||||
@ -12,6 +12,6 @@ setup(
|
||||
"data/*"
|
||||
]},
|
||||
install_requires=[
|
||||
"ImageHash", "influxdb", "siphash", "python-dateutil", "redis", "ujson"
|
||||
"ImageHash", "influxdb", "siphash", "python-dateutil", "redis", "orjson", "zstandard"
|
||||
]
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user