mirror of
https://github.com/simon987/sist2.git
synced 2025-12-12 15:08:53 +00:00
Compare commits
219 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d3bd53a5ea | |||
| f7887f24d1 | |||
| 5c8de19188 | |||
| d861d278a4 | |||
| b6ddeee0e0 | |||
| 0cd2523b05 | |||
| 5e798f9367 | |||
| 5da6c1488b | |||
| 9568e25f84 | |||
| 6a8027789a | |||
| b1d16d8abf | |||
| b2a157e24d | |||
| 9aead9389a | |||
| a32c68cba8 | |||
| d116cf9d91 | |||
|
|
a020a8b32c | ||
| 5d5d9c3092 | |||
| 3379d5ce71 | |||
| a0ff4a1f01 | |||
| 4589f3bde7 | |||
| 1c898640cf | |||
| a0739d5177 | |||
| 8f9d29dbc6 | |||
| 3ff4b70223 | |||
| 02ad035b09 | |||
| c11feb213d | |||
| 72902947cd | |||
| a18bb81222 | |||
| 1520288f19 | |||
| e507de194b | |||
| 0e517d5e2b | |||
| 8223ef3860 | |||
| 995a196690 | |||
| 465d017e18 | |||
| ca994d3914 | |||
| db2285973f | |||
| 61de9e9f14 | |||
| 3015ef0ff4 | |||
| b55d432841 | |||
| ed90a140ce | |||
| 052df82373 | |||
| 5676136777 | |||
| c061613302 | |||
| d0325fd9b9 | |||
| e05a6f3863 | |||
| f1690a9cca | |||
| 100a264413 | |||
| 29390bb454 | |||
| 4d43036ded | |||
| 0b5cdbd130 | |||
| 53d7695f66 | |||
| 8d53456404 | |||
| cbc08a7cc9 | |||
| e629b4d7d3 | |||
| 22f7073b39 | |||
| 1781a74960 | |||
| db96c95ac7 | |||
| 7b9fa4cc0a | |||
| 5cc1fa86a9 | |||
| 649689ce30 | |||
| c8536f65a8 | |||
| 75b5e249c1 | |||
|
|
f49e03ac79 | ||
| a6d2afc8dc | |||
| 8f8f66ba05 | |||
| 1d9fcf7105 | |||
| 8127745f2b | |||
| 230988d6d1 | |||
| 13f4dbed2d | |||
| ed15e89f45 | |||
| c636d3d921 | |||
| 7e92d4b7d1 | |||
| 8ffe780ab2 | |||
| d3c8928fe8 | |||
| d9f628fca4 | |||
| 68289268c1 | |||
| 649c50c465 | |||
| 7b49a0dc49 | |||
| eb559b53aa | |||
| 6d01f9c0df | |||
| e724fec668 | |||
| fe5e93b300 | |||
| ecad85fd7d | |||
| 74cc898259 | |||
| dc2e4443c4 | |||
| 1a64431b52 | |||
|
|
9bad515e06 | ||
| 648559cedb | |||
| 3e6cd9cd5c | |||
| f249992798 | |||
|
|
e9645ecdaa | ||
| 046edea0e2 | |||
| a011b7e97b | |||
| 8c1c1697e0 | |||
| 018b49fa4c | |||
| 27b4e6403e | |||
| 13fdbd9e69 | |||
| 5e7fdaf8dd | |||
| 19d5c8ac9f | |||
| 99497049a8 | |||
|
|
1a3181d78b | ||
| 449aa77c8f | |||
| 3058c55510 | |||
| dedf9287b2 | |||
| ab199b0c0c | |||
| c4fbae123e | |||
| dd2397ef5c | |||
| ee0f71f4d3 | |||
| 0bbb96b149 | |||
| 78f6e16701 | |||
| 4625bca9a9 | |||
| f2ae653886 | |||
| 5686bc864d | |||
| cf513b4ad8 | |||
| 013423424e | |||
| 16514fd6b0 | |||
| 27509f97e1 | |||
| 4c540eae1c | |||
| d2b53ff6fc | |||
| 0ef4292abf | |||
| e6fde38c24 | |||
| 5fa343d40f | |||
| 7ee1374802 | |||
| bd9e56829c | |||
| 718169345e | |||
| 5a6aa763ca | |||
| 695d9abd83 | |||
| e436af7b2a | |||
| 4501a7810f | |||
|
|
e36761fa6a | ||
| fe53b79d56 | |||
| 09615bbed6 | |||
| a2be9b955c | |||
| 9298bd2d9d | |||
| 317034ba21 | |||
| 0505303503 | |||
| 6e5772f13b | |||
| ccccdb3b78 | |||
| 12d17acf4f | |||
| 48b56cdb7b | |||
| 048f707f80 | |||
| 98e0a5fd64 | |||
| 740a49a09f | |||
| 81be662574 | |||
| 02fa3f02f5 | |||
| cfdd7bdd87 | |||
| 7ceb645926 | |||
| 7d0091f647 | |||
| b3cd630399 | |||
| 5f7a1acfe3 | |||
| 513a21cca2 | |||
| 04dbfb23ab | |||
| 1abddabeec | |||
| 9ace5774af | |||
| eab6101cf7 | |||
| d7cbd5d2b6 | |||
| 641edf2715 | |||
| 7efb4957bf | |||
| 9ae77fdedb | |||
| 98c40901ed | |||
| 363375d5da | |||
| 149de95d88 | |||
| e5bb4856d2 | |||
| d78994d427 | |||
| f2d68d54df | |||
| e03625838b | |||
| 86840b46f4 | |||
| e57f9916eb | |||
| 565ba6ee76 | |||
| d83fc2c373 | |||
| d4da28249e | |||
| 483a454c8d | |||
| 018ac86640 | |||
| 398f1aead4 | |||
| d19a75926b | |||
| 1ac8b40e3d | |||
| a8505cb8c1 | |||
| ae8652d86e | |||
| 849beb09d8 | |||
| e1aaaee617 | |||
| c02b940945 | |||
| 2934ddb07f | |||
| 7f6f3c02fa | |||
| 7f98d5a682 | |||
| 7eb9c5d7d5 | |||
| 184439aa38 | |||
| 1ce8b298a1 | |||
| 75f99025d9 | |||
| ebe852bd5a | |||
| 402b103c49 | |||
| e9b6e1cdc2 | |||
| ed1ce8ab5e | |||
| d1fa4febc4 | |||
| 048c55df7b | |||
| f77bc6a025 | |||
| efdde2734e | |||
| 66658fa8f7 | |||
| df41c251e4 | |||
| 3282ab56ba | |||
| 8300838d30 | |||
| c9870a6d3d | |||
| a143cc4fcf | |||
| 9ef1f3781d | |||
| bbee8aa721 | |||
| d22f83c797 | |||
| 50615486a4 | |||
| ca79e4f797 | |||
| 6a9fd08a80 | |||
| cab890dc9b | |||
| b3c4faf2df | |||
| 353937171a | |||
| c80002bea4 | |||
| 56adee9d81 | |||
| d6493d6d5f | |||
| 0967e9676d | |||
| 487e998ea0 | |||
| 919f45c79c | |||
| d42129cfcb | |||
| 754983e34a |
18
.github/ISSUE_TEMPLATE/issue-template.md
vendored
Normal file
18
.github/ISSUE_TEMPLATE/issue-template.md
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
---
|
||||||
|
name: Issue template
|
||||||
|
about: General
|
||||||
|
title: ''
|
||||||
|
labels: ''
|
||||||
|
assignees: ''
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
sist2 version:
|
||||||
|
|
||||||
|
Platform (Linux or Docker, x86-64 or arm64):
|
||||||
|
|
||||||
|
Elasticsearch version:
|
||||||
|
|
||||||
|
Command with arguments: `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0`
|
||||||
|
|
||||||
|
If the issue is related to the `scan` module, please attach the files necessary to reproduce the error or email them to me[at]simon987.net.
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -16,3 +16,4 @@ bundle.js
|
|||||||
*.a
|
*.a
|
||||||
vgcore.*
|
vgcore.*
|
||||||
build/
|
build/
|
||||||
|
third-party/
|
||||||
|
|||||||
46
.gitmodules
vendored
46
.gitmodules
vendored
@@ -1,42 +1,6 @@
|
|||||||
[submodule "argparse"]
|
[submodule "third-party/libscan"]
|
||||||
path = argparse
|
path = third-party/libscan
|
||||||
|
url = https://github.com/simon987/libscan
|
||||||
|
[submodule "third-party/argparse"]
|
||||||
|
path = third-party/argparse
|
||||||
url = https://github.com/cofyc/argparse
|
url = https://github.com/cofyc/argparse
|
||||||
[submodule "cJSON"]
|
|
||||||
path = cJSON
|
|
||||||
url = https://github.com/DaveGamble/cJSON
|
|
||||||
[submodule "lmdb"]
|
|
||||||
path = lmdb
|
|
||||||
url = https://github.com/LMDB/lmdb
|
|
||||||
[submodule "utf8.h"]
|
|
||||||
path = utf8.h
|
|
||||||
url = https://github.com/sheredom/utf8.h
|
|
||||||
[submodule "lib/bzip2-1.0.6"]
|
|
||||||
path = lib/bzip2-1.0.6
|
|
||||||
url = https://github.com/enthought/bzip2-1.0.6
|
|
||||||
[submodule "lib/libmagic"]
|
|
||||||
path = lib/libmagic
|
|
||||||
url = https://github.com/threatstack/libmagic
|
|
||||||
[submodule "lib/harfbuzz"]
|
|
||||||
path = lib/harfbuzz
|
|
||||||
url = https://github.com/harfbuzz/harfbuzz
|
|
||||||
[submodule "lib/openjpeg"]
|
|
||||||
path = lib/openjpeg
|
|
||||||
url = https://github.com/uclouvain/openjpeg
|
|
||||||
[submodule "lib/ffmpeg"]
|
|
||||||
path = lib/ffmpeg
|
|
||||||
url = https://git.ffmpeg.org/ffmpeg.git
|
|
||||||
[submodule "lib/onion"]
|
|
||||||
path = lib/onion
|
|
||||||
url = https://github.com/davidmoreno/onion
|
|
||||||
[submodule "lib/mupdf"]
|
|
||||||
path = lib/mupdf
|
|
||||||
url = git://git.ghostscript.com/mupdf.git
|
|
||||||
[submodule "lib/tesseract"]
|
|
||||||
path = lib/tesseract
|
|
||||||
url = https://github.com/tesseract-ocr/tesseract
|
|
||||||
[submodule "lib/leptonica"]
|
|
||||||
path = lib/leptonica
|
|
||||||
url = https://github.com/danbloomberg/leptonica
|
|
||||||
[submodule "lib/libtiff"]
|
|
||||||
path = lib/libtiff
|
|
||||||
url = https://gitlab.com/libtiff/libtiff
|
|
||||||
|
|||||||
166
CMakeLists.txt
166
CMakeLists.txt
@@ -2,134 +2,118 @@ cmake_minimum_required(VERSION 3.7)
|
|||||||
set(CMAKE_C_STANDARD 11)
|
set(CMAKE_C_STANDARD 11)
|
||||||
|
|
||||||
project(sist2 C)
|
project(sist2 C)
|
||||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/CMakeModules")
|
|
||||||
|
option(SIST_DEBUG "Build a debug executable" on)
|
||||||
|
|
||||||
|
add_subdirectory(third-party/libscan)
|
||||||
|
set(ARGPARSE_SHARED off)
|
||||||
|
add_subdirectory(third-party/argparse)
|
||||||
|
|
||||||
add_executable(
|
add_executable(
|
||||||
sist2
|
sist2
|
||||||
src/main.c
|
src/main.c
|
||||||
src/sist.h
|
src/sist.h
|
||||||
src/io/walk.h src/io/walk.c
|
src/io/walk.h src/io/walk.c
|
||||||
src/parsing/media.h src/parsing/media.c
|
|
||||||
src/parsing/pdf.h src/parsing/pdf.c
|
|
||||||
src/io/store.h src/io/store.c
|
src/io/store.h src/io/store.c
|
||||||
src/tpool.h src/tpool.c
|
src/tpool.h src/tpool.c
|
||||||
src/parsing/parse.h src/parsing/parse.c
|
src/parsing/parse.h src/parsing/parse.c
|
||||||
src/io/serialize.h src/io/serialize.c
|
src/io/serialize.h src/io/serialize.c
|
||||||
src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
|
src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
|
||||||
src/parsing/text.h src/parsing/text.c
|
|
||||||
src/index/web.c src/index/web.h
|
src/index/web.c src/index/web.h
|
||||||
src/web/serve.c src/web/serve.h
|
src/web/serve.c src/web/serve.h
|
||||||
src/web/auth_basic.h src/web/auth_basic.c
|
|
||||||
src/index/elastic.c src/index/elastic.h
|
src/index/elastic.c src/index/elastic.h
|
||||||
src/util.c src/util.h
|
src/util.c src/util.h
|
||||||
src/ctx.h src/types.h src/parsing/font.c src/parsing/font.h
|
src/ctx.h src/types.h
|
||||||
src/parsing/arc.c src/parsing/arc.h
|
|
||||||
src/parsing/doc.c src/parsing/doc.h
|
|
||||||
src/log.c src/log.h
|
src/log.c src/log.h
|
||||||
|
|
||||||
# argparse
|
# argparse
|
||||||
argparse/argparse.h argparse/argparse.c
|
third-party/argparse/argparse.h third-party/argparse/argparse.c
|
||||||
|
|
||||||
# cJSON
|
|
||||||
cJSON/cJSON.h cJSON/cJSON.c
|
|
||||||
|
|
||||||
# LMDB
|
|
||||||
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
|
|
||||||
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
|
|
||||||
src/cli.c src/cli.h
|
src/cli.c src/cli.h
|
||||||
|
src/stats.c src/stats.h src/ctx.c)
|
||||||
|
|
||||||
# utf8.h
|
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
|
||||||
utf8.h/utf8.h
|
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
|
||||||
)
|
|
||||||
|
|
||||||
find_package(PkgConfig REQUIRED)
|
find_package(lmdb CONFIG REQUIRED)
|
||||||
set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:/usr/local/lib/pkgconfig/")
|
find_package(cJSON CONFIG REQUIRED)
|
||||||
|
find_package(unofficial-glib CONFIG REQUIRED)
|
||||||
|
find_package(unofficial-mongoose CONFIG REQUIRED)
|
||||||
|
find_library(UUID_LIB NAMES uuid)
|
||||||
|
find_package(CURL CONFIG REQUIRED)
|
||||||
|
|
||||||
#find_package(OpenSSL REQUIRED)
|
#find_package(OpenSSL REQUIRED)
|
||||||
find_package(Freetype REQUIRED)
|
|
||||||
|
|
||||||
pkg_check_modules(GLIB REQUIRED glib-2.0)
|
|
||||||
pkg_check_modules(GOBJECT REQUIRED gobject-2.0)
|
|
||||||
pkg_check_modules(UUID REQUIRED uuid)
|
|
||||||
|
|
||||||
add_definitions(${UUID_CFLAGS_OTHER})
|
|
||||||
add_definitions(${GLIB_CFLAGS_OTHER})
|
|
||||||
add_definitions(${GOBJECT_CFLAGS_OTHER})
|
|
||||||
add_definitions(${FREETYPE_CFLAGS_OTHER})
|
|
||||||
|
|
||||||
list(REMOVE_ITEM GLIB_LIBRARIES pcre)
|
|
||||||
list(REMOVE_ITEM GOBJECT_LIBRARIES pcre)
|
|
||||||
list(REMOVE_ITEM UUID_LIBRARIES pcre)
|
|
||||||
|
|
||||||
target_include_directories(
|
target_include_directories(
|
||||||
sist2 PUBLIC
|
sist2 PUBLIC
|
||||||
${GOBJECT_INCLUDE_DIRS}
|
${CMAKE_SOURCE_DIR}/third-party/onion/src/
|
||||||
${GLIB_INCLUDE_DIRS}
|
${CMAKE_SOURCE_DIR}/third-party/utf8.h/
|
||||||
${PROJECT_SOURCE_DIR}/lib/ffmpeg/
|
${CMAKE_SOURCE_DIR}/third-party/libscan/
|
||||||
${FREETYPE_INCLUDE_DIRS}
|
${CMAKE_SOURCE_DIR}/
|
||||||
${UUID_INCLUDE_DIRS}
|
|
||||||
${PROJECT_SOURCE_DIR}/
|
|
||||||
${PROJECT_SOURCE_DIR}/lmdb/libraries/liblmdb/
|
|
||||||
${PROJECT_SOURCE_DIR}/lib/onion/src/
|
|
||||||
${PROJECT_SOURCE_DIR}/lib/mupdf/include/
|
|
||||||
${PROJECT_SOURCE_DIR}/include/
|
|
||||||
/usr/include/libxml2/
|
|
||||||
${PROJECT_SOURCE_DIR}/lib/tesseract/include/
|
|
||||||
)
|
|
||||||
target_link_directories(
|
|
||||||
sist2 PUBLIC
|
|
||||||
${UUID_LIBRARY_DIRS}
|
|
||||||
)
|
)
|
||||||
|
|
||||||
target_compile_options(sist2
|
target_compile_options(
|
||||||
|
sist2
|
||||||
PRIVATE
|
PRIVATE
|
||||||
-Ofast
|
-fPIC
|
||||||
# -march=native
|
)
|
||||||
-fno-stack-protector
|
|
||||||
-fomit-frame-pointer
|
|
||||||
)
|
|
||||||
|
|
||||||
TARGET_LINK_LIBRARIES(
|
if (SIST_DEBUG)
|
||||||
|
target_compile_options(
|
||||||
|
sist2
|
||||||
|
PRIVATE
|
||||||
|
-g
|
||||||
|
-fstack-protector
|
||||||
|
-fno-omit-frame-pointer
|
||||||
|
-fsanitize=address
|
||||||
|
-O2
|
||||||
|
)
|
||||||
|
target_link_options(
|
||||||
|
sist2
|
||||||
|
PRIVATE
|
||||||
|
-fsanitize=address
|
||||||
|
# -static
|
||||||
|
)
|
||||||
|
set_target_properties(
|
||||||
|
sist2
|
||||||
|
PROPERTIES
|
||||||
|
OUTPUT_NAME sist2_debug
|
||||||
|
)
|
||||||
|
else ()
|
||||||
|
# set(VCPKG_BUILD_TYPE release)
|
||||||
|
target_compile_options(
|
||||||
|
sist2
|
||||||
|
PRIVATE
|
||||||
|
-Ofast
|
||||||
|
-fno-stack-protector
|
||||||
|
-fomit-frame-pointer
|
||||||
|
)
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
add_dependencies(
|
||||||
|
sist2
|
||||||
|
scan
|
||||||
|
argparse
|
||||||
|
)
|
||||||
|
|
||||||
|
target_link_libraries(
|
||||||
sist2
|
sist2
|
||||||
|
|
||||||
${GLIB_LIBRARIES}
|
z
|
||||||
${GOBJECT_LIBRARIES}
|
lmdb
|
||||||
${UUID_LIBRARIES}
|
cjson
|
||||||
|
argparse
|
||||||
# ffmpeg
|
unofficial::glib::glib
|
||||||
${PROJECT_SOURCE_DIR}/lib/libavcodec.a
|
unofficial::mongoose::mongoose
|
||||||
${PROJECT_SOURCE_DIR}/lib/libavformat.a
|
# OpenSSL::SSL OpenSSL::Crypto
|
||||||
${PROJECT_SOURCE_DIR}/lib/libavutil.a
|
CURL::libcurl
|
||||||
${PROJECT_SOURCE_DIR}/lib/libswscale.a
|
|
||||||
${PROJECT_SOURCE_DIR}/lib/libswresample.a
|
|
||||||
|
|
||||||
# mupdf
|
|
||||||
${PROJECT_SOURCE_DIR}/lib/libmupdf.a
|
|
||||||
${PROJECT_SOURCE_DIR}/lib/libmupdf-third.a
|
|
||||||
|
|
||||||
# onion
|
|
||||||
${PROJECT_SOURCE_DIR}/lib/libonion_static.a
|
|
||||||
|
|
||||||
|
${UUID_LIB}
|
||||||
pthread
|
pthread
|
||||||
curl
|
magic
|
||||||
m
|
|
||||||
bz2
|
|
||||||
${PROJECT_SOURCE_DIR}/lib/libmagic.a
|
|
||||||
${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
|
|
||||||
${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
|
|
||||||
freetype
|
|
||||||
archive
|
|
||||||
|
|
||||||
xml2
|
scan
|
||||||
${PROJECT_SOURCE_DIR}/lib/libopc/libmce.a
|
|
||||||
${PROJECT_SOURCE_DIR}/lib/libopc/libopc.a
|
|
||||||
${PROJECT_SOURCE_DIR}/lib/libopc/libplib.a
|
|
||||||
|
|
||||||
${PROJECT_SOURCE_DIR}/lib/libtesseract.a
|
|
||||||
${PROJECT_SOURCE_DIR}/lib/liblept.a
|
|
||||||
${PROJECT_SOURCE_DIR}/lib/libtiff.a
|
|
||||||
png
|
|
||||||
stdc++
|
|
||||||
)
|
)
|
||||||
|
|
||||||
add_custom_target(
|
add_custom_target(
|
||||||
|
|||||||
80
CMakeModules/FindFFmpeg.cmake
vendored
80
CMakeModules/FindFFmpeg.cmake
vendored
@@ -1,80 +0,0 @@
|
|||||||
# - Try to find ffmpeg libraries (libavcodec, libavformat and libavutil)
|
|
||||||
# Once done this will define
|
|
||||||
#
|
|
||||||
# FFMPEG_FOUND - system has ffmpeg or libav
|
|
||||||
# FFMPEG_INCLUDE_DIR - the ffmpeg include directory
|
|
||||||
# FFMPEG_LIBRARIES - Link these to use ffmpeg
|
|
||||||
# FFMPEG_LIBAVCODEC
|
|
||||||
# FFMPEG_LIBAVFORMAT
|
|
||||||
# FFMPEG_LIBAVUTIL
|
|
||||||
#
|
|
||||||
# Copyright (c) 2008 Andreas Schneider <mail@cynapses.org>
|
|
||||||
# Modified for other libraries by Lasse Kärkkäinen <tronic>
|
|
||||||
# Modified for Hedgewars by Stepik777
|
|
||||||
#
|
|
||||||
# Redistribution and use is allowed according to the terms of the New
|
|
||||||
# BSD license.
|
|
||||||
#
|
|
||||||
|
|
||||||
if (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
|
|
||||||
# in cache already
|
|
||||||
set(FFMPEG_FOUND TRUE)
|
|
||||||
else (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
|
|
||||||
# use pkg-config to get the directories and then use these values
|
|
||||||
# in the FIND_PATH() and FIND_LIBRARY() calls
|
|
||||||
find_package(PkgConfig)
|
|
||||||
if (PKG_CONFIG_FOUND)
|
|
||||||
pkg_check_modules(_FFMPEG_AVCODEC libavcodec)
|
|
||||||
pkg_check_modules(_FFMPEG_AVFORMAT libavformat)
|
|
||||||
pkg_check_modules(_FFMPEG_AVUTIL libavutil)
|
|
||||||
endif (PKG_CONFIG_FOUND)
|
|
||||||
|
|
||||||
find_path(FFMPEG_AVCODEC_INCLUDE_DIR
|
|
||||||
NAMES libavcodec/avcodec.h
|
|
||||||
PATHS ${_FFMPEG_AVCODEC_INCLUDE_DIRS} /usr/include /usr/local/include /opt/local/include /sw/include
|
|
||||||
PATH_SUFFIXES ffmpeg libav
|
|
||||||
)
|
|
||||||
|
|
||||||
find_library(FFMPEG_LIBAVCODEC
|
|
||||||
NAMES avcodec
|
|
||||||
PATHS ${_FFMPEG_AVCODEC_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
|
|
||||||
)
|
|
||||||
|
|
||||||
find_library(FFMPEG_LIBAVFORMAT
|
|
||||||
NAMES avformat
|
|
||||||
PATHS ${_FFMPEG_AVFORMAT_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
|
|
||||||
)
|
|
||||||
|
|
||||||
find_library(FFMPEG_LIBAVUTIL
|
|
||||||
NAMES avutil
|
|
||||||
PATHS ${_FFMPEG_AVUTIL_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
|
|
||||||
)
|
|
||||||
|
|
||||||
if (FFMPEG_LIBAVCODEC AND FFMPEG_LIBAVFORMAT)
|
|
||||||
set(FFMPEG_FOUND TRUE)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (FFMPEG_FOUND)
|
|
||||||
set(FFMPEG_INCLUDE_DIR ${FFMPEG_AVCODEC_INCLUDE_DIR})
|
|
||||||
|
|
||||||
set(FFMPEG_LIBRARIES
|
|
||||||
${FFMPEG_LIBAVCODEC}
|
|
||||||
${FFMPEG_LIBAVFORMAT}
|
|
||||||
${FFMPEG_LIBAVUTIL}
|
|
||||||
)
|
|
||||||
|
|
||||||
endif (FFMPEG_FOUND)
|
|
||||||
|
|
||||||
if (FFMPEG_FOUND)
|
|
||||||
if (NOT FFMPEG_FIND_QUIETLY)
|
|
||||||
message(STATUS "Found FFMPEG or Libav: ${FFMPEG_LIBRARIES}, ${FFMPEG_INCLUDE_DIR}")
|
|
||||||
endif (NOT FFMPEG_FIND_QUIETLY)
|
|
||||||
else (FFMPEG_FOUND)
|
|
||||||
if (FFMPEG_FIND_REQUIRED)
|
|
||||||
message(FATAL_ERROR "Could not find libavcodec or libavformat or libavutil")
|
|
||||||
endif (FFMPEG_FIND_REQUIRED)
|
|
||||||
endif (FFMPEG_FOUND)
|
|
||||||
|
|
||||||
endif (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
|
|
||||||
|
|
||||||
|
|
||||||
100
CMakeModules/FindLibMagic.cmake
vendored
100
CMakeModules/FindLibMagic.cmake
vendored
@@ -1,100 +0,0 @@
|
|||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
# Copyright (c) 2013-2013, Lars Baehren <lbaehren@gmail.com>
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# Redistribution and use in source and binary forms, with or without modification,
|
|
||||||
# are permitted provided that the following conditions are met:
|
|
||||||
#
|
|
||||||
# * Redistributions of source code must retain the above copyright notice, this
|
|
||||||
# list of conditions and the following disclaimer.
|
|
||||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
||||||
# this list of conditions and the following disclaimer in the documentation
|
|
||||||
# and/or other materials provided with the distribution.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
||||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
||||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
||||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
||||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# - Check for the presence of LIBMAGIC
|
|
||||||
#
|
|
||||||
# The following variables are set when LIBMAGIC is found:
|
|
||||||
# LIBMAGIC_FOUND = Set to true, if all components of LIBMAGIC have been
|
|
||||||
# found.
|
|
||||||
# LIBMAGIC_INCLUDES = Include path for the header files of LIBMAGIC
|
|
||||||
# LIBMAGIC_LIBRARIES = Link these to use LIBMAGIC
|
|
||||||
# LIBMAGIC_LFLAGS = Linker flags (optional)
|
|
||||||
|
|
||||||
if (NOT LIBMAGIC_FOUND)
|
|
||||||
|
|
||||||
if (NOT LIBMAGIC_ROOT_DIR)
|
|
||||||
set (LIBMAGIC_ROOT_DIR ${CMAKE_INSTALL_PREFIX})
|
|
||||||
endif (NOT LIBMAGIC_ROOT_DIR)
|
|
||||||
|
|
||||||
##____________________________________________________________________________
|
|
||||||
## Check for the header files
|
|
||||||
|
|
||||||
find_path (LIBMAGIC_FILE_H
|
|
||||||
NAMES file/file.h
|
|
||||||
HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
|
|
||||||
PATH_SUFFIXES include
|
|
||||||
)
|
|
||||||
if (LIBMAGIC_FILE_H)
|
|
||||||
list (APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_FILE_H})
|
|
||||||
endif (LIBMAGIC_FILE_H)
|
|
||||||
|
|
||||||
find_path (LIBMAGIC_MAGIC_H
|
|
||||||
NAMES magic.h
|
|
||||||
HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
|
|
||||||
PATH_SUFFIXES include include/linux
|
|
||||||
)
|
|
||||||
if (LIBMAGIC_MAGIC_H)
|
|
||||||
list (APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_MAGIC_H})
|
|
||||||
endif (LIBMAGIC_MAGIC_H)
|
|
||||||
|
|
||||||
list (REMOVE_DUPLICATES LIBMAGIC_INCLUDES)
|
|
||||||
|
|
||||||
##____________________________________________________________________________
|
|
||||||
## Check for the library
|
|
||||||
|
|
||||||
find_library (LIBMAGIC_LIBRARIES magic
|
|
||||||
HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
|
|
||||||
PATH_SUFFIXES lib
|
|
||||||
)
|
|
||||||
|
|
||||||
##____________________________________________________________________________
|
|
||||||
## Actions taken when all components have been found
|
|
||||||
|
|
||||||
#find_package_handle_standard_args (LIBMAGIC DEFAULT_MSG LIBMAGIC_LIBRARIES LIBMAGIC_INCLUDES)
|
|
||||||
|
|
||||||
if (LIBMAGIC_FOUND)
|
|
||||||
if (NOT LIBMAGIC_FIND_QUIETLY)
|
|
||||||
message (STATUS "Found components for LIBMAGIC")
|
|
||||||
message (STATUS "LIBMAGIC_ROOT_DIR = ${LIBMAGIC_ROOT_DIR}")
|
|
||||||
message (STATUS "LIBMAGIC_INCLUDES = ${LIBMAGIC_INCLUDES}")
|
|
||||||
message (STATUS "LIBMAGIC_LIBRARIES = ${LIBMAGIC_LIBRARIES}")
|
|
||||||
endif (NOT LIBMAGIC_FIND_QUIETLY)
|
|
||||||
else (LIBMAGIC_FOUND)
|
|
||||||
if (LIBMAGIC_FIND_REQUIRED)
|
|
||||||
message (FATAL_ERROR "Could not find LIBMAGIC!")
|
|
||||||
endif (LIBMAGIC_FIND_REQUIRED)
|
|
||||||
endif (LIBMAGIC_FOUND)
|
|
||||||
|
|
||||||
##____________________________________________________________________________
|
|
||||||
## Mark advanced variables
|
|
||||||
|
|
||||||
mark_as_advanced (
|
|
||||||
LIBMAGIC_ROOT_DIR
|
|
||||||
LIBMAGIC_INCLUDES
|
|
||||||
LIBMAGIC_LIBRARIES
|
|
||||||
)
|
|
||||||
|
|
||||||
endif (NOT LIBMAGIC_FOUND)
|
|
||||||
478
CMakeModules/FindOpenSSL.cmake
vendored
478
CMakeModules/FindOpenSSL.cmake
vendored
@@ -1,478 +0,0 @@
|
|||||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
|
||||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
|
||||||
|
|
||||||
macro(_OpenSSL_test_and_find_dependencies ssl_library crypto_library)
|
|
||||||
if((CMAKE_SYSTEM_NAME STREQUAL "Linux") AND
|
|
||||||
(("${ssl_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$") OR
|
|
||||||
("${crypto_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$")))
|
|
||||||
set(_OpenSSL_has_dependencies TRUE)
|
|
||||||
find_package(Threads)
|
|
||||||
else()
|
|
||||||
set(_OpenSSL_has_dependencies FALSE)
|
|
||||||
endif()
|
|
||||||
endmacro()
|
|
||||||
|
|
||||||
function(_OpenSSL_add_dependencies libraries_var library)
|
|
||||||
if(CMAKE_THREAD_LIBS_INIT)
|
|
||||||
list(APPEND ${libraries_var} ${CMAKE_THREAD_LIBS_INIT})
|
|
||||||
endif()
|
|
||||||
list(APPEND ${libraries_var} ${CMAKE_DL_LIBS})
|
|
||||||
set(${libraries_var} ${${libraries_var}} PARENT_SCOPE)
|
|
||||||
endfunction()
|
|
||||||
|
|
||||||
function(_OpenSSL_target_add_dependencies target)
|
|
||||||
if(_OpenSSL_has_dependencies)
|
|
||||||
set_property( TARGET ${target} APPEND PROPERTY INTERFACE_LINK_LIBRARIES Threads::Threads )
|
|
||||||
set_property( TARGET ${target} APPEND PROPERTY INTERFACE_LINK_LIBRARIES ${CMAKE_DL_LIBS} )
|
|
||||||
endif()
|
|
||||||
endfunction()
|
|
||||||
|
|
||||||
if (UNIX)
|
|
||||||
find_package(PkgConfig QUIET)
|
|
||||||
pkg_check_modules(_OPENSSL QUIET openssl)
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
# Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES
|
|
||||||
if(OPENSSL_USE_STATIC_LIBS)
|
|
||||||
set(_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
|
|
||||||
if(WIN32)
|
|
||||||
set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
|
|
||||||
else()
|
|
||||||
set(CMAKE_FIND_LIBRARY_SUFFIXES .a )
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WIN32)
|
|
||||||
# http://www.slproweb.com/products/Win32OpenSSL.html
|
|
||||||
set(_OPENSSL_ROOT_HINTS
|
|
||||||
${OPENSSL_ROOT_DIR}
|
|
||||||
"[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (32-bit)_is1;Inno Setup: App Path]"
|
|
||||||
"[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (64-bit)_is1;Inno Setup: App Path]"
|
|
||||||
ENV OPENSSL_ROOT_DIR
|
|
||||||
)
|
|
||||||
file(TO_CMAKE_PATH "$ENV{PROGRAMFILES}" _programfiles)
|
|
||||||
set(_OPENSSL_ROOT_PATHS
|
|
||||||
"${_programfiles}/OpenSSL"
|
|
||||||
"${_programfiles}/OpenSSL-Win32"
|
|
||||||
"${_programfiles}/OpenSSL-Win64"
|
|
||||||
"C:/OpenSSL/"
|
|
||||||
"C:/OpenSSL-Win32/"
|
|
||||||
"C:/OpenSSL-Win64/"
|
|
||||||
)
|
|
||||||
unset(_programfiles)
|
|
||||||
else ()
|
|
||||||
set(_OPENSSL_ROOT_HINTS
|
|
||||||
${OPENSSL_ROOT_DIR}
|
|
||||||
ENV OPENSSL_ROOT_DIR
|
|
||||||
)
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
set(_OPENSSL_ROOT_HINTS_AND_PATHS
|
|
||||||
HINTS ${_OPENSSL_ROOT_HINTS}
|
|
||||||
PATHS ${_OPENSSL_ROOT_PATHS}
|
|
||||||
)
|
|
||||||
|
|
||||||
find_path(OPENSSL_INCLUDE_DIR
|
|
||||||
NAMES
|
|
||||||
openssl/ssl.h
|
|
||||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
|
||||||
HINTS
|
|
||||||
${_OPENSSL_INCLUDEDIR}
|
|
||||||
PATH_SUFFIXES
|
|
||||||
include
|
|
||||||
)
|
|
||||||
|
|
||||||
if(WIN32 AND NOT CYGWIN)
|
|
||||||
if(MSVC)
|
|
||||||
# /MD and /MDd are the standard values - if someone wants to use
|
|
||||||
# others, the libnames have to change here too
|
|
||||||
# use also ssl and ssleay32 in debug as fallback for openssl < 0.9.8b
|
|
||||||
# enable OPENSSL_MSVC_STATIC_RT to get the libs build /MT (Multithreaded no-DLL)
|
|
||||||
# In Visual C++ naming convention each of these four kinds of Windows libraries has it's standard suffix:
|
|
||||||
# * MD for dynamic-release
|
|
||||||
# * MDd for dynamic-debug
|
|
||||||
# * MT for static-release
|
|
||||||
# * MTd for static-debug
|
|
||||||
|
|
||||||
# Implementation details:
|
|
||||||
# We are using the libraries located in the VC subdir instead of the parent directory even though :
|
|
||||||
# libeay32MD.lib is identical to ../libeay32.lib, and
|
|
||||||
# ssleay32MD.lib is identical to ../ssleay32.lib
|
|
||||||
# enable OPENSSL_USE_STATIC_LIBS to use the static libs located in lib/VC/static
|
|
||||||
|
|
||||||
if (OPENSSL_MSVC_STATIC_RT)
|
|
||||||
set(_OPENSSL_MSVC_RT_MODE "MT")
|
|
||||||
else ()
|
|
||||||
set(_OPENSSL_MSVC_RT_MODE "MD")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
# Since OpenSSL 1.1, lib names are like libcrypto32MTd.lib and libssl32MTd.lib
|
|
||||||
if( "${CMAKE_SIZEOF_VOID_P}" STREQUAL "8" )
|
|
||||||
set(_OPENSSL_MSVC_ARCH_SUFFIX "64")
|
|
||||||
else()
|
|
||||||
set(_OPENSSL_MSVC_ARCH_SUFFIX "32")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(OPENSSL_USE_STATIC_LIBS)
|
|
||||||
set(_OPENSSL_PATH_SUFFIXES
|
|
||||||
"lib/VC/static"
|
|
||||||
"VC/static"
|
|
||||||
"lib"
|
|
||||||
)
|
|
||||||
else()
|
|
||||||
set(_OPENSSL_PATH_SUFFIXES
|
|
||||||
"lib/VC"
|
|
||||||
"VC"
|
|
||||||
"lib"
|
|
||||||
)
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
find_library(LIB_EAY_DEBUG
|
|
||||||
NAMES
|
|
||||||
libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
|
|
||||||
libcrypto${_OPENSSL_MSVC_RT_MODE}d
|
|
||||||
libcryptod
|
|
||||||
libeay32${_OPENSSL_MSVC_RT_MODE}d
|
|
||||||
libeay32d
|
|
||||||
cryptod
|
|
||||||
NAMES_PER_DIR
|
|
||||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
|
||||||
PATH_SUFFIXES
|
|
||||||
${_OPENSSL_PATH_SUFFIXES}
|
|
||||||
)
|
|
||||||
|
|
||||||
find_library(LIB_EAY_RELEASE
|
|
||||||
NAMES
|
|
||||||
libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
|
|
||||||
libcrypto${_OPENSSL_MSVC_RT_MODE}
|
|
||||||
libcrypto
|
|
||||||
libeay32${_OPENSSL_MSVC_RT_MODE}
|
|
||||||
libeay32
|
|
||||||
crypto
|
|
||||||
NAMES_PER_DIR
|
|
||||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
|
||||||
PATH_SUFFIXES
|
|
||||||
${_OPENSSL_PATH_SUFFIXES}
|
|
||||||
)
|
|
||||||
|
|
||||||
find_library(SSL_EAY_DEBUG
|
|
||||||
NAMES
|
|
||||||
libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
|
|
||||||
libssl${_OPENSSL_MSVC_RT_MODE}d
|
|
||||||
libssld
|
|
||||||
ssleay32${_OPENSSL_MSVC_RT_MODE}d
|
|
||||||
ssleay32d
|
|
||||||
ssld
|
|
||||||
NAMES_PER_DIR
|
|
||||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
|
||||||
PATH_SUFFIXES
|
|
||||||
${_OPENSSL_PATH_SUFFIXES}
|
|
||||||
)
|
|
||||||
|
|
||||||
find_library(SSL_EAY_RELEASE
|
|
||||||
NAMES
|
|
||||||
libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
|
|
||||||
libssl${_OPENSSL_MSVC_RT_MODE}
|
|
||||||
libssl
|
|
||||||
ssleay32${_OPENSSL_MSVC_RT_MODE}
|
|
||||||
ssleay32
|
|
||||||
ssl
|
|
||||||
NAMES_PER_DIR
|
|
||||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
|
||||||
PATH_SUFFIXES
|
|
||||||
${_OPENSSL_PATH_SUFFIXES}
|
|
||||||
)
|
|
||||||
|
|
||||||
set(LIB_EAY_LIBRARY_DEBUG "${LIB_EAY_DEBUG}")
|
|
||||||
set(LIB_EAY_LIBRARY_RELEASE "${LIB_EAY_RELEASE}")
|
|
||||||
set(SSL_EAY_LIBRARY_DEBUG "${SSL_EAY_DEBUG}")
|
|
||||||
set(SSL_EAY_LIBRARY_RELEASE "${SSL_EAY_RELEASE}")
|
|
||||||
|
|
||||||
include(${CMAKE_CURRENT_LIST_DIR}/SelectLibraryConfigurations.cmake)
|
|
||||||
select_library_configurations(LIB_EAY)
|
|
||||||
select_library_configurations(SSL_EAY)
|
|
||||||
|
|
||||||
mark_as_advanced(LIB_EAY_LIBRARY_DEBUG LIB_EAY_LIBRARY_RELEASE
|
|
||||||
SSL_EAY_LIBRARY_DEBUG SSL_EAY_LIBRARY_RELEASE)
|
|
||||||
set(OPENSSL_SSL_LIBRARY ${SSL_EAY_LIBRARY} )
|
|
||||||
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY_LIBRARY} )
|
|
||||||
elseif(MINGW)
|
|
||||||
# same player, for MinGW
|
|
||||||
set(LIB_EAY_NAMES crypto libeay32)
|
|
||||||
set(SSL_EAY_NAMES ssl ssleay32)
|
|
||||||
find_library(LIB_EAY
|
|
||||||
NAMES
|
|
||||||
${LIB_EAY_NAMES}
|
|
||||||
NAMES_PER_DIR
|
|
||||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
|
||||||
PATH_SUFFIXES
|
|
||||||
"lib/MinGW"
|
|
||||||
"lib"
|
|
||||||
)
|
|
||||||
|
|
||||||
find_library(SSL_EAY
|
|
||||||
NAMES
|
|
||||||
${SSL_EAY_NAMES}
|
|
||||||
NAMES_PER_DIR
|
|
||||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
|
||||||
PATH_SUFFIXES
|
|
||||||
"lib/MinGW"
|
|
||||||
"lib"
|
|
||||||
)
|
|
||||||
|
|
||||||
mark_as_advanced(SSL_EAY LIB_EAY)
|
|
||||||
set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
|
|
||||||
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
|
|
||||||
unset(LIB_EAY_NAMES)
|
|
||||||
unset(SSL_EAY_NAMES)
|
|
||||||
else()
|
|
||||||
# Not sure what to pick for -say- intel, let's use the toplevel ones and hope someone report issues:
|
|
||||||
find_library(LIB_EAY
|
|
||||||
NAMES
|
|
||||||
libcrypto
|
|
||||||
libeay32
|
|
||||||
NAMES_PER_DIR
|
|
||||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
|
||||||
HINTS
|
|
||||||
${_OPENSSL_LIBDIR}
|
|
||||||
PATH_SUFFIXES
|
|
||||||
lib
|
|
||||||
)
|
|
||||||
|
|
||||||
find_library(SSL_EAY
|
|
||||||
NAMES
|
|
||||||
libssl
|
|
||||||
ssleay32
|
|
||||||
NAMES_PER_DIR
|
|
||||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
|
||||||
HINTS
|
|
||||||
${_OPENSSL_LIBDIR}
|
|
||||||
PATH_SUFFIXES
|
|
||||||
lib
|
|
||||||
)
|
|
||||||
|
|
||||||
mark_as_advanced(SSL_EAY LIB_EAY)
|
|
||||||
set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
|
|
||||||
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
|
|
||||||
endif()
|
|
||||||
else()
|
|
||||||
|
|
||||||
find_library(OPENSSL_SSL_LIBRARY
|
|
||||||
NAMES
|
|
||||||
ssl
|
|
||||||
ssleay32
|
|
||||||
ssleay32MD
|
|
||||||
NAMES_PER_DIR
|
|
||||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
|
||||||
HINTS
|
|
||||||
${_OPENSSL_LIBDIR}
|
|
||||||
PATH_SUFFIXES
|
|
||||||
lib
|
|
||||||
)
|
|
||||||
|
|
||||||
find_library(OPENSSL_CRYPTO_LIBRARY
|
|
||||||
NAMES
|
|
||||||
crypto
|
|
||||||
NAMES_PER_DIR
|
|
||||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
|
||||||
HINTS
|
|
||||||
${_OPENSSL_LIBDIR}
|
|
||||||
PATH_SUFFIXES
|
|
||||||
lib
|
|
||||||
)
|
|
||||||
|
|
||||||
mark_as_advanced(OPENSSL_CRYPTO_LIBRARY OPENSSL_SSL_LIBRARY)
|
|
||||||
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# compat defines
|
|
||||||
set(OPENSSL_SSL_LIBRARIES ${OPENSSL_SSL_LIBRARY})
|
|
||||||
set(OPENSSL_CRYPTO_LIBRARIES ${OPENSSL_CRYPTO_LIBRARY})
|
|
||||||
_OpenSSL_test_and_find_dependencies("${OPENSSL_SSL_LIBRARY}" "${OPENSSL_CRYPTO_LIBRARY}")
|
|
||||||
if(_OpenSSL_has_dependencies)
|
|
||||||
_OpenSSL_add_dependencies( OPENSSL_SSL_LIBRARIES "${OPENSSL_SSL_LIBRARY}" )
|
|
||||||
_OpenSSL_add_dependencies( OPENSSL_CRYPTO_LIBRARIES "${OPENSSL_CRYPTO_LIBRARY}" )
|
|
||||||
endif()
|
|
||||||
|
|
||||||
function(from_hex HEX DEC)
|
|
||||||
string(TOUPPER "${HEX}" HEX)
|
|
||||||
set(_res 0)
|
|
||||||
string(LENGTH "${HEX}" _strlen)
|
|
||||||
|
|
||||||
while (_strlen GREATER 0)
|
|
||||||
math(EXPR _res "${_res} * 16")
|
|
||||||
string(SUBSTRING "${HEX}" 0 1 NIBBLE)
|
|
||||||
string(SUBSTRING "${HEX}" 1 -1 HEX)
|
|
||||||
if (NIBBLE STREQUAL "A")
|
|
||||||
math(EXPR _res "${_res} + 10")
|
|
||||||
elseif (NIBBLE STREQUAL "B")
|
|
||||||
math(EXPR _res "${_res} + 11")
|
|
||||||
elseif (NIBBLE STREQUAL "C")
|
|
||||||
math(EXPR _res "${_res} + 12")
|
|
||||||
elseif (NIBBLE STREQUAL "D")
|
|
||||||
math(EXPR _res "${_res} + 13")
|
|
||||||
elseif (NIBBLE STREQUAL "E")
|
|
||||||
math(EXPR _res "${_res} + 14")
|
|
||||||
elseif (NIBBLE STREQUAL "F")
|
|
||||||
math(EXPR _res "${_res} + 15")
|
|
||||||
else()
|
|
||||||
math(EXPR _res "${_res} + ${NIBBLE}")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
string(LENGTH "${HEX}" _strlen)
|
|
||||||
endwhile()
|
|
||||||
|
|
||||||
set(${DEC} ${_res} PARENT_SCOPE)
|
|
||||||
endfunction()
|
|
||||||
|
|
||||||
if(OPENSSL_INCLUDE_DIR AND EXISTS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h")
|
|
||||||
file(STRINGS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h" openssl_version_str
|
|
||||||
REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])+.*")
|
|
||||||
|
|
||||||
if(openssl_version_str)
|
|
||||||
# The version number is encoded as 0xMNNFFPPS: major minor fix patch status
|
|
||||||
# The status gives if this is a developer or prerelease and is ignored here.
|
|
||||||
# Major, minor, and fix directly translate into the version numbers shown in
|
|
||||||
# the string. The patch field translates to the single character suffix that
|
|
||||||
# indicates the bug fix state, which 00 -> nothing, 01 -> a, 02 -> b and so
|
|
||||||
# on.
|
|
||||||
|
|
||||||
string(REGEX REPLACE "^.*OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F]).*$"
|
|
||||||
"\\1;\\2;\\3;\\4;\\5" OPENSSL_VERSION_LIST "${openssl_version_str}")
|
|
||||||
list(GET OPENSSL_VERSION_LIST 0 OPENSSL_VERSION_MAJOR)
|
|
||||||
list(GET OPENSSL_VERSION_LIST 1 OPENSSL_VERSION_MINOR)
|
|
||||||
from_hex("${OPENSSL_VERSION_MINOR}" OPENSSL_VERSION_MINOR)
|
|
||||||
list(GET OPENSSL_VERSION_LIST 2 OPENSSL_VERSION_FIX)
|
|
||||||
from_hex("${OPENSSL_VERSION_FIX}" OPENSSL_VERSION_FIX)
|
|
||||||
list(GET OPENSSL_VERSION_LIST 3 OPENSSL_VERSION_PATCH)
|
|
||||||
|
|
||||||
if (NOT OPENSSL_VERSION_PATCH STREQUAL "00")
|
|
||||||
from_hex("${OPENSSL_VERSION_PATCH}" _tmp)
|
|
||||||
# 96 is the ASCII code of 'a' minus 1
|
|
||||||
math(EXPR OPENSSL_VERSION_PATCH_ASCII "${_tmp} + 96")
|
|
||||||
unset(_tmp)
|
|
||||||
# Once anyone knows how OpenSSL would call the patch versions beyond 'z'
|
|
||||||
# this should be updated to handle that, too. This has not happened yet
|
|
||||||
# so it is simply ignored here for now.
|
|
||||||
string(ASCII "${OPENSSL_VERSION_PATCH_ASCII}" OPENSSL_VERSION_PATCH_STRING)
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
set(OPENSSL_VERSION "${OPENSSL_VERSION_MAJOR}.${OPENSSL_VERSION_MINOR}.${OPENSSL_VERSION_FIX}${OPENSSL_VERSION_PATCH_STRING}")
|
|
||||||
endif ()
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
set(OPENSSL_LIBRARIES ${OPENSSL_SSL_LIBRARIES} ${OPENSSL_CRYPTO_LIBRARIES} )
|
|
||||||
list(REMOVE_DUPLICATES OPENSSL_LIBRARIES)
|
|
||||||
|
|
||||||
foreach(_comp IN LISTS OpenSSL_FIND_COMPONENTS)
|
|
||||||
if(_comp STREQUAL "Crypto")
|
|
||||||
if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
|
|
||||||
(EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
|
|
||||||
EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
|
|
||||||
EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
|
|
||||||
)
|
|
||||||
set(OpenSSL_${_comp}_FOUND TRUE)
|
|
||||||
else()
|
|
||||||
set(OpenSSL_${_comp}_FOUND FALSE)
|
|
||||||
endif()
|
|
||||||
elseif(_comp STREQUAL "SSL")
|
|
||||||
if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
|
|
||||||
(EXISTS "${OPENSSL_SSL_LIBRARY}" OR
|
|
||||||
EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
|
|
||||||
EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
|
|
||||||
)
|
|
||||||
set(OpenSSL_${_comp}_FOUND TRUE)
|
|
||||||
else()
|
|
||||||
set(OpenSSL_${_comp}_FOUND FALSE)
|
|
||||||
endif()
|
|
||||||
else()
|
|
||||||
message(WARNING "${_comp} is not a valid OpenSSL component")
|
|
||||||
set(OpenSSL_${_comp}_FOUND FALSE)
|
|
||||||
endif()
|
|
||||||
endforeach()
|
|
||||||
unset(_comp)
|
|
||||||
|
|
||||||
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake)
|
|
||||||
find_package_handle_standard_args(OpenSSL
|
|
||||||
REQUIRED_VARS
|
|
||||||
OPENSSL_CRYPTO_LIBRARY
|
|
||||||
OPENSSL_INCLUDE_DIR
|
|
||||||
VERSION_VAR
|
|
||||||
OPENSSL_VERSION
|
|
||||||
HANDLE_COMPONENTS
|
|
||||||
FAIL_MESSAGE
|
|
||||||
"Could NOT find OpenSSL, try to set the path to OpenSSL root folder in the system variable OPENSSL_ROOT_DIR"
|
|
||||||
)
|
|
||||||
|
|
||||||
mark_as_advanced(OPENSSL_INCLUDE_DIR OPENSSL_LIBRARIES)
|
|
||||||
|
|
||||||
if(OPENSSL_FOUND)
|
|
||||||
if(NOT TARGET OpenSSL::Crypto AND
|
|
||||||
(EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
|
|
||||||
EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
|
|
||||||
EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
|
|
||||||
)
|
|
||||||
add_library(OpenSSL::Crypto UNKNOWN IMPORTED)
|
|
||||||
set_target_properties(OpenSSL::Crypto PROPERTIES
|
|
||||||
INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
|
|
||||||
if(EXISTS "${OPENSSL_CRYPTO_LIBRARY}")
|
|
||||||
set_target_properties(OpenSSL::Crypto PROPERTIES
|
|
||||||
IMPORTED_LINK_INTERFACE_LANGUAGES "C"
|
|
||||||
IMPORTED_LOCATION "${OPENSSL_CRYPTO_LIBRARY}")
|
|
||||||
endif()
|
|
||||||
if(EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
|
|
||||||
set_property(TARGET OpenSSL::Crypto APPEND PROPERTY
|
|
||||||
IMPORTED_CONFIGURATIONS RELEASE)
|
|
||||||
set_target_properties(OpenSSL::Crypto PROPERTIES
|
|
||||||
IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
|
|
||||||
IMPORTED_LOCATION_RELEASE "${LIB_EAY_LIBRARY_RELEASE}")
|
|
||||||
endif()
|
|
||||||
if(EXISTS "${LIB_EAY_LIBRARY_DEBUG}")
|
|
||||||
set_property(TARGET OpenSSL::Crypto APPEND PROPERTY
|
|
||||||
IMPORTED_CONFIGURATIONS DEBUG)
|
|
||||||
set_target_properties(OpenSSL::Crypto PROPERTIES
|
|
||||||
IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "C"
|
|
||||||
IMPORTED_LOCATION_DEBUG "${LIB_EAY_LIBRARY_DEBUG}")
|
|
||||||
endif()
|
|
||||||
_OpenSSL_target_add_dependencies(OpenSSL::Crypto)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(NOT TARGET OpenSSL::SSL AND
|
|
||||||
(EXISTS "${OPENSSL_SSL_LIBRARY}" OR
|
|
||||||
EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
|
|
||||||
EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
|
|
||||||
)
|
|
||||||
add_library(OpenSSL::SSL UNKNOWN IMPORTED)
|
|
||||||
set_target_properties(OpenSSL::SSL PROPERTIES
|
|
||||||
INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
|
|
||||||
if(EXISTS "${OPENSSL_SSL_LIBRARY}")
|
|
||||||
set_target_properties(OpenSSL::SSL PROPERTIES
|
|
||||||
IMPORTED_LINK_INTERFACE_LANGUAGES "C"
|
|
||||||
IMPORTED_LOCATION "${OPENSSL_SSL_LIBRARY}")
|
|
||||||
endif()
|
|
||||||
if(EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
|
|
||||||
set_property(TARGET OpenSSL::SSL APPEND PROPERTY
|
|
||||||
IMPORTED_CONFIGURATIONS RELEASE)
|
|
||||||
set_target_properties(OpenSSL::SSL PROPERTIES
|
|
||||||
IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
|
|
||||||
IMPORTED_LOCATION_RELEASE "${SSL_EAY_LIBRARY_RELEASE}")
|
|
||||||
endif()
|
|
||||||
if(EXISTS "${SSL_EAY_LIBRARY_DEBUG}")
|
|
||||||
set_property(TARGET OpenSSL::SSL APPEND PROPERTY
|
|
||||||
IMPORTED_CONFIGURATIONS DEBUG)
|
|
||||||
set_target_properties(OpenSSL::SSL PROPERTIES
|
|
||||||
IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "C"
|
|
||||||
IMPORTED_LOCATION_DEBUG "${SSL_EAY_LIBRARY_DEBUG}")
|
|
||||||
endif()
|
|
||||||
if(TARGET OpenSSL::Crypto)
|
|
||||||
set_target_properties(OpenSSL::SSL PROPERTIES
|
|
||||||
INTERFACE_LINK_LIBRARIES OpenSSL::Crypto)
|
|
||||||
endif()
|
|
||||||
_OpenSSL_target_add_dependencies(OpenSSL::SSL)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Restore the original find library ordering
|
|
||||||
if(OPENSSL_USE_STATIC_LIBS)
|
|
||||||
set(CMAKE_FIND_LIBRARY_SUFFIXES ${_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
|
|
||||||
endif()
|
|
||||||
268
CMakeModules/FindPackageHandleStandardArgs.cmake
vendored
268
CMakeModules/FindPackageHandleStandardArgs.cmake
vendored
@@ -1,268 +0,0 @@
|
|||||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
|
||||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
|
||||||
|
|
||||||
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageMessage.cmake)
|
|
||||||
|
|
||||||
# internal helper macro
|
|
||||||
macro(_FPHSA_FAILURE_MESSAGE _msg)
|
|
||||||
set (__msg "${_msg}")
|
|
||||||
if (FPHSA_REASON_FAILURE_MESSAGE)
|
|
||||||
string(APPEND __msg "\n Reason given by package: ${FPHSA_REASON_FAILURE_MESSAGE}\n")
|
|
||||||
endif()
|
|
||||||
if (${_NAME}_FIND_REQUIRED)
|
|
||||||
message(FATAL_ERROR "${__msg}")
|
|
||||||
else ()
|
|
||||||
if (NOT ${_NAME}_FIND_QUIETLY)
|
|
||||||
message(STATUS "${__msg}")
|
|
||||||
endif ()
|
|
||||||
endif ()
|
|
||||||
endmacro()
|
|
||||||
|
|
||||||
|
|
||||||
# internal helper macro to generate the failure message when used in CONFIG_MODE:
|
|
||||||
macro(_FPHSA_HANDLE_FAILURE_CONFIG_MODE)
|
|
||||||
# <PackageName>_CONFIG is set, but FOUND is false, this means that some other of the REQUIRED_VARS was not found:
|
|
||||||
if(${_NAME}_CONFIG)
|
|
||||||
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: missing:${MISSING_VARS} (found ${${_NAME}_CONFIG} ${VERSION_MSG})")
|
|
||||||
else()
|
|
||||||
# If _CONSIDERED_CONFIGS is set, the config-file has been found, but no suitable version.
|
|
||||||
# List them all in the error message:
|
|
||||||
if(${_NAME}_CONSIDERED_CONFIGS)
|
|
||||||
set(configsText "")
|
|
||||||
list(LENGTH ${_NAME}_CONSIDERED_CONFIGS configsCount)
|
|
||||||
math(EXPR configsCount "${configsCount} - 1")
|
|
||||||
foreach(currentConfigIndex RANGE ${configsCount})
|
|
||||||
list(GET ${_NAME}_CONSIDERED_CONFIGS ${currentConfigIndex} filename)
|
|
||||||
list(GET ${_NAME}_CONSIDERED_VERSIONS ${currentConfigIndex} version)
|
|
||||||
string(APPEND configsText "\n ${filename} (version ${version})")
|
|
||||||
endforeach()
|
|
||||||
if (${_NAME}_NOT_FOUND_MESSAGE)
|
|
||||||
if (FPHSA_REASON_FAILURE_MESSAGE)
|
|
||||||
string(PREPEND FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}\n ")
|
|
||||||
else()
|
|
||||||
set(FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}")
|
|
||||||
endif()
|
|
||||||
else()
|
|
||||||
string(APPEND configsText "\n")
|
|
||||||
endif()
|
|
||||||
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} ${VERSION_MSG}, checked the following files:${configsText}")
|
|
||||||
|
|
||||||
else()
|
|
||||||
# Simple case: No Config-file was found at all:
|
|
||||||
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: found neither ${_NAME}Config.cmake nor ${_NAME_LOWER}-config.cmake ${VERSION_MSG}")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
endmacro()
|
|
||||||
|
|
||||||
|
|
||||||
function(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FIRST_ARG)
|
|
||||||
|
|
||||||
# Set up the arguments for `cmake_parse_arguments`.
|
|
||||||
set(options CONFIG_MODE HANDLE_COMPONENTS)
|
|
||||||
set(oneValueArgs FAIL_MESSAGE REASON_FAILURE_MESSAGE VERSION_VAR FOUND_VAR)
|
|
||||||
set(multiValueArgs REQUIRED_VARS)
|
|
||||||
|
|
||||||
# Check whether we are in 'simple' or 'extended' mode:
|
|
||||||
set(_KEYWORDS_FOR_EXTENDED_MODE ${options} ${oneValueArgs} ${multiValueArgs} )
|
|
||||||
list(FIND _KEYWORDS_FOR_EXTENDED_MODE "${_FIRST_ARG}" INDEX)
|
|
||||||
|
|
||||||
if(${INDEX} EQUAL -1)
|
|
||||||
set(FPHSA_FAIL_MESSAGE ${_FIRST_ARG})
|
|
||||||
set(FPHSA_REQUIRED_VARS ${ARGN})
|
|
||||||
set(FPHSA_VERSION_VAR)
|
|
||||||
else()
|
|
||||||
cmake_parse_arguments(FPHSA "${options}" "${oneValueArgs}" "${multiValueArgs}" ${_FIRST_ARG} ${ARGN})
|
|
||||||
|
|
||||||
if(FPHSA_UNPARSED_ARGUMENTS)
|
|
||||||
message(FATAL_ERROR "Unknown keywords given to FIND_PACKAGE_HANDLE_STANDARD_ARGS(): \"${FPHSA_UNPARSED_ARGUMENTS}\"")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(NOT FPHSA_FAIL_MESSAGE)
|
|
||||||
set(FPHSA_FAIL_MESSAGE "DEFAULT_MSG")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# In config-mode, we rely on the variable <PackageName>_CONFIG, which is set by find_package()
|
|
||||||
# when it successfully found the config-file, including version checking:
|
|
||||||
if(FPHSA_CONFIG_MODE)
|
|
||||||
list(INSERT FPHSA_REQUIRED_VARS 0 ${_NAME}_CONFIG)
|
|
||||||
list(REMOVE_DUPLICATES FPHSA_REQUIRED_VARS)
|
|
||||||
set(FPHSA_VERSION_VAR ${_NAME}_VERSION)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(NOT FPHSA_REQUIRED_VARS)
|
|
||||||
message(FATAL_ERROR "No REQUIRED_VARS specified for FIND_PACKAGE_HANDLE_STANDARD_ARGS()")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# now that we collected all arguments, process them
|
|
||||||
|
|
||||||
if("x${FPHSA_FAIL_MESSAGE}" STREQUAL "xDEFAULT_MSG")
|
|
||||||
set(FPHSA_FAIL_MESSAGE "Could NOT find ${_NAME}")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
list(GET FPHSA_REQUIRED_VARS 0 _FIRST_REQUIRED_VAR)
|
|
||||||
|
|
||||||
string(TOUPPER ${_NAME} _NAME_UPPER)
|
|
||||||
string(TOLOWER ${_NAME} _NAME_LOWER)
|
|
||||||
|
|
||||||
if(FPHSA_FOUND_VAR)
|
|
||||||
if(FPHSA_FOUND_VAR MATCHES "^${_NAME}_FOUND$" OR FPHSA_FOUND_VAR MATCHES "^${_NAME_UPPER}_FOUND$")
|
|
||||||
set(_FOUND_VAR ${FPHSA_FOUND_VAR})
|
|
||||||
else()
|
|
||||||
message(FATAL_ERROR "The argument for FOUND_VAR is \"${FPHSA_FOUND_VAR}\", but only \"${_NAME}_FOUND\" and \"${_NAME_UPPER}_FOUND\" are valid names.")
|
|
||||||
endif()
|
|
||||||
else()
|
|
||||||
set(_FOUND_VAR ${_NAME_UPPER}_FOUND)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# collect all variables which were not found, so they can be printed, so the
|
|
||||||
# user knows better what went wrong (#6375)
|
|
||||||
set(MISSING_VARS "")
|
|
||||||
set(DETAILS "")
|
|
||||||
# check if all passed variables are valid
|
|
||||||
set(FPHSA_FOUND_${_NAME} TRUE)
|
|
||||||
foreach(_CURRENT_VAR ${FPHSA_REQUIRED_VARS})
|
|
||||||
if(NOT ${_CURRENT_VAR})
|
|
||||||
set(FPHSA_FOUND_${_NAME} FALSE)
|
|
||||||
string(APPEND MISSING_VARS " ${_CURRENT_VAR}")
|
|
||||||
else()
|
|
||||||
string(APPEND DETAILS "[${${_CURRENT_VAR}}]")
|
|
||||||
endif()
|
|
||||||
endforeach()
|
|
||||||
if(FPHSA_FOUND_${_NAME})
|
|
||||||
set(${_NAME}_FOUND TRUE)
|
|
||||||
set(${_NAME_UPPER}_FOUND TRUE)
|
|
||||||
else()
|
|
||||||
set(${_NAME}_FOUND FALSE)
|
|
||||||
set(${_NAME_UPPER}_FOUND FALSE)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# component handling
|
|
||||||
unset(FOUND_COMPONENTS_MSG)
|
|
||||||
unset(MISSING_COMPONENTS_MSG)
|
|
||||||
|
|
||||||
if(FPHSA_HANDLE_COMPONENTS)
|
|
||||||
foreach(comp ${${_NAME}_FIND_COMPONENTS})
|
|
||||||
if(${_NAME}_${comp}_FOUND)
|
|
||||||
|
|
||||||
if(NOT DEFINED FOUND_COMPONENTS_MSG)
|
|
||||||
set(FOUND_COMPONENTS_MSG "found components:")
|
|
||||||
endif()
|
|
||||||
string(APPEND FOUND_COMPONENTS_MSG " ${comp}")
|
|
||||||
|
|
||||||
else()
|
|
||||||
|
|
||||||
if(NOT DEFINED MISSING_COMPONENTS_MSG)
|
|
||||||
set(MISSING_COMPONENTS_MSG "missing components:")
|
|
||||||
endif()
|
|
||||||
string(APPEND MISSING_COMPONENTS_MSG " ${comp}")
|
|
||||||
|
|
||||||
if(${_NAME}_FIND_REQUIRED_${comp})
|
|
||||||
set(${_NAME}_FOUND FALSE)
|
|
||||||
string(APPEND MISSING_VARS " ${comp}")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
endif()
|
|
||||||
endforeach()
|
|
||||||
set(COMPONENT_MSG "${FOUND_COMPONENTS_MSG} ${MISSING_COMPONENTS_MSG}")
|
|
||||||
string(APPEND DETAILS "[c${COMPONENT_MSG}]")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# version handling:
|
|
||||||
set(VERSION_MSG "")
|
|
||||||
set(VERSION_OK TRUE)
|
|
||||||
|
|
||||||
# check with DEFINED here as the requested or found version may be "0"
|
|
||||||
if (DEFINED ${_NAME}_FIND_VERSION)
|
|
||||||
if(DEFINED ${FPHSA_VERSION_VAR})
|
|
||||||
set(_FOUND_VERSION ${${FPHSA_VERSION_VAR}})
|
|
||||||
|
|
||||||
if(${_NAME}_FIND_VERSION_EXACT) # exact version required
|
|
||||||
# count the dots in the version string
|
|
||||||
string(REGEX REPLACE "[^.]" "" _VERSION_DOTS "${_FOUND_VERSION}")
|
|
||||||
# add one dot because there is one dot more than there are components
|
|
||||||
string(LENGTH "${_VERSION_DOTS}." _VERSION_DOTS)
|
|
||||||
if (_VERSION_DOTS GREATER ${_NAME}_FIND_VERSION_COUNT)
|
|
||||||
# Because of the C++ implementation of find_package() ${_NAME}_FIND_VERSION_COUNT
|
|
||||||
# is at most 4 here. Therefore a simple lookup table is used.
|
|
||||||
if (${_NAME}_FIND_VERSION_COUNT EQUAL 1)
|
|
||||||
set(_VERSION_REGEX "[^.]*")
|
|
||||||
elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 2)
|
|
||||||
set(_VERSION_REGEX "[^.]*\\.[^.]*")
|
|
||||||
elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 3)
|
|
||||||
set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*")
|
|
||||||
else ()
|
|
||||||
set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*\\.[^.]*")
|
|
||||||
endif ()
|
|
||||||
string(REGEX REPLACE "^(${_VERSION_REGEX})\\..*" "\\1" _VERSION_HEAD "${_FOUND_VERSION}")
|
|
||||||
unset(_VERSION_REGEX)
|
|
||||||
if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _VERSION_HEAD)
|
|
||||||
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
|
|
||||||
set(VERSION_OK FALSE)
|
|
||||||
else ()
|
|
||||||
set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
|
|
||||||
endif ()
|
|
||||||
unset(_VERSION_HEAD)
|
|
||||||
else ()
|
|
||||||
if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _FOUND_VERSION)
|
|
||||||
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
|
|
||||||
set(VERSION_OK FALSE)
|
|
||||||
else ()
|
|
||||||
set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
|
|
||||||
endif ()
|
|
||||||
endif ()
|
|
||||||
unset(_VERSION_DOTS)
|
|
||||||
|
|
||||||
else() # minimum version specified:
|
|
||||||
if (${_NAME}_FIND_VERSION VERSION_GREATER _FOUND_VERSION)
|
|
||||||
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is at least \"${${_NAME}_FIND_VERSION}\"")
|
|
||||||
set(VERSION_OK FALSE)
|
|
||||||
else ()
|
|
||||||
set(VERSION_MSG "(found suitable version \"${_FOUND_VERSION}\", minimum required is \"${${_NAME}_FIND_VERSION}\")")
|
|
||||||
endif ()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
else()
|
|
||||||
|
|
||||||
# if the package was not found, but a version was given, add that to the output:
|
|
||||||
if(${_NAME}_FIND_VERSION_EXACT)
|
|
||||||
set(VERSION_MSG "(Required is exact version \"${${_NAME}_FIND_VERSION}\")")
|
|
||||||
else()
|
|
||||||
set(VERSION_MSG "(Required is at least version \"${${_NAME}_FIND_VERSION}\")")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
endif()
|
|
||||||
else ()
|
|
||||||
# Check with DEFINED as the found version may be 0.
|
|
||||||
if(DEFINED ${FPHSA_VERSION_VAR})
|
|
||||||
set(VERSION_MSG "(found version \"${${FPHSA_VERSION_VAR}}\")")
|
|
||||||
endif()
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
if(VERSION_OK)
|
|
||||||
string(APPEND DETAILS "[v${${FPHSA_VERSION_VAR}}(${${_NAME}_FIND_VERSION})]")
|
|
||||||
else()
|
|
||||||
set(${_NAME}_FOUND FALSE)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
|
|
||||||
# print the result:
|
|
||||||
if (${_NAME}_FOUND)
|
|
||||||
FIND_PACKAGE_MESSAGE(${_NAME} "Found ${_NAME}: ${${_FIRST_REQUIRED_VAR}} ${VERSION_MSG} ${COMPONENT_MSG}" "${DETAILS}")
|
|
||||||
else ()
|
|
||||||
|
|
||||||
if(FPHSA_CONFIG_MODE)
|
|
||||||
_FPHSA_HANDLE_FAILURE_CONFIG_MODE()
|
|
||||||
else()
|
|
||||||
if(NOT VERSION_OK)
|
|
||||||
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: ${VERSION_MSG} (found ${${_FIRST_REQUIRED_VAR}})")
|
|
||||||
else()
|
|
||||||
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} (missing:${MISSING_VARS}) ${VERSION_MSG}")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
set(${_NAME}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
|
|
||||||
set(${_NAME_UPPER}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
|
|
||||||
endfunction()
|
|
||||||
48
CMakeModules/FindPackageMessage.cmake
vendored
48
CMakeModules/FindPackageMessage.cmake
vendored
@@ -1,48 +0,0 @@
|
|||||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
|
||||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
|
||||||
|
|
||||||
#[=======================================================================[.rst:
|
|
||||||
FindPackageMessage
|
|
||||||
------------------
|
|
||||||
|
|
||||||
.. code-block:: cmake
|
|
||||||
|
|
||||||
find_package_message(<name> "message for user" "find result details")
|
|
||||||
|
|
||||||
This function is intended to be used in FindXXX.cmake modules files.
|
|
||||||
It will print a message once for each unique find result. This is
|
|
||||||
useful for telling the user where a package was found. The first
|
|
||||||
argument specifies the name (XXX) of the package. The second argument
|
|
||||||
specifies the message to display. The third argument lists details
|
|
||||||
about the find result so that if they change the message will be
|
|
||||||
displayed again. The macro also obeys the QUIET argument to the
|
|
||||||
find_package command.
|
|
||||||
|
|
||||||
Example:
|
|
||||||
|
|
||||||
.. code-block:: cmake
|
|
||||||
|
|
||||||
if(X11_FOUND)
|
|
||||||
find_package_message(X11 "Found X11: ${X11_X11_LIB}"
|
|
||||||
"[${X11_X11_LIB}][${X11_INCLUDE_DIR}]")
|
|
||||||
else()
|
|
||||||
...
|
|
||||||
endif()
|
|
||||||
#]=======================================================================]
|
|
||||||
|
|
||||||
function(find_package_message pkg msg details)
|
|
||||||
# Avoid printing a message repeatedly for the same find result.
|
|
||||||
if(NOT ${pkg}_FIND_QUIETLY)
|
|
||||||
string(REPLACE "\n" "" details "${details}")
|
|
||||||
set(DETAILS_VAR FIND_PACKAGE_MESSAGE_DETAILS_${pkg})
|
|
||||||
if(NOT "${details}" STREQUAL "${${DETAILS_VAR}}")
|
|
||||||
# The message has not yet been printed.
|
|
||||||
message(STATUS "${msg}")
|
|
||||||
|
|
||||||
# Save the find details in the cache to avoid printing the same
|
|
||||||
# message again.
|
|
||||||
set("${DETAILS_VAR}" "${details}"
|
|
||||||
CACHE INTERNAL "Details about finding ${pkg}")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
endfunction()
|
|
||||||
@@ -3,7 +3,7 @@ MAINTAINER simon987 <me@simon987.net>
|
|||||||
|
|
||||||
RUN apt update
|
RUN apt update
|
||||||
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
|
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
|
||||||
curl libtiff5 libpng16-16
|
curl libtiff5 libpng16-16 libpcre3
|
||||||
|
|
||||||
RUN mkdir -p /usr/share/tessdata && \
|
RUN mkdir -p /usr/share/tessdata && \
|
||||||
cd /usr/share/tessdata/ && \
|
cd /usr/share/tessdata/ && \
|
||||||
@@ -16,4 +16,7 @@ RUN mkdir -p /usr/share/tessdata && \
|
|||||||
|
|
||||||
ADD sist2 /root/sist2
|
ADD sist2 /root/sist2
|
||||||
|
|
||||||
|
ENV LANG C.UTF-8
|
||||||
|
ENV LC_ALL C.UTF-8
|
||||||
|
|
||||||
ENTRYPOINT ["/root/sist2"]
|
ENTRYPOINT ["/root/sist2"]
|
||||||
|
|||||||
@@ -1,10 +1,14 @@
|
|||||||
rm ./sist2
|
rm ./sist2 sist2_debug
|
||||||
cp ../sist2 .
|
cp ../sist2.gz .
|
||||||
|
gzip -d sist2.gz
|
||||||
strip sist2
|
strip sist2
|
||||||
|
|
||||||
version=$(./sist2 --version)
|
version=$(./sist2 --version)
|
||||||
|
|
||||||
echo "Version ${version}"
|
echo "Version ${version}"
|
||||||
docker build . -t simon987/sist2:${version} -t simon987/sist2:latest
|
docker build . -t simon987/sist2:${version} -t simon987/sist2:latest
|
||||||
|
|
||||||
docker push simon987/sist2:${version}
|
docker push simon987/sist2:${version}
|
||||||
docker push simon987/sist2:latest
|
docker push simon987/sist2:latest
|
||||||
|
|
||||||
|
docker run --rm simon987/sist2 -v
|
||||||
22
DockerArm64/Dockerfile
Normal file
22
DockerArm64/Dockerfile
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
FROM ubuntu:19.10
|
||||||
|
MAINTAINER simon987 <me@simon987.net>
|
||||||
|
|
||||||
|
RUN apt update
|
||||||
|
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
|
||||||
|
curl libtiff5 libpng16-16 libpcre3
|
||||||
|
|
||||||
|
RUN mkdir -p /usr/share/tessdata && \
|
||||||
|
cd /usr/share/tessdata/ && \
|
||||||
|
curl -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata &&\
|
||||||
|
curl -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata &&\
|
||||||
|
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
|
||||||
|
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
|
||||||
|
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
|
||||||
|
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh
|
||||||
|
|
||||||
|
ADD sist2_arm64 /root/sist2
|
||||||
|
|
||||||
|
ENV LANG C.UTF-8
|
||||||
|
ENV LC_ALL C.UTF-8
|
||||||
|
|
||||||
|
ENTRYPOINT ["/root/sist2"]
|
||||||
13
DockerArm64/build.sh
Executable file
13
DockerArm64/build.sh
Executable file
@@ -0,0 +1,13 @@
|
|||||||
|
rm ./sist2_arm64
|
||||||
|
cp ../sist2_arm64.gz .
|
||||||
|
gzip -d sist2_arm64.gz
|
||||||
|
|
||||||
|
version=$(./sist2_arm64 --version)
|
||||||
|
|
||||||
|
echo "Version ${version}"
|
||||||
|
docker build . -t simon987/sist2-arm64:"${version}" -t simon987/sist2-arm64:latest
|
||||||
|
|
||||||
|
docker push simon987/sist2-arm64:"${version}"
|
||||||
|
docker push simon987/sist2-arm64:latest
|
||||||
|
|
||||||
|
docker run --rm simon987/sist2-arm64 -v
|
||||||
122
README.md
122
README.md
@@ -2,101 +2,90 @@
|
|||||||
[](https://www.codefactor.io/repository/github/simon987/sist2)
|
[](https://www.codefactor.io/repository/github/simon987/sist2)
|
||||||
[/statusIcon)](https://files.simon987.net/artifacts/Sist2/Build/)
|
[/statusIcon)](https://files.simon987.net/artifacts/Sist2/Build/)
|
||||||
|
|
||||||
|
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
|
||||||
|
|
||||||
# sist2
|
# sist2
|
||||||
|
|
||||||
sist2 (Simple incremental search tool)
|
sist2 (Simple incremental search tool)
|
||||||
|
|
||||||
*Warning: sist2 is in early development*
|
*Warning: sist2 is in early development*
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
* Fast, low memory usage, multi-threaded
|
* Fast, low memory usage, multi-threaded
|
||||||
|
* Mobile-friendly Web interface
|
||||||
* Portable (all its features are packaged in a single executable)
|
* Portable (all its features are packaged in a single executable)
|
||||||
* Extracts text from common file types \*
|
* Extracts text and metadata from common file types \*
|
||||||
* Generates thumbnails \*
|
* Generates thumbnails \*
|
||||||
* Incremental scanning
|
* Incremental scanning
|
||||||
* Automatic tagging from file attributes via [user scripts](scripting/README.md)
|
* Manual tagging from the UI and automatic tagging based on file attributes via [user scripts](docs/scripting.md)
|
||||||
* Recursive scan inside archive files \*\*
|
* Recursive scan inside archive files \*\*
|
||||||
* OCR support with tesseract \*\*\*
|
* OCR support with tesseract \*\*\*
|
||||||
|
* Stats page & disk utilisation visualization
|
||||||
|
|
||||||
|
|
||||||
\* See [format support](#format-support)
|
\* See [format support](#format-support)
|
||||||
\*\* See [Archive files](#archive-files)
|
\*\* See [Archive files](#archive-files)
|
||||||
\*\*\* See [OCR](#ocr)
|
\*\*\* See [OCR](#ocr)
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
|
||||||
## Getting Started
|
## Getting Started
|
||||||
|
|
||||||
1. Have an [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) instance running
|
1. Have an Elasticsearch (>= 6.X.X) instance running
|
||||||
1.
|
1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
|
||||||
|
1. *(or)* Run using docker:
|
||||||
|
```bash
|
||||||
|
docker run -d --name es1 --net sist2_net -p 9200:9200 \
|
||||||
|
-e "discovery.type=single-node" elasticsearch:7.5.2
|
||||||
|
```
|
||||||
|
1. *(or)* Run using docker-compose:
|
||||||
|
```yaml
|
||||||
|
elasticsearch:
|
||||||
|
image: docker.elastic.co/elasticsearch/elasticsearch:7.5.2
|
||||||
|
environment:
|
||||||
|
- discovery.type=single-node
|
||||||
|
- "ES_JAVA_OPTS=-Xms1G -Xmx2G"
|
||||||
|
```
|
||||||
|
1. Download sist2 executable
|
||||||
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
|
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
|
||||||
1. *(or)* Download an [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
|
1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
|
||||||
1. *(or)* `docker pull simon987/sist2:latest`
|
1. *(or)* `docker pull simon987/sist2:latest`
|
||||||
|
|
||||||
|
1. See [Usage guide](docs/USAGE.md)
|
||||||
|
|
||||||
|
|
||||||
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
|
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
|
||||||
\* *Mac users*: See [#1](https://github.com/simon987/sist2/issues/1)
|
|
||||||
|
|
||||||
|
|
||||||
## Example usage
|
## Example usage
|
||||||
|
|
||||||
See help page `sist2 --help` for more details.
|
See [Usage guide](docs/USAGE.md) for more details
|
||||||
|
|
||||||
**Scan a directory**
|
1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
|
||||||
```bash
|
1. Push index to Elasticsearch: `sist2 index ./docs_idx`
|
||||||
sist2 scan ~/Documents -o ./orig_idx/
|
1. Start web interface: `sist2 web ./docs_idx`
|
||||||
sist2 scan --threads 4 --content-size 16384 /mnt/Pictures
|
|
||||||
sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
|
|
||||||
```
|
|
||||||
|
|
||||||
**Push index to Elasticsearch or file**
|
|
||||||
```bash
|
|
||||||
sist2 index --force-reset ./my_idx
|
|
||||||
sist2 index --print ./my_idx > raw_documents.ndjson
|
|
||||||
```
|
|
||||||
|
|
||||||
**Start web interface**
|
|
||||||
```bash
|
|
||||||
sist2 web --bind 0.0.0.0 --port 4321 ./my_idx1 ./my_idx2 ./my_idx3
|
|
||||||
```
|
|
||||||
|
|
||||||
### Use sist2 with docker
|
|
||||||
|
|
||||||
**scan**
|
|
||||||
```bash
|
|
||||||
docker run -it \
|
|
||||||
-v /path/to/files/:/files \
|
|
||||||
-v $PWD/out/:/out \
|
|
||||||
simon987/sist2 scan -t 4 /files -o /out/my_idx1
|
|
||||||
```
|
|
||||||
**index**
|
|
||||||
```bash
|
|
||||||
docker run -it --network host\
|
|
||||||
-v $PWD/out/:/out \
|
|
||||||
simon987/sist2 index /out/my_idx1
|
|
||||||
```
|
|
||||||
|
|
||||||
**web**
|
|
||||||
```bash
|
|
||||||
docker run --rm --network host -d --name sist2\
|
|
||||||
-v $PWD/out/my_idx:/idx \
|
|
||||||
-v $PWD/my/files:/files
|
|
||||||
simon987/sist2 web --bind 0.0.0.0 /idx
|
|
||||||
docker stop sist2
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## Format support
|
## Format support
|
||||||
|
|
||||||
File type | Library | Content | Thumbnail | Metadata
|
File type | Library | Content | Thumbnail | Metadata
|
||||||
:---|:---|:---|:---|:---
|
:---|:---|:---|:---|:---
|
||||||
pdf,xps,cbz,fb2,epub | MuPDF | text+ocr | yes, `png` | title |
|
pdf,xps,fb2,epub | MuPDF | text+ocr | yes | title |
|
||||||
`audio/*` | ffmpeg | - | yes, `jpeg` | ID3 tags |
|
cbz,cbr | *(none)* | - | yes | - |
|
||||||
`video/*` | ffmpeg | - | yes, `jpeg` | title, comment, artist |
|
`audio/*` | ffmpeg | - | yes | ID3 tags |
|
||||||
`image/*` | ffmpeg | - | yes, `jpeg` | `EXIF:Artist`, `EXIF:ImageDescription` |
|
`video/*` | ffmpeg | - | yes | title, comment, artist |
|
||||||
|
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
|
||||||
|
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags |
|
||||||
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
|
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
|
||||||
`text/plain` | *(none)* | yes | no | - |
|
`text/plain` | *(none)* | yes | no | - |
|
||||||
|
html, xml | *(none)* | yes | no | - |
|
||||||
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
|
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
|
||||||
docx, xlsx, pptx | libOPC | yes | no | no |
|
docx, xlsx, pptx | *(none)* | yes | if embedded | creator, modified_by, title |
|
||||||
|
mobi, azw, azw3 | libmobi | yes | no | author, title |
|
||||||
|
|
||||||
\* *See [Archive files](#archive-files)*
|
\* *See [Archive files](#archive-files)*
|
||||||
|
|
||||||
@@ -106,22 +95,20 @@ they were directly in the file system. Recursive (archives inside archives)
|
|||||||
scan is also supported.
|
scan is also supported.
|
||||||
|
|
||||||
**Limitations**:
|
**Limitations**:
|
||||||
* Parsing media files with formats that require
|
* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
|
||||||
*seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) is not supported.
|
is limited (see `--mem-buffer` option)
|
||||||
* Archive files are scanned sequentially, by a single thread. On systems where
|
* Archive files are scanned sequentially, by a single thread. On systems where
|
||||||
**sist2** is not I/O bound, scans might be faster when larger archives are split
|
**sist2** is not I/O bound, scans might be faster when larger archives are split
|
||||||
into smaller parts.
|
into smaller parts.
|
||||||
|
|
||||||
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
|
|
||||||
|
|
||||||
|
|
||||||
### OCR
|
### OCR
|
||||||
|
|
||||||
You can enable OCR support for pdf,xps,cbz,fb2,epub file types with the
|
You can enable OCR support for pdf,xps,fb2,epub file types with the
|
||||||
`--ocr <lang>` option. Download the language data files with your
|
`--ocr <lang>` option. Download the language data files with your
|
||||||
package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
|
package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
|
||||||
|
|
||||||
The `simon987/sist2` github image comes with common languages
|
The `simon987/sist2` image comes with common languages
|
||||||
(hin, jpn, eng, fra, rus, spa) pre-installed.
|
(hin, jpn, eng, fra, rus, spa) pre-installed.
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
@@ -134,22 +121,17 @@ sist2 scan --ocr eng ~/Books/Textbooks/
|
|||||||
## Build from source
|
## Build from source
|
||||||
|
|
||||||
You can compile **sist2** by yourself if you don't want to use the pre-compiled
|
You can compile **sist2** by yourself if you don't want to use the pre-compiled
|
||||||
binaries.
|
binaries (GCC 7+ required).
|
||||||
|
|
||||||
1. Install compile-time dependencies
|
1. Install compile-time dependencies
|
||||||
|
|
||||||
*(Debian)*
|
```bash
|
||||||
```bash
|
vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libuuid libmagic libraw curl[core,ssl] jbig2dec brotli libmupdf
|
||||||
apt install git cmake pkg-config libglib2.0-dev \
|
|
||||||
libssl-dev uuid-dev python3 libmagic-dev libfreetype6-dev \
|
|
||||||
libcurl-dev libbz2-dev yasm libharfbuzz-dev ragel \
|
|
||||||
libarchive-dev libtiff5 libpng16-16 libpango1.0-dev
|
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Build
|
2. Build
|
||||||
```bash
|
```bash
|
||||||
git clone --recurse-submodules https://github.com/simon987/sist2
|
git clone --recursive https://github.com/simon987/sist2/
|
||||||
./scripts/get_static_libs.sh
|
cmake -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
|
||||||
cmake .
|
|
||||||
make
|
make
|
||||||
```
|
```
|
||||||
|
|||||||
1
argparse
1
argparse
Submodule argparse deleted from fafc503d23
1
cJSON
1
cJSON
Submodule cJSON deleted from 2d4ad84192
17
ci/build.sh
Normal file → Executable file
17
ci/build.sh
Normal file → Executable file
@@ -1,8 +1,17 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
./scripts/get_static_libs.sh
|
VCPKG_ROOT="/vcpkg"
|
||||||
|
|
||||||
cmake .
|
rm *.gz
|
||||||
make
|
|
||||||
|
rm -rf CMakeFiles CMakeCache.txt
|
||||||
|
cmake -DSIST_DEBUG=off -DVCPKG_BUILD_TYPE=release -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
|
||||||
|
make -j 12
|
||||||
strip sist2
|
strip sist2
|
||||||
strip sist2_scan
|
gzip -9 sist2
|
||||||
|
|
||||||
|
rm -rf CMakeFiles CMakeCache.txt
|
||||||
|
cmake -DSIST_DEBUG=on -DVCPKG_BUILD_TYPE=debug -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
|
||||||
|
make -j 12
|
||||||
|
cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2
|
||||||
|
tar -czf sist2_debug.tar.gz sist2_debug libasan.so.2
|
||||||
|
|||||||
12
ci/build_arm64.sh
Executable file
12
ci/build_arm64.sh
Executable file
@@ -0,0 +1,12 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
VCPKG_ROOT="/vcpkg"
|
||||||
|
|
||||||
|
rm *.gz
|
||||||
|
|
||||||
|
rm -rf CMakeFiles CMakeCache.txt
|
||||||
|
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
|
||||||
|
make -j 4
|
||||||
|
strip sist2
|
||||||
|
mv sist2 sist2_arm64
|
||||||
|
gzip -9 sist2_arm64
|
||||||
351
docs/USAGE.md
Normal file
351
docs/USAGE.md
Normal file
@@ -0,0 +1,351 @@
|
|||||||
|
# Usage
|
||||||
|
|
||||||
|
*More examples (specifically with docker/compose) are in progress*
|
||||||
|
|
||||||
|
* [scan](#scan)
|
||||||
|
* [options](#scan-options)
|
||||||
|
* [examples](#scan-examples)
|
||||||
|
* [index format](#index-format)
|
||||||
|
* [index](#index)
|
||||||
|
* [options](#index-options)
|
||||||
|
* [examples](#index-examples)
|
||||||
|
* [web](#web)
|
||||||
|
* [options](#web-options)
|
||||||
|
* [examples](#web-examples)
|
||||||
|
* [rewrite_url](#rewrite_url)
|
||||||
|
* [link to specific indices](#link-to-specific-indices)
|
||||||
|
* [exec-script](#exec-script)
|
||||||
|
* [tagging](#tagging)
|
||||||
|
|
||||||
|
```
|
||||||
|
Usage: sist2 scan [OPTION]... PATH
|
||||||
|
or: sist2 index [OPTION]... INDEX
|
||||||
|
or: sist2 web [OPTION]... INDEX...
|
||||||
|
or: sist2 exec-script [OPTION]... INDEX
|
||||||
|
Lightning-fast file system indexer and search tool.
|
||||||
|
|
||||||
|
-h, --help show this help message and exit
|
||||||
|
-v, --version Show version and exit
|
||||||
|
--verbose Turn on logging
|
||||||
|
--very-verbose Turn on debug messages
|
||||||
|
|
||||||
|
Scan options
|
||||||
|
-t, --threads=<int> Number of threads. DEFAULT=1
|
||||||
|
-q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
|
||||||
|
--size=<int> Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
|
||||||
|
--content-size=<int> Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
|
||||||
|
--incremental=<str> Reuse an existing index and only scan modified files.
|
||||||
|
-o, --output=<str> Output directory. DEFAULT=index.sist2/
|
||||||
|
--rewrite-url=<str> Serve files from this url instead of from disk.
|
||||||
|
--name=<str> Index display name. DEFAULT: (name of the directory)
|
||||||
|
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
|
||||||
|
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
|
||||||
|
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
|
||||||
|
-e, --exclude=<str> Files that match this regex will not be scanned
|
||||||
|
--fast Only index file names & mime type
|
||||||
|
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
|
||||||
|
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
|
||||||
|
|
||||||
|
Index options
|
||||||
|
-t, --threads=<int> Number of threads. DEFAULT=1
|
||||||
|
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
|
||||||
|
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||||
|
-p, --print Just print JSON documents to stdout.
|
||||||
|
--script-file=<str> Path to user script.
|
||||||
|
--async-script Execute user script asynchronously.
|
||||||
|
--batch-size=<int> Index batch size. DEFAULT: 100
|
||||||
|
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
|
||||||
|
|
||||||
|
Web options
|
||||||
|
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
||||||
|
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||||
|
--bind=<str> Listen on this address. DEFAULT=localhost:4090
|
||||||
|
--auth=<str> Basic auth in user:password format
|
||||||
|
--tag-auth=<str> Basic auth in user:password format for tagging
|
||||||
|
|
||||||
|
Exec-script options
|
||||||
|
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
||||||
|
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||||
|
--script-file=<str> Path to user script.
|
||||||
|
--async-script Execute user script asynchronously.
|
||||||
|
Made by simon987 <me@simon987.net>. Released under GPL-3.0
|
||||||
|
```
|
||||||
|
|
||||||
|
## Scan
|
||||||
|
|
||||||
|
### Scan options
|
||||||
|
|
||||||
|
* `-t, --threads`
|
||||||
|
Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-WmiObject Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
|
||||||
|
* `-q, --quality`
|
||||||
|
Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. *Does not affect PDF thumbnails quality*
|
||||||
|
* `--size`
|
||||||
|
Thumbnail size in pixels.
|
||||||
|
* `--content-size`
|
||||||
|
Number of bytes of text to be extracted from the content of files (plain text and PDFs).
|
||||||
|
Repeated whitespace and special characters do not count toward this limit.
|
||||||
|
* `--incremental`
|
||||||
|
Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
|
||||||
|
will be copied to the new index and will not be parsed again.
|
||||||
|
* `-o, --output` Output directory.
|
||||||
|
* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
|
||||||
|
* `--name` Set the `name` option for the web module
|
||||||
|
* `--depth` Maximum scan depth. Set to 0 to only scan files directly in the root directory, set to -1 for infinite depth
|
||||||
|
* `--archive` Archive file mode.
|
||||||
|
* skip: Don't parse
|
||||||
|
* list: Only get file names as text
|
||||||
|
* shallow: Don't parse archives inside archives.
|
||||||
|
* recurse: Scan archives recursively (default)
|
||||||
|
* `--ocr` See [OCR](../README.md#OCR)
|
||||||
|
* `-e, --exclude` Regex pattern to exclude files. A file is excluded if the pattern matches any
|
||||||
|
part of the full absolute path.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
* `-e ".*\.ttf"`: Ignore ttf files
|
||||||
|
* `-e ".*\.(ttf|rar)"`: Ignore ttf and rar files
|
||||||
|
* `-e "^/mnt/backups/"`: Ignore all files in the `/mnt/backups/` directory
|
||||||
|
* `-e "^/mnt/Data[12]/"`: Ignore all files in the `/mnt/Data1/` and `/mnt/Data2/` directory
|
||||||
|
* `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"` Exclude the
|
||||||
|
`/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories
|
||||||
|
* `--fast` Only index file names and mime type
|
||||||
|
* `--treemap-threshold` Directories smaller than (`treemap-threshold` * `<total size of the index>`)
|
||||||
|
will not be considered for the disk utilisation visualization; their size will be added to
|
||||||
|
the parent directory. If the parent directory is still smaller than the threshold, it will also be "merged upwards"
|
||||||
|
and so on.
|
||||||
|
|
||||||
|
In effect, smaller `treemap-threshold` values will yield a more detailed
|
||||||
|
(but also a more cluttered and harder to read) visualization.
|
||||||
|
|
||||||
|
* `--mem-buffer` Maximum memory buffer size in MB (per thread) for files inside archives. Media files
|
||||||
|
larger than this number will be read sequentially and no *seek* operations will be supported.
|
||||||
|
|
||||||
|
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
|
||||||
|
|
||||||
|
### Scan examples
|
||||||
|
|
||||||
|
Simple scan
|
||||||
|
```bash
|
||||||
|
sist2 scan ~/Documents
|
||||||
|
|
||||||
|
sist2 scan \
|
||||||
|
--threads 4 --content-size 16000000 --quality 1.0 --archive shallow \
|
||||||
|
--name "My Documents" --rewrite-url "http://nas.domain.local/My Documents/" \
|
||||||
|
~/Documents -o ./documents.idx/
|
||||||
|
```
|
||||||
|
|
||||||
|
Incremental scan
|
||||||
|
```
|
||||||
|
sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
|
||||||
|
```
|
||||||
|
|
||||||
|
### Index format
|
||||||
|
|
||||||
|
A typical `binary` type index structure looks like this:
|
||||||
|
```
|
||||||
|
documents.idx/
|
||||||
|
├── descriptor.json
|
||||||
|
├── _index_139965416830720
|
||||||
|
├── _index_139965425223424
|
||||||
|
├── _index_139965433616128
|
||||||
|
├── _index_139965442008832
|
||||||
|
├── _index_139965442008832
|
||||||
|
├── treemap.csv
|
||||||
|
├── agg_mime.csv
|
||||||
|
├── agg_date.csv
|
||||||
|
├── agg_size.csv
|
||||||
|
├── thumbs
|
||||||
|
| ├── data.mdb
|
||||||
|
| └── lock.mdb
|
||||||
|
└── tags
|
||||||
|
├── data.mdb
|
||||||
|
└── lock.mdb
|
||||||
|
```
|
||||||
|
|
||||||
|
The `_index_*` files contain the raw binary index data and are not meant to be
|
||||||
|
read by other applications. The format is generally compatible across different
|
||||||
|
sist2 versions.
|
||||||
|
|
||||||
|
The `thumbs/` folder is a [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database)
|
||||||
|
database containing the thumbnails.
|
||||||
|
|
||||||
|
The `descriptor.json` file contains general information about the index. The
|
||||||
|
following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`.
|
||||||
|
|
||||||
|
The `.csv` files are pre-computed aggregations necessary for the stats page.
|
||||||
|
|
||||||
|
|
||||||
|
*Advanced usage*
|
||||||
|
|
||||||
|
Instead of using the `scan` module, you can also import an index generated
|
||||||
|
by a third party application. The 'external' index must have the following format:
|
||||||
|
|
||||||
|
```
|
||||||
|
my_index/
|
||||||
|
├── descriptor.json
|
||||||
|
├── _index_0
|
||||||
|
└── thumbs
|
||||||
|
├── data.mdb
|
||||||
|
└── lock.mdb
|
||||||
|
```
|
||||||
|
|
||||||
|
*descriptor.json*:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"uuid": "<valid UUID4>",
|
||||||
|
"version": "_external_v1",
|
||||||
|
"root": "(optional)",
|
||||||
|
"name": "<name>",
|
||||||
|
"rewrite_url": "(optional)",
|
||||||
|
"type": "json",
|
||||||
|
"timestamp": 1578971024
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
*_index_0*: NDJSON format (One json object per line)
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"_id": "unique uuid for the file",
|
||||||
|
"index": "index uuid4 (same one as descriptor.json!)",
|
||||||
|
"mime": "application/x-cbz",
|
||||||
|
"size": 14341204,
|
||||||
|
"mtime": 1578882996,
|
||||||
|
"extension": "cbz",
|
||||||
|
"name": "my_book",
|
||||||
|
"path": "path/to/books",
|
||||||
|
"content": "text contents of the book",
|
||||||
|
"title": "Title of the book",
|
||||||
|
"tag": ["genre.fiction", "author.someguy", "etc..."],
|
||||||
|
"_keyword": [
|
||||||
|
{"k": "ISBN", "v": "ABCD34789231"}
|
||||||
|
],
|
||||||
|
"_text": [
|
||||||
|
{"k": "other", "v": "This will be indexed as text"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
You can find the full list of supported fields [here](../src/io/serialize.c#L90)
|
||||||
|
|
||||||
|
The `_keyword.*` items will be indexed and searchable as **keyword** fields (only full matches allowed).
|
||||||
|
The `_text.*` items will be indexed and searchable as **text** fields (fuzzy searching allowed)
|
||||||
|
|
||||||
|
|
||||||
|
*thumbs/*:
|
||||||
|
|
||||||
|
LMDB key-value store. Keys are **binary** 128-bit UUID4s (`_id` field)
|
||||||
|
and values are raw image bytes.
|
||||||
|
|
||||||
|
Importing an external `binary` type index is technically possible but
|
||||||
|
it is currently unsupported and has no guarantees of back/forward compatibility.
|
||||||
|
|
||||||
|
|
||||||
|
## Index
|
||||||
|
### Index options
|
||||||
|
* `--es-url`
|
||||||
|
Elasticsearch url and port. If you are using docker, make sure that both containers are on the
|
||||||
|
same network.
|
||||||
|
* `--es-index`
|
||||||
|
Elasticsearch index name. DEFAULT=sist2
|
||||||
|
* `-p, --print`
|
||||||
|
Print index in JSON format to stdout.
|
||||||
|
* `--script-file`
|
||||||
|
Path to user script. See [Scripting](scripting.md).
|
||||||
|
* `--async-script`
|
||||||
|
Use `wait_for_completion=false` elasticsearch option while executing user script.
|
||||||
|
(See [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/tasks.html))
|
||||||
|
* `--batch-size=<int>`
|
||||||
|
Index batch size. Indexing is generally faster with larger batches, but payloads that
|
||||||
|
are too large will fail and additional overhead for retrying with smaller sizes may slow
|
||||||
|
down the process.
|
||||||
|
* `-f, --force-reset`
|
||||||
|
Reset Elasticsearch mappings and settings.
|
||||||
|
**(You must use this option the first time you use the index command)**.
|
||||||
|
|
||||||
|
### Index examples
|
||||||
|
|
||||||
|
**Push to elasticsearch**
|
||||||
|
```bash
|
||||||
|
sist2 index --force-reset --batch-size 1000 --es-url http://localhost:9200 ./my_index/
|
||||||
|
sist2 index ./my_index/
|
||||||
|
```
|
||||||
|
|
||||||
|
**Save index in JSON format**
|
||||||
|
```bash
|
||||||
|
sist2 index --print ./my_index/ > my_index.ndjson
|
||||||
|
```
|
||||||
|
|
||||||
|
**Inspect contents of an index**
|
||||||
|
```bash
|
||||||
|
sist2 index --print ./my_index/ | jq | less
|
||||||
|
```
|
||||||
|
|
||||||
|
## Web
|
||||||
|
|
||||||
|
### Web options
|
||||||
|
* `--es-url=<str>` Elasticsearch url.
|
||||||
|
* `--es-index`
|
||||||
|
Elasticsearch index name. DEFAULT=sist2
|
||||||
|
* `--bind=<str>` Listen on this address.
|
||||||
|
* `--auth=<str>` Basic auth in user:password format
|
||||||
|
* `--tag-auth=<str>` Basic auth in user:password format. Works the same way as the
|
||||||
|
`--auth` argument, but authentication is only applied to the `/tag/` endpoint.
|
||||||
|
|
||||||
|
### Web examples
|
||||||
|
|
||||||
|
**Single index**
|
||||||
|
```bash
|
||||||
|
sist2 web --auth admin:hunter2 --bind 0.0.0.0:8888 my_index
|
||||||
|
```
|
||||||
|
|
||||||
|
**Multiple indices**
|
||||||
|
```bash
|
||||||
|
# Indices will be displayed in this order in the web interface
|
||||||
|
sist2 web index1 index2 index3 index4
|
||||||
|
```
|
||||||
|
|
||||||
|
### rewrite_url
|
||||||
|
|
||||||
|
When the `rewrite_url` field is not empty, the web module ignores the `root`
|
||||||
|
field and will return a HTTP redirect to `<rewrite_url><path>/<name><extension>`
|
||||||
|
instead of serving the file from disk.
|
||||||
|
Both the `root` and `rewrite_url` fields are safe to manually modify from the
|
||||||
|
`descriptor.json` file.
|
||||||
|
|
||||||
|
### Link to specific indices
|
||||||
|
|
||||||
|
To link to specific indices, you can add a list of comma-separated index names to
|
||||||
|
the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are
|
||||||
|
not displayed.
|
||||||
|
|
||||||
|
## exec-script
|
||||||
|
|
||||||
|
The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.
|
||||||
|
|
||||||
|
|
||||||
|
# Tagging
|
||||||
|
|
||||||
|
### Manual tagging
|
||||||
|
|
||||||
|
You can modify tags of individual documents directly from the
|
||||||
|
`web` interface. Note that you can setup authentication for this feature
|
||||||
|
with the `--tag-auth` option (See [web options](#web-options))
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Tags that are manually added are saved both in the
|
||||||
|
index folder (in `/tags/`) and in Elasticsearch*. When re-`index`ing,
|
||||||
|
they are read from the index and automatically applied.
|
||||||
|
|
||||||
|
You can safely copy the `/tags/` database to another index.
|
||||||
|
|
||||||
|
See [Automatic tagging](#automatic-tagging) for information about tag
|
||||||
|
hierarchies and tag colors.
|
||||||
|
|
||||||
|
\* *It can take a few seconds to take effect in new search queries, and the page needs
|
||||||
|
to be reloaded for the tag tab to update*
|
||||||
|
|
||||||
|
|
||||||
|
### Automatic tagging
|
||||||
|
|
||||||
|
See [scripting](scripting.md) documentation.
|
||||||
|
Before Width: | Height: | Size: 26 KiB After Width: | Height: | Size: 26 KiB |
BIN
docs/manual_tag.png
Normal file
BIN
docs/manual_tag.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.9 KiB |
@@ -39,7 +39,7 @@ it adds the `genre.<genre>` tag.
|
|||||||
ArrayList tags = ctx._source.tag = new ArrayList();
|
ArrayList tags = ctx._source.tag = new ArrayList();
|
||||||
|
|
||||||
if (ctx._source?.genre != null) {
|
if (ctx._source?.genre != null) {
|
||||||
tags.add("genre." + ctx._source.genre.toLowerCase())
|
tags.add("genre." + ctx._source.genre.toLowerCase());
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -54,6 +54,11 @@ script.painless.regex.enabled: true
|
|||||||
```
|
```
|
||||||
Or, if you're using docker add `-e "script.painless.regex.enabled=true"`
|
Or, if you're using docker add `-e "script.painless.regex.enabled=true"`
|
||||||
|
|
||||||
|
**Tag color**
|
||||||
|
|
||||||
|
You can specify the color for an individual tag by appending an
|
||||||
|
hexadecimal color code (`#RRGGBBAA`) to the tag name.
|
||||||
|
|
||||||
### Examples
|
### Examples
|
||||||
|
|
||||||
If `(20XX)` is in the file name, add the `year.<year>` tag:
|
If `(20XX)` is in the file name, add the `year.<year>` tag:
|
||||||
@@ -62,7 +67,7 @@ ArrayList tags = ctx._source.tag = new ArrayList();
|
|||||||
|
|
||||||
Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
|
Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
|
||||||
if (m.find()) {
|
if (m.find()) {
|
||||||
tags.add("year." + m.group(1))
|
tags.add("year." + m.group(1));
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -106,12 +111,32 @@ if (ctx._source.path != "") {
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
|
Parse `EXIF:F Number` tag
|
||||||
```Java
|
```Java
|
||||||
ArrayList tags = ctx._source.tag = new ArrayList();
|
if (ctx._source?.exif_fnumber != null) {
|
||||||
|
String[] values = ctx._source.exif_fnumber.splitOnToken(' ');
|
||||||
if (ctx._source.path != "") {
|
String aperture = String.valueOf(Float.parseFloat(values[0]) / Float.parseFloat(values[1]));
|
||||||
String[] names = ctx._source.path.splitOnToken('/');
|
if (aperture == "NaN") {
|
||||||
tags.add("studio." + names[names.length-1]);
|
aperture = "0,0";
|
||||||
|
}
|
||||||
|
tags.add("Aperture.f/" + aperture.replace(".", ","));
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Display year and months from `EXIF:DateTime` tag
|
||||||
|
```Java
|
||||||
|
if (ctx._source?.exif_datetime != null) {
|
||||||
|
SimpleDateFormat parser = new SimpleDateFormat("yyyy:MM:dd HH:mm:ss");
|
||||||
|
Date date = parser.parse(ctx._source.exif_datetime);
|
||||||
|
|
||||||
|
SimpleDateFormat yp = new SimpleDateFormat("yyyy");
|
||||||
|
SimpleDateFormat mp = new SimpleDateFormat("MMMMMMMMM");
|
||||||
|
|
||||||
|
String year = yp.format(date);
|
||||||
|
String month = mp.format(date);
|
||||||
|
|
||||||
|
tags.add("Month." + month);
|
||||||
|
tags.add("Year." + year);
|
||||||
|
}
|
||||||
|
|
||||||
|
```
|
||||||
BIN
docs/sist2.png
Normal file
BIN
docs/sist2.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 889 KiB |
BIN
docs/stats.png
Normal file
BIN
docs/stats.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 167 KiB |
@@ -1,53 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright (c) 2010, Florian Reuter
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Florian Reuter nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
||||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
||||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
/**@file config/mce/config.h
|
|
||||||
*/
|
|
||||||
#ifndef MCE_CONFIG_H
|
|
||||||
#define MCE_CONFIG_H
|
|
||||||
|
|
||||||
#include <libxml/xmlstring.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <plib/plib.h>
|
|
||||||
#include <assert.h>
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define MCE_NAMESPACE_SUBSUMPTION_ENABLED 0
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* MCE_CONFIG_H */
|
|
||||||
@@ -1,189 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright (c) 2010, Florian Reuter
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Florian Reuter nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
||||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
||||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
/** @file mce/helper.h
|
|
||||||
Helper functions needed by mce/textreader.h and mce/textwriter.h to implement MCE:
|
|
||||||
- mceQNameLevelAdd(), mceQNameLevelLookup() and mceQNameLevelCleanup() maintain a set of mceQNameLevel_t tuples.
|
|
||||||
- mceQNameLevelPush() and mceQNameLevelPopIfMatch() maintain a stack of mceQNameLevel_t tuples.
|
|
||||||
- mceCtxInit(), mceCtxCleanup() and mceCtxUnderstandsNamespace() manage a context which holds all information needed to do MCE preprocessing.
|
|
||||||
*/
|
|
||||||
#include <mce/config.h>
|
|
||||||
|
|
||||||
#ifndef MCE_HELPER_H
|
|
||||||
#define MCE_HELPER_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
Triple (ns, ln, level).
|
|
||||||
*/
|
|
||||||
typedef struct MCE_QNAME_LEVEL {
|
|
||||||
xmlChar *ns;
|
|
||||||
xmlChar *ln;
|
|
||||||
puint32_t level;
|
|
||||||
puint32_t flag; // used by mceTextWriter
|
|
||||||
} mceQNameLevel_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
*/
|
|
||||||
typedef enum MCE_SKIP_STATE_ENUM {
|
|
||||||
MCE_SKIP_STATE_IGNORE,
|
|
||||||
MCE_SKIP_STATE_ALTERNATE_CONTENT,
|
|
||||||
MCE_SKIP_STATE_CHOICE_MATCHED
|
|
||||||
} mceSkipState_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Represents an interval of levels which are "skipped" i.e. ignored.
|
|
||||||
*/
|
|
||||||
typedef struct MCE_SKIP_ITEM {
|
|
||||||
puint32_t level_start;
|
|
||||||
puint32_t level_end;
|
|
||||||
mceSkipState_t state;
|
|
||||||
} mceSkipItem_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Either represents a set of (ns, ln, level) triples.
|
|
||||||
*/
|
|
||||||
typedef struct MCE_QNAME_LEVEL_SET {
|
|
||||||
mceQNameLevel_t *list_array;
|
|
||||||
puint32_t list_items;
|
|
||||||
puint32_t max_level;
|
|
||||||
} mceQNameLevelSet_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
The skip stack.
|
|
||||||
*/
|
|
||||||
typedef struct MCE_SKIP_STACK {
|
|
||||||
mceSkipItem_t *stack_array;
|
|
||||||
puint32_t stack_items;
|
|
||||||
} mceSkipStack_t;
|
|
||||||
|
|
||||||
|
|
||||||
typedef enum MCE_ERROR_ENUM {
|
|
||||||
MCE_ERROR_NONE,
|
|
||||||
MCE_ERROR_XML,
|
|
||||||
MCE_ERROR_MUST_UNDERSTAND,
|
|
||||||
MCE_ERROR_VALIDATION,
|
|
||||||
MCE_ERROR_MEMORY
|
|
||||||
} mceError_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Holds all information to do MCE preprocessing.
|
|
||||||
*/
|
|
||||||
typedef struct MCE_CONTEXT {
|
|
||||||
mceQNameLevelSet_t ignorable_set; /* NOTE(review): presumably the names currently declared ignorable -- confirm */
|
|
||||||
mceQNameLevelSet_t understands_set; /* NOTE(review): presumably filled by mceCtxUnderstandsNamespace() -- confirm */
|
|
||||||
mceQNameLevelSet_t processcontent_set; /* NOTE(review): presumably the "process content" names -- confirm */
|
|
||||||
mceQNameLevelSet_t suspended_set; /* NOTE(review): presumably filled by mceCtxSuspendProcessing() -- confirm */
|
|
||||||
#if (MCE_NAMESPACE_SUBSUMPTION_ENABLED)
|
|
||||||
mceQNameLevelSet_t subsume_namespace_set; /* the three subsume_* sets exist only when namespace subsumption is compiled in */
|
|
||||||
mceQNameLevelSet_t subsume_exclude_set;
|
|
||||||
mceQNameLevelSet_t subsume_prefix_set;
|
|
||||||
#endif
|
|
||||||
mceSkipStack_t skip_stack; /* stack of skip intervals (see mceSkipStackPush() / mceSkipStackPop()) */
|
|
||||||
mceError_t error; /* current error code; written by mce_error and tested by mce_error_guard_start */
|
|
||||||
pbool_t mce_disabled; /* NOTE(review): presumably the flag toggled by mceTextReaderDisableMCE() -- confirm */
|
|
||||||
puint32_t suspended_level; /* NOTE(review): presumably the level at which processing was suspended -- confirm */
|
|
||||||
} mceCtx_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Add a new triple (ns, ln, level) to the triple set \c qname_level_set.
|
|
||||||
The \c ns_sub string is optional and will not be touched.
|
|
||||||
*/
|
|
||||||
pbool_t mceQNameLevelAdd(mceQNameLevelSet_t *qname_level_set, const xmlChar *ns, const xmlChar *ln, puint32_t level);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Look up a triple (ns, ln, level) via \c ns and \c ln. If \c ignore_ln is PTRUE then the first triple matching \c ns will be returned.
|
|
||||||
*/
|
|
||||||
mceQNameLevel_t* mceQNameLevelLookup(mceQNameLevelSet_t *qname_level_set, const xmlChar *ns, const xmlChar *ln, pbool_t ignore_ln);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Remove all triples (ns, ln, level) where the level is greater than or equal to \c level.
|
|
||||||
*/
|
|
||||||
pbool_t mceQNameLevelCleanup(mceQNameLevelSet_t *qname_level_set, puint32_t level);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Push a new skip interval (level_start, level_end, state) on the stack \c skip_stack.
|
|
||||||
*/
|
|
||||||
pbool_t mceSkipStackPush(mceSkipStack_t *skip_stack, puint32_t level_start, puint32_t level_end, mceSkipState_t state);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Pop the skip interval (level_start, level_end, state) from the stack \c skip_stack.
|
|
||||||
*/
|
|
||||||
void mceSkipStackPop(mceSkipStack_t *skip_stack);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns top item or NULL.
|
|
||||||
*/
|
|
||||||
mceSkipItem_t *mceSkipStackTop(mceSkipStack_t *skip_stack);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns TRUE if the \c level is in the top skip interval.
|
|
||||||
*/
|
|
||||||
pbool_t mceSkipStackSkip(mceSkipStack_t *skip_stack, puint32_t level);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Initialize the mceCtx_t \c ctx.
|
|
||||||
*/
|
|
||||||
pbool_t mceCtxInit(mceCtx_t *ctx);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Cleanup, i.e. release all resources from the mceCtx_t \c ctx.
|
|
||||||
*/
|
|
||||||
pbool_t mceCtxCleanup(mceCtx_t *ctx);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Register the namespace \c ns in \c ctx.
|
|
||||||
*/
|
|
||||||
pbool_t mceCtxUnderstandsNamespace(mceCtx_t *ctx, const xmlChar *ns);
|
|
||||||
|
|
||||||
/**
|
|
||||||
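Register the qualified name (\c ns, \c ln) in \c ctx for suspended processing (presumably recorded in the context's suspended_set; confirm against the implementation).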
|
|
||||||
*/
|
|
||||||
pbool_t mceCtxSuspendProcessing(mceCtx_t *ctx, const xmlChar *ns, const xmlChar *ln);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#if (MCE_NAMESPACE_SUBSUMPTION_ENABLED)
|
|
||||||
/**
|
|
||||||
Subsume namespace \c ns_new with \c ns_old.
|
|
||||||
*/
|
|
||||||
pbool_t mceCtxSubsumeNamespace(mceCtx_t *ctx, const xmlChar *prefix_new, const xmlChar *ns_new, const xmlChar *ns_old);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* MCE_HELPER_H */
|
|
||||||
@@ -1,464 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright (c) 2010, Florian Reuter
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Florian Reuter nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
||||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
||||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
/** @file mce/textreader.h
|
|
||||||
|
|
||||||
*/
|
|
||||||
#ifndef MCE_TEXTREADER_H
|
|
||||||
#define MCE_TEXTREADER_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
A handle to an MCE-aware libxml2 xmlTextReader.
|
|
||||||
*/
|
|
||||||
typedef struct MCE_TEXTREADER mceTextReader_t;
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#include <mce/config.h>
|
|
||||||
#include <opc/opc.h>
|
|
||||||
#include <mce/helper.h>
|
|
||||||
#include <libxml/xmlwriter.h>
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Definition behind the mceTextReader_t handle: a libxml2 reader paired with its MCE state. */
struct MCE_TEXTREADER {
|
|
||||||
xmlTextReaderPtr reader; /* underlying libxml2 reader; the mce_* macros pass it to the xmlTextReader*() calls */
|
|
||||||
mceCtx_t mceCtx; /* MCE preprocessing state; mceCtx.error is what mce_error sets and mce_error_guard_start tests */
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
Wrapper around a libxml2 xmlTextReaderRead function.
|
|
||||||
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderRead
|
|
||||||
*/
|
|
||||||
int mceTextReaderRead(mceTextReader_t *mceTextReader);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Wrapper around a libxml2 xmlTextReaderNext function.
|
|
||||||
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderNext
|
|
||||||
*/
|
|
||||||
int mceTextReaderNext(mceTextReader_t *mceTextReader);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Creates an mceTextReader from an XmlTextReader.
|
|
||||||
\code
|
|
||||||
mceTextReader_t reader;
|
|
||||||
mceTextReaderInit(&reader, xmlNewTextReaderFilename("sample.xml"));
|
|
||||||
// reader is ready to use.
|
|
||||||
mceTextReaderCleanup(&reader);
|
|
||||||
\endcode
|
|
||||||
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlNewTextReaderFilename
|
|
||||||
*/
|
|
||||||
int mceTextReaderInit(mceTextReader_t *mceTextReader, xmlTextReaderPtr reader);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Cleanup MCE reader, i.e. free all resources. Also calls xmlTextReaderClose and xmlFreeTextReader.
|
|
||||||
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderClose
|
|
||||||
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlFreeTextReader
|
|
||||||
*/
|
|
||||||
int mceTextReaderCleanup(mceTextReader_t *mceTextReader);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Reads all events from \c mceTextReader and pipes them to \c writer.
|
|
||||||
\code
|
|
||||||
mceTextReader_t reader;
|
|
||||||
mceTextReaderInit(&reader, xmlNewTextReaderFilename("sample.xml"));
|
|
||||||
mceTextReaderUnderstandsNamespace(&reader, _X("http://myextension"));
|
|
||||||
xmlTextWriterPtr writer=xmlNewTextWriterFilename("out.xml", 0);
|
|
||||||
mceTextReaderDump(&reader, writer, P_FALSE);
|
|
||||||
xmlFreeTextWriter(writer);
|
|
||||||
mceTextReaderCleanup(&reader);
|
|
||||||
\endcode
|
|
||||||
*/
|
|
||||||
int mceTextReaderDump(mceTextReader_t *mceTextReader, xmlTextWriter *writer, pbool_t fragment);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Registers an MCE namespace.
|
|
||||||
\see mceTextReaderDump()
|
|
||||||
*/
|
|
||||||
int mceTextReaderUnderstandsNamespace(mceTextReader_t *mceTextReader, const xmlChar *ns);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Disable MCE processing.
|
|
||||||
\return Returns old value.
|
|
||||||
*/
|
|
||||||
pbool_t mceTextReaderDisableMCE(mceTextReader_t *mceTextReader, pbool_t flag);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Signal an error to the MCE processor.
|
|
||||||
*/
|
|
||||||
void mceRaiseError(xmlTextReader *reader, mceCtx_t *ctx, mceError_t error, const xmlChar *str, ...);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Internal function which does the MCE postprocessing. E.g. mceTextReaderRead() is implemented as
|
|
||||||
\code
|
|
||||||
mceTextReaderPostprocess(mceTextReader->reader, &mceTextReader->mceCtx, xmlTextReaderRead(mceTextReader->reader))
|
|
||||||
\endcode
|
|
||||||
This function is exposed to make an existing libxml2 xmlTextReader MCE-aware.
|
|
||||||
*/
|
|
||||||
int mceTextReaderPostprocess(xmlTextReader *reader, mceCtx_t *ctx, int ret);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Get the error code.
|
|
||||||
*/
|
|
||||||
mceError_t mceTextReaderGetError(mceTextReader_t *mceTextReader);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Helper macro to declare a start/end document block in a declarative way:
|
|
||||||
\code
|
|
||||||
mce_start_document(reader) {
|
|
||||||
} mce_end_document(reader);
|
|
||||||
\endcode
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_start_document(_reader_) \
|
|
||||||
if (NULL!=(_reader_)) { \
|
|
||||||
mceTextReaderRead(_reader_); \
|
|
||||||
if (0)
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see mce_start_document.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_end_document(_reader_) \
|
|
||||||
} /* if (NULL!=reader) */ \
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Container for mce_start_element and mce_start_attribute declarations.
|
|
||||||
\see mce_match_element
|
|
||||||
\see mce_match_attribute
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_start_choice(_reader_) \
|
|
||||||
if (0)
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see mce_start_choice
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_end_choice(_reader_)
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Skips the attributes.
|
|
||||||
\see mce_match_element.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_skip_attributes(_reader_) \
|
|
||||||
mce_start_attributes(_reader_) { \
|
|
||||||
} mce_end_attributes(_reader_);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Skips the children.
|
|
||||||
\see mce_match_attribute.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_skip_children(_reader_) \
|
|
||||||
mce_start_children(_reader_) { \
|
|
||||||
} mce_end_children(_reader_);
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see mce_start_element.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_start_children(_reader_) \
|
|
||||||
if (!xmlTextReaderIsEmptyElement((_reader_)->reader)) { \
|
|
||||||
mceTextReaderRead(_reader_); do { \
|
|
||||||
if (0)
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see mce_start_element.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_end_children(_reader_) \
|
|
||||||
else { \
|
|
||||||
if (XML_READER_TYPE_END_ELEMENT!=xmlTextReaderNodeType((_reader_)->reader)) { \
|
|
||||||
mceTextReaderNext(_reader_); /*skip unhandled element */ \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} while(XML_READER_TYPE_END_ELEMENT!=xmlTextReaderNodeType((_reader_)->reader) && \
|
|
||||||
XML_READER_TYPE_NONE!=xmlTextReaderNodeType((_reader_)->reader)); \
|
|
||||||
} /* if (!xmlTextReaderIsEmptyElement(reader->reader)) */
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Helper macro to match an element. Useful for calling code in a separate function:
|
|
||||||
|
|
||||||
\code
|
|
||||||
void handleElement(reader) {
|
|
||||||
mce_start_choice(reader) {
|
|
||||||
mce_start_element(reader, _X("ns"), _X("element")) {
|
|
||||||
|
|
||||||
} mce_end_element(reader);
|
|
||||||
} mce_end_choice(reader);
|
|
||||||
}
|
|
||||||
|
|
||||||
void parse(reader) {
|
|
||||||
mce_start_document(reader) {
|
|
||||||
mce_start_element(reader, _X("ns"), _X("ln")) {
|
|
||||||
mce_skip_attributes(reader);
|
|
||||||
mce_start_children(reader) {
|
|
||||||
mce_match_element(reader, _X("ns"), _X("element")) {
|
|
||||||
handleElement(reader);
|
|
||||||
}
|
|
||||||
} mce_end_children(reader);
|
|
||||||
} mce_end_element(reader);
|
|
||||||
} mce_end_document(reader);
|
|
||||||
}
|
|
||||||
\endcode
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
/*
 * Opens an `else if` branch that is taken when the reader is positioned on an
 * element node whose namespace URI equals ns and whose local name equals ln.
 * Passing NULL for ns or ln makes that part match anything.
 *
 * The expansion starts with `}` and ends with `{`, so it must be used inside an
 * open `if (0) {` chain such as the one provided by mce_start_children or
 * mce_start_choice; the caller supplies the closing brace.
 *
 * ns and ln are parenthesized so that compound arguments (e.g. `a ? a : b`) keep
 * their meaning. Each of them may be evaluated twice, so they must be free of
 * side effects.
 */
#define mce_match_element(_reader_, ns, ln) \
    } else if (XML_READER_TYPE_ELEMENT==xmlTextReaderNodeType((_reader_)->reader) \
        && (NULL==(ns) || 0==xmlStrcmp((ns), xmlTextReaderConstNamespaceUri((_reader_)->reader))) \
        && (NULL==(ln) || 0==xmlStrcmp((ln), xmlTextReaderConstLocalName((_reader_)->reader)))) {
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Helper macro to declare an element block in a declarative way:
|
|
||||||
\code
|
|
||||||
mce_start_element(reader, _X("ns"), _X("ln")) {
|
|
||||||
mce_start_attributes(reader) {
|
|
||||||
mce_start_attribute(reader, _X("ns"), _X("lnA")) {
|
|
||||||
// code for handling lnA.
|
|
||||||
} mce_end_attribute(reader);
|
|
||||||
mce_start_attribute(reader, _X("ns"), _X("lnB")) {
|
|
||||||
// code for handling lnB.
|
|
||||||
} mce_end_attribute(reader);
|
|
||||||
} mce_end_attributes(reader);
|
|
||||||
mce_start_children(reader) {
|
|
||||||
mce_start_element(reader, _X("ns"), _X("lnA")) {
|
|
||||||
// code for handling lnA.
|
|
||||||
} mce_end_element(reader);
|
|
||||||
mce_start_element(reader, _X("ns"), _X("lnB")) {
|
|
||||||
// code for handling lnB.
|
|
||||||
} mce_end_element(reader);
|
|
||||||
mce_start_text(reader) {
|
|
||||||
// code for handling text.
|
|
||||||
} mce_end_text(reader);
|
|
||||||
} mce_end_children(reader);
|
|
||||||
} mce_end_element(reader);
|
|
||||||
\endcode
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_start_element(_reader_, ns, ln) \
|
|
||||||
mce_match_element(_reader_, ns, ln)
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see mce_start_element.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_end_element(_reader_) \
|
|
||||||
mceTextReaderNext(_reader_)
|
|
||||||
|
|
||||||
/**
|
|
||||||
Matches #TEXT without consuming it.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_match_text(_reader_) \
|
|
||||||
} else if (XML_READER_TYPE_TEXT==xmlTextReaderNodeType((_reader_)->reader) \
|
|
||||||
|| XML_READER_TYPE_SIGNIFICANT_WHITESPACE==xmlTextReaderNodeType((_reader_)->reader)) {
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see mce_start_element.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_start_text(_reader_) \
|
|
||||||
mce_match_text(_reader_)
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see mce_start_element.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_end_text(_reader_) \
|
|
||||||
mceTextReaderNext(_reader_)
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see mce_start_element.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_start_attributes(_reader_) \
|
|
||||||
if (1==xmlTextReaderMoveToFirstAttribute((_reader_)->reader)) { \
|
|
||||||
do { \
|
|
||||||
if (0)
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see mce_start_element.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_end_attributes(_reader_) \
|
|
||||||
else { /* skipped attribute */ } \
|
|
||||||
} while(1==xmlTextReaderMoveToNextAttribute((_reader_)->reader)); \
|
|
||||||
xmlTextReaderMoveToElement((_reader_)->reader); }
|
|
||||||
|
|
||||||
/**
|
|
||||||
Helper macro to match an attribute. Useful for calling code in a separate function:
|
|
||||||
|
|
||||||
\code
|
|
||||||
void handleA(reader) {
|
|
||||||
mce_start_choice(reader) {
|
|
||||||
mce_start_attribute(reader, _X("ns"), _X("attr")) {
|
|
||||||
|
|
||||||
} mce_end_attribute(reader);
|
|
||||||
} mce_end_choice(reader);
|
|
||||||
}
|
|
||||||
|
|
||||||
void parse(reader) {
|
|
||||||
mce_start_document(reader) {
|
|
||||||
mce_start_element(reader, _X("ns"), _X("ln")) {
|
|
||||||
mce_start_attributes(reader) {
|
|
||||||
mce_match_attribute(reader, _X("ns"), _X("attr")) {
|
|
||||||
handleA(reader);
|
|
||||||
}
|
|
||||||
} mce_end_attributes(reader);
|
|
||||||
mce_skip_children(reader);
|
|
||||||
} mce_end_element(reader);
|
|
||||||
} mce_end_document(reader);
|
|
||||||
}
|
|
||||||
\endcode
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
/*
 * Opens an `else if` branch that is taken when the node the reader is currently
 * positioned on has namespace URI ns and local name ln. Passing NULL for ns or
 * ln makes that part match anything. No node-type check is performed here.
 *
 * The expansion starts with `}` and ends with `{`, so it must be used inside an
 * open `if (0) {` chain such as the one provided by mce_start_attributes or
 * mce_start_choice; the caller supplies the closing brace.
 *
 * ns and ln are parenthesized so that compound arguments (e.g. `a ? a : b`) keep
 * their meaning. Each of them may be evaluated twice, so they must be free of
 * side effects.
 */
#define mce_match_attribute(_reader_, ns, ln) \
    } else if ((NULL==(ns) || 0==xmlStrcmp((ns), xmlTextReaderConstNamespaceUri((_reader_)->reader))) \
        && (NULL==(ln) || 0==xmlStrcmp((ln), xmlTextReaderConstLocalName((_reader_)->reader)))) {
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see mce_start_element.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_start_attribute(_reader_, ns, ln) \
|
|
||||||
mce_match_attribute(_reader_, ns, ln)
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see mce_start_element.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_end_attribute(_reader_)
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Error handling for MCE parsers.
|
|
||||||
\code
|
|
||||||
mce_start_element(&reader, NULL, _X("Default")) {
|
|
||||||
const xmlChar *ext=NULL;
|
|
||||||
const xmlChar *type=NULL;
|
|
||||||
mce_start_attributes(&reader) {
|
|
||||||
mce_start_attribute(&reader, NULL, _X("Extension")) {
|
|
||||||
ext=xmlTextReaderConstValue(reader.reader);
|
|
||||||
} mce_end_attribute(&reader);
|
|
||||||
mce_start_attribute(&reader, NULL, _X("ContentType")) {
|
|
||||||
type=xmlTextReaderConstValue(reader.reader);
|
|
||||||
} mce_end_attribute(&reader);
|
|
||||||
} mce_end_attributes(&reader);
|
|
||||||
mce_error_guard_start(&reader) {
|
|
||||||
mce_error(&reader, NULL==ext || ext[0]==0, MCE_ERROR_VALIDATION, "Missing @Extension attribute!");
|
|
||||||
mce_error(&reader, NULL==type || type[0]==0, MCE_ERROR_VALIDATION, "Missing @ContentType attribute!");
|
|
||||||
opcContainerType *ct=insertType(c, type, OPC_TRUE);
|
|
||||||
mce_error(&reader, NULL==ct, MCE_ERROR_MEMORY, NULL);
|
|
||||||
opcContainerExtension *ce=opcContainerInsertExtension(c, ext, OPC_TRUE);
|
|
||||||
mce_error(&reader, NULL==ce, MCE_ERROR_MEMORY, NULL);
|
|
||||||
mce_errorf(&reader, NULL!=ce->type && 0!=xmlStrcmp(ce->type, type), MCE_ERROR_VALIDATION, "Extension \"%s\" is mapped to type \"%s\" as well as \"%s\"", ext, type, ce->type);
|
|
||||||
ce->type=ct->type;
|
|
||||||
} mce_error_guard_end(&reader);
|
|
||||||
mce_skip_children(&reader);
|
|
||||||
} mce_end_element(&reader);
|
|
||||||
\endcode
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_error_guard_start(_reader_) if (MCE_ERROR_NONE==(_reader_)->mceCtx.error) do {
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see mce_error_guard_start
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_error_guard_end(_reader_) } while(0)
|
|
||||||
|
|
||||||
/**
|
|
||||||
Signal an error if guard is true.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
/*
 * When guard is true: store err in the reader's mceCtx.error, print msg (or the
 * spelling of err when msg is NULL) to stderr, then `continue`.
 *
 * The message is printed through a fixed "%s" format so that any '%' inside msg
 * is emitted verbatim instead of being interpreted as a conversion specifier.
 *
 * The `continue` is deliberate and must not be wrapped in a loop of its own: it
 * leaves the enclosing do { ... } while(0) opened by mce_error_guard_start.
 */
#define mce_error(_reader_, guard, err, msg) if (guard) { (_reader_)->mceCtx.error=(err); fprintf(stderr, "%s", (NULL!=(msg)?(msg):#err)); continue; }
|
|
||||||
|
|
||||||
/**
|
|
||||||
Signal a formatted error (via mceRaiseError) if guard is true.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#if defined(__GNUC__)
|
|
||||||
#define mce_errorf(_reader_, guard, err, msg, ...) if (guard) { mceRaiseError((_reader_)->reader, &(_reader_)->mceCtx, err, _X((NULL!=msg?msg:#err)), ##__VA_ARGS__ ); continue; }
|
|
||||||
#else
|
|
||||||
#define mce_errorf(_reader_, guard, err, msg, ...) if (guard) { mceRaiseError((_reader_)->reader, &(_reader_)->mceCtx, err, _X((NULL!=msg?msg:#err)), __VA_ARGS__ ); continue; }
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
Intended to issue the error only when in "strict mode"; currently an unconditional alias for mce_error.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_error_strict mce_error
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see mce_error_strict
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_error_strictf mce_errorf
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Marker for an MCE definition.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_def
|
|
||||||
|
|
||||||
/**
|
|
||||||
Marker for a MCE reference.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define mce_ref(r) (r)
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* MCE_TEXTREADER_H */
|
|
||||||
@@ -1,176 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright (c) 2010, Florian Reuter
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Florian Reuter nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
||||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
||||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
/** @file mce/textwriter.h
|
|
||||||
|
|
||||||
*/
|
|
||||||
#include <mce/config.h>
|
|
||||||
#include <libxml/xmlwriter.h>
|
|
||||||
#include <mce/helper.h>
|
|
||||||
|
|
||||||
#ifndef MCE_TEXTWRITER_H
|
|
||||||
#define MCE_TEXTWRITER_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
Default flags for an MCE namespace declaration.
|
|
||||||
*/
|
|
||||||
#define MCE_DEFAULT 0x0
|
|
||||||
|
|
||||||
/**
|
|
||||||
Flags MCE namespace declaration "ignorable".
|
|
||||||
*/
|
|
||||||
#define MCE_IGNORABLE 0x1
|
|
||||||
|
|
||||||
/**
|
|
||||||
Flags MCE namespace declaration "must understand".
|
|
||||||
*/
|
|
||||||
#define MCE_MUSTUNDERSTAND 0x2
|
|
||||||
|
|
||||||
/**
|
|
||||||
The MCE text writer context.
|
|
||||||
*/
|
|
||||||
typedef struct MCE_TEXTWRITER_STRUCT mceTextWriter;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Create a new MCE text writer.
|
|
||||||
\see http://xmlsoft.org/html/libxml-xmlIO.html#xmlOutputBufferCreateIO
|
|
||||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlNewTextWriter
|
|
||||||
*/
|
|
||||||
mceTextWriter *mceTextWriterCreateIO(xmlOutputWriteCallback iowrite, xmlOutputCloseCallback ioclose, void *ioctx, xmlCharEncodingHandlerPtr encoder);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Helper which creates a new MCE text writer for a FILE handle.
|
|
||||||
*/
|
|
||||||
mceTextWriter *mceNewTextWriterFile(FILE *file);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Free all resources for \c w.
|
|
||||||
*/
|
|
||||||
int mceTextWriterFree(mceTextWriter *w);
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterStartDocument
|
|
||||||
*/
|
|
||||||
int mceTextWriterStartDocument(mceTextWriter *w);
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterEndDocument
|
|
||||||
*/
|
|
||||||
int mceTextWriterEndDocument(mceTextWriter *w);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Start a new XML element. If ns==NULL then there is no namespace and ""==ns means the default namespace.
|
|
||||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterStartElement
|
|
||||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterStartElementNS
|
|
||||||
*/
|
|
||||||
int mceTextWriterStartElement(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln);
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterEndElement
|
|
||||||
*/
|
|
||||||
int mceTextWriterEndElement(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln);
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterWriteString
|
|
||||||
*/
|
|
||||||
int mceTextWriterWriteString(mceTextWriter *w, const xmlChar *content);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Register a namespace. Must be called before mceTextWriterStartElement.
|
|
||||||
\see MCE_DEFAULT
|
|
||||||
\see MCE_IGNORABLE
|
|
||||||
\see MCE_MUSTUNDERSTAND
|
|
||||||
*/
|
|
||||||
const xmlChar *mceTextWriterRegisterNamespace(mceTextWriter *w, const xmlChar *ns, const xmlChar *prefix, int flags);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Register qname (ns, ln) as a "process content" element wrt. MCE. Must be called before mceTextWriterStartElement.
|
|
||||||
*/
|
|
||||||
int mceTextWriterProcessContent(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Writes a formatted attribute.
|
|
||||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterWriteFormatAttribute
|
|
||||||
*/
|
|
||||||
int mceTextWriterAttributeF(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln, const char *value, ...);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Starts an MCE alternate content section.
|
|
||||||
*/
|
|
||||||
int mceTextWriterStartAlternateContent(mceTextWriter *w);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Ends an MCE alternate content section.
|
|
||||||
*/
|
|
||||||
int mceTextWriterEndAlternateContent(mceTextWriter *w);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Start an MCE choice.
|
|
||||||
*/
|
|
||||||
int mceTextWriterStartChoice(mceTextWriter *w, const xmlChar *ns);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Ends an MCE choice.
|
|
||||||
*/
|
|
||||||
int mceTextWriterEndChoice(mceTextWriter *w);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Start an MCE fallback.
|
|
||||||
*/
|
|
||||||
int mceTextWriterStartFallback(mceTextWriter *w);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Ends an MCE fallback.
|
|
||||||
*/
|
|
||||||
int mceTextWriterEndFallback(mceTextWriter *w);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns the underlying xmlTextWriter.
|
|
||||||
*/
|
|
||||||
xmlTextWriterPtr mceTextWriterIntern(mceTextWriter *w);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Helper which creates a new xmlTextWriterPtr for a FILE handle.
|
|
||||||
*/
|
|
||||||
xmlTextWriterPtr xmlNewTextWriterFile(FILE *file);
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* MCE_TEXTWRITER_H */
|
|
||||||
@@ -1,189 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright (c) 2010, Florian Reuter
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Florian Reuter nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
||||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
||||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
/**@file config/opc/config.h
|
|
||||||
*/
|
|
||||||
#ifndef OPC_CONFIG_H
|
|
||||||
#define OPC_CONFIG_H
|
|
||||||
|
|
||||||
#include <libxml/xmlstring.h>
|
|
||||||
#include <plib/plib.h>
|
|
||||||
#include <assert.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Assert expression e is true. Will be removed entirely in release mode.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define OPC_ASSERT(e) assert(e)
|
|
||||||
|
|
||||||
/**
|
|
||||||
Assert expression e is true. Expression will be executed in release mode too.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#ifdef NDEBUG
|
|
||||||
#define OPC_ENSURE(e) (void)(e)
|
|
||||||
#else
|
|
||||||
#define OPC_ENSURE(e) assert(e)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Constant for boolean true.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define OPC_TRUE (0==0)
|
|
||||||
|
|
||||||
/**
|
|
||||||
Constant for boolean false.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define OPC_FALSE (0==1)
|
|
||||||
|
|
||||||
/**
|
|
||||||
Boolean type.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
typedef pbool_t opc_bool_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Type which represents an offset in e.g. a file.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
typedef pofs_t opc_ofs_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
8-bit unsigned integer.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
typedef puint8_t opc_uint8_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
16-bit unsigned integer.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
typedef puint16_t opc_uint16_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
32-bit unsigned integer.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
typedef puint32_t opc_uint32_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
64-bit unsigned integer.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
typedef puint64_t opc_uint64_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
8-bit signed integer.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
typedef pint8_t opc_int8_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
16-bit signed integer.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
typedef pint16_t opc_int16_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
32-bit signed integer.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
typedef pint32_t opc_int32_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
64-bit signed integer.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
typedef pint64_t opc_int64_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Default size of the deflate buffer used by zlib.
|
|
||||||
*/
|
|
||||||
#define OPC_DEFLATE_BUFFER_SIZE 4096
|
|
||||||
|
|
||||||
/**
|
|
||||||
Max system path len.
|
|
||||||
*/
|
|
||||||
#define OPC_MAX_PATH 512
|
|
||||||
|
|
||||||
/**
|
|
||||||
Error codes for the OPC module.
|
|
||||||
*/
|
|
||||||
typedef enum OPC_ERROR_ENUM {
    OPC_ERROR_NONE = 0,                        /**< No error; first enumerator, value 0. */
    OPC_ERROR_STREAM = 1,
    OPC_ERROR_SEEK = 2,                        /**< Can't seek. */
    OPC_ERROR_UNSUPPORTED_DATA_DESCRIPTOR = 3,
    OPC_ERROR_UNSUPPORTED_COMPRESSION = 4,
    OPC_ERROR_DEFLATE = 5,
    OPC_ERROR_HEADER = 6,
    OPC_ERROR_MEMORY = 7,
    OPC_ERROR_XML = 8,
    OPC_ERROR_USER = 9                         /**< User triggered an abort. */
} opc_error_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Compression options for OPC streams.
|
|
||||||
*/
|
|
||||||
typedef enum OPC_COMPRESSIONOPTION_ENUM {
    /* Values follow declaration order, starting at 0. */
    OPC_COMPRESSIONOPTION_NONE = 0,
    OPC_COMPRESSIONOPTION_NORMAL = 1,
    OPC_COMPRESSIONOPTION_MAXIMUM = 2,
    OPC_COMPRESSIONOPTION_FAST = 3,
    OPC_COMPRESSIONOPTION_SUPERFAST = 4
} opcCompressionOption_t;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Helper for debug logs.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define opc_logf printf
|
|
||||||
|
|
||||||
/**
|
|
||||||
Abstraction for memset(m, 0, s).
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define opc_bzero_mem(m,s) memset((m), 0, (s)) /* zero-fill s bytes starting at m */
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* OPC_CONFIG_H */
|
|
||||||
@@ -1,300 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright (c) 2010, Florian Reuter
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Florian Reuter nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
||||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
||||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
/** @file opc/container.h
|
|
||||||
|
|
||||||
The container.h module has the fundamental methods for dealing with ZIP-based OPC container.
|
|
||||||
|
|
||||||
OPC container can be opened in READ-ONLY mode, WRITE-ONLY mode, READ/WRITE mode, TEMPLATE mode and TRANSITION mode.
|
|
||||||
The most notable mode is the READ/WRITE mode, which gives you concurrent stream-based READ and WRITE access to a
|
|
||||||
single ZIP-based OPC container. This is achieved without the use of temporary files by taking advantage of the
|
|
||||||
OPC specific “interleave” mode. \see http://standards.iso.org/ittf/PubliclyAvailableStandards/c051459_ISOIEC_29500-2_2008(E).zip
|
|
||||||
|
|
||||||
The TEMPLATE mode allows very fast customized "cloning" of ZIP-based OPC container by using "RAW access" to the ZIP streams.
|
|
||||||
The TRANSITION mode is a special version of the TEMPLATE mode, which allows transition-based READ/WRITE access to the
|
|
||||||
ZIP-based OPC container using a temporary file.
|
|
||||||
|
|
||||||
*/
|
|
||||||
#include <opc/config.h>
|
|
||||||
#include <opc/file.h>
|
|
||||||
|
|
||||||
#ifndef OPC_CONTAINER_H
|
|
||||||
#define OPC_CONTAINER_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
/**
|
|
||||||
Handle to an OPC container created by \ref opcContainerOpen.
|
|
||||||
\see opcContainerOpen.
|
|
||||||
*/
|
|
||||||
typedef struct OPC_CONTAINER_STRUCT opcContainer;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Modes for opcContainerOpen();
|
|
||||||
\see opcContainerOpen
|
|
||||||
*/
|
|
||||||
typedef enum {
    /**
      Opens the OPC container denoted by \a fileName in READ-ONLY mode.
      The \a destName parameter must be \a NULL.
      \hideinitializer
    */
    OPC_OPEN_READ_ONLY=0,
    /**
      Opens the OPC container denoted by \a fileName in WRITE-ONLY mode.
      The \a destName parameter must be \a NULL.
      \hideinitializer
    */
    OPC_OPEN_WRITE_ONLY=1,
    /**
      Opens the OPC container denoted by \a fileName in READ/WRITE mode.
      The \a destName parameter must be \a NULL.
      \hideinitializer
    */
    OPC_OPEN_READ_WRITE=2,
    /**
      Opens the container denoted by \a fileName in READ-ONLY mode and the
      container denoted by \a destName in WRITE-ONLY mode. Any modifications
      are written to the container denoted by \a destName, and the unmodified
      streams from \a fileName are written to \a destName on closing.
      \warning Currently not implemented.
      \hideinitializer
    */
    OPC_OPEN_TEMPLATE=3,
    /**
      Like the OPC_OPEN_TEMPLATE mode, but \a destName is renamed to
      \a fileName on closing. If \a destName is \a NULL, the name of the
      temporary file is generated automatically.
      \warning Currently not implemented.
      \hideinitializer
    */
    OPC_OPEN_TRANSITION=4
} opcContainerOpenMode;
|
|
||||||
|
|
||||||
/** Modes for opcContainerClose.
|
|
||||||
\see opcContainerClose.
|
|
||||||
*/
|
|
||||||
typedef enum {
    /**
      Close the OPC container without any further postprocessing.
      \hideinitializer
    */
    OPC_CLOSE_NOW = 0,
    /**
      Close the OPC container and trim the file by removing unused
      fragments, e.g. deleted parts.
      \hideinitializer
    */
    OPC_CLOSE_TRIM = 1,
    /**
      Close the OPC container as in \a OPC_CLOSE_TRIM mode, but additionally
      remove any "interleaved" parts by reordering them.
      \warning Currently not implemented. Same semantics as OPC_CLOSE_TRIM.
      \hideinitializer
    */
    OPC_CLOSE_DEFRAG = 2
} opcContainerCloseMode;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Opens a ZIP-based OPC container.
|
|
||||||
@param[in] fileName. For more details see \ref opcContainerOpenMode.
|
|
||||||
@param[in] mode. For more details see \ref opcContainerOpenMode.
|
|
||||||
@param[in] userContext. Will not be modified by libopc. Can be used to e.g. store the "this" pointer for C++ bindings.
|
|
||||||
@param[in] destName. For more details see \ref opcContainerOpenMode.
|
|
||||||
@return \a NULL if failed.
|
|
||||||
\see opcContainerOpenMode
|
|
||||||
\see opcContainerDump
|
|
||||||
*/
|
|
||||||
opcContainer* opcContainerOpen(const xmlChar *fileName,
|
|
||||||
opcContainerOpenMode mode,
|
|
||||||
void *userContext,
|
|
||||||
const xmlChar *destName);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Opens a ZIP-based OPC container from memory.
|
|
||||||
@param[in] data.
|
|
||||||
@param[in] data_len.
|
|
||||||
@param[in] userContext. Will not be modified by libopc. Can be used to e.g. store the "this" pointer for C++ bindings.
|
|
||||||
@param[in] mode. For more details see \ref opcContainerOpenMode.
|
|
||||||
@return \a NULL if failed.
|
|
||||||
*/
|
|
||||||
opcContainer* opcContainerOpenMem(const opc_uint8_t *data, opc_uint32_t data_len,
|
|
||||||
opcContainerOpenMode mode,
|
|
||||||
void *userContext);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Opens a ZIP-based OPC container through user-supplied I/O callbacks.
|
|
||||||
@param[in] ioread.
|
|
||||||
@param[in] iowrite.
|
|
||||||
@param[in] ioclose.
|
|
||||||
@param[in] ioseek.
|
|
||||||
@param[in] iotrim.
|
|
||||||
@param[in] ioflush.
|
|
||||||
@param[in] iocontext.
|
|
||||||
@param[in] file_size.
|
|
||||||
@param[in] userContext. Will not be modified by libopc. Can be used to e.g. store the "this" pointer for C++ bindings.
|
|
||||||
@param[in] mode. For more details see \ref opcContainerOpenMode.
|
|
||||||
@return \a NULL if failed.
|
|
||||||
*/
|
|
||||||
opcContainer* opcContainerOpenIO(opcFileReadCallback *ioread,
|
|
||||||
opcFileWriteCallback *iowrite,
|
|
||||||
opcFileCloseCallback *ioclose,
|
|
||||||
opcFileSeekCallback *ioseek,
|
|
||||||
opcFileTrimCallback *iotrim,
|
|
||||||
opcFileFlushCallback *ioflush,
|
|
||||||
void *iocontext,
|
|
||||||
pofs_t file_size,
|
|
||||||
opcContainerOpenMode mode,
|
|
||||||
void *userContext);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Close an OPC container.
|
|
||||||
@param[in] c. \ref opcContainer opened by \ref opcContainerOpen.
|
|
||||||
@param[in] mode. For more information see \ref opcContainerCloseMode.
|
|
||||||
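@return An \ref opc_error_t status code. NOTE(review): this previously read "Non-zero if successful", but the success code OPC_ERROR_NONE is the first enumerator (0); confirm against the implementation.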
|
|
||||||
\see opcContainerOpen
|
|
||||||
\see opcContainerCloseMode
|
|
||||||
*/
|
|
||||||
opc_error_t opcContainerClose(opcContainer *c, opcContainerCloseMode mode);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns the unmodified user context passed to \ref opcContainerOpen.
|
|
||||||
\see opcContainerOpen
|
|
||||||
*/
|
|
||||||
void *opcContainerGetUserContext(opcContainer *c);
|
|
||||||
|
|
||||||
/**
|
|
||||||
List all types, relations and parts of the container \a c to \a out.
|
|
||||||
\par Sample:
|
|
||||||
\include opc_dump.c
|
|
||||||
*/
|
|
||||||
opc_error_t opcContainerDump(opcContainer *c, FILE *out);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Exports the OPC container to "Flat OPC" (http://blogs.msdn.com/b/ericwhite/archive/2008/09/29/the-flat-opc-format.aspx).
|
|
||||||
The flat versions of an OPC file are very important when dealing with e.g XSL(T)-based or Javascript-based transformations.
|
|
||||||
\see opcContainerFlatImport.
|
|
||||||
\todo Implementation needed.
|
|
||||||
*/
|
|
||||||
int opcContainerFlatExport(opcContainer *c, const xmlChar *fileName);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Imports the flat version of an OPC container.
|
|
||||||
\see opcContainerFlatExport.
|
|
||||||
\todo Implementation needed.
|
|
||||||
*/
|
|
||||||
int opcContainerFlatImport(opcContainer *c, const xmlChar *fileName);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Iterate all types.
|
|
||||||
\code
|
|
||||||
for(const xmlChar *type=opcContentTypeFirst(c);
|
|
||||||
NULL!=type;
|
|
||||||
type=opcContentTypeNext(c, type)) {
|
|
||||||
printf("%s\n", type);
|
|
||||||
}
|
|
||||||
\endcode
|
|
||||||
*/
|
|
||||||
const xmlChar *opcContentTypeFirst(opcContainer *container);
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see opcContentTypeFirst()
|
|
||||||
*/
|
|
||||||
const xmlChar *opcContentTypeNext(opcContainer *container, const xmlChar *type);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Iterate extensions.
|
|
||||||
\code
|
|
||||||
for(const xmlChar *ext=opcExtensionFirst(c);
|
|
||||||
NULL!=ext;
|
|
||||||
ext=opcExtensionNext(c, ext)) {
|
|
||||||
printf("%s\n", ext);
|
|
||||||
}
|
|
||||||
\endcode
|
|
||||||
*/
|
|
||||||
const xmlChar *opcExtensionFirst(opcContainer *container);
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see opcExtensionFirst()
|
|
||||||
*/
|
|
||||||
const xmlChar *opcExtensionNext(opcContainer *container, const xmlChar *ext);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Get registered type for extension.
|
|
||||||
\see opcExtensionRegister()
|
|
||||||
*/
|
|
||||||
const xmlChar *opcExtensionGetType(opcContainer *container, const xmlChar *ext);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Register a mime-type and an extension.
|
|
||||||
\see opcExtensionGetType()
|
|
||||||
*/
|
|
||||||
const xmlChar *opcExtensionRegister(opcContainer *container, const xmlChar *ext, const xmlChar *type);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Iterator through all relation types of the container:
|
|
||||||
\code
|
|
||||||
for(const xmlChar *type=opcRelationTypeFirst(c);
|
|
||||||
NULL!=type;
|
|
||||||
type=opcRelationTypeNext(c, type)) {
|
|
||||||
printf("%s\n", type);
|
|
||||||
}
|
|
||||||
\endcode
|
|
||||||
*/
|
|
||||||
const xmlChar *opcRelationTypeFirst(opcContainer *container);
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see opcRelationTypeFirst()
|
|
||||||
*/
|
|
||||||
const xmlChar *opcRelationTypeNext(opcContainer *container, const xmlChar *type);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Iterate through all external targets of the container:
|
|
||||||
\code
|
|
||||||
for(const xmlChar *target=opcExternalTargetFirst(c);
|
|
||||||
NULL!=target;
|
|
||||||
target=opcExternalTargetNext(c, target)) {
|
|
||||||
printf("%s\n", target);
|
|
||||||
}
|
|
||||||
\endcode
|
|
||||||
*/
|
|
||||||
const xmlChar *opcExternalTargetFirst(opcContainer *container);
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see opcExternalTargetFirst()
|
|
||||||
*/
|
|
||||||
const xmlChar *opcExternalTargetNext(opcContainer *container, const xmlChar *target);
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* OPC_CONTAINER_H */
|
|
||||||
@@ -1,200 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright (c) 2010, Florian Reuter
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Florian Reuter nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
||||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
||||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
/** @file opc/file.h
|
|
||||||
The opc module contains the file library functions.
|
|
||||||
*/
|
|
||||||
#include <opc/config.h>
|
|
||||||
|
|
||||||
#ifndef OPC_FILE_H
|
|
||||||
#define OPC_FILE_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
Flag for READ access.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define OPC_FILE_READ 0x01 /* bit 0 */
|
|
||||||
|
|
||||||
/**
|
|
||||||
Flag for WRITE access.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define OPC_FILE_WRITE 0x02 /* bit 1 */
|
|
||||||
|
|
||||||
/**
|
|
||||||
Flag indicates that file will be truncated when opened.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define OPC_FILE_TRUNC 0x04 /* bit 2 */
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Abstraction for seek modes.
|
|
||||||
*/
|
|
||||||
typedef enum OPC_FILESEEKMODE_ENUM {
    /* Each enumerator takes the value of the matching <stdio.h> SEEK_* macro. */
    opcFileSeekSet = SEEK_SET,
    opcFileSeekCur = SEEK_CUR,
    opcFileSeekEnd = SEEK_END
} opcFileSeekMode;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Callback to read a file. E.g. for a FILE * context this can be implemented as
|
|
||||||
\code
|
|
||||||
static int opcFileRead(void *iocontext, char *buffer, int len) {
|
|
||||||
return fread(buffer, sizeof(char), len, (FILE*)iocontext);
|
|
||||||
}
|
|
||||||
\endcode
|
|
||||||
*/
|
|
||||||
typedef int opcFileReadCallback(void *iocontext, char *buffer, int len);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Callback to write a file. E.g. for a FILE * context this can be implemented as
|
|
||||||
\code
|
|
||||||
static int opcFileWrite(void *iocontext, const char *buffer, int len) {
|
|
||||||
return fwrite(buffer, sizeof(char), len, (FILE*)iocontext);
|
|
||||||
}
|
|
||||||
\endcode
|
|
||||||
*/
|
|
||||||
typedef int opcFileWriteCallback(void *iocontext, const char *buffer, int len);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Callback to close a file. E.g. for a FILE * context this can be implemented as
|
|
||||||
\code
|
|
||||||
static int opcFileClose(void *iocontext) {
|
|
||||||
return fclose((FILE*)iocontext);
|
|
||||||
}
|
|
||||||
\endcode
|
|
||||||
*/
|
|
||||||
typedef int opcFileCloseCallback(void *iocontext);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Callback to seek a file. E.g. for a FILE * context this can be implemented as
|
|
||||||
\code
|
|
||||||
static opc_ofs_t opcFileSeek(void *iocontext, opc_ofs_t ofs) {
|
|
||||||
int ret=fseek((FILE*)iocontext, ofs, SEEK_SET);
|
|
||||||
if (ret>=0) {
|
|
||||||
return ftell((FILE*)iocontext);
|
|
||||||
} else {
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
\endcode
|
|
||||||
*/
|
|
||||||
typedef opc_ofs_t opcFileSeekCallback(void *iocontext, opc_ofs_t ofs);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Callback to trim a file. E.g. for a FILE * context this can be implemented as
|
|
||||||
\code
|
|
||||||
static int opcFileTrim(void *iocontext, opc_ofs_t new_size) {
|
|
||||||
#ifdef WIN32
|
|
||||||
return _chsize(fileno((FILE*)iocontext), new_size);
|
|
||||||
#else
|
|
||||||
return ftruncate(fileno((FILE*)iocontext), new_size);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
\endcode
|
|
||||||
*/
|
|
||||||
typedef int opcFileTrimCallback(void *iocontext, opc_ofs_t new_size);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Callback to flush a file. E.g. for a FILE * context this can be implemented as
|
|
||||||
\code
|
|
||||||
static int opcFileFlush(void *iocontext) {
|
|
||||||
return fflush((FILE*)iocontext);
|
|
||||||
}
|
|
||||||
\endcode
|
|
||||||
*/
|
|
||||||
typedef int opcFileFlushCallback(void *iocontext);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Represents a state of a file, i.e. file position (buf_pos) and error status (err).
|
|
||||||
*/
|
|
||||||
typedef struct OPC_FILERAWSTATE_STRUCT {
|
|
||||||
opc_error_t err;
|
|
||||||
opc_ofs_t buf_pos; // current pos in file
|
|
||||||
} opcFileRawState;
|
|
||||||
|
|
||||||
/**
|
|
||||||
File IO context.
|
|
||||||
*/
|
|
||||||
typedef struct OPC_IO_STRUCT {
|
|
||||||
opcFileReadCallback *_ioread;
|
|
||||||
opcFileWriteCallback *_iowrite;
|
|
||||||
opcFileCloseCallback *_ioclose;
|
|
||||||
opcFileSeekCallback *_ioseek;
|
|
||||||
opcFileTrimCallback *_iotrim;
|
|
||||||
opcFileFlushCallback *_ioflush;
|
|
||||||
void *iocontext;
|
|
||||||
int flags;
|
|
||||||
opcFileRawState state;
|
|
||||||
opc_ofs_t file_size;
|
|
||||||
} opcIO_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Initialize an IO context.
|
|
||||||
*/
|
|
||||||
opc_error_t opcFileInitIO(opcIO_t *io,
|
|
||||||
opcFileReadCallback *ioread,
|
|
||||||
opcFileWriteCallback *iowrite,
|
|
||||||
opcFileCloseCallback *ioclose,
|
|
||||||
opcFileSeekCallback *ioseek,
|
|
||||||
opcFileTrimCallback *iotrim,
|
|
||||||
opcFileFlushCallback *ioflush,
|
|
||||||
void *iocontext,
|
|
||||||
pofs_t file_size,
|
|
||||||
int flags);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Initialize an IO context for a file.
|
|
||||||
*/
|
|
||||||
opc_error_t opcFileInitIOFile(opcIO_t *io, const xmlChar *filename, int flags);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Initialize an IO for memory.
|
|
||||||
\warning Currently supports READ-ONLY file access.
|
|
||||||
*/
|
|
||||||
opc_error_t opcFileInitIOMemory(opcIO_t *io, const opc_uint8_t *data, opc_uint32_t data_len, int flags);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Cleanup an IO context, i.e. release all system resources.
|
|
||||||
*/
|
|
||||||
opc_error_t opcFileCleanupIO(opcIO_t *io);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* OPC_FILE_H */
|
|
||||||
@@ -1,60 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright (c) 2010, Florian Reuter
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Florian Reuter nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
||||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
||||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
/** @file opc/helper.h
|
|
||||||
Contains helper functions for the opc module.
|
|
||||||
*/
|
|
||||||
#include <opc/config.h>
|
|
||||||
|
|
||||||
#ifndef OPC_HELPER_H
|
|
||||||
#define OPC_HELPER_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
Constructs a segment name.
|
|
||||||
*/
|
|
||||||
opc_uint16_t opcHelperAssembleSegmentName(char *out, opc_uint16_t out_size, const xmlChar *name, opc_uint32_t segment_number, opc_uint32_t next_segment_id, opc_bool_t rels_segment, opc_uint16_t *out_max);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Splits a filename into the segment informations.
|
|
||||||
*/
|
|
||||||
opc_error_t opcHelperSplitFilename(opc_uint8_t *filename, opc_uint32_t filename_length, opc_uint32_t *segment_number, opc_bool_t *last_segment, opc_bool_t *rel_segment);
|
|
||||||
|
|
||||||
#endif /* OPC_HELPER_H */
|
|
||||||
@@ -1,74 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright (c) 2010, Florian Reuter
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Florian Reuter nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
||||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
||||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
/** @file opc/inputstream.h
|
|
||||||
|
|
||||||
*/
|
|
||||||
#include <opc/config.h>
|
|
||||||
|
|
||||||
#ifndef OPC_INPUTSTREAM_H
|
|
||||||
#define OPC_INPUTSTREAM_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
/**
|
|
||||||
Internal type which represents a binary input stream.
|
|
||||||
*/
|
|
||||||
typedef struct OPC_CONTAINER_INPUTSTREAM_STRUCT opcContainerInputStream;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Opens the part \c name of the \c container for reading.
|
|
||||||
*/
|
|
||||||
opcContainerInputStream* opcContainerOpenInputStream(opcContainer *container, const xmlChar *name);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Reads maximal \c buffer_len bytes from the input \c stream to \c buffer.
|
|
||||||
\return The number of bytes read or "0" in case of an error or end-of-stream.
|
|
||||||
*/
|
|
||||||
opc_uint32_t opcContainerReadInputStream(opcContainerInputStream* stream, opc_uint8_t *buffer, opc_uint32_t buffer_len);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Closes the input stream and releases all system resources.
|
|
||||||
*/
|
|
||||||
opc_error_t opcContainerCloseInputStream(opcContainerInputStream* stream);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns the type of compression used for the stream.
|
|
||||||
*/
|
|
||||||
opcCompressionOption_t opcContainerGetInputStreamCompressionOption(opcContainerInputStream* stream);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* OPC_INPUTSTREAM_H */
|
|
||||||
@@ -1,73 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright (c) 2010, Florian Reuter
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Florian Reuter nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
||||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
||||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
/** @file opc/opc.h
|
|
||||||
The opc module contains the basic library functions.
|
|
||||||
*/
|
|
||||||
#include <opc/config.h>
|
|
||||||
#include <opc/container.h>
|
|
||||||
#include <opc/part.h>
|
|
||||||
#include <opc/relation.h>
|
|
||||||
#include <opc/inputstream.h>
|
|
||||||
#include <opc/outputstream.h>
|
|
||||||
#include <opc/zip.h>
|
|
||||||
#include <opc/xmlreader.h>
|
|
||||||
#include <opc/xmlwriter.h>
|
|
||||||
#include <opc/properties.h>
|
|
||||||
|
|
||||||
#ifndef OPC_OPC_H
|
|
||||||
#define OPC_OPC_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize libopc.
|
|
||||||
* Sample:
|
|
||||||
* \include opc_helloworld.c
|
|
||||||
* @return Non-zero if successful.
|
|
||||||
*/
|
|
||||||
opc_error_t opcInitLibrary();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Free libopc. Clean up all resources.
|
|
||||||
* @return Non-zero if successful.
|
|
||||||
* \see opcInitLibrary.
|
|
||||||
*/
|
|
||||||
opc_error_t opcFreeLibrary();
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* OPC_OPC_H */
|
|
||||||
@@ -1,71 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright (c) 2010, Florian Reuter
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Florian Reuter nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
||||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
||||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
/** @file opc/outputstream.h
|
|
||||||
|
|
||||||
*/
|
|
||||||
#include <opc/config.h>
|
|
||||||
|
|
||||||
#ifndef OPC_OUTPUTSTREAM_H
|
|
||||||
#define OPC_OUTPUTSTREAM_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
/**
|
|
||||||
Internal type which represents a binary output stream.
|
|
||||||
*/
|
|
||||||
typedef struct OPC_CONTAINER_OUTPUTSTREAM_STRUCT opcContainerOutputStream;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Open the part \c name for writing in \c container with compression \c compression_option.
|
|
||||||
\note Make sure the part exists!
|
|
||||||
\see opcPartCreate.
|
|
||||||
*/
|
|
||||||
opcContainerOutputStream* opcContainerCreateOutputStream(opcContainer *container, const xmlChar *name, opcCompressionOption_t compression_option);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Write \c buffer_len bytes from \c buffer to \c stream.
|
|
||||||
\return Returns the number of bytes written.
|
|
||||||
*/
|
|
||||||
opc_uint32_t opcContainerWriteOutputStream(opcContainerOutputStream* stream, const opc_uint8_t *buffer, opc_uint32_t buffer_len);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Close the \c stream and free all associated resources.
|
|
||||||
*/
|
|
||||||
opc_error_t opcContainerCloseOutputStream(opcContainerOutputStream* stream);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* OPC_OUTPUTSTREAM_H */
|
|
||||||
@@ -1,118 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright (c) 2010, Florian Reuter
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Florian Reuter nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
||||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
||||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
/** @file opc/part.h
|
|
||||||
|
|
||||||
*/
|
|
||||||
#include <opc/config.h>
|
|
||||||
|
|
||||||
#ifndef OPC_PART_H
|
|
||||||
#define OPC_PART_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
/**
|
|
||||||
Handle to an OPC part created by \ref opcPartOpen.
|
|
||||||
\see opcPartOpen.
|
|
||||||
*/
|
|
||||||
typedef xmlChar* opcPart;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Represents an invalid (resp. NULL) part.
|
|
||||||
In relations OPC_PART_INVALID also represents the root part.
|
|
||||||
\hideinitializer
|
|
||||||
*/
|
|
||||||
#define OPC_PART_INVALID NULL
|
|
||||||
|
|
||||||
/**
|
|
||||||
Find a part in a \c container by \c absolutePath and/or \c type.
|
|
||||||
Currently no flags are supported.
|
|
||||||
*/
|
|
||||||
opcPart opcPartFind(opcContainer *container,
|
|
||||||
const xmlChar *absolutePath,
|
|
||||||
const xmlChar *type,
|
|
||||||
int flags);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Creates a part in a \c container with \c absolutePath and \c type.
|
|
||||||
Currently no flags are supported.
|
|
||||||
*/
|
|
||||||
opcPart opcPartCreate(opcContainer *container,
|
|
||||||
const xmlChar *absolutePath,
|
|
||||||
const xmlChar *type,
|
|
||||||
int flags);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns the type of the \c part.
|
|
||||||
The string is interned and must not be freed.
|
|
||||||
*/
|
|
||||||
const xmlChar *opcPartGetType(opcContainer *c, opcPart part);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns the type of the \c part.
|
|
||||||
If \c override_only then the return value will be NULL for parts not having an override type.
|
|
||||||
The string is interned and must not be freed.
|
|
||||||
*/
|
|
||||||
const xmlChar *opcPartGetTypeEx(opcContainer *c, opcPart part, opc_bool_t override_only);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Deletes the part \c absolutePath in the \c container.
|
|
||||||
*/
|
|
||||||
opc_error_t opcPartDelete(opcContainer *container, const xmlChar *absolutePath);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Get the first part.
|
|
||||||
\code
|
|
||||||
for(opcPart part=opcPartGetFirst(c);OPC_PART_INVALID!=part;part=opcPartGetNext(c, part)) {
|
|
||||||
printf("%s; %s\n", part, opcPartGetType(c, part));
|
|
||||||
}
|
|
||||||
\endcode
|
|
||||||
*/
|
|
||||||
opcPart opcPartGetFirst(opcContainer *container);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Get the next part.
|
|
||||||
\see opcPartGetFirst
|
|
||||||
*/
|
|
||||||
opcPart opcPartGetNext(opcContainer *container, opcPart part);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns the size in bytes of the \c part.
|
|
||||||
*/
|
|
||||||
opc_ofs_t opcPartGetSize(opcContainer *c, opcPart part);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* OPC_PART_H */
|
|
||||||
@@ -1,121 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright (c) 2010, Florian Reuter
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Florian Reuter nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
||||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
||||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
/** @file opc/properties.h
|
|
||||||
|
|
||||||
*/
|
|
||||||
#include <opc/config.h>
|
|
||||||
#include <opc/container.h>
|
|
||||||
|
|
||||||
#ifndef OPC_PROPERTIES_H
|
|
||||||
#define OPC_PROPERTIES_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
Represents a simple Dublin Core type.
|
|
||||||
*/
|
|
||||||
typedef struct OPC_DC_SIMPLE_TYPE {
|
|
||||||
xmlChar *str;
|
|
||||||
xmlChar *lang;
|
|
||||||
} opcDCSimpleType_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Represents the core properties of an OPC container.
|
|
||||||
*/
|
|
||||||
typedef struct OPC_PROPERTIES_STRUCT {
|
|
||||||
xmlChar *category; /* xsd:string */
|
|
||||||
xmlChar *contentStatus; /* xsd:string */
|
|
||||||
xmlChar *created; /* dc:date */
|
|
||||||
opcDCSimpleType_t creator; /* dc:any */
|
|
||||||
opcDCSimpleType_t description; /* dc:any */
|
|
||||||
opcDCSimpleType_t identifier; /* dc:any */
|
|
||||||
opcDCSimpleType_t *keyword_array; /* cp:CT_Keywords */
|
|
||||||
opc_uint32_t keyword_items;
|
|
||||||
opcDCSimpleType_t language; /* dc:any */
|
|
||||||
xmlChar *lastModifiedBy; /* xsd:string */
|
|
||||||
xmlChar *lastPrinted; /* xsd:dateTime */
|
|
||||||
xmlChar *modified; /* dc:date */
|
|
||||||
xmlChar *revision; /* xsd:string */
|
|
||||||
opcDCSimpleType_t subject; /* dc:any */
|
|
||||||
opcDCSimpleType_t title; /* dc:any */
|
|
||||||
xmlChar *version; /* xsd:string */
|
|
||||||
} opcProperties_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Initialize the core properties \c cp.
|
|
||||||
\see opcCorePropertiesSetString
|
|
||||||
*/
|
|
||||||
opc_error_t opcCorePropertiesInit(opcProperties_t *cp);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Cleanup the core properties \c cp, i.e. release all resources.
|
|
||||||
\see opcCorePropertiesSetString
|
|
||||||
*/
|
|
||||||
opc_error_t opcCorePropertiesCleanup(opcProperties_t *cp);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Read the core properties \c cp from the container \c c.
|
|
||||||
*/
|
|
||||||
opc_error_t opcCorePropertiesRead(opcProperties_t *cp, opcContainer *c);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Write/Update the core properties \c cp in the container \c c.
|
|
||||||
*/
|
|
||||||
opc_error_t opcCorePropertiesWrite(opcProperties_t *cp, opcContainer *c);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Update a string in the core properties the right way.
|
|
||||||
\code
|
|
||||||
opcProperties_t cp;
|
|
||||||
opcCorePropertiesInit(&cp);
|
|
||||||
opcCorePropertiesSetString(&cp.revision, "1");
|
|
||||||
opcCorePropertiesSetStringLang(&cp.creator, "Florian Reuter", NULL);
|
|
||||||
opcCorePropertiesCleanup(&cp);
|
|
||||||
\endcode
|
|
||||||
*/
|
|
||||||
opc_error_t opcCorePropertiesSetString(xmlChar **prop, const xmlChar *str);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Update a core property the right way.
|
|
||||||
\see opcCorePropertiesSetString
|
|
||||||
*/
|
|
||||||
opc_error_t opcCorePropertiesSetStringLang(opcDCSimpleType_t *prop, const xmlChar *str, const xmlChar *lang);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* OPC_PROPERTIES_H */
|
|
||||||
@@ -1,140 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright (c) 2010, Florian Reuter
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Florian Reuter nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
||||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
||||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
/** @file opc/relation.h
|
|
||||||
|
|
||||||
*/
|
|
||||||
#include <opc/config.h>
|
|
||||||
|
|
||||||
#ifndef OPC_RELATION_H
|
|
||||||
#define OPC_RELATION_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
Identifier for an OPC relation.
|
|
||||||
*/
|
|
||||||
typedef opc_uint32_t opcRelation;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Constant which represents an invalid relation.
|
|
||||||
*/
|
|
||||||
#define OPC_RELATION_INVALID (-1)
|
|
||||||
|
|
||||||
/**
|
|
||||||
Find a relation originating from \c part in \c container with \c relationId and/or \c mimeType.
|
|
||||||
If \c part is OPC_PART_INVALID then part represents the root part.
|
|
||||||
@param[in] relationId The relationId (e.g. "rId1") or NULL.
|
|
||||||
@param[in] mimeType The mimeType or NULL.
|
|
||||||
*/
|
|
||||||
opcRelation opcRelationFind(opcContainer *container, opcPart part, const xmlChar *relationId, const xmlChar *mimeType);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Deletes the relation from the container.
|
|
||||||
\see opcRelationFind.
|
|
||||||
*/
|
|
||||||
opc_error_t opcRelationDelete(opcContainer *container, opcPart part, const xmlChar *relationId, const xmlChar *mimeType);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns the first relation.
|
|
||||||
The following code will dump all relations:
|
|
||||||
\code
|
|
||||||
for(opcPart part=opcPartGetFirst(c);OPC_PART_INVALID!=part;part=opcPartGetNext(c, part)) {
|
|
||||||
for(opcRelation rel=opcRelationFirst(c, part);
|
|
||||||
OPC_RELATION_INVALID!=rel;
|
|
||||||
rel=opcRelationNext(c, part, rel)) {
|
|
||||||
opcPart internal_target=opcRelationGetInternalTarget(c, part, rel);
|
|
||||||
const xmlChar *external_target=opcRelationGetExternalTarget(c, part, rel);
|
|
||||||
const xmlChar *target=(NULL!=internal_target?internal_target:external_target);
|
|
||||||
const xmlChar *prefix=NULL;
|
|
||||||
opc_uint32_t counter=-1;
|
|
||||||
const xmlChar *type=NULL;
|
|
||||||
opcRelationGetInformation(c, part, rel, &prefix, &counter, &type);
|
|
||||||
if (-1==counter) { // no counter after prefix
|
|
||||||
printf("%s;%s;%s;%s\n", part, prefix, target, type);
|
|
||||||
} else {
|
|
||||||
printf("%s;%s%i;%s;%s\n", part, prefix, counter, target, type);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
\endcode
|
|
||||||
*/
|
|
||||||
opcRelation opcRelationFirst(opcContainer *container, opcPart part);
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see opcRelationFirst
|
|
||||||
*/
|
|
||||||
opcRelation opcRelationNext(opcContainer *container, opcPart part, opcRelation relation);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns the internal target.
|
|
||||||
\note To test for an external target use opcRelationGetExternalTarget.
|
|
||||||
\see opcRelationGetExternalTarget
|
|
||||||
*/
|
|
||||||
opcPart opcRelationGetInternalTarget(opcContainer *container, opcPart part, opcRelation relation);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns the external target or NULL if it is an internal target.
|
|
||||||
The string is interned. Must not be freed.
|
|
||||||
\see opcRelationGetInternalTarget
|
|
||||||
*/
|
|
||||||
const xmlChar *opcRelationGetExternalTarget(opcContainer *container, opcPart part, opcRelation relation);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns the relations type.
|
|
||||||
The string is interned. Must not be freed.
|
|
||||||
*/
|
|
||||||
const xmlChar *opcRelationGetType(opcContainer *container, opcPart part, opcRelation relation);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Get information about a relation.
|
|
||||||
\see opcRelationFirst
|
|
||||||
*/
|
|
||||||
void opcRelationGetInformation(opcContainer *container, opcPart part, opcRelation relation, const xmlChar **prefix, opc_uint32_t *counter, const xmlChar **type);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Add a relation to \c container from \c src part to \c dest part with id \c rid and type \c type.
|
|
||||||
*/
|
|
||||||
opc_uint32_t opcRelationAdd(opcContainer *container, opcPart src, const xmlChar *rid, opcPart dest, const xmlChar *type);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Add an external relation to \c container from \c src part to \c target URL with id \c rid and type \c type.
|
|
||||||
*/
|
|
||||||
opc_uint32_t opcRelationAddExternal(opcContainer *container, opcPart src, const xmlChar *rid, const xmlChar *target, const xmlChar *type);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* OPC_RELATION_H */
|
|
||||||
@@ -1,69 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright (c) 2010, Florian Reuter
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Florian Reuter nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
||||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
||||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
/** @file opc/xmlreader.h
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef OPC_XMLREADER_H
|
|
||||||
#define OPC_XMLREADER_H
|
|
||||||
|
|
||||||
#include <opc/config.h>
|
|
||||||
#include <libxml/xmlreader.h>
|
|
||||||
#include <mce/textreader.h>
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
Open an MCE reader for \c partName. Parameters \c URL, \c encoding and \c options will be passed unmodified to
|
|
||||||
http://xmlsoft.org/html/libxml-xmlreader.html#xmlReaderForIO and they can be NULL, NULL, 0.
|
|
||||||
\note Make sure the part exists.
|
|
||||||
\see opcPartFind
|
|
||||||
*/
|
|
||||||
opc_error_t opcXmlReaderOpen(opcContainer *container, mceTextReader_t *mceTextReader, const xmlChar *partName, const char * URL, const char * encoding, int options);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns a libxml DOM document. Parameters \c URL, \c encoding and \c options will be passed unmodified to
|
|
||||||
http://xmlsoft.org/html/libxml-parser.html#xmlReadIO and they can be NULL, NULL, 0.
|
|
||||||
\note Make sure the part exists.
|
|
||||||
\see opcPartFind
|
|
||||||
*/
|
|
||||||
xmlDocPtr opcXmlReaderReadDoc(opcContainer *container, const xmlChar *partName, const char * URL, const char * encoding, int options);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* OPC_XMLREADER_H */
|
|
||||||
@@ -1,57 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright (c) 2010, Florian Reuter
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Florian Reuter nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
||||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
||||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
/** @file opc/xmlwriter.h
|
|
||||||
|
|
||||||
*/
|
|
||||||
#include <opc/config.h>
|
|
||||||
#include <mce/textwriter.h>
|
|
||||||
|
|
||||||
#ifndef OPC_XMLWRITER_H
|
|
||||||
#define OPC_XMLWRITER_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
Create an MCE text writer for \c part in \c container with compression \c compression_option.
|
|
||||||
\note Make sure the part exists.
|
|
||||||
\see opcPartFind
|
|
||||||
*/
|
|
||||||
mceTextWriter *mceTextWriterOpen(opcContainer *c, opcPart part, opcCompressionOption_t compression_option);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* OPC_XMLWRITER_H */
|
|
||||||
@@ -1,255 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright (c) 2010, Florian Reuter
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Florian Reuter nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
||||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
||||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
/** @file opc/zip.h
|
|
||||||
The ZIP file backend of an OPC container.
|
|
||||||
*/
|
|
||||||
#include <opc/config.h>
|
|
||||||
#include <opc/file.h>
|
|
||||||
#include <opc/container.h>
|
|
||||||
|
|
||||||
#ifndef OPC_ZIP_H
|
|
||||||
#define OPC_ZIP_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
Default growth hint of an OPC stream.
|
|
||||||
*/
|
|
||||||
#define OPC_DEFAULT_GROWTH_HINT 512
|
|
||||||
|
|
||||||
/**
|
|
||||||
Handle to a ZIP archive.
|
|
||||||
\see internal.h
|
|
||||||
*/
|
|
||||||
typedef struct OPC_ZIP_STRUCT opcZip;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Handle to a raw ZIP input stream.
|
|
||||||
\see internal.h
|
|
||||||
*/
|
|
||||||
typedef struct OPC_ZIPINPUTSTREAM_STRUCT opcZipInputStream;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Handle to a raw ZIP output stream.
|
|
||||||
\see internal.h
|
|
||||||
*/
|
|
||||||
typedef struct OPC_ZIPOUTPUTSTREAM_STRUCT opcZipOutputStream;
|
|
||||||
|
|
||||||
/**
|
|
||||||
Holds all information of a ZIP segment.
|
|
||||||
*/
|
|
||||||
typedef struct OPC_ZIP_SEGMENT_INFO_STRUCT {
|
|
||||||
xmlChar name[OPC_MAX_PATH];
|
|
||||||
opc_uint32_t name_len;
|
|
||||||
opc_uint32_t segment_number;
|
|
||||||
opc_bool_t last_segment;
|
|
||||||
opc_bool_t rels_segment;
|
|
||||||
opc_uint32_t header_size;
|
|
||||||
opc_uint32_t min_header_size;
|
|
||||||
opc_uint32_t trailing_bytes;
|
|
||||||
opc_uint32_t compressed_size;
|
|
||||||
opc_uint32_t uncompressed_size;
|
|
||||||
opc_uint16_t bit_flag;
|
|
||||||
opc_uint32_t data_crc;
|
|
||||||
opc_uint16_t compression_method;
|
|
||||||
opc_ofs_t stream_ofs;
|
|
||||||
opc_uint16_t growth_hint;
|
|
||||||
} opcZipSegmentInfo_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see opcZipLoader
|
|
||||||
*/
|
|
||||||
typedef int opcZipLoaderOpenCallback(void *iocontext);
|
|
||||||
/**
|
|
||||||
\see opcZipLoader
|
|
||||||
*/
|
|
||||||
typedef int opcZipLoaderSkipCallback(void *iocontext);
|
|
||||||
/**
|
|
||||||
\see opcZipLoader
|
|
||||||
*/
|
|
||||||
typedef int opcZipLoaderReadCallback(void *iocontext, char *buffer, int len);
|
|
||||||
/**
|
|
||||||
\see opcZipLoader
|
|
||||||
*/
|
|
||||||
typedef int opcZipLoaderCloseCallback(void *iocontext);
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see opcZipLoader
|
|
||||||
*/
|
|
||||||
typedef opc_error_t (opcZipLoaderSegmentCallback_t)(void *iocontext, void *userctx, opcZipSegmentInfo_t *info, opcZipLoaderOpenCallback *open, opcZipLoaderReadCallback *read, opcZipLoaderCloseCallback *close, opcZipLoaderSkipCallback *skip);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Walks every segment in a ZIP archive and calls the \c segmentCallback callback method.
|
|
||||||
The implementer's \c segmentCallback method must then either use the passed \c open, \c read and \c close methods
|
|
||||||
to read the stream or the passed \c skip methods to skip the stream.
|
|
||||||
This method can be used to e.g. read ZIP file in stream mode.
|
|
||||||
*/
|
|
||||||
opc_error_t opcZipLoader(opcIO_t *io, void *userctx, opcZipLoaderSegmentCallback_t *segmentCallback);
|
|
||||||
|
|
||||||
/**
|
|
||||||
\see opcZipClose
|
|
||||||
*/
|
|
||||||
typedef opc_error_t (opcZipSegmentReleaseCallback)(opcZip *zip, opc_uint32_t segment_id);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Closes the ZIP archive \c zip and will call \c releaseCallback for every segment to give the implementer a chance
|
|
||||||
to free user resources.
|
|
||||||
*/
|
|
||||||
void opcZipClose(opcZip *zip, opcZipSegmentReleaseCallback* releaseCallback);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Creates an empty ZIP archive with the given \c io.
|
|
||||||
*/
|
|
||||||
opcZip *opcZipCreate(opcIO_t *io);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Commits all buffers and writes the ZIP archives local header directories.
|
|
||||||
If \c trim is true then padding bytes will be removed, i.e. the ZIP file size will be minimized.
|
|
||||||
*/
|
|
||||||
opc_error_t opcZipCommit(opcZip *zip, opc_bool_t trim);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Garbage collection on the passed \c zip archive. This will e.g. make deleted files available as free space.
|
|
||||||
*/
|
|
||||||
opc_error_t opcZipGC(opcZip *zip);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Load segment information into \c info.
|
|
||||||
If \c rels_segment is -1 then load the info for part with name \c partName.
|
|
||||||
Otherwise load the segment information for the ".rels." segment of \c partName.
|
|
||||||
\return Returns the segment_id.
|
|
||||||
*/
|
|
||||||
opc_uint32_t opcZipLoadSegment(opcZip *zip, const xmlChar *partName, opc_bool_t rels_segment, opcZipSegmentInfo_t *info);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Create a segment with the given parameters.
|
|
||||||
\return Returns the segment_id.
|
|
||||||
*/
|
|
||||||
opc_uint32_t opcZipCreateSegment(opcZip *zip,
|
|
||||||
const xmlChar *partName,
|
|
||||||
opc_bool_t relsSegment,
|
|
||||||
opc_uint32_t segment_size,
|
|
||||||
opc_uint32_t growth_hint,
|
|
||||||
opc_uint16_t compression_method,
|
|
||||||
opc_uint16_t bit_flag);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Creates an input stream for the segment with \c segment_id.
|
|
||||||
\see opcZipLoadSegment
|
|
||||||
\see opcZipCreateSegment
|
|
||||||
*/
|
|
||||||
opcZipInputStream *opcZipOpenInputStream(opcZip *zip, opc_uint32_t segment_id);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Free all resources of the input stream.
|
|
||||||
*/
|
|
||||||
opc_error_t opcZipCloseInputStream(opcZip *zip, opcZipInputStream *stream);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Read at most \c buf_len bytes from the input stream into \c buf.
|
|
||||||
\return Returns the number of bytes read.
|
|
||||||
*/
|
|
||||||
opc_uint32_t opcZipReadInputStream(opcZip *zip, opcZipInputStream *stream, opc_uint8_t *buf, opc_uint32_t buf_len);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Creates an output stream for the segment with \c segment_id.
|
|
||||||
If \c *segment_id is -1 then a new segment will be created.
|
|
||||||
Otherwise the segment with \c *segment_id will be overwritten.
|
|
||||||
*/
|
|
||||||
opcZipOutputStream *opcZipCreateOutputStream(opcZip *zip,
|
|
||||||
opc_uint32_t *segment_id,
|
|
||||||
const xmlChar *partName,
|
|
||||||
opc_bool_t relsSegment,
|
|
||||||
opc_uint32_t segment_size,
|
|
||||||
opc_uint32_t growth_hint,
|
|
||||||
opc_uint16_t compression_method,
|
|
||||||
opc_uint16_t bit_flag);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Opens an existing output stream for reading.
|
|
||||||
The \c *segment_id will be set to -1 and reset on opcZipCloseOutputStream.
|
|
||||||
\see opcZipCloseOutputStream
|
|
||||||
*/
|
|
||||||
opcZipOutputStream *opcZipOpenOutputStream(opcZip *zip, opc_uint32_t *segment_id);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Will close the stream and free all resources. Additionally the new segment id will be stored in \c *segment_id.
|
|
||||||
\see opcZipOpenOutputStream
|
|
||||||
*/
|
|
||||||
opc_error_t opcZipCloseOutputStream(opcZip *zip, opcZipOutputStream *stream, opc_uint32_t *segment_id);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Write \c buf_len bytes from \c buf to the output stream.
|
|
||||||
\return Returns the number of bytes written.
|
|
||||||
*/
|
|
||||||
opc_uint32_t opcZipWriteOutputStream(opcZip *zip, opcZipOutputStream *stream, const opc_uint8_t *buf, opc_uint32_t buf_len);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns the first segment id or -1.
|
|
||||||
Use the following code to iterate through all segments.
|
|
||||||
\code
|
|
||||||
for(opc_uint32_t segment_id=opcZipGetFirstSegmentId(zip);
|
|
||||||
-1!=segment_id;
|
|
||||||
segment_id=opcZipGetNextSegmentId(zip, segment_id)) {
|
|
||||||
...
|
|
||||||
}
|
|
||||||
\endcode
|
|
||||||
\see opcZipGetNextSegmentId
|
|
||||||
*/
|
|
||||||
opc_uint32_t opcZipGetFirstSegmentId(opcZip *zip);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns the next segment id or -1.
|
|
||||||
\see opcZipGetFirstSegmentId
|
|
||||||
*/
|
|
||||||
opc_uint32_t opcZipGetNextSegmentId(opcZip *zip, opc_uint32_t segment_id);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns info about the given segment id.
|
|
||||||
*/
|
|
||||||
opc_error_t opcZipGetSegmentInfo(opcZip *zip, opc_uint32_t segment_id, const xmlChar **name, opc_bool_t *rels_segment, opc_uint32_t *crc);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Marks the given segments as deleted.
|
|
||||||
\see opcZipGC
|
|
||||||
*/
|
|
||||||
opc_bool_t opcZipSegmentDelete(opcZip *zip, opc_uint32_t *first_segment, opc_uint32_t *last_segment, opcZipSegmentReleaseCallback* releaseCallback);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* OPC_ZIP_H */
|
|
||||||
@@ -1,168 +0,0 @@
|
|||||||
/* include/plib/plib.h. Generated from plib.h by configure. */
|
|
||||||
/*
|
|
||||||
Copyright (c) 2010, Florian Reuter
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Florian Reuter nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this
|
|
||||||
software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
||||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
||||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
#ifndef _PLIB_PLIB_H_
|
|
||||||
#define _PLIB_PLIB_H_
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define HAVE_STDINT_H 1
|
|
||||||
#define HAVE_STDDEF_H 1
|
|
||||||
#define HAVE_STDIO_H 1
|
|
||||||
#define HAVE_STRING_H 1
|
|
||||||
#define HAVE_LIMITS_H 1
|
|
||||||
#define HAVE_STDLIB_H 1
|
|
||||||
/* #undef HAVE_IO_H */
|
|
||||||
#define HAVE_UNISTD_H 1
|
|
||||||
#define HAVE_SYS_TYPES_H 1
|
|
||||||
#define IS_CONFIGURED 1
|
|
||||||
|
|
||||||
#if !defined(IS_CONFIGURED)
|
|
||||||
#if defined(WIN32)
|
|
||||||
#define HAVE_STRING_H 1
|
|
||||||
#define HAVE_STDINT_H 1
|
|
||||||
#define HAVE_LIMITS_H 1
|
|
||||||
#define HAVE_STDDEF_H 1
|
|
||||||
#define HAVE_STDIO_H 1
|
|
||||||
#define HAVE_STDLIB_H 1
|
|
||||||
#define HAVE_IO_H
|
|
||||||
#define snprintf _snprintf
|
|
||||||
#else
|
|
||||||
#error "configure not executed and we are not on a win32 machine? please run configure or define WIN32 is you are on a WIN32 platform."
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_STDDEF_H
|
|
||||||
#include <stddef.h>
|
|
||||||
typedef size_t pofs_t; // maximum file offset for eg. read write ops
|
|
||||||
#else
|
|
||||||
#error "system types can not be determined"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_STDIO_H
|
|
||||||
#include <stdio.h>
|
|
||||||
#else
|
|
||||||
#error "system io can not be determined"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_STDINT_H
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
typedef int8_t pint8_t;
|
|
||||||
typedef uint8_t puint8_t;
|
|
||||||
|
|
||||||
typedef int16_t pint16_t;
|
|
||||||
typedef uint16_t puint16_t;
|
|
||||||
|
|
||||||
typedef int32_t pint32_t;
|
|
||||||
typedef uint32_t puint32_t;
|
|
||||||
|
|
||||||
typedef int64_t pint64_t;
|
|
||||||
typedef uint64_t puint64_t;
|
|
||||||
|
|
||||||
typedef int pbool_t;
|
|
||||||
|
|
||||||
typedef size_t psize_t;
|
|
||||||
|
|
||||||
// INTN_MAX, INTN_MIN, UINTN_MAX
|
|
||||||
#else
|
|
||||||
#error "system types can not be determined"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_STRING_H
|
|
||||||
#include <string.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_LIMITS_H
|
|
||||||
#include <limits.h>
|
|
||||||
#define PUINT8_MAX UCHAR_MAX
|
|
||||||
#define PINT32_MAX INT_MAX
|
|
||||||
#define PINT32_MIN INT_MIN
|
|
||||||
#define PUINT32_MAX UINT_MAX
|
|
||||||
#define PUINT32_MIN 0
|
|
||||||
#define PUINT16_MAX USHRT_MAX
|
|
||||||
#define PUINT16_MIN 0
|
|
||||||
#else
|
|
||||||
#error "limits can not be determined"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_STDLIB_H
|
|
||||||
#include <stdlib.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_IO_H
|
|
||||||
#include <io.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_UNISTD_H
|
|
||||||
#include <unistd.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_SYS_TYPES_H
|
|
||||||
#include <sys/types.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
Converts an ASCII string to an xmlChar string. This only works for ASCII strings.
|
|
||||||
*/
|
|
||||||
#ifndef _X
|
|
||||||
#define _X(s) BAD_CAST(s)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Converts an xmlChar string to an ASCII string. This only works for ASCII charsets.
|
|
||||||
*/
|
|
||||||
#ifndef _X2C
|
|
||||||
#define _X2C(s) ((char*)(s))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#define PASSERT(e) assert(e)
|
|
||||||
#ifdef NDEBUG
|
|
||||||
#define PENSURE(e) (void)(e)
|
|
||||||
#else
|
|
||||||
#define PENSURE(e) assert(e)
|
|
||||||
#endif
|
|
||||||
#define PTRUE (0==0)
|
|
||||||
#define PFALSE (0==1)
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* _PLIB_PLIB_H_ */
|
|
||||||
Submodule lib/bzip2-1.0.6 deleted from 288acf97a1
Submodule lib/ffmpeg deleted from 8887991a31
Submodule lib/harfbuzz deleted from b28c282585
Submodule lib/leptonica deleted from cc03be70fd
Submodule lib/libmagic deleted from 1249b5cd02
Binary file not shown.
Binary file not shown.
Binary file not shown.
Submodule lib/libtiff deleted from 3db0ff91bc
Submodule lib/mupdf deleted from c50ac19e41
Submodule lib/onion deleted from 73329b61eb
Submodule lib/openjpeg deleted from ac3737372a
Submodule lib/tesseract deleted from f268e6615e
1
lmdb
1
lmdb
Submodule lmdb deleted from 5c012bbe03
@@ -1,17 +1,39 @@
|
|||||||
{
|
{
|
||||||
"properties": {
|
"properties": {
|
||||||
|
"_tie": {
|
||||||
|
"type": "keyword",
|
||||||
|
"doc_values": true
|
||||||
|
},
|
||||||
|
"_depth": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
"path": {
|
"path": {
|
||||||
"type": "text",
|
"type": "text",
|
||||||
"analyzer": "path_analyzer",
|
"analyzer": "path_analyzer",
|
||||||
"copy_to": "suggest-path"
|
"copy_to": "suggest-path",
|
||||||
|
"fielddata": true,
|
||||||
|
"fields": {
|
||||||
|
"nGram": {
|
||||||
|
"type": "text",
|
||||||
|
"analyzer": "my_nGram"
|
||||||
|
},
|
||||||
|
"text": {
|
||||||
|
"type": "text",
|
||||||
|
"analyzer": "content_analyzer"
|
||||||
|
}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"suggest-path": {
|
"suggest-path": {
|
||||||
"type": "completion",
|
"type": "completion",
|
||||||
"analyzer": "keyword"
|
"analyzer": "case_insensitive_kw_analyzer"
|
||||||
},
|
},
|
||||||
"mime": {
|
"mime": {
|
||||||
"type": "keyword"
|
"type": "keyword"
|
||||||
},
|
},
|
||||||
|
"thumbnail": {
|
||||||
|
"type": "keyword",
|
||||||
|
"index": false
|
||||||
|
},
|
||||||
"videoc": {
|
"videoc": {
|
||||||
"type": "keyword",
|
"type": "keyword",
|
||||||
"index": false
|
"index": false
|
||||||
@@ -32,6 +54,10 @@
|
|||||||
"type": "integer",
|
"type": "integer",
|
||||||
"index": false
|
"index": false
|
||||||
},
|
},
|
||||||
|
"pages": {
|
||||||
|
"type": "integer",
|
||||||
|
"index": false
|
||||||
|
},
|
||||||
"mtime": {
|
"mtime": {
|
||||||
"type": "integer"
|
"type": "integer"
|
||||||
},
|
},
|
||||||
@@ -104,7 +130,42 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"tag": {
|
"tag": {
|
||||||
|
"type": "keyword",
|
||||||
|
"copy_to": "suggest-tag"
|
||||||
|
},
|
||||||
|
"suggest-tag": {
|
||||||
|
"type": "completion",
|
||||||
|
"analyzer": "case_insensitive_kw_analyzer"
|
||||||
|
},
|
||||||
|
"exif_make": {
|
||||||
|
"type": "text"
|
||||||
|
},
|
||||||
|
"exif_model": {
|
||||||
|
"type": "text"
|
||||||
|
},
|
||||||
|
"exif:software": {
|
||||||
|
"type": "text"
|
||||||
|
},
|
||||||
|
"exif_exposure_time": {
|
||||||
"type": "keyword"
|
"type": "keyword"
|
||||||
|
},
|
||||||
|
"exif_fnumber": {
|
||||||
|
"type": "keyword"
|
||||||
|
},
|
||||||
|
"exif_iso_speed_ratings": {
|
||||||
|
"type": "keyword"
|
||||||
|
},
|
||||||
|
"exif_focal_length": {
|
||||||
|
"type": "keyword"
|
||||||
|
},
|
||||||
|
"exif_user_comment": {
|
||||||
|
"type": "text"
|
||||||
|
},
|
||||||
|
"author": {
|
||||||
|
"type": "text"
|
||||||
|
},
|
||||||
|
"modified_by": {
|
||||||
|
"type": "text"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
10
schema/pipeline.json
Normal file
10
schema/pipeline.json
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"description": "Copy _id to _tie, save path depth",
|
||||||
|
"processors": [
|
||||||
|
{
|
||||||
|
"script": {
|
||||||
|
"source": "ctx._tie = ctx._id; ctx._depth = ctx.path.length() == 0 ? 0 : 1 + ctx.path.length() - ctx.path.replace(\"/\", \"\").length();"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -1,7 +1,8 @@
|
|||||||
{
|
{
|
||||||
"index": {
|
"index": {
|
||||||
"refresh_interval": "30s",
|
"refresh_interval": "30s",
|
||||||
"codec": "best_compression"
|
"codec": "best_compression",
|
||||||
|
"number_of_replicas": 0
|
||||||
},
|
},
|
||||||
"analysis": {
|
"analysis": {
|
||||||
"tokenizer": {
|
"tokenizer": {
|
||||||
@@ -21,6 +22,12 @@
|
|||||||
"lowercase"
|
"lowercase"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"case_insensitive_kw_analyzer": {
|
||||||
|
"tokenizer": "keyword",
|
||||||
|
"filter": [
|
||||||
|
"lowercase"
|
||||||
|
]
|
||||||
|
},
|
||||||
"my_nGram": {
|
"my_nGram": {
|
||||||
"tokenizer": "my_nGram_tokenizer",
|
"tokenizer": "my_nGram_tokenizer",
|
||||||
"filter": [
|
"filter": [
|
||||||
|
|||||||
@@ -2,15 +2,15 @@
|
|||||||
|
|
||||||
rm -rf index.sist2/
|
rm -rf index.sist2/
|
||||||
|
|
||||||
rm web/js/bundle.js 2> /dev/null
|
rm src/static/js/bundle.js 2> /dev/null
|
||||||
cat `ls web/js/*.min.js` > web/js/bundle.js
|
cat `ls src/static/js/*.min.js` > src/static/js/bundle.js
|
||||||
cat web/js/{util,dom,search}.js >> web/js/bundle.js
|
cat src/static/js/{util,dom}.js >> src/static/js/bundle.js
|
||||||
|
|
||||||
rm web/css/bundle*.css 2> /dev/null
|
rm src/static/css/bundle*.css 2> /dev/null
|
||||||
cat web/css/*.min.css > web/css/bundle.css
|
cat src/static/css/*.min.css > src/static/css/bundle.css
|
||||||
cat web/css/light.css >> web/css/bundle.css
|
cat src/static/css/light.css >> src/static/css/bundle.css
|
||||||
cat web/css/*.min.css > web/css/bundle_dark.css
|
cat src/static/css/*.min.css > src/static/css/bundle_dark.css
|
||||||
cat web/css/dark.css >> web/css/bundle_dark.css
|
cat src/static/css/dark.css >> src/static/css/bundle_dark.css
|
||||||
|
|
||||||
python3 scripts/mime.py > src/parsing/mime_generated.c
|
python3 scripts/mime.py > src/parsing/mime_generated.c
|
||||||
python3 scripts/serve_static.py > src/web/static_generated.c
|
python3 scripts/serve_static.py > src/web/static_generated.c
|
||||||
|
|||||||
@@ -1,99 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
|
|
||||||
THREADS=$(nproc)
|
|
||||||
|
|
||||||
cd lib
|
|
||||||
|
|
||||||
cd mupdf
|
|
||||||
make USE_SYSTEM_HARFBUZZ=yes USE_SYSTEM_OPENJPEG=yes HAVE_X11=no HAVE_GLUT=no -j $THREADS
|
|
||||||
cd ..
|
|
||||||
|
|
||||||
mv mupdf/build/release/libmupdf.a .
|
|
||||||
mv mupdf/build/release/libmupdf-third.a .
|
|
||||||
|
|
||||||
# openjp2
|
|
||||||
cd openjpeg
|
|
||||||
#cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3 -march=native -DNDEBUG"
|
|
||||||
cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3"
|
|
||||||
make -j $THREADS
|
|
||||||
cd ..
|
|
||||||
mv openjpeg/bin/libopenjp2.a .
|
|
||||||
|
|
||||||
# harfbuzz
|
|
||||||
cd harfbuzz
|
|
||||||
./autogen.sh
|
|
||||||
./configure --disable-shared --enable-static
|
|
||||||
make -j $THREADS
|
|
||||||
cd ..
|
|
||||||
mv harfbuzz/src/.libs/libharfbuzz.a .
|
|
||||||
|
|
||||||
# ffmpeg
|
|
||||||
cd ffmpeg
|
|
||||||
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \
|
|
||||||
--disable-ffprobe --disable-doc\
|
|
||||||
--disable-manpages --disable-postproc --disable-avfilter \
|
|
||||||
--disable-alsa --disable-lzma --disable-xlib --disable-debug\
|
|
||||||
--disable-vdpau --disable-vaapi --disable-sdl2 --disable-network
|
|
||||||
make -j $THREADS
|
|
||||||
cd ..
|
|
||||||
|
|
||||||
mv ffmpeg/libavcodec/libavcodec.a .
|
|
||||||
mv ffmpeg/libavformat/libavformat.a .
|
|
||||||
mv ffmpeg/libavutil/libavutil.a .
|
|
||||||
mv ffmpeg/libswresample/libswresample.a .
|
|
||||||
mv ffmpeg/libswscale/libswscale.a .
|
|
||||||
|
|
||||||
# onion
|
|
||||||
cd onion
|
|
||||||
mkdir build 2> /dev/null
|
|
||||||
cd build
|
|
||||||
cmake -DONION_USE_SSL=false -DONION_USE_PAM=false -DONION_USE_PNG=false -DONION_USE_JPEG=false \
|
|
||||||
-DONION_USE_JPEG=false -DONION_USE_XML2=false -DONION_USE_SYSTEMD=false -DONION_USE_SQLITE3=false \
|
|
||||||
-DONION_USE_REDIS=false -DONION_USE_GC=false -DONION_USE_TESTS=false -DONION_EXAMPLES=false \
|
|
||||||
-DONION_USE_BINDINGS_CPP=false ..
|
|
||||||
make -j $THREADS
|
|
||||||
cd ../..
|
|
||||||
|
|
||||||
mv onion/build/src/onion/libonion_static.a .
|
|
||||||
|
|
||||||
#bzip2
|
|
||||||
cd bzip2-1.0.6
|
|
||||||
make -j $THREADS
|
|
||||||
cd ..
|
|
||||||
mv bzip2-1.0.6/libbz2.a .
|
|
||||||
|
|
||||||
# magic
|
|
||||||
cd libmagic
|
|
||||||
./autogen.sh
|
|
||||||
./configure --enable-static --disable-shared
|
|
||||||
make -j $THREADS
|
|
||||||
cd ..
|
|
||||||
mv libmagic/src/.libs/libmagic.a .
|
|
||||||
|
|
||||||
# tesseract
|
|
||||||
cd tesseract
|
|
||||||
mkdir build
|
|
||||||
cd build
|
|
||||||
cmake -DSTATIC=on -DBUILD_TRAINING_TOOLS=off ..
|
|
||||||
make -j $THREADS
|
|
||||||
cd ../..
|
|
||||||
mv tesseract/build/libtesseract.a .
|
|
||||||
|
|
||||||
# leptonica
|
|
||||||
cd leptonica
|
|
||||||
./autogen.sh
|
|
||||||
./configure --without-zlib --without-jpeg --without-giflib \
|
|
||||||
--without-giflib --without-libwebp --without-libwebpmux --without-libopenjpeg \
|
|
||||||
--enable-static --disable-shared
|
|
||||||
make -j $THREADS
|
|
||||||
cd ..
|
|
||||||
mv leptonica/src/.libs/liblept.a .
|
|
||||||
|
|
||||||
# tiff
|
|
||||||
cd libtiff
|
|
||||||
./autogen.sh
|
|
||||||
./configure --enable-static --disable-shared --disable-lzw --disable-jpeg --disable-webp \
|
|
||||||
--disable-lzma --disable-zstd --disable-jbig
|
|
||||||
make -j $THREADS
|
|
||||||
cd ..
|
|
||||||
mv libtiff/libtiff/.libs/libtiff.a .
|
|
||||||
@@ -1,6 +1,9 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
files = [
|
files = [
|
||||||
"schema/mappings.json",
|
"schema/mappings.json",
|
||||||
"schema/settings.json",
|
"schema/settings.json",
|
||||||
|
"schema/pipeline.json",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@@ -9,6 +12,7 @@ def clean(filepath):
|
|||||||
|
|
||||||
|
|
||||||
for file in files:
|
for file in files:
|
||||||
with open(file, "rb") as f:
|
with open(file, "r") as f:
|
||||||
data = f.read()
|
data = json.dumps(json.load(f), separators=(",", ":")).encode()
|
||||||
|
data += b'\0'
|
||||||
print("char %s[%d] = {%s};" % (clean(file), len(data), ",".join(str(int(b)) for b in data)))
|
print("char %s[%d] = {%s};" % (clean(file), len(data), ",".join(str(int(b)) for b in data)))
|
||||||
|
|||||||
@@ -2,14 +2,18 @@ application/arj, arj
|
|||||||
application/base64, mme
|
application/base64, mme
|
||||||
application/binhex, hqx
|
application/binhex, hqx
|
||||||
application/book, boo|book
|
application/book, boo|book
|
||||||
|
application/CDFV2-corrupt,
|
||||||
application/CDFV2, sdv
|
application/CDFV2, sdv
|
||||||
application/clariscad, ccad
|
application/clariscad, ccad
|
||||||
application/commonground, dp
|
application/commonground, dp
|
||||||
|
application/csv,
|
||||||
|
application/dicom, dcm
|
||||||
application/drafting, drw
|
application/drafting, drw
|
||||||
|
application/epub+zip, epub
|
||||||
application/freeloader, frl
|
application/freeloader, frl
|
||||||
application/futuresplash, spl
|
application/futuresplash, spl
|
||||||
application/groupwise, vew
|
application/groupwise, vew
|
||||||
application/gzip, gz
|
application/gzip, gz|tgz
|
||||||
application/hta, hta
|
application/hta, hta
|
||||||
application/i-deas, unv
|
application/i-deas, unv
|
||||||
application/iges, iges|igs
|
application/iges, iges|igs
|
||||||
@@ -17,7 +21,6 @@ application/inf, inf
|
|||||||
application/java-archive, jar
|
application/java-archive, jar
|
||||||
application/java, class
|
application/java, class
|
||||||
application/javascript,
|
application/javascript,
|
||||||
application/x-archive, a
|
|
||||||
application/json, json
|
application/json, json
|
||||||
application/marc, mrc
|
application/marc, mrc
|
||||||
application/mbedlet, mbd
|
application/mbedlet, mbd
|
||||||
@@ -27,7 +30,9 @@ application/msword, doc|dot|w6w|wiz|word
|
|||||||
application/netmc, mcp
|
application/netmc, mcp
|
||||||
application/octet-stream, bin|dump|gpg
|
application/octet-stream, bin|dump|gpg
|
||||||
application/oda, oda
|
application/oda, oda
|
||||||
|
application/ogg, ogv
|
||||||
application/pdf, pdf
|
application/pdf, pdf
|
||||||
|
application/pgp-keys,
|
||||||
application/pgp-signature, pgp
|
application/pgp-signature, pgp
|
||||||
application/pkcs7-signature, p7s
|
application/pkcs7-signature, p7s
|
||||||
application/pkix-cert, cer|crt
|
application/pkix-cert, cer|crt
|
||||||
@@ -43,6 +48,10 @@ application/vda, vda
|
|||||||
application/vnd.fdf, fdf
|
application/vnd.fdf, fdf
|
||||||
application/vnd.font-fontforge-sfd, sfd
|
application/vnd.font-fontforge-sfd, sfd
|
||||||
application/vnd.hp-hpgl, hgl|hpg|hpgl
|
application/vnd.hp-hpgl, hgl|hpg|hpgl
|
||||||
|
application/vnd.iccprofile, icm
|
||||||
|
application/vnd.iccprofile, icm
|
||||||
|
application/vnd.lotus-1-2-3,
|
||||||
|
application/vnd.ms-cab-compressed, cab
|
||||||
application/vnd.ms-excel, xlb|xlc|xll|xlm|xls|xlw
|
application/vnd.ms-excel, xlb|xlc|xll|xlm|xls|xlw
|
||||||
application/vnd.ms-fontobject, eot
|
application/vnd.ms-fontobject, eot
|
||||||
application/vnd.ms-opentype, otf
|
application/vnd.ms-opentype, otf
|
||||||
@@ -54,45 +63,75 @@ application/vnd.ms-project, mpp
|
|||||||
application/vnd.oasis.opendocument.base, odb
|
application/vnd.oasis.opendocument.base, odb
|
||||||
application/vnd.oasis.opendocument.formula, odf
|
application/vnd.oasis.opendocument.formula, odf
|
||||||
application/vnd.oasis.opendocument.graphics, odg
|
application/vnd.oasis.opendocument.graphics, odg
|
||||||
|
application/vnd.oasis.opendocument.presentation, odp
|
||||||
|
application/vnd.oasis.opendocument.spreadsheet, ods
|
||||||
application/vnd.oasis.opendocument.text, odt
|
application/vnd.oasis.opendocument.text, odt
|
||||||
|
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
|
||||||
|
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
|
||||||
|
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
|
||||||
|
application/vnd.symbian.install,
|
||||||
|
application/vnd.tcpdump.pcap, pcap
|
||||||
application/vnd.wap.wmlc, wmlc
|
application/vnd.wap.wmlc, wmlc
|
||||||
application/vnd.wap.wmlscriptc, wmlsc
|
application/vnd.wap.wmlscriptc, wmlsc
|
||||||
application/vnd.xara, web
|
application/vnd.xara, web
|
||||||
application/vocaltec-media-desc, vmd
|
application/vocaltec-media-desc, vmd
|
||||||
application/vocaltec-media-file, vmf
|
application/vocaltec-media-file, vmf
|
||||||
|
application/warc, warc
|
||||||
|
application/winhelp, hlp
|
||||||
application/wordperfect6.0, w60
|
application/wordperfect6.0, w60
|
||||||
application/wordperfect6.1, w61
|
application/wordperfect6.1, w61
|
||||||
application/wordperfect, wp|wp5|wp6|wpd
|
application/wordperfect, wp|wp5|wp6|wpd
|
||||||
application/x-123, wk1
|
application/x-123, wk1
|
||||||
|
application/x-7z-compressed, 7z
|
||||||
application/x-aim, aim
|
application/x-aim, aim
|
||||||
|
application/x-apple-diskimage,
|
||||||
|
application/x-arc,
|
||||||
|
application/x-archive, a
|
||||||
|
application/x-atari-7800-rom, a78
|
||||||
application/x-authorware-bin, aab
|
application/x-authorware-bin, aab
|
||||||
application/x-authorware-map, aam
|
application/x-authorware-map, aam
|
||||||
application/x-authorware-seg, aas
|
application/x-authorware-seg, aas
|
||||||
|
application/x-avira-qua,
|
||||||
application/x-bcpio, bcpio
|
application/x-bcpio, bcpio
|
||||||
application/x-bittorrent, torrent
|
application/x-bittorrent, torrent
|
||||||
application/x-bsh, bsh
|
application/x-bsh, bsh
|
||||||
application/x-bytecode.python, pyc
|
application/x-bytecode.python, pyc
|
||||||
application/x-bzip2, boz|bz2
|
application/x-bzip2, boz|bz2
|
||||||
application/x-bzip, bz
|
application/x-bzip, bz
|
||||||
|
application/x-cbr, cbr
|
||||||
|
application/x-cbz, cbz
|
||||||
application/x-cdlink, vcd
|
application/x-cdlink, vcd
|
||||||
application/x-chat, cha|chat
|
application/x-chat, cha|chat
|
||||||
|
application/x-chrome-extension,
|
||||||
application/x-cocoa, cco
|
application/x-cocoa, cco
|
||||||
application/x-conference, nsc
|
application/x-conference, nsc
|
||||||
|
application/x-coredump,
|
||||||
application/x-cpio, cpio
|
application/x-cpio, cpio
|
||||||
application/x-dbf, dbf
|
application/x-dbf, dbf
|
||||||
application/x-dbt,
|
application/x-dbt,
|
||||||
|
application/x-debian-package, deb
|
||||||
application/x-deepv, deepv
|
application/x-deepv, deepv
|
||||||
application/x-director, dcr|dir|dxr
|
application/x-director, dir|dxr
|
||||||
|
application/x-dmp, dmp
|
||||||
|
application/x-dosdriver,
|
||||||
application/x-dosexec, dll
|
application/x-dosexec, dll
|
||||||
application/x-dvi, dvi
|
application/x-dvi, dvi
|
||||||
application/x-elc, elc
|
application/x-elc, elc
|
||||||
|
application/x-empty,
|
||||||
application/x-envoy, env|evy
|
application/x-envoy, env|evy
|
||||||
application/x-esrehber, es
|
application/x-esrehber, es
|
||||||
application/x-excel, xla|xld|xlk|xlt|xlv
|
application/x-excel, xla|xld|xlk|xlt|xlv
|
||||||
application/x-executable, exe
|
application/x-executable, exe
|
||||||
|
application/x-font-gdos,
|
||||||
|
application/x-font-pf2, pf2
|
||||||
|
application/x-font-pfm, pfm
|
||||||
application/x-font-sfn,
|
application/x-font-sfn,
|
||||||
application/x-font-ttf, ttf|ttc
|
application/x-font-ttf, ttf|ttc
|
||||||
|
application/x-fptapplication/x-dbt,
|
||||||
application/x-freelance, pre
|
application/x-freelance, pre
|
||||||
|
application/x-gamecube-rom,
|
||||||
|
application/x-gdbm,
|
||||||
|
application/x-gettext-translation,
|
||||||
application/x-git,
|
application/x-git,
|
||||||
application/x-gsp, gsp
|
application/x-gsp, gsp
|
||||||
application/x-gss, gss
|
application/x-gss, gss
|
||||||
@@ -102,46 +141,68 @@ application/x-hdf, hdf
|
|||||||
application/x-helpfile, help
|
application/x-helpfile, help
|
||||||
application/x-httpd-imap, imap
|
application/x-httpd-imap, imap
|
||||||
application/x-ima, ima
|
application/x-ima, ima
|
||||||
|
application/x-innosetup,
|
||||||
application/x-internett-signup, ins
|
application/x-internett-signup, ins
|
||||||
application/x-inventor, iv
|
application/x-inventor, iv
|
||||||
application/x-ip2, ip
|
application/x-ip2, ip
|
||||||
application/x-java-applet,
|
application/x-java-applet,
|
||||||
application/x-java-commerce, jcm
|
application/x-java-commerce, jcm
|
||||||
application/x-java-image,
|
application/x-java-image,
|
||||||
|
application/x-java-jmod, jmod
|
||||||
application/x-java-keystore,
|
application/x-java-keystore,
|
||||||
|
application/x-kdelnk,
|
||||||
application/x-koan, skd|skm|skp|skt
|
application/x-koan, skd|skm|skp|skt
|
||||||
application/x-latex, latex|ltx
|
application/x-latex, latex|ltx
|
||||||
application/x-livescreen, ivy
|
application/x-livescreen, ivy
|
||||||
application/x-lotus, wq1
|
application/x-lotus, wq1
|
||||||
|
application/x-lz4+json, jsonlz4
|
||||||
|
application/x-lz4, lz4
|
||||||
|
application/x-lz4, lz4
|
||||||
|
application/x-lzh-compressed,
|
||||||
application/x-lzh, lzh
|
application/x-lzh, lzh
|
||||||
|
application/x-lzip, lz
|
||||||
|
application/x-lzma, lzma
|
||||||
|
application/x-lzop, lzo
|
||||||
application/x-lzx, lzx
|
application/x-lzx, lzx
|
||||||
application/x-mach-binary, jnilib|dylib
|
application/x-mach-binary, jnilib|dylib
|
||||||
application/x-mach-executable,
|
application/x-mach-executable,
|
||||||
application/x-magic-cap-package-1.0, mc$
|
application/x-magic-cap-package-1.0, mc$
|
||||||
application/x-mathcad, mcd
|
application/x-mathcad, mcd
|
||||||
|
application/x-maxis-dbpf,
|
||||||
application/x-meme, mm
|
application/x-meme, mm
|
||||||
application/x-midi, midi
|
application/x-midi, midi
|
||||||
application/x-mif, mif
|
application/x-mif, mif
|
||||||
application/x-mix-transfer, nix
|
application/x-mix-transfer, nix
|
||||||
application/xml, opf
|
application/xml, opf
|
||||||
|
application/x-mobipocket-ebook, mobi
|
||||||
|
application/vnd.amazon.mobi8-ebook, azw|azw3
|
||||||
|
application/x-msaccess, accdb
|
||||||
|
application/x-ms-compress-szdd, fon
|
||||||
application/x-ms-pdb, pdb
|
application/x-ms-pdb, pdb
|
||||||
|
application/x-ms-reader, lit
|
||||||
|
application/x-n64-rom, z64
|
||||||
application/x-navi-animation, ani
|
application/x-navi-animation, ani
|
||||||
application/x-navidoc, nvd
|
application/x-navidoc, nvd
|
||||||
application/x-navimap, map
|
application/x-navimap, map
|
||||||
application/x-navistyle, stl
|
application/x-navistyle, stl
|
||||||
|
application/x-nes-rom, nes
|
||||||
application/x-netcdf, cdf|nc
|
application/x-netcdf, cdf|nc
|
||||||
application/x-newton-compatible-pkg, pkg
|
application/x-newton-compatible-pkg, pkg
|
||||||
|
application/x-nintendo-ds-rom,
|
||||||
application/x-object, o
|
application/x-object, o
|
||||||
application/x-omcdatamaker, omcd
|
application/x-omcdatamaker, omcd
|
||||||
application/x-omc, omc
|
application/x-omc, omc
|
||||||
application/x-omcregerator, omcr
|
application/x-omcregerator, omcr
|
||||||
application/x-pagemaker, pm4|pm5
|
application/x-pagemaker, pm4|pm5
|
||||||
application/x-pcl, pcl
|
application/x-pcl, pcl
|
||||||
|
application/x-pgp-keyring,
|
||||||
application/x-pixclscript, plx
|
application/x-pixclscript, plx
|
||||||
application/x-pkcs7-certreqresp, p7r
|
application/x-pkcs7-certreqresp, p7r
|
||||||
application/x-pkcs7-signature, p7a
|
application/x-pkcs7-signature, p7a
|
||||||
application/x-project, mpc|mpt|mpv|mpx
|
application/x-project, mpc|mpt|mpv|mpx
|
||||||
application/x-qpro, wb1
|
application/x-qpro, wb1
|
||||||
|
application/x-rar, rar
|
||||||
|
application/x-rpm, rpm
|
||||||
application/x-sdp, sdp
|
application/x-sdp, sdp
|
||||||
application/x-sea, sea
|
application/x-sea, sea
|
||||||
application/x-seelogo, sl
|
application/x-seelogo, sl
|
||||||
@@ -149,12 +210,17 @@ application/x-setupscript,
|
|||||||
application/x-sharedlib, so
|
application/x-sharedlib, so
|
||||||
application/x-shar, shar
|
application/x-shar, shar
|
||||||
application/x-shockwave-flash, swf
|
application/x-shockwave-flash, swf
|
||||||
|
application/x-snappy-framed,
|
||||||
application/x-sprite, spr|sprite
|
application/x-sprite, spr|sprite
|
||||||
application/x-sqlite3,
|
application/x-sqlite3,
|
||||||
|
application/x-stargallery-thm,
|
||||||
|
application/x-stuffit, sit
|
||||||
application/x-sv4cpio, sv4cpio
|
application/x-sv4cpio, sv4cpio
|
||||||
application/x-sv4crc, sv4crc
|
application/x-sv4crc, sv4crc
|
||||||
application/x-tar, tar
|
application/x-tar, tar
|
||||||
application/x-tbook, sbk|tbk
|
application/x-tbook, sbk|tbk
|
||||||
|
application/x-terminfo,
|
||||||
|
application/x-terminfo2,
|
||||||
application/x-texinfo, texi|texinfo
|
application/x-texinfo, texi|texinfo
|
||||||
application/x-tex-tfm, tfm
|
application/x-tex-tfm, tfm
|
||||||
application/x-ustar, ustar
|
application/x-ustar, ustar
|
||||||
@@ -163,16 +229,22 @@ application/x-vnd.audioexplosion.mzz, mzz
|
|||||||
application/x-vnd.ls-xpix, xpix
|
application/x-vnd.ls-xpix, xpix
|
||||||
application/x-vrml, vrml
|
application/x-vrml, vrml
|
||||||
application/x-wais-source, src|wsrc
|
application/x-wais-source, src|wsrc
|
||||||
|
application/x-wine-extension-ini,
|
||||||
application/x-wintalk, wtk
|
application/x-wintalk, wtk
|
||||||
application/x-world, svr
|
application/x-world, svr
|
||||||
application/x-wri, wri
|
application/x-wri, wri
|
||||||
application/x-x509-ca-cert, der
|
application/x-x509-ca-cert, der
|
||||||
application/x-xz, xz
|
application/x-xz, xz
|
||||||
|
application/x-zip,
|
||||||
|
application/x-zstd, zst
|
||||||
application/zip, zip
|
application/zip, zip
|
||||||
|
application/zlib, z
|
||||||
|
!audio/basic, au
|
||||||
audio/it, it
|
audio/it, it
|
||||||
audio/make, funk|my|pfunk
|
audio/make, funk|my|pfunk
|
||||||
audio/midi, kar
|
audio/midi, kar
|
||||||
audio/mid, rmi
|
audio/mid, rmi
|
||||||
|
audio/mp4, m4b
|
||||||
audio/mpeg, m2a|mpa
|
audio/mpeg, m2a|mpa
|
||||||
audio/ogg, ogg
|
audio/ogg, ogg
|
||||||
audio/s3m, s3m
|
audio/s3m, s3m
|
||||||
@@ -180,7 +252,10 @@ audio/tsp-audio, tsi
|
|||||||
audio/tsplayer, tsp
|
audio/tsplayer, tsp
|
||||||
audio/vnd.qcelp, qcp
|
audio/vnd.qcelp, qcp
|
||||||
audio/voxware, vox
|
audio/voxware, vox
|
||||||
|
audio/x-aiff, aiff|aif
|
||||||
|
audio/x-flac, flac
|
||||||
audio/x-gsm, gsd|gsm
|
audio/x-gsm, gsd|gsm
|
||||||
|
audio/x-hx-aac-adts,
|
||||||
audio/x-jam, jam
|
audio/x-jam, jam
|
||||||
audio/x-liveaudio, lam
|
audio/x-liveaudio, lam
|
||||||
audio/x-m4a, m4a
|
audio/x-m4a, m4a
|
||||||
@@ -194,17 +269,24 @@ audio/x-nspaudio, lma
|
|||||||
audio/x-pn-realaudio, ram|rm|rmm|rmp
|
audio/x-pn-realaudio, ram|rm|rmm|rmp
|
||||||
audio/x-psid, sid
|
audio/x-psid, sid
|
||||||
audio/x-realaudio, ra
|
audio/x-realaudio, ra
|
||||||
|
audio/x-s3m,
|
||||||
audio/x-twinvq-plugin, vqe|vql
|
audio/x-twinvq-plugin, vqe|vql
|
||||||
audio/x-twinvq, vqf
|
audio/x-twinvq, vqf
|
||||||
audio/x-voc, voc
|
audio/x-voc, voc
|
||||||
audio/x-wav, wav
|
audio/x-wav, wav
|
||||||
|
!audio/x-xbox360-executable, xex
|
||||||
|
!audio/x-xbox-executable, xbe
|
||||||
font/otf,
|
font/otf,
|
||||||
font/sfnt,
|
font/sfnt,
|
||||||
|
font/woff2, woff2
|
||||||
|
font/woff, woff
|
||||||
|
image/bmp,
|
||||||
image/cmu-raster, rast
|
image/cmu-raster, rast
|
||||||
image/fif, fif
|
image/fif, fif
|
||||||
image/florian, flo|turbot
|
image/florian, flo|turbot
|
||||||
image/g3fax, g3
|
image/g3fax, g3
|
||||||
image/gif, gif
|
image/gif, gif
|
||||||
|
image/heic, heic
|
||||||
image/ief, ief|iefs
|
image/ief, ief|iefs
|
||||||
image/jpeg, jfif|jfif-tbnl|jpe|jpeg|jpg
|
image/jpeg, jfif|jfif-tbnl|jpe|jpeg|jpg
|
||||||
image/jutvision, jut
|
image/jutvision, jut
|
||||||
@@ -213,6 +295,9 @@ image/pict, pic|pict
|
|||||||
image/png, png|x-png
|
image/png, png|x-png
|
||||||
!image/svg, svg
|
!image/svg, svg
|
||||||
!image/svg+xml,
|
!image/svg+xml,
|
||||||
|
image/tiff,
|
||||||
|
!image/vnd.adobe.photoshop, psd
|
||||||
|
!image/vnd.djvu, djvu
|
||||||
image/vnd.fpx, fpx
|
image/vnd.fpx, fpx
|
||||||
image/vnd.microsoft.icon,
|
image/vnd.microsoft.icon,
|
||||||
image/vnd.rn-realflash, rf
|
image/vnd.rn-realflash, rf
|
||||||
@@ -220,9 +305,15 @@ image/vnd.rn-realpix, rp
|
|||||||
image/vnd.wap.wbmp, wbmp
|
image/vnd.wap.wbmp, wbmp
|
||||||
image/vnd.xiff, xif
|
image/vnd.xiff, xif
|
||||||
image/webp, webp
|
image/webp, webp
|
||||||
|
image/wmf,
|
||||||
|
image/x-3ds, 3ds
|
||||||
|
image/x-award-bioslogo,
|
||||||
image/x-cmu-raster, ras
|
image/x-cmu-raster, ras
|
||||||
|
image/x-cur, tga
|
||||||
image/x-dwg, dwg|dxf|svf
|
image/x-dwg, dwg|dxf|svf
|
||||||
image/x-eps,
|
image/x-eps,
|
||||||
|
image/x-exr, exr
|
||||||
|
image/x-gem,
|
||||||
image/x-icns,
|
image/x-icns,
|
||||||
!image/x-icon, ico
|
!image/x-icon, ico
|
||||||
image/x-jg, art
|
image/x-jg, art
|
||||||
@@ -236,32 +327,31 @@ image/x-portable-graymap, pgm
|
|||||||
image/x-portable-pixmap, ppm
|
image/x-portable-pixmap, ppm
|
||||||
image/x-quicktime, qif|qti|qtif
|
image/x-quicktime, qif|qti|qtif
|
||||||
image/x-rgb, rgb
|
image/x-rgb, rgb
|
||||||
|
image/x-tga,
|
||||||
image/x-tiff, tif|tiff
|
image/x-tiff, tif|tiff
|
||||||
image/tiff,
|
image/x-win-bitmap,
|
||||||
!image/x-xcf, xcf
|
!image/x-xcf, xcf
|
||||||
!image/x-xpixmap, xpm
|
!image/x-xpixmap, xpm
|
||||||
|
image/x-xwindowdump, xwd
|
||||||
|
message/news,
|
||||||
message/rfc822, mht|mhtml|mime
|
message/rfc822, mht|mhtml|mime
|
||||||
model/vnd.dwf, dwf
|
model/vnd.dwf, dwf
|
||||||
|
model/vnd.gdl, gdl
|
||||||
|
model/vnd.gs.gdl, gdsl
|
||||||
model/vrml, wrz
|
model/vrml, wrz
|
||||||
model/x-pov, pov
|
model/x-pov, pov
|
||||||
text/asp, asp
|
text/asp, asp
|
||||||
text/css, css
|
text/css, css
|
||||||
text/x-sass, sass
|
|
||||||
text/x-scss, scss
|
|
||||||
text/html, acgi|htm|html|htmls|htx|shtml
|
text/html, acgi|htm|html|htmls|htx|shtml
|
||||||
text/javascript, js
|
text/javascript, js
|
||||||
text/mcf, mcf
|
text/mcf, mcf
|
||||||
text/pascal, pas
|
text/pascal, pas
|
||||||
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml
|
text/PGP,
|
||||||
|
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
|
||||||
|
application/vnd.coffeescript, coffee
|
||||||
text/richtext, rt|rtf|rtx
|
text/richtext, rt|rtf|rtx
|
||||||
text/rtf,
|
text/rtf,
|
||||||
text/scriplet, wsc
|
text/scriplet, wsc
|
||||||
text/x-awk, awk
|
|
||||||
!video/x-jng, jng
|
|
||||||
video/x-mng, mng
|
|
||||||
image/x-cur, tga
|
|
||||||
image/x-xwindowdump, xwd
|
|
||||||
!image/vnd.adobe.photoshop, psd
|
|
||||||
text/tab-separated-values, tsv
|
text/tab-separated-values, tsv
|
||||||
text/troff, man|me|ms|roff|t|tr
|
text/troff, man|me|ms|roff|t|tr
|
||||||
text/uri-list, uji|unis|uri|uris
|
text/uri-list, uji|unis|uri|uris
|
||||||
@@ -273,6 +363,7 @@ text/webviewhtml, htt
|
|||||||
text/x-Algol68,
|
text/x-Algol68,
|
||||||
text/x-asm, asm|s
|
text/x-asm, asm|s
|
||||||
text/x-audiosoft-intra, aip
|
text/x-audiosoft-intra, aip
|
||||||
|
text/x-awk, awk
|
||||||
text/x-bcpl,
|
text/x-bcpl,
|
||||||
text/x-c, c|cc|h
|
text/x-c, c|cc|h
|
||||||
text/x-c++, cpp|cxx|c++
|
text/x-c++, cpp|cxx|c++
|
||||||
@@ -287,23 +378,31 @@ text/x-makefile, am|mak
|
|||||||
text/xml, xml|pom|iml|plist
|
text/xml, xml|pom|iml|plist
|
||||||
text/x-m, m
|
text/x-m, m
|
||||||
text/x-msdos-batch, bat
|
text/x-msdos-batch, bat
|
||||||
|
text/x-ms-regedit, reg
|
||||||
|
text/x-objective-c,
|
||||||
text/x-pascal, p
|
text/x-pascal, p
|
||||||
text/x-perl, pl
|
text/x-perl, pl
|
||||||
text/x-php, php
|
text/x-php, php
|
||||||
|
text/x-po, po
|
||||||
text/x-python, py
|
text/x-python, py
|
||||||
text/x-ruby, rb
|
text/x-ruby, rb
|
||||||
|
text/x-sass, sass
|
||||||
|
text/x-scss, scss
|
||||||
text/x-server-parsed-html, ssi
|
text/x-server-parsed-html, ssi
|
||||||
text/x-setext, etx
|
text/x-setext, etx
|
||||||
text/x-sgml, sgm|sgml
|
text/x-sgml, sgm|sgml
|
||||||
text/x-shellscript, sh
|
text/x-shellscript, sh
|
||||||
text/x-speech, talk
|
text/x-speech, talk
|
||||||
|
text/x-tcl,
|
||||||
text/x-tex, tex
|
text/x-tex, tex
|
||||||
text/x-uil, uil
|
text/x-uil, uil
|
||||||
text/x-uuencode, uue
|
text/x-uuencode, uue
|
||||||
text/x-vcalendar, vcs
|
text/x-vcalendar, vcs
|
||||||
|
text/x-vcard, vcf
|
||||||
video/animaflex, afl
|
video/animaflex, afl
|
||||||
video/avi, avi
|
video/avi, avi
|
||||||
video/avs-video, avs
|
video/avs-video, avs
|
||||||
|
video/MP2T,
|
||||||
video/mp4, mp4
|
video/mp4, mp4
|
||||||
video/mpeg, m1v|m2v|mpe|mpeg|mpg
|
video/mpeg, m1v|m2v|mpe|mpeg|mpg
|
||||||
video/quicktime, moov|mov|qt
|
video/quicktime, moov|mov|qt
|
||||||
@@ -318,101 +417,35 @@ video/x-atomic3d-feature, fmf
|
|||||||
video/x-dl, dl
|
video/x-dl, dl
|
||||||
video/x-dv, dif|dv
|
video/x-dv, dif|dv
|
||||||
video/x-fli, fli
|
video/x-fli, fli
|
||||||
|
video/x-flv, flv
|
||||||
video/x-isvideo, isu
|
video/x-isvideo, isu
|
||||||
|
!video/x-jng, jng
|
||||||
|
video/x-m4v, m4v
|
||||||
|
video/x-matroska, mkv
|
||||||
|
video/x-mng, mng
|
||||||
video/x-motion-jpeg, mjpg
|
video/x-motion-jpeg, mjpg
|
||||||
video/x-ms-asf, asf|asx
|
video/x-ms-asf, asf|asx|wmv
|
||||||
|
video/x-msvideo, divx
|
||||||
video/x-qtc, qtc
|
video/x-qtc, qtc
|
||||||
video/x-sgi-movie, movie|mv
|
video/x-sgi-movie, movie|mv
|
||||||
application/x-7z-compressed, 7z
|
|
||||||
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
|
|
||||||
text/x-po, po
|
|
||||||
application/x-rpm, rpm
|
|
||||||
application/x-debian-package, deb
|
|
||||||
application/vnd.iccprofile, icm
|
|
||||||
application/dicom, dcm
|
|
||||||
image/x-exr, exr
|
|
||||||
application/vnd.iccprofile, icm
|
|
||||||
video/x-matroska, mkv
|
|
||||||
application/x-empty,
|
|
||||||
model/vnd.gdl, gdl
|
|
||||||
model/vnd.gs.gdl, gdsl
|
|
||||||
font/woff, woff
|
|
||||||
font/woff2, woff2
|
|
||||||
application/epub+zip, epub
|
|
||||||
application/x-mobipocket-ebook, mobi
|
|
||||||
audio/x-flac, flac
|
|
||||||
application/x-rar, rar
|
|
||||||
video/x-msvideo, divx
|
|
||||||
video/x-flv, flv
|
|
||||||
application/x-kdelnk,
|
|
||||||
text/x-tcl,
|
|
||||||
application/ogg, ogv
|
|
||||||
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
|
|
||||||
application/vnd.ms-cab-compressed, cab
|
|
||||||
audio/mp4, m4b
|
|
||||||
!image/vnd.djvu, djvu
|
|
||||||
application/x-ms-reader, lit
|
|
||||||
application/CDFV2-corrupt,
|
|
||||||
text/x-vcard, vcf
|
|
||||||
application/x-innosetup,
|
|
||||||
application/winhelp, hlp
|
|
||||||
image/x-tga,
|
|
||||||
application/x-wine-extension-ini,
|
|
||||||
application/x-cbz, cbz
|
|
||||||
application/x-cbr, cbr
|
|
||||||
application/x-ms-compress-szdd, fon
|
|
||||||
application/x-atari-7800-rom, a78
|
|
||||||
application/x-nes-rom, nes
|
|
||||||
application/x-font-pfm, pfm
|
|
||||||
application/x-gettext-translation,
|
|
||||||
image/wmf,
|
|
||||||
application/pgp-keys,
|
|
||||||
image/x-3ds, 3ds
|
|
||||||
application/x-lz4, lz4
|
|
||||||
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
|
|
||||||
application/vnd.oasis.opendocument.presentation, odp
|
|
||||||
application/x-msaccess, accdb
|
|
||||||
application/vnd.oasis.opendocument.spreadsheet, ods
|
|
||||||
audio/x-aiff, aiff|aif
|
|
||||||
text/x-ms-regedit, reg
|
|
||||||
application/x-gamecube-rom,
|
|
||||||
application/x-nintendo-ds-rom,
|
|
||||||
text/x-objective-c,
|
|
||||||
application/x-font-gdos,
|
|
||||||
application/x-apple-diskimage,
|
|
||||||
application/x-zstd, zst
|
|
||||||
video/x-m4v, m4v
|
|
||||||
message/news,
|
|
||||||
application/vnd.symbian.install,
|
|
||||||
application/x-lzh-compressed,
|
|
||||||
application/x-dosdriver,
|
|
||||||
application/vnd.tcpdump.pcap, pcap
|
|
||||||
x-epoc/x-sisx-app,
|
x-epoc/x-sisx-app,
|
||||||
application/x-avira-qua,
|
application/x-zstd-dictionary,
|
||||||
video/MP2T,
|
application/vnd.ms-outlook, msg
|
||||||
application/x-snappy-framed,
|
image/x-olympus-orf, orf
|
||||||
application/x-lz4+json, jsonlz4
|
image/x-nikon-nef, nef
|
||||||
application/x-dmp, dmp
|
image/x-fuji-raf, raf
|
||||||
application/zlib, z
|
image/x-panasonic-raw, rw2|raw
|
||||||
application/x-pgp-keyring,
|
image/x-adobe-dng, dng
|
||||||
application/x-gdbm,
|
image/x-canon-cr2, cr2
|
||||||
application/x-font-pf2, pf2
|
image/x-canon-crw, crw
|
||||||
application/x-zip,
|
image/x-dcraw,
|
||||||
application/x-coredump,
|
image/x-kodak-dcr, dcr
|
||||||
application/x-java-jmod, jmod
|
image/x-kodak-k25, k25
|
||||||
application/x-terminfo,
|
image/x-kodak-kdc, kdc
|
||||||
application/x-terminfo2,
|
image/x-minolta-mrw, mrw
|
||||||
application/x-arc,
|
image/x-pentax-pef, pef
|
||||||
application/vnd.lotus-1-2-3,
|
image/x-sigma-x3f, xf3
|
||||||
image/x-win-bitmap,
|
image/x-sony-arw, arw
|
||||||
application/x-maxis-dbpf,
|
image/x-sony-sr2, sr2
|
||||||
text/PGP,
|
image/x-sony-srf, srf
|
||||||
audio/x-hx-aac-adts,
|
image/x-epson-erf, erf
|
||||||
application/x-chrome-extension,
|
|
||||||
image/heic, heic
|
|
||||||
image/x-gem,
|
|
||||||
application/x-lzma, lzma
|
|
||||||
application/warc, warc
|
|
||||||
application/x-lz4, lz4
|
|
||||||
application/x-lzip, lz
|
|
||||||
application/x-lzop, lzo
|
|
||||||
|
@@ -18,7 +18,6 @@ major_mime = {
|
|||||||
|
|
||||||
pdf = (
|
pdf = (
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
"application/x-cbz",
|
|
||||||
"application/epub+zip",
|
"application/epub+zip",
|
||||||
"application/vnd.ms-xpsdocument",
|
"application/vnd.ms-xpsdocument",
|
||||||
)
|
)
|
||||||
@@ -62,6 +61,40 @@ doc = (
|
|||||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
mobi = (
|
||||||
|
"application/x-mobipocket-ebook",
|
||||||
|
"application/vnd.amazon.mobi8-ebook"
|
||||||
|
)
|
||||||
|
|
||||||
|
markup = (
|
||||||
|
"text/xml",
|
||||||
|
"text/html",
|
||||||
|
"text/x-sgml"
|
||||||
|
)
|
||||||
|
|
||||||
|
raw = (
|
||||||
|
"image/x-olympus-orf",
|
||||||
|
"image/x-nikon-nef",
|
||||||
|
"image/x-fuji-raf",
|
||||||
|
"image/x-panasonic-raw",
|
||||||
|
"image/x-adobe-dng",
|
||||||
|
"image/x-canon-cr2",
|
||||||
|
"image/x-canon-crw",
|
||||||
|
"image/x-dcraw",
|
||||||
|
"image/x-kodak-dcr",
|
||||||
|
"image/x-kodak-k25",
|
||||||
|
"image/x-kodak-kdc",
|
||||||
|
"image/x-minolta-mrw",
|
||||||
|
"image/x-pentax-pef",
|
||||||
|
"image/x-sigma-x3f",
|
||||||
|
"image/x-sony-arw",
|
||||||
|
"image/x-sony-sr2",
|
||||||
|
"image/x-sony-srf",
|
||||||
|
"image/x-minolta-mrw",
|
||||||
|
"image/x-pentax-pef",
|
||||||
|
"image/x-epson-erf",
|
||||||
|
)
|
||||||
|
|
||||||
cnt = 1
|
cnt = 1
|
||||||
|
|
||||||
|
|
||||||
@@ -82,6 +115,12 @@ def mime_id(mime):
|
|||||||
mime_id += " | 0x08000000"
|
mime_id += " | 0x08000000"
|
||||||
elif mime in doc:
|
elif mime in doc:
|
||||||
mime_id += " | 0x04000000"
|
mime_id += " | 0x04000000"
|
||||||
|
elif mime in mobi:
|
||||||
|
mime_id += " | 0x02000000"
|
||||||
|
elif mime in markup:
|
||||||
|
mime_id += " | 0x01000000"
|
||||||
|
elif mime in raw:
|
||||||
|
mime_id += " | 0x00800000"
|
||||||
elif mime == "application/x-empty":
|
elif mime == "application/x-empty":
|
||||||
return "1"
|
return "1"
|
||||||
return mime_id
|
return mime_id
|
||||||
@@ -91,7 +130,7 @@ def clean(t):
|
|||||||
return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_")
|
return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_")
|
||||||
|
|
||||||
|
|
||||||
with open("mime.csv") as f:
|
with open("scripts/mime.csv") as f:
|
||||||
for l in f:
|
for l in f:
|
||||||
mime, ext_list = l.split(",")
|
mime, ext_list = l.split(",")
|
||||||
if l.startswith("!"):
|
if l.startswith("!"):
|
||||||
@@ -103,7 +142,7 @@ with open("mime.csv") as f:
|
|||||||
print("// **Generated by mime.py**")
|
print("// **Generated by mime.py**")
|
||||||
print("#ifndef MIME_GENERATED_C")
|
print("#ifndef MIME_GENERATED_C")
|
||||||
print("#define MIME_GENERATED_C")
|
print("#define MIME_GENERATED_C")
|
||||||
print("#include <glib-2.0/glib.h>\n")
|
print("#include <glib.h>\n")
|
||||||
print("#include <stdlib.h>\n")
|
print("#include <stdlib.h>\n")
|
||||||
# Enum
|
# Enum
|
||||||
print("enum mime {")
|
print("enum mime {")
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
|
|||||||
files = [
|
files = [
|
||||||
"web/css/bundle.css",
|
"src/static/css/bundle.css",
|
||||||
"web/css/bundle_dark.css",
|
"src/static/css/bundle_dark.css",
|
||||||
"web/js/bundle.js",
|
"src/static/js/bundle.js",
|
||||||
"web/img/sprite-skin-flat.png",
|
"src/static/js/search.js",
|
||||||
"web/img/sprite-skin-flat-dark.png",
|
"src/static/img/sprite-skin-flat.png",
|
||||||
"web/search.html",
|
"src/static/img/sprite-skin-flat-dark.png",
|
||||||
|
"src/static/search.html",
|
||||||
|
"src/static/stats.html",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
248
src/cli.c
248
src/cli.c
@@ -1,6 +1,5 @@
|
|||||||
#include "cli.h"
|
#include "cli.h"
|
||||||
#include "ctx.h"
|
#include "ctx.h"
|
||||||
|
|
||||||
#include <tesseract/capi.h>
|
#include <tesseract/capi.h>
|
||||||
|
|
||||||
#define DEFAULT_OUTPUT "index.sist2/"
|
#define DEFAULT_OUTPUT "index.sist2/"
|
||||||
@@ -10,10 +9,20 @@
|
|||||||
#define DEFAULT_REWRITE_URL ""
|
#define DEFAULT_REWRITE_URL ""
|
||||||
|
|
||||||
#define DEFAULT_ES_URL "http://localhost:9200"
|
#define DEFAULT_ES_URL "http://localhost:9200"
|
||||||
|
#define DEFAULT_ES_INDEX "sist2"
|
||||||
#define DEFAULT_BATCH_SIZE 100
|
#define DEFAULT_BATCH_SIZE 100
|
||||||
|
|
||||||
#define DEFAULT_BIND_ADDR "localhost"
|
#define DEFAULT_LISTEN_ADDRESS "localhost:4090"
|
||||||
#define DEFAULT_PORT "4090"
|
#define DEFAULT_TREEMAP_THRESHOLD 0.0005
|
||||||
|
|
||||||
|
#define DEFAULT_MAX_MEM_BUFFER 2000
|
||||||
|
|
||||||
|
const char *TESS_DATAPATHS[] = {
|
||||||
|
"/usr/share/tessdata/",
|
||||||
|
"/usr/share/tesseract-ocr/tessdata/",
|
||||||
|
"./",
|
||||||
|
NULL
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
scan_args_t *scan_args_create() {
|
scan_args_t *scan_args_create() {
|
||||||
@@ -24,10 +33,18 @@ scan_args_t *scan_args_create() {
|
|||||||
return args;
|
return args;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
exec_args_t *exec_args_create() {
|
||||||
|
exec_args_t *args = calloc(sizeof(exec_args_t), 1);
|
||||||
|
return args;
|
||||||
|
}
|
||||||
|
|
||||||
void scan_args_destroy(scan_args_t *args) {
|
void scan_args_destroy(scan_args_t *args) {
|
||||||
if (args->name != NULL) {
|
if (args->name != NULL) {
|
||||||
free(args->name);
|
free(args->name);
|
||||||
}
|
}
|
||||||
|
if (args->incremental != NULL) {
|
||||||
|
free(args->incremental);
|
||||||
|
}
|
||||||
if (args->path != NULL) {
|
if (args->path != NULL) {
|
||||||
free(args->path);
|
free(args->path);
|
||||||
}
|
}
|
||||||
@@ -47,6 +64,10 @@ void web_args_destroy(web_args_t *args) {
|
|||||||
free(args);
|
free(args);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void exec_args_destroy(exec_args_t *args) {
|
||||||
|
free(args);
|
||||||
|
}
|
||||||
|
|
||||||
int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||||
if (argc < 2) {
|
if (argc < 2) {
|
||||||
fprintf(stderr, "Required positional argument: PATH.\n");
|
fprintf(stderr, "Required positional argument: PATH.\n");
|
||||||
@@ -62,10 +83,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (args->incremental != NULL) {
|
if (args->incremental != NULL) {
|
||||||
abs_path = abspath(args->incremental);
|
args->incremental = abspath(args->incremental);
|
||||||
if (abs_path == NULL) {
|
if (abs_path == NULL) {
|
||||||
fprintf(stderr, "File not found: %s\n", args->incremental);
|
sist_log("main.c", SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
|
||||||
return 1;
|
args->incremental = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -107,7 +128,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (args->depth < 0) {
|
if (args->depth <= 0) {
|
||||||
args->depth = G_MAXINT32;
|
args->depth = G_MAXINT32;
|
||||||
} else {
|
} else {
|
||||||
args->depth += 1;
|
args->depth += 1;
|
||||||
@@ -115,6 +136,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
|||||||
|
|
||||||
if (args->name == NULL) {
|
if (args->name == NULL) {
|
||||||
args->name = g_path_get_basename(args->output);
|
args->name = g_path_get_basename(args->output);
|
||||||
|
} else {
|
||||||
|
char* tmp = malloc(strlen(args->name) + 1);
|
||||||
|
strcpy(tmp, args->name);
|
||||||
|
args->name = tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (args->rewrite_url == NULL) {
|
if (args->rewrite_url == NULL) {
|
||||||
@@ -136,13 +161,53 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
|||||||
|
|
||||||
if (args->tesseract_lang != NULL) {
|
if (args->tesseract_lang != NULL) {
|
||||||
TessBaseAPI *api = TessBaseAPICreate();
|
TessBaseAPI *api = TessBaseAPICreate();
|
||||||
ret = TessBaseAPIInit3(api, TESS_DATAPATH, args->tesseract_lang);
|
|
||||||
|
char filename[128];
|
||||||
|
sprintf(filename, "%s.traineddata", args->tesseract_lang);
|
||||||
|
const char *path = find_file_in_paths(TESS_DATAPATHS, filename);
|
||||||
|
if (path == NULL) {
|
||||||
|
LOG_FATAL("cli.c", "Could not find tesseract language file!");
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = TessBaseAPIInit3(api, path, args->tesseract_lang);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang);
|
fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
TessBaseAPIEnd(api);
|
TessBaseAPIEnd(api);
|
||||||
TessBaseAPIDelete(api);
|
TessBaseAPIDelete(api);
|
||||||
|
|
||||||
|
args->tesseract_path = path;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args->exclude_regex != NULL) {
|
||||||
|
const char *error;
|
||||||
|
int error_offset;
|
||||||
|
|
||||||
|
pcre *re = pcre_compile(args->exclude_regex, 0, &error, &error_offset, 0);
|
||||||
|
if (error != NULL) {
|
||||||
|
LOG_FATALF("cli.c", "pcre_compile returned error: %s (offset:%d)", error, error_offset)
|
||||||
|
}
|
||||||
|
|
||||||
|
pcre_extra *re_extra = pcre_study(re, 0, &error);
|
||||||
|
if (error != NULL) {
|
||||||
|
LOG_FATALF("cli.c", "pcre_study returned error: %s", error)
|
||||||
|
}
|
||||||
|
|
||||||
|
ScanCtx.exclude = re;
|
||||||
|
ScanCtx.exclude_extra = re_extra;
|
||||||
|
} else {
|
||||||
|
ScanCtx.exclude = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args->treemap_threshold_str == 0) {
|
||||||
|
args->treemap_threshold = DEFAULT_TREEMAP_THRESHOLD;
|
||||||
|
} else {
|
||||||
|
args->treemap_threshold = atof(args->treemap_threshold_str);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args->max_memory_buffer == 0) {
|
||||||
|
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
|
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
|
||||||
@@ -156,7 +221,40 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
|||||||
LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
|
LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
|
||||||
LOG_DEBUGF("cli.c", "arg path=%s", args->path)
|
LOG_DEBUGF("cli.c", "arg path=%s", args->path)
|
||||||
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
|
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
|
||||||
LOG_DEBUGF("cli.c", "arg ocr=%s", args->tesseract_lang)
|
LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
|
||||||
|
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
|
||||||
|
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
|
||||||
|
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
|
||||||
|
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
|
||||||
|
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int load_script(const char *script_path, char **dst) {
|
||||||
|
struct stat info;
|
||||||
|
int res = stat(script_path, &info);
|
||||||
|
|
||||||
|
if (res == -1) {
|
||||||
|
fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int fd = open(script_path, O_RDONLY);
|
||||||
|
if (fd == -1) {
|
||||||
|
fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
*dst = malloc(info.st_size + 1);
|
||||||
|
res = read(fd, *dst, info.st_size);
|
||||||
|
if (res < 0) {
|
||||||
|
fprintf(stderr, "Error reading script file '%s': %s\n", script_path, strerror(errno));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
*(*dst + info.st_size) = '\0';
|
||||||
|
close(fd);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -170,6 +268,13 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (args->threads == 0) {
|
||||||
|
args->threads = 1;
|
||||||
|
} else if (args->threads < 0) {
|
||||||
|
fprintf(stderr, "Invalid threads: %d\n", args->threads);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
char *index_path = abspath(argv[1]);
|
char *index_path = abspath(argv[1]);
|
||||||
if (index_path == NULL) {
|
if (index_path == NULL) {
|
||||||
fprintf(stderr, "File not found: %s\n", argv[1]);
|
fprintf(stderr, "File not found: %s\n", argv[1]);
|
||||||
@@ -183,30 +288,14 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
|||||||
args->es_url = DEFAULT_ES_URL;
|
args->es_url = DEFAULT_ES_URL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (args->es_index == NULL) {
|
||||||
|
args->es_index = DEFAULT_ES_INDEX;
|
||||||
|
}
|
||||||
|
|
||||||
if (args->script_path != NULL) {
|
if (args->script_path != NULL) {
|
||||||
struct stat info;
|
if (load_script(args->script_path, &args->script) != 0) {
|
||||||
int res = stat(args->script_path, &info);
|
|
||||||
|
|
||||||
if (res == -1) {
|
|
||||||
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int fd = open(args->script_path, O_RDONLY);
|
|
||||||
if (fd == -1) {
|
|
||||||
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
args->script = malloc(info.st_size + 1);
|
|
||||||
res = read(fd, args->script, info.st_size);
|
|
||||||
if (res == -1) {
|
|
||||||
fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno));
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
*(args->script + info.st_size) = '\0';
|
|
||||||
close(fd);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (args->batch_size == 0) {
|
if (args->batch_size == 0) {
|
||||||
@@ -214,8 +303,10 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
|
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
|
||||||
|
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
|
||||||
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
|
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
|
||||||
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
|
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
|
||||||
|
LOG_DEBUGF("cli.c", "arg async_script=%s", args->async_script)
|
||||||
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
|
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
|
||||||
LOG_DEBUGF("cli.c", "arg print=%d", args->print)
|
LOG_DEBUGF("cli.c", "arg print=%d", args->print)
|
||||||
LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size)
|
LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size)
|
||||||
@@ -237,18 +328,57 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
|
|||||||
args->es_url = DEFAULT_ES_URL;
|
args->es_url = DEFAULT_ES_URL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (args->bind == NULL) {
|
if (args->listen_address == NULL) {
|
||||||
args->bind = DEFAULT_BIND_ADDR;
|
args->listen_address = DEFAULT_LISTEN_ADDRESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (args->port == NULL) {
|
if (args->es_index == NULL) {
|
||||||
args->port = DEFAULT_PORT;
|
args->es_index = DEFAULT_ES_INDEX;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (args->credentials != NULL) {
|
if (args->credentials != NULL) {
|
||||||
args->b64credentials = onion_base64_encode(args->credentials, (int) strlen(args->credentials));
|
char *ptr = strstr(args->credentials, ":");
|
||||||
//Remove trailing newline
|
if (ptr == NULL) {
|
||||||
*(args->b64credentials + strlen(args->b64credentials) - 1) = '\0';
|
fprintf(stderr, "Invalid --auth format, see usage\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
strncpy(args->auth_user, args->credentials, (ptr - args->credentials));
|
||||||
|
strcpy(args->auth_pass, ptr + 1);
|
||||||
|
|
||||||
|
if (strlen(args->auth_user) == 0) {
|
||||||
|
fprintf(stderr, "--auth username must be at least one character long");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
args->auth_enabled = TRUE;
|
||||||
|
} else {
|
||||||
|
args->auth_enabled = FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args->tag_credentials != NULL && args->credentials != NULL) {
|
||||||
|
fprintf(stderr, "--auth and --tag-auth are mutually exclusive");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args->tag_credentials != NULL) {
|
||||||
|
char *ptr = strstr(args->tag_credentials, ":");
|
||||||
|
if (ptr == NULL) {
|
||||||
|
fprintf(stderr, "Invalid --tag-auth format, see usage\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
strncpy(args->auth_user, args->tag_credentials, (ptr - args->tag_credentials));
|
||||||
|
strcpy(args->auth_pass, ptr + 1);
|
||||||
|
|
||||||
|
if (strlen(args->auth_user) == 0) {
|
||||||
|
fprintf(stderr, "--tag-auth username must be at least one character long");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
args->tag_auth_enabled = TRUE;
|
||||||
|
} else {
|
||||||
|
args->tag_auth_enabled = FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
args->index_count = argc - 1;
|
args->index_count = argc - 1;
|
||||||
@@ -263,10 +393,12 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
|
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
|
||||||
LOG_DEBUGF("cli.c", "arg bind=%s", args->bind)
|
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
|
||||||
LOG_DEBUGF("cli.c", "arg port=%s", args->port)
|
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)
|
||||||
LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials)
|
LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials)
|
||||||
LOG_DEBUGF("cli.c", "arg b64credentials=%s", args->b64credentials)
|
LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials)
|
||||||
|
LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user)
|
||||||
|
LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass)
|
||||||
LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count)
|
LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count)
|
||||||
for (int i = 0; i < args->index_count; i++) {
|
for (int i = 0; i < args->index_count; i++) {
|
||||||
LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i])
|
LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i])
|
||||||
@@ -285,3 +417,39 @@ web_args_t *web_args_create() {
|
|||||||
return args;
|
return args;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
|
||||||
|
|
||||||
|
if (argc < 2) {
|
||||||
|
fprintf(stderr, "Required positional argument: PATH.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *index_path = abspath(argv[1]);
|
||||||
|
if (index_path == NULL) {
|
||||||
|
fprintf(stderr, "File not found: %s\n", argv[1]);
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
args->index_path = argv[1];
|
||||||
|
free(index_path);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args->es_url == NULL) {
|
||||||
|
args->es_url = DEFAULT_ES_URL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args->es_index == NULL) {
|
||||||
|
args->es_index = DEFAULT_ES_INDEX;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args->script_path == NULL) {
|
||||||
|
LOG_FATAL("cli.c", "--script-file argument is required");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (load_script(args->script_path, &args->script) != 0) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
|
||||||
|
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|||||||
41
src/cli.h
41
src/cli.h
@@ -3,6 +3,8 @@
|
|||||||
|
|
||||||
#include "sist.h"
|
#include "sist.h"
|
||||||
|
|
||||||
|
#include "libscan/arc/arc.h"
|
||||||
|
|
||||||
typedef struct scan_args {
|
typedef struct scan_args {
|
||||||
float quality;
|
float quality;
|
||||||
int size;
|
int size;
|
||||||
@@ -17,39 +19,72 @@ typedef struct scan_args {
|
|||||||
char *archive;
|
char *archive;
|
||||||
archive_mode_t archive_mode;
|
archive_mode_t archive_mode;
|
||||||
char *tesseract_lang;
|
char *tesseract_lang;
|
||||||
|
const char *tesseract_path;
|
||||||
|
char *exclude_regex;
|
||||||
|
int fast;
|
||||||
|
const char* treemap_threshold_str;
|
||||||
|
double treemap_threshold;
|
||||||
|
int max_memory_buffer;
|
||||||
} scan_args_t;
|
} scan_args_t;
|
||||||
|
|
||||||
scan_args_t *scan_args_create();
|
scan_args_t *scan_args_create();
|
||||||
|
|
||||||
void scan_args_destroy(scan_args_t *args);
|
void scan_args_destroy(scan_args_t *args);
|
||||||
|
|
||||||
int scan_args_validate(scan_args_t *args, int argc, const char **argv);
|
int scan_args_validate(scan_args_t *args, int argc, const char **argv);
|
||||||
|
|
||||||
typedef struct index_args {
|
typedef struct index_args {
|
||||||
char *es_url;
|
char *es_url;
|
||||||
|
char *es_index;
|
||||||
const char *index_path;
|
const char *index_path;
|
||||||
const char *script_path;
|
const char *script_path;
|
||||||
char *script;
|
char *script;
|
||||||
int print;
|
int print;
|
||||||
int batch_size;
|
int batch_size;
|
||||||
|
int async_script;
|
||||||
int force_reset;
|
int force_reset;
|
||||||
|
int threads;
|
||||||
} index_args_t;
|
} index_args_t;
|
||||||
|
|
||||||
typedef struct web_args {
|
typedef struct web_args {
|
||||||
char *es_url;
|
char *es_url;
|
||||||
char *bind;
|
char *es_index;
|
||||||
char *port;
|
char *listen_address;
|
||||||
char *credentials;
|
char *credentials;
|
||||||
char *b64credentials;
|
char *tag_credentials;
|
||||||
|
char auth_user[256];
|
||||||
|
char auth_pass[256];
|
||||||
|
int auth_enabled;
|
||||||
|
int tag_auth_enabled;
|
||||||
int index_count;
|
int index_count;
|
||||||
const char **indices;
|
const char **indices;
|
||||||
} web_args_t;
|
} web_args_t;
|
||||||
|
|
||||||
|
typedef struct exec_args {
|
||||||
|
char *es_url;
|
||||||
|
char *es_index;
|
||||||
|
const char *index_path;
|
||||||
|
const char *script_path;
|
||||||
|
int async_script;
|
||||||
|
char *script;
|
||||||
|
} exec_args_t;
|
||||||
|
|
||||||
index_args_t *index_args_create();
|
index_args_t *index_args_create();
|
||||||
|
|
||||||
void index_args_destroy(index_args_t *args);
|
void index_args_destroy(index_args_t *args);
|
||||||
|
|
||||||
web_args_t *web_args_create();
|
web_args_t *web_args_create();
|
||||||
|
|
||||||
void web_args_destroy(web_args_t *args);
|
void web_args_destroy(web_args_t *args);
|
||||||
|
|
||||||
int index_args_validate(index_args_t *args, int argc, const char **argv);
|
int index_args_validate(index_args_t *args, int argc, const char **argv);
|
||||||
|
|
||||||
int web_args_validate(web_args_t *args, int argc, const char **argv);
|
int web_args_validate(web_args_t *args, int argc, const char **argv);
|
||||||
|
|
||||||
|
exec_args_t *exec_args_create();
|
||||||
|
|
||||||
|
void exec_args_destroy(exec_args_t *args);
|
||||||
|
|
||||||
|
int exec_args_validate(exec_args_t *args, int argc, const char **argv);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
6
src/ctx.c
Normal file
6
src/ctx.c
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
#include "ctx.h"
|
||||||
|
|
||||||
|
ScanCtx_t ScanCtx;
|
||||||
|
WebCtx_t WebCtx;
|
||||||
|
IndexCtx_t IndexCtx;
|
||||||
|
LogCtx_t LogCtx;
|
||||||
69
src/ctx.h
69
src/ctx.h
@@ -2,8 +2,23 @@
|
|||||||
#define SIST2_CTX_H
|
#define SIST2_CTX_H
|
||||||
|
|
||||||
#include "sist.h"
|
#include "sist.h"
|
||||||
|
#include "tpool.h"
|
||||||
|
#include "libscan/scan.h"
|
||||||
|
#include "libscan/arc/arc.h"
|
||||||
|
#include "libscan/comic/comic.h"
|
||||||
|
#include "libscan/ebook/ebook.h"
|
||||||
|
#include "libscan/font/font.h"
|
||||||
|
#include "libscan/media/media.h"
|
||||||
|
#include "libscan/ooxml/ooxml.h"
|
||||||
|
#include "libscan/text/text.h"
|
||||||
|
#include "libscan/mobi/scan_mobi.h"
|
||||||
|
#include "libscan/raw/raw.h"
|
||||||
|
#include "src/io/store.h"
|
||||||
|
|
||||||
struct {
|
#include <glib.h>
|
||||||
|
#include <pcre.h>
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
struct index_t index;
|
struct index_t index;
|
||||||
|
|
||||||
GHashTable *mime_table;
|
GHashTable *mime_table;
|
||||||
@@ -11,14 +26,8 @@ struct {
|
|||||||
|
|
||||||
tpool_t *pool;
|
tpool_t *pool;
|
||||||
|
|
||||||
int tn_size;
|
|
||||||
int threads;
|
int threads;
|
||||||
int content_size;
|
|
||||||
float tn_qscale;
|
|
||||||
int depth;
|
int depth;
|
||||||
archive_mode_t archive_mode;
|
|
||||||
int verbose;
|
|
||||||
int very_verbose;
|
|
||||||
|
|
||||||
size_t stat_tn_size;
|
size_t stat_tn_size;
|
||||||
size_t stat_index_size;
|
size_t stat_index_size;
|
||||||
@@ -26,27 +35,51 @@ struct {
|
|||||||
GHashTable *original_table;
|
GHashTable *original_table;
|
||||||
GHashTable *copy_table;
|
GHashTable *copy_table;
|
||||||
|
|
||||||
pthread_mutex_t mupdf_mu;
|
pcre *exclude;
|
||||||
char * tesseract_lang;
|
pcre_extra *exclude_extra;
|
||||||
} ScanCtx;
|
int fast;
|
||||||
|
|
||||||
struct {
|
scan_arc_ctx_t arc_ctx;
|
||||||
|
scan_comic_ctx_t comic_ctx;
|
||||||
|
scan_ebook_ctx_t ebook_ctx;
|
||||||
|
scan_font_ctx_t font_ctx;
|
||||||
|
scan_media_ctx_t media_ctx;
|
||||||
|
scan_ooxml_ctx_t ooxml_ctx;
|
||||||
|
scan_text_ctx_t text_ctx;
|
||||||
|
scan_mobi_ctx_t mobi_ctx;
|
||||||
|
scan_raw_ctx_t raw_ctx;
|
||||||
|
} ScanCtx_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
int verbose;
|
int verbose;
|
||||||
int very_verbose;
|
int very_verbose;
|
||||||
int no_color;
|
int no_color;
|
||||||
} LogCtx;
|
} LogCtx_t;
|
||||||
|
|
||||||
struct {
|
typedef struct {
|
||||||
char *es_url;
|
char *es_url;
|
||||||
|
char *es_index;
|
||||||
int batch_size;
|
int batch_size;
|
||||||
} IndexCtx;
|
tpool_t *pool;
|
||||||
|
store_t *tag_store;
|
||||||
|
GHashTable *tags;
|
||||||
|
} IndexCtx_t;
|
||||||
|
|
||||||
struct {
|
typedef struct {
|
||||||
char *es_url;
|
char *es_url;
|
||||||
|
char *es_index;
|
||||||
int index_count;
|
int index_count;
|
||||||
char *b64credentials;
|
char *auth_user;
|
||||||
struct index_t indices[16];
|
char *auth_pass;
|
||||||
} WebCtx;
|
int auth_enabled;
|
||||||
|
int tag_auth_enabled;
|
||||||
|
struct index_t indices[64];
|
||||||
|
} WebCtx_t;
|
||||||
|
|
||||||
|
extern ScanCtx_t ScanCtx;
|
||||||
|
extern WebCtx_t WebCtx;
|
||||||
|
extern IndexCtx_t IndexCtx;
|
||||||
|
extern LogCtx_t LogCtx;
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1,11 +1,7 @@
|
|||||||
#include "elastic.h"
|
#include "elastic.h"
|
||||||
#include "src/ctx.h"
|
#include "src/ctx.h"
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include "web.h"
|
#include "web.h"
|
||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <cJSON/cJSON.h>
|
|
||||||
|
|
||||||
#include "static_generated.c"
|
#include "static_generated.c"
|
||||||
|
|
||||||
@@ -13,19 +9,33 @@
|
|||||||
typedef struct es_indexer {
|
typedef struct es_indexer {
|
||||||
int queued;
|
int queued;
|
||||||
char *es_url;
|
char *es_url;
|
||||||
|
char *es_index;
|
||||||
es_bulk_line_t *line_head;
|
es_bulk_line_t *line_head;
|
||||||
es_bulk_line_t *line_tail;
|
es_bulk_line_t *line_tail;
|
||||||
} es_indexer_t;
|
} es_indexer_t;
|
||||||
|
|
||||||
|
|
||||||
static es_indexer_t *Indexer;
|
static __thread es_indexer_t *Indexer;
|
||||||
|
|
||||||
|
void delete_queue(int max);
|
||||||
|
|
||||||
|
void elastic_flush();
|
||||||
|
|
||||||
|
void elastic_cleanup() {
|
||||||
|
elastic_flush();
|
||||||
|
if (Indexer != NULL) {
|
||||||
|
free(Indexer->es_index);
|
||||||
|
free(Indexer->es_url);
|
||||||
|
free(Indexer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
||||||
|
|
||||||
cJSON *line = cJSON_CreateObject();
|
cJSON *line = cJSON_CreateObject();
|
||||||
|
|
||||||
cJSON_AddStringToObject(line, "_id", uuid_str);
|
cJSON_AddStringToObject(line, "_id", uuid_str);
|
||||||
cJSON_AddStringToObject(line, "_index", "sist2");
|
cJSON_AddStringToObject(line, "_index", IndexCtx.es_index);
|
||||||
cJSON_AddStringToObject(line, "_type", "_doc");
|
cJSON_AddStringToObject(line, "_type", "_doc");
|
||||||
cJSON_AddItemReferenceToObject(line, "_source", document);
|
cJSON_AddItemReferenceToObject(line, "_source", document);
|
||||||
|
|
||||||
@@ -37,8 +47,12 @@ void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
|||||||
cJSON_Delete(line);
|
cJSON_Delete(line);
|
||||||
}
|
}
|
||||||
|
|
||||||
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
void index_json_func(void *arg) {
|
||||||
|
es_bulk_line_t *line = arg;
|
||||||
|
elastic_index_line(line);
|
||||||
|
}
|
||||||
|
|
||||||
|
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
||||||
char *json = cJSON_PrintUnformatted(document);
|
char *json = cJSON_PrintUnformatted(document);
|
||||||
|
|
||||||
size_t json_len = strlen(json);
|
size_t json_len = strlen(json);
|
||||||
@@ -50,10 +64,14 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
|||||||
bulk_line->next = NULL;
|
bulk_line->next = NULL;
|
||||||
|
|
||||||
cJSON_free(json);
|
cJSON_free(json);
|
||||||
elastic_index_line(bulk_line);
|
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
|
||||||
}
|
}
|
||||||
|
|
||||||
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {
|
void execute_update_script(const char *script, int async, const char index_id[UUID_STR_LEN]) {
|
||||||
|
|
||||||
|
if (Indexer == NULL) {
|
||||||
|
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
|
||||||
|
}
|
||||||
|
|
||||||
cJSON *body = cJSON_CreateObject();
|
cJSON *body = cJSON_CreateObject();
|
||||||
cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
|
cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
|
||||||
@@ -64,12 +82,19 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
|
|||||||
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
|
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
|
||||||
cJSON_AddStringToObject(term_obj, "index", index_id);
|
cJSON_AddStringToObject(term_obj, "index", index_id);
|
||||||
|
|
||||||
char * str = cJSON_Print(body);
|
char *str = cJSON_Print(body);
|
||||||
|
|
||||||
char bulk_url[4096];
|
char bulk_url[4096];
|
||||||
snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?pretty", Indexer->es_url);
|
if (async) {
|
||||||
response_t *r = web_post(bulk_url, str, "Content-Type: application/json");
|
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
|
||||||
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
|
Indexer->es_index);
|
||||||
|
} else {
|
||||||
|
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
|
||||||
|
}
|
||||||
|
response_t *r = web_post(bulk_url, str);
|
||||||
|
if (!async) {
|
||||||
|
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
|
||||||
|
}
|
||||||
cJSON *resp = cJSON_Parse(r->body);
|
cJSON *resp = cJSON_Parse(r->body);
|
||||||
|
|
||||||
cJSON_free(str);
|
cJSON_free(str);
|
||||||
@@ -84,31 +109,39 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
|
|||||||
cJSON_free(error_str);
|
cJSON_free(error_str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (async) {
|
||||||
|
cJSON *task = cJSON_GetObjectItem(resp, "task");
|
||||||
|
LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring);
|
||||||
|
}
|
||||||
|
|
||||||
cJSON_Delete(resp);
|
cJSON_Delete(resp);
|
||||||
}
|
}
|
||||||
|
|
||||||
void elastic_flush() {
|
void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
|
||||||
|
|
||||||
if (Indexer == NULL) {
|
|
||||||
Indexer = create_indexer(IndexCtx.es_url);
|
|
||||||
}
|
|
||||||
|
|
||||||
es_bulk_line_t *line = Indexer->line_head;
|
es_bulk_line_t *line = Indexer->line_head;
|
||||||
|
*count = 0;
|
||||||
int count = 0;
|
|
||||||
|
|
||||||
size_t buf_size = 0;
|
size_t buf_size = 0;
|
||||||
size_t buf_cur = 0;
|
size_t buf_cur = 0;
|
||||||
char *buf = malloc(1);
|
char *buf = malloc(8192);
|
||||||
|
size_t buf_capacity = 8192;
|
||||||
|
|
||||||
|
while (line != NULL && *count < max) {
|
||||||
|
char action_str[256];
|
||||||
|
snprintf(
|
||||||
|
action_str, 256,
|
||||||
|
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
|
||||||
|
line->uuid_str, Indexer->es_index
|
||||||
|
);
|
||||||
|
|
||||||
while (line != NULL) {
|
|
||||||
char action_str[512];
|
|
||||||
snprintf(action_str, 512,
|
|
||||||
"{\"index\":{\"_id\":\"%s\", \"_type\":\"_doc\", \"_index\":\"sist2\"}}\n", line->uuid_str);
|
|
||||||
size_t action_str_len = strlen(action_str);
|
size_t action_str_len = strlen(action_str);
|
||||||
|
|
||||||
size_t line_len = strlen(line->line);
|
size_t line_len = strlen(line->line);
|
||||||
buf = realloc(buf, buf_size + line_len + action_str_len);
|
|
||||||
|
while (buf_size + line_len + action_str_len > buf_capacity) {
|
||||||
|
buf_capacity *= 2;
|
||||||
|
buf = realloc(buf, buf_capacity);
|
||||||
|
}
|
||||||
|
|
||||||
buf_size += line_len + action_str_len;
|
buf_size += line_len + action_str_len;
|
||||||
|
|
||||||
memcpy(buf + buf_cur, action_str, action_str_len);
|
memcpy(buf + buf_cur, action_str, action_str_len);
|
||||||
@@ -116,50 +149,147 @@ void elastic_flush() {
|
|||||||
memcpy(buf + buf_cur, line->line, line_len);
|
memcpy(buf + buf_cur, line->line, line_len);
|
||||||
buf_cur += line_len;
|
buf_cur += line_len;
|
||||||
|
|
||||||
es_bulk_line_t *tmp = line;
|
|
||||||
line = line->next;
|
line = line->next;
|
||||||
free(tmp);
|
(*count)++;
|
||||||
count++;
|
|
||||||
}
|
|
||||||
buf = realloc(buf, buf_size + 1);
|
|
||||||
*(buf+buf_cur) = '\0';
|
|
||||||
|
|
||||||
Indexer->line_head = NULL;
|
|
||||||
Indexer->line_tail = NULL;
|
|
||||||
Indexer->queued = 0;
|
|
||||||
|
|
||||||
char bulk_url[4096];
|
|
||||||
snprintf(bulk_url, 4096, "%s/sist2/_bulk", Indexer->es_url);
|
|
||||||
response_t *r = web_post(bulk_url, buf, "Content-Type: application/x-ndjson");
|
|
||||||
|
|
||||||
if (r->status_code == 0) {
|
|
||||||
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_INFOF("elastic.c", "Indexed %3d documents (%zukB) <%d>", count, buf_cur / 1024, r->status_code);
|
if (buf_size + 1 > buf_capacity) {
|
||||||
|
buf = realloc(buf, buf_capacity + 1);
|
||||||
|
}
|
||||||
|
|
||||||
cJSON *ret_json = cJSON_Parse(r->body);
|
*(buf + buf_cur) = '\0';
|
||||||
|
|
||||||
|
*buf_len = buf_cur;
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
void print_errors(response_t *r) {
|
||||||
|
char *tmp = malloc(r->size + 1);
|
||||||
|
memcpy(tmp, r->body, r->size);
|
||||||
|
*(tmp + r->size) = '\0';
|
||||||
|
|
||||||
|
cJSON *ret_json = cJSON_Parse(tmp);
|
||||||
if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
|
if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
|
||||||
cJSON *err;
|
cJSON *err;
|
||||||
cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
|
cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
|
||||||
if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
|
if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
|
||||||
char* str = cJSON_Print(err);
|
char *str = cJSON_Print(err);
|
||||||
LOG_ERRORF("elastic.c", "%s\n", str);
|
LOG_ERRORF("elastic.c", "%s\n", str);
|
||||||
cJSON_free(str);
|
cJSON_free(str);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cJSON_Delete(ret_json);
|
cJSON_Delete(ret_json);
|
||||||
|
free(tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
void print_error(response_t *r) {
|
||||||
|
char *tmp = malloc(r->size + 1);
|
||||||
|
memcpy(tmp, r->body, r->size);
|
||||||
|
*(tmp + r->size) = '\0';
|
||||||
|
|
||||||
|
cJSON *ret_json = cJSON_Parse(tmp);
|
||||||
|
if (cJSON_GetObjectItem(ret_json, "error") != NULL) {
|
||||||
|
char *str = cJSON_Print(cJSON_GetObjectItem(ret_json, "error"));
|
||||||
|
LOG_ERRORF("elastic.c", "%s\n", str);
|
||||||
|
cJSON_free(str);
|
||||||
|
}
|
||||||
|
cJSON_Delete(ret_json);
|
||||||
|
free(tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
void _elastic_flush(int max) {
|
||||||
|
|
||||||
|
if (max == 0) {
|
||||||
|
LOG_WARNING("elastic.c", "calling _elastic_flush with 0 in queue")
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t buf_len;
|
||||||
|
int count;
|
||||||
|
void *buf = create_bulk_buffer(max, &count, &buf_len);
|
||||||
|
|
||||||
|
char bulk_url[4096];
|
||||||
|
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_bulk?pipeline=tie", Indexer->es_url, Indexer->es_index);
|
||||||
|
response_t *r = web_post(bulk_url, buf);
|
||||||
|
|
||||||
|
if (r->status_code == 0) {
|
||||||
|
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (r->status_code == 413) {
|
||||||
|
|
||||||
|
if (max <= 1) {
|
||||||
|
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->uuid_str)
|
||||||
|
free_response(r);
|
||||||
|
free(buf);
|
||||||
|
delete_queue(1);
|
||||||
|
if (Indexer->queued != 0) {
|
||||||
|
elastic_flush();
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_WARNINGF("elastic.c", "Payload too large, retrying (%d documents)", count);
|
||||||
|
|
||||||
|
free_response(r);
|
||||||
|
free(buf);
|
||||||
|
_elastic_flush(max / 2);
|
||||||
|
return;
|
||||||
|
|
||||||
|
} else if (r->status_code == 429) {
|
||||||
|
|
||||||
|
free_response(r);
|
||||||
|
free(buf);
|
||||||
|
LOG_WARNING("elastic.c", "Got 429 status, will retry after delay")
|
||||||
|
usleep(1000000 * 20);
|
||||||
|
_elastic_flush(max);
|
||||||
|
return;
|
||||||
|
|
||||||
|
} else if (r->status_code != 200) {
|
||||||
|
print_errors(r);
|
||||||
|
delete_queue(Indexer->queued);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
print_errors(r);
|
||||||
|
LOG_INFOF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
|
||||||
|
delete_queue(max);
|
||||||
|
|
||||||
|
if (Indexer->queued != 0) {
|
||||||
|
elastic_flush();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
free_response(r);
|
free_response(r);
|
||||||
free(buf);
|
free(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void delete_queue(int max) {
|
||||||
|
for (int i = 0; i < max; i++) {
|
||||||
|
es_bulk_line_t *tmp = Indexer->line_head;
|
||||||
|
Indexer->line_head = tmp->next;
|
||||||
|
if (Indexer->line_head == NULL) {
|
||||||
|
Indexer->line_tail = NULL;
|
||||||
|
}
|
||||||
|
free(tmp);
|
||||||
|
Indexer->queued -= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void elastic_flush() {
|
||||||
|
|
||||||
|
if (Indexer == NULL) {
|
||||||
|
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
|
||||||
|
}
|
||||||
|
|
||||||
|
_elastic_flush(Indexer->queued);
|
||||||
|
}
|
||||||
|
|
||||||
void elastic_index_line(es_bulk_line_t *line) {
|
void elastic_index_line(es_bulk_line_t *line) {
|
||||||
|
|
||||||
if (Indexer == NULL) {
|
if (Indexer == NULL) {
|
||||||
Indexer = create_indexer(IndexCtx.es_url);
|
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Indexer->line_head == NULL) {
|
if (Indexer->line_head == NULL) {
|
||||||
@@ -177,14 +307,18 @@ void elastic_index_line(es_bulk_line_t *line) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
es_indexer_t *create_indexer(const char *url) {
|
es_indexer_t *create_indexer(const char *url, const char *index) {
|
||||||
|
|
||||||
char *es_url = malloc(strlen(url) + 1);
|
char *es_url = malloc(strlen(url) + 1);
|
||||||
strcpy(es_url, url);
|
strcpy(es_url, url);
|
||||||
|
|
||||||
|
char *es_index = malloc(strlen(index) + 1);
|
||||||
|
strcpy(es_index, index);
|
||||||
|
|
||||||
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
|
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
|
||||||
|
|
||||||
indexer->es_url = es_url;
|
indexer->es_url = es_url;
|
||||||
|
indexer->es_index = es_index;
|
||||||
indexer->queued = 0;
|
indexer->queued = 0;
|
||||||
indexer->line_head = NULL;
|
indexer->line_head = NULL;
|
||||||
indexer->line_tail = NULL;
|
indexer->line_tail = NULL;
|
||||||
@@ -192,41 +326,42 @@ es_indexer_t *create_indexer(const char *url) {
|
|||||||
return indexer;
|
return indexer;
|
||||||
}
|
}
|
||||||
|
|
||||||
void destroy_indexer(char * script, char index_id[UUID_STR_LEN]) {
|
void finish_indexer(char *script, int async_script, char *index_id) {
|
||||||
|
|
||||||
char url[4096];
|
char url[4096];
|
||||||
|
|
||||||
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
|
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
|
||||||
response_t *r = web_post(url, "", NULL);
|
response_t *r = web_post(url, "");
|
||||||
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
|
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
|
||||||
free_response(r);
|
free_response(r);
|
||||||
|
|
||||||
if (script != NULL) {
|
if (script != NULL) {
|
||||||
execute_update_script(script, index_id);
|
execute_update_script(script, async_script, index_id);
|
||||||
|
free(script);
|
||||||
|
|
||||||
|
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
|
||||||
|
r = web_post(url, "");
|
||||||
|
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
|
||||||
|
free_response(r);
|
||||||
}
|
}
|
||||||
|
|
||||||
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
|
snprintf(url, sizeof(url), "%s/%s/_forcemerge", IndexCtx.es_url, IndexCtx.es_index);
|
||||||
r = web_post(url, "", NULL);
|
r = web_post(url, "");
|
||||||
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
|
|
||||||
free_response(r);
|
|
||||||
|
|
||||||
snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
|
|
||||||
r = web_post(url, "", NULL);
|
|
||||||
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
|
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
|
||||||
free_response(r);
|
free_response(r);
|
||||||
|
|
||||||
if (Indexer != NULL) {
|
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
|
||||||
free(Indexer->es_url);
|
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}");
|
||||||
free(Indexer);
|
LOG_INFOF("elastic.c", "Set refresh interval <%d>", r->status_code);
|
||||||
}
|
free_response(r);
|
||||||
}
|
}
|
||||||
|
|
||||||
void elastic_init(int force_reset) {
|
void elastic_init(int force_reset) {
|
||||||
|
|
||||||
// Check if index exists
|
// Check if index exists
|
||||||
char url[4096];
|
char url[4096];
|
||||||
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
|
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
|
||||||
response_t *r = web_get(url);
|
response_t *r = web_get(url, 30);
|
||||||
int index_exists = r->status_code == 200;
|
int index_exists = r->status_code == 200;
|
||||||
free_response(r);
|
free_response(r);
|
||||||
|
|
||||||
@@ -235,28 +370,39 @@ void elastic_init(int force_reset) {
|
|||||||
LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code);
|
LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code);
|
||||||
free_response(r);
|
free_response(r);
|
||||||
|
|
||||||
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
|
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
|
||||||
r = web_put(url, "", NULL);
|
r = web_put(url, "");
|
||||||
|
|
||||||
|
if (r->status_code != 200) {
|
||||||
|
print_error(r);
|
||||||
|
LOG_FATAL("elastic.c", "Could not create index")
|
||||||
|
}
|
||||||
|
|
||||||
LOG_INFOF("elastic.c", "Create index <%d>", r->status_code);
|
LOG_INFOF("elastic.c", "Create index <%d>", r->status_code);
|
||||||
free_response(r);
|
free_response(r);
|
||||||
|
|
||||||
snprintf(url, 4096, "%s/sist2/_close", IndexCtx.es_url);
|
snprintf(url, sizeof(url), "%s/%s/_close", IndexCtx.es_url, IndexCtx.es_index);
|
||||||
r = web_post(url, "", NULL);
|
r = web_post(url, "");
|
||||||
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
|
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
|
||||||
free_response(r);
|
free_response(r);
|
||||||
|
|
||||||
snprintf(url, 4096, "%s/sist2/_settings", IndexCtx.es_url);
|
snprintf(url, sizeof(url), "%s/_ingest/pipeline/tie", IndexCtx.es_url);
|
||||||
r = web_put(url, settings_json, "Content-Type: application/json");
|
r = web_put(url, pipeline_json);
|
||||||
|
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
|
||||||
|
free_response(r);
|
||||||
|
|
||||||
|
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
|
||||||
|
r = web_put(url, settings_json);
|
||||||
LOG_INFOF("elastic.c", "Update settings <%d>", r->status_code);
|
LOG_INFOF("elastic.c", "Update settings <%d>", r->status_code);
|
||||||
free_response(r);
|
free_response(r);
|
||||||
|
|
||||||
snprintf(url, 4096, "%s/sist2/_mappings/_doc?include_type_name=true", IndexCtx.es_url);
|
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
|
||||||
r = web_put(url, mappings_json, "Content-Type: application/json");
|
r = web_put(url, mappings_json);
|
||||||
LOG_INFOF("elastic.c", "Update mappings <%d>", r->status_code);
|
LOG_INFOF("elastic.c", "Update mappings <%d>", r->status_code);
|
||||||
free_response(r);
|
free_response(r);
|
||||||
|
|
||||||
snprintf(url, 4096, "%s/sist2/_open", IndexCtx.es_url);
|
snprintf(url, sizeof(url), "%s/%s/_open", IndexCtx.es_url, IndexCtx.es_index);
|
||||||
r = web_post(url, "", NULL);
|
r = web_post(url, "");
|
||||||
LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
|
LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
|
||||||
free_response(r);
|
free_response(r);
|
||||||
}
|
}
|
||||||
@@ -264,13 +410,46 @@ void elastic_init(int force_reset) {
|
|||||||
|
|
||||||
cJSON *elastic_get_document(const char *uuid_str) {
|
cJSON *elastic_get_document(const char *uuid_str) {
|
||||||
char url[4096];
|
char url[4096];
|
||||||
snprintf(url, 4096, "%s/sist2/_doc/%s", WebCtx.es_url, uuid_str);
|
snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, uuid_str);
|
||||||
|
|
||||||
response_t *r = web_get(url);
|
response_t *r = web_get(url, 3);
|
||||||
cJSON *json = NULL;
|
cJSON *json = NULL;
|
||||||
if (r->status_code == 200) {
|
if (r->status_code == 200) {
|
||||||
json = cJSON_Parse(r->body);
|
char *tmp = malloc(r->size + 1);
|
||||||
|
memcpy(tmp, r->body, r->size);
|
||||||
|
*(tmp + r->size) = '\0';
|
||||||
|
json = cJSON_Parse(tmp);
|
||||||
|
free(tmp);
|
||||||
}
|
}
|
||||||
free_response(r);
|
free_response(r);
|
||||||
return json;
|
return json;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char *elastic_get_status() {
|
||||||
|
char url[4096];
|
||||||
|
snprintf(url, sizeof(url),
|
||||||
|
"%s/_cluster/state/metadata/%s?filter_path=metadata.indices.*.state", WebCtx.es_url, WebCtx.es_index);
|
||||||
|
|
||||||
|
response_t *r = web_get(url, 30);
|
||||||
|
cJSON *json = NULL;
|
||||||
|
char *status = malloc(128 * sizeof(char));
|
||||||
|
status[0] = '\0';
|
||||||
|
|
||||||
|
if (r->status_code == 200) {
|
||||||
|
char *tmp = malloc(r->size + 1);
|
||||||
|
memcpy(tmp, r->body, r->size);
|
||||||
|
*(tmp + r->size) = '\0';
|
||||||
|
json = cJSON_Parse(tmp);
|
||||||
|
free(tmp);
|
||||||
|
const cJSON *metadata = cJSON_GetObjectItem(json, "metadata");
|
||||||
|
if (metadata != NULL) {
|
||||||
|
const cJSON *indices = cJSON_GetObjectItem(metadata, "indices");
|
||||||
|
const cJSON *index = cJSON_GetObjectItem(indices, WebCtx.es_index);
|
||||||
|
const cJSON *state = cJSON_GetObjectItem(index, "state");
|
||||||
|
strcpy(status, state->valuestring);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free_response(r);
|
||||||
|
cJSON_Delete(json);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|||||||
@@ -16,18 +16,21 @@ typedef struct es_indexer es_indexer_t;
|
|||||||
|
|
||||||
void elastic_index_line(es_bulk_line_t *line);
|
void elastic_index_line(es_bulk_line_t *line);
|
||||||
|
|
||||||
void elastic_flush();
|
|
||||||
|
|
||||||
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
|
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
|
||||||
|
|
||||||
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
|
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
|
||||||
|
|
||||||
es_indexer_t *create_indexer(const char* es_url);
|
es_indexer_t *create_indexer(const char *url, const char *index);
|
||||||
|
|
||||||
void destroy_indexer(char *script, char index_id[UUID_STR_LEN]);
|
void elastic_cleanup();
|
||||||
|
void finish_indexer(char *script, int async_script, char *index_id);
|
||||||
|
|
||||||
void elastic_init(int force_reset);
|
void elastic_init(int force_reset);
|
||||||
|
|
||||||
cJSON *elastic_get_document(const char *uuid_str);
|
cJSON *elastic_get_document(const char *uuid_str);
|
||||||
|
|
||||||
|
char *elastic_get_status();
|
||||||
|
|
||||||
|
void execute_update_script(const char *script, int async, const char index_id[UUID_STR_LEN]);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
122
src/index/web.c
122
src/index/web.c
@@ -1,4 +1,11 @@
|
|||||||
#include "web.h"
|
#include "web.h"
|
||||||
|
#include "src/sist.h"
|
||||||
|
#include "src/ctx.h"
|
||||||
|
|
||||||
|
#include <mongoose.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <curl/curl.h>
|
||||||
|
|
||||||
|
|
||||||
size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
|
size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
|
||||||
|
|
||||||
@@ -9,11 +16,91 @@ size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void free_response(response_t *resp) {
|
void free_response(response_t *resp) {
|
||||||
free(resp->body);
|
if (resp->body != NULL) {
|
||||||
|
free(resp->body);
|
||||||
|
}
|
||||||
free(resp);
|
free(resp);
|
||||||
}
|
}
|
||||||
|
|
||||||
response_t *web_get(const char *url) {
|
void web_post_async_poll(subreq_ctx_t* req) {
|
||||||
|
fd_set fdread;
|
||||||
|
fd_set fdwrite;
|
||||||
|
fd_set fdexcep;
|
||||||
|
int maxfd = -1;
|
||||||
|
|
||||||
|
FD_ZERO(&fdread);
|
||||||
|
FD_ZERO(&fdwrite);
|
||||||
|
FD_ZERO(&fdexcep);
|
||||||
|
|
||||||
|
CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd);
|
||||||
|
|
||||||
|
if(mc != CURLM_OK) {
|
||||||
|
req->done = TRUE;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (maxfd == -1) {
|
||||||
|
// no fds ready yet
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct timeval timeout = {1, 0};
|
||||||
|
int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
|
||||||
|
|
||||||
|
switch(rc) {
|
||||||
|
case -1:
|
||||||
|
req->done = TRUE;
|
||||||
|
break;
|
||||||
|
case 0:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
curl_multi_perform(req->multi, &req->running_handles);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (req->running_handles == 0) {
|
||||||
|
req->done = TRUE;
|
||||||
|
req->response->body = req->response_buf.buf;
|
||||||
|
req->response->size = req->response_buf.cur;
|
||||||
|
curl_easy_getinfo(req->handle, CURLINFO_RESPONSE_CODE, &req->response->status_code);
|
||||||
|
|
||||||
|
curl_multi_cleanup(req->multi);
|
||||||
|
curl_easy_cleanup(req->handle);
|
||||||
|
curl_slist_free_all(req->headers);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
subreq_ctx_t *web_post_async(const char *url, char *data) {
|
||||||
|
subreq_ctx_t *req = calloc(1, sizeof(subreq_ctx_t));
|
||||||
|
req->response = calloc(1, sizeof(response_t));
|
||||||
|
req->data = data;
|
||||||
|
req->response_buf = dyn_buffer_create();
|
||||||
|
|
||||||
|
req->handle = curl_easy_init();
|
||||||
|
CURL *curl = req->handle;
|
||||||
|
curl_easy_setopt(curl, CURLOPT_URL, url);
|
||||||
|
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&req->response_buf));
|
||||||
|
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
|
||||||
|
curl_easy_setopt(curl, CURLOPT_POST, 1);
|
||||||
|
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||||
|
|
||||||
|
struct curl_slist *headers = NULL;
|
||||||
|
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||||
|
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||||
|
|
||||||
|
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
|
||||||
|
|
||||||
|
req->multi = curl_multi_init();
|
||||||
|
curl_multi_add_handle(req->multi, curl);
|
||||||
|
curl_multi_perform(req->multi, &req->running_handles);
|
||||||
|
|
||||||
|
LOG_DEBUGF("web.c", "async request POST %s", url)
|
||||||
|
|
||||||
|
return req;
|
||||||
|
}
|
||||||
|
|
||||||
|
response_t *web_get(const char *url, int timeout) {
|
||||||
response_t *resp = malloc(sizeof(response_t));
|
response_t *resp = malloc(sizeof(response_t));
|
||||||
|
|
||||||
CURL *curl;
|
CURL *curl;
|
||||||
@@ -24,18 +111,24 @@ response_t *web_get(const char *url) {
|
|||||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
|
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
|
||||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
|
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
|
||||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||||
|
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
|
||||||
|
|
||||||
|
struct curl_slist *headers = NULL;
|
||||||
|
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||||
|
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||||
|
|
||||||
curl_easy_perform(curl);
|
curl_easy_perform(curl);
|
||||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
||||||
|
|
||||||
curl_easy_cleanup(curl);
|
curl_easy_cleanup(curl);
|
||||||
|
curl_slist_free_all(headers);
|
||||||
|
|
||||||
resp->body = buffer.buf;
|
resp->body = buffer.buf;
|
||||||
resp->size = buffer.cur;
|
resp->size = buffer.cur;
|
||||||
return resp;
|
return resp;
|
||||||
}
|
}
|
||||||
|
|
||||||
response_t *web_post(const char *url, const char *data, const char *header) {
|
response_t *web_post(const char *url, const char *data) {
|
||||||
|
|
||||||
response_t *resp = malloc(sizeof(response_t));
|
response_t *resp = malloc(sizeof(response_t));
|
||||||
|
|
||||||
@@ -50,10 +143,8 @@ response_t *web_post(const char *url, const char *data, const char *header) {
|
|||||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||||
|
|
||||||
struct curl_slist *headers = NULL;
|
struct curl_slist *headers = NULL;
|
||||||
if (header != NULL) {
|
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||||
headers = curl_slist_append(headers, header);
|
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
|
||||||
}
|
|
||||||
|
|
||||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
|
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
|
||||||
|
|
||||||
@@ -70,7 +161,7 @@ response_t *web_post(const char *url, const char *data, const char *header) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
response_t *web_put(const char *url, const char *data, const char *header) {
|
response_t *web_put(const char *url, const char *data) {
|
||||||
|
|
||||||
response_t *resp = malloc(sizeof(response_t));
|
response_t *resp = malloc(sizeof(response_t));
|
||||||
|
|
||||||
@@ -86,11 +177,9 @@ response_t *web_put(const char *url, const char *data, const char *header) {
|
|||||||
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
|
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
|
||||||
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
|
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
|
||||||
|
|
||||||
if (header != NULL) {
|
struct curl_slist *headers = NULL;
|
||||||
struct curl_slist *headers = NULL;
|
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||||
headers = curl_slist_append(headers, header);
|
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
|
||||||
}
|
|
||||||
|
|
||||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
|
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
|
||||||
|
|
||||||
@@ -98,6 +187,7 @@ response_t *web_put(const char *url, const char *data, const char *header) {
|
|||||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
||||||
|
|
||||||
curl_easy_cleanup(curl);
|
curl_easy_cleanup(curl);
|
||||||
|
curl_slist_free_all(headers);
|
||||||
|
|
||||||
resp->body = buffer.buf;
|
resp->body = buffer.buf;
|
||||||
resp->size = buffer.cur;
|
resp->size = buffer.cur;
|
||||||
@@ -119,13 +209,17 @@ response_t *web_delete(const char *url) {
|
|||||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||||
|
|
||||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
|
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
|
||||||
|
struct curl_slist *headers = NULL;
|
||||||
|
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||||
|
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||||
|
|
||||||
curl_easy_perform(curl);
|
curl_easy_perform(curl);
|
||||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
||||||
|
|
||||||
curl_easy_cleanup(curl);
|
curl_easy_cleanup(curl);
|
||||||
|
curl_slist_free_all(headers);
|
||||||
|
|
||||||
resp->body = buffer.buf;
|
resp->body = buffer.buf;
|
||||||
resp->size = buffer.cur;
|
resp->size = buffer.cur;
|
||||||
return resp;
|
return resp;
|
||||||
}
|
}
|
||||||
@@ -2,6 +2,8 @@
|
|||||||
#define SIST2_WEB_H
|
#define SIST2_WEB_H
|
||||||
|
|
||||||
#include "src/sist.h"
|
#include "src/sist.h"
|
||||||
|
#include <mongoose.h>
|
||||||
|
#include <curl/curl.h>
|
||||||
|
|
||||||
typedef struct response {
|
typedef struct response {
|
||||||
char *body;
|
char *body;
|
||||||
@@ -9,9 +11,27 @@ typedef struct response {
|
|||||||
int status_code;
|
int status_code;
|
||||||
} response_t;
|
} response_t;
|
||||||
|
|
||||||
response_t *web_get(const char *url);
|
typedef struct {
|
||||||
response_t *web_post(const char * url, const char * data, const char* header);
|
response_t *resp;
|
||||||
response_t *web_put(const char *url, const char *data, const char *header);
|
int done;
|
||||||
|
} http_ev_data_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
char* data;
|
||||||
|
dyn_buffer_t response_buf;
|
||||||
|
struct curl_slist *headers;
|
||||||
|
CURL *handle;
|
||||||
|
CURLM *multi;
|
||||||
|
response_t *response;
|
||||||
|
int running_handles;
|
||||||
|
int done;
|
||||||
|
} subreq_ctx_t;
|
||||||
|
|
||||||
|
response_t *web_get(const char *url, int timeout);
|
||||||
|
response_t *web_post(const char * url, const char * data);
|
||||||
|
void web_post_async_poll(subreq_ctx_t* req);
|
||||||
|
subreq_ctx_t *web_post_async(const char *url, char *data);
|
||||||
|
response_t *web_put(const char *url, const char *data);
|
||||||
response_t *web_delete(const char *url);
|
response_t *web_delete(const char *url);
|
||||||
|
|
||||||
void free_response(response_t *resp);
|
void free_response(response_t *resp);
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
#include "src/ctx.h"
|
#include "src/ctx.h"
|
||||||
#include "serialize.h"
|
#include "serialize.h"
|
||||||
|
#include "src/parsing/parse.h"
|
||||||
|
#include "src/parsing/mime.h"
|
||||||
|
|
||||||
static __thread int index_fd = -1;
|
static __thread int index_fd = -1;
|
||||||
|
|
||||||
@@ -39,11 +41,14 @@ void write_index_descriptor(char *path, index_descriptor_t *desc) {
|
|||||||
cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp);
|
cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp);
|
||||||
|
|
||||||
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
|
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
|
||||||
if (fd == -1) {
|
if (fd < 0) {
|
||||||
perror(path);
|
LOG_FATALF("serialize.c", "Could not open index descriptor: %s", strerror(errno));
|
||||||
}
|
}
|
||||||
char *str = cJSON_Print(json);
|
char *str = cJSON_Print(json);
|
||||||
write(fd, str, strlen(str));
|
int ret = write(fd, str, strlen(str));
|
||||||
|
if (ret == -1) {
|
||||||
|
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
|
||||||
|
}
|
||||||
free(str);
|
free(str);
|
||||||
close(fd);
|
close(fd);
|
||||||
|
|
||||||
@@ -57,11 +62,14 @@ index_descriptor_t read_index_descriptor(char *path) {
|
|||||||
int fd = open(path, O_RDONLY);
|
int fd = open(path, O_RDONLY);
|
||||||
|
|
||||||
if (fd == -1) {
|
if (fd == -1) {
|
||||||
LOG_FATAL("serialize.c", "Invalid/corrupt index (Could not find descriptor)\n")
|
LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path, strerror(errno))
|
||||||
}
|
}
|
||||||
|
|
||||||
char *buf = malloc(info.st_size + 1);
|
char *buf = malloc(info.st_size + 1);
|
||||||
read(fd, buf, info.st_size);
|
int ret = read(fd, buf, info.st_size);
|
||||||
|
if (ret == -1) {
|
||||||
|
LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
|
||||||
|
}
|
||||||
*(buf + info.st_size) = '\0';
|
*(buf + info.st_size) = '\0';
|
||||||
close(fd);
|
close(fd);
|
||||||
|
|
||||||
@@ -118,6 +126,32 @@ char *get_meta_key_text(enum metakey meta_key) {
|
|||||||
return "font_name";
|
return "font_name";
|
||||||
case MetaParent:
|
case MetaParent:
|
||||||
return "parent";
|
return "parent";
|
||||||
|
case MetaExifMake:
|
||||||
|
return "exif_make";
|
||||||
|
case MetaExifSoftware:
|
||||||
|
return "exif_software";
|
||||||
|
case MetaExifExposureTime:
|
||||||
|
return "exif_exposure_time";
|
||||||
|
case MetaExifFNumber:
|
||||||
|
return "exif_fnumber";
|
||||||
|
case MetaExifFocalLength:
|
||||||
|
return "exif_focal_length";
|
||||||
|
case MetaExifUserComment:
|
||||||
|
return "exif_user_comment";
|
||||||
|
case MetaExifIsoSpeedRatings:
|
||||||
|
return "exif_iso_speed_ratings";
|
||||||
|
case MetaExifModel:
|
||||||
|
return "exif_model";
|
||||||
|
case MetaExifDateTime:
|
||||||
|
return "exif_datetime";
|
||||||
|
case MetaAuthor:
|
||||||
|
return "author";
|
||||||
|
case MetaModifiedBy:
|
||||||
|
return "modified_by";
|
||||||
|
case MetaThumbnail:
|
||||||
|
return "thumbnail";
|
||||||
|
case MetaPages:
|
||||||
|
return "pages";
|
||||||
default:
|
default:
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@@ -140,8 +174,8 @@ void write_document(document_t *doc) {
|
|||||||
dyn_buffer_t buf = dyn_buffer_create();
|
dyn_buffer_t buf = dyn_buffer_create();
|
||||||
|
|
||||||
// Ignore root directory in the file path
|
// Ignore root directory in the file path
|
||||||
doc->ext = doc->ext - ScanCtx.index.desc.root_len;
|
doc->ext = (short) (doc->ext - ScanCtx.index.desc.root_len);
|
||||||
doc->base = doc->base - ScanCtx.index.desc.root_len;
|
doc->base = (short) (doc->base - ScanCtx.index.desc.root_len);
|
||||||
doc->filepath += ScanCtx.index.desc.root_len;
|
doc->filepath += ScanCtx.index.desc.root_len;
|
||||||
|
|
||||||
dyn_buffer_write(&buf, doc, sizeof(line_t));
|
dyn_buffer_write(&buf, doc, sizeof(line_t));
|
||||||
@@ -152,11 +186,11 @@ void write_document(document_t *doc) {
|
|||||||
dyn_buffer_write_char(&buf, meta->key);
|
dyn_buffer_write_char(&buf, meta->key);
|
||||||
|
|
||||||
if (IS_META_INT(meta->key)) {
|
if (IS_META_INT(meta->key)) {
|
||||||
dyn_buffer_write_int(&buf, meta->intval);
|
dyn_buffer_write_int(&buf, meta->int_val);
|
||||||
} else if (IS_META_LONG(meta->key)) {
|
} else if (IS_META_LONG(meta->key)) {
|
||||||
dyn_buffer_write_long(&buf, meta->longval);
|
dyn_buffer_write_long(&buf, meta->long_val);
|
||||||
} else {
|
} else {
|
||||||
dyn_buffer_write_str(&buf, meta->strval);
|
dyn_buffer_write_str(&buf, meta->str_val);
|
||||||
}
|
}
|
||||||
|
|
||||||
meta_line_t *tmp = meta;
|
meta_line_t *tmp = meta;
|
||||||
@@ -167,7 +201,7 @@ void write_document(document_t *doc) {
|
|||||||
|
|
||||||
int res = write(index_fd, buf.buf, buf.cur);
|
int res = write(index_fd, buf.buf, buf.cur);
|
||||||
if (res == -1) {
|
if (res == -1) {
|
||||||
perror("write");
|
LOG_FATALF("serialize.c", "Could not write document: %s", strerror(errno))
|
||||||
}
|
}
|
||||||
ScanCtx.stat_index_size += buf.cur;
|
ScanCtx.stat_index_size += buf.cur;
|
||||||
dyn_buffer_destroy(&buf);
|
dyn_buffer_destroy(&buf);
|
||||||
@@ -175,6 +209,8 @@ void write_document(document_t *doc) {
|
|||||||
|
|
||||||
void thread_cleanup() {
|
void thread_cleanup() {
|
||||||
close(index_fd);
|
close(index_fd);
|
||||||
|
cleanup_parse();
|
||||||
|
cleanup_font();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -185,7 +221,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
|
|||||||
FILE *file = fopen(path, "rb");
|
FILE *file = fopen(path, "rb");
|
||||||
while (1) {
|
while (1) {
|
||||||
buf.cur = 0;
|
buf.cur = 0;
|
||||||
fread((void *) &line, 1, sizeof(line_t), file);
|
size_t _ = fread((void *) &line, 1, sizeof(line_t), file);
|
||||||
if (feof(file)) {
|
if (feof(file)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -196,58 +232,71 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
|
|||||||
char uuid_str[UUID_STR_LEN];
|
char uuid_str[UUID_STR_LEN];
|
||||||
uuid_unparse(line.uuid, uuid_str);
|
uuid_unparse(line.uuid, uuid_str);
|
||||||
|
|
||||||
cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime));
|
const char *mime_text = mime_get_mime_text(line.mime);
|
||||||
|
if (mime_text == NULL) {
|
||||||
|
cJSON_AddNullToObject(document, "mime");
|
||||||
|
} else {
|
||||||
|
cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime));
|
||||||
|
}
|
||||||
cJSON_AddNumberToObject(document, "size", (double) line.size);
|
cJSON_AddNumberToObject(document, "size", (double) line.size);
|
||||||
cJSON_AddNumberToObject(document, "mtime", line.mtime);
|
cJSON_AddNumberToObject(document, "mtime", line.mtime);
|
||||||
|
|
||||||
int c;
|
int c = 0;
|
||||||
while ((c = getc(file)) != 0) {
|
while ((c = getc(file)) != 0) {
|
||||||
dyn_buffer_write_char(&buf, (char) c);
|
dyn_buffer_write_char(&buf, (char) c);
|
||||||
}
|
}
|
||||||
dyn_buffer_write_char(&buf, '\0');
|
dyn_buffer_write_char(&buf, '\0');
|
||||||
|
|
||||||
|
if (IndexCtx.tags != NULL) {
|
||||||
|
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, buf.buf);
|
||||||
|
if (tags_string != NULL) {
|
||||||
|
cJSON *tags_arr = cJSON_Parse(tags_string);
|
||||||
|
cJSON_AddItemToObject(document, "tag", tags_arr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
cJSON_AddStringToObject(document, "extension", buf.buf + line.ext);
|
cJSON_AddStringToObject(document, "extension", buf.buf + line.ext);
|
||||||
if (*(buf.buf + line.ext - 1) == '.') {
|
if (*(buf.buf + line.ext - 1) == '.') {
|
||||||
*(buf.buf + line.ext - 1) = '\0';
|
*(buf.buf + line.ext - 1) = '\0';
|
||||||
} else {
|
} else {
|
||||||
*(buf.buf + line.ext) = '\0';
|
*(buf.buf + line.ext) = '\0';
|
||||||
}
|
}
|
||||||
cJSON_AddStringToObject(document, "name", buf.buf + line.base);
|
|
||||||
|
char tmp[PATH_MAX * 3];
|
||||||
|
|
||||||
|
str_escape(tmp, buf.buf + line.base);
|
||||||
|
cJSON_AddStringToObject(document, "name", tmp);
|
||||||
|
|
||||||
if (line.base > 0) {
|
if (line.base > 0) {
|
||||||
*(buf.buf + line.base - 1) = '\0';
|
*(buf.buf + line.base - 1) = '\0';
|
||||||
cJSON_AddStringToObject(document, "path", buf.buf);
|
|
||||||
|
str_escape(tmp, buf.buf);
|
||||||
|
cJSON_AddStringToObject(document, "path", tmp);
|
||||||
} else {
|
} else {
|
||||||
cJSON_AddStringToObject(document, "path", "");
|
cJSON_AddStringToObject(document, "path", "");
|
||||||
}
|
}
|
||||||
|
|
||||||
enum metakey key = getc(file);
|
enum metakey key = getc(file);
|
||||||
|
size_t ret = 0;
|
||||||
while (key != '\n') {
|
while (key != '\n') {
|
||||||
switch (key) {
|
switch (key) {
|
||||||
|
case MetaPages:
|
||||||
case MetaWidth:
|
case MetaWidth:
|
||||||
case MetaHeight: {
|
case MetaHeight: {
|
||||||
int value;
|
int value;
|
||||||
fread(&value, sizeof(int), 1, file);
|
ret = fread(&value, sizeof(int), 1, file);
|
||||||
cJSON_AddNumberToObject(document, get_meta_key_text(key), value);
|
cJSON_AddNumberToObject(document, get_meta_key_text(key), value);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case MetaMediaDuration:
|
case MetaMediaDuration:
|
||||||
case MetaMediaBitrate: {
|
case MetaMediaBitrate: {
|
||||||
long value;
|
long value;
|
||||||
fread(&value, sizeof(long), 1, file);
|
ret = fread(&value, sizeof(long), 1, file);
|
||||||
cJSON_AddNumberToObject(document, get_meta_key_text(key), (double) value);
|
cJSON_AddNumberToObject(document, get_meta_key_text(key), (double) value);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case MetaMediaAudioCodec:
|
case MetaMediaAudioCodec:
|
||||||
case MetaMediaVideoCodec: {
|
case MetaMediaVideoCodec:
|
||||||
int value;
|
|
||||||
fread(&value, sizeof(int), 1, file);
|
|
||||||
const AVCodecDescriptor *desc = avcodec_descriptor_get(value);
|
|
||||||
if (desc != NULL) {
|
|
||||||
cJSON_AddStringToObject(document, get_meta_key_text(key), desc->name);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case MetaContent:
|
case MetaContent:
|
||||||
case MetaArtist:
|
case MetaArtist:
|
||||||
case MetaAlbum:
|
case MetaAlbum:
|
||||||
@@ -255,6 +304,18 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
|
|||||||
case MetaGenre:
|
case MetaGenre:
|
||||||
case MetaFontName:
|
case MetaFontName:
|
||||||
case MetaParent:
|
case MetaParent:
|
||||||
|
case MetaExifMake:
|
||||||
|
case MetaExifSoftware:
|
||||||
|
case MetaExifExposureTime:
|
||||||
|
case MetaExifFNumber:
|
||||||
|
case MetaExifFocalLength:
|
||||||
|
case MetaExifUserComment:
|
||||||
|
case MetaExifIsoSpeedRatings:
|
||||||
|
case MetaExifDateTime:
|
||||||
|
case MetaExifModel:
|
||||||
|
case MetaAuthor:
|
||||||
|
case MetaModifiedBy:
|
||||||
|
case MetaThumbnail:
|
||||||
case MetaTitle: {
|
case MetaTitle: {
|
||||||
buf.cur = 0;
|
buf.cur = 0;
|
||||||
while ((c = getc(file)) != 0) {
|
while ((c = getc(file)) != 0) {
|
||||||
@@ -302,7 +363,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
|
|||||||
char *line = NULL;
|
char *line = NULL;
|
||||||
size_t len;
|
size_t len;
|
||||||
size_t read = getline(&line, &len, file);
|
size_t read = getline(&line, &len, file);
|
||||||
if (read == -1) {
|
if (read < 0) {
|
||||||
if (line) {
|
if (line) {
|
||||||
free(line);
|
free(line);
|
||||||
}
|
}
|
||||||
@@ -368,8 +429,8 @@ void incremental_read(GHashTable *table, const char *filepath) {
|
|||||||
line_t line;
|
line_t line;
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
fread((void *) &line, 1, sizeof(line_t), file);
|
size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
|
||||||
if (feof(file)) {
|
if (ret != 1 || feof(file)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -392,8 +453,8 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
|
|||||||
line_t line;
|
line_t line;
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
fread((void *) &line, 1, sizeof(line_t), file);
|
size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
|
||||||
if (feof(file)) {
|
if (ret != 1 || feof(file)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -421,11 +482,11 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
|
|||||||
|
|
||||||
if (IS_META_INT(key)) {
|
if (IS_META_INT(key)) {
|
||||||
int val;
|
int val;
|
||||||
fread(&val, sizeof(val), 1, file);
|
ret = fread(&val, sizeof(val), 1, file);
|
||||||
fwrite(&val, sizeof(val), 1, dst_file);
|
fwrite(&val, sizeof(val), 1, dst_file);
|
||||||
} else if (IS_META_LONG(key)) {
|
} else if (IS_META_LONG(key)) {
|
||||||
long val;
|
long val;
|
||||||
fread(&val, sizeof(val), 1, file);
|
ret = fread(&val, sizeof(val), 1, file);
|
||||||
fwrite(&val, sizeof(val), 1, dst_file);
|
fwrite(&val, sizeof(val), 1, dst_file);
|
||||||
} else {
|
} else {
|
||||||
while ((c = (char) getc(file))) {
|
while ((c = (char) getc(file))) {
|
||||||
@@ -433,6 +494,10 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
|
|||||||
}
|
}
|
||||||
fwrite("\0", sizeof(c), 1, dst_file);
|
fwrite("\0", sizeof(c), 1, dst_file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ret != 1) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
skip_meta(file);
|
skip_meta(file);
|
||||||
|
|||||||
@@ -2,7 +2,10 @@
|
|||||||
#define SIST2_SERIALIZE_H
|
#define SIST2_SERIALIZE_H
|
||||||
|
|
||||||
#include "src/sist.h"
|
#include "src/sist.h"
|
||||||
|
#include "store.h"
|
||||||
|
|
||||||
#include <sys/syscall.h>
|
#include <sys/syscall.h>
|
||||||
|
#include <glib.h>
|
||||||
|
|
||||||
typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]);
|
typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]);
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,10 @@
|
|||||||
#include "store.h"
|
#include "store.h"
|
||||||
#include "src/ctx.h"
|
#include "src/ctx.h"
|
||||||
|
|
||||||
store_t *store_create(char *path) {
|
store_t *store_create(char *path, size_t chunk_size) {
|
||||||
|
|
||||||
store_t *store = malloc(sizeof(struct store_t));
|
store_t *store = malloc(sizeof(struct store_t));
|
||||||
|
store->chunk_size = chunk_size;
|
||||||
pthread_rwlock_init(&store->lock, NULL);
|
pthread_rwlock_init(&store->lock, NULL);
|
||||||
|
|
||||||
mdb_env_create(&store->env);
|
mdb_env_create(&store->env);
|
||||||
@@ -15,11 +16,10 @@ store_t *store_create(char *path) {
|
|||||||
);
|
);
|
||||||
|
|
||||||
if (open_ret != 0) {
|
if (open_ret != 0) {
|
||||||
fprintf(stderr, "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path);
|
LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path)
|
||||||
exit(1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
store->size = (size_t) 1024 * 1024 * 5;
|
store->size = (size_t) store->chunk_size;
|
||||||
ScanCtx.stat_tn_size = 0;
|
ScanCtx.stat_tn_size = 0;
|
||||||
mdb_env_set_mapsize(store->env, store->size);
|
mdb_env_set_mapsize(store->env, store->size);
|
||||||
|
|
||||||
@@ -70,7 +70,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
|
|||||||
// Cannot resize when there is a opened transaction.
|
// Cannot resize when there is a opened transaction.
|
||||||
// Resize take effect on the next commit.
|
// Resize take effect on the next commit.
|
||||||
pthread_rwlock_wrlock(&store->lock);
|
pthread_rwlock_wrlock(&store->lock);
|
||||||
store->size += 1024 * 1024 * 50;
|
store->size += store->chunk_size;
|
||||||
mdb_env_set_mapsize(store->env, store->size);
|
mdb_env_set_mapsize(store->env, store->size);
|
||||||
mdb_txn_begin(store->env, NULL, 0, &txn);
|
mdb_txn_begin(store->env, NULL, 0, &txn);
|
||||||
put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
|
put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
|
||||||
@@ -82,7 +82,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
|
|||||||
pthread_rwlock_unlock(&store->lock);
|
pthread_rwlock_unlock(&store->lock);
|
||||||
|
|
||||||
if (put_ret != 0) {
|
if (put_ret != 0) {
|
||||||
printf("%s\n", mdb_strerror(put_ret));
|
LOG_ERROR("store.c", mdb_strerror(put_ret))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -111,3 +111,40 @@ char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen)
|
|||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Load every key/value pair of the store into a newly created hash table.
 *
 * Keys and values are copied out of the LMDB map inside a read-only
 * transaction. The table uses g_str_hash/g_str_equal, so each copy is
 * explicitly NUL-terminated; the extra terminator is harmless if the stored
 * bytes already end with one.
 *
 * @param store Store to read from.
 * @return A GHashTable owning its keys and values (both released with free()
 *         when the table is destroyed). The table is empty if the transaction
 *         or cursor could not be opened.
 */
GHashTable *store_read_all(store_t *store) {

    int count = 0;

    GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);

    MDB_txn *txn = NULL;
    int rc = mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);
    if (rc != 0) {
        LOG_ERROR("store.c", mdb_strerror(rc))
        return table;
    }

    MDB_cursor *cur = NULL;
    rc = mdb_cursor_open(txn, store->dbi, &cur);
    if (rc != 0) {
        LOG_ERROR("store.c", mdb_strerror(rc))
        mdb_txn_abort(txn);
        return table;
    }

    MDB_val key;
    MDB_val value;

    while (mdb_cursor_get(cur, &key, &value, MDB_NEXT) == 0) {
        // +1 so the copies are always valid C strings for g_str_hash and
        // for consumers that parse the value as text
        char *key_str = malloc(key.mv_size + 1);
        memcpy(key_str, key.mv_data, key.mv_size);
        key_str[key.mv_size] = '\0';

        char *val_str = malloc(value.mv_size + 1);
        memcpy(val_str, value.mv_data, value.mv_size);
        val_str[value.mv_size] = '\0';

        g_hash_table_insert(table, key_str, val_str);
        count += 1;
    }

    LOG_DEBUGF("store.c", "Read tags for %d documents", count);

    mdb_cursor_close(cur);
    // Read-only transaction: nothing to commit, abort just releases it
    mdb_txn_abort(txn);
    return table;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Copy the store's LMDB environment into the directory `destination`.
 *
 * @param store       Store whose environment is copied.
 * @param destination Path of the target directory; it is created with
 *                    owner-only rwx permissions before copying.
 */
void store_copy(store_t *store, const char *destination) {
    // The mkdir() result is not checked here; a failed copy is reported below
    mkdir(destination, S_IWUSR | S_IRUSR | S_IXUSR);

    int copy_ret = mdb_env_copy(store->env, destination);
    if (copy_ret != 0) {
        // Previously a failed copy went completely unnoticed
        LOG_ERROR("store.c", mdb_strerror(copy_ret))
    }
}
|
||||||
|
|||||||
@@ -4,16 +4,20 @@
|
|||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#include <lmdb.h>
|
#include <lmdb.h>
|
||||||
|
|
||||||
|
#include <glib.h>
|
||||||
|
|
||||||
|
// Chunk sizes (in bytes) passed to store_create(). Parenthesized so the
// macros expand correctly inside larger expressions such as x / STORE_SIZE_TAG.
#define STORE_SIZE_TN (1024 * 1024 * 5)
#define STORE_SIZE_TAG (1024 * 16)
|
||||||
|
|
||||||
typedef struct store_t {
|
typedef struct store_t {
|
||||||
MDB_dbi dbi;
|
MDB_dbi dbi;
|
||||||
MDB_env *env;
|
MDB_env *env;
|
||||||
size_t size;
|
size_t size;
|
||||||
|
size_t chunk_size;
|
||||||
pthread_rwlock_t lock;
|
pthread_rwlock_t lock;
|
||||||
} store_t;
|
} store_t;
|
||||||
|
|
||||||
#include "src/sist.h"
|
store_t *store_create(char *path, size_t chunk_size);
|
||||||
|
|
||||||
store_t *store_create(char *path);
|
|
||||||
|
|
||||||
void store_destroy(store_t *store);
|
void store_destroy(store_t *store);
|
||||||
|
|
||||||
@@ -21,4 +25,8 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
|
|||||||
|
|
||||||
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);
|
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);
|
||||||
|
|
||||||
|
GHashTable *store_read_all(store_t *store);
|
||||||
|
|
||||||
|
void store_copy(store_t *store, const char *destination);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1,5 +1,8 @@
|
|||||||
#include "walk.h"
|
#include "walk.h"
|
||||||
#include "src/ctx.h"
|
#include "src/ctx.h"
|
||||||
|
#include "src/parsing/parse.h"
|
||||||
|
|
||||||
|
#include <ftw.h>
|
||||||
|
|
||||||
__always_inline
|
__always_inline
|
||||||
parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
|
parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
|
||||||
@@ -15,12 +18,13 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
|
|||||||
job->ext = len;
|
job->ext = len;
|
||||||
}
|
}
|
||||||
|
|
||||||
job->info = *info;
|
job->vfile.info = *info;
|
||||||
|
|
||||||
memset(job->parent, 0, 16);
|
memset(job->parent, 0, 16);
|
||||||
|
|
||||||
job->vfile.filepath = job->filepath;
|
job->vfile.filepath = job->filepath;
|
||||||
job->vfile.read = fs_read;
|
job->vfile.read = fs_read;
|
||||||
|
job->vfile.reset = fs_reset;
|
||||||
job->vfile.close = fs_close;
|
job->vfile.close = fs_close;
|
||||||
job->vfile.fd = -1;
|
job->vfile.fd = -1;
|
||||||
job->vfile.is_fs_file = TRUE;
|
job->vfile.is_fs_file = TRUE;
|
||||||
@@ -28,8 +32,18 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
|
|||||||
return job;
|
return job;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Output vector handed to pcre_exec() by EXCLUDED(); its contents are not read here.
int sub_strings[30];

// True when `str` matches the user-supplied exclude pattern.
// pcre_exec() expects ovecsize as a number of ints, not a byte count, so the
// element count is passed; sizeof(sub_strings) alone would let PCRE write
// past the end of the array. `str` is evaluated twice.
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, (str), (int) strlen(str), 0, 0, sub_strings, (int) (sizeof(sub_strings) / sizeof(sub_strings[0]))) >= 0)
|
||||||
|
|
||||||
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
|
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
|
||||||
if (ftw->level <= ScanCtx.depth && typeflag == FTW_F && S_ISREG(info->st_mode)) {
|
|
||||||
|
if (typeflag == FTW_F && S_ISREG(info->st_mode) && ftw->level <= ScanCtx.depth) {
|
||||||
|
|
||||||
|
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
|
||||||
|
LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
|
parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
|
||||||
tpool_add_work(ScanCtx.pool, parse, job);
|
tpool_add_work(ScanCtx.pool, parse, job);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,8 +3,6 @@
|
|||||||
|
|
||||||
#define _XOPEN_SOURCE 500
|
#define _XOPEN_SOURCE 500
|
||||||
|
|
||||||
#include "src/sist.h"
|
|
||||||
|
|
||||||
int walk_directory_tree(const char *);
|
int walk_directory_tree(const char *);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
38
src/log.c
38
src/log.c
@@ -1,15 +1,17 @@
|
|||||||
#include "log.h"
|
#include "log.h"
|
||||||
|
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
|
||||||
const char *log_colors[] = {
|
const char *log_colors[] = {
|
||||||
"\033[34m", "\033[01;34m", "\033[0m",
|
"\033[34m", "\033[01;34m", "\033[01;33m", "\033[0m", "\033[31m", "\033[01;31m"
|
||||||
"\033[01;33m", "\033[31m", "\033[01;31m"
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const char *log_levels[] = {
|
const char *log_levels[] = {
|
||||||
"DEBUG", "INFO", "WARNING", "ERROR", "FATAL"
|
"DEBUG", "INFO", "WARNING", "ERROR", "FATAL"
|
||||||
};
|
};
|
||||||
|
|
||||||
void sist_logf(char *filepath, int level, char *format, ...) {
|
void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
|
||||||
|
|
||||||
static int is_tty = -1;
|
static int is_tty = -1;
|
||||||
if (is_tty == -1) {
|
if (is_tty == -1) {
|
||||||
@@ -31,23 +33,20 @@ void sist_logf(char *filepath, int level, char *format, ...) {
|
|||||||
if (is_tty) {
|
if (is_tty) {
|
||||||
log_len = snprintf(
|
log_len = snprintf(
|
||||||
log_str, sizeof(log_str),
|
log_str, sizeof(log_str),
|
||||||
"\033[%dm[%04X]%s [%s] [%s %s] ",
|
"\033[%dm[%04llX]%s [%s] [%s %s] ",
|
||||||
31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
|
31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
|
||||||
datetime, log_levels[level], filepath
|
datetime, log_levels[level], filepath
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
log_len = snprintf(
|
log_len = snprintf(
|
||||||
log_str, sizeof(log_str),
|
log_str, sizeof(log_str),
|
||||||
"[%04X] [%s] [%s %s] ",
|
"[%04llX] [%s] [%s %s] ",
|
||||||
pid, datetime, log_levels[level], filepath
|
pid, datetime, log_levels[level], filepath
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
va_list ap;
|
|
||||||
va_start(ap, format);
|
|
||||||
size_t maxsize = sizeof(log_str) - log_len;
|
size_t maxsize = sizeof(log_str) - log_len;
|
||||||
log_len += vsnprintf(log_str + log_len, maxsize, format, ap);
|
log_len += vsnprintf(log_str + log_len, maxsize, format, ap);
|
||||||
va_end(ap);
|
|
||||||
|
|
||||||
if (is_tty) {
|
if (is_tty) {
|
||||||
log_len += sprintf(log_str + log_len, "\033[0m\n");
|
log_len += sprintf(log_str + log_len, "\033[0m\n");
|
||||||
@@ -56,10 +55,20 @@ void sist_logf(char *filepath, int level, char *format, ...) {
|
|||||||
log_len += 1;
|
log_len += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
write(STDERR_FILENO, log_str, log_len);
|
int ret = write(STDERR_FILENO, log_str, log_len);
|
||||||
|
if (ret == -1) {
|
||||||
|
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void sist_log(char *filepath, int level, char *str) {
|
void sist_logf(const char *filepath, int level, char *format, ...) {
|
||||||
|
va_list ap;
|
||||||
|
va_start(ap, format);
|
||||||
|
vsist_logf(filepath, level, format, ap);
|
||||||
|
va_end(ap);
|
||||||
|
}
|
||||||
|
|
||||||
|
void sist_log(const char *filepath, int level, char *str) {
|
||||||
|
|
||||||
static int is_tty = -1;
|
static int is_tty = -1;
|
||||||
if (is_tty == -1) {
|
if (is_tty == -1) {
|
||||||
@@ -81,7 +90,7 @@ void sist_log(char *filepath, int level, char *str) {
|
|||||||
if (is_tty) {
|
if (is_tty) {
|
||||||
log_len = snprintf(
|
log_len = snprintf(
|
||||||
log_str, sizeof(log_str),
|
log_str, sizeof(log_str),
|
||||||
"\033[%dm[%04X]%s [%s] [%s %s] %s \033[0m\n",
|
"\033[%dm[%04llX]%s [%s] [%s %s] %s \033[0m\n",
|
||||||
31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
|
31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
|
||||||
datetime, log_levels[level], filepath,
|
datetime, log_levels[level], filepath,
|
||||||
str
|
str
|
||||||
@@ -89,11 +98,14 @@ void sist_log(char *filepath, int level, char *str) {
|
|||||||
} else {
|
} else {
|
||||||
log_len = snprintf(
|
log_len = snprintf(
|
||||||
log_str, sizeof(log_str),
|
log_str, sizeof(log_str),
|
||||||
"[%04X] [%s] [%s %s] %s \n",
|
"[%04llX] [%s] [%s %s] %s \n",
|
||||||
pid, datetime, log_levels[level], filepath,
|
pid, datetime, log_levels[level], filepath,
|
||||||
str
|
str
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
write(STDERR_FILENO, log_str, log_len);
|
int ret = write(STDERR_FILENO, log_str, log_len);
|
||||||
|
if (ret == -1) {
|
||||||
|
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
#ifndef SIST2_LOG_H
|
#ifndef SIST2_LOG_H
|
||||||
#define SIST2_LOG_H
|
#define SIST2_LOG_H
|
||||||
|
|
||||||
|
|
||||||
#define LOG_MAX_LENGTH 8192
|
#define LOG_MAX_LENGTH 8192
|
||||||
|
|
||||||
#define SIST_DEBUG 0
|
#define SIST_DEBUG 0
|
||||||
@@ -36,10 +37,11 @@
|
|||||||
sist_log(filepath, SIST_FATAL, str);\
|
sist_log(filepath, SIST_FATAL, str);\
|
||||||
exit(-1);
|
exit(-1);
|
||||||
|
|
||||||
#include "src/sist.h"
|
#include "sist.h"
|
||||||
|
|
||||||
void sist_logf(char *filepath, int level, char *format, ...);
|
void sist_logf(const char *filepath, int level, char *format, ...);
|
||||||
|
void vsist_logf(const char *filepath, int level, char *format, va_list ap);
|
||||||
|
|
||||||
void sist_log(char *filepath, int level, char *str);
|
void sist_log(const char *filepath, int level, char *str);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
319
src/main.c
319
src/main.c
@@ -1,25 +1,35 @@
|
|||||||
#include "sist.h"
|
#include "sist.h"
|
||||||
#include "ctx.h"
|
#include "ctx.h"
|
||||||
|
|
||||||
|
#include <third-party/argparse/argparse.h>
|
||||||
|
#include <locale.h>
|
||||||
|
|
||||||
|
#include "cli.h"
|
||||||
|
#include "io/serialize.h"
|
||||||
|
#include "io/store.h"
|
||||||
|
#include "tpool.h"
|
||||||
|
#include "io/walk.h"
|
||||||
|
#include "index/elastic.h"
|
||||||
|
#include "web/serve.h"
|
||||||
|
#include "parsing/mime.h"
|
||||||
|
#include "parsing/parse.h"
|
||||||
|
|
||||||
|
#include "stats.h"
|
||||||
|
|
||||||
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
|
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
|
||||||
|
|
||||||
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
|
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
|
||||||
|
|
||||||
|
|
||||||
static const char *const Version = "1.2.1";
|
static const char *const Version = "2.8.3";
|
||||||
static const char *const usage[] = {
|
static const char *const usage[] = {
|
||||||
"sist2 scan [OPTION]... PATH",
|
"sist2 scan [OPTION]... PATH",
|
||||||
"sist2 index [OPTION]... INDEX",
|
"sist2 index [OPTION]... INDEX",
|
||||||
"sist2 web [OPTION]... INDEX...",
|
"sist2 web [OPTION]... INDEX...",
|
||||||
|
"sist2 exec-script [OPTION]... INDEX",
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
void global_init() {
|
|
||||||
curl_global_init(CURL_GLOBAL_NOTHING);
|
|
||||||
av_log_set_level(AV_LOG_QUIET);
|
|
||||||
opcInitLibrary();
|
|
||||||
}
|
|
||||||
|
|
||||||
void init_dir(const char *dirpath) {
|
void init_dir(const char *dirpath) {
|
||||||
char path[PATH_MAX];
|
char path[PATH_MAX];
|
||||||
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
|
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
|
||||||
@@ -38,29 +48,139 @@ void scan_print_header() {
|
|||||||
LOG_INFOF("main.c", "sist2 v%s", Version)
|
LOG_INFOF("main.c", "sist2 v%s", Version)
|
||||||
}
|
}
|
||||||
|
|
||||||
void sist2_scan(scan_args_t *args) {
|
void _store(char *key, size_t key_len, char *buf, size_t buf_len) {
|
||||||
|
store_write(ScanCtx.index.store, key, key_len, buf, buf_len);
|
||||||
|
}
|
||||||
|
|
||||||
|
void _log(const char *filepath, int level, char *str) {
|
||||||
|
if (level == LEVEL_FATAL) {
|
||||||
|
sist_log(filepath, level, str);
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (LogCtx.verbose) {
|
||||||
|
if (level == LEVEL_DEBUG) {
|
||||||
|
if (LogCtx.very_verbose) {
|
||||||
|
sist_log(filepath, level, str);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
sist_log(filepath, level, str);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void _logf(const char *filepath, int level, char *format, ...) {
|
||||||
|
|
||||||
|
va_list args;
|
||||||
|
|
||||||
|
va_start(args, format);
|
||||||
|
if (level == LEVEL_FATAL) {
|
||||||
|
vsist_logf(filepath, level, format, args);
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (LogCtx.verbose) {
|
||||||
|
if (level == LEVEL_DEBUG) {
|
||||||
|
if (LogCtx.very_verbose) {
|
||||||
|
vsist_logf(filepath, level, format, args);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
vsist_logf(filepath, level, format, args);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
va_end(args);
|
||||||
|
}
|
||||||
|
|
||||||
|
void initialize_scan_context(scan_args_t *args) {
|
||||||
|
|
||||||
|
// Arc
|
||||||
|
ScanCtx.arc_ctx.mode = args->archive_mode;
|
||||||
|
ScanCtx.arc_ctx.log = _log;
|
||||||
|
ScanCtx.arc_ctx.logf = _logf;
|
||||||
|
ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
|
||||||
|
|
||||||
|
// Comic
|
||||||
|
ScanCtx.comic_ctx.log = _log;
|
||||||
|
ScanCtx.comic_ctx.logf = _logf;
|
||||||
|
ScanCtx.comic_ctx.store = _store;
|
||||||
|
ScanCtx.comic_ctx.tn_size = args->size;
|
||||||
|
ScanCtx.comic_ctx.tn_qscale = args->quality;
|
||||||
|
ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
|
||||||
|
ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
|
||||||
|
|
||||||
|
// Ebook
|
||||||
|
pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
|
||||||
|
ScanCtx.ebook_ctx.content_size = args->content_size;
|
||||||
|
ScanCtx.ebook_ctx.tn_size = args->size;
|
||||||
|
ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
|
||||||
|
ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
|
||||||
|
ScanCtx.ebook_ctx.log = _log;
|
||||||
|
ScanCtx.ebook_ctx.logf = _logf;
|
||||||
|
ScanCtx.ebook_ctx.store = _store;
|
||||||
|
|
||||||
|
// Font
|
||||||
|
ScanCtx.font_ctx.enable_tn = args->size > 0;
|
||||||
|
ScanCtx.font_ctx.log = _log;
|
||||||
|
ScanCtx.font_ctx.logf = _logf;
|
||||||
|
ScanCtx.font_ctx.store = _store;
|
||||||
|
|
||||||
|
// Media
|
||||||
|
ScanCtx.media_ctx.tn_qscale = args->quality;
|
||||||
|
ScanCtx.media_ctx.tn_size = args->size;
|
||||||
|
ScanCtx.media_ctx.log = _log;
|
||||||
|
ScanCtx.media_ctx.logf = _logf;
|
||||||
|
ScanCtx.media_ctx.store = _store;
|
||||||
|
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
|
||||||
|
init_media();
|
||||||
|
|
||||||
|
// OOXML
|
||||||
|
ScanCtx.ooxml_ctx.content_size = args->content_size;
|
||||||
|
ScanCtx.ooxml_ctx.log = _log;
|
||||||
|
ScanCtx.ooxml_ctx.logf = _logf;
|
||||||
|
ScanCtx.ooxml_ctx.store = _store;
|
||||||
|
|
||||||
|
// MOBI
|
||||||
|
ScanCtx.mobi_ctx.content_size = args->content_size;
|
||||||
|
ScanCtx.mobi_ctx.log = _log;
|
||||||
|
ScanCtx.mobi_ctx.logf = _logf;
|
||||||
|
|
||||||
|
// TEXT
|
||||||
|
ScanCtx.text_ctx.content_size = args->content_size;
|
||||||
|
ScanCtx.text_ctx.log = _log;
|
||||||
|
ScanCtx.text_ctx.logf = _logf;
|
||||||
|
|
||||||
ScanCtx.tn_qscale = args->quality;
|
|
||||||
ScanCtx.tn_size = args->size;
|
|
||||||
ScanCtx.content_size = args->content_size;
|
|
||||||
ScanCtx.threads = args->threads;
|
ScanCtx.threads = args->threads;
|
||||||
ScanCtx.depth = args->depth;
|
ScanCtx.depth = args->depth;
|
||||||
ScanCtx.archive_mode = args->archive_mode;
|
|
||||||
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
|
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
|
||||||
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
|
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
|
||||||
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
|
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
|
||||||
|
strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
|
||||||
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
|
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
|
||||||
ScanCtx.tesseract_lang = args->tesseract_lang;
|
ScanCtx.fast = args->fast;
|
||||||
|
|
||||||
init_dir(ScanCtx.index.path);
|
// Raw
|
||||||
|
ScanCtx.raw_ctx.tn_qscale = args->quality;
|
||||||
|
ScanCtx.raw_ctx.tn_size = args->size;
|
||||||
|
ScanCtx.raw_ctx.log = _log;
|
||||||
|
ScanCtx.raw_ctx.logf = _logf;
|
||||||
|
ScanCtx.raw_ctx.store = _store;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void sist2_scan(scan_args_t *args) {
|
||||||
|
|
||||||
ScanCtx.mime_table = mime_get_mime_table();
|
ScanCtx.mime_table = mime_get_mime_table();
|
||||||
ScanCtx.ext_table = mime_get_ext_table();
|
ScanCtx.ext_table = mime_get_ext_table();
|
||||||
|
|
||||||
|
initialize_scan_context(args);
|
||||||
|
|
||||||
|
init_dir(ScanCtx.index.path);
|
||||||
|
|
||||||
char store_path[PATH_MAX];
|
char store_path[PATH_MAX];
|
||||||
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
|
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
|
||||||
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
|
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||||
ScanCtx.index.store = store_create(store_path);
|
ScanCtx.index.store = store_create(store_path, STORE_SIZE_TN);
|
||||||
|
|
||||||
scan_print_header();
|
scan_print_header();
|
||||||
|
|
||||||
@@ -70,9 +190,18 @@ void sist2_scan(scan_args_t *args) {
|
|||||||
|
|
||||||
DIR *dir = opendir(args->incremental);
|
DIR *dir = opendir(args->incremental);
|
||||||
if (dir == NULL) {
|
if (dir == NULL) {
|
||||||
perror("opendir");
|
LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno))
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char descriptor_path[PATH_MAX];
|
||||||
|
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental);
|
||||||
|
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
|
||||||
|
|
||||||
|
if (strcmp(original_desc.version, Version) != 0) {
|
||||||
|
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", original_desc.version,
|
||||||
|
Version, INDEX_VERSION_EXTERNAL)
|
||||||
|
}
|
||||||
|
|
||||||
struct dirent *de;
|
struct dirent *de;
|
||||||
while ((de = readdir(dir)) != NULL) {
|
while ((de = readdir(dir)) != NULL) {
|
||||||
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
|
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
|
||||||
@@ -83,20 +212,22 @@ void sist2_scan(scan_args_t *args) {
|
|||||||
}
|
}
|
||||||
closedir(dir);
|
closedir(dir);
|
||||||
|
|
||||||
printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table));
|
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
|
||||||
}
|
}
|
||||||
|
|
||||||
ScanCtx.pool = tpool_create(args->threads, thread_cleanup);
|
ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE);
|
||||||
tpool_start(ScanCtx.pool);
|
tpool_start(ScanCtx.pool);
|
||||||
walk_directory_tree(ScanCtx.index.desc.root);
|
walk_directory_tree(ScanCtx.index.desc.root);
|
||||||
tpool_wait(ScanCtx.pool);
|
tpool_wait(ScanCtx.pool);
|
||||||
tpool_destroy(ScanCtx.pool);
|
tpool_destroy(ScanCtx.pool);
|
||||||
|
|
||||||
|
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
|
||||||
|
|
||||||
if (args->incremental != NULL) {
|
if (args->incremental != NULL) {
|
||||||
char dst_path[PATH_MAX];
|
char dst_path[PATH_MAX];
|
||||||
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
|
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
|
||||||
snprintf(dst_path, PATH_MAX, "%s_index_original", ScanCtx.index.path);
|
snprintf(dst_path, PATH_MAX, "%s_index_original", ScanCtx.index.path);
|
||||||
store_t *source = store_create(store_path);
|
store_t *source = store_create(store_path, STORE_SIZE_TN);
|
||||||
|
|
||||||
DIR *dir = opendir(args->incremental);
|
DIR *dir = opendir(args->incremental);
|
||||||
if (dir == NULL) {
|
if (dir == NULL) {
|
||||||
@@ -113,6 +244,13 @@ void sist2_scan(scan_args_t *args) {
|
|||||||
}
|
}
|
||||||
closedir(dir);
|
closedir(dir);
|
||||||
store_destroy(source);
|
store_destroy(source);
|
||||||
|
|
||||||
|
snprintf(store_path, PATH_MAX, "%stags", args->incremental);
|
||||||
|
snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
|
||||||
|
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||||
|
store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
|
||||||
|
store_copy(source_tags, dst_path);
|
||||||
|
store_destroy(source_tags);
|
||||||
}
|
}
|
||||||
|
|
||||||
store_destroy(ScanCtx.index.store);
|
store_destroy(ScanCtx.index.store);
|
||||||
@@ -121,6 +259,7 @@ void sist2_scan(scan_args_t *args) {
|
|||||||
void sist2_index(index_args_t *args) {
|
void sist2_index(index_args_t *args) {
|
||||||
|
|
||||||
IndexCtx.es_url = args->es_url;
|
IndexCtx.es_url = args->es_url;
|
||||||
|
IndexCtx.es_index = args->es_index;
|
||||||
IndexCtx.batch_size = args->batch_size;
|
IndexCtx.batch_size = args->batch_size;
|
||||||
|
|
||||||
if (!args->print) {
|
if (!args->print) {
|
||||||
@@ -135,17 +274,21 @@ void sist2_index(index_args_t *args) {
|
|||||||
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
|
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
|
||||||
|
|
||||||
if (strcmp(desc.version, Version) != 0 && strcmp(desc.version, INDEX_VERSION_EXTERNAL) != 0) {
|
if (strcmp(desc.version, Version) != 0 && strcmp(desc.version, INDEX_VERSION_EXTERNAL) != 0) {
|
||||||
fprintf(stderr, "Version mismatch! Index is %s but executable is %s/%s\n",
|
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", desc.version, Version,
|
||||||
desc.version, Version, INDEX_VERSION_EXTERNAL);
|
INDEX_VERSION_EXTERNAL)
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DIR *dir = opendir(args->index_path);
|
DIR *dir = opendir(args->index_path);
|
||||||
if (dir == NULL) {
|
if (dir == NULL) {
|
||||||
perror("opendir");
|
LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno))
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char path_tmp[PATH_MAX];
|
||||||
|
snprintf(path_tmp, sizeof(path_tmp), "%s/tags", args->index_path);
|
||||||
|
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||||
|
IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
|
||||||
|
IndexCtx.tags = store_read_all(IndexCtx.tag_store);
|
||||||
|
|
||||||
index_func f;
|
index_func f;
|
||||||
if (args->print) {
|
if (args->print) {
|
||||||
f = print_json;
|
f = print_json;
|
||||||
@@ -153,6 +296,16 @@ void sist2_index(index_args_t *args) {
|
|||||||
f = index_json;
|
f = index_json;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void (*cleanup)();
|
||||||
|
if (args->print) {
|
||||||
|
cleanup = NULL;
|
||||||
|
} else {
|
||||||
|
cleanup = elastic_cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE);
|
||||||
|
tpool_start(IndexCtx.pool);
|
||||||
|
|
||||||
struct dirent *de;
|
struct dirent *de;
|
||||||
while ((de = readdir(dir)) != NULL) {
|
while ((de = readdir(dir)) != NULL) {
|
||||||
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
|
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
|
||||||
@@ -163,17 +316,44 @@ void sist2_index(index_args_t *args) {
|
|||||||
}
|
}
|
||||||
closedir(dir);
|
closedir(dir);
|
||||||
|
|
||||||
|
tpool_wait(IndexCtx.pool);
|
||||||
|
|
||||||
|
tpool_destroy(IndexCtx.pool);
|
||||||
|
|
||||||
if (!args->print) {
|
if (!args->print) {
|
||||||
elastic_flush();
|
finish_indexer(args->script, args->async_script, desc.uuid);
|
||||||
destroy_indexer(args->script, desc.uuid);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
store_destroy(IndexCtx.tag_store);
|
||||||
|
g_hash_table_remove_all(IndexCtx.tags);
|
||||||
|
g_hash_table_destroy(IndexCtx.tags);
|
||||||
|
}
|
||||||
|
|
||||||
|
void sist2_exec_script(exec_args_t *args) {
|
||||||
|
|
||||||
|
LogCtx.verbose = TRUE;
|
||||||
|
|
||||||
|
char descriptor_path[PATH_MAX];
|
||||||
|
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path);
|
||||||
|
index_descriptor_t desc = read_index_descriptor(descriptor_path);
|
||||||
|
|
||||||
|
IndexCtx.es_url = args->es_url;
|
||||||
|
|
||||||
|
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
|
||||||
|
|
||||||
|
execute_update_script(args->script, args->async_script, desc.uuid);
|
||||||
|
free(args->script);
|
||||||
}
|
}
|
||||||
|
|
||||||
void sist2_web(web_args_t *args) {
|
void sist2_web(web_args_t *args) {
|
||||||
|
|
||||||
WebCtx.es_url = args->es_url;
|
WebCtx.es_url = args->es_url;
|
||||||
|
WebCtx.es_index = args->es_index;
|
||||||
WebCtx.index_count = args->index_count;
|
WebCtx.index_count = args->index_count;
|
||||||
WebCtx.b64credentials = args->b64credentials;
|
WebCtx.auth_user = args->auth_user;
|
||||||
|
WebCtx.auth_pass = args->auth_pass;
|
||||||
|
WebCtx.auth_enabled = args->auth_enabled;
|
||||||
|
WebCtx.tag_auth_enabled = args->tag_auth_enabled;
|
||||||
|
|
||||||
for (int i = 0; i < args->index_count; i++) {
|
for (int i = 0; i < args->index_count; i++) {
|
||||||
char *abs_path = abspath(args->indices[i]);
|
char *abs_path = abspath(args->indices[i]);
|
||||||
@@ -183,7 +363,11 @@ void sist2_web(web_args_t *args) {
|
|||||||
char path_tmp[PATH_MAX];
|
char path_tmp[PATH_MAX];
|
||||||
|
|
||||||
snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path);
|
snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path);
|
||||||
WebCtx.indices[i].store = store_create(path_tmp);
|
WebCtx.indices[i].store = store_create(path_tmp, STORE_SIZE_TN);
|
||||||
|
|
||||||
|
snprintf(path_tmp, PATH_MAX, "%stags", abs_path);
|
||||||
|
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||||
|
WebCtx.indices[i].tag_store = store_create(path_tmp, STORE_SIZE_TAG);
|
||||||
|
|
||||||
snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path);
|
snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path);
|
||||||
WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
|
WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
|
||||||
@@ -193,21 +377,25 @@ void sist2_web(web_args_t *args) {
|
|||||||
free(abs_path);
|
free(abs_path);
|
||||||
}
|
}
|
||||||
|
|
||||||
serve(args->bind, args->port);
|
serve(args->listen_address);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, const char *argv[]) {
|
int main(int argc, const char *argv[]) {
|
||||||
|
setlocale(LC_ALL, "");
|
||||||
global_init();
|
|
||||||
|
|
||||||
scan_args_t *scan_args = scan_args_create();
|
scan_args_t *scan_args = scan_args_create();
|
||||||
index_args_t *index_args = index_args_create();
|
index_args_t *index_args = index_args_create();
|
||||||
web_args_t *web_args = web_args_create();
|
web_args_t *web_args = web_args_create();
|
||||||
|
exec_args_t *exec_args = exec_args_create();
|
||||||
|
|
||||||
int arg_version = 0;
|
int arg_version = 0;
|
||||||
|
|
||||||
char *common_es_url = NULL;
|
char *common_es_url = NULL;
|
||||||
|
char *common_es_index = NULL;
|
||||||
|
char *common_script_path = NULL;
|
||||||
|
int common_async_script = 0;
|
||||||
|
int common_threads = 0;
|
||||||
|
|
||||||
struct argparse_option options[] = {
|
struct argparse_option options[] = {
|
||||||
OPT_HELP(),
|
OPT_HELP(),
|
||||||
@@ -217,7 +405,7 @@ int main(int argc, const char *argv[]) {
|
|||||||
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
|
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
|
||||||
|
|
||||||
OPT_GROUP("Scan options"),
|
OPT_GROUP("Scan options"),
|
||||||
OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
|
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||||
OPT_FLOAT('q', "quality", &scan_args->quality,
|
OPT_FLOAT('q', "quality", &scan_args->quality,
|
||||||
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
|
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
|
||||||
OPT_INTEGER(0, "size", &scan_args->size,
|
OPT_INTEGER(0, "size", &scan_args->size,
|
||||||
@@ -236,20 +424,37 @@ int main(int argc, const char *argv[]) {
|
|||||||
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
|
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
|
||||||
OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see "
|
OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see "
|
||||||
"which are installed on your machine)"),
|
"which are installed on your machine)"),
|
||||||
|
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
|
||||||
|
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
|
||||||
|
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
|
||||||
|
"(see USAGE.md). DEFAULT: 0.0005"),
|
||||||
|
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
|
||||||
|
"Maximum memory buffer size per thread in MB for files inside archives "
|
||||||
|
"(see USAGE.md). DEFAULT: 2000"),
|
||||||
|
|
||||||
OPT_GROUP("Index options"),
|
OPT_GROUP("Index options"),
|
||||||
|
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
|
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
|
||||||
|
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||||
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
|
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
|
||||||
OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
|
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||||
|
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
|
||||||
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
|
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
|
||||||
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
|
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
|
||||||
"(You must use this option the first time you use the index command)"),
|
"(You must use this option the first time you use the index command)"),
|
||||||
|
|
||||||
OPT_GROUP("Web options"),
|
OPT_GROUP("Web options"),
|
||||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||||
OPT_STRING(0, "bind", &web_args->bind, "Listen on this address. DEFAULT=localhost"),
|
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||||
OPT_STRING(0, "port", &web_args->port, "Listen on this port. DEFAULT=4090"),
|
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
|
||||||
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
|
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
|
||||||
|
OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"),
|
||||||
|
|
||||||
|
OPT_GROUP("Exec-script options"),
|
||||||
|
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||||
|
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||||
|
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||||
|
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
|
||||||
|
|
||||||
OPT_END(),
|
OPT_END(),
|
||||||
};
|
};
|
||||||
@@ -261,7 +466,7 @@ int main(int argc, const char *argv[]) {
|
|||||||
|
|
||||||
if (arg_version) {
|
if (arg_version) {
|
||||||
printf(Version);
|
printf(Version);
|
||||||
exit(0);
|
goto end;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (LogCtx.very_verbose != 0) {
|
if (LogCtx.very_verbose != 0) {
|
||||||
@@ -270,25 +475,35 @@ int main(int argc, const char *argv[]) {
|
|||||||
|
|
||||||
web_args->es_url = common_es_url;
|
web_args->es_url = common_es_url;
|
||||||
index_args->es_url = common_es_url;
|
index_args->es_url = common_es_url;
|
||||||
|
exec_args->es_url = common_es_url;
|
||||||
|
|
||||||
|
web_args->es_index = common_es_index;
|
||||||
|
index_args->es_index = common_es_index;
|
||||||
|
exec_args->es_index = common_es_index;
|
||||||
|
|
||||||
|
index_args->script_path = common_script_path;
|
||||||
|
exec_args->script_path = common_script_path;
|
||||||
|
index_args->threads = common_threads;
|
||||||
|
scan_args->threads = common_threads;
|
||||||
|
exec_args->async_script = common_async_script;
|
||||||
|
index_args->async_script = common_async_script;
|
||||||
|
|
||||||
if (argc == 0) {
|
if (argc == 0) {
|
||||||
argparse_usage(&argparse);
|
argparse_usage(&argparse);
|
||||||
return 1;
|
goto end;
|
||||||
} else if (strcmp(argv[0], "scan") == 0) {
|
} else if (strcmp(argv[0], "scan") == 0) {
|
||||||
|
|
||||||
int err = scan_args_validate(scan_args, argc, argv);
|
int err = scan_args_validate(scan_args, argc, argv);
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
return err;
|
goto end;
|
||||||
}
|
}
|
||||||
sist2_scan(scan_args);
|
sist2_scan(scan_args);
|
||||||
|
|
||||||
}
|
} else if (strcmp(argv[0], "index") == 0) {
|
||||||
|
|
||||||
else if (strcmp(argv[0], "index") == 0) {
|
|
||||||
|
|
||||||
int err = index_args_validate(index_args, argc, argv);
|
int err = index_args_validate(index_args, argc, argv);
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
return err;
|
goto end;
|
||||||
}
|
}
|
||||||
sist2_index(index_args);
|
sist2_index(index_args);
|
||||||
|
|
||||||
@@ -296,22 +511,30 @@ int main(int argc, const char *argv[]) {
|
|||||||
|
|
||||||
int err = web_args_validate(web_args, argc, argv);
|
int err = web_args_validate(web_args, argc, argv);
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
return err;
|
goto end;
|
||||||
}
|
}
|
||||||
sist2_web(web_args);
|
sist2_web(web_args);
|
||||||
|
|
||||||
}
|
} else if (strcmp(argv[0], "exec-script") == 0) {
|
||||||
else {
|
|
||||||
|
int err = exec_args_validate(exec_args, argc, argv);
|
||||||
|
if (err != 0) {
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
sist2_exec_script(exec_args);
|
||||||
|
|
||||||
|
} else {
|
||||||
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
|
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
|
||||||
argparse_usage(&argparse);
|
argparse_usage(&argparse);
|
||||||
return 1;
|
goto end;
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
||||||
|
end:
|
||||||
scan_args_destroy(scan_args);
|
scan_args_destroy(scan_args);
|
||||||
|
|
||||||
index_args_destroy(index_args);
|
index_args_destroy(index_args);
|
||||||
web_args_destroy(web_args);
|
web_args_destroy(web_args);
|
||||||
|
exec_args_destroy(exec_args);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,157 +0,0 @@
|
|||||||
#include "arc.h"
|
|
||||||
#include "src/ctx.h"
|
|
||||||
|
|
||||||
#define ARC_BUF_SIZE 8192
|
|
||||||
|
|
||||||
int should_parse_filtered_file(const char *filepath, int ext) {
|
|
||||||
char tmp[PATH_MAX * 2];
|
|
||||||
|
|
||||||
if (ext == 0) {
|
|
||||||
return FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy(tmp, filepath, ext - 1);
|
|
||||||
*(tmp + ext - 1) = '\0';
|
|
||||||
|
|
||||||
char *idx = strrchr(tmp, '.');
|
|
||||||
|
|
||||||
if (idx == NULL) {
|
|
||||||
return FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (strcmp(idx, ".tar") == 0) {
|
|
||||||
return TRUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
return FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
int arc_read(struct vfile *f, void *buf, size_t size) {
|
|
||||||
return archive_read_data(f->arc, buf, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef struct arc_data {
|
|
||||||
vfile_t *f;
|
|
||||||
char buf[ARC_BUF_SIZE];
|
|
||||||
} arc_data_f;
|
|
||||||
|
|
||||||
int vfile_open_callback(struct archive *a, void *user_data) {
|
|
||||||
arc_data_f *data = user_data;
|
|
||||||
|
|
||||||
if (data->f->is_fs_file && data->f->fd == -1) {
|
|
||||||
data->f->fd = open(data->f->filepath, O_RDONLY);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ARCHIVE_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
long vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
|
|
||||||
arc_data_f *data = user_data;
|
|
||||||
|
|
||||||
*buf = data->buf;
|
|
||||||
return data->f->read(data->f, data->buf, ARC_BUF_SIZE);
|
|
||||||
}
|
|
||||||
|
|
||||||
int vfile_close_callback(struct archive *a, void *user_data) {
|
|
||||||
arc_data_f *data = user_data;
|
|
||||||
|
|
||||||
if (data->f->close != NULL) {
|
|
||||||
data->f->close(data->f);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ARCHIVE_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Parse an archive.
 *
 * In ARC_MODE_LIST the paths of all regular entries are joined with '\n' and
 * attached to doc as MetaContent. In any other mode every regular entry is
 * handed to parse() as a virtual file whose path is
 * "<archive path>#/<entry path>".
 *
 * Archives that are themselves virtual files (not on the filesystem) are only
 * opened when ScanCtx.archive_mode is ARC_MODE_RECURSE; otherwise the
 * function returns without doing anything.
 *
 * @param f   vfile of the archive
 * @param doc document describing the archive; receives the listing metadata
 *            and acts as parent of the sub-documents
 */
void parse_archive(vfile_t *f, document_t *doc) {

    arc_data_f data;
    data.f = f;

    // A nested archive can only be streamed through the vfile callbacks,
    // which is only done in recurse mode.
    if (!f->is_fs_file && ScanCtx.archive_mode != ARC_MODE_RECURSE) {
        return;
    }

    struct archive *a = archive_read_new();
    if (a == NULL) {
        LOG_ERRORF(doc->filepath, "(arc.c) %s", "archive_read_new() returned NULL");
        return;
    }
    archive_read_support_filter_all(a);
    archive_read_support_format_all(a);

    int ret;
    if (f->is_fs_file) {
        ret = archive_read_open_filename(a, doc->filepath, ARC_BUF_SIZE);
    } else {
        ret = archive_read_open(
                a, &data,
                vfile_open_callback,
                vfile_read_callback,
                vfile_close_callback
        );
    }

    if (ret != ARCHIVE_OK) {
        LOG_ERRORF(doc->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a));
        archive_read_free(a);
        return;
    }

    struct archive_entry *entry;

    if (ScanCtx.archive_mode == ARC_MODE_LIST) {

        dyn_buffer_t buf = dyn_buffer_create();

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            const struct stat *entry_stat = archive_entry_stat(entry);
            const char *path = archive_entry_pathname(entry);

            // Skip entries libarchive could not describe, and anything that
            // is not a regular file
            if (entry_stat == NULL || path == NULL || !S_ISREG(entry_stat->st_mode)) {
                continue;
            }

            dyn_buffer_append_string(&buf, (char *) path);
            dyn_buffer_write_char(&buf, '\n');
        }
        dyn_buffer_write_char(&buf, '\0');

        // buf.cur already accounts for the terminating NUL written above
        meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
        if (meta_list != NULL) {
            meta_list->key = MetaContent;
            strcpy(meta_list->strval, buf.buf);
            APPEND_META(doc, meta_list);
        } else {
            LOG_ERRORF(doc->filepath, "(arc.c) %s", "Could not allocate archive listing");
        }
        dyn_buffer_destroy(&buf);

    } else {

        // Capacity reserved for sub_job->filepath by the allocation below
        const size_t filepath_cap = PATH_MAX * 2;

        parse_job_t *sub_job = malloc(sizeof(parse_job_t) + filepath_cap);
        if (sub_job == NULL) {
            LOG_ERRORF(doc->filepath, "(arc.c) %s", "Could not allocate parse job");
            archive_read_free(a);
            return;
        }

        sub_job->vfile.close = NULL;
        sub_job->vfile.read = arc_read;
        sub_job->vfile.arc = a;
        sub_job->vfile.filepath = sub_job->filepath;
        sub_job->vfile.is_fs_file = FALSE;
        memcpy(sub_job->parent, doc->uuid, sizeof(uuid_t));

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            const struct stat *entry_stat = archive_entry_stat(entry);
            if (entry_stat == NULL) {
                continue;
            }
            sub_job->info = *entry_stat;

            if (!S_ISREG(sub_job->info.st_mode)) {
                continue;
            }

            const char *entry_path = archive_entry_pathname(entry);
            if (entry_path == NULL) {
                continue;
            }

            int len = snprintf(sub_job->filepath, filepath_cap, "%s#/%s", f->filepath, entry_path);
            if (len < 0 || (size_t) len >= filepath_cap) {
                LOG_ERRORF(doc->filepath, "(arc.c) Path of archive entry is too long: %s", entry_path);
                continue;
            }

            // The format string always contains a '/', so strrchr() cannot fail
            sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;

            // Only a dot inside the file name marks an extension; a dot in a
            // directory component must not be mistaken for one
            char *p = strrchr(sub_job->filepath, '.');
            if (p != NULL && (p - sub_job->filepath) >= sub_job->base) {
                sub_job->ext = (int) (p - sub_job->filepath + 1);
            } else {
                sub_job->ext = (int) strlen(sub_job->filepath);
            }

            parse(sub_job);
        }

        free(sub_job);
    }

    archive_read_free(a);
}
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
#ifndef SIST2_ARC_H
|
|
||||||
#define SIST2_ARC_H
|
|
||||||
|
|
||||||
#include "src/sist.h"
|
|
||||||
|
|
||||||
int should_parse_filtered_file(const char *filepath, int ext);
|
|
||||||
|
|
||||||
void parse_archive(vfile_t *f, document_t *doc);
|
|
||||||
|
|
||||||
int arc_read(struct vfile * f, void *buf, size_t size);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,107 +0,0 @@
|
|||||||
#include "doc.h"
|
|
||||||
#include "src/ctx.h"
|
|
||||||
|
|
||||||
/**
 * Recursively walk the children of the current XML element and append the
 * text found inside every <t> element to buf, each followed by one space.
 * Any other child element is descended into with a recursive call.
 *
 * @param reader MCE text reader positioned on an element
 * @param buf    buffer receiving the extracted text
 */
void dump_text(mceTextReader_t *reader, dyn_buffer_t *buf) {

    mce_skip_attributes(reader);

    mce_start_children(reader) {

        // <t> elements: copy their text nodes
        mce_start_element(reader, NULL, _X("t")) {
            mce_skip_attributes(reader);
            mce_start_children(reader) {
                mce_start_text(reader) {
                    dyn_buffer_append_string(buf, (char *) xmlTextReaderConstValue(reader->reader));
                    dyn_buffer_write_char(buf, ' ');
                } mce_end_text(reader);
            } mce_end_children(reader);
        } mce_end_element(reader);

        // Anything else: recurse
        mce_start_element(reader, NULL, NULL) {
            dump_text(reader, buf);
        } mce_end_element(reader);

    } mce_end_children(reader)
}
|
|
||||||
|
|
||||||
__always_inline
|
|
||||||
int should_read_part(opcPart part) {
|
|
||||||
|
|
||||||
char *part_name = (char *) part;
|
|
||||||
|
|
||||||
if (part == NULL) {
|
|
||||||
return FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( // Word
|
|
||||||
strcmp(part_name, "word/document.xml") == 0
|
|
||||||
|| strncmp(part_name, "word/footer", sizeof("word/footer") - 1) == 0
|
|
||||||
|| strncmp(part_name, "word/header", sizeof("word/header") - 1) == 0
|
|
||||||
// PowerPoint
|
|
||||||
|| strncmp(part_name, "ppt/slides/slide", sizeof("ppt/slides/slide") - 1) == 0
|
|
||||||
|| strncmp(part_name, "ppt/notesSlides/notesSlide", sizeof("ppt/notesSlides/notesSlide") - 1) == 0
|
|
||||||
// Excel
|
|
||||||
|| strncmp(part_name, "xl/worksheets/sheet", sizeof("xl/worksheets/sheet") - 1) == 0
|
|
||||||
|| strcmp(part_name, "xl/sharedStrings.xml") == 0
|
|
||||||
|| strcmp(part_name, "xl/workbook.xml") == 0
|
|
||||||
) {
|
|
||||||
return TRUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
return FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
__always_inline
|
|
||||||
void read_part(opcContainer *c, dyn_buffer_t *buf, opcPart part, document_t *doc) {
|
|
||||||
|
|
||||||
mceTextReader_t reader;
|
|
||||||
int ret = opcXmlReaderOpen(c, &reader, part, NULL, "UTF-8", 0);
|
|
||||||
|
|
||||||
if (ret != OPC_ERROR_NONE) {
|
|
||||||
LOG_ERRORF(doc->filepath, "(doc.c) opcXmlReaderOpen() returned error code %d", ret);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
mce_start_document(&reader) {
|
|
||||||
mce_start_element(&reader, NULL, NULL) {
|
|
||||||
dump_text(&reader, buf);
|
|
||||||
} mce_end_element(&reader);
|
|
||||||
} mce_end_document(&reader);
|
|
||||||
|
|
||||||
mceTextReaderCleanup(&reader);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Extract the text of an in-memory Office Open XML document and attach it to
 * doc as MetaContent.
 *
 * Every part accepted by should_read_part() is read with read_part(). No
 * metadata is added when no text was found.
 *
 * @param mem     document bytes; the call is a no-op when NULL
 * @param mem_len number of bytes in mem
 * @param doc     document receiving the extracted content
 */
void parse_doc(void *mem, size_t mem_len, document_t *doc) {

    if (mem == NULL) {
        return;
    }

    opcContainer *c = opcContainerOpenMem(mem, mem_len, OPC_OPEN_READ_ONLY, NULL);
    if (c == NULL) {
        LOG_ERROR(doc->filepath, "(doc.c) Couldn't open document with opcContainerOpenMem()");
        return;
    }

    dyn_buffer_t buf = dyn_buffer_create();

    // Stop as soon as there is no part, so that opcPartGetNext() is never
    // called with an invalid part (e.g. for an empty container)
    for (opcPart part = opcPartGetFirst(c); part != NULL; part = opcPartGetNext(c, part)) {
        if (should_read_part(part)) {
            read_part(c, &buf, part, doc);
        }
    }

    opcContainerClose(c, OPC_CLOSE_NOW);

    if (buf.cur > 0) {
        dyn_buffer_write_char(&buf, '\0');

        // buf.cur now includes the terminating NUL
        meta_line_t *meta = malloc(sizeof(meta_line_t) + buf.cur);
        if (meta != NULL) {
            meta->key = MetaContent;
            strcpy(meta->strval, buf.buf);
            APPEND_META(doc, meta)
        } else {
            LOG_ERROR(doc->filepath, "(doc.c) Couldn't allocate memory for document content");
        }
    }

    dyn_buffer_destroy(&buf);
}
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
#ifndef SIST2_DOC_H
|
|
||||||
#define SIST2_DOC_H
|
|
||||||
|
|
||||||
#include "src/sist.h"
|
|
||||||
|
|
||||||
void parse_doc(void *buf, size_t buf_len, document_t *doc);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,226 +0,0 @@
|
|||||||
#include "font.h"
|
|
||||||
|
|
||||||
|
|
||||||
#include "src/ctx.h"
|
|
||||||
|
|
||||||
__thread FT_Library ft_lib = NULL;
|
|
||||||
|
|
||||||
|
|
||||||
/* Size, in pixels, of the canvas needed to render a line of text. */
typedef struct text_dimensions {
    unsigned int width;     // sum of the per-character widths and kerning
    unsigned int height;    // tallest ascent plus deepest descent
    unsigned int baseline;  // deepest descent found in the text
} text_dimensions_t;

/* Metrics and pixels of one rendered character, copied from a FT_GlyphSlot. */
typedef struct glyph {
    int top;                // slot->bitmap_top
    int height;             // number of rows in the bitmap
    int width;              // number of columns in the bitmap
    int descent;            // rows that lie below `top`, never negative
    int ascent;             // rows that remain once descent is removed
    int advance_width;      // horizontal advance in whole pixels
    unsigned char *pixmap;  // bitmap buffer of the slot (not a copy)
} glyph_t;
|
|
||||||
|
|
||||||
|
|
||||||
__always_inline
|
|
||||||
int kerning_offset(char c, char pc, FT_Face face) {
|
|
||||||
FT_Vector kerning;
|
|
||||||
FT_Get_Kerning(face, c, pc, FT_KERNING_DEFAULT, &kerning);
|
|
||||||
|
|
||||||
return (int) (kerning.x / 64);
|
|
||||||
}
|
|
||||||
|
|
||||||
__always_inline
|
|
||||||
glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
|
|
||||||
glyph_t glyph;
|
|
||||||
|
|
||||||
glyph.pixmap = slot->bitmap.buffer;
|
|
||||||
|
|
||||||
glyph.width = (int) slot->bitmap.width;
|
|
||||||
glyph.height = (int) slot->bitmap.rows;
|
|
||||||
glyph.top = slot->bitmap_top;
|
|
||||||
glyph.advance_width = (int) slot->advance.x / 64;
|
|
||||||
|
|
||||||
glyph.descent = MAX(0, glyph.height - glyph.top);
|
|
||||||
glyph.ascent = MAX(0, MAX(glyph.top, glyph.height) - glyph.descent);
|
|
||||||
|
|
||||||
return glyph;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Compute the canvas size needed to render text with face.
 *
 * A character that FreeType fails to load is retried with its case swapped,
 * which is the same fallback parse_font() applies when drawing. A character
 * that still cannot be loaded is left out of the measurement rather than
 * being measured with the previously loaded glyph.
 *
 * @param text NUL-terminated text to measure
 * @param face face to measure with; its pixel size must already be set
 * @return width, height and baseline of the text, in pixels
 */
text_dimensions_t text_dimension(char *text, FT_Face face) {
    text_dimensions_t dimensions = {0, 0, 0};

    unsigned int max_ascent = 0;
    int max_descent = 0;

    char pc = 0;
    for (const char *cur = text; *cur != '\0'; cur++) {
        char c = *cur;

        if (FT_Load_Char(face, c, 0) != 0) {
            c = c >= 'a' && c <= 'z' ? c - 32 : c + 32;
            if (FT_Load_Char(face, c, 0) != 0) {
                continue;
            }
        }
        glyph_t glyph = ft_glyph_to_glyph(face->glyph);

        max_descent = MAX(max_descent, glyph.descent);
        max_ascent = MAX(max_ascent, (unsigned int) MAX(glyph.height, glyph.ascent));

        int kerning_x = kerning_offset(c, pc, face);
        dimensions.width += MAX(glyph.advance_width, glyph.width) + kerning_x;

        pc = c;
    }

    dimensions.height = max_ascent + max_descent;
    dimensions.baseline = max_descent;

    return dimensions;
}
|
|
||||||
|
|
||||||
/**
 * OR the pixels of glyph into bitmap with the glyph's top-left corner at
 * (x, y). Pixels whose destination index falls outside the
 * width * height canvas are skipped.
 *
 * @param glyph     glyph to draw
 * @param x         destination column of the glyph's left edge
 * @param y         destination row of the glyph's top edge
 * @param text_info dimensions of the canvas
 * @param bitmap    canvas of text_info.width * text_info.height bytes
 */
void draw_glyph(glyph_t *glyph, int x, int y, struct text_dimensions text_info, unsigned char *bitmap) {
    const unsigned int canvas_len = text_info.width * text_info.height;
    const unsigned int origin = y * text_info.width + x;

    // Running index into the glyph's pixmap
    unsigned int src = 0;

    for (unsigned int row = 0; row < glyph->height; row++) {
        // First destination pixel of this glyph row
        unsigned int dst = origin + row * text_info.width;

        for (unsigned int col = 0; col < glyph->width; col++, src++, dst++) {
            if (dst < canvas_len) {
                bitmap[dst] |= glyph->pixmap[src];
            }
        }
    }
}
|
|
||||||
|
|
||||||
/**
 * Serialise an 8-bit bitmap as a BMP file into buf.
 *
 * The palette is a 256-entry grayscale ramp running from white (index 0) to
 * black (index 255). Rows are written bottom-up and each row is padded to a
 * multiple of 4 bytes. The padding is computed from the row length: the
 * pixel array starts at offset 14 + 40 + 256 * 4, which is not a multiple of
 * 4, so aligning on the absolute buffer offset would give the first row a
 * wrong stride.
 *
 * @param buf        destination buffer, expected to be empty on entry (the
 *                   file size is patched in at offset 2)
 * @param dimensions width and height of bitmap, in pixels
 * @param bitmap     dimensions.width * dimensions.height pixels, top row first
 */
void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned char *bitmap) {

    const unsigned int row_padding = (4 - dimensions.width % 4) % 4;

    // File header
    dyn_buffer_write_short(buf, 0x4D42); // Magic
    dyn_buffer_write_int(buf, 0); // Size placeholder
    dyn_buffer_write_int(buf, 0x5157); //Reserved
    dyn_buffer_write_int(buf, 14 + 40 + 256 * 4); // pixels offset

    // DIB header
    dyn_buffer_write_int(buf, 40); // DIB size
    dyn_buffer_write_int(buf, (int) dimensions.width);
    dyn_buffer_write_int(buf, (int) dimensions.height);
    dyn_buffer_write_short(buf, 1); // Color planes
    dyn_buffer_write_short(buf, 8); // bits per pixel
    dyn_buffer_write_int(buf, 0); // compression
    dyn_buffer_write_int(buf, 0); // Ignored
    dyn_buffer_write_int(buf, 3800); // hres
    dyn_buffer_write_int(buf, 3800); // vres
    dyn_buffer_write_int(buf, 256); // Color count
    dyn_buffer_write_int(buf, 0); // Ignored

    // RGBA32 Color table (Grayscale)
    for (int i = 255; i >= 0; i--) {
        dyn_buffer_write_int(buf, i + (i << 8) + (i << 16));
    }

    // Pixel array: write from bottom to top, with rows padded to multiples of 4-bytes
    for (int y = (int) dimensions.height - 1; y >= 0; y--) {
        const unsigned char *row = bitmap + (size_t) y * dimensions.width;

        for (unsigned int x = 0; x < dimensions.width; x++) {
            dyn_buffer_write_char(buf, (char) row[x]);
        }
        for (unsigned int p = 0; p < row_padding; p++) {
            dyn_buffer_write_char(buf, 0);
        }
    }

    // Size: patched with memcpy because offset 2 is not int-aligned
    int total_size = (int) buf->cur;
    memcpy((char *) buf->buf + 2, &total_size, sizeof(total_size));
}
|
|
||||||
|
|
||||||
/**
 * Parse an in-memory font file.
 *
 * The font name ("<family> <style>", or the family alone when the style is
 * missing or starts with '?') is attached to doc as MetaFontName. When
 * thumbnails are enabled (ScanCtx.tn_size > 0) the name is rendered with the
 * font itself and stored as a BMP thumbnail keyed by doc->uuid.
 *
 * The FreeType library handle is created on first use. The face is released
 * on every exit path.
 *
 * @param buf     font file bytes; nothing is parsed when NULL
 * @param buf_len number of bytes in buf
 * @param doc     document receiving the metadata
 */
void parse_font(const char *buf, size_t buf_len, document_t *doc) {
    if (ft_lib == NULL) {
        FT_Error init_err = FT_Init_FreeType(&ft_lib);
        if (init_err != 0) {
            ft_lib = NULL;
            LOG_ERRORF(doc->filepath, "(font.c) FT_Init_FreeType() returned error code [%d] %s", init_err,
                       ft_error_string(init_err));
            return;
        }
    }
    if (buf == NULL) {
        return;
    }

    FT_Face face;
    FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, &face);
    if (err != 0) {
        LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, ft_error_string(err));
        return;
    }

    unsigned char *bitmap = NULL;
    char font_name[1024];

    // Bounded formatting: names longer than the buffer are truncated
    const char *family = face->family_name != NULL ? face->family_name : "(null)";
    if (face->style_name == NULL || *(face->style_name) == '?') {
        snprintf(font_name, sizeof(font_name), "%s", family);
    } else {
        snprintf(font_name, sizeof(font_name), "%s %s", family, face->style_name);
    }

    // +1 for the terminating NUL
    meta_line_t *meta_name = malloc(sizeof(meta_line_t) + strlen(font_name) + 1);
    if (meta_name == NULL) {
        LOG_ERRORF(doc->filepath, "(font.c) %s", "Could not allocate font name metadata");
        goto cleanup;
    }
    meta_name->key = MetaFontName;
    strcpy(meta_name->strval, font_name);
    APPEND_META(doc, meta_name)

    if (ScanCtx.tn_size <= 0) {
        goto cleanup;
    }

    int pixel = 64;
    int num_chars = (int) strlen(font_name);

    err = FT_Set_Pixel_Sizes(face, 0, pixel);
    if (err != 0) {
        LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, ft_error_string(err))
        goto cleanup;
    }

    text_dimensions_t dimensions = text_dimension(font_name, face);
    if (dimensions.width == 0 || dimensions.height == 0) {
        // Nothing to draw
        goto cleanup;
    }

    bitmap = calloc((size_t) dimensions.width * dimensions.height, 1);
    if (bitmap == NULL) {
        LOG_ERRORF(doc->filepath, "(font.c) %s", "Could not allocate thumbnail bitmap");
        goto cleanup;
    }

    FT_Vector pen;
    pen.x = 0;

    char pc = 0;
    for (int i = 0; i < num_chars; i++) {
        char c = font_name[i];

        err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
        if (err != 0) {
            // Retry with the case swapped before giving up on this character
            c = c >= 'a' && c <= 'z' ? c - 32 : c + 32;
            err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
            if (err != 0) {
                LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, ft_error_string(err));
                continue;
            }
        }
        glyph_t glyph = ft_glyph_to_glyph(face->glyph);

        pen.x += kerning_offset(c, pc, face);
        if (pen.x <= 0) {
            pen.x = ABS(glyph.advance_width - glyph.width);
        }
        pen.y = dimensions.height - glyph.ascent - dimensions.baseline;

        draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap);

        pen.x += glyph.advance_width;
        pc = c;
    }

    dyn_buffer_t bmp_data = dyn_buffer_create();
    bmp_format(&bmp_data, dimensions, bitmap);

    store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) bmp_data.buf, bmp_data.cur);

    dyn_buffer_destroy(&bmp_data);

cleanup:
    free(bitmap);
    FT_Done_Face(face);
}
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
#ifndef SIST2_FONT_H
|
|
||||||
#define SIST2_FONT_H
|
|
||||||
|
|
||||||
#include "src/sist.h"
|
|
||||||
|
|
||||||
|
|
||||||
void parse_font(const char * buf, size_t buf_len, document_t *doc);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,381 +0,0 @@
|
|||||||
#include "src/sist.h"
|
|
||||||
#include "src/ctx.h"
|
|
||||||
|
|
||||||
#define MIN_SIZE 32
|
|
||||||
#define AVIO_BUF_SIZE 8192
|
|
||||||
|
|
||||||
__always_inline
|
|
||||||
AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
|
|
||||||
|
|
||||||
AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
|
|
||||||
AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
|
|
||||||
jpeg->width = dstW;
|
|
||||||
jpeg->height = dstH;
|
|
||||||
jpeg->time_base.den = 1000000;
|
|
||||||
jpeg->time_base.num = 1;
|
|
||||||
jpeg->i_quant_factor = qscale;
|
|
||||||
|
|
||||||
jpeg->pix_fmt = AV_PIX_FMT_YUVJ420P;
|
|
||||||
int ret = avcodec_open2(jpeg, jpeg_codec, NULL);
|
|
||||||
|
|
||||||
if (ret != 0) {
|
|
||||||
printf("Could not open jpeg encoder: %s!\n", av_err2str(ret));
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return jpeg;
|
|
||||||
}
|
|
||||||
|
|
||||||
__always_inline
|
|
||||||
AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
|
|
||||||
|
|
||||||
int dstW;
|
|
||||||
int dstH;
|
|
||||||
if (frame->width <= size && frame->height <= size) {
|
|
||||||
dstW = frame->width;
|
|
||||||
dstH = frame->height;
|
|
||||||
} else {
|
|
||||||
double ratio = (double) frame->width / frame->height;
|
|
||||||
if (frame->width > frame->height) {
|
|
||||||
dstW = size;
|
|
||||||
dstH = (int) (size / ratio);
|
|
||||||
} else {
|
|
||||||
dstW = (int) (size * ratio);
|
|
||||||
dstH = size;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
AVFrame *scaled_frame = av_frame_alloc();
|
|
||||||
|
|
||||||
struct SwsContext *ctx = sws_getContext(
|
|
||||||
decoder->width, decoder->height, decoder->pix_fmt,
|
|
||||||
dstW, dstH, AV_PIX_FMT_YUVJ420P,
|
|
||||||
SWS_FAST_BILINEAR, 0, 0, 0
|
|
||||||
);
|
|
||||||
|
|
||||||
int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1);
|
|
||||||
uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len);
|
|
||||||
|
|
||||||
av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);
|
|
||||||
|
|
||||||
sws_scale(ctx,
|
|
||||||
(const uint8_t *const *) frame->data, frame->linesize,
|
|
||||||
0, decoder->height,
|
|
||||||
scaled_frame->data, scaled_frame->linesize
|
|
||||||
);
|
|
||||||
|
|
||||||
scaled_frame->width = dstW;
|
|
||||||
scaled_frame->height = dstH;
|
|
||||||
scaled_frame->format = AV_PIX_FMT_YUV420P;
|
|
||||||
|
|
||||||
sws_freeContext(ctx);
|
|
||||||
|
|
||||||
return scaled_frame;
|
|
||||||
}
|
|
||||||
|
|
||||||
__always_inline
|
|
||||||
AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, document_t *doc) {
|
|
||||||
AVFrame *frame = av_frame_alloc();
|
|
||||||
|
|
||||||
AVPacket avPacket;
|
|
||||||
av_init_packet(&avPacket);
|
|
||||||
|
|
||||||
int receive_ret = -EAGAIN;
|
|
||||||
while (receive_ret == -EAGAIN) {
|
|
||||||
// Get video frame
|
|
||||||
while (1) {
|
|
||||||
int read_frame_ret = av_read_frame(pFormatCtx, &avPacket);
|
|
||||||
|
|
||||||
if (read_frame_ret != 0) {
|
|
||||||
if (read_frame_ret != AVERROR_EOF) {
|
|
||||||
LOG_WARNINGF(doc->filepath,
|
|
||||||
"(media.c) avcodec_read_frame() returned error code [%d] %s",
|
|
||||||
read_frame_ret, av_err2str(read_frame_ret)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
av_frame_free(&frame);
|
|
||||||
av_packet_unref(&avPacket);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
//Ignore audio/other frames
|
|
||||||
if (avPacket.stream_index != stream_idx) {
|
|
||||||
av_packet_unref(&avPacket);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Feed it to decoder
|
|
||||||
int decode_ret = avcodec_send_packet(decoder, &avPacket);
|
|
||||||
if (decode_ret != 0) {
|
|
||||||
LOG_WARNINGF(doc->filepath,
|
|
||||||
"(media.c) avcodec_send_packet() returned error code [%d] %s",
|
|
||||||
decode_ret, av_err2str(decode_ret)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
av_packet_unref(&avPacket);
|
|
||||||
receive_ret = avcodec_receive_frame(decoder, frame);
|
|
||||||
}
|
|
||||||
return frame;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Append the value of an AVDictionaryEntry to a document as a string
 * metadata line with key `keyname`. The value goes through a text_buffer_t
 * before being copied into the metadata line.
 *
 * The expansion is a braced block, so its temporaries stay local to it and
 * it can be used with or without a trailing semicolon. Nothing is appended
 * when the allocation fails.
 */
#define APPEND_TAG_META(doc, tag_, keyname) \
    { \
        text_buffer_t tex = text_buffer_create(-1); \
        text_buffer_append_string0(&tex, (tag_)->value); \
        text_buffer_terminate_string(&tex); \
        meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \
        if (meta_tag != NULL) { \
            meta_tag->key = (keyname); \
            strcpy(meta_tag->strval, tex.dyn_buffer.buf); \
            APPEND_META(doc, meta_tag) \
        } \
        text_buffer_destroy(&tex); \
    }
|
|
||||||
|
|
||||||
__always_inline
|
|
||||||
void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
|
|
||||||
|
|
||||||
AVDictionaryEntry *tag = NULL;
|
|
||||||
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
|
||||||
char key[32];
|
|
||||||
strncpy(key, tag->key, sizeof(key));
|
|
||||||
|
|
||||||
char *ptr = key;
|
|
||||||
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr);
|
|
||||||
|
|
||||||
if (strcmp(key, "artist") == 0) {
|
|
||||||
APPEND_TAG_META(doc, tag, MetaArtist)
|
|
||||||
} else if (strcmp(key, "genre") == 0) {
|
|
||||||
APPEND_TAG_META(doc, tag, MetaGenre)
|
|
||||||
} else if (strcmp(key, "title") == 0) {
|
|
||||||
APPEND_TAG_META(doc, tag, MetaTitle)
|
|
||||||
} else if (strcmp(key, "album_artist") == 0) {
|
|
||||||
APPEND_TAG_META(doc, tag, MetaAlbumArtist)
|
|
||||||
} else if (strcmp(key, "album") == 0) {
|
|
||||||
APPEND_TAG_META(doc, tag, MetaAlbum)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__always_inline
|
|
||||||
void
|
|
||||||
append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int include_audio_tags, int is_video) {
|
|
||||||
|
|
||||||
if (is_video) {
|
|
||||||
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
|
|
||||||
meta_duration->key = MetaMediaDuration;
|
|
||||||
meta_duration->longval = pFormatCtx->duration / AV_TIME_BASE;
|
|
||||||
APPEND_META(doc, meta_duration)
|
|
||||||
|
|
||||||
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
|
|
||||||
meta_bitrate->key = MetaMediaBitrate;
|
|
||||||
meta_bitrate->longval = pFormatCtx->bit_rate;
|
|
||||||
APPEND_META(doc, meta_bitrate)
|
|
||||||
}
|
|
||||||
|
|
||||||
AVDictionaryEntry *tag = NULL;
|
|
||||||
if (is_video) {
|
|
||||||
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
|
||||||
if (include_audio_tags && strcmp(tag->key, "title") == 0) {
|
|
||||||
APPEND_TAG_META(doc, tag, MetaTitle)
|
|
||||||
} else if (strcmp(tag->key, "comment") == 0) {
|
|
||||||
APPEND_TAG_META(doc, tag, MetaContent)
|
|
||||||
} else if (include_audio_tags && strcmp(tag->key, "artist") == 0) {
|
|
||||||
APPEND_TAG_META(doc, tag, MetaArtist)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// EXIF metadata
|
|
||||||
while ((tag = av_dict_get(frame->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
|
||||||
if (include_audio_tags && strcmp(tag->key, "Artist") == 0) {
|
|
||||||
APPEND_TAG_META(doc, tag, MetaArtist)
|
|
||||||
} else if (strcmp(tag->key, "ImageDescription") == 0) {
|
|
||||||
APPEND_TAG_META(doc, tag, MetaContent)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Extract metadata from an opened media container and, when thumbnails are
 * enabled (ScanCtx.tn_size > 0) and a video stream exists, store a JPEG
 * thumbnail of one frame under doc->uuid.
 *
 * Streams are scanned from last to first; the first audio and video streams
 * met in that order provide the codec and dimension metadata.
 *
 * This function takes ownership of pFormatCtx and closes it before returning
 * on every path. avformat_close_input() frees the context and resets the
 * pointer, so no separate avformat_free_context() call is needed.
 *
 * @param pFormatCtx opened media container
 * @param doc        document receiving the metadata
 */
void parse_media(AVFormatContext *pFormatCtx, document_t *doc) {

    // Number of times the initial seek is attempted (unchanged from before)
    enum { SEEK_ATTEMPTS = 21 };

    int video_stream = -1;
    int audio_stream = -1;

    AVStream *stream = NULL;
    AVCodec *video_codec = NULL;
    AVCodecContext *decoder = NULL;
    AVCodecContext *jpeg_encoder = NULL;
    AVFrame *frame = NULL;
    AVFrame *scaled_frame = NULL;
    int ret;

    ret = avformat_find_stream_info(pFormatCtx, NULL);
    if (ret < 0) {
        LOG_WARNINGF(doc->filepath, "(media.c) avformat_find_stream_info() returned error code [%d] %s",
                     ret, av_err2str(ret));
    }

    for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) {
        AVStream *cur = pFormatCtx->streams[i];

        if (cur->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            if (audio_stream == -1) {
                meta_line_t *meta_audio = malloc(sizeof(meta_line_t));
                if (meta_audio != NULL) {
                    meta_audio->key = MetaMediaAudioCodec;
                    meta_audio->intval = cur->codecpar->codec_id;
                    APPEND_META(doc, meta_audio)
                }

                append_audio_meta(pFormatCtx, doc);
                audio_stream = i;
            }
        } else if (cur->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {

            if (video_stream == -1) {
                meta_line_t *meta_vid = malloc(sizeof(meta_line_t));
                if (meta_vid != NULL) {
                    meta_vid->key = MetaMediaVideoCodec;
                    meta_vid->intval = cur->codecpar->codec_id;
                    APPEND_META(doc, meta_vid)
                }

                meta_line_t *meta_w = malloc(sizeof(meta_line_t));
                if (meta_w != NULL) {
                    meta_w->key = MetaWidth;
                    meta_w->intval = cur->codecpar->width;
                    APPEND_META(doc, meta_w)
                }

                meta_line_t *meta_h = malloc(sizeof(meta_line_t));
                if (meta_h != NULL) {
                    meta_h->key = MetaHeight;
                    meta_h->intval = cur->codecpar->height;
                    APPEND_META(doc, meta_h)
                }

                video_stream = i;
            }
        }
    }

    if (video_stream == -1 || ScanCtx.tn_size <= 0) {
        goto cleanup;
    }

    stream = pFormatCtx->streams[video_stream];

    if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
        goto cleanup;
    }

    // Decoder
    video_codec = avcodec_find_decoder(stream->codecpar->codec_id);
    if (video_codec == NULL) {
        LOG_WARNINGF(doc->filepath, "(media.c) No decoder found for codec id %d", stream->codecpar->codec_id);
        goto cleanup;
    }

    decoder = avcodec_alloc_context3(video_codec);
    if (decoder == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate decoder with avcodec_alloc_context3()");
        goto cleanup;
    }

    ret = avcodec_parameters_to_context(decoder, stream->codecpar);
    if (ret >= 0) {
        ret = avcodec_open2(decoder, video_codec, NULL);
    }
    if (ret < 0) {
        LOG_WARNINGF(doc->filepath, "(media.c) Could not open decoder: [%d] %s", ret, av_err2str(ret));
        goto cleanup;
    }

    //Seek
    if (stream->nb_frames > 1 && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
        for (int attempt = 0; attempt < SEEK_ATTEMPTS; attempt++) {
            int seek_ret = av_seek_frame(pFormatCtx, video_stream,
                                         (int64_t) (stream->duration * 0.10), 0);
            if (seek_ret == 0) {
                break;
            }
        }
    }

    frame = read_frame(pFormatCtx, decoder, video_stream, doc);
    if (frame == NULL) {
        goto cleanup;
    }

    append_video_meta(pFormatCtx, frame, doc, audio_stream == -1, stream->nb_frames > 1);

    // Scale frame
    scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size);
    if (scaled_frame == NULL) {
        goto cleanup;
    }

    // Encode frame to jpeg
    jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ScanCtx.tn_qscale);
    if (jpeg_encoder == NULL) {
        goto cleanup;
    }

    ret = avcodec_send_frame(jpeg_encoder, scaled_frame);
    if (ret == 0) {
        AVPacket jpeg_packet;
        av_init_packet(&jpeg_packet);
        // av_init_packet() leaves data and size untouched
        jpeg_packet.data = NULL;
        jpeg_packet.size = 0;

        ret = avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
        if (ret == 0) {
            // Save thumbnail
            store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data,
                        jpeg_packet.size);
        }
        av_packet_unref(&jpeg_packet);
    }
    if (ret != 0) {
        LOG_WARNINGF(doc->filepath, "(media.c) Could not encode thumbnail: [%d] %s", ret, av_err2str(ret));
    }

cleanup:
    if (scaled_frame != NULL) {
        // Pixel buffer allocated by scale_frame()
        av_free(*scaled_frame->data);
        av_frame_free(&scaled_frame);
    }
    av_frame_free(&frame);
    avcodec_free_context(&jpeg_encoder);
    avcodec_free_context(&decoder);
    avformat_close_input(&pFormatCtx);
}
|
|
||||||
|
|
||||||
/**
 * Open the media file at filepath with libavformat and hand it to
 * parse_media(). Failures to allocate or open the container are logged.
 *
 * @param filepath path of the media file
 * @param doc      document receiving the metadata
 */
void parse_media_filename(const char *filepath, document_t *doc) {

    AVFormatContext *fmt_ctx = avformat_alloc_context();
    if (fmt_ctx == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
        return;
    }

    int open_ret = avformat_open_input(&fmt_ctx, filepath, NULL, NULL);
    if (open_ret >= 0) {
        // Success: parse_media() gets the opened context
        parse_media(fmt_ctx, doc);
        return;
    }

    LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", open_ret, av_err2str(open_ret))
    avformat_close_input(&fmt_ctx);
    avformat_free_context(fmt_ctx);
}
|
|
||||||
|
|
||||||
|
|
||||||
int vfile_read(void *ptr, uint8_t *buf, int buf_size) {
|
|
||||||
struct vfile *f = ptr;
|
|
||||||
|
|
||||||
int ret = f->read(f, buf, buf_size);
|
|
||||||
|
|
||||||
if (ret == 0) {
|
|
||||||
return AVERROR_EOF;
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Parse a media file that is only reachable through a vfile, by giving
 * libavformat a custom, read-only I/O context built on vfile_read().
 *
 * The I/O context is created without a seek callback, so media that needs
 * seeking cannot be opened; that case (AVERROR(EIO)) is skipped silently.
 *
 * @param f   vfile to read the media from
 * @param doc document receiving the metadata
 */
void parse_media_vfile(struct vfile *f, document_t *doc) {

    AVFormatContext *pFormatCtx = avformat_alloc_context();
    if (pFormatCtx == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
        return;
    }

    unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
    if (buffer == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate I/O buffer with av_malloc()")
        avformat_free_context(pFormatCtx);
        return;
    }

    AVIOContext *io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);
    if (io_ctx == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate I/O context with avio_alloc_context()")
        av_free(buffer);
        avformat_free_context(pFormatCtx);
        return;
    }

    pFormatCtx->pb = io_ctx;
    pFormatCtx->flags |= AVFMT_FLAG_CUSTOM_IO;

    int res = avformat_open_input(&pFormatCtx, "", NULL, NULL);
    if (res < 0) {
        // AVERROR(EIO): tried to parse media that requires seek, not worth a log line
        if (res != AVERROR(EIO)) {
            LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
        }
        // avformat_open_input() frees the context on failure; this is a no-op
        // in that case and only guards against a context that is still set
        avformat_close_input(&pFormatCtx);
    } else {
        // parse_media() closes pFormatCtx
        parse_media(pFormatCtx, doc);
    }

    // The custom I/O context is never released by libavformat. Free the
    // buffer through io_ctx because libavformat may have replaced it.
    av_free(io_ctx->buffer);
    avio_context_free(&io_ctx);
}
|
|
||||||
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
#ifndef SIST2_MEDIA_H
#define SIST2_MEDIA_H

#include "src/sist.h"

/*
 * Size thresholds for media files (presumably in bytes - confirm against the
 * callers). Parenthesised so that they expand safely inside larger
 * expressions such as `x / MIN_VIDEO_SIZE`.
 */
#define MIN_VIDEO_SIZE (1024 * 64)
#define MIN_IMAGE_SIZE (1024 * 2)

/* Open the media file at filepath and extract its metadata into doc. */
void parse_media_filename(const char * filepath, document_t *doc);

/* Same as parse_media_filename(), reading the media through the vfile f. */
void parse_media_vfile(struct vfile *f, document_t *doc);

#endif
|
|
||||||
@@ -1,14 +1,14 @@
|
|||||||
#ifndef SIST2_MIME_H
|
#ifndef SIST2_MIME_H
|
||||||
#define SIST2_MIME_H
|
#define SIST2_MIME_H
|
||||||
|
|
||||||
#include "src/sist.h"
|
#include "../sist.h"
|
||||||
|
|
||||||
#define MAJOR_MIME(mime_id) (mime_id & 0x0FFF0000) >> 16
|
#define MAJOR_MIME(mime_id) (mime_id & 0x000F0000) >> 16
|
||||||
|
|
||||||
#define MIME_EMPTY 1
|
#define MIME_EMPTY 1
|
||||||
|
|
||||||
#define DONT_PARSE 0x80000000
|
#define DONT_PARSE 0x80000000
|
||||||
#define SHOULD_PARSE(mime_id) (mime_id & DONT_PARSE) != DONT_PARSE && mime_id != 0
|
#define SHOULD_PARSE(mime_id) (ScanCtx.fast == 0 && (mime_id & DONT_PARSE) != DONT_PARSE && mime_id != 0)
|
||||||
|
|
||||||
#define PDF_MASK 0x40000000
|
#define PDF_MASK 0x40000000
|
||||||
#define IS_PDF(mime_id) (mime_id & PDF_MASK) == PDF_MASK
|
#define IS_PDF(mime_id) (mime_id & PDF_MASK) == PDF_MASK
|
||||||
@@ -25,6 +25,15 @@
|
|||||||
#define DOC_MASK 0x04000000
|
#define DOC_MASK 0x04000000
|
||||||
#define IS_DOC(mime_id) (mime_id & DOC_MASK) == DOC_MASK
|
#define IS_DOC(mime_id) (mime_id & DOC_MASK) == DOC_MASK
|
||||||
|
|
||||||
|
#define MOBI_MASK 0x02000000
|
||||||
|
#define IS_MOBI(mime_id) (mime_id & MOBI_MASK) == MOBI_MASK
|
||||||
|
|
||||||
|
#define MARKUP_MASK 0x01000000
|
||||||
|
#define IS_MARKUP(mime_id) (mime_id & MARKUP_MASK) == MARKUP_MASK
|
||||||
|
|
||||||
|
#define RAW_MASK 0x00800000
|
||||||
|
#define IS_RAW(mime_id) (mime_id & RAW_MASK) == RAW_MASK
|
||||||
|
|
||||||
enum major_mime {
|
enum major_mime {
|
||||||
MimeInvalid = 0,
|
MimeInvalid = 0,
|
||||||
MimeModel = 1,
|
MimeModel = 1,
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,7 +1,15 @@
|
|||||||
|
#include "parse.h"
|
||||||
|
|
||||||
#include "src/sist.h"
|
#include "src/sist.h"
|
||||||
#include "src/ctx.h"
|
#include "src/ctx.h"
|
||||||
|
#include "mime.h"
|
||||||
|
#include "src/io/serialize.h"
|
||||||
|
|
||||||
__thread magic_t Magic = NULL;
|
#include <magic.h>
|
||||||
|
|
||||||
|
|
||||||
|
#define MIN_VIDEO_SIZE 1024 * 64
|
||||||
|
#define MIN_IMAGE_SIZE 1024 * 2
|
||||||
|
|
||||||
int fs_read(struct vfile *f, void *buf, size_t size) {
|
int fs_read(struct vfile *f, void *buf, size_t size) {
|
||||||
|
|
||||||
@@ -24,54 +32,37 @@ void fs_close(struct vfile *f) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void *read_all(parse_job_t *job, const char *buf, int bytes_read) {
|
void fs_reset(struct vfile *f) {
|
||||||
|
if (f->fd != -1) {
|
||||||
void *full_buf;
|
lseek(f->fd, 0, SEEK_SET);
|
||||||
|
|
||||||
if (job->info.st_size <= bytes_read) {
|
|
||||||
full_buf = malloc(job->info.st_size);
|
|
||||||
memcpy(full_buf, buf, job->info.st_size);
|
|
||||||
} else {
|
|
||||||
full_buf = malloc(job->info.st_size);
|
|
||||||
memcpy(full_buf, buf, bytes_read);
|
|
||||||
|
|
||||||
int ret = job->vfile.read(&job->vfile, full_buf + bytes_read, job->info.st_size - bytes_read);
|
|
||||||
if (ret == -1) {
|
|
||||||
LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return full_buf;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define IS_GIT_OBJ (strlen(doc.filepath + doc.base) == 38 && (strstr(doc.filepath, "objects") != NULL))
|
||||||
|
|
||||||
void parse(void *arg) {
|
void parse(void *arg) {
|
||||||
|
|
||||||
parse_job_t *job = arg;
|
parse_job_t *job = arg;
|
||||||
document_t doc;
|
document_t doc;
|
||||||
|
|
||||||
int inc_ts = incremental_get(ScanCtx.original_table, job->info.st_ino);
|
int inc_ts = incremental_get(ScanCtx.original_table, job->vfile.info.st_ino);
|
||||||
if (inc_ts != 0 && inc_ts == job->info.st_mtim.tv_sec) {
|
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
|
||||||
incremental_mark_file_for_copy(ScanCtx.copy_table, job->info.st_ino);
|
incremental_mark_file_for_copy(ScanCtx.copy_table, job->vfile.info.st_ino);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Magic == NULL) {
|
|
||||||
Magic = magic_open(MAGIC_MIME_TYPE);
|
|
||||||
}
|
|
||||||
|
|
||||||
doc.filepath = job->filepath;
|
doc.filepath = job->filepath;
|
||||||
doc.ext = (short) job->ext;
|
doc.ext = (short) job->ext;
|
||||||
doc.base = (short) job->base;
|
doc.base = (short) job->base;
|
||||||
doc.meta_head = NULL;
|
doc.meta_head = NULL;
|
||||||
doc.meta_tail = NULL;
|
doc.meta_tail = NULL;
|
||||||
doc.mime = 0;
|
doc.mime = 0;
|
||||||
doc.size = job->info.st_size;
|
doc.size = job->vfile.info.st_size;
|
||||||
doc.ino = job->info.st_ino;
|
doc.ino = job->vfile.info.st_ino;
|
||||||
doc.mtime = job->info.st_mtim.tv_sec;
|
doc.mtime = job->vfile.info.st_mtim.tv_sec;
|
||||||
|
|
||||||
uuid_generate(doc.uuid);
|
uuid_generate(doc.uuid);
|
||||||
char *buf[PARSE_BUF_SIZE];
|
char *buf[MAGIC_BUF_SIZE];
|
||||||
|
|
||||||
if (LogCtx.very_verbose) {
|
if (LogCtx.very_verbose) {
|
||||||
char uuid_str[UUID_STR_LEN];
|
char uuid_str[UUID_STR_LEN];
|
||||||
@@ -79,7 +70,7 @@ void parse(void *arg) {
|
|||||||
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", uuid_str)
|
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", uuid_str)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (job->info.st_size == 0) {
|
if (job->vfile.info.st_size == 0) {
|
||||||
doc.mime = MIME_EMPTY;
|
doc.mime = MIME_EMPTY;
|
||||||
} else if (*(job->filepath + job->ext) != '\0' && (job->ext - job->base != 1)) {
|
} else if (*(job->filepath + job->ext) != '\0' && (job->ext - job->base != 1)) {
|
||||||
doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
|
doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
|
||||||
@@ -87,78 +78,94 @@ void parse(void *arg) {
|
|||||||
|
|
||||||
int bytes_read = 0;
|
int bytes_read = 0;
|
||||||
|
|
||||||
if (doc.mime == 0) {
|
if (doc.mime == 0 && !ScanCtx.fast) {
|
||||||
|
if (IS_GIT_OBJ) {
|
||||||
|
goto abort;
|
||||||
|
}
|
||||||
|
|
||||||
// Get mime type with libmagic
|
// Get mime type with libmagic
|
||||||
bytes_read = job->vfile.read(&job->vfile, buf, PARSE_BUF_SIZE);
|
if (!job->vfile.is_fs_file) {
|
||||||
if (bytes_read == -1) {
|
LOG_WARNING(job->filepath, "Guessing mime type with libmagic inside archive files is not currently supported");
|
||||||
LOG_WARNINGF(job->filepath, "read() Error: %s", strerror(errno))
|
goto abort;
|
||||||
|
}
|
||||||
|
|
||||||
|
bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE);
|
||||||
|
if (bytes_read < 0) {
|
||||||
|
|
||||||
|
if (job->vfile.is_fs_file) {
|
||||||
|
LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
|
||||||
|
} else {
|
||||||
|
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc))
|
||||||
|
}
|
||||||
|
|
||||||
CLOSE_FILE(job->vfile)
|
CLOSE_FILE(job->vfile)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *magic_mime_str = magic_buffer(Magic, buf, bytes_read);
|
magic_t magic = magic_open(MAGIC_MIME_TYPE);
|
||||||
|
magic_load(magic, NULL);
|
||||||
|
|
||||||
|
const char *magic_mime_str = magic_buffer(magic, buf, bytes_read);
|
||||||
if (magic_mime_str != NULL) {
|
if (magic_mime_str != NULL) {
|
||||||
doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
|
doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
|
||||||
|
|
||||||
|
LOG_DEBUGF(job->filepath, "libmagic: %s", magic_mime_str);
|
||||||
|
|
||||||
if (doc.mime == 0) {
|
if (doc.mime == 0) {
|
||||||
LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str);
|
LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
job->vfile.reset(&job->vfile);
|
||||||
|
|
||||||
|
magic_close(magic);
|
||||||
}
|
}
|
||||||
|
|
||||||
int mmime = MAJOR_MIME(doc.mime);
|
int mmime = MAJOR_MIME(doc.mime);
|
||||||
|
|
||||||
if (!(SHOULD_PARSE(doc.mime))) {
|
if (!(SHOULD_PARSE(doc.mime))) {
|
||||||
|
|
||||||
|
} else if (IS_RAW(doc.mime)) {
|
||||||
|
parse_raw(&ScanCtx.raw_ctx, &job->vfile, &doc);
|
||||||
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
|
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
|
||||||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
|
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
|
||||||
|
|
||||||
if (job->vfile.is_fs_file) {
|
parse_media(&ScanCtx.media_ctx, &job->vfile, &doc);
|
||||||
parse_media_filename(job->filepath, &doc);
|
|
||||||
} else {
|
|
||||||
parse_media_vfile(&job->vfile, &doc);
|
|
||||||
}
|
|
||||||
|
|
||||||
} else if (IS_PDF(doc.mime)) {
|
} else if (IS_PDF(doc.mime)) {
|
||||||
void *pdf_buf = read_all(job, (char *) buf, bytes_read);
|
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc.mime), &doc);
|
||||||
parse_pdf(pdf_buf, doc.size, &doc);
|
|
||||||
|
|
||||||
if (pdf_buf != buf && pdf_buf != NULL) {
|
} else if (mmime == MimeText && ScanCtx.text_ctx.content_size > 0) {
|
||||||
free(pdf_buf);
|
if (IS_MARKUP(doc.mime)) {
|
||||||
|
parse_markup(&ScanCtx.text_ctx, &job->vfile, &doc);
|
||||||
|
} else {
|
||||||
|
parse_text(&ScanCtx.text_ctx, &job->vfile, &doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (mmime == MimeText && ScanCtx.content_size > 0) {
|
|
||||||
parse_text(bytes_read, &job->vfile, (char *) buf, &doc);
|
|
||||||
|
|
||||||
} else if (IS_FONT(doc.mime)) {
|
} else if (IS_FONT(doc.mime)) {
|
||||||
void *font_buf = read_all(job, (char *) buf, bytes_read);
|
parse_font(&ScanCtx.font_ctx, &job->vfile, &doc);
|
||||||
parse_font(font_buf, doc.size, &doc);
|
|
||||||
|
|
||||||
if (font_buf != buf && font_buf != NULL) {
|
|
||||||
free(font_buf);
|
|
||||||
}
|
|
||||||
} else if (
|
} else if (
|
||||||
ScanCtx.archive_mode != ARC_MODE_SKIP && (
|
ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
|
||||||
IS_ARC(doc.mime) ||
|
IS_ARC(doc.mime) ||
|
||||||
(IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
|
(IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
|
||||||
)) {
|
)) {
|
||||||
parse_archive(&job->vfile, &doc);
|
parse_archive(&ScanCtx.arc_ctx, &job->vfile, &doc);
|
||||||
} else if (ScanCtx.content_size > 0 && IS_DOC(doc.mime)) {
|
} else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc.mime)) {
|
||||||
void *doc_buf = read_all(job, (char *) buf, bytes_read);
|
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, &doc);
|
||||||
parse_doc(doc_buf, doc.size, &doc);
|
} else if (is_cbr(&ScanCtx.comic_ctx, doc.mime) || is_cbz(&ScanCtx.comic_ctx, doc.mime)) {
|
||||||
|
parse_comic(&ScanCtx.comic_ctx, &job->vfile, &doc);
|
||||||
if (doc_buf != buf && doc_buf != NULL) {
|
} else if (IS_MOBI(doc.mime)) {
|
||||||
free(doc_buf);
|
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
abort:
|
||||||
|
|
||||||
//Parent meta
|
//Parent meta
|
||||||
if (!uuid_is_null(job->parent)) {
|
if (!uuid_is_null(job->parent)) {
|
||||||
char tmp[UUID_STR_LEN];
|
|
||||||
uuid_unparse(job->parent, tmp);
|
|
||||||
|
|
||||||
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
|
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
|
||||||
meta_parent->key = MetaParent;
|
meta_parent->key = MetaParent;
|
||||||
strcpy(meta_parent->strval, tmp);
|
uuid_unparse(job->parent, meta_parent->str_val);
|
||||||
APPEND_META((&doc), meta_parent)
|
APPEND_META((&doc), meta_parent)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -166,3 +173,7 @@ void parse(void *arg) {
|
|||||||
|
|
||||||
CLOSE_FILE(job->vfile)
|
CLOSE_FILE(job->vfile)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void cleanup_parse() {
|
||||||
|
// noop
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,13 +1,16 @@
|
|||||||
#ifndef SIST2_PARSE_H
|
#ifndef SIST2_PARSE_H
|
||||||
#define SIST2_PARSE_H
|
#define SIST2_PARSE_H
|
||||||
|
|
||||||
#include "src/sist.h"
|
#include "../sist.h"
|
||||||
|
|
||||||
#define PARSE_BUF_SIZE 4096
|
#define MAGIC_BUF_SIZE 4096 * 6
|
||||||
|
|
||||||
int fs_read(struct vfile *f, void *buf, size_t size);
|
int fs_read(struct vfile *f, void *buf, size_t size);
|
||||||
void fs_close(struct vfile *f);
|
void fs_close(struct vfile *f);
|
||||||
|
void fs_reset(struct vfile *f);
|
||||||
|
|
||||||
void parse(void *arg);
|
void parse(void *arg);
|
||||||
|
|
||||||
|
void cleanup_parse();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1,335 +0,0 @@
|
|||||||
#include "pdf.h"
|
|
||||||
#include "src/ctx.h"
|
|
||||||
|
|
||||||
#define MIN_OCR_SIZE 128
|
|
||||||
__thread text_buffer_t thread_buffer;
|
|
||||||
|
|
||||||
|
|
||||||
fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
|
|
||||||
|
|
||||||
int err = 0;
|
|
||||||
fz_page *cover = NULL;
|
|
||||||
|
|
||||||
fz_var(cover);
|
|
||||||
fz_try(ctx)
|
|
||||||
cover = fz_load_page(ctx, fzdoc, 0);
|
|
||||||
fz_catch(ctx)
|
|
||||||
err = 1;
|
|
||||||
|
|
||||||
if (err != 0) {
|
|
||||||
fz_drop_page(ctx, cover);
|
|
||||||
LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
fz_rect bounds = fz_bound_page(ctx, cover);
|
|
||||||
|
|
||||||
float scale;
|
|
||||||
float w = (float) bounds.x1 - bounds.x0;
|
|
||||||
float h = (float) bounds.y1 - bounds.y0;
|
|
||||||
if (w > h) {
|
|
||||||
scale = (float) ScanCtx.tn_size / w;
|
|
||||||
} else {
|
|
||||||
scale = (float) ScanCtx.tn_size / h;
|
|
||||||
}
|
|
||||||
fz_matrix m = fz_scale(scale, scale);
|
|
||||||
|
|
||||||
bounds = fz_transform_rect(bounds, m);
|
|
||||||
fz_irect bbox = fz_round_rect(bounds);
|
|
||||||
fz_pixmap *pixmap = fz_new_pixmap_with_bbox(ctx, ctx->colorspace->rgb, bbox, NULL, 0);
|
|
||||||
|
|
||||||
fz_clear_pixmap_with_value(ctx, pixmap, 0xFF);
|
|
||||||
fz_device *dev = fz_new_draw_device(ctx, m, pixmap);
|
|
||||||
|
|
||||||
fz_var(err);
|
|
||||||
fz_try(ctx)
|
|
||||||
{
|
|
||||||
pthread_mutex_lock(&ScanCtx.mupdf_mu);
|
|
||||||
fz_run_page(ctx, cover, dev, fz_identity, NULL);
|
|
||||||
}
|
|
||||||
fz_always(ctx)
|
|
||||||
{
|
|
||||||
fz_close_device(ctx, dev);
|
|
||||||
fz_drop_device(ctx, dev);
|
|
||||||
pthread_mutex_unlock(&ScanCtx.mupdf_mu);
|
|
||||||
}
|
|
||||||
fz_catch(ctx)
|
|
||||||
err = ctx->error.errcode;
|
|
||||||
|
|
||||||
if (err != 0) {
|
|
||||||
LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message)
|
|
||||||
fz_drop_page(ctx, cover);
|
|
||||||
fz_drop_pixmap(ctx, pixmap);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
fz_buffer *fzbuf = NULL;
|
|
||||||
fz_var(fzbuf);
|
|
||||||
fz_var(err);
|
|
||||||
|
|
||||||
fz_try(ctx)
|
|
||||||
fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params);
|
|
||||||
fz_catch(ctx)
|
|
||||||
err = ctx->error.errcode;
|
|
||||||
|
|
||||||
if (err == 0) {
|
|
||||||
unsigned char *tn_buf;
|
|
||||||
size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf);
|
|
||||||
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
|
|
||||||
}
|
|
||||||
|
|
||||||
fz_drop_buffer(ctx, fzbuf);
|
|
||||||
fz_drop_pixmap(ctx, pixmap);
|
|
||||||
|
|
||||||
if (err != 0) {
|
|
||||||
LOG_WARNINGF(doc->filepath, "fz_new_buffer_from_pixmap_as_png() returned error code [%d] %s", err, ctx->error.message)
|
|
||||||
fz_drop_page(ctx, cover);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return cover;
|
|
||||||
}
|
|
||||||
|
|
||||||
void fz_err_callback(void *user, UNUSED(const char *message)) {
|
|
||||||
if (LogCtx.verbose) {
|
|
||||||
document_t *doc = (document_t *) user;
|
|
||||||
LOG_WARNINGF(doc->filepath, "FZ: %s", message)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__always_inline
|
|
||||||
void init_ctx(fz_context *ctx, document_t *doc) {
|
|
||||||
fz_disable_icc(ctx);
|
|
||||||
fz_register_document_handlers(ctx);
|
|
||||||
|
|
||||||
ctx->warn.print_user = doc;
|
|
||||||
ctx->warn.print = fz_err_callback;
|
|
||||||
ctx->error.print_user = doc;
|
|
||||||
ctx->error.print = fz_err_callback;
|
|
||||||
}
|
|
||||||
|
|
||||||
int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
|
|
||||||
if (block->type != FZ_STEXT_BLOCK_TEXT) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
fz_stext_line *line = block->u.t.first_line;
|
|
||||||
while (line != NULL) {
|
|
||||||
fz_stext_char *c = line->first_char;
|
|
||||||
while (c != NULL) {
|
|
||||||
if (text_buffer_append_char(tex, c->c) == TEXT_BUF_FULL) {
|
|
||||||
return TEXT_BUF_FULL;
|
|
||||||
}
|
|
||||||
c = c->next;
|
|
||||||
}
|
|
||||||
line = line->next;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
|
|
||||||
fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha),
|
|
||||||
UNUSED(fz_color_params color_params)) {
|
|
||||||
|
|
||||||
int l2factor = 0;
|
|
||||||
|
|
||||||
if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE) {
|
|
||||||
|
|
||||||
fz_pixmap *pix = img->get_pixmap(ctx, img, NULL, img->w, img->h, &l2factor);
|
|
||||||
|
|
||||||
if (pix->h > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && img->xres != 0) {
|
|
||||||
TessBaseAPI *api = TessBaseAPICreate();
|
|
||||||
TessBaseAPIInit3(api, TESS_DATAPATH, ScanCtx.tesseract_lang);
|
|
||||||
|
|
||||||
TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride);
|
|
||||||
TessBaseAPISetSourceResolution(api, pix->xres);
|
|
||||||
|
|
||||||
char *text = TessBaseAPIGetUTF8Text(api);
|
|
||||||
size_t len = strlen(text);
|
|
||||||
text_buffer_append_string(&thread_buffer, text, len - 1);
|
|
||||||
LOG_DEBUGF(
|
|
||||||
"pdf.c",
|
|
||||||
"(OCR) %dx%d got %dB from tesseract (%s), buffer:%dB",
|
|
||||||
pix->w, pix->h, len, ScanCtx.tesseract_lang, thread_buffer.dyn_buffer.cur
|
|
||||||
)
|
|
||||||
|
|
||||||
TessBaseAPIEnd(api);
|
|
||||||
TessBaseAPIDelete(api);
|
|
||||||
fz_drop_pixmap(ctx, pix);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
|
|
||||||
|
|
||||||
if (buf == NULL) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int mu_is_initialized = 0;
|
|
||||||
if (!mu_is_initialized) {
|
|
||||||
pthread_mutex_init(&ScanCtx.mupdf_mu, NULL);
|
|
||||||
mu_is_initialized = 1;
|
|
||||||
}
|
|
||||||
fz_context *ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
|
|
||||||
|
|
||||||
init_ctx(ctx, doc);
|
|
||||||
|
|
||||||
int err = 0;
|
|
||||||
|
|
||||||
fz_document *fzdoc = NULL;
|
|
||||||
fz_stream *stream = NULL;
|
|
||||||
fz_var(fzdoc);
|
|
||||||
fz_var(stream);
|
|
||||||
fz_var(err);
|
|
||||||
|
|
||||||
fz_try(ctx)
|
|
||||||
{
|
|
||||||
stream = fz_open_memory(ctx, buf, buf_len);
|
|
||||||
fzdoc = fz_open_document_with_stream(ctx, mime_get_mime_text(doc->mime), stream);
|
|
||||||
}
|
|
||||||
fz_catch(ctx)
|
|
||||||
err = ctx->error.errcode;
|
|
||||||
|
|
||||||
if (err) {
|
|
||||||
fz_drop_stream(ctx, stream);
|
|
||||||
fz_drop_document(ctx, fzdoc);
|
|
||||||
fz_drop_context(ctx);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
char title[4096] = {'\0',};
|
|
||||||
fz_try(ctx)
|
|
||||||
fz_lookup_metadata(ctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title));
|
|
||||||
fz_catch(ctx)
|
|
||||||
;
|
|
||||||
|
|
||||||
if (strlen(title) > 0) {
|
|
||||||
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + strlen(title));
|
|
||||||
meta_content->key = MetaTitle;
|
|
||||||
strcpy(meta_content->strval, title);
|
|
||||||
APPEND_META(doc, meta_content)
|
|
||||||
}
|
|
||||||
|
|
||||||
int page_count = -1;
|
|
||||||
fz_var(err);
|
|
||||||
fz_try(ctx)
|
|
||||||
page_count = fz_count_pages(ctx, fzdoc);
|
|
||||||
fz_catch(ctx)
|
|
||||||
err = ctx->error.errcode;
|
|
||||||
|
|
||||||
if (err) {
|
|
||||||
LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, ctx->error.message)
|
|
||||||
fz_drop_stream(ctx, stream);
|
|
||||||
fz_drop_document(ctx, fzdoc);
|
|
||||||
fz_drop_context(ctx);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
fz_page *cover = NULL;
|
|
||||||
if (ScanCtx.tn_size > 0) {
|
|
||||||
cover = render_cover(ctx, doc, fzdoc);
|
|
||||||
} else {
|
|
||||||
fz_var(cover);
|
|
||||||
fz_try(ctx)
|
|
||||||
cover = fz_load_page(ctx, fzdoc, 0);
|
|
||||||
fz_catch(ctx)
|
|
||||||
cover = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cover == NULL) {
|
|
||||||
fz_drop_stream(ctx, stream);
|
|
||||||
fz_drop_document(ctx, fzdoc);
|
|
||||||
fz_drop_context(ctx);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ScanCtx.content_size > 0) {
|
|
||||||
fz_stext_options opts = {0};
|
|
||||||
thread_buffer = text_buffer_create(ScanCtx.content_size);
|
|
||||||
|
|
||||||
for (int current_page = 0; current_page < page_count; current_page++) {
|
|
||||||
fz_page *page = NULL;
|
|
||||||
if (current_page == 0) {
|
|
||||||
page = cover;
|
|
||||||
} else {
|
|
||||||
fz_var(err);
|
|
||||||
fz_try(ctx)
|
|
||||||
page = fz_load_page(ctx, fzdoc, current_page);
|
|
||||||
fz_catch(ctx)
|
|
||||||
err = ctx->error.errcode;
|
|
||||||
if (err != 0) {
|
|
||||||
LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
|
|
||||||
text_buffer_destroy(&thread_buffer);
|
|
||||||
fz_drop_page(ctx, page);
|
|
||||||
fz_drop_stream(ctx, stream);
|
|
||||||
fz_drop_document(ctx, fzdoc);
|
|
||||||
fz_drop_context(ctx);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
|
|
||||||
fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
|
|
||||||
dev->stroke_path = NULL;
|
|
||||||
dev->stroke_text = NULL;
|
|
||||||
dev->clip_text = NULL;
|
|
||||||
dev->clip_stroke_path = NULL;
|
|
||||||
dev->clip_stroke_text = NULL;
|
|
||||||
|
|
||||||
if (ScanCtx.tesseract_lang != NULL) {
|
|
||||||
dev->fill_image = fill_image;
|
|
||||||
}
|
|
||||||
|
|
||||||
fz_var(err);
|
|
||||||
fz_try(ctx)
|
|
||||||
fz_run_page(ctx, page, dev, fz_identity, NULL);
|
|
||||||
fz_always(ctx)
|
|
||||||
{
|
|
||||||
fz_close_device(ctx, dev);
|
|
||||||
fz_drop_device(ctx, dev);
|
|
||||||
}
|
|
||||||
fz_catch(ctx)
|
|
||||||
err = ctx->error.errcode;
|
|
||||||
|
|
||||||
if (err != 0) {
|
|
||||||
LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message)
|
|
||||||
text_buffer_destroy(&thread_buffer);
|
|
||||||
fz_drop_page(ctx, page);
|
|
||||||
fz_drop_stext_page(ctx, stext);
|
|
||||||
fz_drop_stream(ctx, stream);
|
|
||||||
fz_drop_document(ctx, fzdoc);
|
|
||||||
fz_drop_context(ctx);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
fz_stext_block *block = stext->first_block;
|
|
||||||
while (block != NULL) {
|
|
||||||
int ret = read_stext_block(block, &thread_buffer);
|
|
||||||
if (ret == TEXT_BUF_FULL) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
block = block->next;
|
|
||||||
}
|
|
||||||
fz_drop_stext_page(ctx, stext);
|
|
||||||
fz_drop_page(ctx, page);
|
|
||||||
|
|
||||||
if (thread_buffer.dyn_buffer.cur >= thread_buffer.dyn_buffer.size) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
text_buffer_terminate_string(&thread_buffer);
|
|
||||||
|
|
||||||
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + thread_buffer.dyn_buffer.cur);
|
|
||||||
meta_content->key = MetaContent;
|
|
||||||
memcpy(meta_content->strval, thread_buffer.dyn_buffer.buf, thread_buffer.dyn_buffer.cur);
|
|
||||||
APPEND_META(doc, meta_content)
|
|
||||||
|
|
||||||
text_buffer_destroy(&thread_buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
fz_drop_stream(ctx, stream);
|
|
||||||
fz_drop_document(ctx, fzdoc);
|
|
||||||
fz_drop_context(ctx);
|
|
||||||
}
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
#ifndef SIST2_PDF_H
|
|
||||||
#define SIST2_PDF_H
|
|
||||||
|
|
||||||
#include "src/sist.h"
|
|
||||||
#include <tesseract/capi.h>
|
|
||||||
|
|
||||||
|
|
||||||
void parse_pdf(void *buf, size_t buf_len, document_t *doc);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,37 +0,0 @@
|
|||||||
#include "text.h"
|
|
||||||
#include "src/ctx.h"
|
|
||||||
|
|
||||||
void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc) {
|
|
||||||
|
|
||||||
char *intermediate_buf;
|
|
||||||
int intermediate_buf_len;
|
|
||||||
|
|
||||||
if (bytes_read == doc->size || bytes_read >= ScanCtx.content_size) {
|
|
||||||
int to_copy = MIN(bytes_read, ScanCtx.content_size);
|
|
||||||
intermediate_buf = malloc(to_copy);
|
|
||||||
intermediate_buf_len = to_copy;
|
|
||||||
memcpy(intermediate_buf, buf, to_copy);
|
|
||||||
|
|
||||||
} else {
|
|
||||||
int to_read = MIN(ScanCtx.content_size, doc->size) - bytes_read;
|
|
||||||
|
|
||||||
intermediate_buf = malloc(to_read + bytes_read);
|
|
||||||
intermediate_buf_len = to_read + bytes_read;
|
|
||||||
if (bytes_read != 0) {
|
|
||||||
memcpy(intermediate_buf, buf, bytes_read);
|
|
||||||
}
|
|
||||||
|
|
||||||
f->read(f, intermediate_buf + bytes_read, to_read);
|
|
||||||
}
|
|
||||||
text_buffer_t tex = text_buffer_create(ScanCtx.content_size);
|
|
||||||
text_buffer_append_string(&tex, intermediate_buf, intermediate_buf_len);
|
|
||||||
text_buffer_terminate_string(&tex);
|
|
||||||
|
|
||||||
meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur);
|
|
||||||
meta->key = MetaContent;
|
|
||||||
strcpy(meta->strval, tex.dyn_buffer.buf);
|
|
||||||
APPEND_META(doc, meta)
|
|
||||||
|
|
||||||
free(intermediate_buf);
|
|
||||||
text_buffer_destroy(&tex);
|
|
||||||
}
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
#ifndef SIST2_TEXT_H
|
|
||||||
#define SIST2_TEXT_H
|
|
||||||
|
|
||||||
#include "src/sist.h"
|
|
||||||
|
|
||||||
void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user