mirror of
https://github.com/simon987/sist2.git
synced 2025-12-12 23:18:51 +00:00
Compare commits
179 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4b4ab12fac | |||
| ae283f77ad | |||
| d3bd53a5ea | |||
| f7887f24d1 | |||
| 5c8de19188 | |||
| d861d278a4 | |||
| b6ddeee0e0 | |||
| 0cd2523b05 | |||
| 5e798f9367 | |||
| 5da6c1488b | |||
| 9568e25f84 | |||
| 6a8027789a | |||
| b1d16d8abf | |||
| b2a157e24d | |||
| 9aead9389a | |||
| a32c68cba8 | |||
| d116cf9d91 | |||
|
|
a020a8b32c | ||
| 5d5d9c3092 | |||
| 3379d5ce71 | |||
| a0ff4a1f01 | |||
| 4589f3bde7 | |||
| 1c898640cf | |||
| a0739d5177 | |||
| 8f9d29dbc6 | |||
| 3ff4b70223 | |||
| 02ad035b09 | |||
| c11feb213d | |||
| 72902947cd | |||
| a18bb81222 | |||
| 1520288f19 | |||
| e507de194b | |||
| 0e517d5e2b | |||
| 8223ef3860 | |||
| 995a196690 | |||
| 465d017e18 | |||
| ca994d3914 | |||
| db2285973f | |||
| 61de9e9f14 | |||
| 3015ef0ff4 | |||
| b55d432841 | |||
| ed90a140ce | |||
| 052df82373 | |||
| 5676136777 | |||
| c061613302 | |||
| d0325fd9b9 | |||
| e05a6f3863 | |||
| f1690a9cca | |||
| 100a264413 | |||
| 29390bb454 | |||
| 4d43036ded | |||
| 0b5cdbd130 | |||
| 53d7695f66 | |||
| 8d53456404 | |||
| cbc08a7cc9 | |||
| e629b4d7d3 | |||
| 22f7073b39 | |||
| 1781a74960 | |||
| db96c95ac7 | |||
| 7b9fa4cc0a | |||
| 5cc1fa86a9 | |||
| 649689ce30 | |||
| c8536f65a8 | |||
| 75b5e249c1 | |||
|
|
f49e03ac79 | ||
| a6d2afc8dc | |||
| 8f8f66ba05 | |||
| 1d9fcf7105 | |||
| 8127745f2b | |||
| 230988d6d1 | |||
| 13f4dbed2d | |||
| ed15e89f45 | |||
| c636d3d921 | |||
| 7e92d4b7d1 | |||
| 8ffe780ab2 | |||
| d3c8928fe8 | |||
| d9f628fca4 | |||
| 68289268c1 | |||
| 649c50c465 | |||
| 7b49a0dc49 | |||
| eb559b53aa | |||
| 6d01f9c0df | |||
| e724fec668 | |||
| fe5e93b300 | |||
| ecad85fd7d | |||
| 74cc898259 | |||
| dc2e4443c4 | |||
| 1a64431b52 | |||
|
|
9bad515e06 | ||
| 648559cedb | |||
| 3e6cd9cd5c | |||
| f249992798 | |||
|
|
e9645ecdaa | ||
| 046edea0e2 | |||
| a011b7e97b | |||
| 8c1c1697e0 | |||
| 018b49fa4c | |||
| 27b4e6403e | |||
| 13fdbd9e69 | |||
| 5e7fdaf8dd | |||
| 19d5c8ac9f | |||
| 99497049a8 | |||
|
|
1a3181d78b | ||
| 449aa77c8f | |||
| 3058c55510 | |||
| dedf9287b2 | |||
| ab199b0c0c | |||
| c4fbae123e | |||
| dd2397ef5c | |||
| ee0f71f4d3 | |||
| 0bbb96b149 | |||
| 78f6e16701 | |||
| 4625bca9a9 | |||
| f2ae653886 | |||
| 5686bc864d | |||
| cf513b4ad8 | |||
| 013423424e | |||
| 16514fd6b0 | |||
| 27509f97e1 | |||
| 4c540eae1c | |||
| d2b53ff6fc | |||
| 0ef4292abf | |||
| e6fde38c24 | |||
| 5fa343d40f | |||
| 7ee1374802 | |||
| bd9e56829c | |||
| 718169345e | |||
| 5a6aa763ca | |||
| 695d9abd83 | |||
| e436af7b2a | |||
| 4501a7810f | |||
|
|
e36761fa6a | ||
| fe53b79d56 | |||
| 09615bbed6 | |||
| a2be9b955c | |||
| 9298bd2d9d | |||
| 317034ba21 | |||
| 0505303503 | |||
| 6e5772f13b | |||
| ccccdb3b78 | |||
| 12d17acf4f | |||
| 48b56cdb7b | |||
| 048f707f80 | |||
| 98e0a5fd64 | |||
| 740a49a09f | |||
| 81be662574 | |||
| 02fa3f02f5 | |||
| cfdd7bdd87 | |||
| 7ceb645926 | |||
| 7d0091f647 | |||
| b3cd630399 | |||
| 5f7a1acfe3 | |||
| 513a21cca2 | |||
| 04dbfb23ab | |||
| 1abddabeec | |||
| 9ace5774af | |||
| eab6101cf7 | |||
| d7cbd5d2b6 | |||
| 641edf2715 | |||
| 7efb4957bf | |||
| 9ae77fdedb | |||
| 98c40901ed | |||
| 363375d5da | |||
| 149de95d88 | |||
| e5bb4856d2 | |||
| d78994d427 | |||
| f2d68d54df | |||
| e03625838b | |||
| 86840b46f4 | |||
| e57f9916eb | |||
| 565ba6ee76 | |||
| d83fc2c373 | |||
| d4da28249e | |||
| 483a454c8d | |||
| 018ac86640 | |||
| 398f1aead4 | |||
| d19a75926b | |||
| 1ac8b40e3d | |||
| a8505cb8c1 |
18
.github/ISSUE_TEMPLATE/issue-template.md
vendored
Normal file
18
.github/ISSUE_TEMPLATE/issue-template.md
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
---
|
||||
name: Issue template
|
||||
about: General
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
sist2 version:
|
||||
|
||||
Platform (Linux or Docker, x86-64 or arm64):
|
||||
|
||||
Elasticsearch version:
|
||||
|
||||
Command with arguments: `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0"`
|
||||
|
||||
If the issue is related to the `scan` module, please attach the files necessary to reproduce the error or email them to me[at]simon987.net.
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -16,3 +16,4 @@ bundle.js
|
||||
*.a
|
||||
vgcore.*
|
||||
build/
|
||||
third-party/
|
||||
|
||||
49
.gitmodules
vendored
49
.gitmodules
vendored
@@ -1,45 +1,6 @@
|
||||
[submodule "argparse"]
|
||||
path = argparse
|
||||
[submodule "third-party/libscan"]
|
||||
path = third-party/libscan
|
||||
url = https://github.com/simon987/libscan
|
||||
[submodule "third-party/argparse"]
|
||||
path = third-party/argparse
|
||||
url = https://github.com/cofyc/argparse
|
||||
[submodule "cJSON"]
|
||||
path = cJSON
|
||||
url = https://github.com/DaveGamble/cJSON
|
||||
[submodule "lmdb"]
|
||||
path = lmdb
|
||||
url = https://github.com/LMDB/lmdb
|
||||
[submodule "utf8.h"]
|
||||
path = utf8.h
|
||||
url = https://github.com/sheredom/utf8.h
|
||||
[submodule "lib/bzip2-1.0.6"]
|
||||
path = lib/bzip2-1.0.6
|
||||
url = https://github.com/enthought/bzip2-1.0.6
|
||||
[submodule "lib/libmagic"]
|
||||
path = lib/libmagic
|
||||
url = https://github.com/threatstack/libmagic
|
||||
[submodule "lib/harfbuzz"]
|
||||
path = lib/harfbuzz
|
||||
url = https://github.com/harfbuzz/harfbuzz
|
||||
[submodule "lib/openjpeg"]
|
||||
path = lib/openjpeg
|
||||
url = https://github.com/uclouvain/openjpeg
|
||||
[submodule "lib/ffmpeg"]
|
||||
path = lib/ffmpeg
|
||||
url = https://git.ffmpeg.org/ffmpeg.git
|
||||
[submodule "lib/onion"]
|
||||
path = lib/onion
|
||||
url = https://github.com/davidmoreno/onion
|
||||
[submodule "lib/mupdf"]
|
||||
path = lib/mupdf
|
||||
url = git://git.ghostscript.com/mupdf.git
|
||||
[submodule "lib/tesseract"]
|
||||
path = lib/tesseract
|
||||
url = https://github.com/tesseract-ocr/tesseract
|
||||
[submodule "lib/leptonica"]
|
||||
path = lib/leptonica
|
||||
url = https://github.com/danbloomberg/leptonica
|
||||
[submodule "lib/libtiff"]
|
||||
path = lib/libtiff
|
||||
url = https://gitlab.com/libtiff/libtiff
|
||||
[submodule "lib/libpng"]
|
||||
path = lib/libpng
|
||||
url = https://github.com/glennrp/libpng
|
||||
|
||||
172
CMakeLists.txt
172
CMakeLists.txt
@@ -2,140 +2,118 @@ cmake_minimum_required(VERSION 3.7)
|
||||
set(CMAKE_C_STANDARD 11)
|
||||
|
||||
project(sist2 C)
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/CMakeModules")
|
||||
|
||||
option(SIST_DEBUG "Build a debug executable" on)
|
||||
|
||||
add_subdirectory(third-party/libscan)
|
||||
set(ARGPARSE_SHARED off)
|
||||
add_subdirectory(third-party/argparse)
|
||||
|
||||
add_executable(
|
||||
sist2
|
||||
src/main.c
|
||||
src/sist.h
|
||||
src/io/walk.h src/io/walk.c
|
||||
src/parsing/media.h src/parsing/media.c
|
||||
src/parsing/pdf.h src/parsing/pdf.c
|
||||
src/io/store.h src/io/store.c
|
||||
src/tpool.h src/tpool.c
|
||||
src/parsing/parse.h src/parsing/parse.c
|
||||
src/io/serialize.h src/io/serialize.c
|
||||
src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
|
||||
src/parsing/text.h src/parsing/text.c
|
||||
src/index/web.c src/index/web.h
|
||||
src/web/serve.c src/web/serve.h
|
||||
src/web/auth_basic.h src/web/auth_basic.c
|
||||
src/index/elastic.c src/index/elastic.h
|
||||
src/util.c src/util.h
|
||||
src/ctx.h src/types.h src/parsing/font.c src/parsing/font.h
|
||||
src/parsing/arc.c src/parsing/arc.h
|
||||
src/parsing/doc.c src/parsing/doc.h
|
||||
src/ctx.h src/types.h
|
||||
src/log.c src/log.h
|
||||
|
||||
# argparse
|
||||
argparse/argparse.h argparse/argparse.c
|
||||
third-party/argparse/argparse.h third-party/argparse/argparse.c
|
||||
|
||||
# cJSON
|
||||
cJSON/cJSON.h cJSON/cJSON.c
|
||||
|
||||
# LMDB
|
||||
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
|
||||
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
|
||||
src/cli.c src/cli.h
|
||||
src/stats.c src/stats.h src/ctx.c)
|
||||
|
||||
# utf8.h
|
||||
utf8.h/utf8.h
|
||||
)
|
||||
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
|
||||
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
|
||||
|
||||
find_package(PkgConfig REQUIRED)
|
||||
set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:/usr/local/lib/pkgconfig/")
|
||||
find_package(lmdb CONFIG REQUIRED)
|
||||
find_package(cJSON CONFIG REQUIRED)
|
||||
find_package(unofficial-glib CONFIG REQUIRED)
|
||||
find_package(unofficial-mongoose CONFIG REQUIRED)
|
||||
find_library(UUID_LIB NAMES uuid)
|
||||
find_package(CURL CONFIG REQUIRED)
|
||||
|
||||
find_package(Freetype REQUIRED)
|
||||
#find_package(OpenSSL REQUIRED)
|
||||
|
||||
pkg_check_modules(GLIB REQUIRED glib-2.0)
|
||||
pkg_check_modules(GOBJECT REQUIRED gobject-2.0)
|
||||
pkg_check_modules(UUID REQUIRED uuid)
|
||||
|
||||
add_definitions(${UUID_CFLAGS_OTHER})
|
||||
add_definitions(${GLIB_CFLAGS_OTHER})
|
||||
add_definitions(${GOBJECT_CFLAGS_OTHER})
|
||||
add_definitions(${FREETYPE_CFLAGS_OTHER})
|
||||
|
||||
list(REMOVE_ITEM GLIB_LIBRARIES pcre)
|
||||
list(REMOVE_ITEM GOBJECT_LIBRARIES pcre)
|
||||
list(REMOVE_ITEM UUID_LIBRARIES pcre)
|
||||
|
||||
target_include_directories(
|
||||
sist2 PUBLIC
|
||||
${GOBJECT_INCLUDE_DIRS}
|
||||
${GLIB_INCLUDE_DIRS}
|
||||
${PROJECT_SOURCE_DIR}/lib/ffmpeg/
|
||||
${FREETYPE_INCLUDE_DIRS}
|
||||
${UUID_INCLUDE_DIRS}
|
||||
${PROJECT_SOURCE_DIR}/
|
||||
${PROJECT_SOURCE_DIR}/lmdb/libraries/liblmdb/
|
||||
${PROJECT_SOURCE_DIR}/lib/onion/src/
|
||||
${PROJECT_SOURCE_DIR}/lib/mupdf/include/
|
||||
${PROJECT_SOURCE_DIR}/include/
|
||||
/usr/include/libxml2/
|
||||
${PROJECT_SOURCE_DIR}/lib/tesseract/include/
|
||||
)
|
||||
target_link_directories(
|
||||
sist2 PUBLIC
|
||||
${UUID_LIBRARY_DIRS}
|
||||
${CMAKE_SOURCE_DIR}/third-party/onion/src/
|
||||
${CMAKE_SOURCE_DIR}/third-party/utf8.h/
|
||||
${CMAKE_SOURCE_DIR}/third-party/libscan/
|
||||
${CMAKE_SOURCE_DIR}/
|
||||
)
|
||||
|
||||
target_compile_options(sist2
|
||||
target_compile_options(
|
||||
sist2
|
||||
PRIVATE
|
||||
-Ofast
|
||||
# -march=native
|
||||
-fPIC
|
||||
-fno-stack-protector
|
||||
-fomit-frame-pointer
|
||||
)
|
||||
)
|
||||
|
||||
TARGET_LINK_LIBRARIES(
|
||||
if (SIST_DEBUG)
|
||||
target_compile_options(
|
||||
sist2
|
||||
PRIVATE
|
||||
-g
|
||||
-fstack-protector
|
||||
-fno-omit-frame-pointer
|
||||
-fsanitize=address
|
||||
-O2
|
||||
)
|
||||
target_link_options(
|
||||
sist2
|
||||
PRIVATE
|
||||
-fsanitize=address
|
||||
# -static
|
||||
)
|
||||
set_target_properties(
|
||||
sist2
|
||||
PROPERTIES
|
||||
OUTPUT_NAME sist2_debug
|
||||
)
|
||||
else ()
|
||||
# set(VCPKG_BUILD_TYPE release)
|
||||
target_compile_options(
|
||||
sist2
|
||||
PRIVATE
|
||||
-Ofast
|
||||
-fno-stack-protector
|
||||
-fomit-frame-pointer
|
||||
)
|
||||
endif ()
|
||||
|
||||
add_dependencies(
|
||||
sist2
|
||||
scan
|
||||
argparse
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
sist2
|
||||
|
||||
${GLIB_LIBRARIES}
|
||||
${GOBJECT_LIBRARIES}
|
||||
${UUID_LIBRARIES}
|
||||
|
||||
# ffmpeg
|
||||
${PROJECT_SOURCE_DIR}/lib/libavcodec.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libavformat.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libavutil.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libswscale.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libswresample.a
|
||||
|
||||
# mupdf
|
||||
${PROJECT_SOURCE_DIR}/lib/libmupdf.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libmupdf-third.a
|
||||
|
||||
# onion
|
||||
${PROJECT_SOURCE_DIR}/lib/libonion_static.a
|
||||
z
|
||||
lmdb
|
||||
cjson
|
||||
argparse
|
||||
unofficial::glib::glib
|
||||
unofficial::mongoose::mongoose
|
||||
# OpenSSL::SSL OpenSSL::Crypto
|
||||
CURL::libcurl
|
||||
|
||||
${UUID_LIB}
|
||||
pthread
|
||||
magic
|
||||
|
||||
m
|
||||
bz2
|
||||
${PROJECT_SOURCE_DIR}/lib/libmagic.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
|
||||
freetype
|
||||
archive
|
||||
|
||||
xml2
|
||||
${PROJECT_SOURCE_DIR}/lib/libopc/libmce.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libopc/libopc.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libopc/libplib.a
|
||||
|
||||
${PROJECT_SOURCE_DIR}/lib/libtesseract.a
|
||||
${PROJECT_SOURCE_DIR}/lib/liblept.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libtiff.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libpng16.a
|
||||
stdc++
|
||||
|
||||
# curl
|
||||
${PROJECT_SOURCE_DIR}/lib/libcurl.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libcrypto.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libssl.a
|
||||
dl
|
||||
scan
|
||||
)
|
||||
|
||||
add_custom_target(
|
||||
|
||||
80
CMakeModules/FindFFmpeg.cmake
vendored
80
CMakeModules/FindFFmpeg.cmake
vendored
@@ -1,80 +0,0 @@
|
||||
# - Try to find ffmpeg libraries (libavcodec, libavformat and libavutil)
|
||||
# Once done this will define
|
||||
#
|
||||
# FFMPEG_FOUND - system has ffmpeg or libav
|
||||
# FFMPEG_INCLUDE_DIR - the ffmpeg include directory
|
||||
# FFMPEG_LIBRARIES - Link these to use ffmpeg
|
||||
# FFMPEG_LIBAVCODEC
|
||||
# FFMPEG_LIBAVFORMAT
|
||||
# FFMPEG_LIBAVUTIL
|
||||
#
|
||||
# Copyright (c) 2008 Andreas Schneider <mail@cynapses.org>
|
||||
# Modified for other libraries by Lasse Kärkkäinen <tronic>
|
||||
# Modified for Hedgewars by Stepik777
|
||||
#
|
||||
# Redistribution and use is allowed according to the terms of the New
|
||||
# BSD license.
|
||||
#
|
||||
|
||||
if (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
|
||||
# in cache already
|
||||
set(FFMPEG_FOUND TRUE)
|
||||
else (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
|
||||
# use pkg-config to get the directories and then use these values
|
||||
# in the FIND_PATH() and FIND_LIBRARY() calls
|
||||
find_package(PkgConfig)
|
||||
if (PKG_CONFIG_FOUND)
|
||||
pkg_check_modules(_FFMPEG_AVCODEC libavcodec)
|
||||
pkg_check_modules(_FFMPEG_AVFORMAT libavformat)
|
||||
pkg_check_modules(_FFMPEG_AVUTIL libavutil)
|
||||
endif (PKG_CONFIG_FOUND)
|
||||
|
||||
find_path(FFMPEG_AVCODEC_INCLUDE_DIR
|
||||
NAMES libavcodec/avcodec.h
|
||||
PATHS ${_FFMPEG_AVCODEC_INCLUDE_DIRS} /usr/include /usr/local/include /opt/local/include /sw/include
|
||||
PATH_SUFFIXES ffmpeg libav
|
||||
)
|
||||
|
||||
find_library(FFMPEG_LIBAVCODEC
|
||||
NAMES avcodec
|
||||
PATHS ${_FFMPEG_AVCODEC_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
|
||||
)
|
||||
|
||||
find_library(FFMPEG_LIBAVFORMAT
|
||||
NAMES avformat
|
||||
PATHS ${_FFMPEG_AVFORMAT_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
|
||||
)
|
||||
|
||||
find_library(FFMPEG_LIBAVUTIL
|
||||
NAMES avutil
|
||||
PATHS ${_FFMPEG_AVUTIL_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
|
||||
)
|
||||
|
||||
if (FFMPEG_LIBAVCODEC AND FFMPEG_LIBAVFORMAT)
|
||||
set(FFMPEG_FOUND TRUE)
|
||||
endif()
|
||||
|
||||
if (FFMPEG_FOUND)
|
||||
set(FFMPEG_INCLUDE_DIR ${FFMPEG_AVCODEC_INCLUDE_DIR})
|
||||
|
||||
set(FFMPEG_LIBRARIES
|
||||
${FFMPEG_LIBAVCODEC}
|
||||
${FFMPEG_LIBAVFORMAT}
|
||||
${FFMPEG_LIBAVUTIL}
|
||||
)
|
||||
|
||||
endif (FFMPEG_FOUND)
|
||||
|
||||
if (FFMPEG_FOUND)
|
||||
if (NOT FFMPEG_FIND_QUIETLY)
|
||||
message(STATUS "Found FFMPEG or Libav: ${FFMPEG_LIBRARIES}, ${FFMPEG_INCLUDE_DIR}")
|
||||
endif (NOT FFMPEG_FIND_QUIETLY)
|
||||
else (FFMPEG_FOUND)
|
||||
if (FFMPEG_FIND_REQUIRED)
|
||||
message(FATAL_ERROR "Could not find libavcodec or libavformat or libavutil")
|
||||
endif (FFMPEG_FIND_REQUIRED)
|
||||
endif (FFMPEG_FOUND)
|
||||
|
||||
endif (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
|
||||
|
||||
|
||||
100
CMakeModules/FindLibMagic.cmake
vendored
100
CMakeModules/FindLibMagic.cmake
vendored
@@ -1,100 +0,0 @@
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Copyright (c) 2013-2013, Lars Baehren <lbaehren@gmail.com>
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without modification,
|
||||
# are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
# - Check for the presence of LIBMAGIC
|
||||
#
|
||||
# The following variables are set when LIBMAGIC is found:
|
||||
# LIBMAGIC_FOUND = Set to true, if all components of LIBMAGIC have been
|
||||
# found.
|
||||
# LIBMAGIC_INCLUDES = Include path for the header files of LIBMAGIC
|
||||
# LIBMAGIC_LIBRARIES = Link these to use LIBMAGIC
|
||||
# LIBMAGIC_LFLAGS = Linker flags (optional)
|
||||
|
||||
if (NOT LIBMAGIC_FOUND)
|
||||
|
||||
if (NOT LIBMAGIC_ROOT_DIR)
|
||||
set (LIBMAGIC_ROOT_DIR ${CMAKE_INSTALL_PREFIX})
|
||||
endif (NOT LIBMAGIC_ROOT_DIR)
|
||||
|
||||
##____________________________________________________________________________
|
||||
## Check for the header files
|
||||
|
||||
find_path (LIBMAGIC_FILE_H
|
||||
NAMES file/file.h
|
||||
HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
|
||||
PATH_SUFFIXES include
|
||||
)
|
||||
if (LIBMAGIC_FILE_H)
|
||||
list (APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_FILE_H})
|
||||
endif (LIBMAGIC_FILE_H)
|
||||
|
||||
find_path (LIBMAGIC_MAGIC_H
|
||||
NAMES magic.h
|
||||
HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
|
||||
PATH_SUFFIXES include include/linux
|
||||
)
|
||||
if (LIBMAGIC_MAGIC_H)
|
||||
list (APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_MAGIC_H})
|
||||
endif (LIBMAGIC_MAGIC_H)
|
||||
|
||||
list (REMOVE_DUPLICATES LIBMAGIC_INCLUDES)
|
||||
|
||||
##____________________________________________________________________________
|
||||
## Check for the library
|
||||
|
||||
find_library (LIBMAGIC_LIBRARIES magic
|
||||
HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
|
||||
PATH_SUFFIXES lib
|
||||
)
|
||||
|
||||
##____________________________________________________________________________
|
||||
## Actions taken when all components have been found
|
||||
|
||||
#find_package_handle_standard_args (LIBMAGIC DEFAULT_MSG LIBMAGIC_LIBRARIES LIBMAGIC_INCLUDES)
|
||||
|
||||
if (LIBMAGIC_FOUND)
|
||||
if (NOT LIBMAGIC_FIND_QUIETLY)
|
||||
message (STATUS "Found components for LIBMAGIC")
|
||||
message (STATUS "LIBMAGIC_ROOT_DIR = ${LIBMAGIC_ROOT_DIR}")
|
||||
message (STATUS "LIBMAGIC_INCLUDES = ${LIBMAGIC_INCLUDES}")
|
||||
message (STATUS "LIBMAGIC_LIBRARIES = ${LIBMAGIC_LIBRARIES}")
|
||||
endif (NOT LIBMAGIC_FIND_QUIETLY)
|
||||
else (LIBMAGIC_FOUND)
|
||||
if (LIBMAGIC_FIND_REQUIRED)
|
||||
message (FATAL_ERROR "Could not find LIBMAGIC!")
|
||||
endif (LIBMAGIC_FIND_REQUIRED)
|
||||
endif (LIBMAGIC_FOUND)
|
||||
|
||||
##____________________________________________________________________________
|
||||
## Mark advanced variables
|
||||
|
||||
mark_as_advanced (
|
||||
LIBMAGIC_ROOT_DIR
|
||||
LIBMAGIC_INCLUDES
|
||||
LIBMAGIC_LIBRARIES
|
||||
)
|
||||
|
||||
endif (NOT LIBMAGIC_FOUND)
|
||||
478
CMakeModules/FindOpenSSL.cmake
vendored
478
CMakeModules/FindOpenSSL.cmake
vendored
@@ -1,478 +0,0 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
macro(_OpenSSL_test_and_find_dependencies ssl_library crypto_library)
|
||||
if((CMAKE_SYSTEM_NAME STREQUAL "Linux") AND
|
||||
(("${ssl_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$") OR
|
||||
("${crypto_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$")))
|
||||
set(_OpenSSL_has_dependencies TRUE)
|
||||
find_package(Threads)
|
||||
else()
|
||||
set(_OpenSSL_has_dependencies FALSE)
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
function(_OpenSSL_add_dependencies libraries_var library)
|
||||
if(CMAKE_THREAD_LIBS_INIT)
|
||||
list(APPEND ${libraries_var} ${CMAKE_THREAD_LIBS_INIT})
|
||||
endif()
|
||||
list(APPEND ${libraries_var} ${CMAKE_DL_LIBS})
|
||||
set(${libraries_var} ${${libraries_var}} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
function(_OpenSSL_target_add_dependencies target)
|
||||
if(_OpenSSL_has_dependencies)
|
||||
set_property( TARGET ${target} APPEND PROPERTY INTERFACE_LINK_LIBRARIES Threads::Threads )
|
||||
set_property( TARGET ${target} APPEND PROPERTY INTERFACE_LINK_LIBRARIES ${CMAKE_DL_LIBS} )
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
if (UNIX)
|
||||
find_package(PkgConfig QUIET)
|
||||
pkg_check_modules(_OPENSSL QUIET openssl)
|
||||
endif ()
|
||||
|
||||
# Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES
|
||||
if(OPENSSL_USE_STATIC_LIBS)
|
||||
set(_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
|
||||
if(WIN32)
|
||||
set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
|
||||
else()
|
||||
set(CMAKE_FIND_LIBRARY_SUFFIXES .a )
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WIN32)
|
||||
# http://www.slproweb.com/products/Win32OpenSSL.html
|
||||
set(_OPENSSL_ROOT_HINTS
|
||||
${OPENSSL_ROOT_DIR}
|
||||
"[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (32-bit)_is1;Inno Setup: App Path]"
|
||||
"[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (64-bit)_is1;Inno Setup: App Path]"
|
||||
ENV OPENSSL_ROOT_DIR
|
||||
)
|
||||
file(TO_CMAKE_PATH "$ENV{PROGRAMFILES}" _programfiles)
|
||||
set(_OPENSSL_ROOT_PATHS
|
||||
"${_programfiles}/OpenSSL"
|
||||
"${_programfiles}/OpenSSL-Win32"
|
||||
"${_programfiles}/OpenSSL-Win64"
|
||||
"C:/OpenSSL/"
|
||||
"C:/OpenSSL-Win32/"
|
||||
"C:/OpenSSL-Win64/"
|
||||
)
|
||||
unset(_programfiles)
|
||||
else ()
|
||||
set(_OPENSSL_ROOT_HINTS
|
||||
${OPENSSL_ROOT_DIR}
|
||||
ENV OPENSSL_ROOT_DIR
|
||||
)
|
||||
endif ()
|
||||
|
||||
set(_OPENSSL_ROOT_HINTS_AND_PATHS
|
||||
HINTS ${_OPENSSL_ROOT_HINTS}
|
||||
PATHS ${_OPENSSL_ROOT_PATHS}
|
||||
)
|
||||
|
||||
find_path(OPENSSL_INCLUDE_DIR
|
||||
NAMES
|
||||
openssl/ssl.h
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
HINTS
|
||||
${_OPENSSL_INCLUDEDIR}
|
||||
PATH_SUFFIXES
|
||||
include
|
||||
)
|
||||
|
||||
if(WIN32 AND NOT CYGWIN)
|
||||
if(MSVC)
|
||||
# /MD and /MDd are the standard values - if someone wants to use
|
||||
# others, the libnames have to change here too
|
||||
# use also ssl and ssleay32 in debug as fallback for openssl < 0.9.8b
|
||||
# enable OPENSSL_MSVC_STATIC_RT to get the libs build /MT (Multithreaded no-DLL)
|
||||
# In Visual C++ naming convention each of these four kinds of Windows libraries has its standard suffix:
|
||||
# * MD for dynamic-release
|
||||
# * MDd for dynamic-debug
|
||||
# * MT for static-release
|
||||
# * MTd for static-debug
|
||||
|
||||
# Implementation details:
|
||||
# We are using the libraries located in the VC subdir instead of the parent directory even though :
|
||||
# libeay32MD.lib is identical to ../libeay32.lib, and
|
||||
# ssleay32MD.lib is identical to ../ssleay32.lib
|
||||
# enable OPENSSL_USE_STATIC_LIBS to use the static libs located in lib/VC/static
|
||||
|
||||
if (OPENSSL_MSVC_STATIC_RT)
|
||||
set(_OPENSSL_MSVC_RT_MODE "MT")
|
||||
else ()
|
||||
set(_OPENSSL_MSVC_RT_MODE "MD")
|
||||
endif ()
|
||||
|
||||
# Since OpenSSL 1.1, lib names are like libcrypto32MTd.lib and libssl32MTd.lib
|
||||
if( "${CMAKE_SIZEOF_VOID_P}" STREQUAL "8" )
|
||||
set(_OPENSSL_MSVC_ARCH_SUFFIX "64")
|
||||
else()
|
||||
set(_OPENSSL_MSVC_ARCH_SUFFIX "32")
|
||||
endif()
|
||||
|
||||
if(OPENSSL_USE_STATIC_LIBS)
|
||||
set(_OPENSSL_PATH_SUFFIXES
|
||||
"lib/VC/static"
|
||||
"VC/static"
|
||||
"lib"
|
||||
)
|
||||
else()
|
||||
set(_OPENSSL_PATH_SUFFIXES
|
||||
"lib/VC"
|
||||
"VC"
|
||||
"lib"
|
||||
)
|
||||
endif ()
|
||||
|
||||
find_library(LIB_EAY_DEBUG
|
||||
NAMES
|
||||
libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
|
||||
libcrypto${_OPENSSL_MSVC_RT_MODE}d
|
||||
libcryptod
|
||||
libeay32${_OPENSSL_MSVC_RT_MODE}d
|
||||
libeay32d
|
||||
cryptod
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
PATH_SUFFIXES
|
||||
${_OPENSSL_PATH_SUFFIXES}
|
||||
)
|
||||
|
||||
find_library(LIB_EAY_RELEASE
|
||||
NAMES
|
||||
libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
|
||||
libcrypto${_OPENSSL_MSVC_RT_MODE}
|
||||
libcrypto
|
||||
libeay32${_OPENSSL_MSVC_RT_MODE}
|
||||
libeay32
|
||||
crypto
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
PATH_SUFFIXES
|
||||
${_OPENSSL_PATH_SUFFIXES}
|
||||
)
|
||||
|
||||
find_library(SSL_EAY_DEBUG
|
||||
NAMES
|
||||
libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
|
||||
libssl${_OPENSSL_MSVC_RT_MODE}d
|
||||
libssld
|
||||
ssleay32${_OPENSSL_MSVC_RT_MODE}d
|
||||
ssleay32d
|
||||
ssld
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
PATH_SUFFIXES
|
||||
${_OPENSSL_PATH_SUFFIXES}
|
||||
)
|
||||
|
||||
find_library(SSL_EAY_RELEASE
|
||||
NAMES
|
||||
libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
|
||||
libssl${_OPENSSL_MSVC_RT_MODE}
|
||||
libssl
|
||||
ssleay32${_OPENSSL_MSVC_RT_MODE}
|
||||
ssleay32
|
||||
ssl
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
PATH_SUFFIXES
|
||||
${_OPENSSL_PATH_SUFFIXES}
|
||||
)
|
||||
|
||||
set(LIB_EAY_LIBRARY_DEBUG "${LIB_EAY_DEBUG}")
|
||||
set(LIB_EAY_LIBRARY_RELEASE "${LIB_EAY_RELEASE}")
|
||||
set(SSL_EAY_LIBRARY_DEBUG "${SSL_EAY_DEBUG}")
|
||||
set(SSL_EAY_LIBRARY_RELEASE "${SSL_EAY_RELEASE}")
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/SelectLibraryConfigurations.cmake)
|
||||
select_library_configurations(LIB_EAY)
|
||||
select_library_configurations(SSL_EAY)
|
||||
|
||||
mark_as_advanced(LIB_EAY_LIBRARY_DEBUG LIB_EAY_LIBRARY_RELEASE
|
||||
SSL_EAY_LIBRARY_DEBUG SSL_EAY_LIBRARY_RELEASE)
|
||||
set(OPENSSL_SSL_LIBRARY ${SSL_EAY_LIBRARY} )
|
||||
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY_LIBRARY} )
|
||||
elseif(MINGW)
|
||||
# same player, for MinGW
|
||||
set(LIB_EAY_NAMES crypto libeay32)
|
||||
set(SSL_EAY_NAMES ssl ssleay32)
|
||||
find_library(LIB_EAY
|
||||
NAMES
|
||||
${LIB_EAY_NAMES}
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
PATH_SUFFIXES
|
||||
"lib/MinGW"
|
||||
"lib"
|
||||
)
|
||||
|
||||
find_library(SSL_EAY
|
||||
NAMES
|
||||
${SSL_EAY_NAMES}
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
PATH_SUFFIXES
|
||||
"lib/MinGW"
|
||||
"lib"
|
||||
)
|
||||
|
||||
mark_as_advanced(SSL_EAY LIB_EAY)
|
||||
set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
|
||||
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
|
||||
unset(LIB_EAY_NAMES)
|
||||
unset(SSL_EAY_NAMES)
|
||||
else()
|
||||
# Not sure what to pick for -say- intel, let's use the toplevel ones and hope someone reports issues:
|
||||
find_library(LIB_EAY
|
||||
NAMES
|
||||
libcrypto
|
||||
libeay32
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
HINTS
|
||||
${_OPENSSL_LIBDIR}
|
||||
PATH_SUFFIXES
|
||||
lib
|
||||
)
|
||||
|
||||
find_library(SSL_EAY
|
||||
NAMES
|
||||
libssl
|
||||
ssleay32
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
HINTS
|
||||
${_OPENSSL_LIBDIR}
|
||||
PATH_SUFFIXES
|
||||
lib
|
||||
)
|
||||
|
||||
mark_as_advanced(SSL_EAY LIB_EAY)
|
||||
set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
|
||||
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
|
||||
endif()
|
||||
else()
|
||||
|
||||
find_library(OPENSSL_SSL_LIBRARY
|
||||
NAMES
|
||||
ssl
|
||||
ssleay32
|
||||
ssleay32MD
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
HINTS
|
||||
${_OPENSSL_LIBDIR}
|
||||
PATH_SUFFIXES
|
||||
lib
|
||||
)
|
||||
|
||||
find_library(OPENSSL_CRYPTO_LIBRARY
|
||||
NAMES
|
||||
crypto
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
HINTS
|
||||
${_OPENSSL_LIBDIR}
|
||||
PATH_SUFFIXES
|
||||
lib
|
||||
)
|
||||
|
||||
mark_as_advanced(OPENSSL_CRYPTO_LIBRARY OPENSSL_SSL_LIBRARY)
|
||||
|
||||
endif()
|
||||
|
||||
# compat defines
|
||||
set(OPENSSL_SSL_LIBRARIES ${OPENSSL_SSL_LIBRARY})
|
||||
set(OPENSSL_CRYPTO_LIBRARIES ${OPENSSL_CRYPTO_LIBRARY})
|
||||
_OpenSSL_test_and_find_dependencies("${OPENSSL_SSL_LIBRARY}" "${OPENSSL_CRYPTO_LIBRARY}")
|
||||
if(_OpenSSL_has_dependencies)
|
||||
_OpenSSL_add_dependencies( OPENSSL_SSL_LIBRARIES "${OPENSSL_SSL_LIBRARY}" )
|
||||
_OpenSSL_add_dependencies( OPENSSL_CRYPTO_LIBRARIES "${OPENSSL_CRYPTO_LIBRARY}" )
|
||||
endif()
|
||||
|
||||
function(from_hex HEX DEC)
|
||||
string(TOUPPER "${HEX}" HEX)
|
||||
set(_res 0)
|
||||
string(LENGTH "${HEX}" _strlen)
|
||||
|
||||
while (_strlen GREATER 0)
|
||||
math(EXPR _res "${_res} * 16")
|
||||
string(SUBSTRING "${HEX}" 0 1 NIBBLE)
|
||||
string(SUBSTRING "${HEX}" 1 -1 HEX)
|
||||
if (NIBBLE STREQUAL "A")
|
||||
math(EXPR _res "${_res} + 10")
|
||||
elseif (NIBBLE STREQUAL "B")
|
||||
math(EXPR _res "${_res} + 11")
|
||||
elseif (NIBBLE STREQUAL "C")
|
||||
math(EXPR _res "${_res} + 12")
|
||||
elseif (NIBBLE STREQUAL "D")
|
||||
math(EXPR _res "${_res} + 13")
|
||||
elseif (NIBBLE STREQUAL "E")
|
||||
math(EXPR _res "${_res} + 14")
|
||||
elseif (NIBBLE STREQUAL "F")
|
||||
math(EXPR _res "${_res} + 15")
|
||||
else()
|
||||
math(EXPR _res "${_res} + ${NIBBLE}")
|
||||
endif()
|
||||
|
||||
string(LENGTH "${HEX}" _strlen)
|
||||
endwhile()
|
||||
|
||||
set(${DEC} ${_res} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
if(OPENSSL_INCLUDE_DIR AND EXISTS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h")
|
||||
file(STRINGS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h" openssl_version_str
|
||||
REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])+.*")
|
||||
|
||||
if(openssl_version_str)
|
||||
# The version number is encoded as 0xMNNFFPPS: major minor fix patch status
|
||||
# The status gives if this is a developer or prerelease and is ignored here.
|
||||
# Major, minor, and fix directly translate into the version numbers shown in
|
||||
# the string. The patch field translates to the single character suffix that
|
||||
# indicates the bug fix state, which 00 -> nothing, 01 -> a, 02 -> b and so
|
||||
# on.
|
||||
|
||||
string(REGEX REPLACE "^.*OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F]).*$"
|
||||
"\\1;\\2;\\3;\\4;\\5" OPENSSL_VERSION_LIST "${openssl_version_str}")
|
||||
list(GET OPENSSL_VERSION_LIST 0 OPENSSL_VERSION_MAJOR)
|
||||
list(GET OPENSSL_VERSION_LIST 1 OPENSSL_VERSION_MINOR)
|
||||
from_hex("${OPENSSL_VERSION_MINOR}" OPENSSL_VERSION_MINOR)
|
||||
list(GET OPENSSL_VERSION_LIST 2 OPENSSL_VERSION_FIX)
|
||||
from_hex("${OPENSSL_VERSION_FIX}" OPENSSL_VERSION_FIX)
|
||||
list(GET OPENSSL_VERSION_LIST 3 OPENSSL_VERSION_PATCH)
|
||||
|
||||
if (NOT OPENSSL_VERSION_PATCH STREQUAL "00")
|
||||
from_hex("${OPENSSL_VERSION_PATCH}" _tmp)
|
||||
# 96 is the ASCII code of 'a' minus 1
|
||||
math(EXPR OPENSSL_VERSION_PATCH_ASCII "${_tmp} + 96")
|
||||
unset(_tmp)
|
||||
# Once anyone knows how OpenSSL would call the patch versions beyond 'z'
|
||||
# this should be updated to handle that, too. This has not happened yet
|
||||
# so it is simply ignored here for now.
|
||||
string(ASCII "${OPENSSL_VERSION_PATCH_ASCII}" OPENSSL_VERSION_PATCH_STRING)
|
||||
endif ()
|
||||
|
||||
set(OPENSSL_VERSION "${OPENSSL_VERSION_MAJOR}.${OPENSSL_VERSION_MINOR}.${OPENSSL_VERSION_FIX}${OPENSSL_VERSION_PATCH_STRING}")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
set(OPENSSL_LIBRARIES ${OPENSSL_SSL_LIBRARIES} ${OPENSSL_CRYPTO_LIBRARIES} )
|
||||
list(REMOVE_DUPLICATES OPENSSL_LIBRARIES)
|
||||
|
||||
foreach(_comp IN LISTS OpenSSL_FIND_COMPONENTS)
|
||||
if(_comp STREQUAL "Crypto")
|
||||
if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
|
||||
(EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
|
||||
EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
|
||||
EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
|
||||
)
|
||||
set(OpenSSL_${_comp}_FOUND TRUE)
|
||||
else()
|
||||
set(OpenSSL_${_comp}_FOUND FALSE)
|
||||
endif()
|
||||
elseif(_comp STREQUAL "SSL")
|
||||
if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
|
||||
(EXISTS "${OPENSSL_SSL_LIBRARY}" OR
|
||||
EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
|
||||
EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
|
||||
)
|
||||
set(OpenSSL_${_comp}_FOUND TRUE)
|
||||
else()
|
||||
set(OpenSSL_${_comp}_FOUND FALSE)
|
||||
endif()
|
||||
else()
|
||||
message(WARNING "${_comp} is not a valid OpenSSL component")
|
||||
set(OpenSSL_${_comp}_FOUND FALSE)
|
||||
endif()
|
||||
endforeach()
|
||||
unset(_comp)
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake)
|
||||
find_package_handle_standard_args(OpenSSL
|
||||
REQUIRED_VARS
|
||||
OPENSSL_CRYPTO_LIBRARY
|
||||
OPENSSL_INCLUDE_DIR
|
||||
VERSION_VAR
|
||||
OPENSSL_VERSION
|
||||
HANDLE_COMPONENTS
|
||||
FAIL_MESSAGE
|
||||
"Could NOT find OpenSSL, try to set the path to OpenSSL root folder in the system variable OPENSSL_ROOT_DIR"
|
||||
)
|
||||
|
||||
mark_as_advanced(OPENSSL_INCLUDE_DIR OPENSSL_LIBRARIES)
|
||||
|
||||
if(OPENSSL_FOUND)
|
||||
if(NOT TARGET OpenSSL::Crypto AND
|
||||
(EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
|
||||
EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
|
||||
EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
|
||||
)
|
||||
add_library(OpenSSL::Crypto UNKNOWN IMPORTED)
|
||||
set_target_properties(OpenSSL::Crypto PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
|
||||
if(EXISTS "${OPENSSL_CRYPTO_LIBRARY}")
|
||||
set_target_properties(OpenSSL::Crypto PROPERTIES
|
||||
IMPORTED_LINK_INTERFACE_LANGUAGES "C"
|
||||
IMPORTED_LOCATION "${OPENSSL_CRYPTO_LIBRARY}")
|
||||
endif()
|
||||
if(EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
|
||||
set_property(TARGET OpenSSL::Crypto APPEND PROPERTY
|
||||
IMPORTED_CONFIGURATIONS RELEASE)
|
||||
set_target_properties(OpenSSL::Crypto PROPERTIES
|
||||
IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
|
||||
IMPORTED_LOCATION_RELEASE "${LIB_EAY_LIBRARY_RELEASE}")
|
||||
endif()
|
||||
if(EXISTS "${LIB_EAY_LIBRARY_DEBUG}")
|
||||
set_property(TARGET OpenSSL::Crypto APPEND PROPERTY
|
||||
IMPORTED_CONFIGURATIONS DEBUG)
|
||||
set_target_properties(OpenSSL::Crypto PROPERTIES
|
||||
IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "C"
|
||||
IMPORTED_LOCATION_DEBUG "${LIB_EAY_LIBRARY_DEBUG}")
|
||||
endif()
|
||||
_OpenSSL_target_add_dependencies(OpenSSL::Crypto)
|
||||
endif()
|
||||
|
||||
if(NOT TARGET OpenSSL::SSL AND
|
||||
(EXISTS "${OPENSSL_SSL_LIBRARY}" OR
|
||||
EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
|
||||
EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
|
||||
)
|
||||
add_library(OpenSSL::SSL UNKNOWN IMPORTED)
|
||||
set_target_properties(OpenSSL::SSL PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
|
||||
if(EXISTS "${OPENSSL_SSL_LIBRARY}")
|
||||
set_target_properties(OpenSSL::SSL PROPERTIES
|
||||
IMPORTED_LINK_INTERFACE_LANGUAGES "C"
|
||||
IMPORTED_LOCATION "${OPENSSL_SSL_LIBRARY}")
|
||||
endif()
|
||||
if(EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
|
||||
set_property(TARGET OpenSSL::SSL APPEND PROPERTY
|
||||
IMPORTED_CONFIGURATIONS RELEASE)
|
||||
set_target_properties(OpenSSL::SSL PROPERTIES
|
||||
IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
|
||||
IMPORTED_LOCATION_RELEASE "${SSL_EAY_LIBRARY_RELEASE}")
|
||||
endif()
|
||||
if(EXISTS "${SSL_EAY_LIBRARY_DEBUG}")
|
||||
set_property(TARGET OpenSSL::SSL APPEND PROPERTY
|
||||
IMPORTED_CONFIGURATIONS DEBUG)
|
||||
set_target_properties(OpenSSL::SSL PROPERTIES
|
||||
IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "C"
|
||||
IMPORTED_LOCATION_DEBUG "${SSL_EAY_LIBRARY_DEBUG}")
|
||||
endif()
|
||||
if(TARGET OpenSSL::Crypto)
|
||||
set_target_properties(OpenSSL::SSL PROPERTIES
|
||||
INTERFACE_LINK_LIBRARIES OpenSSL::Crypto)
|
||||
endif()
|
||||
_OpenSSL_target_add_dependencies(OpenSSL::SSL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Restore the original find library ordering
|
||||
if(OPENSSL_USE_STATIC_LIBS)
|
||||
set(CMAKE_FIND_LIBRARY_SUFFIXES ${_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
|
||||
endif()
|
||||
268
CMakeModules/FindPackageHandleStandardArgs.cmake
vendored
268
CMakeModules/FindPackageHandleStandardArgs.cmake
vendored
@@ -1,268 +0,0 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageMessage.cmake)
|
||||
|
||||
# internal helper macro
|
||||
macro(_FPHSA_FAILURE_MESSAGE _msg)
|
||||
set (__msg "${_msg}")
|
||||
if (FPHSA_REASON_FAILURE_MESSAGE)
|
||||
string(APPEND __msg "\n Reason given by package: ${FPHSA_REASON_FAILURE_MESSAGE}\n")
|
||||
endif()
|
||||
if (${_NAME}_FIND_REQUIRED)
|
||||
message(FATAL_ERROR "${__msg}")
|
||||
else ()
|
||||
if (NOT ${_NAME}_FIND_QUIETLY)
|
||||
message(STATUS "${__msg}")
|
||||
endif ()
|
||||
endif ()
|
||||
endmacro()
|
||||
|
||||
|
||||
# internal helper macro to generate the failure message when used in CONFIG_MODE:
|
||||
macro(_FPHSA_HANDLE_FAILURE_CONFIG_MODE)
|
||||
# <PackageName>_CONFIG is set, but FOUND is false; this means that one of the other REQUIRED_VARS was not found:
|
||||
if(${_NAME}_CONFIG)
|
||||
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: missing:${MISSING_VARS} (found ${${_NAME}_CONFIG} ${VERSION_MSG})")
|
||||
else()
|
||||
# If _CONSIDERED_CONFIGS is set, the config-file has been found, but no suitable version.
|
||||
# List them all in the error message:
|
||||
if(${_NAME}_CONSIDERED_CONFIGS)
|
||||
set(configsText "")
|
||||
list(LENGTH ${_NAME}_CONSIDERED_CONFIGS configsCount)
|
||||
math(EXPR configsCount "${configsCount} - 1")
|
||||
foreach(currentConfigIndex RANGE ${configsCount})
|
||||
list(GET ${_NAME}_CONSIDERED_CONFIGS ${currentConfigIndex} filename)
|
||||
list(GET ${_NAME}_CONSIDERED_VERSIONS ${currentConfigIndex} version)
|
||||
string(APPEND configsText "\n ${filename} (version ${version})")
|
||||
endforeach()
|
||||
if (${_NAME}_NOT_FOUND_MESSAGE)
|
||||
if (FPHSA_REASON_FAILURE_MESSAGE)
|
||||
string(PREPEND FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}\n ")
|
||||
else()
|
||||
set(FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}")
|
||||
endif()
|
||||
else()
|
||||
string(APPEND configsText "\n")
|
||||
endif()
|
||||
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} ${VERSION_MSG}, checked the following files:${configsText}")
|
||||
|
||||
else()
|
||||
# Simple case: No Config-file was found at all:
|
||||
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: found neither ${_NAME}Config.cmake nor ${_NAME_LOWER}-config.cmake ${VERSION_MSG}")
|
||||
endif()
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
|
||||
function(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FIRST_ARG)
|
||||
|
||||
# Set up the arguments for `cmake_parse_arguments`.
|
||||
set(options CONFIG_MODE HANDLE_COMPONENTS)
|
||||
set(oneValueArgs FAIL_MESSAGE REASON_FAILURE_MESSAGE VERSION_VAR FOUND_VAR)
|
||||
set(multiValueArgs REQUIRED_VARS)
|
||||
|
||||
# Check whether we are in 'simple' or 'extended' mode:
|
||||
set(_KEYWORDS_FOR_EXTENDED_MODE ${options} ${oneValueArgs} ${multiValueArgs} )
|
||||
list(FIND _KEYWORDS_FOR_EXTENDED_MODE "${_FIRST_ARG}" INDEX)
|
||||
|
||||
if(${INDEX} EQUAL -1)
|
||||
set(FPHSA_FAIL_MESSAGE ${_FIRST_ARG})
|
||||
set(FPHSA_REQUIRED_VARS ${ARGN})
|
||||
set(FPHSA_VERSION_VAR)
|
||||
else()
|
||||
cmake_parse_arguments(FPHSA "${options}" "${oneValueArgs}" "${multiValueArgs}" ${_FIRST_ARG} ${ARGN})
|
||||
|
||||
if(FPHSA_UNPARSED_ARGUMENTS)
|
||||
message(FATAL_ERROR "Unknown keywords given to FIND_PACKAGE_HANDLE_STANDARD_ARGS(): \"${FPHSA_UNPARSED_ARGUMENTS}\"")
|
||||
endif()
|
||||
|
||||
if(NOT FPHSA_FAIL_MESSAGE)
|
||||
set(FPHSA_FAIL_MESSAGE "DEFAULT_MSG")
|
||||
endif()
|
||||
|
||||
# In config-mode, we rely on the variable <PackageName>_CONFIG, which is set by find_package()
|
||||
# when it successfully found the config-file, including version checking:
|
||||
if(FPHSA_CONFIG_MODE)
|
||||
list(INSERT FPHSA_REQUIRED_VARS 0 ${_NAME}_CONFIG)
|
||||
list(REMOVE_DUPLICATES FPHSA_REQUIRED_VARS)
|
||||
set(FPHSA_VERSION_VAR ${_NAME}_VERSION)
|
||||
endif()
|
||||
|
||||
if(NOT FPHSA_REQUIRED_VARS)
|
||||
message(FATAL_ERROR "No REQUIRED_VARS specified for FIND_PACKAGE_HANDLE_STANDARD_ARGS()")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# now that we collected all arguments, process them
|
||||
|
||||
if("x${FPHSA_FAIL_MESSAGE}" STREQUAL "xDEFAULT_MSG")
|
||||
set(FPHSA_FAIL_MESSAGE "Could NOT find ${_NAME}")
|
||||
endif()
|
||||
|
||||
list(GET FPHSA_REQUIRED_VARS 0 _FIRST_REQUIRED_VAR)
|
||||
|
||||
string(TOUPPER ${_NAME} _NAME_UPPER)
|
||||
string(TOLOWER ${_NAME} _NAME_LOWER)
|
||||
|
||||
if(FPHSA_FOUND_VAR)
|
||||
if(FPHSA_FOUND_VAR MATCHES "^${_NAME}_FOUND$" OR FPHSA_FOUND_VAR MATCHES "^${_NAME_UPPER}_FOUND$")
|
||||
set(_FOUND_VAR ${FPHSA_FOUND_VAR})
|
||||
else()
|
||||
message(FATAL_ERROR "The argument for FOUND_VAR is \"${FPHSA_FOUND_VAR}\", but only \"${_NAME}_FOUND\" and \"${_NAME_UPPER}_FOUND\" are valid names.")
|
||||
endif()
|
||||
else()
|
||||
set(_FOUND_VAR ${_NAME_UPPER}_FOUND)
|
||||
endif()
|
||||
|
||||
# collect all variables which were not found, so they can be printed, so the
|
||||
# user knows better what went wrong (#6375)
|
||||
set(MISSING_VARS "")
|
||||
set(DETAILS "")
|
||||
# check if all passed variables are valid
|
||||
set(FPHSA_FOUND_${_NAME} TRUE)
|
||||
foreach(_CURRENT_VAR ${FPHSA_REQUIRED_VARS})
|
||||
if(NOT ${_CURRENT_VAR})
|
||||
set(FPHSA_FOUND_${_NAME} FALSE)
|
||||
string(APPEND MISSING_VARS " ${_CURRENT_VAR}")
|
||||
else()
|
||||
string(APPEND DETAILS "[${${_CURRENT_VAR}}]")
|
||||
endif()
|
||||
endforeach()
|
||||
if(FPHSA_FOUND_${_NAME})
|
||||
set(${_NAME}_FOUND TRUE)
|
||||
set(${_NAME_UPPER}_FOUND TRUE)
|
||||
else()
|
||||
set(${_NAME}_FOUND FALSE)
|
||||
set(${_NAME_UPPER}_FOUND FALSE)
|
||||
endif()
|
||||
|
||||
# component handling
|
||||
unset(FOUND_COMPONENTS_MSG)
|
||||
unset(MISSING_COMPONENTS_MSG)
|
||||
|
||||
if(FPHSA_HANDLE_COMPONENTS)
|
||||
foreach(comp ${${_NAME}_FIND_COMPONENTS})
|
||||
if(${_NAME}_${comp}_FOUND)
|
||||
|
||||
if(NOT DEFINED FOUND_COMPONENTS_MSG)
|
||||
set(FOUND_COMPONENTS_MSG "found components:")
|
||||
endif()
|
||||
string(APPEND FOUND_COMPONENTS_MSG " ${comp}")
|
||||
|
||||
else()
|
||||
|
||||
if(NOT DEFINED MISSING_COMPONENTS_MSG)
|
||||
set(MISSING_COMPONENTS_MSG "missing components:")
|
||||
endif()
|
||||
string(APPEND MISSING_COMPONENTS_MSG " ${comp}")
|
||||
|
||||
if(${_NAME}_FIND_REQUIRED_${comp})
|
||||
set(${_NAME}_FOUND FALSE)
|
||||
string(APPEND MISSING_VARS " ${comp}")
|
||||
endif()
|
||||
|
||||
endif()
|
||||
endforeach()
|
||||
set(COMPONENT_MSG "${FOUND_COMPONENTS_MSG} ${MISSING_COMPONENTS_MSG}")
|
||||
string(APPEND DETAILS "[c${COMPONENT_MSG}]")
|
||||
endif()
|
||||
|
||||
# version handling:
|
||||
set(VERSION_MSG "")
|
||||
set(VERSION_OK TRUE)
|
||||
|
||||
# check with DEFINED here as the requested or found version may be "0"
|
||||
if (DEFINED ${_NAME}_FIND_VERSION)
|
||||
if(DEFINED ${FPHSA_VERSION_VAR})
|
||||
set(_FOUND_VERSION ${${FPHSA_VERSION_VAR}})
|
||||
|
||||
if(${_NAME}_FIND_VERSION_EXACT) # exact version required
|
||||
# count the dots in the version string
|
||||
string(REGEX REPLACE "[^.]" "" _VERSION_DOTS "${_FOUND_VERSION}")
|
||||
# add one dot because there is one dot more than there are components
|
||||
string(LENGTH "${_VERSION_DOTS}." _VERSION_DOTS)
|
||||
if (_VERSION_DOTS GREATER ${_NAME}_FIND_VERSION_COUNT)
|
||||
# Because of the C++ implementation of find_package() ${_NAME}_FIND_VERSION_COUNT
|
||||
# is at most 4 here. Therefore a simple lookup table is used.
|
||||
if (${_NAME}_FIND_VERSION_COUNT EQUAL 1)
|
||||
set(_VERSION_REGEX "[^.]*")
|
||||
elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 2)
|
||||
set(_VERSION_REGEX "[^.]*\\.[^.]*")
|
||||
elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 3)
|
||||
set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*")
|
||||
else ()
|
||||
set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*\\.[^.]*")
|
||||
endif ()
|
||||
string(REGEX REPLACE "^(${_VERSION_REGEX})\\..*" "\\1" _VERSION_HEAD "${_FOUND_VERSION}")
|
||||
unset(_VERSION_REGEX)
|
||||
if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _VERSION_HEAD)
|
||||
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
|
||||
set(VERSION_OK FALSE)
|
||||
else ()
|
||||
set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
|
||||
endif ()
|
||||
unset(_VERSION_HEAD)
|
||||
else ()
|
||||
if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _FOUND_VERSION)
|
||||
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
|
||||
set(VERSION_OK FALSE)
|
||||
else ()
|
||||
set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
|
||||
endif ()
|
||||
endif ()
|
||||
unset(_VERSION_DOTS)
|
||||
|
||||
else() # minimum version specified:
|
||||
if (${_NAME}_FIND_VERSION VERSION_GREATER _FOUND_VERSION)
|
||||
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is at least \"${${_NAME}_FIND_VERSION}\"")
|
||||
set(VERSION_OK FALSE)
|
||||
else ()
|
||||
set(VERSION_MSG "(found suitable version \"${_FOUND_VERSION}\", minimum required is \"${${_NAME}_FIND_VERSION}\")")
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
else()
|
||||
|
||||
# if the package was not found, but a version was given, add that to the output:
|
||||
if(${_NAME}_FIND_VERSION_EXACT)
|
||||
set(VERSION_MSG "(Required is exact version \"${${_NAME}_FIND_VERSION}\")")
|
||||
else()
|
||||
set(VERSION_MSG "(Required is at least version \"${${_NAME}_FIND_VERSION}\")")
|
||||
endif()
|
||||
|
||||
endif()
|
||||
else ()
|
||||
# Check with DEFINED as the found version may be 0.
|
||||
if(DEFINED ${FPHSA_VERSION_VAR})
|
||||
set(VERSION_MSG "(found version \"${${FPHSA_VERSION_VAR}}\")")
|
||||
endif()
|
||||
endif ()
|
||||
|
||||
if(VERSION_OK)
|
||||
string(APPEND DETAILS "[v${${FPHSA_VERSION_VAR}}(${${_NAME}_FIND_VERSION})]")
|
||||
else()
|
||||
set(${_NAME}_FOUND FALSE)
|
||||
endif()
|
||||
|
||||
|
||||
# print the result:
|
||||
if (${_NAME}_FOUND)
|
||||
FIND_PACKAGE_MESSAGE(${_NAME} "Found ${_NAME}: ${${_FIRST_REQUIRED_VAR}} ${VERSION_MSG} ${COMPONENT_MSG}" "${DETAILS}")
|
||||
else ()
|
||||
|
||||
if(FPHSA_CONFIG_MODE)
|
||||
_FPHSA_HANDLE_FAILURE_CONFIG_MODE()
|
||||
else()
|
||||
if(NOT VERSION_OK)
|
||||
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: ${VERSION_MSG} (found ${${_FIRST_REQUIRED_VAR}})")
|
||||
else()
|
||||
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} (missing:${MISSING_VARS}) ${VERSION_MSG}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
endif ()
|
||||
|
||||
set(${_NAME}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
|
||||
set(${_NAME_UPPER}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
|
||||
endfunction()
|
||||
48
CMakeModules/FindPackageMessage.cmake
vendored
48
CMakeModules/FindPackageMessage.cmake
vendored
@@ -1,48 +0,0 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
#[=======================================================================[.rst:
|
||||
FindPackageMessage
|
||||
------------------
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
find_package_message(<name> "message for user" "find result details")
|
||||
|
||||
This function is intended to be used in FindXXX.cmake modules files.
|
||||
It will print a message once for each unique find result. This is
|
||||
useful for telling the user where a package was found. The first
|
||||
argument specifies the name (XXX) of the package. The second argument
|
||||
specifies the message to display. The third argument lists details
|
||||
about the find result so that if they change the message will be
|
||||
displayed again. The function also obeys the QUIET argument to the
|
||||
find_package command.
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
if(X11_FOUND)
|
||||
find_package_message(X11 "Found X11: ${X11_X11_LIB}"
|
||||
"[${X11_X11_LIB}][${X11_INCLUDE_DIR}]")
|
||||
else()
|
||||
...
|
||||
endif()
|
||||
#]=======================================================================]
|
||||
|
||||
function(find_package_message pkg msg details)
|
||||
# Avoid printing a message repeatedly for the same find result.
|
||||
if(NOT ${pkg}_FIND_QUIETLY)
|
||||
string(REPLACE "\n" "" details "${details}")
|
||||
set(DETAILS_VAR FIND_PACKAGE_MESSAGE_DETAILS_${pkg})
|
||||
if(NOT "${details}" STREQUAL "${${DETAILS_VAR}}")
|
||||
# The message has not yet been printed.
|
||||
message(STATUS "${msg}")
|
||||
|
||||
# Save the find details in the cache to avoid printing the same
|
||||
# message again.
|
||||
set("${DETAILS_VAR}" "${details}"
|
||||
CACHE INTERNAL "Details about finding ${pkg}")
|
||||
endif()
|
||||
endif()
|
||||
endfunction()
|
||||
@@ -3,7 +3,7 @@ MAINTAINER simon987 <me@simon987.net>
|
||||
|
||||
RUN apt update
|
||||
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
|
||||
curl libtiff5 libpng16-16
|
||||
curl libtiff5 libpng16-16 libpcre3
|
||||
|
||||
RUN mkdir -p /usr/share/tessdata && \
|
||||
cd /usr/share/tessdata/ && \
|
||||
|
||||
@@ -1,15 +1,14 @@
|
||||
rm ./sist2
|
||||
cp ../sist2 .
|
||||
rm ./sist2 sist2_debug
|
||||
cp ../sist2.gz .
|
||||
gzip -d sist2.gz
|
||||
strip sist2
|
||||
|
||||
version=$(./sist2 --version)
|
||||
|
||||
echo "Version ${version}"
|
||||
docker build . -t simon987/sist2:${version} -t simon987/sist2:latest \
|
||||
-t docker.pkg.github.com/simon987/sist2/sist2:latest -t docker.pkg.github.com/simon987/sist2/sist2:${version}
|
||||
docker build . -t simon987/sist2:${version} -t simon987/sist2:latest
|
||||
|
||||
docker push simon987/sist2:${version}
|
||||
docker push simon987/sist2:latest
|
||||
docker push docker.pkg.github.com/simon987/sist2/sist2:latest
|
||||
docker push docker.pkg.github.com/simon987/sist2/sist2:${version}
|
||||
|
||||
docker run --rm -it simon987/sist2 -v
|
||||
docker run --rm simon987/sist2 -v
|
||||
22
DockerArm64/Dockerfile
Normal file
22
DockerArm64/Dockerfile
Normal file
@@ -0,0 +1,22 @@
|
||||
FROM ubuntu:19.10
|
||||
MAINTAINER simon987 <me@simon987.net>
|
||||
|
||||
RUN apt update
|
||||
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
|
||||
curl libtiff5 libpng16-16 libpcre3
|
||||
|
||||
RUN mkdir -p /usr/share/tessdata && \
|
||||
cd /usr/share/tessdata/ && \
|
||||
curl -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata &&\
|
||||
curl -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata &&\
|
||||
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
|
||||
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
|
||||
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
|
||||
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh
|
||||
|
||||
ADD sist2_arm64 /root/sist2
|
||||
|
||||
ENV LANG C.UTF-8
|
||||
ENV LC_ALL C.UTF-8
|
||||
|
||||
ENTRYPOINT ["/root/sist2"]
|
||||
13
DockerArm64/build.sh
Executable file
13
DockerArm64/build.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
rm ./sist2_arm64
|
||||
cp ../sist2_arm64.gz .
|
||||
gzip -d sist2_arm64.gz
|
||||
|
||||
version=$(./sist2_arm64 --version)
|
||||
|
||||
echo "Version ${version}"
|
||||
docker build . -t simon987/sist2-arm64:"${version}" -t simon987/sist2-arm64:latest
|
||||
|
||||
docker push simon987/sist2-arm64:"${version}"
|
||||
docker push simon987/sist2-arm64:latest
|
||||
|
||||
docker run --rm simon987/sist2-arm64 -v
|
||||
123
README.md
123
README.md
@@ -2,104 +2,90 @@
|
||||
[](https://www.codefactor.io/repository/github/simon987/sist2)
|
||||
[/statusIcon)](https://files.simon987.net/artifacts/Sist2/Build/)
|
||||
|
||||
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
|
||||
|
||||
# sist2
|
||||
|
||||
sist2 (Simple incremental search tool)
|
||||
|
||||
*Warning: sist2 is in early development*
|
||||
|
||||

|
||||
|
||||
## Features
|
||||
|
||||
* Fast, low memory usage, multi-threaded
|
||||
* Mobile-friendly Web interface
|
||||
* Portable (all its features are packaged in a single executable)
|
||||
* Extracts text from common file types \*
|
||||
* Extracts text and metadata from common file types \*
|
||||
* Generates thumbnails \*
|
||||
* Incremental scanning
|
||||
* Automatic tagging from file attributes via [user scripts](scripting/README.md)
|
||||
* Manual tagging from the UI and automatic tagging based on file attributes via [user scripts](docs/scripting.md)
|
||||
* Recursive scan inside archive files \*\*
|
||||
* OCR support with tesseract \*\*\*
|
||||
* Stats page & disk utilisation visualization
|
||||
|
||||
|
||||
\* See [format support](#format-support)
|
||||
\*\* See [Archive files](#archive-files)
|
||||
\*\*\* See [OCR](#ocr)
|
||||
|
||||

|
||||
|
||||
|
||||
## Getting Started
|
||||
|
||||
1. Have an [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) instance running
|
||||
1.
|
||||
1. Have an Elasticsearch (>= 6.X.X) instance running
|
||||
1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
|
||||
1. *(or)* Run using docker:
|
||||
```bash
|
||||
docker run -d --name es1 --net sist2_net -p 9200:9200 \
|
||||
-e "discovery.type=single-node" elasticsearch:7.5.2
|
||||
```
|
||||
1. *(or)* Run using docker-compose:
|
||||
```yaml
|
||||
elasticsearch:
|
||||
image: docker.elastic.co/elasticsearch/elasticsearch:7.5.2
|
||||
environment:
|
||||
- discovery.type=single-node
|
||||
- "ES_JAVA_OPTS=-Xms1G -Xmx2G"
|
||||
```
|
||||
1. Download sist2 executable
|
||||
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
|
||||
1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
|
||||
1. *(or)* `docker pull simon987/sist2:latest`
|
||||
|
||||
1. See [Usage guide](docs/USAGE.md)
|
||||
|
||||
|
||||
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
|
||||
\* *Mac users*: See [#1](https://github.com/simon987/sist2/issues/1)
|
||||
|
||||
|
||||
## Example usage
|
||||
|
||||
See [Usage guide](docs/USAGE.md) for more details
|
||||
|
||||

|
||||
|
||||
See help page `sist2 --help` for more details.
|
||||
|
||||
**Scan a directory**
|
||||
```bash
|
||||
sist2 scan ~/Documents -o ./orig_idx/
|
||||
sist2 scan --threads 4 --content-size 16384 /mnt/Pictures
|
||||
sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
|
||||
```
|
||||
|
||||
**Push index to Elasticsearch or file**
|
||||
```bash
|
||||
sist2 index --force-reset ./my_idx
|
||||
sist2 index --print ./my_idx > raw_documents.ndjson
|
||||
```
|
||||
|
||||
**Start web interface**
|
||||
```bash
|
||||
sist2 web --bind 0.0.0.0 --port 4321 ./my_idx1 ./my_idx2 ./my_idx3
|
||||
```
|
||||
|
||||
### Use sist2 with docker
|
||||
|
||||
**scan**
|
||||
```bash
|
||||
docker run -it \
|
||||
-v /path/to/files/:/files \
|
||||
-v $PWD/out/:/out \
|
||||
simon987/sist2 scan -t 4 /files -o /out/my_idx1
|
||||
```
|
||||
**index**
|
||||
```bash
|
||||
docker run -it --network host\
|
||||
-v $PWD/out/:/out \
|
||||
simon987/sist2 index /out/my_idx1
|
||||
```
|
||||
|
||||
**web**
|
||||
```bash
|
||||
docker run --rm --network host -d --name sist2\
|
||||
-v $PWD/out/my_idx:/idx \
|
||||
-v $PWD/my/files:/files \
|
||||
simon987/sist2 web --bind 0.0.0.0 /idx
|
||||
docker stop sist2
|
||||
```
|
||||
1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
|
||||
1. Push index to Elasticsearch: `sist2 index ./docs_idx`
|
||||
1. Start web interface: `sist2 web ./docs_idx`
|
||||
|
||||
|
||||
## Format support
|
||||
|
||||
File type | Library | Content | Thumbnail | Metadata
|
||||
:---|:---|:---|:---|:---
|
||||
pdf,xps,cbz,fb2,epub | MuPDF | text+ocr | yes, `png` | title |
|
||||
`audio/*` | ffmpeg | - | yes, `jpeg` | ID3 tags |
|
||||
`video/*` | ffmpeg | - | yes, `jpeg` | title, comment, artist |
|
||||
`image/*` | ffmpeg | - | yes, `jpeg` | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
|
||||
pdf,xps,fb2,epub | MuPDF | text+ocr | yes | title |
|
||||
cbz,cbr | *(none)* | - | yes | - |
|
||||
`audio/*` | ffmpeg | - | yes | ID3 tags |
|
||||
`video/*` | ffmpeg | - | yes | title, comment, artist |
|
||||
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
|
||||
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags |
|
||||
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
|
||||
`text/plain` | *(none)* | yes | no | - |
|
||||
html, xml | *(none)* | yes | no | - |
|
||||
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
|
||||
docx, xlsx, pptx | libOPC | yes | no | no |
|
||||
docx, xlsx, pptx | *(none)* | yes | if embedded | creator, modified_by, title |
|
||||
mobi, azw, azw3 | libmobi | yes | no | author, title |
|
||||
|
||||
\* *See [Archive files](#archive-files)*
|
||||
|
||||
@@ -109,22 +95,20 @@ they were directly in the file system. Recursive (archives inside archives)
|
||||
scan is also supported.
|
||||
|
||||
**Limitations**:
|
||||
* Parsing media files with formats that require
|
||||
*seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) is not supported.
|
||||
* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
|
||||
is limited (see `--mem-buffer` option)
|
||||
* Archive files are scanned sequentially, by a single thread. On systems where
|
||||
**sist2** is not I/O bound, scans might be faster when larger archives are split
|
||||
into smaller parts.
|
||||
|
||||
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
|
||||
|
||||
|
||||
### OCR
|
||||
|
||||
You can enable OCR support for pdf,xps,cbz,fb2,epub file types with the
|
||||
You can enable OCR support for pdf,xps,fb2,epub file types with the
|
||||
`--ocr <lang>` option. Download the language data files with your
|
||||
package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
|
||||
|
||||
The `simon987/sist2` github image comes with common languages
|
||||
The `simon987/sist2` image comes with common languages
|
||||
(hin, jpn, eng, fra, rus, spa) pre-installed.
|
||||
|
||||
Examples
|
||||
@@ -137,22 +121,17 @@ sist2 scan --ocr eng ~/Books/Textbooks/
|
||||
## Build from source
|
||||
|
||||
You can compile **sist2** by yourself if you don't want to use the pre-compiled
|
||||
binaries.
|
||||
binaries (GCC 7+ required).
|
||||
|
||||
1. Install compile-time dependencies
|
||||
|
||||
*(Debian)*
|
||||
```bash
|
||||
apt install git cmake pkg-config libglib2.0-dev \
|
||||
libssl-dev uuid-dev python3 libmagic-dev libfreetype6-dev \
|
||||
libcurl-dev libbz2-dev yasm libharfbuzz-dev ragel \
|
||||
libarchive-dev libtiff5 libpng16-16 libpango1.0-dev
|
||||
```bash
|
||||
vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libuuid libmagic libraw curl[core,ssl] jbig2dec brotli libmupdf
|
||||
```
|
||||
|
||||
2. Build
|
||||
```bash
|
||||
git clone --recurse-submodules https://github.com/simon987/sist2
|
||||
./scripts/get_static_libs.sh
|
||||
cmake .
|
||||
git clone --recursive https://github.com/simon987/sist2/
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
|
||||
make
|
||||
```
|
||||
|
||||
1
argparse
1
argparse
Submodule argparse deleted from fafc503d23
1
cJSON
1
cJSON
Submodule cJSON deleted from 2d4ad84192
16
ci/build.sh
Normal file → Executable file
16
ci/build.sh
Normal file → Executable file
@@ -1,7 +1,17 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
./scripts/get_static_libs.sh
|
||||
VCPKG_ROOT="/vcpkg"
|
||||
|
||||
cmake .
|
||||
make
|
||||
rm *.gz
|
||||
|
||||
rm -rf CMakeFiles CMakeCache.txt
|
||||
cmake -DSIST_DEBUG=off -DVCPKG_BUILD_TYPE=release -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
|
||||
make -j 12
|
||||
strip sist2
|
||||
gzip -9 sist2
|
||||
|
||||
rm -rf CMakeFiles CMakeCache.txt
|
||||
cmake -DSIST_DEBUG=on -DVCPKG_BUILD_TYPE=debug -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
|
||||
make -j 12
|
||||
cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2
|
||||
tar -czf sist2_debug.tar.gz sist2_debug libasan.so.2
|
||||
|
||||
12
ci/build_arm64.sh
Executable file
12
ci/build_arm64.sh
Executable file
@@ -0,0 +1,12 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
VCPKG_ROOT="/vcpkg"
|
||||
|
||||
rm *.gz
|
||||
|
||||
rm -rf CMakeFiles CMakeCache.txt
|
||||
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
|
||||
make -j 4
|
||||
strip sist2
|
||||
mv sist2 sist2_arm64
|
||||
gzip -9 sist2_arm64
|
||||
351
docs/USAGE.md
Normal file
351
docs/USAGE.md
Normal file
@@ -0,0 +1,351 @@
|
||||
# Usage
|
||||
|
||||
*More examples (specifically with docker/compose) are in progress*
|
||||
|
||||
* [scan](#scan)
|
||||
* [options](#scan-options)
|
||||
* [examples](#scan-examples)
|
||||
* [index format](#index-format)
|
||||
* [index](#index)
|
||||
* [options](#index-options)
|
||||
* [examples](#index-examples)
|
||||
* [web](#web)
|
||||
* [options](#web-options)
|
||||
* [examples](#web-examples)
|
||||
* [rewrite_url](#rewrite_url)
|
||||
* [link to specific indices](#link-to-specific-indices)
|
||||
* [exec-script](#exec-script)
|
||||
* [tagging](#tagging)
|
||||
|
||||
```
|
||||
Usage: sist2 scan [OPTION]... PATH
|
||||
or: sist2 index [OPTION]... INDEX
|
||||
or: sist2 web [OPTION]... INDEX...
|
||||
or: sist2 exec-script [OPTION]... INDEX
|
||||
Lightning-fast file system indexer and search tool.
|
||||
|
||||
-h, --help show this help message and exit
|
||||
-v, --version Show version and exit
|
||||
--verbose Turn on logging
|
||||
--very-verbose Turn on debug messages
|
||||
|
||||
Scan options
|
||||
-t, --threads=<int> Number of threads. DEFAULT=1
|
||||
-q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
|
||||
--size=<int> Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
|
||||
--content-size=<int> Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
|
||||
--incremental=<str> Reuse an existing index and only scan modified files.
|
||||
-o, --output=<str> Output directory. DEFAULT=index.sist2/
|
||||
--rewrite-url=<str> Serve files from this url instead of from disk.
|
||||
--name=<str> Index display name. DEFAULT: (name of the directory)
|
||||
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
|
||||
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
|
||||
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
|
||||
-e, --exclude=<str> Files that match this regex will not be scanned
|
||||
--fast Only index file names & mime type
|
||||
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
|
||||
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
|
||||
|
||||
Index options
|
||||
-t, --threads=<int> Number of threads. DEFAULT=1
|
||||
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
|
||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||
-p, --print Just print JSON documents to stdout.
|
||||
--script-file=<str> Path to user script.
|
||||
--async-script Execute user script asynchronously.
|
||||
--batch-size=<int> Index batch size. DEFAULT: 100
|
||||
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
|
||||
|
||||
Web options
|
||||
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||
--bind=<str> Listen on this address. DEFAULT=localhost:4090
|
||||
--auth=<str> Basic auth in user:password format
|
||||
--tag-auth=<str> Basic auth in user:password format for tagging
|
||||
|
||||
Exec-script options
|
||||
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||
--script-file=<str> Path to user script.
|
||||
--async-script Execute user script asynchronously.
|
||||
Made by simon987 <me@simon987.net>. Released under GPL-3.0
|
||||
```
|
||||
|
||||
## Scan
|
||||
|
||||
### Scan options
|
||||
|
||||
* `-t, --threads`
|
||||
Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-WmiObject Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
|
||||
* `-q, --quality`
|
||||
Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. *Does not affect PDF thumbnails quality*
|
||||
* `--size`
|
||||
Thumbnail size in pixels.
|
||||
* `--content-size`
|
||||
Number of bytes of text to be extracted from the content of files (plain text and PDFs).
|
||||
Repeated whitespace and special characters do not count toward this limit.
|
||||
* `--incremental`
|
||||
Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
|
||||
will be copied to the new index and will not be parsed again.
|
||||
* `-o, --output` Output directory.
|
||||
* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
|
||||
* `--name` Set the `name` option for the web module
|
||||
* `--depth` Maximum scan depth. Set to 0 to only scan files directly in the root directory, set to -1 for infinite depth
|
||||
* `--archive` Archive file mode.
|
||||
* skip: Don't parse
|
||||
* list: Only get file names as text
|
||||
* shallow: Don't parse archives inside archives.
|
||||
* recurse: Scan archives recursively (default)
|
||||
* `--ocr` See [OCR](../README.md#OCR)
|
||||
* `-e, --exclude` Regex pattern to exclude files. A file is excluded if the pattern matches any
|
||||
part of the full absolute path.
|
||||
|
||||
Examples:
|
||||
* `-e ".*\.ttf"`: Ignore ttf files
|
||||
* `-e ".*\.(ttf|rar)"`: Ignore ttf and rar files
|
||||
* `-e "^/mnt/backups/"`: Ignore all files in the `/mnt/backups/` directory
|
||||
* `-e "^/mnt/Data[12]/"`: Ignore all files in the `/mnt/Data1/` and `/mnt/Data2/` directory
|
||||
* `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"` Exclude the
|
||||
`/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories
|
||||
* `--fast` Only index file names and mime type
|
||||
* `--treemap-threshold` Directories smaller than (`treemap-threshold` * `<total size of the index>`)
|
||||
will not be considered for the disk utilisation visualization; their size will be added to
|
||||
the parent directory. If the parent directory is still smaller than the threshold, it will also be "merged upwards"
|
||||
and so on.
|
||||
|
||||
In effect, smaller `treemap-threshold` values will yield a more detailed
|
||||
(but also a more cluttered and harder to read) visualization.
|
||||
|
||||
* `--mem-buffer` Maximum memory buffer size in MB (per thread) for files inside archives. Media files
|
||||
larger than this number will be read sequentially and no *seek* operations will be supported.
|
||||
|
||||
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
|
||||
|
||||
### Scan examples
|
||||
|
||||
Simple scan
|
||||
```bash
|
||||
sist2 scan ~/Documents
|
||||
|
||||
sist2 scan \
|
||||
--threads 4 --content-size 16000000 --quality 1.0 --archive shallow \
|
||||
--name "My Documents" --rewrite-url "http://nas.domain.local/My Documents/" \
|
||||
~/Documents -o ./documents.idx/
|
||||
```
|
||||
|
||||
Incremental scan
|
||||
```
|
||||
sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
|
||||
```
|
||||
|
||||
### Index format
|
||||
|
||||
A typical `binary` type index structure looks like this:
|
||||
```
|
||||
documents.idx/
|
||||
├── descriptor.json
|
||||
├── _index_139965416830720
|
||||
├── _index_139965425223424
|
||||
├── _index_139965433616128
|
||||
├── _index_139965442008832
|
||||
├── _index_139965442008832
|
||||
├── treemap.csv
|
||||
├── agg_mime.csv
|
||||
├── agg_date.csv
|
||||
├── agg_size.csv
|
||||
├── thumbs
|
||||
| ├── data.mdb
|
||||
| └── lock.mdb
|
||||
└── tags
|
||||
├── data.mdb
|
||||
└── lock.mdb
|
||||
```
|
||||
|
||||
The `_index_*` files contain the raw binary index data and are not meant to be
|
||||
read by other applications. The format is generally compatible across different
|
||||
sist2 versions.
|
||||
|
||||
The `thumbs/` folder is a [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database)
|
||||
database containing the thumbnails.
|
||||
|
||||
The `descriptor.json` file contains general information about the index. The
|
||||
following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`.
|
||||
|
||||
The `.csv` files are pre-computed aggregations necessary for the stats page.
|
||||
|
||||
|
||||
*Advanced usage*
|
||||
|
||||
Instead of using the `scan` module, you can also import an index generated
|
||||
by a third party application. The 'external' index must have the following format:
|
||||
|
||||
```
|
||||
my_index/
|
||||
├── descriptor.json
|
||||
├── _index_0
|
||||
└── thumbs
|
||||
├── data.mdb
|
||||
└── lock.mdb
|
||||
```
|
||||
|
||||
*descriptor.json*:
|
||||
```json
|
||||
{
|
||||
"uuid": "<valid UUID4>",
|
||||
"version": "_external_v1",
|
||||
"root": "(optional)",
|
||||
"name": "<name>",
|
||||
"rewrite_url": "(optional)",
|
||||
"type": "json",
|
||||
"timestamp": 1578971024
|
||||
}
|
||||
```
|
||||
|
||||
*_index_0*: NDJSON format (One json object per line)
|
||||
|
||||
```json
|
||||
{
|
||||
"_id": "unique uuid for the file",
|
||||
"index": "index uuid4 (same one as descriptor.json!)",
|
||||
"mime": "application/x-cbz",
|
||||
"size": 14341204,
|
||||
"mtime": 1578882996,
|
||||
"extension": "cbz",
|
||||
"name": "my_book",
|
||||
"path": "path/to/books",
|
||||
"content": "text contents of the book",
|
||||
"title": "Title of the book",
|
||||
"tag": ["genre.fiction", "author.someguy", "etc..."],
|
||||
"_keyword": [
|
||||
{"k": "ISBN", "v": "ABCD34789231"}
|
||||
],
|
||||
"_text": [
|
||||
{"k": "other", "v": "This will be indexed as text"}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
You can find the full list of supported fields [here](../src/io/serialize.c#L90)
|
||||
|
||||
The `_keyword.*` items will be indexed and searchable as **keyword** fields (only full matches allowed).
|
||||
The `_text.*` items will be indexed and searchable as **text** fields (fuzzy searching allowed)
|
||||
|
||||
|
||||
*thumbs/*:
|
||||
|
||||
LMDB key-value store. Keys are **binary** 128-bit UUID4s (`_id` field)
|
||||
and values are raw image bytes.
|
||||
|
||||
Importing an external `binary` type index is technically possible but
|
||||
it is currently unsupported and has no guarantees of backward/forward compatibility.
|
||||
|
||||
|
||||
## Index
|
||||
### Index options
|
||||
* `--es-url`
|
||||
Elasticsearch url and port. If you are using docker, make sure that both containers are on the
|
||||
same network.
|
||||
* `--es-index`
|
||||
Elasticsearch index name. DEFAULT=sist2
|
||||
* `-p, --print`
|
||||
Print index in JSON format to stdout.
|
||||
* `--script-file`
|
||||
Path to user script. See [Scripting](scripting.md).
|
||||
* `--async-script`
|
||||
Use `wait_for_completion=false` elasticsearch option while executing user script.
|
||||
(See [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/tasks.html))
|
||||
* `--batch-size=<int>`
|
||||
Index batch size. Indexing is generally faster with larger batches, but payloads that
|
||||
are too large will fail and additional overhead for retrying with smaller sizes may slow
|
||||
down the process.
|
||||
* `-f, --force-reset`
|
||||
Reset Elasticsearch mappings and settings.
|
||||
**(You must use this option the first time you use the index command)**.
|
||||
|
||||
### Index examples
|
||||
|
||||
**Push to elasticsearch**
|
||||
```bash
|
||||
sist2 index --force-reset --batch-size 1000 --es-url http://localhost:9200 ./my_index/
|
||||
sist2 index ./my_index/
|
||||
```
|
||||
|
||||
**Save index in JSON format**
|
||||
```bash
|
||||
sist2 index --print ./my_index/ > my_index.ndjson
|
||||
```
|
||||
|
||||
**Inspect contents of an index**
|
||||
```bash
|
||||
sist2 index --print ./my_index/ | jq | less
|
||||
```
|
||||
|
||||
## Web
|
||||
|
||||
### Web options
|
||||
* `--es-url=<str>` Elasticsearch url.
|
||||
* `--es-index`
|
||||
Elasticsearch index name. DEFAULT=sist2
|
||||
* `--bind=<str>` Listen on this address.
|
||||
* `--auth=<str>` Basic auth in user:password format
|
||||
* `--tag-auth=<str>` Basic auth in user:password format. Works the same way as the
|
||||
`--auth` argument, but authentication is only applied to the `/tag/` endpoint.
|
||||
|
||||
### Web examples
|
||||
|
||||
**Single index**
|
||||
```bash
|
||||
sist2 web --auth admin:hunter2 --bind 0.0.0.0:8888 my_index
|
||||
```
|
||||
|
||||
**Multiple indices**
|
||||
```bash
|
||||
# Indices will be displayed in this order in the web interface
|
||||
sist2 web index1 index2 index3 index4
|
||||
```
|
||||
|
||||
### rewrite_url
|
||||
|
||||
When the `rewrite_url` field is not empty, the web module ignores the `root`
|
||||
field and will return an HTTP redirect to `<rewrite_url><path>/<name><extension>`
|
||||
instead of serving the file from disk.
|
||||
Both the `root` and `rewrite_url` fields are safe to manually modify from the
|
||||
`descriptor.json` file.
|
||||
|
||||
### Link to specific indices
|
||||
|
||||
To link to specific indices, you can add a list of comma-separated index names to
|
||||
the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are
|
||||
not displayed.
|
||||
|
||||
## exec-script
|
||||
|
||||
The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.
|
||||
|
||||
|
||||
# Tagging
|
||||
|
||||
### Manual tagging
|
||||
|
||||
You can modify tags of individual documents directly from the
|
||||
`web` interface. Note that you can set up authentication for this feature
|
||||
with the `--tag-auth` option (See [web options](#web-options))
|
||||
|
||||

|
||||
|
||||
Tags that are manually added are saved both in the
|
||||
index folder (in `/tags/`) and in Elasticsearch*. When re-`index`ing,
|
||||
they are read from the index and automatically applied.
|
||||
|
||||
You can safely copy the `/tags/` database to another index.
|
||||
|
||||
See [Automatic tagging](#automatic-tagging) for information about tag
|
||||
hierarchies and tag colors.
|
||||
|
||||
\* *It can take a few seconds to take effect in new search queries, and the page needs
|
||||
to be reloaded for the tag tab to update*
|
||||
|
||||
|
||||
### Automatic tagging
|
||||
|
||||
See [scripting](scripting.md) documentation.
|
||||
|
Before Width: | Height: | Size: 26 KiB After Width: | Height: | Size: 26 KiB |
BIN
docs/manual_tag.png
Normal file
BIN
docs/manual_tag.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.9 KiB |
@@ -39,7 +39,7 @@ it adds the `genre.<genre>` tag.
|
||||
ArrayList tags = ctx._source.tag = new ArrayList();
|
||||
|
||||
if (ctx._source?.genre != null) {
|
||||
tags.add("genre." + ctx._source.genre.toLowerCase())
|
||||
tags.add("genre." + ctx._source.genre.toLowerCase());
|
||||
}
|
||||
```
|
||||
|
||||
@@ -54,6 +54,11 @@ script.painless.regex.enabled: true
|
||||
```
|
||||
Or, if you're using docker add `-e "script.painless.regex.enabled=true"`
|
||||
|
||||
**Tag color**
|
||||
|
||||
You can specify the color for an individual tag by appending a
|
||||
hexadecimal color code (`#RRGGBBAA`) to the tag name.
|
||||
|
||||
### Examples
|
||||
|
||||
If `(20XX)` is in the file name, add the `year.<year>` tag:
|
||||
@@ -62,7 +67,7 @@ ArrayList tags = ctx._source.tag = new ArrayList();
|
||||
|
||||
Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
|
||||
if (m.find()) {
|
||||
tags.add("year." + m.group(1))
|
||||
tags.add("year." + m.group(1));
|
||||
}
|
||||
```
|
||||
|
||||
@@ -106,12 +111,32 @@ if (ctx._source.path != "") {
|
||||
}
|
||||
```
|
||||
|
||||
Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
|
||||
Parse `EXIF:F Number` tag
|
||||
```Java
|
||||
ArrayList tags = ctx._source.tag = new ArrayList();
|
||||
|
||||
if (ctx._source.path != "") {
|
||||
String[] names = ctx._source.path.splitOnToken('/');
|
||||
tags.add("studio." + names[names.length-1]);
|
||||
if (ctx._source?.exif_fnumber != null) {
|
||||
String[] values = ctx._source.exif_fnumber.splitOnToken(' ');
|
||||
String aperture = String.valueOf(Float.parseFloat(values[0]) / Float.parseFloat(values[1]));
|
||||
if (aperture == "NaN") {
|
||||
aperture = "0,0";
|
||||
}
|
||||
tags.add("Aperture.f/" + aperture.replace(".", ","));
|
||||
}
|
||||
```
|
||||
|
||||
Display year and months from `EXIF:DateTime` tag
|
||||
```Java
|
||||
if (ctx._source?.exif_datetime != null) {
|
||||
SimpleDateFormat parser = new SimpleDateFormat("yyyy:MM:dd HH:mm:ss");
|
||||
Date date = parser.parse(ctx._source.exif_datetime);
|
||||
|
||||
SimpleDateFormat yp = new SimpleDateFormat("yyyy");
|
||||
SimpleDateFormat mp = new SimpleDateFormat("MMMMMMMMM");
|
||||
|
||||
String year = yp.format(date);
|
||||
String month = mp.format(date);
|
||||
|
||||
tags.add("Month." + month);
|
||||
tags.add("Year." + year);
|
||||
}
|
||||
|
||||
```
|
||||
BIN
docs/sist2.png
Normal file
BIN
docs/sist2.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 889 KiB |
BIN
docs/stats.png
Normal file
BIN
docs/stats.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 167 KiB |
@@ -1,53 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
/**@file config/mce/config.h
|
||||
*/
|
||||
#ifndef MCE_CONFIG_H
|
||||
#define MCE_CONFIG_H
|
||||
|
||||
#include <libxml/xmlstring.h>
|
||||
#include <stdio.h>
|
||||
#include <plib/plib.h>
|
||||
#include <assert.h>
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MCE_NAMESPACE_SUBSUMPTION_ENABLED 0
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* MCE_CONFIG_H */
|
||||
@@ -1,189 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file mce/helper.h
|
||||
Helper functions needed by mce/textreader.h and mce/textwriter.h to implement MCE:
|
||||
- mceQNameLevelAdd(), mceQNameLevelLookup() and mceQNameLevelCleanup() maintain a set of mceQNameLevel_t tuples.
|
||||
- mceQNameLevelPush() and mceQNameLevelPopIfMatch() maintain a stack of mceQNameLevel_t tuples.
|
||||
- mceCtxInit(), mceCtxCleanup() and mceCtxUnderstandsNamespace() manage a context which holds all information needed to do MCE preprocessing.
|
||||
*/
|
||||
#include <mce/config.h>
|
||||
|
||||
#ifndef MCE_HELPER_H
|
||||
#define MCE_HELPER_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
Triple (ns, ln, level).
|
||||
*/
|
||||
typedef struct MCE_QNAME_LEVEL {
|
||||
xmlChar *ns;
|
||||
xmlChar *ln;
|
||||
puint32_t level;
|
||||
puint32_t flag; // used by mceTextWriter
|
||||
} mceQNameLevel_t;
|
||||
|
||||
/**
|
||||
*/
|
||||
typedef enum MCE_SKIP_STATE_ENUM {
|
||||
MCE_SKIP_STATE_IGNORE,
|
||||
MCE_SKIP_STATE_ALTERNATE_CONTENT,
|
||||
MCE_SKIP_STATE_CHOICE_MATCHED
|
||||
} mceSkipState_t;
|
||||
|
||||
/**
|
||||
Represents an interval of levels which are "skipped" i.e. ignored.
|
||||
*/
|
||||
typedef struct MCE_SKIP_ITEM {
|
||||
puint32_t level_start;
|
||||
puint32_t level_end;
|
||||
mceSkipState_t state;
|
||||
} mceSkipItem_t;
|
||||
|
||||
/**
|
||||
Represents a set of (ns, ln, level) triples.
|
||||
*/
|
||||
typedef struct MCE_QNAME_LEVEL_SET {
|
||||
mceQNameLevel_t *list_array;
|
||||
puint32_t list_items;
|
||||
puint32_t max_level;
|
||||
} mceQNameLevelSet_t;
|
||||
|
||||
/**
|
||||
The skip stack.
|
||||
*/
|
||||
typedef struct MCE_SKIP_STACK {
|
||||
mceSkipItem_t *stack_array;
|
||||
puint32_t stack_items;
|
||||
} mceSkipStack_t;
|
||||
|
||||
|
||||
typedef enum MCE_ERROR_ENUM {
|
||||
MCE_ERROR_NONE,
|
||||
MCE_ERROR_XML,
|
||||
MCE_ERROR_MUST_UNDERSTAND,
|
||||
MCE_ERROR_VALIDATION,
|
||||
MCE_ERROR_MEMORY
|
||||
} mceError_t;
|
||||
|
||||
/**
|
||||
Holds all information to do MCE preprocessing.
|
||||
*/
|
||||
typedef struct MCE_CONTEXT {
|
||||
mceQNameLevelSet_t ignorable_set;
|
||||
mceQNameLevelSet_t understands_set;
|
||||
mceQNameLevelSet_t processcontent_set;
|
||||
mceQNameLevelSet_t suspended_set;
|
||||
#if (MCE_NAMESPACE_SUBSUMPTION_ENABLED)
|
||||
mceQNameLevelSet_t subsume_namespace_set;
|
||||
mceQNameLevelSet_t subsume_exclude_set;
|
||||
mceQNameLevelSet_t subsume_prefix_set;
|
||||
#endif
|
||||
mceSkipStack_t skip_stack;
|
||||
mceError_t error;
|
||||
pbool_t mce_disabled;
|
||||
puint32_t suspended_level;
|
||||
} mceCtx_t;
|
||||
|
||||
/**
|
||||
Add a new triple (ns, ln, level) to the triple set \c qname_level_set.
|
||||
The \c ns_sub string is optional and will not be touched.
|
||||
*/
|
||||
pbool_t mceQNameLevelAdd(mceQNameLevelSet_t *qname_level_set, const xmlChar *ns, const xmlChar *ln, puint32_t level);
|
||||
|
||||
/**
|
||||
Look up a triple (ns, ln, level) via \c ns and \c ln. If \c ignore_ln is PTRUE then the first triple matching \c ns will be returned.
|
||||
*/
|
||||
mceQNameLevel_t* mceQNameLevelLookup(mceQNameLevelSet_t *qname_level_set, const xmlChar *ns, const xmlChar *ln, pbool_t ignore_ln);
|
||||
|
||||
/**
|
||||
Remove all triples (ns, ln, level) where the level is greater than or equal to \c level.
|
||||
*/
|
||||
pbool_t mceQNameLevelCleanup(mceQNameLevelSet_t *qname_level_set, puint32_t level);
|
||||
|
||||
/**
|
||||
Push a new skip interval (level_start, level_end, state) on the stack \c skip_stack.
|
||||
*/
|
||||
pbool_t mceSkipStackPush(mceSkipStack_t *skip_stack, puint32_t level_start, puint32_t level_end, mceSkipState_t state);
|
||||
|
||||
/**
|
||||
Pop the top skip interval from the stack \c skip_stack.
|
||||
*/
|
||||
void mceSkipStackPop(mceSkipStack_t *skip_stack);
|
||||
|
||||
/**
|
||||
Returns top item or NULL.
|
||||
*/
|
||||
mceSkipItem_t *mceSkipStackTop(mceSkipStack_t *skip_stack);
|
||||
|
||||
/**
|
||||
Returns TRUE if the \c level is in the top skip interval.
|
||||
*/
|
||||
pbool_t mceSkipStackSkip(mceSkipStack_t *skip_stack, puint32_t level);
|
||||
|
||||
/**
|
||||
Initialize the mceCtx_t \c ctx.
|
||||
*/
|
||||
pbool_t mceCtxInit(mceCtx_t *ctx);
|
||||
|
||||
/**
|
||||
Cleanup, i.e. release all resources from the mceCtx_t \c ctx.
|
||||
*/
|
||||
pbool_t mceCtxCleanup(mceCtx_t *ctx);
|
||||
|
||||
/**
|
||||
Register the namespace \c ns in \c ctx.
|
||||
*/
|
||||
pbool_t mceCtxUnderstandsNamespace(mceCtx_t *ctx, const xmlChar *ns);
|
||||
|
||||
/**
|
||||
Register the namespace \c ns in \c ctx.
|
||||
*/
|
||||
pbool_t mceCtxSuspendProcessing(mceCtx_t *ctx, const xmlChar *ns, const xmlChar *ln);
|
||||
|
||||
|
||||
|
||||
#if (MCE_NAMESPACE_SUBSUMPTION_ENABLED)
|
||||
/**
|
||||
Subsume namespace \c ns_new with \c ns_old.
|
||||
*/
|
||||
pbool_t mceCtxSubsumeNamespace(mceCtx_t *ctx, const xmlChar *prefix_new, const xmlChar *ns_new, const xmlChar *ns_old);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* MCE_HELPER_H */
|
||||
@@ -1,464 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file mce/textreader.h
|
||||
|
||||
*/
|
||||
#ifndef MCE_TEXTREADER_H
|
||||
#define MCE_TEXTREADER_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
A handle to an MCE-aware libxml2 xmlTextReader.
|
||||
*/
|
||||
typedef struct MCE_TEXTREADER mceTextReader_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
|
||||
#include <mce/config.h>
|
||||
#include <opc/opc.h>
|
||||
#include <mce/helper.h>
|
||||
#include <libxml/xmlwriter.h>
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct MCE_TEXTREADER {
|
||||
xmlTextReaderPtr reader;
|
||||
mceCtx_t mceCtx;
|
||||
};
|
||||
|
||||
/**
|
||||
Wrapper around a libxml2 xmlTextReaderRead function.
|
||||
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderRead
|
||||
*/
|
||||
int mceTextReaderRead(mceTextReader_t *mceTextReader);
|
||||
|
||||
/**
|
||||
Wrapper around a libxml2 xmlTextReaderNext function.
|
||||
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderNext
|
||||
*/
|
||||
int mceTextReaderNext(mceTextReader_t *mceTextReader);
|
||||
|
||||
/**
|
||||
Creates an mceTextReader from an XmlTextReader.
|
||||
\code
|
||||
mceTextReader reader;
|
||||
mceTextReaderInit(&reader, xmlNewTextReaderFilename("sample.xml"));
|
||||
// reader is ready to use.
|
||||
mceTextReaderCleanup(&reader);
|
||||
\endcode
|
||||
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlNewTextReaderFilename
|
||||
*/
|
||||
int mceTextReaderInit(mceTextReader_t *mceTextReader, xmlTextReaderPtr reader);
|
||||
|
||||
/**
|
||||
Cleanup MCE reader, i.e. free all resources. Also calls xmlTextReaderClose and xmlFreeTextReader.
|
||||
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderClose
|
||||
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlFreeTextReader
|
||||
*/
|
||||
int mceTextReaderCleanup(mceTextReader_t *mceTextReader);
|
||||
|
||||
/**
|
||||
Reads all events from \c mceTextReader and pipes them to \c writer.
|
||||
\code
|
||||
mceTextReader reader;
|
||||
mceTextReaderInit(&reader, xmlNewTextReaderFilename("sample.xml"));
|
||||
mceTextReaderUnderstandsNamespace(&reader, _X("http://myextension"));
|
||||
xmlTextWriterPtr writer=xmlNewTextWriterFilename("out.xml", 0);
|
||||
mceTextReaderDump(&reader, writer, P_FALSE);
|
||||
xmlFreeTextWriter(writer);
|
||||
mceTextReaderCleanup(&reader);
|
||||
\endcode
|
||||
*/
|
||||
int mceTextReaderDump(mceTextReader_t *mceTextReader, xmlTextWriter *writer, pbool_t fragment);
|
||||
|
||||
/**
|
||||
Registers an MCE namespace.
|
||||
\see mceTextReaderDump()
|
||||
*/
|
||||
int mceTextReaderUnderstandsNamespace(mceTextReader_t *mceTextReader, const xmlChar *ns);
|
||||
|
||||
/**
|
||||
Disable MCE processing.
|
||||
\return Returns old value.
|
||||
*/
|
||||
pbool_t mceTextReaderDisableMCE(mceTextReader_t *mceTextReader, pbool_t flag);
|
||||
|
||||
|
||||
/**
|
||||
Signal an error to the MCE processor.
|
||||
*/
|
||||
void mceRaiseError(xmlTextReader *reader, mceCtx_t *ctx, mceError_t error, const xmlChar *str, ...);
|
||||
|
||||
/**
|
||||
Internal function which does the MCE postprocessing. E.g. mceTextReaderRead() is implemented as
|
||||
\code
|
||||
mceTextReaderPostprocess(mceTextReader->reader, &mceTextReader->mceCtx, xmlTextReaderRead(mceTextReader->reader))
|
||||
\endcode
|
||||
This function is exposed to make an existing libxml2 xmlTextReader MCE-aware.
|
||||
*/
|
||||
int mceTextReaderPostprocess(xmlTextReader *reader, mceCtx_t *ctx, int ret);
|
||||
|
||||
/**
|
||||
Get the error code.
|
||||
*/
|
||||
mceError_t mceTextReaderGetError(mceTextReader_t *mceTextReader);
|
||||
|
||||
/**
|
||||
Helper macro to declare a start/end document block in a declarative way:
|
||||
\code
|
||||
mce_start_document(reader) {
|
||||
} mce_end_document(reader);
|
||||
\endcode
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_start_document(_reader_) \
|
||||
if (NULL!=(_reader_)) { \
|
||||
mceTextReaderRead(_reader_); \
|
||||
if (0)
|
||||
|
||||
/**
|
||||
\see mce_start_document.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_end_document(_reader_) \
|
||||
} /* if (NULL!=reader) */ \
|
||||
|
||||
|
||||
/**
|
||||
Container for mce_start_element and mce_start_attribute declarations.
|
||||
\see mce_match_element
|
||||
\see mce_match_attribute
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_start_choice(_reader_) \
|
||||
if (0)
|
||||
|
||||
/**
|
||||
\see mce_start_choice
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_end_choice(_reader_)
|
||||
|
||||
|
||||
/**
|
||||
Skips the attributes.
|
||||
\see mce_match_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_skip_attributes(_reader_) \
|
||||
mce_start_attributes(_reader_) { \
|
||||
} mce_end_attributes(_reader_);
|
||||
|
||||
|
||||
/**
|
||||
Skips the children.
|
||||
\see mce_match_attribute.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_skip_children(_reader_) \
|
||||
mce_start_children(_reader_) { \
|
||||
} mce_end_children(_reader_);
|
||||
|
||||
/**
|
||||
\see mce_start_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_start_children(_reader_) \
|
||||
if (!xmlTextReaderIsEmptyElement((_reader_)->reader)) { \
|
||||
mceTextReaderRead(_reader_); do { \
|
||||
if (0)
|
||||
|
||||
/**
|
||||
\see mce_start_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_end_children(_reader_) \
|
||||
else { \
|
||||
if (XML_READER_TYPE_END_ELEMENT!=xmlTextReaderNodeType((_reader_)->reader)) { \
|
||||
mceTextReaderNext(_reader_); /*skip unhandled element */ \
|
||||
} \
|
||||
} \
|
||||
} while(XML_READER_TYPE_END_ELEMENT!=xmlTextReaderNodeType((_reader_)->reader) && \
|
||||
XML_READER_TYPE_NONE!=xmlTextReaderNodeType((_reader_)->reader)); \
|
||||
} /* if (!xmlTextReaderIsEmptyElement(reader->reader)) */
|
||||
|
||||
|
||||
/**
|
||||
Helper macro to match an element. Useful for calling code in a separate function:
|
||||
|
||||
\code
|
||||
void handleElement(reader) {
|
||||
mce_start_choice(reader) {
|
||||
mce_start_element(reader, _X("ns"), _X("element")) {
|
||||
|
||||
} mce_end_element(reader)
|
||||
} mce_end_choice(reader);
|
||||
}
|
||||
|
||||
void parse(reader) {
|
||||
mce_start_document(reader) {
|
||||
mce_start_element(reader, _X("ns"), _X("ln")) {
|
||||
mce_skip_attributes(reader);
|
||||
mce_start_children(reader) {
|
||||
mce_match_element(reader, _X("ns"), _X("element")) {
|
||||
handleElement(reader);
|
||||
}
|
||||
} mce_end_children(reader);
|
||||
} mce_end_element(reader);
|
||||
} mce_end_document(reader);
|
||||
}
|
||||
\endcode
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_match_element(_reader_, ns, ln) \
|
||||
} else if (XML_READER_TYPE_ELEMENT==xmlTextReaderNodeType((_reader_)->reader) \
|
||||
&& (NULL==ns || 0==xmlStrcmp(ns, xmlTextReaderConstNamespaceUri((_reader_)->reader))) \
|
||||
&& (NULL==ln || 0==xmlStrcmp(ln, xmlTextReaderConstLocalName((_reader_)->reader)))) {
|
||||
|
||||
|
||||
/**
|
||||
Helper macro to declare an element block in a declarative way:
|
||||
\code
|
||||
mce_start_element(reader, _X("ns"), _X("ln")) {
|
||||
mce_start_attributes(reader) {
|
||||
mce_start_attribute(reader, _X("ns"), _X("lnA")) {
|
||||
// code for handling lnA.
|
||||
} mce_end_attribute(reader);
|
||||
mce_start_attribute(reader, _X("ns"), _X("lnB")) {
|
||||
// code for handling lnB.
|
||||
} mce_end_attribute(reader);
|
||||
} mce_end_attributes(reader);
|
||||
mce_start_children(reader) {
|
||||
mce_start_element(reader, _X("ns"), _X("lnA")) {
|
||||
// code for handling lnA.
|
||||
} mce_end_element(reader);
|
||||
mce_start_element(reader, _X("ns"), _X("lnB")) {
|
||||
// code for handling lnB.
|
||||
} mce_end_element(reader);
|
||||
mce_start_text(reader) {
|
||||
// code for handling text.
|
||||
} mce_end_text(reader);
|
||||
} mce_end_children(reader);
|
||||
} mce_end_element(reader);
|
||||
\endcode
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_start_element(_reader_, ns, ln) \
|
||||
mce_match_element(_reader_, ns, ln)
|
||||
|
||||
/**
|
||||
\see mce_start_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_end_element(_reader_) \
|
||||
mceTextReaderNext(_reader_)
|
||||
|
||||
/**
|
||||
Matches #TEXT without consuming it.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_match_text(_reader_) \
|
||||
} else if (XML_READER_TYPE_TEXT==xmlTextReaderNodeType((_reader_)->reader) \
|
||||
|| XML_READER_TYPE_SIGNIFICANT_WHITESPACE==xmlTextReaderNodeType((_reader_)->reader)) {
|
||||
|
||||
|
||||
/**
|
||||
\see mce_start_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_start_text(_reader_) \
|
||||
mce_match_text(_reader_)
|
||||
|
||||
/**
|
||||
\see mce_start_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_end_text(_reader_) \
|
||||
mceTextReaderNext(_reader_)
|
||||
|
||||
/**
|
||||
\see mce_start_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_start_attributes(_reader_) \
|
||||
if (1==xmlTextReaderMoveToFirstAttribute((_reader_)->reader)) { \
|
||||
do { \
|
||||
if (0)
|
||||
|
||||
/**
|
||||
\see mce_start_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_end_attributes(_reader_) \
|
||||
else { /* skipped attribute */ } \
|
||||
} while(1==xmlTextReaderMoveToNextAttribute((_reader_)->reader)); \
|
||||
xmlTextReaderMoveToElement((_reader_)->reader); }
|
||||
|
||||
/**
|
||||
Helper macro to match an attribute. Useful for calling code in a separate function:
|
||||
|
||||
\code
|
||||
void handleA(reader) {
|
||||
mce_start_choice(reader) {
|
||||
mce_start_attribute(reader, _X("ns"), _X("attr")) {
|
||||
|
||||
} mce_end_attribute(reader);
|
||||
} mce_end_choice(reader);
|
||||
}
|
||||
|
||||
void parse(reader) {
|
||||
mce_start_document(reader) {
|
||||
mce_start_element(reader, _X("ns"), _X("ln")) {
|
||||
mce_start_attributes(reader) {
|
||||
mce_match_attribute(reader, _X("ns"), _X("attr")) {
|
||||
handleA(reader);
|
||||
}
|
||||
} mce_end_attributes(reader);
|
||||
mce_skip_children(reader);
|
||||
} mce_end_element(reader);
|
||||
} mce_end_document(reader);
|
||||
}
|
||||
\endcode
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_match_attribute(_reader_, ns, ln) \
|
||||
} else if ((NULL==ns || 0==xmlStrcmp(ns, xmlTextReaderConstNamespaceUri((_reader_)->reader))) \
|
||||
&& (NULL==ln || 0==xmlStrcmp(ln, xmlTextReaderConstLocalName((_reader_)->reader)))) {
|
||||
|
||||
/**
|
||||
\see mce_start_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_start_attribute(_reader_, ns, ln) \
|
||||
mce_match_attribute(_reader_, ns, ln)
|
||||
|
||||
/**
|
||||
\see mce_start_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_end_attribute(_reader_)
|
||||
|
||||
|
||||
/**
|
||||
Error handling for MCE parsers.
|
||||
\code
|
||||
mce_start_element(&reader, NULL, _X("Default")) {
|
||||
const xmlChar *ext=NULL;
|
||||
const xmlChar *type=NULL;
|
||||
mce_start_attributes(&reader) {
|
||||
mce_start_attribute(&reader, NULL, _X("Extension")) {
|
||||
ext=xmlTextReaderConstValue(reader.reader);
|
||||
} mce_end_attribute(&reader);
|
||||
mce_start_attribute(&reader, NULL, _X("ContentType")) {
|
||||
type=xmlTextReaderConstValue(reader.reader);
|
||||
} mce_end_attribute(&reader);
|
||||
} mce_end_attributes(&reader);
|
||||
mce_error_guard_start(&reader) {
|
||||
mce_error(&reader, NULL==ext || ext[0]==0, MCE_ERROR_VALIDATION, "Missing @Extension attribute!");
|
||||
mce_error(&reader, NULL==type || type[0]==0, MCE_ERROR_VALIDATION, "Missing @ContentType attribute!");
|
||||
opcContainerType *ct=insertType(c, type, OPC_TRUE);
|
||||
mce_error(&reader, NULL==ct, MCE_ERROR_MEMORY, NULL);
|
||||
opcContainerExtension *ce=opcContainerInsertExtension(c, ext, OPC_TRUE);
|
||||
mce_error(&reader, NULL==ce, MCE_ERROR_MEMORY, NULL);
|
||||
mce_errorf(&reader, NULL!=ce->type && 0!=xmlStrcmp(ce->type, type), MCE_ERROR_VALIDATION, "Extension \"%s\" is mapped to type \"%s\" as well as \"%s\"", ext, type, ce->type);
|
||||
ce->type=ct->type;
|
||||
} mce_error_guard_end(&reader);
|
||||
mce_skip_children(&reader);
|
||||
} mce_end_element(&reader);
|
||||
\endcode
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_error_guard_start(_reader_) if (MCE_ERROR_NONE==(_reader_)->mceCtx.error) do {
|
||||
|
||||
/**
|
||||
\see mce_error_guard_start
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_error_guard_end(_reader_) } while(0)
|
||||
|
||||
/**
|
||||
Signal an error if \c guard is true.
|
||||
\hideinitializer
|
||||
*/
|
||||
/*
 * Intended for use between mce_error_guard_start() and mce_error_guard_end():
 * when guard is true, err is stored in (_reader_)->mceCtx.error, msg (or the
 * stringified err when msg is NULL) is written to stderr, and "continue"
 * leaves the enclosing guard block.
 * The text is printed through a constant "%s" format so that '%' characters
 * in msg are never interpreted as conversion specifiers.
 */
#define mce_error(_reader_, guard, err, msg) if (guard) { (_reader_)->mceCtx.error=(err); fprintf(stderr, "%s", (NULL!=(msg)?(msg):#err)); continue; }
|
||||
|
||||
/**
|
||||
Signal an error with a formatted message if \c guard is true.
|
||||
\hideinitializer
|
||||
*/
|
||||
#if defined(__GNUC__)
|
||||
#define mce_errorf(_reader_, guard, err, msg, ...) if (guard) { mceRaiseError((_reader_)->reader, &(_reader_)->mceCtx, err, _X((NULL!=msg?msg:#err)), ##__VA_ARGS__ ); continue; }
|
||||
#else
|
||||
#define mce_errorf(_reader_, guard, err, msg, ...) if (guard) { mceRaiseError((_reader_)->reader, &(_reader_)->mceCtx, err, _X((NULL!=msg?msg:#err)), __VA_ARGS__ ); continue; }
|
||||
#endif
|
||||
|
||||
/**
|
||||
Intended to issue the error only when in "strict mode"; currently a plain alias for mce_error, so the error is always issued.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_error_strict mce_error
|
||||
|
||||
/**
|
||||
\see mce_error_strict
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_error_strictf mce_errorf
|
||||
|
||||
|
||||
/**
|
||||
Marker for an MCE definition.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_def
|
||||
|
||||
/**
|
||||
Marker for an MCE reference.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_ref(r) (r)
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* MCE_TEXTREADER_H */
|
||||
@@ -1,176 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file mce/textwriter.h
|
||||
|
||||
*/
|
||||
#include <mce/config.h>
|
||||
#include <libxml/xmlwriter.h>
|
||||
#include <mce/helper.h>
|
||||
|
||||
#ifndef MCE_TEXTWRITER_H
|
||||
#define MCE_TEXTWRITER_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
Default flags for an MCE namespace declaration.
|
||||
*/
|
||||
#define MCE_DEFAULT 0x0
|
||||
|
||||
/**
|
||||
Flags MCE namespace declaration "ignorable".
|
||||
*/
|
||||
#define MCE_IGNORABLE 0x1
|
||||
|
||||
/**
|
||||
Flags MCE namespace declaration "must understand".
|
||||
*/
|
||||
#define MCE_MUSTUNDERSTAND 0x2
|
||||
|
||||
/**
|
||||
The MCE text writer context.
|
||||
*/
|
||||
typedef struct MCE_TEXTWRITER_STRUCT mceTextWriter;
|
||||
|
||||
/**
|
||||
Create a new MCE text writer.
|
||||
\see http://xmlsoft.org/html/libxml-xmlIO.html#xmlOutputBufferCreateIO
|
||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlNewTextWriter
|
||||
*/
|
||||
mceTextWriter *mceTextWriterCreateIO(xmlOutputWriteCallback iowrite, xmlOutputCloseCallback ioclose, void *ioctx, xmlCharEncodingHandlerPtr encoder);
|
||||
|
||||
/**
|
||||
Helper which creates a new MCE text writer for a FILE handle.
|
||||
*/
|
||||
mceTextWriter *mceNewTextWriterFile(FILE *file);
|
||||
|
||||
/**
|
||||
Free all resources for \c w.
|
||||
*/
|
||||
int mceTextWriterFree(mceTextWriter *w);
|
||||
|
||||
/**
|
||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterStartDocument
|
||||
*/
|
||||
int mceTextWriterStartDocument(mceTextWriter *w);
|
||||
|
||||
/**
|
||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterEndDocument
|
||||
*/
|
||||
int mceTextWriterEndDocument(mceTextWriter *w);
|
||||
|
||||
/**
|
||||
Start a new XML element. If ns==NULL then there is no namespace and ""==ns means the default namespace.
|
||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterStartElement
|
||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterStartElementNS
|
||||
*/
|
||||
int mceTextWriterStartElement(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln);
|
||||
|
||||
/**
|
||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterEndElement
|
||||
*/
|
||||
int mceTextWriterEndElement(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln);
|
||||
|
||||
/**
|
||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterWriteString
|
||||
*/
|
||||
int mceTextWriterWriteString(mceTextWriter *w, const xmlChar *content);
|
||||
|
||||
/**
|
||||
Register a namespace. Must be called before mceTextWriterStartElement.
|
||||
\see MCE_DEFAULT
|
||||
\see MCE_IGNORABLE
|
||||
\see MCE_MUSTUNDERSTAND
|
||||
*/
|
||||
const xmlChar *mceTextWriterRegisterNamespace(mceTextWriter *w, const xmlChar *ns, const xmlChar *prefix, int flags);
|
||||
|
||||
/**
|
||||
Register qname (ns, ln) as a "process content" element wrt. MCE. Must be called before mceTextWriterStartElement.
|
||||
*/
|
||||
int mceTextWriterProcessContent(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln);
|
||||
|
||||
/**
|
||||
Writes a formatted attribute.
|
||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterWriteFormatAttribute
|
||||
*/
|
||||
int mceTextWriterAttributeF(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln, const char *value, ...);
|
||||
|
||||
/**
|
||||
Starts an MCE alternate content section.
|
||||
*/
|
||||
int mceTextWriterStartAlternateContent(mceTextWriter *w);
|
||||
|
||||
/**
|
||||
Ends an MCE alternate content section.
|
||||
*/
|
||||
int mceTextWriterEndAlternateContent(mceTextWriter *w);
|
||||
|
||||
/**
|
||||
Start an MCE choice.
|
||||
*/
|
||||
int mceTextWriterStartChoice(mceTextWriter *w, const xmlChar *ns);
|
||||
|
||||
/**
|
||||
Ends an MCE choice.
|
||||
*/
|
||||
int mceTextWriterEndChoice(mceTextWriter *w);
|
||||
|
||||
/**
|
||||
Start an MCE fallback.
|
||||
*/
|
||||
int mceTextWriterStartFallback(mceTextWriter *w);
|
||||
|
||||
/**
|
||||
Ends an MCE fallback.
|
||||
*/
|
||||
int mceTextWriterEndFallback(mceTextWriter *w);
|
||||
|
||||
|
||||
/**
|
||||
Returns the underlying xmlTextWriter.
|
||||
*/
|
||||
xmlTextWriterPtr mceTextWriterIntern(mceTextWriter *w);
|
||||
|
||||
/**
|
||||
Helper which creates a new xmlTextWriterPtr for a FILE handle.
|
||||
*/
|
||||
xmlTextWriterPtr xmlNewTextWriterFile(FILE *file);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* MCE_TEXTWRITER_H */
|
||||
@@ -1,189 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
/**@file config/opc/config.h
|
||||
*/
|
||||
#ifndef OPC_CONFIG_H
|
||||
#define OPC_CONFIG_H
|
||||
|
||||
#include <libxml/xmlstring.h>
|
||||
#include <plib/plib.h>
|
||||
#include <assert.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
Assert expression e is true. Will be removed entirely in release mode.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define OPC_ASSERT(e) assert(e)
|
||||
|
||||
/**
|
||||
Assert expression e is true. Expression will be executed in release mode too.
|
||||
\hideinitializer
|
||||
*/
|
||||
#ifdef NDEBUG
|
||||
#define OPC_ENSURE(e) (void)(e)
|
||||
#else
|
||||
#define OPC_ENSURE(e) assert(e)
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
Constant for boolean true.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define OPC_TRUE (0==0)
|
||||
|
||||
/**
|
||||
Constant for boolean false.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define OPC_FALSE (0==1)
|
||||
|
||||
/**
|
||||
Boolean type.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef pbool_t opc_bool_t;
|
||||
|
||||
/**
|
||||
Type which represents an offset in e.g. a file.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef pofs_t opc_ofs_t;
|
||||
|
||||
/**
|
||||
8-bit unsigned integer.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef puint8_t opc_uint8_t;
|
||||
|
||||
/**
|
||||
16-bit unsigned integer.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef puint16_t opc_uint16_t;
|
||||
|
||||
/**
|
||||
32-bit unsigned integer.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef puint32_t opc_uint32_t;
|
||||
|
||||
/**
|
||||
64-bit unsigned integer.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef puint64_t opc_uint64_t;
|
||||
|
||||
/**
|
||||
8-bit signed integer.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef pint8_t opc_int8_t;
|
||||
|
||||
/**
|
||||
16-bit signed integer.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef pint16_t opc_int16_t;
|
||||
|
||||
/**
|
||||
32-bit signed integer.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef pint32_t opc_int32_t;
|
||||
|
||||
/**
|
||||
64-bit signed integer.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef pint64_t opc_int64_t;
|
||||
|
||||
/**
|
||||
Default size of the deflate buffer used by zlib.
|
||||
*/
|
||||
#define OPC_DEFLATE_BUFFER_SIZE 4096
|
||||
|
||||
/**
|
||||
Max system path len.
|
||||
*/
|
||||
#define OPC_MAX_PATH 512
|
||||
|
||||
/**
|
||||
Error codes for the OPC module.
|
||||
*/
|
||||
typedef enum OPC_ERROR_ENUM {
|
||||
OPC_ERROR_NONE,
|
||||
OPC_ERROR_STREAM,
|
||||
OPC_ERROR_SEEK, // can't seek
|
||||
OPC_ERROR_UNSUPPORTED_DATA_DESCRIPTOR,
|
||||
OPC_ERROR_UNSUPPORTED_COMPRESSION,
|
||||
OPC_ERROR_DEFLATE,
|
||||
OPC_ERROR_HEADER,
|
||||
OPC_ERROR_MEMORY,
|
||||
OPC_ERROR_XML,
|
||||
OPC_ERROR_USER // user triggered an abort
|
||||
} opc_error_t;
|
||||
|
||||
/**
|
||||
Compression options for OPC streams.
|
||||
*/
|
||||
typedef enum OPC_COMPRESSIONOPTION_ENUM {
|
||||
OPC_COMPRESSIONOPTION_NONE,
|
||||
OPC_COMPRESSIONOPTION_NORMAL,
|
||||
OPC_COMPRESSIONOPTION_MAXIMUM,
|
||||
OPC_COMPRESSIONOPTION_FAST,
|
||||
OPC_COMPRESSIONOPTION_SUPERFAST
|
||||
} opcCompressionOption_t;
|
||||
|
||||
|
||||
/**
|
||||
Helper for debug logs.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define opc_logf printf
|
||||
|
||||
/**
|
||||
Abstraction for memset(m, 0, s).
|
||||
\hideinitializer
|
||||
*/
|
||||
#define opc_bzero_mem(m,s) memset(m, 0, s)
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_CONFIG_H */
|
||||
@@ -1,300 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/container.h
|
||||
|
||||
The container.h module has the fundamental methods for dealing with ZIP-based OPC container.
|
||||
|
||||
OPC container can be opened in READ-ONLY mode, WRITE-ONLY mode, READ/WRITE mode, TEMPLATE mode and TRANSITION mode.
|
||||
The most notable mode is the READ/WRITE mode, which gives you concurrent stream-based READ and WRITE access to a
|
||||
single ZIP-based OPC container. This is achieved without the use of temporary files by taking advantage of the
|
||||
OPC specific “interleave” mode. \see http://standards.iso.org/ittf/PubliclyAvailableStandards/c051459_ISOIEC_29500-2_2008(E).zip
|
||||
|
||||
The TEMPLATE mode allows very fast customized "cloning" of ZIP-based OPC container by using "RAW access" to the ZIP streams.
|
||||
The TRANSITION mode is a special version of the TEMPLATE mode, which allows transition-based READ/WRITE access to the
|
||||
ZIP-based OPC container using a temporary file.
|
||||
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
#include <opc/file.h>
|
||||
|
||||
#ifndef OPC_CONTAINER_H
|
||||
#define OPC_CONTAINER_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/**
|
||||
Handle to an OPC container created by \ref opcContainerOpen.
|
||||
\see opcContainerOpen.
|
||||
*/
|
||||
typedef struct OPC_CONTAINER_STRUCT opcContainer;
|
||||
|
||||
/**
|
||||
Modes for opcContainerOpen();
|
||||
\see opcContainerOpen
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
Opens the OPC container denoted by \a fileName in READ-ONLY mode. The \a destName parameter must be \a NULL.
|
||||
\hideinitializer
|
||||
*/
|
||||
OPC_OPEN_READ_ONLY=0,
|
||||
/**
|
||||
Opens the OPC container denoted by \a fileName in WRITE-ONLY mode. The \a destName parameter must be \a NULL.
|
||||
\hideinitializer
|
||||
*/
|
||||
OPC_OPEN_WRITE_ONLY=1,
|
||||
/**
|
||||
Opens the OPC container denoted by \a fileName in READ/WRITE mode. The \a destName parameter must be \a NULL.
|
||||
\hideinitializer
|
||||
*/
|
||||
OPC_OPEN_READ_WRITE=2,
|
||||
/**
|
||||
This mode will open the container denoted by \a fileName in READ-ONLY mode and the container denoted by
|
||||
\a destName in write-only mode. Any modifications will be written to the container denoted by \a destName
|
||||
and the unmodified streams from \a fileName will be written to \a destName on closing.
|
||||
\warning Currently not implemented.
|
||||
\hideinitializer
|
||||
*/
|
||||
OPC_OPEN_TEMPLATE=3,
|
||||
/**
|
||||
Like the OPC_OPEN_TEMPLATE mode, but the \a destName will be renamed to the \a fileName on closing. If \a destName
|
||||
is \a NULL, then the name of the temporary file will be generated automatically.
|
||||
\warning Currently not implemented.
|
||||
\hideinitializer
|
||||
*/
|
||||
OPC_OPEN_TRANSITION=4
|
||||
} opcContainerOpenMode;
|
||||
|
||||
/** Modes for opcContainerClose.
|
||||
\see opcContainerClose.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
Close the OPC container without any further postprocessing.
|
||||
\hideinitializer
|
||||
*/
|
||||
OPC_CLOSE_NOW = 0,
|
||||
/**
|
||||
Close the OPC container and trim the file by removing unused fragments like e.g.
|
||||
deleted parts.
|
||||
\hideinitializer
|
||||
*/
|
||||
OPC_CLOSE_TRIM = 1,
|
||||
/**
|
||||
Close the OPC container like in \a OPC_CLOSE_TRIM mode, but additionally remove any
|
||||
"interleaved" parts by reordering them.
|
||||
\warning Currently not implemented. Same semantic as OPC_CLOSE_TRIM.
|
||||
\hideinitializer
|
||||
*/
|
||||
OPC_CLOSE_DEFRAG = 2
|
||||
} opcContainerCloseMode;
|
||||
|
||||
/**
|
||||
Opens a ZIP-based OPC container.
|
||||
@param[in] fileName. For more details see \ref opcContainerOpenMode.
|
||||
@param[in] mode. For more details see \ref opcContainerOpenMode.
|
||||
@param[in] userContext. Will not be modified by libopc. Can be used to e.g. store the "this" pointer for C++ bindings.
|
||||
@param[in] destName. For more details see \ref opcContainerOpenMode.
|
||||
@return \a NULL if failed.
|
||||
\see opcContainerOpenMode
|
||||
\see opcContainerDump
|
||||
*/
|
||||
opcContainer* opcContainerOpen(const xmlChar *fileName,
|
||||
opcContainerOpenMode mode,
|
||||
void *userContext,
|
||||
const xmlChar *destName);
|
||||
|
||||
/**
|
||||
Opens a ZIP-based OPC container from memory.
|
||||
@param[in] data.
|
||||
@param[in] data_len.
|
||||
@param[in] userContext. Will not be modified by libopc. Can be used to e.g. store the "this" pointer for C++ bindings.
|
||||
@param[in] mode. For more details see \ref opcContainerOpenMode.
|
||||
@return \a NULL if failed.
|
||||
*/
|
||||
opcContainer* opcContainerOpenMem(const opc_uint8_t *data, opc_uint32_t data_len,
|
||||
opcContainerOpenMode mode,
|
||||
void *userContext);
|
||||
|
||||
/**
|
||||
Opens a ZIP-based OPC container using caller-supplied I/O callbacks.
|
||||
@param[in] ioread.
|
||||
@param[in] iowrite.
|
||||
@param[in] ioclose.
|
||||
@param[in] ioseek.
|
||||
@param[in] iotrim.
|
||||
@param[in] ioflush.
|
||||
@param[in] iocontext.
|
||||
@param[in] file_size.
|
||||
@param[in] userContext. Will not be modified by libopc. Can be used to e.g. store the "this" pointer for C++ bindings.
|
||||
@param[in] mode. For more details see \ref opcContainerOpenMode.
|
||||
@return \a NULL if failed.
|
||||
*/
|
||||
opcContainer* opcContainerOpenIO(opcFileReadCallback *ioread,
|
||||
opcFileWriteCallback *iowrite,
|
||||
opcFileCloseCallback *ioclose,
|
||||
opcFileSeekCallback *ioseek,
|
||||
opcFileTrimCallback *iotrim,
|
||||
opcFileFlushCallback *ioflush,
|
||||
void *iocontext,
|
||||
pofs_t file_size,
|
||||
opcContainerOpenMode mode,
|
||||
void *userContext);
|
||||
|
||||
/**
|
||||
Close an OPC container.
|
||||
@param[in] c. \ref opcContainer opened by \ref opcContainerOpen.
|
||||
@param[in] mode. For more information see \ref opcContainerCloseMode.
|
||||
@return Non-zero if successful.
|
||||
\see opcContainerOpen
|
||||
\see opcContainerCloseMode
|
||||
*/
|
||||
opc_error_t opcContainerClose(opcContainer *c, opcContainerCloseMode mode);
|
||||
|
||||
/**
|
||||
Returns the unmodified user context passed to \ref opcContainerOpen.
|
||||
\see opcContainerOpen
|
||||
*/
|
||||
void *opcContainerGetUserContext(opcContainer *c);
|
||||
|
||||
/**
|
||||
List all types, relations and parts of the container \a c to \a out.
|
||||
\par Sample:
|
||||
\include opc_dump.c
|
||||
*/
|
||||
opc_error_t opcContainerDump(opcContainer *c, FILE *out);
|
||||
|
||||
/**
|
||||
Exports the OPC container to "Flat OPC" (http://blogs.msdn.com/b/ericwhite/archive/2008/09/29/the-flat-opc-format.aspx).
|
||||
The flat versions of an OPC file are very important when dealing with e.g XSL(T)-based or Javascript-based transformations.
|
||||
\see opcContainerFlatImport.
|
||||
\todo Implementation needed.
|
||||
*/
|
||||
int opcContainerFlatExport(opcContainer *c, const xmlChar *fileName);
|
||||
|
||||
/**
|
||||
Imports the flat version of an OPC container.
|
||||
\see opcContainerFlatExport.
|
||||
\todo Implementation needed.
|
||||
*/
|
||||
int opcContainerFlatImport(opcContainer *c, const xmlChar *fileName);
|
||||
|
||||
/**
|
||||
Iterate all types.
|
||||
\code
|
||||
for(const xmlChar *type=opcContentTypeFirst(c);
|
||||
NULL!=type;
|
||||
type=opcContentTypeNext(c, type)) {
|
||||
printf("%s\n", type);
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
const xmlChar *opcContentTypeFirst(opcContainer *container);
|
||||
|
||||
/**
|
||||
\see opcContentTypeFirst()
|
||||
*/
|
||||
const xmlChar *opcContentTypeNext(opcContainer *container, const xmlChar *type);
|
||||
|
||||
/**
|
||||
Iterate extensions.
|
||||
\code
|
||||
for(const xmlChar *ext=opcExtensionFirst(c);
|
||||
NULL!=ext;
|
||||
ext=opcExtensionNext(c, ext)) {
|
||||
printf("%s\n", ext);
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
const xmlChar *opcExtensionFirst(opcContainer *container);
|
||||
|
||||
/**
|
||||
\see opcExtensionFirst()
|
||||
*/
|
||||
const xmlChar *opcExtensionNext(opcContainer *container, const xmlChar *ext);
|
||||
|
||||
/**
|
||||
Get registered type for extension.
|
||||
\see opcExtensionRegister()
|
||||
*/
|
||||
const xmlChar *opcExtensionGetType(opcContainer *container, const xmlChar *ext);
|
||||
|
||||
/**
|
||||
Register a mime-type and an extension.
|
||||
\see opcExtensionGetType()
|
||||
*/
|
||||
const xmlChar *opcExtensionRegister(opcContainer *container, const xmlChar *ext, const xmlChar *type);
|
||||
|
||||
|
||||
/**
|
||||
Iterate through all relation types of the container:
|
||||
\code
|
||||
for(const xmlChar *type=opcRelationTypeFirst(c);
|
||||
NULL!=type;
|
||||
type=opcRelationTypeNext(c, type)) {
|
||||
printf("%s\n", type);
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
const xmlChar *opcRelationTypeFirst(opcContainer *container);
|
||||
|
||||
/**
|
||||
\see opcRelationTypeFirst()
|
||||
*/
|
||||
const xmlChar *opcRelationTypeNext(opcContainer *container, const xmlChar *type);
|
||||
|
||||
|
||||
/**
|
||||
Iterate through all external targets of the container:
|
||||
\code
|
||||
for(const xmlChar *target=opcExternalTargetFirst(c);
|
||||
NULL!=target;
|
||||
target=opcExternalTargetNext(c, target)) {
|
||||
printf("%s\n", target);
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
const xmlChar *opcExternalTargetFirst(opcContainer *container);
|
||||
|
||||
/**
|
||||
\see opcExternalTargetFirst()
|
||||
*/
|
||||
const xmlChar *opcExternalTargetNext(opcContainer *container, const xmlChar *target);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_CONTAINER_H */
|
||||
@@ -1,200 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/file.h
|
||||
The opc module contains the file library functions.
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
|
||||
#ifndef OPC_FILE_H
|
||||
#define OPC_FILE_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
Flag for READ access.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define OPC_FILE_READ (1<<0)
|
||||
|
||||
/**
|
||||
Flag for WRITE access.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define OPC_FILE_WRITE (1<<1)
|
||||
|
||||
/**
|
||||
Flag indicates that file will be truncated when opened.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define OPC_FILE_TRUNC (1<<2)
|
||||
|
||||
|
||||
/**
|
||||
Abstraction for seek modes.
|
||||
*/
|
||||
typedef enum OPC_FILESEEKMODE_ENUM {
|
||||
opcFileSeekSet = SEEK_SET,
|
||||
opcFileSeekCur = SEEK_CUR,
|
||||
opcFileSeekEnd = SEEK_END
|
||||
} opcFileSeekMode;
|
||||
|
||||
/**
|
||||
Callback to read a file. E.g. for a FILE * context this can be implemented as
|
||||
\code
|
||||
static int opcFileRead(void *iocontext, char *buffer, int len) {
|
||||
return fread(buffer, sizeof(char), len, (FILE*)iocontext);
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
typedef int opcFileReadCallback(void *iocontext, char *buffer, int len);
|
||||
|
||||
/**
|
||||
Callback to write a file. E.g. for a FILE * context this can be implemented as
|
||||
\code
|
||||
static int opcFileWrite(void *iocontext, const char *buffer, int len) {
|
||||
return fwrite(buffer, sizeof(char), len, (FILE*)iocontext);
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
typedef int opcFileWriteCallback(void *iocontext, const char *buffer, int len);
|
||||
|
||||
/**
|
||||
Callback to close a file. E.g. for a FILE * context this can be implemented as
|
||||
\code
|
||||
static int opcFileClose(void *iocontext) {
|
||||
return fclose((FILE*)iocontext);
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
typedef int opcFileCloseCallback(void *iocontext);
|
||||
|
||||
/**
|
||||
Callback to seek a file. E.g. for a FILE * context this can be implemented as
|
||||
\code
|
||||
static opc_ofs_t opcFileSeek(void *iocontext, opc_ofs_t ofs) {
|
||||
int ret=fseek((FILE*)iocontext, ofs, SEEK_SET);
|
||||
if (ret>=0) {
|
||||
return ftell((FILE*)iocontext);
|
||||
} else {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
typedef opc_ofs_t opcFileSeekCallback(void *iocontext, opc_ofs_t ofs);
|
||||
|
||||
/**
|
||||
Callback to trim a file. E.g. for a FILE * context this can be implemented as
|
||||
\code
|
||||
static int opcFileTrim(void *iocontext, opc_ofs_t new_size) {
|
||||
#ifdef WIN32
|
||||
return _chsize(fileno((FILE*)iocontext), new_size);
|
||||
#else
|
||||
return ftruncate(fileno((FILE*)iocontext), new_size);
|
||||
#endif
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
typedef int opcFileTrimCallback(void *iocontext, opc_ofs_t new_size);
|
||||
|
||||
/**
|
||||
Callback to flush a file. E.g. for a FILE * context this can be implemented as
|
||||
\code
|
||||
static int opcFileFlush(void *iocontext) {
|
||||
return fflush((FILE*)iocontext);
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
typedef int opcFileFlushCallback(void *iocontext);
|
||||
|
||||
/**
|
||||
Represents a state of a file, i.e. file position (buf_pos) and error status (err).
|
||||
*/
|
||||
typedef struct OPC_FILERAWSTATE_STRUCT {
|
||||
opc_error_t err;
|
||||
opc_ofs_t buf_pos; // current pos in file
|
||||
} opcFileRawState;
|
||||
|
||||
/**
|
||||
File IO context.
|
||||
*/
|
||||
typedef struct OPC_IO_STRUCT {
|
||||
opcFileReadCallback *_ioread;
|
||||
opcFileWriteCallback *_iowrite;
|
||||
opcFileCloseCallback *_ioclose;
|
||||
opcFileSeekCallback *_ioseek;
|
||||
opcFileTrimCallback *_iotrim;
|
||||
opcFileFlushCallback *_ioflush;
|
||||
void *iocontext;
|
||||
int flags;
|
||||
opcFileRawState state;
|
||||
opc_ofs_t file_size;
|
||||
} opcIO_t;
|
||||
|
||||
/**
|
||||
Initialize an IO context.
|
||||
*/
|
||||
opc_error_t opcFileInitIO(opcIO_t *io,
|
||||
opcFileReadCallback *ioread,
|
||||
opcFileWriteCallback *iowrite,
|
||||
opcFileCloseCallback *ioclose,
|
||||
opcFileSeekCallback *ioseek,
|
||||
opcFileTrimCallback *iotrim,
|
||||
opcFileFlushCallback *ioflush,
|
||||
void *iocontext,
|
||||
pofs_t file_size,
|
||||
int flags);
|
||||
|
||||
/**
|
||||
Initialize an IO context for a file.
|
||||
*/
|
||||
opc_error_t opcFileInitIOFile(opcIO_t *io, const xmlChar *filename, int flags);
|
||||
|
||||
/**
|
||||
Initialize an IO for memory.
|
||||
\warning Currently supports READ-ONLY file access.
|
||||
*/
|
||||
opc_error_t opcFileInitIOMemory(opcIO_t *io, const opc_uint8_t *data, opc_uint32_t data_len, int flags);
|
||||
|
||||
/**
|
||||
Cleanup an IO context, i.e. release all system resources.
|
||||
*/
|
||||
opc_error_t opcFileCleanupIO(opcIO_t *io);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_FILE_H */
|
||||
@@ -1,60 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/helper.h
|
||||
Contains helper functions for the opc module.
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
|
||||
#ifndef OPC_HELPER_H
|
||||
#define OPC_HELPER_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
/**
|
||||
Constructs a segment name.
|
||||
*/
|
||||
opc_uint16_t opcHelperAssembleSegmentName(char *out, opc_uint16_t out_size, const xmlChar *name, opc_uint32_t segment_number, opc_uint32_t next_segment_id, opc_bool_t rels_segment, opc_uint16_t *out_max);
|
||||
|
||||
/**
|
||||
Splits a filename into the segment informations.
|
||||
*/
|
||||
opc_error_t opcHelperSplitFilename(opc_uint8_t *filename, opc_uint32_t filename_length, opc_uint32_t *segment_number, opc_bool_t *last_segment, opc_bool_t *rel_segment);
|
||||
|
||||
#endif /* OPC_HELPER_H */
|
||||
@@ -1,74 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/inputstream.h
|
||||
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
|
||||
#ifndef OPC_INPUTSTREAM_H
|
||||
#define OPC_INPUTSTREAM_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/**
|
||||
Internal type which represents a binary input stream.
|
||||
*/
|
||||
typedef struct OPC_CONTAINER_INPUTSTREAM_STRUCT opcContainerInputStream;
|
||||
|
||||
/**
|
||||
Opens the part \c name of the \c container for reading.
|
||||
*/
|
||||
opcContainerInputStream* opcContainerOpenInputStream(opcContainer *container, const xmlChar *name);
|
||||
|
||||
/**
|
||||
Reads maximal \c buffer_len bytes from the input \c stream to \c buffer.
|
||||
\return The number of bytes read or "0" in case of an error or end-of-stream.
|
||||
*/
|
||||
opc_uint32_t opcContainerReadInputStream(opcContainerInputStream* stream, opc_uint8_t *buffer, opc_uint32_t buffer_len);
|
||||
|
||||
/**
|
||||
Closes the input stream and releases all system resources.
|
||||
*/
|
||||
opc_error_t opcContainerCloseInputStream(opcContainerInputStream* stream);
|
||||
|
||||
/**
|
||||
Returns the type of compression used for the stream.
|
||||
*/
|
||||
opcCompressionOption_t opcContainerGetInputStreamCompressionOption(opcContainerInputStream* stream);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_INPUTSTREAM_H */
|
||||
@@ -1,73 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/opc.h
|
||||
The opc module contains the basic library functions.
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
#include <opc/container.h>
|
||||
#include <opc/part.h>
|
||||
#include <opc/relation.h>
|
||||
#include <opc/inputstream.h>
|
||||
#include <opc/outputstream.h>
|
||||
#include <opc/zip.h>
|
||||
#include <opc/xmlreader.h>
|
||||
#include <opc/xmlwriter.h>
|
||||
#include <opc/properties.h>
|
||||
|
||||
#ifndef OPC_OPC_H
|
||||
#define OPC_OPC_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Initialize libopc.
|
||||
* Sample:
|
||||
* \include opc_helloworld.c
|
||||
* @return Non-zero if successful.
|
||||
*/
|
||||
opc_error_t opcInitLibrary(void); /* (void): a real prototype in C; empty parentheses would leave the parameters unspecified */
|
||||
|
||||
/**
|
||||
* Free libopc. Clean up all resources.
|
||||
* @return Non-zero if successful.
|
||||
* \see opcInitLibrary.
|
||||
*/
|
||||
opc_error_t opcFreeLibrary(void); /* (void): a real prototype in C; empty parentheses would leave the parameters unspecified */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_OPC_H */
|
||||
@@ -1,71 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/outputstream.h
|
||||
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
|
||||
#ifndef OPC_OUTPUTSTREAM_H
|
||||
#define OPC_OUTPUTSTREAM_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/**
|
||||
Internal type which represents a binary output stream.
|
||||
*/
|
||||
typedef struct OPC_CONTAINER_OUTPUTSTREAM_STRUCT opcContainerOutputStream;
|
||||
|
||||
/**
|
||||
Open the part \c name for writing in \c container with compression \c compression_option.
|
||||
\note Make sure the part exists!
|
||||
\see opcPartCreate.
|
||||
*/
|
||||
opcContainerOutputStream* opcContainerCreateOutputStream(opcContainer *container, const xmlChar *name, opcCompressionOption_t compression_option);
|
||||
|
||||
/**
|
||||
Write \c buffer_len bytes from \c buffer to \c stream.
|
||||
\return Returns the number of bytes written.
|
||||
*/
|
||||
opc_uint32_t opcContainerWriteOutputStream(opcContainerOutputStream* stream, const opc_uint8_t *buffer, opc_uint32_t buffer_len);
|
||||
|
||||
/**
|
||||
Close the \c stream and free all associated resources.
|
||||
*/
|
||||
opc_error_t opcContainerCloseOutputStream(opcContainerOutputStream* stream);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_OUTPUTSTREAM_H */
|
||||
@@ -1,118 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/part.h
|
||||
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
|
||||
#ifndef OPC_PART_H
|
||||
#define OPC_PART_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/**
|
||||
Handle to an OPC part created by \ref opcPartOpen.
|
||||
\see opcPartOpen.
|
||||
*/
|
||||
typedef xmlChar* opcPart;
|
||||
|
||||
/**
|
||||
Represents an invalid (resp. NULL) part.
|
||||
In relations OPC_PART_INVALID also represents the root part.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define OPC_PART_INVALID NULL
|
||||
|
||||
/**
|
||||
Find a part in a \c container by \c absolutePath and/or \c type.
|
||||
Currently no flags are supported.
|
||||
*/
|
||||
opcPart opcPartFind(opcContainer *container,
|
||||
const xmlChar *absolutePath,
|
||||
const xmlChar *type,
|
||||
int flags);
|
||||
|
||||
/**
|
||||
Creates a part in a \c container with \c absolutePath and \c type.
|
||||
Currently no flags are supported.
|
||||
*/
|
||||
opcPart opcPartCreate(opcContainer *container,
|
||||
const xmlChar *absolutePath,
|
||||
const xmlChar *type,
|
||||
int flags);
|
||||
|
||||
/**
|
||||
Returns the type of the \c part.
|
||||
The string is interned and must not be freed.
|
||||
*/
|
||||
const xmlChar *opcPartGetType(opcContainer *c, opcPart part);
|
||||
|
||||
/**
|
||||
Returns the type of the \c part.
|
||||
If \c override_only then the return value will be NULL for parts not having an override type.
|
||||
The string is interned and must not be freed.
|
||||
*/
|
||||
const xmlChar *opcPartGetTypeEx(opcContainer *c, opcPart part, opc_bool_t override_only);
|
||||
|
||||
/**
|
||||
Deletes the part \c absolutePath in the \c container.
|
||||
*/
|
||||
opc_error_t opcPartDelete(opcContainer *container, const xmlChar *absolutePath);
|
||||
|
||||
/**
|
||||
Get the first part.
|
||||
\code
|
||||
for(opcPart part=opcPartGetFirst(c);OPC_PART_INVALID!=part;part=opcPartGetNext(c, part)) {
|
||||
printf("%s;%s\n", part, opcPartGetType(c, part));
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
opcPart opcPartGetFirst(opcContainer *container);
|
||||
|
||||
/**
|
||||
Get the next part.
|
||||
\see opcPartGetFirst
|
||||
*/
|
||||
opcPart opcPartGetNext(opcContainer *container, opcPart part);
|
||||
|
||||
/**
|
||||
Returns the size in bytes of the \c part.
|
||||
*/
|
||||
opc_ofs_t opcPartGetSize(opcContainer *c, opcPart part);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_PART_H */
|
||||
@@ -1,121 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/properties.h
|
||||
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
#include <opc/container.h>
|
||||
|
||||
#ifndef OPC_PROPERTIES_H
|
||||
#define OPC_PROPERTIES_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
Represents a simple Dublin Core type.
|
||||
*/
|
||||
typedef struct OPC_DC_SIMPLE_TYPE {
|
||||
xmlChar *str;
|
||||
xmlChar *lang;
|
||||
} opcDCSimpleType_t;
|
||||
|
||||
/**
|
||||
Represents the core properties of an OPC container.
|
||||
*/
|
||||
typedef struct OPC_PROPERTIES_STRUCT {
|
||||
xmlChar *category; /* xsd:string */
|
||||
xmlChar *contentStatus; /* xsd:string */
|
||||
xmlChar *created; /* dc:date */
|
||||
opcDCSimpleType_t creator; /* dc:any */
|
||||
opcDCSimpleType_t description; /* dc:any */
|
||||
opcDCSimpleType_t identifier; /* dc:any */
|
||||
opcDCSimpleType_t *keyword_array; /* cp:CT_Keywords */
|
||||
opc_uint32_t keyword_items;
|
||||
opcDCSimpleType_t language; /* dc:any */
|
||||
xmlChar *lastModifiedBy; /* xsd:string */
|
||||
xmlChar *lastPrinted; /* xsd:dateTime */
|
||||
xmlChar *modified; /* dc:date */
|
||||
xmlChar *revision; /* xsd:string */
|
||||
opcDCSimpleType_t subject; /* dc:any */
|
||||
opcDCSimpleType_t title; /* dc:any */
|
||||
xmlChar *version; /* xsd:string */
|
||||
} opcProperties_t;
|
||||
|
||||
/**
|
||||
Initialize the core properties \c cp.
|
||||
\see opcCorePropertiesSetString
|
||||
*/
|
||||
opc_error_t opcCorePropertiesInit(opcProperties_t *cp);
|
||||
|
||||
/**
|
||||
Cleanup the core properties \c cp, i.e. release all resources.
|
||||
\see opcCorePropertiesSetString
|
||||
*/
|
||||
opc_error_t opcCorePropertiesCleanup(opcProperties_t *cp);
|
||||
|
||||
/**
|
||||
Reads the core properties \c cp from the container \c c.
|
||||
*/
|
||||
opc_error_t opcCorePropertiesRead(opcProperties_t *cp, opcContainer *c);
|
||||
|
||||
|
||||
/**
|
||||
Write/Update the core properties \c cp in the container \c c.
|
||||
*/
|
||||
opc_error_t opcCorePropertiesWrite(opcProperties_t *cp, opcContainer *c);
|
||||
|
||||
/**
|
||||
Update a string in the core properties the right way.
|
||||
\code
|
||||
opcProperties_t cp;
|
||||
opcCorePropertiesInit(&cp);
|
||||
opcCorePropertiesSetString(&cp.revision, "1");
|
||||
opcCorePropertiesSetStringLang(&cp.creator, "Florian Reuter", NULL);
|
||||
opcCorePropertiesCleanup(&cp);
|
||||
\endcode
|
||||
*/
|
||||
opc_error_t opcCorePropertiesSetString(xmlChar **prop, const xmlChar *str);
|
||||
|
||||
/**
|
||||
Update a core property the right way.
|
||||
\see opcCorePropertiesSetString
|
||||
*/
|
||||
opc_error_t opcCorePropertiesSetStringLang(opcDCSimpleType_t *prop, const xmlChar *str, const xmlChar *lang);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_PROPERTIES_H */
|
||||
@@ -1,140 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/relation.h
|
||||
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
|
||||
#ifndef OPC_RELATION_H
|
||||
#define OPC_RELATION_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
Identifier for an OPC relation.
|
||||
*/
|
||||
typedef opc_uint32_t opcRelation;
|
||||
|
||||
/**
|
||||
Constant which represents an invalid relation.
|
||||
*/
|
||||
#define OPC_RELATION_INVALID (-1) /* NOTE(review): opcRelation is typedef'd to opc_uint32_t above; presumably unsigned, so -1 converts to the maximum value when compared with an opcRelation - confirm */
|
||||
|
||||
/**
|
||||
Find a relation originating from \c part in \c container with \c relationId and/or \c mimeType.
|
||||
If \c part is OPC_PART_INVALID then part represents the root part.
|
||||
@param[in] relationId The relationId (e.g. "rId1") or NULL.
|
||||
@param[in] mimeType The mimeType or NULL.
|
||||
*/
|
||||
opcRelation opcRelationFind(opcContainer *container, opcPart part, const xmlChar *relationId, const xmlChar *mimeType);
|
||||
|
||||
/**
|
||||
Deletes the relation from the container.
|
||||
\see opcRelationFind.
|
||||
*/
|
||||
opc_error_t opcRelationDelete(opcContainer *container, opcPart part, const xmlChar *relationId, const xmlChar *mimeType);
|
||||
|
||||
/**
|
||||
Returns the first relation.
|
||||
The following code will dump all relations:
|
||||
\code
|
||||
for(opcPart part=opcPartGetFirst(c);OPC_PART_INVALID!=part;part=opcPartGetNext(c, part)) {
|
||||
for(opcRelation rel=opcRelationFirst(c, part);
|
||||
OPC_RELATION_INVALID!=rel;
|
||||
rel=opcRelationNext(c, part, rel)) {
|
||||
opcPart internal_target=opcRelationGetInternalTarget(c, part, rel);
|
||||
const xmlChar *external_target=opcRelationGetExternalTarget(c, part, rel);
|
||||
const xmlChar *target=(NULL!=internal_target?internal_target:external_target);
|
||||
const xmlChar *prefix=NULL;
|
||||
opc_uint32_t counter=-1;
|
||||
const xmlChar *type=NULL;
|
||||
opcRelationGetInformation(c, part, rel, &prefix, &counter, &type);
|
||||
if (-1==counter) { // no counter after prefix
|
||||
printf("%s;%s;%s;%s\n", part, prefix, target, type);
|
||||
} else {
|
||||
printf("%s;%s%i;%s;%s\n", part, prefix, counter, target, type);
|
||||
}
|
||||
}
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
opcRelation opcRelationFirst(opcContainer *container, opcPart part);
|
||||
|
||||
/**
|
||||
\see opcRelationFirst
|
||||
*/
|
||||
opcRelation opcRelationNext(opcContainer *container, opcPart part, opcRelation relation);
|
||||
|
||||
/**
|
||||
Returns the internal target.
|
||||
\note To test for an external target use opcRelationGetExternalTarget.
|
||||
\see opcRelationGetExternalTarget
|
||||
*/
|
||||
opcPart opcRelationGetInternalTarget(opcContainer *container, opcPart part, opcRelation relation);
|
||||
|
||||
/**
|
||||
Returns the external target or NULL if it is an internal target.
|
||||
The string is interned. Must not be freed.
|
||||
\see opcRelationGetInternalTarget
|
||||
*/
|
||||
const xmlChar *opcRelationGetExternalTarget(opcContainer *container, opcPart part, opcRelation relation);
|
||||
|
||||
/**
|
||||
Returns the relation's type.
|
||||
The string is interned. Must not be freed.
|
||||
*/
|
||||
const xmlChar *opcRelationGetType(opcContainer *container, opcPart part, opcRelation relation);
|
||||
|
||||
/**
|
||||
Get information about a relation.
|
||||
\see opcRelationFirst
|
||||
*/
|
||||
void opcRelationGetInformation(opcContainer *container, opcPart part, opcRelation relation, const xmlChar **prefix, opc_uint32_t *counter, const xmlChar **type);
|
||||
|
||||
/**
|
||||
Add a relation to \c container from \c src part to \c dest part with id \c rid and type \c type.
|
||||
*/
|
||||
opc_uint32_t opcRelationAdd(opcContainer *container, opcPart src, const xmlChar *rid, opcPart dest, const xmlChar *type);
|
||||
|
||||
/**
|
||||
Add an external relation to \c container from \c src part to \c target URL with id \c rid and type \c type.
|
||||
*/
|
||||
opc_uint32_t opcRelationAddExternal(opcContainer *container, opcPart src, const xmlChar *rid, const xmlChar *target, const xmlChar *type);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_RELATION_H */
|
||||
@@ -1,69 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/xmlreader.h
|
||||
|
||||
*/
|
||||
|
||||
#ifndef OPC_XMLREADER_H
|
||||
#define OPC_XMLREADER_H
|
||||
|
||||
#include <opc/config.h>
|
||||
#include <libxml/xmlreader.h>
|
||||
#include <mce/textreader.h>
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
Open an MCE reader for \c partName. Parameters \c URL, \c encoding and \c options will be passed unmodified to
|
||||
http://xmlsoft.org/html/libxml-xmlreader.html#xmlReaderForIO and they can be NULL, NULL, 0.
|
||||
\note Make sure the part exists.
|
||||
\see opcPartFind
|
||||
*/
|
||||
opc_error_t opcXmlReaderOpen(opcContainer *container, mceTextReader_t *mceTextReader, const xmlChar *partName, const char * URL, const char * encoding, int options);
|
||||
|
||||
/**
|
||||
Returns a libxml DOM document. Parameters \c URL, \c encoding and \c options will be passed unmodified to
|
||||
http://xmlsoft.org/html/libxml-parser.html#xmlReadIO and they can be NULL, NULL, 0.
|
||||
\note Make sure the part exists.
|
||||
\see opcPartFind
|
||||
*/
|
||||
xmlDocPtr opcXmlReaderReadDoc(opcContainer *container, const xmlChar *partName, const char * URL, const char * encoding, int options);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_XMLREADER_H */
|
||||
@@ -1,57 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/xmlwriter.h
|
||||
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
#include <mce/textwriter.h>
|
||||
|
||||
#ifndef OPC_XMLWRITER_H
|
||||
#define OPC_XMLWRITER_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
Create an MCE text writer for \c part in \c container with compression \c compression_option.
|
||||
\note Make sure the part exists.
|
||||
\see opcPartFind
|
||||
*/
|
||||
mceTextWriter *mceTextWriterOpen(opcContainer *c, opcPart part, opcCompressionOption_t compression_option);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_XMLWRITER_H */
|
||||
@@ -1,255 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/zip.h
|
||||
The ZIP file backend of an OPC container.
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
#include <opc/file.h>
|
||||
#include <opc/container.h>
|
||||
|
||||
#ifndef OPC_ZIP_H
|
||||
#define OPC_ZIP_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
Default growth hint of an OPC stream.
|
||||
*/
|
||||
#define OPC_DEFAULT_GROWTH_HINT 512
|
||||
|
||||
/**
|
||||
Handle to a ZIP archive.
|
||||
\see internal.h
|
||||
*/
|
||||
typedef struct OPC_ZIP_STRUCT opcZip;
|
||||
|
||||
/**
|
||||
Handle to a raw ZIP input stream.
|
||||
\see internal.h
|
||||
*/
|
||||
typedef struct OPC_ZIPINPUTSTREAM_STRUCT opcZipInputStream;
|
||||
|
||||
/**
|
||||
Handle to a raw ZIP output stream.
|
||||
\see internal.h
|
||||
*/
|
||||
typedef struct OPC_ZIPOUTPUTSTREAM_STRUCT opcZipOutputStream;
|
||||
|
||||
/**
|
||||
Holds all information of a ZIP segment.
|
||||
*/
|
||||
typedef struct OPC_ZIP_SEGMENT_INFO_STRUCT {
|
||||
xmlChar name[OPC_MAX_PATH];
|
||||
opc_uint32_t name_len;
|
||||
opc_uint32_t segment_number;
|
||||
opc_bool_t last_segment;
|
||||
opc_bool_t rels_segment;
|
||||
opc_uint32_t header_size;
|
||||
opc_uint32_t min_header_size;
|
||||
opc_uint32_t trailing_bytes;
|
||||
opc_uint32_t compressed_size;
|
||||
opc_uint32_t uncompressed_size;
|
||||
opc_uint16_t bit_flag;
|
||||
opc_uint32_t data_crc;
|
||||
opc_uint16_t compression_method;
|
||||
opc_ofs_t stream_ofs;
|
||||
opc_uint16_t growth_hint;
|
||||
} opcZipSegmentInfo_t;
|
||||
|
||||
/**
|
||||
\see opcZipLoader
|
||||
*/
|
||||
typedef int opcZipLoaderOpenCallback(void *iocontext);
|
||||
/**
|
||||
\see opcZipLoader
|
||||
*/
|
||||
typedef int opcZipLoaderSkipCallback(void *iocontext);
|
||||
/**
|
||||
\see opcZipLoader
|
||||
*/
|
||||
typedef int opcZipLoaderReadCallback(void *iocontext, char *buffer, int len);
|
||||
/**
|
||||
\see opcZipLoader
|
||||
*/
|
||||
typedef int opcZipLoaderCloseCallback(void *iocontext);
|
||||
|
||||
/**
|
||||
\see opcZipLoader
|
||||
*/
|
||||
typedef opc_error_t (opcZipLoaderSegmentCallback_t)(void *iocontext, void *userctx, opcZipSegmentInfo_t *info, opcZipLoaderOpenCallback *open, opcZipLoaderReadCallback *read, opcZipLoaderCloseCallback *close, opcZipLoaderSkipCallback *skip);
|
||||
|
||||
/**
|
||||
Walks every segment in a ZIP archive and calls the \c segmentCallback callback method.
|
||||
The implementer's \c segmentCallback method must then either use the passed \c open, \c read and \c close methods
|
||||
to read the stream or the passed \c skip methods to skip the stream.
|
||||
This method can be used to e.g. read ZIP file in stream mode.
|
||||
*/
|
||||
opc_error_t opcZipLoader(opcIO_t *io, void *userctx, opcZipLoaderSegmentCallback_t *segmentCallback);
|
||||
|
||||
/**
|
||||
\see opcZipClose
|
||||
*/
|
||||
typedef opc_error_t (opcZipSegmentReleaseCallback)(opcZip *zip, opc_uint32_t segment_id);
|
||||
|
||||
/**
|
||||
Closes the ZIP archive \c zip and will call \c releaseCallback for every segment to give the implementer a chance
|
||||
to free user resources.
|
||||
*/
|
||||
void opcZipClose(opcZip *zip, opcZipSegmentReleaseCallback* releaseCallback);
|
||||
|
||||
/**
|
||||
Creates an empty ZIP archive with the given \c io.
|
||||
*/
|
||||
opcZip *opcZipCreate(opcIO_t *io);
|
||||
|
||||
/**
|
||||
Commits all buffers and writes the ZIP archives local header directories.
|
||||
If \c trim is true then padding bytes will be removed, i.e. the ZIP file size will be minimized.
|
||||
*/
|
||||
opc_error_t opcZipCommit(opcZip *zip, opc_bool_t trim);
|
||||
|
||||
/**
|
||||
Garbage collection on the passed \c zip archive. This will e.g. make deleted files available as free space.
|
||||
*/
|
||||
opc_error_t opcZipGC(opcZip *zip);
|
||||
|
||||
/**
|
||||
Load segment information into \c info.
|
||||
If \c rels_segment is -1 then load the info for part with name \c partName.
|
||||
Otherwise load the segment information for the ".rels." segment of \c partName.
|
||||
\return Returns the segment_id.
|
||||
*/
|
||||
opc_uint32_t opcZipLoadSegment(opcZip *zip, const xmlChar *partName, opc_bool_t rels_segment, opcZipSegmentInfo_t *info);
|
||||
|
||||
/**
|
||||
Create a segment with the given parameters.
|
||||
\return Returns the segment_id.
|
||||
*/
|
||||
opc_uint32_t opcZipCreateSegment(opcZip *zip,
|
||||
const xmlChar *partName,
|
||||
opc_bool_t relsSegment,
|
||||
opc_uint32_t segment_size,
|
||||
opc_uint32_t growth_hint,
|
||||
opc_uint16_t compression_method,
|
||||
opc_uint16_t bit_flag);
|
||||
|
||||
/**
|
||||
Creates an input stream for the segment with \c segment_id.
|
||||
\see opcZipLoadSegment
|
||||
\see opcZipCreateSegment
|
||||
*/
|
||||
opcZipInputStream *opcZipOpenInputStream(opcZip *zip, opc_uint32_t segment_id);
|
||||
|
||||
/**
|
||||
Free all resources of the input stream.
|
||||
*/
|
||||
opc_error_t opcZipCloseInputStream(opcZip *zip, opcZipInputStream *stream);
|
||||
|
||||
/**
|
||||
Reads at most \c buf_len bytes from the input stream into \c buf.
|
||||
\return Returns the number of bytes read.
|
||||
*/
|
||||
opc_uint32_t opcZipReadInputStream(opcZip *zip, opcZipInputStream *stream, opc_uint8_t *buf, opc_uint32_t buf_len);
|
||||
|
||||
|
||||
/**
|
||||
Creates an output stream for the segment with \c segment_id.
|
||||
If \c *segment_id is -1 then a new segment will be created.
|
||||
Otherwise the segment with \c *segment_id will be overwritten.
|
||||
*/
|
||||
opcZipOutputStream *opcZipCreateOutputStream(opcZip *zip,
|
||||
opc_uint32_t *segment_id,
|
||||
const xmlChar *partName,
|
||||
opc_bool_t relsSegment,
|
||||
opc_uint32_t segment_size,
|
||||
opc_uint32_t growth_hint,
|
||||
opc_uint16_t compression_method,
|
||||
opc_uint16_t bit_flag);
|
||||
|
||||
/**
|
||||
Opens an existing output stream for reading.
|
||||
The \c *segment_id will be set to -1 and reset on opcZipCloseOutputStream.
|
||||
\see opcZipCloseOutputStream
|
||||
*/
|
||||
opcZipOutputStream *opcZipOpenOutputStream(opcZip *zip, opc_uint32_t *segment_id);
|
||||
|
||||
/**
|
||||
Will close the stream and free all resources. Additionally the new segment id will be stored in \c *segment_id.
|
||||
\see opcZipOpenOutputStream
|
||||
*/
|
||||
opc_error_t opcZipCloseOutputStream(opcZip *zip, opcZipOutputStream *stream, opc_uint32_t *segment_id);
|
||||
|
||||
/**
|
||||
Writes \c buf_len bytes from \c buf to the output stream.
|
||||
\return Returns the number of bytes written.
|
||||
*/
|
||||
opc_uint32_t opcZipWriteOutputStream(opcZip *zip, opcZipOutputStream *stream, const opc_uint8_t *buf, opc_uint32_t buf_len);
|
||||
|
||||
/**
|
||||
Returns the first segment id or -1.
|
||||
Use the following code to iterate through all segments.
|
||||
\code
|
||||
for(opc_uint32_t segment_id=opcZipGetFirstSegmentId(zip);
|
||||
-1!=segment_id;
|
||||
segment_id=opcZipGetNextSegmentId(zip, segment_id)) {
|
||||
...
|
||||
}
|
||||
\endcode
|
||||
\see opcZipGetNextSegmentId
|
||||
*/
|
||||
opc_uint32_t opcZipGetFirstSegmentId(opcZip *zip);
|
||||
|
||||
/**
|
||||
Returns the next segment id or -1.
|
||||
\see opcZipGetFirstSegmentId
|
||||
*/
|
||||
opc_uint32_t opcZipGetNextSegmentId(opcZip *zip, opc_uint32_t segment_id);
|
||||
|
||||
/**
|
||||
Returns info about the given segment id.
|
||||
*/
|
||||
opc_error_t opcZipGetSegmentInfo(opcZip *zip, opc_uint32_t segment_id, const xmlChar **name, opc_bool_t *rels_segment, opc_uint32_t *crc);
|
||||
|
||||
/**
|
||||
Marks the given segments as deleted.
|
||||
\see opcZipGC
|
||||
*/
|
||||
opc_bool_t opcZipSegmentDelete(opcZip *zip, opc_uint32_t *first_segment, opc_uint32_t *last_segment, opcZipSegmentReleaseCallback* releaseCallback);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_ZIP_H */
|
||||
@@ -1,168 +0,0 @@
|
||||
/* include/plib/plib.h. Generated from plib.h by configure. */
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
#ifndef _PLIB_PLIB_H_
|
||||
#define _PLIB_PLIB_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define HAVE_STDINT_H 1
|
||||
#define HAVE_STDDEF_H 1
|
||||
#define HAVE_STDIO_H 1
|
||||
#define HAVE_STRING_H 1
|
||||
#define HAVE_LIMITS_H 1
|
||||
#define HAVE_STDLIB_H 1
|
||||
/* #undef HAVE_IO_H */
|
||||
#define HAVE_UNISTD_H 1
|
||||
#define HAVE_SYS_TYPES_H 1
|
||||
#define IS_CONFIGURED 1
|
||||
|
||||
#if !defined(IS_CONFIGURED)
|
||||
#if defined(WIN32)
|
||||
#define HAVE_STRING_H 1
|
||||
#define HAVE_STDINT_H 1
|
||||
#define HAVE_LIMITS_H 1
|
||||
#define HAVE_STDDEF_H 1
|
||||
#define HAVE_STDIO_H 1
|
||||
#define HAVE_STDLIB_H 1
|
||||
#define HAVE_IO_H
|
||||
#define snprintf _snprintf
|
||||
#else
|
||||
#error "configure not executed and we are not on a win32 machine? please run configure or define WIN32 is you are on a WIN32 platform."
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDDEF_H
|
||||
#include <stddef.h>
|
||||
typedef size_t pofs_t; // maximum file offset for eg. read write ops
|
||||
#else
|
||||
#error "system types can not be determined"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDIO_H
|
||||
#include <stdio.h>
|
||||
#else
|
||||
#error "system io can not be determined"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDINT_H
|
||||
#include <stdint.h>
|
||||
|
||||
typedef int8_t pint8_t;
|
||||
typedef uint8_t puint8_t;
|
||||
|
||||
typedef int16_t pint16_t;
|
||||
typedef uint16_t puint16_t;
|
||||
|
||||
typedef int32_t pint32_t;
|
||||
typedef uint32_t puint32_t;
|
||||
|
||||
typedef int64_t pint64_t;
|
||||
typedef uint64_t puint64_t;
|
||||
|
||||
typedef int pbool_t;
|
||||
|
||||
typedef size_t psize_t;
|
||||
|
||||
// INTN_MAX, INTN_MIN, UINTN_MAX
|
||||
#else
|
||||
#error "system types can not be determined"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_LIMITS_H
|
||||
#include <limits.h>
|
||||
#define PUINT8_MAX UCHAR_MAX
|
||||
#define PINT32_MAX INT_MAX
|
||||
#define PINT32_MIN INT_MIN
|
||||
#define PUINT32_MAX UINT_MAX
|
||||
#define PUINT32_MIN 0
|
||||
#define PUINT16_MAX USHRT_MAX
|
||||
#define PUINT16_MIN 0
|
||||
#else
|
||||
#error "limits can not be determined"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_IO_H
|
||||
#include <io.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
/**
|
||||
Converts an ASCII string to a xmlChar string. This only works for ASCII strings.
|
||||
*/
|
||||
#ifndef _X
|
||||
#define _X(s) BAD_CAST(s)
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
Converts an xmlChar string to an ASCII string. This only works for ASCII charsets.
|
||||
*/
|
||||
#ifndef _X2C
|
||||
#define _X2C(s) ((char*)(s))
|
||||
#endif
|
||||
|
||||
|
||||
#define PASSERT(e) assert(e)
|
||||
#ifdef NDEBUG
|
||||
#define PENSURE(e) (void)(e)
|
||||
#else
|
||||
#define PENSURE(e) assert(e)
|
||||
#endif
|
||||
#define PTRUE (0==0)
|
||||
#define PFALSE (0==1)
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* _PLIB_PLIB_H_ */
|
||||
Submodule lib/bzip2-1.0.6 deleted from 288acf97a1
Submodule lib/ffmpeg deleted from 8887991a31
Submodule lib/harfbuzz deleted from b28c282585
Submodule lib/leptonica deleted from cc03be70fd
Submodule lib/libmagic deleted from 1249b5cd02
Binary file not shown.
Binary file not shown.
Binary file not shown.
Submodule lib/libpng deleted from 301f7a1429
Submodule lib/libtiff deleted from 3db0ff91bc
Submodule lib/mupdf deleted from c50ac19e41
Submodule lib/onion deleted from 73329b61eb
Submodule lib/openjpeg deleted from ac3737372a
Submodule lib/tesseract deleted from f268e6615e
1
lmdb
1
lmdb
Submodule lmdb deleted from 5c012bbe03
@@ -4,10 +4,24 @@
|
||||
"type": "keyword",
|
||||
"doc_values": true
|
||||
},
|
||||
"_depth": {
|
||||
"type": "integer"
|
||||
},
|
||||
"path": {
|
||||
"type": "text",
|
||||
"analyzer": "path_analyzer",
|
||||
"copy_to": "suggest-path"
|
||||
"copy_to": "suggest-path",
|
||||
"fielddata": true,
|
||||
"fields": {
|
||||
"nGram": {
|
||||
"type": "text",
|
||||
"analyzer": "my_nGram"
|
||||
},
|
||||
"text": {
|
||||
"type": "text",
|
||||
"analyzer": "content_analyzer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"suggest-path": {
|
||||
"type": "completion",
|
||||
@@ -16,6 +30,10 @@
|
||||
"mime": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"thumbnail": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"videoc": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
@@ -36,6 +54,10 @@
|
||||
"type": "integer",
|
||||
"index": false
|
||||
},
|
||||
"pages": {
|
||||
"type": "integer",
|
||||
"index": false
|
||||
},
|
||||
"mtime": {
|
||||
"type": "integer"
|
||||
},
|
||||
@@ -108,7 +130,12 @@
|
||||
}
|
||||
},
|
||||
"tag": {
|
||||
"type": "keyword"
|
||||
"type": "keyword",
|
||||
"copy_to": "suggest-tag"
|
||||
},
|
||||
"suggest-tag": {
|
||||
"type": "completion",
|
||||
"analyzer": "case_insensitive_kw_analyzer"
|
||||
},
|
||||
"exif_make": {
|
||||
"type": "text"
|
||||
@@ -133,6 +160,12 @@
|
||||
},
|
||||
"exif_user_comment": {
|
||||
"type": "text"
|
||||
},
|
||||
"author": {
|
||||
"type": "text"
|
||||
},
|
||||
"modified_by": {
|
||||
"type": "text"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
{
|
||||
"description": "Copy _id to _tie",
|
||||
"description": "Copy _id to _tie, save path depth",
|
||||
"processors": [
|
||||
{
|
||||
"script": {
|
||||
"source": "ctx._tie = ctx._id;"
|
||||
"source": "ctx._tie = ctx._id; ctx._depth = ctx.path.length() == 0 ? 0 : 1 + ctx.path.length() - ctx.path.replace(\"/\", \"\").length();"
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
{
|
||||
"index": {
|
||||
"refresh_interval": "30s",
|
||||
"codec": "best_compression"
|
||||
"codec": "best_compression",
|
||||
"number_of_replicas": 0
|
||||
},
|
||||
"analysis": {
|
||||
"tokenizer": {
|
||||
|
||||
@@ -2,15 +2,15 @@
|
||||
|
||||
rm -rf index.sist2/
|
||||
|
||||
rm web/js/bundle.js 2> /dev/null
|
||||
cat `ls web/js/*.min.js` > web/js/bundle.js
|
||||
cat web/js/{util,dom,search}.js >> web/js/bundle.js
|
||||
rm src/static/js/bundle.js 2> /dev/null
|
||||
cat `ls src/static/js/*.min.js` > src/static/js/bundle.js
|
||||
cat src/static/js/{util,dom}.js >> src/static/js/bundle.js
|
||||
|
||||
rm web/css/bundle*.css 2> /dev/null
|
||||
cat web/css/*.min.css > web/css/bundle.css
|
||||
cat web/css/light.css >> web/css/bundle.css
|
||||
cat web/css/*.min.css > web/css/bundle_dark.css
|
||||
cat web/css/dark.css >> web/css/bundle_dark.css
|
||||
rm src/static/css/bundle*.css 2> /dev/null
|
||||
cat src/static/css/*.min.css > src/static/css/bundle.css
|
||||
cat src/static/css/light.css >> src/static/css/bundle.css
|
||||
cat src/static/css/*.min.css > src/static/css/bundle_dark.css
|
||||
cat src/static/css/dark.css >> src/static/css/bundle_dark.css
|
||||
|
||||
python3 scripts/mime.py > src/parsing/mime_generated.c
|
||||
python3 scripts/serve_static.py > src/web/static_generated.c
|
||||
|
||||
@@ -1,128 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
THREADS=$(nproc)
|
||||
|
||||
cd lib
|
||||
|
||||
cd mupdf
|
||||
CFLAGS=-fPIC make USE_SYSTEM_HARFBUZZ=yes USE_SYSTEM_OPENJPEG=yes HAVE_X11=no HAVE_GLUT=no -j $THREADS
|
||||
cd ..
|
||||
|
||||
mv mupdf/build/release/libmupdf.a .
|
||||
mv mupdf/build/release/libmupdf-third.a .
|
||||
|
||||
# openjp2
|
||||
cd openjpeg
|
||||
cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3 -DNDEBUG -fPIC"
|
||||
make -j $THREADS
|
||||
cd ..
|
||||
mv openjpeg/bin/libopenjp2.a .
|
||||
|
||||
# harfbuzz
|
||||
cd harfbuzz
|
||||
./autogen.sh
|
||||
CFLAGS=-fPIC ./configure --disable-shared --enable-static
|
||||
make -j $THREADS
|
||||
cd ..
|
||||
mv harfbuzz/src/.libs/libharfbuzz.a .
|
||||
|
||||
# ffmpeg
|
||||
cd ffmpeg
|
||||
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \
|
||||
--disable-ffprobe --disable-doc\
|
||||
--disable-manpages --disable-postproc --disable-avfilter \
|
||||
--disable-alsa --disable-lzma --disable-xlib --disable-debug\
|
||||
--disable-vdpau --disable-vaapi --disable-sdl2 --disable-network\
|
||||
--extra-cflags=-fPIC
|
||||
make -j $THREADS
|
||||
cd ..
|
||||
|
||||
mv ffmpeg/libavcodec/libavcodec.a .
|
||||
mv ffmpeg/libavformat/libavformat.a .
|
||||
mv ffmpeg/libavutil/libavutil.a .
|
||||
mv ffmpeg/libswresample/libswresample.a .
|
||||
mv ffmpeg/libswscale/libswscale.a .
|
||||
|
||||
# onion
|
||||
cd onion
|
||||
mkdir build 2> /dev/null
|
||||
cd build
|
||||
cmake -DONION_USE_SSL=false -DONION_USE_PAM=false -DONION_USE_PNG=false -DONION_USE_JPEG=false \
|
||||
-DONION_USE_JPEG=false -DONION_USE_XML2=false -DONION_USE_SYSTEMD=false -DONION_USE_SQLITE3=false \
|
||||
-DONION_USE_REDIS=false -DONION_USE_GC=false -DONION_USE_TESTS=false -DONION_EXAMPLES=false \
|
||||
-DONION_USE_BINDINGS_CPP=false ..
|
||||
make -j $THREADS
|
||||
cd ../..
|
||||
|
||||
mv onion/build/src/onion/libonion_static.a .
|
||||
|
||||
#bzip2
|
||||
cd bzip2-1.0.6
|
||||
make -j $THREADS
|
||||
cd ..
|
||||
mv bzip2-1.0.6/libbz2.a .
|
||||
|
||||
# magic
|
||||
cd libmagic
|
||||
./autogen.sh
|
||||
./configure --enable-static --disable-shared
|
||||
make -j $THREADS
|
||||
cd ..
|
||||
mv libmagic/src/.libs/libmagic.a .
|
||||
|
||||
# tesseract
|
||||
cd tesseract
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DSTATIC=on -DBUILD_TRAINING_TOOLS=off -DBUILD_TESTS=off -DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_CXX_FLAGS="-fPIC" -DAUTO_OPTIMIZE=off ..
|
||||
make -j $THREADS
|
||||
cd ../..
|
||||
mv tesseract/build/libtesseract.a .
|
||||
|
||||
# leptonica
|
||||
cd leptonica
|
||||
./autogen.sh
|
||||
CFLAGS="-fPIC" ./configure --without-zlib --without-jpeg --without-giflib \
|
||||
--without-giflib --without-libwebp --without-libwebpmux --without-libopenjpeg \
|
||||
--enable-static --disable-shared
|
||||
make -j $THREADS
|
||||
cd ..
|
||||
mv leptonica/src/.libs/liblept.a .
|
||||
|
||||
# tiff
|
||||
cd libtiff
|
||||
./autogen.sh
|
||||
CFLAGS="-fPIC" CXXFLAGS="-fPIC" CXX_FLAGS="-fPIC" ./configure --enable-static --disable-shared --disable-lzw --disable-jpeg --disable-webp \
|
||||
--disable-lzma --disable-zstd --disable-jbig
|
||||
make -j $THREADS
|
||||
cd ..
|
||||
mv libtiff/libtiff/.libs/libtiff.a .
|
||||
|
||||
# png
|
||||
cd libpng
|
||||
CFLAGS="-fPIC" ./configure --enable-static --disable-shared
|
||||
make -j $THREADS
|
||||
cd ..
|
||||
mv libpng/.libs/libpng16.a .
|
||||
|
||||
# openssl...
|
||||
git clone --depth 1 -b OpenSSL_1_1_0-stable https://github.com/openssl/openssl
|
||||
cd openssl
|
||||
./config --prefix=$(pwd)/../ssl
|
||||
make depend
|
||||
make -j $THREADS
|
||||
make install
|
||||
cd ..
|
||||
mv ./openssl/libcrypto.a ./openssl/libssl.a .
|
||||
|
||||
# curl
|
||||
wget -nc https://curl.haxx.se/download/curl-7.68.0.tar.gz
|
||||
tar -xzf curl-7.68.0.tar.gz
|
||||
cd curl-7.68.0
|
||||
./configure --disable-ldap --disable-ldaps --without-librtmp --disable-rtsp --disable-crypto-auth \
|
||||
--disable-smtp --without-libidn2 --without-nghttp2 --without-brotli --enable-static --disable-shared \
|
||||
--without-libpsl --with-ssl=$(pwd)/../ssl
|
||||
make -j $THREADS
|
||||
cd ..
|
||||
mv curl-7.68.0/lib/.libs/libcurl.a .
|
||||
@@ -14,4 +14,5 @@ def clean(filepath):
|
||||
for file in files:
|
||||
with open(file, "r") as f:
|
||||
data = json.dumps(json.load(f), separators=(",", ":")).encode()
|
||||
data += b'\0'
|
||||
print("char %s[%d] = {%s};" % (clean(file), len(data), ",".join(str(int(b)) for b in data)))
|
||||
|
||||
@@ -2,14 +2,18 @@ application/arj, arj
|
||||
application/base64, mme
|
||||
application/binhex, hqx
|
||||
application/book, boo|book
|
||||
application/CDFV2-corrupt,
|
||||
application/CDFV2, sdv
|
||||
application/clariscad, ccad
|
||||
application/commonground, dp
|
||||
application/csv,
|
||||
application/dicom, dcm
|
||||
application/drafting, drw
|
||||
application/epub+zip, epub
|
||||
application/freeloader, frl
|
||||
application/futuresplash, spl
|
||||
application/groupwise, vew
|
||||
application/gzip, gz
|
||||
application/gzip, gz|tgz
|
||||
application/hta, hta
|
||||
application/i-deas, unv
|
||||
application/iges, iges|igs
|
||||
@@ -17,7 +21,6 @@ application/inf, inf
|
||||
application/java-archive, jar
|
||||
application/java, class
|
||||
application/javascript,
|
||||
application/x-archive, a
|
||||
application/json, json
|
||||
application/marc, mrc
|
||||
application/mbedlet, mbd
|
||||
@@ -27,7 +30,9 @@ application/msword, doc|dot|w6w|wiz|word
|
||||
application/netmc, mcp
|
||||
application/octet-stream, bin|dump|gpg
|
||||
application/oda, oda
|
||||
application/ogg, ogv
|
||||
application/pdf, pdf
|
||||
application/pgp-keys,
|
||||
application/pgp-signature, pgp
|
||||
application/pkcs7-signature, p7s
|
||||
application/pkix-cert, cer|crt
|
||||
@@ -43,6 +48,10 @@ application/vda, vda
|
||||
application/vnd.fdf, fdf
|
||||
application/vnd.font-fontforge-sfd, sfd
|
||||
application/vnd.hp-hpgl, hgl|hpg|hpgl
|
||||
application/vnd.iccprofile, icm
|
||||
application/vnd.iccprofile, icm
|
||||
application/vnd.lotus-1-2-3,
|
||||
application/vnd.ms-cab-compressed, cab
|
||||
application/vnd.ms-excel, xlb|xlc|xll|xlm|xls|xlw
|
||||
application/vnd.ms-fontobject, eot
|
||||
application/vnd.ms-opentype, otf
|
||||
@@ -54,45 +63,75 @@ application/vnd.ms-project, mpp
|
||||
application/vnd.oasis.opendocument.base, odb
|
||||
application/vnd.oasis.opendocument.formula, odf
|
||||
application/vnd.oasis.opendocument.graphics, odg
|
||||
application/vnd.oasis.opendocument.presentation, odp
|
||||
application/vnd.oasis.opendocument.spreadsheet, ods
|
||||
application/vnd.oasis.opendocument.text, odt
|
||||
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
|
||||
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
|
||||
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
|
||||
application/vnd.symbian.install,
|
||||
application/vnd.tcpdump.pcap, pcap
|
||||
application/vnd.wap.wmlc, wmlc
|
||||
application/vnd.wap.wmlscriptc, wmlsc
|
||||
application/vnd.xara, web
|
||||
application/vocaltec-media-desc, vmd
|
||||
application/vocaltec-media-file, vmf
|
||||
application/warc, warc
|
||||
application/winhelp, hlp
|
||||
application/wordperfect6.0, w60
|
||||
application/wordperfect6.1, w61
|
||||
application/wordperfect, wp|wp5|wp6|wpd
|
||||
application/x-123, wk1
|
||||
application/x-7z-compressed, 7z
|
||||
application/x-aim, aim
|
||||
application/x-apple-diskimage,
|
||||
application/x-arc,
|
||||
application/x-archive, a
|
||||
application/x-atari-7800-rom, a78
|
||||
application/x-authorware-bin, aab
|
||||
application/x-authorware-map, aam
|
||||
application/x-authorware-seg, aas
|
||||
application/x-avira-qua,
|
||||
application/x-bcpio, bcpio
|
||||
application/x-bittorrent, torrent
|
||||
application/x-bsh, bsh
|
||||
application/x-bytecode.python, pyc
|
||||
application/x-bzip2, boz|bz2
|
||||
application/x-bzip, bz
|
||||
application/x-cbr, cbr
|
||||
application/x-cbz, cbz
|
||||
application/x-cdlink, vcd
|
||||
application/x-chat, cha|chat
|
||||
application/x-chrome-extension,
|
||||
application/x-cocoa, cco
|
||||
application/x-conference, nsc
|
||||
application/x-coredump,
|
||||
application/x-cpio, cpio
|
||||
application/x-dbf, dbf
|
||||
application/x-dbt,
|
||||
application/x-debian-package, deb
|
||||
application/x-deepv, deepv
|
||||
application/x-director, dcr|dir|dxr
|
||||
application/x-director, dir|dxr
|
||||
application/x-dmp, dmp
|
||||
application/x-dosdriver,
|
||||
application/x-dosexec, dll
|
||||
application/x-dvi, dvi
|
||||
application/x-elc, elc
|
||||
application/x-empty,
|
||||
application/x-envoy, env|evy
|
||||
application/x-esrehber, es
|
||||
application/x-excel, xla|xld|xlk|xlt|xlv
|
||||
application/x-executable, exe
|
||||
application/x-font-gdos,
|
||||
application/x-font-pf2, pf2
|
||||
application/x-font-pfm, pfm
|
||||
application/x-font-sfn,
|
||||
application/x-font-ttf, ttf|ttc
|
||||
application/x-fpt,
|
||||
application/x-freelance, pre
|
||||
application/x-gamecube-rom,
|
||||
application/x-gdbm,
|
||||
application/x-gettext-translation,
|
||||
application/x-git,
|
||||
application/x-gsp, gsp
|
||||
application/x-gss, gss
|
||||
@@ -102,46 +141,68 @@ application/x-hdf, hdf
|
||||
application/x-helpfile, help
|
||||
application/x-httpd-imap, imap
|
||||
application/x-ima, ima
|
||||
application/x-innosetup,
|
||||
application/x-internett-signup, ins
|
||||
application/x-inventor, iv
|
||||
application/x-ip2, ip
|
||||
application/x-java-applet,
|
||||
application/x-java-commerce, jcm
|
||||
application/x-java-image,
|
||||
application/x-java-jmod, jmod
|
||||
application/x-java-keystore,
|
||||
application/x-kdelnk,
|
||||
application/x-koan, skd|skm|skp|skt
|
||||
application/x-latex, latex|ltx
|
||||
application/x-livescreen, ivy
|
||||
application/x-lotus, wq1
|
||||
application/x-lz4+json, jsonlz4
|
||||
application/x-lz4, lz4
|
||||
application/x-lz4, lz4
|
||||
application/x-lzh-compressed,
|
||||
application/x-lzh, lzh
|
||||
application/x-lzip, lz
|
||||
application/x-lzma, lzma
|
||||
application/x-lzop, lzo
|
||||
application/x-lzx, lzx
|
||||
application/x-mach-binary, jnilib|dylib
|
||||
application/x-mach-executable,
|
||||
application/x-magic-cap-package-1.0, mc$
|
||||
application/x-mathcad, mcd
|
||||
application/x-maxis-dbpf,
|
||||
application/x-meme, mm
|
||||
application/x-midi, midi
|
||||
application/x-mif, mif
|
||||
application/x-mix-transfer, nix
|
||||
application/xml, opf
|
||||
application/x-mobipocket-ebook, mobi
|
||||
application/vnd.amazon.mobi8-ebook, azw|azw3
|
||||
application/x-msaccess, accdb
|
||||
application/x-ms-compress-szdd, fon
|
||||
application/x-ms-pdb, pdb
|
||||
application/x-ms-reader, lit
|
||||
application/x-n64-rom, z64
|
||||
application/x-navi-animation, ani
|
||||
application/x-navidoc, nvd
|
||||
application/x-navimap, map
|
||||
application/x-navistyle, stl
|
||||
application/x-nes-rom, nes
|
||||
application/x-netcdf, cdf|nc
|
||||
application/x-newton-compatible-pkg, pkg
|
||||
application/x-nintendo-ds-rom,
|
||||
application/x-object, o
|
||||
application/x-omcdatamaker, omcd
|
||||
application/x-omc, omc
|
||||
application/x-omcregerator, omcr
|
||||
application/x-pagemaker, pm4|pm5
|
||||
application/x-pcl, pcl
|
||||
application/x-pgp-keyring,
|
||||
application/x-pixclscript, plx
|
||||
application/x-pkcs7-certreqresp, p7r
|
||||
application/x-pkcs7-signature, p7a
|
||||
application/x-project, mpc|mpt|mpv|mpx
|
||||
application/x-qpro, wb1
|
||||
application/x-rar, rar
|
||||
application/x-rpm, rpm
|
||||
application/x-sdp, sdp
|
||||
application/x-sea, sea
|
||||
application/x-seelogo, sl
|
||||
@@ -149,12 +210,17 @@ application/x-setupscript,
|
||||
application/x-sharedlib, so
|
||||
application/x-shar, shar
|
||||
application/x-shockwave-flash, swf
|
||||
application/x-snappy-framed,
|
||||
application/x-sprite, spr|sprite
|
||||
application/x-sqlite3,
|
||||
application/x-stargallery-thm,
|
||||
application/x-stuffit, sit
|
||||
application/x-sv4cpio, sv4cpio
|
||||
application/x-sv4crc, sv4crc
|
||||
application/x-tar, tar
|
||||
application/x-tbook, sbk|tbk
|
||||
application/x-terminfo,
|
||||
application/x-terminfo2,
|
||||
application/x-texinfo, texi|texinfo
|
||||
application/x-tex-tfm, tfm
|
||||
application/x-ustar, ustar
|
||||
@@ -163,16 +229,22 @@ application/x-vnd.audioexplosion.mzz, mzz
|
||||
application/x-vnd.ls-xpix, xpix
|
||||
application/x-vrml, vrml
|
||||
application/x-wais-source, src|wsrc
|
||||
application/x-wine-extension-ini,
|
||||
application/x-wintalk, wtk
|
||||
application/x-world, svr
|
||||
application/x-wri, wri
|
||||
application/x-x509-ca-cert, der
|
||||
application/x-xz, xz
|
||||
application/x-zip,
|
||||
application/x-zstd, zst
|
||||
application/zip, zip
|
||||
application/zlib, z
|
||||
!audio/basic, au
|
||||
audio/it, it
|
||||
audio/make, funk|my|pfunk
|
||||
audio/midi, kar
|
||||
audio/mid, rmi
|
||||
audio/mp4, m4b
|
||||
audio/mpeg, m2a|mpa
|
||||
audio/ogg, ogg
|
||||
audio/s3m, s3m
|
||||
@@ -180,7 +252,10 @@ audio/tsp-audio, tsi
|
||||
audio/tsplayer, tsp
|
||||
audio/vnd.qcelp, qcp
|
||||
audio/voxware, vox
|
||||
audio/x-aiff, aiff|aif
|
||||
audio/x-flac, flac
|
||||
audio/x-gsm, gsd|gsm
|
||||
audio/x-hx-aac-adts,
|
||||
audio/x-jam, jam
|
||||
audio/x-liveaudio, lam
|
||||
audio/x-m4a, m4a
|
||||
@@ -194,17 +269,24 @@ audio/x-nspaudio, lma
|
||||
audio/x-pn-realaudio, ram|rm|rmm|rmp
|
||||
audio/x-psid, sid
|
||||
audio/x-realaudio, ra
|
||||
audio/x-s3m,
|
||||
audio/x-twinvq-plugin, vqe|vql
|
||||
audio/x-twinvq, vqf
|
||||
audio/x-voc, voc
|
||||
audio/x-wav, wav
|
||||
!audio/x-xbox360-executable, xex
|
||||
!audio/x-xbox-executable, xbe
|
||||
font/otf,
|
||||
font/sfnt,
|
||||
font/woff2, woff2
|
||||
font/woff, woff
|
||||
image/bmp,
|
||||
image/cmu-raster, rast
|
||||
image/fif, fif
|
||||
image/florian, flo|turbot
|
||||
image/g3fax, g3
|
||||
image/gif, gif
|
||||
image/heic, heic
|
||||
image/ief, ief|iefs
|
||||
image/jpeg, jfif|jfif-tbnl|jpe|jpeg|jpg
|
||||
image/jutvision, jut
|
||||
@@ -213,6 +295,9 @@ image/pict, pic|pict
|
||||
image/png, png|x-png
|
||||
!image/svg, svg
|
||||
!image/svg+xml,
|
||||
image/tiff,
|
||||
!image/vnd.adobe.photoshop, psd
|
||||
!image/vnd.djvu, djvu
|
||||
image/vnd.fpx, fpx
|
||||
image/vnd.microsoft.icon,
|
||||
image/vnd.rn-realflash, rf
|
||||
@@ -220,9 +305,15 @@ image/vnd.rn-realpix, rp
|
||||
image/vnd.wap.wbmp, wbmp
|
||||
image/vnd.xiff, xif
|
||||
image/webp, webp
|
||||
image/wmf,
|
||||
image/x-3ds, 3ds
|
||||
image/x-award-bioslogo,
|
||||
image/x-cmu-raster, ras
|
||||
image/x-cur, tga
|
||||
image/x-dwg, dwg|dxf|svf
|
||||
image/x-eps,
|
||||
image/x-exr, exr
|
||||
image/x-gem,
|
||||
image/x-icns,
|
||||
!image/x-icon, ico
|
||||
image/x-jg, art
|
||||
@@ -236,32 +327,31 @@ image/x-portable-graymap, pgm
|
||||
image/x-portable-pixmap, ppm
|
||||
image/x-quicktime, qif|qti|qtif
|
||||
image/x-rgb, rgb
|
||||
image/x-tga,
|
||||
image/x-tiff, tif|tiff
|
||||
image/tiff,
|
||||
image/x-win-bitmap,
|
||||
!image/x-xcf, xcf
|
||||
!image/x-xpixmap, xpm
|
||||
image/x-xwindowdump, xwd
|
||||
message/news,
|
||||
message/rfc822, mht|mhtml|mime
|
||||
model/vnd.dwf, dwf
|
||||
model/vnd.gdl, gdl
|
||||
model/vnd.gs.gdl, gdsl
|
||||
model/vrml, wrz
|
||||
model/x-pov, pov
|
||||
text/asp, asp
|
||||
text/css, css
|
||||
text/x-sass, sass
|
||||
text/x-scss, scss
|
||||
text/html, acgi|htm|html|htmls|htx|shtml
|
||||
text/javascript, js
|
||||
text/mcf, mcf
|
||||
text/pascal, pas
|
||||
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml
|
||||
text/PGP,
|
||||
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
|
||||
application/vnd.coffeescript, coffee
|
||||
text/richtext, rt|rtf|rtx
|
||||
text/rtf,
|
||||
text/scriplet, wsc
|
||||
text/x-awk, awk
|
||||
!video/x-jng, jng
|
||||
video/x-mng, mng
|
||||
image/x-cur, tga
|
||||
image/x-xwindowdump, xwd
|
||||
!image/vnd.adobe.photoshop, psd
|
||||
text/tab-separated-values, tsv
|
||||
text/troff, man|me|ms|roff|t|tr
|
||||
text/uri-list, uji|unis|uri|uris
|
||||
@@ -273,6 +363,7 @@ text/webviewhtml, htt
|
||||
text/x-Algol68,
|
||||
text/x-asm, asm|s
|
||||
text/x-audiosoft-intra, aip
|
||||
text/x-awk, awk
|
||||
text/x-bcpl,
|
||||
text/x-c, c|cc|h
|
||||
text/x-c++, cpp|cxx|c++
|
||||
@@ -287,23 +378,31 @@ text/x-makefile, am|mak
|
||||
text/xml, xml|pom|iml|plist
|
||||
text/x-m, m
|
||||
text/x-msdos-batch, bat
|
||||
text/x-ms-regedit, reg
|
||||
text/x-objective-c,
|
||||
text/x-pascal, p
|
||||
text/x-perl, pl
|
||||
text/x-php, php
|
||||
text/x-po, po
|
||||
text/x-python, py
|
||||
text/x-ruby, rb
|
||||
text/x-sass, sass
|
||||
text/x-scss, scss
|
||||
text/x-server-parsed-html, ssi
|
||||
text/x-setext, etx
|
||||
text/x-sgml, sgm|sgml
|
||||
text/x-shellscript, sh
|
||||
text/x-speech, talk
|
||||
text/x-tcl,
|
||||
text/x-tex, tex
|
||||
text/x-uil, uil
|
||||
text/x-uuencode, uue
|
||||
text/x-vcalendar, vcs
|
||||
text/x-vcard, vcf
|
||||
video/animaflex, afl
|
||||
video/avi, avi
|
||||
video/avs-video, avs
|
||||
video/MP2T,
|
||||
video/mp4, mp4
|
||||
video/mpeg, m1v|m2v|mpe|mpeg|mpg
|
||||
video/quicktime, moov|mov|qt
|
||||
@@ -318,101 +417,35 @@ video/x-atomic3d-feature, fmf
|
||||
video/x-dl, dl
|
||||
video/x-dv, dif|dv
|
||||
video/x-fli, fli
|
||||
video/x-flv, flv
|
||||
video/x-isvideo, isu
|
||||
!video/x-jng, jng
|
||||
video/x-m4v, m4v
|
||||
video/x-matroska, mkv
|
||||
video/x-mng, mng
|
||||
video/x-motion-jpeg, mjpg
|
||||
video/x-ms-asf, asf|asx|wmv
|
||||
video/x-msvideo, divx
|
||||
video/x-qtc, qtc
|
||||
video/x-sgi-movie, movie|mv
|
||||
application/x-7z-compressed, 7z
|
||||
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
|
||||
text/x-po, po
|
||||
application/x-rpm, rpm
|
||||
application/x-debian-package, deb
|
||||
application/vnd.iccprofile, icm
|
||||
application/dicom, dcm
|
||||
image/x-exr, exr
|
||||
application/vnd.iccprofile, icm
|
||||
video/x-matroska, mkv
|
||||
application/x-empty,
|
||||
model/vnd.gdl, gdl
|
||||
model/vnd.gs.gdl, gdsl
|
||||
font/woff, woff
|
||||
font/woff2, woff2
|
||||
application/epub+zip, epub
|
||||
application/x-mobipocket-ebook, mobi
|
||||
audio/x-flac, flac
|
||||
application/x-rar, rar
|
||||
video/x-msvideo, divx
|
||||
video/x-flv, flv
|
||||
application/x-kdelnk,
|
||||
text/x-tcl,
|
||||
application/ogg, ogv
|
||||
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
|
||||
application/vnd.ms-cab-compressed, cab
|
||||
audio/mp4, m4b
|
||||
!image/vnd.djvu, djvu
|
||||
application/x-ms-reader, lit
|
||||
application/CDFV2-corrupt,
|
||||
text/x-vcard, vcf
|
||||
application/x-innosetup,
|
||||
application/winhelp, hlp
|
||||
image/x-tga,
|
||||
application/x-wine-extension-ini,
|
||||
application/x-cbz, cbz
|
||||
application/x-cbr, cbr
|
||||
application/x-ms-compress-szdd, fon
|
||||
application/x-atari-7800-rom, a78
|
||||
application/x-nes-rom, nes
|
||||
application/x-font-pfm, pfm
|
||||
application/x-gettext-translation,
|
||||
image/wmf,
|
||||
application/pgp-keys,
|
||||
image/x-3ds, 3ds
|
||||
application/x-lz4, lz4
|
||||
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
|
||||
application/vnd.oasis.opendocument.presentation, odp
|
||||
application/x-msaccess, accdb
|
||||
application/vnd.oasis.opendocument.spreadsheet, ods
|
||||
audio/x-aiff, aiff|aif
|
||||
text/x-ms-regedit, reg
|
||||
application/x-gamecube-rom,
|
||||
application/x-nintendo-ds-rom,
|
||||
text/x-objective-c,
|
||||
application/x-font-gdos,
|
||||
application/x-apple-diskimage,
|
||||
application/x-zstd, zst
|
||||
video/x-m4v, m4v
|
||||
message/news,
|
||||
application/vnd.symbian.install,
|
||||
application/x-lzh-compressed,
|
||||
application/x-dosdriver,
|
||||
application/vnd.tcpdump.pcap, pcap
|
||||
x-epoc/x-sisx-app,
|
||||
application/x-avira-qua,
|
||||
video/MP2T,
|
||||
application/x-snappy-framed,
|
||||
application/x-lz4+json, jsonlz4
|
||||
application/x-dmp, dmp
|
||||
application/zlib, z
|
||||
application/x-pgp-keyring,
|
||||
application/x-gdbm,
|
||||
application/x-font-pf2, pf2
|
||||
application/x-zip,
|
||||
application/x-coredump,
|
||||
application/x-java-jmod, jmod
|
||||
application/x-terminfo,
|
||||
application/x-terminfo2,
|
||||
application/x-arc,
|
||||
application/vnd.lotus-1-2-3,
|
||||
image/x-win-bitmap,
|
||||
application/x-maxis-dbpf,
|
||||
text/PGP,
|
||||
audio/x-hx-aac-adts,
|
||||
application/x-chrome-extension,
|
||||
image/heic, heic
|
||||
image/x-gem,
|
||||
application/x-lzma, lzma
|
||||
application/warc, warc
|
||||
application/x-lz4, lz4
|
||||
application/x-lzip, lz
|
||||
application/x-lzop, lzo
|
||||
application/x-zstd-dictionary,
|
||||
application/vnd.ms-outlook, msg
|
||||
image/x-olympus-orf, orf
|
||||
image/x-nikon-nef, nef
|
||||
image/x-fuji-raf, raf
|
||||
image/x-panasonic-raw, rw2|raw
|
||||
image/x-adobe-dng, dng
|
||||
image/x-canon-cr2, cr2
|
||||
image/x-canon-crw, crw
|
||||
image/x-dcraw,
|
||||
image/x-kodak-dcr, dcr
|
||||
image/x-kodak-k25, k25
|
||||
image/x-kodak-kdc, kdc
|
||||
image/x-minolta-mrw, mrw
|
||||
image/x-pentax-pef, pef
|
||||
image/x-sigma-x3f, x3f
|
||||
image/x-sony-arw, arw
|
||||
image/x-sony-sr2, sr2
|
||||
image/x-sony-srf, srf
|
||||
image/x-epson-erf, erf
|
||||
|
@@ -18,7 +18,6 @@ major_mime = {
|
||||
|
||||
pdf = (
|
||||
"application/pdf",
|
||||
"application/x-cbz",
|
||||
"application/epub+zip",
|
||||
"application/vnd.ms-xpsdocument",
|
||||
)
|
||||
@@ -62,6 +61,40 @@ doc = (
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
||||
)
|
||||
|
||||
# Mobipocket / Kindle e-book MIME types (tested with `mime in mobi` by mime_id).
mobi = (
    "application/x-mobipocket-ebook",
    "application/vnd.amazon.mobi8-ebook",
)

# Markup MIME types (tested with `mime in markup` by mime_id).
markup = (
    "text/xml",
    "text/html",
    "text/x-sgml",
)

# Camera RAW image MIME types (tested with `mime in raw` by mime_id).
# Each type is listed exactly once; membership tests do not need duplicates.
raw = (
    "image/x-olympus-orf",
    "image/x-nikon-nef",
    "image/x-fuji-raf",
    "image/x-panasonic-raw",
    "image/x-adobe-dng",
    "image/x-canon-cr2",
    "image/x-canon-crw",
    "image/x-dcraw",
    "image/x-kodak-dcr",
    "image/x-kodak-k25",
    "image/x-kodak-kdc",
    "image/x-minolta-mrw",
    "image/x-pentax-pef",
    "image/x-sigma-x3f",
    "image/x-sony-arw",
    "image/x-sony-sr2",
    "image/x-sony-srf",
    "image/x-epson-erf",
)
|
||||
|
||||
cnt = 1
|
||||
|
||||
|
||||
@@ -82,6 +115,12 @@ def mime_id(mime):
|
||||
mime_id += " | 0x08000000"
|
||||
elif mime in doc:
|
||||
mime_id += " | 0x04000000"
|
||||
elif mime in mobi:
|
||||
mime_id += " | 0x02000000"
|
||||
elif mime in markup:
|
||||
mime_id += " | 0x01000000"
|
||||
elif mime in raw:
|
||||
mime_id += " | 0x00800000"
|
||||
elif mime == "application/x-empty":
|
||||
return "1"
|
||||
return mime_id
|
||||
@@ -91,7 +130,7 @@ def clean(t):
|
||||
return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_")
|
||||
|
||||
|
||||
with open("mime.csv") as f:
|
||||
with open("scripts/mime.csv") as f:
|
||||
for l in f:
|
||||
mime, ext_list = l.split(",")
|
||||
if l.startswith("!"):
|
||||
@@ -103,7 +142,7 @@ with open("mime.csv") as f:
|
||||
print("// **Generated by mime.py**")
|
||||
print("#ifndef MIME_GENERATED_C")
|
||||
print("#define MIME_GENERATED_C")
|
||||
print("#include <glib-2.0/glib.h>\n")
|
||||
print("#include <glib.h>\n")
|
||||
print("#include <stdlib.h>\n")
|
||||
# Enum
|
||||
print("enum mime {")
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
files = [
|
||||
"web/css/bundle.css",
|
||||
"web/css/bundle_dark.css",
|
||||
"web/js/bundle.js",
|
||||
"web/img/sprite-skin-flat.png",
|
||||
"web/img/sprite-skin-flat-dark.png",
|
||||
"web/search.html",
|
||||
"src/static/css/bundle.css",
|
||||
"src/static/css/bundle_dark.css",
|
||||
"src/static/js/bundle.js",
|
||||
"src/static/js/search.js",
|
||||
"src/static/img/sprite-skin-flat.png",
|
||||
"src/static/img/sprite-skin-flat-dark.png",
|
||||
"src/static/search.html",
|
||||
"src/static/stats.html",
|
||||
]
|
||||
|
||||
|
||||
|
||||
230
src/cli.c
230
src/cli.c
@@ -1,6 +1,5 @@
|
||||
#include "cli.h"
|
||||
#include "ctx.h"
|
||||
|
||||
#include <tesseract/capi.h>
|
||||
|
||||
#define DEFAULT_OUTPUT "index.sist2/"
|
||||
@@ -10,12 +9,15 @@
|
||||
#define DEFAULT_REWRITE_URL ""
|
||||
|
||||
#define DEFAULT_ES_URL "http://localhost:9200"
|
||||
#define DEFAULT_ES_INDEX "sist2"
|
||||
#define DEFAULT_BATCH_SIZE 100
|
||||
|
||||
#define DEFAULT_BIND_ADDR "localhost"
|
||||
#define DEFAULT_PORT "4090"
|
||||
#define DEFAULT_LISTEN_ADDRESS "localhost:4090"
|
||||
#define DEFAULT_TREEMAP_THRESHOLD 0.0005
|
||||
|
||||
const char* TESS_DATAPATHS[] = {
|
||||
#define DEFAULT_MAX_MEM_BUFFER 2000
|
||||
|
||||
const char *TESS_DATAPATHS[] = {
|
||||
"/usr/share/tessdata/",
|
||||
"/usr/share/tesseract-ocr/tessdata/",
|
||||
"./",
|
||||
@@ -31,10 +33,18 @@ scan_args_t *scan_args_create() {
|
||||
return args;
|
||||
}
|
||||
|
||||
exec_args_t *exec_args_create() {
|
||||
exec_args_t *args = calloc(sizeof(exec_args_t), 1);
|
||||
return args;
|
||||
}
|
||||
|
||||
void scan_args_destroy(scan_args_t *args) {
|
||||
if (args->name != NULL) {
|
||||
free(args->name);
|
||||
}
|
||||
if (args->incremental != NULL) {
|
||||
free(args->incremental);
|
||||
}
|
||||
if (args->path != NULL) {
|
||||
free(args->path);
|
||||
}
|
||||
@@ -54,6 +64,10 @@ void web_args_destroy(web_args_t *args) {
|
||||
free(args);
|
||||
}
|
||||
|
||||
/**
 * Release an exec_args_t structure.
 *
 * Only the structure itself is freed; the memory its members point to
 * is not released here.
 */
void exec_args_destroy(exec_args_t *args) {
    free(args);
}
|
||||
|
||||
int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
if (argc < 2) {
|
||||
fprintf(stderr, "Required positional argument: PATH.\n");
|
||||
@@ -69,10 +83,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
}
|
||||
|
||||
if (args->incremental != NULL) {
|
||||
abs_path = abspath(args->incremental);
|
||||
args->incremental = abspath(args->incremental);
|
||||
if (abs_path == NULL) {
|
||||
fprintf(stderr, "File not found: %s\n", args->incremental);
|
||||
return 1;
|
||||
sist_log("main.c", SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
|
||||
args->incremental = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -114,7 +128,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->depth < 0) {
|
||||
if (args->depth <= 0) {
|
||||
args->depth = G_MAXINT32;
|
||||
} else {
|
||||
args->depth += 1;
|
||||
@@ -122,6 +136,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
|
||||
if (args->name == NULL) {
|
||||
args->name = g_path_get_basename(args->output);
|
||||
} else {
|
||||
char* tmp = malloc(strlen(args->name) + 1);
|
||||
strcpy(tmp, args->name);
|
||||
args->name = tmp;
|
||||
}
|
||||
|
||||
if (args->rewrite_url == NULL) {
|
||||
@@ -146,7 +164,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
|
||||
char filename[128];
|
||||
sprintf(filename, "%s.traineddata", args->tesseract_lang);
|
||||
const char * path = find_file_in_paths(TESS_DATAPATHS, filename);
|
||||
const char *path = find_file_in_paths(TESS_DATAPATHS, filename);
|
||||
if (path == NULL) {
|
||||
LOG_FATAL("cli.c", "Could not find tesseract language file!");
|
||||
}
|
||||
@@ -162,6 +180,36 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
args->tesseract_path = path;
|
||||
}
|
||||
|
||||
if (args->exclude_regex != NULL) {
|
||||
const char *error;
|
||||
int error_offset;
|
||||
|
||||
pcre *re = pcre_compile(args->exclude_regex, 0, &error, &error_offset, 0);
|
||||
if (error != NULL) {
|
||||
LOG_FATALF("cli.c", "pcre_compile returned error: %s (offset:%d)", error, error_offset)
|
||||
}
|
||||
|
||||
pcre_extra *re_extra = pcre_study(re, 0, &error);
|
||||
if (error != NULL) {
|
||||
LOG_FATALF("cli.c", "pcre_study returned error: %s", error)
|
||||
}
|
||||
|
||||
ScanCtx.exclude = re;
|
||||
ScanCtx.exclude_extra = re_extra;
|
||||
} else {
|
||||
ScanCtx.exclude = NULL;
|
||||
}
|
||||
|
||||
if (args->treemap_threshold_str == 0) {
|
||||
args->treemap_threshold = DEFAULT_TREEMAP_THRESHOLD;
|
||||
} else {
|
||||
args->treemap_threshold = atof(args->treemap_threshold_str);
|
||||
}
|
||||
|
||||
if (args->max_memory_buffer == 0) {
|
||||
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
|
||||
}
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
|
||||
LOG_DEBUGF("cli.c", "arg size=%d", args->size)
|
||||
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
|
||||
@@ -175,6 +223,38 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
|
||||
LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
|
||||
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
|
||||
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
|
||||
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
|
||||
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
|
||||
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Read the whole file at script_path into a newly allocated,
 * NUL-terminated buffer.
 *
 * @param script_path path of the script file to load
 * @param dst         receives the malloc()'d buffer on success; the caller
 *                    owns it and must free() it. Left untouched on failure.
 * @return 0 on success, 1 on failure (a message is printed to stderr)
 */
int load_script(const char *script_path, char **dst) {
    int fd = open(script_path, O_RDONLY);
    if (fd == -1) {
        fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
        return 1;
    }

    // Query the size through the open descriptor so that it describes the
    // very file we are about to read (no window between stat and open).
    struct stat info;
    if (fstat(fd, &info) == -1) {
        fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
        close(fd);
        return 1;
    }

    size_t size = (size_t) info.st_size;
    char *buf = malloc(size + 1);
    if (buf == NULL) {
        fprintf(stderr, "Error reading script file '%s': %s\n", script_path, strerror(errno));
        close(fd);
        return 1;
    }

    // read() may return fewer bytes than requested; keep going until the
    // expected size is reached or end-of-file is hit.
    size_t total = 0;
    while (total < size) {
        ssize_t n = read(fd, buf + total, size - total);
        if (n < 0) {
            if (errno == EINTR) {
                continue;
            }
            fprintf(stderr, "Error reading script file '%s': %s\n", script_path, strerror(errno));
            free(buf);
            close(fd);
            return 1;
        }
        if (n == 0) {
            // File is shorter than fstat() reported; keep what was read
            break;
        }
        total += (size_t) n;
    }
    close(fd);

    // Terminate after the bytes actually read, not after the expected size
    buf[total] = '\0';
    *dst = buf;

    return 0;
}
|
||||
@@ -188,6 +268,13 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->threads == 0) {
|
||||
args->threads = 1;
|
||||
} else if (args->threads < 0) {
|
||||
fprintf(stderr, "Invalid threads: %d\n", args->threads);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *index_path = abspath(argv[1]);
|
||||
if (index_path == NULL) {
|
||||
fprintf(stderr, "File not found: %s\n", argv[1]);
|
||||
@@ -201,30 +288,14 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
args->es_url = DEFAULT_ES_URL;
|
||||
}
|
||||
|
||||
if (args->es_index == NULL) {
|
||||
args->es_index = DEFAULT_ES_INDEX;
|
||||
}
|
||||
|
||||
if (args->script_path != NULL) {
|
||||
struct stat info;
|
||||
int res = stat(args->script_path, &info);
|
||||
|
||||
if (res == -1) {
|
||||
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
|
||||
if (load_script(args->script_path, &args->script) != 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
int fd = open(args->script_path, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
|
||||
args->script = malloc(info.st_size + 1);
|
||||
res = read(fd, args->script, info.st_size);
|
||||
if (res == -1) {
|
||||
fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
|
||||
*(args->script + info.st_size) = '\0';
|
||||
close(fd);
|
||||
}
|
||||
|
||||
if (args->batch_size == 0) {
|
||||
@@ -232,8 +303,10 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
}
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
|
||||
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
|
||||
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
|
||||
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
|
||||
LOG_DEBUGF("cli.c", "arg async_script=%s", args->async_script)
|
||||
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
|
||||
LOG_DEBUGF("cli.c", "arg print=%d", args->print)
|
||||
LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size)
|
||||
@@ -255,18 +328,57 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
|
||||
args->es_url = DEFAULT_ES_URL;
|
||||
}
|
||||
|
||||
if (args->bind == NULL) {
|
||||
args->bind = DEFAULT_BIND_ADDR;
|
||||
if (args->listen_address == NULL) {
|
||||
args->listen_address = DEFAULT_LISTEN_ADDRESS;
|
||||
}
|
||||
|
||||
if (args->port == NULL) {
|
||||
args->port = DEFAULT_PORT;
|
||||
if (args->es_index == NULL) {
|
||||
args->es_index = DEFAULT_ES_INDEX;
|
||||
}
|
||||
|
||||
if (args->credentials != NULL) {
|
||||
args->b64credentials = onion_base64_encode(args->credentials, (int) strlen(args->credentials));
|
||||
//Remove trailing newline
|
||||
*(args->b64credentials + strlen(args->b64credentials) - 1) = '\0';
|
||||
char *ptr = strstr(args->credentials, ":");
|
||||
if (ptr == NULL) {
|
||||
fprintf(stderr, "Invalid --auth format, see usage\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
strncpy(args->auth_user, args->credentials, (ptr - args->credentials));
|
||||
strcpy(args->auth_pass, ptr + 1);
|
||||
|
||||
if (strlen(args->auth_user) == 0) {
|
||||
fprintf(stderr, "--auth username must be at least one character long");
|
||||
return 1;
|
||||
}
|
||||
|
||||
args->auth_enabled = TRUE;
|
||||
} else {
|
||||
args->auth_enabled = FALSE;
|
||||
}
|
||||
|
||||
if (args->tag_credentials != NULL && args->credentials != NULL) {
|
||||
fprintf(stderr, "--auth and --tag-auth are mutually exclusive");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->tag_credentials != NULL) {
|
||||
char *ptr = strstr(args->tag_credentials, ":");
|
||||
if (ptr == NULL) {
|
||||
fprintf(stderr, "Invalid --tag-auth format, see usage\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
strncpy(args->auth_user, args->tag_credentials, (ptr - args->tag_credentials));
|
||||
strcpy(args->auth_pass, ptr + 1);
|
||||
|
||||
if (strlen(args->auth_user) == 0) {
|
||||
fprintf(stderr, "--tag-auth username must be at least one character long");
|
||||
return 1;
|
||||
}
|
||||
|
||||
args->tag_auth_enabled = TRUE;
|
||||
} else {
|
||||
args->tag_auth_enabled = FALSE;
|
||||
}
|
||||
|
||||
args->index_count = argc - 1;
|
||||
@@ -281,10 +393,12 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
|
||||
}
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
|
||||
LOG_DEBUGF("cli.c", "arg bind=%s", args->bind)
|
||||
LOG_DEBUGF("cli.c", "arg port=%s", args->port)
|
||||
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
|
||||
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)
|
||||
LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials)
|
||||
LOG_DEBUGF("cli.c", "arg b64credentials=%s", args->b64credentials)
|
||||
LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials)
|
||||
LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user)
|
||||
LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass)
|
||||
LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count)
|
||||
for (int i = 0; i < args->index_count; i++) {
|
||||
LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i])
|
||||
@@ -303,3 +417,39 @@ web_args_t *web_args_create() {
|
||||
return args;
|
||||
}
|
||||
|
||||
/**
 * Validate the exec-script arguments and fill in defaults.
 *
 * argv[1] is taken as the index path. es_url and es_index fall back to
 * DEFAULT_ES_URL / DEFAULT_ES_INDEX when unset, and the file named by
 * script_path is loaded into args->script.
 *
 * @return 0 when the arguments are usable, 1 when the positional argument
 *         is missing, the index path cannot be resolved, or the script
 *         cannot be loaded
 */
int exec_args_validate(exec_args_t *args, int argc, const char **argv) {

    if (argc < 2) {
        fprintf(stderr, "Required positional argument: PATH.\n");
        return 1;
    }

    const char *index_arg = argv[1];

    // The resolved path is only tested against NULL and then released;
    // the path is stored exactly as it was given on the command line.
    char *resolved = abspath(index_arg);
    if (resolved == NULL) {
        fprintf(stderr, "File not found: %s\n", index_arg);
        return 1;
    }
    args->index_path = index_arg;
    free(resolved);

    if (!args->es_url) {
        args->es_url = DEFAULT_ES_URL;
    }

    if (!args->es_index) {
        args->es_index = DEFAULT_ES_INDEX;
    }

    if (!args->script_path) {
        LOG_FATAL("cli.c", "--script-file argument is required");
    }

    int load_failed = load_script(args->script_path, &args->script) != 0;
    if (load_failed) {
        return 1;
    }

    LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
    LOG_DEBUGF("cli.c", "arg script=%s", args->script)
    return 0;
}
|
||||
|
||||
40
src/cli.h
40
src/cli.h
@@ -3,6 +3,8 @@
|
||||
|
||||
#include "sist.h"
|
||||
|
||||
#include "libscan/arc/arc.h"
|
||||
|
||||
typedef struct scan_args {
|
||||
float quality;
|
||||
int size;
|
||||
@@ -18,39 +20,71 @@ typedef struct scan_args {
|
||||
archive_mode_t archive_mode;
|
||||
char *tesseract_lang;
|
||||
const char *tesseract_path;
|
||||
char *exclude_regex;
|
||||
int fast;
|
||||
const char* treemap_threshold_str;
|
||||
double treemap_threshold;
|
||||
int max_memory_buffer;
|
||||
} scan_args_t;
|
||||
|
||||
scan_args_t *scan_args_create();
|
||||
|
||||
void scan_args_destroy(scan_args_t *args);
|
||||
|
||||
int scan_args_validate(scan_args_t *args, int argc, const char **argv);
|
||||
|
||||
typedef struct index_args {
|
||||
char *es_url;
|
||||
char *es_index;
|
||||
const char *index_path;
|
||||
const char *script_path;
|
||||
char *script;
|
||||
int print;
|
||||
int batch_size;
|
||||
int async_script;
|
||||
int force_reset;
|
||||
int threads;
|
||||
} index_args_t;
|
||||
|
||||
typedef struct web_args {
|
||||
char *es_url;
|
||||
char *bind;
|
||||
char *port;
|
||||
char *es_index;
|
||||
char *listen_address;
|
||||
char *credentials;
|
||||
char *b64credentials;
|
||||
char *tag_credentials;
|
||||
char auth_user[256];
|
||||
char auth_pass[256];
|
||||
int auth_enabled;
|
||||
int tag_auth_enabled;
|
||||
int index_count;
|
||||
const char **indices;
|
||||
} web_args_t;
|
||||
|
||||
/* Arguments of the `exec` sub-command, completed by exec_args_validate(). */
typedef struct exec_args {
|
||||
/* Elasticsearch URL; exec_args_validate() substitutes DEFAULT_ES_URL when NULL. */
char *es_url;
|
||||
/* Elasticsearch index name; exec_args_validate() substitutes DEFAULT_ES_INDEX when NULL. */
char *es_index;
|
||||
/* First positional argument (argv[1]), stored as given once abspath() resolved it. */
const char *index_path;
|
||||
/* Path of the script file; exec_args_validate() raises LOG_FATAL when it is NULL. */
const char *script_path;
|
||||
/* Not touched by exec_args_validate(); presumably forwarded as the `async`
 * flag of execute_update_script() - TODO confirm against the caller. */
int async_script;
|
||||
/* Filled by load_script(script_path, &script) in exec_args_validate(). */
char *script;
|
||||
} exec_args_t;
|
||||
|
||||
index_args_t *index_args_create();
|
||||
|
||||
void index_args_destroy(index_args_t *args);
|
||||
|
||||
web_args_t *web_args_create();
|
||||
|
||||
void web_args_destroy(web_args_t *args);
|
||||
|
||||
int index_args_validate(index_args_t *args, int argc, const char **argv);
|
||||
|
||||
int web_args_validate(web_args_t *args, int argc, const char **argv);
|
||||
|
||||
exec_args_t *exec_args_create();
|
||||
|
||||
void exec_args_destroy(exec_args_t *args);
|
||||
|
||||
int exec_args_validate(exec_args_t *args, int argc, const char **argv);
|
||||
|
||||
#endif
|
||||
|
||||
6
src/ctx.c
Normal file
6
src/ctx.c
Normal file
@@ -0,0 +1,6 @@
|
||||
#include "ctx.h"
|
||||
|
||||
/* Definitions of the global context objects that ctx.h declares `extern`. */
ScanCtx_t ScanCtx;
|
||||
WebCtx_t WebCtx;
|
||||
IndexCtx_t IndexCtx;
|
||||
LogCtx_t LogCtx;
|
||||
70
src/ctx.h
70
src/ctx.h
@@ -2,8 +2,23 @@
|
||||
#define SIST2_CTX_H
|
||||
|
||||
#include "sist.h"
|
||||
#include "tpool.h"
|
||||
#include "libscan/scan.h"
|
||||
#include "libscan/arc/arc.h"
|
||||
#include "libscan/comic/comic.h"
|
||||
#include "libscan/ebook/ebook.h"
|
||||
#include "libscan/font/font.h"
|
||||
#include "libscan/media/media.h"
|
||||
#include "libscan/ooxml/ooxml.h"
|
||||
#include "libscan/text/text.h"
|
||||
#include "libscan/mobi/scan_mobi.h"
|
||||
#include "libscan/raw/raw.h"
|
||||
#include "src/io/store.h"
|
||||
|
||||
struct {
|
||||
#include <glib.h>
|
||||
#include <pcre.h>
|
||||
|
||||
typedef struct {
|
||||
struct index_t index;
|
||||
|
||||
GHashTable *mime_table;
|
||||
@@ -11,14 +26,8 @@ struct {
|
||||
|
||||
tpool_t *pool;
|
||||
|
||||
int tn_size;
|
||||
int threads;
|
||||
int content_size;
|
||||
float tn_qscale;
|
||||
int depth;
|
||||
archive_mode_t archive_mode;
|
||||
int verbose;
|
||||
int very_verbose;
|
||||
|
||||
size_t stat_tn_size;
|
||||
size_t stat_index_size;
|
||||
@@ -26,28 +35,51 @@ struct {
|
||||
GHashTable *original_table;
|
||||
GHashTable *copy_table;
|
||||
|
||||
pthread_mutex_t mupdf_mu;
|
||||
char * tesseract_lang;
|
||||
const char * tesseract_path;
|
||||
} ScanCtx;
|
||||
pcre *exclude;
|
||||
pcre_extra *exclude_extra;
|
||||
int fast;
|
||||
|
||||
struct {
|
||||
scan_arc_ctx_t arc_ctx;
|
||||
scan_comic_ctx_t comic_ctx;
|
||||
scan_ebook_ctx_t ebook_ctx;
|
||||
scan_font_ctx_t font_ctx;
|
||||
scan_media_ctx_t media_ctx;
|
||||
scan_ooxml_ctx_t ooxml_ctx;
|
||||
scan_text_ctx_t text_ctx;
|
||||
scan_mobi_ctx_t mobi_ctx;
|
||||
scan_raw_ctx_t raw_ctx;
|
||||
} ScanCtx_t;
|
||||
|
||||
typedef struct {
|
||||
int verbose;
|
||||
int very_verbose;
|
||||
int no_color;
|
||||
} LogCtx;
|
||||
} LogCtx_t;
|
||||
|
||||
struct {
|
||||
typedef struct {
|
||||
char *es_url;
|
||||
char *es_index;
|
||||
int batch_size;
|
||||
} IndexCtx;
|
||||
tpool_t *pool;
|
||||
store_t *tag_store;
|
||||
GHashTable *tags;
|
||||
} IndexCtx_t;
|
||||
|
||||
struct {
|
||||
typedef struct {
|
||||
char *es_url;
|
||||
char *es_index;
|
||||
int index_count;
|
||||
char *b64credentials;
|
||||
struct index_t indices[16];
|
||||
} WebCtx;
|
||||
char *auth_user;
|
||||
char *auth_pass;
|
||||
int auth_enabled;
|
||||
int tag_auth_enabled;
|
||||
struct index_t indices[64];
|
||||
} WebCtx_t;
|
||||
|
||||
extern ScanCtx_t ScanCtx;
|
||||
extern WebCtx_t WebCtx;
|
||||
extern IndexCtx_t IndexCtx;
|
||||
extern LogCtx_t LogCtx;
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,11 +1,7 @@
|
||||
#include "elastic.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "web.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <cJSON/cJSON.h>
|
||||
|
||||
#include "static_generated.c"
|
||||
|
||||
@@ -13,19 +9,33 @@
|
||||
/* State of one bulk indexer: the target URL/index plus a singly linked
 * queue of pending bulk lines. */
typedef struct es_indexer {
|
||||
/* Number of lines currently in the queue: set to 0 by create_indexer(),
 * decremented by delete_queue() for every line it removes. */
int queued;
|
||||
/* Heap copy of the Elasticsearch URL made by create_indexer(). */
char *es_url;
|
||||
/* Heap copy of the index name made by create_indexer(). */
char *es_index;
|
||||
/* First queued line (NULL when the queue is empty); delete_queue() pops from here. */
es_bulk_line_t *line_head;
|
||||
/* Last queued line; delete_queue() resets it to NULL once the head becomes NULL. */
es_bulk_line_t *line_tail;
|
||||
} es_indexer_t;
|
||||
|
||||
|
||||
static es_indexer_t *Indexer;
|
||||
static __thread es_indexer_t *Indexer;
|
||||
|
||||
void delete_queue(int max);
|
||||
|
||||
void elastic_flush();
|
||||
|
||||
void elastic_cleanup() {
|
||||
elastic_flush();
|
||||
if (Indexer != NULL) {
|
||||
free(Indexer->es_index);
|
||||
free(Indexer->es_url);
|
||||
free(Indexer);
|
||||
}
|
||||
}
|
||||
|
||||
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
||||
|
||||
cJSON *line = cJSON_CreateObject();
|
||||
|
||||
cJSON_AddStringToObject(line, "_id", uuid_str);
|
||||
cJSON_AddStringToObject(line, "_index", "sist2");
|
||||
cJSON_AddStringToObject(line, "_index", IndexCtx.es_index);
|
||||
cJSON_AddStringToObject(line, "_type", "_doc");
|
||||
cJSON_AddItemReferenceToObject(line, "_source", document);
|
||||
|
||||
@@ -37,8 +47,12 @@ void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
||||
cJSON_Delete(line);
|
||||
}
|
||||
|
||||
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
||||
void index_json_func(void *arg) {
|
||||
es_bulk_line_t *line = arg;
|
||||
elastic_index_line(line);
|
||||
}
|
||||
|
||||
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
||||
char *json = cJSON_PrintUnformatted(document);
|
||||
|
||||
size_t json_len = strlen(json);
|
||||
@@ -50,10 +64,14 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
||||
bulk_line->next = NULL;
|
||||
|
||||
cJSON_free(json);
|
||||
elastic_index_line(bulk_line);
|
||||
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
|
||||
}
|
||||
|
||||
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {
|
||||
void execute_update_script(const char *script, int async, const char index_id[UUID_STR_LEN]) {
|
||||
|
||||
if (Indexer == NULL) {
|
||||
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
|
||||
}
|
||||
|
||||
cJSON *body = cJSON_CreateObject();
|
||||
cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
|
||||
@@ -64,12 +82,19 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
|
||||
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
|
||||
cJSON_AddStringToObject(term_obj, "index", index_id);
|
||||
|
||||
char * str = cJSON_Print(body);
|
||||
char *str = cJSON_Print(body);
|
||||
|
||||
char bulk_url[4096];
|
||||
snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?pretty", Indexer->es_url);
|
||||
response_t *r = web_post(bulk_url, str, "Content-Type: application/json");
|
||||
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
|
||||
if (async) {
|
||||
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
|
||||
Indexer->es_index);
|
||||
} else {
|
||||
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
|
||||
}
|
||||
response_t *r = web_post(bulk_url, str);
|
||||
if (!async) {
|
||||
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
|
||||
}
|
||||
cJSON *resp = cJSON_Parse(r->body);
|
||||
|
||||
cJSON_free(str);
|
||||
@@ -84,31 +109,39 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
|
||||
cJSON_free(error_str);
|
||||
}
|
||||
|
||||
if (async) {
|
||||
cJSON *task = cJSON_GetObjectItem(resp, "task");
|
||||
LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring);
|
||||
}
|
||||
|
||||
cJSON_Delete(resp);
|
||||
}
|
||||
|
||||
void elastic_flush() {
|
||||
|
||||
if (Indexer == NULL) {
|
||||
Indexer = create_indexer(IndexCtx.es_url);
|
||||
}
|
||||
|
||||
void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
|
||||
es_bulk_line_t *line = Indexer->line_head;
|
||||
|
||||
int count = 0;
|
||||
*count = 0;
|
||||
|
||||
size_t buf_size = 0;
|
||||
size_t buf_cur = 0;
|
||||
char *buf = malloc(1);
|
||||
char *buf = malloc(8192);
|
||||
size_t buf_capacity = 8192;
|
||||
|
||||
while (line != NULL && *count < max) {
|
||||
char action_str[256];
|
||||
snprintf(
|
||||
action_str, 256,
|
||||
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
|
||||
line->uuid_str, Indexer->es_index
|
||||
);
|
||||
|
||||
while (line != NULL) {
|
||||
char action_str[512];
|
||||
snprintf(action_str, 512,
|
||||
"{\"index\":{\"_id\":\"%s\", \"_type\":\"_doc\", \"_index\":\"sist2\"}}\n", line->uuid_str);
|
||||
size_t action_str_len = strlen(action_str);
|
||||
|
||||
size_t line_len = strlen(line->line);
|
||||
buf = realloc(buf, buf_size + line_len + action_str_len);
|
||||
|
||||
while (buf_size + line_len + action_str_len > buf_capacity) {
|
||||
buf_capacity *= 2;
|
||||
buf = realloc(buf, buf_capacity);
|
||||
}
|
||||
|
||||
buf_size += line_len + action_str_len;
|
||||
|
||||
memcpy(buf + buf_cur, action_str, action_str_len);
|
||||
@@ -116,52 +149,147 @@ void elastic_flush() {
|
||||
memcpy(buf + buf_cur, line->line, line_len);
|
||||
buf_cur += line_len;
|
||||
|
||||
es_bulk_line_t *tmp = line;
|
||||
line = line->next;
|
||||
free(tmp);
|
||||
count++;
|
||||
(*count)++;
|
||||
}
|
||||
buf = realloc(buf, buf_size + 1);
|
||||
*(buf+buf_cur) = '\0';
|
||||
|
||||
Indexer->line_head = NULL;
|
||||
Indexer->line_tail = NULL;
|
||||
Indexer->queued = 0;
|
||||
if (buf_size + 1 > buf_capacity) {
|
||||
buf = realloc(buf, buf_capacity + 1);
|
||||
}
|
||||
|
||||
*(buf + buf_cur) = '\0';
|
||||
|
||||
*buf_len = buf_cur;
|
||||
return buf;
|
||||
}
|
||||
|
||||
/**
 * Log the failed items of an Elasticsearch bulk response.
 *
 * The body is copied into a NUL-terminated buffer before parsing because
 * r->body is only known to be r->size bytes long. Every entry of "items"
 * whose index.status is not 201 is logged. If the body carries no "errors"
 * member at all (not JSON, or not a bulk response) the raw body is logged
 * instead.
 *
 * @param r  response to inspect; nothing happens when it has no body
 */
void print_errors(response_t *r) {
    if (r == NULL || r->body == NULL) {
        return;
    }

    char *body = malloc(r->size + 1);
    if (body == NULL) {
        return;
    }
    memcpy(body, r->body, r->size);
    body[r->size] = '\0';

    // cJSON_GetObjectItem() returns NULL for a NULL object or a missing key,
    // so every lookup result is checked before being dereferenced.
    cJSON *ret_json = cJSON_Parse(body);
    const cJSON *errors = cJSON_GetObjectItem(ret_json, "errors");

    if (errors == NULL) {
        LOG_ERRORF("elastic.c", "%s\n", body);
    } else if (errors->valueint != 0) {
        cJSON *err;
        cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
            const cJSON *status = cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status");
            if (status == NULL || status->valueint != 201) {
                char *str = cJSON_Print(err);
                if (str != NULL) {
                    LOG_ERRORF("elastic.c", "%s\n", str);
                    cJSON_free(str);
                }
            }
        }
    }

    cJSON_Delete(ret_json);
    free(body);
}
|
||||
|
||||
/**
 * Log the "error" member of an Elasticsearch response, if it has one.
 *
 * The body is duplicated into a NUL-terminated buffer before being parsed.
 *
 * @param r  response whose body is inspected
 */
void print_error(response_t *r) {
    size_t body_len = r->size;
    char *body_copy = malloc(body_len + 1);
    memcpy(body_copy, r->body, body_len);
    body_copy[body_len] = '\0';

    cJSON *parsed = cJSON_Parse(body_copy);
    cJSON *error_item = cJSON_GetObjectItem(parsed, "error");
    if (error_item != NULL) {
        char *printed = cJSON_Print(error_item);
        LOG_ERRORF("elastic.c", "%s\n", printed);
        cJSON_free(printed);
    }

    cJSON_Delete(parsed);
    free(body_copy);
}
|
||||
|
||||
void _elastic_flush(int max) {
|
||||
|
||||
if (max == 0) {
|
||||
LOG_WARNING("elastic.c", "calling _elastic_flush with 0 in queue")
|
||||
return;
|
||||
}
|
||||
|
||||
size_t buf_len;
|
||||
int count;
|
||||
void *buf = create_bulk_buffer(max, &count, &buf_len);
|
||||
|
||||
char bulk_url[4096];
|
||||
snprintf(bulk_url, 4096, "%s/sist2/_bulk?pipeline=tie", Indexer->es_url);
|
||||
response_t *r = web_post(bulk_url, buf, "Content-Type: application/x-ndjson");
|
||||
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_bulk?pipeline=tie", Indexer->es_url, Indexer->es_index);
|
||||
response_t *r = web_post(bulk_url, buf);
|
||||
|
||||
if (r->status_code == 0) {
|
||||
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
|
||||
}
|
||||
|
||||
LOG_INFOF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_cur / 1024, r->status_code);
|
||||
if (r->status_code == 413) {
|
||||
|
||||
if (r->status_code != 200 && r->status_code != 413) {
|
||||
cJSON *ret_json = cJSON_Parse(r->body);
|
||||
if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
|
||||
cJSON *err;
|
||||
cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
|
||||
if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
|
||||
char* str = cJSON_Print(err);
|
||||
LOG_ERRORF("elastic.c", "%s\n", str);
|
||||
cJSON_free(str);
|
||||
}
|
||||
if (max <= 1) {
|
||||
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->uuid_str)
|
||||
free_response(r);
|
||||
free(buf);
|
||||
delete_queue(1);
|
||||
if (Indexer->queued != 0) {
|
||||
elastic_flush();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
cJSON_Delete(ret_json);
|
||||
LOG_WARNINGF("elastic.c", "Payload too large, retrying (%d documents)", count);
|
||||
|
||||
free_response(r);
|
||||
free(buf);
|
||||
_elastic_flush(max / 2);
|
||||
return;
|
||||
|
||||
} else if (r->status_code == 429) {
|
||||
|
||||
free_response(r);
|
||||
free(buf);
|
||||
LOG_WARNING("elastic.c", "Got 429 status, will retry after delay")
|
||||
usleep(1000000 * 20);
|
||||
_elastic_flush(max);
|
||||
return;
|
||||
|
||||
} else if (r->status_code != 200) {
|
||||
print_errors(r);
|
||||
delete_queue(Indexer->queued);
|
||||
|
||||
} else {
|
||||
|
||||
print_errors(r);
|
||||
LOG_INFOF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
|
||||
delete_queue(max);
|
||||
|
||||
if (Indexer->queued != 0) {
|
||||
elastic_flush();
|
||||
}
|
||||
}
|
||||
|
||||
free_response(r);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
void delete_queue(int max) {
|
||||
for (int i = 0; i < max; i++) {
|
||||
es_bulk_line_t *tmp = Indexer->line_head;
|
||||
Indexer->line_head = tmp->next;
|
||||
if (Indexer->line_head == NULL) {
|
||||
Indexer->line_tail = NULL;
|
||||
}
|
||||
free(tmp);
|
||||
Indexer->queued -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
void elastic_flush() {
|
||||
|
||||
if (Indexer == NULL) {
|
||||
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
|
||||
}
|
||||
|
||||
_elastic_flush(Indexer->queued);
|
||||
}
|
||||
|
||||
void elastic_index_line(es_bulk_line_t *line) {
|
||||
|
||||
if (Indexer == NULL) {
|
||||
Indexer = create_indexer(IndexCtx.es_url);
|
||||
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
|
||||
}
|
||||
|
||||
if (Indexer->line_head == NULL) {
|
||||
@@ -179,14 +307,18 @@ void elastic_index_line(es_bulk_line_t *line) {
|
||||
}
|
||||
}
|
||||
|
||||
es_indexer_t *create_indexer(const char *url) {
|
||||
es_indexer_t *create_indexer(const char *url, const char *index) {
|
||||
|
||||
char *es_url = malloc(strlen(url) + 1);
|
||||
strcpy(es_url, url);
|
||||
|
||||
char *es_index = malloc(strlen(index) + 1);
|
||||
strcpy(es_index, index);
|
||||
|
||||
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
|
||||
|
||||
indexer->es_url = es_url;
|
||||
indexer->es_index = es_index;
|
||||
indexer->queued = 0;
|
||||
indexer->line_head = NULL;
|
||||
indexer->line_tail = NULL;
|
||||
@@ -194,41 +326,42 @@ es_indexer_t *create_indexer(const char *url) {
|
||||
return indexer;
|
||||
}
|
||||
|
||||
void destroy_indexer(char * script, char index_id[UUID_STR_LEN]) {
|
||||
void finish_indexer(char *script, int async_script, char *index_id) {
|
||||
|
||||
char url[4096];
|
||||
|
||||
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
|
||||
response_t *r = web_post(url, "", NULL);
|
||||
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
|
||||
response_t *r = web_post(url, "");
|
||||
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
if (script != NULL) {
|
||||
execute_update_script(script, index_id);
|
||||
execute_update_script(script, async_script, index_id);
|
||||
free(script);
|
||||
|
||||
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_post(url, "");
|
||||
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
}
|
||||
|
||||
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
|
||||
r = web_post(url, "", NULL);
|
||||
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
|
||||
r = web_post(url, "", NULL);
|
||||
snprintf(url, sizeof(url), "%s/%s/_forcemerge", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_post(url, "");
|
||||
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
if (Indexer != NULL) {
|
||||
free(Indexer->es_url);
|
||||
free(Indexer);
|
||||
}
|
||||
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}");
|
||||
LOG_INFOF("elastic.c", "Set refresh interval <%d>", r->status_code);
|
||||
free_response(r);
|
||||
}
|
||||
|
||||
void elastic_init(int force_reset) {
|
||||
|
||||
// Check if index exists
|
||||
char url[4096];
|
||||
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
|
||||
response_t *r = web_get(url);
|
||||
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
|
||||
response_t *r = web_get(url, 30);
|
||||
int index_exists = r->status_code == 200;
|
||||
free_response(r);
|
||||
|
||||
@@ -237,33 +370,39 @@ void elastic_init(int force_reset) {
|
||||
LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
|
||||
r = web_put(url, "", NULL);
|
||||
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_put(url, "");
|
||||
|
||||
if (r->status_code != 200) {
|
||||
print_error(r);
|
||||
LOG_FATAL("elastic.c", "Could not create index")
|
||||
}
|
||||
|
||||
LOG_INFOF("elastic.c", "Create index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, 4096, "%s/sist2/_close", IndexCtx.es_url);
|
||||
r = web_post(url, "", NULL);
|
||||
snprintf(url, sizeof(url), "%s/%s/_close", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_post(url, "");
|
||||
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, 4096, "%s/_ingest/pipeline/tie", IndexCtx.es_url);
|
||||
r = web_put(url, pipeline_json, "Content-Type: application/json");
|
||||
snprintf(url, sizeof(url), "%s/_ingest/pipeline/tie", IndexCtx.es_url);
|
||||
r = web_put(url, pipeline_json);
|
||||
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, 4096, "%s/sist2/_settings", IndexCtx.es_url);
|
||||
r = web_put(url, settings_json, "Content-Type: application/json");
|
||||
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_put(url, settings_json);
|
||||
LOG_INFOF("elastic.c", "Update settings <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, 4096, "%s/sist2/_mappings/_doc?include_type_name=true", IndexCtx.es_url);
|
||||
r = web_put(url, mappings_json, "Content-Type: application/json");
|
||||
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_put(url, mappings_json);
|
||||
LOG_INFOF("elastic.c", "Update mappings <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, 4096, "%s/sist2/_open", IndexCtx.es_url);
|
||||
r = web_post(url, "", NULL);
|
||||
snprintf(url, sizeof(url), "%s/%s/_open", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_post(url, "");
|
||||
LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
}
|
||||
@@ -271,12 +410,16 @@ void elastic_init(int force_reset) {
|
||||
|
||||
cJSON *elastic_get_document(const char *uuid_str) {
|
||||
char url[4096];
|
||||
snprintf(url, 4096, "%s/sist2/_doc/%s", WebCtx.es_url, uuid_str);
|
||||
snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, uuid_str);
|
||||
|
||||
response_t *r = web_get(url);
|
||||
response_t *r = web_get(url, 3);
|
||||
cJSON *json = NULL;
|
||||
if (r->status_code == 200) {
|
||||
json = cJSON_Parse(r->body);
|
||||
char *tmp = malloc(r->size + 1);
|
||||
memcpy(tmp, r->body, r->size);
|
||||
*(tmp + r->size) = '\0';
|
||||
json = cJSON_Parse(tmp);
|
||||
free(tmp);
|
||||
}
|
||||
free_response(r);
|
||||
return json;
|
||||
@@ -284,21 +427,25 @@ cJSON *elastic_get_document(const char *uuid_str) {
|
||||
|
||||
char *elastic_get_status() {
|
||||
char url[4096];
|
||||
snprintf(url, 4096,
|
||||
"%s/_cluster/state/metadata/sist2?filter_path=metadata.indices.*.state", WebCtx.es_url);
|
||||
snprintf(url, sizeof(url),
|
||||
"%s/_cluster/state/metadata/%s?filter_path=metadata.indices.*.state", WebCtx.es_url, WebCtx.es_index);
|
||||
|
||||
response_t *r = web_get(url);
|
||||
response_t *r = web_get(url, 30);
|
||||
cJSON *json = NULL;
|
||||
char *status = malloc(128 * sizeof(char));
|
||||
status[0] = '\0';
|
||||
|
||||
if (r->status_code == 200) {
|
||||
json = cJSON_Parse(r->body);
|
||||
char *tmp = malloc(r->size + 1);
|
||||
memcpy(tmp, r->body, r->size);
|
||||
*(tmp + r->size) = '\0';
|
||||
json = cJSON_Parse(tmp);
|
||||
free(tmp);
|
||||
const cJSON *metadata = cJSON_GetObjectItem(json, "metadata");
|
||||
if (metadata != NULL) {
|
||||
const cJSON *indices = cJSON_GetObjectItem(metadata, "indices");
|
||||
const cJSON *sist2 = cJSON_GetObjectItem(indices, "sist2");
|
||||
const cJSON *state = cJSON_GetObjectItem(sist2, "state");
|
||||
const cJSON *index = cJSON_GetObjectItem(indices, WebCtx.es_index);
|
||||
const cJSON *state = cJSON_GetObjectItem(index, "state");
|
||||
strcpy(status, state->valuestring);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,15 +16,14 @@ typedef struct es_indexer es_indexer_t;
|
||||
|
||||
void elastic_index_line(es_bulk_line_t *line);
|
||||
|
||||
void elastic_flush();
|
||||
|
||||
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
|
||||
|
||||
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
|
||||
|
||||
es_indexer_t *create_indexer(const char* es_url);
|
||||
es_indexer_t *create_indexer(const char *url, const char *index);
|
||||
|
||||
void destroy_indexer(char *script, char index_id[UUID_STR_LEN]);
|
||||
void elastic_cleanup();
|
||||
void finish_indexer(char *script, int async_script, char *index_id);
|
||||
|
||||
void elastic_init(int force_reset);
|
||||
|
||||
@@ -32,4 +31,6 @@ cJSON *elastic_get_document(const char *uuid_str);
|
||||
|
||||
char *elastic_get_status();
|
||||
|
||||
void execute_update_script(const char *script, int async, const char index_id[UUID_STR_LEN]);
|
||||
|
||||
#endif
|
||||
|
||||
File diff suppressed because one or more lines are too long
122
src/index/web.c
122
src/index/web.c
@@ -1,4 +1,11 @@
|
||||
#include "web.h"
|
||||
#include "src/sist.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
#include <mongoose.h>
|
||||
#include <pthread.h>
|
||||
#include <curl/curl.h>
|
||||
|
||||
|
||||
size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
|
||||
|
||||
@@ -9,11 +16,91 @@ size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
|
||||
}
|
||||
|
||||
void free_response(response_t *resp) {
|
||||
free(resp->body);
|
||||
if (resp->body != NULL) {
|
||||
free(resp->body);
|
||||
}
|
||||
free(resp);
|
||||
}
|
||||
|
||||
response_t *web_get(const char *url) {
|
||||
/**
 * Advance an asynchronous POST started with web_post_async().
 *
 * Waits up to one second for socket activity, drives the multi handle, and
 * once no transfer is running fills req->response (body, size, status code),
 * releases the curl handles and header list, and sets req->done.
 *
 * req->done is also set when curl_multi_fdset() or select() fails. In that
 * case the response is left as it was and the curl handles are NOT released
 * here (unchanged from the previous behaviour).
 *
 * @param req  request context returned by web_post_async()
 */
void web_post_async_poll(subreq_ctx_t* req) {
    // The handles are released on completion; never touch them again afterwards.
    if (req->done) {
        return;
    }

    fd_set fdread;
    fd_set fdwrite;
    fd_set fdexcep;
    int maxfd = -1;

    FD_ZERO(&fdread);
    FD_ZERO(&fdwrite);
    FD_ZERO(&fdexcep);

    CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd);
    if (mc != CURLM_OK) {
        req->done = TRUE;
        return;
    }

    if (maxfd == -1) {
        // libcurl has nothing select() could wait on (e.g. the transfer already
        // finished during the initial perform). It still has to be driven,
        // otherwise the completion branch below would never be reached.
        curl_multi_perform(req->multi, &req->running_handles);
    } else {
        struct timeval timeout = {1, 0};
        int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);

        if (rc == -1) {
            req->done = TRUE;
        } else if (rc > 0) {
            curl_multi_perform(req->multi, &req->running_handles);
        }
        // rc == 0: timed out, nothing to do on this poll
    }

    if (req->running_handles == 0) {
        req->done = TRUE;
        req->response->body = req->response_buf.buf;
        req->response->size = req->response_buf.cur;

        // CURLINFO_RESPONSE_CODE writes a long; status_code is an int.
        long status = 0;
        curl_easy_getinfo(req->handle, CURLINFO_RESPONSE_CODE, &status);
        req->response->status_code = (int) status;

        // Detach the easy handle before destroying it, and destroy the multi handle last.
        curl_multi_remove_handle(req->multi, req->handle);
        curl_easy_cleanup(req->handle);
        curl_multi_cleanup(req->multi);
        curl_slist_free_all(req->headers);

        req->handle = NULL;
        req->multi = NULL;
        req->headers = NULL;
    }
}
|
||||
|
||||
subreq_ctx_t *web_post_async(const char *url, char *data) {
|
||||
subreq_ctx_t *req = calloc(1, sizeof(subreq_ctx_t));
|
||||
req->response = calloc(1, sizeof(response_t));
|
||||
req->data = data;
|
||||
req->response_buf = dyn_buffer_create();
|
||||
|
||||
req->handle = curl_easy_init();
|
||||
CURL *curl = req->handle;
|
||||
curl_easy_setopt(curl, CURLOPT_URL, url);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&req->response_buf));
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
|
||||
curl_easy_setopt(curl, CURLOPT_POST, 1);
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
|
||||
|
||||
req->multi = curl_multi_init();
|
||||
curl_multi_add_handle(req->multi, curl);
|
||||
curl_multi_perform(req->multi, &req->running_handles);
|
||||
|
||||
LOG_DEBUGF("web.c", "async request POST %s", url)
|
||||
|
||||
return req;
|
||||
}
|
||||
|
||||
response_t *web_get(const char *url, int timeout) {
|
||||
response_t *resp = malloc(sizeof(response_t));
|
||||
|
||||
CURL *curl;
|
||||
@@ -24,18 +111,24 @@ response_t *web_get(const char *url) {
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
|
||||
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
curl_easy_perform(curl);
|
||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
||||
|
||||
curl_easy_cleanup(curl);
|
||||
curl_slist_free_all(headers);
|
||||
|
||||
resp->body = buffer.buf;
|
||||
resp->size = buffer.cur;
|
||||
return resp;
|
||||
}
|
||||
|
||||
response_t *web_post(const char *url, const char *data, const char *header) {
|
||||
response_t *web_post(const char *url, const char *data) {
|
||||
|
||||
response_t *resp = malloc(sizeof(response_t));
|
||||
|
||||
@@ -50,10 +143,8 @@ response_t *web_post(const char *url, const char *data, const char *header) {
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
|
||||
struct curl_slist *headers = NULL;
|
||||
if (header != NULL) {
|
||||
headers = curl_slist_append(headers, header);
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
}
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
|
||||
|
||||
@@ -70,7 +161,7 @@ response_t *web_post(const char *url, const char *data, const char *header) {
|
||||
}
|
||||
|
||||
|
||||
response_t *web_put(const char *url, const char *data, const char *header) {
|
||||
response_t *web_put(const char *url, const char *data) {
|
||||
|
||||
response_t *resp = malloc(sizeof(response_t));
|
||||
|
||||
@@ -86,11 +177,9 @@ response_t *web_put(const char *url, const char *data, const char *header) {
|
||||
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
|
||||
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
|
||||
|
||||
if (header != NULL) {
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, header);
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
}
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
|
||||
|
||||
@@ -98,6 +187,7 @@ response_t *web_put(const char *url, const char *data, const char *header) {
|
||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
||||
|
||||
curl_easy_cleanup(curl);
|
||||
curl_slist_free_all(headers);
|
||||
|
||||
resp->body = buffer.buf;
|
||||
resp->size = buffer.cur;
|
||||
@@ -119,13 +209,17 @@ response_t *web_delete(const char *url) {
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
curl_easy_perform(curl);
|
||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
||||
|
||||
curl_easy_cleanup(curl);
|
||||
curl_slist_free_all(headers);
|
||||
|
||||
resp->body = buffer.buf;
|
||||
resp->size = buffer.cur;
|
||||
return resp;
|
||||
}
|
||||
}
|
||||
@@ -2,6 +2,8 @@
|
||||
#define SIST2_WEB_H
|
||||
|
||||
#include "src/sist.h"
|
||||
#include <mongoose.h>
|
||||
#include <curl/curl.h>
|
||||
|
||||
typedef struct response {
|
||||
char *body;
|
||||
@@ -9,9 +11,27 @@ typedef struct response {
|
||||
int status_code;
|
||||
} response_t;
|
||||
|
||||
response_t *web_get(const char *url);
|
||||
response_t *web_post(const char * url, const char * data, const char* header);
|
||||
response_t *web_put(const char *url, const char *data, const char *header);
|
||||
typedef struct {
|
||||
response_t *resp;
|
||||
int done;
|
||||
} http_ev_data_t;
|
||||
|
||||
typedef struct {
|
||||
char* data;
|
||||
dyn_buffer_t response_buf;
|
||||
struct curl_slist *headers;
|
||||
CURL *handle;
|
||||
CURLM *multi;
|
||||
response_t *response;
|
||||
int running_handles;
|
||||
int done;
|
||||
} subreq_ctx_t;
|
||||
|
||||
response_t *web_get(const char *url, int timeout);
|
||||
response_t *web_post(const char * url, const char * data);
|
||||
void web_post_async_poll(subreq_ctx_t* req);
|
||||
subreq_ctx_t *web_post_async(const char *url, char *data);
|
||||
response_t *web_put(const char *url, const char *data);
|
||||
response_t *web_delete(const char *url);
|
||||
|
||||
void free_response(response_t *resp);
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
#include "src/ctx.h"
|
||||
#include "serialize.h"
|
||||
#include "src/parsing/parse.h"
|
||||
#include "src/parsing/mime.h"
|
||||
|
||||
static __thread int index_fd = -1;
|
||||
|
||||
@@ -39,11 +41,14 @@ void write_index_descriptor(char *path, index_descriptor_t *desc) {
|
||||
cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp);
|
||||
|
||||
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
|
||||
if (fd == -1) {
|
||||
perror(path);
|
||||
if (fd < 0) {
|
||||
LOG_FATALF("serialize.c", "Could not open index descriptor: %s", strerror(errno));
|
||||
}
|
||||
char *str = cJSON_Print(json);
|
||||
write(fd, str, strlen(str));
|
||||
int ret = write(fd, str, strlen(str));
|
||||
if (ret == -1) {
|
||||
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
|
||||
}
|
||||
free(str);
|
||||
close(fd);
|
||||
|
||||
@@ -57,11 +62,14 @@ index_descriptor_t read_index_descriptor(char *path) {
|
||||
int fd = open(path, O_RDONLY);
|
||||
|
||||
if (fd == -1) {
|
||||
LOG_FATAL("serialize.c", "Invalid/corrupt index (Could not find descriptor)\n")
|
||||
LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path, strerror(errno))
|
||||
}
|
||||
|
||||
char *buf = malloc(info.st_size + 1);
|
||||
read(fd, buf, info.st_size);
|
||||
int ret = read(fd, buf, info.st_size);
|
||||
if (ret == -1) {
|
||||
LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
|
||||
}
|
||||
*(buf + info.st_size) = '\0';
|
||||
close(fd);
|
||||
|
||||
@@ -136,6 +144,14 @@ char *get_meta_key_text(enum metakey meta_key) {
|
||||
return "exif_model";
|
||||
case MetaExifDateTime:
|
||||
return "exif_datetime";
|
||||
case MetaAuthor:
|
||||
return "author";
|
||||
case MetaModifiedBy:
|
||||
return "modified_by";
|
||||
case MetaThumbnail:
|
||||
return "thumbnail";
|
||||
case MetaPages:
|
||||
return "pages";
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
@@ -158,8 +174,8 @@ void write_document(document_t *doc) {
|
||||
dyn_buffer_t buf = dyn_buffer_create();
|
||||
|
||||
// Ignore root directory in the file path
|
||||
doc->ext = doc->ext - ScanCtx.index.desc.root_len;
|
||||
doc->base = doc->base - ScanCtx.index.desc.root_len;
|
||||
doc->ext = (short) (doc->ext - ScanCtx.index.desc.root_len);
|
||||
doc->base = (short) (doc->base - ScanCtx.index.desc.root_len);
|
||||
doc->filepath += ScanCtx.index.desc.root_len;
|
||||
|
||||
dyn_buffer_write(&buf, doc, sizeof(line_t));
|
||||
@@ -170,11 +186,11 @@ void write_document(document_t *doc) {
|
||||
dyn_buffer_write_char(&buf, meta->key);
|
||||
|
||||
if (IS_META_INT(meta->key)) {
|
||||
dyn_buffer_write_int(&buf, meta->intval);
|
||||
dyn_buffer_write_int(&buf, meta->int_val);
|
||||
} else if (IS_META_LONG(meta->key)) {
|
||||
dyn_buffer_write_long(&buf, meta->longval);
|
||||
dyn_buffer_write_long(&buf, meta->long_val);
|
||||
} else {
|
||||
dyn_buffer_write_str(&buf, meta->strval);
|
||||
dyn_buffer_write_str(&buf, meta->str_val);
|
||||
}
|
||||
|
||||
meta_line_t *tmp = meta;
|
||||
@@ -185,7 +201,7 @@ void write_document(document_t *doc) {
|
||||
|
||||
int res = write(index_fd, buf.buf, buf.cur);
|
||||
if (res == -1) {
|
||||
perror("write");
|
||||
LOG_FATALF("serialize.c", "Could not write document: %s", strerror(errno))
|
||||
}
|
||||
ScanCtx.stat_index_size += buf.cur;
|
||||
dyn_buffer_destroy(&buf);
|
||||
@@ -193,6 +209,8 @@ void write_document(document_t *doc) {
|
||||
|
||||
void thread_cleanup() {
|
||||
close(index_fd);
|
||||
cleanup_parse();
|
||||
cleanup_font();
|
||||
}
|
||||
|
||||
|
||||
@@ -203,7 +221,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
|
||||
FILE *file = fopen(path, "rb");
|
||||
while (1) {
|
||||
buf.cur = 0;
|
||||
fread((void *) &line, 1, sizeof(line_t), file);
|
||||
size_t _ = fread((void *) &line, 1, sizeof(line_t), file);
|
||||
if (feof(file)) {
|
||||
break;
|
||||
}
|
||||
@@ -214,7 +232,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
|
||||
char uuid_str[UUID_STR_LEN];
|
||||
uuid_unparse(line.uuid, uuid_str);
|
||||
|
||||
const char* mime_text = mime_get_mime_text(line.mime);
|
||||
const char *mime_text = mime_get_mime_text(line.mime);
|
||||
if (mime_text == NULL) {
|
||||
cJSON_AddNullToObject(document, "mime");
|
||||
} else {
|
||||
@@ -223,54 +241,62 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
|
||||
cJSON_AddNumberToObject(document, "size", (double) line.size);
|
||||
cJSON_AddNumberToObject(document, "mtime", line.mtime);
|
||||
|
||||
int c;
|
||||
int c = 0;
|
||||
while ((c = getc(file)) != 0) {
|
||||
dyn_buffer_write_char(&buf, (char) c);
|
||||
}
|
||||
dyn_buffer_write_char(&buf, '\0');
|
||||
|
||||
if (IndexCtx.tags != NULL) {
|
||||
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, buf.buf);
|
||||
if (tags_string != NULL) {
|
||||
cJSON *tags_arr = cJSON_Parse(tags_string);
|
||||
cJSON_AddItemToObject(document, "tag", tags_arr);
|
||||
}
|
||||
}
|
||||
|
||||
cJSON_AddStringToObject(document, "extension", buf.buf + line.ext);
|
||||
if (*(buf.buf + line.ext - 1) == '.') {
|
||||
*(buf.buf + line.ext - 1) = '\0';
|
||||
} else {
|
||||
*(buf.buf + line.ext) = '\0';
|
||||
}
|
||||
cJSON_AddStringToObject(document, "name", buf.buf + line.base);
|
||||
|
||||
char tmp[PATH_MAX * 3];
|
||||
|
||||
str_escape(tmp, buf.buf + line.base);
|
||||
cJSON_AddStringToObject(document, "name", tmp);
|
||||
|
||||
if (line.base > 0) {
|
||||
*(buf.buf + line.base - 1) = '\0';
|
||||
cJSON_AddStringToObject(document, "path", buf.buf);
|
||||
|
||||
str_escape(tmp, buf.buf);
|
||||
cJSON_AddStringToObject(document, "path", tmp);
|
||||
} else {
|
||||
cJSON_AddStringToObject(document, "path", "");
|
||||
}
|
||||
|
||||
enum metakey key = getc(file);
|
||||
size_t ret = 0;
|
||||
while (key != '\n') {
|
||||
switch (key) {
|
||||
case MetaPages:
|
||||
case MetaWidth:
|
||||
case MetaHeight: {
|
||||
int value;
|
||||
fread(&value, sizeof(int), 1, file);
|
||||
ret = fread(&value, sizeof(int), 1, file);
|
||||
cJSON_AddNumberToObject(document, get_meta_key_text(key), value);
|
||||
break;
|
||||
}
|
||||
case MetaMediaDuration:
|
||||
case MetaMediaBitrate: {
|
||||
long value;
|
||||
fread(&value, sizeof(long), 1, file);
|
||||
ret = fread(&value, sizeof(long), 1, file);
|
||||
cJSON_AddNumberToObject(document, get_meta_key_text(key), (double) value);
|
||||
break;
|
||||
}
|
||||
case MetaMediaAudioCodec:
|
||||
case MetaMediaVideoCodec: {
|
||||
int value;
|
||||
fread(&value, sizeof(int), 1, file);
|
||||
const AVCodecDescriptor *desc = avcodec_descriptor_get(value);
|
||||
if (desc != NULL) {
|
||||
cJSON_AddStringToObject(document, get_meta_key_text(key), desc->name);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case MetaMediaVideoCodec:
|
||||
case MetaContent:
|
||||
case MetaArtist:
|
||||
case MetaAlbum:
|
||||
@@ -287,6 +313,9 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
|
||||
case MetaExifIsoSpeedRatings:
|
||||
case MetaExifDateTime:
|
||||
case MetaExifModel:
|
||||
case MetaAuthor:
|
||||
case MetaModifiedBy:
|
||||
case MetaThumbnail:
|
||||
case MetaTitle: {
|
||||
buf.cur = 0;
|
||||
while ((c = getc(file)) != 0) {
|
||||
@@ -334,7 +363,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
|
||||
char *line = NULL;
|
||||
size_t len;
|
||||
size_t read = getline(&line, &len, file);
|
||||
if (read == -1) {
|
||||
if (read < 0) {
|
||||
if (line) {
|
||||
free(line);
|
||||
}
|
||||
@@ -400,8 +429,8 @@ void incremental_read(GHashTable *table, const char *filepath) {
|
||||
line_t line;
|
||||
|
||||
while (1) {
|
||||
fread((void *) &line, 1, sizeof(line_t), file);
|
||||
if (feof(file)) {
|
||||
size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
|
||||
if (ret != 1 || feof(file)) {
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -424,8 +453,8 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
|
||||
line_t line;
|
||||
|
||||
while (1) {
|
||||
fread((void *) &line, 1, sizeof(line_t), file);
|
||||
if (feof(file)) {
|
||||
size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
|
||||
if (ret != 1 || feof(file)) {
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -453,11 +482,11 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
|
||||
|
||||
if (IS_META_INT(key)) {
|
||||
int val;
|
||||
fread(&val, sizeof(val), 1, file);
|
||||
ret = fread(&val, sizeof(val), 1, file);
|
||||
fwrite(&val, sizeof(val), 1, dst_file);
|
||||
} else if (IS_META_LONG(key)) {
|
||||
long val;
|
||||
fread(&val, sizeof(val), 1, file);
|
||||
ret = fread(&val, sizeof(val), 1, file);
|
||||
fwrite(&val, sizeof(val), 1, dst_file);
|
||||
} else {
|
||||
while ((c = (char) getc(file))) {
|
||||
@@ -465,6 +494,10 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
|
||||
}
|
||||
fwrite("\0", sizeof(c), 1, dst_file);
|
||||
}
|
||||
|
||||
if (ret != 1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
skip_meta(file);
|
||||
|
||||
@@ -2,7 +2,10 @@
|
||||
#define SIST2_SERIALIZE_H
|
||||
|
||||
#include "src/sist.h"
|
||||
#include "store.h"
|
||||
|
||||
#include <sys/syscall.h>
|
||||
#include <glib.h>
|
||||
|
||||
typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]);
|
||||
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
#include "store.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
store_t *store_create(char *path) {
|
||||
store_t *store_create(char *path, size_t chunk_size) {
|
||||
|
||||
store_t *store = malloc(sizeof(struct store_t));
|
||||
store->chunk_size = chunk_size;
|
||||
pthread_rwlock_init(&store->lock, NULL);
|
||||
|
||||
mdb_env_create(&store->env);
|
||||
@@ -15,11 +16,10 @@ store_t *store_create(char *path) {
|
||||
);
|
||||
|
||||
if (open_ret != 0) {
|
||||
fprintf(stderr, "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path);
|
||||
exit(1);
|
||||
LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path)
|
||||
}
|
||||
|
||||
store->size = (size_t) 1024 * 1024 * 5;
|
||||
store->size = (size_t) store->chunk_size;
|
||||
ScanCtx.stat_tn_size = 0;
|
||||
mdb_env_set_mapsize(store->env, store->size);
|
||||
|
||||
@@ -70,7 +70,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
|
||||
// Cannot resize when there is a opened transaction.
|
||||
// Resize take effect on the next commit.
|
||||
pthread_rwlock_wrlock(&store->lock);
|
||||
store->size += 1024 * 1024 * 50;
|
||||
store->size += store->chunk_size;
|
||||
mdb_env_set_mapsize(store->env, store->size);
|
||||
mdb_txn_begin(store->env, NULL, 0, &txn);
|
||||
put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
|
||||
@@ -82,7 +82,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
|
||||
pthread_rwlock_unlock(&store->lock);
|
||||
|
||||
if (put_ret != 0) {
|
||||
printf("%s\n", mdb_strerror(put_ret));
|
||||
LOG_ERROR("store.c", mdb_strerror(put_ret))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -111,3 +111,40 @@ char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen)
|
||||
return buf;
|
||||
}
|
||||
|
||||
/**
 * Load every key/value pair of a store into a newly created hash table.
 *
 * Keys and values are copied into heap buffers owned by the table; both are
 * released with free() when an entry is removed or the table is destroyed.
 * The table hashes and compares keys as C strings, so each copy gets one
 * extra terminating NUL byte in case the stored record does not carry one.
 *
 * @param store store to read from
 * @return a new hash table owned by the caller. It is empty when the read
 *         transaction or the cursor could not be opened (the LMDB error is
 *         logged in that case).
 */
GHashTable *store_read_all(store_t *store) {

    int count = 0;

    GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);

    MDB_txn *txn = NULL;
    int ret = mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);
    if (ret != 0) {
        LOG_ERROR("store.c", mdb_strerror(ret));
        return table;
    }

    MDB_cursor *cur = NULL;
    ret = mdb_cursor_open(txn, store->dbi, &cur);
    if (ret != 0) {
        LOG_ERROR("store.c", mdb_strerror(ret));
        mdb_txn_abort(txn);
        return table;
    }

    MDB_val key;
    MDB_val value;

    while (mdb_cursor_get(cur, &key, &value, MDB_NEXT) == 0) {
        // +1: room for the terminator appended below
        char *key_str = malloc(key.mv_size + 1);
        char *val_str = malloc(value.mv_size + 1);
        if (key_str == NULL || val_str == NULL) {
            free(key_str);
            free(val_str);
            LOG_FATAL("store.c", "Could not allocate memory while reading store");
        }

        memcpy(key_str, key.mv_data, key.mv_size);
        key_str[key.mv_size] = '\0';
        memcpy(val_str, value.mv_data, value.mv_size);
        val_str[value.mv_size] = '\0';

        g_hash_table_insert(table, key_str, val_str);
        count += 1;
    }

    LOG_DEBUGF("store.c", "Read tags for %d documents", count);

    mdb_cursor_close(cur);
    // Read-only transaction: nothing to commit, so abort releases it.
    mdb_txn_abort(txn);
    return table;
}
|
||||
|
||||
|
||||
/**
 * Copy the store's LMDB environment into the directory `destination`.
 *
 * The directory is created first (owner read/write/execute). A failure of
 * mkdir is not reported on its own, because the directory may already exist;
 * a failed copy is logged with the LMDB error text.
 *
 * @param store       store whose environment is copied
 * @param destination path of the directory that receives the copy
 */
void store_copy(store_t *store, const char *destination) {
    mkdir(destination, S_IWUSR | S_IRUSR | S_IXUSR);

    int copy_ret = mdb_env_copy(store->env, destination);
    if (copy_ret != 0) {
        LOG_ERROR("store.c", mdb_strerror(copy_ret));
    }
}
|
||||
|
||||
@@ -4,16 +4,20 @@
|
||||
#include <pthread.h>
|
||||
#include <lmdb.h>
|
||||
|
||||
#include <glib.h>
|
||||
|
||||
// Chunk sizes, in bytes, passed to store_create(): the initial LMDB map size
// and the amount added each time the map is grown. Parenthesized so that the
// macros expand correctly inside larger expressions (e.g. x / STORE_SIZE_TN).
#define STORE_SIZE_TN (1024 * 1024 * 5)
#define STORE_SIZE_TAG (1024 * 16)
|
||||
|
||||
typedef struct store_t {
|
||||
MDB_dbi dbi;
|
||||
MDB_env *env;
|
||||
size_t size;
|
||||
size_t chunk_size;
|
||||
pthread_rwlock_t lock;
|
||||
} store_t;
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
store_t *store_create(char *path);
|
||||
store_t *store_create(char *path, size_t chunk_size);
|
||||
|
||||
void store_destroy(store_t *store);
|
||||
|
||||
@@ -21,4 +25,8 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
|
||||
|
||||
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);
|
||||
|
||||
GHashTable *store_read_all(store_t *store);
|
||||
|
||||
void store_copy(store_t *store, const char *destination);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
#include "walk.h"
|
||||
#include "src/ctx.h"
|
||||
#include "src/parsing/parse.h"
|
||||
|
||||
#include <ftw.h>
|
||||
|
||||
__always_inline
|
||||
parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
|
||||
@@ -15,12 +18,13 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
|
||||
job->ext = len;
|
||||
}
|
||||
|
||||
job->info = *info;
|
||||
job->vfile.info = *info;
|
||||
|
||||
memset(job->parent, 0, 16);
|
||||
|
||||
job->vfile.filepath = job->filepath;
|
||||
job->vfile.read = fs_read;
|
||||
job->vfile.reset = fs_reset;
|
||||
job->vfile.close = fs_close;
|
||||
job->vfile.fd = -1;
|
||||
job->vfile.is_fs_file = TRUE;
|
||||
@@ -28,8 +32,18 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
|
||||
return job;
|
||||
}
|
||||
|
||||
// Output vector for pcre_exec(); PCRE expects its length to be a multiple of 3.
int sub_strings[30];

// Non-zero when `str` matches the compiled exclude pattern in ScanCtx.
// The vector length given to pcre_exec() is an element count, not a byte
// count, so it is derived with sizeof(array) / sizeof(element).
// Note: `str` is evaluated twice; pass a plain pointer expression.
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, (str), (int) strlen(str), 0, 0, sub_strings, (int) (sizeof(sub_strings) / sizeof(sub_strings[0]))) >= 0)
|
||||
|
||||
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
|
||||
if (ftw->level <= ScanCtx.depth && typeflag == FTW_F && S_ISREG(info->st_mode)) {
|
||||
|
||||
if (typeflag == FTW_F && S_ISREG(info->st_mode) && ftw->level <= ScanCtx.depth) {
|
||||
|
||||
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
|
||||
LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
|
||||
return 0;
|
||||
}
|
||||
|
||||
parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
|
||||
tpool_add_work(ScanCtx.pool, parse, job);
|
||||
}
|
||||
|
||||
@@ -3,8 +3,6 @@
|
||||
|
||||
#define _XOPEN_SOURCE 500
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
int walk_directory_tree(const char *);
|
||||
|
||||
#endif
|
||||
|
||||
38
src/log.c
38
src/log.c
@@ -1,15 +1,17 @@
|
||||
#include "log.h"
|
||||
|
||||
#include <pthread.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
const char *log_colors[] = {
|
||||
"\033[34m", "\033[01;34m", "\033[0m",
|
||||
"\033[01;33m", "\033[31m", "\033[01;31m"
|
||||
"\033[34m", "\033[01;34m", "\033[01;33m", "\033[0m", "\033[31m", "\033[01;31m"
|
||||
};
|
||||
|
||||
// Display name of each log level, indexed by the numeric level
// (used as log_levels[level] when a log line is formatted).
const char *log_levels[] = {
        "DEBUG",
        "INFO",
        "WARNING",
        "ERROR",
        "FATAL",
};
|
||||
|
||||
void sist_logf(char *filepath, int level, char *format, ...) {
|
||||
void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
|
||||
|
||||
static int is_tty = -1;
|
||||
if (is_tty == -1) {
|
||||
@@ -31,23 +33,20 @@ void sist_logf(char *filepath, int level, char *format, ...) {
|
||||
if (is_tty) {
|
||||
log_len = snprintf(
|
||||
log_str, sizeof(log_str),
|
||||
"\033[%dm[%04X]%s [%s] [%s %s] ",
|
||||
"\033[%dm[%04llX]%s [%s] [%s %s] ",
|
||||
31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
|
||||
datetime, log_levels[level], filepath
|
||||
);
|
||||
} else {
|
||||
log_len = snprintf(
|
||||
log_str, sizeof(log_str),
|
||||
"[%04X] [%s] [%s %s] ",
|
||||
"[%04llX] [%s] [%s %s] ",
|
||||
pid, datetime, log_levels[level], filepath
|
||||
);
|
||||
}
|
||||
|
||||
va_list ap;
|
||||
va_start(ap, format);
|
||||
size_t maxsize = sizeof(log_str) - log_len;
|
||||
log_len += vsnprintf(log_str + log_len, maxsize, format, ap);
|
||||
va_end(ap);
|
||||
|
||||
if (is_tty) {
|
||||
log_len += sprintf(log_str + log_len, "\033[0m\n");
|
||||
@@ -56,10 +55,20 @@ void sist_logf(char *filepath, int level, char *format, ...) {
|
||||
log_len += 1;
|
||||
}
|
||||
|
||||
write(STDERR_FILENO, log_str, log_len);
|
||||
int ret = write(STDERR_FILENO, log_str, log_len);
|
||||
if (ret == -1) {
|
||||
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno))
|
||||
}
|
||||
}
|
||||
|
||||
void sist_log(char *filepath, int level, char *str) {
|
||||
/**
 * printf-style logging entry point: collects the variadic arguments and
 * forwards them to vsist_logf().
 *
 * @param filepath source tag shown in the log line
 * @param level    numeric log level
 * @param format   printf-style format string, followed by its arguments
 */
void sist_logf(const char *filepath, int level, char *format, ...) {
    va_list varargs;

    va_start(varargs, format);
    vsist_logf(filepath, level, format, varargs);
    va_end(varargs);
}
|
||||
|
||||
void sist_log(const char *filepath, int level, char *str) {
|
||||
|
||||
static int is_tty = -1;
|
||||
if (is_tty == -1) {
|
||||
@@ -81,7 +90,7 @@ void sist_log(char *filepath, int level, char *str) {
|
||||
if (is_tty) {
|
||||
log_len = snprintf(
|
||||
log_str, sizeof(log_str),
|
||||
"\033[%dm[%04X]%s [%s] [%s %s] %s \033[0m\n",
|
||||
"\033[%dm[%04llX]%s [%s] [%s %s] %s \033[0m\n",
|
||||
31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
|
||||
datetime, log_levels[level], filepath,
|
||||
str
|
||||
@@ -89,11 +98,14 @@ void sist_log(char *filepath, int level, char *str) {
|
||||
} else {
|
||||
log_len = snprintf(
|
||||
log_str, sizeof(log_str),
|
||||
"[%04X] [%s] [%s %s] %s \n",
|
||||
"[%04llX] [%s] [%s %s] %s \n",
|
||||
pid, datetime, log_levels[level], filepath,
|
||||
str
|
||||
);
|
||||
}
|
||||
|
||||
write(STDERR_FILENO, log_str, log_len);
|
||||
int ret = write(STDERR_FILENO, log_str, log_len);
|
||||
if (ret == -1) {
|
||||
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#ifndef SIST2_LOG_H
|
||||
#define SIST2_LOG_H
|
||||
|
||||
|
||||
#define LOG_MAX_LENGTH 8192
|
||||
|
||||
#define SIST_DEBUG 0
|
||||
@@ -36,10 +37,11 @@
|
||||
sist_log(filepath, SIST_FATAL, str);\
|
||||
exit(-1);
|
||||
|
||||
#include "src/sist.h"
|
||||
#include "sist.h"
|
||||
|
||||
void sist_logf(char *filepath, int level, char *format, ...);
|
||||
void sist_logf(const char *filepath, int level, char *format, ...);
|
||||
void vsist_logf(const char *filepath, int level, char *format, va_list ap);
|
||||
|
||||
void sist_log(char *filepath, int level, char *str);
|
||||
void sist_log(const char *filepath, int level, char *str);
|
||||
|
||||
#endif
|
||||
|
||||
319
src/main.c
319
src/main.c
@@ -1,25 +1,35 @@
|
||||
#include "sist.h"
|
||||
#include "ctx.h"
|
||||
|
||||
#include <third-party/argparse/argparse.h>
|
||||
#include <locale.h>
|
||||
|
||||
#include "cli.h"
|
||||
#include "io/serialize.h"
|
||||
#include "io/store.h"
|
||||
#include "tpool.h"
|
||||
#include "io/walk.h"
|
||||
#include "index/elastic.h"
|
||||
#include "web/serve.h"
|
||||
#include "parsing/mime.h"
|
||||
#include "parsing/parse.h"
|
||||
|
||||
#include "stats.h"
|
||||
|
||||
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
|
||||
|
||||
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
|
||||
|
||||
|
||||
static const char *const Version = "1.2.12";
|
||||
static const char *const Version = "2.8.4";
|
||||
static const char *const usage[] = {
|
||||
"sist2 scan [OPTION]... PATH",
|
||||
"sist2 index [OPTION]... INDEX",
|
||||
"sist2 web [OPTION]... INDEX...",
|
||||
"sist2 exec-script [OPTION]... INDEX",
|
||||
NULL,
|
||||
};
|
||||
|
||||
void global_init() {
|
||||
curl_global_init(CURL_GLOBAL_NOTHING);
|
||||
av_log_set_level(AV_LOG_QUIET);
|
||||
opcInitLibrary();
|
||||
}
|
||||
|
||||
void init_dir(const char *dirpath) {
|
||||
char path[PATH_MAX];
|
||||
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
|
||||
@@ -38,31 +48,139 @@ void scan_print_header() {
|
||||
LOG_INFOF("main.c", "sist2 v%s", Version)
|
||||
}
|
||||
|
||||
void sist2_scan(scan_args_t *args) {
|
||||
void _store(char *key, size_t key_len, char *buf, size_t buf_len) {
|
||||
store_write(ScanCtx.index.store, key, key_len, buf, buf_len);
|
||||
}
|
||||
|
||||
void _log(const char *filepath, int level, char *str) {
|
||||
if (level == LEVEL_FATAL) {
|
||||
sist_log(filepath, level, str);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
if (LogCtx.verbose) {
|
||||
if (level == LEVEL_DEBUG) {
|
||||
if (LogCtx.very_verbose) {
|
||||
sist_log(filepath, level, str);
|
||||
}
|
||||
} else {
|
||||
sist_log(filepath, level, str);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void _logf(const char *filepath, int level, char *format, ...) {
|
||||
|
||||
va_list args;
|
||||
|
||||
va_start(args, format);
|
||||
if (level == LEVEL_FATAL) {
|
||||
vsist_logf(filepath, level, format, args);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
if (LogCtx.verbose) {
|
||||
if (level == LEVEL_DEBUG) {
|
||||
if (LogCtx.very_verbose) {
|
||||
vsist_logf(filepath, level, format, args);
|
||||
}
|
||||
} else {
|
||||
vsist_logf(filepath, level, format, args);
|
||||
}
|
||||
}
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
void initialize_scan_context(scan_args_t *args) {
|
||||
|
||||
// Arc
|
||||
ScanCtx.arc_ctx.mode = args->archive_mode;
|
||||
ScanCtx.arc_ctx.log = _log;
|
||||
ScanCtx.arc_ctx.logf = _logf;
|
||||
ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
|
||||
|
||||
// Comic
|
||||
ScanCtx.comic_ctx.log = _log;
|
||||
ScanCtx.comic_ctx.logf = _logf;
|
||||
ScanCtx.comic_ctx.store = _store;
|
||||
ScanCtx.comic_ctx.tn_size = args->size;
|
||||
ScanCtx.comic_ctx.tn_qscale = args->quality;
|
||||
ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
|
||||
ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
|
||||
|
||||
// Ebook
|
||||
pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
|
||||
ScanCtx.ebook_ctx.content_size = args->content_size;
|
||||
ScanCtx.ebook_ctx.tn_size = args->size;
|
||||
ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
|
||||
ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
|
||||
ScanCtx.ebook_ctx.log = _log;
|
||||
ScanCtx.ebook_ctx.logf = _logf;
|
||||
ScanCtx.ebook_ctx.store = _store;
|
||||
|
||||
// Font
|
||||
ScanCtx.font_ctx.enable_tn = args->size > 0;
|
||||
ScanCtx.font_ctx.log = _log;
|
||||
ScanCtx.font_ctx.logf = _logf;
|
||||
ScanCtx.font_ctx.store = _store;
|
||||
|
||||
// Media
|
||||
ScanCtx.media_ctx.tn_qscale = args->quality;
|
||||
ScanCtx.media_ctx.tn_size = args->size;
|
||||
ScanCtx.media_ctx.log = _log;
|
||||
ScanCtx.media_ctx.logf = _logf;
|
||||
ScanCtx.media_ctx.store = _store;
|
||||
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
|
||||
init_media();
|
||||
|
||||
// OOXML
|
||||
ScanCtx.ooxml_ctx.content_size = args->content_size;
|
||||
ScanCtx.ooxml_ctx.log = _log;
|
||||
ScanCtx.ooxml_ctx.logf = _logf;
|
||||
ScanCtx.ooxml_ctx.store = _store;
|
||||
|
||||
// MOBI
|
||||
ScanCtx.mobi_ctx.content_size = args->content_size;
|
||||
ScanCtx.mobi_ctx.log = _log;
|
||||
ScanCtx.mobi_ctx.logf = _logf;
|
||||
|
||||
// TEXT
|
||||
ScanCtx.text_ctx.content_size = args->content_size;
|
||||
ScanCtx.text_ctx.log = _log;
|
||||
ScanCtx.text_ctx.logf = _logf;
|
||||
|
||||
ScanCtx.tn_qscale = args->quality;
|
||||
ScanCtx.tn_size = args->size;
|
||||
ScanCtx.content_size = args->content_size;
|
||||
ScanCtx.threads = args->threads;
|
||||
ScanCtx.depth = args->depth;
|
||||
ScanCtx.archive_mode = args->archive_mode;
|
||||
|
||||
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
|
||||
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
|
||||
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
|
||||
strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
|
||||
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
|
||||
ScanCtx.tesseract_lang = args->tesseract_lang;
|
||||
ScanCtx.tesseract_path = args->tesseract_path;
|
||||
ScanCtx.fast = args->fast;
|
||||
|
||||
init_dir(ScanCtx.index.path);
|
||||
// Raw
|
||||
ScanCtx.raw_ctx.tn_qscale = args->quality;
|
||||
ScanCtx.raw_ctx.tn_size = args->size;
|
||||
ScanCtx.raw_ctx.log = _log;
|
||||
ScanCtx.raw_ctx.logf = _logf;
|
||||
ScanCtx.raw_ctx.store = _store;
|
||||
}
|
||||
|
||||
|
||||
void sist2_scan(scan_args_t *args) {
|
||||
|
||||
ScanCtx.mime_table = mime_get_mime_table();
|
||||
ScanCtx.ext_table = mime_get_ext_table();
|
||||
|
||||
initialize_scan_context(args);
|
||||
|
||||
init_dir(ScanCtx.index.path);
|
||||
|
||||
char store_path[PATH_MAX];
|
||||
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
|
||||
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||
ScanCtx.index.store = store_create(store_path);
|
||||
ScanCtx.index.store = store_create(store_path, STORE_SIZE_TN);
|
||||
|
||||
scan_print_header();
|
||||
|
||||
@@ -72,9 +190,18 @@ void sist2_scan(scan_args_t *args) {
|
||||
|
||||
DIR *dir = opendir(args->incremental);
|
||||
if (dir == NULL) {
|
||||
perror("opendir");
|
||||
return;
|
||||
LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno))
|
||||
}
|
||||
|
||||
char descriptor_path[PATH_MAX];
|
||||
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental);
|
||||
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
|
||||
|
||||
if (strcmp(original_desc.version, Version) != 0) {
|
||||
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", original_desc.version,
|
||||
Version, INDEX_VERSION_EXTERNAL)
|
||||
}
|
||||
|
||||
struct dirent *de;
|
||||
while ((de = readdir(dir)) != NULL) {
|
||||
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
|
||||
@@ -85,20 +212,22 @@ void sist2_scan(scan_args_t *args) {
|
||||
}
|
||||
closedir(dir);
|
||||
|
||||
printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table));
|
||||
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
|
||||
}
|
||||
|
||||
ScanCtx.pool = tpool_create(args->threads, thread_cleanup);
|
||||
ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE);
|
||||
tpool_start(ScanCtx.pool);
|
||||
walk_directory_tree(ScanCtx.index.desc.root);
|
||||
tpool_wait(ScanCtx.pool);
|
||||
tpool_destroy(ScanCtx.pool);
|
||||
|
||||
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
|
||||
|
||||
if (args->incremental != NULL) {
|
||||
char dst_path[PATH_MAX];
|
||||
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
|
||||
snprintf(dst_path, PATH_MAX, "%s_index_original", ScanCtx.index.path);
|
||||
store_t *source = store_create(store_path);
|
||||
store_t *source = store_create(store_path, STORE_SIZE_TN);
|
||||
|
||||
DIR *dir = opendir(args->incremental);
|
||||
if (dir == NULL) {
|
||||
@@ -115,6 +244,13 @@ void sist2_scan(scan_args_t *args) {
|
||||
}
|
||||
closedir(dir);
|
||||
store_destroy(source);
|
||||
|
||||
snprintf(store_path, PATH_MAX, "%stags", args->incremental);
|
||||
snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
|
||||
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||
store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
|
||||
store_copy(source_tags, dst_path);
|
||||
store_destroy(source_tags);
|
||||
}
|
||||
|
||||
store_destroy(ScanCtx.index.store);
|
||||
@@ -123,6 +259,7 @@ void sist2_scan(scan_args_t *args) {
|
||||
void sist2_index(index_args_t *args) {
|
||||
|
||||
IndexCtx.es_url = args->es_url;
|
||||
IndexCtx.es_index = args->es_index;
|
||||
IndexCtx.batch_size = args->batch_size;
|
||||
|
||||
if (!args->print) {
|
||||
@@ -137,17 +274,21 @@ void sist2_index(index_args_t *args) {
|
||||
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
|
||||
|
||||
if (strcmp(desc.version, Version) != 0 && strcmp(desc.version, INDEX_VERSION_EXTERNAL) != 0) {
|
||||
fprintf(stderr, "Version mismatch! Index is %s but executable is %s/%s\n",
|
||||
desc.version, Version, INDEX_VERSION_EXTERNAL);
|
||||
return;
|
||||
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", desc.version, Version,
|
||||
INDEX_VERSION_EXTERNAL)
|
||||
}
|
||||
|
||||
DIR *dir = opendir(args->index_path);
|
||||
if (dir == NULL) {
|
||||
perror("opendir");
|
||||
return;
|
||||
LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno))
|
||||
}
|
||||
|
||||
char path_tmp[PATH_MAX];
|
||||
snprintf(path_tmp, sizeof(path_tmp), "%s/tags", args->index_path);
|
||||
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||
IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
|
||||
IndexCtx.tags = store_read_all(IndexCtx.tag_store);
|
||||
|
||||
index_func f;
|
||||
if (args->print) {
|
||||
f = print_json;
|
||||
@@ -155,6 +296,16 @@ void sist2_index(index_args_t *args) {
|
||||
f = index_json;
|
||||
}
|
||||
|
||||
void (*cleanup)();
|
||||
if (args->print) {
|
||||
cleanup = NULL;
|
||||
} else {
|
||||
cleanup = elastic_cleanup;
|
||||
}
|
||||
|
||||
IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE);
|
||||
tpool_start(IndexCtx.pool);
|
||||
|
||||
struct dirent *de;
|
||||
while ((de = readdir(dir)) != NULL) {
|
||||
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
|
||||
@@ -165,17 +316,44 @@ void sist2_index(index_args_t *args) {
|
||||
}
|
||||
closedir(dir);
|
||||
|
||||
tpool_wait(IndexCtx.pool);
|
||||
|
||||
tpool_destroy(IndexCtx.pool);
|
||||
|
||||
if (!args->print) {
|
||||
elastic_flush();
|
||||
destroy_indexer(args->script, desc.uuid);
|
||||
finish_indexer(args->script, args->async_script, desc.uuid);
|
||||
}
|
||||
|
||||
store_destroy(IndexCtx.tag_store);
|
||||
g_hash_table_remove_all(IndexCtx.tags);
|
||||
g_hash_table_destroy(IndexCtx.tags);
|
||||
}
|
||||
|
||||
/**
 * Entry point of the `exec-script` command: reads the descriptor of the
 * index at args->index_path and runs the user script against it.
 *
 * Verbose logging is forced on for this command. args->script is released
 * with free() before returning.
 *
 * @param args parsed exec-script arguments
 */
void sist2_exec_script(exec_args_t *args) {

    LogCtx.verbose = TRUE;

    char desc_file[PATH_MAX];
    snprintf(desc_file, sizeof(desc_file), "%s/descriptor.json", args->index_path);
    index_descriptor_t index_desc = read_index_descriptor(desc_file);

    IndexCtx.es_url = args->es_url;

    LOG_DEBUGF("main.c", "descriptor version %s (%s)", index_desc.version, index_desc.type)

    execute_update_script(args->script, args->async_script, index_desc.uuid);
    free(args->script);
}
|
||||
|
||||
void sist2_web(web_args_t *args) {
|
||||
|
||||
WebCtx.es_url = args->es_url;
|
||||
WebCtx.es_index = args->es_index;
|
||||
WebCtx.index_count = args->index_count;
|
||||
WebCtx.b64credentials = args->b64credentials;
|
||||
WebCtx.auth_user = args->auth_user;
|
||||
WebCtx.auth_pass = args->auth_pass;
|
||||
WebCtx.auth_enabled = args->auth_enabled;
|
||||
WebCtx.tag_auth_enabled = args->tag_auth_enabled;
|
||||
|
||||
for (int i = 0; i < args->index_count; i++) {
|
||||
char *abs_path = abspath(args->indices[i]);
|
||||
@@ -185,7 +363,11 @@ void sist2_web(web_args_t *args) {
|
||||
char path_tmp[PATH_MAX];
|
||||
|
||||
snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path);
|
||||
WebCtx.indices[i].store = store_create(path_tmp);
|
||||
WebCtx.indices[i].store = store_create(path_tmp, STORE_SIZE_TN);
|
||||
|
||||
snprintf(path_tmp, PATH_MAX, "%stags", abs_path);
|
||||
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||
WebCtx.indices[i].tag_store = store_create(path_tmp, STORE_SIZE_TAG);
|
||||
|
||||
snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path);
|
||||
WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
|
||||
@@ -195,21 +377,25 @@ void sist2_web(web_args_t *args) {
|
||||
free(abs_path);
|
||||
}
|
||||
|
||||
serve(args->bind, args->port);
|
||||
serve(args->listen_address);
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
|
||||
global_init();
|
||||
setlocale(LC_ALL, "");
|
||||
|
||||
scan_args_t *scan_args = scan_args_create();
|
||||
index_args_t *index_args = index_args_create();
|
||||
web_args_t *web_args = web_args_create();
|
||||
exec_args_t *exec_args = exec_args_create();
|
||||
|
||||
int arg_version = 0;
|
||||
|
||||
char *common_es_url = NULL;
|
||||
char *common_es_index = NULL;
|
||||
char *common_script_path = NULL;
|
||||
int common_async_script = 0;
|
||||
int common_threads = 0;
|
||||
|
||||
struct argparse_option options[] = {
|
||||
OPT_HELP(),
|
||||
@@ -219,7 +405,7 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
|
||||
|
||||
OPT_GROUP("Scan options"),
|
||||
OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_FLOAT('q', "quality", &scan_args->quality,
|
||||
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
|
||||
OPT_INTEGER(0, "size", &scan_args->size,
|
||||
@@ -238,20 +424,37 @@ int main(int argc, const char *argv[]) {
|
||||
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
|
||||
OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see "
|
||||
"which are installed on your machine)"),
|
||||
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
|
||||
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
|
||||
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
|
||||
"(see USAGE.md). DEFAULT: 0.0005"),
|
||||
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
|
||||
"Maximum memory buffer size per thread in MB for files inside archives "
|
||||
"(see USAGE.md). DEFAULT: 2000"),
|
||||
|
||||
OPT_GROUP("Index options"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
|
||||
OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
|
||||
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
|
||||
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
|
||||
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
|
||||
"(You must use this option the first time you use the index command)"),
|
||||
|
||||
OPT_GROUP("Web options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||
OPT_STRING(0, "bind", &web_args->bind, "Listen on this address. DEFAULT=localhost"),
|
||||
OPT_STRING(0, "port", &web_args->port, "Listen on this port. DEFAULT=4090"),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
|
||||
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
|
||||
OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"),
|
||||
|
||||
OPT_GROUP("Exec-script options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
|
||||
|
||||
OPT_END(),
|
||||
};
|
||||
@@ -263,7 +466,7 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
if (arg_version) {
|
||||
printf(Version);
|
||||
exit(0);
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (LogCtx.very_verbose != 0) {
|
||||
@@ -272,25 +475,35 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
web_args->es_url = common_es_url;
|
||||
index_args->es_url = common_es_url;
|
||||
exec_args->es_url = common_es_url;
|
||||
|
||||
web_args->es_index = common_es_index;
|
||||
index_args->es_index = common_es_index;
|
||||
exec_args->es_index = common_es_index;
|
||||
|
||||
index_args->script_path = common_script_path;
|
||||
exec_args->script_path = common_script_path;
|
||||
index_args->threads = common_threads;
|
||||
scan_args->threads = common_threads;
|
||||
exec_args->async_script = common_async_script;
|
||||
index_args->async_script = common_async_script;
|
||||
|
||||
if (argc == 0) {
|
||||
argparse_usage(&argparse);
|
||||
return 1;
|
||||
goto end;
|
||||
} else if (strcmp(argv[0], "scan") == 0) {
|
||||
|
||||
int err = scan_args_validate(scan_args, argc, argv);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
goto end;
|
||||
}
|
||||
sist2_scan(scan_args);
|
||||
|
||||
}
|
||||
|
||||
else if (strcmp(argv[0], "index") == 0) {
|
||||
} else if (strcmp(argv[0], "index") == 0) {
|
||||
|
||||
int err = index_args_validate(index_args, argc, argv);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
goto end;
|
||||
}
|
||||
sist2_index(index_args);
|
||||
|
||||
@@ -298,22 +511,30 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
int err = web_args_validate(web_args, argc, argv);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
goto end;
|
||||
}
|
||||
sist2_web(web_args);
|
||||
|
||||
}
|
||||
else {
|
||||
} else if (strcmp(argv[0], "exec-script") == 0) {
|
||||
|
||||
int err = exec_args_validate(exec_args, argc, argv);
|
||||
if (err != 0) {
|
||||
goto end;
|
||||
}
|
||||
sist2_exec_script(exec_args);
|
||||
|
||||
} else {
|
||||
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
|
||||
argparse_usage(&argparse);
|
||||
return 1;
|
||||
goto end;
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
end:
|
||||
scan_args_destroy(scan_args);
|
||||
|
||||
index_args_destroy(index_args);
|
||||
web_args_destroy(web_args);
|
||||
exec_args_destroy(exec_args);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1,157 +0,0 @@
|
||||
#include "arc.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
#define ARC_BUF_SIZE 8192
|
||||
|
||||
/**
 * Tell whether a compressed ("filtered") file wraps a tar archive, i.e.
 * whether its name with the last extension removed ends in ".tar"
 * (for example "backup.tar.gz").
 *
 * @param filepath NUL-terminated path of the file.
 * @param ext      Offset in filepath of the first character following the
 *                 last '.', so that ext - 1 is the length of the name without
 *                 its extension. 0 means "no extension information".
 * @return 1 (TRUE) when the stem ends in ".tar", 0 (FALSE) otherwise.
 *
 * The check is done in place: no copy of the path is made, and an ext that is
 * negative or larger than the string is handled instead of overrunning a
 * scratch buffer.
 */
int should_parse_filtered_file(const char *filepath, int ext) {
    static const char tar_suffix[] = ".tar";
    const size_t suffix_len = sizeof(tar_suffix) - 1;

    if (filepath == NULL || ext <= 0) {
        return 0;
    }

    // The stem is everything before the final '.', clamped to the real string length
    size_t stem_len = (size_t) ext - 1;
    const size_t path_len = strlen(filepath);
    if (stem_len > path_len) {
        stem_len = path_len;
    }

    if (stem_len < suffix_len) {
        return 0;
    }

    // "tar" contains no '.', so a stem ending in ".tar" has its last '.' exactly there
    return memcmp(filepath + stem_len - suffix_len, tar_suffix, suffix_len) == 0;
}
|
||||
|
||||
int arc_read(struct vfile *f, void *buf, size_t size) {
|
||||
return archive_read_data(f->arc, buf, size);
|
||||
}
|
||||
|
||||
typedef struct arc_data {
|
||||
vfile_t *f;
|
||||
char buf[ARC_BUF_SIZE];
|
||||
} arc_data_f;
|
||||
|
||||
int vfile_open_callback(struct archive *a, void *user_data) {
|
||||
arc_data_f *data = user_data;
|
||||
|
||||
if (data->f->is_fs_file && data->f->fd == -1) {
|
||||
data->f->fd = open(data->f->filepath, O_RDONLY);
|
||||
}
|
||||
|
||||
return ARCHIVE_OK;
|
||||
}
|
||||
|
||||
long vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
|
||||
arc_data_f *data = user_data;
|
||||
|
||||
*buf = data->buf;
|
||||
return data->f->read(data->f, data->buf, ARC_BUF_SIZE);
|
||||
}
|
||||
|
||||
int vfile_close_callback(struct archive *a, void *user_data) {
|
||||
arc_data_f *data = user_data;
|
||||
|
||||
if (data->f->close != NULL) {
|
||||
data->f->close(data->f);
|
||||
}
|
||||
|
||||
return ARCHIVE_OK;
|
||||
}
|
||||
|
||||
/**
 * Process an archive file.
 *
 * Filesystem files are opened by name; files nested inside another archive
 * are only opened (through the vfile callbacks) when the archive mode is
 * ARC_MODE_RECURSE, otherwise the function returns without doing anything.
 *
 * - ARC_MODE_LIST: the paths of all regular entries are joined with '\n' and
 *   attached to `doc` as MetaContent.
 * - otherwise: every regular entry is handed to parse() as a sub-job whose
 *   path is "<archive path>#/<entry path>" and whose parent is doc->uuid.
 *
 * Entry names come from the archive and are therefore untrusted: entries
 * without a usable pathname are skipped, and entries whose combined path does
 * not fit in the sub-job path buffer are logged and skipped instead of
 * overflowing it.
 *
 * @param f   Virtual file to read the archive from.
 * @param doc Document the archive belongs to.
 */
void parse_archive(vfile_t *f, document_t *doc) {

    struct archive *a;
    struct archive_entry *entry;

    arc_data_f data;
    data.f = f;

    int ret = 0;
    if (data.f->is_fs_file) {

        a = archive_read_new();
        archive_read_support_filter_all(a);
        archive_read_support_format_all(a);

        ret = archive_read_open_filename(a, doc->filepath, ARC_BUF_SIZE);
    } else if (ScanCtx.archive_mode == ARC_MODE_RECURSE) {

        a = archive_read_new();
        archive_read_support_filter_all(a);
        archive_read_support_format_all(a);

        ret = archive_read_open(
                a, &data,
                vfile_open_callback,
                vfile_read_callback,
                vfile_close_callback
        );
    } else {
        return;
    }

    if (ret != ARCHIVE_OK) {
        LOG_ERRORF(doc->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a))
        archive_read_free(a);
        return;
    }

    if (ScanCtx.archive_mode == ARC_MODE_LIST) {

        dyn_buffer_t buf = dyn_buffer_create();

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            if (S_ISREG(archive_entry_stat(entry)->st_mode)) {

                char *path = (char *) archive_entry_pathname(entry);
                if (path == NULL) {
                    // No usable name for this entry: nothing to list
                    continue;
                }

                dyn_buffer_append_string(&buf, path);
                dyn_buffer_write_char(&buf, '\n');
            }
        }
        dyn_buffer_write_char(&buf, '\0');

        // buf.cur already accounts for the terminating NUL written above
        meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
        meta_list->key = MetaContent;
        strcpy(meta_list->strval, buf.buf);
        APPEND_META(doc, meta_list);
        dyn_buffer_destroy(&buf);

    } else {

        // Capacity reserved for the sub-job path, allocated right after the job itself
        const size_t path_cap = PATH_MAX * 2;

        parse_job_t *sub_job = malloc(sizeof(parse_job_t) + path_cap);
        if (sub_job == NULL) {
            archive_read_free(a);
            return;
        }

        sub_job->vfile.close = NULL;
        sub_job->vfile.read = arc_read;
        sub_job->vfile.arc = a;
        sub_job->vfile.filepath = sub_job->filepath;
        sub_job->vfile.is_fs_file = FALSE;
        memcpy(sub_job->parent, doc->uuid, sizeof(uuid_t));

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            sub_job->info = *archive_entry_stat(entry);
            if (S_ISREG(sub_job->info.st_mode)) {

                const char *entry_path = archive_entry_pathname(entry);
                if (entry_path == NULL) {
                    continue;
                }

                int written = snprintf(sub_job->filepath, path_cap, "%s#/%s", f->filepath, entry_path);
                if (written < 0 || (size_t) written >= path_cap) {
                    LOG_ERRORF(doc->filepath, "(arc.c) Path of archive entry is too long, skipping: %s", entry_path);
                    continue;
                }

                // The "#/" separator guarantees that a '/' is present
                sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;

                char *p = strrchr(sub_job->filepath, '.');
                if (p != NULL) {
                    sub_job->ext = (int) (p - sub_job->filepath + 1);
                } else {
                    sub_job->ext = (int) strlen(sub_job->filepath);
                }

                parse(sub_job);
            }
        }

        free(sub_job);
    }

    archive_read_free(a);
}
|
||||
@@ -1,12 +0,0 @@
|
||||
#ifndef SIST2_ARC_H
|
||||
#define SIST2_ARC_H
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
int should_parse_filtered_file(const char *filepath, int ext);
|
||||
|
||||
void parse_archive(vfile_t *f, document_t *doc);
|
||||
|
||||
int arc_read(struct vfile * f, void *buf, size_t size);
|
||||
|
||||
#endif
|
||||
@@ -1,129 +0,0 @@
|
||||
#include "doc.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
/**
 * Recursively walk the element the reader is positioned on and append the
 * text of every <t> element found to `buf`, each run followed by a space.
 *
 * The libxml2 "last error" is inspected on entry. It is cleared once it has
 * been reported; otherwise the same error would be picked up again by every
 * later call (NOTE(review): this assumes libxml2 keeps the last error until
 * xmlResetLastError() is called - confirm against the libxml2 version in use).
 *
 * @param reader MCE text reader positioned on an element.
 * @param buf    Buffer receiving the extracted text.
 * @return 0 on success, -1 when a fatal XML error was reported.
 */
int dump_text(mceTextReader_t *reader, dyn_buffer_t *buf) {

    mce_skip_attributes(reader);

    xmlErrorPtr err = xmlGetLastError();
    if (err != NULL) {
        const int is_fatal = err->level == XML_ERR_FATAL;

        // Log before resetting: the reset releases err->message
        if (is_fatal) {
            LOG_ERRORF("doc.c", "Got fatal XML error while parsing document: %s", err->message)
        } else {
            LOG_ERRORF("doc.c", "Got recoverable XML error while parsing document: %s", err->message)
        }
        xmlResetLastError();

        if (is_fatal) {
            return -1;
        }
    }

    mce_start_children(reader) {
        mce_start_element(reader, NULL, _X("t")) {
            mce_skip_attributes(reader);
            mce_start_children(reader) {
                mce_start_text(reader) {
                    char *str = (char *) xmlTextReaderConstValue(reader->reader);
                    dyn_buffer_append_string(buf, str);
                    dyn_buffer_write_char(buf, ' ');
                } mce_end_text(reader);
            } mce_end_children(reader);
        } mce_end_element(reader);

        // Any other element: descend into it
        mce_start_element(reader, NULL, NULL) {
            int ret = dump_text(reader, buf);
            if (ret != 0) {
                return ret;
            }
        } mce_end_element(reader);

    } mce_end_children(reader)
    return 0;
}
|
||||
|
||||
__always_inline
|
||||
int should_read_part(opcPart part) {
|
||||
|
||||
char *part_name = (char *) part;
|
||||
|
||||
if (part == NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if ( // Word
|
||||
strcmp(part_name, "word/document.xml") == 0
|
||||
|| strncmp(part_name, "word/footer", sizeof("word/footer") - 1) == 0
|
||||
|| strncmp(part_name, "word/header", sizeof("word/header") - 1) == 0
|
||||
// PowerPoint
|
||||
|| strncmp(part_name, "ppt/slides/slide", sizeof("ppt/slides/slide") - 1) == 0
|
||||
|| strncmp(part_name, "ppt/notesSlides/notesSlide", sizeof("ppt/notesSlides/notesSlide") - 1) == 0
|
||||
// Excel
|
||||
|| strncmp(part_name, "xl/worksheets/sheet", sizeof("xl/worksheets/sheet") - 1) == 0
|
||||
|| strcmp(part_name, "xl/sharedStrings.xml") == 0
|
||||
|| strcmp(part_name, "xl/workbook.xml") == 0
|
||||
) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
__always_inline
|
||||
int read_part(opcContainer *c, dyn_buffer_t *buf, opcPart part, document_t *doc) {
|
||||
|
||||
mceTextReader_t reader;
|
||||
int ret = opcXmlReaderOpen(c, &reader, part, NULL, "UTF-8", XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
|
||||
|
||||
if (ret != OPC_ERROR_NONE) {
|
||||
LOG_ERRORF(doc->filepath, "(doc.c) opcXmlReaderOpen() returned error code %d", ret);
|
||||
return -1;
|
||||
}
|
||||
|
||||
mce_start_document(&reader) {
|
||||
mce_start_element(&reader, NULL, NULL) {
|
||||
ret = dump_text(&reader, buf);
|
||||
if (ret != 0) {
|
||||
mceTextReaderCleanup(&reader);
|
||||
return -1;
|
||||
}
|
||||
} mce_end_element(&reader);
|
||||
} mce_end_document(&reader);
|
||||
|
||||
mceTextReaderCleanup(&reader);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Extract the text of an Office Open XML document held in memory and attach
 * it to `doc` as MetaContent.
 *
 * Parts are visited in container order; reading stops at the first part that
 * fails. Nothing is attached when no text was collected.
 *
 * @param mem     Document bytes, or NULL (in which case nothing happens).
 * @param mem_len Number of bytes in `mem`.
 * @param doc     Document receiving the metadata.
 */
void parse_doc(void *mem, size_t mem_len, document_t *doc) {

    if (mem == NULL) {
        return;
    }

    opcContainer *container = opcContainerOpenMem(mem, mem_len, OPC_OPEN_READ_ONLY, NULL);
    if (container == NULL) {
        LOG_ERROR(doc->filepath, "(doc.c) Couldn't open document with opcContainerOpenMem()");
        return;
    }

    dyn_buffer_t text = dyn_buffer_create();

    opcPart current = opcPartGetFirst(container);
    for (;;) {
        if (should_read_part(current) && read_part(container, &text, current, doc) != 0) {
            break;
        }

        current = opcPartGetNext(container, current);
        if (!current) {
            break;
        }
    }

    opcContainerClose(container, OPC_CLOSE_NOW);

    if (text.cur > 0) {
        dyn_buffer_write_char(&text, '\0');

        // text.cur now includes the terminating NUL
        meta_line_t *content = malloc(sizeof(meta_line_t) + text.cur);
        content->key = MetaContent;
        strcpy(content->strval, text.buf);
        APPEND_META(doc, content)
    }

    dyn_buffer_destroy(&text);
}
|
||||
@@ -1,8 +0,0 @@
|
||||
#ifndef SIST2_DOC_H
|
||||
#define SIST2_DOC_H
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
void parse_doc(void *buf, size_t buf_len, document_t *doc);
|
||||
|
||||
#endif
|
||||
@@ -1,226 +0,0 @@
|
||||
#include "font.h"
|
||||
|
||||
|
||||
#include "src/ctx.h"
|
||||
|
||||
__thread FT_Library ft_lib = NULL;
|
||||
|
||||
|
||||
typedef struct text_dimensions {
|
||||
unsigned int width;
|
||||
unsigned int height;
|
||||
unsigned int baseline;
|
||||
} text_dimensions_t;
|
||||
|
||||
typedef struct glyph {
|
||||
int top;
|
||||
int height;
|
||||
int width;
|
||||
int descent;
|
||||
int ascent;
|
||||
int advance_width;
|
||||
unsigned char *pixmap;
|
||||
} glyph_t;
|
||||
|
||||
|
||||
__always_inline
|
||||
int kerning_offset(char c, char pc, FT_Face face) {
|
||||
FT_Vector kerning;
|
||||
FT_Get_Kerning(face, c, pc, FT_KERNING_DEFAULT, &kerning);
|
||||
|
||||
return (int) (kerning.x / 64);
|
||||
}
|
||||
|
||||
__always_inline
|
||||
glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
|
||||
glyph_t glyph;
|
||||
|
||||
glyph.pixmap = slot->bitmap.buffer;
|
||||
|
||||
glyph.width = (int) slot->bitmap.width;
|
||||
glyph.height = (int) slot->bitmap.rows;
|
||||
glyph.top = slot->bitmap_top;
|
||||
glyph.advance_width = (int) slot->advance.x / 64;
|
||||
|
||||
glyph.descent = MAX(0, glyph.height - glyph.top);
|
||||
glyph.ascent = MAX(0, MAX(glyph.top, glyph.height) - glyph.descent);
|
||||
|
||||
return glyph;
|
||||
}
|
||||
|
||||
/**
 * Measure the bitmap needed to render `text` with `face`.
 *
 * Width accumulates, per character, the larger of the glyph advance and glyph
 * width plus the kerning offset; height is the largest ascent plus the largest
 * descent; baseline is the largest descent.
 */
text_dimensions_t text_dimension(char *text, FT_Face face) {
    text_dimensions_t dim;
    dim.width = 0;

    unsigned int top_extent = 0;
    int bottom_extent = 0;
    char prev = 0;

    for (const char *p = text; *p != '\0'; p++) {
        const char cur = *p;

        FT_Load_Char(face, cur, 0);
        glyph_t g = ft_glyph_to_glyph(face->glyph);

        bottom_extent = MAX(bottom_extent, g.descent);
        top_extent = MAX(top_extent, MAX(g.height, g.ascent));

        int kerning_x = kerning_offset(cur, prev, face);
        dim.width += MAX(g.advance_width, g.width) + kerning_x;

        prev = cur;
    }

    dim.height = top_extent + bottom_extent;
    dim.baseline = bottom_extent;

    return dim;
}
|
||||
|
||||
/**
 * OR a glyph's pixels into the text bitmap with its top-left corner at (x, y).
 *
 * The bitmap is text_info.width pixels wide and text_info.height pixels high;
 * pixels that would land past the end of the bitmap are silently dropped.
 */
void draw_glyph(glyph_t *glyph, int x, int y, struct text_dimensions text_info, unsigned char *bitmap) {
    const unsigned int stride = text_info.width;
    const unsigned int limit = text_info.width * text_info.height;
    const unsigned int rows = glyph->height;
    const unsigned int cols = glyph->width;

    unsigned int src_idx = 0;
    unsigned int row_start = y * stride + x;

    for (unsigned int row = 0; row < rows; row++, row_start += stride) {
        for (unsigned int col = 0; col < cols; col++, src_idx++) {
            const unsigned int dst_idx = row_start + col;
            if (dst_idx < limit) {
                bitmap[dst_idx] |= glyph->pixmap[src_idx];
            }
        }
    }
}
|
||||
|
||||
/**
 * Serialize an 8-bit grayscale bitmap as a BMP file into `buf`.
 *
 * Layout: 14-byte file header, 40-byte DIB header, 256-entry color table
 * (inverted grayscale: index 0 is white), then the pixel rows from bottom to
 * top.
 *
 * Each row is padded to a multiple of 4 bytes based on the row length itself.
 * Padding on the absolute buffer offset is not equivalent, because the pixel
 * array starts at offset 1078, which is not a multiple of 4.
 *
 * @param buf        Destination buffer, expected to be empty on entry.
 * @param dimensions Width/height of `bitmap` in pixels.
 * @param bitmap     width * height pixels, top row first.
 */
void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned char *bitmap) {

    dyn_buffer_write_short(buf, 0x4D42); // Magic
    dyn_buffer_write_int(buf, 0); // Size placeholder
    dyn_buffer_write_int(buf, 0x5157); //Reserved
    dyn_buffer_write_int(buf, 14 + 40 + 256 * 4); // pixels offset

    dyn_buffer_write_int(buf, 40); // DIB size
    dyn_buffer_write_int(buf, (int) dimensions.width);
    dyn_buffer_write_int(buf, (int) dimensions.height);
    dyn_buffer_write_short(buf, 1); // Color planes
    dyn_buffer_write_short(buf, 8); // bits per pixel
    dyn_buffer_write_int(buf, 0); // compression
    dyn_buffer_write_int(buf, 0); // Ignored
    dyn_buffer_write_int(buf, 3800); // hres
    dyn_buffer_write_int(buf, 3800); // vres
    dyn_buffer_write_int(buf, 256); // Color count
    dyn_buffer_write_int(buf, 0); // Ignored

    // RGBA32 Color table (Grayscale)
    for (int i = 255; i >= 0; i--) {
        dyn_buffer_write_int(buf, i + (i << 8) + (i << 16));
    }

    // Pixel array: write from bottom to top, with each row padded to a multiple of 4 bytes
    const unsigned int row_padding = (4 - dimensions.width % 4) % 4;
    for (int y = (int) dimensions.height - 1; y >= 0; y--) {
        for (unsigned int x = 0; x < dimensions.width; x++) {
            dyn_buffer_write_char(buf, (char) bitmap[y * dimensions.width + x]);
        }
        for (unsigned int pad = 0; pad < row_padding; pad++) {
            dyn_buffer_write_char(buf, 0);
        }
    }

    // Size: the field sits at offset 2, so copy it rather than store through a misaligned int pointer
    const int file_size = (int) buf->cur;
    memcpy((char *) buf->buf + 2, &file_size, sizeof(file_size));
}
|
||||
|
||||
/**
 * Extract the name of a font held in memory and, when thumbnails are enabled
 * (ScanCtx.tn_size > 0), render that name with the font itself and store the
 * result as a BMP thumbnail keyed by the document UUID.
 *
 * The FreeType library handle is created lazily, once per thread.
 *
 * @param buf     Font file bytes, or NULL (in which case nothing happens).
 * @param buf_len Number of bytes in `buf`.
 * @param doc     Document receiving the MetaFontName metadata.
 *
 * The face is released on every exit path, and the font name is always
 * formatted with a bounded write since it comes from the font file.
 */
void parse_font(const char *buf, size_t buf_len, document_t *doc) {
    if (ft_lib == NULL) {
        FT_Init_FreeType(&ft_lib);
    }
    if (buf == NULL) {
        return;
    }

    FT_Face face;
    FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, &face);
    if (err != 0) {
        LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, ft_error_string(err));
        return;
    }

    char font_name[1024];
    const char *family = face->family_name != NULL ? face->family_name : "(null)";

    if (face->style_name == NULL || *(face->style_name) == '?') {
        snprintf(font_name, sizeof(font_name), "%s", family);
    } else {
        snprintf(font_name, sizeof(font_name), "%s %s", family, face->style_name);
    }

    // +1 for the terminating NUL copied by strcpy()
    meta_line_t *meta_name = malloc(sizeof(meta_line_t) + strlen(font_name) + 1);
    meta_name->key = MetaFontName;
    strcpy(meta_name->strval, font_name);
    APPEND_META(doc, meta_name)

    if (ScanCtx.tn_size <= 0) {
        FT_Done_Face(face);
        return;
    }

    int pixel = 64;
    int num_chars = (int) strlen(font_name);

    err = FT_Set_Pixel_Sizes(face, 0, pixel);
    if (err != 0) {
        LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, ft_error_string(err))
        FT_Done_Face(face);
        return;
    }

    text_dimensions_t dimensions = text_dimension(font_name, face);
    unsigned char *bitmap = calloc((size_t) dimensions.width * dimensions.height, 1);
    if (bitmap == NULL) {
        // Out of memory, or nothing to draw: no thumbnail
        FT_Done_Face(face);
        return;
    }

    FT_Vector pen;
    pen.x = 0;

    char pc = 0;
    for (int i = 0; i < num_chars; i++) {
        char c = font_name[i];

        err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
        if (err != 0) {
            // Retry with the other letter case before giving up on this character
            c = c >= 'a' && c <= 'z' ? c - 32 : c + 32;
            err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
            if (err != 0) {
                LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, ft_error_string(err));
                continue;
            }
        }
        glyph_t glyph = ft_glyph_to_glyph(face->glyph);

        pen.x += kerning_offset(c, pc, face);
        if (pen.x <= 0) {
            pen.x = ABS(glyph.advance_width - glyph.width);
        }
        pen.y = dimensions.height - glyph.ascent - dimensions.baseline;

        draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap);

        pen.x += glyph.advance_width;
        pc = c;
    }

    dyn_buffer_t bmp_data = dyn_buffer_create();
    bmp_format(&bmp_data, dimensions, bitmap);

    store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) bmp_data.buf, bmp_data.cur);

    dyn_buffer_destroy(&bmp_data);
    free(bitmap);

    FT_Done_Face(face);
}
|
||||
@@ -1,9 +0,0 @@
|
||||
#ifndef SIST2_FONT_H
|
||||
#define SIST2_FONT_H
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
|
||||
void parse_font(const char * buf, size_t buf_len, document_t *doc);
|
||||
|
||||
#endif
|
||||
@@ -1,399 +0,0 @@
|
||||
#include "src/sist.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
#define MIN_SIZE 32
|
||||
#define AVIO_BUF_SIZE 8192
|
||||
|
||||
__always_inline
|
||||
AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
|
||||
|
||||
AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
|
||||
AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
|
||||
jpeg->width = dstW;
|
||||
jpeg->height = dstH;
|
||||
jpeg->time_base.den = 1000000;
|
||||
jpeg->time_base.num = 1;
|
||||
jpeg->i_quant_factor = qscale;
|
||||
|
||||
jpeg->pix_fmt = AV_PIX_FMT_YUVJ420P;
|
||||
int ret = avcodec_open2(jpeg, jpeg_codec, NULL);
|
||||
|
||||
if (ret != 0) {
|
||||
printf("Could not open jpeg encoder: %s!\n", av_err2str(ret));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return jpeg;
|
||||
}
|
||||
|
||||
__always_inline
|
||||
AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
|
||||
|
||||
int dstW;
|
||||
int dstH;
|
||||
if (frame->width <= size && frame->height <= size) {
|
||||
dstW = frame->width;
|
||||
dstH = frame->height;
|
||||
} else {
|
||||
double ratio = (double) frame->width / frame->height;
|
||||
if (frame->width > frame->height) {
|
||||
dstW = size;
|
||||
dstH = (int) (size / ratio);
|
||||
} else {
|
||||
dstW = (int) (size * ratio);
|
||||
dstH = size;
|
||||
}
|
||||
}
|
||||
|
||||
if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
AVFrame *scaled_frame = av_frame_alloc();
|
||||
|
||||
struct SwsContext *ctx = sws_getContext(
|
||||
decoder->width, decoder->height, decoder->pix_fmt,
|
||||
dstW, dstH, AV_PIX_FMT_YUVJ420P,
|
||||
SWS_FAST_BILINEAR, 0, 0, 0
|
||||
);
|
||||
|
||||
int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1);
|
||||
uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len);
|
||||
|
||||
av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);
|
||||
|
||||
sws_scale(ctx,
|
||||
(const uint8_t *const *) frame->data, frame->linesize,
|
||||
0, decoder->height,
|
||||
scaled_frame->data, scaled_frame->linesize
|
||||
);
|
||||
|
||||
scaled_frame->width = dstW;
|
||||
scaled_frame->height = dstH;
|
||||
scaled_frame->format = AV_PIX_FMT_YUV420P;
|
||||
|
||||
sws_freeContext(ctx);
|
||||
|
||||
return scaled_frame;
|
||||
}
|
||||
|
||||
__always_inline
|
||||
AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, document_t *doc) {
|
||||
AVFrame *frame = av_frame_alloc();
|
||||
|
||||
AVPacket avPacket;
|
||||
av_init_packet(&avPacket);
|
||||
|
||||
int receive_ret = -EAGAIN;
|
||||
while (receive_ret == -EAGAIN) {
|
||||
// Get video frame
|
||||
while (1) {
|
||||
int read_frame_ret = av_read_frame(pFormatCtx, &avPacket);
|
||||
|
||||
if (read_frame_ret != 0) {
|
||||
if (read_frame_ret != AVERROR_EOF) {
|
||||
LOG_WARNINGF(doc->filepath,
|
||||
"(media.c) avcodec_read_frame() returned error code [%d] %s",
|
||||
read_frame_ret, av_err2str(read_frame_ret)
|
||||
)
|
||||
}
|
||||
av_frame_free(&frame);
|
||||
av_packet_unref(&avPacket);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//Ignore audio/other frames
|
||||
if (avPacket.stream_index != stream_idx) {
|
||||
av_packet_unref(&avPacket);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Feed it to decoder
|
||||
int decode_ret = avcodec_send_packet(decoder, &avPacket);
|
||||
if (decode_ret != 0) {
|
||||
LOG_WARNINGF(doc->filepath,
|
||||
"(media.c) avcodec_send_packet() returned error code [%d] %s",
|
||||
decode_ret, av_err2str(decode_ret)
|
||||
)
|
||||
}
|
||||
av_packet_unref(&avPacket);
|
||||
receive_ret = avcodec_receive_frame(decoder, frame);
|
||||
}
|
||||
return frame;
|
||||
}
|
||||
|
||||
#define APPEND_TAG_META(doc, tag_, keyname) \
|
||||
text_buffer_t tex = text_buffer_create(-1); \
|
||||
text_buffer_append_string0(&tex, tag_->value); \
|
||||
text_buffer_terminate_string(&tex); \
|
||||
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \
|
||||
meta_tag->key = keyname; \
|
||||
strcpy(meta_tag->strval, tex.dyn_buffer.buf); \
|
||||
APPEND_META(doc, meta_tag) \
|
||||
text_buffer_destroy(&tex);
|
||||
|
||||
__always_inline
|
||||
void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
|
||||
|
||||
AVDictionaryEntry *tag = NULL;
|
||||
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||
char key[32];
|
||||
strncpy(key, tag->key, sizeof(key));
|
||||
|
||||
char *ptr = key;
|
||||
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr);
|
||||
|
||||
if (strcmp(key, "artist") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaArtist)
|
||||
} else if (strcmp(key, "genre") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaGenre)
|
||||
} else if (strcmp(key, "title") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaTitle)
|
||||
} else if (strcmp(key, "album_artist") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaAlbumArtist)
|
||||
} else if (strcmp(key, "album") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaAlbum)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__always_inline
|
||||
void
|
||||
append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int include_audio_tags, int is_video) {
|
||||
|
||||
if (is_video) {
|
||||
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
|
||||
meta_duration->key = MetaMediaDuration;
|
||||
meta_duration->longval = pFormatCtx->duration / AV_TIME_BASE;
|
||||
APPEND_META(doc, meta_duration)
|
||||
|
||||
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
|
||||
meta_bitrate->key = MetaMediaBitrate;
|
||||
meta_bitrate->longval = pFormatCtx->bit_rate;
|
||||
APPEND_META(doc, meta_bitrate)
|
||||
}
|
||||
|
||||
AVDictionaryEntry *tag = NULL;
|
||||
if (is_video) {
|
||||
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||
if (include_audio_tags && strcmp(tag->key, "title") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaTitle)
|
||||
} else if (strcmp(tag->key, "comment") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaContent)
|
||||
} else if (include_audio_tags && strcmp(tag->key, "artist") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaArtist)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// EXIF metadata
|
||||
while ((tag = av_dict_get(frame->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||
if (include_audio_tags && strcmp(tag->key, "Artist") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaArtist)
|
||||
} else if (strcmp(tag->key, "ImageDescription") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaContent)
|
||||
} else if (strcmp(tag->key, "Make") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaExifMake)
|
||||
} else if (strcmp(tag->key, "Model") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaExifModel)
|
||||
} else if (strcmp(tag->key, "Software") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaExifSoftware)
|
||||
} else if (strcmp(tag->key, "FNumber") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaExifFNumber)
|
||||
} else if (strcmp(tag->key, "FocalLength") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaExifFocalLength)
|
||||
} else if (strcmp(tag->key, "UserComment") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaExifUserComment)
|
||||
} else if (strcmp(tag->key, "ISOSpeedRatings") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaExifIsoSpeedRatings)
|
||||
} else if (strcmp(tag->key, "ExposureTime") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaExifExposureTime)
|
||||
} else if (strcmp(tag->key, "DateTime") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaExifDateTime)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Collect codec and dimension metadata from an opened media container and,
 * when thumbnails are enabled (ScanCtx.tn_size > 0), store a JPEG thumbnail
 * made from one decoded video frame.
 *
 * The function consumes pFormatCtx: it is closed on every return path, so the
 * caller must not use it afterwards.
 *
 * @param pFormatCtx container previously opened with avformat_open_input()
 * @param doc        document that receives the metadata; doc->uuid is the key
 *                   under which the thumbnail is written to the store
 */
void parse_media(AVFormatContext *pFormatCtx, document_t *doc) {

    int video_stream = -1;
    int audio_stream = -1;

    AVStream *stream = NULL;
    AVCodec *video_codec = NULL;
    AVCodecContext *decoder = NULL;
    AVCodecContext *jpeg_encoder = NULL;
    AVFrame *frame = NULL;
    AVFrame *scaled_frame = NULL;

    avformat_find_stream_info(pFormatCtx, NULL);

    // Streams are visited from last to first; only the first audio stream and
    // the first video stream encountered in that order are described.
    for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) {
        AVStream *s = pFormatCtx->streams[i];

        if (s->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            if (audio_stream == -1) {
                meta_line_t *meta_audio = malloc(sizeof(meta_line_t));
                meta_audio->key = MetaMediaAudioCodec;
                meta_audio->intval = s->codecpar->codec_id;
                APPEND_META(doc, meta_audio)

                append_audio_meta(pFormatCtx, doc);
                audio_stream = i;
            }
        } else if (s->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {

            if (video_stream == -1) {
                meta_line_t *meta_vid = malloc(sizeof(meta_line_t));
                meta_vid->key = MetaMediaVideoCodec;
                meta_vid->intval = s->codecpar->codec_id;
                APPEND_META(doc, meta_vid)

                meta_line_t *meta_w = malloc(sizeof(meta_line_t));
                meta_w->key = MetaWidth;
                meta_w->intval = s->codecpar->width;
                APPEND_META(doc, meta_w)

                meta_line_t *meta_h = malloc(sizeof(meta_line_t));
                meta_h->key = MetaHeight;
                meta_h->intval = s->codecpar->height;
                APPEND_META(doc, meta_h)

                video_stream = i;
            }
        }
    }

    // Nothing left to do without a video stream or when thumbnails are disabled
    if (video_stream == -1 || !(ScanCtx.tn_size > 0)) {
        goto cleanup;
    }

    stream = pFormatCtx->streams[video_stream];

    if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
        goto cleanup;
    }

    // Decoder: give up early instead of feeding packets to a decoder that
    // could not be found, allocated or opened.
    video_codec = avcodec_find_decoder(stream->codecpar->codec_id);
    decoder = avcodec_alloc_context3(video_codec);
    if (video_codec == NULL || decoder == NULL
        || avcodec_parameters_to_context(decoder, stream->codecpar) < 0
        || avcodec_open2(decoder, video_codec, NULL) < 0) {
        LOG_ERROR(doc->filepath, "(media.c) Could not open video decoder")
        goto cleanup;
    }

    // Seek to 10% of the stream so the thumbnail is not the very first frame.
    // The same seek is retried (up to 21 attempts) until it succeeds.
    if (stream->nb_frames > 1 && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
        int seek_ret = 0;
        for (int attempt = 20; attempt >= 0; attempt--) {
            seek_ret = av_seek_frame(pFormatCtx, video_stream,
                                     stream->duration * 0.10, 0);
            if (seek_ret == 0) {
                break;
            }
        }
    }

    frame = read_frame(pFormatCtx, decoder, video_stream, doc);
    if (frame == NULL) {
        goto cleanup;
    }

    append_video_meta(pFormatCtx, frame, doc, audio_stream == -1, stream->nb_frames > 1);

    // Scale frame
    scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size);
    if (scaled_frame == NULL) {
        goto cleanup;
    }

    // Encode frame to jpeg; the thumbnail is only stored when the encoder
    // actually produced a packet.
    jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ScanCtx.tn_qscale);
    if (jpeg_encoder != NULL && avcodec_send_frame(jpeg_encoder, scaled_frame) == 0) {
        AVPacket jpeg_packet;
        av_init_packet(&jpeg_packet);
        // av_init_packet() leaves data/size untouched
        jpeg_packet.data = NULL;
        jpeg_packet.size = 0;

        if (avcodec_receive_packet(jpeg_encoder, &jpeg_packet) == 0) {
            // Save thumbnail
            store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data,
                        jpeg_packet.size);
        }

        av_packet_unref(&jpeg_packet);
    }

cleanup:
    if (scaled_frame != NULL) {
        av_free(*scaled_frame->data);
        av_frame_free(&scaled_frame);
    }
    av_frame_free(&frame);
    avcodec_free_context(&jpeg_encoder);
    avcodec_free_context(&decoder);

    // Closes the input, frees the context and resets the pointer to NULL
    avformat_close_input(&pFormatCtx);
}
|
||||
|
||||
/**
 * Open the media file at `filepath` with libavformat and hand it to
 * parse_media(). Failures are logged against doc->filepath.
 *
 * @param filepath path passed to avformat_open_input()
 * @param doc      document forwarded to parse_media()
 */
void parse_media_filename(const char *filepath, document_t *doc) {

    AVFormatContext *fmt_ctx = avformat_alloc_context();
    if (!fmt_ctx) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
        return;
    }

    int open_rc = avformat_open_input(&fmt_ctx, filepath, NULL, NULL);
    if (open_rc >= 0) {
        // Success: parse_media() takes over the context from here
        parse_media(fmt_ctx, doc);
        return;
    }

    LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", open_rc, av_err2str(open_rc))
    avformat_close_input(&fmt_ctx);
    avformat_free_context(fmt_ctx);
}
|
||||
|
||||
|
||||
int vfile_read(void *ptr, uint8_t *buf, int buf_size) {
|
||||
struct vfile *f = ptr;
|
||||
|
||||
int ret = f->read(f, buf, buf_size);
|
||||
|
||||
if (ret == 0) {
|
||||
return AVERROR_EOF;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Parse media that is only reachable through a vfile (no path on disk) by
 * wiring a custom AVIOContext, backed by vfile_read(), into libavformat and
 * then calling parse_media().
 *
 * The AVIOContext is created without a seek callback, so containers that need
 * seeking cannot be opened this way.
 *
 * @param f   virtual file to read from
 * @param doc document forwarded to parse_media(); errors are logged against
 *            doc->filepath
 */
void parse_media_vfile(struct vfile *f, document_t *doc) {

    AVFormatContext *pFormatCtx = avformat_alloc_context();
    if (pFormatCtx == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
        return;
    }

    unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
    if (buffer == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate I/O buffer with av_malloc()")
        avformat_free_context(pFormatCtx);
        return;
    }

    AVIOContext *io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);
    if (io_ctx == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate I/O context with avio_alloc_context()")
        av_free(buffer);
        avformat_free_context(pFormatCtx);
        return;
    }

    pFormatCtx->pb = io_ctx;
    pFormatCtx->flags |= AVFMT_FLAG_CUSTOM_IO;

    int res = avformat_open_input(&pFormatCtx, "", NULL, NULL);
    if (res < 0) {
        // -5 means we tried to parse media that requires seek: this is
        // expected with this read-only I/O context, so it is not reported.
        if (res != -5) {
            LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
        }
        // libavformat may have replaced the buffer, so free the current one
        av_free(io_ctx->buffer);
        avio_context_free(&io_ctx);
        avformat_close_input(&pFormatCtx);
        avformat_free_context(pFormatCtx);
        return;
    }

    // parse_media() closes pFormatCtx; the custom I/O context stays ours to free
    parse_media(pFormatCtx, doc);
    av_free(io_ctx->buffer);
    avio_context_free(&io_ctx);
}
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
#ifndef SIST2_MEDIA_H
|
||||
#define SIST2_MEDIA_H
|
||||
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
/* Size thresholds, in bytes. The expansions are parenthesised so that the
 * macros behave as single values inside larger expressions
 * (e.g. `x / MIN_VIDEO_SIZE`). */
#define MIN_VIDEO_SIZE (1024 * 64)
#define MIN_IMAGE_SIZE (1024 * 2)
|
||||
|
||||
void parse_media_filename(const char * filepath, document_t *doc);
|
||||
|
||||
void parse_media_vfile(struct vfile *f, document_t *doc);
|
||||
|
||||
#endif
|
||||
@@ -1,14 +1,14 @@
|
||||
#ifndef SIST2_MIME_H
|
||||
#define SIST2_MIME_H
|
||||
|
||||
#include "src/sist.h"
|
||||
#include "../sist.h"
|
||||
|
||||
#define MAJOR_MIME(mime_id) (mime_id & 0x0FFF0000) >> 16
|
||||
#define MAJOR_MIME(mime_id) (mime_id & 0x000F0000) >> 16
|
||||
|
||||
#define MIME_EMPTY 1
|
||||
|
||||
#define DONT_PARSE 0x80000000
|
||||
#define SHOULD_PARSE(mime_id) (mime_id & DONT_PARSE) != DONT_PARSE && mime_id != 0
|
||||
#define SHOULD_PARSE(mime_id) (ScanCtx.fast == 0 && (mime_id & DONT_PARSE) != DONT_PARSE && mime_id != 0)
|
||||
|
||||
#define PDF_MASK 0x40000000
|
||||
#define IS_PDF(mime_id) (mime_id & PDF_MASK) == PDF_MASK
|
||||
@@ -25,6 +25,15 @@
|
||||
#define DOC_MASK 0x04000000
|
||||
#define IS_DOC(mime_id) (mime_id & DOC_MASK) == DOC_MASK
|
||||
|
||||
#define MOBI_MASK 0x02000000
|
||||
#define IS_MOBI(mime_id) (mime_id & MOBI_MASK) == MOBI_MASK
|
||||
|
||||
#define MARKUP_MASK 0x01000000
|
||||
#define IS_MARKUP(mime_id) (mime_id & MARKUP_MASK) == MARKUP_MASK
|
||||
|
||||
#define RAW_MASK 0x00800000
|
||||
#define IS_RAW(mime_id) (mime_id & RAW_MASK) == RAW_MASK
|
||||
|
||||
enum major_mime {
|
||||
MimeInvalid = 0,
|
||||
MimeModel = 1,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,7 +1,15 @@
|
||||
#include "parse.h"
|
||||
|
||||
#include "src/sist.h"
|
||||
#include "src/ctx.h"
|
||||
#include "mime.h"
|
||||
#include "src/io/serialize.h"
|
||||
|
||||
__thread magic_t Magic = NULL;
|
||||
#include <magic.h>
|
||||
|
||||
|
||||
#define MIN_VIDEO_SIZE 1024 * 64
|
||||
#define MIN_IMAGE_SIZE 1024 * 2
|
||||
|
||||
int fs_read(struct vfile *f, void *buf, size_t size) {
|
||||
|
||||
@@ -24,54 +32,37 @@ void fs_close(struct vfile *f) {
|
||||
}
|
||||
}
|
||||
|
||||
void *read_all(parse_job_t *job, const char *buf, int bytes_read) {
|
||||
|
||||
void *full_buf;
|
||||
|
||||
if (job->info.st_size <= bytes_read) {
|
||||
full_buf = malloc(job->info.st_size);
|
||||
memcpy(full_buf, buf, job->info.st_size);
|
||||
} else {
|
||||
full_buf = malloc(job->info.st_size);
|
||||
memcpy(full_buf, buf, bytes_read);
|
||||
|
||||
int ret = job->vfile.read(&job->vfile, full_buf + bytes_read, job->info.st_size - bytes_read);
|
||||
if (ret == -1) {
|
||||
LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
|
||||
return NULL;
|
||||
}
|
||||
void fs_reset(struct vfile *f) {
|
||||
if (f->fd != -1) {
|
||||
lseek(f->fd, 0, SEEK_SET);
|
||||
}
|
||||
|
||||
return full_buf;
|
||||
}
|
||||
|
||||
#define IS_GIT_OBJ (strlen(doc.filepath + doc.base) == 38 && (strstr(doc.filepath, "objects") != NULL))
|
||||
|
||||
void parse(void *arg) {
|
||||
|
||||
parse_job_t *job = arg;
|
||||
document_t doc;
|
||||
|
||||
int inc_ts = incremental_get(ScanCtx.original_table, job->info.st_ino);
|
||||
if (inc_ts != 0 && inc_ts == job->info.st_mtim.tv_sec) {
|
||||
incremental_mark_file_for_copy(ScanCtx.copy_table, job->info.st_ino);
|
||||
int inc_ts = incremental_get(ScanCtx.original_table, job->vfile.info.st_ino);
|
||||
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
|
||||
incremental_mark_file_for_copy(ScanCtx.copy_table, job->vfile.info.st_ino);
|
||||
return;
|
||||
}
|
||||
|
||||
if (Magic == NULL) {
|
||||
Magic = magic_open(MAGIC_MIME_TYPE);
|
||||
}
|
||||
|
||||
doc.filepath = job->filepath;
|
||||
doc.ext = (short) job->ext;
|
||||
doc.base = (short) job->base;
|
||||
doc.meta_head = NULL;
|
||||
doc.meta_tail = NULL;
|
||||
doc.mime = 0;
|
||||
doc.size = job->info.st_size;
|
||||
doc.ino = job->info.st_ino;
|
||||
doc.mtime = job->info.st_mtim.tv_sec;
|
||||
doc.size = job->vfile.info.st_size;
|
||||
doc.ino = job->vfile.info.st_ino;
|
||||
doc.mtime = job->vfile.info.st_mtim.tv_sec;
|
||||
|
||||
uuid_generate(doc.uuid);
|
||||
char *buf[PARSE_BUF_SIZE];
|
||||
char *buf[MAGIC_BUF_SIZE];
|
||||
|
||||
if (LogCtx.very_verbose) {
|
||||
char uuid_str[UUID_STR_LEN];
|
||||
@@ -79,7 +70,7 @@ void parse(void *arg) {
|
||||
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", uuid_str)
|
||||
}
|
||||
|
||||
if (job->info.st_size == 0) {
|
||||
if (job->vfile.info.st_size == 0) {
|
||||
doc.mime = MIME_EMPTY;
|
||||
} else if (*(job->filepath + job->ext) != '\0' && (job->ext - job->base != 1)) {
|
||||
doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
|
||||
@@ -87,78 +78,94 @@ void parse(void *arg) {
|
||||
|
||||
int bytes_read = 0;
|
||||
|
||||
if (doc.mime == 0) {
|
||||
if (doc.mime == 0 && !ScanCtx.fast) {
|
||||
if (IS_GIT_OBJ) {
|
||||
goto abort;
|
||||
}
|
||||
|
||||
// Get mime type with libmagic
|
||||
bytes_read = job->vfile.read(&job->vfile, buf, PARSE_BUF_SIZE);
|
||||
if (bytes_read == -1) {
|
||||
LOG_WARNINGF(job->filepath, "read() Error: %s", strerror(errno))
|
||||
if (!job->vfile.is_fs_file) {
|
||||
LOG_WARNING(job->filepath, "Guessing mime type with libmagic inside archive files is not currently supported");
|
||||
goto abort;
|
||||
}
|
||||
|
||||
bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE);
|
||||
if (bytes_read < 0) {
|
||||
|
||||
if (job->vfile.is_fs_file) {
|
||||
LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
|
||||
} else {
|
||||
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc))
|
||||
}
|
||||
|
||||
CLOSE_FILE(job->vfile)
|
||||
return;
|
||||
}
|
||||
|
||||
const char *magic_mime_str = magic_buffer(Magic, buf, bytes_read);
|
||||
magic_t magic = magic_open(MAGIC_MIME_TYPE);
|
||||
magic_load(magic, NULL);
|
||||
|
||||
const char *magic_mime_str = magic_buffer(magic, buf, bytes_read);
|
||||
if (magic_mime_str != NULL) {
|
||||
doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
|
||||
|
||||
LOG_DEBUGF(job->filepath, "libmagic: %s", magic_mime_str);
|
||||
|
||||
if (doc.mime == 0) {
|
||||
LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str);
|
||||
}
|
||||
}
|
||||
|
||||
job->vfile.reset(&job->vfile);
|
||||
|
||||
magic_close(magic);
|
||||
}
|
||||
|
||||
int mmime = MAJOR_MIME(doc.mime);
|
||||
|
||||
if (!(SHOULD_PARSE(doc.mime))) {
|
||||
|
||||
} else if (IS_RAW(doc.mime)) {
|
||||
parse_raw(&ScanCtx.raw_ctx, &job->vfile, &doc);
|
||||
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
|
||||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
|
||||
|
||||
if (job->vfile.is_fs_file) {
|
||||
parse_media_filename(job->filepath, &doc);
|
||||
} else {
|
||||
parse_media_vfile(&job->vfile, &doc);
|
||||
}
|
||||
parse_media(&ScanCtx.media_ctx, &job->vfile, &doc);
|
||||
|
||||
} else if (IS_PDF(doc.mime)) {
|
||||
void *pdf_buf = read_all(job, (char *) buf, bytes_read);
|
||||
parse_pdf(pdf_buf, doc.size, &doc);
|
||||
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc.mime), &doc);
|
||||
|
||||
if (pdf_buf != buf && pdf_buf != NULL) {
|
||||
free(pdf_buf);
|
||||
} else if (mmime == MimeText && ScanCtx.text_ctx.content_size > 0) {
|
||||
if (IS_MARKUP(doc.mime)) {
|
||||
parse_markup(&ScanCtx.text_ctx, &job->vfile, &doc);
|
||||
} else {
|
||||
parse_text(&ScanCtx.text_ctx, &job->vfile, &doc);
|
||||
}
|
||||
|
||||
} else if (mmime == MimeText && ScanCtx.content_size > 0) {
|
||||
parse_text(bytes_read, &job->vfile, (char *) buf, &doc);
|
||||
|
||||
} else if (IS_FONT(doc.mime)) {
|
||||
void *font_buf = read_all(job, (char *) buf, bytes_read);
|
||||
parse_font(font_buf, doc.size, &doc);
|
||||
parse_font(&ScanCtx.font_ctx, &job->vfile, &doc);
|
||||
|
||||
if (font_buf != buf && font_buf != NULL) {
|
||||
free(font_buf);
|
||||
}
|
||||
} else if (
|
||||
ScanCtx.archive_mode != ARC_MODE_SKIP && (
|
||||
ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
|
||||
IS_ARC(doc.mime) ||
|
||||
(IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
|
||||
)) {
|
||||
parse_archive(&job->vfile, &doc);
|
||||
} else if (ScanCtx.content_size > 0 && IS_DOC(doc.mime)) {
|
||||
void *doc_buf = read_all(job, (char *) buf, bytes_read);
|
||||
parse_doc(doc_buf, doc.size, &doc);
|
||||
|
||||
if (doc_buf != buf && doc_buf != NULL) {
|
||||
free(doc_buf);
|
||||
}
|
||||
parse_archive(&ScanCtx.arc_ctx, &job->vfile, &doc);
|
||||
} else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc.mime)) {
|
||||
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, &doc);
|
||||
} else if (is_cbr(&ScanCtx.comic_ctx, doc.mime) || is_cbz(&ScanCtx.comic_ctx, doc.mime)) {
|
||||
parse_comic(&ScanCtx.comic_ctx, &job->vfile, &doc);
|
||||
} else if (IS_MOBI(doc.mime)) {
|
||||
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc);
|
||||
}
|
||||
|
||||
abort:
|
||||
|
||||
//Parent meta
|
||||
if (!uuid_is_null(job->parent)) {
|
||||
char tmp[UUID_STR_LEN];
|
||||
uuid_unparse(job->parent, tmp);
|
||||
|
||||
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
|
||||
meta_parent->key = MetaParent;
|
||||
strcpy(meta_parent->strval, tmp);
|
||||
uuid_unparse(job->parent, meta_parent->str_val);
|
||||
APPEND_META((&doc), meta_parent)
|
||||
}
|
||||
|
||||
@@ -166,3 +173,7 @@ void parse(void *arg) {
|
||||
|
||||
CLOSE_FILE(job->vfile)
|
||||
}
|
||||
|
||||
void cleanup_parse() {
|
||||
// noop
|
||||
}
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
#ifndef SIST2_PARSE_H
|
||||
#define SIST2_PARSE_H
|
||||
|
||||
#include "src/sist.h"
|
||||
#include "../sist.h"
|
||||
|
||||
#define PARSE_BUF_SIZE 4096
|
||||
#define MAGIC_BUF_SIZE 4096 * 6
|
||||
|
||||
int fs_read(struct vfile *f, void *buf, size_t size);
|
||||
void fs_close(struct vfile *f);
|
||||
void fs_reset(struct vfile *f);
|
||||
|
||||
void parse(void *arg);
|
||||
|
||||
void cleanup_parse();
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,340 +0,0 @@
|
||||
#include "pdf.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
#define MIN_OCR_SIZE 350
|
||||
#define MIN_OCR_LEN 10
|
||||
__thread text_buffer_t thread_buffer;
|
||||
|
||||
|
||||
/**
 * Load page 0 of `fzdoc`, render it scaled so that its longest side equals
 * ScanCtx.tn_size, and store the result as a PNG thumbnail keyed by doc->uuid.
 *
 * @param ctx   MuPDF context
 * @param doc   document whose uuid keys the thumbnail; warnings are logged
 *              against doc->filepath
 * @param fzdoc opened MuPDF document
 * @return the loaded first page (the caller is responsible for dropping it),
 *         or NULL if loading, rendering or PNG encoding failed
 */
fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {

    int status = 0;
    fz_page *first_page = NULL;

    fz_var(first_page);
    fz_try(ctx)
        first_page = fz_load_page(ctx, fzdoc, 0);
    fz_catch(ctx)
        status = 1;

    if (status != 0) {
        fz_drop_page(ctx, first_page);
        LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", status, ctx->error.message)
        return NULL;
    }

    fz_rect page_box = fz_bound_page(ctx, first_page);

    // Uniform scale factor that maps the longest page side onto tn_size
    float width = (float) page_box.x1 - page_box.x0;
    float height = (float) page_box.y1 - page_box.y0;
    float longest = (width > height) ? width : height;
    float factor = (float) ScanCtx.tn_size / longest;
    fz_matrix transform = fz_scale(factor, factor);

    page_box = fz_transform_rect(page_box, transform);
    fz_irect pixel_box = fz_round_rect(page_box);
    fz_pixmap *canvas = fz_new_pixmap_with_bbox(ctx, ctx->colorspace->rgb, pixel_box, NULL, 0);

    // Start from an all-0xFF canvas
    fz_clear_pixmap_with_value(ctx, canvas, 0xFF);
    fz_device *draw_dev = fz_new_draw_device(ctx, transform, canvas);

    fz_var(status);
    fz_try(ctx)
    {
        // Page rendering is serialised across threads through mupdf_mu
        pthread_mutex_lock(&ScanCtx.mupdf_mu);
        fz_run_page(ctx, first_page, draw_dev, fz_identity, NULL);
    }
    fz_always(ctx)
    {
        fz_close_device(ctx, draw_dev);
        fz_drop_device(ctx, draw_dev);
        pthread_mutex_unlock(&ScanCtx.mupdf_mu);
    }
    fz_catch(ctx)
        status = ctx->error.errcode;

    if (status != 0) {
        LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", status, ctx->error.message)
        fz_drop_page(ctx, first_page);
        fz_drop_pixmap(ctx, canvas);
        return NULL;
    }

    fz_buffer *png = NULL;
    fz_var(png);
    fz_var(status);

    fz_try(ctx)
        png = fz_new_buffer_from_pixmap_as_png(ctx, canvas, fz_default_color_params);
    fz_catch(ctx)
        status = ctx->error.errcode;

    if (status != 0) {
        fz_drop_buffer(ctx, png);
        fz_drop_pixmap(ctx, canvas);
        LOG_WARNINGF(doc->filepath, "fz_new_buffer_from_pixmap_as_png() returned error code [%d] %s", status,
                     ctx->error.message)
        fz_drop_page(ctx, first_page);
        return NULL;
    }

    unsigned char *png_bytes;
    size_t png_len = fz_buffer_storage(ctx, png, &png_bytes);
    store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) png_bytes, png_len);

    fz_drop_buffer(ctx, png);
    fz_drop_pixmap(ctx, canvas);

    return first_page;
}
|
||||
|
||||
/**
 * Warning/error print callback installed on the MuPDF context by init_ctx().
 * Messages are forwarded to the log only in verbose mode.
 *
 * @param user    the document_t registered as print_user
 * @param message text supplied by MuPDF
 */
void fz_err_callback(void *user, UNUSED(const char *message)) {
    if (!LogCtx.verbose) {
        return;
    }

    document_t *origin = (document_t *) user;
    LOG_WARNINGF(origin->filepath, "FZ: %s", message)
}
|
||||
|
||||
__always_inline
|
||||
void init_ctx(fz_context *ctx, document_t *doc) {
|
||||
fz_disable_icc(ctx);
|
||||
fz_register_document_handlers(ctx);
|
||||
|
||||
ctx->warn.print_user = doc;
|
||||
ctx->warn.print = fz_err_callback;
|
||||
ctx->error.print_user = doc;
|
||||
ctx->error.print = fz_err_callback;
|
||||
}
|
||||
|
||||
/**
 * Append every character of a structured-text block to `tex`.
 * Blocks that are not of type FZ_STEXT_BLOCK_TEXT are ignored.
 *
 * @param block block to read
 * @param tex   destination text buffer
 * @return TEXT_BUF_FULL as soon as the buffer reports it is full, 0 otherwise
 */
int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
    if (block->type == FZ_STEXT_BLOCK_TEXT) {
        for (fz_stext_line *ln = block->u.t.first_line; ln != NULL; ln = ln->next) {
            for (fz_stext_char *ch = ln->first_char; ch != NULL; ch = ch->next) {
                if (text_buffer_append_char(tex, ch->c) == TEXT_BUF_FULL) {
                    return TEXT_BUF_FULL;
                }
            }
        }
    }

    return 0;
}
|
||||
|
||||
#define IS_VALID_BPP(d) (d==1 || d==2 || d==4 || d==8 || d==16 || d==24 || d==32)
|
||||
|
||||
void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
|
||||
fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha),
|
||||
UNUSED(fz_color_params color_params)) {
|
||||
|
||||
int l2factor = 0;
|
||||
|
||||
if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && IS_VALID_BPP(img->n)) {
|
||||
|
||||
fz_pixmap *pix = img->get_pixmap(ctx, img, NULL, img->w, img->h, &l2factor);
|
||||
|
||||
if (pix->h > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && img->xres != 0) {
|
||||
TessBaseAPI *api = TessBaseAPICreate();
|
||||
TessBaseAPIInit3(api, ScanCtx.tesseract_path, ScanCtx.tesseract_lang);
|
||||
|
||||
TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride);
|
||||
TessBaseAPISetSourceResolution(api, pix->xres);
|
||||
|
||||
char *text = TessBaseAPIGetUTF8Text(api);
|
||||
size_t len = strlen(text);
|
||||
if (len >= MIN_OCR_LEN) {
|
||||
text_buffer_append_string(&thread_buffer, text, len - 1);
|
||||
LOG_DEBUGF(
|
||||
"pdf.c",
|
||||
"(OCR) %dx%d got %dB from tesseract (%s), buffer:%dB",
|
||||
pix->w, pix->h, len, ScanCtx.tesseract_lang, thread_buffer.dyn_buffer.cur
|
||||
)
|
||||
}
|
||||
|
||||
TessBaseAPIEnd(api);
|
||||
TessBaseAPIDelete(api);
|
||||
}
|
||||
fz_drop_pixmap(ctx, pix);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parse an in-memory document with MuPDF.
 *
 * Steps: open `buf` as a stream using the document's mime text as the type
 * hint, record the title metadata (MetaTitle) when present, render the first
 * page as a thumbnail when ScanCtx.tn_size > 0, and, when
 * ScanCtx.content_size > 0, extract the text of the pages (plus OCR of
 * embedded images if a Tesseract language is configured) into a MetaContent
 * entry.
 *
 * @param buf     whole file content; a NULL buffer is ignored
 * @param buf_len number of bytes in buf
 * @param doc     document receiving the metadata
 */
void parse_pdf(void *buf, size_t buf_len, document_t *doc) {

    if (buf == NULL) {
        return;
    }

    // NOTE(review): this lazy initialisation is not itself synchronised;
    // confirm that the first call cannot race with another thread.
    static int mu_is_initialized = 0;
    if (!mu_is_initialized) {
        pthread_mutex_init(&ScanCtx.mupdf_mu, NULL);
        mu_is_initialized = 1;
    }

    fz_context *ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
    if (ctx == NULL) {
        LOG_WARNINGF(doc->filepath, "fz_new_context() failed for a buffer of %d bytes", (int) buf_len)
        return;
    }

    init_ctx(ctx, doc);

    int err = 0;
    int page_count = -1;
    char title[4096] = {'\0',};
    fz_document *fzdoc = NULL;
    fz_stream *stream = NULL;
    // First page; owned here until the text loop takes it over
    fz_page *cover = NULL;

    fz_var(fzdoc);
    fz_var(stream);
    fz_var(cover);
    fz_var(page_count);
    fz_var(err);

    fz_try(ctx)
    {
        stream = fz_open_memory(ctx, buf, buf_len);
        fzdoc = fz_open_document_with_stream(ctx, mime_get_mime_text(doc->mime), stream);
    }
    fz_catch(ctx)
        err = ctx->error.errcode;

    if (err) {
        goto cleanup;
    }

    // A failed metadata lookup simply leaves the title empty
    fz_try(ctx)
        fz_lookup_metadata(ctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title));
    fz_catch(ctx)
        ;

    if (strlen(title) > 0) {
        // +1 reserves room for the terminator written by strcpy()
        meta_line_t *meta_title = malloc(sizeof(meta_line_t) + strlen(title) + 1);
        meta_title->key = MetaTitle;
        strcpy(meta_title->strval, title);
        APPEND_META(doc, meta_title)
    }

    fz_try(ctx)
        page_count = fz_count_pages(ctx, fzdoc);
    fz_catch(ctx)
        err = ctx->error.errcode;

    if (err) {
        LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, ctx->error.message)
        goto cleanup;
    }

    if (ScanCtx.tn_size > 0) {
        cover = render_cover(ctx, doc, fzdoc);
    } else {
        fz_try(ctx)
            cover = fz_load_page(ctx, fzdoc, 0);
        fz_catch(ctx)
            cover = NULL;
    }

    if (cover == NULL) {
        goto cleanup;
    }

    if (ScanCtx.content_size > 0) {
        fz_stext_options opts = {0};
        thread_buffer = text_buffer_create(ScanCtx.content_size);

        for (int current_page = 0; current_page < page_count; current_page++) {
            fz_page *page = NULL;
            fz_var(page);

            if (current_page == 0) {
                // Reuse the already loaded first page; it is dropped with
                // the other pages below, so it must not be dropped again.
                page = cover;
                cover = NULL;
            } else {
                fz_try(ctx)
                    page = fz_load_page(ctx, fzdoc, current_page);
                fz_catch(ctx)
                    err = ctx->error.errcode;
                if (err != 0) {
                    LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
                    text_buffer_destroy(&thread_buffer);
                    fz_drop_page(ctx, page);
                    goto cleanup;
                }
            }

            fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
            fz_device *dev = fz_new_stext_device(ctx, stext, &opts);

            // Clear the device callbacks that are not wanted here
            dev->stroke_path = NULL;
            dev->stroke_text = NULL;
            dev->clip_text = NULL;
            dev->clip_stroke_path = NULL;
            dev->clip_stroke_text = NULL;

            // With a Tesseract language configured, images go through OCR
            if (ScanCtx.tesseract_lang != NULL) {
                dev->fill_image = fill_image;
            }

            fz_try(ctx)
                fz_run_page(ctx, page, dev, fz_identity, NULL);
            fz_always(ctx)
            {
                fz_close_device(ctx, dev);
                fz_drop_device(ctx, dev);
            }
            fz_catch(ctx)
                err = ctx->error.errcode;

            if (err != 0) {
                LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message)
                text_buffer_destroy(&thread_buffer);
                fz_drop_page(ctx, page);
                fz_drop_stext_page(ctx, stext);
                goto cleanup;
            }

            fz_stext_block *block = stext->first_block;
            while (block != NULL) {
                int ret = read_stext_block(block, &thread_buffer);
                if (ret == TEXT_BUF_FULL) {
                    break;
                }
                block = block->next;
            }
            fz_drop_stext_page(ctx, stext);
            fz_drop_page(ctx, page);

            // Stop loading pages once the content buffer is full
            if (thread_buffer.dyn_buffer.cur >= thread_buffer.dyn_buffer.size) {
                break;
            }
        }
        text_buffer_terminate_string(&thread_buffer);

        meta_line_t *meta_content = malloc(sizeof(meta_line_t) + thread_buffer.dyn_buffer.cur);
        meta_content->key = MetaContent;
        memcpy(meta_content->strval, thread_buffer.dyn_buffer.buf, thread_buffer.dyn_buffer.cur);
        APPEND_META(doc, meta_content)

        text_buffer_destroy(&thread_buffer);
    }

cleanup:
    // `cover` is still set only when the text loop did not take it over
    // (e.g. content extraction disabled); the drop calls accept NULL.
    fz_drop_page(ctx, cover);
    fz_drop_stream(ctx, stream);
    fz_drop_document(ctx, fzdoc);
    fz_drop_context(ctx);
}
|
||||
@@ -1,9 +0,0 @@
|
||||
#ifndef SIST2_PDF_H
|
||||
#define SIST2_PDF_H
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
|
||||
void parse_pdf(void *buf, size_t buf_len, document_t *doc);
|
||||
|
||||
#endif
|
||||
@@ -1,37 +0,0 @@
|
||||
#include "text.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
/**
 * Store up to ScanCtx.content_size bytes of a text file as MetaContent.
 *
 * `buf` holds the first `bytes_read` bytes of the file, already read by the
 * caller. If that does not cover what is needed, the remainder is read from
 * `f`. Only bytes that were actually obtained are indexed.
 *
 * @param bytes_read number of valid bytes in buf
 * @param f          virtual file positioned right after those bytes
 * @param buf        beginning of the file content
 * @param doc        document receiving the MetaContent entry
 */
void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc) {

    char *intermediate_buf;
    int intermediate_buf_len;

    if (bytes_read == doc->size || bytes_read >= ScanCtx.content_size) {
        // Everything needed is already in buf
        int to_copy = MIN(bytes_read, ScanCtx.content_size);
        intermediate_buf = malloc(to_copy);
        if (intermediate_buf == NULL) {
            return;
        }
        intermediate_buf_len = to_copy;
        memcpy(intermediate_buf, buf, to_copy);

    } else {
        int to_read = MIN(ScanCtx.content_size, doc->size) - bytes_read;
        if (to_read < 0) {
            to_read = 0;
        }

        intermediate_buf = malloc(to_read + bytes_read);
        if (intermediate_buf == NULL) {
            return;
        }
        intermediate_buf_len = bytes_read;
        if (bytes_read != 0) {
            memcpy(intermediate_buf, buf, bytes_read);
        }

        // Count only what read() delivered: after a short or failed read the
        // tail of the buffer is uninitialised and must not be indexed.
        int ret = f->read(f, intermediate_buf + bytes_read, to_read);
        if (ret > 0) {
            intermediate_buf_len += ret;
        }
    }

    text_buffer_t tex = text_buffer_create(ScanCtx.content_size);
    text_buffer_append_string(&tex, intermediate_buf, intermediate_buf_len);
    text_buffer_terminate_string(&tex);

    // One spare byte so the terminator copied by strcpy() always fits
    meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur + 1);
    meta->key = MetaContent;
    strcpy(meta->strval, tex.dyn_buffer.buf);
    APPEND_META(doc, meta)

    free(intermediate_buf);
    text_buffer_destroy(&tex);
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user