* extract scan code to libscan, (wip)

* submodules

* replace curl with mongoose (wip)

* replace onion with mongoose (wip)

* replace onion with mongoose (wip)

* It compiles! (I think)

* Update readme

* Entirely remove libonion (WIP)

* unscramble submodules

* recover screenshot

* Update mappings

* Bug fixes

* update

* media meta fix

* memory fixes

* More bug fixes...

* Bug fix w/ libmagic & vfile

* libmagic fix (again)

* Better lightbox, better video handler, random reloads fix

* Use svg for info icon

* re-enable http auth

* mobi support #41, fix logs

* Update README & cleanup
This commit is contained in:
simon987 2020-04-21 14:42:20 -04:00 committed by GitHub
parent 4501a7810f
commit e436af7b2a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
104 changed files with 1338 additions and 4292 deletions

1
.gitignore vendored
View File

@ -16,3 +16,4 @@ bundle.js
*.a
vgcore.*
build/
third-party/

49
.gitmodules vendored
View File

@ -1,45 +1,6 @@
[submodule "argparse"]
path = argparse
[submodule "third-party/libscan"]
path = third-party/libscan
url = https://github.com/simon987/libscan
[submodule "third-party/argparse"]
path = third-party/argparse
url = https://github.com/cofyc/argparse
[submodule "cJSON"]
path = cJSON
url = https://github.com/DaveGamble/cJSON
[submodule "lmdb"]
path = lmdb
url = https://github.com/LMDB/lmdb
[submodule "utf8.h"]
path = utf8.h
url = https://github.com/sheredom/utf8.h
[submodule "lib/bzip2-1.0.6"]
path = lib/bzip2-1.0.6
url = https://github.com/enthought/bzip2-1.0.6
[submodule "lib/libmagic"]
path = lib/libmagic
url = https://github.com/threatstack/libmagic
[submodule "lib/harfbuzz"]
path = lib/harfbuzz
url = https://github.com/harfbuzz/harfbuzz
[submodule "lib/openjpeg"]
path = lib/openjpeg
url = https://github.com/uclouvain/openjpeg
[submodule "lib/ffmpeg"]
path = lib/ffmpeg
url = https://git.ffmpeg.org/ffmpeg.git
[submodule "lib/onion"]
path = lib/onion
url = https://github.com/davidmoreno/onion
[submodule "lib/mupdf"]
path = lib/mupdf
url = git://git.ghostscript.com/mupdf.git
[submodule "lib/tesseract"]
path = lib/tesseract
url = https://github.com/tesseract-ocr/tesseract
[submodule "lib/leptonica"]
path = lib/leptonica
url = https://github.com/danbloomberg/leptonica
[submodule "lib/libtiff"]
path = lib/libtiff
url = https://gitlab.com/libtiff/libtiff
[submodule "lib/libpng"]
path = lib/libpng
url = https://github.com/glennrp/libpng

View File

@ -2,85 +2,52 @@ cmake_minimum_required(VERSION 3.7)
set(CMAKE_C_STANDARD 11)
project(sist2 C)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/CMakeModules")
option(SIST_DEBUG "Build a debug executable" on)
add_subdirectory(third-party/libscan)
set(ARGPARSE_SHARED off)
add_subdirectory(third-party/argparse)
add_executable(
sist2
src/main.c
src/sist.h
src/io/walk.h src/io/walk.c
src/parsing/media.h src/parsing/media.c
src/parsing/pdf.h src/parsing/pdf.c
src/io/store.h src/io/store.c
src/tpool.h src/tpool.c
src/parsing/parse.h src/parsing/parse.c
src/io/serialize.h src/io/serialize.c
src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
src/parsing/text.h src/parsing/text.c
src/index/web.c src/index/web.h
src/web/serve.c src/web/serve.h
src/web/auth_basic.h src/web/auth_basic.c
src/index/elastic.c src/index/elastic.h
src/util.c src/util.h
src/ctx.h src/types.h src/parsing/font.c src/parsing/font.h
src/parsing/arc.c src/parsing/arc.h
src/parsing/doc.c src/parsing/doc.h
src/ctx.h src/types.h
src/log.c src/log.h
src/parsing/cbr.h src/parsing/cbr.c
# argparse
argparse/argparse.h argparse/argparse.c
third-party/argparse/argparse.h third-party/argparse/argparse.c
# cJSON
cJSON/cJSON.h cJSON/cJSON.c
# LMDB
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
src/cli.c src/cli.h
# utf8.h
utf8.h/utf8.h
)
find_package(PkgConfig REQUIRED)
set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:/usr/local/lib/pkgconfig/")
target_link_directories(sist2 PRIVATE BEFORE /usr/share/vcpkg/installed/x64-linux/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
find_package(Freetype REQUIRED)
find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-glib CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED)
#find_package(OpenSSL REQUIRED)
pkg_check_modules(GLIB REQUIRED glib-2.0)
pkg_check_modules(GOBJECT REQUIRED gobject-2.0)
pkg_check_modules(UUID REQUIRED uuid)
add_definitions(${UUID_CFLAGS_OTHER})
add_definitions(${GLIB_CFLAGS_OTHER})
add_definitions(${GOBJECT_CFLAGS_OTHER})
add_definitions(${FREETYPE_CFLAGS_OTHER})
list(REMOVE_ITEM GLIB_LIBRARIES pcre)
list(REMOVE_ITEM GOBJECT_LIBRARIES pcre)
list(REMOVE_ITEM UUID_LIBRARIES pcre)
target_include_directories(
sist2 PUBLIC
${GOBJECT_INCLUDE_DIRS}
${GLIB_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/lib/ffmpeg/
${FREETYPE_INCLUDE_DIRS}
${UUID_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/
${PROJECT_SOURCE_DIR}/lmdb/libraries/liblmdb/
${PROJECT_SOURCE_DIR}/lib/onion/src/
${PROJECT_SOURCE_DIR}/lib/mupdf/include/
${PROJECT_SOURCE_DIR}/include/
/usr/include/libxml2/
${PROJECT_SOURCE_DIR}/lib/tesseract/include/
)
target_link_directories(
sist2 PUBLIC
${UUID_LIBRARY_DIRS}
${CMAKE_SOURCE_DIR}/third-party/onion/src/
${CMAKE_SOURCE_DIR}/third-party/utf8.h/
${CMAKE_SOURCE_DIR}/third-party/libscan/
${CMAKE_SOURCE_DIR}/
)
target_compile_options(
@ -103,6 +70,7 @@ if (SIST_DEBUG)
sist2
PRIVATE
-fsanitize=address
# -static
)
set_target_properties(
sist2
@ -119,53 +87,28 @@ else ()
)
endif ()
TARGET_LINK_LIBRARIES(
add_dependencies(
sist2
scan
argparse
)
target_link_libraries(
sist2
${GLIB_LIBRARIES}
${GOBJECT_LIBRARIES}
${UUID_LIBRARIES}
# ffmpeg
${PROJECT_SOURCE_DIR}/lib/libavcodec.a
${PROJECT_SOURCE_DIR}/lib/libavformat.a
${PROJECT_SOURCE_DIR}/lib/libavutil.a
${PROJECT_SOURCE_DIR}/lib/libswscale.a
${PROJECT_SOURCE_DIR}/lib/libswresample.a
# mupdf
${PROJECT_SOURCE_DIR}/lib/libmupdf.a
${PROJECT_SOURCE_DIR}/lib/libmupdf-third.a
# onion
${PROJECT_SOURCE_DIR}/lib/libonion_static.a
z
lmdb
cjson
argparse
unofficial::glib::glib
unofficial::mongoose::mongoose
# OpenSSL::SSL OpenSSL::Crypto
uuid
pthread
m
bz2
# ${PROJECT_SOURCE_DIR}/lib/libmagic.a
magic
${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
freetype
archive
xml2
${PROJECT_SOURCE_DIR}/lib/libtesseract.a
${PROJECT_SOURCE_DIR}/lib/liblept.a
${PROJECT_SOURCE_DIR}/lib/libtiff.a
${PROJECT_SOURCE_DIR}/lib/libpng16.a
stdc++
# curl
${PROJECT_SOURCE_DIR}/lib/libcurl.a
${PROJECT_SOURCE_DIR}/lib/libcrypto.a
${PROJECT_SOURCE_DIR}/lib/libssl.a
dl
pcre
scan
)
add_custom_target(

View File

@ -1,80 +0,0 @@
# - Try to find ffmpeg libraries (libavcodec, libavformat and libavutil)
# Once done this will define
#
# FFMPEG_FOUND - system has ffmpeg or libav
# FFMPEG_INCLUDE_DIR - the ffmpeg include directory
# FFMPEG_LIBRARIES - Link these to use ffmpeg
# FFMPEG_LIBAVCODEC
# FFMPEG_LIBAVFORMAT
# FFMPEG_LIBAVUTIL
#
# Copyright (c) 2008 Andreas Schneider <mail@cynapses.org>
# Modified for other libraries by Lasse Kärkkäinen <tronic>
# Modified for Hedgewars by Stepik777
#
# Redistribution and use is allowed according to the terms of the New
# BSD license.
#
# Locate FFmpeg/Libav and publish FFMPEG_FOUND, FFMPEG_INCLUDE_DIR and
# FFMPEG_LIBRARIES (plus the per-library FFMPEG_LIBAV* cache entries).
if(FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
  # Results are already available (e.g. from the cache); nothing to search for.
  set(FFMPEG_FOUND TRUE)
else()
  # Use pkg-config, when available, to obtain directory hints for the
  # find_path()/find_library() calls below.
  find_package(PkgConfig)
  if(PKG_CONFIG_FOUND)
    pkg_check_modules(_FFMPEG_AVCODEC libavcodec)
    pkg_check_modules(_FFMPEG_AVFORMAT libavformat)
    pkg_check_modules(_FFMPEG_AVUTIL libavutil)
  endif()

  find_path(FFMPEG_AVCODEC_INCLUDE_DIR
    NAMES libavcodec/avcodec.h
    PATHS ${_FFMPEG_AVCODEC_INCLUDE_DIRS} /usr/include /usr/local/include /opt/local/include /sw/include
    PATH_SUFFIXES ffmpeg libav
  )
  find_library(FFMPEG_LIBAVCODEC
    NAMES avcodec
    PATHS ${_FFMPEG_AVCODEC_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
  )
  find_library(FFMPEG_LIBAVFORMAT
    NAMES avformat
    PATHS ${_FFMPEG_AVFORMAT_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
  )
  find_library(FFMPEG_LIBAVUTIL
    NAMES avutil
    PATHS ${_FFMPEG_AVUTIL_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
  )

  # Every value that ends up in FFMPEG_INCLUDE_DIR / FFMPEG_LIBRARIES must
  # have been located. Previously only avcodec and avformat were checked, so a
  # "<VAR>-NOTFOUND" entry for avutil or the include directory could leak into
  # the result variables while FFMPEG_FOUND was TRUE.
  if(FFMPEG_AVCODEC_INCLUDE_DIR
      AND FFMPEG_LIBAVCODEC
      AND FFMPEG_LIBAVFORMAT
      AND FFMPEG_LIBAVUTIL)
    set(FFMPEG_FOUND TRUE)
    set(FFMPEG_INCLUDE_DIR ${FFMPEG_AVCODEC_INCLUDE_DIR})
    set(FFMPEG_LIBRARIES
      ${FFMPEG_LIBAVCODEC}
      ${FFMPEG_LIBAVFORMAT}
      ${FFMPEG_LIBAVUTIL}
    )
    if(NOT FFMPEG_FIND_QUIETLY)
      message(STATUS "Found FFMPEG or Libav: ${FFMPEG_LIBRARIES}, ${FFMPEG_INCLUDE_DIR}")
    endif()
  else()
    set(FFMPEG_FOUND FALSE)
    # Honour find_package(... REQUIRED); otherwise fail silently.
    if(FFMPEG_FIND_REQUIRED)
      message(FATAL_ERROR "Could not find libavcodec or libavformat or libavutil")
    endif()
  endif()
endif()

View File

@ -1,100 +0,0 @@
#-------------------------------------------------------------------------------
# Copyright (c) 2013-2013, Lars Baehren <lbaehren@gmail.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#-------------------------------------------------------------------------------
# - Check for the presence of LIBMAGIC
#
# The following variables are set when LIBMAGIC is found:
# LIBMAGIC_FOUND = Set to true, if all components of LIBMAGIC have been
# found.
# LIBMAGIC_INCLUDES = Include path for the header files of LIBMAGIC
# LIBMAGIC_LIBRARIES = Link these to use LIBMAGIC
# LIBMAGIC_LFLAGS = Linker flags (optional)
# Locate libmagic headers and library unless a previous run already succeeded.
if(NOT LIBMAGIC_FOUND)

  # Default the search root to the install prefix when the user gave none.
  if(NOT LIBMAGIC_ROOT_DIR)
    set(LIBMAGIC_ROOT_DIR ${CMAKE_INSTALL_PREFIX})
  endif()

  ##____________________________________________________________________________
  ## Check for the header files

  find_path(LIBMAGIC_FILE_H
    NAMES file/file.h
    HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
    PATH_SUFFIXES include
  )
  if(LIBMAGIC_FILE_H)
    list(APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_FILE_H})
  endif()

  find_path(LIBMAGIC_MAGIC_H
    NAMES magic.h
    HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
    PATH_SUFFIXES include include/linux
  )
  if(LIBMAGIC_MAGIC_H)
    list(APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_MAGIC_H})
  endif()

  # Only de-duplicate when at least one header directory was found; the list
  # variable does not exist otherwise (older CMake releases reject
  # REMOVE_DUPLICATES on a missing list -- TODO confirm exact version).
  if(LIBMAGIC_INCLUDES)
    list(REMOVE_DUPLICATES LIBMAGIC_INCLUDES)
  endif()

  ##____________________________________________________________________________
  ## Check for the library

  find_library(LIBMAGIC_LIBRARIES magic
    HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
    PATH_SUFFIXES lib
  )

  ##____________________________________________________________________________
  ## Actions taken when all components have been found

  # The find_package_handle_standard_args() call that used to set
  # LIBMAGIC_FOUND was commented out, so the module always reported failure
  # (and aborted under REQUIRED). Derive the result directly from the same two
  # variables that call checked.
  if(LIBMAGIC_LIBRARIES AND LIBMAGIC_INCLUDES)
    set(LIBMAGIC_FOUND TRUE)
  else()
    set(LIBMAGIC_FOUND FALSE)
  endif()

  if(LIBMAGIC_FOUND)
    if(NOT LIBMAGIC_FIND_QUIETLY)
      message(STATUS "Found components for LIBMAGIC")
      message(STATUS "LIBMAGIC_ROOT_DIR = ${LIBMAGIC_ROOT_DIR}")
      message(STATUS "LIBMAGIC_INCLUDES = ${LIBMAGIC_INCLUDES}")
      message(STATUS "LIBMAGIC_LIBRARIES = ${LIBMAGIC_LIBRARIES}")
    endif()
  else()
    if(LIBMAGIC_FIND_REQUIRED)
      message(FATAL_ERROR "Could not find LIBMAGIC!")
    endif()
  endif()

  ##____________________________________________________________________________
  ## Mark advanced variables

  mark_as_advanced(
    LIBMAGIC_ROOT_DIR
    LIBMAGIC_INCLUDES
    LIBMAGIC_LIBRARIES
  )

endif()

View File

@ -1,478 +0,0 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
# Decide whether the located OpenSSL libraries need extra link dependencies.
#   ssl_library    - path of the SSL library that was found
#   crypto_library - path of the crypto library that was found
# Sets _OpenSSL_has_dependencies in the caller's scope (this is a macro) to
# TRUE when building for Linux and at least one path ends with the static
# library suffix; in that case Threads is looked up as well.
macro(_OpenSSL_test_and_find_dependencies ssl_library crypto_library)
  # Start from "no dependencies" and upgrade only for static libs on Linux.
  set(_OpenSSL_has_dependencies FALSE)
  if((CMAKE_SYSTEM_NAME STREQUAL "Linux") AND
     (("${ssl_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$") OR
      ("${crypto_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$")))
    set(_OpenSSL_has_dependencies TRUE)
    find_package(Threads)
  endif()
endmacro()
# Append the thread and dl link libraries to a library list owned by the caller.
#   libraries_var - NAME of the list variable to extend in the parent scope
#   library       - accepted for interface compatibility; not read here
function(_OpenSSL_add_dependencies libraries_var library)
  # Work on a local copy, then publish the result to the caller in one step.
  set(_with_deps ${${libraries_var}})
  if(CMAKE_THREAD_LIBS_INIT)
    list(APPEND _with_deps ${CMAKE_THREAD_LIBS_INIT})
  endif()
  list(APPEND _with_deps ${CMAKE_DL_LIBS})
  set(${libraries_var} ${_with_deps} PARENT_SCOPE)
endfunction()
# Attach Threads::Threads and the dl libraries to an imported OpenSSL target.
#   target - name of the target whose INTERFACE_LINK_LIBRARIES is extended
# Does nothing unless _OpenSSL_has_dependencies is true.
function(_OpenSSL_target_add_dependencies target)
  if(NOT _OpenSSL_has_dependencies)
    return()
  endif()
  set_property(TARGET ${target} APPEND PROPERTY
    INTERFACE_LINK_LIBRARIES Threads::Threads ${CMAKE_DL_LIBS})
endfunction()
# On UNIX, ask pkg-config (quietly) about openssl. The _OPENSSL_* results are
# used further down as HINTS (_OPENSSL_INCLUDEDIR, _OPENSSL_LIBDIR).
if (UNIX)
find_package(PkgConfig QUIET)
pkg_check_modules(_OPENSSL QUIET openssl)
endif ()
# Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES
# The original suffix list is saved so it can be restored at the end of this
# module.
if(OPENSSL_USE_STATIC_LIBS)
set(_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
if(WIN32)
set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
else()
set(CMAKE_FIND_LIBRARY_SUFFIXES .a )
endif()
endif()
# Build the list of candidate OpenSSL root directories.
# On Windows: OPENSSL_ROOT_DIR (variable and environment), two registry
# entries, and a set of fixed install locations. Elsewhere: OPENSSL_ROOT_DIR
# (variable and environment) only.
if (WIN32)
# http://www.slproweb.com/products/Win32OpenSSL.html
set(_OPENSSL_ROOT_HINTS
${OPENSSL_ROOT_DIR}
"[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (32-bit)_is1;Inno Setup: App Path]"
"[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (64-bit)_is1;Inno Setup: App Path]"
ENV OPENSSL_ROOT_DIR
)
file(TO_CMAKE_PATH "$ENV{PROGRAMFILES}" _programfiles)
set(_OPENSSL_ROOT_PATHS
"${_programfiles}/OpenSSL"
"${_programfiles}/OpenSSL-Win32"
"${_programfiles}/OpenSSL-Win64"
"C:/OpenSSL/"
"C:/OpenSSL-Win32/"
"C:/OpenSSL-Win64/"
)
unset(_programfiles)
else ()
set(_OPENSSL_ROOT_HINTS
${OPENSSL_ROOT_DIR}
ENV OPENSSL_ROOT_DIR
)
endif ()
# Pre-assembled "HINTS ... PATHS ..." argument fragment that is spliced into
# every find_path()/find_library() call below.
set(_OPENSSL_ROOT_HINTS_AND_PATHS
HINTS ${_OPENSSL_ROOT_HINTS}
PATHS ${_OPENSSL_ROOT_PATHS}
)
# Locate the directory that contains openssl/ssl.h.
find_path(OPENSSL_INCLUDE_DIR
NAMES
openssl/ssl.h
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_INCLUDEDIR}
PATH_SUFFIXES
include
)
# Locate the SSL and crypto libraries. Four cases are handled: MSVC, MinGW,
# other Windows toolchains, and everything else. Each case ends by defining
# OPENSSL_SSL_LIBRARY and OPENSSL_CRYPTO_LIBRARY.
if(WIN32 AND NOT CYGWIN)
if(MSVC)
# /MD and /MDd are the standard values - if someone wants to use
# others, the libnames have to change here too
# use also ssl and ssleay32 in debug as fallback for openssl < 0.9.8b
# enable OPENSSL_MSVC_STATIC_RT to get the libs build /MT (Multithreaded no-DLL)
# In Visual C++ naming convention each of these four kinds of Windows libraries has its standard suffix:
# * MD for dynamic-release
# * MDd for dynamic-debug
# * MT for static-release
# * MTd for static-debug
# Implementation details:
# We are using the libraries located in the VC subdir instead of the parent directory even though :
# libeay32MD.lib is identical to ../libeay32.lib, and
# ssleay32MD.lib is identical to ../ssleay32.lib
# enable OPENSSL_USE_STATIC_LIBS to use the static libs located in lib/VC/static
if (OPENSSL_MSVC_STATIC_RT)
set(_OPENSSL_MSVC_RT_MODE "MT")
else ()
set(_OPENSSL_MSVC_RT_MODE "MD")
endif ()
# Since OpenSSL 1.1, lib names are like libcrypto32MTd.lib and libssl32MTd.lib
if( "${CMAKE_SIZEOF_VOID_P}" STREQUAL "8" )
set(_OPENSSL_MSVC_ARCH_SUFFIX "64")
else()
set(_OPENSSL_MSVC_ARCH_SUFFIX "32")
endif()
if(OPENSSL_USE_STATIC_LIBS)
set(_OPENSSL_PATH_SUFFIXES
"lib/VC/static"
"VC/static"
"lib"
)
else()
set(_OPENSSL_PATH_SUFFIXES
"lib/VC"
"VC"
"lib"
)
endif ()
# Debug and release variants are searched separately and combined below via
# select_library_configurations().
find_library(LIB_EAY_DEBUG
NAMES
libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
libcrypto${_OPENSSL_MSVC_RT_MODE}d
libcryptod
libeay32${_OPENSSL_MSVC_RT_MODE}d
libeay32d
cryptod
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
find_library(LIB_EAY_RELEASE
NAMES
libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
libcrypto${_OPENSSL_MSVC_RT_MODE}
libcrypto
libeay32${_OPENSSL_MSVC_RT_MODE}
libeay32
crypto
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
find_library(SSL_EAY_DEBUG
NAMES
libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
libssl${_OPENSSL_MSVC_RT_MODE}d
libssld
ssleay32${_OPENSSL_MSVC_RT_MODE}d
ssleay32d
ssld
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
find_library(SSL_EAY_RELEASE
NAMES
libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
libssl${_OPENSSL_MSVC_RT_MODE}
libssl
ssleay32${_OPENSSL_MSVC_RT_MODE}
ssleay32
ssl
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
set(LIB_EAY_LIBRARY_DEBUG "${LIB_EAY_DEBUG}")
set(LIB_EAY_LIBRARY_RELEASE "${LIB_EAY_RELEASE}")
set(SSL_EAY_LIBRARY_DEBUG "${SSL_EAY_DEBUG}")
set(SSL_EAY_LIBRARY_RELEASE "${SSL_EAY_RELEASE}")
# NOTE(review): SelectLibraryConfigurations.cmake is expected next to this
# file; confirm it is shipped alongside this module.
include(${CMAKE_CURRENT_LIST_DIR}/SelectLibraryConfigurations.cmake)
select_library_configurations(LIB_EAY)
select_library_configurations(SSL_EAY)
mark_as_advanced(LIB_EAY_LIBRARY_DEBUG LIB_EAY_LIBRARY_RELEASE
SSL_EAY_LIBRARY_DEBUG SSL_EAY_LIBRARY_RELEASE)
set(OPENSSL_SSL_LIBRARY ${SSL_EAY_LIBRARY} )
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY_LIBRARY} )
elseif(MINGW)
# same player, for MinGW
set(LIB_EAY_NAMES crypto libeay32)
set(SSL_EAY_NAMES ssl ssleay32)
find_library(LIB_EAY
NAMES
${LIB_EAY_NAMES}
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
"lib/MinGW"
"lib"
)
find_library(SSL_EAY
NAMES
${SSL_EAY_NAMES}
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
"lib/MinGW"
"lib"
)
mark_as_advanced(SSL_EAY LIB_EAY)
set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
unset(LIB_EAY_NAMES)
unset(SSL_EAY_NAMES)
else()
# Not sure what to pick for -say- intel, let's use the toplevel ones and hope someone reports issues:
find_library(LIB_EAY
NAMES
libcrypto
libeay32
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
find_library(SSL_EAY
NAMES
libssl
ssleay32
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
mark_as_advanced(SSL_EAY LIB_EAY)
set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
endif()
else()
# Non-Windows (and Cygwin): search straight into the public result variables.
find_library(OPENSSL_SSL_LIBRARY
NAMES
ssl
ssleay32
ssleay32MD
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
find_library(OPENSSL_CRYPTO_LIBRARY
NAMES
crypto
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
mark_as_advanced(OPENSSL_CRYPTO_LIBRARY OPENSSL_SSL_LIBRARY)
endif()
# compat defines
# The plural *_LIBRARIES variables start as copies of the single-library
# results and, when _OpenSSL_has_dependencies is set by the check below, are
# extended through _OpenSSL_add_dependencies().
set(OPENSSL_SSL_LIBRARIES ${OPENSSL_SSL_LIBRARY})
set(OPENSSL_CRYPTO_LIBRARIES ${OPENSSL_CRYPTO_LIBRARY})
_OpenSSL_test_and_find_dependencies("${OPENSSL_SSL_LIBRARY}" "${OPENSSL_CRYPTO_LIBRARY}")
if(_OpenSSL_has_dependencies)
_OpenSSL_add_dependencies( OPENSSL_SSL_LIBRARIES "${OPENSSL_SSL_LIBRARY}" )
_OpenSSL_add_dependencies( OPENSSL_CRYPTO_LIBRARIES "${OPENSSL_CRYPTO_LIBRARY}" )
endif()
# Convert a hexadecimal string to its decimal value.
#   HEX - hexadecimal digits, upper or lower case, without a "0x" prefix
#   DEC - NAME of the variable that receives the result in the parent scope
# An empty HEX yields 0.
function(from_hex HEX DEC)
  string(TOUPPER "${HEX}" HEX)
  string(LENGTH "${HEX}" _digit_count)
  set(_value 0)
  set(_pos 0)
  # Walk the digits left to right: value = value * 16 + digit.
  while(_pos LESS _digit_count)
    string(SUBSTRING "${HEX}" ${_pos} 1 NIBBLE)
    # Letters A-F map to 10-15 by their position in "ABCDEF"; anything else
    # is passed to math() as-is, exactly like a decimal digit.
    string(FIND "ABCDEF" "${NIBBLE}" _letter_index)
    if(_letter_index GREATER -1)
      math(EXPR _digit "${_letter_index} + 10")
    else()
      set(_digit "${NIBBLE}")
    endif()
    math(EXPR _value "${_value} * 16 + ${_digit}")
    math(EXPR _pos "${_pos} + 1")
  endwhile()
  set(${DEC} ${_value} PARENT_SCOPE)
endfunction()
# Derive OPENSSL_VERSION (and its MAJOR/MINOR/FIX/PATCH parts) from the
# OPENSSL_VERSION_NUMBER define in openssl/opensslv.h, when that header exists.
if(OPENSSL_INCLUDE_DIR AND EXISTS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h")
file(STRINGS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h" openssl_version_str
REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])+.*")
if(openssl_version_str)
# The version number is encoded as 0xMNNFFPPS: major minor fix patch status
# The status gives if this is a developer or prerelease and is ignored here.
# Major, minor, and fix directly translate into the version numbers shown in
# the string. The patch field translates to the single character suffix that
# indicates the bug fix state, which 00 -> nothing, 01 -> a, 02 -> b and so
# on.
# Split the hex literal into a 5-element list: M;NN;FF;PP;S.
string(REGEX REPLACE "^.*OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F]).*$"
"\\1;\\2;\\3;\\4;\\5" OPENSSL_VERSION_LIST "${openssl_version_str}")
# The major digit is used as-is; minor and fix are converted from hex.
list(GET OPENSSL_VERSION_LIST 0 OPENSSL_VERSION_MAJOR)
list(GET OPENSSL_VERSION_LIST 1 OPENSSL_VERSION_MINOR)
from_hex("${OPENSSL_VERSION_MINOR}" OPENSSL_VERSION_MINOR)
list(GET OPENSSL_VERSION_LIST 2 OPENSSL_VERSION_FIX)
from_hex("${OPENSSL_VERSION_FIX}" OPENSSL_VERSION_FIX)
list(GET OPENSSL_VERSION_LIST 3 OPENSSL_VERSION_PATCH)
if (NOT OPENSSL_VERSION_PATCH STREQUAL "00")
from_hex("${OPENSSL_VERSION_PATCH}" _tmp)
# 96 is the ASCII code of 'a' minus 1
math(EXPR OPENSSL_VERSION_PATCH_ASCII "${_tmp} + 96")
unset(_tmp)
# Once anyone knows how OpenSSL would call the patch versions beyond 'z'
# this should be updated to handle that, too. This has not happened yet
# so it is simply ignored here for now.
string(ASCII "${OPENSSL_VERSION_PATCH_ASCII}" OPENSSL_VERSION_PATCH_STRING)
endif ()
# NOTE(review): OPENSSL_VERSION_PATCH_STRING is only assigned when the patch
# field is not "00"; otherwise whatever value it already holds is used.
set(OPENSSL_VERSION "${OPENSSL_VERSION_MAJOR}.${OPENSSL_VERSION_MINOR}.${OPENSSL_VERSION_FIX}${OPENSSL_VERSION_PATCH_STRING}")
endif ()
endif ()
# Combined library list (SSL first, then crypto) with duplicates removed.
set(OPENSSL_LIBRARIES ${OPENSSL_SSL_LIBRARIES} ${OPENSSL_CRYPTO_LIBRARIES} )
list(REMOVE_DUPLICATES OPENSSL_LIBRARIES)
# Set OpenSSL_<component>_FOUND for each requested component. A component
# counts as found when the include directory exists and at least one of its
# library files (plain, debug or release) exists on disk. Names other than
# "Crypto" and "SSL" produce a warning and are marked not found.
foreach(_comp IN LISTS OpenSSL_FIND_COMPONENTS)
if(_comp STREQUAL "Crypto")
if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
(EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
)
set(OpenSSL_${_comp}_FOUND TRUE)
else()
set(OpenSSL_${_comp}_FOUND FALSE)
endif()
elseif(_comp STREQUAL "SSL")
if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
(EXISTS "${OPENSSL_SSL_LIBRARY}" OR
EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
)
set(OpenSSL_${_comp}_FOUND TRUE)
else()
set(OpenSSL_${_comp}_FOUND FALSE)
endif()
else()
message(WARNING "${_comp} is not a valid OpenSSL component")
set(OpenSSL_${_comp}_FOUND FALSE)
endif()
endforeach()
unset(_comp)
# Report the overall result; only the crypto library and the include
# directory are listed as REQUIRED_VARS.
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake)
find_package_handle_standard_args(OpenSSL
REQUIRED_VARS
OPENSSL_CRYPTO_LIBRARY
OPENSSL_INCLUDE_DIR
VERSION_VAR
OPENSSL_VERSION
HANDLE_COMPONENTS
FAIL_MESSAGE
"Could NOT find OpenSSL, try to set the path to OpenSSL root folder in the system variable OPENSSL_ROOT_DIR"
)
mark_as_advanced(OPENSSL_INCLUDE_DIR OPENSSL_LIBRARIES)
# Define the imported targets OpenSSL::Crypto and OpenSSL::SSL when OpenSSL
# was found, the target does not exist yet, and at least one matching library
# file exists on disk.
if(OPENSSL_FOUND)
if(NOT TARGET OpenSSL::Crypto AND
(EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
)
add_library(OpenSSL::Crypto UNKNOWN IMPORTED)
set_target_properties(OpenSSL::Crypto PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
# Generic location first, then per-configuration locations when the
# release/debug variants exist.
if(EXISTS "${OPENSSL_CRYPTO_LIBRARY}")
set_target_properties(OpenSSL::Crypto PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES "C"
IMPORTED_LOCATION "${OPENSSL_CRYPTO_LIBRARY}")
endif()
if(EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
set_property(TARGET OpenSSL::Crypto APPEND PROPERTY
IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(OpenSSL::Crypto PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
IMPORTED_LOCATION_RELEASE "${LIB_EAY_LIBRARY_RELEASE}")
endif()
if(EXISTS "${LIB_EAY_LIBRARY_DEBUG}")
set_property(TARGET OpenSSL::Crypto APPEND PROPERTY
IMPORTED_CONFIGURATIONS DEBUG)
set_target_properties(OpenSSL::Crypto PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "C"
IMPORTED_LOCATION_DEBUG "${LIB_EAY_LIBRARY_DEBUG}")
endif()
_OpenSSL_target_add_dependencies(OpenSSL::Crypto)
endif()
if(NOT TARGET OpenSSL::SSL AND
(EXISTS "${OPENSSL_SSL_LIBRARY}" OR
EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
)
add_library(OpenSSL::SSL UNKNOWN IMPORTED)
set_target_properties(OpenSSL::SSL PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
if(EXISTS "${OPENSSL_SSL_LIBRARY}")
set_target_properties(OpenSSL::SSL PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES "C"
IMPORTED_LOCATION "${OPENSSL_SSL_LIBRARY}")
endif()
if(EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
set_property(TARGET OpenSSL::SSL APPEND PROPERTY
IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(OpenSSL::SSL PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
IMPORTED_LOCATION_RELEASE "${SSL_EAY_LIBRARY_RELEASE}")
endif()
if(EXISTS "${SSL_EAY_LIBRARY_DEBUG}")
set_property(TARGET OpenSSL::SSL APPEND PROPERTY
IMPORTED_CONFIGURATIONS DEBUG)
set_target_properties(OpenSSL::SSL PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "C"
IMPORTED_LOCATION_DEBUG "${SSL_EAY_LIBRARY_DEBUG}")
endif()
# Linking OpenSSL::SSL also pulls in OpenSSL::Crypto when that target exists.
if(TARGET OpenSSL::Crypto)
set_target_properties(OpenSSL::SSL PROPERTIES
INTERFACE_LINK_LIBRARIES OpenSSL::Crypto)
endif()
_OpenSSL_target_add_dependencies(OpenSSL::SSL)
endif()
endif()
# Restore the original find library ordering
if(OPENSSL_USE_STATIC_LIBS)
set(CMAKE_FIND_LIBRARY_SUFFIXES ${_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
endif()

View File

@ -1,268 +0,0 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageMessage.cmake)
# internal helper macro
# Emit a "package not found" message for the package named by ${_NAME}.
#   _msg - base message text
# FPHSA_REASON_FAILURE_MESSAGE, when set, is appended to the text. The message
# is fatal if the package is REQUIRED, a STATUS line if it is neither REQUIRED
# nor QUIET, and suppressed otherwise.
macro(_FPHSA_FAILURE_MESSAGE _msg)
  set(__msg "${_msg}")
  if(FPHSA_REASON_FAILURE_MESSAGE)
    string(APPEND __msg "\n Reason given by package: ${FPHSA_REASON_FAILURE_MESSAGE}\n")
  endif()

  if(${_NAME}_FIND_REQUIRED)
    message(FATAL_ERROR "${__msg}")
  elseif(NOT ${_NAME}_FIND_QUIETLY)
    message(STATUS "${__msg}")
  endif()
endmacro()
# internal helper macro to generate the failure message when used in CONFIG_MODE:
# Build and emit the failure message for CONFIG_MODE, covering three cases:
#   1. a config file was found but other required variables are missing;
#   2. config files were considered but none had a suitable version;
#   3. no config file was found at all.
macro(_FPHSA_HANDLE_FAILURE_CONFIG_MODE)
  if(${_NAME}_CONFIG)
    # <PackageName>_CONFIG is set, but FOUND is false: some other REQUIRED_VARS entry is missing.
    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: missing:${MISSING_VARS} (found ${${_NAME}_CONFIG} ${VERSION_MSG})")
  elseif(${_NAME}_CONSIDERED_CONFIGS)
    # Config files exist but no version was suitable: list every candidate.
    set(configsText "")
    list(LENGTH ${_NAME}_CONSIDERED_CONFIGS configsCount)
    math(EXPR configsCount "${configsCount} - 1")
    foreach(currentConfigIndex RANGE ${configsCount})
      list(GET ${_NAME}_CONSIDERED_CONFIGS ${currentConfigIndex} filename)
      list(GET ${_NAME}_CONSIDERED_VERSIONS ${currentConfigIndex} version)
      string(APPEND configsText "\n ${filename} (version ${version})")
    endforeach()

    # Fold the package's own not-found message into the failure reason, or
    # terminate the candidate list with a newline when there is none.
    if(NOT ${_NAME}_NOT_FOUND_MESSAGE)
      string(APPEND configsText "\n")
    elseif(FPHSA_REASON_FAILURE_MESSAGE)
      string(PREPEND FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}\n ")
    else()
      set(FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}")
    endif()

    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} ${VERSION_MSG}, checked the following files:${configsText}")
  else()
    # Simple case: no config file was found at all.
    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: found neither ${_NAME}Config.cmake nor ${_NAME_LOWER}-config.cmake ${VERSION_MSG}")
  endif()
endmacro()
# Evaluate the outcome of a find module or config lookup for package _NAME.
#
# Two call forms are accepted:
#   simple:   (<name> <fail-message|DEFAULT_MSG> <required-var>...)
#   extended: (<name> [CONFIG_MODE] [HANDLE_COMPONENTS] [FAIL_MESSAGE <msg>]
#              [REASON_FAILURE_MESSAGE <msg>] [VERSION_VAR <var>]
#              [FOUND_VAR <var>] REQUIRED_VARS <var>...)
# The extended form is selected when the first argument after the name is one
# of those keywords.
#
# The package counts as found when every required variable is true, every
# required component is found (with HANDLE_COMPONENTS) and the version check
# passes. The result is written to <name>_FOUND and <NAME_UPPER>_FOUND in the
# parent scope, and a success or failure message is printed.
function(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FIRST_ARG)
# Set up the arguments for `cmake_parse_arguments`.
set(options CONFIG_MODE HANDLE_COMPONENTS)
set(oneValueArgs FAIL_MESSAGE REASON_FAILURE_MESSAGE VERSION_VAR FOUND_VAR)
set(multiValueArgs REQUIRED_VARS)
# Check whether we are in 'simple' or 'extended' mode:
set(_KEYWORDS_FOR_EXTENDED_MODE ${options} ${oneValueArgs} ${multiValueArgs} )
list(FIND _KEYWORDS_FOR_EXTENDED_MODE "${_FIRST_ARG}" INDEX)
if(${INDEX} EQUAL -1)
# Simple mode: first argument is the message, the rest are required vars.
set(FPHSA_FAIL_MESSAGE ${_FIRST_ARG})
set(FPHSA_REQUIRED_VARS ${ARGN})
set(FPHSA_VERSION_VAR)
else()
cmake_parse_arguments(FPHSA "${options}" "${oneValueArgs}" "${multiValueArgs}" ${_FIRST_ARG} ${ARGN})
if(FPHSA_UNPARSED_ARGUMENTS)
message(FATAL_ERROR "Unknown keywords given to FIND_PACKAGE_HANDLE_STANDARD_ARGS(): \"${FPHSA_UNPARSED_ARGUMENTS}\"")
endif()
if(NOT FPHSA_FAIL_MESSAGE)
set(FPHSA_FAIL_MESSAGE "DEFAULT_MSG")
endif()
# In config-mode, we rely on the variable <PackageName>_CONFIG, which is set by find_package()
# when it successfully found the config-file, including version checking:
if(FPHSA_CONFIG_MODE)
list(INSERT FPHSA_REQUIRED_VARS 0 ${_NAME}_CONFIG)
list(REMOVE_DUPLICATES FPHSA_REQUIRED_VARS)
set(FPHSA_VERSION_VAR ${_NAME}_VERSION)
endif()
if(NOT FPHSA_REQUIRED_VARS)
message(FATAL_ERROR "No REQUIRED_VARS specified for FIND_PACKAGE_HANDLE_STANDARD_ARGS()")
endif()
endif()
# now that we collected all arguments, process them
if("x${FPHSA_FAIL_MESSAGE}" STREQUAL "xDEFAULT_MSG")
set(FPHSA_FAIL_MESSAGE "Could NOT find ${_NAME}")
endif()
# The first required variable's value is shown in the "Found ..." message.
list(GET FPHSA_REQUIRED_VARS 0 _FIRST_REQUIRED_VAR)
string(TOUPPER ${_NAME} _NAME_UPPER)
string(TOLOWER ${_NAME} _NAME_LOWER)
# Validate FOUND_VAR: only <name>_FOUND or <NAME_UPPER>_FOUND are accepted.
# NOTE(review): _FOUND_VAR is not read again within this function as shown;
# both *_FOUND names are always set below.
if(FPHSA_FOUND_VAR)
if(FPHSA_FOUND_VAR MATCHES "^${_NAME}_FOUND$" OR FPHSA_FOUND_VAR MATCHES "^${_NAME_UPPER}_FOUND$")
set(_FOUND_VAR ${FPHSA_FOUND_VAR})
else()
message(FATAL_ERROR "The argument for FOUND_VAR is \"${FPHSA_FOUND_VAR}\", but only \"${_NAME}_FOUND\" and \"${_NAME_UPPER}_FOUND\" are valid names.")
endif()
else()
set(_FOUND_VAR ${_NAME_UPPER}_FOUND)
endif()
# collect all variables which were not found, so they can be printed, so the
# user knows better what went wrong (#6375)
set(MISSING_VARS "")
set(DETAILS "")
# check if all passed variables are valid
set(FPHSA_FOUND_${_NAME} TRUE)
foreach(_CURRENT_VAR ${FPHSA_REQUIRED_VARS})
if(NOT ${_CURRENT_VAR})
set(FPHSA_FOUND_${_NAME} FALSE)
string(APPEND MISSING_VARS " ${_CURRENT_VAR}")
else()
string(APPEND DETAILS "[${${_CURRENT_VAR}}]")
endif()
endforeach()
if(FPHSA_FOUND_${_NAME})
set(${_NAME}_FOUND TRUE)
set(${_NAME_UPPER}_FOUND TRUE)
else()
set(${_NAME}_FOUND FALSE)
set(${_NAME_UPPER}_FOUND FALSE)
endif()
# component handling
# A missing component only fails the package when it was requested as
# required (<name>_FIND_REQUIRED_<comp>).
unset(FOUND_COMPONENTS_MSG)
unset(MISSING_COMPONENTS_MSG)
if(FPHSA_HANDLE_COMPONENTS)
foreach(comp ${${_NAME}_FIND_COMPONENTS})
if(${_NAME}_${comp}_FOUND)
if(NOT DEFINED FOUND_COMPONENTS_MSG)
set(FOUND_COMPONENTS_MSG "found components:")
endif()
string(APPEND FOUND_COMPONENTS_MSG " ${comp}")
else()
if(NOT DEFINED MISSING_COMPONENTS_MSG)
set(MISSING_COMPONENTS_MSG "missing components:")
endif()
string(APPEND MISSING_COMPONENTS_MSG " ${comp}")
if(${_NAME}_FIND_REQUIRED_${comp})
set(${_NAME}_FOUND FALSE)
string(APPEND MISSING_VARS " ${comp}")
endif()
endif()
endforeach()
set(COMPONENT_MSG "${FOUND_COMPONENTS_MSG} ${MISSING_COMPONENTS_MSG}")
string(APPEND DETAILS "[c${COMPONENT_MSG}]")
endif()
# version handling:
set(VERSION_MSG "")
set(VERSION_OK TRUE)
# check with DEFINED here as the requested or found version may be "0"
if (DEFINED ${_NAME}_FIND_VERSION)
if(DEFINED ${FPHSA_VERSION_VAR})
set(_FOUND_VERSION ${${FPHSA_VERSION_VAR}})
if(${_NAME}_FIND_VERSION_EXACT) # exact version required
# count the dots in the version string
string(REGEX REPLACE "[^.]" "" _VERSION_DOTS "${_FOUND_VERSION}")
# add one dot because there is one dot more than there are components
string(LENGTH "${_VERSION_DOTS}." _VERSION_DOTS)
# When the found version has more components than the requested one, compare
# only the leading components that were requested.
if (_VERSION_DOTS GREATER ${_NAME}_FIND_VERSION_COUNT)
# Because of the C++ implementation of find_package() ${_NAME}_FIND_VERSION_COUNT
# is at most 4 here. Therefore a simple lookup table is used.
if (${_NAME}_FIND_VERSION_COUNT EQUAL 1)
set(_VERSION_REGEX "[^.]*")
elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 2)
set(_VERSION_REGEX "[^.]*\\.[^.]*")
elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 3)
set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*")
else ()
set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*\\.[^.]*")
endif ()
string(REGEX REPLACE "^(${_VERSION_REGEX})\\..*" "\\1" _VERSION_HEAD "${_FOUND_VERSION}")
unset(_VERSION_REGEX)
if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _VERSION_HEAD)
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
set(VERSION_OK FALSE)
else ()
set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
endif ()
unset(_VERSION_HEAD)
else ()
if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _FOUND_VERSION)
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
set(VERSION_OK FALSE)
else ()
set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
endif ()
endif ()
unset(_VERSION_DOTS)
else() # minimum version specified:
if (${_NAME}_FIND_VERSION VERSION_GREATER _FOUND_VERSION)
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is at least \"${${_NAME}_FIND_VERSION}\"")
set(VERSION_OK FALSE)
else ()
set(VERSION_MSG "(found suitable version \"${_FOUND_VERSION}\", minimum required is \"${${_NAME}_FIND_VERSION}\")")
endif ()
endif()
else()
# if the package was not found, but a version was given, add that to the output:
if(${_NAME}_FIND_VERSION_EXACT)
set(VERSION_MSG "(Required is exact version \"${${_NAME}_FIND_VERSION}\")")
else()
set(VERSION_MSG "(Required is at least version \"${${_NAME}_FIND_VERSION}\")")
endif()
endif()
else ()
# Check with DEFINED as the found version may be 0.
if(DEFINED ${FPHSA_VERSION_VAR})
set(VERSION_MSG "(found version \"${${FPHSA_VERSION_VAR}}\")")
endif()
endif ()
if(VERSION_OK)
string(APPEND DETAILS "[v${${FPHSA_VERSION_VAR}}(${${_NAME}_FIND_VERSION})]")
else()
set(${_NAME}_FOUND FALSE)
endif()
# print the result:
if (${_NAME}_FOUND)
FIND_PACKAGE_MESSAGE(${_NAME} "Found ${_NAME}: ${${_FIRST_REQUIRED_VAR}} ${VERSION_MSG} ${COMPONENT_MSG}" "${DETAILS}")
else ()
if(FPHSA_CONFIG_MODE)
_FPHSA_HANDLE_FAILURE_CONFIG_MODE()
else()
if(NOT VERSION_OK)
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: ${VERSION_MSG} (found ${${_FIRST_REQUIRED_VAR}})")
else()
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} (missing:${MISSING_VARS}) ${VERSION_MSG}")
endif()
endif()
endif ()
# Publish the final result under both spellings of the package name.
set(${_NAME}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
set(${_NAME_UPPER}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
endfunction()

View File

@ -1,48 +0,0 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
#[=======================================================================[.rst:
FindPackageMessage
------------------
.. code-block:: cmake
find_package_message(<name> "message for user" "find result details")
This function is intended to be used in FindXXX.cmake modules files.
It will print a message once for each unique find result. This is
useful for telling the user where a package was found. The first
argument specifies the name (XXX) of the package. The second argument
specifies the message to display. The third argument lists details
about the find result so that if they change the message will be
displayed again. The macro also obeys the QUIET argument to the
find_package command.
Example:
.. code-block:: cmake
if(X11_FOUND)
find_package_message(X11 "Found X11: ${X11_X11_LIB}"
"[${X11_X11_LIB}][${X11_INCLUDE_DIR}]")
else()
...
endif()
#]=======================================================================]
function(find_package_message pkg msg details)
  # find_package(... QUIET) suppresses the message entirely.
  if(${pkg}_FIND_QUIETLY)
    return()
  endif()

  # Newlines are stripped so the details compare as a single cache string.
  string(REPLACE "\n" "" details "${details}")
  set(DETAILS_VAR FIND_PACKAGE_MESSAGE_DETAILS_${pkg})

  # Same details as the cached ones: this find result was already reported.
  if("${details}" STREQUAL "${${DETAILS_VAR}}")
    return()
  endif()

  message(STATUS "${msg}")
  # Remember the details in the cache so the message is only printed again
  # when the find result changes.
  set("${DETAILS_VAR}" "${details}"
    CACHE INTERNAL "Details about finding ${pkg}")
endfunction()

View File

@ -8,7 +8,7 @@ sist2 (Simple incremental search tool)
*Warning: sist2 is in early development*
![sist2.png](sist2.png)
![sist2.png](docs/sist2.png)
## Features
@ -49,7 +49,7 @@ sist2 (Simple incremental search tool)
1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
1. *(or)* `docker pull simon987/sist2:latest`
1. See [Usage guide](USAGE.md)
1. See [Usage guide](docs/USAGE.md)
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
@ -57,7 +57,7 @@ sist2 (Simple incremental search tool)
## Example usage
See [Usage guide](USAGE.md) for more details
See [Usage guide](docs/USAGE.md) for more details
1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
1. Push index to Elasticsearch: `sist2 index ./docs_idx`
@ -75,7 +75,8 @@ pdf,xps,cbz,cbr,fb2,epub | MuPDF | text+ocr | yes, `png` | title |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - |
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
docx, xlsx, pptx | *(none)* | yes | no | no |
docx, xlsx, pptx | *(none)* | yes | no | creator, modified_by, title |
mobi, azw, azw3 | libmobi | yes | no | author, title |
\* *See [Archive files](#archive-files)*
@ -113,23 +114,17 @@ sist2 scan --ocr eng ~/Books/Textbooks/
## Build from source
You can compile **sist2** by yourself if you don't want to use the pre-compiled
binaries.
binaries (GCC 7+ required).
1. Install compile-time dependencies
*(Debian)*
```bash
apt install git cmake pkg-config libglib2.0-dev \
libssl-dev uuid-dev python3 libmagic-dev libfreetype6-dev \
libcurl4-openssl-dev libbz2-dev yasm libharfbuzz-dev ragel \
libarchive-dev libtiff5 libpng16-16 libpango1.0-dev \
libxml2-dev libopenjp2-7-dev libleptonica-dev
```bash
vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd
```
2. Build
```bash
git clone --recurse-submodules https://github.com/simon987/sist2
./scripts/get_static_libs.sh
cmake .
git clone --recursive https://github.com/simon987/sist2/
cmake -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
make
```

1
cJSON

@ -1 +0,0 @@
Subproject commit 3ece4c893c123aa3d77f90d580cf6b0a4b3a2ad5

View File

@ -1,12 +1,11 @@
#!/usr/bin/env bash
./scripts/get_static_libs.sh
# Drop the previous configuration; the cache file CMake writes is spelled CMakeCache.txt
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=off .
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
make
strip sist2
# Drop the release configuration before configuring the debug build
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=on .
cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
make

View File

@ -82,7 +82,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
* list: Only get file names as text
* shallow: Don't parse archives inside archives.
* recurse: Scan archives recursively (default)
* `--ocr` See [OCR](README.md#OCR)
* `--ocr` See [OCR](../README.md#OCR)
* `-e, --exclude` Regex pattern to exclude files. A file is excluded if the pattern matches any
part of the full absolute path.
@ -189,7 +189,7 @@ my_index/
}
```
You can find the full list of supported fields [here](src/io/serialize.c#L90)
You can find the full list of supported fields [here](../src/io/serialize.c#L90)
The `_keyword.*` items will be indexed and searchable as **keyword** fields (only full matches allowed).
The `_text.*` items will be indexed and searchable as **text** fields (fuzzy searching allowed)

View File

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 26 KiB

View File

Before

Width:  |  Height:  |  Size: 889 KiB

After

Width:  |  Height:  |  Size: 889 KiB

@ -1 +0,0 @@
Subproject commit 288acf97a15d558f96c24c89f578b724d6e06b0c

@ -1 +0,0 @@
Subproject commit 7239254b1da28c1a0d8dff8916edefc98b88775e

@ -1 +0,0 @@
Subproject commit b07714d6b53bb20a2796f5efa607dc32aac587f1

@ -1 +0,0 @@
Subproject commit a9d04f7b0f72711677241e1c6805612ef56f157b

@ -1 +0,0 @@
Subproject commit 1249b5cd02c3b6fb9b917d16c76bc76c862932b6

@ -1 +0,0 @@
Subproject commit 301f7a14295a3bdfaf406dbb5004d0784dc137ea

@ -1 +0,0 @@
Subproject commit ed881da0db424dd77a195a90511130aca1122dc3

@ -1 +0,0 @@
Subproject commit abada6f26c8d3e5190bea932cb05e3474558e18d

@ -1 +0,0 @@
Subproject commit 2b3b230b79ecae119b7eb847f2f9545a46bef13c

@ -1 +0,0 @@
Subproject commit 563ecfb55ca77c0fc5ea19e4885e00f55ec82ca9

@ -1 +0,0 @@
Subproject commit d576c7cbd57393ed26d1e02cf60530e690b4f5c2

1
lmdb

@ -1 +0,0 @@
Subproject commit 5c012bbe033f9bbb273078b07dded59f080d348d

View File

@ -16,6 +16,10 @@
"mime": {
"type": "keyword"
},
"thumbnail": {
"type": "keyword",
"index": false
},
"videoc": {
"type": "keyword",
"index": false
@ -133,6 +137,12 @@
},
"exif_user_comment": {
"type": "text"
},
"author": {
"type": "text"
},
"modified_by": {
"type": "text"
}
}
}

View File

@ -2,15 +2,15 @@
rm -rf index.sist2/
rm web/js/bundle.js 2> /dev/null
cat `ls web/js/*.min.js` > web/js/bundle.js
cat web/js/{util,dom,search}.js >> web/js/bundle.js
rm src/static/js/bundle.js 2> /dev/null
cat `ls src/static/js/*.min.js` > src/static/js/bundle.js
cat src/static/js/{util,dom,search}.js >> src/static/js/bundle.js
rm web/css/bundle*.css 2> /dev/null
cat web/css/*.min.css > web/css/bundle.css
cat web/css/light.css >> web/css/bundle.css
cat web/css/*.min.css > web/css/bundle_dark.css
cat web/css/dark.css >> web/css/bundle_dark.css
rm src/static/css/bundle*.css 2> /dev/null
cat src/static/css/*.min.css > src/static/css/bundle.css
cat src/static/css/light.css >> src/static/css/bundle.css
cat src/static/css/*.min.css > src/static/css/bundle_dark.css
cat src/static/css/dark.css >> src/static/css/bundle_dark.css
python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c

View File

@ -1,128 +0,0 @@
#!/usr/bin/env bash
THREADS=$(nproc)
cd lib
cd mupdf
CFLAGS=-fPIC make USE_SYSTEM_HARFBUZZ=yes USE_SYSTEM_OPENJPEG=yes HAVE_X11=no HAVE_GLUT=no -j $THREADS
cd ..
mv mupdf/build/release/libmupdf.a .
mv mupdf/build/release/libmupdf-third.a .
# openjp2
cd openjpeg
cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3 -DNDEBUG -fPIC"
make -j $THREADS
cd ..
mv openjpeg/bin/libopenjp2.a .
# harfbuzz
cd harfbuzz
./autogen.sh
CFLAGS=-fPIC ./configure --disable-shared --enable-static
make -j $THREADS
cd ..
mv harfbuzz/src/.libs/libharfbuzz.a .
# ffmpeg
cd ffmpeg
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \
--disable-ffprobe --disable-doc\
--disable-manpages --disable-postproc --disable-avfilter \
--disable-alsa --disable-lzma --disable-xlib --disable-debug\
--disable-vdpau --disable-vaapi --disable-sdl2 --disable-network\
--extra-cflags=-fPIC
make -j $THREADS
cd ..
mv ffmpeg/libavcodec/libavcodec.a .
mv ffmpeg/libavformat/libavformat.a .
mv ffmpeg/libavutil/libavutil.a .
mv ffmpeg/libswresample/libswresample.a .
mv ffmpeg/libswscale/libswscale.a .
# onion
cd onion
mkdir build 2> /dev/null
cd build
cmake -DONION_USE_SSL=false -DONION_USE_PAM=false -DONION_USE_PNG=false -DONION_USE_JPEG=false \
-DONION_USE_JPEG=false -DONION_USE_XML2=false -DONION_USE_SYSTEMD=false -DONION_USE_SQLITE3=false \
-DONION_USE_REDIS=false -DONION_USE_GC=false -DONION_USE_TESTS=false -DONION_EXAMPLES=false \
-DONION_USE_BINDINGS_CPP=false ..
make -j $THREADS
cd ../..
mv onion/build/src/onion/libonion_static.a .
#bzip2
cd bzip2-1.0.6
make -j $THREADS
cd ..
mv bzip2-1.0.6/libbz2.a .
# magic
cd libmagic
./autogen.sh
./configure --enable-static --disable-shared
make -j $THREADS
cd ..
mv libmagic/src/.libs/libmagic.a .
# tesseract
cd tesseract
mkdir build
cd build
cmake -DSTATIC=on -DBUILD_TRAINING_TOOLS=off -DBUILD_TESTS=off -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_CXX_FLAGS="-fPIC" -DAUTO_OPTIMIZE=off ..
make -j $THREADS
cd ../..
mv tesseract/build/libtesseract.a .
# leptonica
cd leptonica
./autogen.sh
CFLAGS="-fPIC" ./configure --without-zlib --without-jpeg --without-giflib \
--without-giflib --without-libwebp --without-libwebpmux --without-libopenjpeg \
--enable-static --disable-shared
make -j $THREADS
cd ..
mv leptonica/src/.libs/liblept.a .
# tiff
cd libtiff
./autogen.sh
CFLAGS="-fPIC" CXXFLAGS="-fPIC" CXX_FLAGS="-fPIC" ./configure --enable-static --disable-shared --disable-lzw --disable-jpeg --disable-webp \
--disable-lzma --disable-zstd --disable-jbig
make -j $THREADS
cd ..
mv libtiff/libtiff/.libs/libtiff.a .
# png
cd libpng
CFLAGS="-fPIC" ./configure --enable-static --disable-shared
make -j $THREADS
cd ..
mv libpng/.libs/libpng16.a .
# openssl...
git clone --depth 1 -b OpenSSL_1_1_0-stable https://github.com/openssl/openssl
cd openssl
./config --prefix=$(pwd)/../ssl
make depend
make -j $THREADS
make install
cd ..
mv ./openssl/libcrypto.a ./openssl/libssl.a .
# curl
wget -nc https://curl.haxx.se/download/curl-7.68.0.tar.gz
tar -xzf curl-7.68.0.tar.gz
cd curl-7.68.0
./configure --disable-ldap --disable-ldaps --without-librtmp --disable-rtsp --disable-crypto-auth \
--disable-smtp --without-libidn2 --without-nghttp2 --without-brotli --enable-static --disable-shared \
--without-libpsl --with-ssl=$(pwd)/../ssl
make -j $THREADS
cd ..
mv curl-7.68.0/lib/.libs/libcurl.a .

View File

@ -175,6 +175,7 @@ application/x-mif, mif
application/x-mix-transfer, nix
application/xml, opf
application/x-mobipocket-ebook, mobi
application/vnd.amazon.mobi8-ebook, azw|azw3
application/x-msaccess, accdb
application/x-ms-compress-szdd, fon
application/x-ms-pdb, pdb
1 application/arj arj
175 application/x-mix-transfer nix
176 application/xml opf
177 application/x-mobipocket-ebook mobi
178 application/vnd.amazon.mobi8-ebook azw|azw3
179 application/x-msaccess accdb
180 application/x-ms-compress-szdd fon
181 application/x-ms-pdb pdb

View File

@ -62,6 +62,11 @@ doc = (
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
)
mobi = (
"application/x-mobipocket-ebook",
"application/vnd.amazon.mobi8-ebook"
)
cnt = 1
@ -82,6 +87,8 @@ def mime_id(mime):
mime_id += " | 0x08000000"
elif mime in doc:
mime_id += " | 0x04000000"
elif mime in mobi:
mime_id += " | 0x02000000"
elif mime == "application/x-empty":
return "1"
return mime_id
@ -91,7 +98,7 @@ def clean(t):
return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_")
with open("mime.csv") as f:
with open("scripts/mime.csv") as f:
for l in f:
mime, ext_list = l.split(",")
if l.startswith("!"):
@ -103,7 +110,7 @@ with open("mime.csv") as f:
print("// **Generated by mime.py**")
print("#ifndef MIME_GENERATED_C")
print("#define MIME_GENERATED_C")
print("#include <glib-2.0/glib.h>\n")
print("#include <glib.h>\n")
print("#include <stdlib.h>\n")
# Enum
print("enum mime {")

View File

@ -1,10 +1,10 @@
files = [
"web/css/bundle.css",
"web/css/bundle_dark.css",
"web/js/bundle.js",
"web/img/sprite-skin-flat.png",
"web/img/sprite-skin-flat-dark.png",
"web/search.html",
"src/static/css/bundle.css",
"src/static/css/bundle_dark.css",
"src/static/js/bundle.js",
"src/static/img/sprite-skin-flat.png",
"src/static/img/sprite-skin-flat-dark.png",
"src/static/search.html",
]

View File

@ -1,5 +1,6 @@
#include "cli.h"
#include "ctx.h"
#include <tesseract/capi.h>
#define DEFAULT_OUTPUT "index.sist2/"
#define DEFAULT_CONTENT_SIZE 32768
@ -284,9 +285,23 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
}
if (args->credentials != NULL) {
args->b64credentials = onion_base64_encode(args->credentials, (int) strlen(args->credentials));
//Remove trailing newline
*(args->b64credentials + strlen(args->b64credentials) - 1) = '\0';
char * ptr = strstr(args->credentials, ":");
if (ptr == NULL) {
fprintf(stderr, "Invalid --auth format, see usage\n");
return 1;
}
strncpy(args->auth_user, args->credentials, (ptr - args->credentials));
strncpy(args->auth_pass, ptr + 1, strlen(ptr + 1));
if (strlen(args->auth_user) == 0) {
fprintf(stderr, "--auth username must be at least one character long");
return 1;
}
args->auth_enabled = TRUE;
} else {
args->auth_enabled = FALSE;
}
args->index_count = argc - 1;
@ -304,7 +319,8 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg bind=%s", args->bind)
LOG_DEBUGF("cli.c", "arg port=%s", args->port)
LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials)
LOG_DEBUGF("cli.c", "arg b64credentials=%s", args->b64credentials)
LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user)
LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass)
LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count)
for (int i = 0; i < args->index_count; i++) {
LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i])

View File

@ -3,6 +3,8 @@
#include "sist.h"
#include "libscan/arc/arc.h"
typedef struct scan_args {
float quality;
int size;
@ -43,7 +45,9 @@ typedef struct web_args {
char *bind;
char *port;
char *credentials;
char *b64credentials;
char auth_user[256];
char auth_pass[256];
int auth_enabled;
int index_count;
const char **indices;
} web_args_t;

View File

@ -2,7 +2,21 @@
#define SIST2_CTX_H
#include "sist.h"
#include "tpool.h"
#include "libscan/scan.h"
#include "libscan/arc/arc.h"
#include "libscan/cbr/cbr.h"
#include "libscan/ebook/ebook.h"
#include "libscan/font/font.h"
#include "libscan/media/media.h"
#include "libscan/ooxml/ooxml.h"
#include "libscan/text/text.h"
#include "libscan/mobi/scan_mobi.h"
#include <glib.h>
#include <pcre.h>
//TODO Move to individual scan ctx
struct {
struct index_t index;
@ -11,12 +25,8 @@ struct {
tpool_t *pool;
int tn_size;
int threads;
int content_size;
float tn_qscale;
int depth;
archive_mode_t archive_mode;
int verbose;
int very_verbose;
@ -26,12 +36,18 @@ struct {
GHashTable *original_table;
GHashTable *copy_table;
pthread_mutex_t mupdf_mu;
char * tesseract_lang;
const char * tesseract_path;
pcre *exclude;
pcre_extra *exclude_extra;
int fast;
scan_arc_ctx_t arc_ctx;
scan_cbr_ctx_t cbr_ctx;
scan_ebook_ctx_t ebook_ctx;
scan_font_ctx_t font_ctx;
scan_media_ctx_t media_ctx;
scan_ooxml_ctx_t ooxml_ctx;
scan_text_ctx_t text_ctx;
scan_mobi_ctx_t mobi_ctx;
} ScanCtx;
struct {
@ -48,7 +64,9 @@ struct {
struct {
char *es_url;
int index_count;
char *b64credentials;
char *auth_user;
char *auth_pass;
int auth_enabled;
struct index_t indices[16];
} WebCtx;

View File

@ -1,6 +1,8 @@
#include "elastic.h"
#include "src/ctx.h"
#include "web.h"
#include "static_generated.c"
@ -64,7 +66,7 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?pretty", Indexer->es_url);
response_t *r = web_post(bulk_url, str, "Content-Type: application/json");
response_t *r = web_post(bulk_url, str);
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
cJSON *resp = cJSON_Parse(r->body);
@ -137,13 +139,19 @@ void print_errors(response_t *r) {
}
void _elastic_flush(int max) {
if (max == 0) {
LOG_WARNING("elastic.c", "calling _elastic_flush with 0 in queue")
return;
}
size_t buf_len;
int count;
void *buf = create_bulk_buffer(max, &count, &buf_len);
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_bulk?pipeline=tie", Indexer->es_url);
response_t *r = web_post(bulk_url, buf, "Content-Type: application/x-ndjson");
response_t *r = web_post(bulk_url, buf);
if (r->status_code == 0) {
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
@ -251,7 +259,7 @@ void destroy_indexer(char *script, char index_id[UUID_STR_LEN]) {
char url[4096];
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
response_t *r = web_post(url, "", NULL);
response_t *r = web_post(url, "");
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
@ -260,12 +268,12 @@ void destroy_indexer(char *script, char index_id[UUID_STR_LEN]) {
}
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
r = web_post(url, "", NULL);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
r = web_post(url, "", NULL);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
free_response(r);
@ -290,32 +298,32 @@ void elastic_init(int force_reset) {
free_response(r);
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
r = web_put(url, "", NULL);
r = web_put(url, "");
LOG_INFOF("elastic.c", "Create index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_close", IndexCtx.es_url);
r = web_post(url, "", NULL);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/_ingest/pipeline/tie", IndexCtx.es_url);
r = web_put(url, pipeline_json, "Content-Type: application/json");
r = web_put(url, pipeline_json);
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_settings", IndexCtx.es_url);
r = web_put(url, settings_json, "Content-Type: application/json");
r = web_put(url, settings_json);
LOG_INFOF("elastic.c", "Update settings <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_mappings/_doc?include_type_name=true", IndexCtx.es_url);
r = web_put(url, mappings_json, "Content-Type: application/json");
r = web_put(url, mappings_json);
LOG_INFOF("elastic.c", "Update mappings <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_open", IndexCtx.es_url);
r = web_post(url, "", NULL);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
free_response(r);
}

File diff suppressed because one or more lines are too long

View File

@ -1,131 +1,152 @@
#include "web.h"
#include "src/sist.h"
#include "src/ctx.h"
size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
#include <mongoose.h>
#include <pthread.h>
size_t real_size = size * nmemb;
dyn_buffer_t *buf = user_data;
dyn_buffer_write(buf, ptr, real_size);
return real_size;
}
void free_response(response_t *resp) {
free(resp->body);
free(resp);
}
#define SIST2_HEADERS "User-Agent: sist2\r\nContent-Type: application/json\r\n"
void http_req_ev(struct mg_connection *nc, int ev, void *ptr) {
http_ev_data_t *ev_data = (http_ev_data_t *) nc->user_data;
switch (ev) {
case MG_EV_CONNECT: {
int connect_status = *(int *) ptr;
if (connect_status != 0) {
ev_data->done = TRUE;
//TODO: set error
}
break;
}
case MG_EV_HTTP_REPLY: {
struct http_message *hm = (struct http_message *) ptr;
//TODO: Check errors?
ev_data->resp->size = hm->body.len;
ev_data->resp->status_code = hm->resp_code;
ev_data->resp->body = malloc(hm->body.len + 1);
memcpy(ev_data->resp->body, hm->body.p, hm->body.len);
*(ev_data->resp->body + hm->body.len) = '\0';
ev_data->done = TRUE;
break;
}
case MG_EV_CLOSE: {
ev_data->done = TRUE;
break;
}
default:
break;
}
}
subreq_ctx_t *http_req(const char *url, const char *extra_headers, const char *post_data, const char *method) {
struct mg_str scheme;
struct mg_str user_info;
struct mg_str host;
unsigned int port;
struct mg_str path;
struct mg_str query;
struct mg_str fragment;
if (post_data == NULL) post_data = "";
if (extra_headers == NULL) extra_headers = "";
if (path.len == 0) path = mg_mk_str("/");
if (host.len == 0) host = mg_mk_str("");
// [scheme://[user_info@]]host[:port][/path][?query][#fragment]
mg_parse_uri(mg_mk_str(url), &scheme, &user_info, &host, &port, &path, &query, &fragment);
if (query.len > 0) path.len += query.len + 1;
subreq_ctx_t *ctx = malloc(sizeof(subreq_ctx_t));
mg_mgr_init(&ctx->mgr, NULL);
char address[8196];
snprintf(address, sizeof(address), "tcp://%.*s:%u", (int) host.len, host.p, port);
struct mg_connection *nc = mg_connect(&ctx->mgr, address, http_req_ev);
nc->user_data = &ctx->ev_data;
mg_set_protocol_http_websocket(nc);
ctx->ev_data.resp = malloc(sizeof(response_t));
ctx->ev_data.done = FALSE;
mg_printf(
nc, "%s %.*s HTTP/1.1\r\n"
"Host: %.*s\r\n"
"Content-Length: %zu\r\n"
"%s\r\n"
"%s",
method, (int) path.len, path.p,
(int) (path.p - host.p), host.p,
strlen(post_data),
extra_headers,
post_data
);
return ctx;
}
response_t *web_get(const char *url) {
response_t *resp = malloc(sizeof(response_t));
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, NULL, "GET");
while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
}
mg_mgr_free(&ctx->mgr);
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
response_t *ret = ctx->ev_data.resp;
free(ctx);
return ret;
}
response_t *web_post(const char *url, const char *data, const char *header) {
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
struct curl_slist *headers = NULL;
if (header != NULL) {
headers = curl_slist_append(headers, header);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
}
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
subreq_ctx_t *web_post_async(const char *url, const char *data) {
return http_req(url, SIST2_HEADERS, data, "POST");
}
response_t *web_post(const char *url, const char *data) {
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, data, "POST");
response_t *web_put(const char *url, const char *data, const char *header) {
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
if (header != NULL) {
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, header);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
}
mg_mgr_free(&ctx->mgr);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
response_t *ret = ctx->ev_data.resp;
free(ctx);
return ret;
}
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
response_t *web_put(const char *url, const char *data) {
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, data, "PUT");
while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
}
mg_mgr_free(&ctx->mgr);
curl_easy_cleanup(curl);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
response_t *ret = ctx->ev_data.resp;
free(ctx);
return ret;
}
response_t *web_delete(const char *url) {
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, NULL, "DELETE");
while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
}
mg_mgr_free(&ctx->mgr);
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
response_t *ret = ctx->ev_data.resp;
free(ctx);
return ret;
}

View File

@ -2,6 +2,7 @@
#define SIST2_WEB_H
#include "src/sist.h"
#include <mongoose.h>
typedef struct response {
char *body;
@ -9,9 +10,20 @@ typedef struct response {
int status_code;
} response_t;
typedef struct {
response_t *resp;
int done;
} http_ev_data_t;
typedef struct {
http_ev_data_t ev_data;
struct mg_mgr mgr;
} subreq_ctx_t;
response_t *web_get(const char *url);
response_t *web_post(const char * url, const char * data, const char* header);
response_t *web_put(const char *url, const char *data, const char *header);
response_t *web_post(const char * url, const char * data);
subreq_ctx_t *web_post_async(const char *url, const char *data);
response_t *web_put(const char *url, const char *data);
response_t *web_delete(const char *url);
void free_response(response_t *resp);

View File

@ -1,5 +1,7 @@
#include "src/ctx.h"
#include "serialize.h"
#include "src/parsing/parse.h"
#include "src/parsing/mime.h"
static __thread int index_fd = -1;
@ -142,6 +144,12 @@ char *get_meta_key_text(enum metakey meta_key) {
return "exif_model";
case MetaExifDateTime:
return "exif_datetime";
case MetaAuthor:
return "author";
case MetaModifiedBy:
return "modified_by";
case MetaThumbnail:
return "thumbnail";
default:
return NULL;
}
@ -176,11 +184,11 @@ void write_document(document_t *doc) {
dyn_buffer_write_char(&buf, meta->key);
if (IS_META_INT(meta->key)) {
dyn_buffer_write_int(&buf, meta->intval);
dyn_buffer_write_int(&buf, meta->int_val);
} else if (IS_META_LONG(meta->key)) {
dyn_buffer_write_long(&buf, meta->longval);
dyn_buffer_write_long(&buf, meta->long_val);
} else {
dyn_buffer_write_str(&buf, meta->strval);
dyn_buffer_write_str(&buf, meta->str_val);
}
meta_line_t *tmp = meta;
@ -211,8 +219,8 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
FILE *file = fopen(path, "rb");
while (1) {
buf.cur = 0;
size_t read = fread((void *) &line, 1, sizeof(line_t), file);
if (read != 1 || feof(file)) {
size_t _ = fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) {
break;
}
@ -270,16 +278,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
break;
}
case MetaMediaAudioCodec:
case MetaMediaVideoCodec: {
int value;
ret = fread(&value, sizeof(int), 1, file);
const AVCodecDescriptor *desc = avcodec_descriptor_get(value);
if (desc != NULL) {
cJSON_AddStringToObject(document, get_meta_key_text(key), desc->name);
}
break;
}
case MetaMediaVideoCodec:
case MetaContent:
case MetaArtist:
case MetaAlbum:
@ -296,6 +295,9 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
case MetaExifIsoSpeedRatings:
case MetaExifDateTime:
case MetaExifModel:
case MetaAuthor:
case MetaModifiedBy:
case MetaThumbnail:
case MetaTitle: {
buf.cur = 0;
while ((c = getc(file)) != 0) {
@ -311,10 +313,6 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
LOG_FATALF("serialize.c", "Invalid meta key (corrupt index): %x", key)
}
if (ret != 1) {
break;
}
key = getc(file);
}

View File

@ -2,7 +2,10 @@
#define SIST2_SERIALIZE_H
#include "src/sist.h"
#include "store.h"
#include <sys/syscall.h>
#include <glib.h>
typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]);

View File

@ -11,8 +11,6 @@ typedef struct store_t {
pthread_rwlock_t lock;
} store_t;
#include "src/sist.h"
store_t *store_create(char *path);
void store_destroy(store_t *store);

View File

@ -1,5 +1,8 @@
#include "walk.h"
#include "src/ctx.h"
#include "src/parsing/parse.h"
#include <ftw.h>
__always_inline
parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
@ -15,12 +18,13 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
job->ext = len;
}
job->info = *info;
job->vfile.info = *info;
memset(job->parent, 0, 16);
job->vfile.filepath = job->filepath;
job->vfile.read = fs_read;
job->vfile.reset = fs_reset;
job->vfile.close = fs_close;
job->vfile.fd = -1;
job->vfile.is_fs_file = TRUE;

View File

@ -3,8 +3,6 @@
#define _XOPEN_SOURCE 500
#include "src/sist.h"
int walk_directory_tree(const char *);
#endif

View File

@ -1,5 +1,8 @@
#include "log.h"
#include <pthread.h>
#include <stdarg.h>
const char *log_colors[] = {
"\033[34m", "\033[01;34m", "\033[0m",
"\033[01;33m", "\033[31m", "\033[01;31m"
@ -9,7 +12,7 @@ const char *log_levels[] = {
"DEBUG", "INFO", "WARNING", "ERROR", "FATAL"
};
void sist_logf(char *filepath, int level, char *format, ...) {
void sist_logf(const char *filepath, int level, char *format, ...) {
static int is_tty = -1;
if (is_tty == -1) {
@ -58,11 +61,11 @@ void sist_logf(char *filepath, int level, char *format, ...) {
int ret = write(STDERR_FILENO, log_str, log_len);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno))
}
}
void sist_log(char *filepath, int level, char *str) {
void sist_log(const char *filepath, int level, char *str) {
static int is_tty = -1;
if (is_tty == -1) {

View File

@ -1,6 +1,7 @@
#ifndef SIST2_LOG_H
#define SIST2_LOG_H
#define LOG_MAX_LENGTH 8192
#define SIST_DEBUG 0
@ -36,10 +37,10 @@
sist_log(filepath, SIST_FATAL, str);\
exit(-1);
#include "src/sist.h"
#include "sist.h"
void sist_logf(char *filepath, int level, char *format, ...);
void sist_logf(const char *filepath, int level, char *format, ...);
void sist_log(char *filepath, int level, char *str);
void sist_log(const char *filepath, int level, char *str);
#endif

View File

@ -1,12 +1,25 @@
#include "sist.h"
#include "ctx.h"
#include <third-party/argparse/argparse.h>
#include <glib.h>
#include "cli.h"
#include "io/serialize.h"
#include "io/store.h"
#include "tpool.h"
#include "io/walk.h"
#include "index/elastic.h"
#include "web/serve.h"
#include "parsing/mime.h"
#include "parsing/parse.h"
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "1.3.4";
static const char *const Version = "2.0.0";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
@ -14,11 +27,6 @@ static const char *const usage[] = {
NULL,
};
/* Process-wide, one-time initialisation of the third-party libraries. */
void global_init() {
    /* Keep FFmpeg from printing its own diagnostics. */
    av_log_set_level(AV_LOG_QUIET);

    /* Bring up libcurl with the CURL_GLOBAL_NOTHING flag set. */
    curl_global_init(CURL_GLOBAL_NOTHING);
}
void init_dir(const char *dirpath) {
char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
@ -37,29 +45,117 @@ void scan_print_header() {
LOG_INFOF("main.c", "sist2 v%s", Version)
}
void sist2_scan(scan_args_t *args) {
/*
 * Store callback handed to the parser contexts: writes `buf` (buf_len bytes)
 * under `key` (key_len bytes) into the store of the index being scanned.
 */
void _store(char *key, size_t key_len, char *buf, size_t buf_len) {
    store_write(
            ScanCtx.index.store,
            key, key_len,
            buf, buf_len
    );
}
/*
 * Log callback handed to the parser contexts.
 *
 *  - LEVEL_FATAL messages are always printed, then the process exits with -1.
 *  - Nothing else is printed unless ScanCtx.verbose is set.
 *  - LEVEL_DEBUG messages additionally require ScanCtx.very_verbose.
 */
void _log(const char *filepath, int level, char *str) {
    if (level == LEVEL_FATAL) {
        sist_log(filepath, level, str);
        exit(-1);
    }

    if (!ScanCtx.verbose) {
        return;
    }

    if (level == LEVEL_DEBUG && !ScanCtx.very_verbose) {
        return;
    }

    sist_log(filepath, level, str);
}
void _logf(const char *filepath, int level, char *format, ...) {
va_list args;
va_start(args, format);
if (level == LEVEL_FATAL) {
sist_logf(filepath, level, format, args);
exit(-1);
}
if (ScanCtx.verbose) {
if (level == LEVEL_DEBUG) {
if (ScanCtx.very_verbose) {
sist_logf(filepath, level, format, args);
}
} else {
sist_logf(filepath, level, format, args);
}
}
va_end(args);
}
/*
 * Copies the parsed scan arguments into the global ScanCtx and wires the
 * _log / _logf / _store callbacks into each per-format parser context.
 */
void initialize_scan_context(scan_args_t *args) {
// Arc
ScanCtx.arc_ctx.mode = args->archive_mode;
ScanCtx.arc_ctx.log = _log;
ScanCtx.arc_ctx.logf = _logf;
ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
// Cbr
ScanCtx.cbr_ctx.log = _log;
ScanCtx.cbr_ctx.logf = _logf;
ScanCtx.cbr_ctx.store = _store;
// NOTE(review): reads ScanCtx.mime_table, so the table must be set before this call
ScanCtx.cbr_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
// Ebook
pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
ScanCtx.ebook_ctx.content_size = args->content_size;
ScanCtx.ebook_ctx.tn_size = args->size;
ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
ScanCtx.ebook_ctx.log = _log;
ScanCtx.ebook_ctx.logf = _logf;
ScanCtx.ebook_ctx.store = _store;
// Font
// Font thumbnails are enabled only for a positive thumbnail size
ScanCtx.font_ctx.enable_tn = args->size > 0;
ScanCtx.font_ctx.log = _log;
ScanCtx.font_ctx.logf = _logf;
ScanCtx.font_ctx.store = _store;
// Media
ScanCtx.media_ctx.tn_qscale = args->quality;
ScanCtx.media_ctx.tn_size = args->size;
ScanCtx.media_ctx.log = _log;
ScanCtx.media_ctx.logf = _logf;
ScanCtx.media_ctx.store = _store;
init_media();
// OOXML
ScanCtx.ooxml_ctx.content_size = args->content_size;
ScanCtx.ooxml_ctx.log = _log;
ScanCtx.ooxml_ctx.logf = _logf;
// MOBI
ScanCtx.mobi_ctx.content_size = args->content_size;
ScanCtx.mobi_ctx.log = _log;
ScanCtx.mobi_ctx.logf = _logf;
// Global scan settings
ScanCtx.tn_qscale = args->quality;
ScanCtx.tn_size = args->size;
ScanCtx.content_size = args->content_size;
ScanCtx.threads = args->threads;
ScanCtx.depth = args->depth;
ScanCtx.archive_mode = args->archive_mode;
// NOTE(review): strncpy() does not NUL-terminate when the source is at least
// as long as the destination; presumably ScanCtx is zero-initialised and the
// inputs are shorter than the fields - confirm.
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
// Cached length of the root path (narrowed to short)
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
ScanCtx.tesseract_lang = args->tesseract_lang;
ScanCtx.tesseract_path = args->tesseract_path;
ScanCtx.fast = args->fast;
}
init_dir(ScanCtx.index.path);
void sist2_scan(scan_args_t *args) {
ScanCtx.mime_table = mime_get_mime_table();
ScanCtx.ext_table = mime_get_ext_table();
cbr_init();
initialize_scan_context(args);
init_dir(ScanCtx.index.path);
char store_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
@ -184,7 +280,9 @@ void sist2_web(web_args_t *args) {
WebCtx.es_url = args->es_url;
WebCtx.index_count = args->index_count;
WebCtx.b64credentials = args->b64credentials;
WebCtx.auth_user = args->auth_user;
WebCtx.auth_pass = args->auth_pass;
WebCtx.auth_enabled = args->auth_enabled;
for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]);
@ -210,8 +308,6 @@ void sist2_web(web_args_t *args) {
int main(int argc, const char *argv[]) {
global_init();
scan_args_t *scan_args = scan_args_create();
index_args_t *index_args = index_args_create();
web_args_t *web_args = web_args_create();

View File

@ -1,155 +0,0 @@
#include "arc.h"
#include "src/ctx.h"
/*
 * Tells whether the first `ext - 1` bytes of `filepath` end with ".tar".
 *
 * `ext` is presumably the offset of the first character after the final '.'
 * of the path (TODO confirm against the callers), so `ext - 1` bytes is the
 * path with its last extension stripped, e.g. "a.tar.gz" -> "a.tar".
 *
 * Returns TRUE when that prefix ends with ".tar", FALSE otherwise (including
 * ext == 0 and any ext too small to hold ".tar").
 *
 * The comparison is done in place: the previous version copied the prefix
 * into a fixed-size stack buffer with an unchecked memcpy(), which overflowed
 * for long paths and misbehaved for negative `ext`.
 */
int should_parse_filtered_file(const char *filepath, int ext) {
    static const char tar_suffix[] = ".tar";
    const int suffix_len = (int) sizeof(tar_suffix) - 1;

    /* The prefix is ext - 1 bytes long; it must be able to hold the suffix. */
    if (ext <= suffix_len) {
        return FALSE;
    }

    const int stem_len = ext - 1;

    if (memcmp(filepath + stem_len - suffix_len, tar_suffix, (size_t) suffix_len) == 0) {
        return TRUE;
    }

    return FALSE;
}
/*
 * vfile read callback for files that live inside an archive: reads up to
 * `size` bytes of the current entry of f->arc into `buf` and returns the
 * result of archive_read_data() narrowed to int.
 */
int arc_read(struct vfile *f, void *buf, size_t size) {
    struct archive *source = f->arc;

    return archive_read_data(source, buf, size);
}
/* User data passed to the libarchive open/read/close callbacks below. */
typedef struct arc_data {
// Virtual file the archive bytes are read from
vfile_t *f;
// Scratch buffer handed back to libarchive by vfile_read_callback()
char buf[ARC_BUF_SIZE];
} arc_data_f;
int vfile_open_callback(struct archive *a, void *user_data) {
arc_data_f *data = user_data;
if (data->f->is_fs_file && data->f->fd == -1) {
data->f->fd = open(data->f->filepath, O_RDONLY);
}
return ARCHIVE_OK;
}
long vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
arc_data_f *data = user_data;
*buf = data->buf;
return data->f->read(data->f, data->buf, ARC_BUF_SIZE);
}
int vfile_close_callback(struct archive *a, void *user_data) {
arc_data_f *data = user_data;
if (data->f->close != NULL) {
data->f->close(data->f);
}
return ARCHIVE_OK;
}
/*
 * Handles an archive document.
 *
 * The archive is opened either directly from disk (when `f` is a filesystem
 * file) or through the vfile callbacks (nested archive, only when
 * ScanCtx.archive_mode is ARC_MODE_RECURSE). Any other combination is
 * silently ignored.
 *
 * ARC_MODE_LIST: the paths of all regular entries are joined with '\n' and
 *                attached to `doc` as MetaContent.
 * otherwise:     every regular entry is handed to parse() as a sub-job whose
 *                path is "<archive path>#/<entry path>" and whose parent is
 *                doc->uuid.
 */
void parse_archive(vfile_t *f, document_t *doc) {
    struct archive *a;
    struct archive_entry *entry;

    arc_data_f data;
    data.f = f;

    int ret = 0;
    if (data.f->is_fs_file) {
        a = archive_read_new();
        archive_read_support_filter_all(a);
        archive_read_support_format_all(a);

        ret = archive_read_open_filename(a, doc->filepath, ARC_BUF_SIZE);
    } else if (ScanCtx.archive_mode == ARC_MODE_RECURSE) {
        a = archive_read_new();
        archive_read_support_filter_all(a);
        archive_read_support_format_all(a);

        ret = archive_read_open(
                a, &data,
                vfile_open_callback,
                vfile_read_callback,
                vfile_close_callback
        );
    } else {
        return;
    }

    if (ret != ARCHIVE_OK) {
        LOG_ERRORF(doc->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a));
        archive_read_free(a);
        return;
    }

    if (ScanCtx.archive_mode == ARC_MODE_LIST) {
        dyn_buffer_t buf = dyn_buffer_create();

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            if (S_ISREG(archive_entry_stat(entry)->st_mode)) {
                char *path = (char *) archive_entry_pathname(entry);

                dyn_buffer_append_string(&buf, path);
                dyn_buffer_write_char(&buf, '\n');
            }
        }
        dyn_buffer_write_char(&buf, '\0');

        meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
        meta_list->key = MetaContent;
        strcpy(meta_list->strval, buf.buf);
        APPEND_META(doc, meta_list);
        dyn_buffer_destroy(&buf);

    } else {
        /* Room reserved after the job header for the synthetic entry path. */
        const size_t filepath_cap = PATH_MAX * 2;

        parse_job_t *sub_job = malloc(sizeof(parse_job_t) + filepath_cap);
        if (sub_job == NULL) {
            LOG_ERROR(doc->filepath, "(arc.c) Could not allocate sub-job");
            archive_read_free(a);
            return;
        }

        sub_job->vfile.close = NULL;
        sub_job->vfile.read = arc_read;
        sub_job->vfile.arc = a;
        sub_job->vfile.filepath = sub_job->filepath;
        sub_job->vfile.is_fs_file = FALSE;
        memcpy(sub_job->parent, doc->uuid, sizeof(uuid_t));

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            sub_job->info = *archive_entry_stat(entry);
            if (S_ISREG(sub_job->info.st_mode)) {
                /* Bounded formatting: an over-long entry name must not
                 * write past the end of the sub-job allocation. */
                int written = snprintf(sub_job->filepath, filepath_cap, "%s#/%s",
                                       f->filepath, archive_entry_pathname(entry));
                if (written < 0 || (size_t) written >= filepath_cap) {
                    LOG_ERRORF(doc->filepath, "(arc.c) Entry path too long, skipping: %s",
                               archive_entry_pathname(entry));
                    continue;
                }

                /* The "#/" separator guarantees that a '/' is present. */
                sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;

                char *p = strrchr(sub_job->filepath, '.');
                if (p != NULL) {
                    sub_job->ext = (int) (p - sub_job->filepath + 1);
                } else {
                    sub_job->ext = (int) strlen(sub_job->filepath);
                }

                parse(sub_job);
            }
        }

        free(sub_job);
    }

    archive_read_free(a);
}

View File

@ -1,13 +0,0 @@
#ifndef SIST2_ARC_H
#define SIST2_ARC_H
#include "src/sist.h"
#define ARC_BUF_SIZE 8192
int should_parse_filtered_file(const char *filepath, int ext);
void parse_archive(vfile_t *f, document_t *doc);
int arc_read(struct vfile * f, void *buf, size_t size);
#endif

View File

@ -1,52 +0,0 @@
#include "cbr.h"
#include "src/ctx.h"
unsigned int cbr_mime;
unsigned int cbz_mime;
/*
 * Looks up the mime ids of the two comic-book formats in ScanCtx.mime_table
 * and caches them in the file-level cbr_mime / cbz_mime variables.
 */
void cbr_init() {
    cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
    cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
}
/* Non-zero when `mime` is the cached "application/x-cbr" id (see cbr_init). */
int is_cbr(unsigned int mime) {
    return cbr_mime == mime;
}
/*
 * Parses a .cbr (RAR comic book) held in memory.
 *
 * The RAR entries are re-packed into an in-memory ZIP, which is then handed
 * to parse_pdf() with doc->mime temporarily set to the cbz mime id; the cbr
 * mime id is restored afterwards.
 *
 * buf/buf_len: raw bytes of the .cbr file.
 *
 * The ZIP output buffer is sized at twice the input length.
 * NOTE(review): archive_write_open_memory() fails rather than grows when
 * that is not enough - confirm the factor is sufficient for stored entries.
 *
 * Unlike the previous version, allocation and open failures are reported and
 * abort the conversion instead of continuing with invalid handles.
 */
void parse_cbr(void *buf, size_t buf_len, document_t *doc) {
    if (buf == NULL || buf_len == 0) {
        return;
    }

    const size_t out_buf_len = buf_len * 2;
    char *out_buf = malloc(out_buf_len);
    if (out_buf == NULL) {
        LOG_ERROR(doc->filepath, "(cbr.c) Could not allocate output buffer");
        return;
    }
    size_t out_buf_used = 0;

    struct archive *rar_in = archive_read_new();
    archive_read_support_filter_none(rar_in);
    archive_read_support_format_rar(rar_in);

    if (archive_read_open_memory(rar_in, buf, buf_len) != ARCHIVE_OK) {
        LOG_ERRORF(doc->filepath, "(cbr.c) Could not read archive: %s", archive_error_string(rar_in));
        archive_read_free(rar_in);
        free(out_buf);
        return;
    }

    struct archive *zip_out = archive_write_new();
    archive_write_set_format_zip(zip_out);

    if (archive_write_open_memory(zip_out, out_buf, out_buf_len, &out_buf_used) != ARCHIVE_OK) {
        LOG_ERRORF(doc->filepath, "(cbr.c) Could not open output archive: %s", archive_error_string(zip_out));
        archive_write_free(zip_out);
        archive_read_close(rar_in);
        archive_read_free(rar_in);
        free(out_buf);
        return;
    }

    struct archive_entry *entry;
    while (archive_read_next_header(rar_in, &entry) == ARCHIVE_OK) {
        archive_write_header(zip_out, entry);

        /* Copy the entry body in ARC_BUF_SIZE chunks; a zero or negative
         * length (end of entry or error) stops the copy. */
        char arc_buf[ARC_BUF_SIZE];
        int len = archive_read_data(rar_in, arc_buf, ARC_BUF_SIZE);
        while (len > 0) {
            archive_write_data(zip_out, arc_buf, len);
            len = archive_read_data(rar_in, arc_buf, ARC_BUF_SIZE);
        }
    }

    archive_write_close(zip_out);
    archive_write_free(zip_out);

    archive_read_close(rar_in);
    archive_read_free(rar_in);

    /* parse_pdf() is called with the cbz mime id set on the document. */
    doc->mime = cbz_mime;
    parse_pdf(out_buf, out_buf_used, doc);
    doc->mime = cbr_mime;
    free(out_buf);
}

View File

@ -1,12 +0,0 @@
#ifndef SIST2_CBR_H
#define SIST2_CBR_H
#include "src/sist.h"
void cbr_init();
int is_cbr(unsigned int mime);
void parse_cbr(void *buf, size_t buf_len, document_t *doc);
#endif

View File

@ -1,141 +0,0 @@
#include "doc.h"
#include "src/ctx.h"
// True when string x begins with y. y must be a string literal (or char
// array): its length is taken with sizeof, not strlen.
#define STR_STARTS_WITH(x, y) (strncmp(y, x, sizeof(y) - 1) == 0)
__always_inline
static int should_read_part(const char *part) {
LOG_DEBUGF("doc.c", "Got part : %s", part)
if (part == NULL) {
return FALSE;
}
if ( // Word
STR_STARTS_WITH(part, "word/document.xml")
|| STR_STARTS_WITH(part, "word/footnotes.xml")
|| STR_STARTS_WITH(part, "word/endnotes.xml")
|| STR_STARTS_WITH(part, "word/footer")
|| STR_STARTS_WITH(part, "word/header")
// PowerPoint
|| STR_STARTS_WITH(part, "ppt/slides/slide")
|| STR_STARTS_WITH(part, "ppt/notesSlides/slide")
// Excel
|| STR_STARTS_WITH(part, "xl/worksheets/sheet")
|| STR_STARTS_WITH(part, "xl/sharedStrings.xml")
|| STR_STARTS_WITH(part, "xl/workbook.xml")
) {
return TRUE;
}
return FALSE;
}
/*
 * Walks `node` and its following siblings (recursing into children) and
 * appends the text of every node named exactly "t" to `buf`, each followed
 * by a single space.
 *
 * Returns -1 when libxml2's last recorded error is fatal, 0 otherwise. The
 * return value of the recursive calls is not propagated.
 */
int extract_text(xmlDoc *xml, xmlNode *node, text_buffer_t *buf) {
    //TODO: Check which nodes are likely to have a 't' child, and ignore nodes that aren't
    xmlErrorPtr last_error = xmlGetLastError();
    if (last_error != NULL) {
        if (last_error->level != XML_ERR_FATAL) {
            LOG_ERRORF("doc.c", "Got recoverable XML error while parsing document: %s", last_error->message);
        } else {
            LOG_ERRORF("doc.c", "Got fatal XML error while parsing document: %s", last_error->message);
            return -1;
        }
    }

    xmlNode *cur = node;
    while (cur != NULL) {
        const int is_text_run = cur->name[0] == 't' && cur->name[1] == '\0';

        if (is_text_run) {
            xmlChar *content = xmlNodeListGetString(xml, cur->xmlChildrenNode, 1);
            if (content != NULL) {
                text_buffer_append_string0(buf, (char *) content);
                text_buffer_append_char(buf, ' ');
                xmlFree(content);
            }
        }

        extract_text(xml, cur->children, buf);
        cur = cur->next;
    }

    return 0;
}
/*
 * libxml2 input-read callback: `context` is the libarchive handle; reads up
 * to `len` bytes of the current entry into `buffer`.
 */
int xml_io_read(void *context, char *buffer, int len) {
    return archive_read_data((struct archive *) context, buffer, len);
}
/*
 * libxml2 input-close callback. Does nothing and reports success: the
 * archive handle is closed by parse_doc().
 */
int xml_io_close(UNUSED(void *context)) {
    return 0;
}
/*
 * Parses the current archive entry of `a` as XML and appends its text to
 * `buf` via extract_text().
 *
 * Returns 0 on success, -1 when the XML could not be parsed or has no root
 * element (both are logged against doc->filepath).
 */
__always_inline
static int read_part(struct archive *a, text_buffer_t *buf, document_t *doc) {
    const int options = XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET;

    xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL, options);
    if (xml == NULL) {
        LOG_ERROR(doc->filepath, "Could not parse XML");
        return -1;
    }

    int status = 0;
    xmlNode *root = xmlDocGetRootElement(xml);

    if (root != NULL) {
        extract_text(xml, root, buf);
    } else {
        LOG_ERROR(doc->filepath, "Empty document");
        status = -1;
    }

    xmlFreeDoc(xml);
    return status;
}
/*
 * Extracts the text of an OOXML document (a ZIP held in memory) and attaches
 * it to `doc` as MetaContent.
 *
 * mem/mem_len: raw bytes of the document; a NULL `mem` is ignored.
 *
 * Only regular entries accepted by should_read_part() are read; extraction
 * stops at the first part that fails to parse. Nothing is attached when no
 * text was collected.
 */
void parse_doc(void *mem, size_t mem_len, document_t *doc) {
    if (mem == NULL) {
        return;
    }

    struct archive *zip = archive_read_new();
    archive_read_support_format_zip(zip);

    int status = archive_read_open_memory(zip, mem, mem_len);
    if (status != ARCHIVE_OK) {
        LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(zip));
        archive_read_free(zip);
        return;
    }

    text_buffer_t text = text_buffer_create(ScanCtx.content_size);

    struct archive_entry *entry;
    while (archive_read_next_header(zip, &entry) == ARCHIVE_OK) {
        if (!S_ISREG(archive_entry_stat(entry)->st_mode)) {
            continue;
        }

        const char *member = archive_entry_pathname(entry);
        if (!should_read_part(member)) {
            continue;
        }

        status = read_part(zip, &text, doc);
        if (status != 0) {
            break;
        }
    }

    if (text.dyn_buffer.cur > 0) {
        text_buffer_terminate_string(&text);

        meta_line_t *content = malloc(sizeof(meta_line_t) + text.dyn_buffer.cur);
        content->key = MetaContent;
        strcpy(content->strval, text.dyn_buffer.buf);
        APPEND_META(doc, content)
    }

    archive_read_close(zip);
    archive_read_free(zip);
    text_buffer_destroy(&text);
}

View File

@ -1,8 +0,0 @@
#ifndef SIST2_DOC_H
#define SIST2_DOC_H
#include "src/sist.h"
void parse_doc(void *buf, size_t buf_len, document_t *doc);
#endif

View File

@ -1,233 +0,0 @@
#include "font.h"
#include "src/ctx.h"
__thread FT_Library ft_lib = NULL;
/* Pixel size of a rendered string, as computed by text_dimension(). */
typedef struct text_dimensions {
// Sum over the glyphs of max(advance, bitmap width) plus kerning
unsigned int width;
// Largest ascent plus largest descent
unsigned int height;
// Largest descent over the glyphs
unsigned int baseline;
} text_dimensions_t;
/* Metrics and bitmap of one rendered glyph; filled by ft_glyph_to_glyph(). */
typedef struct glyph {
// bitmap_top of the FreeType glyph slot
int top;
// Bitmap rows
int height;
// Bitmap width
int width;
// max(0, height - top)
int descent;
// max(0, max(top, height) - descent)
int ascent;
// Slot advance.x divided by 64
int advance_width;
// Points at the slot's bitmap buffer (not copied)
unsigned char *pixmap;
} glyph_t;
/*
 * Horizontal kerning, in whole pixels, to apply between the previous
 * character `pc` and the current character `c` for `face`.
 *
 * FT_Get_Kerning() expects glyph indices, in left-then-right order, so both
 * character codes are first mapped with FT_Get_Char_Index(). The previous
 * version passed raw character codes in (current, previous) order and used
 * the output vector even when the call failed.
 *
 * Returns 0 when the face has no kerning table or the lookup fails.
 */
__always_inline
int kerning_offset(char c, char pc, FT_Face face) {
    FT_Vector kerning;
    kerning.x = 0;
    kerning.y = 0;

    if (!FT_HAS_KERNING(face)) {
        return 0;
    }

    FT_UInt left_glyph = FT_Get_Char_Index(face, (FT_ULong) (unsigned char) pc);
    FT_UInt right_glyph = FT_Get_Char_Index(face, (FT_ULong) (unsigned char) c);

    if (FT_Get_Kerning(face, left_glyph, right_glyph, FT_KERNING_DEFAULT, &kerning) != 0) {
        return 0;
    }

    /* FT_KERNING_DEFAULT yields 26.6 fixed-point values: 64 units per pixel. */
    return (int) (kerning.x / 64);
}
/*
 * Converts the currently loaded FreeType glyph slot into a glyph_t.
 * The pixmap pointer aliases the slot's bitmap buffer; nothing is copied.
 */
__always_inline
glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
    const int rows = (int) slot->bitmap.rows;
    const int top = slot->bitmap_top;
    const int below_baseline = MAX(0, rows - top);

    glyph_t out;
    out.pixmap = slot->bitmap.buffer;
    out.width = (int) slot->bitmap.width;
    out.height = rows;
    out.top = top;
    out.advance_width = (int) slot->advance.x / 64;
    out.descent = below_baseline;
    out.ascent = MAX(0, MAX(top, rows) - below_baseline);

    return out;
}
/*
 * Measures `text` as it would be rendered with `face` at the currently
 * selected size.
 *
 * width:    per character, max(advance, bitmap width) plus the kerning
 *           against the previous character
 * height:   largest ascent plus largest descent
 * baseline: largest descent
 */
text_dimensions_t text_dimension(char *text, FT_Face face) {
    text_dimensions_t dims;
    dims.width = 0;

    unsigned int tallest_ascent = 0;
    int deepest_descent = 0;
    char previous = 0;

    const int length = (int) strlen(text);
    int pos = 0;

    while (pos < length) {
        char current = text[pos];

        FT_Load_Char(face, current, 0);
        glyph_t g = ft_glyph_to_glyph(face->glyph);

        deepest_descent = MAX(deepest_descent, g.descent);
        tallest_ascent = MAX(tallest_ascent, MAX(g.height, g.ascent));

        int kerning_x = kerning_offset(current, previous, face);
        dims.width += MAX(g.advance_width, g.width) + kerning_x;

        previous = current;
        pos++;
    }

    dims.height = tallest_ascent + deepest_descent;
    dims.baseline = deepest_descent;

    return dims;
}
/*
 * ORs the glyph's 8-bit coverage bitmap into `bitmap` with its top-left
 * corner at (x, y). `bitmap` is text_info.width * text_info.height bytes,
 * row-major; pixels whose destination index falls past the end of the
 * bitmap are dropped.
 */
void draw_glyph(glyph_t *glyph, int x, int y, struct text_dimensions text_info, unsigned char *bitmap) {
    const unsigned int capacity = text_info.width * text_info.height;
    const unsigned int origin = y * text_info.width + x;

    for (unsigned int row = 0; row < glyph->height; row++) {
        /* Offsets are computed in unsigned arithmetic, exactly like running
         * counters that advance by one per pixel and wrap per row. */
        const unsigned int src_row = row * glyph->width;
        const unsigned int dst_row = origin + row * text_info.width;

        for (unsigned int col = 0; col < glyph->width; col++) {
            const unsigned int target = dst_row + col;

            if (target < capacity) {
                bitmap[target] |= glyph->pixmap[src_row + col];
            }
        }
    }
}
/*
 * Serialises an 8-bit coverage bitmap as an 8-bpp palettised BMP into `buf`.
 *
 * dimensions: width/height of `bitmap` in pixels
 * bitmap:     width * height bytes, row-major, top row first
 *
 * Layout: 14-byte file header, 40-byte DIB header, 256-entry colour table
 * (inverted grey ramp: index 0 is white), then the pixel rows bottom-up.
 *
 * Each pixel row is padded so that the ROW LENGTH is a multiple of 4 bytes,
 * as the BMP format requires. The previous version padded until the absolute
 * buffer offset was a multiple of 4; since the headers total 1078 bytes, the
 * first row ended up with a stride 2 bytes off from every other row.
 *
 * Assumes `buf` is empty on entry and that dyn_buffer_write_short/int append
 * 2 and 4 bytes respectively - TODO confirm against dyn_buffer.
 */
void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned char *bitmap) {

    dyn_buffer_write_short(buf, 0x4D42); // Magic
    dyn_buffer_write_int(buf, 0); // Size placeholder
    dyn_buffer_write_int(buf, 0x5157); //Reserved
    dyn_buffer_write_int(buf, 14 + 40 + 256 * 4); // pixels offset

    dyn_buffer_write_int(buf, 40); // DIB size
    dyn_buffer_write_int(buf, (int) dimensions.width);
    dyn_buffer_write_int(buf, (int) dimensions.height);
    dyn_buffer_write_short(buf, 1); // Color planes
    dyn_buffer_write_short(buf, 8); // bits per pixel
    dyn_buffer_write_int(buf, 0); // compression
    dyn_buffer_write_int(buf, 0); // Ignored
    dyn_buffer_write_int(buf, 3800); // hres
    dyn_buffer_write_int(buf, 3800); // vres
    dyn_buffer_write_int(buf, 256); // Color count
    dyn_buffer_write_int(buf, 0); // Ignored

    // RGBA32 Color table (Grayscale)
    for (int i = 255; i >= 0; i--) {
        dyn_buffer_write_int(buf, i + (i << 8) + (i << 16));
    }

    // Pixel array: write from bottom to top, with rows padded to multiples of 4-bytes
    const unsigned int row_padding = (4 - dimensions.width % 4) % 4;

    for (int y = (int) dimensions.height - 1; y >= 0; y--) {
        for (unsigned int x = 0; x < dimensions.width; x++) {
            dyn_buffer_write_char(buf, (char) bitmap[y * dimensions.width + x]);
        }
        for (unsigned int pad = 0; pad < row_padding; pad++) {
            dyn_buffer_write_char(buf, 0);
        }
    }

    // Size: patch the placeholder at byte offset 2. memcpy is used because
    // that offset is not suitably aligned for a direct int store.
    int total_size = (int) buf->cur;
    memcpy((char *) buf->buf + 2, &total_size, sizeof(total_size));
}
/*
 * Parses a font held in memory: records its display name as MetaFontName
 * and, when ScanCtx.tn_size > 0, renders that name with the font itself and
 * stores the result as a BMP thumbnail keyed by doc->uuid.
 *
 * buf/buf_len: raw bytes of the font file; a NULL `buf` is ignored.
 *
 * The display name is "<family> <style>", or just "<family>" when the style
 * is missing or starts with '?'. A missing family is shown as "(null)".
 *
 * Changes from the previous version: the name is built with bounded
 * snprintf() calls (an over-long family name used to overflow the 1024-byte
 * buffer through strcpy), a NULL family name is never passed to "%s", the
 * metadata allocation reserves room for the terminating NUL, and allocation
 * failures are handled.
 */
void parse_font(const char *buf, size_t buf_len, document_t *doc) {
    /* The FreeType library handle is per-thread and created lazily. */
    if (ft_lib == NULL) {
        FT_Init_FreeType(&ft_lib);
    }

    if (buf == NULL) {
        return;
    }

    FT_Face face;
    FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, &face);
    if (err != 0) {
        LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, ft_error_string(err));
        return;
    }

    const char *family = face->family_name != NULL ? face->family_name : "(null)";

    char font_name[1024];
    if (face->style_name == NULL || *(face->style_name) == '?') {
        snprintf(font_name, sizeof(font_name), "%s", family);
    } else {
        snprintf(font_name, sizeof(font_name), "%s %s", family, face->style_name);
    }

    meta_line_t *meta_name = malloc(sizeof(meta_line_t) + strlen(font_name) + 1);
    if (meta_name == NULL) {
        LOG_ERROR(doc->filepath, "(font.c) Could not allocate metadata");
        FT_Done_Face(face);
        return;
    }
    meta_name->key = MetaFontName;
    strcpy(meta_name->strval, font_name);
    APPEND_META(doc, meta_name)

    if (ScanCtx.tn_size <= 0) {
        FT_Done_Face(face);
        return;
    }

    int pixel = 64;
    int num_chars = (int) strlen(font_name);

    err = FT_Set_Pixel_Sizes(face, 0, pixel);
    if (err != 0) {
        LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, ft_error_string(err));
        FT_Done_Face(face);
        return;
    }

    text_dimensions_t dimensions = text_dimension(font_name, face);
    const size_t bitmap_len = (size_t) dimensions.width * dimensions.height;
    unsigned char *bitmap = calloc(bitmap_len, 1);
    /* calloc(0, 1) may legitimately return NULL; only a failed non-empty
     * allocation is an error. */
    if (bitmap == NULL && bitmap_len != 0) {
        LOG_ERROR(doc->filepath, "(font.c) Could not allocate bitmap");
        FT_Done_Face(face);
        return;
    }

    FT_Vector pen;
    pen.x = 0;

    char pc = 0;
    for (int i = 0; i < num_chars; i++) {
        char c = font_name[i];

        err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
        if (err != 0) {
            /* Retry with the character shifted by 32 (the ASCII case
             * offset) before giving up on this character. */
            c = c >= 'a' && c <= 'z' ? c - 32 : c + 32;
            err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
            if (err != 0) {
                LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, ft_error_string(err));
                continue;
            }
        }
        glyph_t glyph = ft_glyph_to_glyph(face->glyph);

        pen.x += kerning_offset(c, pc, face);
        if (pen.x <= 0) {
            pen.x = ABS(glyph.advance_width - glyph.width);
        }
        pen.y = dimensions.height - glyph.ascent - dimensions.baseline;

        draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap);

        pen.x += glyph.advance_width;
        pc = c;
    }

    dyn_buffer_t bmp_data = dyn_buffer_create();
    bmp_format(&bmp_data, dimensions, bitmap);

    store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) bmp_data.buf, bmp_data.cur);

    dyn_buffer_destroy(&bmp_data);
    free(bitmap);

    FT_Done_Face(face);
}
/*
 * Releases the calling thread's FreeType library handle, if one was created.
 * The handle is reset to NULL afterwards so that a later parse_font() call
 * re-initialises FreeType instead of using a destroyed library, and so that
 * calling this twice is harmless.
 */
void cleanup_font() {
    if (ft_lib != NULL) {
        FT_Done_FreeType(ft_lib);
        ft_lib = NULL;
    }
}

View File

@ -1,10 +0,0 @@
#ifndef SIST2_FONT_H
#define SIST2_FONT_H
#include "src/sist.h"
void parse_font(const char * buf, size_t buf_len, document_t *doc);
void cleanup_font();
#endif

View File

@ -1,402 +0,0 @@
#include "src/sist.h"
#include "src/ctx.h"
#define MIN_SIZE 32
#define AVIO_BUF_SIZE 8192
/*
 * Creates and opens an MJPEG encoder context for dstW x dstH frames in
 * AV_PIX_FMT_YUVJ420P, with `qscale` stored in i_quant_factor.
 *
 * Returns the opened context (release it with avcodec_free_context()), or
 * NULL when the encoder is unavailable, the context cannot be allocated, or
 * the codec cannot be opened. The context is freed on the failure path; the
 * previous version leaked it.
 */
__always_inline
static AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {

    AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
    if (jpeg_codec == NULL) {
        printf("Could not find jpeg encoder!\n");
        return NULL;
    }

    AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
    if (jpeg == NULL) {
        printf("Could not allocate jpeg encoder!\n");
        return NULL;
    }

    jpeg->width = dstW;
    jpeg->height = dstH;
    jpeg->time_base.den = 1000000;
    jpeg->time_base.num = 1;
    jpeg->i_quant_factor = qscale;

    jpeg->pix_fmt = AV_PIX_FMT_YUVJ420P;
    int ret = avcodec_open2(jpeg, jpeg_codec, NULL);

    if (ret != 0) {
        printf("Could not open jpeg encoder: %s!\n", av_err2str(ret));
        avcodec_free_context(&jpeg);
        return NULL;
    }

    return jpeg;
}
/*
 * Produces a scaled copy of `frame` whose larger side is at most `size`
 * pixels, preserving the aspect ratio. Frames that already fit keep their
 * size.
 *
 * Returns a new frame whose data[0] buffer was obtained from av_malloc()
 * (the caller frees *data and then the frame), or NULL when either target
 * dimension is <= MIN_SIZE or a scaler/frame/buffer allocation fails.
 *
 * The allocation checks are new: the previous version dereferenced the
 * results of sws_getContext(), av_frame_alloc() and av_malloc() unchecked.
 */
__always_inline
AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
    int dstW;
    int dstH;
    if (frame->width <= size && frame->height <= size) {
        dstW = frame->width;
        dstH = frame->height;
    } else {
        double ratio = (double) frame->width / frame->height;
        if (frame->width > frame->height) {
            dstW = size;
            dstH = (int) (size / ratio);
        } else {
            dstW = (int) (size * ratio);
            dstH = size;
        }
    }

    if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
        return NULL;
    }

    struct SwsContext *ctx = sws_getContext(
            decoder->width, decoder->height, decoder->pix_fmt,
            dstW, dstH, AV_PIX_FMT_YUVJ420P,
            SWS_FAST_BILINEAR, 0, 0, 0
    );
    if (ctx == NULL) {
        return NULL;
    }

    AVFrame *scaled_frame = av_frame_alloc();
    if (scaled_frame == NULL) {
        sws_freeContext(ctx);
        return NULL;
    }

    int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1);
    uint8_t *dst_buf = dst_buf_len > 0 ? (uint8_t *) av_malloc(dst_buf_len) : NULL;
    if (dst_buf == NULL) {
        av_frame_free(&scaled_frame);
        sws_freeContext(ctx);
        return NULL;
    }

    av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);

    sws_scale(ctx,
              (const uint8_t *const *) frame->data, frame->linesize,
              0, decoder->height,
              scaled_frame->data, scaled_frame->linesize
    );

    scaled_frame->width = dstW;
    scaled_frame->height = dstH;
    scaled_frame->format = AV_PIX_FMT_YUV420P;

    sws_freeContext(ctx);

    return scaled_frame;
}
// Reads packets from pFormatCtx until the decoder produces one frame of the
// stream `stream_idx`.
// Returns the decoded frame (to be released with av_frame_free by the
// caller), or NULL when reading a packet or sending it to the decoder fails;
// failures other than end-of-file are logged against doc->filepath.
__always_inline
static AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, document_t *doc) {
AVFrame *frame = av_frame_alloc();
AVPacket avPacket;
av_init_packet(&avPacket);
// Keep feeding packets while the decoder answers -EAGAIN
int receive_ret = -EAGAIN;
while (receive_ret == -EAGAIN) {
// Get video frame
while (1) {
int read_frame_ret = av_read_frame(pFormatCtx, &avPacket);
if (read_frame_ret != 0) {
// End of file is not reported as a warning
if (read_frame_ret != AVERROR_EOF) {
LOG_WARNINGF(doc->filepath,
"(media.c) avcodec_read_frame() returned error code [%d] %s",
read_frame_ret, av_err2str(read_frame_ret)
)
}
av_frame_free(&frame);
av_packet_unref(&avPacket);
return NULL;
}
//Ignore audio/other frames
if (avPacket.stream_index != stream_idx) {
av_packet_unref(&avPacket);
continue;
}
break;
}
// Feed it to decoder
int decode_ret = avcodec_send_packet(decoder, &avPacket);
if (decode_ret != 0) {
LOG_ERRORF(doc->filepath,
"(media.c) avcodec_send_packet() returned error code [%d] %s",
decode_ret, av_err2str(decode_ret)
)
av_frame_free(&frame);
av_packet_unref(&avPacket);
return NULL;
}
// The packet is released before the frame is requested
av_packet_unref(&avPacket);
// NOTE(review): any result other than -EAGAIN ends the loop, so an error
// code here still returns `frame` - confirm callers tolerate that.
receive_ret = avcodec_receive_frame(decoder, frame);
}
return frame;
}
// Copies tag_->value through a text buffer into a newly allocated
// meta_line_t with key `keyname` and appends it to `doc`.
// The expansion declares the locals `tex` and `meta_tag`, so it can be used
// at most once per brace scope.
// NOTE(review): the -1 passed to text_buffer_create presumably means "no
// size limit" - confirm.
#define APPEND_TAG_META(doc, tag_, keyname) \
text_buffer_t tex = text_buffer_create(-1); \
text_buffer_append_string0(&tex, tag_->value); \
text_buffer_terminate_string(&tex); \
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \
meta_tag->key = keyname; \
strcpy(meta_tag->strval, tex.dyn_buffer.buf); \
APPEND_META(doc, meta_tag) \
text_buffer_destroy(&tex);
/*
 * Attaches the container-level audio tags of pFormatCtx to `doc`.
 * Tag names are compared case-insensitively; the recognised ones are
 * artist, genre, title, album_artist and album.
 *
 * The lower-cased copy of the tag name is explicitly NUL-terminated:
 * strncpy() leaves the buffer unterminated when the name is 256 bytes or
 * longer, which made the lower-casing loop run off the end of `key`.
 * tolower() is given an unsigned char value, as required for non-ASCII bytes.
 */
__always_inline
static void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {

    AVDictionaryEntry *tag = NULL;
    while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
        char key[256];
        strncpy(key, tag->key, sizeof(key) - 1);
        key[sizeof(key) - 1] = '\0';

        for (char *ptr = key; *ptr; ++ptr) {
            *ptr = (char) tolower((unsigned char) *ptr);
        }

        if (strcmp(key, "artist") == 0) {
            APPEND_TAG_META(doc, tag, MetaArtist)
        } else if (strcmp(key, "genre") == 0) {
            APPEND_TAG_META(doc, tag, MetaGenre)
        } else if (strcmp(key, "title") == 0) {
            APPEND_TAG_META(doc, tag, MetaTitle)
        } else if (strcmp(key, "album_artist") == 0) {
            APPEND_TAG_META(doc, tag, MetaAlbumArtist)
        } else if (strcmp(key, "album") == 0) {
            APPEND_TAG_META(doc, tag, MetaAlbum)
        }
    }
}
/*
 * Attaches video or image metadata to `doc`.
 *
 * is_video != 0: duration (seconds) and bitrate are recorded, followed by the
 *                container tags "comment" and, only when include_audio_tags
 *                is set, "title" and "artist".
 * is_video == 0: the frame's EXIF-style tags are recorded; "Artist" only
 *                when include_audio_tags is set.
 */
__always_inline
static void
append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int include_audio_tags, int is_video) {

    AVDictionaryEntry *entry = NULL;

    if (!is_video) {
        // EXIF metadata
        while ((entry = av_dict_get(frame->metadata, "", entry, AV_DICT_IGNORE_SUFFIX))) {
            const char *name = entry->key;

            if (include_audio_tags && strcmp(name, "Artist") == 0) {
                APPEND_TAG_META(doc, entry, MetaArtist)
            } else if (strcmp(name, "ImageDescription") == 0) {
                APPEND_TAG_META(doc, entry, MetaContent)
            } else if (strcmp(name, "Make") == 0) {
                APPEND_TAG_META(doc, entry, MetaExifMake)
            } else if (strcmp(name, "Model") == 0) {
                APPEND_TAG_META(doc, entry, MetaExifModel)
            } else if (strcmp(name, "Software") == 0) {
                APPEND_TAG_META(doc, entry, MetaExifSoftware)
            } else if (strcmp(name, "FNumber") == 0) {
                APPEND_TAG_META(doc, entry, MetaExifFNumber)
            } else if (strcmp(name, "FocalLength") == 0) {
                APPEND_TAG_META(doc, entry, MetaExifFocalLength)
            } else if (strcmp(name, "UserComment") == 0) {
                APPEND_TAG_META(doc, entry, MetaExifUserComment)
            } else if (strcmp(name, "ISOSpeedRatings") == 0) {
                APPEND_TAG_META(doc, entry, MetaExifIsoSpeedRatings)
            } else if (strcmp(name, "ExposureTime") == 0) {
                APPEND_TAG_META(doc, entry, MetaExifExposureTime)
            } else if (strcmp(name, "DateTime") == 0) {
                APPEND_TAG_META(doc, entry, MetaExifDateTime)
            }
        }
        return;
    }

    // Video: duration and bitrate first
    meta_line_t *duration = malloc(sizeof(meta_line_t));
    duration->key = MetaMediaDuration;
    duration->longval = pFormatCtx->duration / AV_TIME_BASE;
    APPEND_META(doc, duration)

    meta_line_t *bitrate = malloc(sizeof(meta_line_t));
    bitrate->key = MetaMediaBitrate;
    bitrate->longval = pFormatCtx->bit_rate;
    APPEND_META(doc, bitrate)

    // Then the container-level tags
    while ((entry = av_dict_get(pFormatCtx->metadata, "", entry, AV_DICT_IGNORE_SUFFIX))) {
        const char *name = entry->key;

        if (include_audio_tags && strcmp(name, "title") == 0) {
            APPEND_TAG_META(doc, entry, MetaTitle)
        } else if (strcmp(name, "comment") == 0) {
            APPEND_TAG_META(doc, entry, MetaContent)
        } else if (include_audio_tags && strcmp(name, "artist") == 0) {
            APPEND_TAG_META(doc, entry, MetaArtist)
        }
    }
}
/*
 * Extracts metadata from an opened media container and, when
 * ScanCtx.tn_size > 0 and a video/image stream exists, stores a JPEG
 * thumbnail keyed by doc->uuid.
 *
 * Takes ownership of pFormatCtx: it is closed on every path.
 *
 * Streams are scanned from last to first; the first audio and the first
 * video stream met in that order supply the codec/size metadata. Streams no
 * larger than MIN_SIZE in either dimension get no thumbnail.
 *
 * Changes from the previous version: decoder allocation/open failures and a
 * NULL JPEG encoder are handled instead of being dereferenced, and the
 * thumbnail is only stored when encoding actually produced a packet.
 */
void parse_media(AVFormatContext *pFormatCtx, document_t *doc) {

    int video_stream = -1;
    int audio_stream = -1;

    avformat_find_stream_info(pFormatCtx, NULL);

    for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) {
        AVStream *stream = pFormatCtx->streams[i];

        if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            if (audio_stream == -1) {
                meta_line_t *meta_audio = malloc(sizeof(meta_line_t));
                meta_audio->key = MetaMediaAudioCodec;
                meta_audio->intval = stream->codecpar->codec_id;
                APPEND_META(doc, meta_audio)

                append_audio_meta(pFormatCtx, doc);

                audio_stream = i;
            }
        } else if (stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {

            if (video_stream == -1) {
                meta_line_t *meta_vid = malloc(sizeof(meta_line_t));
                meta_vid->key = MetaMediaVideoCodec;
                meta_vid->intval = stream->codecpar->codec_id;
                APPEND_META(doc, meta_vid)

                meta_line_t *meta_w = malloc(sizeof(meta_line_t));
                meta_w->key = MetaWidth;
                meta_w->intval = stream->codecpar->width;
                APPEND_META(doc, meta_w)

                meta_line_t *meta_h = malloc(sizeof(meta_line_t));
                meta_h->key = MetaHeight;
                meta_h->intval = stream->codecpar->height;
                APPEND_META(doc, meta_h)

                video_stream = i;
            }
        }
    }

    if (video_stream != -1 && ScanCtx.tn_size > 0) {
        AVStream *stream = pFormatCtx->streams[video_stream];

        if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
            avformat_close_input(&pFormatCtx);
            avformat_free_context(pFormatCtx);
            return;
        }

        // Decoder
        AVCodec *video_codec = avcodec_find_decoder(stream->codecpar->codec_id);
        AVCodecContext *decoder = avcodec_alloc_context3(video_codec);
        if (decoder == NULL) {
            LOG_ERROR(doc->filepath, "(media.c) Could not allocate decoder context");
            avformat_close_input(&pFormatCtx);
            avformat_free_context(pFormatCtx);
            return;
        }
        avcodec_parameters_to_context(decoder, stream->codecpar);

        int open_ret = avcodec_open2(decoder, video_codec, NULL);
        if (open_ret != 0) {
            LOG_ERRORF(doc->filepath, "(media.c) avcodec_open2() returned error code [%d] %s",
                       open_ret, av_err2str(open_ret));
            avcodec_free_context(&decoder);
            avformat_close_input(&pFormatCtx);
            avformat_free_context(pFormatCtx);
            return;
        }

        //Seek
        if (stream->nb_frames > 1 && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
            // NOTE(review): every retry issues the identical seek request;
            // kept as-is to preserve behaviour.
            int seek_ret = 0;
            for (int i = 20; i >= 0; i--) {
                seek_ret = av_seek_frame(pFormatCtx, video_stream,
                                         stream->duration * 0.10, 0);
                if (seek_ret == 0) {
                    break;
                }
            }
        }

        AVFrame *frame = read_frame(pFormatCtx, decoder, video_stream, doc);
        if (frame == NULL) {
            avcodec_free_context(&decoder);
            avformat_close_input(&pFormatCtx);
            avformat_free_context(pFormatCtx);
            return;
        }

        append_video_meta(pFormatCtx, frame, doc, audio_stream == -1, stream->nb_frames > 1);

        // Scale frame
        AVFrame *scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size);

        if (scaled_frame == NULL) {
            av_frame_free(&frame);
            avcodec_free_context(&decoder);
            avformat_close_input(&pFormatCtx);
            avformat_free_context(pFormatCtx);
            return;
        }

        // Encode frame to jpeg
        AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ScanCtx.tn_qscale);
        if (jpeg_encoder != NULL) {
            AVPacket jpeg_packet;
            av_init_packet(&jpeg_packet);
            // av_init_packet() leaves data/size untouched; start from empty
            jpeg_packet.data = NULL;
            jpeg_packet.size = 0;

            if (avcodec_send_frame(jpeg_encoder, scaled_frame) == 0
                && avcodec_receive_packet(jpeg_encoder, &jpeg_packet) == 0) {
                // Save thumbnail
                store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data,
                            jpeg_packet.size);
            } else {
                LOG_ERROR(doc->filepath, "(media.c) Could not encode thumbnail");
            }

            av_packet_unref(&jpeg_packet);
            avcodec_free_context(&jpeg_encoder);
        } else {
            LOG_ERROR(doc->filepath, "(media.c) Could not create jpeg encoder");
        }

        av_frame_free(&frame);
        // The pixel buffer of the scaled frame was av_malloc'ed by scale_frame()
        av_free(*scaled_frame->data);
        av_frame_free(&scaled_frame);
        avcodec_free_context(&decoder);
    }

    avformat_close_input(&pFormatCtx);
    avformat_free_context(pFormatCtx);
}
/*
 * Opens the media file at `filepath` with libavformat and hands it to
 * parse_media(), which closes it. Allocation and open failures are logged
 * against doc->filepath and abort the parse.
 */
void parse_media_filename(const char *filepath, document_t *doc) {

    AVFormatContext *format_ctx = avformat_alloc_context();
    if (format_ctx == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()");
        return;
    }

    int open_ret = avformat_open_input(&format_ctx, filepath, NULL, NULL);
    if (open_ret >= 0) {
        parse_media(format_ctx, doc);
        return;
    }

    LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", open_ret, av_err2str(open_ret));
    avformat_close_input(&format_ctx);
    avformat_free_context(format_ctx);
}
/*
 * AVIO read callback backed by a vfile. A read of 0 bytes is translated to
 * AVERROR_EOF; any other result of the vfile's read function is passed
 * through unchanged.
 */
int vfile_read(void *ptr, uint8_t *buf, int buf_size) {
    struct vfile *file = ptr;

    int n_read = file->read(file, buf, buf_size);

    return n_read == 0 ? AVERROR_EOF : n_read;
}
/*
 * Parses media read through a vfile (e.g. a file inside an archive) by
 * giving libavformat a custom, read-only AVIO context with no seek callback.
 *
 * An open result of -5 is treated as "this media needs seeking" and is
 * skipped silently; any other negative result is logged. On success the
 * stream is handed to parse_media(), after which the AVIO buffer and
 * context are released here.
 */
void parse_media_vfile(struct vfile *f, document_t *doc) {

    AVFormatContext *format_ctx = avformat_alloc_context();
    if (format_ctx == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()");
        return;
    }

    unsigned char *io_buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
    AVIOContext *io_ctx = avio_alloc_context(io_buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);

    format_ctx->pb = io_ctx;
    format_ctx->flags |= AVFMT_FLAG_CUSTOM_IO;

    int open_ret = avformat_open_input(&format_ctx, "", NULL, NULL);

    if (open_ret < 0) {
        // -5: tried to parse media that requires seek; not worth a log line
        if (open_ret != -5) {
            LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", open_ret, av_err2str(open_ret));
        }

        av_free(io_ctx->buffer);
        avio_context_free(&io_ctx);
        avformat_close_input(&format_ctx);
        avformat_free_context(format_ctx);
        return;
    }

    parse_media(format_ctx, doc);

    // The buffer is released through io_ctx->buffer rather than io_buffer
    av_free(io_ctx->buffer);
    avio_context_free(&io_ctx);
}

View File

@ -1,14 +0,0 @@
#ifndef SIST2_MEDIA_H
#define SIST2_MEDIA_H
#include "src/sist.h"
#define MIN_VIDEO_SIZE 1024 * 64
#define MIN_IMAGE_SIZE 1024 * 2
void parse_media_filename(const char * filepath, document_t *doc);
void parse_media_vfile(struct vfile *f, document_t *doc);
#endif

View File

@ -1,7 +1,7 @@
#ifndef SIST2_MIME_H
#define SIST2_MIME_H
#include "src/sist.h"
#include "../sist.h"
#define MAJOR_MIME(mime_id) (mime_id & 0x0FFF0000) >> 16
@ -25,6 +25,9 @@
#define DOC_MASK 0x04000000
#define IS_DOC(mime_id) (mime_id & DOC_MASK) == DOC_MASK
#define MOBI_MASK 0x02000000
#define IS_MOBI(mime_id) (mime_id & MOBI_MASK) == MOBI_MASK
enum major_mime {
MimeInvalid = 0,
MimeModel = 1,

View File

@ -1,7 +1,7 @@
// **Generated by mime.py**
#ifndef MIME_GENERATED_C
#define MIME_GENERATED_C
#include <glib-2.0/glib.h>
#include <glib.h>
#include <stdlib.h>
@ -53,386 +53,387 @@ enum mime {
application_step=655405,
application_streamingmedia=655406,
application_vda=655407,
application_vnd_fdf=655408,
application_vnd_font_fontforge_sfd=655409,
application_vnd_hp_hpgl=655410,
application_vnd_iccprofile=655411,
application_vnd_lotus_1_2_3=655412,
application_vnd_ms_cab_compressed=655413,
application_vnd_ms_excel=655414,
application_vnd_ms_fontobject=655415,
application_vnd_ms_opentype=655416 | 0x20000000,
application_vnd_ms_pki_certstore=655417,
application_vnd_ms_pki_pko=655418,
application_vnd_ms_pki_seccat=655419,
application_vnd_ms_powerpoint=655420,
application_vnd_ms_project=655421,
application_vnd_oasis_opendocument_base=655422,
application_vnd_oasis_opendocument_formula=655423,
application_vnd_oasis_opendocument_graphics=655424,
application_vnd_oasis_opendocument_presentation=655425,
application_vnd_oasis_opendocument_spreadsheet=655426,
application_vnd_oasis_opendocument_text=655427,
application_vnd_openxmlformats_officedocument_presentationml_presentation=655428 | 0x04000000,
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655429 | 0x04000000,
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655430 | 0x04000000,
application_vnd_symbian_install=655431,
application_vnd_tcpdump_pcap=655432,
application_vnd_wap_wmlc=655433,
application_vnd_wap_wmlscriptc=655434,
application_vnd_xara=655435,
application_vocaltec_media_desc=655436,
application_vocaltec_media_file=655437,
application_warc=655438,
application_winhelp=655439,
application_wordperfect=655440,
application_wordperfect6_0=655441,
application_wordperfect6_1=655442,
application_x_123=655443,
application_x_7z_compressed=655444 | 0x10000000,
application_x_aim=655445,
application_x_apple_diskimage=655446,
application_x_arc=655447 | 0x10000000,
application_x_archive=655448,
application_x_atari_7800_rom=655449,
application_x_authorware_bin=655450,
application_x_authorware_map=655451,
application_x_authorware_seg=655452,
application_x_avira_qua=655453,
application_x_bcpio=655454,
application_x_bittorrent=655455,
application_x_bsh=655456,
application_x_bytecode_python=655457,
application_x_bzip=655458,
application_x_bzip2=655459 | 0x08000000,
application_x_cbr=655460,
application_x_cbz=655461 | 0x40000000,
application_x_cdlink=655462,
application_x_chat=655463,
application_x_chrome_extension=655464,
application_x_cocoa=655465,
application_x_conference=655466,
application_x_coredump=655467,
application_x_cpio=655468,
application_x_dbf=655469,
application_x_dbt=655470,
application_x_debian_package=655471,
application_x_deepv=655472,
application_x_director=655473,
application_x_dmp=655474,
application_x_dosdriver=655475,
application_x_dosexec=655476,
application_x_dvi=655477,
application_x_elc=655478,
application_vnd_amazon_mobi8_ebook=655408 | 0x02000000,
application_vnd_fdf=655409,
application_vnd_font_fontforge_sfd=655410,
application_vnd_hp_hpgl=655411,
application_vnd_iccprofile=655412,
application_vnd_lotus_1_2_3=655413,
application_vnd_ms_cab_compressed=655414,
application_vnd_ms_excel=655415,
application_vnd_ms_fontobject=655416,
application_vnd_ms_opentype=655417 | 0x20000000,
application_vnd_ms_pki_certstore=655418,
application_vnd_ms_pki_pko=655419,
application_vnd_ms_pki_seccat=655420,
application_vnd_ms_powerpoint=655421,
application_vnd_ms_project=655422,
application_vnd_oasis_opendocument_base=655423,
application_vnd_oasis_opendocument_formula=655424,
application_vnd_oasis_opendocument_graphics=655425,
application_vnd_oasis_opendocument_presentation=655426,
application_vnd_oasis_opendocument_spreadsheet=655427,
application_vnd_oasis_opendocument_text=655428,
application_vnd_openxmlformats_officedocument_presentationml_presentation=655429 | 0x04000000,
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655430 | 0x04000000,
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655431 | 0x04000000,
application_vnd_symbian_install=655432,
application_vnd_tcpdump_pcap=655433,
application_vnd_wap_wmlc=655434,
application_vnd_wap_wmlscriptc=655435,
application_vnd_xara=655436,
application_vocaltec_media_desc=655437,
application_vocaltec_media_file=655438,
application_warc=655439,
application_winhelp=655440,
application_wordperfect=655441,
application_wordperfect6_0=655442,
application_wordperfect6_1=655443,
application_x_123=655444,
application_x_7z_compressed=655445 | 0x10000000,
application_x_aim=655446,
application_x_apple_diskimage=655447,
application_x_arc=655448 | 0x10000000,
application_x_archive=655449,
application_x_atari_7800_rom=655450,
application_x_authorware_bin=655451,
application_x_authorware_map=655452,
application_x_authorware_seg=655453,
application_x_avira_qua=655454,
application_x_bcpio=655455,
application_x_bittorrent=655456,
application_x_bsh=655457,
application_x_bytecode_python=655458,
application_x_bzip=655459,
application_x_bzip2=655460 | 0x08000000,
application_x_cbr=655461,
application_x_cbz=655462 | 0x40000000,
application_x_cdlink=655463,
application_x_chat=655464,
application_x_chrome_extension=655465,
application_x_cocoa=655466,
application_x_conference=655467,
application_x_coredump=655468,
application_x_cpio=655469,
application_x_dbf=655470,
application_x_dbt=655471,
application_x_debian_package=655472,
application_x_deepv=655473,
application_x_director=655474,
application_x_dmp=655475,
application_x_dosdriver=655476,
application_x_dosexec=655477,
application_x_dvi=655478,
application_x_elc=655479,
application_x_empty=1,
application_x_envoy=655480,
application_x_esrehber=655481,
application_x_excel=655482,
application_x_executable=655483,
application_x_font_gdos=655484,
application_x_font_pf2=655485,
application_x_font_pfm=655486,
application_x_font_sfn=655487,
application_x_font_ttf=655488 | 0x20000000,
application_x_fptapplication_x_dbt=655489,
application_x_freelance=655490,
application_x_gamecube_rom=655491,
application_x_gdbm=655492,
application_x_gettext_translation=655493,
application_x_git=655494,
application_x_gsp=655495,
application_x_gss=655496,
application_x_gtar=655497,
application_x_gzip=655498,
application_x_hdf=655499,
application_x_helpfile=655500,
application_x_httpd_imap=655501,
application_x_ima=655502,
application_x_innosetup=655503,
application_x_internett_signup=655504,
application_x_inventor=655505,
application_x_ip2=655506,
application_x_java_applet=655507,
application_x_java_commerce=655508,
application_x_java_image=655509,
application_x_java_jmod=655510,
application_x_java_keystore=655511,
application_x_kdelnk=655512,
application_x_koan=655513,
application_x_latex=655514,
application_x_livescreen=655515,
application_x_lotus=655516,
application_x_lz4=655517 | 0x08000000,
application_x_lz4_json=655518,
application_x_lzh=655519,
application_x_lzh_compressed=655520,
application_x_lzip=655521 | 0x08000000,
application_x_lzma=655522 | 0x08000000,
application_x_lzop=655523 | 0x08000000,
application_x_lzx=655524,
application_x_mach_binary=655525,
application_x_mach_executable=655526,
application_x_magic_cap_package_1_0=655527,
application_x_mathcad=655528,
application_x_maxis_dbpf=655529,
application_x_meme=655530,
application_x_midi=655531,
application_x_mif=655532,
application_x_mix_transfer=655533,
application_x_mobipocket_ebook=655534,
application_x_ms_compress_szdd=655535,
application_x_ms_pdb=655536,
application_x_ms_reader=655537,
application_x_msaccess=655538,
application_x_n64_rom=655539,
application_x_navi_animation=655540,
application_x_navidoc=655541,
application_x_navimap=655542,
application_x_navistyle=655543,
application_x_nes_rom=655544,
application_x_netcdf=655545,
application_x_newton_compatible_pkg=655546,
application_x_nintendo_ds_rom=655547,
application_x_object=655548,
application_x_omc=655549,
application_x_omcdatamaker=655550,
application_x_omcregerator=655551,
application_x_pagemaker=655552,
application_x_pcl=655553,
application_x_pgp_keyring=655554,
application_x_pixclscript=655555,
application_x_pkcs7_certreqresp=655556,
application_x_pkcs7_signature=655557,
application_x_project=655558,
application_x_qpro=655559,
application_x_rar=655560 | 0x10000000,
application_x_rpm=655561,
application_x_sdp=655562,
application_x_sea=655563,
application_x_seelogo=655564,
application_x_setupscript=655565,
application_x_shar=655566,
application_x_sharedlib=655567,
application_x_shockwave_flash=655568,
application_x_snappy_framed=655569,
application_x_sprite=655570,
application_x_sqlite3=655571,
application_x_stargallery_thm=655572,
application_x_stuffit=655573,
application_x_sv4cpio=655574,
application_x_sv4crc=655575,
application_x_tar=655576 | 0x10000000,
application_x_tbook=655577,
application_x_terminfo=655578,
application_x_terminfo2=655579,
application_x_tex_tfm=655580,
application_x_texinfo=655581,
application_x_ustar=655582,
application_x_visio=655583,
application_x_vnd_audioexplosion_mzz=655584,
application_x_vnd_ls_xpix=655585,
application_x_vrml=655586,
application_x_wais_source=655587,
application_x_wine_extension_ini=655588,
application_x_wintalk=655589,
application_x_world=655590,
application_x_wri=655591,
application_x_x509_ca_cert=655592,
application_x_xz=655593 | 0x08000000,
application_x_zip=655594,
application_x_zstd=655595 | 0x08000000,
application_xml=655596,
application_zip=655597 | 0x10000000,
application_zlib=655598,
audio_basic=458991 | 0x80000000,
audio_it=458992,
audio_make=458993,
audio_mid=458994,
audio_midi=458995,
audio_mp4=458996,
audio_mpeg=458997,
audio_ogg=458998,
audio_s3m=458999,
audio_tsp_audio=459000,
audio_tsplayer=459001,
audio_vnd_qcelp=459002,
audio_voxware=459003,
audio_x_aiff=459004,
audio_x_flac=459005,
audio_x_gsm=459006,
audio_x_hx_aac_adts=459007,
audio_x_jam=459008,
audio_x_liveaudio=459009,
audio_x_m4a=459010,
audio_x_midi=459011,
audio_x_mod=459012,
audio_x_mp4a_latm=459013,
audio_x_mpeg_3=459014,
audio_x_mpequrl=459015,
audio_x_nspaudio=459016,
audio_x_pn_realaudio=459017,
audio_x_psid=459018,
audio_x_realaudio=459019,
audio_x_s3m=459020,
audio_x_twinvq=459021,
audio_x_twinvq_plugin=459022,
audio_x_voc=459023,
audio_x_wav=459024,
audio_x_xbox_executable=459025 | 0x80000000,
audio_x_xbox360_executable=459026 | 0x80000000,
audio_xm=459027,
font_otf=327956 | 0x20000000,
font_sfnt=327957 | 0x20000000,
font_woff=327958 | 0x20000000,
font_woff2=327959 | 0x20000000,
image_bmp=524568,
image_cmu_raster=524569,
image_fif=524570,
image_florian=524571,
image_g3fax=524572,
image_gif=524573,
image_heic=524574,
image_ief=524575,
image_jpeg=524576,
image_jutvision=524577,
image_naplps=524578,
image_pict=524579,
image_png=524580,
image_svg=524581 | 0x80000000,
image_svg_xml=524582 | 0x80000000,
image_tiff=524583,
image_vnd_adobe_photoshop=524584 | 0x80000000,
image_vnd_djvu=524585 | 0x80000000,
image_vnd_fpx=524586,
image_vnd_microsoft_icon=524587,
image_vnd_rn_realflash=524588,
image_vnd_rn_realpix=524589,
image_vnd_wap_wbmp=524590,
image_vnd_xiff=524591,
image_webp=524592,
image_wmf=524593,
image_x_3ds=524594,
image_x_award_bioslogo=524595,
image_x_cmu_raster=524596,
image_x_cur=524597,
image_x_dwg=524598,
image_x_eps=524599,
image_x_exr=524600,
image_x_gem=524601,
image_x_icns=524602,
image_x_icon=524603 | 0x80000000,
image_x_jg=524604,
image_x_jps=524605,
image_x_ms_bmp=524606,
image_x_niff=524607,
image_x_pcx=524608,
image_x_pict=524609,
image_x_portable_bitmap=524610,
image_x_portable_graymap=524611,
image_x_portable_pixmap=524612,
image_x_quicktime=524613,
image_x_rgb=524614,
image_x_tga=524615,
image_x_tiff=524616,
image_x_win_bitmap=524617,
image_x_xcf=524618 | 0x80000000,
image_x_xpixmap=524619 | 0x80000000,
image_x_xwindowdump=524620,
message_news=196941,
message_rfc822=196942,
model_vnd_dwf=65871,
model_vnd_gdl=65872,
model_vnd_gs_gdl=65873,
model_vrml=65874,
model_x_pov=65875,
text_PGP=590164,
text_asp=590165,
text_css=590166,
text_html=590167,
text_javascript=590168,
text_mcf=590169,
text_pascal=590170,
text_plain=590171,
text_richtext=590172,
text_rtf=590173,
text_scriplet=590174,
text_tab_separated_values=590175,
text_troff=590176,
text_uri_list=590177,
text_vnd_abc=590178,
text_vnd_fmi_flexstor=590179,
text_vnd_wap_wml=590180,
text_vnd_wap_wmlscript=590181,
text_webviewhtml=590182,
text_x_Algol68=590183,
text_x_asm=590184,
text_x_audiosoft_intra=590185,
text_x_awk=590186,
text_x_bcpl=590187,
text_x_c=590188,
text_x_c__=590189,
text_x_component=590190,
text_x_diff=590191,
text_x_fortran=590192,
text_x_java=590193,
text_x_la_asf=590194,
text_x_lisp=590195,
text_x_m=590196,
text_x_m4=590197,
text_x_makefile=590198,
text_x_ms_regedit=590199,
text_x_msdos_batch=590200,
text_x_objective_c=590201,
text_x_pascal=590202,
text_x_perl=590203,
text_x_php=590204,
text_x_po=590205,
text_x_python=590206,
text_x_ruby=590207,
text_x_sass=590208,
text_x_scss=590209,
text_x_server_parsed_html=590210,
text_x_setext=590211,
text_x_sgml=590212,
text_x_shellscript=590213,
text_x_speech=590214,
text_x_tcl=590215,
text_x_tex=590216,
text_x_uil=590217,
text_x_uuencode=590218,
text_x_vcalendar=590219,
text_x_vcard=590220,
text_xml=590221,
video_MP2T=393614,
video_animaflex=393615,
video_avi=393616,
video_avs_video=393617,
video_mp4=393618,
video_mpeg=393619,
video_quicktime=393620,
video_vdo=393621,
video_vivo=393622,
video_vnd_rn_realvideo=393623,
video_vosaic=393624,
video_webm=393625,
video_x_amt_demorun=393626,
video_x_amt_showrun=393627,
video_x_atomic3d_feature=393628,
video_x_dl=393629,
video_x_dv=393630,
video_x_fli=393631,
video_x_flv=393632,
video_x_isvideo=393633,
video_x_jng=393634 | 0x80000000,
video_x_m4v=393635,
video_x_matroska=393636,
video_x_mng=393637,
video_x_motion_jpeg=393638,
video_x_ms_asf=393639,
video_x_msvideo=393640,
video_x_qtc=393641,
video_x_sgi_movie=393642,
x_epoc_x_sisx_app=721323,
application_x_envoy=655481,
application_x_esrehber=655482,
application_x_excel=655483,
application_x_executable=655484,
application_x_font_gdos=655485,
application_x_font_pf2=655486,
application_x_font_pfm=655487,
application_x_font_sfn=655488,
application_x_font_ttf=655489 | 0x20000000,
application_x_fptapplication_x_dbt=655490,
application_x_freelance=655491,
application_x_gamecube_rom=655492,
application_x_gdbm=655493,
application_x_gettext_translation=655494,
application_x_git=655495,
application_x_gsp=655496,
application_x_gss=655497,
application_x_gtar=655498,
application_x_gzip=655499,
application_x_hdf=655500,
application_x_helpfile=655501,
application_x_httpd_imap=655502,
application_x_ima=655503,
application_x_innosetup=655504,
application_x_internett_signup=655505,
application_x_inventor=655506,
application_x_ip2=655507,
application_x_java_applet=655508,
application_x_java_commerce=655509,
application_x_java_image=655510,
application_x_java_jmod=655511,
application_x_java_keystore=655512,
application_x_kdelnk=655513,
application_x_koan=655514,
application_x_latex=655515,
application_x_livescreen=655516,
application_x_lotus=655517,
application_x_lz4=655518 | 0x08000000,
application_x_lz4_json=655519,
application_x_lzh=655520,
application_x_lzh_compressed=655521,
application_x_lzip=655522 | 0x08000000,
application_x_lzma=655523 | 0x08000000,
application_x_lzop=655524 | 0x08000000,
application_x_lzx=655525,
application_x_mach_binary=655526,
application_x_mach_executable=655527,
application_x_magic_cap_package_1_0=655528,
application_x_mathcad=655529,
application_x_maxis_dbpf=655530,
application_x_meme=655531,
application_x_midi=655532,
application_x_mif=655533,
application_x_mix_transfer=655534,
application_x_mobipocket_ebook=655535 | 0x02000000,
application_x_ms_compress_szdd=655536,
application_x_ms_pdb=655537,
application_x_ms_reader=655538,
application_x_msaccess=655539,
application_x_n64_rom=655540,
application_x_navi_animation=655541,
application_x_navidoc=655542,
application_x_navimap=655543,
application_x_navistyle=655544,
application_x_nes_rom=655545,
application_x_netcdf=655546,
application_x_newton_compatible_pkg=655547,
application_x_nintendo_ds_rom=655548,
application_x_object=655549,
application_x_omc=655550,
application_x_omcdatamaker=655551,
application_x_omcregerator=655552,
application_x_pagemaker=655553,
application_x_pcl=655554,
application_x_pgp_keyring=655555,
application_x_pixclscript=655556,
application_x_pkcs7_certreqresp=655557,
application_x_pkcs7_signature=655558,
application_x_project=655559,
application_x_qpro=655560,
application_x_rar=655561 | 0x10000000,
application_x_rpm=655562,
application_x_sdp=655563,
application_x_sea=655564,
application_x_seelogo=655565,
application_x_setupscript=655566,
application_x_shar=655567,
application_x_sharedlib=655568,
application_x_shockwave_flash=655569,
application_x_snappy_framed=655570,
application_x_sprite=655571,
application_x_sqlite3=655572,
application_x_stargallery_thm=655573,
application_x_stuffit=655574,
application_x_sv4cpio=655575,
application_x_sv4crc=655576,
application_x_tar=655577 | 0x10000000,
application_x_tbook=655578,
application_x_terminfo=655579,
application_x_terminfo2=655580,
application_x_tex_tfm=655581,
application_x_texinfo=655582,
application_x_ustar=655583,
application_x_visio=655584,
application_x_vnd_audioexplosion_mzz=655585,
application_x_vnd_ls_xpix=655586,
application_x_vrml=655587,
application_x_wais_source=655588,
application_x_wine_extension_ini=655589,
application_x_wintalk=655590,
application_x_world=655591,
application_x_wri=655592,
application_x_x509_ca_cert=655593,
application_x_xz=655594 | 0x08000000,
application_x_zip=655595,
application_x_zstd=655596 | 0x08000000,
application_xml=655597,
application_zip=655598 | 0x10000000,
application_zlib=655599,
audio_basic=458992 | 0x80000000,
audio_it=458993,
audio_make=458994,
audio_mid=458995,
audio_midi=458996,
audio_mp4=458997,
audio_mpeg=458998,
audio_ogg=458999,
audio_s3m=459000,
audio_tsp_audio=459001,
audio_tsplayer=459002,
audio_vnd_qcelp=459003,
audio_voxware=459004,
audio_x_aiff=459005,
audio_x_flac=459006,
audio_x_gsm=459007,
audio_x_hx_aac_adts=459008,
audio_x_jam=459009,
audio_x_liveaudio=459010,
audio_x_m4a=459011,
audio_x_midi=459012,
audio_x_mod=459013,
audio_x_mp4a_latm=459014,
audio_x_mpeg_3=459015,
audio_x_mpequrl=459016,
audio_x_nspaudio=459017,
audio_x_pn_realaudio=459018,
audio_x_psid=459019,
audio_x_realaudio=459020,
audio_x_s3m=459021,
audio_x_twinvq=459022,
audio_x_twinvq_plugin=459023,
audio_x_voc=459024,
audio_x_wav=459025,
audio_x_xbox_executable=459026 | 0x80000000,
audio_x_xbox360_executable=459027 | 0x80000000,
audio_xm=459028,
font_otf=327957 | 0x20000000,
font_sfnt=327958 | 0x20000000,
font_woff=327959 | 0x20000000,
font_woff2=327960 | 0x20000000,
image_bmp=524569,
image_cmu_raster=524570,
image_fif=524571,
image_florian=524572,
image_g3fax=524573,
image_gif=524574,
image_heic=524575,
image_ief=524576,
image_jpeg=524577,
image_jutvision=524578,
image_naplps=524579,
image_pict=524580,
image_png=524581,
image_svg=524582 | 0x80000000,
image_svg_xml=524583 | 0x80000000,
image_tiff=524584,
image_vnd_adobe_photoshop=524585 | 0x80000000,
image_vnd_djvu=524586 | 0x80000000,
image_vnd_fpx=524587,
image_vnd_microsoft_icon=524588,
image_vnd_rn_realflash=524589,
image_vnd_rn_realpix=524590,
image_vnd_wap_wbmp=524591,
image_vnd_xiff=524592,
image_webp=524593,
image_wmf=524594,
image_x_3ds=524595,
image_x_award_bioslogo=524596,
image_x_cmu_raster=524597,
image_x_cur=524598,
image_x_dwg=524599,
image_x_eps=524600,
image_x_exr=524601,
image_x_gem=524602,
image_x_icns=524603,
image_x_icon=524604 | 0x80000000,
image_x_jg=524605,
image_x_jps=524606,
image_x_ms_bmp=524607,
image_x_niff=524608,
image_x_pcx=524609,
image_x_pict=524610,
image_x_portable_bitmap=524611,
image_x_portable_graymap=524612,
image_x_portable_pixmap=524613,
image_x_quicktime=524614,
image_x_rgb=524615,
image_x_tga=524616,
image_x_tiff=524617,
image_x_win_bitmap=524618,
image_x_xcf=524619 | 0x80000000,
image_x_xpixmap=524620 | 0x80000000,
image_x_xwindowdump=524621,
message_news=196942,
message_rfc822=196943,
model_vnd_dwf=65872,
model_vnd_gdl=65873,
model_vnd_gs_gdl=65874,
model_vrml=65875,
model_x_pov=65876,
text_PGP=590165,
text_asp=590166,
text_css=590167,
text_html=590168,
text_javascript=590169,
text_mcf=590170,
text_pascal=590171,
text_plain=590172,
text_richtext=590173,
text_rtf=590174,
text_scriplet=590175,
text_tab_separated_values=590176,
text_troff=590177,
text_uri_list=590178,
text_vnd_abc=590179,
text_vnd_fmi_flexstor=590180,
text_vnd_wap_wml=590181,
text_vnd_wap_wmlscript=590182,
text_webviewhtml=590183,
text_x_Algol68=590184,
text_x_asm=590185,
text_x_audiosoft_intra=590186,
text_x_awk=590187,
text_x_bcpl=590188,
text_x_c=590189,
text_x_c__=590190,
text_x_component=590191,
text_x_diff=590192,
text_x_fortran=590193,
text_x_java=590194,
text_x_la_asf=590195,
text_x_lisp=590196,
text_x_m=590197,
text_x_m4=590198,
text_x_makefile=590199,
text_x_ms_regedit=590200,
text_x_msdos_batch=590201,
text_x_objective_c=590202,
text_x_pascal=590203,
text_x_perl=590204,
text_x_php=590205,
text_x_po=590206,
text_x_python=590207,
text_x_ruby=590208,
text_x_sass=590209,
text_x_scss=590210,
text_x_server_parsed_html=590211,
text_x_setext=590212,
text_x_sgml=590213,
text_x_shellscript=590214,
text_x_speech=590215,
text_x_tcl=590216,
text_x_tex=590217,
text_x_uil=590218,
text_x_uuencode=590219,
text_x_vcalendar=590220,
text_x_vcard=590221,
text_xml=590222,
video_MP2T=393615,
video_animaflex=393616,
video_avi=393617,
video_avs_video=393618,
video_mp4=393619,
video_mpeg=393620,
video_quicktime=393621,
video_vdo=393622,
video_vivo=393623,
video_vnd_rn_realvideo=393624,
video_vosaic=393625,
video_webm=393626,
video_x_amt_demorun=393627,
video_x_amt_showrun=393628,
video_x_atomic3d_feature=393629,
video_x_dl=393630,
video_x_dv=393631,
video_x_fli=393632,
video_x_flv=393633,
video_x_isvideo=393634,
video_x_jng=393635 | 0x80000000,
video_x_m4v=393636,
video_x_matroska=393637,
video_x_mng=393638,
video_x_motion_jpeg=393639,
video_x_ms_asf=393640,
video_x_msvideo=393641,
video_x_qtc=393642,
video_x_sgi_movie=393643,
x_epoc_x_sisx_app=721324,
};
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_arj: return "application/arj";
@ -610,6 +611,7 @@ case application_x_mif: return "application/x-mif";
case application_x_mix_transfer: return "application/x-mix-transfer";
case application_xml: return "application/xml";
case application_x_mobipocket_ebook: return "application/x-mobipocket-ebook";
case application_vnd_amazon_mobi8_ebook: return "application/vnd.amazon.mobi8-ebook";
case application_x_msaccess: return "application/x-msaccess";
case application_x_ms_compress_szdd: return "application/x-ms-compress-szdd";
case application_x_ms_pdb: return "application/x-ms-pdb";
@ -1052,6 +1054,8 @@ g_hash_table_insert(ext_table, "mif", (gpointer)application_x_mif);
g_hash_table_insert(ext_table, "nix", (gpointer)application_x_mix_transfer);
g_hash_table_insert(ext_table, "opf", (gpointer)application_xml);
g_hash_table_insert(ext_table, "mobi", (gpointer)application_x_mobipocket_ebook);
g_hash_table_insert(ext_table, "azw", (gpointer)application_vnd_amazon_mobi8_ebook);
g_hash_table_insert(ext_table, "azw3", (gpointer)application_vnd_amazon_mobi8_ebook);
g_hash_table_insert(ext_table, "accdb", (gpointer)application_x_msaccess);
g_hash_table_insert(ext_table, "fon", (gpointer)application_x_ms_compress_szdd);
g_hash_table_insert(ext_table, "pdb", (gpointer)application_x_ms_pdb);
@ -1556,6 +1560,7 @@ g_hash_table_insert(mime_table, "application/x-mif", (gpointer)application_x_mif
g_hash_table_insert(mime_table, "application/x-mix-transfer", (gpointer)application_x_mix_transfer);
g_hash_table_insert(mime_table, "application/xml", (gpointer)application_xml);
g_hash_table_insert(mime_table, "application/x-mobipocket-ebook", (gpointer)application_x_mobipocket_ebook);
g_hash_table_insert(mime_table, "application/vnd.amazon.mobi8-ebook", (gpointer)application_vnd_amazon_mobi8_ebook);
g_hash_table_insert(mime_table, "application/x-msaccess", (gpointer)application_x_msaccess);
g_hash_table_insert(mime_table, "application/x-ms-compress-szdd", (gpointer)application_x_ms_compress_szdd);
g_hash_table_insert(mime_table, "application/x-ms-pdb", (gpointer)application_x_ms_pdb);

View File

@ -1,7 +1,15 @@
#include "parse.h"
#include "src/sist.h"
#include "src/ctx.h"
#include "mime.h"
#include "src/io/serialize.h"
__thread magic_t Magic = NULL;
#include <magic.h>
// Minimum doc.size for a video / image document to be handed to the media parser.
// Parenthesised so each macro expands as one value inside a larger expression.
#define MIN_VIDEO_SIZE (1024 * 64)
#define MIN_IMAGE_SIZE (1024 * 2)
int fs_read(struct vfile *f, void *buf, size_t size) {
@ -24,31 +32,10 @@ void fs_close(struct vfile *f) {
}
}
void *read_all(parse_job_t *job, const char *buf, int bytes_read) {
void *full_buf;
if (job->info.st_size <= bytes_read) {
full_buf = malloc(job->info.st_size);
memcpy(full_buf, buf, job->info.st_size);
} else {
full_buf = malloc(job->info.st_size);
memcpy(full_buf, buf, bytes_read);
int ret = job->vfile.read(&job->vfile, full_buf + bytes_read, job->info.st_size - bytes_read);
if (ret < 0) {
free(full_buf);
if (job->vfile.is_fs_file) {
LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
} else {
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", ret, archive_error_string(job->vfile.arc))
}
return NULL;
}
void fs_reset(struct vfile *f) {
if (f->fd != -1) {
lseek(f->fd, 0, SEEK_SET);
}
return full_buf;
}
void parse(void *arg) {
@ -56,16 +43,12 @@ void parse(void *arg) {
parse_job_t *job = arg;
document_t doc;
int inc_ts = incremental_get(ScanCtx.original_table, job->info.st_ino);
if (inc_ts != 0 && inc_ts == job->info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, job->info.st_ino);
int inc_ts = incremental_get(ScanCtx.original_table, job->vfile.info.st_ino);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, job->vfile.info.st_ino);
return;
}
if (Magic == NULL) {
Magic = magic_open(MAGIC_MIME_TYPE);
magic_load(Magic, NULL);
}
doc.filepath = job->filepath;
doc.ext = (short) job->ext;
@ -73,9 +56,9 @@ void parse(void *arg) {
doc.meta_head = NULL;
doc.meta_tail = NULL;
doc.mime = 0;
doc.size = job->info.st_size;
doc.ino = job->info.st_ino;
doc.mtime = job->info.st_mtim.tv_sec;
doc.size = job->vfile.info.st_size;
doc.ino = job->vfile.info.st_ino;
doc.mtime = job->vfile.info.st_mtim.tv_sec;
uuid_generate(doc.uuid);
char *buf[PARSE_BUF_SIZE];
@ -86,7 +69,7 @@ void parse(void *arg) {
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", uuid_str)
}
if (job->info.st_size == 0) {
if (job->vfile.info.st_size == 0) {
doc.mime = MIME_EMPTY;
} else if (*(job->filepath + job->ext) != '\0' && (job->ext - job->base != 1)) {
doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
@ -109,7 +92,10 @@ void parse(void *arg) {
return;
}
const char *magic_mime_str = magic_buffer(Magic, buf, bytes_read);
magic_t magic = magic_open(MAGIC_MIME_TYPE);
magic_load(magic, NULL);
const char *magic_mime_str = magic_buffer(magic, buf, bytes_read);
if (magic_mime_str != NULL) {
doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
@ -120,8 +106,9 @@ void parse(void *arg) {
}
}
magic_close(Magic);
Magic = NULL;
job->vfile.reset(&job->vfile);
magic_close(magic);
}
int mmime = MAJOR_MIME(doc.mime);
@ -131,50 +118,30 @@ void parse(void *arg) {
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
if (job->vfile.is_fs_file) {
parse_media_filename(job->filepath, &doc);
} else {
parse_media_vfile(&job->vfile, &doc);
}
parse_media(&ScanCtx.media_ctx, &job->vfile, &doc);
} else if (IS_PDF(doc.mime)) {
void *pdf_buf = read_all(job, (char *) buf, bytes_read);
parse_pdf(pdf_buf, doc.size, &doc);
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc.mime), &doc);
if (pdf_buf != buf && pdf_buf != NULL) {
free(pdf_buf);
}
} else if (mmime == MimeText && ScanCtx.content_size > 0) {
parse_text(bytes_read, &job->vfile, (char *) buf, &doc);
} else if (mmime == MimeText && ScanCtx.text_ctx.content_size > 0) {
parse_text(&ScanCtx.text_ctx, &job->vfile, &doc);
} else if (IS_FONT(doc.mime)) {
void *font_buf = read_all(job, (char *) buf, bytes_read);
parse_font(font_buf, doc.size, &doc);
parse_font(&ScanCtx.font_ctx, &job->vfile, &doc);
if (font_buf != buf && font_buf != NULL) {
free(font_buf);
}
} else if (
ScanCtx.archive_mode != ARC_MODE_SKIP && (
ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
IS_ARC(doc.mime) ||
(IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
)) {
parse_archive(&job->vfile, &doc);
} else if (ScanCtx.content_size > 0 && IS_DOC(doc.mime)) {
void *doc_buf = read_all(job, (char *) buf, bytes_read);
parse_doc(doc_buf, doc.size, &doc);
parse_archive(&ScanCtx.arc_ctx, &job->vfile, &doc);
} else if (ScanCtx.ooxml_ctx.content_size > 0 && IS_DOC(doc.mime)) {
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, &doc);
if (doc_buf != buf && doc_buf != NULL) {
free(doc_buf);
}
} else if (is_cbr(doc.mime)) {
void *cbr_buf = read_all(job, (char *) buf, bytes_read);
parse_cbr(cbr_buf, doc.size, &doc);
if (cbr_buf != buf && cbr_buf != NULL) {
free(cbr_buf);
}
} else if (is_cbr(&ScanCtx.cbr_ctx, doc.mime)) {
parse_cbr(&ScanCtx.cbr_ctx, &job->vfile, &doc);
} else if (IS_MOBI(doc.mime)) {
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc);
}
//Parent meta
@ -184,7 +151,7 @@ void parse(void *arg) {
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
meta_parent->key = MetaParent;
strcpy(meta_parent->strval, tmp);
strcpy(meta_parent->str_val, tmp);
APPEND_META((&doc), meta_parent)
}
@ -194,7 +161,5 @@ void parse(void *arg) {
}
void cleanup_parse() {
if (Magic != NULL) {
magic_close(Magic);
}
// noop
}

View File

@ -1,12 +1,13 @@
#ifndef SIST2_PARSE_H
#define SIST2_PARSE_H
#include "src/sist.h"
#include "../sist.h"
// Element count of the scratch buffer that parse() declares for sniffing the mime type.
#define PARSE_BUF_SIZE 4096
// vfile read callback for regular files.
// NOTE(review): body not visible here - presumably reads up to `size` bytes into `buf`; confirm the return convention.
int fs_read(struct vfile *f, void *buf, size_t size);
// vfile close callback for regular files (body not visible here).
void fs_close(struct vfile *f);
// vfile reset callback for regular files: seeks f->fd back to offset 0 when the descriptor is open (fd != -1).
void fs_reset(struct vfile *f);
// Job entry point; `arg` is treated as a parse_job_t *.
void parse(void *arg);

View File

@ -1,331 +0,0 @@
#include "pdf.h"
#include "src/ctx.h"
#define MIN_OCR_SIZE 350
#define MIN_OCR_LEN 10
__thread text_buffer_t thread_buffer;
/**
 * Render the first page of `fzdoc` as a PNG thumbnail and write it to the
 * index store under the document's uuid.
 *
 * The page is scaled so that its longer side equals ScanCtx.tn_size.
 *
 * @param ctx   MuPDF context used for every fz_* call
 * @param doc   document the thumbnail belongs to (filepath is used for logs, uuid as the store key)
 * @param fzdoc opened MuPDF document
 * @return TRUE when the thumbnail was written, FALSE when loading, rendering or PNG encoding failed
 */
int render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
int err = 0;
fz_page *cover = NULL;
// fz_var() marks locals that are modified inside fz_try and read after it
fz_var(cover);
fz_var(err);
// Step 1: load page 0. The catch branch stores 1, not ctx->error.errcode, so the logged code is always 1.
fz_try(ctx)
cover = fz_load_page(ctx, fzdoc, 0);
fz_catch(ctx)
err = 1;
if (err != 0) {
fz_drop_page(ctx, cover);
LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
return FALSE;
}
// NOTE(review): fz_bound_page(), fz_new_pixmap_with_bbox() and fz_new_draw_device() below run outside
// any fz_try block - confirm whether they can throw.
fz_rect bounds = fz_bound_page(ctx, cover);
// Step 2: pick a uniform scale factor from the longer page side
float scale;
float w = (float) bounds.x1 - bounds.x0;
float h = (float) bounds.y1 - bounds.y0;
if (w > h) {
scale = (float) ScanCtx.tn_size / w;
} else {
scale = (float) ScanCtx.tn_size / h;
}
fz_matrix m = fz_scale(scale, scale);
bounds = fz_transform_rect(bounds, m);
fz_irect bbox = fz_round_rect(bounds);
// Step 3: RGB pixmap covering the scaled page, every sample pre-filled with 0xFF
fz_pixmap *pixmap = fz_new_pixmap_with_bbox(ctx, ctx->colorspace->rgb, bbox, NULL, 0);
fz_clear_pixmap_with_value(ctx, pixmap, 0xFF);
fz_device *dev = fz_new_draw_device(ctx, m, pixmap);
fz_var(err);
// Step 4: draw the page while holding ScanCtx.mupdf_mu. The always-block closes and drops the
// device and releases the mutex whether or not fz_run_page() threw.
fz_try(ctx)
{
pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_run_page(ctx, cover, dev, fz_identity, NULL);
}
fz_always(ctx)
{
fz_close_device(ctx, dev);
fz_drop_device(ctx, dev);
pthread_mutex_unlock(&ScanCtx.mupdf_mu);
}
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message)
fz_drop_page(ctx, cover);
fz_drop_pixmap(ctx, pixmap);
return FALSE;
}
// Step 5: encode the pixmap as PNG
fz_buffer *fzbuf = NULL;
fz_var(fzbuf);
fz_var(err);
fz_try(ctx)
fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params);
fz_catch(ctx)
err = ctx->error.errcode;
if (err == 0) {
// Step 6: store the PNG bytes, keyed by the raw uuid of the document
unsigned char *tn_buf;
size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf);
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
}
// Released on both the success and the failure path; fzbuf is still NULL when encoding threw
fz_drop_buffer(ctx, fzbuf);
fz_drop_pixmap(ctx, pixmap);
fz_drop_page(ctx, cover);
if (err != 0) {
LOG_WARNINGF(doc->filepath, "fz_new_buffer_from_pixmap_as_png() returned error code [%d] %s", err,
ctx->error.message)
return FALSE;
}
return TRUE;
}
void fz_err_callback(void *user, UNUSED(const char *message)) {
if (LogCtx.verbose) {
document_t *doc = (document_t *) user;
LOG_WARNINGF(doc->filepath, "FZ: %s", message)
}
}
/**
 * Prepare a freshly created MuPDF context for parsing `doc`: ICC handling
 * is disabled, the document handlers are registered and both the warning
 * and the error printers are routed to fz_err_callback with `doc` as their
 * user pointer.
 */
__always_inline
static void init_ctx(fz_context *ctx, document_t *doc) {
    fz_disable_icc(ctx);
    fz_register_document_handlers(ctx);

    /* Errors and warnings share one callback. */
    ctx->error.print = fz_err_callback;
    ctx->error.print_user = doc;

    ctx->warn.print = fz_err_callback;
    ctx->warn.print_user = doc;
}
/**
 * Append every character of a structured-text block to `tex`.
 *
 * Blocks that are not FZ_STEXT_BLOCK_TEXT are ignored.
 *
 * @return TEXT_BUF_FULL as soon as the text buffer reports it is full,
 *         0 otherwise
 */
__always_inline
static int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
    if (block->type != FZ_STEXT_BLOCK_TEXT) {
        return 0;
    }

    for (fz_stext_line *ln = block->u.t.first_line; ln != NULL; ln = ln->next) {
        for (fz_stext_char *ch = ln->first_char; ch != NULL; ch = ch->next) {
            if (text_buffer_append_char(tex, ch->c) == TEXT_BUF_FULL) {
                return TEXT_BUF_FULL;
            }
        }
    }

    return 0;
}
#define IS_VALID_BPP(d) (d==1 || d==2 || d==4 || d==8 || d==16 || d==24 || d==32)
/**
 * fill_image hook installed on the structured-text device by parse_pdf():
 * runs Tesseract OCR on images embedded in a page and appends the
 * recognised text to the thread-local `thread_buffer`.
 *
 * Images are skipped unless both sides exceed MIN_OCR_SIZE, the component
 * count passes IS_VALID_BPP and the image has a non-zero x resolution.
 * Recognised text shorter than MIN_OCR_LEN is discarded.
 */
void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
                fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha),
                UNUSED(fz_color_params color_params)) {

    int l2factor = 0;

    if (!(img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && IS_VALID_BPP(img->n))) {
        return;
    }

    fz_pixmap *pix = img->get_pixmap(ctx, img, NULL, img->w, img->h, &l2factor);

    /* Check both sides of the decoded pixmap (the original tested the
     * image height twice and never the pixmap width). */
    if (pix->w > MIN_OCR_SIZE && pix->h > MIN_OCR_SIZE && img->xres != 0) {
        TessBaseAPI *api = TessBaseAPICreate();

        /* TessBaseAPIInit3 returns 0 on success; using an uninitialised
         * engine would crash. */
        if (TessBaseAPIInit3(api, ScanCtx.tesseract_path, ScanCtx.tesseract_lang) == 0) {
            TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride);
            TessBaseAPISetSourceResolution(api, pix->xres);

            char *text = TessBaseAPIGetUTF8Text(api);
            if (text != NULL) {
                size_t len = strlen(text);
                if (len > 0 && len >= MIN_OCR_LEN) {
                    /* len - 1: the final character of the OCR output is dropped */
                    text_buffer_append_string(&thread_buffer, text, len - 1);
                    LOG_DEBUGF(
                            "pdf.c",
                            "(OCR) %dx%d got %zuB from tesseract (%s), buffer:%zuB",
                            pix->w, pix->h, len, ScanCtx.tesseract_lang, thread_buffer.dyn_buffer.cur
                    )
                }
                /* The string is owned by the caller and must be released. */
                TessDeleteText(text);
            }

            TessBaseAPIEnd(api);
        } else {
            LOG_DEBUGF("pdf.c", "(OCR) could not initialize tesseract (%s)", ScanCtx.tesseract_lang)
        }

        TessBaseAPIDelete(api);
    }

    fz_drop_pixmap(ctx, pix);
}
void parse_pdf(const void *buf, size_t buf_len, document_t *doc) {
if (buf == NULL) {
return;
}
static int mu_is_initialized = 0;
if (!mu_is_initialized) {
pthread_mutex_init(&ScanCtx.mupdf_mu, NULL);
mu_is_initialized = 1;
}
fz_context *ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
init_ctx(ctx, doc);
int err = 0;
fz_document *fzdoc = NULL;
fz_stream *stream = NULL;
fz_var(fzdoc);
fz_var(stream);
fz_var(err);
fz_try(ctx)
{
stream = fz_open_memory(ctx, buf, buf_len);
fzdoc = fz_open_document_with_stream(ctx, mime_get_mime_text(doc->mime), stream);
}
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
char title[4096] = {'\0',};
fz_try(ctx)
fz_lookup_metadata(ctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title));
fz_catch(ctx)
;
if (strlen(title) > 0) {
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + strlen(title));
meta_content->key = MetaTitle;
strcpy(meta_content->strval, title);
APPEND_META(doc, meta_content)
}
int page_count = -1;
fz_var(err);
fz_try(ctx)
page_count = fz_count_pages(ctx, fzdoc);
fz_catch(ctx)
err = ctx->error.errcode;
if (err) {
LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, ctx->error.message)
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
if (ScanCtx.tn_size > 0) {
err = render_cover(ctx, doc, fzdoc);
}
if (err == TRUE) {
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
if (ScanCtx.content_size > 0) {
fz_stext_options opts = {0};
thread_buffer = text_buffer_create(ScanCtx.content_size);
for (int current_page = 0; current_page < page_count; current_page++) {
fz_page *page = NULL;
fz_var(err);
fz_try(ctx)
page = fz_load_page(ctx, fzdoc, current_page);
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
text_buffer_destroy(&thread_buffer);
fz_drop_page(ctx, page);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
dev->stroke_path = NULL;
dev->stroke_text = NULL;
dev->clip_text = NULL;
dev->clip_stroke_path = NULL;
dev->clip_stroke_text = NULL;
if (ScanCtx.tesseract_lang != NULL) {
dev->fill_image = fill_image;
}
fz_var(err);
fz_try(ctx)
fz_run_page(ctx, page, dev, fz_identity, NULL);
fz_always(ctx)
{
fz_close_device(ctx, dev);
fz_drop_device(ctx, dev);
}
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message)
text_buffer_destroy(&thread_buffer);
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_stext_block *block = stext->first_block;
while (block != NULL) {
int ret = read_stext_block(block, &thread_buffer);
if (ret == TEXT_BUF_FULL) {
break;
}
block = block->next;
}
fz_drop_stext_page(ctx, stext);
fz_drop_page(ctx, page);
if (thread_buffer.dyn_buffer.cur >= thread_buffer.dyn_buffer.size) {
break;
}
}
text_buffer_terminate_string(&thread_buffer);
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + thread_buffer.dyn_buffer.cur);
meta_content->key = MetaContent;
memcpy(meta_content->strval, thread_buffer.dyn_buffer.buf, thread_buffer.dyn_buffer.cur);
APPEND_META(doc, meta_content)
text_buffer_destroy(&thread_buffer);
}
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
}

View File

@ -1,9 +0,0 @@
#ifndef SIST2_PDF_H
#define SIST2_PDF_H
#include "src/sist.h"
void parse_pdf(const void *buf, size_t buf_len, document_t *doc);
#endif

View File

@ -1,37 +0,0 @@
#include "text.h"
#include "src/ctx.h"
/**
 * Index the text content of a plain-text file.
 *
 * Up to ScanCtx.content_size bytes are gathered: first the `bytes_read`
 * bytes already present in `buf`, then, when more is wanted, the remainder
 * read through `f->read`. The result is normalised through a text buffer
 * and appended to `doc` as a MetaContent entry.
 *
 * @param bytes_read number of valid bytes in `buf`
 * @param f          file to read the rest of the content from
 * @param buf        bytes that were already read from the file
 * @param doc        document whose metadata list is extended
 */
void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc) {

    char *intermediate_buf;
    int intermediate_buf_len;

    if (bytes_read == doc->size || bytes_read >= ScanCtx.content_size) {
        /* Everything needed is already in `buf`. */
        int to_copy = MIN(bytes_read, ScanCtx.content_size);
        intermediate_buf = malloc(to_copy);
        if (intermediate_buf == NULL) {
            return;
        }
        intermediate_buf_len = to_copy;
        memcpy(intermediate_buf, buf, to_copy);

    } else {
        int to_read = MIN(ScanCtx.content_size, doc->size) - bytes_read;
        if (to_read < 0) {
            /* A negative count would turn into a huge size_t in read(). */
            to_read = 0;
        }

        intermediate_buf = malloc(to_read + bytes_read);
        if (intermediate_buf == NULL) {
            return;
        }
        if (bytes_read != 0) {
            memcpy(intermediate_buf, buf, bytes_read);
        }

        /* NOTE(review): assumes read() returns the number of bytes read, or
         * a non-positive value on error/EOF -- confirm against the vfile
         * implementations. Only bytes that were really read are indexed. */
        int ret = f->read(f, intermediate_buf + bytes_read, to_read);
        intermediate_buf_len = bytes_read + (ret > 0 ? ret : 0);
    }

    text_buffer_t tex = text_buffer_create(ScanCtx.content_size);
    text_buffer_append_string(&tex, intermediate_buf, intermediate_buf_len);
    text_buffer_terminate_string(&tex);

    meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur);
    if (meta != NULL) {
        meta->key = MetaContent;
        strcpy(meta->strval, tex.dyn_buffer.buf);
        APPEND_META(doc, meta)
    }

    free(intermediate_buf);
    text_buffer_destroy(&tex);
}

View File

@ -1,8 +0,0 @@
#ifndef SIST2_TEXT_H
#define SIST2_TEXT_H
#include "src/sist.h"
void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc);
#endif

View File

@ -1,75 +1,51 @@
#ifndef SIST_H
#define SIST_H
#ifndef FALSE
#define FALSE (0)
#define BOOL int
#endif
#ifndef TRUE
#define TRUE (!FALSE)
#endif
#undef MAX
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#undef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#ifndef PATH_MAX
#define PATH_MAX 4096
#endif
#undef ABS
#define ABS(a) (((a) < 0) ? -(a) : (a))
#define UUID_STR_LEN 37
#define UNUSED(x) __attribute__((__unused__)) x
#include <glib-2.0/glib.h>
#include <unistd.h>
#include "util.h"
#include "log.h"
#include "types.h"
#include "libscan/scan.h"
#include <cjson/cJSON.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <unistd.h>
#include <fcntl.h>
#include <ftw.h>
#include <uuid.h>
#include <magic.h>
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavcodec/avcodec.h"
#include "libavutil/imgutils.h"
#include <ctype.h>
#include <mupdf/fitz.h>
#include <mupdf/pdf.h>
#include "argparse/argparse.h"
#include <time.h>
#include <limits.h>
#include <pthread.h>
#include <dirent.h>
#include <sys/stat.h>
#include <wordexp.h>
#include "ft2build.h"
#include "freetype/freetype.h"
#include <archive.h>
#include <archive_entry.h>
#include <libxml/xmlstring.h>
#include <libxml/parser.h>
#define BOOL int
#include <tesseract/capi.h>
#include <pcre.h>
#include <onion/onion.h>
#include <onion/handler.h>
#include <onion/block.h>
#include <onion/shortcuts.h>
#include <onion/codecs.h>
#include <curl/curl.h>
#include "cJSON/cJSON.h"
#include "types.h"
#include "tpool.h"
#include "utf8.h/utf8.h"
#include "util.h"
#include "io/store.h"
#include "io/serialize.h"
#include "io/walk.h"
#include "parsing/parse.h"
#include "parsing/mime.h"
#include "parsing/text.h"
#include "parsing/pdf.h"
#include "parsing/media.h"
#include "parsing/font.h"
#include "parsing/arc.h"
#include "parsing/doc.h"
#include "parsing/cbr.h"
#include "cli.h"
#include "log.h"
#include "src/index/elastic.h"
#include "index/web.h"
#include "web/serve.h"
#include "web/auth_basic.h"
#include <sys/types.h>
#include <errno.h>
#include <ctype.h>
#endif

View File

@ -6,9 +6,10 @@
width: 1rem;
margin-right: 0.2rem;
cursor: pointer;
color: #757575;
line-height: 1rem;
height: 1.1rem;
height: 1rem;
background-image: url(data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hsaW5rIiB4PSIwcHgiIHk9IjBweCIKICAgICB2aWV3Qm94PSIwIDAgNDI2LjY2NyA0MjYuNjY3IiBzdHlsZT0iZW5hYmxlLWJhY2tncm91bmQ6bmV3IDAgMCA0MjYuNjY3IDQyNi42Njc7IiBmaWxsPSIjZmZmIj4KPGc+CiAgICA8Zz4KICAgICAgICA8Zz4KICAgICAgICAgICAgPHJlY3QgeD0iMTkyIiB5PSIxOTIiIHdpZHRoPSI0Mi42NjciIGhlaWdodD0iMTI4Ii8+CiAgICAgICAgICAgIDxwYXRoIGQ9Ik0yMTMuMzMzLDBDOTUuNDY3LDAsMCw5NS40NjcsMCwyMTMuMzMzczk1LjQ2NywyMTMuMzMzLDIxMy4zMzMsMjEzLjMzM1M0MjYuNjY3LDMzMS4yLDQyNi42NjcsMjEzLjMzMwogICAgICAgICAgICAgICAgUzMzMS4yLDAsMjEzLjMzMywweiBNMjEzLjMzMywzODRjLTk0LjA4LDAtMTcwLjY2Ny03Ni41ODctMTcwLjY2Ny0xNzAuNjY3UzExOS4yNTMsNDIuNjY3LDIxMy4zMzMsNDIuNjY3CiAgICAgICAgICAgICAgICBTMzg0LDExOS4yNTMsMzg0LDIxMy4zMzNTMzA3LjQxMywzODQsMjEzLjMzMywzODR6Ii8+CiAgICAgICAgICAgIDxyZWN0IHg9IjE5MiIgeT0iMTA2LjY2NyIgd2lkdGg9IjQyLjY2NyIgaGVpZ2h0PSI0Mi42NjciLz4KICAgICAgICA8L2c+CiAgICA8L2c+CjwvZz4KPC9zdmc+Cg==);
filter: brightness(65%);
}
.info-icon:hover {
@ -456,3 +457,26 @@ option {
svg {
fill: white;
}
.play {
position: absolute;
width: 50px;
height: 50px;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
pointer-events: none;
}
.play svg {
fill: rgba(255, 255, 255, 0.7);
}
.img-wrapper:hover svg {
fill: rgba(255, 255, 255, 1);
}
.pointer {
cursor: pointer;
}

View File

@ -6,9 +6,10 @@
width: 1rem;
margin-right: 0.2rem;
cursor: pointer;
color: #757575;
line-height: 1rem;
height: 1rem;
background-image: url(data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hsaW5rIiB4PSIwcHgiIHk9IjBweCIKICAgICB2aWV3Qm94PSIwIDAgNDI2LjY2NyA0MjYuNjY3IiBzdHlsZT0iZW5hYmxlLWJhY2tncm91bmQ6bmV3IDAgMCA0MjYuNjY3IDQyNi42Njc7IiBmaWxsPSIjZmZmIj4KPGc+CiAgICA8Zz4KICAgICAgICA8Zz4KICAgICAgICAgICAgPHJlY3QgeD0iMTkyIiB5PSIxOTIiIHdpZHRoPSI0Mi42NjciIGhlaWdodD0iMTI4Ii8+CiAgICAgICAgICAgIDxwYXRoIGQ9Ik0yMTMuMzMzLDBDOTUuNDY3LDAsMCw5NS40NjcsMCwyMTMuMzMzczk1LjQ2NywyMTMuMzMzLDIxMy4zMzMsMjEzLjMzM1M0MjYuNjY3LDMzMS4yLDQyNi42NjcsMjEzLjMzMwogICAgICAgICAgICAgICAgUzMzMS4yLDAsMjEzLjMzMywweiBNMjEzLjMzMywzODRjLTk0LjA4LDAtMTcwLjY2Ny03Ni41ODctMTcwLjY2Ny0xNzAuNjY3UzExOS4yNTMsNDIuNjY3LDIxMy4zMzMsNDIuNjY3CiAgICAgICAgICAgICAgICBTMzg0LDExOS4yNTMsMzg0LDIxMy4zMzNTMzA3LjQxMywzODQsMjEzLjMzMywzODR6Ii8+CiAgICAgICAgICAgIDxyZWN0IHg9IjE5MiIgeT0iMTA2LjY2NyIgd2lkdGg9IjQyLjY2NyIgaGVpZ2h0PSI0Mi42NjciLz4KICAgICAgICA8L2c+CiAgICA8L2c+CjwvZz4KPC9zdmc+Cg==);
filter: brightness(45%);
}
.info-icon:hover {
@ -161,6 +162,11 @@ body {
width: 100%;
}
@media screen and (max-width: 1200px) {
.bricklayer-column {
max-width: 100%;
}
}
@media screen and (min-width: 1500px) {
.container {
@ -295,6 +301,7 @@ mark {
.small-btn {
display: none;
}
.large-btn {
display: inherit;
}
@ -304,6 +311,7 @@ mark {
.small-btn {
display: inherit;
}
.large-btn {
display: none;
}
@ -316,3 +324,26 @@ mark {
#pathTree .title {
cursor: pointer;
}
.play {
position: absolute;
width: 50px;
height: 50px;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
pointer-events: none;
}
.play svg {
fill: rgba(0, 0, 0, 0.7);
}
.img-wrapper:hover svg {
fill: rgba(0, 0, 0, 1);
}
.pointer {
cursor: pointer;
}

3
src/static/css/lity.min.css vendored Normal file
View File

@ -0,0 +1,3 @@
/*! Lity - v2.4.0 - 2019-08-10
* http://sorgalla.com/lity/
* Copyright (c) 2015-2019 Jan Sorgalla; Licensed MIT */.lity{z-index:9990;position:fixed;top:0;right:0;bottom:0;left:0;white-space:nowrap;background:#0b0b0b;background:rgba(0,0,0,0.9);outline:none !important;opacity:0;-webkit-transition:opacity .3s ease;-o-transition:opacity .3s ease;transition:opacity .3s ease}.lity.lity-opened{opacity:1}.lity.lity-closed{opacity:0}.lity *{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.lity-wrap{z-index:9990;position:fixed;top:0;right:0;bottom:0;left:0;text-align:center;outline:none !important}.lity-wrap:before{content:'';display:inline-block;height:100%;vertical-align:middle;margin-right:-0.25em}.lity-loader{z-index:9991;color:#fff;position:absolute;top:50%;margin-top:-0.8em;width:100%;text-align:center;font-size:14px;font-family:Arial,Helvetica,sans-serif;opacity:0;-webkit-transition:opacity .3s ease;-o-transition:opacity .3s ease;transition:opacity .3s ease}.lity-loading .lity-loader{opacity:1}.lity-container{z-index:9992;position:relative;text-align:left;vertical-align:middle;display:inline-block;white-space:normal;max-width:100%;max-height:100%;outline:none !important}.lity-content{z-index:9993;width:100%;-webkit-transform:scale(1);-ms-transform:scale(1);-o-transform:scale(1);transform:scale(1);-webkit-transition:-webkit-transform .3s ease;transition:-webkit-transform .3s ease;-o-transition:-o-transform .3s ease;transition:transform .3s ease;transition:transform .3s ease, -webkit-transform .3s ease, -o-transform .3s ease}.lity-loading .lity-content,.lity-closed .lity-content{-webkit-transform:scale(.8);-ms-transform:scale(.8);-o-transform:scale(.8);transform:scale(.8)}.lity-content:after{content:'';position:absolute;left:0;top:0;bottom:0;display:block;right:0;width:auto;height:auto;z-index:-1;-webkit-box-shadow:0 0 8px rgba(0,0,0,0.6);box-shadow:0 0 8px 
rgba(0,0,0,0.6)}.lity-close{z-index:9994;width:35px;height:35px;position:fixed;right:0;top:0;-webkit-appearance:none;cursor:pointer;text-decoration:none;text-align:center;padding:0;color:#fff;font-style:normal;font-size:35px;font-family:Arial,Baskerville,monospace;line-height:35px;text-shadow:0 1px 2px rgba(0,0,0,0.6);border:0;background:none;outline:none;-webkit-box-shadow:none;box-shadow:none}.lity-close::-moz-focus-inner{border:0;padding:0}.lity-close:hover,.lity-close:focus,.lity-close:active,.lity-close:visited{text-decoration:none;text-align:center;padding:0;color:#fff;font-style:normal;font-size:35px;font-family:Arial,Baskerville,monospace;line-height:35px;text-shadow:0 1px 2px rgba(0,0,0,0.6);border:0;background:none;outline:none;-webkit-box-shadow:none;box-shadow:none}.lity-close:active{top:1px}.lity-image img{max-width:100%;display:block;line-height:0;border:0}.lity-iframe .lity-container,.lity-youtube .lity-container,.lity-vimeo .lity-container,.lity-facebookvideo .lity-container,.lity-googlemaps .lity-container{width:100%;max-width:964px}.lity-iframe-container{width:100%;height:0;padding-top:56.25%;overflow:hidden;pointer-events:all;-webkit-transform:translateZ(0);transform:translateZ(0);-webkit-overflow-scrolling:touch}.lity-iframe-container iframe{position:absolute;display:block;top:0;left:0;width:100%;height:100%;-webkit-box-shadow:0 0 8px rgba(0,0,0,0.6);box-shadow:0 0 8px rgba(0,0,0,0.6);background:#000}.lity-hide{display:none}

View File

Before

Width:  |  Height:  |  Size: 595 B

After

Width:  |  Height:  |  Size: 595 B

View File

Before

Width:  |  Height:  |  Size: 669 B

After

Width:  |  Height:  |  Size: 669 B

View File

@ -59,24 +59,28 @@ function applyNameToTitle(hit, title, extension) {
title.appendChild(document.createTextNode(hit["_source"]["name"] + extension));
}
/**
 * Append a <source> element pointing at `url` to `video`.
 * The declared type is `mime` when the video element reports it can play
 * it, and "video/webm" otherwise.
 */
function addVidSrc(url, mime, video) {
    const source = document.createElement("source");
    source.setAttribute("src", url);

    const declaredType = video.canPlayType(mime) ? mime : "video/webm";
    source.setAttribute("type", declaredType);

    video.appendChild(source);
}
function shouldPlayVideo(hit) {
const videoc = hit["_source"]["videoc"];
return videoc !== "hevc" && videoc !== "mpeg2video" && videoc !== "wmv3";
const mime = hit["_source"]["mime"];
return mime &&
mime.startsWith("video/") &&
hit["_source"]["extension"] !== "mkv" &&
hit["_source"]["extension"] !== "avi" &&
videoc !== "hevc" &&
videoc !== "mpeg2video" &&
videoc !== "wmv3";
}
function shouldDisplayRawImage(hit) {
return hit["_source"]["mime"] && hit["_source"]["mime"].startsWith("image/") && hit["_source"]["videoc"] !== "tiff";
const mime = hit["_source"]["mime"];
return mime &&
mime.startsWith("image/") &&
hit["_source"]["mime"] &&
!hit["_source"]["parent"] &&
hit["_source"]["videoc"] !== "tiff" &&
hit["_source"]["videoc"] !== "ppm";
}
function makePlaceholder(w, h, small) {
@ -229,6 +233,7 @@ function createDocCard(hit) {
let thumbnailOverlay = null;
let imgWrapper = document.createElement("div");
imgWrapper.setAttribute("style", "position: relative");
imgWrapper.setAttribute("class", "img-wrapper");
let mimeCategory = hit["_source"]["mime"].split("/")[0];
@ -237,7 +242,6 @@ function createDocCard(hit) {
//Thumbnail overlay
switch (mimeCategory) {
case "image":
thumbnailOverlay = document.createElement("div");
thumbnailOverlay.setAttribute("class", "card-img-overlay");
@ -330,91 +334,55 @@ function createDocCard(hit) {
}
function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
let thumbnail;
let isSubDocument = hit["_source"].hasOwnProperty("parent");
if (mimeCategory === "video" && shouldPlayVideo(hit) && !isSubDocument) {
thumbnail = document.createElement("video");
addVidSrc("f/" + hit["_id"], hit["_source"]["mime"], thumbnail);
const placeholder = makePlaceholder(hit["_source"]["width"], hit["_source"]["height"], small);
imgWrapper.appendChild(placeholder);
if (small) {
thumbnail.setAttribute("class", "fit-sm");
} else {
thumbnail.setAttribute("class", "fit");
}
if (small) {
thumbnail.style.cursor = "pointer";
thumbnail.title = "Enlarge";
thumbnail.addEventListener("click", function () {
imgWrapper.classList.remove("wrapper-sm", "mr-1");
imgWrapper.parentElement.classList.add("media-expanded");
thumbnail.setAttribute("class", "fit");
thumbnail.setAttribute("controls", "");
});
} else {
thumbnail.setAttribute("controls", "");
}
thumbnail.setAttribute("preload", "none");
thumbnail.setAttribute("poster", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
thumbnail.addEventListener("dblclick", function () {
thumbnail.setAttribute("controls", "");
if (thumbnail.webkitRequestFullScreen) {
thumbnail.webkitRequestFullScreen();
} else {
thumbnail.requestFullscreen();
}
});
const poster = new Image();
poster.src = thumbnail.getAttribute('poster');
poster.addEventListener("load", function () {
placeholder.remove();
imgWrapper.appendChild(thumbnail);
});
} else if ((hit["_source"].hasOwnProperty("width") && hit["_source"]["width"] > 32 && hit["_source"]["height"] > 32)
|| hit["_source"]["mime"] === "application/pdf"
|| hit["_source"]["mime"] === "application/epub+zip"
|| hit["_source"]["mime"] === "application/x-cbz"
|| hit["_source"]["mime"] === "application/x-cbr"
|| hit["_source"].hasOwnProperty("font_name")
) {
thumbnail = document.createElement("img");
if (small) {
thumbnail.setAttribute("class", "fit-sm");
} else {
thumbnail.setAttribute("class", "card-img-top fit");
}
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
if (!hit["_source"]["parent"] && shouldDisplayRawImage(hit)) {
imgWrapper.setAttribute("id", "sp" + hit["_id"]);
imgWrapper.setAttribute("data-src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
imgWrapper.setAttribute("href", `f/${hit["_id"]}`);
imgWrapper.setAttribute("data-caption", hit["_source"]["path"] + "/" + hit["_source"]["name"] + ext(hit));
imgWrapper.setAttribute("data-group", "p" + Math.floor(docCount / SIZE));
imgWrapper.classList.add("sp");
}
const placeholder = makePlaceholder(hit["_source"]["width"], hit["_source"]["height"], small);
imgWrapper.appendChild(placeholder);
thumbnail.addEventListener("error", () => {
imgWrapper.remove();
});
thumbnail.addEventListener("load", () => {
placeholder.remove();
imgWrapper.appendChild(thumbnail);
});
if (!hit["_source"].hasOwnProperty("thumbnail")) {
return null;
}
let thumbnail = document.createElement("img");
if (small) {
thumbnail.setAttribute("class", "fit-sm");
} else {
thumbnail.setAttribute("class", "card-img-top fit");
}
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
if (shouldDisplayRawImage(hit)) {
thumbnail.addEventListener("click", () => {
const l = lity(`f/${hit["_id"]}#.jpg`);
window.addEventListener("scroll", () => l.close());
});
thumbnail.classList.add("pointer");
} else if (shouldPlayVideo(hit)) {
thumbnail.addEventListener("click", () => lity(`f/${hit["_id"]}#.mp4`));
thumbnail.classList.add("pointer");
if (!small) {
const playOverlay = document.createElement("div");
playOverlay.innerHTML = '<svg viewBox="0 0 494.942 494.942" xmlns="http://www.w3.org/2000/svg"><path d="m35.353 0 424.236 247.471-424.236 247.471z"/></svg>';
playOverlay.classList.add("play");
imgWrapper.prepend(playOverlay);
}
}
const placeholder = makePlaceholder(hit["_source"]["width"], hit["_source"]["height"], small);
imgWrapper.appendChild(placeholder);
thumbnail.addEventListener("error", () => {
imgWrapper.remove();
});
thumbnail.addEventListener("load", () => {
placeholder.remove();
imgWrapper.appendChild(thumbnail);
});
return thumbnail;
}
function makeInfoButton(hit) {
const infoButton = document.createElement("span");
infoButton.appendChild(document.createTextNode("🛈"));
infoButton.setAttribute("class", "info-icon");
infoButton.addEventListener("click", infoButtonCb(hit));
return infoButton;
@ -605,7 +573,7 @@ function makeStatsCard(searchResult) {
item.appendChild(document.createTextNode(SORT_MODES[mode].text));
sortModeMenu.appendChild(item);
item.onclick = function() {
item.onclick = function () {
CONF.options.sort = mode;
CONF.save();
searchDebounced();

5
src/static/js/lity.min.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@ -166,7 +166,11 @@ function getDocumentInfo(id) {
}
function handleTreeClick(tree) {
return (node) => {
return (node, e) => {
if (e !== "checked") {
return
}
if (node.id === "any") {
if (!node.itree.state.checked) {
tree.deselect();
@ -294,7 +298,6 @@ function insertHits(resultContainer, hits) {
for (let i = 0; i < hits.length; i++) {
if (CONF.options.display === "grid") {
console.log(resultContainer._brick)
resultContainer._brick.append(createDocCard(hits[i]));
} else {
resultContainer.appendChild(createDocLine(hits[i]));
@ -461,10 +464,6 @@ function search(after = null) {
resultContainer._brick = new Bricklayer(resultContainer);
}
window.setTimeout(() => {
$(".sp").SmartPhoto({animationSpeed: 0, swipeTopToClose: true, showAnimation: false, forceInterval: 50});
}, 100);
if (!after) {
docCount = 0;
}
@ -633,8 +632,8 @@ function handlePathTreeClick(tree) {
return (event, node, handler) => {
if (node.depth !== 0) {
$("#pathBar").val(node.id)
$("#pathTreeModal").modal("hide")
$("#pathBar").val(node.id);
$("#pathTreeModal").modal("hide");
searchDebounced();
}

View File

@ -11,7 +11,7 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">v1.3.4</span>
<span class="badge badge-pill version">2.0.0</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<button style="margin-left: auto" class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings</button>
<a id="theme" class="btn" title="Toggle theme" href="/">Theme</a>

View File

@ -1,5 +1,7 @@
#include "tpool.h"
#include "ctx.h"
#include "sist.h"
#include <pthread.h>
typedef void (*thread_func_t)(void *arg);

View File

@ -1,48 +1,6 @@
#ifndef SIST2_TYPES_H
#define SIST2_TYPES_H
/* Flag bits OR-ed into a metakey value (see enum metakey). */
#define META_INT_MASK 0x80
#define META_STR_MASK 0x40
#define META_LONG_MASK 0x20
/* Predicates testing a key (or, for IS_META_STR, a pointer to a meta line)
 * for one of the flag bits. Fully parenthesised so that they can be
 * negated or combined inside larger expressions. */
#define IS_META_INT(key) (((key) & META_INT_MASK) == META_INT_MASK)
#define IS_META_LONG(key) (((key) & META_LONG_MASK) == META_LONG_MASK)
#define IS_META_STR(meta) (((meta)->key & META_STR_MASK) == META_STR_MASK)
#define ARC_MODE_SKIP 0
#define ARC_MODE_LIST 1
#define ARC_MODE_SHALLOW 2
#define ARC_MODE_RECURSE 3
typedef int archive_mode_t;
// This is written to file as a 8bit char!
// Every key is a sequential id OR-ed with exactly one of META_STR_MASK,
// META_INT_MASK or META_LONG_MASK. Presumably the flag tells readers which
// member of the meta_line value union holds the data -- confirm against the
// serialization code.
enum metakey {
MetaContent = 1 | META_STR_MASK,
MetaWidth = 2 | META_INT_MASK,
MetaHeight = 3 | META_INT_MASK,
MetaMediaDuration = 4 | META_LONG_MASK,
MetaMediaAudioCodec = 5 | META_INT_MASK,
MetaMediaVideoCodec = 6 | META_INT_MASK,
MetaMediaBitrate = 7 | META_LONG_MASK,
MetaArtist = 8 | META_STR_MASK,
MetaAlbum = 9 | META_STR_MASK,
MetaAlbumArtist = 10 | META_STR_MASK,
MetaGenre = 11 | META_STR_MASK,
MetaTitle = 12 | META_STR_MASK,
MetaFontName = 13 | META_STR_MASK,
MetaParent = 14 | META_STR_MASK,
MetaExifMake = 15 | META_STR_MASK,
MetaExifSoftware = 16 | META_STR_MASK,
MetaExifExposureTime = 17 | META_STR_MASK,
MetaExifFNumber = 18 | META_STR_MASK,
MetaExifFocalLength = 19 | META_STR_MASK,
MetaExifUserComment = 20 | META_STR_MASK,
MetaExifModel = 21 | META_STR_MASK,
MetaExifIsoSpeedRatings = 22 | META_STR_MASK,
MetaExifDateTime = 23 | META_STR_MASK,
//Note to self: this will break after 31 entries
// (id 32 is 0x20, which is the META_LONG_MASK bit)
};
#define INDEX_TYPE_BIN "binary"
#define INDEX_TYPE_JSON "json"
#define INDEX_VERSION_EXTERNAL "_external_v1"
@ -64,67 +22,4 @@ typedef struct index_t {
char path[PATH_MAX];
} index_t;
// One metadata entry of a document; entries form a singly linked list
// (see APPEND_META).
typedef struct meta_line {
// next entry in the list, NULL for the last one
struct meta_line *next;
// which piece of metadata this entry carries
enum metakey key;
// value of the entry; strval is a zero-length array, so string values are
// stored in extra bytes allocated right after the struct
union {
unsigned long longval;
int intval;
char strval[0];
};
} meta_line_t;
// A file (or archive entry) being indexed, together with its metadata list.
typedef struct document {
// 16-byte identifier; used as the key when writing the thumbnail to the store
unsigned char uuid[16];
// presumably the inode number of the file -- TODO confirm
unsigned long ino;
// size of the file in bytes (compared with the bytes read so far in parse_text)
unsigned long size;
// mime identifier, resolved to text with mime_get_mime_text()
unsigned int mime;
// presumably the modification time of the file -- TODO confirm
int mtime;
// NOTE(review): base/ext look like offsets into filepath -- confirm
short base;
short ext;
// first and last entry of the metadata list maintained by APPEND_META
meta_line_t *meta_head;
meta_line_t *meta_tail;
// path used to attribute log messages to this document
char *filepath;
} document_t;
// Virtual file: a uniform read/close interface over the two kinds of
// handles held in the union below.
typedef struct vfile vfile_t;
// Reads up to `size` bytes into `buf`.
// NOTE(review): presumably returns the number of bytes read -- confirm
// against the implementations.
typedef int (*read_func_t)(struct vfile *, void *buf, size_t size);
// Releases the underlying handle.
typedef void (*close_func_t)(struct vfile *);
typedef struct vfile {
// underlying handle: a file descriptor or a libarchive reader
union {
int fd;
struct archive *arc;
};
// presumably non-zero when `fd` is the active union member -- TODO confirm
int is_fs_file;
char *filepath;
// operations for this kind of file
read_func_t read;
close_func_t close;
} vfile_t;
// Describes one file to parse.
typedef struct parse_job_t {
// NOTE(review): base/ext look like offsets into filepath -- confirm
int base;
int ext;
// stat information of the file
struct stat info;
// handle used to read the file contents
struct vfile vfile;
// uuid of the parent document
// (presumably the containing archive -- TODO confirm)
uuid_t parent;
// NOTE(review): declared with one byte; presumably the struct is
// over-allocated so that the whole path fits here -- confirm at the
// allocation site
char filepath[1];
} parse_job_t;
// Appends `meta` at the tail of the metadata list of `doc`, initialising
// head and tail when the list is empty.
// The macro expands to several statements ending in a block, so it is
// invoked without a trailing semicolon and must not be used as the sole
// body of an unbraced if/else. Both arguments are evaluated more than once.
#define APPEND_META(doc, meta) \
meta->next = NULL;\
if (doc->meta_head == NULL) {\
doc->meta_head = meta;\
doc->meta_tail = doc->meta_head;\
} else {\
doc->meta_tail->next = meta;\
doc->meta_tail = meta;\
}
#endif

View File

@ -1,6 +1,9 @@
#include "util.h"
#include "src/ctx.h"
#include <wordexp.h>
#include <glib.h>
#define PBSTR "========================================"
#define PBWIDTH 40

View File

@ -2,28 +2,15 @@
#define SIST2_UTIL_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define TEXT_BUF_FULL -1
#define INITIAL_BUF_SIZE 1024 * 16
#include <glib.h>
#define SHOULD_IGNORE_CHAR(c) !(SHOULD_KEEP_CHAR(c))
#define SHOULD_KEEP_CHAR(c) ((c >= '\'' && c <= ';') || (c >= 'A' && c <= 'z') || (c > 127))
#include "third-party/utf8.h/utf8.h"
#include "libscan/scan.h"
typedef struct dyn_buffer {
char *buf;
size_t cur;
size_t size;
} dyn_buffer_t;
#include "sist.h"
typedef struct text_buffer {
long max_size;
int last_char_was_whitespace;
dyn_buffer_t dyn_buffer;
} text_buffer_t;
char *abspath(const char *path);
char *expandpath(const char *path);
@ -34,260 +21,6 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
GHashTable *incremental_get_table();
/**
 * Check that `s` starts with one well-formed UTF-8 sequence.
 *
 * A multi-byte sequence is rejected when a continuation byte is missing,
 * when it is followed by one continuation byte too many, or when it is an
 * overlong encoding. Bytes up to s[4] may be read.
 *
 * @return TRUE when the sequence is valid, FALSE otherwise
 */
__always_inline
static int utf8_validchr2(const char *s) {
    const char lead = *s;

    /* 1 byte: plain ASCII */
    if ((0x80 & lead) == 0x00) {
        return TRUE;
    }

    /* 4 bytes: 11110xxx + 3 continuation bytes */
    if ((0xf8 & lead) == 0xf0) {
        if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) ||
            (0x80 != (0xc0 & s[3]))) {
            return FALSE;
        }
        if (0x80 == (0xc0 & s[4])) {
            return FALSE;
        }
        if ((0 == (0x07 & s[0])) && (0 == (0x30 & s[1]))) {
            return FALSE;
        }
        return TRUE;
    }

    /* 3 bytes: 1110xxxx + 2 continuation bytes */
    if ((0xf0 & lead) == 0xe0) {
        if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2]))) {
            return FALSE;
        }
        if (0x80 == (0xc0 & s[3])) {
            return FALSE;
        }
        if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1]))) {
            return FALSE;
        }
        return TRUE;
    }

    /* 2 bytes: 110xxxxx + 1 continuation byte */
    if ((0xe0 & lead) == 0xc0) {
        if (0x80 != (0xc0 & s[1])) {
            return FALSE;
        }
        if (0x80 == (0xc0 & s[2])) {
            return FALSE;
        }
        if (0 == (0x1e & s[0])) {
            return FALSE;
        }
        return TRUE;
    }

    /* stray continuation byte or invalid lead byte */
    return FALSE;
}
/**
 * Create an empty growable byte buffer with INITIAL_BUF_SIZE bytes of
 * storage. The storage is released with dyn_buffer_destroy().
 */
__always_inline
static dyn_buffer_t dyn_buffer_create() {
    dyn_buffer_t created;

    created.buf = malloc(INITIAL_BUF_SIZE);
    created.size = INITIAL_BUF_SIZE;
    created.cur = 0;

    return created;
}
/**
 * Make sure `size` more bytes fit after the write position, doubling the
 * capacity as many times as needed.
 */
__always_inline
static void grow_buffer(dyn_buffer_t *buf, size_t size) {
    if (buf->cur + size <= buf->size) {
        return;
    }

    while (buf->cur + size > buf->size) {
        buf->size *= 2;
    }

    buf->buf = realloc(buf->buf, buf->size);
}
/**
 * Make sure at least sizeof(long) bytes are free after the write position,
 * doubling the capacity once when they are not.
 */
__always_inline
static void grow_buffer_small(dyn_buffer_t *buf) {
    if (buf->cur + sizeof(long) <= buf->size) {
        return;
    }

    buf->size *= 2;
    buf->buf = realloc(buf->buf, buf->size);
}
/**
 * Append `size` raw bytes from `data`, growing the buffer when required.
 */
__always_inline
static void dyn_buffer_write(dyn_buffer_t *buf, const void *data, size_t size) {
    grow_buffer(buf, size);

    char *dest = buf->buf + buf->cur;
    memcpy(dest, data, size);

    buf->cur += size;
}
/**
 * Append a single byte, growing the buffer when required.
 */
__always_inline
static void dyn_buffer_write_char(dyn_buffer_t *buf, char c) {
    grow_buffer_small(buf);
    buf->buf[buf->cur] = c;
    buf->cur += sizeof(char);
}
/**
 * Append the characters of `str` followed by a terminating NUL byte.
 */
__always_inline
static void dyn_buffer_write_str(dyn_buffer_t *buf, char *str) {
    const size_t length = strlen(str);

    dyn_buffer_write(buf, str, length);
    dyn_buffer_write_char(buf, '\0');
}
/**
 * Append the characters of `str` without its terminating NUL byte.
 */
__always_inline
static void dyn_buffer_append_string(dyn_buffer_t *buf, char *str) {
    const size_t length = strlen(str);
    dyn_buffer_write(buf, str, length);
}
/**
 * Append the in-memory representation of an int (host byte order).
 */
__always_inline
static void dyn_buffer_write_int(dyn_buffer_t *buf, int d) {
    grow_buffer_small(buf);
    /* memcpy instead of a store through a cast pointer: the write position
     * is an arbitrary byte offset and may not be aligned for an int. */
    memcpy(buf->buf + buf->cur, &d, sizeof(d));
    buf->cur += sizeof(d);
}
/**
 * Append the in-memory representation of a short (host byte order).
 */
__always_inline
static void dyn_buffer_write_short(dyn_buffer_t *buf, short s) {
    grow_buffer_small(buf);
    /* memcpy instead of a store through a cast pointer: the write position
     * is an arbitrary byte offset and may not be aligned for a short. */
    memcpy(buf->buf + buf->cur, &s, sizeof(s));
    buf->cur += sizeof(s);
}
/**
 * Append the in-memory representation of an unsigned long (host byte order).
 */
__always_inline
static void dyn_buffer_write_long(dyn_buffer_t *buf, unsigned long l) {
    grow_buffer_small(buf);
    /* memcpy instead of a store through a cast pointer: the write position
     * is an arbitrary byte offset and may not be aligned for a long. */
    memcpy(buf->buf + buf->cur, &l, sizeof(l));
    buf->cur += sizeof(l);
}
/**
 * Release the storage owned by the buffer. The struct itself is not freed.
 */
__always_inline
static void dyn_buffer_destroy(dyn_buffer_t *buf) {
    char *storage = buf->buf;
    free(storage);
}
/**
 * Release the storage of the byte buffer embedded in the text buffer.
 */
__always_inline
static void text_buffer_destroy(text_buffer_t *buf) {
    dyn_buffer_t *inner = &buf->dyn_buffer;
    dyn_buffer_destroy(inner);
}
/**
 * Create an empty text buffer.
 *
 * @param max_size number of bytes after which the append functions report
 *                 TEXT_BUF_FULL; the limit is only enforced when it is
 *                 greater than zero
 */
__always_inline
static text_buffer_t text_buffer_create(int max_size) {
    text_buffer_t created;

    created.max_size = max_size;
    created.last_char_was_whitespace = FALSE;
    created.dyn_buffer = dyn_buffer_create();

    return created;
}
/**
 * Append the code point `c`, encoded as UTF-8.
 *
 * Characters rejected by SHOULD_IGNORE_CHAR, and spaces, are written as a
 * single space; consecutive ones and leading ones are dropped.
 *
 * @return TEXT_BUF_FULL when a write made the buffer reach max_size
 *         (only when max_size > 0), 0 otherwise
 */
__always_inline
static int text_buffer_append_char(text_buffer_t *buf, int c) {
    dyn_buffer_t *dyn = &buf->dyn_buffer;

    if (SHOULD_IGNORE_CHAR(c) || c == ' ') {
        /* nothing is written for repeated or leading whitespace */
        if (buf->last_char_was_whitespace || dyn->cur == 0) {
            return 0;
        }
        dyn_buffer_write_char(dyn, ' ');
        buf->last_char_was_whitespace = TRUE;
    } else {
        buf->last_char_was_whitespace = FALSE;
        grow_buffer_small(dyn);

        char *out = dyn->buf + dyn->cur;

        if (0 == ((utf8_int32_t) 0xffffff80 & c)) {
            /* 1 byte */
            out[0] = (char) c;
            dyn->cur += 1;
        } else if (0 == ((utf8_int32_t) 0xfffff800 & c)) {
            /* 2 bytes */
            out[0] = 0xc0 | (char) (c >> 6);
            out[1] = 0x80 | (char) (c & 0x3f);
            dyn->cur += 2;
        } else if (0 == ((utf8_int32_t) 0xffff0000 & c)) {
            /* 3 bytes */
            out[0] = 0xe0 | (char) (c >> 12);
            out[1] = 0x80 | (char) ((c >> 6) & 0x3f);
            out[2] = 0x80 | (char) (c & 0x3f);
            dyn->cur += 3;
        } else {
            /* 4 bytes */
            out[0] = 0xf0 | (char) (c >> 18);
            out[1] = 0x80 | (char) ((c >> 12) & 0x3f);
            out[2] = 0x80 | (char) ((c >> 6) & 0x3f);
            out[3] = 0x80 | (char) (c & 0x3f);
            dyn->cur += 4;
        }
    }

    if (buf->max_size > 0 && dyn->cur >= buf->max_size) {
        return TEXT_BUF_FULL;
    }
    return 0;
}
/**
 * NUL-terminate the buffer contents. A trailing space is overwritten by
 * the terminator; otherwise the terminator is appended.
 */
__always_inline
static void text_buffer_terminate_string(text_buffer_t *buf) {
    dyn_buffer_t *dyn = &buf->dyn_buffer;

    if (dyn->cur > 0 && dyn->buf[dyn->cur - 1] == ' ') {
        dyn->buf[dyn->cur - 1] = '\0';
        return;
    }

    dyn_buffer_write_char(dyn, '\0');
}
/**
 * True when no further complete code point can be read at the cursor.
 *
 * Expands in place and relies on three names being in scope at the use site:
 *   str - start of the input, ptr - current read position, len - input length.
 *
 * The expression is true when ptr has advanced len bytes or more, when *ptr is
 * NUL, or when *ptr is the lead byte of a 2-, 3- or 4-byte sequence whose
 * remaining bytes would extend past len.
 *
 * NOTE(review): `len - N` is evaluated in the type of len. With an unsigned
 * len smaller than N the subtraction wraps around and the truncated-sequence
 * clauses cannot fire -- confirm callers never rely on them for such inputs.
 */
#define UTF8_END_OF_STRING \
(ptr - str >= len || *ptr == 0 || \
(0xc0 == (0xe0 & *ptr) && ptr - str > len - 2) || \
(0xe0 == (0xf0 & *ptr) && ptr - str > len - 3) || \
(0xf0 == (0xf8 & *ptr) && ptr - str > len - 4))
/**
 * Append at most len bytes of UTF-8 text to a text buffer.
 *
 * Inputs of 4 bytes or fewer take a short path that copies ASCII bytes only
 * (no whitespace collapsing, no max_size check). Longer inputs are decoded one
 * code point at a time; sequences rejected by utf8_validchr2() are skipped and
 * the rest goes through text_buffer_append_char().
 *
 * @param buf destination buffer
 * @param str input bytes, may be NULL (nothing is appended)
 * @param len number of readable bytes at str
 * @return 0 on success, or the non-zero value returned by
 *         text_buffer_append_char() (e.g. TEXT_BUF_FULL), at which point
 *         processing stops.
 */
__always_inline
static int text_buffer_append_string(text_buffer_t *buf, const char *str, size_t len) {
    const char *ptr = str;
    const char *oldPtr = ptr;

    if (str == NULL || UTF8_END_OF_STRING) {
        return 0;
    }

    if (len <= 4) {
        for (size_t i = 0; i < len; i++) {
            // Stop at the first NUL, like the decoding loop below does
            if (str[i] == '\0') {
                break;
            }
            if (((utf8_int32_t) 0xffffff80 & str[i]) == 0) {
                dyn_buffer_write_char(&buf->dyn_buffer, str[i]);
            }
        }
        return 0;
    }

    utf8_int32_t c;
    char tmp[16];

    do {
        ptr = utf8codepoint(ptr, &c);

        // Clear the whole scratch area so the copied sequence is always
        // NUL-terminated (the previous int-sized store covered only 4 bytes
        // and type-punned a char array).
        memset(tmp, 0, sizeof(tmp));
        memcpy(tmp, oldPtr, ptr - oldPtr);
        oldPtr = ptr;

        if (!utf8_validchr2(tmp)) {
            // `continue` in a do/while jumps to the loop condition
            continue;
        }

        int ret = text_buffer_append_char(buf, c);
        if (ret != 0) {
            return ret;
        }
    } while (!UTF8_END_OF_STRING);

    return 0;
}
/**
 * Append a NUL-terminated string to a text buffer.
 *
 * @param buf destination buffer
 * @param str NUL-terminated input; NULL is accepted and appends nothing,
 *            matching text_buffer_append_string()
 * @return the value returned by text_buffer_append_string(), or 0 for NULL.
 */
__always_inline
static int text_buffer_append_string0(text_buffer_t *buf, char *str) {
    // strlen(NULL) is undefined; the length-based variant already treats NULL as empty
    if (str == NULL) {
        return 0;
    }
    return text_buffer_append_string(buf, str, strlen(str));
}
__always_inline
static void incremental_put(GHashTable *table, unsigned long inode_no, int mtime) {
g_hash_table_insert(table, (gpointer) inode_no, GINT_TO_POINTER(mtime));

View File

@ -1,59 +0,0 @@
#include "auth_basic.h"
#define UNAUTHORIZED_TEXT "Unauthorized"
/**
 * Private data attached to the basic-auth handler created by auth_basic().
 */
typedef struct auth_basic_data {
/* Handler that the request is forwarded to when authentication succeeds */
onion_handler *inside;
/* Value expected after the "Basic " prefix of the Authorization header; NULL accepts every request */
const char *b64credentials;
} auth_basic_data_t;
/**
 * Check an Authorization header value against the expected credentials.
 *
 * @param expected    expected text after the "Basic " prefix, or NULL to
 *                    accept every request
 * @param credentials raw Authorization header value, may be NULL
 * @return TRUE when expected is NULL or the header is "Basic " followed by
 *         exactly expected, FALSE otherwise.
 */
int authenticate(const char *expected, const char *credentials) {
    if (expected == NULL) {
        return TRUE;
    }
    if (credentials == NULL) {
        return FALSE;
    }
    if (strncmp(credentials, "Basic ", 6) != 0) {
        return FALSE;
    }

    const char *supplied = credentials + 6;
    return strcmp(supplied, expected) == 0 ? TRUE : FALSE;
}
/**
 * Handler that gates the wrapped handler behind HTTP basic authentication.
 *
 * @param d   private data holding the expected credentials and inner handler
 * @param req incoming request; its "Authorization" header is inspected
 * @param res response being built
 * @return the inner handler's result when authenticate() accepts the request,
 *         otherwise OCS_PROCESSED after a 401 response with a
 *         WWW-Authenticate challenge has been written.
 */
int auth_basic_handler(auth_basic_data_t *d,
                       onion_request *req,
                       onion_response *res) {
    const char *credentials = onion_request_get_header(req, "Authorization");

    if (authenticate(d->b64credentials, credentials)) {
        return onion_handler_handle(d->inside, req, res);
    }

    // sizeof() on the literal counts its terminating NUL, which is not part of the body
    const size_t body_len = sizeof(UNAUTHORIZED_TEXT) - 1;

    onion_response_set_header(res, "WWW-Authenticate", "Basic realm=\"sist2\"");
    onion_response_set_code(res, HTTP_UNAUTHORIZED);
    // Declare the length before writing the body, as the other handlers in this file do
    onion_response_set_length(res, body_len);
    onion_response_write(res, UNAUTHORIZED_TEXT, body_len);

    return OCS_PROCESSED;
}
/**
 * Release the private data of a basic-auth handler.
 *
 * Frees the wrapped inner handler with onion_handler_free() and then the
 * auth_basic_data_t allocation itself. b64credentials is not freed here.
 */
void auth_basic_free(auth_basic_data_t *data) {
onion_handler_free(data->inside);
free(data);
}
/**
 * Wrap a handler so that it is only reached after basic authentication.
 *
 * @param b64credentials expected text after "Basic " in the Authorization
 *                       header; the pointer is stored, not copied. NULL
 *                       disables the check (see authenticate()).
 * @param inside_level   handler to forward authenticated requests to; it is
 *                       freed by auth_basic_free() together with the wrapper.
 * @return the new handler, or NULL when the private data could not be
 *         allocated (inside_level is left untouched in that case).
 */
onion_handler *auth_basic(const char *b64credentials, onion_handler *inside_level) {
    auth_basic_data_t *privdata = malloc(sizeof(auth_basic_data_t));
    if (privdata == NULL) {
        // Out of memory: report failure instead of dereferencing NULL
        return NULL;
    }

    privdata->b64credentials = b64credentials;
    privdata->inside = inside_level;

    return onion_handler_new((onion_handler_handler) auth_basic_handler, privdata,
                             (onion_handler_private_data_free) auth_basic_free);
}

View File

@ -1,4 +0,0 @@
#include "src/sist.h"
onion_handler *auth_basic(const char *b64credentials, onion_handler *inside_level);

View File

@ -1,16 +1,41 @@
#include "serve.h"
#include <src/ctx.h>
#include <onion/types_internal.h>
#include "src/sist.h"
#include "src/io/store.h"
#include "static_generated.c"
#include "src/index/elastic.h"
#include "src/index/web.h"
#include <src/ctx.h>
#include <mongoose.h>
#define CHUNK_SIZE 1024 * 1024 * 10
__always_inline
void set_default_headers(onion_response *res) {
onion_response_set_header(res, "Server", "sist2");
/**
 * Test whether str starts with prefix and has at least one more byte after it.
 *
 * Note the strict comparison: a string equal to the prefix does not match.
 *
 * @return non-zero on match, 0 otherwise.
 */
static int has_prefix(const struct mg_str *str, const struct mg_str *prefix) {
    if (str->len <= prefix->len) {
        return 0;
    }
    return memcmp(str->p, prefix->p, prefix->len) == 0;
}
/**
 * Compare two mg_str values byte for byte.
 *
 * @return non-zero when both have the same length and contents, 0 otherwise.
 */
static int is_equal(const struct mg_str *s1, const struct mg_str *s2) {
    if (s1->len != s2->len) {
        return 0;
    }
    return memcmp(s1->p, s2->p, s2->len) == 0;
}
/**
 * Reason phrase to place after the status code on the HTTP status line.
 * Codes without a dedicated entry fall back to a generic phrase.
 */
static const char *http_reason_phrase(int status_code) {
    switch (status_code) {
        case 200:
            return "OK";
        case 204:
            return "No Content";
        case 206:
            return "Partial Content";
        case 308:
            return "Permanent Redirect";
        case 400:
            return "Bad Request";
        case 401:
            return "Unauthorized";
        case 403:
            return "Forbidden";
        case 404:
            return "Not Found";
        case 500:
            return "Internal Server Error";
        default:
            return status_code >= 400 ? "Error" : "OK";
    }
}

/**
 * Write the status line and headers of an HTTP/1.1 response.
 *
 * Always emits "Server: sist2", "Content-Length" and "Connection: close",
 * followed by the caller-supplied headers and the blank line that ends the
 * header block. The body, if any, must be sent separately.
 *
 * @param nc            connection to write to
 * @param status_code   HTTP status code; the matching reason phrase is added
 * @param length        value of the Content-Length header
 * @param extra_headers additional header line(s) without a trailing CRLF;
 *                      NULL or "" adds nothing
 */
static void send_response_line(struct mg_connection *nc, int status_code, int length, char *extra_headers) {
    // An empty extra header must not contribute its own CRLF, otherwise the
    // header block would be terminated one line early.
    const int has_extra = extra_headers != NULL && *extra_headers != '\0';

    mg_printf(
            nc,
            "HTTP/1.1 %d %s\r\n"
            "Server: sist2\r\n"
            "Content-Length: %d\r\n"
            "Connection: close\r\n"
            "%s%s\r\n",
            status_code, http_reason_phrase(status_code),
            length,
            has_extra ? extra_headers : "",
            has_extra ? "\r\n" : ""
    );
}
index_t *get_index_by_id(const char *index_id) {
for (int i = WebCtx.index_count; i >= 0; i--) {
if (strcmp(index_id, WebCtx.indices[i].desc.uuid) == 0) {
@ -28,241 +53,124 @@ store_t *get_store(const char *index_id) {
return NULL;
}
int search_index(void *p, onion_request *req, onion_response *res) {
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "text/html");
onion_response_set_length(res, sizeof(search_html));
onion_response_write(res, search_html, sizeof(search_html));
return OCS_PROCESSED;
/**
 * Serve the embedded search page.
 *
 * Sends a 200 response with "Content-Type: text/html" whose body is the
 * search_html array, then sets MG_F_SEND_AND_CLOSE on the connection.
 */
void search_index(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(search_html), "Content-Type: text/html");
mg_send(nc, search_html, sizeof(search_html));
nc->flags |= MG_F_SEND_AND_CLOSE;
}
int javascript(void *p, onion_request *req, onion_response *res) {
onion_response_set_header(res, "Content-Type", "text/javascript");
onion_response_set_length(res, sizeof(bundle_js));
onion_response_write(res, bundle_js, sizeof(bundle_js));
return OCS_PROCESSED;
/**
 * Serve the embedded JavaScript bundle.
 *
 * Sends a 200 response with "Content-Type: application/javascript" whose body
 * is the bundle_js array, then sets MG_F_SEND_AND_CLOSE on the connection.
 */
void javascript(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(bundle_js), "Content-Type: application/javascript");
mg_send(nc, bundle_js, sizeof(bundle_js));
nc->flags |= MG_F_SEND_AND_CLOSE;
}
int client_requested_dark_theme(onion_request *req) {
const char *cookie = onion_request_get_cookie(req, "sist");
if (cookie == NULL) {
int client_requested_dark_theme(struct http_message *hm) {
struct mg_str *cookie_header = mg_get_http_header(hm, "cookie");
if (cookie_header == NULL) {
return FALSE;
}
return strcmp(cookie, "dark") == 0;
char buf[4096];
char *sist_cookie = buf;
if (mg_http_parse_header2(cookie_header, "sist", &sist_cookie, sizeof(buf)) == 0) {
return FALSE;
}
int ret = strcmp(sist_cookie, "dark") == 0;
if (sist_cookie != buf) {
free(sist_cookie);
}
return ret;
}
int style(void *p, onion_request *req, onion_response *res) {
set_default_headers(res);
void style(struct mg_connection *nc, struct http_message *hm) {
onion_response_set_header(res, "Content-Type", "text/css");
if (client_requested_dark_theme(req)) {
onion_response_set_length(res, sizeof(bundle_dark_css));
onion_response_write(res, bundle_dark_css, sizeof(bundle_dark_css));
if (client_requested_dark_theme(hm)) {
send_response_line(nc, 200, sizeof(bundle_dark_css), "Content-Type: text/css");
mg_send(nc, bundle_dark_css, sizeof(bundle_dark_css));
} else {
onion_response_set_length(res, sizeof(bundle_css));
onion_response_write(res, bundle_css, sizeof(bundle_css));
send_response_line(nc, 200, sizeof(bundle_css), "Content-Type: text/css");
mg_send(nc, bundle_css, sizeof(bundle_css));
}
return OCS_PROCESSED;
nc->flags |= MG_F_SEND_AND_CLOSE;
}
int img_sprite_skin_flag(void *p, onion_request *req, onion_response *res) {
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "image/png");
if (client_requested_dark_theme(req)) {
onion_response_set_length(res, sizeof(sprite_skin_flat_dark_png));
onion_response_write(res, sprite_skin_flat_dark_png, sizeof(sprite_skin_flat_dark_png));
void img_sprite_skin_flat(struct mg_connection *nc, struct http_message *hm) {
if (client_requested_dark_theme(hm)) {
send_response_line(nc, 200, sizeof(sprite_skin_flat_dark_png), "Content-Type: image/png");
mg_send(nc, sprite_skin_flat_dark_png, sizeof(sprite_skin_flat_dark_png));
} else {
onion_response_set_length(res, sizeof(sprite_skin_flat_png));
onion_response_write(res, sprite_skin_flat_png, sizeof(sprite_skin_flat_png));
send_response_line(nc, 200, sizeof(sprite_skin_flat_png), "Content-Type: image/png");
mg_send(nc, sprite_skin_flat_png, sizeof(sprite_skin_flat_png));
}
return OCS_PROCESSED;
nc->flags |= MG_F_SEND_AND_CLOSE;
}
int thumbnail(void *p, onion_request *req, onion_response *res) {
int flags = onion_request_get_flags(req);
if ((flags & OR_METHODS) != OR_GET) {
return OCS_NOT_PROCESSED;
}
const char *arg_index = onion_request_get_query(req, "1");
const char *arg_uuid = onion_request_get_query(req, "2");
void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
if (arg_uuid == NULL || arg_index == NULL) {
return OCS_NOT_PROCESSED;
if (path->len != UUID_STR_LEN * 2 + 2) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) path->len, path->p)
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char arg_uuid[UUID_STR_LEN];
char arg_index[UUID_STR_LEN];
memcpy(arg_index, hm->uri.p + 3, UUID_STR_LEN);
*(arg_index + UUID_STR_LEN - 1) = '\0';
memcpy(arg_uuid, hm->uri.p + 3 + UUID_STR_LEN, UUID_STR_LEN);
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
uuid_t uuid;
uuid_parse(arg_uuid, uuid);
store_t *store = get_store(arg_index);
if (store == NULL) {
return OCS_NOT_PROCESSED;
int ret = uuid_parse(arg_uuid, uuid);
if (ret != 0) {
LOG_DEBUGF("serve.c", "Invalid thumbnail UUID: %s", arg_uuid)
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "image/jpeg");
store_t *store = get_store(arg_index);
if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
size_t data_len = 0;
char *data = store_read(store, (char *) uuid, sizeof(uuid_t), &data_len);
onion_response_set_length(res, data_len);
int written = onion_response_write(res, data, data_len);
onion_response_flush(res);
if (written != data_len || data_len == 0) {
LOG_DEBUG("serve.c", "Couldn't write thumbnail");
if (data_len != 0) {
send_response_line(nc, 200, data_len, "Content-Type: image/jpeg");
mg_send(nc, data, data_len);
free(data);
}
free(data);
return OCS_PROCESSED;
nc->flags |= MG_F_SEND_AND_CLOSE;
}
/**
* Modified version of onion_shortcut_response_file that allows
* browsers to seek media files.
*/
int chunked_response_file(const char *filename, const char *mime,
int partial, onion_request *request, onion_response *res) {
int fd = open(filename, O_RDONLY | O_CLOEXEC);
struct stat st;
void search(struct mg_connection *nc, struct http_message *hm) {
if (fd < 0 || stat(filename, &st) != 0 || S_ISDIR(st.st_mode)) {
close(fd);
return OCS_NOT_PROCESSED;
if (hm->body.len == 0) {
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
size_t length = st.st_size;
size_t ends;
const char *range = onion_request_get_header(request, "Range");
if (partial && range && strncmp(range, "bytes=", 6) == 0) {
onion_response_set_header(res, "Accept-Ranges", "bytes");
onion_response_set_code(res, HTTP_PARTIAL_CONTENT);
char tmp[1024];
if (strlen(range + 6) >= sizeof(tmp)) {
close(fd);
return OCS_INTERNAL_ERROR;
}
strncpy(tmp, range + 6, sizeof(tmp) - 1);
char *start = tmp;
char *end = tmp;
while (*end != '-' && *end) {
end++;
}
if (*end == '-') {
*end = '\0';
end++;
size_t starts;
starts = atol(start);
if (*end) {
// %d-%d
ends = atol(end);
} else {
// %d-
ends = MIN(starts + CHUNK_SIZE, length);
}
if (ends > length || starts >= length || starts < 0) {
close(fd);
return OCS_INTERNAL_ERROR;
}
length = ends - starts;
if (starts != 0) {
lseek(fd, starts, SEEK_SET);
}
snprintf(tmp, sizeof(tmp), "bytes %ld-%ld/%ld",
starts, ends - 1, st.st_size);
onion_response_set_header(res, "Content-Range", tmp);
}
}
onion_response_set_length(res, length);
if (mime != NULL) {
onion_response_set_header(res, "Content-Type", mime);
} else {
onion_response_set_header(res, "Content-Type", "application/octet-stream");
}
onion_response_write_headers(res);
if ((onion_request_get_flags(request) & OR_HEAD) == OR_HEAD) {
length = 0;
}
if (length) {
int bytes_read = 0, bytes_written;
size_t total_read = 0;
char buf[4046];
if (length > sizeof(buf)) {
size_t max = length - sizeof(buf);
while (total_read < max) {
bytes_read = read(fd, buf, sizeof(buf));
if (bytes_read < 0) {
break;
}
total_read += bytes_read;
bytes_written = onion_response_write(res, buf, bytes_read);
if (bytes_written != bytes_read) {
break;
}
}
}
if (sizeof(buf) >= (length - total_read)) {
bytes_read = read(fd, buf, length - total_read);
onion_response_write(res, buf, bytes_read);
}
}
close(fd);
return OCS_PROCESSED;
}
int search(UNUSED(void *p), onion_request *req, onion_response *res) {
int flags = onion_request_get_flags(req);
if ((flags & OR_METHODS) != OR_POST) {
return OCS_NOT_PROCESSED;
}
const struct onion_block_t *block = onion_request_get_data(req);
if (block == NULL) {
return OCS_NOT_PROCESSED;
}
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.p, hm->body.len);
*(body + hm->body.len) = '\0';
char url[4096];
snprintf(url, 4096, "%s/sist2/_search", WebCtx.es_url);
response_t *r = web_post(url, onion_block_data(block), "Content-Type: application/json");
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "application/json");
onion_response_set_length(res, r->size);
if (r->status_code == 200) {
onion_response_write(res, r->body, r->size);
} else {
sist_log("serve.c", SIST_WARNING, "ElasticSearch error during query");
if (r->size != 0) {
char * tmp = malloc(r->size + 1);
memcpy(tmp, r->body, r->size);
*(tmp + r->size) = '\0';
cJSON *json = cJSON_Parse(tmp);
char *json_str = cJSON_Print(json);
sist_log("serve.c", SIST_WARNING, json_str);
free(json_str);
free(tmp);
}
onion_response_set_code(res, HTTP_INTERNAL_ERROR);
}
free_response(r);
return OCS_PROCESSED;
nc->user_data = web_post_async(url, body);
free(body);
}
int serve_file_from_url(cJSON *json, index_t *idx, onion_request *req, onion_response *res) {
int serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
@ -274,12 +182,16 @@ int serve_file_from_url(cJSON *json, index_t *idx, onion_request *req, onion_res
idx->desc.rewrite_url, path, name, strlen(ext) == 0 ? "" : ".", ext);
dyn_buffer_t encoded = url_escape(url);
int ret = onion_shortcut_redirect(encoded.buf, req, res);
mg_http_send_redirect(
nc, 308,
(struct mg_str) MG_MK_STR_N(encoded.buf, encoded.cur),
(struct mg_str) MG_NULL_STR
);
dyn_buffer_destroy(&encoded);
return ret;
nc->flags |= MG_F_SEND_AND_CLOSE;
}
int serve_file_from_disk(cJSON *json, index_t *idx, onion_request *req, onion_response *res) {
void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct http_message *hm) {
const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
@ -287,24 +199,23 @@ int serve_file_from_disk(cJSON *json, index_t *idx, onion_request *req, onion_re
const char *mime = cJSON_GetObjectItem(json, "mime")->valuestring;
char full_path[PATH_MAX];
snprintf(full_path, PATH_MAX, "%s%s/%s%s%s",
idx->desc.root, path, name, strlen(ext) == 0 ? "" : ".", ext);
snprintf(full_path, PATH_MAX, "%s%s%s%s%s%s",
idx->desc.root, path, strlen(path) == 0 ? "" : "/",
name, strlen(ext) == 0 ? "" : ".", ext);
LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path)
char disposition[8196];
snprintf(disposition, sizeof(disposition), "inline; filename=\"%s%s%s\"",
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s%s%s\"",
name, strlen(ext) == 0 ? "" : ".", ext);
onion_response_set_header(res, "Content-Disposition", disposition);
return chunked_response_file(full_path, mime, 1, req, res);
mg_http_serve_file(nc, hm, full_path, mg_mk_str(mime), mg_mk_str(disposition));
}
int index_info(UNUSED(void *p), onion_request *req, onion_response *res) {
void index_info(struct mg_connection *nc) {
cJSON *json = cJSON_CreateObject();
cJSON *arr = cJSON_AddArrayToObject(json, "indices");
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "application/json");
for (int i = 0; i < WebCtx.index_count; i++) {
index_t *idx = &WebCtx.indices[i];
@ -317,53 +228,66 @@ int index_info(UNUSED(void *p), onion_request *req, onion_response *res) {
}
char *json_str = cJSON_PrintUnformatted(json);
onion_response_write0(res, json_str);
send_response_line(nc, 200, strlen(json_str), "Content-Type: application/json");
mg_send(nc, json_str, strlen(json_str));
free(json_str);
cJSON_Delete(json);
return OCS_PROCESSED;
nc->flags |= MG_F_SEND_AND_CLOSE;
}
int document_info(UNUSED(void *p), onion_request *req, onion_response *res) {
void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
const char *arg_uuid = onion_request_get_query(req, "1");
if (arg_uuid == NULL) {
return OCS_PROCESSED;
if (path->len != UUID_STR_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) path->len, path->p)
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char arg_uuid[UUID_STR_LEN];
memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
cJSON *doc = elastic_get_document(arg_uuid);
cJSON *source = cJSON_GetObjectItem(doc, "_source");
cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
return OCS_NOT_PROCESSED;
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) {
cJSON_Delete(doc);
return OCS_NOT_PROCESSED;
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
onion_response_set_header(res, "Content-Type", "application/json");
char *json_str = cJSON_PrintUnformatted(source);
onion_response_write0(res, json_str);
send_response_line(nc, 200, (int) strlen(json_str), "Content-Type: application/json");
mg_send(nc, json_str, (int) strlen(json_str));
free(json_str);
cJSON_Delete(doc);
return OCS_PROCESSED;
nc->flags |= MG_F_SEND_AND_CLOSE;
}
int file(UNUSED(void *p), onion_request *req, onion_response *res) {
void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
const char *arg_uuid = onion_request_get_query(req, "1");
if (arg_uuid == NULL) {
return OCS_PROCESSED;
if (path->len != UUID_STR_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) path->len, path->p)
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char arg_uuid[UUID_STR_LEN];
memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
const char *next = arg_uuid;
cJSON *doc = NULL;
cJSON *index_id = NULL;
@ -375,7 +299,8 @@ int file(UNUSED(void *p), onion_request *req, onion_response *res) {
index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
return OCS_NOT_PROCESSED;
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
cJSON *parent = cJSON_GetObjectItem(source, "parent");
if (parent == NULL) {
@ -388,69 +313,140 @@ int file(UNUSED(void *p), onion_request *req, onion_response *res) {
if (idx == NULL) {
cJSON_Delete(doc);
return OCS_NOT_PROCESSED;
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
int ret;
if (strlen(idx->desc.rewrite_url) == 0) {
ret = serve_file_from_disk(source, idx, req, res);
serve_file_from_disk(source, idx, nc, hm);
} else {
ret = serve_file_from_url(source, idx, req, res);
serve_file_from_url(source, idx, nc);
}
cJSON_Delete(doc);
return ret;
}
int status(UNUSED(void *p), UNUSED(onion_request *req), onion_response *res) {
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "application/x-empty");
void status(struct mg_connection *nc) {
char *status = elastic_get_status();
if (strcmp(status, "open") == 0) {
onion_response_set_code(res, 204);
send_response_line(nc, 204, 0, "Content-Type: application/json");
} else {
onion_response_set_code(res, 500);
send_response_line(nc, 500, 0, "Content-Type: application/json");
}
free(status);
return OCS_PROCESSED;
nc->flags |= MG_F_SEND_AND_CLOSE;
}
/**
 * Connection event handler: routes HTTP requests and completes pending
 * ElasticSearch sub-requests.
 *
 * MG_EV_HTTP_REQUEST: parses the URI, enforces basic auth when
 * WebCtx.auth_enabled is TRUE, then dispatches on the path. Unknown paths
 * simply close the connection.
 *
 * MG_EV_POLL: when nc->user_data holds a sub-request context (set by
 * search()), polls it; once it is done the ElasticSearch reply is relayed to
 * the client, or a 500 is sent if ElasticSearch did not answer with 200.
 *
 * @param nc connection the event belongs to
 * @param ev event identifier
 * @param p  event payload; a struct http_message * for MG_EV_HTTP_REQUEST
 */
static void ev_router(struct mg_connection *nc, int ev, void *p) {
    struct mg_str scheme;
    struct mg_str user_info;
    struct mg_str host;
    unsigned int port;
    struct mg_str path;
    struct mg_str query;
    struct mg_str fragment;

    if (ev == MG_EV_HTTP_REQUEST) {
        struct http_message *hm = (struct http_message *) p;

        if (mg_parse_uri(hm->uri, &scheme, &user_info, &host, &port, &path, &query, &fragment) != 0) {
            nc->flags |= MG_F_SEND_AND_CLOSE;
            return;
        }

        if (WebCtx.auth_enabled == TRUE) {
            char user[256] = {0,};
            char pass[256] = {0,};

            int ret = mg_get_http_basic_auth(hm, user, sizeof(user), pass, sizeof(pass));
            if (ret == -1 || strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) {
                mg_printf(nc, "HTTP/1.1 401 Unauthorized\r\n"
                              "WWW-Authenticate: Basic realm=\"sist2\"\r\n"
                              "Content-Length: 0\r\n\r\n");
                nc->flags |= MG_F_SEND_AND_CLOSE;
                return;
            }
        }

        if (is_equal(&path, &((struct mg_str) MG_MK_STR("/")))) {
            search_index(nc);
        } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/css")))) {
            style(nc, hm);
        } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/js")))) {
            javascript(nc);
        } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/img/sprite-skin-flat.png")))) {
            img_sprite_skin_flat(nc, hm);
        } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/es")))) {
            search(nc, hm);
        } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/i")))) {
            index_info(nc);
        } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/status")))) {
            status(nc);
        } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/f/")))) {
            file(nc, hm, &path);
        } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/t/")))) {
            thumbnail(nc, hm, &path);
        } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/d/")))) {
            document_info(nc, hm, &path);
        } else {
            nc->flags |= MG_F_SEND_AND_CLOSE;
        }

    } else if (ev == MG_EV_POLL) {
        if (nc->user_data != NULL) {
            //Waiting for ES reply
            subreq_ctx_t *ctx = (subreq_ctx_t *) nc->user_data;
            mg_mgr_poll(&ctx->mgr, 0);

            if (ctx->ev_data.done == TRUE) {
                response_t *r = ctx->ev_data.resp;

                if (r->status_code == 200) {
                    send_response_line(nc, 200, r->size, "Content-Type: application/json");
                    mg_send(nc, r->body, r->size);
                } else {
                    sist_log("serve.c", SIST_WARNING, "ElasticSearch error during query");

                    if (r->size != 0) {
                        // r->body is not NUL-terminated; copy it before parsing
                        char *tmp = malloc(r->size + 1);
                        if (tmp != NULL) {
                            memcpy(tmp, r->body, r->size);
                            *(tmp + r->size) = '\0';

                            cJSON *json = cJSON_Parse(tmp);
                            if (json != NULL) {
                                char *json_str = cJSON_Print(json);
                                if (json_str != NULL) {
                                    // NOTE(review): json_str is passed as sist_log's third
                                    // argument; if that is a printf-style format, '%' in the
                                    // ES reply would be interpreted -- confirm.
                                    sist_log("serve.c", SIST_WARNING, json_str);
                                    free(json_str);
                                }
                                // The parsed tree is owned by us and was previously leaked
                                cJSON_Delete(json);
                            }
                            free(tmp);
                        }
                    }

                    // Tell the client the query failed instead of closing silently
                    send_response_line(nc, 500, 0, "Content-Type: application/json");
                }

                // NOTE(review): ctx itself (and ctx->mgr) is not released here;
                // confirm whether free_response() or another owner frees it.
                free_response(r);

                nc->flags |= MG_F_SEND_AND_CLOSE;
                nc->user_data = NULL;
            }
        }
    }
}
void serve(const char *hostname, const char *port) {
onion *o = onion_new(O_POOL);
onion_set_timeout(o, 3500);
onion_set_hostname(o, hostname);
onion_set_port(o, port);
onion_url *urls = onion_url_new();
// Static paths
onion_set_root_handler(o, auth_basic(WebCtx.b64credentials, onion_url_to_handler(urls)));
onion_url_add(urls, "", search_index);
onion_url_add(urls, "css", style);
onion_url_add(urls, "js", javascript);
onion_url_add(urls, "img/sprite-skin-flat.png", img_sprite_skin_flag);
onion_url_add(urls, "es", search);
onion_url_add(urls, "status", status);
onion_url_add(
urls,
"^t/([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})/"
"([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})$",
thumbnail
);
onion_url_add(urls, "^f/([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})$", file);
onion_url_add(urls, "^d/([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})$", document_info);
onion_url_add(urls, "i", index_info);
printf("Starting web server @ http://%s:%s\n", hostname, port);
onion_listen(o);
onion_free(o);
struct mg_mgr mgr;
mg_mgr_init(&mgr, NULL);
struct mg_connection *nc = mg_bind(&mgr, "0.0.0.0:8000", ev_router);
if (nc == NULL) {
printf("Failed to create listener\n");
return;
}
mg_set_protocol_http_websocket(nc);
for (;;) {
mg_mgr_poll(&mgr, 10);
}
// onion_set_root_handler(o, auth_basic(WebCtx.b64credentials, onion_url_to_handler(urls)));
}

File diff suppressed because one or more lines are too long

View File

Some files were not shown because too many files have changed in this diff Show More