Compare commits

...

42 Commits

Author SHA1 Message Date
ee0f71f4d3 fix compile warning 2020-05-17 15:00:56 -04:00
0bbb96b149 Merge pull request #51 from simon987/stats
Stats page
2020-05-17 14:49:28 -04:00
78f6e16701 image 2020-05-17 12:47:45 -04:00
4625bca9a9 stats 2020-05-17 12:47:02 -04:00
f2ae653886 Revert "wip"
This reverts commit 5686bc86
2020-05-16 08:16:49 -04:00
5686bc864d wip 2020-05-13 17:37:40 -04:00
cf513b4ad8 Escape invalid UTF8 characters simon987/sist2#44, increase magic buffer size 2020-05-12 19:28:02 -04:00
013423424e UTF-8 fix attempt w/ libarchive (#44) 2020-05-10 19:52:42 -04:00
16514fd6b0 Option to search in path #49 2020-05-09 22:00:22 -04:00
27509f97e1 Update USAGE.md 2020-05-08 19:08:46 -04:00
4c540eae1c Update USAGE.md 2020-05-08 19:07:45 -04:00
d2b53ff6fc Update README.md 2020-05-08 18:32:32 -04:00
0ef4292abf Fix duplicate tag problem (simon987/sist2#48) 2020-05-05 20:20:10 -04:00
e6fde38c24 Load defaults when LocalStorage is outdated 2020-05-03 08:13:25 -04:00
5fa343d40f fix version typo 2020-05-03 08:10:28 -04:00
7ee1374802 oops 2020-04-30 21:21:48 -04:00
bd9e56829c Support for markup files 2020-04-30 20:21:09 -04:00
718169345e gzip artifacts in CI 2020-04-21 19:34:46 -04:00
5a6aa763ca build fix 2020-04-21 18:50:32 -04:00
695d9abd83 revert debug hard-coded listen address 2020-04-21 15:52:35 -04:00
e436af7b2a 2.0 (#46)
* extract scan code to libscan, (wip)

* submodules

* replace curl with mongoose (wip)

* replace onion with mongoose (wip)

* replace onion with mongoose (wip)

* It compiles! (I think)

* Update readme

* Entirely remove libonion (WIP)

* unscramble submodules

* recover screenshot

* Update mappings

* Bug fixes

* update

* media meta fix

* memory fixes

* More bug fixes...

* Bug fix w/ libmagic & vfile

* libmagic fix (again)

* Better lightbox, better video handler, random reloads fix

* Use svg for info icon

* re-enable http auth

* mobi support #41, fix logs

* Update README & cleanup
2020-04-21 14:42:20 -04:00
4501a7810f Update issue-template.md 2020-04-11 07:33:33 -04:00
simon987
e36761fa6a Update issue templates 2020-04-11 07:28:48 -04:00
fe53b79d56 Fix warnings 2020-03-25 08:18:59 -04:00
09615bbed6 Update dependencies 2020-03-24 14:30:23 -04:00
a2be9b955c Fix build errors 2020-03-24 11:49:13 -04:00
9298bd2d9d CI fix... 2020-03-24 10:09:33 -04:00
317034ba21 teamcity automation attempt 2020-03-24 10:01:27 -04:00
0505303503 text_buffer bug fixes & Sort option 2020-03-20 20:54:22 -04:00
6e5772f13b Errors cleanup 2020-03-20 10:05:10 -04:00
ccccdb3b78 Fixes #38 2020-03-13 16:35:11 -04:00
12d17acf4f UI fixes 2020-03-06 12:27:38 -05:00
48b56cdb7b I forgot to commit this somehow 2020-03-06 10:32:05 -05:00
048f707f80 Fix buffer overflow in json parse function (index module) 2020-03-06 10:17:21 -05:00
98e0a5fd64 Update CI script 2020-03-06 09:41:33 -05:00
740a49a09f version bump 2020-03-06 09:36:46 -05:00
81be662574 (breaking) update mime list 2020-03-06 09:36:21 -05:00
02fa3f02f5 Fix memory leak with virtual files in parse.c 2020-03-06 09:36:07 -05:00
cfdd7bdd87 Fix memory leak in font.c 2020-03-06 09:35:19 -05:00
7ceb645926 hotfix invalid read in text_buffer 2020-03-06 09:34:41 -05:00
7d0091f647 whoops 2020-03-05 21:54:56 -05:00
b3cd630399 Update README.md 2020-03-05 19:42:06 -05:00
116 changed files with 3862 additions and 4913 deletions

View File

@@ -0,0 +1,16 @@
---
name: Issue template
about: General
title: ''
labels: ''
assignees: ''
---
sist2 version:
Platform (please indicate if you're using Docker):
Command with arguments: `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0`
If the issue is related to the `scan` module, please attach the files necessary to reproduce the error or email them to me[at]simon987.net.

1
.gitignore vendored
View File

@@ -16,3 +16,4 @@ bundle.js
*.a
vgcore.*
build/
third-party/

49
.gitmodules vendored
View File

@@ -1,45 +1,6 @@
[submodule "argparse"]
path = argparse
[submodule "third-party/libscan"]
path = third-party/libscan
url = https://github.com/simon987/libscan
[submodule "third-party/argparse"]
path = third-party/argparse
url = https://github.com/cofyc/argparse
[submodule "cJSON"]
path = cJSON
url = https://github.com/DaveGamble/cJSON
[submodule "lmdb"]
path = lmdb
url = https://github.com/LMDB/lmdb
[submodule "utf8.h"]
path = utf8.h
url = https://github.com/sheredom/utf8.h
[submodule "lib/bzip2-1.0.6"]
path = lib/bzip2-1.0.6
url = https://github.com/enthought/bzip2-1.0.6
[submodule "lib/libmagic"]
path = lib/libmagic
url = https://github.com/threatstack/libmagic
[submodule "lib/harfbuzz"]
path = lib/harfbuzz
url = https://github.com/harfbuzz/harfbuzz
[submodule "lib/openjpeg"]
path = lib/openjpeg
url = https://github.com/uclouvain/openjpeg
[submodule "lib/ffmpeg"]
path = lib/ffmpeg
url = https://git.ffmpeg.org/ffmpeg.git
[submodule "lib/onion"]
path = lib/onion
url = https://github.com/davidmoreno/onion
[submodule "lib/mupdf"]
path = lib/mupdf
url = git://git.ghostscript.com/mupdf.git
[submodule "lib/tesseract"]
path = lib/tesseract
url = https://github.com/tesseract-ocr/tesseract
[submodule "lib/leptonica"]
path = lib/leptonica
url = https://github.com/danbloomberg/leptonica
[submodule "lib/libtiff"]
path = lib/libtiff
url = https://gitlab.com/libtiff/libtiff
[submodule "lib/libpng"]
path = lib/libpng
url = https://github.com/glennrp/libpng

View File

@@ -2,91 +2,61 @@ cmake_minimum_required(VERSION 3.7)
set(CMAKE_C_STANDARD 11)
project(sist2 C)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/CMakeModules")
option(SIST_DEBUG "Build a debug executable" on)
add_subdirectory(third-party/libscan)
set(ARGPARSE_SHARED off)
add_subdirectory(third-party/argparse)
add_executable(
sist2
src/main.c
src/sist.h
src/io/walk.h src/io/walk.c
src/parsing/media.h src/parsing/media.c
src/parsing/pdf.h src/parsing/pdf.c
src/io/store.h src/io/store.c
src/tpool.h src/tpool.c
src/parsing/parse.h src/parsing/parse.c
src/io/serialize.h src/io/serialize.c
src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
src/parsing/text.h src/parsing/text.c
src/index/web.c src/index/web.h
src/web/serve.c src/web/serve.h
src/web/auth_basic.h src/web/auth_basic.c
src/index/elastic.c src/index/elastic.h
src/util.c src/util.h
src/ctx.h src/types.h src/parsing/font.c src/parsing/font.h
src/parsing/arc.c src/parsing/arc.h
src/parsing/doc.c src/parsing/doc.h
src/ctx.h src/types.h
src/log.c src/log.h
src/parsing/cbr.h src/parsing/cbr.c
# argparse
argparse/argparse.h argparse/argparse.c
third-party/argparse/argparse.h third-party/argparse/argparse.c
# cJSON
cJSON/cJSON.h cJSON/cJSON.c
# LMDB
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
src/cli.c src/cli.h
src/stats.c src/stats.h)
# utf8.h
utf8.h/utf8.h
)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
find_package(PkgConfig REQUIRED)
set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:/usr/local/lib/pkgconfig/")
find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-glib CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED)
find_library(UUID_LIB NAMES uuid)
find_package(Freetype REQUIRED)
#find_package(OpenSSL REQUIRED)
pkg_check_modules(GLIB REQUIRED glib-2.0)
pkg_check_modules(GOBJECT REQUIRED gobject-2.0)
pkg_check_modules(UUID REQUIRED uuid)
add_definitions(${UUID_CFLAGS_OTHER})
add_definitions(${GLIB_CFLAGS_OTHER})
add_definitions(${GOBJECT_CFLAGS_OTHER})
add_definitions(${FREETYPE_CFLAGS_OTHER})
list(REMOVE_ITEM GLIB_LIBRARIES pcre)
list(REMOVE_ITEM GOBJECT_LIBRARIES pcre)
list(REMOVE_ITEM UUID_LIBRARIES pcre)
target_include_directories(
sist2 PUBLIC
${GOBJECT_INCLUDE_DIRS}
${GLIB_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/lib/ffmpeg/
${FREETYPE_INCLUDE_DIRS}
${UUID_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/
${PROJECT_SOURCE_DIR}/lmdb/libraries/liblmdb/
${PROJECT_SOURCE_DIR}/lib/onion/src/
${PROJECT_SOURCE_DIR}/lib/mupdf/include/
${PROJECT_SOURCE_DIR}/include/
/usr/include/libxml2/
${PROJECT_SOURCE_DIR}/lib/tesseract/include/
)
target_link_directories(
sist2 PUBLIC
${UUID_LIBRARY_DIRS}
${CMAKE_SOURCE_DIR}/third-party/onion/src/
${CMAKE_SOURCE_DIR}/third-party/utf8.h/
${CMAKE_SOURCE_DIR}/third-party/libscan/
${CMAKE_SOURCE_DIR}/
)
target_compile_options(
sist2
PRIVATE
-fPIC
-Werror
)
if (SIST_DEBUG)
@@ -102,6 +72,7 @@ if (SIST_DEBUG)
sist2
PRIVATE
-fsanitize=address
# -static
)
set_target_properties(
sist2
@@ -118,53 +89,28 @@ else ()
)
endif ()
TARGET_LINK_LIBRARIES(
add_dependencies(
sist2
scan
argparse
)
target_link_libraries(
sist2
${GLIB_LIBRARIES}
${GOBJECT_LIBRARIES}
${UUID_LIBRARIES}
# ffmpeg
${PROJECT_SOURCE_DIR}/lib/libavcodec.a
${PROJECT_SOURCE_DIR}/lib/libavformat.a
${PROJECT_SOURCE_DIR}/lib/libavutil.a
${PROJECT_SOURCE_DIR}/lib/libswscale.a
${PROJECT_SOURCE_DIR}/lib/libswresample.a
# mupdf
${PROJECT_SOURCE_DIR}/lib/libmupdf.a
${PROJECT_SOURCE_DIR}/lib/libmupdf-third.a
# onion
${PROJECT_SOURCE_DIR}/lib/libonion_static.a
z
lmdb
cjson
argparse
unofficial::glib::glib
unofficial::mongoose::mongoose
# OpenSSL::SSL OpenSSL::Crypto
${UUID_LIB}
pthread
m
bz2
# ${PROJECT_SOURCE_DIR}/lib/libmagic.a
magic
${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
freetype
archive
xml2
${PROJECT_SOURCE_DIR}/lib/libtesseract.a
${PROJECT_SOURCE_DIR}/lib/liblept.a
${PROJECT_SOURCE_DIR}/lib/libtiff.a
${PROJECT_SOURCE_DIR}/lib/libpng16.a
stdc++
# curl
${PROJECT_SOURCE_DIR}/lib/libcurl.a
${PROJECT_SOURCE_DIR}/lib/libcrypto.a
${PROJECT_SOURCE_DIR}/lib/libssl.a
dl
pcre
scan
)
add_custom_target(

View File

@@ -1,80 +0,0 @@
# - Try to find ffmpeg libraries (libavcodec, libavformat and libavutil)
# Once done this will define
#
# FFMPEG_FOUND - system has ffmpeg or libav
# FFMPEG_INCLUDE_DIR - the ffmpeg include directory
# FFMPEG_LIBRARIES - Link these to use ffmpeg
# FFMPEG_LIBAVCODEC
# FFMPEG_LIBAVFORMAT
# FFMPEG_LIBAVUTIL
#
# Copyright (c) 2008 Andreas Schneider <mail@cynapses.org>
# Modified for other libraries by Lasse Kärkkäinen <tronic>
# Modified for Hedgewars by Stepik777
#
# Redistribution and use is allowed according to the terms of the New
# BSD license.
#
if (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
# in cache already
set(FFMPEG_FOUND TRUE)
else (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
# use pkg-config to get the directories and then use these values
# in the FIND_PATH() and FIND_LIBRARY() calls
find_package(PkgConfig)
if (PKG_CONFIG_FOUND)
pkg_check_modules(_FFMPEG_AVCODEC libavcodec)
pkg_check_modules(_FFMPEG_AVFORMAT libavformat)
pkg_check_modules(_FFMPEG_AVUTIL libavutil)
endif (PKG_CONFIG_FOUND)
find_path(FFMPEG_AVCODEC_INCLUDE_DIR
NAMES libavcodec/avcodec.h
PATHS ${_FFMPEG_AVCODEC_INCLUDE_DIRS} /usr/include /usr/local/include /opt/local/include /sw/include
PATH_SUFFIXES ffmpeg libav
)
find_library(FFMPEG_LIBAVCODEC
NAMES avcodec
PATHS ${_FFMPEG_AVCODEC_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
)
find_library(FFMPEG_LIBAVFORMAT
NAMES avformat
PATHS ${_FFMPEG_AVFORMAT_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
)
find_library(FFMPEG_LIBAVUTIL
NAMES avutil
PATHS ${_FFMPEG_AVUTIL_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
)
if (FFMPEG_LIBAVCODEC AND FFMPEG_LIBAVFORMAT)
set(FFMPEG_FOUND TRUE)
endif()
if (FFMPEG_FOUND)
set(FFMPEG_INCLUDE_DIR ${FFMPEG_AVCODEC_INCLUDE_DIR})
set(FFMPEG_LIBRARIES
${FFMPEG_LIBAVCODEC}
${FFMPEG_LIBAVFORMAT}
${FFMPEG_LIBAVUTIL}
)
endif (FFMPEG_FOUND)
if (FFMPEG_FOUND)
if (NOT FFMPEG_FIND_QUIETLY)
message(STATUS "Found FFMPEG or Libav: ${FFMPEG_LIBRARIES}, ${FFMPEG_INCLUDE_DIR}")
endif (NOT FFMPEG_FIND_QUIETLY)
else (FFMPEG_FOUND)
if (FFMPEG_FIND_REQUIRED)
message(FATAL_ERROR "Could not find libavcodec or libavformat or libavutil")
endif (FFMPEG_FIND_REQUIRED)
endif (FFMPEG_FOUND)
endif (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)

View File

@@ -1,100 +0,0 @@
#-------------------------------------------------------------------------------
# Copyright (c) 2013-2013, Lars Baehren <lbaehren@gmail.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#-------------------------------------------------------------------------------
# - Check for the presence of LIBMAGIC
#
# The following variables are set when LIBMAGIC is found:
# LIBMAGIC_FOUND = Set to true, if all components of LIBMAGIC have been
# found.
# LIBMAGIC_INCLUDES = Include path for the header files of LIBMAGIC
# LIBMAGIC_LIBRARIES = Link these to use LIBMAGIC
# LIBMAGIC_LFLAGS = Linker flags (optional)
if (NOT LIBMAGIC_FOUND)
if (NOT LIBMAGIC_ROOT_DIR)
set (LIBMAGIC_ROOT_DIR ${CMAKE_INSTALL_PREFIX})
endif (NOT LIBMAGIC_ROOT_DIR)
##____________________________________________________________________________
## Check for the header files
find_path (LIBMAGIC_FILE_H
NAMES file/file.h
HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
PATH_SUFFIXES include
)
if (LIBMAGIC_FILE_H)
list (APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_FILE_H})
endif (LIBMAGIC_FILE_H)
find_path (LIBMAGIC_MAGIC_H
NAMES magic.h
HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
PATH_SUFFIXES include include/linux
)
if (LIBMAGIC_MAGIC_H)
list (APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_MAGIC_H})
endif (LIBMAGIC_MAGIC_H)
list (REMOVE_DUPLICATES LIBMAGIC_INCLUDES)
##____________________________________________________________________________
## Check for the library
find_library (LIBMAGIC_LIBRARIES magic
HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
PATH_SUFFIXES lib
)
##____________________________________________________________________________
## Actions taken when all components have been found
#find_package_handle_standard_args (LIBMAGIC DEFAULT_MSG LIBMAGIC_LIBRARIES LIBMAGIC_INCLUDES)
if (LIBMAGIC_FOUND)
if (NOT LIBMAGIC_FIND_QUIETLY)
message (STATUS "Found components for LIBMAGIC")
message (STATUS "LIBMAGIC_ROOT_DIR = ${LIBMAGIC_ROOT_DIR}")
message (STATUS "LIBMAGIC_INCLUDES = ${LIBMAGIC_INCLUDES}")
message (STATUS "LIBMAGIC_LIBRARIES = ${LIBMAGIC_LIBRARIES}")
endif (NOT LIBMAGIC_FIND_QUIETLY)
else (LIBMAGIC_FOUND)
if (LIBMAGIC_FIND_REQUIRED)
message (FATAL_ERROR "Could not find LIBMAGIC!")
endif (LIBMAGIC_FIND_REQUIRED)
endif (LIBMAGIC_FOUND)
##____________________________________________________________________________
## Mark advanced variables
mark_as_advanced (
LIBMAGIC_ROOT_DIR
LIBMAGIC_INCLUDES
LIBMAGIC_LIBRARIES
)
endif (NOT LIBMAGIC_FOUND)

View File

@@ -1,478 +0,0 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
macro(_OpenSSL_test_and_find_dependencies ssl_library crypto_library)
if((CMAKE_SYSTEM_NAME STREQUAL "Linux") AND
(("${ssl_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$") OR
("${crypto_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$")))
set(_OpenSSL_has_dependencies TRUE)
find_package(Threads)
else()
set(_OpenSSL_has_dependencies FALSE)
endif()
endmacro()
function(_OpenSSL_add_dependencies libraries_var library)
if(CMAKE_THREAD_LIBS_INIT)
list(APPEND ${libraries_var} ${CMAKE_THREAD_LIBS_INIT})
endif()
list(APPEND ${libraries_var} ${CMAKE_DL_LIBS})
set(${libraries_var} ${${libraries_var}} PARENT_SCOPE)
endfunction()
function(_OpenSSL_target_add_dependencies target)
if(_OpenSSL_has_dependencies)
set_property( TARGET ${target} APPEND PROPERTY INTERFACE_LINK_LIBRARIES Threads::Threads )
set_property( TARGET ${target} APPEND PROPERTY INTERFACE_LINK_LIBRARIES ${CMAKE_DL_LIBS} )
endif()
endfunction()
if (UNIX)
find_package(PkgConfig QUIET)
pkg_check_modules(_OPENSSL QUIET openssl)
endif ()
# Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES
if(OPENSSL_USE_STATIC_LIBS)
set(_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
if(WIN32)
set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
else()
set(CMAKE_FIND_LIBRARY_SUFFIXES .a )
endif()
endif()
if (WIN32)
# http://www.slproweb.com/products/Win32OpenSSL.html
set(_OPENSSL_ROOT_HINTS
${OPENSSL_ROOT_DIR}
"[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (32-bit)_is1;Inno Setup: App Path]"
"[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (64-bit)_is1;Inno Setup: App Path]"
ENV OPENSSL_ROOT_DIR
)
file(TO_CMAKE_PATH "$ENV{PROGRAMFILES}" _programfiles)
set(_OPENSSL_ROOT_PATHS
"${_programfiles}/OpenSSL"
"${_programfiles}/OpenSSL-Win32"
"${_programfiles}/OpenSSL-Win64"
"C:/OpenSSL/"
"C:/OpenSSL-Win32/"
"C:/OpenSSL-Win64/"
)
unset(_programfiles)
else ()
set(_OPENSSL_ROOT_HINTS
${OPENSSL_ROOT_DIR}
ENV OPENSSL_ROOT_DIR
)
endif ()
set(_OPENSSL_ROOT_HINTS_AND_PATHS
HINTS ${_OPENSSL_ROOT_HINTS}
PATHS ${_OPENSSL_ROOT_PATHS}
)
find_path(OPENSSL_INCLUDE_DIR
NAMES
openssl/ssl.h
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_INCLUDEDIR}
PATH_SUFFIXES
include
)
if(WIN32 AND NOT CYGWIN)
if(MSVC)
# /MD and /MDd are the standard values - if someone wants to use
# others, the libnames have to change here too
# use also ssl and ssleay32 in debug as fallback for openssl < 0.9.8b
# enable OPENSSL_MSVC_STATIC_RT to get the libs build /MT (Multithreaded no-DLL)
# In Visual C++ naming convention each of these four kinds of Windows libraries has it's standard suffix:
# * MD for dynamic-release
# * MDd for dynamic-debug
# * MT for static-release
# * MTd for static-debug
# Implementation details:
# We are using the libraries located in the VC subdir instead of the parent directory even though :
# libeay32MD.lib is identical to ../libeay32.lib, and
# ssleay32MD.lib is identical to ../ssleay32.lib
# enable OPENSSL_USE_STATIC_LIBS to use the static libs located in lib/VC/static
if (OPENSSL_MSVC_STATIC_RT)
set(_OPENSSL_MSVC_RT_MODE "MT")
else ()
set(_OPENSSL_MSVC_RT_MODE "MD")
endif ()
# Since OpenSSL 1.1, lib names are like libcrypto32MTd.lib and libssl32MTd.lib
if( "${CMAKE_SIZEOF_VOID_P}" STREQUAL "8" )
set(_OPENSSL_MSVC_ARCH_SUFFIX "64")
else()
set(_OPENSSL_MSVC_ARCH_SUFFIX "32")
endif()
if(OPENSSL_USE_STATIC_LIBS)
set(_OPENSSL_PATH_SUFFIXES
"lib/VC/static"
"VC/static"
"lib"
)
else()
set(_OPENSSL_PATH_SUFFIXES
"lib/VC"
"VC"
"lib"
)
endif ()
find_library(LIB_EAY_DEBUG
NAMES
libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
libcrypto${_OPENSSL_MSVC_RT_MODE}d
libcryptod
libeay32${_OPENSSL_MSVC_RT_MODE}d
libeay32d
cryptod
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
find_library(LIB_EAY_RELEASE
NAMES
libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
libcrypto${_OPENSSL_MSVC_RT_MODE}
libcrypto
libeay32${_OPENSSL_MSVC_RT_MODE}
libeay32
crypto
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
find_library(SSL_EAY_DEBUG
NAMES
libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
libssl${_OPENSSL_MSVC_RT_MODE}d
libssld
ssleay32${_OPENSSL_MSVC_RT_MODE}d
ssleay32d
ssld
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
find_library(SSL_EAY_RELEASE
NAMES
libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
libssl${_OPENSSL_MSVC_RT_MODE}
libssl
ssleay32${_OPENSSL_MSVC_RT_MODE}
ssleay32
ssl
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
set(LIB_EAY_LIBRARY_DEBUG "${LIB_EAY_DEBUG}")
set(LIB_EAY_LIBRARY_RELEASE "${LIB_EAY_RELEASE}")
set(SSL_EAY_LIBRARY_DEBUG "${SSL_EAY_DEBUG}")
set(SSL_EAY_LIBRARY_RELEASE "${SSL_EAY_RELEASE}")
include(${CMAKE_CURRENT_LIST_DIR}/SelectLibraryConfigurations.cmake)
select_library_configurations(LIB_EAY)
select_library_configurations(SSL_EAY)
mark_as_advanced(LIB_EAY_LIBRARY_DEBUG LIB_EAY_LIBRARY_RELEASE
SSL_EAY_LIBRARY_DEBUG SSL_EAY_LIBRARY_RELEASE)
set(OPENSSL_SSL_LIBRARY ${SSL_EAY_LIBRARY} )
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY_LIBRARY} )
elseif(MINGW)
# same player, for MinGW
set(LIB_EAY_NAMES crypto libeay32)
set(SSL_EAY_NAMES ssl ssleay32)
find_library(LIB_EAY
NAMES
${LIB_EAY_NAMES}
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
"lib/MinGW"
"lib"
)
find_library(SSL_EAY
NAMES
${SSL_EAY_NAMES}
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
"lib/MinGW"
"lib"
)
mark_as_advanced(SSL_EAY LIB_EAY)
set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
unset(LIB_EAY_NAMES)
unset(SSL_EAY_NAMES)
else()
# Not sure what to pick for -say- intel, let's use the toplevel ones and hope someone report issues:
find_library(LIB_EAY
NAMES
libcrypto
libeay32
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
find_library(SSL_EAY
NAMES
libssl
ssleay32
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
mark_as_advanced(SSL_EAY LIB_EAY)
set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
endif()
else()
find_library(OPENSSL_SSL_LIBRARY
NAMES
ssl
ssleay32
ssleay32MD
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
find_library(OPENSSL_CRYPTO_LIBRARY
NAMES
crypto
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
mark_as_advanced(OPENSSL_CRYPTO_LIBRARY OPENSSL_SSL_LIBRARY)
endif()
# compat defines
set(OPENSSL_SSL_LIBRARIES ${OPENSSL_SSL_LIBRARY})
set(OPENSSL_CRYPTO_LIBRARIES ${OPENSSL_CRYPTO_LIBRARY})
_OpenSSL_test_and_find_dependencies("${OPENSSL_SSL_LIBRARY}" "${OPENSSL_CRYPTO_LIBRARY}")
if(_OpenSSL_has_dependencies)
_OpenSSL_add_dependencies( OPENSSL_SSL_LIBRARIES "${OPENSSL_SSL_LIBRARY}" )
_OpenSSL_add_dependencies( OPENSSL_CRYPTO_LIBRARIES "${OPENSSL_CRYPTO_LIBRARY}" )
endif()
function(from_hex HEX DEC)
string(TOUPPER "${HEX}" HEX)
set(_res 0)
string(LENGTH "${HEX}" _strlen)
while (_strlen GREATER 0)
math(EXPR _res "${_res} * 16")
string(SUBSTRING "${HEX}" 0 1 NIBBLE)
string(SUBSTRING "${HEX}" 1 -1 HEX)
if (NIBBLE STREQUAL "A")
math(EXPR _res "${_res} + 10")
elseif (NIBBLE STREQUAL "B")
math(EXPR _res "${_res} + 11")
elseif (NIBBLE STREQUAL "C")
math(EXPR _res "${_res} + 12")
elseif (NIBBLE STREQUAL "D")
math(EXPR _res "${_res} + 13")
elseif (NIBBLE STREQUAL "E")
math(EXPR _res "${_res} + 14")
elseif (NIBBLE STREQUAL "F")
math(EXPR _res "${_res} + 15")
else()
math(EXPR _res "${_res} + ${NIBBLE}")
endif()
string(LENGTH "${HEX}" _strlen)
endwhile()
set(${DEC} ${_res} PARENT_SCOPE)
endfunction()
if(OPENSSL_INCLUDE_DIR AND EXISTS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h")
file(STRINGS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h" openssl_version_str
REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])+.*")
if(openssl_version_str)
# The version number is encoded as 0xMNNFFPPS: major minor fix patch status
# The status gives if this is a developer or prerelease and is ignored here.
# Major, minor, and fix directly translate into the version numbers shown in
# the string. The patch field translates to the single character suffix that
# indicates the bug fix state, which 00 -> nothing, 01 -> a, 02 -> b and so
# on.
string(REGEX REPLACE "^.*OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F]).*$"
"\\1;\\2;\\3;\\4;\\5" OPENSSL_VERSION_LIST "${openssl_version_str}")
list(GET OPENSSL_VERSION_LIST 0 OPENSSL_VERSION_MAJOR)
list(GET OPENSSL_VERSION_LIST 1 OPENSSL_VERSION_MINOR)
from_hex("${OPENSSL_VERSION_MINOR}" OPENSSL_VERSION_MINOR)
list(GET OPENSSL_VERSION_LIST 2 OPENSSL_VERSION_FIX)
from_hex("${OPENSSL_VERSION_FIX}" OPENSSL_VERSION_FIX)
list(GET OPENSSL_VERSION_LIST 3 OPENSSL_VERSION_PATCH)
if (NOT OPENSSL_VERSION_PATCH STREQUAL "00")
from_hex("${OPENSSL_VERSION_PATCH}" _tmp)
# 96 is the ASCII code of 'a' minus 1
math(EXPR OPENSSL_VERSION_PATCH_ASCII "${_tmp} + 96")
unset(_tmp)
# Once anyone knows how OpenSSL would call the patch versions beyond 'z'
# this should be updated to handle that, too. This has not happened yet
# so it is simply ignored here for now.
string(ASCII "${OPENSSL_VERSION_PATCH_ASCII}" OPENSSL_VERSION_PATCH_STRING)
endif ()
set(OPENSSL_VERSION "${OPENSSL_VERSION_MAJOR}.${OPENSSL_VERSION_MINOR}.${OPENSSL_VERSION_FIX}${OPENSSL_VERSION_PATCH_STRING}")
endif ()
endif ()
set(OPENSSL_LIBRARIES ${OPENSSL_SSL_LIBRARIES} ${OPENSSL_CRYPTO_LIBRARIES} )
list(REMOVE_DUPLICATES OPENSSL_LIBRARIES)
foreach(_comp IN LISTS OpenSSL_FIND_COMPONENTS)
if(_comp STREQUAL "Crypto")
if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
(EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
)
set(OpenSSL_${_comp}_FOUND TRUE)
else()
set(OpenSSL_${_comp}_FOUND FALSE)
endif()
elseif(_comp STREQUAL "SSL")
if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
(EXISTS "${OPENSSL_SSL_LIBRARY}" OR
EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
)
set(OpenSSL_${_comp}_FOUND TRUE)
else()
set(OpenSSL_${_comp}_FOUND FALSE)
endif()
else()
message(WARNING "${_comp} is not a valid OpenSSL component")
set(OpenSSL_${_comp}_FOUND FALSE)
endif()
endforeach()
unset(_comp)
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake)
find_package_handle_standard_args(OpenSSL
REQUIRED_VARS
OPENSSL_CRYPTO_LIBRARY
OPENSSL_INCLUDE_DIR
VERSION_VAR
OPENSSL_VERSION
HANDLE_COMPONENTS
FAIL_MESSAGE
"Could NOT find OpenSSL, try to set the path to OpenSSL root folder in the system variable OPENSSL_ROOT_DIR"
)
mark_as_advanced(OPENSSL_INCLUDE_DIR OPENSSL_LIBRARIES)
if(OPENSSL_FOUND)
if(NOT TARGET OpenSSL::Crypto AND
(EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
)
add_library(OpenSSL::Crypto UNKNOWN IMPORTED)
set_target_properties(OpenSSL::Crypto PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
if(EXISTS "${OPENSSL_CRYPTO_LIBRARY}")
set_target_properties(OpenSSL::Crypto PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES "C"
IMPORTED_LOCATION "${OPENSSL_CRYPTO_LIBRARY}")
endif()
if(EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
set_property(TARGET OpenSSL::Crypto APPEND PROPERTY
IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(OpenSSL::Crypto PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
IMPORTED_LOCATION_RELEASE "${LIB_EAY_LIBRARY_RELEASE}")
endif()
if(EXISTS "${LIB_EAY_LIBRARY_DEBUG}")
set_property(TARGET OpenSSL::Crypto APPEND PROPERTY
IMPORTED_CONFIGURATIONS DEBUG)
set_target_properties(OpenSSL::Crypto PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "C"
IMPORTED_LOCATION_DEBUG "${LIB_EAY_LIBRARY_DEBUG}")
endif()
_OpenSSL_target_add_dependencies(OpenSSL::Crypto)
endif()
if(NOT TARGET OpenSSL::SSL AND
(EXISTS "${OPENSSL_SSL_LIBRARY}" OR
EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
)
add_library(OpenSSL::SSL UNKNOWN IMPORTED)
set_target_properties(OpenSSL::SSL PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
if(EXISTS "${OPENSSL_SSL_LIBRARY}")
set_target_properties(OpenSSL::SSL PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES "C"
IMPORTED_LOCATION "${OPENSSL_SSL_LIBRARY}")
endif()
if(EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
set_property(TARGET OpenSSL::SSL APPEND PROPERTY
IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(OpenSSL::SSL PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
IMPORTED_LOCATION_RELEASE "${SSL_EAY_LIBRARY_RELEASE}")
endif()
if(EXISTS "${SSL_EAY_LIBRARY_DEBUG}")
set_property(TARGET OpenSSL::SSL APPEND PROPERTY
IMPORTED_CONFIGURATIONS DEBUG)
set_target_properties(OpenSSL::SSL PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "C"
IMPORTED_LOCATION_DEBUG "${SSL_EAY_LIBRARY_DEBUG}")
endif()
if(TARGET OpenSSL::Crypto)
set_target_properties(OpenSSL::SSL PROPERTIES
INTERFACE_LINK_LIBRARIES OpenSSL::Crypto)
endif()
_OpenSSL_target_add_dependencies(OpenSSL::SSL)
endif()
endif()
# Restore the original find library ordering
if(OPENSSL_USE_STATIC_LIBS)
set(CMAKE_FIND_LIBRARY_SUFFIXES ${_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
endif()

View File

@@ -1,268 +0,0 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageMessage.cmake)
# internal helper macro
macro(_FPHSA_FAILURE_MESSAGE _msg)
set (__msg "${_msg}")
if (FPHSA_REASON_FAILURE_MESSAGE)
string(APPEND __msg "\n Reason given by package: ${FPHSA_REASON_FAILURE_MESSAGE}\n")
endif()
if (${_NAME}_FIND_REQUIRED)
message(FATAL_ERROR "${__msg}")
else ()
if (NOT ${_NAME}_FIND_QUIETLY)
message(STATUS "${__msg}")
endif ()
endif ()
endmacro()
# internal helper macro to generate the failure message when used in CONFIG_MODE:
macro(_FPHSA_HANDLE_FAILURE_CONFIG_MODE)
# <PackageName>_CONFIG is set, but FOUND is false, this means that some other of the REQUIRED_VARS was not found:
if(${_NAME}_CONFIG)
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: missing:${MISSING_VARS} (found ${${_NAME}_CONFIG} ${VERSION_MSG})")
else()
# If _CONSIDERED_CONFIGS is set, the config-file has been found, but no suitable version.
# List them all in the error message:
if(${_NAME}_CONSIDERED_CONFIGS)
set(configsText "")
list(LENGTH ${_NAME}_CONSIDERED_CONFIGS configsCount)
math(EXPR configsCount "${configsCount} - 1")
foreach(currentConfigIndex RANGE ${configsCount})
list(GET ${_NAME}_CONSIDERED_CONFIGS ${currentConfigIndex} filename)
list(GET ${_NAME}_CONSIDERED_VERSIONS ${currentConfigIndex} version)
string(APPEND configsText "\n ${filename} (version ${version})")
endforeach()
if (${_NAME}_NOT_FOUND_MESSAGE)
if (FPHSA_REASON_FAILURE_MESSAGE)
string(PREPEND FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}\n ")
else()
set(FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}")
endif()
else()
string(APPEND configsText "\n")
endif()
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} ${VERSION_MSG}, checked the following files:${configsText}")
else()
# Simple case: No Config-file was found at all:
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: found neither ${_NAME}Config.cmake nor ${_NAME_LOWER}-config.cmake ${VERSION_MSG}")
endif()
endif()
endmacro()
function(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FIRST_ARG)
# Set up the arguments for `cmake_parse_arguments`.
set(options CONFIG_MODE HANDLE_COMPONENTS)
set(oneValueArgs FAIL_MESSAGE REASON_FAILURE_MESSAGE VERSION_VAR FOUND_VAR)
set(multiValueArgs REQUIRED_VARS)
# Check whether we are in 'simple' or 'extended' mode:
set(_KEYWORDS_FOR_EXTENDED_MODE ${options} ${oneValueArgs} ${multiValueArgs} )
list(FIND _KEYWORDS_FOR_EXTENDED_MODE "${_FIRST_ARG}" INDEX)
if(${INDEX} EQUAL -1)
set(FPHSA_FAIL_MESSAGE ${_FIRST_ARG})
set(FPHSA_REQUIRED_VARS ${ARGN})
set(FPHSA_VERSION_VAR)
else()
cmake_parse_arguments(FPHSA "${options}" "${oneValueArgs}" "${multiValueArgs}" ${_FIRST_ARG} ${ARGN})
if(FPHSA_UNPARSED_ARGUMENTS)
message(FATAL_ERROR "Unknown keywords given to FIND_PACKAGE_HANDLE_STANDARD_ARGS(): \"${FPHSA_UNPARSED_ARGUMENTS}\"")
endif()
if(NOT FPHSA_FAIL_MESSAGE)
set(FPHSA_FAIL_MESSAGE "DEFAULT_MSG")
endif()
# In config-mode, we rely on the variable <PackageName>_CONFIG, which is set by find_package()
# when it successfully found the config-file, including version checking:
if(FPHSA_CONFIG_MODE)
list(INSERT FPHSA_REQUIRED_VARS 0 ${_NAME}_CONFIG)
list(REMOVE_DUPLICATES FPHSA_REQUIRED_VARS)
set(FPHSA_VERSION_VAR ${_NAME}_VERSION)
endif()
if(NOT FPHSA_REQUIRED_VARS)
message(FATAL_ERROR "No REQUIRED_VARS specified for FIND_PACKAGE_HANDLE_STANDARD_ARGS()")
endif()
endif()
# now that we collected all arguments, process them
if("x${FPHSA_FAIL_MESSAGE}" STREQUAL "xDEFAULT_MSG")
set(FPHSA_FAIL_MESSAGE "Could NOT find ${_NAME}")
endif()
list(GET FPHSA_REQUIRED_VARS 0 _FIRST_REQUIRED_VAR)
string(TOUPPER ${_NAME} _NAME_UPPER)
string(TOLOWER ${_NAME} _NAME_LOWER)
if(FPHSA_FOUND_VAR)
if(FPHSA_FOUND_VAR MATCHES "^${_NAME}_FOUND$" OR FPHSA_FOUND_VAR MATCHES "^${_NAME_UPPER}_FOUND$")
set(_FOUND_VAR ${FPHSA_FOUND_VAR})
else()
message(FATAL_ERROR "The argument for FOUND_VAR is \"${FPHSA_FOUND_VAR}\", but only \"${_NAME}_FOUND\" and \"${_NAME_UPPER}_FOUND\" are valid names.")
endif()
else()
set(_FOUND_VAR ${_NAME_UPPER}_FOUND)
endif()
# collect all variables which were not found, so they can be printed, so the
# user knows better what went wrong (#6375)
set(MISSING_VARS "")
set(DETAILS "")
# check if all passed variables are valid
set(FPHSA_FOUND_${_NAME} TRUE)
foreach(_CURRENT_VAR ${FPHSA_REQUIRED_VARS})
if(NOT ${_CURRENT_VAR})
set(FPHSA_FOUND_${_NAME} FALSE)
string(APPEND MISSING_VARS " ${_CURRENT_VAR}")
else()
string(APPEND DETAILS "[${${_CURRENT_VAR}}]")
endif()
endforeach()
if(FPHSA_FOUND_${_NAME})
set(${_NAME}_FOUND TRUE)
set(${_NAME_UPPER}_FOUND TRUE)
else()
set(${_NAME}_FOUND FALSE)
set(${_NAME_UPPER}_FOUND FALSE)
endif()
# component handling
unset(FOUND_COMPONENTS_MSG)
unset(MISSING_COMPONENTS_MSG)
if(FPHSA_HANDLE_COMPONENTS)
foreach(comp ${${_NAME}_FIND_COMPONENTS})
if(${_NAME}_${comp}_FOUND)
if(NOT DEFINED FOUND_COMPONENTS_MSG)
set(FOUND_COMPONENTS_MSG "found components:")
endif()
string(APPEND FOUND_COMPONENTS_MSG " ${comp}")
else()
if(NOT DEFINED MISSING_COMPONENTS_MSG)
set(MISSING_COMPONENTS_MSG "missing components:")
endif()
string(APPEND MISSING_COMPONENTS_MSG " ${comp}")
if(${_NAME}_FIND_REQUIRED_${comp})
set(${_NAME}_FOUND FALSE)
string(APPEND MISSING_VARS " ${comp}")
endif()
endif()
endforeach()
set(COMPONENT_MSG "${FOUND_COMPONENTS_MSG} ${MISSING_COMPONENTS_MSG}")
string(APPEND DETAILS "[c${COMPONENT_MSG}]")
endif()
# version handling:
set(VERSION_MSG "")
set(VERSION_OK TRUE)
# check with DEFINED here as the requested or found version may be "0"
if (DEFINED ${_NAME}_FIND_VERSION)
if(DEFINED ${FPHSA_VERSION_VAR})
set(_FOUND_VERSION ${${FPHSA_VERSION_VAR}})
if(${_NAME}_FIND_VERSION_EXACT) # exact version required
# count the dots in the version string
string(REGEX REPLACE "[^.]" "" _VERSION_DOTS "${_FOUND_VERSION}")
# add one dot because there is one dot more than there are components
string(LENGTH "${_VERSION_DOTS}." _VERSION_DOTS)
if (_VERSION_DOTS GREATER ${_NAME}_FIND_VERSION_COUNT)
# Because of the C++ implementation of find_package() ${_NAME}_FIND_VERSION_COUNT
# is at most 4 here. Therefore a simple lookup table is used.
if (${_NAME}_FIND_VERSION_COUNT EQUAL 1)
set(_VERSION_REGEX "[^.]*")
elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 2)
set(_VERSION_REGEX "[^.]*\\.[^.]*")
elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 3)
set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*")
else ()
set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*\\.[^.]*")
endif ()
string(REGEX REPLACE "^(${_VERSION_REGEX})\\..*" "\\1" _VERSION_HEAD "${_FOUND_VERSION}")
unset(_VERSION_REGEX)
if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _VERSION_HEAD)
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
set(VERSION_OK FALSE)
else ()
set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
endif ()
unset(_VERSION_HEAD)
else ()
if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _FOUND_VERSION)
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
set(VERSION_OK FALSE)
else ()
set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
endif ()
endif ()
unset(_VERSION_DOTS)
else() # minimum version specified:
if (${_NAME}_FIND_VERSION VERSION_GREATER _FOUND_VERSION)
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is at least \"${${_NAME}_FIND_VERSION}\"")
set(VERSION_OK FALSE)
else ()
set(VERSION_MSG "(found suitable version \"${_FOUND_VERSION}\", minimum required is \"${${_NAME}_FIND_VERSION}\")")
endif ()
endif()
else()
# if the package was not found, but a version was given, add that to the output:
if(${_NAME}_FIND_VERSION_EXACT)
set(VERSION_MSG "(Required is exact version \"${${_NAME}_FIND_VERSION}\")")
else()
set(VERSION_MSG "(Required is at least version \"${${_NAME}_FIND_VERSION}\")")
endif()
endif()
else ()
# Check with DEFINED as the found version may be 0.
if(DEFINED ${FPHSA_VERSION_VAR})
set(VERSION_MSG "(found version \"${${FPHSA_VERSION_VAR}}\")")
endif()
endif ()
if(VERSION_OK)
string(APPEND DETAILS "[v${${FPHSA_VERSION_VAR}}(${${_NAME}_FIND_VERSION})]")
else()
set(${_NAME}_FOUND FALSE)
endif()
# print the result:
if (${_NAME}_FOUND)
FIND_PACKAGE_MESSAGE(${_NAME} "Found ${_NAME}: ${${_FIRST_REQUIRED_VAR}} ${VERSION_MSG} ${COMPONENT_MSG}" "${DETAILS}")
else ()
if(FPHSA_CONFIG_MODE)
_FPHSA_HANDLE_FAILURE_CONFIG_MODE()
else()
if(NOT VERSION_OK)
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: ${VERSION_MSG} (found ${${_FIRST_REQUIRED_VAR}})")
else()
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} (missing:${MISSING_VARS}) ${VERSION_MSG}")
endif()
endif()
endif ()
set(${_NAME}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
set(${_NAME_UPPER}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
endfunction()

View File

@@ -1,48 +0,0 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
#[=======================================================================[.rst:
FindPackageMessage
------------------
.. code-block:: cmake
find_package_message(<name> "message for user" "find result details")
This function is intended to be used in FindXXX.cmake modules files.
It will print a message once for each unique find result. This is
useful for telling the user where a package was found. The first
argument specifies the name (XXX) of the package. The second argument
specifies the message to display. The third argument lists details
about the find result so that if they change the message will be
displayed again. The macro also obeys the QUIET argument to the
find_package command.
Example:
.. code-block:: cmake
if(X11_FOUND)
find_package_message(X11 "Found X11: ${X11_X11_LIB}"
"[${X11_X11_LIB}][${X11_INCLUDE_DIR}]")
else()
...
endif()
#]=======================================================================]
function(find_package_message pkg msg details)
# Avoid printing a message repeatedly for the same find result.
if(NOT ${pkg}_FIND_QUIETLY)
string(REPLACE "\n" "" details "${details}")
set(DETAILS_VAR FIND_PACKAGE_MESSAGE_DETAILS_${pkg})
if(NOT "${details}" STREQUAL "${${DETAILS_VAR}}")
# The message has not yet been printed.
message(STATUS "${msg}")
# Save the find details in the cache to avoid printing the same
# message again.
set("${DETAILS_VAR}" "${details}"
CACHE INTERNAL "Details about finding ${pkg}")
endif()
endif()
endfunction()

View File

@@ -1,15 +1,14 @@
rm ./sist2
cp ../sist2 .
rm ./sist2 sist2_debug
cp ../sist2.gz .
gzip -d sist2.gz
strip sist2
version=$(./sist2 --version)
echo "Version ${version}"
docker build . -t simon987/sist2:${version} -t simon987/sist2:latest \
-t docker.pkg.github.com/simon987/sist2/sist2:latest -t docker.pkg.github.com/simon987/sist2/sist2:${version}
docker build . -t simon987/sist2:${version} -t simon987/sist2:latest
docker push simon987/sist2:${version}
docker push simon987/sist2:latest
docker push docker.pkg.github.com/simon987/sist2/sist2:latest
docker push docker.pkg.github.com/simon987/sist2/sist2:${version}
docker run --rm -it simon987/sist2 -v
docker run --rm simon987/sist2 -v

View File

@@ -8,7 +8,7 @@ sist2 (Simple incremental search tool)
*Warning: sist2 is in early development*
![sist2.png](sist2.png)
![sist2.png](docs/sist2.png)
## Features
@@ -21,12 +21,16 @@ sist2 (Simple incremental search tool)
* Automatic tagging from file attributes via [user scripts](scripting/README.md)
* Recursive scan inside archive files \*\*
* OCR support with tesseract \*\*\*
* Stats page & disk utilisation visualization
\* See [format support](#format-support)
\*\* See [Archive files](#archive-files)
\*\*\* See [OCR](#ocr)
![stats](docs/stats.png)
## Getting Started
1. Have an Elasticsearch (>= 6.X.X) instance running
@@ -49,7 +53,7 @@ sist2 (Simple incremental search tool)
1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
1. *(or)* `docker pull simon987/sist2:latest`
1. See [Usage guide](USAGE.md)
1. See [Usage guide](DOCS/USAGE.md)
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
@@ -57,7 +61,7 @@ sist2 (Simple incremental search tool)
## Example usage
See [Usage guide](USAGE.md) for more details
See [Usage guide](DOCS/USAGE.md) for more details
1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
1. Push index to Elasticsearch: `sist2 index ./docs_idx`
@@ -74,8 +78,10 @@ pdf,xps,cbz,cbr,fb2,epub | MuPDF | text+ocr | yes, `png` | title |
`image/*` | ffmpeg | - | yes, `jpeg` | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - |
html, xml | *(none)* | yes | no | - |
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
docx, xlsx, pptx | libOPC | yes | no | no |
docx, xlsx, pptx | *(none)* | yes | no | creator, modified_by, title |
mobi, azw, azw3 | libmobi | yes | no | author, title |
\* *See [Archive files](#archive-files)*
@@ -113,23 +119,17 @@ sist2 scan --ocr eng ~/Books/Textbooks/
## Build from source
You can compile **sist2** by yourself if you don't want to use the pre-compiled
binaries.
binaries (GCC 7+ required).
1. Install compile-time dependencies
*(Debian)*
```bash
apt install git cmake pkg-config libglib2.0-dev \
libssl-dev uuid-dev python3 libmagic-dev libfreetype6-dev \
libcurl4-openssl-dev libbz2-dev yasm libharfbuzz-dev ragel \
libarchive-dev libtiff5 libpng16-16 libpango1.0-dev \
libxml2-dev libopenjp2-7-dev libleptonica-dev
vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libuuid libmagic
```
2. Build
```bash
git clone --recurse-submodules https://github.com/simon987/sist2
./scripts/get_static_libs.sh
cmake .
git clone --recursive https://github.com/simon987/sist2/
cmake -D <VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
make
```

1
cJSON

Submodule cJSON deleted from e8077d0150

13
ci/build.sh Normal file → Executable file
View File

@@ -1,7 +1,16 @@
#!/usr/bin/env bash
./scripts/get_static_libs.sh
cmake .
rm *.gz
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
make
strip sist2
gzip -9 sist2
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
make
cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2
tar -czf sist2_debug.tar.gz sist2_debug libasan.so.2

View File

@@ -48,10 +48,10 @@ Index options
--batch-size=<int> Index batch size. DEFAULT: 100
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
Web options
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--bind=<str> Listen on this address. DEFAULT=localhost
--port=<str> Listen on this port. DEFAULT=4090
--bind=<str> Listen on this address. DEFAULT=localhost:4090
--auth=<str> Basic auth in user:password format
Made by simon987 <me@simon987.net>. Released under GPL-3.0
@@ -82,7 +82,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
* list: Only get file names as text
* shallow: Don't parse archives inside archives.
* recurse: Scan archives recursively (default)
* `--ocr` See [OCR](README.md#OCR)
* `--ocr` See [OCR](../README.md#OCR)
* `-e, --exclude` Regex pattern to exclude files. A file is excluded if the pattern matches any
part of the full absolute path.
@@ -94,6 +94,13 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
* `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"` Exclude the
`/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories
* `--fast` Only index file names and mime type
* `--treemap-threshold` Directories smaller than (`treemap-threshold` * `<total size of the index>`)
will not be considered for the disk utilisation visualization; their size will be added to
the parent directory. If the parent directory is still smaller than the threshold, it will also be "merged upwards"
and so on.
In effect, smaller `treemap-threshold` values will yield a more detailed
(but also a more cluttered and harder to read) visualization.
### Scan examples
@@ -122,6 +129,11 @@ documents.idx/
├── _index_139965425223424
├── _index_139965433616128
├── _index_139965442008832
├── _index_139965442008832
├── treemap.csv
├── agg_mime.csv
├── agg_date.csv
├── add_size.csv
└── thumbs
├── data.mdb
└── lock.mdb
@@ -137,6 +149,8 @@ database containing the thumbnails.
The `descriptor.json` file contains general information about the index. The
following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`.
The `.csv` are pre-computed aggregations necessary for the stats page.
*Advanced usage*
@@ -189,7 +203,7 @@ my_index/
}
```
You can find the full list of supported fields [here](src/io/serialize.c#L90)
You can find the full list of supported fields [here](../src/io/serialize.c#L90)
The `_keyword.*` items will be indexed and searchable as **keyword** fields (only full matches allowed).
The `_text.*` items will be indexed and searchable as **text** fields (fuzzy searching allowed)
@@ -244,14 +258,13 @@ sist2 index --print ./my_index/ | jq | less
### Web options
* `--es-url=<str>` Elasticsearch url.
* `--bind=<str>` Listen on this address.
* `--port=<str>` Listen on this port.
* `--auth=<str>` Basic auth in user:password format
### Web examples
**Single index**
```bash
sist2 web --auth admin:hunter2 --bind 0.0.0.0 --port 8888 my_index
sist2 web --auth admin:hunter2 --bind 0.0.0.0:8888 my_index
```
**Multiple indices**

View File

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 26 KiB

View File

Before

Width:  |  Height:  |  Size: 889 KiB

After

Width:  |  Height:  |  Size: 889 KiB

BIN
docs/stats.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 167 KiB

Submodule lib/bzip2-1.0.6 deleted from 288acf97a1

Submodule lib/ffmpeg deleted from e27a35e045

Submodule lib/harfbuzz deleted from b7617f6b3c

Submodule lib/leptonica deleted from 320b4bbb02

Submodule lib/libmagic deleted from 1249b5cd02

Submodule lib/libpng deleted from 301f7a1429

Submodule lib/libtiff deleted from a6d3c1d64b

Submodule lib/mupdf deleted from 1e1ac03fe4

Submodule lib/onion deleted from 2b3b230b79

Submodule lib/openjpeg deleted from 563ecfb55c

Submodule lib/tesseract deleted from 90405ad0e3

1
lmdb

Submodule lmdb deleted from 5c012bbe03

View File

@@ -11,11 +11,24 @@
"type": "text",
"analyzer": "path_analyzer",
"fielddata": true,
"index_prefixes": {}
"fields": {
"nGram": {
"type": "text",
"analyzer": "my_nGram"
},
"text": {
"type": "text",
"analyzer": "content_analyzer"
}
}
},
"mime": {
"type": "keyword"
},
"thumbnail": {
"type": "keyword",
"index": false
},
"videoc": {
"type": "keyword",
"index": false
@@ -133,6 +146,12 @@
},
"exif_user_comment": {
"type": "text"
},
"author": {
"type": "text"
},
"modified_by": {
"type": "text"
}
}
}

View File

@@ -2,15 +2,15 @@
rm -rf index.sist2/
rm web/js/bundle.js 2> /dev/null
cat `ls web/js/*.min.js` > web/js/bundle.js
cat web/js/{util,dom,search}.js >> web/js/bundle.js
rm src/static/js/bundle.js 2> /dev/null
cat `ls src/static/js/*.min.js` > src/static/js/bundle.js
cat src/static/js/{util,dom}.js >> src/static/js/bundle.js
rm web/css/bundle*.css 2> /dev/null
cat web/css/*.min.css > web/css/bundle.css
cat web/css/light.css >> web/css/bundle.css
cat web/css/*.min.css > web/css/bundle_dark.css
cat web/css/dark.css >> web/css/bundle_dark.css
rm src/static/css/bundle*.css 2> /dev/null
cat src/static/css/*.min.css > src/static/css/bundle.css
cat src/static/css/light.css >> src/static/css/bundle.css
cat src/static/css/*.min.css > src/static/css/bundle_dark.css
cat src/static/css/dark.css >> src/static/css/bundle_dark.css
python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c

View File

@@ -1,128 +0,0 @@
#!/usr/bin/env bash
THREADS=$(nproc)
cd lib
cd mupdf
CFLAGS=-fPIC make USE_SYSTEM_HARFBUZZ=yes USE_SYSTEM_OPENJPEG=yes HAVE_X11=no HAVE_GLUT=no -j $THREADS
cd ..
mv mupdf/build/release/libmupdf.a .
mv mupdf/build/release/libmupdf-third.a .
# openjp2
cd openjpeg
cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3 -DNDEBUG -fPIC"
make -j $THREADS
cd ..
mv openjpeg/bin/libopenjp2.a .
# harfbuzz
cd harfbuzz
./autogen.sh
CFLAGS=-fPIC ./configure --disable-shared --enable-static
make -j $THREADS
cd ..
mv harfbuzz/src/.libs/libharfbuzz.a .
# ffmpeg
cd ffmpeg
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \
--disable-ffprobe --disable-doc\
--disable-manpages --disable-postproc --disable-avfilter \
--disable-alsa --disable-lzma --disable-xlib --disable-debug\
--disable-vdpau --disable-vaapi --disable-sdl2 --disable-network\
--extra-cflags=-fPIC
make -j $THREADS
cd ..
mv ffmpeg/libavcodec/libavcodec.a .
mv ffmpeg/libavformat/libavformat.a .
mv ffmpeg/libavutil/libavutil.a .
mv ffmpeg/libswresample/libswresample.a .
mv ffmpeg/libswscale/libswscale.a .
# onion
cd onion
mkdir build 2> /dev/null
cd build
cmake -DONION_USE_SSL=false -DONION_USE_PAM=false -DONION_USE_PNG=false -DONION_USE_JPEG=false \
-DONION_USE_JPEG=false -DONION_USE_XML2=false -DONION_USE_SYSTEMD=false -DONION_USE_SQLITE3=false \
-DONION_USE_REDIS=false -DONION_USE_GC=false -DONION_USE_TESTS=false -DONION_EXAMPLES=false \
-DONION_USE_BINDINGS_CPP=false ..
make -j $THREADS
cd ../..
mv onion/build/src/onion/libonion_static.a .
#bzip2
cd bzip2-1.0.6
make -j $THREADS
cd ..
mv bzip2-1.0.6/libbz2.a .
# magic
cd libmagic
./autogen.sh
./configure --enable-static --disable-shared
make -j $THREADS
cd ..
mv libmagic/src/.libs/libmagic.a .
# tesseract
cd tesseract
mkdir build
cd build
cmake -DSTATIC=on -DBUILD_TRAINING_TOOLS=off -DBUILD_TESTS=off -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_CXX_FLAGS="-fPIC" -DAUTO_OPTIMIZE=off ..
make -j $THREADS
cd ../..
mv tesseract/build/libtesseract.a .
# leptonica
cd leptonica
./autogen.sh
CFLAGS="-fPIC" ./configure --without-zlib --without-jpeg --without-giflib \
--without-giflib --without-libwebp --without-libwebpmux --without-libopenjpeg \
--enable-static --disable-shared
make -j $THREADS
cd ..
mv leptonica/src/.libs/liblept.a .
# tiff
cd libtiff
./autogen.sh
CFLAGS="-fPIC" CXXFLAGS="-fPIC" CXX_FLAGS="-fPIC" ./configure --enable-static --disable-shared --disable-lzw --disable-jpeg --disable-webp \
--disable-lzma --disable-zstd --disable-jbig
make -j $THREADS
cd ..
mv libtiff/libtiff/.libs/libtiff.a .
# png
cd libpng
CFLAGS="-fPIC" ./configure --enable-static --disable-shared
make -j $THREADS
cd ..
mv libpng/.libs/libpng16.a .
# openssl...
git clone --depth 1 -b OpenSSL_1_1_0-stable https://github.com/openssl/openssl
cd openssl
./config --prefix=$(pwd)/../ssl
make depend
make -j $THREADS
make install
cd ..
mv ./openssl/libcrypto.a ./openssl/libssl.a .
# curl
wget -nc https://curl.haxx.se/download/curl-7.68.0.tar.gz
tar -xzf curl-7.68.0.tar.gz
cd curl-7.68.0
./configure --disable-ldap --disable-ldaps --without-librtmp --disable-rtsp --disable-crypto-auth \
--disable-smtp --without-libidn2 --without-nghttp2 --without-brotli --enable-static --disable-shared \
--without-libpsl --with-ssl=$(pwd)/../ssl
make -j $THREADS
cd ..
mv curl-7.68.0/lib/.libs/libcurl.a .

View File

@@ -14,4 +14,5 @@ def clean(filepath):
for file in files:
with open(file, "r") as f:
data = json.dumps(json.load(f), separators=(",", ":")).encode()
data += b'\0'
print("char %s[%d] = {%s};" % (clean(file), len(data), ",".join(str(int(b)) for b in data)))

View File

@@ -2,10 +2,14 @@ application/arj, arj
application/base64, mme
application/binhex, hqx
application/book, boo|book
application/CDFV2-corrupt,
application/CDFV2, sdv
application/clariscad, ccad
application/commonground, dp
application/csv,
application/dicom, dcm
application/drafting, drw
application/epub+zip, epub
application/freeloader, frl
application/futuresplash, spl
application/groupwise, vew
@@ -17,7 +21,6 @@ application/inf, inf
application/java-archive, jar
application/java, class
application/javascript,
application/x-archive, a
application/json, json
application/marc, mrc
application/mbedlet, mbd
@@ -27,7 +30,9 @@ application/msword, doc|dot|w6w|wiz|word
application/netmc, mcp
application/octet-stream, bin|dump|gpg
application/oda, oda
application/ogg, ogv
application/pdf, pdf
application/pgp-keys,
application/pgp-signature, pgp
application/pkcs7-signature, p7s
application/pkix-cert, cer|crt
@@ -43,6 +48,10 @@ application/vda, vda
application/vnd.fdf, fdf
application/vnd.font-fontforge-sfd, sfd
application/vnd.hp-hpgl, hgl|hpg|hpgl
application/vnd.iccprofile, icm
application/vnd.iccprofile, icm
application/vnd.lotus-1-2-3,
application/vnd.ms-cab-compressed, cab
application/vnd.ms-excel, xlb|xlc|xll|xlm|xls|xlw
application/vnd.ms-fontobject, eot
application/vnd.ms-opentype, otf
@@ -54,45 +63,75 @@ application/vnd.ms-project, mpp
application/vnd.oasis.opendocument.base, odb
application/vnd.oasis.opendocument.formula, odf
application/vnd.oasis.opendocument.graphics, odg
application/vnd.oasis.opendocument.presentation, odp
application/vnd.oasis.opendocument.spreadsheet, ods
application/vnd.oasis.opendocument.text, odt
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
application/vnd.symbian.install,
application/vnd.tcpdump.pcap, pcap
application/vnd.wap.wmlc, wmlc
application/vnd.wap.wmlscriptc, wmlsc
application/vnd.xara, web
application/vocaltec-media-desc, vmd
application/vocaltec-media-file, vmf
application/warc, warc
application/winhelp, hlp
application/wordperfect6.0, w60
application/wordperfect6.1, w61
application/wordperfect, wp|wp5|wp6|wpd
application/x-123, wk1
application/x-7z-compressed, 7z
application/x-aim, aim
application/x-apple-diskimage,
application/x-arc,
application/x-archive, a
application/x-atari-7800-rom, a78
application/x-authorware-bin, aab
application/x-authorware-map, aam
application/x-authorware-seg, aas
application/x-avira-qua,
application/x-bcpio, bcpio
application/x-bittorrent, torrent
application/x-bsh, bsh
application/x-bytecode.python, pyc
application/x-bzip2, boz|bz2
application/x-bzip, bz
application/x-cbr, cbr
application/x-cbz, cbz
application/x-cdlink, vcd
application/x-chat, cha|chat
application/x-chrome-extension,
application/x-cocoa, cco
application/x-conference, nsc
application/x-coredump,
application/x-cpio, cpio
application/x-dbf, dbf
application/x-dbt,
application/x-debian-package, deb
application/x-deepv, deepv
application/x-director, dcr|dir|dxr
application/x-dmp, dmp
application/x-dosdriver,
application/x-dosexec, dll
application/x-dvi, dvi
application/x-elc, elc
application/x-empty,
application/x-envoy, env|evy
application/x-esrehber, es
application/x-excel, xla|xld|xlk|xlt|xlv
application/x-executable, exe
application/x-font-gdos,
application/x-font-pf2, pf2
application/x-font-pfm, pfm
application/x-font-sfn,
application/x-font-ttf, ttf|ttc
application/x-fptapplication/x-dbt,
application/x-freelance, pre
application/x-gamecube-rom,
application/x-gdbm,
application/x-gettext-translation,
application/x-git,
application/x-gsp, gsp
application/x-gss, gss
@@ -102,46 +141,68 @@ application/x-hdf, hdf
application/x-helpfile, help
application/x-httpd-imap, imap
application/x-ima, ima
application/x-innosetup,
application/x-internett-signup, ins
application/x-inventor, iv
application/x-ip2, ip
application/x-java-applet,
application/x-java-commerce, jcm
application/x-java-image,
application/x-java-jmod, jmod
application/x-java-keystore,
application/x-kdelnk,
application/x-koan, skd|skm|skp|skt
application/x-latex, latex|ltx
application/x-livescreen, ivy
application/x-lotus, wq1
application/x-lz4+json, jsonlz4
application/x-lz4, lz4
application/x-lz4, lz4
application/x-lzh-compressed,
application/x-lzh, lzh
application/x-lzip, lz
application/x-lzma, lzma
application/x-lzop, lzo
application/x-lzx, lzx
application/x-mach-binary, jnilib|dylib
application/x-mach-executable,
application/x-magic-cap-package-1.0, mc$
application/x-mathcad, mcd
application/x-maxis-dbpf,
application/x-meme, mm
application/x-midi, midi
application/x-mif, mif
application/x-mix-transfer, nix
application/xml, opf
application/x-mobipocket-ebook, mobi
application/vnd.amazon.mobi8-ebook, azw|azw3
application/x-msaccess, accdb
application/x-ms-compress-szdd, fon
application/x-ms-pdb, pdb
application/x-ms-reader, lit
application/x-n64-rom, z64
application/x-navi-animation, ani
application/x-navidoc, nvd
application/x-navimap, map
application/x-navistyle, stl
application/x-nes-rom, nes
application/x-netcdf, cdf|nc
application/x-newton-compatible-pkg, pkg
application/x-nintendo-ds-rom,
application/x-object, o
application/x-omcdatamaker, omcd
application/x-omc, omc
application/x-omcregerator, omcr
application/x-pagemaker, pm4|pm5
application/x-pcl, pcl
application/x-pgp-keyring,
application/x-pixclscript, plx
application/x-pkcs7-certreqresp, p7r
application/x-pkcs7-signature, p7a
application/x-project, mpc|mpt|mpv|mpx
application/x-qpro, wb1
application/x-rar, rar
application/x-rpm, rpm
application/x-sdp, sdp
application/x-sea, sea
application/x-seelogo, sl
@@ -149,12 +210,17 @@ application/x-setupscript,
application/x-sharedlib, so
application/x-shar, shar
application/x-shockwave-flash, swf
application/x-snappy-framed,
application/x-sprite, spr|sprite
application/x-sqlite3,
application/x-stargallery-thm,
application/x-stuffit, sit
application/x-sv4cpio, sv4cpio
application/x-sv4crc, sv4crc
application/x-tar, tar
application/x-tbook, sbk|tbk
application/x-terminfo,
application/x-terminfo2,
application/x-texinfo, texi|texinfo
application/x-tex-tfm, tfm
application/x-ustar, ustar
@@ -163,16 +229,22 @@ application/x-vnd.audioexplosion.mzz, mzz
application/x-vnd.ls-xpix, xpix
application/x-vrml, vrml
application/x-wais-source, src|wsrc
application/x-wine-extension-ini,
application/x-wintalk, wtk
application/x-world, svr
application/x-wri, wri
application/x-x509-ca-cert, der
application/x-xz, xz
application/x-zip,
application/x-zstd, zst
application/zip, zip
application/zlib, z
!audio/basic, au
audio/it, it
audio/make, funk|my|pfunk
audio/midi, kar
audio/mid, rmi
audio/mp4, m4b
audio/mpeg, m2a|mpa
audio/ogg, ogg
audio/s3m, s3m
@@ -180,7 +252,10 @@ audio/tsp-audio, tsi
audio/tsplayer, tsp
audio/vnd.qcelp, qcp
audio/voxware, vox
audio/x-aiff, aiff|aif
audio/x-flac, flac
audio/x-gsm, gsd|gsm
audio/x-hx-aac-adts,
audio/x-jam, jam
audio/x-liveaudio, lam
audio/x-m4a, m4a
@@ -194,17 +269,24 @@ audio/x-nspaudio, lma
audio/x-pn-realaudio, ram|rm|rmm|rmp
audio/x-psid, sid
audio/x-realaudio, ra
audio/x-s3m,
audio/x-twinvq-plugin, vqe|vql
audio/x-twinvq, vqf
audio/x-voc, voc
audio/x-wav, wav
!audio/x-xbox360-executable, xex
!audio/x-xbox-executable, xbe
font/otf,
font/sfnt,
font/woff2, woff2
font/woff, woff
image/bmp,
image/cmu-raster, rast
image/fif, fif
image/florian, flo|turbot
image/g3fax, g3
image/gif, gif
image/heic, heic
image/ief, ief|iefs
image/jpeg, jfif|jfif-tbnl|jpe|jpeg|jpg
image/jutvision, jut
@@ -213,6 +295,9 @@ image/pict, pic|pict
image/png, png|x-png
!image/svg, svg
!image/svg+xml,
image/tiff,
!image/vnd.adobe.photoshop, psd
!image/vnd.djvu, djvu
image/vnd.fpx, fpx
image/vnd.microsoft.icon,
image/vnd.rn-realflash, rf
@@ -220,9 +305,15 @@ image/vnd.rn-realpix, rp
image/vnd.wap.wbmp, wbmp
image/vnd.xiff, xif
image/webp, webp
image/wmf,
image/x-3ds, 3ds
image/x-award-bioslogo,
image/x-cmu-raster, ras
image/x-cur, tga
image/x-dwg, dwg|dxf|svf
image/x-eps,
image/x-exr, exr
image/x-gem,
image/x-icns,
!image/x-icon, ico
image/x-jg, art
@@ -236,32 +327,30 @@ image/x-portable-graymap, pgm
image/x-portable-pixmap, ppm
image/x-quicktime, qif|qti|qtif
image/x-rgb, rgb
image/x-tga,
image/x-tiff, tif|tiff
image/tiff,
image/x-win-bitmap,
!image/x-xcf, xcf
!image/x-xpixmap, xpm
image/x-xwindowdump, xwd
message/news,
message/rfc822, mht|mhtml|mime
model/vnd.dwf, dwf
model/vnd.gdl, gdl
model/vnd.gs.gdl, gdsl
model/vrml, wrz
model/x-pov, pov
text/asp, asp
text/css, css
text/x-sass, sass
text/x-scss, scss
text/html, acgi|htm|html|htmls|htx|shtml
text/javascript, js
text/mcf, mcf
text/pascal, pas
text/PGP,
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml
text/richtext, rt|rtf|rtx
text/rtf,
text/scriplet, wsc
text/x-awk, awk
!video/x-jng, jng
video/x-mng, mng
image/x-cur, tga
image/x-xwindowdump, xwd
!image/vnd.adobe.photoshop, psd
text/tab-separated-values, tsv
text/troff, man|me|ms|roff|t|tr
text/uri-list, uji|unis|uri|uris
@@ -273,6 +362,7 @@ text/webviewhtml, htt
text/x-Algol68,
text/x-asm, asm|s
text/x-audiosoft-intra, aip
text/x-awk, awk
text/x-bcpl,
text/x-c, c|cc|h
text/x-c++, cpp|cxx|c++
@@ -287,23 +377,31 @@ text/x-makefile, am|mak
text/xml, xml|pom|iml|plist
text/x-m, m
text/x-msdos-batch, bat
text/x-ms-regedit, reg
text/x-objective-c,
text/x-pascal, p
text/x-perl, pl
text/x-php, php
text/x-po, po
text/x-python, py
text/x-ruby, rb
text/x-sass, sass
text/x-scss, scss
text/x-server-parsed-html, ssi
text/x-setext, etx
text/x-sgml, sgm|sgml
text/x-shellscript, sh
text/x-speech, talk
text/x-tcl,
text/x-tex, tex
text/x-uil, uil
text/x-uuencode, uue
text/x-vcalendar, vcs
text/x-vcard, vcf
video/animaflex, afl
video/avi, avi
video/avs-video, avs
video/MP2T,
video/mp4, mp4
video/mpeg, m1v|m2v|mpe|mpeg|mpg
video/quicktime, moov|mov|qt
@@ -318,101 +416,17 @@ video/x-atomic3d-feature, fmf
video/x-dl, dl
video/x-dv, dif|dv
video/x-fli, fli
video/x-flv, flv
video/x-isvideo, isu
!video/x-jng, jng
video/x-m4v, m4v
video/x-matroska, mkv
video/x-mng, mng
video/x-motion-jpeg, mjpg
video/x-ms-asf, asf|asx|wmv
video/x-msvideo, divx
video/x-qtc, qtc
video/x-sgi-movie, movie|mv
application/x-7z-compressed, 7z
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
text/x-po, po
application/x-rpm, rpm
application/x-debian-package, deb
application/vnd.iccprofile, icm
application/dicom, dcm
image/x-exr, exr
application/vnd.iccprofile, icm
video/x-matroska, mkv
application/x-empty,
model/vnd.gdl, gdl
model/vnd.gs.gdl, gdsl
font/woff, woff
font/woff2, woff2
application/epub+zip, epub
application/x-mobipocket-ebook, mobi
audio/x-flac, flac
application/x-rar, rar
video/x-msvideo, divx
video/x-flv, flv
application/x-kdelnk,
text/x-tcl,
application/ogg, ogv
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
application/vnd.ms-cab-compressed, cab
audio/mp4, m4b
!image/vnd.djvu, djvu
application/x-ms-reader, lit
application/CDFV2-corrupt,
text/x-vcard, vcf
application/x-innosetup,
application/winhelp, hlp
image/x-tga,
application/x-wine-extension-ini,
application/x-cbz, cbz
application/x-cbr, cbr
application/x-ms-compress-szdd, fon
application/x-atari-7800-rom, a78
application/x-nes-rom, nes
application/x-font-pfm, pfm
application/x-gettext-translation,
image/wmf,
application/pgp-keys,
image/x-3ds, 3ds
application/x-lz4, lz4
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
application/vnd.oasis.opendocument.presentation, odp
application/x-msaccess, accdb
application/vnd.oasis.opendocument.spreadsheet, ods
audio/x-aiff, aiff|aif
text/x-ms-regedit, reg
application/x-gamecube-rom,
application/x-nintendo-ds-rom,
text/x-objective-c,
application/x-font-gdos,
application/x-apple-diskimage,
application/x-zstd, zst
video/x-m4v, m4v
message/news,
application/vnd.symbian.install,
application/x-lzh-compressed,
application/x-dosdriver,
application/vnd.tcpdump.pcap, pcap
x-epoc/x-sisx-app,
application/x-avira-qua,
video/MP2T,
application/x-snappy-framed,
application/x-lz4+json, jsonlz4
application/x-dmp, dmp
application/zlib, z
application/x-pgp-keyring,
application/x-gdbm,
application/x-font-pf2, pf2
application/x-zip,
application/x-coredump,
application/x-java-jmod, jmod
application/x-terminfo,
application/x-terminfo2,
application/x-arc,
application/vnd.lotus-1-2-3,
image/x-win-bitmap,
application/x-maxis-dbpf,
text/PGP,
audio/x-hx-aac-adts,
application/x-chrome-extension,
image/heic, heic
image/x-gem,
application/x-lzma, lzma
application/warc, warc
application/x-lz4, lz4
application/x-lzip, lz
application/x-lzop, lzo
application/x-zstd-dictionary,
application/vnd.ms-outlook,
1 application/arj arj
2 application/base64 mme
3 application/binhex hqx
4 application/book boo|book
5 application/CDFV2-corrupt
6 application/CDFV2 sdv
7 application/clariscad ccad
8 application/commonground dp
9 application/csv
10 application/dicom dcm
11 application/drafting drw
12 application/epub+zip epub
13 application/freeloader frl
14 application/futuresplash spl
15 application/groupwise vew
21 application/java-archive jar
22 application/java class
23 application/javascript
application/x-archive a
24 application/json json
25 application/marc mrc
26 application/mbedlet mbd
30 application/netmc mcp
31 application/octet-stream bin|dump|gpg
32 application/oda oda
33 application/ogg ogv
34 application/pdf pdf
35 application/pgp-keys
36 application/pgp-signature pgp
37 application/pkcs7-signature p7s
38 application/pkix-cert cer|crt
48 application/vnd.fdf fdf
49 application/vnd.font-fontforge-sfd sfd
50 application/vnd.hp-hpgl hgl|hpg|hpgl
51 application/vnd.iccprofile icm
52 application/vnd.iccprofile icm
53 application/vnd.lotus-1-2-3
54 application/vnd.ms-cab-compressed cab
55 application/vnd.ms-excel xlb|xlc|xll|xlm|xls|xlw
56 application/vnd.ms-fontobject eot
57 application/vnd.ms-opentype otf
63 application/vnd.oasis.opendocument.base odb
64 application/vnd.oasis.opendocument.formula odf
65 application/vnd.oasis.opendocument.graphics odg
66 application/vnd.oasis.opendocument.presentation odp
67 application/vnd.oasis.opendocument.spreadsheet ods
68 application/vnd.oasis.opendocument.text odt
69 application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
70 application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
71 application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
72 application/vnd.symbian.install
73 application/vnd.tcpdump.pcap pcap
74 application/vnd.wap.wmlc wmlc
75 application/vnd.wap.wmlscriptc wmlsc
76 application/vnd.xara web
77 application/vocaltec-media-desc vmd
78 application/vocaltec-media-file vmf
79 application/warc warc
80 application/winhelp hlp
81 application/wordperfect6.0 w60
82 application/wordperfect6.1 w61
83 application/wordperfect wp|wp5|wp6|wpd
84 application/x-123 wk1
85 application/x-7z-compressed 7z
86 application/x-aim aim
87 application/x-apple-diskimage
88 application/x-arc
89 application/x-archive a
90 application/x-atari-7800-rom a78
91 application/x-authorware-bin aab
92 application/x-authorware-map aam
93 application/x-authorware-seg aas
94 application/x-avira-qua
95 application/x-bcpio bcpio
96 application/x-bittorrent torrent
97 application/x-bsh bsh
98 application/x-bytecode.python pyc
99 application/x-bzip2 boz|bz2
100 application/x-bzip bz
101 application/x-cbr cbr
102 application/x-cbz cbz
103 application/x-cdlink vcd
104 application/x-chat cha|chat
105 application/x-chrome-extension
106 application/x-cocoa cco
107 application/x-conference nsc
108 application/x-coredump
109 application/x-cpio cpio
110 application/x-dbf dbf
111 application/x-dbt
112 application/x-debian-package deb
113 application/x-deepv deepv
114 application/x-director dcr|dir|dxr
115 application/x-dmp dmp
116 application/x-dosdriver
117 application/x-dosexec dll
118 application/x-dvi dvi
119 application/x-elc elc
120 application/x-empty
121 application/x-envoy env|evy
122 application/x-esrehber es
123 application/x-excel xla|xld|xlk|xlt|xlv
124 application/x-executable exe
125 application/x-font-gdos
126 application/x-font-pf2 pf2
127 application/x-font-pfm pfm
128 application/x-font-sfn
129 application/x-font-ttf ttf|ttc
130 application/x-fptapplication/x-dbt
131 application/x-freelance pre
132 application/x-gamecube-rom
133 application/x-gdbm
134 application/x-gettext-translation
135 application/x-git
136 application/x-gsp gsp
137 application/x-gss gss
141 application/x-helpfile help
142 application/x-httpd-imap imap
143 application/x-ima ima
144 application/x-innosetup
145 application/x-internett-signup ins
146 application/x-inventor iv
147 application/x-ip2 ip
148 application/x-java-applet
149 application/x-java-commerce jcm
150 application/x-java-image
151 application/x-java-jmod jmod
152 application/x-java-keystore
153 application/x-kdelnk
154 application/x-koan skd|skm|skp|skt
155 application/x-latex latex|ltx
156 application/x-livescreen ivy
157 application/x-lotus wq1
158 application/x-lz4+json jsonlz4
159 application/x-lz4 lz4
160 application/x-lz4 lz4
161 application/x-lzh-compressed
162 application/x-lzh lzh
163 application/x-lzip lz
164 application/x-lzma lzma
165 application/x-lzop lzo
166 application/x-lzx lzx
167 application/x-mach-binary jnilib|dylib
168 application/x-mach-executable
169 application/x-magic-cap-package-1.0 mc$
170 application/x-mathcad mcd
171 application/x-maxis-dbpf
172 application/x-meme mm
173 application/x-midi midi
174 application/x-mif mif
175 application/x-mix-transfer nix
176 application/xml opf
177 application/x-mobipocket-ebook mobi
178 application/vnd.amazon.mobi8-ebook azw|azw3
179 application/x-msaccess accdb
180 application/x-ms-compress-szdd fon
181 application/x-ms-pdb pdb
182 application/x-ms-reader lit
183 application/x-n64-rom z64
184 application/x-navi-animation ani
185 application/x-navidoc nvd
186 application/x-navimap map
187 application/x-navistyle stl
188 application/x-nes-rom nes
189 application/x-netcdf cdf|nc
190 application/x-newton-compatible-pkg pkg
191 application/x-nintendo-ds-rom
192 application/x-object o
193 application/x-omcdatamaker omcd
194 application/x-omc omc
195 application/x-omcregerator omcr
196 application/x-pagemaker pm4|pm5
197 application/x-pcl pcl
198 application/x-pgp-keyring
199 application/x-pixclscript plx
200 application/x-pkcs7-certreqresp p7r
201 application/x-pkcs7-signature p7a
202 application/x-project mpc|mpt|mpv|mpx
203 application/x-qpro wb1
204 application/x-rar rar
205 application/x-rpm rpm
206 application/x-sdp sdp
207 application/x-sea sea
208 application/x-seelogo sl
210 application/x-sharedlib so
211 application/x-shar shar
212 application/x-shockwave-flash swf
213 application/x-snappy-framed
214 application/x-sprite spr|sprite
215 application/x-sqlite3
216 application/x-stargallery-thm
217 application/x-stuffit sit
218 application/x-sv4cpio sv4cpio
219 application/x-sv4crc sv4crc
220 application/x-tar tar
221 application/x-tbook sbk|tbk
222 application/x-terminfo
223 application/x-terminfo2
224 application/x-texinfo texi|texinfo
225 application/x-tex-tfm tfm
226 application/x-ustar ustar
229 application/x-vnd.ls-xpix xpix
230 application/x-vrml vrml
231 application/x-wais-source src|wsrc
232 application/x-wine-extension-ini
233 application/x-wintalk wtk
234 application/x-world svr
235 application/x-wri wri
236 application/x-x509-ca-cert der
237 application/x-xz xz
238 application/x-zip
239 application/x-zstd zst
240 application/zip zip
241 application/zlib z
242 !audio/basic au
243 audio/it it
244 audio/make funk|my|pfunk
245 audio/midi kar
246 audio/mid rmi
247 audio/mp4 m4b
248 audio/mpeg m2a|mpa
249 audio/ogg ogg
250 audio/s3m s3m
252 audio/tsplayer tsp
253 audio/vnd.qcelp qcp
254 audio/voxware vox
255 audio/x-aiff aiff|aif
256 audio/x-flac flac
257 audio/x-gsm gsd|gsm
258 audio/x-hx-aac-adts
259 audio/x-jam jam
260 audio/x-liveaudio lam
261 audio/x-m4a m4a
269 audio/x-pn-realaudio ram|rm|rmm|rmp
270 audio/x-psid sid
271 audio/x-realaudio ra
272 audio/x-s3m
273 audio/x-twinvq-plugin vqe|vql
274 audio/x-twinvq vqf
275 audio/x-voc voc
276 audio/x-wav wav
277 !audio/x-xbox360-executable xex
278 !audio/x-xbox-executable xbe
279 font/otf
280 font/sfnt
281 font/woff2 woff2
282 font/woff woff
283 image/bmp
284 image/cmu-raster rast
285 image/fif fif
286 image/florian flo|turbot
287 image/g3fax g3
288 image/gif gif
289 image/heic heic
290 image/ief ief|iefs
291 image/jpeg jfif|jfif-tbnl|jpe|jpeg|jpg
292 image/jutvision jut
295 image/png png|x-png
296 !image/svg svg
297 !image/svg+xml
298 image/tiff
299 !image/vnd.adobe.photoshop psd
300 !image/vnd.djvu djvu
301 image/vnd.fpx fpx
302 image/vnd.microsoft.icon
303 image/vnd.rn-realflash rf
305 image/vnd.wap.wbmp wbmp
306 image/vnd.xiff xif
307 image/webp webp
308 image/wmf
309 image/x-3ds 3ds
310 image/x-award-bioslogo
311 image/x-cmu-raster ras
312 image/x-cur tga
313 image/x-dwg dwg|dxf|svf
314 image/x-eps
315 image/x-exr exr
316 image/x-gem
317 image/x-icns
318 !image/x-icon ico
319 image/x-jg art
327 image/x-portable-pixmap ppm
328 image/x-quicktime qif|qti|qtif
329 image/x-rgb rgb
330 image/x-tga
331 image/x-tiff tif|tiff
332 image/tiff image/x-win-bitmap
333 !image/x-xcf xcf
334 !image/x-xpixmap xpm
335 image/x-xwindowdump xwd
336 message/news
337 message/rfc822 mht|mhtml|mime
338 model/vnd.dwf dwf
339 model/vnd.gdl gdl
340 model/vnd.gs.gdl gdsl
341 model/vrml wrz
342 model/x-pov pov
343 text/asp asp
344 text/css css
text/x-sass sass
text/x-scss scss
345 text/html acgi|htm|html|htmls|htx|shtml
346 text/javascript js
347 text/mcf mcf
348 text/pascal pas
349 text/PGP
350 text/plain com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml
351 text/richtext rt|rtf|rtx
352 text/rtf
353 text/scriplet wsc
text/x-awk awk
!video/x-jng jng
video/x-mng mng
image/x-cur tga
image/x-xwindowdump xwd
!image/vnd.adobe.photoshop psd
354 text/tab-separated-values tsv
355 text/troff man|me|ms|roff|t|tr
356 text/uri-list uji|unis|uri|uris
362 text/x-Algol68
363 text/x-asm asm|s
364 text/x-audiosoft-intra aip
365 text/x-awk awk
366 text/x-bcpl
367 text/x-c c|cc|h
368 text/x-c++ cpp|cxx|c++
377 text/xml xml|pom|iml|plist
378 text/x-m m
379 text/x-msdos-batch bat
380 text/x-ms-regedit reg
381 text/x-objective-c
382 text/x-pascal p
383 text/x-perl pl
384 text/x-php php
385 text/x-po po
386 text/x-python py
387 text/x-ruby rb
388 text/x-sass sass
389 text/x-scss scss
390 text/x-server-parsed-html ssi
391 text/x-setext etx
392 text/x-sgml sgm|sgml
393 text/x-shellscript sh
394 text/x-speech talk
395 text/x-tcl
396 text/x-tex tex
397 text/x-uil uil
398 text/x-uuencode uue
399 text/x-vcalendar vcs
400 text/x-vcard vcf
401 video/animaflex afl
402 video/avi avi
403 video/avs-video avs
404 video/MP2T
405 video/mp4 mp4
406 video/mpeg m1v|m2v|mpe|mpeg|mpg
407 video/quicktime moov|mov|qt
416 video/x-dl dl
417 video/x-dv dif|dv
418 video/x-fli fli
419 video/x-flv flv
420 video/x-isvideo isu
421 !video/x-jng jng
422 video/x-m4v m4v
423 video/x-matroska mkv
424 video/x-mng mng
425 video/x-motion-jpeg mjpg
426 video/x-ms-asf asf|asx|wmv
427 video/x-msvideo divx
428 video/x-qtc qtc
429 video/x-sgi-movie movie|mv
application/x-7z-compressed 7z
application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
text/x-po po
application/x-rpm rpm
application/x-debian-package deb
application/vnd.iccprofile icm
application/dicom dcm
image/x-exr exr
application/vnd.iccprofile icm
video/x-matroska mkv
application/x-empty
model/vnd.gdl gdl
model/vnd.gs.gdl gdsl
font/woff woff
font/woff2 woff2
application/epub+zip epub
application/x-mobipocket-ebook mobi
audio/x-flac flac
application/x-rar rar
video/x-msvideo divx
video/x-flv flv
application/x-kdelnk
text/x-tcl
application/ogg ogv
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
application/vnd.ms-cab-compressed cab
audio/mp4 m4b
!image/vnd.djvu djvu
application/x-ms-reader lit
application/CDFV2-corrupt
text/x-vcard vcf
application/x-innosetup
application/winhelp hlp
image/x-tga
application/x-wine-extension-ini
application/x-cbz cbz
application/x-cbr cbr
application/x-ms-compress-szdd fon
application/x-atari-7800-rom a78
application/x-nes-rom nes
application/x-font-pfm pfm
application/x-gettext-translation
image/wmf
application/pgp-keys
image/x-3ds 3ds
application/x-lz4 lz4
application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
application/vnd.oasis.opendocument.presentation odp
application/x-msaccess accdb
application/vnd.oasis.opendocument.spreadsheet ods
audio/x-aiff aiff|aif
text/x-ms-regedit reg
application/x-gamecube-rom
application/x-nintendo-ds-rom
text/x-objective-c
application/x-font-gdos
application/x-apple-diskimage
application/x-zstd zst
video/x-m4v m4v
message/news
application/vnd.symbian.install
application/x-lzh-compressed
application/x-dosdriver
application/vnd.tcpdump.pcap pcap
430 x-epoc/x-sisx-app
431 application/x-avira-qua application/x-zstd-dictionary
432 video/MP2T application/vnd.ms-outlook
application/x-snappy-framed
application/x-lz4+json jsonlz4
application/x-dmp dmp
application/zlib z
application/x-pgp-keyring
application/x-gdbm
application/x-font-pf2 pf2
application/x-zip
application/x-coredump
application/x-java-jmod jmod
application/x-terminfo
application/x-terminfo2
application/x-arc
application/vnd.lotus-1-2-3
image/x-win-bitmap
application/x-maxis-dbpf
text/PGP
audio/x-hx-aac-adts
application/x-chrome-extension
image/heic heic
image/x-gem
application/x-lzma lzma
application/warc warc
application/x-lz4 lz4
application/x-lzip lz
application/x-lzop lzo

View File

@@ -62,6 +62,17 @@ doc = (
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
)
mobi = (
"application/x-mobipocket-ebook",
"application/vnd.amazon.mobi8-ebook"
)
markup = (
"text/xml",
"text/html",
"text/x-sgml"
)
cnt = 1
@@ -82,6 +93,10 @@ def mime_id(mime):
mime_id += " | 0x08000000"
elif mime in doc:
mime_id += " | 0x04000000"
elif mime in mobi:
mime_id += " | 0x02000000"
elif mime in markup:
mime_id += " | 0x01000000"
elif mime == "application/x-empty":
return "1"
return mime_id
@@ -91,7 +106,7 @@ def clean(t):
return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_")
with open("mime.csv") as f:
with open("scripts/mime.csv") as f:
for l in f:
mime, ext_list = l.split(",")
if l.startswith("!"):
@@ -103,7 +118,7 @@ with open("mime.csv") as f:
print("// **Generated by mime.py**")
print("#ifndef MIME_GENERATED_C")
print("#define MIME_GENERATED_C")
print("#include <glib-2.0/glib.h>\n")
print("#include <glib.h>\n")
print("#include <stdlib.h>\n")
# Enum
print("enum mime {")

View File

@@ -1,10 +1,12 @@
files = [
"web/css/bundle.css",
"web/css/bundle_dark.css",
"web/js/bundle.js",
"web/img/sprite-skin-flat.png",
"web/img/sprite-skin-flat-dark.png",
"web/search.html",
"src/static/css/bundle.css",
"src/static/css/bundle_dark.css",
"src/static/js/bundle.js",
"src/static/js/search.js",
"src/static/img/sprite-skin-flat.png",
"src/static/img/sprite-skin-flat-dark.png",
"src/static/search.html",
"src/static/stats.html",
]

View File

@@ -1,5 +1,6 @@
#include "cli.h"
#include "ctx.h"
#include <tesseract/capi.h>
#define DEFAULT_OUTPUT "index.sist2/"
#define DEFAULT_CONTENT_SIZE 32768
@@ -10,8 +11,8 @@
#define DEFAULT_ES_URL "http://localhost:9200"
#define DEFAULT_BATCH_SIZE 100
#define DEFAULT_BIND_ADDR "localhost"
#define DEFAULT_PORT "4090"
#define DEFAULT_LISTEN_ADDRESS "localhost:4090"
#define DEFAULT_TREEMAP_THRESHOLD 0.0005
const char* TESS_DATAPATHS[] = {
"/usr/share/tessdata/",
@@ -180,6 +181,12 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
ScanCtx.exclude = NULL;
}
if (args->treemap_threshold_str == 0) {
args->treemap_threshold = DEFAULT_TREEMAP_THRESHOLD;
} else {
args->treemap_threshold = atof(args->treemap_threshold_str);
}
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
LOG_DEBUGF("cli.c", "arg size=%d", args->size)
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
@@ -195,6 +202,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
return 0;
}
@@ -275,18 +283,28 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
args->es_url = DEFAULT_ES_URL;
}
if (args->bind == NULL) {
args->bind = DEFAULT_BIND_ADDR;
}
if (args->port == NULL) {
args->port = DEFAULT_PORT;
if (args->listen_address == NULL) {
args->listen_address = DEFAULT_LISTEN_ADDRESS;
}
if (args->credentials != NULL) {
args->b64credentials = onion_base64_encode(args->credentials, (int) strlen(args->credentials));
//Remove trailing newline
*(args->b64credentials + strlen(args->b64credentials) - 1) = '\0';
char * ptr = strstr(args->credentials, ":");
if (ptr == NULL) {
fprintf(stderr, "Invalid --auth format, see usage\n");
return 1;
}
strncpy(args->auth_user, args->credentials, (ptr - args->credentials));
strncpy(args->auth_pass, ptr + 1, strlen(ptr + 1));
if (strlen(args->auth_user) == 0) {
fprintf(stderr, "--auth username must be at least one character long");
return 1;
}
args->auth_enabled = TRUE;
} else {
args->auth_enabled = FALSE;
}
args->index_count = argc - 1;
@@ -301,10 +319,10 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
}
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg bind=%s", args->bind)
LOG_DEBUGF("cli.c", "arg port=%s", args->port)
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)
LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials)
LOG_DEBUGF("cli.c", "arg b64credentials=%s", args->b64credentials)
LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user)
LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass)
LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count)
for (int i = 0; i < args->index_count; i++) {
LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i])

View File

@@ -3,6 +3,8 @@
#include "sist.h"
#include "libscan/arc/arc.h"
typedef struct scan_args {
float quality;
int size;
@@ -20,6 +22,8 @@ typedef struct scan_args {
const char *tesseract_path;
char *exclude_regex;
int fast;
const char* treemap_threshold_str;
double treemap_threshold;
} scan_args_t;
scan_args_t *scan_args_create();
@@ -40,10 +44,11 @@ typedef struct index_args {
typedef struct web_args {
char *es_url;
char *bind;
char *port;
char *listen_address;
char *credentials;
char *b64credentials;
char auth_user[256];
char auth_pass[256];
int auth_enabled;
int index_count;
const char **indices;
} web_args_t;

View File

@@ -2,7 +2,21 @@
#define SIST2_CTX_H
#include "sist.h"
#include "tpool.h"
#include "libscan/scan.h"
#include "libscan/arc/arc.h"
#include "libscan/cbr/cbr.h"
#include "libscan/ebook/ebook.h"
#include "libscan/font/font.h"
#include "libscan/media/media.h"
#include "libscan/ooxml/ooxml.h"
#include "libscan/text/text.h"
#include "libscan/mobi/scan_mobi.h"
#include <glib.h>
#include <pcre.h>
//TODO Move to individual scan ctx
struct {
struct index_t index;
@@ -11,14 +25,8 @@ struct {
tpool_t *pool;
int tn_size;
int threads;
int content_size;
float tn_qscale;
int depth;
archive_mode_t archive_mode;
int verbose;
int very_verbose;
size_t stat_tn_size;
size_t stat_index_size;
@@ -26,12 +34,18 @@ struct {
GHashTable *original_table;
GHashTable *copy_table;
pthread_mutex_t mupdf_mu;
char * tesseract_lang;
const char * tesseract_path;
pcre *exclude;
pcre_extra *exclude_extra;
int fast;
scan_arc_ctx_t arc_ctx;
scan_cbr_ctx_t cbr_ctx;
scan_ebook_ctx_t ebook_ctx;
scan_font_ctx_t font_ctx;
scan_media_ctx_t media_ctx;
scan_ooxml_ctx_t ooxml_ctx;
scan_text_ctx_t text_ctx;
scan_mobi_ctx_t mobi_ctx;
} ScanCtx;
struct {
@@ -48,7 +62,9 @@ struct {
struct {
char *es_url;
int index_count;
char *b64credentials;
char *auth_user;
char *auth_pass;
int auth_enabled;
struct index_t indices[16];
} WebCtx;

View File

@@ -1,6 +1,8 @@
#include "elastic.h"
#include "src/ctx.h"
#include "web.h"
#include "static_generated.c"
@@ -64,7 +66,7 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?pretty", Indexer->es_url);
response_t *r = web_post(bulk_url, str, "Content-Type: application/json");
response_t *r = web_post(bulk_url, str);
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
cJSON *resp = cJSON_Parse(r->body);
@@ -116,8 +118,12 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
return buf;
}
void *print_errors(response_t *r) {
cJSON *ret_json = cJSON_Parse(r->body);
void print_errors(response_t *r) {
char * tmp = malloc(r->size + 1);
memcpy(tmp, r->body, r->size);
*(tmp + r->size) = '\0';
cJSON *ret_json = cJSON_Parse(tmp);
if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
cJSON *err;
cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
@@ -129,16 +135,23 @@ void *print_errors(response_t *r) {
}
}
cJSON_Delete(ret_json);
free(tmp);
}
void _elastic_flush(int max) {
if (max == 0) {
LOG_WARNING("elastic.c", "calling _elastic_flush with 0 in queue")
return;
}
size_t buf_len;
int count;
void *buf = create_bulk_buffer(max, &count, &buf_len);
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_bulk?pipeline=tie", Indexer->es_url);
response_t *r = web_post(bulk_url, buf, "Content-Type: application/x-ndjson");
response_t *r = web_post(bulk_url, buf);
if (r->status_code == 0) {
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
@@ -246,7 +259,7 @@ void destroy_indexer(char *script, char index_id[UUID_STR_LEN]) {
char url[4096];
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
response_t *r = web_post(url, "", NULL);
response_t *r = web_post(url, "");
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
@@ -255,12 +268,12 @@ void destroy_indexer(char *script, char index_id[UUID_STR_LEN]) {
}
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
r = web_post(url, "", NULL);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
r = web_post(url, "", NULL);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
free_response(r);
@@ -285,32 +298,32 @@ void elastic_init(int force_reset) {
free_response(r);
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
r = web_put(url, "", NULL);
r = web_put(url, "");
LOG_INFOF("elastic.c", "Create index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_close", IndexCtx.es_url);
r = web_post(url, "", NULL);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/_ingest/pipeline/tie", IndexCtx.es_url);
r = web_put(url, pipeline_json, "Content-Type: application/json");
r = web_put(url, pipeline_json);
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_settings", IndexCtx.es_url);
r = web_put(url, settings_json, "Content-Type: application/json");
r = web_put(url, settings_json);
LOG_INFOF("elastic.c", "Update settings <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_mappings/_doc?include_type_name=true", IndexCtx.es_url);
r = web_put(url, mappings_json, "Content-Type: application/json");
r = web_put(url, mappings_json);
LOG_INFOF("elastic.c", "Update mappings <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_open", IndexCtx.es_url);
r = web_post(url, "", NULL);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
free_response(r);
}

File diff suppressed because one or more lines are too long

View File

@@ -1,131 +1,154 @@
#include "web.h"
#include "src/sist.h"
#include "src/ctx.h"
size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
#include <mongoose.h>
#include <pthread.h>
size_t real_size = size * nmemb;
dyn_buffer_t *buf = user_data;
dyn_buffer_write(buf, ptr, real_size);
return real_size;
}
void free_response(response_t *resp) {
if (resp->body != NULL) {
free(resp->body);
}
free(resp);
}
#define SIST2_HEADERS "User-Agent: sist2\r\nContent-Type: application/json\r\n"
void http_req_ev(struct mg_connection *nc, int ev, void *ptr) {
http_ev_data_t *ev_data = (http_ev_data_t *) nc->user_data;
switch (ev) {
case MG_EV_CONNECT: {
int connect_status = *(int *) ptr;
if (connect_status != 0) {
ev_data->done = TRUE;
ev_data->resp->status_code = 0;
}
break;
}
case MG_EV_HTTP_REPLY: {
struct http_message *hm = (struct http_message *) ptr;
//TODO: Check errors?
ev_data->resp->size = hm->body.len;
ev_data->resp->status_code = hm->resp_code;
ev_data->resp->body = malloc(hm->body.len + 1);
memcpy(ev_data->resp->body, hm->body.p, hm->body.len);
*(ev_data->resp->body + hm->body.len) = '\0';
ev_data->done = TRUE;
break;
}
case MG_EV_CLOSE: {
ev_data->done = TRUE;
break;
}
default:
break;
}
}
subreq_ctx_t *http_req(const char *url, const char *extra_headers, const char *post_data, const char *method) {
struct mg_str scheme;
struct mg_str user_info;
struct mg_str host;
unsigned int port;
struct mg_str path;
struct mg_str query;
struct mg_str fragment;
if (post_data == NULL) post_data = "";
if (extra_headers == NULL) extra_headers = "";
if (path.len == 0) path = mg_mk_str("/");
if (host.len == 0) host = mg_mk_str("");
// [scheme://[user_info@]]host[:port][/path][?query][#fragment]
mg_parse_uri(mg_mk_str(url), &scheme, &user_info, &host, &port, &path, &query, &fragment);
if (query.len > 0) path.len += query.len + 1;
subreq_ctx_t *ctx = malloc(sizeof(subreq_ctx_t));
mg_mgr_init(&ctx->mgr, NULL);
char address[8196];
snprintf(address, sizeof(address), "tcp://%.*s:%u", (int) host.len, host.p, port);
struct mg_connection *nc = mg_connect(&ctx->mgr, address, http_req_ev);
nc->user_data = &ctx->ev_data;
mg_set_protocol_http_websocket(nc);
ctx->ev_data.resp = calloc(1, sizeof(response_t));
ctx->ev_data.done = FALSE;
mg_printf(
nc, "%s %.*s HTTP/1.1\r\n"
"Host: %.*s\r\n"
"Content-Length: %zu\r\n"
"%s\r\n"
"%s",
method, (int) path.len, path.p,
(int) (path.p - host.p), host.p,
strlen(post_data),
extra_headers,
post_data
);
return ctx;
}
response_t *web_get(const char *url) {
response_t *resp = malloc(sizeof(response_t));
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, NULL, "GET");
while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
}
mg_mgr_free(&ctx->mgr);
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
response_t *ret = ctx->ev_data.resp;
free(ctx);
return ret;
}
response_t *web_post(const char *url, const char *data, const char *header) {
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
struct curl_slist *headers = NULL;
if (header != NULL) {
headers = curl_slist_append(headers, header);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
}
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
subreq_ctx_t *web_post_async(const char *url, const char *data) {
return http_req(url, SIST2_HEADERS, data, "POST");
}
response_t *web_post(const char *url, const char *data) {
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, data, "POST");
response_t *web_put(const char *url, const char *data, const char *header) {
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
if (header != NULL) {
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, header);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
}
mg_mgr_free(&ctx->mgr);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
response_t *ret = ctx->ev_data.resp;
free(ctx);
return ret;
}
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
response_t *web_put(const char *url, const char *data) {
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, data, "PUT");
while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
}
mg_mgr_free(&ctx->mgr);
curl_easy_cleanup(curl);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
response_t *ret = ctx->ev_data.resp;
free(ctx);
return ret;
}
response_t *web_delete(const char *url) {
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, NULL, "DELETE");
while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
}
mg_mgr_free(&ctx->mgr);
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
response_t *ret = ctx->ev_data.resp;
free(ctx);
return ret;
}

View File

@@ -2,6 +2,7 @@
#define SIST2_WEB_H
#include "src/sist.h"
#include <mongoose.h>
typedef struct response {
char *body;
@@ -9,9 +10,20 @@ typedef struct response {
int status_code;
} response_t;
typedef struct {
response_t *resp;
int done;
} http_ev_data_t;
typedef struct {
http_ev_data_t ev_data;
struct mg_mgr mgr;
} subreq_ctx_t;
response_t *web_get(const char *url);
response_t *web_post(const char * url, const char * data, const char* header);
response_t *web_put(const char *url, const char *data, const char *header);
response_t *web_post(const char * url, const char * data);
subreq_ctx_t *web_post_async(const char *url, const char *data);
response_t *web_put(const char *url, const char *data);
response_t *web_delete(const char *url);
void free_response(response_t *resp);

View File

@@ -1,5 +1,7 @@
#include "src/ctx.h"
#include "serialize.h"
#include "src/parsing/parse.h"
#include "src/parsing/mime.h"
static __thread int index_fd = -1;
@@ -40,10 +42,13 @@ void write_index_descriptor(char *path, index_descriptor_t *desc) {
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
if (fd < 0) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
LOG_FATALF("serialize.c", "Could not open index descriptor: %s", strerror(errno));
}
char *str = cJSON_Print(json);
write(fd, str, strlen(str));
int ret = write(fd, str, strlen(str));
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
}
free(str);
close(fd);
@@ -61,7 +66,10 @@ index_descriptor_t read_index_descriptor(char *path) {
}
char *buf = malloc(info.st_size + 1);
read(fd, buf, info.st_size);
int ret = read(fd, buf, info.st_size);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
}
*(buf + info.st_size) = '\0';
close(fd);
@@ -136,6 +144,12 @@ char *get_meta_key_text(enum metakey meta_key) {
return "exif_model";
case MetaExifDateTime:
return "exif_datetime";
case MetaAuthor:
return "author";
case MetaModifiedBy:
return "modified_by";
case MetaThumbnail:
return "thumbnail";
default:
return NULL;
}
@@ -170,11 +184,11 @@ void write_document(document_t *doc) {
dyn_buffer_write_char(&buf, meta->key);
if (IS_META_INT(meta->key)) {
dyn_buffer_write_int(&buf, meta->intval);
dyn_buffer_write_int(&buf, meta->int_val);
} else if (IS_META_LONG(meta->key)) {
dyn_buffer_write_long(&buf, meta->longval);
dyn_buffer_write_long(&buf, meta->long_val);
} else {
dyn_buffer_write_str(&buf, meta->strval);
dyn_buffer_write_str(&buf, meta->str_val);
}
meta_line_t *tmp = meta;
@@ -193,6 +207,8 @@ void write_document(document_t *doc) {
void thread_cleanup() {
close(index_fd);
cleanup_parse();
cleanup_font();
}
@@ -203,7 +219,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
FILE *file = fopen(path, "rb");
while (1) {
buf.cur = 0;
fread((void *) &line, 1, sizeof(line_t), file);
size_t _ = fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) {
break;
}
@@ -235,42 +251,41 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
} else {
*(buf.buf + line.ext) = '\0';
}
cJSON_AddStringToObject(document, "name", buf.buf + line.base);
char tmp[PATH_MAX * 3];
str_escape(tmp, buf.buf + line.base);
cJSON_AddStringToObject(document, "name", tmp);
if (line.base > 0) {
*(buf.buf + line.base - 1) = '\0';
cJSON_AddStringToObject(document, "path", buf.buf);
str_escape(tmp, buf.buf);
cJSON_AddStringToObject(document, "path", tmp);
} else {
cJSON_AddStringToObject(document, "path", "");
}
enum metakey key = getc(file);
size_t ret = 0;
while (key != '\n') {
switch (key) {
case MetaWidth:
case MetaHeight: {
int value;
fread(&value, sizeof(int), 1, file);
ret = fread(&value, sizeof(int), 1, file);
cJSON_AddNumberToObject(document, get_meta_key_text(key), value);
break;
}
case MetaMediaDuration:
case MetaMediaBitrate: {
long value;
fread(&value, sizeof(long), 1, file);
ret = fread(&value, sizeof(long), 1, file);
cJSON_AddNumberToObject(document, get_meta_key_text(key), (double) value);
break;
}
case MetaMediaAudioCodec:
case MetaMediaVideoCodec: {
int value;
fread(&value, sizeof(int), 1, file);
const AVCodecDescriptor *desc = avcodec_descriptor_get(value);
if (desc != NULL) {
cJSON_AddStringToObject(document, get_meta_key_text(key), desc->name);
}
break;
}
case MetaMediaVideoCodec:
case MetaContent:
case MetaArtist:
case MetaAlbum:
@@ -287,6 +302,9 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
case MetaExifIsoSpeedRatings:
case MetaExifDateTime:
case MetaExifModel:
case MetaAuthor:
case MetaModifiedBy:
case MetaThumbnail:
case MetaTitle: {
buf.cur = 0;
while ((c = getc(file)) != 0) {
@@ -400,8 +418,8 @@ void incremental_read(GHashTable *table, const char *filepath) {
line_t line;
while (1) {
fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) {
size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
if (ret != 1 || feof(file)) {
break;
}
@@ -424,8 +442,8 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
line_t line;
while (1) {
fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) {
size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
if (ret != 1 || feof(file)) {
break;
}
@@ -453,11 +471,11 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
if (IS_META_INT(key)) {
int val;
fread(&val, sizeof(val), 1, file);
ret = fread(&val, sizeof(val), 1, file);
fwrite(&val, sizeof(val), 1, dst_file);
} else if (IS_META_LONG(key)) {
long val;
fread(&val, sizeof(val), 1, file);
ret = fread(&val, sizeof(val), 1, file);
fwrite(&val, sizeof(val), 1, dst_file);
} else {
while ((c = (char) getc(file))) {
@@ -465,6 +483,10 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
}
fwrite("\0", sizeof(c), 1, dst_file);
}
if (ret != 1) {
break;
}
}
} else {
skip_meta(file);

View File

@@ -2,7 +2,10 @@
#define SIST2_SERIALIZE_H
#include "src/sist.h"
#include "store.h"
#include <sys/syscall.h>
#include <glib.h>
typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]);

View File

@@ -11,8 +11,6 @@ typedef struct store_t {
pthread_rwlock_t lock;
} store_t;
#include "src/sist.h"
store_t *store_create(char *path);
void store_destroy(store_t *store);

View File

@@ -1,5 +1,8 @@
#include "walk.h"
#include "src/ctx.h"
#include "src/parsing/parse.h"
#include <ftw.h>
__always_inline
parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
@@ -15,12 +18,13 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
job->ext = len;
}
job->info = *info;
job->vfile.info = *info;
memset(job->parent, 0, 16);
job->vfile.filepath = job->filepath;
job->vfile.read = fs_read;
job->vfile.reset = fs_reset;
job->vfile.close = fs_close;
job->vfile.fd = -1;
job->vfile.is_fs_file = TRUE;

View File

@@ -3,8 +3,6 @@
#define _XOPEN_SOURCE 500
#include "src/sist.h"
int walk_directory_tree(const char *);
#endif

View File

@@ -1,5 +1,8 @@
#include "log.h"
#include <pthread.h>
#include <stdarg.h>
const char *log_colors[] = {
"\033[34m", "\033[01;34m", "\033[0m",
"\033[01;33m", "\033[31m", "\033[01;31m"
@@ -9,7 +12,7 @@ const char *log_levels[] = {
"DEBUG", "INFO", "WARNING", "ERROR", "FATAL"
};
void sist_logf(char *filepath, int level, char *format, ...) {
void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
static int is_tty = -1;
if (is_tty == -1) {
@@ -31,23 +34,20 @@ void sist_logf(char *filepath, int level, char *format, ...) {
if (is_tty) {
log_len = snprintf(
log_str, sizeof(log_str),
"\033[%dm[%04X]%s [%s] [%s %s] ",
"\033[%dm[%04llX]%s [%s] [%s %s] ",
31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
datetime, log_levels[level], filepath
);
} else {
log_len = snprintf(
log_str, sizeof(log_str),
"[%04X] [%s] [%s %s] ",
"[%04llX] [%s] [%s %s] ",
pid, datetime, log_levels[level], filepath
);
}
va_list ap;
va_start(ap, format);
size_t maxsize = sizeof(log_str) - log_len;
log_len += vsnprintf(log_str + log_len, maxsize, format, ap);
va_end(ap);
if (is_tty) {
log_len += sprintf(log_str + log_len, "\033[0m\n");
@@ -56,10 +56,20 @@ void sist_logf(char *filepath, int level, char *format, ...) {
log_len += 1;
}
write(STDERR_FILENO, log_str, log_len);
int ret = write(STDERR_FILENO, log_str, log_len);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno))
}
}
void sist_log(char *filepath, int level, char *str) {
void sist_logf(const char *filepath, int level, char *format, ...) {
va_list ap;
va_start(ap, format);
vsist_logf(filepath, level, format, ap);
va_end(ap);
}
void sist_log(const char *filepath, int level, char *str) {
static int is_tty = -1;
if (is_tty == -1) {
@@ -81,7 +91,7 @@ void sist_log(char *filepath, int level, char *str) {
if (is_tty) {
log_len = snprintf(
log_str, sizeof(log_str),
"\033[%dm[%04X]%s [%s] [%s %s] %s \033[0m\n",
"\033[%dm[%04llX]%s [%s] [%s %s] %s \033[0m\n",
31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
datetime, log_levels[level], filepath,
str
@@ -89,11 +99,14 @@ void sist_log(char *filepath, int level, char *str) {
} else {
log_len = snprintf(
log_str, sizeof(log_str),
"[%04X] [%s] [%s %s] %s \n",
"[%04llX] [%s] [%s %s] %s \n",
pid, datetime, log_levels[level], filepath,
str
);
}
write(STDERR_FILENO, log_str, log_len);
int ret = write(STDERR_FILENO, log_str, log_len);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
}
}

View File

@@ -1,6 +1,7 @@
#ifndef SIST2_LOG_H
#define SIST2_LOG_H
#define LOG_MAX_LENGTH 8192
#define SIST_DEBUG 0
@@ -36,10 +37,11 @@
sist_log(filepath, SIST_FATAL, str);\
exit(-1);
#include "src/sist.h"
#include "sist.h"
void sist_logf(char *filepath, int level, char *format, ...);
void sist_logf(const char *filepath, int level, char *format, ...);
void vsist_logf(const char *filepath, int level, char *format, va_list ap);
void sist_log(char *filepath, int level, char *str);
void sist_log(const char *filepath, int level, char *str);
#endif

View File

@@ -1,12 +1,28 @@
#include "sist.h"
#include "ctx.h"
#include <third-party/argparse/argparse.h>
#include <glib.h>
#include <locale.h>
#include "cli.h"
#include "io/serialize.h"
#include "io/store.h"
#include "tpool.h"
#include "io/walk.h"
#include "index/elastic.h"
#include "web/serve.h"
#include "parsing/mime.h"
#include "parsing/parse.h"
#include "stats.h"
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "1.3.1";
static const char *const Version = "2.3.0";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
@@ -14,11 +30,6 @@ static const char *const usage[] = {
NULL,
};
void global_init() {
curl_global_init(CURL_GLOBAL_NOTHING);
av_log_set_level(AV_LOG_QUIET);
}
void init_dir(const char *dirpath) {
char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
@@ -37,29 +48,122 @@ void scan_print_header() {
LOG_INFOF("main.c", "sist2 v%s", Version)
}
void sist2_scan(scan_args_t *args) {
void _store(char *key, size_t key_len, char *buf, size_t buf_len) {
store_write(ScanCtx.index.store, key, key_len, buf, buf_len);
}
void _log(const char *filepath, int level, char *str) {
if (level == LEVEL_FATAL) {
sist_log(filepath, level, str);
exit(-1);
}
if (LogCtx.verbose) {
if (level == LEVEL_DEBUG) {
if (LogCtx.very_verbose) {
sist_log(filepath, level, str);
}
} else {
sist_log(filepath, level, str);
}
}
}
void _logf(const char *filepath, int level, char *format, ...) {
va_list args;
va_start(args, format);
if (level == LEVEL_FATAL) {
sist_logf(filepath, level, format, args);
exit(-1);
}
if (LogCtx.verbose) {
if (level == LEVEL_DEBUG) {
if (LogCtx.very_verbose) {
vsist_logf(filepath, level, format, args);
}
} else {
sist_logf(filepath, level, format, args);
}
}
va_end(args);
}
void initialize_scan_context(scan_args_t *args) {
// Arc
ScanCtx.arc_ctx.mode = args->archive_mode;
ScanCtx.arc_ctx.log = _log;
ScanCtx.arc_ctx.logf = _logf;
ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
// Cbr
ScanCtx.cbr_ctx.log = _log;
ScanCtx.cbr_ctx.logf = _logf;
ScanCtx.cbr_ctx.store = _store;
ScanCtx.cbr_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
// Ebook
pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
ScanCtx.ebook_ctx.content_size = args->content_size;
ScanCtx.ebook_ctx.tn_size = args->size;
ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
ScanCtx.ebook_ctx.log = _log;
ScanCtx.ebook_ctx.logf = _logf;
ScanCtx.ebook_ctx.store = _store;
// Font
ScanCtx.font_ctx.enable_tn = args->size > 0;
ScanCtx.font_ctx.log = _log;
ScanCtx.font_ctx.logf = _logf;
ScanCtx.font_ctx.store = _store;
// Media
ScanCtx.media_ctx.tn_qscale = args->quality;
ScanCtx.media_ctx.tn_size = args->size;
ScanCtx.media_ctx.log = _log;
ScanCtx.media_ctx.logf = _logf;
ScanCtx.media_ctx.store = _store;
init_media();
// OOXML
ScanCtx.ooxml_ctx.content_size = args->content_size;
ScanCtx.ooxml_ctx.log = _log;
ScanCtx.ooxml_ctx.logf = _logf;
// MOBI
ScanCtx.mobi_ctx.content_size = args->content_size;
ScanCtx.mobi_ctx.log = _log;
ScanCtx.mobi_ctx.logf = _logf;
// TEXT
ScanCtx.text_ctx.content_size = args->content_size;
ScanCtx.text_ctx.log = _log;
ScanCtx.text_ctx.logf = _logf;
ScanCtx.tn_qscale = args->quality;
ScanCtx.tn_size = args->size;
ScanCtx.content_size = args->content_size;
ScanCtx.threads = args->threads;
ScanCtx.depth = args->depth;
ScanCtx.archive_mode = args->archive_mode;
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
ScanCtx.tesseract_lang = args->tesseract_lang;
ScanCtx.tesseract_path = args->tesseract_path;
ScanCtx.fast = args->fast;
}
init_dir(ScanCtx.index.path);
void sist2_scan(scan_args_t *args) {
ScanCtx.mime_table = mime_get_mime_table();
ScanCtx.ext_table = mime_get_ext_table();
cbr_init();
initialize_scan_context(args);
init_dir(ScanCtx.index.path);
char store_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
@@ -105,6 +209,8 @@ void sist2_scan(scan_args_t *args) {
tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool);
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
if (args->incremental != NULL) {
char dst_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
@@ -184,7 +290,9 @@ void sist2_web(web_args_t *args) {
WebCtx.es_url = args->es_url;
WebCtx.index_count = args->index_count;
WebCtx.b64credentials = args->b64credentials;
WebCtx.auth_user = args->auth_user;
WebCtx.auth_pass = args->auth_pass;
WebCtx.auth_enabled = args->auth_enabled;
for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]);
@@ -204,13 +312,12 @@ void sist2_web(web_args_t *args) {
free(abs_path);
}
serve(args->bind, args->port);
serve(args->listen_address);
}
int main(int argc, const char *argv[]) {
global_init();
setlocale(LC_ALL, "");
scan_args_t *scan_args = scan_args_create();
index_args_t *index_args = index_args_create();
@@ -249,6 +356,8 @@ int main(int argc, const char *argv[]) {
"which are installed on your machine)"),
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
"(see USAGE.md). DEFAULT: 0.0005"),
OPT_GROUP("Index options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
@@ -260,8 +369,7 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "bind", &web_args->bind, "Listen on this address. DEFAULT=localhost"),
OPT_STRING(0, "port", &web_args->port, "Listen on this port. DEFAULT=4090"),
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
OPT_END(),

View File

@@ -1,155 +0,0 @@
#include "arc.h"
#include "src/ctx.h"
int should_parse_filtered_file(const char *filepath, int ext) {
char tmp[PATH_MAX * 2];
if (ext == 0) {
return FALSE;
}
memcpy(tmp, filepath, ext - 1);
*(tmp + ext - 1) = '\0';
char *idx = strrchr(tmp, '.');
if (idx == NULL) {
return FALSE;
}
if (strcmp(idx, ".tar") == 0) {
return TRUE;
}
return FALSE;
}
int arc_read(struct vfile *f, void *buf, size_t size) {
return archive_read_data(f->arc, buf, size);
}
typedef struct arc_data {
vfile_t *f;
char buf[ARC_BUF_SIZE];
} arc_data_f;
int vfile_open_callback(struct archive *a, void *user_data) {
arc_data_f *data = user_data;
if (data->f->is_fs_file && data->f->fd == -1) {
data->f->fd = open(data->f->filepath, O_RDONLY);
}
return ARCHIVE_OK;
}
long vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
arc_data_f *data = user_data;
*buf = data->buf;
return data->f->read(data->f, data->buf, ARC_BUF_SIZE);
}
int vfile_close_callback(struct archive *a, void *user_data) {
arc_data_f *data = user_data;
if (data->f->close != NULL) {
data->f->close(data->f);
}
return ARCHIVE_OK;
}
void parse_archive(vfile_t *f, document_t *doc) {
struct archive *a;
struct archive_entry *entry;
arc_data_f data;
data.f = f;
int ret = 0;
if (data.f->is_fs_file) {
a = archive_read_new();
archive_read_support_filter_all(a);
archive_read_support_format_all(a);
ret = archive_read_open_filename(a, doc->filepath, ARC_BUF_SIZE);
} else if (ScanCtx.archive_mode == ARC_MODE_RECURSE) {
a = archive_read_new();
archive_read_support_filter_all(a);
archive_read_support_format_all(a);
ret = archive_read_open(
a, &data,
vfile_open_callback,
vfile_read_callback,
vfile_close_callback
);
} else {
return;
}
if (ret != ARCHIVE_OK) {
LOG_ERRORF(doc->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a))
archive_read_free(a);
return;
}
if (ScanCtx.archive_mode == ARC_MODE_LIST) {
dyn_buffer_t buf = dyn_buffer_create();
while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
if (S_ISREG(archive_entry_stat(entry)->st_mode)) {
char *path = (char *) archive_entry_pathname(entry);
dyn_buffer_append_string(&buf, path);
dyn_buffer_write_char(&buf, '\n');
}
}
dyn_buffer_write_char(&buf, '\0');
meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
meta_list->key = MetaContent;
strcpy(meta_list->strval, buf.buf);
APPEND_META(doc, meta_list);
dyn_buffer_destroy(&buf);
} else {
parse_job_t *sub_job = malloc(sizeof(parse_job_t) + PATH_MAX * 2);
sub_job->vfile.close = NULL;
sub_job->vfile.read = arc_read;
sub_job->vfile.arc = a;
sub_job->vfile.filepath = sub_job->filepath;
sub_job->vfile.is_fs_file = FALSE;
memcpy(sub_job->parent, doc->uuid, sizeof(uuid_t));
while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
sub_job->info = *archive_entry_stat(entry);
if (S_ISREG(sub_job->info.st_mode)) {
sprintf(sub_job->filepath, "%s#/%s", f->filepath, archive_entry_pathname(entry));
sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;
char *p = strrchr(sub_job->filepath, '.');
if (p != NULL) {
sub_job->ext = (int) (p - sub_job->filepath + 1);
} else {
sub_job->ext = (int) strlen(sub_job->filepath);
}
parse(sub_job);
}
}
free(sub_job);
}
archive_read_free(a);
}

View File

@@ -1,13 +0,0 @@
#ifndef SIST2_ARC_H
#define SIST2_ARC_H
#include "src/sist.h"
#define ARC_BUF_SIZE 8192
int should_parse_filtered_file(const char *filepath, int ext);
void parse_archive(vfile_t *f, document_t *doc);
int arc_read(struct vfile * f, void *buf, size_t size);
#endif

View File

@@ -1,52 +0,0 @@
#include "cbr.h"
#include "src/ctx.h"
unsigned int cbr_mime;
unsigned int cbz_mime;
void cbr_init() {
cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
}
int is_cbr(unsigned int mime) {
return mime == cbr_mime;
}
void parse_cbr(void *buf, size_t buf_len, document_t *doc) {
char *out_buf = malloc(buf_len * 2);
size_t out_buf_used = 0;
struct archive *rar_in = archive_read_new();
archive_read_support_filter_none(rar_in);
archive_read_support_format_rar(rar_in);
archive_read_open_memory(rar_in, buf, buf_len);
struct archive *zip_out = archive_write_new();
archive_write_set_format_zip(zip_out);
archive_write_open_memory(zip_out, out_buf, buf_len * 2, &out_buf_used);
struct archive_entry *entry;
while (archive_read_next_header(rar_in, &entry) == ARCHIVE_OK) {
archive_write_header(zip_out, entry);
char arc_buf[ARC_BUF_SIZE];
int len = archive_read_data(rar_in, arc_buf, ARC_BUF_SIZE);
while (len > 0) {
archive_write_data(zip_out, arc_buf, len);
len = archive_read_data(rar_in, arc_buf, ARC_BUF_SIZE);
}
}
archive_write_close(zip_out);
archive_write_free(zip_out);
archive_read_close(rar_in);
archive_read_free(rar_in);
doc->mime = cbz_mime;
parse_pdf(out_buf, out_buf_used, doc);
doc->mime = cbr_mime;
free(out_buf);
}

View File

@@ -1,12 +0,0 @@
#ifndef SIST2_CBR_H
#define SIST2_CBR_H
#include "src/sist.h"
void cbr_init();
int is_cbr(unsigned int mime);
void parse_cbr(void *buf, size_t buf_len, document_t *doc);
#endif

View File

@@ -1,140 +0,0 @@
#include "doc.h"
#include "src/ctx.h"
#define STR_STARTS_WITH(x, y) (strncmp(y, x, sizeof(y) - 1) == 0)
__always_inline
static int should_read_part(const char *part) {
LOG_DEBUGF("doc.c", "Got part : %s", part)
if (part == NULL) {
return FALSE;
}
if ( // Word
STR_STARTS_WITH(part, "word/document.xml")
|| STR_STARTS_WITH(part, "word/footnotes.xml")
|| STR_STARTS_WITH(part, "word/endnotes.xml")
|| STR_STARTS_WITH(part, "word/footer")
|| STR_STARTS_WITH(part, "word/header")
// PowerPoint
|| STR_STARTS_WITH(part, "ppt/slides/slide")
|| STR_STARTS_WITH(part, "ppt/notesSlides/slide")
// Excel
|| STR_STARTS_WITH(part, "xl/worksheets/sheet")
|| STR_STARTS_WITH(part, "xl/sharedStrings.xml")
|| STR_STARTS_WITH(part, "xl/workbook.xml")
) {
return TRUE;
}
return FALSE;
}
int extract_text(xmlDoc *xml, xmlNode *node, text_buffer_t *buf) {
//TODO: Check which nodes are likely to have a 't' child, and ignore nodes that aren't
xmlErrorPtr err = xmlGetLastError();
if (err != NULL) {
if (err->level == XML_ERR_FATAL) {
LOG_ERRORF("doc.c", "Got fatal XML error while parsing document: %s", err->message)
return -1;
} else {
LOG_ERRORF("doc.c", "Got recoverable XML error while parsing document: %s", err->message)
}
}
for (xmlNode *child = node; child; child = child->next) {
if (*child->name == 't' && *(child->name + 1) == '\0') {
xmlChar *text = xmlNodeListGetString(xml, child->xmlChildrenNode, 1);
if (text) {
text_buffer_append_string0(buf, (char *) text);
text_buffer_append_char(buf, ' ');
xmlFree(text);
}
}
extract_text(xml, child->children, buf);
}
}
int xml_io_read(void *context, char *buffer, int len) {
struct archive *a = context;
return archive_read_data(a, buffer, len);
}
int xml_io_close(UNUSED(void *context)) {
//noop
return 0;
}
__always_inline
static int read_part(struct archive *a, text_buffer_t *buf, document_t *doc) {
xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL, XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
if (xml == NULL) {
LOG_ERROR(doc->filepath, "Could not parse XML")
return -1;
}
xmlNode *root = xmlDocGetRootElement(xml);
if (root == NULL) {
LOG_ERROR(doc->filepath, "Empty document")
xmlFreeDoc(xml);
return -1;
}
extract_text(xml, root, buf);
xmlFreeDoc(xml);
return 0;
}
void parse_doc(void *mem, size_t mem_len, document_t *doc) {
if (mem == NULL) {
return;
}
struct archive *a = archive_read_new();
archive_read_support_format_zip(a);
int ret = archive_read_open_memory(a, mem, mem_len);
if (ret != ARCHIVE_OK) {
LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a))
archive_read_free(a);
return;
}
text_buffer_t buf = text_buffer_create(ScanCtx.content_size);
struct archive_entry *entry;
while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
if (S_ISREG(archive_entry_stat(entry)->st_mode)) {
const char *path = archive_entry_pathname(entry);
if (should_read_part(path)) {
ret = read_part(a, &buf, doc);
if (ret != 0) {
break;
}
}
}
}
if (buf.dyn_buffer.cur > 0) {
text_buffer_terminate_string(&buf);
meta_line_t *meta = malloc(sizeof(meta_line_t) + buf.dyn_buffer.cur);
meta->key = MetaContent;
strcpy(meta->strval, buf.dyn_buffer.buf);
APPEND_META(doc, meta)
}
archive_read_close(a);
archive_read_free(a);
text_buffer_destroy(&buf);
}

View File

@@ -1,8 +0,0 @@
#ifndef SIST2_DOC_H
#define SIST2_DOC_H
#include "src/sist.h"
void parse_doc(void *buf, size_t buf_len, document_t *doc);
#endif

View File

@@ -1,226 +0,0 @@
#include "font.h"
#include "src/ctx.h"
__thread FT_Library ft_lib = NULL;
typedef struct text_dimensions {
unsigned int width;
unsigned int height;
unsigned int baseline;
} text_dimensions_t;
typedef struct glyph {
int top;
int height;
int width;
int descent;
int ascent;
int advance_width;
unsigned char *pixmap;
} glyph_t;
__always_inline
int kerning_offset(char c, char pc, FT_Face face) {
FT_Vector kerning;
FT_Get_Kerning(face, c, pc, FT_KERNING_DEFAULT, &kerning);
return (int) (kerning.x / 64);
}
__always_inline
glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
glyph_t glyph;
glyph.pixmap = slot->bitmap.buffer;
glyph.width = (int) slot->bitmap.width;
glyph.height = (int) slot->bitmap.rows;
glyph.top = slot->bitmap_top;
glyph.advance_width = (int) slot->advance.x / 64;
glyph.descent = MAX(0, glyph.height - glyph.top);
glyph.ascent = MAX(0, MAX(glyph.top, glyph.height) - glyph.descent);
return glyph;
}
text_dimensions_t text_dimension(char *text, FT_Face face) {
text_dimensions_t dimensions;
dimensions.width = 0;
int num_chars = (int) strlen(text);
unsigned int max_ascent = 0;
int max_descent = 0;
char pc = 0;
for (int i = 0; i < num_chars; i++) {
char c = text[i];
FT_Load_Char(face, c, 0);
glyph_t glyph = ft_glyph_to_glyph(face->glyph);
max_descent = MAX(max_descent, glyph.descent);
max_ascent = MAX(max_ascent, MAX(glyph.height, glyph.ascent));
int kerning_x = kerning_offset(c, pc, face);
dimensions.width += MAX(glyph.advance_width, glyph.width) + kerning_x;
pc = c;
}
dimensions.height = max_ascent + max_descent;
dimensions.baseline = max_descent;
return dimensions;
}
void draw_glyph(glyph_t *glyph, int x, int y, struct text_dimensions text_info, unsigned char *bitmap) {
unsigned int src = 0;
unsigned int dst = y * text_info.width + x;
unsigned int row_offset = text_info.width - glyph->width;
unsigned int buf_len = text_info.width * text_info.height;
for (unsigned int sy = 0; sy < glyph->height; sy++) {
for (unsigned int sx = 0; sx < glyph->width; sx++) {
if (dst < buf_len) {
bitmap[dst] |= glyph->pixmap[src];
}
src++;
dst++;
}
dst += row_offset;
}
}
void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned char *bitmap) {
dyn_buffer_write_short(buf, 0x4D42); // Magic
dyn_buffer_write_int(buf, 0); // Size placeholder
dyn_buffer_write_int(buf, 0x5157); //Reserved
dyn_buffer_write_int(buf, 14 + 40 + 256 * 4); // pixels offset
dyn_buffer_write_int(buf, 40); // DIB size
dyn_buffer_write_int(buf, (int) dimensions.width);
dyn_buffer_write_int(buf, (int) dimensions.height);
dyn_buffer_write_short(buf, 1); // Color planes
dyn_buffer_write_short(buf, 8); // bits per pixel
dyn_buffer_write_int(buf, 0); // compression
dyn_buffer_write_int(buf, 0); // Ignored
dyn_buffer_write_int(buf, 3800); // hres
dyn_buffer_write_int(buf, 3800); // vres
dyn_buffer_write_int(buf, 256); // Color count
dyn_buffer_write_int(buf, 0); // Ignored
// RGBA32 Color table (Grayscale)
for (int i = 255; i >= 0; i--) {
dyn_buffer_write_int(buf, i + (i << 8) + (i << 16));
}
// Pixel array: write from bottom to top, with rows padded to multiples of 4-bytes
for (int y = (int) dimensions.height - 1; y >= 0; y--) {
for (unsigned int x = 0; x < dimensions.width; x++) {
dyn_buffer_write_char(buf, (char) bitmap[y * dimensions.width + x]);
}
while (buf->cur % 4 != 0) {
dyn_buffer_write_char(buf, 0);
}
}
// Size
*(int *) ((char *) buf->buf + 2) = buf->cur;
}
void parse_font(const char *buf, size_t buf_len, document_t *doc) {
if (ft_lib == NULL) {
FT_Init_FreeType(&ft_lib);
}
if (buf == NULL) {
return;
}
FT_Face face;
FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, &face);
if (err != 0) {
LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, ft_error_string(err));
return;
}
char font_name[1024];
if (face->style_name == NULL || *(face->style_name) == '?') {
if (face->family_name == NULL) {
strcpy(font_name, "(null)");
} else {
strcpy(font_name, face->family_name);
}
} else {
snprintf(font_name, sizeof(font_name), "%s %s", face->family_name, face->style_name);
}
meta_line_t *meta_name = malloc(sizeof(meta_line_t) + strlen(font_name));
meta_name->key = MetaFontName;
strcpy(meta_name->strval, font_name);
APPEND_META(doc, meta_name)
if (ScanCtx.tn_size <= 0) {
return;
}
int pixel = 64;
int num_chars = (int) strlen(font_name);
err = FT_Set_Pixel_Sizes(face, 0, pixel);
if (err != 0) {
LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, ft_error_string(err))
return;
}
text_dimensions_t dimensions = text_dimension(font_name, face);
unsigned char *bitmap = calloc(dimensions.width * dimensions.height, 1);
FT_Vector pen;
pen.x = 0;
char pc = 0;
for (int i = 0; i < num_chars; i++) {
char c = font_name[i];
err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
if (err != 0) {
c = c >= 'a' && c <= 'z' ? c - 32 : c + 32;
err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
if (err != 0) {
LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, ft_error_string(err));
continue;
}
}
glyph_t glyph = ft_glyph_to_glyph(face->glyph);
pen.x += kerning_offset(c, pc, face);
if (pen.x <= 0) {
pen.x = ABS(glyph.advance_width - glyph.width);
}
pen.y = dimensions.height - glyph.ascent - dimensions.baseline;
draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap);
pen.x += glyph.advance_width;
pc = c;
}
dyn_buffer_t bmp_data = dyn_buffer_create();
bmp_format(&bmp_data, dimensions, bitmap);
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) bmp_data.buf, bmp_data.cur);
dyn_buffer_destroy(&bmp_data);
free(bitmap);
FT_Done_Face(face);
}

View File

@@ -1,9 +0,0 @@
#ifndef SIST2_FONT_H
#define SIST2_FONT_H
#include "src/sist.h"
void parse_font(const char * buf, size_t buf_len, document_t *doc);
#endif

View File

@@ -1,398 +0,0 @@
#include "src/sist.h"
#include "src/ctx.h"
#define MIN_SIZE 32
#define AVIO_BUF_SIZE 8192
__always_inline
static AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
jpeg->width = dstW;
jpeg->height = dstH;
jpeg->time_base.den = 1000000;
jpeg->time_base.num = 1;
jpeg->i_quant_factor = qscale;
jpeg->pix_fmt = AV_PIX_FMT_YUVJ420P;
int ret = avcodec_open2(jpeg, jpeg_codec, NULL);
if (ret != 0) {
printf("Could not open jpeg encoder: %s!\n", av_err2str(ret));
return NULL;
}
return jpeg;
}
__always_inline
AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
int dstW;
int dstH;
if (frame->width <= size && frame->height <= size) {
dstW = frame->width;
dstH = frame->height;
} else {
double ratio = (double) frame->width / frame->height;
if (frame->width > frame->height) {
dstW = size;
dstH = (int) (size / ratio);
} else {
dstW = (int) (size * ratio);
dstH = size;
}
}
if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
return NULL;
}
AVFrame *scaled_frame = av_frame_alloc();
struct SwsContext *ctx = sws_getContext(
decoder->width, decoder->height, decoder->pix_fmt,
dstW, dstH, AV_PIX_FMT_YUVJ420P,
SWS_FAST_BILINEAR, 0, 0, 0
);
int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1);
uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len);
av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);
sws_scale(ctx,
(const uint8_t *const *) frame->data, frame->linesize,
0, decoder->height,
scaled_frame->data, scaled_frame->linesize
);
scaled_frame->width = dstW;
scaled_frame->height = dstH;
scaled_frame->format = AV_PIX_FMT_YUV420P;
sws_freeContext(ctx);
return scaled_frame;
}
__always_inline
static AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, document_t *doc) {
AVFrame *frame = av_frame_alloc();
AVPacket avPacket;
av_init_packet(&avPacket);
int receive_ret = -EAGAIN;
while (receive_ret == -EAGAIN) {
// Get video frame
while (1) {
int read_frame_ret = av_read_frame(pFormatCtx, &avPacket);
if (read_frame_ret != 0) {
if (read_frame_ret != AVERROR_EOF) {
LOG_WARNINGF(doc->filepath,
"(media.c) avcodec_read_frame() returned error code [%d] %s",
read_frame_ret, av_err2str(read_frame_ret)
)
}
av_frame_free(&frame);
av_packet_unref(&avPacket);
return NULL;
}
//Ignore audio/other frames
if (avPacket.stream_index != stream_idx) {
av_packet_unref(&avPacket);
continue;
}
break;
}
// Feed it to decoder
int decode_ret = avcodec_send_packet(decoder, &avPacket);
if (decode_ret != 0) {
LOG_WARNINGF(doc->filepath,
"(media.c) avcodec_send_packet() returned error code [%d] %s",
decode_ret, av_err2str(decode_ret)
)
}
av_packet_unref(&avPacket);
receive_ret = avcodec_receive_frame(decoder, frame);
}
return frame;
}
#define APPEND_TAG_META(doc, tag_, keyname) \
text_buffer_t tex = text_buffer_create(-1); \
text_buffer_append_string0(&tex, tag_->value); \
text_buffer_terminate_string(&tex); \
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \
meta_tag->key = keyname; \
strcpy(meta_tag->strval, tex.dyn_buffer.buf); \
APPEND_META(doc, meta_tag) \
text_buffer_destroy(&tex);
__always_inline
static void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
AVDictionaryEntry *tag = NULL;
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
char key[32];
strncpy(key, tag->key, sizeof(key));
char *ptr = key;
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr);
if (strcmp(key, "artist") == 0) {
APPEND_TAG_META(doc, tag, MetaArtist)
} else if (strcmp(key, "genre") == 0) {
APPEND_TAG_META(doc, tag, MetaGenre)
} else if (strcmp(key, "title") == 0) {
APPEND_TAG_META(doc, tag, MetaTitle)
} else if (strcmp(key, "album_artist") == 0) {
APPEND_TAG_META(doc, tag, MetaAlbumArtist)
} else if (strcmp(key, "album") == 0) {
APPEND_TAG_META(doc, tag, MetaAlbum)
}
}
}
__always_inline
static void append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int include_audio_tags, int is_video) {
if (is_video) {
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
meta_duration->key = MetaMediaDuration;
meta_duration->longval = pFormatCtx->duration / AV_TIME_BASE;
APPEND_META(doc, meta_duration)
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
meta_bitrate->key = MetaMediaBitrate;
meta_bitrate->longval = pFormatCtx->bit_rate;
APPEND_META(doc, meta_bitrate)
}
AVDictionaryEntry *tag = NULL;
if (is_video) {
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
if (include_audio_tags && strcmp(tag->key, "title") == 0) {
APPEND_TAG_META(doc, tag, MetaTitle)
} else if (strcmp(tag->key, "comment") == 0) {
APPEND_TAG_META(doc, tag, MetaContent)
} else if (include_audio_tags && strcmp(tag->key, "artist") == 0) {
APPEND_TAG_META(doc, tag, MetaArtist)
}
}
} else {
// EXIF metadata
while ((tag = av_dict_get(frame->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
if (include_audio_tags && strcmp(tag->key, "Artist") == 0) {
APPEND_TAG_META(doc, tag, MetaArtist)
} else if (strcmp(tag->key, "ImageDescription") == 0) {
APPEND_TAG_META(doc, tag, MetaContent)
} else if (strcmp(tag->key, "Make") == 0) {
APPEND_TAG_META(doc, tag, MetaExifMake)
} else if (strcmp(tag->key, "Model") == 0) {
APPEND_TAG_META(doc, tag, MetaExifModel)
} else if (strcmp(tag->key, "Software") == 0) {
APPEND_TAG_META(doc, tag, MetaExifSoftware)
} else if (strcmp(tag->key, "FNumber") == 0) {
APPEND_TAG_META(doc, tag, MetaExifFNumber)
} else if (strcmp(tag->key, "FocalLength") == 0) {
APPEND_TAG_META(doc, tag, MetaExifFocalLength)
} else if (strcmp(tag->key, "UserComment") == 0) {
APPEND_TAG_META(doc, tag, MetaExifUserComment)
} else if (strcmp(tag->key, "ISOSpeedRatings") == 0) {
APPEND_TAG_META(doc, tag, MetaExifIsoSpeedRatings)
} else if (strcmp(tag->key, "ExposureTime") == 0) {
APPEND_TAG_META(doc, tag, MetaExifExposureTime)
} else if (strcmp(tag->key, "DateTime") == 0) {
APPEND_TAG_META(doc, tag, MetaExifDateTime)
}
}
}
}
void parse_media(AVFormatContext *pFormatCtx, document_t *doc) {
int video_stream = -1;
int audio_stream = -1;
avformat_find_stream_info(pFormatCtx, NULL);
for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) {
AVStream *stream = pFormatCtx->streams[i];
if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
if (audio_stream == -1) {
meta_line_t *meta_audio = malloc(sizeof(meta_line_t));
meta_audio->key = MetaMediaAudioCodec;
meta_audio->intval = stream->codecpar->codec_id;
APPEND_META(doc, meta_audio)
append_audio_meta(pFormatCtx, doc);
audio_stream = i;
}
} else if (stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
if (video_stream == -1) {
meta_line_t *meta_vid = malloc(sizeof(meta_line_t));
meta_vid->key = MetaMediaVideoCodec;
meta_vid->intval = stream->codecpar->codec_id;
APPEND_META(doc, meta_vid)
meta_line_t *meta_w = malloc(sizeof(meta_line_t));
meta_w->key = MetaWidth;
meta_w->intval = stream->codecpar->width;
APPEND_META(doc, meta_w)
meta_line_t *meta_h = malloc(sizeof(meta_line_t));
meta_h->key = MetaHeight;
meta_h->intval = stream->codecpar->height;
APPEND_META(doc, meta_h)
video_stream = i;
}
}
}
if (video_stream != -1 && ScanCtx.tn_size > 0) {
AVStream *stream = pFormatCtx->streams[video_stream];
if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
}
// Decoder
AVCodec *video_codec = avcodec_find_decoder(stream->codecpar->codec_id);
AVCodecContext *decoder = avcodec_alloc_context3(video_codec);
avcodec_parameters_to_context(decoder, stream->codecpar);
avcodec_open2(decoder, video_codec, NULL);
//Seek
if (stream->nb_frames > 1 && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
int seek_ret = 0;
for (int i = 20; i >= 0; i--) {
seek_ret = av_seek_frame(pFormatCtx, video_stream,
stream->duration * 0.10, 0);
if (seek_ret == 0) {
break;
}
}
}
AVFrame *frame = read_frame(pFormatCtx, decoder, video_stream, doc);
if (frame == NULL) {
avcodec_free_context(&decoder);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
}
append_video_meta(pFormatCtx, frame, doc, audio_stream == -1, stream->nb_frames > 1);
// Scale frame
AVFrame *scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size);
if (scaled_frame == NULL) {
av_frame_free(&frame);
avcodec_free_context(&decoder);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
}
// Encode frame to jpeg
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ScanCtx.tn_qscale);
avcodec_send_frame(jpeg_encoder, scaled_frame);
AVPacket jpeg_packet;
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data,
jpeg_packet.size);
av_packet_unref(&jpeg_packet);
av_frame_free(&frame);
av_free(*scaled_frame->data);
av_frame_free(&scaled_frame);
avcodec_free_context(&jpeg_encoder);
avcodec_free_context(&decoder);
}
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
}
void parse_media_filename(const char *filepath, document_t *doc) {
AVFormatContext *pFormatCtx = avformat_alloc_context();
if (pFormatCtx == NULL) {
LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
return;
}
int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
if (res < 0) {
LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
}
parse_media(pFormatCtx, doc);
}
int vfile_read(void *ptr, uint8_t *buf, int buf_size) {
struct vfile *f = ptr;
int ret = f->read(f, buf, buf_size);
if (ret == 0) {
return AVERROR_EOF;
}
return ret;
}
void parse_media_vfile(struct vfile *f, document_t *doc) {
AVFormatContext *pFormatCtx = avformat_alloc_context();
if (pFormatCtx == NULL) {
LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
return;
}
unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
AVIOContext *io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);
pFormatCtx->pb = io_ctx;
pFormatCtx->flags |= AVFMT_FLAG_CUSTOM_IO;
int res = avformat_open_input(&pFormatCtx, "", NULL, NULL);
if (res == -5) {
// Tried to parse media that requires seek
av_free(io_ctx->buffer);
avio_context_free(&io_ctx);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
} else if (res < 0) {
LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
av_free(io_ctx->buffer);
avio_context_free(&io_ctx);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
}
parse_media(pFormatCtx, doc);
av_free(io_ctx->buffer);
avio_context_free(&io_ctx);
}

View File

@@ -1,14 +0,0 @@
#ifndef SIST2_MEDIA_H
#define SIST2_MEDIA_H
#include "src/sist.h"
#define MIN_VIDEO_SIZE 1024 * 64
#define MIN_IMAGE_SIZE 1024 * 2
void parse_media_filename(const char * filepath, document_t *doc);
void parse_media_vfile(struct vfile *f, document_t *doc);
#endif

View File

@@ -1,9 +1,9 @@
#ifndef SIST2_MIME_H
#define SIST2_MIME_H
#include "src/sist.h"
#include "../sist.h"
#define MAJOR_MIME(mime_id) (mime_id & 0x0FFF0000) >> 16
#define MAJOR_MIME(mime_id) (mime_id & 0x00FF0000) >> 16
#define MIME_EMPTY 1
@@ -25,6 +25,12 @@
#define DOC_MASK 0x04000000
#define IS_DOC(mime_id) (mime_id & DOC_MASK) == DOC_MASK
#define MOBI_MASK 0x02000000
#define IS_MOBI(mime_id) (mime_id & MOBI_MASK) == MOBI_MASK
#define MARKUP_MASK 0x01000000
#define IS_MARKUP(mime_id) (mime_id & MARKUP_MASK) == MARKUP_MASK
enum major_mime {
MimeInvalid = 0,
MimeModel = 1,

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,15 @@
#include "parse.h"
#include "src/sist.h"
#include "src/ctx.h"
#include "mime.h"
#include "src/io/serialize.h"
__thread magic_t Magic = NULL;
#include <magic.h>
#define MIN_VIDEO_SIZE 1024 * 64
#define MIN_IMAGE_SIZE 1024 * 2
int fs_read(struct vfile *f, void *buf, size_t size) {
@@ -24,25 +32,10 @@ void fs_close(struct vfile *f) {
}
}
void *read_all(parse_job_t *job, const char *buf, int bytes_read) {
void *full_buf;
if (job->info.st_size <= bytes_read) {
full_buf = malloc(job->info.st_size);
memcpy(full_buf, buf, job->info.st_size);
} else {
full_buf = malloc(job->info.st_size);
memcpy(full_buf, buf, bytes_read);
int ret = job->vfile.read(&job->vfile, full_buf + bytes_read, job->info.st_size - bytes_read);
if (ret < 0) {
LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
return NULL;
void fs_reset(struct vfile *f) {
if (f->fd != -1) {
lseek(f->fd, 0, SEEK_SET);
}
}
return full_buf;
}
void parse(void *arg) {
@@ -50,29 +43,24 @@ void parse(void *arg) {
parse_job_t *job = arg;
document_t doc;
int inc_ts = incremental_get(ScanCtx.original_table, job->info.st_ino);
if (inc_ts != 0 && inc_ts == job->info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, job->info.st_ino);
int inc_ts = incremental_get(ScanCtx.original_table, job->vfile.info.st_ino);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, job->vfile.info.st_ino);
return;
}
if (Magic == NULL) {
Magic = magic_open(MAGIC_MIME_TYPE);
magic_load(Magic, NULL);
}
doc.filepath = job->filepath;
doc.ext = (short) job->ext;
doc.base = (short) job->base;
doc.meta_head = NULL;
doc.meta_tail = NULL;
doc.mime = 0;
doc.size = job->info.st_size;
doc.ino = job->info.st_ino;
doc.mtime = job->info.st_mtim.tv_sec;
doc.size = job->vfile.info.st_size;
doc.ino = job->vfile.info.st_ino;
doc.mtime = job->vfile.info.st_mtim.tv_sec;
uuid_generate(doc.uuid);
char *buf[PARSE_BUF_SIZE];
char *buf[MAGIC_BUF_SIZE];
if (LogCtx.very_verbose) {
char uuid_str[UUID_STR_LEN];
@@ -80,7 +68,7 @@ void parse(void *arg) {
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", uuid_str)
}
if (job->info.st_size == 0) {
if (job->vfile.info.st_size == 0) {
doc.mime = MIME_EMPTY;
} else if (*(job->filepath + job->ext) != '\0' && (job->ext - job->base != 1)) {
doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
@@ -90,14 +78,23 @@ void parse(void *arg) {
if (doc.mime == 0 && !ScanCtx.fast) {
// Get mime type with libmagic
bytes_read = job->vfile.read(&job->vfile, buf, PARSE_BUF_SIZE);
bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE);
if (bytes_read < 0) {
LOG_WARNINGF(job->filepath, "read() Error: %s", strerror(errno))
if (job->vfile.is_fs_file) {
LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
} else {
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc))
}
CLOSE_FILE(job->vfile)
return;
}
const char *magic_mime_str = magic_buffer(Magic, buf, bytes_read);
magic_t magic = magic_open(MAGIC_MIME_TYPE);
magic_load(magic, NULL);
const char *magic_mime_str = magic_buffer(magic, buf, bytes_read);
if (magic_mime_str != NULL) {
doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
@@ -108,8 +105,9 @@ void parse(void *arg) {
}
}
magic_close(Magic);
Magic = NULL;
job->vfile.reset(&job->vfile);
magic_close(magic);
}
int mmime = MAJOR_MIME(doc.mime);
@@ -119,50 +117,34 @@ void parse(void *arg) {
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
// if (job->vfile.is_fs_file) {
// parse_media_filename(job->filepath, &doc);
// } else {
// parse_media_vfile(&job->vfile, &doc);
// }
parse_media(&ScanCtx.media_ctx, &job->vfile, &doc);
} else if (IS_PDF(doc.mime)) {
void *pdf_buf = read_all(job, (char *) buf, bytes_read);
parse_pdf(pdf_buf, doc.size, &doc);
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc.mime), &doc);
if (pdf_buf != buf && pdf_buf != NULL) {
free(pdf_buf);
} else if (mmime == MimeText && ScanCtx.text_ctx.content_size > 0) {
if (IS_MARKUP(doc.mime)) {
parse_markup(&ScanCtx.text_ctx, &job->vfile, &doc);
} else {
parse_text(&ScanCtx.text_ctx, &job->vfile, &doc);
}
} else if (mmime == MimeText && ScanCtx.content_size > 0) {
parse_text(bytes_read, &job->vfile, (char *) buf, &doc);
} else if (IS_FONT(doc.mime)) {
void *font_buf = read_all(job, (char *) buf, bytes_read);
parse_font(font_buf, doc.size, &doc);
parse_font(&ScanCtx.font_ctx, &job->vfile, &doc);
if (font_buf != buf && font_buf != NULL) {
free(font_buf);
}
} else if (
ScanCtx.archive_mode != ARC_MODE_SKIP && (
ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
IS_ARC(doc.mime) ||
(IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
)) {
parse_archive(&job->vfile, &doc);
} else if (ScanCtx.content_size > 0 && IS_DOC(doc.mime)) {
void *doc_buf = read_all(job, (char *) buf, bytes_read);
parse_doc(doc_buf, doc.size, &doc);
parse_archive(&ScanCtx.arc_ctx, &job->vfile, &doc);
} else if (ScanCtx.ooxml_ctx.content_size > 0 && IS_DOC(doc.mime)) {
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, &doc);
if (doc_buf != buf && doc_buf != NULL) {
free(doc_buf);
}
} else if (is_cbr(doc.mime)) {
void *cbr_buf = read_all(job, (char *) buf, bytes_read);
parse_cbr(cbr_buf, doc.size, &doc);
if (cbr_buf != buf && cbr_buf != NULL) {
free(cbr_buf);
}
} else if (is_cbr(&ScanCtx.cbr_ctx, doc.mime)) {
parse_cbr(&ScanCtx.cbr_ctx, &job->vfile, &doc);
} else if (IS_MOBI(doc.mime)) {
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc);
}
//Parent meta
@@ -172,7 +154,7 @@ void parse(void *arg) {
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
meta_parent->key = MetaParent;
strcpy(meta_parent->strval, tmp);
strcpy(meta_parent->str_val, tmp);
APPEND_META((&doc), meta_parent)
}
@@ -180,3 +162,7 @@ void parse(void *arg) {
CLOSE_FILE(job->vfile)
}
void cleanup_parse() {
// noop
}

View File

@@ -1,13 +1,16 @@
#ifndef SIST2_PARSE_H
#define SIST2_PARSE_H
#include "src/sist.h"
#include "../sist.h"
#define PARSE_BUF_SIZE 4096
#define MAGIC_BUF_SIZE 4096 * 6
int fs_read(struct vfile *f, void *buf, size_t size);
void fs_close(struct vfile *f);
void fs_reset(struct vfile *f);
void parse(void *arg);
void cleanup_parse();
#endif

View File

@@ -1,331 +0,0 @@
#include "pdf.h"
#include "src/ctx.h"
#define MIN_OCR_SIZE 350
#define MIN_OCR_LEN 10
__thread text_buffer_t thread_buffer;
int render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
int err = 0;
fz_page *cover = NULL;
fz_var(cover);
fz_var(err);
fz_try(ctx)
cover = fz_load_page(ctx, fzdoc, 0);
fz_catch(ctx)
err = 1;
if (err != 0) {
fz_drop_page(ctx, cover);
LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
return FALSE;
}
fz_rect bounds = fz_bound_page(ctx, cover);
float scale;
float w = (float) bounds.x1 - bounds.x0;
float h = (float) bounds.y1 - bounds.y0;
if (w > h) {
scale = (float) ScanCtx.tn_size / w;
} else {
scale = (float) ScanCtx.tn_size / h;
}
fz_matrix m = fz_scale(scale, scale);
bounds = fz_transform_rect(bounds, m);
fz_irect bbox = fz_round_rect(bounds);
fz_pixmap *pixmap = fz_new_pixmap_with_bbox(ctx, ctx->colorspace->rgb, bbox, NULL, 0);
fz_clear_pixmap_with_value(ctx, pixmap, 0xFF);
fz_device *dev = fz_new_draw_device(ctx, m, pixmap);
fz_var(err);
fz_try(ctx)
{
pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_run_page(ctx, cover, dev, fz_identity, NULL);
}
fz_always(ctx)
{
fz_close_device(ctx, dev);
fz_drop_device(ctx, dev);
pthread_mutex_unlock(&ScanCtx.mupdf_mu);
}
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message)
fz_drop_page(ctx, cover);
fz_drop_pixmap(ctx, pixmap);
return FALSE;
}
fz_buffer *fzbuf = NULL;
fz_var(fzbuf);
fz_var(err);
fz_try(ctx)
fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params);
fz_catch(ctx)
err = ctx->error.errcode;
if (err == 0) {
unsigned char *tn_buf;
size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf);
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
}
fz_drop_buffer(ctx, fzbuf);
fz_drop_pixmap(ctx, pixmap);
fz_drop_page(ctx, cover);
if (err != 0) {
LOG_WARNINGF(doc->filepath, "fz_new_buffer_from_pixmap_as_png() returned error code [%d] %s", err,
ctx->error.message)
return FALSE;
}
return TRUE;
}
void fz_err_callback(void *user, UNUSED(const char *message)) {
if (LogCtx.verbose) {
document_t *doc = (document_t *) user;
LOG_WARNINGF(doc->filepath, "FZ: %s", message)
}
}
__always_inline
static void init_ctx(fz_context *ctx, document_t *doc) {
fz_disable_icc(ctx);
fz_register_document_handlers(ctx);
ctx->warn.print_user = doc;
ctx->warn.print = fz_err_callback;
ctx->error.print_user = doc;
ctx->error.print = fz_err_callback;
}
__always_inline
static int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
if (block->type != FZ_STEXT_BLOCK_TEXT) {
return 0;
}
fz_stext_line *line = block->u.t.first_line;
while (line != NULL) {
fz_stext_char *c = line->first_char;
while (c != NULL) {
if (text_buffer_append_char(tex, c->c) == TEXT_BUF_FULL) {
return TEXT_BUF_FULL;
}
c = c->next;
}
line = line->next;
}
return 0;
}
#define IS_VALID_BPP(d) (d==1 || d==2 || d==4 || d==8 || d==16 || d==24 || d==32)
void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha),
UNUSED(fz_color_params color_params)) {
int l2factor = 0;
if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && IS_VALID_BPP(img->n)) {
fz_pixmap *pix = img->get_pixmap(ctx, img, NULL, img->w, img->h, &l2factor);
if (pix->h > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && img->xres != 0) {
TessBaseAPI *api = TessBaseAPICreate();
TessBaseAPIInit3(api, ScanCtx.tesseract_path, ScanCtx.tesseract_lang);
TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride);
TessBaseAPISetSourceResolution(api, pix->xres);
char *text = TessBaseAPIGetUTF8Text(api);
size_t len = strlen(text);
if (len >= MIN_OCR_LEN) {
text_buffer_append_string(&thread_buffer, text, len - 1);
LOG_DEBUGF(
"pdf.c",
"(OCR) %dx%d got %dB from tesseract (%s), buffer:%dB",
pix->w, pix->h, len, ScanCtx.tesseract_lang, thread_buffer.dyn_buffer.cur
)
}
TessBaseAPIEnd(api);
TessBaseAPIDelete(api);
}
fz_drop_pixmap(ctx, pix);
}
}
void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
if (buf == NULL) {
return;
}
static int mu_is_initialized = 0;
if (!mu_is_initialized) {
pthread_mutex_init(&ScanCtx.mupdf_mu, NULL);
mu_is_initialized = 1;
}
fz_context *ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
init_ctx(ctx, doc);
int err = 0;
fz_document *fzdoc = NULL;
fz_stream *stream = NULL;
fz_var(fzdoc);
fz_var(stream);
fz_var(err);
fz_try(ctx)
{
stream = fz_open_memory(ctx, buf, buf_len);
fzdoc = fz_open_document_with_stream(ctx, mime_get_mime_text(doc->mime), stream);
}
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
char title[4096] = {'\0',};
fz_try(ctx)
fz_lookup_metadata(ctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title));
fz_catch(ctx)
;
if (strlen(title) > 0) {
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + strlen(title));
meta_content->key = MetaTitle;
strcpy(meta_content->strval, title);
APPEND_META(doc, meta_content)
}
int page_count = -1;
fz_var(err);
fz_try(ctx)
page_count = fz_count_pages(ctx, fzdoc);
fz_catch(ctx)
err = ctx->error.errcode;
if (err) {
LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, ctx->error.message)
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
if (ScanCtx.tn_size > 0) {
err = render_cover(ctx, doc, fzdoc);
}
if (err == TRUE) {
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
if (ScanCtx.content_size > 0) {
fz_stext_options opts = {0};
thread_buffer = text_buffer_create(ScanCtx.content_size);
for (int current_page = 0; current_page < page_count; current_page++) {
fz_page *page = NULL;
fz_var(err);
fz_try(ctx)
page = fz_load_page(ctx, fzdoc, current_page);
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
text_buffer_destroy(&thread_buffer);
fz_drop_page(ctx, page);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
dev->stroke_path = NULL;
dev->stroke_text = NULL;
dev->clip_text = NULL;
dev->clip_stroke_path = NULL;
dev->clip_stroke_text = NULL;
if (ScanCtx.tesseract_lang != NULL) {
dev->fill_image = fill_image;
}
fz_var(err);
fz_try(ctx)
fz_run_page(ctx, page, dev, fz_identity, NULL);
fz_always(ctx)
{
fz_close_device(ctx, dev);
fz_drop_device(ctx, dev);
}
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message)
text_buffer_destroy(&thread_buffer);
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_stext_block *block = stext->first_block;
while (block != NULL) {
int ret = read_stext_block(block, &thread_buffer);
if (ret == TEXT_BUF_FULL) {
break;
}
block = block->next;
}
fz_drop_stext_page(ctx, stext);
fz_drop_page(ctx, page);
if (thread_buffer.dyn_buffer.cur >= thread_buffer.dyn_buffer.size) {
break;
}
}
text_buffer_terminate_string(&thread_buffer);
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + thread_buffer.dyn_buffer.cur);
meta_content->key = MetaContent;
memcpy(meta_content->strval, thread_buffer.dyn_buffer.buf, thread_buffer.dyn_buffer.cur);
APPEND_META(doc, meta_content)
text_buffer_destroy(&thread_buffer);
}
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
}

View File

@@ -1,9 +0,0 @@
#ifndef SIST2_PDF_H
#define SIST2_PDF_H
#include "src/sist.h"
void parse_pdf(void *buf, size_t buf_len, document_t *doc);
#endif

View File

@@ -1,37 +0,0 @@
#include "text.h"
#include "src/ctx.h"
void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc) {
char *intermediate_buf;
int intermediate_buf_len;
if (bytes_read == doc->size || bytes_read >= ScanCtx.content_size) {
int to_copy = MIN(bytes_read, ScanCtx.content_size);
intermediate_buf = malloc(to_copy);
intermediate_buf_len = to_copy;
memcpy(intermediate_buf, buf, to_copy);
} else {
int to_read = MIN(ScanCtx.content_size, doc->size) - bytes_read;
intermediate_buf = malloc(to_read + bytes_read);
intermediate_buf_len = to_read + bytes_read;
if (bytes_read != 0) {
memcpy(intermediate_buf, buf, bytes_read);
}
f->read(f, intermediate_buf + bytes_read, to_read);
}
text_buffer_t tex = text_buffer_create(ScanCtx.content_size);
text_buffer_append_string(&tex, intermediate_buf, intermediate_buf_len);
text_buffer_terminate_string(&tex);
meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur);
meta->key = MetaContent;
strcpy(meta->strval, tex.dyn_buffer.buf);
APPEND_META(doc, meta)
free(intermediate_buf);
text_buffer_destroy(&tex);
}

View File

@@ -1,8 +0,0 @@
#ifndef SIST2_TEXT_H
#define SIST2_TEXT_H
#include "src/sist.h"
void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc);
#endif

View File

@@ -1,75 +1,51 @@
#ifndef SIST_H
#define SIST_H
#ifndef FALSE
#define FALSE (0)
#define BOOL int
#endif
#ifndef TRUE
#define TRUE (!FALSE)
#endif
#undef MAX
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#undef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#ifndef PATH_MAX
#define PATH_MAX 4096
#endif
#undef ABS
#define ABS(a) (((a) < 0) ? -(a) : (a))
#define UUID_STR_LEN 37
#define UNUSED(x) __attribute__((__unused__)) x
#include <glib-2.0/glib.h>
#include <unistd.h>
#include "util.h"
#include "log.h"
#include "types.h"
#include "libscan/scan.h"
#include <cjson/cJSON.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <unistd.h>
#include <fcntl.h>
#include <ftw.h>
#include <uuid.h>
#include <magic.h>
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavcodec/avcodec.h"
#include "libavutil/imgutils.h"
#include <ctype.h>
#include <mupdf/fitz.h>
#include <mupdf/pdf.h>
#include "argparse/argparse.h"
#include <time.h>
#include <limits.h>
#include <pthread.h>
#include <dirent.h>
#include <sys/stat.h>
#include <wordexp.h>
#include "ft2build.h"
#include "freetype/freetype.h"
#include <archive.h>
#include <archive_entry.h>
#include <libxml/xmlstring.h>
#include <libxml/parser.h>
#define BOOL int
#include <tesseract/capi.h>
#include <pcre.h>
#include <sys/types.h>
#include <errno.h>
#include <ctype.h>
#include <onion/onion.h>
#include <onion/handler.h>
#include <onion/block.h>
#include <onion/shortcuts.h>
#include <onion/codecs.h>
#include <curl/curl.h>
#include "cJSON/cJSON.h"
#include "types.h"
#include "tpool.h"
#include "utf8.h/utf8.h"
#include "util.h"
#include "io/store.h"
#include "io/serialize.h"
#include "io/walk.h"
#include "parsing/parse.h"
#include "parsing/mime.h"
#include "parsing/text.h"
#include "parsing/pdf.h"
#include "parsing/media.h"
#include "parsing/font.h"
#include "parsing/arc.h"
#include "parsing/doc.h"
#include "parsing/cbr.h"
#include "cli.h"
#include "log.h"
#include "src/index/elastic.h"
#include "index/web.h"
#include "web/serve.h"
#include "web/auth_basic.h"
;
#endif

1
src/static/css/bricklayer.min.css vendored Normal file
View File

@@ -0,0 +1 @@
.bricklayer{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-box-align:start;-webkit-align-items:flex-start;-ms-flex-align:start;align-items:flex-start;-webkit-box-pack:center;-webkit-justify-content:center;-ms-flex-pack:center;justify-content:center;-webkit-flex-wrap:wrap;-ms-flex-wrap:wrap;flex-wrap:wrap}.bricklayer-column-sizer{width:100%;display:none}@media screen and (min-width:640px){.bricklayer-column-sizer{width:50%}}@media screen and (min-width:980px){.bricklayer-column-sizer{width:33.333%}}@media screen and (min-width:1200px){.bricklayer-column-sizer{width:25%}}.bricklayer-column{-webkit-box-flex:1;-webkit-flex:1;-ms-flex:1;flex:1;padding-left:5px;padding-right:5px}

View File

@@ -6,9 +6,10 @@
width: 1rem;
margin-right: 0.2rem;
cursor: pointer;
color: #757575;
line-height: 1rem;
height: 1.1rem;
height: 1rem;
background-image: url();
filter: brightness(65%);
}
.info-icon:hover {
@@ -37,6 +38,7 @@
color: #BBB;
text-overflow: ellipsis;
overflow: hidden;
white-space: nowrap;
}
a {
@@ -119,7 +121,7 @@ body {
background: #546b7a;
}
.navbar a:hover {
a:hover,.btn:hover {
color: #fff;
}
@@ -221,23 +223,21 @@ body {
width: 100%;
}
@media (min-width: 1200px) {
.card-columns {
column-count: 4;
}
}
@media (min-width: 1500px) {
@media screen and (min-width: 1500px) {
.container {
max-width: 1440px;
}
.card-columns {
column-count: 5;
.bricklayer-column-sizer {
width: 20% !important;
}
.bricklayer-column {
max-width: 20%;
}
}
@media (min-width: 1800px) {
@media screen and (min-width: 1800px) {
.container {
max-width: 1550px;
}
@@ -411,7 +411,7 @@ option {
max-height: 250px;
}
@media (max-width: 600px) {
@media (max-width: 650px) {
.media-expanded .fit {
max-height: none;
}
@@ -457,3 +457,57 @@ option {
svg {
fill: white;
}
.play {
position: absolute;
width: 50px;
height: 50px;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
pointer-events: none;
}
.play svg {
fill: rgba(255, 255, 255, 0.7);
}
.img-wrapper:hover svg {
fill: rgba(255, 255, 255, 1);
}
.pointer {
cursor: pointer;
}
.stats-card {
text-align: center;
margin-top: 1em;
padding: 1em;
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .08) !important;
border-radius: 0;
border: none;
background: #212121;
}
.graph {
display: inline-block;
width: 40%;
}
.full-screen {
position: absolute;
left: 0;
width: 100%;
}
.stats-btn {
float: right;
margin-bottom: 10px;
}
#graphs-card svg text {
fill: #eee;
}

View File

@@ -6,9 +6,10 @@
width: 1rem;
margin-right: 0.2rem;
cursor: pointer;
color: #757575;
line-height: 1rem;
height: 1rem;
background-image: url();
filter: brightness(45%);
}
.info-icon:hover {
@@ -36,6 +37,7 @@
color: #444;
text-overflow: ellipsis;
overflow: hidden;
white-space: nowrap;
}
body {
@@ -48,7 +50,9 @@ body {
.card {
margin-top: 1em;
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .075) !important;
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .08) !important;
border-radius: 0;
border: none;
}
.sub-document {
@@ -158,23 +162,27 @@ body {
width: 100%;
}
@media (min-width: 1200px) {
.card-columns {
column-count: 4;
@media screen and (max-width: 1200px) {
.bricklayer-column {
max-width: 100%;
}
}
@media (min-width: 1500px) {
@media screen and (min-width: 1500px) {
.container {
max-width: 1440px;
}
.card-columns {
column-count: 5;
.bricklayer-column-sizer {
width: 20% !important;
}
.bricklayer-column {
max-width: 20%;
}
}
@media (min-width: 1800px) {
@media screen and (min-width: 1800px) {
.container {
max-width: 1550px;
}
@@ -272,7 +280,7 @@ mark {
max-height: 250px;
}
@media (max-width: 600px) {
@media (max-width: 650px) {
.media-expanded .fit {
max-height: none;
}
@@ -293,6 +301,7 @@ mark {
.small-btn {
display: none;
}
.large-btn {
display: inherit;
}
@@ -302,6 +311,7 @@ mark {
.small-btn {
display: inherit;
}
.large-btn {
display: none;
}
@@ -314,3 +324,53 @@ mark {
#pathTree .title {
cursor: pointer;
}
.play {
position: absolute;
width: 50px;
height: 50px;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
pointer-events: none;
}
.play svg {
fill: rgba(0, 0, 0, 0.7);
}
.img-wrapper:hover svg {
fill: rgba(0, 0, 0, 1);
}
.pointer {
cursor: pointer;
}
.stats-card {
text-align: center;
margin-top: 1em;
padding: 1em;
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .08) !important;
border-radius: 0;
border: none;
background: #fff;
}
.graph {
display: inline-block;
width: 40%;
}
.full-screen {
position: absolute;
left: 0;
width: 100%;
}
.stats-btn {
float: right;
margin-bottom: 10px;
}

3
src/static/css/lity.min.css vendored Normal file
View File

@@ -0,0 +1,3 @@
/*! Lity - v2.4.0 - 2019-08-10
* http://sorgalla.com/lity/
* Copyright (c) 2015-2019 Jan Sorgalla; Licensed MIT */.lity{z-index:9990;position:fixed;top:0;right:0;bottom:0;left:0;white-space:nowrap;background:#0b0b0b;background:rgba(0,0,0,0.9);outline:none !important;opacity:0;-webkit-transition:opacity .3s ease;-o-transition:opacity .3s ease;transition:opacity .3s ease}.lity.lity-opened{opacity:1}.lity.lity-closed{opacity:0}.lity *{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.lity-wrap{z-index:9990;position:fixed;top:0;right:0;bottom:0;left:0;text-align:center;outline:none !important}.lity-wrap:before{content:'';display:inline-block;height:100%;vertical-align:middle;margin-right:-0.25em}.lity-loader{z-index:9991;color:#fff;position:absolute;top:50%;margin-top:-0.8em;width:100%;text-align:center;font-size:14px;font-family:Arial,Helvetica,sans-serif;opacity:0;-webkit-transition:opacity .3s ease;-o-transition:opacity .3s ease;transition:opacity .3s ease}.lity-loading .lity-loader{opacity:1}.lity-container{z-index:9992;position:relative;text-align:left;vertical-align:middle;display:inline-block;white-space:normal;max-width:100%;max-height:100%;outline:none !important}.lity-content{z-index:9993;width:100%;-webkit-transform:scale(1);-ms-transform:scale(1);-o-transform:scale(1);transform:scale(1);-webkit-transition:-webkit-transform .3s ease;transition:-webkit-transform .3s ease;-o-transition:-o-transform .3s ease;transition:transform .3s ease;transition:transform .3s ease, -webkit-transform .3s ease, -o-transform .3s ease}.lity-loading .lity-content,.lity-closed .lity-content{-webkit-transform:scale(.8);-ms-transform:scale(.8);-o-transform:scale(.8);transform:scale(.8)}.lity-content:after{content:'';position:absolute;left:0;top:0;bottom:0;display:block;right:0;width:auto;height:auto;z-index:-1;-webkit-box-shadow:0 0 8px rgba(0,0,0,0.6);box-shadow:0 0 8px rgba(0,0,0,0.6)}.lity-close{z-index:9994;width:35px;height:35px;position:fixed;right:0;top:0;-webkit-appearance:none;cursor:pointer;text-decoration:none;text-align:center;padding:0;color:#fff;font-style:normal;font-size:35px;font-family:Arial,Baskerville,monospace;line-height:35px;text-shadow:0 1px 2px rgba(0,0,0,0.6);border:0;background:none;outline:none;-webkit-box-shadow:none;box-shadow:none}.lity-close::-moz-focus-inner{border:0;padding:0}.lity-close:hover,.lity-close:focus,.lity-close:active,.lity-close:visited{text-decoration:none;text-align:center;padding:0;color:#fff;font-style:normal;font-size:35px;font-family:Arial,Baskerville,monospace;line-height:35px;text-shadow:0 1px 2px rgba(0,0,0,0.6);border:0;background:none;outline:none;-webkit-box-shadow:none;box-shadow:none}.lity-close:active{top:1px}.lity-image img{max-width:100%;display:block;line-height:0;border:0}.lity-iframe .lity-container,.lity-youtube .lity-container,.lity-vimeo .lity-container,.lity-facebookvideo .lity-container,.lity-googlemaps .lity-container{width:100%;max-width:964px}.lity-iframe-container{width:100%;height:0;padding-top:56.25%;overflow:hidden;pointer-events:all;-webkit-transform:translateZ(0);transform:translateZ(0);-webkit-overflow-scrolling:touch}.lity-iframe-container iframe{position:absolute;display:block;top:0;left:0;width:100%;height:100%;-webkit-box-shadow:0 0 8px rgba(0,0,0,0.6);box-shadow:0 0 8px rgba(0,0,0,0.6);background:#000}.lity-hide{display:none}

View File

Before

Width:  |  Height:  |  Size: 595 B

After

Width:  |  Height:  |  Size: 595 B

View File

Before

Width:  |  Height:  |  Size: 669 B

After

Width:  |  Height:  |  Size: 669 B

1
src/static/js/bricklayer.min.js vendored Normal file
View File

@@ -0,0 +1 @@
!function t(e,n,r){function o(s,u){if(!n[s]){if(!e[s]){var l="function"==typeof require&&require;if(!u&&l)return l(s,!0);if(i)return i(s,!0);var a=new Error("Cannot find module '"+s+"'");throw a.code="MODULE_NOT_FOUND",a}var p=n[s]={exports:{}};e[s][0].call(p.exports,function(t){var n=e[s][1][t];return o(n?n:t)},p,p.exports,t,e,n,r)}return n[s].exports}for(var i="function"==typeof require&&require,s=0;s<r.length;s++)o(r[s]);return o}({1:[function(t,e,n){var r,o=this&&this.__extends||function(t,e){function n(){this.constructor=t}for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r]);t.prototype=null===e?Object.create(e):(n.prototype=e.prototype,new n)};!function(t){function e(t){return[].slice.call(t)}function n(t,e,n){if(window.CustomEvent)var r=new CustomEvent(e,{detail:n});else{var r=document.createEvent("CustomEvent");r.initCustomEvent(e,!0,!0,n)}return t.dispatchEvent(r)}var r={rulerClassName:"bricklayer-column-sizer",columnClassName:"bricklayer-column"},i=function(){function t(t){this.element=document.createElement("div"),this.element.className=t}return t.prototype.destroy=function(){this.element.parentNode.removeChild(this.element)},t}(),s=function(t){function e(){t.apply(this,arguments)}return o(e,t),e.prototype.getWidth=function(){this.element.setAttribute("style","\n display: block;\n visibility: hidden !important;\n top: -1000px !important;\n ");var t=this.element.offsetWidth;return this.element.removeAttribute("style"),t},e}(i),u=function(t){function e(){t.apply(this,arguments)}return o(e,t),e}(i),l=function(){function t(t,e){void 0===e&&(e=r),this.element=t,this.options=e,this.build(),this.buildResponsive()}return t.prototype.append=function(t){var n=this;if(Array.isArray(t))return void t.forEach(function(t){return n.append(t)});var r=this.findMinHeightColumn();this.elements=e(this.elements).concat([t]),this.applyPosition("append",r,t)},t.prototype.prepend=function(t){var n=this;if(Array.isArray(t))return void t.forEach(function(t){return n.prepend(t)});var r=this.findMinHeightColumn();this.elements=[t].concat(e(this.elements)),this.applyPosition("prepend",r,t)},t.prototype.on=function(t,e){return this.element.addEventListener("bricklayer."+t,e),this},t.prototype.redraw=function(){var t=this.columnCount;this.checkColumnCount(!1),this.reorderElements(t),n(this.element,"bricklayer.redraw",{columnCount:t})},t.prototype.destroy=function(){var t=this;this.ruler.destroy(),e(this.elements).forEach(function(e){return t.element.appendChild(e)}),e(this.getColumns()).forEach(function(t){return t.parentNode.removeChild(t)}),n(this.element,"bricklayer.destroy",{})},t.prototype.build=function(){this.ruler=new s(this.options.rulerClassName),this.elements=this.getElementsInOrder(),this.element.insertBefore(this.ruler.element,this.element.firstChild)},t.prototype.buildResponsive=function(){var t=this;window.addEventListener("resize",function(e){return t.checkColumnCount()}),this.checkColumnCount(),this.on("breakpoint",function(e){return t.reorderElements(e.detail.columnCount)}),this.columnCount>=1&&this.reorderElements(this.columnCount)},t.prototype.getColumns=function(){return this.element.querySelectorAll(":scope > ."+this.options.columnClassName)},t.prototype.findMinHeightColumn=function(){var t=e(this.getColumns()),n=t.map(function(t){return t.offsetHeight}),r=Math.min.apply(null,n);return t[n.indexOf(r)]},t.prototype.getElementsInOrder=function(){return this.element.querySelectorAll(":scope > *:not(."+this.options.columnClassName+"):not(."+this.options.rulerClassName+")")},t.prototype.checkColumnCount=function(t){void 0===t&&(t=!0);var e=this.getColumnCount();this.columnCount!==e&&(t&&n(this.element,"bricklayer.breakpoint",{columnCount:e}),this.columnCount=e)},t.prototype.reorderElements=function(t){var n=this;void 0===t&&(t=1),(t==1/0||1>t)&&(t=1);for(var r=e(this.elements).map(function(t){var e=t.parentNode?t.parentNode.removeChild(t):t;return e}),o=this.getColumns(),i=0;i<o.length;i++)o[i].parentNode.removeChild(o[i]);for(var i=0;t>i;i++){var s=new u(this.options.columnClassName).element;this.element.appendChild(s)}r.forEach(function(t){var e=n.findMinHeightColumn();e.appendChild(t)})},t.prototype.getColumnCount=function(){var t=this.element.offsetWidth,e=this.ruler.getWidth();return Math.round(t/e)},t.prototype.applyPosition=function(t,e,r){var o=this,i=function(i){var s=i+t.charAt(0).toUpperCase()+t.substr(1);n(o.element,"bricklayer."+s,{item:r,column:e})};switch(i("before"),t){case"append":e.appendChild(r);break;case"prepend":e.insertBefore(r,e.firstChild)}i("after")},t}();t.Container=l}(r||(r={})),function(t,n){"function"==typeof define&&define.amd?define(function(){return n()}):"undefined"!=typeof window&&t===window?t.Bricklayer=n():"object"==typeof e&&e.exports&&(e.exports=n())}("undefined"!=typeof window?window:this,function(){return r.Container})},{}]},{},[1]);

2
src/static/js/d3.v5.min.js vendored Normal file

File diff suppressed because one or more lines are too long

2
src/static/js/dom-to-image.min.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@@ -45,6 +45,18 @@ function getContentHighlight(hit) {
return undefined;
}
function getPathHighlight(hit) {
if (hit.hasOwnProperty("highlight")) {
if (hit["highlight"].hasOwnProperty("path.text")) {
return hit["highlight"]["path.text"][0];
} else if (hit["highlight"].hasOwnProperty("path.nGram")) {
return hit["highlight"]["path.nGram"][0];
}
}
return undefined;
}
function applyNameToTitle(hit, title, extension) {
if (hit.hasOwnProperty("highlight")) {
if (hit["highlight"].hasOwnProperty("name")) {
@@ -59,24 +71,28 @@ function applyNameToTitle(hit, title, extension) {
title.appendChild(document.createTextNode(hit["_source"]["name"] + extension));
}
function addVidSrc(url, mime, video) {
let vidSource = document.createElement("source");
vidSource.setAttribute("src", url);
if (video.canPlayType(mime)) {
vidSource.setAttribute("type", mime);
} else {
vidSource.setAttribute("type", "video/webm");
}
video.appendChild(vidSource);
}
function shouldPlayVideo(hit) {
const videoc = hit["_source"]["videoc"];
return videoc !== "hevc" && videoc !== "mpeg2video" && videoc !== "wmv3";
const mime = hit["_source"]["mime"];
return mime &&
mime.startsWith("video/") &&
hit["_source"]["extension"] !== "mkv" &&
hit["_source"]["extension"] !== "avi" &&
videoc !== "hevc" &&
videoc !== "mpeg2video" &&
videoc !== "wmv3";
}
function shouldDisplayRawImage(hit) {
return hit["_source"]["mime"] && hit["_source"]["mime"].startsWith("image/") && hit["_source"]["videoc"] !== "tiff";
const mime = hit["_source"]["mime"];
return mime &&
mime.startsWith("image/") &&
hit["_source"]["mime"] &&
!hit["_source"]["parent"] &&
hit["_source"]["videoc"] !== "tiff" &&
hit["_source"]["videoc"] !== "ppm";
}
function makePlaceholder(w, h, small) {
@@ -229,6 +245,7 @@ function createDocCard(hit) {
let thumbnailOverlay = null;
let imgWrapper = document.createElement("div");
imgWrapper.setAttribute("style", "position: relative");
imgWrapper.setAttribute("class", "img-wrapper");
let mimeCategory = hit["_source"]["mime"].split("/")[0];
@@ -237,7 +254,6 @@ function createDocCard(hit) {
//Thumbnail overlay
switch (mimeCategory) {
case "image":
thumbnailOverlay = document.createElement("div");
thumbnailOverlay.setAttribute("class", "card-img-overlay");
@@ -330,57 +346,12 @@ function createDocCard(hit) {
}
function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
let thumbnail;
let isSubDocument = hit["_source"].hasOwnProperty("parent");
if (mimeCategory === "video" && shouldPlayVideo(hit) && !isSubDocument) {
thumbnail = document.createElement("video");
addVidSrc("f/" + hit["_id"], hit["_source"]["mime"], thumbnail);
if (!hit["_source"].hasOwnProperty("thumbnail")) {
return null;
}
const placeholder = makePlaceholder(hit["_source"]["width"], hit["_source"]["height"], small);
imgWrapper.appendChild(placeholder);
if (small) {
thumbnail.setAttribute("class", "fit-sm");
} else {
thumbnail.setAttribute("class", "fit");
}
if (small) {
thumbnail.style.cursor = "pointer";
thumbnail.title = "Enlarge";
thumbnail.addEventListener("click", function () {
imgWrapper.classList.remove("wrapper-sm", "mr-1");
imgWrapper.parentElement.classList.add("media-expanded");
thumbnail.setAttribute("class", "fit");
thumbnail.setAttribute("controls", "");
});
} else {
thumbnail.setAttribute("controls", "");
}
thumbnail.setAttribute("preload", "none");
thumbnail.setAttribute("poster", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
thumbnail.addEventListener("dblclick", function () {
thumbnail.setAttribute("controls", "");
if (thumbnail.webkitRequestFullScreen) {
thumbnail.webkitRequestFullScreen();
} else {
thumbnail.requestFullscreen();
}
});
const poster = new Image();
poster.src = thumbnail.getAttribute('poster');
poster.addEventListener("load", function () {
placeholder.remove();
imgWrapper.appendChild(thumbnail);
});
} else if ((hit["_source"].hasOwnProperty("width") && hit["_source"]["width"] > 32 && hit["_source"]["height"] > 32)
|| hit["_source"]["mime"] === "application/pdf"
|| hit["_source"]["mime"] === "application/epub+zip"
|| hit["_source"]["mime"] === "application/x-cbz"
|| hit["_source"]["mime"] === "application/x-cbr"
|| hit["_source"].hasOwnProperty("font_name")
) {
thumbnail = document.createElement("img");
let thumbnail = document.createElement("img");
if (small) {
thumbnail.setAttribute("class", "fit-sm");
} else {
@@ -388,13 +359,24 @@ function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
}
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
if (!hit["_source"]["parent"] && shouldDisplayRawImage(hit)) {
imgWrapper.setAttribute("id", "sp" + hit["_id"]);
imgWrapper.setAttribute("data-src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
imgWrapper.setAttribute("href", `f/${hit["_id"]}`);
imgWrapper.setAttribute("data-caption", hit["_source"]["path"] + "/" + hit["_source"]["name"] + ext(hit));
imgWrapper.setAttribute("data-group", "p" + Math.floor(docCount / SIZE));
imgWrapper.classList.add("sp");
if (shouldDisplayRawImage(hit)) {
thumbnail.addEventListener("click", () => {
const l = lity(`f/${hit["_id"]}#.jpg`);
window.addEventListener("scroll", () => l.close());
});
thumbnail.classList.add("pointer");
} else if (shouldPlayVideo(hit)) {
thumbnail.addEventListener("click", () => lity(`f/${hit["_id"]}#.mp4`));
thumbnail.classList.add("pointer");
if (!small) {
const playOverlay = document.createElement("div");
playOverlay.innerHTML = '<svg viewBox="0 0 494.942 494.942" xmlns="http://www.w3.org/2000/svg"><path d="m35.353 0 424.236 247.471-424.236 247.471z"/></svg>';
playOverlay.classList.add("play");
imgWrapper.prepend(playOverlay);
}
}
const placeholder = makePlaceholder(hit["_source"]["width"], hit["_source"]["height"], small);
@@ -407,14 +389,12 @@ function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
placeholder.remove();
imgWrapper.appendChild(thumbnail);
});
}
return thumbnail;
}
function makeInfoButton(hit) {
const infoButton = document.createElement("span");
infoButton.appendChild(document.createTextNode("🛈"));
infoButton.setAttribute("class", "info-icon");
infoButton.addEventListener("click", infoButtonCb(hit));
return infoButton;
@@ -447,6 +427,7 @@ function createDocLine(hit) {
let link = document.createElement("a");
link.setAttribute("href", "f/" + hit["_id"]);
link.setAttribute("target", "_blank");
link.style.maxWidth = "calc(100% - 1.2rem)";
link.appendChild(title);
const titleDiv = document.createElement("div");
@@ -463,6 +444,9 @@ function createDocLine(hit) {
let thumbnail = makeThumbnail(mimeCategory, hit, imgWrapper, true);
if (thumbnail) {
media.appendChild(imgWrapper);
titleDiv.style.maxWidth = "calc(100% - 64px)";
} else {
titleDiv.style.maxWidth = "100%";
}
media.appendChild(titleDiv);
@@ -471,7 +455,7 @@ function createDocLine(hit) {
if (contentHl !== undefined) {
const contentDiv = document.createElement("div");
contentDiv.setAttribute("class", "content-div");
contentDiv.insertAdjacentHTML('afterbegin', contentHl);
contentDiv.insertAdjacentHTML("afterbegin", contentHl);
titleDiv.appendChild(contentDiv);
}
@@ -481,7 +465,13 @@ function createDocLine(hit) {
let path = document.createElement("div");
path.setAttribute("class", "path-line");
path.setAttribute("title", hit["_source"]["path"] + "/");
const pathHighlight = getPathHighlight(hit);
if (pathHighlight) {
path.insertAdjacentHTML("afterbegin", pathHighlight + "/");
} else {
path.appendChild(document.createTextNode(hit["_source"]["path"] + "/"));
}
let tagContainer = document.createElement("div");
tagContainer.setAttribute("class", "tag-container");
@@ -530,6 +520,14 @@ function makeStatsCard(searchResult) {
let statsCardBody = document.createElement("div");
statsCardBody.setAttribute("class", "card-body");
// Stats
let stat = document.createElement("span");
const totalHits = searchResult["aggregations"]["total_count"]["value"];
stat.appendChild(document.createTextNode(totalHits + " results in " + searchResult["took"] + "ms"));
statsCardBody.appendChild(stat);
// Display mode
const resultMode = document.createElement("div");
resultMode.setAttribute("class", "btn-group btn-group-toggle");
resultMode.setAttribute("data-toggle", "buttons");
@@ -537,11 +535,13 @@ function makeStatsCard(searchResult) {
const listMode = document.createElement("label");
listMode.setAttribute("class", "btn btn-primary");
listMode.appendChild(document.createTextNode("List"));
listMode.setAttribute("title", "List mode");
listMode.innerHTML = '<svg width="20px" height="20px" role="img" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><path fill="currentColor" d="M80 368H16a16 16 0 0 0-16 16v64a16 16 0 0 0 16 16h64a16 16 0 0 0 16-16v-64a16 16 0 0 0-16-16zm0-320H16A16 16 0 0 0 0 64v64a16 16 0 0 0 16 16h64a16 16 0 0 0 16-16V64a16 16 0 0 0-16-16zm0 160H16a16 16 0 0 0-16 16v64a16 16 0 0 0 16 16h64a16 16 0 0 0 16-16v-64a16 16 0 0 0-16-16zm416 176H176a16 16 0 0 0-16 16v32a16 16 0 0 0 16 16h320a16 16 0 0 0 16-16v-32a16 16 0 0 0-16-16zm0-320H176a16 16 0 0 0-16 16v32a16 16 0 0 0 16 16h320a16 16 0 0 0 16-16V80a16 16 0 0 0-16-16zm0 160H176a16 16 0 0 0-16 16v32a16 16 0 0 0 16 16h320a16 16 0 0 0 16-16v-32a16 16 0 0 0-16-16z"></path></svg>';
const gridMode = document.createElement("label");
gridMode.setAttribute("class", "btn btn-primary");
gridMode.appendChild(document.createTextNode("Grid"));
gridMode.setAttribute("title", "Grid mode");
gridMode.innerHTML = '<svg width="20px" height="20px" role="img" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><path fill="currentColor" d="M149.333 56v80c0 13.255-10.745 24-24 24H24c-13.255 0-24-10.745-24-24V56c0-13.255 10.745-24 24-24h101.333c13.255 0 24 10.745 24 24zm181.334 240v-80c0-13.255-10.745-24-24-24H205.333c-13.255 0-24 10.745-24 24v80c0 13.255 10.745 24 24 24h101.333c13.256 0 24.001-10.745 24.001-24zm32-240v80c0 13.255 10.745 24 24 24H488c13.255 0 24-10.745 24-24V56c0-13.255-10.745-24-24-24H386.667c-13.255 0-24 10.745-24 24zm-32 80V56c0-13.255-10.745-24-24-24H205.333c-13.255 0-24 10.745-24 24v80c0 13.255 10.745 24 24 24h101.333c13.256 0 24.001-10.745 24.001-24zm-205.334 56H24c-13.255 0-24 10.745-24 24v80c0 13.255 10.745 24 24 24h101.333c13.255 0 24-10.745 24-24v-80c0-13.255-10.745-24-24-24zM0 376v80c0 13.255 10.745 24 24 24h101.333c13.255 0 24-10.745 24-24v-80c0-13.255-10.745-24-24-24H24c-13.255 0-24 10.745-24 24zm386.667-56H488c13.255 0 24-10.745 24-24v-80c0-13.255-10.745-24-24-24H386.667c-13.255 0-24 10.745-24 24v80c0 13.255 10.745 24 24 24zm0 160H488c13.255 0 24-10.745 24-24v-80c0-13.255-10.745-24-24-24H386.667c-13.255 0-24 10.745-24 24v80c0 13.255 10.745 24 24 24zM181.333 376v80c0 13.255 10.745 24 24 24h101.333c13.255 0 24-10.745 24-24v-80c0-13.255-10.745-24-24-24H205.333c-13.255 0-24 10.745-24 24z"></path></svg>';
resultMode.appendChild(gridMode);
resultMode.appendChild(listMode);
@@ -553,12 +553,8 @@ function makeStatsCard(searchResult) {
}
gridMode.addEventListener("click", () => {
console.log("what");
console.log(CONF.options);
CONF.options.display = "grid";
console.log(CONF.options);
CONF.save();
console.log(CONF.options);
searchDebounced();
});
listMode.addEventListener("click", () => {
@@ -567,13 +563,46 @@ function makeStatsCard(searchResult) {
searchDebounced();
});
let stat = document.createElement("span");
const totalHits = searchResult["aggregations"]["total_count"]["value"];
stat.appendChild(document.createTextNode(totalHits + " results in " + searchResult["took"] + "ms"));
statsCardBody.appendChild(stat);
statsCardBody.appendChild(resultMode);
// Sort mode
const sortMode = document.createElement("div");
sortMode.setAttribute("class", "dropdown");
sortMode.style.cssFloat = "right";
sortMode.style.marginRight = "10px";
const sortModeBtn = document.createElement("button");
sortModeBtn.setAttribute("class", "btn btn-md btn-primary dropdown-toggle");
sortModeBtn.setAttribute("id", "sortModeBtn");
sortModeBtn.setAttribute("type", "button");
sortModeBtn.setAttribute("data-toggle", "dropdown");
sortModeBtn.setAttribute("aria-haspopup", "true");
sortModeBtn.setAttribute("aria-expanded", "false");
sortModeBtn.setAttribute("title", "Sort options");
sortModeBtn.innerHTML = '<svg aria-hidden="true" width="20px" height="20px" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 320 512"><path fill="currentColor" d="M41 288h238c21.4 0 32.1 25.9 17 41L177 448c-9.4 9.4-24.6 9.4-33.9 0L24 329c-15.1-15.1-4.4-41 17-41zm255-105L177 64c-9.4-9.4-24.6-9.4-33.9 0L24 183c-15.1 15.1-4.4 41 17 41h238c21.4 0 32.1-25.9 17-41z"></path></svg>';
const sortModeMenu = document.createElement("div");
sortModeMenu.setAttribute("class", "dropdown-menu");
sortModeMenu.setAttribute("aria-labelledby", "sortModeBtn");
Object.keys(SORT_MODES).forEach(mode => {
const item = document.createElement("div");
item.setAttribute("class", "dropdown-item");
item.appendChild(document.createTextNode(SORT_MODES[mode].text));
sortModeMenu.appendChild(item);
item.onclick = function () {
CONF.options.sort = mode;
CONF.save();
searchDebounced();
};
});
sortMode.appendChild(sortModeBtn);
sortMode.appendChild(sortModeMenu);
statsCardBody.appendChild(sortMode);
if (totalHits !== 0) {
let sizeStat = document.createElement("div");
sizeStat.appendChild(document.createTextNode(humanFileSize(searchResult["aggregations"]["total_size"]["value"])));
@@ -589,7 +618,7 @@ function makeResultContainer() {
let resultContainer = document.createElement("div");
if (CONF.options.display === "grid") {
resultContainer.setAttribute("class", "card-columns");
resultContainer.setAttribute("class", "bricklayer");
} else {
resultContainer.setAttribute("class", "list-group");
}

5
src/static/js/lity.min.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@@ -14,39 +14,51 @@ let searchBusy = true;
let selectedIndices = [];
let indexMap = {};
const CONF = new Settings();
let size_min = 0;
let size_max = 10000000000000;
const _defaults = {
display: "grid",
fuzzy: true,
highlight: true
let date_min = null;
let date_max = null;
SORT_MODES = {
score: {
text: "Relevance",
mode: [
{_score: {order: "desc"}},
{_tie: {order: "asc"}}
],
key: hit => hit["_score"]
},
date_asc: {
text: "Date (Ascending)", mode: [
{mtime: {order: "asc"}},
{_tie: {order: "asc"}}
],
key: hit => hit["_source"]["mtime"]
},
date_desc: {
text: "Date (Descending)", mode: [
{mtime: {order: "desc"}},
{_tie: {order: "asc"}}
],
key: hit => hit["_source"]["mtime"]
},
size_asc: {
text: "Size (Ascending)", mode: [
{size: {order: "asc"}},
{_tie: {order: "asc"}}
],
key: hit => hit["_source"]["size"]
},
size_desc: {
text: "Size (Descending)", mode: [
{size: {order: "desc"}},
{_tie: {order: "asc"}}
],
key: hit => hit["_source"]["size"]
},
};
function Settings() {
this.options = {};
this._onUpdate = function () {
$("#fuzzyToggle").prop("checked", this.options.fuzzy);
}
this.load = function () {
const raw = window.localStorage.getItem("options");
if (raw === null) {
this.options = _defaults;
} else {
this.options = JSON.parse(raw);
}
this._onUpdate();
}
this.save = function () {
window.localStorage.setItem("options", JSON.stringify(this.options));
this._onUpdate();
}
}
function showEsError() {
$.toast({
heading: "Elasticsearch connection error",
@@ -60,27 +72,7 @@ function showEsError() {
});
}
jQuery["jsonPost"] = function (url, data) {
return jQuery.ajax({
url: url,
type: "post",
data: JSON.stringify(data),
contentType: "application/json"
}).fail(err => {
showEsError();
console.log(err);
});
};
window.onload = () => {
$("#theme").on("click", () => {
if (!document.cookie.includes("sist")) {
document.cookie = "sist=dark";
} else {
document.cookie = "sist=; Max-Age=-99999999;";
}
window.location.reload();
})
CONF.load();
};
@@ -120,8 +112,10 @@ function getDocumentInfo(id) {
}
function handleTreeClick(tree) {
return (event, node, handler) => {
event.preventTreeDefault();
return (node, e) => {
if (e !== "checked") {
return
}
if (node.id === "any") {
if (!node.itree.state.checked) {
@@ -131,12 +125,10 @@ function handleTreeClick(tree) {
tree.node("any").deselect();
}
handler();
searchDebounced();
}
}
//TODO: filter based on selected indexes, sort mime types
$.jsonPost("es", {
aggs: {
mimeTypes: {
@@ -148,7 +140,7 @@ $.jsonPost("es", {
},
size: 0,
}).then(resp => {
resp["aggregations"]["mimeTypes"]["buckets"].forEach(bucket => {
resp["aggregations"]["mimeTypes"]["buckets"].sort((a, b) => a.key > b.key).forEach(bucket => {
let tmp = bucket["key"].split("/");
let category = tmp[0];
let mime = tmp[1];
@@ -182,7 +174,7 @@ $.jsonPost("es", {
new InspireTreeDOM(mimeTree, {
target: '#mimeTree'
});
mimeTree.on("node.click", handleTreeClick(mimeTree));
mimeTree.on("node.state.changed", handleTreeClick(mimeTree));
mimeTree.deselect();
mimeTree.node("any").select();
});
@@ -215,7 +207,7 @@ $.jsonPost("es", {
new InspireTreeDOM(tagTree, {
target: '#tagTree'
});
tagTree.on("node.click", handleTreeClick(tagTree));
tagTree.on("node.state.changed", handleTreeClick(tagTree));
tagTree.node("any").select();
searchBusy = false;
});
@@ -252,7 +244,7 @@ function insertHits(resultContainer, hits) {
for (let i = 0; i < hits.length; i++) {
if (CONF.options.display === "grid") {
resultContainer.appendChild(createDocCard(hits[i]));
resultContainer._brick.append(createDocCard(hits[i]));
} else {
resultContainer.appendChild(createDocLine(hits[i]));
}
@@ -326,8 +318,15 @@ function search(after = null) {
"font_name^6"
];
if (CONF.options.searchInPath) {
fields.push("path.text^5");
}
if ($("#fuzzyToggle").prop("checked")) {
fields.push("content.nGram");
if (CONF.options.searchInPath) {
fields.push("path.nGram");
}
fields.push("name.nGram^3");
}
@@ -345,6 +344,14 @@ function search(after = null) {
filters.push([{terms: {"tag": tags}}]);
}
if (date_min && date_max) {
filters.push([{range: {mtime: {gte: date_min, lte: date_max}}}])
} else if (date_min) {
filters.push([{range: {mtime: {gte: date_min}}}])
} else if (date_max) {
filters.push([{range: {mtime: {lte: date_max}}}])
}
let q = {
"_source": {
excludes: ["content", "_tie"]
@@ -361,10 +368,7 @@ function search(after = null) {
filter: filters
}
},
"sort": [
{"_score": {"order": "desc"}},
{"_tie": {"order": "asc"}}
],
"sort": SORT_MODES[CONF.options.sort].mode,
aggs:
{
total_size: {"sum": {"field": "size"}},
@@ -374,7 +378,7 @@ function search(after = null) {
};
if (after) {
q.search_after = [after["_score"], after["_id"]];
q.search_after = [SORT_MODES[CONF.options.sort].key(after), after["_id"]];
}
if (CONF.options.highlight) {
@@ -389,6 +393,10 @@ function search(after = null) {
font_name: {},
}
};
if (CONF.options.searchInPath) {
q.highlight.fields["path.text"] = {};
q.highlight.fields["path.nGram"] = {};
}
}
$.jsonPost("es", q).then(searchResult => {
@@ -397,6 +405,11 @@ function search(after = null) {
lastDoc = hits[hits.length - 1];
}
hits.forEach(hit => {
hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
});
if (!after) {
preload.remove();
searchResults.appendChild(makeStatsCard(searchResult));
@@ -409,9 +422,9 @@ function search(after = null) {
let resultContainer = makeResultContainer();
searchResults.appendChild(resultContainer);
window.setTimeout(() => {
$(".sp").SmartPhoto({animationSpeed: 0, swipeTopToClose: true, showAnimation: false, forceInterval: 50});
}, 100);
if (CONF.options.display === "grid") {
resultContainer._brick = new Bricklayer(resultContainer);
}
if (!after) {
docCount = 0;
@@ -422,8 +435,6 @@ function search(after = null) {
});
}
let size_min = 0;
let size_max = 10000000000000;
let searchDebounced = _.debounce(function () {
coolingDown = false;
@@ -431,6 +442,7 @@ let searchDebounced = _.debounce(function () {
}, 500);
searchBar.addEventListener("keyup", searchDebounced);
pathBar.addEventListener("keyup", searchDebounced);
//Size slider
$("#sizeSlider").ionRangeSlider({
@@ -465,6 +477,37 @@ $("#sizeSlider").ionRangeSlider({
}
});
//Date slider
$.jsonPost("es", {
aggs: {
date_min: {min: {field: "mtime"}},
date_max: {max: {field: "mtime"}},
},
size: 0
}).then(resp => {
$("#dateSlider").ionRangeSlider({
type: "double",
grid: false,
force_edges: true,
min: resp["aggregations"]["date_min"]["value"],
max: resp["aggregations"]["date_max"]["value"],
from: resp["aggregations"]["date_min"]["value"],
to: (Date.now() / 1000),
min_interval: 3600 * 24 * 7,
step: 3600 * 24,
drag_interval: true,
prettify: function (num) {
let date = (new Date(num * 1000));
return date.getUTCFullYear() + "-" + ("0" + (date.getUTCMonth() + 1)).slice(-2) + "-" + ("0" + date.getUTCDate()).slice(-2)
},
onFinish: function (e) {
date_min = e.from === e.min ? null : e.from;
date_max = e.to === e.max ? null : e.to;
searchDebounced();
}
});
})
function updateIndices() {
let selected = $('#indices').find('option:selected');
selectedIndices = [];
@@ -492,7 +535,7 @@ function getNextDepth(node) {
bool: {
filter: [
{term: {index: node.index}},
{term: {_depth: node.depth + 1}}
{range: {_depth: {gte: node.depth + 1, lte: node.depth + 3}}},
]
}
},
@@ -505,7 +548,7 @@ function getNextDepth(node) {
}
},
size: 0
}
};
if (node.depth > 0) {
q.query.bool.must = {
@@ -520,12 +563,21 @@ function getNextDepth(node) {
if (!buckets) {
return false;
}
const paths = [];
return buckets
.filter(bucket => bucket.key.length > node.id.length || node.id.startsWith("/"))
.sort((a, b) => a.key > b.key)
.map(bucket => {
const i = bucket.key.lastIndexOf("/");
const name = (i === -1 || i === 1) ? bucket.key : bucket.key.slice(i + 1);
if (paths.some(n => bucket.key.startsWith(n))) {
return null;
}
const name = node.id.startsWith("/") ? bucket.key : bucket.key.slice(node.id.length + 1);
paths.push(bucket.key);
return {
id: bucket.key,
@@ -534,7 +586,7 @@ function getNextDepth(node) {
index: node.index,
children: true,
}
})
}).filter(x => x !== null)
})
}
@@ -542,8 +594,8 @@ function handlePathTreeClick(tree) {
return (event, node, handler) => {
if (node.depth !== 0) {
$("#pathBar").val(node.id)
$("#pathTreeModal").modal("hide")
$("#pathBar").val(node.id);
$("#pathTreeModal").modal("hide");
searchDebounced();
}
@@ -566,48 +618,12 @@ function createPathTree(target) {
depth: 0,
children: true
})
})
});
new InspireTreeDOM(pathTree, {
target: target
});
pathTree.on("node.click", handlePathTreeClick(pathTree));
const button = document.querySelector("#pathBarHelper")
const tooltip = document.querySelector("#pathTreeTooltip")
console.log(button)
console.log(tooltip)
Popper.createPopper(button, tooltip ,{
trigger: "click",
placement: "right",
});
}
function updateSettings() {
CONF.options.display = $("#settingDisplay").val();
CONF.options.fuzzy = $("#settingFuzzy").prop("checked");
CONF.options.highlight = $("#settingHighlight").prop("checked");
CONF.save();
searchDebounced();
$.toast({
heading: "Settings updated",
text: "Settings saved to browser storage",
stack: 3,
bgColor: "#00a4bc",
textColor: "#fff",
position: 'bottom-right',
hideAfter: 3000,
loaderBg: "#08c7e8",
});
}
function loadSettings() {
CONF.load();
$("#settingDisplay").val(CONF.options.display);
$("#settingFuzzy").prop("checked", CONF.options.fuzzy);
$("#settingHighlight").prop("checked", CONF.options.highlight);
}

199
src/static/js/util.js Normal file
View File

@@ -0,0 +1,199 @@
/**
* https://stackoverflow.com/questions/10420352
*/
function humanFileSize(bytes) {
if (bytes === 0) {
return "0 B"
}
let thresh = 1000;
if (Math.abs(bytes) < thresh) {
return bytes + ' B';
}
let units = ['k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'];
let u = -1;
do {
bytes /= thresh;
++u;
} while (Math.abs(bytes) >= thresh && u < units.length - 1);
return bytes.toFixed(1) + units[u];
}
/**
* https://stackoverflow.com/questions/6312993
*/
function humanTime(sec_num) {
sec_num = Math.floor(sec_num);
let hours = Math.floor(sec_num / 3600);
let minutes = Math.floor((sec_num - (hours * 3600)) / 60);
let seconds = sec_num - (hours * 3600) - (minutes * 60);
if (hours < 10) {
hours = "0" + hours;
}
if (minutes < 10) {
minutes = "0" + minutes;
}
if (seconds < 10) {
seconds = "0" + seconds;
}
return hours + ":" + minutes + ":" + seconds;
}
function debounce(func, wait) {
let timeout;
return function () {
let context = this, args = arguments;
let later = function () {
timeout = null;
func.apply(context, args);
};
clearTimeout(timeout);
timeout = setTimeout(later, wait);
func.apply(context, args);
};
}
function lum(c) {
c = c.substring(1);
let rgb = parseInt(c, 16);
let r = (rgb >> 16) & 0xff;
let g = (rgb >> 8) & 0xff;
let b = (rgb >> 0) & 0xff;
return 0.2126 * r + 0.7152 * g + 0.0722 * b;
}
function strUnescape(str) {
let result = "";
for (let i = 0; i < str.length; i++) {
const c = str[i];
const next = str[i+1];
if (c === ']') {
if (next === ']') {
result += c;
i += 1;
} else {
result += String.fromCharCode(parseInt(str.slice(i, i + 2), 16));
i += 2;
}
} else {
result += c;
}
}
return result;
}
const CONF = new Settings();
const _defaults = {
display: "grid",
fuzzy: true,
highlight: true,
sort: "score",
searchInPath: false,
treemapType: "cascaded",
treemapTiling: "squarify",
treemapGroupingDepth: 3,
treemapColor: "PuBuGn",
treemapSize: "large",
};
function loadSettings() {
CONF.load();
$("#settingDisplay").val(CONF.options.display);
$("#settingFuzzy").prop("checked", CONF.options.fuzzy);
$("#settingHighlight").prop("checked", CONF.options.highlight);
$("#settingSearchInPath").prop("checked", CONF.options.searchInPath);
$("#settingTreemapTiling").val(CONF.options.treemapTiling);
$("#settingTreemapGroupingDepth").val(CONF.options.treemapGroupingDepth);
$("#settingTreemapColor").val(CONF.options.treemapColor);
$("#settingTreemapSize").val(CONF.options.treemapSize);
$("#settingTreemapType").val(CONF.options.treemapType);
}
function Settings() {
this.options = {};
this._onUpdate = function () {
$("#fuzzyToggle").prop("checked", this.options.fuzzy);
};
this.load = function () {
const raw = window.localStorage.getItem("options");
if (raw === null) {
this.options = _defaults;
} else {
const j = JSON.parse(raw);
if (!j || Object.keys(_defaults).some(k => !j.hasOwnProperty(k))) {
this.options = _defaults;
} else {
this.options = j;
}
}
this._onUpdate();
};
this.save = function () {
window.localStorage.setItem("options", JSON.stringify(this.options));
this._onUpdate();
}
}
function updateSettings() {
CONF.options.display = $("#settingDisplay").val();
CONF.options.fuzzy = $("#settingFuzzy").prop("checked");
CONF.options.highlight = $("#settingHighlight").prop("checked");
CONF.options.searchInPath = $("#settingSearchInPath").prop("checked");
CONF.options.treemapTiling = $("#settingTreemapTiling").val();
CONF.options.treemapGroupingDepth = $("#settingTreemapGroupingDepth").val();
CONF.options.treemapColor = $("#settingTreemapColor").val();
CONF.options.treemapSize = $("#settingTreemapSize").val();
CONF.options.treemapType = $("#settingTreemapType").val();
CONF.save();
if (typeof searchDebounced !== "undefined") {
searchDebounced();
}
if (typeof updateStats !== "undefined") {
updateStats();
}
$.toast({
heading: "Settings updated",
text: "Settings saved to browser storage",
stack: 3,
bgColor: "#00a4bc",
textColor: "#fff",
position: 'bottom-right',
hideAfter: 3000,
loaderBg: "#08c7e8",
});
}
jQuery["jsonPost"] = function (url, data) {
return jQuery.ajax({
url: url,
type: "post",
data: JSON.stringify(data),
contentType: "application/json"
}).fail(err => {
showEsError();
console.log(err);
});
};
function toggleTheme() {
if (!document.cookie.includes("sist")) {
document.cookie = "sist=dark;SameSite=Strict";
} else {
document.cookie = "sist=; Max-Age=-99999999;";
}
window.location.reload();
}

View File

@@ -11,27 +11,16 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">v1.3.0</span>
<span class="badge badge-pill version">2.3.0</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<button style="margin-left: auto" class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings</button>
<a id="theme" class="btn" title="Toggle theme" href="/">Theme</a>
<a class="btn ml-auto" href="/stats">Stats</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings</button>
<button class="btn" title="Toggle theme" onclick="toggleTheme()">Theme</button>
</nav>
<div class="container">
<div class="card">
<div class="card-body">
<div class="form-group">
<div class="input-group">
<div class="input-group-prepend">
<button id="pathBarHelper" class="btn btn-outline-secondary" data-toggle="modal" data-target="#pathTreeModal">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 576 512" width="20px"><path d="M288 224h224a32 32 0 0 0 32-32V64a32 32 0 0 0-32-32H400L368 0h-80a32 32 0 0 0-32 32v64H64V8a8 8 0 0 0-8-8H40a8 8 0 0 0-8 8v392a16 16 0 0 0 16 16h208v64a32 32 0 0 0 32 32h224a32 32 0 0 0 32-32V352a32 32 0 0 0-32-32H400l-32-32h-80a32 32 0 0 0-32 32v64H64V128h192v64a32 32 0 0 0 32 32zm0 96h66.74l32 32H512v128H288zm0-288h66.74l32 32H512v128H288z"/></svg>
</button>
</div>
<input id="pathBar" type="search" class="form-control" placeholder="Filter path">
</div>
</div>
<div class="input-group">
<div class="input-group-prepend">
<div class="input-group-text">
@@ -51,7 +40,24 @@
</div>
</div>
<input title="File size" id="sizeSlider" name="size">
<div class="row">
<div class="col">
<input title="File size" id="sizeSlider" name="size" width="100%">
</div>
<div class="col">
<div class="input-group" style="margin-bottom: 0.5em; margin-top: 1em">
<div class="input-group-prepend">
<button id="pathBarHelper" class="btn btn-outline-secondary" data-toggle="modal" data-target="#pathTreeModal">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 576 512" width="20px"><path d="M288 224h224a32 32 0 0 0 32-32V64a32 32 0 0 0-32-32H400L368 0h-80a32 32 0 0 0-32 32v64H64V8a8 8 0 0 0-8-8H40a8 8 0 0 0-8 8v392a16 16 0 0 0 16 16h208v64a32 32 0 0 0 32 32h224a32 32 0 0 0 32-32V352a32 32 0 0 0-32-32H400l-32-32h-80a32 32 0 0 0-32 32v64H64V128h192v64a32 32 0 0 0 32 32zm0 96h66.74l32 32H512v128H288zm0-288h66.74l32 32H512v128H288z"/></svg>
</button>
</div>
<input id="pathBar" type="search" class="form-control" placeholder="Filter path">
</div>
</div>
</div>
<input title="Date filter" id="dateSlider" name="size" width="100%">
<div class="row">
<div class="col">
@@ -181,14 +187,75 @@
<label class="custom-control-label" for="settingFuzzy">Set fuzzy search by default</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSearchInPath">
<label class="custom-control-label" for="settingSearchInPath">Enable matching query against document path</label>
</div>
<label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option>
<option value="list">List</option>
</select>
<hr/>
<h4>Stats</h4>
<div class="form-group">
<label for="settingTreemapType">Treemap type</label>
<select id="settingTreemapType" class="form-control form-control-sm">
<option value="cascaded">Cascaded</option>
<option value="flat">Flat (compact)</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapTiling">Treemap tiling</label>
<select id="settingTreemapTiling" class="form-control form-control-sm">
<option value="binary">Binary</option>
<option value="squarify">Squarify</option>
<option value="slice">Slice</option>
<option value="dice">Dice</option>
<option value="sliceDice">Slide & Dice</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapGroupingDepth">Treemap color grouping depth (flat)</label>
<input type="number" class="form-control" id="settingTreemapGroupingDepth" min="1" max="10">
</div>
<div class="form-group">
<label for="settingTreemapColor">Treemap color (cascaded)</label>
<select id="settingTreemapColor" class="form-control form-control-sm">
<option value="PuBuGn">Purple-Blue-Green</option>
<option value="PuRd">Purple-Red</option>
<option value="PuBu">Purple-Blue</option>
<option value="YlOrBr">Yellow-Orange-Brown</option>
<option value="YlOrRd">Yellow-Orange-Red</option>
<option value="YlGn">Yellow-Green</option>
<option value="YlGnBu">Yellow-Green-Blue</option>
<option value="Plasma">Plasma</option>
<option value="Magma">Magma</option>
<option value="Inferno">Inferno</option>
<option value="Viridis">Viridis</option>
<option value="Turbo">Turbo</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapSize">Treemap size</label>
<select id="settingTreemapSize" class="form-control form-control-sm">
<option value="small">Small</option>
<option value="medium">Medium</option>
<option value="large">Large</option>
<option value="x-large">X-Large</option>
<option value="xx-large">XX-Large</option>
</select>
</div>
<br>
<button style="float: right" class="btn btn-primary" onclick="updateSettings()">Update settings</button>
<button class="btn btn-primary ml-auto" onclick="updateSettings()">Update settings</button>
</div>
</div>
</div>
@@ -213,6 +280,7 @@
<div id="searchResults"></div>
</div>
<script src="js" type="text/javascript"></script>
<script src="jslib" type="text/javascript"></script>
<script src="jssearch" type="text/javascript"></script>
</body>
</html>

789
src/static/stats.html Normal file
View File

@@ -0,0 +1,789 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>sist2 - Stats</title>
<meta name='viewport' content='width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no'/>
<link href="css" rel="stylesheet" type="text/css">
</head>
<body>
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.3.0</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" class="btn" href="/">Back</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings"
onclick="loadSettings()">Settings
</button>
<button class="btn" title="Toggle theme" onclick="toggleTheme()">Theme</button>
</nav>
<div class="container pb-3">
<div class="card">
<div class="card-body">
<label for="indices">Index</label>
<select id="indices" onchange="updateStats()"></select>
</div>
</div>
<div id="treemap-card" class="stats-card">
<button class="btn stats-btn" onclick="fullScreen('treemap-card')">Enlarge</button>
<button class="btn stats-btn" onclick="exportTreemap()">Export</button>
<svg id="treemap"></svg>
</div>
<div id="graphs-card" class="stats-card">
<button class="btn stats-btn" onclick="fullScreen('graphs-card')">Enlarge</button>
<div class="graph">
<svg id="agg_mime_size"></svg>
</div>
<div class="graph">
<svg id="agg_mime_count"></svg>
</div>
<div class="graph">
<svg id="date_histogram"></svg>
</div>
<div class="graph">
<svg id="size_histogram"></svg>
</div>
</div>
</div>
<div class="modal" id="settings" tabindex="-1" role="dialog" aria-labelledby="modal-title" aria-hidden="true">
<div class="modal-dialog modal-dialog-centered" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">Settings</h5>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
</div>
<div class="modal-body">
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingHighlight">
<label class="custom-control-label" for="settingHighlight">Enable highlighting</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingFuzzy">
<label class="custom-control-label" for="settingFuzzy">Set fuzzy search by default</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSearchInPath">
<label class="custom-control-label" for="settingSearchInPath">Enable matching query against document
path</label>
</div>
<label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option>
<option value="list">List</option>
</select>
<hr/>
<h4>Stats</h4>
<div class="form-group">
<label for="settingTreemapType">Treemap type</label>
<select id="settingTreemapType" class="form-control form-control-sm">
<option value="cascaded">Cascaded</option>
<option value="flat">Flat (compact)</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapTiling">Treemap tiling</label>
<select id="settingTreemapTiling" class="form-control form-control-sm">
<option value="binary">Binary</option>
<option value="squarify">Squarify</option>
<option value="slice">Slice</option>
<option value="dice">Dice</option>
<option value="sliceDice">Slide & Dice</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapGroupingDepth">Treemap color grouping depth (flat)</label>
<input type="number" class="form-control" id="settingTreemapGroupingDepth" min="1" max="10">
</div>
<div class="form-group">
<label for="settingTreemapColor">Treemap color (cascaded)</label>
<select id="settingTreemapColor" class="form-control form-control-sm">
<option value="PuBuGn">Purple-Blue-Green</option>
<option value="PuRd">Purple-Red</option>
<option value="PuBu">Purple-Blue</option>
<option value="YlOrBr">Yellow-Orange-Brown</option>
<option value="YlOrRd">Yellow-Orange-Red</option>
<option value="YlGn">Yellow-Green</option>
<option value="YlGnBu">Yellow-Green-Blue</option>
<option value="Plasma">Plasma</option>
<option value="Magma">Magma</option>
<option value="Inferno">Inferno</option>
<option value="Viridis">Viridis</option>
<option value="Turbo">Turbo</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapSize">Treemap size</label>
<select id="settingTreemapSize" class="form-control form-control-sm">
<option value="small">Small</option>
<option value="medium">Medium</option>
<option value="large">Large</option>
<option value="x-large">X-Large</option>
<option value="xx-large">XX-Large</option>
</select>
</div>
<br>
<button class="btn btn-primary float-right" onclick="updateSettings()">Update settings</button>
</div>
</div>
</div>
</div>
<script src="jslib" type="text/javascript"></script>
<script>
let width;
let height;
let indexMap = {};
const barHeight = 20;
const ordinalColor = d3.scaleOrdinal(d3.schemeCategory10);
const formatSI = d3.format("~s");
const TILING_MODES = {
"squarify": d3.treemapSquarify,
"binary": d3.treemapBinary,
"sliceDice": d3.treemapSliceDice,
"slice": d3.treemapSlice,
"dice": d3.treemapDice,
};
const COLORS = {
"PuBuGn": d3.interpolatePuBuGn,
"PuRd": d3.interpolatePuRd,
"PuBu": d3.interpolatePuBu,
"YlOrBr": d3.interpolateYlOrBr,
"YlOrRd": d3.interpolateYlOrRd,
"YlGn": d3.interpolateYlGn,
"YlGnBu": d3.interpolateYlGnBu,
"Plasma": d3.interpolatePlasma,
"Magma": d3.interpolateMagma,
"Inferno": d3.interpolateInferno,
"Viridis": d3.interpolateViridis,
"Turbo": d3.interpolateTurbo,
};
const SIZES = {
"small": [800, 600],
"medium": [1300, 750],
"large": [1900, 900],
"x-large": [2800, 1700],
"xx-large": [3600, 2000],
};
const fillOpacity = document.cookie.includes("sist") ? 0.9 : 0.6;
const uids = {};
function uid(name) {
let id = uids[name] || 0;
uids[name] = id + 1;
return name + id;
}
const burrow = function (table, addSelfDir) {
const root = {};
table.forEach(row => {
let layer = root;
row.taxonomy.forEach(key => {
layer[key] = key in layer ? layer[key] : {};
layer = layer[key];
});
if (Object.keys(layer).length === 0) {
layer["$size$"] = row.size;
} else if (addSelfDir) {
layer["."] = {
"$size$": row.size,
};
}
});
const descend = function (obj, depth) {
return Object.keys(obj).filter(k => k !== "$size$").map(k => {
const child = {
name: k,
depth: depth,
value: 0,
children: descend(obj[k], depth + 1)
};
if ("$size$" in obj[k]) {
child.value = obj[k]["$size$"];
}
return child;
});
};
return {
name: `[${indexMap[$("#indices").val()]}]`,
children: descend(root, 1),
value: 0,
depth: 0,
}
};
function flatTreemap(data, svg) {
const root = d3.treemap()
.tile(TILING_MODES[CONF.options.treemapTiling])
.size([width, height])
.padding(1)
.round(true)(
d3.hierarchy(data)
.sum(d => d.value)
.sort((a, b) => b.value - a.value)
);
const leaf = svg.selectAll("g")
.data(root.leaves())
.join("g")
.attr("transform", d => `translate(${d.x0},${d.y0})`);
leaf.append("title")
.text(d => `${d.ancestors().reverse().map(d => d.data.name).join("/")}\n${humanFileSize(d.value)}`);
leaf.append("rect")
.attr("id", d => (d.leafUid = uid("leaf")))
.attr("fill", d => {
while (d.depth > CONF.options.treemapGroupingDepth) d = d.parent;
return ordinalColor(d.data.name);
})
.attr("fill-opacity", fillOpacity)
.attr("width", d => d.x1 - d.x0)
.attr("height", d => d.y1 - d.y0);
leaf.append("clipPath")
.attr("id", d => (d.clipUid = uid("clip")))
.append("use")
.attr("href", d => `#${d.leafUid}`);
leaf.append("text")
.attr("clip-path", d => `url(#${d.clipUid})`)
.selectAll("tspan")
.data(d => {
if (d.data.name === ".") {
d = d.parent;
}
return [d.data.name, humanFileSize(d.value)]
})
.join("tspan")
.attr("x", 2)
.attr("y", (d, i, nodes) => `${i === 0 ? 1.1 : 2.3}em`)
.text(d => d);
}
function cascade(root, offset) {
const x = new Map;
const y = new Map;
return root.eachAfter(d => {
if (d.children && d.children.length !== 0) {
x.set(d, 1 + d3.max(d.children, c => c.x1 === d.x1 - offset ? x.get(c) : NaN));
y.set(d, 1 + d3.max(d.children, c => c.y1 === d.y1 - offset ? y.get(c) : NaN));
} else {
x.set(d, 0);
y.set(d, 0);
}
}).eachBefore(d => {
d.x1 -= 2 * offset * x.get(d);
d.y1 -= 2 * offset * y.get(d);
});
}
function cascadeTreemap(data, svg) {
const root = cascade(
d3.treemap()
.size([width, height])
.tile(TILING_MODES[CONF.options.treemapTiling])
.paddingOuter(3)
.paddingTop(16)
.paddingInner(1)
.round(true)(
d3.hierarchy(data)
.sum(d => d.value)
.sort((a, b) => b.value - a.value)
),
3 // treemap.paddingOuter
);
const maxDepth = Math.max(...root.descendants().map(d => d.depth));
const color = d3.scaleSequential([maxDepth, -1], COLORS[CONF.options.treemapColor]);
svg.append("filter")
.attr("id", "shadow")
.append("feDropShadow")
.attr("flood-opacity", 0.3)
.attr("dx", 0)
.attr("stdDeviation", 3);
const node = svg.selectAll("g")
.data(
d3.nest()
.key(d => d.depth).sortKeys(d3.ascending)
.entries(root.descendants())
)
.join("g")
.attr("filter", "url(#shadow)")
.selectAll("g")
.data(d => d.values)
.join("g")
.attr("transform", d => `translate(${d.x0},${d.y0})`);
node.append("title")
.text(d => `${d.ancestors().reverse().splice(1).map(d => d.data.name).join("/")}\n${humanFileSize(d.value)}`);
node.append("rect")
.attr("id", d => (d.nodeUid = uid("node")))
.attr("fill", d => color(d.depth))
.attr("width", d => d.x1 - d.x0)
.attr("height", d => d.y1 - d.y0);
node.append("clipPath")
.attr("id", d => (d.clipUid = uid("clip")))
.append("use")
.attr("href", d => `#${d.nodeUid}`);
node.append("text")
.attr("fill", d => d3.hsl(color(d.depth)).l > .5 ? "#333" : "#eee")
.attr("clip-path", d => `url(#${d.clipUid})`)
.selectAll("tspan")
.data(d => [d.data.name, humanFileSize(d.value)])
.join("tspan")
.text(d => d);
node.filter(d => d.children).selectAll("tspan")
.attr("dx", 3)
.attr("y", 13);
node.filter(d => !d.children).selectAll("tspan")
.attr("x", 3)
.attr("y", (d, i, nodes) => `${i === 0 ? 1.1 : 2.3}em`);
}
function mimeBarSize(data, svg) {
const margin = {
top: 50,
right: 0,
bottom: 10,
left: Math.max(
d3.max(data.sort((a, b) => b.count - a.count).slice(0, 15), d => d.mime.length) * 6,
d3.max(data.sort((a, b) => b.size - a.size).slice(0, 15), d => d.mime.length) * 6,
)
};
data.forEach(d => {
d.name = d.mime;
d.value = Number(d.size);
});
data = data.sort((a, b) => b.value - a.value).slice(0, 15);
const width = 550;
const height = Math.ceil((data.length + 0.1) * barHeight) + margin.top + margin.bottom;
svg.selectAll("*").remove();
svg.attr("viewBox", [0, 0, width, height]);
const y = d3.scaleBand()
.domain(d3.range(data.length))
.rangeRound([margin.top, height - margin.bottom]);
const x = d3.scaleLinear()
.domain([0, d3.max(data, d => d.value)])
.range([margin.left, width - margin.right]);
svg.append("g")
.attr("fill-opacity", fillOpacity)
.selectAll("rect")
.data(data)
.join("rect")
.attr("fill", d => ordinalColor(d.name))
.attr("x", x(0))
.attr("y", (d, i) => y(i))
.attr("width", d => x(d.value) - x(0))
.attr("height", y.bandwidth())
.append("title")
.text(d => formatSI(d.value));
svg.append("g")
.attr("transform", `translate(0,${margin.top})`)
.call(d3.axisTop(x).ticks(width / 80, data.format).tickFormat(formatSI))
.call(g => g.select(".domain").remove());
svg.append("g")
.attr("transform", `translate(${margin.left},0)`)
.call(d3.axisLeft(y).tickFormat(i => data[i].name).tickSizeOuter(0));
svg.append("text")
.attr("x", (width / 2))
.attr("y", (margin.top / 2))
.attr("text-anchor", "middle")
.style("font-size", "16px")
.text("Size distribution by MIME type");
}
function mimeBarCount(data, svg) {
const margin = {
top: 50,
right: 0,
bottom: 10,
left: Math.max(
d3.max(data.sort((a, b) => b.count - a.count).slice(0, 15), d => d.mime.length) * 6,
d3.max(data.sort((a, b) => b.size - a.size).slice(0, 15), d => d.mime.length) * 6,
)
};
data.forEach(d => {
d.name = d.mime;
d.value = Number(d.count);
});
data = data.sort((a, b) => b.value - a.value).slice(0, 15);
const width = 550;
const height = Math.ceil((data.length + 0.1) * barHeight) + margin.top + margin.bottom;
svg.selectAll("*").remove();
svg.attr("viewBox", [0, 0, width, height]);
const y = d3.scaleBand()
.domain(d3.range(data.length))
.rangeRound([margin.top, height - margin.bottom]);
const x = d3.scaleLinear()
.domain([0, d3.max(data, d => d.value)])
.range([margin.left, width - margin.right]);
svg.append("g")
.attr("fill-opacity", fillOpacity)
.selectAll("rect")
.data(data)
.join("rect")
.attr("fill", d => ordinalColor(d.name))
.attr("x", x(0))
.attr("y", (d, i) => y(i))
.attr("width", d => x(d.value) - x(0))
.attr("height", y.bandwidth())
.append("title")
.text(d => d3.format(",")(d.value));
svg.append("g")
.attr("transform", `translate(0,${margin.top})`)
.call(d3.axisTop(x).ticks(width / 80, data.format).tickFormat(formatSI))
.call(g => g.select(".domain").remove());
svg.append("g")
.attr("transform", `translate(${margin.left},0)`)
.call(d3.axisLeft(y).tickFormat(i => data[i].name).tickSizeOuter(0));
svg.append("text")
.attr("x", (width / 2))
.attr("y", (margin.top / 2))
.attr("text-anchor", "middle")
.style("font-size", "16px")
.text("File count distribution by MIME type");
}
function dateHistogram(data, svg) {
let bins = data.map(d => {
return {
length: Number(d.count),
x0: Number(d.bucket),
x1: Number(d.bucket) + 2629800
}
});
bins.sort((a, b) => a.length - b.length);
const margin = {
top: 50,
right: 20,
bottom: 70,
left: 40
};
const thresh = d3.quantile(bins, 0.9, d => d.length);
bins = bins.filter(d => d.length > thresh);
const width = 550;
const height = 450;
svg.selectAll("*").remove();
svg.attr("viewBox", [0, 0, width, height]);
const y = d3.scaleLinear()
.domain([0, d3.max(bins, d => d.length)]).nice()
.range([height - margin.bottom, margin.top]);
const x = d3.scaleLinear()
.domain(d3.extent(bins, d => d.x0)).nice()
.range([margin.left, width - margin.right]);
svg.append("g")
.attr("fill", "steelblue")
.selectAll("rect")
.data(bins)
.join("rect")
.attr("x", d => x(d.x0) + 1)
.attr("width", d => Math.max(1, x(d.x1) - x(d.x0) - 1))
.attr("y", d => y(d.length))
.attr("height", d => y(0) - y(d.length))
.call(g => g
.append("title")
.text(d => d.length)
);
svg.append("g")
.attr("transform", `translate(0,${height - margin.bottom})`)
.call(
d3.axisBottom(x)
.ticks(width / 30)
.tickSizeOuter(0)
.tickFormat(t => d3.timeFormat("%Y-%m-%d")(d3.utcParse("%s")(t)))
)
.call(g => g
.selectAll("text")
.style("text-anchor", "end")
.attr("dx", "-.8em")
.attr("dy", ".15em")
.attr("transform", "rotate(-65)")
)
.call(g => g.append("text")
.attr("x", width - margin.right)
.attr("y", -4)
.attr("fill", "currentColor")
.attr("font-weight", "bold")
.attr("text-anchor", "end")
.text("mtime")
);
svg.append("g")
.attr("transform", `translate(${margin.left},0)`)
.call(
d3.axisLeft(y)
.ticks(height / 40)
.tickFormat(t => formatSI(t))
)
.call(g => g.select(".domain").remove())
.call(g => g.select(".tick:last-of-type text").clone()
.attr("x", 4)
.attr("text-anchor", "start")
.attr("font-weight", "bold")
.text("File count"));
svg.append("text")
.attr("x", (width / 2))
.attr("y", (margin.top / 2))
.attr("text-anchor", "middle")
.style("font-size", "16px")
.text("File modification time distribution");
}
function sizeHistogram(data, svg) {
let bins = data.map(d => {
return {
length: Number(d.count),
x0: Number(d.bucket),
x1: Number(d.bucket) + (5 * 1024 * 1024)
}
});
bins = bins.sort((a, b) => b.length - a.length).slice(0, 25);
const margin = {
top: 50,
right: 20,
bottom: 70,
left: 40
};
const width = 550;
const height = 450;
svg.selectAll("*").remove();
svg.attr("viewBox", [0, 0, width, height]);
const y = d3.scaleLinear()
.domain([0, d3.max(bins, d => d.length)])
.range([height - margin.bottom, margin.top]);
const x = d3.scaleLinear()
.domain(d3.extent(bins, d => d.x0)).nice()
.range([margin.left, width - margin.right]);
svg.append("g")
.attr("fill", "steelblue")
.selectAll("rect")
.data(bins)
.join("rect")
.attr("x", d => x(d.x0) + 1)
.attr("width", d => Math.max(1, x(d.x1) - x(d.x0) - 1))
.attr("y", d => y(d.length))
.attr("height", d => y(0) - y(d.length))
.call(g => g
.append("title")
.text(d => d.length)
);
svg.append("g")
.attr("transform", `translate(0,${height - margin.bottom})`)
.call(
d3.axisBottom(x)
.ticks(width / 30)
.tickSizeOuter(0)
.tickFormat(formatSI)
)
.call(g => g
.selectAll("text")
.style("text-anchor", "end")
.attr("dx", "-.8em")
.attr("dy", ".15em")
.attr("transform", "rotate(-65)")
)
.call(g => g.append("text")
.attr("x", width - margin.right)
.attr("y", -4)
.attr("fill", "currentColor")
.attr("font-weight", "bold")
.attr("text-anchor", "end")
.text("size (bytes)")
);
svg.append("g")
.attr("transform", `translate(${margin.left},0)`)
.call(
d3.axisLeft(y)
.ticks(height / 40)
.tickFormat(t => formatSI(t))
)
.call(g => g.select(".domain").remove())
.call(g => g.select(".tick:last-of-type text").clone()
.attr("x", 4)
.attr("text-anchor", "start")
.attr("font-weight", "bold")
.text("File count"));
svg.append("text")
.attr("x", (width / 2))
.attr("y", (margin.top / 2))
.attr("text-anchor", "middle")
.style("font-size", "16px")
.text("File size distribution");
}
function updateStats() {
width = SIZES[CONF.options.treemapSize][0];
height = SIZES[CONF.options.treemapSize][1];
const treemapSvg = d3.select("#treemap");
const mimeSvgSize = d3.select("#agg_mime_size");
const mimeSvgCount = d3.select("#agg_mime_count");
const dateHistogramSvg = d3.select("#date_histogram");
const sizeHistogramSvg = d3.select("#size_histogram");
const indexId = $("#indices").val();
d3.csv(`/s/${indexId}/1`).then(tabularData => {
tabularData.forEach(row => {
row.taxonomy = row.path.split("/");
row.size = Number(row.size);
});
if (CONF.options.treemapType === "cascaded") {
const data = burrow(tabularData, false);
cascadeTreemap(data, treemapSvg);
} else {
const data = burrow(tabularData.sort((a, b) => b.taxonomy.length - a.taxonomy.length), true);
flatTreemap(data, treemapSvg);
}
});
d3.csv(`/s/${indexId}/2`).then(tabularData => {
mimeBarSize(tabularData.slice(), mimeSvgSize);
mimeBarCount(tabularData.slice(), mimeSvgCount);
});
d3.csv(`/s/${indexId}/3`).then(tabularData => {
sizeHistogram(tabularData, sizeHistogramSvg);
});
d3.csv(`/s/${indexId}/4`).then(tabularData => {
dateHistogram(tabularData, dateHistogramSvg);
});
treemapSvg.selectAll("*").remove();
treemapSvg.attr("viewBox", [0, 0, width, height])
.attr("xmlns", "http://www.w3.org/2000/svg")
.attr("xmlns:xlink", "http://www.w3.org/1999/xlink")
.attr("version", "1.1")
.style("overflow", "visible")
.style("font", "10px sans-serif");
}
window.onload = function () {
CONF.load();
$.jsonPost("i").then(resp => {
const select = $("#indices");
const urlIndices = (new URLSearchParams(location.search)).get("i");
resp["indices"].forEach(idx => {
indexMap[idx.id] = idx.name;
select.append($("<option>")
.attr("value", idx.id)
.append(idx.name));
if (urlIndices && urlIndices.split(",").indexOf(idx.name) !== -1) {
select.select(idx.name);
}
});
updateStats();
});
};
function fullScreen(selector) {
const card = document.getElementById(selector);
card.classList.toggle("full-screen");
}
function exportTreemap() {
domtoimage.toBlob(document.getElementById("treemap"), {width: width, height: height})
.then(function (blob) {
let a = document.createElement("a");
let url = URL.createObjectURL(blob);
a.href = url;
a.download = `${indexMap[$("#indices").val()]}_treemap.png`;
document.body.appendChild(a);
a.click();
setTimeout(function() {
document.body.removeChild(a);
window.URL.revokeObjectURL(url);
}, 0);
});
}
</script>
</body>
</html>

347
src/stats.c Normal file
View File

@@ -0,0 +1,347 @@
#include "sist.h"
#include "io/serialize.h"
#include "ctx.h"
#include <glib.h>
static GHashTable *FlatTree;
static GHashTable *BufferTable;
static GHashTable *AggMime;
static GHashTable *AggSize;
static GHashTable *AggDate;
#define SIZE_BUCKET (long)(5 * 1024 * 1024)
#define DATE_BUCKET (long)(2629800)
static long TotalSize = 0;
static long DocumentCount = 0;
typedef struct {
long size;
long count;
} agg_t;
void fill_tables(cJSON *document, UNUSED(const char uuid_str[UUID_STR_LEN])) {
const char *json_path = cJSON_GetObjectItem(document, "path")->valuestring;
char *path = malloc(strlen(json_path) + 1);
strcpy(path, json_path);
const char *json_mime = cJSON_GetObjectItem(document, "mime")->valuestring;
char *mime;
if (json_mime == NULL) {
mime = NULL;
} else {
mime = malloc(strlen(json_mime) + 1);
strcpy(mime, json_mime);
}
long size = (long) cJSON_GetObjectItem(document, "size")->valuedouble;
int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;
// treemap
void *existing_path = g_hash_table_lookup(FlatTree, path);
if (existing_path == NULL) {
g_hash_table_insert(FlatTree, path, (gpointer) size);
} else {
g_hash_table_replace(FlatTree, path, (gpointer) ((long) existing_path + size));
}
// mime agg
if (mime != NULL) {
agg_t *orig_agg = g_hash_table_lookup(AggMime, mime);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggMime, mime, agg);
} else {
orig_agg->size += size;
orig_agg->count += 1;
free(mime);
}
}
// size agg
long size_bucket = size - (size % SIZE_BUCKET);
agg_t *orig_agg = g_hash_table_lookup(AggSize, (gpointer) size_bucket);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggSize, (gpointer) size_bucket, agg);
} else {
orig_agg->count += 1;
orig_agg->size += size;
}
// date agg
long date_bucket = mtime - (mtime % DATE_BUCKET);
orig_agg = g_hash_table_lookup(AggDate, (gpointer) date_bucket);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggDate, (gpointer) date_bucket, agg);
} else {
orig_agg->count += 1;
orig_agg->size += size;
}
TotalSize += size;
DocumentCount += 1;
}
void read_index_into_tables(index_t *index) {
DIR *dir = opendir(index->path);
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", index->path, de->d_name);
read_index(file_path, index->desc.uuid, index->desc.type, fill_tables);
}
}
closedir(dir);
}
static size_t rfind(const char *str, int c) {
for (int i = (int)strlen(str); i >= 0; i--) {
if (str[i] == c) {
return i;
}
}
return -1;
}
int merge_up(double thresh) {
long min_size = (long) (thresh * (double) TotalSize);
int count = 0;
GHashTableIter iter;
g_hash_table_iter_init(&iter, FlatTree);
void *key;
void *value;
while (g_hash_table_iter_next(&iter, &key, &value)) {
long size = (long) value;
if (size < min_size) {
int stop = rfind(key, '/');
if (stop == -1) {
stop = 0;
}
char *parent = malloc(stop + 1);
strncpy(parent, key, stop);
*(parent + stop) = '\0';
void *existing_parent = g_hash_table_lookup(FlatTree, parent);
if (existing_parent == NULL) {
void *existing_parent2_key;
void *existing_parent2_val;
int found = g_hash_table_lookup_extended(BufferTable, parent, &existing_parent2_key,
&existing_parent2_val);
if (!found) {
g_hash_table_insert(BufferTable, parent, value);
} else {
g_hash_table_replace(BufferTable, parent, (gpointer) ((long) existing_parent2_val + size));
free(existing_parent2_key);
}
} else {
g_hash_table_replace(FlatTree, parent, (gpointer) ((long) existing_parent + size));
}
g_hash_table_iter_remove(&iter);
count += 1;
}
}
g_hash_table_iter_init(&iter, BufferTable);
while (g_hash_table_iter_next(&iter, &key, &value)) {
g_hash_table_insert(FlatTree, key, value);
g_hash_table_iter_remove(&iter);
}
int size = g_hash_table_size(FlatTree);
LOG_DEBUGF("stats.h", "Merge up iteration (%d merged, %d in tree)", count, size)
return count;
}
/**
* Assumes out is at at least PATH_MAX *4
*/
void csv_escape(char *dst, const char *str) {
const char *ptr = str;
char *out = dst;
if (rfind(str, ',') == -1 && rfind(str, '"') == -1) {
strcpy(dst, str);
return;
}
while (*ptr++ != 0) {
char c = *ptr;
if (c == '"') {
*out++ = '"';
*out++ = '"';
} else {
*out++ = c;
}
}
}
int open_or_exit(const char *path) {
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
if (fd < 0) {
LOG_FATALF("stats.c", "Error while creating file: %s [%d]\n", strerror(errno), errno)
}
return fd;
}
#define TREEMAP_CSV_HEADER "path,size"
#define MIME_AGG_CSV_HEADER "mime,size,count"
#define SIZE_AGG_CSV_HEADER "bucket,size,count"
#define DATE_AGG_CSV_HEADER "bucket,size,count"
void write_treemap_csv(double thresh, const char *out_path) {
void *key;
void *value;
long min_size = (long) (thresh * (double) TotalSize);
int fd = open_or_exit(out_path);
int ret = write(fd, TREEMAP_CSV_HEADER, sizeof(TREEMAP_CSV_HEADER) - 1);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
GHashTableIter iter;
g_hash_table_iter_init(&iter, FlatTree);
while (g_hash_table_iter_next(&iter, &key, &value)) {
long size = (long) value;
if (size >= min_size) {
char path_buf[PATH_MAX * 4];
char buf[PATH_MAX * 4 + 16];
csv_escape(path_buf, key);
size_t written = sprintf(buf, "\n%s,%ld", path_buf, (long) value);
ret = write(fd, buf, written);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
}
}
close(fd);
}
void write_agg_csv_str(const char *out_path, const char *header, GHashTable *table) {
void *key;
void *value;
char buf[4096];
int fd = open_or_exit(out_path);
int ret = write(fd, header, strlen(header));
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
GHashTableIter iter;
g_hash_table_iter_init(&iter, table);
while (g_hash_table_iter_next(&iter, &key, &value)) {
agg_t *agg = value;
size_t written = sprintf(buf, "\n%s,%ld,%ld", (const char*)key, agg->size, agg->count);
ret = write(fd, buf, written);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
}
close(fd);
}
void write_agg_csv_long(const char *out_path, const char *header, GHashTable *table) {
void *key;
void *value;
char buf[4096];
int fd = open_or_exit(out_path);
int ret = write(fd, header, strlen(header));
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
GHashTableIter iter;
g_hash_table_iter_init(&iter, table);
while (g_hash_table_iter_next(&iter, &key, &value)) {
agg_t *agg = value;
size_t written = sprintf(buf, "\n%ld,%ld,%ld", (long)key, agg->size, agg->count);
ret = write(fd, buf, written);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
}
close(fd);
}
int generate_stats(index_t *index, const double threshold, const char *out_prefix) {
FlatTree = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
BufferTable = g_hash_table_new(g_str_hash, g_str_equal);
AggMime = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);
AggSize = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);
AggDate = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);
LOG_INFO("stats.c", "Generating stats...")
read_index_into_tables(index);
LOG_DEBUG("stats.c", "Read index into tables")
LOG_DEBUGF("stats.c", "Total size is %ld", TotalSize)
LOG_DEBUGF("stats.c", "Document count is %ld", DocumentCount)
LOG_DEBUGF("stats.c", "Merging small directories upwards with a threshold of %f%%", threshold * 100)
while (merge_up(threshold) > 100) {}
char tmp[PATH_MAX];
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "treemap.csv");
write_treemap_csv(threshold, tmp);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "mime_agg.csv");
write_agg_csv_str(tmp, MIME_AGG_CSV_HEADER, AggMime);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "size_agg.csv");
write_agg_csv_long(tmp, SIZE_AGG_CSV_HEADER, AggSize);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "date_agg.csv");
write_agg_csv_long(tmp, DATE_AGG_CSV_HEADER, AggDate);
g_hash_table_remove_all(FlatTree);
g_hash_table_destroy(FlatTree);
g_hash_table_destroy(BufferTable);
g_hash_table_remove_all(AggMime);
g_hash_table_destroy(AggMime);
g_hash_table_remove_all(AggSize);
g_hash_table_destroy(AggSize);
g_hash_table_remove_all(AggDate);
g_hash_table_destroy(AggDate);
return 0;
}

6
src/stats.h Normal file
View File

@@ -0,0 +1,6 @@
#ifndef SIST2_STATS_H
#define SIST2_STATS_H
int generate_stats(index_t *index, double threshold, const char* out_prefix);
#endif

Some files were not shown because too many files have changed in this diff Show More