diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..431b3ae --- /dev/null +++ b/.dockerignore @@ -0,0 +1,24 @@ +.idea +*/thumbs +*.cbp +CMakeCache.txt +CMakeFiles +cmake-build-debug +cmake_install.cmake +Makefile +*.out +LOG +sist2* +index.sist2/ +bundle*.css +bundle.js +**/*.a +**/vgcore.* +build/ +.git/ +third-party/libscan/libscan-test-files/ +**/ext_ffmpeg +**/ext_libmobi +**/scan_a_test +Dockerfile +*.idx/ diff --git a/.drone.yml b/.drone.yml index b56731d..1b2ed2d 100644 --- a/.drone.yml +++ b/.drone.yml @@ -8,7 +8,7 @@ platform: steps: - name: build - image: simon987/ubuntu_ci + image: simon987/sist2-build commands: - ./ci/build.sh - name: docker @@ -19,8 +19,8 @@ steps: password: from_secret: DOCKER_PASSWORD repo: simon987/sist2 - context: ./Docker/ - dockerfile: ./Docker/Dockerfile + context: ./ + dockerfile: ./Dockerfile auto_tag: true auto_tag_suffix: x64-linux when: @@ -41,32 +41,32 @@ steps: source: - ./VERSION - ./sist2-x64-linux - - ./sist2-x64-linux-debug.tar.gz + - ./sist2-x64-linux-debug ---- -kind: pipeline -type: docker -name: arm64 - -platform: - arch: arm64 - -steps: - - name: build - image: simon987/ubuntu_ci_arm - commands: - - ./ci/build_arm64.sh - - name: scp files - image: appleboy/drone-scp - settings: - host: - from_secret: SSH_HOST - port: - from_secret: SSH_PORT - user: - from_secret: SSH_USER - key: - from_secret: SSH_KEY - target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/ - source: - - ./sist2-arm64-linux +#--- +#kind: pipeline +#type: docker +#name: arm64 +# +#platform: +# arch: arm64 +# +#steps: +# - name: build +# image: simon987/ubuntu_ci_arm +# commands: +# - ./ci/build_arm64.sh +# - name: scp files +# image: appleboy/drone-scp +# settings: +# host: +# from_secret: SSH_HOST +# port: +# from_secret: SSH_PORT +# user: +# from_secret: SSH_USER +# key: +# from_secret: SSH_KEY +# target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/ +# source: +# - ./sist2-arm64-linux diff --git a/.gitignore b/.gitignore index eb4db54..471fda9 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ bundle.js vgcore.* build/ third-party/ +*.idx/ \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 5a02c37..34516e6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ project(sist2 C) option(SIST_DEBUG "Build a debug executable" on) -set(BUILD_TESTS off) +set(BUILD_TESTS on) add_subdirectory(third-party/libscan) set(ARGPARSE_SHARED off) add_subdirectory(third-party/argparse) @@ -36,14 +36,15 @@ add_executable(sist2 target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/) set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib) +find_package(PkgConfig REQUIRED) + +pkg_search_module(GLIB REQUIRED glib-2.0) + find_package(lmdb CONFIG REQUIRED) find_package(cJSON CONFIG REQUIRED) -find_package(unofficial-glib CONFIG REQUIRED) find_package(unofficial-mongoose CONFIG REQUIRED) find_package(CURL CONFIG REQUIRED) -#find_package(OpenSSL REQUIRED) - target_include_directories( sist2 PUBLIC @@ -51,6 +52,7 @@ target_include_directories( ${CMAKE_SOURCE_DIR}/third-party/utf8.h/ ${CMAKE_SOURCE_DIR}/third-party/libscan/ ${CMAKE_SOURCE_DIR}/ + ${GLIB_INCLUDE_DIRS} ) target_compile_options( @@ -103,7 +105,7 @@ target_link_libraries( lmdb cjson argparse - unofficial::glib::glib + ${GLIB_LDFLAGS} unofficial::mongoose::mongoose CURL::libcurl diff --git a/DockerArm64/Dockerfile b/DockerArm64/Dockerfile deleted file mode 100644 index bb910ec..0000000 --- a/DockerArm64/Dockerfile +++ /dev/null @@ -1,22 +0,0 @@ -FROM ubuntu:19.10 -MAINTAINER simon987 - -RUN apt update -RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \ - curl libtiff5 libpng16-16 libpcre3 - -RUN mkdir -p /usr/share/tessdata && \ - cd /usr/share/tessdata/ && \ - curl -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata &&\ - curl -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata &&\ - curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\ - curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\ - curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\ - curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh - -ADD sist2_arm64 /root/sist2 - -ENV LANG C.UTF-8 -ENV LC_ALL C.UTF-8 - -ENTRYPOINT ["/root/sist2"] diff --git a/DockerArm64/build.sh b/DockerArm64/build.sh deleted file mode 100755 index 55d30ab..0000000 --- a/DockerArm64/build.sh +++ /dev/null @@ -1,13 +0,0 @@ -rm ./sist2_arm64 -cp ../sist2_arm64.gz . -gzip -d sist2_arm64.gz - -version=$(./sist2_arm64 --version) - -echo "Version ${version}" -docker build . -t simon987/sist2-arm64:"${version}" -t simon987/sist2-arm64:latest - -docker push simon987/sist2-arm64:"${version}" -docker push simon987/sist2-arm64:latest - -docker run --rm simon987/sist2-arm64 -v \ No newline at end of file diff --git a/Docker/Dockerfile b/Dockerfile similarity index 76% rename from Docker/Dockerfile rename to Dockerfile index 08f6e12..4b525cd 100644 --- a/Docker/Dockerfile +++ b/Dockerfile @@ -1,6 +1,14 @@ -FROM ubuntu:20.04 +FROM simon987/sist2-build as build MAINTAINER simon987 +WORKDIR /build/ +ADD . /build/ +RUN cmake -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake . +RUN make -j$(nproc) +RUN strip sist2 + +FROM ubuntu:20.10 + RUN apt update RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \ curl libtiff5 libpng16-16 libpcre3 @@ -12,9 +20,9 @@ RUN mkdir -p /usr/share/tessdata && \ curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\ curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\ curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\ - curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh + curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata -ADD sist2 /root/sist2 +COPY --from=build /build/sist2 /root/sist2 ENV LANG C.UTF-8 ENV LC_ALL C.UTF-8 diff --git a/Dockerfile.arm64 b/Dockerfile.arm64 new file mode 100644 index 0000000..f87f5c1 --- /dev/null +++ b/Dockerfile.arm64 @@ -0,0 +1 @@ +# TODO \ No newline at end of file diff --git a/README.md b/README.md index 12026b4..0ff5e9e 100644 --- a/README.md +++ b/README.md @@ -25,14 +25,12 @@ sist2 (Simple incremental search tool) * OCR support with tesseract \*\*\* * Stats page & disk utilisation visualization - \* See [format support](#format-support) \*\* See [Archive files](#archive-files) -\*\*\* See [OCR](#ocr) +\*\*\* See [OCR](#ocr) ![stats](docs/stats.png) - ## Getting Started 1. Have an Elasticsearch (>= 6.X.X) instance running @@ -56,10 +54,8 @@ sist2 (Simple incremental search tool) 1. *(or)* `docker pull simon987/sist2:latest` 1. See [Usage guide](docs/USAGE.md) - - -\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux) +\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux) ## Example usage @@ -69,7 +65,6 @@ See [Usage guide](docs/USAGE.md) for more details 1. Push index to Elasticsearch: `sist2 index ./docs_idx` 1. Start web interface: `sist2 web ./docs_idx` - ## Format support File type | Library | Content | Thumbnail | Metadata @@ -78,8 +73,8 @@ pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title | cbz,cbr | *(none)* | - | yes | - | `audio/*` | ffmpeg | - | yes | ID3 tags | `video/*` | ffmpeg | - | yes | title, comment, artist | -`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) | -raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags | +`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags | +raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags, GPS tags | ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style | `text/plain` | *(none)* | yes | no | - | html, xml | *(none)* | yes | no | - | @@ -89,40 +84,39 @@ doc (MS Word 97-2003) | antiword | yes | yes | author, title | mobi, azw, azw3 | libmobi | yes | no | author, title | \* *See [Archive files](#archive-files)* - + ### Archive files -**sist2** will scan files stored into archive files (zip, tar, 7z...) as if -they were directly in the file system. Recursive (archives inside archives) + +**sist2** will scan files stored into archive files (zip, tar, 7z...) as if they were directly in the file system. +Recursive (archives inside archives) scan is also supported. **Limitations**: -* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) + +* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) is limitted (see `--mem-buffer` option) * Archive files are scanned sequentially, by a single thread. On systems where -**sist2** is not I/O bound, scans might be faster when larger archives are split - into smaller parts. - - + **sist2** is not I/O bound, scans might be faster when larger archives are split into smaller parts. + ### OCR You can enable OCR support for pdf,xps,fb2,epub file types with the -`--ocr ` option. Download the language data files with your -package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files). +`--ocr ` option. Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or +directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files). -The `simon987/sist2` image comes with common languages +The `simon987/sist2` image comes with common languages (hin, jpn, eng, fra, rus, spa) pre-installed. Examples + ```bash sist2 scan --ocr jpn ~/Books/Manga/ sist2 scan --ocr eng ~/Books/Textbooks/ ``` - ## Build from source -You can compile **sist2** by yourself if you don't want to use the pre-compiled -binaries (GCC 7+ required). +You can compile **sist2** by yourself if you don't want to use the pre-compiled binaries (GCC 7+ required). 1. Install compile-time dependencies diff --git a/ci/build.sh b/ci/build.sh index 91c3df8..0333686 100755 --- a/ci/build.sh +++ b/ci/build.sh @@ -8,7 +8,7 @@ git submodule update --init --recursive rm -rf CMakeFiles CMakeCache.txt cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" . -make -j 33 +make -j $(nproc) strip sist2 ./sist2 -v > VERSION cp sist2 Docker/ @@ -16,7 +16,5 @@ mv sist2 sist2-x64-linux rm -rf CMakeFiles CMakeCache.txt cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" . -make -j 33 -cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2 -mv sist2_debug sist2-x64-linux-debug -tar -czf sist2-x64-linux-debug.tar.gz sist2-x64-linux-debug libasan.so.2 +make -j $(nproc) +mv sist2_debug sist2-x64-linux-debug \ No newline at end of file diff --git a/docs/USAGE.md b/docs/USAGE.md index bc7278b..a1cb6de 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -46,6 +46,7 @@ Scan options --fast Only index file names & mime type --treemap-threshold= Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005 --mem-buffer= Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000 + --read-subtitles Read subtitles from media files Index options -t, --threads= Number of threads. DEFAULT=1 @@ -91,7 +92,7 @@ Made by simon987 . Released under GPL-3.0 Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute) will be copied to the new index and will not be parsed again. * `-o, --output` Output directory. -* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url)) +* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url)) * `--name` Set the `name` option for the web module * `--depth` Maximum scan dept. Set to 0 only scan files directly in the root directory, set to -1 for infinite depth * `--archive` Archive file mode. @@ -123,6 +124,7 @@ Made by simon987 . Released under GPL-3.0 larger than this number will be read sequentially and no *seek* operations will be supported. To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -` +* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files. ### Scan examples diff --git a/schema/mappings.json b/schema/mappings.json index 4925c22..f780e3d 100644 --- a/schema/mappings.json +++ b/schema/mappings.json @@ -105,10 +105,10 @@ "analyzer": "my_nGram", "type": "text" }, - "_keyword.*": { + "_keyword.*": { "type": "keyword" }, - "_text.*": { + "_text.*": { "analyzer": "content_analyzer", "type": "text", "fields": { @@ -165,6 +165,30 @@ "exif_user_comment": { "type": "text" }, + "exif_gps_longitude_ref": { + "type": "keyword", + "index": false + }, + "exif_gps_longitude_dms": { + "type": "keyword", + "index": false + }, + "exif_gps_longitude_dec": { + "type": "keyword", + "index": false + }, + "exif_gps_latitude_ref": { + "type": "keyword", + "index": false + }, + "exif_gps_latitude_dms": { + "type": "keyword", + "index": false + }, + "exif_gps_latitude_dec": { + "type": "keyword", + "index": false + }, "author": { "type": "text" }, diff --git a/scripts/reset.sh b/scripts/reset.sh new file mode 100755 index 0000000..6763b16 --- /dev/null +++ b/scripts/reset.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +make clean +rm -rf CMakeFiles/ CMakeCache.txt Makefile \ + third-party/libscan/CMakeFiles third-party/libscan/CMakeCache.txt third-party/libscan/third-party/ext_ffmpeg \ + third-party/libscan/third-party/ext_libmobi third-party/libscan/Makefile \ No newline at end of file diff --git a/src/cli.c b/src/cli.c index 1cd8f2c..2aa3c54 100644 --- a/src/cli.c +++ b/src/cli.c @@ -227,6 +227,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { LOG_DEBUGF("cli.c", "arg depth=%d", args->depth) LOG_DEBUGF("cli.c", "arg path=%s", args->path) LOG_DEBUGF("cli.c", "arg archive=%s", args->archive) + LOG_DEBUGF("cli.c", "arg archive_passphrase=%s", args->archive_passphrase) LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang) LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path) LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex) diff --git a/src/cli.h b/src/cli.h index 27139ef..085807b 100644 --- a/src/cli.h +++ b/src/cli.h @@ -18,6 +18,7 @@ typedef struct scan_args { char *path; char *archive; archive_mode_t archive_mode; + char *archive_passphrase; char *tesseract_lang; const char *tesseract_path; char *exclude_regex; @@ -25,6 +26,7 @@ typedef struct scan_args { const char* treemap_threshold_str; double treemap_threshold; int max_memory_buffer; + int read_subtitles; } scan_args_t; scan_args_t *scan_args_create(); diff --git a/src/ctx.h b/src/ctx.h index b8296fe..07ca3c2 100644 --- a/src/ctx.h +++ b/src/ctx.h @@ -40,6 +40,8 @@ typedef struct { pcre_extra *exclude_extra; int fast; + GHashTable *dbg_current_files; + scan_arc_ctx_t arc_ctx; scan_comic_ctx_t comic_ctx; scan_ebook_ctx_t ebook_ctx; diff --git a/src/index/static_generated.c b/src/index/static_generated.c index 14a7539..a5df136 100644 --- a/src/index/static_generated.c +++ b/src/index/static_generated.c @@ -1,3 +1,3 @@ -char mappings_json[1996] = {123,34,112,114,111,112,101,114,116,105,101,115,34,58,123,34,95,116,105,101,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,100,111,99,95,118,97,108,117,101,115,34,58,116,114,117,101,125,44,34,95,100,101,112,116,104,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,125,44,34,112,97,116,104,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,112,97,116,104,95,97,110,97,108,121,122,101,114,34,44,34,99,111,112,121,95,116,111,34,58,34,115,117,103,103,101,115,116,45,112,97,116,104,34,44,34,102,105,101,108,100,100,97,116,97,34,58,116,114,117,101,44,34,102,105,101,108,100,115,34,58,123,34,110,71,114,97,109,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,125,44,34,116,101,120,116,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,99,111,110,116,101,110,116,95,97,110,97,108,121,122,101,114,34,125,125,125,44,34,115,117,103,103,101,115,116,45,112,97,116,104,34,58,123,34,116,121,112,101,34,58,34,99,111,109,112,108,101,116,105,111,110,34,44,34,97,110,97,108,121,122,101,114,34,58,34,99,97,115,101,95,105,110,115,101,110,115,105,116,105,118,101,95,107,119,95,97,110,97,108,121,122,101,114,34,125,44,34,109,105,109,101,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,112,97,114,101,110,116,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,116,104,117,109,98,110,97,105,108,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,118,105,100,101,111,99,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,97,117,100,105,111,99,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,100,117,114,97,116,105,111,110,34,58,123,34,116,121,112,101,34,58,34,102,108,111,97,116,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,119,105,100,116,104,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,104,101,105,103,104,116,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,112,97,103,101,115,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,109,116,105,109,101,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,125,44,34,115,105,122,101,34,58,123,34,116,121,112,101,34,58,34,108,111,110,103,34,125,44,34,105,110,100,101,120,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,110,97,109,101,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,99,111,110,116,101,110,116,95,97,110,97,108,121,122,101,114,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,102,105,101,108,100,115,34,58,123,34,110,71,114,97,109,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,125,125,125,44,34,102,111,110,116,95,110,97,109,101,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,97,108,98,117,109,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,97,114,116,105,115,116,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,116,105,116,108,101,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,103,101,110,114,101,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,97,108,98,117,109,95,97,114,116,105,115,116,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,95,107,101,121,119,111,114,100,46,42,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,95,116,101,120,116,46,42,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,99,111,110,116,101,110,116,95,97,110,97,108,121,122,101,114,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,102,105,101,108,100,115,34,58,123,34,110,71,114,97,109,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,125,125,125,44,34,95,117,114,108,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,99,111,110,116,101,110,116,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,99,111,110,116,101,110,116,95,97,110,97,108,121,122,101,114,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,105,110,100,101,120,95,111,112,116,105,111,110,115,34,58,34,111,102,102,115,101,116,115,34,44,34,102,105,101,108,100,115,34,58,123,34,110,71,114,97,109,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,125,125,125,44,34,116,97,103,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,99,111,112,121,95,116,111,34,58,34,115,117,103,103,101,115,116,45,116,97,103,34,125,44,34,115,117,103,103,101,115,116,45,116,97,103,34,58,123,34,116,121,112,101,34,58,34,99,111,109,112,108,101,116,105,111,110,34,44,34,97,110,97,108,121,122,101,114,34,58,34,99,97,115,101,95,105,110,115,101,110,115,105,116,105,118,101,95,107,119,95,97,110,97,108,121,122,101,114,34,125,44,34,101,120,105,102,95,109,97,107,101,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,101,120,105,102,95,109,111,100,101,108,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,101,120,105,102,58,115,111,102,116,119,97,114,101,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,101,120,105,102,95,101,120,112,111,115,117,114,101,95,116,105,109,101,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,101,120,105,102,95,102,110,117,109,98,101,114,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,101,120,105,102,95,105,115,111,95,115,112,101,101,100,95,114,97,116,105,110,103,115,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,101,120,105,102,95,102,111,99,97,108,95,108,101,110,103,116,104,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,101,120,105,102,95,117,115,101,114,95,99,111,109,109,101,110,116,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,97,117,116,104,111,114,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,109,111,100,105,102,105,101,100,95,98,121,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,125,125,0}; +char mappings_json[2341] = {123,34,112,114,111,112,101,114,116,105,101,115,34,58,123,34,95,116,105,101,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,100,111,99,95,118,97,108,117,101,115,34,58,116,114,117,101,125,44,34,95,100,101,112,116,104,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,125,44,34,112,97,116,104,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,112,97,116,104,95,97,110,97,108,121,122,101,114,34,44,34,99,111,112,121,95,116,111,34,58,34,115,117,103,103,101,115,116,45,112,97,116,104,34,44,34,102,105,101,108,100,100,97,116,97,34,58,116,114,117,101,44,34,102,105,101,108,100,115,34,58,123,34,110,71,114,97,109,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,125,44,34,116,101,120,116,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,99,111,110,116,101,110,116,95,97,110,97,108,121,122,101,114,34,125,125,125,44,34,115,117,103,103,101,115,116,45,112,97,116,104,34,58,123,34,116,121,112,101,34,58,34,99,111,109,112,108,101,116,105,111,110,34,44,34,97,110,97,108,121,122,101,114,34,58,34,99,97,115,101,95,105,110,115,101,110,115,105,116,105,118,101,95,107,119,95,97,110,97,108,121,122,101,114,34,125,44,34,109,105,109,101,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,112,97,114,101,110,116,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,116,104,117,109,98,110,97,105,108,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,118,105,100,101,111,99,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,97,117,100,105,111,99,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,100,117,114,97,116,105,111,110,34,58,123,34,116,121,112,101,34,58,34,102,108,111,97,116,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,119,105,100,116,104,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,104,101,105,103,104,116,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,112,97,103,101,115,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,109,116,105,109,101,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,125,44,34,115,105,122,101,34,58,123,34,116,121,112,101,34,58,34,108,111,110,103,34,125,44,34,105,110,100,101,120,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,110,97,109,101,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,99,111,110,116,101,110,116,95,97,110,97,108,121,122,101,114,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,102,105,101,108,100,115,34,58,123,34,110,71,114,97,109,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,125,125,125,44,34,102,111,110,116,95,110,97,109,101,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,97,108,98,117,109,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,97,114,116,105,115,116,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,116,105,116,108,101,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,103,101,110,114,101,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,97,108,98,117,109,95,97,114,116,105,115,116,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,95,107,101,121,119,111,114,100,46,42,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,95,116,101,120,116,46,42,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,99,111,110,116,101,110,116,95,97,110,97,108,121,122,101,114,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,102,105,101,108,100,115,34,58,123,34,110,71,114,97,109,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,125,125,125,44,34,95,117,114,108,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,99,111,110,116,101,110,116,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,99,111,110,116,101,110,116,95,97,110,97,108,121,122,101,114,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,105,110,100,101,120,95,111,112,116,105,111,110,115,34,58,34,111,102,102,115,101,116,115,34,44,34,102,105,101,108,100,115,34,58,123,34,110,71,114,97,109,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,125,125,125,44,34,116,97,103,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,99,111,112,121,95,116,111,34,58,34,115,117,103,103,101,115,116,45,116,97,103,34,125,44,34,115,117,103,103,101,115,116,45,116,97,103,34,58,123,34,116,121,112,101,34,58,34,99,111,109,112,108,101,116,105,111,110,34,44,34,97,110,97,108,121,122,101,114,34,58,34,99,97,115,101,95,105,110,115,101,110,115,105,116,105,118,101,95,107,119,95,97,110,97,108,121,122,101,114,34,125,44,34,101,120,105,102,95,109,97,107,101,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,101,120,105,102,95,109,111,100,101,108,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,101,120,105,102,58,115,111,102,116,119,97,114,101,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,101,120,105,102,95,101,120,112,111,115,117,114,101,95,116,105,109,101,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,101,120,105,102,95,102,110,117,109,98,101,114,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,101,120,105,102,95,105,115,111,95,115,112,101,101,100,95,114,97,116,105,110,103,115,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,101,120,105,102,95,102,111,99,97,108,95,108,101,110,103,116,104,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,101,120,105,102,95,117,115,101,114,95,99,111,109,109,101,110,116,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,101,120,105,102,95,103,112,115,95,108,111,110,103,105,116,117,100,101,95,114,101,102,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,101,120,105,102,95,103,112,115,95,108,111,110,103,105,116,117,100,101,95,100,109,115,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,101,120,105,102,95,103,112,115,95,108,111,110,103,105,116,117,100,101,95,100,101,99,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,101,120,105,102,95,103,112,115,95,108,97,116,105,116,117,100,101,95,114,101,102,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,101,120,105,102,95,103,112,115,95,108,97,116,105,116,117,100,101,95,100,109,115,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,101,120,105,102,95,103,112,115,95,108,97,116,105,116,117,100,101,95,100,101,99,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,97,117,116,104,111,114,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,109,111,100,105,102,105,101,100,95,98,121,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,125,125,0}; char settings_json[548] = {123,34,105,110,100,101,120,34,58,123,34,114,101,102,114,101,115,104,95,105,110,116,101,114,118,97,108,34,58,34,51,48,115,34,44,34,99,111,100,101,99,34,58,34,98,101,115,116,95,99,111,109,112,114,101,115,115,105,111,110,34,44,34,110,117,109,98,101,114,95,111,102,95,114,101,112,108,105,99,97,115,34,58,48,125,44,34,97,110,97,108,121,115,105,115,34,58,123,34,116,111,107,101,110,105,122,101,114,34,58,123,34,112,97,116,104,95,116,111,107,101,110,105,122,101,114,34,58,123,34,116,121,112,101,34,58,34,112,97,116,104,95,104,105,101,114,97,114,99,104,121,34,125,44,34,109,121,95,110,71,114,97,109,95,116,111,107,101,110,105,122,101,114,34,58,123,34,116,121,112,101,34,58,34,110,71,114,97,109,34,44,34,109,105,110,95,103,114,97,109,34,58,51,44,34,109,97,120,95,103,114,97,109,34,58,51,125,125,44,34,97,110,97,108,121,122,101,114,34,58,123,34,112,97,116,104,95,97,110,97,108,121,122,101,114,34,58,123,34,116,111,107,101,110,105,122,101,114,34,58,34,112,97,116,104,95,116,111,107,101,110,105,122,101,114,34,44,34,102,105,108,116,101,114,34,58,91,34,108,111,119,101,114,99,97,115,101,34,93,125,44,34,99,97,115,101,95,105,110,115,101,110,115,105,116,105,118,101,95,107,119,95,97,110,97,108,121,122,101,114,34,58,123,34,116,111,107,101,110,105,122,101,114,34,58,34,107,101,121,119,111,114,100,34,44,34,102,105,108,116,101,114,34,58,91,34,108,111,119,101,114,99,97,115,101,34,93,125,44,34,109,121,95,110,71,114,97,109,34,58,123,34,116,111,107,101,110,105,122,101,114,34,58,34,109,121,95,110,71,114,97,109,95,116,111,107,101,110,105,122,101,114,34,44,34,102,105,108,116,101,114,34,58,91,34,108,111,119,101,114,99,97,115,101,34,44,34,97,115,99,105,105,102,111,108,100,105,110,103,34,93,125,44,34,99,111,110,116,101,110,116,95,97,110,97,108,121,122,101,114,34,58,123,34,116,111,107,101,110,105,122,101,114,34,58,34,115,116,97,110,100,97,114,100,34,44,34,102,105,108,116,101,114,34,58,91,34,108,111,119,101,114,99,97,115,101,34,44,34,97,115,99,105,105,102,111,108,100,105,110,103,34,93,125,125,125,125,0}; char pipeline_json[217] = {123,34,100,101,115,99,114,105,112,116,105,111,110,34,58,34,67,111,112,121,32,95,105,100,32,116,111,32,95,116,105,101,44,32,115,97,118,101,32,112,97,116,104,32,100,101,112,116,104,34,44,34,112,114,111,99,101,115,115,111,114,115,34,58,91,123,34,115,99,114,105,112,116,34,58,123,34,115,111,117,114,99,101,34,58,34,99,116,120,46,95,116,105,101,32,61,32,99,116,120,46,95,105,100,59,32,99,116,120,46,95,100,101,112,116,104,32,61,32,99,116,120,46,112,97,116,104,46,108,101,110,103,116,104,40,41,32,61,61,32,48,32,63,32,48,32,58,32,49,32,43,32,99,116,120,46,112,97,116,104,46,108,101,110,103,116,104,40,41,32,45,32,99,116,120,46,112,97,116,104,46,114,101,112,108,97,99,101,40,92,34,47,92,34,44,32,92,34,92,34,41,46,108,101,110,103,116,104,40,41,59,34,125,125,93,125,0}; diff --git a/src/io/serialize.c b/src/io/serialize.c index 424b132..a6c8413 100644 --- a/src/io/serialize.c +++ b/src/io/serialize.c @@ -15,9 +15,13 @@ typedef struct { char has_parent; } line_t; +#define META_NEXT 0xFFFF + void skip_meta(FILE *file) { - enum metakey key = getc(file); - while (key != '\n') { + enum metakey key = 0; + fread(&key, sizeof(uint16_t), 1, file); + + while (key != META_NEXT) { if (IS_META_INT(key)) { fseek(file, sizeof(int), SEEK_CUR); } else if (IS_META_LONG(key)) { @@ -26,7 +30,7 @@ void skip_meta(FILE *file) { while ((getc(file))) {} } - key = getc(file); + fread(&key, sizeof(uint16_t), 1, file); } } @@ -66,7 +70,7 @@ index_descriptor_t read_index_descriptor(char *path) { } char *buf = malloc(info.st_size + 1); - int ret = read(fd, buf, info.st_size); + size_t ret = read(fd, buf, info.st_size); if (ret == -1) { LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno)); } @@ -152,8 +156,20 @@ char *get_meta_key_text(enum metakey meta_key) { return "thumbnail"; case MetaPages: return "pages"; + case MetaExifGpsLongitudeRef: + return "exif_gps_longitude_ref"; + case MetaExifGpsLongitudeDMS: + return "exif_gps_longitude_dms"; + case MetaExifGpsLongitudeDec: + return "exif_gps_longitude_dec"; + case MetaExifGpsLatitudeRef: + return "exif_gps_latitude_ref"; + case MetaExifGpsLatitudeDMS: + return "exif_gps_latitude_dms"; + case MetaExifGpsLatitudeDec: + return "exif_gps_latitude_dec"; default: - return NULL; + LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key) } } @@ -183,7 +199,7 @@ void write_document(document_t *doc) { meta_line_t *meta = doc->meta_head; while (meta != NULL) { - dyn_buffer_write_char(&buf, meta->key); + dyn_buffer_write_short(&buf, (uint16_t) meta->key); if (IS_META_INT(meta->key)) { dyn_buffer_write_int(&buf, meta->int_val); @@ -197,7 +213,7 @@ void write_document(document_t *doc) { meta = meta->next; free(tmp); } - dyn_buffer_write_char(&buf, '\n'); + dyn_buffer_write_short(&buf, META_NEXT); int res = write(index_fd, buf.buf, buf.cur); if (res == -1) { @@ -221,7 +237,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) { FILE *file = fopen(path, "rb"); while (TRUE) { buf.cur = 0; - size_t _ = fread((void *) &line, 1, sizeof(line_t), file); + size_t _ = fread((void *) &line, sizeof(line_t), 1, file); if (feof(file)) { break; } @@ -268,9 +284,10 @@ void read_index_bin(const char *path, const char *index_id, index_func func) { cJSON_AddStringToObject(document, "path", ""); } - enum metakey key = getc(file); - size_t ret = 0; - while (key != '\n') { + enum metakey key = 0; + fread(&key, sizeof(uint16_t), 1, file); + size_t ret; + while (key != META_NEXT) { switch (key) { case MetaPages: case MetaWidth: @@ -308,6 +325,12 @@ void read_index_bin(const char *path, const char *index_id, index_func func) { case MetaAuthor: case MetaModifiedBy: case MetaThumbnail: + case MetaExifGpsLongitudeDMS: + case MetaExifGpsLongitudeDec: + case MetaExifGpsLongitudeRef: + case MetaExifGpsLatitudeDMS: + case MetaExifGpsLatitudeDec: + case MetaExifGpsLatitudeRef: case MetaTitle: { buf.cur = 0; while ((c = getc(file)) != 0) { @@ -323,7 +346,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) { LOG_FATALF("serialize.c", "Invalid meta key (corrupt index): %x", key) } - key = getc(file); + fread(&key, sizeof(uint16_t), 1, file); } cJSON *meta_obj = NULL; @@ -458,7 +481,7 @@ void incremental_read(GHashTable *table, const char *filepath) { incremental_put(table, line.path_md5, line.mtime); - while ((getc(file))) {} + while ((getc(file)) != 0) {} skip_meta(file); } fclose(file); @@ -508,11 +531,11 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath, free(buf); } - enum metakey key; + enum metakey key = 0; while (1) { - key = getc(file); - fwrite(&key, sizeof(char), 1, dst_file); - if (key == '\n') { + fread(&key, sizeof(uint16_t), 1, file); + fwrite(&key, sizeof(uint16_t), 1, dst_file); + if (key == META_NEXT) { break; } diff --git a/src/io/store.c b/src/io/store.c index a7871cc..4cafb0f 100644 --- a/src/io/store.c +++ b/src/io/store.c @@ -4,6 +4,7 @@ store_t *store_create(char *path, size_t chunk_size) { store_t *store = malloc(sizeof(struct store_t)); +#if (SIST_FAKE_STORE != 1) store->chunk_size = chunk_size; pthread_rwlock_init(&store->lock, NULL); @@ -28,15 +29,18 @@ store_t *store_create(char *path, size_t chunk_size) { mdb_txn_begin(store->env, NULL, 0, &txn); mdb_dbi_open(txn, NULL, 0, &store->dbi); mdb_txn_commit(txn); +#endif return store; } void store_destroy(store_t *store) { +#if (SIST_FAKE_STORE != 1) pthread_rwlock_destroy(&store->lock); mdb_close(store->env, store->dbi); mdb_env_close(store->env); +#endif free(store); } @@ -56,6 +60,8 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu } } +#if (SIST_FAKE_STORE != 1) + MDB_val mdb_key; mdb_key.mv_data = key; mdb_key.mv_size = key_len; @@ -92,10 +98,13 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu if (put_ret != 0) { LOG_ERROR("store.c", mdb_strerror(put_ret)) } +#endif } char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen) { char *buf = NULL; + +#if (SIST_FAKE_STORE != 1) MDB_val mdb_key; mdb_key.mv_data = key; mdb_key.mv_size = key_len; @@ -116,6 +125,7 @@ char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen) } mdb_txn_abort(txn); +#endif return buf; } diff --git a/src/main.c b/src/main.c index d5b5593..e1fd9ca 100644 --- a/src/main.c +++ b/src/main.c @@ -21,7 +21,7 @@ #define EPILOG "Made by simon987 . Released under GPL-3.0" -static const char *const Version = "2.9.0"; +static const char *const Version = "2.10.1"; static const char *const usage[] = { "sist2 scan [OPTION]... PATH", "sist2 index [OPTION]... INDEX", @@ -30,6 +30,69 @@ static const char *const usage[] = { NULL, }; +#include +#include + +static __sighandler_t sigsegv_handler = NULL; +static __sighandler_t sigabrt_handler = NULL; + +void sig_handler(int signum) { + + LogCtx.verbose = 1; + LogCtx.very_verbose = 1; + + LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n"); + LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum)); + + GHashTableIter iter; + g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files); + + void *key; + void *value; + while (g_hash_table_iter_next(&iter, &key, &value)) { + parse_job_t *job = value; + + if (isatty(STDERR_FILENO)) { + LOG_DEBUGF( + "*SIGNAL HANDLER*", + "Thread \033[%dm[%04llX]\033[0m was working on job '%s'", + 31 + ((unsigned int) key) % 7, key, job->filepath + ); + } else { + LOG_DEBUGF( + "*SIGNAL HANDLER*", + "THREAD [%04llX] was working on job %s", + key, job->filepath + ); + } + } + + tpool_dump_debug_info(ScanCtx.pool); + + LOG_INFO( + "*SIGNAL HANDLER*", + "Please consider creating a bug report at https://github.com/simon987/sist2/issues !" + ) + LOG_INFO( + "*SIGNAL HANDLER*", + "sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs" + ) + +#ifndef SIST_DEBUG + LOG_WARNING( + "*SIGNAL HANDLER*", + "You are running sist2 in release mode! Please consider downloading the debug binary from the Github " + "releases page to provide additionnal information when submitting a bug report." + ) +#endif + + if (signum == SIGSEGV && sigsegv_handler != NULL) { + sigsegv_handler(signum); + } else if (signum == SIGABRT && sigabrt_handler != NULL) { + sigabrt_handler(signum); + } +} + void init_dir(const char *dirpath) { char path[PATH_MAX]; snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath); @@ -99,6 +162,13 @@ void initialize_scan_context(scan_args_t *args) { ScanCtx.arc_ctx.log = _log; ScanCtx.arc_ctx.logf = _logf; ScanCtx.arc_ctx.parse = (parse_callback_t) parse; + if (args->archive_passphrase != NULL) { + strcpy(ScanCtx.arc_ctx.passphrase, args->archive_passphrase); + } else { + ScanCtx.arc_ctx.passphrase[0] = 0; + } + + ScanCtx.dbg_current_files = g_hash_table_new(g_int64_hash, g_int64_equal); // Comic ScanCtx.comic_ctx.log = _log; @@ -132,6 +202,7 @@ void initialize_scan_context(scan_args_t *args) { ScanCtx.media_ctx.logf = _logf; ScanCtx.media_ctx.store = _store; ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024; + ScanCtx.media_ctx.read_subtitles = args->read_subtitles; init_media(); // OOXML @@ -399,6 +470,9 @@ void sist2_web(web_args_t *args) { int main(int argc, const char *argv[]) { + sigsegv_handler = signal(SIGSEGV, sig_handler); + sigabrt_handler = signal(SIGABRT, sig_handler); + setlocale(LC_ALL, ""); scan_args_t *scan_args = scan_args_create(); @@ -439,6 +513,9 @@ int main(int argc, const char *argv[]) { OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). " "skip: Don't parse, list: only get file names as text, " "shallow: Don't parse archives inside archives. DEFAULT: recurse"), + OPT_STRING(0, "archive-passphrase", &scan_args->archive_passphrase, + "Passphrase for encrypted archive files"), + OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see " "which are installed on your machine)"), OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"), @@ -448,6 +525,7 @@ int main(int argc, const char *argv[]) { OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer, "Maximum memory buffer size per thread in MB for files inside archives " "(see USAGE.md). DEFAULT: 2000"), + OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."), OPT_GROUP("Index options"), OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"), diff --git a/src/parsing/parse.c b/src/parsing/parse.c index 91951dd..46bac17 100644 --- a/src/parsing/parse.c +++ b/src/parsing/parse.c @@ -41,11 +41,18 @@ void fs_reset(struct vfile *f) { #define IS_GIT_OBJ (strlen(doc.filepath + doc.base) == 38 && (strstr(doc.filepath, "objects") != NULL)) +void set_dbg_current_file(parse_job_t *job) { + unsigned long long pid = (unsigned long long) pthread_self(); + g_hash_table_replace(ScanCtx.dbg_current_files, GINT_TO_POINTER(pid), job); +} + void parse(void *arg) { parse_job_t *job = arg; document_t doc; + set_dbg_current_file(job); + doc.filepath = job->filepath; doc.ext = (short) job->ext; doc.base = (short) job->base; diff --git a/src/static/js/dom.js b/src/static/js/dom.js index 6ccbf45..737ce43 100644 --- a/src/static/js/dom.js +++ b/src/static/js/dom.js @@ -192,6 +192,19 @@ function makeUserTag(tag, hit) { return userTag; } +function makeGpsMetaRow(tbody, latitude, longitude) { + tbody.append($("") + .append($("").text("Exif GPS")) + .append($("") + .append($("") + .text(`${latitude}, ${longitude}`) + .attr("href", `https://maps.google.com/?q=${latitude},${longitude}&ll=${latitude},${longitude}&t=k&z=17`) + .attr("target", "_blank") + ) + ) + ); +} + function infoButtonCb(hit) { return () => { getDocumentInfo(hit["_id"]).then(doc => { @@ -229,13 +242,25 @@ function infoButtonCb(hit) { .text(new Date(doc["mtime"] * 1000).toISOString().split(".")[0].replace("T", " ")) .attr("title", doc["mtime"])) ); + + // Exif GPS + if ("exif_gps_longitude_dec" in doc) { + makeGpsMetaRow(tbody, doc["exif_gps_latitude_dec"], doc["exif_gps_longitude_dec"]) + } else if ("exif_gps_longitude_dms" in doc) { + makeGpsMetaRow( + tbody, + dmsToDecimal(doc["exif_gps_latitude_dms"], doc["exif_gps_latitude_ref"]), + dmsToDecimal(doc["exif_gps_longitude_dms"], doc["exif_gps_longitude_ref"]), + ) + } + const displayFields = new Set([ "mime", "size", "path", "title", "width", "height", "duration", "audioc", "videoc", "bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag", "author", "modified_by", "pages" ]); Object.keys(doc) - .filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || key.startsWith("exif_")) + .filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || (key.startsWith("exif_") && !key.includes("gps"))) .forEach(key => { tbody.append($("") .append($("").text(key)) @@ -350,6 +375,14 @@ function createDocCard(hit) { audio.setAttribute("controls", ""); audio.setAttribute("type", hit["_source"]["mime"]); audio.setAttribute("src", "f/" + hit["_id"]); + audio.addEventListener("play", () => { + // Pause all currently playing audio tags + $("audio").each(function () { + if (this !== audio) { + this.pause(); + } + }); + }); docCard.appendChild(audio) } diff --git a/src/static/js/search.js b/src/static/js/search.js index 34416f8..fb753b7 100644 --- a/src/static/js/search.js +++ b/src/static/js/search.js @@ -511,8 +511,8 @@ function search(after = null) { searchResults.appendChild(preload); } - let query = searchBar.value; - let empty = query === ""; + let searchBarValue = searchBar.value; + let empty = searchBarValue === ""; let condition = empty ? "should" : "must"; let filters = [ {range: {size: {gte: size_min, lte: size_max}}}, @@ -561,19 +561,32 @@ function search(after = null) { filters.push({range: {mtime: {lte: date_max}}}) } + let query; + if (CONF.options.queryMode === "simple") { + query = { + simple_query_string: { + query: searchBarValue, + fields: fields, + default_operator: "and" + } + } + } else { + query = { + query_string: { + query: searchBarValue, + default_field: "name", + default_operator: "and" + } + } + } + let q = { "_source": { excludes: ["content", "_tie"] }, query: { bool: { - [condition]: { - simple_query_string: { - query: query, - fields: fields, - default_operator: "and" - } - }, + [condition]: query, filter: filters } }, @@ -611,7 +624,9 @@ function search(after = null) { } } - $.jsonPost("es", q).then(searchResult => { + const showError = CONF.options.queryMode === "advanced"; + + $.jsonPost("es", q, showError).then(searchResult => { let hits = searchResult["hits"]["hits"]; if (hits) { lastDoc = hits[hits.length - 1]; @@ -645,7 +660,25 @@ function search(after = null) { reachedEnd = hits.length !== SIZE; insertHits(resultContainer, hits); searchBusy = false; - }); + }).fail(() => { + searchBusy = false; + if (!after) { + preload.remove(); + } + + console.log("QUERY:") + console.log(q) + $.toast({ + heading: "Query error", + text: "Could not parse or execute query, please check the Advanced search documentation. " + + "See server logs for more information.", + stack: false, + bgColor: "#FF8F00", + textColor: "#FFF3E0", + position: 'bottom-right', + hideAfter: false + }); + }) } diff --git a/src/static/js/util.js b/src/static/js/util.js index 8d3b30f..06398d4 100644 --- a/src/static/js/util.js +++ b/src/static/js/util.js @@ -70,7 +70,7 @@ function strUnescape(str) { for (let i = 0; i < str.length; i++) { const c = str[i]; - const next = str[i+1]; + const next = str[i + 1]; if (c === ']') { if (next === ']') { @@ -102,7 +102,8 @@ const _defaults = { treemapSize: "large", suggestPath: true, fragmentSize: 100, - columns: 5 + columns: 5, + queryMode: "simple" }; function loadSettings() { @@ -120,6 +121,7 @@ function loadSettings() { $("#settingSuggestPath").prop("checked", CONF.options.suggestPath); $("#settingFragmentSize").val(CONF.options.fragmentSize); $("#settingColumns").val(CONF.options.columns); + $("#settingQueryMode").val(CONF.options.queryMode); } function Settings() { @@ -127,6 +129,7 @@ function Settings() { this._onUpdate = function () { $("#fuzzyToggle").prop("checked", this.options.fuzzy); + $("#searchBar").attr("placeholder", this.options.queryMode === "simple" ? "Search" : "Advanced search"); updateColumnStyle(); }; @@ -165,6 +168,7 @@ function updateSettings() { CONF.options.suggestPath = $("#settingSuggestPath").prop("checked"); CONF.options.fragmentSize = $("#settingFragmentSize").val(); CONF.options.columns = $("#settingColumns").val(); + CONF.options.queryMode = $("#settingQueryMode").val(); CONF.save(); if (typeof searchDebounced !== "undefined") { @@ -187,14 +191,16 @@ function updateSettings() { }); } -jQuery["jsonPost"] = function (url, data) { +jQuery["jsonPost"] = function (url, data, showError = true) { return jQuery.ajax({ url: url, type: "post", data: JSON.stringify(data), contentType: "application/json" }).fail(err => { - showEsError(); + if (showError) { + showEsError(); + } console.log(err); }); }; @@ -212,7 +218,7 @@ function updateColumnStyle() { const style = document.getElementById("style"); if (style) { style.innerHTML = - ` + ` @media screen and (min-width: 1500px) { .container { max-width: 1440px; @@ -229,4 +235,14 @@ function updateColumnStyle() { } ` } +} + +function dmsToDecimal(dms, ref) { + const tokens = dms.split(",") + + const d = Number(tokens[0].trim().split(":")[0]) / Number(tokens[0].trim().split(":")[1]) + const m = Number(tokens[1].trim().split(":")[0]) / Number(tokens[1].trim().split(":")[1]) + const s = Number(tokens[2].trim().split(":")[0]) / Number(tokens[2].trim().split(":")[1]) + + return (d + (m / 60) + (s / 3600)) * (ref === "S" || ref === "W" ? -1 : 1) } \ No newline at end of file diff --git a/src/static/search.html b/src/static/search.html index d55a765..f2ef165 100644 --- a/src/static/search.html +++ b/src/static/search.html @@ -12,7 +12,7 @@