mirror of
https://github.com/simon987/sist2.git
synced 2025-12-12 15:08:53 +00:00
Compare commits
285 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 3da2c8cae3 | |||
| 2f0e999b06 | |||
| bf28dc8993 | |||
| c6fee7f6e2 | |||
| 201c2a1a47 | |||
| 7c46ad632a | |||
| 5b8c13fd13 | |||
| efa4a06e56 | |||
| 81670ee107 | |||
| f9dac80905 | |||
| f8d9b718c0 | |||
| 6f5fdc2935 | |||
| a01f6dff1f | |||
| 22dd58e140 | |||
| f3e07fb7f7 | |||
| 7990e5cd2e | |||
| e3ca660983 | |||
| b87fb25458 | |||
| c7a77869ad | |||
| 523c123e2e | |||
| fc7f30d670 | |||
| 152fe11669 | |||
| 33f97f6bfb | |||
| 71f9dfcfe0 | |||
| 5f657d61b3 | |||
| 908def1016 | |||
| db3d312835 | |||
| 32c9cb28a3 | |||
| f839127129 | |||
| 8111a6c143 | |||
| 707a570828 | |||
|
|
5073b00225 | ||
|
|
4923d1b51f | ||
|
|
097e332015 | ||
|
|
d4babe216b | ||
|
|
44511a2202 | ||
| 50771bd1dc | |||
| bc884e137c | |||
| ce1e241dea | |||
| 5fe9c9efa3 | |||
| 75e4e93ddd | |||
| 013c54daa0 | |||
| 54308ef5e2 | |||
| 638c2a5c1a | |||
| 9587caddd9 | |||
| f5bbe0dc97 | |||
| f87eac1f90 | |||
| ddafbab6a6 | |||
| b91d574756 | |||
| 576140e542 | |||
| 050c1283a3 | |||
| c6e1ba03bc | |||
| 10e32f707f | |||
| 86e83bafaf | |||
| 51a40c8819 | |||
|
|
36281a5108 | ||
|
|
76a0bda48b | ||
| 0cf29a660c | |||
| 6cd0741848 | |||
| bc120f349d | |||
| 8cac8c98d7 | |||
| 30921ac52e | |||
| 95bbe39afc | |||
| 72ce217f9c | |||
| 641a8ec90c | |||
| 7a505c2287 | |||
| 12f162d760 | |||
| 4b4ab12fac | |||
| ae283f77ad | |||
| d3bd53a5ea | |||
| f7887f24d1 | |||
| 5c8de19188 | |||
| d861d278a4 | |||
| b6ddeee0e0 | |||
| 0cd2523b05 | |||
| 5e798f9367 | |||
| 5da6c1488b | |||
| 9568e25f84 | |||
| 6a8027789a | |||
| b1d16d8abf | |||
| b2a157e24d | |||
| 9aead9389a | |||
| a32c68cba8 | |||
| d116cf9d91 | |||
|
|
a020a8b32c | ||
| 5d5d9c3092 | |||
| 3379d5ce71 | |||
| a0ff4a1f01 | |||
| 4589f3bde7 | |||
| 1c898640cf | |||
| a0739d5177 | |||
| 8f9d29dbc6 | |||
| 3ff4b70223 | |||
| 02ad035b09 | |||
| c11feb213d | |||
| 72902947cd | |||
| a18bb81222 | |||
| 1520288f19 | |||
| e507de194b | |||
| 0e517d5e2b | |||
| 8223ef3860 | |||
| 995a196690 | |||
| 465d017e18 | |||
| ca994d3914 | |||
| db2285973f | |||
| 61de9e9f14 | |||
| 3015ef0ff4 | |||
| b55d432841 | |||
| ed90a140ce | |||
| 052df82373 | |||
| 5676136777 | |||
| c061613302 | |||
| d0325fd9b9 | |||
| e05a6f3863 | |||
| f1690a9cca | |||
| 100a264413 | |||
| 29390bb454 | |||
| 4d43036ded | |||
| 0b5cdbd130 | |||
| 53d7695f66 | |||
| 8d53456404 | |||
| cbc08a7cc9 | |||
| e629b4d7d3 | |||
| 22f7073b39 | |||
| 1781a74960 | |||
| db96c95ac7 | |||
| 7b9fa4cc0a | |||
| 5cc1fa86a9 | |||
| 649689ce30 | |||
| c8536f65a8 | |||
| 75b5e249c1 | |||
|
|
f49e03ac79 | ||
| a6d2afc8dc | |||
| 8f8f66ba05 | |||
| 1d9fcf7105 | |||
| 8127745f2b | |||
| 230988d6d1 | |||
| 13f4dbed2d | |||
| ed15e89f45 | |||
| c636d3d921 | |||
| 7e92d4b7d1 | |||
| 8ffe780ab2 | |||
| d3c8928fe8 | |||
| d9f628fca4 | |||
| 68289268c1 | |||
| 649c50c465 | |||
| 7b49a0dc49 | |||
| eb559b53aa | |||
| 6d01f9c0df | |||
| e724fec668 | |||
| fe5e93b300 | |||
| ecad85fd7d | |||
| 74cc898259 | |||
| dc2e4443c4 | |||
| 1a64431b52 | |||
|
|
9bad515e06 | ||
| 648559cedb | |||
| 3e6cd9cd5c | |||
| f249992798 | |||
|
|
e9645ecdaa | ||
| 046edea0e2 | |||
| a011b7e97b | |||
| 8c1c1697e0 | |||
| 018b49fa4c | |||
| 27b4e6403e | |||
| 13fdbd9e69 | |||
| 5e7fdaf8dd | |||
| 19d5c8ac9f | |||
| 99497049a8 | |||
|
|
1a3181d78b | ||
| 449aa77c8f | |||
| 3058c55510 | |||
| dedf9287b2 | |||
| ab199b0c0c | |||
| c4fbae123e | |||
| dd2397ef5c | |||
| ee0f71f4d3 | |||
| 0bbb96b149 | |||
| 78f6e16701 | |||
| 4625bca9a9 | |||
| f2ae653886 | |||
| 5686bc864d | |||
| cf513b4ad8 | |||
| 013423424e | |||
| 16514fd6b0 | |||
| 27509f97e1 | |||
| 4c540eae1c | |||
| d2b53ff6fc | |||
| 0ef4292abf | |||
| e6fde38c24 | |||
| 5fa343d40f | |||
| 7ee1374802 | |||
| bd9e56829c | |||
| 718169345e | |||
| 5a6aa763ca | |||
| 695d9abd83 | |||
| e436af7b2a | |||
| 4501a7810f | |||
|
|
e36761fa6a | ||
| fe53b79d56 | |||
| 09615bbed6 | |||
| a2be9b955c | |||
| 9298bd2d9d | |||
| 317034ba21 | |||
| 0505303503 | |||
| 6e5772f13b | |||
| ccccdb3b78 | |||
| 12d17acf4f | |||
| 48b56cdb7b | |||
| 048f707f80 | |||
| 98e0a5fd64 | |||
| 740a49a09f | |||
| 81be662574 | |||
| 02fa3f02f5 | |||
| cfdd7bdd87 | |||
| 7ceb645926 | |||
| 7d0091f647 | |||
| b3cd630399 | |||
| 5f7a1acfe3 | |||
| 513a21cca2 | |||
| 04dbfb23ab | |||
| 1abddabeec | |||
| 9ace5774af | |||
| eab6101cf7 | |||
| d7cbd5d2b6 | |||
| 641edf2715 | |||
| 7efb4957bf | |||
| 9ae77fdedb | |||
| 98c40901ed | |||
| 363375d5da | |||
| 149de95d88 | |||
| e5bb4856d2 | |||
| d78994d427 | |||
| f2d68d54df | |||
| e03625838b | |||
| 86840b46f4 | |||
| e57f9916eb | |||
| 565ba6ee76 | |||
| d83fc2c373 | |||
| d4da28249e | |||
| 483a454c8d | |||
| 018ac86640 | |||
| 398f1aead4 | |||
| d19a75926b | |||
| 1ac8b40e3d | |||
| a8505cb8c1 | |||
| ae8652d86e | |||
| 849beb09d8 | |||
| e1aaaee617 | |||
| c02b940945 | |||
| 2934ddb07f | |||
| 7f6f3c02fa | |||
| 7f98d5a682 | |||
| 7eb9c5d7d5 | |||
| 184439aa38 | |||
| 1ce8b298a1 | |||
| 75f99025d9 | |||
| ebe852bd5a | |||
| 402b103c49 | |||
| e9b6e1cdc2 | |||
| ed1ce8ab5e | |||
| d1fa4febc4 | |||
| 048c55df7b | |||
| f77bc6a025 | |||
| efdde2734e | |||
| 66658fa8f7 | |||
| df41c251e4 | |||
| 3282ab56ba | |||
| 8300838d30 | |||
| c9870a6d3d | |||
| a143cc4fcf | |||
| 9ef1f3781d | |||
| bbee8aa721 | |||
| d22f83c797 | |||
| 50615486a4 | |||
| ca79e4f797 | |||
| 6a9fd08a80 | |||
| cab890dc9b | |||
| b3c4faf2df | |||
| 353937171a | |||
| c80002bea4 | |||
| 56adee9d81 | |||
| d6493d6d5f | |||
| 0967e9676d | |||
| 487e998ea0 |
25
.dockerignore
Normal file
25
.dockerignore
Normal file
@@ -0,0 +1,25 @@
|
||||
.idea
|
||||
*/thumbs
|
||||
*.cbp
|
||||
CMakeCache.txt
|
||||
CMakeFiles
|
||||
cmake-build-debug
|
||||
cmake_install.cmake
|
||||
Makefile
|
||||
*.out
|
||||
LOG
|
||||
sist2*
|
||||
index.sist2/
|
||||
bundle*.css
|
||||
bundle.js
|
||||
**/*.a
|
||||
**/vgcore.*
|
||||
build/
|
||||
.git/
|
||||
third-party/libscan/libscan-test-files/
|
||||
**/ext_ffmpeg
|
||||
**/ext_libmobi
|
||||
**/scan_a_test
|
||||
Dockerfile
|
||||
*.idx/
|
||||
VERSION
|
||||
72
.drone.yml
Normal file
72
.drone.yml
Normal file
@@ -0,0 +1,72 @@
|
||||
kind: pipeline
|
||||
type: docker
|
||||
name: amd64
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: amd64
|
||||
|
||||
steps:
|
||||
- name: build
|
||||
image: simon987/sist2-build
|
||||
commands:
|
||||
- ./ci/build.sh
|
||||
- name: docker
|
||||
image: plugins/docker
|
||||
settings:
|
||||
username:
|
||||
from_secret: DOCKER_USER
|
||||
password:
|
||||
from_secret: DOCKER_PASSWORD
|
||||
repo: simon987/sist2
|
||||
context: ./
|
||||
dockerfile: ./Dockerfile
|
||||
auto_tag: true
|
||||
auto_tag_suffix: x64-linux
|
||||
when:
|
||||
event:
|
||||
- tag
|
||||
- name: scp files
|
||||
image: appleboy/drone-scp
|
||||
settings:
|
||||
host:
|
||||
from_secret: SSH_HOST
|
||||
port:
|
||||
from_secret: SSH_PORT
|
||||
user:
|
||||
from_secret: SSH_USER
|
||||
key:
|
||||
from_secret: SSH_KEY
|
||||
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
|
||||
source:
|
||||
- ./VERSION
|
||||
- ./sist2-x64-linux
|
||||
- ./sist2-x64-linux-debug
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
type: docker
|
||||
name: arm64
|
||||
|
||||
platform:
|
||||
arch: arm64
|
||||
|
||||
steps:
|
||||
- name: build
|
||||
image: simon987/sist2-build-arm64
|
||||
commands:
|
||||
- ./ci/build_arm64.sh
|
||||
- name: scp files
|
||||
image: appleboy/drone-scp
|
||||
settings:
|
||||
host:
|
||||
from_secret: SSH_HOST
|
||||
port:
|
||||
from_secret: SSH_PORT
|
||||
user:
|
||||
from_secret: SSH_USER
|
||||
key:
|
||||
from_secret: SSH_KEY
|
||||
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
|
||||
source:
|
||||
- ./sist2-arm64-linux
|
||||
40
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
40
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
---
|
||||
name: "🐞 Bug Report"
|
||||
about: Submit a bug report
|
||||
title: ''
|
||||
labels: bug
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**Device Information (please complete the following information):**
|
||||
- OS: `[e.g., Ubuntu 20.04, WSL2]`
|
||||
- Deployment: `[Linux, Linux ARM64 or Docker]`
|
||||
- Browser *(if relevant)*: `[e.g., chrome, safari]`
|
||||
- SIST2 Version: `[e.g., v2.9.0]`
|
||||
- Elasticsearch Version *(if relevant)* : ``
|
||||
|
||||
**Command with arguments**
|
||||
<!-- `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0` -->
|
||||
|
||||
**Describe the bug**
|
||||
<!-- A clear and concise description of what the bug is. -->
|
||||
|
||||
**Steps To Reproduce**
|
||||
Please be specific!
|
||||
1. Go to '...'
|
||||
2. Click on '....'
|
||||
3. etc.
|
||||
|
||||
**Expected behavior**
|
||||
<!-- A clear and concise description of what you expected to happen. -->
|
||||
|
||||
**Actual Behavior**
|
||||
<!-- A clear and concise description of what actually happens. -->
|
||||
|
||||
**Screenshots**
|
||||
<!-- If applicable, add screenshots to help explain your problem. -->
|
||||
|
||||
**Additional context**
|
||||
<!-- Add any other context about the problem here. If applicable, please include why you think the bug is occurring and/or troubleshooting you have already performed. -->
|
||||
<!-- If the issue is related to the `scan` module, please attach the files necessary to reproduce the error or email them to me[at]simon987.net. -->
|
||||
5
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
5
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
blank_issues_enabled: false
|
||||
contact_links:
|
||||
- name: SIST2 Documentation
|
||||
url: https://github.com/simon987/sist2/blob/master/docs/USAGE.md
|
||||
about: Check out the SIST2 documentation for answers to common questions
|
||||
18
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
18
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
---
|
||||
name: "🚀 Feature Request"
|
||||
about: Suggest an idea for SIST2
|
||||
title: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
**Which SIST2 component is your Feature Request related to?**
|
||||
<!-- e.g., Scan, Index, or Web? -->
|
||||
|
||||
**Is your feature request related to a problem? Please describe.**
|
||||
<!-- A clear and concise description of what the problem is. e.g., "I'm always frustrated when [...]" -->
|
||||
|
||||
**What would you like to see happen?**
|
||||
<!-- A clear and concise description of what you want to happen. -->
|
||||
|
||||
**Additional context**
|
||||
<!-- Add any other context or screenshots about the feature request here. -->
|
||||
18
.github/ISSUE_TEMPLATE/issue-template.md
vendored
Normal file
18
.github/ISSUE_TEMPLATE/issue-template.md
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
---
|
||||
name: Issue template
|
||||
about: General
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
sist2 version:
|
||||
|
||||
Platform (Linux or Docker, x86-64 or arm64):
|
||||
|
||||
Elasticsearch version:
|
||||
|
||||
Command with arguments: `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0`
|
||||
|
||||
If the issue is related to the `scan` module, please attach the files necessary to reproduce the error or email them to me[at]simon987.net.
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1,6 +1,5 @@
|
||||
.idea
|
||||
thumbs
|
||||
test
|
||||
*.cbp
|
||||
CMakeCache.txt
|
||||
CMakeFiles
|
||||
@@ -16,3 +15,6 @@ bundle.js
|
||||
*.a
|
||||
vgcore.*
|
||||
build/
|
||||
third-party/
|
||||
*.idx/
|
||||
VERSION
|
||||
46
.gitmodules
vendored
46
.gitmodules
vendored
@@ -1,42 +1,6 @@
|
||||
[submodule "argparse"]
|
||||
path = argparse
|
||||
[submodule "third-party/libscan"]
|
||||
path = third-party/libscan
|
||||
url = https://github.com/simon987/libscan
|
||||
[submodule "third-party/argparse"]
|
||||
path = third-party/argparse
|
||||
url = https://github.com/cofyc/argparse
|
||||
[submodule "cJSON"]
|
||||
path = cJSON
|
||||
url = https://github.com/DaveGamble/cJSON
|
||||
[submodule "lmdb"]
|
||||
path = lmdb
|
||||
url = https://github.com/LMDB/lmdb
|
||||
[submodule "utf8.h"]
|
||||
path = utf8.h
|
||||
url = https://github.com/sheredom/utf8.h
|
||||
[submodule "lib/bzip2-1.0.6"]
|
||||
path = lib/bzip2-1.0.6
|
||||
url = https://github.com/enthought/bzip2-1.0.6
|
||||
[submodule "lib/libmagic"]
|
||||
path = lib/libmagic
|
||||
url = https://github.com/threatstack/libmagic
|
||||
[submodule "lib/harfbuzz"]
|
||||
path = lib/harfbuzz
|
||||
url = https://github.com/harfbuzz/harfbuzz
|
||||
[submodule "lib/openjpeg"]
|
||||
path = lib/openjpeg
|
||||
url = https://github.com/uclouvain/openjpeg
|
||||
[submodule "lib/ffmpeg"]
|
||||
path = lib/ffmpeg
|
||||
url = https://git.ffmpeg.org/ffmpeg.git
|
||||
[submodule "lib/onion"]
|
||||
path = lib/onion
|
||||
url = https://github.com/davidmoreno/onion
|
||||
[submodule "lib/mupdf"]
|
||||
path = lib/mupdf
|
||||
url = git://git.ghostscript.com/mupdf.git
|
||||
[submodule "lib/tesseract"]
|
||||
path = lib/tesseract
|
||||
url = https://github.com/tesseract-ocr/tesseract
|
||||
[submodule "lib/leptonica"]
|
||||
path = lib/leptonica
|
||||
url = https://github.com/danbloomberg/leptonica
|
||||
[submodule "lib/libtiff"]
|
||||
path = lib/libtiff
|
||||
url = https://gitlab.com/libtiff/libtiff
|
||||
|
||||
69
.teamcity/settings.kts
vendored
69
.teamcity/settings.kts
vendored
@@ -1,69 +0,0 @@
|
||||
import jetbrains.buildServer.configs.kotlin.v2019_2.*
|
||||
import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.ExecBuildStep
|
||||
import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.exec
|
||||
import jetbrains.buildServer.configs.kotlin.v2019_2.triggers.vcs
|
||||
import jetbrains.buildServer.configs.kotlin.v2019_2.vcs.GitVcsRoot
|
||||
|
||||
/*
|
||||
The settings script is an entry point for defining a TeamCity
|
||||
project hierarchy. The script should contain a single call to the
|
||||
project() function with a Project instance or an init function as
|
||||
an argument.
|
||||
|
||||
VcsRoots, BuildTypes, Templates, and subprojects can be
|
||||
registered inside the project using the vcsRoot(), buildType(),
|
||||
template(), and subProject() methods respectively.
|
||||
|
||||
To debug settings scripts in command-line, run the
|
||||
|
||||
mvnDebug org.jetbrains.teamcity:teamcity-configs-maven-plugin:generate
|
||||
|
||||
command and attach your debugger to the port 8000.
|
||||
|
||||
To debug in IntelliJ Idea, open the 'Maven Projects' tool window (View
|
||||
-> Tool Windows -> Maven Projects), find the generate task node
|
||||
(Plugins -> teamcity-configs -> teamcity-configs:generate), the
|
||||
'Debug' option is available in the context menu for the task.
|
||||
*/
|
||||
|
||||
version = "2019.2"
|
||||
|
||||
project {
|
||||
|
||||
vcsRoot(HttpsGithubComSimon987sist2refsHeadsMaster)
|
||||
|
||||
buildType(Build)
|
||||
}
|
||||
|
||||
object Build : BuildType({
|
||||
name = "Build"
|
||||
|
||||
artifactRules = """
|
||||
sist2
|
||||
sist2_scan
|
||||
""".trimIndent()
|
||||
|
||||
vcs {
|
||||
root(HttpsGithubComSimon987sist2refsHeadsMaster)
|
||||
}
|
||||
|
||||
steps {
|
||||
exec {
|
||||
name = "Build"
|
||||
path = "./ci/build.sh"
|
||||
dockerImage = "simon987/general_ci"
|
||||
dockerImagePlatform = ExecBuildStep.ImagePlatform.Linux
|
||||
dockerPull = true
|
||||
}
|
||||
}
|
||||
|
||||
triggers {
|
||||
vcs {
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
object HttpsGithubComSimon987sist2refsHeadsMaster : GitVcsRoot({
|
||||
name = "https://github.com/simon987/sist2#refs/heads/master"
|
||||
url = "https://github.com/simon987/sist2"
|
||||
})
|
||||
171
CMakeLists.txt
171
CMakeLists.txt
@@ -2,134 +2,119 @@ cmake_minimum_required(VERSION 3.7)
|
||||
set(CMAKE_C_STANDARD 11)
|
||||
|
||||
project(sist2 C)
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/CMakeModules")
|
||||
|
||||
add_executable(
|
||||
sist2
|
||||
option(SIST_DEBUG "Build a debug executable" on)
|
||||
|
||||
set(BUILD_TESTS on)
|
||||
add_subdirectory(third-party/libscan)
|
||||
set(ARGPARSE_SHARED off)
|
||||
add_subdirectory(third-party/argparse)
|
||||
|
||||
add_executable(sist2
|
||||
|
||||
# argparse
|
||||
third-party/argparse/argparse.h third-party/argparse/argparse.c
|
||||
|
||||
src/main.c
|
||||
src/sist.h
|
||||
src/io/walk.h src/io/walk.c
|
||||
src/parsing/media.h src/parsing/media.c
|
||||
src/parsing/pdf.h src/parsing/pdf.c
|
||||
src/io/store.h src/io/store.c
|
||||
src/tpool.h src/tpool.c
|
||||
src/parsing/parse.h src/parsing/parse.c
|
||||
src/io/serialize.h src/io/serialize.c
|
||||
src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
|
||||
src/parsing/text.h src/parsing/text.c
|
||||
src/index/web.c src/index/web.h
|
||||
src/web/serve.c src/web/serve.h
|
||||
src/web/auth_basic.h src/web/auth_basic.c
|
||||
src/index/elastic.c src/index/elastic.h
|
||||
src/util.c src/util.h
|
||||
src/ctx.h src/types.h src/parsing/font.c src/parsing/font.h
|
||||
src/parsing/arc.c src/parsing/arc.h
|
||||
src/parsing/doc.c src/parsing/doc.h
|
||||
src/ctx.h src/types.h
|
||||
src/log.c src/log.h
|
||||
|
||||
# argparse
|
||||
argparse/argparse.h argparse/argparse.c
|
||||
|
||||
# cJSON
|
||||
cJSON/cJSON.h cJSON/cJSON.c
|
||||
|
||||
# LMDB
|
||||
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
|
||||
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
|
||||
src/cli.c src/cli.h
|
||||
src/stats.c src/stats.h src/ctx.c
|
||||
src/parsing/sidecar.c src/parsing/sidecar.h)
|
||||
|
||||
# utf8.h
|
||||
utf8.h/utf8.h
|
||||
)
|
||||
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
|
||||
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
|
||||
|
||||
find_package(PkgConfig REQUIRED)
|
||||
set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:/usr/local/lib/pkgconfig/")
|
||||
|
||||
#find_package(OpenSSL REQUIRED)
|
||||
find_package(Freetype REQUIRED)
|
||||
pkg_search_module(GLIB REQUIRED glib-2.0)
|
||||
|
||||
pkg_check_modules(GLIB REQUIRED glib-2.0)
|
||||
pkg_check_modules(GOBJECT REQUIRED gobject-2.0)
|
||||
pkg_check_modules(UUID REQUIRED uuid)
|
||||
find_package(lmdb CONFIG REQUIRED)
|
||||
find_package(cJSON CONFIG REQUIRED)
|
||||
find_package(unofficial-mongoose CONFIG REQUIRED)
|
||||
find_package(CURL CONFIG REQUIRED)
|
||||
|
||||
add_definitions(${UUID_CFLAGS_OTHER})
|
||||
add_definitions(${GLIB_CFLAGS_OTHER})
|
||||
add_definitions(${GOBJECT_CFLAGS_OTHER})
|
||||
add_definitions(${FREETYPE_CFLAGS_OTHER})
|
||||
|
||||
list(REMOVE_ITEM GLIB_LIBRARIES pcre)
|
||||
list(REMOVE_ITEM GOBJECT_LIBRARIES pcre)
|
||||
list(REMOVE_ITEM UUID_LIBRARIES pcre)
|
||||
|
||||
target_include_directories(
|
||||
sist2 PUBLIC
|
||||
${GOBJECT_INCLUDE_DIRS}
|
||||
${CMAKE_SOURCE_DIR}/third-party/onion/src/
|
||||
${CMAKE_SOURCE_DIR}/third-party/utf8.h/
|
||||
${CMAKE_SOURCE_DIR}/third-party/libscan/
|
||||
${CMAKE_SOURCE_DIR}/
|
||||
${GLIB_INCLUDE_DIRS}
|
||||
${PROJECT_SOURCE_DIR}/lib/ffmpeg/
|
||||
${FREETYPE_INCLUDE_DIRS}
|
||||
${UUID_INCLUDE_DIRS}
|
||||
${PROJECT_SOURCE_DIR}/
|
||||
${PROJECT_SOURCE_DIR}/lmdb/libraries/liblmdb/
|
||||
${PROJECT_SOURCE_DIR}/lib/onion/src/
|
||||
${PROJECT_SOURCE_DIR}/lib/mupdf/include/
|
||||
${PROJECT_SOURCE_DIR}/include/
|
||||
/usr/include/libxml2/
|
||||
${PROJECT_SOURCE_DIR}/lib/tesseract/include/
|
||||
)
|
||||
target_link_directories(
|
||||
sist2 PUBLIC
|
||||
${UUID_LIBRARY_DIRS}
|
||||
)
|
||||
|
||||
target_compile_options(sist2
|
||||
target_compile_options(
|
||||
sist2
|
||||
PRIVATE
|
||||
-Ofast
|
||||
# -march=native
|
||||
-fno-stack-protector
|
||||
-fomit-frame-pointer
|
||||
)
|
||||
-fPIC
|
||||
)
|
||||
|
||||
TARGET_LINK_LIBRARIES(
|
||||
if (SIST_DEBUG)
|
||||
target_compile_options(
|
||||
sist2
|
||||
PRIVATE
|
||||
-g
|
||||
-fstack-protector
|
||||
-fno-omit-frame-pointer
|
||||
-fsanitize=address
|
||||
-fno-inline
|
||||
# -O2
|
||||
)
|
||||
target_link_options(
|
||||
sist2
|
||||
PRIVATE
|
||||
-fsanitize=address
|
||||
)
|
||||
set_target_properties(
|
||||
sist2
|
||||
PROPERTIES
|
||||
OUTPUT_NAME sist2_debug
|
||||
)
|
||||
else ()
|
||||
target_compile_options(
|
||||
sist2
|
||||
PRIVATE
|
||||
-Ofast
|
||||
-fno-stack-protector
|
||||
-fomit-frame-pointer
|
||||
)
|
||||
endif ()
|
||||
|
||||
add_dependencies(
|
||||
sist2
|
||||
scan
|
||||
argparse
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
sist2
|
||||
|
||||
${GLIB_LIBRARIES}
|
||||
${GOBJECT_LIBRARIES}
|
||||
${UUID_LIBRARIES}
|
||||
|
||||
# ffmpeg
|
||||
${PROJECT_SOURCE_DIR}/lib/libavcodec.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libavformat.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libavutil.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libswscale.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libswresample.a
|
||||
|
||||
# mupdf
|
||||
${PROJECT_SOURCE_DIR}/lib/libmupdf.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libmupdf-third.a
|
||||
|
||||
# onion
|
||||
${PROJECT_SOURCE_DIR}/lib/libonion_static.a
|
||||
z
|
||||
lmdb
|
||||
cjson
|
||||
argparse
|
||||
${GLIB_LDFLAGS}
|
||||
unofficial::mongoose::mongoose
|
||||
CURL::libcurl
|
||||
|
||||
pthread
|
||||
curl
|
||||
m
|
||||
bz2
|
||||
${PROJECT_SOURCE_DIR}/lib/libmagic.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
|
||||
freetype
|
||||
archive
|
||||
magic
|
||||
|
||||
xml2
|
||||
${PROJECT_SOURCE_DIR}/lib/libopc/libmce.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libopc/libopc.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libopc/libplib.a
|
||||
c
|
||||
|
||||
${PROJECT_SOURCE_DIR}/lib/libtesseract.a
|
||||
${PROJECT_SOURCE_DIR}/lib/liblept.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libtiff.a
|
||||
png
|
||||
stdc++
|
||||
scan
|
||||
)
|
||||
|
||||
add_custom_target(
|
||||
|
||||
80
CMakeModules/FindFFmpeg.cmake
vendored
80
CMakeModules/FindFFmpeg.cmake
vendored
@@ -1,80 +0,0 @@
|
||||
# - Try to find ffmpeg libraries (libavcodec, libavformat and libavutil)
|
||||
# Once done this will define
|
||||
#
|
||||
# FFMPEG_FOUND - system has ffmpeg or libav
|
||||
# FFMPEG_INCLUDE_DIR - the ffmpeg include directory
|
||||
# FFMPEG_LIBRARIES - Link these to use ffmpeg
|
||||
# FFMPEG_LIBAVCODEC
|
||||
# FFMPEG_LIBAVFORMAT
|
||||
# FFMPEG_LIBAVUTIL
|
||||
#
|
||||
# Copyright (c) 2008 Andreas Schneider <mail@cynapses.org>
|
||||
# Modified for other libraries by Lasse Kärkkäinen <tronic>
|
||||
# Modified for Hedgewars by Stepik777
|
||||
#
|
||||
# Redistribution and use is allowed according to the terms of the New
|
||||
# BSD license.
|
||||
#
|
||||
|
||||
if (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
|
||||
# in cache already
|
||||
set(FFMPEG_FOUND TRUE)
|
||||
else (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
|
||||
# use pkg-config to get the directories and then use these values
|
||||
# in the FIND_PATH() and FIND_LIBRARY() calls
|
||||
find_package(PkgConfig)
|
||||
if (PKG_CONFIG_FOUND)
|
||||
pkg_check_modules(_FFMPEG_AVCODEC libavcodec)
|
||||
pkg_check_modules(_FFMPEG_AVFORMAT libavformat)
|
||||
pkg_check_modules(_FFMPEG_AVUTIL libavutil)
|
||||
endif (PKG_CONFIG_FOUND)
|
||||
|
||||
find_path(FFMPEG_AVCODEC_INCLUDE_DIR
|
||||
NAMES libavcodec/avcodec.h
|
||||
PATHS ${_FFMPEG_AVCODEC_INCLUDE_DIRS} /usr/include /usr/local/include /opt/local/include /sw/include
|
||||
PATH_SUFFIXES ffmpeg libav
|
||||
)
|
||||
|
||||
find_library(FFMPEG_LIBAVCODEC
|
||||
NAMES avcodec
|
||||
PATHS ${_FFMPEG_AVCODEC_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
|
||||
)
|
||||
|
||||
find_library(FFMPEG_LIBAVFORMAT
|
||||
NAMES avformat
|
||||
PATHS ${_FFMPEG_AVFORMAT_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
|
||||
)
|
||||
|
||||
find_library(FFMPEG_LIBAVUTIL
|
||||
NAMES avutil
|
||||
PATHS ${_FFMPEG_AVUTIL_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
|
||||
)
|
||||
|
||||
if (FFMPEG_LIBAVCODEC AND FFMPEG_LIBAVFORMAT)
|
||||
set(FFMPEG_FOUND TRUE)
|
||||
endif()
|
||||
|
||||
if (FFMPEG_FOUND)
|
||||
set(FFMPEG_INCLUDE_DIR ${FFMPEG_AVCODEC_INCLUDE_DIR})
|
||||
|
||||
set(FFMPEG_LIBRARIES
|
||||
${FFMPEG_LIBAVCODEC}
|
||||
${FFMPEG_LIBAVFORMAT}
|
||||
${FFMPEG_LIBAVUTIL}
|
||||
)
|
||||
|
||||
endif (FFMPEG_FOUND)
|
||||
|
||||
if (FFMPEG_FOUND)
|
||||
if (NOT FFMPEG_FIND_QUIETLY)
|
||||
message(STATUS "Found FFMPEG or Libav: ${FFMPEG_LIBRARIES}, ${FFMPEG_INCLUDE_DIR}")
|
||||
endif (NOT FFMPEG_FIND_QUIETLY)
|
||||
else (FFMPEG_FOUND)
|
||||
if (FFMPEG_FIND_REQUIRED)
|
||||
message(FATAL_ERROR "Could not find libavcodec or libavformat or libavutil")
|
||||
endif (FFMPEG_FIND_REQUIRED)
|
||||
endif (FFMPEG_FOUND)
|
||||
|
||||
endif (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
|
||||
|
||||
|
||||
100
CMakeModules/FindLibMagic.cmake
vendored
100
CMakeModules/FindLibMagic.cmake
vendored
@@ -1,100 +0,0 @@
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Copyright (c) 2013-2013, Lars Baehren <lbaehren@gmail.com>
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without modification,
|
||||
# are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
# - Check for the presence of LIBMAGIC
|
||||
#
|
||||
# The following variables are set when LIBMAGIC is found:
|
||||
# LIBMAGIC_FOUND = Set to true, if all components of LIBMAGIC have been
|
||||
# found.
|
||||
# LIBMAGIC_INCLUDES = Include path for the header files of LIBMAGIC
|
||||
# LIBMAGIC_LIBRARIES = Link these to use LIBMAGIC
|
||||
# LIBMAGIC_LFLAGS = Linker flags (optional)
|
||||
|
||||
if (NOT LIBMAGIC_FOUND)
|
||||
|
||||
if (NOT LIBMAGIC_ROOT_DIR)
|
||||
set (LIBMAGIC_ROOT_DIR ${CMAKE_INSTALL_PREFIX})
|
||||
endif (NOT LIBMAGIC_ROOT_DIR)
|
||||
|
||||
##____________________________________________________________________________
|
||||
## Check for the header files
|
||||
|
||||
find_path (LIBMAGIC_FILE_H
|
||||
NAMES file/file.h
|
||||
HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
|
||||
PATH_SUFFIXES include
|
||||
)
|
||||
if (LIBMAGIC_FILE_H)
|
||||
list (APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_FILE_H})
|
||||
endif (LIBMAGIC_FILE_H)
|
||||
|
||||
find_path (LIBMAGIC_MAGIC_H
|
||||
NAMES magic.h
|
||||
HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
|
||||
PATH_SUFFIXES include include/linux
|
||||
)
|
||||
if (LIBMAGIC_MAGIC_H)
|
||||
list (APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_MAGIC_H})
|
||||
endif (LIBMAGIC_MAGIC_H)
|
||||
|
||||
list (REMOVE_DUPLICATES LIBMAGIC_INCLUDES)
|
||||
|
||||
##____________________________________________________________________________
|
||||
## Check for the library
|
||||
|
||||
find_library (LIBMAGIC_LIBRARIES magic
|
||||
HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
|
||||
PATH_SUFFIXES lib
|
||||
)
|
||||
|
||||
##____________________________________________________________________________
|
||||
## Actions taken when all components have been found
|
||||
|
||||
#find_package_handle_standard_args (LIBMAGIC DEFAULT_MSG LIBMAGIC_LIBRARIES LIBMAGIC_INCLUDES)
|
||||
|
||||
if (LIBMAGIC_FOUND)
|
||||
if (NOT LIBMAGIC_FIND_QUIETLY)
|
||||
message (STATUS "Found components for LIBMAGIC")
|
||||
message (STATUS "LIBMAGIC_ROOT_DIR = ${LIBMAGIC_ROOT_DIR}")
|
||||
message (STATUS "LIBMAGIC_INCLUDES = ${LIBMAGIC_INCLUDES}")
|
||||
message (STATUS "LIBMAGIC_LIBRARIES = ${LIBMAGIC_LIBRARIES}")
|
||||
endif (NOT LIBMAGIC_FIND_QUIETLY)
|
||||
else (LIBMAGIC_FOUND)
|
||||
if (LIBMAGIC_FIND_REQUIRED)
|
||||
message (FATAL_ERROR "Could not find LIBMAGIC!")
|
||||
endif (LIBMAGIC_FIND_REQUIRED)
|
||||
endif (LIBMAGIC_FOUND)
|
||||
|
||||
##____________________________________________________________________________
|
||||
## Mark advanced variables
|
||||
|
||||
mark_as_advanced (
|
||||
LIBMAGIC_ROOT_DIR
|
||||
LIBMAGIC_INCLUDES
|
||||
LIBMAGIC_LIBRARIES
|
||||
)
|
||||
|
||||
endif (NOT LIBMAGIC_FOUND)
|
||||
478
CMakeModules/FindOpenSSL.cmake
vendored
478
CMakeModules/FindOpenSSL.cmake
vendored
@@ -1,478 +0,0 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
# Decide whether the located OpenSSL libraries need extra link dependencies.
#
# Sets _OpenSSL_has_dependencies to TRUE when building for Linux and at least
# one of the two library paths ends in the static library suffix; in that case
# the Threads package is looked up as well. Otherwise the flag is set to FALSE.
macro(_OpenSSL_test_and_find_dependencies ssl_library crypto_library)
  # Start from "no dependencies" and only flip the flag for static Linux libs.
  set(_OpenSSL_has_dependencies FALSE)
  if((CMAKE_SYSTEM_NAME STREQUAL "Linux") AND
     (("${ssl_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$") OR
      ("${crypto_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$")))
    set(_OpenSSL_has_dependencies TRUE)
    find_package(Threads)
  endif()
endmacro()
|
||||
|
||||
# Append the thread library (when CMAKE_THREAD_LIBS_INIT is set) and
# CMAKE_DL_LIBS to the list variable named by `libraries_var`, and publish the
# extended list to the caller's scope. The `library` argument is not used.
function(_OpenSSL_add_dependencies libraries_var library)
  # Work on a local copy of the caller's list.
  set(_openssl_dep_list ${${libraries_var}})
  if(CMAKE_THREAD_LIBS_INIT)
    list(APPEND _openssl_dep_list ${CMAKE_THREAD_LIBS_INIT})
  endif()
  list(APPEND _openssl_dep_list ${CMAKE_DL_LIBS})
  set(${libraries_var} ${_openssl_dep_list} PARENT_SCOPE)
endfunction()
|
||||
|
||||
# When _OpenSSL_has_dependencies is set, append Threads::Threads and
# CMAKE_DL_LIBS to the INTERFACE_LINK_LIBRARIES of `target`.
function(_OpenSSL_target_add_dependencies target)
  if(NOT _OpenSSL_has_dependencies)
    return()
  endif()
  set_property(TARGET ${target} APPEND PROPERTY
    INTERFACE_LINK_LIBRARIES Threads::Threads ${CMAKE_DL_LIBS})
endfunction()
|
||||
|
||||
if (UNIX)
|
||||
find_package(PkgConfig QUIET)
|
||||
pkg_check_modules(_OPENSSL QUIET openssl)
|
||||
endif ()
|
||||
|
||||
# Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES
|
||||
if(OPENSSL_USE_STATIC_LIBS)
|
||||
set(_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
|
||||
if(WIN32)
|
||||
set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
|
||||
else()
|
||||
set(CMAKE_FIND_LIBRARY_SUFFIXES .a )
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WIN32)
|
||||
# http://www.slproweb.com/products/Win32OpenSSL.html
|
||||
set(_OPENSSL_ROOT_HINTS
|
||||
${OPENSSL_ROOT_DIR}
|
||||
"[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (32-bit)_is1;Inno Setup: App Path]"
|
||||
"[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (64-bit)_is1;Inno Setup: App Path]"
|
||||
ENV OPENSSL_ROOT_DIR
|
||||
)
|
||||
file(TO_CMAKE_PATH "$ENV{PROGRAMFILES}" _programfiles)
|
||||
set(_OPENSSL_ROOT_PATHS
|
||||
"${_programfiles}/OpenSSL"
|
||||
"${_programfiles}/OpenSSL-Win32"
|
||||
"${_programfiles}/OpenSSL-Win64"
|
||||
"C:/OpenSSL/"
|
||||
"C:/OpenSSL-Win32/"
|
||||
"C:/OpenSSL-Win64/"
|
||||
)
|
||||
unset(_programfiles)
|
||||
else ()
|
||||
set(_OPENSSL_ROOT_HINTS
|
||||
${OPENSSL_ROOT_DIR}
|
||||
ENV OPENSSL_ROOT_DIR
|
||||
)
|
||||
endif ()
|
||||
|
||||
set(_OPENSSL_ROOT_HINTS_AND_PATHS
|
||||
HINTS ${_OPENSSL_ROOT_HINTS}
|
||||
PATHS ${_OPENSSL_ROOT_PATHS}
|
||||
)
|
||||
|
||||
find_path(OPENSSL_INCLUDE_DIR
|
||||
NAMES
|
||||
openssl/ssl.h
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
HINTS
|
||||
${_OPENSSL_INCLUDEDIR}
|
||||
PATH_SUFFIXES
|
||||
include
|
||||
)
|
||||
|
||||
if(WIN32 AND NOT CYGWIN)
|
||||
if(MSVC)
|
||||
# /MD and /MDd are the standard values - if someone wants to use
|
||||
# others, the libnames have to change here too
|
||||
# use also ssl and ssleay32 in debug as fallback for openssl < 0.9.8b
|
||||
# enable OPENSSL_MSVC_STATIC_RT to get the libs build /MT (Multithreaded no-DLL)
|
||||
# In Visual C++ naming convention each of these four kinds of Windows libraries has its standard suffix:
|
||||
# * MD for dynamic-release
|
||||
# * MDd for dynamic-debug
|
||||
# * MT for static-release
|
||||
# * MTd for static-debug
|
||||
|
||||
# Implementation details:
|
||||
# We are using the libraries located in the VC subdir instead of the parent directory even though :
|
||||
# libeay32MD.lib is identical to ../libeay32.lib, and
|
||||
# ssleay32MD.lib is identical to ../ssleay32.lib
|
||||
# enable OPENSSL_USE_STATIC_LIBS to use the static libs located in lib/VC/static
|
||||
|
||||
if (OPENSSL_MSVC_STATIC_RT)
|
||||
set(_OPENSSL_MSVC_RT_MODE "MT")
|
||||
else ()
|
||||
set(_OPENSSL_MSVC_RT_MODE "MD")
|
||||
endif ()
|
||||
|
||||
# Since OpenSSL 1.1, lib names are like libcrypto32MTd.lib and libssl32MTd.lib
|
||||
if( "${CMAKE_SIZEOF_VOID_P}" STREQUAL "8" )
|
||||
set(_OPENSSL_MSVC_ARCH_SUFFIX "64")
|
||||
else()
|
||||
set(_OPENSSL_MSVC_ARCH_SUFFIX "32")
|
||||
endif()
|
||||
|
||||
if(OPENSSL_USE_STATIC_LIBS)
|
||||
set(_OPENSSL_PATH_SUFFIXES
|
||||
"lib/VC/static"
|
||||
"VC/static"
|
||||
"lib"
|
||||
)
|
||||
else()
|
||||
set(_OPENSSL_PATH_SUFFIXES
|
||||
"lib/VC"
|
||||
"VC"
|
||||
"lib"
|
||||
)
|
||||
endif ()
|
||||
|
||||
find_library(LIB_EAY_DEBUG
|
||||
NAMES
|
||||
libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
|
||||
libcrypto${_OPENSSL_MSVC_RT_MODE}d
|
||||
libcryptod
|
||||
libeay32${_OPENSSL_MSVC_RT_MODE}d
|
||||
libeay32d
|
||||
cryptod
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
PATH_SUFFIXES
|
||||
${_OPENSSL_PATH_SUFFIXES}
|
||||
)
|
||||
|
||||
find_library(LIB_EAY_RELEASE
|
||||
NAMES
|
||||
libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
|
||||
libcrypto${_OPENSSL_MSVC_RT_MODE}
|
||||
libcrypto
|
||||
libeay32${_OPENSSL_MSVC_RT_MODE}
|
||||
libeay32
|
||||
crypto
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
PATH_SUFFIXES
|
||||
${_OPENSSL_PATH_SUFFIXES}
|
||||
)
|
||||
|
||||
find_library(SSL_EAY_DEBUG
|
||||
NAMES
|
||||
libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
|
||||
libssl${_OPENSSL_MSVC_RT_MODE}d
|
||||
libssld
|
||||
ssleay32${_OPENSSL_MSVC_RT_MODE}d
|
||||
ssleay32d
|
||||
ssld
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
PATH_SUFFIXES
|
||||
${_OPENSSL_PATH_SUFFIXES}
|
||||
)
|
||||
|
||||
find_library(SSL_EAY_RELEASE
|
||||
NAMES
|
||||
libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
|
||||
libssl${_OPENSSL_MSVC_RT_MODE}
|
||||
libssl
|
||||
ssleay32${_OPENSSL_MSVC_RT_MODE}
|
||||
ssleay32
|
||||
ssl
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
PATH_SUFFIXES
|
||||
${_OPENSSL_PATH_SUFFIXES}
|
||||
)
|
||||
|
||||
set(LIB_EAY_LIBRARY_DEBUG "${LIB_EAY_DEBUG}")
|
||||
set(LIB_EAY_LIBRARY_RELEASE "${LIB_EAY_RELEASE}")
|
||||
set(SSL_EAY_LIBRARY_DEBUG "${SSL_EAY_DEBUG}")
|
||||
set(SSL_EAY_LIBRARY_RELEASE "${SSL_EAY_RELEASE}")
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/SelectLibraryConfigurations.cmake)
|
||||
select_library_configurations(LIB_EAY)
|
||||
select_library_configurations(SSL_EAY)
|
||||
|
||||
mark_as_advanced(LIB_EAY_LIBRARY_DEBUG LIB_EAY_LIBRARY_RELEASE
|
||||
SSL_EAY_LIBRARY_DEBUG SSL_EAY_LIBRARY_RELEASE)
|
||||
set(OPENSSL_SSL_LIBRARY ${SSL_EAY_LIBRARY} )
|
||||
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY_LIBRARY} )
|
||||
elseif(MINGW)
|
||||
# same player, for MinGW
|
||||
set(LIB_EAY_NAMES crypto libeay32)
|
||||
set(SSL_EAY_NAMES ssl ssleay32)
|
||||
find_library(LIB_EAY
|
||||
NAMES
|
||||
${LIB_EAY_NAMES}
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
PATH_SUFFIXES
|
||||
"lib/MinGW"
|
||||
"lib"
|
||||
)
|
||||
|
||||
find_library(SSL_EAY
|
||||
NAMES
|
||||
${SSL_EAY_NAMES}
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
PATH_SUFFIXES
|
||||
"lib/MinGW"
|
||||
"lib"
|
||||
)
|
||||
|
||||
mark_as_advanced(SSL_EAY LIB_EAY)
|
||||
set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
|
||||
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
|
||||
unset(LIB_EAY_NAMES)
|
||||
unset(SSL_EAY_NAMES)
|
||||
else()
|
||||
# Not sure what to pick for -say- intel, let's use the toplevel ones and hope someone reports issues:
|
||||
find_library(LIB_EAY
|
||||
NAMES
|
||||
libcrypto
|
||||
libeay32
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
HINTS
|
||||
${_OPENSSL_LIBDIR}
|
||||
PATH_SUFFIXES
|
||||
lib
|
||||
)
|
||||
|
||||
find_library(SSL_EAY
|
||||
NAMES
|
||||
libssl
|
||||
ssleay32
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
HINTS
|
||||
${_OPENSSL_LIBDIR}
|
||||
PATH_SUFFIXES
|
||||
lib
|
||||
)
|
||||
|
||||
mark_as_advanced(SSL_EAY LIB_EAY)
|
||||
set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
|
||||
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
|
||||
endif()
|
||||
else()
|
||||
|
||||
find_library(OPENSSL_SSL_LIBRARY
|
||||
NAMES
|
||||
ssl
|
||||
ssleay32
|
||||
ssleay32MD
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
HINTS
|
||||
${_OPENSSL_LIBDIR}
|
||||
PATH_SUFFIXES
|
||||
lib
|
||||
)
|
||||
|
||||
find_library(OPENSSL_CRYPTO_LIBRARY
|
||||
NAMES
|
||||
crypto
|
||||
NAMES_PER_DIR
|
||||
${_OPENSSL_ROOT_HINTS_AND_PATHS}
|
||||
HINTS
|
||||
${_OPENSSL_LIBDIR}
|
||||
PATH_SUFFIXES
|
||||
lib
|
||||
)
|
||||
|
||||
mark_as_advanced(OPENSSL_CRYPTO_LIBRARY OPENSSL_SSL_LIBRARY)
|
||||
|
||||
endif()
|
||||
|
||||
# compat defines
|
||||
set(OPENSSL_SSL_LIBRARIES ${OPENSSL_SSL_LIBRARY})
|
||||
set(OPENSSL_CRYPTO_LIBRARIES ${OPENSSL_CRYPTO_LIBRARY})
|
||||
_OpenSSL_test_and_find_dependencies("${OPENSSL_SSL_LIBRARY}" "${OPENSSL_CRYPTO_LIBRARY}")
|
||||
if(_OpenSSL_has_dependencies)
|
||||
_OpenSSL_add_dependencies( OPENSSL_SSL_LIBRARIES "${OPENSSL_SSL_LIBRARY}" )
|
||||
_OpenSSL_add_dependencies( OPENSSL_CRYPTO_LIBRARIES "${OPENSSL_CRYPTO_LIBRARY}" )
|
||||
endif()
|
||||
|
||||
# Convert a hexadecimal string to its decimal value.
#
#   HEX - string of hexadecimal digits (upper or lower case, no "0x" prefix);
#         an empty string yields 0.
#   DEC - name of the variable in the caller's scope that receives the result.
#
# Each digit is looked up by position in "0123456789ABCDEF" instead of being
# compared with if(... STREQUAL ...), so the result cannot be affected by
# if()'s automatic variable dereferencing. A character that is not a
# hexadecimal digit stops the configuration with an explicit error.
function(from_hex HEX DEC)
  string(TOUPPER "${HEX}" HEX)
  set(_res 0)
  string(LENGTH "${HEX}" _strlen)

  set(_pos 0)
  while(_pos LESS _strlen)
    string(SUBSTRING "${HEX}" ${_pos} 1 _digit)
    # The index of the digit inside the alphabet is its numeric value.
    string(FIND "0123456789ABCDEF" "${_digit}" _value)
    if(_value LESS 0)
      message(FATAL_ERROR "from_hex: '${_digit}' is not a hexadecimal digit")
    endif()
    math(EXPR _res "${_res} * 16 + ${_value}")
    math(EXPR _pos "${_pos} + 1")
  endwhile()

  set(${DEC} ${_res} PARENT_SCOPE)
endfunction()
|
||||
|
||||
# Determine OPENSSL_VERSION from <openssl/opensslv.h>.
#
# Two header layouts are handled:
#  * a literal hexadecimal OPENSSL_VERSION_NUMBER, decoded field by field;
#  * headers without such a literal (NOTE(review): expected for OpenSSL 3.x,
#    where OPENSSL_VERSION_STR carries "MAJOR.MINOR.PATCH" - confirm against
#    the headers you ship), for which the OPENSSL_VERSION_STR macro is read.
if(OPENSSL_INCLUDE_DIR AND EXISTS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h")
  file(STRINGS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h" openssl_version_str
       REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])+.*")

  if(openssl_version_str)
    # The version number is encoded as 0xMNNFFPPS: major minor fix patch status
    # The status gives if this is a developer or prerelease and is ignored here.
    # Major, minor, and fix directly translate into the version numbers shown in
    # the string. The patch field translates to the single character suffix that
    # indicates the bug fix state, which 00 -> nothing, 01 -> a, 02 -> b and so
    # on.

    string(REGEX REPLACE "^.*OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F]).*$"
           "\\1;\\2;\\3;\\4;\\5" OPENSSL_VERSION_LIST "${openssl_version_str}")
    list(GET OPENSSL_VERSION_LIST 0 OPENSSL_VERSION_MAJOR)
    list(GET OPENSSL_VERSION_LIST 1 OPENSSL_VERSION_MINOR)
    from_hex("${OPENSSL_VERSION_MINOR}" OPENSSL_VERSION_MINOR)
    list(GET OPENSSL_VERSION_LIST 2 OPENSSL_VERSION_FIX)
    from_hex("${OPENSSL_VERSION_FIX}" OPENSSL_VERSION_FIX)
    list(GET OPENSSL_VERSION_LIST 3 OPENSSL_VERSION_PATCH)

    if (NOT OPENSSL_VERSION_PATCH STREQUAL "00")
      from_hex("${OPENSSL_VERSION_PATCH}" _tmp)
      # 96 is the ASCII code of 'a' minus 1
      math(EXPR OPENSSL_VERSION_PATCH_ASCII "${_tmp} + 96")
      unset(_tmp)
      # Once anyone knows how OpenSSL would call the patch versions beyond 'z'
      # this should be updated to handle that, too. This has not happened yet
      # so it is simply ignored here for now.
      string(ASCII "${OPENSSL_VERSION_PATCH_ASCII}" OPENSSL_VERSION_PATCH_STRING)
    endif ()

    set(OPENSSL_VERSION "${OPENSSL_VERSION_MAJOR}.${OPENSSL_VERSION_MINOR}.${OPENSSL_VERSION_FIX}${OPENSSL_VERSION_PATCH_STRING}")
  else ()
    # No literal hexadecimal version number in the header: fall back to the
    # OPENSSL_VERSION_STR macro, which already holds "MAJOR.MINOR.PATCH".
    file(STRINGS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h" _openssl_version_str_line
         REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_STR[\t ]+\"[0-9]+\\.[0-9]+\\.[0-9]+\".*")
    if(_openssl_version_str_line)
      string(REGEX REPLACE "^.*OPENSSL_VERSION_STR[\t ]+\"([0-9]+\\.[0-9]+\\.[0-9]+)\".*$"
             "\\1" OPENSSL_VERSION "${_openssl_version_str_line}")
    endif()
    unset(_openssl_version_str_line)
  endif ()
endif ()
|
||||
|
||||
set(OPENSSL_LIBRARIES ${OPENSSL_SSL_LIBRARIES} ${OPENSSL_CRYPTO_LIBRARIES} )
|
||||
list(REMOVE_DUPLICATES OPENSSL_LIBRARIES)
|
||||
|
||||
# Set OpenSSL_<component>_FOUND for every requested component.
# "Crypto" and "SSL" are found when the include directory exists together with
# at least one of the matching library files; any other name is rejected with
# a warning and reported as not found.
foreach(_comp IN LISTS OpenSSL_FIND_COMPONENTS)
  # Assume "not found" and upgrade below when the files are present.
  set(OpenSSL_${_comp}_FOUND FALSE)
  if(_comp STREQUAL "Crypto")
    if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
        (EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
         EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
         EXISTS "${LIB_EAY_LIBRARY_RELEASE}"))
      set(OpenSSL_${_comp}_FOUND TRUE)
    endif()
  elseif(_comp STREQUAL "SSL")
    if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
        (EXISTS "${OPENSSL_SSL_LIBRARY}" OR
         EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
         EXISTS "${SSL_EAY_LIBRARY_RELEASE}"))
      set(OpenSSL_${_comp}_FOUND TRUE)
    endif()
  else()
    message(WARNING "${_comp} is not a valid OpenSSL component")
  endif()
endforeach()
unset(_comp)
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake)
|
||||
find_package_handle_standard_args(OpenSSL
|
||||
REQUIRED_VARS
|
||||
OPENSSL_CRYPTO_LIBRARY
|
||||
OPENSSL_INCLUDE_DIR
|
||||
VERSION_VAR
|
||||
OPENSSL_VERSION
|
||||
HANDLE_COMPONENTS
|
||||
FAIL_MESSAGE
|
||||
"Could NOT find OpenSSL, try to set the path to OpenSSL root folder in the system variable OPENSSL_ROOT_DIR"
|
||||
)
|
||||
|
||||
mark_as_advanced(OPENSSL_INCLUDE_DIR OPENSSL_LIBRARIES)
|
||||
|
||||
# Create the imported targets OpenSSL::Crypto and OpenSSL::SSL once OpenSSL has
# been found. A target is only created when it does not exist yet and at least
# one of its library files (generic, debug or release) is present on disk.
if(OPENSSL_FOUND)
  if(NOT TARGET OpenSSL::Crypto AND
      (EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
       EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
       EXISTS "${LIB_EAY_LIBRARY_RELEASE}"))
    add_library(OpenSSL::Crypto UNKNOWN IMPORTED)
    set_target_properties(OpenSSL::Crypto PROPERTIES
      INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
    if(EXISTS "${OPENSSL_CRYPTO_LIBRARY}")
      set_target_properties(OpenSSL::Crypto PROPERTIES
        IMPORTED_LINK_INTERFACE_LANGUAGES "C"
        IMPORTED_LOCATION "${OPENSSL_CRYPTO_LIBRARY}")
    endif()
    # Per-configuration locations, release first and debug second.
    foreach(_openssl_cfg RELEASE DEBUG)
      if(EXISTS "${LIB_EAY_LIBRARY_${_openssl_cfg}}")
        set_property(TARGET OpenSSL::Crypto APPEND PROPERTY
          IMPORTED_CONFIGURATIONS ${_openssl_cfg})
        set_target_properties(OpenSSL::Crypto PROPERTIES
          IMPORTED_LINK_INTERFACE_LANGUAGES_${_openssl_cfg} "C"
          IMPORTED_LOCATION_${_openssl_cfg} "${LIB_EAY_LIBRARY_${_openssl_cfg}}")
      endif()
    endforeach()
    unset(_openssl_cfg)
    _OpenSSL_target_add_dependencies(OpenSSL::Crypto)
  endif()

  if(NOT TARGET OpenSSL::SSL AND
      (EXISTS "${OPENSSL_SSL_LIBRARY}" OR
       EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
       EXISTS "${SSL_EAY_LIBRARY_RELEASE}"))
    add_library(OpenSSL::SSL UNKNOWN IMPORTED)
    set_target_properties(OpenSSL::SSL PROPERTIES
      INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
    if(EXISTS "${OPENSSL_SSL_LIBRARY}")
      set_target_properties(OpenSSL::SSL PROPERTIES
        IMPORTED_LINK_INTERFACE_LANGUAGES "C"
        IMPORTED_LOCATION "${OPENSSL_SSL_LIBRARY}")
    endif()
    # Per-configuration locations, release first and debug second.
    foreach(_openssl_cfg RELEASE DEBUG)
      if(EXISTS "${SSL_EAY_LIBRARY_${_openssl_cfg}}")
        set_property(TARGET OpenSSL::SSL APPEND PROPERTY
          IMPORTED_CONFIGURATIONS ${_openssl_cfg})
        set_target_properties(OpenSSL::SSL PROPERTIES
          IMPORTED_LINK_INTERFACE_LANGUAGES_${_openssl_cfg} "C"
          IMPORTED_LOCATION_${_openssl_cfg} "${SSL_EAY_LIBRARY_${_openssl_cfg}}")
      endif()
    endforeach()
    unset(_openssl_cfg)
    # The SSL target links against the crypto target when that one exists.
    if(TARGET OpenSSL::Crypto)
      set_target_properties(OpenSSL::SSL PROPERTIES
        INTERFACE_LINK_LIBRARIES OpenSSL::Crypto)
    endif()
    _OpenSSL_target_add_dependencies(OpenSSL::SSL)
  endif()
endif()
|
||||
|
||||
# Restore the original find library ordering
|
||||
if(OPENSSL_USE_STATIC_LIBS)
|
||||
set(CMAKE_FIND_LIBRARY_SUFFIXES ${_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
|
||||
endif()
|
||||
268
CMakeModules/FindPackageHandleStandardArgs.cmake
vendored
268
CMakeModules/FindPackageHandleStandardArgs.cmake
vendored
@@ -1,268 +0,0 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageMessage.cmake)
|
||||
|
||||
# internal helper macro
|
||||
# Internal helper: report a "package not found" message.
# The text of FPHSA_REASON_FAILURE_MESSAGE is appended when it is set. The
# message is fatal when the package is REQUIRED, a status line otherwise, and
# suppressed entirely when the package was requested QUIETLY.
macro(_FPHSA_FAILURE_MESSAGE _msg)
  set(__msg "${_msg}")
  if(FPHSA_REASON_FAILURE_MESSAGE)
    set(__msg "${__msg}\n Reason given by package: ${FPHSA_REASON_FAILURE_MESSAGE}\n")
  endif()

  if(${_NAME}_FIND_REQUIRED)
    message(FATAL_ERROR "${__msg}")
  elseif(NOT ${_NAME}_FIND_QUIETLY)
    message(STATUS "${__msg}")
  endif()
endmacro()
|
||||
|
||||
|
||||
# internal helper macro to generate the failure message when used in CONFIG_MODE:
|
||||
# Internal helper: build and emit the failure message in CONFIG_MODE.
# Three situations are told apart:
#  1. <PackageName>_CONFIG is set: a config file was found, but some other
#     required variable is missing;
#  2. <PackageName>_CONSIDERED_CONFIGS is set: config files were found, but
#     none with a suitable version - all of them are listed;
#  3. neither is set: no config file was found at all.
macro(_FPHSA_HANDLE_FAILURE_CONFIG_MODE)
  if(${_NAME}_CONFIG)
    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: missing:${MISSING_VARS} (found ${${_NAME}_CONFIG} ${VERSION_MSG})")
  elseif(${_NAME}_CONSIDERED_CONFIGS)
    # One line per considered config file, together with its version.
    set(configsText "")
    list(LENGTH ${_NAME}_CONSIDERED_CONFIGS configsCount)
    math(EXPR configsCount "${configsCount} - 1")
    foreach(currentConfigIndex RANGE ${configsCount})
      list(GET ${_NAME}_CONSIDERED_CONFIGS ${currentConfigIndex} filename)
      list(GET ${_NAME}_CONSIDERED_VERSIONS ${currentConfigIndex} version)
      set(configsText "${configsText}\n ${filename} (version ${version})")
    endforeach()

    if(NOT ${_NAME}_NOT_FOUND_MESSAGE)
      set(configsText "${configsText}\n")
    elseif(FPHSA_REASON_FAILURE_MESSAGE)
      # Put the package's own explanation in front of the given reason.
      set(FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}\n ${FPHSA_REASON_FAILURE_MESSAGE}")
    else()
      set(FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}")
    endif()

    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} ${VERSION_MSG}, checked the following files:${configsText}")
  else()
    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: found neither ${_NAME}Config.cmake nor ${_NAME_LOWER}-config.cmake ${VERSION_MSG}")
  endif()
endmacro()
|
||||
|
||||
|
||||
# Evaluate the result of a find module and set <Name>_FOUND / <NAME>_FOUND in
# the caller's scope.
#
# Two call signatures are accepted:
#   simple:   (<Name> <fail message | DEFAULT_MSG> <required var>...)
#   extended: (<Name> [CONFIG_MODE] [HANDLE_COMPONENTS]
#              [FAIL_MESSAGE <msg>] [REASON_FAILURE_MESSAGE <msg>]
#              [VERSION_VAR <var>] [FOUND_VAR <var>]
#              REQUIRED_VARS <var>...)
#
# The package counts as found when every required variable evaluates to true,
# no required component is missing and the version check passes. The outcome
# is reported through find_package_message() on success and through the
# _FPHSA_* helper macros on failure.
function(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FIRST_ARG)

  # Keyword sets handed to cmake_parse_arguments() in extended mode.
  set(options CONFIG_MODE HANDLE_COMPONENTS)
  set(oneValueArgs FAIL_MESSAGE REASON_FAILURE_MESSAGE VERSION_VAR FOUND_VAR)
  set(multiValueArgs REQUIRED_VARS)

  # Extended mode is selected when the first argument is a known keyword.
  set(_KEYWORDS_FOR_EXTENDED_MODE ${options} ${oneValueArgs} ${multiValueArgs})
  list(FIND _KEYWORDS_FOR_EXTENDED_MODE "${_FIRST_ARG}" _keyword_index)

  if(NOT ${_keyword_index} EQUAL -1)
    cmake_parse_arguments(FPHSA "${options}" "${oneValueArgs}" "${multiValueArgs}" ${_FIRST_ARG} ${ARGN})

    if(FPHSA_UNPARSED_ARGUMENTS)
      message(FATAL_ERROR "Unknown keywords given to FIND_PACKAGE_HANDLE_STANDARD_ARGS(): \"${FPHSA_UNPARSED_ARGUMENTS}\"")
    endif()

    if(NOT FPHSA_FAIL_MESSAGE)
      set(FPHSA_FAIL_MESSAGE "DEFAULT_MSG")
    endif()

    # In config mode <PackageName>_CONFIG (set by find_package() once the
    # config file was found and its version accepted) becomes the first
    # required variable, and <PackageName>_VERSION the version variable.
    if(FPHSA_CONFIG_MODE)
      list(INSERT FPHSA_REQUIRED_VARS 0 ${_NAME}_CONFIG)
      list(REMOVE_DUPLICATES FPHSA_REQUIRED_VARS)
      set(FPHSA_VERSION_VAR ${_NAME}_VERSION)
    endif()

    if(NOT FPHSA_REQUIRED_VARS)
      message(FATAL_ERROR "No REQUIRED_VARS specified for FIND_PACKAGE_HANDLE_STANDARD_ARGS()")
    endif()
  else()
    # Simple mode: <fail message> followed by the required variables.
    set(FPHSA_FAIL_MESSAGE ${_FIRST_ARG})
    set(FPHSA_REQUIRED_VARS ${ARGN})
    set(FPHSA_VERSION_VAR)
  endif()

  # All arguments are collected; process them.

  if("x${FPHSA_FAIL_MESSAGE}" STREQUAL "xDEFAULT_MSG")
    set(FPHSA_FAIL_MESSAGE "Could NOT find ${_NAME}")
  endif()

  list(GET FPHSA_REQUIRED_VARS 0 _FIRST_REQUIRED_VAR)

  string(TOUPPER ${_NAME} _NAME_UPPER)
  string(TOLOWER ${_NAME} _NAME_LOWER)

  # FOUND_VAR may only name <Name>_FOUND or <NAME>_FOUND.
  set(_FOUND_VAR ${_NAME_UPPER}_FOUND)
  if(FPHSA_FOUND_VAR)
    if(FPHSA_FOUND_VAR MATCHES "^${_NAME}_FOUND$" OR FPHSA_FOUND_VAR MATCHES "^${_NAME_UPPER}_FOUND$")
      set(_FOUND_VAR ${FPHSA_FOUND_VAR})
    else()
      message(FATAL_ERROR "The argument for FOUND_VAR is \"${FPHSA_FOUND_VAR}\", but only \"${_NAME}_FOUND\" and \"${_NAME_UPPER}_FOUND\" are valid names.")
    endif()
  endif()

  # Collect the names of all required variables that are not set, so the
  # failure message can tell the user what exactly is missing.
  set(MISSING_VARS "")
  set(DETAILS "")
  set(FPHSA_FOUND_${_NAME} TRUE)
  foreach(_required_var ${FPHSA_REQUIRED_VARS})
    if(${_required_var})
      string(APPEND DETAILS "[${${_required_var}}]")
    else()
      set(FPHSA_FOUND_${_NAME} FALSE)
      string(APPEND MISSING_VARS " ${_required_var}")
    endif()
  endforeach()
  set(${_NAME}_FOUND ${FPHSA_FOUND_${_NAME}})
  set(${_NAME_UPPER}_FOUND ${FPHSA_FOUND_${_NAME}})

  # Component handling.
  unset(FOUND_COMPONENTS_MSG)
  unset(MISSING_COMPONENTS_MSG)

  if(FPHSA_HANDLE_COMPONENTS)
    foreach(_component ${${_NAME}_FIND_COMPONENTS})
      if(${_NAME}_${_component}_FOUND)
        if(NOT DEFINED FOUND_COMPONENTS_MSG)
          set(FOUND_COMPONENTS_MSG "found components:")
        endif()
        string(APPEND FOUND_COMPONENTS_MSG " ${_component}")
      else()
        if(NOT DEFINED MISSING_COMPONENTS_MSG)
          set(MISSING_COMPONENTS_MSG "missing components:")
        endif()
        string(APPEND MISSING_COMPONENTS_MSG " ${_component}")

        # Only a missing *required* component makes the package not found.
        if(${_NAME}_FIND_REQUIRED_${_component})
          set(${_NAME}_FOUND FALSE)
          string(APPEND MISSING_VARS " ${_component}")
        endif()
      endif()
    endforeach()
    set(COMPONENT_MSG "${FOUND_COMPONENTS_MSG} ${MISSING_COMPONENTS_MSG}")
    string(APPEND DETAILS "[c${COMPONENT_MSG}]")
  endif()

  # Version handling.
  set(VERSION_MSG "")
  set(VERSION_OK TRUE)

  # DEFINED is used because the requested or the found version may be "0".
  if(DEFINED ${_NAME}_FIND_VERSION)
    if(DEFINED ${FPHSA_VERSION_VAR})
      set(_FOUND_VERSION ${${FPHSA_VERSION_VAR}})

      if(${_NAME}_FIND_VERSION_EXACT)
        # Exact version required: count the components of the found version
        # (number of dots plus one).
        string(REGEX REPLACE "[^.]" "" _VERSION_DOTS "${_FOUND_VERSION}")
        string(LENGTH "${_VERSION_DOTS}." _VERSION_DOTS)

        if(_VERSION_DOTS GREATER ${_NAME}_FIND_VERSION_COUNT)
          # The found version has more components than were requested, so
          # only its leading components are compared. find_package() limits
          # ${_NAME}_FIND_VERSION_COUNT to at most 4, hence the lookup table.
          if(${_NAME}_FIND_VERSION_COUNT EQUAL 1)
            set(_VERSION_REGEX "[^.]*")
          elseif(${_NAME}_FIND_VERSION_COUNT EQUAL 2)
            set(_VERSION_REGEX "[^.]*\\.[^.]*")
          elseif(${_NAME}_FIND_VERSION_COUNT EQUAL 3)
            set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*")
          else()
            set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*\\.[^.]*")
          endif()
          string(REGEX REPLACE "^(${_VERSION_REGEX})\\..*" "\\1" _VERSION_HEAD "${_FOUND_VERSION}")
          unset(_VERSION_REGEX)

          if(${_NAME}_FIND_VERSION VERSION_EQUAL _VERSION_HEAD)
            set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
          else()
            set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
            set(VERSION_OK FALSE)
          endif()
          unset(_VERSION_HEAD)
        else()
          if(${_NAME}_FIND_VERSION VERSION_EQUAL _FOUND_VERSION)
            set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
          else()
            set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
            set(VERSION_OK FALSE)
          endif()
        endif()
        unset(_VERSION_DOTS)

      else()
        # Only a minimum version was requested.
        if(${_NAME}_FIND_VERSION VERSION_GREATER _FOUND_VERSION)
          set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is at least \"${${_NAME}_FIND_VERSION}\"")
          set(VERSION_OK FALSE)
        else()
          set(VERSION_MSG "(found suitable version \"${_FOUND_VERSION}\", minimum required is \"${${_NAME}_FIND_VERSION}\")")
        endif()
      endif()

    else()
      # No version was found although one was requested: mention the
      # requirement in the output.
      if(${_NAME}_FIND_VERSION_EXACT)
        set(VERSION_MSG "(Required is exact version \"${${_NAME}_FIND_VERSION}\")")
      else()
        set(VERSION_MSG "(Required is at least version \"${${_NAME}_FIND_VERSION}\")")
      endif()
    endif()
  else()
    # No version requested; still report the found one (it may be "0").
    if(DEFINED ${FPHSA_VERSION_VAR})
      set(VERSION_MSG "(found version \"${${FPHSA_VERSION_VAR}}\")")
    endif()
  endif()

  if(VERSION_OK)
    string(APPEND DETAILS "[v${${FPHSA_VERSION_VAR}}(${${_NAME}_FIND_VERSION})]")
  else()
    set(${_NAME}_FOUND FALSE)
  endif()

  # Print the result.
  if(${_NAME}_FOUND)
    FIND_PACKAGE_MESSAGE(${_NAME} "Found ${_NAME}: ${${_FIRST_REQUIRED_VAR}} ${VERSION_MSG} ${COMPONENT_MSG}" "${DETAILS}")
  elseif(FPHSA_CONFIG_MODE)
    _FPHSA_HANDLE_FAILURE_CONFIG_MODE()
  elseif(NOT VERSION_OK)
    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: ${VERSION_MSG} (found ${${_FIRST_REQUIRED_VAR}})")
  else()
    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} (missing:${MISSING_VARS}) ${VERSION_MSG}")
  endif()

  # Both spellings of the result variable carry the same final value.
  set(${_NAME}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
  set(${_NAME_UPPER}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
endfunction()
|
||||
48
CMakeModules/FindPackageMessage.cmake
vendored
48
CMakeModules/FindPackageMessage.cmake
vendored
@@ -1,48 +0,0 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
#[=======================================================================[.rst:
|
||||
FindPackageMessage
|
||||
------------------
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
find_package_message(<name> "message for user" "find result details")
|
||||
|
||||
This function is intended to be used in FindXXX.cmake modules files.
|
||||
It will print a message once for each unique find result. This is
|
||||
useful for telling the user where a package was found. The first
|
||||
argument specifies the name (XXX) of the package. The second argument
|
||||
specifies the message to display. The third argument lists details
|
||||
about the find result so that if they change the message will be
|
||||
displayed again. The macro also obeys the QUIET argument to the
|
||||
find_package command.
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
if(X11_FOUND)
|
||||
find_package_message(X11 "Found X11: ${X11_X11_LIB}"
|
||||
"[${X11_X11_LIB}][${X11_INCLUDE_DIR}]")
|
||||
else()
|
||||
...
|
||||
endif()
|
||||
#]=======================================================================]
|
||||
|
||||
# Print `msg` once per distinct find result.
#
#   pkg     - package name; <pkg>_FIND_QUIETLY suppresses all output
#   msg     - text shown to the user
#   details - description of the find result; the message is printed again
#             only when this differs from the value stored in the cache
function(find_package_message pkg msg details)
  if(${pkg}_FIND_QUIETLY)
    return()
  endif()

  # Newlines are removed before the details are compared and stored.
  string(REPLACE "\n" "" details "${details}")
  set(DETAILS_VAR FIND_PACKAGE_MESSAGE_DETAILS_${pkg})
  if("${details}" STREQUAL "${${DETAILS_VAR}}")
    # This exact result has already been reported.
    return()
  endif()

  message(STATUS "${msg}")

  # Remember the details in the cache so the same message is not repeated.
  set("${DETAILS_VAR}" "${details}"
    CACHE INTERNAL "Details about finding ${pkg}")
endfunction()
|
||||
@@ -1,10 +0,0 @@
|
||||
# Copy the freshly built sist2 binary next to the Dockerfile, strip it, then
# build and push the Docker image tagged with the binary's version and "latest".

# Abort on the first failing command so that a failed copy or strip never
# results in an image being built or pushed.
set -e

# -f: a missing previous copy is not an error.
rm -f ./sist2
cp ../sist2 .
strip sist2

version=$(./sist2 --version)

# An empty version would produce the invalid tag "simon987/sist2:".
if [ -z "${version}" ]; then
    echo "Could not determine the sist2 version" >&2
    exit 1
fi

echo "Version ${version}"
docker build . -t "simon987/sist2:${version}" -t simon987/sist2:latest
docker push "simon987/sist2:${version}"
docker push simon987/sist2:latest
|
||||
@@ -1,9 +1,15 @@
|
||||
FROM ubuntu:19.10
|
||||
FROM simon987/sist2-build as build
|
||||
MAINTAINER simon987 <me@simon987.net>
|
||||
|
||||
RUN apt update
|
||||
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
|
||||
curl libtiff5 libpng16-16
|
||||
WORKDIR /build/
|
||||
ADD . /build/
|
||||
RUN cmake -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
|
||||
RUN make -j$(nproc)
|
||||
RUN strip sist2
|
||||
|
||||
FROM ubuntu:20.10
|
||||
|
||||
RUN apt update && apt install -y curl
|
||||
|
||||
RUN mkdir -p /usr/share/tessdata && \
|
||||
cd /usr/share/tessdata/ && \
|
||||
@@ -12,8 +18,11 @@ RUN mkdir -p /usr/share/tessdata && \
|
||||
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
|
||||
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
|
||||
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
|
||||
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh
|
||||
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
|
||||
|
||||
ADD sist2 /root/sist2
|
||||
COPY --from=build /build/sist2 /root/sist2
|
||||
|
||||
ENV LANG C.UTF-8
|
||||
ENV LC_ALL C.UTF-8
|
||||
|
||||
ENTRYPOINT ["/root/sist2"]
|
||||
28
Dockerfile.arm64
Normal file
28
Dockerfile.arm64
Normal file
@@ -0,0 +1,28 @@
|
||||
FROM simon987/sist2-build-arm64 as build
|
||||
MAINTAINER simon987 <me@simon987.net>
|
||||
|
||||
WORKDIR /build/
|
||||
ADD . /build/
|
||||
RUN cmake -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
|
||||
RUN make -j$(nproc)
|
||||
RUN strip sist2
|
||||
|
||||
FROM ubuntu:20.10
|
||||
|
||||
RUN apt update && apt install -y curl
|
||||
|
||||
RUN mkdir -p /usr/share/tessdata && \
|
||||
cd /usr/share/tessdata/ && \
|
||||
curl -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata &&\
|
||||
curl -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata &&\
|
||||
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
|
||||
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
|
||||
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
|
||||
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
|
||||
|
||||
COPY --from=build /build/sist2 /root/sist2
|
||||
|
||||
ENV LANG C.UTF-8
|
||||
ENV LC_ALL C.UTF-8
|
||||
|
||||
ENTRYPOINT ["/root/sist2"]
|
||||
177
README.md
177
README.md
@@ -1,6 +1,8 @@
|
||||

|
||||
[](https://www.codefactor.io/repository/github/simon987/sist2)
|
||||
[/statusIcon)](https://files.simon987.net/artifacts/Sist2/Build/)
|
||||
[](https://files.simon987.net/.gate/sist2/simon987_sist2/)
|
||||
|
||||
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/?i=Demo%20files)
|
||||
|
||||
# sist2
|
||||
|
||||
@@ -8,148 +10,143 @@ sist2 (Simple incremental search tool)
|
||||
|
||||
*Warning: sist2 is in early development*
|
||||
|
||||

|
||||
|
||||
## Features
|
||||
|
||||
* Fast, low memory usage, multi-threaded
|
||||
* Mobile-friendly Web interface
|
||||
* Portable (all its features are packaged in a single executable)
|
||||
* Extracts text from common file types \*
|
||||
* Extracts text and metadata from common file types \*
|
||||
* Generates thumbnails \*
|
||||
* Incremental scanning
|
||||
* Automatic tagging from file attributes via [user scripts](scripting/README.md)
|
||||
* Manual tagging from the UI and automatic tagging based on file attributes via [user scripts](docs/scripting.md)
|
||||
* Recursive scan inside archive files \*\*
|
||||
* OCR support with tesseract \*\*\*
|
||||
|
||||
* Stats page & disk utilisation visualization
|
||||
|
||||
\* See [format support](#format-support)
|
||||
\*\* See [Archive files](#archive-files)
|
||||
\*\*\* See [OCR](#ocr)
|
||||
\*\*\* See [OCR](#ocr)
|
||||
|
||||

|
||||
|
||||
## Getting Started
|
||||
|
||||
1. Have an [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) instance running
|
||||
1.
|
||||
1. Have an Elasticsearch (>= 6.X.X) instance running
|
||||
1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
|
||||
1. *(or)* Run using docker:
|
||||
```bash
|
||||
docker run -d --name es1 --net sist2_net -p 9200:9200 \
|
||||
-e "discovery.type=single-node" elasticsearch:7.5.2
|
||||
```
|
||||
1. *(or)* Run using docker-compose:
|
||||
```yaml
|
||||
elasticsearch:
|
||||
image: docker.elastic.co/elasticsearch/elasticsearch:7.5.2
|
||||
environment:
|
||||
- discovery.type=single-node
|
||||
- "ES_JAVA_OPTS=-Xms1G -Xmx2G"
|
||||
```
|
||||
1. Download sist2 executable
|
||||
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
|
||||
1. *(or)* Download an [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
|
||||
1. *(or)* `docker pull simon987/sist2:latest`
|
||||
|
||||
1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not recommended!)*
|
||||
1. *(or)* `docker pull simon987/sist2:2.10.1-x64-linux`
|
||||
|
||||
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
|
||||
\* *Mac users*: See [#1](https://github.com/simon987/sist2/issues/1)
|
||||
1. See [Usage guide](docs/USAGE.md)
|
||||
|
||||
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
|
||||
|
||||
## Example usage
|
||||
|
||||
See help page `sist2 --help` for more details.
|
||||
|
||||
**Scan a directory**
|
||||
```bash
|
||||
sist2 scan ~/Documents -o ./orig_idx/
|
||||
sist2 scan --threads 4 --content-size 16384 /mnt/Pictures
|
||||
sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
|
||||
```
|
||||
|
||||
**Push index to Elasticsearch or file**
|
||||
```bash
|
||||
sist2 index --force-reset ./my_idx
|
||||
sist2 index --print ./my_idx > raw_documents.ndjson
|
||||
```
|
||||
|
||||
**Start web interface**
|
||||
```bash
|
||||
sist2 web --bind 0.0.0.0 --port 4321 ./my_idx1 ./my_idx2 ./my_idx3
|
||||
```
|
||||
|
||||
### Use sist2 with docker
|
||||
|
||||
**scan**
|
||||
```bash
|
||||
docker run -it \
|
||||
-v /path/to/files/:/files \
|
||||
-v $PWD/out/:/out \
|
||||
simon987/sist2 scan -t 4 /files -o /out/my_idx1
|
||||
```
|
||||
**index**
|
||||
```bash
|
||||
docker run -it --network host\
|
||||
-v $PWD/out/:/out \
|
||||
simon987/sist2 index /out/my_idx1
|
||||
```
|
||||
|
||||
**web**
|
||||
```bash
|
||||
docker run --rm --network host -d --name sist2\
|
||||
-v $PWD/out/my_idx:/idx \
|
||||
-v $PWD/my/files:/files
|
||||
simon987/sist2 web --bind 0.0.0.0 /idx
|
||||
docker stop sist2
|
||||
```
|
||||
See [Usage guide](docs/USAGE.md) for more details
|
||||
|
||||
1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
|
||||
1. Push index to Elasticsearch: `sist2 index ./docs_idx`
|
||||
1. Start web interface: `sist2 web ./docs_idx`
|
||||
|
||||
## Format support
|
||||
|
||||
File type | Library | Content | Thumbnail | Metadata
|
||||
File type | Library | Content | Thumbnail | Metadata
|
||||
:---|:---|:---|:---|:---
|
||||
pdf,xps,cbz,fb2,epub | MuPDF | text+ocr | yes, `png` | title |
|
||||
`audio/*` | ffmpeg | - | yes, `jpeg` | ID3 tags |
|
||||
`video/*` | ffmpeg | - | yes, `jpeg` | title, comment, artist |
|
||||
`image/*` | ffmpeg | - | yes, `jpeg` | `EXIF:Artist`, `EXIF:ImageDescription` |
|
||||
pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
|
||||
cbz,cbr | *(none)* | - | yes | - |
|
||||
`audio/*` | ffmpeg | - | yes | ID3 tags |
|
||||
`video/*` | ffmpeg | - | yes | title, comment, artist |
|
||||
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
|
||||
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags, GPS tags |
|
||||
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
|
||||
`text/plain` | *(none)* | yes | no | - |
|
||||
html, xml | *(none)* | yes | no | - |
|
||||
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
|
||||
docx, xlsx, pptx | libOPC | yes | no | no |
|
||||
docx, xlsx, pptx | *(none)* | yes | if embedded | creator, modified_by, title |
|
||||
doc (MS Word 97-2003) | antiword | yes | yes | author, title |
|
||||
mobi, azw, azw3 | libmobi | yes | no | author, title |
|
||||
|
||||
\* *See [Archive files](#archive-files)*
|
||||
|
||||
|
||||
### Archive files
|
||||
**sist2** will scan files stored into archive files (zip, tar, 7z...) as if
|
||||
they were directly in the file system. Recursive (archives inside archives)
|
||||
|
||||
**sist2** will scan files stored into archive files (zip, tar, 7z...) as if they were directly in the file system.
|
||||
Recursive (archives inside archives)
|
||||
scan is also supported.
|
||||
|
||||
**Limitations**:
|
||||
* Parsing media files with formats that require
|
||||
*seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) is not supported.
|
||||
* Archive files are scanned sequentially, by a single thread. On systems where
|
||||
**sist2** is not I/O bound, scans might be faster when larger archives are split
|
||||
into smaller parts.
|
||||
|
||||
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
|
||||
|
||||
|
||||
* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
|
||||
is limited (see `--mem-buffer` option)
|
||||
* Archive files are scanned sequentially, by a single thread. On systems where
|
||||
**sist2** is not I/O bound, scans might be faster when larger archives are split into smaller parts.
|
||||
|
||||
### OCR
|
||||
|
||||
You can enable OCR support for pdf,xps,cbz,fb2,epub file types with the
|
||||
`--ocr <lang>` option. Download the language data files with your
|
||||
package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
|
||||
You can enable OCR support for pdf,xps,fb2,epub file types with the
|
||||
`--ocr <lang>` option. Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
|
||||
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
|
||||
|
||||
The `simon987/sist2` github image comes with common languages
|
||||
The `simon987/sist2` image comes with common languages
|
||||
(hin, jpn, eng, fra, rus, spa) pre-installed.
|
||||
|
||||
Examples
|
||||
|
||||
```bash
|
||||
sist2 scan --ocr jpn ~/Books/Manga/
|
||||
sist2 scan --ocr eng ~/Books/Textbooks/
|
||||
```
|
||||
|
||||
|
||||
## Build from source
|
||||
|
||||
You can compile **sist2** by yourself if you don't want to use the pre-compiled
|
||||
binaries.
|
||||
You can compile **sist2** by yourself if you don't want to use the pre-compiled binaries
|
||||
|
||||
### With docker (recommended)
|
||||
|
||||
```bash
|
||||
git clone --recursive https://github.com/simon987/sist2/
|
||||
cd sist2
|
||||
docker build . -f ./Dockerfile -t my-sist2-image
|
||||
docker run --rm my-sist2-image cat /root/sist2 > sist2-x64-linux
|
||||
```
|
||||
|
||||
### On a linux computer
|
||||
|
||||
1. Install compile-time dependencies
|
||||
|
||||
*(Debian)*
|
||||
```bash
|
||||
apt install git cmake pkg-config libglib2.0-dev \
|
||||
libssl-dev uuid-dev python3 libmagic-dev libfreetype6-dev \
|
||||
libcurl-dev libbz2-dev yasm libharfbuzz-dev ragel \
|
||||
libarchive-dev libtiff5 libpng16-16 libpango1.0-dev
|
||||
```bash
|
||||
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git
|
||||
```
|
||||
|
||||
1. Apply vcpkg patches, as per [sist2-build](https://github.com/simon987/sist2-build) Dockerfile
|
||||
|
||||
1. Install vcpkg dependencies
|
||||
|
||||
2. Build
|
||||
```bash
|
||||
git clone --recurse-submodules https://github.com/simon987/sist2
|
||||
./scripts/get_static_libs.sh
|
||||
cmake .
|
||||
vcpkg install curl[core,openssl]
|
||||
vcpkg install lmdb cjson glib brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libuuid libmagic libraw jasper lcms gumbo
|
||||
```
|
||||
|
||||
1. Build
|
||||
```bash
|
||||
git clone --recursive https://github.com/simon987/sist2/
|
||||
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
|
||||
make
|
||||
```
|
||||
|
||||
1
argparse
1
argparse
Submodule argparse deleted from fafc503d23
1
cJSON
1
cJSON
Submodule cJSON deleted from 2d4ad84192
19
ci/build.sh
Normal file → Executable file
19
ci/build.sh
Normal file → Executable file
@@ -1,8 +1,19 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
./scripts/get_static_libs.sh
|
||||
VCPKG_ROOT="/vcpkg"
|
||||
|
||||
cmake .
|
||||
make
|
||||
rm *.gz &>/dev/null
|
||||
|
||||
git submodule update --init --recursive
|
||||
|
||||
rm -rf CMakeFiles CMakeCache.txt
|
||||
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
|
||||
make -j $(nproc)
|
||||
strip sist2
|
||||
strip sist2_scan
|
||||
./sist2 -v > VERSION
|
||||
mv sist2 sist2-x64-linux
|
||||
|
||||
rm -rf CMakeFiles CMakeCache.txt
|
||||
cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
|
||||
make -j $(nproc)
|
||||
mv sist2_debug sist2-x64-linux-debug
|
||||
13
ci/build_arm64.sh
Executable file
13
ci/build_arm64.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
VCPKG_ROOT="/vcpkg"
|
||||
|
||||
rm *.gz &>/dev/null
|
||||
|
||||
git submodule update --init --recursive
|
||||
|
||||
rm -rf CMakeFiles CMakeCache.txt
|
||||
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
|
||||
make -j $(nproc)
|
||||
strip sist2
|
||||
mv sist2 sist2-arm64-linux
|
||||
404
docs/USAGE.md
Normal file
404
docs/USAGE.md
Normal file
@@ -0,0 +1,404 @@
|
||||
# Usage
|
||||
|
||||
*More examples (specifically with docker/compose) are in progress*
|
||||
|
||||
* [scan](#scan)
|
||||
* [options](#scan-options)
|
||||
* [examples](#scan-examples)
|
||||
* [index format](#index-format)
|
||||
* [index](#index)
|
||||
* [options](#index-options)
|
||||
* [examples](#index-examples)
|
||||
* [web](#web)
|
||||
* [options](#web-options)
|
||||
* [examples](#web-examples)
|
||||
* [rewrite_url](#rewrite_url)
|
||||
* [link to specific indices](#link-to-specific-indices)
|
||||
* [exec-script](#exec-script)
|
||||
* [tagging](#tagging)
|
||||
* [sidecar files](#sidecar-files)
|
||||
|
||||
```
|
||||
Usage: sist2 scan [OPTION]... PATH
|
||||
or: sist2 index [OPTION]... INDEX
|
||||
or: sist2 web [OPTION]... INDEX...
|
||||
or: sist2 exec-script [OPTION]... INDEX
|
||||
Lightning-fast file system indexer and search tool.
|
||||
|
||||
-h, --help show this help message and exit
|
||||
-v, --version Show version and exit
|
||||
--verbose Turn on logging
|
||||
--very-verbose Turn on debug messages
|
||||
|
||||
Scan options
|
||||
-t, --threads=<int> Number of threads. DEFAULT=1
|
||||
-q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
|
||||
--size=<int> Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
|
||||
--content-size=<int> Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
|
||||
--incremental=<str> Reuse an existing index and only scan modified files.
|
||||
-o, --output=<str> Output directory. DEFAULT=index.sist2/
|
||||
--rewrite-url=<str> Serve files from this url instead of from disk.
|
||||
--name=<str> Index display name. DEFAULT: (name of the directory)
|
||||
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
|
||||
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
|
||||
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
|
||||
-e, --exclude=<str> Files that match this regex will not be scanned
|
||||
--fast Only index file names & mime type
|
||||
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
|
||||
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
|
||||
--read-subtitles Read subtitles from media files
|
||||
|
||||
Index options
|
||||
-t, --threads=<int> Number of threads. DEFAULT=1
|
||||
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
|
||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||
-p, --print Just print JSON documents to stdout.
|
||||
--script-file=<str> Path to user script.
|
||||
--mappings-file=<str> Path to Elasticsearch mappings.
|
||||
--settings-file=<str> Path to Elasticsearch settings.
|
||||
--async-script Execute user script asynchronously.
|
||||
--batch-size=<int> Index batch size. DEFAULT: 100
|
||||
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
|
||||
|
||||
Web options
|
||||
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||
--bind=<str> Listen on this address. DEFAULT=localhost:4090
|
||||
--auth=<str> Basic auth in user:password format
|
||||
--tag-auth=<str> Basic auth in user:password format for tagging
|
||||
|
||||
Exec-script options
|
||||
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||
--script-file=<str> Path to user script.
|
||||
--async-script Execute user script asynchronously.
|
||||
Made by simon987 <me@simon987.net>. Released under GPL-3.0
|
||||
```
|
||||
|
||||
## Scan
|
||||
|
||||
### Scan options
|
||||
|
||||
* `-t, --threads`
|
||||
Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-WmiObject Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
|
||||
* `-q, --quality`
|
||||
Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. *Does not affect PDF thumbnails quality*
|
||||
* `--size`
|
||||
Thumbnail size in pixels.
|
||||
* `--content-size`
|
||||
Number of bytes of text to be extracted from the content of files (plain text and PDFs).
|
||||
Repeated whitespace and special characters do not count toward this limit.
|
||||
* `--incremental`
|
||||
Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
|
||||
will be copied to the new index and will not be parsed again.
|
||||
* `-o, --output` Output directory.
|
||||
* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
|
||||
* `--name` Set the `name` option for the web module
|
||||
* `--depth` Maximum scan depth. Set to 0 to only scan files directly in the root directory, set to -1 for infinite depth
|
||||
* `--archive` Archive file mode.
|
||||
* skip: Don't parse
|
||||
* list: Only get file names as text
|
||||
* shallow: Don't parse archives inside archives.
|
||||
* recurse: Scan archives recursively (default)
|
||||
* `--ocr` See [OCR](../README.md#OCR)
|
||||
* `-e, --exclude` Regex pattern to exclude files. A file is excluded if the pattern matches any
|
||||
part of the full absolute path.
|
||||
|
||||
Examples:
|
||||
* `-e ".*\.ttf"`: Ignore ttf files
|
||||
* `-e ".*\.(ttf|rar)"`: Ignore ttf and rar files
|
||||
* `-e "^/mnt/backups/"`: Ignore all files in the `/mnt/backups/` directory
|
||||
* `-e "^/mnt/Data[12]/"`: Ignore all files in the `/mnt/Data1/` and `/mnt/Data2/` directories
|
||||
* `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"` Exclude the
|
||||
`/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories
|
||||
* `--fast` Only index file names and mime type
|
||||
* `--treemap-threshold` Directories smaller than (`treemap-threshold` * `<total size of the index>`)
|
||||
will not be considered for the disk utilisation visualization; their size will be added to
|
||||
the parent directory. If the parent directory is still smaller than the threshold, it will also be "merged upwards"
|
||||
and so on.
|
||||
|
||||
In effect, smaller `treemap-threshold` values will yield a more detailed
|
||||
(but also a more cluttered and harder to read) visualization.
|
||||
|
||||
* `--mem-buffer` Maximum memory buffer size in MB (per thread) for files inside archives. Media files
|
||||
larger than this number will be read sequentially and no *seek* operations will be supported.
|
||||
|
||||
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
|
||||
* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
|
||||
|
||||
### Scan examples
|
||||
|
||||
Simple scan
|
||||
```bash
|
||||
sist2 scan ~/Documents
|
||||
|
||||
sist2 scan \
|
||||
--threads 4 --content-size 16000000 --quality 1.0 --archive shallow \
|
||||
--name "My Documents" --rewrite-url "http://nas.domain.local/My Documents/" \
|
||||
~/Documents -o ./documents.idx/
|
||||
```
|
||||
|
||||
Incremental scan
|
||||
```
|
||||
sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
|
||||
```
|
||||
|
||||
### Index format
|
||||
|
||||
A typical `binary` type index structure looks like this:
|
||||
```
|
||||
documents.idx/
|
||||
├── descriptor.json
|
||||
├── _index_139965416830720
|
||||
├── _index_139965425223424
|
||||
├── _index_139965433616128
|
||||
├── _index_139965442008832
|
||||
├── _index_139965442008832
|
||||
├── treemap.csv
|
||||
├── agg_mime.csv
|
||||
├── agg_date.csv
|
||||
├── agg_size.csv
|
||||
├── thumbs/
|
||||
| ├── data.mdb
|
||||
| └── lock.mdb
|
||||
├── tags/
|
||||
| ├── data.mdb
|
||||
| └── lock.mdb
|
||||
└── meta/
|
||||
├── data.mdb
|
||||
└── lock.mdb
|
||||
```
|
||||
|
||||
The `_index_*` files contain the raw binary index data and are not meant to be
|
||||
read by other applications. The format is generally compatible across different
|
||||
sist2 versions.
|
||||
|
||||
The `thumbs/` folder is a [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database)
|
||||
database containing the thumbnails.
|
||||
|
||||
The `descriptor.json` file contains general information about the index. The
|
||||
following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`.
|
||||
|
||||
The `.csv` are pre-computed aggregations necessary for the stats page.
|
||||
|
||||
|
||||
*Advanced usage*
|
||||
|
||||
Instead of using the `scan` module, you can also import an index generated
|
||||
by a third party application. The 'external' index must have the following format:
|
||||
|
||||
```
|
||||
my_index/
|
||||
├── descriptor.json
|
||||
├── _index_0
|
||||
└── thumbs/
|
||||
| ├── data.mdb
|
||||
| └── lock.mdb
|
||||
└── meta/
|
||||
└── <empty>
|
||||
```
|
||||
|
||||
*descriptor.json*:
|
||||
```json
|
||||
{
|
||||
"uuid": "<valid UUID4>",
|
||||
"version": "_external_v1",
|
||||
"root": "(optional)",
|
||||
"name": "<name>",
|
||||
"rewrite_url": "(optional)",
|
||||
"type": "json",
|
||||
"timestamp": 1578971024
|
||||
}
|
||||
```
|
||||
|
||||
*_index_0*: NDJSON format (One json object per line)
|
||||
|
||||
```json
|
||||
{
|
||||
"_id": "unique uuid for the file",
|
||||
"index": "index uuid4 (same one as descriptor.json!)",
|
||||
"mime": "application/x-cbz",
|
||||
"size": 14341204,
|
||||
"mtime": 1578882996,
|
||||
"extension": "cbz",
|
||||
"name": "my_book",
|
||||
"path": "path/to/books",
|
||||
"content": "text contents of the book",
|
||||
"title": "Title of the book",
|
||||
"tag": ["genre.fiction", "author.someguy", "etc..."],
|
||||
"_keyword": [
|
||||
{"k": "ISBN", "v": "ABCD34789231"}
|
||||
],
|
||||
"_text": [
|
||||
{"k": "other", "v": "This will be indexed as text"}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
You can find the full list of supported fields [here](../src/io/serialize.c#L90)
|
||||
|
||||
The `_keyword.*` items will be indexed and searchable as **keyword** fields (only full matches allowed).
|
||||
The `_text.*` items will be indexed and searchable as **text** fields (fuzzy searching allowed)
|
||||
|
||||
|
||||
*thumbs/*:
|
||||
|
||||
LMDB key-value store. Keys are **binary** 16-byte md5 hash* (`_id` field)
|
||||
and values are raw image bytes.
|
||||
|
||||
*\* Hash is calculated from the full path of the file, including the extension, relative to the index root*
|
||||
|
||||
Importing an external `binary` type index is technically possible but
|
||||
it is currently unsupported and has no guarantees of back/forward compatibility.
|
||||
|
||||
|
||||
## Index
|
||||
### Index options
|
||||
* `--es-url`
|
||||
Elasticsearch url and port. If you are using docker, make sure that both containers are on the
|
||||
same network.
|
||||
* `--es-index`
|
||||
Elasticsearch index name. DEFAULT=sist2
|
||||
* `-p, --print`
|
||||
Print index in JSON format to stdout.
|
||||
* `--script-file`
|
||||
Path to user script. See [Scripting](scripting.md).
|
||||
* `--mappings-file`
|
||||
Path to custom Elasticsearch mappings. If none is specified, [the bundled mappings](https://github.com/simon987/sist2/tree/master/schema) will be used.
|
||||
* `--settings-file`
|
||||
Path to custom Elasticsearch settings. *(See above)*
|
||||
* `--async-script`
|
||||
Use `wait_for_completion=false` elasticsearch option while executing user script.
|
||||
(See [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/tasks.html))
|
||||
* `--batch-size=<int>`
|
||||
Index batch size. Indexing is generally faster with larger batches, but payloads that
|
||||
are too large will fail and additional overhead for retrying with smaller sizes may slow
|
||||
down the process.
|
||||
* `-f, --force-reset`
|
||||
Reset Elasticsearch mappings and settings.
|
||||
|
||||
### Index examples
|
||||
|
||||
**Push to elasticsearch**
|
||||
```bash
|
||||
sist2 index --force-reset --batch-size 1000 --es-url http://localhost:9200 ./my_index/
|
||||
sist2 index ./my_index/
|
||||
```
|
||||
|
||||
**Save index in JSON format**
|
||||
```bash
|
||||
sist2 index --print ./my_index/ > my_index.ndjson
|
||||
```
|
||||
|
||||
**Inspect contents of an index**
|
||||
```bash
|
||||
sist2 index --print ./my_index/ | jq | less
|
||||
```
|
||||
|
||||
## Web
|
||||
|
||||
### Web options
|
||||
* `--es-url=<str>` Elasticsearch url.
|
||||
* `--es-index`
|
||||
Elasticsearch index name. DEFAULT=sist2
|
||||
* `--bind=<str>` Listen on this address.
|
||||
* `--auth=<str>` Basic auth in user:password format
|
||||
* `--tag-auth=<str>` Basic auth in user:password format. Works the same way as the
|
||||
`--auth` argument, but authentication is only applied to the `/tag/` endpoint.
|
||||
|
||||
### Web examples
|
||||
|
||||
**Single index**
|
||||
```bash
|
||||
sist2 web --auth admin:hunter2 --bind 0.0.0.0:8888 my_index
|
||||
```
|
||||
|
||||
**Multiple indices**
|
||||
```bash
|
||||
# Indices will be displayed in this order in the web interface
|
||||
sist2 web index1 index2 index3 index4
|
||||
```
|
||||
|
||||
### rewrite_url
|
||||
|
||||
When the `rewrite_url` field is not empty, the web module ignores the `root`
|
||||
field and will return an HTTP redirect to `<rewrite_url><path>/<name><extension>`
|
||||
instead of serving the file from disk.
|
||||
Both the `root` and `rewrite_url` fields are safe to manually modify from the
|
||||
`descriptor.json` file.
|
||||
|
||||
### Link to specific indices
|
||||
|
||||
To link to specific indices, you can add a list of comma-separated index names to
|
||||
the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are
|
||||
not displayed.
|
||||
|
||||
## exec-script
|
||||
|
||||
The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.
|
||||
|
||||
|
||||
# Tagging
|
||||
|
||||
### Manual tagging
|
||||
|
||||
You can modify tags of individual documents directly from the
|
||||
`web` interface. Note that you can set up authentication for this feature
|
||||
with the `--tag-auth` option (See [web options](#web-options))
|
||||
|
||||

|
||||
|
||||
Tags that are manually added are saved both in the
|
||||
index folder (in `/tags/`) and in Elasticsearch*. When re-`index`ing,
|
||||
they are read from the index and automatically applied.
|
||||
|
||||
You can safely copy the `/tags/` database to another index.
|
||||
|
||||
See [Automatic tagging](#automatic-tagging) for information about tag
|
||||
hierarchies and tag colors.
|
||||
|
||||
\* *It can take a few seconds to take effect in new search queries.*
|
||||
|
||||
|
||||
### Automatic tagging
|
||||
|
||||
See [scripting](scripting.md) documentation.
|
||||
|
||||
# Sidecar files
|
||||
|
||||
When scanning, sist2 will read metadata from `.s2meta` JSON files and overwrite the
|
||||
original document's metadata. Sidecar metadata files will also work inside archives.
|
||||
Sidecar files themselves are not saved in the index.
|
||||
|
||||
This feature is useful to leverage third-party applications such as speech-to-text or
|
||||
OCR to add additional metadata to a file.
|
||||
|
||||
**Example**
|
||||
|
||||
```
|
||||
~/Documents/
|
||||
├── Video.mp4
|
||||
└── Video.mp4.s2meta
|
||||
```
|
||||
|
||||
The sidecar file must have exactly the same file path and the `.s2meta` suffix.
|
||||
|
||||
`Video.mp4.s2meta`:
|
||||
```json
|
||||
{
|
||||
"content": "This sidecar file will overwrite some metadata fields of Video.mp4",
|
||||
"author": "Some author",
|
||||
"duration": 12345,
|
||||
"bitrate": 67890,
|
||||
"some_arbitrary_field": [1,2,3]
|
||||
}
|
||||
```
|
||||
|
||||
```
|
||||
sist2 scan ~/Documents -o ./docs.idx
|
||||
sist2 index ./docs.idx
|
||||
```
|
||||
|
||||
*NOTE*: It is technically possible to overwrite the `tag` value using sidecar files, however,
|
||||
it is not currently possible to restore both manual tags and sidecar tags without user scripts
|
||||
while reindexing.
|
||||
|
Before Width: | Height: | Size: 26 KiB After Width: | Height: | Size: 26 KiB |
BIN
docs/manual_tag.png
Normal file
BIN
docs/manual_tag.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.9 KiB |
@@ -39,7 +39,7 @@ it adds the `genre.<genre>` tag.
|
||||
ArrayList tags = ctx._source.tag = new ArrayList();
|
||||
|
||||
if (ctx._source?.genre != null) {
|
||||
tags.add("genre." + ctx._source.genre.toLowerCase())
|
||||
tags.add("genre." + ctx._source.genre.toLowerCase());
|
||||
}
|
||||
```
|
||||
|
||||
@@ -54,6 +54,11 @@ script.painless.regex.enabled: true
|
||||
```
|
||||
Or, if you're using docker add `-e "script.painless.regex.enabled=true"`
|
||||
|
||||
**Tag color**
|
||||
|
||||
You can specify the color for an individual tag by appending a
|
||||
hexadecimal color code (`#RRGGBBAA`) to the tag name.
|
||||
|
||||
### Examples
|
||||
|
||||
If `(20XX)` is in the file name, add the `year.<year>` tag:
|
||||
@@ -62,7 +67,7 @@ ArrayList tags = ctx._source.tag = new ArrayList();
|
||||
|
||||
Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
|
||||
if (m.find()) {
|
||||
tags.add("year." + m.group(1))
|
||||
tags.add("year." + m.group(1));
|
||||
}
|
||||
```
|
||||
|
||||
@@ -106,12 +111,32 @@ if (ctx._source.path != "") {
|
||||
}
|
||||
```
|
||||
|
||||
Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
|
||||
Parse `EXIF:F Number` tag
|
||||
```Java
|
||||
ArrayList tags = ctx._source.tag = new ArrayList();
|
||||
|
||||
if (ctx._source.path != "") {
|
||||
String[] names = ctx._source.path.splitOnToken('/');
|
||||
tags.add("studio." + names[names.length-1]);
|
||||
if (ctx._source?.exif_fnumber != null) {
|
||||
String[] values = ctx._source.exif_fnumber.splitOnToken(' ');
|
||||
String aperture = String.valueOf(Float.parseFloat(values[0]) / Float.parseFloat(values[1]));
|
||||
if (aperture == "NaN") {
|
||||
aperture = "0,0";
|
||||
}
|
||||
tags.add("Aperture.f/" + aperture.replace(".", ","));
|
||||
}
|
||||
```
|
||||
|
||||
Display year and months from `EXIF:DateTime` tag
|
||||
```Java
|
||||
if (ctx._source?.exif_datetime != null) {
|
||||
SimpleDateFormat parser = new SimpleDateFormat("yyyy:MM:dd HH:mm:ss");
|
||||
Date date = parser.parse(ctx._source.exif_datetime);
|
||||
|
||||
SimpleDateFormat yp = new SimpleDateFormat("yyyy");
|
||||
SimpleDateFormat mp = new SimpleDateFormat("MMMMMMMMM");
|
||||
|
||||
String year = yp.format(date);
|
||||
String month = mp.format(date);
|
||||
|
||||
tags.add("Month." + month);
|
||||
tags.add("Year." + year);
|
||||
}
|
||||
|
||||
```
|
||||
BIN
docs/sist2.png
Normal file
BIN
docs/sist2.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 889 KiB |
BIN
docs/stats.png
Normal file
BIN
docs/stats.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 167 KiB |
@@ -1,53 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
/**@file config/mce/config.h
|
||||
*/
|
||||
#ifndef MCE_CONFIG_H
|
||||
#define MCE_CONFIG_H
|
||||
|
||||
#include <libxml/xmlstring.h>
|
||||
#include <stdio.h>
|
||||
#include <plib/plib.h>
|
||||
#include <assert.h>
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MCE_NAMESPACE_SUBSUMPTION_ENABLED 0
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* MCE_CONFIG_H */
|
||||
@@ -1,189 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file mce/helper.h
|
||||
Helper functions needed by mce/textreader.h and mce/textwriter.h to implement MCE:
|
||||
- mceQNameLevelAdd(), mceQNameLevelLookup() and mceQNameLevelCleanup() maintain a set of mceQNameLevel_t tuples.
|
||||
- mceQNameLevelPush() and mceQNameLevelPopIfMatch() maintain a stack of mceQNameLevel_t tuples.
|
||||
- mceCtxInit(), mceCtxCleanup() and mceCtxUnderstandsNamespace() manage a context which holds all information needed to do MCE preprocessing.
|
||||
*/
|
||||
#include <mce/config.h>
|
||||
|
||||
#ifndef MCE_HELPER_H
|
||||
#define MCE_HELPER_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
Triple (ns, ln, level).
|
||||
*/
|
||||
typedef struct MCE_QNAME_LEVEL {
|
||||
xmlChar *ns;
|
||||
xmlChar *ln;
|
||||
puint32_t level;
|
||||
puint32_t flag; // used by mceTextWriter
|
||||
} mceQNameLevel_t;
|
||||
|
||||
/**
|
||||
*/
|
||||
typedef enum MCE_SKIP_STATE_ENUM {
|
||||
MCE_SKIP_STATE_IGNORE,
|
||||
MCE_SKIP_STATE_ALTERNATE_CONTENT,
|
||||
MCE_SKIP_STATE_CHOICE_MATCHED
|
||||
} mceSkipState_t;
|
||||
|
||||
/**
|
||||
Represents an interval of levels which are "skipped", i.e. ignored.
|
||||
*/
|
||||
typedef struct MCE_SKIP_ITEM {
|
||||
puint32_t level_start;
|
||||
puint32_t level_end;
|
||||
mceSkipState_t state;
|
||||
} mceSkipItem_t;
|
||||
|
||||
/**
|
||||
Represents a set of (ns, ln, level) triples.
|
||||
*/
|
||||
typedef struct MCE_QNAME_LEVEL_SET {
|
||||
mceQNameLevel_t *list_array;
|
||||
puint32_t list_items;
|
||||
puint32_t max_level;
|
||||
} mceQNameLevelSet_t;
|
||||
|
||||
/**
|
||||
The skip stack.
|
||||
*/
|
||||
typedef struct MCE_SKIP_STACK {
|
||||
mceSkipItem_t *stack_array;
|
||||
puint32_t stack_items;
|
||||
} mceSkipStack_t;
|
||||
|
||||
|
||||
typedef enum MCE_ERROR_ENUM {
|
||||
MCE_ERROR_NONE,
|
||||
MCE_ERROR_XML,
|
||||
MCE_ERROR_MUST_UNDERSTAND,
|
||||
MCE_ERROR_VALIDATION,
|
||||
MCE_ERROR_MEMORY
|
||||
} mceError_t;
|
||||
|
||||
/**
|
||||
Holds all information to do MCE preprocessing.
|
||||
*/
|
||||
typedef struct MCE_CONTEXT {
|
||||
mceQNameLevelSet_t ignorable_set;
|
||||
mceQNameLevelSet_t understands_set;
|
||||
mceQNameLevelSet_t processcontent_set;
|
||||
mceQNameLevelSet_t suspended_set;
|
||||
#if (MCE_NAMESPACE_SUBSUMPTION_ENABLED)
|
||||
mceQNameLevelSet_t subsume_namespace_set;
|
||||
mceQNameLevelSet_t subsume_exclude_set;
|
||||
mceQNameLevelSet_t subsume_prefix_set;
|
||||
#endif
|
||||
mceSkipStack_t skip_stack;
|
||||
mceError_t error;
|
||||
pbool_t mce_disabled;
|
||||
puint32_t suspended_level;
|
||||
} mceCtx_t;
|
||||
|
||||
/**
|
||||
Add a new triple (ns, ln, level) to the triple set \c qname_level_set.
|
||||
The \c ns_sub string is optional and will not be touched.
|
||||
*/
|
||||
pbool_t mceQNameLevelAdd(mceQNameLevelSet_t *qname_level_set, const xmlChar *ns, const xmlChar *ln, puint32_t level);
|
||||
|
||||
/**
|
||||
Look up a triple (ns, ln, level) via \c ns and \c ln. If \c ignore_ln is PTRUE then the first triple matching \c ns will be returned.
|
||||
*/
|
||||
mceQNameLevel_t* mceQNameLevelLookup(mceQNameLevelSet_t *qname_level_set, const xmlChar *ns, const xmlChar *ln, pbool_t ignore_ln);
|
||||
|
||||
/**
|
||||
Remove all triples (ns, ln, level) where the level is greater than or equal to \c level.
|
||||
*/
|
||||
pbool_t mceQNameLevelCleanup(mceQNameLevelSet_t *qname_level_set, puint32_t level);
|
||||
|
||||
/**
|
||||
Push a new skip interval (level_start, level_end, state) on the stack \c skip_stack.
|
||||
*/
|
||||
pbool_t mceSkipStackPush(mceSkipStack_t *skip_stack, puint32_t level_start, puint32_t level_end, mceSkipState_t state);
|
||||
|
||||
/**
|
||||
Pop the top interval (level_start, level_end, state) from the stack \c skip_stack.
|
||||
*/
|
||||
void mceSkipStackPop(mceSkipStack_t *skip_stack);
|
||||
|
||||
/**
|
||||
Returns top item or NULL.
|
||||
*/
|
||||
mceSkipItem_t *mceSkipStackTop(mceSkipStack_t *skip_stack);
|
||||
|
||||
/**
|
||||
Returns TRUE if the \c level is in the top skip interval.
|
||||
*/
|
||||
pbool_t mceSkipStackSkip(mceSkipStack_t *skip_stack, puint32_t level);
|
||||
|
||||
/**
|
||||
Initialize the mceCtx_t \c ctx.
|
||||
*/
|
||||
pbool_t mceCtxInit(mceCtx_t *ctx);
|
||||
|
||||
/**
|
||||
Cleanup, i.e. release all resources from the mceCtx_t \c ctx.
|
||||
*/
|
||||
pbool_t mceCtxCleanup(mceCtx_t *ctx);
|
||||
|
||||
/**
|
||||
Register the namespace \c ns in \c ctx.
|
||||
*/
|
||||
pbool_t mceCtxUnderstandsNamespace(mceCtx_t *ctx, const xmlChar *ns);
|
||||
|
||||
/**
|
||||
Register the qualified name (\c ns, \c ln) in \c ctx as one for which MCE processing is suspended.
|
||||
*/
|
||||
pbool_t mceCtxSuspendProcessing(mceCtx_t *ctx, const xmlChar *ns, const xmlChar *ln);
|
||||
|
||||
|
||||
|
||||
#if (MCE_NAMESPACE_SUBSUMPTION_ENABLED)
|
||||
/**
|
||||
Subsume namespace \c ns_new with \c ns_old.
|
||||
*/
|
||||
pbool_t mceCtxSubsumeNamespace(mceCtx_t *ctx, const xmlChar *prefix_new, const xmlChar *ns_new, const xmlChar *ns_old);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* MCE_HELPER_H */
|
||||
@@ -1,464 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file mce/textreader.h
|
||||
|
||||
*/
|
||||
#ifndef MCE_TEXTREADER_H
|
||||
#define MCE_TEXTREADER_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
A handle to an MCE-aware libxml2 xmlTextReader.
|
||||
*/
|
||||
typedef struct MCE_TEXTREADER mceTextReader_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
|
||||
#include <mce/config.h>
|
||||
#include <opc/opc.h>
|
||||
#include <mce/helper.h>
|
||||
#include <libxml/xmlwriter.h>
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct MCE_TEXTREADER {
|
||||
xmlTextReaderPtr reader;
|
||||
mceCtx_t mceCtx;
|
||||
};
|
||||
|
||||
/**
|
||||
Wrapper around the libxml2 xmlTextReaderRead function.
|
||||
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderRead
|
||||
*/
|
||||
int mceTextReaderRead(mceTextReader_t *mceTextReader);
|
||||
|
||||
/**
|
||||
Wrapper around a libxml2 xmlTextReaderNext function.
|
||||
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderNext
|
||||
*/
|
||||
int mceTextReaderNext(mceTextReader_t *mceTextReader);
|
||||
|
||||
/**
|
||||
Creates an mceTextReader from an XmlTextReader.
|
||||
\code
|
||||
mceTextReader reader;
|
||||
mceTextReaderInit(&reader, xmlNewTextReaderFilename("sample.xml"));
|
||||
// reader is ready to use.
|
||||
mceTextReaderCleanup(&reader);
|
||||
\endcode
|
||||
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlNewTextReaderFilename
|
||||
*/
|
||||
int mceTextReaderInit(mceTextReader_t *mceTextReader, xmlTextReaderPtr reader);
|
||||
|
||||
/**
|
||||
Cleanup MCE reader, i.e. free all resources. Also calls xmlTextReaderClose and xmlFreeTextReader.
|
||||
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderClose
|
||||
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlFreeTextReader
|
||||
*/
|
||||
int mceTextReaderCleanup(mceTextReader_t *mceTextReader);
|
||||
|
||||
/**
|
||||
Reads all events from \c mceTextReader and pipes them to \c writer.
|
||||
\code
|
||||
mceTextReader reader;
|
||||
mceTextReaderInit(&reader, xmlNewTextReaderFilename("sample.xml"));
|
||||
mceTextReaderUnderstandsNamespace(&reader, _X("http://myextension"));
|
||||
xmlTextWriterPtr writer=xmlNewTextWriterFilename("out.xml", 0);
|
||||
mceTextReaderDump(&reader, writer, P_FALSE);
|
||||
xmlFreeTextWriter(writer);
|
||||
mceTextReaderCleanup(&reader);
|
||||
\endcode
|
||||
*/
|
||||
int mceTextReaderDump(mceTextReader_t *mceTextReader, xmlTextWriter *writer, pbool_t fragment);
|
||||
|
||||
/**
|
||||
Registers an MCE namespace.
|
||||
\see mceTextReaderDump()
|
||||
*/
|
||||
int mceTextReaderUnderstandsNamespace(mceTextReader_t *mceTextReader, const xmlChar *ns);
|
||||
|
||||
/**
|
||||
Disable MCE processing.
|
||||
\return Returns old value.
|
||||
*/
|
||||
pbool_t mceTextReaderDisableMCE(mceTextReader_t *mceTextReader, pbool_t flag);
|
||||
|
||||
|
||||
/**
|
||||
Signal an error to the MCE processor.
|
||||
*/
|
||||
void mceRaiseError(xmlTextReader *reader, mceCtx_t *ctx, mceError_t error, const xmlChar *str, ...);
|
||||
|
||||
/**
|
||||
Internal function which does the MCE postprocessing. E.g. mceTextReaderRead() is implemented as
|
||||
\code
|
||||
mceTextReaderPostprocess(mceTextReader->reader, &mceTextReader->mceCtx, xmlTextReaderRead(mceTextReader->reader))
|
||||
\endcode
|
||||
This function is exposed to make existing libxml2 xmlTextReader code MCE-aware.
|
||||
*/
|
||||
int mceTextReaderPostprocess(xmlTextReader *reader, mceCtx_t *ctx, int ret);
|
||||
|
||||
/**
|
||||
Get the error code.
|
||||
*/
|
||||
mceError_t mceTextReaderGetError(mceTextReader_t *mceTextReader);
|
||||
|
||||
/**
|
||||
Helper macro to declare a start/end document block in a declarative way:
|
||||
\code
|
||||
mce_start_document(reader) {
|
||||
} mce_end_document(reader);
|
||||
\endcode
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_start_document(_reader_) \
|
||||
if (NULL!=(_reader_)) { \
|
||||
mceTextReaderRead(_reader_); \
|
||||
if (0)
|
||||
|
||||
/**
|
||||
\see mce_start_document.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_end_document(_reader_) \
|
||||
} /* if (NULL!=reader) */ \
|
||||
|
||||
|
||||
/**
|
||||
Container for mce_start_element and mce_start_attribute declarations.
|
||||
\see mce_match_element
|
||||
\see mce_match_attribute
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_start_choice(_reader_) \
|
||||
if (0)
|
||||
|
||||
/**
|
||||
\see mce_start_choice
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_end_choice(_reader_)
|
||||
|
||||
|
||||
/**
|
||||
Skips the attributes.
|
||||
\see mce_match_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_skip_attributes(_reader_) \
|
||||
mce_start_attributes(_reader_) { \
|
||||
} mce_end_attributes(_reader_);
|
||||
|
||||
|
||||
/**
|
||||
Skips the children.
|
||||
\see mce_match_attribute.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_skip_children(_reader_) \
|
||||
mce_start_children(_reader_) { \
|
||||
} mce_end_children(_reader_);
|
||||
|
||||
/**
|
||||
\see mce_start_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_start_children(_reader_) \
|
||||
if (!xmlTextReaderIsEmptyElement((_reader_)->reader)) { \
|
||||
mceTextReaderRead(_reader_); do { \
|
||||
if (0)
|
||||
|
||||
/**
|
||||
\see mce_start_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_end_children(_reader_) \
|
||||
else { \
|
||||
if (XML_READER_TYPE_END_ELEMENT!=xmlTextReaderNodeType((_reader_)->reader)) { \
|
||||
mceTextReaderNext(_reader_); /*skip unhandled element */ \
|
||||
} \
|
||||
} \
|
||||
} while(XML_READER_TYPE_END_ELEMENT!=xmlTextReaderNodeType((_reader_)->reader) && \
|
||||
XML_READER_TYPE_NONE!=xmlTextReaderNodeType((_reader_)->reader)); \
|
||||
} /* if (!xmlTextReaderIsEmptyElement(reader->reader)) */
|
||||
|
||||
|
||||
/**
|
||||
Helper macro to match an element. Useful for calling code in a separate function:
|
||||
|
||||
\code
|
||||
void handleElement(reader) {
|
||||
mce_start_choice(reader) {
|
||||
mce_start_element(reader, _X("ns"), _X("element")) {
|
||||
|
||||
} mce_end_element(reader)
|
||||
} mce_end_choice(reader);
|
||||
}
|
||||
|
||||
void parse(reader) {
|
||||
mce_start_document(reader) {
|
||||
mce_start_element(reader, _X("ns"), _X("ln")) {
|
||||
mce_skip_attributes(reader);
|
||||
mce_start_children(reader) {
|
||||
mce_match_element(reader, _X("ns"), _X("element")) {
|
||||
handleElement(reader);
|
||||
}
|
||||
} mce_end_children(reader);
|
||||
} mce_end_element();
|
||||
} mce_end_document(reader);
|
||||
}
|
||||
\endcode
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_match_element(_reader_, ns, ln) \
|
||||
} else if (XML_READER_TYPE_ELEMENT==xmlTextReaderNodeType((_reader_)->reader) \
|
||||
&& (NULL==ns || 0==xmlStrcmp(ns, xmlTextReaderConstNamespaceUri((_reader_)->reader))) \
|
||||
&& (NULL==ln || 0==xmlStrcmp(ln, xmlTextReaderConstLocalName((_reader_)->reader)))) {
|
||||
|
||||
|
||||
/**
|
||||
Helper macro to declare an element block in a declarative way:
|
||||
\code
|
||||
mce_start_element(reader) {
|
||||
mce_start_attributes(reader) {
|
||||
mce_start_attribute(reader, _X("ns"), _X("lnA")) {
|
||||
// code for handling lnA.
|
||||
} mce_end_attribute(reader);
|
||||
mce_start_attribute(reader, _X("ns"), _X("lnB")) {
|
||||
// code for handling lnB.
|
||||
} mce_end_attribute(reader);
|
||||
} mce_end_attributes(reader);
|
||||
mce_start_children(reader) {
|
||||
mce_start_element(reader, _X("ns"), _X("lnA")) {
|
||||
// code for handling lnA.
|
||||
} mce_end_element(reader);
|
||||
mce_start_element(reader, _X("ns"), _X("lnB")) {
|
||||
// code for handling lnB.
|
||||
} mce_end_element(reader);
|
||||
mce_start_text(reader) {
|
||||
// code for handling text.
|
||||
} mce_end_text(reader);
|
||||
} mce_end_children(reader);
|
||||
} mce_end_element(reader);
|
||||
\endcode
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_start_element(_reader_, ns, ln) \
|
||||
mce_match_element(_reader_, ns, ln)
|
||||
|
||||
/**
|
||||
\see mce_start_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_end_element(_reader_) \
|
||||
mceTextReaderNext(_reader_)
|
||||
|
||||
/**
|
||||
Matches #TEXT without consuming it.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_match_text(_reader_) \
|
||||
} else if (XML_READER_TYPE_TEXT==xmlTextReaderNodeType((_reader_)->reader) \
|
||||
|| XML_READER_TYPE_SIGNIFICANT_WHITESPACE==xmlTextReaderNodeType((_reader_)->reader)) {
|
||||
|
||||
|
||||
/**
|
||||
\see mce_start_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_start_text(_reader_) \
|
||||
mce_match_text(_reader_)
|
||||
|
||||
/**
|
||||
\see mce_start_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_end_text(_reader_) \
|
||||
mceTextReaderNext(_reader_)
|
||||
|
||||
/**
|
||||
\see mce_start_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_start_attributes(_reader_) \
|
||||
if (1==xmlTextReaderMoveToFirstAttribute((_reader_)->reader)) { \
|
||||
do { \
|
||||
if (0)
|
||||
|
||||
/**
|
||||
\see mce_start_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_end_attributes(_reader_) \
|
||||
else { /* skipped attribute */ } \
|
||||
} while(1==xmlTextReaderMoveToNextAttribute((_reader_)->reader)); \
|
||||
xmlTextReaderMoveToElement((_reader_)->reader); }
|
||||
|
||||
/**
|
||||
Helper macro to match an attribute. Useful for calling code in a separate function:
|
||||
|
||||
\code
|
||||
void handleA(reader) {
|
||||
mce_start_choice(reader) {
|
||||
mce_start_attribute(reader, _X("ns"), _X("attr")) {
|
||||
|
||||
} mce_end_attribute(reader);
|
||||
} mce_end_choice(reader);
|
||||
}
|
||||
|
||||
void parse(reader) {
|
||||
mce_start_document(reader) {
|
||||
mce_start_element(reader, _X("ns"), _X("ln")) {
|
||||
mce_start_attributes(reader) {
|
||||
mce_match_attribute(reader, _X("ns"), _X("attr")) {
|
||||
handleA(reader);
|
||||
}
|
||||
} mce_end_attributes(reader);
|
||||
mce_skip_children(reader);
|
||||
} mce_end_element();
|
||||
} mce_end_document(reader);
|
||||
}
|
||||
\endcode
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_match_attribute(_reader_, ns, ln) \
|
||||
} else if ((NULL==ns || 0==xmlStrcmp(ns, xmlTextReaderConstNamespaceUri((_reader_)->reader))) \
|
||||
&& (NULL==ln || 0==xmlStrcmp(ln, xmlTextReaderConstLocalName((_reader_)->reader)))) {
|
||||
|
||||
/**
|
||||
\see mce_start_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_start_attribute(_reader_, ns, ln) \
|
||||
mce_match_attribute(_reader_, ns, ln)
|
||||
|
||||
/**
|
||||
\see mce_start_element.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_end_attribute(_reader_)
|
||||
|
||||
|
||||
/**
|
||||
Error handling for MCE parsers.
|
||||
\code
|
||||
mce_start_element(&reader, NULL, _X("Default")) {
|
||||
const xmlChar *ext=NULL;
|
||||
const xmlChar *type=NULL;
|
||||
mce_start_attributes(&reader) {
|
||||
mce_start_attribute(&reader, NULL, _X("Extension")) {
|
||||
ext=xmlTextReaderConstValue(reader.reader);
|
||||
} mce_end_attribute(&reader);
|
||||
mce_start_attribute(&reader, NULL, _X("ContentType")) {
|
||||
type=xmlTextReaderConstValue(reader.reader);
|
||||
} mce_end_attribute(&reader);
|
||||
} mce_end_attributes(&reader);
|
||||
mce_error_guard_start(&reader) {
|
||||
mce_error(&reader, NULL==ext || ext[0]==0, MCE_ERROR_VALIDATION, "Missing @Extension attribute!");
|
||||
mce_error(&reader, NULL==type || type[0]==0, MCE_ERROR_VALIDATION, "Missing @ContentType attribute!");
|
||||
opcContainerType *ct=insertType(c, type, OPC_TRUE);
|
||||
mce_error(&reader, NULL==ct, MCE_ERROR_MEMORY, NULL);
|
||||
opcContainerExtension *ce=opcContainerInsertExtension(c, ext, OPC_TRUE);
|
||||
mce_error(&reader, NULL==ce, MCE_ERROR_MEMORY, NULL);
|
||||
mce_errorf(&reader, NULL!=ce->type && 0!=xmlStrcmp(ce->type, type), MCE_ERROR_VALIDATION, "Extension \"%s\" is mapped to type \"%s\" as well as \"%s\"", ext, type, ce->type);
|
||||
ce->type=ct->type;
|
||||
} mce_error_guard_end(&reader);
|
||||
mce_skip_children(&reader);
|
||||
} mce_end_element(&reader);
|
||||
\endcode
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_error_guard_start(_reader_) if (MCE_ERROR_NONE==(_reader_)->mceCtx.error) do {
|
||||
|
||||
/**
|
||||
\see mce_error_guard_start
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_error_guard_end(_reader_) } while(0)
|
||||
|
||||
/**
|
||||
Signal an error if \c guard is true; the rest of the enclosing error guard block is then skipped.
|
||||
\hideinitializer
|
||||
*/
|
||||
/* Sets the reader's error code and writes msg (or the stringified err when msg is NULL) to stderr verbatim via fputs, so a '%' in msg is never interpreted as a printf conversion. The trailing `continue` must stay unwrapped: it leaves the enclosing do { } while(0) opened by mce_error_guard_start. */
#define mce_error(_reader_, guard, err, msg) if (guard) { (_reader_)->mceCtx.error=(err); fputs((NULL!=(msg)?(msg):#err), stderr); continue; }
|
||||
|
||||
/**
|
||||
Signal a formatted error if \c guard is true; the rest of the enclosing error guard block is then skipped.
|
||||
\hideinitializer
|
||||
*/
|
||||
#if defined(__GNUC__)
|
||||
#define mce_errorf(_reader_, guard, err, msg, ...) if (guard) { mceRaiseError((_reader_)->reader, &(_reader_)->mceCtx, err, _X((NULL!=msg?msg:#err)), ##__VA_ARGS__ ); continue; }
|
||||
#else
|
||||
#define mce_errorf(_reader_, guard, err, msg, ...) if (guard) { mceRaiseError((_reader_)->reader, &(_reader_)->mceCtx, err, _X((NULL!=msg?msg:#err)), __VA_ARGS__ ); continue; }
|
||||
#endif
|
||||
|
||||
/**
|
||||
Only issues the error when in "strict mode".
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_error_strict mce_error
|
||||
|
||||
/**
|
||||
\see mce_error_strict
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_error_strictf mce_errorf
|
||||
|
||||
|
||||
/**
|
||||
Marker for an MCE definition.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_def
|
||||
|
||||
/**
|
||||
Marker for an MCE reference.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define mce_ref(r) (r)
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* MCE_TEXTREADER_H */
|
||||
@@ -1,176 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file mce/textwriter.h
|
||||
|
||||
*/
|
||||
#include <mce/config.h>
|
||||
#include <libxml/xmlwriter.h>
|
||||
#include <mce/helper.h>
|
||||
|
||||
#ifndef MCE_TEXTWRITER_H
|
||||
#define MCE_TEXTWRITER_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
Default flags for an MCE namespace declaration.
|
||||
*/
|
||||
#define MCE_DEFAULT 0x0
|
||||
|
||||
/**
|
||||
Flags MCE namespace declaration "ignorable".
|
||||
*/
|
||||
#define MCE_IGNORABLE 0x1
|
||||
|
||||
/**
|
||||
Flags MCE namespace declaration "must understand".
|
||||
*/
|
||||
#define MCE_MUSTUNDERSTAND 0x2
|
||||
|
||||
/**
|
||||
The MCE text writer context.
|
||||
*/
|
||||
typedef struct MCE_TEXTWRITER_STRUCT mceTextWriter;
|
||||
|
||||
/**
|
||||
Create a new MCE text writer.
|
||||
\see http://xmlsoft.org/html/libxml-xmlIO.html#xmlOutputBufferCreateIO
|
||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlNewTextWriter
|
||||
*/
|
||||
mceTextWriter *mceTextWriterCreateIO(xmlOutputWriteCallback iowrite, xmlOutputCloseCallback ioclose, void *ioctx, xmlCharEncodingHandlerPtr encoder);
|
||||
|
||||
/**
|
||||
Helper which creates a new MCE text writer for a FILE handle.
|
||||
*/
|
||||
mceTextWriter *mceNewTextWriterFile(FILE *file);
|
||||
|
||||
/**
|
||||
Free all resources for \c w.
|
||||
*/
|
||||
int mceTextWriterFree(mceTextWriter *w);
|
||||
|
||||
/**
|
||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterStartDocument
|
||||
*/
|
||||
int mceTextWriterStartDocument(mceTextWriter *w);
|
||||
|
||||
/**
|
||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterEndDocument
|
||||
*/
|
||||
int mceTextWriterEndDocument(mceTextWriter *w);
|
||||
|
||||
/**
|
||||
Start a new XML element. If ns==NULL then there is no namespace and ""==ns means the default namespace.
|
||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterStartElement
|
||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterStartElementNS
|
||||
*/
|
||||
int mceTextWriterStartElement(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln);
|
||||
|
||||
/**
|
||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterEndElement
|
||||
*/
|
||||
int mceTextWriterEndElement(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln);
|
||||
|
||||
/**
|
||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterWriteString
|
||||
*/
|
||||
int mceTextWriterWriteString(mceTextWriter *w, const xmlChar *content);
|
||||
|
||||
/**
|
||||
Register a namespace. Must be called before mceTextWriterStartElement.
|
||||
\see MCE_DEFAULT
|
||||
\see MCE_IGNORABLE
|
||||
\see MCE_MUSTUNDERSTAND
|
||||
*/
|
||||
const xmlChar *mceTextWriterRegisterNamespace(mceTextWriter *w, const xmlChar *ns, const xmlChar *prefix, int flags);
|
||||
|
||||
/**
|
||||
Register qname (ns, ln) as a "process content" element wrt. MCE. Must be called before mceTextWriterStartElement.
|
||||
*/
|
||||
int mceTextWriterProcessContent(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln);
|
||||
|
||||
/**
|
||||
Writes a formatted attribute.
|
||||
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterWriteFormatAttribute
|
||||
*/
|
||||
int mceTextWriterAttributeF(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln, const char *value, ...);
|
||||
|
||||
/**
|
||||
Starts an MCE alternate content section.
|
||||
*/
|
||||
int mceTextWriterStartAlternateContent(mceTextWriter *w);
|
||||
|
||||
/**
|
||||
Ends an MCE alternate content section.
|
||||
*/
|
||||
int mceTextWriterEndAlternateContent(mceTextWriter *w);
|
||||
|
||||
/**
|
||||
Start an MCE choice.
|
||||
*/
|
||||
int mceTextWriterStartChoice(mceTextWriter *w, const xmlChar *ns);
|
||||
|
||||
/**
|
||||
Ends an MCE choice.
|
||||
*/
|
||||
int mceTextWriterEndChoice(mceTextWriter *w);
|
||||
|
||||
/**
|
||||
Start an MCE fallback.
|
||||
*/
|
||||
int mceTextWriterStartFallback(mceTextWriter *w);
|
||||
|
||||
/**
|
||||
Ends an MCE fallback.
|
||||
*/
|
||||
int mceTextWriterEndFallback(mceTextWriter *w);
|
||||
|
||||
|
||||
/**
|
||||
Returns the underlying xmlTextWriter.
|
||||
*/
|
||||
xmlTextWriterPtr mceTextWriterIntern(mceTextWriter *w);
|
||||
|
||||
/**
|
||||
Helper which creates a new xmlTextWriterPtr for a FILE handle.
|
||||
*/
|
||||
xmlTextWriterPtr xmlNewTextWriterFile(FILE *file);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* MCE_TEXTWRITER_H */
|
||||
@@ -1,189 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
/**@file config/opc/config.h
|
||||
*/
|
||||
#ifndef OPC_CONFIG_H
|
||||
#define OPC_CONFIG_H
|
||||
|
||||
#include <libxml/xmlstring.h>
|
||||
#include <plib/plib.h>
|
||||
#include <assert.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
Assert expression e is true. Will be removed entirely in release mode.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define OPC_ASSERT(e) assert(e)
|
||||
|
||||
/**
|
||||
Assert expression e is true. Expression will be executed in release mode too.
|
||||
\hideinitializer
|
||||
*/
|
||||
#ifdef NDEBUG
|
||||
#define OPC_ENSURE(e) (void)(e)
|
||||
#else
|
||||
#define OPC_ENSURE(e) assert(e)
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
Constant for boolean true.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define OPC_TRUE (0==0)
|
||||
|
||||
/**
|
||||
Constant for boolean false.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define OPC_FALSE (0==1)
|
||||
|
||||
/**
|
||||
Boolean type.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef pbool_t opc_bool_t;
|
||||
|
||||
/**
|
||||
Type which represents an offset in e.g. a file.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef pofs_t opc_ofs_t;
|
||||
|
||||
/**
|
||||
8-bit unsigned integer.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef puint8_t opc_uint8_t;
|
||||
|
||||
/**
|
||||
16-bit unsigned integer.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef puint16_t opc_uint16_t;
|
||||
|
||||
/**
|
||||
32-bit unsigned integer.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef puint32_t opc_uint32_t;
|
||||
|
||||
/**
|
||||
64-bit unsigned integer.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef puint64_t opc_uint64_t;
|
||||
|
||||
/**
|
||||
8-bit signed integer.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef pint8_t opc_int8_t;
|
||||
|
||||
/**
|
||||
16-bit signed integer.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef pint16_t opc_int16_t;
|
||||
|
||||
/**
|
||||
32-bit signed integer.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef pint32_t opc_int32_t;
|
||||
|
||||
/**
|
||||
64-bit signed integer.
|
||||
\hideinitializer
|
||||
*/
|
||||
typedef pint64_t opc_int64_t;
|
||||
|
||||
/**
|
||||
Default size of the deflate buffer used by zlib.
|
||||
*/
|
||||
#define OPC_DEFLATE_BUFFER_SIZE 4096
|
||||
|
||||
/**
|
||||
Max system path len.
|
||||
*/
|
||||
#define OPC_MAX_PATH 512
|
||||
|
||||
/**
|
||||
Error codes for the OPC module.
|
||||
*/
|
||||
typedef enum OPC_ERROR_ENUM {
|
||||
OPC_ERROR_NONE,
|
||||
OPC_ERROR_STREAM,
|
||||
OPC_ERROR_SEEK, // can't seek
|
||||
OPC_ERROR_UNSUPPORTED_DATA_DESCRIPTOR,
|
||||
OPC_ERROR_UNSUPPORTED_COMPRESSION,
|
||||
OPC_ERROR_DEFLATE,
|
||||
OPC_ERROR_HEADER,
|
||||
OPC_ERROR_MEMORY,
|
||||
OPC_ERROR_XML,
|
||||
OPC_ERROR_USER // user triggered an abort
|
||||
} opc_error_t;
|
||||
|
||||
/**
|
||||
Compression options for OPC streams.
|
||||
*/
|
||||
typedef enum OPC_COMPRESSIONOPTION_ENUM {
|
||||
OPC_COMPRESSIONOPTION_NONE,
|
||||
OPC_COMPRESSIONOPTION_NORMAL,
|
||||
OPC_COMPRESSIONOPTION_MAXIMUM,
|
||||
OPC_COMPRESSIONOPTION_FAST,
|
||||
OPC_COMPRESSIONOPTION_SUPERFAST
|
||||
} opcCompressionOption_t;
|
||||
|
||||
|
||||
/**
|
||||
Helper for debug logs.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define opc_logf printf
|
||||
|
||||
/**
|
||||
Abstraction for memset(m, 0, s).
|
||||
\hideinitializer
|
||||
*/
|
||||
#define opc_bzero_mem(m,s) memset(m, 0, s)
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_CONFIG_H */
|
||||
@@ -1,300 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/container.h
|
||||
|
||||
The container.h module provides the fundamental functions for dealing with ZIP-based OPC containers.
|
||||
|
||||
An OPC container can be opened in READ-ONLY mode, WRITE-ONLY mode, READ/WRITE mode, TEMPLATE mode and TRANSITION mode.
|
||||
The most notable mode is the READ/WRITE mode, which gives you concurrent stream-based READ and WRITE access to a
|
||||
single ZIP-based OPC container. This is achieved without the use of temporary files by taking advantage of the
|
||||
OPC specific “interleave” mode. \see http://standards.iso.org/ittf/PubliclyAvailableStandards/c051459_ISOIEC_29500-2_2008(E).zip
|
||||
|
||||
The TEMPLATE mode allows very fast customized "cloning" of ZIP-based OPC containers by using "RAW access" to the ZIP streams.
|
||||
The TRANSITION mode is a special version of the TEMPLATE mode, which allows transition-based READ/WRITE access to the
|
||||
ZIP-based OPC container using a temporary file.
|
||||
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
#include <opc/file.h>
|
||||
|
||||
#ifndef OPC_CONTAINER_H
|
||||
#define OPC_CONTAINER_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/**
|
||||
Handle to an OPC container created by \ref opcContainerOpen.
|
||||
\see opcContainerOpen.
|
||||
*/
|
||||
typedef struct OPC_CONTAINER_STRUCT opcContainer;
|
||||
|
||||
/**
|
||||
Modes for opcContainerOpen();
|
||||
\see opcContainerOpen
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
Opens the OPC container denoted by \a fileName in READ-ONLY mode. The \a destName parameter must be \a NULL.
|
||||
\hideinitializer
|
||||
*/
|
||||
OPC_OPEN_READ_ONLY=0,
|
||||
/**
|
||||
Opens the OPC container denoted by \a fileName in WRITE-ONLY mode. The \a destName parameter must be \a NULL.
|
||||
\hideinitializer
|
||||
*/
|
||||
OPC_OPEN_WRITE_ONLY=1,
|
||||
/**
|
||||
Opens the OPC container denoted by \a fileName in READ/WRITE mode. The \a destName parameter must be \a NULL.
|
||||
\hideinitializer
|
||||
*/
|
||||
OPC_OPEN_READ_WRITE=2,
|
||||
/**
|
||||
This mode will open the container denoted by \a fileName in READ-ONLY mode and the container denoted by
|
||||
\a destName in write-only mode. Any modifications will be written to the container denoted by \a destName
|
||||
and the unmodified streams from \a fileName will be written to \a destName on closing.
|
||||
\warning Currently not implemented.
|
||||
\hideinitializer
|
||||
*/
|
||||
OPC_OPEN_TEMPLATE=3,
|
||||
/**
|
||||
Like the OPC_OPEN_TEMPLATE mode, but the \a destName will be renamed to the \a fileName on closing. If \a destName
|
||||
is \a NULL, then the name of the temporary file will be generated automatically.
|
||||
\warning Currently not implemented.
|
||||
\hideinitializer
|
||||
*/
|
||||
OPC_OPEN_TRANSITION=4
|
||||
} opcContainerOpenMode;
|
||||
|
||||
/** Modes for opcContainerClose.
|
||||
\see opcContainerClose.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
Close the OPC container without any further postprocessing.
|
||||
\hideinitializer
|
||||
*/
|
||||
OPC_CLOSE_NOW = 0,
|
||||
/**
|
||||
Close the OPC container and trim the file by removing unused fragments like e.g.
|
||||
deleted parts.
|
||||
\hideinitializer
|
||||
*/
|
||||
OPC_CLOSE_TRIM = 1,
|
||||
/**
|
||||
Close the OPC container like in \a OPC_CLOSE_TRIM mode, but additionally remove any
|
||||
"interleaved" parts by reordering them.
|
||||
\warning Currently not implemented. Same semantics as OPC_CLOSE_TRIM.
|
||||
\hideinitializer
|
||||
*/
|
||||
OPC_CLOSE_DEFRAG = 2
|
||||
} opcContainerCloseMode;
|
||||
|
||||
/**
|
||||
Opens a ZIP-based OPC container.
|
||||
@param[in] fileName. For more details see \ref opcContainerOpenMode.
|
||||
@param[in] mode. For more details see \ref opcContainerOpenMode.
|
||||
@param[in] userContext. Will not be modified by libopc. Can be used to e.g. store the "this" pointer for C++ bindings.
|
||||
@param[in] destName. For more details see \ref opcContainerOpenMode.
|
||||
@return \a NULL if failed.
|
||||
\see opcContainerOpenMode
|
||||
\see opcContainerDump
|
||||
*/
|
||||
opcContainer* opcContainerOpen(const xmlChar *fileName,
|
||||
opcContainerOpenMode mode,
|
||||
void *userContext,
|
||||
const xmlChar *destName);
|
||||
|
||||
/**
|
||||
Opens a ZIP-based OPC container from memory.
|
||||
@param[in] data.
|
||||
@param[in] data_len.
|
||||
@param[in] userContext. Will not be modified by libopc. Can be used to e.g. store the "this" pointer for C++ bindings.
|
||||
@param[in] mode. For more details see \ref opcContainerOpenMode.
|
||||
@return \a NULL if failed.
|
||||
*/
|
||||
opcContainer* opcContainerOpenMem(const opc_uint8_t *data, opc_uint32_t data_len,
|
||||
opcContainerOpenMode mode,
|
||||
void *userContext);
|
||||
|
||||
/**
|
||||
Opens a ZIP-based OPC container using caller-supplied I/O callbacks.
|
||||
@param[in] ioread.
|
||||
@param[in] iowrite.
|
||||
@param[in] ioclose.
|
||||
@param[in] ioseek.
|
||||
@param[in] iotrim.
|
||||
@param[in] ioflush.
|
||||
@param[in] iocontext.
|
||||
@param[in] file_size.
|
||||
@param[in] userContext. Will not be modified by libopc. Can be used to e.g. store the "this" pointer for C++ bindings.
|
||||
@param[in] mode. For more details see \ref opcContainerOpenMode.
|
||||
@return \a NULL if failed.
|
||||
*/
|
||||
opcContainer* opcContainerOpenIO(opcFileReadCallback *ioread,
|
||||
opcFileWriteCallback *iowrite,
|
||||
opcFileCloseCallback *ioclose,
|
||||
opcFileSeekCallback *ioseek,
|
||||
opcFileTrimCallback *iotrim,
|
||||
opcFileFlushCallback *ioflush,
|
||||
void *iocontext,
|
||||
pofs_t file_size,
|
||||
opcContainerOpenMode mode,
|
||||
void *userContext);
|
||||
|
||||
/**
|
||||
Close an OPC container.
|
||||
@param[in] c. \ref opcContainer opened by \ref opcContainerOpen.
|
||||
@param[in] mode. For more information see \ref opcContainerCloseMode.
|
||||
@return OPC_ERROR_NONE if successful.
|
||||
\see opcContainerOpen
|
||||
\see opcContainerCloseMode
|
||||
*/
|
||||
opc_error_t opcContainerClose(opcContainer *c, opcContainerCloseMode mode);
|
||||
|
||||
/**
|
||||
Returns the unmodified user context passed to \ref opcContainerOpen.
|
||||
\see opcContainerOpen
|
||||
*/
|
||||
void *opcContainerGetUserContext(opcContainer *c);
|
||||
|
||||
/**
|
||||
List all types, relations and parts of the container \a c to \a out.
|
||||
\par Sample:
|
||||
\include opc_dump.c
|
||||
*/
|
||||
opc_error_t opcContainerDump(opcContainer *c, FILE *out);
|
||||
|
||||
/**
|
||||
Exports the OPC container to "Flat OPC" (http://blogs.msdn.com/b/ericwhite/archive/2008/09/29/the-flat-opc-format.aspx).
|
||||
The flat versions of an OPC file are very important when dealing with e.g XSL(T)-based or Javascript-based transformations.
|
||||
\see opcContainerFlatImport.
|
||||
\todo Implementation needed.
|
||||
*/
|
||||
int opcContainerFlatExport(opcContainer *c, const xmlChar *fileName);
|
||||
|
||||
/**
|
||||
Imports the flat version of an OPC container.
|
||||
\see opcContainerFlatExport.
|
||||
\todo Implementation needed.
|
||||
*/
|
||||
int opcContainerFlatImport(opcContainer *c, const xmlChar *fileName);
|
||||
|
||||
/**
|
||||
Iterate all types.
|
||||
\code
|
||||
for(const xmlChar *type=opcContentTypeFirst(c);
|
||||
NULL!=type;
|
||||
type=opcContentTypeNext(c, type)) {
|
||||
printf("%s\n", type);
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
const xmlChar *opcContentTypeFirst(opcContainer *container);
|
||||
|
||||
/**
|
||||
\see opcContentTypeFirst()
|
||||
*/
|
||||
const xmlChar *opcContentTypeNext(opcContainer *container, const xmlChar *type);
|
||||
|
||||
/**
|
||||
Iterate extensions.
|
||||
\code
|
||||
for(const xmlChar *ext=opcExtensionFirst(c);
|
||||
NULL!=ext;
|
||||
ext=opcExtensionNext(c, ext)) {
|
||||
printf("%s\n", ext);
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
const xmlChar *opcExtensionFirst(opcContainer *container);
|
||||
|
||||
/**
|
||||
\see opcExtensionFirst()
|
||||
*/
|
||||
const xmlChar *opcExtensionNext(opcContainer *container, const xmlChar *ext);
|
||||
|
||||
/**
|
||||
Get registered type for extension.
|
||||
\see opcExtensionRegister()
|
||||
*/
|
||||
const xmlChar *opcExtensionGetType(opcContainer *container, const xmlChar *ext);
|
||||
|
||||
/**
|
||||
Register a mime-type and an extension.
|
||||
\see opcExtensionGetType()
|
||||
*/
|
||||
const xmlChar *opcExtensionRegister(opcContainer *container, const xmlChar *ext, const xmlChar *type);
|
||||
|
||||
|
||||
/**
|
||||
Iterate through all relation types of the container:
|
||||
\code
|
||||
for(const xmlChar *type=opcRelationTypeFirst(c);
|
||||
NULL!=type;
|
||||
type=opcRelationTypeNext(c, type)) {
|
||||
printf("%s\n", type);
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
const xmlChar *opcRelationTypeFirst(opcContainer *container);
|
||||
|
||||
/**
|
||||
\see opcRelationTypeFirst()
|
||||
*/
|
||||
const xmlChar *opcRelationTypeNext(opcContainer *container, const xmlChar *type);
|
||||
|
||||
|
||||
/**
|
||||
Iterate through all external targets of the container:
|
||||
\code
|
||||
for(const xmlChar *target=opcExternalTargetFirst(c);
|
||||
NULL!=target;
|
||||
target=opcExternalTargetNext(c, target)) {
|
||||
printf("%s\n", target);
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
const xmlChar *opcExternalTargetFirst(opcContainer *container);
|
||||
|
||||
/**
|
||||
\see opcExternalTargetFirst()
|
||||
*/
|
||||
const xmlChar *opcExternalTargetNext(opcContainer *container, const xmlChar *target);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_CONTAINER_H */
|
||||
@@ -1,200 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/file.h
|
||||
The opc module contains the file library functions.
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
|
||||
#ifndef OPC_FILE_H
|
||||
#define OPC_FILE_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
Flag for READ access.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define OPC_FILE_READ (1<<0)
|
||||
|
||||
/**
|
||||
Flag for WRITE access.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define OPC_FILE_WRITE (1<<1)
|
||||
|
||||
/**
|
||||
Flag indicates that file will be truncated when opened.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define OPC_FILE_TRUNC (1<<2)
|
||||
|
||||
|
||||
/**
|
||||
Abstraction for seek modes.
|
||||
*/
|
||||
typedef enum OPC_FILESEEKMODE_ENUM {
|
||||
opcFileSeekSet = SEEK_SET,
|
||||
opcFileSeekCur = SEEK_CUR,
|
||||
opcFileSeekEnd = SEEK_END
|
||||
} opcFileSeekMode;
|
||||
|
||||
/**
|
||||
Callback to read a file. E.g. for a FILE * context this can be implemented as
|
||||
\code
|
||||
static int opcFileRead(void *iocontext, char *buffer, int len) {
|
||||
return fread(buffer, sizeof(char), len, (FILE*)iocontext);
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
typedef int opcFileReadCallback(void *iocontext, char *buffer, int len);
|
||||
|
||||
/**
|
||||
Callback to write a file. E.g. for a FILE * context this can be implemented as
|
||||
\code
|
||||
static int opcFileWrite(void *iocontext, const char *buffer, int len) {
|
||||
return fwrite(buffer, sizeof(char), len, (FILE*)iocontext);
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
typedef int opcFileWriteCallback(void *iocontext, const char *buffer, int len);
|
||||
|
||||
/**
|
||||
Callback to close a file. E.g. for a FILE * context this can be implemented as
|
||||
\code
|
||||
static int opcFileClose(void *iocontext) {
|
||||
return fclose((FILE*)iocontext);
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
typedef int opcFileCloseCallback(void *iocontext);
|
||||
|
||||
/**
|
||||
Callback to seek a file. E.g. for a FILE * context this can be implemented as
|
||||
\code
|
||||
static opc_ofs_t opcFileSeek(void *iocontext, opc_ofs_t ofs) {
|
||||
int ret=fseek((FILE*)iocontext, ofs, SEEK_SET);
|
||||
if (ret>=0) {
|
||||
return ftell((FILE*)iocontext);
|
||||
} else {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
typedef opc_ofs_t opcFileSeekCallback(void *iocontext, opc_ofs_t ofs);
|
||||
|
||||
/**
|
||||
Callback to trim a file. E.g. for a FILE * context this can be implemented as
|
||||
\code
|
||||
static int opcFileTrim(void *iocontext, opc_ofs_t new_size) {
|
||||
#ifdef WIN32
|
||||
return _chsize(fileno((FILE*)iocontext), new_size);
|
||||
#else
|
||||
return ftruncate(fileno((FILE*)iocontext), new_size);
|
||||
#endif
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
typedef int opcFileTrimCallback(void *iocontext, opc_ofs_t new_size);
|
||||
|
||||
/**
|
||||
Callback to flush a file. E.g. for a FILE * context this can be implemented as
|
||||
\code
|
||||
static int opcFileFlush(void *iocontext) {
|
||||
return fflush((FILE*)iocontext);
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
typedef int opcFileFlushCallback(void *iocontext);
|
||||
|
||||
/**
|
||||
Represents a state of a file, i.e. file position (buf_pos) and error status (err).
|
||||
*/
|
||||
typedef struct OPC_FILERAWSTATE_STRUCT {
|
||||
opc_error_t err;
|
||||
opc_ofs_t buf_pos; // current pos in file
|
||||
} opcFileRawState;
|
||||
|
||||
/**
|
||||
File IO context.
|
||||
*/
|
||||
typedef struct OPC_IO_STRUCT {
|
||||
opcFileReadCallback *_ioread;
|
||||
opcFileWriteCallback *_iowrite;
|
||||
opcFileCloseCallback *_ioclose;
|
||||
opcFileSeekCallback *_ioseek;
|
||||
opcFileTrimCallback *_iotrim;
|
||||
opcFileFlushCallback *_ioflush;
|
||||
void *iocontext;
|
||||
int flags;
|
||||
opcFileRawState state;
|
||||
opc_ofs_t file_size;
|
||||
} opcIO_t;
|
||||
|
||||
/**
|
||||
Initialize an IO context.
|
||||
*/
|
||||
opc_error_t opcFileInitIO(opcIO_t *io,
|
||||
opcFileReadCallback *ioread,
|
||||
opcFileWriteCallback *iowrite,
|
||||
opcFileCloseCallback *ioclose,
|
||||
opcFileSeekCallback *ioseek,
|
||||
opcFileTrimCallback *iotrim,
|
||||
opcFileFlushCallback *ioflush,
|
||||
void *iocontext,
|
||||
pofs_t file_size,
|
||||
int flags);
|
||||
|
||||
/**
|
||||
Initialize an IO context for a file.
|
||||
*/
|
||||
opc_error_t opcFileInitIOFile(opcIO_t *io, const xmlChar *filename, int flags);
|
||||
|
||||
/**
|
||||
Initialize an IO for memory.
|
||||
\warning Currently supports READ-ONLY file access.
|
||||
*/
|
||||
opc_error_t opcFileInitIOMemory(opcIO_t *io, const opc_uint8_t *data, opc_uint32_t data_len, int flags);
|
||||
|
||||
/**
|
||||
Cleanup an IO context, i.e. release all system resources.
|
||||
*/
|
||||
opc_error_t opcFileCleanupIO(opcIO_t *io);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_FILE_H */
|
||||
@@ -1,60 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/helper.h
|
||||
Contains helper functions for the opc module.
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
|
||||
#ifndef OPC_HELPER_H
|
||||
#define OPC_HELPER_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
Constructs a segment name.
|
||||
*/
|
||||
opc_uint16_t opcHelperAssembleSegmentName(char *out, opc_uint16_t out_size, const xmlChar *name, opc_uint32_t segment_number, opc_uint32_t next_segment_id, opc_bool_t rels_segment, opc_uint16_t *out_max);
|
||||
|
||||
/**
|
||||
Splits a filename into the segment information.
|
||||
*/
|
||||
opc_error_t opcHelperSplitFilename(opc_uint8_t *filename, opc_uint32_t filename_length, opc_uint32_t *segment_number, opc_bool_t *last_segment, opc_bool_t *rel_segment);
|
||||
|
||||
/* Close the extern "C" block only after the last declaration so that C++ includers see these functions with C linkage. */
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_HELPER_H */
|
||||
@@ -1,74 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/inputstream.h
|
||||
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
|
||||
#ifndef OPC_INPUTSTREAM_H
|
||||
#define OPC_INPUTSTREAM_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/**
|
||||
Internal type which represents a binary input stream.
|
||||
*/
|
||||
typedef struct OPC_CONTAINER_INPUTSTREAM_STRUCT opcContainerInputStream;
|
||||
|
||||
/**
|
||||
Opens the part \c name of the \c container for reading.
|
||||
*/
|
||||
opcContainerInputStream* opcContainerOpenInputStream(opcContainer *container, const xmlChar *name);
|
||||
|
||||
/**
|
||||
Reads maximal \c buffer_len bytes from the input \c stream to \c buffer.
|
||||
\return The number of bytes read or "0" in case of an error or end-of-stream.
|
||||
*/
|
||||
opc_uint32_t opcContainerReadInputStream(opcContainerInputStream* stream, opc_uint8_t *buffer, opc_uint32_t buffer_len);
|
||||
|
||||
/**
|
||||
Closes the input stream and releases all system resources.
|
||||
*/
|
||||
opc_error_t opcContainerCloseInputStream(opcContainerInputStream* stream);
|
||||
|
||||
/**
|
||||
Returns the type of compression used for the stream.
|
||||
*/
|
||||
opcCompressionOption_t opcContainerGetInputStreamCompressionOption(opcContainerInputStream* stream);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_INPUTSTREAM_H */
|
||||
@@ -1,73 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/opc.h
|
||||
The opc module contains the basic library functions.
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
#include <opc/container.h>
|
||||
#include <opc/part.h>
|
||||
#include <opc/relation.h>
|
||||
#include <opc/inputstream.h>
|
||||
#include <opc/outputstream.h>
|
||||
#include <opc/zip.h>
|
||||
#include <opc/xmlreader.h>
|
||||
#include <opc/xmlwriter.h>
|
||||
#include <opc/properties.h>
|
||||
|
||||
#ifndef OPC_OPC_H
|
||||
#define OPC_OPC_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Initialize libopc.
|
||||
* Sample:
|
||||
* \include opc_helloworld.c
|
||||
* @return OPC_ERROR_NONE if successful.
|
||||
*/
|
||||
opc_error_t opcInitLibrary(void); /* (void): an empty () in C leaves the parameter list unspecified */
|
||||
|
||||
/**
|
||||
* Free libopc. Clean up all resources.
|
||||
* @return OPC_ERROR_NONE if successful.
|
||||
* \see opcInitLibrary.
|
||||
*/
|
||||
opc_error_t opcFreeLibrary(void); /* (void): an empty () in C leaves the parameter list unspecified */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_OPC_H */
|
||||
@@ -1,71 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/outputstream.h
|
||||
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
|
||||
#ifndef OPC_OUTPUTSTREAM_H
|
||||
#define OPC_OUTPUTSTREAM_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/**
|
||||
Internal type which represents a binary output stream.
|
||||
*/
|
||||
typedef struct OPC_CONTAINER_OUTPUTSTREAM_STRUCT opcContainerOutputStream;
|
||||
|
||||
/**
|
||||
Open the part \c name for writing in \c container with compression \c compression_option.
|
||||
\note Make sure the part exists!
|
||||
\see opcPartCreate.
|
||||
*/
|
||||
opcContainerOutputStream* opcContainerCreateOutputStream(opcContainer *container, const xmlChar *name, opcCompressionOption_t compression_option);
|
||||
|
||||
/**
|
||||
Write \c buffer_len bytes from \c buffer to \c stream.
|
||||
\return Returns the number of bytes written.
|
||||
*/
|
||||
opc_uint32_t opcContainerWriteOutputStream(opcContainerOutputStream* stream, const opc_uint8_t *buffer, opc_uint32_t buffer_len);
|
||||
|
||||
/**
|
||||
Close the \c stream and free all associated resources.
|
||||
*/
|
||||
opc_error_t opcContainerCloseOutputStream(opcContainerOutputStream* stream);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_OUTPUTSTREAM_H */
|
||||
@@ -1,118 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/part.h
|
||||
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
|
||||
#ifndef OPC_PART_H
|
||||
#define OPC_PART_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/**
|
||||
Handle to an OPC part created by \ref opcPartOpen.
|
||||
\see opcPartOpen.
|
||||
*/
|
||||
typedef xmlChar* opcPart;
|
||||
|
||||
/**
|
||||
Represents an invalid (resp. NULL) part.
|
||||
In relations, OPC_PART_INVALID also represents the root part.
|
||||
\hideinitializer
|
||||
*/
|
||||
#define OPC_PART_INVALID NULL
|
||||
|
||||
/**
|
||||
Find a part in a \c container by \c absolutePath and/or \c type.
|
||||
Currently no flags are supported.
|
||||
*/
|
||||
opcPart opcPartFind(opcContainer *container,
|
||||
const xmlChar *absolutePath,
|
||||
const xmlChar *type,
|
||||
int flags);
|
||||
|
||||
/**
|
||||
Creates a part in a \c container with \c absolutePath and \c type.
|
||||
Currently no flags are supported.
|
||||
*/
|
||||
opcPart opcPartCreate(opcContainer *container,
|
||||
const xmlChar *absolutePath,
|
||||
const xmlChar *type,
|
||||
int flags);
|
||||
|
||||
/**
|
||||
Returns the type of the \c part.
|
||||
The string is interned and must not be freed.
|
||||
*/
|
||||
const xmlChar *opcPartGetType(opcContainer *c, opcPart part);
|
||||
|
||||
/**
|
||||
Returns the type of the \c part.
|
||||
If \c override_only then the return value will be NULL for parts not having an override type.
|
||||
The string is interned and must not be freed.
|
||||
*/
|
||||
const xmlChar *opcPartGetTypeEx(opcContainer *c, opcPart part, opc_bool_t override_only);
|
||||
|
||||
/**
|
||||
Deletes the part \c absolutePath in the \c container.
|
||||
*/
|
||||
opc_error_t opcPartDelete(opcContainer *container, const xmlChar *absolutePath);
|
||||
|
||||
/**
|
||||
Get the first part.
|
||||
\code
|
||||
for(opcPart part=opcPartGetFirst(c);OPC_PART_INVALID!=part;part=opcPartGetNext(c, part)) {
|
||||
printf("%s; %s\n", part, opcPartGetType(c, part));
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
opcPart opcPartGetFirst(opcContainer *container);
|
||||
|
||||
/**
|
||||
Get the next part.
|
||||
\see opcPartGetFirst
|
||||
*/
|
||||
opcPart opcPartGetNext(opcContainer *container, opcPart part);
|
||||
|
||||
/**
|
||||
Returns the size in bytes of the \c part.
|
||||
*/
|
||||
opc_ofs_t opcPartGetSize(opcContainer *c, opcPart part);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_PART_H */
|
||||
@@ -1,121 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/properties.h
|
||||
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
#include <opc/container.h>
|
||||
|
||||
#ifndef OPC_PROPERTIES_H
|
||||
#define OPC_PROPERTIES_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
Represents a simple Dublin Core type.
|
||||
*/
|
||||
typedef struct OPC_DC_SIMPLE_TYPE {
|
||||
xmlChar *str;
|
||||
xmlChar *lang;
|
||||
} opcDCSimpleType_t;
|
||||
|
||||
/**
|
||||
Represents the core properties of an OPC container.
|
||||
*/
|
||||
typedef struct OPC_PROPERTIES_STRUCT {
|
||||
xmlChar *category; /* xsd:string */
|
||||
xmlChar *contentStatus; /* xsd:string */
|
||||
xmlChar *created; /* dc:date */
|
||||
opcDCSimpleType_t creator; /* dc:any */
|
||||
opcDCSimpleType_t description; /* dc:any */
|
||||
opcDCSimpleType_t identifier; /* dc:any */
|
||||
opcDCSimpleType_t *keyword_array; /* cp:CT_Keywords */
|
||||
opc_uint32_t keyword_items;
|
||||
opcDCSimpleType_t language; /* dc:any */
|
||||
xmlChar *lastModifiedBy; /* xsd:string */
|
||||
xmlChar *lastPrinted; /* xsd:dateTime */
|
||||
xmlChar *modified; /* dc:date */
|
||||
xmlChar *revision; /* xsd:string */
|
||||
opcDCSimpleType_t subject; /* dc:any */
|
||||
opcDCSimpleType_t title; /* dc:any */
|
||||
xmlChar *version; /* xsd:string */
|
||||
} opcProperties_t;
|
||||
|
||||
/**
|
||||
Initialize the core properties \c cp.
|
||||
\see opcCorePropertiesSetString
|
||||
*/
|
||||
opc_error_t opcCorePropertiesInit(opcProperties_t *cp);
|
||||
|
||||
/**
|
||||
Cleanup the core properties \c cp, i.e. release all resources.
|
||||
\see opcCorePropertiesSetString
|
||||
*/
|
||||
opc_error_t opcCorePropertiesCleanup(opcProperties_t *cp);
|
||||
|
||||
/**
|
||||
Read the core properties \c cp from the container \c c.
|
||||
*/
|
||||
opc_error_t opcCorePropertiesRead(opcProperties_t *cp, opcContainer *c);
|
||||
|
||||
|
||||
/**
|
||||
Write/Update the core properties \c cp in the container \c c.
|
||||
*/
|
||||
opc_error_t opcCorePropertiesWrite(opcProperties_t *cp, opcContainer *c);
|
||||
|
||||
/**
|
||||
Update a string in the core properties the right way.
|
||||
\code
|
||||
opcProperties_t cp;
|
||||
opcCorePropertiesInit(&cp);
|
||||
opcCorePropertiesSetString(&cp.revision, "1");
|
||||
opcCorePropertiesSetStringLang(&cp.creator, "Florian Reuter", NULL);
|
||||
opcCorePropertiesCleanup(&cp);
|
||||
\endcode
|
||||
*/
|
||||
opc_error_t opcCorePropertiesSetString(xmlChar **prop, const xmlChar *str);
|
||||
|
||||
/**
|
||||
Update a core property the right way.
|
||||
\see opcCorePropertiesSetString
|
||||
*/
|
||||
opc_error_t opcCorePropertiesSetStringLang(opcDCSimpleType_t *prop, const xmlChar *str, const xmlChar *lang);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_PROPERTIES_H */
|
||||
@@ -1,140 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/relation.h
|
||||
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
|
||||
#ifndef OPC_RELATION_H
|
||||
#define OPC_RELATION_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
Identifier for an OPC relation.
|
||||
*/
|
||||
typedef opc_uint32_t opcRelation;
|
||||
|
||||
/**
|
||||
Constant which represents an invalid relation.
|
||||
*/
|
||||
#define OPC_RELATION_INVALID (-1)
|
||||
|
||||
/**
|
||||
Find a relation originating from \c part in \c container with \c relationId and/or \c mimeType.
|
||||
If \c part is OPC_PART_INVALID then part represents the root part.
|
||||
@param[in] relationId The relationId (e.g. "rId1") or NULL.
|
||||
@param[in] mimeType The mimeType or NULL.
|
||||
*/
|
||||
opcRelation opcRelationFind(opcContainer *container, opcPart part, const xmlChar *relationId, const xmlChar *mimeType);
|
||||
|
||||
/**
|
||||
Deletes the relation from the container.
|
||||
\see opcRelationFind.
|
||||
*/
|
||||
opc_error_t opcRelationDelete(opcContainer *container, opcPart part, const xmlChar *relationId, const xmlChar *mimeType);
|
||||
|
||||
/**
|
||||
Returns the first relation.
|
||||
The following code will dump all relations:
|
||||
\code
|
||||
for(opcPart part=opcPartGetFirst(c);OPC_PART_INVALID!=part;part=opcPartGetNext(c, part)) {
|
||||
for(opcRelation rel=opcRelationFirst(c, part);
|
||||
OPC_RELATION_INVALID!=rel;
|
||||
rel=opcRelationNext(c, part, rel)) {
|
||||
opcPart internal_target=opcRelationGetInternalTarget(c, part, rel);
|
||||
const xmlChar *external_target=opcRelationGetExternalTarget(c, part, rel);
|
||||
const xmlChar *target=(NULL!=internal_target?internal_target:external_target);
|
||||
const xmlChar *prefix=NULL;
|
||||
opc_uint32_t counter=-1;
|
||||
const xmlChar *type=NULL;
|
||||
opcRelationGetInformation(c, part, rel, &prefix, &counter, &type);
|
||||
if (-1==counter) { // no counter after prefix
|
||||
printf("%s;%s;%s;%s\n", part, prefix, target, type);
|
||||
} else {
|
||||
printf("%s;%s%i;%s;%s\n", part, prefix, counter, target, type);
|
||||
}
|
||||
}
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
opcRelation opcRelationFirst(opcContainer *container, opcPart part);
|
||||
|
||||
/**
|
||||
\see opcRelationFirst
|
||||
*/
|
||||
opcRelation opcRelationNext(opcContainer *container, opcPart part, opcRelation relation);
|
||||
|
||||
/**
|
||||
Returns the internal target.
|
||||
\note To test for an external target use opcRelationGetExternalTarget.
|
||||
\see opcRelationGetExternalTarget
|
||||
*/
|
||||
opcPart opcRelationGetInternalTarget(opcContainer *container, opcPart part, opcRelation relation);
|
||||
|
||||
/**
|
||||
Returns the external target or NULL if it is an internal target.
|
||||
The string is interned. Must not be freed.
|
||||
\see opcRelationGetInternalTarget
|
||||
*/
|
||||
const xmlChar *opcRelationGetExternalTarget(opcContainer *container, opcPart part, opcRelation relation);
|
||||
|
||||
/**
|
||||
Returns the relations type.
|
||||
The string is interned. Must not be freed.
|
||||
*/
|
||||
const xmlChar *opcRelationGetType(opcContainer *container, opcPart part, opcRelation relation);
|
||||
|
||||
/**
|
||||
Get information about a relation.
|
||||
\see opcRelationFirst
|
||||
*/
|
||||
void opcRelationGetInformation(opcContainer *container, opcPart part, opcRelation relation, const xmlChar **prefix, opc_uint32_t *counter, const xmlChar **type);
|
||||
|
||||
/**
|
||||
Add a relation to \c container from \c src part to \c dest part with id \c rid and type \c type.
|
||||
*/
|
||||
opc_uint32_t opcRelationAdd(opcContainer *container, opcPart src, const xmlChar *rid, opcPart dest, const xmlChar *type);
|
||||
|
||||
/**
|
||||
Add an external relation to \c container from \c src part to \c target URL with id \c rid and type \c type.
|
||||
*/
|
||||
opc_uint32_t opcRelationAddExternal(opcContainer *container, opcPart src, const xmlChar *rid, const xmlChar *target, const xmlChar *type);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_RELATION_H */
|
||||
@@ -1,69 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/xmlreader.h
|
||||
|
||||
*/
|
||||
|
||||
#ifndef OPC_XMLREADER_H
|
||||
#define OPC_XMLREADER_H
|
||||
|
||||
#include <opc/config.h>
|
||||
#include <libxml/xmlreader.h>
|
||||
#include <mce/textreader.h>
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
Open an MCE reader for \c partName. Parameters \c URL, \c encoding and \c options will be passed unmodified to
|
||||
http://xmlsoft.org/html/libxml-xmlreader.html#xmlReaderForIO and they can be NULL, NULL, 0.
|
||||
\note Make sure the part exists.
|
||||
\see opcPartFind
|
||||
*/
|
||||
opc_error_t opcXmlReaderOpen(opcContainer *container, mceTextReader_t *mceTextReader, const xmlChar *partName, const char * URL, const char * encoding, int options);
|
||||
|
||||
/**
|
||||
Returns a libxml DOM document. Parameters \c URL, \c encoding and \c options will be passed unmodified to
|
||||
http://xmlsoft.org/html/libxml-parser.html#xmlReadIO and they can be NULL, NULL, 0.
|
||||
\note Make sure the part exists.
|
||||
\see opcPartFind
|
||||
*/
|
||||
xmlDocPtr opcXmlReaderReadDoc(opcContainer *container, const xmlChar *partName, const char * URL, const char * encoding, int options);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_XMLREADER_H */
|
||||
@@ -1,57 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/xmlwriter.h
|
||||
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
#include <mce/textwriter.h>
|
||||
|
||||
#ifndef OPC_XMLWRITER_H
|
||||
#define OPC_XMLWRITER_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
Create an MCE text writer for \c part in \c container with compression \c compression_option.
|
||||
\note Make sure the part exists.
|
||||
\see opcPartFind
|
||||
*/
|
||||
mceTextWriter *mceTextWriterOpen(opcContainer *c, opcPart part, opcCompressionOption_t compression_option);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_XMLWRITER_H */
|
||||
@@ -1,255 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
/** @file opc/zip.h
|
||||
The ZIP file backend of an OPC container.
|
||||
*/
|
||||
#include <opc/config.h>
|
||||
#include <opc/file.h>
|
||||
#include <opc/container.h>
|
||||
|
||||
#ifndef OPC_ZIP_H
|
||||
#define OPC_ZIP_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
Default growth hint of an OPC stream.
|
||||
*/
|
||||
#define OPC_DEFAULT_GROWTH_HINT 512
|
||||
|
||||
/**
|
||||
Handle to a ZIP archive.
|
||||
\see internal.h
|
||||
*/
|
||||
typedef struct OPC_ZIP_STRUCT opcZip;
|
||||
|
||||
/**
|
||||
Handle to a raw ZIP input stream.
|
||||
\see internal.h
|
||||
*/
|
||||
typedef struct OPC_ZIPINPUTSTREAM_STRUCT opcZipInputStream;
|
||||
|
||||
/**
|
||||
Handle to a raw ZIP output stream.
|
||||
\see internal.h
|
||||
*/
|
||||
typedef struct OPC_ZIPOUTPUTSTREAM_STRUCT opcZipOutputStream;
|
||||
|
||||
/**
|
||||
Holds all information of a ZIP segment.
|
||||
*/
|
||||
typedef struct OPC_ZIP_SEGMENT_INFO_STRUCT {
|
||||
xmlChar name[OPC_MAX_PATH];
|
||||
opc_uint32_t name_len;
|
||||
opc_uint32_t segment_number;
|
||||
opc_bool_t last_segment;
|
||||
opc_bool_t rels_segment;
|
||||
opc_uint32_t header_size;
|
||||
opc_uint32_t min_header_size;
|
||||
opc_uint32_t trailing_bytes;
|
||||
opc_uint32_t compressed_size;
|
||||
opc_uint32_t uncompressed_size;
|
||||
opc_uint16_t bit_flag;
|
||||
opc_uint32_t data_crc;
|
||||
opc_uint16_t compression_method;
|
||||
opc_ofs_t stream_ofs;
|
||||
opc_uint16_t growth_hint;
|
||||
} opcZipSegmentInfo_t;
|
||||
|
||||
/**
|
||||
\see opcZipLoader
|
||||
*/
|
||||
typedef int opcZipLoaderOpenCallback(void *iocontext);
|
||||
/**
|
||||
\see opcZipLoader
|
||||
*/
|
||||
typedef int opcZipLoaderSkipCallback(void *iocontext);
|
||||
/**
|
||||
\see opcZipLoader
|
||||
*/
|
||||
typedef int opcZipLoaderReadCallback(void *iocontext, char *buffer, int len);
|
||||
/**
|
||||
\see opcZipLoader
|
||||
*/
|
||||
typedef int opcZipLoaderCloseCallback(void *iocontext);
|
||||
|
||||
/**
|
||||
\see opcZipLoader
|
||||
*/
|
||||
typedef opc_error_t (opcZipLoaderSegmentCallback_t)(void *iocontext, void *userctx, opcZipSegmentInfo_t *info, opcZipLoaderOpenCallback *open, opcZipLoaderReadCallback *read, opcZipLoaderCloseCallback *close, opcZipLoaderSkipCallback *skip);
|
||||
|
||||
/**
|
||||
Walks every segment in a ZIP archive and calls the \c segmentCallback callback method.
|
||||
The implementer's \c segmentCallback method must then either use the passed \c open, \c read and \c close methods
|
||||
to read the stream or the passed \c skip method to skip the stream.
|
||||
This method can be used to e.g. read a ZIP file in stream mode.
|
||||
*/
|
||||
opc_error_t opcZipLoader(opcIO_t *io, void *userctx, opcZipLoaderSegmentCallback_t *segmentCallback);
|
||||
|
||||
/**
|
||||
\see opcZipClose
|
||||
*/
|
||||
typedef opc_error_t (opcZipSegmentReleaseCallback)(opcZip *zip, opc_uint32_t segment_id);
|
||||
|
||||
/**
|
||||
Closes the ZIP archive \c zip and will call \c releaseCallback for every segment to give the implementer a chance
|
||||
to free user resources.
|
||||
*/
|
||||
void opcZipClose(opcZip *zip, opcZipSegmentReleaseCallback* releaseCallback);
|
||||
|
||||
/**
|
||||
Creates an empty ZIP archive with the given \c io.
|
||||
*/
|
||||
opcZip *opcZipCreate(opcIO_t *io);
|
||||
|
||||
/**
|
||||
Commits all buffers and writes the ZIP archive's local header directories.
|
||||
If \c trim is true then padding bytes will be removed, i.e. the ZIP file size will be minimized.
|
||||
*/
|
||||
opc_error_t opcZipCommit(opcZip *zip, opc_bool_t trim);
|
||||
|
||||
/**
|
||||
Garbage collection on the passed \c zip archive. This will e.g. make deleted files available as free space.
|
||||
*/
|
||||
opc_error_t opcZipGC(opcZip *zip);
|
||||
|
||||
/**
|
||||
Load segment information into \c info.
|
||||
If \c rels_segment is -1 then load the info for part with name \c partName.
|
||||
Otherwise load the segment information for the ".rels." segment of \c partName.
|
||||
\return Returns the segment_id.
|
||||
*/
|
||||
opc_uint32_t opcZipLoadSegment(opcZip *zip, const xmlChar *partName, opc_bool_t rels_segment, opcZipSegmentInfo_t *info);
|
||||
|
||||
/**
|
||||
Create a segment with the given parameters.
|
||||
\return Returns the segment_id.
|
||||
*/
|
||||
opc_uint32_t opcZipCreateSegment(opcZip *zip,
|
||||
const xmlChar *partName,
|
||||
opc_bool_t relsSegment,
|
||||
opc_uint32_t segment_size,
|
||||
opc_uint32_t growth_hint,
|
||||
opc_uint16_t compression_method,
|
||||
opc_uint16_t bit_flag);
|
||||
|
||||
/**
|
||||
Creates an input stream for the segment with \c segment_id.
|
||||
\see opcZipLoadSegment
|
||||
\see opcZipCreateSegment
|
||||
*/
|
||||
opcZipInputStream *opcZipOpenInputStream(opcZip *zip, opc_uint32_t segment_id);
|
||||
|
||||
/**
|
||||
Free all resources of the input stream.
|
||||
*/
|
||||
opc_error_t opcZipCloseInputStream(opcZip *zip, opcZipInputStream *stream);
|
||||
|
||||
/**
|
||||
Read at most \c buf_len bytes from the input stream into \c buf.
|
||||
\return Returns the number of bytes read.
|
||||
*/
|
||||
opc_uint32_t opcZipReadInputStream(opcZip *zip, opcZipInputStream *stream, opc_uint8_t *buf, opc_uint32_t buf_len);
|
||||
|
||||
|
||||
/**
|
||||
Creates an output stream for the segment with \c segment_id.
|
||||
If \c *segment_id is -1 then a new segment will be created.
|
||||
Otherwise the segment with \c *segment_id will be overwritten.
|
||||
*/
|
||||
opcZipOutputStream *opcZipCreateOutputStream(opcZip *zip,
|
||||
opc_uint32_t *segment_id,
|
||||
const xmlChar *partName,
|
||||
opc_bool_t relsSegment,
|
||||
opc_uint32_t segment_size,
|
||||
opc_uint32_t growth_hint,
|
||||
opc_uint16_t compression_method,
|
||||
opc_uint16_t bit_flag);
|
||||
|
||||
/**
|
||||
Opens an existing output stream for reading.
|
||||
The \c *segment_id will be set to -1 and reset on opcZipCloseOutputStream.
|
||||
\see opcZipCloseOutputStream
|
||||
*/
|
||||
opcZipOutputStream *opcZipOpenOutputStream(opcZip *zip, opc_uint32_t *segment_id);
|
||||
|
||||
/**
|
||||
Will close the stream and free all resources. Additionally the new segment id will be stored in \c *segment_id.
|
||||
\see opcZipOpenOutputStream
|
||||
*/
|
||||
opc_error_t opcZipCloseOutputStream(opcZip *zip, opcZipOutputStream *stream, opc_uint32_t *segment_id);
|
||||
|
||||
/**
|
||||
Write \c buf_len bytes from \c buf to \c stream.
|
||||
\return Returns the number of bytes written.
|
||||
*/
|
||||
opc_uint32_t opcZipWriteOutputStream(opcZip *zip, opcZipOutputStream *stream, const opc_uint8_t *buf, opc_uint32_t buf_len);
|
||||
|
||||
/**
|
||||
Returns the first segment id or -1.
|
||||
Use the following code to iterate through all segments.
|
||||
\code
|
||||
for(opc_uint32_t segment_id=opcZipGetFirstSegmentId(zip);
|
||||
-1!=segment_id;
|
||||
segment_id=opcZipGetNextSegmentId(zip, segment_id)) {
|
||||
...
|
||||
}
|
||||
\endcode
|
||||
\see opcZipGetNextSegmentId
|
||||
*/
|
||||
opc_uint32_t opcZipGetFirstSegmentId(opcZip *zip);
|
||||
|
||||
/**
|
||||
Returns the next segment id or -1.
|
||||
\see opcZipGetFirstSegmentId
|
||||
*/
|
||||
opc_uint32_t opcZipGetNextSegmentId(opcZip *zip, opc_uint32_t segment_id);
|
||||
|
||||
/**
|
||||
Returns info about the given segment id.
|
||||
*/
|
||||
opc_error_t opcZipGetSegmentInfo(opcZip *zip, opc_uint32_t segment_id, const xmlChar **name, opc_bool_t *rels_segment, opc_uint32_t *crc);
|
||||
|
||||
/**
|
||||
Marks the given segments as deleted.
|
||||
\see opcZipGC
|
||||
*/
|
||||
opc_bool_t opcZipSegmentDelete(opcZip *zip, opc_uint32_t *first_segment, opc_uint32_t *last_segment, opcZipSegmentReleaseCallback* releaseCallback);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* OPC_ZIP_H */
|
||||
@@ -1,168 +0,0 @@
|
||||
/* include/plib/plib.h. Generated from plib.h by configure. */
|
||||
/*
|
||||
Copyright (c) 2010, Florian Reuter
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Florian Reuter nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
#ifndef _PLIB_PLIB_H_
|
||||
#define _PLIB_PLIB_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define HAVE_STDINT_H 1
|
||||
#define HAVE_STDDEF_H 1
|
||||
#define HAVE_STDIO_H 1
|
||||
#define HAVE_STRING_H 1
|
||||
#define HAVE_LIMITS_H 1
|
||||
#define HAVE_STDLIB_H 1
|
||||
/* #undef HAVE_IO_H */
|
||||
#define HAVE_UNISTD_H 1
|
||||
#define HAVE_SYS_TYPES_H 1
|
||||
#define IS_CONFIGURED 1
|
||||
|
||||
#if !defined(IS_CONFIGURED)
|
||||
#if defined(WIN32)
|
||||
#define HAVE_STRING_H 1
|
||||
#define HAVE_STDINT_H 1
|
||||
#define HAVE_LIMITS_H 1
|
||||
#define HAVE_STDDEF_H 1
|
||||
#define HAVE_STDIO_H 1
|
||||
#define HAVE_STDLIB_H 1
|
||||
#define HAVE_IO_H
|
||||
#define snprintf _snprintf
|
||||
#else
|
||||
#error "configure not executed and we are not on a win32 machine? please run configure or define WIN32 is you are on a WIN32 platform."
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDDEF_H
|
||||
#include <stddef.h>
|
||||
typedef size_t pofs_t; // maximum file offset for eg. read write ops
|
||||
#else
|
||||
#error "system types can not be determined"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDIO_H
|
||||
#include <stdio.h>
|
||||
#else
|
||||
#error "system io can not be determined"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDINT_H
|
||||
#include <stdint.h>
|
||||
|
||||
typedef int8_t pint8_t;
|
||||
typedef uint8_t puint8_t;
|
||||
|
||||
typedef int16_t pint16_t;
|
||||
typedef uint16_t puint16_t;
|
||||
|
||||
typedef int32_t pint32_t;
|
||||
typedef uint32_t puint32_t;
|
||||
|
||||
typedef int64_t pint64_t;
|
||||
typedef uint64_t puint64_t;
|
||||
|
||||
typedef int pbool_t;
|
||||
|
||||
typedef size_t psize_t;
|
||||
|
||||
// INTN_MAX, INTN_MIN, UINTN_MAX
|
||||
#else
|
||||
#error "system types can not be determined"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_LIMITS_H
|
||||
#include <limits.h>
|
||||
#define PUINT8_MAX UCHAR_MAX
|
||||
#define PINT32_MAX INT_MAX
|
||||
#define PINT32_MIN INT_MIN
|
||||
#define PUINT32_MAX UINT_MAX
|
||||
#define PUINT32_MIN 0
|
||||
#define PUINT16_MAX USHRT_MAX
|
||||
#define PUINT16_MIN 0
|
||||
#else
|
||||
#error "limits can not be determined"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_IO_H
|
||||
#include <io.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
/**
|
||||
Converts an ASCII string to an xmlChar string. This only works for ASCII strings.
|
||||
*/
|
||||
#ifndef _X
|
||||
#define _X(s) BAD_CAST(s)
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
Converts an xmlChar string to an ASCII string. This only works for ASCII charsets.
|
||||
*/
|
||||
#ifndef _X2C
|
||||
#define _X2C(s) ((char*)(s))
|
||||
#endif
|
||||
|
||||
|
||||
#define PASSERT(e) assert(e)
|
||||
#ifdef NDEBUG
|
||||
#define PENSURE(e) (void)(e)
|
||||
#else
|
||||
#define PENSURE(e) assert(e)
|
||||
#endif
|
||||
#define PTRUE (0==0)
|
||||
#define PFALSE (0==1)
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* _PLIB_PLIB_H_ */
|
||||
Submodule lib/bzip2-1.0.6 deleted from 288acf97a1
Submodule lib/ffmpeg deleted from 8887991a31
Submodule lib/harfbuzz deleted from b28c282585
Submodule lib/leptonica deleted from cc03be70fd
Submodule lib/libmagic deleted from 1249b5cd02
Binary file not shown.
Binary file not shown.
Binary file not shown.
Submodule lib/libtiff deleted from 3db0ff91bc
Submodule lib/mupdf deleted from c50ac19e41
Submodule lib/onion deleted from 73329b61eb
Submodule lib/openjpeg deleted from ac3737372a
Submodule lib/tesseract deleted from f268e6615e
1
lmdb
1
lmdb
Submodule lmdb deleted from 5c012bbe03
@@ -1,17 +1,43 @@
|
||||
{
|
||||
"properties": {
|
||||
"_tie": {
|
||||
"type": "keyword",
|
||||
"doc_values": true
|
||||
},
|
||||
"_depth": {
|
||||
"type": "integer"
|
||||
},
|
||||
"path": {
|
||||
"type": "text",
|
||||
"analyzer": "path_analyzer",
|
||||
"copy_to": "suggest-path"
|
||||
"copy_to": "suggest-path",
|
||||
"fielddata": true,
|
||||
"fields": {
|
||||
"nGram": {
|
||||
"type": "text",
|
||||
"analyzer": "my_nGram"
|
||||
},
|
||||
"text": {
|
||||
"type": "text",
|
||||
"analyzer": "content_analyzer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"suggest-path": {
|
||||
"type": "completion",
|
||||
"analyzer": "keyword"
|
||||
"analyzer": "case_insensitive_kw_analyzer"
|
||||
},
|
||||
"mime": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"parent": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"thumbnail": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"videoc": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
@@ -32,6 +58,10 @@
|
||||
"type": "integer",
|
||||
"index": false
|
||||
},
|
||||
"pages": {
|
||||
"type": "integer",
|
||||
"index": false
|
||||
},
|
||||
"mtime": {
|
||||
"type": "integer"
|
||||
},
|
||||
@@ -75,10 +105,10 @@
|
||||
"analyzer": "my_nGram",
|
||||
"type": "text"
|
||||
},
|
||||
"_keyword.*": {
|
||||
"_keyword.*": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"_text.*": {
|
||||
"_text.*": {
|
||||
"analyzer": "content_analyzer",
|
||||
"type": "text",
|
||||
"fields": {
|
||||
@@ -104,7 +134,66 @@
|
||||
}
|
||||
},
|
||||
"tag": {
|
||||
"type": "keyword",
|
||||
"copy_to": "suggest-tag"
|
||||
},
|
||||
"suggest-tag": {
|
||||
"type": "completion",
|
||||
"analyzer": "case_insensitive_kw_analyzer"
|
||||
},
|
||||
"exif_make": {
|
||||
"type": "text"
|
||||
},
|
||||
"exif_model": {
|
||||
"type": "text"
|
||||
},
|
||||
"exif:software": {
|
||||
"type": "text"
|
||||
},
|
||||
"exif_exposure_time": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"exif_fnumber": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"exif_iso_speed_ratings": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"exif_focal_length": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"exif_user_comment": {
|
||||
"type": "text"
|
||||
},
|
||||
"exif_gps_longitude_ref": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"exif_gps_longitude_dms": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"exif_gps_longitude_dec": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"exif_gps_latitude_ref": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"exif_gps_latitude_dms": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"exif_gps_latitude_dec": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"author": {
|
||||
"type": "text"
|
||||
},
|
||||
"modified_by": {
|
||||
"type": "text"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
10
schema/pipeline.json
Normal file
10
schema/pipeline.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"description": "Copy _id to _tie, save path depth",
|
||||
"processors": [
|
||||
{
|
||||
"script": {
|
||||
"source": "ctx._tie = ctx._id; ctx._depth = ctx.path.length() == 0 ? 0 : 1 + ctx.path.length() - ctx.path.replace(\"/\", \"\").length();"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,7 +1,8 @@
|
||||
{
|
||||
"index": {
|
||||
"refresh_interval": "30s",
|
||||
"codec": "best_compression"
|
||||
"codec": "best_compression",
|
||||
"number_of_replicas": 0
|
||||
},
|
||||
"analysis": {
|
||||
"tokenizer": {
|
||||
@@ -21,6 +22,12 @@
|
||||
"lowercase"
|
||||
]
|
||||
},
|
||||
"case_insensitive_kw_analyzer": {
|
||||
"tokenizer": "keyword",
|
||||
"filter": [
|
||||
"lowercase"
|
||||
]
|
||||
},
|
||||
"my_nGram": {
|
||||
"tokenizer": "my_nGram_tokenizer",
|
||||
"filter": [
|
||||
|
||||
@@ -2,15 +2,15 @@
|
||||
|
||||
rm -rf index.sist2/
|
||||
|
||||
rm web/js/bundle.js 2> /dev/null
|
||||
cat `ls web/js/*.min.js` > web/js/bundle.js
|
||||
cat web/js/{util,dom,search}.js >> web/js/bundle.js
|
||||
rm src/static/js/bundle.js 2> /dev/null
|
||||
cat `ls src/static/js/*.min.js` > src/static/js/bundle.js
|
||||
cat src/static/js/{util,dom}.js >> src/static/js/bundle.js
|
||||
|
||||
rm web/css/bundle*.css 2> /dev/null
|
||||
cat web/css/*.min.css > web/css/bundle.css
|
||||
cat web/css/light.css >> web/css/bundle.css
|
||||
cat web/css/*.min.css > web/css/bundle_dark.css
|
||||
cat web/css/dark.css >> web/css/bundle_dark.css
|
||||
rm src/static/css/bundle*.css 2> /dev/null
|
||||
cat src/static/css/*.min.css > src/static/css/bundle.css
|
||||
cat src/static/css/light.css >> src/static/css/bundle.css
|
||||
cat src/static/css/*.min.css > src/static/css/bundle_dark.css
|
||||
cat src/static/css/dark.css >> src/static/css/bundle_dark.css
|
||||
|
||||
python3 scripts/mime.py > src/parsing/mime_generated.c
|
||||
python3 scripts/serve_static.py > src/web/static_generated.c
|
||||
|
||||
@@ -1,100 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
THREADS=$(nproc)
|
||||
|
||||
cd lib
|
||||
|
||||
cd mupdf
|
||||
CFLAGS=-fPIC make USE_SYSTEM_HARFBUZZ=yes USE_SYSTEM_OPENJPEG=yes HAVE_X11=no HAVE_GLUT=no -j $THREADS
|
||||
cd ..
|
||||
|
||||
mv mupdf/build/release/libmupdf.a .
|
||||
mv mupdf/build/release/libmupdf-third.a .
|
||||
|
||||
# openjp2
|
||||
cd openjpeg
|
||||
#cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3 -march=native -DNDEBUG"
|
||||
cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3"
|
||||
make -j $THREADS
|
||||
cd ..
|
||||
mv openjpeg/bin/libopenjp2.a .
|
||||
|
||||
# harfbuzz
|
||||
cd harfbuzz
|
||||
./autogen.sh
|
||||
./configure --disable-shared --enable-static
|
||||
make -j $THREADS
|
||||
cd ..
|
||||
mv harfbuzz/src/.libs/libharfbuzz.a .
|
||||
|
||||
# ffmpeg
|
||||
cd ffmpeg
|
||||
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \
|
||||
--disable-ffprobe --disable-doc\
|
||||
--disable-manpages --disable-postproc --disable-avfilter \
|
||||
--disable-alsa --disable-lzma --disable-xlib --disable-debug\
|
||||
--disable-vdpau --disable-vaapi --disable-sdl2 --disable-network\
|
||||
--extra-cflags=-fPIC
|
||||
make -j $THREADS
|
||||
cd ..
|
||||
|
||||
mv ffmpeg/libavcodec/libavcodec.a .
|
||||
mv ffmpeg/libavformat/libavformat.a .
|
||||
mv ffmpeg/libavutil/libavutil.a .
|
||||
mv ffmpeg/libswresample/libswresample.a .
|
||||
mv ffmpeg/libswscale/libswscale.a .
|
||||
|
||||
# onion
|
||||
cd onion
|
||||
mkdir build 2> /dev/null
|
||||
cd build
|
||||
cmake -DONION_USE_SSL=false -DONION_USE_PAM=false -DONION_USE_PNG=false -DONION_USE_JPEG=false \
|
||||
-DONION_USE_JPEG=false -DONION_USE_XML2=false -DONION_USE_SYSTEMD=false -DONION_USE_SQLITE3=false \
|
||||
-DONION_USE_REDIS=false -DONION_USE_GC=false -DONION_USE_TESTS=false -DONION_EXAMPLES=false \
|
||||
-DONION_USE_BINDINGS_CPP=false ..
|
||||
make -j $THREADS
|
||||
cd ../..
|
||||
|
||||
mv onion/build/src/onion/libonion_static.a .
|
||||
|
||||
#bzip2
|
||||
cd bzip2-1.0.6
|
||||
make -j $THREADS
|
||||
cd ..
|
||||
mv bzip2-1.0.6/libbz2.a .
|
||||
|
||||
# magic
|
||||
cd libmagic
|
||||
./autogen.sh
|
||||
./configure --enable-static --disable-shared
|
||||
make -j $THREADS
|
||||
cd ..
|
||||
mv libmagic/src/.libs/libmagic.a .
|
||||
|
||||
# tesseract
|
||||
cd tesseract
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DSTATIC=on -DBUILD_TRAINING_TOOLS=off ..
|
||||
make -j $THREADS
|
||||
cd ../..
|
||||
mv tesseract/build/libtesseract.a .
|
||||
|
||||
# leptonica
|
||||
cd leptonica
|
||||
./autogen.sh
|
||||
./configure --without-zlib --without-jpeg --without-giflib \
|
||||
--without-giflib --without-libwebp --without-libwebpmux --without-libopenjpeg \
|
||||
--enable-static --disable-shared
|
||||
make -j $THREADS
|
||||
cd ..
|
||||
mv leptonica/src/.libs/liblept.a .
|
||||
|
||||
# tiff
|
||||
cd libtiff
|
||||
./autogen.sh
|
||||
./configure --enable-static --disable-shared --disable-lzw --disable-jpeg --disable-webp \
|
||||
--disable-lzma --disable-zstd --disable-jbig
|
||||
make -j $THREADS
|
||||
cd ..
|
||||
mv libtiff/libtiff/.libs/libtiff.a .
|
||||
@@ -1,6 +1,9 @@
|
||||
import json
|
||||
|
||||
files = [
|
||||
"schema/mappings.json",
|
||||
"schema/settings.json",
|
||||
"schema/pipeline.json",
|
||||
]
|
||||
|
||||
|
||||
@@ -9,6 +12,7 @@ def clean(filepath):
|
||||
|
||||
|
||||
for file in files:
|
||||
with open(file, "rb") as f:
|
||||
data = f.read()
|
||||
with open(file, "r") as f:
|
||||
data = json.dumps(json.load(f), separators=(",", ":")).encode()
|
||||
data += b'\0'
|
||||
print("char %s[%d] = {%s};" % (clean(file), len(data), ",".join(str(int(b)) for b in data)))
|
||||
|
||||
@@ -2,14 +2,18 @@ application/arj, arj
|
||||
application/base64, mme
|
||||
application/binhex, hqx
|
||||
application/book, boo|book
|
||||
application/CDFV2-corrupt,
|
||||
application/CDFV2, sdv
|
||||
application/clariscad, ccad
|
||||
application/commonground, dp
|
||||
application/csv,
|
||||
application/dicom, dcm
|
||||
application/drafting, drw
|
||||
application/epub+zip, epub
|
||||
application/freeloader, frl
|
||||
application/futuresplash, spl
|
||||
application/groupwise, vew
|
||||
application/gzip, gz
|
||||
application/gzip, gz|tgz
|
||||
application/hta, hta
|
||||
application/i-deas, unv
|
||||
application/iges, iges|igs
|
||||
@@ -17,7 +21,6 @@ application/inf, inf
|
||||
application/java-archive, jar
|
||||
application/java, class
|
||||
application/javascript,
|
||||
application/x-archive, a
|
||||
application/json, json
|
||||
application/marc, mrc
|
||||
application/mbedlet, mbd
|
||||
@@ -27,7 +30,9 @@ application/msword, doc|dot|w6w|wiz|word
|
||||
application/netmc, mcp
|
||||
application/octet-stream, bin|dump|gpg
|
||||
application/oda, oda
|
||||
application/ogg, ogv
|
||||
application/pdf, pdf
|
||||
application/pgp-keys,
|
||||
application/pgp-signature, pgp
|
||||
application/pkcs7-signature, p7s
|
||||
application/pkix-cert, cer|crt
|
||||
@@ -43,6 +48,10 @@ application/vda, vda
|
||||
application/vnd.fdf, fdf
|
||||
application/vnd.font-fontforge-sfd, sfd
|
||||
application/vnd.hp-hpgl, hgl|hpg|hpgl
|
||||
application/vnd.iccprofile, icm
|
||||
application/vnd.iccprofile, icm
|
||||
application/vnd.lotus-1-2-3,
|
||||
application/vnd.ms-cab-compressed, cab
|
||||
application/vnd.ms-excel, xlb|xlc|xll|xlm|xls|xlw
|
||||
application/vnd.ms-fontobject, eot
|
||||
application/vnd.ms-opentype, otf
|
||||
@@ -54,45 +63,75 @@ application/vnd.ms-project, mpp
|
||||
application/vnd.oasis.opendocument.base, odb
|
||||
application/vnd.oasis.opendocument.formula, odf
|
||||
application/vnd.oasis.opendocument.graphics, odg
|
||||
application/vnd.oasis.opendocument.presentation, odp
|
||||
application/vnd.oasis.opendocument.spreadsheet, ods
|
||||
application/vnd.oasis.opendocument.text, odt
|
||||
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
|
||||
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
|
||||
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
|
||||
application/vnd.symbian.install,
|
||||
application/vnd.tcpdump.pcap, pcap
|
||||
application/vnd.wap.wmlc, wmlc
|
||||
application/vnd.wap.wmlscriptc, wmlsc
|
||||
application/vnd.xara, web
|
||||
application/vocaltec-media-desc, vmd
|
||||
application/vocaltec-media-file, vmf
|
||||
application/warc, warc
|
||||
application/winhelp, hlp
|
||||
application/wordperfect6.0, w60
|
||||
application/wordperfect6.1, w61
|
||||
application/wordperfect, wp|wp5|wp6|wpd
|
||||
application/x-123, wk1
|
||||
application/x-7z-compressed, 7z
|
||||
application/x-aim, aim
|
||||
application/x-apple-diskimage,
|
||||
application/x-arc,
|
||||
application/x-archive, a
|
||||
application/x-atari-7800-rom, a78
|
||||
application/x-authorware-bin, aab
|
||||
application/x-authorware-map, aam
|
||||
application/x-authorware-seg, aas
|
||||
application/x-avira-qua,
|
||||
application/x-bcpio, bcpio
|
||||
application/x-bittorrent, torrent
|
||||
application/x-bsh, bsh
|
||||
application/x-bytecode.python, pyc
|
||||
application/x-bzip2, boz|bz2
|
||||
application/x-bzip, bz
|
||||
application/x-cbr, cbr
|
||||
application/x-cbz, cbz
|
||||
application/x-cdlink, vcd
|
||||
application/x-chat, cha|chat
|
||||
application/x-chrome-extension,
|
||||
application/x-cocoa, cco
|
||||
application/x-conference, nsc
|
||||
application/x-coredump,
|
||||
application/x-cpio, cpio
|
||||
application/x-dbf, dbf
|
||||
application/x-dbt,
|
||||
application/x-debian-package, deb
|
||||
application/x-deepv, deepv
|
||||
application/x-director, dcr|dir|dxr
|
||||
application/x-director, dir|dxr
|
||||
application/x-dmp, dmp
|
||||
application/x-dosdriver,
|
||||
application/x-dosexec, dll
|
||||
application/x-dvi, dvi
|
||||
application/x-elc, elc
|
||||
application/x-empty,
|
||||
application/x-envoy, env|evy
|
||||
application/x-esrehber, es
|
||||
application/x-excel, xla|xld|xlk|xlt|xlv
|
||||
application/x-executable, exe
|
||||
application/x-font-gdos,
|
||||
application/x-font-pf2, pf2
|
||||
application/x-font-pfm, pfm
|
||||
application/x-font-sfn,
|
||||
application/x-font-ttf, ttf|ttc
|
||||
application/x-fpt,
|
||||
application/x-freelance, pre
|
||||
application/x-gamecube-rom,
|
||||
application/x-gdbm,
|
||||
application/x-gettext-translation,
|
||||
application/x-git,
|
||||
application/x-gsp, gsp
|
||||
application/x-gss, gss
|
||||
@@ -102,46 +141,68 @@ application/x-hdf, hdf
|
||||
application/x-helpfile, help
|
||||
application/x-httpd-imap, imap
|
||||
application/x-ima, ima
|
||||
application/x-innosetup,
|
||||
application/x-internett-signup, ins
|
||||
application/x-inventor, iv
|
||||
application/x-ip2, ip
|
||||
application/x-java-applet,
|
||||
application/x-java-commerce, jcm
|
||||
application/x-java-image,
|
||||
application/x-java-jmod, jmod
|
||||
application/x-java-keystore,
|
||||
application/x-kdelnk,
|
||||
application/x-koan, skd|skm|skp|skt
|
||||
application/x-latex, latex|ltx
|
||||
application/x-livescreen, ivy
|
||||
application/x-lotus, wq1
|
||||
application/x-lz4+json, jsonlz4
|
||||
application/x-lz4, lz4
|
||||
application/x-lz4, lz4
|
||||
application/x-lzh-compressed,
|
||||
application/x-lzh, lzh
|
||||
application/x-lzip, lz
|
||||
application/x-lzma, lzma
|
||||
application/x-lzop, lzo
|
||||
application/x-lzx, lzx
|
||||
application/x-mach-binary, jnilib|dylib
|
||||
application/x-mach-executable,
|
||||
application/x-magic-cap-package-1.0, mc$
|
||||
application/x-mathcad, mcd
|
||||
application/x-maxis-dbpf,
|
||||
application/x-meme, mm
|
||||
application/x-midi, midi
|
||||
application/x-mif, mif
|
||||
application/x-mix-transfer, nix
|
||||
application/xml, opf
|
||||
application/x-mobipocket-ebook, mobi
|
||||
application/vnd.amazon.mobi8-ebook, azw|azw3
|
||||
application/x-msaccess, accdb
|
||||
application/x-ms-compress-szdd, fon
|
||||
application/x-ms-pdb, pdb
|
||||
application/x-ms-reader, lit
|
||||
application/x-n64-rom, z64
|
||||
application/x-navi-animation, ani
|
||||
application/x-navidoc, nvd
|
||||
application/x-navimap, map
|
||||
application/x-navistyle, stl
|
||||
application/x-nes-rom, nes
|
||||
application/x-netcdf, cdf|nc
|
||||
application/x-newton-compatible-pkg, pkg
|
||||
application/x-nintendo-ds-rom,
|
||||
application/x-object, o
|
||||
application/x-omcdatamaker, omcd
|
||||
application/x-omc, omc
|
||||
application/x-omcregerator, omcr
|
||||
application/x-pagemaker, pm4|pm5
|
||||
application/x-pcl, pcl
|
||||
application/x-pgp-keyring,
|
||||
application/x-pixclscript, plx
|
||||
application/x-pkcs7-certreqresp, p7r
|
||||
application/x-pkcs7-signature, p7a
|
||||
application/x-project, mpc|mpt|mpv|mpx
|
||||
application/x-qpro, wb1
|
||||
application/x-rar, rar
|
||||
application/x-rpm, rpm
|
||||
application/x-sdp, sdp
|
||||
application/x-sea, sea
|
||||
application/x-seelogo, sl
|
||||
@@ -149,12 +210,17 @@ application/x-setupscript,
|
||||
application/x-sharedlib, so
|
||||
application/x-shar, shar
|
||||
application/x-shockwave-flash, swf
|
||||
application/x-snappy-framed,
|
||||
application/x-sprite, spr|sprite
|
||||
application/x-sqlite3,
|
||||
application/x-stargallery-thm,
|
||||
application/x-stuffit, sit
|
||||
application/x-sv4cpio, sv4cpio
|
||||
application/x-sv4crc, sv4crc
|
||||
application/x-tar, tar
|
||||
application/x-tbook, sbk|tbk
|
||||
application/x-terminfo,
|
||||
application/x-terminfo2,
|
||||
application/x-texinfo, texi|texinfo
|
||||
application/x-tex-tfm, tfm
|
||||
application/x-ustar, ustar
|
||||
@@ -163,16 +229,22 @@ application/x-vnd.audioexplosion.mzz, mzz
|
||||
application/x-vnd.ls-xpix, xpix
|
||||
application/x-vrml, vrml
|
||||
application/x-wais-source, src|wsrc
|
||||
application/x-wine-extension-ini,
|
||||
application/x-wintalk, wtk
|
||||
application/x-world, svr
|
||||
application/x-wri, wri
|
||||
application/x-x509-ca-cert, der
|
||||
application/x-xz, xz
|
||||
application/x-zip,
|
||||
application/x-zstd, zst
|
||||
application/zip, zip
|
||||
application/zlib, z
|
||||
!audio/basic, au
|
||||
audio/it, it
|
||||
audio/make, funk|my|pfunk
|
||||
audio/midi, kar
|
||||
audio/mid, rmi
|
||||
audio/mp4, m4b
|
||||
audio/mpeg, m2a|mpa
|
||||
audio/ogg, ogg
|
||||
audio/s3m, s3m
|
||||
@@ -180,7 +252,10 @@ audio/tsp-audio, tsi
|
||||
audio/tsplayer, tsp
|
||||
audio/vnd.qcelp, qcp
|
||||
audio/voxware, vox
|
||||
audio/x-aiff, aiff|aif
|
||||
audio/x-flac, flac
|
||||
audio/x-gsm, gsd|gsm
|
||||
audio/x-hx-aac-adts,
|
||||
audio/x-jam, jam
|
||||
audio/x-liveaudio, lam
|
||||
audio/x-m4a, m4a
|
||||
@@ -194,17 +269,24 @@ audio/x-nspaudio, lma
|
||||
audio/x-pn-realaudio, ram|rm|rmm|rmp
|
||||
audio/x-psid, sid
|
||||
audio/x-realaudio, ra
|
||||
audio/x-s3m,
|
||||
audio/x-twinvq-plugin, vqe|vql
|
||||
audio/x-twinvq, vqf
|
||||
audio/x-voc, voc
|
||||
audio/x-wav, wav
|
||||
!audio/x-xbox360-executable, xex
|
||||
!audio/x-xbox-executable, xbe
|
||||
font/otf,
|
||||
font/sfnt,
|
||||
font/woff2, woff2
|
||||
font/woff, woff
|
||||
image/bmp,
|
||||
image/cmu-raster, rast
|
||||
image/fif, fif
|
||||
image/florian, flo|turbot
|
||||
image/g3fax, g3
|
||||
image/gif, gif
|
||||
image/heic, heic
|
||||
image/ief, ief|iefs
|
||||
image/jpeg, jfif|jfif-tbnl|jpe|jpeg|jpg
|
||||
image/jutvision, jut
|
||||
@@ -213,6 +295,9 @@ image/pict, pic|pict
|
||||
image/png, png|x-png
|
||||
!image/svg, svg
|
||||
!image/svg+xml,
|
||||
image/tiff,
|
||||
!image/vnd.adobe.photoshop, psd
|
||||
!image/vnd.djvu, djvu
|
||||
image/vnd.fpx, fpx
|
||||
image/vnd.microsoft.icon,
|
||||
image/vnd.rn-realflash, rf
|
||||
@@ -220,9 +305,15 @@ image/vnd.rn-realpix, rp
|
||||
image/vnd.wap.wbmp, wbmp
|
||||
image/vnd.xiff, xif
|
||||
image/webp, webp
|
||||
image/wmf,
|
||||
image/x-3ds, 3ds
|
||||
image/x-award-bioslogo,
|
||||
image/x-cmu-raster, ras
|
||||
image/x-cur, tga
|
||||
image/x-dwg, dwg|dxf|svf
|
||||
image/x-eps,
|
||||
image/x-exr, exr
|
||||
image/x-gem,
|
||||
image/x-icns,
|
||||
!image/x-icon, ico
|
||||
image/x-jg, art
|
||||
@@ -236,32 +327,31 @@ image/x-portable-graymap, pgm
|
||||
image/x-portable-pixmap, ppm
|
||||
image/x-quicktime, qif|qti|qtif
|
||||
image/x-rgb, rgb
|
||||
image/x-tga,
|
||||
image/x-tiff, tif|tiff
|
||||
image/tiff,
|
||||
image/x-win-bitmap,
|
||||
!image/x-xcf, xcf
|
||||
!image/x-xpixmap, xpm
|
||||
image/x-xwindowdump, xwd
|
||||
message/news,
|
||||
message/rfc822, mht|mhtml|mime
|
||||
model/vnd.dwf, dwf
|
||||
model/vnd.gdl, gdl
|
||||
model/vnd.gs.gdl, gdsl
|
||||
model/vrml, wrz
|
||||
model/x-pov, pov
|
||||
text/asp, asp
|
||||
text/css, css
|
||||
text/x-sass, sass
|
||||
text/x-scss, scss
|
||||
text/html, acgi|htm|html|htmls|htx|shtml
|
||||
text/javascript, js
|
||||
text/mcf, mcf
|
||||
text/pascal, pas
|
||||
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml
|
||||
text/PGP,
|
||||
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
|
||||
application/vnd.coffeescript, coffee
|
||||
text/richtext, rt|rtf|rtx
|
||||
text/rtf,
|
||||
text/scriplet, wsc
|
||||
text/x-awk, awk
|
||||
!video/x-jng, jng
|
||||
video/x-mng, mng
|
||||
image/x-cur, tga
|
||||
image/x-xwindowdump, xwd
|
||||
!image/vnd.adobe.photoshop, psd
|
||||
text/tab-separated-values, tsv
|
||||
text/troff, man|me|ms|roff|t|tr
|
||||
text/uri-list, uji|unis|uri|uris
|
||||
@@ -273,6 +363,7 @@ text/webviewhtml, htt
|
||||
text/x-Algol68,
|
||||
text/x-asm, asm|s
|
||||
text/x-audiosoft-intra, aip
|
||||
text/x-awk, awk
|
||||
text/x-bcpl,
|
||||
text/x-c, c|cc|h
|
||||
text/x-c++, cpp|cxx|c++
|
||||
@@ -287,23 +378,31 @@ text/x-makefile, am|mak
|
||||
text/xml, xml|pom|iml|plist
|
||||
text/x-m, m
|
||||
text/x-msdos-batch, bat
|
||||
text/x-ms-regedit, reg
|
||||
text/x-objective-c,
|
||||
text/x-pascal, p
|
||||
text/x-perl, pl
|
||||
text/x-php, php
|
||||
text/x-po, po
|
||||
text/x-python, py
|
||||
text/x-ruby, rb
|
||||
text/x-sass, sass
|
||||
text/x-scss, scss
|
||||
text/x-server-parsed-html, ssi
|
||||
text/x-setext, etx
|
||||
text/x-sgml, sgm|sgml
|
||||
text/x-shellscript, sh
|
||||
text/x-speech, talk
|
||||
text/x-tcl,
|
||||
text/x-tex, tex
|
||||
text/x-uil, uil
|
||||
text/x-uuencode, uue
|
||||
text/x-vcalendar, vcs
|
||||
text/x-vcard, vcf
|
||||
video/animaflex, afl
|
||||
video/avi, avi
|
||||
video/avs-video, avs
|
||||
video/MP2T,
|
||||
video/mp4, mp4
|
||||
video/mpeg, m1v|m2v|mpe|mpeg|mpg
|
||||
video/quicktime, moov|mov|qt
|
||||
@@ -318,101 +417,36 @@ video/x-atomic3d-feature, fmf
|
||||
video/x-dl, dl
|
||||
video/x-dv, dif|dv
|
||||
video/x-fli, fli
|
||||
video/x-flv, flv
|
||||
video/x-isvideo, isu
|
||||
!video/x-jng, jng
|
||||
video/x-m4v, m4v
|
||||
video/x-matroska, mkv
|
||||
video/x-mng, mng
|
||||
video/x-motion-jpeg, mjpg
|
||||
video/x-ms-asf, asf|asx
|
||||
video/x-ms-asf, asf|asx|wmv
|
||||
video/x-msvideo, divx
|
||||
video/x-qtc, qtc
|
||||
video/x-sgi-movie, movie|mv
|
||||
application/x-7z-compressed, 7z
|
||||
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
|
||||
text/x-po, po
|
||||
application/x-rpm, rpm
|
||||
application/x-debian-package, deb
|
||||
application/vnd.iccprofile, icm
|
||||
application/dicom, dcm
|
||||
image/x-exr, exr
|
||||
application/vnd.iccprofile, icm
|
||||
video/x-matroska, mkv
|
||||
application/x-empty,
|
||||
model/vnd.gdl, gdl
|
||||
model/vnd.gs.gdl, gdsl
|
||||
font/woff, woff
|
||||
font/woff2, woff2
|
||||
application/epub+zip, epub
|
||||
application/x-mobipocket-ebook, mobi
|
||||
audio/x-flac, flac
|
||||
application/x-rar, rar
|
||||
video/x-msvideo, divx
|
||||
video/x-flv, flv
|
||||
application/x-kdelnk,
|
||||
text/x-tcl,
|
||||
application/ogg, ogv
|
||||
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
|
||||
application/vnd.ms-cab-compressed, cab
|
||||
audio/mp4, m4b
|
||||
!image/vnd.djvu, djvu
|
||||
application/x-ms-reader, lit
|
||||
application/CDFV2-corrupt,
|
||||
text/x-vcard, vcf
|
||||
application/x-innosetup,
|
||||
application/winhelp, hlp
|
||||
image/x-tga,
|
||||
application/x-wine-extension-ini,
|
||||
application/x-cbz, cbz
|
||||
application/x-cbr, cbr
|
||||
application/x-ms-compress-szdd, fon
|
||||
application/x-atari-7800-rom, a78
|
||||
application/x-nes-rom, nes
|
||||
application/x-font-pfm, pfm
|
||||
application/x-gettext-translation,
|
||||
image/wmf,
|
||||
application/pgp-keys,
|
||||
image/x-3ds, 3ds
|
||||
application/x-lz4, lz4
|
||||
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
|
||||
application/vnd.oasis.opendocument.presentation, odp
|
||||
application/x-msaccess, accdb
|
||||
application/vnd.oasis.opendocument.spreadsheet, ods
|
||||
audio/x-aiff, aiff|aif
|
||||
text/x-ms-regedit, reg
|
||||
application/x-gamecube-rom,
|
||||
application/x-nintendo-ds-rom,
|
||||
text/x-objective-c,
|
||||
application/x-font-gdos,
|
||||
application/x-apple-diskimage,
|
||||
application/x-zstd, zst
|
||||
video/x-m4v, m4v
|
||||
message/news,
|
||||
application/vnd.symbian.install,
|
||||
application/x-lzh-compressed,
|
||||
application/x-dosdriver,
|
||||
application/vnd.tcpdump.pcap, pcap
|
||||
x-epoc/x-sisx-app,
|
||||
application/x-avira-qua,
|
||||
video/MP2T,
|
||||
application/x-snappy-framed,
|
||||
application/x-lz4+json, jsonlz4
|
||||
application/x-dmp, dmp
|
||||
application/zlib, z
|
||||
application/x-pgp-keyring,
|
||||
application/x-gdbm,
|
||||
application/x-font-pf2, pf2
|
||||
application/x-zip,
|
||||
application/x-coredump,
|
||||
application/x-java-jmod, jmod
|
||||
application/x-terminfo,
|
||||
application/x-terminfo2,
|
||||
application/x-arc,
|
||||
application/vnd.lotus-1-2-3,
|
||||
image/x-win-bitmap,
|
||||
application/x-maxis-dbpf,
|
||||
text/PGP,
|
||||
audio/x-hx-aac-adts,
|
||||
application/x-chrome-extension,
|
||||
image/heic, heic
|
||||
image/x-gem,
|
||||
application/x-lzma, lzma
|
||||
application/warc, warc
|
||||
application/x-lz4, lz4
|
||||
application/x-lzip, lz
|
||||
application/x-lzop, lzo
|
||||
application/x-zstd-dictionary,
|
||||
application/vnd.ms-outlook, msg
|
||||
image/x-olympus-orf, orf
|
||||
image/x-nikon-nef, nef
|
||||
image/x-fuji-raf, raf
|
||||
image/x-panasonic-raw, rw2|raw
|
||||
image/x-adobe-dng, dng
|
||||
image/x-canon-cr2, cr2
|
||||
image/x-canon-crw, crw
|
||||
image/x-dcraw,
|
||||
image/x-kodak-dcr, dcr
|
||||
image/x-kodak-k25, k25
|
||||
image/x-kodak-kdc, kdc
|
||||
image/x-minolta-mrw, mrw
|
||||
image/x-pentax-pef, pef
|
||||
image/x-sigma-x3f, x3f
|
||||
image/x-sony-arw, arw
|
||||
image/x-sony-sr2, sr2
|
||||
image/x-sony-srf, srf
|
||||
image/x-epson-erf, erf
|
||||
sist2/sidecar, s2meta
|
||||
|
@@ -3,6 +3,7 @@ noparse = set()
|
||||
ext_in_hash = set()
|
||||
|
||||
major_mime = {
|
||||
"sist2": 0,
|
||||
"model": 1,
|
||||
"example": 2,
|
||||
"message": 3,
|
||||
@@ -18,7 +19,6 @@ major_mime = {
|
||||
|
||||
pdf = (
|
||||
"application/pdf",
|
||||
"application/x-cbz",
|
||||
"application/epub+zip",
|
||||
"application/vnd.ms-xpsdocument",
|
||||
)
|
||||
@@ -62,6 +62,40 @@ doc = (
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
||||
)
|
||||
|
||||
mobi = (
|
||||
"application/x-mobipocket-ebook",
|
||||
"application/vnd.amazon.mobi8-ebook"
|
||||
)
|
||||
|
||||
markup = (
|
||||
"text/xml",
|
||||
"text/html",
|
||||
"text/x-sgml"
|
||||
)
|
||||
|
||||
raw = (
|
||||
"image/x-olympus-orf",
|
||||
"image/x-nikon-nef",
|
||||
"image/x-fuji-raf",
|
||||
"image/x-panasonic-raw",
|
||||
"image/x-adobe-dng",
|
||||
"image/x-canon-cr2",
|
||||
"image/x-canon-crw",
|
||||
"image/x-dcraw",
|
||||
"image/x-kodak-dcr",
|
||||
"image/x-kodak-k25",
|
||||
"image/x-kodak-kdc",
|
||||
"image/x-minolta-mrw",
|
||||
"image/x-pentax-pef",
|
||||
"image/x-sigma-x3f",
|
||||
"image/x-sony-arw",
|
||||
"image/x-sony-sr2",
|
||||
"image/x-sony-srf",
|
||||
"image/x-minolta-mrw",
|
||||
"image/x-pentax-pef",
|
||||
"image/x-epson-erf",
|
||||
)
|
||||
|
||||
cnt = 1
|
||||
|
||||
|
||||
@@ -82,8 +116,18 @@ def mime_id(mime):
|
||||
mime_id += " | 0x08000000"
|
||||
elif mime in doc:
|
||||
mime_id += " | 0x04000000"
|
||||
elif mime in mobi:
|
||||
mime_id += " | 0x02000000"
|
||||
elif mime in markup:
|
||||
mime_id += " | 0x01000000"
|
||||
elif mime in raw:
|
||||
mime_id += " | 0x00800000"
|
||||
elif mime == "application/x-empty":
|
||||
cnt -= 1
|
||||
return "1"
|
||||
elif mime == "sist2/sidecar":
|
||||
cnt -= 1
|
||||
return "2"
|
||||
return mime_id
|
||||
|
||||
|
||||
@@ -91,7 +135,7 @@ def clean(t):
|
||||
return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_")
|
||||
|
||||
|
||||
with open("mime.csv") as f:
|
||||
with open("scripts/mime.csv") as f:
|
||||
for l in f:
|
||||
mime, ext_list = l.split(",")
|
||||
if l.startswith("!"):
|
||||
@@ -103,7 +147,7 @@ with open("mime.csv") as f:
|
||||
print("// **Generated by mime.py**")
|
||||
print("#ifndef MIME_GENERATED_C")
|
||||
print("#define MIME_GENERATED_C")
|
||||
print("#include <glib-2.0/glib.h>\n")
|
||||
print("#include <glib.h>\n")
|
||||
print("#include <stdlib.h>\n")
|
||||
# Enum
|
||||
print("enum mime {")
|
||||
|
||||
6
scripts/reset.sh
Executable file
6
scripts/reset.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/usr/bin/env bash

# Run the Makefile's clean target, then delete the generated CMake/Make files
# and the listed third-party build directories.
make clean

rm -rf \
    CMakeFiles/ \
    CMakeCache.txt \
    Makefile \
    third-party/libscan/CMakeFiles \
    third-party/libscan/CMakeCache.txt \
    third-party/libscan/third-party/ext_ffmpeg \
    third-party/libscan/third-party/ext_libmobi \
    third-party/libscan/Makefile
|
||||
@@ -1,10 +1,12 @@
|
||||
files = [
|
||||
"web/css/bundle.css",
|
||||
"web/css/bundle_dark.css",
|
||||
"web/js/bundle.js",
|
||||
"web/img/sprite-skin-flat.png",
|
||||
"web/img/sprite-skin-flat-dark.png",
|
||||
"web/search.html",
|
||||
"src/static/css/bundle.css",
|
||||
"src/static/css/bundle_dark.css",
|
||||
"src/static/js/bundle.js",
|
||||
"src/static/js/search.js",
|
||||
"src/static/img/sprite-skin-flat.png",
|
||||
"src/static/img/sprite-skin-flat-dark.png",
|
||||
"src/static/search.html",
|
||||
"src/static/stats.html",
|
||||
]
|
||||
|
||||
|
||||
|
||||
259
src/cli.c
259
src/cli.c
@@ -1,6 +1,5 @@
|
||||
#include "cli.h"
|
||||
#include "ctx.h"
|
||||
|
||||
#include <tesseract/capi.h>
|
||||
|
||||
#define DEFAULT_OUTPUT "index.sist2/"
|
||||
@@ -10,10 +9,20 @@
|
||||
#define DEFAULT_REWRITE_URL ""
|
||||
|
||||
#define DEFAULT_ES_URL "http://localhost:9200"
|
||||
#define DEFAULT_ES_INDEX "sist2"
|
||||
#define DEFAULT_BATCH_SIZE 100
|
||||
|
||||
#define DEFAULT_BIND_ADDR "localhost"
|
||||
#define DEFAULT_PORT "4090"
|
||||
#define DEFAULT_LISTEN_ADDRESS "localhost:4090"
|
||||
#define DEFAULT_TREEMAP_THRESHOLD 0.0005
|
||||
|
||||
#define DEFAULT_MAX_MEM_BUFFER 2000
|
||||
|
||||
/*
 * NULL-terminated list of directories that are passed to find_file_in_paths()
 * when looking for the "<lang>.traineddata" file of the requested OCR language.
 */
const char *TESS_DATAPATHS[] = {
        "/usr/share/tessdata/",
        "/usr/share/tesseract-ocr/tessdata/",
        "./",
        NULL
};
|
||||
|
||||
|
||||
scan_args_t *scan_args_create() {
|
||||
@@ -24,10 +33,18 @@ scan_args_t *scan_args_create() {
|
||||
return args;
|
||||
}
|
||||
|
||||
exec_args_t *exec_args_create() {
|
||||
exec_args_t *args = calloc(sizeof(exec_args_t), 1);
|
||||
return args;
|
||||
}
|
||||
|
||||
void scan_args_destroy(scan_args_t *args) {
|
||||
if (args->name != NULL) {
|
||||
free(args->name);
|
||||
}
|
||||
if (args->incremental != NULL) {
|
||||
free(args->incremental);
|
||||
}
|
||||
if (args->path != NULL) {
|
||||
free(args->path);
|
||||
}
|
||||
@@ -39,6 +56,12 @@ void scan_args_destroy(scan_args_t *args) {
|
||||
|
||||
/**
 * Release an index_args_t together with the settings/mappings buffers that
 * index_args_validate() loads with load_external_file().
 *
 * The buffers are only freed when the corresponding *_path option was set,
 * i.e. when a load was attempted for them.
 */
void index_args_destroy(index_args_t *args) {
    // TODO (carried over from the original): the remaining members are not
    // released here.
    if (args->es_mappings_path != NULL) {
        free(args->es_mappings);
    }

    if (args->es_settings_path != NULL) {
        free(args->es_settings);
    }

    free(args);
}
|
||||
|
||||
@@ -47,6 +70,10 @@ void web_args_destroy(web_args_t *args) {
|
||||
free(args);
|
||||
}
|
||||
|
||||
/**
 * Release an exec_args_t.
 *
 * Only the structure itself is freed; none of its members are touched.
 */
void exec_args_destroy(exec_args_t *args) {
    free(args);
}
|
||||
|
||||
int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
if (argc < 2) {
|
||||
fprintf(stderr, "Required positional argument: PATH.\n");
|
||||
@@ -62,10 +89,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
}
|
||||
|
||||
if (args->incremental != NULL) {
|
||||
abs_path = abspath(args->incremental);
|
||||
args->incremental = abspath(args->incremental);
|
||||
if (abs_path == NULL) {
|
||||
fprintf(stderr, "File not found: %s\n", args->incremental);
|
||||
return 1;
|
||||
sist_log("main.c", SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
|
||||
args->incremental = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -107,7 +134,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->depth < 0) {
|
||||
if (args->depth <= 0) {
|
||||
args->depth = G_MAXINT32;
|
||||
} else {
|
||||
args->depth += 1;
|
||||
@@ -115,6 +142,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
|
||||
if (args->name == NULL) {
|
||||
args->name = g_path_get_basename(args->output);
|
||||
} else {
|
||||
char* tmp = malloc(strlen(args->name) + 1);
|
||||
strcpy(tmp, args->name);
|
||||
args->name = tmp;
|
||||
}
|
||||
|
||||
if (args->rewrite_url == NULL) {
|
||||
@@ -136,13 +167,53 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
|
||||
if (args->tesseract_lang != NULL) {
|
||||
TessBaseAPI *api = TessBaseAPICreate();
|
||||
ret = TessBaseAPIInit3(api, TESS_DATAPATH, args->tesseract_lang);
|
||||
|
||||
char filename[128];
|
||||
sprintf(filename, "%s.traineddata", args->tesseract_lang);
|
||||
const char *path = find_file_in_paths(TESS_DATAPATHS, filename);
|
||||
if (path == NULL) {
|
||||
LOG_FATAL("cli.c", "Could not find tesseract language file!");
|
||||
}
|
||||
|
||||
ret = TessBaseAPIInit3(api, path, args->tesseract_lang);
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang);
|
||||
return 1;
|
||||
}
|
||||
TessBaseAPIEnd(api);
|
||||
TessBaseAPIDelete(api);
|
||||
|
||||
args->tesseract_path = path;
|
||||
}
|
||||
|
||||
if (args->exclude_regex != NULL) {
|
||||
const char *error;
|
||||
int error_offset;
|
||||
|
||||
pcre *re = pcre_compile(args->exclude_regex, 0, &error, &error_offset, 0);
|
||||
if (error != NULL) {
|
||||
LOG_FATALF("cli.c", "pcre_compile returned error: %s (offset:%d)", error, error_offset)
|
||||
}
|
||||
|
||||
pcre_extra *re_extra = pcre_study(re, 0, &error);
|
||||
if (error != NULL) {
|
||||
LOG_FATALF("cli.c", "pcre_study returned error: %s", error)
|
||||
}
|
||||
|
||||
ScanCtx.exclude = re;
|
||||
ScanCtx.exclude_extra = re_extra;
|
||||
} else {
|
||||
ScanCtx.exclude = NULL;
|
||||
}
|
||||
|
||||
if (args->treemap_threshold_str == 0) {
|
||||
args->treemap_threshold = DEFAULT_TREEMAP_THRESHOLD;
|
||||
} else {
|
||||
args->treemap_threshold = atof(args->treemap_threshold_str);
|
||||
}
|
||||
|
||||
if (args->max_memory_buffer == 0) {
|
||||
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
|
||||
}
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
|
||||
@@ -156,7 +227,41 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
|
||||
LOG_DEBUGF("cli.c", "arg path=%s", args->path)
|
||||
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
|
||||
LOG_DEBUGF("cli.c", "arg ocr=%s", args->tesseract_lang)
|
||||
LOG_DEBUGF("cli.c", "arg archive_passphrase=%s", args->archive_passphrase)
|
||||
LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
|
||||
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
|
||||
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
|
||||
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
|
||||
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
|
||||
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int load_external_file(const char *file_path, char **dst) {
|
||||
struct stat info;
|
||||
int res = stat(file_path, &info);
|
||||
|
||||
if (res == -1) {
|
||||
LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno))
|
||||
return 1;
|
||||
}
|
||||
|
||||
int fd = open(file_path, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno))
|
||||
return 1;
|
||||
}
|
||||
|
||||
*dst = malloc(info.st_size + 1);
|
||||
res = read(fd, *dst, info.st_size);
|
||||
if (res < 0) {
|
||||
LOG_ERRORF("cli.c", "Error reading file '%s': %s\n", file_path, strerror(errno))
|
||||
return 1;
|
||||
}
|
||||
|
||||
*(*dst + info.st_size) = '\0';
|
||||
close(fd);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -170,6 +275,13 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->threads == 0) {
|
||||
args->threads = 1;
|
||||
} else if (args->threads < 0) {
|
||||
fprintf(stderr, "Invalid threads: %d\n", args->threads);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *index_path = abspath(argv[1]);
|
||||
if (index_path == NULL) {
|
||||
fprintf(stderr, "File not found: %s\n", argv[1]);
|
||||
@@ -183,30 +295,26 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
args->es_url = DEFAULT_ES_URL;
|
||||
}
|
||||
|
||||
if (args->es_index == NULL) {
|
||||
args->es_index = DEFAULT_ES_INDEX;
|
||||
}
|
||||
|
||||
if (args->script_path != NULL) {
|
||||
struct stat info;
|
||||
int res = stat(args->script_path, &info);
|
||||
|
||||
if (res == -1) {
|
||||
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
|
||||
if (load_external_file(args->script_path, &args->script) != 0) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
int fd = open(args->script_path, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
|
||||
if (args->es_settings_path != NULL) {
|
||||
if (load_external_file(args->es_settings_path, &args->es_settings) != 0) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
args->script = malloc(info.st_size + 1);
|
||||
res = read(fd, args->script, info.st_size);
|
||||
if (res == -1) {
|
||||
fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno));
|
||||
if (args->es_mappings_path != NULL) {
|
||||
if (load_external_file(args->es_mappings_path, &args->es_mappings) != 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
*(args->script + info.st_size) = '\0';
|
||||
close(fd);
|
||||
}
|
||||
|
||||
if (args->batch_size == 0) {
|
||||
@@ -214,10 +322,16 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
}
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
|
||||
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
|
||||
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
|
||||
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
|
||||
LOG_DEBUGF("cli.c", "arg async_script=%s", args->async_script)
|
||||
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
|
||||
LOG_DEBUGF("cli.c", "arg print=%d", args->print)
|
||||
LOG_DEBUGF("cli.c", "arg es_mappings_path=%s", args->es_mappings_path)
|
||||
LOG_DEBUGF("cli.c", "arg es_mappings=%s", args->es_mappings)
|
||||
LOG_DEBUGF("cli.c", "arg es_settings_path=%s", args->es_settings_path)
|
||||
LOG_DEBUGF("cli.c", "arg es_settings=%s", args->es_settings)
|
||||
LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size)
|
||||
LOG_DEBUGF("cli.c", "arg force_reset=%d", args->force_reset)
|
||||
|
||||
@@ -237,18 +351,57 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
|
||||
args->es_url = DEFAULT_ES_URL;
|
||||
}
|
||||
|
||||
if (args->bind == NULL) {
|
||||
args->bind = DEFAULT_BIND_ADDR;
|
||||
if (args->listen_address == NULL) {
|
||||
args->listen_address = DEFAULT_LISTEN_ADDRESS;
|
||||
}
|
||||
|
||||
if (args->port == NULL) {
|
||||
args->port = DEFAULT_PORT;
|
||||
if (args->es_index == NULL) {
|
||||
args->es_index = DEFAULT_ES_INDEX;
|
||||
}
|
||||
|
||||
if (args->credentials != NULL) {
|
||||
args->b64credentials = onion_base64_encode(args->credentials, (int) strlen(args->credentials));
|
||||
//Remove trailing newline
|
||||
*(args->b64credentials + strlen(args->b64credentials) - 1) = '\0';
|
||||
char *ptr = strstr(args->credentials, ":");
|
||||
if (ptr == NULL) {
|
||||
fprintf(stderr, "Invalid --auth format, see usage\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
strncpy(args->auth_user, args->credentials, (ptr - args->credentials));
|
||||
strcpy(args->auth_pass, ptr + 1);
|
||||
|
||||
if (strlen(args->auth_user) == 0) {
|
||||
fprintf(stderr, "--auth username must be at least one character long");
|
||||
return 1;
|
||||
}
|
||||
|
||||
args->auth_enabled = TRUE;
|
||||
} else {
|
||||
args->auth_enabled = FALSE;
|
||||
}
|
||||
|
||||
if (args->tag_credentials != NULL && args->credentials != NULL) {
|
||||
fprintf(stderr, "--auth and --tag-auth are mutually exclusive");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->tag_credentials != NULL) {
|
||||
char *ptr = strstr(args->tag_credentials, ":");
|
||||
if (ptr == NULL) {
|
||||
fprintf(stderr, "Invalid --tag-auth format, see usage\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
strncpy(args->auth_user, args->tag_credentials, (ptr - args->tag_credentials));
|
||||
strcpy(args->auth_pass, ptr + 1);
|
||||
|
||||
if (strlen(args->auth_user) == 0) {
|
||||
fprintf(stderr, "--tag-auth username must be at least one character long");
|
||||
return 1;
|
||||
}
|
||||
|
||||
args->tag_auth_enabled = TRUE;
|
||||
} else {
|
||||
args->tag_auth_enabled = FALSE;
|
||||
}
|
||||
|
||||
args->index_count = argc - 1;
|
||||
@@ -263,10 +416,12 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
|
||||
}
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
|
||||
LOG_DEBUGF("cli.c", "arg bind=%s", args->bind)
|
||||
LOG_DEBUGF("cli.c", "arg port=%s", args->port)
|
||||
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
|
||||
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)
|
||||
LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials)
|
||||
LOG_DEBUGF("cli.c", "arg b64credentials=%s", args->b64credentials)
|
||||
LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials)
|
||||
LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user)
|
||||
LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass)
|
||||
LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count)
|
||||
for (int i = 0; i < args->index_count; i++) {
|
||||
LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i])
|
||||
@@ -285,3 +440,39 @@ web_args_t *web_args_create() {
|
||||
return args;
|
||||
}
|
||||
|
||||
/**
 * Validate an exec_args_t, fill in defaults and load the script file.
 *
 * @param args Arguments to validate; es_url and es_index get their default
 *             value when NULL, and script is filled from script_path.
 * @param argc Number of entries in argv; at least 2 are required.
 * @param argv argv[1] is the positional PATH argument.
 * @return 0 when the arguments are usable, 1 otherwise.
 */
int exec_args_validate(exec_args_t *args, int argc, const char **argv) {

    if (argc < 2) {
        fprintf(stderr, "Required positional argument: PATH.\n");
        return 1;
    }

    // The resolved path is only tested for NULL and then released; the path
    // is stored exactly as it was given on the command line.
    char *resolved = abspath(argv[1]);
    if (resolved == NULL) {
        fprintf(stderr, "File not found: %s\n", argv[1]);
        return 1;
    }
    args->index_path = argv[1];
    free(resolved);

    if (args->es_url == NULL) {
        args->es_url = DEFAULT_ES_URL;
    }
    if (args->es_index == NULL) {
        args->es_index = DEFAULT_ES_INDEX;
    }

    if (args->script_path == NULL) {
        LOG_FATAL("cli.c", "--script-file argument is required");
    }

    int load_failed = load_external_file(args->script_path, &args->script) != 0;
    if (load_failed) {
        return 1;
    }

    LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
    LOG_DEBUGF("cli.c", "arg script=%s", args->script)

    return 0;
}
|
||||
|
||||
47
src/cli.h
47
src/cli.h
@@ -3,6 +3,8 @@
|
||||
|
||||
#include "sist.h"
|
||||
|
||||
#include "libscan/arc/arc.h"
|
||||
|
||||
typedef struct scan_args {
|
||||
float quality;
|
||||
int size;
|
||||
@@ -16,40 +18,79 @@ typedef struct scan_args {
|
||||
char *path;
|
||||
char *archive;
|
||||
archive_mode_t archive_mode;
|
||||
char *archive_passphrase;
|
||||
char *tesseract_lang;
|
||||
const char *tesseract_path;
|
||||
char *exclude_regex;
|
||||
int fast;
|
||||
const char* treemap_threshold_str;
|
||||
double treemap_threshold;
|
||||
int max_memory_buffer;
|
||||
int read_subtitles;
|
||||
} scan_args_t;
|
||||
|
||||
scan_args_t *scan_args_create();
|
||||
|
||||
void scan_args_destroy(scan_args_t *args);
|
||||
|
||||
int scan_args_validate(scan_args_t *args, int argc, const char **argv);
|
||||
|
||||
/* Options consumed by index_args_validate(). */
typedef struct index_args {
    char *es_url;                  /* replaced with DEFAULT_ES_URL when NULL */
    char *es_index;                /* replaced with DEFAULT_ES_INDEX when NULL */
    const char *index_path;        /* presumably the positional PATH argument - TODO confirm */
    const char *script_path;       /* optional; when set, the file is loaded into script */
    char *script;                  /* content of script_path */
    const char *es_settings_path;  /* optional; when set, the file is loaded into es_settings */
    char *es_settings;             /* content of es_settings_path */
    const char *es_mappings_path;  /* optional; when set, the file is loaded into es_mappings */
    char *es_mappings;             /* content of es_mappings_path */
    int print;
    int batch_size;                /* 0 is replaced by a default (presumably DEFAULT_BATCH_SIZE - TODO confirm) */
    int async_script;
    int force_reset;
    int threads;                   /* 0 becomes 1; negative values are rejected */
} index_args_t;
|
||||
|
||||
typedef struct web_args {
|
||||
char *es_url;
|
||||
char *bind;
|
||||
char *port;
|
||||
char *es_index;
|
||||
char *listen_address;
|
||||
char *credentials;
|
||||
char *b64credentials;
|
||||
char *tag_credentials;
|
||||
char auth_user[256];
|
||||
char auth_pass[256];
|
||||
int auth_enabled;
|
||||
int tag_auth_enabled;
|
||||
int index_count;
|
||||
const char **indices;
|
||||
} web_args_t;
|
||||
|
||||
/* Options consumed by exec_args_validate(). */
typedef struct exec_args {
    char *es_url;             /* replaced with DEFAULT_ES_URL when NULL */
    char *es_index;           /* replaced with DEFAULT_ES_INDEX when NULL */
    const char *index_path;   /* positional PATH argument, stored as given */
    const char *script_path;  /* required; file whose content is loaded into script */
    int async_script;
    char *script;             /* content of script_path, filled by exec_args_validate() */
} exec_args_t;
|
||||
|
||||
index_args_t *index_args_create();
|
||||
|
||||
void index_args_destroy(index_args_t *args);
|
||||
|
||||
web_args_t *web_args_create();
|
||||
|
||||
void web_args_destroy(web_args_t *args);
|
||||
|
||||
int index_args_validate(index_args_t *args, int argc, const char **argv);
|
||||
|
||||
int web_args_validate(web_args_t *args, int argc, const char **argv);
|
||||
|
||||
exec_args_t *exec_args_create();
|
||||
|
||||
void exec_args_destroy(exec_args_t *args);
|
||||
|
||||
int exec_args_validate(exec_args_t *args, int argc, const char **argv);
|
||||
|
||||
#endif
|
||||
|
||||
6
src/ctx.c
Normal file
6
src/ctx.c
Normal file
@@ -0,0 +1,6 @@
|
||||
#include "ctx.h"
|
||||
|
||||
ScanCtx_t ScanCtx;
|
||||
WebCtx_t WebCtx;
|
||||
IndexCtx_t IndexCtx;
|
||||
LogCtx_t LogCtx;
|
||||
75
src/ctx.h
75
src/ctx.h
@@ -2,8 +2,24 @@
|
||||
#define SIST2_CTX_H
|
||||
|
||||
#include "sist.h"
|
||||
#include "tpool.h"
|
||||
#include "libscan/scan.h"
|
||||
#include "libscan/arc/arc.h"
|
||||
#include "libscan/comic/comic.h"
|
||||
#include "libscan/ebook/ebook.h"
|
||||
#include "libscan/font/font.h"
|
||||
#include "libscan/media/media.h"
|
||||
#include "libscan/ooxml/ooxml.h"
|
||||
#include "libscan/text/text.h"
|
||||
#include "libscan/mobi/scan_mobi.h"
|
||||
#include "libscan/raw/raw.h"
|
||||
#include "libscan/msdoc/msdoc.h"
|
||||
#include "src/io/store.h"
|
||||
|
||||
struct {
|
||||
#include <glib.h>
|
||||
#include <pcre.h>
|
||||
|
||||
typedef struct {
|
||||
struct index_t index;
|
||||
|
||||
GHashTable *mime_table;
|
||||
@@ -11,14 +27,8 @@ struct {
|
||||
|
||||
tpool_t *pool;
|
||||
|
||||
int tn_size;
|
||||
int threads;
|
||||
int content_size;
|
||||
float tn_qscale;
|
||||
int depth;
|
||||
archive_mode_t archive_mode;
|
||||
int verbose;
|
||||
int very_verbose;
|
||||
|
||||
size_t stat_tn_size;
|
||||
size_t stat_index_size;
|
||||
@@ -26,27 +36,56 @@ struct {
|
||||
GHashTable *original_table;
|
||||
GHashTable *copy_table;
|
||||
|
||||
pthread_mutex_t mupdf_mu;
|
||||
char * tesseract_lang;
|
||||
} ScanCtx;
|
||||
pcre *exclude;
|
||||
pcre_extra *exclude_extra;
|
||||
int fast;
|
||||
|
||||
struct {
|
||||
GHashTable *dbg_current_files;
|
||||
|
||||
scan_arc_ctx_t arc_ctx;
|
||||
scan_comic_ctx_t comic_ctx;
|
||||
scan_ebook_ctx_t ebook_ctx;
|
||||
scan_font_ctx_t font_ctx;
|
||||
scan_media_ctx_t media_ctx;
|
||||
scan_ooxml_ctx_t ooxml_ctx;
|
||||
scan_text_ctx_t text_ctx;
|
||||
scan_mobi_ctx_t mobi_ctx;
|
||||
scan_raw_ctx_t raw_ctx;
|
||||
scan_msdoc_ctx_t msdoc_ctx;
|
||||
} ScanCtx_t;
|
||||
|
||||
typedef struct {
|
||||
int verbose;
|
||||
int very_verbose;
|
||||
int no_color;
|
||||
} LogCtx;
|
||||
} LogCtx_t;
|
||||
|
||||
struct {
|
||||
typedef struct {
|
||||
char *es_url;
|
||||
char *es_index;
|
||||
int batch_size;
|
||||
} IndexCtx;
|
||||
tpool_t *pool;
|
||||
store_t *tag_store;
|
||||
GHashTable *tags;
|
||||
store_t *meta_store;
|
||||
GHashTable *meta;
|
||||
} IndexCtx_t;
|
||||
|
||||
struct {
|
||||
typedef struct {
|
||||
char *es_url;
|
||||
char *es_index;
|
||||
int index_count;
|
||||
char *b64credentials;
|
||||
struct index_t indices[16];
|
||||
} WebCtx;
|
||||
char *auth_user;
|
||||
char *auth_pass;
|
||||
int auth_enabled;
|
||||
int tag_auth_enabled;
|
||||
struct index_t indices[64];
|
||||
} WebCtx_t;
|
||||
|
||||
extern ScanCtx_t ScanCtx;
|
||||
extern WebCtx_t WebCtx;
|
||||
extern IndexCtx_t IndexCtx;
|
||||
extern LogCtx_t LogCtx;
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,11 +1,7 @@
|
||||
#include "elastic.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "web.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <cJSON/cJSON.h>
|
||||
|
||||
#include "static_generated.c"
|
||||
|
||||
@@ -13,19 +9,33 @@
|
||||
typedef struct es_indexer {
|
||||
int queued;
|
||||
char *es_url;
|
||||
char *es_index;
|
||||
es_bulk_line_t *line_head;
|
||||
es_bulk_line_t *line_tail;
|
||||
} es_indexer_t;
|
||||
|
||||
|
||||
static es_indexer_t *Indexer;
|
||||
static __thread es_indexer_t *Indexer;
|
||||
|
||||
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
||||
void delete_queue(int max);
|
||||
|
||||
void elastic_flush();
|
||||
|
||||
void elastic_cleanup() {
|
||||
elastic_flush();
|
||||
if (Indexer != NULL) {
|
||||
free(Indexer->es_index);
|
||||
free(Indexer->es_url);
|
||||
free(Indexer);
|
||||
}
|
||||
}
|
||||
|
||||
void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {
|
||||
|
||||
cJSON *line = cJSON_CreateObject();
|
||||
|
||||
cJSON_AddStringToObject(line, "_id", uuid_str);
|
||||
cJSON_AddStringToObject(line, "_index", "sist2");
|
||||
cJSON_AddStringToObject(line, "_id", id_str);
|
||||
cJSON_AddStringToObject(line, "_index", IndexCtx.es_index);
|
||||
cJSON_AddStringToObject(line, "_type", "_doc");
|
||||
cJSON_AddItemReferenceToObject(line, "_source", document);
|
||||
|
||||
@@ -37,23 +47,31 @@ void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
||||
cJSON_Delete(line);
|
||||
}
|
||||
|
||||
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
||||
void index_json_func(void *arg) {
|
||||
es_bulk_line_t *line = arg;
|
||||
elastic_index_line(line);
|
||||
}
|
||||
|
||||
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
|
||||
char *json = cJSON_PrintUnformatted(document);
|
||||
|
||||
size_t json_len = strlen(json);
|
||||
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
|
||||
memcpy(bulk_line->line, json, json_len);
|
||||
memcpy(bulk_line->uuid_str, uuid_str, UUID_STR_LEN);
|
||||
memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
|
||||
*(bulk_line->line + json_len) = '\n';
|
||||
*(bulk_line->line + json_len + 1) = '\0';
|
||||
bulk_line->next = NULL;
|
||||
|
||||
cJSON_free(json);
|
||||
elastic_index_line(bulk_line);
|
||||
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
|
||||
}
|
||||
|
||||
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {
|
||||
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]) {
|
||||
|
||||
if (Indexer == NULL) {
|
||||
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
|
||||
}
|
||||
|
||||
cJSON *body = cJSON_CreateObject();
|
||||
cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
|
||||
@@ -64,12 +82,19 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
|
||||
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
|
||||
cJSON_AddStringToObject(term_obj, "index", index_id);
|
||||
|
||||
char * str = cJSON_Print(body);
|
||||
char *str = cJSON_Print(body);
|
||||
|
||||
char bulk_url[4096];
|
||||
snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?pretty", Indexer->es_url);
|
||||
response_t *r = web_post(bulk_url, str, "Content-Type: application/json");
|
||||
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
|
||||
if (async) {
|
||||
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
|
||||
Indexer->es_index);
|
||||
} else {
|
||||
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
|
||||
}
|
||||
response_t *r = web_post(bulk_url, str);
|
||||
if (!async) {
|
||||
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
|
||||
}
|
||||
cJSON *resp = cJSON_Parse(r->body);
|
||||
|
||||
cJSON_free(str);
|
||||
@@ -84,31 +109,39 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
|
||||
cJSON_free(error_str);
|
||||
}
|
||||
|
||||
if (async) {
|
||||
cJSON *task = cJSON_GetObjectItem(resp, "task");
|
||||
LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring);
|
||||
}
|
||||
|
||||
cJSON_Delete(resp);
|
||||
}
|
||||
|
||||
void elastic_flush() {
|
||||
|
||||
if (Indexer == NULL) {
|
||||
Indexer = create_indexer(IndexCtx.es_url);
|
||||
}
|
||||
|
||||
void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
|
||||
es_bulk_line_t *line = Indexer->line_head;
|
||||
|
||||
int count = 0;
|
||||
*count = 0;
|
||||
|
||||
size_t buf_size = 0;
|
||||
size_t buf_cur = 0;
|
||||
char *buf = malloc(1);
|
||||
char *buf = malloc(8192);
|
||||
size_t buf_capacity = 8192;
|
||||
|
||||
while (line != NULL && *count < max) {
|
||||
char action_str[256];
|
||||
snprintf(
|
||||
action_str, sizeof(action_str),
|
||||
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
|
||||
line->path_md5_str, Indexer->es_index
|
||||
);
|
||||
|
||||
while (line != NULL) {
|
||||
char action_str[512];
|
||||
snprintf(action_str, 512,
|
||||
"{\"index\":{\"_id\":\"%s\", \"_type\":\"_doc\", \"_index\":\"sist2\"}}\n", line->uuid_str);
|
||||
size_t action_str_len = strlen(action_str);
|
||||
|
||||
size_t line_len = strlen(line->line);
|
||||
buf = realloc(buf, buf_size + line_len + action_str_len);
|
||||
|
||||
while (buf_size + line_len + action_str_len > buf_capacity) {
|
||||
buf_capacity *= 2;
|
||||
buf = realloc(buf, buf_capacity);
|
||||
}
|
||||
|
||||
buf_size += line_len + action_str_len;
|
||||
|
||||
memcpy(buf + buf_cur, action_str, action_str_len);
|
||||
@@ -116,50 +149,147 @@ void elastic_flush() {
|
||||
memcpy(buf + buf_cur, line->line, line_len);
|
||||
buf_cur += line_len;
|
||||
|
||||
es_bulk_line_t *tmp = line;
|
||||
line = line->next;
|
||||
free(tmp);
|
||||
count++;
|
||||
}
|
||||
buf = realloc(buf, buf_size + 1);
|
||||
*(buf+buf_cur) = '\0';
|
||||
|
||||
Indexer->line_head = NULL;
|
||||
Indexer->line_tail = NULL;
|
||||
Indexer->queued = 0;
|
||||
|
||||
char bulk_url[4096];
|
||||
snprintf(bulk_url, 4096, "%s/sist2/_bulk", Indexer->es_url);
|
||||
response_t *r = web_post(bulk_url, buf, "Content-Type: application/x-ndjson");
|
||||
|
||||
if (r->status_code == 0) {
|
||||
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
|
||||
(*count)++;
|
||||
}
|
||||
|
||||
LOG_INFOF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_cur / 1024, r->status_code);
|
||||
if (buf_size + 1 > buf_capacity) {
|
||||
buf = realloc(buf, buf_capacity + 1);
|
||||
}
|
||||
|
||||
cJSON *ret_json = cJSON_Parse(r->body);
|
||||
*(buf + buf_cur) = '\0';
|
||||
|
||||
*buf_len = buf_cur;
|
||||
return buf;
|
||||
}
|
||||
|
||||
/**
 * Log the failures contained in the response to a bulk request.
 *
 * When the response has a non-zero "errors" field, every entry of "items"
 * whose index status is not 201 is logged. When the response has no "errors"
 * field at all (as is the case for a request-level failure), the top-level
 * "error" object is logged instead, if present.
 *
 * @param r Response whose body (r->size bytes, not necessarily
 *          NUL-terminated) is parsed as JSON.
 */
void print_errors(response_t *r) {
    // Work on a NUL-terminated copy of the body so it can be parsed as a string.
    char *tmp = malloc(r->size + 1);
    if (tmp == NULL) {
        return;
    }
    memcpy(tmp, r->body, r->size);
    *(tmp + r->size) = '\0';

    cJSON *ret_json = cJSON_Parse(tmp);
    if (ret_json == NULL) {
        // Not JSON at all: there is nothing structured to report.
        LOG_ERRORF("elastic.c", "Could not parse response body <%d>\n", r->status_code);
        free(tmp);
        return;
    }

    cJSON *errors = cJSON_GetObjectItem(ret_json, "errors");
    cJSON *items = cJSON_GetObjectItem(ret_json, "items");

    if (errors == NULL) {
        cJSON *error = cJSON_GetObjectItem(ret_json, "error");
        if (error != NULL) {
            char *str = cJSON_Print(error);
            LOG_ERRORF("elastic.c", "%s\n", str);
            cJSON_free(str);
        }
    } else if (errors->valueint != 0 && items != NULL) {
        cJSON *err;
        cJSON_ArrayForEach(err, items) {
            cJSON *index = cJSON_GetObjectItem(err, "index");
            cJSON *status = index != NULL ? cJSON_GetObjectItem(index, "status") : NULL;

            // An item without a status is reported too rather than dereferenced.
            if (status == NULL || status->valueint != 201) {
                char *str = cJSON_Print(err);
                LOG_ERRORF("elastic.c", "%s\n", str);
                cJSON_free(str);
            }
        }
    }

    cJSON_Delete(ret_json);
    free(tmp);
}
|
||||
|
||||
/**
 * Log the top-level "error" object of a JSON response, if there is one.
 *
 * @param r Response whose body (r->size bytes) is parsed as JSON.
 */
void print_error(response_t *r) {
    // The body is copied so that it can be NUL-terminated before parsing.
    char *body = malloc(r->size + 1);
    memcpy(body, r->body, r->size);
    body[r->size] = '\0';

    cJSON *root = cJSON_Parse(body);
    cJSON *error = cJSON_GetObjectItem(root, "error");

    if (error != NULL) {
        char *text = cJSON_Print(error);
        LOG_ERRORF("elastic.c", "%s\n", text);
        cJSON_free(text);
    }

    cJSON_Delete(root);
    free(body);
}
|
||||
|
||||
void _elastic_flush(int max) {
|
||||
|
||||
if (max == 0) {
|
||||
LOG_WARNING("elastic.c", "calling _elastic_flush with 0 in queue")
|
||||
return;
|
||||
}
|
||||
|
||||
size_t buf_len;
|
||||
int count;
|
||||
void *buf = create_bulk_buffer(max, &count, &buf_len);
|
||||
|
||||
char bulk_url[4096];
|
||||
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_bulk?pipeline=tie", Indexer->es_url, Indexer->es_index);
|
||||
response_t *r = web_post(bulk_url, buf);
|
||||
|
||||
if (r->status_code == 0) {
|
||||
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
|
||||
}
|
||||
|
||||
if (r->status_code == 413) {
|
||||
|
||||
if (max <= 1) {
|
||||
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
|
||||
free_response(r);
|
||||
free(buf);
|
||||
delete_queue(1);
|
||||
if (Indexer->queued != 0) {
|
||||
elastic_flush();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
LOG_WARNINGF("elastic.c", "Payload too large, retrying (%d documents)", count);
|
||||
|
||||
free_response(r);
|
||||
free(buf);
|
||||
_elastic_flush(max / 2);
|
||||
return;
|
||||
|
||||
} else if (r->status_code == 429) {
|
||||
|
||||
free_response(r);
|
||||
free(buf);
|
||||
LOG_WARNING("elastic.c", "Got 429 status, will retry after delay")
|
||||
usleep(1000000 * 20);
|
||||
_elastic_flush(max);
|
||||
return;
|
||||
|
||||
} else if (r->status_code != 200) {
|
||||
print_errors(r);
|
||||
delete_queue(Indexer->queued);
|
||||
|
||||
} else {
|
||||
|
||||
print_errors(r);
|
||||
LOG_INFOF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
|
||||
delete_queue(max);
|
||||
|
||||
if (Indexer->queued != 0) {
|
||||
elastic_flush();
|
||||
}
|
||||
}
|
||||
|
||||
free_response(r);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
void delete_queue(int max) {
|
||||
for (int i = 0; i < max; i++) {
|
||||
es_bulk_line_t *tmp = Indexer->line_head;
|
||||
Indexer->line_head = tmp->next;
|
||||
if (Indexer->line_head == NULL) {
|
||||
Indexer->line_tail = NULL;
|
||||
}
|
||||
free(tmp);
|
||||
Indexer->queued -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
void elastic_flush() {
|
||||
|
||||
if (Indexer == NULL) {
|
||||
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
|
||||
}
|
||||
|
||||
_elastic_flush(Indexer->queued);
|
||||
}
|
||||
|
||||
void elastic_index_line(es_bulk_line_t *line) {
|
||||
|
||||
if (Indexer == NULL) {
|
||||
Indexer = create_indexer(IndexCtx.es_url);
|
||||
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
|
||||
}
|
||||
|
||||
if (Indexer->line_head == NULL) {
|
||||
@@ -177,14 +307,18 @@ void elastic_index_line(es_bulk_line_t *line) {
|
||||
}
|
||||
}
|
||||
|
||||
es_indexer_t *create_indexer(const char *url) {
|
||||
es_indexer_t *create_indexer(const char *url, const char *index) {
|
||||
|
||||
char *es_url = malloc(strlen(url) + 1);
|
||||
strcpy(es_url, url);
|
||||
|
||||
char *es_index = malloc(strlen(index) + 1);
|
||||
strcpy(es_index, index);
|
||||
|
||||
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
|
||||
|
||||
indexer->es_url = es_url;
|
||||
indexer->es_index = es_index;
|
||||
indexer->queued = 0;
|
||||
indexer->line_head = NULL;
|
||||
indexer->line_tail = NULL;
|
||||
@@ -192,41 +326,42 @@ es_indexer_t *create_indexer(const char *url) {
|
||||
return indexer;
|
||||
}
|
||||
|
||||
void destroy_indexer(char * script, char index_id[UUID_STR_LEN]) {
|
||||
void finish_indexer(char *script, int async_script, char *index_id) {
|
||||
|
||||
char url[4096];
|
||||
|
||||
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
|
||||
response_t *r = web_post(url, "", NULL);
|
||||
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
|
||||
response_t *r = web_post(url, "");
|
||||
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
if (script != NULL) {
|
||||
execute_update_script(script, index_id);
|
||||
execute_update_script(script, async_script, index_id);
|
||||
free(script);
|
||||
|
||||
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_post(url, "");
|
||||
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
}
|
||||
|
||||
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
|
||||
r = web_post(url, "", NULL);
|
||||
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
|
||||
r = web_post(url, "", NULL);
|
||||
snprintf(url, sizeof(url), "%s/%s/_forcemerge", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_post(url, "");
|
||||
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
if (Indexer != NULL) {
|
||||
free(Indexer->es_url);
|
||||
free(Indexer);
|
||||
}
|
||||
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}");
|
||||
LOG_INFOF("elastic.c", "Set refresh interval <%d>", r->status_code);
|
||||
free_response(r);
|
||||
}
|
||||
|
||||
void elastic_init(int force_reset) {
|
||||
void elastic_init(int force_reset, const char* user_mappings, const char* user_settings) {
|
||||
|
||||
// Check if index exists
|
||||
char url[4096];
|
||||
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
|
||||
response_t *r = web_get(url);
|
||||
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
|
||||
response_t *r = web_get(url, 30);
|
||||
int index_exists = r->status_code == 200;
|
||||
free_response(r);
|
||||
|
||||
@@ -235,42 +370,86 @@ void elastic_init(int force_reset) {
|
||||
LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
|
||||
r = web_put(url, "", NULL);
|
||||
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_put(url, "");
|
||||
|
||||
if (r->status_code != 200) {
|
||||
print_error(r);
|
||||
LOG_FATAL("elastic.c", "Could not create index")
|
||||
}
|
||||
|
||||
LOG_INFOF("elastic.c", "Create index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, 4096, "%s/sist2/_close", IndexCtx.es_url);
|
||||
r = web_post(url, "", NULL);
|
||||
snprintf(url, sizeof(url), "%s/%s/_close", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_post(url, "");
|
||||
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, 4096, "%s/sist2/_settings", IndexCtx.es_url);
|
||||
r = web_put(url, settings_json, "Content-Type: application/json");
|
||||
LOG_INFOF("elastic.c", "Update settings <%d>", r->status_code);
|
||||
snprintf(url, sizeof(url), "%s/_ingest/pipeline/tie", IndexCtx.es_url);
|
||||
r = web_put(url, pipeline_json);
|
||||
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, 4096, "%s/sist2/_mappings/_doc?include_type_name=true", IndexCtx.es_url);
|
||||
r = web_put(url, mappings_json, "Content-Type: application/json");
|
||||
LOG_INFOF("elastic.c", "Update mappings <%d>", r->status_code);
|
||||
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_put(url, user_settings ? user_settings : settings_json);
|
||||
LOG_INFOF("elastic.c", "Update user_settings <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, 4096, "%s/sist2/_open", IndexCtx.es_url);
|
||||
r = web_post(url, "", NULL);
|
||||
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_put(url, user_mappings ? user_mappings : mappings_json);
|
||||
LOG_INFOF("elastic.c", "Update user_mappings <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, sizeof(url), "%s/%s/_open", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_post(url, "");
|
||||
LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
}
|
||||
}
|
||||
|
||||
cJSON *elastic_get_document(const char *uuid_str) {
|
||||
cJSON *elastic_get_document(const char *id_str) {
|
||||
char url[4096];
|
||||
snprintf(url, 4096, "%s/sist2/_doc/%s", WebCtx.es_url, uuid_str);
|
||||
snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, id_str);
|
||||
|
||||
response_t *r = web_get(url);
|
||||
response_t *r = web_get(url, 3);
|
||||
cJSON *json = NULL;
|
||||
if (r->status_code == 200) {
|
||||
json = cJSON_Parse(r->body);
|
||||
char *tmp = malloc(r->size + 1);
|
||||
memcpy(tmp, r->body, r->size);
|
||||
*(tmp + r->size) = '\0';
|
||||
json = cJSON_Parse(tmp);
|
||||
free(tmp);
|
||||
}
|
||||
free_response(r);
|
||||
return json;
|
||||
}
|
||||
|
||||
char *elastic_get_status() {
|
||||
char url[4096];
|
||||
snprintf(url, sizeof(url),
|
||||
"%s/_cluster/state/metadata/%s?filter_path=metadata.indices.*.state", WebCtx.es_url, WebCtx.es_index);
|
||||
|
||||
response_t *r = web_get(url, 30);
|
||||
cJSON *json = NULL;
|
||||
char *status = malloc(128 * sizeof(char));
|
||||
status[0] = '\0';
|
||||
|
||||
if (r->status_code == 200) {
|
||||
char *tmp = malloc(r->size + 1);
|
||||
memcpy(tmp, r->body, r->size);
|
||||
*(tmp + r->size) = '\0';
|
||||
json = cJSON_Parse(tmp);
|
||||
free(tmp);
|
||||
const cJSON *metadata = cJSON_GetObjectItem(json, "metadata");
|
||||
if (metadata != NULL) {
|
||||
const cJSON *indices = cJSON_GetObjectItem(metadata, "indices");
|
||||
const cJSON *index = cJSON_GetObjectItem(indices, WebCtx.es_index);
|
||||
const cJSON *state = cJSON_GetObjectItem(index, "state");
|
||||
strcpy(status, state->valuestring);
|
||||
}
|
||||
}
|
||||
free_response(r);
|
||||
cJSON_Delete(json);
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
typedef struct es_bulk_line {
|
||||
struct es_bulk_line *next;
|
||||
char uuid_str[UUID_STR_LEN];
|
||||
char path_md5_str[MD5_STR_LENGTH];
|
||||
char line[0];
|
||||
} es_bulk_line_t;
|
||||
|
||||
@@ -16,18 +16,21 @@ typedef struct es_indexer es_indexer_t;
|
||||
|
||||
void elastic_index_line(es_bulk_line_t *line);
|
||||
|
||||
void elastic_flush();
|
||||
void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
|
||||
|
||||
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
|
||||
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
|
||||
|
||||
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
|
||||
es_indexer_t *create_indexer(const char *url, const char *index);
|
||||
|
||||
es_indexer_t *create_indexer(const char* es_url);
|
||||
void elastic_cleanup();
|
||||
void finish_indexer(char *script, int async_script, char *index_id);
|
||||
|
||||
void destroy_indexer(char *script, char index_id[UUID_STR_LEN]);
|
||||
void elastic_init(int force_reset, const char* user_mappings, const char* user_settings);
|
||||
|
||||
void elastic_init(int force_reset);
|
||||
cJSON *elastic_get_document(const char *id_str);
|
||||
|
||||
cJSON *elastic_get_document(const char *uuid_str);
|
||||
char *elastic_get_status();
|
||||
|
||||
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);
|
||||
|
||||
#endif
|
||||
|
||||
File diff suppressed because one or more lines are too long
122
src/index/web.c
122
src/index/web.c
@@ -1,4 +1,11 @@
|
||||
#include "web.h"
|
||||
#include "src/sist.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
#include <mongoose.h>
|
||||
#include <pthread.h>
|
||||
#include <curl/curl.h>
|
||||
|
||||
|
||||
size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
|
||||
|
||||
@@ -9,11 +16,91 @@ size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
|
||||
}
|
||||
|
||||
/**
 * Release a response and its body buffer.
 *
 * The body is freed exactly once; free(NULL) is a no-op, so a response
 * without a body needs no special handling.
 *
 * @param resp Response to release. NULL is accepted and ignored.
 */
void free_response(response_t *resp) {
    if (resp == NULL) {
        return;
    }

    free(resp->body);
    free(resp);
}
|
||||
|
||||
response_t *web_get(const char *url) {
|
||||
/**
 * Drive an asynchronous POST started by web_post_async() one step forward.
 *
 * Meant to be called repeatedly until req->done is set. When the transfer has
 * no running handles left, the response body, size and HTTP status code are
 * stored in req->response and the curl handles and header list are released.
 *
 * @param req Request context returned by web_post_async().
 */
void web_post_async_poll(subreq_ctx_t* req) {
    fd_set fdread;
    fd_set fdwrite;
    fd_set fdexcep;
    int maxfd = -1;

    FD_ZERO(&fdread);
    FD_ZERO(&fdwrite);
    FD_ZERO(&fdexcep);

    CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd);

    if (mc != CURLM_OK) {
        // NOTE(review): the curl handles and response buffer are not released
        // on this path — confirm whether the caller cleans them up.
        req->done = TRUE;
        return;
    }

    if (maxfd == -1) {
        // libcurl has no socket to wait on (e.g. the transfer already
        // finished, or it is in a phase select() cannot observe). It must
        // still be driven, otherwise the request would never be marked done.
        curl_multi_perform(req->multi, &req->running_handles);
    } else {
        struct timeval timeout = {1, 0};
        int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);

        if (rc == -1) {
            // select() failed: give up on this request.
            req->done = TRUE;
        } else if (rc > 0) {
            curl_multi_perform(req->multi, &req->running_handles);
        }
        // rc == 0: timed out with nothing to do; try again on the next poll.
    }

    if (req->running_handles == 0) {
        req->done = TRUE;
        req->response->body = req->response_buf.buf;
        req->response->size = req->response_buf.cur;

        // CURLINFO_RESPONSE_CODE writes a long; go through a temporary so the
        // int status_code field is not overrun.
        long status_code = 0;
        curl_easy_getinfo(req->handle, CURLINFO_RESPONSE_CODE, &status_code);
        req->response->status_code = (int) status_code;

        // Documented teardown order: detach the easy handle, clean it up, then
        // clean up the multi handle. The header list must outlive the easy
        // handle that references it.
        curl_multi_remove_handle(req->multi, req->handle);
        curl_easy_cleanup(req->handle);
        curl_multi_cleanup(req->multi);
        curl_slist_free_all(req->headers);
    }
}
|
||||
|
||||
subreq_ctx_t *web_post_async(const char *url, char *data) {
|
||||
subreq_ctx_t *req = calloc(1, sizeof(subreq_ctx_t));
|
||||
req->response = calloc(1, sizeof(response_t));
|
||||
req->data = data;
|
||||
req->response_buf = dyn_buffer_create();
|
||||
|
||||
req->handle = curl_easy_init();
|
||||
CURL *curl = req->handle;
|
||||
curl_easy_setopt(curl, CURLOPT_URL, url);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&req->response_buf));
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
|
||||
curl_easy_setopt(curl, CURLOPT_POST, 1);
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
|
||||
|
||||
req->multi = curl_multi_init();
|
||||
curl_multi_add_handle(req->multi, curl);
|
||||
curl_multi_perform(req->multi, &req->running_handles);
|
||||
|
||||
LOG_DEBUGF("web.c", "async request POST %s", url)
|
||||
|
||||
return req;
|
||||
}
|
||||
|
||||
response_t *web_get(const char *url, int timeout) {
|
||||
response_t *resp = malloc(sizeof(response_t));
|
||||
|
||||
CURL *curl;
|
||||
@@ -24,18 +111,24 @@ response_t *web_get(const char *url) {
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
|
||||
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
curl_easy_perform(curl);
|
||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
||||
|
||||
curl_easy_cleanup(curl);
|
||||
curl_slist_free_all(headers);
|
||||
|
||||
resp->body = buffer.buf;
|
||||
resp->size = buffer.cur;
|
||||
return resp;
|
||||
}
|
||||
|
||||
response_t *web_post(const char *url, const char *data, const char *header) {
|
||||
response_t *web_post(const char *url, const char *data) {
|
||||
|
||||
response_t *resp = malloc(sizeof(response_t));
|
||||
|
||||
@@ -50,10 +143,8 @@ response_t *web_post(const char *url, const char *data, const char *header) {
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
|
||||
struct curl_slist *headers = NULL;
|
||||
if (header != NULL) {
|
||||
headers = curl_slist_append(headers, header);
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
}
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
|
||||
|
||||
@@ -70,7 +161,7 @@ response_t *web_post(const char *url, const char *data, const char *header) {
|
||||
}
|
||||
|
||||
|
||||
response_t *web_put(const char *url, const char *data, const char *header) {
|
||||
response_t *web_put(const char *url, const char *data) {
|
||||
|
||||
response_t *resp = malloc(sizeof(response_t));
|
||||
|
||||
@@ -86,11 +177,9 @@ response_t *web_put(const char *url, const char *data, const char *header) {
|
||||
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
|
||||
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
|
||||
|
||||
if (header != NULL) {
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, header);
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
}
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
|
||||
|
||||
@@ -98,6 +187,7 @@ response_t *web_put(const char *url, const char *data, const char *header) {
|
||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
||||
|
||||
curl_easy_cleanup(curl);
|
||||
curl_slist_free_all(headers);
|
||||
|
||||
resp->body = buffer.buf;
|
||||
resp->size = buffer.cur;
|
||||
@@ -119,13 +209,17 @@ response_t *web_delete(const char *url) {
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
curl_easy_perform(curl);
|
||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
||||
|
||||
curl_easy_cleanup(curl);
|
||||
curl_slist_free_all(headers);
|
||||
|
||||
resp->body = buffer.buf;
|
||||
resp->size = buffer.cur;
|
||||
return resp;
|
||||
}
|
||||
}
|
||||
@@ -2,6 +2,8 @@
|
||||
#define SIST2_WEB_H
|
||||
|
||||
#include "src/sist.h"
|
||||
#include <mongoose.h>
|
||||
#include <curl/curl.h>
|
||||
|
||||
typedef struct response {
|
||||
char *body;
|
||||
@@ -9,9 +11,27 @@ typedef struct response {
|
||||
int status_code;
|
||||
} response_t;
|
||||
|
||||
response_t *web_get(const char *url);
|
||||
response_t *web_post(const char * url, const char * data, const char* header);
|
||||
response_t *web_put(const char *url, const char *data, const char *header);
|
||||
typedef struct {
|
||||
response_t *resp;
|
||||
int done;
|
||||
} http_ev_data_t;
|
||||
|
||||
typedef struct {
|
||||
char* data;
|
||||
dyn_buffer_t response_buf;
|
||||
struct curl_slist *headers;
|
||||
CURL *handle;
|
||||
CURLM *multi;
|
||||
response_t *response;
|
||||
int running_handles;
|
||||
int done;
|
||||
} subreq_ctx_t;
|
||||
|
||||
response_t *web_get(const char *url, int timeout);
|
||||
response_t *web_post(const char * url, const char * data);
|
||||
void web_post_async_poll(subreq_ctx_t* req);
|
||||
subreq_ctx_t *web_post_async(const char *url, char *data);
|
||||
response_t *web_put(const char *url, const char *data);
|
||||
response_t *web_delete(const char *url);
|
||||
|
||||
void free_response(response_t *resp);
|
||||
|
||||
@@ -1,21 +1,27 @@
|
||||
#include "src/ctx.h"
|
||||
#include "serialize.h"
|
||||
#include "src/parsing/parse.h"
|
||||
#include "src/parsing/mime.h"
|
||||
|
||||
static __thread int index_fd = -1;
|
||||
|
||||
typedef struct {
|
||||
unsigned char uuid[16];
|
||||
unsigned long ino;
|
||||
unsigned char path_md5[MD5_DIGEST_LENGTH];
|
||||
unsigned long size;
|
||||
unsigned int mime;
|
||||
int mtime;
|
||||
short base;
|
||||
short ext;
|
||||
char has_parent;
|
||||
} line_t;
|
||||
|
||||
#define META_NEXT 0xFFFF
|
||||
|
||||
void skip_meta(FILE *file) {
|
||||
enum metakey key = getc(file);
|
||||
while (key != '\n') {
|
||||
enum metakey key = 0;
|
||||
fread(&key, sizeof(uint16_t), 1, file);
|
||||
|
||||
while (key != META_NEXT) {
|
||||
if (IS_META_INT(key)) {
|
||||
fseek(file, sizeof(int), SEEK_CUR);
|
||||
} else if (IS_META_LONG(key)) {
|
||||
@@ -24,13 +30,13 @@ void skip_meta(FILE *file) {
|
||||
while ((getc(file))) {}
|
||||
}
|
||||
|
||||
key = getc(file);
|
||||
fread(&key, sizeof(uint16_t), 1, file);
|
||||
}
|
||||
}
|
||||
|
||||
void write_index_descriptor(char *path, index_descriptor_t *desc) {
|
||||
cJSON *json = cJSON_CreateObject();
|
||||
cJSON_AddStringToObject(json, "uuid", desc->uuid);
|
||||
cJSON_AddStringToObject(json, "id", desc->id);
|
||||
cJSON_AddStringToObject(json, "version", desc->version);
|
||||
cJSON_AddStringToObject(json, "root", desc->root);
|
||||
cJSON_AddStringToObject(json, "name", desc->name);
|
||||
@@ -39,11 +45,14 @@ void write_index_descriptor(char *path, index_descriptor_t *desc) {
|
||||
cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp);
|
||||
|
||||
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
|
||||
if (fd == -1) {
|
||||
perror(path);
|
||||
if (fd < 0) {
|
||||
LOG_FATALF("serialize.c", "Could not open index descriptor: %s", strerror(errno));
|
||||
}
|
||||
char *str = cJSON_Print(json);
|
||||
write(fd, str, strlen(str));
|
||||
int ret = write(fd, str, strlen(str));
|
||||
if (ret == -1) {
|
||||
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
|
||||
}
|
||||
free(str);
|
||||
close(fd);
|
||||
|
||||
@@ -57,11 +66,14 @@ index_descriptor_t read_index_descriptor(char *path) {
|
||||
int fd = open(path, O_RDONLY);
|
||||
|
||||
if (fd == -1) {
|
||||
LOG_FATAL("serialize.c", "Invalid/corrupt index (Could not find descriptor)\n")
|
||||
LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path, strerror(errno))
|
||||
}
|
||||
|
||||
char *buf = malloc(info.st_size + 1);
|
||||
read(fd, buf, info.st_size);
|
||||
size_t ret = read(fd, buf, info.st_size);
|
||||
if (ret == -1) {
|
||||
LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
|
||||
}
|
||||
*(buf + info.st_size) = '\0';
|
||||
close(fd);
|
||||
|
||||
@@ -74,7 +86,7 @@ index_descriptor_t read_index_descriptor(char *path) {
|
||||
strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
|
||||
descriptor.root_len = (short) strlen(descriptor.root);
|
||||
strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
|
||||
strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring);
|
||||
strcpy(descriptor.id, cJSON_GetObjectItem(json, "id")->valuestring);
|
||||
if (cJSON_GetObjectItem(json, "type") == NULL) {
|
||||
strcpy(descriptor.type, INDEX_TYPE_BIN);
|
||||
} else {
|
||||
@@ -118,8 +130,46 @@ char *get_meta_key_text(enum metakey meta_key) {
|
||||
return "font_name";
|
||||
case MetaParent:
|
||||
return "parent";
|
||||
case MetaExifMake:
|
||||
return "exif_make";
|
||||
case MetaExifSoftware:
|
||||
return "exif_software";
|
||||
case MetaExifExposureTime:
|
||||
return "exif_exposure_time";
|
||||
case MetaExifFNumber:
|
||||
return "exif_fnumber";
|
||||
case MetaExifFocalLength:
|
||||
return "exif_focal_length";
|
||||
case MetaExifUserComment:
|
||||
return "exif_user_comment";
|
||||
case MetaExifIsoSpeedRatings:
|
||||
return "exif_iso_speed_ratings";
|
||||
case MetaExifModel:
|
||||
return "exif_model";
|
||||
case MetaExifDateTime:
|
||||
return "exif_datetime";
|
||||
case MetaAuthor:
|
||||
return "author";
|
||||
case MetaModifiedBy:
|
||||
return "modified_by";
|
||||
case MetaThumbnail:
|
||||
return "thumbnail";
|
||||
case MetaPages:
|
||||
return "pages";
|
||||
case MetaExifGpsLongitudeRef:
|
||||
return "exif_gps_longitude_ref";
|
||||
case MetaExifGpsLongitudeDMS:
|
||||
return "exif_gps_longitude_dms";
|
||||
case MetaExifGpsLongitudeDec:
|
||||
return "exif_gps_longitude_dec";
|
||||
case MetaExifGpsLatitudeRef:
|
||||
return "exif_gps_latitude_ref";
|
||||
case MetaExifGpsLatitudeDMS:
|
||||
return "exif_gps_latitude_dms";
|
||||
case MetaExifGpsLatitudeDec:
|
||||
return "exif_gps_latitude_dec";
|
||||
default:
|
||||
return NULL;
|
||||
LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -140,8 +190,8 @@ void write_document(document_t *doc) {
|
||||
dyn_buffer_t buf = dyn_buffer_create();
|
||||
|
||||
// Ignore root directory in the file path
|
||||
doc->ext = doc->ext - ScanCtx.index.desc.root_len;
|
||||
doc->base = doc->base - ScanCtx.index.desc.root_len;
|
||||
doc->ext = (short) (doc->ext - ScanCtx.index.desc.root_len);
|
||||
doc->base = (short) (doc->base - ScanCtx.index.desc.root_len);
|
||||
doc->filepath += ScanCtx.index.desc.root_len;
|
||||
|
||||
dyn_buffer_write(&buf, doc, sizeof(line_t));
|
||||
@@ -149,25 +199,25 @@ void write_document(document_t *doc) {
|
||||
|
||||
meta_line_t *meta = doc->meta_head;
|
||||
while (meta != NULL) {
|
||||
dyn_buffer_write_char(&buf, meta->key);
|
||||
dyn_buffer_write_short(&buf, (uint16_t) meta->key);
|
||||
|
||||
if (IS_META_INT(meta->key)) {
|
||||
dyn_buffer_write_int(&buf, meta->intval);
|
||||
dyn_buffer_write_int(&buf, meta->int_val);
|
||||
} else if (IS_META_LONG(meta->key)) {
|
||||
dyn_buffer_write_long(&buf, meta->longval);
|
||||
dyn_buffer_write_long(&buf, meta->long_val);
|
||||
} else {
|
||||
dyn_buffer_write_str(&buf, meta->strval);
|
||||
dyn_buffer_write_str(&buf, meta->str_val);
|
||||
}
|
||||
|
||||
meta_line_t *tmp = meta;
|
||||
meta = meta->next;
|
||||
free(tmp);
|
||||
}
|
||||
dyn_buffer_write_char(&buf, '\n');
|
||||
dyn_buffer_write_short(&buf, META_NEXT);
|
||||
|
||||
int res = write(index_fd, buf.buf, buf.cur);
|
||||
if (res == -1) {
|
||||
perror("write");
|
||||
LOG_FATALF("serialize.c", "Could not write document: %s", strerror(errno))
|
||||
}
|
||||
ScanCtx.stat_index_size += buf.cur;
|
||||
dyn_buffer_destroy(&buf);
|
||||
@@ -175,6 +225,8 @@ void write_document(document_t *doc) {
|
||||
|
||||
void thread_cleanup() {
|
||||
close(index_fd);
|
||||
cleanup_parse();
|
||||
cleanup_font();
|
||||
}
|
||||
|
||||
|
||||
@@ -183,9 +235,9 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
|
||||
dyn_buffer_t buf = dyn_buffer_create();
|
||||
|
||||
FILE *file = fopen(path, "rb");
|
||||
while (1) {
|
||||
while (TRUE) {
|
||||
buf.cur = 0;
|
||||
fread((void *) &line, 1, sizeof(line_t), file);
|
||||
size_t _ = fread((void *) &line, sizeof(line_t), 1, file);
|
||||
if (feof(file)) {
|
||||
break;
|
||||
}
|
||||
@@ -193,14 +245,19 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
|
||||
cJSON *document = cJSON_CreateObject();
|
||||
cJSON_AddStringToObject(document, "index", index_id);
|
||||
|
||||
char uuid_str[UUID_STR_LEN];
|
||||
uuid_unparse(line.uuid, uuid_str);
|
||||
char path_md5_str[MD5_STR_LENGTH];
|
||||
buf2hex(line.path_md5, sizeof(line.path_md5), path_md5_str);
|
||||
|
||||
cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime));
|
||||
const char *mime_text = mime_get_mime_text(line.mime);
|
||||
if (mime_text == NULL) {
|
||||
cJSON_AddNullToObject(document, "mime");
|
||||
} else {
|
||||
cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime));
|
||||
}
|
||||
cJSON_AddNumberToObject(document, "size", (double) line.size);
|
||||
cJSON_AddNumberToObject(document, "mtime", line.mtime);
|
||||
|
||||
int c;
|
||||
int c = 0;
|
||||
while ((c = getc(file)) != 0) {
|
||||
dyn_buffer_write_char(&buf, (char) c);
|
||||
}
|
||||
@@ -212,42 +269,43 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
|
||||
} else {
|
||||
*(buf.buf + line.ext) = '\0';
|
||||
}
|
||||
cJSON_AddStringToObject(document, "name", buf.buf + line.base);
|
||||
|
||||
char tmp[PATH_MAX * 3];
|
||||
|
||||
str_escape(tmp, buf.buf + line.base);
|
||||
cJSON_AddStringToObject(document, "name", tmp);
|
||||
|
||||
if (line.base > 0) {
|
||||
*(buf.buf + line.base - 1) = '\0';
|
||||
cJSON_AddStringToObject(document, "path", buf.buf);
|
||||
|
||||
str_escape(tmp, buf.buf);
|
||||
cJSON_AddStringToObject(document, "path", tmp);
|
||||
} else {
|
||||
cJSON_AddStringToObject(document, "path", "");
|
||||
}
|
||||
|
||||
enum metakey key = getc(file);
|
||||
while (key != '\n') {
|
||||
enum metakey key = 0;
|
||||
fread(&key, sizeof(uint16_t), 1, file);
|
||||
size_t ret;
|
||||
while (key != META_NEXT) {
|
||||
switch (key) {
|
||||
case MetaPages:
|
||||
case MetaWidth:
|
||||
case MetaHeight: {
|
||||
int value;
|
||||
fread(&value, sizeof(int), 1, file);
|
||||
ret = fread(&value, sizeof(int), 1, file);
|
||||
cJSON_AddNumberToObject(document, get_meta_key_text(key), value);
|
||||
break;
|
||||
}
|
||||
case MetaMediaDuration:
|
||||
case MetaMediaBitrate: {
|
||||
long value;
|
||||
fread(&value, sizeof(long), 1, file);
|
||||
ret = fread(&value, sizeof(long), 1, file);
|
||||
cJSON_AddNumberToObject(document, get_meta_key_text(key), (double) value);
|
||||
break;
|
||||
}
|
||||
case MetaMediaAudioCodec:
|
||||
case MetaMediaVideoCodec: {
|
||||
int value;
|
||||
fread(&value, sizeof(int), 1, file);
|
||||
const AVCodecDescriptor *desc = avcodec_descriptor_get(value);
|
||||
if (desc != NULL) {
|
||||
cJSON_AddStringToObject(document, get_meta_key_text(key), desc->name);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case MetaMediaVideoCodec:
|
||||
case MetaContent:
|
||||
case MetaArtist:
|
||||
case MetaAlbum:
|
||||
@@ -255,6 +313,24 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
|
||||
case MetaGenre:
|
||||
case MetaFontName:
|
||||
case MetaParent:
|
||||
case MetaExifMake:
|
||||
case MetaExifSoftware:
|
||||
case MetaExifExposureTime:
|
||||
case MetaExifFNumber:
|
||||
case MetaExifFocalLength:
|
||||
case MetaExifUserComment:
|
||||
case MetaExifIsoSpeedRatings:
|
||||
case MetaExifDateTime:
|
||||
case MetaExifModel:
|
||||
case MetaAuthor:
|
||||
case MetaModifiedBy:
|
||||
case MetaThumbnail:
|
||||
case MetaExifGpsLongitudeDMS:
|
||||
case MetaExifGpsLongitudeDec:
|
||||
case MetaExifGpsLongitudeRef:
|
||||
case MetaExifGpsLatitudeDMS:
|
||||
case MetaExifGpsLatitudeDec:
|
||||
case MetaExifGpsLatitudeRef:
|
||||
case MetaTitle: {
|
||||
buf.cur = 0;
|
||||
while ((c = getc(file)) != 0) {
|
||||
@@ -270,11 +346,39 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
|
||||
LOG_FATALF("serialize.c", "Invalid meta key (corrupt index): %x", key)
|
||||
}
|
||||
|
||||
key = getc(file);
|
||||
fread(&key, sizeof(uint16_t), 1, file);
|
||||
}
|
||||
|
||||
func(document, uuid_str);
|
||||
cJSON *meta_obj = NULL;
|
||||
if (IndexCtx.meta != NULL) {
|
||||
const char *meta_string = g_hash_table_lookup(IndexCtx.meta, path_md5_str);
|
||||
if (meta_string != NULL) {
|
||||
meta_obj = cJSON_Parse(meta_string);
|
||||
|
||||
cJSON *child;
|
||||
for (child = meta_obj->child; child != NULL; child = child->next) {
|
||||
char meta_key[4096];
|
||||
strcpy(meta_key, child->string);
|
||||
cJSON_DeleteItemFromObject(document, meta_key);
|
||||
cJSON_AddItemReferenceToObject(document, meta_key, child);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (IndexCtx.tags != NULL) {
|
||||
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, path_md5_str);
|
||||
if (tags_string != NULL) {
|
||||
cJSON *tags_arr = cJSON_Parse(tags_string);
|
||||
cJSON_DeleteItemFromObject(document, "tag");
|
||||
cJSON_AddItemToObject(document, "tag", tags_arr);
|
||||
}
|
||||
}
|
||||
|
||||
func(document, path_md5_str);
|
||||
cJSON_Delete(document);
|
||||
if (meta_obj) {
|
||||
cJSON_Delete(meta_obj);
|
||||
}
|
||||
}
|
||||
dyn_buffer_destroy(&buf);
|
||||
fclose(file);
|
||||
@@ -298,11 +402,11 @@ const char *json_type_array_fields[] = {
|
||||
void read_index_json(const char *path, UNUSED(const char *index_id), index_func func) {
|
||||
|
||||
FILE *file = fopen(path, "r");
|
||||
while (1) {
|
||||
while (TRUE) {
|
||||
char *line = NULL;
|
||||
size_t len;
|
||||
size_t read = getline(&line, &len, file);
|
||||
if (read == -1) {
|
||||
if (read < 0) {
|
||||
if (line) {
|
||||
free(line);
|
||||
}
|
||||
@@ -318,7 +422,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
|
||||
}
|
||||
|
||||
cJSON *document = cJSON_CreateObject();
|
||||
const char *uuid_str = cJSON_GetObjectItem(input, "_id")->valuestring;
|
||||
const char *id_str = cJSON_GetObjectItem(input, "_id")->valuestring;
|
||||
|
||||
for (int i = 0; i < (sizeof(json_type_copy_fields) / sizeof(json_type_copy_fields[0])); i++) {
|
||||
cJSON *value = cJSON_GetObjectItem(input, json_type_copy_fields[i]);
|
||||
@@ -346,7 +450,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
|
||||
}
|
||||
}
|
||||
|
||||
func(document, uuid_str);
|
||||
func(document, id_str);
|
||||
cJSON_Delete(document);
|
||||
cJSON_Delete(input);
|
||||
|
||||
@@ -354,7 +458,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
void read_index(const char *path, const char index_id[UUID_STR_LEN], const char *type, index_func func) {
|
||||
void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {
|
||||
|
||||
if (strcmp(type, INDEX_TYPE_BIN) == 0) {
|
||||
read_index_bin(path, index_id, func);
|
||||
@@ -367,15 +471,17 @@ void incremental_read(GHashTable *table, const char *filepath) {
|
||||
FILE *file = fopen(filepath, "rb");
|
||||
line_t line;
|
||||
|
||||
LOG_DEBUGF("serialize.c", "Incremental read %s", filepath)
|
||||
|
||||
while (1) {
|
||||
fread((void *) &line, 1, sizeof(line_t), file);
|
||||
if (feof(file)) {
|
||||
size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
|
||||
if (ret != 1 || feof(file)) {
|
||||
break;
|
||||
}
|
||||
|
||||
incremental_put(table, line.ino, line.mtime);
|
||||
incremental_put(table, line.path_md5, line.mtime);
|
||||
|
||||
while ((getc(file))) {}
|
||||
while ((getc(file)) != 0) {}
|
||||
skip_meta(file);
|
||||
}
|
||||
fclose(file);
|
||||
@@ -391,41 +497,55 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
|
||||
FILE *dst_file = fopen(dst_filepath, "ab");
|
||||
line_t line;
|
||||
|
||||
while (1) {
|
||||
fread((void *) &line, 1, sizeof(line_t), file);
|
||||
if (feof(file)) {
|
||||
LOG_DEBUGF("serialize.c", "Incremental copy %s", filepath)
|
||||
|
||||
while (TRUE) {
|
||||
size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
|
||||
if (ret != 1 || feof(file)) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (incremental_get(copy_table, line.ino)) {
|
||||
// Assume that files with parents still exist.
|
||||
// One way to "fix" this would be to check if the parent is marked for copy but it would consistently
|
||||
// delete files with grandparents, which is a side-effect worse than having orphaned files
|
||||
if (line.has_parent || incremental_get(copy_table, line.path_md5)) {
|
||||
fwrite(&line, sizeof(line), 1, dst_file);
|
||||
|
||||
size_t buf_len;
|
||||
char *buf = store_read(store, (char *) line.uuid, 16, &buf_len);
|
||||
store_write(dst_store, (char *) line.uuid, 16, buf, buf_len);
|
||||
free(buf);
|
||||
|
||||
// Copy filepath
|
||||
char filepath_buf[PATH_MAX];
|
||||
char c;
|
||||
char *ptr = filepath_buf;
|
||||
while ((c = (char) getc(file))) {
|
||||
fwrite(&c, sizeof(c), 1, dst_file);
|
||||
*ptr++ = c;
|
||||
}
|
||||
fwrite("\0", sizeof(c), 1, dst_file);
|
||||
*ptr = '\0';
|
||||
fwrite(filepath_buf, (ptr - filepath_buf) + 1, 1, dst_file);
|
||||
|
||||
enum metakey key;
|
||||
// Copy tn store contents
|
||||
size_t buf_len;
|
||||
char path_md5[MD5_DIGEST_LENGTH];
|
||||
MD5((unsigned char *) filepath_buf, (ptr - filepath_buf), (unsigned char *) path_md5);
|
||||
char *buf = store_read(store, path_md5, sizeof(path_md5), &buf_len);
|
||||
if (buf_len != 0) {
|
||||
store_write(dst_store, path_md5, sizeof(path_md5), buf, buf_len);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
enum metakey key = 0;
|
||||
while (1) {
|
||||
key = getc(file);
|
||||
if (key == '\n') {
|
||||
fread(&key, sizeof(uint16_t), 1, file);
|
||||
fwrite(&key, sizeof(uint16_t), 1, dst_file);
|
||||
if (key == META_NEXT) {
|
||||
break;
|
||||
}
|
||||
fwrite(&key, sizeof(char), 1, dst_file);
|
||||
|
||||
if (IS_META_INT(key)) {
|
||||
int val;
|
||||
fread(&val, sizeof(val), 1, file);
|
||||
ret = fread(&val, sizeof(val), 1, file);
|
||||
fwrite(&val, sizeof(val), 1, dst_file);
|
||||
} else if (IS_META_LONG(key)) {
|
||||
long val;
|
||||
fread(&val, sizeof(val), 1, file);
|
||||
ret = fread(&val, sizeof(val), 1, file);
|
||||
fwrite(&val, sizeof(val), 1, dst_file);
|
||||
} else {
|
||||
while ((c = (char) getc(file))) {
|
||||
@@ -435,8 +555,10 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
while ((getc(file))) {}
|
||||
skip_meta(file);
|
||||
}
|
||||
}
|
||||
fclose(file);
|
||||
fclose(dst_file);
|
||||
}
|
||||
|
||||
@@ -2,16 +2,19 @@
|
||||
#define SIST2_SERIALIZE_H
|
||||
|
||||
#include "src/sist.h"
|
||||
#include <sys/syscall.h>
|
||||
#include "store.h"
|
||||
|
||||
typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]);
|
||||
#include <sys/syscall.h>
|
||||
#include <glib.h>
|
||||
|
||||
typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);
|
||||
|
||||
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
|
||||
const char *dst_filepath, GHashTable *copy_table);
|
||||
|
||||
void write_document(document_t *doc);
|
||||
|
||||
void read_index(const char *path, const char[UUID_STR_LEN], const char *type, index_func);
|
||||
void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);
|
||||
|
||||
void incremental_read(GHashTable *table, const char *filepath);
|
||||
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
#include "store.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
store_t *store_create(char *path) {
|
||||
store_t *store_create(char *path, size_t chunk_size) {
|
||||
|
||||
store_t *store = malloc(sizeof(struct store_t));
|
||||
#if (SIST_FAKE_STORE != 1)
|
||||
store->chunk_size = chunk_size;
|
||||
pthread_rwlock_init(&store->lock, NULL);
|
||||
|
||||
mdb_env_create(&store->env);
|
||||
@@ -15,11 +17,10 @@ store_t *store_create(char *path) {
|
||||
);
|
||||
|
||||
if (open_ret != 0) {
|
||||
fprintf(stderr, "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path);
|
||||
exit(1);
|
||||
LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path)
|
||||
}
|
||||
|
||||
store->size = (size_t) 1024 * 1024 * 5;
|
||||
store->size = (size_t) store->chunk_size;
|
||||
ScanCtx.stat_tn_size = 0;
|
||||
mdb_env_set_mapsize(store->env, store->size);
|
||||
|
||||
@@ -28,26 +29,39 @@ store_t *store_create(char *path) {
|
||||
mdb_txn_begin(store->env, NULL, 0, &txn);
|
||||
mdb_dbi_open(txn, NULL, 0, &store->dbi);
|
||||
mdb_txn_commit(txn);
|
||||
#endif
|
||||
|
||||
return store;
|
||||
}
|
||||
|
||||
void store_destroy(store_t *store) {
|
||||
|
||||
#if (SIST_FAKE_STORE != 1)
|
||||
pthread_rwlock_destroy(&store->lock);
|
||||
mdb_close(store->env, store->dbi);
|
||||
mdb_env_close(store->env);
|
||||
#endif
|
||||
free(store);
|
||||
}
|
||||
|
||||
void store_flush(store_t *store) {
|
||||
mdb_env_sync(store->env, TRUE);
|
||||
}
|
||||
|
||||
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {
|
||||
|
||||
if (LogCtx.very_verbose) {
|
||||
char uuid_str[UUID_STR_LEN];
|
||||
uuid_unparse((unsigned char *) key, uuid_str);
|
||||
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", uuid_str, buf_len)
|
||||
if (key_len == MD5_DIGEST_LENGTH) {
|
||||
char path_md5_str[MD5_STR_LENGTH];
|
||||
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
|
||||
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
|
||||
} else {
|
||||
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
|
||||
}
|
||||
}
|
||||
|
||||
#if (SIST_FAKE_STORE != 1)
|
||||
|
||||
MDB_val mdb_key;
|
||||
mdb_key.mv_data = key;
|
||||
mdb_key.mv_size = key_len;
|
||||
@@ -70,7 +84,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
|
||||
// Cannot resize when there is a opened transaction.
|
||||
// Resize take effect on the next commit.
|
||||
pthread_rwlock_wrlock(&store->lock);
|
||||
store->size += 1024 * 1024 * 50;
|
||||
store->size += store->chunk_size;
|
||||
mdb_env_set_mapsize(store->env, store->size);
|
||||
mdb_txn_begin(store->env, NULL, 0, &txn);
|
||||
put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
|
||||
@@ -82,12 +96,15 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
|
||||
pthread_rwlock_unlock(&store->lock);
|
||||
|
||||
if (put_ret != 0) {
|
||||
printf("%s\n", mdb_strerror(put_ret));
|
||||
LOG_ERROR("store.c", mdb_strerror(put_ret))
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen) {
|
||||
char *buf = NULL;
|
||||
|
||||
#if (SIST_FAKE_STORE != 1)
|
||||
MDB_val mdb_key;
|
||||
mdb_key.mv_data = key;
|
||||
mdb_key.mv_size = key_len;
|
||||
@@ -108,6 +125,46 @@ char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen)
|
||||
}
|
||||
|
||||
mdb_txn_abort(txn);
|
||||
#endif
|
||||
return buf;
|
||||
}
|
||||
|
||||
GHashTable *store_read_all(store_t *store) {
|
||||
|
||||
int count = 0;
|
||||
|
||||
GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);
|
||||
|
||||
MDB_txn *txn = NULL;
|
||||
mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);
|
||||
|
||||
MDB_cursor *cur = NULL;
|
||||
mdb_cursor_open(txn, store->dbi, &cur);
|
||||
|
||||
MDB_val key;
|
||||
MDB_val value;
|
||||
|
||||
while (mdb_cursor_get(cur, &key, &value, MDB_NEXT) == 0) {
|
||||
char *key_str = malloc(key.mv_size);
|
||||
memcpy(key_str, key.mv_data, key.mv_size);
|
||||
char *val_str = malloc(value.mv_size);
|
||||
memcpy(val_str, value.mv_data, value.mv_size);
|
||||
|
||||
g_hash_table_insert(table, key_str, val_str);
|
||||
count += 1;
|
||||
}
|
||||
|
||||
const char *path;
|
||||
mdb_env_get_path(store->env, &path);
|
||||
LOG_DEBUGF("store.c", "Read %d entries from %s", count, path);
|
||||
|
||||
mdb_cursor_close(cur);
|
||||
mdb_txn_abort(txn);
|
||||
return table;
|
||||
}
|
||||
|
||||
|
||||
void store_copy(store_t *store, const char *destination) {
|
||||
mkdir(destination, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||
mdb_env_copy(store->env, destination);
|
||||
}
|
||||
|
||||
@@ -4,21 +4,32 @@
|
||||
#include <pthread.h>
|
||||
#include <lmdb.h>
|
||||
|
||||
#include <glib.h>
|
||||
|
||||
#define STORE_SIZE_TN 1024 * 1024 * 5
|
||||
#define STORE_SIZE_TAG 1024 * 16
|
||||
#define STORE_SIZE_META STORE_SIZE_TAG
|
||||
|
||||
typedef struct store_t {
|
||||
MDB_dbi dbi;
|
||||
MDB_env *env;
|
||||
size_t size;
|
||||
size_t chunk_size;
|
||||
pthread_rwlock_t lock;
|
||||
} store_t;
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
store_t *store_create(char *path);
|
||||
store_t *store_create(char *path, size_t chunk_size);
|
||||
|
||||
void store_destroy(store_t *store);
|
||||
|
||||
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len);
|
||||
|
||||
void store_flush(store_t *store);
|
||||
|
||||
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);
|
||||
|
||||
GHashTable *store_read_all(store_t *store);
|
||||
|
||||
void store_copy(store_t *store, const char *destination);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
#include "walk.h"
|
||||
#include "src/ctx.h"
|
||||
#include "src/parsing/parse.h"
|
||||
|
||||
#include <ftw.h>
|
||||
|
||||
__always_inline
|
||||
parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
|
||||
@@ -15,12 +18,13 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
|
||||
job->ext = len;
|
||||
}
|
||||
|
||||
job->info = *info;
|
||||
job->vfile.info = *info;
|
||||
|
||||
memset(job->parent, 0, 16);
|
||||
memset(job->parent, 0, MD5_DIGEST_LENGTH);
|
||||
|
||||
job->vfile.filepath = job->filepath;
|
||||
job->vfile.read = fs_read;
|
||||
job->vfile.reset = fs_reset;
|
||||
job->vfile.close = fs_close;
|
||||
job->vfile.fd = -1;
|
||||
job->vfile.is_fs_file = TRUE;
|
||||
@@ -28,8 +32,18 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
|
||||
return job;
|
||||
}
|
||||
|
||||
int sub_strings[30];
|
||||
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, filepath, strlen(filepath), 0, 0, sub_strings, sizeof(sub_strings)) >= 0)
|
||||
|
||||
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
|
||||
if (ftw->level <= ScanCtx.depth && typeflag == FTW_F && S_ISREG(info->st_mode)) {
|
||||
|
||||
if (typeflag == FTW_F && S_ISREG(info->st_mode) && ftw->level <= ScanCtx.depth) {
|
||||
|
||||
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
|
||||
LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
|
||||
return 0;
|
||||
}
|
||||
|
||||
parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
|
||||
tpool_add_work(ScanCtx.pool, parse, job);
|
||||
}
|
||||
|
||||
@@ -3,8 +3,6 @@
|
||||
|
||||
#define _XOPEN_SOURCE 500
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
int walk_directory_tree(const char *);
|
||||
|
||||
#endif
|
||||
|
||||
38
src/log.c
38
src/log.c
@@ -1,15 +1,17 @@
|
||||
#include "log.h"
|
||||
|
||||
#include <pthread.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
const char *log_colors[] = {
|
||||
"\033[34m", "\033[01;34m", "\033[0m",
|
||||
"\033[01;33m", "\033[31m", "\033[01;31m"
|
||||
"\033[34m", "\033[01;34m", "\033[01;33m", "\033[0m", "\033[31m", "\033[01;31m"
|
||||
};
|
||||
|
||||
const char *log_levels[] = {
|
||||
"DEBUG", "INFO", "WARNING", "ERROR", "FATAL"
|
||||
};
|
||||
|
||||
void sist_logf(char *filepath, int level, char *format, ...) {
|
||||
void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
|
||||
|
||||
static int is_tty = -1;
|
||||
if (is_tty == -1) {
|
||||
@@ -31,23 +33,20 @@ void sist_logf(char *filepath, int level, char *format, ...) {
|
||||
if (is_tty) {
|
||||
log_len = snprintf(
|
||||
log_str, sizeof(log_str),
|
||||
"\033[%dm[%04X]%s [%s] [%s %s] ",
|
||||
"\033[%dm[%04llX]%s [%s] [%s %s] ",
|
||||
31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
|
||||
datetime, log_levels[level], filepath
|
||||
);
|
||||
} else {
|
||||
log_len = snprintf(
|
||||
log_str, sizeof(log_str),
|
||||
"[%04X] [%s] [%s %s] ",
|
||||
"[%04llX] [%s] [%s %s] ",
|
||||
pid, datetime, log_levels[level], filepath
|
||||
);
|
||||
}
|
||||
|
||||
va_list ap;
|
||||
va_start(ap, format);
|
||||
size_t maxsize = sizeof(log_str) - log_len;
|
||||
log_len += vsnprintf(log_str + log_len, maxsize, format, ap);
|
||||
va_end(ap);
|
||||
|
||||
if (is_tty) {
|
||||
log_len += sprintf(log_str + log_len, "\033[0m\n");
|
||||
@@ -56,10 +55,20 @@ void sist_logf(char *filepath, int level, char *format, ...) {
|
||||
log_len += 1;
|
||||
}
|
||||
|
||||
write(STDERR_FILENO, log_str, log_len);
|
||||
int ret = write(STDERR_FILENO, log_str, log_len);
|
||||
if (ret == -1) {
|
||||
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno))
|
||||
}
|
||||
}
|
||||
|
||||
void sist_log(char *filepath, int level, char *str) {
|
||||
void sist_logf(const char *filepath, int level, char *format, ...) {
|
||||
va_list ap;
|
||||
va_start(ap, format);
|
||||
vsist_logf(filepath, level, format, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
void sist_log(const char *filepath, int level, char *str) {
|
||||
|
||||
static int is_tty = -1;
|
||||
if (is_tty == -1) {
|
||||
@@ -81,7 +90,7 @@ void sist_log(char *filepath, int level, char *str) {
|
||||
if (is_tty) {
|
||||
log_len = snprintf(
|
||||
log_str, sizeof(log_str),
|
||||
"\033[%dm[%04X]%s [%s] [%s %s] %s \033[0m\n",
|
||||
"\033[%dm[%04llX]%s [%s] [%s %s] %s \033[0m\n",
|
||||
31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
|
||||
datetime, log_levels[level], filepath,
|
||||
str
|
||||
@@ -89,11 +98,14 @@ void sist_log(char *filepath, int level, char *str) {
|
||||
} else {
|
||||
log_len = snprintf(
|
||||
log_str, sizeof(log_str),
|
||||
"[%04X] [%s] [%s %s] %s \n",
|
||||
"[%04llX] [%s] [%s %s] %s \n",
|
||||
pid, datetime, log_levels[level], filepath,
|
||||
str
|
||||
);
|
||||
}
|
||||
|
||||
write(STDERR_FILENO, log_str, log_len);
|
||||
int ret = write(STDERR_FILENO, log_str, log_len);
|
||||
if (ret == -1) {
|
||||
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#ifndef SIST2_LOG_H
|
||||
#define SIST2_LOG_H
|
||||
|
||||
|
||||
#define LOG_MAX_LENGTH 8192
|
||||
|
||||
#define SIST_DEBUG 0
|
||||
@@ -36,10 +37,11 @@
|
||||
sist_log(filepath, SIST_FATAL, str);\
|
||||
exit(-1);
|
||||
|
||||
#include "src/sist.h"
|
||||
#include "sist.h"
|
||||
|
||||
void sist_logf(char *filepath, int level, char *format, ...);
|
||||
void sist_logf(const char *filepath, int level, char *format, ...);
|
||||
void vsist_logf(const char *filepath, int level, char *format, va_list ap);
|
||||
|
||||
void sist_log(char *filepath, int level, char *str);
|
||||
void sist_log(const char *filepath, int level, char *str);
|
||||
|
||||
#endif
|
||||
|
||||
424
src/main.c
424
src/main.c
@@ -1,32 +1,106 @@
|
||||
#include "sist.h"
|
||||
#include "ctx.h"
|
||||
|
||||
#include <third-party/argparse/argparse.h>
|
||||
#include <locale.h>
|
||||
|
||||
#include "cli.h"
|
||||
#include "io/serialize.h"
|
||||
#include "io/store.h"
|
||||
#include "tpool.h"
|
||||
#include "io/walk.h"
|
||||
#include "index/elastic.h"
|
||||
#include "web/serve.h"
|
||||
#include "parsing/mime.h"
|
||||
#include "parsing/parse.h"
|
||||
|
||||
#include "stats.h"
|
||||
|
||||
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
|
||||
|
||||
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
|
||||
|
||||
|
||||
static const char *const Version = "1.2.2";
|
||||
static const char *const Version = "2.10.1";
|
||||
static const char *const usage[] = {
|
||||
"sist2 scan [OPTION]... PATH",
|
||||
"sist2 index [OPTION]... INDEX",
|
||||
"sist2 web [OPTION]... INDEX...",
|
||||
"sist2 exec-script [OPTION]... INDEX",
|
||||
NULL,
|
||||
};
|
||||
|
||||
void global_init() {
|
||||
curl_global_init(CURL_GLOBAL_NOTHING);
|
||||
av_log_set_level(AV_LOG_QUIET);
|
||||
opcInitLibrary();
|
||||
#include<signal.h>
|
||||
#include<unistd.h>
|
||||
|
||||
static __sighandler_t sigsegv_handler = NULL;
|
||||
static __sighandler_t sigabrt_handler = NULL;
|
||||
|
||||
void sig_handler(int signum) {
|
||||
|
||||
LogCtx.verbose = 1;
|
||||
LogCtx.very_verbose = 1;
|
||||
|
||||
LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
|
||||
LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
|
||||
|
||||
GHashTableIter iter;
|
||||
g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files);
|
||||
|
||||
void *key;
|
||||
void *value;
|
||||
while (g_hash_table_iter_next(&iter, &key, &value)) {
|
||||
parse_job_t *job = value;
|
||||
|
||||
if (isatty(STDERR_FILENO)) {
|
||||
LOG_DEBUGF(
|
||||
"*SIGNAL HANDLER*",
|
||||
"Thread \033[%dm[%04llX]\033[0m was working on job '%s'",
|
||||
31 + ((unsigned int) key) % 7, key, job->filepath
|
||||
);
|
||||
} else {
|
||||
LOG_DEBUGF(
|
||||
"*SIGNAL HANDLER*",
|
||||
"THREAD [%04llX] was working on job %s",
|
||||
key, job->filepath
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
tpool_dump_debug_info(ScanCtx.pool);
|
||||
|
||||
LOG_INFO(
|
||||
"*SIGNAL HANDLER*",
|
||||
"Please consider creating a bug report at https://github.com/simon987/sist2/issues !"
|
||||
)
|
||||
LOG_INFO(
|
||||
"*SIGNAL HANDLER*",
|
||||
"sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs"
|
||||
)
|
||||
|
||||
#ifndef SIST_DEBUG
|
||||
LOG_WARNING(
|
||||
"*SIGNAL HANDLER*",
|
||||
"You are running sist2 in release mode! Please consider downloading the debug binary from the Github "
|
||||
"releases page to provide additionnal information when submitting a bug report."
|
||||
)
|
||||
#endif
|
||||
|
||||
if (signum == SIGSEGV && sigsegv_handler != NULL) {
|
||||
sigsegv_handler(signum);
|
||||
} else if (signum == SIGABRT && sigabrt_handler != NULL) {
|
||||
sigabrt_handler(signum);
|
||||
}
|
||||
}
|
||||
|
||||
void init_dir(const char *dirpath) {
|
||||
char path[PATH_MAX];
|
||||
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
|
||||
|
||||
uuid_t uuid;
|
||||
uuid_generate(uuid);
|
||||
uuid_unparse(uuid, ScanCtx.index.desc.uuid);
|
||||
unsigned char index_md5[MD5_DIGEST_LENGTH];
|
||||
MD5((unsigned char *) ScanCtx.index.desc.name, strlen(ScanCtx.index.desc.name), index_md5);
|
||||
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
|
||||
|
||||
time(&ScanCtx.index.desc.timestamp);
|
||||
strcpy(ScanCtx.index.desc.version, Version);
|
||||
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_BIN);
|
||||
@@ -38,29 +112,159 @@ void scan_print_header() {
|
||||
LOG_INFOF("main.c", "sist2 v%s", Version)
|
||||
}
|
||||
|
||||
void sist2_scan(scan_args_t *args) {
|
||||
void _store(char *key, size_t key_len, char *buf, size_t buf_len) {
|
||||
store_write(ScanCtx.index.store, key, key_len, buf, buf_len);
|
||||
}
|
||||
|
||||
void _log(const char *filepath, int level, char *str) {
|
||||
if (level == LEVEL_FATAL) {
|
||||
sist_log(filepath, level, str);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
if (LogCtx.verbose) {
|
||||
if (level == LEVEL_DEBUG) {
|
||||
if (LogCtx.very_verbose) {
|
||||
sist_log(filepath, level, str);
|
||||
}
|
||||
} else {
|
||||
sist_log(filepath, level, str);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void _logf(const char *filepath, int level, char *format, ...) {
|
||||
|
||||
va_list args;
|
||||
|
||||
va_start(args, format);
|
||||
if (level == LEVEL_FATAL) {
|
||||
vsist_logf(filepath, level, format, args);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
if (LogCtx.verbose) {
|
||||
if (level == LEVEL_DEBUG) {
|
||||
if (LogCtx.very_verbose) {
|
||||
vsist_logf(filepath, level, format, args);
|
||||
}
|
||||
} else {
|
||||
vsist_logf(filepath, level, format, args);
|
||||
}
|
||||
}
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
void initialize_scan_context(scan_args_t *args) {
|
||||
|
||||
// Arc
|
||||
ScanCtx.arc_ctx.mode = args->archive_mode;
|
||||
ScanCtx.arc_ctx.log = _log;
|
||||
ScanCtx.arc_ctx.logf = _logf;
|
||||
ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
|
||||
if (args->archive_passphrase != NULL) {
|
||||
strcpy(ScanCtx.arc_ctx.passphrase, args->archive_passphrase);
|
||||
} else {
|
||||
ScanCtx.arc_ctx.passphrase[0] = 0;
|
||||
}
|
||||
|
||||
ScanCtx.dbg_current_files = g_hash_table_new(g_int64_hash, g_int64_equal);
|
||||
|
||||
// Comic
|
||||
ScanCtx.comic_ctx.log = _log;
|
||||
ScanCtx.comic_ctx.logf = _logf;
|
||||
ScanCtx.comic_ctx.store = _store;
|
||||
ScanCtx.comic_ctx.tn_size = args->size;
|
||||
ScanCtx.comic_ctx.tn_qscale = args->quality;
|
||||
ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
|
||||
ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
|
||||
|
||||
// Ebook
|
||||
pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
|
||||
ScanCtx.ebook_ctx.content_size = args->content_size;
|
||||
ScanCtx.ebook_ctx.tn_size = args->size;
|
||||
ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
|
||||
ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
|
||||
ScanCtx.ebook_ctx.log = _log;
|
||||
ScanCtx.ebook_ctx.logf = _logf;
|
||||
ScanCtx.ebook_ctx.store = _store;
|
||||
|
||||
// Font
|
||||
ScanCtx.font_ctx.enable_tn = args->size > 0;
|
||||
ScanCtx.font_ctx.log = _log;
|
||||
ScanCtx.font_ctx.logf = _logf;
|
||||
ScanCtx.font_ctx.store = _store;
|
||||
|
||||
// Media
|
||||
ScanCtx.media_ctx.tn_qscale = args->quality;
|
||||
ScanCtx.media_ctx.tn_size = args->size;
|
||||
ScanCtx.media_ctx.log = _log;
|
||||
ScanCtx.media_ctx.logf = _logf;
|
||||
ScanCtx.media_ctx.store = _store;
|
||||
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
|
||||
ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
|
||||
init_media();
|
||||
|
||||
// OOXML
|
||||
ScanCtx.ooxml_ctx.content_size = args->content_size;
|
||||
ScanCtx.ooxml_ctx.log = _log;
|
||||
ScanCtx.ooxml_ctx.logf = _logf;
|
||||
ScanCtx.ooxml_ctx.store = _store;
|
||||
|
||||
// MOBI
|
||||
ScanCtx.mobi_ctx.content_size = args->content_size;
|
||||
ScanCtx.mobi_ctx.log = _log;
|
||||
ScanCtx.mobi_ctx.logf = _logf;
|
||||
|
||||
// TEXT
|
||||
ScanCtx.text_ctx.content_size = args->content_size;
|
||||
ScanCtx.text_ctx.log = _log;
|
||||
ScanCtx.text_ctx.logf = _logf;
|
||||
|
||||
// MSDOC
|
||||
ScanCtx.msdoc_ctx.tn_size = args->size;
|
||||
ScanCtx.msdoc_ctx.content_size = args->content_size;
|
||||
ScanCtx.msdoc_ctx.log = _log;
|
||||
ScanCtx.msdoc_ctx.logf = _logf;
|
||||
ScanCtx.msdoc_ctx.store = _store;
|
||||
ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/msword");
|
||||
|
||||
ScanCtx.tn_qscale = args->quality;
|
||||
ScanCtx.tn_size = args->size;
|
||||
ScanCtx.content_size = args->content_size;
|
||||
ScanCtx.threads = args->threads;
|
||||
ScanCtx.depth = args->depth;
|
||||
ScanCtx.archive_mode = args->archive_mode;
|
||||
|
||||
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
|
||||
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
|
||||
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
|
||||
strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
|
||||
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
|
||||
ScanCtx.tesseract_lang = args->tesseract_lang;
|
||||
ScanCtx.fast = args->fast;
|
||||
|
||||
init_dir(ScanCtx.index.path);
|
||||
// Raw
|
||||
ScanCtx.raw_ctx.tn_qscale = args->quality;
|
||||
ScanCtx.raw_ctx.tn_size = args->size;
|
||||
ScanCtx.raw_ctx.log = _log;
|
||||
ScanCtx.raw_ctx.logf = _logf;
|
||||
ScanCtx.raw_ctx.store = _store;
|
||||
}
|
||||
|
||||
|
||||
void sist2_scan(scan_args_t *args) {
|
||||
|
||||
ScanCtx.mime_table = mime_get_mime_table();
|
||||
ScanCtx.ext_table = mime_get_ext_table();
|
||||
|
||||
initialize_scan_context(args);
|
||||
|
||||
init_dir(ScanCtx.index.path);
|
||||
|
||||
char store_path[PATH_MAX];
|
||||
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
|
||||
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||
ScanCtx.index.store = store_create(store_path);
|
||||
ScanCtx.index.store = store_create(store_path, STORE_SIZE_TN);
|
||||
|
||||
snprintf(store_path, PATH_MAX, "%smeta", ScanCtx.index.path);
|
||||
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||
ScanCtx.index.meta_store = store_create(store_path, STORE_SIZE_META);
|
||||
|
||||
scan_print_header();
|
||||
|
||||
@@ -70,23 +274,32 @@ void sist2_scan(scan_args_t *args) {
|
||||
|
||||
DIR *dir = opendir(args->incremental);
|
||||
if (dir == NULL) {
|
||||
perror("opendir");
|
||||
return;
|
||||
LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno))
|
||||
}
|
||||
|
||||
char descriptor_path[PATH_MAX];
|
||||
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental);
|
||||
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
|
||||
|
||||
if (strcmp(original_desc.version, Version) != 0) {
|
||||
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", original_desc.version,
|
||||
Version, INDEX_VERSION_EXTERNAL)
|
||||
}
|
||||
|
||||
struct dirent *de;
|
||||
while ((de = readdir(dir)) != NULL) {
|
||||
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
|
||||
char file_path[PATH_MAX];
|
||||
snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name);
|
||||
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
|
||||
incremental_read(ScanCtx.original_table, file_path);
|
||||
}
|
||||
}
|
||||
closedir(dir);
|
||||
|
||||
printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table));
|
||||
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
|
||||
}
|
||||
|
||||
ScanCtx.pool = tpool_create(args->threads, thread_cleanup);
|
||||
ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE);
|
||||
tpool_start(ScanCtx.pool);
|
||||
walk_directory_tree(ScanCtx.index.desc.root);
|
||||
tpool_wait(ScanCtx.pool);
|
||||
@@ -96,7 +309,7 @@ void sist2_scan(scan_args_t *args) {
|
||||
char dst_path[PATH_MAX];
|
||||
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
|
||||
snprintf(dst_path, PATH_MAX, "%s_index_original", ScanCtx.index.path);
|
||||
store_t *source = store_create(store_path);
|
||||
store_t *source = store_create(store_path, STORE_SIZE_TN);
|
||||
|
||||
DIR *dir = opendir(args->incremental);
|
||||
if (dir == NULL) {
|
||||
@@ -107,24 +320,34 @@ void sist2_scan(scan_args_t *args) {
|
||||
while ((de = readdir(dir)) != NULL) {
|
||||
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
|
||||
char file_path[PATH_MAX];
|
||||
snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name);
|
||||
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
|
||||
incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table);
|
||||
}
|
||||
}
|
||||
closedir(dir);
|
||||
store_destroy(source);
|
||||
|
||||
snprintf(store_path, PATH_MAX, "%stags", args->incremental);
|
||||
snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
|
||||
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||
store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
|
||||
store_copy(source_tags, dst_path);
|
||||
store_destroy(source_tags);
|
||||
}
|
||||
|
||||
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
|
||||
|
||||
store_destroy(ScanCtx.index.store);
|
||||
}
|
||||
|
||||
void sist2_index(index_args_t *args) {
|
||||
|
||||
IndexCtx.es_url = args->es_url;
|
||||
IndexCtx.es_index = args->es_index;
|
||||
IndexCtx.batch_size = args->batch_size;
|
||||
|
||||
if (!args->print) {
|
||||
elastic_init(args->force_reset);
|
||||
elastic_init(args->force_reset, args->es_mappings, args->es_settings);
|
||||
}
|
||||
|
||||
char descriptor_path[PATH_MAX];
|
||||
@@ -135,17 +358,25 @@ void sist2_index(index_args_t *args) {
|
||||
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
|
||||
|
||||
if (strcmp(desc.version, Version) != 0 && strcmp(desc.version, INDEX_VERSION_EXTERNAL) != 0) {
|
||||
fprintf(stderr, "Version mismatch! Index is %s but executable is %s/%s\n",
|
||||
desc.version, Version, INDEX_VERSION_EXTERNAL);
|
||||
return;
|
||||
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", desc.version, Version,
|
||||
INDEX_VERSION_EXTERNAL)
|
||||
}
|
||||
|
||||
DIR *dir = opendir(args->index_path);
|
||||
if (dir == NULL) {
|
||||
perror("opendir");
|
||||
return;
|
||||
LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno))
|
||||
}
|
||||
|
||||
char path_tmp[PATH_MAX];
|
||||
snprintf(path_tmp, sizeof(path_tmp), "%s/tags", args->index_path);
|
||||
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||
IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
|
||||
IndexCtx.tags = store_read_all(IndexCtx.tag_store);
|
||||
|
||||
snprintf(path_tmp, sizeof(path_tmp), "%s/meta", args->index_path);
|
||||
IndexCtx.meta_store = store_create(path_tmp, STORE_SIZE_META);
|
||||
IndexCtx.meta = store_read_all(IndexCtx.meta_store);
|
||||
|
||||
index_func f;
|
||||
if (args->print) {
|
||||
f = print_json;
|
||||
@@ -153,27 +384,64 @@ void sist2_index(index_args_t *args) {
|
||||
f = index_json;
|
||||
}
|
||||
|
||||
void (*cleanup)();
|
||||
if (args->print) {
|
||||
cleanup = NULL;
|
||||
} else {
|
||||
cleanup = elastic_cleanup;
|
||||
}
|
||||
|
||||
IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE);
|
||||
tpool_start(IndexCtx.pool);
|
||||
|
||||
struct dirent *de;
|
||||
while ((de = readdir(dir)) != NULL) {
|
||||
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
|
||||
char file_path[PATH_MAX];
|
||||
snprintf(file_path, PATH_MAX, "%s/%s", args->index_path, de->d_name);
|
||||
read_index(file_path, desc.uuid, desc.type, f);
|
||||
read_index(file_path, desc.id, desc.type, f);
|
||||
}
|
||||
}
|
||||
closedir(dir);
|
||||
|
||||
tpool_wait(IndexCtx.pool);
|
||||
|
||||
tpool_destroy(IndexCtx.pool);
|
||||
|
||||
if (!args->print) {
|
||||
elastic_flush();
|
||||
destroy_indexer(args->script, desc.uuid);
|
||||
finish_indexer(args->script, args->async_script, desc.id);
|
||||
}
|
||||
|
||||
store_destroy(IndexCtx.tag_store);
|
||||
g_hash_table_remove_all(IndexCtx.tags);
|
||||
g_hash_table_destroy(IndexCtx.tags);
|
||||
}
|
||||
|
||||
void sist2_exec_script(exec_args_t *args) {
|
||||
|
||||
LogCtx.verbose = TRUE;
|
||||
|
||||
char descriptor_path[PATH_MAX];
|
||||
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path);
|
||||
index_descriptor_t desc = read_index_descriptor(descriptor_path);
|
||||
|
||||
IndexCtx.es_url = args->es_url;
|
||||
|
||||
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
|
||||
|
||||
execute_update_script(args->script, args->async_script, desc.id);
|
||||
free(args->script);
|
||||
}
|
||||
|
||||
void sist2_web(web_args_t *args) {
|
||||
|
||||
WebCtx.es_url = args->es_url;
|
||||
WebCtx.es_index = args->es_index;
|
||||
WebCtx.index_count = args->index_count;
|
||||
WebCtx.b64credentials = args->b64credentials;
|
||||
WebCtx.auth_user = args->auth_user;
|
||||
WebCtx.auth_pass = args->auth_pass;
|
||||
WebCtx.auth_enabled = args->auth_enabled;
|
||||
WebCtx.tag_auth_enabled = args->tag_auth_enabled;
|
||||
|
||||
for (int i = 0; i < args->index_count; i++) {
|
||||
char *abs_path = abspath(args->indices[i]);
|
||||
@@ -183,7 +451,11 @@ void sist2_web(web_args_t *args) {
|
||||
char path_tmp[PATH_MAX];
|
||||
|
||||
snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path);
|
||||
WebCtx.indices[i].store = store_create(path_tmp);
|
||||
WebCtx.indices[i].store = store_create(path_tmp, STORE_SIZE_TN);
|
||||
|
||||
snprintf(path_tmp, PATH_MAX, "%stags", abs_path);
|
||||
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||
WebCtx.indices[i].tag_store = store_create(path_tmp, STORE_SIZE_TAG);
|
||||
|
||||
snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path);
|
||||
WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
|
||||
@@ -193,21 +465,28 @@ void sist2_web(web_args_t *args) {
|
||||
free(abs_path);
|
||||
}
|
||||
|
||||
serve(args->bind, args->port);
|
||||
serve(args->listen_address);
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
sigsegv_handler = signal(SIGSEGV, sig_handler);
|
||||
sigabrt_handler = signal(SIGABRT, sig_handler);
|
||||
|
||||
global_init();
|
||||
setlocale(LC_ALL, "");
|
||||
|
||||
scan_args_t *scan_args = scan_args_create();
|
||||
index_args_t *index_args = index_args_create();
|
||||
web_args_t *web_args = web_args_create();
|
||||
exec_args_t *exec_args = exec_args_create();
|
||||
|
||||
int arg_version = 0;
|
||||
|
||||
char *common_es_url = NULL;
|
||||
char *common_es_index = NULL;
|
||||
char *common_script_path = NULL;
|
||||
int common_async_script = 0;
|
||||
int common_threads = 0;
|
||||
|
||||
struct argparse_option options[] = {
|
||||
OPT_HELP(),
|
||||
@@ -217,7 +496,7 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
|
||||
|
||||
OPT_GROUP("Scan options"),
|
||||
OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_FLOAT('q', "quality", &scan_args->quality,
|
||||
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
|
||||
OPT_INTEGER(0, "size", &scan_args->size,
|
||||
@@ -234,22 +513,45 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
|
||||
"skip: Don't parse, list: only get file names as text, "
|
||||
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
|
||||
OPT_STRING(0, "archive-passphrase", &scan_args->archive_passphrase,
|
||||
"Passphrase for encrypted archive files"),
|
||||
|
||||
OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see "
|
||||
"which are installed on your machine)"),
|
||||
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
|
||||
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
|
||||
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
|
||||
"(see USAGE.md). DEFAULT: 0.0005"),
|
||||
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
|
||||
"Maximum memory buffer size per thread in MB for files inside archives "
|
||||
"(see USAGE.md). DEFAULT: 2000"),
|
||||
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
|
||||
|
||||
OPT_GROUP("Index options"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
|
||||
OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
|
||||
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
|
||||
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
|
||||
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
|
||||
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
|
||||
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
|
||||
"(You must use this option the first time you use the index command)"),
|
||||
|
||||
OPT_GROUP("Web options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||
OPT_STRING(0, "bind", &web_args->bind, "Listen on this address. DEFAULT=localhost"),
|
||||
OPT_STRING(0, "port", &web_args->port, "Listen on this port. DEFAULT=4090"),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
|
||||
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
|
||||
OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"),
|
||||
|
||||
OPT_GROUP("Exec-script options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
|
||||
|
||||
OPT_END(),
|
||||
};
|
||||
@@ -261,7 +563,7 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
if (arg_version) {
|
||||
printf(Version);
|
||||
exit(0);
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (LogCtx.very_verbose != 0) {
|
||||
@@ -270,25 +572,35 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
web_args->es_url = common_es_url;
|
||||
index_args->es_url = common_es_url;
|
||||
exec_args->es_url = common_es_url;
|
||||
|
||||
web_args->es_index = common_es_index;
|
||||
index_args->es_index = common_es_index;
|
||||
exec_args->es_index = common_es_index;
|
||||
|
||||
index_args->script_path = common_script_path;
|
||||
exec_args->script_path = common_script_path;
|
||||
index_args->threads = common_threads;
|
||||
scan_args->threads = common_threads;
|
||||
exec_args->async_script = common_async_script;
|
||||
index_args->async_script = common_async_script;
|
||||
|
||||
if (argc == 0) {
|
||||
argparse_usage(&argparse);
|
||||
return 1;
|
||||
goto end;
|
||||
} else if (strcmp(argv[0], "scan") == 0) {
|
||||
|
||||
int err = scan_args_validate(scan_args, argc, argv);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
goto end;
|
||||
}
|
||||
sist2_scan(scan_args);
|
||||
|
||||
}
|
||||
|
||||
else if (strcmp(argv[0], "index") == 0) {
|
||||
} else if (strcmp(argv[0], "index") == 0) {
|
||||
|
||||
int err = index_args_validate(index_args, argc, argv);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
goto end;
|
||||
}
|
||||
sist2_index(index_args);
|
||||
|
||||
@@ -296,22 +608,30 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
int err = web_args_validate(web_args, argc, argv);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
goto end;
|
||||
}
|
||||
sist2_web(web_args);
|
||||
|
||||
}
|
||||
else {
|
||||
} else if (strcmp(argv[0], "exec-script") == 0) {
|
||||
|
||||
int err = exec_args_validate(exec_args, argc, argv);
|
||||
if (err != 0) {
|
||||
goto end;
|
||||
}
|
||||
sist2_exec_script(exec_args);
|
||||
|
||||
} else {
|
||||
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
|
||||
argparse_usage(&argparse);
|
||||
return 1;
|
||||
goto end;
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
end:
|
||||
scan_args_destroy(scan_args);
|
||||
|
||||
index_args_destroy(index_args);
|
||||
web_args_destroy(web_args);
|
||||
exec_args_destroy(exec_args);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1,157 +0,0 @@
|
||||
#include "arc.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
#define ARC_BUF_SIZE 8192
|
||||
|
||||
int should_parse_filtered_file(const char *filepath, int ext) {
|
||||
char tmp[PATH_MAX * 2];
|
||||
|
||||
if (ext == 0) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
memcpy(tmp, filepath, ext - 1);
|
||||
*(tmp + ext - 1) = '\0';
|
||||
|
||||
char *idx = strrchr(tmp, '.');
|
||||
|
||||
if (idx == NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (strcmp(idx, ".tar") == 0) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
int arc_read(struct vfile *f, void *buf, size_t size) {
|
||||
return archive_read_data(f->arc, buf, size);
|
||||
}
|
||||
|
||||
typedef struct arc_data {
|
||||
vfile_t *f;
|
||||
char buf[ARC_BUF_SIZE];
|
||||
} arc_data_f;
|
||||
|
||||
int vfile_open_callback(struct archive *a, void *user_data) {
|
||||
arc_data_f *data = user_data;
|
||||
|
||||
if (data->f->is_fs_file && data->f->fd == -1) {
|
||||
data->f->fd = open(data->f->filepath, O_RDONLY);
|
||||
}
|
||||
|
||||
return ARCHIVE_OK;
|
||||
}
|
||||
|
||||
long vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
|
||||
arc_data_f *data = user_data;
|
||||
|
||||
*buf = data->buf;
|
||||
return data->f->read(data->f, data->buf, ARC_BUF_SIZE);
|
||||
}
|
||||
|
||||
int vfile_close_callback(struct archive *a, void *user_data) {
|
||||
arc_data_f *data = user_data;
|
||||
|
||||
if (data->f->close != NULL) {
|
||||
data->f->close(data->f);
|
||||
}
|
||||
|
||||
return ARCHIVE_OK;
|
||||
}
|
||||
|
||||
/**
 * Parses an archive file.
 *
 * Filesystem files are opened by name; archives nested inside other archives
 * are read through the vfile callbacks, and only when the archive mode is
 * ARC_MODE_RECURSE (otherwise the function returns without doing anything).
 *
 * In ARC_MODE_LIST, the paths of all regular entries are concatenated (one
 * per line) and attached to the document as MetaContent. In the other modes,
 * every regular entry is handed to parse() as a sub-job whose path is
 * "<archive path>#/<entry path>" and whose parent is doc->uuid.
 *
 * Entries whose path is unavailable, or whose composed path would not fit in
 * the sub-job path buffer, are skipped.
 *
 * @param f   Virtual file of the archive.
 * @param doc Document describing the archive.
 */
void parse_archive(vfile_t *f, document_t *doc) {

    // Nested archives are only opened in recurse mode
    if (!f->is_fs_file && ScanCtx.archive_mode != ARC_MODE_RECURSE) {
        return;
    }

    arc_data_f data;
    data.f = f;

    struct archive *a = archive_read_new();
    archive_read_support_filter_all(a);
    archive_read_support_format_all(a);

    int ret;
    if (f->is_fs_file) {
        ret = archive_read_open_filename(a, doc->filepath, ARC_BUF_SIZE);
    } else {
        ret = archive_read_open(
                a, &data,
                vfile_open_callback,
                vfile_read_callback,
                vfile_close_callback
        );
    }

    if (ret != ARCHIVE_OK) {
        LOG_ERRORF(doc->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a))
        archive_read_free(a);
        return;
    }

    struct archive_entry *entry;

    if (ScanCtx.archive_mode == ARC_MODE_LIST) {

        dyn_buffer_t buf = dyn_buffer_create();

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            if (!S_ISREG(archive_entry_stat(entry)->st_mode)) {
                continue;
            }

            char *path = (char *) archive_entry_pathname(entry);
            if (path == NULL) {
                // The entry path could not be obtained; nothing to list
                continue;
            }

            dyn_buffer_append_string(&buf, path);
            dyn_buffer_write_char(&buf, '\n');
        }
        dyn_buffer_write_char(&buf, '\0');

        meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
        meta_list->key = MetaContent;
        strcpy(meta_list->strval, buf.buf);
        APPEND_META(doc, meta_list);
        dyn_buffer_destroy(&buf);

    } else {

        // Capacity of the path storage allocated right after the job struct
        const size_t path_cap = PATH_MAX * 2;
        parse_job_t *sub_job = malloc(sizeof(parse_job_t) + path_cap);

        sub_job->vfile.close = NULL;
        sub_job->vfile.read = arc_read;
        sub_job->vfile.arc = a;
        sub_job->vfile.filepath = sub_job->filepath;
        sub_job->vfile.is_fs_file = FALSE;
        memcpy(sub_job->parent, doc->uuid, sizeof(uuid_t));

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            sub_job->info = *archive_entry_stat(entry);
            if (!S_ISREG(sub_job->info.st_mode)) {
                continue;
            }

            const char *entry_path = archive_entry_pathname(entry);
            if (entry_path == NULL) {
                continue;
            }

            // Bounded formatting: a long entry name must not overflow the job path
            int written = snprintf(sub_job->filepath, path_cap, "%s#/%s", f->filepath, entry_path);
            if (written < 0 || (size_t) written >= path_cap) {
                LOG_ERRORF(doc->filepath, "(arc.c) Path of archive entry is too long, skipping: %s", entry_path)
                continue;
            }

            // The "#/" separator guarantees that at least one '/' is present
            sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;

            char *p = strrchr(sub_job->filepath, '.');
            if (p != NULL) {
                sub_job->ext = (int) (p - sub_job->filepath + 1);
            } else {
                sub_job->ext = (int) strlen(sub_job->filepath);
            }

            parse(sub_job);
        }

        free(sub_job);
    }

    archive_read_free(a);
}
|
||||
@@ -1,12 +0,0 @@
|
||||
#ifndef SIST2_ARC_H
|
||||
#define SIST2_ARC_H
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
int should_parse_filtered_file(const char *filepath, int ext);
|
||||
|
||||
void parse_archive(vfile_t *f, document_t *doc);
|
||||
|
||||
int arc_read(struct vfile * f, void *buf, size_t size);
|
||||
|
||||
#endif
|
||||
@@ -1,107 +0,0 @@
|
||||
#include "doc.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
/**
 * Recursively walks the children of the current XML element and appends the
 * text found inside every <t> element to buf, each followed by a space.
 * Any other child element is descended into.
 *
 * @param reader XML reader positioned on an element.
 * @param buf    Buffer receiving the extracted text.
 */
void dump_text(mceTextReader_t *reader, dyn_buffer_t *buf) {

    mce_skip_attributes(reader);

    mce_start_children(reader) {
        // <t> elements hold the text runs
        mce_start_element(reader, NULL, _X("t")) {
            mce_skip_attributes(reader);
            mce_start_children(reader) {
                mce_start_text(reader) {
                    char *text = (char *) xmlTextReaderConstValue(reader->reader);
                    dyn_buffer_append_string(buf, text);
                    dyn_buffer_write_char(buf, ' ');
                } mce_end_text(reader);
            } mce_end_children(reader);
        } mce_end_element(reader);

        // Everything else: recurse
        mce_start_element(reader, NULL, NULL) {
            dump_text(reader, buf);
        } mce_end_element(reader);

    } mce_end_children(reader)
}
|
||||
|
||||
__always_inline
|
||||
int should_read_part(opcPart part) {
|
||||
|
||||
char *part_name = (char *) part;
|
||||
|
||||
if (part == NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if ( // Word
|
||||
strcmp(part_name, "word/document.xml") == 0
|
||||
|| strncmp(part_name, "word/footer", sizeof("word/footer") - 1) == 0
|
||||
|| strncmp(part_name, "word/header", sizeof("word/header") - 1) == 0
|
||||
// PowerPoint
|
||||
|| strncmp(part_name, "ppt/slides/slide", sizeof("ppt/slides/slide") - 1) == 0
|
||||
|| strncmp(part_name, "ppt/notesSlides/notesSlide", sizeof("ppt/notesSlides/notesSlide") - 1) == 0
|
||||
// Excel
|
||||
|| strncmp(part_name, "xl/worksheets/sheet", sizeof("xl/worksheets/sheet") - 1) == 0
|
||||
|| strcmp(part_name, "xl/sharedStrings.xml") == 0
|
||||
|| strcmp(part_name, "xl/workbook.xml") == 0
|
||||
) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
__always_inline
|
||||
void read_part(opcContainer *c, dyn_buffer_t *buf, opcPart part, document_t *doc) {
|
||||
|
||||
mceTextReader_t reader;
|
||||
int ret = opcXmlReaderOpen(c, &reader, part, NULL, "UTF-8", 0);
|
||||
|
||||
if (ret != OPC_ERROR_NONE) {
|
||||
LOG_ERRORF(doc->filepath, "(doc.c) opcXmlReaderOpen() returned error code %d", ret);
|
||||
return;
|
||||
}
|
||||
|
||||
mce_start_document(&reader) {
|
||||
mce_start_element(&reader, NULL, NULL) {
|
||||
dump_text(&reader, buf);
|
||||
} mce_end_element(&reader);
|
||||
} mce_end_document(&reader);
|
||||
|
||||
mceTextReaderCleanup(&reader);
|
||||
}
|
||||
|
||||
/**
 * Extracts the text of an in-memory OPC document and attaches it to the
 * document as MetaContent.
 *
 * Every part accepted by should_read_part() is read with read_part(). No
 * metadata is added when nothing was extracted. A NULL buffer or a container
 * that cannot be opened makes the function return early (the latter after
 * logging an error).
 *
 * @param mem     Content of the file, may be NULL.
 * @param mem_len Size of mem in bytes.
 * @param doc     Document receiving the metadata.
 */
void parse_doc(void *mem, size_t mem_len, document_t *doc) {

    if (mem == NULL) {
        return;
    }

    opcContainer *container = opcContainerOpenMem(mem, mem_len, OPC_OPEN_READ_ONLY, NULL);
    if (container == NULL) {
        LOG_ERROR(doc->filepath, "(doc.c) Couldn't open document with opcContainerOpenMem()");
        return;
    }

    dyn_buffer_t text = dyn_buffer_create();

    opcPart part = opcPartGetFirst(container);
    do {
        if (should_read_part(part)) {
            read_part(container, &text, part, doc);
        }
        part = opcPartGetNext(container, part);
    } while (part);

    opcContainerClose(container, OPC_CLOSE_NOW);

    if (text.cur > 0) {
        dyn_buffer_write_char(&text, '\0');

        // text.cur now includes the terminator written above
        meta_line_t *meta = malloc(sizeof(meta_line_t) + text.cur);
        meta->key = MetaContent;
        strcpy(meta->strval, text.buf);
        APPEND_META(doc, meta)
    }

    dyn_buffer_destroy(&text);
}
|
||||
@@ -1,8 +0,0 @@
|
||||
#ifndef SIST2_DOC_H
|
||||
#define SIST2_DOC_H
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
void parse_doc(void *buf, size_t buf_len, document_t *doc);
|
||||
|
||||
#endif
|
||||
@@ -1,226 +0,0 @@
|
||||
#include "font.h"
|
||||
|
||||
|
||||
#include "src/ctx.h"
|
||||
|
||||
__thread FT_Library ft_lib = NULL;
|
||||
|
||||
|
||||
/**
 * Size, in pixels, of the bitmap needed to render a line of text.
 */
typedef struct text_dimensions {
    unsigned int width;    // Total width of the text
    unsigned int height;   // Largest ascent plus largest descent
    unsigned int baseline; // Largest descent among the glyphs
} text_dimensions_t;
|
||||
|
||||
/**
 * Metrics and pixel data of one glyph, as filled in by ft_glyph_to_glyph().
 */
typedef struct glyph {
    int top;               // bitmap_top of the glyph slot
    int height;            // Bitmap rows
    int width;             // Bitmap width
    int descent;           // MAX(0, height - top)
    int ascent;            // MAX(0, MAX(top, height) - descent)
    int advance_width;     // Horizontal advance, in whole pixels
    unsigned char *pixmap; // Glyph bitmap buffer, owned by the glyph slot
} glyph_t;
|
||||
|
||||
|
||||
__always_inline
|
||||
int kerning_offset(char c, char pc, FT_Face face) {
|
||||
FT_Vector kerning;
|
||||
FT_Get_Kerning(face, c, pc, FT_KERNING_DEFAULT, &kerning);
|
||||
|
||||
return (int) (kerning.x / 64);
|
||||
}
|
||||
|
||||
__always_inline
|
||||
glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
|
||||
glyph_t glyph;
|
||||
|
||||
glyph.pixmap = slot->bitmap.buffer;
|
||||
|
||||
glyph.width = (int) slot->bitmap.width;
|
||||
glyph.height = (int) slot->bitmap.rows;
|
||||
glyph.top = slot->bitmap_top;
|
||||
glyph.advance_width = (int) slot->advance.x / 64;
|
||||
|
||||
glyph.descent = MAX(0, glyph.height - glyph.top);
|
||||
glyph.ascent = MAX(0, MAX(glyph.top, glyph.height) - glyph.descent);
|
||||
|
||||
return glyph;
|
||||
}
|
||||
|
||||
/**
 * Measures the bitmap needed to render text with the given face.
 *
 * The width is the sum, over all characters, of the larger of the glyph's
 * advance and bitmap width, plus kerning. The height is the largest ascent
 * plus the largest descent; the baseline is the largest descent.
 *
 * @param text NUL-terminated text.
 * @param face Font face, already sized.
 * @return The computed dimensions.
 */
text_dimensions_t text_dimension(char *text, FT_Face face) {
    text_dimensions_t result;
    result.width = 0;

    int length = (int) strlen(text);

    unsigned int highest_ascent = 0;
    int lowest_descent = 0;

    char previous = 0;
    int pos = 0;
    while (pos < length) {
        char current = text[pos];

        FT_Load_Char(face, current, 0);
        glyph_t g = ft_glyph_to_glyph(face->glyph);

        lowest_descent = MAX(lowest_descent, g.descent);
        highest_ascent = MAX(highest_ascent, MAX(g.height, g.ascent));

        int kerning_x = kerning_offset(current, previous, face);
        result.width += MAX(g.advance_width, g.width) + kerning_x;

        previous = current;
        pos++;
    }

    result.height = highest_ascent + lowest_descent;
    result.baseline = lowest_descent;

    return result;
}
|
||||
|
||||
/**
 * ORs the pixels of a glyph into a text bitmap at position (x, y).
 *
 * Writes that would fall at or past the end of the bitmap
 * (text_info.width * text_info.height bytes) are skipped.
 *
 * @param glyph     Glyph to draw.
 * @param x         Column of the glyph's left edge in the bitmap.
 * @param y         Row of the glyph's top edge in the bitmap.
 * @param text_info Dimensions of the destination bitmap.
 * @param bitmap    Destination bitmap, one byte per pixel, row-major.
 */
void draw_glyph(glyph_t *glyph, int x, int y, struct text_dimensions text_info, unsigned char *bitmap) {
    const unsigned int bitmap_len = text_info.width * text_info.height;
    const unsigned int origin = y * text_info.width + x;

    for (unsigned int row = 0; row < glyph->height; row++) {
        // Offsets use unsigned arithmetic, exactly like a running counter would
        unsigned int src_row = row * glyph->width;
        unsigned int dst_row = origin + row * text_info.width;

        for (unsigned int col = 0; col < glyph->width; col++) {
            unsigned int target = dst_row + col;
            if (target < bitmap_len) {
                bitmap[target] |= glyph->pixmap[src_row + col];
            }
        }
    }
}
|
||||
|
||||
/**
 * Serializes an 8-bit grayscale bitmap as a BMP file into buf.
 *
 * Layout: 14-byte file header, 40-byte DIB header, 256-entry color table
 * (inverted grayscale: index 0 is white, 255 is black), then the pixel rows
 * from bottom to top. Every row is padded so that its length is a multiple of
 * 4 bytes, independently of where the pixel array starts in the file.
 *
 * @param buf        Destination buffer; the BMP is appended to it.
 * @param dimensions Width and height of bitmap, in pixels.
 * @param bitmap     Source pixels, one byte per pixel, row-major, top row first.
 */
void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned char *bitmap) {

    dyn_buffer_write_short(buf, 0x4D42); // Magic
    dyn_buffer_write_int(buf, 0); // Size placeholder
    dyn_buffer_write_int(buf, 0x5157); //Reserved
    dyn_buffer_write_int(buf, 14 + 40 + 256 * 4); // pixels offset

    dyn_buffer_write_int(buf, 40); // DIB size
    dyn_buffer_write_int(buf, (int) dimensions.width);
    dyn_buffer_write_int(buf, (int) dimensions.height);
    dyn_buffer_write_short(buf, 1); // Color planes
    dyn_buffer_write_short(buf, 8); // bits per pixel
    dyn_buffer_write_int(buf, 0); // compression
    dyn_buffer_write_int(buf, 0); // Ignored
    dyn_buffer_write_int(buf, 3800); // hres
    dyn_buffer_write_int(buf, 3800); // vres
    dyn_buffer_write_int(buf, 256); // Color count
    dyn_buffer_write_int(buf, 0); // Ignored

    // RGBA32 Color table (Grayscale)
    for (int i = 255; i >= 0; i--) {
        dyn_buffer_write_int(buf, i + (i << 8) + (i << 16));
    }

    // The pixel array starts at offset 14 + 40 + 1024 = 1078, which is not a
    // multiple of 4, so the padding must be derived from the row length and
    // not from the absolute position in the buffer.
    const unsigned int row_padding = (4 - dimensions.width % 4) % 4;

    // Pixel array: write from bottom to top, with rows padded to multiples of 4-bytes
    for (int y = (int) dimensions.height - 1; y >= 0; y--) {
        for (unsigned int x = 0; x < dimensions.width; x++) {
            dyn_buffer_write_char(buf, (char) bitmap[y * dimensions.width + x]);
        }
        for (unsigned int pad = 0; pad < row_padding; pad++) {
            dyn_buffer_write_char(buf, 0);
        }
    }

    // Size: patch the placeholder written at offset 2. memcpy is used because
    // that offset is not suitably aligned for a direct int store.
    int file_size = (int) buf->cur;
    memcpy((char *) buf->buf + 2, &file_size, sizeof(file_size));
}
|
||||
|
||||
/**
 * Parses an in-memory font file.
 *
 * Adds the font name ("<family> <style>", or just the family when the style
 * is missing or starts with '?') to the document as MetaFontName. When
 * thumbnails are enabled (ScanCtx.tn_size > 0), the font name is rendered
 * with the font itself at 64px and stored as a BMP thumbnail keyed by
 * doc->uuid.
 *
 * The FreeType library handle is created lazily, once per thread. The face is
 * released on every path once it has been opened.
 *
 * @param buf     Content of the font file, may be NULL (nothing is done).
 * @param buf_len Size of buf in bytes.
 * @param doc     Document receiving the metadata.
 */
void parse_font(const char *buf, size_t buf_len, document_t *doc) {
    if (ft_lib == NULL) {
        FT_Init_FreeType(&ft_lib);
    }
    if (buf == NULL) {
        return;
    }

    FT_Face face;
    FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, &face);
    if (err != 0) {
        LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, ft_error_string(err));
        return;
    }

    unsigned char *bitmap = NULL;

    // Build the display name with bounded writes: names come from the font
    // file and may be arbitrarily long or missing.
    char font_name[1024];
    const char *family = face->family_name != NULL ? face->family_name : "(null)";

    if (face->style_name == NULL || *(face->style_name) == '?') {
        snprintf(font_name, sizeof(font_name), "%s", family);
    } else {
        snprintf(font_name, sizeof(font_name), "%s %s", family, face->style_name);
    }

    // +1: room for the string terminator
    meta_line_t *meta_name = malloc(sizeof(meta_line_t) + strlen(font_name) + 1);
    meta_name->key = MetaFontName;
    strcpy(meta_name->strval, font_name);
    APPEND_META(doc, meta_name)

    if (ScanCtx.tn_size <= 0) {
        goto cleanup;
    }

    int pixel = 64;
    int num_chars = (int) strlen(font_name);

    err = FT_Set_Pixel_Sizes(face, 0, pixel);
    if (err != 0) {
        LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, ft_error_string(err))
        goto cleanup;
    }

    text_dimensions_t dimensions = text_dimension(font_name, face);
    size_t pixel_count = (size_t) dimensions.width * dimensions.height;

    bitmap = calloc(pixel_count, 1);
    if (bitmap == NULL && pixel_count > 0) {
        LOG_ERROR(doc->filepath, "(font.c) Could not allocate thumbnail bitmap");
        goto cleanup;
    }

    FT_Vector pen;
    pen.x = 0;

    char pc = 0;
    for (int i = 0; i < num_chars; i++) {
        char c = font_name[i];

        err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
        if (err != 0) {
            // Retry with the other letter case before giving up on this character
            c = c >= 'a' && c <= 'z' ? c - 32 : c + 32;
            err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
            if (err != 0) {
                LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, ft_error_string(err));
                continue;
            }
        }
        glyph_t glyph = ft_glyph_to_glyph(face->glyph);

        pen.x += kerning_offset(c, pc, face);
        if (pen.x <= 0) {
            pen.x = ABS(glyph.advance_width - glyph.width);
        }
        pen.y = dimensions.height - glyph.ascent - dimensions.baseline;

        draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap);

        pen.x += glyph.advance_width;
        pc = c;
    }

    dyn_buffer_t bmp_data = dyn_buffer_create();
    bmp_format(&bmp_data, dimensions, bitmap);

    store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) bmp_data.buf, bmp_data.cur);

    dyn_buffer_destroy(&bmp_data);

cleanup:
    free(bitmap);
    FT_Done_Face(face);
}
|
||||
@@ -1,9 +0,0 @@
|
||||
#ifndef SIST2_FONT_H
|
||||
#define SIST2_FONT_H
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
|
||||
void parse_font(const char * buf, size_t buf_len, document_t *doc);
|
||||
|
||||
#endif
|
||||
@@ -1,381 +0,0 @@
|
||||
#include "src/sist.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
#define MIN_SIZE 32
|
||||
#define AVIO_BUF_SIZE 8192
|
||||
|
||||
__always_inline
|
||||
AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
|
||||
|
||||
AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
|
||||
AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
|
||||
jpeg->width = dstW;
|
||||
jpeg->height = dstH;
|
||||
jpeg->time_base.den = 1000000;
|
||||
jpeg->time_base.num = 1;
|
||||
jpeg->i_quant_factor = qscale;
|
||||
|
||||
jpeg->pix_fmt = AV_PIX_FMT_YUVJ420P;
|
||||
int ret = avcodec_open2(jpeg, jpeg_codec, NULL);
|
||||
|
||||
if (ret != 0) {
|
||||
printf("Could not open jpeg encoder: %s!\n", av_err2str(ret));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return jpeg;
|
||||
}
|
||||
|
||||
__always_inline
|
||||
AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
|
||||
|
||||
int dstW;
|
||||
int dstH;
|
||||
if (frame->width <= size && frame->height <= size) {
|
||||
dstW = frame->width;
|
||||
dstH = frame->height;
|
||||
} else {
|
||||
double ratio = (double) frame->width / frame->height;
|
||||
if (frame->width > frame->height) {
|
||||
dstW = size;
|
||||
dstH = (int) (size / ratio);
|
||||
} else {
|
||||
dstW = (int) (size * ratio);
|
||||
dstH = size;
|
||||
}
|
||||
}
|
||||
|
||||
if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
AVFrame *scaled_frame = av_frame_alloc();
|
||||
|
||||
struct SwsContext *ctx = sws_getContext(
|
||||
decoder->width, decoder->height, decoder->pix_fmt,
|
||||
dstW, dstH, AV_PIX_FMT_YUVJ420P,
|
||||
SWS_FAST_BILINEAR, 0, 0, 0
|
||||
);
|
||||
|
||||
int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1);
|
||||
uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len);
|
||||
|
||||
av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);
|
||||
|
||||
sws_scale(ctx,
|
||||
(const uint8_t *const *) frame->data, frame->linesize,
|
||||
0, decoder->height,
|
||||
scaled_frame->data, scaled_frame->linesize
|
||||
);
|
||||
|
||||
scaled_frame->width = dstW;
|
||||
scaled_frame->height = dstH;
|
||||
scaled_frame->format = AV_PIX_FMT_YUV420P;
|
||||
|
||||
sws_freeContext(ctx);
|
||||
|
||||
return scaled_frame;
|
||||
}
|
||||
|
||||
__always_inline
|
||||
AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, document_t *doc) {
|
||||
AVFrame *frame = av_frame_alloc();
|
||||
|
||||
AVPacket avPacket;
|
||||
av_init_packet(&avPacket);
|
||||
|
||||
int receive_ret = -EAGAIN;
|
||||
while (receive_ret == -EAGAIN) {
|
||||
// Get video frame
|
||||
while (1) {
|
||||
int read_frame_ret = av_read_frame(pFormatCtx, &avPacket);
|
||||
|
||||
if (read_frame_ret != 0) {
|
||||
if (read_frame_ret != AVERROR_EOF) {
|
||||
LOG_WARNINGF(doc->filepath,
|
||||
"(media.c) avcodec_read_frame() returned error code [%d] %s",
|
||||
read_frame_ret, av_err2str(read_frame_ret)
|
||||
)
|
||||
}
|
||||
av_frame_free(&frame);
|
||||
av_packet_unref(&avPacket);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//Ignore audio/other frames
|
||||
if (avPacket.stream_index != stream_idx) {
|
||||
av_packet_unref(&avPacket);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Feed it to decoder
|
||||
int decode_ret = avcodec_send_packet(decoder, &avPacket);
|
||||
if (decode_ret != 0) {
|
||||
LOG_WARNINGF(doc->filepath,
|
||||
"(media.c) avcodec_send_packet() returned error code [%d] %s",
|
||||
decode_ret, av_err2str(decode_ret)
|
||||
)
|
||||
}
|
||||
av_packet_unref(&avPacket);
|
||||
receive_ret = avcodec_receive_frame(decoder, frame);
|
||||
}
|
||||
return frame;
|
||||
}
|
||||
|
||||
/*
 * Appends the value of an AVDictionaryEntry to a document as string metadata.
 *
 * The value is passed through a text buffer before being copied into a newly
 * allocated meta_line_t with the given key. The expansion is a braced block,
 * so its temporaries stay private to each use and the macro can be used
 * several times in the same scope. It must be used as a statement; a trailing
 * semicolon is optional.
 */
#define APPEND_TAG_META(doc, tag_, keyname) \
    { \
        text_buffer_t tex = text_buffer_create(-1); \
        text_buffer_append_string0(&tex, (tag_)->value); \
        text_buffer_terminate_string(&tex); \
        meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \
        meta_tag->key = (keyname); \
        strcpy(meta_tag->strval, tex.dyn_buffer.buf); \
        APPEND_META(doc, meta_tag) \
        text_buffer_destroy(&tex); \
    }
|
||||
|
||||
__always_inline
|
||||
void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
|
||||
|
||||
AVDictionaryEntry *tag = NULL;
|
||||
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||
char key[32];
|
||||
strncpy(key, tag->key, sizeof(key));
|
||||
|
||||
char *ptr = key;
|
||||
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr);
|
||||
|
||||
if (strcmp(key, "artist") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaArtist)
|
||||
} else if (strcmp(key, "genre") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaGenre)
|
||||
} else if (strcmp(key, "title") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaTitle)
|
||||
} else if (strcmp(key, "album_artist") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaAlbumArtist)
|
||||
} else if (strcmp(key, "album") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaAlbum)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__always_inline
|
||||
void
|
||||
append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int include_audio_tags, int is_video) {
|
||||
|
||||
if (is_video) {
|
||||
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
|
||||
meta_duration->key = MetaMediaDuration;
|
||||
meta_duration->longval = pFormatCtx->duration / AV_TIME_BASE;
|
||||
APPEND_META(doc, meta_duration)
|
||||
|
||||
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
|
||||
meta_bitrate->key = MetaMediaBitrate;
|
||||
meta_bitrate->longval = pFormatCtx->bit_rate;
|
||||
APPEND_META(doc, meta_bitrate)
|
||||
}
|
||||
|
||||
AVDictionaryEntry *tag = NULL;
|
||||
if (is_video) {
|
||||
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||
if (include_audio_tags && strcmp(tag->key, "title") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaTitle)
|
||||
} else if (strcmp(tag->key, "comment") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaContent)
|
||||
} else if (include_audio_tags && strcmp(tag->key, "artist") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaArtist)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// EXIF metadata
|
||||
while ((tag = av_dict_get(frame->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||
if (include_audio_tags && strcmp(tag->key, "Artist") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaArtist)
|
||||
} else if (strcmp(tag->key, "ImageDescription") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaContent)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Decode one frame of the given video stream, scale it to the configured
 * thumbnail size, encode it as JPEG and write it to the index store under
 * the document's uuid.
 *
 * Every FFmpeg object allocated here is released on the single cleanup path.
 * The format context itself is NOT closed here; the caller owns it.
 *
 * @param pFormatCtx   opened input whose streams have been probed
 * @param doc          document receiving the video metadata / thumbnail
 * @param video_stream index of the video stream inside pFormatCtx->streams
 * @param audio_stream index of the audio stream found by the caller, or -1
 */
static void store_video_thumbnail(AVFormatContext *pFormatCtx, document_t *doc,
                                  int video_stream, int audio_stream) {

    AVStream *stream = pFormatCtx->streams[video_stream];
    AVCodecContext *decoder = NULL;
    AVCodecContext *jpeg_encoder = NULL;
    AVFrame *frame = NULL;
    AVFrame *scaled_frame = NULL;

    // Frames at or below MIN_SIZE in either dimension get no thumbnail
    if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
        return;
    }

    // Decoder
    AVCodec *video_codec = avcodec_find_decoder(stream->codecpar->codec_id);
    if (video_codec == NULL) {
        LOG_ERRORF(doc->filepath, "(media.c) No decoder found for codec id [%d]", (int) stream->codecpar->codec_id)
        return;
    }

    decoder = avcodec_alloc_context3(video_codec);
    if (decoder == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate decoder with avcodec_alloc_context3()")
        return;
    }

    if (avcodec_parameters_to_context(decoder, stream->codecpar) < 0
        || avcodec_open2(decoder, video_codec, NULL) < 0) {
        LOG_ERROR(doc->filepath, "(media.c) Could not open video decoder")
        goto cleanup;
    }

    // Seek to 10% of the stream duration (skipped for single-frame media and GIFs).
    // The identical seek is attempted up to 21 times, stopping at the first success;
    // a failed seek is not fatal, decoding simply starts from the current position.
    if (stream->nb_frames > 1 && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
        for (int attempt = 0; attempt <= 20; attempt++) {
            int seek_ret = av_seek_frame(pFormatCtx, video_stream,
                                         (int64_t) (stream->duration * 0.10), 0);
            if (seek_ret == 0) {
                break;
            }
        }
    }

    frame = read_frame(pFormatCtx, decoder, video_stream, doc);
    if (frame == NULL) {
        goto cleanup;
    }

    append_video_meta(pFormatCtx, frame, doc, audio_stream == -1, stream->nb_frames > 1);

    // Scale frame
    scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size);
    if (scaled_frame == NULL) {
        goto cleanup;
    }

    // Encode frame to jpeg
    jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ScanCtx.tn_qscale);
    if (jpeg_encoder == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate jpeg encoder")
        goto cleanup;
    }

    if (avcodec_send_frame(jpeg_encoder, scaled_frame) < 0) {
        LOG_ERROR(doc->filepath, "(media.c) avcodec_send_frame() failed while encoding thumbnail")
        goto cleanup;
    }

    AVPacket jpeg_packet;
    av_init_packet(&jpeg_packet);
    // av_init_packet() does not touch data/size, they must be set separately
    jpeg_packet.data = NULL;
    jpeg_packet.size = 0;

    if (avcodec_receive_packet(jpeg_encoder, &jpeg_packet) == 0) {
        // Save thumbnail (only when the encoder actually produced a packet)
        store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data,
                    jpeg_packet.size);
        av_packet_unref(&jpeg_packet);
    } else {
        LOG_ERROR(doc->filepath, "(media.c) avcodec_receive_packet() failed while encoding thumbnail")
    }

cleanup:
    av_frame_free(&frame);
    if (scaled_frame != NULL) {
        // The scaled frame's pixel buffer is released explicitly, as before
        av_free(*scaled_frame->data);
        av_frame_free(&scaled_frame);
    }
    avcodec_free_context(&jpeg_encoder);
    avcodec_free_context(&decoder);
}

/**
 * Extract metadata from an opened media container and, when thumbnails are
 * enabled (ScanCtx.tn_size > 0) and a video stream exists, store a thumbnail.
 *
 * The codec id of one audio stream and the codec id, width and height of one
 * video stream are appended to the document.
 *
 * Takes ownership of pFormatCtx: it is closed (and freed) before returning
 * on every path.
 *
 * @param pFormatCtx input already opened with avformat_open_input()
 * @param doc        document that receives the metadata
 */
void parse_media(AVFormatContext *pFormatCtx, document_t *doc) {

    int video_stream = -1;
    int audio_stream = -1;

    avformat_find_stream_info(pFormatCtx, NULL);

    // Streams are visited from the highest index down; only the first audio
    // and the first video stream met this way are recorded.
    for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) {
        AVStream *stream = pFormatCtx->streams[i];

        if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            if (audio_stream == -1) {
                meta_line_t *meta_audio = malloc(sizeof(meta_line_t));
                meta_audio->key = MetaMediaAudioCodec;
                meta_audio->intval = stream->codecpar->codec_id;
                APPEND_META(doc, meta_audio)

                append_audio_meta(pFormatCtx, doc);
                audio_stream = i;
            }
        } else if (stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {

            if (video_stream == -1) {
                meta_line_t *meta_vid = malloc(sizeof(meta_line_t));
                meta_vid->key = MetaMediaVideoCodec;
                meta_vid->intval = stream->codecpar->codec_id;
                APPEND_META(doc, meta_vid)

                meta_line_t *meta_w = malloc(sizeof(meta_line_t));
                meta_w->key = MetaWidth;
                meta_w->intval = stream->codecpar->width;
                APPEND_META(doc, meta_w)

                meta_line_t *meta_h = malloc(sizeof(meta_line_t));
                meta_h->key = MetaHeight;
                meta_h->intval = stream->codecpar->height;
                APPEND_META(doc, meta_h)

                video_stream = i;
            }
        }
    }

    if (video_stream != -1 && ScanCtx.tn_size > 0) {
        store_video_thumbnail(pFormatCtx, doc, video_stream, audio_stream);
    }

    // avformat_close_input() frees the context and resets the pointer to NULL,
    // so no separate avformat_free_context() call is needed afterwards.
    avformat_close_input(&pFormatCtx);
}
|
||||
|
||||
/**
 * Open the media file at `filepath` with libavformat and pass the opened
 * context on to parse_media().
 *
 * Allocation and open failures are logged against doc->filepath and the
 * function returns without parsing anything.
 *
 * @param filepath path given to avformat_open_input()
 * @param doc      document forwarded to parse_media()
 */
void parse_media_filename(const char *filepath, document_t *doc) {

    AVFormatContext *fmt_ctx = avformat_alloc_context();

    if (fmt_ctx != NULL) {
        int open_status = avformat_open_input(&fmt_ctx, filepath, NULL, NULL);

        if (open_status >= 0) {
            parse_media(fmt_ctx, doc);
        } else {
            LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", open_status, av_err2str(open_status))
            avformat_close_input(&fmt_ctx);
            avformat_free_context(fmt_ctx);
        }
    } else {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
    }
}
|
||||
|
||||
|
||||
int vfile_read(void *ptr, uint8_t *buf, int buf_size) {
|
||||
struct vfile *f = ptr;
|
||||
|
||||
int ret = f->read(f, buf, buf_size);
|
||||
|
||||
if (ret == 0) {
|
||||
return AVERROR_EOF;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Parse media supplied through a vfile instead of a path on disk.
 *
 * A custom AVIOContext is built around vfile_read() (no write or seek
 * callback), attached to a fresh format context, and the opened context is
 * handed to parse_media(). The AVIO buffer and context are released here
 * afterwards.
 *
 * Allocation failures and open errors are logged against doc->filepath,
 * except for the return code -5, which is treated as "media requires seek"
 * and silently skipped.
 *
 * @param f   virtual file providing the read callback
 * @param doc document forwarded to parse_media()
 */
void parse_media_vfile(struct vfile *f, document_t *doc) {

    AVFormatContext *pFormatCtx = avformat_alloc_context();
    if (pFormatCtx == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
        return;
    }

    unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
    if (buffer == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate I/O buffer with av_malloc()")
        avformat_free_context(pFormatCtx);
        return;
    }

    AVIOContext *io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);
    if (io_ctx == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate I/O context with avio_alloc_context()")
        av_free(buffer);
        avformat_free_context(pFormatCtx);
        return;
    }

    pFormatCtx->pb = io_ctx;
    pFormatCtx->flags |= AVFMT_FLAG_CUSTOM_IO;

    int res = avformat_open_input(&pFormatCtx, "", NULL, NULL);
    if (res < 0) {
        // -5: tried to parse media that requires seek; not worth logging
        if (res != -5) {
            LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
        }

        // Free through io_ctx->buffer rather than the local `buffer`:
        // libavformat may have replaced the buffer it was given.
        av_free(io_ctx->buffer);
        avio_context_free(&io_ctx);
        // No-op when avformat_open_input() already freed the context
        avformat_close_input(&pFormatCtx);
        return;
    }

    // parse_media() closes pFormatCtx; the custom I/O context stays ours to free
    parse_media(pFormatCtx, doc);

    av_free(io_ctx->buffer);
    avio_context_free(&io_ctx);
}
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
#ifndef SIST2_MEDIA_H
|
||||
#define SIST2_MEDIA_H
|
||||
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
/* Parenthesized so the products stay intact when used inside larger expressions. */
#define MIN_VIDEO_SIZE (1024 * 64)
#define MIN_IMAGE_SIZE (1024 * 2)
|
||||
|
||||
void parse_media_filename(const char * filepath, document_t *doc);
|
||||
|
||||
void parse_media_vfile(struct vfile *f, document_t *doc);
|
||||
|
||||
#endif
|
||||
@@ -1,14 +1,15 @@
|
||||
#ifndef SIST2_MIME_H
|
||||
#define SIST2_MIME_H
|
||||
|
||||
#include "src/sist.h"
|
||||
#include "../sist.h"
|
||||
|
||||
#define MAJOR_MIME(mime_id) (mime_id & 0x0FFF0000) >> 16
|
||||
#define MAJOR_MIME(mime_id) (mime_id & 0x000F0000) >> 16
|
||||
|
||||
#define MIME_EMPTY 1
|
||||
#define MIME_SIST2_SIDECAR 2
|
||||
|
||||
#define DONT_PARSE 0x80000000
|
||||
#define SHOULD_PARSE(mime_id) (mime_id & DONT_PARSE) != DONT_PARSE && mime_id != 0
|
||||
#define SHOULD_PARSE(mime_id) (ScanCtx.fast == 0 && (mime_id & DONT_PARSE) != DONT_PARSE && mime_id != 0)
|
||||
|
||||
#define PDF_MASK 0x40000000
|
||||
#define IS_PDF(mime_id) (mime_id & PDF_MASK) == PDF_MASK
|
||||
@@ -25,6 +26,15 @@
|
||||
#define DOC_MASK 0x04000000
|
||||
#define IS_DOC(mime_id) (mime_id & DOC_MASK) == DOC_MASK
|
||||
|
||||
#define MOBI_MASK 0x02000000
|
||||
#define IS_MOBI(mime_id) (mime_id & MOBI_MASK) == MOBI_MASK
|
||||
|
||||
#define MARKUP_MASK 0x01000000
|
||||
#define IS_MARKUP(mime_id) (mime_id & MARKUP_MASK) == MARKUP_MASK
|
||||
|
||||
#define RAW_MASK 0x00800000
|
||||
#define IS_RAW(mime_id) (mime_id & RAW_MASK) == RAW_MASK
|
||||
|
||||
enum major_mime {
|
||||
MimeInvalid = 0,
|
||||
MimeModel = 1,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user