Compare commits

...

201 Commits

Author SHA1 Message Date
7b49a0dc49 Build fix 2020-06-21 12:56:13 -04:00
eb559b53aa RAW picture file support 2020-06-21 10:46:11 -04:00
6d01f9c0df whoops 2020-06-19 22:12:19 -04:00
e724fec668 Fix web return codes 2020-06-19 21:41:17 -04:00
fe5e93b300 Update USAGE.md 2020-06-19 21:29:09 -04:00
ecad85fd7d version bump 2020-06-19 21:10:03 -04:00
74cc898259 Fix tag display issue 2020-06-19 21:07:19 -04:00
dc2e4443c4 Add exec-script command 2020-06-19 21:07:19 -04:00
1a64431b52 Merge pull request #63 from dpieski/patch-3
Correct typos in example
2020-06-19 18:26:10 -04:00
Andrew
9bad515e06 Correct typos in example
Correct typos in examples.
2020-06-19 17:22:02 -05:00
648559cedb Update README.md 2020-06-17 13:25:20 -04:00
3e6cd9cd5c Merge pull request #60 from dpieski/patch-2
update Usage.md
2020-06-17 13:04:46 -04:00
f249992798 Update scripting.md 2020-06-17 13:00:07 -04:00
Andrew
e9645ecdaa update Usage.md
Fixing a link.
2020-06-17 10:58:25 -05:00
046edea0e2 Handle special characters in file paths 2020-06-10 19:45:36 -04:00
a011b7e97b Fragment size setting 2020-06-09 21:40:53 -04:00
8c1c1697e0 Fix file wordexp in some paths #59 2020-06-05 19:41:02 -04:00
018b49fa4c Fix csv_escape #58 2020-06-05 19:13:03 -04:00
27b4e6403e Re-enable path autocomplete #54 2020-06-02 19:46:58 -04:00
13fdbd9e69 Fix for ES 7.7 #54 2020-06-01 18:14:34 -04:00
5e7fdaf8dd Update issue-template.md 2020-06-01 10:45:43 -04:00
19d5c8ac9f Update issue-template.md 2020-05-29 18:19:21 -04:00
99497049a8 Merge pull request #53 from dpieski/patch-1
Update README
2020-05-29 18:16:13 -04:00
Andrew
1a3181d78b Update README
changed case of path in a link to the usage guide to fix 404 error.
2020-05-29 15:37:20 -05:00
449aa77c8f Fix for unknown mime inside archives 2020-05-25 17:36:04 -04:00
3058c55510 Memory leak fix #37 2020-05-24 15:42:42 -04:00
dedf9287b2 Fix name separation in --archive list mode 2020-05-24 14:36:59 -04:00
ab199b0c0c Remove arc_reset() function because seek() inside archive doesn't work 2020-05-24 14:18:31 -04:00
c4fbae123e Better support for media files inside archives 2020-05-24 14:10:23 -04:00
dd2397ef5c handle .tgz #44, ignore files inside archives for stats page 2020-05-24 10:10:28 -04:00
ee0f71f4d3 fix compile warning 2020-05-17 15:00:56 -04:00
0bbb96b149 Merge pull request #51 from simon987/stats
Stats page
2020-05-17 14:49:28 -04:00
78f6e16701 image 2020-05-17 12:47:45 -04:00
4625bca9a9 stats 2020-05-17 12:47:02 -04:00
f2ae653886 Revert "wip"
This reverts commit 5686bc86
2020-05-16 08:16:49 -04:00
5686bc864d wip 2020-05-13 17:37:40 -04:00
cf513b4ad8 Escape invalid UTF8 characters simon987/sist2#44, increase magic buffer size 2020-05-12 19:28:02 -04:00
013423424e UTF-8 fix attempt w/ libarchive (#44) 2020-05-10 19:52:42 -04:00
16514fd6b0 Option to search in path #49 2020-05-09 22:00:22 -04:00
27509f97e1 Update USAGE.md 2020-05-08 19:08:46 -04:00
4c540eae1c Update USAGE.md 2020-05-08 19:07:45 -04:00
d2b53ff6fc Update README.md 2020-05-08 18:32:32 -04:00
0ef4292abf Fix duplicate tag problem (simon987/sist2#48) 2020-05-05 20:20:10 -04:00
e6fde38c24 Load defaults when LocalStorage is outdated 2020-05-03 08:13:25 -04:00
5fa343d40f fix version typo 2020-05-03 08:10:28 -04:00
7ee1374802 oops 2020-04-30 21:21:48 -04:00
bd9e56829c Support for markup files 2020-04-30 20:21:09 -04:00
718169345e gzip artifacts in CI 2020-04-21 19:34:46 -04:00
5a6aa763ca build fix 2020-04-21 18:50:32 -04:00
695d9abd83 revert debug hard-coded listen address 2020-04-21 15:52:35 -04:00
e436af7b2a 2.0 (#46)
* extract scan code to libscan, (wip)

* submodules

* replace curl with mongoose (wip)

* replace onion with mongoose (wip)

* replace onion with mongoose (wip)

* It compiles! (I think)

* Update readme

* Entirely remove libonion (WIP)

* unscramble submodules

* recover screenshot

* Update mappings

* Bug fixes

* update

* media meta fix

* memory fixes

* More bug fixes...

* Bug fix w/ libmagic & vfile

* libmagic fix (again)

* Better lightbox, better video handler, random reloads fix

* Use svg for info icon

* re-enable http auth

* mobi support #41, fix logs

* Update README & cleanup
2020-04-21 14:42:20 -04:00
4501a7810f Update issue-template.md 2020-04-11 07:33:33 -04:00
simon987
e36761fa6a Update issue templates 2020-04-11 07:28:48 -04:00
fe53b79d56 Fix warnings 2020-03-25 08:18:59 -04:00
09615bbed6 Update dependencies 2020-03-24 14:30:23 -04:00
a2be9b955c Fix build errors 2020-03-24 11:49:13 -04:00
9298bd2d9d CI fix... 2020-03-24 10:09:33 -04:00
317034ba21 teamcity automation attempt 2020-03-24 10:01:27 -04:00
0505303503 text_buffer bug fixes & Sort option 2020-03-20 20:54:22 -04:00
6e5772f13b Errors cleanup 2020-03-20 10:05:10 -04:00
ccccdb3b78 Fixes #38 2020-03-13 16:35:11 -04:00
12d17acf4f UI fixes 2020-03-06 12:27:38 -05:00
48b56cdb7b I forgot to commit this somehow 2020-03-06 10:32:05 -05:00
048f707f80 Fix buffer overflow in json parse function (index module) 2020-03-06 10:17:21 -05:00
98e0a5fd64 Update CI script 2020-03-06 09:41:33 -05:00
740a49a09f version bump 2020-03-06 09:36:46 -05:00
81be662574 (breaking) update mime list 2020-03-06 09:36:21 -05:00
02fa3f02f5 Fix memory leak with virtual files in parse.c 2020-03-06 09:36:07 -05:00
cfdd7bdd87 Fix memory leak in font.c 2020-03-06 09:35:19 -05:00
7ceb645926 hotfix invalid read in text_buffer 2020-03-06 09:34:41 -05:00
7d0091f647 whoops 2020-03-05 21:54:56 -05:00
b3cd630399 Update README.md 2020-03-05 19:42:06 -05:00
5f7a1acfe3 Merge pull request #36 from simon987/wip-doc
Wip doc
2020-03-05 18:43:56 -05:00
513a21cca2 Undo debug stuff 2020-03-05 18:42:51 -05:00
04dbfb23ab Cleanup warnings 2020-03-05 16:53:30 -05:00
1abddabeec Rewrite doc.c module, fix bad error handling, fix pdf.c memory leaks 2020-03-05 16:12:34 -05:00
9ace5774af Update dependencies 2020-03-05 16:10:45 -05:00
eab6101cf7 make --fast faster 2020-03-05 12:26:43 -05:00
d7cbd5d2b6 wip doc rewrite 2020-03-05 09:13:37 -05:00
641edf2715 Prettier warning messages in main.c 2020-03-04 17:57:49 -05:00
7efb4957bf inline text/util functions 2020-03-04 17:50:31 -05:00
9ae77fdedb Fix css glitch 2020-03-03 16:51:01 -05:00
98c40901ed Disallow incremental scan when version does not match (#33) 2020-03-03 16:36:07 -05:00
363375d5da version bump 2020-03-03 16:25:41 -05:00
149de95d88 (breaking) Upgrade path filter bar 2020-03-03 16:24:24 -05:00
e5bb4856d2 (breaking) Set item depth in ingest pipeline 2020-03-02 17:39:25 -05:00
d78994d427 Ignore --incremental option when the directory does not exist (#31) 2020-03-01 21:16:50 -05:00
f2d68d54df Update README.md 2020-03-01 13:55:08 -05:00
e03625838b Settings menu (#30) and UI tweaks 2020-02-29 19:26:09 -05:00
86840b46f4 Version bump 2020-02-27 09:47:06 -05:00
e57f9916eb Rewrite documentation 2020-02-27 09:45:14 -05:00
565ba6ee76 Fix for #29 2020-02-27 09:44:19 -05:00
d83fc2c373 Fix docker build for 1.2.15 2020-02-27 09:42:18 -05:00
d4da28249e --fast option #27 2020-02-22 18:37:08 -05:00
483a454c8d --exclude argument #26 2020-02-22 16:55:35 -05:00
018ac86640 fix build... 2020-02-22 13:20:41 -05:00
398f1aead4 Support for cbr documents 2020-02-22 13:11:19 -05:00
d19a75926b Fix invalid read in terminate_string() 2020-02-22 13:10:40 -05:00
1ac8b40e3d Code style 2020-02-22 09:02:59 -05:00
a8505cb8c1 Fix for #28 2020-02-20 16:42:13 -05:00
ae8652d86e UI tweaks, search syntax (#25) 2020-02-16 15:24:29 -05:00
849beb09d8 hotfix 2020-02-15 19:33:18 -05:00
e1aaaee617 UI tweak 2020-02-15 09:30:14 -05:00
c02b940945 (I forgot to commit this) 2020-02-14 20:58:10 -05:00
2934ddb07f Add image viewer (#2) 2020-02-14 18:28:55 -05:00
7f6f3c02fa OCR tweaks 2020-02-11 21:13:47 -05:00
7f98d5a682 Fix buffer overflow (whoops) 2020-02-09 18:11:29 -05:00
7eb9c5d7d5 Fix web/index issue with NULL mime types 2020-02-09 17:23:49 -05:00
184439aa38 increase minimum image size for OCR 2020-02-09 14:06:59 -05:00
1ce8b298a1 Display EXIF tags on document info panel, remove march=native on openjp 2020-02-09 13:21:19 -05:00
75f99025d9 add exif dateTime, allow some special characters in text meta 2020-02-09 08:47:13 -05:00
ebe852bd5a Fix rewrite-url arg 2020-02-09 08:23:17 -05:00
402b103c49 Fix total count for ES 7.5 2020-02-08 09:25:00 -05:00
e9b6e1cdc2 Turn off auto optimisation in libtesseract build 2020-02-08 08:32:04 -05:00
ed1ce8ab5e Handle XML errors #18 2020-02-07 10:08:01 -05:00
d1fa4febc4 Improve scroll feature, UI fix 2020-02-07 10:08:01 -05:00
048c55df7b Update README.md 2020-02-06 19:56:29 -05:00
f77bc6a025 Update README.md 2020-02-06 19:55:32 -05:00
efdde2734e version bump 2020-02-06 19:28:05 -05:00
66658fa8f7 Remove trailing/leading white space in text meta fields 2020-02-06 19:27:30 -05:00
df41c251e4 (Breaking!) Add some exif tags 2020-02-06 19:21:50 -05:00
3282ab56ba Version bump 2020-02-02 09:26:54 -05:00
8300838d30 Suppress XML parsing errors (#18) 2020-02-02 09:26:03 -05:00
c9870a6d3d Remove -march=native for release build... 2020-02-02 09:03:06 -05:00
a143cc4fcf bundle openssl... 2020-02-02 08:39:20 -05:00
9ef1f3781d fix attempt for #11 2020-02-01 20:04:26 -05:00
bbee8aa721 tesseract ocr path fix 2020-02-01 20:03:59 -05:00
d22f83c797 curl fix 2020-02-01 15:22:43 -05:00
50615486a4 curl fix attempt 2020-02-01 14:42:42 -05:00
ca79e4f797 add /status endpoint 2020-01-28 10:18:37 -05:00
6a9fd08a80 Merge pull request #21 from simon987/wip-20
Fixes #20
2020-01-27 09:16:00 -05:00
cab890dc9b #20 wip 2020-01-27 09:09:42 -05:00
b3c4faf2df Update README.md 2020-01-26 12:37:13 -05:00
353937171a Update README.md 2020-01-20 15:54:53 -05:00
c80002bea4 Bundle libcurl attempt 2 2020-01-18 11:53:12 -05:00
56adee9d81 Bundle libcurl, libopc bugfix #18 2020-01-18 10:25:02 -05:00
d6493d6d5f Bundle libpng 2020-01-16 16:21:38 -05:00
0967e9676d remove static build in CI... 2020-01-16 15:45:18 -05:00
487e998ea0 Display error message on /d/ error 2020-01-16 15:04:50 -05:00
919f45c79c Document info modal #19 2020-01-16 14:37:19 -05:00
d42129cfcb CI fix attempt 2020-01-15 20:11:45 -05:00
754983e34a Minor cleanup 2020-01-15 18:16:06 -05:00
7c8a3e2f9d Support for external json indices 2020-01-14 15:44:31 -05:00
3bb24b4453 Use bundled libtiff 2020-01-14 12:21:26 -05:00
9a56b959d3 Fix build problems... 2020-01-14 10:55:02 -05:00
5e3a2dbcc2 Update README 2020-01-14 10:47:00 -05:00
573f94f24e OCR support, remove static build 2020-01-14 10:26:40 -05:00
f5db78a69f Ignore special ascii chars, strip binary in docker build 2020-01-12 10:59:17 -05:00
5a2820d339 UI tweak auto-select based on query args 2020-01-11 17:48:51 -05:00
b7f13f425c Fix memory leaks (whoops) 2020-01-11 17:34:34 -05:00
d1a2f9b1d5 Strip binary (CI) 2020-01-07 14:32:39 -05:00
71f17986db build settings 2020-01-06 21:34:41 -05:00
acdd2fb3c1 Use bundled ffmpeg libraries 2020-01-06 16:25:34 -05:00
0cda6c00e1 CI attempt 2020-01-03 20:21:07 -05:00
14d0e5a1e1 possible fix for #18 2019-12-28 14:32:42 -05:00
0d06d39281 Path in list view #16 2019-12-28 14:32:05 -05:00
80708ca636 Merge pull request #17 from dpieski/patch-1
maybe a typo in cli.c
2019-12-23 18:33:28 -05:00
Andrew
43b7b40dc4 maybe a typo in cli.c
possibly corrected a typo
2019-12-23 13:18:18 -06:00
d051f541e2 Show client error on ES connection failure, fixes #13 2019-12-21 20:52:53 -05:00
0eefbac7b4 Update libopc. should fix #14 2019-12-21 19:43:33 -05:00
663f8e21c1 Better logging, fixes #15 2019-12-21 12:32:08 -05:00
80fbcb2a01 empty docx bugfix 2019-12-19 17:26:11 -05:00
8451109ecd OOXML files support 2019-12-19 16:53:18 -05:00
d6fe61cfdc Clarify help string for es url #12 2019-12-19 16:52:22 -05:00
254094130f Fix submodules 2019-12-13 12:35:39 -05:00
eaaa75c04c Fix submodules 2019-12-13 11:24:17 -05:00
bb87f4270f Update docker script 2019-12-13 11:16:17 -05:00
be23201210 Archive file support 2019-12-13 10:53:51 -05:00
9778acda77 uifix 2019-12-12 19:19:53 -05:00
8d187926d9 Bugfix with incremental comparison 2019-12-12 15:41:31 -05:00
88c37e3523 Update README.md 2019-12-04 20:56:52 -05:00
d816dae8b3 UI fix, disable thumbnail option, batch index size option 2019-12-01 10:57:29 -05:00
4346c3e063 Also use static libraries in sist2 build 2019-11-30 20:02:26 -05:00
1a1032a8a7 Cleaner shutdown 2019-11-30 19:59:11 -05:00
4ab2ba1a02 #8 Skip PDF scan when content-size is 0 2019-11-21 16:06:31 -05:00
d089601dc5 Add sfv & m3u 2019-11-20 12:31:31 -05:00
11df6cc88f Add nfo to ext list 2019-11-20 11:41:50 -05:00
373ac01e4e Fix for #3 and maximum scan depth 2019-11-19 11:23:30 -05:00
893ff145c5 List mode tweak 2019-11-17 16:28:47 -05:00
6111ded77f Merge pull request #6 from simon987/wip
List mode #5
2019-11-17 16:15:36 -05:00
34cc26b2fd List mode #5 wip 2019-11-17 15:03:24 -05:00
204034d859 Add basic auth. Fixes #4 2019-11-17 10:00:17 -05:00
16ccc6c0d3 Show error message on elasticsearch connection fail 2019-11-17 09:55:16 -05:00
94c617fdc3 Bug fix 2019-11-12 22:11:50 -05:00
ebfd7e03ce User scripts, bug fixes, docker image 2019-11-12 20:58:43 -05:00
6931d320a2 bugfix with invalid/corrupted index path 2019-11-11 20:49:38 -05:00
fc22e52eae Image placeholder 2019-11-09 23:26:49 -05:00
ba81748a74 Update build 2019-11-09 17:15:20 -05:00
e72fa1587b EXIF metadata for images 2019-11-09 15:18:44 -05:00
ea4fb7fa0d Bug fixes 2019-11-09 12:00:07 -05:00
b0a868bb73 remove 'must match' 2019-11-08 21:46:54 -05:00
d761a3b595 update readme 2019-11-08 19:42:36 -05:00
2d7a8a2fdc fuzzy toggle 2019-11-08 16:15:10 -05:00
152d2ddf8a bug fix in deserialize 2019-11-08 09:03:44 -05:00
bc5f22b759 update readme 2019-11-05 18:59:00 -05:00
534b397876 update readme, UI tweak: don't show broken images 2019-11-03 10:39:02 -05:00
7962a994e2 utf8 update + bug fixes 2019-11-03 07:50:31 -05:00
f8f1a27180 video metadata 2019-10-31 11:54:13 -04:00
784c3c9435 Font rendering fixes 2019-10-31 10:15:01 -04:00
f8b081a3f4 UI tweaks, path autocomplete 2019-10-31 08:26:19 -04:00
5661573b06 Dark theme, pdf meta, de-serialize bugfix 2019-10-30 22:20:22 -04:00
117 changed files with 8045 additions and 4610 deletions

View File

@@ -0,0 +1,18 @@
---
name: Issue template
about: General
title: ''
labels: ''
assignees: ''
---
sist2 version:
Platform (Linux or Docker):
Elasticsearch version:
Command with arguments: `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0"`
If the issue is related to the `scan` module, please attach the files necessary to reproduce the error or email them to me[at]simon987.net.

4
.gitignore vendored
View File

@@ -11,7 +11,9 @@ Makefile
LOG LOG
sist2* sist2*
index.sist2/ index.sist2/
bundle.css bundle*.css
bundle.js bundle.js
*.a *.a
vgcore.* vgcore.*
build/
third-party/

22
.gitmodules vendored
View File

@@ -1,18 +1,6 @@
[submodule "argparse"] [submodule "third-party/libscan"]
path = argparse path = third-party/libscan
url = https://github.com/simon987/libscan
[submodule "third-party/argparse"]
path = third-party/argparse
url = https://github.com/cofyc/argparse url = https://github.com/cofyc/argparse
[submodule "cJSON"]
path = cJSON
url = https://github.com/DaveGamble/cJSON
[submodule "lib/mupdf"]
path = lib/mupdf
url = git://git.ghostscript.com/mupdf.git
[submodule "lib/onion"]
path = lib/onion
url = https://github.com/davidmoreno/onion
[submodule "lib/ffmpeg"]
path = lib/ffmpeg
url = https://git.ffmpeg.org/ffmpeg.git
[submodule "lmdb"]
path = lmdb
url = https://github.com/LMDB/lmdb

69
.teamcity/settings.kts vendored Normal file
View File

@@ -0,0 +1,69 @@
import jetbrains.buildServer.configs.kotlin.v2019_2.*
import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.ExecBuildStep
import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.exec
import jetbrains.buildServer.configs.kotlin.v2019_2.triggers.vcs
import jetbrains.buildServer.configs.kotlin.v2019_2.vcs.GitVcsRoot
/*
The settings script is an entry point for defining a TeamCity
project hierarchy. The script should contain a single call to the
project() function with a Project instance or an init function as
an argument.
VcsRoots, BuildTypes, Templates, and subprojects can be
registered inside the project using the vcsRoot(), buildType(),
template(), and subProject() methods respectively.
To debug settings scripts in command-line, run the
mvnDebug org.jetbrains.teamcity:teamcity-configs-maven-plugin:generate
command and attach your debugger to the port 8000.
To debug in IntelliJ Idea, open the 'Maven Projects' tool window (View
-> Tool Windows -> Maven Projects), find the generate task node
(Plugins -> teamcity-configs -> teamcity-configs:generate), the
'Debug' option is available in the context menu for the task.
*/
version = "2019.2"
// Project definition: registers the Git VCS root and the one build
// configuration declared further down in this script.
project {
    vcsRoot(HttpsGithubComSimon987sist2refsHeadsMaster)

    buildType(Build)
}
/**
 * The "Build" configuration.
 *
 * Uses the sist2 VCS root, runs `./ci/build.sh` as an exec step inside the
 * `simon987/general_ci` Docker image (Linux platform, `dockerPull` enabled)
 * and publishes `sist2` and `sist2_scan` as artifacts.
 */
object Build : BuildType({
    name = "Build"

    // One artifact rule per line; trimIndent() strips the indentation and the
    // blank first/last lines of the raw string.
    artifactRules = """
        sist2
        sist2_scan
    """.trimIndent()

    vcs {
        root(HttpsGithubComSimon987sist2refsHeadsMaster)
    }

    steps {
        exec {
            name = "Build"
            path = "./ci/build.sh"
            dockerImage = "simon987/general_ci"
            dockerImagePlatform = ExecBuildStep.ImagePlatform.Linux
            dockerPull = true
        }
    }

    triggers {
        // VCS trigger declared with no explicit settings.
        vcs {
        }
    }
})
// Git VCS root pointing at the sist2 repository on GitHub; the name records
// the refs/heads/master branch.
object HttpsGithubComSimon987sist2refsHeadsMaster : GitVcsRoot({
    name = "https://github.com/simon987/sist2#refs/heads/master"
    url = "https://github.com/simon987/sist2"
})

View File

@@ -1,230 +1,121 @@
cmake_minimum_required(VERSION 3.7) cmake_minimum_required(VERSION 3.7)
set(CMAKE_C_STANDARD 11) set(CMAKE_C_STANDARD 11)
option(WITH_SIST2 "Build main executable" ON)
option(WITH_SIST2_SCAN "Build scan executable" ON)
project(sist2 C) project(sist2 C)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/CMakeModules")
if (WITH_SIST2) option(SIST_DEBUG "Build a debug executable" on)
add_executable(
add_subdirectory(third-party/libscan)
set(ARGPARSE_SHARED off)
add_subdirectory(third-party/argparse)
add_executable(
sist2
src/main.c
src/sist.h
src/io/walk.h src/io/walk.c
src/io/store.h src/io/store.c
src/tpool.h src/tpool.c
src/parsing/parse.h src/parsing/parse.c
src/io/serialize.h src/io/serialize.c
src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
src/index/web.c src/index/web.h
src/web/serve.c src/web/serve.h
src/index/elastic.c src/index/elastic.h
src/util.c src/util.h
src/ctx.h src/types.h
src/log.c src/log.h
# argparse
third-party/argparse/argparse.h third-party/argparse/argparse.c
src/cli.c src/cli.h
src/stats.c src/stats.h src/ctx.c)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-glib CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED)
find_library(UUID_LIB NAMES uuid)
#find_package(OpenSSL REQUIRED)
target_include_directories(
sist2 PUBLIC
${CMAKE_SOURCE_DIR}/third-party/onion/src/
${CMAKE_SOURCE_DIR}/third-party/utf8.h/
${CMAKE_SOURCE_DIR}/third-party/libscan/
${CMAKE_SOURCE_DIR}/
)
target_compile_options(
sist2
PRIVATE
-fPIC
-Werror
)
if (SIST_DEBUG)
target_compile_options(
sist2 sist2
src/main.c
src/sist.h
src/io/walk.h src/io/walk.c
src/parsing/media.h src/parsing/media.c
src/parsing/pdf.h src/parsing/pdf.c
src/io/store.h src/io/store.c
src/tpool.h src/tpool.c
src/parsing/parse.h src/parsing/parse.c
src/io/serialize.h src/io/serialize.c
src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
src/parsing/text.h src/parsing/text.c
src/index/web.c src/index/web.h
src/web/serve.c src/web/serve.h
src/index/elastic.c src/index/elastic.h
src/util.c src/util.h
src/ctx.h src/types.h src/parsing/font.c src/parsing/font.h
# argparse
argparse/argparse.h argparse/argparse.c
# cJSON
cJSON/cJSON.h cJSON/cJSON.c
# LMDB
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
src/cli.c src/cli.h
)
endif ()
if (WITH_SIST2_SCAN)
add_executable(
sist2_scan
src/main.c
src/sist.h
src/io/walk.h src/io/walk.c
src/parsing/media.h src/parsing/media.c
src/parsing/pdf.h src/parsing/pdf.c
src/io/store.h src/io/store.c
src/tpool.h src/tpool.c
src/parsing/parse.h src/parsing/parse.c
src/io/serialize.h src/io/serialize.c
src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
src/parsing/text.h src/parsing/text.c
src/util.c src/util.h
src/ctx.h src/types.h src/parsing/font.c src/parsing/font.h
# argparse
argparse/argparse.h argparse/argparse.c
# cJSON
cJSON/cJSON.h cJSON/cJSON.c
# LMDB
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
src/cli.c src/cli.h
)
endif ()
find_package(PkgConfig REQUIRED)
set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:/usr/local/lib/pkgconfig/")
find_package(LibMagic REQUIRED)
find_package(FFmpeg REQUIRED)
find_package(OpenSSL REQUIRED)
find_package(Freetype REQUIRED)
pkg_check_modules(GLIB REQUIRED glib-2.0)
pkg_check_modules(GOBJECT REQUIRED gobject-2.0)
pkg_check_modules(UUID REQUIRED uuid)
add_definitions(${LIBMAGIC_CFLAGS_OTHER})
add_definitions(${UUID_CFLAGS_OTHER})
add_definitions(${GLIB_CFLAGS_OTHER})
add_definitions(${GOBJECT_CFLAGS_OTHER})
add_definitions(${FREETYPE_CFLAGS_OTHER})
list(REMOVE_ITEM GLIB_LIBRARIES pcre)
list(REMOVE_ITEM GOBJECT_LIBRARIES pcre)
list(REMOVE_ITEM UUID_LIBRARIES pcre)
if (WITH_SIST2)
target_include_directories(
sist2 PUBLIC
${LIBMAGIC_INCLUDE_DIRS}
${GOBJECT_INCLUDE_DIRS}
${OPENSSL_INCLUDE_DIR}
${FFMPEG_INCLUDE_DIRS}
${GLIB_INCLUDE_DIRS}
${FREETYPE_INCLUDE_DIRS}
${UUID_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/
${PROJECT_SOURCE_DIR}/lmdb/libraries/liblmdb/
${PROJECT_SOURCE_DIR}/lib/onion/src/
${PROJECT_SOURCE_DIR}/lib/mupdf/include/
)
target_link_directories(
sist2 PUBLIC
${UUID_LIBRARY_DIRS}
${FFMPEG_LIBRARY_DIRS}
)
target_compile_options(sist2
PRIVATE PRIVATE
-O3 -g
# -march=native -fstack-protector
-fno-stack-protector -fno-omit-frame-pointer
-fomit-frame-pointer -fsanitize=address
) )
target_link_options(
TARGET_LINK_LIBRARIES(
sist2 sist2
PRIVATE
${GLIB_LIBRARIES} -fsanitize=address
${GOBJECT_LIBRARIES} # -static
${UUID_LIBRARIES}
# ffmpeg
${PROJECT_SOURCE_DIR}/lib/libavcodec.a
${PROJECT_SOURCE_DIR}/lib/libavformat.a
${PROJECT_SOURCE_DIR}/lib/libavutil.a
${PROJECT_SOURCE_DIR}/lib/libswscale.a
${PROJECT_SOURCE_DIR}/lib/libswresample.a
# ${FFMPEG_LIBRARIES}
# swscale
# mupdf
${PROJECT_SOURCE_DIR}/lib/libmupdf.a
${PROJECT_SOURCE_DIR}/lib/libmupdf-third.a
# onion
${PROJECT_SOURCE_DIR}/lib/libonion_static.a
pthread
curl
m
bz2
magic
)
endif ()
if (WITH_SIST2_SCAN)
set_target_properties(
sist2_scan
PROPERTIES COMPILE_DEFINITIONS SIST_SCAN_ONLY
) )
set_target_properties( set_target_properties(
sist2_scan sist2
PROPERTIES PROPERTIES
COMPILE_DEFINITIONS SIST_SCAN_ONLY OUTPUT_NAME sist2_debug
LINK_FLAGS -static
) )
target_include_directories( else ()
sist2_scan PUBLIC target_compile_options(
${LIBMAGIC_INCLUDE_DIRS} sist2
${GOBJECT_INCLUDE_DIRS}
${OPENSSL_INCLUDE_DIR}
${FFMPEG_INCLUDE_DIRS}
${GLIB_INCLUDE_DIRS}
${UUID_INCLUDE_DIRS}
${FREETYPE_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/
${PROJECT_SOURCE_DIR}/lmdb/libraries/liblmdb/
${PROJECT_SOURCE_DIR}/lib/onion/src/
${PROJECT_SOURCE_DIR}/lib/mupdf/include/
)
target_link_directories(
sist2_scan PUBLIC
${UUID_LIBRARY_DIRS}
${FFMPEG_LIBRARY_DIRS}
)
target_compile_options(sist2_scan
PRIVATE PRIVATE
-O3 -Ofast
# -march=native
-fno-stack-protector -fno-stack-protector
-fomit-frame-pointer -fomit-frame-pointer
)
TARGET_LINK_LIBRARIES(
sist2_scan
${GLIB_LIBRARIES}
${GOBJECT_LIBRARIES}
${UUID_LIBRARIES}
# ffmpeg
${PROJECT_SOURCE_DIR}/lib/libavcodec.a
${PROJECT_SOURCE_DIR}/lib/libavformat.a
${PROJECT_SOURCE_DIR}/lib/libavutil.a
${PROJECT_SOURCE_DIR}/lib/libswscale.a
${PROJECT_SOURCE_DIR}/lib/libswresample.a
# mupdf
${PROJECT_SOURCE_DIR}/lib/libmupdf.a
${PROJECT_SOURCE_DIR}/lib/libmupdf-third.a
${PROJECT_SOURCE_DIR}/lib/libbz2.a
${PROJECT_SOURCE_DIR}/lib/libmagic.a
pthread
m
) )
endif () endif ()
add_dependencies(
sist2
scan
argparse
)
target_link_libraries(
sist2
z
lmdb
cjson
argparse
unofficial::glib::glib
unofficial::mongoose::mongoose
# OpenSSL::SSL OpenSSL::Crypto
${UUID_LIB}
pthread
magic
scan
)
add_custom_target( add_custom_target(
before_sist2 before_sist2
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/scripts/before_build.sh COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/scripts/before_build.sh
) )
IF (WITH_SIST2) add_dependencies(sist2 before_sist2)
add_dependencies(sist2 before_sist2)
else ()
add_dependencies(sist2_scan before_sist2)
endif ()

View File

@@ -1,80 +0,0 @@
# - Try to find ffmpeg libraries (libavcodec, libavformat and libavutil)
# Once done this will define
#
# FFMPEG_FOUND - system has ffmpeg or libav
# FFMPEG_INCLUDE_DIR - the ffmpeg include directory
# FFMPEG_LIBRARIES - Link these to use ffmpeg
# FFMPEG_LIBAVCODEC
# FFMPEG_LIBAVFORMAT
# FFMPEG_LIBAVUTIL
#
# Copyright (c) 2008 Andreas Schneider <mail@cynapses.org>
# Modified for other libraries by Lasse Kärkkäinen <tronic>
# Modified for Hedgewars by Stepik777
#
# Redistribution and use is allowed according to the terms of the New
# BSD license.
#
if (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
    # Both results are already defined (e.g. from the cache): nothing to search for.
    set(FFMPEG_FOUND TRUE)
else ()
    # Use pkg-config to get the directories and then use these values
    # as extra search paths in the find_path() and find_library() calls.
    find_package(PkgConfig)
    if (PKG_CONFIG_FOUND)
        pkg_check_modules(_FFMPEG_AVCODEC libavcodec)
        pkg_check_modules(_FFMPEG_AVFORMAT libavformat)
        pkg_check_modules(_FFMPEG_AVUTIL libavutil)
    endif ()

    find_path(FFMPEG_AVCODEC_INCLUDE_DIR
        NAMES libavcodec/avcodec.h
        PATHS ${_FFMPEG_AVCODEC_INCLUDE_DIRS} /usr/include /usr/local/include /opt/local/include /sw/include
        PATH_SUFFIXES ffmpeg libav
    )

    find_library(FFMPEG_LIBAVCODEC
        NAMES avcodec
        PATHS ${_FFMPEG_AVCODEC_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
    )

    find_library(FFMPEG_LIBAVFORMAT
        NAMES avformat
        PATHS ${_FFMPEG_AVFORMAT_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
    )

    find_library(FFMPEG_LIBAVUTIL
        NAMES avutil
        PATHS ${_FFMPEG_AVUTIL_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
    )

    # All three libraries are placed in FFMPEG_LIBRARIES below, so all three
    # must be located. Without the avutil check a missing libavutil would put
    # a "FFMPEG_LIBAVUTIL-NOTFOUND" entry into the link list while the module
    # still reported success.
    if (FFMPEG_LIBAVCODEC AND FFMPEG_LIBAVFORMAT AND FFMPEG_LIBAVUTIL)
        set(FFMPEG_FOUND TRUE)
    endif ()

    if (FFMPEG_FOUND)
        set(FFMPEG_INCLUDE_DIR ${FFMPEG_AVCODEC_INCLUDE_DIR})

        set(FFMPEG_LIBRARIES
            ${FFMPEG_LIBAVCODEC}
            ${FFMPEG_LIBAVFORMAT}
            ${FFMPEG_LIBAVUTIL}
        )
    endif ()

    # Report the outcome: a status line on success (unless QUIET was
    # requested), a fatal error on failure when the package is REQUIRED.
    if (FFMPEG_FOUND)
        if (NOT FFMPEG_FIND_QUIETLY)
            message(STATUS "Found FFMPEG or Libav: ${FFMPEG_LIBRARIES}, ${FFMPEG_INCLUDE_DIR}")
        endif ()
    else ()
        if (FFMPEG_FIND_REQUIRED)
            message(FATAL_ERROR "Could not find libavcodec or libavformat or libavutil")
        endif ()
    endif ()
endif ()

View File

@@ -1,100 +0,0 @@
#-------------------------------------------------------------------------------
# Copyright (c) 2013-2013, Lars Baehren <lbaehren@gmail.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#-------------------------------------------------------------------------------
# - Check for the presence of LIBMAGIC
#
# The following variables are set when LIBMAGIC is found:
# LIBMAGIC_FOUND = Set to true, if all components of LIBMAGIC have been
# found.
# LIBMAGIC_INCLUDES = Include path for the header files of LIBMAGIC
# LIBMAGIC_LIBRARIES = Link these to use LIBMAGIC
# LIBMAGIC_LFLAGS = Linker flags (optional)
if (NOT LIBMAGIC_FOUND)

    # Default the search root to the install prefix when the caller gave none.
    if (NOT LIBMAGIC_ROOT_DIR)
        set (LIBMAGIC_ROOT_DIR ${CMAKE_INSTALL_PREFIX})
    endif ()

    ##____________________________________________________________________________
    ## Check for the header files

    find_path (LIBMAGIC_FILE_H
        NAMES file/file.h
        HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
        PATH_SUFFIXES include
    )
    if (LIBMAGIC_FILE_H)
        list (APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_FILE_H})
    endif ()

    find_path (LIBMAGIC_MAGIC_H
        NAMES magic.h
        HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
        PATH_SUFFIXES include include/linux
    )
    if (LIBMAGIC_MAGIC_H)
        list (APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_MAGIC_H})
    endif ()

    # Guarded: when neither header was found the list variable does not exist,
    # and older CMake releases reject list(REMOVE_DUPLICATES) in that case.
    if (LIBMAGIC_INCLUDES)
        list (REMOVE_DUPLICATES LIBMAGIC_INCLUDES)
    endif ()

    ##____________________________________________________________________________
    ## Check for the library

    find_library (LIBMAGIC_LIBRARIES magic
        HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
        PATH_SUFFIXES lib
    )

    ##____________________________________________________________________________
    ## Actions taken when all components have been found

    # The result flag is set by hand from the two search results. Previously
    # nothing set LIBMAGIC_FOUND, so the "found" branch below could never run
    # and the REQUIRED branch would fire even after a successful search.
    if (LIBMAGIC_LIBRARIES AND LIBMAGIC_INCLUDES)
        set (LIBMAGIC_FOUND TRUE)
    else ()
        set (LIBMAGIC_FOUND FALSE)
    endif ()

    if (LIBMAGIC_FOUND)
        if (NOT LIBMAGIC_FIND_QUIETLY)
            message (STATUS "Found components for LIBMAGIC")
            message (STATUS "LIBMAGIC_ROOT_DIR = ${LIBMAGIC_ROOT_DIR}")
            message (STATUS "LIBMAGIC_INCLUDES = ${LIBMAGIC_INCLUDES}")
            message (STATUS "LIBMAGIC_LIBRARIES = ${LIBMAGIC_LIBRARIES}")
        endif ()
    else ()
        if (LIBMAGIC_FIND_REQUIRED)
            message (FATAL_ERROR "Could not find LIBMAGIC!")
        endif ()
    endif ()

    ##____________________________________________________________________________
    ## Mark advanced variables

    mark_as_advanced (
        LIBMAGIC_ROOT_DIR
        LIBMAGIC_INCLUDES
        LIBMAGIC_LIBRARIES
    )

endif ()

View File

@@ -1,478 +0,0 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
# Decide whether the found OpenSSL libraries need extra link dependencies.
#
# Sets _OpenSSL_has_dependencies to TRUE when building for Linux and at least
# one of the two library paths ends with the static library suffix; in that
# case the Threads package is looked up as well. Otherwise the flag is FALSE.
#   ssl_library    - path of the SSL library
#   crypto_library - path of the crypto library
macro(_OpenSSL_test_and_find_dependencies ssl_library crypto_library)
  set(_OpenSSL_has_dependencies FALSE)
  if((CMAKE_SYSTEM_NAME STREQUAL "Linux") AND
     (("${ssl_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$") OR
      ("${crypto_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$")))
    set(_OpenSSL_has_dependencies TRUE)
    find_package(Threads)
  endif()
endmacro()
# Append the thread and dl link dependencies to a library list.
#   libraries_var - name of the list variable to extend in the caller's scope
#   library       - accepted for interface symmetry; not read by the body
function(_OpenSSL_add_dependencies libraries_var library)
  # Work on a local copy and publish it to the caller at the end.
  set(_extended ${${libraries_var}})
  if(CMAKE_THREAD_LIBS_INIT)
    list(APPEND _extended ${CMAKE_THREAD_LIBS_INIT})
  endif()
  list(APPEND _extended ${CMAKE_DL_LIBS})
  set(${libraries_var} ${_extended} PARENT_SCOPE)
endfunction()
# Attach Threads::Threads and CMAKE_DL_LIBS to an imported target's
# INTERFACE_LINK_LIBRARIES, but only when _OpenSSL_has_dependencies is set.
#   target - name of the imported target to extend
function(_OpenSSL_target_add_dependencies target)
  if(NOT _OpenSSL_has_dependencies)
    return()
  endif()
  set_property(TARGET ${target} APPEND PROPERTY
    INTERFACE_LINK_LIBRARIES Threads::Threads ${CMAKE_DL_LIBS})
endfunction()
# On UNIX hosts, query pkg-config (quietly) for openssl. The resulting
# _OPENSSL_* variables (_OPENSSL_INCLUDEDIR, _OPENSSL_LIBDIR) are used as
# extra HINTS by the find_path()/find_library() calls below.
if (UNIX)
find_package(PkgConfig QUIET)
pkg_check_modules(_OPENSSL QUIET openssl)
endif ()
# Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES
# The original suffix list is saved so it can be restored at the end of the module.
if(OPENSSL_USE_STATIC_LIBS)
set(_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
if(WIN32)
set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
else()
set(CMAKE_FIND_LIBRARY_SUFFIXES .a )
endif()
endif()
# Collect the candidate root directories. OPENSSL_ROOT_DIR (CMake variable or
# environment variable) is always a hint; on Windows the installer registry
# keys and a few conventional install directories are added.
if (WIN32)
# http://www.slproweb.com/products/Win32OpenSSL.html
set(_OPENSSL_ROOT_HINTS
${OPENSSL_ROOT_DIR}
"[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (32-bit)_is1;Inno Setup: App Path]"
"[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (64-bit)_is1;Inno Setup: App Path]"
ENV OPENSSL_ROOT_DIR
)
file(TO_CMAKE_PATH "$ENV{PROGRAMFILES}" _programfiles)
set(_OPENSSL_ROOT_PATHS
"${_programfiles}/OpenSSL"
"${_programfiles}/OpenSSL-Win32"
"${_programfiles}/OpenSSL-Win64"
"C:/OpenSSL/"
"C:/OpenSSL-Win32/"
"C:/OpenSSL-Win64/"
)
unset(_programfiles)
else ()
set(_OPENSSL_ROOT_HINTS
${OPENSSL_ROOT_DIR}
ENV OPENSSL_ROOT_DIR
)
endif ()
# Pre-assembled HINTS/PATHS argument list that is spliced into every
# find_path()/find_library() call of this module.
set(_OPENSSL_ROOT_HINTS_AND_PATHS
HINTS ${_OPENSSL_ROOT_HINTS}
PATHS ${_OPENSSL_ROOT_PATHS}
)
# Locate the directory that contains openssl/ssl.h.
find_path(OPENSSL_INCLUDE_DIR
NAMES
openssl/ssl.h
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_INCLUDEDIR}
PATH_SUFFIXES
include
)
# Locate the SSL and crypto libraries. The candidate library names and
# sub-directories depend on the toolchain: MSVC, MinGW, other Windows
# compilers, and finally everything that is not native Windows.
# Every branch ends by defining OPENSSL_SSL_LIBRARY and OPENSSL_CRYPTO_LIBRARY.
if(WIN32 AND NOT CYGWIN)
if(MSVC)
# /MD and /MDd are the standard values - if someone wants to use
# others, the libnames have to change here too
# use also ssl and ssleay32 in debug as fallback for openssl < 0.9.8b
# enable OPENSSL_MSVC_STATIC_RT to get the libs build /MT (Multithreaded no-DLL)
# In Visual C++ naming convention each of these four kinds of Windows libraries has it's standard suffix:
# * MD for dynamic-release
# * MDd for dynamic-debug
# * MT for static-release
# * MTd for static-debug
# Implementation details:
# We are using the libraries located in the VC subdir instead of the parent directory even though :
# libeay32MD.lib is identical to ../libeay32.lib, and
# ssleay32MD.lib is identical to ../ssleay32.lib
# enable OPENSSL_USE_STATIC_LIBS to use the static libs located in lib/VC/static
if (OPENSSL_MSVC_STATIC_RT)
set(_OPENSSL_MSVC_RT_MODE "MT")
else ()
set(_OPENSSL_MSVC_RT_MODE "MD")
endif ()
# Since OpenSSL 1.1, lib names are like libcrypto32MTd.lib and libssl32MTd.lib
if( "${CMAKE_SIZEOF_VOID_P}" STREQUAL "8" )
set(_OPENSSL_MSVC_ARCH_SUFFIX "64")
else()
set(_OPENSSL_MSVC_ARCH_SUFFIX "32")
endif()
if(OPENSSL_USE_STATIC_LIBS)
set(_OPENSSL_PATH_SUFFIXES
"lib/VC/static"
"VC/static"
"lib"
)
else()
set(_OPENSSL_PATH_SUFFIXES
"lib/VC"
"VC"
"lib"
)
endif ()
# Debug and release variants are searched separately; candidate names are
# listed from the most specific (arch + runtime suffix) to the most generic.
find_library(LIB_EAY_DEBUG
NAMES
libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
libcrypto${_OPENSSL_MSVC_RT_MODE}d
libcryptod
libeay32${_OPENSSL_MSVC_RT_MODE}d
libeay32d
cryptod
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
find_library(LIB_EAY_RELEASE
NAMES
libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
libcrypto${_OPENSSL_MSVC_RT_MODE}
libcrypto
libeay32${_OPENSSL_MSVC_RT_MODE}
libeay32
crypto
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
find_library(SSL_EAY_DEBUG
NAMES
libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
libssl${_OPENSSL_MSVC_RT_MODE}d
libssld
ssleay32${_OPENSSL_MSVC_RT_MODE}d
ssleay32d
ssld
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
find_library(SSL_EAY_RELEASE
NAMES
libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
libssl${_OPENSSL_MSVC_RT_MODE}
libssl
ssleay32${_OPENSSL_MSVC_RT_MODE}
ssleay32
ssl
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
# Copy the results into the <base>_LIBRARY_DEBUG/_RELEASE names that
# select_library_configurations() is called with below.
set(LIB_EAY_LIBRARY_DEBUG "${LIB_EAY_DEBUG}")
set(LIB_EAY_LIBRARY_RELEASE "${LIB_EAY_RELEASE}")
set(SSL_EAY_LIBRARY_DEBUG "${SSL_EAY_DEBUG}")
set(SSL_EAY_LIBRARY_RELEASE "${SSL_EAY_RELEASE}")
include(${CMAKE_CURRENT_LIST_DIR}/SelectLibraryConfigurations.cmake)
select_library_configurations(LIB_EAY)
select_library_configurations(SSL_EAY)
mark_as_advanced(LIB_EAY_LIBRARY_DEBUG LIB_EAY_LIBRARY_RELEASE
SSL_EAY_LIBRARY_DEBUG SSL_EAY_LIBRARY_RELEASE)
set(OPENSSL_SSL_LIBRARY ${SSL_EAY_LIBRARY} )
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY_LIBRARY} )
elseif(MINGW)
# same player, for MinGW
set(LIB_EAY_NAMES crypto libeay32)
set(SSL_EAY_NAMES ssl ssleay32)
find_library(LIB_EAY
NAMES
${LIB_EAY_NAMES}
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
"lib/MinGW"
"lib"
)
find_library(SSL_EAY
NAMES
${SSL_EAY_NAMES}
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
"lib/MinGW"
"lib"
)
mark_as_advanced(SSL_EAY LIB_EAY)
set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
unset(LIB_EAY_NAMES)
unset(SSL_EAY_NAMES)
else()
# Not sure what to pick for -say- intel, let's use the toplevel ones and hope someone report issues:
find_library(LIB_EAY
NAMES
libcrypto
libeay32
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
find_library(SSL_EAY
NAMES
libssl
ssleay32
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
mark_as_advanced(SSL_EAY LIB_EAY)
set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
endif()
else()
# Not native Windows: search the result variables directly, using the
# pkg-config library directory as an additional hint.
find_library(OPENSSL_SSL_LIBRARY
NAMES
ssl
ssleay32
ssleay32MD
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
find_library(OPENSSL_CRYPTO_LIBRARY
NAMES
crypto
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
mark_as_advanced(OPENSSL_CRYPTO_LIBRARY OPENSSL_SSL_LIBRARY)
endif()
# compat defines
set(OPENSSL_SSL_LIBRARIES ${OPENSSL_SSL_LIBRARY})
set(OPENSSL_CRYPTO_LIBRARIES ${OPENSSL_CRYPTO_LIBRARY})
# When the dependency check sets _OpenSSL_has_dependencies, extend both
# *_LIBRARIES lists with the thread and dl libraries.
_OpenSSL_test_and_find_dependencies("${OPENSSL_SSL_LIBRARY}" "${OPENSSL_CRYPTO_LIBRARY}")
if(_OpenSSL_has_dependencies)
_OpenSSL_add_dependencies( OPENSSL_SSL_LIBRARIES "${OPENSSL_SSL_LIBRARY}" )
_OpenSSL_add_dependencies( OPENSSL_CRYPTO_LIBRARIES "${OPENSSL_CRYPTO_LIBRARY}" )
endif()
# Convert a string of hexadecimal digits into its decimal value.
#   HEX - the hexadecimal digits (upper or lower case, no "0x" prefix)
#   DEC - name of the variable in the caller's scope that receives the result
# An empty HEX yields 0.
function(from_hex HEX DEC)
  string(TOUPPER "${HEX}" _digits)
  string(LENGTH "${_digits}" _count)
  set(_value 0)
  set(_index 0)
  while(_index LESS _count)
    string(SUBSTRING "${_digits}" ${_index} 1 _digit)
    # Shift the accumulated value one hex digit to the left.
    math(EXPR _value "${_value} * 16")
    # Letters A-F sit at offsets 0-5 of the lookup string, i.e. values 10-15;
    # anything else is handed to math() as a decimal digit.
    string(FIND "ABCDEF" "${_digit}" _letter_offset)
    if(_letter_offset GREATER -1)
      math(EXPR _digit "${_letter_offset} + 10")
    endif()
    math(EXPR _value "${_value} + ${_digit}")
    math(EXPR _index "${_index} + 1")
  endwhile()
  set(${DEC} ${_value} PARENT_SCOPE)
endfunction()
# Derive OPENSSL_VERSION from <include dir>/openssl/opensslv.h.
#
# Two header layouts are handled:
#   * a hexadecimal OPENSSL_VERSION_NUMBER literal (0xMNNFFPPS), decoded into
#     OPENSSL_VERSION_MAJOR/MINOR/FIX/PATCH and an optional patch letter;
#   * headers without such a literal (OpenSSL 3.x defines the number as an
#     expression), where the quoted OPENSSL_VERSION_STR "MAJOR.MINOR.PATCH"
#     macro is used instead.
# OPENSSL_VERSION is left untouched when neither form is present.
if(OPENSSL_INCLUDE_DIR AND EXISTS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h")
  file(STRINGS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h" openssl_version_str
       REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])+.*")

  if(openssl_version_str)
    # The version number is encoded as 0xMNNFFPPS: major minor fix patch status
    # The status gives if this is a developer or prerelease and is ignored here.
    # Major, minor, and fix directly translate into the version numbers shown in
    # the string. The patch field translates to the single character suffix that
    # indicates the bug fix state, which 00 -> nothing, 01 -> a, 02 -> b and so
    # on.

    string(REGEX REPLACE "^.*OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F]).*$"
           "\\1;\\2;\\3;\\4;\\5" OPENSSL_VERSION_LIST "${openssl_version_str}")
    list(GET OPENSSL_VERSION_LIST 0 OPENSSL_VERSION_MAJOR)
    list(GET OPENSSL_VERSION_LIST 1 OPENSSL_VERSION_MINOR)
    from_hex("${OPENSSL_VERSION_MINOR}" OPENSSL_VERSION_MINOR)
    list(GET OPENSSL_VERSION_LIST 2 OPENSSL_VERSION_FIX)
    from_hex("${OPENSSL_VERSION_FIX}" OPENSSL_VERSION_FIX)
    list(GET OPENSSL_VERSION_LIST 3 OPENSSL_VERSION_PATCH)

    if (NOT OPENSSL_VERSION_PATCH STREQUAL "00")
      from_hex("${OPENSSL_VERSION_PATCH}" _tmp)
      # 96 is the ASCII code of 'a' minus 1
      math(EXPR OPENSSL_VERSION_PATCH_ASCII "${_tmp} + 96")
      unset(_tmp)
      # Once anyone knows how OpenSSL would call the patch versions beyond 'z'
      # this should be updated to handle that, too. This has not happened yet
      # so it is simply ignored here for now.
      string(ASCII "${OPENSSL_VERSION_PATCH_ASCII}" OPENSSL_VERSION_PATCH_STRING)
    endif ()

    set(OPENSSL_VERSION "${OPENSSL_VERSION_MAJOR}.${OPENSSL_VERSION_MINOR}.${OPENSSL_VERSION_FIX}${OPENSSL_VERSION_PATCH_STRING}")
  else ()
    # No hexadecimal literal: fall back to the plain version string macro,
    # e.g.  # define OPENSSL_VERSION_STR "3.0.2"
    file(STRINGS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h" _openssl_version_macro
         REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_STR[\t ]+\"[0-9]+\\.[0-9]+\\.[0-9]+\".*")
    if(_openssl_version_macro)
      string(REGEX REPLACE "^.*OPENSSL_VERSION_STR[\t ]+\"([0-9]+\\.[0-9]+\\.[0-9]+)\".*$"
             "\\1" OPENSSL_VERSION "${_openssl_version_macro}")
    endif ()
    unset(_openssl_version_macro)
  endif ()
endif ()
# Combined library list (SSL first, then crypto) without duplicates.
set(OPENSSL_LIBRARIES ${OPENSSL_SSL_LIBRARIES} ${OPENSSL_CRYPTO_LIBRARIES} )
list(REMOVE_DUPLICATES OPENSSL_LIBRARIES)

# Evaluate each requested component. A component counts as found when the
# include directory exists and at least one of its library files (generic,
# debug or release) exists. Unknown component names produce a warning and are
# reported as not found.
foreach(_comp IN LISTS OpenSSL_FIND_COMPONENTS)
  set(OpenSSL_${_comp}_FOUND FALSE)
  if(_comp STREQUAL "Crypto")
    if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
        (EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
        EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
        EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
    )
      set(OpenSSL_${_comp}_FOUND TRUE)
    endif()
  elseif(_comp STREQUAL "SSL")
    if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
        (EXISTS "${OPENSSL_SSL_LIBRARY}" OR
        EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
        EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
    )
      set(OpenSSL_${_comp}_FOUND TRUE)
    endif()
  else()
    message(WARNING "${_comp} is not a valid OpenSSL component")
  endif()
endforeach()
unset(_comp)

# Hand the collected results to the standard result handler, which sets
# OPENSSL_FOUND / OpenSSL_FOUND and prints the outcome.
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake)
find_package_handle_standard_args(OpenSSL
  REQUIRED_VARS
    OPENSSL_CRYPTO_LIBRARY
    OPENSSL_INCLUDE_DIR
  VERSION_VAR
    OPENSSL_VERSION
  HANDLE_COMPONENTS
  FAIL_MESSAGE
    "Could NOT find OpenSSL, try to set the path to OpenSSL root folder in the system variable OPENSSL_ROOT_DIR"
)

mark_as_advanced(OPENSSL_INCLUDE_DIR OPENSSL_LIBRARIES)
# Create the imported targets OpenSSL::Crypto and OpenSSL::SSL once OpenSSL
# has been found. A target is only created when it does not exist yet and at
# least one of its library files (generic, release or debug) exists on disk.
if(OPENSSL_FOUND)
if(NOT TARGET OpenSSL::Crypto AND
(EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
)
add_library(OpenSSL::Crypto UNKNOWN IMPORTED)
set_target_properties(OpenSSL::Crypto PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
# Generic location, used when a single library file was found.
if(EXISTS "${OPENSSL_CRYPTO_LIBRARY}")
set_target_properties(OpenSSL::Crypto PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES "C"
IMPORTED_LOCATION "${OPENSSL_CRYPTO_LIBRARY}")
endif()
# Per-configuration locations for the separate release/debug libraries.
if(EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
set_property(TARGET OpenSSL::Crypto APPEND PROPERTY
IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(OpenSSL::Crypto PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
IMPORTED_LOCATION_RELEASE "${LIB_EAY_LIBRARY_RELEASE}")
endif()
if(EXISTS "${LIB_EAY_LIBRARY_DEBUG}")
set_property(TARGET OpenSSL::Crypto APPEND PROPERTY
IMPORTED_CONFIGURATIONS DEBUG)
set_target_properties(OpenSSL::Crypto PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "C"
IMPORTED_LOCATION_DEBUG "${LIB_EAY_LIBRARY_DEBUG}")
endif()
_OpenSSL_target_add_dependencies(OpenSSL::Crypto)
endif()
if(NOT TARGET OpenSSL::SSL AND
(EXISTS "${OPENSSL_SSL_LIBRARY}" OR
EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
)
add_library(OpenSSL::SSL UNKNOWN IMPORTED)
set_target_properties(OpenSSL::SSL PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
if(EXISTS "${OPENSSL_SSL_LIBRARY}")
set_target_properties(OpenSSL::SSL PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES "C"
IMPORTED_LOCATION "${OPENSSL_SSL_LIBRARY}")
endif()
if(EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
set_property(TARGET OpenSSL::SSL APPEND PROPERTY
IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(OpenSSL::SSL PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
IMPORTED_LOCATION_RELEASE "${SSL_EAY_LIBRARY_RELEASE}")
endif()
if(EXISTS "${SSL_EAY_LIBRARY_DEBUG}")
set_property(TARGET OpenSSL::SSL APPEND PROPERTY
IMPORTED_CONFIGURATIONS DEBUG)
set_target_properties(OpenSSL::SSL PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "C"
IMPORTED_LOCATION_DEBUG "${SSL_EAY_LIBRARY_DEBUG}")
endif()
# Linking OpenSSL::SSL also pulls in OpenSSL::Crypto when that target exists.
if(TARGET OpenSSL::Crypto)
set_target_properties(OpenSSL::SSL PROPERTIES
INTERFACE_LINK_LIBRARIES OpenSSL::Crypto)
endif()
_OpenSSL_target_add_dependencies(OpenSSL::SSL)
endif()
endif()
# Restore the original find library ordering
if(OPENSSL_USE_STATIC_LIBS)
set(CMAKE_FIND_LIBRARY_SUFFIXES ${_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
endif()

View File

@@ -1,268 +0,0 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageMessage.cmake)
# internal helper macro: report that package ${_NAME} could not be found.
# The text is _msg, extended by FPHSA_REASON_FAILURE_MESSAGE when that is set.
# It is emitted as FATAL_ERROR for REQUIRED packages, as STATUS otherwise, and
# not at all for a non-required package searched QUIETLY.
macro(_FPHSA_FAILURE_MESSAGE _msg)
  if (FPHSA_REASON_FAILURE_MESSAGE)
    set (__msg "${_msg}\n Reason given by package: ${FPHSA_REASON_FAILURE_MESSAGE}\n")
  else ()
    set (__msg "${_msg}")
  endif ()

  if (${_NAME}_FIND_REQUIRED)
    message(FATAL_ERROR "${__msg}")
  elseif (NOT ${_NAME}_FIND_QUIETLY)
    message(STATUS "${__msg}")
  endif ()
endmacro()
# internal helper macro to generate the failure message when used in CONFIG_MODE:
# Three cases are distinguished, based on variables named after ${_NAME}:
#   1. <PackageName>_CONFIG is set            -> report the missing variables
#   2. <PackageName>_CONSIDERED_CONFIGS is set -> list every considered file
#   3. neither is set                          -> no config file was found
# The message itself is emitted through _FPHSA_FAILURE_MESSAGE().
macro(_FPHSA_HANDLE_FAILURE_CONFIG_MODE)
# <PackageName>_CONFIG is set, but FOUND is false, this means that some other of the REQUIRED_VARS was not found:
if(${_NAME}_CONFIG)
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: missing:${MISSING_VARS} (found ${${_NAME}_CONFIG} ${VERSION_MSG})")
else()
# If _CONSIDERED_CONFIGS is set, the config-file has been found, but no suitable version.
# List them all in the error message:
if(${_NAME}_CONSIDERED_CONFIGS)
set(configsText "")
list(LENGTH ${_NAME}_CONSIDERED_CONFIGS configsCount)
# foreach(RANGE n) iterates 0..n inclusive, hence the last valid index is count - 1.
math(EXPR configsCount "${configsCount} - 1")
foreach(currentConfigIndex RANGE ${configsCount})
list(GET ${_NAME}_CONSIDERED_CONFIGS ${currentConfigIndex} filename)
list(GET ${_NAME}_CONSIDERED_VERSIONS ${currentConfigIndex} version)
string(APPEND configsText "\n ${filename} (version ${version})")
endforeach()
# A package-provided NOT_FOUND_MESSAGE is folded into the reason text that
# _FPHSA_FAILURE_MESSAGE() appends; otherwise the file list ends with a newline.
if (${_NAME}_NOT_FOUND_MESSAGE)
if (FPHSA_REASON_FAILURE_MESSAGE)
string(PREPEND FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}\n ")
else()
set(FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}")
endif()
else()
string(APPEND configsText "\n")
endif()
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} ${VERSION_MSG}, checked the following files:${configsText}")
else()
# Simple case: No Config-file was found at all:
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: found neither ${_NAME}Config.cmake nor ${_NAME_LOWER}-config.cmake ${VERSION_MSG}")
endif()
endif()
endmacro()
# Evaluate the result of a find module and report it in a uniform way.
#
#   _NAME      - package name; used to build the <name>_FIND_* input variables
#                and the <name>_FOUND / <NAME>_FOUND result variables
#   _FIRST_ARG - in simple mode the failure message (or DEFAULT_MSG), followed
#                by the required variables in ARGN; in extended mode the first
#                keyword (CONFIG_MODE, HANDLE_COMPONENTS, FAIL_MESSAGE,
#                REASON_FAILURE_MESSAGE, VERSION_VAR, FOUND_VAR, REQUIRED_VARS)
#
# The package counts as found when every required variable is true, every
# required component is found (with HANDLE_COMPONENTS) and the version check
# passes. The result is written to <name>_FOUND and <NAME>_FOUND in the
# caller's scope; failures are reported through _FPHSA_FAILURE_MESSAGE().
function(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FIRST_ARG)
# Set up the arguments for `cmake_parse_arguments`.
set(options CONFIG_MODE HANDLE_COMPONENTS)
set(oneValueArgs FAIL_MESSAGE REASON_FAILURE_MESSAGE VERSION_VAR FOUND_VAR)
set(multiValueArgs REQUIRED_VARS)
# Check whether we are in 'simple' or 'extended' mode:
set(_KEYWORDS_FOR_EXTENDED_MODE ${options} ${oneValueArgs} ${multiValueArgs} )
list(FIND _KEYWORDS_FOR_EXTENDED_MODE "${_FIRST_ARG}" INDEX)
# Simple mode: the first argument is not one of the keywords above.
if(${INDEX} EQUAL -1)
set(FPHSA_FAIL_MESSAGE ${_FIRST_ARG})
set(FPHSA_REQUIRED_VARS ${ARGN})
set(FPHSA_VERSION_VAR)
else()
cmake_parse_arguments(FPHSA "${options}" "${oneValueArgs}" "${multiValueArgs}" ${_FIRST_ARG} ${ARGN})
if(FPHSA_UNPARSED_ARGUMENTS)
message(FATAL_ERROR "Unknown keywords given to FIND_PACKAGE_HANDLE_STANDARD_ARGS(): \"${FPHSA_UNPARSED_ARGUMENTS}\"")
endif()
if(NOT FPHSA_FAIL_MESSAGE)
set(FPHSA_FAIL_MESSAGE "DEFAULT_MSG")
endif()
# In config-mode, we rely on the variable <PackageName>_CONFIG, which is set by find_package()
# when it successfully found the config-file, including version checking:
if(FPHSA_CONFIG_MODE)
list(INSERT FPHSA_REQUIRED_VARS 0 ${_NAME}_CONFIG)
list(REMOVE_DUPLICATES FPHSA_REQUIRED_VARS)
set(FPHSA_VERSION_VAR ${_NAME}_VERSION)
endif()
if(NOT FPHSA_REQUIRED_VARS)
message(FATAL_ERROR "No REQUIRED_VARS specified for FIND_PACKAGE_HANDLE_STANDARD_ARGS()")
endif()
endif()
# now that we collected all arguments, process them
if("x${FPHSA_FAIL_MESSAGE}" STREQUAL "xDEFAULT_MSG")
set(FPHSA_FAIL_MESSAGE "Could NOT find ${_NAME}")
endif()
# The first required variable is the one shown in the "Found ..." message.
list(GET FPHSA_REQUIRED_VARS 0 _FIRST_REQUIRED_VAR)
string(TOUPPER ${_NAME} _NAME_UPPER)
string(TOLOWER ${_NAME} _NAME_LOWER)
# FOUND_VAR is validated here; NOTE(review): _FOUND_VAR is not read again in
# this function - both <name>_FOUND and <NAME>_FOUND are always set below.
if(FPHSA_FOUND_VAR)
if(FPHSA_FOUND_VAR MATCHES "^${_NAME}_FOUND$" OR FPHSA_FOUND_VAR MATCHES "^${_NAME_UPPER}_FOUND$")
set(_FOUND_VAR ${FPHSA_FOUND_VAR})
else()
message(FATAL_ERROR "The argument for FOUND_VAR is \"${FPHSA_FOUND_VAR}\", but only \"${_NAME}_FOUND\" and \"${_NAME_UPPER}_FOUND\" are valid names.")
endif()
else()
set(_FOUND_VAR ${_NAME_UPPER}_FOUND)
endif()
# collect all variables which were not found, so they can be printed, so the
# user knows better what went wrong (#6375)
set(MISSING_VARS "")
set(DETAILS "")
# check if all passed variables are valid
set(FPHSA_FOUND_${_NAME} TRUE)
foreach(_CURRENT_VAR ${FPHSA_REQUIRED_VARS})
if(NOT ${_CURRENT_VAR})
set(FPHSA_FOUND_${_NAME} FALSE)
string(APPEND MISSING_VARS " ${_CURRENT_VAR}")
else()
string(APPEND DETAILS "[${${_CURRENT_VAR}}]")
endif()
endforeach()
if(FPHSA_FOUND_${_NAME})
set(${_NAME}_FOUND TRUE)
set(${_NAME_UPPER}_FOUND TRUE)
else()
set(${_NAME}_FOUND FALSE)
set(${_NAME_UPPER}_FOUND FALSE)
endif()
# component handling
unset(FOUND_COMPONENTS_MSG)
unset(MISSING_COMPONENTS_MSG)
if(FPHSA_HANDLE_COMPONENTS)
foreach(comp ${${_NAME}_FIND_COMPONENTS})
if(${_NAME}_${comp}_FOUND)
if(NOT DEFINED FOUND_COMPONENTS_MSG)
set(FOUND_COMPONENTS_MSG "found components:")
endif()
string(APPEND FOUND_COMPONENTS_MSG " ${comp}")
else()
if(NOT DEFINED MISSING_COMPONENTS_MSG)
set(MISSING_COMPONENTS_MSG "missing components:")
endif()
string(APPEND MISSING_COMPONENTS_MSG " ${comp}")
# Only a missing component that was requested as required fails the package.
if(${_NAME}_FIND_REQUIRED_${comp})
set(${_NAME}_FOUND FALSE)
string(APPEND MISSING_VARS " ${comp}")
endif()
endif()
endforeach()
set(COMPONENT_MSG "${FOUND_COMPONENTS_MSG} ${MISSING_COMPONENTS_MSG}")
string(APPEND DETAILS "[c${COMPONENT_MSG}]")
endif()
# version handling:
set(VERSION_MSG "")
set(VERSION_OK TRUE)
# check with DEFINED here as the requested or found version may be "0"
if (DEFINED ${_NAME}_FIND_VERSION)
if(DEFINED ${FPHSA_VERSION_VAR})
set(_FOUND_VERSION ${${FPHSA_VERSION_VAR}})
if(${_NAME}_FIND_VERSION_EXACT) # exact version required
# count the dots in the version string
string(REGEX REPLACE "[^.]" "" _VERSION_DOTS "${_FOUND_VERSION}")
# add one dot because there is one dot more than there are components
string(LENGTH "${_VERSION_DOTS}." _VERSION_DOTS)
# When the found version has more components than the requested one, only
# the leading components are compared.
if (_VERSION_DOTS GREATER ${_NAME}_FIND_VERSION_COUNT)
# Because of the C++ implementation of find_package() ${_NAME}_FIND_VERSION_COUNT
# is at most 4 here. Therefore a simple lookup table is used.
if (${_NAME}_FIND_VERSION_COUNT EQUAL 1)
set(_VERSION_REGEX "[^.]*")
elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 2)
set(_VERSION_REGEX "[^.]*\\.[^.]*")
elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 3)
set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*")
else ()
set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*\\.[^.]*")
endif ()
string(REGEX REPLACE "^(${_VERSION_REGEX})\\..*" "\\1" _VERSION_HEAD "${_FOUND_VERSION}")
unset(_VERSION_REGEX)
if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _VERSION_HEAD)
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
set(VERSION_OK FALSE)
else ()
set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
endif ()
unset(_VERSION_HEAD)
else ()
if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _FOUND_VERSION)
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
set(VERSION_OK FALSE)
else ()
set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
endif ()
endif ()
unset(_VERSION_DOTS)
else() # minimum version specified:
if (${_NAME}_FIND_VERSION VERSION_GREATER _FOUND_VERSION)
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is at least \"${${_NAME}_FIND_VERSION}\"")
set(VERSION_OK FALSE)
else ()
set(VERSION_MSG "(found suitable version \"${_FOUND_VERSION}\", minimum required is \"${${_NAME}_FIND_VERSION}\")")
endif ()
endif()
else()
# if the package was not found, but a version was given, add that to the output:
if(${_NAME}_FIND_VERSION_EXACT)
set(VERSION_MSG "(Required is exact version \"${${_NAME}_FIND_VERSION}\")")
else()
set(VERSION_MSG "(Required is at least version \"${${_NAME}_FIND_VERSION}\")")
endif()
endif()
else ()
# Check with DEFINED as the found version may be 0.
if(DEFINED ${FPHSA_VERSION_VAR})
set(VERSION_MSG "(found version \"${${FPHSA_VERSION_VAR}}\")")
endif()
endif ()
if(VERSION_OK)
string(APPEND DETAILS "[v${${FPHSA_VERSION_VAR}}(${${_NAME}_FIND_VERSION})]")
else()
set(${_NAME}_FOUND FALSE)
endif()
# print the result:
if (${_NAME}_FOUND)
FIND_PACKAGE_MESSAGE(${_NAME} "Found ${_NAME}: ${${_FIRST_REQUIRED_VAR}} ${VERSION_MSG} ${COMPONENT_MSG}" "${DETAILS}")
else ()
if(FPHSA_CONFIG_MODE)
_FPHSA_HANDLE_FAILURE_CONFIG_MODE()
else()
if(NOT VERSION_OK)
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: ${VERSION_MSG} (found ${${_FIRST_REQUIRED_VAR}})")
else()
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} (missing:${MISSING_VARS}) ${VERSION_MSG}")
endif()
endif()
endif ()
# Publish the final result under both spellings of the package name.
set(${_NAME}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
set(${_NAME_UPPER}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
endfunction()

View File

@@ -1,48 +0,0 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
#[=======================================================================[.rst:
FindPackageMessage
------------------
.. code-block:: cmake
find_package_message(<name> "message for user" "find result details")
This function is intended to be used in FindXXX.cmake modules files.
It will print a message once for each unique find result. This is
useful for telling the user where a package was found. The first
argument specifies the name (XXX) of the package. The second argument
specifies the message to display. The third argument lists details
about the find result so that if they change the message will be
displayed again. The macro also obeys the QUIET argument to the
find_package command.
Example:
.. code-block:: cmake
if(X11_FOUND)
find_package_message(X11 "Found X11: ${X11_X11_LIB}"
"[${X11_X11_LIB}][${X11_INCLUDE_DIR}]")
else()
...
endif()
#]=======================================================================]
# Print `msg` once per distinct find result of package `pkg`.
#   pkg     - package name; <pkg>_FIND_QUIETLY suppresses all output
#   msg     - text shown to the user as a STATUS message
#   details - description of the find result; the message is printed again
#             only when this differs from the value stored in the cache
function(find_package_message pkg msg details)
  if(${pkg}_FIND_QUIETLY)
    return()
  endif()

  # Newlines are dropped before the details are compared and stored.
  string(REPLACE "\n" "" _flat_details "${details}")

  # Nothing to do when this exact result has already been reported.
  if("${_flat_details}" STREQUAL "${FIND_PACKAGE_MESSAGE_DETAILS_${pkg}}")
    return()
  endif()

  message(STATUS "${msg}")
  # Remember the reported result in the cache so it is not printed again.
  set("FIND_PACKAGE_MESSAGE_DETAILS_${pkg}" "${_flat_details}"
    CACHE INTERNAL "Details about finding ${pkg}")
endfunction()

22
Docker/Dockerfile Normal file
View File

@@ -0,0 +1,22 @@
FROM ubuntu:19.10

# MAINTAINER is deprecated; the equivalent metadata is carried by a label.
LABEL maintainer="simon987 <me@simon987.net>"

# Refresh the package index and install in the same layer, so that a cached
# (stale) index is never combined with a changed package list.
RUN apt-get update && \
    apt-get install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
    curl libtiff5 libpng16-16 libpcre3 && \
    rm -rf /var/lib/apt/lists/*

# Tesseract language data for OCR. `curl -f` makes an HTTP error fail the
# build instead of silently storing the error page as a .traineddata file.
RUN mkdir -p /usr/share/tessdata && \
    cd /usr/share/tessdata/ && \
    curl -fsSL -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata && \
    curl -fsSL -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata && \
    curl -fsSL -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata && \
    curl -fsSL -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata && \
    curl -fsSL -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata && \
    curl -fsSL -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && \
    ls -lh

# The binary is a plain local file, so COPY is sufficient (no ADD extraction/URL semantics needed).
COPY sist2 /root/sist2

ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8

ENTRYPOINT ["/root/sist2"]

14
Docker/build.sh Executable file
View File

@@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Build and publish the sist2 Docker image from the release binary in ../sist2.gz.
# Stops at the first failing step so that a broken binary or image is never pushed.
set -euo pipefail

# Remove leftovers from a previous run; -f keeps a clean checkout from erroring.
rm -f ./sist2 sist2_debug

cp ../sist2.gz .
gzip -d sist2.gz

strip sist2

# The image tag is taken from the binary itself.
version=$(./sist2 --version)
if [ -z "${version}" ]; then
    echo "Could not determine sist2 version" >&2
    exit 1
fi
echo "Version ${version}"

docker build . -t "simon987/sist2:${version}" -t simon987/sist2:latest

docker push "simon987/sist2:${version}"
docker push simon987/sist2:latest

# Smoke test: the published image must start and print its version.
docker run --rm simon987/sist2 -v

134
README.md
View File

@@ -1,5 +1,6 @@
![GitHub](https://img.shields.io/github/license/simon987/sist2.svg) ![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2) [![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
[![Development snapshots](https://ci.simon987.net/app/rest/builds/buildType(Sist2_Build)/statusIcon)](https://files.simon987.net/artifacts/Sist2/Build/)
# sist2 # sist2
@@ -7,89 +8,126 @@ sist2 (Simple incremental search tool)
*Warning: sist2 is in early development* *Warning: sist2 is in early development*
![sist2.png](docs/sist2.png)
## Features ## Features
* Fast, low memory usage * Fast, low memory usage, multi-threaded
* Mobile-friendly Web interface
* Portable (all its features are packaged in a single executable) * Portable (all its features are packaged in a single executable)
* Extracts text from common file types\* * Extracts text from common file types \*
* Generates thumbnails\* * Generates thumbnails \*
* Incremental scanning * Incremental scanning
* Automatic tagging from file attributes via [user scripts](docs/scripting.md)
* Recursive scan inside archive files \*\*
* OCR support with tesseract \*\*\*
* Stats page & disk utilisation visualization
\* See [format support](#format-support) \* See [format support](#format-support)
\*\* See [Archive files](#archive-files)
\*\*\* See [OCR](#ocr)
![stats](docs/stats.png)
## Getting Started ## Getting Started
1. Have an [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) instance running 1. Have an Elasticsearch (>= 6.X.X) instance running
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) 1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
1. *(or)* Run using docker:
```bash
docker run -d --name es1 --net sist2_net -p 9200:9200 \
-e "discovery.type=single-node" elasticsearch:7.5.2
```
1. *(or)* Run using docker-compose:
```yaml
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:7.5.2
environment:
- discovery.type=single-node
- "ES_JAVA_OPTS=-Xms1G -Xmx2G"
```
1. Download sist2 executable
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
1. *(or)* `docker pull simon987/sist2:latest`
*Windows users*: `sist2` runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux) 1. See [Usage guide](docs/USAGE.md)
*Mac users*: See [#1](https://github.com/simon987/sist2/issues/1) \* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
## Example usage ## Example usage
![demo](demo.gif) See [Usage guide](docs/USAGE.md) for more details
See help page `sist2 --help` for more details. 1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
1. Push index to Elasticsearch: `sist2 index ./docs_idx`
1. Start web interface: `sist2 web ./docs_idx`
**Scan a directory**
```bash
sist2 scan ~/Documents -o ./orig_idx/
sist2 scan --threads 4 --content-size 16384 /mnt/Pictures
sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
```
**Push index to Elasticsearch or file**
```bash
sist2 index --force-reset ./my_idx
sist2 index --print ./my_idx > raw_documents.ndjson
```
**Start web interface**
```bash
sist2 web --bind 0.0.0.0 --port 4321 ./my_idx1 ./my_idx2 ./my_idx3
```
## Format support ## Format support
File type | Library | Content | Thumbnail | Metadata File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:--- :---|:---|:---|:---|:---
pdf,xps,cbz,cbr,fb2,epub | MuPDF | yes | yes, `png` | *planned* | pdf,xps,cbz,cbr,fb2,epub | MuPDF | text+ocr | yes, `png` | title |
`audio/*` | libav | - | yes, `jpeg` | ID3 tags | `audio/*` | ffmpeg | - | yes, `jpeg` | ID3 tags |
`video/*` | libav | - | yes, `jpeg` | *planned* | `video/*` | ffmpeg | - | yes, `jpeg` | title, comment, artist |
`image/*` | libav | - | yes, `jpeg` | *planned* | `image/*` | ffmpeg | - | yes, `jpeg` | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style | ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - | `text/plain` | *(none)* | yes | no | - |
docx, xlsx, pptx | | *planned* | no | *planned* | html, xml | *(none)* | yes | no | - |
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
docx, xlsx, pptx | *(none)* | yes | no | creator, modified_by, title |
mobi, azw, azw3 | libmobi | yes | no | author, title |
\* *See [Archive files](#archive-files)*
### Archive files
**sist2** will scan files stored inside archive files (zip, tar, 7z...) as if
they were directly in the file system. Recursive (archives inside archives)
scan is also supported.
**Limitations**:
* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
is limited (see `--mem-buffer` option)
* Archive files are scanned sequentially, by a single thread. On systems where
**sist2** is not I/O bound, scans might be faster when larger archives are split
into smaller parts.
### OCR
You can enable OCR support for pdf,xps,cbz,cbr,fb2,epub file types with the
`--ocr <lang>` option. Download the language data files with your
package manager (`apt install tesseract-ocr-eng`) or directly [from GitHub](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
The `simon987/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa) pre-installed.
Examples
```bash
sist2 scan --ocr jpn ~/Books/Manga/
sist2 scan --ocr eng ~/Books/Textbooks/
```
## Build from source ## Build from source
You can compile **sist2** by yourself if you don't want to use the pre-compiled You can compile **sist2** by yourself if you don't want to use the pre-compiled
binaries. binaries (GCC 7+ required).
1. Install compile-time dependencies 1. Install compile-time dependencies
*(Debian)* ```bash
```bash vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libuuid libmagic
apt install git cmake pkg-config libglib2.0-dev\
libssl-dev uuid-dev libavformat-dev libswscale-dev \
python3 libmagic-dev libfreetype6-dev libcurl-dev \
libbz2-dev yasm
``` ```
*(FreeBSD)*
```bash
pkg install cmake gcc yasm gmake bash ffmpeg e2fsprogs-uuid
```
__
2. Build 2. Build
```bash ```bash
git clone --recurse-submodules https://github.com/simon987/sist2 git clone --recursive https://github.com/simon987/sist2/
./scripts/get_static_libs.sh cmake -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
cmake .
make make
``` ```

Submodule argparse deleted from fafc503d23

1
cJSON

Submodule cJSON deleted from 2de7d04aaf

16
ci/build.sh Executable file
View File

@@ -0,0 +1,16 @@
#!/usr/bin/env bash
# CI build script: produces a stripped, gzipped release binary (sist2.gz) and
# a debug tarball (sist2_debug.tar.gz) from the same source tree.
# NOTE(review): assumes it is run from the repository root and that vcpkg is
# installed at /vcpkg -- confirm against the CI image.
# Remove compressed artifacts left over in the current directory.
rm *.gz
# Discard CMake's cached configuration before configuring the release build.
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
make
# Strip symbols, then compress at the highest gzip level; gzip replaces
# `sist2` with `sist2.gz`.
strip sist2
gzip -9 sist2
# Discard the cached configuration again and reconfigure with SIST_DEBUG=on.
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
make
# NOTE(review): the debug configuration presumably emits `sist2_debug`, since
# that is the file packaged below -- confirm in CMakeLists.txt.
# Bundle the libasan shared library next to the debug binary in the tarball.
cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2
tar -czf sist2_debug.tar.gz sist2_debug libasan.so.2

BIN
demo.gif

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 MiB

302
docs/USAGE.md Normal file
View File

@@ -0,0 +1,302 @@
# Usage
*More examples (specifically with docker/compose) are in progress*
* [scan](#scan)
* [options](#scan-options)
* [examples](#scan-examples)
* [index format](#index-format)
* [index](#index)
* [options](#index-options)
* [examples](#index-examples)
* [web](#web)
* [options](#web-options)
* [examples](#web-examples)
* [rewrite_url](#rewrite_url)
* [link to specific indices](#link-to-specific-indices)
* [exec-script](#exec-script)
```
Usage: sist2 scan [OPTION]... PATH
or: sist2 index [OPTION]... INDEX
or: sist2 web [OPTION]... INDEX...
or: sist2 exec-script [OPTION]... INDEX
Lightning-fast file system indexer and search tool.
-h, --help show this help message and exit
-v, --version Show version and exit
--verbose Turn on logging
--very-verbose Turn on debug messages
Scan options
-t, --threads=<int> Number of threads. DEFAULT=1
-q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
--size=<int> Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
--content-size=<int> Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
--incremental=<str> Reuse an existing index and only scan modified files.
-o, --output=<str> Output directory. DEFAULT=index.sist2/
--rewrite-url=<str> Serve files from this url instead of from disk.
--name=<str> Index display name. DEFAULT: (name of the directory)
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
-e, --exclude=<str> Files that match this regex will not be scanned
--fast Only index file names & mime type
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
Index options
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
-p, --print Just print JSON documents to stdout.
--script-file=<str> Path to user script.
--batch-size=<int> Index batch size. DEFAULT: 100
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
Web options
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--bind=<str> Listen on this address. DEFAULT=localhost:4090
--auth=<str> Basic auth in user:password format
Exec-script options
--script-file=<str> Path to user script.
Made by simon987 <me@simon987.net>. Released under GPL-3.0
```
## Scan
### Scan options
* `-t, --threads`
Number of threads for file parsing. **Do not set a number higher than `$(nproc)`!**
* `-q, --quality`
Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. *Does not affect PDF thumbnails quality*
* `--size`
Thumbnail size in pixels.
* `--content-size`
Number of bytes of text to be extracted from the content of files (plain text and PDFs).
Repeated whitespace and special characters do not count toward this limit.
* `--incremental`
Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
will be copied to the new index and will not be parsed again.
* `-o, --output` Output directory.
* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
* `--name` Set the `name` option for the web module
* `--depth` Maximum scan depth. Set to 0 to only scan files directly in the root directory, set to -1 for infinite depth
* `--archive` Archive file mode.
* skip: Don't parse
* list: Only get file names as text
* shallow: Don't parse archives inside archives.
* recurse: Scan archives recursively (default)
* `--ocr` See [OCR](../README.md#OCR)
* `-e, --exclude` Regex pattern to exclude files. A file is excluded if the pattern matches any
part of the full absolute path.
Examples:
* `-e ".*\.ttf"`: Ignore ttf files
* `-e ".*\.(ttf|rar)"`: Ignore ttf and rar files
* `-e "^/mnt/backups/"`: Ignore all files in the `/mnt/backups/` directory
* `-e "^/mnt/Data[12]/"`: Ignore all files in the `/mnt/Data1/` and `/mnt/Data2/` directory
* `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"` Exclude the
`/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories
* `--fast` Only index file names and mime type
* `--treemap-threshold` Directories smaller than (`treemap-threshold` * `<total size of the index>`)
will not be considered for the disk utilisation visualization; their size will be added to
the parent directory. If the parent directory is still smaller than the threshold, it will also be "merged upwards"
and so on.
In effect, smaller `treemap-threshold` values will yield a more detailed
(but also a more cluttered and harder to read) visualization.
* `--mem-buffer` Maximum memory buffer size in MB (per thread) for files inside archives. Media files
larger than this number will be read sequentially and no *seek* operations will be supported.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
### Scan examples
Simple scan
```bash
sist2 scan ~/Documents
sist2 scan \
--threads 4 --content-size 16000000 --quality 1.0 --archive shallow \
--name "My Documents" --rewrite-url "http://nas.domain.local/My Documents/" \
~/Documents -o ./documents.idx/
```
Incremental scan
```
sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
```
### Index format
A typical `binary` type index structure looks like this:
```
documents.idx/
├── descriptor.json
├── _index_139965416830720
├── _index_139965425223424
├── _index_139965433616128
├── _index_139965442008832
├── _index_139965442008832
├── treemap.csv
├── agg_mime.csv
├── agg_date.csv
├── agg_size.csv
└── thumbs
├── data.mdb
└── lock.mdb
```
The `_index_*` files contain the raw binary index data and are not meant to be
read by other applications. The format is generally compatible across different
sist2 versions.
The `thumbs/` folder is a [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database)
database containing the thumbnails.
The `descriptor.json` file contains general information about the index. The
following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`.
The `.csv` files are pre-computed aggregations necessary for the stats page.
*Advanced usage*
Instead of using the `scan` module, you can also import an index generated
by a third party application. The 'external' index must have the following format:
```
my_index/
├── descriptor.json
├── _index_0
└── thumbs
├── data.mdb
└── lock.mdb
```
*descriptor.json*:
```json
{
"uuid": "<valid UUID4>",
"version": "_external_v1",
"root": "(optional)",
"name": "<name>",
"rewrite_url": "(optional)",
"type": "json",
"timestamp": 1578971024
}
```
*_index_0*: NDJSON format (One json object per line)
```json
{
"_id": "unique uuid for the file",
"index": "index uuid4 (same one as descriptor.json!)",
"mime": "application/x-cbz",
"size": 14341204,
"mtime": 1578882996,
"extension": "cbz",
"name": "my_book",
"path": "path/to/books",
"content": "text contents of the book",
"title": "Title of the book",
"tag": ["genre.fiction", "author.someguy", "etc..."],
"_keyword": [
{"k": "ISBN", "v": "ABCD34789231"}
],
"_text": [
{"k": "other", "v": "This will be indexed as text"}
]
}
```
You can find the full list of supported fields [here](../src/io/serialize.c#L90)
The `_keyword.*` items will be indexed and searchable as **keyword** fields (only full matches allowed).
The `_text.*` items will be indexed and searchable as **text** fields (fuzzy searching allowed)
*thumbs/*:
LMDB key-value store. Keys are **binary** 128-bit UUID4s (`_id` field)
and values are raw image bytes.
Importing an external `binary` type index is technically possible but
it is currently unsupported and has no guarantees of back/forward compatibility.
## Index
### Index options
* `--es-url`
Elasticsearch url and port. If you are using docker, make sure that both containers are on the
same network.
* `-p, --print`
Print index in JSON format to stdout.
* `--script-file`
Path to user script. See [Scripting](scripting.md).
* `--batch-size=<int>`
Index batch size. Indexing is generally faster with larger batches, but payloads that
are too large will fail and additional overhead for retrying with smaller sizes may slow
down the process.
* `-f, --force-reset`
Reset Elasticsearch mappings and settings.
**(You must use this option the first time you use the index command)**.
### Index examples
**Push to elasticsearch**
```bash
sist2 index --force-reset --batch-size 1000 --es-url http://localhost:9200 ./my_index/
sist2 index ./my_index/
```
**Save index in JSON format**
```bash
sist2 index --print ./my_index/ > my_index.ndjson
```
**Inspect contents of an index**
```bash
sist2 index --print ./my_index/ | jq | less
```
## Web
### Web options
* `--es-url=<str>` Elasticsearch url.
* `--bind=<str>` Listen on this address.
* `--auth=<str>` Basic auth in user:password format
### Web examples
**Single index**
```bash
sist2 web --auth admin:hunter2 --bind 0.0.0.0:8888 my_index
```
**Multiple indices**
```bash
# Indices will be displayed in this order in the web interface
sist2 web index1 index2 index3 index4
```
### rewrite_url
When the `rewrite_url` field is not empty, the web module ignores the `root`
field and will return a HTTP redirect to `<rewrite_url><path>/<name><extension>`
instead of serving the file from disk.
Both the `root` and `rewrite_url` fields are safe to manually modify from the
`descriptor.json` file.
### Link to specific indices
To link to specific indices, you can add a list of comma-separated index names to
the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are
not displayed.
## exec-script
The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.

BIN
docs/genre_example.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

142
docs/scripting.md Normal file
View File

@@ -0,0 +1,142 @@
## User scripts
*This document is under construction, more in-depth guide coming soon*
During the `index` step, you can use the `--script-file <script>` option to
modify documents or add user tags. This option is mainly used to
implement automatic tagging based on file attributes.
The scripting language used
([Painless Scripting Language](https://www.elastic.co/guide/en/elasticsearch/painless/7.4/index.html))
is very similar to Java, but you should be able to create user scripts
without any programming experience if you're somewhat familiar with
regex.
This is the base structure of the documents we're working with:
```json
{
"_id": "e171405c-fdb5-4feb-bb32-82637bc32084",
"_index": "sist2",
"_type": "_doc",
"_source": {
"index": "206b3050-e821-421a-891d-12fcf6c2db0d",
"mime": "application/json",
"size": 1799,
"mtime": 1545443685,
"extension": "md",
"name": "README",
"path": "sist2/scripting",
"content": "..."
}
}
```
**Example script**
This script checks whether the `genre` attribute exists; if it does,
it adds the `genre.<genre>` tag.
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source?.genre != null) {
tags.add("genre." + ctx._source.genre.toLowerCase());
}
```
You can use `.` to create a hierarchical tag tree:
![scripting/genre_example](genre_example.png)
To use regular expressions, you need to add this line in `/etc/elasticsearch/elasticsearch.yml`
```yaml
script.painless.regex.enabled: true
```
Or, if you're using docker add `-e "script.painless.regex.enabled=true"`
**Tag color**
You can specify the color for an individual tag by appending a
hexadecimal color code (`#RRGGBBAA`) to the tag name.
### Examples
If `(20XX)` is in the file name, add the `year.<year>` tag:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
if (m.find()) {
tags.add("year." + m.group(1));
}
```
Use default *Calibre* folder structure to infer author.
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
// We expect the book path to look like this:
// /path/to/Calibre Library/Author/Title/Title - Author.pdf
if (ctx._source.name.contains("-") && ctx._source.extension == "pdf") {
String[] names = ctx._source.name.splitOnToken('-');
tags.add("author." + names[1].strip());
}
```
If the file matches a specific pattern `AAAA-000 fName1 lName1, <fName2 lName2>...`, add the `actress.<actress>` and
`studio.<studio>` tag:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /([A-Z]{4})-[0-9]{3} (.*)/.matcher(ctx._source.name);
if (m.find()) {
tags.add("studio." + m.group(1));
// Take the matched group (.*), and add a tag for
// each name, separated by comma
for (String name : m.group(2).splitOnToken(',')) {
tags.add("actress." + name);
}
}
```
Set the name of the last folder (`/path/to/<studio>/file.mp4`) as the `studio.<studio>` tag
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source.path != "") {
String[] names = ctx._source.path.splitOnToken('/');
tags.add("studio." + names[names.length-1]);
}
```
Parse `EXIF:F Number` tag
```Java
if (ctx._source?.exif_fnumber != null) {
String[] values = ctx._source.exif_fnumber.splitOnToken(' ');
String aperture = String.valueOf(Float.parseFloat(values[0]) / Float.parseFloat(values[1]));
if (aperture == "NaN") {
aperture = "0,0";
}
tags.add("Aperture.f/" + aperture.replace(".", ","));
}
```
Display year and months from `EXIF:DateTime` tag
```Java
if (ctx._source?.exif_datetime != null) {
SimpleDateFormat parser = new SimpleDateFormat("yyyy:MM:dd HH:mm:ss");
Date date = parser.parse(ctx._source.exif_datetime);
SimpleDateFormat yp = new SimpleDateFormat("yyyy");
SimpleDateFormat mp = new SimpleDateFormat("MMMMMMMMM");
String year = yp.format(date);
String month = mp.format(date);
tags.add("Month." + month);
tags.add("Year." + year);
}
```

BIN
docs/sist2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 889 KiB

BIN
docs/stats.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 167 KiB

Submodule lib/ffmpeg deleted from 0481a1f6e5

Submodule lib/mupdf deleted from 91782a4348

Submodule lib/onion deleted from d8d4cc9290

1
lmdb

Submodule lmdb deleted from 5c012bbe03

View File

@@ -1,31 +1,58 @@
{ {
"properties": { "properties": {
"_tie": {
"type": "keyword",
"doc_values": true
},
"_depth": {
"type": "integer"
},
"path": { "path": {
"type": "text", "type": "text",
"analyzer": "path_analyzer", "analyzer": "path_analyzer",
"copy_to": "suggest-path" "copy_to": "suggest-path",
"fielddata": true,
"fields": {
"nGram": {
"type": "text",
"analyzer": "my_nGram"
},
"text": {
"type": "text",
"analyzer": "content_analyzer"
}
}
}, },
"suggest-path": { "suggest-path": {
"type": "completion", "type": "completion",
"analyzer": "keyword" "analyzer": "case_insensitive_kw_analyzer"
}, },
"mime": { "mime": {
"type": "keyword" "type": "keyword"
}, },
"thumbnail": {
"type": "keyword",
"index": false
},
"videoc": { "videoc": {
"type": "keyword" "type": "keyword",
"index": false
}, },
"audioc": { "audioc": {
"type": "keyword" "type": "keyword",
"index": false
}, },
"duration": { "duration": {
"type": "float" "type": "float",
"index": false
}, },
"width": { "width": {
"type": "integer" "type": "integer",
"index": false
}, },
"height": { "height": {
"type": "integer" "type": "integer",
"index": false
}, },
"mtime": { "mtime": {
"type": "integer" "type": "integer"
@@ -70,6 +97,23 @@
"analyzer": "my_nGram", "analyzer": "my_nGram",
"type": "text" "type": "text"
}, },
"_keyword.*": {
"type": "keyword"
},
"_text.*": {
"analyzer": "content_analyzer",
"type": "text",
"fields": {
"nGram": {
"type": "text",
"analyzer": "my_nGram"
}
}
},
"_url": {
"type": "keyword",
"index": false
},
"content": { "content": {
"analyzer": "content_analyzer", "analyzer": "content_analyzer",
"type": "text", "type": "text",
@@ -80,6 +124,39 @@
"analyzer": "my_nGram" "analyzer": "my_nGram"
} }
} }
},
"tag": {
"type": "keyword"
},
"exif_make": {
"type": "text"
},
"exif_model": {
"type": "text"
},
"exif:software": {
"type": "text"
},
"exif_exposure_time": {
"type": "keyword"
},
"exif_fnumber": {
"type": "keyword"
},
"exif_iso_speed_ratings": {
"type": "keyword"
},
"exif_focal_length": {
"type": "keyword"
},
"exif_user_comment": {
"type": "text"
},
"author": {
"type": "text"
},
"modified_by": {
"type": "text"
} }
} }
} }

10
schema/pipeline.json Normal file
View File

@@ -0,0 +1,10 @@
{
"description": "Copy _id to _tie, save path depth",
"processors": [
{
"script": {
"source": "ctx._tie = ctx._id; ctx._depth = ctx.path.length() == 0 ? 0 : 1 + ctx.path.length() - ctx.path.replace(\"/\", \"\").length();"
}
}
]
}

View File

@@ -21,6 +21,12 @@
"lowercase" "lowercase"
] ]
}, },
"case_insensitive_kw_analyzer": {
"tokenizer": "keyword",
"filter": [
"lowercase"
]
},
"my_nGram": { "my_nGram": {
"tokenizer": "my_nGram_tokenizer", "tokenizer": "my_nGram_tokenizer",
"filter": [ "filter": [

View File

@@ -2,13 +2,15 @@
rm -rf index.sist2/ rm -rf index.sist2/
rm web/js/bundle.js 2> /dev/null rm src/static/js/bundle.js 2> /dev/null
cat `ls web/js/*.min.js` > web/js/bundle.js cat `ls src/static/js/*.min.js` > src/static/js/bundle.js
cat web/js/{util,dom,search}.js >> web/js/bundle.js cat src/static/js/{util,dom}.js >> src/static/js/bundle.js
rm web/css/bundle.css 2> /dev/null rm src/static/css/bundle*.css 2> /dev/null
cat web/css/*.min.css > web/css/bundle.css cat src/static/css/*.min.css > src/static/css/bundle.css
cat web/css/main.css >> web/css/bundle.css cat src/static/css/light.css >> src/static/css/bundle.css
cat src/static/css/*.min.css > src/static/css/bundle_dark.css
cat src/static/css/dark.css >> src/static/css/bundle_dark.css
python3 scripts/mime.py > src/parsing/mime_generated.c python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c python3 scripts/serve_static.py > src/web/static_generated.c

View File

@@ -1,57 +0,0 @@
#!/usr/bin/env bash
cd lib
cd mupdf
HAVE_X11=no HAVE_GLUT=no make -j 4
cd ..
mv mupdf/build/release/libmupdf.a .
mv mupdf/build/release/libmupdf-third.a .
# ffmpeg
cd ffmpeg
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \
--disable-ffprobe --disable-doc\
--disable-manpages --disable-postproc --disable-avfilter \
--disable-alsa --disable-lzma --disable-xlib --disable-debug\
--disable-vdpau --disable-vaapi --disable-sdl2 --disable-network
make -j 4
cd ..
mv ffmpeg/libavcodec/libavcodec.a .
mv ffmpeg/libavformat/libavformat.a .
mv ffmpeg/libavutil/libavutil.a .
mv ffmpeg/libswresample/libswresample.a .
mv ffmpeg/libswscale/libswscale.a .
# onion
cd onion
mkdir build 2> /dev/null
cd build
cmake -DONION_USE_SSL=false -DONION_USE_PAM=false -DONION_USE_PNG=false -DONION_USE_JPEG=false \
-DONION_USE_JPEG=false -DONION_USE_XML2=false -DONION_USE_SYSTEMD=false -DONION_USE_SQLITE3=false \
-DONION_USE_REDIS=false -DONION_USE_GC=false -DONION_USE_TESTS=false -DONION_EXAMPLES=false \
-DONION_USE_BINDINGS_CPP=false ..
make -j 4
cd ../..
mv onion/build/src/onion/libonion_static.a .
#bzip2
git clone https://github.com/enthought/bzip2-1.0.6
cd bzip2-1.0.6
make -j 4
cd ..
mv bzip2-1.0.6/libbz2.a .
# magic
git clone https://github.com/threatstack/libmagic
cd libmagic
./autogen.sh
./configure --enable-static --disable-shared
make -j 4
cd ..
mv libmagic/src/.libs/libmagic.a .
cd ..

View File

@@ -1,44 +0,0 @@
#!/usr/bin/env bash
cd lib
# mupdf
cd mupdf
HAVE_X11=no HAVE_GLUT=no gmake -j 4
cd ..
mv mupdf/build/release/libmupdf.a .
mv mupdf/build/release/libmupdf-third.a .
# ffmpeg
cd ffmpeg
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \
--disable-ffprobe --disable-doc\
--disable-manpages --disable-postproc --disable-avfilter \
--disable-alsa --disable-lzma --disable-xlib --disable-debug\
--disable-vdpau --disable-vaapi --disable-sdl2 --disable-network
gmake -j 4
cd ..
mv ffmpeg/libavcodec/libavcodec.a .
mv ffmpeg/libavformat/libavformat.a .
mv ffmpeg/libavutil/libavutil.a .
mv ffmpeg/libswresample/libswresample.a .
mv ffmpeg/libswscale/libswscale.a .
#bzip2
git clone https://github.com/enthought/bzip2-1.0.6
cd bzip2-1.0.6
make -j 4
cd ..
mv bzip2-1.0.6/libbz2.a .
# magic
git clone https://github.com/threatstack/libmagic
cd libmagic
./autogen.sh
./configure --enable-static --disable-shared
make -j 4
cd ..
mv libmagic/src/.libs/libmagic.a .
cd ..

View File

@@ -1,6 +1,9 @@
import json
files = [ files = [
"schema/mappings.json", "schema/mappings.json",
"schema/settings.json", "schema/settings.json",
"schema/pipeline.json",
] ]
@@ -9,6 +12,7 @@ def clean(filepath):
for file in files: for file in files:
with open(file, "rb") as f: with open(file, "r") as f:
data = f.read() data = json.dumps(json.load(f), separators=(",", ":")).encode()
data += b'\0'
print("char %s[%d] = {%s};" % (clean(file), len(data), ",".join(str(int(b)) for b in data))) print("char %s[%d] = {%s};" % (clean(file), len(data), ",".join(str(int(b)) for b in data)))

View File

@@ -2,14 +2,18 @@ application/arj, arj
application/base64, mme application/base64, mme
application/binhex, hqx application/binhex, hqx
application/book, boo|book application/book, boo|book
application/CDFV2-corrupt,
application/CDFV2, sdv application/CDFV2, sdv
application/clariscad, ccad application/clariscad, ccad
application/commonground, dp application/commonground, dp
application/csv,
application/dicom, dcm
application/drafting, drw application/drafting, drw
application/epub+zip, epub
application/freeloader, frl application/freeloader, frl
application/futuresplash, spl application/futuresplash, spl
application/groupwise, vew application/groupwise, vew
application/gzip, gz application/gzip, gz|tgz
application/hta, hta application/hta, hta
application/i-deas, unv application/i-deas, unv
application/iges, iges|igs application/iges, iges|igs
@@ -17,7 +21,6 @@ application/inf, inf
application/java-archive, jar application/java-archive, jar
application/java, class application/java, class
application/javascript, application/javascript,
application/x-archive, a
application/json, json application/json, json
application/marc, mrc application/marc, mrc
application/mbedlet, mbd application/mbedlet, mbd
@@ -27,7 +30,9 @@ application/msword, doc|dot|w6w|wiz|word
application/netmc, mcp application/netmc, mcp
application/octet-stream, bin|dump|gpg application/octet-stream, bin|dump|gpg
application/oda, oda application/oda, oda
application/ogg, ogv
application/pdf, pdf application/pdf, pdf
application/pgp-keys,
application/pgp-signature, pgp application/pgp-signature, pgp
application/pkcs7-signature, p7s application/pkcs7-signature, p7s
application/pkix-cert, cer|crt application/pkix-cert, cer|crt
@@ -43,6 +48,10 @@ application/vda, vda
application/vnd.fdf, fdf application/vnd.fdf, fdf
application/vnd.font-fontforge-sfd, sfd application/vnd.font-fontforge-sfd, sfd
application/vnd.hp-hpgl, hgl|hpg|hpgl application/vnd.hp-hpgl, hgl|hpg|hpgl
application/vnd.iccprofile, icm
application/vnd.iccprofile, icm
application/vnd.lotus-1-2-3,
application/vnd.ms-cab-compressed, cab
application/vnd.ms-excel, xlb|xlc|xll|xlm|xls|xlw application/vnd.ms-excel, xlb|xlc|xll|xlm|xls|xlw
application/vnd.ms-fontobject, eot application/vnd.ms-fontobject, eot
application/vnd.ms-opentype, otf application/vnd.ms-opentype, otf
@@ -54,45 +63,75 @@ application/vnd.ms-project, mpp
application/vnd.oasis.opendocument.base, odb application/vnd.oasis.opendocument.base, odb
application/vnd.oasis.opendocument.formula, odf application/vnd.oasis.opendocument.formula, odf
application/vnd.oasis.opendocument.graphics, odg application/vnd.oasis.opendocument.graphics, odg
application/vnd.oasis.opendocument.presentation, odp
application/vnd.oasis.opendocument.spreadsheet, ods
application/vnd.oasis.opendocument.text, odt application/vnd.oasis.opendocument.text, odt
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
application/vnd.symbian.install,
application/vnd.tcpdump.pcap, pcap
application/vnd.wap.wmlc, wmlc application/vnd.wap.wmlc, wmlc
application/vnd.wap.wmlscriptc, wmlsc application/vnd.wap.wmlscriptc, wmlsc
application/vnd.xara, web application/vnd.xara, web
application/vocaltec-media-desc, vmd application/vocaltec-media-desc, vmd
application/vocaltec-media-file, vmf application/vocaltec-media-file, vmf
application/warc, warc
application/winhelp, hlp
application/wordperfect6.0, w60 application/wordperfect6.0, w60
application/wordperfect6.1, w61 application/wordperfect6.1, w61
application/wordperfect, wp|wp5|wp6|wpd application/wordperfect, wp|wp5|wp6|wpd
application/x-123, wk1 application/x-123, wk1
application/x-7z-compressed, 7z
application/x-aim, aim application/x-aim, aim
application/x-apple-diskimage,
application/x-arc,
application/x-archive, a
application/x-atari-7800-rom, a78
application/x-authorware-bin, aab application/x-authorware-bin, aab
application/x-authorware-map, aam application/x-authorware-map, aam
application/x-authorware-seg, aas application/x-authorware-seg, aas
application/x-avira-qua,
application/x-bcpio, bcpio application/x-bcpio, bcpio
application/x-bittorrent, torrent application/x-bittorrent, torrent
application/x-bsh, bsh application/x-bsh, bsh
application/x-bytecode.python, pyc application/x-bytecode.python, pyc
application/x-bzip2, boz|bz2 application/x-bzip2, boz|bz2
application/x-bzip, bz application/x-bzip, bz
application/x-cbr, cbr
application/x-cbz, cbz
application/x-cdlink, vcd application/x-cdlink, vcd
application/x-chat, cha|chat application/x-chat, cha|chat
application/x-chrome-extension,
application/x-cocoa, cco application/x-cocoa, cco
application/x-conference, nsc application/x-conference, nsc
application/x-coredump,
application/x-cpio, cpio application/x-cpio, cpio
application/x-dbf, dbf application/x-dbf, dbf
application/x-dbt, application/x-dbt,
application/x-debian-package, deb
application/x-deepv, deepv application/x-deepv, deepv
application/x-director, dcr|dir|dxr application/x-director, dir|dxr
application/x-dmp, dmp
application/x-dosdriver,
application/x-dosexec, dll application/x-dosexec, dll
application/x-dvi, dvi application/x-dvi, dvi
application/x-elc, elc application/x-elc, elc
application/x-empty,
application/x-envoy, env|evy application/x-envoy, env|evy
application/x-esrehber, es application/x-esrehber, es
application/x-excel, xla|xld|xlk|xlt|xlv application/x-excel, xla|xld|xlk|xlt|xlv
application/x-executable, exe application/x-executable, exe
application/x-font-gdos,
application/x-font-pf2, pf2
application/x-font-pfm, pfm
application/x-font-sfn, application/x-font-sfn,
application/x-font-ttf, ttf application/x-font-ttf, ttf|ttc
application/x-fptapplication/x-dbt,
application/x-freelance, pre application/x-freelance, pre
application/x-gamecube-rom,
application/x-gdbm,
application/x-gettext-translation,
application/x-git, application/x-git,
application/x-gsp, gsp application/x-gsp, gsp
application/x-gss, gss application/x-gss, gss
@@ -102,46 +141,68 @@ application/x-hdf, hdf
application/x-helpfile, help application/x-helpfile, help
application/x-httpd-imap, imap application/x-httpd-imap, imap
application/x-ima, ima application/x-ima, ima
application/x-innosetup,
application/x-internett-signup, ins application/x-internett-signup, ins
application/x-inventor, iv application/x-inventor, iv
application/x-ip2, ip application/x-ip2, ip
application/x-java-applet, application/x-java-applet,
application/x-java-commerce, jcm application/x-java-commerce, jcm
application/x-java-image, application/x-java-image,
application/x-java-jmod, jmod
application/x-java-keystore, application/x-java-keystore,
application/x-kdelnk,
application/x-koan, skd|skm|skp|skt application/x-koan, skd|skm|skp|skt
application/x-latex, latex|ltx application/x-latex, latex|ltx
application/x-livescreen, ivy application/x-livescreen, ivy
application/x-lotus, wq1 application/x-lotus, wq1
application/x-lz4+json, jsonlz4
application/x-lz4, lz4
application/x-lz4, lz4
application/x-lzh-compressed,
application/x-lzh, lzh application/x-lzh, lzh
application/x-lzip, lz
application/x-lzma, lzma
application/x-lzop, lzo
application/x-lzx, lzx application/x-lzx, lzx
application/x-mach-binary, jnilib|dylib application/x-mach-binary, jnilib|dylib
application/x-mach-executable, application/x-mach-executable,
application/x-magic-cap-package-1.0, mc$ application/x-magic-cap-package-1.0, mc$
application/x-mathcad, mcd application/x-mathcad, mcd
application/x-maxis-dbpf,
application/x-meme, mm application/x-meme, mm
application/x-midi, midi application/x-midi, midi
application/x-mif, mif application/x-mif, mif
application/x-mix-transfer, nix application/x-mix-transfer, nix
application/xml, opf application/xml, opf
application/x-mobipocket-ebook, mobi
application/vnd.amazon.mobi8-ebook, azw|azw3
application/x-msaccess, accdb
application/x-ms-compress-szdd, fon
application/x-ms-pdb, pdb application/x-ms-pdb, pdb
application/x-ms-reader, lit
application/x-n64-rom, z64
application/x-navi-animation, ani application/x-navi-animation, ani
application/x-navidoc, nvd application/x-navidoc, nvd
application/x-navimap, map application/x-navimap, map
application/x-navistyle, stl application/x-navistyle, stl
application/x-nes-rom, nes
application/x-netcdf, cdf|nc application/x-netcdf, cdf|nc
application/x-newton-compatible-pkg, pkg application/x-newton-compatible-pkg, pkg
application/x-nintendo-ds-rom,
application/x-object, o application/x-object, o
application/x-omcdatamaker, omcd application/x-omcdatamaker, omcd
application/x-omc, omc application/x-omc, omc
application/x-omcregerator, omcr application/x-omcregerator, omcr
application/x-pagemaker, pm4|pm5 application/x-pagemaker, pm4|pm5
application/x-pcl, pcl application/x-pcl, pcl
application/x-pgp-keyring,
application/x-pixclscript, plx application/x-pixclscript, plx
application/x-pkcs7-certreqresp, p7r application/x-pkcs7-certreqresp, p7r
application/x-pkcs7-signature, p7a application/x-pkcs7-signature, p7a
application/x-project, mpc|mpt|mpv|mpx application/x-project, mpc|mpt|mpv|mpx
application/x-qpro, wb1 application/x-qpro, wb1
application/x-rar, rar
application/x-rpm, rpm
application/x-sdp, sdp application/x-sdp, sdp
application/x-sea, sea application/x-sea, sea
application/x-seelogo, sl application/x-seelogo, sl
@@ -149,12 +210,17 @@ application/x-setupscript,
application/x-sharedlib, so application/x-sharedlib, so
application/x-shar, shar application/x-shar, shar
application/x-shockwave-flash, swf application/x-shockwave-flash, swf
application/x-snappy-framed,
application/x-sprite, spr|sprite application/x-sprite, spr|sprite
application/x-sqlite3, application/x-sqlite3,
application/x-stargallery-thm,
application/x-stuffit, sit
application/x-sv4cpio, sv4cpio application/x-sv4cpio, sv4cpio
application/x-sv4crc, sv4crc application/x-sv4crc, sv4crc
application/x-tar, tar application/x-tar, tar
application/x-tbook, sbk|tbk application/x-tbook, sbk|tbk
application/x-terminfo,
application/x-terminfo2,
application/x-texinfo, texi|texinfo application/x-texinfo, texi|texinfo
application/x-tex-tfm, tfm application/x-tex-tfm, tfm
application/x-ustar, ustar application/x-ustar, ustar
@@ -163,16 +229,22 @@ application/x-vnd.audioexplosion.mzz, mzz
application/x-vnd.ls-xpix, xpix application/x-vnd.ls-xpix, xpix
application/x-vrml, vrml application/x-vrml, vrml
application/x-wais-source, src|wsrc application/x-wais-source, src|wsrc
application/x-wine-extension-ini,
application/x-wintalk, wtk application/x-wintalk, wtk
application/x-world, svr application/x-world, svr
application/x-wri, wri application/x-wri, wri
application/x-x509-ca-cert, der application/x-x509-ca-cert, der
application/x-xz, xz application/x-xz, xz
application/x-zip,
application/x-zstd, zst
application/zip, zip application/zip, zip
application/zlib, z
!audio/basic, au
audio/it, it audio/it, it
audio/make, funk|my|pfunk audio/make, funk|my|pfunk
audio/midi, kar audio/midi, kar
audio/mid, rmi audio/mid, rmi
audio/mp4, m4b
audio/mpeg, m2a|mpa audio/mpeg, m2a|mpa
audio/ogg, ogg audio/ogg, ogg
audio/s3m, s3m audio/s3m, s3m
@@ -180,7 +252,10 @@ audio/tsp-audio, tsi
audio/tsplayer, tsp audio/tsplayer, tsp
audio/vnd.qcelp, qcp audio/vnd.qcelp, qcp
audio/voxware, vox audio/voxware, vox
audio/x-aiff, aiff|aif
audio/x-flac, flac
audio/x-gsm, gsd|gsm audio/x-gsm, gsd|gsm
audio/x-hx-aac-adts,
audio/x-jam, jam audio/x-jam, jam
audio/x-liveaudio, lam audio/x-liveaudio, lam
audio/x-m4a, m4a audio/x-m4a, m4a
@@ -194,17 +269,24 @@ audio/x-nspaudio, lma
audio/x-pn-realaudio, ram|rm|rmm|rmp audio/x-pn-realaudio, ram|rm|rmm|rmp
audio/x-psid, sid audio/x-psid, sid
audio/x-realaudio, ra audio/x-realaudio, ra
audio/x-s3m,
audio/x-twinvq-plugin, vqe|vql audio/x-twinvq-plugin, vqe|vql
audio/x-twinvq, vqf audio/x-twinvq, vqf
audio/x-voc, voc audio/x-voc, voc
audio/x-wav, wav audio/x-wav, wav
!audio/x-xbox360-executable, xex
!audio/x-xbox-executable, xbe
font/otf, font/otf,
font/sfnt, font/sfnt,
font/woff2, woff2
font/woff, woff
image/bmp,
image/cmu-raster, rast image/cmu-raster, rast
image/fif, fif image/fif, fif
image/florian, flo|turbot image/florian, flo|turbot
image/g3fax, g3 image/g3fax, g3
image/gif, gif image/gif, gif
image/heic, heic
image/ief, ief|iefs image/ief, ief|iefs
image/jpeg, jfif|jfif-tbnl|jpe|jpeg|jpg image/jpeg, jfif|jfif-tbnl|jpe|jpeg|jpg
image/jutvision, jut image/jutvision, jut
@@ -213,6 +295,9 @@ image/pict, pic|pict
image/png, png|x-png image/png, png|x-png
!image/svg, svg !image/svg, svg
!image/svg+xml, !image/svg+xml,
image/tiff,
!image/vnd.adobe.photoshop, psd
!image/vnd.djvu, djvu
image/vnd.fpx, fpx image/vnd.fpx, fpx
image/vnd.microsoft.icon, image/vnd.microsoft.icon,
image/vnd.rn-realflash, rf image/vnd.rn-realflash, rf
@@ -220,9 +305,15 @@ image/vnd.rn-realpix, rp
image/vnd.wap.wbmp, wbmp image/vnd.wap.wbmp, wbmp
image/vnd.xiff, xif image/vnd.xiff, xif
image/webp, webp image/webp, webp
image/wmf,
image/x-3ds, 3ds
image/x-award-bioslogo,
image/x-cmu-raster, ras image/x-cmu-raster, ras
image/x-cur, tga
image/x-dwg, dwg|dxf|svf image/x-dwg, dwg|dxf|svf
image/x-eps, image/x-eps,
image/x-exr, exr
image/x-gem,
image/x-icns, image/x-icns,
!image/x-icon, ico !image/x-icon, ico
image/x-jg, art image/x-jg, art
@@ -236,34 +327,33 @@ image/x-portable-graymap, pgm
image/x-portable-pixmap, ppm image/x-portable-pixmap, ppm
image/x-quicktime, qif|qti|qtif image/x-quicktime, qif|qti|qtif
image/x-rgb, rgb image/x-rgb, rgb
image/x-tga,
image/x-tiff, tif|tiff image/x-tiff, tif|tiff
image/tiff, image/x-win-bitmap,
!image/x-xcf, xcf !image/x-xcf, xcf
!image/x-xpixmap, xpm !image/x-xpixmap, xpm
image/x-xwindowdump, xwd
message/news,
message/rfc822, mht|mhtml|mime message/rfc822, mht|mhtml|mime
model/vnd.dwf, dwf model/vnd.dwf, dwf
model/vnd.gdl, gdl
model/vnd.gs.gdl, gdsl
model/vrml, wrz model/vrml, wrz
model/x-pov, pov model/x-pov, pov
text/asp, asp text/asp, asp
text/css, css text/css, css
text/x-sass, sass
text/x-scss, scss
text/html, acgi|htm|html|htmls|htx|shtml text/html, acgi|htm|html|htmls|htx|shtml
text/javascript, js text/javascript, js
text/mcf, mcf text/mcf, mcf
text/pascal, pas text/pascal, pas
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt text/PGP,
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml
text/richtext, rt|rtf|rtx text/richtext, rt|rtf|rtx
text/rtf,
text/scriplet, wsc text/scriplet, wsc
text/x-awk, awk
!video/x-jng, jng
video/x-mng, mng
image/x-cur, tga
image/x-xwindowdump, xwd
!image/vnd.adobe.photoshop, psd
text/tab-separated-values, tsv text/tab-separated-values, tsv
text/troff, man|me|ms|roff|t|tr text/troff, man|me|ms|roff|t|tr
text/uri-list, uni|unis|uri|uris text/uri-list, uni|unis|uri|uris
text/vnd.abc, abc text/vnd.abc, abc
text/vnd.fmi.flexstor, flx text/vnd.fmi.flexstor, flx
text/vnd.wap.wmlscript, wmls text/vnd.wap.wmlscript, wmls
@@ -272,6 +362,7 @@ text/webviewhtml, htt
text/x-Algol68, text/x-Algol68,
text/x-asm, asm|s text/x-asm, asm|s
text/x-audiosoft-intra, aip text/x-audiosoft-intra, aip
text/x-awk, awk
text/x-bcpl, text/x-bcpl,
text/x-c, c|cc|h text/x-c, c|cc|h
text/x-c++, cpp|cxx|c++ text/x-c++, cpp|cxx|c++
@@ -286,23 +377,31 @@ text/x-makefile, am|mak
text/xml, xml|pom|iml|plist text/xml, xml|pom|iml|plist
text/x-m, m text/x-m, m
text/x-msdos-batch, bat text/x-msdos-batch, bat
text/x-ms-regedit, reg
text/x-objective-c,
text/x-pascal, p text/x-pascal, p
text/x-perl, pl text/x-perl, pl
text/x-php, php text/x-php, php
text/x-po, po
text/x-python, py text/x-python, py
text/x-ruby, rb text/x-ruby, rb
text/x-sass, sass
text/x-scss, scss
text/x-server-parsed-html, ssi text/x-server-parsed-html, ssi
text/x-setext, etx text/x-setext, etx
text/x-sgml, sgm|sgml text/x-sgml, sgm|sgml
text/x-shellscript, sh text/x-shellscript, sh
text/x-speech, talk text/x-speech, talk
text/x-tcl,
text/x-tex, tex text/x-tex, tex
text/x-uil, uil text/x-uil, uil
text/x-uuencode, uue text/x-uuencode, uue
text/x-vcalendar, vcs text/x-vcalendar, vcs
text/x-vcard, vcf
video/animaflex, afl video/animaflex, afl
video/avi, avi video/avi, avi
video/avs-video, avs video/avs-video, avs
video/MP2T,
video/mp4, mp4 video/mp4, mp4
video/mpeg, m1v|m2v|mpe|mpeg|mpg video/mpeg, m1v|m2v|mpe|mpeg|mpg
video/quicktime, moov|mov|qt video/quicktime, moov|mov|qt
@@ -317,45 +416,35 @@ video/x-atomic3d-feature, fmf
video/x-dl, dl video/x-dl, dl
video/x-dv, dif|dv video/x-dv, dif|dv
video/x-fli, fli video/x-fli, fli
video/x-flv, flv
video/x-isvideo, isu video/x-isvideo, isu
!video/x-jng, jng
video/x-m4v, m4v
video/x-matroska, mkv
video/x-mng, mng
video/x-motion-jpeg, mjpg video/x-motion-jpeg, mjpg
video/x-ms-asf, asf|asx video/x-ms-asf, asf|asx|wmv
video/x-msvideo, divx
video/x-qtc, qtc video/x-qtc, qtc
video/x-sgi-movie, movie|mv video/x-sgi-movie, movie|mv
application/x-7z-compressed, 7z x-epoc/x-sisx-app,
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx application/x-zstd-dictionary,
text/x-po, po application/vnd.ms-outlook, msg
application/x-rpm, rpm image/x-olympus-orf, orf
application/x-debian-package, deb image/x-nikon-nef, nef
application/vnd.iccprofile, icm image/x-fuji-raf, raf
application/dicom, dcm image/x-panasonic-raw, rw2|raw
image/x-exr, exr image/x-adobe-dng, dng
application/vnd.iccprofile, icm image/x-canon-cr2, cr2
video/x-matroska, mkv image/x-canon-crw, crw
application/x-empty, image/x-dcraw,
model/vnd.gdl, gdl image/x-kodak-dcr, dcr
model/vnd.gs.gdl, gdsl image/x-kodak-k25, k25
font/woff, woff image/x-kodak-kdc, kdc
font/woff2, woff2 image/x-minolta-mrw, mrw
application/epub+zip, epub image/x-pentax-pef, pef
application/x-mobipocket-ebook, mobi image/x-sigma-x3f, x3f
audio/x-flac, flac image/x-sony-arw, arw
application/x-rar, rar image/x-sony-sr2, sr2
video/x-msvideo, divx image/x-sony-srf, srf
video/x-flv, flv image/x-epson-erf, erf
application/x-kdelnk,
text/x-tcl,
application/ogg, ogv
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
application/vnd.ms-cab-compressed, cab
audio/mp4, m4b
!image/vnd.djvu, djvu
application/x-ms-reader, lit
application/CDFV2-corrupt,
text/x-vcard, vcf
application/x-innosetup,
application/winhelp, hlp
image/x-tga,
application/x-wine-extension-ini,
application/x-cbz, cbz
application/x-cbr, cbr
1 application/arj arj
2 application/base64 mme
3 application/binhex hqx
4 application/book boo|book
5 application/CDFV2-corrupt
6 application/CDFV2 sdv
7 application/clariscad ccad
8 application/commonground dp
9 application/csv
10 application/dicom dcm
11 application/drafting drw
12 application/epub+zip epub
13 application/freeloader frl
14 application/futuresplash spl
15 application/groupwise vew
16 application/gzip gz gz|tgz
17 application/hta hta
18 application/i-deas unv
19 application/iges iges|igs
21 application/java-archive jar
22 application/java class
23 application/javascript
application/x-archive a
24 application/json json
25 application/marc mrc
26 application/mbedlet mbd
30 application/netmc mcp
31 application/octet-stream bin|dump|gpg
32 application/oda oda
33 application/ogg ogv
34 application/pdf pdf
35 application/pgp-keys
36 application/pgp-signature pgp
37 application/pkcs7-signature p7s
38 application/pkix-cert cer|crt
48 application/vnd.fdf fdf
49 application/vnd.font-fontforge-sfd sfd
50 application/vnd.hp-hpgl hgl|hpg|hpgl
51 application/vnd.iccprofile icm
52 application/vnd.iccprofile icm
53 application/vnd.lotus-1-2-3
54 application/vnd.ms-cab-compressed cab
55 application/vnd.ms-excel xlb|xlc|xll|xlm|xls|xlw
56 application/vnd.ms-fontobject eot
57 application/vnd.ms-opentype otf
63 application/vnd.oasis.opendocument.base odb
64 application/vnd.oasis.opendocument.formula odf
65 application/vnd.oasis.opendocument.graphics odg
66 application/vnd.oasis.opendocument.presentation odp
67 application/vnd.oasis.opendocument.spreadsheet ods
68 application/vnd.oasis.opendocument.text odt
69 application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
70 application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
71 application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
72 application/vnd.symbian.install
73 application/vnd.tcpdump.pcap pcap
74 application/vnd.wap.wmlc wmlc
75 application/vnd.wap.wmlscriptc wmlsc
76 application/vnd.xara web
77 application/vocaltec-media-desc vmd
78 application/vocaltec-media-file vmf
79 application/warc warc
80 application/winhelp hlp
81 application/wordperfect6.0 w60
82 application/wordperfect6.1 w61
83 application/wordperfect wp|wp5|wp6|wpd
84 application/x-123 wk1
85 application/x-7z-compressed 7z
86 application/x-aim aim
87 application/x-apple-diskimage
88 application/x-arc
89 application/x-archive a
90 application/x-atari-7800-rom a78
91 application/x-authorware-bin aab
92 application/x-authorware-map aam
93 application/x-authorware-seg aas
94 application/x-avira-qua
95 application/x-bcpio bcpio
96 application/x-bittorrent torrent
97 application/x-bsh bsh
98 application/x-bytecode.python pyc
99 application/x-bzip2 boz|bz2
100 application/x-bzip bz
101 application/x-cbr cbr
102 application/x-cbz cbz
103 application/x-cdlink vcd
104 application/x-chat cha|chat
105 application/x-chrome-extension
106 application/x-cocoa cco
107 application/x-conference nsc
108 application/x-coredump
109 application/x-cpio cpio
110 application/x-dbf dbf
111 application/x-dbt
112 application/x-debian-package deb
113 application/x-deepv deepv
114 application/x-director dcr|dir|dxr dir|dxr
115 application/x-dmp dmp
116 application/x-dosdriver
117 application/x-dosexec dll
118 application/x-dvi dvi
119 application/x-elc elc
120 application/x-empty
121 application/x-envoy env|evy
122 application/x-esrehber es
123 application/x-excel xla|xld|xlk|xlt|xlv
124 application/x-executable exe
125 application/x-font-gdos
126 application/x-font-pf2 pf2
127 application/x-font-pfm pfm
128 application/x-font-sfn
129 application/x-font-ttf ttf ttf|ttc
130 application/x-fptapplication/x-dbt
131 application/x-freelance pre
132 application/x-gamecube-rom
133 application/x-gdbm
134 application/x-gettext-translation
135 application/x-git
136 application/x-gsp gsp
137 application/x-gss gss
141 application/x-helpfile help
142 application/x-httpd-imap imap
143 application/x-ima ima
144 application/x-innosetup
145 application/x-internett-signup ins
146 application/x-inventor iv
147 application/x-ip2 ip
148 application/x-java-applet
149 application/x-java-commerce jcm
150 application/x-java-image
151 application/x-java-jmod jmod
152 application/x-java-keystore
153 application/x-kdelnk
154 application/x-koan skd|skm|skp|skt
155 application/x-latex latex|ltx
156 application/x-livescreen ivy
157 application/x-lotus wq1
158 application/x-lz4+json jsonlz4
159 application/x-lz4 lz4
160 application/x-lz4 lz4
161 application/x-lzh-compressed
162 application/x-lzh lzh
163 application/x-lzip lz
164 application/x-lzma lzma
165 application/x-lzop lzo
166 application/x-lzx lzx
167 application/x-mach-binary jnilib|dylib
168 application/x-mach-executable
169 application/x-magic-cap-package-1.0 mc$
170 application/x-mathcad mcd
171 application/x-maxis-dbpf
172 application/x-meme mm
173 application/x-midi midi
174 application/x-mif mif
175 application/x-mix-transfer nix
176 application/xml opf
177 application/x-mobipocket-ebook mobi
178 application/vnd.amazon.mobi8-ebook azw|azw3
179 application/x-msaccess accdb
180 application/x-ms-compress-szdd fon
181 application/x-ms-pdb pdb
182 application/x-ms-reader lit
183 application/x-n64-rom z64
184 application/x-navi-animation ani
185 application/x-navidoc nvd
186 application/x-navimap map
187 application/x-navistyle stl
188 application/x-nes-rom nes
189 application/x-netcdf cdf|nc
190 application/x-newton-compatible-pkg pkg
191 application/x-nintendo-ds-rom
192 application/x-object o
193 application/x-omcdatamaker omcd
194 application/x-omc omc
195 application/x-omcregerator omcr
196 application/x-pagemaker pm4|pm5
197 application/x-pcl pcl
198 application/x-pgp-keyring
199 application/x-pixclscript plx
200 application/x-pkcs7-certreqresp p7r
201 application/x-pkcs7-signature p7a
202 application/x-project mpc|mpt|mpv|mpx
203 application/x-qpro wb1
204 application/x-rar rar
205 application/x-rpm rpm
206 application/x-sdp sdp
207 application/x-sea sea
208 application/x-seelogo sl
210 application/x-sharedlib so
211 application/x-shar shar
212 application/x-shockwave-flash swf
213 application/x-snappy-framed
214 application/x-sprite spr|sprite
215 application/x-sqlite3
216 application/x-stargallery-thm
217 application/x-stuffit sit
218 application/x-sv4cpio sv4cpio
219 application/x-sv4crc sv4crc
220 application/x-tar tar
221 application/x-tbook sbk|tbk
222 application/x-terminfo
223 application/x-terminfo2
224 application/x-texinfo texi|texinfo
225 application/x-tex-tfm tfm
226 application/x-ustar ustar
229 application/x-vnd.ls-xpix xpix
230 application/x-vrml vrml
231 application/x-wais-source src|wsrc
232 application/x-wine-extension-ini
233 application/x-wintalk wtk
234 application/x-world svr
235 application/x-wri wri
236 application/x-x509-ca-cert der
237 application/x-xz xz
238 application/x-zip
239 application/x-zstd zst
240 application/zip zip
241 application/zlib z
242 !audio/basic au
243 audio/it it
244 audio/make funk|my|pfunk
245 audio/midi kar
246 audio/mid rmi
247 audio/mp4 m4b
248 audio/mpeg m2a|mpa
249 audio/ogg ogg
250 audio/s3m s3m
252 audio/tsplayer tsp
253 audio/vnd.qcelp qcp
254 audio/voxware vox
255 audio/x-aiff aiff|aif
256 audio/x-flac flac
257 audio/x-gsm gsd|gsm
258 audio/x-hx-aac-adts
259 audio/x-jam jam
260 audio/x-liveaudio lam
261 audio/x-m4a m4a
269 audio/x-pn-realaudio ram|rm|rmm|rmp
270 audio/x-psid sid
271 audio/x-realaudio ra
272 audio/x-s3m
273 audio/x-twinvq-plugin vqe|vql
274 audio/x-twinvq vqf
275 audio/x-voc voc
276 audio/x-wav wav
277 !audio/x-xbox360-executable xex
278 !audio/x-xbox-executable xbe
279 font/otf
280 font/sfnt
281 font/woff2 woff2
282 font/woff woff
283 image/bmp
284 image/cmu-raster rast
285 image/fif fif
286 image/florian flo|turbot
287 image/g3fax g3
288 image/gif gif
289 image/heic heic
290 image/ief ief|iefs
291 image/jpeg jfif|jfif-tbnl|jpe|jpeg|jpg
292 image/jutvision jut
295 image/png png|x-png
296 !image/svg svg
297 !image/svg+xml
298 image/tiff
299 !image/vnd.adobe.photoshop psd
300 !image/vnd.djvu djvu
301 image/vnd.fpx fpx
302 image/vnd.microsoft.icon
303 image/vnd.rn-realflash rf
305 image/vnd.wap.wbmp wbmp
306 image/vnd.xiff xif
307 image/webp webp
308 image/wmf
309 image/x-3ds 3ds
310 image/x-award-bioslogo
311 image/x-cmu-raster ras
312 image/x-cur tga
313 image/x-dwg dwg|dxf|svf
314 image/x-eps
315 image/x-exr exr
316 image/x-gem
317 image/x-icns
318 !image/x-icon ico
319 image/x-jg art
327 image/x-portable-pixmap ppm
328 image/x-quicktime qif|qti|qtif
329 image/x-rgb rgb
330 image/x-tga
331 image/x-tiff tif|tiff
332 image/tiff image/x-win-bitmap
333 !image/x-xcf xcf
334 !image/x-xpixmap xpm
335 image/x-xwindowdump xwd
336 message/news
337 message/rfc822 mht|mhtml|mime
338 model/vnd.dwf dwf
339 model/vnd.gdl gdl
340 model/vnd.gs.gdl gdsl
341 model/vrml wrz
342 model/x-pov pov
343 text/asp asp
344 text/css css
text/x-sass sass
text/x-scss scss
345 text/html acgi|htm|html|htmls|htx|shtml
346 text/javascript js
347 text/mcf mcf
348 text/pascal pas
349 text/plain text/PGP com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt
350 text/plain com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml
351 text/richtext rt|rtf|rtx
352 text/rtf
353 text/scriplet wsc
text/x-awk awk
!video/x-jng jng
video/x-mng mng
image/x-cur tga
image/x-xwindowdump xwd
!image/vnd.adobe.photoshop psd
354 text/tab-separated-values tsv
355 text/troff man|me|ms|roff|t|tr
356 text/uri-list uni|unis|uri|uris uni|unis|uri|uris
357 text/vnd.abc abc
358 text/vnd.fmi.flexstor flx
359 text/vnd.wap.wmlscript wmls
362 text/x-Algol68
363 text/x-asm asm|s
364 text/x-audiosoft-intra aip
365 text/x-awk awk
366 text/x-bcpl
367 text/x-c c|cc|h
368 text/x-c++ cpp|cxx|c++
377 text/xml xml|pom|iml|plist
378 text/x-m m
379 text/x-msdos-batch bat
380 text/x-ms-regedit reg
381 text/x-objective-c
382 text/x-pascal p
383 text/x-perl pl
384 text/x-php php
385 text/x-po po
386 text/x-python py
387 text/x-ruby rb
388 text/x-sass sass
389 text/x-scss scss
390 text/x-server-parsed-html ssi
391 text/x-setext etx
392 text/x-sgml sgm|sgml
393 text/x-shellscript sh
394 text/x-speech talk
395 text/x-tcl
396 text/x-tex tex
397 text/x-uil uil
398 text/x-uuencode uue
399 text/x-vcalendar vcs
400 text/x-vcard vcf
401 video/animaflex afl
402 video/avi avi
403 video/avs-video avs
404 video/MP2T
405 video/mp4 mp4
406 video/mpeg m1v|m2v|mpe|mpeg|mpg
407 video/quicktime moov|mov|qt
416 video/x-dl dl
417 video/x-dv dif|dv
418 video/x-fli fli
419 video/x-flv flv
420 video/x-isvideo isu
421 !video/x-jng jng
422 video/x-m4v m4v
423 video/x-matroska mkv
424 video/x-mng mng
425 video/x-motion-jpeg mjpg
426 video/x-ms-asf asf|asx asf|asx|wmv
427 video/x-msvideo divx
428 video/x-qtc qtc
429 video/x-sgi-movie movie|mv
430 application/x-7z-compressed x-epoc/x-sisx-app 7z
431 application/vnd.openxmlformats-officedocument.wordprocessingml.document application/x-zstd-dictionary docx
432 text/x-po application/vnd.ms-outlook po msg
433 application/x-rpm image/x-olympus-orf rpm orf
434 application/x-debian-package image/x-nikon-nef deb nef
435 application/vnd.iccprofile image/x-fuji-raf icm raf
436 application/dicom image/x-panasonic-raw dcm rw2|raw
437 image/x-exr image/x-adobe-dng exr dng
438 application/vnd.iccprofile image/x-canon-cr2 icm cr2
439 video/x-matroska image/x-canon-crw mkv crw
440 application/x-empty image/x-dcraw
441 model/vnd.gdl image/x-kodak-dcr gdl dcr
442 model/vnd.gs.gdl image/x-kodak-k25 gdsl k25
443 font/woff image/x-kodak-kdc woff kdc
444 font/woff2 image/x-minolta-mrw woff2 mrw
445 application/epub+zip image/x-pentax-pef epub pef
446 application/x-mobipocket-ebook image/x-sigma-x3f mobi x3f
447 audio/x-flac image/x-sony-arw flac arw
448 application/x-rar image/x-sony-sr2 rar sr2
449 video/x-msvideo image/x-sony-srf divx srf
450 video/x-flv image/x-epson-erf flv erf
application/x-kdelnk
text/x-tcl
application/ogg ogv
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
application/vnd.ms-cab-compressed cab
audio/mp4 m4b
!image/vnd.djvu djvu
application/x-ms-reader lit
application/CDFV2-corrupt
text/x-vcard vcf
application/x-innosetup
application/winhelp hlp
image/x-tga
application/x-wine-extension-ini
application/x-cbz cbz
application/x-cbr cbr

View File

@@ -12,7 +12,8 @@ major_mime = {
"audio": 7, "audio": 7,
"image": 8, "image": 8,
"text": 9, "text": 9,
"application": 10 "application": 10,
"x-epoc": 11,
} }
pdf = ( pdf = (
@@ -24,6 +25,7 @@ pdf = (
font = ( font = (
"application/vnd.ms-opentype", "application/vnd.ms-opentype",
"application/x-ms-compress-szdd"
"application/x-font-sfn", "application/x-font-sfn",
"application/x-font-ttf", "application/x-font-ttf",
"font/otf", "font/otf",
@@ -32,6 +34,68 @@ font = (
"font/woff2" "font/woff2"
) )
# MIME categories. Each tuple is tested with `mime in <tuple>`, so order and
# repetition carry no meaning; every type is listed exactly once.

# Archive "formats"
# NOTE(review): the MIME table lists "application/warc" (no "x-" prefix);
# confirm that "application/x-warc" can actually match an entry.
archive = (
    "application/x-tar",
    "application/zip",
    "application/x-rar",
    "application/x-arc",
    "application/x-warc",
    "application/x-7z-compressed",
)

# Archive "filters"
arc_filter = (
    "application/gzip",
    "application/x-bzip2",
    "application/x-xz",
    "application/x-zstd",
    "application/x-lzma",
    "application/x-lz4",
    "application/x-lzip",
    "application/x-lzop",
)

# Office Open XML documents (word processing, spreadsheet, presentation)
doc = (
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation"
)

# Mobipocket-family e-books
mobi = (
    "application/x-mobipocket-ebook",
    "application/vnd.amazon.mobi8-ebook"
)

# Markup text
markup = (
    "text/xml",
    "text/html",
    "text/x-sgml"
)

# Camera RAW pictures
raw = (
    "image/x-olympus-orf",
    "image/x-nikon-nef",
    "image/x-fuji-raf",
    "image/x-panasonic-raw",
    "image/x-adobe-dng",
    "image/x-canon-cr2",
    "image/x-canon-crw",
    "image/x-dcraw",
    "image/x-kodak-dcr",
    "image/x-kodak-k25",
    "image/x-kodak-kdc",
    "image/x-minolta-mrw",
    "image/x-pentax-pef",
    "image/x-sigma-x3f",
    "image/x-sony-arw",
    "image/x-sony-sr2",
    "image/x-sony-srf",
    "image/x-epson-erf",
)
cnt = 1 cnt = 1
@@ -46,6 +110,18 @@ def mime_id(mime):
mime_id += " | 0x40000000" mime_id += " | 0x40000000"
elif mime in font: elif mime in font:
mime_id += " | 0x20000000" mime_id += " | 0x20000000"
elif mime in archive:
mime_id += " | 0x10000000"
elif mime in arc_filter:
mime_id += " | 0x08000000"
elif mime in doc:
mime_id += " | 0x04000000"
elif mime in mobi:
mime_id += " | 0x02000000"
elif mime in markup:
mime_id += " | 0x01000000"
elif mime in raw:
mime_id += " | 0x00800000"
elif mime == "application/x-empty": elif mime == "application/x-empty":
return "1" return "1"
return mime_id return mime_id
@@ -55,7 +131,7 @@ def clean(t):
return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_") return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_")
with open("mime.csv") as f: with open("scripts/mime.csv") as f:
for l in f: for l in f:
mime, ext_list = l.split(",") mime, ext_list = l.split(",")
if l.startswith("!"): if l.startswith("!"):
@@ -67,7 +143,7 @@ with open("mime.csv") as f:
print("// **Generated by mime.py**") print("// **Generated by mime.py**")
print("#ifndef MIME_GENERATED_C") print("#ifndef MIME_GENERATED_C")
print("#define MIME_GENERATED_C") print("#define MIME_GENERATED_C")
print("#include <glib-2.0/glib.h>\n") print("#include <glib.h>\n")
print("#include <stdlib.h>\n") print("#include <stdlib.h>\n")
# Enum # Enum
print("enum mime {") print("enum mime {")

View File

@@ -1,9 +1,12 @@
files = [ files = [
"web/css/bundle.css", "src/static/css/bundle.css",
"web/js/bundle.js", "src/static/css/bundle_dark.css",
"web/img/bg-bars.png", "src/static/js/bundle.js",
"web/img/sprite-skin-flat.png", "src/static/js/search.js",
"web/search.html", "src/static/img/sprite-skin-flat.png",
"src/static/img/sprite-skin-flat-dark.png",
"src/static/search.html",
"src/static/stats.html",
] ]

279
src/cli.c
View File

@@ -1,22 +1,69 @@
#include "cli.h" #include "cli.h"
#include "ctx.h"
#include <tesseract/capi.h>
#define DEFAULT_OUTPUT "index.sist2/" #define DEFAULT_OUTPUT "index.sist2/"
#define DEFAULT_CONTENT_SIZE 4096 #define DEFAULT_CONTENT_SIZE 32768
#define DEFAULT_QUALITY 15 #define DEFAULT_QUALITY 5
#define DEFAULT_SIZE 200 #define DEFAULT_SIZE 500
#define DEFAULT_REWRITE_URL "" #define DEFAULT_REWRITE_URL ""
#define DEFAULT_ES_URL "http://localhost:9200" #define DEFAULT_ES_URL "http://localhost:9200"
#define DEFAULT_BATCH_SIZE 100
#define DEFAULT_BIND_ADDR "localhost" #define DEFAULT_LISTEN_ADDRESS "localhost:4090"
#define DEFAULT_PORT "4090" #define DEFAULT_TREEMAP_THRESHOLD 0.0005
#define DEFAULT_MAX_MEM_BUFFER 2000
const char *TESS_DATAPATHS[] = {
"/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/",
"./",
NULL
};
scan_args_t *scan_args_create() { scan_args_t *scan_args_create() {
scan_args_t *args = calloc(sizeof(scan_args_t), 1); scan_args_t *args = calloc(sizeof(scan_args_t), 1);
args->depth = -1;
return args; return args;
} }
/**
 * Allocate a zero-initialised exec_args_t.
 *
 * @return the new structure (calloc's result, returned unchecked).
 */
exec_args_t *exec_args_create() {
    return calloc(sizeof(exec_args_t), 1);
}
/**
 * Release a scan_args_t together with the name, path and output strings.
 *
 * NOTE(review): assumes those three fields always hold malloc-compatible
 * heap pointers (or NULL) by the time this runs; verify against the code
 * that fills them in.
 *
 * @param args structure to release; must not be NULL.
 */
void scan_args_destroy(scan_args_t *args) {
    /* free(NULL) is a no-op, so unset fields need no explicit guard. */
    free(args->name);
    free(args->path);
    free(args->output);

    free(args);
}
/**
 * Release an index_args_t.
 *
 * Only the structure itself is freed for now.
 *
 * @param args structure to release.
 */
void index_args_destroy(index_args_t *args) {
    // todo: release the members as well
    free(args);
}
/**
 * Release a web_args_t.
 *
 * Only the structure itself is freed for now.
 *
 * @param args structure to release.
 */
void web_args_destroy(web_args_t *args) {
    // todo: release the members as well
    free(args);
}
/**
 * Release an exec_args_t previously obtained from exec_args_create().
 *
 * @param args structure to release.
 */
void exec_args_destroy(exec_args_t *args) {
    free(args);
}
int scan_args_validate(scan_args_t *args, int argc, const char **argv) { int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (argc < 2) { if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n"); fprintf(stderr, "Required positional argument: PATH.\n");
@@ -25,7 +72,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
char *abs_path = abspath(argv[1]); char *abs_path = abspath(argv[1]);
if (abs_path == NULL) { if (abs_path == NULL) {
fprintf(stderr, "File not found: %s", argv[1]); fprintf(stderr, "File not found: %s\n", argv[1]);
return 1; return 1;
} else { } else {
args->path = abs_path; args->path = abs_path;
@@ -34,8 +81,8 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->incremental != NULL) { if (args->incremental != NULL) {
abs_path = abspath(args->incremental); abs_path = abspath(args->incremental);
if (abs_path == NULL) { if (abs_path == NULL) {
fprintf(stderr, "File not found: %s", args->incremental); sist_log("main.c", SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
return 1; args->incremental = NULL;
} }
} }
@@ -48,16 +95,13 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->size == 0) { if (args->size == 0) {
args->size = DEFAULT_SIZE; args->size = DEFAULT_SIZE;
} else if (args->size <= 0) { } else if (args->size > 0 && args->size < 32) {
fprintf(stderr, "Invalid size: %d\n", args->size); printf("Invalid size: %d\n", args->content_size);
return 1; return 1;
} }
if (args->content_size == 0) { if (args->content_size == 0) {
args->content_size = DEFAULT_CONTENT_SIZE; args->content_size = DEFAULT_CONTENT_SIZE;
} else if (args->content_size <= 0) {
fprintf(stderr, "Invalid content-size: %d\n", args->content_size);
return 1;
} }
if (args->threads == 0) { if (args->threads == 0) {
@@ -80,6 +124,12 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
return 1; return 1;
} }
if (args->depth <= 0) {
args->depth = G_MAXINT32;
} else {
args->depth += 1;
}
if (args->name == NULL) { if (args->name == NULL) {
args->name = g_path_get_basename(args->output); args->name = g_path_get_basename(args->output);
} }
@@ -87,12 +137,124 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->rewrite_url == NULL) { if (args->rewrite_url == NULL) {
args->rewrite_url = DEFAULT_REWRITE_URL; args->rewrite_url = DEFAULT_REWRITE_URL;
} }
if (args->archive == NULL || strcmp(args->archive, "recurse") == 0) {
args->archive_mode = ARC_MODE_RECURSE;
} else if (strcmp(args->archive, "list") == 0) {
args->archive_mode = ARC_MODE_LIST;
} else if (strcmp(args->archive, "shallow") == 0) {
args->archive_mode = ARC_MODE_SHALLOW;
} else if (strcmp(args->archive, "skip") == 0) {
args->archive_mode = ARC_MODE_SKIP;
} else {
fprintf(stderr, "Archive mode must be one of (skip, list, shallow, recurse), got '%s'", args->archive);
return 1;
}
if (args->tesseract_lang != NULL) {
TessBaseAPI *api = TessBaseAPICreate();
char filename[128];
sprintf(filename, "%s.traineddata", args->tesseract_lang);
const char *path = find_file_in_paths(TESS_DATAPATHS, filename);
if (path == NULL) {
LOG_FATAL("cli.c", "Could not find tesseract language file!");
}
ret = TessBaseAPIInit3(api, path, args->tesseract_lang);
if (ret != 0) {
fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang);
return 1;
}
TessBaseAPIEnd(api);
TessBaseAPIDelete(api);
args->tesseract_path = path;
}
if (args->exclude_regex != NULL) {
const char *error;
int error_offset;
pcre *re = pcre_compile(args->exclude_regex, 0, &error, &error_offset, 0);
if (error != NULL) {
LOG_FATALF("cli.c", "pcre_compile returned error: %s (offset:%d)", error, error_offset)
}
pcre_extra *re_extra = pcre_study(re, 0, &error);
if (error != NULL) {
LOG_FATALF("cli.c", "pcre_study returned error: %s", error)
}
ScanCtx.exclude = re;
ScanCtx.exclude_extra = re_extra;
} else {
ScanCtx.exclude = NULL;
}
if (args->treemap_threshold_str == 0) {
args->treemap_threshold = DEFAULT_TREEMAP_THRESHOLD;
} else {
args->treemap_threshold = atof(args->treemap_threshold_str);
}
if (args->max_memory_buffer == 0) {
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
}
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
LOG_DEBUGF("cli.c", "arg size=%d", args->size)
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
LOG_DEBUGF("cli.c", "arg threads=%d", args->threads)
LOG_DEBUGF("cli.c", "arg incremental=%s", args->incremental)
LOG_DEBUGF("cli.c", "arg output=%s", args->output)
LOG_DEBUGF("cli.c", "arg rewrite_url=%s", args->rewrite_url)
LOG_DEBUGF("cli.c", "arg name=%s", args->name)
LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
LOG_DEBUGF("cli.c", "arg path=%s", args->path)
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)
return 0;
}
/**
 * Read a whole script file into a freshly allocated, NUL-terminated buffer.
 *
 * The size is taken from the already opened descriptor, so the file cannot
 * be swapped between the size query and the read. Short reads are retried
 * until the expected size or end-of-file is reached, and the terminator is
 * placed right after the bytes actually read.
 *
 * @param script_path path of the script file.
 * @param dst         receives the buffer on success; the caller frees it.
 *                    Left untouched on failure.
 * @return 0 on success, 1 on failure (a message is printed to stderr).
 */
int load_script(const char *script_path, char **dst) {
    int fd = open(script_path, O_RDONLY);
    if (fd == -1) {
        fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
        return 1;
    }

    struct stat info;
    if (fstat(fd, &info) == -1) {
        fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
        close(fd);
        return 1;
    }

    size_t size = (size_t) info.st_size;
    char *buf = malloc(size + 1);
    if (buf == NULL) {
        fprintf(stderr, "Error reading script file '%s': %s\n", script_path, strerror(errno));
        close(fd);
        return 1;
    }

    size_t total = 0;
    while (total < size) {
        ssize_t n = read(fd, buf + total, size - total);
        if (n < 0) {
            if (errno == EINTR) {
                continue;
            }
            fprintf(stderr, "Error reading script file '%s': %s\n", script_path, strerror(errno));
            free(buf);
            close(fd);
            return 1;
        }
        if (n == 0) {
            /* File shrank after fstat(); keep what was read. */
            break;
        }
        total += (size_t) n;
    }

    buf[total] = '\0';
    close(fd);

    *dst = buf;
    return 0;
}
#ifndef SIST_SCAN_ONLY
int index_args_validate(index_args_t *args, int argc, const char **argv) { int index_args_validate(index_args_t *args, int argc, const char **argv) {
LogCtx.verbose = 1;
if (argc < 2) { if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n"); fprintf(stderr, "Required positional argument: PATH.\n");
return 1; return 1;
@@ -100,20 +262,42 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
char *index_path = abspath(argv[1]); char *index_path = abspath(argv[1]);
if (index_path == NULL) { if (index_path == NULL) {
fprintf(stderr, "File not found: %s", argv[1]); fprintf(stderr, "File not found: %s\n", argv[1]);
return 1; return 1;
} else { } else {
args->index_path = argv[1]; args->index_path = argv[1];
free(index_path);
} }
if (args->es_url == NULL) { if (args->es_url == NULL) {
args->es_url = DEFAULT_ES_URL; args->es_url = DEFAULT_ES_URL;
} }
if (args->script_path != NULL) {
if (load_script(args->script_path, &args->script) != 0) {
return 1;
}
}
if (args->batch_size == 0) {
args->batch_size = DEFAULT_BATCH_SIZE;
}
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
LOG_DEBUGF("cli.c", "arg print=%d", args->print)
LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size)
LOG_DEBUGF("cli.c", "arg force_reset=%d", args->force_reset)
return 0; return 0;
} }
int web_args_validate(web_args_t *args, int argc, const char **argv) { int web_args_validate(web_args_t *args, int argc, const char **argv) {
LogCtx.verbose = 1;
if (argc < 2) { if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n"); fprintf(stderr, "Required positional argument: PATH.\n");
return 1; return 1;
@@ -123,12 +307,28 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
args->es_url = DEFAULT_ES_URL; args->es_url = DEFAULT_ES_URL;
} }
if (args->bind == NULL) { if (args->listen_address == NULL) {
args->bind = DEFAULT_BIND_ADDR; args->listen_address = DEFAULT_LISTEN_ADDRESS;
} }
if (args->port == NULL) { if (args->credentials != NULL) {
args->port = DEFAULT_PORT; char *ptr = strstr(args->credentials, ":");
if (ptr == NULL) {
fprintf(stderr, "Invalid --auth format, see usage\n");
return 1;
}
strncpy(args->auth_user, args->credentials, (ptr - args->credentials));
strncpy(args->auth_pass, ptr + 1, strlen(ptr + 1));
if (strlen(args->auth_user) == 0) {
fprintf(stderr, "--auth username must be at least one character long");
return 1;
}
args->auth_enabled = TRUE;
} else {
args->auth_enabled = FALSE;
} }
args->index_count = argc - 1; args->index_count = argc - 1;
@@ -137,10 +337,21 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
for (int i = 0; i < args->index_count; i++) { for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]); char *abs_path = abspath(args->indices[i]);
if (abs_path == NULL) { if (abs_path == NULL) {
fprintf(stderr, "File not found: %s", abs_path); fprintf(stderr, "File not found: %s\n", args->indices[i]);
return 1; return 1;
} }
} }
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)
LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials)
LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user)
LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass)
LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count)
for (int i = 0; i < args->index_count; i++) {
LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i])
}
return 0; return 0;
} }
@@ -153,5 +364,31 @@ web_args_t *web_args_create() {
web_args_t *args = calloc(sizeof(web_args_t), 1); web_args_t *args = calloc(sizeof(web_args_t), 1);
return args; return args;
} }
#endif
/**
 * Validate the arguments of the exec-script command and fill in defaults.
 *
 * @param args parsed options; index_path, es_url and script are filled in here
 * @param argc number of entries in argv
 * @param argv argv[1] is the index path
 * @return 0 when the arguments are usable, 1 otherwise (a message is printed
 *         to stderr)
 */
int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
    /* Same guard as index_args_validate()/web_args_validate(): argv[1] must exist. */
    if (argc < 2) {
        fprintf(stderr, "Required positional argument: PATH.\n");
        return 1;
    }

    /* abspath() is used only as an existence check here; its result is freed
     * and the path is stored as the user typed it. */
    char *index_path = abspath(argv[1]);
    if (index_path == NULL) {
        fprintf(stderr, "File not found: %s\n", argv[1]);
        return 1;
    }
    free(index_path);
    args->index_path = argv[1];

    if (args->es_url == NULL) {
        args->es_url = DEFAULT_ES_URL;
    }

    if (args->script_path == NULL) {
        LOG_FATAL("cli.c", "--script-file argument is required");
    }

    if (load_script(args->script_path, &args->script) != 0) {
        return 1;
    }

    LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
    LOG_DEBUGF("cli.c", "arg script=%s", args->script)
    return 0;
}

View File

@@ -3,6 +3,8 @@
#include "sist.h" #include "sist.h"
#include "libscan/arc/arc.h"
typedef struct scan_args { typedef struct scan_args {
float quality; float quality;
int size; int size;
@@ -12,33 +14,69 @@ typedef struct scan_args {
char *output; char *output;
char *rewrite_url; char *rewrite_url;
char *name; char *name;
int depth;
char *path; char *path;
char *archive;
archive_mode_t archive_mode;
char *tesseract_lang;
const char *tesseract_path;
char *exclude_regex;
int fast;
const char* treemap_threshold_str;
double treemap_threshold;
int max_memory_buffer;
} scan_args_t; } scan_args_t;
scan_args_t *scan_args_create(); scan_args_t *scan_args_create();
void scan_args_destroy(scan_args_t *args);
int scan_args_validate(scan_args_t *args, int argc, const char **argv); int scan_args_validate(scan_args_t *args, int argc, const char **argv);
#ifndef SIST_SCAN_ONLY
typedef struct index_args { typedef struct index_args {
char *es_url; char *es_url;
const char *index_path; const char *index_path;
const char *script_path;
char *script;
int print; int print;
int batch_size;
int force_reset; int force_reset;
} index_args_t; } index_args_t;
typedef struct web_args { typedef struct web_args {
char *es_url; char *es_url;
char *bind; char *listen_address;
char *port; char *credentials;
char auth_user[256];
char auth_pass[256];
int auth_enabled;
int index_count; int index_count;
const char **indices; const char **indices;
} web_args_t; } web_args_t;
/* Options of the exec-script command, validated by exec_args_validate(). */
typedef struct exec_args {
/* Elasticsearch URL; exec_args_validate() substitutes DEFAULT_ES_URL when NULL. */
char *es_url;
/* Index path as given on the command line (argv[1]). */
const char *index_path;
/* Path of the script file; exec_args_validate() treats NULL as a fatal error. */
const char *script_path;
/* Script text, filled in by load_script() from script_path. */
char *script;
} exec_args_t;
index_args_t *index_args_create(); index_args_t *index_args_create();
void index_args_destroy(index_args_t *args);
web_args_t *web_args_create(); web_args_t *web_args_create();
void web_args_destroy(web_args_t *args);
int index_args_validate(index_args_t *args, int argc, const char **argv); int index_args_validate(index_args_t *args, int argc, const char **argv);
int web_args_validate(web_args_t *args, int argc, const char **argv); int web_args_validate(web_args_t *args, int argc, const char **argv);
#endif
exec_args_t *exec_args_create();
void exec_args_destroy(exec_args_t *args);
int exec_args_validate(exec_args_t *args, int argc, const char **argv);
#endif #endif

6
src/ctx.c Normal file
View File

@@ -0,0 +1,6 @@
#include "ctx.h"
/* Definitions of the global context objects; the matching extern
 * declarations sit next to the ScanCtx_t/WebCtx_t/IndexCtx_t/LogCtx_t
 * typedefs (presumably in ctx.h, which this file includes). */
ScanCtx_t ScanCtx;
WebCtx_t WebCtx;
IndexCtx_t IndexCtx;
LogCtx_t LogCtx;

View File

@@ -2,8 +2,22 @@
#define SIST2_CTX_H #define SIST2_CTX_H
#include "sist.h" #include "sist.h"
#include "tpool.h"
#include "libscan/scan.h"
#include "libscan/arc/arc.h"
#include "libscan/cbr/cbr.h"
#include "libscan/ebook/ebook.h"
#include "libscan/font/font.h"
#include "libscan/media/media.h"
#include "libscan/ooxml/ooxml.h"
#include "libscan/text/text.h"
#include "libscan/mobi/scan_mobi.h"
#include "libscan/raw/raw.h"
struct { #include <glib.h>
#include <pcre.h>
typedef struct {
struct index_t index; struct index_t index;
GHashTable *mime_table; GHashTable *mime_table;
@@ -11,10 +25,8 @@ struct {
tpool_t *pool; tpool_t *pool;
int tn_size;
int threads; int threads;
int content_size; int depth;
float tn_qscale;
size_t stat_tn_size; size_t stat_tn_size;
size_t stat_index_size; size_t stat_index_size;
@@ -22,21 +34,45 @@ struct {
GHashTable *original_table; GHashTable *original_table;
GHashTable *copy_table; GHashTable *copy_table;
pthread_mutex_t mupdf_mu; pcre *exclude;
} ScanCtx; pcre_extra *exclude_extra;
int fast;
scan_arc_ctx_t arc_ctx;
scan_cbr_ctx_t cbr_ctx;
scan_ebook_ctx_t ebook_ctx;
scan_font_ctx_t font_ctx;
scan_media_ctx_t media_ctx;
scan_ooxml_ctx_t ooxml_ctx;
scan_text_ctx_t text_ctx;
scan_mobi_ctx_t mobi_ctx;
scan_raw_ctx_t raw_ctx;
} ScanCtx_t;
#ifndef SIST_SCAN_ONLY typedef struct {
struct { int verbose;
int very_verbose;
int no_color;
} LogCtx_t;
typedef struct {
char *es_url; char *es_url;
} IndexCtx; int batch_size;
} IndexCtx_t;
struct { typedef struct {
char *es_url; char *es_url;
int index_count; int index_count;
struct index_t indices[16]; char *auth_user;
} WebCtx; char *auth_pass;
#endif int auth_enabled;
struct index_t indices[64];
} WebCtx_t;
extern ScanCtx_t ScanCtx;
extern WebCtx_t WebCtx;
extern IndexCtx_t IndexCtx;
extern LogCtx_t LogCtx;
#endif #endif

View File

@@ -1,16 +1,10 @@
#include "elastic.h" #include "elastic.h"
#include "src/ctx.h" #include "src/ctx.h"
#include <stdlib.h>
#include "web.h" #include "web.h"
#include <stdio.h>
#include <string.h>
#include <cJSON/cJSON.h>
#include <src/ctx.h>
#include "static_generated.c" #include "static_generated.c"
#define BULK_INDEX_SIZE 100
typedef struct es_indexer { typedef struct es_indexer {
int queued; int queued;
@@ -22,6 +16,8 @@ typedef struct es_indexer {
static es_indexer_t *Indexer; static es_indexer_t *Indexer;
void delete_queue(int max);
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) { void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
cJSON *line = cJSON_CreateObject(); cJSON *line = cJSON_CreateObject();
@@ -29,13 +25,14 @@ void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
cJSON_AddStringToObject(line, "_id", uuid_str); cJSON_AddStringToObject(line, "_id", uuid_str);
cJSON_AddStringToObject(line, "_index", "sist2"); cJSON_AddStringToObject(line, "_index", "sist2");
cJSON_AddStringToObject(line, "_type", "_doc"); cJSON_AddStringToObject(line, "_type", "_doc");
cJSON_AddItemToObject(line, "_source", document); cJSON_AddItemReferenceToObject(line, "_source", document);
char *json = cJSON_PrintUnformatted(line); char *json = cJSON_PrintUnformatted(line);
printf("%s\n", json); printf("%s\n", json);
cJSON_free(line); cJSON_free(json);
cJSON_Delete(line);
} }
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) { void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
@@ -54,24 +51,56 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
elastic_index_line(bulk_line); elastic_index_line(bulk_line);
} }
void elastic_flush() { void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {
if (Indexer == NULL) { if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url); Indexer = create_indexer(IndexCtx.es_url);
} }
es_bulk_line_t *line = Indexer->line_head; cJSON *body = cJSON_CreateObject();
cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
cJSON_AddStringToObject(script_obj, "lang", "painless");
cJSON_AddStringToObject(script_obj, "source", script);
int count = 0; cJSON *query = cJSON_AddObjectToObject(body, "query");
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
cJSON_AddStringToObject(term_obj, "index", index_id);
char *str = cJSON_Print(body);
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?pretty", Indexer->es_url);
response_t *r = web_post(bulk_url, str);
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
cJSON *resp = cJSON_Parse(r->body);
cJSON_free(str);
cJSON_Delete(body);
free_response(r);
cJSON *error = cJSON_GetObjectItem(resp, "error");
if (error != NULL) {
char *error_str = cJSON_Print(error);
LOG_ERRORF("elastic.c", "User script error: \n%s", error_str);
cJSON_free(error_str);
}
cJSON_Delete(resp);
}
void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
es_bulk_line_t *line = Indexer->line_head;
*count = 0;
size_t buf_size = 0; size_t buf_size = 0;
size_t buf_cur = 0; size_t buf_cur = 0;
char *buf = malloc(1); char *buf = malloc(1);
while (line != NULL) { while (line != NULL && *count < max) {
char action_str[512]; char action_str[512];
snprintf(action_str, 512, snprintf(action_str, 512,
"{\"index\":{\"_id\":\"%s\", \"_type\":\"_doc\", \"_index\":\"sist2\"}}\n", line->uuid_str); "{\"index\":{\"_id\":\"%s\", \"_type\":\"_doc\", \"_index\":\"sist2\"}}\n", line->uuid_str);
size_t action_str_len = strlen(action_str); size_t action_str_len = strlen(action_str);
size_t line_len = strlen(line->line); size_t line_len = strlen(line->line);
@@ -83,31 +112,113 @@ void elastic_flush() {
memcpy(buf + buf_cur, line->line, line_len); memcpy(buf + buf_cur, line->line, line_len);
buf_cur += line_len; buf_cur += line_len;
es_bulk_line_t *tmp = line;
line = line->next; line = line->next;
free(tmp); (*count)++;
count++;
} }
buf = realloc(buf, buf_size + 1); buf = realloc(buf, buf_size + 1);
*(buf+buf_cur) = '\0'; *(buf + buf_cur) = '\0';
Indexer->line_head = NULL; *buf_len = buf_cur;
Indexer->line_tail = NULL; return buf;
Indexer->queued = 0; }
char bulk_url[4096]; void print_errors(response_t *r) {
snprintf(bulk_url, 4096, "%s/sist2/_bulk", Indexer->es_url); char * tmp = malloc(r->size + 1);
response_t *r = web_post(bulk_url, buf, "Content-Type: application/x-ndjson"); memcpy(tmp, r->body, r->size);
printf("Indexed %3d documents (%zukB) <%d>\n", count, buf_cur / 1024, r->status_code); *(tmp + r->size) = '\0';
cJSON *ret_json = cJSON_Parse(r->body); cJSON *ret_json = cJSON_Parse(tmp);
if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) { if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
fprintf(stderr, "%s\n", r->body); cJSON *err;
cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
char *str = cJSON_Print(err);
LOG_ERRORF("elastic.c", "%s\n", str);
cJSON_free(str);
}
}
}
cJSON_Delete(ret_json);
free(tmp);
}
void _elastic_flush(int max) {
if (max == 0) {
LOG_WARNING("elastic.c", "calling _elastic_flush with 0 in queue")
return;
} }
cJSON_Delete(ret_json); size_t buf_len;
int count;
void *buf = create_bulk_buffer(max, &count, &buf_len);
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_bulk?pipeline=tie", Indexer->es_url);
response_t *r = web_post(bulk_url, buf);
if (r->status_code == 0) {
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
}
if (r->status_code == 413) {
if (max <= 1) {
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->uuid_str)
free_response(r);
free(buf);
delete_queue(1);
if (Indexer->queued != 0) {
elastic_flush();
}
return;
}
LOG_WARNINGF("elastic.c", "Payload too large, retrying (%d documents)", count);
free_response(r);
free(buf);
_elastic_flush(max / 2);
return;
} else if (r->status_code != 200) {
print_errors(r);
delete_queue(Indexer->queued);
} else {
print_errors(r);
LOG_INFOF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
delete_queue(max);
if (Indexer->queued != 0) {
elastic_flush();
}
}
free_response(r); free_response(r);
free(buf);
}
/**
 * Drop up to `max` lines from the head of the indexer queue, freeing each one
 * and keeping line_head, line_tail and queued in step.
 *
 * @param max number of lines to remove; if the queue holds fewer, it is
 *            emptied and the function returns early
 */
void delete_queue(int max) {
    for (int i = 0; i < max; i++) {
        es_bulk_line_t *head = Indexer->line_head;
        if (head == NULL) {
            /* Asked to delete more than is linked: the list is empty, so make
             * the bookkeeping agree instead of dereferencing NULL. */
            Indexer->line_tail = NULL;
            Indexer->queued = 0;
            break;
        }

        Indexer->line_head = head->next;
        if (Indexer->line_head == NULL) {
            Indexer->line_tail = NULL;
        }

        free(head);
        Indexer->queued -= 1;
    }
}
void elastic_flush() {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url);
}
_elastic_flush(Indexer->queued);
} }
void elastic_index_line(es_bulk_line_t *line) { void elastic_index_line(es_bulk_line_t *line) {
@@ -126,15 +237,14 @@ void elastic_index_line(es_bulk_line_t *line) {
Indexer->queued += 1; Indexer->queued += 1;
if (Indexer->queued >= BULK_INDEX_SIZE) { if (Indexer->queued >= IndexCtx.batch_size) {
elastic_flush(); elastic_flush();
} }
} }
es_indexer_t *create_indexer(const char *url) { es_indexer_t *create_indexer(const char *url) {
size_t url_len = strlen(url); char *es_url = malloc(strlen(url) + 1);
char *es_url = malloc(url_len);
strcpy(es_url, url); strcpy(es_url, url);
es_indexer_t *indexer = malloc(sizeof(es_indexer_t)); es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
@@ -147,18 +257,28 @@ es_indexer_t *create_indexer(const char *url) {
return indexer; return indexer;
} }
void destroy_indexer() { void destroy_indexer(char *script, char index_id[UUID_STR_LEN]) {
char url[4096]; char url[4096];
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url); snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
response_t *r = web_post(url, "", NULL); response_t *r = web_post(url, "");
printf("Refresh index <%d>\n", r->status_code); LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
if (script != NULL) {
execute_update_script(script, index_id);
free(script);
}
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r); free_response(r);
snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url); snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
r = web_post(url, "", NULL); r = web_post(url, "");
printf("Merge index <%d>\n", r->status_code); LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
free_response(r); free_response(r);
if (Indexer != NULL) { if (Indexer != NULL) {
@@ -178,32 +298,37 @@ void elastic_init(int force_reset) {
if (!index_exists || force_reset) { if (!index_exists || force_reset) {
r = web_delete(url); r = web_delete(url);
printf("Delete index <%d>\n", r->status_code); LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code);
free_response(r); free_response(r);
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url); snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
r = web_put(url, "", NULL); r = web_put(url, "");
printf("Create index <%d>\n", r->status_code); LOG_INFOF("elastic.c", "Create index <%d>", r->status_code);
free_response(r); free_response(r);
snprintf(url, 4096, "%s/sist2/_close", IndexCtx.es_url); snprintf(url, 4096, "%s/sist2/_close", IndexCtx.es_url);
r = web_post(url, "", NULL); r = web_post(url, "");
printf("Close index <%d>\n", r->status_code); LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/_ingest/pipeline/tie", IndexCtx.es_url);
r = web_put(url, pipeline_json);
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
free_response(r); free_response(r);
snprintf(url, 4096, "%s/sist2/_settings", IndexCtx.es_url); snprintf(url, 4096, "%s/sist2/_settings", IndexCtx.es_url);
r = web_put(url, settings_json, "Content-Type: application/json"); r = web_put(url, settings_json);
printf("Update settings <%d>\n", r->status_code); LOG_INFOF("elastic.c", "Update settings <%d>", r->status_code);
free_response(r); free_response(r);
snprintf(url, 4096, "%s/sist2/_mappings/_doc?include_type_name=true", IndexCtx.es_url); snprintf(url, 4096, "%s/sist2/_mappings/_doc?include_type_name=true", IndexCtx.es_url);
r = web_put(url, mappings_json, "Content-Type: application/json"); r = web_put(url, mappings_json);
printf("Update mappings <%d>\n", r->status_code); LOG_INFOF("elastic.c", "Update mappings <%d>", r->status_code);
free_response(r); free_response(r);
snprintf(url, 4096, "%s/sist2/_open", IndexCtx.es_url); snprintf(url, 4096, "%s/sist2/_open", IndexCtx.es_url);
r = web_post(url, "", NULL); r = web_post(url, "");
printf("Open index <%d>\n", r->status_code); LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
free_response(r); free_response(r);
} }
} }
@@ -220,3 +345,28 @@ cJSON *elastic_get_document(const char *uuid_str) {
free_response(r); free_response(r);
return json; return json;
} }
/**
 * Ask Elasticsearch for the state of the sist2 index.
 *
 * @return a malloc()ed string the caller must free(): the value of
 *         metadata.indices.sist2.state from the cluster-state response, or an
 *         empty string when the request does not return 200 or the field is
 *         absent. Returns NULL if the string cannot be allocated.
 */
char *elastic_get_status() {
    char url[4096];
    snprintf(url, 4096,
             "%s/_cluster/state/metadata/sist2?filter_path=metadata.indices.*.state", WebCtx.es_url);

    response_t *r = web_get(url);
    cJSON *json = NULL;

    const size_t status_size = 128;
    char *status = malloc(status_size);
    if (status == NULL) {
        free_response(r);
        return NULL;
    }
    status[0] = '\0';

    if (r->status_code == 200) {
        json = cJSON_Parse(r->body);

        /* Walk the document one level at a time; any level may be missing. */
        const cJSON *metadata = json != NULL ? cJSON_GetObjectItem(json, "metadata") : NULL;
        const cJSON *indices = metadata != NULL ? cJSON_GetObjectItem(metadata, "indices") : NULL;
        const cJSON *sist2 = indices != NULL ? cJSON_GetObjectItem(indices, "sist2") : NULL;
        const cJSON *state = sist2 != NULL ? cJSON_GetObjectItem(sist2, "state") : NULL;

        if (state != NULL && state->valuestring != NULL) {
            /* Bounded copy: the value comes from the network. */
            snprintf(status, status_size, "%s", state->valuestring);
        }
    }

    free_response(r);
    cJSON_Delete(json);
    return status;
}

View File

@@ -24,10 +24,14 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
es_indexer_t *create_indexer(const char* es_url); es_indexer_t *create_indexer(const char* es_url);
void destroy_indexer(); void destroy_indexer(char *script, char index_id[UUID_STR_LEN]);
void elastic_init(int force_reset); void elastic_init(int force_reset);
cJSON *elastic_get_document(const char *uuid_str); cJSON *elastic_get_document(const char *uuid_str);
char *elastic_get_status();
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]);
#endif #endif

File diff suppressed because one or more lines are too long

View File

@@ -1,131 +1,154 @@
#include "web.h" #include "web.h"
#include "src/sist.h"
#include "src/ctx.h"
size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) { #include <mongoose.h>
#include <pthread.h>
size_t real_size = size * nmemb;
dyn_buffer_t *buf = user_data;
dyn_buffer_write(buf, ptr, real_size);
return real_size;
}
void free_response(response_t *resp) { void free_response(response_t *resp) {
free(resp->body); if (resp->body != NULL) {
free(resp->body);
}
free(resp); free(resp);
} }
#define SIST2_HEADERS "User-Agent: sist2\r\nContent-Type: application/json\r\n"
/**
 * Mongoose event handler for the outgoing requests created by http_req().
 *
 * nc->user_data points at the request's http_ev_data_t. The handler fills in
 * its response and raises `done` once the request has finished, one way or
 * another.
 *
 * @param nc  connection the event belongs to
 * @param ev  Mongoose event id
 * @param ptr event payload: int connect status for MG_EV_CONNECT,
 *            struct http_message for MG_EV_HTTP_REPLY
 */
void http_req_ev(struct mg_connection *nc, int ev, void *ptr) {

    http_ev_data_t *ev_data = (http_ev_data_t *) nc->user_data;

    switch (ev) {
        case MG_EV_CONNECT: {
            int connect_status = *(int *) ptr;
            if (connect_status != 0) {
                /* Connection failed: finish with status_code 0. */
                ev_data->done = TRUE;
                ev_data->resp->status_code = 0;
            }
            break;
        }
        case MG_EV_HTTP_REPLY: {
            struct http_message *hm = (struct http_message *) ptr;

            //TODO: Check errors?

            ev_data->resp->status_code = hm->resp_code;

            /* Copy the body out of Mongoose's buffer and NUL-terminate it. */
            char *body = malloc(hm->body.len + 1);
            if (body != NULL) {
                memcpy(body, hm->body.p, hm->body.len);
                body[hm->body.len] = '\0';
                ev_data->resp->size = hm->body.len;
            } else {
                /* Out of memory: report an empty body instead of writing
                 * through a NULL pointer. */
                ev_data->resp->size = 0;
            }
            ev_data->resp->body = body;

            ev_data->done = TRUE;
            break;
        }
        case MG_EV_CLOSE: {
            ev_data->done = TRUE;
            break;
        }
        default:
            break;
    }
}
subreq_ctx_t *http_req(const char *url, const char *extra_headers, const char *post_data, const char *method) {
struct mg_str scheme;
struct mg_str user_info;
struct mg_str host;
unsigned int port;
struct mg_str path;
struct mg_str query;
struct mg_str fragment;
if (post_data == NULL) post_data = "";
if (extra_headers == NULL) extra_headers = "";
if (path.len == 0) path = mg_mk_str("/");
if (host.len == 0) host = mg_mk_str("");
// [scheme://[user_info@]]host[:port][/path][?query][#fragment]
mg_parse_uri(mg_mk_str(url), &scheme, &user_info, &host, &port, &path, &query, &fragment);
if (query.len > 0) path.len += query.len + 1;
subreq_ctx_t *ctx = malloc(sizeof(subreq_ctx_t));
mg_mgr_init(&ctx->mgr, NULL);
char address[8196];
snprintf(address, sizeof(address), "tcp://%.*s:%u", (int) host.len, host.p, port);
struct mg_connection *nc = mg_connect(&ctx->mgr, address, http_req_ev);
nc->user_data = &ctx->ev_data;
mg_set_protocol_http_websocket(nc);
ctx->ev_data.resp = calloc(1, sizeof(response_t));
ctx->ev_data.done = FALSE;
mg_printf(
nc, "%s %.*s HTTP/1.1\r\n"
"Host: %.*s\r\n"
"Content-Length: %zu\r\n"
"%s\r\n"
"%s",
method, (int) path.len, path.p,
(int) (path.p - host.p), host.p,
strlen(post_data),
extra_headers,
post_data
);
return ctx;
}
response_t *web_get(const char *url) { response_t *web_get(const char *url) {
response_t *resp = malloc(sizeof(response_t)); subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, NULL, "GET");
while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
}
mg_mgr_free(&ctx->mgr);
CURL *curl; response_t *ret = ctx->ev_data.resp;
dyn_buffer_t buffer = dyn_buffer_create(); free(ctx);
return ret;
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
} }
response_t *web_post(const char *url, const char *data, const char *header) { subreq_ctx_t *web_post_async(const char *url, const char *data) {
return http_req(url, SIST2_HEADERS, data, "POST");
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
struct curl_slist *headers = NULL;
if (header != NULL) {
headers = curl_slist_append(headers, header);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
}
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
} }
response_t *web_post(const char *url, const char *data) {
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, data, "POST");
response_t *web_put(const char *url, const char *data, const char *header) { while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
if (header != NULL) {
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, header);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
} }
mg_mgr_free(&ctx->mgr);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data); response_t *ret = ctx->ev_data.resp;
free(ctx);
return ret;
}
curl_easy_perform(curl); response_t *web_put(const char *url, const char *data) {
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code); subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, data, "PUT");
while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
}
mg_mgr_free(&ctx->mgr);
curl_easy_cleanup(curl); response_t *ret = ctx->ev_data.resp;
free(ctx);
resp->body = buffer.buf; return ret;
resp->size = buffer.cur;
return resp;
} }
response_t *web_delete(const char *url) { response_t *web_delete(const char *url) {
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, NULL, "DELETE");
while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
}
mg_mgr_free(&ctx->mgr);
response_t *resp = malloc(sizeof(response_t)); response_t *ret = ctx->ev_data.resp;
free(ctx);
CURL *curl; return ret;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
} }

View File

@@ -2,6 +2,7 @@
#define SIST2_WEB_H #define SIST2_WEB_H
#include "src/sist.h" #include "src/sist.h"
#include <mongoose.h>
typedef struct response { typedef struct response {
char *body; char *body;
@@ -9,9 +10,20 @@ typedef struct response {
int status_code; int status_code;
} response_t; } response_t;
typedef struct {
response_t *resp;
int done;
} http_ev_data_t;
typedef struct {
http_ev_data_t ev_data;
struct mg_mgr mgr;
} subreq_ctx_t;
response_t *web_get(const char *url); response_t *web_get(const char *url);
response_t *web_post(const char * url, const char * data, const char* header); response_t *web_post(const char * url, const char * data);
response_t *web_put(const char *url, const char *data, const char *header); subreq_ctx_t *web_post_async(const char *url, const char *data);
response_t *web_put(const char *url, const char *data);
response_t *web_delete(const char *url); response_t *web_delete(const char *url);
void free_response(response_t *resp); void free_response(response_t *resp);

View File

@@ -1,7 +1,9 @@
#include "src/ctx.h" #include "src/ctx.h"
#include "serialize.h" #include "serialize.h"
#include "src/parsing/parse.h"
#include "src/parsing/mime.h"
static __thread int IndexFd = -1; static __thread int index_fd = -1;
typedef struct { typedef struct {
unsigned char uuid[16]; unsigned char uuid[16];
@@ -34,15 +36,19 @@ void write_index_descriptor(char *path, index_descriptor_t *desc) {
cJSON_AddStringToObject(json, "version", desc->version); cJSON_AddStringToObject(json, "version", desc->version);
cJSON_AddStringToObject(json, "root", desc->root); cJSON_AddStringToObject(json, "root", desc->root);
cJSON_AddStringToObject(json, "name", desc->name); cJSON_AddStringToObject(json, "name", desc->name);
cJSON_AddStringToObject(json, "type", desc->type);
cJSON_AddStringToObject(json, "rewrite_url", desc->rewrite_url); cJSON_AddStringToObject(json, "rewrite_url", desc->rewrite_url);
cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp); cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp);
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR); int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
if (fd == -1) { if (fd < 0) {
perror(path); LOG_FATALF("serialize.c", "Could not open index descriptor: %s", strerror(errno));
} }
char *str = cJSON_Print(json); char *str = cJSON_Print(json);
write(fd, str, strlen(str)); int ret = write(fd, str, strlen(str));
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
}
free(str); free(str);
close(fd); close(fd);
@@ -54,8 +60,16 @@ index_descriptor_t read_index_descriptor(char *path) {
struct stat info; struct stat info;
stat(path, &info); stat(path, &info);
int fd = open(path, O_RDONLY); int fd = open(path, O_RDONLY);
if (fd == -1) {
LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path ,strerror(errno))
}
char *buf = malloc(info.st_size + 1); char *buf = malloc(info.st_size + 1);
read(fd, buf, info.st_size); int ret = read(fd, buf, info.st_size);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
}
*(buf + info.st_size) = '\0'; *(buf + info.st_size) = '\0';
close(fd); close(fd);
@@ -66,9 +80,14 @@ index_descriptor_t read_index_descriptor(char *path) {
strcpy(descriptor.root, cJSON_GetObjectItem(json, "root")->valuestring); strcpy(descriptor.root, cJSON_GetObjectItem(json, "root")->valuestring);
strcpy(descriptor.name, cJSON_GetObjectItem(json, "name")->valuestring); strcpy(descriptor.name, cJSON_GetObjectItem(json, "name")->valuestring);
strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring); strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
descriptor.root_len = (short)strlen(descriptor.root); descriptor.root_len = (short) strlen(descriptor.root);
strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring); strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring); strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring);
if (cJSON_GetObjectItem(json, "type") == NULL) {
strcpy(descriptor.type, INDEX_TYPE_BIN);
} else {
strcpy(descriptor.type, cJSON_GetObjectItem(json, "type")->valuestring);
}
cJSON_Delete(json); cJSON_Delete(json);
free(buf); free(buf);
@@ -105,6 +124,32 @@ char *get_meta_key_text(enum metakey meta_key) {
return "title"; return "title";
case MetaFontName: case MetaFontName:
return "font_name"; return "font_name";
case MetaParent:
return "parent";
case MetaExifMake:
return "exif_make";
case MetaExifSoftware:
return "exif_software";
case MetaExifExposureTime:
return "exif_exposure_time";
case MetaExifFNumber:
return "exif_fnumber";
case MetaExifFocalLength:
return "exif_focal_length";
case MetaExifUserComment:
return "exif_user_comment";
case MetaExifIsoSpeedRatings:
return "exif_iso_speed_ratings";
case MetaExifModel:
return "exif_model";
case MetaExifDateTime:
return "exif_datetime";
case MetaAuthor:
return "author";
case MetaModifiedBy:
return "modified_by";
case MetaThumbnail:
return "thumbnail";
default: default:
return NULL; return NULL;
} }
@@ -113,13 +158,13 @@ char *get_meta_key_text(enum metakey meta_key) {
void write_document(document_t *doc) { void write_document(document_t *doc) {
if (IndexFd == -1) { if (index_fd == -1) {
char dstfile[PATH_MAX]; char dstfile[PATH_MAX];
pthread_t self = pthread_self(); pthread_t self = pthread_self();
snprintf(dstfile, PATH_MAX, "%s_index_%lu", ScanCtx.index.path, self); snprintf(dstfile, PATH_MAX, "%s_index_%lu", ScanCtx.index.path, self);
IndexFd = open(dstfile, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR); index_fd = open(dstfile, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR);
if (IndexFd == -1) { if (index_fd == -1) {
perror("open"); perror("open");
} }
} }
@@ -139,11 +184,11 @@ void write_document(document_t *doc) {
dyn_buffer_write_char(&buf, meta->key); dyn_buffer_write_char(&buf, meta->key);
if (IS_META_INT(meta->key)) { if (IS_META_INT(meta->key)) {
dyn_buffer_write_int(&buf, meta->intval); dyn_buffer_write_int(&buf, meta->int_val);
} else if (IS_META_LONG(meta->key)) { } else if (IS_META_LONG(meta->key)) {
dyn_buffer_write_long(&buf, meta->longval); dyn_buffer_write_long(&buf, meta->long_val);
} else { } else {
dyn_buffer_write_str(&buf, meta->strval); dyn_buffer_write_str(&buf, meta->str_val);
} }
meta_line_t *tmp = meta; meta_line_t *tmp = meta;
@@ -152,24 +197,29 @@ void write_document(document_t *doc) {
} }
dyn_buffer_write_char(&buf, '\n'); dyn_buffer_write_char(&buf, '\n');
write(IndexFd, buf.buf, buf.cur); int res = write(index_fd, buf.buf, buf.cur);
if (res == -1) {
LOG_FATALF("serialize.c", "Could not write document: %s", strerror(errno))
}
ScanCtx.stat_index_size += buf.cur; ScanCtx.stat_index_size += buf.cur;
dyn_buffer_destroy(&buf); dyn_buffer_destroy(&buf);
} }
void serializer_cleanup() { void thread_cleanup() {
close(IndexFd); close(index_fd);
cleanup_parse();
cleanup_font();
} }
void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func func) {
void read_index_bin(const char *path, const char *index_id, index_func func) {
line_t line; line_t line;
dyn_buffer_t buf = dyn_buffer_create(); dyn_buffer_t buf = dyn_buffer_create();
FILE *file = fopen(path, "rb"); FILE *file = fopen(path, "rb");
while (1) { while (1) {
buf.cur = 0; buf.cur = 0;
fread((void *) &line, 1, sizeof(line_t), file); size_t _ = fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) { if (feof(file)) {
break; break;
} }
@@ -180,8 +230,13 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
char uuid_str[UUID_STR_LEN]; char uuid_str[UUID_STR_LEN];
uuid_unparse(line.uuid, uuid_str); uuid_unparse(line.uuid, uuid_str);
cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime)); const char* mime_text = mime_get_mime_text(line.mime);
cJSON_AddNumberToObject(document, "size", (double)line.size); if (mime_text == NULL) {
cJSON_AddNullToObject(document, "mime");
} else {
cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime));
}
cJSON_AddNumberToObject(document, "size", (double) line.size);
cJSON_AddNumberToObject(document, "mtime", line.mtime); cJSON_AddNumberToObject(document, "mtime", line.mtime);
int c; int c;
@@ -196,43 +251,64 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
} else { } else {
*(buf.buf + line.ext) = '\0'; *(buf.buf + line.ext) = '\0';
} }
cJSON_AddStringToObject(document, "name", buf.buf + line.base);
*(buf.buf + line.base - 1) = '\0'; char tmp[PATH_MAX * 3];
cJSON_AddStringToObject(document, "path", buf.buf);
str_escape(tmp, buf.buf + line.base);
cJSON_AddStringToObject(document, "name", tmp);
if (line.base > 0) {
*(buf.buf + line.base - 1) = '\0';
str_escape(tmp, buf.buf);
cJSON_AddStringToObject(document, "path", tmp);
} else {
cJSON_AddStringToObject(document, "path", "");
}
enum metakey key = getc(file); enum metakey key = getc(file);
size_t ret = 0;
while (key != '\n') { while (key != '\n') {
switch (key) { switch (key) {
case MetaWidth: case MetaWidth:
case MetaHeight: case MetaHeight: {
case MetaMediaDuration:
case MetaMediaBitrate: {
int value; int value;
fread(&value, sizeof(int), 1, file); ret = fread(&value, sizeof(int), 1, file);
cJSON_AddNumberToObject(document, get_meta_key_text(key), value); cJSON_AddNumberToObject(document, get_meta_key_text(key), value);
break; break;
} }
case MetaMediaAudioCodec: case MetaMediaDuration:
case MetaMediaVideoCodec: { case MetaMediaBitrate: {
int value; long value;
fread(&value, sizeof(int), 1, file); ret = fread(&value, sizeof(long), 1, file);
const AVCodecDescriptor *desc = avcodec_descriptor_get(value); cJSON_AddNumberToObject(document, get_meta_key_text(key), (double) value);
if (desc != NULL) {
cJSON_AddStringToObject(document, get_meta_key_text(key), desc->name);
}
break; break;
} }
case MetaMediaAudioCodec:
case MetaMediaVideoCodec:
case MetaContent: case MetaContent:
case MetaArtist: case MetaArtist:
case MetaAlbum: case MetaAlbum:
case MetaAlbumArtist: case MetaAlbumArtist:
case MetaGenre: case MetaGenre:
case MetaFontName: case MetaFontName:
case MetaParent:
case MetaExifMake:
case MetaExifSoftware:
case MetaExifExposureTime:
case MetaExifFNumber:
case MetaExifFocalLength:
case MetaExifUserComment:
case MetaExifIsoSpeedRatings:
case MetaExifDateTime:
case MetaExifModel:
case MetaAuthor:
case MetaModifiedBy:
case MetaThumbnail:
case MetaTitle: { case MetaTitle: {
buf.cur = 0; buf.cur = 0;
while ((c = getc(file)) != 0) { while ((c = getc(file)) != 0) {
if (!(SHOULD_IGNORE_CHAR(c)) || c == ' ') { if (SHOULD_KEEP_CHAR(c) || c == ' ') {
dyn_buffer_write_char(&buf, (char) c); dyn_buffer_write_char(&buf, (char) c);
} }
} }
@@ -240,24 +316,110 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
cJSON_AddStringToObject(document, get_meta_key_text(key), buf.buf); cJSON_AddStringToObject(document, get_meta_key_text(key), buf.buf);
break; break;
} }
default:
LOG_FATALF("serialize.c", "Invalid meta key (corrupt index): %x", key)
} }
key = getc(file); key = getc(file);
} }
func(document, uuid_str); func(document, uuid_str);
cJSON_free(document); cJSON_Delete(document);
}
dyn_buffer_destroy(&buf);
fclose(file);
}
/**
 * Top-level fields that read_index_json() copies as-is from each input
 * line into the output document. Every name must appear only once:
 * a repeated name makes read_index_json() attach the same field twice,
 * which produces a JSON object with a duplicate key.
 */
const char *json_type_copy_fields[] = {
        "mime", "name", "path", "extension", "index", "size", "mtime", "parent",
        // Meta
        "title", "content", "width", "height", "duration", "audioc", "videoc",
        "bitrate", "artist", "album", "album_artist", "genre", "font_name",
        // Special
        "tag", "_url"
};

/**
 * Fields holding an array of {"k": ..., "v": ...} objects. read_index_json()
 * flattens each member into a "<field>.<k>" string entry.
 */
const char *json_type_array_fields[] = {
        "_keyword", "_text"
};
/**
 * Read a line-delimited JSON index file and hand one document per line
 * to func.
 *
 * For every line:
 *  - the fields named in json_type_copy_fields are attached to the output
 *    document by reference (they stay owned by the parsed input line),
 *  - every {"k", "v"} member of the arrays named in json_type_array_fields
 *    becomes a "<array>.<k>" string field,
 *  - func(document, _id) is called, then both JSON trees are released.
 *
 * Any malformed line aborts the program through LOG_FATALF.
 *
 * @param path     path of the index file
 * @param index_id unused for this index type
 * @param func     callback invoked with each document and its "_id" string
 */
void read_index_json(const char *path, UNUSED(const char *index_id), index_func func) {

    FILE *file = fopen(path, "r");
    if (file == NULL) {
        LOG_FATALF("serialize.c", "Could not open index file %s: %s", path, strerror(errno))
    }

    // getline() grows and reuses this buffer; it is released once after the loop
    char *line = NULL;
    size_t line_cap = 0;
    // Must be signed: getline() reports EOF/error with -1
    ssize_t line_len;

    while ((line_len = getline(&line, &line_cap, file)) != -1) {

        cJSON *input = cJSON_Parse(line);
        if (input == NULL) {
            LOG_FATALF("serialize.c", "Could not parse JSON line: \n%s", line)
        }

        cJSON *id = cJSON_GetObjectItem(input, "_id");
        if (id == NULL || !cJSON_IsString(id)) {
            LOG_FATALF("serialize.c", "Invalid document: must contain a string _id field: \n%s", line)
        }
        const char *uuid_str = id->valuestring;

        cJSON *document = cJSON_CreateObject();

        for (size_t i = 0; i < (sizeof(json_type_copy_fields) / sizeof(json_type_copy_fields[0])); i++) {
            cJSON *value = cJSON_GetObjectItem(input, json_type_copy_fields[i]);
            if (value != NULL) {
                cJSON_AddItemReferenceToObject(document, json_type_copy_fields[i], value);
            }
        }

        for (size_t i = 0; i < (sizeof(json_type_array_fields) / sizeof(json_type_array_fields[0])); i++) {
            cJSON *arr = cJSON_GetObjectItem(input, json_type_array_fields[i]);
            if (arr == NULL) {
                continue;
            }

            cJSON *obj;
            cJSON_ArrayForEach(obj, arr) {
                char key[1024];
                cJSON *k = cJSON_GetObjectItem(obj, "k");
                cJSON *v = cJSON_GetObjectItem(obj, "v");
                if (k == NULL || v == NULL || !cJSON_IsString(k) || !cJSON_IsString(v)) {
                    // Not freed: LOG_FATALF terminates the process
                    char *str = cJSON_Print(obj);
                    LOG_FATALF("serialize.c", "Invalid %s member: must contain .k and .v string fields: \n%s",
                               json_type_array_fields[i], str)
                }
                snprintf(key, sizeof(key), "%s.%s", json_type_array_fields[i], k->valuestring);
                cJSON_AddStringToObject(document, key, v->valuestring);
            }
        }

        func(document, uuid_str);

        // document only holds references into input, so it goes first
        cJSON_Delete(document);
        cJSON_Delete(input);
    }

    free(line);
    fclose(file);
}
/**
 * Dispatch to the reader matching the index type.
 *
 * @param path     path of the index file
 * @param index_id id of the index, forwarded to the reader
 * @param type     INDEX_TYPE_BIN or INDEX_TYPE_JSON; any other value is ignored
 * @param func     callback invoked for each document
 */
void read_index(const char *path, const char index_id[UUID_STR_LEN], const char *type, index_func func) {
    if (strcmp(type, INDEX_TYPE_BIN) == 0) {
        read_index_bin(path, index_id, func);
        return;
    }

    if (strcmp(type, INDEX_TYPE_JSON) == 0) {
        read_index_json(path, index_id, func);
    }
}
void incremental_read(GHashTable *table, const char *filepath) { void incremental_read(GHashTable *table, const char *filepath) {
FILE *file = fopen(filepath, "rb"); FILE *file = fopen(filepath, "rb");
line_t line; line_t line;
while (1) { while (1) {
fread((void *) &line, 1, sizeof(line_t), file); size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) { if (ret != 1 || feof(file)) {
break; break;
} }
@@ -280,8 +442,8 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
line_t line; line_t line;
while (1) { while (1) {
fread((void *) &line, 1, sizeof(line_t), file); size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) { if (ret != 1 || feof(file)) {
break; break;
} }
@@ -309,11 +471,11 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
if (IS_META_INT(key)) { if (IS_META_INT(key)) {
int val; int val;
fread(&val, sizeof(val), 1, file); ret = fread(&val, sizeof(val), 1, file);
fwrite(&val, sizeof(val), 1, dst_file); fwrite(&val, sizeof(val), 1, dst_file);
} else if (IS_META_LONG(key)) { } else if (IS_META_LONG(key)) {
long val; long val;
fread(&val, sizeof(val), 1, file); ret = fread(&val, sizeof(val), 1, file);
fwrite(&val, sizeof(val), 1, dst_file); fwrite(&val, sizeof(val), 1, dst_file);
} else { } else {
while ((c = (char) getc(file))) { while ((c = (char) getc(file))) {
@@ -321,6 +483,10 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
} }
fwrite("\0", sizeof(c), 1, dst_file); fwrite("\0", sizeof(c), 1, dst_file);
} }
if (ret != 1) {
break;
}
} }
} else { } else {
skip_meta(file); skip_meta(file);

View File

@@ -2,7 +2,10 @@
#define SIST2_SERIALIZE_H #define SIST2_SERIALIZE_H
#include "src/sist.h" #include "src/sist.h"
#include "store.h"
#include <sys/syscall.h> #include <sys/syscall.h>
#include <glib.h>
typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]); typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]);
@@ -11,14 +14,14 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
void write_document(document_t *doc); void write_document(document_t *doc);
void read_index(const char *path, const char[UUID_STR_LEN], index_func); void read_index(const char *path, const char[UUID_STR_LEN], const char *type, index_func);
void incremental_read(GHashTable *table, const char *filepath); void incremental_read(GHashTable *table, const char *filepath);
/** /**
* Must be called after write_document * Must be called after write_document
*/ */
void serializer_cleanup(); void thread_cleanup();
void write_index_descriptor(char *path, index_descriptor_t *desc); void write_index_descriptor(char *path, index_descriptor_t *desc);

View File

@@ -9,14 +9,13 @@ store_t *store_create(char *path) {
mdb_env_create(&store->env); mdb_env_create(&store->env);
int open_ret = mdb_env_open(store->env, int open_ret = mdb_env_open(store->env,
path, path,
MDB_WRITEMAP | MDB_MAPASYNC, MDB_WRITEMAP | MDB_MAPASYNC,
S_IRUSR | S_IWUSR S_IRUSR | S_IWUSR
); );
if (open_ret != 0) { if (open_ret != 0) {
fprintf(stderr, "Error while opening store: %s", mdb_strerror(open_ret)); LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path)
exit(1);
} }
store->size = (size_t) 1024 * 1024 * 5; store->size = (size_t) 1024 * 1024 * 5;
@@ -42,6 +41,12 @@ void store_destroy(store_t *store) {
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) { void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {
if (LogCtx.very_verbose) {
char uuid_str[UUID_STR_LEN];
uuid_unparse((unsigned char *) key, uuid_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", uuid_str, buf_len)
}
MDB_val mdb_key; MDB_val mdb_key;
mdb_key.mv_data = key; mdb_key.mv_data = key;
mdb_key.mv_size = key_len; mdb_key.mv_size = key_len;
@@ -68,13 +73,15 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
mdb_env_set_mapsize(store->env, store->size); mdb_env_set_mapsize(store->env, store->size);
mdb_txn_begin(store->env, NULL, 0, &txn); mdb_txn_begin(store->env, NULL, 0, &txn);
put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0); put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
LOG_INFOF("store.c", "Updated mdb mapsize to %lu bytes", store->size)
} }
mdb_txn_commit(txn); mdb_txn_commit(txn);
pthread_rwlock_unlock(&store->lock); pthread_rwlock_unlock(&store->lock);
if (put_ret != 0) { if (put_ret != 0) {
printf("%s\n", mdb_strerror(put_ret)); LOG_ERROR("store.c", mdb_strerror(put_ret))
} }
} }

View File

@@ -11,8 +11,6 @@ typedef struct store_t {
pthread_rwlock_t lock; pthread_rwlock_t lock;
} store_t; } store_t;
#include "src/sist.h"
store_t *store_create(char *path); store_t *store_create(char *path);
void store_destroy(store_t *store); void store_destroy(store_t *store);

View File

@@ -1,28 +1,50 @@
#include "walk.h" #include "walk.h"
#include "src/ctx.h" #include "src/ctx.h"
#include "src/parsing/parse.h"
parse_job_t *create_parse_job(const char *filepath, const struct stat *info, int base) { #include <ftw.h>
__always_inline
parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
int len = (int) strlen(filepath); int len = (int) strlen(filepath);
parse_job_t *job = malloc(sizeof(parse_job_t) + len); parse_job_t *job = malloc(sizeof(parse_job_t) + len);
memcpy(&(job->filepath), filepath, len + 1); strcpy(job->filepath, filepath);
job->base = base; job->base = base;
char *p = strrchr(filepath + base, '.'); char *p = strrchr(filepath + base, '.');
if (p != NULL) { if (p != NULL) {
job->ext = (int)(p - filepath + 1); job->ext = (int) (p - filepath + 1);
} else { } else {
job->ext = len; job->ext = len;
} }
memcpy(&(job->info), info, sizeof(struct stat)); job->vfile.info = *info;
memset(job->parent, 0, 16);
job->vfile.filepath = job->filepath;
job->vfile.read = fs_read;
job->vfile.reset = fs_reset;
job->vfile.close = fs_close;
job->vfile.fd = -1;
job->vfile.is_fs_file = TRUE;
return job; return job;
} }
int sub_strings[30];
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, filepath, strlen(filepath), 0, 0, sub_strings, sizeof(sub_strings)) >= 0)
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) { int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
parse_job_t *job = create_parse_job(filepath, info, ftw->base); if (typeflag == FTW_F && S_ISREG(info->st_mode) && ftw->level <= ScanCtx.depth) {
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
return 0;
}
parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
tpool_add_work(ScanCtx.pool, parse, job); tpool_add_work(ScanCtx.pool, parse, job);
} }

View File

@@ -3,8 +3,6 @@
#define _XOPEN_SOURCE 500 #define _XOPEN_SOURCE 500
#include "src/sist.h"
int walk_directory_tree(const char *); int walk_directory_tree(const char *);
#endif #endif

112
src/log.c Normal file
View File

@@ -0,0 +1,112 @@
#include "log.h"
#include <pthread.h>
#include <stdarg.h>
// ANSI escape sequences, indexed by log level when stderr is a terminal.
// NOTE(review): six entries for five level names in log_levels - confirm the
// intended level-to-colour mapping.
const char *log_colors[] = {
"\033[34m", "\033[01;34m", "\033[0m",
"\033[01;33m", "\033[31m", "\033[01;31m"
};
// Printable level names, indexed by log level (SIST_DEBUG .. SIST_FATAL).
const char *log_levels[] = {
"DEBUG", "INFO", "WARNING", "ERROR", "FATAL"
};
void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
static int is_tty = -1;
if (is_tty == -1) {
is_tty = isatty(STDERR_FILENO);
}
char log_str[LOG_MAX_LENGTH];
unsigned long long pid = (unsigned long long) pthread_self();
char datetime[32];
time_t t;
struct tm result;
t = time(NULL);
localtime_r(&t, &result);
strftime(datetime, sizeof(datetime), "%Y-%m-%d %H:%M:%S", &result);
int log_len;
if (is_tty) {
log_len = snprintf(
log_str, sizeof(log_str),
"\033[%dm[%04llX]%s [%s] [%s %s] ",
31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
datetime, log_levels[level], filepath
);
} else {
log_len = snprintf(
log_str, sizeof(log_str),
"[%04llX] [%s] [%s %s] ",
pid, datetime, log_levels[level], filepath
);
}
size_t maxsize = sizeof(log_str) - log_len;
log_len += vsnprintf(log_str + log_len, maxsize, format, ap);
if (is_tty) {
log_len += sprintf(log_str + log_len, "\033[0m\n");
} else {
*(log_str + log_len) = '\n';
log_len += 1;
}
int ret = write(STDERR_FILENO, log_str, log_len);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno))
}
}
/**
 * Variadic front-end of vsist_logf(): packs the trailing arguments into a
 * va_list and forwards everything unchanged.
 *
 * @param filepath name of the source file emitting the message
 * @param level    log level
 * @param format   printf-style format string, followed by its arguments
 */
void sist_logf(const char *filepath, int level, char *format, ...) {
    va_list args;

    va_start(args, format);
    vsist_logf(filepath, level, format, args);
    va_end(args);
}
void sist_log(const char *filepath, int level, char *str) {
static int is_tty = -1;
if (is_tty == -1) {
is_tty = isatty(STDERR_FILENO);
}
char log_str[LOG_MAX_LENGTH];
unsigned long long pid = (unsigned long long) pthread_self();
char datetime[32];
time_t t;
struct tm result;
t = time(NULL);
localtime_r(&t, &result);
strftime(datetime, sizeof(datetime), "%Y-%m-%d %H:%M:%S", &result);
int log_len;
if (is_tty) {
log_len = snprintf(
log_str, sizeof(log_str),
"\033[%dm[%04llX]%s [%s] [%s %s] %s \033[0m\n",
31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
datetime, log_levels[level], filepath,
str
);
} else {
log_len = snprintf(
log_str, sizeof(log_str),
"[%04llX] [%s] [%s %s] %s \n",
pid, datetime, log_levels[level], filepath,
str
);
}
int ret = write(STDERR_FILENO, log_str, log_len);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
}
}

47
src/log.h Normal file
View File

@@ -0,0 +1,47 @@
#ifndef SIST2_LOG_H
#define SIST2_LOG_H
#define LOG_MAX_LENGTH 8192
#define SIST_DEBUG 0
#define SIST_INFO 1
#define SIST_WARNING 2
#define SIST_ERROR 3
#define SIST_FATAL 4
// Conditional logging macros.
//  - *F variants take a printf-style format plus at least one argument
//    (__VA_ARGS__ must not be empty); the others take a plain string.
//  - DEBUG messages are emitted only when LogCtx.very_verbose is set,
//    INFO / WARNING / ERROR only when LogCtx.verbose is set.
//  - Each macro expands to a bare `if (...) {...}` statement, so it needs no
//    trailing semicolon, but it must not be used as the body of an unbraced
//    `if` that has an `else` (the `else` would bind to the macro's `if`).
#define LOG_DEBUGF(filepath, fmt, ...) \
if (LogCtx.very_verbose) {sist_logf(filepath, SIST_DEBUG, fmt, __VA_ARGS__);}
#define LOG_DEBUG(filepath, str) \
if (LogCtx.very_verbose) {sist_log(filepath, SIST_DEBUG, str);}
#define LOG_INFOF(filepath, fmt, ...) \
if (LogCtx.verbose) {sist_logf(filepath, SIST_INFO, fmt, __VA_ARGS__);}
#define LOG_INFO(filepath, str) \
if (LogCtx.verbose) {sist_log(filepath, SIST_INFO, str);}
// Warnings and errors share the LogCtx.verbose gate with INFO.
#define LOG_WARNINGF(filepath, fmt, ...) \
if (LogCtx.verbose) {sist_logf(filepath, SIST_WARNING, fmt, __VA_ARGS__);}
#define LOG_WARNING(filepath, str) \
if (LogCtx.verbose) {sist_log(filepath, SIST_WARNING, str);}
#define LOG_ERRORF(filepath, fmt, ...) \
if (LogCtx.verbose) {sist_logf(filepath, SIST_ERROR, fmt, __VA_ARGS__);}
#define LOG_ERROR(filepath, str) \
if (LogCtx.verbose) {sist_log(filepath, SIST_ERROR, str);}
// Fatal logging macros: always print the message (regardless of verbosity),
// then terminate the process with exit(-1).
// The expansion is a single brace block so that both statements stay together
// when the macro is the body of an unbraced `if`/`else`/loop. It can be
// written with or without a trailing semicolon.
#define LOG_FATALF(filepath, fmt, ...) \
    {sist_logf(filepath, SIST_FATAL, fmt, __VA_ARGS__); exit(-1);}
#define LOG_FATAL(filepath, str) \
    {sist_log(filepath, SIST_FATAL, str); exit(-1);}
#include "sist.h"
void sist_logf(const char *filepath, int level, char *format, ...);
void vsist_logf(const char *filepath, int level, char *format, va_list ap);
void sist_log(const char *filepath, int level, char *str);
#endif

View File

@@ -1,30 +1,35 @@
#include "sist.h" #include "sist.h"
#include "ctx.h" #include "ctx.h"
#ifndef SIST_SCAN_ONLY #include <third-party/argparse/argparse.h>
#include <locale.h>
#include "cli.h"
#include "io/serialize.h"
#include "io/store.h"
#include "tpool.h"
#include "io/walk.h"
#include "index/elastic.h"
#include "web/serve.h"
#include "parsing/mime.h"
#include "parsing/parse.h"
#include "stats.h"
#define DESCRIPTION "Lightning-fast file system indexer and search tool." #define DESCRIPTION "Lightning-fast file system indexer and search tool."
#else
#define DESCRIPTION "Lightning-fast file system indexer and search tool. (SCAN ONLY)"
#endif
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0" #define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "1.0.10"; static const char *const Version = "2.5.0";
static const char *const usage[] = { static const char *const usage[] = {
"sist2 scan [OPTION]... PATH", "sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX", "sist2 index [OPTION]... INDEX",
"sist2 web [OPTION]... INDEX...", "sist2 web [OPTION]... INDEX...",
"sist2 exec-script [OPTION]... INDEX",
NULL, NULL,
}; };
void global_init() {
#ifndef SIST_SCAN_ONLY
curl_global_init(CURL_GLOBAL_NOTHING);
#endif
av_log_set_level(AV_LOG_QUIET);
}
void init_dir(const char *dirpath) { void init_dir(const char *dirpath) {
char path[PATH_MAX]; char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath); snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
@@ -34,36 +39,140 @@ void init_dir(const char *dirpath) {
uuid_unparse(uuid, ScanCtx.index.desc.uuid); uuid_unparse(uuid, ScanCtx.index.desc.uuid);
time(&ScanCtx.index.desc.timestamp); time(&ScanCtx.index.desc.timestamp);
strcpy(ScanCtx.index.desc.version, Version); strcpy(ScanCtx.index.desc.version, Version);
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_BIN);
write_index_descriptor(path, &ScanCtx.index.desc); write_index_descriptor(path, &ScanCtx.index.desc);
} }
void scan_print_header() { void scan_print_header() {
printf("sist2 V%s\n", Version); LOG_INFOF("main.c", "sist2 v%s", Version)
printf("---------------------\n");
printf("threads\t\t%d\n", ScanCtx.threads);
printf("tn_qscale\t%.1f/31.0\n", ScanCtx.tn_qscale);
printf("tn_size\t\t%dpx\n", ScanCtx.tn_size);
printf("output\t\t%s\n", ScanCtx.index.path);
} }
void _store(char *key, size_t key_len, char *buf, size_t buf_len) {
store_write(ScanCtx.index.store, key, key_len, buf, buf_len);
}
/**
 * Log callback handed to the parser contexts (plain string variant).
 *
 *  - LEVEL_FATAL: always logged, then the process exits with -1.
 *  - LEVEL_DEBUG: logged only when both verbose and very_verbose are set.
 *  - any other level: logged only when verbose is set.
 */
void _log(const char *filepath, int level, char *str) {
    if (level == LEVEL_FATAL) {
        sist_log(filepath, level, str);
        exit(-1);
    }

    if (!LogCtx.verbose) {
        return;
    }

    if (level == LEVEL_DEBUG && !LogCtx.very_verbose) {
        return;
    }

    sist_log(filepath, level, str);
}
/**
 * Log callback handed to the parser contexts (printf-style variant).
 *
 *  - LEVEL_FATAL: always logged, then the process exits with -1.
 *  - LEVEL_DEBUG: logged only when both verbose and very_verbose are set.
 *  - any other level: logged only when verbose is set.
 *
 * The arguments are already packed in a va_list here, so every branch must
 * forward them through vsist_logf(); handing the va_list to the variadic
 * sist_logf() would make it be read as the first format argument.
 */
void _logf(const char *filepath, int level, char *format, ...) {

    va_list args;
    va_start(args, format);

    if (level == LEVEL_FATAL) {
        vsist_logf(filepath, level, format, args);
        va_end(args);
        exit(-1);
    }

    if (LogCtx.verbose) {
        if (level != LEVEL_DEBUG || LogCtx.very_verbose) {
            vsist_logf(filepath, level, format, args);
        }
    }

    va_end(args);
}
/**
 * Populate the global ScanCtx from the parsed command line arguments:
 * one sub-context per parser (archive, cbr, ebook, font, media, ooxml,
 * mobi, text, raw), each wired to the _log/_logf/_store callbacks where it
 * has them, plus the general scan settings and the index descriptor strings.
 *
 * ScanCtx.mime_table is read here (cbr mime lookup), so it must be set
 * before this function is called.
 *
 * @param args parsed scan arguments
 */
void initialize_scan_context(scan_args_t *args) {

    // Arc
    ScanCtx.arc_ctx.mode = args->archive_mode;
    ScanCtx.arc_ctx.log = _log;
    ScanCtx.arc_ctx.logf = _logf;
    ScanCtx.arc_ctx.parse = (parse_callback_t) parse;

    // Cbr
    ScanCtx.cbr_ctx.log = _log;
    ScanCtx.cbr_ctx.logf = _logf;
    ScanCtx.cbr_ctx.store = _store;
    ScanCtx.cbr_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");

    // Ebook
    pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
    ScanCtx.ebook_ctx.content_size = args->content_size;
    ScanCtx.ebook_ctx.tn_size = args->size;
    ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
    ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
    ScanCtx.ebook_ctx.log = _log;
    ScanCtx.ebook_ctx.logf = _logf;
    ScanCtx.ebook_ctx.store = _store;

    // Font
    ScanCtx.font_ctx.enable_tn = args->size > 0;
    ScanCtx.font_ctx.log = _log;
    ScanCtx.font_ctx.logf = _logf;
    ScanCtx.font_ctx.store = _store;

    // Media
    ScanCtx.media_ctx.tn_qscale = args->quality;
    ScanCtx.media_ctx.tn_size = args->size;
    ScanCtx.media_ctx.log = _log;
    ScanCtx.media_ctx.logf = _logf;
    ScanCtx.media_ctx.store = _store;
    // Command line value is in MiB
    ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
    init_media();

    // OOXML
    ScanCtx.ooxml_ctx.content_size = args->content_size;
    ScanCtx.ooxml_ctx.log = _log;
    ScanCtx.ooxml_ctx.logf = _logf;

    // MOBI
    ScanCtx.mobi_ctx.content_size = args->content_size;
    ScanCtx.mobi_ctx.log = _log;
    ScanCtx.mobi_ctx.logf = _logf;

    // TEXT
    ScanCtx.text_ctx.content_size = args->content_size;
    ScanCtx.text_ctx.log = _log;
    ScanCtx.text_ctx.logf = _logf;

    ScanCtx.threads = args->threads;
    ScanCtx.depth = args->depth;

    // strncpy() does not terminate the destination when the source fills it,
    // so copy at most size - 1 bytes and terminate explicitly.
    strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path) - 1);
    ScanCtx.index.path[sizeof(ScanCtx.index.path) - 1] = '\0';

    strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name) - 1);
    ScanCtx.index.desc.name[sizeof(ScanCtx.index.desc.name) - 1] = '\0';

    strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root) - 1);
    ScanCtx.index.desc.root[sizeof(ScanCtx.index.desc.root) - 1] = '\0';

    strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url) - 1);
    ScanCtx.index.desc.rewrite_url[sizeof(ScanCtx.index.desc.rewrite_url) - 1] = '\0';

    ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
    ScanCtx.fast = args->fast;

    // Raw
    ScanCtx.raw_ctx.tn_qscale = args->quality;
    ScanCtx.raw_ctx.tn_size = args->size;
    ScanCtx.raw_ctx.log = _log;
    ScanCtx.raw_ctx.logf = _logf;
    ScanCtx.raw_ctx.store = _store;
}
void sist2_scan(scan_args_t *args) { void sist2_scan(scan_args_t *args) {
ScanCtx.tn_qscale = args->quality;
ScanCtx.tn_size = args->size;
ScanCtx.content_size = args->content_size;
ScanCtx.pool = tpool_create(args->threads, serializer_cleanup);
ScanCtx.threads = args->threads;
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
strcpy(ScanCtx.index.desc.root, args->path);
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
init_dir(ScanCtx.index.path);
ScanCtx.mime_table = mime_get_mime_table(); ScanCtx.mime_table = mime_get_mime_table();
ScanCtx.ext_table = mime_get_ext_table(); ScanCtx.ext_table = mime_get_ext_table();
initialize_scan_context(args);
init_dir(ScanCtx.index.path);
char store_path[PATH_MAX]; char store_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path); snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR); mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
@@ -77,9 +186,18 @@ void sist2_scan(scan_args_t *args) {
DIR *dir = opendir(args->incremental); DIR *dir = opendir(args->incremental);
if (dir == NULL) { if (dir == NULL) {
perror("opendir"); LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno))
return;
} }
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
if (strcmp(original_desc.version, Version) != 0) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", original_desc.version,
Version, INDEX_VERSION_EXTERNAL)
}
struct dirent *de; struct dirent *de;
while ((de = readdir(dir)) != NULL) { while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) { if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
@@ -90,13 +208,17 @@ void sist2_scan(scan_args_t *args) {
} }
closedir(dir); closedir(dir);
printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table)); LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
} }
ScanCtx.pool = tpool_create(args->threads, thread_cleanup);
tpool_start(ScanCtx.pool);
walk_directory_tree(ScanCtx.index.desc.root); walk_directory_tree(ScanCtx.index.desc.root);
tpool_wait(ScanCtx.pool); tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool); tpool_destroy(ScanCtx.pool);
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
if (args->incremental != NULL) { if (args->incremental != NULL) {
char dst_path[PATH_MAX]; char dst_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental); snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
@@ -123,10 +245,10 @@ void sist2_scan(scan_args_t *args) {
store_destroy(ScanCtx.index.store); store_destroy(ScanCtx.index.store);
} }
#ifndef SIST_SCAN_ONLY
void sist2_index(index_args_t *args) { void sist2_index(index_args_t *args) {
IndexCtx.es_url = args->es_url; IndexCtx.es_url = args->es_url;
IndexCtx.batch_size = args->batch_size;
if (!args->print) { if (!args->print) {
elastic_init(args->force_reset); elastic_init(args->force_reset);
@@ -136,15 +258,17 @@ void sist2_index(index_args_t *args) {
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path); snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path);
index_descriptor_t desc = read_index_descriptor(descriptor_path); index_descriptor_t desc = read_index_descriptor(descriptor_path);
if (strcmp(desc.version, Version) != 0) {
fprintf(stderr, "Version mismatch! Index is v%s but executable is v%s\n", desc.version, Version); LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
return;
if (strcmp(desc.version, Version) != 0 && strcmp(desc.version, INDEX_VERSION_EXTERNAL) != 0) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", desc.version, Version,
INDEX_VERSION_EXTERNAL)
} }
DIR *dir = opendir(args->index_path); DIR *dir = opendir(args->index_path);
if (dir == NULL) { if (dir == NULL) {
perror("opendir"); LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno))
return;
} }
index_func f; index_func f;
@@ -159,20 +283,40 @@ void sist2_index(index_args_t *args) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) { if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX]; char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", args->index_path, de->d_name); snprintf(file_path, PATH_MAX, "%s/%s", args->index_path, de->d_name);
read_index(file_path, desc.uuid, f); read_index(file_path, desc.uuid, desc.type, f);
} }
} }
closedir(dir);
if (!args->print) { if (!args->print) {
elastic_flush(); elastic_flush();
destroy_indexer(); destroy_indexer(args->script, desc.uuid);
} }
} }
/**
 * Entry point of the `exec-script` command: reads the descriptor of the
 * index at args->index_path and runs the update script against that
 * index's uuid. Logging is forced on for this command, and args->script
 * is released before returning.
 */
void sist2_exec_script(exec_args_t *args) {

    LogCtx.verbose = TRUE;

    char desc_file[PATH_MAX];
    snprintf(desc_file, sizeof(desc_file), "%s/descriptor.json", args->index_path);

    index_descriptor_t desc = read_index_descriptor(desc_file);

    IndexCtx.es_url = args->es_url;

    LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)

    execute_update_script(args->script, desc.uuid);

    free(args->script);
}
void sist2_web(web_args_t *args) { void sist2_web(web_args_t *args) {
WebCtx.es_url = args->es_url; WebCtx.es_url = args->es_url;
WebCtx.index_count = args->index_count; WebCtx.index_count = args->index_count;
WebCtx.auth_user = args->auth_user;
WebCtx.auth_pass = args->auth_pass;
WebCtx.auth_enabled = args->auth_enabled;
for (int i = 0; i < args->index_count; i++) { for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]); char *abs_path = abspath(args->indices[i]);
@@ -192,51 +336,73 @@ void sist2_web(web_args_t *args) {
free(abs_path); free(abs_path);
} }
serve(args->bind, args->port); serve(args->listen_address);
} }
#endif
int main(int argc, const char *argv[]) { int main(int argc, const char *argv[]) {
setlocale(LC_ALL, "");
global_init();
scan_args_t *scan_args = scan_args_create(); scan_args_t *scan_args = scan_args_create();
#ifndef SIST_SCAN_ONLY
index_args_t *index_args = index_args_create(); index_args_t *index_args = index_args_create();
web_args_t *web_args = web_args_create(); web_args_t *web_args = web_args_create();
#endif exec_args_t *exec_args = exec_args_create();
char * common_es_url = NULL; int arg_version = 0;
char *common_es_url = NULL;
char *common_script_path = NULL;
struct argparse_option options[] = { struct argparse_option options[] = {
OPT_HELP(), OPT_HELP(),
OPT_BOOLEAN('v', "version", &arg_version, "Show version and exit"),
OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging"),
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
OPT_GROUP("Scan options"), OPT_GROUP("Scan options"),
OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"), OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
OPT_FLOAT('q', "quality", &scan_args->quality, OPT_FLOAT('q', "quality", &scan_args->quality,
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=15"), "Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
OPT_INTEGER(0, "size", &scan_args->size, "Thumbnail size, in pixels. DEFAULT=200"), OPT_INTEGER(0, "size", &scan_args->size,
"Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500"),
OPT_INTEGER(0, "content-size", &scan_args->content_size, OPT_INTEGER(0, "content-size", &scan_args->content_size,
"Number of bytes to be extracted from text documents. DEFAULT=4096"), "Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768"),
OPT_STRING(0, "incremental", &scan_args->incremental, OPT_STRING(0, "incremental", &scan_args->incremental,
"Reuse an existing index and only scan modified files."), "Reuse an existing index and only scan modified files."),
OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"), OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."), OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."),
OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: (name of the directory)"), OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: (name of the directory)"),
OPT_INTEGER(0, "depth", &scan_args->depth, "Scan up to DEPTH subdirectories deep. "
"Use 0 to only scan files in PATH. DEFAULT: -1"),
OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
"skip: Don't parse, list: only get file names as text, "
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see "
"which are installed on your machine)"),
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
"(see USAGE.md). DEFAULT: 0.0005"),
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
"Maximum memory buffer size per thread in MB for files inside archives "
"(see USAGE.md). DEFAULT: 2000"),
#ifndef SIST_SCAN_ONLY
OPT_GROUP("Index options"), OPT_GROUP("Index options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."), OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. " OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
"(You must use this option the first time you use the index command)"), "(You must use this option the first time you use the index command)"),
OPT_GROUP("Web options"), OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "bind", &web_args->bind, "Listen on this address. DEFAULT=localhost"), OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
OPT_STRING(0, "port", &web_args->port, "Listen on this port. DEFAULT=4090"), OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
#endif
OPT_GROUP("Exec-script options"),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_END(), OPT_END(),
}; };
@@ -246,30 +412,36 @@ int main(int argc, const char *argv[]) {
argparse_describe(&argparse, DESCRIPTION, EPILOG); argparse_describe(&argparse, DESCRIPTION, EPILOG);
argc = argparse_parse(&argparse, argc, argv); argc = argparse_parse(&argparse, argc, argv);
#ifndef SIST_SCAN_ONLY if (arg_version) {
printf(Version);
goto end;
}
if (LogCtx.very_verbose != 0) {
LogCtx.verbose = 1;
}
web_args->es_url = common_es_url; web_args->es_url = common_es_url;
index_args->es_url = common_es_url; index_args->es_url = common_es_url;
#endif index_args->script_path = common_script_path;
exec_args->script_path = common_script_path;
if (argc == 0) { if (argc == 0) {
argparse_usage(&argparse); argparse_usage(&argparse);
return 1; goto end;
} else if (strcmp(argv[0], "scan") == 0) { } else if (strcmp(argv[0], "scan") == 0) {
int err = scan_args_validate(scan_args, argc, argv); int err = scan_args_validate(scan_args, argc, argv);
if (err != 0) { if (err != 0) {
return err; goto end;
} }
sist2_scan(scan_args); sist2_scan(scan_args);
} } else if (strcmp(argv[0], "index") == 0) {
#ifndef SIST_SCAN_ONLY
else if (strcmp(argv[0], "index") == 0) {
int err = index_args_validate(index_args, argc, argv); int err = index_args_validate(index_args, argc, argv);
if (err != 0) { if (err != 0) {
return err; goto end;
} }
sist2_index(index_args); sist2_index(index_args);
@@ -277,17 +449,30 @@ int main(int argc, const char *argv[]) {
int err = web_args_validate(web_args, argc, argv); int err = web_args_validate(web_args, argc, argv);
if (err != 0) { if (err != 0) {
return err; goto end;
} }
sist2_web(web_args); sist2_web(web_args);
} } else if (strcmp(argv[0], "exec-script") == 0) {
#endif
else { int err = exec_args_validate(exec_args, argc, argv);
if (err != 0) {
goto end;
}
sist2_exec_script(exec_args);
} else {
fprintf(stderr, "Invalid command: '%s'\n", argv[0]); fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
argparse_usage(&argparse); argparse_usage(&argparse);
return 1; goto end;
} }
printf("\n"); printf("\n");
end:
scan_args_destroy(scan_args);
index_args_destroy(index_args);
web_args_destroy(web_args);
exec_args_destroy(exec_args);
return 0; return 0;
} }

View File

@@ -1,215 +0,0 @@
#include "font.h"
#include "ft2build.h"
#include "freetype/freetype.h"
#include "src/ctx.h"
// FreeType library handle, one per thread (__thread); parse_font() initialises
// it on first use when it is still NULL.
__thread FT_Library library = NULL;
// Size of a rendered line of text, as computed by text_dimension().
// width/height are used as the dimensions of the 8-bit bitmap allocated in
// parse_font(); baseline is set to the largest glyph descent of the text.
typedef struct text_dimensions {
unsigned int width;
unsigned int height;
unsigned int baseline;
} text_dimensions_t;
// Flattened view of a FreeType glyph slot, filled in by ft_glyph_to_glyph().
typedef struct glyph {
// Copied from slot->bitmap_top
unsigned int top;
// Copied from slot->bitmap.rows
unsigned int height;
// Copied from slot->bitmap.width
unsigned int width;
// Derived from height and top (see ft_glyph_to_glyph())
unsigned int descent;
unsigned int ascent;
// slot->advance.x divided by 64
unsigned int advance_width;
// Points at slot->bitmap.buffer; not a copy
unsigned char *pixmap;
} glyph_t;
/**
 * Horizontal kerning adjustment, in whole pixels, between the previous
 * character and the current one.
 *
 * FT_Get_Kerning() works on glyph indices and takes the left-hand glyph
 * first, so both character codes are translated with FT_Get_Char_Index()
 * and passed in (previous, current) order.
 *
 * @param c    current character
 * @param pc   previous character, or 0 when `c` is the first character
 * @param face face the text is rendered with
 * @return kerning along x in pixels; 0 when there is no previous character
 *         or the kerning lookup fails
 */
__always_inline
int kerning_offset(char c, char pc, FT_Face face) {
    if (pc == 0) {
        // Nothing to kern against at the start of the text
        return 0;
    }

    FT_UInt left = FT_Get_Char_Index(face, (FT_ULong) (unsigned char) pc);
    FT_UInt right = FT_Get_Char_Index(face, (FT_ULong) (unsigned char) c);

    FT_Vector kerning = {0, 0};
    if (FT_Get_Kerning(face, left, right, FT_KERNING_DEFAULT, &kerning) != 0) {
        return 0;
    }

    // FT_KERNING_DEFAULT yields 26.6 fixed point values: 64 units per pixel
    return (int) (kerning.x / 64);
}
/**
 * Copy the fields of a FreeType glyph slot that the text renderer needs
 * into a glyph_t.
 *
 * descent/ascent are computed with signed arithmetic and clamped at zero.
 * The fields of glyph_t are unsigned, so computing `height - top` directly
 * on them wraps around to a huge value whenever the bitmap sits entirely
 * above the baseline (top > height), e.g. for '-' or '"'.
 *
 * @param slot glyph slot of the face, as left by FT_Load_Char()
 * @return glyph description; `pixmap` aliases slot->bitmap.buffer and is
 *         only valid until the next glyph is loaded into the slot
 */
__always_inline
glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
    glyph_t glyph;

    glyph.pixmap = slot->bitmap.buffer;
    glyph.width = slot->bitmap.width;
    glyph.height = slot->bitmap.rows;
    glyph.top = slot->bitmap_top;
    glyph.advance_width = slot->advance.x / 64;

    int rows = (int) slot->bitmap.rows;
    int top = (int) slot->bitmap_top;

    // Rows of the bitmap that lie below the baseline
    int descent = rows - top;
    if (descent < 0) {
        descent = 0;
    }

    // Rows at or above the baseline
    int ascent = (top > rows ? top : rows) - descent;
    if (ascent < 0) {
        ascent = 0;
    }

    glyph.descent = (unsigned int) descent;
    glyph.ascent = (unsigned int) ascent;

    return glyph;
}
// NOTE(review): the body is empty, so control reaches the end of a function
// declared to return glyph_t without returning a value. Neither
// text_dimension() nor parse_font() calls it; confirm it is unused before
// relying on it, or remove it.
__always_inline
glyph_t get_glyph(char character, FT_Face face) {
}
/**
 * Measure the bitmap needed to render `text` with `face` at its current size.
 *
 * width    - sum over all glyphs of max(advance, bitmap width) plus kerning
 * height   - largest ascent + largest descent
 * baseline - largest descent
 *
 * Characters that FreeType fails to load are skipped, the same way the
 * drawing loop in parse_font() skips them; otherwise the glyph slot would
 * still describe the previous character and be measured a second time.
 *
 * @param text NUL-terminated string to measure
 * @param face face to measure with
 * @return dimensions of the text
 */
text_dimensions_t text_dimension(char *text, FT_Face face) {
    text_dimensions_t dimensions;
    dimensions.width = 0;

    int num_chars = (int) strlen(text);

    unsigned int max_ascent = 0;
    unsigned int max_descent = 0;

    char pc = 0;
    for (int i = 0; i < num_chars; i++) {
        char c = text[i];

        if (FT_Load_Char(face, c, 0) != 0) {
            continue;
        }
        glyph_t glyph = ft_glyph_to_glyph(face->glyph);

        max_descent = MAX(max_descent, glyph.descent);
        max_ascent = MAX(max_ascent, glyph.ascent);

        int kerning_x = kerning_offset(c, pc, face);
        dimensions.width += MAX(glyph.advance_width, glyph.width) + kerning_x;
        pc = c;
    }

    dimensions.height = max_ascent + max_descent;
    dimensions.baseline = max_descent;

    return dimensions;
}
/**
 * OR the pixels of a glyph into the text bitmap, with the glyph's first
 * pixel placed at column `x`, row `y`.
 *
 * The text bitmap has text_info.width bytes per row and text_info.height
 * rows. Any pixel whose linear index falls outside of it is dropped.
 *
 * @param glyph     glyph to draw; pixmap is read as width * height bytes
 * @param x         destination column of the glyph's first pixel
 * @param y         destination row of the glyph's first pixel
 * @param text_info dimensions of `bitmap`
 * @param bitmap    destination buffer
 */
void draw_glyph(glyph_t *glyph, int x, int y, struct text_dimensions text_info, unsigned char *bitmap) {
    const unsigned int stride = text_info.width;
    const unsigned int limit = text_info.width * text_info.height;
    const unsigned int origin = y * text_info.width + x;

    for (unsigned int row = 0; row < glyph->height; row++) {
        const unsigned int in_row = row * glyph->width;
        const unsigned int out_row = origin + row * stride;

        for (unsigned int col = 0; col < glyph->width; col++) {
            const unsigned int out = out_row + col;

            if (out < limit) {
                bitmap[out] |= glyph->pixmap[in_row + col];
            }
        }
    }
}
/**
 * Serialise an 8-bit bitmap as a BMP file (BITMAPINFOHEADER, 256 entry
 * grayscale palette) into `buf`.
 *
 * Each pixel row is padded to a multiple of 4 bytes based on the row width.
 * The padding must not be derived from the absolute write position: the
 * pixel array starts right after the 14 + 40 + 1024 = 1078 byte header,
 * which is not itself a multiple of 4.
 *
 * @param buf        output buffer; expected to be empty, the file size is
 *                   patched in at offset 2 from its start
 * @param dimensions width and height of `bitmap`
 * @param bitmap     dimensions.width * dimensions.height bytes, top row first
 */
void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned char *bitmap) {

    dyn_buffer_write_short(buf, 0x4D42); // Magic
    dyn_buffer_write_int(buf, 0); // Size placeholder
    dyn_buffer_write_int(buf, 0x5157); //Reserved
    dyn_buffer_write_int(buf, 14 + 40 + 256 * 4); // pixels offset

    dyn_buffer_write_int(buf, 40); // DIB size
    dyn_buffer_write_int(buf, (int) dimensions.width);
    dyn_buffer_write_int(buf, (int) dimensions.height);
    dyn_buffer_write_short(buf, 1); // Color planes
    dyn_buffer_write_short(buf, 8); // bits per pixel
    dyn_buffer_write_int(buf, 0); // compression
    dyn_buffer_write_int(buf, 0); // Ignored
    dyn_buffer_write_int(buf, 3800); // hres
    dyn_buffer_write_int(buf, 3800); // vres
    dyn_buffer_write_int(buf, 256); // Color count
    dyn_buffer_write_int(buf, 0); // Ignored

    // RGBA32 Color table (Grayscale)
    for (int i = 255; i >= 0; i--) {
        dyn_buffer_write_int(buf, i + (i << 8) + (i << 16));
    }

    // Pixel array: write from bottom to top, with rows padded to multiples of 4-bytes
    const unsigned int row_padding = (4 - dimensions.width % 4) % 4;
    for (int y = (int) dimensions.height - 1; y >= 0; y--) {
        for (unsigned int x = 0; x < dimensions.width; x++) {
            dyn_buffer_write_char(buf, (char) bitmap[y * dimensions.width + x]);
        }
        for (unsigned int p = 0; p < row_padding; p++) {
            dyn_buffer_write_char(buf, 0);
        }
    }

    // Size: offset 2 is not int-aligned, so copy the bytes instead of
    // storing through a casted pointer
    int file_size = (int) buf->cur;
    memcpy((char *) buf->buf + 2, &file_size, sizeof(file_size));
}
/**
 * Extract the name of a font and store a preview of that name, rendered
 * with the font itself, as the document's thumbnail.
 *
 * The font name ("<family> <style>", or just the family when the style is
 * missing or starts with '?') is appended to the document as MetaFontName.
 * The name is then rendered at 64px into an 8-bit bitmap, wrapped in a BMP
 * file and written to the index store under the document's uuid.
 *
 * Nothing is stored when the buffer cannot be opened as a font face. When the
 * pixel size cannot be set, the name is still recorded but no preview is
 * written.
 *
 * @param buf     font file contents
 * @param buf_len size of `buf` in bytes
 * @param doc     document to attach the metadata and thumbnail to
 */
void parse_font(const char *buf, size_t buf_len, document_t *doc) {
    if (library == NULL) {
        if (FT_Init_FreeType(&library) != 0) {
            library = NULL;
            return;
        }
    }

    FT_Face face;
    FT_Error err = FT_New_Memory_Face(library, (unsigned char *) buf, buf_len, 0, &face);
    if (err != 0) {
        return;
    }

    // Names come from the font file: always bound the copy and never hand a
    // NULL pointer to a %s conversion
    const char *family_name = face->family_name != NULL ? face->family_name : "(null)";

    char font_name[1024];
    if (face->style_name == NULL || *(face->style_name) == '?') {
        snprintf(font_name, sizeof(font_name), "%s", family_name);
    } else {
        snprintf(font_name, sizeof(font_name), "%s %s", family_name, face->style_name);
    }

    // +1: room for the NUL terminator written by strcpy()
    meta_line_t *meta_name = malloc(sizeof(meta_line_t) + strlen(font_name) + 1);
    if (meta_name == NULL) {
        FT_Done_Face(face);
        return;
    }
    meta_name->key = MetaFontName;
    strcpy(meta_name->strval, font_name);
    APPEND_META(doc, meta_name)

    int pixel = 64;
    int num_chars = (int) strlen(font_name);

    err = FT_Set_Pixel_Sizes(face, 0, pixel);
    if (err != 0) {
        FT_Done_Face(face);
        return;
    }

    text_dimensions_t dimensions = text_dimension(font_name, face);

    // calloc(n, m) rejects a width * height product that would overflow
    unsigned char *bitmap = calloc(dimensions.width, dimensions.height);
    if (bitmap == NULL && dimensions.width != 0 && dimensions.height != 0) {
        FT_Done_Face(face);
        return;
    }

    FT_Vector pen;
    pen.x = 0;

    char pc = 0;
    for (int i = 0; i < num_chars; i++) {
        char c = font_name[i];

        err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
        if (err != 0) {
            continue;
        }

        glyph_t glyph = ft_glyph_to_glyph(face->glyph);

        pen.x += kerning_offset(c, pc, face);
        // Align every glyph on the common baseline of the text
        pen.y = dimensions.height - glyph.ascent - dimensions.baseline;

        draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap);

        pen.x += glyph.advance_width;
        pc = c;
    }

    dyn_buffer_t bmp_data = dyn_buffer_create();
    bmp_format(&bmp_data, dimensions, bitmap);

    store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) bmp_data.buf, bmp_data.cur);

    dyn_buffer_destroy(&bmp_data);
    free(bitmap);
    FT_Done_Face(face);
}

View File

@@ -1,9 +0,0 @@
#ifndef SIST2_FONT_H
#define SIST2_FONT_H
#include "src/sist.h"
void parse_font(const char * buf, size_t buf_len, document_t *doc);
#endif

View File

@@ -1,278 +0,0 @@
#include "src/sist.h"
#include "src/ctx.h"
/**
 * Create and open an MJPEG encoder for frames of dstW x dstH pixels.
 *
 * @param dstW   frame width
 * @param dstH   frame height
 * @param qscale value assigned to the encoder's i_quant_factor
 * @return an opened encoder context that the caller releases with
 *         avcodec_free_context(), or NULL when the encoder is unavailable,
 *         cannot be allocated or cannot be opened
 */
AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {

    AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
    if (jpeg_codec == NULL) {
        return NULL;
    }

    AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
    if (jpeg == NULL) {
        return NULL;
    }

    jpeg->width = dstW;
    jpeg->height = dstH;
    jpeg->time_base.den = 1000000;
    jpeg->time_base.num = 1;
    jpeg->i_quant_factor = qscale;
    jpeg->pix_fmt = AV_PIX_FMT_YUVJ420P;

    int ret = avcodec_open2(jpeg, jpeg_codec, NULL);
    if (ret != 0) {
        printf("Could not open jpeg encoder: %s!\n", av_err2str(ret));
        // The context is still owned by us when opening fails
        avcodec_free_context(&jpeg);
        return NULL;
    }

    return jpeg;
}
/**
 * Convert a decoded frame to YUVJ420P, scaled down so that its longer side
 * is `size` pixels. Frames that already fit in size x size keep their
 * dimensions. The aspect ratio is preserved.
 *
 * Both target dimensions are kept at 1 or more: with a very elongated frame
 * the shorter side would otherwise truncate to 0.
 *
 * @param decoder decoder the frame came from; provides the source
 *                width, height and pixel format
 * @param frame   decoded frame
 * @param size    maximum width/height of the result
 * @return newly allocated frame. The caller frees the pixel buffer with
 *         av_free(*scaled_frame->data), then the frame with av_frame_free()
 */
AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
    AVFrame *scaled_frame = av_frame_alloc();

    int dstW;
    int dstH;
    if (frame->width <= size && frame->height <= size) {
        dstW = frame->width;
        dstH = frame->height;
    } else {
        double ratio = (double) frame->width / frame->height;
        if (frame->width > frame->height) {
            dstW = size;
            dstH = (int) (size / ratio);
        } else {
            dstW = (int) (size * ratio);
            dstH = size;
        }
    }

    if (dstW < 1) {
        dstW = 1;
    }
    if (dstH < 1) {
        dstH = 1;
    }

    struct SwsContext *ctx = sws_getContext(
            decoder->width, decoder->height, decoder->pix_fmt,
            dstW, dstH, AV_PIX_FMT_YUVJ420P,
            SWS_FAST_BILINEAR, 0, 0, 0
    );

    // Backing storage for the planes of scaled_frame
    int dst_buf_len = avpicture_get_size(AV_PIX_FMT_YUVJ420P, dstW, dstH);
    uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len);

    avpicture_fill((AVPicture *) scaled_frame, dst_buf, AV_PIX_FMT_YUVJ420P, dstW, dstH);

    sws_scale(ctx,
              (const uint8_t *const *) frame->data, frame->linesize,
              0, decoder->height,
              scaled_frame->data, scaled_frame->linesize
    );

    scaled_frame->width = dstW;
    scaled_frame->height = dstH;
    scaled_frame->format = AV_PIX_FMT_YUV420P;

    sws_freeContext(ctx);

    return scaled_frame;
}
/**
 * Decode the next frame of stream `stream_idx`.
 *
 * Packets of other streams are discarded. Packets are fed to the decoder
 * until it stops answering EAGAIN ("needs more input").
 *
 * @param pFormatCtx opened input
 * @param decoder    opened decoder for the stream
 * @param stream_idx index of the stream to read from
 * @return the decoded frame, released by the caller with av_frame_free(),
 *         or NULL on end of file, read error or decoding error
 */
AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx) {
    AVFrame *frame = av_frame_alloc();
    if (frame == NULL) {
        return NULL;
    }

    AVPacket avPacket;
    av_init_packet(&avPacket);

    int receive_ret = AVERROR(EAGAIN);
    while (receive_ret == AVERROR(EAGAIN)) {
        // Get video frame
        while (1) {
            int read_frame_ret = av_read_frame(pFormatCtx, &avPacket);

            if (read_frame_ret != 0) {
                if (read_frame_ret != AVERROR_EOF) {
                    fprintf(stderr, "Error reading frame: %s\n", av_err2str(read_frame_ret));
                }
                av_frame_free(&frame);
                av_packet_unref(&avPacket);
                return NULL;
            }

            //Ignore audio/other frames
            if (avPacket.stream_index != stream_idx) {
                av_packet_unref(&avPacket);
                continue;
            }
            break;
        }

        // Feed it to decoder
        int decode_ret = avcodec_send_packet(decoder, &avPacket);
        if (decode_ret != 0) {
            printf("Error decoding frame: %s\n", av_err2str(decode_ret));
        }
        av_packet_unref(&avPacket);
        receive_ret = avcodec_receive_frame(decoder, frame);
    }

    // Any result other than success means `frame` holds no picture
    if (receive_ret != 0) {
        av_frame_free(&frame);
        return NULL;
    }

    return frame;
}
// Append a copy of `value`, including its NUL terminator, to the document
// metadata under `key`. Does nothing when the allocation fails.
static void append_audio_tag_meta(document_t *doc, int key, const char *value) {
    size_t len = strlen(value);

    meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len + 1);
    if (meta_tag == NULL) {
        return;
    }

    meta_tag->key = key;
    memcpy(meta_tag->strval, value, len + 1);
    APPEND_META(doc, meta_tag)
}

/**
 * Copy the artist, genre, title, album_artist and album tags of a media
 * file into the document metadata.
 *
 * Tag names are matched case-insensitively: every key of the container's
 * metadata dictionary is lower-cased in place before it is compared.
 *
 * @param pFormatCtx opened input whose metadata dictionary is read
 * @param doc        document to append the metadata to
 */
void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
    static const struct {
        const char *name;
        int key;
    } audio_tag_keys[] = {
            {"artist",       MetaArtist},
            {"genre",        MetaGenre},
            {"title",        MetaTitle},
            {"album_artist", MetaAlbumArtist},
            {"album",        MetaAlbum},
    };
    const size_t audio_tag_count = sizeof(audio_tag_keys) / sizeof(audio_tag_keys[0]);

    AVDictionaryEntry *tag = NULL;
    while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
        // tolower() is only defined for unsigned char values and EOF
        for (char *key = tag->key; *key; ++key) {
            *key = (char) tolower((unsigned char) *key);
        }

        for (size_t i = 0; i < audio_tag_count; i++) {
            if (strcmp(tag->key, audio_tag_keys[i].name) == 0) {
                append_audio_tag_meta(doc, audio_tag_keys[i].key, tag->value);
                break;
            }
        }
    }
}
/**
 * Extract metadata from an audio, video or image file and, when it has a
 * video stream, store a JPEG thumbnail of one frame.
 *
 * Metadata: codec of the selected audio and video streams, audio tags
 * (see append_audio_meta()), width and height of the video stream and,
 * when that stream has more than one frame, duration and bitrate.
 *
 * Thumbnail: skipped for streams that are 20 pixels or less in either
 * direction. For multi-frame streams other than GIF the frame is taken
 * around 10% into the stream. The frame is scaled to ScanCtx.tn_size,
 * encoded as JPEG and written to the index store under the document's uuid.
 * No thumbnail is written when the decoder or the encoder cannot be set up
 * or when encoding fails.
 *
 * @param filepath path of the file to open
 * @param doc      document to attach metadata and thumbnail to
 */
void parse_media(const char *filepath, document_t *doc) {

    int video_stream = -1;
    int audio_stream = -1;

    AVCodecContext *decoder = NULL;
    AVCodecContext *jpeg_encoder = NULL;
    AVFrame *frame = NULL;
    AVFrame *scaled_frame = NULL;

    AVFormatContext *pFormatCtx = avformat_alloc_context();
    if (pFormatCtx == NULL) {
        fprintf(stderr, "Could not allocate AVFormatContext! %s \n", filepath);
        return;
    }

    // On failure avformat_open_input() releases the context itself
    int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
    if (res < 0) {
        printf("ERR%s %s\n", filepath, av_err2str(res));
        return;
    }

    avformat_find_stream_info(pFormatCtx, NULL);

    // Walk the streams from last to first and keep the first audio and the
    // first video stream encountered in that order
    for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) {
        AVStream *stream = pFormatCtx->streams[i];

        if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            if (audio_stream == -1) {
                meta_line_t *meta_audio = malloc(sizeof(meta_line_t));
                meta_audio->key = MetaMediaAudioCodec;
                meta_audio->intval = stream->codecpar->codec_id;
                APPEND_META(doc, meta_audio)

                append_audio_meta(pFormatCtx, doc);
                audio_stream = i;
            }
        } else if (stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
            if (video_stream == -1) {
                meta_line_t *meta_vid = malloc(sizeof(meta_line_t));
                meta_vid->key = MetaMediaVideoCodec;
                meta_vid->intval = stream->codecpar->codec_id;
                APPEND_META(doc, meta_vid)

                meta_line_t *meta_w = malloc(sizeof(meta_line_t));
                meta_w->key = MetaWidth;
                meta_w->intval = stream->codecpar->width;
                APPEND_META(doc, meta_w)

                meta_line_t *meta_h = malloc(sizeof(meta_line_t));
                meta_h->key = MetaHeight;
                meta_h->intval = stream->codecpar->height;
                APPEND_META(doc, meta_h)

                video_stream = i;
            }
        }
    }

    if (video_stream != -1) {
        AVStream *stream = pFormatCtx->streams[video_stream];

        if (stream->nb_frames > 1) {
            //This is a video (not a still image)
            meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
            meta_duration->key = MetaMediaDuration;
            meta_duration->longval = pFormatCtx->duration / AV_TIME_BASE;
            APPEND_META(doc, meta_duration)

            meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
            meta_bitrate->key = MetaMediaBitrate;
            meta_bitrate->intval = pFormatCtx->bit_rate;
            APPEND_META(doc, meta_bitrate)
        }

        if (stream->codecpar->width <= 20 || stream->codecpar->height <= 20) {
            goto cleanup;
        }

        // Decoder
        AVCodec *video_codec = avcodec_find_decoder(stream->codecpar->codec_id);
        if (video_codec == NULL) {
            goto cleanup;
        }
        decoder = avcodec_alloc_context3(video_codec);
        if (decoder == NULL) {
            goto cleanup;
        }
        if (avcodec_parameters_to_context(decoder, stream->codecpar) < 0 ||
            avcodec_open2(decoder, video_codec, NULL) != 0) {
            goto cleanup;
        }

        //Seek
        if (stream->nb_frames > 1 && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
            int seek_ret = 0;
            for (int i = 20; i >= 0; i--) {
                seek_ret = av_seek_frame(pFormatCtx, video_stream,
                                         stream->duration * 0.10, 0);
                if (seek_ret == 0) {
                    break;
                }
            }
        }

        frame = read_frame(pFormatCtx, decoder, video_stream);
        if (frame == NULL) {
            goto cleanup;
        }

        // Scale frame
        scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size);

        // Encode frame to jpeg
        jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ScanCtx.tn_qscale);
        if (jpeg_encoder == NULL) {
            goto cleanup;
        }

        if (avcodec_send_frame(jpeg_encoder, scaled_frame) == 0) {
            AVPacket jpeg_packet;
            av_init_packet(&jpeg_packet);

            // Only a successfully received packet has valid data/size
            if (avcodec_receive_packet(jpeg_encoder, &jpeg_packet) == 0) {
                // Save thumbnail
                store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid),
                            (char *) jpeg_packet.data, jpeg_packet.size);
                av_packet_unref(&jpeg_packet);
            }
        }
    }

cleanup:
    if (scaled_frame != NULL) {
        // The pixel buffer was allocated separately in scale_frame()
        av_free(*scaled_frame->data);
        av_frame_free(&scaled_frame);
    }
    av_frame_free(&frame);
    avcodec_free_context(&jpeg_encoder);
    avcodec_free_context(&decoder);

    // Closes the input, frees the context and resets the pointer to NULL
    avformat_close_input(&pFormatCtx);
}

View File

@@ -1,11 +0,0 @@
#ifndef SIST2_MEDIA_H
#define SIST2_MEDIA_H
#include "src/sist.h"
// Size threshold for video files (64 KiB). Parenthesised so that the macro
// expands to a single value inside any surrounding expression.
#define MIN_VIDEO_SIZE (1024 * 64)
void parse_media(const char * filepath, document_t *doc);
#endif

View File

@@ -1,10 +1,12 @@
#include "mime.h" #include "mime.h"
unsigned int mime_get_mime_by_ext(GHashTable *ext_table, const char * ext) { unsigned int mime_get_mime_by_ext(GHashTable *ext_table, const char * ext) {
char lower[64]; char lower[8];
char *p = lower; char *p = lower;
while ((*ext)) { int cnt = 0;
while ((*ext) != '\0' && cnt + 1 < sizeof(lower)) {
*p++ = (char)tolower(*ext++); *p++ = (char)tolower(*ext++);
cnt++;
} }
*p = '\0'; *p = '\0';
return (size_t) g_hash_table_lookup(ext_table, lower); return (size_t) g_hash_table_lookup(ext_table, lower);

View File

@@ -1,14 +1,14 @@
#ifndef SIST2_MIME_H #ifndef SIST2_MIME_H
#define SIST2_MIME_H #define SIST2_MIME_H
#include "src/sist.h" #include "../sist.h"
#define MAJOR_MIME(mime_id) (mime_id & 0x0FFF0000) >> 16 #define MAJOR_MIME(mime_id) (mime_id & 0x000F0000) >> 16
#define MIME_EMPTY 1 #define MIME_EMPTY 1
#define DONT_PARSE 0x80000000 #define DONT_PARSE 0x80000000
#define SHOULD_PARSE(mime_id) (mime_id & DONT_PARSE) != DONT_PARSE #define SHOULD_PARSE(mime_id) (ScanCtx.fast == 0 && (mime_id & DONT_PARSE) != DONT_PARSE && mime_id != 0)
#define PDF_MASK 0x40000000 #define PDF_MASK 0x40000000
#define IS_PDF(mime_id) (mime_id & PDF_MASK) == PDF_MASK #define IS_PDF(mime_id) (mime_id & PDF_MASK) == PDF_MASK
@@ -16,6 +16,24 @@
#define FONT_MASK 0x20000000 #define FONT_MASK 0x20000000
#define IS_FONT(mime_id) (mime_id & FONT_MASK) == FONT_MASK #define IS_FONT(mime_id) (mime_id & FONT_MASK) == FONT_MASK
#define ARC_MASK 0x10000000
#define IS_ARC(mime_id) (mime_id & ARC_MASK) == ARC_MASK
#define ARC_FILTER_MASK 0x08000000
#define IS_ARC_FILTER(mime_id) (mime_id & ARC_FILTER_MASK) == ARC_FILTER_MASK
#define DOC_MASK 0x04000000
#define IS_DOC(mime_id) (mime_id & DOC_MASK) == DOC_MASK
#define MOBI_MASK 0x02000000
#define IS_MOBI(mime_id) (mime_id & MOBI_MASK) == MOBI_MASK
#define MARKUP_MASK 0x01000000
#define IS_MARKUP(mime_id) (mime_id & MARKUP_MASK) == MARKUP_MASK
#define RAW_MASK 0x00800000
#define IS_RAW(mime_id) (mime_id & RAW_MASK) == RAW_MASK
enum major_mime { enum major_mime {
MimeInvalid = 0, MimeInvalid = 0,
MimeModel = 1, MimeModel = 1,

File diff suppressed because it is too large Load Diff

View File

@@ -1,34 +1,41 @@
#include "parse.h"
#include "src/sist.h" #include "src/sist.h"
#include "src/ctx.h" #include "src/ctx.h"
#include "mime.h"
#include "src/io/serialize.h"
__thread magic_t Magic; #include <magic.h>
void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) {
void *full_buf; #define MIN_VIDEO_SIZE 1024 * 64
#define MIN_IMAGE_SIZE 1024 * 2
if (job->info.st_size <= bytes_read) { int fs_read(struct vfile *f, void *buf, size_t size) {
full_buf = malloc(job->info.st_size);
memcpy(full_buf, buf, job->info.st_size); if (f->fd == -1) {
} else { f->fd = open(f->filepath, O_RDONLY);
if (*fd == -1) { if (f->fd == -1) {
*fd = open(job->filepath, O_RDONLY); LOG_ERRORF(f->filepath, "open(): [%d] %s", errno, strerror(errno))
if (*fd == -1) { return -1;
perror("open");
printf("%s\n", job->filepath);
free(job);
return NULL;
}
}
full_buf = malloc(job->info.st_size);
memcpy(full_buf, buf, bytes_read);
int ret = read(*fd, full_buf + bytes_read, job->info.st_size - bytes_read);
if (ret == -1) {
perror("read");
} }
} }
return full_buf; return read(f->fd, buf, size);
}
#define CLOSE_FILE(f) if (f.close != NULL) {f.close(&f);};
void fs_close(struct vfile *f) {
if (f->fd != -1) {
close(f->fd);
}
}
void fs_reset(struct vfile *f) {
if (f->fd != -1) {
lseek(f->fd, 0, SEEK_SET);
}
} }
void parse(void *arg) { void parse(void *arg) {
@@ -36,91 +43,132 @@ void parse(void *arg) {
parse_job_t *job = arg; parse_job_t *job = arg;
document_t doc; document_t doc;
if (incremental_get(ScanCtx.original_table, job->info.st_ino) == job->info.st_mtim.tv_sec) { int inc_ts = incremental_get(ScanCtx.original_table, job->vfile.info.st_ino);
incremental_mark_file_for_copy(ScanCtx.copy_table, job->info.st_ino); if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
free(job); incremental_mark_file_for_copy(ScanCtx.copy_table, job->vfile.info.st_ino);
return; return;
} }
if (Magic == NULL) {
Magic = magic_open(MAGIC_MIME_TYPE);
magic_load(Magic, NULL);
}
doc.filepath = job->filepath; doc.filepath = job->filepath;
doc.ext = (short) job->ext; doc.ext = (short) job->ext;
doc.base = (short) job->base; doc.base = (short) job->base;
doc.meta_head = NULL; doc.meta_head = NULL;
doc.meta_tail = NULL; doc.meta_tail = NULL;
doc.mime = 0; doc.mime = 0;
doc.size = job->info.st_size; doc.size = job->vfile.info.st_size;
doc.ino = job->info.st_ino; doc.ino = job->vfile.info.st_ino;
doc.mtime = job->info.st_mtim.tv_sec; doc.mtime = job->vfile.info.st_mtim.tv_sec;
uuid_generate(doc.uuid); uuid_generate(doc.uuid);
char *buf[PARSE_BUF_SIZE]; char *buf[MAGIC_BUF_SIZE];
if (job->info.st_size == 0) { if (LogCtx.very_verbose) {
char uuid_str[UUID_STR_LEN];
uuid_unparse(doc.uuid, uuid_str);
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", uuid_str)
}
if (job->vfile.info.st_size == 0) {
doc.mime = MIME_EMPTY; doc.mime = MIME_EMPTY;
} else if (*(job->filepath + job->ext) != '\0') { } else if (*(job->filepath + job->ext) != '\0' && (job->ext - job->base != 1)) {
doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext); doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
} }
int fd = -1;
int bytes_read = 0; int bytes_read = 0;
if (doc.mime == 0) { if (doc.mime == 0 && !ScanCtx.fast) {
// Get mime type with libmagic // Get mime type with libmagic
fd = open(job->filepath, O_RDONLY); if (!job->vfile.is_fs_file) {
if (fd == -1) { LOG_WARNING(job->filepath, "Guessing mime type with libmagic inside archive files is not currently supported");
perror("open"); goto abort;
free(job); }
bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE);
if (bytes_read < 0) {
if (job->vfile.is_fs_file) {
LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
} else {
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc))
}
CLOSE_FILE(job->vfile)
return; return;
} }
bytes_read = read(fd, buf, PARSE_BUF_SIZE); magic_t magic = magic_open(MAGIC_MIME_TYPE);
magic_load(magic, NULL);
const char *magic_mime_str = magic_buffer(Magic, buf, bytes_read); const char *magic_mime_str = magic_buffer(magic, buf, bytes_read);
if (magic_mime_str != NULL) { if (magic_mime_str != NULL) {
doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str); doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
LOG_DEBUGF(job->filepath, "libmagic: %s", magic_mime_str);
if (doc.mime == 0) { if (doc.mime == 0) {
fprintf(stderr, "Couldn't find mime %s, %s!\n", magic_mime_str, job->filepath + job->base); LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str);
} }
} }
job->vfile.reset(&job->vfile);
magic_close(magic);
} }
int mmime = MAJOR_MIME(doc.mime); int mmime = MAJOR_MIME(doc.mime);
if (!(SHOULD_PARSE(doc.mime))) { if (!(SHOULD_PARSE(doc.mime))) {
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) || mmime == MimeAudio || mmime == MimeImage) { } else if (IS_RAW(doc.mime)) {
parse_media(job->filepath, &doc); parse_raw(&ScanCtx.raw_ctx, &job->vfile, &doc);
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
parse_media(&ScanCtx.media_ctx, &job->vfile, &doc);
} else if (IS_PDF(doc.mime)) { } else if (IS_PDF(doc.mime)) {
void *pdf_buf = read_all(job, (char *) buf, bytes_read, &fd); parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc.mime), &doc);
parse_pdf(pdf_buf, doc.size, &doc);
if (pdf_buf != buf) { } else if (mmime == MimeText && ScanCtx.text_ctx.content_size > 0) {
free(pdf_buf); if (IS_MARKUP(doc.mime)) {
parse_markup(&ScanCtx.text_ctx, &job->vfile, &doc);
} else {
parse_text(&ScanCtx.text_ctx, &job->vfile, &doc);
} }
} else if (mmime == MimeText && ScanCtx.content_size > 0) {
parse_text(bytes_read, &fd, (char *) buf, &doc);
} else if (IS_FONT(doc.mime)) { } else if (IS_FONT(doc.mime)) {
void *font_buf = read_all(job, (char *) buf, bytes_read, &fd); parse_font(&ScanCtx.font_ctx, &job->vfile, &doc);
parse_font(font_buf, doc.size, &doc);
if (font_buf != buf) { } else if (
free(font_buf); ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
} IS_ARC(doc.mime) ||
(IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
)) {
parse_archive(&ScanCtx.arc_ctx, &job->vfile, &doc);
} else if (ScanCtx.ooxml_ctx.content_size > 0 && IS_DOC(doc.mime)) {
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, &doc);
} else if (is_cbr(&ScanCtx.cbr_ctx, doc.mime)) {
parse_cbr(&ScanCtx.cbr_ctx, &job->vfile, &doc);
} else if (IS_MOBI(doc.mime)) {
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc);
}
abort:
//Parent meta
if (!uuid_is_null(job->parent)) {
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
meta_parent->key = MetaParent;
uuid_unparse(job->parent, meta_parent->str_val);
APPEND_META((&doc), meta_parent)
} }
write_document(&doc); write_document(&doc);
if (fd != -1) { CLOSE_FILE(job->vfile)
close(fd); }
}
void cleanup_parse() {
free(job); // noop
} }

View File

@@ -1,10 +1,16 @@
#ifndef SIST2_PARSE_H #ifndef SIST2_PARSE_H
#define SIST2_PARSE_H #define SIST2_PARSE_H
#include "src/sist.h" #include "../sist.h"
#define PARSE_BUF_SIZE 4096 #define MAGIC_BUF_SIZE 4096 * 6
int fs_read(struct vfile *f, void *buf, size_t size);
void fs_close(struct vfile *f);
void fs_reset(struct vfile *f);
void parse(void *arg); void parse(void *arg);
void cleanup_parse();
#endif #endif

View File

@@ -1,151 +0,0 @@
#include <src/ctx.h>
#include "pdf.h"
#include "src/ctx.h"
// Render page 0 of a document as a PNG thumbnail and write it to the index
// store under the document's uuid.
//
// The page is scaled so that its longer side equals ScanCtx.tn_size.
// Returns the loaded page; it is not dropped here (parse_pdf() reuses it as
// the first page when it extracts text).
fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
fz_page *cover = fz_load_page(ctx, fzdoc, 0);
fz_rect bounds = fz_bound_page(ctx, cover);
float scale;
float w = (float) bounds.x1 - bounds.x0;
float h = (float) bounds.y1 - bounds.y0;
// Scale factor is derived from the longer side of the page
if (w > h) {
scale = (float) ScanCtx.tn_size / w;
} else {
scale = (float) ScanCtx.tn_size / h;
}
fz_matrix m = fz_scale(scale, scale);
bounds = fz_transform_rect(bounds, m);
fz_irect bbox = fz_round_rect(bounds);
// RGB pixmap without alpha, every byte initialised to 0xFF
fz_pixmap *pixmap = fz_new_pixmap_with_bbox(ctx, ctx->colorspace->rgb, bbox, NULL, 0);
fz_clear_pixmap_with_value(ctx, pixmap, 0xFF);
fz_device *dev = fz_new_draw_device(ctx, m, pixmap);
// Page rendering is serialised across threads with ScanCtx.mupdf_mu; the
// mutex is released in fz_always so that it is unlocked on error as well
pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_try(ctx)
fz_run_page(ctx, cover, dev, fz_identity, NULL);
fz_always(ctx)
pthread_mutex_unlock(&ScanCtx.mupdf_mu);
fz_catch(ctx)
fz_rethrow(ctx);
// NOTE(review): when the error is rethrown above, dev and pixmap are not
// dropped by this function - confirm whether the caller accounts for that
fz_drop_device(ctx, dev);
fz_buffer *fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params);
unsigned char *tn_buf;
size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf);
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
fz_drop_pixmap(ctx, pixmap);
fz_drop_buffer(ctx, fzbuf);
return cover;
}
/* Message callback that discards its input; both arguments are ignored. */
void fz_noop_callback(__attribute__((unused)) void *user,
                      __attribute__((unused)) const char *message) {
    /* intentionally empty */
}
void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
static int mu_is_initialized = 0;
if (!mu_is_initialized) {
pthread_mutex_init(&ScanCtx.mupdf_mu, NULL);
mu_is_initialized = 1;
}
fz_context *ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
fz_stream *stream = NULL;
fz_document *fzdoc = NULL;
fz_var(stream);
fz_var(fzdoc);
fz_try(ctx)
{
fz_disable_icc(ctx);
fz_register_document_handlers(ctx);
//disable warnings
ctx->warn.print = fz_noop_callback;
ctx->error.print = fz_noop_callback;
stream = fz_open_memory(ctx, buf, buf_len);
fzdoc = fz_open_document_with_stream(ctx, mime_get_mime_text(doc->mime), stream);
int page_count = fz_count_pages(ctx, fzdoc);
fz_page *cover = render_cover(ctx, doc, fzdoc);
fz_stext_options opts;
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
for (int current_page = 0; current_page < page_count; current_page++) {
fz_page *page;
if (current_page == 0) {
page = cover;
} else {
page = fz_load_page(ctx, fzdoc, current_page);
}
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_try(ctx)
fz_run_page_contents(ctx, page, dev, fz_identity, NULL);
fz_always(ctx)
pthread_mutex_unlock(&ScanCtx.mupdf_mu);
fz_catch(ctx)
fz_rethrow(ctx);
fz_drop_device(ctx, dev);
fz_stext_block *block = stext->first_block;
while (block != NULL) {
if (block->type != FZ_STEXT_BLOCK_TEXT) {
block = block->next;
continue;
}
fz_stext_line *line = block->u.t.first_line;
while (line != NULL) {
fz_stext_char *c = line->first_char;
while (c != NULL) {
if (text_buffer_append_char(&text_buf, c->c) == TEXT_BUF_FULL) {
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
goto write_loop_end;
}
c = c->next;
}
line = line->next;
}
block = block->next;
}
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
}
write_loop_end:;
text_buffer_terminate_string(&text_buf);
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
meta_content->key = MetaContent;
memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur);
text_buffer_destroy(&text_buf);
APPEND_META(doc, meta_content)
}
fz_always(ctx)
{
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
} fz_catch(ctx) {
fprintf(stderr, "Error %s %s\n", doc->filepath, ctx->error.message);
}
}

View File

@@ -1,9 +0,0 @@
/* Public interface of the MuPDF-based document parser. */
#ifndef SIST2_PDF_H
#define SIST2_PDF_H
#include "src/sist.h"
/**
 * Parse a document held in memory with MuPDF.
 *
 * @param buf     document bytes
 * @param buf_len number of bytes in `buf`
 * @param doc     document being indexed; extracted text is appended to it
 *                as a MetaContent entry
 */
void parse_pdf(void *buf, size_t buf_len, document_t *doc);
#endif

View File

@@ -1,43 +0,0 @@
#include "text.h"
#include "src/ctx.h"
/**
 * Index the leading bytes of a plain-text file as the document's MetaContent.
 *
 * At most ScanCtx.content_size bytes are considered. Bytes already present in
 * `buf` are reused; when they cover neither the whole file nor the content
 * limit, the remainder is read from the file itself.
 *
 * @param bytes_read number of valid bytes already in `buf`
 * @param fd         in/out file descriptor; when it is -1 the file at
 *                   doc->filepath is opened and the descriptor stored here
 *                   (NOTE(review): presumably closed by the caller - confirm)
 * @param buf        bytes already read from the start of the file
 * @param doc        document being indexed; receives the MetaContent entry
 *
 * Reading is best effort: if the file cannot be opened or a read fails or
 * comes up short, only the bytes actually obtained are indexed.
 */
void parse_text(int bytes_read, int *fd, char *buf, document_t *doc) {

    char *intermediate_buf;
    int intermediate_buf_len;

    if (bytes_read == doc->size || bytes_read >= ScanCtx.content_size) {
        /* Everything we need is already in buf. */
        int to_copy = MIN(bytes_read, ScanCtx.content_size);
        intermediate_buf = malloc(to_copy);
        intermediate_buf_len = to_copy;
        memcpy(intermediate_buf, buf, to_copy);

    } else {
        if (*fd == -1) {
            *fd = open(doc->filepath, O_RDONLY);
        }

        int to_read = MIN(ScanCtx.content_size, doc->size) - bytes_read;

        intermediate_buf = malloc(to_read + bytes_read);
        /* Count only bytes that are really there, so that a failed or short
         * read never exposes uninitialized heap memory to the indexer. */
        intermediate_buf_len = bytes_read;
        if (bytes_read != 0) {
            memcpy(intermediate_buf, buf, bytes_read);
        }

        if (*fd != -1) {
            /* read() may return fewer bytes than requested; keep going until
             * the request is satisfied, EOF is hit, or an error occurs. */
            while (to_read > 0) {
                ssize_t n = read(*fd, intermediate_buf + intermediate_buf_len, to_read);
                if (n <= 0) {
                    break;
                }
                intermediate_buf_len += (int) n;
                to_read -= (int) n;
            }
        }
    }

    text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
    for (int i = 0; i < intermediate_buf_len; i++) {
        text_buffer_append_char(&text_buf, *(intermediate_buf + i));
    }
    text_buffer_terminate_string(&text_buf);

    meta_line_t *meta = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
    meta->key = MetaContent;
    strcpy(meta->strval, text_buf.dyn_buffer.buf);
    text_buffer_destroy(&text_buf);
    free(intermediate_buf);
    APPEND_META(doc, meta)
}

View File

@@ -1,8 +0,0 @@
/* Public interface of the plain-text parser. */
#ifndef SIST2_TEXT_H
#define SIST2_TEXT_H
#include "src/sist.h"
/**
 * Index the leading bytes of a plain-text file as the document's MetaContent.
 *
 * @param bytes_read number of valid bytes already in `buf`
 * @param fd         in/out file descriptor; opened from doc->filepath when -1
 * @param buf        bytes already read from the start of the file
 * @param doc        document being indexed
 */
void parse_text(int bytes_read, int *fd, char *buf, document_t *doc);
#endif

View File

@@ -1,61 +1,51 @@
#ifndef SIST_H #ifndef SIST_H
#define SIST_H #define SIST_H
#define UUID_STR_LEN 37 #ifndef FALSE
#define FALSE (0)
#define BOOL int
#endif
#include <glib-2.0/glib.h> #ifndef TRUE
#include <unistd.h> #define TRUE (!FALSE)
#endif
#undef MAX
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#undef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#ifndef PATH_MAX
#define PATH_MAX 4096
#endif
#undef ABS
#define ABS(a) (((a) < 0) ? -(a) : (a))
#define UUID_STR_LEN 37
#define UNUSED(x) __attribute__((__unused__)) x
#include "util.h"
#include "log.h"
#include "types.h"
#include "libscan/scan.h"
#include <cjson/cJSON.h>
#include <string.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <stdarg.h>
#include <unistd.h>
#include <fcntl.h> #include <fcntl.h>
#include <ftw.h> #include <dirent.h>
#include <uuid.h>
#include <magic.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
#include <libavcodec/avcodec.h>
#include <ctype.h>
#include <mupdf/fitz.h>
#include <mupdf/pdf.h>
#include "argparse/argparse.h"
#include <time.h>
#include <limits.h>
#include <pthread.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <wordexp.h> #include <sys/types.h>
#include <errno.h>
#include <ctype.h>
#ifndef SIST_SCAN_ONLY
#include <onion/onion.h>
#include <onion/handler.h>
#include <onion/block.h>
#include <onion/shortcuts.h>
#include <curl/curl.h>
#endif
#include "cJSON/cJSON.h"
#include "types.h"
#include "tpool.h"
#include "util.h"
#include "io/store.h"
#include "io/serialize.h"
#include "io/walk.h"
#include "parsing/parse.h"
#include "parsing/mime.h"
#include "parsing/text.h"
#include "parsing/pdf.h"
#include "parsing/media.h"
#include "parsing/font.h"
#include "cli.h"
#ifndef SIST_SCAN_ONLY
#include "src/index/elastic.h"
#include "index/web.h"
#include "web/serve.h"
#endif
;
#endif #endif

4
src/static/css/autocomplete.min.css vendored Normal file
View File

@@ -0,0 +1,4 @@
.autocomplete-suggestions { text-align: left; cursor: default; border: 1px solid #ccc; border-top: 0; background: #fff; box-shadow: -1px 1px 3px rgba(0,0,0,.1); position: absolute; display: none; z-index: 9999; max-height: 254px; overflow: hidden; overflow-y: auto; box-sizing: border-box; }
.autocomplete-suggestion { position: relative; padding: 0 .6em; line-height: 23px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; font-size: 1.02em; color: #333; }
.autocomplete-suggestion b { font-weight: normal; color: #1f8dd6; }
.autocomplete-suggestion.selected { background: #f0f0f0; }

1
src/static/css/bricklayer.min.css vendored Normal file
View File

@@ -0,0 +1 @@
.bricklayer{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-box-align:start;-webkit-align-items:flex-start;-ms-flex-align:start;align-items:flex-start;-webkit-box-pack:center;-webkit-justify-content:center;-ms-flex-pack:center;justify-content:center;-webkit-flex-wrap:wrap;-ms-flex-wrap:wrap;flex-wrap:wrap}.bricklayer-column-sizer{width:100%;display:none}@media screen and (min-width:640px){.bricklayer-column-sizer{width:50%}}@media screen and (min-width:980px){.bricklayer-column-sizer{width:33.333%}}@media screen and (min-width:1200px){.bricklayer-column-sizer{width:25%}}.bricklayer-column{-webkit-box-flex:1;-webkit-flex:1;-ms-flex:1;flex:1;padding-left:5px;padding-right:5px}

514
src/static/css/dark.css Normal file
View File

@@ -0,0 +1,514 @@
*:focus {
outline: 0;
}
.info-icon {
width: 1rem;
margin-right: 0.2rem;
cursor: pointer;
line-height: 1rem;
height: 1rem;
background-image: url();
filter: brightness(65%);
}
.info-icon:hover {
color: inherit;
}
.modal-title {
max-width: calc(100% - 2rem);
overflow: hidden;
text-overflow: ellipsis;
}
.path-row {
display: -ms-flexbox;
display: flex;
-ms-flex-align: start;
align-items: flex-start;
}
.tag-container {
margin-left: 0.3rem;
}
.path-line {
color: #BBB;
text-overflow: ellipsis;
overflow: hidden;
white-space: nowrap;
}
a {
color: #00BCD4;
}
body {
overflow-y: scroll;
background: black;
}
.progress {
margin-top: 1em;
}
.card, .modal-content {
margin-top: 1em;
background: #212121;
color: #e0e0e0;
border-radius: 1px;
border: none;
}
.table {
color: #e0e0e0;
}
.table td, .table th {
border: none;
}
.table thead th {
border-bottom: 1px solid #646464;
}
.modal-header .close {
color: #e0e0e0;
text-shadow: none;
}
.modal-header {
border-bottom: 1px solid #646464;
}
.sub-document {
background: #37474F !important;
}
.list-group-item.sub-document {
border-top: 1px solid #646464 !important;
}
.sub-document .text-muted {
color: #8a949c !important;
}
.list-group-item {
background: #212121;
color: #e0e0e0;
border-top: 1px solid #424242;
border-bottom: none;
border-left: none;
border-right: none;
padding: .25rem 0.5rem;
}
.list-group-item:first-child {
border-top: none;
}
.navbar-brand {
font-size: 1.75rem;
padding: 0;
color: #f5f5f5;
}
.navbar {
background: #546b7a;
}
a:hover,.btn:hover {
color: #fff;
}
.navbar span {
color: #eee;
}
.document {
padding: 0.5rem;
}
.document p {
margin-bottom: 0;
}
.document:hover p {
text-decoration: underline;
}
.badge-video {
color: #FFFFFF;
background-color: #F27761;
}
.badge-image {
color: #FFFFFF;
background-color: #AA99C9;
}
.badge-audio {
color: #FFFFFF;
background-color: #00ADEF;
}
.badge-resolution {
color: #212529;
background-color: #B0BEC5;
}
.badge-text {
color: #FFFFFF;
background-color: #FAAB3C;
}
.card-img-overlay {
pointer-events: none;
padding: 0.75rem;
bottom: unset;
top: 0;
left: unset;
right: unset;
}
.file-title {
width: 100%;
line-height: 1rem;
height: 1.1rem;
font-size: 10pt;
white-space: nowrap;
text-overflow: ellipsis;
overflow: hidden;
color: #00BCD4;
}
.badge {
margin-right: 3px;
}
.badge-user {
color: #212529;
background-color: #e0e0e0;
}
.fit {
display: block;
min-width: 64px;
max-width: 100%;
max-height: 175px;
margin: 0 auto 0;
padding: 3px 3px 0;
width: auto;
height: auto;
}
.fit-sm {
display: block;
max-width: 64px;
max-height: 64px;
margin: 0 auto;
width: auto;
height: auto;
}
.audio-fit {
height: 39px;
vertical-align: bottom;
display: inline;
width: 100%;
}
@media screen and (min-width: 1500px) {
.container {
max-width: 1440px;
}
.bricklayer-column-sizer {
width: 20% !important;
}
.bricklayer-column {
max-width: 20%;
}
}
@media screen and (min-width: 1800px) {
.container {
max-width: 1550px;
}
}
mark {
background: rgba(251, 191, 41, 0.25);
border-radius: 0;
padding: 1px 0;
color: inherit;
}
.content-div mark {
background: rgba(251, 191, 41, 0.40);
color: white;
}
.content-div {
font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
font-size: 13px;
padding: 1em;
background-color: #37474F;
border: 1px solid #616161;
border-radius: 4px;
margin: 3px;
white-space: normal;
color: rgb(224, 224, 224);
overflow: hidden;
}
.irs-single, .irs-from, .irs-to {
font-size: 13px;
background-color: #00BCD4;
}
.irs-slider {
cursor: col-resize;
}
.irs {
margin-top: 1em;
margin-bottom: 1em;
}
.custom-select {
overflow: auto;
background-color: #37474F;
border: 1px solid #616161;
color: #bdbdbd;
}
.custom-select:focus {
border-color: #757575;
outline: 0;
box-shadow: 0 0 0 .2rem rgba(0, 123, 255, .25);
}
option {
outline: none;
}
.form-control {
background-color: #37474F;
border: 1px solid #616161;
color: #fff;
}
.form-control:focus {
background-color: #546E7A;
color: #fff;
}
.input-group-text {
background: #263238;
border: 1px solid #616161;
color: #dbdbdb;
}
::placeholder {
color: #BDBDBD !important;
opacity: 1;
}
.inspire-tree .selected > .wholerow, .inspire-tree .selected > .title-wrap:hover + .wholerow {
background: none;
}
.inspire-tree .icon-expand::before, .inspire-tree .icon-collapse::before {
background-color: black;
}
.inspire-tree .title {
color: #eee;
}
/* Tree widget: compact text in a scrollable box capped at 350px. */
.inspire-tree {
    font-weight: 400;
    font-size: 14px;
    /* "Helvetica Neue" is a single family name and must be quoted as one. */
    font-family: Helvetica, "Helvetica Neue", Verdana, sans-serif;
    max-height: 350px;
    overflow: auto;
}
.page-indicator {
line-height: 1rem;
padding: 0.5rem;
background: #212121;
color: #eee;
margin-top: 1em;
}
.btn-xs {
padding: .1rem .3rem;
font-size: .875rem;
border-radius: .2rem;
}
.btn {
color: #eee;
}
.nav-tabs .nav-link {
color: #e0e0e0;
}
.nav-tabs .nav-item.show .nav-link, .nav-tabs .nav-link.active {
background-color: #212121;
border-color: #616161 #616161 #212121;
color: #e0e0e0;
}
/* Focused tab: same border and text colours as the active tab. */
.nav-tabs .nav-link:focus {
    border-color: #616161 #616161 #212121;
    color: #e0e0e0;
}
.nav-tabs .nav-link:focus, .nav-tabs .nav-link:hover {
border-color: #e0e0e0 #e0e0e0 #212121;
color: #e0e0e0;
}
.nav-tabs {
border-bottom: #616161;
}
.nav {
margin-top: 0.5rem;
}
@media (max-width: 800px) {
#treeTabs {
flex-basis: inherit;
flex-grow: inherit;
}
}
.list-group {
margin-top: 1em;
}
.wrapper-sm {
min-width: 64px;
}
.media-expanded {
display: inherit;
}
.media-expanded .fit {
max-height: 250px;
}
@media (max-width: 650px) {
.media-expanded .fit {
max-height: none;
}
.tagline {
display: none;
}
}
.version {
color: #00BCD4;
margin-left: -18px;
margin-top: -14px;
font-size: 11px;
}
@media (min-width: 800px) {
.small-btn {
display: none;
}
.large-btn {
display: inherit;
}
}
@media (max-width: 801px) {
.small-btn {
display: inherit;
}
.large-btn {
display: none;
}
}
#searchBar {
border-right: none;
}
#pathTree .title {
cursor: pointer;
}
svg {
fill: white;
}
.play {
position: absolute;
width: 50px;
height: 50px;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
pointer-events: none;
}
.play svg {
fill: rgba(255, 255, 255, 0.7);
}
.img-wrapper:hover svg {
fill: rgba(255, 255, 255, 1);
}
.pointer {
cursor: pointer;
}
.stats-card {
text-align: center;
margin-top: 1em;
padding: 1em;
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .08) !important;
border-radius: 0;
border: none;
background: #212121;
}
.graph {
display: inline-block;
width: 40%;
}
.full-screen {
position: absolute;
left: 0;
width: 100%;
}
.stats-btn {
float: right;
margin-bottom: 10px;
}
#graphs-card svg text {
fill: #eee;
}

1
src/static/css/jquery.toast.min.css vendored Normal file
View File

@@ -0,0 +1 @@
.jq-toast-wrap,.jq-toast-wrap *{margin:0;padding:0}.jq-toast-wrap{display:block;position:fixed;width:250px;pointer-events:none!important;letter-spacing:normal;z-index:9000!important}.jq-toast-wrap.bottom-left{bottom:20px;left:20px}.jq-toast-wrap.bottom-right{bottom:20px;right:40px}.jq-toast-wrap.top-left{top:20px;left:20px}.jq-toast-wrap.top-right{top:20px;right:40px}.jq-toast-single{display:block;width:100%;padding:10px;margin:0 0 5px;border-radius:4px;font-size:12px;font-family:arial,sans-serif;line-height:17px;position:relative;pointer-events:all!important;background-color:#444;color:#fff}.jq-toast-single h2{font-family:arial,sans-serif;font-size:14px;margin:0 0 7px;background:0 0;color:inherit;line-height:inherit;letter-spacing:normal}.jq-toast-single a{color:#eee;text-decoration:none;font-weight:700;border-bottom:1px solid #fff;padding-bottom:3px;font-size:12px}.jq-toast-single ul{margin:0 0 0 15px;background:0 0;padding:0}.jq-toast-single ul li{list-style-type:disc!important;line-height:17px;background:0 0;margin:0;padding:0;letter-spacing:normal}.close-jq-toast-single{position:absolute;top:3px;right:7px;font-size:14px;cursor:pointer}.jq-toast-loader{display:block;position:absolute;top:-2px;height:5px;width:0;left:0;border-radius:5px;background:red}.jq-toast-loaded{width:100%}.jq-has-icon{padding:10px 10px 10px 50px;background-repeat:no-repeat;background-position:10px}.jq-icon-info{background-image:url();background-color:#31708f;color:#d9edf7;border-color:#bce8f1}.jq-icon-warning{background-image:url();background-color:#8a6d3b;color:#fcf8e3;border-color:#faebcc}.jq-icon-error{background-image:url();background-color:#a94442;color:#f2dede;border-color:#ebccd1}.jq-icon-success{background-image:url();color:#dff0d8;background-color:#3c763d;border-color:#d6e9c6}

377
src/static/css/light.css Normal file
View File

@@ -0,0 +1,377 @@
*:focus {
outline: 0;
}
.info-icon {
width: 1rem;
margin-right: 0.2rem;
cursor: pointer;
line-height: 1rem;
height: 1rem;
background-image: url();
filter: brightness(45%);
}
.info-icon:hover {
color: inherit;
}
.modal-title {
max-width: calc(100% - 2rem);
overflow: hidden;
text-overflow: ellipsis;
}
.path-row {
display: -ms-flexbox;
display: flex;
-ms-flex-align: start;
align-items: flex-start;
}
.tag-container {
margin-left: 0.3rem;
}
.path-line {
color: #444;
text-overflow: ellipsis;
overflow: hidden;
white-space: nowrap;
}
body {
overflow-y: scroll;
}
.progress {
margin-top: 1em;
}
.card {
margin-top: 1em;
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .08) !important;
border-radius: 0;
border: none;
}
.sub-document {
background: #AB47BC1F !important;
}
.navbar-brand {
font-size: 1.75rem;
padding: 0;
}
.navbar {
background: #F7F7F7;
border-bottom: solid 1px #dfdfdf;
}
.document {
padding: 0.5rem;
}
.document p {
margin-bottom: 0;
}
.document:hover p {
text-decoration: underline;
}
.badge-video {
color: #FFFFFF;
background-color: #F27761;
}
.badge-image {
color: #FFFFFF;
background-color: #AA99C9;
}
.badge-audio {
color: #FFFFFF;
background-color: #00ADEF;
}
.badge-resolution {
color: #212529;
background-color: #FFC107;
}
.badge-user {
color: #212529;
background-color: #e0e0e0;
}
.badge-text {
color: #FFFFFF;
background-color: #FAAB3C;
}
.card-img-overlay {
pointer-events: none;
padding: 0.75rem;
bottom: unset;
top: 0;
left: unset;
right: unset;
}
.file-title {
width: 100%;
line-height: 1rem;
height: 1.1rem;
font-size: 10pt;
white-space: nowrap;
text-overflow: ellipsis;
overflow: hidden;
}
.badge {
margin-right: 3px;
}
.fit {
display: block;
min-width: 64px;
max-width: 100%;
max-height: 175px;
margin: 0 auto 0;
padding: 3px 3px 0 3px;
width: auto;
height: auto;
}
.fit-sm {
display: block;
max-width: 64px;
max-height: 64px;
margin: 0 auto 0;
width: auto;
height: auto;
}
.audio-fit {
height: 39px;
vertical-align: bottom;
display: inline;
width: 100%;
}
@media screen and (max-width: 1200px) {
.bricklayer-column {
max-width: 100%;
}
}
@media screen and (min-width: 1500px) {
.container {
max-width: 1440px;
}
.bricklayer-column-sizer {
width: 20% !important;
}
.bricklayer-column {
max-width: 20%;
}
}
@media screen and (min-width: 1800px) {
.container {
max-width: 1550px;
}
}
mark {
background: #fff217;
border-radius: 0;
padding: 1px 0;
color: inherit;
}
.content-div {
font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
font-size: 13px;
padding: 1em;
background-color: #f5f5f5;
border: 1px solid #ccc;
border-radius: 4px;
margin: 3px;
white-space: normal;
color: #000;
overflow: hidden;
}
.irs-single, .irs-from, .irs-to {
font-size: 13px;
}
.irs-slider {
cursor: col-resize;
}
.custom-select {
overflow: auto;
}
.irs {
margin-top: 1em;
margin-bottom: 1em;
}
.inspire-tree .selected > .wholerow, .inspire-tree .selected > .title-wrap:hover + .wholerow {
background: none;
}
/* Tree widget: compact text in a scrollable box capped at 350px. */
.inspire-tree {
    font-weight: 400;
    font-size: 14px;
    /* "Helvetica Neue" is a single family name and must be quoted as one. */
    font-family: Helvetica, "Helvetica Neue", Verdana, sans-serif;
    max-height: 350px;
    overflow: auto;
}
.page-indicator {
line-height: 1rem;
padding: 0.5rem;
background: #f8f9fa;
margin-top: 1em;
}
.btn-xs {
padding: .1rem .3rem;
font-size: .875rem;
border-radius: .2rem;
}
.nav {
margin-top: 0.5rem;
}
@media (max-width: 800px) {
#treeTabs {
flex-basis: inherit;
flex-grow: inherit;
}
}
.list-group {
margin-top: 1em;
}
.list-group-item {
padding: .25rem 0.5rem;
}
.wrapper-sm {
min-width: 64px;
}
.media-expanded {
display: inherit;
}
.media-expanded .fit {
max-height: 250px;
}
@media (max-width: 650px) {
.media-expanded .fit {
max-height: none;
}
.tagline {
display: none;
}
}
.version {
color: #007bff;
margin-left: -18px;
margin-top: -14px;
font-size: 11px;
}
@media (min-width: 800px) {
.small-btn {
display: none;
}
.large-btn {
display: inherit;
}
}
@media (max-width: 801px) {
.small-btn {
display: inherit;
}
.large-btn {
display: none;
}
}
#searchBar {
border-right: none;
}
#pathTree .title {
cursor: pointer;
}
.play {
position: absolute;
width: 50px;
height: 50px;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
pointer-events: none;
}
.play svg {
fill: rgba(0, 0, 0, 0.7);
}
.img-wrapper:hover svg {
fill: rgba(0, 0, 0, 1);
}
.pointer {
cursor: pointer;
}
.stats-card {
text-align: center;
margin-top: 1em;
padding: 1em;
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .08) !important;
border-radius: 0;
border: none;
background: #fff;
}
.graph {
display: inline-block;
width: 40%;
}
.full-screen {
position: absolute;
left: 0;
width: 100%;
}
.stats-btn {
float: right;
margin-bottom: 10px;
}

3
src/static/css/lity.min.css vendored Normal file
View File

@@ -0,0 +1,3 @@
/*! Lity - v2.4.0 - 2019-08-10
* http://sorgalla.com/lity/
* Copyright (c) 2015-2019 Jan Sorgalla; Licensed MIT */.lity{z-index:9990;position:fixed;top:0;right:0;bottom:0;left:0;white-space:nowrap;background:#0b0b0b;background:rgba(0,0,0,0.9);outline:none !important;opacity:0;-webkit-transition:opacity .3s ease;-o-transition:opacity .3s ease;transition:opacity .3s ease}.lity.lity-opened{opacity:1}.lity.lity-closed{opacity:0}.lity *{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.lity-wrap{z-index:9990;position:fixed;top:0;right:0;bottom:0;left:0;text-align:center;outline:none !important}.lity-wrap:before{content:'';display:inline-block;height:100%;vertical-align:middle;margin-right:-0.25em}.lity-loader{z-index:9991;color:#fff;position:absolute;top:50%;margin-top:-0.8em;width:100%;text-align:center;font-size:14px;font-family:Arial,Helvetica,sans-serif;opacity:0;-webkit-transition:opacity .3s ease;-o-transition:opacity .3s ease;transition:opacity .3s ease}.lity-loading .lity-loader{opacity:1}.lity-container{z-index:9992;position:relative;text-align:left;vertical-align:middle;display:inline-block;white-space:normal;max-width:100%;max-height:100%;outline:none !important}.lity-content{z-index:9993;width:100%;-webkit-transform:scale(1);-ms-transform:scale(1);-o-transform:scale(1);transform:scale(1);-webkit-transition:-webkit-transform .3s ease;transition:-webkit-transform .3s ease;-o-transition:-o-transform .3s ease;transition:transform .3s ease;transition:transform .3s ease, -webkit-transform .3s ease, -o-transform .3s ease}.lity-loading .lity-content,.lity-closed .lity-content{-webkit-transform:scale(.8);-ms-transform:scale(.8);-o-transform:scale(.8);transform:scale(.8)}.lity-content:after{content:'';position:absolute;left:0;top:0;bottom:0;display:block;right:0;width:auto;height:auto;z-index:-1;-webkit-box-shadow:0 0 8px rgba(0,0,0,0.6);box-shadow:0 0 8px 
rgba(0,0,0,0.6)}.lity-close{z-index:9994;width:35px;height:35px;position:fixed;right:0;top:0;-webkit-appearance:none;cursor:pointer;text-decoration:none;text-align:center;padding:0;color:#fff;font-style:normal;font-size:35px;font-family:Arial,Baskerville,monospace;line-height:35px;text-shadow:0 1px 2px rgba(0,0,0,0.6);border:0;background:none;outline:none;-webkit-box-shadow:none;box-shadow:none}.lity-close::-moz-focus-inner{border:0;padding:0}.lity-close:hover,.lity-close:focus,.lity-close:active,.lity-close:visited{text-decoration:none;text-align:center;padding:0;color:#fff;font-style:normal;font-size:35px;font-family:Arial,Baskerville,monospace;line-height:35px;text-shadow:0 1px 2px rgba(0,0,0,0.6);border:0;background:none;outline:none;-webkit-box-shadow:none;box-shadow:none}.lity-close:active{top:1px}.lity-image img{max-width:100%;display:block;line-height:0;border:0}.lity-iframe .lity-container,.lity-youtube .lity-container,.lity-vimeo .lity-container,.lity-facebookvideo .lity-container,.lity-googlemaps .lity-container{width:100%;max-width:964px}.lity-iframe-container{width:100%;height:0;padding-top:56.25%;overflow:hidden;pointer-events:all;-webkit-transform:translateZ(0);transform:translateZ(0);-webkit-overflow-scrolling:touch}.lity-iframe-container iframe{position:absolute;display:block;top:0;left:0;width:100%;height:100%;-webkit-box-shadow:0 0 8px rgba(0,0,0,0.6);box-shadow:0 0 8px rgba(0,0,0,0.6);background:#000}.lity-hide{display:none}

Binary file not shown.

After

Width:  |  Height:  |  Size: 595 B

View File

Before

Width:  |  Height:  |  Size: 669 B

After

Width:  |  Height:  |  Size: 669 B

16
src/static/js/5_inspire-tree.min.js vendored Normal file

File diff suppressed because one or more lines are too long

1
src/static/js/7_jquery.toast.min.js vendored Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

1
src/static/js/bricklayer.min.js vendored Normal file
View File

@@ -0,0 +1 @@
!function t(e,n,r){function o(s,u){if(!n[s]){if(!e[s]){var l="function"==typeof require&&require;if(!u&&l)return l(s,!0);if(i)return i(s,!0);var a=new Error("Cannot find module '"+s+"'");throw a.code="MODULE_NOT_FOUND",a}var p=n[s]={exports:{}};e[s][0].call(p.exports,function(t){var n=e[s][1][t];return o(n?n:t)},p,p.exports,t,e,n,r)}return n[s].exports}for(var i="function"==typeof require&&require,s=0;s<r.length;s++)o(r[s]);return o}({1:[function(t,e,n){var r,o=this&&this.__extends||function(t,e){function n(){this.constructor=t}for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r]);t.prototype=null===e?Object.create(e):(n.prototype=e.prototype,new n)};!function(t){function e(t){return[].slice.call(t)}function n(t,e,n){if(window.CustomEvent)var r=new CustomEvent(e,{detail:n});else{var r=document.createEvent("CustomEvent");r.initCustomEvent(e,!0,!0,n)}return t.dispatchEvent(r)}var r={rulerClassName:"bricklayer-column-sizer",columnClassName:"bricklayer-column"},i=function(){function t(t){this.element=document.createElement("div"),this.element.className=t}return t.prototype.destroy=function(){this.element.parentNode.removeChild(this.element)},t}(),s=function(t){function e(){t.apply(this,arguments)}return o(e,t),e.prototype.getWidth=function(){this.element.setAttribute("style","\n display: block;\n visibility: hidden !important;\n top: -1000px !important;\n ");var t=this.element.offsetWidth;return this.element.removeAttribute("style"),t},e}(i),u=function(t){function e(){t.apply(this,arguments)}return o(e,t),e}(i),l=function(){function t(t,e){void 0===e&&(e=r),this.element=t,this.options=e,this.build(),this.buildResponsive()}return t.prototype.append=function(t){var n=this;if(Array.isArray(t))return void t.forEach(function(t){return n.append(t)});var r=this.findMinHeightColumn();this.elements=e(this.elements).concat([t]),this.applyPosition("append",r,t)},t.prototype.prepend=function(t){var n=this;if(Array.isArray(t))return void t.forEach(function(t){return n.prepend(t)});var 
r=this.findMinHeightColumn();this.elements=[t].concat(e(this.elements)),this.applyPosition("prepend",r,t)},t.prototype.on=function(t,e){return this.element.addEventListener("bricklayer."+t,e),this},t.prototype.redraw=function(){var t=this.columnCount;this.checkColumnCount(!1),this.reorderElements(t),n(this.element,"bricklayer.redraw",{columnCount:t})},t.prototype.destroy=function(){var t=this;this.ruler.destroy(),e(this.elements).forEach(function(e){return t.element.appendChild(e)}),e(this.getColumns()).forEach(function(t){return t.parentNode.removeChild(t)}),n(this.element,"bricklayer.destroy",{})},t.prototype.build=function(){this.ruler=new s(this.options.rulerClassName),this.elements=this.getElementsInOrder(),this.element.insertBefore(this.ruler.element,this.element.firstChild)},t.prototype.buildResponsive=function(){var t=this;window.addEventListener("resize",function(e){return t.checkColumnCount()}),this.checkColumnCount(),this.on("breakpoint",function(e){return t.reorderElements(e.detail.columnCount)}),this.columnCount>=1&&this.reorderElements(this.columnCount)},t.prototype.getColumns=function(){return this.element.querySelectorAll(":scope > ."+this.options.columnClassName)},t.prototype.findMinHeightColumn=function(){var t=e(this.getColumns()),n=t.map(function(t){return t.offsetHeight}),r=Math.min.apply(null,n);return t[n.indexOf(r)]},t.prototype.getElementsInOrder=function(){return this.element.querySelectorAll(":scope > *:not(."+this.options.columnClassName+"):not(."+this.options.rulerClassName+")")},t.prototype.checkColumnCount=function(t){void 0===t&&(t=!0);var e=this.getColumnCount();this.columnCount!==e&&(t&&n(this.element,"bricklayer.breakpoint",{columnCount:e}),this.columnCount=e)},t.prototype.reorderElements=function(t){var n=this;void 0===t&&(t=1),(t==1/0||1>t)&&(t=1);for(var r=e(this.elements).map(function(t){var e=t.parentNode?t.parentNode.removeChild(t):t;return e}),o=this.getColumns(),i=0;i<o.length;i++)o[i].parentNode.removeChild(o[i]);for(var 
i=0;t>i;i++){var s=new u(this.options.columnClassName).element;this.element.appendChild(s)}r.forEach(function(t){var e=n.findMinHeightColumn();e.appendChild(t)})},t.prototype.getColumnCount=function(){var t=this.element.offsetWidth,e=this.ruler.getWidth();return Math.round(t/e)},t.prototype.applyPosition=function(t,e,r){var o=this,i=function(i){var s=i+t.charAt(0).toUpperCase()+t.substr(1);n(o.element,"bricklayer."+s,{item:r,column:e})};switch(i("before"),t){case"append":e.appendChild(r);break;case"prepend":e.insertBefore(r,e.firstChild)}i("after")},t}();t.Container=l}(r||(r={})),function(t,n){"function"==typeof define&&define.amd?define(function(){return n()}):"undefined"!=typeof window&&t===window?t.Bricklayer=n():"object"==typeof e&&e.exports&&(e.exports=n())}("undefined"!=typeof window?window:this,function(){return r.Container})},{}]},{},[1]);

2
src/static/js/d3.v5.min.js vendored Normal file

File diff suppressed because one or more lines are too long

2
src/static/js/dom-to-image.min.js vendored Normal file

File diff suppressed because one or more lines are too long

622
src/static/js/dom.js Normal file
View File

@@ -0,0 +1,622 @@
/**
 * Enable gif loading on hover.
 *
 * After the pointer has stayed over the thumbnail for 600 ms, the thumbnail's
 * "src" is switched to the full file ("f/<id>"). Leaving the thumbnail
 * restores the static thumbnail ("t/<index>/<id>").
 *
 * @param thumbnail element whose "src" attribute is swapped
 * @param hit       search hit; reads hit["_id"] and hit["_source"]["index"]
 */
function gifOver(thumbnail, hit) {
    // Pending hover timer. It is cancelled on mouseout so that a timer left
    // over from an earlier, shorter hover cannot fire during a later one.
    let hoverTimer = null;

    thumbnail.addEventListener("mouseover", function () {
        thumbnail.mouseStayedOver = true;

        window.clearTimeout(hoverTimer);
        hoverTimer = window.setTimeout(function () {
            if (thumbnail.mouseStayedOver) {
                //Load gif
                thumbnail.setAttribute("src", "f/" + hit["_id"]);
            }
        }, 600);
    });

    thumbnail.addEventListener("mouseout", function () {
        //Reset timer
        window.clearTimeout(hoverTimer);
        thumbnail.mouseStayedOver = false;
        thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
    });
}
/**
 * Return the first highlighted content fragment of a hit.
 *
 * The "content" highlight is preferred over "content.nGram".
 *
 * @param hit search hit
 * @returns the first fragment, or undefined when the hit has no content highlight
 */
function getContentHighlight(hit) {
    if (!hit.hasOwnProperty("highlight")) {
        return undefined;
    }

    const highlight = hit["highlight"];
    for (const field of ["content", "content.nGram"]) {
        if (highlight.hasOwnProperty(field)) {
            return highlight[field][0];
        }
    }
    return undefined;
}
/**
 * Return the first highlighted path fragment of a hit.
 *
 * The "path.text" highlight is preferred over "path.nGram".
 *
 * @param hit search hit
 * @returns the first fragment, or undefined when the hit has no path highlight
 */
function getPathHighlight(hit) {
    if (!hit.hasOwnProperty("highlight")) {
        return undefined;
    }

    const highlight = hit["highlight"];
    for (const field of ["path.text", "path.nGram"]) {
        if (highlight.hasOwnProperty(field)) {
            return highlight[field][0];
        }
    }
    return undefined;
}
/**
 * Fill a title element with the file name of a hit.
 *
 * When the hit carries a "name" (preferred) or "name.nGram" highlight, the
 * highlight followed by the extension is inserted as HTML; otherwise the
 * plain name followed by the extension is appended as a text node.
 *
 * @param hit       search hit
 * @param title     element that receives the name
 * @param extension suffix appended to the name (may be empty)
 */
function applyNameToTitle(hit, title, extension) {
    if (hit.hasOwnProperty("highlight")) {
        const highlight = hit["highlight"];
        const field = ["name", "name.nGram"].find(key => highlight.hasOwnProperty(key));

        if (field !== undefined) {
            title.insertAdjacentHTML('afterbegin', highlight[field] + extension);
            return;
        }
    }

    const plainName = document.createTextNode(hit["_source"]["name"] + extension);
    title.appendChild(plainName);
}
/**
 * Decide whether a hit is offered for in-page video playback.
 *
 * A hit qualifies when its mime type starts with "video/", it has no
 * "parent" key, its extension is neither "mkv" nor "avi", and its video
 * codec is none of "hevc", "mpeg2video" or "wmv3".
 *
 * @param hit search hit
 * @returns true/false; when the mime type is missing or empty, that falsy
 *          value itself is returned
 */
function shouldPlayVideo(hit) {
    const source = hit["_source"];
    const mime = source["mime"];

    if (!mime) {
        return mime;
    }
    if (!mime.startsWith("video/") || "parent" in source) {
        return false;
    }

    const excludedExtensions = ["mkv", "avi"];
    const excludedCodecs = ["hevc", "mpeg2video", "wmv3"];

    return !excludedExtensions.includes(source["extension"])
        && !excludedCodecs.includes(source["videoc"]);
}
/**
 * Decide whether the original image file of a hit is shown directly.
 *
 * A hit qualifies when its mime type starts with "image/", its "parent"
 * value is falsy, and its codec is none of "tiff", "raw" or "ppm".
 *
 * @param hit search hit
 * @returns true/false; when the mime type is missing or empty, that falsy
 *          value itself is returned
 */
function shouldDisplayRawImage(hit) {
    const source = hit["_source"];
    const mime = source["mime"];

    if (!mime) {
        return mime;
    }
    if (!mime.startsWith("image/") || source["parent"]) {
        return false;
    }

    const excludedCodecs = ["tiff", "raw", "ppm"];
    return !excludedCodecs.includes(source["videoc"]);
}
/**
 * Create an empty <div> that reserves vertical space for a thumbnail.
 *
 * The height is 64px (small) or 175px (regular), unless w > h and
 * base / w / h reaches the limit (100 small, 272 regular), in which case it is
 * base * w / h.
 * NOTE(review): the limit test divides by both w and h, so it only triggers
 * for tiny dimensions; presumably a different formula was intended. Confirm.
 *
 * @param w     image width
 * @param h     image height
 * @param small true for the list-mode (small) thumbnail
 * @returns the placeholder element
 */
function makePlaceholder(w, h, small) {
    const base = small ? 64 : 175;
    const limit = small ? 100 : 272;

    let height = base;
    if (w > h && (base / w / h) >= limit) {
        height = (base * w / h);
    }

    const placeholder = document.createElement("div");
    placeholder.setAttribute("style", `height: ${height}px`);
    return placeholder;
}
/**
 * Return the extension of a hit with a leading dot.
 *
 * @param hit search hit
 * @returns ".<extension>", or "" when the extension is missing or empty
 */
function ext(hit) {
    const source = hit["_source"];
    if (!source.hasOwnProperty("extension") || source["extension"] === "") {
        return "";
    }
    return "." + source["extension"];
}
/**
 * Build the "file-title" element of a hit.
 *
 * The visible text is the (possibly highlighted) name plus extension; the
 * tooltip is "<path>/<name><extension>".
 *
 * @param hit search hit
 * @returns the title <div>
 */
function makeTitle(hit) {
    const source = hit["_source"];
    const extension = ext(hit);

    const titleEl = document.createElement("div");
    titleEl.setAttribute("class", "file-title");
    applyNameToTitle(hit, titleEl, extension);
    titleEl.setAttribute("title", source["path"] + "/" + source["name"] + extension);

    return titleEl;
}
/**
 * Build the badge elements shown for a hit.
 *
 * Produces a codec badge (video/image hits use "videoc", audio hits use
 * "audioc") followed by one badge per user tag. A user tag has the form
 * "a.b.name#rrggbb": only the last dot-separated component is displayed and
 * the optional part after "#" is used as the background colour.
 *
 * @param hit          search hit
 * @param mimeCategory first component of the mime type (may be null)
 * @returns array of <span> badge elements
 */
function getTags(hit, mimeCategory) {
    const source = hit["_source"];
    const tags = [];

    const makeBadge = (cssClass, text) => {
        const badge = document.createElement("span");
        badge.setAttribute("class", cssClass);
        badge.appendChild(document.createTextNode(text));
        return badge;
    };

    switch (mimeCategory) {
        case "video":
        case "image":
            if (source.hasOwnProperty("videoc") && source["videoc"]) {
                tags.push(makeBadge("badge badge-pill badge-video", source["videoc"].replace(" ", "")));
            }
            break;
        case "audio":
            if (source.hasOwnProperty("audioc") && source["audioc"]) {
                tags.push(makeBadge("badge badge-pill badge-audio", source["audioc"]));
            }
            break;
    }

    // User tags
    if (source.hasOwnProperty("tag")) {
        source["tag"].forEach(tag => {
            const tokens = tag.split("#");
            const nameParts = tokens[0].split(".");
            const userTag = makeBadge("badge badge-pill badge-user", nameParts[nameParts.length - 1]);

            if (tokens.length > 1) {
                const bg = "#" + tokens[1];
                // lum() drops the first character of its argument, so it must be
                // given the "#"-prefixed colour; passing the bare hex digits lost
                // the leading digit and chose the wrong text colour.
                const fg = lum(bg) > 40 ? "#000" : "#fff";
                userTag.setAttribute("style", `background-color: ${bg}; color: ${fg}`);
            }

            tags.push(userTag);
        });
    }

    return tags;
}
/**
 * Create the click handler of an info button.
 *
 * The handler fetches the document via getDocumentInfo(), fills the modal
 * with a Field/Value table of selected keys, appends the "content" text
 * when present, and opens the modal.
 *
 * @param hit search hit the button belongs to
 * @returns click handler
 */
function infoButtonCb(hit) {
    const displayFields = new Set([
        "mime", "size", "mtime", "path", "title", "width", "height", "duration", "audioc", "videoc",
        "bitrate", "artist", "album", "album_artist", "genre", "font_name", "tag"
    ]);

    // Keys shown in the table: whitelisted fields plus prefixed metadata
    const isDisplayed = key =>
        key.startsWith("_keyword.")
        || key.startsWith("_text.")
        || displayFields.has(key)
        || key.startsWith("exif_");

    return () => {
        getDocumentInfo(hit["_id"]).then(doc => {
            $("#modal-title").text(doc["name"] + ext(hit));

            const headerRow = $("<tr>")
                .append($("<th>").text("Field"))
                .append($("<th>").text("Value"));
            const tbody = $("<tbody>");
            const table = $("<table class='table table-sm'>")
                .append($("<thead>").append(headerRow))
                .append(tbody);

            $("#modal-body").empty().append(table);

            for (const key of Object.keys(doc)) {
                if (!isDisplayed(key)) {
                    continue;
                }
                const row = $("<tr>")
                    .append($("<td>").text(key))
                    .append($("<td>").text(doc[key]));
                tbody.append(row);
            }

            if (doc.hasOwnProperty("content") && doc["content"]) {
                $("#modal-body").append($("<div class='content-div'>").text(doc["content"]))
            }

            $("#modal").modal();
        });
    }
}
/**
 * Build the grid-mode card for one search hit.
 *
 * The card contains, depending on the hit: a highlighted content excerpt, a
 * thumbnail wrapper with an optional overlay badge (resolution for images,
 * duration for videos), an audio player, and a body holding the info button,
 * the title link ("f/<id>") and the tag/size row.
 *
 * @param hit search hit
 * @returns the card <div>
 */
function createDocCard(hit) {
let docCard = document.createElement("div");
docCard.setAttribute("class", "card");
let docCardBody = document.createElement("div");
docCardBody.setAttribute("class", "card-body document");
//Title
let title = makeTitle(hit);
let isSubDocument = false;
let link = document.createElement("a");
link.setAttribute("href", "f/" + hit["_id"]);
link.setAttribute("target", "_blank");
link.style.maxWidth = "calc(100% - 1.2rem)";
link.appendChild(title);
// Hits with a "parent" key are flagged as sub-documents; gif hover and the
// audio player below are disabled for them
if (hit["_source"].hasOwnProperty("parent")) {
docCard.classList.add("sub-document");
isSubDocument = true;
}
let tagContainer = document.createElement("div");
tagContainer.setAttribute("class", "card-text");
// Thumbnail, overlay, codec/user tags, content excerpt and audio player are
// only built when the hit has a mime type
if (hit["_source"].hasOwnProperty("mime") && hit["_source"]["mime"] !== null) {
let thumbnailOverlay = null;
let imgWrapper = document.createElement("div");
imgWrapper.setAttribute("style", "position: relative");
imgWrapper.setAttribute("class", "img-wrapper");
let mimeCategory = hit["_source"]["mime"].split("/")[0];
//Thumbnail
let thumbnail = makeThumbnail(mimeCategory, hit, imgWrapper, false);
//Thumbnail overlay
switch (mimeCategory) {
case "image":
thumbnailOverlay = document.createElement("div");
thumbnailOverlay.setAttribute("class", "card-img-overlay");
//Resolution
if (hit["_source"].hasOwnProperty("width") && hit["_source"]["width"] > 32 && hit["_source"]["height"] > 32) {
let resolutionBadge = document.createElement("span");
resolutionBadge.setAttribute("class", "badge badge-resolution");
// NOTE(review): this check is already guaranteed by the enclosing condition
if (hit["_source"].hasOwnProperty("width")) {
resolutionBadge.appendChild(document.createTextNode(hit["_source"]["width"] + "x" + hit["_source"]["height"]));
}
thumbnailOverlay.appendChild(resolutionBadge);
}
// Hover
if (thumbnail && hit["_source"]["videoc"] === "gif" && !isSubDocument) {
gifOver(thumbnail, hit);
}
break;
case "video":
//Duration
if (hit["_source"].hasOwnProperty("duration")) {
thumbnailOverlay = document.createElement("div");
thumbnailOverlay.setAttribute("class", "card-img-overlay");
const durationBadge = document.createElement("span");
durationBadge.setAttribute("class", "badge badge-resolution");
durationBadge.appendChild(document.createTextNode(humanTime(hit["_source"]["duration"])));
thumbnailOverlay.appendChild(durationBadge);
}
}
// Tags
let tags = getTags(hit, mimeCategory);
for (let i = 0; i < tags.length; i++) {
tagContainer.appendChild(tags[i]);
}
//Content
let contentHl = getContentHighlight(hit);
if (contentHl !== undefined) {
const contentDiv = document.createElement("div");
contentDiv.setAttribute("class", "content-div");
// Inserted as HTML so the highlight markup is rendered
contentDiv.insertAdjacentHTML('afterbegin', contentHl);
docCard.appendChild(contentDiv);
}
// makeThumbnail() returns null when the hit has no thumbnail
if (thumbnail !== null) {
docCard.appendChild(imgWrapper);
}
//Audio
if (mimeCategory === "audio" && hit["_source"].hasOwnProperty("audioc") && !isSubDocument) {
let audio = document.createElement("audio");
audio.setAttribute("preload", "none");
audio.setAttribute("class", "audio-fit fit");
audio.setAttribute("controls", "");
audio.setAttribute("type", hit["_source"]["mime"]);
audio.setAttribute("src", "f/" + hit["_id"]);
docCard.appendChild(audio)
}
if (thumbnailOverlay !== null) {
imgWrapper.appendChild(thumbnailOverlay);
}
}
//Size tag
let sizeTag = document.createElement("small");
sizeTag.appendChild(document.createTextNode(humanFileSize(hit["_source"]["size"])));
sizeTag.setAttribute("class", "text-muted");
tagContainer.appendChild(sizeTag);
// Info button and title link share one flex row
const titleWrapper = document.createElement("div");
titleWrapper.style.display = "flex";
const infoButton = makeInfoButton(hit);
titleWrapper.appendChild(infoButton);
titleWrapper.appendChild(link);
docCardBody.appendChild(titleWrapper);
docCard.appendChild(docCardBody);
docCardBody.appendChild(tagContainer);
return docCard;
}
/**
 * Create the thumbnail <img> of a hit and wire its click behaviour.
 *
 * A placeholder is appended to imgWrapper immediately; once the image has
 * loaded the placeholder is removed and the image appended in its place. If
 * the image fails to load, the whole wrapper is removed.
 *
 * Clicking opens the original image or the video in a lity lightbox when
 * shouldDisplayRawImage() / shouldPlayVideo() allow it.
 *
 * @param mimeCategory first component of the mime type (not used here)
 * @param hit          search hit
 * @param imgWrapper   element receiving placeholder, image and play overlay
 * @param small        true for the list-mode (small) thumbnail
 * @returns the <img> element, or null when the hit has no "thumbnail" key
 */
function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
    if (!hit["_source"].hasOwnProperty("thumbnail")) {
        return null;
    }

    let thumbnail = document.createElement("img");
    if (small) {
        thumbnail.setAttribute("class", "fit-sm");
    } else {
        thumbnail.setAttribute("class", "card-img-top fit");
    }
    thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);

    if (shouldDisplayRawImage(hit)) {
        thumbnail.addEventListener("click", () => {
            const l = lity(`f/${hit["_id"]}#.jpg`);
            // Close the lightbox on the next scroll. {once: true} drops the
            // listener after it fires; previously every click left another
            // permanent scroll listener (and lightbox reference) on window.
            window.addEventListener("scroll", () => l.close(), {once: true});
        });
        thumbnail.classList.add("pointer");
    } else if (shouldPlayVideo(hit)) {
        thumbnail.addEventListener("click", () => lity(`f/${hit["_id"]}#.mp4`));
        thumbnail.classList.add("pointer");

        if (!small) {
            const playOverlay = document.createElement("div");
            playOverlay.innerHTML = '<svg viewBox="0 0 494.942 494.942" xmlns="http://www.w3.org/2000/svg"><path d="m35.353 0 424.236 247.471-424.236 247.471z"/></svg>';
            playOverlay.classList.add("play");
            imgWrapper.prepend(playOverlay);
        }
    }

    const placeholder = makePlaceholder(hit["_source"]["width"], hit["_source"]["height"], small);
    imgWrapper.appendChild(placeholder);

    thumbnail.addEventListener("error", () => {
        imgWrapper.remove();
    });
    thumbnail.addEventListener("load", () => {
        placeholder.remove();
        imgWrapper.appendChild(thumbnail);
    });

    return thumbnail;
}
/**
 * Create the info icon that opens the details modal of a hit.
 *
 * @param hit search hit
 * @returns <span class="info-icon"> with its click handler attached
 */
function makeInfoButton(hit) {
    const button = document.createElement("span");
    button.addEventListener("click", infoButtonCb(hit));
    button.setAttribute("class", "info-icon");
    return button;
}
/**
 * Build the list-mode row for one search hit.
 *
 * The row holds an optional small thumbnail, the info button and title link
 * ("f/<id>"), an optional highlighted content excerpt, and a line with the
 * (possibly highlighted) path followed by the tag and size badges.
 *
 * @param hit search hit
 * @returns the list item <div>
 */
function createDocLine(hit) {
    const mime = hit["_source"]["mime"];
    let mimeCategory = mime ? mime.split("/")[0] : null;
    let tags = getTags(hit, mimeCategory);

    let imgWrapper = document.createElement("div");
    imgWrapper.setAttribute("class", "align-self-start mr-1 wrapper-sm");

    let media = document.createElement("div");
    media.setAttribute("class", "media");

    const line = document.createElement("div");
    line.setAttribute("class", "list-group-item flex-column align-items-start");

    // The previous code also assigned an undeclared `isSubDocument` here, which
    // created an implicit global (a ReferenceError in strict mode) that this
    // function never read; only the CSS class is needed.
    if (hit["_source"].hasOwnProperty("parent")) {
        line.classList.add("sub-document");
    }

    const infoButton = makeInfoButton(hit);
    const title = makeTitle(hit);

    let link = document.createElement("a");
    link.setAttribute("href", "f/" + hit["_id"]);
    link.setAttribute("target", "_blank");
    link.style.maxWidth = "calc(100% - 1.2rem)";
    link.appendChild(title);

    const titleDiv = document.createElement("div");
    const titleWrapper = document.createElement("div");
    titleWrapper.style.display = "flex";
    titleWrapper.appendChild(infoButton);
    titleWrapper.appendChild(link);
    titleDiv.appendChild(titleWrapper);

    line.appendChild(media);

    // Leave room for the thumbnail column only when there is one
    let thumbnail = makeThumbnail(mimeCategory, hit, imgWrapper, true);
    if (thumbnail) {
        media.appendChild(imgWrapper);
        titleDiv.style.maxWidth = "calc(100% - 64px)";
    } else {
        titleDiv.style.maxWidth = "100%";
    }
    media.appendChild(titleDiv);

    // Content
    let contentHl = getContentHighlight(hit);
    if (contentHl !== undefined) {
        const contentDiv = document.createElement("div");
        contentDiv.setAttribute("class", "content-div");
        contentDiv.insertAdjacentHTML("afterbegin", contentHl);
        titleDiv.appendChild(contentDiv);
    }

    let pathLine = document.createElement("div");
    pathLine.setAttribute("class", "path-row");

    let path = document.createElement("div");
    path.setAttribute("class", "path-line");
    path.setAttribute("title", hit["_source"]["path"] + "/");
    const pathHighlight = getPathHighlight(hit);
    if (pathHighlight) {
        // Inserted as HTML so the highlight markup is rendered
        path.insertAdjacentHTML("afterbegin", pathHighlight + "/");
    } else {
        path.appendChild(document.createTextNode(hit["_source"]["path"] + "/"));
    }

    let tagContainer = document.createElement("div");
    tagContainer.setAttribute("class", "tag-container");
    for (let i = 0; i < tags.length; i++) {
        tagContainer.appendChild(tags[i]);
    }

    //Size tag
    let sizeTag = document.createElement("small");
    sizeTag.appendChild(document.createTextNode(humanFileSize(hit["_source"]["size"])));
    sizeTag.setAttribute("class", "text-muted");
    tagContainer.appendChild(sizeTag);

    titleDiv.appendChild(pathLine);
    pathLine.appendChild(path);
    pathLine.appendChild(tagContainer);

    return line;
}
/**
 * Create the animated, full-width progress bar shown while a search runs.
 *
 * @returns <div class="progress"> containing the striped bar
 */
function makePreloader() {
    const bar = document.createElement("div");
    bar.setAttribute("class", "progress-bar progress-bar-striped progress-bar-animated");
    bar.setAttribute("style", "width: 100%");

    const container = document.createElement("div");
    container.setAttribute("class", "progress");
    container.appendChild(bar);

    return container;
}
/**
 * Create the "<loaded> / <total>" separator inserted between result pages.
 *
 * Reads the global docCount for the number of results loaded so far.
 *
 * @param searchResult search response carrying the total_count aggregation
 * @returns the indicator <div>
 */
function makePageIndicator(searchResult) {
    const totalHits = searchResult["aggregations"]["total_count"]["value"];
    const label = document.createTextNode(docCount + " / " + totalHits);

    const indicator = document.createElement("div");
    indicator.setAttribute("class", "page-indicator font-weight-light");
    indicator.appendChild(label);

    return indicator;
}
/**
 * Build the statistics card shown above the first page of results.
 *
 * The card shows "<total> results in <took>ms", a grid/list display toggle,
 * a sort-mode dropdown built from SORT_MODES and, when there is at least one
 * hit, the summed size of all matches. Changing the display or sort mode
 * stores the choice through CONF and re-runs the search.
 *
 * @param searchResult search response (reads "took" and the total_count /
 *                     total_size aggregations)
 * @returns the card <div>
 */
function makeStatsCard(searchResult) {
let statsCard = document.createElement("div");
statsCard.setAttribute("class", "card stat");
let statsCardBody = document.createElement("div");
statsCardBody.setAttribute("class", "card-body");
// Stats
let stat = document.createElement("span");
const totalHits = searchResult["aggregations"]["total_count"]["value"];
stat.appendChild(document.createTextNode(totalHits + " results in " + searchResult["took"] + "ms"));
statsCardBody.appendChild(stat);
// Display mode
const resultMode = document.createElement("div");
resultMode.setAttribute("class", "btn-group btn-group-toggle");
resultMode.setAttribute("data-toggle", "buttons");
resultMode.style.cssFloat = "right";
const listMode = document.createElement("label");
listMode.setAttribute("class", "btn btn-primary");
listMode.setAttribute("title", "List mode");
listMode.innerHTML = '<svg width="20px" height="20px" role="img" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><path fill="currentColor" d="M80 368H16a16 16 0 0 0-16 16v64a16 16 0 0 0 16 16h64a16 16 0 0 0 16-16v-64a16 16 0 0 0-16-16zm0-320H16A16 16 0 0 0 0 64v64a16 16 0 0 0 16 16h64a16 16 0 0 0 16-16V64a16 16 0 0 0-16-16zm0 160H16a16 16 0 0 0-16 16v64a16 16 0 0 0 16 16h64a16 16 0 0 0 16-16v-64a16 16 0 0 0-16-16zm416 176H176a16 16 0 0 0-16 16v32a16 16 0 0 0 16 16h320a16 16 0 0 0 16-16v-32a16 16 0 0 0-16-16zm0-320H176a16 16 0 0 0-16 16v32a16 16 0 0 0 16 16h320a16 16 0 0 0 16-16V80a16 16 0 0 0-16-16zm0 160H176a16 16 0 0 0-16 16v32a16 16 0 0 0 16 16h320a16 16 0 0 0 16-16v-32a16 16 0 0 0-16-16z"></path></svg>';
const gridMode = document.createElement("label");
gridMode.setAttribute("class", "btn btn-primary");
gridMode.setAttribute("title", "Grid mode");
gridMode.innerHTML = '<svg width="20px" height="20px" role="img" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><path fill="currentColor" d="M149.333 56v80c0 13.255-10.745 24-24 24H24c-13.255 0-24-10.745-24-24V56c0-13.255 10.745-24 24-24h101.333c13.255 0 24 10.745 24 24zm181.334 240v-80c0-13.255-10.745-24-24-24H205.333c-13.255 0-24 10.745-24 24v80c0 13.255 10.745 24 24 24h101.333c13.256 0 24.001-10.745 24.001-24zm32-240v80c0 13.255 10.745 24 24 24H488c13.255 0 24-10.745 24-24V56c0-13.255-10.745-24-24-24H386.667c-13.255 0-24 10.745-24 24zm-32 80V56c0-13.255-10.745-24-24-24H205.333c-13.255 0-24 10.745-24 24v80c0 13.255 10.745 24 24 24h101.333c13.256 0 24.001-10.745 24.001-24zm-205.334 56H24c-13.255 0-24 10.745-24 24v80c0 13.255 10.745 24 24 24h101.333c13.255 0 24-10.745 24-24v-80c0-13.255-10.745-24-24-24zM0 376v80c0 13.255 10.745 24 24 24h101.333c13.255 0 24-10.745 24-24v-80c0-13.255-10.745-24-24-24H24c-13.255 0-24 10.745-24 24zm386.667-56H488c13.255 0 24-10.745 24-24v-80c0-13.255-10.745-24-24-24H386.667c-13.255 0-24 10.745-24 24v80c0 13.255 10.745 24 24 24zm0 160H488c13.255 0 24-10.745 24-24v-80c0-13.255-10.745-24-24-24H386.667c-13.255 0-24 10.745-24 24v80c0 13.255 10.745 24 24 24zM181.333 376v80c0 13.255 10.745 24 24 24h101.333c13.255 0 24-10.745 24-24v-80c0-13.255-10.745-24-24-24H205.333c-13.255 0-24 10.745-24 24z"></path></svg>';
resultMode.appendChild(gridMode);
resultMode.appendChild(listMode);
// Mark the currently configured display mode as active
if (CONF.options.display === "grid") {
gridMode.classList.add("active")
} else {
listMode.classList.add("active")
}
// Switching mode stores the choice and re-runs the search
gridMode.addEventListener("click", () => {
CONF.options.display = "grid";
CONF.save();
searchDebounced();
});
listMode.addEventListener("click", () => {
CONF.options.display = "list";
CONF.save();
searchDebounced();
});
statsCardBody.appendChild(resultMode);
// Sort mode
const sortMode = document.createElement("div");
sortMode.setAttribute("class", "dropdown");
sortMode.style.cssFloat = "right";
sortMode.style.marginRight = "10px";
const sortModeBtn = document.createElement("button");
sortModeBtn.setAttribute("class", "btn btn-md btn-primary dropdown-toggle");
sortModeBtn.setAttribute("id", "sortModeBtn");
sortModeBtn.setAttribute("type", "button");
sortModeBtn.setAttribute("data-toggle", "dropdown");
sortModeBtn.setAttribute("aria-haspopup", "true");
sortModeBtn.setAttribute("aria-expanded", "false");
sortModeBtn.setAttribute("title", "Sort options");
sortModeBtn.innerHTML = '<svg aria-hidden="true" width="20px" height="20px" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 320 512"><path fill="currentColor" d="M41 288h238c21.4 0 32.1 25.9 17 41L177 448c-9.4 9.4-24.6 9.4-33.9 0L24 329c-15.1-15.1-4.4-41 17-41zm255-105L177 64c-9.4-9.4-24.6-9.4-33.9 0L24 183c-15.1 15.1-4.4 41 17 41h238c21.4 0 32.1-25.9 17-41z"></path></svg>';
const sortModeMenu = document.createElement("div");
sortModeMenu.setAttribute("class", "dropdown-menu");
sortModeMenu.setAttribute("aria-labelledby", "sortModeBtn");
// One menu entry per SORT_MODES key; clicking stores the key and re-runs the search
Object.keys(SORT_MODES).forEach(mode => {
const item = document.createElement("div");
item.setAttribute("class", "dropdown-item");
item.appendChild(document.createTextNode(SORT_MODES[mode].text));
sortModeMenu.appendChild(item);
item.onclick = function () {
CONF.options.sort = mode;
CONF.save();
searchDebounced();
};
});
sortMode.appendChild(sortModeBtn);
sortMode.appendChild(sortModeMenu);
statsCardBody.appendChild(sortMode);
// Total size of all matches, omitted when there are no hits
if (totalHits !== 0) {
let sizeStat = document.createElement("div");
sizeStat.appendChild(document.createTextNode(humanFileSize(searchResult["aggregations"]["total_size"]["value"])));
statsCardBody.appendChild(sizeStat);
}
statsCard.appendChild(statsCardBody);
return statsCard;
}
/**
 * Create the container that receives one page of results.
 *
 * @returns <div> with class "bricklayer" in grid mode, "list-group" otherwise
 */
function makeResultContainer() {
    const container = document.createElement("div");
    const cssClass = CONF.options.display === "grid" ? "bricklayer" : "list-group";
    container.setAttribute("class", cssClass);
    return container;
}

5
src/static/js/lity.min.js vendored Normal file

File diff suppressed because one or more lines are too long

680
src/static/js/search.js Normal file
View File

@@ -0,0 +1,680 @@
// Number of hits requested per search page
const SIZE = 40;
// Node data fed to the mime type and tag InspireTree instances
let mimeMap = [];
let tagMap = [];
// InspireTree instances, assigned once their aggregation requests return
let mimeTree;
let tagTree;
let searchBar = document.getElementById("searchBar");
let pathBar = document.getElementById("pathBar");
// Last hit of the most recent response; passed to search() to load the next page
let lastDoc = null;
// True when the last response returned fewer than SIZE hits
let reachedEnd = false;
// Number of hits inserted for the current search
let docCount = 0;
// Written by the scroll handler and searchDebounced; not read in this part of the file
let coolingDown = false;
// Starts true so no search runs before the tag tree has been built
let searchBusy = true;
// Ids of the indices currently selected in the #indices list
let selectedIndices = [];
// Index id -> index name
let indexMap = {};
// Size filter bounds in bytes, updated by the size slider
let size_min = 0;
let size_max = 10000000000000;
// Date filter bounds (mtime values); null means no bound on that side
let date_min = null;
let date_max = null;
/**
 * Available sort modes, keyed by the value stored in CONF.options.sort.
 *
 *  - text: label shown in the sort dropdown
 *  - mode: sort clause sent with the search request; every mode uses "_tie"
 *          ascending as its second sort key
 *  - key:  extracts a hit's primary sort value, used to build "search_after"
 *          when loading the next page
 *
 * Declared with const: the previous bare assignment created an implicit
 * global, which is a ReferenceError in strict mode.
 */
const SORT_MODES = {
    score: {
        text: "Relevance",
        mode: [
            {_score: {order: "desc"}},
            {_tie: {order: "asc"}}
        ],
        key: hit => hit["_score"]
    },
    date_asc: {
        text: "Date (Ascending)",
        mode: [
            {mtime: {order: "asc"}},
            {_tie: {order: "asc"}}
        ],
        key: hit => hit["_source"]["mtime"]
    },
    date_desc: {
        text: "Date (Descending)",
        mode: [
            {mtime: {order: "desc"}},
            {_tie: {order: "asc"}}
        ],
        key: hit => hit["_source"]["mtime"]
    },
    size_asc: {
        text: "Size (Ascending)",
        mode: [
            {size: {order: "asc"}},
            {_tie: {order: "asc"}}
        ],
        key: hit => hit["_source"]["size"]
    },
    size_desc: {
        text: "Size (Descending)",
        mode: [
            {size: {order: "desc"}},
            {_tie: {order: "asc"}}
        ],
        key: hit => hit["_source"]["size"]
    },
};
/**
 * Show a persistent toast reporting an Elasticsearch connection error.
 */
function showEsError() {
    const message = "sist2 web module encountered an error while connecting " +
        "to Elasticsearch. See server logs for more information.";

    const toastOptions = {
        heading: "Elasticsearch connection error",
        text: message,
        stack: false,
        bgColor: "#a94442",
        textColor: "#f2dede",
        position: 'bottom-right',
        hideAfter: false
    };

    $.toast(toastOptions);
}
/**
 * Page initialisation: load the stored settings, attach path auto-completion
 * to #pathBar and wire the search and path bars to the debounced search.
 */
window.onload = () => {
    CONF.load();

    // Suggestions are file system paths and may contain quotes or markup
    // characters; escape them before building the suggestion HTML.
    const escapeHtml = text => String(text)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");

    new autoComplete({
        selector: '#pathBar',
        minChars: 1,
        delay: 400,
        renderItem: function (item) {
            const safe = escapeHtml(item);
            return '<div class="autocomplete-suggestion" data-val="' + safe + '">' + safe + '</div>';
        },
        source: async function (term, suggest) {
            // Path suggestions can be switched off in the settings
            if (!CONF.options.suggestPath) {
                return []
            }

            term = term.toLowerCase();
            const choices = await getPathChoices();

            // Case-insensitive substring match
            const matches = choices.filter(choice => choice.toLowerCase().includes(term));
            suggest(matches.sort());
        },
        onSelect: function () {
            searchDebounced();
        }
    });

    searchBar.addEventListener("keyup", searchDebounced);
    // The path filter only triggers a search on Enter
    pathBar.addEventListener("keyup", e => {
        if (e.key === "Enter") {
            searchDebounced();
        }
    });
};
/**
 * Re-run the search after the fuzzy toggle changed; the toggle state itself
 * is read when the query is built.
 */
function toggleFuzzy() {
    searchDebounced();
}
// Load the index list, fill the #indices selector and build the path tree.
// Indices named in the "i" URL parameter (comma-separated names) are
// preselected; without that parameter every index whose name does not
// contain "(nsfw)" is preselected.
$.jsonPost("i").then(resp => {
    const urlIndices = (new URLSearchParams(location.search)).get("i");

    for (const idx of resp["indices"]) {
        indexMap[idx.id] = idx.name;

        const opt = $("<option>")
            .attr("value", idx.id)
            .append(idx.name);

        const preselect = urlIndices
            ? urlIndices.split(",").indexOf(idx.name) !== -1
            : !idx.name.includes("(nsfw)");

        if (preselect) {
            opt.attr("selected", true);
            selectedIndices.push(idx.id);
        }

        $("#indices").append(opt);
    }

    createPathTree("#pathTree");
});
/**
 * Fetch a document from "d/<id>".
 *
 * @param id document id
 * @returns the jQuery request object; showEsError is called on failure
 */
function getDocumentInfo(id) {
    const request = $.getJSON("d/" + id);
    return request.fail(showEsError);
}
/**
 * Create the "node.state.changed" handler of a filter tree.
 *
 * Only "checked" state changes are handled. A change on a regular node
 * deselects the "any" node; a change that leaves the "any" node unchecked
 * deselects the whole tree. The search is then re-run.
 *
 * @param tree InspireTree instance the handler is attached to
 * @returns handler taking (node, changedState)
 */
function handleTreeClick(tree) {
    return (node, e) => {
        if (e !== "checked") {
            return
        }

        const isAnyNode = node.id === "any";
        if (!isAnyNode) {
            tree.node("any").deselect();
        } else if (!node.itree.state.checked) {
            tree.deselect();
        }

        searchDebounced();
    }
}
// Mime types tree: aggregate every mime type, group the buckets by category
// (the part before "/") and render them as a checkbox tree with a trailing
// "All" node that is selected initially.
$.jsonPost("es", {
    aggs: {
        mimeTypes: {
            terms: {
                field: "mime",
                size: 10000
            }
        }
    },
    size: 0,
}).then(resp => {
    // Array.prototype.sort expects a negative/zero/positive result; the
    // previous comparator returned a boolean, which never reports "a before b"
    // and therefore does not reliably sort.
    const byKey = (a, b) => (a.key < b.key ? -1 : (a.key > b.key ? 1 : 0));

    resp["aggregations"]["mimeTypes"]["buckets"].sort(byKey).forEach(bucket => {
        let tmp = bucket["key"].split("/");
        let category = tmp[0];
        let mime = tmp[1];

        let category_exists = false;

        let child = {
            "id": bucket["key"],
            "text": `${mime} (${bucket["doc_count"]})`
        };

        // Attach the mime type to its category node, creating it if needed
        mimeMap.forEach(node => {
            if (node.text === category) {
                node.children.push(child);
                category_exists = true;
            }
        });

        if (!category_exists) {
            mimeMap.push({"text": category, children: [child]});
        }
    });
    mimeMap.push({"text": "All", "id": "any"});

    mimeTree = new InspireTree({
        selection: {
            mode: 'checkbox'
        },
        data: mimeMap
    });
    new InspireTreeDOM(mimeTree, {
        target: '#mimeTree'
    });
    mimeTree.on("node.state.changed", handleTreeClick(mimeTree));
    mimeTree.deselect();
    mimeTree.node("any").select();
});
// Tags tree: aggregate every tag, build the nested tag nodes with addTag()
// and render them as a checkbox tree with a trailing "All" node that is
// selected initially. Searching is unlocked once the tree is ready.
$.jsonPost("es", {
    aggs: {
        tags: {
            terms: {
                field: "tag",
                size: 10000
            }
        }
    },
    size: 0,
}).then(resp => {
    const buckets = resp["aggregations"]["tags"]["buckets"];
    buckets.sort((a, b) => a["key"].localeCompare(b["key"]));

    for (const bucket of buckets) {
        addTag(tagMap, bucket["key"], bucket["key"], bucket["doc_count"])
    }
    tagMap.push({"text": "All", "id": "any"});

    tagTree = new InspireTree({
        selection: {mode: 'checkbox'},
        data: tagMap
    });
    new InspireTreeDOM(tagTree, {target: '#tagTree'});

    tagTree.on("node.state.changed", handleTreeClick(tagTree));
    tagTree.node("any").select();

    searchBusy = false;
});
/**
 * Insert a dotted tag ("a.b.c", optionally followed by "#colour") into a
 * nested list of tree nodes.
 *
 * Each dot-separated component becomes one tree level. Only the last
 * component (the leaf) shows the document count in its text; every created
 * node carries the full tag id.
 *
 * @param map   array of sibling nodes to insert into (modified in place)
 * @param tag   tag, or remaining tag suffix when recursing
 * @param id    full tag id stored on the created nodes
 * @param count document count shown on the leaf
 */
function addTag(map, tag, id, count) {
    const parts = tag.split("#")[0].split(".");
    const head = parts[0];
    const isLeaf = parts.length === 1;
    const remainder = parts.slice(1).join(".");

    // Descend into every existing sibling with the same name
    let matched = false;
    for (const node of map) {
        if (node.name !== head) {
            continue;
        }
        matched = true;
        if (!isLeaf) {
            addTag(node.children, remainder, id, count);
        }
    }
    if (matched) {
        return;
    }

    const child = {
        id: id,
        text: isLeaf ? `${head} (${count})` : head,
        name: head,
        children: []
    };
    if (!isLeaf) {
        addTag(child.children, remainder, id, count);
    }
    map.push(child);
}
/**
 * Render hits into a result container and count them in docCount.
 *
 * Grid mode appends cards through the container's Bricklayer instance
 * (resultContainer._brick); list mode appends rows directly.
 *
 * @param resultContainer element created by makeResultContainer()
 * @param hits            array of search hits
 */
function insertHits(resultContainer, hits) {
    for (const hit of hits) {
        if (CONF.options.display === "grid") {
            resultContainer._brick.append(createDocCard(hit));
        } else {
            resultContainer.appendChild(createDocLine(hit));
        }
        docCount++;
    }
}
// Infinite scroll: load the next page when the viewport comes within 400px of
// the bottom of the page, unless a search is running or the end was reached.
window.addEventListener("scroll", function () {
    if (searchBusy) {
        return;
    }

    const threshold = 400;
    const nearBottom = (window.innerHeight + window.scrollY) >= document.body.offsetHeight - threshold;

    if (nearBottom && !reachedEnd) {
        coolingDown = true;
        search(lastDoc);
    }
});
/**
 * Collect the ids of the selected nodes of a filter tree.
 *
 * Only nodes whose text contains "(" are kept, i.e. nodes labelled with a
 * document count.
 *
 * @param tree object exposing selected(), e.g. an InspireTree instance
 * @returns ["any"] as soon as the "any" node is encountered, otherwise the
 *          collected ids
 */
function getSelectedNodes(tree) {
    const ids = [];

    for (const node of tree.selected()) {
        if (node.id === "any") {
            return ["any"]
        }
        //Only get children
        if (node.text.indexOf("(") !== -1) {
            ids.push(node.id);
        }
    }

    return ids
}
/**
 * Run a search and render one page of results.
 *
 * Without `after`, the current results are cleared and a new search is
 * started; with `after` (the last hit of the previous page), the next page
 * is requested through "search_after" and appended below a page indicator.
 *
 * The query combines the search bar text with the size, index, path, mime
 * type, tag and date filters. Does nothing while another search is running.
 *
 * @param after last hit of the previous page, or null for a new search
 */
function search(after = null) {
// NOTE(review): lastDoc is cleared even when the call is rejected by the
// busy check below — confirm this is intended
lastDoc = null;
if (searchBusy) {
return;
}
searchBusy = true;
let searchResults = document.getElementById("searchResults");
//Clear old search results
let preload;
if (!after) {
while (searchResults.firstChild) {
searchResults.removeChild(searchResults.firstChild);
}
preload = makePreloader();
searchResults.appendChild(preload);
}
let query = searchBar.value;
let empty = query === "";
// An empty query goes into "should" instead of "must"
let condition = empty ? "should" : "must";
let filters = [
{range: {size: {gte: size_min, lte: size_max}}},
{terms: {index: selectedIndices}}
];
// Searched fields with their boost factors
let fields = [
"name^8",
"content^3",
"album^8", "artist^8", "title^8", "genre^2", "album_artist^8",
"font_name^6"
];
if (CONF.options.searchInPath) {
fields.push("path.text^5");
}
// Fuzzy search additionally queries the nGram sub-fields
if ($("#fuzzyToggle").prop("checked")) {
fields.push("content.nGram");
if (CONF.options.searchInPath) {
fields.push("path.nGram");
}
fields.push("name.nGram^3");
}
let path = pathBar.value.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
if (path !== "") {
filters.push({term: {path: path}})
}
// Tree filters are skipped while the "any" node is selected
let mimeTypes = getSelectedNodes(mimeTree);
if (!mimeTypes.includes("any")) {
filters.push({terms: {"mime": mimeTypes}});
}
let tags = getSelectedNodes(tagTree);
if (!tags.includes("any")) {
filters.push({terms: {"tag": tags}});
}
// Date range; a null bound leaves that side open
if (date_min && date_max) {
filters.push({range: {mtime: {gte: date_min, lte: date_max}}})
} else if (date_min) {
filters.push({range: {mtime: {gte: date_min}}})
} else if (date_max) {
filters.push({range: {mtime: {lte: date_max}}})
}
let q = {
"_source": {
excludes: ["content", "_tie"]
},
query: {
bool: {
[condition]: {
simple_query_string: {
query: query,
fields: fields,
default_operator: "and"
}
},
filter: filters
}
},
"sort": SORT_MODES[CONF.options.sort].mode,
aggs:
{
total_size: {"sum": {"field": "size"}},
total_count: {"value_count": {"field": "size"}}
},
size: SIZE,
};
// Next page: continue after the sort key and id of the last hit
if (after) {
q.search_after = [SORT_MODES[CONF.options.sort].key(after), after["_id"]];
}
if (CONF.options.highlight) {
q.highlight = {
pre_tags: ["<mark>"],
post_tags: ["</mark>"],
fragment_size: CONF.options.fragmentSize,
number_of_fragments: 1,
order: "score",
fields: {
content: {},
// "content.nGram": {},
name: {},
"name.nGram": {},
font_name: {},
}
};
if (CONF.options.searchInPath) {
q.highlight.fields["path.text"] = {};
q.highlight.fields["path.nGram"] = {};
}
}
$.jsonPost("es", q).then(searchResult => {
let hits = searchResult["hits"]["hits"];
// Remember the last hit for the next page (undefined when hits is empty)
if (hits) {
lastDoc = hits[hits.length - 1];
}
// Names and paths are stored escaped; decode them for display
hits.forEach(hit => {
hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
});
if (!after) {
preload.remove();
searchResults.appendChild(makeStatsCard(searchResult));
} else {
let pageIndicator = makePageIndicator(searchResult);
searchResults.appendChild(pageIndicator);
}
//Setup page
let resultContainer = makeResultContainer();
searchResults.appendChild(resultContainer);
if (CONF.options.display === "grid") {
resultContainer._brick = new Bricklayer(resultContainer);
}
if (!after) {
docCount = 0;
}
// A short page means there is nothing more to load
reachedEnd = hits.length !== SIZE;
insertHits(resultContainer, hits);
searchBusy = false;
});
}
// Debounced entry point for starting a new search (500 ms)
let searchDebounced = _.debounce(() => {
    coolingDown = false;
    search();
}, 500);
//Size slider
// Slider positions are cube roots of byte sizes: both prettify and onChange
// cube the position to obtain bytes. The maximum position, 3684.03149864,
// cubed is roughly 5e10 bytes.
$("#sizeSlider").ionRangeSlider({
type: "double",
grid: false,
force_edges: true,
min: 0,
max: 3684.03149864,
from: 0,
to: 3684.03149864,
min_interval: 5,
drag_interval: true,
prettify: function (num) {
if (num === 0) {
return "0 B"
} else if (num >= 3684) {
// Top of the scale is shown as "<size>+"
return humanFileSize(num * num * num) + "+";
}
return humanFileSize(num * num * num)
},
onChange: function (e) {
size_min = (e.from * e.from * e.from);
size_max = (e.to * e.to * e.to);
// At the top of the scale the upper bound is reset to its initial value
if (e.to >= 3684) {
size_max = 10000000000000;
}
searchDebounced();
}
});
//Date slider
// The slider range comes from the minimum and maximum "mtime" over all
// documents. Values are treated as seconds: prettify multiplies by 1000
// before building a Date.
$.jsonPost("es", {
aggs: {
date_min: {min: {field: "mtime"}},
date_max: {max: {field: "mtime"}},
},
size: 0
}).then(resp => {
$("#dateSlider").ionRangeSlider({
type: "double",
grid: false,
force_edges: true,
min: resp["aggregations"]["date_min"]["value"],
max: resp["aggregations"]["date_max"]["value"],
from: resp["aggregations"]["date_min"]["value"],
// NOTE(review): the upper handle starts at the current time rather than at date_max — confirm intended
to: (Date.now() / 1000),
min_interval: 3600 * 24 * 7,
step: 3600 * 24,
drag_interval: true,
prettify: function (num) {
// Formats as YYYY-MM-DD in UTC
let date = (new Date(num * 1000));
return date.getUTCFullYear() + "-" + ("0" + (date.getUTCMonth() + 1)).slice(-2) + "-" + ("0" + date.getUTCDate()).slice(-2)
},
onFinish: function (e) {
// A handle resting on the edge of the range disables that bound
date_min = e.from === e.min ? null : e.from;
date_max = e.to === e.max ? null : e.to;
searchDebounced();
}
});
})
/**
 * Rebuild selectedIndices from the options selected in #indices and re-run
 * the search.
 */
function updateIndices() {
    const selectedOptions = $('#indices').find('option:selected').toArray();
    selectedIndices = selectedOptions.map(option => $(option).val());
    searchDebounced();
}
// Keep selectedIndices in sync with the #indices selector, and sync once at load
document.getElementById("indices").addEventListener("change", updateIndices);
updateIndices();
// Keyboard shortcut: "/" or Escape scrolls to the search bar and focuses it
window.onkeyup = function (e) {
if (e.key === "/" || e.key === "Escape") {
const bar = document.getElementById("searchBar");
bar.scrollIntoView();
bar.focus();
}
};
/**
 * Load the child folders of a path tree node.
 *
 * Aggregates the paths of the node's index that lie one to three levels
 * below the node (restricted to the node's own path prefix for non-root
 * nodes) and keeps only the shallowest path of each branch, so every branch
 * appears once.
 *
 * @param node tree node with id, index and depth
 * @returns promise resolving to an array of child node definitions, or false
 *          when the response contains no buckets
 */
function getNextDepth(node) {
    let q = {
        query: {
            bool: {
                filter: [
                    {term: {index: node.index}},
                    {range: {_depth: {gte: node.depth + 1, lte: node.depth + 3}}},
                ]
            }
        },
        aggs: {
            paths: {
                terms: {
                    field: "path",
                    size: 10000
                }
            }
        },
        size: 0
    };

    if (node.depth > 0) {
        q.query.bool.must = {
            prefix: {
                path: node.id,
            }
        };
    }

    return $.jsonPost("es", q).then(resp => {
        const buckets = resp["aggregations"]["paths"]["buckets"];
        if (!buckets) {
            return false;
        }

        // Paths already emitted as children; anything below them is skipped
        const paths = [];

        return buckets
            .filter(bucket => bucket.key.length > node.id.length || node.id.startsWith("/"))
            // Parents must come before their descendants for the dedupe below.
            // sort() needs a negative/zero/positive result; the previous boolean
            // comparator did not reliably order the buckets.
            .sort((a, b) => (a.key < b.key ? -1 : (a.key > b.key ? 1 : 0)))
            .map(bucket => {
                // Match on a whole path component so that "a/bc" is not
                // mistaken for a descendant of "a/b"
                if (paths.some(n => bucket.key.startsWith(n + "/"))) {
                    return null;
                }

                const name = node.id.startsWith("/") ? bucket.key : bucket.key.slice(node.id.length + 1);

                paths.push(bucket.key);

                return {
                    id: bucket.key,
                    text: `${name}/ (${bucket.doc_count})`,
                    depth: node.depth + 1,
                    index: node.index,
                    children: true,
                }
            }).filter(x => x !== null)
    })
}
/**
 * Create the "node.click" handler of the path tree.
 *
 * Clicking a non-root node copies its id into #pathBar, hides the path tree
 * modal and re-runs the search. The handler passed by the tree is always
 * invoked afterwards.
 *
 * @param tree path tree instance (not used by the handler)
 * @returns handler taking (event, node, handler)
 */
function handlePathTreeClick(tree) {
    return (event, node, handler) => {
        const isRoot = node.depth === 0;

        if (!isRoot) {
            $("#pathBar").val(node.id);
            $("#pathTreeModal").modal("hide");
            searchDebounced();
        }

        handler();
    }
}
/**
 * Build the lazily loaded path tree.
 *
 * One root node ("/[<index name>]") is added per selected index; children
 * are fetched on demand through getNextDepth().
 *
 * @param target selector of the element the tree is rendered into
 */
function createPathTree(target) {
    const pathTree = new InspireTree({
        data: function (node, resolve, reject) {
            return getNextDepth(node);
        },
        sort: "text"
    });

    for (const index of selectedIndices) {
        const rootNode = {
            id: "/" + index,
            text: `/[${indexMap[index]}]`,
            index: index,
            depth: 0,
            children: true
        };
        pathTree.addNode(rootNode);
    }

    new InspireTreeDOM(pathTree, {target: target});

    pathTree.on("node.click", handlePathTreeClick(pathTree));
}
/**
 * Request path suggestions for the current content of the path bar.
 *
 * @returns Promise resolving to the array of suggested paths; it is never
 *          rejected by this function
 */
function getPathChoices() {
    return new Promise(getPaths => {
        const suggestQuery = {
            suggest: {
                path: {
                    prefix: pathBar.value,
                    completion: {
                        field: "suggest-path",
                        skip_duplicates: true,
                        size: 10000
                    }
                }
            }
        };

        $.jsonPost("es", suggestQuery).then(resp => {
            const options = resp["suggest"]["path"][0]["options"];
            getPaths(options.map(opt => opt["_source"]["path"]));
        });
    })
}

205
src/static/js/util.js Normal file
View File

@@ -0,0 +1,205 @@
/**
 * Format a byte count with decimal (power of 1000) units and one decimal,
 * e.g. 1500000 -> "1.5M". Values below 1000 are printed as "<n> B".
 *
 * https://stackoverflow.com/questions/10420352
 *
 * @param bytes number of bytes
 * @returns formatted size
 */
function humanFileSize(bytes) {
    if (bytes === 0) {
        return "0 B"
    }

    const thresh = 1000;
    if (Math.abs(bytes) < thresh) {
        return bytes + ' B';
    }

    const units = ['k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'];
    const lastUnit = units.length - 1;

    // The first division always happens; keep dividing while another unit exists
    let unitIndex = 0;
    bytes /= thresh;
    while (Math.abs(bytes) >= thresh && unitIndex < lastUnit) {
        bytes /= thresh;
        unitIndex++;
    }

    return bytes.toFixed(1) + units[unitIndex];
}
/**
 * Format a duration in seconds as "HH:MM:SS". Fractions of a second are
 * dropped and each field is zero-padded to at least two digits.
 *
 * https://stackoverflow.com/questions/6312993
 *
 * @param sec_num duration in seconds
 * @returns formatted duration
 */
function humanTime(sec_num) {
    const total = Math.floor(sec_num);
    const hours = Math.floor(total / 3600);
    const minutes = Math.floor((total - (hours * 3600)) / 60);
    const seconds = total - (hours * 3600) - (minutes * 60);

    const pad = value => (value < 10 ? "0" + value : value);

    return pad(hours) + ":" + pad(minutes) + ":" + pad(seconds);
}
/**
 * Create a debounced version of a function.
 *
 * The returned function postpones calling `func` until `wait` milliseconds
 * have passed without another call; `func` then runs once with the `this`
 * value and arguments of the most recent call.
 *
 * The previous implementation also invoked `func` immediately on every call,
 * so nothing was actually debounced.
 *
 * @param func function to debounce
 * @param wait quiet period in milliseconds
 * @returns debounced function
 */
function debounce(func, wait) {
    let timeout;

    return function () {
        let context = this, args = arguments;

        let later = function () {
            timeout = null;
            func.apply(context, args);
        };

        // Restart the quiet period on every call
        clearTimeout(timeout);
        timeout = setTimeout(later, wait);
    };
}
/**
 * Compute the weighted brightness (0..255) of a colour.
 *
 * The first character of the argument is dropped before the rest is parsed
 * as a hexadecimal RGB value, so the colour must be passed with a one
 * character prefix such as "#" (e.g. "#ff0000").
 *
 * @param c colour string, prefix character followed by six hex digits
 * @returns 0.2126 * r + 0.7152 * g + 0.0722 * b
 */
function lum(c) {
    const rgb = parseInt(c.substring(1), 16);
    const channel = shift => (rgb >> shift) & 0xff;

    return 0.2126 * channel(16) + 0.7152 * channel(8) + 0.0722 * channel(0);
}
/**
 * Decode a string that uses "]" as its escape character.
 *
 *  - "]]" decodes to a literal "]"
 *  - "]" followed by two hexadecimal digits decodes to the character with
 *    that code
 *
 * The previous implementation parsed the slice starting at the "]" itself,
 * so the hex value was always NaN and every escape decoded to "\u0000". A
 * "]" that is not followed by two hex digits is now kept as is.
 * NOTE(review): each escape is decoded as one character code; if the writer
 * escapes individual UTF-8 bytes, multi-byte characters will need a further
 * decoding step. Confirm against the escaping side.
 *
 * @param str escaped string
 * @returns decoded string
 */
function strUnescape(str) {
    let result = "";

    for (let i = 0; i < str.length; i++) {
        const c = str[i];
        const next = str[i + 1];

        if (c !== ']') {
            result += c;
            continue;
        }

        if (next === ']') {
            result += c;
            i += 1;
            continue;
        }

        const hex = str.slice(i + 1, i + 3);
        if (/^[0-9a-fA-F]{2}$/.test(hex)) {
            result += String.fromCharCode(parseInt(hex, 16));
            i += 2;
        } else {
            // Malformed escape: keep the character untouched
            result += c;
        }
    }

    return result;
}
// Shared settings instance; its `options` are filled in by CONF.load().
const CONF = new Settings();
// Option values used by Settings.load() when nothing (or an incomplete set of
// options) is stored in the browser. The key list is also what load() uses to
// decide whether stored options are complete.
const _defaults = {
display: "grid",
fuzzy: true,
highlight: true,
sort: "score",
searchInPath: false,
treemapType: "cascaded",
treemapTiling: "squarify",
treemapGroupingDepth: 3,
treemapColor: "PuBuGn",
treemapSize: "large",
suggestPath: true,
fragmentSize: 100
};
/**
 * Reload the options through CONF.load() and copy them into the inputs of
 * the settings dialog.
 */
function loadSettings() {
    CONF.load();

    // [option name, input selector, how the input is written]
    const fields = [
        ["display", "#settingDisplay", "value"],
        ["fuzzy", "#settingFuzzy", "checked"],
        ["highlight", "#settingHighlight", "checked"],
        ["searchInPath", "#settingSearchInPath", "checked"],
        ["treemapTiling", "#settingTreemapTiling", "value"],
        ["treemapGroupingDepth", "#settingTreemapGroupingDepth", "value"],
        ["treemapColor", "#settingTreemapColor", "value"],
        ["treemapSize", "#settingTreemapSize", "value"],
        ["treemapType", "#settingTreemapType", "value"],
        ["suggestPath", "#settingSuggestPath", "checked"],
        ["fragmentSize", "#settingFragmentSize", "value"]
    ];

    fields.forEach(([option, selector, kind]) => {
        const input = $(selector);
        if (kind === "checked") {
            input.prop("checked", CONF.options[option]);
        } else {
            input.val(CONF.options[option]);
        }
    });
}
/**
 * Holder for the user options, persisted as JSON under the "options" key of
 * window.localStorage.
 *
 * Members:
 *   options   - current option values
 *   load()    - read the options from storage, falling back to a copy of
 *               `_defaults` when nothing usable is stored
 *   save()    - write the current options to storage
 *   _onUpdate - called after load()/save(); syncs the #fuzzyToggle checkbox
 */
function Settings() {
    this.options = {};
    this._onUpdate = function () {
        $("#fuzzyToggle").prop("checked", this.options.fuzzy);
    };
    this.load = function () {
        const raw = window.localStorage.getItem("options");

        let stored = null;
        if (raw !== null) {
            try {
                stored = JSON.parse(raw);
            } catch (e) {
                // Corrupt storage content: fall back to the defaults instead
                // of aborting the page initialisation.
                stored = null;
            }
        }

        // Stored options are only used when every known option is present.
        const complete = stored !== null && typeof stored === "object"
            && Object.keys(_defaults).every(k => Object.prototype.hasOwnProperty.call(stored, k));

        // Copy the defaults: `options` is written to by the settings dialog
        // and must not alias (and silently modify) the shared `_defaults`.
        this.options = complete ? stored : Object.assign({}, _defaults);

        this._onUpdate();
    };
    this.save = function () {
        window.localStorage.setItem("options", JSON.stringify(this.options));
        this._onUpdate();
    }
}
/**
 * Copy the inputs of the settings dialog into CONF.options, save them, then
 * refresh whichever page is displayed (search results and/or stats, depending
 * on which of `searchDebounced` / `updateStats` exists) and show a toast.
 */
function updateSettings() {
    // [option name, input selector, how the input is read]
    const fields = [
        ["display", "#settingDisplay", "value"],
        ["fuzzy", "#settingFuzzy", "checked"],
        ["highlight", "#settingHighlight", "checked"],
        ["searchInPath", "#settingSearchInPath", "checked"],
        ["treemapTiling", "#settingTreemapTiling", "value"],
        ["treemapGroupingDepth", "#settingTreemapGroupingDepth", "value"],
        ["treemapColor", "#settingTreemapColor", "value"],
        ["treemapSize", "#settingTreemapSize", "value"],
        ["treemapType", "#settingTreemapType", "value"],
        ["suggestPath", "#settingSuggestPath", "checked"],
        ["fragmentSize", "#settingFragmentSize", "value"]
    ];

    fields.forEach(([option, selector, kind]) => {
        const input = $(selector);
        CONF.options[option] = kind === "checked" ? input.prop("checked") : input.val();
    });

    CONF.save();

    if (typeof searchDebounced !== "undefined") {
        searchDebounced();
    }
    if (typeof updateStats !== "undefined") {
        updateStats();
    }

    const toastOptions = {
        heading: "Settings updated",
        text: "Settings saved to browser storage",
        stack: 3,
        bgColor: "#00a4bc",
        textColor: "#fff",
        position: 'bottom-right',
        hideAfter: 3000,
        loaderBg: "#08c7e8",
    };
    $.toast(toastOptions);
}
/**
 * POST `data` serialized as JSON to `url`.
 * On failure the error is logged to the console and showEsError() is called.
 *
 * @returns the jQuery request object, usable as a promise
 */
jQuery.jsonPost = function (url, data) {
    const request = jQuery.ajax({
        url: url,
        type: "post",
        data: JSON.stringify(data),
        contentType: "application/json"
    });

    return request.fail(err => {
        showEsError();
        console.log(err);
    });
};
/**
 * Toggle the "sist" cookie (set to "dark" when absent, expired when present)
 * and reload the page.
 */
function toggleTheme() {
    const cookiePresent = document.cookie.includes("sist");

    document.cookie = cookiePresent
        ? "sist=; Max-Age=-99999999;"
        : "sist=dark;SameSite=Strict";

    window.location.reload();
}

297
src/static/search.html Normal file
View File

@@ -0,0 +1,297 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>sist2 - Search</title>
<meta name='viewport' content='width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no'/>
<link href="css" rel="stylesheet" type="text/css">
</head>
<body>
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.5.0</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a class="btn ml-auto" href="/stats">Stats</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings</button>
<button class="btn" title="Toggle theme" onclick="toggleTheme()">Theme</button>
</nav>
<div class="container">
<div class="card">
<div class="card-body">
<div class="input-group">
<div class="input-group-prepend">
<div class="input-group-text">
<span title="Toggle fuzzy searching" onclick="document.getElementById('fuzzyToggle').click()">Fuzzy&nbsp;</span>
<input title="Toggle fuzzy searching" type="checkbox" id="fuzzyToggle"
onclick="toggleFuzzy()" checked>
</div>
</div>
<input id="searchBar" type="search" class="form-control" placeholder="Search">
<div class="input-group-append">
<button class="btn btn-outline-secondary small-btn" type="button" data-toggle="modal"
data-target="#help">?
</button>
<button class="btn btn-outline-secondary large-btn" type="button" data-toggle="modal"
data-target="#help">Help
</button>
</div>
</div>
<div class="row">
<div class="col">
<input title="File size" id="sizeSlider" name="size" width="100%">
</div>
<div class="col">
<div class="input-group" style="margin-bottom: 0.5em; margin-top: 1em">
<div class="input-group-prepend">
<button id="pathBarHelper" class="btn btn-outline-secondary" data-toggle="modal" data-target="#pathTreeModal">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 576 512" width="20px"><path d="M288 224h224a32 32 0 0 0 32-32V64a32 32 0 0 0-32-32H400L368 0h-80a32 32 0 0 0-32 32v64H64V8a8 8 0 0 0-8-8H40a8 8 0 0 0-8 8v392a16 16 0 0 0 16 16h208v64a32 32 0 0 0 32 32h224a32 32 0 0 0 32-32V352a32 32 0 0 0-32-32H400l-32-32h-80a32 32 0 0 0-32 32v64H64V128h192v64a32 32 0 0 0 32 32zm0 96h66.74l32 32H512v128H288zm0-288h66.74l32 32H512v128H288z"/></svg>
</button>
</div>
<input id="pathBar" type="search" class="form-control" placeholder="Filter path">
</div>
</div>
</div>
<input title="Date filter" id="dateSlider" name="size" width="100%">
<div class="row">
<div class="col">
<label for="indices">Search in indices</label>
<select class="custom-select" id="indices" multiple size="6"></select>
</div>
<div class="col" id="treeTabs">
<ul class="nav nav-tabs" role="tablist">
<li class="nav-item">
<a class="nav-link active" data-toggle="tab" href="#mime" role="tab" aria-controls="home"
aria-selected="true">Mime Types</a>
</li>
<li class="nav-item">
<a class="nav-link" data-toggle="tab" href="#tag" role="tab" aria-controls="profile"
aria-selected="false" title="User-defined tags">Tags</a>
</li>
</ul>
<div class="tab-content" id="myTabContent">
<div class="tab-pane fade show active" id="mime" role="tabpanel" aria-labelledby="home-tab">
<div id="mimeTree" class="tree"></div>
</div>
<div class="tab-pane fade" id="tag" role="tabpanel" aria-labelledby="profile-tab">
<div id="tagTree" class="tree"></div>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="modal" id="modal" tabindex="-1" role="dialog" aria-labelledby="modal-title" aria-hidden="true">
<div class="modal-dialog modal-lg modal-dialog-centered" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title" id="modal-title"></h5>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
</div>
<div class="modal-body" id="modal-body"></div>
</div>
</div>
</div>
<div class="modal" id="help" tabindex="-1" role="dialog" aria-labelledby="modal-title" aria-hidden="true">
<div class="modal-dialog modal-lg modal-dialog-centered" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">Search help</h5>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
</div>
<div class="modal-body">
<table class="table">
<tbody>
<tr>
<td><code>+</code></td>
<td>signifies AND operation</td>
</tr>
<tr>
<td><code>|</code></td>
<td>signifies OR operation</td>
</tr>
<tr>
<td><code>-</code></td>
<td>negates a single token</td>
</tr>
<tr>
<td><code>""</code></td>
<td>wraps a number of tokens to signify a phrase for searching</td>
</tr>
<tr>
<td><code>*</code></td>
<td>at the end of a term signifies a prefix query</td>
</tr>
<tr>
<td><code>(</code> and <code>)</code></td>
<td>signify precedence</td>
</tr>
<tr>
<td><code>~N</code></td>
<td>after a word signifies edit distance (fuzziness)</td>
</tr>
<tr>
<td><code>~N</code></td>
<td>after a phrase signifies slop amount</td>
</tr>
</tbody>
</table>
<p>For example: <code>"fried eggs" +(eggplant | potato) -frittata</code> will match the phrase
<i>fried eggs</i> and either <i>eggplant</i> or <i>potato</i>, but will ignore results
containing <i>frittata</i>.</p>
<p>When neither <code>+</code> nor <code>|</code> is specified, the default operator is <code>+</code> (and).</p>
<p>When the <b>Fuzzy</b> option is checked, partial matches are also returned.</p>
<br>
<p>For more information, see <a target="_blank"
href="//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html">Elasticsearch
documentation</a></p>
</div>
</div>
</div>
</div>
<div class="modal" id="settings" tabindex="-1" role="dialog" aria-labelledby="modal-title" aria-hidden="true">
<div class="modal-dialog modal-dialog-centered" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">Settings</h5>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
</div>
<div class="modal-body">
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingHighlight">
<label class="custom-control-label" for="settingHighlight">Enable highlighting</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingFuzzy">
<label class="custom-control-label" for="settingFuzzy">Set fuzzy search by default</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSearchInPath">
<label class="custom-control-label" for="settingSearchInPath">Enable matching query against document path</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSuggestPath">
<label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter bar</label>
</div>
<br/>
<div class="form-group">
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label>
</div>
<label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option>
<option value="list">List</option>
</select>
<hr/>
<h4>Stats</h4>
<div class="form-group">
<label for="settingTreemapType">Treemap type</label>
<select id="settingTreemapType" class="form-control form-control-sm">
<option value="cascaded">Cascaded</option>
<option value="flat">Flat (compact)</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapTiling">Treemap tiling</label>
<select id="settingTreemapTiling" class="form-control form-control-sm">
<option value="binary">Binary</option>
<option value="squarify">Squarify</option>
<option value="slice">Slice</option>
<option value="dice">Dice</option>
<option value="sliceDice">Slice &amp; Dice</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapGroupingDepth">Treemap color grouping depth (flat)</label>
<input type="number" class="form-control" id="settingTreemapGroupingDepth" min="1" max="10">
</div>
<div class="form-group">
<label for="settingTreemapColor">Treemap color (cascaded)</label>
<select id="settingTreemapColor" class="form-control form-control-sm">
<option value="PuBuGn">Purple-Blue-Green</option>
<option value="PuRd">Purple-Red</option>
<option value="PuBu">Purple-Blue</option>
<option value="YlOrBr">Yellow-Orange-Brown</option>
<option value="YlOrRd">Yellow-Orange-Red</option>
<option value="YlGn">Yellow-Green</option>
<option value="YlGnBu">Yellow-Green-Blue</option>
<option value="Plasma">Plasma</option>
<option value="Magma">Magma</option>
<option value="Inferno">Inferno</option>
<option value="Viridis">Viridis</option>
<option value="Turbo">Turbo</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapSize">Treemap size</label>
<select id="settingTreemapSize" class="form-control form-control-sm">
<option value="small">Small</option>
<option value="medium">Medium</option>
<option value="large">Large</option>
<option value="x-large">X-Large</option>
<option value="xx-large">XX-Large</option>
</select>
</div>
<br>
<button class="btn btn-primary ml-auto" onclick="updateSettings()">Update settings</button>
</div>
</div>
</div>
</div>
<div class="modal" id="pathTreeModal" tabindex="-1" role="dialog" aria-labelledby="modal-title" aria-hidden="true">
<div class="modal-dialog modal-lg" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">Select path</h5>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
</div>
<div class="modal-body">
<div id="pathTree" class="tree"></div>
</div>
</div>
</div>
</div>
<div id="searchResults"></div>
</div>
<script src="jslib" type="text/javascript"></script>
<script src="jssearch" type="text/javascript"></script>
</body>
</html>

800
src/static/stats.html Normal file
View File

@@ -0,0 +1,800 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>sist2 - Stats</title>
<meta name='viewport' content='width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no'/>
<link href="css" rel="stylesheet" type="text/css">
</head>
<body>
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.5.0</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" class="btn" href="/">Back</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings"
onclick="loadSettings()">Settings
</button>
<button class="btn" title="Toggle theme" onclick="toggleTheme()">Theme</button>
</nav>
<div class="container pb-3">
<div class="card">
<div class="card-body">
<label for="indices">Index</label>
<select id="indices" onchange="updateStats()"></select>
</div>
</div>
<div id="treemap-card" class="stats-card">
<button class="btn stats-btn" onclick="fullScreen('treemap-card')">Enlarge</button>
<button class="btn stats-btn" onclick="exportTreemap()">Export</button>
<svg id="treemap"></svg>
</div>
<div id="graphs-card" class="stats-card">
<button class="btn stats-btn" onclick="fullScreen('graphs-card')">Enlarge</button>
<div class="graph">
<svg id="agg_mime_size"></svg>
</div>
<div class="graph">
<svg id="agg_mime_count"></svg>
</div>
<div class="graph">
<svg id="date_histogram"></svg>
</div>
<div class="graph">
<svg id="size_histogram"></svg>
</div>
</div>
</div>
<div class="modal" id="settings" tabindex="-1" role="dialog" aria-labelledby="modal-title" aria-hidden="true">
<div class="modal-dialog modal-dialog-centered" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">Settings</h5>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
</div>
<div class="modal-body">
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingHighlight">
<label class="custom-control-label" for="settingHighlight">Enable highlighting</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingFuzzy">
<label class="custom-control-label" for="settingFuzzy">Set fuzzy search by default</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSearchInPath">
<label class="custom-control-label" for="settingSearchInPath">Enable matching query against document
path</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSuggestPath">
<label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter bar</label>
</div>
<br/>
<div class="form-group">
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label>
</div>
<label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option>
<option value="list">List</option>
</select>
<hr/>
<h4>Stats</h4>
<div class="form-group">
<label for="settingTreemapType">Treemap type</label>
<select id="settingTreemapType" class="form-control form-control-sm">
<option value="cascaded">Cascaded</option>
<option value="flat">Flat (compact)</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapTiling">Treemap tiling</label>
<select id="settingTreemapTiling" class="form-control form-control-sm">
<option value="binary">Binary</option>
<option value="squarify">Squarify</option>
<option value="slice">Slice</option>
<option value="dice">Dice</option>
<option value="sliceDice">Slice &amp; Dice</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapGroupingDepth">Treemap color grouping depth (flat)</label>
<input type="number" class="form-control" id="settingTreemapGroupingDepth" min="1" max="10">
</div>
<div class="form-group">
<label for="settingTreemapColor">Treemap color (cascaded)</label>
<select id="settingTreemapColor" class="form-control form-control-sm">
<option value="PuBuGn">Purple-Blue-Green</option>
<option value="PuRd">Purple-Red</option>
<option value="PuBu">Purple-Blue</option>
<option value="YlOrBr">Yellow-Orange-Brown</option>
<option value="YlOrRd">Yellow-Orange-Red</option>
<option value="YlGn">Yellow-Green</option>
<option value="YlGnBu">Yellow-Green-Blue</option>
<option value="Plasma">Plasma</option>
<option value="Magma">Magma</option>
<option value="Inferno">Inferno</option>
<option value="Viridis">Viridis</option>
<option value="Turbo">Turbo</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapSize">Treemap size</label>
<select id="settingTreemapSize" class="form-control form-control-sm">
<option value="small">Small</option>
<option value="medium">Medium</option>
<option value="large">Large</option>
<option value="x-large">X-Large</option>
<option value="xx-large">XX-Large</option>
</select>
</div>
<br>
<button class="btn btn-primary float-right" onclick="updateSettings()">Update settings</button>
</div>
</div>
</div>
</div>
<script src="jslib" type="text/javascript"></script>
<script>
// Treemap dimensions; assigned by updateStats() from SIZES.
let width;
let height;
// Index id -> index name; filled when the index list is received on page load.
let indexMap = {};
// Height of one bar in the MIME bar charts.
const barHeight = 20;
const ordinalColor = d3.scaleOrdinal(d3.schemeCategory10);
const formatSI = d3.format("~s");
// Value of the "treemapTiling" option -> d3 tiling method.
const TILING_MODES = {
"squarify": d3.treemapSquarify,
"binary": d3.treemapBinary,
"sliceDice": d3.treemapSliceDice,
"slice": d3.treemapSlice,
"dice": d3.treemapDice,
};
// Value of the "treemapColor" option -> d3 color interpolator.
const COLORS = {
"PuBuGn": d3.interpolatePuBuGn,
"PuRd": d3.interpolatePuRd,
"PuBu": d3.interpolatePuBu,
"YlOrBr": d3.interpolateYlOrBr,
"YlOrRd": d3.interpolateYlOrRd,
"YlGn": d3.interpolateYlGn,
"YlGnBu": d3.interpolateYlGnBu,
"Plasma": d3.interpolatePlasma,
"Magma": d3.interpolateMagma,
"Inferno": d3.interpolateInferno,
"Viridis": d3.interpolateViridis,
"Turbo": d3.interpolateTurbo,
};
// Value of the "treemapSize" option -> [width, height] of the treemap.
const SIZES = {
"small": [800, 600],
"medium": [1300, 750],
"large": [1900, 900],
"x-large": [2800, 1700],
"xx-large": [3600, 2000],
};
// Higher opacity when the "sist" cookie is present (it is set by toggleTheme()).
const fillOpacity = document.cookie.includes("sist") ? 0.9 : 0.6;
// Next free counter value for every name passed to uid().
const uids = {};

/**
 * Return a page-unique identifier made of `name` followed by a counter that
 * starts at 0 and increases by one for each call with the same name.
 */
function uid(name) {
    const current = uids[name] ? uids[name] : 0;
    uids[name] = current + 1;
    return name + current;
}
/**
 * Convert a flat table of rows into the nested {name, depth, value, children}
 * structure consumed by d3.hierarchy().
 *
 * @param {Array} table rows with a `taxonomy` array (path components) and a
 * numeric `size`
 * @param {boolean} addSelfDir when true, the size of a row that already has
 * children is stored in an extra "." child instead of being dropped
 * @returns {Object} root node named after the currently selected index
 */
const burrow = function (table, addSelfDir) {
    // Null-prototype objects are used as dictionaries so that path components
    // such as "constructor" or "__proto__" are treated as ordinary keys
    // instead of resolving to inherited Object members.
    const root = Object.create(null);
    table.forEach(row => {
        let layer = root;
        row.taxonomy.forEach(key => {
            if (!(key in layer)) {
                layer[key] = Object.create(null);
            }
            layer = layer[key];
        });
        if (Object.keys(layer).length === 0) {
            // Nothing recorded under this path yet: store the size directly.
            layer["$size$"] = row.size;
        } else if (addSelfDir) {
            layer["."] = {
                "$size$": row.size,
            };
        }
    });
    // Recursively turn the dictionary into an array of child nodes;
    // "$size$" entries become the `value` of their node.
    const descend = function (obj, depth) {
        return Object.keys(obj).filter(k => k !== "$size$").map(k => {
            const child = {
                name: k,
                depth: depth,
                value: 0,
                children: descend(obj[k], depth + 1)
            };
            if ("$size$" in obj[k]) {
                child.value = obj[k]["$size$"];
            }
            return child;
        });
    };
    return {
        name: `[${indexMap[$("#indices").val()]}]`,
        children: descend(root, 1),
        value: 0,
        depth: 0,
    }
};
/**
 * Draw a flat treemap: one rectangle per leaf of `data`, colored by the name
 * of its ancestor at the configured grouping depth.
 *
 * @param data nested structure accepted by d3.hierarchy() (see burrow())
 * @param svg d3 selection of the target <svg> element
 */
function flatTreemap(data, svg) {
    const layout = d3.treemap()
        .tile(TILING_MODES[CONF.options.treemapTiling])
        .size([width, height])
        .padding(1)
        .round(true);

    const hierarchy = d3.hierarchy(data)
        .sum(node => node.value)
        .sort((left, right) => right.value - left.value);

    const root = layout(hierarchy);

    // Color of a leaf: walk up until the grouping depth is reached.
    const groupColor = node => {
        let ancestor = node;
        while (ancestor.depth > CONF.options.treemapGroupingDepth) {
            ancestor = ancestor.parent;
        }
        return ordinalColor(ancestor.data.name);
    };

    // Label of a leaf: a "." leaf is labelled with its parent's name and size.
    const labelLines = node => {
        const labelled = node.data.name === "." ? node.parent : node;
        return [labelled.data.name, humanFileSize(labelled.value)];
    };

    const fullPath = node => node.ancestors().reverse().map(a => a.data.name).join("/");

    const leaf = svg.selectAll("g")
        .data(root.leaves())
        .join("g")
        .attr("transform", node => `translate(${node.x0},${node.y0})`);

    leaf.append("title")
        .text(node => `${fullPath(node)}\n${humanFileSize(node.value)}`);

    const rect = leaf.append("rect");
    rect.attr("id", node => (node.leafUid = uid("leaf")))
        .attr("fill", groupColor)
        .attr("fill-opacity", fillOpacity)
        .attr("width", node => node.x1 - node.x0)
        .attr("height", node => node.y1 - node.y0);

    // Clip each label to the rectangle it belongs to.
    leaf.append("clipPath")
        .attr("id", node => (node.clipUid = uid("clip")))
        .append("use")
        .attr("href", node => `#${node.leafUid}`);

    leaf.append("text")
        .attr("clip-path", node => `url(#${node.clipUid})`)
        .selectAll("tspan")
        .data(labelLines)
        .join("tspan")
        .attr("x", 2)
        .attr("y", (line, lineNo) => `${lineNo === 0 ? 1.1 : 2.3}em`)
        .text(line => line);
}
/**
 * Adjust the right/bottom edges of a laid-out treemap so that nested nodes
 * whose edges coincide (up to `offset`) are drawn with staggered edges.
 *
 * First pass (children before parents): for each node, count how many levels
 * of descendants share its right (x) / bottom (y) edge.
 * Second pass (parents before children): move `x1` / `y1` inwards by
 * 2 * offset for each counted level.
 *
 * @param root laid-out d3 hierarchy node
 * @param {number} offset outer padding used by the layout
 * @returns the same `root`, modified in place
 */
function cascade(root, offset) {
    const xLevels = new Map();
    const yLevels = new Map();

    const countLevels = node => {
        const hasChildren = node.children && node.children.length !== 0;
        if (!hasChildren) {
            xLevels.set(node, 0);
            yLevels.set(node, 0);
            return;
        }
        // Children not aligned with this node's edge yield NaN, which d3.max ignores.
        const alignedX = child => child.x1 === node.x1 - offset ? xLevels.get(child) : NaN;
        const alignedY = child => child.y1 === node.y1 - offset ? yLevels.get(child) : NaN;
        xLevels.set(node, 1 + d3.max(node.children, alignedX));
        yLevels.set(node, 1 + d3.max(node.children, alignedY));
    };

    const pullEdgesIn = node => {
        node.x1 -= 2 * offset * xLevels.get(node);
        node.y1 -= 2 * offset * yLevels.get(node);
    };

    return root.eachAfter(countLevels).eachBefore(pullEdgesIn);
}
/**
 * Draw a cascaded (nested) treemap: every node of `data`, not only the
 * leaves, gets a rectangle colored according to its depth.
 *
 * @param data nested structure accepted by d3.hierarchy() (see burrow())
 * @param svg d3 selection of the target <svg> element
 */
function cascadeTreemap(data, svg) {
    // Must match the value passed to cascade().
    const outerPadding = 3;

    const layout = d3.treemap()
        .size([width, height])
        .tile(TILING_MODES[CONF.options.treemapTiling])
        .paddingOuter(outerPadding)
        .paddingTop(16)
        .paddingInner(1)
        .round(true);

    const hierarchy = d3.hierarchy(data)
        .sum(entry => entry.value)
        .sort((left, right) => right.value - left.value);

    const root = cascade(layout(hierarchy), outerPadding);

    const depths = root.descendants().map(entry => entry.depth);
    const maxDepth = Math.max(...depths);
    const color = d3.scaleSequential([maxDepth, -1], COLORS[CONF.options.treemapColor]);

    svg.append("filter")
        .attr("id", "shadow")
        .append("feDropShadow")
        .attr("flood-opacity", 0.3)
        .attr("dx", 0)
        .attr("stdDeviation", 3);

    // One outer <g> per depth (ascending), one inner <g> per node.
    const nodesByDepth = d3.nest()
        .key(entry => entry.depth).sortKeys(d3.ascending)
        .entries(root.descendants());

    const depthLayers = svg.selectAll("g")
        .data(nodesByDepth)
        .join("g")
        .attr("filter", "url(#shadow)");

    const node = depthLayers.selectAll("g")
        .data(layer => layer.values)
        .join("g")
        .attr("transform", entry => `translate(${entry.x0},${entry.y0})`);

    // Tooltip path: every ancestor name except the root's.
    const pathBelowRoot = entry => entry.ancestors().reverse().slice(1)
        .map(ancestor => ancestor.data.name).join("/");

    node.append("title")
        .text(entry => `${pathBelowRoot(entry)}\n${humanFileSize(entry.value)}`);

    node.append("rect")
        .attr("id", entry => (entry.nodeUid = uid("node")))
        .attr("fill", entry => color(entry.depth))
        .attr("width", entry => entry.x1 - entry.x0)
        .attr("height", entry => entry.y1 - entry.y0);

    node.append("clipPath")
        .attr("id", entry => (entry.clipUid = uid("clip")))
        .append("use")
        .attr("href", entry => `#${entry.nodeUid}`);

    // Dark text on light rectangles, light text on dark ones.
    const textColor = entry => d3.hsl(color(entry.depth)).l > .5 ? "#333" : "#eee";

    node.append("text")
        .attr("fill", textColor)
        .attr("clip-path", entry => `url(#${entry.clipUid})`)
        .selectAll("tspan")
        .data(entry => [entry.data.name, humanFileSize(entry.value)])
        .join("tspan")
        .text(line => line);

    // Nodes with children: name and size on one line.
    const parents = node.filter(entry => entry.children);
    parents.selectAll("tspan")
        .attr("dx", 3)
        .attr("y", 13);

    // Nodes without children: name and size on two lines.
    const leaves = node.filter(entry => !entry.children);
    leaves.selectAll("tspan")
        .attr("x", 3)
        .attr("y", (line, lineNo) => `${lineNo === 0 ? 1.1 : 2.3}em`);
}
/**
 * Shared implementation of the two MIME bar charts: horizontal bars for the
 * 15 rows with the largest `valueKey`, one bar per MIME type.
 *
 * @param {Array} data rows with `mime`, `size` and `count` fields; the array
 * is sorted in place and `name` / `value` are written on its rows
 * @param svg d3 selection of the target <svg> element
 * @param {string} valueKey row field plotted as bar length ("size" or "count")
 * @param {Function} tooltipFormat formats a bar value for its tooltip
 * @param {string} title chart title
 */
function _mimeBar(data, svg, valueKey, tooltipFormat, title) {
    svg.selectAll("*").remove();
    if (data.length === 0) {
        // Nothing to draw; avoids NaN margins and scales below.
        return;
    }

    // Widest label (6 units per character) among the top 15 rows of a sort.
    const labelWidth = rows => d3.max(rows.slice(0, 15), d => d.mime.length) * 6;

    // The left margin accounts for the labels of BOTH charts (top 15 by count
    // and top 15 by size) so that the two charts line up.
    const margin = {
        top: 50,
        right: 0,
        bottom: 10,
        left: Math.max(
            labelWidth(data.sort((a, b) => b.count - a.count)),
            labelWidth(data.sort((a, b) => b.size - a.size))
        )
    };

    data.forEach(d => {
        d.name = d.mime;
        d.value = Number(d[valueKey]);
    });
    data = data.sort((a, b) => b.value - a.value).slice(0, 15);

    const width = 550;
    const height = Math.ceil((data.length + 0.1) * barHeight) + margin.top + margin.bottom;

    svg.attr("viewBox", [0, 0, width, height]);

    const y = d3.scaleBand()
        .domain(d3.range(data.length))
        .rangeRound([margin.top, height - margin.bottom]);
    const x = d3.scaleLinear()
        .domain([0, d3.max(data, d => d.value)])
        .range([margin.left, width - margin.right]);

    // Bars
    svg.append("g")
        .attr("fill-opacity", fillOpacity)
        .selectAll("rect")
        .data(data)
        .join("rect")
        .attr("fill", d => ordinalColor(d.name))
        .attr("x", x(0))
        .attr("y", (d, i) => y(i))
        .attr("width", d => x(d.value) - x(0))
        .attr("height", y.bandwidth())
        .append("title")
        .text(d => tooltipFormat(d.value));

    // Value axis (top)
    svg.append("g")
        .attr("transform", `translate(0,${margin.top})`)
        .call(d3.axisTop(x).ticks(width / 80).tickFormat(formatSI))
        .call(g => g.select(".domain").remove());

    // MIME type axis (left)
    svg.append("g")
        .attr("transform", `translate(${margin.left},0)`)
        .call(d3.axisLeft(y).tickFormat(i => data[i].name).tickSizeOuter(0));

    // Title
    svg.append("text")
        .attr("x", (width / 2))
        .attr("y", (margin.top / 2))
        .attr("text-anchor", "middle")
        .style("font-size", "16px")
        .text(title);
}

/**
 * Bar chart of the total size per MIME type (15 largest).
 *
 * @param {Array} data rows with `mime`, `size` and `count`; modified in place
 * @param svg d3 selection of the target <svg> element
 */
function mimeBarSize(data, svg) {
    _mimeBar(data, svg, "size", formatSI, "Size distribution by MIME type");
}

/**
 * Bar chart of the file count per MIME type (15 largest).
 *
 * @param {Array} data rows with `mime`, `size` and `count`; modified in place
 * @param svg d3 selection of the target <svg> element
 */
function mimeBarCount(data, svg) {
    _mimeBar(data, svg, "count", d3.format(","), "File count distribution by MIME type");
}
/**
 * Draw the histogram of file modification times.
 * Only the buckets whose count is above the 0.9 quantile of all bucket counts
 * are displayed.
 *
 * @param {Array} data rows with `bucket` (start of the bucket, parsed as "%s"
 * i.e. UNIX seconds for the axis labels) and `count`
 * @param svg d3 selection of the target <svg> element
 */
function dateHistogram(data, svg) {
    // 2629800 s = 365.25 days / 12; presumably the bucket width used by the
    // server -- TODO confirm.
    const bucketSpan = 2629800;
    const margin = {top: 50, right: 20, bottom: 70, left: 40};
    const width = 550;
    const height = 450;

    const allBins = data.map(row => ({
        length: Number(row.count),
        x0: Number(row.bucket),
        x1: Number(row.bucket) + bucketSpan
    }));
    allBins.sort((a, b) => a.length - b.length);

    const thresh = d3.quantile(allBins, 0.9, bin => bin.length);
    const bins = allBins.filter(bin => bin.length > thresh);

    svg.selectAll("*").remove();
    svg.attr("viewBox", [0, 0, width, height]);

    const y = d3.scaleLinear()
        .domain([0, d3.max(bins, bin => bin.length)]).nice()
        .range([height - margin.bottom, margin.top]);
    const x = d3.scaleLinear()
        .domain(d3.extent(bins, bin => bin.x0)).nice()
        .range([margin.left, width - margin.right]);

    // Bars, each with its count as tooltip
    const bars = svg.append("g")
        .attr("fill", "steelblue")
        .selectAll("rect")
        .data(bins)
        .join("rect")
        .attr("x", bin => x(bin.x0) + 1)
        .attr("width", bin => Math.max(1, x(bin.x1) - x(bin.x0) - 1))
        .attr("y", bin => y(bin.length))
        .attr("height", bin => y(0) - y(bin.length));
    bars.append("title")
        .text(bin => bin.length);

    // Bottom axis: dates, rotated tick labels, "mtime" caption
    const xAxis = d3.axisBottom(x)
        .ticks(width / 30)
        .tickSizeOuter(0)
        .tickFormat(t => d3.timeFormat("%Y-%m-%d")(d3.utcParse("%s")(t)));
    const xAxisGroup = svg.append("g")
        .attr("transform", `translate(0,${height - margin.bottom})`)
        .call(xAxis);
    // Rotate the tick labels BEFORE adding the caption so that the caption
    // itself is not rotated.
    xAxisGroup.selectAll("text")
        .style("text-anchor", "end")
        .attr("dx", "-.8em")
        .attr("dy", ".15em")
        .attr("transform", "rotate(-65)");
    xAxisGroup.append("text")
        .attr("x", width - margin.right)
        .attr("y", -4)
        .attr("fill", "currentColor")
        .attr("font-weight", "bold")
        .attr("text-anchor", "end")
        .text("mtime");

    // Left axis: counts, captioned next to the last tick
    const yAxis = d3.axisLeft(y)
        .ticks(height / 40)
        .tickFormat(t => formatSI(t));
    const yAxisGroup = svg.append("g")
        .attr("transform", `translate(${margin.left},0)`)
        .call(yAxis);
    yAxisGroup.select(".domain").remove();
    yAxisGroup.select(".tick:last-of-type text").clone()
        .attr("x", 4)
        .attr("text-anchor", "start")
        .attr("font-weight", "bold")
        .text("File count");

    // Title
    svg.append("text")
        .attr("x", (width / 2))
        .attr("y", (margin.top / 2))
        .attr("text-anchor", "middle")
        .style("font-size", "16px")
        .text("File modification time distribution");
}
/**
 * Draw the histogram of file sizes.
 * Only the 25 buckets with the highest count are displayed.
 *
 * @param {Array} data rows with `bucket` (start of the bucket) and `count`
 * @param svg d3 selection of the target <svg> element
 */
function sizeHistogram(data, svg) {
    // Each bar spans 5 MiB starting at its bucket value.
    const bucketSpan = 5 * 1024 * 1024;
    const margin = {top: 50, right: 20, bottom: 70, left: 40};
    const width = 550;
    const height = 450;

    const allBins = data.map(row => ({
        length: Number(row.count),
        x0: Number(row.bucket),
        x1: Number(row.bucket) + bucketSpan
    }));
    const bins = allBins.sort((a, b) => b.length - a.length).slice(0, 25);

    svg.selectAll("*").remove();
    svg.attr("viewBox", [0, 0, width, height]);

    const y = d3.scaleLinear()
        .domain([0, d3.max(bins, bin => bin.length)])
        .range([height - margin.bottom, margin.top]);
    const x = d3.scaleLinear()
        .domain(d3.extent(bins, bin => bin.x0)).nice()
        .range([margin.left, width - margin.right]);

    // Bars, each with its count as tooltip
    const bars = svg.append("g")
        .attr("fill", "steelblue")
        .selectAll("rect")
        .data(bins)
        .join("rect")
        .attr("x", bin => x(bin.x0) + 1)
        .attr("width", bin => Math.max(1, x(bin.x1) - x(bin.x0) - 1))
        .attr("y", bin => y(bin.length))
        .attr("height", bin => y(0) - y(bin.length));
    bars.append("title")
        .text(bin => bin.length);

    // Bottom axis: sizes, rotated tick labels, "size (bytes)" caption
    const xAxis = d3.axisBottom(x)
        .ticks(width / 30)
        .tickSizeOuter(0)
        .tickFormat(formatSI);
    const xAxisGroup = svg.append("g")
        .attr("transform", `translate(0,${height - margin.bottom})`)
        .call(xAxis);
    // Rotate the tick labels BEFORE adding the caption so that the caption
    // itself is not rotated.
    xAxisGroup.selectAll("text")
        .style("text-anchor", "end")
        .attr("dx", "-.8em")
        .attr("dy", ".15em")
        .attr("transform", "rotate(-65)");
    xAxisGroup.append("text")
        .attr("x", width - margin.right)
        .attr("y", -4)
        .attr("fill", "currentColor")
        .attr("font-weight", "bold")
        .attr("text-anchor", "end")
        .text("size (bytes)");

    // Left axis: counts, captioned next to the last tick
    const yAxis = d3.axisLeft(y)
        .ticks(height / 40)
        .tickFormat(t => formatSI(t));
    const yAxisGroup = svg.append("g")
        .attr("transform", `translate(${margin.left},0)`)
        .call(yAxis);
    yAxisGroup.select(".domain").remove();
    yAxisGroup.select(".tick:last-of-type text").clone()
        .attr("x", 4)
        .attr("text-anchor", "start")
        .attr("font-weight", "bold")
        .text("File count");

    // Title
    svg.append("text")
        .attr("x", (width / 2))
        .attr("y", (margin.top / 2))
        .attr("text-anchor", "middle")
        .style("font-size", "16px")
        .text("File size distribution");
}
/**
 * Redraw every chart of the stats page for the index selected in #indices.
 *
 * The treemap is emptied and resized immediately; the charts are then drawn
 * as their CSV data (`/s/<index id>/1` to `/s/<index id>/4`) arrives.
 */
function updateStats() {
    const treemapSize = SIZES[CONF.options.treemapSize];
    width = treemapSize[0];
    height = treemapSize[1];

    const indexId = $("#indices").val();
    const statsUrl = kind => `/s/${indexId}/${kind}`;

    // Reset the treemap; it is filled once its data has been fetched.
    const treemapSvg = d3.select("#treemap");
    treemapSvg.selectAll("*").remove();
    treemapSvg.attr("viewBox", [0, 0, width, height])
        .attr("xmlns", "http://www.w3.org/2000/svg")
        .attr("xmlns:xlink", "http://www.w3.org/1999/xlink")
        .attr("version", "1.1")
        .style("overflow", "visible")
        .style("font", "10px sans-serif");

    // 1: one row per path, with its size
    d3.csv(statsUrl(1)).then(rows => {
        rows.forEach(row => {
            row.taxonomy = row.path.split("/");
            row.size = Number(row.size);
        });

        if (CONF.options.treemapType === "cascaded") {
            cascadeTreemap(burrow(rows, false), treemapSvg);
            return;
        }
        // Flat mode: deepest paths first.
        const deepestFirst = rows.sort((a, b) => b.taxonomy.length - a.taxonomy.length);
        flatTreemap(burrow(deepestFirst, true), treemapSvg);
    });

    // 2: one row per MIME type; each chart gets its own copy of the array
    d3.csv(statsUrl(2)).then(rows => {
        mimeBarSize(rows.slice(), d3.select("#agg_mime_size"));
        mimeBarCount(rows.slice(), d3.select("#agg_mime_count"));
    });

    // 3: size buckets
    d3.csv(statsUrl(3)).then(rows => {
        sizeHistogram(rows, d3.select("#size_histogram"));
    });

    // 4: modification time buckets
    d3.csv(statsUrl(4)).then(rows => {
        dateHistogram(rows, d3.select("#date_histogram"));
    });
}
/**
 * Page entry point: load the user configuration, fetch the list of indices,
 * fill the `#indices` <select> and render the stats.
 *
 * Indices whose name appears in the comma-separated `i` URL parameter are
 * pre-selected. The previous code called jQuery's `.select(name)`, which is
 * the "select" *event* helper and never changed the selection.
 */
window.onload = function () {
    CONF.load();

    $.jsonPost("i").then(resp => {
        const select = $("#indices");

        const urlIndices = (new URLSearchParams(location.search)).get("i");
        const requestedNames = urlIndices ? urlIndices.split(",") : [];

        resp["indices"].forEach(idx => {
            indexMap[idx.id] = idx.name;

            // .text() so an index name is never interpreted as HTML
            const option = $("<option>")
                .attr("value", idx.id)
                .text(idx.name);

            if (requestedNames.indexOf(idx.name) !== -1) {
                option.prop("selected", true);
            }

            select.append(option);
        });

        updateStats();
    });
};
/**
 * Toggle the "full-screen" CSS class on the element whose id is `selector`.
 *
 * @param {string} selector element id (not a CSS selector)
 */
function fullScreen(selector) {
    document.getElementById(selector).classList.toggle("full-screen");
}
/**
 * Render the `#treemap` element to a PNG blob with dom-to-image and trigger
 * a download named `<index name>_treemap.png`.
 *
 * A temporary <a> element is added to the document, clicked, and then
 * removed (together with the object URL) on the next timer tick.
 */
function exportTreemap() {
    const treemapNode = document.getElementById("treemap");

    domtoimage.toBlob(treemapNode, {width: width, height: height}).then(blob => {
        const link = document.createElement("a");
        const objectUrl = URL.createObjectURL(blob);

        link.href = objectUrl;
        link.download = `${indexMap[$("#indices").val()]}_treemap.png`;

        document.body.appendChild(link);
        link.click();

        setTimeout(() => {
            document.body.removeChild(link);
            window.URL.revokeObjectURL(objectUrl);
        }, 0);
    });
}
</script>
</body>
</html>

353
src/stats.c Normal file
View File

@@ -0,0 +1,353 @@
#include "sist.h"
#include "io/serialize.h"
#include "ctx.h"
#include <glib.h>
/* path -> accumulated size; the size is stored directly in the value pointer */
static GHashTable *FlatTree;
/* staging table used by merge_up() for parent paths not yet in FlatTree */
static GHashTable *BufferTable;
/* mime string -> agg_t */
static GHashTable *AggMime;
/* size bucket (lower bound, stored directly in the key pointer) -> agg_t */
static GHashTable *AggSize;
/* mtime bucket (lower bound, stored directly in the key pointer) -> agg_t */
static GHashTable *AggDate;
/* width of a size histogram bucket: 5 * 1024 * 1024 (presumably bytes) */
#define SIZE_BUCKET (long)(5 * 1024 * 1024)
/* width of a date histogram bucket: 2629800 = 365.25 * 86400 / 12, i.e. one
 * average month if mtime is in seconds -- TODO confirm the unit of mtime */
#define DATE_BUCKET (long)(2629800)
/* sum of the "size" of every document processed by fill_tables() */
static long TotalSize = 0;
/* number of documents processed by fill_tables() */
static long DocumentCount = 0;
/* One aggregation bucket: summed size and number of documents in the bucket */
typedef struct {
long size;
long count;
} agg_t;
void fill_tables(cJSON *document, UNUSED(const char uuid_str[UUID_STR_LEN])) {
if (cJSON_GetObjectItem(document, "parent") != NULL) {
return;
}
const char *json_path = cJSON_GetObjectItem(document, "path")->valuestring;
char *path = malloc(strlen(json_path) + 1);
strcpy(path, json_path);
const char *json_mime = cJSON_GetObjectItem(document, "mime")->valuestring;
char *mime;
if (json_mime == NULL) {
mime = NULL;
} else {
mime = malloc(strlen(json_mime) + 1);
strcpy(mime, json_mime);
}
long size = (long) cJSON_GetObjectItem(document, "size")->valuedouble;
int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;
// treemap
void *existing_path = g_hash_table_lookup(FlatTree, path);
if (existing_path == NULL) {
g_hash_table_insert(FlatTree, path, (gpointer) size);
} else {
g_hash_table_replace(FlatTree, path, (gpointer) ((long) existing_path + size));
}
// mime agg
if (mime != NULL) {
agg_t *orig_agg = g_hash_table_lookup(AggMime, mime);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggMime, mime, agg);
} else {
orig_agg->size += size;
orig_agg->count += 1;
free(mime);
}
}
// size agg
long size_bucket = size - (size % SIZE_BUCKET);
agg_t *orig_agg = g_hash_table_lookup(AggSize, (gpointer) size_bucket);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggSize, (gpointer) size_bucket, agg);
} else {
orig_agg->count += 1;
orig_agg->size += size;
}
// date agg
long date_bucket = mtime - (mtime % DATE_BUCKET);
orig_agg = g_hash_table_lookup(AggDate, (gpointer) date_bucket);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggDate, (gpointer) date_bucket, agg);
} else {
orig_agg->count += 1;
orig_agg->size += size;
}
TotalSize += size;
DocumentCount += 1;
}
/**
 * Feed every index file of `index` to fill_tables().
 *
 * Scans the directory `index->path` and calls read_index() on each entry
 * whose name starts with "_index_". Failure to open the directory is
 * reported as a fatal error instead of passing a NULL stream to readdir().
 */
void read_index_into_tables(index_t *index) {
    DIR *dir = opendir(index->path);
    if (dir == NULL) {
        LOG_FATALF("stats.c", "Could not open index directory %s: %s [%d]", index->path, strerror(errno), errno)
        return;
    }

    struct dirent *de;
    while ((de = readdir(dir)) != NULL) {
        if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
            char file_path[PATH_MAX];
            snprintf(file_path, PATH_MAX, "%s/%s", index->path, de->d_name);
            read_index(file_path, index->desc.uuid, index->desc.type, fill_tables);
        }
    }

    closedir(dir);
}
/**
 * Find the last occurrence of `c` in `str`.
 *
 * The terminating NUL is part of the search, so rfind(s, '\0') returns
 * strlen(s).
 *
 * @return index of the last match, or (size_t) -1 when `c` does not occur.
 *         Callers compare the result against -1.
 */
static size_t rfind(const char *str, int c) {
    const char *cursor = str + strlen(str);

    for (;;) {
        if (*cursor == c) {
            return (size_t) (cursor - str);
        }
        if (cursor == str) {
            return (size_t) -1;
        }
        cursor--;
    }
}
/**
 * Run one "merge up" pass over FlatTree: every entry whose size is below
 * thresh * TotalSize is removed and its size is added to its parent path
 * (the part of the key before the last '/', or "" when there is none).
 *
 * @param thresh fraction of TotalSize under which an entry is merged
 * @return number of entries merged during this pass
 */
int merge_up(double thresh) {
long min_size = (long) (thresh * (double) TotalSize);
int count = 0;
GHashTableIter iter;
g_hash_table_iter_init(&iter, FlatTree);
void *key;
void *value;
while (g_hash_table_iter_next(&iter, &key, &value)) {
// sizes are stored directly in the value pointer
long size = (long) value;
if (size < min_size) {
// rfind() reports "not found" as -1; such keys get the empty string as parent
int stop = rfind(key, '/');
if (stop == -1) {
stop = 0;
}
// heap copy of the parent path; ownership goes to whichever table stores it
char *parent = malloc(stop + 1);
strncpy(parent, key, stop);
*(parent + stop) = '\0';
// NOTE: a NULL result means "absent" but also "present with size 0"
void *existing_parent = g_hash_table_lookup(FlatTree, parent);
if (existing_parent == NULL) {
// parent not (usefully) in FlatTree: accumulate it in BufferTable for now
void *existing_parent2_key;
void *existing_parent2_val;
int found = g_hash_table_lookup_extended(BufferTable, parent, &existing_parent2_key,
&existing_parent2_val);
if (!found) {
g_hash_table_insert(BufferTable, parent, value);
} else {
// the new key string replaces the stored one, so the old one is freed by hand
g_hash_table_replace(BufferTable, parent, (gpointer) ((long) existing_parent2_val + size));
free(existing_parent2_key);
}
} else {
// NOTE(review): this modifies FlatTree while it is being iterated; only an
// existing key is replaced here -- confirm this is safe with the GLib iterator
g_hash_table_replace(FlatTree, parent, (gpointer) ((long) existing_parent + size));
}
// drop the merged entry from FlatTree
g_hash_table_iter_remove(&iter);
count += 1;
}
}
// move the buffered parents into FlatTree and empty BufferTable
g_hash_table_iter_init(&iter, BufferTable);
while (g_hash_table_iter_next(&iter, &key, &value)) {
g_hash_table_insert(FlatTree, key, value);
g_hash_table_iter_remove(&iter);
}
int size = g_hash_table_size(FlatTree);
LOG_DEBUGF("stats.c", "Merge up iteration (%d merged, %d in tree)", count, size)
return count;
}
/**
 * Copy `str` into `dst` as a single CSV field.
 *
 * A field that contains a comma, a double quote or a line break (CR/LF) is
 * wrapped in double quotes, with every embedded double quote doubled; any
 * other string is copied verbatim.
 *
 * Assumes dst is at least PATH_MAX * 4 bytes; the worst case output is
 * 2 * strlen(str) + 3 bytes including the terminating NUL.
 *
 * @param dst output buffer, always NUL-terminated on return
 * @param str NUL-terminated input string
 */
void csv_escape(char *dst, const char *str) {
    if (strpbrk(str, ",\"\r\n") == NULL) {
        strcpy(dst, str);
        return;
    }

    char *out = dst;
    *out++ = '"';

    for (const char *ptr = str; *ptr != '\0'; ptr++) {
        if (*ptr == '"') {
            // a literal quote is written as two quotes
            *out++ = '"';
        }
        *out++ = *ptr;
    }

    *out++ = '"';
    *out = '\0';
}
/**
 * Open `path` for writing, creating it (mode 0600) when it does not exist
 * and truncating it when it does, so a shorter output never keeps stale
 * bytes from a previous, longer file.
 *
 * A failure is reported through LOG_FATALF.
 *
 * @return the file descriptor returned by open()
 */
int open_or_exit(const char *path) {
    int fd = open(path, O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR);
    if (fd < 0) {
        LOG_FATALF("stats.c", "Error while creating file: %s [%d]\n", strerror(errno), errno)
    }
    return fd;
}
/* First line of each generated CSV file (written without a trailing newline;
 * every data row is written with a leading '\n'). */
#define TREEMAP_CSV_HEADER "path,size"
#define MIME_AGG_CSV_HEADER "mime,size,count"
#define SIZE_AGG_CSV_HEADER "bucket,size,count"
#define DATE_AGG_CSV_HEADER "bucket,size,count"
/**
 * Write the treemap CSV ("path,size") to `out_path`.
 *
 * Only FlatTree entries whose size is at least thresh * TotalSize are
 * written; paths go through csv_escape(). Write errors are reported with
 * LOG_FATALF.
 *
 * @param thresh   fraction of TotalSize under which an entry is left out
 * @param out_path file to create
 */
void write_treemap_csv(double thresh, const char *out_path) {
    const long threshold_size = (long) (thresh * (double) TotalSize);
    const int out_fd = open_or_exit(out_path);

    if ((int) write(out_fd, TREEMAP_CSV_HEADER, sizeof(TREEMAP_CSV_HEADER) - 1) == -1) {
        LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
    }

    GHashTableIter tree_iter;
    void *path;
    void *packed_size;

    g_hash_table_iter_init(&tree_iter, FlatTree);
    while (g_hash_table_iter_next(&tree_iter, &path, &packed_size)) {
        // sizes are stored directly in the value pointer
        const long entry_size = (long) packed_size;
        if (entry_size < threshold_size) {
            continue;
        }

        char escaped_path[PATH_MAX * 4];
        char line[PATH_MAX * 4 + 16];

        csv_escape(escaped_path, path);
        const size_t line_len = sprintf(line, "\n%s,%ld", escaped_path, entry_size);

        if ((int) write(out_fd, line, line_len) == -1) {
            LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
        }
    }

    close(out_fd);
}
/**
 * Write an aggregation table whose keys are C strings as CSV.
 *
 * Emits `header`, then one "key,size,count" row per table entry. Keys are
 * written as-is (no CSV escaping). Write errors are reported with
 * LOG_FATALF.
 *
 * @param out_path file to create
 * @param header   first line of the file, without newline
 * @param table    string -> agg_t table
 */
void write_agg_csv_str(const char *out_path, const char *header, GHashTable *table) {
    const int out_fd = open_or_exit(out_path);

    if ((int) write(out_fd, header, strlen(header)) == -1) {
        LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
    }

    char line[4096];
    GHashTableIter agg_iter;
    void *name;
    void *entry;

    g_hash_table_iter_init(&agg_iter, table);
    while (g_hash_table_iter_next(&agg_iter, &name, &entry)) {
        const agg_t *bucket = entry;
        const size_t line_len = sprintf(line, "\n%s,%ld,%ld", (const char*)name, bucket->size, bucket->count);

        if ((int) write(out_fd, line, line_len) == -1) {
            LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
        }
    }

    close(out_fd);
}
/**
 * Write an aggregation table whose keys are integers stored directly in the
 * key pointer as CSV.
 *
 * Emits `header`, then one "key,size,count" row per table entry. Write
 * errors are reported with LOG_FATALF.
 *
 * @param out_path file to create
 * @param header   first line of the file, without newline
 * @param table    integer bucket -> agg_t table
 */
void write_agg_csv_long(const char *out_path, const char *header, GHashTable *table) {
    const int out_fd = open_or_exit(out_path);

    if ((int) write(out_fd, header, strlen(header)) == -1) {
        LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
    }

    char line[4096];
    GHashTableIter agg_iter;
    void *bucket_key;
    void *entry;

    g_hash_table_iter_init(&agg_iter, table);
    while (g_hash_table_iter_next(&agg_iter, &bucket_key, &entry)) {
        const agg_t *bucket = entry;
        const size_t line_len = sprintf(line, "\n%ld,%ld,%ld", (long)bucket_key, bucket->size, bucket->count);

        if ((int) write(out_fd, line, line_len) == -1) {
            LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
        }
    }

    close(out_fd);
}
int generate_stats(index_t *index, const double threshold, const char *out_prefix) {
FlatTree = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
BufferTable = g_hash_table_new(g_str_hash, g_str_equal);
AggMime = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);
AggSize = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);
AggDate = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);
LOG_INFO("stats.c", "Generating stats...")
read_index_into_tables(index);
LOG_DEBUG("stats.c", "Read index into tables")
LOG_DEBUGF("stats.c", "Total size is %ld", TotalSize)
LOG_DEBUGF("stats.c", "Document count is %ld", DocumentCount)
LOG_DEBUGF("stats.c", "Merging small directories upwards with a threshold of %f%%", threshold * 100)
while (merge_up(threshold) > 100) {}
char tmp[PATH_MAX];
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "treemap.csv");
write_treemap_csv(threshold, tmp);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "mime_agg.csv");
write_agg_csv_str(tmp, MIME_AGG_CSV_HEADER, AggMime);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "size_agg.csv");
write_agg_csv_long(tmp, SIZE_AGG_CSV_HEADER, AggSize);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "date_agg.csv");
write_agg_csv_long(tmp, DATE_AGG_CSV_HEADER, AggDate);
g_hash_table_remove_all(FlatTree);
g_hash_table_destroy(FlatTree);
g_hash_table_destroy(BufferTable);
g_hash_table_remove_all(AggMime);
g_hash_table_destroy(AggMime);
g_hash_table_remove_all(AggSize);
g_hash_table_destroy(AggSize);
g_hash_table_remove_all(AggDate);
g_hash_table_destroy(AggDate);
return 0;
}

6
src/stats.h Normal file
View File

@@ -0,0 +1,6 @@
#ifndef SIST2_STATS_H
#define SIST2_STATS_H
/**
 * Compute statistics for an index and write them as CSV files named
 * <out_prefix>treemap.csv, <out_prefix>mime_agg.csv,
 * <out_prefix>size_agg.csv and <out_prefix>date_agg.csv.
 *
 * NOTE(review): index_t is not declared by this header; the including file
 * has to bring it into scope first.
 *
 * @param index      index to read
 * @param threshold  fraction of the total size under which a directory is
 *                   merged into its parent / left out of the treemap
 * @param out_prefix string prepended to each output file name
 * @return 0
 */
int generate_stats(index_t *index, double threshold, const char* out_prefix);
#endif

View File

@@ -1,5 +1,7 @@
#include "tpool.h" #include "tpool.h"
#include "ctx.h" #include "ctx.h"
#include "sist.h"
#include <pthread.h>
typedef void (*thread_func_t)(void *arg); typedef void (*thread_func_t)(void *arg);
@@ -25,6 +27,7 @@ typedef struct tpool {
int done_cnt; int done_cnt;
int stop; int stop;
void (*cleanup_func)(); void (*cleanup_func)();
} tpool_t; } tpool_t;
@@ -100,7 +103,7 @@ static void *tpool_worker(void *arg) {
tpool_t *pool = arg; tpool_t *pool = arg;
while (1) { while (1) {
pthread_mutex_lock(&(pool->work_mutex)); pthread_mutex_lock(&pool->work_mutex);
if (pool->stop) { if (pool->stop) {
break; break;
} }
@@ -113,14 +116,21 @@ static void *tpool_worker(void *arg) {
pthread_mutex_unlock(&(pool->work_mutex)); pthread_mutex_unlock(&(pool->work_mutex));
if (work != NULL) { if (work != NULL) {
if (pool->stop) {
break;
}
work->func(work->arg); work->func(work->arg);
free(work->arg);
free(work); free(work);
} }
pthread_mutex_lock(&(pool->work_mutex)); pthread_mutex_lock(&(pool->work_mutex));
pool->done_cnt++; if (work != NULL) {
pool->done_cnt++;
}
progress_bar_print((double)pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size); progress_bar_print((double) pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
if (pool->work_head == NULL) { if (pool->work_head == NULL) {
pthread_cond_signal(&(pool->working_cond)); pthread_cond_signal(&(pool->working_cond));
@@ -128,6 +138,7 @@ static void *tpool_worker(void *arg) {
pthread_mutex_unlock(&(pool->work_mutex)); pthread_mutex_unlock(&(pool->work_mutex));
} }
LOG_INFO("tpool.c", "Executing cleaup function")
pool->cleanup_func(); pool->cleanup_func();
pthread_cond_signal(&(pool->working_cond)); pthread_cond_signal(&(pool->working_cond));
@@ -136,17 +147,24 @@ static void *tpool_worker(void *arg) {
} }
void tpool_wait(tpool_t *pool) { void tpool_wait(tpool_t *pool) {
LOG_INFO("tpool.c", "Waiting for worker threads to finish")
pthread_mutex_lock(&(pool->work_mutex)); pthread_mutex_lock(&(pool->work_mutex));
while (1) { while (1) {
if (pool->done_cnt < pool->work_cnt) { if (pool->done_cnt < pool->work_cnt) {
pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex)); pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex));
} else { } else {
pool->stop = 1; usleep(500000);
break; if (pool->done_cnt == pool->work_cnt) {
pool->stop = 1;
usleep(1000000);
break;
}
} }
progress_bar_print(100.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
} }
progress_bar_print(1.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
pthread_mutex_unlock(&(pool->work_mutex)); pthread_mutex_unlock(&(pool->work_mutex));
LOG_INFO("tpool.c", "Worker threads finished")
} }
void tpool_destroy(tpool_t *pool) { void tpool_destroy(tpool_t *pool) {
@@ -154,6 +172,8 @@ void tpool_destroy(tpool_t *pool) {
return; return;
} }
LOG_INFO("tpool.c", "Destroying thread pool")
pthread_mutex_lock(&(pool->work_mutex)); pthread_mutex_lock(&(pool->work_mutex));
tpool_work_t *work = pool->work_head; tpool_work_t *work = pool->work_head;
while (work != NULL) { while (work != NULL) {
@@ -168,10 +188,13 @@ void tpool_destroy(tpool_t *pool) {
for (size_t i = 0; i < pool->thread_cnt; i++) { for (size_t i = 0; i < pool->thread_cnt; i++) {
pthread_t thread = pool->threads[i]; pthread_t thread = pool->threads[i];
if (thread != 0) { if (thread != 0) {
pthread_cancel(thread); void *_;
pthread_join(thread, &_);
} }
} }
LOG_INFO("tpool.c", "Final cleanup")
pthread_mutex_destroy(&(pool->work_mutex)); pthread_mutex_destroy(&(pool->work_mutex));
pthread_cond_destroy(&(pool->has_work_cond)); pthread_cond_destroy(&(pool->has_work_cond));
pthread_cond_destroy(&(pool->working_cond)); pthread_cond_destroy(&(pool->working_cond));
@@ -188,11 +211,11 @@ tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
tpool_t *pool = malloc(sizeof(tpool_t)); tpool_t *pool = malloc(sizeof(tpool_t));
pool->thread_cnt = thread_cnt; pool->thread_cnt = thread_cnt;
pool->work_cnt =0; pool->work_cnt = 0;
pool->done_cnt =0; pool->done_cnt = 0;
pool->stop = 0; pool->stop = 0;
pool->cleanup_func = cleanup_func; pool->cleanup_func = cleanup_func;
pool->threads = malloc(sizeof(pthread_t) * thread_cnt); pool->threads = calloc(sizeof(pthread_t), thread_cnt);
pthread_mutex_init(&(pool->work_mutex), NULL); pthread_mutex_init(&(pool->work_mutex), NULL);
@@ -202,11 +225,14 @@ tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
pool->work_head = NULL; pool->work_head = NULL;
pool->work_tail = NULL; pool->work_tail = NULL;
for (size_t i = 0; i < thread_cnt; i++) {
pthread_t thread = pool->threads[i];
pthread_create(&thread, NULL, tpool_worker, pool);
pthread_detach(thread);
}
return pool; return pool;
} }
void tpool_start(tpool_t *pool) {
LOG_INFOF("tpool.c", "Starting thread pool with %d threads", pool->thread_cnt)
for (size_t i = 0; i < pool->thread_cnt; i++) {
pthread_create(&pool->threads[i], NULL, tpool_worker, pool);
}
}

Some files were not shown because too many files have changed in this diff Show More