Compare commits

...

254 Commits

Author SHA1 Message Date
052df82373 Fix #83 2020-07-19 13:10:30 -04:00
5676136777 Remove println that was left accidentally 2020-07-18 20:55:12 -04:00
c061613302 Fix #76 2020-07-18 19:23:43 -04:00
d0325fd9b9 Fix for simon987/sist2#85 2020-07-18 18:48:54 -04:00
e05a6f3863 Fix for #75 2020-07-18 18:46:52 -04:00
f1690a9cca Mobi build fix 2020-07-18 13:10:45 -04:00
100a264413 Don't show MuPDF warnings unless --very-verbose is specified 2020-07-18 10:28:05 -04:00
29390bb454 Update README 2020-07-18 09:54:36 -04:00
4d43036ded Fix simon987/sist2#78 2020-07-18 09:41:39 -04:00
0b5cdbd130 Fix #79 2020-07-18 09:36:10 -04:00
53d7695f66 Read .raw thumbnails #80, fix media probing for some formats 2020-07-18 09:31:42 -04:00
8d53456404 fix libscan submodule 2020-07-17 20:33:50 -04:00
cbc08a7cc9 Save ebook renders as jpeg 2020-07-17 20:18:21 -04:00
e629b4d7d3 Faster comic book parsing, probably fixes #77 2020-07-17 19:10:18 -04:00
22f7073b39 mobi reading bugfix 2020-07-16 20:30:28 -04:00
1781a74960 Oops I didn't mean to push this 2020-07-16 19:23:52 -04:00
db96c95ac7 log fix #73 2020-07-16 19:19:23 -04:00
7b9fa4cc0a Fix bad merge... 2020-07-15 21:00:51 -04:00
5cc1fa86a9 Read embedded thumbnail simon987/sist2#74 2020-07-15 20:56:25 -04:00
649689ce30 Remove warning when generating stats 2020-07-15 20:41:38 -04:00
c8536f65a8 Fix memory leak in index 2020-07-15 20:41:09 -04:00
75b5e249c1 Merge pull request #72 from dpieski/patch-1
Update USAGE.md
2020-07-15 14:37:28 -04:00
Andrew
f49e03ac79 Update USAGE.md
added example for Windows to display number of logical processors. 
Does this same limitation apply to the new `index` threads option?
2020-07-15 13:21:02 -05:00
a6d2afc8dc Merge pull request #71 from simon987/web-tag
Web tag
2020-07-14 20:23:22 -04:00
8f8f66ba05 Update README.md 2020-07-14 20:22:03 -04:00
1d9fcf7105 Manual tagging 2020-07-13 19:18:07 -04:00
8127745f2b wip 2020-07-13 19:16:51 -04:00
230988d6d1 frontend tags 2020-07-13 19:15:59 -04:00
13f4dbed2d Handle 429, multi-threaded index module 2020-07-11 17:42:46 -04:00
ed15e89f45 Fix exec-script --es-url not being passed 2020-06-28 12:41:09 -04:00
c636d3d921 Set number_of_replicas to 0 by default in elasticsearch 2020-06-26 18:10:51 -04:00
7e92d4b7d1 refresh index only if user script is ran 2020-06-25 20:48:47 -04:00
8ffe780ab2 Tag tree fix for #64, validate required argument in exec-script 2020-06-25 20:11:30 -04:00
d3c8928fe8 Update readme 2020-06-24 21:06:27 -04:00
d9f628fca4 Build fix 2020-06-21 16:53:22 -04:00
68289268c1 Add exif tag 2020-06-21 16:51:14 -04:00
649c50c465 Update README.md 2020-06-21 14:35:18 -04:00
7b49a0dc49 Build fix 2020-06-21 12:56:13 -04:00
eb559b53aa RAW picture file support 2020-06-21 10:46:11 -04:00
6d01f9c0df whoops 2020-06-19 22:12:19 -04:00
e724fec668 Fix web return codes 2020-06-19 21:41:17 -04:00
fe5e93b300 Update USAGE.md 2020-06-19 21:29:09 -04:00
ecad85fd7d version bump 2020-06-19 21:10:03 -04:00
74cc898259 Fix tag display issue 2020-06-19 21:07:19 -04:00
dc2e4443c4 Add exec-script command 2020-06-19 21:07:19 -04:00
1a64431b52 Merge pull request #63 from dpieski/patch-3
Correct typos in example
2020-06-19 18:26:10 -04:00
Andrew
9bad515e06 Correct typos in example
Correct typos in examples.
2020-06-19 17:22:02 -05:00
648559cedb Update README.md 2020-06-17 13:25:20 -04:00
3e6cd9cd5c Merge pull request #60 from dpieski/patch-2
update Usage.md
2020-06-17 13:04:46 -04:00
f249992798 Update scripting.md 2020-06-17 13:00:07 -04:00
Andrew
e9645ecdaa update Usage.md
Fixing a link.
2020-06-17 10:58:25 -05:00
046edea0e2 Handle special characters in file paths 2020-06-10 19:45:36 -04:00
a011b7e97b Fragment size setting 2020-06-09 21:40:53 -04:00
8c1c1697e0 Fix file wordexp in some paths #59 2020-06-05 19:41:02 -04:00
018b49fa4c Fix csv_escape #58 2020-06-05 19:13:03 -04:00
27b4e6403e Re-enable path autocomplete #54 2020-06-02 19:46:58 -04:00
13fdbd9e69 Fix for ES 7.7 #54 2020-06-01 18:14:34 -04:00
5e7fdaf8dd Update issue-template.md 2020-06-01 10:45:43 -04:00
19d5c8ac9f Update issue-template.md 2020-05-29 18:19:21 -04:00
99497049a8 Merge pull request #53 from dpieski/patch-1
Update README
2020-05-29 18:16:13 -04:00
Andrew
1a3181d78b Update README
changed case of path in a link to the usage guide to fix 404 error.
2020-05-29 15:37:20 -05:00
449aa77c8f Fix for unknown mime inside archives 2020-05-25 17:36:04 -04:00
3058c55510 Memory leak fix #37 2020-05-24 15:42:42 -04:00
dedf9287b2 Fix name separation in --archive list mode 2020-05-24 14:36:59 -04:00
ab199b0c0c Remove arc_reset() function because seek() inside archive doesn't work 2020-05-24 14:18:31 -04:00
c4fbae123e Better support for media files inside archives 2020-05-24 14:10:23 -04:00
dd2397ef5c handle .tgz #44, ignore files inside archives for stats page 2020-05-24 10:10:28 -04:00
ee0f71f4d3 fix compile warning 2020-05-17 15:00:56 -04:00
0bbb96b149 Merge pull request #51 from simon987/stats
Stats page
2020-05-17 14:49:28 -04:00
78f6e16701 image 2020-05-17 12:47:45 -04:00
4625bca9a9 stats 2020-05-17 12:47:02 -04:00
f2ae653886 Revert "wip"
This reverts commit 5686bc86
2020-05-16 08:16:49 -04:00
5686bc864d wip 2020-05-13 17:37:40 -04:00
cf513b4ad8 Escape invalid UTF8 characters simon987/sist2#44, increase magic buffer size 2020-05-12 19:28:02 -04:00
013423424e UTF-8 fix attempt w/ libarchive (#44) 2020-05-10 19:52:42 -04:00
16514fd6b0 Option to search in path #49 2020-05-09 22:00:22 -04:00
27509f97e1 Update USAGE.md 2020-05-08 19:08:46 -04:00
4c540eae1c Update USAGE.md 2020-05-08 19:07:45 -04:00
d2b53ff6fc Update README.md 2020-05-08 18:32:32 -04:00
0ef4292abf Fix duplicate tag problem (simon987/sist2#48) 2020-05-05 20:20:10 -04:00
e6fde38c24 Load defaults when LocalStorage is outdated 2020-05-03 08:13:25 -04:00
5fa343d40f fix version typo 2020-05-03 08:10:28 -04:00
7ee1374802 oops 2020-04-30 21:21:48 -04:00
bd9e56829c Support for markup files 2020-04-30 20:21:09 -04:00
718169345e gzip artifacts in CI 2020-04-21 19:34:46 -04:00
5a6aa763ca build fix 2020-04-21 18:50:32 -04:00
695d9abd83 revert debug hard-coded listen address 2020-04-21 15:52:35 -04:00
e436af7b2a 2.0 (#46)
* extract scan code to libscan, (wip)

* submodules

* replace curl with mongoose (wip)

* replace onion with mongoose (wip)

* replace onion with mongoose (wip)

* It compiles! (I think)

* Update readme

* Entirely remove libonion (WIP)

* unscramble submodules

* recover screenshot

* Update mappings

* Bug fixes

* update

* media meta fix

* memory fixes

* More bug fixes...

* Bug fix w/ libmagic & vfile

* libmagic fix (again)

* Better lightbox, better video handler, random reloads fix

* Use svg for info icon

* re-enable http auth

* mobi support #41, fix logs

* Update README & cleanup
2020-04-21 14:42:20 -04:00
4501a7810f Update issue-template.md 2020-04-11 07:33:33 -04:00
simon987
e36761fa6a Update issue templates 2020-04-11 07:28:48 -04:00
fe53b79d56 Fix warnings 2020-03-25 08:18:59 -04:00
09615bbed6 Update dependencies 2020-03-24 14:30:23 -04:00
a2be9b955c Fix build errors 2020-03-24 11:49:13 -04:00
9298bd2d9d CI fix... 2020-03-24 10:09:33 -04:00
317034ba21 teamcity automation attempt 2020-03-24 10:01:27 -04:00
0505303503 text_buffer bug fixes & Sort option 2020-03-20 20:54:22 -04:00
6e5772f13b Errors cleanup 2020-03-20 10:05:10 -04:00
ccccdb3b78 Fixes #38 2020-03-13 16:35:11 -04:00
12d17acf4f UI fixes 2020-03-06 12:27:38 -05:00
48b56cdb7b I forgot to commit this somehow 2020-03-06 10:32:05 -05:00
048f707f80 Fix buffer overflow in json parse function (index module) 2020-03-06 10:17:21 -05:00
98e0a5fd64 Update CI script 2020-03-06 09:41:33 -05:00
740a49a09f version bump 2020-03-06 09:36:46 -05:00
81be662574 (breaking) update mime list 2020-03-06 09:36:21 -05:00
02fa3f02f5 Fix memory leak with virtual files in parse.c 2020-03-06 09:36:07 -05:00
cfdd7bdd87 Fix memory leak in font.c 2020-03-06 09:35:19 -05:00
7ceb645926 hotfix invalid read in text_buffer 2020-03-06 09:34:41 -05:00
7d0091f647 whoops 2020-03-05 21:54:56 -05:00
b3cd630399 Update README.md 2020-03-05 19:42:06 -05:00
5f7a1acfe3 Merge pull request #36 from simon987/wip-doc
Wip doc
2020-03-05 18:43:56 -05:00
513a21cca2 Undo debug stuff 2020-03-05 18:42:51 -05:00
04dbfb23ab Cleanup warnings 2020-03-05 16:53:30 -05:00
1abddabeec Rewrite doc.c module, fix bad error handling, fix pdf.c memory leaks 2020-03-05 16:12:34 -05:00
9ace5774af Update dependencies 2020-03-05 16:10:45 -05:00
eab6101cf7 make --fast faster 2020-03-05 12:26:43 -05:00
d7cbd5d2b6 wip doc rewrite 2020-03-05 09:13:37 -05:00
641edf2715 Prettier warning messages in main.c 2020-03-04 17:57:49 -05:00
7efb4957bf inline text/util functions 2020-03-04 17:50:31 -05:00
9ae77fdedb Fix css glitch 2020-03-03 16:51:01 -05:00
98c40901ed Disallow incremental scan when version does not match (#33) 2020-03-03 16:36:07 -05:00
363375d5da version bump 2020-03-03 16:25:41 -05:00
149de95d88 (breaking) Upgrade path filter bar 2020-03-03 16:24:24 -05:00
e5bb4856d2 (breaking) Set item depth in ingest pipeline 2020-03-02 17:39:25 -05:00
d78994d427 Ignore --incremental option when the directory does not exist (#31) 2020-03-01 21:16:50 -05:00
f2d68d54df Update README.md 2020-03-01 13:55:08 -05:00
e03625838b Settings menu (#30) and UI tweaks 2020-02-29 19:26:09 -05:00
86840b46f4 Version bump 2020-02-27 09:47:06 -05:00
e57f9916eb Rewrite documentation 2020-02-27 09:45:14 -05:00
565ba6ee76 Fix for #29 2020-02-27 09:44:19 -05:00
d83fc2c373 Fix docker build for 1.2.15 2020-02-27 09:42:18 -05:00
d4da28249e --fast option #27 2020-02-22 18:37:08 -05:00
483a454c8d --exclude argument #26 2020-02-22 16:55:35 -05:00
018ac86640 fix build... 2020-02-22 13:20:41 -05:00
398f1aead4 Support for cbr documents 2020-02-22 13:11:19 -05:00
d19a75926b Fix invalid read in terminate_string() 2020-02-22 13:10:40 -05:00
1ac8b40e3d Code style 2020-02-22 09:02:59 -05:00
a8505cb8c1 Fix for #28 2020-02-20 16:42:13 -05:00
ae8652d86e UI tweaks, search syntax (#25) 2020-02-16 15:24:29 -05:00
849beb09d8 hotfix 2020-02-15 19:33:18 -05:00
e1aaaee617 UI tweak 2020-02-15 09:30:14 -05:00
c02b940945 (I forgot to commit this) 2020-02-14 20:58:10 -05:00
2934ddb07f Add image viewer (#2) 2020-02-14 18:28:55 -05:00
7f6f3c02fa OCR tweaks 2020-02-11 21:13:47 -05:00
7f98d5a682 Fix buffer overflow (whoops) 2020-02-09 18:11:29 -05:00
7eb9c5d7d5 Fix web/index issue with NULL mime types 2020-02-09 17:23:49 -05:00
184439aa38 increase minimum image size for OCR 2020-02-09 14:06:59 -05:00
1ce8b298a1 Display EXIF tags on document info panel, remove march=native on openjp 2020-02-09 13:21:19 -05:00
75f99025d9 add exif dateTime, allow some special characters in text meta 2020-02-09 08:47:13 -05:00
ebe852bd5a Fix rewrite-url arg 2020-02-09 08:23:17 -05:00
402b103c49 Fix total count for ES 7.5 2020-02-08 09:25:00 -05:00
e9b6e1cdc2 Turn off auto optimisation in libtesseract build 2020-02-08 08:32:04 -05:00
ed1ce8ab5e Handle XML errors #18 2020-02-07 10:08:01 -05:00
d1fa4febc4 Improve scroll feature, UI fix 2020-02-07 10:08:01 -05:00
048c55df7b Update README.md 2020-02-06 19:56:29 -05:00
f77bc6a025 Update README.md 2020-02-06 19:55:32 -05:00
efdde2734e version bump 2020-02-06 19:28:05 -05:00
66658fa8f7 Remove trailing/leading white space in text meta fields 2020-02-06 19:27:30 -05:00
df41c251e4 (Breaking!) Add some exif tags 2020-02-06 19:21:50 -05:00
3282ab56ba Version bump 2020-02-02 09:26:54 -05:00
8300838d30 Suppress XML parsing errors (#18) 2020-02-02 09:26:03 -05:00
c9870a6d3d Remove -march=native for release build... 2020-02-02 09:03:06 -05:00
a143cc4fcf bundle openssl... 2020-02-02 08:39:20 -05:00
9ef1f3781d fix attempt for #11 2020-02-01 20:04:26 -05:00
bbee8aa721 tesseract ocr path fix 2020-02-01 20:03:59 -05:00
d22f83c797 curl fix 2020-02-01 15:22:43 -05:00
50615486a4 curl fix attempt 2020-02-01 14:42:42 -05:00
ca79e4f797 add /status endpoint 2020-01-28 10:18:37 -05:00
6a9fd08a80 Merge pull request #21 from simon987/wip-20
Fixes #20
2020-01-27 09:16:00 -05:00
cab890dc9b #20 wip 2020-01-27 09:09:42 -05:00
b3c4faf2df Update README.md 2020-01-26 12:37:13 -05:00
353937171a Update README.md 2020-01-20 15:54:53 -05:00
c80002bea4 Bundle libcurl attempt 2 2020-01-18 11:53:12 -05:00
56adee9d81 Bundle libcurl, libopc bugfix #18 2020-01-18 10:25:02 -05:00
d6493d6d5f Bundle libpng 2020-01-16 16:21:38 -05:00
0967e9676d remove static build in CI... 2020-01-16 15:45:18 -05:00
487e998ea0 Display error message on /d/ error 2020-01-16 15:04:50 -05:00
919f45c79c Document info modal #19 2020-01-16 14:37:19 -05:00
d42129cfcb CI fix attempt 2020-01-15 20:11:45 -05:00
754983e34a Minor cleanup 2020-01-15 18:16:06 -05:00
7c8a3e2f9d Support for external json indices 2020-01-14 15:44:31 -05:00
3bb24b4453 Use bundled libtiff 2020-01-14 12:21:26 -05:00
9a56b959d3 Fix build problems... 2020-01-14 10:55:02 -05:00
5e3a2dbcc2 Update README 2020-01-14 10:47:00 -05:00
573f94f24e OCR support, remove static build 2020-01-14 10:26:40 -05:00
f5db78a69f Ignore special ascii chars, strip binary in docker build 2020-01-12 10:59:17 -05:00
5a2820d339 UI tweak auto-select based on query args 2020-01-11 17:48:51 -05:00
b7f13f425c Fix memory leaks (whoops) 2020-01-11 17:34:34 -05:00
d1a2f9b1d5 Strip binary (CI) 2020-01-07 14:32:39 -05:00
71f17986db build settings 2020-01-06 21:34:41 -05:00
acdd2fb3c1 Use bundled ffmpeg libraries 2020-01-06 16:25:34 -05:00
0cda6c00e1 CI attempt 2020-01-03 20:21:07 -05:00
14d0e5a1e1 possible fix for #18 2019-12-28 14:32:42 -05:00
0d06d39281 Path in list view #16 2019-12-28 14:32:05 -05:00
80708ca636 Merge pull request #17 from dpieski/patch-1
maybe a typo in cli.c
2019-12-23 18:33:28 -05:00
Andrew
43b7b40dc4 maybe a typo in cli.c
possibly corrected a typo
2019-12-23 13:18:18 -06:00
d051f541e2 Show client error on ES connection failure, fixes #13 2019-12-21 20:52:53 -05:00
0eefbac7b4 Update libopc. should fix #14 2019-12-21 19:43:33 -05:00
663f8e21c1 Better logging, fixes #15 2019-12-21 12:32:08 -05:00
80fbcb2a01 empty docx bugfix 2019-12-19 17:26:11 -05:00
8451109ecd OOXML files support 2019-12-19 16:53:18 -05:00
d6fe61cfdc Clarify help string for es url #12 2019-12-19 16:52:22 -05:00
254094130f Fix submodules 2019-12-13 12:35:39 -05:00
eaaa75c04c Fix submodules 2019-12-13 11:24:17 -05:00
bb87f4270f Update docker script 2019-12-13 11:16:17 -05:00
be23201210 Archive file support 2019-12-13 10:53:51 -05:00
9778acda77 uifix 2019-12-12 19:19:53 -05:00
8d187926d9 Bugfix with incremental comparison 2019-12-12 15:41:31 -05:00
88c37e3523 Update README.md 2019-12-04 20:56:52 -05:00
d816dae8b3 UI fix, disable thumbnail option, batch index size option 2019-12-01 10:57:29 -05:00
4346c3e063 Also use static libraries in sist2 build 2019-11-30 20:02:26 -05:00
1a1032a8a7 Cleaner shutdown 2019-11-30 19:59:11 -05:00
4ab2ba1a02 #8 Skip PDF scan when content-size is 0 2019-11-21 16:06:31 -05:00
d089601dc5 Add sfv & m3u 2019-11-20 12:31:31 -05:00
11df6cc88f Add nfo to ext list 2019-11-20 11:41:50 -05:00
373ac01e4e Fix for #3 and maximum scan depth 2019-11-19 11:23:30 -05:00
893ff145c5 List mode tweak 2019-11-17 16:28:47 -05:00
6111ded77f Merge pull request #6 from simon987/wip
List mode #5
2019-11-17 16:15:36 -05:00
34cc26b2fd List mode #5 wip 2019-11-17 15:03:24 -05:00
204034d859 Add basic auth. Fixes #4 2019-11-17 10:00:17 -05:00
16ccc6c0d3 Show error message on elasticsearch connection fail 2019-11-17 09:55:16 -05:00
94c617fdc3 Bug fix 2019-11-12 22:11:50 -05:00
ebfd7e03ce User scripts, bug fixes, docker image 2019-11-12 20:58:43 -05:00
6931d320a2 bugfix with invalid/corrupted index path 2019-11-11 20:49:38 -05:00
fc22e52eae Image placeholder 2019-11-09 23:26:49 -05:00
ba81748a74 Update build 2019-11-09 17:15:20 -05:00
e72fa1587b EXIF metadata for images 2019-11-09 15:18:44 -05:00
ea4fb7fa0d Bug fixes 2019-11-09 12:00:07 -05:00
b0a868bb73 remove 'must match' 2019-11-08 21:46:54 -05:00
d761a3b595 update readme 2019-11-08 19:42:36 -05:00
2d7a8a2fdc fuzzy toggle 2019-11-08 16:15:10 -05:00
152d2ddf8a bug fix in deserialize 2019-11-08 09:03:44 -05:00
bc5f22b759 update readme 2019-11-05 18:59:00 -05:00
534b397876 update readme, UI tweak: don't show broken images 2019-11-03 10:39:02 -05:00
7962a994e2 utf8 update + bug fixes 2019-11-03 07:50:31 -05:00
f8f1a27180 video metadata 2019-10-31 11:54:13 -04:00
784c3c9435 Font rendering fixes 2019-10-31 10:15:01 -04:00
f8b081a3f4 UI tweaks, path autocomplete 2019-10-31 08:26:19 -04:00
5661573b06 Dark theme, pdf meta, de-serialize bugfix 2019-10-30 22:20:22 -04:00
130fb78787 Fix some memory leaks 2019-10-27 15:40:48 -04:00
2943ca9365 UI tweak 2019-10-27 14:10:24 -04:00
7234c22d2f epub fix 2019-10-27 14:00:52 -04:00
bdbd7ca7ed cbz fix 2019-10-27 13:33:55 -04:00
9b7c56a608 Static build (scan only) 2019-10-27 12:25:34 -04:00
4109ba6d34 Fix files with # character in url redirect 2019-10-27 08:30:47 -04:00
69f0c1f2cf do 'should' search if search bar is empty 2019-10-26 21:56:03 -04:00
c063d87232 Fix bug with files with multiple video or audio streams 2019-10-26 21:12:51 -04:00
f44e6336dc Deserialize typo 2019-10-26 20:54:46 -04:00
7be6234f0d Add preloader (UI Tweak) 2019-10-26 20:49:50 -04:00
85ab2858f6 Fix UI bugs 2019-10-26 20:28:29 -04:00
cbb043f03f Fix elasticsearch 6 bug 2019-10-26 20:18:58 -04:00
50fcec25f7 Fix flaky mime table generation 2019-10-26 19:53:41 -04:00
b4199a1fd8 fix for elasticsearch 2019-10-26 19:09:26 -04:00
ef79681024 fix for elasticsearch 6.8 2019-10-26 17:58:52 -04:00
0d4deb3eb3 Update readme 2019-10-26 17:40:06 -04:00
113 changed files with 8893 additions and 3326 deletions

View File

@@ -0,0 +1,18 @@
---
name: Issue template
about: General
title: ''
labels: ''
assignees: ''
---
sist2 version:
Platform (Linux or Docker):
Elasticsearch version:
Command with arguments: `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0`
If the issue is related to the `scan` module, please attach the files necessary to reproduce the error or email them to me[at]simon987.net.

4
.gitignore vendored
View File

@@ -11,7 +11,9 @@ Makefile
LOG
sist2*
index.sist2/
bundle.css
bundle*.css
bundle.js
*.a
vgcore.*
build/
third-party/

22
.gitmodules vendored
View File

@@ -1,18 +1,6 @@
[submodule "argparse"]
path = argparse
[submodule "third-party/libscan"]
path = third-party/libscan
url = https://github.com/simon987/libscan
[submodule "third-party/argparse"]
path = third-party/argparse
url = https://github.com/cofyc/argparse
[submodule "cJSON"]
path = cJSON
url = https://github.com/DaveGamble/cJSON
[submodule "lib/mupdf"]
path = lib/mupdf
url = git://git.ghostscript.com/mupdf.git
[submodule "lib/onion"]
path = lib/onion
url = https://github.com/davidmoreno/onion
[submodule "lib/ffmpeg"]
path = lib/ffmpeg
url = https://git.ffmpeg.org/ffmpeg.git
[submodule "lmdb"]
path = lmdb
url = https://github.com/LMDB/lmdb

69
.teamcity/settings.kts vendored Normal file
View File

@@ -0,0 +1,69 @@
import jetbrains.buildServer.configs.kotlin.v2019_2.*
import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.ExecBuildStep
import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.exec
import jetbrains.buildServer.configs.kotlin.v2019_2.triggers.vcs
import jetbrains.buildServer.configs.kotlin.v2019_2.vcs.GitVcsRoot
/*
The settings script is an entry point for defining a TeamCity
project hierarchy. The script should contain a single call to the
project() function with a Project instance or an init function as
an argument.
VcsRoots, BuildTypes, Templates, and subprojects can be
registered inside the project using the vcsRoot(), buildType(),
template(), and subProject() methods respectively.
To debug settings scripts in command-line, run the
mvnDebug org.jetbrains.teamcity:teamcity-configs-maven-plugin:generate
command and attach your debugger to the port 8000.
To debug in IntelliJ Idea, open the 'Maven Projects' tool window (View
-> Tool Windows -> Maven Projects), find the generate task node
(Plugins -> teamcity-configs -> teamcity-configs:generate), the
'Debug' option is available in the context menu for the task.
*/
version = "2019.2"
project {
vcsRoot(HttpsGithubComSimon987sist2refsHeadsMaster)
buildType(Build)
}
object Build : BuildType({
name = "Build"
artifactRules = """
sist2
sist2_scan
""".trimIndent()
vcs {
root(HttpsGithubComSimon987sist2refsHeadsMaster)
}
steps {
exec {
name = "Build"
path = "./ci/build.sh"
dockerImage = "simon987/general_ci"
dockerImagePlatform = ExecBuildStep.ImagePlatform.Linux
dockerPull = true
}
}
triggers {
vcs {
}
}
})
object HttpsGithubComSimon987sist2refsHeadsMaster : GitVcsRoot({
name = "https://github.com/simon987/sist2#refs/heads/master"
url = "https://github.com/simon987/sist2"
})

View File

@@ -2,126 +2,123 @@ cmake_minimum_required(VERSION 3.7)
set(CMAKE_C_STANDARD 11)
project(sist2 C)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/CMakeModules")
option(SIST_DEBUG "Build a debug executable" on)
add_subdirectory(third-party/libscan)
set(ARGPARSE_SHARED off)
add_subdirectory(third-party/argparse)
add_executable(
sist2
src/main.c
src/sist.h
src/io/walk.h src/io/walk.c
src/parsing/media.h src/parsing/media.c
src/parsing/pdf.h src/parsing/pdf.c
src/io/store.h src/io/store.c
src/tpool.h src/tpool.c
src/parsing/parse.h src/parsing/parse.c
src/io/serialize.h src/io/serialize.c
src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
src/parsing/text.h src/parsing/text.c
src/index/web.c src/index/web.h
src/web/serve.c src/web/serve.h
src/index/elastic.c src/index/elastic.h
src/util.c src/util.h
src/ctx.h src/types.h src/parsing/font.c src/parsing/font.h
src/ctx.h src/types.h
src/log.c src/log.h
# argparse
argparse/argparse.h argparse/argparse.c
third-party/argparse/argparse.h third-party/argparse/argparse.c
# cJSON
cJSON/cJSON.h cJSON/cJSON.c
src/cli.c src/cli.h
src/stats.c src/stats.h src/ctx.c)
# LMDB
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
src/cli.c src/cli.h)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
find_package(PkgConfig REQUIRED)
set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:/usr/local/lib/pkgconfig/")
find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-glib CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED)
find_library(UUID_LIB NAMES uuid)
find_package(CURL CONFIG REQUIRED)
find_package(LibMagic REQUIRED)
find_package(FFmpeg REQUIRED)
find_package(OpenSSL REQUIRED)
find_package(Freetype REQUIRED)
#find_package(OpenSSL REQUIRED)
pkg_check_modules(GLIB REQUIRED glib-2.0)
pkg_check_modules(GOBJECT REQUIRED gobject-2.0)
pkg_check_modules(UUID REQUIRED uuid)
include_directories(${LIBMAGIC_INCLUDE_DIRS})
link_directories(${LIBMAGIC_LIBRARY_DIRS})
add_definitions(${LIBMAGIC_CFLAGS_OTHER})
link_directories(${UUID_LIBRARY_DIRS})
include_directories(${UUID_INCLUDE_DIRS})
add_definitions(${UUID_CFLAGS_OTHER})
include_directories(${GLIB_INCLUDE_DIRS})
link_directories(${GLIB_LIBRARY_DIRS})
add_definitions(${GLIB_CFLAGS_OTHER})
include_directories(${GOBJECT_INCLUDE_DIRS})
link_directories(${GOBJECT_LIBRARY_DIRS})
add_definitions(${GOBJECT_CFLAGS_OTHER})
link_directories(${FFMPEG_LIBRARY_DIRS})
include_directories(${FFMPEG_INCLUDE_DIRS})
include_directories(${OPENSSL_INCLUDE_DIR})
link_directories(${OPENSSL_CRYPTO_LIBRARY})
list(REMOVE_ITEM GLIB_LIBRARIES pcre)
list(REMOVE_ITEM GOBJECT_LIBRARIES pcre)
list(REMOVE_ITEM UUID_LIBRARIES pcre)
include_directories(${FREETYPE_INCLUDE_DIRS})
add_definitions(${FREETYPE_CFLAGS_OTHER})
include_directories(
${PROJECT_SOURCE_DIR}/
${PROJECT_SOURCE_DIR}/lmdb/libraries/liblmdb/
${PROJECT_SOURCE_DIR}/lib/onion/src/
${PROJECT_SOURCE_DIR}/lib/mupdf/include/
target_include_directories(
sist2 PUBLIC
${CMAKE_SOURCE_DIR}/third-party/onion/src/
${CMAKE_SOURCE_DIR}/third-party/utf8.h/
${CMAKE_SOURCE_DIR}/third-party/libscan/
${CMAKE_SOURCE_DIR}/
)
target_compile_options(sist2
target_compile_options(
sist2
PRIVATE
-O3
# -march=native
-fno-stack-protector
-fomit-frame-pointer
)
-fPIC
)
TARGET_LINK_LIBRARIES(
if (SIST_DEBUG)
target_compile_options(
sist2
PRIVATE
-g
-fstack-protector
-fno-omit-frame-pointer
-fsanitize=address
-O2
)
target_link_options(
sist2
PRIVATE
-fsanitize=address
# -static
)
set_target_properties(
sist2
PROPERTIES
OUTPUT_NAME sist2_debug
)
else ()
# set(VCPKG_BUILD_TYPE release)
target_compile_options(
sist2
PRIVATE
-Ofast
-fno-stack-protector
-fomit-frame-pointer
)
endif ()
add_dependencies(
sist2
scan
argparse
)
target_link_libraries(
sist2
${GLIB_LIBRARIES}
${GOBJECT_LIBRARIES}
${UUID_LIBRARIES}
# ffmpeg
# ${PROJECT_SOURCE_DIR}/lib/libavcodec.a
# ${PROJECT_SOURCE_DIR}/lib/libavformat.a
# ${PROJECT_SOURCE_DIR}/lib/libavutil.a
# ${PROJECT_SOURCE_DIR}/lib/libswscale.a
# ${PROJECT_SOURCE_DIR}/lib/libswresample.a
${FFMPEG_LIBRARIES}
swscale
# mupdf
${PROJECT_SOURCE_DIR}/lib/libmupdf.a
${PROJECT_SOURCE_DIR}/lib/libmupdf-third.a
# onion
${PROJECT_SOURCE_DIR}/lib/libonion_static.a
z
lmdb
cjson
argparse
unofficial::glib::glib
unofficial::mongoose::mongoose
# OpenSSL::SSL OpenSSL::Crypto
CURL::libcurl
${UUID_LIB}
pthread
curl
m
bz2
magic
scan
)
add_custom_target(
before_sist2
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/scripts/before_build.sh
)
add_dependencies(sist2 before_sist2)

22
Docker/Dockerfile Normal file
View File

@@ -0,0 +1,22 @@
FROM ubuntu:19.10
MAINTAINER simon987 <me@simon987.net>
RUN apt update
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
curl libtiff5 libpng16-16 libpcre3
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \
curl -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata &&\
curl -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata &&\
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh
ADD sist2 /root/sist2
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
ENTRYPOINT ["/root/sist2"]

14
Docker/build.sh Executable file
View File

@@ -0,0 +1,14 @@
rm ./sist2 sist2_debug
cp ../sist2.gz .
gzip -d sist2.gz
strip sist2
version=$(./sist2 --version)
echo "Version ${version}"
docker build . -t simon987/sist2:${version} -t simon987/sist2:latest
docker push simon987/sist2:${version}
docker push simon987/sist2:latest
docker run --rm simon987/sist2 -v

146
README.md
View File

@@ -1,85 +1,135 @@
![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
[![Development snapshots](https://ci.simon987.net/app/rest/builds/buildType(Sist2_Build)/statusIcon)](https://files.simon987.net/artifacts/Sist2/Build/)
# sist2
sist2 (Simple incremental search tool)
*Warning: sist2 is in early development*
![sist2.png](docs/sist2.png)
## Features
* Fast, low memory usage, multi-threaded
* Mobile-friendly Web interface
* Portable (all its features are packaged in a single executable)
* Extracts text and metadata from common file types \*
* Generates thumbnails \*
* Incremental scanning
* Manual tagging from the UI and automatic tagging based on file attributes via [user scripts](docs/scripting.md)
* Recursive scan inside archive files \*\*
* OCR support with tesseract \*\*\*
* Stats page & disk utilisation visualization
\* See [format support](#format-support)
\*\* See [Archive files](#archive-files)
\*\*\* See [OCR](#ocr)
![stats](docs/stats.png)
## Getting Started
1. Have an Elasticsearch (>= 6.X.X) instance running
1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
1. *(or)* Run using docker:
```bash
docker run -d --name es1 --net sist2_net -p 9200:9200 \
-e "discovery.type=single-node" elasticsearch:7.5.2
```
1. *(or)* Run using docker-compose:
```yaml
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:7.5.2
environment:
- discovery.type=single-node
- "ES_JAVA_OPTS=-Xms1G -Xmx2G"
```
1. Download sist2 executable
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
1. *(or)* `docker pull simon987/sist2:latest`
1. See [Usage guide](docs/USAGE.md)
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
## Example usage
See help page `sist2 --help` for more details.
See [Usage guide](docs/USAGE.md) for more details
**Scan a directory**
```bash
sist2 scan ~/Documents -o ./orig_idx/
sist2 scan --threads 4 --content-size 16384 /mnt/Pictures
sist2 scan -i ./orig_idx/ -o ./updated_idx/ ~/Documents
```
1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
1. Push index to Elasticsearch: `sist2 index ./docs_idx`
1. Start web interface: `sist2 web ./docs_idx`
**Push index to Elasticsearch or file**
```bash
sist2 index --force-reset ./my_idx
sist2 index --print ./my_idx > raw_documents.ndjson
```
**Start web interface**
```bash
sist2 web --bind 0.0.0.0 --port 4321 ./my_idx1 ./my_idx2 ./my_idx3
```
## Format support
File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:---
pdf,xps,cbz,cbr,fb2,epub | MuPDF | yes | yes, `png` | *planned* |
`audio/*` | libav | - | yes, `jpeg` | ID3 tags |
`video/*` | libav | - | yes, `jpeg` | *planned* |
`image/*` | libav | - | yes, `jpeg` | *planned* |
pdf,xps,fb2,epub | MuPDF | text+ocr | yes | title |
cbz,cbr | *(none)* | - | yes | - |
`audio/*` | ffmpeg | - | yes | ID3 tags |
`video/*` | ffmpeg | - | yes | title, comment, artist |
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - |
docx, xlsx, pptx | | *planned* | no | *planned* |
html, xml | *(none)* | yes | no | - |
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
docx, xlsx, pptx | *(none)* | yes | if embedded | creator, modified_by, title |
mobi, azw, azw3 | libmobi | yes | no | author, title |
\* *See [Archive files](#archive-files)*
### Archive files
**sist2** will scan files stored into archive files (zip, tar, 7z...) as if
they were directly in the file system. Recursive (archives inside archives)
scan is also supported.
**Limitations**:
* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
is limitted (see `--mem-buffer` option)
* Archive files are scanned sequentially, by a single thread. On systems where
**sist2** is not I/O bound, scans might be faster when larger archives are split
into smaller parts.
### OCR
You can enable OCR support for pdf,xps,cbz,cbr,fb2,epub file types with the
`--ocr <lang>` option. Download the language data files with your
package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
## Installation
The `simon987/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa) pre-installed.
1. Download runtime dependencies
Examples
```bash
sist2 scan --ocr jpn ~/Books/Manga/
sist2 scan --ocr eng ~/Books/Textbooks/
```
`apt install curl bzip2`
1. Download binary
Get [the latest release](https://github.com/simon987/sist2/releases) from GitHub
1. (Optional) Add to search path
`mv sist2 /usr/bin/`
## Build from source
You can compile **sist2** by yourself if you don't want to use the pre-compiled
binaries (GCC 7+ required).
1. Install compile-time dependencies
*(Debian)*
```bash
apt install git cmake pkg-config libglib2.0-dev\
libssl-dev uuid-dev libavformat-dev libswscale-dev \
python3 libmagic-dev libfreetype6-dev libcurl-dev \
libbz2-dev yasm
```
*(Archlinux)*
```bash
pacman -S git ffmpeg pkg-config cmake openssl curl \
bzip2 yasm libutil-linux
```
```bash
vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libuuid libmagic libraw
```
2. Build
```bash
git clone --recurse-submodules https://github.com/simon987/sist2
./scripts/get_static_libs.sh
cmake .
git clone --recursive https://github.com/simon987/sist2/
cmake -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
make
```

Submodule argparse deleted from fafc503d23

1
cJSON

Submodule cJSON deleted from 2de7d04aaf

17
ci/build.sh Executable file
View File

@@ -0,0 +1,17 @@
#!/usr/bin/env bash
VCPKG_ROOT="/vcpkg"
rm *.gz
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=off -DVCPKG_BUILD_TYPE=release -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j 12
strip sist2
gzip -9 sist2
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=on -DVCPKG_BUILD_TYPE=debug -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j 12
cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2
tar -czf sist2_debug.tar.gz sist2_debug libasan.so.2

338
docs/USAGE.md Normal file
View File

@@ -0,0 +1,338 @@
# Usage
*More examples (specifically with docker/compose) are in progress*
* [scan](#scan)
* [options](#scan-options)
* [examples](#scan-examples)
* [index format](#index-format)
* [index](#index)
* [options](#index-options)
* [examples](#index-examples)
* [web](#web)
* [options](#web-options)
* [examples](#web-examples)
* [rewrite_url](#rewrite_url)
* [link to specific indices](#link-to-specific-indices)
* [exec-script](#exec-script)
* [tagging](#tagging)
```
Usage: sist2 scan [OPTION]... PATH
or: sist2 index [OPTION]... INDEX
or: sist2 web [OPTION]... INDEX...
or: sist2 exec-script [OPTION]... INDEX
Lightning-fast file system indexer and search tool.
-h, --help show this help message and exit
-v, --version Show version and exit
--verbose Turn on logging
--very-verbose Turn on debug messages
Scan options
-t, --threads=<int> Number of threads. DEFAULT=1
-q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
--size=<int> Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
--content-size=<int> Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
--incremental=<str> Reuse an existing index and only scan modified files.
-o, --output=<str> Output directory. DEFAULT=index.sist2/
--rewrite-url=<str> Serve files from this url instead of from disk.
--name=<str> Index display name. DEFAULT: (name of the directory)
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
-e, --exclude=<str> Files that match this regex will not be scanned
--fast Only index file names & mime type
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
Index options
-t, --threads=<int> Number of threads. DEFAULT=1
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
-p, --print Just print JSON documents to stdout.
--script-file=<str> Path to user script.
--batch-size=<int> Index batch size. DEFAULT: 100
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
Web options
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--bind=<str> Listen on this address. DEFAULT=localhost:4090
--auth=<str> Basic auth in user:password format
--tag-auth=<str> Basic auth in user:password format for tagging
Exec-script options
--script-file=<str> Path to user script.
Made by simon987 <me@simon987.net>. Released under GPL-3.0
```
## Scan
### Scan options
* `-t, --threads`
Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-WmiObject Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
* `-q, --quality`
Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. *Does not affect PDF thumbnails quality*
* `--size`
Thumbnail size in pixels.
* `--content-size`
Number of bytes of text to be extracted from the content of files (plain text and PDFs).
Repeated whitespace and special characters do not count toward this limit.
* `--incremental`
Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
will be copied to the new index and will not be parsed again.
* `-o, --output` Output directory.
* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
* `--name` Set the `name` option for the web module
* `--depth` Maximum scan dept. Set to 0 only scan files directly in the root directory, set to -1 for infinite depth
* `--archive` Archive file mode.
* skip: Don't parse
* list: Only get file names as text
* shallow: Don't parse archives inside archives.
* recurse: Scan archives recursively (default)
* `--ocr` See [OCR](../README.md#OCR)
* `-e, --exclude` Regex pattern to exclude files. A file is excluded if the pattern matches any
part of the full absolute path.
Examples:
* `-e ".*\.ttf"`: Ignore ttf files
* `-e ".*\.(ttf|rar)"`: Ignore ttf and rar files
* `-e "^/mnt/backups/"`: Ignore all files in the `/mnt/backups/` directory
* `-e "^/mnt/Data[12]/"`: Ignore all files in the `/mnt/Data1/` and `/mnt/Data2/` directory
* `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"` Exclude the
`/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories
* `--fast` Only index file names and mime type
* `--treemap-threshold` Directories smaller than (`treemap-threshold` * `<total size of the index>`)
will not be considered for the disk utilisation visualization; their size will be added to
the parent directory. If the parent directory is still smaller than the threshold, it will also be "merged upwards"
and so on.
In effect, smaller `treemap-threshold` values will yield a more detailed
(but also a more cluttered and harder to read) visualization.
* `--mem-buffer` Maximum memory buffer size in MB (per thread) for files inside archives. Media files
larger than this number will be read sequentially and no *seek* operations will be supported.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
### Scan examples
Simple scan
```bash
sist2 scan ~/Documents
sist2 scan \
--threads 4 --content-size 16000000 --quality 1.0 --archive shallow \
--name "My Documents" --rewrite-url "http://nas.domain.local/My Documents/" \
~/Documents -o ./documents.idx/
```
Incremental scan
```
sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
```
### Index format
A typical `binary` type index structure looks like this:
```
documents.idx/
├── descriptor.json
├── _index_139965416830720
├── _index_139965425223424
├── _index_139965433616128
├── _index_139965442008832
├── _index_139965442008832
├── treemap.csv
├── agg_mime.csv
├── agg_date.csv
├── add_size.csv
├── thumbs
| ├── data.mdb
| └── lock.mdb
└── tags
├── data.mdb
└── lock.mdb
```
The `_index_*` files contain the raw binary index data and are not meant to be
read by other applications. The format is generally compatible across different
sist2 versions.
The `thumbs/` folder is a [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database)
database containing the thumbnails.
The `descriptor.json` file contains general information about the index. The
following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`.
The `.csv` are pre-computed aggregations necessary for the stats page.
*Advanced usage*
Instead of using the `scan` module, you can also import an index generated
by a third party application. The 'external' index must have the following format:
```
my_index/
├── descriptor.json
├── _index_0
└── thumbs
├── data.mdb
└── lock.mdb
```
*descriptor.json*:
```json
{
"uuid": "<valid UUID4>",
"version": "_external_v1",
"root": "(optional)",
"name": "<name>",
"rewrite_url": "(optional)",
"type": "json",
"timestamp": 1578971024
}
```
*_index_0*: NDJSON format (One json object per line)
```json
{
"_id": "unique uuid for the file",
"index": "index uuid4 (same one as descriptor.json!)",
"mime": "application/x-cbz",
"size": 14341204,
"mtime": 1578882996,
"extension": "cbz",
"name": "my_book",
"path": "path/to/books",
"content": "text contents of the book",
"title": "Title of the book",
"tag": ["genre.fiction", "author.someguy", "etc..."],
"_keyword": [
{"k": "ISBN", "v": "ABCD34789231"}
],
"_text": [
{"k": "other", "v": "This will be indexed as text"}
]
}
```
You can find the full list of supported fields [here](../src/io/serialize.c#L90)
The `_keyword.*` items will be indexed and searchable as **keyword** fields (only full matches allowed).
The `_text.*` items will be indexed and searchable as **text** fields (fuzzy searching allowed)
*thumbs/*:
LMDB key-value store. Keys are **binary** 128-bit UUID4s (`_id` field)
and values are raw image bytes.
Importing an external `binary` type index is technically possible but
it is currently unsupported and has no guaranties of back/forward compatibility.
## Index
### Index options
* `--es-url`
Elasticsearch url and port. If you are using docker, make sure that both containers are on the
same network.
* `-p, --print`
Print index in JSON format to stdout.
* `--script-file`
Path to user script. See [Scripting](scripting.md).
* `--batch-size=<int>`
Index batch size. Indexing is generally faster with larger batches, but payloads that
are too large will fail and additional overhead for retrying with smaller sizes may slow
down the process.
* `-f, --force-reset`
Reset Elasticsearch mappings and settings.
**(You must use this option the first time you use the index command)**.
### Index examples
**Push to elasticsearch**
```bash
sist2 index --force-reset --batch-size 1000 --es-url http://localhost:9200 ./my_index/
sist2 index ./my_index/
```
**Save index in JSON format**
```bash
sist2 index --print ./my_index/ > my_index.ndjson
```
**Inspect contents of an index**
```bash
sist2 index --print ./my_index/ | jq | less
```
## Web
### Web options
* `--es-url=<str>` Elasticsearch url.
* `--bind=<str>` Listen on this address.
* `--auth=<str>` Basic auth in user:password format
* `--tag-auth=<str>` Basic auth in user:password format. Works the same way as the
`--auth` argument, but authentication is only applied the `/tag/` endpoint.
### Web examples
**Single index**
```bash
sist2 web --auth admin:hunter2 --bind 0.0.0.0:8888 my_index
```
**Multiple indices**
```bash
# Indices will be displayed in this order in the web interface
sist2 web index1 index2 index3 index4
```
### rewrite_url
When the `rewrite_url` field is not empty, the web module ignores the `root`
field and will return a HTTP redirect to `<rewrite_url><path>/<name><extension>`
instead of serving the file from disk.
Both the `root` and `rewrite_url` fields are safe to manually modify from the
`descriptor.json` file.
### Link to specific indices
To link to specific indices, you can add a list of comma-separated index name to
the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are
not displayed.
## exec-script
The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.
# Tagging
### Manual tagging
You can modify tags of individual documents directly from the
`web` interface. Note that you can setup authentication for this feature
with the `--tag-auth` option (See [web options](#web-options))
![manual_tag](manual_tag.png)
Tags that are manually added are saved both in the
index folder (in `/tags/`) and in Elasticsearch*. When re-`index`ing,
they are read from the index and automatically applied.
You can safely copy the `/tags/` database to another index.
See [Automatic tagging](#automatic-tagging) for information about tag
hierarchies and tag colors.
\* *It can take a few seconds to take effect in new search queries, and the page needs
to be reloaded for the tag tab to update*
### Automatic tagging
See [scripting](docs/scripting.md) documentation.

BIN
docs/genre_example.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

BIN
docs/manual_tag.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.9 KiB

142
docs/scripting.md Normal file
View File

@@ -0,0 +1,142 @@
## User scripts
*This document is under construction, more in-depth guide coming soon*
During the `index` step, you can use the `--script-file <script>` option to
modify documents or add user tags. This option is mainly used to
implement automatic tagging based on file attributes.
The scripting language used
([Painless Scripting Language](https://www.elastic.co/guide/en/elasticsearch/painless/7.4/index.html))
is very similar to Java, but you should be able to create user scripts
without programming experience at all if you're somewhat familiar with
regex.
This is the base structure of the documents we're working with:
```json
{
"_id": "e171405c-fdb5-4feb-bb32-82637bc32084",
"_index": "sist2",
"_type": "_doc",
"_source": {
"index": "206b3050-e821-421a-891d-12fcf6c2db0d",
"mime": "application/json",
"size": 1799,
"mtime": 1545443685,
"extension": "md",
"name": "README",
"path": "sist2/scripting",
"content": "..."
}
}
```
**Example script**
This script checks if the `genre` attribute exists, if it does
it adds the `genre.<genre>` tag.
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source?.genre != null) {
tags.add("genre." + ctx._source.genre.toLowerCase());
}
```
You can use `.` to create a hierarchical tag tree:
![scripting/genre_example](genre_example.png)
To use regular expressions, you need to add this line in `/etc/elasticsearch/elasticsearch.yml`
```yaml
script.painless.regex.enabled: true
```
Or, if you're using docker add `-e "script.painless.regex.enabled=true"`
**Tag color**
You can specify the color for an individual tag by appending an
hexadecimal color code (`#RRGGBBAA`) to the tag name.
### Examples
If `(20XX)` is in the file name, add the `year.<year>` tag:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
if (m.find()) {
tags.add("year." + m.group(1));
}
```
Use default *Calibre* folder structure to infer author.
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
// We expect the book path to look like this:
// /path/to/Calibre Library/Author/Title/Title - Author.pdf
if (ctx._source.name.contains("-") && ctx._source.extension == "pdf") {
String[] names = ctx._source.name.splitOnToken('-');
tags.add("author." + names[1].strip());
}
```
If the file matches a specific pattern `AAAA-000 fName1 lName1, <fName2 lName2>...`, add the `actress.<actress>` and
`studio.<studio>` tag:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /([A-Z]{4})-[0-9]{3} (.*)/.matcher(ctx._source.name);
if (m.find()) {
tags.add("studio." + m.group(1));
// Take the matched group (.*), and add a tag for
// each name, separated by comma
for (String name : m.group(2).splitOnToken(',')) {
tags.add("actress." + name);
}
}
```
Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source.path != "") {
String[] names = ctx._source.path.splitOnToken('/');
tags.add("studio." + names[names.length-1]);
}
```
Parse `EXIF:F Number` tag
```Java
if (ctx._source?.exif_fnumber != null) {
String[] values = ctx._source.exif_fnumber.splitOnToken(' ');
String aperture = String.valueOf(Float.parseFloat(values[0]) / Float.parseFloat(values[1]));
if (aperture == "NaN") {
aperture = "0,0";
}
tags.add("Aperture.f/" + aperture.replace(".", ","));
}
```
Display year and months from `EXIF:DateTime` tag
```Java
if (ctx._source?.exif_datetime != null) {
SimpleDateFormat parser = new SimpleDateFormat("yyyy:MM:dd HH:mm:ss");
Date date = parser.parse(ctx._source.exif_datetime);
SimpleDateFormat yp = new SimpleDateFormat("yyyy");
SimpleDateFormat mp = new SimpleDateFormat("MMMMMMMMM");
String year = yp.format(date);
String month = mp.format(date);
tags.add("Month." + month);
tags.add("Year." + year);
}
```

BIN
docs/sist2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 889 KiB

BIN
docs/stats.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 167 KiB

Submodule lib/ffmpeg deleted from 0481a1f6e5

Submodule lib/mupdf deleted from 91782a4348

Submodule lib/onion deleted from d8d4cc9290

1
lmdb

Submodule lmdb deleted from 5c012bbe03

View File

@@ -1,31 +1,58 @@
{
"properties": {
"_tie": {
"type": "keyword",
"doc_values": true
},
"_depth": {
"type": "integer"
},
"path": {
"type": "text",
"analyzer": "path_analyzer",
"copy_to": "suggest-path"
"copy_to": "suggest-path",
"fielddata": true,
"fields": {
"nGram": {
"type": "text",
"analyzer": "my_nGram"
},
"text": {
"type": "text",
"analyzer": "content_analyzer"
}
}
},
"suggest-path": {
"type": "completion",
"analyzer": "keyword"
"analyzer": "case_insensitive_kw_analyzer"
},
"mime": {
"type": "keyword"
},
"thumbnail": {
"type": "keyword",
"index": false
},
"videoc": {
"type": "keyword"
"type": "keyword",
"index": false
},
"audioc": {
"type": "keyword"
"type": "keyword",
"index": false
},
"duration": {
"type": "float"
"type": "float",
"index": false
},
"width": {
"type": "integer"
"type": "integer",
"index": false
},
"height": {
"type": "integer"
"type": "integer",
"index": false
},
"mtime": {
"type": "integer"
@@ -70,6 +97,23 @@
"analyzer": "my_nGram",
"type": "text"
},
"_keyword.*": {
"type": "keyword"
},
"_text.*": {
"analyzer": "content_analyzer",
"type": "text",
"fields": {
"nGram": {
"type": "text",
"analyzer": "my_nGram"
}
}
},
"_url": {
"type": "keyword",
"index": false
},
"content": {
"analyzer": "content_analyzer",
"type": "text",
@@ -80,6 +124,44 @@
"analyzer": "my_nGram"
}
}
},
"tag": {
"type": "keyword",
"copy_to": "suggest-tag"
},
"suggest-tag": {
"type": "completion",
"analyzer": "case_insensitive_kw_analyzer"
},
"exif_make": {
"type": "text"
},
"exif_model": {
"type": "text"
},
"exif:software": {
"type": "text"
},
"exif_exposure_time": {
"type": "keyword"
},
"exif_fnumber": {
"type": "keyword"
},
"exif_iso_speed_ratings": {
"type": "keyword"
},
"exif_focal_length": {
"type": "keyword"
},
"exif_user_comment": {
"type": "text"
},
"author": {
"type": "text"
},
"modified_by": {
"type": "text"
}
}
}

10
schema/pipeline.json Normal file
View File

@@ -0,0 +1,10 @@
{
"description": "Copy _id to _tie, save path depth",
"processors": [
{
"script": {
"source": "ctx._tie = ctx._id; ctx._depth = ctx.path.length() == 0 ? 0 : 1 + ctx.path.length() - ctx.path.replace(\"/\", \"\").length();"
}
}
]
}

View File

@@ -1,7 +1,8 @@
{
"index": {
"refresh_interval": "-1",
"codec": "best_compression"
"refresh_interval": "30s",
"codec": "best_compression",
"number_of_replicas": 0
},
"analysis": {
"tokenizer": {
@@ -21,16 +22,24 @@
"lowercase"
]
},
"case_insensitive_kw_analyzer": {
"tokenizer": "keyword",
"filter": [
"lowercase"
]
},
"my_nGram": {
"tokenizer": "my_nGram_tokenizer",
"filter": [
"lowercase"
"lowercase",
"asciifolding"
]
},
"content_analyzer": {
"tokenizer": "standard",
"filter": [
"lowercase"
"lowercase",
"asciifolding"
]
}
}

View File

@@ -1,14 +1,16 @@
#!/bin/bash
#!/usr/bin/env bash
rm -rf index.sist2/
rm web/js/bundle.js 2> /dev/null
cat `ls -v web/js/*.min.js` > web/js/bundle.js
cat web/js/{util,dom,search}.js >> web/js/bundle.js
rm src/static/js/bundle.js 2> /dev/null
cat `ls src/static/js/*.min.js` > src/static/js/bundle.js
cat src/static/js/{util,dom}.js >> src/static/js/bundle.js
rm web/css/bundle.css 2> /dev/null
cat web/css/*.min.css > web/css/bundle.css
cat web/css/main.css >> web/css/bundle.css
rm src/static/css/bundle*.css 2> /dev/null
cat src/static/css/*.min.css > src/static/css/bundle.css
cat src/static/css/light.css >> src/static/css/bundle.css
cat src/static/css/*.min.css > src/static/css/bundle_dark.css
cat src/static/css/dark.css >> src/static/css/bundle_dark.css
python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c

View File

@@ -1,39 +0,0 @@
#!/bin/bash
cd lib
cd mupdf
HAVE_X11=no HAVE_GLUT=no make -j 4
cd ..
mv mupdf/build/release/libmupdf.a .
mv mupdf/build/release/libmupdf-third.a .
# ffmpeg
cd ffmpeg
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \
--disable-ffprobe --disable-doc\
--disable-manpages --disable-postproc --disable-avfilter \
--disable-alsa --disable-lzma --disable-xlib --disable-debug\
--disable-vdpau --disable-vaapi --disable-sdl2 --disable-network
make -j 4
cd ..
mv ffmpeg/libavcodec/libavcodec.a .
mv ffmpeg/libavformat/libavformat.a .
mv ffmpeg/libavutil/libavutil.a .
mv ffmpeg/libswresample/libswresample.a .
mv ffmpeg/libswscale/libswscale.a .
# onion
cd onion
mkdir build 2> /dev/null
cd build
cmake -DONION_USE_SSL=false -DONION_USE_PAM=false -DONION_USE_PNG=false -DONION_USE_JPEG=false \
-DONION_USE_JPEG=false -DONION_USE_XML2=false -DONION_USE_SYSTEMD=false -DONION_USE_SQLITE3=false \
-DONION_USE_REDIS=false -DONION_USE_GC=false -DONION_USE_TESTS=false -DONION_EXAMPLES=false \
-DONION_USE_BINDINGS_CPP=false ..
make -j 4
cd ../..
mv onion/build/src/onion/libonion_static.a .
cd ..

View File

@@ -1,6 +1,9 @@
import json
files = [
"schema/mappings.json",
"schema/settings.json",
"schema/pipeline.json",
]
@@ -9,6 +12,7 @@ def clean(filepath):
for file in files:
with open(file, "rb") as f:
data = f.read()
with open(file, "r") as f:
data = json.dumps(json.load(f), separators=(",", ":")).encode()
data += b'\0'
print("char %s[%d] = {%s};" % (clean(file), len(data), ",".join(str(int(b)) for b in data)))

View File

@@ -2,14 +2,18 @@ application/arj, arj
application/base64, mme
application/binhex, hqx
application/book, boo|book
application/CDFV2-corrupt,
application/CDFV2, sdv
application/clariscad, ccad
application/commonground, dp
application/csv,
application/dicom, dcm
application/drafting, drw
application/epub+zip, epub
application/freeloader, frl
application/futuresplash, spl
application/groupwise, vew
application/gzip, gz
application/gzip, gz|tgz
application/hta, hta
application/i-deas, unv
application/iges, iges|igs
@@ -17,7 +21,6 @@ application/inf, inf
application/java-archive, jar
application/java, class
application/javascript,
application/x-archive, a
application/json, json
application/marc, mrc
application/mbedlet, mbd
@@ -27,7 +30,9 @@ application/msword, doc|dot|w6w|wiz|word
application/netmc, mcp
application/octet-stream, bin|dump|gpg
application/oda, oda
application/ogg, ogv
application/pdf, pdf
application/pgp-keys,
application/pgp-signature, pgp
application/pkcs7-signature, p7s
application/pkix-cert, cer|crt
@@ -43,6 +48,10 @@ application/vda, vda
application/vnd.fdf, fdf
application/vnd.font-fontforge-sfd, sfd
application/vnd.hp-hpgl, hgl|hpg|hpgl
application/vnd.iccprofile, icm
application/vnd.iccprofile, icm
application/vnd.lotus-1-2-3,
application/vnd.ms-cab-compressed, cab
application/vnd.ms-excel, xlb|xlc|xll|xlm|xls|xlw
application/vnd.ms-fontobject, eot
application/vnd.ms-opentype, otf
@@ -54,45 +63,75 @@ application/vnd.ms-project, mpp
application/vnd.oasis.opendocument.base, odb
application/vnd.oasis.opendocument.formula, odf
application/vnd.oasis.opendocument.graphics, odg
application/vnd.oasis.opendocument.presentation, odp
application/vnd.oasis.opendocument.spreadsheet, ods
application/vnd.oasis.opendocument.text, odt
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
application/vnd.symbian.install,
application/vnd.tcpdump.pcap, pcap
application/vnd.wap.wmlc, wmlc
application/vnd.wap.wmlscriptc, wmlsc
application/vnd.xara, web
application/vocaltec-media-desc, vmd
application/vocaltec-media-file, vmf
application/warc, warc
application/winhelp, hlp
application/wordperfect6.0, w60
application/wordperfect6.1, w61
application/wordperfect, wp|wp5|wp6|wpd
application/x-123, wk1
application/x-7z-compressed, 7z
application/x-aim, aim
application/x-apple-diskimage,
application/x-arc,
application/x-archive, a
application/x-atari-7800-rom, a78
application/x-authorware-bin, aab
application/x-authorware-map, aam
application/x-authorware-seg, aas
application/x-avira-qua,
application/x-bcpio, bcpio
application/x-bittorrent, torrent
application/x-bsh, bsh
application/x-bytecode.python, pyc
application/x-bzip2, boz|bz2
application/x-bzip, bz
application/x-cbr, cbr
application/x-cbz, cbz
application/x-cdlink, vcd
application/x-chat, cha|chat
application/x-chrome-extension,
application/x-cocoa, cco
application/x-conference, nsc
application/x-coredump,
application/x-cpio, cpio
application/x-dbf, dbf
application/x-dbt,
application/x-debian-package, deb
application/x-deepv, deepv
application/x-director, dcr|dir|dxr
application/x-director, dir|dxr
application/x-dmp, dmp
application/x-dosdriver,
application/x-dosexec, dll
application/x-dvi, dvi
application/x-elc, elc
application/x-empty,
application/x-envoy, env|evy
application/x-esrehber, es
application/x-excel, xla|xld|xlk|xlt|xlv
application/x-executable, exe
application/x-font-gdos,
application/x-font-pf2, pf2
application/x-font-pfm, pfm
application/x-font-sfn,
application/x-font-ttf, ttf
application/x-font-ttf, ttf|ttc
application/x-fptapplication/x-dbt,
application/x-freelance, pre
application/x-gamecube-rom,
application/x-gdbm,
application/x-gettext-translation,
application/x-git,
application/x-gsp, gsp
application/x-gss, gss
@@ -102,46 +141,68 @@ application/x-hdf, hdf
application/x-helpfile, help
application/x-httpd-imap, imap
application/x-ima, ima
application/x-innosetup,
application/x-internett-signup, ins
application/x-inventor, iv
application/x-ip2, ip
application/x-java-applet,
application/x-java-commerce, jcm
application/x-java-image,
application/x-java-jmod, jmod
application/x-java-keystore,
application/x-kdelnk,
application/x-koan, skd|skm|skp|skt
application/x-latex, latex|ltx
application/x-livescreen, ivy
application/x-lotus, wq1
application/x-lz4+json, jsonlz4
application/x-lz4, lz4
application/x-lz4, lz4
application/x-lzh-compressed,
application/x-lzh, lzh
application/x-lzip, lz
application/x-lzma, lzma
application/x-lzop, lzo
application/x-lzx, lzx
application/x-mach-binary, jnilib|dylib
application/x-mach-executable,
application/x-magic-cap-package-1.0, mc$
application/x-mathcad, mcd
application/x-maxis-dbpf,
application/x-meme, mm
application/x-midi, midi
application/x-mif, mif
application/x-mix-transfer, nix
application/xml, opf
application/x-mobipocket-ebook, mobi
application/vnd.amazon.mobi8-ebook, azw|azw3
application/x-msaccess, accdb
application/x-ms-compress-szdd, fon
application/x-ms-pdb, pdb
application/x-ms-reader, lit
application/x-n64-rom, z64
application/x-navi-animation, ani
application/x-navidoc, nvd
application/x-navimap, map
application/x-navistyle, stl
application/x-nes-rom, nes
application/x-netcdf, cdf|nc
application/x-newton-compatible-pkg, pkg
application/x-nintendo-ds-rom,
application/x-object, o
application/x-omcdatamaker, omcd
application/x-omc, omc
application/x-omcregerator, omcr
application/x-pagemaker, pm4|pm5
application/x-pcl, pcl
application/x-pgp-keyring,
application/x-pixclscript, plx
application/x-pkcs7-certreqresp, p7r
application/x-pkcs7-signature, p7a
application/x-project, mpc|mpt|mpv|mpx
application/x-qpro, wb1
application/x-rar, rar
application/x-rpm, rpm
application/x-sdp, sdp
application/x-sea, sea
application/x-seelogo, sl
@@ -149,12 +210,17 @@ application/x-setupscript,
application/x-sharedlib, so
application/x-shar, shar
application/x-shockwave-flash, swf
application/x-snappy-framed,
application/x-sprite, spr|sprite
application/x-sqlite3,
application/x-stargallery-thm,
application/x-stuffit, sit
application/x-sv4cpio, sv4cpio
application/x-sv4crc, sv4crc
application/x-tar, tar
application/x-tbook, sbk|tbk
application/x-terminfo,
application/x-terminfo2,
application/x-texinfo, texi|texinfo
application/x-tex-tfm, tfm
application/x-ustar, ustar
@@ -163,16 +229,22 @@ application/x-vnd.audioexplosion.mzz, mzz
application/x-vnd.ls-xpix, xpix
application/x-vrml, vrml
application/x-wais-source, src|wsrc
application/x-wine-extension-ini,
application/x-wintalk, wtk
application/x-world, svr
application/x-wri, wri
application/x-x509-ca-cert, der
application/x-xz, xz
application/x-zip,
application/x-zstd, zst
application/zip, zip
application/zlib, z
!audio/basic, au
audio/it, it
audio/make, funk|my|pfunk
audio/midi, kar
audio/mid, rmi
audio/mp4, m4b
audio/mpeg, m2a|mpa
audio/ogg, ogg
audio/s3m, s3m
@@ -180,7 +252,10 @@ audio/tsp-audio, tsi
audio/tsplayer, tsp
audio/vnd.qcelp, qcp
audio/voxware, vox
audio/x-aiff, aiff|aif
audio/x-flac, flac
audio/x-gsm, gsd|gsm
audio/x-hx-aac-adts,
audio/x-jam, jam
audio/x-liveaudio, lam
audio/x-m4a, m4a
@@ -194,17 +269,24 @@ audio/x-nspaudio, lma
audio/x-pn-realaudio, ram|rm|rmm|rmp
audio/x-psid, sid
audio/x-realaudio, ra
audio/x-s3m,
audio/x-twinvq-plugin, vqe|vql
audio/x-twinvq, vqf
audio/x-voc, voc
audio/x-wav, wav
!audio/x-xbox360-executable, xex
!audio/x-xbox-executable, xbe
font/otf,
font/sfnt,
font/woff2, woff2
font/woff, woff
image/bmp,
image/cmu-raster, rast
image/fif, fif
image/florian, flo|turbot
image/g3fax, g3
image/gif, gif
image/heic, heic
image/ief, ief|iefs
image/jpeg, jfif|jfif-tbnl|jpe|jpeg|jpg
image/jutvision, jut
@@ -213,6 +295,9 @@ image/pict, pic|pict
image/png, png|x-png
!image/svg, svg
!image/svg+xml,
image/tiff,
!image/vnd.adobe.photoshop, psd
!image/vnd.djvu, djvu
image/vnd.fpx, fpx
image/vnd.microsoft.icon,
image/vnd.rn-realflash, rf
@@ -220,9 +305,15 @@ image/vnd.rn-realpix, rp
image/vnd.wap.wbmp, wbmp
image/vnd.xiff, xif
image/webp, webp
image/wmf,
image/x-3ds, 3ds
image/x-award-bioslogo,
image/x-cmu-raster, ras
image/x-cur, tga
image/x-dwg, dwg|dxf|svf
image/x-eps,
image/x-exr, exr
image/x-gem,
image/x-icns,
!image/x-icon, ico
image/x-jg, art
@@ -236,34 +327,33 @@ image/x-portable-graymap, pgm
image/x-portable-pixmap, ppm
image/x-quicktime, qif|qti|qtif
image/x-rgb, rgb
image/x-tga,
image/x-tiff, tif|tiff
image/tiff,
image/x-win-bitmap,
!image/x-xcf, xcf
!image/x-xpixmap, xpm
image/x-xwindowdump, xwd
message/news,
message/rfc822, mht|mhtml|mime
model/vnd.dwf, dwf
model/vnd.gdl, gdl
model/vnd.gs.gdl, gdsl
model/vrml, wrz
model/x-pov, pov
text/asp, asp
text/css, css
text/x-sass, sass
text/x-scss, scss
text/html, acgi|htm|html|htmls|htx|shtml
text/javascript, js
text/mcf, mcf
text/pascal, pas
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt
text/PGP,
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml
text/richtext, rt|rtf|rtx
text/rtf,
text/scriplet, wsc
text/x-awk, awk
!video/x-jng, jng
video/x-mng, mng
image/x-cur, tga
image/x-xwindowdump, xwd
!image/vnd.adobe.photoshop, psd
text/tab-separated-values, tsv
text/troff, man|me|ms|roff|t|tr
text/uri-list, uni|unis|uri|uris
text/uri-list, uji|unis|uri|uris
text/vnd.abc, abc
text/vnd.fmi.flexstor, flx
text/vnd.wap.wmlscript, wmls
@@ -272,6 +362,7 @@ text/webviewhtml, htt
text/x-Algol68,
text/x-asm, asm|s
text/x-audiosoft-intra, aip
text/x-awk, awk
text/x-bcpl,
text/x-c, c|cc|h
text/x-c++, cpp|cxx|c++
@@ -286,23 +377,31 @@ text/x-makefile, am|mak
text/xml, xml|pom|iml|plist
text/x-m, m
text/x-msdos-batch, bat
text/x-ms-regedit, reg
text/x-objective-c,
text/x-pascal, p
text/x-perl, pl
text/x-php, php
text/x-po, po
text/x-python, py
text/x-ruby, rb
text/x-sass, sass
text/x-scss, scss
text/x-server-parsed-html, ssi
text/x-setext, etx
text/x-sgml, sgm|sgml
text/x-shellscript, sh
text/x-speech, talk
text/x-tcl,
text/x-tex, tex
text/x-uil, uil
text/x-uuencode, uue
text/x-vcalendar, vcs
text/x-vcard, vcf
video/animaflex, afl
video/avi, avi
video/avs-video, avs
video/MP2T,
video/mp4, mp4
video/mpeg, m1v|m2v|mpe|mpeg|mpg
video/quicktime, moov|mov|qt
@@ -317,43 +416,35 @@ video/x-atomic3d-feature, fmf
video/x-dl, dl
video/x-dv, dif|dv
video/x-fli, fli
video/x-flv, flv
video/x-isvideo, isu
!video/x-jng, jng
video/x-m4v, m4v
video/x-matroska, mkv
video/x-mng, mng
video/x-motion-jpeg, mjpg
video/x-ms-asf, asf|asx
video/x-ms-asf, asf|asx|wmv
video/x-msvideo, divx
video/x-qtc, qtc
video/x-sgi-movie, movie|mv
application/x-7z-compressed, 7z
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
text/x-po, po
application/x-rpm, rpm
application/x-debian-package, deb
application/vnd.iccprofile, icm
application/dicom, dcm
image/x-exr, exr
application/vnd.iccprofile, icm
video/x-matroska, mkv
application/x-empty,
model/vnd.gdl, gdl
model/vnd.gs.gdl, gdsl
font/woff, woff
font/woff2, woff2
application/epub+zip, epub
application/x-mobipocket-ebook, mobi
audio/x-flac, flac
application/x-rar, rar
video/x-msvideo, divx
video/x-flv, flv
application/x-kdelnk,
text/x-tcl,
application/ogg, ogv
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
application/vnd.ms-cab-compressed, cab
audio/mp4, m4b
!image/vnd.djvu, djvu
application/x-ms-reader, lit
application/CDFV2-corrupt,
text/x-vcard, vcf
application/x-innosetup,
application/winhelp, hlp
image/x-tga,
application/x-wine-extension-ini,
x-epoc/x-sisx-app,
application/x-zstd-dictionary,
application/vnd.ms-outlook, msg
image/x-olympus-orf, orf
image/x-nikon-nef, nef
image/x-fuji-raf, raf
image/x-panasonic-raw, rw2|raw
image/x-adobe-dng, dng
image/x-canon-cr2, cr2
image/x-canon-crw, crw
image/x-dcraw,
image/x-kodak-dcr, dcr
image/x-kodak-k25, k25
image/x-kodak-kdc, kdc
image/x-minolta-mrw, mrw
image/x-pentax-pef, pef
image/x-sigma-x3f, xf3
image/x-sony-arw, arw
image/x-sony-sr2, sr2
image/x-sony-srf, srf
image/x-epson-erf, erf
1 application/arj arj
2 application/base64 mme
3 application/binhex hqx
4 application/book boo|book
5 application/CDFV2-corrupt
6 application/CDFV2 sdv
7 application/clariscad ccad
8 application/commonground dp
9 application/csv
10 application/dicom dcm
11 application/drafting drw
12 application/epub+zip epub
13 application/freeloader frl
14 application/futuresplash spl
15 application/groupwise vew
16 application/gzip gz gz|tgz
17 application/hta hta
18 application/i-deas unv
19 application/iges iges|igs
21 application/java-archive jar
22 application/java class
23 application/javascript
application/x-archive a
24 application/json json
25 application/marc mrc
26 application/mbedlet mbd
30 application/netmc mcp
31 application/octet-stream bin|dump|gpg
32 application/oda oda
33 application/ogg ogv
34 application/pdf pdf
35 application/pgp-keys
36 application/pgp-signature pgp
37 application/pkcs7-signature p7s
38 application/pkix-cert cer|crt
48 application/vnd.fdf fdf
49 application/vnd.font-fontforge-sfd sfd
50 application/vnd.hp-hpgl hgl|hpg|hpgl
51 application/vnd.iccprofile icm
52 application/vnd.iccprofile icm
53 application/vnd.lotus-1-2-3
54 application/vnd.ms-cab-compressed cab
55 application/vnd.ms-excel xlb|xlc|xll|xlm|xls|xlw
56 application/vnd.ms-fontobject eot
57 application/vnd.ms-opentype otf
63 application/vnd.oasis.opendocument.base odb
64 application/vnd.oasis.opendocument.formula odf
65 application/vnd.oasis.opendocument.graphics odg
66 application/vnd.oasis.opendocument.presentation odp
67 application/vnd.oasis.opendocument.spreadsheet ods
68 application/vnd.oasis.opendocument.text odt
69 application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
70 application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
71 application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
72 application/vnd.symbian.install
73 application/vnd.tcpdump.pcap pcap
74 application/vnd.wap.wmlc wmlc
75 application/vnd.wap.wmlscriptc wmlsc
76 application/vnd.xara web
77 application/vocaltec-media-desc vmd
78 application/vocaltec-media-file vmf
79 application/warc warc
80 application/winhelp hlp
81 application/wordperfect6.0 w60
82 application/wordperfect6.1 w61
83 application/wordperfect wp|wp5|wp6|wpd
84 application/x-123 wk1
85 application/x-7z-compressed 7z
86 application/x-aim aim
87 application/x-apple-diskimage
88 application/x-arc
89 application/x-archive a
90 application/x-atari-7800-rom a78
91 application/x-authorware-bin aab
92 application/x-authorware-map aam
93 application/x-authorware-seg aas
94 application/x-avira-qua
95 application/x-bcpio bcpio
96 application/x-bittorrent torrent
97 application/x-bsh bsh
98 application/x-bytecode.python pyc
99 application/x-bzip2 boz|bz2
100 application/x-bzip bz
101 application/x-cbr cbr
102 application/x-cbz cbz
103 application/x-cdlink vcd
104 application/x-chat cha|chat
105 application/x-chrome-extension
106 application/x-cocoa cco
107 application/x-conference nsc
108 application/x-coredump
109 application/x-cpio cpio
110 application/x-dbf dbf
111 application/x-dbt
112 application/x-debian-package deb
113 application/x-deepv deepv
114 application/x-director dcr|dir|dxr dir|dxr
115 application/x-dmp dmp
116 application/x-dosdriver
117 application/x-dosexec dll
118 application/x-dvi dvi
119 application/x-elc elc
120 application/x-empty
121 application/x-envoy env|evy
122 application/x-esrehber es
123 application/x-excel xla|xld|xlk|xlt|xlv
124 application/x-executable exe
125 application/x-font-gdos
126 application/x-font-pf2 pf2
127 application/x-font-pfm pfm
128 application/x-font-sfn
129 application/x-font-ttf ttf ttf|ttc
130 application/x-fptapplication/x-dbt
131 application/x-freelance pre
132 application/x-gamecube-rom
133 application/x-gdbm
134 application/x-gettext-translation
135 application/x-git
136 application/x-gsp gsp
137 application/x-gss gss
141 application/x-helpfile help
142 application/x-httpd-imap imap
143 application/x-ima ima
144 application/x-innosetup
145 application/x-internett-signup ins
146 application/x-inventor iv
147 application/x-ip2 ip
148 application/x-java-applet
149 application/x-java-commerce jcm
150 application/x-java-image
151 application/x-java-jmod jmod
152 application/x-java-keystore
153 application/x-kdelnk
154 application/x-koan skd|skm|skp|skt
155 application/x-latex latex|ltx
156 application/x-livescreen ivy
157 application/x-lotus wq1
158 application/x-lz4+json jsonlz4
159 application/x-lz4 lz4
160 application/x-lz4 lz4
161 application/x-lzh-compressed
162 application/x-lzh lzh
163 application/x-lzip lz
164 application/x-lzma lzma
165 application/x-lzop lzo
166 application/x-lzx lzx
167 application/x-mach-binary jnilib|dylib
168 application/x-mach-executable
169 application/x-magic-cap-package-1.0 mc$
170 application/x-mathcad mcd
171 application/x-maxis-dbpf
172 application/x-meme mm
173 application/x-midi midi
174 application/x-mif mif
175 application/x-mix-transfer nix
176 application/xml opf
177 application/x-mobipocket-ebook mobi
178 application/vnd.amazon.mobi8-ebook azw|azw3
179 application/x-msaccess accdb
180 application/x-ms-compress-szdd fon
181 application/x-ms-pdb pdb
182 application/x-ms-reader lit
183 application/x-n64-rom z64
184 application/x-navi-animation ani
185 application/x-navidoc nvd
186 application/x-navimap map
187 application/x-navistyle stl
188 application/x-nes-rom nes
189 application/x-netcdf cdf|nc
190 application/x-newton-compatible-pkg pkg
191 application/x-nintendo-ds-rom
192 application/x-object o
193 application/x-omcdatamaker omcd
194 application/x-omc omc
195 application/x-omcregerator omcr
196 application/x-pagemaker pm4|pm5
197 application/x-pcl pcl
198 application/x-pgp-keyring
199 application/x-pixclscript plx
200 application/x-pkcs7-certreqresp p7r
201 application/x-pkcs7-signature p7a
202 application/x-project mpc|mpt|mpv|mpx
203 application/x-qpro wb1
204 application/x-rar rar
205 application/x-rpm rpm
206 application/x-sdp sdp
207 application/x-sea sea
208 application/x-seelogo sl
210 application/x-sharedlib so
211 application/x-shar shar
212 application/x-shockwave-flash swf
213 application/x-snappy-framed
214 application/x-sprite spr|sprite
215 application/x-sqlite3
216 application/x-stargallery-thm
217 application/x-stuffit sit
218 application/x-sv4cpio sv4cpio
219 application/x-sv4crc sv4crc
220 application/x-tar tar
221 application/x-tbook sbk|tbk
222 application/x-terminfo
223 application/x-terminfo2
224 application/x-texinfo texi|texinfo
225 application/x-tex-tfm tfm
226 application/x-ustar ustar
229 application/x-vnd.ls-xpix xpix
230 application/x-vrml vrml
231 application/x-wais-source src|wsrc
232 application/x-wine-extension-ini
233 application/x-wintalk wtk
234 application/x-world svr
235 application/x-wri wri
236 application/x-x509-ca-cert der
237 application/x-xz xz
238 application/x-zip
239 application/x-zstd zst
240 application/zip zip
241 application/zlib z
242 !audio/basic au
243 audio/it it
244 audio/make funk|my|pfunk
245 audio/midi kar
246 audio/mid rmi
247 audio/mp4 m4b
248 audio/mpeg m2a|mpa
249 audio/ogg ogg
250 audio/s3m s3m
252 audio/tsplayer tsp
253 audio/vnd.qcelp qcp
254 audio/voxware vox
255 audio/x-aiff aiff|aif
256 audio/x-flac flac
257 audio/x-gsm gsd|gsm
258 audio/x-hx-aac-adts
259 audio/x-jam jam
260 audio/x-liveaudio lam
261 audio/x-m4a m4a
269 audio/x-pn-realaudio ram|rm|rmm|rmp
270 audio/x-psid sid
271 audio/x-realaudio ra
272 audio/x-s3m
273 audio/x-twinvq-plugin vqe|vql
274 audio/x-twinvq vqf
275 audio/x-voc voc
276 audio/x-wav wav
277 !audio/x-xbox360-executable xex
278 !audio/x-xbox-executable xbe
279 font/otf
280 font/sfnt
281 font/woff2 woff2
282 font/woff woff
283 image/bmp
284 image/cmu-raster rast
285 image/fif fif
286 image/florian flo|turbot
287 image/g3fax g3
288 image/gif gif
289 image/heic heic
290 image/ief ief|iefs
291 image/jpeg jfif|jfif-tbnl|jpe|jpeg|jpg
292 image/jutvision jut
295 image/png png|x-png
296 !image/svg svg
297 !image/svg+xml
298 image/tiff
299 !image/vnd.adobe.photoshop psd
300 !image/vnd.djvu djvu
301 image/vnd.fpx fpx
302 image/vnd.microsoft.icon
303 image/vnd.rn-realflash rf
305 image/vnd.wap.wbmp wbmp
306 image/vnd.xiff xif
307 image/webp webp
308 image/wmf
309 image/x-3ds 3ds
310 image/x-award-bioslogo
311 image/x-cmu-raster ras
312 image/x-cur tga
313 image/x-dwg dwg|dxf|svf
314 image/x-eps
315 image/x-exr exr
316 image/x-gem
317 image/x-icns
318 !image/x-icon ico
319 image/x-jg art
327 image/x-portable-pixmap ppm
328 image/x-quicktime qif|qti|qtif
329 image/x-rgb rgb
330 image/x-tga
331 image/x-tiff tif|tiff
332 image/tiff image/x-win-bitmap
333 !image/x-xcf xcf
334 !image/x-xpixmap xpm
335 image/x-xwindowdump xwd
336 message/news
337 message/rfc822 mht|mhtml|mime
338 model/vnd.dwf dwf
339 model/vnd.gdl gdl
340 model/vnd.gs.gdl gdsl
341 model/vrml wrz
342 model/x-pov pov
343 text/asp asp
344 text/css css
text/x-sass sass
text/x-scss scss
345 text/html acgi|htm|html|htmls|htx|shtml
346 text/javascript js
347 text/mcf mcf
348 text/pascal pas
349 text/plain text/PGP com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt
350 text/plain com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml
351 text/richtext rt|rtf|rtx
352 text/rtf
353 text/scriplet wsc
text/x-awk awk
!video/x-jng jng
video/x-mng mng
image/x-cur tga
image/x-xwindowdump xwd
!image/vnd.adobe.photoshop psd
354 text/tab-separated-values tsv
355 text/troff man|me|ms|roff|t|tr
356 text/uri-list uni|unis|uri|uris uji|unis|uri|uris
357 text/vnd.abc abc
358 text/vnd.fmi.flexstor flx
359 text/vnd.wap.wmlscript wmls
362 text/x-Algol68
363 text/x-asm asm|s
364 text/x-audiosoft-intra aip
365 text/x-awk awk
366 text/x-bcpl
367 text/x-c c|cc|h
368 text/x-c++ cpp|cxx|c++
377 text/xml xml|pom|iml|plist
378 text/x-m m
379 text/x-msdos-batch bat
380 text/x-ms-regedit reg
381 text/x-objective-c
382 text/x-pascal p
383 text/x-perl pl
384 text/x-php php
385 text/x-po po
386 text/x-python py
387 text/x-ruby rb
388 text/x-sass sass
389 text/x-scss scss
390 text/x-server-parsed-html ssi
391 text/x-setext etx
392 text/x-sgml sgm|sgml
393 text/x-shellscript sh
394 text/x-speech talk
395 text/x-tcl
396 text/x-tex tex
397 text/x-uil uil
398 text/x-uuencode uue
399 text/x-vcalendar vcs
400 text/x-vcard vcf
401 video/animaflex afl
402 video/avi avi
403 video/avs-video avs
404 video/MP2T
405 video/mp4 mp4
406 video/mpeg m1v|m2v|mpe|mpeg|mpg
407 video/quicktime moov|mov|qt
416 video/x-dl dl
417 video/x-dv dif|dv
418 video/x-fli fli
419 video/x-flv flv
420 video/x-isvideo isu
421 !video/x-jng jng
422 video/x-m4v m4v
423 video/x-matroska mkv
424 video/x-mng mng
425 video/x-motion-jpeg mjpg
426 video/x-ms-asf asf|asx asf|asx|wmv
427 video/x-msvideo divx
428 video/x-qtc qtc
429 video/x-sgi-movie movie|mv
430 application/x-7z-compressed x-epoc/x-sisx-app 7z
431 application/vnd.openxmlformats-officedocument.wordprocessingml.document application/x-zstd-dictionary docx
432 text/x-po application/vnd.ms-outlook po msg
433 application/x-rpm image/x-olympus-orf rpm orf
434 application/x-debian-package image/x-nikon-nef deb nef
435 application/vnd.iccprofile image/x-fuji-raf icm raf
436 application/dicom image/x-panasonic-raw dcm rw2|raw
437 image/x-exr image/x-adobe-dng exr dng
438 application/vnd.iccprofile image/x-canon-cr2 icm cr2
439 video/x-matroska image/x-canon-crw mkv crw
440 application/x-empty image/x-dcraw
441 model/vnd.gdl image/x-kodak-dcr gdl dcr
442 model/vnd.gs.gdl image/x-kodak-k25 gdsl k25
443 font/woff image/x-kodak-kdc woff kdc
444 font/woff2 image/x-minolta-mrw woff2 mrw
445 application/epub+zip image/x-pentax-pef epub pef
446 application/x-mobipocket-ebook image/x-sigma-x3f mobi xf3
447 audio/x-flac image/x-sony-arw flac arw
448 application/x-rar image/x-sony-sr2 rar sr2
449 video/x-msvideo image/x-sony-srf divx srf
450 video/x-flv image/x-epson-erf flv erf
application/x-kdelnk
text/x-tcl
application/ogg ogv
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
application/vnd.ms-cab-compressed cab
audio/mp4 m4b
!image/vnd.djvu djvu
application/x-ms-reader lit
application/CDFV2-corrupt
text/x-vcard vcf
application/x-innosetup
application/winhelp hlp
image/x-tga
application/x-wine-extension-ini

View File

@@ -12,18 +12,19 @@ major_mime = {
"audio": 7,
"image": 8,
"text": 9,
"application": 10
"application": 10,
"x-epoc": 11,
}
pdf = (
"application/pdf",
"application/x-cbr",
"application/x-cbz",
"application/epub+zip",
"application/vnd.ms-xpsdocument",
)
font = (
"application/vnd.ms-opentype",
"application/x-ms-compress-szdd"
"application/x-font-sfn",
"application/x-font-ttf",
"font/otf",
@@ -32,6 +33,68 @@ font = (
"font/woff2"
)
# Archive "formats"
archive = (
"application/x-tar",
"application/zip",
"application/x-rar",
"application/x-arc",
"application/x-warc",
"application/x-7z-compressed",
)
# Archive "filters"
arc_filter = (
"application/gzip",
"application/x-bzip2",
"application/x-xz",
"application/x-zstd",
"application/x-lzma",
"application/x-lz4",
"application/x-lzip",
"application/x-lzop",
)
doc = (
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
)
mobi = (
"application/x-mobipocket-ebook",
"application/vnd.amazon.mobi8-ebook"
)
markup = (
"text/xml",
"text/html",
"text/x-sgml"
)
raw = (
"image/x-olympus-orf",
"image/x-nikon-nef",
"image/x-fuji-raf",
"image/x-panasonic-raw",
"image/x-adobe-dng",
"image/x-canon-cr2",
"image/x-canon-crw",
"image/x-dcraw",
"image/x-kodak-dcr",
"image/x-kodak-k25",
"image/x-kodak-kdc",
"image/x-minolta-mrw",
"image/x-pentax-pef",
"image/x-sigma-x3f",
"image/x-sony-arw",
"image/x-sony-sr2",
"image/x-sony-srf",
"image/x-minolta-mrw",
"image/x-pentax-pef",
"image/x-epson-erf",
)
cnt = 1
@@ -46,6 +109,18 @@ def mime_id(mime):
mime_id += " | 0x40000000"
elif mime in font:
mime_id += " | 0x20000000"
elif mime in archive:
mime_id += " | 0x10000000"
elif mime in arc_filter:
mime_id += " | 0x08000000"
elif mime in doc:
mime_id += " | 0x04000000"
elif mime in mobi:
mime_id += " | 0x02000000"
elif mime in markup:
mime_id += " | 0x01000000"
elif mime in raw:
mime_id += " | 0x00800000"
elif mime == "application/x-empty":
return "1"
return mime_id
@@ -55,7 +130,7 @@ def clean(t):
return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_")
with open("mime.csv") as f:
with open("scripts/mime.csv") as f:
for l in f:
mime, ext_list = l.split(",")
if l.startswith("!"):
@@ -67,11 +142,11 @@ with open("mime.csv") as f:
print("// **Generated by mime.py**")
print("#ifndef MIME_GENERATED_C")
print("#define MIME_GENERATED_C")
print("#include <glib-2.0/glib.h>\n")
print("#include <glib.h>\n")
print("#include <stdlib.h>\n")
# Enum
print("enum mime {")
for mime, ext in mimes.items():
for mime, ext in sorted(mimes.items()):
print(" " + clean(mime) + "=" + mime_id(mime) + ",")
print("};")

View File

@@ -1,9 +1,12 @@
files = [
"web/css/bundle.css",
"web/js/bundle.js",
"web/img/bg-bars.png",
"web/img/sprite-skin-flat.png",
"web/search.html",
"src/static/css/bundle.css",
"src/static/css/bundle_dark.css",
"src/static/js/bundle.js",
"src/static/js/search.js",
"src/static/img/sprite-skin-flat.png",
"src/static/img/sprite-skin-flat-dark.png",
"src/static/search.html",
"src/static/stats.html",
]

330
src/cli.c
View File

@@ -1,30 +1,70 @@
#include "cli.h"
#include "ctx.h"
#include <tesseract/capi.h>
#define DEFAULT_OUTPUT "index.sist2/"
#define DEFAULT_CONTENT_SIZE 4096
#define DEFAULT_QUALITY 15
#define DEFAULT_SIZE 200
#define DEFAULT_CONTENT_SIZE 32768
#define DEFAULT_QUALITY 5
#define DEFAULT_SIZE 500
#define DEFAULT_REWRITE_URL ""
#define DEFAULT_ES_URL "http://localhost:9200"
#define DEFAULT_BATCH_SIZE 100
#define DEFAULT_BIND_ADDR "localhost"
#define DEFAULT_PORT "4090"
#define DEFAULT_LISTEN_ADDRESS "localhost:4090"
#define DEFAULT_TREEMAP_THRESHOLD 0.0005
#define DEFAULT_MAX_MEM_BUFFER 2000
const char *TESS_DATAPATHS[] = {
"/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/",
"./",
NULL
};
scan_args_t *scan_args_create() {
scan_args_t *args = calloc(sizeof(scan_args_t), 1);
args->depth = -1;
return args;
}
index_args_t *index_args_create() {
index_args_t *args = calloc(sizeof(index_args_t), 1);
exec_args_t *exec_args_create() {
exec_args_t *args = calloc(sizeof(exec_args_t), 1);
return args;
}
web_args_t *web_args_create() {
web_args_t *args = calloc(sizeof(web_args_t), 1);
return args;
void scan_args_destroy(scan_args_t *args) {
if (args->name != NULL) {
free(args->name);
}
if (args->incremental != NULL) {
free(args->incremental);
}
if (args->path != NULL) {
free(args->path);
}
if (args->output != NULL) {
free(args->output);
}
free(args);
}
void index_args_destroy(index_args_t *args) {
//todo
free(args);
}
void web_args_destroy(web_args_t *args) {
//todo
free(args);
}
void exec_args_destroy(exec_args_t *args) {
free(args);
}
int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
@@ -35,17 +75,17 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
char *abs_path = abspath(argv[1]);
if (abs_path == NULL) {
fprintf(stderr, "File not found: %s", argv[1]);
fprintf(stderr, "File not found: %s\n", argv[1]);
return 1;
} else {
args->path = abs_path;
}
if (args->incremental != NULL) {
abs_path = abspath(args->incremental);
args->incremental = abspath(args->incremental);
if (abs_path == NULL) {
fprintf(stderr, "File not found: %s", args->incremental);
return 1;
sist_log("main.c", SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
args->incremental = NULL;
}
}
@@ -58,16 +98,13 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->size == 0) {
args->size = DEFAULT_SIZE;
} else if (args->size <= 0) {
fprintf(stderr, "Invalid size: %d\n", args->size);
} else if (args->size > 0 && args->size < 32) {
printf("Invalid size: %d\n", args->content_size);
return 1;
}
if (args->content_size == 0) {
args->content_size = DEFAULT_CONTENT_SIZE;
} else if (args->content_size <= 0) {
fprintf(stderr, "Invalid content-size: %d\n", args->content_size);
return 1;
}
if (args->threads == 0) {
@@ -90,6 +127,12 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
return 1;
}
if (args->depth <= 0) {
args->depth = G_MAXINT32;
} else {
args->depth += 1;
}
if (args->name == NULL) {
args->name = g_path_get_basename(args->output);
}
@@ -97,32 +140,174 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->rewrite_url == NULL) {
args->rewrite_url = DEFAULT_REWRITE_URL;
}
if (args->archive == NULL || strcmp(args->archive, "recurse") == 0) {
args->archive_mode = ARC_MODE_RECURSE;
} else if (strcmp(args->archive, "list") == 0) {
args->archive_mode = ARC_MODE_LIST;
} else if (strcmp(args->archive, "shallow") == 0) {
args->archive_mode = ARC_MODE_SHALLOW;
} else if (strcmp(args->archive, "skip") == 0) {
args->archive_mode = ARC_MODE_SKIP;
} else {
fprintf(stderr, "Archive mode must be one of (skip, list, shallow, recurse), got '%s'", args->archive);
return 1;
}
if (args->tesseract_lang != NULL) {
TessBaseAPI *api = TessBaseAPICreate();
char filename[128];
sprintf(filename, "%s.traineddata", args->tesseract_lang);
const char *path = find_file_in_paths(TESS_DATAPATHS, filename);
if (path == NULL) {
LOG_FATAL("cli.c", "Could not find tesseract language file!");
}
ret = TessBaseAPIInit3(api, path, args->tesseract_lang);
if (ret != 0) {
fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang);
return 1;
}
TessBaseAPIEnd(api);
TessBaseAPIDelete(api);
args->tesseract_path = path;
}
if (args->exclude_regex != NULL) {
const char *error;
int error_offset;
pcre *re = pcre_compile(args->exclude_regex, 0, &error, &error_offset, 0);
if (error != NULL) {
LOG_FATALF("cli.c", "pcre_compile returned error: %s (offset:%d)", error, error_offset)
}
pcre_extra *re_extra = pcre_study(re, 0, &error);
if (error != NULL) {
LOG_FATALF("cli.c", "pcre_study returned error: %s", error)
}
ScanCtx.exclude = re;
ScanCtx.exclude_extra = re_extra;
} else {
ScanCtx.exclude = NULL;
}
if (args->treemap_threshold_str == 0) {
args->treemap_threshold = DEFAULT_TREEMAP_THRESHOLD;
} else {
args->treemap_threshold = atof(args->treemap_threshold_str);
}
if (args->max_memory_buffer == 0) {
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
}
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
LOG_DEBUGF("cli.c", "arg size=%d", args->size)
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
LOG_DEBUGF("cli.c", "arg threads=%d", args->threads)
LOG_DEBUGF("cli.c", "arg incremental=%s", args->incremental)
LOG_DEBUGF("cli.c", "arg output=%s", args->output)
LOG_DEBUGF("cli.c", "arg rewrite_url=%s", args->rewrite_url)
LOG_DEBUGF("cli.c", "arg name=%s", args->name)
LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
LOG_DEBUGF("cli.c", "arg path=%s", args->path)
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)
return 0;
}
int load_script(const char *script_path, char **dst) {
struct stat info;
int res = stat(script_path, &info);
if (res == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
return 1;
}
int fd = open(script_path, O_RDONLY);
if (fd == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
return 1;
}
*dst = malloc(info.st_size + 1);
res = read(fd, *dst, info.st_size);
if (res < 0) {
fprintf(stderr, "Error reading script file '%s': %s\n", script_path, strerror(errno));
return 1;
}
*(*dst + info.st_size) = '\0';
close(fd);
return 0;
}
int index_args_validate(index_args_t *args, int argc, const char **argv) {
LogCtx.verbose = 1;
if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n");
return 1;
}
if (args->threads == 0) {
args->threads = 1;
} else if (args->threads < 0) {
fprintf(stderr, "Invalid threads: %d\n", args->threads);
return 1;
}
char *index_path = abspath(argv[1]);
if (index_path == NULL) {
fprintf(stderr, "File not found: %s", argv[1]);
fprintf(stderr, "File not found: %s\n", argv[1]);
return 1;
} else {
args->index_path = argv[1];
free(index_path);
}
if (args->es_url == NULL) {
args->es_url = DEFAULT_ES_URL;
}
if (args->script_path != NULL) {
if (load_script(args->script_path, &args->script) != 0) {
return 1;
}
}
if (args->batch_size == 0) {
args->batch_size = DEFAULT_BATCH_SIZE;
}
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
LOG_DEBUGF("cli.c", "arg print=%d", args->print)
LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size)
LOG_DEBUGF("cli.c", "arg force_reset=%d", args->force_reset)
return 0;
}
int web_args_validate(web_args_t *args, int argc, const char **argv) {
LogCtx.verbose = 1;
if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n");
return 1;
@@ -132,12 +317,53 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
args->es_url = DEFAULT_ES_URL;
}
if (args->bind == NULL) {
args->bind = DEFAULT_BIND_ADDR;
if (args->listen_address == NULL) {
args->listen_address = DEFAULT_LISTEN_ADDRESS;
}
if (args->port == NULL) {
args->port = DEFAULT_PORT;
if (args->credentials != NULL) {
char *ptr = strstr(args->credentials, ":");
if (ptr == NULL) {
fprintf(stderr, "Invalid --auth format, see usage\n");
return 1;
}
strncpy(args->auth_user, args->credentials, (ptr - args->credentials));
strcpy(args->auth_pass, ptr + 1);
if (strlen(args->auth_user) == 0) {
fprintf(stderr, "--auth username must be at least one character long");
return 1;
}
args->auth_enabled = TRUE;
} else {
args->auth_enabled = FALSE;
}
if (args->tag_credentials != NULL && args->credentials != NULL) {
fprintf(stderr, "--auth and --tag-auth are mutually exclusive");
return 1;
}
if (args->tag_credentials != NULL) {
char *ptr = strstr(args->tag_credentials, ":");
if (ptr == NULL) {
fprintf(stderr, "Invalid --tag-auth format, see usage\n");
return 1;
}
strncpy(args->auth_user, args->tag_credentials, (ptr - args->tag_credentials));
strcpy(args->auth_pass, ptr + 1);
if (strlen(args->auth_user) == 0) {
fprintf(stderr, "--tag-auth username must be at least one character long");
return 1;
}
args->tag_auth_enabled = TRUE;
} else {
args->tag_auth_enabled = FALSE;
}
args->index_count = argc - 1;
@@ -146,10 +372,64 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]);
if (abs_path == NULL) {
fprintf(stderr, "File not found: %s", abs_path);
fprintf(stderr, "File not found: %s\n", args->indices[i]);
return 1;
}
}
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)
LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials)
LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials)
LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user)
LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass)
LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count)
for (int i = 0; i < args->index_count; i++) {
LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i])
}
return 0;
}
index_args_t *index_args_create() {
index_args_t *args = calloc(sizeof(index_args_t), 1);
return args;
}
web_args_t *web_args_create() {
web_args_t *args = calloc(sizeof(web_args_t), 1);
return args;
}
int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n");
return 1;
}
char *index_path = abspath(argv[1]);
if (index_path == NULL) {
fprintf(stderr, "File not found: %s\n", argv[1]);
return 1;
} else {
args->index_path = argv[1];
free(index_path);
}
if (args->es_url == NULL) {
args->es_url = DEFAULT_ES_URL;
}
if (args->script_path == NULL) {
LOG_FATAL("cli.c", "--script-file argument is required");
}
if (load_script(args->script_path, &args->script) != 0) {
return 1;
}
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
return 0;
}

View File

@@ -3,6 +3,8 @@
#include "sist.h"
#include "libscan/arc/arc.h"
typedef struct scan_args {
float quality;
int size;
@@ -12,30 +14,72 @@ typedef struct scan_args {
char *output;
char *rewrite_url;
char *name;
int depth;
char *path;
char *archive;
archive_mode_t archive_mode;
char *tesseract_lang;
const char *tesseract_path;
char *exclude_regex;
int fast;
const char* treemap_threshold_str;
double treemap_threshold;
int max_memory_buffer;
} scan_args_t;
scan_args_t *scan_args_create();
void scan_args_destroy(scan_args_t *args);
int scan_args_validate(scan_args_t *args, int argc, const char **argv);
typedef struct index_args {
char *es_url;
const char *index_path;
const char *script_path;
char *script;
int print;
int batch_size;
int force_reset;
int threads;
} index_args_t;
typedef struct web_args {
char *es_url;
char *bind;
char *port;
char *listen_address;
char *credentials;
char *tag_credentials;
char auth_user[256];
char auth_pass[256];
int auth_enabled;
int tag_auth_enabled;
int index_count;
const char **indices;
} web_args_t;
scan_args_t *scan_args_create();
typedef struct exec_args {
char *es_url;
const char *index_path;
const char *script_path;
char *script;
} exec_args_t;
index_args_t *index_args_create();
void index_args_destroy(index_args_t *args);
web_args_t *web_args_create();
int scan_args_validate(scan_args_t *args, int argc, const char **argv);
void web_args_destroy(web_args_t *args);
int index_args_validate(index_args_t *args, int argc, const char **argv);
int web_args_validate(web_args_t *args, int argc, const char **argv);
exec_args_t *exec_args_create();
void exec_args_destroy(exec_args_t *args);
int exec_args_validate(exec_args_t *args, int argc, const char **argv);
#endif

6
src/ctx.c Normal file
View File

@@ -0,0 +1,6 @@
#include "ctx.h"
ScanCtx_t ScanCtx;
WebCtx_t WebCtx;
IndexCtx_t IndexCtx;
LogCtx_t LogCtx;

View File

@@ -2,8 +2,23 @@
#define SIST2_CTX_H
#include "sist.h"
#include "tpool.h"
#include "libscan/scan.h"
#include "libscan/arc/arc.h"
#include "libscan/comic/comic.h"
#include "libscan/ebook/ebook.h"
#include "libscan/font/font.h"
#include "libscan/media/media.h"
#include "libscan/ooxml/ooxml.h"
#include "libscan/text/text.h"
#include "libscan/mobi/scan_mobi.h"
#include "libscan/raw/raw.h"
#include "src/io/store.h"
struct {
#include <glib.h>
#include <pcre.h>
typedef struct {
struct index_t index;
GHashTable *mime_table;
@@ -11,10 +26,8 @@ struct {
tpool_t *pool;
int tn_size;
int threads;
int content_size;
float tn_qscale;
int depth;
size_t stat_tn_size;
size_t stat_index_size;
@@ -22,20 +35,49 @@ struct {
GHashTable *original_table;
GHashTable *copy_table;
pthread_mutex_t mupdf_mu;
} ScanCtx;
pcre *exclude;
pcre_extra *exclude_extra;
int fast;
scan_arc_ctx_t arc_ctx;
scan_comic_ctx_t comic_ctx;
scan_ebook_ctx_t ebook_ctx;
scan_font_ctx_t font_ctx;
scan_media_ctx_t media_ctx;
scan_ooxml_ctx_t ooxml_ctx;
scan_text_ctx_t text_ctx;
scan_mobi_ctx_t mobi_ctx;
scan_raw_ctx_t raw_ctx;
} ScanCtx_t;
struct {
typedef struct {
int verbose;
int very_verbose;
int no_color;
} LogCtx_t;
typedef struct {
char *es_url;
} IndexCtx;
int batch_size;
tpool_t *pool;
store_t *tag_store;
GHashTable *tags;
} IndexCtx_t;
struct {
typedef struct {
char *es_url;
int index_count;
struct index_t indices[16];
} WebCtx;
char *auth_user;
char *auth_pass;
int auth_enabled;
int tag_auth_enabled;
struct index_t indices[64];
} WebCtx_t;
extern ScanCtx_t ScanCtx;
extern WebCtx_t WebCtx;
extern IndexCtx_t IndexCtx;
extern LogCtx_t LogCtx;
#endif

View File

@@ -1,16 +1,10 @@
#include "elastic.h"
#include "src/ctx.h"
#include <stdlib.h>
#include "web.h"
#include <stdio.h>
#include <string.h>
#include <cJSON/cJSON.h>
#include <src/ctx.h>
#include "static_generated.c"
#define BULK_INDEX_SIZE 100
typedef struct es_indexer {
int queued;
@@ -20,7 +14,18 @@ typedef struct es_indexer {
} es_indexer_t;
static es_indexer_t *Indexer;
static __thread es_indexer_t *Indexer;
void delete_queue(int max);
void elastic_flush();
void elastic_cleanup() {
elastic_flush();
if (Indexer != NULL) {
free(Indexer->es_url);
free(Indexer);
}
}
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
@@ -29,17 +34,22 @@ void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
cJSON_AddStringToObject(line, "_id", uuid_str);
cJSON_AddStringToObject(line, "_index", "sist2");
cJSON_AddStringToObject(line, "_type", "_doc");
cJSON_AddItemToObject(line, "_source", document);
cJSON_AddItemReferenceToObject(line, "_source", document);
char *json = cJSON_PrintUnformatted(line);
printf("%s\n", json);
cJSON_free(line);
cJSON_free(json);
cJSON_Delete(line);
}
void index_json_func(void *arg) {
es_bulk_line_t *line = arg;
elastic_index_line(line);
}
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
char *json = cJSON_PrintUnformatted(document);
size_t json_len = strlen(json);
@@ -51,7 +61,188 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
bulk_line->next = NULL;
cJSON_free(json);
elastic_index_line(bulk_line);
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
}
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url);
}
cJSON *body = cJSON_CreateObject();
cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
cJSON_AddStringToObject(script_obj, "lang", "painless");
cJSON_AddStringToObject(script_obj, "source", script);
cJSON *query = cJSON_AddObjectToObject(body, "query");
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
cJSON_AddStringToObject(term_obj, "index", index_id);
char *str = cJSON_Print(body);
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?wait_for_completion=false", Indexer->es_url);
response_t *r = web_post(bulk_url, str);
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
cJSON *resp = cJSON_Parse(r->body);
cJSON_free(str);
cJSON_Delete(body);
free_response(r);
cJSON *error = cJSON_GetObjectItem(resp, "error");
if (error != NULL) {
char *error_str = cJSON_Print(error);
LOG_ERRORF("elastic.c", "User script error: \n%s", error_str);
cJSON_free(error_str);
}
cJSON_Delete(resp);
}
#define ACTION_STR_LEN 91
void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
es_bulk_line_t *line = Indexer->line_head;
*count = 0;
size_t buf_size = 0;
size_t buf_cur = 0;
char *buf = malloc(8196);
size_t buf_capacity = 8196;
while (line != NULL && *count < max) {
char action_str[256];
snprintf(action_str, 256,
"{\"index\":{\"_id\":\"%s\", \"_type\":\"_doc\", \"_index\":\"sist2\"}}\n", line->uuid_str);
size_t line_len = strlen(line->line);
while (buf_size + line_len + ACTION_STR_LEN > buf_capacity) {
buf_capacity *= 2;
buf = realloc(buf, buf_capacity);
}
buf_size += line_len + ACTION_STR_LEN;
memcpy(buf + buf_cur, action_str, ACTION_STR_LEN);
buf_cur += ACTION_STR_LEN;
memcpy(buf + buf_cur, line->line, line_len);
buf_cur += line_len;
line = line->next;
(*count)++;
}
if (buf_size + 1 > buf_capacity) {
buf = realloc(buf, buf_capacity + 1);
}
*(buf + buf_cur) = '\0';
*buf_len = buf_cur;
return buf;
}
void print_errors(response_t *r) {
char *tmp = malloc(r->size + 1);
memcpy(tmp, r->body, r->size);
*(tmp + r->size) = '\0';
cJSON *ret_json = cJSON_Parse(tmp);
if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
cJSON *err;
cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
char *str = cJSON_Print(err);
LOG_ERRORF("elastic.c", "%s\n", str);
cJSON_free(str);
}
}
}
cJSON_Delete(ret_json);
free(tmp);
}
void _elastic_flush(int max) {
if (max == 0) {
LOG_WARNING("elastic.c", "calling _elastic_flush with 0 in queue")
return;
}
size_t buf_len;
int count;
void *buf = create_bulk_buffer(max, &count, &buf_len);
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_bulk?pipeline=tie", Indexer->es_url);
response_t *r = web_post(bulk_url, buf);
if (r->status_code == 0) {
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
}
if (r->status_code == 413) {
if (max <= 1) {
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->uuid_str)
free_response(r);
free(buf);
delete_queue(1);
if (Indexer->queued != 0) {
elastic_flush();
}
return;
}
LOG_WARNINGF("elastic.c", "Payload too large, retrying (%d documents)", count);
free_response(r);
free(buf);
_elastic_flush(max / 2);
return;
} else if (r->status_code == 429) {
free_response(r);
free(buf);
LOG_WARNING("elastic.c", "Got 429 status, will retry after delay")
usleep(1000000 * 20);
_elastic_flush(max);
return;
} else if (r->status_code != 200) {
print_errors(r);
delete_queue(Indexer->queued);
} else {
print_errors(r);
LOG_INFOF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
delete_queue(max);
if (Indexer->queued != 0) {
elastic_flush();
}
}
free_response(r);
free(buf);
}
void delete_queue(int max) {
for (int i = 0; i < max; i++) {
es_bulk_line_t *tmp = Indexer->line_head;
Indexer->line_head = tmp->next;
if (Indexer->line_head == NULL) {
Indexer->line_tail = NULL;
}
free(tmp);
Indexer->queued -= 1;
}
}
void elastic_flush() {
@@ -60,45 +251,7 @@ void elastic_flush() {
Indexer = create_indexer(IndexCtx.es_url);
}
es_bulk_line_t *line = Indexer->line_head;
int count = 0;
size_t buf_size = 0;
size_t buf_cur = 0;
char *buf = malloc(1);
while (line != NULL) {
char action_str[512];
snprintf(action_str, 512, "{\"index\":{\"_id\": \"%s\"}}\n", line->uuid_str);
size_t action_str_len = strlen(action_str);
size_t line_len = strlen(line->line);
buf = realloc(buf, buf_size + line_len + action_str_len);
buf_size += line_len + action_str_len;
memcpy(buf + buf_cur, action_str, action_str_len);
buf_cur += action_str_len;
memcpy(buf + buf_cur, line->line, line_len);
buf_cur += line_len;
es_bulk_line_t *tmp = line;
line = line->next;
free(tmp);
count++;
}
buf = realloc(buf, buf_size + 1);
*(buf+buf_cur) = '\0';
Indexer->line_head = NULL;
Indexer->line_tail = NULL;
Indexer->queued = 0;
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_bulk", Indexer->es_url);
response_t *r = web_post(bulk_url, buf, "Content-Type: application/x-ndjson");
printf("Indexed %3d documents (%zukB) <%d>\n", count, buf_cur / 1024, r->status_code);
free_response(r);
_elastic_flush(Indexer->queued);
}
void elastic_index_line(es_bulk_line_t *line) {
@@ -117,15 +270,14 @@ void elastic_index_line(es_bulk_line_t *line) {
Indexer->queued += 1;
if (Indexer->queued >= BULK_INDEX_SIZE) {
if (Indexer->queued >= IndexCtx.batch_size) {
elastic_flush();
}
}
es_indexer_t *create_indexer(const char *url) {
size_t url_len = strlen(url);
char *es_url = malloc(url_len);
char *es_url = malloc(strlen(url) + 1);
strcpy(es_url, url);
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
@@ -138,24 +290,34 @@ es_indexer_t *create_indexer(const char *url) {
return indexer;
}
void destroy_indexer() {
void finish_indexer(char *script, char *index_id) {
char url[4096];
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
response_t *r = web_post(url, "", NULL);
printf("Refresh index <%d>\n", r->status_code);
response_t *r = web_post(url, "");
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
if (script != NULL) {
execute_update_script(script, index_id);
free(script);
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
}
snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
r = web_post(url, "", NULL);
printf("Merge index <%d>\n", r->status_code);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
free_response(r);
if (Indexer != NULL) {
free(Indexer->es_url);
free(Indexer);
}
snprintf(url, sizeof(url), "%s/sist2/_settings", IndexCtx.es_url);
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}");
LOG_INFOF("elastic.c", "Set refresh interval <%d>", r->status_code);
free_response(r);
}
void elastic_init(int force_reset) {
@@ -169,40 +331,75 @@ void elastic_init(int force_reset) {
if (!index_exists || force_reset) {
r = web_delete(url);
printf("Delete index <%d>\n", r->status_code);
LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
r = web_put(url, "", NULL);
printf("Create index <%d>\n", r->status_code);
r = web_put(url, "");
LOG_INFOF("elastic.c", "Create index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_close", IndexCtx.es_url);
r = web_post(url, "", NULL);
printf("Close index <%d>\n", r->status_code);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/_ingest/pipeline/tie", IndexCtx.es_url);
r = web_put(url, pipeline_json);
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_settings", IndexCtx.es_url);
r = web_put(url, settings_json, "Content-Type: application/json");
printf("Update settings <%d>\n", r->status_code);
r = web_put(url, settings_json);
LOG_INFOF("elastic.c", "Update settings <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_mappings", IndexCtx.es_url);
r = web_put(url, mappings_json, "Content-Type: application/json");
printf("Update mappings <%d>\n", r->status_code);
snprintf(url, 4096, "%s/sist2/_mappings/_doc?include_type_name=true", IndexCtx.es_url);
r = web_put(url, mappings_json);
LOG_INFOF("elastic.c", "Update mappings <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_open", IndexCtx.es_url);
r = web_post(url, "", NULL);
printf("Open index <%d>\n", r->status_code);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
free_response(r);
}
}
cJSON *elastic_get_document(const char *uuid_str) {
char url[4096];
snprintf(url, 4096, "%s/sist2/_source/%s", WebCtx.es_url, uuid_str);
snprintf(url, 4096, "%s/sist2/_doc/%s", WebCtx.es_url, uuid_str);
response_t *r = web_get(url);
return cJSON_Parse(r->body);
cJSON *json = NULL;
if (r->status_code == 200) {
json = cJSON_Parse(r->body);
}
free_response(r);
return json;
}
char *elastic_get_status() {
char url[4096];
snprintf(url, 4096,
"%s/_cluster/state/metadata/sist2?filter_path=metadata.indices.*.state", WebCtx.es_url);
response_t *r = web_get(url);
cJSON *json = NULL;
char *status = malloc(128 * sizeof(char));
status[0] = '\0';
if (r->status_code == 200) {
json = cJSON_Parse(r->body);
const cJSON *metadata = cJSON_GetObjectItem(json, "metadata");
if (metadata != NULL) {
const cJSON *indices = cJSON_GetObjectItem(metadata, "indices");
const cJSON *sist2 = cJSON_GetObjectItem(indices, "sist2");
const cJSON *state = cJSON_GetObjectItem(sist2, "state");
strcpy(status, state->valuestring);
}
}
free_response(r);
cJSON_Delete(json);
return status;
}

View File

@@ -16,18 +16,21 @@ typedef struct es_indexer es_indexer_t;
void elastic_index_line(es_bulk_line_t *line);
void elastic_flush();
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
es_indexer_t *create_indexer(const char* es_url);
void destroy_indexer();
void elastic_cleanup();
void finish_indexer(char *script, char *index_id);
void elastic_init(int force_reset);
cJSON *elastic_get_document(const char *uuid_str);
char *elastic_get_status();
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]);
#endif

File diff suppressed because one or more lines are too long

View File

@@ -1,4 +1,10 @@
#include "web.h"
#include "src/sist.h"
#include <mongoose.h>
#include <pthread.h>
#include <curl/curl.h>
size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
@@ -9,10 +15,103 @@ size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
}
void free_response(response_t *resp) {
free(resp->body);
if (resp->body != NULL) {
free(resp->body);
}
free(resp);
}
#define SIST2_HEADERS "User-Agent: sist2\r\nContent-Type: application/json\r\n"
void http_req_ev(struct mg_connection *nc, int ev, void *ptr) {
http_ev_data_t *ev_data = (http_ev_data_t *) nc->user_data;
switch (ev) {
case MG_EV_CONNECT: {
int connect_status = *(int *) ptr;
if (connect_status != 0) {
ev_data->done = TRUE;
ev_data->resp->status_code = 0;
}
break;
}
case MG_EV_HTTP_REPLY: {
struct http_message *hm = (struct http_message *) ptr;
//TODO: Check errors?
ev_data->resp->size = hm->body.len;
ev_data->resp->status_code = hm->resp_code;
ev_data->resp->body = malloc(hm->body.len + 1);
memcpy(ev_data->resp->body, hm->body.p, hm->body.len);
*(ev_data->resp->body + hm->body.len) = '\0';
ev_data->done = TRUE;
break;
}
case MG_EV_CLOSE: {
ev_data->done = TRUE;
break;
}
default:
break;
}
}
subreq_ctx_t *http_req(const char *url, const char *extra_headers, const char *post_data, const char *method) {
struct mg_str scheme;
struct mg_str user_info;
struct mg_str host;
unsigned int port;
struct mg_str path;
struct mg_str query;
struct mg_str fragment;
if (post_data == NULL) post_data = "";
if (extra_headers == NULL) extra_headers = "";
if (path.len == 0) path = mg_mk_str("/");
if (host.len == 0) host = mg_mk_str("");
// [scheme://[user_info@]]host[:port][/path][?query][#fragment]
mg_parse_uri(mg_mk_str(url), &scheme, &user_info, &host, &port, &path, &query, &fragment);
if (query.len > 0) path.len += query.len + 1;
subreq_ctx_t *ctx = malloc(sizeof(subreq_ctx_t));
mg_mgr_init(&ctx->mgr, NULL);
char address[8196];
snprintf(address, sizeof(address), "tcp://%.*s:%u", (int) host.len, host.p, port);
struct mg_connection *nc = mg_connect(&ctx->mgr, address, http_req_ev);
nc->user_data = &ctx->ev_data;
mg_set_protocol_http_websocket(nc);
ctx->ev_data.resp = calloc(1, sizeof(response_t));
ctx->ev_data.done = FALSE;
mg_printf(
nc, "%s %.*s HTTP/1.1\r\n"
"Host: %.*s\r\n"
"Content-Length: %zu\r\n"
"%s\r\n"
"%s",
method, (int) path.len, path.p,
(int) (path.p - host.p), host.p,
strlen(post_data),
extra_headers,
post_data
);
return ctx;
}
subreq_ctx_t *web_post_async(const char *url, const char *data) {
return http_req(url, SIST2_HEADERS, data, "POST");
}
response_t *web_get(const char *url) {
response_t *resp = malloc(sizeof(response_t));
@@ -25,17 +124,21 @@ response_t *web_get(const char *url) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
struct curl_slist *headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
}
response_t *web_post(const char *url, const char *data, const char *header) {
response_t *web_post(const char *url, const char *data) {
response_t *resp = malloc(sizeof(response_t));
@@ -49,18 +152,16 @@ response_t *web_post(const char *url, const char *data, const char *header) {
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (header != NULL) {
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, header);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
}
struct curl_slist *headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
int r1 = curl_easy_perform(curl);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
@@ -69,7 +170,7 @@ response_t *web_post(const char *url, const char *data, const char *header) {
}
response_t *web_put(const char *url, const char *data, const char *header) {
response_t *web_put(const char *url, const char *data) {
response_t *resp = malloc(sizeof(response_t));
@@ -85,11 +186,8 @@ response_t *web_put(const char *url, const char *data, const char *header) {
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
if (header != NULL) {
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, header);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
}
struct curl_slist *headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
@@ -97,6 +195,7 @@ response_t *web_put(const char *url, const char *data, const char *header) {
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
@@ -118,11 +217,14 @@ response_t *web_delete(const char *url) {
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
struct curl_slist *headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;

View File

@@ -2,6 +2,7 @@
#define SIST2_WEB_H
#include "src/sist.h"
#include <mongoose.h>
typedef struct response {
char *body;
@@ -9,9 +10,20 @@ typedef struct response {
int status_code;
} response_t;
typedef struct {
response_t *resp;
int done;
} http_ev_data_t;
typedef struct {
http_ev_data_t ev_data;
struct mg_mgr mgr;
} subreq_ctx_t;
response_t *web_get(const char *url);
response_t *web_post(const char * url, const char * data, const char* header);
response_t *web_put(const char *url, const char *data, const char *header);
response_t *web_post(const char * url, const char * data);
subreq_ctx_t *web_post_async(const char *url, const char *data);
response_t *web_put(const char *url, const char *data);
response_t *web_delete(const char *url);
void free_response(response_t *resp);

View File

@@ -1,7 +1,9 @@
#include "src/ctx.h"
#include "serialize.h"
#include "src/parsing/parse.h"
#include "src/parsing/mime.h"
static __thread int IndexFd = -1;
static __thread int index_fd = -1;
typedef struct {
unsigned char uuid[16];
@@ -34,15 +36,19 @@ void write_index_descriptor(char *path, index_descriptor_t *desc) {
cJSON_AddStringToObject(json, "version", desc->version);
cJSON_AddStringToObject(json, "root", desc->root);
cJSON_AddStringToObject(json, "name", desc->name);
cJSON_AddStringToObject(json, "type", desc->type);
cJSON_AddStringToObject(json, "rewrite_url", desc->rewrite_url);
cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp);
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
if (fd == -1) {
perror(path);
if (fd < 0) {
LOG_FATALF("serialize.c", "Could not open index descriptor: %s", strerror(errno));
}
char *str = cJSON_Print(json);
write(fd, str, strlen(str));
int ret = write(fd, str, strlen(str));
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
}
free(str);
close(fd);
@@ -54,8 +60,16 @@ index_descriptor_t read_index_descriptor(char *path) {
struct stat info;
stat(path, &info);
int fd = open(path, O_RDONLY);
if (fd == -1) {
LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path, strerror(errno))
}
char *buf = malloc(info.st_size + 1);
read(fd, buf, info.st_size);
int ret = read(fd, buf, info.st_size);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
}
*(buf + info.st_size) = '\0';
close(fd);
@@ -66,9 +80,14 @@ index_descriptor_t read_index_descriptor(char *path) {
strcpy(descriptor.root, cJSON_GetObjectItem(json, "root")->valuestring);
strcpy(descriptor.name, cJSON_GetObjectItem(json, "name")->valuestring);
strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
descriptor.root_len = (short)strlen(descriptor.root);
descriptor.root_len = (short) strlen(descriptor.root);
strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring);
if (cJSON_GetObjectItem(json, "type") == NULL) {
strcpy(descriptor.type, INDEX_TYPE_BIN);
} else {
strcpy(descriptor.type, cJSON_GetObjectItem(json, "type")->valuestring);
}
cJSON_Delete(json);
free(buf);
@@ -105,6 +124,32 @@ char *get_meta_key_text(enum metakey meta_key) {
return "title";
case MetaFontName:
return "font_name";
case MetaParent:
return "parent";
case MetaExifMake:
return "exif_make";
case MetaExifSoftware:
return "exif_software";
case MetaExifExposureTime:
return "exif_exposure_time";
case MetaExifFNumber:
return "exif_fnumber";
case MetaExifFocalLength:
return "exif_focal_length";
case MetaExifUserComment:
return "exif_user_comment";
case MetaExifIsoSpeedRatings:
return "exif_iso_speed_ratings";
case MetaExifModel:
return "exif_model";
case MetaExifDateTime:
return "exif_datetime";
case MetaAuthor:
return "author";
case MetaModifiedBy:
return "modified_by";
case MetaThumbnail:
return "thumbnail";
default:
return NULL;
}
@@ -113,13 +158,13 @@ char *get_meta_key_text(enum metakey meta_key) {
void write_document(document_t *doc) {
if (IndexFd == -1) {
if (index_fd == -1) {
char dstfile[PATH_MAX];
pid_t tid = syscall(SYS_gettid);
snprintf(dstfile, PATH_MAX, "%s_index_%d", ScanCtx.index.path, tid);
IndexFd = open(dstfile, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR);
pthread_t self = pthread_self();
snprintf(dstfile, PATH_MAX, "%s_index_%lu", ScanCtx.index.path, self);
index_fd = open(dstfile, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR);
if (IndexFd == -1) {
if (index_fd == -1) {
perror("open");
}
}
@@ -127,8 +172,8 @@ void write_document(document_t *doc) {
dyn_buffer_t buf = dyn_buffer_create();
// Ignore root directory in the file path
doc->ext = doc->ext - ScanCtx.index.desc.root_len;
doc->base = doc->base - ScanCtx.index.desc.root_len;
doc->ext = (short) (doc->ext - ScanCtx.index.desc.root_len);
doc->base = (short) (doc->base - ScanCtx.index.desc.root_len);
doc->filepath += ScanCtx.index.desc.root_len;
dyn_buffer_write(&buf, doc, sizeof(line_t));
@@ -139,11 +184,11 @@ void write_document(document_t *doc) {
dyn_buffer_write_char(&buf, meta->key);
if (IS_META_INT(meta->key)) {
dyn_buffer_write_int(&buf, meta->intval);
dyn_buffer_write_int(&buf, meta->int_val);
} else if (IS_META_LONG(meta->key)) {
dyn_buffer_write_long(&buf, meta->longval);
dyn_buffer_write_long(&buf, meta->long_val);
} else {
dyn_buffer_write_str(&buf, meta->strval);
dyn_buffer_write_str(&buf, meta->str_val);
}
meta_line_t *tmp = meta;
@@ -152,24 +197,29 @@ void write_document(document_t *doc) {
}
dyn_buffer_write_char(&buf, '\n');
write(IndexFd, buf.buf, buf.cur);
int res = write(index_fd, buf.buf, buf.cur);
if (res == -1) {
LOG_FATALF("serialize.c", "Could not write document: %s", strerror(errno))
}
ScanCtx.stat_index_size += buf.cur;
dyn_buffer_destroy(&buf);
}
void serializer_cleanup() {
close(IndexFd);
void thread_cleanup() {
close(index_fd);
cleanup_parse();
cleanup_font();
}
void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func func) {
void read_index_bin(const char *path, const char *index_id, index_func func) {
line_t line;
dyn_buffer_t buf = dyn_buffer_create();
FILE *file = fopen(path, "rb");
while (1) {
buf.cur = 0;
fread((void *) &line, 1, sizeof(line_t), file);
size_t _ = fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) {
break;
}
@@ -180,82 +230,204 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
char uuid_str[UUID_STR_LEN];
uuid_unparse(line.uuid, uuid_str);
cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime));
cJSON_AddNumberToObject(document, "size", (double)line.size);
const char *mime_text = mime_get_mime_text(line.mime);
if (mime_text == NULL) {
cJSON_AddNullToObject(document, "mime");
} else {
cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime));
}
cJSON_AddNumberToObject(document, "size", (double) line.size);
cJSON_AddNumberToObject(document, "mtime", line.mtime);
int c;
int c = 0;
while ((c = getc(file)) != 0) {
dyn_buffer_write_char(&buf, (char) c);
}
dyn_buffer_write_char(&buf, '\0');
if (IndexCtx.tags != NULL) {
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, buf.buf);
if (tags_string != NULL) {
cJSON *tags_arr = cJSON_Parse(tags_string);
cJSON_AddItemToObject(document, "tag", tags_arr);
}
}
cJSON_AddStringToObject(document, "extension", buf.buf + line.ext);
if (*(buf.buf + line.ext - 1) == '.') {
*(buf.buf + line.ext - 1) = '\0';
} else {
*(buf.buf + line.ext) = '\0';
}
cJSON_AddStringToObject(document, "name", buf.buf + line.base);
*(buf.buf + line.base - 1) = '\0';
cJSON_AddStringToObject(document, "path", buf.buf);
char tmp[PATH_MAX * 3];
str_escape(tmp, buf.buf + line.base);
cJSON_AddStringToObject(document, "name", tmp);
if (line.base > 0) {
*(buf.buf + line.base - 1) = '\0';
str_escape(tmp, buf.buf);
cJSON_AddStringToObject(document, "path", tmp);
} else {
cJSON_AddStringToObject(document, "path", "");
}
enum metakey key = getc(file);
size_t ret = 0;
while (key != '\n') {
switch (key) {
case MetaWidth:
case MetaHeight:
case MetaMediaDuration:
case MetaMediaBitrate: {
case MetaHeight: {
int value;
fread(&value, sizeof(int), 1, file);
ret = fread(&value, sizeof(int), 1, file);
cJSON_AddNumberToObject(document, get_meta_key_text(key), value);
break;
}
case MetaMediaAudioCodec:
case MetaMediaVideoCodec: {
int value;
fread(&value, sizeof(int), 1, file);
const AVCodecDescriptor *desc = avcodec_descriptor_get(value);
if (desc != NULL) {
cJSON_AddStringToObject(document, get_meta_key_text(key), desc->name);
}
case MetaMediaDuration:
case MetaMediaBitrate: {
long value;
ret = fread(&value, sizeof(long), 1, file);
cJSON_AddNumberToObject(document, get_meta_key_text(key), (double) value);
break;
}
case MetaMediaAudioCodec:
case MetaMediaVideoCodec:
case MetaContent:
case MetaArtist:
case MetaAlbum:
case MetaAlbumArtist:
case MetaGenre:
case MetaFontName:
case MetaParent:
case MetaExifMake:
case MetaExifSoftware:
case MetaExifExposureTime:
case MetaExifFNumber:
case MetaExifFocalLength:
case MetaExifUserComment:
case MetaExifIsoSpeedRatings:
case MetaExifDateTime:
case MetaExifModel:
case MetaAuthor:
case MetaModifiedBy:
case MetaThumbnail:
case MetaTitle: {
buf.cur = 0;
while ((c = getc(file)) != 0) {
dyn_buffer_write_char(&buf, (char) c);
if (SHOULD_KEEP_CHAR(c) || c == ' ') {
dyn_buffer_write_char(&buf, (char) c);
}
}
dyn_buffer_write_char(&buf, '\0');
cJSON_AddStringToObject(document, get_meta_key_text(key), buf.buf);
break;
}
default:
LOG_FATALF("serialize.c", "Invalid meta key (corrupt index): %x", key)
}
key = getc(file);
}
func(document, uuid_str);
cJSON_free(document);
cJSON_Delete(document);
}
dyn_buffer_destroy(&buf);
fclose(file);
}
const char *json_type_copy_fields[] = {
"mime", "name", "path", "extension", "index", "size", "mtime", "parent",
// Meta
"title", "content", "width", "height", "duration", "audioc", "videoc",
"bitrate", "artist", "album", "album_artist", "genre", "title", "font_name",
// Special
"tag", "_url"
};
const char *json_type_array_fields[] = {
"_keyword", "_text"
};
void read_index_json(const char *path, UNUSED(const char *index_id), index_func func) {
FILE *file = fopen(path, "r");
while (1) {
char *line = NULL;
size_t len;
size_t read = getline(&line, &len, file);
if (read < 0) {
if (line) {
free(line);
}
break;
}
cJSON *input = cJSON_Parse(line);
if (input == NULL) {
LOG_FATALF("serialize.c", "Could not parse JSON line: \n%s", line)
}
if (line) {
free(line);
}
cJSON *document = cJSON_CreateObject();
const char *uuid_str = cJSON_GetObjectItem(input, "_id")->valuestring;
for (int i = 0; i < (sizeof(json_type_copy_fields) / sizeof(json_type_copy_fields[0])); i++) {
cJSON *value = cJSON_GetObjectItem(input, json_type_copy_fields[i]);
if (value != NULL) {
cJSON_AddItemReferenceToObject(document, json_type_copy_fields[i], value);
}
}
for (int i = 0; i < (sizeof(json_type_array_fields) / sizeof(json_type_array_fields[0])); i++) {
cJSON *arr = cJSON_GetObjectItem(input, json_type_array_fields[i]);
if (arr != NULL) {
cJSON *obj;
cJSON_ArrayForEach(obj, arr) {
char key[1024];
cJSON *k = cJSON_GetObjectItem(obj, "k");
cJSON *v = cJSON_GetObjectItem(obj, "v");
if (k == NULL || v == NULL || !cJSON_IsString(k) || !cJSON_IsString(v)) {
char *str = cJSON_Print(obj);
LOG_FATALF("serialize.c", "Invalid %s member: must contain .k and .v string fields: \n%s",
json_type_array_fields[i], str)
}
snprintf(key, sizeof(key), "%s.%s", json_type_array_fields[i], k->valuestring);
cJSON_AddStringToObject(document, key, v->valuestring);
}
}
}
func(document, uuid_str);
cJSON_Delete(document);
cJSON_Delete(input);
}
fclose(file);
}
void read_index(const char *path, const char index_id[UUID_STR_LEN], const char *type, index_func func) {
if (strcmp(type, INDEX_TYPE_BIN) == 0) {
read_index_bin(path, index_id, func);
} else if (strcmp(type, INDEX_TYPE_JSON) == 0) {
read_index_json(path, index_id, func);
}
}
void incremental_read(GHashTable *table, const char *filepath) {
FILE *file = fopen(filepath, "rb");
line_t line;
while (1) {
fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) {
size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
if (ret != 1 || feof(file)) {
break;
}
@@ -278,8 +450,8 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
line_t line;
while (1) {
fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) {
size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
if (ret != 1 || feof(file)) {
break;
}
@@ -289,6 +461,7 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
size_t buf_len;
char *buf = store_read(store, (char *) line.uuid, 16, &buf_len);
store_write(dst_store, (char *) line.uuid, 16, buf, buf_len);
free(buf);
char c;
while ((c = (char) getc(file))) {
@@ -306,11 +479,11 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
if (IS_META_INT(key)) {
int val;
fread(&val, sizeof(val), 1, file);
ret = fread(&val, sizeof(val), 1, file);
fwrite(&val, sizeof(val), 1, dst_file);
} else if (IS_META_LONG(key)) {
long val;
fread(&val, sizeof(val), 1, file);
ret = fread(&val, sizeof(val), 1, file);
fwrite(&val, sizeof(val), 1, dst_file);
} else {
while ((c = (char) getc(file))) {
@@ -318,6 +491,10 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
}
fwrite("\0", sizeof(c), 1, dst_file);
}
if (ret != 1) {
break;
}
}
} else {
skip_meta(file);

View File

@@ -2,7 +2,10 @@
#define SIST2_SERIALIZE_H
#include "src/sist.h"
#include "store.h"
#include <sys/syscall.h>
#include <glib.h>
typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]);
@@ -11,14 +14,14 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
void write_document(document_t *doc);
void read_index(const char *path, const char[UUID_STR_LEN], index_func);
void read_index(const char *path, const char[UUID_STR_LEN], const char *type, index_func);
void incremental_read(GHashTable *table, const char *filepath);
/**
* Must be called after write_document
*/
void serializer_cleanup();
void thread_cleanup();
void write_index_descriptor(char *path, index_descriptor_t *desc);

View File

@@ -1,25 +1,25 @@
#include "store.h"
#include "src/ctx.h"
store_t *store_create(char *path) {
store_t *store_create(char *path, size_t chunk_size) {
store_t *store = malloc(sizeof(struct store_t));
store->chunk_size = chunk_size;
pthread_rwlock_init(&store->lock, NULL);
mdb_env_create(&store->env);
int open_ret = mdb_env_open(store->env,
path,
MDB_WRITEMAP | MDB_MAPASYNC,
S_IRUSR | S_IWUSR
path,
MDB_WRITEMAP | MDB_MAPASYNC,
S_IRUSR | S_IWUSR
);
if (open_ret != 0) {
fprintf(stderr, "Error while opening store: %s", mdb_strerror(open_ret));
exit(1);
LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path)
}
store->size = (size_t) 1024 * 1024 * 5;
store->size = (size_t) store->chunk_size;
ScanCtx.stat_tn_size = 0;
mdb_env_set_mapsize(store->env, store->size);
@@ -42,6 +42,12 @@ void store_destroy(store_t *store) {
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {
if (LogCtx.very_verbose) {
char uuid_str[UUID_STR_LEN];
uuid_unparse((unsigned char *) key, uuid_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", uuid_str, buf_len)
}
MDB_val mdb_key;
mdb_key.mv_data = key;
mdb_key.mv_size = key_len;
@@ -64,17 +70,19 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
// Cannot resize when there is a opened transaction.
// Resize take effect on the next commit.
pthread_rwlock_wrlock(&store->lock);
store->size += 1024 * 1024 * 5;
store->size += store->chunk_size;
mdb_env_set_mapsize(store->env, store->size);
mdb_txn_begin(store->env, NULL, 0, &txn);
put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
LOG_INFOF("store.c", "Updated mdb mapsize to %lu bytes", store->size)
}
mdb_txn_commit(txn);
pthread_rwlock_unlock(&store->lock);
if (put_ret != 0) {
printf("%s\n", mdb_strerror(put_ret));
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
}
@@ -103,3 +111,40 @@ char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen)
return buf;
}
GHashTable *store_read_all(store_t *store) {
int count = 0;
GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);
MDB_txn *txn = NULL;
mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);
MDB_cursor *cur = NULL;
mdb_cursor_open(txn, store->dbi, &cur);
MDB_val key;
MDB_val value;
while (mdb_cursor_get(cur, &key, &value, MDB_NEXT) == 0) {
char *key_str = malloc(key.mv_size);
memcpy(key_str, key.mv_data, key.mv_size);
char *val_str = malloc(value.mv_size);
memcpy(val_str, value.mv_data, value.mv_size);
g_hash_table_insert(table, key_str, val_str);
count += 1;
}
LOG_DEBUGF("store.c", "Read tags for %d documents", count);
mdb_cursor_close(cur);
mdb_txn_abort(txn);
return table;
}
void store_copy(store_t *store, const char *destination) {
mkdir(destination, S_IWUSR | S_IRUSR | S_IXUSR);
mdb_env_copy(store->env, destination);
}

View File

@@ -4,16 +4,20 @@
#include <pthread.h>
#include <lmdb.h>
#include <glib.h>
#define STORE_SIZE_TN 1024 * 1024 * 5
#define STORE_SIZE_TAG 1024 * 16
typedef struct store_t {
MDB_dbi dbi;
MDB_env *env;
size_t size;
size_t chunk_size;
pthread_rwlock_t lock;
} store_t;
#include "src/sist.h"
store_t *store_create(char *path);
store_t *store_create(char *path, size_t chunk_size);
void store_destroy(store_t *store);
@@ -21,4 +25,8 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);
GHashTable *store_read_all(store_t *store);
void store_copy(store_t *store, const char *destination);
#endif

View File

@@ -1,28 +1,50 @@
#include "walk.h"
#include "src/ctx.h"
#include "src/parsing/parse.h"
parse_job_t *create_parse_job(const char *filepath, const struct stat *info, int base) {
#include <ftw.h>
__always_inline
parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
int len = (int) strlen(filepath);
parse_job_t *job = malloc(sizeof(parse_job_t) + len);
memcpy(&(job->filepath), filepath, len + 1);
strcpy(job->filepath, filepath);
job->base = base;
char *p = strrchr(filepath + base, '.');
if (p != NULL) {
job->ext = (int)(p - filepath + 1);
job->ext = (int) (p - filepath + 1);
} else {
job->ext = len;
}
memcpy(&(job->info), info, sizeof(struct stat));
job->vfile.info = *info;
memset(job->parent, 0, 16);
job->vfile.filepath = job->filepath;
job->vfile.read = fs_read;
job->vfile.reset = fs_reset;
job->vfile.close = fs_close;
job->vfile.fd = -1;
job->vfile.is_fs_file = TRUE;
return job;
}
int sub_strings[30];
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, filepath, strlen(filepath), 0, 0, sub_strings, sizeof(sub_strings)) >= 0)
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
parse_job_t *job = create_parse_job(filepath, info, ftw->base);
if (typeflag == FTW_F && S_ISREG(info->st_mode) && ftw->level <= ScanCtx.depth) {
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
return 0;
}
parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
tpool_add_work(ScanCtx.pool, parse, job);
}

View File

@@ -3,8 +3,6 @@
#define _XOPEN_SOURCE 500
#include "src/sist.h"
int walk_directory_tree(const char *);
#endif

112
src/log.c Normal file
View File

@@ -0,0 +1,112 @@
#include "log.h"
#include <pthread.h>
#include <stdarg.h>
const char *log_colors[] = {
"\033[34m", "\033[01;34m", "\033[0m",
"\033[01;33m", "\033[31m", "\033[01;31m"
};
const char *log_levels[] = {
"DEBUG", "INFO", "WARNING", "ERROR", "FATAL"
};
void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
static int is_tty = -1;
if (is_tty == -1) {
is_tty = isatty(STDERR_FILENO);
}
char log_str[LOG_MAX_LENGTH];
unsigned long long pid = (unsigned long long) pthread_self();
char datetime[32];
time_t t;
struct tm result;
t = time(NULL);
localtime_r(&t, &result);
strftime(datetime, sizeof(datetime), "%Y-%m-%d %H:%M:%S", &result);
int log_len;
if (is_tty) {
log_len = snprintf(
log_str, sizeof(log_str),
"\033[%dm[%04llX]%s [%s] [%s %s] ",
31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
datetime, log_levels[level], filepath
);
} else {
log_len = snprintf(
log_str, sizeof(log_str),
"[%04llX] [%s] [%s %s] ",
pid, datetime, log_levels[level], filepath
);
}
size_t maxsize = sizeof(log_str) - log_len;
log_len += vsnprintf(log_str + log_len, maxsize, format, ap);
if (is_tty) {
log_len += sprintf(log_str + log_len, "\033[0m\n");
} else {
*(log_str + log_len) = '\n';
log_len += 1;
}
int ret = write(STDERR_FILENO, log_str, log_len);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno))
}
}
void sist_logf(const char *filepath, int level, char *format, ...) {
va_list ap;
va_start(ap, format);
vsist_logf(filepath, level, format, ap);
va_end(ap);
}
void sist_log(const char *filepath, int level, char *str) {
static int is_tty = -1;
if (is_tty == -1) {
is_tty = isatty(STDERR_FILENO);
}
char log_str[LOG_MAX_LENGTH];
unsigned long long pid = (unsigned long long) pthread_self();
char datetime[32];
time_t t;
struct tm result;
t = time(NULL);
localtime_r(&t, &result);
strftime(datetime, sizeof(datetime), "%Y-%m-%d %H:%M:%S", &result);
int log_len;
if (is_tty) {
log_len = snprintf(
log_str, sizeof(log_str),
"\033[%dm[%04llX]%s [%s] [%s %s] %s \033[0m\n",
31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
datetime, log_levels[level], filepath,
str
);
} else {
log_len = snprintf(
log_str, sizeof(log_str),
"[%04llX] [%s] [%s %s] %s \n",
pid, datetime, log_levels[level], filepath,
str
);
}
int ret = write(STDERR_FILENO, log_str, log_len);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
}
}

47
src/log.h Normal file
View File

@@ -0,0 +1,47 @@
#ifndef SIST2_LOG_H
#define SIST2_LOG_H
#define LOG_MAX_LENGTH 8192
#define SIST_DEBUG 0
#define SIST_INFO 1
#define SIST_WARNING 2
#define SIST_ERROR 3
#define SIST_FATAL 4
#define LOG_DEBUGF(filepath, fmt, ...) \
if (LogCtx.very_verbose) {sist_logf(filepath, SIST_DEBUG, fmt, __VA_ARGS__);}
#define LOG_DEBUG(filepath, str) \
if (LogCtx.very_verbose) {sist_log(filepath, SIST_DEBUG, str);}
#define LOG_INFOF(filepath, fmt, ...) \
if (LogCtx.verbose) {sist_logf(filepath, SIST_INFO, fmt, __VA_ARGS__);}
#define LOG_INFO(filepath, str) \
if (LogCtx.verbose) {sist_log(filepath, SIST_INFO, str);}
#define LOG_WARNINGF(filepath, fmt, ...) \
if (LogCtx.verbose) {sist_logf(filepath, SIST_WARNING, fmt, __VA_ARGS__);}
#define LOG_WARNING(filepath, str) \
if (LogCtx.verbose) {sist_log(filepath, SIST_WARNING, str);}
#define LOG_ERRORF(filepath, fmt, ...) \
if (LogCtx.verbose) {sist_logf(filepath, SIST_ERROR, fmt, __VA_ARGS__);}
#define LOG_ERROR(filepath, str) \
if (LogCtx.verbose) {sist_log(filepath, SIST_ERROR, str);}
#define LOG_FATALF(filepath, fmt, ...) \
sist_logf(filepath, SIST_FATAL, fmt, __VA_ARGS__);\
exit(-1);
#define LOG_FATAL(filepath, str) \
sist_log(filepath, SIST_FATAL, str);\
exit(-1);
#include "sist.h"
void sist_logf(const char *filepath, int level, char *format, ...);
void vsist_logf(const char *filepath, int level, char *format, va_list ap);
void sist_log(const char *filepath, int level, char *str);
#endif

View File

@@ -1,66 +1,186 @@
#include "sist.h"
#include "ctx.h"
#include <third-party/argparse/argparse.h>
#include <locale.h>
#include "cli.h"
#include "io/serialize.h"
#include "io/store.h"
#include "tpool.h"
#include "io/walk.h"
#include "index/elastic.h"
#include "web/serve.h"
#include "parsing/mime.h"
#include "parsing/parse.h"
#include "stats.h"
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "1.0.0";
static const char *const Version = "2.7.3";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
"sist2 web [OPTION]... INDEX...",
"sist2 exec-script [OPTION]... INDEX",
NULL,
};
void global_init() {
curl_global_init(CURL_GLOBAL_NOTHING);
av_log_set_level(AV_LOG_QUIET);
}
void init_dir(const char *dirpath) {
char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
uuid_t uuid;
uuid_generate_time_safe(uuid);
uuid_generate(uuid);
uuid_unparse(uuid, ScanCtx.index.desc.uuid);
time(&ScanCtx.index.desc.timestamp);
strcpy(ScanCtx.index.desc.version, Version);
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_BIN);
write_index_descriptor(path, &ScanCtx.index.desc);
}
void scan_print_header() {
printf("sist2 V%s\n", Version);
printf("---------------------\n");
printf("threads\t\t%d\n", ScanCtx.threads);
printf("tn_qscale\t%.1f/31.0\n", ScanCtx.tn_qscale);
printf("tn_size\t\t%dpx\n", ScanCtx.tn_size);
printf("output\t\t%s\n", ScanCtx.index.path);
LOG_INFOF("main.c", "sist2 v%s", Version)
}
void sist2_scan(scan_args_t *args) {
void _store(char *key, size_t key_len, char *buf, size_t buf_len) {
store_write(ScanCtx.index.store, key, key_len, buf, buf_len);
}
void _log(const char *filepath, int level, char *str) {
if (level == LEVEL_FATAL) {
sist_log(filepath, level, str);
exit(-1);
}
if (LogCtx.verbose) {
if (level == LEVEL_DEBUG) {
if (LogCtx.very_verbose) {
sist_log(filepath, level, str);
}
} else {
sist_log(filepath, level, str);
}
}
}
void _logf(const char *filepath, int level, char *format, ...) {
va_list args;
va_start(args, format);
if (level == LEVEL_FATAL) {
vsist_logf(filepath, level, format, args);
exit(-1);
}
if (LogCtx.verbose) {
if (level == LEVEL_DEBUG) {
if (LogCtx.very_verbose) {
vsist_logf(filepath, level, format, args);
}
} else {
vsist_logf(filepath, level, format, args);
}
}
va_end(args);
}
void initialize_scan_context(scan_args_t *args) {
// Arc
ScanCtx.arc_ctx.mode = args->archive_mode;
ScanCtx.arc_ctx.log = _log;
ScanCtx.arc_ctx.logf = _logf;
ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
// Comic
ScanCtx.comic_ctx.log = _log;
ScanCtx.comic_ctx.logf = _logf;
ScanCtx.comic_ctx.store = _store;
ScanCtx.comic_ctx.tn_size = args->size;
ScanCtx.comic_ctx.tn_qscale = args->quality;
ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
// Ebook
pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
ScanCtx.ebook_ctx.content_size = args->content_size;
ScanCtx.ebook_ctx.tn_size = args->size;
ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
ScanCtx.ebook_ctx.log = _log;
ScanCtx.ebook_ctx.logf = _logf;
ScanCtx.ebook_ctx.store = _store;
// Font
ScanCtx.font_ctx.enable_tn = args->size > 0;
ScanCtx.font_ctx.log = _log;
ScanCtx.font_ctx.logf = _logf;
ScanCtx.font_ctx.store = _store;
// Media
ScanCtx.media_ctx.tn_qscale = args->quality;
ScanCtx.media_ctx.tn_size = args->size;
ScanCtx.media_ctx.log = _log;
ScanCtx.media_ctx.logf = _logf;
ScanCtx.media_ctx.store = _store;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
init_media();
// OOXML
ScanCtx.ooxml_ctx.content_size = args->content_size;
ScanCtx.ooxml_ctx.log = _log;
ScanCtx.ooxml_ctx.logf = _logf;
ScanCtx.ooxml_ctx.store = _store;
// MOBI
ScanCtx.mobi_ctx.content_size = args->content_size;
ScanCtx.mobi_ctx.log = _log;
ScanCtx.mobi_ctx.logf = _logf;
// TEXT
ScanCtx.text_ctx.content_size = args->content_size;
ScanCtx.text_ctx.log = _log;
ScanCtx.text_ctx.logf = _logf;
ScanCtx.tn_qscale = args->quality;
ScanCtx.tn_size = args->size;
ScanCtx.content_size = args->content_size;
ScanCtx.pool = tpool_create(args->threads, serializer_cleanup);
ScanCtx.threads = args->threads;
ScanCtx.depth = args->depth;
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
strcpy(ScanCtx.index.desc.root, args->path);
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
ScanCtx.fast = args->fast;
init_dir(ScanCtx.index.path);
// Raw
ScanCtx.raw_ctx.tn_qscale = args->quality;
ScanCtx.raw_ctx.tn_size = args->size;
ScanCtx.raw_ctx.log = _log;
ScanCtx.raw_ctx.logf = _logf;
ScanCtx.raw_ctx.store = _store;
}
void sist2_scan(scan_args_t *args) {
ScanCtx.mime_table = mime_get_mime_table();
ScanCtx.ext_table = mime_get_ext_table();
initialize_scan_context(args);
init_dir(ScanCtx.index.path);
char store_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
ScanCtx.index.store = store_create(store_path);
ScanCtx.index.store = store_create(store_path, STORE_SIZE_TN);
scan_print_header();
@@ -70,9 +190,18 @@ void sist2_scan(scan_args_t *args) {
DIR *dir = opendir(args->incremental);
if (dir == NULL) {
perror("opendir");
return;
LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno))
}
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
if (strcmp(original_desc.version, Version) != 0) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", original_desc.version,
Version, INDEX_VERSION_EXTERNAL)
}
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
@@ -83,18 +212,22 @@ void sist2_scan(scan_args_t *args) {
}
closedir(dir);
printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table));
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
}
ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE);
tpool_start(ScanCtx.pool);
walk_directory_tree(ScanCtx.index.desc.root);
tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool);
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
if (args->incremental != NULL) {
char dst_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
snprintf(dst_path, PATH_MAX, "%s_index_original", ScanCtx.index.path);
store_t *source = store_create(store_path);
store_t *source = store_create(store_path, STORE_SIZE_TN);
DIR *dir = opendir(args->incremental);
if (dir == NULL) {
@@ -111,6 +244,13 @@ void sist2_scan(scan_args_t *args) {
}
closedir(dir);
store_destroy(source);
snprintf(store_path, PATH_MAX, "%stags", args->incremental);
snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
store_copy(source_tags, dst_path);
store_destroy(source_tags);
}
store_destroy(ScanCtx.index.store);
@@ -119,6 +259,7 @@ void sist2_scan(scan_args_t *args) {
void sist2_index(index_args_t *args) {
IndexCtx.es_url = args->es_url;
IndexCtx.batch_size = args->batch_size;
if (!args->print) {
elastic_init(args->force_reset);
@@ -128,17 +269,25 @@ void sist2_index(index_args_t *args) {
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path);
index_descriptor_t desc = read_index_descriptor(descriptor_path);
if (strcmp(desc.version, Version) != 0) {
fprintf(stderr, "Version mismatch! Index is v%s but executable is v%s\n", desc.version, Version);
return;
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
if (strcmp(desc.version, Version) != 0 && strcmp(desc.version, INDEX_VERSION_EXTERNAL) != 0) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", desc.version, Version,
INDEX_VERSION_EXTERNAL)
}
DIR *dir = opendir(args->index_path);
if (dir == NULL) {
perror("opendir");
return;
LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno))
}
char path_tmp[PATH_MAX];
snprintf(path_tmp, sizeof(path_tmp), "%s/tags", args->index_path);
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
IndexCtx.tags = store_read_all(IndexCtx.tag_store);
index_func f;
if (args->print) {
f = print_json;
@@ -146,25 +295,62 @@ void sist2_index(index_args_t *args) {
f = index_json;
}
void (*cleanup)();
if (args->print) {
cleanup = NULL;
} else {
cleanup = elastic_cleanup;
}
IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE);
tpool_start(IndexCtx.pool);
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", args->index_path, de->d_name);
read_index(file_path, desc.uuid, f);
read_index(file_path, desc.uuid, desc.type, f);
}
}
closedir(dir);
tpool_wait(IndexCtx.pool);
if (!args->print) {
elastic_flush();
destroy_indexer();
finish_indexer(args->script, desc.uuid);
}
tpool_destroy(IndexCtx.pool);
store_destroy(IndexCtx.tag_store);
g_hash_table_remove_all(IndexCtx.tags);
g_hash_table_destroy(IndexCtx.tags);
}
void sist2_exec_script(exec_args_t *args) {
LogCtx.verbose = TRUE;
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path);
index_descriptor_t desc = read_index_descriptor(descriptor_path);
IndexCtx.es_url = args->es_url;
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
execute_update_script(args->script, desc.uuid);
free(args->script);
}
void sist2_web(web_args_t *args) {
WebCtx.es_url = args->es_url;
WebCtx.index_count = args->index_count;
WebCtx.auth_user = args->auth_user;
WebCtx.auth_pass = args->auth_pass;
WebCtx.auth_enabled = args->auth_enabled;
WebCtx.tag_auth_enabled = args->tag_auth_enabled;
for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]);
@@ -174,7 +360,11 @@ void sist2_web(web_args_t *args) {
char path_tmp[PATH_MAX];
snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path);
WebCtx.indices[i].store = store_create(path_tmp);
WebCtx.indices[i].store = store_create(path_tmp, STORE_SIZE_TN);
snprintf(path_tmp, PATH_MAX, "%stags", abs_path);
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
WebCtx.indices[i].tag_store = store_create(path_tmp, STORE_SIZE_TAG);
snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path);
WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
@@ -184,46 +374,76 @@ void sist2_web(web_args_t *args) {
free(abs_path);
}
serve(args->bind, args->port);
serve(args->listen_address);
}
int main(int argc, const char *argv[]) {
global_init();
setlocale(LC_ALL, "");
scan_args_t *scan_args = scan_args_create();
index_args_t *index_args = index_args_create();
web_args_t *web_args = web_args_create();
exec_args_t *exec_args = exec_args_create();
char * common_es_url;
int arg_version = 0;
char *common_es_url = NULL;
char *common_script_path = NULL;
int common_threads = 0;
struct argparse_option options[] = {
OPT_HELP(),
OPT_BOOLEAN('v', "version", &arg_version, "Show version and exit"),
OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging"),
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
OPT_GROUP("Scan options"),
OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_FLOAT('q', "quality", &scan_args->quality,
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=15"),
OPT_INTEGER(0, "size", &scan_args->size, "Thumbnail size, in pixels. DEFAULT=200"),
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
OPT_INTEGER(0, "size", &scan_args->size,
"Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500"),
OPT_INTEGER(0, "content-size", &scan_args->content_size,
"Number of bytes to be extracted from text documents. DEFAULT=4096"),
"Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768"),
OPT_STRING(0, "incremental", &scan_args->incremental,
"Reuse an existing index and only scan modified files."),
OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."),
OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: (name of the directory)"),
OPT_INTEGER(0, "depth", &scan_args->depth, "Scan up to DEPTH subdirectories deep. "
"Use 0 to only scan files in PATH. DEFAULT: -1"),
OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
"skip: Don't parse, list: only get file names as text, "
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see "
"which are installed on your machine)"),
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
"(see USAGE.md). DEFAULT: 0.0005"),
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
"Maximum memory buffer size per thread in MB for files inside archives "
"(see USAGE.md). DEFAULT: 2000"),
OPT_GROUP("Index options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
"(You must use this option the first time you use the index command)"),
"(You must use this option the first time you use the index command)"),
OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "bind", &web_args->bind, "Listen on this address. DEFAULT=localhost"),
OPT_STRING(0, "port", &web_args->port, "Listen on this port. DEFAULT=4090"),
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"),
OPT_GROUP("Exec-script options"),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_END(),
};
@@ -233,17 +453,31 @@ int main(int argc, const char *argv[]) {
argparse_describe(&argparse, DESCRIPTION, EPILOG);
argc = argparse_parse(&argparse, argc, argv);
if (arg_version) {
printf(Version);
goto end;
}
if (LogCtx.very_verbose != 0) {
LogCtx.verbose = 1;
}
web_args->es_url = common_es_url;
index_args->es_url = common_es_url;
exec_args->es_url = common_es_url;
index_args->script_path = common_script_path;
exec_args->script_path = common_script_path;
index_args->threads = common_threads;
scan_args->threads = common_threads;
if (argc == 0) {
argparse_usage(&argparse);
return 1;
goto end;
} else if (strcmp(argv[0], "scan") == 0) {
int err = scan_args_validate(scan_args, argc, argv);
if (err != 0) {
return err;
goto end;
}
sist2_scan(scan_args);
@@ -251,7 +485,7 @@ int main(int argc, const char *argv[]) {
int err = index_args_validate(index_args, argc, argv);
if (err != 0) {
return err;
goto end;
}
sist2_index(index_args);
@@ -259,15 +493,30 @@ int main(int argc, const char *argv[]) {
int err = web_args_validate(web_args, argc, argv);
if (err != 0) {
return err;
goto end;
}
sist2_web(web_args);
} else if (strcmp(argv[0], "exec-script") == 0) {
int err = exec_args_validate(exec_args, argc, argv);
if (err != 0) {
goto end;
}
sist2_exec_script(exec_args);
} else {
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
argparse_usage(&argparse);
return 1;
goto end;
}
printf("\n");
end:
scan_args_destroy(scan_args);
index_args_destroy(index_args);
web_args_destroy(web_args);
exec_args_destroy(exec_args);
return 0;
}

View File

@@ -1,211 +0,0 @@
#include "font.h"
#include "ft2build.h"
#include "freetype/freetype.h"
#include "src/ctx.h"
__thread FT_Library library = NULL;
typedef struct text_dimensions {
unsigned int width;
unsigned int height;
unsigned int baseline;
} text_dimensions_t;
typedef struct glyph {
unsigned int top;
unsigned int height;
unsigned int width;
unsigned int descent;
unsigned int ascent;
unsigned int advance_width;
unsigned char *pixmap;
} glyph_t;
__always_inline
int kerning_offset(char c, char pc, FT_Face face) {
FT_Vector kerning;
FT_Get_Kerning(face, c, pc, FT_KERNING_DEFAULT, &kerning);
return (int) (kerning.x / 64);
}
__always_inline
glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
glyph_t glyph;
glyph.pixmap = slot->bitmap.buffer;
glyph.width = slot->bitmap.width;
glyph.height = slot->bitmap.rows;
glyph.top = slot->bitmap_top;
glyph.advance_width = slot->advance.x / 64;
glyph.descent = MAX(0, glyph.height - glyph.top);
glyph.ascent = MAX(0, MAX(glyph.top, glyph.height) - glyph.descent);
return glyph;
}
__always_inline
glyph_t get_glyph(char character, FT_Face face) {
}
text_dimensions_t text_dimension(char *text, FT_Face face) {
text_dimensions_t dimensions;
dimensions.width = 0;
int num_chars = (int) strlen(text);
unsigned int max_ascent = 0;
unsigned int max_descent = 0;
char pc = 0;
for (int i = 0; i < num_chars; i++) {
char c = text[i];
FT_Load_Char(face, c, 0);
glyph_t glyph = ft_glyph_to_glyph(face->glyph);
max_descent = MAX(max_descent, glyph.descent);
max_ascent = MAX(max_ascent, glyph.ascent);
int kerning_x = kerning_offset(c, pc, face);
dimensions.width += MAX(glyph.advance_width, glyph.width) + kerning_x;
pc = c;
}
dimensions.height = max_ascent + max_descent;
dimensions.baseline = max_descent;
return dimensions;
}
void draw_glyph(glyph_t *glyph, int x, int y, struct text_dimensions text_info, unsigned char *bitmap) {
unsigned int src = 0;
unsigned int dst = y * text_info.width + x;
unsigned int row_offset = text_info.width - glyph->width;
unsigned int buf_len = text_info.width * text_info.height;
for (unsigned int sy = 0; sy < glyph->height; sy++) {
for (unsigned int sx = 0; sx < glyph->width; sx++) {
if (dst < buf_len) {
bitmap[dst] |= glyph->pixmap[src];
}
src++;
dst++;
}
dst += row_offset;
}
}
void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned char *bitmap) {
dyn_buffer_write_short(buf, 0x4D42); // Magic
dyn_buffer_write_int(buf, 0); // Size placeholder
dyn_buffer_write_int(buf, 0x5157); //Reserved
dyn_buffer_write_int(buf, 14 + 40 + 256 * 4); // pixels offset
dyn_buffer_write_int(buf, 40); // DIB size
dyn_buffer_write_int(buf, (int) dimensions.width);
dyn_buffer_write_int(buf, (int) dimensions.height);
dyn_buffer_write_short(buf, 1); // Color planes
dyn_buffer_write_short(buf, 8); // bits per pixel
dyn_buffer_write_int(buf, 0); // compression
dyn_buffer_write_int(buf, 0); // Ignored
dyn_buffer_write_int(buf, 3800); // hres
dyn_buffer_write_int(buf, 3800); // vres
dyn_buffer_write_int(buf, 256); // Color count
dyn_buffer_write_int(buf, 0); // Ignored
// RGBA32 Color table (Grayscale)
for (int i = 255; i >= 0; i--) {
dyn_buffer_write_int(buf, i + (i << 8) + (i << 16));
}
// Pixel array: write from bottom to top, with rows padded to multiples of 4-bytes
for (int y = (int) dimensions.height - 1; y >= 0; y--) {
for (unsigned int x = 0; x < dimensions.width; x++) {
dyn_buffer_write_char(buf, (char) bitmap[y * dimensions.width + x]);
}
while (buf->cur % 4 != 0) {
dyn_buffer_write_char(buf, 0);
}
}
// Size
*(int *) ((char *) buf->buf + 2) = buf->cur;
}
void parse_font(const char *buf, size_t buf_len, document_t *doc) {
if (library == NULL) {
FT_Init_FreeType(&library);
}
FT_Face face;
FT_Error err = FT_New_Memory_Face(library, (unsigned char *) buf, buf_len, 0, &face);
if (err != 0) {
return;
}
char font_name[1024];
if (face->style_name == NULL || *(face->style_name) == '?') {
strcpy(font_name, face->family_name);
} else {
snprintf(font_name, sizeof(font_name), "%s %s", face->family_name, face->style_name);
}
meta_line_t *meta_name = malloc(sizeof(meta_line_t) + strlen(font_name));
meta_name->key = MetaFontName;
strcpy(meta_name->strval, font_name);
APPEND_META(doc, meta_name)
int pixel = 64;
int num_chars = (int) strlen(font_name);
err = FT_Set_Pixel_Sizes(face, 0, pixel);
if (err != 0) {
return;
}
text_dimensions_t dimensions = text_dimension(font_name, face);
unsigned char *bitmap = calloc(dimensions.width * dimensions.height, 1);
FT_Vector pen;
pen.x = 0;
char pc = 0;
for (int i = 0; i < num_chars; i++) {
char c = font_name[i];
err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
if (err != 0) {
continue;
}
glyph_t glyph = ft_glyph_to_glyph(face->glyph);
pen.x += kerning_offset(c, pc, face);
pen.y = dimensions.height - glyph.ascent - dimensions.baseline;
draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap);
pen.x += glyph.advance_width;
pc = c;
}
dyn_buffer_t bmp_data = dyn_buffer_create();
bmp_format(&bmp_data, dimensions, bitmap);
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) bmp_data.buf, bmp_data.cur);
dyn_buffer_destroy(&bmp_data);
free(bitmap);
FT_Done_Face(face);
}

View File

@@ -1,9 +0,0 @@
#ifndef SIST2_FONT_H
#define SIST2_FONT_H
#include "src/sist.h"
void parse_font(const char * buf, size_t buf_len, document_t *doc);
#endif

View File

@@ -1,273 +0,0 @@
#include "src/sist.h"
#include "src/ctx.h"
AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
jpeg->width = dstW;
jpeg->height = dstH;
jpeg->time_base.den = 1000000;
jpeg->time_base.num = 1;
jpeg->i_quant_factor = qscale;
jpeg->pix_fmt = AV_PIX_FMT_YUVJ420P;
int ret = avcodec_open2(jpeg, jpeg_codec, NULL);
if (ret != 0) {
printf("Could not open jpeg encoder: %s!\n", av_err2str(ret));
return NULL;
}
return jpeg;
}
AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
AVFrame *scaled_frame = av_frame_alloc();
int dstW;
int dstH;
if (frame->width <= size && frame->height <= size) {
dstW = frame->width;
dstH = frame->height;
} else {
double ratio = (double) frame->width / frame->height;
if (frame->width > frame->height) {
dstW = size;
dstH = (int) (size / ratio);
} else {
dstW = (int) (size * ratio);
dstH = size;
}
}
struct SwsContext *ctx = sws_getContext(
decoder->width, decoder->height, decoder->pix_fmt,
dstW, dstH, AV_PIX_FMT_YUVJ420P,
SWS_FAST_BILINEAR, 0, 0, 0
);
int dst_buf_len = avpicture_get_size(AV_PIX_FMT_YUVJ420P, dstW, dstH);
uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len);
avpicture_fill((AVPicture *) scaled_frame, dst_buf, AV_PIX_FMT_YUVJ420P, dstW, dstH);
sws_scale(ctx,
(const uint8_t *const *) frame->data, frame->linesize,
0, decoder->height,
scaled_frame->data, scaled_frame->linesize
);
scaled_frame->width = dstW;
scaled_frame->height = dstH;
scaled_frame->format = AV_PIX_FMT_YUV420P;
sws_freeContext(ctx);
return scaled_frame;
}
AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx) {
AVFrame *frame = av_frame_alloc();
AVPacket avPacket;
av_init_packet(&avPacket);
int receive_ret = -EAGAIN;
while (receive_ret == -EAGAIN) {
// Get video frame
while (1) {
int read_frame_ret = av_read_frame(pFormatCtx, &avPacket);
if (read_frame_ret != 0) {
if (read_frame_ret != AVERROR_EOF) {
fprintf(stderr, "Error reading frame: %s\n", av_err2str(read_frame_ret));
}
av_frame_free(&frame);
av_packet_unref(&avPacket);
return NULL;
}
//Ignore audio/other frames
if (avPacket.stream_index != stream_idx) {
av_packet_unref(&avPacket);
continue;
}
break;
}
// Feed it to decoder
int decode_ret = avcodec_send_packet(decoder, &avPacket);
if (decode_ret != 0) {
printf("Error decoding frame: %s\n", av_err2str(decode_ret));
}
av_packet_unref(&avPacket);
receive_ret = avcodec_receive_frame(decoder, frame);
}
return frame;
}
void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
AVDictionaryEntry *tag = NULL;
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
char *key = tag->key;
for (; *key; ++key) *key = (char) tolower(*key);
if (strcmp(tag->key, "artist") == 0) {
size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
meta_tag->key = MetaArtist;
memcpy(meta_tag->strval, tag->value, len);
APPEND_META(doc, meta_tag)
} else if (strcmp(tag->key, "genre") == 0) {
size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
meta_tag->key = MetaGenre;
memcpy(meta_tag->strval, tag->value, len);
APPEND_META(doc, meta_tag)
} else if (strcmp(tag->key, "title") == 0) {
size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
meta_tag->key = MetaTitle;
memcpy(meta_tag->strval, tag->value, len);
APPEND_META(doc, meta_tag)
} else if (strcmp(tag->key, "album_artist") == 0) {
size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
meta_tag->key = MetaAlbumArtist;
memcpy(meta_tag->strval, tag->value, len);
APPEND_META(doc, meta_tag)
} else if (strcmp(tag->key, "album") == 0) {
size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
meta_tag->key = MetaAlbum;
memcpy(meta_tag->strval, tag->value, len);
APPEND_META(doc, meta_tag)
}
}
}
void parse_media(const char *filepath, document_t *doc) {
int video_stream = -1;
AVFormatContext *pFormatCtx = avformat_alloc_context();
if (pFormatCtx == NULL) {
fprintf(stderr, "Could not allocate AVFormatContext! %s \n", filepath);
return;
}
int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
if (res < 0) {
printf("ERR%s %s\n", filepath, av_err2str(res));
return;
}
avformat_find_stream_info(pFormatCtx, NULL);
for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) {
AVStream *stream = pFormatCtx->streams[i];
if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
meta_line_t *meta_audio = malloc(sizeof(meta_line_t));
meta_audio->key = MetaMediaAudioCodec;
meta_audio->intval = stream->codecpar->codec_id;
APPEND_META(doc, meta_audio)
append_audio_meta(pFormatCtx, doc);
} else if (stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
meta_line_t *meta_vid = malloc(sizeof(meta_line_t));
meta_vid->key = MetaMediaVideoCodec;
meta_vid->intval = stream->codecpar->codec_id;
APPEND_META(doc, meta_vid)
meta_line_t *meta_w = malloc(sizeof(meta_line_t));
meta_w->key = MetaWidth;
meta_w->intval = stream->codecpar->width;
APPEND_META(doc, meta_w)
meta_line_t *meta_h = malloc(sizeof(meta_line_t));
meta_h->key = MetaHeight;
meta_h->intval = stream->codecpar->height;
APPEND_META(doc, meta_h)
video_stream = i;
}
}
if (video_stream != -1) {
AVStream *stream = pFormatCtx->streams[video_stream];
if (stream->nb_frames > 1) {
//This is a video (not a still image)
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
meta_duration->key = MetaMediaDuration;
meta_duration->longval = pFormatCtx->duration / AV_TIME_BASE;
APPEND_META(doc, meta_duration)
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
meta_bitrate->key = MetaMediaBitrate;
meta_bitrate->intval = pFormatCtx->bit_rate;
APPEND_META(doc, meta_bitrate)
}
if (stream->codecpar->width <= 20 || stream->codecpar->height <= 20) {
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
}
// Decoder
AVCodec *video_codec = avcodec_find_decoder(stream->codecpar->codec_id);
AVCodecContext *decoder = avcodec_alloc_context3(video_codec);
avcodec_parameters_to_context(decoder, stream->codecpar);
avcodec_open2(decoder, video_codec, NULL);
//Seek
if (stream->nb_frames > 1 && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
int seek_ret = 0;
for (int i = 20; i >= 0; i--) {
seek_ret = av_seek_frame(pFormatCtx, video_stream,
stream->duration * 0.10, 0);
if (seek_ret == 0) {
break;
}
}
}
AVFrame *frame = read_frame(pFormatCtx, decoder, video_stream);
if (frame == NULL) {
avcodec_free_context(&decoder);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
}
// Scale frame
AVFrame *scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size);
// Encode frame to jpeg
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ScanCtx.tn_qscale);
avcodec_send_frame(jpeg_encoder, scaled_frame);
AVPacket jpeg_packet;
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);
av_frame_free(&frame);
av_free(*scaled_frame->data);
av_frame_free(&scaled_frame);
avcodec_free_context(&jpeg_encoder);
avcodec_free_context(&decoder);
}
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
}

View File

@@ -1,11 +0,0 @@
#ifndef SIST2_MEDIA_H
#define SIST2_MEDIA_H
#include "src/sist.h"
#define MIN_VIDEO_SIZE 1024 * 64
void parse_media(const char * filepath, document_t *doc);
#endif

View File

@@ -1,10 +1,12 @@
#include "mime.h"
unsigned int mime_get_mime_by_ext(GHashTable *ext_table, const char * ext) {
char lower[64];
char lower[8];
char *p = lower;
while ((*ext)) {
int cnt = 0;
while ((*ext) != '\0' && cnt + 1 < sizeof(lower)) {
*p++ = (char)tolower(*ext++);
cnt++;
}
*p = '\0';
return (size_t) g_hash_table_lookup(ext_table, lower);

View File

@@ -1,14 +1,14 @@
#ifndef SIST2_MIME_H
#define SIST2_MIME_H
#include "src/sist.h"
#include "../sist.h"
#define MAJOR_MIME(mime_id) (mime_id & 0x0FFF0000) >> 16
#define MAJOR_MIME(mime_id) (mime_id & 0x000F0000) >> 16
#define MIME_EMPTY 1
#define DONT_PARSE 0x80000000
#define SHOULD_PARSE(mime_id) (mime_id & DONT_PARSE) != DONT_PARSE
#define SHOULD_PARSE(mime_id) (ScanCtx.fast == 0 && (mime_id & DONT_PARSE) != DONT_PARSE && mime_id != 0)
#define PDF_MASK 0x40000000
#define IS_PDF(mime_id) (mime_id & PDF_MASK) == PDF_MASK
@@ -16,6 +16,24 @@
#define FONT_MASK 0x20000000
#define IS_FONT(mime_id) (mime_id & FONT_MASK) == FONT_MASK
#define ARC_MASK 0x10000000
#define IS_ARC(mime_id) (mime_id & ARC_MASK) == ARC_MASK
#define ARC_FILTER_MASK 0x08000000
#define IS_ARC_FILTER(mime_id) (mime_id & ARC_FILTER_MASK) == ARC_FILTER_MASK
#define DOC_MASK 0x04000000
#define IS_DOC(mime_id) (mime_id & DOC_MASK) == DOC_MASK
#define MOBI_MASK 0x02000000
#define IS_MOBI(mime_id) (mime_id & MOBI_MASK) == MOBI_MASK
#define MARKUP_MASK 0x01000000
#define IS_MARKUP(mime_id) (mime_id & MARKUP_MASK) == MARKUP_MASK
#define RAW_MASK 0x00800000
#define IS_RAW(mime_id) (mime_id & RAW_MASK) == RAW_MASK
enum major_mime {
MimeInvalid = 0,
MimeModel = 1,

File diff suppressed because it is too large Load Diff

View File

@@ -1,34 +1,41 @@
#include "parse.h"
#include "src/sist.h"
#include "src/ctx.h"
#include "mime.h"
#include "src/io/serialize.h"
__thread magic_t Magic;
#include <magic.h>
void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) {
void *full_buf;
#define MIN_VIDEO_SIZE 1024 * 64
#define MIN_IMAGE_SIZE 1024 * 2
if (job->info.st_size <= bytes_read) {
full_buf = malloc(job->info.st_size);
memcpy(full_buf, buf, job->info.st_size);
} else {
if (*fd == -1) {
*fd = open(job->filepath, O_RDONLY);
if (*fd == -1) {
perror("open");
printf("%s\n", job->filepath);
free(job);
return NULL;
}
}
full_buf = malloc(job->info.st_size);
memcpy(full_buf, buf, bytes_read);
int ret = read(*fd, full_buf + bytes_read, job->info.st_size - bytes_read);
if (ret == -1) {
perror("read");
int fs_read(struct vfile *f, void *buf, size_t size) {
if (f->fd == -1) {
f->fd = open(f->filepath, O_RDONLY);
if (f->fd == -1) {
LOG_ERRORF(f->filepath, "open(): [%d] %s", errno, strerror(errno))
return -1;
}
}
return full_buf;
return read(f->fd, buf, size);
}
#define CLOSE_FILE(f) if (f.close != NULL) {f.close(&f);};
void fs_close(struct vfile *f) {
if (f->fd != -1) {
close(f->fd);
}
}
void fs_reset(struct vfile *f) {
if (f->fd != -1) {
lseek(f->fd, 0, SEEK_SET);
}
}
void parse(void *arg) {
@@ -36,91 +43,131 @@ void parse(void *arg) {
parse_job_t *job = arg;
document_t doc;
if (incremental_get(ScanCtx.original_table, job->info.st_ino) == job->info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, job->info.st_ino);
free(job);
int inc_ts = incremental_get(ScanCtx.original_table, job->vfile.info.st_ino);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, job->vfile.info.st_ino);
return;
}
if (Magic == NULL) {
Magic = magic_open(MAGIC_MIME_TYPE);
magic_load(Magic, NULL);
}
doc.filepath = job->filepath;
doc.ext = (short) job->ext;
doc.base = (short) job->base;
doc.meta_head = NULL;
doc.meta_tail = NULL;
doc.mime = 0;
doc.size = job->info.st_size;
doc.ino = job->info.st_ino;
doc.mtime = job->info.st_mtim.tv_sec;
doc.size = job->vfile.info.st_size;
doc.ino = job->vfile.info.st_ino;
doc.mtime = job->vfile.info.st_mtim.tv_sec;
uuid_generate_time_safe(doc.uuid);
char *buf[PARSE_BUF_SIZE];
uuid_generate(doc.uuid);
char *buf[MAGIC_BUF_SIZE];
if (job->info.st_size == 0) {
if (LogCtx.very_verbose) {
char uuid_str[UUID_STR_LEN];
uuid_unparse(doc.uuid, uuid_str);
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", uuid_str)
}
if (job->vfile.info.st_size == 0) {
doc.mime = MIME_EMPTY;
} else if (*(job->filepath + job->ext) != '\0') {
} else if (*(job->filepath + job->ext) != '\0' && (job->ext - job->base != 1)) {
doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
}
int fd = -1;
int bytes_read = 0;
if (doc.mime == 0) {
if (doc.mime == 0 && !ScanCtx.fast) {
// Get mime type with libmagic
fd = open(job->filepath, O_RDONLY);
if (fd == -1) {
perror("open");
free(job);
if (!job->vfile.is_fs_file) {
LOG_WARNING(job->filepath, "Guessing mime type with libmagic inside archive files is not currently supported");
goto abort;
}
bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE);
if (bytes_read < 0) {
if (job->vfile.is_fs_file) {
LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
} else {
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc))
}
CLOSE_FILE(job->vfile)
return;
}
bytes_read = read(fd, buf, PARSE_BUF_SIZE);
magic_t magic = magic_open(MAGIC_MIME_TYPE);
magic_load(magic, NULL);
const char *magic_mime_str = magic_buffer(Magic, buf, bytes_read);
const char *magic_mime_str = magic_buffer(magic, buf, bytes_read);
if (magic_mime_str != NULL) {
doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
LOG_DEBUGF(job->filepath, "libmagic: %s", magic_mime_str);
if (doc.mime == 0) {
fprintf(stderr, "Couldn't find mime %s, %s!\n", magic_mime_str, job->filepath + job->base);
LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str);
}
}
job->vfile.reset(&job->vfile);
magic_close(magic);
}
int mmime = MAJOR_MIME(doc.mime);
if (!(SHOULD_PARSE(doc.mime))) {
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) || mmime == MimeAudio || mmime == MimeImage) {
// parse_media(job->filepath, &doc);
} else if (IS_RAW(doc.mime)) {
parse_raw(&ScanCtx.raw_ctx, &job->vfile, &doc);
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
parse_media(&ScanCtx.media_ctx, &job->vfile, &doc);
} else if (IS_PDF(doc.mime)) {
void *pdf_buf = read_all(job, (char *) buf, bytes_read, &fd);
parse_pdf(pdf_buf, doc.size, &doc);
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc.mime), &doc);
if (pdf_buf != buf) {
free(pdf_buf);
} else if (mmime == MimeText && ScanCtx.text_ctx.content_size > 0) {
if (IS_MARKUP(doc.mime)) {
parse_markup(&ScanCtx.text_ctx, &job->vfile, &doc);
} else {
parse_text(&ScanCtx.text_ctx, &job->vfile, &doc);
}
} else if (mmime == MimeText && ScanCtx.content_size > 0) {
// parse_text(bytes_read, &fd, (char *) buf, &doc);
} else if (IS_FONT(doc.mime)) {
// void *font_buf = read_all(job, (char *) buf, bytes_read, &fd);
// parse_font(font_buf, doc.size, &doc);
//
// if (font_buf != buf) {
// free(font_buf);
// }
parse_font(&ScanCtx.font_ctx, &job->vfile, &doc);
} else if (
ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
IS_ARC(doc.mime) ||
(IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
)) {
parse_archive(&ScanCtx.arc_ctx, &job->vfile, &doc);
} else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc.mime)) {
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, &doc);
} else if (is_cbr(&ScanCtx.comic_ctx, doc.mime) || is_cbz(&ScanCtx.comic_ctx, doc.mime)) {
parse_comic(&ScanCtx.comic_ctx, &job->vfile, &doc);
} else if (IS_MOBI(doc.mime)) {
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc);
}
abort:
//Parent meta
if (!uuid_is_null(job->parent)) {
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
meta_parent->key = MetaParent;
uuid_unparse(job->parent, meta_parent->str_val);
APPEND_META((&doc), meta_parent)
}
write_document(&doc);
if (fd != -1) {
close(fd);
}
free(job);
CLOSE_FILE(job->vfile)
}
void cleanup_parse() {
// noop
}

View File

@@ -1,10 +1,16 @@
#ifndef SIST2_PARSE_H
#define SIST2_PARSE_H
#include "src/sist.h"
#include "../sist.h"
#define PARSE_BUF_SIZE 4096
#define MAGIC_BUF_SIZE 4096 * 6
int fs_read(struct vfile *f, void *buf, size_t size);
void fs_close(struct vfile *f);
void fs_reset(struct vfile *f);
void parse(void *arg);
void cleanup_parse();
#endif

View File

@@ -1,142 +0,0 @@
#include "pdf.h"
#include "src/ctx.h"
__always_inline
fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
fz_page *cover = fz_load_page(ctx, fzdoc, 0);
fz_rect bounds = fz_bound_page(ctx, cover);
float scale;
float w = (float) bounds.x1 - bounds.x0;
float h = (float) bounds.y1 - bounds.y0;
if (w > h) {
scale = (float) ScanCtx.tn_size / w;
} else {
scale = (float) ScanCtx.tn_size / h;
}
fz_matrix m = fz_scale(scale, scale);
bounds = fz_transform_rect(bounds, m);
fz_irect bbox = fz_round_rect(bounds);
fz_pixmap *pixmap = fz_new_pixmap_with_bbox(ctx, ctx->colorspace->rgb, bbox, NULL, 0);
fz_clear_pixmap_with_value(ctx, pixmap, 0xFF);
fz_device *dev = fz_new_draw_device(ctx, m, pixmap);
pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_run_page(ctx, cover, dev, fz_identity, NULL);
pthread_mutex_unlock(&ScanCtx.mupdf_mu);
fz_drop_device(ctx, dev);
fz_buffer *fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params);
unsigned char *tn_buf;
size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf);
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
fz_drop_pixmap(ctx, pixmap);
fz_drop_buffer(ctx, fzbuf);
return cover;
}
void fz_noop_callback(__attribute__((unused)) void *user, __attribute__((unused)) const char *message) {}
void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
static int mu_is_initialized = 0;
if (!mu_is_initialized) {
pthread_mutex_init(&ScanCtx.mupdf_mu, NULL);
mu_is_initialized = 1;
}
fz_context *ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
fz_stream *stream = NULL;
fz_document *fzdoc = NULL;
fz_var(stream);
fz_var(fzdoc);
fz_try(ctx)
{
fz_disable_icc(ctx);
fz_register_document_handlers(ctx);
//disable warnings
ctx->warn.print = fz_noop_callback;
ctx->error.print = fz_noop_callback;
stream = fz_open_memory(ctx, buf, buf_len);
fzdoc = fz_open_document_with_stream(ctx, mime_get_mime_text(doc->mime), stream);
int page_count = fz_count_pages(ctx, fzdoc);
fz_page *cover = render_cover(ctx, doc, fzdoc);
fz_stext_options opts;
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
for (int current_page = 0; current_page < page_count; current_page++) {
fz_page *page;
if (current_page == 0) {
page = cover;
} else {
page = fz_load_page(ctx, fzdoc, current_page);
}
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_run_page_contents(ctx, page, dev, fz_identity, NULL);
pthread_mutex_unlock(&ScanCtx.mupdf_mu);
fz_drop_device(ctx, dev);
fz_stext_block *block = stext->first_block;
while (block != NULL) {
if (block->type != FZ_STEXT_BLOCK_TEXT) {
block = block->next;
continue;
}
fz_stext_line *line = block->u.t.first_line;
while (line != NULL) {
fz_stext_char *c = line->first_char;
while (c != NULL) {
if (text_buffer_append_char(&text_buf, c->c) == TEXT_BUF_FULL) {
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
goto write_loop_end;
}
c = c->next;
}
line = line->next;
}
block = block->next;
}
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
}
write_loop_end:;
text_buffer_terminate_string(&text_buf);
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
meta_content->key = MetaContent;
memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur);
text_buffer_destroy(&text_buf);
APPEND_META(doc, meta_content)
}
fz_always(ctx)
{
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
} fz_catch(ctx) {
fprintf(stderr, "Error %s %s\n", doc->filepath, ctx->error.message);
}
}

View File

@@ -1,9 +0,0 @@
#ifndef SIST2_PDF_H
#define SIST2_PDF_H
#include "src/sist.h"
void parse_pdf(void *buf, size_t buf_len, document_t *doc);
#endif

View File

@@ -1,43 +0,0 @@
#include "text.h"
#include "src/ctx.h"
void parse_text(int bytes_read, int *fd, char *buf, document_t *doc) {
char *intermediate_buf;
int intermediate_buf_len;
if (bytes_read == doc->size || bytes_read >= ScanCtx.content_size) {
int to_copy = MIN(bytes_read, ScanCtx.content_size);
intermediate_buf = malloc(to_copy);
intermediate_buf_len = to_copy;
memcpy(intermediate_buf, buf, to_copy);
} else {
if (*fd == -1) {
*fd = open(doc->filepath, O_RDONLY);
}
int to_read = MIN(ScanCtx.content_size, doc->size) - bytes_read;
intermediate_buf = malloc(to_read + bytes_read);
intermediate_buf_len = to_read + bytes_read;
if (bytes_read != 0) {
memcpy(intermediate_buf, buf, bytes_read);
}
read(*fd, intermediate_buf + bytes_read, to_read);
}
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
for (int i = 0; i < intermediate_buf_len; i++) {
text_buffer_append_char(&text_buf, *(intermediate_buf + i));
}
text_buffer_terminate_string(&text_buf);
meta_line_t *meta = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
meta->key = MetaContent;
strcpy(meta->strval, text_buf.dyn_buffer.buf);
text_buffer_destroy(&text_buf);
free(intermediate_buf);
APPEND_META(doc, meta)
}

View File

@@ -1,8 +0,0 @@
#ifndef SIST2_TEXT_H
#define SIST2_TEXT_H
#include "src/sist.h"
void parse_text(int bytes_read, int *fd, char *buf, document_t *doc);
#endif

View File

@@ -1,56 +1,51 @@
#ifndef SIST_H
#define SIST_H
#define UUID_STR_LEN 37
#ifndef FALSE
#define FALSE (0)
#define BOOL int
#endif
#include <glib-2.0/glib.h>
#include <unistd.h>
#ifndef TRUE
#define TRUE (!FALSE)
#endif
#undef MAX
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#undef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#ifndef PATH_MAX
#define PATH_MAX 4096
#endif
#undef ABS
#define ABS(a) (((a) < 0) ? -(a) : (a))
#define UUID_STR_LEN 37
#define UNUSED(x) __attribute__((__unused__)) x
#include "util.h"
#include "log.h"
#include "types.h"
#include "libscan/scan.h"
#include <cjson/cJSON.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <unistd.h>
#include <fcntl.h>
#include <ftw.h>
#include <uuid.h>
#include <magic.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
#include <libavcodec/avcodec.h>
#include <ctype.h>
#include <mupdf/fitz.h>
#include <mupdf/pdf.h>
#include "argparse/argparse.h"
#include <time.h>
#include <limits.h>
#include <pthread.h>
#include <dirent.h>
#include <sys/stat.h>
#include <wordexp.h>
#include <onion/onion.h>
#include <onion/handler.h>
#include <onion/block.h>
#include <onion/shortcuts.h>
#include <curl/curl.h>
#include <sys/types.h>
#include <errno.h>
#include <ctype.h>
#include "cJSON/cJSON.h"
#include "types.h"
#include "tpool.h"
#include "util.h"
#include "src/index/elastic.h"
#include "io/store.h"
#include "io/serialize.h"
#include "io/walk.h"
#include "parsing/parse.h"
#include "parsing/mime.h"
#include "parsing/text.h"
#include "parsing/pdf.h"
#include "parsing/media.h"
#include "parsing/font.h"
#include "index/web.h"
#include "web/serve.h"
#include "cli.h"
;
#endif

4
src/static/css/autocomplete.min.css vendored Normal file
View File

@@ -0,0 +1,4 @@
.autocomplete-suggestions { text-align: left; cursor: default; border: 1px solid #ccc; border-top: 0; background: #fff; box-shadow: -1px 1px 3px rgba(0,0,0,.1); position: absolute; display: none; z-index: 9999; max-height: 254px; overflow: hidden; overflow-y: auto; box-sizing: border-box; }
.autocomplete-suggestion { position: relative; padding: 0 .6em; line-height: 23px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; font-size: 1.02em; color: #333; }
.autocomplete-suggestion b { font-weight: normal; color: #1f8dd6; }
.autocomplete-suggestion.selected { background: #f0f0f0; }

File diff suppressed because one or more lines are too long

1
src/static/css/bricklayer.min.css vendored Normal file
View File

@@ -0,0 +1 @@
.bricklayer{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-box-align:start;-webkit-align-items:flex-start;-ms-flex-align:start;align-items:flex-start;-webkit-box-pack:center;-webkit-justify-content:center;-ms-flex-pack:center;justify-content:center;-webkit-flex-wrap:wrap;-ms-flex-wrap:wrap;flex-wrap:wrap}.bricklayer-column-sizer{width:100%;display:none}@media screen and (min-width:640px){.bricklayer-column-sizer{width:50%}}@media screen and (min-width:980px){.bricklayer-column-sizer{width:33.333%}}@media screen and (min-width:1200px){.bricklayer-column-sizer{width:25%}}.bricklayer-column{-webkit-box-flex:1;-webkit-flex:1;-ms-flex:1;flex:1;padding-left:5px;padding-right:5px}

540
src/static/css/dark.css Normal file
View File

@@ -0,0 +1,540 @@
*:focus {
outline: 0;
}
.info-icon {
width: 1rem;
margin-right: 0.2rem;
cursor: pointer;
line-height: 1rem;
height: 1rem;
background-image: url(data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hsaW5rIiB4PSIwcHgiIHk9IjBweCIKICAgICB2aWV3Qm94PSIwIDAgNDI2LjY2NyA0MjYuNjY3IiBzdHlsZT0iZW5hYmxlLWJhY2tncm91bmQ6bmV3IDAgMCA0MjYuNjY3IDQyNi42Njc7IiBmaWxsPSIjZmZmIj4KPGc+CiAgICA8Zz4KICAgICAgICA8Zz4KICAgICAgICAgICAgPHJlY3QgeD0iMTkyIiB5PSIxOTIiIHdpZHRoPSI0Mi42NjciIGhlaWdodD0iMTI4Ii8+CiAgICAgICAgICAgIDxwYXRoIGQ9Ik0yMTMuMzMzLDBDOTUuNDY3LDAsMCw5NS40NjcsMCwyMTMuMzMzczk1LjQ2NywyMTMuMzMzLDIxMy4zMzMsMjEzLjMzM1M0MjYuNjY3LDMzMS4yLDQyNi42NjcsMjEzLjMzMwogICAgICAgICAgICAgICAgUzMzMS4yLDAsMjEzLjMzMywweiBNMjEzLjMzMywzODRjLTk0LjA4LDAtMTcwLjY2Ny03Ni41ODctMTcwLjY2Ny0xNzAuNjY3UzExOS4yNTMsNDIuNjY3LDIxMy4zMzMsNDIuNjY3CiAgICAgICAgICAgICAgICBTMzg0LDExOS4yNTMsMzg0LDIxMy4zMzNTMzA3LjQxMywzODQsMjEzLjMzMywzODR6Ii8+CiAgICAgICAgICAgIDxyZWN0IHg9IjE5MiIgeT0iMTA2LjY2NyIgd2lkdGg9IjQyLjY2NyIgaGVpZ2h0PSI0Mi42NjciLz4KICAgICAgICA8L2c+CiAgICA8L2c+CjwvZz4KPC9zdmc+Cg==);
filter: brightness(65%);
}
.info-icon:hover {
color: inherit;
}
.modal-title {
max-width: calc(100% - 2rem);
overflow: hidden;
text-overflow: ellipsis;
}
.path-row {
display: -ms-flexbox;
display: flex;
-ms-flex-align: start;
align-items: flex-start;
}
.tag-container {
margin-left: 0.3rem;
}
.path-line {
color: #BBB;
text-overflow: ellipsis;
overflow: hidden;
white-space: nowrap;
}
a {
color: #00BCD4;
}
body {
overflow-y: scroll;
background: black;
}
.progress {
margin-top: 1em;
}
.card, .modal-content {
margin-top: 1em;
background: #212121;
color: #e0e0e0;
border-radius: 1px;
border: none;
}
.table {
color: #e0e0e0;
}
.table td, .table th {
border: none;
}
.table thead th {
border-bottom: 1px solid #646464;
}
.modal-header .close {
color: #e0e0e0;
text-shadow: none;
}
.modal-header {
border-bottom: 1px solid #646464;
}
.sub-document {
background: #37474F !important;
}
.list-group-item.sub-document {
border-top: 1px solid #646464 !important;
}
.sub-document .text-muted {
color: #8a949c !important;
}
.list-group-item {
background: #212121;
color: #e0e0e0;
border-top: 1px solid #424242;
border-bottom: none;
border-left: none;
border-right: none;
padding: .25rem 0.5rem;
}
.list-group-item:first-child {
border-top: none;
}
.navbar-brand {
font-size: 1.75rem;
padding: 0;
color: #f5f5f5;
}
.navbar {
background: #546b7a;
}
a:hover,.btn:hover {
color: #fff;
}
.navbar span {
color: #eee;
}
.document {
padding: 0.5rem;
}
.document p {
margin-bottom: 0;
}
.document:hover p {
text-decoration: underline;
}
.badge-video {
color: #FFFFFF;
background-color: #F27761;
}
.badge-image {
color: #FFFFFF;
background-color: #AA99C9;
}
.badge-audio {
color: #FFFFFF;
background-color: #00ADEF;
}
.badge-resolution {
color: #212529;
background-color: #B0BEC5;
}
.badge-text {
color: #FFFFFF;
background-color: #FAAB3C;
}
.add-tag-button {
cursor: pointer;
color: #212529;
background-color: #e0e0e0;
}
.card-img-overlay {
pointer-events: none;
padding: 0.75rem;
bottom: unset;
top: 0;
left: unset;
right: unset;
}
.file-title {
width: 100%;
line-height: 1rem;
height: 1.1rem;
font-size: 10pt;
white-space: nowrap;
text-overflow: ellipsis;
overflow: hidden;
color: #00BCD4;
}
.badge {
margin-right: 3px;
}
.badge-delete {
margin-right: -2px;
margin-left: 2px;
margin-top: -1px;
font-family: monospace;
font-size: 90%;
background: rgba(0,0,0,0.2);
padding: 0.1em 0.4em;
color: white;
cursor: pointer;
}
.badge-user {
color: #212529;
background-color: #e0e0e0;
}
.fit {
display: block;
min-width: 64px;
max-width: 100%;
max-height: 175px;
margin: 0 auto 0;
padding: 3px 3px 0;
width: auto;
height: auto;
}
.fit-sm {
display: block;
max-width: 64px;
max-height: 64px;
margin: 0 auto;
width: auto;
height: auto;
}
.audio-fit {
height: 39px;
vertical-align: bottom;
display: inline;
width: 100%;
}
@media screen and (min-width: 1500px) {
.container {
max-width: 1440px;
}
.bricklayer-column-sizer {
width: 20% !important;
}
.bricklayer-column {
max-width: 20%;
}
}
@media screen and (min-width: 1800px) {
.container {
max-width: 1550px;
}
}
mark {
background: rgba(251, 191, 41, 0.25);
border-radius: 0;
padding: 1px 0;
color: inherit;
}
.content-div mark {
background: rgba(251, 191, 41, 0.40);
color: white;
}
.content-div {
font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
font-size: 13px;
padding: 1em;
background-color: #37474F;
border: 1px solid #616161;
border-radius: 4px;
margin: 3px;
white-space: normal;
color: rgb(224, 224, 224);
overflow: hidden;
}
.irs-single, .irs-from, .irs-to {
font-size: 13px;
background-color: #00BCD4;
}
.irs-slider {
cursor: col-resize;
}
.irs {
margin-top: 1em;
margin-bottom: 1em;
}
.custom-select {
overflow: auto;
background-color: #37474F;
border: 1px solid #616161;
color: #bdbdbd;
}
.custom-select:focus {
border-color: #757575;
outline: 0;
box-shadow: 0 0 0 .2rem rgba(0, 123, 255, .25);
}
option {
outline: none;
}
.form-control {
background-color: #37474F;
border: 1px solid #616161;
color: #fff;
}
.form-control:focus {
background-color: #546E7A;
color: #fff;
}
.input-group-text {
background: #263238;
border: 1px solid #616161;
color: #dbdbdb;
}
::placeholder {
color: #BDBDBD !important;
opacity: 1;
}
.inspire-tree .selected > .wholerow, .inspire-tree .selected > .title-wrap:hover + .wholerow {
background: none;
}
.inspire-tree .icon-expand::before, .inspire-tree .icon-collapse::before {
background-color: black;
}
.inspire-tree .title {
color: #eee;
}
.inspire-tree {
font-weight: 400;
font-size: 14px;
font-family: Helvetica, Nueue, Verdana, sans-serif;
max-height: 350px;
overflow: auto;
}
.page-indicator {
line-height: 1rem;
padding: 0.5rem;
background: #212121;
color: #eee;
margin-top: 1em;
}
.btn-xs {
padding: .1rem .3rem;
font-size: .875rem;
border-radius: .2rem;
}
.btn {
color: #eee;
}
.nav-tabs .nav-link {
color: #e0e0e0;
}
.nav-tabs .nav-item.show .nav-link, .nav-tabs .nav-link.active {
background-color: #212121;
border-color: #616161 #616161 #212121;
color: #e0e0e0;
}
.nav-tabs .nav-link:focus, .nav-tabs .nav-link:focus {
border-color: #616161 #616161 #212121;
color: #e0e0e0;
}
.nav-tabs .nav-link:focus, .nav-tabs .nav-link:hover {
border-color: #e0e0e0 #e0e0e0 #212121;
color: #e0e0e0;
}
.nav-tabs {
border-bottom: #616161;
}
.nav {
margin-top: 0.5rem;
}
@media (max-width: 800px) {
#treeTabs {
flex-basis: inherit;
flex-grow: inherit;
}
}
.list-group {
margin-top: 1em;
}
.wrapper-sm {
min-width: 64px;
}
.media-expanded {
display: inherit;
}
.media-expanded .fit {
max-height: 250px;
}
@media (max-width: 650px) {
.media-expanded .fit {
max-height: none;
}
.tagline {
display: none;
}
}
.version {
color: #00BCD4;
margin-left: -18px;
margin-top: -14px;
font-size: 11px;
}
@media (min-width: 800px) {
.small-btn {
display: none;
}
.large-btn {
display: inherit;
}
}
@media (max-width: 801px) {
.small-btn {
display: inherit;
}
.large-btn {
display: none;
}
}
#searchBar {
border-right: none;
}
#pathTree .title {
cursor: pointer;
}
svg {
fill: white;
}
.play {
position: absolute;
width: 50px;
height: 50px;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
pointer-events: none;
}
.play svg {
fill: rgba(255, 255, 255, 0.7);
}
.img-wrapper:hover svg {
fill: rgba(255, 255, 255, 1);
}
.pointer {
cursor: pointer;
}
.stats-card {
text-align: center;
margin-top: 1em;
padding: 1em;
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .08) !important;
border-radius: 0;
border: none;
background: #212121;
}
.graph {
display: inline-block;
width: 40%;
}
.full-screen {
position: absolute;
left: 0;
width: 100%;
}
.stats-btn {
float: right;
margin-bottom: 10px;
}
#graphs-card svg text {
fill: #eee;
}
.wholerow {
outline: none !important;
}
.stat > .card-body {
padding: 0.7em 1.25em;
}

1
src/static/css/jquery.toast.min.css vendored Normal file
View File

@@ -0,0 +1 @@
.jq-toast-wrap,.jq-toast-wrap *{margin:0;padding:0}.jq-toast-wrap{display:block;position:fixed;width:250px;pointer-events:none!important;letter-spacing:normal;z-index:9000!important}.jq-toast-wrap.bottom-left{bottom:20px;left:20px}.jq-toast-wrap.bottom-right{bottom:20px;right:40px}.jq-toast-wrap.top-left{top:20px;left:20px}.jq-toast-wrap.top-right{top:20px;right:40px}.jq-toast-single{display:block;width:100%;padding:10px;margin:0 0 5px;border-radius:4px;font-size:12px;font-family:arial,sans-serif;line-height:17px;position:relative;pointer-events:all!important;background-color:#444;color:#fff}.jq-toast-single h2{font-family:arial,sans-serif;font-size:14px;margin:0 0 7px;background:0 0;color:inherit;line-height:inherit;letter-spacing:normal}.jq-toast-single a{color:#eee;text-decoration:none;font-weight:700;border-bottom:1px solid #fff;padding-bottom:3px;font-size:12px}.jq-toast-single ul{margin:0 0 0 15px;background:0 0;padding:0}.jq-toast-single ul li{list-style-type:disc!important;line-height:17px;background:0 0;margin:0;padding:0;letter-spacing:normal}.close-jq-toast-single{position:absolute;top:3px;right:7px;font-size:14px;cursor:pointer}.jq-toast-loader{display:block;position:absolute;top:-2px;height:5px;width:0;left:0;border-radius:5px;background:red}.jq-toast-loaded{width:100%}.jq-has-icon{padding:10px 10px 10px 50px;background-repeat:no-repeat;background-position:10px}.jq-icon-info{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAGwSURBVEhLtZa9SgNBEMc9sUxxRcoUKSzSWIhXpFMhhYWFhaBg4yPYiWCXZxBLERsLRS3EQkEfwCKdjWJAwSKCgoKCcudv4O5YLrt7EzgXhiU3/4+b2ckmwVjJSpKkQ6wAi4gwhT+z3wRBcEz0yjSseUTrcRyfsHsXmD0AmbHOC9Ii8VImnuXBPglHpQ5wwSVM7sNnTG7Za4JwDdCjxyAiH3nyA2mtaTJufiDZ5dCaqlItILh1NHatfN5skvjx9Z38m69CgzuXmZgVrPIGE763Jx9qKsRozWYw6xOHdER+nn2KkO+Bb+UV5CBN6WC6QtBgbRVozrahAbmm6HtUsgtPC19tFdxXZYBOfkbmFJ1VaHA1VAHjd0pp70oTZzvR+EVrx2Ygfdsq6eu55BHYR8hlcki+n+kERUFG8BrA0BwjeAv2M8WLQBtcy+SD6fNsmnB3AlBLrgTtVW1c2QN4bVWLATaIS60J2Du5y1TiJgjSBvFVZgTmwCU+dAZFoPxGEEs8nyHC9Bwe2GvEJv2WXZb0vjdyFT4Cxk3e/kIqlOGoVLwwPevpYHT+00T+hWwXDf4AJAOUqWcDhbwAAAAASUVORK5CYII=);background-color:#31708f;color:#d9edf7;border-color:#bce8f1}.jq-icon-warning{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAGYSURBVEhL5ZSvTsNQFMbXZGICMYGYmJhAQIJAICYQPAACiSDB8AiICQQJT4CqQEwgJvYASAQCiZiYmJhAIBATCARJy+9rTsldd8sKu1M0+dLb057v6/lbq/2rK0mS/TRNj9cWNAKPYIJII7gIxCcQ51cvqID+GIEX8ASG4B1bK5gIZFeQfoJdEXOfgX4QAQg7kH2A65yQ87lyxb27sggkAzAuFhbbg1K2kgCkB1bVwyIR9m2L7PRPIhDUIXgGtyKw575yz3lTNs6X4JXnjV+LKM/m3MydnTbtOKIjtz6VhCBq4vSm3ncdrD2lk0VgUXSVKjVDJXJzijW1RQdsU7F77He8u68koNZTz8Oz5yGa6J3H3lZ0xYgXBK2QymlWWA+RWnYhskLBv2vmE+hBMCtbA7KX5drWyRT/2JsqZ2IvfB9Y4bWDNMFbJRFmC9E74SoS0CqulwjkC0+5bpcV1CZ8NMej4pjy0U+doDQsGyo1hzVJttIjhQ7GnBtRFN1UarUlH8F3xict+HY07rEzoUGPlWcjRFRr4/gChZgc3ZL2d8oAAAAASUVORK5CYII=);background-color:#8a6d3b;color:#fcf8e3;border-color:#faebcc}.jq-icon-error{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAHOSURBVEhLrZa/SgNBEMZzh0WKCClSCKaIYOED+AAKeQQLG8HWztLCImBrYadgIdY+gIKNYkBFSwu7CAoqCgkkoGBI/E28PdbLZmeDLgzZzcx83/zZ2SSXC1j9fr+I1Hq93g2yxH4iwM1vkoBWAdxCmpzTxfkN2RcyZNaHFIkSo10+8kgxkXIURV5HGxTmFuc75B2RfQkpxHG8aAgaAFa0tAHqYFfQ7Iwe2yhODk8+J4C7yAoRTWI3w/4klGRgR4lO7Rpn9+gvMyWp+uxFh8+H+ARlgN1nJuJuQAYvNkEnwGFck18Er4q3egEc/oO+mhLdKgRyhdNFiacC0rlOCbhNVz4H9FnAYgDBvU3QIioZlJFLJtsoHYRDfiZoUyIxqCtRpVlANq0EU4dApjrtgezPFad5S19Wgjkc0hNVnuF4HjVA6C7QrSIbylB+oZe3aHgBsqlNqKYH48jXyJKMuAbiyVJ8KzaB3eRc0pg9VwQ4niFryI68qiOi3AbjwdsfnAtk0bCjTLJKr6mrD9g8iq/S/B81hguOMlQTnVyG40wAcjnmgsCNESDrjme7wfftP4P7SP4N3CJZdvzoNyGq2c/HWOXJGsvVg+RA/k2MC/wN6I2YA2Pt8GkAAAAASUVORK5CYII=);background-color:#a94442;color:#f2dede;border-color:#ebccd1}.jq-icon-success{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAADsSURBVEhLY2AYBfQMgf///3P8+/evAIgvA/FsIF+BavYDDWMBGroaSMMBiE8VC7AZDrIFaMFnii3AZTjUgsUUWUDA8OdAH6iQbQEhw4HyGsPEcKBXBIC4ARhex4G4BsjmweU1soIFaGg/WtoFZRIZdEvIMhxkCCjXIVsATV6gFGACs4Rsw0EGgIIH3QJYJgHSARQZDrWAB+jawzgs+Q2UO49D7jnRSRGoEFRILcdmEMWGI0cm0JJ2QpYA1RDvcmzJEWhABhD/pqrL0S0CWuABKgnRki9lLseS7g2AlqwHWQSKH4oKLrILpRGhEQCw2LiRUIa4lwAAAABJRU5ErkJggg==);color:#dff0d8;background-color:#3c763d;border-color:#d6e9c6}

404
src/static/css/light.css Normal file
View File

@@ -0,0 +1,404 @@
*:focus {
outline: 0;
}
.info-icon {
width: 1rem;
margin-right: 0.2rem;
cursor: pointer;
line-height: 1rem;
height: 1rem;
background-image: url(data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hsaW5rIiB4PSIwcHgiIHk9IjBweCIKICAgICB2aWV3Qm94PSIwIDAgNDI2LjY2NyA0MjYuNjY3IiBzdHlsZT0iZW5hYmxlLWJhY2tncm91bmQ6bmV3IDAgMCA0MjYuNjY3IDQyNi42Njc7IiBmaWxsPSIjZmZmIj4KPGc+CiAgICA8Zz4KICAgICAgICA8Zz4KICAgICAgICAgICAgPHJlY3QgeD0iMTkyIiB5PSIxOTIiIHdpZHRoPSI0Mi42NjciIGhlaWdodD0iMTI4Ii8+CiAgICAgICAgICAgIDxwYXRoIGQ9Ik0yMTMuMzMzLDBDOTUuNDY3LDAsMCw5NS40NjcsMCwyMTMuMzMzczk1LjQ2NywyMTMuMzMzLDIxMy4zMzMsMjEzLjMzM1M0MjYuNjY3LDMzMS4yLDQyNi42NjcsMjEzLjMzMwogICAgICAgICAgICAgICAgUzMzMS4yLDAsMjEzLjMzMywweiBNMjEzLjMzMywzODRjLTk0LjA4LDAtMTcwLjY2Ny03Ni41ODctMTcwLjY2Ny0xNzAuNjY3UzExOS4yNTMsNDIuNjY3LDIxMy4zMzMsNDIuNjY3CiAgICAgICAgICAgICAgICBTMzg0LDExOS4yNTMsMzg0LDIxMy4zMzNTMzA3LjQxMywzODQsMjEzLjMzMywzODR6Ii8+CiAgICAgICAgICAgIDxyZWN0IHg9IjE5MiIgeT0iMTA2LjY2NyIgd2lkdGg9IjQyLjY2NyIgaGVpZ2h0PSI0Mi42NjciLz4KICAgICAgICA8L2c+CiAgICA8L2c+CjwvZz4KPC9zdmc+Cg==);
filter: brightness(45%);
}
.info-icon:hover {
color: inherit;
}
.modal-title {
max-width: calc(100% - 2rem);
overflow: hidden;
text-overflow: ellipsis;
}
.path-row {
display: -ms-flexbox;
display: flex;
-ms-flex-align: start;
align-items: flex-start;
}
.tag-container {
margin-left: 0.3rem;
}
.path-line {
color: #444;
text-overflow: ellipsis;
overflow: hidden;
white-space: nowrap;
}
body {
overflow-y: scroll;
}
.progress {
margin-top: 1em;
}
.card {
margin-top: 1em;
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .08) !important;
border-radius: 0;
border: none;
}
.sub-document {
background: #AB47BC1F !important;
}
.navbar-brand {
font-size: 1.75rem;
padding: 0;
}
.navbar {
background: #F7F7F7;
border-bottom: solid 1px #dfdfdf;
}
.document {
padding: 0.5rem;
}
.document p {
margin-bottom: 0;
}
.document:hover p {
text-decoration: underline;
}
.badge-video {
color: #FFFFFF;
background-color: #F27761;
}
.badge-image {
color: #FFFFFF;
background-color: #AA99C9;
}
.badge-audio {
color: #FFFFFF;
background-color: #00ADEF;
}
.badge-resolution {
color: #212529;
background-color: #FFC107;
}
.badge-user {
color: #212529;
background-color: #e0e0e0;
}
.badge {
margin-right: 3px;
}
.badge-delete {
margin-right: -2px;
margin-left: 2px;
margin-top: -1px;
font-family: monospace;
font-size: 90%;
background: rgba(0,0,0,0.2);
padding: 0.1em 0.4em;
color: white;
cursor: pointer;
}
.badge-text {
color: #FFFFFF;
background-color: #FAAB3C;
}
.add-tag-button {
cursor: pointer;
color: #212529;
background-color: #e0e0e0;
}
.card-img-overlay {
pointer-events: none;
padding: 0.75rem;
bottom: unset;
top: 0;
left: unset;
right: unset;
}
.file-title {
width: 100%;
line-height: 1rem;
height: 1.1rem;
font-size: 10pt;
white-space: nowrap;
text-overflow: ellipsis;
overflow: hidden;
}
.fit {
display: block;
min-width: 64px;
max-width: 100%;
max-height: 175px;
margin: 0 auto 0;
padding: 3px 3px 0 3px;
width: auto;
height: auto;
}
.fit-sm {
display: block;
max-width: 64px;
max-height: 64px;
margin: 0 auto 0;
width: auto;
height: auto;
}
.audio-fit {
height: 39px;
vertical-align: bottom;
display: inline;
width: 100%;
}
@media screen and (max-width: 1200px) {
.bricklayer-column {
max-width: 100%;
}
}
@media screen and (min-width: 1500px) {
.container {
max-width: 1440px;
}
.bricklayer-column-sizer {
width: 20% !important;
}
.bricklayer-column {
max-width: 20%;
}
}
@media screen and (min-width: 1800px) {
.container {
max-width: 1550px;
}
}
mark {
background: #fff217;
border-radius: 0;
padding: 1px 0;
color: inherit;
}
.content-div {
font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
font-size: 13px;
padding: 1em;
background-color: #f5f5f5;
border: 1px solid #ccc;
border-radius: 4px;
margin: 3px;
white-space: normal;
color: #000;
overflow: hidden;
}
.irs-single, .irs-from, .irs-to {
font-size: 13px;
}
.irs-slider {
cursor: col-resize;
}
.custom-select {
overflow: auto;
}
.irs {
margin-top: 1em;
margin-bottom: 1em;
}
.inspire-tree .selected > .wholerow, .inspire-tree .selected > .title-wrap:hover + .wholerow {
background: none;
}
.inspire-tree {
font-weight: 400;
font-size: 14px;
font-family: Helvetica, Nueue, Verdana, sans-serif;
max-height: 350px;
overflow: auto;
}
.page-indicator {
line-height: 1rem;
padding: 0.5rem;
background: #f8f9fa;
margin-top: 1em;
}
.btn-xs {
padding: .1rem .3rem;
font-size: .875rem;
border-radius: .2rem;
}
.nav {
margin-top: 0.5rem;
}
@media (max-width: 800px) {
#treeTabs {
flex-basis: inherit;
flex-grow: inherit;
}
}
.list-group {
margin-top: 1em;
}
.list-group-item {
padding: .25rem 0.5rem;
}
.wrapper-sm {
min-width: 64px;
}
.media-expanded {
display: inherit;
}
.media-expanded .fit {
max-height: 250px;
}
@media (max-width: 650px) {
.media-expanded .fit {
max-height: none;
}
.tagline {
display: none;
}
}
.version {
color: #007bff;
margin-left: -18px;
margin-top: -14px;
font-size: 11px;
}
@media (min-width: 800px) {
.small-btn {
display: none;
}
.large-btn {
display: inherit;
}
}
@media (max-width: 801px) {
.small-btn {
display: inherit;
}
.large-btn {
display: none;
}
}
#searchBar {
border-right: none;
}
#pathTree .title {
cursor: pointer;
}
.play {
position: absolute;
width: 50px;
height: 50px;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
pointer-events: none;
}
.play svg {
fill: rgba(0, 0, 0, 0.7);
}
.img-wrapper:hover svg {
fill: rgba(0, 0, 0, 1);
}
.pointer {
cursor: pointer;
}
.stats-card {
text-align: center;
margin-top: 1em;
padding: 1em;
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .08) !important;
border-radius: 0;
border: none;
background: #fff;
}
.graph {
display: inline-block;
width: 40%;
}
.full-screen {
position: absolute;
left: 0;
width: 100%;
}
.stats-btn {
float: right;
margin-bottom: 10px;
}
.wholerow {
outline: none !important;
}
.stat > .card-body {
padding: 0.7em 1.25em;
}

3
src/static/css/lity.min.css vendored Normal file
View File

@@ -0,0 +1,3 @@
/*! Lity - v2.4.0 - 2019-08-10
* http://sorgalla.com/lity/
* Copyright (c) 2015-2019 Jan Sorgalla; Licensed MIT */.lity{z-index:9990;position:fixed;top:0;right:0;bottom:0;left:0;white-space:nowrap;background:#0b0b0b;background:rgba(0,0,0,0.9);outline:none !important;opacity:0;-webkit-transition:opacity .3s ease;-o-transition:opacity .3s ease;transition:opacity .3s ease}.lity.lity-opened{opacity:1}.lity.lity-closed{opacity:0}.lity *{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.lity-wrap{z-index:9990;position:fixed;top:0;right:0;bottom:0;left:0;text-align:center;outline:none !important}.lity-wrap:before{content:'';display:inline-block;height:100%;vertical-align:middle;margin-right:-0.25em}.lity-loader{z-index:9991;color:#fff;position:absolute;top:50%;margin-top:-0.8em;width:100%;text-align:center;font-size:14px;font-family:Arial,Helvetica,sans-serif;opacity:0;-webkit-transition:opacity .3s ease;-o-transition:opacity .3s ease;transition:opacity .3s ease}.lity-loading .lity-loader{opacity:1}.lity-container{z-index:9992;position:relative;text-align:left;vertical-align:middle;display:inline-block;white-space:normal;max-width:100%;max-height:100%;outline:none !important}.lity-content{z-index:9993;width:100%;-webkit-transform:scale(1);-ms-transform:scale(1);-o-transform:scale(1);transform:scale(1);-webkit-transition:-webkit-transform .3s ease;transition:-webkit-transform .3s ease;-o-transition:-o-transform .3s ease;transition:transform .3s ease;transition:transform .3s ease, -webkit-transform .3s ease, -o-transform .3s ease}.lity-loading .lity-content,.lity-closed .lity-content{-webkit-transform:scale(.8);-ms-transform:scale(.8);-o-transform:scale(.8);transform:scale(.8)}.lity-content:after{content:'';position:absolute;left:0;top:0;bottom:0;display:block;right:0;width:auto;height:auto;z-index:-1;-webkit-box-shadow:0 0 8px rgba(0,0,0,0.6);box-shadow:0 0 8px rgba(0,0,0,0.6)}.lity-close{z-index:9994;width:35px;height:35px;position:fixed;right:0;top:0;-webkit-appearance:none;cursor:pointer;text-decoration:none;text-align:center;padding:0;color:#fff;font-style:normal;font-size:35px;font-family:Arial,Baskerville,monospace;line-height:35px;text-shadow:0 1px 2px rgba(0,0,0,0.6);border:0;background:none;outline:none;-webkit-box-shadow:none;box-shadow:none}.lity-close::-moz-focus-inner{border:0;padding:0}.lity-close:hover,.lity-close:focus,.lity-close:active,.lity-close:visited{text-decoration:none;text-align:center;padding:0;color:#fff;font-style:normal;font-size:35px;font-family:Arial,Baskerville,monospace;line-height:35px;text-shadow:0 1px 2px rgba(0,0,0,0.6);border:0;background:none;outline:none;-webkit-box-shadow:none;box-shadow:none}.lity-close:active{top:1px}.lity-image img{max-width:100%;display:block;line-height:0;border:0}.lity-iframe .lity-container,.lity-youtube .lity-container,.lity-vimeo .lity-container,.lity-facebookvideo .lity-container,.lity-googlemaps .lity-container{width:100%;max-width:964px}.lity-iframe-container{width:100%;height:0;padding-top:56.25%;overflow:hidden;pointer-events:all;-webkit-transform:translateZ(0);transform:translateZ(0);-webkit-overflow-scrolling:touch}.lity-iframe-container iframe{position:absolute;display:block;top:0;left:0;width:100%;height:100%;-webkit-box-shadow:0 0 8px rgba(0,0,0,0.6);box-shadow:0 0 8px rgba(0,0,0,0.6);background:#000}.lity-hide{display:none}

Binary file not shown.

After

Width:  |  Height:  |  Size: 595 B

View File

Before

Width:  |  Height:  |  Size: 669 B

After

Width:  |  Height:  |  Size: 669 B

16
src/static/js/5_inspire-tree.min.js vendored Normal file

File diff suppressed because one or more lines are too long

1
src/static/js/7_jquery.toast.min.js vendored Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

1
src/static/js/bricklayer.min.js vendored Normal file
View File

@@ -0,0 +1 @@
!function t(e,n,r){function o(s,u){if(!n[s]){if(!e[s]){var l="function"==typeof require&&require;if(!u&&l)return l(s,!0);if(i)return i(s,!0);var a=new Error("Cannot find module '"+s+"'");throw a.code="MODULE_NOT_FOUND",a}var p=n[s]={exports:{}};e[s][0].call(p.exports,function(t){var n=e[s][1][t];return o(n?n:t)},p,p.exports,t,e,n,r)}return n[s].exports}for(var i="function"==typeof require&&require,s=0;s<r.length;s++)o(r[s]);return o}({1:[function(t,e,n){var r,o=this&&this.__extends||function(t,e){function n(){this.constructor=t}for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r]);t.prototype=null===e?Object.create(e):(n.prototype=e.prototype,new n)};!function(t){function e(t){return[].slice.call(t)}function n(t,e,n){if(window.CustomEvent)var r=new CustomEvent(e,{detail:n});else{var r=document.createEvent("CustomEvent");r.initCustomEvent(e,!0,!0,n)}return t.dispatchEvent(r)}var r={rulerClassName:"bricklayer-column-sizer",columnClassName:"bricklayer-column"},i=function(){function t(t){this.element=document.createElement("div"),this.element.className=t}return t.prototype.destroy=function(){this.element.parentNode.removeChild(this.element)},t}(),s=function(t){function e(){t.apply(this,arguments)}return o(e,t),e.prototype.getWidth=function(){this.element.setAttribute("style","\n display: block;\n visibility: hidden !important;\n top: -1000px !important;\n ");var t=this.element.offsetWidth;return this.element.removeAttribute("style"),t},e}(i),u=function(t){function e(){t.apply(this,arguments)}return o(e,t),e}(i),l=function(){function t(t,e){void 0===e&&(e=r),this.element=t,this.options=e,this.build(),this.buildResponsive()}return t.prototype.append=function(t){var n=this;if(Array.isArray(t))return void t.forEach(function(t){return n.append(t)});var r=this.findMinHeightColumn();this.elements=e(this.elements).concat([t]),this.applyPosition("append",r,t)},t.prototype.prepend=function(t){var n=this;if(Array.isArray(t))return void t.forEach(function(t){return n.prepend(t)});var r=this.findMinHeightColumn();this.elements=[t].concat(e(this.elements)),this.applyPosition("prepend",r,t)},t.prototype.on=function(t,e){return this.element.addEventListener("bricklayer."+t,e),this},t.prototype.redraw=function(){var t=this.columnCount;this.checkColumnCount(!1),this.reorderElements(t),n(this.element,"bricklayer.redraw",{columnCount:t})},t.prototype.destroy=function(){var t=this;this.ruler.destroy(),e(this.elements).forEach(function(e){return t.element.appendChild(e)}),e(this.getColumns()).forEach(function(t){return t.parentNode.removeChild(t)}),n(this.element,"bricklayer.destroy",{})},t.prototype.build=function(){this.ruler=new s(this.options.rulerClassName),this.elements=this.getElementsInOrder(),this.element.insertBefore(this.ruler.element,this.element.firstChild)},t.prototype.buildResponsive=function(){var t=this;window.addEventListener("resize",function(e){return t.checkColumnCount()}),this.checkColumnCount(),this.on("breakpoint",function(e){return t.reorderElements(e.detail.columnCount)}),this.columnCount>=1&&this.reorderElements(this.columnCount)},t.prototype.getColumns=function(){return this.element.querySelectorAll(":scope > ."+this.options.columnClassName)},t.prototype.findMinHeightColumn=function(){var t=e(this.getColumns()),n=t.map(function(t){return t.offsetHeight}),r=Math.min.apply(null,n);return t[n.indexOf(r)]},t.prototype.getElementsInOrder=function(){return this.element.querySelectorAll(":scope > *:not(."+this.options.columnClassName+"):not(."+this.options.rulerClassName+")")},t.prototype.checkColumnCount=function(t){void 0===t&&(t=!0);var e=this.getColumnCount();this.columnCount!==e&&(t&&n(this.element,"bricklayer.breakpoint",{columnCount:e}),this.columnCount=e)},t.prototype.reorderElements=function(t){var n=this;void 0===t&&(t=1),(t==1/0||1>t)&&(t=1);for(var r=e(this.elements).map(function(t){var e=t.parentNode?t.parentNode.removeChild(t):t;return e}),o=this.getColumns(),i=0;i<o.length;i++)o[i].parentNode.removeChild(o[i]);for(var i=0;t>i;i++){var s=new u(this.options.columnClassName).element;this.element.appendChild(s)}r.forEach(function(t){var e=n.findMinHeightColumn();e.appendChild(t)})},t.prototype.getColumnCount=function(){var t=this.element.offsetWidth,e=this.ruler.getWidth();return Math.round(t/e)},t.prototype.applyPosition=function(t,e,r){var o=this,i=function(i){var s=i+t.charAt(0).toUpperCase()+t.substr(1);n(o.element,"bricklayer."+s,{item:r,column:e})};switch(i("before"),t){case"append":e.appendChild(r);break;case"prepend":e.insertBefore(r,e.firstChild)}i("after")},t}();t.Container=l}(r||(r={})),function(t,n){"function"==typeof define&&define.amd?define(function(){return n()}):"undefined"!=typeof window&&t===window?t.Bricklayer=n():"object"==typeof e&&e.exports&&(e.exports=n())}("undefined"!=typeof window?window:this,function(){return r.Container})},{}]},{},[1]);

2
src/static/js/d3.v5.min.js vendored Normal file

File diff suppressed because one or more lines are too long

2
src/static/js/dom-to-image.min.js vendored Normal file

File diff suppressed because one or more lines are too long

664
src/static/js/dom.js Normal file
View File

@@ -0,0 +1,664 @@
/**
* Enable gif loading on hover
*/
function gifOver(thumbnail, hit) {
let callee = arguments.callee;
thumbnail.addEventListener("mouseover", function () {
thumbnail.mouseStayedOver = true;
window.setTimeout(function () {
if (thumbnail.mouseStayedOver) {
thumbnail.removeEventListener('mouseover', callee, false);
//Load gif
thumbnail.setAttribute("src", "f/" + hit["_id"]);
}
}, 600);
});
thumbnail.addEventListener("mouseout", function () {
//Reset timer
thumbnail.mouseStayedOver = false;
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
})
}
function getContentHighlight(hit) {
if (hit.hasOwnProperty("highlight")) {
if (hit["highlight"].hasOwnProperty("content")) {
return hit["highlight"]["content"][0];
} else if (hit["highlight"].hasOwnProperty("content.nGram")) {
return hit["highlight"]["content.nGram"][0];
}
}
return undefined;
}
function getPathHighlight(hit) {
if (hit.hasOwnProperty("highlight")) {
if (hit["highlight"].hasOwnProperty("path.text")) {
return hit["highlight"]["path.text"][0];
} else if (hit["highlight"].hasOwnProperty("path.nGram")) {
return hit["highlight"]["path.nGram"][0];
}
}
return undefined;
}
function applyNameToTitle(hit, title, extension) {
if (hit.hasOwnProperty("highlight")) {
if (hit["highlight"].hasOwnProperty("name")) {
title.insertAdjacentHTML('afterbegin', hit["highlight"]["name"] + extension);
return;
} else if (hit["highlight"].hasOwnProperty("name.nGram")) {
title.insertAdjacentHTML('afterbegin', hit["highlight"]["name.nGram"] + extension);
return;
}
}
title.appendChild(document.createTextNode(hit["_source"]["name"] + extension));
}
function shouldPlayVideo(hit) {
const videoc = hit["_source"]["videoc"];
const mime = hit["_source"]["mime"];
return mime &&
mime.startsWith("video/") &&
!("parent" in hit["_source"]) &&
hit["_source"]["extension"] !== "mkv" &&
hit["_source"]["extension"] !== "avi" &&
videoc !== "hevc" &&
videoc !== "mpeg2video" &&
videoc !== "wmv3";
}
function shouldDisplayRawImage(hit) {
const mime = hit["_source"]["mime"];
return mime &&
mime.startsWith("image/") &&
hit["_source"]["mime"] &&
!hit["_source"]["parent"] &&
hit["_source"]["videoc"] !== "tiff" &&
hit["_source"]["videoc"] !== "raw" &&
hit["_source"]["videoc"] !== "ppm";
}
function makePlaceholder(w, h, small) {
let calc;
if (small) {
calc = w > h
? (64 / w / h) >= 100
? (64 * w / h)
: 64
: 64;
} else {
calc = w > h
? (175 / w / h) >= 272
? (175 * w / h)
: 175
: 175;
}
const el = document.createElement("div");
el.setAttribute("style", `height: ${calc}px`);
return el;
}
function ext(hit) {
return hit["_source"].hasOwnProperty("extension") && hit["_source"]["extension"] !== "" ? "." + hit["_source"]["extension"] : "";
}
function makeTitle(hit) {
let title = document.createElement("div");
title.setAttribute("class", "file-title");
let extension = ext(hit);
applyNameToTitle(hit, title, extension);
title.setAttribute("title", hit["_source"]["path"] + "/" + hit["_source"]["name"] + extension);
return title;
}
function getTags(hit, mimeCategory) {
let tags = [];
switch (mimeCategory) {
case "video":
case "image":
if (hit["_source"].hasOwnProperty("videoc") && hit["_source"]["videoc"]) {
const formatTag = document.createElement("span");
formatTag.setAttribute("class", "badge badge-pill badge-video");
formatTag.appendChild(document.createTextNode(hit["_source"]["videoc"].replace(" ", "")));
tags.push(formatTag);
}
break;
case "audio": {
if (hit["_source"].hasOwnProperty("audioc") && hit["_source"]["audioc"]) {
let formatTag = document.createElement("span");
formatTag.setAttribute("class", "badge badge-pill badge-audio");
formatTag.appendChild(document.createTextNode(hit["_source"]["audioc"]));
tags.push(formatTag);
}
}
break;
}
// User tags
if (hit["_source"].hasOwnProperty("tag")) {
hit["_source"]["tag"].forEach(tag => {
tags.push(makeUserTag(tag, hit));
})
}
return tags
}
function makeUserTag(tag, hit) {
const userTag = document.createElement("span");
userTag.setAttribute("class", "badge badge-pill badge-user");
userTag.setAttribute("title", tag.split("#")[0])
const tokens = tag.split("#");
if (tokens.length > 1) {
const bg = "#" + tokens[1];
const fg = lum(tokens[1]) > 50 ? "#000" : "#fff";
userTag.setAttribute("style", `background-color: ${bg}; color: ${fg}`);
}
const deleteButton = document.createElement("span");
deleteButton.setAttribute("class", "badge badge-pill badge-delete")
deleteButton.setAttribute("title", "Delete tag")
deleteButton.appendChild(document.createTextNode("X"));
deleteButton.addEventListener("click", () => {
deleteTag(tag, hit).then(() => {
userTag.remove();
});
});
userTag.addEventListener("mouseenter", () => userTag.appendChild(deleteButton));
userTag.addEventListener("mouseleave", () => deleteButton.remove());
const name = tokens[0].split(".")[tokens[0].split(".").length - 1];
userTag.appendChild(document.createTextNode(name));
return userTag;
}
function infoButtonCb(hit) {
return () => {
getDocumentInfo(hit["_id"]).then(doc => {
$("#modal-title").text(doc["name"] + ext(hit));
const tbody = $("<tbody>");
$("#modal-body").empty()
.append($("<table class='table table-sm'>")
.append($("<thead>")
.append($("<tr>")
.append($("<th>").text("Field"))
.append($("<th>").text("Value"))
)
)
.append(tbody)
);
const displayFields = new Set([
"mime", "size", "mtime", "path", "title", "width", "height", "duration", "audioc", "videoc",
"bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag", "author",
"modified_by"
]);
Object.keys(doc)
.filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || key.startsWith("exif_"))
.forEach(key => {
tbody.append($("<tr>")
.append($("<td>").text(key))
.append($("<td>").text(doc[key]))
);
});
if (doc.hasOwnProperty("content") && doc["content"]) {
$("#modal-body").append($("<div class='content-div'>").text(doc["content"]))
}
$("#modal").modal();
});
}
}
function createDocCard(hit) {
let docCard = document.createElement("div");
docCard.setAttribute("class", "card");
let docCardBody = document.createElement("div");
docCardBody.setAttribute("class", "card-body document");
//Title
let title = makeTitle(hit);
let isSubDocument = false;
let link = document.createElement("a");
link.setAttribute("href", "f/" + hit["_id"]);
link.setAttribute("target", "_blank");
link.style.maxWidth = "calc(100% - 1.2rem)";
link.appendChild(title);
if (hit["_source"].hasOwnProperty("parent")) {
docCard.classList.add("sub-document");
isSubDocument = true;
}
let tagContainer = document.createElement("div");
tagContainer.setAttribute("class", "card-text");
if (hit["_source"].hasOwnProperty("mime") && hit["_source"]["mime"] !== null) {
let thumbnailOverlay = null;
let imgWrapper = document.createElement("div");
imgWrapper.setAttribute("style", "position: relative");
imgWrapper.setAttribute("class", "img-wrapper");
let mimeCategory = hit["_source"]["mime"].split("/")[0];
//Thumbnail
let thumbnail = makeThumbnail(mimeCategory, hit, imgWrapper, false);
//Thumbnail overlay
switch (mimeCategory) {
case "image":
thumbnailOverlay = document.createElement("div");
thumbnailOverlay.setAttribute("class", "card-img-overlay");
//Resolution
if (hit["_source"].hasOwnProperty("width") && hit["_source"]["width"] > 32 && hit["_source"]["height"] > 32) {
let resolutionBadge = document.createElement("span");
resolutionBadge.setAttribute("class", "badge badge-resolution");
if (hit["_source"].hasOwnProperty("width")) {
resolutionBadge.appendChild(document.createTextNode(hit["_source"]["width"] + "x" + hit["_source"]["height"]));
}
thumbnailOverlay.appendChild(resolutionBadge);
}
// Hover
if (thumbnail && hit["_source"]["videoc"] === "gif" && !isSubDocument) {
gifOver(thumbnail, hit);
}
break;
case "video":
//Duration
if (hit["_source"].hasOwnProperty("duration")) {
thumbnailOverlay = document.createElement("div");
thumbnailOverlay.setAttribute("class", "card-img-overlay");
const durationBadge = document.createElement("span");
durationBadge.setAttribute("class", "badge badge-resolution");
durationBadge.appendChild(document.createTextNode(humanTime(hit["_source"]["duration"])));
thumbnailOverlay.appendChild(durationBadge);
}
}
// Tags
let tags = getTags(hit, mimeCategory);
for (let i = 0; i < tags.length; i++) {
tagContainer.appendChild(tags[i]);
}
//Content
let contentHl = getContentHighlight(hit);
if (contentHl !== undefined) {
const contentDiv = document.createElement("div");
contentDiv.setAttribute("class", "content-div");
contentDiv.insertAdjacentHTML('afterbegin', contentHl);
docCard.appendChild(contentDiv);
}
if (thumbnail !== null) {
docCard.appendChild(imgWrapper);
}
//Audio
if (mimeCategory === "audio" && hit["_source"].hasOwnProperty("audioc") && !isSubDocument) {
let audio = document.createElement("audio");
audio.setAttribute("preload", "none");
audio.setAttribute("class", "audio-fit fit");
audio.setAttribute("controls", "");
audio.setAttribute("type", hit["_source"]["mime"]);
audio.setAttribute("src", "f/" + hit["_id"]);
docCard.appendChild(audio)
}
if (thumbnailOverlay !== null) {
imgWrapper.appendChild(thumbnailOverlay);
}
}
//Size tag
let sizeTag = document.createElement("small");
sizeTag.appendChild(document.createTextNode(humanFileSize(hit["_source"]["size"])));
sizeTag.setAttribute("class", "text-muted");
tagContainer.appendChild(sizeTag);
const titleWrapper = document.createElement("div");
titleWrapper.style.display = "flex";
const infoButton = makeInfoButton(hit);
titleWrapper.appendChild(infoButton);
titleWrapper.appendChild(link);
docCardBody.appendChild(titleWrapper);
docCard.appendChild(docCardBody);
docCardBody.appendChild(tagContainer);
attachTagContainerEventListener(tagContainer, hit);
return docCard;
}
function attachTagContainerEventListener(tagContainer, hit) {
const sizeTag = Array.from(tagContainer.children).find(child => child.tagName === "SMALL");
const addTagButton = document.createElement("span");
addTagButton.setAttribute("class", "badge badge-pill add-tag-button");
addTagButton.appendChild(document.createTextNode("+Add"));
tagContainer.addEventListener("mouseenter", () => tagContainer.insertBefore(addTagButton, sizeTag));
tagContainer.addEventListener("mouseleave", () => addTagButton.remove());
addTagButton.addEventListener("click", () => {
tagBar.value = "";
currentDocToTag = hit;
currentTagCallback = tag => {
tagContainer.insertBefore(makeUserTag(tag, hit), sizeTag);
}
$("#tagModal").modal("show");
tagBar.focus();
});
}
function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
if (!hit["_source"].hasOwnProperty("thumbnail")) {
return null;
}
let thumbnail = document.createElement("img");
if (small) {
thumbnail.setAttribute("class", "fit-sm");
} else {
thumbnail.setAttribute("class", "card-img-top fit");
}
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
if (shouldDisplayRawImage(hit)) {
thumbnail.addEventListener("click", () => {
const l = lity(`f/${hit["_id"]}#.jpg`);
window.addEventListener("scroll", () => l.close());
});
thumbnail.classList.add("pointer");
} else if (shouldPlayVideo(hit)) {
thumbnail.addEventListener("click", () => lity(`f/${hit["_id"]}#.mp4`));
thumbnail.classList.add("pointer");
if (!small) {
const playOverlay = document.createElement("div");
playOverlay.innerHTML = '<svg viewBox="0 0 494.942 494.942" xmlns="http://www.w3.org/2000/svg"><path d="m35.353 0 424.236 247.471-424.236 247.471z"/></svg>';
playOverlay.classList.add("play");
imgWrapper.prepend(playOverlay);
}
}
const placeholder = makePlaceholder(hit["_source"]["width"], hit["_source"]["height"], small);
imgWrapper.appendChild(placeholder);
thumbnail.addEventListener("error", () => {
imgWrapper.remove();
});
thumbnail.addEventListener("load", () => {
placeholder.remove();
imgWrapper.appendChild(thumbnail);
});
return thumbnail;
}
function makeInfoButton(hit) {
const infoButton = document.createElement("span");
infoButton.setAttribute("class", "info-icon");
infoButton.addEventListener("click", infoButtonCb(hit));
return infoButton;
}
function createDocLine(hit) {
const mime = hit["_source"]["mime"];
let mimeCategory = mime ? mime.split("/")[0] : null;
let tags = getTags(hit, mimeCategory);
let imgWrapper = document.createElement("div");
imgWrapper.setAttribute("class", "align-self-start mr-1 wrapper-sm");
let media = document.createElement("div");
media.setAttribute("class", "media");
const line = document.createElement("div");
line.setAttribute("class", "list-group-item flex-column align-items-start");
if (hit["_source"].hasOwnProperty("parent")) {
line.classList.add("sub-document");
}
const infoButton = makeInfoButton(hit);
const title = makeTitle(hit);
let link = document.createElement("a");
link.setAttribute("href", "f/" + hit["_id"]);
link.setAttribute("target", "_blank");
link.style.maxWidth = "calc(100% - 1.2rem)";
link.appendChild(title);
const titleDiv = document.createElement("div");
const titleWrapper = document.createElement("div");
titleWrapper.style.display = "flex";
titleWrapper.appendChild(infoButton);
titleWrapper.appendChild(link);
titleDiv.appendChild(titleWrapper);
line.appendChild(media);
let thumbnail = makeThumbnail(mimeCategory, hit, imgWrapper, true);
if (thumbnail) {
media.appendChild(imgWrapper);
titleDiv.style.maxWidth = "calc(100% - 64px)";
} else {
titleDiv.style.maxWidth = "100%";
}
media.appendChild(titleDiv);
// Content
let contentHl = getContentHighlight(hit);
if (contentHl !== undefined) {
const contentDiv = document.createElement("div");
contentDiv.setAttribute("class", "content-div");
contentDiv.insertAdjacentHTML("afterbegin", contentHl);
titleDiv.appendChild(contentDiv);
}
let pathLine = document.createElement("div");
pathLine.setAttribute("class", "path-row");
let path = document.createElement("div");
path.setAttribute("class", "path-line");
path.setAttribute("title", hit["_source"]["path"] + "/");
const pathHighlight = getPathHighlight(hit);
if (pathHighlight) {
path.insertAdjacentHTML("afterbegin", pathHighlight + "/");
} else {
path.appendChild(document.createTextNode(hit["_source"]["path"] + "/"));
}
let tagContainer = document.createElement("div");
tagContainer.setAttribute("class", "tag-container");
for (let i = 0; i < tags.length; i++) {
tagContainer.appendChild(tags[i]);
}
//Size tag
let sizeTag = document.createElement("small");
sizeTag.appendChild(document.createTextNode(humanFileSize(hit["_source"]["size"])));
sizeTag.setAttribute("class", "text-muted");
tagContainer.appendChild(sizeTag);
titleDiv.appendChild(pathLine);
pathLine.appendChild(path);
pathLine.appendChild(tagContainer);
attachTagContainerEventListener(tagContainer, hit);
return line;
}
function makePreloader() {
const elem = document.createElement("div");
elem.setAttribute("class", "progress");
const bar = document.createElement("div");
bar.setAttribute("class", "progress-bar progress-bar-striped progress-bar-animated");
bar.setAttribute("style", "width: 100%");
elem.appendChild(bar);
return elem;
}
function makePageIndicator(searchResult) {
let pageIndicator = document.createElement("div");
pageIndicator.setAttribute("class", "page-indicator font-weight-light");
const totalHits = searchResult["aggregations"]["total_count"]["value"];
pageIndicator.appendChild(document.createTextNode(docCount + " / " + totalHits));
return pageIndicator;
}
function makeStatsCard(searchResult) {
let statsCard = document.createElement("div");
statsCard.setAttribute("class", "card stat");
let statsCardBody = document.createElement("div");
statsCardBody.setAttribute("class", "card-body");
// Stats
let stat = document.createElement("span");
const totalHits = searchResult["aggregations"]["total_count"]["value"];
stat.appendChild(document.createTextNode(totalHits + " results in " + searchResult["took"] + "ms"));
statsCardBody.appendChild(stat);
// Display mode
const resultMode = document.createElement("div");
resultMode.setAttribute("class", "btn-group btn-group-toggle");
resultMode.setAttribute("data-toggle", "buttons");
resultMode.style.cssFloat = "right";
const listMode = document.createElement("label");
listMode.setAttribute("class", "btn btn-primary");
listMode.setAttribute("title", "List mode");
listMode.innerHTML = '<svg width="20px" height="20px" role="img" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><path fill="currentColor" d="M80 368H16a16 16 0 0 0-16 16v64a16 16 0 0 0 16 16h64a16 16 0 0 0 16-16v-64a16 16 0 0 0-16-16zm0-320H16A16 16 0 0 0 0 64v64a16 16 0 0 0 16 16h64a16 16 0 0 0 16-16V64a16 16 0 0 0-16-16zm0 160H16a16 16 0 0 0-16 16v64a16 16 0 0 0 16 16h64a16 16 0 0 0 16-16v-64a16 16 0 0 0-16-16zm416 176H176a16 16 0 0 0-16 16v32a16 16 0 0 0 16 16h320a16 16 0 0 0 16-16v-32a16 16 0 0 0-16-16zm0-320H176a16 16 0 0 0-16 16v32a16 16 0 0 0 16 16h320a16 16 0 0 0 16-16V80a16 16 0 0 0-16-16zm0 160H176a16 16 0 0 0-16 16v32a16 16 0 0 0 16 16h320a16 16 0 0 0 16-16v-32a16 16 0 0 0-16-16z"></path></svg>';
const gridMode = document.createElement("label");
gridMode.setAttribute("class", "btn btn-primary");
gridMode.setAttribute("title", "Grid mode");
gridMode.innerHTML = '<svg width="20px" height="20px" role="img" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><path fill="currentColor" d="M149.333 56v80c0 13.255-10.745 24-24 24H24c-13.255 0-24-10.745-24-24V56c0-13.255 10.745-24 24-24h101.333c13.255 0 24 10.745 24 24zm181.334 240v-80c0-13.255-10.745-24-24-24H205.333c-13.255 0-24 10.745-24 24v80c0 13.255 10.745 24 24 24h101.333c13.256 0 24.001-10.745 24.001-24zm32-240v80c0 13.255 10.745 24 24 24H488c13.255 0 24-10.745 24-24V56c0-13.255-10.745-24-24-24H386.667c-13.255 0-24 10.745-24 24zm-32 80V56c0-13.255-10.745-24-24-24H205.333c-13.255 0-24 10.745-24 24v80c0 13.255 10.745 24 24 24h101.333c13.256 0 24.001-10.745 24.001-24zm-205.334 56H24c-13.255 0-24 10.745-24 24v80c0 13.255 10.745 24 24 24h101.333c13.255 0 24-10.745 24-24v-80c0-13.255-10.745-24-24-24zM0 376v80c0 13.255 10.745 24 24 24h101.333c13.255 0 24-10.745 24-24v-80c0-13.255-10.745-24-24-24H24c-13.255 0-24 10.745-24 24zm386.667-56H488c13.255 0 24-10.745 24-24v-80c0-13.255-10.745-24-24-24H386.667c-13.255 0-24 10.745-24 24v80c0 13.255 10.745 24 24 24zm0 160H488c13.255 0 24-10.745 24-24v-80c0-13.255-10.745-24-24-24H386.667c-13.255 0-24 10.745-24 24v80c0 13.255 10.745 24 24 24zM181.333 376v80c0 13.255 10.745 24 24 24h101.333c13.255 0 24-10.745 24-24v-80c0-13.255-10.745-24-24-24H205.333c-13.255 0-24 10.745-24 24z"></path></svg>';
resultMode.appendChild(gridMode);
resultMode.appendChild(listMode);
if (CONF.options.display === "grid") {
gridMode.classList.add("active")
} else {
listMode.classList.add("active")
}
gridMode.addEventListener("click", () => {
CONF.options.display = "grid";
CONF.save();
searchDebounced();
});
listMode.addEventListener("click", () => {
CONF.options.display = "list";
CONF.save();
searchDebounced();
});
statsCardBody.appendChild(resultMode);
// Sort mode
const sortMode = document.createElement("div");
sortMode.setAttribute("class", "dropdown");
sortMode.style.cssFloat = "right";
sortMode.style.marginRight = "10px";
const sortModeBtn = document.createElement("button");
sortModeBtn.setAttribute("class", "btn btn-md btn-primary dropdown-toggle");
sortModeBtn.setAttribute("id", "sortModeBtn");
sortModeBtn.setAttribute("type", "button");
sortModeBtn.setAttribute("data-toggle", "dropdown");
sortModeBtn.setAttribute("aria-haspopup", "true");
sortModeBtn.setAttribute("aria-expanded", "false");
sortModeBtn.setAttribute("title", "Sort options");
sortModeBtn.innerHTML = '<svg aria-hidden="true" width="20px" height="20px" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 320 512"><path fill="currentColor" d="M41 288h238c21.4 0 32.1 25.9 17 41L177 448c-9.4 9.4-24.6 9.4-33.9 0L24 329c-15.1-15.1-4.4-41 17-41zm255-105L177 64c-9.4-9.4-24.6-9.4-33.9 0L24 183c-15.1 15.1-4.4 41 17 41h238c21.4 0 32.1-25.9 17-41z"></path></svg>';
const sortModeMenu = document.createElement("div");
sortModeMenu.setAttribute("class", "dropdown-menu");
sortModeMenu.setAttribute("aria-labelledby", "sortModeBtn");
Object.keys(SORT_MODES).forEach(mode => {
const item = document.createElement("div");
item.setAttribute("class", "dropdown-item");
item.appendChild(document.createTextNode(SORT_MODES[mode].text));
sortModeMenu.appendChild(item);
item.onclick = function () {
CONF.options.sort = mode;
CONF.save();
searchDebounced();
};
});
sortMode.appendChild(sortModeBtn);
sortMode.appendChild(sortModeMenu);
statsCardBody.appendChild(sortMode);
if (totalHits !== 0) {
let sizeStat = document.createElement("div");
sizeStat.appendChild(document.createTextNode(humanFileSize(searchResult["aggregations"]["total_size"]["value"])));
statsCardBody.appendChild(sizeStat);
}
statsCard.appendChild(statsCardBody);
return statsCard;
}
function makeResultContainer() {
let resultContainer = document.createElement("div");
if (CONF.options.display === "grid") {
resultContainer.setAttribute("class", "bricklayer");
} else {
resultContainer.setAttribute("class", "list-group");
}
return resultContainer;
}

5
src/static/js/lity.min.js vendored Normal file

File diff suppressed because one or more lines are too long

871
src/static/js/search.js Normal file
View File

@@ -0,0 +1,871 @@
const SIZE = 40;
let mimeMap = [];
let tagMap = [];
let mimeTree;
let tagTree;
let searchBar = document.getElementById("searchBar");
let pathBar = document.getElementById("pathBar");
let tagBar = document.getElementById("tagBar");
let currentDocToTag = null;
let currentTagCallback = null;
let lastDoc = null;
let reachedEnd = false;
let docCount = 0;
let coolingDown = false;
let searchBusy = true;
let selectedIndices = [];
let indexMap = {};
let size_min = 0;
let size_max = 10000000000000;
let date_min = null;
let date_max = null;
SORT_MODES = {
score: {
text: "Relevance",
mode: [
{_score: {order: "desc"}},
{_tie: {order: "asc"}}
],
key: hit => hit["_score"]
},
date_asc: {
text: "Date (Ascending)", mode: [
{mtime: {order: "asc"}},
{_tie: {order: "asc"}}
],
key: hit => hit["_source"]["mtime"]
},
date_desc: {
text: "Date (Descending)", mode: [
{mtime: {order: "desc"}},
{_tie: {order: "asc"}}
],
key: hit => hit["_source"]["mtime"]
},
size_asc: {
text: "Size (Ascending)", mode: [
{size: {order: "asc"}},
{_tie: {order: "asc"}}
],
key: hit => hit["_source"]["size"]
},
size_desc: {
text: "Size (Descending)", mode: [
{size: {order: "desc"}},
{_tie: {order: "asc"}}
],
key: hit => hit["_source"]["size"]
},
};
function showEsError() {
$.toast({
heading: "Elasticsearch connection error",
text: "sist2 web module encountered an error while connecting " +
"to Elasticsearch. See server logs for more information.",
stack: false,
bgColor: "#a94442",
textColor: "#f2dede",
position: 'bottom-right',
hideAfter: false
});
}
window.onload = () => {
CONF.load();
new autoComplete({
selector: '#pathBar',
minChars: 1,
delay: 400,
renderItem: function (item) {
return '<div class="autocomplete-suggestion" data-val="' + item + '">' + item + '</div>';
},
source: async function (term, suggest) {
if (!CONF.options.suggestPath) {
return []
}
term = term.toLowerCase();
const choices = await getPathChoices();
let matches = [];
for (let i = 0; i < choices.length; i++) {
if (~choices[i].toLowerCase().indexOf(term)) {
matches.push(choices[i]);
}
}
suggest(matches.sort());
},
onSelect: function () {
searchDebounced();
}
});
searchBar.addEventListener("keyup", searchDebounced);
pathBar.addEventListener("keyup", e => {
if (e.key === "Enter") {
searchDebounced();
}
});
new autoComplete({
selector: '#tagBar',
minChars: 1,
delay: 200,
renderItem: function (item) {
return '<div class="autocomplete-suggestion" data-val="' + item + '">' + item.split("#")[0] + '</div>';
},
source: async function (term, suggest) {
term = term.toLowerCase();
const choices = await getTagChoices();
let matches = [];
for (let i = 0; i < choices.length; i++) {
if (~choices[i].toLowerCase().indexOf(term)) {
matches.push(choices[i]);
}
}
suggest(matches.sort());
},
onSelect: function (e, item) {
const name = item.split("#")[0];
const color = "#" + item.split("#")[1];
$("#tag-color").val(color);
$("#tag-color").trigger("keyup", color);
tagBar.value = name;
e.preventDefault();
}
});
[tagBar, document.getElementById("tag-color")].forEach(elem => {
elem.addEventListener("keyup", e => {
if (e.key === "Enter" && tagBar.value.length > 0) {
const tag = tagBar.value + document.getElementById("tag-color").value;
saveTag(tag, currentDocToTag).then(() => currentTagCallback(tag));
}
});
})
$("#tag-color").colorpicker({
format: "hex",
sliders: {
saturation: {
selector: '.colorpicker-saturation',
callLeft: 'setSaturationRatio',
callTop: 'setValueRatio'
},
hue: {
selector: '.colorpicker-hue',
maxLeft: 0,
callLeft: false,
callTop: 'setHueRatio'
}
}
});
};
function saveTag(tag, hit) {
const relPath = hit["_source"]["path"] + "/" + hit["_source"]["name"] + ext(hit);
return $.jsonPost("/tag/" + hit["_source"]["index"], {
delete: false,
name: tag,
doc_id: hit["_id"],
relpath: relPath
}).then(() => {
tagBar.blur();
$("#tagModal").modal("hide");
$.toast({
heading: "Tag added",
text: "Tag saved to index storage and updated in ElasticSearch",
stack: 3,
bgColor: "#00a4bc",
textColor: "#fff",
position: 'bottom-right',
hideAfter: 3000,
loaderBg: "#08c7e8",
});
})
}
function deleteTag(tag, hit) {
const relPath = hit["_source"]["path"] + "/" + hit["_source"]["name"] + ext(hit);
return $.jsonPost("/tag/" + hit["_source"]["index"], {
delete: true,
name: tag,
doc_id: hit["_id"],
relpath: relPath
}).then(() => {
$.toast({
heading: "Tag deleted",
text: "Tag deleted index storage and updated in ElasticSearch",
stack: 3,
bgColor: "#00a4bc",
textColor: "#fff",
position: 'bottom-right',
hideAfter: 3000,
loaderBg: "#08c7e8",
});
})
}
function toggleFuzzy() {
searchDebounced();
}
$.get("i").then(resp => {
const urlIndices = (new URLSearchParams(location.search)).get("i");
resp["indices"].forEach(idx => {
indexMap[idx.id] = idx.name;
const opt = $("<option>")
.attr("value", idx.id)
.append(idx.name);
if (urlIndices) {
if (urlIndices.split(",").indexOf(idx.name) !== -1) {
opt.attr("selected", true);
selectedIndices.push(idx.id);
}
} else if (!idx.name.includes("(nsfw)")) {
opt.attr("selected", true);
selectedIndices.push(idx.id);
}
$("#indices").append(opt);
});
createPathTree("#pathTree");
});
function getDocumentInfo(id) {
return $.getJSON("d/" + id).fail(showEsError)
}
function handleTreeClick(tree) {
return (node, e) => {
if (e !== "checked") {
return
}
if (node.id === "any") {
if (!node.itree.state.checked) {
tree.deselect();
}
} else {
tree.node("any").deselect();
}
searchDebounced();
}
}
$.jsonPost("es", {
aggs: {
mimeTypes: {
terms: {
field: "mime",
size: 10000
}
}
},
size: 0,
}).then(resp => {
resp["aggregations"]["mimeTypes"]["buckets"].sort((a, b) => a.key > b.key).forEach(bucket => {
let tmp = bucket["key"].split("/");
let category = tmp[0];
let mime = tmp[1];
let category_exists = false;
let child = {
"id": bucket["key"],
"text": `${mime} (${bucket["doc_count"]})`
};
mimeMap.forEach(node => {
if (node.text === category) {
node.children.push(child);
category_exists = true;
}
});
if (!category_exists) {
mimeMap.push({"text": category, children: [child]});
}
});
mimeMap.push({"text": "All", "id": "any"});
mimeTree = new InspireTree({
selection: {
mode: 'checkbox'
},
data: mimeMap
});
new InspireTreeDOM(mimeTree, {
target: '#mimeTree'
});
mimeTree.on("node.state.changed", handleTreeClick(mimeTree));
mimeTree.deselect();
mimeTree.node("any").select();
});
// Tags tree
$.jsonPost("es", {
aggs: {
tags: {
terms: {
field: "tag",
size: 10000
}
}
},
size: 0,
}).then(resp => {
resp["aggregations"]["tags"]["buckets"]
.sort((a, b) => a["key"].localeCompare(b["key"]))
.forEach(bucket => {
addTag(tagMap, bucket["key"], bucket["key"], bucket["doc_count"])
});
tagMap.push({"text": "All", "id": "any"});
tagTree = new InspireTree({
selection: {
mode: 'checkbox'
},
checkbox: {
autoCheckChildren: false
},
data: tagMap
});
new InspireTreeDOM(tagTree, {
target: '#tagTree'
});
tagTree.on("node.state.changed", handleTreeClick(tagTree));
tagTree.node("any").select();
searchBusy = false;
});
function addTag(map, tag, id, count) {
// let tags = tag.split("#")[0].split(".");
let tags = tag.split(".");
let child = {
id: id,
values: [id],
count: count,
text: tags.length !== 1 ? tags[0] : `${tags[0].split("#")[0]} (${count})`,
name: tags[0],
children: [],
isLeaf: tags.length === 1,
//Overwrite base functions
blur: function () {
},
select: function () {
this.state("selected", true);
return this.check()
},
deselect: function () {
this.state("selected", false);
return this.uncheck()
},
uncheck: function () {
if (!this.isLeaf) {
return;
}
baseStateChange('checked', false, 'unchecked', this, false);
this.state('indeterminate', false);
if (this.hasParent()) {
this.getParent().refreshIndeterminateState();
}
this._tree.end();
return this;
},
check: function () {
if (!this.isLeaf) {
return;
}
baseStateChange('checked', true, 'checked', this, false);
if (this.hasParent()) {
this.getParent().refreshIndeterminateState();
}
this._tree.end();
return this;
}
};
let found = false;
map.forEach(node => {
if (node.name.split("#")[0] === child.name.split("#")[0]) {
found = true;
if (tags.length !== 1) {
addTag(node.children, tags.slice(1).join("."), id, count);
} else {
// Same name, different color
node.count += count;
node.text = `${tags[0].split("#")[0]} (${node.count})`;
node.values.push(id);
}
}
});
if (!found) {
if (tags.length !== 1) {
addTag(child.children, tags.slice(1).join("."), id, count);
map.push(child);
} else {
map.push(child);
}
}
}
function insertHits(resultContainer, hits) {
for (let i = 0; i < hits.length; i++) {
if (CONF.options.display === "grid") {
resultContainer._brick.append(createDocCard(hits[i]));
} else {
resultContainer.appendChild(createDocLine(hits[i]));
}
docCount++;
}
}
window.addEventListener("scroll", function () {
if (!searchBusy) {
let threshold = 400;
if ((window.innerHeight + window.scrollY) >= document.body.offsetHeight - threshold) {
if (!reachedEnd) {
coolingDown = true;
search(lastDoc);
}
}
}
});
function getSelectedNodes(tree) {
let selectedNodes = [];
let selected = tree.selected();
for (let i = 0; i < selected.length; i++) {
if (selected[i].id === "any") {
return ["any"]
}
//Only get children
if (selected[i].text.indexOf("(") !== -1) {
if (selected[i].values) {
selectedNodes.push(selected[i].values);
} else {
selectedNodes.push(selected[i].id);
}
}
}
return selectedNodes
}
function search(after = null) {
lastDoc = null;
if (searchBusy) {
return;
}
searchBusy = true;
let searchResults = document.getElementById("searchResults");
//Clear old search results
let preload;
if (!after) {
while (searchResults.firstChild) {
searchResults.removeChild(searchResults.firstChild);
}
preload = makePreloader();
searchResults.appendChild(preload);
}
let query = searchBar.value;
let empty = query === "";
let condition = empty ? "should" : "must";
let filters = [
{range: {size: {gte: size_min, lte: size_max}}},
{terms: {index: selectedIndices}}
];
let fields = [
"name^8",
"content^3",
"album^8", "artist^8", "title^8", "genre^2", "album_artist^8",
"font_name^6"
];
if (CONF.options.searchInPath) {
fields.push("path.text^5");
}
if ($("#fuzzyToggle").prop("checked")) {
fields.push("content.nGram");
if (CONF.options.searchInPath) {
fields.push("path.nGram");
}
fields.push("name.nGram^3");
}
let path = pathBar.value.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
if (path !== "") {
filters.push({term: {path: path}})
}
let mimeTypes = getSelectedNodes(mimeTree);
if (!mimeTypes.includes("any")) {
filters.push({terms: {"mime": mimeTypes}});
}
let tags = getSelectedNodes(tagTree);
if (!tags.includes("any")) {
tags.forEach(tagGroup => {
filters.push({terms: {"tag": tagGroup}})
})
}
if (date_min && date_max) {
filters.push({range: {mtime: {gte: date_min, lte: date_max}}})
} else if (date_min) {
filters.push({range: {mtime: {gte: date_min}}})
} else if (date_max) {
filters.push({range: {mtime: {lte: date_max}}})
}
let q = {
"_source": {
excludes: ["content", "_tie"]
},
query: {
bool: {
[condition]: {
simple_query_string: {
query: query,
fields: fields,
default_operator: "and"
}
},
filter: filters
}
},
"sort": SORT_MODES[CONF.options.sort].mode,
aggs:
{
total_size: {"sum": {"field": "size"}},
total_count: {"value_count": {"field": "size"}}
},
size: SIZE,
};
if (after) {
q.search_after = [SORT_MODES[CONF.options.sort].key(after), after["_id"]];
}
if (CONF.options.highlight) {
q.highlight = {
pre_tags: ["<mark>"],
post_tags: ["</mark>"],
fragment_size: CONF.options.fragmentSize,
number_of_fragments: 1,
order: "score",
fields: {
content: {},
// "content.nGram": {},
name: {},
"name.nGram": {},
font_name: {},
}
};
if (CONF.options.searchInPath) {
q.highlight.fields["path.text"] = {};
q.highlight.fields["path.nGram"] = {};
}
}
$.jsonPost("es", q).then(searchResult => {
let hits = searchResult["hits"]["hits"];
if (hits) {
lastDoc = hits[hits.length - 1];
}
hits.forEach(hit => {
hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
});
if (!after) {
preload.remove();
searchResults.appendChild(makeStatsCard(searchResult));
} else {
let pageIndicator = makePageIndicator(searchResult);
searchResults.appendChild(pageIndicator);
}
//Setup page
let resultContainer = makeResultContainer();
searchResults.appendChild(resultContainer);
if (CONF.options.display === "grid") {
resultContainer._brick = new Bricklayer(resultContainer);
}
if (!after) {
docCount = 0;
}
reachedEnd = hits.length !== SIZE;
insertHits(resultContainer, hits);
searchBusy = false;
});
}
let searchDebounced = _.debounce(function () {
coolingDown = false;
search()
}, 500);
//Size slider
$("#sizeSlider").ionRangeSlider({
type: "double",
grid: false,
force_edges: true,
min: 0,
max: 3684.03149864,
from: 0,
to: 3684.03149864,
min_interval: 5,
drag_interval: true,
prettify: function (num) {
if (num === 0) {
return "0 B"
} else if (num >= 3684) {
return humanFileSize(num * num * num) + "+";
}
return humanFileSize(num * num * num)
},
onChange: function (e) {
size_min = (e.from * e.from * e.from);
size_max = (e.to * e.to * e.to);
if (e.to >= 3684) {
size_max = 10000000000000;
}
searchDebounced();
}
});
//Date slider
$.jsonPost("es", {
aggs: {
date_min: {min: {field: "mtime"}},
date_max: {max: {field: "mtime"}},
},
size: 0
}).then(resp => {
$("#dateSlider").ionRangeSlider({
type: "double",
grid: false,
force_edges: true,
min: resp["aggregations"]["date_min"]["value"],
max: resp["aggregations"]["date_max"]["value"],
from: resp["aggregations"]["date_min"]["value"],
to: (Date.now() / 1000),
min_interval: 3600 * 24 * 7,
step: 3600 * 24,
drag_interval: true,
prettify: function (num) {
let date = (new Date(num * 1000));
return date.getUTCFullYear() + "-" + ("0" + (date.getUTCMonth() + 1)).slice(-2) + "-" + ("0" + date.getUTCDate()).slice(-2)
},
onFinish: function (e) {
date_min = e.from === e.min ? null : e.from;
date_max = e.to === e.max ? null : e.to;
searchDebounced();
}
});
})
function updateIndices() {
let selected = $('#indices').find('option:selected');
selectedIndices = [];
$(selected).each(function () {
selectedIndices.push($(this).val());
});
searchDebounced();
}
document.getElementById("indices").addEventListener("change", updateIndices);
updateIndices();
window.onkeyup = function (e) {
if (e.key === "/" || e.key === "Escape") {
const bar = document.getElementById("searchBar");
bar.scrollIntoView();
bar.focus();
}
};
function getNextDepth(node) {
let q = {
query: {
bool: {
filter: [
{term: {index: node.index}},
{range: {_depth: {gte: node.depth + 1, lte: node.depth + 3}}},
]
}
},
aggs: {
paths: {
terms: {
field: "path",
size: 10000
}
}
},
size: 0
};
if (node.depth > 0) {
q.query.bool.must = {
prefix: {
path: node.id,
}
};
}
return $.jsonPost("es", q).then(resp => {
const buckets = resp["aggregations"]["paths"]["buckets"];
if (!buckets) {
return false;
}
const paths = [];
return buckets
.filter(bucket => bucket.key.length > node.id.length || node.id.startsWith("/"))
.sort((a, b) => a.key > b.key)
.map(bucket => {
if (paths.some(n => bucket.key.startsWith(n))) {
return null;
}
const name = node.id.startsWith("/") ? bucket.key : bucket.key.slice(node.id.length + 1);
paths.push(bucket.key);
return {
id: bucket.key,
text: `${name}/ (${bucket.doc_count})`,
depth: node.depth + 1,
index: node.index,
values: [bucket.key],
children: true,
}
}).filter(x => x !== null)
})
}
function handlePathTreeClick(tree) {
return (event, node, handler) => {
if (node.depth !== 0) {
$("#pathBar").val(node.id);
$("#pathTreeModal").modal("hide");
searchDebounced();
}
handler();
}
}
function createPathTree(target) {
let pathTree = new InspireTree({
data: function (node, resolve, reject) {
return getNextDepth(node);
},
sort: "text"
});
selectedIndices.forEach(index => {
pathTree.addNode({
id: "/" + index,
values: ["/" + index],
text: `/[${indexMap[index]}]`,
index: index,
depth: 0,
children: true
})
});
new InspireTreeDOM(pathTree, {
target: target
});
pathTree.on("node.click", handlePathTreeClick(pathTree));
}
function getPathChoices() {
return new Promise(getPaths => {
$.jsonPost("es", {
suggest: {
path: {
prefix: pathBar.value,
completion: {
field: "suggest-path",
skip_duplicates: true,
size: 10000
}
}
}
}).then(resp => getPaths(resp["suggest"]["path"][0]["options"].map(opt => opt["_source"]["path"])));
});
}
function getTagChoices() {
return new Promise(getPaths => {
$.jsonPost("es", {
suggest: {
tag: {
prefix: tagBar.value,
completion: {
field: "suggest-tag",
skip_duplicates: true,
size: 10000
}
}
}
}).then(resp => {
const result = [];
resp["suggest"]["tag"][0]["options"].map(opt => opt["_source"]["tag"]).forEach(tags => {
tags.forEach(tag => {
const t = tag.split("#")[0];
if (!result.find(x => x.split("#")[0] === t)) {
result.push(tag);
}
});
});
getPaths(result);
});
});
}

205
src/static/js/util.js Normal file
View File

@@ -0,0 +1,205 @@
/**
* https://stackoverflow.com/questions/10420352
*/
function humanFileSize(bytes) {
if (bytes === 0) {
return "0 B"
}
let thresh = 1000;
if (Math.abs(bytes) < thresh) {
return bytes + ' B';
}
let units = ['k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'];
let u = -1;
do {
bytes /= thresh;
++u;
} while (Math.abs(bytes) >= thresh && u < units.length - 1);
return bytes.toFixed(1) + units[u];
}
/**
* https://stackoverflow.com/questions/6312993
*/
function humanTime(sec_num) {
sec_num = Math.floor(sec_num);
let hours = Math.floor(sec_num / 3600);
let minutes = Math.floor((sec_num - (hours * 3600)) / 60);
let seconds = sec_num - (hours * 3600) - (minutes * 60);
if (hours < 10) {
hours = "0" + hours;
}
if (minutes < 10) {
minutes = "0" + minutes;
}
if (seconds < 10) {
seconds = "0" + seconds;
}
return hours + ":" + minutes + ":" + seconds;
}
function debounce(func, wait) {
let timeout;
return function () {
let context = this, args = arguments;
let later = function () {
timeout = null;
func.apply(context, args);
};
clearTimeout(timeout);
timeout = setTimeout(later, wait);
func.apply(context, args);
};
}
function lum(c) {
c = c.substring(1);
let rgb = parseInt(c, 16);
let r = (rgb >> 16) & 0xff;
let g = (rgb >> 8) & 0xff;
let b = (rgb >> 0) & 0xff;
return 0.2126 * r + 0.7152 * g + 0.0722 * b;
}
function strUnescape(str) {
let result = "";
for (let i = 0; i < str.length; i++) {
const c = str[i];
const next = str[i+1];
if (c === ']') {
if (next === ']') {
result += c;
i += 1;
} else {
result += String.fromCharCode(parseInt(str.slice(i, i + 2), 16));
i += 2;
}
} else {
result += c;
}
}
return result;
}
const CONF = new Settings();
const _defaults = {
display: "grid",
fuzzy: true,
highlight: true,
sort: "score",
searchInPath: false,
treemapType: "cascaded",
treemapTiling: "squarify",
treemapGroupingDepth: 3,
treemapColor: "PuBuGn",
treemapSize: "large",
suggestPath: true,
fragmentSize: 100
};
function loadSettings() {
CONF.load();
$("#settingDisplay").val(CONF.options.display);
$("#settingFuzzy").prop("checked", CONF.options.fuzzy);
$("#settingHighlight").prop("checked", CONF.options.highlight);
$("#settingSearchInPath").prop("checked", CONF.options.searchInPath);
$("#settingTreemapTiling").val(CONF.options.treemapTiling);
$("#settingTreemapGroupingDepth").val(CONF.options.treemapGroupingDepth);
$("#settingTreemapColor").val(CONF.options.treemapColor);
$("#settingTreemapSize").val(CONF.options.treemapSize);
$("#settingTreemapType").val(CONF.options.treemapType);
$("#settingSuggestPath").prop("checked", CONF.options.suggestPath);
$("#settingFragmentSize").val(CONF.options.fragmentSize);
}
function Settings() {
this.options = {};
this._onUpdate = function () {
$("#fuzzyToggle").prop("checked", this.options.fuzzy);
};
this.load = function () {
const raw = window.localStorage.getItem("options");
if (raw === null) {
this.options = _defaults;
} else {
const j = JSON.parse(raw);
if (!j || Object.keys(_defaults).some(k => !j.hasOwnProperty(k))) {
this.options = _defaults;
} else {
this.options = j;
}
}
this._onUpdate();
};
this.save = function () {
window.localStorage.setItem("options", JSON.stringify(this.options));
this._onUpdate();
}
}
function updateSettings() {
CONF.options.display = $("#settingDisplay").val();
CONF.options.fuzzy = $("#settingFuzzy").prop("checked");
CONF.options.highlight = $("#settingHighlight").prop("checked");
CONF.options.searchInPath = $("#settingSearchInPath").prop("checked");
CONF.options.treemapTiling = $("#settingTreemapTiling").val();
CONF.options.treemapGroupingDepth = $("#settingTreemapGroupingDepth").val();
CONF.options.treemapColor = $("#settingTreemapColor").val();
CONF.options.treemapSize = $("#settingTreemapSize").val();
CONF.options.treemapType = $("#settingTreemapType").val();
CONF.options.suggestPath = $("#settingSuggestPath").prop("checked");
CONF.options.fragmentSize = $("#settingFragmentSize").val();
CONF.save();
if (typeof searchDebounced !== "undefined") {
searchDebounced();
}
if (typeof updateStats !== "undefined") {
updateStats();
}
$.toast({
heading: "Settings updated",
text: "Settings saved to browser storage",
stack: 3,
bgColor: "#00a4bc",
textColor: "#fff",
position: 'bottom-right',
hideAfter: 3000,
loaderBg: "#08c7e8",
});
}
jQuery["jsonPost"] = function (url, data) {
return jQuery.ajax({
url: url,
type: "post",
data: JSON.stringify(data),
contentType: "application/json"
}).fail(err => {
showEsError();
console.log(err);
});
};
function toggleTheme() {
if (!document.cookie.includes("sist")) {
document.cookie = "sist=dark;SameSite=Strict";
} else {
document.cookie = "sist=; Max-Age=-99999999;";
}
window.location.reload();
}

330
src/static/search.html Normal file
View File

@@ -0,0 +1,330 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>sist2 - Search</title>
<meta name='viewport' content='width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no'/>
<link href="css" rel="stylesheet" type="text/css">
</head>
<body>
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.7.3</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a class="btn ml-auto" href="/stats">Stats</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings
</button>
<button class="btn" title="Toggle theme" onclick="toggleTheme()">Theme</button>
</nav>
<div class="container">
<div class="card">
<div class="card-body">
<div class="input-group">
<div class="input-group-prepend">
<div class="input-group-text">
<span title="Toggle fuzzy searching" onclick="document.getElementById('fuzzyToggle').click()">Fuzzy&nbsp</span>
<input title="Toggle fuzzy searching" type="checkbox" id="fuzzyToggle"
onclick="toggleFuzzy()" checked>
</div>
</div>
<input id="searchBar" type="search" class="form-control" placeholder="Search">
<div class="input-group-append">
<button class="btn btn-outline-secondary small-btn" type="button" data-toggle="modal"
data-target="#help">?
</button>
<button class="btn btn-outline-secondary large-btn" type="button" data-toggle="modal"
data-target="#help">Help
</button>
</div>
</div>
<div class="row">
<div class="col">
<input title="File size" id="sizeSlider" name="size" width="100%">
</div>
<div class="col">
<div class="input-group" style="margin-bottom: 0.5em; margin-top: 1em">
<div class="input-group-prepend">
<button id="pathBarHelper" class="btn btn-outline-secondary" data-toggle="modal"
data-target="#pathTreeModal">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 576 512" width="20px">
<path d="M288 224h224a32 32 0 0 0 32-32V64a32 32 0 0 0-32-32H400L368 0h-80a32 32 0 0 0-32 32v64H64V8a8 8 0 0 0-8-8H40a8 8 0 0 0-8 8v392a16 16 0 0 0 16 16h208v64a32 32 0 0 0 32 32h224a32 32 0 0 0 32-32V352a32 32 0 0 0-32-32H400l-32-32h-80a32 32 0 0 0-32 32v64H64V128h192v64a32 32 0 0 0 32 32zm0 96h66.74l32 32H512v128H288zm0-288h66.74l32 32H512v128H288z"/>
</svg>
</button>
</div>
<input id="pathBar" type="search" class="form-control" placeholder="Filter path">
</div>
</div>
</div>
<input title="Date filter" id="dateSlider" name="size" width="100%">
<div class="row">
<div class="col">
<label for="indices">Search in indices</label>
<select class="custom-select" id="indices" multiple size="6"></select>
</div>
<div class="col" id="treeTabs">
<ul class="nav nav-tabs" role="tablist">
<li class="nav-item">
<a class="nav-link active" data-toggle="tab" href="#mime" role="tab" aria-controls="home"
aria-selected="true">Mime Types</a>
</li>
<li class="nav-item">
<a class="nav-link" data-toggle="tab" href="#tag" role="tab" aria-controls="profile"
aria-selected="false" title="User-defined tags">Tags</a>
</li>
</ul>
<div class="tab-content" id="myTabContent">
<div class="tab-pane fade show active" id="mime" role="tabpanel" aria-labelledby="home-tab">
<div id="mimeTree" class="tree"></div>
</div>
<div class="tab-pane fade" id="tag" role="tabpanel" aria-labelledby="profile-tab">
<div id="tagTree" class="tree"></div>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="modal" id="modal" tabindex="-1" role="dialog" aria-labelledby="modal-title" aria-hidden="true">
<div class="modal-dialog modal-lg modal-dialog-centered" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title" id="modal-title"></h5>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
</div>
<div class="modal-body" id="modal-body"></div>
</div>
</div>
</div>
<div class="modal" id="help" tabindex="-1" role="dialog" aria-labelledby="modal-title" aria-hidden="true">
<div class="modal-dialog modal-lg modal-dialog-centered" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">Search help</h5>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
</div>
<div class="modal-body">
<table class="table">
<tbody>
<tr>
<td><code>+</code></td>
<td>signifies AND operation</td>
</tr>
<tr>
<td><code>|</code></td>
<td>signifies OR operation</td>
</tr>
<tr>
<td><code>-</code></td>
<td>negates a single token</td>
</tr>
<tr>
<td><code>""</code></td>
<td>wraps a number of tokens to signify a phrase for searching</td>
</tr>
<tr>
<td><code>*</code></td>
<td>at the end of a term signifies a prefix query</td>
</tr>
<tr>
<td><code>(</code> and <code>)</code></td>
<td>signify precedence</td>
</tr>
<tr>
<td><code>~N</code></td>
<td>after a word signifies edit distance (fuzziness)</td>
</tr>
<tr>
<td><code>~N</code></td>
<td>after a phrase signifies slop amount</td>
</tr>
</tbody>
</table>
<p>For example: <code>"fried eggs" +(eggplant | potato) -frittata</code> will match the phrase
<i>fried eggs</i> and either <i>eggplant</i> or <i>potato</i>, but will ignore results
containing <i>frittata</i>.</p>
<p>When neither <code>+</code> or <code>|</code> is specified, the default operator is
<code>+</code> (and).</p>
<p>When the <b>Fuzzy</b> option is checked, partial matches are also returned.</p>
<br>
<p>For more information, see <a target="_blank"
href="//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html">Elasticsearch
documentation</a></p>
</div>
</div>
</div>
</div>
<div class="modal" id="settings" tabindex="-1" role="dialog" aria-labelledby="modal-title" aria-hidden="true">
<div class="modal-dialog modal-dialog-centered" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">Settings</h5>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
</div>
<div class="modal-body">
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingHighlight">
<label class="custom-control-label" for="settingHighlight">Enable highlighting</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingFuzzy">
<label class="custom-control-label" for="settingFuzzy">Set fuzzy search by default</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSearchInPath">
<label class="custom-control-label" for="settingSearchInPath">Enable matching query against
document path</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSuggestPath">
<label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter
bar</label>
</div>
<br/>
<div class="form-group">
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label>
</div>
<label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option>
<option value="list">List</option>
</select>
<hr/>
<h4>Stats</h4>
<div class="form-group">
<label for="settingTreemapType">Treemap type</label>
<select id="settingTreemapType" class="form-control form-control-sm">
<option value="cascaded">Cascaded</option>
<option value="flat">Flat (compact)</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapTiling">Treemap tiling</label>
<select id="settingTreemapTiling" class="form-control form-control-sm">
<option value="binary">Binary</option>
<option value="squarify">Squarify</option>
<option value="slice">Slice</option>
<option value="dice">Dice</option>
<option value="sliceDice">Slide & Dice</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapGroupingDepth">Treemap color grouping depth (flat)</label>
<input type="number" class="form-control" id="settingTreemapGroupingDepth" min="1" max="10">
</div>
<div class="form-group">
<label for="settingTreemapColor">Treemap color (cascaded)</label>
<select id="settingTreemapColor" class="form-control form-control-sm">
<option value="PuBuGn">Purple-Blue-Green</option>
<option value="PuRd">Purple-Red</option>
<option value="PuBu">Purple-Blue</option>
<option value="YlOrBr">Yellow-Orange-Brown</option>
<option value="YlOrRd">Yellow-Orange-Red</option>
<option value="YlGn">Yellow-Green</option>
<option value="YlGnBu">Yellow-Green-Blue</option>
<option value="Plasma">Plasma</option>
<option value="Magma">Magma</option>
<option value="Inferno">Inferno</option>
<option value="Viridis">Viridis</option>
<option value="Turbo">Turbo</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapSize">Treemap size</label>
<select id="settingTreemapSize" class="form-control form-control-sm">
<option value="small">Small</option>
<option value="medium">Medium</option>
<option value="large">Large</option>
<option value="x-large">X-Large</option>
<option value="xx-large">XX-Large</option>
</select>
</div>
<br>
<button class="btn btn-primary ml-auto" onclick="updateSettings()">Update settings</button>
</div>
</div>
</div>
</div>
<div class="modal" id="pathTreeModal" tabindex="-1" role="dialog" aria-labelledby="modal-title" aria-hidden="true">
<div class="modal-dialog modal-lg" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">Select path</h5>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
</div>
<div class="modal-body">
<div id="pathTree" class="tree"></div>
</div>
</div>
</div>
</div>
<div class="modal" id="tagModal" tabindex="-1" role="dialog" aria-labelledby="modal-title" aria-hidden="true">
<div class="modal-dialog modal-dialog-centered" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">Add tag</h5>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
</div>
<div class="modal-body">
<div class="form-group">
<div class="row">
<div class="col col-8">
<input type="text" id="tagBar" class="form-control">
</div>
<div class="col col-4">
<input type="text" id="tag-color" value="" class="form-control"/>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<div id="searchResults"></div>
</div>
<script src="jslib" type="text/javascript"></script>
<script src="jssearch" type="text/javascript"></script>
</body>
</html>

800
src/static/stats.html Normal file
View File

@@ -0,0 +1,800 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>sist2 - Stats</title>
<meta name='viewport' content='width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no'/>
<link href="css" rel="stylesheet" type="text/css">
</head>
<body>
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.7.3</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" class="btn" href="/">Back</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings"
onclick="loadSettings()">Settings
</button>
<button class="btn" title="Toggle theme" onclick="toggleTheme()">Theme</button>
</nav>
<div class="container pb-3">
<div class="card">
<div class="card-body">
<label for="indices">Index</label>
<select id="indices" onchange="updateStats()"></select>
</div>
</div>
<div id="treemap-card" class="stats-card">
<button class="btn stats-btn" onclick="fullScreen('treemap-card')">Enlarge</button>
<button class="btn stats-btn" onclick="exportTreemap()">Export</button>
<svg id="treemap"></svg>
</div>
<div id="graphs-card" class="stats-card">
<button class="btn stats-btn" onclick="fullScreen('graphs-card')">Enlarge</button>
<div class="graph">
<svg id="agg_mime_size"></svg>
</div>
<div class="graph">
<svg id="agg_mime_count"></svg>
</div>
<div class="graph">
<svg id="date_histogram"></svg>
</div>
<div class="graph">
<svg id="size_histogram"></svg>
</div>
</div>
</div>
<div class="modal" id="settings" tabindex="-1" role="dialog" aria-labelledby="modal-title" aria-hidden="true">
<div class="modal-dialog modal-dialog-centered" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">Settings</h5>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
</div>
<div class="modal-body">
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingHighlight">
<label class="custom-control-label" for="settingHighlight">Enable highlighting</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingFuzzy">
<label class="custom-control-label" for="settingFuzzy">Set fuzzy search by default</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSearchInPath">
<label class="custom-control-label" for="settingSearchInPath">Enable matching query against document
path</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSuggestPath">
<label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter bar</label>
</div>
<br/>
<div class="form-group">
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label>
</div>
<label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option>
<option value="list">List</option>
</select>
<hr/>
<h4>Stats</h4>
<div class="form-group">
<label for="settingTreemapType">Treemap type</label>
<select id="settingTreemapType" class="form-control form-control-sm">
<option value="cascaded">Cascaded</option>
<option value="flat">Flat (compact)</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapTiling">Treemap tiling</label>
<select id="settingTreemapTiling" class="form-control form-control-sm">
<option value="binary">Binary</option>
<option value="squarify">Squarify</option>
<option value="slice">Slice</option>
<option value="dice">Dice</option>
<option value="sliceDice">Slide & Dice</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapGroupingDepth">Treemap color grouping depth (flat)</label>
<input type="number" class="form-control" id="settingTreemapGroupingDepth" min="1" max="10">
</div>
<div class="form-group">
<label for="settingTreemapColor">Treemap color (cascaded)</label>
<select id="settingTreemapColor" class="form-control form-control-sm">
<option value="PuBuGn">Purple-Blue-Green</option>
<option value="PuRd">Purple-Red</option>
<option value="PuBu">Purple-Blue</option>
<option value="YlOrBr">Yellow-Orange-Brown</option>
<option value="YlOrRd">Yellow-Orange-Red</option>
<option value="YlGn">Yellow-Green</option>
<option value="YlGnBu">Yellow-Green-Blue</option>
<option value="Plasma">Plasma</option>
<option value="Magma">Magma</option>
<option value="Inferno">Inferno</option>
<option value="Viridis">Viridis</option>
<option value="Turbo">Turbo</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapSize">Treemap size</label>
<select id="settingTreemapSize" class="form-control form-control-sm">
<option value="small">Small</option>
<option value="medium">Medium</option>
<option value="large">Large</option>
<option value="x-large">X-Large</option>
<option value="xx-large">XX-Large</option>
</select>
</div>
<br>
<button class="btn btn-primary float-right" onclick="updateSettings()">Update settings</button>
</div>
</div>
</div>
</div>
<script src="jslib" type="text/javascript"></script>
<script>
let width;
let height;
let indexMap = {};
const barHeight = 20;
const ordinalColor = d3.scaleOrdinal(d3.schemeCategory10);
const formatSI = d3.format("~s");
const TILING_MODES = {
"squarify": d3.treemapSquarify,
"binary": d3.treemapBinary,
"sliceDice": d3.treemapSliceDice,
"slice": d3.treemapSlice,
"dice": d3.treemapDice,
};
const COLORS = {
"PuBuGn": d3.interpolatePuBuGn,
"PuRd": d3.interpolatePuRd,
"PuBu": d3.interpolatePuBu,
"YlOrBr": d3.interpolateYlOrBr,
"YlOrRd": d3.interpolateYlOrRd,
"YlGn": d3.interpolateYlGn,
"YlGnBu": d3.interpolateYlGnBu,
"Plasma": d3.interpolatePlasma,
"Magma": d3.interpolateMagma,
"Inferno": d3.interpolateInferno,
"Viridis": d3.interpolateViridis,
"Turbo": d3.interpolateTurbo,
};
const SIZES = {
"small": [800, 600],
"medium": [1300, 750],
"large": [1900, 900],
"x-large": [2800, 1700],
"xx-large": [3600, 2000],
};
const fillOpacity = document.cookie.includes("sist") ? 0.9 : 0.6;
const uids = {};
function uid(name) {
let id = uids[name] || 0;
uids[name] = id + 1;
return name + id;
}
const burrow = function (table, addSelfDir) {
const root = {};
table.forEach(row => {
let layer = root;
row.taxonomy.forEach(key => {
layer[key] = key in layer ? layer[key] : {};
layer = layer[key];
});
if (Object.keys(layer).length === 0) {
layer["$size$"] = row.size;
} else if (addSelfDir) {
layer["."] = {
"$size$": row.size,
};
}
});
const descend = function (obj, depth) {
return Object.keys(obj).filter(k => k !== "$size$").map(k => {
const child = {
name: k,
depth: depth,
value: 0,
children: descend(obj[k], depth + 1)
};
if ("$size$" in obj[k]) {
child.value = obj[k]["$size$"];
}
return child;
});
};
return {
name: `[${indexMap[$("#indices").val()]}]`,
children: descend(root, 1),
value: 0,
depth: 0,
}
};
function flatTreemap(data, svg) {
const root = d3.treemap()
.tile(TILING_MODES[CONF.options.treemapTiling])
.size([width, height])
.padding(1)
.round(true)(
d3.hierarchy(data)
.sum(d => d.value)
.sort((a, b) => b.value - a.value)
);
const leaf = svg.selectAll("g")
.data(root.leaves())
.join("g")
.attr("transform", d => `translate(${d.x0},${d.y0})`);
leaf.append("title")
.text(d => `${d.ancestors().reverse().map(d => d.data.name).join("/")}\n${humanFileSize(d.value)}`);
leaf.append("rect")
.attr("id", d => (d.leafUid = uid("leaf")))
.attr("fill", d => {
while (d.depth > CONF.options.treemapGroupingDepth) d = d.parent;
return ordinalColor(d.data.name);
})
.attr("fill-opacity", fillOpacity)
.attr("width", d => d.x1 - d.x0)
.attr("height", d => d.y1 - d.y0);
leaf.append("clipPath")
.attr("id", d => (d.clipUid = uid("clip")))
.append("use")
.attr("href", d => `#${d.leafUid}`);
leaf.append("text")
.attr("clip-path", d => `url(#${d.clipUid})`)
.selectAll("tspan")
.data(d => {
if (d.data.name === ".") {
d = d.parent;
}
return [d.data.name, humanFileSize(d.value)]
})
.join("tspan")
.attr("x", 2)
.attr("y", (d, i, nodes) => `${i === 0 ? 1.1 : 2.3}em`)
.text(d => d);
}
function cascade(root, offset) {
const x = new Map;
const y = new Map;
return root.eachAfter(d => {
if (d.children && d.children.length !== 0) {
x.set(d, 1 + d3.max(d.children, c => c.x1 === d.x1 - offset ? x.get(c) : NaN));
y.set(d, 1 + d3.max(d.children, c => c.y1 === d.y1 - offset ? y.get(c) : NaN));
} else {
x.set(d, 0);
y.set(d, 0);
}
}).eachBefore(d => {
d.x1 -= 2 * offset * x.get(d);
d.y1 -= 2 * offset * y.get(d);
});
}
function cascadeTreemap(data, svg) {
const root = cascade(
d3.treemap()
.size([width, height])
.tile(TILING_MODES[CONF.options.treemapTiling])
.paddingOuter(3)
.paddingTop(16)
.paddingInner(1)
.round(true)(
d3.hierarchy(data)
.sum(d => d.value)
.sort((a, b) => b.value - a.value)
),
3 // treemap.paddingOuter
);
const maxDepth = Math.max(...root.descendants().map(d => d.depth));
const color = d3.scaleSequential([maxDepth, -1], COLORS[CONF.options.treemapColor]);
svg.append("filter")
.attr("id", "shadow")
.append("feDropShadow")
.attr("flood-opacity", 0.3)
.attr("dx", 0)
.attr("stdDeviation", 3);
const node = svg.selectAll("g")
.data(
d3.nest()
.key(d => d.depth).sortKeys(d3.ascending)
.entries(root.descendants())
)
.join("g")
.attr("filter", "url(#shadow)")
.selectAll("g")
.data(d => d.values)
.join("g")
.attr("transform", d => `translate(${d.x0},${d.y0})`);
node.append("title")
.text(d => `${d.ancestors().reverse().splice(1).map(d => d.data.name).join("/")}\n${humanFileSize(d.value)}`);
node.append("rect")
.attr("id", d => (d.nodeUid = uid("node")))
.attr("fill", d => color(d.depth))
.attr("width", d => d.x1 - d.x0)
.attr("height", d => d.y1 - d.y0);
node.append("clipPath")
.attr("id", d => (d.clipUid = uid("clip")))
.append("use")
.attr("href", d => `#${d.nodeUid}`);
node.append("text")
.attr("fill", d => d3.hsl(color(d.depth)).l > .5 ? "#333" : "#eee")
.attr("clip-path", d => `url(#${d.clipUid})`)
.selectAll("tspan")
.data(d => [d.data.name, humanFileSize(d.value)])
.join("tspan")
.text(d => d);
node.filter(d => d.children).selectAll("tspan")
.attr("dx", 3)
.attr("y", 13);
node.filter(d => !d.children).selectAll("tspan")
.attr("x", 3)
.attr("y", (d, i, nodes) => `${i === 0 ? 1.1 : 2.3}em`);
}
function mimeBarSize(data, svg) {
const margin = {
top: 50,
right: 0,
bottom: 10,
left: Math.max(
d3.max(data.sort((a, b) => b.count - a.count).slice(0, 15), d => d.mime.length) * 6,
d3.max(data.sort((a, b) => b.size - a.size).slice(0, 15), d => d.mime.length) * 6,
)
};
data.forEach(d => {
d.name = d.mime;
d.value = Number(d.size);
});
data = data.sort((a, b) => b.value - a.value).slice(0, 15);
const width = 550;
const height = Math.ceil((data.length + 0.1) * barHeight) + margin.top + margin.bottom;
svg.selectAll("*").remove();
svg.attr("viewBox", [0, 0, width, height]);
const y = d3.scaleBand()
.domain(d3.range(data.length))
.rangeRound([margin.top, height - margin.bottom]);
const x = d3.scaleLinear()
.domain([0, d3.max(data, d => d.value)])
.range([margin.left, width - margin.right]);
svg.append("g")
.attr("fill-opacity", fillOpacity)
.selectAll("rect")
.data(data)
.join("rect")
.attr("fill", d => ordinalColor(d.name))
.attr("x", x(0))
.attr("y", (d, i) => y(i))
.attr("width", d => x(d.value) - x(0))
.attr("height", y.bandwidth())
.append("title")
.text(d => formatSI(d.value));
svg.append("g")
.attr("transform", `translate(0,${margin.top})`)
.call(d3.axisTop(x).ticks(width / 80, data.format).tickFormat(formatSI))
.call(g => g.select(".domain").remove());
svg.append("g")
.attr("transform", `translate(${margin.left},0)`)
.call(d3.axisLeft(y).tickFormat(i => data[i].name).tickSizeOuter(0));
svg.append("text")
.attr("x", (width / 2))
.attr("y", (margin.top / 2))
.attr("text-anchor", "middle")
.style("font-size", "16px")
.text("Size distribution by MIME type");
}
function mimeBarCount(data, svg) {
const margin = {
top: 50,
right: 0,
bottom: 10,
left: Math.max(
d3.max(data.sort((a, b) => b.count - a.count).slice(0, 15), d => d.mime.length) * 6,
d3.max(data.sort((a, b) => b.size - a.size).slice(0, 15), d => d.mime.length) * 6,
)
};
data.forEach(d => {
d.name = d.mime;
d.value = Number(d.count);
});
data = data.sort((a, b) => b.value - a.value).slice(0, 15);
const width = 550;
const height = Math.ceil((data.length + 0.1) * barHeight) + margin.top + margin.bottom;
svg.selectAll("*").remove();
svg.attr("viewBox", [0, 0, width, height]);
const y = d3.scaleBand()
.domain(d3.range(data.length))
.rangeRound([margin.top, height - margin.bottom]);
const x = d3.scaleLinear()
.domain([0, d3.max(data, d => d.value)])
.range([margin.left, width - margin.right]);
svg.append("g")
.attr("fill-opacity", fillOpacity)
.selectAll("rect")
.data(data)
.join("rect")
.attr("fill", d => ordinalColor(d.name))
.attr("x", x(0))
.attr("y", (d, i) => y(i))
.attr("width", d => x(d.value) - x(0))
.attr("height", y.bandwidth())
.append("title")
.text(d => d3.format(",")(d.value));
svg.append("g")
.attr("transform", `translate(0,${margin.top})`)
.call(d3.axisTop(x).ticks(width / 80, data.format).tickFormat(formatSI))
.call(g => g.select(".domain").remove());
svg.append("g")
.attr("transform", `translate(${margin.left},0)`)
.call(d3.axisLeft(y).tickFormat(i => data[i].name).tickSizeOuter(0));
svg.append("text")
.attr("x", (width / 2))
.attr("y", (margin.top / 2))
.attr("text-anchor", "middle")
.style("font-size", "16px")
.text("File count distribution by MIME type");
}
function dateHistogram(data, svg) {
let bins = data.map(d => {
return {
length: Number(d.count),
x0: Number(d.bucket),
x1: Number(d.bucket) + 2629800
}
});
bins.sort((a, b) => a.length - b.length);
const margin = {
top: 50,
right: 20,
bottom: 70,
left: 40
};
const thresh = d3.quantile(bins, 0.9, d => d.length);
bins = bins.filter(d => d.length > thresh);
const width = 550;
const height = 450;
svg.selectAll("*").remove();
svg.attr("viewBox", [0, 0, width, height]);
const y = d3.scaleLinear()
.domain([0, d3.max(bins, d => d.length)]).nice()
.range([height - margin.bottom, margin.top]);
const x = d3.scaleLinear()
.domain(d3.extent(bins, d => d.x0)).nice()
.range([margin.left, width - margin.right]);
svg.append("g")
.attr("fill", "steelblue")
.selectAll("rect")
.data(bins)
.join("rect")
.attr("x", d => x(d.x0) + 1)
.attr("width", d => Math.max(1, x(d.x1) - x(d.x0) - 1))
.attr("y", d => y(d.length))
.attr("height", d => y(0) - y(d.length))
.call(g => g
.append("title")
.text(d => d.length)
);
svg.append("g")
.attr("transform", `translate(0,${height - margin.bottom})`)
.call(
d3.axisBottom(x)
.ticks(width / 30)
.tickSizeOuter(0)
.tickFormat(t => d3.timeFormat("%Y-%m-%d")(d3.utcParse("%s")(t)))
)
.call(g => g
.selectAll("text")
.style("text-anchor", "end")
.attr("dx", "-.8em")
.attr("dy", ".15em")
.attr("transform", "rotate(-65)")
)
.call(g => g.append("text")
.attr("x", width - margin.right)
.attr("y", -4)
.attr("fill", "currentColor")
.attr("font-weight", "bold")
.attr("text-anchor", "end")
.text("mtime")
);
svg.append("g")
.attr("transform", `translate(${margin.left},0)`)
.call(
d3.axisLeft(y)
.ticks(height / 40)
.tickFormat(t => formatSI(t))
)
.call(g => g.select(".domain").remove())
.call(g => g.select(".tick:last-of-type text").clone()
.attr("x", 4)
.attr("text-anchor", "start")
.attr("font-weight", "bold")
.text("File count"));
svg.append("text")
.attr("x", (width / 2))
.attr("y", (margin.top / 2))
.attr("text-anchor", "middle")
.style("font-size", "16px")
.text("File modification time distribution");
}
function sizeHistogram(data, svg) {
let bins = data.map(d => {
return {
length: Number(d.count),
x0: Number(d.bucket),
x1: Number(d.bucket) + (5 * 1024 * 1024)
}
});
bins = bins.sort((a, b) => b.length - a.length).slice(0, 25);
const margin = {
top: 50,
right: 20,
bottom: 70,
left: 40
};
const width = 550;
const height = 450;
svg.selectAll("*").remove();
svg.attr("viewBox", [0, 0, width, height]);
const y = d3.scaleLinear()
.domain([0, d3.max(bins, d => d.length)])
.range([height - margin.bottom, margin.top]);
const x = d3.scaleLinear()
.domain(d3.extent(bins, d => d.x0)).nice()
.range([margin.left, width - margin.right]);
svg.append("g")
.attr("fill", "steelblue")
.selectAll("rect")
.data(bins)
.join("rect")
.attr("x", d => x(d.x0) + 1)
.attr("width", d => Math.max(1, x(d.x1) - x(d.x0) - 1))
.attr("y", d => y(d.length))
.attr("height", d => y(0) - y(d.length))
.call(g => g
.append("title")
.text(d => d.length)
);
svg.append("g")
.attr("transform", `translate(0,${height - margin.bottom})`)
.call(
d3.axisBottom(x)
.ticks(width / 30)
.tickSizeOuter(0)
.tickFormat(formatSI)
)
.call(g => g
.selectAll("text")
.style("text-anchor", "end")
.attr("dx", "-.8em")
.attr("dy", ".15em")
.attr("transform", "rotate(-65)")
)
.call(g => g.append("text")
.attr("x", width - margin.right)
.attr("y", -4)
.attr("fill", "currentColor")
.attr("font-weight", "bold")
.attr("text-anchor", "end")
.text("size (bytes)")
);
svg.append("g")
.attr("transform", `translate(${margin.left},0)`)
.call(
d3.axisLeft(y)
.ticks(height / 40)
.tickFormat(t => formatSI(t))
)
.call(g => g.select(".domain").remove())
.call(g => g.select(".tick:last-of-type text").clone()
.attr("x", 4)
.attr("text-anchor", "start")
.attr("font-weight", "bold")
.text("File count"));
svg.append("text")
.attr("x", (width / 2))
.attr("y", (margin.top / 2))
.attr("text-anchor", "middle")
.style("font-size", "16px")
.text("File size distribution");
}
function updateStats() {
width = SIZES[CONF.options.treemapSize][0];
height = SIZES[CONF.options.treemapSize][1];
const treemapSvg = d3.select("#treemap");
const mimeSvgSize = d3.select("#agg_mime_size");
const mimeSvgCount = d3.select("#agg_mime_count");
const dateHistogramSvg = d3.select("#date_histogram");
const sizeHistogramSvg = d3.select("#size_histogram");
const indexId = $("#indices").val();
d3.csv(`/s/${indexId}/1`).then(tabularData => {
tabularData.forEach(row => {
row.taxonomy = row.path.split("/");
row.size = Number(row.size);
});
if (CONF.options.treemapType === "cascaded") {
const data = burrow(tabularData, false);
cascadeTreemap(data, treemapSvg);
} else {
const data = burrow(tabularData.sort((a, b) => b.taxonomy.length - a.taxonomy.length), true);
flatTreemap(data, treemapSvg);
}
});
d3.csv(`/s/${indexId}/2`).then(tabularData => {
mimeBarSize(tabularData.slice(), mimeSvgSize);
mimeBarCount(tabularData.slice(), mimeSvgCount);
});
d3.csv(`/s/${indexId}/3`).then(tabularData => {
sizeHistogram(tabularData, sizeHistogramSvg);
});
d3.csv(`/s/${indexId}/4`).then(tabularData => {
dateHistogram(tabularData, dateHistogramSvg);
});
treemapSvg.selectAll("*").remove();
treemapSvg.attr("viewBox", [0, 0, width, height])
.attr("xmlns", "http://www.w3.org/2000/svg")
.attr("xmlns:xlink", "http://www.w3.org/1999/xlink")
.attr("version", "1.1")
.style("overflow", "visible")
.style("font", "10px sans-serif");
}
window.onload = function () {
CONF.load();
$.jsonPost("i").then(resp => {
const select = $("#indices");
const urlIndices = (new URLSearchParams(location.search)).get("i");
resp["indices"].forEach(idx => {
indexMap[idx.id] = idx.name;
select.append($("<option>")
.attr("value", idx.id)
.append(idx.name));
if (urlIndices && urlIndices.split(",").indexOf(idx.name) !== -1) {
select.select(idx.name);
}
});
updateStats();
});
};
function fullScreen(selector) {
const card = document.getElementById(selector);
card.classList.toggle("full-screen");
}
function exportTreemap() {
domtoimage.toBlob(document.getElementById("treemap"), {width: width, height: height})
.then(function (blob) {
let a = document.createElement("a");
let url = URL.createObjectURL(blob);
a.href = url;
a.download = `${indexMap[$("#indices").val()]}_treemap.png`;
document.body.appendChild(a);
a.click();
setTimeout(function() {
document.body.removeChild(a);
window.URL.revokeObjectURL(url);
}, 0);
});
}
</script>
</body>
</html>

353
src/stats.c Normal file
View File

@@ -0,0 +1,353 @@
#include "sist.h"
#include "io/serialize.h"
#include "ctx.h"
#include <glib.h>
static GHashTable *FlatTree;
static GHashTable *BufferTable;
static GHashTable *AggMime;
static GHashTable *AggSize;
static GHashTable *AggDate;
#define SIZE_BUCKET (long)(5 * 1024 * 1024)
#define DATE_BUCKET (long)(2629800)
static long TotalSize = 0;
static long DocumentCount = 0;
typedef struct {
long size;
long count;
} agg_t;
void fill_tables(cJSON *document, UNUSED(const char uuid_str[UUID_STR_LEN])) {
if (cJSON_GetObjectItem(document, "parent") != NULL) {
return;
}
const char *json_path = cJSON_GetObjectItem(document, "path")->valuestring;
char *path = malloc(strlen(json_path) + 1);
strcpy(path, json_path);
const char *json_mime = cJSON_GetObjectItem(document, "mime")->valuestring;
char *mime;
if (json_mime == NULL) {
mime = NULL;
} else {
mime = malloc(strlen(json_mime) + 1);
strcpy(mime, json_mime);
}
long size = (long) cJSON_GetObjectItem(document, "size")->valuedouble;
int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;
// treemap
void *existing_path = g_hash_table_lookup(FlatTree, path);
if (existing_path == NULL) {
g_hash_table_insert(FlatTree, path, (gpointer) size);
} else {
g_hash_table_replace(FlatTree, path, (gpointer) ((long) existing_path + size));
}
// mime agg
if (mime != NULL) {
agg_t *orig_agg = g_hash_table_lookup(AggMime, mime);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggMime, mime, agg);
} else {
orig_agg->size += size;
orig_agg->count += 1;
free(mime);
}
}
// size agg
long size_bucket = size - (size % SIZE_BUCKET);
agg_t *orig_agg = g_hash_table_lookup(AggSize, (gpointer) size_bucket);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggSize, (gpointer) size_bucket, agg);
} else {
orig_agg->count += 1;
orig_agg->size += size;
}
// date agg
long date_bucket = mtime - (mtime % DATE_BUCKET);
orig_agg = g_hash_table_lookup(AggDate, (gpointer) date_bucket);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggDate, (gpointer) date_bucket, agg);
} else {
orig_agg->count += 1;
orig_agg->size += size;
}
TotalSize += size;
DocumentCount += 1;
}
void read_index_into_tables(index_t *index) {
DIR *dir = opendir(index->path);
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", index->path, de->d_name);
read_index(file_path, index->desc.uuid, index->desc.type, fill_tables);
}
}
closedir(dir);
}
static size_t rfind(const char *str, int c) {
for (int i = (int)strlen(str); i >= 0; i--) {
if (str[i] == c) {
return i;
}
}
return -1;
}
int merge_up(double thresh) {
long min_size = (long) (thresh * (double) TotalSize);
int count = 0;
GHashTableIter iter;
g_hash_table_iter_init(&iter, FlatTree);
void *key;
void *value;
while (g_hash_table_iter_next(&iter, &key, &value)) {
long size = (long) value;
if (size < min_size) {
int stop = rfind(key, '/');
if (stop == -1) {
stop = 0;
}
char *parent = malloc(stop + 1);
strncpy(parent, key, stop);
*(parent + stop) = '\0';
void *existing_parent = g_hash_table_lookup(FlatTree, parent);
if (existing_parent == NULL) {
void *existing_parent2_key;
void *existing_parent2_val;
int found = g_hash_table_lookup_extended(BufferTable, parent, &existing_parent2_key,
&existing_parent2_val);
if (!found) {
g_hash_table_insert(BufferTable, parent, value);
} else {
g_hash_table_replace(BufferTable, parent, (gpointer) ((long) existing_parent2_val + size));
free(existing_parent2_key);
}
} else {
g_hash_table_replace(FlatTree, parent, (gpointer) ((long) existing_parent + size));
}
g_hash_table_iter_remove(&iter);
count += 1;
}
}
g_hash_table_iter_init(&iter, BufferTable);
while (g_hash_table_iter_next(&iter, &key, &value)) {
g_hash_table_insert(FlatTree, key, value);
g_hash_table_iter_remove(&iter);
}
int size = g_hash_table_size(FlatTree);
LOG_DEBUGF("stats.c", "Merge up iteration (%d merged, %d in tree)", count, size)
return count;
}
/**
* Assumes out is at at least PATH_MAX *4
*/
void csv_escape(char *dst, const char *str) {
const char *ptr = str;
char *out = dst;
if (rfind(str, ',') == -1 && rfind(str, '"') == -1) {
strcpy(dst, str);
return;
}
*out++ = '"';
char c;
while ((c = *ptr++) != 0) {
if (c == '"') {
*out++ = '"';
*out++ = '"';
} else {
*out++ = c;
}
}
*out++ = '"';
*out = '\0';
}
int open_or_exit(const char *path) {
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
if (fd < 0) {
LOG_FATALF("stats.c", "Error while creating file: %s [%d]\n", strerror(errno), errno)
}
return fd;
}
#define TREEMAP_CSV_HEADER "path,size"
#define MIME_AGG_CSV_HEADER "mime,size,count"
#define SIZE_AGG_CSV_HEADER "bucket,size,count"
#define DATE_AGG_CSV_HEADER "bucket,size,count"
void write_treemap_csv(double thresh, const char *out_path) {
void *key;
void *value;
long min_size = (long) (thresh * (double) TotalSize);
int fd = open_or_exit(out_path);
int ret = write(fd, TREEMAP_CSV_HEADER, sizeof(TREEMAP_CSV_HEADER) - 1);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
GHashTableIter iter;
g_hash_table_iter_init(&iter, FlatTree);
while (g_hash_table_iter_next(&iter, &key, &value)) {
long size = (long) value;
if (size >= min_size) {
char path_buf[PATH_MAX * 4];
char buf[PATH_MAX * 4 + 16];
csv_escape(path_buf, key);
size_t written = sprintf(buf, "\n%s,%ld", path_buf, (long) value);
ret = write(fd, buf, written);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
}
}
close(fd);
}
void write_agg_csv_str(const char *out_path, const char *header, GHashTable *table) {
void *key;
void *value;
char buf[4096];
int fd = open_or_exit(out_path);
int ret = write(fd, header, strlen(header));
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
GHashTableIter iter;
g_hash_table_iter_init(&iter, table);
while (g_hash_table_iter_next(&iter, &key, &value)) {
agg_t *agg = value;
size_t written = sprintf(buf, "\n%s,%ld,%ld", (const char*)key, agg->size, agg->count);
ret = write(fd, buf, written);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
}
close(fd);
}
void write_agg_csv_long(const char *out_path, const char *header, GHashTable *table) {
void *key;
void *value;
char buf[4096];
int fd = open_or_exit(out_path);
int ret = write(fd, header, strlen(header));
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
GHashTableIter iter;
g_hash_table_iter_init(&iter, table);
while (g_hash_table_iter_next(&iter, &key, &value)) {
agg_t *agg = value;
size_t written = sprintf(buf, "\n%ld,%ld,%ld", (long)key, agg->size, agg->count);
ret = write(fd, buf, written);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
}
close(fd);
}
int generate_stats(index_t *index, const double threshold, const char *out_prefix) {
FlatTree = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
BufferTable = g_hash_table_new(g_str_hash, g_str_equal);
AggMime = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);
AggSize = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);
AggDate = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);
LOG_INFO("stats.c", "Generating stats...")
read_index_into_tables(index);
LOG_DEBUG("stats.c", "Read index into tables")
LOG_DEBUGF("stats.c", "Total size is %ld", TotalSize)
LOG_DEBUGF("stats.c", "Document count is %ld", DocumentCount)
LOG_DEBUGF("stats.c", "Merging small directories upwards with a threshold of %f%%", threshold * 100)
while (merge_up(threshold) > 100) {}
char tmp[PATH_MAX];
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "treemap.csv");
write_treemap_csv(threshold, tmp);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "mime_agg.csv");
write_agg_csv_str(tmp, MIME_AGG_CSV_HEADER, AggMime);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "size_agg.csv");
write_agg_csv_long(tmp, SIZE_AGG_CSV_HEADER, AggSize);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "date_agg.csv");
write_agg_csv_long(tmp, DATE_AGG_CSV_HEADER, AggDate);
g_hash_table_remove_all(FlatTree);
g_hash_table_destroy(FlatTree);
g_hash_table_destroy(BufferTable);
g_hash_table_remove_all(AggMime);
g_hash_table_destroy(AggMime);
g_hash_table_remove_all(AggSize);
g_hash_table_destroy(AggSize);
g_hash_table_remove_all(AggDate);
g_hash_table_destroy(AggDate);
return 0;
}

6
src/stats.h Normal file
View File

@@ -0,0 +1,6 @@
#ifndef SIST2_STATS_H
#define SIST2_STATS_H
int generate_stats(index_t *index, double threshold, const char* out_prefix);
#endif

View File

@@ -1,5 +1,9 @@
#include "tpool.h"
#include "ctx.h"
#include "sist.h"
#include <pthread.h>
#define MAX_QUEUE_SIZE 10000
typedef void (*thread_func_t)(void *arg);
@@ -24,7 +28,9 @@ typedef struct tpool {
int work_cnt;
int done_cnt;
int free_arg;
int stop;
void (*cleanup_func)();
} tpool_t;
@@ -76,6 +82,10 @@ int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
return 0;
}
while ((pool->work_cnt - pool->done_cnt) >= MAX_QUEUE_SIZE) {
usleep(100000);
}
pthread_mutex_lock(&(pool->work_mutex));
if (pool->work_head == NULL) {
pool->work_head = work;
@@ -100,7 +110,7 @@ static void *tpool_worker(void *arg) {
tpool_t *pool = arg;
while (1) {
pthread_mutex_lock(&(pool->work_mutex));
pthread_mutex_lock(&pool->work_mutex);
if (pool->stop) {
break;
}
@@ -113,14 +123,23 @@ static void *tpool_worker(void *arg) {
pthread_mutex_unlock(&(pool->work_mutex));
if (work != NULL) {
if (pool->stop) {
break;
}
work->func(work->arg);
if (pool->free_arg) {
free(work->arg);
}
free(work);
}
pthread_mutex_lock(&(pool->work_mutex));
pool->done_cnt++;
if (work != NULL) {
pool->done_cnt++;
}
progress_bar_print((double)pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
progress_bar_print((double) pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
if (pool->work_head == NULL) {
pthread_cond_signal(&(pool->working_cond));
@@ -128,7 +147,10 @@ static void *tpool_worker(void *arg) {
pthread_mutex_unlock(&(pool->work_mutex));
}
pool->cleanup_func();
if (pool->cleanup_func != NULL) {
LOG_INFO("tpool.c", "Executing cleanup function")
pool->cleanup_func();
}
pthread_cond_signal(&(pool->working_cond));
pthread_mutex_unlock(&(pool->work_mutex));
@@ -136,17 +158,24 @@ static void *tpool_worker(void *arg) {
}
void tpool_wait(tpool_t *pool) {
LOG_INFO("tpool.c", "Waiting for worker threads to finish")
pthread_mutex_lock(&(pool->work_mutex));
while (1) {
if (pool->done_cnt < pool->work_cnt) {
pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex));
} else {
pool->stop = 1;
break;
usleep(500000);
if (pool->done_cnt == pool->work_cnt) {
pool->stop = 1;
usleep(1000000);
break;
}
}
progress_bar_print(100.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
}
progress_bar_print(1.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
pthread_mutex_unlock(&(pool->work_mutex));
LOG_INFO("tpool.c", "Worker threads finished")
}
void tpool_destroy(tpool_t *pool) {
@@ -154,6 +183,8 @@ void tpool_destroy(tpool_t *pool) {
return;
}
LOG_INFO("tpool.c", "Destroying thread pool")
pthread_mutex_lock(&(pool->work_mutex));
tpool_work_t *work = pool->work_head;
while (work != NULL) {
@@ -167,9 +198,14 @@ void tpool_destroy(tpool_t *pool) {
for (size_t i = 0; i < pool->thread_cnt; i++) {
pthread_t thread = pool->threads[i];
pthread_cancel(thread);
if (thread != 0) {
void *_;
pthread_join(thread, &_);
}
}
LOG_INFO("tpool.c", "Final cleanup")
pthread_mutex_destroy(&(pool->work_mutex));
pthread_cond_destroy(&(pool->has_work_cond));
pthread_cond_destroy(&(pool->working_cond));
@@ -182,15 +218,16 @@ void tpool_destroy(tpool_t *pool) {
* Create a thread pool
* @param thread_cnt Worker threads count
*/
tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
tpool_t *tpool_create(size_t thread_cnt, void cleanup_func(), int free_arg) {
tpool_t *pool = malloc(sizeof(tpool_t));
pool->thread_cnt = thread_cnt;
pool->work_cnt =0;
pool->done_cnt =0;
pool->work_cnt = 0;
pool->done_cnt = 0;
pool->stop = 0;
pool->free_arg = free_arg;
pool->cleanup_func = cleanup_func;
pool->threads = malloc(sizeof(pthread_t) * thread_cnt);
pool->threads = calloc(sizeof(pthread_t), thread_cnt);
pthread_mutex_init(&(pool->work_mutex), NULL);
@@ -200,11 +237,14 @@ tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
pool->work_head = NULL;
pool->work_tail = NULL;
for (size_t i = 0; i < thread_cnt; i++) {
pthread_t thread = pool->threads[i];
pthread_create(&thread, NULL, tpool_worker, pool);
pthread_detach(thread);
}
return pool;
}
void tpool_start(tpool_t *pool) {
LOG_INFOF("tpool.c", "Starting thread pool with %d threads", pool->thread_cnt)
for (size_t i = 0; i < pool->thread_cnt; i++) {
pthread_create(&pool->threads[i], NULL, tpool_worker, pool);
}
}

View File

@@ -8,7 +8,8 @@ typedef struct tpool tpool_t;
typedef void (*thread_func_t)(void *arg);
tpool_t *tpool_create(size_t num, void (*cleanup_func)());
tpool_t *tpool_create(size_t num, void (*cleanup_func)(), int free_arg);
void tpool_start(tpool_t *pool);
void tpool_destroy(tpool_t *tm);
int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg);

View File

@@ -1,87 +1,26 @@
#ifndef SIST2_TYPES_H
#define SIST2_TYPES_H
#define META_INT_MASK 0xF0
#define META_STR_MASK 0xE0
#define META_LONG_MASK 0xD0
#define IS_META_INT(key) (key & META_INT_MASK) == META_INT_MASK
#define IS_META_LONG(key) (key & META_LONG_MASK) == META_LONG_MASK
#define IS_META_STR(meta) (meta->key & META_STR_MASK) == META_STR_MASK
// This is written to file as a 8bit char!
enum metakey {
MetaContent = 1 | META_STR_MASK,
MetaWidth = 2 | META_INT_MASK,
MetaHeight = 3 | META_INT_MASK,
MetaMediaDuration = 4 | META_LONG_MASK,
MetaMediaAudioCodec = 5 | META_INT_MASK,
MetaMediaVideoCodec = 6 | META_INT_MASK,
MetaMediaBitrate = 7 | META_LONG_MASK,
MetaArtist = 8 | META_STR_MASK,
MetaAlbum = 9 | META_STR_MASK,
MetaAlbumArtist = 10 | META_STR_MASK,
MetaGenre = 11 | META_STR_MASK,
MetaTitle = 12 | META_STR_MASK,
MetaFontName = 13 | META_STR_MASK,
};
#define INDEX_TYPE_BIN "binary"
#define INDEX_TYPE_JSON "json"
#define INDEX_VERSION_EXTERNAL "_external_v1"
typedef struct index_descriptor {
char uuid[UUID_STR_LEN];
char version[6];
char version[64];
long timestamp;
char root[PATH_MAX];
char rewrite_url[8196];
short root_len;
char name[1024];
char type[64];
} index_descriptor_t;
typedef struct index_t {
struct index_descriptor desc;
struct store_t *store;
struct store_t *tag_store;
char path[PATH_MAX];
} index_t;
typedef struct meta_line {
struct meta_line *next;
enum metakey key;
union {
unsigned long longval;
int intval;
char strval[0];
};
} meta_line_t;
typedef struct document {
unsigned char uuid[16];
unsigned long ino;
unsigned long size;
unsigned int mime;
int mtime;
short base;
short ext;
meta_line_t *meta_head;
meta_line_t *meta_tail;
char *filepath;
} document_t;
typedef struct parse_job_t {
int base;
int ext;
struct stat info;
char filepath[1];
} parse_job_t;
#define APPEND_META(doc, meta) \
meta->next = NULL;\
if (doc->meta_head == NULL) {\
doc->meta_head = meta;\
doc->meta_tail = doc->meta_head;\
} else {\
doc->meta_tail->next = meta;\
doc->meta_tail = meta;\
}
#endif

View File

@@ -1,31 +1,84 @@
#define _GNU_SOURCE
#include "util.h"
#include "src/ctx.h"
#include <wordexp.h>
#include <glib.h>
#define PBSTR "========================================"
#define PBWIDTH 40
char *abspath(const char *path) {
wordexp_t w;
wordexp(path, &w, 0);
dyn_buffer_t url_escape(char *str) {
char *abs = canonicalize_file_name(w.we_wordv[0]);
dyn_buffer_t text = dyn_buffer_create();
char *ptr = str;
while (*ptr) {
if (*ptr == '#') {
dyn_buffer_write(&text, "%23", 3);
ptr++;
}
dyn_buffer_write_char(&text, *ptr++);
}
dyn_buffer_write_char(&text, '\0');
return text;
}
char *abspath(const char *path) {
char *expanded = expandpath(path);
char *abs = realpath(expanded, NULL);
free(expanded);
if (abs == NULL) {
return NULL;
}
abs = realloc(abs, strlen(abs) + 2);
strcat(abs, "/");
if (strlen(abs) > 1) {
abs = realloc(abs, strlen(abs) + 2);
strcat(abs, "/");
}
wordfree(&w);
return abs;
}
char *expandpath(const char *path) {
wordexp_t w;
wordexp(path, &w, 0);
void shell_escape(char *dst, const char *src) {
const char *ptr = src;
char *out = dst;
while ((*ptr)) {
char c = *ptr++;
char * expanded = malloc(strlen(w.we_wordv[0]) + 2);
strcpy(expanded, w.we_wordv[0]);
if (c == '&' || c == '\n' || c == '|' || c == ';' || c == '<' ||
c == '>' || c == '(' || c == ')' || c == '{' || c == '}') {
*out++ = '\\';
}
*out++ = c;
}
*out = 0;
}
char *expandpath(const char *path) {
char tmp[PATH_MAX * 2];
shell_escape(tmp, path);
wordexp_t w;
wordexp(tmp, &w, 0);
if (w.we_wordv == NULL) {
return NULL;
}
*tmp = '\0';
for (int i = 0; i < w.we_wordc; i++) {
strcat(tmp, w.we_wordv[i]);
if (i != w.we_wordc - 1) {
strcat(tmp, " ");
}
}
char *expanded = malloc(strlen(tmp) + 2);
strcpy(expanded, tmp);
strcat(expanded, "/");
wordfree(&w);
@@ -76,4 +129,117 @@ GHashTable *incremental_get_table() {
return file_table;
}
const char *find_file_in_paths(const char *paths[], const char *filename) {
for (int i = 0; paths[i] != NULL; i++) {
char *apath = abspath(paths[i]);
if (apath == NULL) {
continue;
}
char path[PATH_MAX];
snprintf(path, sizeof(path), "%s%s", apath, filename);
LOG_DEBUGF("util.c", "Looking for '%s' in folder '%s'", filename, apath)
free(apath);
struct stat info;
int ret = stat(path, &info);
if (ret != -1) {
return paths[i];
}
}
return NULL;
}
#define ESCAPE_CHAR ']'
void str_escape(char *dst, const char *str) {
const size_t len = strlen(str);
char buf[16384];
memset(buf + len, 0, 8);
strcpy(buf, str);
char *cur = dst;
const char *ptr = buf;
const char *oldPtr = ptr;
utf8_int32_t c;
char tmp[16];
do {
ptr = (char *) utf8codepoint(ptr, &c);
*(int *) tmp = 0x00000000;
size_t code_len = (ptr - oldPtr);
memcpy(tmp, oldPtr, code_len);
oldPtr = ptr;
if (!utf8_validchr2(tmp)) {
for (int i = 0; i < code_len; i++) {
if (tmp[i] == 0) {
break;
}
cur += sprintf(cur, "%c%02X", ESCAPE_CHAR, (unsigned char) tmp[i]);
}
continue;
}
if (c == ESCAPE_CHAR) {
*cur++ = ESCAPE_CHAR;
*cur++ = ESCAPE_CHAR;
continue;
}
if (((utf8_int32_t) 0xffffff80 & c) == 0) {
*(cur++) = (char) c;
} else if (((utf8_int32_t) 0xfffff800 & c) == 0) {
*(cur++) = 0xc0 | (char) (c >> 6);
*(cur++) = 0x80 | (char) (c & 0x3f);
} else if (((utf8_int32_t) 0xffff0000 & c) == 0) {
*(cur++) = 0xe0 | (char) (c >> 12);
*(cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
*(cur++) = 0x80 | (char) (c & 0x3f);
} else {
*(cur++) = 0xf0 | (char) (c >> 18);
*(cur++) = 0x80 | (char) ((c >> 12) & 0x3f);
*(cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
*(cur++) = 0x80 | (char) (c & 0x3f);
}
} while (*ptr != '\0');
*cur = '\0';
}
void str_unescape(char *dst, const char *str) {
char *cur = dst;
const char *ptr = str;
char tmp[3];
tmp[2] = '\0';
while (*ptr != 0) {
char c = *ptr++;
if (c == ESCAPE_CHAR) {
char next = *ptr;
if (next == ESCAPE_CHAR) {
*cur++ = (char) c;
ptr += 1;
} else {
tmp[0] = *(ptr);
tmp[1] = *(ptr + 1);
*cur++ = (char) strtol(tmp, NULL, 16);
ptr += 2;
}
} else {
*cur++ = c;
}
}
*cur = '\0';
}

View File

@@ -2,164 +2,32 @@
#define SIST2_UTIL_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define TEXT_BUF_FULL -1
#define INITIAL_BUF_SIZE 1024 * 16
#define SHOULD_IGNORE_CHAR(c) c < '0' || c > 'z'
#include <glib.h>
typedef struct dyn_buffer {
char *buf;
size_t cur;
size_t size;
} dyn_buffer_t;
#include "third-party/utf8.h/utf8.h"
#include "libscan/scan.h"
#include "sist.h"
typedef struct text_buffer {
size_t max_size;
int last_char_was_whitespace;
dyn_buffer_t dyn_buffer;
} text_buffer_t;
char *abspath(const char *path);
__always_inline
dyn_buffer_t dyn_buffer_create() {
dyn_buffer_t buf;
buf.size = INITIAL_BUF_SIZE;
buf.cur = 0;
buf.buf = malloc(INITIAL_BUF_SIZE);
return buf;
}
__always_inline
void grow_buffer(dyn_buffer_t *buf, size_t size) {
if (buf->cur + size > buf->size) {
do {
buf->size *= 2;
} while (buf->cur + size > buf->size);
buf->buf = realloc(buf->buf, buf->size);
}
}
__always_inline
void grow_buffer_small(dyn_buffer_t *buf) {
if (buf->cur + sizeof(long) > buf->size) {
buf->size *= 2;
buf->buf = realloc(buf->buf, buf->size);
}
}
__always_inline
void dyn_buffer_write(dyn_buffer_t *buf, void *data, size_t size) {
grow_buffer(buf, size);
memcpy(buf->buf + buf->cur, data, size);
buf->cur += size;
}
__always_inline
void dyn_buffer_write_char(dyn_buffer_t *buf, char c) {
grow_buffer_small(buf);
*(buf->buf + buf->cur) = c;
buf->cur += sizeof(c);
}
__always_inline
void dyn_buffer_write_str(dyn_buffer_t *buf, char *str) {
dyn_buffer_write(buf, str, strlen(str));
dyn_buffer_write_char(buf, '\0');
}
__always_inline
void dyn_buffer_write_int(dyn_buffer_t *buf, int d) {
grow_buffer_small(buf);
*(int *) (buf->buf + buf->cur) = d;
buf->cur += sizeof(int);
}
__always_inline
void dyn_buffer_write_short(dyn_buffer_t *buf, short s) {
grow_buffer_small(buf);
*(short *) (buf->buf + buf->cur) = s;
buf->cur += sizeof(short);
}
__always_inline
void dyn_buffer_write_long(dyn_buffer_t *buf, unsigned long l) {
grow_buffer_small(buf);
*(unsigned long *) (buf->buf + buf->cur) = l;
buf->cur += sizeof(unsigned long);
}
__always_inline
void dyn_buffer_destroy(dyn_buffer_t *buf) {
free(buf->buf);
}
__always_inline
void text_buffer_destroy(text_buffer_t *buf) {
dyn_buffer_destroy(&buf->dyn_buffer);
}
__always_inline
text_buffer_t text_buffer_create(int max_size) {
text_buffer_t text_buf;
text_buf.dyn_buffer = dyn_buffer_create();
text_buf.max_size = max_size;
text_buf.last_char_was_whitespace = FALSE;
return text_buf;
}
__always_inline
void text_buffer_terminate_string(text_buffer_t *buf) {
dyn_buffer_write_char(&buf->dyn_buffer, '\0');
}
__always_inline
int text_buffer_append_char(text_buffer_t *buf, int c) {
if (SHOULD_IGNORE_CHAR(c)) {
if (!buf->last_char_was_whitespace) {
dyn_buffer_write_char(&buf->dyn_buffer, ' ');
buf->last_char_was_whitespace = TRUE;
if (buf->dyn_buffer.cur >= buf->max_size) {
return TEXT_BUF_FULL;
}
}
} else {
buf->last_char_was_whitespace = FALSE;
dyn_buffer_write_char(&buf->dyn_buffer, (char) c);
if (buf->dyn_buffer.cur >= buf->max_size) {
return TEXT_BUF_FULL;
}
}
return 0;
}
char *abspath(const char * path);
char *expandpath(const char *path);
dyn_buffer_t url_escape(char *str);
void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
GHashTable *incremental_get_table();
__always_inline
void incremental_put(GHashTable *table, unsigned long inode_no, int mtime) {
static void incremental_put(GHashTable *table, unsigned long inode_no, int mtime) {
g_hash_table_insert(table, (gpointer) inode_no, GINT_TO_POINTER(mtime));
}
__always_inline
int incremental_get(GHashTable *table, unsigned long inode_no) {
static int incremental_get(GHashTable *table, unsigned long inode_no) {
if (table != NULL) {
return GPOINTER_TO_INT(g_hash_table_lookup(table, (gpointer) inode_no));
} else {
@@ -168,12 +36,16 @@ int incremental_get(GHashTable *table, unsigned long inode_no) {
}
__always_inline
int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no) {
g_hash_table_insert(table, GINT_TO_POINTER(inode_no), GINT_TO_POINTER(1));
static int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no) {
return g_hash_table_insert(table, GINT_TO_POINTER(inode_no), GINT_TO_POINTER(1));
}
GHashTable *incremental_get_table();
const char *find_file_in_paths(const char **paths, const char *filename);
void str_escape(char *dst, const char *str);
void str_unescape(char *dst, const char *str);
#endif

Some files were not shown because too many files have changed in this diff Show More