Compare commits

...

244 Commits

Author SHA1 Message Date
013c54daa0 Fix tag delete 2020-12-31 12:55:37 -05:00
54308ef5e2 Update tags tab automatically 2020-12-31 12:45:23 -05:00
638c2a5c1a Update binary names (again) 2020-12-31 11:03:25 -05:00
9587caddd9 Don't build tests by default, fix enlarge button 2020-12-31 10:55:34 -05:00
f5bbe0dc97 Update binary names 2020-12-31 10:54:30 -05:00
f87eac1f90 Update submodules 2020-12-31 10:26:05 -05:00
ddafbab6a6 Update readme 2020-12-31 10:26:05 -05:00
b91d574756 Add md5 client-side lib 2020-12-31 10:26:05 -05:00
576140e542 fix submodules 2020-12-31 10:26:05 -05:00
050c1283a3 Remove UUID dep, fix incremental scan, use MD5(path) as unique id, version bump 2020-12-31 10:26:05 -05:00
c6e1ba03bc Better support for .doc files 2020-12-31 10:26:05 -05:00
10e32f707f Update README.md 2020-12-31 10:26:05 -05:00
86e83bafaf Update README.md 2020-12-31 10:26:05 -05:00
51a40c8819 Add .doc support 2020-12-31 10:26:05 -05:00
acc557
36281a5108 Use relative path for loading csv in stats 2020-12-31 10:26:05 -05:00
acc557
76a0bda48b Update search.html
Fix relative stats URL
2020-12-31 10:26:05 -05:00
0cf29a660c Fix relative image URL #122 2020-12-31 10:26:05 -05:00
6cd0741848 update build instructions 2020-12-31 10:26:05 -05:00
bc120f349d Setup ARM CI builds 2020-12-23 10:26:26 -05:00
8cac8c98d7 Update dev builds template 2020-12-22 14:45:16 -05:00
30921ac52e Setup drone ci 2020-12-22 14:09:45 -05:00
95bbe39afc Update libmupdf 2020-10-25 09:44:30 -04:00
72ce217f9c Optionally ES schema from file #117 2020-10-25 09:44:30 -04:00
641a8ec90c sidecar files #114, version bump 2020-10-25 09:44:30 -04:00
7a505c2287 Fix typo 2020-10-25 09:44:30 -04:00
12f162d760 Fix #110 2020-10-25 09:44:30 -04:00
4b4ab12fac Version bump 2020-09-22 21:08:24 -04:00
ae283f77ad Fix #112 2020-09-22 21:08:24 -04:00
d3bd53a5ea Fix arm Dockerfile 2020-09-13 16:16:26 -04:00
f7887f24d1 sync libscan 2020-09-13 16:16:26 -04:00
5c8de19188 Update build instructions 2020-09-13 16:16:26 -04:00
d861d278a4 version bump 2020-09-13 16:16:26 -04:00
b6ddeee0e0 Use async curl for ES requests #108 2020-09-13 16:16:26 -04:00
0cd2523b05 arm64 build 2020-09-13 16:16:26 -04:00
5e798f9367 Update issue-template.md 2020-09-13 10:29:42 -04:00
5da6c1488b Handle null mime in document info dialog 2020-08-29 10:34:58 -04:00
9568e25f84 Fix #99 2020-08-29 10:17:28 -04:00
6a8027789a Limited support for UTF16 2020-08-29 10:17:28 -04:00
b1d16d8abf Fix #100 2020-08-29 10:17:28 -04:00
b2a157e24d Update docs 2020-08-25 10:38:38 -04:00
9aead9389a Fix typo in elastic.c 2020-08-25 10:38:38 -04:00
a32c68cba8 Build fixes 2020-08-25 10:38:38 -04:00
d116cf9d91 Default index for web & exec 2020-08-25 10:38:38 -04:00
Andrew
a020a8b32c Update USAGE.md
Fix link to scripting.
2020-08-25 10:38:38 -04:00
5d5d9c3092 Fix heap buffer overflow warning 2020-08-25 10:38:38 -04:00
3379d5ce71 Fix #97 2020-08-25 10:38:38 -04:00
a0ff4a1f01 Fix heap buffer overflow warning 2020-08-25 10:38:38 -04:00
4589f3bde7 Fix #94 2020-08-25 10:38:38 -04:00
1c898640cf Fix #88 2020-08-25 10:38:38 -04:00
a0739d5177 Fix #92 2020-08-25 10:38:38 -04:00
8f9d29dbc6 Fix #91 2020-08-25 10:38:38 -04:00
3ff4b70223 Update README.md 2020-08-25 10:38:38 -04:00
02ad035b09 Workaround when first ebook page is blank 2020-08-25 10:38:38 -04:00
c11feb213d Gracefully handle archive errors in comic.c 2020-08-25 10:38:38 -04:00
72902947cd Fix for #90 2020-08-25 10:38:38 -04:00
a18bb81222 remove warning 2020-08-25 10:38:38 -04:00
1520288f19 Fix #89 2020-08-25 10:38:38 -04:00
e507de194b Fix log colors 2020-08-25 10:38:38 -04:00
0e517d5e2b Fix #81 2020-08-03 20:09:07 -04:00
8223ef3860 Update USAGE.md 2020-08-03 19:48:49 -04:00
995a196690 Log user script task, add async arg 2020-08-03 19:44:43 -04:00
465d017e18 CSS tweaks, fix #87 2020-08-03 19:15:12 -04:00
ca994d3914 Fix bug with media files, don't encode thumbnail when not necessary 2020-07-26 11:52:48 -04:00
db2285973f Configurable column count 2020-07-26 11:50:21 -04:00
61de9e9f14 Set timeout for HTTP get request 2020-07-25 19:55:27 -04:00
3015ef0ff4 Increase file preview file 2020-07-25 17:26:17 -04:00
b55d432841 Fix #65 2020-07-25 09:37:37 -04:00
ed90a140ce Update README.md 2020-07-19 14:53:03 -04:00
052df82373 Fix #83 2020-07-19 13:10:30 -04:00
5676136777 Remove println that was left accidentally 2020-07-18 20:55:12 -04:00
c061613302 Fix #76 2020-07-18 19:23:43 -04:00
d0325fd9b9 Fix for simon987/sist2#85 2020-07-18 18:48:54 -04:00
e05a6f3863 Fix for #75 2020-07-18 18:46:52 -04:00
f1690a9cca Mobi build fix 2020-07-18 13:10:45 -04:00
100a264413 Don't show MuPDF warnings unless --very-verbose is specified 2020-07-18 10:28:05 -04:00
29390bb454 Update README 2020-07-18 09:54:36 -04:00
4d43036ded Fix simon987/sist2#78 2020-07-18 09:41:39 -04:00
0b5cdbd130 Fix #79 2020-07-18 09:36:10 -04:00
53d7695f66 Read .raw thumbnails #80, fix media probing for some formats 2020-07-18 09:31:42 -04:00
8d53456404 fix libscan submodule 2020-07-17 20:33:50 -04:00
cbc08a7cc9 Save ebook renders as jpeg 2020-07-17 20:18:21 -04:00
e629b4d7d3 Faster comic book parsing, probably fixes #77 2020-07-17 19:10:18 -04:00
22f7073b39 mobi reading bugfix 2020-07-16 20:30:28 -04:00
1781a74960 Oops I didn't mean to push this 2020-07-16 19:23:52 -04:00
db96c95ac7 log fix #73 2020-07-16 19:19:23 -04:00
7b9fa4cc0a Fix bad merge... 2020-07-15 21:00:51 -04:00
5cc1fa86a9 Read embedded thumbnail simon987/sist2#74 2020-07-15 20:56:25 -04:00
649689ce30 Remove warning when generating stats 2020-07-15 20:41:38 -04:00
c8536f65a8 Fix memory leak in index 2020-07-15 20:41:09 -04:00
75b5e249c1 Merge pull request #72 from dpieski/patch-1
Update USAGE.md
2020-07-15 14:37:28 -04:00
Andrew
f49e03ac79 Update USAGE.md
added example for Windows to display number of logical processors. 
Does this same limitation apply to the new `index` threads option?
2020-07-15 13:21:02 -05:00
a6d2afc8dc Merge pull request #71 from simon987/web-tag
Web tag
2020-07-14 20:23:22 -04:00
8f8f66ba05 Update README.md 2020-07-14 20:22:03 -04:00
1d9fcf7105 Manual tagging 2020-07-13 19:18:07 -04:00
8127745f2b wip 2020-07-13 19:16:51 -04:00
230988d6d1 frontend tags 2020-07-13 19:15:59 -04:00
13f4dbed2d Handle 429, multi-threaded index module 2020-07-11 17:42:46 -04:00
ed15e89f45 Fix exec-script --es-url not being passed 2020-06-28 12:41:09 -04:00
c636d3d921 Set number_of_replicas to 0 by default in elasticsearch 2020-06-26 18:10:51 -04:00
7e92d4b7d1 refresh index only if user script is ran 2020-06-25 20:48:47 -04:00
8ffe780ab2 Tag tree fix for #64, validate required argument in exec-script 2020-06-25 20:11:30 -04:00
d3c8928fe8 Update readme 2020-06-24 21:06:27 -04:00
d9f628fca4 Build fix 2020-06-21 16:53:22 -04:00
68289268c1 Add exif tag 2020-06-21 16:51:14 -04:00
649c50c465 Update README.md 2020-06-21 14:35:18 -04:00
7b49a0dc49 Build fix 2020-06-21 12:56:13 -04:00
eb559b53aa RAW picture file support 2020-06-21 10:46:11 -04:00
6d01f9c0df whoops 2020-06-19 22:12:19 -04:00
e724fec668 Fix web return codes 2020-06-19 21:41:17 -04:00
fe5e93b300 Update USAGE.md 2020-06-19 21:29:09 -04:00
ecad85fd7d version bump 2020-06-19 21:10:03 -04:00
74cc898259 Fix tag display issue 2020-06-19 21:07:19 -04:00
dc2e4443c4 Add exec-script command 2020-06-19 21:07:19 -04:00
1a64431b52 Merge pull request #63 from dpieski/patch-3
Correct typos in example
2020-06-19 18:26:10 -04:00
Andrew
9bad515e06 Correct typos in example
Correct typos in examples.
2020-06-19 17:22:02 -05:00
648559cedb Update README.md 2020-06-17 13:25:20 -04:00
3e6cd9cd5c Merge pull request #60 from dpieski/patch-2
update Usage.md
2020-06-17 13:04:46 -04:00
f249992798 Update scripting.md 2020-06-17 13:00:07 -04:00
Andrew
e9645ecdaa update Usage.md
Fixing a link.
2020-06-17 10:58:25 -05:00
046edea0e2 Handle special characters in file paths 2020-06-10 19:45:36 -04:00
a011b7e97b Fragment size setting 2020-06-09 21:40:53 -04:00
8c1c1697e0 Fix file wordexp in some paths #59 2020-06-05 19:41:02 -04:00
018b49fa4c Fix csv_escape #58 2020-06-05 19:13:03 -04:00
27b4e6403e Re-enable path autocomplete #54 2020-06-02 19:46:58 -04:00
13fdbd9e69 Fix for ES 7.7 #54 2020-06-01 18:14:34 -04:00
5e7fdaf8dd Update issue-template.md 2020-06-01 10:45:43 -04:00
19d5c8ac9f Update issue-template.md 2020-05-29 18:19:21 -04:00
99497049a8 Merge pull request #53 from dpieski/patch-1
Update README
2020-05-29 18:16:13 -04:00
Andrew
1a3181d78b Update README
changed case of path in a link to the usage guide to fix 404 error.
2020-05-29 15:37:20 -05:00
449aa77c8f Fix for unknown mime inside archives 2020-05-25 17:36:04 -04:00
3058c55510 Memory leak fix #37 2020-05-24 15:42:42 -04:00
dedf9287b2 Fix name separation in --archive list mode 2020-05-24 14:36:59 -04:00
ab199b0c0c Remove arc_reset() function because seek() inside archive doesn't work 2020-05-24 14:18:31 -04:00
c4fbae123e Better support for media files inside archives 2020-05-24 14:10:23 -04:00
dd2397ef5c handle .tgz #44, ignore files inside archives for stats page 2020-05-24 10:10:28 -04:00
ee0f71f4d3 fix compile warning 2020-05-17 15:00:56 -04:00
0bbb96b149 Merge pull request #51 from simon987/stats
Stats page
2020-05-17 14:49:28 -04:00
78f6e16701 image 2020-05-17 12:47:45 -04:00
4625bca9a9 stats 2020-05-17 12:47:02 -04:00
f2ae653886 Revert "wip"
This reverts commit 5686bc86
2020-05-16 08:16:49 -04:00
5686bc864d wip 2020-05-13 17:37:40 -04:00
cf513b4ad8 Escape invalid UTF8 characters simon987/sist2#44, increase magic buffer size 2020-05-12 19:28:02 -04:00
013423424e UTF-8 fix attempt w/ libarchive (#44) 2020-05-10 19:52:42 -04:00
16514fd6b0 Option to search in path #49 2020-05-09 22:00:22 -04:00
27509f97e1 Update USAGE.md 2020-05-08 19:08:46 -04:00
4c540eae1c Update USAGE.md 2020-05-08 19:07:45 -04:00
d2b53ff6fc Update README.md 2020-05-08 18:32:32 -04:00
0ef4292abf Fix duplicate tag problem (simon987/sist2#48) 2020-05-05 20:20:10 -04:00
e6fde38c24 Load defaults when LocalStorage is outdated 2020-05-03 08:13:25 -04:00
5fa343d40f fix version typo 2020-05-03 08:10:28 -04:00
7ee1374802 oops 2020-04-30 21:21:48 -04:00
bd9e56829c Support for markup files 2020-04-30 20:21:09 -04:00
718169345e gzip artifacts in CI 2020-04-21 19:34:46 -04:00
5a6aa763ca build fix 2020-04-21 18:50:32 -04:00
695d9abd83 revert debug hard-coded listen address 2020-04-21 15:52:35 -04:00
e436af7b2a 2.0 (#46)
* extract scan code to libscan, (wip)

* submodules

* replace curl with mongoose (wip)

* replace onion with mongoose (wip)

* replace onion with mongoose (wip)

* It compiles! (I think)

* Update readme

* Entirely remove libonion (WIP)

* unscramble submodules

* recover screenshot

* Update mappings

* Bug fixes

* update

* media meta fix

* memory fixes

* More bug fixes...

* Bug fix w/ libmagic & vfile

* libmagic fix (again)

* Better lightbox, better video handler, random reloads fix

* Use svg for info icon

* re-enable http auth

* mobi support #41, fix logs

* Update README & cleanup
2020-04-21 14:42:20 -04:00
4501a7810f Update issue-template.md 2020-04-11 07:33:33 -04:00
simon987
e36761fa6a Update issue templates 2020-04-11 07:28:48 -04:00
fe53b79d56 Fix warnings 2020-03-25 08:18:59 -04:00
09615bbed6 Update dependencies 2020-03-24 14:30:23 -04:00
a2be9b955c Fix build errors 2020-03-24 11:49:13 -04:00
9298bd2d9d CI fix... 2020-03-24 10:09:33 -04:00
317034ba21 teamcity automation attempt 2020-03-24 10:01:27 -04:00
0505303503 text_buffer bug fixes & Sort option 2020-03-20 20:54:22 -04:00
6e5772f13b Errors cleanup 2020-03-20 10:05:10 -04:00
ccccdb3b78 Fixes #38 2020-03-13 16:35:11 -04:00
12d17acf4f UI fixes 2020-03-06 12:27:38 -05:00
48b56cdb7b I forgot to commit this somehow 2020-03-06 10:32:05 -05:00
048f707f80 Fix buffer overflow in json parse function (index module) 2020-03-06 10:17:21 -05:00
98e0a5fd64 Update CI script 2020-03-06 09:41:33 -05:00
740a49a09f version bump 2020-03-06 09:36:46 -05:00
81be662574 (breaking) update mime list 2020-03-06 09:36:21 -05:00
02fa3f02f5 Fix memory leak with virtual files in parse.c 2020-03-06 09:36:07 -05:00
cfdd7bdd87 Fix memory leak in font.c 2020-03-06 09:35:19 -05:00
7ceb645926 hotfix invalid read in text_buffer 2020-03-06 09:34:41 -05:00
7d0091f647 whoops 2020-03-05 21:54:56 -05:00
b3cd630399 Update README.md 2020-03-05 19:42:06 -05:00
5f7a1acfe3 Merge pull request #36 from simon987/wip-doc
Wip doc
2020-03-05 18:43:56 -05:00
513a21cca2 Undo debug stuff 2020-03-05 18:42:51 -05:00
04dbfb23ab Cleanup warnings 2020-03-05 16:53:30 -05:00
1abddabeec Rewrite doc.c module, fix bad error handling, fix pdf.c memory leaks 2020-03-05 16:12:34 -05:00
9ace5774af Update dependencies 2020-03-05 16:10:45 -05:00
eab6101cf7 make --fast faster 2020-03-05 12:26:43 -05:00
d7cbd5d2b6 wip doc rewrite 2020-03-05 09:13:37 -05:00
641edf2715 Prettier warning messages in main.c 2020-03-04 17:57:49 -05:00
7efb4957bf inline text/util functions 2020-03-04 17:50:31 -05:00
9ae77fdedb Fix css glitch 2020-03-03 16:51:01 -05:00
98c40901ed Disallow incremental scan when version does not match (#33) 2020-03-03 16:36:07 -05:00
363375d5da version bump 2020-03-03 16:25:41 -05:00
149de95d88 (breaking) Upgrade path filter bar 2020-03-03 16:24:24 -05:00
e5bb4856d2 (breaking) Set item depth in ingest pipeline 2020-03-02 17:39:25 -05:00
d78994d427 Ignore --incremental option when the directory does not exist (#31) 2020-03-01 21:16:50 -05:00
f2d68d54df Update README.md 2020-03-01 13:55:08 -05:00
e03625838b Settings menu (#30) and UI tweaks 2020-02-29 19:26:09 -05:00
86840b46f4 Version bump 2020-02-27 09:47:06 -05:00
e57f9916eb Rewrite documentation 2020-02-27 09:45:14 -05:00
565ba6ee76 Fix for #29 2020-02-27 09:44:19 -05:00
d83fc2c373 Fix docker build for 1.2.15 2020-02-27 09:42:18 -05:00
d4da28249e --fast option #27 2020-02-22 18:37:08 -05:00
483a454c8d --exclude argument #26 2020-02-22 16:55:35 -05:00
018ac86640 fix build... 2020-02-22 13:20:41 -05:00
398f1aead4 Support for cbr documents 2020-02-22 13:11:19 -05:00
d19a75926b Fix invalid read in terminate_string() 2020-02-22 13:10:40 -05:00
1ac8b40e3d Code style 2020-02-22 09:02:59 -05:00
a8505cb8c1 Fix for #28 2020-02-20 16:42:13 -05:00
ae8652d86e UI tweaks, search syntax (#25) 2020-02-16 15:24:29 -05:00
849beb09d8 hotfix 2020-02-15 19:33:18 -05:00
e1aaaee617 UI tweak 2020-02-15 09:30:14 -05:00
c02b940945 (I forgot to commit this) 2020-02-14 20:58:10 -05:00
2934ddb07f Add image viewer (#2) 2020-02-14 18:28:55 -05:00
7f6f3c02fa OCR tweaks 2020-02-11 21:13:47 -05:00
7f98d5a682 Fix buffer overflow (whoops) 2020-02-09 18:11:29 -05:00
7eb9c5d7d5 Fix web/index issue with NULL mime types 2020-02-09 17:23:49 -05:00
184439aa38 increase minimum image size for OCR 2020-02-09 14:06:59 -05:00
1ce8b298a1 Display EXIF tags on document info panel, remove march=native on openjp 2020-02-09 13:21:19 -05:00
75f99025d9 add exif dateTime, allow some special characters in text meta 2020-02-09 08:47:13 -05:00
ebe852bd5a Fix rewrite-url arg 2020-02-09 08:23:17 -05:00
402b103c49 Fix total count for ES 7.5 2020-02-08 09:25:00 -05:00
e9b6e1cdc2 Turn off auto optimisation in libtesseract build 2020-02-08 08:32:04 -05:00
ed1ce8ab5e Handle XML errors #18 2020-02-07 10:08:01 -05:00
d1fa4febc4 Improve scroll feature, UI fix 2020-02-07 10:08:01 -05:00
048c55df7b Update README.md 2020-02-06 19:56:29 -05:00
f77bc6a025 Update README.md 2020-02-06 19:55:32 -05:00
efdde2734e version bump 2020-02-06 19:28:05 -05:00
66658fa8f7 Remove trailing/leading white space in text meta fields 2020-02-06 19:27:30 -05:00
df41c251e4 (Breaking!) Add some exif tags 2020-02-06 19:21:50 -05:00
3282ab56ba Version bump 2020-02-02 09:26:54 -05:00
8300838d30 Suppress XML parsing errors (#18) 2020-02-02 09:26:03 -05:00
c9870a6d3d Remove -march=native for release build... 2020-02-02 09:03:06 -05:00
a143cc4fcf bundle openssl... 2020-02-02 08:39:20 -05:00
9ef1f3781d fix attempt for #11 2020-02-01 20:04:26 -05:00
bbee8aa721 tesseract ocr path fix 2020-02-01 20:03:59 -05:00
d22f83c797 curl fix 2020-02-01 15:22:43 -05:00
50615486a4 curl fix attempt 2020-02-01 14:42:42 -05:00
ca79e4f797 add /status endpoint 2020-01-28 10:18:37 -05:00
6a9fd08a80 Merge pull request #21 from simon987/wip-20
Fixes #20
2020-01-27 09:16:00 -05:00
cab890dc9b #20 wip 2020-01-27 09:09:42 -05:00
b3c4faf2df Update README.md 2020-01-26 12:37:13 -05:00
353937171a Update README.md 2020-01-20 15:54:53 -05:00
c80002bea4 Bundle libcurl attempt 2 2020-01-18 11:53:12 -05:00
56adee9d81 Bundle libcurl, libopc bugfix #18 2020-01-18 10:25:02 -05:00
d6493d6d5f Bundle libpng 2020-01-16 16:21:38 -05:00
0967e9676d remove static build in CI... 2020-01-16 15:45:18 -05:00
487e998ea0 Display error message on /d/ error 2020-01-16 15:04:50 -05:00
159 changed files with 7525 additions and 8358 deletions

56
.drone.yml Normal file
View File

@@ -0,0 +1,56 @@
kind: pipeline
type: docker
name: amd64
platform:
os: linux
arch: amd64
steps:
- name: build
image: simon987/ubuntu_ci
commands:
- ./ci/build.sh
- name: scp files
image: appleboy/drone-scp
settings:
host:
from_secret: SSH_HOST
port:
from_secret: SSH_PORT
user:
from_secret: SSH_USER
key:
from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source:
- ./sist2-x64-linux
- ./sist2-x64-linux-debug.tar.gz
---
kind: pipeline
type: docker
name: arm64
platform:
arch: arm64
steps:
- name: build
image: simon987/ubuntu_ci_arm
commands:
- ./ci/build_arm64.sh
- name: scp files
image: appleboy/drone-scp
settings:
host:
from_secret: SSH_HOST
port:
from_secret: SSH_PORT
user:
from_secret: SSH_USER
key:
from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source:
- ./sist2-arm64-linux

View File

@@ -0,0 +1,18 @@
---
name: Issue template
about: General
title: ''
labels: ''
assignees: ''
---
sist2 version:
Platform (Linux or Docker, x86-64 or arm64):
Elasticsearch version:
Command with arguments: `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0"`
If the issue is related to the `scan` module, please attach the files necessary to reproduce the error or email them to me[at]simon987.net.

2
.gitignore vendored
View File

@@ -1,6 +1,5 @@
.idea
thumbs
test
*.cbp
CMakeCache.txt
CMakeFiles
@@ -16,3 +15,4 @@ bundle.js
*.a
vgcore.*
build/
third-party/

46
.gitmodules vendored
View File

@@ -1,42 +1,6 @@
[submodule "argparse"]
path = argparse
[submodule "third-party/libscan"]
path = third-party/libscan
url = https://github.com/simon987/libscan
[submodule "third-party/argparse"]
path = third-party/argparse
url = https://github.com/cofyc/argparse
[submodule "cJSON"]
path = cJSON
url = https://github.com/DaveGamble/cJSON
[submodule "lmdb"]
path = lmdb
url = https://github.com/LMDB/lmdb
[submodule "utf8.h"]
path = utf8.h
url = https://github.com/sheredom/utf8.h
[submodule "lib/bzip2-1.0.6"]
path = lib/bzip2-1.0.6
url = https://github.com/enthought/bzip2-1.0.6
[submodule "lib/libmagic"]
path = lib/libmagic
url = https://github.com/threatstack/libmagic
[submodule "lib/harfbuzz"]
path = lib/harfbuzz
url = https://github.com/harfbuzz/harfbuzz
[submodule "lib/openjpeg"]
path = lib/openjpeg
url = https://github.com/uclouvain/openjpeg
[submodule "lib/ffmpeg"]
path = lib/ffmpeg
url = https://git.ffmpeg.org/ffmpeg.git
[submodule "lib/onion"]
path = lib/onion
url = https://github.com/davidmoreno/onion
[submodule "lib/mupdf"]
path = lib/mupdf
url = git://git.ghostscript.com/mupdf.git
[submodule "lib/tesseract"]
path = lib/tesseract
url = https://github.com/tesseract-ocr/tesseract
[submodule "lib/leptonica"]
path = lib/leptonica
url = https://github.com/danbloomberg/leptonica
[submodule "lib/libtiff"]
path = lib/libtiff
url = https://gitlab.com/libtiff/libtiff

View File

@@ -1,69 +0,0 @@
import jetbrains.buildServer.configs.kotlin.v2019_2.*
import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.ExecBuildStep
import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.exec
import jetbrains.buildServer.configs.kotlin.v2019_2.triggers.vcs
import jetbrains.buildServer.configs.kotlin.v2019_2.vcs.GitVcsRoot
/*
The settings script is an entry point for defining a TeamCity
project hierarchy. The script should contain a single call to the
project() function with a Project instance or an init function as
an argument.
VcsRoots, BuildTypes, Templates, and subprojects can be
registered inside the project using the vcsRoot(), buildType(),
template(), and subProject() methods respectively.
To debug settings scripts in command-line, run the
mvnDebug org.jetbrains.teamcity:teamcity-configs-maven-plugin:generate
command and attach your debugger to the port 8000.
To debug in IntelliJ Idea, open the 'Maven Projects' tool window (View
-> Tool Windows -> Maven Projects), find the generate task node
(Plugins -> teamcity-configs -> teamcity-configs:generate), the
'Debug' option is available in the context menu for the task.
*/
version = "2019.2"
project {
vcsRoot(HttpsGithubComSimon987sist2refsHeadsMaster)
buildType(Build)
}
object Build : BuildType({
name = "Build"
artifactRules = """
sist2
sist2_scan
""".trimIndent()
vcs {
root(HttpsGithubComSimon987sist2refsHeadsMaster)
}
steps {
exec {
name = "Build"
path = "./ci/build.sh"
dockerImage = "simon987/general_ci"
dockerImagePlatform = ExecBuildStep.ImagePlatform.Linux
dockerPull = true
}
}
triggers {
vcs {
}
}
})
object HttpsGithubComSimon987sist2refsHeadsMaster : GitVcsRoot({
name = "https://github.com/simon987/sist2#refs/heads/master"
url = "https://github.com/simon987/sist2"
})

View File

@@ -2,134 +2,117 @@ cmake_minimum_required(VERSION 3.7)
set(CMAKE_C_STANDARD 11)
project(sist2 C)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/CMakeModules")
add_executable(
sist2
option(SIST_DEBUG "Build a debug executable" on)
set(BUILD_TESTS off)
add_subdirectory(third-party/libscan)
set(ARGPARSE_SHARED off)
add_subdirectory(third-party/argparse)
add_executable(sist2
# argparse
third-party/argparse/argparse.h third-party/argparse/argparse.c
src/main.c
src/sist.h
src/io/walk.h src/io/walk.c
src/parsing/media.h src/parsing/media.c
src/parsing/pdf.h src/parsing/pdf.c
src/io/store.h src/io/store.c
src/tpool.h src/tpool.c
src/parsing/parse.h src/parsing/parse.c
src/io/serialize.h src/io/serialize.c
src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
src/parsing/text.h src/parsing/text.c
src/index/web.c src/index/web.h
src/web/serve.c src/web/serve.h
src/web/auth_basic.h src/web/auth_basic.c
src/index/elastic.c src/index/elastic.h
src/util.c src/util.h
src/ctx.h src/types.h src/parsing/font.c src/parsing/font.h
src/parsing/arc.c src/parsing/arc.h
src/parsing/doc.c src/parsing/doc.h
src/ctx.h src/types.h
src/log.c src/log.h
# argparse
argparse/argparse.h argparse/argparse.c
# cJSON
cJSON/cJSON.h cJSON/cJSON.c
# LMDB
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
src/cli.c src/cli.h
src/stats.c src/stats.h src/ctx.c
src/parsing/sidecar.c src/parsing/sidecar.h)
# utf8.h
utf8.h/utf8.h
)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
find_package(PkgConfig REQUIRED)
set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:/usr/local/lib/pkgconfig/")
find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-glib CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED)
find_package(CURL CONFIG REQUIRED)
#find_package(OpenSSL REQUIRED)
find_package(Freetype REQUIRED)
pkg_check_modules(GLIB REQUIRED glib-2.0)
pkg_check_modules(GOBJECT REQUIRED gobject-2.0)
pkg_check_modules(UUID REQUIRED uuid)
add_definitions(${UUID_CFLAGS_OTHER})
add_definitions(${GLIB_CFLAGS_OTHER})
add_definitions(${GOBJECT_CFLAGS_OTHER})
add_definitions(${FREETYPE_CFLAGS_OTHER})
list(REMOVE_ITEM GLIB_LIBRARIES pcre)
list(REMOVE_ITEM GOBJECT_LIBRARIES pcre)
list(REMOVE_ITEM UUID_LIBRARIES pcre)
target_include_directories(
sist2 PUBLIC
${GOBJECT_INCLUDE_DIRS}
${GLIB_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/lib/ffmpeg/
${FREETYPE_INCLUDE_DIRS}
${UUID_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/
${PROJECT_SOURCE_DIR}/lmdb/libraries/liblmdb/
${PROJECT_SOURCE_DIR}/lib/onion/src/
${PROJECT_SOURCE_DIR}/lib/mupdf/include/
${PROJECT_SOURCE_DIR}/include/
/usr/include/libxml2/
${PROJECT_SOURCE_DIR}/lib/tesseract/include/
)
target_link_directories(
sist2 PUBLIC
${UUID_LIBRARY_DIRS}
${CMAKE_SOURCE_DIR}/third-party/onion/src/
${CMAKE_SOURCE_DIR}/third-party/utf8.h/
${CMAKE_SOURCE_DIR}/third-party/libscan/
${CMAKE_SOURCE_DIR}/
)
target_compile_options(sist2
target_compile_options(
sist2
PRIVATE
-fPIC
)
if (SIST_DEBUG)
target_compile_options(
sist2
PRIVATE
-g
-fstack-protector
-fno-omit-frame-pointer
-fsanitize=address
-fno-inline
# -O2
)
target_link_options(
sist2
PRIVATE
-fsanitize=address
)
set_target_properties(
sist2
PROPERTIES
OUTPUT_NAME sist2_debug
)
else ()
target_compile_options(
sist2
PRIVATE
-Ofast
# -march=native
-fno-stack-protector
-fomit-frame-pointer
)
endif ()
TARGET_LINK_LIBRARIES(
add_dependencies(
sist2
scan
argparse
)
target_link_libraries(
sist2
${GLIB_LIBRARIES}
${GOBJECT_LIBRARIES}
${UUID_LIBRARIES}
# ffmpeg
${PROJECT_SOURCE_DIR}/lib/libavcodec.a
${PROJECT_SOURCE_DIR}/lib/libavformat.a
${PROJECT_SOURCE_DIR}/lib/libavutil.a
${PROJECT_SOURCE_DIR}/lib/libswscale.a
${PROJECT_SOURCE_DIR}/lib/libswresample.a
# mupdf
${PROJECT_SOURCE_DIR}/lib/libmupdf.a
${PROJECT_SOURCE_DIR}/lib/libmupdf-third.a
# onion
${PROJECT_SOURCE_DIR}/lib/libonion_static.a
z
lmdb
cjson
argparse
unofficial::glib::glib
unofficial::mongoose::mongoose
CURL::libcurl
pthread
curl
m
bz2
${PROJECT_SOURCE_DIR}/lib/libmagic.a
${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
freetype
archive
magic
xml2
${PROJECT_SOURCE_DIR}/lib/libopc/libmce.a
${PROJECT_SOURCE_DIR}/lib/libopc/libopc.a
${PROJECT_SOURCE_DIR}/lib/libopc/libplib.a
c
${PROJECT_SOURCE_DIR}/lib/libtesseract.a
${PROJECT_SOURCE_DIR}/lib/liblept.a
${PROJECT_SOURCE_DIR}/lib/libtiff.a
png
stdc++
scan
)
add_custom_target(

View File

@@ -1,80 +0,0 @@
# - Try to find ffmpeg libraries (libavcodec, libavformat and libavutil)
# Once done this will define
#
# FFMPEG_FOUND - system has ffmpeg or libav
# FFMPEG_INCLUDE_DIR - the ffmpeg include directory
# FFMPEG_LIBRARIES - Link these to use ffmpeg
# FFMPEG_LIBAVCODEC
# FFMPEG_LIBAVFORMAT
# FFMPEG_LIBAVUTIL
#
# Copyright (c) 2008 Andreas Schneider <mail@cynapses.org>
# Modified for other libraries by Lasse Kärkkäinen <tronic>
# Modified for Hedgewars by Stepik777
#
# Redistribution and use is allowed according to the terms of the New
# BSD license.
#
if (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
# in cache already
set(FFMPEG_FOUND TRUE)
else (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
# use pkg-config to get the directories and then use these values
# in the FIND_PATH() and FIND_LIBRARY() calls
find_package(PkgConfig)
if (PKG_CONFIG_FOUND)
pkg_check_modules(_FFMPEG_AVCODEC libavcodec)
pkg_check_modules(_FFMPEG_AVFORMAT libavformat)
pkg_check_modules(_FFMPEG_AVUTIL libavutil)
endif (PKG_CONFIG_FOUND)
find_path(FFMPEG_AVCODEC_INCLUDE_DIR
NAMES libavcodec/avcodec.h
PATHS ${_FFMPEG_AVCODEC_INCLUDE_DIRS} /usr/include /usr/local/include /opt/local/include /sw/include
PATH_SUFFIXES ffmpeg libav
)
find_library(FFMPEG_LIBAVCODEC
NAMES avcodec
PATHS ${_FFMPEG_AVCODEC_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
)
find_library(FFMPEG_LIBAVFORMAT
NAMES avformat
PATHS ${_FFMPEG_AVFORMAT_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
)
find_library(FFMPEG_LIBAVUTIL
NAMES avutil
PATHS ${_FFMPEG_AVUTIL_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
)
if (FFMPEG_LIBAVCODEC AND FFMPEG_LIBAVFORMAT)
set(FFMPEG_FOUND TRUE)
endif()
if (FFMPEG_FOUND)
set(FFMPEG_INCLUDE_DIR ${FFMPEG_AVCODEC_INCLUDE_DIR})
set(FFMPEG_LIBRARIES
${FFMPEG_LIBAVCODEC}
${FFMPEG_LIBAVFORMAT}
${FFMPEG_LIBAVUTIL}
)
endif (FFMPEG_FOUND)
if (FFMPEG_FOUND)
if (NOT FFMPEG_FIND_QUIETLY)
message(STATUS "Found FFMPEG or Libav: ${FFMPEG_LIBRARIES}, ${FFMPEG_INCLUDE_DIR}")
endif (NOT FFMPEG_FIND_QUIETLY)
else (FFMPEG_FOUND)
if (FFMPEG_FIND_REQUIRED)
message(FATAL_ERROR "Could not find libavcodec or libavformat or libavutil")
endif (FFMPEG_FIND_REQUIRED)
endif (FFMPEG_FOUND)
endif (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)

View File

@@ -1,100 +0,0 @@
#-------------------------------------------------------------------------------
# Copyright (c) 2013-2013, Lars Baehren <lbaehren@gmail.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#-------------------------------------------------------------------------------
# - Check for the presence of LIBMAGIC
#
# The following variables are set when LIBMAGIC is found:
# LIBMAGIC_FOUND = Set to true, if all components of LIBMAGIC have been
# found.
# LIBMAGIC_INCLUDES = Include path for the header files of LIBMAGIC
# LIBMAGIC_LIBRARIES = Link these to use LIBMAGIC
# LIBMAGIC_LFLAGS = Linker flags (optional)
if (NOT LIBMAGIC_FOUND)
if (NOT LIBMAGIC_ROOT_DIR)
set (LIBMAGIC_ROOT_DIR ${CMAKE_INSTALL_PREFIX})
endif (NOT LIBMAGIC_ROOT_DIR)
##____________________________________________________________________________
## Check for the header files
find_path (LIBMAGIC_FILE_H
NAMES file/file.h
HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
PATH_SUFFIXES include
)
if (LIBMAGIC_FILE_H)
list (APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_FILE_H})
endif (LIBMAGIC_FILE_H)
find_path (LIBMAGIC_MAGIC_H
NAMES magic.h
HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
PATH_SUFFIXES include include/linux
)
if (LIBMAGIC_MAGIC_H)
list (APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_MAGIC_H})
endif (LIBMAGIC_MAGIC_H)
list (REMOVE_DUPLICATES LIBMAGIC_INCLUDES)
##____________________________________________________________________________
## Check for the library
find_library (LIBMAGIC_LIBRARIES magic
HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
PATH_SUFFIXES lib
)
##____________________________________________________________________________
## Actions taken when all components have been found
#find_package_handle_standard_args (LIBMAGIC DEFAULT_MSG LIBMAGIC_LIBRARIES LIBMAGIC_INCLUDES)
if (LIBMAGIC_FOUND)
if (NOT LIBMAGIC_FIND_QUIETLY)
message (STATUS "Found components for LIBMAGIC")
message (STATUS "LIBMAGIC_ROOT_DIR = ${LIBMAGIC_ROOT_DIR}")
message (STATUS "LIBMAGIC_INCLUDES = ${LIBMAGIC_INCLUDES}")
message (STATUS "LIBMAGIC_LIBRARIES = ${LIBMAGIC_LIBRARIES}")
endif (NOT LIBMAGIC_FIND_QUIETLY)
else (LIBMAGIC_FOUND)
if (LIBMAGIC_FIND_REQUIRED)
message (FATAL_ERROR "Could not find LIBMAGIC!")
endif (LIBMAGIC_FIND_REQUIRED)
endif (LIBMAGIC_FOUND)
##____________________________________________________________________________
## Mark advanced variables
mark_as_advanced (
LIBMAGIC_ROOT_DIR
LIBMAGIC_INCLUDES
LIBMAGIC_LIBRARIES
)
endif (NOT LIBMAGIC_FOUND)

View File

@@ -1,478 +0,0 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
# Decide whether the located OpenSSL libraries need extra link dependencies.
#
# Sets _OpenSSL_has_dependencies to TRUE on Linux when either library path ends
# in the static-library suffix, and looks up the Threads package in that case;
# otherwise sets it to FALSE.
macro(_OpenSSL_test_and_find_dependencies ssl_library crypto_library)
  # Start from "no dependencies" and only upgrade when the check passes.
  set(_OpenSSL_has_dependencies FALSE)
  if((CMAKE_SYSTEM_NAME STREQUAL "Linux") AND
      (("${ssl_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$") OR
       ("${crypto_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$")))
    set(_OpenSSL_has_dependencies TRUE)
    find_package(Threads)
  endif()
endmacro()
# Append the thread and dynamic-loader libraries to a list in the caller's scope.
#
#   libraries_var - NAME of the list variable to extend
#   library       - accepted for call-site symmetry; not read by this function
function(_OpenSSL_add_dependencies libraries_var library)
  # Collect the additions first, then publish the combined list in one step.
  set(_openssl_extra_deps "")
  if(CMAKE_THREAD_LIBS_INIT)
    list(APPEND _openssl_extra_deps ${CMAKE_THREAD_LIBS_INIT})
  endif()
  list(APPEND _openssl_extra_deps ${CMAKE_DL_LIBS})
  set(${libraries_var} ${${libraries_var}} ${_openssl_extra_deps} PARENT_SCOPE)
endfunction()
# Attach Threads::Threads and ${CMAKE_DL_LIBS} to an imported target's
# INTERFACE_LINK_LIBRARIES, but only when _OpenSSL_has_dependencies is set.
function(_OpenSSL_target_add_dependencies target)
  if(NOT _OpenSSL_has_dependencies)
    return()
  endif()
  set_property(TARGET ${target} APPEND PROPERTY
    INTERFACE_LINK_LIBRARIES Threads::Threads ${CMAKE_DL_LIBS})
endfunction()
# On UNIX, query pkg-config for an "openssl" package. The resulting _OPENSSL_*
# variables (_OPENSSL_INCLUDEDIR, _OPENSSL_LIBDIR) are used as extra search
# hints further down.
if (UNIX)
find_package(PkgConfig QUIET)
pkg_check_modules(_OPENSSL QUIET openssl)
endif ()
# Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES
# The previous value is saved in _openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES so
# it can be put back once the search is finished.
if(OPENSSL_USE_STATIC_LIBS)
set(_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
if(WIN32)
set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
else()
set(CMAKE_FIND_LIBRARY_SUFFIXES .a )
endif()
endif()
# Build the list of root directories to search: OPENSSL_ROOT_DIR (variable and
# environment) everywhere, plus registry entries and well-known install
# folders on Windows.
if (WIN32)
# http://www.slproweb.com/products/Win32OpenSSL.html
set(_OPENSSL_ROOT_HINTS
${OPENSSL_ROOT_DIR}
"[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (32-bit)_is1;Inno Setup: App Path]"
"[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (64-bit)_is1;Inno Setup: App Path]"
ENV OPENSSL_ROOT_DIR
)
file(TO_CMAKE_PATH "$ENV{PROGRAMFILES}" _programfiles)
set(_OPENSSL_ROOT_PATHS
"${_programfiles}/OpenSSL"
"${_programfiles}/OpenSSL-Win32"
"${_programfiles}/OpenSSL-Win64"
"C:/OpenSSL/"
"C:/OpenSSL-Win32/"
"C:/OpenSSL-Win64/"
)
unset(_programfiles)
else ()
# _OPENSSL_ROOT_PATHS is not set in this branch, so the PATHS section below is
# empty on non-Windows platforms.
set(_OPENSSL_ROOT_HINTS
${OPENSSL_ROOT_DIR}
ENV OPENSSL_ROOT_DIR
)
endif ()
# Pre-assembled "HINTS ... PATHS ..." argument list shared by every find_*
# call in this module.
set(_OPENSSL_ROOT_HINTS_AND_PATHS
HINTS ${_OPENSSL_ROOT_HINTS}
PATHS ${_OPENSSL_ROOT_PATHS}
)
# Locate the directory that contains openssl/ssl.h.
find_path(OPENSSL_INCLUDE_DIR
NAMES
openssl/ssl.h
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_INCLUDEDIR}
PATH_SUFFIXES
include
)
# Locate the ssl and crypto libraries.
# The three native-Windows cases (MSVC, MinGW, anything else) search through the
# legacy LIB_EAY / SSL_EAY variables and then copy the results into
# OPENSSL_CRYPTO_LIBRARY / OPENSSL_SSL_LIBRARY. Every other platform searches
# for those two variables directly.
if(WIN32 AND NOT CYGWIN)
if(MSVC)
# /MD and /MDd are the standard values - if someone wants to use
# others, the libnames have to change here too
# use also ssl and ssleay32 in debug as fallback for openssl < 0.9.8b
# enable OPENSSL_MSVC_STATIC_RT to get the libs build /MT (Multithreaded no-DLL)
# In Visual C++ naming convention each of these four kinds of Windows libraries has it's standard suffix:
# * MD for dynamic-release
# * MDd for dynamic-debug
# * MT for static-release
# * MTd for static-debug
# Implementation details:
# We are using the libraries located in the VC subdir instead of the parent directory even though :
# libeay32MD.lib is identical to ../libeay32.lib, and
# ssleay32MD.lib is identical to ../ssleay32.lib
# enable OPENSSL_USE_STATIC_LIBS to use the static libs located in lib/VC/static
if (OPENSSL_MSVC_STATIC_RT)
set(_OPENSSL_MSVC_RT_MODE "MT")
else ()
set(_OPENSSL_MSVC_RT_MODE "MD")
endif ()
# Since OpenSSL 1.1, lib names are like libcrypto32MTd.lib and libssl32MTd.lib
# The 32/64 part of the name is chosen from the pointer size of the target.
if( "${CMAKE_SIZEOF_VOID_P}" STREQUAL "8" )
set(_OPENSSL_MSVC_ARCH_SUFFIX "64")
else()
set(_OPENSSL_MSVC_ARCH_SUFFIX "32")
endif()
# Sub-directories to probe under each root, most specific first.
if(OPENSSL_USE_STATIC_LIBS)
set(_OPENSSL_PATH_SUFFIXES
"lib/VC/static"
"VC/static"
"lib"
)
else()
set(_OPENSSL_PATH_SUFFIXES
"lib/VC"
"VC"
"lib"
)
endif ()
# Debug and release variants of both libraries are searched separately; the
# candidate names are listed from the newest naming scheme to the oldest.
find_library(LIB_EAY_DEBUG
NAMES
libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
libcrypto${_OPENSSL_MSVC_RT_MODE}d
libcryptod
libeay32${_OPENSSL_MSVC_RT_MODE}d
libeay32d
cryptod
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
find_library(LIB_EAY_RELEASE
NAMES
libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
libcrypto${_OPENSSL_MSVC_RT_MODE}
libcrypto
libeay32${_OPENSSL_MSVC_RT_MODE}
libeay32
crypto
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
find_library(SSL_EAY_DEBUG
NAMES
libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
libssl${_OPENSSL_MSVC_RT_MODE}d
libssld
ssleay32${_OPENSSL_MSVC_RT_MODE}d
ssleay32d
ssld
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
find_library(SSL_EAY_RELEASE
NAMES
libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
libssl${_OPENSSL_MSVC_RT_MODE}
libssl
ssleay32${_OPENSSL_MSVC_RT_MODE}
ssleay32
ssl
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
# Copy the results into the <base>_LIBRARY_DEBUG / <base>_LIBRARY_RELEASE names
# that select_library_configurations() is called with below.
set(LIB_EAY_LIBRARY_DEBUG "${LIB_EAY_DEBUG}")
set(LIB_EAY_LIBRARY_RELEASE "${LIB_EAY_RELEASE}")
set(SSL_EAY_LIBRARY_DEBUG "${SSL_EAY_DEBUG}")
set(SSL_EAY_LIBRARY_RELEASE "${SSL_EAY_RELEASE}")
include(${CMAKE_CURRENT_LIST_DIR}/SelectLibraryConfigurations.cmake)
select_library_configurations(LIB_EAY)
select_library_configurations(SSL_EAY)
mark_as_advanced(LIB_EAY_LIBRARY_DEBUG LIB_EAY_LIBRARY_RELEASE
SSL_EAY_LIBRARY_DEBUG SSL_EAY_LIBRARY_RELEASE)
set(OPENSSL_SSL_LIBRARY ${SSL_EAY_LIBRARY} )
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY_LIBRARY} )
elseif(MINGW)
# same player, for MinGW
set(LIB_EAY_NAMES crypto libeay32)
set(SSL_EAY_NAMES ssl ssleay32)
find_library(LIB_EAY
NAMES
${LIB_EAY_NAMES}
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
"lib/MinGW"
"lib"
)
find_library(SSL_EAY
NAMES
${SSL_EAY_NAMES}
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
"lib/MinGW"
"lib"
)
mark_as_advanced(SSL_EAY LIB_EAY)
set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
unset(LIB_EAY_NAMES)
unset(SSL_EAY_NAMES)
else()
# Not sure what to pick for -say- intel, let's use the toplevel ones and hope someone report issues:
find_library(LIB_EAY
NAMES
libcrypto
libeay32
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
find_library(SSL_EAY
NAMES
libssl
ssleay32
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
mark_as_advanced(SSL_EAY LIB_EAY)
set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
endif()
else()
# Non-Windows (and Cygwin): search directly, using the pkg-config library
# directory as an additional hint.
find_library(OPENSSL_SSL_LIBRARY
NAMES
ssl
ssleay32
ssleay32MD
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
find_library(OPENSSL_CRYPTO_LIBRARY
NAMES
crypto
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
mark_as_advanced(OPENSSL_CRYPTO_LIBRARY OPENSSL_SSL_LIBRARY)
endif()
# compat defines
set(OPENSSL_SSL_LIBRARIES ${OPENSSL_SSL_LIBRARY})
set(OPENSSL_CRYPTO_LIBRARIES ${OPENSSL_CRYPTO_LIBRARY})
# When static libraries were picked up on Linux, extend both *_LIBRARIES lists
# with the thread and dl libraries they need at link time.
_OpenSSL_test_and_find_dependencies("${OPENSSL_SSL_LIBRARY}" "${OPENSSL_CRYPTO_LIBRARY}")
if(_OpenSSL_has_dependencies)
_OpenSSL_add_dependencies( OPENSSL_SSL_LIBRARIES "${OPENSSL_SSL_LIBRARY}" )
_OpenSSL_add_dependencies( OPENSSL_CRYPTO_LIBRARIES "${OPENSSL_CRYPTO_LIBRARY}" )
endif()
# Convert a hexadecimal string to its decimal value.
#
#   HEX - string of hex digits (either case, no "0x" prefix)
#   DEC - NAME of the variable in the caller's scope that receives the result
#
# An empty string yields 0.
function(from_hex HEX DEC)
  string(TOUPPER "${HEX}" _digits)
  string(LENGTH "${_digits}" _count)
  set(_value 0)
  set(_pos 0)
  # Walk the digits left to right, shifting the accumulator by one hex place
  # for every digit consumed.
  while(_pos LESS _count)
    string(SUBSTRING "${_digits}" ${_pos} 1 _digit)
    if(_digit MATCHES "^[A-F]$")
      # Letters A-F map to 10-15 via their position in "ABCDEF".
      string(FIND "ABCDEF" "${_digit}" _offset)
      math(EXPR _value "${_value} * 16 + 10 + ${_offset}")
    else()
      math(EXPR _value "${_value} * 16 + ${_digit}")
    endif()
    math(EXPR _pos "${_pos} + 1")
  endwhile()
  set(${DEC} ${_value} PARENT_SCOPE)
endfunction()
# Derive OPENSSL_VERSION from <openssl/opensslv.h>.
#
# Two header layouts are handled:
#   * OPENSSL_VERSION_NUMBER defined as a 0x... literal: decoded field by field.
#   * No such literal (OpenSSL 3.x defines the number as an expression): the
#     version is taken from the OPENSSL_VERSION_STR string macro instead.
# OPENSSL_VERSION is left untouched when neither form is present.
if(OPENSSL_INCLUDE_DIR AND EXISTS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h")
  file(STRINGS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h" openssl_version_str
       REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])+.*")
  if(openssl_version_str)
    # The version number is encoded as 0xMNNFFPPS: major minor fix patch status
    # The status gives if this is a developer or prerelease and is ignored here.
    # Major, minor, and fix directly translate into the version numbers shown in
    # the string. The patch field translates to the single character suffix that
    # indicates the bug fix state, which 00 -> nothing, 01 -> a, 02 -> b and so
    # on.
    string(REGEX REPLACE "^.*OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F]).*$"
           "\\1;\\2;\\3;\\4;\\5" OPENSSL_VERSION_LIST "${openssl_version_str}")
    list(GET OPENSSL_VERSION_LIST 0 OPENSSL_VERSION_MAJOR)
    list(GET OPENSSL_VERSION_LIST 1 OPENSSL_VERSION_MINOR)
    from_hex("${OPENSSL_VERSION_MINOR}" OPENSSL_VERSION_MINOR)
    list(GET OPENSSL_VERSION_LIST 2 OPENSSL_VERSION_FIX)
    from_hex("${OPENSSL_VERSION_FIX}" OPENSSL_VERSION_FIX)
    list(GET OPENSSL_VERSION_LIST 3 OPENSSL_VERSION_PATCH)
    if (NOT OPENSSL_VERSION_PATCH STREQUAL "00")
      from_hex("${OPENSSL_VERSION_PATCH}" _tmp)
      # 96 is the ASCII code of 'a' minus 1
      math(EXPR OPENSSL_VERSION_PATCH_ASCII "${_tmp} + 96")
      unset(_tmp)
      # Once anyone knows how OpenSSL would call the patch versions beyond 'z'
      # this should be updated to handle that, too. This has not happened yet
      # so it is simply ignored here for now.
      string(ASCII "${OPENSSL_VERSION_PATCH_ASCII}" OPENSSL_VERSION_PATCH_STRING)
    endif ()
    set(OPENSSL_VERSION "${OPENSSL_VERSION_MAJOR}.${OPENSSL_VERSION_MINOR}.${OPENSSL_VERSION_FIX}${OPENSSL_VERSION_PATCH_STRING}")
  else ()
    # No hex literal: fall back to the MAJOR.MINOR.PATCH string macro.
    file(STRINGS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h" _openssl_version_str_line
         REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_STR[\t ]+\"([0-9])+\\.([0-9])+\\.([0-9])+\".*")
    if(_openssl_version_str_line)
      string(REGEX REPLACE "^.*OPENSSL_VERSION_STR[\t ]+\"([0-9]+\\.[0-9]+\\.[0-9]+)\".*$"
             "\\1" OPENSSL_VERSION "${_openssl_version_str_line}")
    endif()
    unset(_openssl_version_str_line)
  endif ()
endif ()
# Combined library list exposed to callers.
set(OPENSSL_LIBRARIES ${OPENSSL_SSL_LIBRARIES} ${OPENSSL_CRYPTO_LIBRARIES} )
list(REMOVE_DUPLICATES OPENSSL_LIBRARIES)

# Record, for every requested component, whether its headers and at least one
# library file exist on disk. Each component starts as "not found" and is
# upgraded only when the existence check passes.
foreach(_comp IN LISTS OpenSSL_FIND_COMPONENTS)
  set(OpenSSL_${_comp}_FOUND FALSE)
  if(_comp STREQUAL "Crypto")
    if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
        (EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
         EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
         EXISTS "${LIB_EAY_LIBRARY_RELEASE}"))
      set(OpenSSL_${_comp}_FOUND TRUE)
    endif()
  elseif(_comp STREQUAL "SSL")
    if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
        (EXISTS "${OPENSSL_SSL_LIBRARY}" OR
         EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
         EXISTS "${SSL_EAY_LIBRARY_RELEASE}"))
      set(OpenSSL_${_comp}_FOUND TRUE)
    endif()
  else()
    message(WARNING "${_comp} is not a valid OpenSSL component")
  endif()
endforeach()
unset(_comp)

# Standard found/not-found reporting, including version and component checks.
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake)
find_package_handle_standard_args(OpenSSL
  REQUIRED_VARS OPENSSL_CRYPTO_LIBRARY OPENSSL_INCLUDE_DIR
  VERSION_VAR OPENSSL_VERSION
  HANDLE_COMPONENTS
  FAIL_MESSAGE "Could NOT find OpenSSL, try to set the path to OpenSSL root folder in the system variable OPENSSL_ROOT_DIR"
)
mark_as_advanced(OPENSSL_INCLUDE_DIR OPENSSL_LIBRARIES)
# Define the OpenSSL::Crypto and OpenSSL::SSL imported targets once the package
# has been found. A target is only created when it does not exist yet and at
# least one of its library files is present on disk.
if(OPENSSL_FOUND)
  if(NOT TARGET OpenSSL::Crypto AND
      (EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
       EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
       EXISTS "${LIB_EAY_LIBRARY_RELEASE}"))
    add_library(OpenSSL::Crypto UNKNOWN IMPORTED)
    set_target_properties(OpenSSL::Crypto PROPERTIES
      INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
    if(EXISTS "${OPENSSL_CRYPTO_LIBRARY}")
      set_target_properties(OpenSSL::Crypto PROPERTIES
        IMPORTED_LINK_INTERFACE_LANGUAGES "C"
        IMPORTED_LOCATION "${OPENSSL_CRYPTO_LIBRARY}")
    endif()
    # Per-configuration locations; RELEASE is registered before DEBUG.
    foreach(_openssl_cfg RELEASE DEBUG)
      if(EXISTS "${LIB_EAY_LIBRARY_${_openssl_cfg}}")
        set_property(TARGET OpenSSL::Crypto APPEND PROPERTY
          IMPORTED_CONFIGURATIONS ${_openssl_cfg})
        set_target_properties(OpenSSL::Crypto PROPERTIES
          IMPORTED_LINK_INTERFACE_LANGUAGES_${_openssl_cfg} "C"
          IMPORTED_LOCATION_${_openssl_cfg} "${LIB_EAY_LIBRARY_${_openssl_cfg}}")
      endif()
    endforeach()
    unset(_openssl_cfg)
    _OpenSSL_target_add_dependencies(OpenSSL::Crypto)
  endif()

  if(NOT TARGET OpenSSL::SSL AND
      (EXISTS "${OPENSSL_SSL_LIBRARY}" OR
       EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
       EXISTS "${SSL_EAY_LIBRARY_RELEASE}"))
    add_library(OpenSSL::SSL UNKNOWN IMPORTED)
    set_target_properties(OpenSSL::SSL PROPERTIES
      INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
    if(EXISTS "${OPENSSL_SSL_LIBRARY}")
      set_target_properties(OpenSSL::SSL PROPERTIES
        IMPORTED_LINK_INTERFACE_LANGUAGES "C"
        IMPORTED_LOCATION "${OPENSSL_SSL_LIBRARY}")
    endif()
    # Per-configuration locations; RELEASE is registered before DEBUG.
    foreach(_openssl_cfg RELEASE DEBUG)
      if(EXISTS "${SSL_EAY_LIBRARY_${_openssl_cfg}}")
        set_property(TARGET OpenSSL::SSL APPEND PROPERTY
          IMPORTED_CONFIGURATIONS ${_openssl_cfg})
        set_target_properties(OpenSSL::SSL PROPERTIES
          IMPORTED_LINK_INTERFACE_LANGUAGES_${_openssl_cfg} "C"
          IMPORTED_LOCATION_${_openssl_cfg} "${SSL_EAY_LIBRARY_${_openssl_cfg}}")
      endif()
    endforeach()
    unset(_openssl_cfg)
    # The SSL target links against the crypto target when that one exists.
    if(TARGET OpenSSL::Crypto)
      set_target_properties(OpenSSL::SSL PROPERTIES
        INTERFACE_LINK_LIBRARIES OpenSSL::Crypto)
    endif()
    _OpenSSL_target_add_dependencies(OpenSSL::SSL)
  endif()
endif()

# Restore the original find library ordering
if(OPENSSL_USE_STATIC_LIBS)
  set(CMAKE_FIND_LIBRARY_SUFFIXES ${_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
endif()

View File

@@ -1,268 +0,0 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageMessage.cmake)
# internal helper macro
# Internal helper: report a lookup failure for package ${_NAME}.
# The message is extended with FPHSA_REASON_FAILURE_MESSAGE when that is set.
# It is fatal for REQUIRED packages, a status line otherwise, and suppressed
# entirely for QUIET non-required packages.
macro(_FPHSA_FAILURE_MESSAGE _msg)
  set (__msg "${_msg}")
  if (FPHSA_REASON_FAILURE_MESSAGE)
    string(APPEND __msg "\n Reason given by package: ${FPHSA_REASON_FAILURE_MESSAGE}\n")
  endif()

  if (${_NAME}_FIND_REQUIRED)
    message(FATAL_ERROR "${__msg}")
  elseif (NOT ${_NAME}_FIND_QUIETLY)
    message(STATUS "${__msg}")
  endif ()
endmacro()
# internal helper macro to generate the failure message when used in CONFIG_MODE:
# Internal helper: build and emit the failure message used in CONFIG_MODE.
# Three situations are distinguished, in this order:
#   1. a config file was accepted but another required variable is missing,
#   2. config files were considered but none had a suitable version,
#   3. no config file was found at all.
macro(_FPHSA_HANDLE_FAILURE_CONFIG_MODE)
  if(${_NAME}_CONFIG)
    # <PackageName>_CONFIG is set, but FOUND is false: some other REQUIRED_VARS
    # entry was not found.
    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: missing:${MISSING_VARS} (found ${${_NAME}_CONFIG} ${VERSION_MSG})")
  elseif(${_NAME}_CONSIDERED_CONFIGS)
    # Config files were found but rejected; list each one with its version.
    set(configsText "")
    list(LENGTH ${_NAME}_CONSIDERED_CONFIGS configsCount)
    math(EXPR configsCount "${configsCount} - 1")
    foreach(currentConfigIndex RANGE ${configsCount})
      list(GET ${_NAME}_CONSIDERED_CONFIGS ${currentConfigIndex} filename)
      list(GET ${_NAME}_CONSIDERED_VERSIONS ${currentConfigIndex} version)
      string(APPEND configsText "\n ${filename} (version ${version})")
    endforeach()
    if (${_NAME}_NOT_FOUND_MESSAGE)
      # Put the package's own explanation in front of any existing reason.
      if (FPHSA_REASON_FAILURE_MESSAGE)
        string(PREPEND FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}\n ")
      else()
        set(FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}")
      endif()
    else()
      string(APPEND configsText "\n")
    endif()
    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} ${VERSION_MSG}, checked the following files:${configsText}")
  else()
    # Simple case: no config file was found at all.
    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: found neither ${_NAME}Config.cmake nor ${_NAME_LOWER}-config.cmake ${VERSION_MSG}")
  endif()
endmacro()
# Evaluate the result of a find module and report it in a uniform way.
#
# Two call forms are accepted:
#   simple:   (<name> <fail-message|DEFAULT_MSG> <required-var>...)
#   extended: (<name> [CONFIG_MODE] [HANDLE_COMPONENTS]
#              [FAIL_MESSAGE <msg>] [REASON_FAILURE_MESSAGE <msg>]
#              [VERSION_VAR <var>] [FOUND_VAR <var>]
#              REQUIRED_VARS <var>...)
# The form is chosen by whether the first argument after the name is one of the
# extended-mode keywords.
#
# On return, both <name>_FOUND and <NAME>_FOUND (upper-cased name) are set in
# the caller's scope. A failure is fatal when <name>_FIND_REQUIRED is set.
function(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FIRST_ARG)
# Set up the arguments for `cmake_parse_arguments`.
set(options CONFIG_MODE HANDLE_COMPONENTS)
set(oneValueArgs FAIL_MESSAGE REASON_FAILURE_MESSAGE VERSION_VAR FOUND_VAR)
set(multiValueArgs REQUIRED_VARS)
# Check whether we are in 'simple' or 'extended' mode:
set(_KEYWORDS_FOR_EXTENDED_MODE ${options} ${oneValueArgs} ${multiValueArgs} )
list(FIND _KEYWORDS_FOR_EXTENDED_MODE "${_FIRST_ARG}" INDEX)
if(${INDEX} EQUAL -1)
# Simple mode: first argument is the failure message, the rest are the
# required variables; there is no version variable.
set(FPHSA_FAIL_MESSAGE ${_FIRST_ARG})
set(FPHSA_REQUIRED_VARS ${ARGN})
set(FPHSA_VERSION_VAR)
else()
cmake_parse_arguments(FPHSA "${options}" "${oneValueArgs}" "${multiValueArgs}" ${_FIRST_ARG} ${ARGN})
if(FPHSA_UNPARSED_ARGUMENTS)
message(FATAL_ERROR "Unknown keywords given to FIND_PACKAGE_HANDLE_STANDARD_ARGS(): \"${FPHSA_UNPARSED_ARGUMENTS}\"")
endif()
if(NOT FPHSA_FAIL_MESSAGE)
set(FPHSA_FAIL_MESSAGE "DEFAULT_MSG")
endif()
# In config-mode, we rely on the variable <PackageName>_CONFIG, which is set by find_package()
# when it successfully found the config-file, including version checking:
if(FPHSA_CONFIG_MODE)
list(INSERT FPHSA_REQUIRED_VARS 0 ${_NAME}_CONFIG)
list(REMOVE_DUPLICATES FPHSA_REQUIRED_VARS)
set(FPHSA_VERSION_VAR ${_NAME}_VERSION)
endif()
if(NOT FPHSA_REQUIRED_VARS)
message(FATAL_ERROR "No REQUIRED_VARS specified for FIND_PACKAGE_HANDLE_STANDARD_ARGS()")
endif()
endif()
# now that we collected all arguments, process them
if("x${FPHSA_FAIL_MESSAGE}" STREQUAL "xDEFAULT_MSG")
set(FPHSA_FAIL_MESSAGE "Could NOT find ${_NAME}")
endif()
# The first required variable is the one whose value is shown in the
# "Found <name>: ..." message.
list(GET FPHSA_REQUIRED_VARS 0 _FIRST_REQUIRED_VAR)
string(TOUPPER ${_NAME} _NAME_UPPER)
string(TOLOWER ${_NAME} _NAME_LOWER)
# FOUND_VAR, when given, must be one of the two accepted spellings.
if(FPHSA_FOUND_VAR)
if(FPHSA_FOUND_VAR MATCHES "^${_NAME}_FOUND$" OR FPHSA_FOUND_VAR MATCHES "^${_NAME_UPPER}_FOUND$")
set(_FOUND_VAR ${FPHSA_FOUND_VAR})
else()
message(FATAL_ERROR "The argument for FOUND_VAR is \"${FPHSA_FOUND_VAR}\", but only \"${_NAME}_FOUND\" and \"${_NAME_UPPER}_FOUND\" are valid names.")
endif()
else()
set(_FOUND_VAR ${_NAME_UPPER}_FOUND)
endif()
# collect all variables which were not found, so they can be printed, so the
# user knows better what went wrong (#6375)
set(MISSING_VARS "")
# DETAILS accumulates a fingerprint of the find result; it is passed to
# FIND_PACKAGE_MESSAGE() so the "Found" line is only re-printed when it changes.
set(DETAILS "")
# check if all passed variables are valid
set(FPHSA_FOUND_${_NAME} TRUE)
foreach(_CURRENT_VAR ${FPHSA_REQUIRED_VARS})
if(NOT ${_CURRENT_VAR})
set(FPHSA_FOUND_${_NAME} FALSE)
string(APPEND MISSING_VARS " ${_CURRENT_VAR}")
else()
string(APPEND DETAILS "[${${_CURRENT_VAR}}]")
endif()
endforeach()
if(FPHSA_FOUND_${_NAME})
set(${_NAME}_FOUND TRUE)
set(${_NAME_UPPER}_FOUND TRUE)
else()
set(${_NAME}_FOUND FALSE)
set(${_NAME_UPPER}_FOUND FALSE)
endif()
# component handling
# A missing component only fails the package when it was requested as required
# (<name>_FIND_REQUIRED_<comp>).
unset(FOUND_COMPONENTS_MSG)
unset(MISSING_COMPONENTS_MSG)
if(FPHSA_HANDLE_COMPONENTS)
foreach(comp ${${_NAME}_FIND_COMPONENTS})
if(${_NAME}_${comp}_FOUND)
if(NOT DEFINED FOUND_COMPONENTS_MSG)
set(FOUND_COMPONENTS_MSG "found components:")
endif()
string(APPEND FOUND_COMPONENTS_MSG " ${comp}")
else()
if(NOT DEFINED MISSING_COMPONENTS_MSG)
set(MISSING_COMPONENTS_MSG "missing components:")
endif()
string(APPEND MISSING_COMPONENTS_MSG " ${comp}")
if(${_NAME}_FIND_REQUIRED_${comp})
set(${_NAME}_FOUND FALSE)
string(APPEND MISSING_VARS " ${comp}")
endif()
endif()
endforeach()
set(COMPONENT_MSG "${FOUND_COMPONENTS_MSG} ${MISSING_COMPONENTS_MSG}")
string(APPEND DETAILS "[c${COMPONENT_MSG}]")
endif()
# version handling:
set(VERSION_MSG "")
set(VERSION_OK TRUE)
# check with DEFINED here as the requested or found version may be "0"
if (DEFINED ${_NAME}_FIND_VERSION)
if(DEFINED ${FPHSA_VERSION_VAR})
set(_FOUND_VERSION ${${FPHSA_VERSION_VAR}})
if(${_NAME}_FIND_VERSION_EXACT) # exact version required
# count the dots in the version string
string(REGEX REPLACE "[^.]" "" _VERSION_DOTS "${_FOUND_VERSION}")
# add one dot because there is one dot more than there are components
string(LENGTH "${_VERSION_DOTS}." _VERSION_DOTS)
# When the found version has more components than were requested, compare
# only its leading components against the requested version.
if (_VERSION_DOTS GREATER ${_NAME}_FIND_VERSION_COUNT)
# Because of the C++ implementation of find_package() ${_NAME}_FIND_VERSION_COUNT
# is at most 4 here. Therefore a simple lookup table is used.
if (${_NAME}_FIND_VERSION_COUNT EQUAL 1)
set(_VERSION_REGEX "[^.]*")
elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 2)
set(_VERSION_REGEX "[^.]*\\.[^.]*")
elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 3)
set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*")
else ()
set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*\\.[^.]*")
endif ()
string(REGEX REPLACE "^(${_VERSION_REGEX})\\..*" "\\1" _VERSION_HEAD "${_FOUND_VERSION}")
unset(_VERSION_REGEX)
if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _VERSION_HEAD)
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
set(VERSION_OK FALSE)
else ()
set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
endif ()
unset(_VERSION_HEAD)
else ()
if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _FOUND_VERSION)
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
set(VERSION_OK FALSE)
else ()
set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
endif ()
endif ()
unset(_VERSION_DOTS)
else() # minimum version specified:
if (${_NAME}_FIND_VERSION VERSION_GREATER _FOUND_VERSION)
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is at least \"${${_NAME}_FIND_VERSION}\"")
set(VERSION_OK FALSE)
else ()
set(VERSION_MSG "(found suitable version \"${_FOUND_VERSION}\", minimum required is \"${${_NAME}_FIND_VERSION}\")")
endif ()
endif()
else()
# if the package was not found, but a version was given, add that to the output:
if(${_NAME}_FIND_VERSION_EXACT)
set(VERSION_MSG "(Required is exact version \"${${_NAME}_FIND_VERSION}\")")
else()
set(VERSION_MSG "(Required is at least version \"${${_NAME}_FIND_VERSION}\")")
endif()
endif()
else ()
# Check with DEFINED as the found version may be 0.
if(DEFINED ${FPHSA_VERSION_VAR})
set(VERSION_MSG "(found version \"${${FPHSA_VERSION_VAR}}\")")
endif()
endif ()
# An unsuitable version marks the package as not found.
if(VERSION_OK)
string(APPEND DETAILS "[v${${FPHSA_VERSION_VAR}}(${${_NAME}_FIND_VERSION})]")
else()
set(${_NAME}_FOUND FALSE)
endif()
# print the result:
if (${_NAME}_FOUND)
FIND_PACKAGE_MESSAGE(${_NAME} "Found ${_NAME}: ${${_FIRST_REQUIRED_VAR}} ${VERSION_MSG} ${COMPONENT_MSG}" "${DETAILS}")
else ()
if(FPHSA_CONFIG_MODE)
_FPHSA_HANDLE_FAILURE_CONFIG_MODE()
else()
if(NOT VERSION_OK)
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: ${VERSION_MSG} (found ${${_FIRST_REQUIRED_VAR}})")
else()
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} (missing:${MISSING_VARS}) ${VERSION_MSG}")
endif()
endif()
endif ()
# Publish the final verdict under both spellings; both take the value of
# <name>_FOUND.
set(${_NAME}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
set(${_NAME_UPPER}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
endfunction()

View File

@@ -1,48 +0,0 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
#[=======================================================================[.rst:
FindPackageMessage
------------------
.. code-block:: cmake
find_package_message(<name> "message for user" "find result details")
This function is intended to be used in FindXXX.cmake modules files.
It will print a message once for each unique find result. This is
useful for telling the user where a package was found. The first
argument specifies the name (XXX) of the package. The second argument
specifies the message to display. The third argument lists details
about the find result so that if they change the message will be
displayed again. The macro also obeys the QUIET argument to the
find_package command.
Example:
.. code-block:: cmake
if(X11_FOUND)
find_package_message(X11 "Found X11: ${X11_X11_LIB}"
"[${X11_X11_LIB}][${X11_INCLUDE_DIR}]")
else()
...
endif()
#]=======================================================================]
# Print `msg` once per distinct find result for package `pkg`.
#
#   pkg     - package name; <pkg>_FIND_QUIETLY silences the message
#   msg     - text to print as a STATUS message
#   details - fingerprint of the find result; the message is printed again only
#             when this differs from the value cached by the previous call
function(find_package_message pkg msg details)
  if(${pkg}_FIND_QUIETLY)
    return()
  endif()

  string(REPLACE "\n" "" details "${details}")
  set(_details_cache_key FIND_PACKAGE_MESSAGE_DETAILS_${pkg})
  if("${details}" STREQUAL "${${_details_cache_key}}")
    # Same result as last time: the message has already been shown.
    return()
  endif()

  message(STATUS "${msg}")
  # Remember the details in the cache so the message is not repeated.
  set("${_details_cache_key}" "${details}"
    CACHE INTERNAL "Details about finding ${pkg}")
endfunction()

View File

@@ -3,7 +3,7 @@ MAINTAINER simon987 <me@simon987.net>
RUN apt update
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
curl libtiff5 libpng16-16
curl libtiff5 libpng16-16 libpcre3
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \
@@ -16,4 +16,7 @@ RUN mkdir -p /usr/share/tessdata && \
ADD sist2 /root/sist2
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
ENTRYPOINT ["/root/sist2"]

View File

@@ -1,10 +1,14 @@
rm ./sist2
cp ../sist2 .
rm ./sist2 sist2_debug
cp ../sist2.gz .
gzip -d sist2.gz
strip sist2
version=$(./sist2 --version)
echo "Version ${version}"
docker build . -t simon987/sist2:${version} -t simon987/sist2:latest
docker push simon987/sist2:${version}
docker push simon987/sist2:latest
docker run --rm simon987/sist2 -v

22
DockerArm64/Dockerfile Normal file
View File

@@ -0,0 +1,22 @@
# Runtime image for the pre-built arm64 sist2 binary.
# NOTE(review): ubuntu:19.10 is an end-of-life release; confirm its package
# mirrors are still reachable before rebuilding this image.
FROM ubuntu:19.10
LABEL maintainer="simon987 <me@simon987.net>"

# Refresh the package index and install in the same layer so a cached, stale
# index is never reused; drop the index afterwards to keep the layer small.
RUN apt-get update && \
    apt-get install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
    curl libtiff5 libpng16-16 libpcre3 && \
    rm -rf /var/lib/apt/lists/*

# Pre-install the common tesseract language files.
# curl -f makes an HTTP error fail the build instead of silently saving the
# error page as a .traineddata file.
RUN mkdir -p /usr/share/tessdata && \
    cd /usr/share/tessdata/ && \
    curl -f -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata && \
    curl -f -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata && \
    curl -f -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata && \
    curl -f -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata && \
    curl -f -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata && \
    curl -f -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh

# Plain local file: COPY is sufficient (no archive extraction or URL fetch).
COPY sist2_arm64 /root/sist2

ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8

ENTRYPOINT ["/root/sist2"]

13
DockerArm64/build.sh Executable file
View File

@@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Build, tag and push the arm64 sist2 image from ../sist2_arm64.gz.
# Must be run from the DockerArm64 directory.

# Stop at the first failing step so a broken binary or an empty version string
# can never be tagged and pushed.
set -euo pipefail

# -f: a missing leftover binary from a previous run is not an error.
rm -f ./sist2_arm64
cp ../sist2_arm64.gz .
gzip -d sist2_arm64.gz

version=$(./sist2_arm64 --version)
echo "Version ${version}"

docker build . -t simon987/sist2-arm64:"${version}" -t simon987/sist2-arm64:latest
docker push simon987/sist2-arm64:"${version}"
docker push simon987/sist2-arm64:latest

# Smoke test: the pushed image must start and print its version.
docker run --rm simon987/sist2-arm64 -v

123
README.md
View File

@@ -1,6 +1,8 @@
![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
[![Development snapshots](https://ci.simon987.net/app/rest/builds/buildType(Sist2_Build)/statusIcon)](https://files.simon987.net/artifacts/Sist2/Build/)
[![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/sist2/simon987_sist2/)
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/?i=Demo%20files)
# sist2
@@ -8,95 +10,83 @@ sist2 (Simple incremental search tool)
*Warning: sist2 is in early development*
![sist2.png](docs/sist2.png)
## Features
* Fast, low memory usage, multi-threaded
* Mobile-friendly Web interface
* Portable (all its features are packaged in a single executable)
* Extracts text from common file types \*
* Extracts text and metadata from common file types \*
* Generates thumbnails \*
* Incremental scanning
* Automatic tagging from file attributes via [user scripts](scripting/README.md)
* Manual tagging from the UI and automatic tagging based on file attributes via [user scripts](docs/scripting.md)
* Recursive scan inside archive files \*\*
* OCR support with tesseract \*\*\*
* Stats page & disk utilisation visualization
\* See [format support](#format-support)
\*\* See [Archive files](#archive-files)
\*\*\* See [OCR](#ocr)
![stats](docs/stats.png)
## Getting Started
1. Have an [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) instance running
1.
1. Have an Elasticsearch (>= 6.X.X) instance running
1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
1. *(or)* Run using docker:
```bash
docker run -d --name es1 --net sist2_net -p 9200:9200 \
-e "discovery.type=single-node" elasticsearch:7.5.2
```
1. *(or)* Run using docker-compose:
```yaml
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:7.5.2
environment:
- discovery.type=single-node
- "ES_JAVA_OPTS=-Xms1G -Xmx2G"
```
1. Download sist2 executable
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
1. *(or)* Download an [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
1. *(or)* Download a [development snapshot](https://files.simon987.net/sist2/simon987_sist2/) *(Not recommended!)*
1. *(or)* `docker pull simon987/sist2:latest`
1. See [Usage guide](docs/USAGE.md)
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
\* *Mac users*: See [#1](https://github.com/simon987/sist2/issues/1)
## Example usage
See help page `sist2 --help` for more details.
See [Usage guide](docs/USAGE.md) for more details
**Scan a directory**
```bash
sist2 scan ~/Documents -o ./orig_idx/
sist2 scan --threads 4 --content-size 16384 /mnt/Pictures
sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
```
**Push index to Elasticsearch or file**
```bash
sist2 index --force-reset ./my_idx
sist2 index --print ./my_idx > raw_documents.ndjson
```
**Start web interface**
```bash
sist2 web --bind 0.0.0.0 --port 4321 ./my_idx1 ./my_idx2 ./my_idx3
```
### Use sist2 with docker
**scan**
```bash
docker run -it \
-v /path/to/files/:/files \
-v $PWD/out/:/out \
simon987/sist2 scan -t 4 /files -o /out/my_idx1
```
**index**
```bash
docker run -it --network host\
-v $PWD/out/:/out \
simon987/sist2 index /out/my_idx1
```
**web**
```bash
docker run --rm --network host -d --name sist2\
-v $PWD/out/my_idx:/idx \
-v $PWD/my/files:/files
simon987/sist2 web --bind 0.0.0.0 /idx
docker stop sist2
```
1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
1. Push index to Elasticsearch: `sist2 index ./docs_idx`
1. Start web interface: `sist2 web ./docs_idx`
## Format support
File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:---
pdf,xps,cbz,fb2,epub | MuPDF | text+ocr | yes, `png` | title |
`audio/*` | ffmpeg | - | yes, `jpeg` | ID3 tags |
`video/*` | ffmpeg | - | yes, `jpeg` | title, comment, artist |
`image/*` | ffmpeg | - | yes, `jpeg` | `EXIF:Artist`, `EXIF:ImageDescription` |
pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
cbz,cbr | *(none)* | - | yes | - |
`audio/*` | ffmpeg | - | yes | ID3 tags |
`video/*` | ffmpeg | - | yes | title, comment, artist |
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - |
html, xml | *(none)* | yes | no | - |
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
docx, xlsx, pptx | libOPC | yes | no | no |
docx, xlsx, pptx | *(none)* | yes | if embedded | creator, modified_by, title |
doc (MS Word 97-2003) | antiword | yes | yes | author, title |
mobi, azw, azw3 | libmobi | yes | no | author, title |
\* *See [Archive files](#archive-files)*
@@ -106,22 +96,20 @@ they were directly in the file system. Recursive (archives inside archives)
scan is also supported.
**Limitations**:
* Parsing media files with formats that require
*seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) is not supported.
* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
is limited (see `--mem-buffer` option)
* Archive files are scanned sequentially, by a single thread. On systems where
**sist2** is not I/O bound, scans might be faster when larger archives are split
into smaller parts.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
### OCR
You can enable OCR support for pdf,xps,cbz,fb2,epub file types with the
You can enable OCR support for pdf,xps,fb2,epub file types with the
`--ocr <lang>` option. Download the language data files with your
package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
The `simon987/sist2` github image comes with common languages
The `simon987/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa) pre-installed.
Examples
@@ -134,22 +122,17 @@ sist2 scan --ocr eng ~/Books/Textbooks/
## Build from source
You can compile **sist2** by yourself if you don't want to use the pre-compiled
binaries.
binaries (GCC 7+ required).
1. Install compile-time dependencies
*(Debian)*
```bash
apt install git cmake pkg-config libglib2.0-dev \
libssl-dev uuid-dev python3 libmagic-dev libfreetype6-dev \
libcurl-dev libbz2-dev yasm libharfbuzz-dev ragel \
libarchive-dev libtiff5 libpng16-16 libpango1.0-dev
vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libmagic libraw curl[core,ssl] jbig2dec brotli libmupdf
```
2. Build
```bash
git clone --recurse-submodules https://github.com/simon987/sist2
./scripts/get_static_libs.sh
cmake .
git clone --recursive https://github.com/simon987/sist2/
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
make
```

Submodule argparse deleted from fafc503d23

1
cJSON

Submodule cJSON deleted from 2d4ad84192

20
ci/build.sh Normal file → Executable file
View File

@@ -1,8 +1,20 @@
#!/usr/bin/env bash
./scripts/get_static_libs.sh
VCPKG_ROOT="/vcpkg"
cmake .
make
rm *.gz
git submodule update --init --recursive
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j 33
strip sist2
strip sist2_scan
mv sist2 sist2-x64-linux
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j 33
cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2
mv sist2_debug sist2-x64-linux-debug
tar -czf sist2-x64-linux-debug.tar.gz sist2-x64-linux-debug libasan.so.2

13
ci/build_arm64.sh Executable file
View File

@@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Builds the non-debug sist2 binary and renames it to sist2-arm64-linux.
# Root of the vcpkg checkout; its CMake toolchain file is passed to cmake below.
VCPKG_ROOT="/vcpkg"
# Remove any .gz files left in the current directory.
rm *.gz
# Make sure every git submodule (including nested ones) is checked out.
git submodule update --init --recursive
# Drop cached CMake state so the configure step starts from scratch.
rm -rf CMakeFiles CMakeCache.txt
# Configure with SIST_DEBUG turned off, using the vcpkg toolchain.
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
# Compile with 4 parallel jobs.
make -j 4
# Strip symbols from the binary, then give it its platform-specific name.
strip sist2
mv sist2 sist2-arm64-linux

BIN
demo.gif

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 MiB

403
docs/USAGE.md Normal file
View File

@@ -0,0 +1,403 @@
# Usage
*More examples (specifically with docker/compose) are in progress*
* [scan](#scan)
* [options](#scan-options)
* [examples](#scan-examples)
* [index format](#index-format)
* [index](#index)
* [options](#index-options)
* [examples](#index-examples)
* [web](#web)
* [options](#web-options)
* [examples](#web-examples)
* [rewrite_url](#rewrite_url)
* [link to specific indices](#link-to-specific-indices)
* [exec-script](#exec-script)
* [tagging](#tagging)
* [sidecar files](#sidecar-files)
```
Usage: sist2 scan [OPTION]... PATH
or: sist2 index [OPTION]... INDEX
or: sist2 web [OPTION]... INDEX...
or: sist2 exec-script [OPTION]... INDEX
Lightning-fast file system indexer and search tool.
-h, --help show this help message and exit
-v, --version Show version and exit
--verbose Turn on logging
--very-verbose Turn on debug messages
Scan options
-t, --threads=<int> Number of threads. DEFAULT=1
-q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
--size=<int> Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
--content-size=<int> Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
--incremental=<str> Reuse an existing index and only scan modified files.
-o, --output=<str> Output directory. DEFAULT=index.sist2/
--rewrite-url=<str> Serve files from this url instead of from disk.
--name=<str> Index display name. DEFAULT: (name of the directory)
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
-e, --exclude=<str> Files that match this regex will not be scanned
--fast Only index file names & mime type
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
Index options
-t, --threads=<int> Number of threads. DEFAULT=1
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
-p, --print Just print JSON documents to stdout.
--script-file=<str> Path to user script.
--mappings-file=<str> Path to Elasticsearch mappings.
--settings-file=<str> Path to Elasticsearch settings.
--async-script Execute user script asynchronously.
--batch-size=<int> Index batch size. DEFAULT: 100
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
Web options
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
--bind=<str> Listen on this address. DEFAULT=localhost:4090
--auth=<str> Basic auth in user:password format
--tag-auth=<str> Basic auth in user:password format for tagging
Exec-script options
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
--script-file=<str> Path to user script.
--async-script Execute user script asynchronously.
Made by simon987 <me@simon987.net>. Released under GPL-3.0
```
## Scan
### Scan options
* `-t, --threads`
Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-WmiObject Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
* `-q, --quality`
Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. *Does not affect PDF thumbnails quality*
* `--size`
Thumbnail size in pixels.
* `--content-size`
Number of bytes of text to be extracted from the content of files (plain text and PDFs).
Repeated whitespace and special characters do not count toward this limit.
* `--incremental`
Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
will be copied to the new index and will not be parsed again.
* `-o, --output` Output directory.
* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
* `--name` Set the `name` option for the web module
* `--depth` Maximum scan depth. Set to 0 to only scan files directly in the root directory, set to -1 for infinite depth
* `--archive` Archive file mode.
* skip: Don't parse
* list: Only get file names as text
* shallow: Don't parse archives inside archives.
* recurse: Scan archives recursively (default)
* `--ocr` See [OCR](../README.md#OCR)
* `-e, --exclude` Regex pattern to exclude files. A file is excluded if the pattern matches any
part of the full absolute path.
Examples:
* `-e ".*\.ttf"`: Ignore ttf files
* `-e ".*\.(ttf|rar)"`: Ignore ttf and rar files
* `-e "^/mnt/backups/"`: Ignore all files in the `/mnt/backups/` directory
* `-e "^/mnt/Data[12]/"`: Ignore all files in the `/mnt/Data1/` and `/mnt/Data2/` directories
* `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"` Exclude the
`/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories
* `--fast` Only index file names and mime type
* `--treemap-threshold` Directories smaller than (`treemap-threshold` * `<total size of the index>`)
will not be considered for the disk utilisation visualization; their size will be added to
the parent directory. If the parent directory is still smaller than the threshold, it will also be "merged upwards"
and so on.
In effect, smaller `treemap-threshold` values will yield a more detailed
(but also a more cluttered and harder to read) visualization.
* `--mem-buffer` Maximum memory buffer size in MB (per thread) for files inside archives. Media files
larger than this number will be read sequentially and no *seek* operations will be supported.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
### Scan examples
Simple scan
```bash
sist2 scan ~/Documents
sist2 scan \
--threads 4 --content-size 16000000 --quality 1.0 --archive shallow \
--name "My Documents" --rewrite-url "http://nas.domain.local/My Documents/" \
~/Documents -o ./documents.idx/
```
Incremental scan
```
sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
```
### Index format
A typical `binary` type index structure looks like this:
```
documents.idx/
├── descriptor.json
├── _index_139965416830720
├── _index_139965425223424
├── _index_139965433616128
├── _index_139965442008832
├── _index_139965442008832
├── treemap.csv
├── agg_mime.csv
├── agg_date.csv
├── agg_size.csv
├── thumbs/
| ├── data.mdb
| └── lock.mdb
├── tags/
| ├── data.mdb
| └── lock.mdb
└── meta/
├── data.mdb
└── lock.mdb
```
The `_index_*` files contain the raw binary index data and are not meant to be
read by other applications. The format is generally compatible across different
sist2 versions.
The `thumbs/` folder is a [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database)
database containing the thumbnails.
The `descriptor.json` file contains general information about the index. The
following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`.
The `.csv` are pre-computed aggregations necessary for the stats page.
*Advanced usage*
Instead of using the `scan` module, you can also import an index generated
by a third party application. The 'external' index must have the following format:
```
my_index/
├── descriptor.json
├── _index_0
└── thumbs/
| ├── data.mdb
| └── lock.mdb
└── meta/
└── <empty>
```
*descriptor.json*:
```json
{
"uuid": "<valid UUID4>",
"version": "_external_v1",
"root": "(optional)",
"name": "<name>",
"rewrite_url": "(optional)",
"type": "json",
"timestamp": 1578971024
}
```
*_index_0*: NDJSON format (One json object per line)
```json
{
"_id": "unique uuid for the file",
"index": "index uuid4 (same one as descriptor.json!)",
"mime": "application/x-cbz",
"size": 14341204,
"mtime": 1578882996,
"extension": "cbz",
"name": "my_book",
"path": "path/to/books",
"content": "text contents of the book",
"title": "Title of the book",
"tag": ["genre.fiction", "author.someguy", "etc..."],
"_keyword": [
{"k": "ISBN", "v": "ABCD34789231"}
],
"_text": [
{"k": "other", "v": "This will be indexed as text"}
]
}
```
You can find the full list of supported fields [here](../src/io/serialize.c#L90)
The `_keyword.*` items will be indexed and searchable as **keyword** fields (only full matches allowed).
The `_text.*` items will be indexed and searchable as **text** fields (fuzzy searching allowed)
*thumbs/*:
LMDB key-value store. Keys are **binary** 16-byte md5 hash* (`_id` field)
and values are raw image bytes.
*\* Hash is calculated from the full path of the file, including the extension, relative to the index root*
Importing an external `binary` type index is technically possible but
it is currently unsupported and has no guarantees of back/forward compatibility.
## Index
### Index options
* `--es-url`
Elasticsearch url and port. If you are using docker, make sure that both containers are on the
same network.
* `--es-index`
Elasticsearch index name. DEFAULT=sist2
* `-p, --print`
Print index in JSON format to stdout.
* `--script-file`
Path to user script. See [Scripting](scripting.md).
* `--mappings-file`
Path to custom Elasticsearch mappings. If none is specified, [the bundled mappings](https://github.com/simon987/sist2/tree/master/schema) will be used.
* `--settings-file`
Path to custom Elasticsearch settings. *(See above)*
* `--async-script`
Use `wait_for_completion=false` elasticsearch option while executing user script.
(See [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/tasks.html))
* `--batch-size=<int>`
Index batch size. Indexing is generally faster with larger batches, but payloads that
are too large will fail and additional overhead for retrying with smaller sizes may slow
down the process.
* `-f, --force-reset`
Reset Elasticsearch mappings and settings.
### Index examples
**Push to elasticsearch**
```bash
sist2 index --force-reset --batch-size 1000 --es-url http://localhost:9200 ./my_index/
sist2 index ./my_index/
```
**Save index in JSON format**
```bash
sist2 index --print ./my_index/ > my_index.ndjson
```
**Inspect contents of an index**
```bash
sist2 index --print ./my_index/ | jq | less
```
## Web
### Web options
* `--es-url=<str>` Elasticsearch url.
* `--es-index`
Elasticsearch index name. DEFAULT=sist2
* `--bind=<str>` Listen on this address.
* `--auth=<str>` Basic auth in user:password format
* `--tag-auth=<str>` Basic auth in user:password format. Works the same way as the
`--auth` argument, but authentication is only applied to the `/tag/` endpoint.
### Web examples
**Single index**
```bash
sist2 web --auth admin:hunter2 --bind 0.0.0.0:8888 my_index
```
**Multiple indices**
```bash
# Indices will be displayed in this order in the web interface
sist2 web index1 index2 index3 index4
```
### rewrite_url
When the `rewrite_url` field is not empty, the web module ignores the `root`
field and will return a HTTP redirect to `<rewrite_url><path>/<name><extension>`
instead of serving the file from disk.
Both the `root` and `rewrite_url` fields are safe to manually modify from the
`descriptor.json` file.
### Link to specific indices
To link to specific indices, you can add a list of comma-separated index names to
the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are
not displayed.
## exec-script
The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.
# Tagging
### Manual tagging
You can modify tags of individual documents directly from the
`web` interface. Note that you can set up authentication for this feature
with the `--tag-auth` option (See [web options](#web-options))
![manual_tag](manual_tag.png)
Tags that are manually added are saved both in the
index folder (in `/tags/`) and in Elasticsearch*. When re-`index`ing,
they are read from the index and automatically applied.
You can safely copy the `/tags/` database to another index.
See [Automatic tagging](#automatic-tagging) for information about tag
hierarchies and tag colors.
\* *It can take a few seconds to take effect in new search queries, and the page needs
to be reloaded for the tags tab to update*
### Automatic tagging
See [scripting](scripting.md) documentation.
# Sidecar files
When scanning, sist2 will read metadata from `.s2meta` JSON files and overwrite the
original document's metadata. Sidecar metadata files will also work inside archives.
Sidecar files themselves are not saved in the index.
This feature is useful to leverage third-party applications such as speech-to-text or
OCR to add additional metadata to a file.
**Example**
```
~/Documents/
├── Video.mp4
└── Video.mp4.s2meta
```
The sidecar file must have exactly the same file path and the `.s2meta` suffix.
`Video.mp4.s2meta`:
```json
{
"content": "This sidecar file will overwrite some metadata fields of Video.mp4",
"author": "Some author",
"duration": 12345,
"bitrate": 67890,
"some_arbitrary_field": [1,2,3]
}
```
```
sist2 scan ~/Documents -o ./docs.idx
sist2 index ./docs.idx
```
*NOTE*: It is technically possible to overwrite the `tag` value using sidecar files, however,
it is not currently possible to restore both manual tags and sidecar tags without user scripts
while reindexing.

View File

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 26 KiB

BIN
docs/manual_tag.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.9 KiB

View File

@@ -39,7 +39,7 @@ it adds the `genre.<genre>` tag.
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source?.genre != null) {
tags.add("genre." + ctx._source.genre.toLowerCase())
tags.add("genre." + ctx._source.genre.toLowerCase());
}
```
@@ -54,6 +54,11 @@ script.painless.regex.enabled: true
```
Or, if you're using docker add `-e "script.painless.regex.enabled=true"`
**Tag color**
You can specify the color for an individual tag by appending a
hexadecimal color code (`#RRGGBBAA`) to the tag name.
### Examples
If `(20XX)` is in the file name, add the `year.<year>` tag:
@@ -62,7 +67,7 @@ ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
if (m.find()) {
tags.add("year." + m.group(1))
tags.add("year." + m.group(1));
}
```
@@ -106,12 +111,32 @@ if (ctx._source.path != "") {
}
```
Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
Parse `EXIF:F Number` tag
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source.path != "") {
String[] names = ctx._source.path.splitOnToken('/');
tags.add("studio." + names[names.length-1]);
if (ctx._source?.exif_fnumber != null) {
String[] values = ctx._source.exif_fnumber.splitOnToken(' ');
String aperture = String.valueOf(Float.parseFloat(values[0]) / Float.parseFloat(values[1]));
if (aperture == "NaN") {
aperture = "0,0";
}
tags.add("Aperture.f/" + aperture.replace(".", ","));
}
```
Display year and months from `EXIF:DateTime` tag
```Java
if (ctx._source?.exif_datetime != null) {
SimpleDateFormat parser = new SimpleDateFormat("yyyy:MM:dd HH:mm:ss");
Date date = parser.parse(ctx._source.exif_datetime);
SimpleDateFormat yp = new SimpleDateFormat("yyyy");
SimpleDateFormat mp = new SimpleDateFormat("MMMMMMMMM");
String year = yp.format(date);
String month = mp.format(date);
tags.add("Month." + month);
tags.add("Year." + year);
}
```

BIN
docs/sist2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 889 KiB

BIN
docs/stats.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 167 KiB

View File

@@ -1,53 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**@file config/mce/config.h
*/
#ifndef MCE_CONFIG_H
#define MCE_CONFIG_H
#include <libxml/xmlstring.h>
#include <stdio.h>
#include <plib/plib.h>
#include <assert.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Compile-time switch for namespace subsumption support: declarations guarded
   by "#if (MCE_NAMESPACE_SUBSUMPTION_ENABLED)" are only compiled when this is
   non-zero. It is set to 0 (disabled) here. */
#define MCE_NAMESPACE_SUBSUMPTION_ENABLED 0
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* MCE_CONFIG_H */

View File

@@ -1,189 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file mce/helper.h
Helper functions needed by mce/textreader.h and mce/textwriter.h to implement MCE:
- mceQNameLevelAdd(), mceQNameLevelLookup() and mceQNameLevelCleanup() maintain a set of mceQNameLevel_t tuples.
- mceQNameLevelPush() and mceQNameLevelPopIfMatch() maintain a stack of mceQNameLevel_t tuples.
- mceCtxInit(), mceCtxCleanup() and mceCtxUnderstandsNamespace() manage a context which holds all information needed to do MCE proprocessing.
*/
#include <mce/config.h>
#ifndef MCE_HELPER_H
#define MCE_HELPER_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Triple (ns, ln, level), plus a \c flag member used by mceTextWriter.
NOTE(review): \c ns / \c ln are presumably the namespace and local name of a
qualified name and \c level the element depth it was recorded at -- confirm
against the implementation.
*/
typedef struct MCE_QNAME_LEVEL {
xmlChar *ns;
xmlChar *ln;
puint32_t level;
puint32_t flag; // used by mceTextWriter
} mceQNameLevel_t;
/**
State attached to a skip interval (stored in the \c state member of mceSkipItem_t).
NOTE(review): the meaning of the individual states is not documented in this
header -- see the implementation.
*/
typedef enum MCE_SKIP_STATE_ENUM {
MCE_SKIP_STATE_IGNORE,
MCE_SKIP_STATE_ALTERNATE_CONTENT,
MCE_SKIP_STATE_CHOICE_MATCHED
} mceSkipState_t;
/**
Represents an interval of levels (\c level_start .. \c level_end) which are
"skipped", i.e. ignored, together with the skip \c state of that interval.
*/
typedef struct MCE_SKIP_ITEM {
puint32_t level_start;
puint32_t level_end;
mceSkipState_t state;
} mceSkipItem_t;
/**
Represents a set of (ns, ln, level) triples, stored as an array of
mceQNameLevel_t.
NOTE(review): \c list_items is presumably the number of entries in
\c list_array and \c max_level the highest level currently stored -- confirm.
*/
typedef struct MCE_QNAME_LEVEL_SET {
mceQNameLevel_t *list_array;
puint32_t list_items;
puint32_t max_level;
} mceQNameLevelSet_t;
/**
The skip stack: an array of mceSkipItem_t skip intervals.
NOTE(review): \c stack_items is presumably the number of items currently on
the stack -- confirm.
*/
typedef struct MCE_SKIP_STACK {
mceSkipItem_t *stack_array;
puint32_t stack_items;
} mceSkipStack_t;
/**
Error codes of the MCE processor. A value of this type is kept in the
\c error member of mceCtx_t, is passed to mceRaiseError() and is returned by
mceTextReaderGetError().
*/
typedef enum MCE_ERROR_ENUM {
MCE_ERROR_NONE,
MCE_ERROR_XML,
MCE_ERROR_MUST_UNDERSTAND,
MCE_ERROR_VALIDATION,
MCE_ERROR_MEMORY
} mceError_t;
/**
Holds all information needed to do MCE preprocessing: several sets of
(ns, ln, level) triples, the skip stack and the current error code.
The three \c subsume_* sets only exist when
MCE_NAMESPACE_SUBSUMPTION_ENABLED is non-zero.
*/
typedef struct MCE_CONTEXT {
mceQNameLevelSet_t ignorable_set;
mceQNameLevelSet_t understands_set;
mceQNameLevelSet_t processcontent_set;
mceQNameLevelSet_t suspended_set;
#if (MCE_NAMESPACE_SUBSUMPTION_ENABLED)
mceQNameLevelSet_t subsume_namespace_set;
mceQNameLevelSet_t subsume_exclude_set;
mceQNameLevelSet_t subsume_prefix_set;
#endif
mceSkipStack_t skip_stack;
mceError_t error;
pbool_t mce_disabled;
puint32_t suspended_level;
} mceCtx_t;
/**
Add a new triple (ns, ln, level) to the triple set \c qname_level_set.
The \c ns_sub string is optional and will not be touched.
NOTE(review): this prototype has no \c ns_sub parameter, so the sentence above
looks stale -- confirm and remove.
*/
pbool_t mceQNameLevelAdd(mceQNameLevelSet_t *qname_level_set, const xmlChar *ns, const xmlChar *ln, puint32_t level);
/**
Lookup a triple (ns, ln, level) via \c ns and \c ln. If \c ignore_ln is PTRUE then the first triple matching \c ns will be returned.
*/
mceQNameLevel_t* mceQNameLevelLookup(mceQNameLevelSet_t *qname_level_set, const xmlChar *ns, const xmlChar *ln, pbool_t ignore_ln);
/**
Remove all triples (ns, ln, level) where the level is greater than or equal to \c level.
*/
pbool_t mceQNameLevelCleanup(mceQNameLevelSet_t *qname_level_set, puint32_t level);
/**
Push a new skip interval (level_start, level_end, state) on the stack \c skip_stack.
*/
pbool_t mceSkipStackPush(mceSkipStack_t *skip_stack, puint32_t level_start, puint32_t level_end, mceSkipState_t state);
/**
Pop the top interval from the stack \c skip_stack.
NOTE(review): the previous wording mentioned "(ns, ln, level)" and
\c qname_level_array, neither of which appears in this prototype -- presumably
a copy/paste leftover.
*/
void mceSkipStackPop(mceSkipStack_t *skip_stack);
/**
Returns top item or NULL.
*/
mceSkipItem_t *mceSkipStackTop(mceSkipStack_t *skip_stack);
/**
Returns TRUE, if the \c level is in the top skip interval.
*/
pbool_t mceSkipStackSkip(mceSkipStack_t *skip_stack, puint32_t level);
/**
Initialize the mceCtx_t \c ctx.
*/
pbool_t mceCtxInit(mceCtx_t *ctx);
/**
Cleanup, i.e. release all resources from the mceCtx_t \c ctx.
*/
pbool_t mceCtxCleanup(mceCtx_t *ctx);
/**
Register the namespace \c ns in \c ctx.
*/
pbool_t mceCtxUnderstandsNamespace(mceCtx_t *ctx, const xmlChar *ns);
/**
NOTE(review): the original comment here was a verbatim copy of the one on
mceCtxUnderstandsNamespace ("Register the namespace ns in ctx"). Judging by
the name, this presumably registers (\c ns, \c ln) in \c ctx as a point where
MCE processing is suspended -- confirm against the implementation.
*/
pbool_t mceCtxSuspendProcessing(mceCtx_t *ctx, const xmlChar *ns, const xmlChar *ln);
#if (MCE_NAMESPACE_SUBSUMPTION_ENABLED)
/**
Subsume namespace \c ns_new with \c ns_old.
Only declared when MCE_NAMESPACE_SUBSUMPTION_ENABLED is non-zero.
*/
pbool_t mceCtxSubsumeNamespace(mceCtx_t *ctx, const xmlChar *prefix_new, const xmlChar *ns_new, const xmlChar *ns_old);
#endif
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* MCE_HELPER_H */

View File

@@ -1,464 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file mce/textreader.h
*/
#ifndef MCE_TEXTREADER_H
#define MCE_TEXTREADER_H
#ifdef __cplusplus
extern "C" {
#endif
/**
A handle to an MCE-aware libxml2 xmlTextReader.
This is a forward declaration of struct MCE_TEXTREADER, placed ahead of the
includes that follow it.
*/
typedef struct MCE_TEXTREADER mceTextReader_t;
#ifdef __cplusplus
} /* extern "C" */
#endif
#include <mce/config.h>
#include <opc/opc.h>
#include <mce/helper.h>
#include <libxml/xmlwriter.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
An MCE-aware reader: the wrapped libxml2 \c reader together with the
\c mceCtx holding its MCE processing state.
*/
struct MCE_TEXTREADER {
xmlTextReaderPtr reader;
mceCtx_t mceCtx;
};
/**
Wrapper around the libxml2 xmlTextReaderRead function.
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderRead
*/
int mceTextReaderRead(mceTextReader_t *mceTextReader);
/**
Wrapper around the libxml2 xmlTextReaderNext function.
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderNext
*/
int mceTextReaderNext(mceTextReader_t *mceTextReader);
/**
Creates an mceTextReader from an xmlTextReader.
\code
mceTextReader_t reader;
mceTextReaderInit(&reader, xmlNewTextReaderFilename("sample.xml"));
// reader is ready to use.
mceTextReaderCleanup(&reader);
\endcode
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlNewTextReaderFilename
*/
int mceTextReaderInit(mceTextReader_t *mceTextReader, xmlTextReaderPtr reader);
/**
Cleanup MCE reader, i.e. free all resources. Also calls xmlTextReaderClose and xmlFreeTextReader.
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderClose
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlFreeTextReader
*/
int mceTextReaderCleanup(mceTextReader_t *mceTextReader);
/**
Reads all events from \c mceTextReader and pipes them to \c writer.
\code
mceTextReader_t reader;
mceTextReaderInit(&reader, xmlNewTextReaderFilename("sample.xml"));
mceTextReaderUnderstandsNamespace(&reader, _X("http://myextension"));
xmlTextWriterPtr writer=xmlNewTextWriterFilename("out.xml", 0);
mceTextReaderDump(&reader, writer, P_FALSE);
xmlFreeTextWriter(writer);
mceTextReaderCleanup(&reader);
\endcode
*/
int mceTextReaderDump(mceTextReader_t *mceTextReader, xmlTextWriter *writer, pbool_t fragment);
/**
Registers an MCE namespace.
\see mceTextReaderDump()
*/
int mceTextReaderUnderstandsNamespace(mceTextReader_t *mceTextReader, const xmlChar *ns);
/**
Disable MCE processing.
\return Returns old value.
*/
pbool_t mceTextReaderDisableMCE(mceTextReader_t *mceTextReader, pbool_t flag);
/**
Signal an error to the MCE processor.
NOTE(review): \c str followed by variadic arguments looks like a printf-style
format string -- confirm against the implementation.
*/
void mceRaiseError(xmlTextReader *reader, mceCtx_t *ctx, mceError_t error, const xmlChar *str, ...);
/**
Internal function which does the MCE postprocessing. E.g. mceTextReaderRead() is implemented as
\code
mceTextReaderPostprocess(mceTextReader->reader, &mceTextReader->mceCtx, xmlTextReaderRead(mceTextReader->reader))
\endcode
This function is exposed to make an existing libxml2 xmlTextReader MCE aware.
*/
int mceTextReaderPostprocess(xmlTextReader *reader, mceCtx_t *ctx, int ret);
/**
Get the error code.
*/
mceError_t mceTextReaderGetError(mceTextReader_t *mceTextReader);
/**
Helper macro to declare a start/end document block in a declarative way:
\code
mce_start_document(reader) {
} mce_end_document(reader);
\endcode
Opens an <tt>if (NULL!=reader)</tt> block, advances the reader once with
mceTextReaderRead() and ends in a dangling <tt>if (0)</tt>, so the braces that
follow are never executed themselves; they only serve as the head of the
<tt>} else if (...) {</tt> chain produced by the mce_match_* macros.
\hideinitializer
*/
#define mce_start_document(_reader_) \
if (NULL!=(_reader_)) { \
mceTextReaderRead(_reader_); \
if (0)
/**
Closes the <tt>if (NULL!=reader)</tt> block opened by mce_start_document.
The definition deliberately does not end in a line continuation, so whatever
line follows it in the file can never be spliced into the macro.
\see mce_start_document.
\hideinitializer
*/
#define mce_end_document(_reader_) \
} /* if (NULL!=reader) */
/**
Container for mce_start_element and mce_start_attribute declarations.
Expands to a bare <tt>if (0)</tt>: the block that follows is never executed
itself and only serves as the head of the <tt>} else if (...) {</tt> chain
produced by the mce_match_* macros.
\see mce_match_element
\see mce_match_attribute
\hideinitializer
*/
#define mce_start_choice(_reader_) \
if (0)
/**
Expands to nothing; it exists only to pair with mce_start_choice.
\see mce_start_choice
\hideinitializer
*/
#define mce_end_choice(_reader_)
/**
Skips the attributes: expands to an empty
mce_start_attributes / mce_end_attributes block.
\see mce_match_element.
\hideinitializer
*/
#define mce_skip_attributes(_reader_) \
mce_start_attributes(_reader_) { \
} mce_end_attributes(_reader_);
/**
Skips the children: expands to an empty
mce_start_children / mce_end_children block.
\see mce_match_attribute.
\hideinitializer
*/
#define mce_skip_children(_reader_) \
mce_start_children(_reader_) { \
} mce_end_children(_reader_);
/**
Opens the child-processing loop of the current element. If the element is not
empty, the reader is advanced once and a <tt>do {</tt> loop is started that
ends in a dangling <tt>if (0)</tt>; the braces that follow become the head of
the <tt>} else if (...) {</tt> chain produced by the mce_match_* macros.
Must be closed with mce_end_children.
\see mce_start_element.
\hideinitializer
*/
#define mce_start_children(_reader_) \
if (!xmlTextReaderIsEmptyElement((_reader_)->reader)) { \
mceTextReaderRead(_reader_); do { \
if (0)
/**
Closes the loop opened by mce_start_children. Supplies the final \c else of
the match chain, which skips the current node with mceTextReaderNext() unless
it is an end-element node, and repeats the loop until the reader is on an
end-element node or reports XML_READER_TYPE_NONE.
\see mce_start_element.
\hideinitializer
*/
#define mce_end_children(_reader_) \
else { \
if (XML_READER_TYPE_END_ELEMENT!=xmlTextReaderNodeType((_reader_)->reader)) { \
mceTextReaderNext(_reader_); /*skip unhandled element */ \
} \
} \
} while(XML_READER_TYPE_END_ELEMENT!=xmlTextReaderNodeType((_reader_)->reader) && \
XML_READER_TYPE_NONE!=xmlTextReaderNodeType((_reader_)->reader)); \
} /* if (!xmlTextReaderIsEmptyElement(reader->reader)) */
/**
Helper macro to match an element. Usefull for calling code in a seperate function:
\code
void handleElement(reader) {
mce_start_choice(reader) {
mce_start_element(reader, _X("ns"), _X("element")) {
} mce_end_element(reader);
} mce_end_choice(reader);
}
void parse(reader) {
mce_start_document(reader) {
mce_start_element(reader, _X("ns"), _X("ln")) {
mce_skip_attributes(reader);
mce_start_children(reader) {
mce_match_element(reader, _X("ns"), _X("element")) {
handleElement(reader);
}
} mce_end_children(reader);
} mce_end_element(reader);
} mce_end_document(reader);
}
\endcode
\hideinitializer
*/
#define mce_match_element(_reader_, ns, ln) \
/* Closes the previous branch of the surrounding if/else-if chain and opens a  */ \
/* new one that is taken when the current node is an element whose namespace   */ \
/* URI equals ns and whose local name equals ln; NULL acts as a wildcard.      */ \
/* ns and ln are parenthesized so that arbitrary expressions (e.g. a ?: pick)  */ \
/* can be passed; note that each of them is still evaluated up to twice.       */ \
} else if (XML_READER_TYPE_ELEMENT==xmlTextReaderNodeType((_reader_)->reader) \
&& (NULL==(ns) || 0==xmlStrcmp((ns), xmlTextReaderConstNamespaceUri((_reader_)->reader))) \
&& (NULL==(ln) || 0==xmlStrcmp((ln), xmlTextReaderConstLocalName((_reader_)->reader)))) {
/**
Helper macro to declare a element block in a declarative way:
\code
mce_start_element(reader, _X("ns"), _X("ln")) {
mce_start_attributes(reader) {
mce_start_attribute(reader, _X("ns"), _X("lnA")) {
// code for handling lnA.
} mce_end_attribute(reader);
mce_start_attribute(reader, _X("ns"), _X("lnB")) {
// code for handling lnB.
} mce_end_attribute(reader);
} mce_end_attributes(reader);
mce_start_children(reader) {
mce_start_element(reader, _X("ns"), _X("lnA")) {
// code for handling lnA.
} mce_end_element(reader);
mce_start_element(reader, _X("ns"), _X("lnB")) {
// code for handling lnB.
} mce_end_element(reader);
mce_start_text(reader) {
// code for handling text.
} mce_end_text(reader);
} mce_end_children(reader);
} mce_end_element(reader);
\endcode
\hideinitializer
*/
#define mce_start_element(_reader_, ns, ln) \
mce_match_element(_reader_, ns, ln)
/**
\see mce_start_element.
\hideinitializer
*/
#define mce_end_element(_reader_) \
mceTextReaderNext(_reader_)
/**
Matches #TEXT without consuming it.
\hideinitializer
*/
#define mce_match_text(_reader_) \
} else if (XML_READER_TYPE_TEXT==xmlTextReaderNodeType((_reader_)->reader) \
|| XML_READER_TYPE_SIGNIFICANT_WHITESPACE==xmlTextReaderNodeType((_reader_)->reader)) {
/**
\see mce_start_element.
\hideinitializer
*/
#define mce_start_text(_reader_) \
mce_match_text(_reader_)
/**
\see mce_start_element.
\hideinitializer
*/
#define mce_end_text(_reader_) \
mceTextReaderNext(_reader_)
/**
Opens the loop over the attributes of the current element.
If xmlTextReaderMoveToFirstAttribute() returns 1, a <tt>do {</tt> loop is
started whose body is an if/else-if chain headed by a dangling
<tt>if (0)</tt>; the mce_start_attribute / mce_match_attribute macros append
their <tt>} else if (...) {</tt> branches to it. The two scopes opened here
are closed by mce_end_attributes.
\see mce_start_element.
\hideinitializer
*/
#define mce_start_attributes(_reader_) \
if (1==xmlTextReaderMoveToFirstAttribute((_reader_)->reader)) { \
do { \
if (0)
/**
Closes the loop opened by mce_start_attributes.
Adds an empty final <tt>else</tt> branch (attributes no branch matched are
simply ignored), repeats the loop while xmlTextReaderMoveToNextAttribute()
returns 1 and finally calls xmlTextReaderMoveToElement() to move the reader
back from the attribute to its element.
\see mce_start_element.
\hideinitializer
*/
#define mce_end_attributes(_reader_) \
else { /* skipped attribute */ } \
} while(1==xmlTextReaderMoveToNextAttribute((_reader_)->reader)); \
xmlTextReaderMoveToElement((_reader_)->reader); }
/**
Helper macro to match an attribute. Useful for calling code in a separate function:
\code
void handleA(reader) {
mce_start_choice(reader) {
mce_start_attribute(reader, _X("ns"), _X("attr")) {
} mce_end_attribute(reader);
} mce_end_choice(reader);
}
void parse(reader) {
mce_start_document(reader) {
mce_start_element(reader, _X("ns"), _X("ln")) {
mce_start_attributes(reader) {
mce_match_attribute(reader, _X("ns"), _X("attr")) {
handleA(reader);
}
} mce_end_attributes(reader);
mce_skip_children(reader);
} mce_end_element(reader);
} mce_end_document(reader);
}
\endcode
\hideinitializer
*/
#define mce_match_attribute(_reader_, ns, ln) \
/* Closes the previous branch of the surrounding if/else-if chain and opens a  */ \
/* new one that is taken when the current attribute's namespace URI equals ns  */ \
/* and its local name equals ln; NULL acts as a wildcard.                      */ \
/* ns and ln are parenthesized so that arbitrary expressions (e.g. a ?: pick)  */ \
/* can be passed; note that each of them is still evaluated up to twice.       */ \
} else if ((NULL==(ns) || 0==xmlStrcmp((ns), xmlTextReaderConstNamespaceUri((_reader_)->reader))) \
&& (NULL==(ln) || 0==xmlStrcmp((ln), xmlTextReaderConstLocalName((_reader_)->reader)))) {
/**
\see mce_start_element.
\hideinitializer
*/
#define mce_start_attribute(_reader_, ns, ln) \
mce_match_attribute(_reader_, ns, ln)
/**
\see mce_start_element.
\hideinitializer
*/
#define mce_end_attribute(_reader_)
/**
Error handling for MCE parsers.
\code
mce_start_element(&reader, NULL, _X("Default")) {
const xmlChar *ext=NULL;
const xmlChar *type=NULL;
mce_start_attributes(&reader) {
mce_start_attribute(&reader, NULL, _X("Extension")) {
ext=xmlTextReaderConstValue(reader.reader);
} mce_end_attribute(&reader);
mce_start_attribute(&reader, NULL, _X("ContentType")) {
type=xmlTextReaderConstValue(reader.reader);
} mce_end_attribute(&reader);
} mce_end_attributes(&reader);
mce_error_guard_start(&reader) {
mce_error(&reader, NULL==ext || ext[0]==0, MCE_ERROR_VALIDATION, "Missing @Extension attribute!");
mce_error(&reader, NULL==type || type[0]==0, MCE_ERROR_VALIDATION, "Missing @ContentType attribute!");
opcContainerType *ct=insertType(c, type, OPC_TRUE);
mce_error(&reader, NULL==ct, MCE_ERROR_MEMORY, NULL);
opcContainerExtension *ce=opcContainerInsertExtension(c, ext, OPC_TRUE);
mce_error(&reader, NULL==ce, MCE_ERROR_MEMORY, NULL);
mce_errorf(&reader, NULL!=ce->type && 0!=xmlStrcmp(ce->type, type), MCE_ERROR_VALIDATION, "Extension \"%s\" is mapped to type \"%s\" as well as \"%s\"", ext, type, ce->type);
ce->type=ct->type;
} mce_error_guard_end(&reader);
mce_skip_children(&reader);
} mce_end_element(&reader);
\endcode
\hideinitializer
*/
#define mce_error_guard_start(_reader_) if (MCE_ERROR_NONE==(_reader_)->mceCtx.error) do {
/**
\see mce_error_guard_start
\hideinitializer
*/
#define mce_error_guard_end(_reader_) } while(0)
/**
Signals an error if \a guard is true.
Stores \a err in the MCE context of the reader, writes \a msg (or the
stringified \a err expression when \a msg is NULL) to stderr and then leaves the
enclosing mce_error_guard_start / mce_error_guard_end block via \c continue.
The message is printed verbatim through a fixed "%s" format, i.e. it is never
interpreted as a printf format string; use mce_errorf for formatted messages.
Must only be used inside a loop body (normally the guard block), because the
expansion contains a \c continue statement.
\hideinitializer
*/
#define mce_error(_reader_, guard, err, msg) if (guard) { (_reader_)->mceCtx.error=(err); fprintf(stderr, "%s", (NULL!=(msg)?(msg):#err)); continue; }
/**
Formatted variant of mce_error: signals an error if \a guard is true.
Calls mceRaiseError() with the underlying xmlTextReader, the MCE context,
\a err and the format string \a msg (or the stringified \a err expression when
\a msg is NULL) followed by the variadic arguments, then leaves the enclosing
mce_error_guard_start / mce_error_guard_end block via \c continue.
The GNU branch uses the <tt>##__VA_ARGS__</tt> extension so that the variadic
argument list may be empty; the portable branch requires at least one variadic
argument.
NOTE(review): unlike mce_error this macro does not assign mceCtx.error itself;
presumably mceRaiseError() does that -- confirm against its implementation.
\hideinitializer
*/
#if defined(__GNUC__)
#define mce_errorf(_reader_, guard, err, msg, ...) if (guard) { mceRaiseError((_reader_)->reader, &(_reader_)->mceCtx, err, _X((NULL!=msg?msg:#err)), ##__VA_ARGS__ ); continue; }
#else
#define mce_errorf(_reader_, guard, err, msg, ...) if (guard) { mceRaiseError((_reader_)->reader, &(_reader_)->mceCtx, err, _X((NULL!=msg?msg:#err)), __VA_ARGS__ ); continue; }
#endif
/**
Only issues the error when in "strict mode".
\hideinitializer
*/
#define mce_error_strict mce_error
/**
\see mce_error_strict
\hideinitializer
*/
#define mce_error_strictf mce_errorf
/**
Marker for a MCE definition.
\hideinitializer
*/
#define mce_def
/**
Marker for a MCE reference.
\hideinitializer
*/
#define mce_ref(r) (r)
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* MCE_TEXTREADER_H */

View File

@@ -1,176 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file mce/textwriter.h
*/
#include <mce/config.h>
#include <libxml/xmlwriter.h>
#include <mce/helper.h>
#ifndef MCE_TEXTWRITER_H
#define MCE_TEXTWRITER_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Default flags for an MCE namespace declaration.
*/
#define MCE_DEFAULT 0x0
/**
Flags MCE namespace declaration "ignorable".
*/
#define MCE_IGNORABLE 0x1
/**
Flags MCE namespace declaration "must understand".
*/
#define MCE_MUSTUNDERSTAND 0x2
/**
The MCE text writer context.
*/
typedef struct MCE_TEXTWRITER_STRUCT mceTextWriter;
/**
Create a new MCE text writer.
\see http://xmlsoft.org/html/libxml-xmlIO.html#xmlOutputBufferCreateIO
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlNewTextWriter
*/
mceTextWriter *mceTextWriterCreateIO(xmlOutputWriteCallback iowrite, xmlOutputCloseCallback ioclose, void *ioctx, xmlCharEncodingHandlerPtr encoder);
/**
Helper which create a new MCE text writer for a FILE handle.
*/
mceTextWriter *mceNewTextWriterFile(FILE *file);
/**
Free all resources for \a w.
*/
int mceTextWriterFree(mceTextWriter *w);
/**
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterStartDocument
*/
int mceTextWriterStartDocument(mceTextWriter *w);
/**
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterEndDocument
*/
int mceTextWriterEndDocument(mceTextWriter *w);
/**
Start a new XML element. If ns==NULL then there is no namespace and ""==ns means the default namespace.
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterStartElement
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterStartElementNS
*/
int mceTextWriterStartElement(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln);
/**
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterEndElement
*/
int mceTextWriterEndElement(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln);
/**
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterWriteString
*/
int mceTextWriterWriteString(mceTextWriter *w, const xmlChar *content);
/**
Register a namespace. Must be called before mceTextWriterStartElement.
\see MCE_DEFAULT
\see MCE_IGNORABLE
\see MCE_MUSTUNDERSTAND
*/
const xmlChar *mceTextWriterRegisterNamespace(mceTextWriter *w, const xmlChar *ns, const xmlChar *prefix, int flags);
/**
Register qname (ns, ln) as a "process content" element wrt. MCE. Must be called before mceTextWriterStartElement.
*/
int mceTextWriterProcessContent(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln);
/**
Writes a formatted attribute.
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterWriteFormatAttribute
*/
int mceTextWriterAttributeF(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln, const char *value, ...);
/**
Starts an MCE alternate content section.
*/
int mceTextWriterStartAlternateContent(mceTextWriter *w);
/**
Ends an MCE alternate content section.
*/
int mceTextWriterEndAlternateContent(mceTextWriter *w);
/**
Start an MCE choice.
*/
int mceTextWriterStartChoice(mceTextWriter *w, const xmlChar *ns);
/**
Ends an MCE choice.
*/
int mceTextWriterEndChoice(mceTextWriter *w);
/**
Start an MCE fallback.
*/
int mceTextWriterStartFallback(mceTextWriter *w);
/**
Ends an MCE fallback.
*/
int mceTextWriterEndFallback(mceTextWriter *w);
/**
Returns the underlying xmlTextWriter.
*/
xmlTextWriterPtr mceTextWriterIntern(mceTextWriter *w);
/**
Helper which create a new xmlTextWriterPtr for a FILE handle.
*/
xmlTextWriterPtr xmlNewTextWriterFile(FILE *file);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* MCE_TEXTWRITER_H */

View File

@@ -1,189 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**@file config/opc/config.h
*/
#ifndef OPC_CONFIG_H
#define OPC_CONFIG_H
#include <libxml/xmlstring.h>
#include <plib/plib.h>
#include <assert.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
Assert expression e is true. Will be removed entirely in release mode.
\hideinitializer
*/
#define OPC_ASSERT(e) assert(e)
/**
Assert expression e is true. Expression will be executed in release mode too.
\hideinitializer
*/
#ifdef NDEBUG
#define OPC_ENSURE(e) (void)(e)
#else
#define OPC_ENSURE(e) assert(e)
#endif
/**
Constant for boolean true.
\hideinitializer
*/
#define OPC_TRUE (0==0)
/**
Constant for boolean false.
\hideinitializer
*/
#define OPC_FALSE (0==1)
/**
Boolean type.
\hideinitializer
*/
typedef pbool_t opc_bool_t;
/**
Type which represents an offset in e.g. a file.
\hideinitializer
*/
typedef pofs_t opc_ofs_t;
/**
8-bit unsigned integer.
\hideinitializer
*/
typedef puint8_t opc_uint8_t;
/**
16-bit unsigned integer.
\hideinitializer
*/
typedef puint16_t opc_uint16_t;
/**
32-bit unsigned integer.
\hideinitializer
*/
typedef puint32_t opc_uint32_t;
/**
64-bit unsigned integer.
\hideinitializer
*/
typedef puint64_t opc_uint64_t;
/**
8-bit signed integer.
\hideinitializer
*/
typedef pint8_t opc_int8_t;
/**
16-bit signed integer.
\hideinitializer
*/
typedef pint16_t opc_int16_t;
/**
32-bit signed integer.
\hideinitializer
*/
typedef pint32_t opc_int32_t;
/**
64-bit signed integer.
\hideinitializer
*/
typedef pint64_t opc_int64_t;
/**
Default size of the deflate buffer used by zlib.
*/
#define OPC_DEFLATE_BUFFER_SIZE 4096
/**
Max system path len.
*/
#define OPC_MAX_PATH 512
/**
Error codes for the OPC module.
*/
typedef enum OPC_ERROR_ENUM {
OPC_ERROR_NONE,
OPC_ERROR_STREAM,
OPC_ERROR_SEEK, // can't seek
OPC_ERROR_UNSUPPORTED_DATA_DESCRIPTOR,
OPC_ERROR_UNSUPPORTED_COMPRESSION,
OPC_ERROR_DEFLATE,
OPC_ERROR_HEADER,
OPC_ERROR_MEMORY,
OPC_ERROR_XML,
OPC_ERROR_USER // user triggered an abort
} opc_error_t;
/**
Compression options for OPC streams.
*/
typedef enum OPC_COMPRESSIONOPTION_ENUM {
OPC_COMPRESSIONOPTION_NONE,
OPC_COMPRESSIONOPTION_NORMAL,
OPC_COMPRESSIONOPTION_MAXIMUM,
OPC_COMPRESSIONOPTION_FAST,
OPC_COMPRESSIONOPTION_SUPERFAST
} opcCompressionOption_t;
/**
Helper for debug logs.
\hideinitializer
*/
#define opc_logf printf
/**
Abstraction for memset(m, 0, s).
\hideinitializer
*/
#define opc_bzero_mem(m,s) memset(m, 0, s)
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_CONFIG_H */

View File

@@ -1,300 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/container.h
The container.h module has the fundamental methods for dealing with ZIP-based OPC container.
OPC container can be opened in READ-ONLY mode, WRITE-ONLY mode, READ/WRITE mode, TEMPLATE mode and TRANSITION mode.
The most notable mode is the READ/WRITE mode, which gives you concurrent stream-based READ and WRITE access to a
single ZIP-based OPC container. This is achieved without the use of temporary files by taking advantage of the
OPC specific “interleave” mode. \see http://standards.iso.org/ittf/PubliclyAvailableStandards/c051459_ISOIEC_29500-2_2008(E).zip
The TEMPLATE mode allows very fast customized "cloning" of ZIP-based OPC container by using "RAW access" to the ZIP streams.
The TRANSITION mode is a special version of the TEMPLATE mode, which allows transition-based READ/WRITE access to the
ZIP-based OPC container using a temporary file.
*/
#include <opc/config.h>
#include <opc/file.h>
#ifndef OPC_CONTAINER_H
#define OPC_CONTAINER_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Handle to an OPC container created by \ref opcContainerOpen.
\see opcContainerOpen.
*/
typedef struct OPC_CONTAINER_STRUCT opcContainer;
/**
Modes for opcContainerOpen();
\see opcContainerOpen
*/
typedef enum {
/**
Opens the OPC container denoted by \a fileName in READ-ONLY mode. The \a destName parameter must be \a NULL.
\hideinitializer
*/
OPC_OPEN_READ_ONLY=0,
/**
Opens the OPC container denoted by \a fileName in WRITE-ONLY mode. The \a destName parameter must be \a NULL.
\hideinitializer
*/
OPC_OPEN_WRITE_ONLY=1,
/**
Opens the OPC container denoted by \a fileName in READ/WRITE mode. The \a destName parameter must be \a NULL.
\hideinitializer
*/
OPC_OPEN_READ_WRITE=2,
/**
This mode will open the container denoted by \a fileName in READ-ONLY mode and the container denoted by
\a destName in write-only mode. Any modifications will be written to the container denoted by \a destName
and the unmodified streams from \a fileName will be written to \a destName on closing.
\warning Currently not implemented.
\hideinitializer
*/
OPC_OPEN_TEMPLATE=3,
/**
Like the OPC_OPEN_TEMPLATE mode, but the \a destName will be renamed to the \a fileName on closing. If \a destName
is \a NULL, then the name of the temporary file will be generated automatically.
\warning Currently not implemented.
\hideinitializer
*/
OPC_OPEN_TRANSITION=4
} opcContainerOpenMode;
/** Modes for opcContainerClose.
\see opcContainerClose.
*/
typedef enum {
/**
Close the OPC container without any further postprocessing.
\hideinitializer
*/
OPC_CLOSE_NOW = 0,
/**
Close the OPC container and trim the file by removing unused fragments like e.g.
deleted parts.
\hideinitializer
*/
OPC_CLOSE_TRIM = 1,
/**
Close the OPC container like in \a OPC_CLOSE_TRIM mode, but additionally remove any
"interleaved" parts by reordering them.
\warning Currently not implemented. Same semantic as OPC_CLOSE_TRIM.
\hideinitializer
*/
OPC_CLOSE_DEFRAG = 2
} opcContainerCloseMode;
/**
Opens a ZIP-based OPC container.
@param[in] fileName. For more details see \ref opcContainerOpenMode.
@param[in] mode. For more details see \ref opcContainerOpenMode.
@param[in] userContext. Will not be modified by libopc. Can be used to e.g. store the "this" pointer for C++ bindings.
@param[in] destName. For more details see \ref opcContainerOpenMode.
@return \a NULL if failed.
\see opcContainerOpenMode
\see opcContainerDump
*/
opcContainer* opcContainerOpen(const xmlChar *fileName,
opcContainerOpenMode mode,
void *userContext,
const xmlChar *destName);
/**
Opens a ZIP-based OPC container from memory.
@param[in] data.
@param[in] data_len.
@param[in] userContext. Will not be modified by libopc. Can be used to e.g. store the "this" pointer for C++ bindings.
@param[in] mode. For more details see \ref opcContainerOpenMode.
@return \a NULL if failed.
*/
opcContainer* opcContainerOpenMem(const opc_uint8_t *data, opc_uint32_t data_len,
opcContainerOpenMode mode,
void *userContext);
/**
Opens a ZIP-based OPC container through user-supplied IO callbacks.
@param[in] ioread.
@param[in] iowrite.
@param[in] ioclose.
@param[in] ioseek.
@param[in] iotrim.
@param[in] ioflush.
@param[in] iocontext.
@param[in] file_size.
@param[in] userContext. Will not be modified by libopc. Can be used to e.g. store the "this" pointer for C++ bindings.
@param[in] mode. For more details see \ref opcContainerOpenMode.
@return \a NULL if failed.
*/
opcContainer* opcContainerOpenIO(opcFileReadCallback *ioread,
opcFileWriteCallback *iowrite,
opcFileCloseCallback *ioclose,
opcFileSeekCallback *ioseek,
opcFileTrimCallback *iotrim,
opcFileFlushCallback *ioflush,
void *iocontext,
pofs_t file_size,
opcContainerOpenMode mode,
void *userContext);
/**
Close an OPC container.
@param[in] c. \ref opcContainer opened by \ref opcContainerOpen.
@param[in] mode. For more information see \ref opcContainerCloseMode.
@return An \ref opc_error_t code (presumably OPC_ERROR_NONE, i.e. zero, on success -- confirm against the implementation).
\see opcContainerOpen
\see opcContainerCloseMode
*/
opc_error_t opcContainerClose(opcContainer *c, opcContainerCloseMode mode);
/**
Returns the unmodified user context passed to \ref opcContainerOpen.
\see opcContainerOpen
*/
void *opcContainerGetUserContext(opcContainer *c);
/**
List all types, relations and parts of the container \a c to \a out.
\par Sample:
\include opc_dump.c
*/
opc_error_t opcContainerDump(opcContainer *c, FILE *out);
/**
Exports the OPC container to "Flat OPC" (http://blogs.msdn.com/b/ericwhite/archive/2008/09/29/the-flat-opc-format.aspx).
The flat versions of an OPC file are very important when dealing with e.g XSL(T)-based or Javascript-based transformations.
\see opcContainerFlatImport.
\todo Implementation needed.
*/
int opcContainerFlatExport(opcContainer *c, const xmlChar *fileName);
/**
Imports the flat version of an OPC container.
\see opcContainerFlatExport.
\todo Implementation needed.
*/
int opcContainerFlatImport(opcContainer *c, const xmlChar *fileName);
/**
Iterate all types.
\code
for(const xmlChar *type=opcContentTypeFirst(c);
NULL!=type;
type=opcContentTypeNext(c, type)) {
printf("%s\n", type);
}
\endcode
*/
const xmlChar *opcContentTypeFirst(opcContainer *container);
/**
\see opcContentTypeFirst()
*/
const xmlChar *opcContentTypeNext(opcContainer *container, const xmlChar *type);
/**
Iterate extensions.
\code
for(const xmlChar *ext=opcExtensionFirst(c);
NULL!=ext;
ext=opcExtensionNext(c, ext)) {
printf("%s\n", ext);
}
\endcode
*/
const xmlChar *opcExtensionFirst(opcContainer *container);
/**
\see opcExtensionFirst()
*/
const xmlChar *opcExtensionNext(opcContainer *container, const xmlChar *ext);
/**
Get registered type for extension.
\see opcExtensionRegister()
*/
const xmlChar *opcExtensionGetType(opcContainer *container, const xmlChar *ext);
/**
Register a mime-type and an extension.
\see opcExtensionGetType()
*/
const xmlChar *opcExtensionRegister(opcContainer *container, const xmlChar *ext, const xmlChar *type);
/**
Iterate through all relation types of the container:
\code
for(const xmlChar *type=opcRelationTypeFirst(c);
NULL!=type;
type=opcRelationTypeNext(c, type)) {
printf("%s\n", type);
}
\endcode
*/
const xmlChar *opcRelationTypeFirst(opcContainer *container);
/**
\see opcRelationTypeFirst()
*/
const xmlChar *opcRelationTypeNext(opcContainer *container, const xmlChar *type);
/**
Iterate through all external targets of the container:
\code
for(const xmlChar *target=opcExternalTargetFirst(c);
NULL!=target;
target=opcExternalTargetNext(c, target)) {
printf("%s\n", target);
}
\endcode
*/
const xmlChar *opcExternalTargetFirst(opcContainer *container);
/**
\see opcExternalTargetFirst()
*/
const xmlChar *opcExternalTargetNext(opcContainer *container, const xmlChar *target);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_CONTAINER_H */

View File

@@ -1,200 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/file.h
The opc module contains the file library functions.
*/
#include <opc/config.h>
#ifndef OPC_FILE_H
#define OPC_FILE_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Flag for READ access.
\hideinitializer
*/
#define OPC_FILE_READ (1<<0)
/**
Flag for WRITE access.
\hideinitializer
*/
#define OPC_FILE_WRITE (1<<1)
/**
Flag indicates that file will be truncated when opened.
\hideinitializer
*/
#define OPC_FILE_TRUNC (1<<2)
/**
Abstraction for seek modes.
*/
typedef enum OPC_FILESEEKMODE_ENUM {
opcFileSeekSet = SEEK_SET,
opcFileSeekCur = SEEK_CUR,
opcFileSeekEnd = SEEK_END
} opcFileSeekMode;
/**
Callback to read a file. E.g. for a FILE * context this can be implemented as
\code
static int opcFileRead(void *iocontext, char *buffer, int len) {
return fread(buffer, sizeof(char), len, (FILE*)iocontext);
}
\endcode
*/
typedef int opcFileReadCallback(void *iocontext, char *buffer, int len);
/**
Callback to write a file. E.g. for a FILE * context this can be implemented as
\code
static int opcFileWrite(void *iocontext, const char *buffer, int len) {
return fwrite(buffer, sizeof(char), len, (FILE*)iocontext);
}
\endcode
*/
typedef int opcFileWriteCallback(void *iocontext, const char *buffer, int len);
/**
Callback to close a file. E.g. for a FILE * context this can be implemented as
\code
static int opcFileClose(void *iocontext) {
return fclose((FILE*)iocontext);
}
\endcode
*/
typedef int opcFileCloseCallback(void *iocontext);
/**
Callback to seek a file. E.g. for a FILE * context this can be implemented as
\code
static opc_ofs_t opcFileSeek(void *iocontext, opc_ofs_t ofs) {
int ret=fseek((FILE*)iocontext, ofs, SEEK_SET);
if (ret>=0) {
return ftell((FILE*)iocontext);
} else {
return ret;
}
}
\endcode
*/
typedef opc_ofs_t opcFileSeekCallback(void *iocontext, opc_ofs_t ofs);
/**
Callback to trim a file. E.g. for a FILE * context this can be implemented as
\code
static int opcFileTrim(void *iocontext, opc_ofs_t new_size) {
#ifdef WIN32
return _chsize(fileno((FILE*)iocontext), new_size);
#else
return ftruncate(fileno((FILE*)iocontext), new_size);
#endif
}
\endcode
*/
typedef int opcFileTrimCallback(void *iocontext, opc_ofs_t new_size);
/**
Callback to flush a file. E.g. for a FILE * context this can be implemented as
\code
static int opcFileFlush(void *iocontext) {
return fflush((FILE*)iocontext);
}
\endcode
*/
typedef int opcFileFlushCallback(void *iocontext);
/**
Represents a state of a file, i.e. file position (buf_pos) and error status (err).
*/
typedef struct OPC_FILERAWSTATE_STRUCT {
opc_error_t err;
opc_ofs_t buf_pos; // current pos in file
} opcFileRawState;
/**
File IO context.
*/
typedef struct OPC_IO_STRUCT {
opcFileReadCallback *_ioread;
opcFileWriteCallback *_iowrite;
opcFileCloseCallback *_ioclose;
opcFileSeekCallback *_ioseek;
opcFileTrimCallback *_iotrim;
opcFileFlushCallback *_ioflush;
void *iocontext;
int flags;
opcFileRawState state;
opc_ofs_t file_size;
} opcIO_t;
/**
Initialize an IO context.
*/
opc_error_t opcFileInitIO(opcIO_t *io,
opcFileReadCallback *ioread,
opcFileWriteCallback *iowrite,
opcFileCloseCallback *ioclose,
opcFileSeekCallback *ioseek,
opcFileTrimCallback *iotrim,
opcFileFlushCallback *ioflush,
void *iocontext,
pofs_t file_size,
int flags);
/**
Initialize an IO context for a file.
*/
opc_error_t opcFileInitIOFile(opcIO_t *io, const xmlChar *filename, int flags);
/**
Initialize an IO for memory.
\warning Currently supports READ-ONLY file access.
*/
opc_error_t opcFileInitIOMemory(opcIO_t *io, const opc_uint8_t *data, opc_uint32_t data_len, int flags);
/**
Cleanup an IO context, i.e. release all system resources.
*/
opc_error_t opcFileCleanupIO(opcIO_t *io);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_FILE_H */

View File

@@ -1,60 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/helper.h
Contains helper functions for the opc module.
*/
#include <opc/config.h>
#ifndef OPC_HELPER_H
#define OPC_HELPER_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Constructs a segment name.
The result is written to \a out, whose capacity is given by \a out_size.
NOTE(review): the exact meaning of the return value and of \a out_max is not
visible from this header -- confirm against the implementation.
*/
opc_uint16_t opcHelperAssembleSegmentName(char *out, opc_uint16_t out_size, const xmlChar *name, opc_uint32_t segment_number, opc_uint32_t next_segment_id, opc_bool_t rels_segment, opc_uint16_t *out_max);
/**
Splits a filename into its segment information.
The segment number, the "last segment" flag and the "rels segment" flag are
returned through the \a segment_number, \a last_segment and \a rel_segment
output pointers.
*/
opc_error_t opcHelperSplitFilename(opc_uint8_t *filename, opc_uint32_t filename_length, opc_uint32_t *segment_number, opc_bool_t *last_segment, opc_bool_t *rel_segment);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_HELPER_H */

View File

@@ -1,74 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/inputstream.h
*/
#include <opc/config.h>
#ifndef OPC_INPUTSTREAM_H
#define OPC_INPUTSTREAM_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Internal type which represents a binary input stream.
Only forward-declared here, i.e. the type is opaque to users of this header.
*/
typedef struct OPC_CONTAINER_INPUTSTREAM_STRUCT opcContainerInputStream;
/**
Opens the part \c name of the \c container for reading.
\see opcContainerCloseInputStream
*/
opcContainerInputStream* opcContainerOpenInputStream(opcContainer *container, const xmlChar *name);
/**
Reads at most \c buffer_len bytes from the input \c stream into \c buffer.
\return The number of bytes read or "0" in case of an error or end-of-stream.
*/
opc_uint32_t opcContainerReadInputStream(opcContainerInputStream* stream, opc_uint8_t *buffer, opc_uint32_t buffer_len);
/**
Closes the input \c stream and releases all system resources.
*/
opc_error_t opcContainerCloseInputStream(opcContainerInputStream* stream);
/**
Returns the type of compression used for the \c stream.
*/
opcCompressionOption_t opcContainerGetInputStreamCompressionOption(opcContainerInputStream* stream);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_INPUTSTREAM_H */

View File

@@ -1,73 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/opc.h
The opc module contains the basic library functions.
*/
#include <opc/config.h>
#include <opc/container.h>
#include <opc/part.h>
#include <opc/relation.h>
#include <opc/inputstream.h>
#include <opc/outputstream.h>
#include <opc/zip.h>
#include <opc/xmlreader.h>
#include <opc/xmlwriter.h>
#include <opc/properties.h>
#ifndef OPC_OPC_H
#define OPC_OPC_H
#ifdef __cplusplus
extern "C" {
#endif
/**
* Initialize libopc.
* Sample:
* \include opc_helloworld.c
* Declared with an explicit \c void parameter list so that C compilers
* treat this as a real prototype and reject calls passing arguments.
* @return Non-zero if successful.
* NOTE(review): confirm the return convention against the definition of
* opc_error_t in opc/config.h.
*/
opc_error_t opcInitLibrary(void);
/**
* Free libopc. Clean up all resources.
* Declared with an explicit \c void parameter list so that C compilers
* treat this as a real prototype and reject calls passing arguments.
* @return Non-zero if successful.
* NOTE(review): confirm the return convention against the definition of
* opc_error_t in opc/config.h.
* \see opcInitLibrary.
*/
opc_error_t opcFreeLibrary(void);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_OPC_H */

View File

@@ -1,71 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/outputstream.h
*/
#include <opc/config.h>
#ifndef OPC_OUTPUTSTREAM_H
#define OPC_OUTPUTSTREAM_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Internal type which represents a binary output stream.
Only forward-declared here, i.e. the type is opaque to users of this header.
*/
typedef struct OPC_CONTAINER_OUTPUTSTREAM_STRUCT opcContainerOutputStream;
/**
Opens the part \c name for writing in \c container with compression \c compression_option.
\note Make sure the part exists!
\see opcPartCreate.
\see opcContainerCloseOutputStream
*/
opcContainerOutputStream* opcContainerCreateOutputStream(opcContainer *container, const xmlChar *name, opcCompressionOption_t compression_option);
/**
Writes \c buffer_len bytes from \c buffer to \c stream.
\return Returns the number of bytes written.
*/
opc_uint32_t opcContainerWriteOutputStream(opcContainerOutputStream* stream, const opc_uint8_t *buffer, opc_uint32_t buffer_len);
/**
Closes the \c stream and frees all associated resources.
*/
opc_error_t opcContainerCloseOutputStream(opcContainerOutputStream* stream);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_OUTPUTSTREAM_H */

View File

@@ -1,118 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/part.h
*/
#include <opc/config.h>
#ifndef OPC_PART_H
#define OPC_PART_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Handle to an OPC part, as returned by \ref opcPartFind and \ref opcPartCreate.
The handle is a plain \c xmlChar pointer.
NOTE(review): the original comment referred to "opcPartOpen", which is not
declared in this header -- confirm the intended function.
\see opcPartFind
\see opcPartCreate
*/
typedef xmlChar* opcPart;
/**
Represents an invalid (resp. NULL) part.
In relations OPC_PART_INVALID also represents the root part.
\hideinitializer
*/
#define OPC_PART_INVALID NULL
/**
Finds a part in a \c container by \c absolutePath and/or \c type.
Currently no \c flags are supported.
*/
opcPart opcPartFind(opcContainer *container,
const xmlChar *absolutePath,
const xmlChar *type,
int flags);
/**
Creates a part in a \c container with \c absolutePath and \c type.
Currently no \c flags are supported.
*/
opcPart opcPartCreate(opcContainer *container,
const xmlChar *absolutePath,
const xmlChar *type,
int flags);
/**
Returns the type of the \c part in the container \c c.
NOTE(review): the original comment said "type of the container"; since the
function takes a part this presumably means the part's type -- confirm.
The string is interned and must not be freed.
*/
const xmlChar *opcPartGetType(opcContainer *c, opcPart part);
/**
Returns the type of the \c part in the container \c c.
If \c override_only is set then the return value will be NULL for parts not having an override type.
The string is interned and must not be freed.
*/
const xmlChar *opcPartGetTypeEx(opcContainer *c, opcPart part, opc_bool_t override_only);
/**
Deletes the part \c absolutePath in the \c container.
*/
opc_error_t opcPartDelete(opcContainer *container, const xmlChar *absolutePath);
/**
Get the first part.
The following code prints every part together with its type:
\code
for(opcPart part=opcPartGetFirst(c);OPC_PART_INVALID!=part;part=opcPartGetNext(c, part)) {
printf("%s; %s\n", part, opcPartGetType(c, part));
}
\endcode
\see opcPartGetNext
*/
opcPart opcPartGetFirst(opcContainer *container);
/**
Get the part following \c part.
Iteration ends when OPC_PART_INVALID is returned (see the loop in opcPartGetFirst).
\see opcPartGetFirst
*/
opcPart opcPartGetNext(opcContainer *container, opcPart part);
/**
Returns the size in bytes of the \c part.
*/
opc_ofs_t opcPartGetSize(opcContainer *c, opcPart part);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_PART_H */

View File

@@ -1,121 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/properties.h
*/
#include <opc/config.h>
#include <opc/container.h>
#ifndef OPC_PROPERTIES_H
#define OPC_PROPERTIES_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Represents a simple Dublin Core type: a string value \c str with an
associated \c lang string.
Use opcCorePropertiesSetStringLang to update a value of this type.
*/
typedef struct OPC_DC_SIMPLE_TYPE {
xmlChar *str;
xmlChar *lang;
} opcDCSimpleType_t;
/**
Represents the core properties of an OPC container.
The trailing comment on each member names its schema type.
Plain \c xmlChar* members are meant to be updated via opcCorePropertiesSetString,
opcDCSimpleType_t members via opcCorePropertiesSetStringLang.
\see opcCorePropertiesInit
\see opcCorePropertiesCleanup
*/
typedef struct OPC_PROPERTIES_STRUCT {
xmlChar *category; /* xsd:string */
xmlChar *contentStatus; /* xsd:string */
xmlChar *created; /* dc:date */
opcDCSimpleType_t creator; /* dc:any */
opcDCSimpleType_t description; /* dc:any */
opcDCSimpleType_t identifier; /* dc:any */
opcDCSimpleType_t *keyword_array; /* cp:CT_Keywords */
opc_uint32_t keyword_items; /* presumably the number of entries in keyword_array -- TODO confirm */
opcDCSimpleType_t language; /* dc:any */
xmlChar *lastModifiedBy; /* xsd:string */
xmlChar *lastPrinted; /* xsd:dateTime */
xmlChar *modified; /* dc:date */
xmlChar *revision; /* xsd:string */
opcDCSimpleType_t subject; /* dc:any */
opcDCSimpleType_t title; /* dc:any */
xmlChar *version; /* xsd:string */
} opcProperties_t;
/**
Initializes the core properties \c cp.
\see opcCorePropertiesSetString
*/
opc_error_t opcCorePropertiesInit(opcProperties_t *cp);
/**
Cleans up the core properties \c cp, i.e. releases all resources.
\see opcCorePropertiesSetString
*/
opc_error_t opcCorePropertiesCleanup(opcProperties_t *cp);
/**
Reads the core properties \c cp from the container \c c.
*/
opc_error_t opcCorePropertiesRead(opcProperties_t *cp, opcContainer *c);
/**
Writes/updates the core properties \c cp in the container \c c.
*/
opc_error_t opcCorePropertiesWrite(opcProperties_t *cp, opcContainer *c);
/**
Updates a string member of the core properties the right way.
\code
opcProperties_t cp;
opcCorePropertiesInit(&cp);
opcCorePropertiesSetString(&cp.revision, "1");
opcCorePropertiesSetStringLang(&cp.creator, "Florian Reuter", NULL);
opcCorePropertiesCleanup(&cp);
\endcode
\param prop Address of the string member to update.
\param str The new value.
*/
opc_error_t opcCorePropertiesSetString(xmlChar **prop, const xmlChar *str);
/**
Updates a core property of type opcDCSimpleType_t (string \c str plus \c lang) the right way.
The example in opcCorePropertiesSetString passes NULL for \c lang.
\see opcCorePropertiesSetString
*/
opc_error_t opcCorePropertiesSetStringLang(opcDCSimpleType_t *prop, const xmlChar *str, const xmlChar *lang);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_PROPERTIES_H */

View File

@@ -1,140 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/relation.h
*/
#include <opc/config.h>
#ifndef OPC_RELATION_H
#define OPC_RELATION_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Identifier for an OPC relation.
*/
typedef opc_uint32_t opcRelation;
/**
Constant which represents an invalid relation.
NOTE(review): the constant is the signed literal -1 while opcRelation is
opc_uint32_t, so comparisons rely on the implicit conversion of -1 to the
unsigned type -- confirm this is intended.
*/
#define OPC_RELATION_INVALID (-1)
/**
Finds a relation originating from \c part in \c container with \c relationId and/or \c mimeType.
If \c part is OPC_PART_INVALID then part represents the root part.
@param[in] container The container to search.
@param[in] part The source part of the relation.
@param[in] relationId The relationId (e.g. "rId1") or NULL.
@param[in] mimeType The mimeType or NULL.
*/
opcRelation opcRelationFind(opcContainer *container, opcPart part, const xmlChar *relationId, const xmlChar *mimeType);
/**
Deletes the relation from the container.
The parameters have the same meaning as for opcRelationFind.
\see opcRelationFind.
*/
opc_error_t opcRelationDelete(opcContainer *container, opcPart part, const xmlChar *relationId, const xmlChar *mimeType);
/**
Returns the first relation of \c part.
The following code will dump all relations:
\code
for(opcPart part=opcPartGetFirst(c);OPC_PART_INVALID!=part;part=opcPartGetNext(c, part)) {
for(opcRelation rel=opcRelationFirst(c, part);
OPC_RELATION_INVALID!=rel;
rel=opcRelationNext(c, part, rel)) {
opcPart internal_target=opcRelationGetInternalTarget(c, part, rel);
const xmlChar *external_target=opcRelationGetExternalTarget(c, part, rel);
const xmlChar *target=(NULL!=internal_target?internal_target:external_target);
const xmlChar *prefix=NULL;
opc_uint32_t counter=-1;
const xmlChar *type=NULL;
opcRelationGetInformation(c, part, rel, &prefix, &counter, &type);
if (-1==counter) { // no counter after prefix
printf("%s;%s;%s;%s\n", part, prefix, target, type);
} else {
printf("%s;%s%i;%s;%s\n", part, prefix, counter, target, type);
}
}
}
\endcode
\see opcRelationNext
*/
opcRelation opcRelationFirst(opcContainer *container, opcPart part);
/**
Returns the relation of \c part following \c relation.
Iteration ends when OPC_RELATION_INVALID is returned (see the loop in opcRelationFirst).
\see opcRelationFirst
*/
opcRelation opcRelationNext(opcContainer *container, opcPart part, opcRelation relation);
/**
Returns the internal target.
\note To test for an external target use opcRelationGetExternalTarget.
\see opcRelationGetExternalTarget
*/
opcPart opcRelationGetInternalTarget(opcContainer *container, opcPart part, opcRelation relation);
/**
Returns the external target or NULL if it is an internal target.
The string is interned. Must not be freed.
\see opcRelationGetInternalTarget
*/
const xmlChar *opcRelationGetExternalTarget(opcContainer *container, opcPart part, opcRelation relation);
/**
Returns the relation's type.
The string is interned. Must not be freed.
*/
const xmlChar *opcRelationGetType(opcContainer *container, opcPart part, opcRelation relation);
/**
Gets information about a relation: its id \c prefix, the \c counter following
the prefix and its \c type.
In the example in opcRelationFirst a \c counter of -1 means there is no counter after the prefix.
\see opcRelationFirst
*/
void opcRelationGetInformation(opcContainer *container, opcPart part, opcRelation relation, const xmlChar **prefix, opc_uint32_t *counter, const xmlChar **type);
/**
Adds a relation to \c container from the \c src part to the \c dest part with id \c rid and type \c type.
NOTE(review): the return type is opc_uint32_t rather than opcRelation;
presumably it is the id of the new relation -- confirm.
*/
opc_uint32_t opcRelationAdd(opcContainer *container, opcPart src, const xmlChar *rid, opcPart dest, const xmlChar *type);
/**
Adds an external relation to \c container from the \c src part to the \c target URL with id \c rid and type \c type.
NOTE(review): the return type is opc_uint32_t rather than opcRelation;
presumably it is the id of the new relation -- confirm.
*/
opc_uint32_t opcRelationAddExternal(opcContainer *container, opcPart src, const xmlChar *rid, const xmlChar *target, const xmlChar *type);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_RELATION_H */

View File

@@ -1,69 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/xmlreader.h
*/
#ifndef OPC_XMLREADER_H
#define OPC_XMLREADER_H
#include <opc/config.h>
#include <libxml/xmlreader.h>
#include <mce/textreader.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
Opens an MCE reader for \c partName. Parameters \c URL, \c encoding and \c options will be passed unmodified to
http://xmlsoft.org/html/libxml-xmlreader.html#xmlReaderForIO and they can be NULL, NULL, 0.
\note Make sure the part exists.
\see opcPartFind
*/
opc_error_t opcXmlReaderOpen(opcContainer *container, mceTextReader_t *mceTextReader, const xmlChar *partName, const char * URL, const char * encoding, int options);
/**
Returns a libxml DOM document for \c partName. Parameters \c URL, \c encoding and \c options will be passed unmodified to
http://xmlsoft.org/html/libxml-parser.html#xmlReadIO and they can be NULL, NULL, 0.
\note Make sure the part exists.
\see opcPartFind
*/
xmlDocPtr opcXmlReaderReadDoc(opcContainer *container, const xmlChar *partName, const char * URL, const char * encoding, int options);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_XMLREADER_H */

View File

@@ -1,57 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/xmlwriter.h
*/
#include <opc/config.h>
#include <mce/textwriter.h>
#ifndef OPC_XMLWRITER_H
#define OPC_XMLWRITER_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Creates an MCE text writer for \c part in the container \c c with compression \c compression_option.
\note Make sure the part exists.
\see opcPartFind
*/
mceTextWriter *mceTextWriterOpen(opcContainer *c, opcPart part, opcCompressionOption_t compression_option);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_XMLWRITER_H */

View File

@@ -1,255 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/zip.h
The ZIP file backend of an OPC container.
*/
#include <opc/config.h>
#include <opc/file.h>
#include <opc/container.h>
#ifndef OPC_ZIP_H
#define OPC_ZIP_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Default growth hint of an OPC stream.
\see opcZipCreateSegment (parameter \c growth_hint)
*/
#define OPC_DEFAULT_GROWTH_HINT 512
/**
Handle to a ZIP archive. Only forward-declared in this header.
\see internal.h
*/
typedef struct OPC_ZIP_STRUCT opcZip;
/**
Handle to a raw ZIP input stream. Only forward-declared in this header.
\see internal.h
*/
typedef struct OPC_ZIPINPUTSTREAM_STRUCT opcZipInputStream;
/**
Handle to a raw ZIP output stream. Only forward-declared in this header.
\see internal.h
*/
typedef struct OPC_ZIPOUTPUTSTREAM_STRUCT opcZipOutputStream;
/**
Holds all information of a ZIP segment.
Filled by opcZipLoadSegment and passed to the opcZipLoader segment callback.
NOTE(review): the individual members are not documented in this header; the
remarks below are read off the declarations only -- confirm against the
implementation.
*/
typedef struct OPC_ZIP_SEGMENT_INFO_STRUCT {
xmlChar name[OPC_MAX_PATH]; /* fixed-size buffer of OPC_MAX_PATH characters */
opc_uint32_t name_len; /* presumably the used length of name -- TODO confirm */
opc_uint32_t segment_number;
opc_bool_t last_segment;
opc_bool_t rels_segment;
opc_uint32_t header_size;
opc_uint32_t min_header_size;
opc_uint32_t trailing_bytes;
opc_uint32_t compressed_size;
opc_uint32_t uncompressed_size;
opc_uint16_t bit_flag;
opc_uint32_t data_crc;
opc_uint16_t compression_method;
opc_ofs_t stream_ofs;
opc_uint16_t growth_hint;
} opcZipSegmentInfo_t;
/**
Callback type passed as \c open to the segment callback of opcZipLoader.
\see opcZipLoader
*/
typedef int opcZipLoaderOpenCallback(void *iocontext);
/**
Callback type passed as \c skip to the segment callback of opcZipLoader.
\see opcZipLoader
*/
typedef int opcZipLoaderSkipCallback(void *iocontext);
/**
Callback type passed as \c read to the segment callback of opcZipLoader.
\see opcZipLoader
*/
typedef int opcZipLoaderReadCallback(void *iocontext, char *buffer, int len);
/**
Callback type passed as \c close to the segment callback of opcZipLoader.
\see opcZipLoader
*/
typedef int opcZipLoaderCloseCallback(void *iocontext);
/**
Type of the per-segment callback invoked by opcZipLoader.
It receives the segment \c info together with the \c open, \c read, \c close
and \c skip methods for the segment's stream.
\see opcZipLoader
*/
typedef opc_error_t (opcZipLoaderSegmentCallback_t)(void *iocontext, void *userctx, opcZipSegmentInfo_t *info, opcZipLoaderOpenCallback *open, opcZipLoaderReadCallback *read, opcZipLoaderCloseCallback *close, opcZipLoaderSkipCallback *skip);
/**
Walks every segment in a ZIP archive and calls the \c segmentCallback callback method.
The implementer's \c segmentCallback method must then either use the passed \c open, \c read and \c close methods
to read the stream or the passed \c skip method to skip the stream.
This method can be used to e.g. read a ZIP file in stream mode.
*/
opc_error_t opcZipLoader(opcIO_t *io, void *userctx, opcZipLoaderSegmentCallback_t *segmentCallback);
/**
Callback type invoked by opcZipClose (and opcZipSegmentDelete) for a segment
so that the implementer can free user resources.
\see opcZipClose
*/
typedef opc_error_t (opcZipSegmentReleaseCallback)(opcZip *zip, opc_uint32_t segment_id);
/**
Closes the ZIP archive \c zip and will call \c releaseCallback for every segment to give the implementer a chance
to free user resources.
*/
void opcZipClose(opcZip *zip, opcZipSegmentReleaseCallback* releaseCallback);
/**
Creates an empty ZIP archive with the given \c io.
*/
opcZip *opcZipCreate(opcIO_t *io);
/**
Commits all buffers and writes the ZIP archive's local header directories.
If \c trim is true then padding bytes will be removed, i.e. the ZIP file size will be minimized.
*/
opc_error_t opcZipCommit(opcZip *zip, opc_bool_t trim);
/**
Garbage collection on the passed \c zip archive. This will e.g. make deleted files available as free space.
\see opcZipSegmentDelete
*/
opc_error_t opcZipGC(opcZip *zip);
/**
Loads segment information into \c info.
If \c rels_segment is -1 then load the info for part with name \c partName.
Otherwise load the segment information for the ".rels." segment of \c partName.
NOTE(review): \c rels_segment is an opc_bool_t, so the "-1" above looks
suspicious -- confirm which value selects the part itself.
\return Returns the segment_id.
*/
opc_uint32_t opcZipLoadSegment(opcZip *zip, const xmlChar *partName, opc_bool_t rels_segment, opcZipSegmentInfo_t *info);
/**
Creates a segment with the given parameters.
\see OPC_DEFAULT_GROWTH_HINT for the default value of \c growth_hint.
\return Returns the segment_id.
*/
opc_uint32_t opcZipCreateSegment(opcZip *zip,
const xmlChar *partName,
opc_bool_t relsSegment,
opc_uint32_t segment_size,
opc_uint32_t growth_hint,
opc_uint16_t compression_method,
opc_uint16_t bit_flag);
/**
Creates an input stream for the segment with \c segment_id.
\see opcZipLoadSegment
\see opcZipCreateSegment
\see opcZipCloseInputStream
*/
opcZipInputStream *opcZipOpenInputStream(opcZip *zip, opc_uint32_t segment_id);
/**
Frees all resources of the input \c stream.
*/
opc_error_t opcZipCloseInputStream(opcZip *zip, opcZipInputStream *stream);
/**
Reads at most \c buf_len bytes from the input \c stream into \c buf.
\return Returns the number of bytes read.
*/
opc_uint32_t opcZipReadInputStream(opcZip *zip, opcZipInputStream *stream, opc_uint8_t *buf, opc_uint32_t buf_len);
/**
Creates an output stream for the segment with \c *segment_id.
If \c *segment_id is -1 then a new segment will be created.
Otherwise the segment with \c *segment_id will be overwritten.
\see opcZipCloseOutputStream
*/
opcZipOutputStream *opcZipCreateOutputStream(opcZip *zip,
opc_uint32_t *segment_id,
const xmlChar *partName,
opc_bool_t relsSegment,
opc_uint32_t segment_size,
opc_uint32_t growth_hint,
opc_uint16_t compression_method,
opc_uint16_t bit_flag);
/**
Opens an existing output stream for reading.
NOTE(review): "for reading" is the original wording; for an output stream
"for writing" may have been meant -- confirm.
The \c *segment_id will be set to -1 and reset on opcZipCloseOutputStream.
\see opcZipCloseOutputStream
*/
opcZipOutputStream *opcZipOpenOutputStream(opcZip *zip, opc_uint32_t *segment_id);
/**
Will close the \c stream and free all resources. Additionally the new segment id will be stored in \c *segment_id.
\see opcZipOpenOutputStream
*/
opc_error_t opcZipCloseOutputStream(opcZip *zip, opcZipOutputStream *stream, opc_uint32_t *segment_id);
/**
Writes \c buf_len bytes from \c buf to the output \c stream.
\return Returns the number of bytes written.
*/
opc_uint32_t opcZipWriteOutputStream(opcZip *zip, opcZipOutputStream *stream, const opc_uint8_t *buf, opc_uint32_t buf_len);
/**
Returns the first segment id or -1.
Use the following code to iterate through all segments.
\code
for(opc_uint32_t segment_id=opcZipGetFirstSegmentId(zip);
-1!=segment_id;
segment_id=opcZipGetNextSegmentId(zip, segment_id)) {
...
}
\endcode
\see opcZipGetNextSegmentId
*/
opc_uint32_t opcZipGetFirstSegmentId(opcZip *zip);
/**
Returns the next segment id after \c segment_id or -1.
\see opcZipGetFirstSegmentId
*/
opc_uint32_t opcZipGetNextSegmentId(opcZip *zip, opc_uint32_t segment_id);
/**
Returns info about the given segment id via the output parameters
\c name, \c rels_segment and \c crc.
*/
opc_error_t opcZipGetSegmentInfo(opcZip *zip, opc_uint32_t segment_id, const xmlChar **name, opc_bool_t *rels_segment, opc_uint32_t *crc);
/**
Marks the given segments as deleted.
NOTE(review): \c first_segment and \c last_segment are passed by pointer;
presumably they delimit the range of segments and are updated by the call -- confirm.
\see opcZipGC
*/
opc_bool_t opcZipSegmentDelete(opcZip *zip, opc_uint32_t *first_segment, opc_uint32_t *last_segment, opcZipSegmentReleaseCallback* releaseCallback);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_ZIP_H */

View File

@@ -1,168 +0,0 @@
/* include/plib/plib.h. Generated from plib.h by configure. */
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _PLIB_PLIB_H_
#define _PLIB_PLIB_H_
#ifdef __cplusplus
extern "C" {
#endif
/* Feature macros recorded by configure for this build. */
#define HAVE_STDINT_H 1
#define HAVE_STDDEF_H 1
#define HAVE_STDIO_H 1
#define HAVE_STRING_H 1
#define HAVE_LIMITS_H 1
#define HAVE_STDLIB_H 1
/* #undef HAVE_IO_H */
#define HAVE_UNISTD_H 1
#define HAVE_SYS_TYPES_H 1
#define IS_CONFIGURED 1
/*
  Fallback for builds where configure was not executed: only a WIN32
  toolchain is supported, anything else is a hard error.
*/
#if !defined(IS_CONFIGURED)
#if defined(WIN32)
#define HAVE_STRING_H 1
#define HAVE_STDINT_H 1
#define HAVE_LIMITS_H 1
#define HAVE_STDDEF_H 1
#define HAVE_STDIO_H 1
#define HAVE_STDLIB_H 1
#define HAVE_IO_H 1
#define snprintf _snprintf
#else
#error "configure not executed and we are not on a win32 machine? please run configure or define WIN32 if you are on a WIN32 platform."
#endif
#endif
#ifdef HAVE_STDDEF_H
#include <stddef.h>
typedef size_t pofs_t; /* maximum file offset, e.g. for read/write operations */
#else
#error "system types can not be determined"
#endif
#ifdef HAVE_STDIO_H
#include <stdio.h>
#else
#error "system io can not be determined"
#endif
/* Fixed-width integer aliases, plus the boolean and size types. */
#ifdef HAVE_STDINT_H
#include <stdint.h>
typedef int8_t pint8_t;
typedef uint8_t puint8_t;
typedef int16_t pint16_t;
typedef uint16_t puint16_t;
typedef int32_t pint32_t;
typedef uint32_t puint32_t;
typedef int64_t pint64_t;
typedef uint64_t puint64_t;
typedef int pbool_t;
typedef size_t psize_t;
/* INTN_MAX, INTN_MIN, UINTN_MAX */
#else
#error "system types can not be determined"
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
/*
  Range limits for the aliases above. They are taken from <limits.h>, i.e.
  they describe unsigned char / int / unsigned int / unsigned short.
*/
#ifdef HAVE_LIMITS_H
#include <limits.h>
#define PUINT8_MAX UCHAR_MAX
#define PINT32_MAX INT_MAX
#define PINT32_MIN INT_MIN
#define PUINT32_MAX UINT_MAX
#define PUINT32_MIN 0
#define PUINT16_MAX USHRT_MAX
#define PUINT16_MIN 0
#else
#error "limits can not be determined"
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_IO_H
#include <io.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
/* PASSERT and PENSURE expand to assert(), so the header must provide it. */
#include <assert.h>
/**
Converts an ASCII string to a xmlChar string. This only works for ASCII strings.
*/
#ifndef _X
#define _X(s) BAD_CAST(s)
#endif
/**
Converts an xmlChar string to an ASCII string. This only works for ASCII charsets.
*/
#ifndef _X2C
#define _X2C(s) ((char*)(s))
#endif
/** Plain assertion; compiled out together with assert() when NDEBUG is set. */
#define PASSERT(e) assert(e)
/**
Like PASSERT, but the expression is still evaluated when NDEBUG is set,
so it may carry side effects.
*/
#ifdef NDEBUG
#define PENSURE(e) ((void)(e))
#else
#define PENSURE(e) assert(e)
#endif
#define PTRUE (0==0)
#define PFALSE (0==1)
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* _PLIB_PLIB_H_ */

Submodule lib/bzip2-1.0.6 deleted from 288acf97a1

Submodule lib/ffmpeg deleted from 8887991a31

Submodule lib/harfbuzz deleted from b28c282585

Submodule lib/leptonica deleted from cc03be70fd

Submodule lib/libmagic deleted from 1249b5cd02

Binary file not shown.

Binary file not shown.

Binary file not shown.

Submodule lib/libtiff deleted from 3db0ff91bc

Submodule lib/mupdf deleted from c50ac19e41

Submodule lib/onion deleted from 73329b61eb

Submodule lib/openjpeg deleted from ac3737372a

Submodule lib/tesseract deleted from f268e6615e

1
lmdb

Submodule lmdb deleted from 5c012bbe03

View File

@@ -1,17 +1,43 @@
{
"properties": {
"_tie": {
"type": "keyword",
"doc_values": true
},
"_depth": {
"type": "integer"
},
"path": {
"type": "text",
"analyzer": "path_analyzer",
"copy_to": "suggest-path"
"copy_to": "suggest-path",
"fielddata": true,
"fields": {
"nGram": {
"type": "text",
"analyzer": "my_nGram"
},
"text": {
"type": "text",
"analyzer": "content_analyzer"
}
}
},
"suggest-path": {
"type": "completion",
"analyzer": "keyword"
"analyzer": "case_insensitive_kw_analyzer"
},
"mime": {
"type": "keyword"
},
"parent": {
"type": "keyword",
"index": false
},
"thumbnail": {
"type": "keyword",
"index": false
},
"videoc": {
"type": "keyword",
"index": false
@@ -32,6 +58,10 @@
"type": "integer",
"index": false
},
"pages": {
"type": "integer",
"index": false
},
"mtime": {
"type": "integer"
},
@@ -104,7 +134,42 @@
}
},
"tag": {
"type": "keyword",
"copy_to": "suggest-tag"
},
"suggest-tag": {
"type": "completion",
"analyzer": "case_insensitive_kw_analyzer"
},
"exif_make": {
"type": "text"
},
"exif_model": {
"type": "text"
},
"exif:software": {
"type": "text"
},
"exif_exposure_time": {
"type": "keyword"
},
"exif_fnumber": {
"type": "keyword"
},
"exif_iso_speed_ratings": {
"type": "keyword"
},
"exif_focal_length": {
"type": "keyword"
},
"exif_user_comment": {
"type": "text"
},
"author": {
"type": "text"
},
"modified_by": {
"type": "text"
}
}
}

10
schema/pipeline.json Normal file
View File

@@ -0,0 +1,10 @@
{
"description": "Copy _id to _tie, save path depth",
"processors": [
{
"script": {
"source": "ctx._tie = ctx._id; ctx._depth = ctx.path.length() == 0 ? 0 : 1 + ctx.path.length() - ctx.path.replace(\"/\", \"\").length();"
}
}
]
}

View File

@@ -1,7 +1,8 @@
{
"index": {
"refresh_interval": "30s",
"codec": "best_compression"
"codec": "best_compression",
"number_of_replicas": 0
},
"analysis": {
"tokenizer": {
@@ -21,6 +22,12 @@
"lowercase"
]
},
"case_insensitive_kw_analyzer": {
"tokenizer": "keyword",
"filter": [
"lowercase"
]
},
"my_nGram": {
"tokenizer": "my_nGram_tokenizer",
"filter": [

View File

@@ -2,15 +2,15 @@
rm -rf index.sist2/
rm web/js/bundle.js 2> /dev/null
cat `ls web/js/*.min.js` > web/js/bundle.js
cat web/js/{util,dom,search}.js >> web/js/bundle.js
rm src/static/js/bundle.js 2> /dev/null
cat `ls src/static/js/*.min.js` > src/static/js/bundle.js
cat src/static/js/{util,dom}.js >> src/static/js/bundle.js
rm web/css/bundle*.css 2> /dev/null
cat web/css/*.min.css > web/css/bundle.css
cat web/css/light.css >> web/css/bundle.css
cat web/css/*.min.css > web/css/bundle_dark.css
cat web/css/dark.css >> web/css/bundle_dark.css
rm src/static/css/bundle*.css 2> /dev/null
cat src/static/css/*.min.css > src/static/css/bundle.css
cat src/static/css/light.css >> src/static/css/bundle.css
cat src/static/css/*.min.css > src/static/css/bundle_dark.css
cat src/static/css/dark.css >> src/static/css/bundle_dark.css
python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c

View File

@@ -1,100 +0,0 @@
#!/usr/bin/env bash
# Builds each bundled dependency under lib/ and moves the resulting static
# archives (*.a) into lib/ itself.
THREADS=$(nproc)
cd lib
# mupdf
cd mupdf
CFLAGS=-fPIC make USE_SYSTEM_HARFBUZZ=yes USE_SYSTEM_OPENJPEG=yes HAVE_X11=no HAVE_GLUT=no -j "$THREADS"
cd ..
mv mupdf/build/release/libmupdf.a .
mv mupdf/build/release/libmupdf-third.a .
# openjp2
cd openjpeg
#cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3 -march=native -DNDEBUG"
cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3"
make -j "$THREADS"
cd ..
mv openjpeg/bin/libopenjp2.a .
# harfbuzz
cd harfbuzz
./autogen.sh
./configure --disable-shared --enable-static
make -j "$THREADS"
cd ..
mv harfbuzz/src/.libs/libharfbuzz.a .
# ffmpeg
# Every continuation line keeps a space before the backslash so that adjacent
# flags can never be fused into a single word.
cd ffmpeg
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \
    --disable-ffprobe --disable-doc \
    --disable-manpages --disable-postproc --disable-avfilter \
    --disable-alsa --disable-lzma --disable-xlib --disable-debug \
    --disable-vdpau --disable-vaapi --disable-sdl2 --disable-network \
    --extra-cflags=-fPIC
make -j "$THREADS"
cd ..
mv ffmpeg/libavcodec/libavcodec.a .
mv ffmpeg/libavformat/libavformat.a .
mv ffmpeg/libavutil/libavutil.a .
mv ffmpeg/libswresample/libswresample.a .
mv ffmpeg/libswscale/libswscale.a .
# onion
cd onion
# -p: do not fail when the build directory is left over from a previous run
mkdir -p build
cd build
cmake -DONION_USE_SSL=false -DONION_USE_PAM=false -DONION_USE_PNG=false -DONION_USE_JPEG=false \
    -DONION_USE_XML2=false -DONION_USE_SYSTEMD=false -DONION_USE_SQLITE3=false \
    -DONION_USE_REDIS=false -DONION_USE_GC=false -DONION_USE_TESTS=false -DONION_EXAMPLES=false \
    -DONION_USE_BINDINGS_CPP=false ..
make -j "$THREADS"
cd ../..
mv onion/build/src/onion/libonion_static.a .
# bzip2
cd bzip2-1.0.6
make -j "$THREADS"
cd ..
mv bzip2-1.0.6/libbz2.a .
# magic
cd libmagic
./autogen.sh
./configure --enable-static --disable-shared
make -j "$THREADS"
cd ..
mv libmagic/src/.libs/libmagic.a .
# tesseract
cd tesseract
mkdir -p build
cd build
cmake -DSTATIC=on -DBUILD_TRAINING_TOOLS=off ..
make -j "$THREADS"
cd ../..
mv tesseract/build/libtesseract.a .
# leptonica
cd leptonica
./autogen.sh
./configure --without-zlib --without-jpeg --without-giflib \
    --without-libwebp --without-libwebpmux --without-libopenjpeg \
    --enable-static --disable-shared
make -j "$THREADS"
cd ..
mv leptonica/src/.libs/liblept.a .
# tiff
cd libtiff
./autogen.sh
./configure --enable-static --disable-shared --disable-lzw --disable-jpeg --disable-webp \
    --disable-lzma --disable-zstd --disable-jbig
make -j "$THREADS"
cd ..
mv libtiff/libtiff/.libs/libtiff.a .

View File

@@ -1,6 +1,9 @@
import json
files = [
"schema/mappings.json",
"schema/settings.json",
"schema/pipeline.json",
]
@@ -9,6 +12,7 @@ def clean(filepath):
for file in files:
with open(file, "rb") as f:
data = f.read()
with open(file, "r") as f:
data = json.dumps(json.load(f), separators=(",", ":")).encode()
data += b'\0'
print("char %s[%d] = {%s};" % (clean(file), len(data), ",".join(str(int(b)) for b in data)))

View File

@@ -2,14 +2,18 @@ application/arj, arj
application/base64, mme
application/binhex, hqx
application/book, boo|book
application/CDFV2-corrupt,
application/CDFV2, sdv
application/clariscad, ccad
application/commonground, dp
application/csv,
application/dicom, dcm
application/drafting, drw
application/epub+zip, epub
application/freeloader, frl
application/futuresplash, spl
application/groupwise, vew
application/gzip, gz
application/gzip, gz|tgz
application/hta, hta
application/i-deas, unv
application/iges, iges|igs
@@ -17,7 +21,6 @@ application/inf, inf
application/java-archive, jar
application/java, class
application/javascript,
application/x-archive, a
application/json, json
application/marc, mrc
application/mbedlet, mbd
@@ -27,7 +30,9 @@ application/msword, doc|dot|w6w|wiz|word
application/netmc, mcp
application/octet-stream, bin|dump|gpg
application/oda, oda
application/ogg, ogv
application/pdf, pdf
application/pgp-keys,
application/pgp-signature, pgp
application/pkcs7-signature, p7s
application/pkix-cert, cer|crt
@@ -43,6 +48,10 @@ application/vda, vda
application/vnd.fdf, fdf
application/vnd.font-fontforge-sfd, sfd
application/vnd.hp-hpgl, hgl|hpg|hpgl
application/vnd.iccprofile, icm
application/vnd.iccprofile, icm
application/vnd.lotus-1-2-3,
application/vnd.ms-cab-compressed, cab
application/vnd.ms-excel, xlb|xlc|xll|xlm|xls|xlw
application/vnd.ms-fontobject, eot
application/vnd.ms-opentype, otf
@@ -54,45 +63,75 @@ application/vnd.ms-project, mpp
application/vnd.oasis.opendocument.base, odb
application/vnd.oasis.opendocument.formula, odf
application/vnd.oasis.opendocument.graphics, odg
application/vnd.oasis.opendocument.presentation, odp
application/vnd.oasis.opendocument.spreadsheet, ods
application/vnd.oasis.opendocument.text, odt
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
application/vnd.symbian.install,
application/vnd.tcpdump.pcap, pcap
application/vnd.wap.wmlc, wmlc
application/vnd.wap.wmlscriptc, wmlsc
application/vnd.xara, web
application/vocaltec-media-desc, vmd
application/vocaltec-media-file, vmf
application/warc, warc
application/winhelp, hlp
application/wordperfect6.0, w60
application/wordperfect6.1, w61
application/wordperfect, wp|wp5|wp6|wpd
application/x-123, wk1
application/x-7z-compressed, 7z
application/x-aim, aim
application/x-apple-diskimage,
application/x-arc,
application/x-archive, a
application/x-atari-7800-rom, a78
application/x-authorware-bin, aab
application/x-authorware-map, aam
application/x-authorware-seg, aas
application/x-avira-qua,
application/x-bcpio, bcpio
application/x-bittorrent, torrent
application/x-bsh, bsh
application/x-bytecode.python, pyc
application/x-bzip2, boz|bz2
application/x-bzip, bz
application/x-cbr, cbr
application/x-cbz, cbz
application/x-cdlink, vcd
application/x-chat, cha|chat
application/x-chrome-extension,
application/x-cocoa, cco
application/x-conference, nsc
application/x-coredump,
application/x-cpio, cpio
application/x-dbf, dbf
application/x-dbt,
application/x-debian-package, deb
application/x-deepv, deepv
application/x-director, dcr|dir|dxr
application/x-director, dir|dxr
application/x-dmp, dmp
application/x-dosdriver,
application/x-dosexec, dll
application/x-dvi, dvi
application/x-elc, elc
application/x-empty,
application/x-envoy, env|evy
application/x-esrehber, es
application/x-excel, xla|xld|xlk|xlt|xlv
application/x-executable, exe
application/x-font-gdos,
application/x-font-pf2, pf2
application/x-font-pfm, pfm
application/x-font-sfn,
application/x-font-ttf, ttf|ttc
application/x-fptapplication/x-dbt,
application/x-freelance, pre
application/x-gamecube-rom,
application/x-gdbm,
application/x-gettext-translation,
application/x-git,
application/x-gsp, gsp
application/x-gss, gss
@@ -102,46 +141,68 @@ application/x-hdf, hdf
application/x-helpfile, help
application/x-httpd-imap, imap
application/x-ima, ima
application/x-innosetup,
application/x-internett-signup, ins
application/x-inventor, iv
application/x-ip2, ip
application/x-java-applet,
application/x-java-commerce, jcm
application/x-java-image,
application/x-java-jmod, jmod
application/x-java-keystore,
application/x-kdelnk,
application/x-koan, skd|skm|skp|skt
application/x-latex, latex|ltx
application/x-livescreen, ivy
application/x-lotus, wq1
application/x-lz4+json, jsonlz4
application/x-lz4, lz4
application/x-lz4, lz4
application/x-lzh-compressed,
application/x-lzh, lzh
application/x-lzip, lz
application/x-lzma, lzma
application/x-lzop, lzo
application/x-lzx, lzx
application/x-mach-binary, jnilib|dylib
application/x-mach-executable,
application/x-magic-cap-package-1.0, mc$
application/x-mathcad, mcd
application/x-maxis-dbpf,
application/x-meme, mm
application/x-midi, midi
application/x-mif, mif
application/x-mix-transfer, nix
application/xml, opf
application/x-mobipocket-ebook, mobi
application/vnd.amazon.mobi8-ebook, azw|azw3
application/x-msaccess, accdb
application/x-ms-compress-szdd, fon
application/x-ms-pdb, pdb
application/x-ms-reader, lit
application/x-n64-rom, z64
application/x-navi-animation, ani
application/x-navidoc, nvd
application/x-navimap, map
application/x-navistyle, stl
application/x-nes-rom, nes
application/x-netcdf, cdf|nc
application/x-newton-compatible-pkg, pkg
application/x-nintendo-ds-rom,
application/x-object, o
application/x-omcdatamaker, omcd
application/x-omc, omc
application/x-omcregerator, omcr
application/x-pagemaker, pm4|pm5
application/x-pcl, pcl
application/x-pgp-keyring,
application/x-pixclscript, plx
application/x-pkcs7-certreqresp, p7r
application/x-pkcs7-signature, p7a
application/x-project, mpc|mpt|mpv|mpx
application/x-qpro, wb1
application/x-rar, rar
application/x-rpm, rpm
application/x-sdp, sdp
application/x-sea, sea
application/x-seelogo, sl
@@ -149,12 +210,17 @@ application/x-setupscript,
application/x-sharedlib, so
application/x-shar, shar
application/x-shockwave-flash, swf
application/x-snappy-framed,
application/x-sprite, spr|sprite
application/x-sqlite3,
application/x-stargallery-thm,
application/x-stuffit, sit
application/x-sv4cpio, sv4cpio
application/x-sv4crc, sv4crc
application/x-tar, tar
application/x-tbook, sbk|tbk
application/x-terminfo,
application/x-terminfo2,
application/x-texinfo, texi|texinfo
application/x-tex-tfm, tfm
application/x-ustar, ustar
@@ -163,16 +229,22 @@ application/x-vnd.audioexplosion.mzz, mzz
application/x-vnd.ls-xpix, xpix
application/x-vrml, vrml
application/x-wais-source, src|wsrc
application/x-wine-extension-ini,
application/x-wintalk, wtk
application/x-world, svr
application/x-wri, wri
application/x-x509-ca-cert, der
application/x-xz, xz
application/x-zip,
application/x-zstd, zst
application/zip, zip
application/zlib, z
!audio/basic, au
audio/it, it
audio/make, funk|my|pfunk
audio/midi, kar
audio/mid, rmi
audio/mp4, m4b
audio/mpeg, m2a|mpa
audio/ogg, ogg
audio/s3m, s3m
@@ -180,7 +252,10 @@ audio/tsp-audio, tsi
audio/tsplayer, tsp
audio/vnd.qcelp, qcp
audio/voxware, vox
audio/x-aiff, aiff|aif
audio/x-flac, flac
audio/x-gsm, gsd|gsm
audio/x-hx-aac-adts,
audio/x-jam, jam
audio/x-liveaudio, lam
audio/x-m4a, m4a
@@ -194,17 +269,24 @@ audio/x-nspaudio, lma
audio/x-pn-realaudio, ram|rm|rmm|rmp
audio/x-psid, sid
audio/x-realaudio, ra
audio/x-s3m,
audio/x-twinvq-plugin, vqe|vql
audio/x-twinvq, vqf
audio/x-voc, voc
audio/x-wav, wav
!audio/x-xbox360-executable, xex
!audio/x-xbox-executable, xbe
font/otf,
font/sfnt,
font/woff2, woff2
font/woff, woff
image/bmp,
image/cmu-raster, rast
image/fif, fif
image/florian, flo|turbot
image/g3fax, g3
image/gif, gif
image/heic, heic
image/ief, ief|iefs
image/jpeg, jfif|jfif-tbnl|jpe|jpeg|jpg
image/jutvision, jut
@@ -213,6 +295,9 @@ image/pict, pic|pict
image/png, png|x-png
!image/svg, svg
!image/svg+xml,
image/tiff,
!image/vnd.adobe.photoshop, psd
!image/vnd.djvu, djvu
image/vnd.fpx, fpx
image/vnd.microsoft.icon,
image/vnd.rn-realflash, rf
@@ -220,9 +305,15 @@ image/vnd.rn-realpix, rp
image/vnd.wap.wbmp, wbmp
image/vnd.xiff, xif
image/webp, webp
image/wmf,
image/x-3ds, 3ds
image/x-award-bioslogo,
image/x-cmu-raster, ras
image/x-cur, tga
image/x-dwg, dwg|dxf|svf
image/x-eps,
image/x-exr, exr
image/x-gem,
image/x-icns,
!image/x-icon, ico
image/x-jg, art
@@ -236,32 +327,31 @@ image/x-portable-graymap, pgm
image/x-portable-pixmap, ppm
image/x-quicktime, qif|qti|qtif
image/x-rgb, rgb
image/x-tga,
image/x-tiff, tif|tiff
image/tiff,
image/x-win-bitmap,
!image/x-xcf, xcf
!image/x-xpixmap, xpm
image/x-xwindowdump, xwd
message/news,
message/rfc822, mht|mhtml|mime
model/vnd.dwf, dwf
model/vnd.gdl, gdl
model/vnd.gs.gdl, gdsl
model/vrml, wrz
model/x-pov, pov
text/asp, asp
text/css, css
text/x-sass, sass
text/x-scss, scss
text/html, acgi|htm|html|htmls|htx|shtml
text/javascript, js
text/mcf, mcf
text/pascal, pas
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml
text/PGP,
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
application/vnd.coffeescript, coffee
text/richtext, rt|rtf|rtx
text/rtf,
text/scriplet, wsc
text/x-awk, awk
!video/x-jng, jng
video/x-mng, mng
image/x-cur, tga
image/x-xwindowdump, xwd
!image/vnd.adobe.photoshop, psd
text/tab-separated-values, tsv
text/troff, man|me|ms|roff|t|tr
text/uri-list, uji|unis|uri|uris
@@ -273,6 +363,7 @@ text/webviewhtml, htt
text/x-Algol68,
text/x-asm, asm|s
text/x-audiosoft-intra, aip
text/x-awk, awk
text/x-bcpl,
text/x-c, c|cc|h
text/x-c++, cpp|cxx|c++
@@ -287,23 +378,31 @@ text/x-makefile, am|mak
text/xml, xml|pom|iml|plist
text/x-m, m
text/x-msdos-batch, bat
text/x-ms-regedit, reg
text/x-objective-c,
text/x-pascal, p
text/x-perl, pl
text/x-php, php
text/x-po, po
text/x-python, py
text/x-ruby, rb
text/x-sass, sass
text/x-scss, scss
text/x-server-parsed-html, ssi
text/x-setext, etx
text/x-sgml, sgm|sgml
text/x-shellscript, sh
text/x-speech, talk
text/x-tcl,
text/x-tex, tex
text/x-uil, uil
text/x-uuencode, uue
text/x-vcalendar, vcs
text/x-vcard, vcf
video/animaflex, afl
video/avi, avi
video/avs-video, avs
video/MP2T,
video/mp4, mp4
video/mpeg, m1v|m2v|mpe|mpeg|mpg
video/quicktime, moov|mov|qt
@@ -318,101 +417,36 @@ video/x-atomic3d-feature, fmf
video/x-dl, dl
video/x-dv, dif|dv
video/x-fli, fli
video/x-flv, flv
video/x-isvideo, isu
!video/x-jng, jng
video/x-m4v, m4v
video/x-matroska, mkv
video/x-mng, mng
video/x-motion-jpeg, mjpg
video/x-ms-asf, asf|asx
video/x-ms-asf, asf|asx|wmv
video/x-msvideo, divx
video/x-qtc, qtc
video/x-sgi-movie, movie|mv
application/x-7z-compressed, 7z
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
text/x-po, po
application/x-rpm, rpm
application/x-debian-package, deb
application/vnd.iccprofile, icm
application/dicom, dcm
image/x-exr, exr
application/vnd.iccprofile, icm
video/x-matroska, mkv
application/x-empty,
model/vnd.gdl, gdl
model/vnd.gs.gdl, gdsl
font/woff, woff
font/woff2, woff2
application/epub+zip, epub
application/x-mobipocket-ebook, mobi
audio/x-flac, flac
application/x-rar, rar
video/x-msvideo, divx
video/x-flv, flv
application/x-kdelnk,
text/x-tcl,
application/ogg, ogv
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
application/vnd.ms-cab-compressed, cab
audio/mp4, m4b
!image/vnd.djvu, djvu
application/x-ms-reader, lit
application/CDFV2-corrupt,
text/x-vcard, vcf
application/x-innosetup,
application/winhelp, hlp
image/x-tga,
application/x-wine-extension-ini,
application/x-cbz, cbz
application/x-cbr, cbr
application/x-ms-compress-szdd, fon
application/x-atari-7800-rom, a78
application/x-nes-rom, nes
application/x-font-pfm, pfm
application/x-gettext-translation,
image/wmf,
application/pgp-keys,
image/x-3ds, 3ds
application/x-lz4, lz4
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
application/vnd.oasis.opendocument.presentation, odp
application/x-msaccess, accdb
application/vnd.oasis.opendocument.spreadsheet, ods
audio/x-aiff, aiff|aif
text/x-ms-regedit, reg
application/x-gamecube-rom,
application/x-nintendo-ds-rom,
text/x-objective-c,
application/x-font-gdos,
application/x-apple-diskimage,
application/x-zstd, zst
video/x-m4v, m4v
message/news,
application/vnd.symbian.install,
application/x-lzh-compressed,
application/x-dosdriver,
application/vnd.tcpdump.pcap, pcap
x-epoc/x-sisx-app,
application/x-avira-qua,
video/MP2T,
application/x-snappy-framed,
application/x-lz4+json, jsonlz4
application/x-dmp, dmp
application/zlib, z
application/x-pgp-keyring,
application/x-gdbm,
application/x-font-pf2, pf2
application/x-zip,
application/x-coredump,
application/x-java-jmod, jmod
application/x-terminfo,
application/x-terminfo2,
application/x-arc,
application/vnd.lotus-1-2-3,
image/x-win-bitmap,
application/x-maxis-dbpf,
text/PGP,
audio/x-hx-aac-adts,
application/x-chrome-extension,
image/heic, heic
image/x-gem,
application/x-lzma, lzma
application/warc, warc
application/x-lz4, lz4
application/x-lzip, lz
application/x-lzop, lzo
application/x-zstd-dictionary,
application/vnd.ms-outlook, msg
image/x-olympus-orf, orf
image/x-nikon-nef, nef
image/x-fuji-raf, raf
image/x-panasonic-raw, rw2|raw
image/x-adobe-dng, dng
image/x-canon-cr2, cr2
image/x-canon-crw, crw
image/x-dcraw,
image/x-kodak-dcr, dcr
image/x-kodak-k25, k25
image/x-kodak-kdc, kdc
image/x-minolta-mrw, mrw
image/x-pentax-pef, pef
image/x-sigma-x3f, x3f
image/x-sony-arw, arw
image/x-sony-sr2, sr2
image/x-sony-srf, srf
image/x-epson-erf, erf
sist2/sidecar, s2meta
1 application/arj arj
2 application/base64 mme
3 application/binhex hqx
4 application/book boo|book
5 application/CDFV2-corrupt
6 application/CDFV2 sdv
7 application/clariscad ccad
8 application/commonground dp
9 application/csv
10 application/dicom dcm
11 application/drafting drw
12 application/epub+zip epub
13 application/freeloader frl
14 application/futuresplash spl
15 application/groupwise vew
16 application/gzip gz gz|tgz
17 application/hta hta
18 application/i-deas unv
19 application/iges iges|igs
21 application/java-archive jar
22 application/java class
23 application/javascript
application/x-archive a
24 application/json json
25 application/marc mrc
26 application/mbedlet mbd
30 application/netmc mcp
31 application/octet-stream bin|dump|gpg
32 application/oda oda
33 application/ogg ogv
34 application/pdf pdf
35 application/pgp-keys
36 application/pgp-signature pgp
37 application/pkcs7-signature p7s
38 application/pkix-cert cer|crt
48 application/vnd.fdf fdf
49 application/vnd.font-fontforge-sfd sfd
50 application/vnd.hp-hpgl hgl|hpg|hpgl
51 application/vnd.iccprofile icm
52 application/vnd.iccprofile icm
53 application/vnd.lotus-1-2-3
54 application/vnd.ms-cab-compressed cab
55 application/vnd.ms-excel xlb|xlc|xll|xlm|xls|xlw
56 application/vnd.ms-fontobject eot
57 application/vnd.ms-opentype otf
63 application/vnd.oasis.opendocument.base odb
64 application/vnd.oasis.opendocument.formula odf
65 application/vnd.oasis.opendocument.graphics odg
66 application/vnd.oasis.opendocument.presentation odp
67 application/vnd.oasis.opendocument.spreadsheet ods
68 application/vnd.oasis.opendocument.text odt
69 application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
70 application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
71 application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
72 application/vnd.symbian.install
73 application/vnd.tcpdump.pcap pcap
74 application/vnd.wap.wmlc wmlc
75 application/vnd.wap.wmlscriptc wmlsc
76 application/vnd.xara web
77 application/vocaltec-media-desc vmd
78 application/vocaltec-media-file vmf
79 application/warc warc
80 application/winhelp hlp
81 application/wordperfect6.0 w60
82 application/wordperfect6.1 w61
83 application/wordperfect wp|wp5|wp6|wpd
84 application/x-123 wk1
85 application/x-7z-compressed 7z
86 application/x-aim aim
87 application/x-apple-diskimage
88 application/x-arc
89 application/x-archive a
90 application/x-atari-7800-rom a78
91 application/x-authorware-bin aab
92 application/x-authorware-map aam
93 application/x-authorware-seg aas
94 application/x-avira-qua
95 application/x-bcpio bcpio
96 application/x-bittorrent torrent
97 application/x-bsh bsh
98 application/x-bytecode.python pyc
99 application/x-bzip2 boz|bz2
100 application/x-bzip bz
101 application/x-cbr cbr
102 application/x-cbz cbz
103 application/x-cdlink vcd
104 application/x-chat cha|chat
105 application/x-chrome-extension
106 application/x-cocoa cco
107 application/x-conference nsc
108 application/x-coredump
109 application/x-cpio cpio
110 application/x-dbf dbf
111 application/x-dbt
112 application/x-debian-package deb
113 application/x-deepv deepv
114 application/x-director dcr|dir|dxr dir|dxr
115 application/x-dmp dmp
116 application/x-dosdriver
117 application/x-dosexec dll
118 application/x-dvi dvi
119 application/x-elc elc
120 application/x-empty
121 application/x-envoy env|evy
122 application/x-esrehber es
123 application/x-excel xla|xld|xlk|xlt|xlv
124 application/x-executable exe
125 application/x-font-gdos
126 application/x-font-pf2 pf2
127 application/x-font-pfm pfm
128 application/x-font-sfn
129 application/x-font-ttf ttf|ttc
130 application/x-fptapplication/x-dbt
131 application/x-freelance pre
132 application/x-gamecube-rom
133 application/x-gdbm
134 application/x-gettext-translation
135 application/x-git
136 application/x-gsp gsp
137 application/x-gss gss
141 application/x-helpfile help
142 application/x-httpd-imap imap
143 application/x-ima ima
144 application/x-innosetup
145 application/x-internett-signup ins
146 application/x-inventor iv
147 application/x-ip2 ip
148 application/x-java-applet
149 application/x-java-commerce jcm
150 application/x-java-image
151 application/x-java-jmod jmod
152 application/x-java-keystore
153 application/x-kdelnk
154 application/x-koan skd|skm|skp|skt
155 application/x-latex latex|ltx
156 application/x-livescreen ivy
157 application/x-lotus wq1
158 application/x-lz4+json jsonlz4
159 application/x-lz4 lz4
160 application/x-lz4 lz4
161 application/x-lzh-compressed
162 application/x-lzh lzh
163 application/x-lzip lz
164 application/x-lzma lzma
165 application/x-lzop lzo
166 application/x-lzx lzx
167 application/x-mach-binary jnilib|dylib
168 application/x-mach-executable
169 application/x-magic-cap-package-1.0 mc$
170 application/x-mathcad mcd
171 application/x-maxis-dbpf
172 application/x-meme mm
173 application/x-midi midi
174 application/x-mif mif
175 application/x-mix-transfer nix
176 application/xml opf
177 application/x-mobipocket-ebook mobi
178 application/vnd.amazon.mobi8-ebook azw|azw3
179 application/x-msaccess accdb
180 application/x-ms-compress-szdd fon
181 application/x-ms-pdb pdb
182 application/x-ms-reader lit
183 application/x-n64-rom z64
184 application/x-navi-animation ani
185 application/x-navidoc nvd
186 application/x-navimap map
187 application/x-navistyle stl
188 application/x-nes-rom nes
189 application/x-netcdf cdf|nc
190 application/x-newton-compatible-pkg pkg
191 application/x-nintendo-ds-rom
192 application/x-object o
193 application/x-omcdatamaker omcd
194 application/x-omc omc
195 application/x-omcregerator omcr
196 application/x-pagemaker pm4|pm5
197 application/x-pcl pcl
198 application/x-pgp-keyring
199 application/x-pixclscript plx
200 application/x-pkcs7-certreqresp p7r
201 application/x-pkcs7-signature p7a
202 application/x-project mpc|mpt|mpv|mpx
203 application/x-qpro wb1
204 application/x-rar rar
205 application/x-rpm rpm
206 application/x-sdp sdp
207 application/x-sea sea
208 application/x-seelogo sl
210 application/x-sharedlib so
211 application/x-shar shar
212 application/x-shockwave-flash swf
213 application/x-snappy-framed
214 application/x-sprite spr|sprite
215 application/x-sqlite3
216 application/x-stargallery-thm
217 application/x-stuffit sit
218 application/x-sv4cpio sv4cpio
219 application/x-sv4crc sv4crc
220 application/x-tar tar
221 application/x-tbook sbk|tbk
222 application/x-terminfo
223 application/x-terminfo2
224 application/x-texinfo texi|texinfo
225 application/x-tex-tfm tfm
226 application/x-ustar ustar
229 application/x-vnd.ls-xpix xpix
230 application/x-vrml vrml
231 application/x-wais-source src|wsrc
232 application/x-wine-extension-ini
233 application/x-wintalk wtk
234 application/x-world svr
235 application/x-wri wri
236 application/x-x509-ca-cert der
237 application/x-xz xz
238 application/x-zip
239 application/x-zstd zst
240 application/zip zip
241 application/zlib z
242 !audio/basic au
243 audio/it it
244 audio/make funk|my|pfunk
245 audio/midi kar
246 audio/mid rmi
247 audio/mp4 m4b
248 audio/mpeg m2a|mpa
249 audio/ogg ogg
250 audio/s3m s3m
252 audio/tsplayer tsp
253 audio/vnd.qcelp qcp
254 audio/voxware vox
255 audio/x-aiff aiff|aif
256 audio/x-flac flac
257 audio/x-gsm gsd|gsm
258 audio/x-hx-aac-adts
259 audio/x-jam jam
260 audio/x-liveaudio lam
261 audio/x-m4a m4a
269 audio/x-pn-realaudio ram|rm|rmm|rmp
270 audio/x-psid sid
271 audio/x-realaudio ra
272 audio/x-s3m
273 audio/x-twinvq-plugin vqe|vql
274 audio/x-twinvq vqf
275 audio/x-voc voc
276 audio/x-wav wav
277 !audio/x-xbox360-executable xex
278 !audio/x-xbox-executable xbe
279 font/otf
280 font/sfnt
281 font/woff2 woff2
282 font/woff woff
283 image/bmp
284 image/cmu-raster rast
285 image/fif fif
286 image/florian flo|turbot
287 image/g3fax g3
288 image/gif gif
289 image/heic heic
290 image/ief ief|iefs
291 image/jpeg jfif|jfif-tbnl|jpe|jpeg|jpg
292 image/jutvision jut
295 image/png png|x-png
296 !image/svg svg
297 !image/svg+xml
298 image/tiff
299 !image/vnd.adobe.photoshop psd
300 !image/vnd.djvu djvu
301 image/vnd.fpx fpx
302 image/vnd.microsoft.icon
303 image/vnd.rn-realflash rf
305 image/vnd.wap.wbmp wbmp
306 image/vnd.xiff xif
307 image/webp webp
308 image/wmf
309 image/x-3ds 3ds
310 image/x-award-bioslogo
311 image/x-cmu-raster ras
312 image/x-cur tga
313 image/x-dwg dwg|dxf|svf
314 image/x-eps
315 image/x-exr exr
316 image/x-gem
317 image/x-icns
318 !image/x-icon ico
319 image/x-jg art
327 image/x-portable-pixmap ppm
328 image/x-quicktime qif|qti|qtif
329 image/x-rgb rgb
330 image/x-tga
331 image/x-tiff tif|tiff
332 image/tiff image/x-win-bitmap
333 !image/x-xcf xcf
334 !image/x-xpixmap xpm
335 image/x-xwindowdump xwd
336 message/news
337 message/rfc822 mht|mhtml|mime
338 model/vnd.dwf dwf
339 model/vnd.gdl gdl
340 model/vnd.gs.gdl gdsl
341 model/vrml wrz
342 model/x-pov pov
343 text/asp asp
344 text/css css
text/x-sass sass
text/x-scss scss
345 text/html acgi|htm|html|htmls|htx|shtml
346 text/javascript js
347 text/mcf mcf
348 text/pascal pas
349 text/plain text/PGP com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml
350 text/plain com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
351 application/vnd.coffeescript coffee
352 text/richtext rt|rtf|rtx
353 text/rtf
354 text/scriplet wsc
text/x-awk awk
!video/x-jng jng
video/x-mng mng
image/x-cur tga
image/x-xwindowdump xwd
!image/vnd.adobe.photoshop psd
355 text/tab-separated-values tsv
356 text/troff man|me|ms|roff|t|tr
357 text/uri-list uji|unis|uri|uris
363 text/x-Algol68
364 text/x-asm asm|s
365 text/x-audiosoft-intra aip
366 text/x-awk awk
367 text/x-bcpl
368 text/x-c c|cc|h
369 text/x-c++ cpp|cxx|c++
378 text/xml xml|pom|iml|plist
379 text/x-m m
380 text/x-msdos-batch bat
381 text/x-ms-regedit reg
382 text/x-objective-c
383 text/x-pascal p
384 text/x-perl pl
385 text/x-php php
386 text/x-po po
387 text/x-python py
388 text/x-ruby rb
389 text/x-sass sass
390 text/x-scss scss
391 text/x-server-parsed-html ssi
392 text/x-setext etx
393 text/x-sgml sgm|sgml
394 text/x-shellscript sh
395 text/x-speech talk
396 text/x-tcl
397 text/x-tex tex
398 text/x-uil uil
399 text/x-uuencode uue
400 text/x-vcalendar vcs
401 text/x-vcard vcf
402 video/animaflex afl
403 video/avi avi
404 video/avs-video avs
405 video/MP2T
406 video/mp4 mp4
407 video/mpeg m1v|m2v|mpe|mpeg|mpg
408 video/quicktime moov|mov|qt
417 video/x-dl dl
418 video/x-dv dif|dv
419 video/x-fli fli
420 video/x-flv flv
421 video/x-isvideo isu
422 !video/x-jng jng
423 video/x-m4v m4v
424 video/x-matroska mkv
425 video/x-mng mng
426 video/x-motion-jpeg mjpg
427 video/x-ms-asf asf|asx asf|asx|wmv
428 video/x-msvideo divx
429 video/x-qtc qtc
430 video/x-sgi-movie movie|mv
application/x-7z-compressed 7z
application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
text/x-po po
application/x-rpm rpm
application/x-debian-package deb
application/vnd.iccprofile icm
application/dicom dcm
image/x-exr exr
application/vnd.iccprofile icm
video/x-matroska mkv
application/x-empty
model/vnd.gdl gdl
model/vnd.gs.gdl gdsl
font/woff woff
font/woff2 woff2
application/epub+zip epub
application/x-mobipocket-ebook mobi
audio/x-flac flac
application/x-rar rar
video/x-msvideo divx
video/x-flv flv
application/x-kdelnk
text/x-tcl
application/ogg ogv
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
application/vnd.ms-cab-compressed cab
audio/mp4 m4b
!image/vnd.djvu djvu
application/x-ms-reader lit
application/CDFV2-corrupt
text/x-vcard vcf
application/x-innosetup
application/winhelp hlp
image/x-tga
application/x-wine-extension-ini
application/x-cbz cbz
application/x-cbr cbr
application/x-ms-compress-szdd fon
application/x-atari-7800-rom a78
application/x-nes-rom nes
application/x-font-pfm pfm
application/x-gettext-translation
image/wmf
application/pgp-keys
image/x-3ds 3ds
application/x-lz4 lz4
application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
application/vnd.oasis.opendocument.presentation odp
application/x-msaccess accdb
application/vnd.oasis.opendocument.spreadsheet ods
audio/x-aiff aiff|aif
text/x-ms-regedit reg
application/x-gamecube-rom
application/x-nintendo-ds-rom
text/x-objective-c
application/x-font-gdos
application/x-apple-diskimage
application/x-zstd zst
video/x-m4v m4v
message/news
application/vnd.symbian.install
application/x-lzh-compressed
application/x-dosdriver
application/vnd.tcpdump.pcap pcap
431 x-epoc/x-sisx-app
432 application/x-avira-qua application/x-zstd-dictionary
433 video/MP2T application/vnd.ms-outlook msg
434 application/x-snappy-framed image/x-olympus-orf orf
435 application/x-lz4+json image/x-nikon-nef jsonlz4 nef
436 application/x-dmp image/x-fuji-raf dmp raf
437 application/zlib image/x-panasonic-raw z rw2|raw
438 application/x-pgp-keyring image/x-adobe-dng dng
439 application/x-gdbm image/x-canon-cr2 cr2
440 application/x-font-pf2 image/x-canon-crw pf2 crw
441 application/x-zip image/x-dcraw
442 application/x-coredump image/x-kodak-dcr dcr
443 application/x-java-jmod image/x-kodak-k25 jmod k25
444 application/x-terminfo image/x-kodak-kdc kdc
445 application/x-terminfo2 image/x-minolta-mrw mrw
446 application/x-arc image/x-pentax-pef pef
447 application/vnd.lotus-1-2-3 image/x-sigma-x3f xf3
448 image/x-win-bitmap image/x-sony-arw arw
449 application/x-maxis-dbpf image/x-sony-sr2 sr2
450 text/PGP image/x-sony-srf srf
451 audio/x-hx-aac-adts image/x-epson-erf erf
452 application/x-chrome-extension sist2/sidecar s2meta
image/heic heic
image/x-gem
application/x-lzma lzma
application/warc warc
application/x-lz4 lz4
application/x-lzip lz
application/x-lzop lzo

View File

@@ -3,6 +3,7 @@ noparse = set()
ext_in_hash = set()
major_mime = {
"sist2": 0,
"model": 1,
"example": 2,
"message": 3,
@@ -18,7 +19,6 @@ major_mime = {
pdf = (
"application/pdf",
"application/x-cbz",
"application/epub+zip",
"application/vnd.ms-xpsdocument",
)
@@ -62,6 +62,40 @@ doc = (
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
)
# MIME types for which mime_id() appends the 0x02000000 flag.
mobi = (
    "application/x-mobipocket-ebook",
    "application/vnd.amazon.mobi8-ebook",
)
# MIME types for which mime_id() appends the 0x01000000 flag.
markup = (
    "text/xml",
    "text/html",
    "text/x-sgml",
)
# Camera RAW MIME types; mime_id() appends the 0x00800000 flag for these.
# Each type is listed exactly once (the minolta-mrw and pentax-pef entries
# used to be repeated, which was harmless for `in` tests but misleading).
raw = (
    "image/x-olympus-orf",
    "image/x-nikon-nef",
    "image/x-fuji-raf",
    "image/x-panasonic-raw",
    "image/x-adobe-dng",
    "image/x-canon-cr2",
    "image/x-canon-crw",
    "image/x-dcraw",
    "image/x-kodak-dcr",
    "image/x-kodak-k25",
    "image/x-kodak-kdc",
    "image/x-minolta-mrw",
    "image/x-pentax-pef",
    "image/x-sigma-x3f",
    "image/x-sony-arw",
    "image/x-sony-sr2",
    "image/x-sony-srf",
    "image/x-epson-erf",
)
cnt = 1
@@ -82,8 +116,18 @@ def mime_id(mime):
mime_id += " | 0x08000000"
elif mime in doc:
mime_id += " | 0x04000000"
elif mime in mobi:
mime_id += " | 0x02000000"
elif mime in markup:
mime_id += " | 0x01000000"
elif mime in raw:
mime_id += " | 0x00800000"
elif mime == "application/x-empty":
cnt -= 1
return "1"
elif mime == "sist2/sidecar":
cnt -= 1
return "2"
return mime_id
@@ -91,7 +135,7 @@ def clean(t):
return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_")
with open("mime.csv") as f:
with open("scripts/mime.csv") as f:
for l in f:
mime, ext_list = l.split(",")
if l.startswith("!"):
@@ -103,7 +147,7 @@ with open("mime.csv") as f:
print("// **Generated by mime.py**")
print("#ifndef MIME_GENERATED_C")
print("#define MIME_GENERATED_C")
print("#include <glib-2.0/glib.h>\n")
print("#include <glib.h>\n")
print("#include <stdlib.h>\n")
# Enum
print("enum mime {")

View File

@@ -1,10 +1,12 @@
files = [
"web/css/bundle.css",
"web/css/bundle_dark.css",
"web/js/bundle.js",
"web/img/sprite-skin-flat.png",
"web/img/sprite-skin-flat-dark.png",
"web/search.html",
"src/static/css/bundle.css",
"src/static/css/bundle_dark.css",
"src/static/js/bundle.js",
"src/static/js/search.js",
"src/static/img/sprite-skin-flat.png",
"src/static/img/sprite-skin-flat-dark.png",
"src/static/search.html",
"src/static/stats.html",
]

260
src/cli.c
View File

@@ -1,6 +1,5 @@
#include "cli.h"
#include "ctx.h"
#include <tesseract/capi.h>
#define DEFAULT_OUTPUT "index.sist2/"
@@ -10,10 +9,20 @@
#define DEFAULT_REWRITE_URL ""
#define DEFAULT_ES_URL "http://localhost:9200"
#define DEFAULT_ES_INDEX "sist2"
#define DEFAULT_BATCH_SIZE 100
#define DEFAULT_BIND_ADDR "localhost"
#define DEFAULT_PORT "4090"
#define DEFAULT_LISTEN_ADDRESS "localhost:4090"
#define DEFAULT_TREEMAP_THRESHOLD 0.0005
#define DEFAULT_MAX_MEM_BUFFER 2000
/*
 * NULL-terminated list of directories that scan_args_validate() hands to
 * find_file_in_paths() when looking for "<lang>.traineddata".
 */
const char *TESS_DATAPATHS[] = {
        "/usr/share/tessdata/",
        "/usr/share/tesseract-ocr/tessdata/",
        "./",
        NULL,
};
scan_args_t *scan_args_create() {
@@ -24,10 +33,18 @@ scan_args_t *scan_args_create() {
return args;
}
/**
 * Allocates a zero-filled exec_args_t.
 *
 * @return the new structure, or whatever calloc() returns on failure
 *         (no NULL check is performed here).
 */
exec_args_t *exec_args_create() {
    return calloc(1, sizeof(exec_args_t));
}
void scan_args_destroy(scan_args_t *args) {
if (args->name != NULL) {
free(args->name);
}
if (args->incremental != NULL) {
free(args->incremental);
}
if (args->path != NULL) {
free(args->path);
}
@@ -39,6 +56,12 @@ void scan_args_destroy(scan_args_t *args) {
/**
 * Frees an index_args_t.
 *
 * The mappings/settings buffers are released only when the corresponding
 * *_path option is set; no other member is freed here.
 */
void index_args_destroy(index_args_t *args) {
    //todo
    if (args->es_mappings_path != NULL) {
        free(args->es_mappings);
    }

    if (args->es_settings_path != NULL) {
        free(args->es_settings);
    }

    free(args);
}
@@ -47,6 +70,10 @@ void web_args_destroy(web_args_t *args) {
free(args);
}
/**
 * Frees the exec_args_t structure itself; none of its members are released here.
 */
void exec_args_destroy(exec_args_t *args) {
free(args);
}
int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n");
@@ -62,10 +89,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
}
if (args->incremental != NULL) {
abs_path = abspath(args->incremental);
args->incremental = abspath(args->incremental);
if (abs_path == NULL) {
fprintf(stderr, "File not found: %s\n", args->incremental);
return 1;
sist_log("main.c", SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
args->incremental = NULL;
}
}
@@ -107,7 +134,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
return 1;
}
if (args->depth < 0) {
if (args->depth <= 0) {
args->depth = G_MAXINT32;
} else {
args->depth += 1;
@@ -115,6 +142,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->name == NULL) {
args->name = g_path_get_basename(args->output);
} else {
char* tmp = malloc(strlen(args->name) + 1);
strcpy(tmp, args->name);
args->name = tmp;
}
if (args->rewrite_url == NULL) {
@@ -136,13 +167,53 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->tesseract_lang != NULL) {
TessBaseAPI *api = TessBaseAPICreate();
ret = TessBaseAPIInit3(api, TESS_DATAPATH, args->tesseract_lang);
char filename[128];
sprintf(filename, "%s.traineddata", args->tesseract_lang);
const char *path = find_file_in_paths(TESS_DATAPATHS, filename);
if (path == NULL) {
LOG_FATAL("cli.c", "Could not find tesseract language file!");
}
ret = TessBaseAPIInit3(api, path, args->tesseract_lang);
if (ret != 0) {
fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang);
return 1;
}
TessBaseAPIEnd(api);
TessBaseAPIDelete(api);
args->tesseract_path = path;
}
if (args->exclude_regex != NULL) {
const char *error;
int error_offset;
pcre *re = pcre_compile(args->exclude_regex, 0, &error, &error_offset, 0);
if (error != NULL) {
LOG_FATALF("cli.c", "pcre_compile returned error: %s (offset:%d)", error, error_offset)
}
pcre_extra *re_extra = pcre_study(re, 0, &error);
if (error != NULL) {
LOG_FATALF("cli.c", "pcre_study returned error: %s", error)
}
ScanCtx.exclude = re;
ScanCtx.exclude_extra = re_extra;
} else {
ScanCtx.exclude = NULL;
}
if (args->treemap_threshold_str == 0) {
args->treemap_threshold = DEFAULT_TREEMAP_THRESHOLD;
} else {
args->treemap_threshold = atof(args->treemap_threshold_str);
}
if (args->max_memory_buffer == 0) {
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
}
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
@@ -156,7 +227,40 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
LOG_DEBUGF("cli.c", "arg path=%s", args->path)
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
LOG_DEBUGF("cli.c", "arg ocr=%s", args->tesseract_lang)
LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)
return 0;
}
int load_external_file(const char *file_path, char **dst) {
struct stat info;
int res = stat(file_path, &info);
if (res == -1) {
LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno))
return 1;
}
int fd = open(file_path, O_RDONLY);
if (fd == -1) {
LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno))
return 1;
}
*dst = malloc(info.st_size + 1);
res = read(fd, *dst, info.st_size);
if (res < 0) {
LOG_ERRORF("cli.c", "Error reading file '%s': %s\n", file_path, strerror(errno))
return 1;
}
*(*dst + info.st_size) = '\0';
close(fd);
return 0;
}
@@ -170,6 +274,13 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
return 1;
}
if (args->threads == 0) {
args->threads = 1;
} else if (args->threads < 0) {
fprintf(stderr, "Invalid threads: %d\n", args->threads);
return 1;
}
char *index_path = abspath(argv[1]);
if (index_path == NULL) {
fprintf(stderr, "File not found: %s\n", argv[1]);
@@ -183,30 +294,26 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
args->es_url = DEFAULT_ES_URL;
}
if (args->es_index == NULL) {
args->es_index = DEFAULT_ES_INDEX;
}
if (args->script_path != NULL) {
struct stat info;
int res = stat(args->script_path, &info);
if (res == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
if (load_external_file(args->script_path, &args->script) != 0) {
return 1;
}
int fd = open(args->script_path, O_RDONLY);
if (fd == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
return 1;
}
args->script = malloc(info.st_size + 1);
res = read(fd, args->script, info.st_size);
if (res == -1) {
fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno));
if (args->es_settings_path != NULL) {
if (load_external_file(args->es_settings_path, &args->es_settings) != 0) {
return 1;
}
}
*(args->script + info.st_size) = '\0';
close(fd);
if (args->es_mappings_path != NULL) {
if (load_external_file(args->es_mappings_path, &args->es_mappings) != 0) {
return 1;
}
}
if (args->batch_size == 0) {
@@ -214,10 +321,16 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
}
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
LOG_DEBUGF("cli.c", "arg async_script=%s", args->async_script)
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
LOG_DEBUGF("cli.c", "arg print=%d", args->print)
LOG_DEBUGF("cli.c", "arg es_mappings_path=%s", args->es_mappings_path)
LOG_DEBUGF("cli.c", "arg es_mappings=%s", args->es_mappings)
LOG_DEBUGF("cli.c", "arg es_settings_path=%s", args->es_settings_path)
LOG_DEBUGF("cli.c", "arg es_settings=%s", args->es_settings)
LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size)
LOG_DEBUGF("cli.c", "arg force_reset=%d", args->force_reset)
@@ -237,18 +350,57 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
args->es_url = DEFAULT_ES_URL;
}
if (args->bind == NULL) {
args->bind = DEFAULT_BIND_ADDR;
if (args->listen_address == NULL) {
args->listen_address = DEFAULT_LISTEN_ADDRESS;
}
if (args->port == NULL) {
args->port = DEFAULT_PORT;
if (args->es_index == NULL) {
args->es_index = DEFAULT_ES_INDEX;
}
if (args->credentials != NULL) {
args->b64credentials = onion_base64_encode(args->credentials, (int) strlen(args->credentials));
//Remove trailing newline
*(args->b64credentials + strlen(args->b64credentials) - 1) = '\0';
char *ptr = strstr(args->credentials, ":");
if (ptr == NULL) {
fprintf(stderr, "Invalid --auth format, see usage\n");
return 1;
}
strncpy(args->auth_user, args->credentials, (ptr - args->credentials));
strcpy(args->auth_pass, ptr + 1);
if (strlen(args->auth_user) == 0) {
fprintf(stderr, "--auth username must be at least one character long");
return 1;
}
args->auth_enabled = TRUE;
} else {
args->auth_enabled = FALSE;
}
if (args->tag_credentials != NULL && args->credentials != NULL) {
fprintf(stderr, "--auth and --tag-auth are mutually exclusive");
return 1;
}
if (args->tag_credentials != NULL) {
char *ptr = strstr(args->tag_credentials, ":");
if (ptr == NULL) {
fprintf(stderr, "Invalid --tag-auth format, see usage\n");
return 1;
}
strncpy(args->auth_user, args->tag_credentials, (ptr - args->tag_credentials));
strcpy(args->auth_pass, ptr + 1);
if (strlen(args->auth_user) == 0) {
fprintf(stderr, "--tag-auth username must be at least one character long");
return 1;
}
args->tag_auth_enabled = TRUE;
} else {
args->tag_auth_enabled = FALSE;
}
args->index_count = argc - 1;
@@ -263,10 +415,12 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
}
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg bind=%s", args->bind)
LOG_DEBUGF("cli.c", "arg port=%s", args->port)
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)
LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials)
LOG_DEBUGF("cli.c", "arg b64credentials=%s", args->b64credentials)
LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials)
LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user)
LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass)
LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count)
for (int i = 0; i < args->index_count; i++) {
LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i])
@@ -285,3 +439,39 @@ web_args_t *web_args_create() {
return args;
}
/**
 * Validates the arguments of the exec-script command and fills in defaults.
 *
 * argv[1] must name an existing path (checked through abspath()); es_url and
 * es_index fall back to DEFAULT_ES_URL / DEFAULT_ES_INDEX, and the file named
 * by script_path is loaded into args->script.
 *
 * @return 0 on success, 1 when validation fails
 */
int exec_args_validate(exec_args_t *args, int argc, const char **argv) {

    if (argc < 2) {
        fprintf(stderr, "Required positional argument: PATH.\n");
        return 1;
    }

    // abspath() is used only as an existence check; the path is stored as given.
    char *resolved = abspath(argv[1]);
    if (resolved == NULL) {
        fprintf(stderr, "File not found: %s\n", argv[1]);
        return 1;
    }
    args->index_path = argv[1];
    free(resolved);

    if (!args->es_url) {
        args->es_url = DEFAULT_ES_URL;
    }
    if (!args->es_index) {
        args->es_index = DEFAULT_ES_INDEX;
    }

    // NOTE(review): LOG_FATAL presumably terminates the process - confirm.
    if (!args->script_path) {
        LOG_FATAL("cli.c", "--script-file argument is required");
    }

    int load_failed = load_external_file(args->script_path, &args->script) != 0;
    if (load_failed) {
        return 1;
    }

    LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
    LOG_DEBUGF("cli.c", "arg script=%s", args->script)

    return 0;
}

View File

@@ -3,6 +3,8 @@
#include "sist.h"
#include "libscan/arc/arc.h"
typedef struct scan_args {
float quality;
int size;
@@ -17,39 +19,76 @@ typedef struct scan_args {
char *archive;
archive_mode_t archive_mode;
char *tesseract_lang;
const char *tesseract_path;
char *exclude_regex;
int fast;
const char* treemap_threshold_str;
double treemap_threshold;
int max_memory_buffer;
} scan_args_t;
scan_args_t *scan_args_create();
void scan_args_destroy(scan_args_t *args);
int scan_args_validate(scan_args_t *args, int argc, const char **argv);
/**
 * Arguments of the index command, checked and completed by index_args_validate().
 */
typedef struct index_args {
// Elasticsearch base URL; index_args_validate() can assign DEFAULT_ES_URL.
char *es_url;
// Elasticsearch index name; set to DEFAULT_ES_INDEX when NULL.
char *es_index;
// Path of the index (presumably the one to push to Elasticsearch - TODO confirm).
const char *index_path;
// Optional path of a user script file.
const char *script_path;
// Contents of script_path, loaded with load_external_file().
char *script;
// Optional path of a file with index settings.
const char *es_settings_path;
// Contents of es_settings_path, loaded with load_external_file().
char *es_settings;
// Optional path of a file with index mappings.
const char *es_mappings_path;
// Contents of es_mappings_path, loaded with load_external_file().
char *es_mappings;
int print;
// 0 means "not set" to index_args_validate() (presumably replaced by a default - TODO confirm).
int batch_size;
int async_script;
int force_reset;
// 0 is replaced by 1; negative values are rejected by index_args_validate().
int threads;
} index_args_t;
typedef struct web_args {
char *es_url;
char *bind;
char *port;
char *es_index;
char *listen_address;
char *credentials;
char *b64credentials;
char *tag_credentials;
char auth_user[256];
char auth_pass[256];
int auth_enabled;
int tag_auth_enabled;
int index_count;
const char **indices;
} web_args_t;
/**
 * Arguments of the exec-script command, checked and completed by exec_args_validate().
 */
typedef struct exec_args {
// Elasticsearch base URL; set to DEFAULT_ES_URL when NULL.
char *es_url;
// Elasticsearch index name; set to DEFAULT_ES_INDEX when NULL.
char *es_index;
// Positional PATH argument (argv[1]), stored as given.
const char *index_path;
// Path of the script file; required by exec_args_validate().
const char *script_path;
int async_script;
// Contents of script_path, loaded with load_external_file().
char *script;
} exec_args_t;
index_args_t *index_args_create();
void index_args_destroy(index_args_t *args);
web_args_t *web_args_create();
void web_args_destroy(web_args_t *args);
int index_args_validate(index_args_t *args, int argc, const char **argv);
int web_args_validate(web_args_t *args, int argc, const char **argv);
exec_args_t *exec_args_create();
void exec_args_destroy(exec_args_t *args);
int exec_args_validate(exec_args_t *args, int argc, const char **argv);
#endif

6
src/ctx.c Normal file
View File

@@ -0,0 +1,6 @@
#include "ctx.h"
// Definitions of the global context objects that ctx.h declares as extern.
ScanCtx_t ScanCtx;
WebCtx_t WebCtx;
IndexCtx_t IndexCtx;
LogCtx_t LogCtx;

View File

@@ -2,8 +2,24 @@
#define SIST2_CTX_H
#include "sist.h"
#include "tpool.h"
#include "libscan/scan.h"
#include "libscan/arc/arc.h"
#include "libscan/comic/comic.h"
#include "libscan/ebook/ebook.h"
#include "libscan/font/font.h"
#include "libscan/media/media.h"
#include "libscan/ooxml/ooxml.h"
#include "libscan/text/text.h"
#include "libscan/mobi/scan_mobi.h"
#include "libscan/raw/raw.h"
#include "libscan/msdoc/msdoc.h"
#include "src/io/store.h"
struct {
#include <glib.h>
#include <pcre.h>
typedef struct {
struct index_t index;
GHashTable *mime_table;
@@ -11,14 +27,8 @@ struct {
tpool_t *pool;
int tn_size;
int threads;
int content_size;
float tn_qscale;
int depth;
archive_mode_t archive_mode;
int verbose;
int very_verbose;
size_t stat_tn_size;
size_t stat_index_size;
@@ -26,27 +36,54 @@ struct {
GHashTable *original_table;
GHashTable *copy_table;
pthread_mutex_t mupdf_mu;
char * tesseract_lang;
} ScanCtx;
pcre *exclude;
pcre_extra *exclude_extra;
int fast;
struct {
scan_arc_ctx_t arc_ctx;
scan_comic_ctx_t comic_ctx;
scan_ebook_ctx_t ebook_ctx;
scan_font_ctx_t font_ctx;
scan_media_ctx_t media_ctx;
scan_ooxml_ctx_t ooxml_ctx;
scan_text_ctx_t text_ctx;
scan_mobi_ctx_t mobi_ctx;
scan_raw_ctx_t raw_ctx;
scan_msdoc_ctx_t msdoc_ctx;
} ScanCtx_t;
typedef struct {
int verbose;
int very_verbose;
int no_color;
} LogCtx;
} LogCtx_t;
struct {
typedef struct {
char *es_url;
char *es_index;
int batch_size;
} IndexCtx;
tpool_t *pool;
store_t *tag_store;
GHashTable *tags;
store_t *meta_store;
GHashTable *meta;
} IndexCtx_t;
struct {
typedef struct {
char *es_url;
char *es_index;
int index_count;
char *b64credentials;
struct index_t indices[16];
} WebCtx;
char *auth_user;
char *auth_pass;
int auth_enabled;
int tag_auth_enabled;
struct index_t indices[64];
} WebCtx_t;
extern ScanCtx_t ScanCtx;
extern WebCtx_t WebCtx;
extern IndexCtx_t IndexCtx;
extern LogCtx_t LogCtx;
#endif

View File

@@ -1,11 +1,7 @@
#include "elastic.h"
#include "src/ctx.h"
#include <stdlib.h>
#include "web.h"
#include <stdio.h>
#include <string.h>
#include <cJSON/cJSON.h>
#include "static_generated.c"
@@ -13,19 +9,33 @@
typedef struct es_indexer {
int queued;
char *es_url;
char *es_index;
es_bulk_line_t *line_head;
es_bulk_line_t *line_tail;
} es_indexer_t;
static es_indexer_t *Indexer;
static __thread es_indexer_t *Indexer;
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
void delete_queue(int max);
void elastic_flush();
/**
 * Flushes whatever is still queued on the calling thread's indexer and then
 * releases that indexer.
 *
 * Indexer is thread-local, so this only affects the calling thread. The
 * pointer is reset to NULL after being freed so that a later call on the
 * same thread re-creates the indexer instead of touching freed memory.
 */
void elastic_cleanup() {
    elastic_flush();
    if (Indexer != NULL) {
        free(Indexer->es_index);
        free(Indexer->es_url);
        free(Indexer);
        Indexer = NULL;
    }
}
void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {
cJSON *line = cJSON_CreateObject();
cJSON_AddStringToObject(line, "_id", uuid_str);
cJSON_AddStringToObject(line, "_index", "sist2");
cJSON_AddStringToObject(line, "_id", id_str);
cJSON_AddStringToObject(line, "_index", IndexCtx.es_index);
cJSON_AddStringToObject(line, "_type", "_doc");
cJSON_AddItemReferenceToObject(line, "_source", document);
@@ -37,23 +47,31 @@ void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
cJSON_Delete(line);
}
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
void index_json_func(void *arg) {
es_bulk_line_t *line = arg;
elastic_index_line(line);
}
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
char *json = cJSON_PrintUnformatted(document);
size_t json_len = strlen(json);
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
memcpy(bulk_line->line, json, json_len);
memcpy(bulk_line->uuid_str, uuid_str, UUID_STR_LEN);
memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
*(bulk_line->line + json_len) = '\n';
*(bulk_line->line + json_len + 1) = '\0';
bulk_line->next = NULL;
cJSON_free(json);
elastic_index_line(bulk_line);
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
}
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]) {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
}
cJSON *body = cJSON_CreateObject();
cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
@@ -64,12 +82,19 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
cJSON_AddStringToObject(term_obj, "index", index_id);
char * str = cJSON_Print(body);
char *str = cJSON_Print(body);
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?pretty", Indexer->es_url);
response_t *r = web_post(bulk_url, str, "Content-Type: application/json");
if (async) {
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
Indexer->es_index);
} else {
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
}
response_t *r = web_post(bulk_url, str);
if (!async) {
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
}
cJSON *resp = cJSON_Parse(r->body);
cJSON_free(str);
@@ -84,31 +109,39 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
cJSON_free(error_str);
}
if (async) {
cJSON *task = cJSON_GetObjectItem(resp, "task");
LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring);
}
cJSON_Delete(resp);
}
void elastic_flush() {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url);
}
void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
es_bulk_line_t *line = Indexer->line_head;
int count = 0;
*count = 0;
size_t buf_size = 0;
size_t buf_cur = 0;
char *buf = malloc(1);
char *buf = malloc(8192);
size_t buf_capacity = 8192;
while (line != NULL && *count < max) {
char action_str[256];
snprintf(
action_str, sizeof(action_str),
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
line->path_md5_str, Indexer->es_index
);
while (line != NULL) {
char action_str[512];
snprintf(action_str, 512,
"{\"index\":{\"_id\":\"%s\", \"_type\":\"_doc\", \"_index\":\"sist2\"}}\n", line->uuid_str);
size_t action_str_len = strlen(action_str);
size_t line_len = strlen(line->line);
buf = realloc(buf, buf_size + line_len + action_str_len);
while (buf_size + line_len + action_str_len > buf_capacity) {
buf_capacity *= 2;
buf = realloc(buf, buf_capacity);
}
buf_size += line_len + action_str_len;
memcpy(buf + buf_cur, action_str, action_str_len);
@@ -116,50 +149,147 @@ void elastic_flush() {
memcpy(buf + buf_cur, line->line, line_len);
buf_cur += line_len;
es_bulk_line_t *tmp = line;
line = line->next;
free(tmp);
count++;
}
buf = realloc(buf, buf_size + 1);
*(buf+buf_cur) = '\0';
Indexer->line_head = NULL;
Indexer->line_tail = NULL;
Indexer->queued = 0;
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_bulk", Indexer->es_url);
response_t *r = web_post(bulk_url, buf, "Content-Type: application/x-ndjson");
if (r->status_code == 0) {
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
(*count)++;
}
LOG_INFOF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_cur / 1024, r->status_code);
if (buf_size + 1 > buf_capacity) {
buf = realloc(buf, buf_capacity + 1);
}
cJSON *ret_json = cJSON_Parse(r->body);
*(buf + buf_cur) = '\0';
*buf_len = buf_cur;
return buf;
}
void print_errors(response_t *r) {
char *tmp = malloc(r->size + 1);
memcpy(tmp, r->body, r->size);
*(tmp + r->size) = '\0';
cJSON *ret_json = cJSON_Parse(tmp);
if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
cJSON *err;
cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
char* str = cJSON_Print(err);
char *str = cJSON_Print(err);
LOG_ERRORF("elastic.c", "%s\n", str);
cJSON_free(str);
}
}
}
cJSON_Delete(ret_json);
free(tmp);
}
/**
 * Logs the top-level "error" member of a JSON response body, if there is one.
 *
 * The body is first copied into a NUL-terminated scratch buffer of
 * r->size + 1 bytes before being parsed.
 */
void print_error(response_t *r) {
    char *body = malloc(r->size + 1);
    memcpy(body, r->body, r->size);
    body[r->size] = '\0';

    cJSON *root = cJSON_Parse(body);
    cJSON *error_item = cJSON_GetObjectItem(root, "error");

    if (error_item != NULL) {
        char *str = cJSON_Print(error_item);
        LOG_ERRORF("elastic.c", "%s\n", str);
        cJSON_free(str);
    }

    cJSON_Delete(root);
    free(body);
}
/**
 * Sends up to `max` queued lines of the calling thread's indexer to the
 * Elasticsearch _bulk endpoint and handles the response:
 *
 *  - status 0:   logged with LOG_FATALF (could not connect)
 *  - status 413: retried with half as many documents; a single document that
 *                is still too large is dropped and the rest of the queue is flushed
 *  - status 429: retried with the same `max` after a 20 second pause
 *  - other non-200: the error is logged and the whole queue is discarded
 *  - status 200: per-item errors are logged, `max` lines are dequeued and any
 *                remaining lines are flushed
 *
 * @param max maximum number of queued lines to send; 0 only logs a warning
 */
void _elastic_flush(int max) {

    if (max == 0) {
        LOG_WARNING("elastic.c", "calling _elastic_flush with 0 in queue")
        return;
    }

    size_t buf_len;
    int count;
    void *buf = create_bulk_buffer(max, &count, &buf_len);

    char bulk_url[4096];
    snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_bulk?pipeline=tie", Indexer->es_url, Indexer->es_index);
    response_t *r = web_post(bulk_url, buf);

    if (r->status_code == 0) {
        LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
    }

    if (r->status_code == 413) {

        if (max <= 1) {
            LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
            free_response(r);
            free(buf);
            delete_queue(1);
            if (Indexer->queued != 0) {
                elastic_flush();
            }
            return;
        }

        LOG_WARNINGF("elastic.c", "Payload too large, retrying (%d documents)", count);

        // Release this attempt's resources before recursing with a smaller batch.
        free_response(r);
        free(buf);
        _elastic_flush(max / 2);
        return;

    } else if (r->status_code == 429) {

        free_response(r);
        free(buf);
        LOG_WARNING("elastic.c", "Got 429 status, will retry after delay")

        usleep(1000000 * 20);
        _elastic_flush(max);
        return;

    } else if (r->status_code != 200) {
        // A failed request carries an "error" object (or a non-JSON body), not
        // the "errors"/"items" layout that print_errors() dereferences
        // unconditionally, so use the NULL-safe print_error() here.
        print_error(r);
        delete_queue(Indexer->queued);

    } else {

        print_errors(r);
        LOG_INFOF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
        delete_queue(max);

        if (Indexer->queued != 0) {
            elastic_flush();
        }
    }

    free_response(r);
    free(buf);
}
/**
 * Removes and frees up to `max` lines from the head of the calling thread's
 * indexer queue, decrementing Indexer->queued for each one.
 *
 * Stops early when the queue runs empty, so a `max` larger than the number
 * of queued lines no longer dereferences a NULL head.
 *
 * @param max maximum number of lines to remove
 */
void delete_queue(int max) {
    for (int i = 0; i < max && Indexer->line_head != NULL; i++) {
        es_bulk_line_t *tmp = Indexer->line_head;
        Indexer->line_head = tmp->next;
        if (Indexer->line_head == NULL) {
            // Queue is now empty: the tail must not keep pointing at freed memory.
            Indexer->line_tail = NULL;
        }
        free(tmp);
        Indexer->queued -= 1;
    }
}
/**
 * Flushes every line currently queued on the calling thread's indexer,
 * creating the indexer from IndexCtx first when it does not exist yet.
 */
void elastic_flush() {

    if (!Indexer) {
        Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
    }

    _elastic_flush(Indexer->queued);
}
void elastic_index_line(es_bulk_line_t *line) {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url);
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
}
if (Indexer->line_head == NULL) {
@@ -177,14 +307,18 @@ void elastic_index_line(es_bulk_line_t *line) {
}
}
es_indexer_t *create_indexer(const char *url) {
es_indexer_t *create_indexer(const char *url, const char *index) {
char *es_url = malloc(strlen(url) + 1);
strcpy(es_url, url);
char *es_index = malloc(strlen(index) + 1);
strcpy(es_index, index);
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
indexer->es_url = es_url;
indexer->es_index = es_index;
indexer->queued = 0;
indexer->line_head = NULL;
indexer->line_tail = NULL;
@@ -192,41 +326,42 @@ es_indexer_t *create_indexer(const char *url) {
return indexer;
}
void destroy_indexer(char * script, char index_id[UUID_STR_LEN]) {
void finish_indexer(char *script, int async_script, char *index_id) {
char url[4096];
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
response_t *r = web_post(url, "", NULL);
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
response_t *r = web_post(url, "");
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
if (script != NULL) {
execute_update_script(script, index_id);
}
execute_update_script(script, async_script, index_id);
free(script);
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
r = web_post(url, "", NULL);
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
}
snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
r = web_post(url, "", NULL);
snprintf(url, sizeof(url), "%s/%s/_forcemerge", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
free_response(r);
if (Indexer != NULL) {
free(Indexer->es_url);
free(Indexer);
}
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}");
LOG_INFOF("elastic.c", "Set refresh interval <%d>", r->status_code);
free_response(r);
}
void elastic_init(int force_reset) {
void elastic_init(int force_reset, const char* user_mappings, const char* user_settings) {
// Check if index exists
char url[4096];
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
response_t *r = web_get(url);
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
response_t *r = web_get(url, 30);
int index_exists = r->status_code == 200;
free_response(r);
@@ -235,42 +370,86 @@ void elastic_init(int force_reset) {
LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
r = web_put(url, "", NULL);
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, "");
if (r->status_code != 200) {
print_error(r);
LOG_FATAL("elastic.c", "Could not create index")
}
LOG_INFOF("elastic.c", "Create index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_close", IndexCtx.es_url);
r = web_post(url, "", NULL);
snprintf(url, sizeof(url), "%s/%s/_close", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_settings", IndexCtx.es_url);
r = web_put(url, settings_json, "Content-Type: application/json");
LOG_INFOF("elastic.c", "Update settings <%d>", r->status_code);
snprintf(url, sizeof(url), "%s/_ingest/pipeline/tie", IndexCtx.es_url);
r = web_put(url, pipeline_json);
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_mappings/_doc?include_type_name=true", IndexCtx.es_url);
r = web_put(url, mappings_json, "Content-Type: application/json");
LOG_INFOF("elastic.c", "Update mappings <%d>", r->status_code);
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, user_settings ? user_settings : settings_json);
LOG_INFOF("elastic.c", "Update user_settings <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_open", IndexCtx.es_url);
r = web_post(url, "", NULL);
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, user_mappings ? user_mappings : mappings_json);
LOG_INFOF("elastic.c", "Update user_mappings <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/%s/_open", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
free_response(r);
}
}
cJSON *elastic_get_document(const char *uuid_str) {
cJSON *elastic_get_document(const char *id_str) {
char url[4096];
snprintf(url, 4096, "%s/sist2/_doc/%s", WebCtx.es_url, uuid_str);
snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, id_str);
response_t *r = web_get(url);
response_t *r = web_get(url, 3);
cJSON *json = NULL;
if (r->status_code == 200) {
json = cJSON_Parse(r->body);
char *tmp = malloc(r->size + 1);
memcpy(tmp, r->body, r->size);
*(tmp + r->size) = '\0';
json = cJSON_Parse(tmp);
free(tmp);
}
free_response(r);
return json;
}
/**
 * Ask the cluster for the state of the configured index
 * (metadata.indices.<index>.state in the cluster-state response).
 *
 * @return a heap-allocated, NUL-terminated string owned by the caller.
 *         It is empty when the request does not return 200, when the body
 *         cannot be parsed, or when the expected string field is missing.
 *         Longer values are truncated to fit the buffer.
 */
char *elastic_get_status() {
    enum { STATUS_LEN = 128 };

    char url[4096];
    snprintf(url, sizeof(url),
             "%s/_cluster/state/metadata/%s?filter_path=metadata.indices.*.state", WebCtx.es_url, WebCtx.es_index);

    response_t *r = web_get(url, 30);
    cJSON *json = NULL;

    char *status = malloc(STATUS_LEN);
    status[0] = '\0';

    if (r->status_code == 200) {
        // Parse a NUL-terminated copy of the r->size bytes of the body
        char *tmp = malloc(r->size + 1);
        memcpy(tmp, r->body, r->size);
        tmp[r->size] = '\0';
        json = cJSON_Parse(tmp);
        free(tmp);

        const cJSON *metadata = cJSON_GetObjectItem(json, "metadata");
        if (metadata != NULL) {
            const cJSON *indices = cJSON_GetObjectItem(metadata, "indices");
            const cJSON *index = cJSON_GetObjectItem(indices, WebCtx.es_index);
            const cJSON *state = cJSON_GetObjectItem(index, "state");

            // The field may be absent or not a string; only copy a real value,
            // and never write past the end of the status buffer
            if (state != NULL && state->valuestring != NULL) {
                snprintf(status, STATUS_LEN, "%s", state->valuestring);
            }
        }
    }

    free_response(r);
    cJSON_Delete(json);
    return status;
}

View File

@@ -5,7 +5,7 @@
typedef struct es_bulk_line {
struct es_bulk_line *next;
char uuid_str[UUID_STR_LEN];
char path_md5_str[MD5_STR_LENGTH];
char line[0];
} es_bulk_line_t;
@@ -16,18 +16,21 @@ typedef struct es_indexer es_indexer_t;
void elastic_index_line(es_bulk_line_t *line);
void elastic_flush();
void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
es_indexer_t *create_indexer(const char *url, const char *index);
es_indexer_t *create_indexer(const char* es_url);
void elastic_cleanup();
void finish_indexer(char *script, int async_script, char *index_id);
void destroy_indexer(char *script, char index_id[UUID_STR_LEN]);
void elastic_init(int force_reset, const char* user_mappings, const char* user_settings);
void elastic_init(int force_reset);
cJSON *elastic_get_document(const char *id_str);
cJSON *elastic_get_document(const char *uuid_str);
char *elastic_get_status();
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);
#endif

File diff suppressed because one or more lines are too long

View File

@@ -1,4 +1,11 @@
#include "web.h"
#include "src/sist.h"
#include "src/ctx.h"
#include <mongoose.h>
#include <pthread.h>
#include <curl/curl.h>
size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
@@ -9,11 +16,91 @@ size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
}
/**
 * Release a response together with the body buffer it points to.
 */
void free_response(response_t *resp) {
    // free(NULL) is a no-op, so the body needs no explicit NULL check
    free(resp->body);
    free(resp);
}
response_t *web_get(const char *url) {
/**
 * Advance an asynchronous POST by one step.
 *
 * Waits up to one second for activity on the transfer's sockets and lets
 * libcurl make progress. Once no transfer is running any more, req->response
 * is filled in (body, size, status code), the curl handles and header list
 * are released, and req->done is set.
 *
 * req->done is also set, without populating the response, when
 * curl_multi_fdset() or select() fails. Calling this function again after
 * req->done has been set is a no-op.
 */
void web_post_async_poll(subreq_ctx_t* req) {
    if (req->done) {
        // Already finished; the curl handles may have been released
        return;
    }

    fd_set fdread;
    fd_set fdwrite;
    fd_set fdexcep;
    int maxfd = -1;

    FD_ZERO(&fdread);
    FD_ZERO(&fdwrite);
    FD_ZERO(&fdexcep);

    CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd);
    if (mc != CURLM_OK) {
        req->done = TRUE;
        return;
    }

    if (maxfd == -1) {
        // no fds ready yet
        return;
    }

    struct timeval timeout = {1, 0};
    int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);

    switch (rc) {
        case -1:
            req->done = TRUE;
            break;
        case 0:
            break;
        default:
            curl_multi_perform(req->multi, &req->running_handles);
            break;
    }

    if (req->running_handles == 0) {
        req->done = TRUE;
        req->response->body = req->response_buf.buf;
        req->response->size = req->response_buf.cur;

        // CURLINFO_RESPONSE_CODE stores a long; go through a temporary
        // instead of writing a long through a pointer to the int field
        long http_code = 0;
        curl_easy_getinfo(req->handle, CURLINFO_RESPONSE_CODE, &http_code);
        req->response->status_code = (int) http_code;

        // Teardown order documented by libcurl: detach the easy handle,
        // destroy it, then destroy the multi handle
        curl_multi_remove_handle(req->multi, req->handle);
        curl_easy_cleanup(req->handle);
        curl_multi_cleanup(req->multi);
        curl_slist_free_all(req->headers);

        req->handle = NULL;
        req->multi = NULL;
        req->headers = NULL;
    }
}
subreq_ctx_t *web_post_async(const char *url, char *data) {
subreq_ctx_t *req = calloc(1, sizeof(subreq_ctx_t));
req->response = calloc(1, sizeof(response_t));
req->data = data;
req->response_buf = dyn_buffer_create();
req->handle = curl_easy_init();
CURL *curl = req->handle;
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&req->response_buf));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
req->multi = curl_multi_init();
curl_multi_add_handle(req->multi, curl);
curl_multi_perform(req->multi, &req->running_handles);
LOG_DEBUGF("web.c", "async request POST %s", url)
return req;
}
response_t *web_get(const char *url, int timeout) {
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
@@ -24,18 +111,24 @@ response_t *web_get(const char *url) {
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
}
response_t *web_post(const char *url, const char *data, const char *header) {
response_t *web_post(const char *url, const char *data) {
response_t *resp = malloc(sizeof(response_t));
@@ -50,10 +143,8 @@ response_t *web_post(const char *url, const char *data, const char *header) {
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
struct curl_slist *headers = NULL;
if (header != NULL) {
headers = curl_slist_append(headers, header);
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
}
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
@@ -70,7 +161,7 @@ response_t *web_post(const char *url, const char *data, const char *header) {
}
response_t *web_put(const char *url, const char *data, const char *header) {
response_t *web_put(const char *url, const char *data) {
response_t *resp = malloc(sizeof(response_t));
@@ -86,11 +177,9 @@ response_t *web_put(const char *url, const char *data, const char *header) {
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
if (header != NULL) {
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, header);
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
}
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
@@ -98,6 +187,7 @@ response_t *web_put(const char *url, const char *data, const char *header) {
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
@@ -119,11 +209,15 @@ response_t *web_delete(const char *url) {
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;

View File

@@ -2,6 +2,8 @@
#define SIST2_WEB_H
#include "src/sist.h"
#include <mongoose.h>
#include <curl/curl.h>
typedef struct response {
char *body;
@@ -9,9 +11,27 @@ typedef struct response {
int status_code;
} response_t;
response_t *web_get(const char *url);
response_t *web_post(const char * url, const char * data, const char* header);
response_t *web_put(const char *url, const char *data, const char *header);
/*
 * A response pointer paired with a completion flag.
 * NOTE(review): presumably handed to an HTTP event handler as user data;
 * the users of this type are not visible here, confirm against them.
 */
typedef struct {
response_t *resp;
int done;
} http_ev_data_t;
/*
 * State of one asynchronous POST started by web_post_async() and advanced
 * by web_post_async_poll().
 */
typedef struct {
/* Request body as passed to web_post_async() */
char* data;
/* Buffer that write_cb fills with the response body */
dyn_buffer_t response_buf;
/* Header list released with curl_slist_free_all() on completion */
struct curl_slist *headers;
/* Easy handle carrying the request options */
CURL *handle;
/* Multi handle that drives the easy handle */
CURLM *multi;
/* Body, size and status code are filled in when the transfer ends */
response_t *response;
/* Updated by curl_multi_perform(); 0 means the transfer has ended */
int running_handles;
/* Set once the transfer ended or polling failed */
int done;
} subreq_ctx_t;
response_t *web_get(const char *url, int timeout);
response_t *web_post(const char * url, const char * data);
void web_post_async_poll(subreq_ctx_t* req);
subreq_ctx_t *web_post_async(const char *url, char *data);
response_t *web_put(const char *url, const char *data);
response_t *web_delete(const char *url);
void free_response(response_t *resp);

View File

@@ -1,16 +1,18 @@
#include "src/ctx.h"
#include "serialize.h"
#include "src/parsing/parse.h"
#include "src/parsing/mime.h"
static __thread int index_fd = -1;
typedef struct {
unsigned char uuid[16];
unsigned long ino;
unsigned char path_md5[MD5_DIGEST_LENGTH];
unsigned long size;
unsigned int mime;
int mtime;
short base;
short ext;
char has_parent;
} line_t;
void skip_meta(FILE *file) {
@@ -30,7 +32,7 @@ void skip_meta(FILE *file) {
void write_index_descriptor(char *path, index_descriptor_t *desc) {
cJSON *json = cJSON_CreateObject();
cJSON_AddStringToObject(json, "uuid", desc->uuid);
cJSON_AddStringToObject(json, "id", desc->id);
cJSON_AddStringToObject(json, "version", desc->version);
cJSON_AddStringToObject(json, "root", desc->root);
cJSON_AddStringToObject(json, "name", desc->name);
@@ -39,11 +41,14 @@ void write_index_descriptor(char *path, index_descriptor_t *desc) {
cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp);
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
if (fd == -1) {
perror(path);
if (fd < 0) {
LOG_FATALF("serialize.c", "Could not open index descriptor: %s", strerror(errno));
}
char *str = cJSON_Print(json);
write(fd, str, strlen(str));
int ret = write(fd, str, strlen(str));
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
}
free(str);
close(fd);
@@ -57,11 +62,14 @@ index_descriptor_t read_index_descriptor(char *path) {
int fd = open(path, O_RDONLY);
if (fd == -1) {
LOG_FATAL("serialize.c", "Invalid/corrupt index (Could not find descriptor)\n")
LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path, strerror(errno))
}
char *buf = malloc(info.st_size + 1);
read(fd, buf, info.st_size);
int ret = read(fd, buf, info.st_size);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
}
*(buf + info.st_size) = '\0';
close(fd);
@@ -74,7 +82,7 @@ index_descriptor_t read_index_descriptor(char *path) {
strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
descriptor.root_len = (short) strlen(descriptor.root);
strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring);
strcpy(descriptor.id, cJSON_GetObjectItem(json, "id")->valuestring);
if (cJSON_GetObjectItem(json, "type") == NULL) {
strcpy(descriptor.type, INDEX_TYPE_BIN);
} else {
@@ -118,6 +126,32 @@ char *get_meta_key_text(enum metakey meta_key) {
return "font_name";
case MetaParent:
return "parent";
case MetaExifMake:
return "exif_make";
case MetaExifSoftware:
return "exif_software";
case MetaExifExposureTime:
return "exif_exposure_time";
case MetaExifFNumber:
return "exif_fnumber";
case MetaExifFocalLength:
return "exif_focal_length";
case MetaExifUserComment:
return "exif_user_comment";
case MetaExifIsoSpeedRatings:
return "exif_iso_speed_ratings";
case MetaExifModel:
return "exif_model";
case MetaExifDateTime:
return "exif_datetime";
case MetaAuthor:
return "author";
case MetaModifiedBy:
return "modified_by";
case MetaThumbnail:
return "thumbnail";
case MetaPages:
return "pages";
default:
return NULL;
}
@@ -140,8 +174,8 @@ void write_document(document_t *doc) {
dyn_buffer_t buf = dyn_buffer_create();
// Ignore root directory in the file path
doc->ext = doc->ext - ScanCtx.index.desc.root_len;
doc->base = doc->base - ScanCtx.index.desc.root_len;
doc->ext = (short) (doc->ext - ScanCtx.index.desc.root_len);
doc->base = (short) (doc->base - ScanCtx.index.desc.root_len);
doc->filepath += ScanCtx.index.desc.root_len;
dyn_buffer_write(&buf, doc, sizeof(line_t));
@@ -152,11 +186,11 @@ void write_document(document_t *doc) {
dyn_buffer_write_char(&buf, meta->key);
if (IS_META_INT(meta->key)) {
dyn_buffer_write_int(&buf, meta->intval);
dyn_buffer_write_int(&buf, meta->int_val);
} else if (IS_META_LONG(meta->key)) {
dyn_buffer_write_long(&buf, meta->longval);
dyn_buffer_write_long(&buf, meta->long_val);
} else {
dyn_buffer_write_str(&buf, meta->strval);
dyn_buffer_write_str(&buf, meta->str_val);
}
meta_line_t *tmp = meta;
@@ -167,7 +201,7 @@ void write_document(document_t *doc) {
int res = write(index_fd, buf.buf, buf.cur);
if (res == -1) {
perror("write");
LOG_FATALF("serialize.c", "Could not write document: %s", strerror(errno))
}
ScanCtx.stat_index_size += buf.cur;
dyn_buffer_destroy(&buf);
@@ -175,6 +209,8 @@ void write_document(document_t *doc) {
void thread_cleanup() {
close(index_fd);
cleanup_parse();
cleanup_font();
}
@@ -183,9 +219,9 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
dyn_buffer_t buf = dyn_buffer_create();
FILE *file = fopen(path, "rb");
while (1) {
while (TRUE) {
buf.cur = 0;
fread((void *) &line, 1, sizeof(line_t), file);
size_t _ = fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) {
break;
}
@@ -193,14 +229,19 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
cJSON *document = cJSON_CreateObject();
cJSON_AddStringToObject(document, "index", index_id);
char uuid_str[UUID_STR_LEN];
uuid_unparse(line.uuid, uuid_str);
char path_md5_str[MD5_STR_LENGTH];
buf2hex(line.path_md5, sizeof(line.path_md5), path_md5_str);
const char *mime_text = mime_get_mime_text(line.mime);
if (mime_text == NULL) {
cJSON_AddNullToObject(document, "mime");
} else {
cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime));
}
cJSON_AddNumberToObject(document, "size", (double) line.size);
cJSON_AddNumberToObject(document, "mtime", line.mtime);
int c;
int c = 0;
while ((c = getc(file)) != 0) {
dyn_buffer_write_char(&buf, (char) c);
}
@@ -212,42 +253,42 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
} else {
*(buf.buf + line.ext) = '\0';
}
cJSON_AddStringToObject(document, "name", buf.buf + line.base);
char tmp[PATH_MAX * 3];
str_escape(tmp, buf.buf + line.base);
cJSON_AddStringToObject(document, "name", tmp);
if (line.base > 0) {
*(buf.buf + line.base - 1) = '\0';
cJSON_AddStringToObject(document, "path", buf.buf);
str_escape(tmp, buf.buf);
cJSON_AddStringToObject(document, "path", tmp);
} else {
cJSON_AddStringToObject(document, "path", "");
}
enum metakey key = getc(file);
size_t ret = 0;
while (key != '\n') {
switch (key) {
case MetaPages:
case MetaWidth:
case MetaHeight: {
int value;
fread(&value, sizeof(int), 1, file);
ret = fread(&value, sizeof(int), 1, file);
cJSON_AddNumberToObject(document, get_meta_key_text(key), value);
break;
}
case MetaMediaDuration:
case MetaMediaBitrate: {
long value;
fread(&value, sizeof(long), 1, file);
ret = fread(&value, sizeof(long), 1, file);
cJSON_AddNumberToObject(document, get_meta_key_text(key), (double) value);
break;
}
case MetaMediaAudioCodec:
case MetaMediaVideoCodec: {
int value;
fread(&value, sizeof(int), 1, file);
const AVCodecDescriptor *desc = avcodec_descriptor_get(value);
if (desc != NULL) {
cJSON_AddStringToObject(document, get_meta_key_text(key), desc->name);
}
break;
}
case MetaMediaVideoCodec:
case MetaContent:
case MetaArtist:
case MetaAlbum:
@@ -255,6 +296,18 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
case MetaGenre:
case MetaFontName:
case MetaParent:
case MetaExifMake:
case MetaExifSoftware:
case MetaExifExposureTime:
case MetaExifFNumber:
case MetaExifFocalLength:
case MetaExifUserComment:
case MetaExifIsoSpeedRatings:
case MetaExifDateTime:
case MetaExifModel:
case MetaAuthor:
case MetaModifiedBy:
case MetaThumbnail:
case MetaTitle: {
buf.cur = 0;
while ((c = getc(file)) != 0) {
@@ -273,8 +326,36 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
key = getc(file);
}
func(document, uuid_str);
cJSON *meta_obj = NULL;
if (IndexCtx.meta != NULL) {
const char *meta_string = g_hash_table_lookup(IndexCtx.meta, path_md5_str);
if (meta_string != NULL) {
meta_obj = cJSON_Parse(meta_string);
cJSON *child;
for (child = meta_obj->child; child != NULL; child = child->next) {
char meta_key[4096];
strcpy(meta_key, child->string);
cJSON_DeleteItemFromObject(document, meta_key);
cJSON_AddItemReferenceToObject(document, meta_key, child);
}
}
}
if (IndexCtx.tags != NULL) {
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, path_md5_str);
if (tags_string != NULL) {
cJSON *tags_arr = cJSON_Parse(tags_string);
cJSON_DeleteItemFromObject(document, "tag");
cJSON_AddItemToObject(document, "tag", tags_arr);
}
}
func(document, path_md5_str);
cJSON_Delete(document);
if (meta_obj) {
cJSON_Delete(meta_obj);
}
}
dyn_buffer_destroy(&buf);
fclose(file);
@@ -298,11 +379,11 @@ const char *json_type_array_fields[] = {
void read_index_json(const char *path, UNUSED(const char *index_id), index_func func) {
FILE *file = fopen(path, "r");
while (1) {
while (TRUE) {
char *line = NULL;
size_t len;
size_t read = getline(&line, &len, file);
if (read == -1) {
if (read < 0) {
if (line) {
free(line);
}
@@ -318,7 +399,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
}
cJSON *document = cJSON_CreateObject();
const char *uuid_str = cJSON_GetObjectItem(input, "_id")->valuestring;
const char *id_str = cJSON_GetObjectItem(input, "_id")->valuestring;
for (int i = 0; i < (sizeof(json_type_copy_fields) / sizeof(json_type_copy_fields[0])); i++) {
cJSON *value = cJSON_GetObjectItem(input, json_type_copy_fields[i]);
@@ -346,7 +427,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
}
}
func(document, uuid_str);
func(document, id_str);
cJSON_Delete(document);
cJSON_Delete(input);
@@ -354,7 +435,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
fclose(file);
}
void read_index(const char *path, const char index_id[UUID_STR_LEN], const char *type, index_func func) {
void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {
if (strcmp(type, INDEX_TYPE_BIN) == 0) {
read_index_bin(path, index_id, func);
@@ -367,13 +448,15 @@ void incremental_read(GHashTable *table, const char *filepath) {
FILE *file = fopen(filepath, "rb");
line_t line;
LOG_DEBUGF("serialize.c", "Incremental read %s", filepath)
while (1) {
fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) {
size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
if (ret != 1 || feof(file)) {
break;
}
incremental_put(table, line.ino, line.mtime);
incremental_put(table, line.path_md5, line.mtime);
while ((getc(file))) {}
skip_meta(file);
@@ -391,41 +474,55 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
FILE *dst_file = fopen(dst_filepath, "ab");
line_t line;
while (1) {
fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) {
LOG_DEBUGF("serialize.c", "Incremental copy %s", filepath)
while (TRUE) {
size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
if (ret != 1 || feof(file)) {
break;
}
if (incremental_get(copy_table, line.ino)) {
// Assume that files with parents still exist.
// One way to "fix" this would be to check if the parent is marked for copy but it would consistently
// delete files with grandparents, which is a side-effect worse than having orphaned files
if (line.has_parent || incremental_get(copy_table, line.path_md5)) {
fwrite(&line, sizeof(line), 1, dst_file);
size_t buf_len;
char *buf = store_read(store, (char *) line.uuid, 16, &buf_len);
store_write(dst_store, (char *) line.uuid, 16, buf, buf_len);
free(buf);
// Copy filepath
char filepath_buf[PATH_MAX];
char c;
char *ptr = filepath_buf;
while ((c = (char) getc(file))) {
fwrite(&c, sizeof(c), 1, dst_file);
*ptr++ = c;
}
*ptr = '\0';
fwrite(filepath_buf, (ptr - filepath_buf) + 1, 1, dst_file);
// Copy tn store contents
size_t buf_len;
char path_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) filepath_buf, (ptr - filepath_buf), (unsigned char *) path_md5);
char *buf = store_read(store, path_md5, sizeof(path_md5), &buf_len);
if (buf_len != 0) {
store_write(dst_store, path_md5, sizeof(path_md5), buf, buf_len);
free(buf);
}
fwrite("\0", sizeof(c), 1, dst_file);
enum metakey key;
while (1) {
key = getc(file);
fwrite(&key, sizeof(char), 1, dst_file);
if (key == '\n') {
break;
}
fwrite(&key, sizeof(char), 1, dst_file);
if (IS_META_INT(key)) {
int val;
fread(&val, sizeof(val), 1, file);
ret = fread(&val, sizeof(val), 1, file);
fwrite(&val, sizeof(val), 1, dst_file);
} else if (IS_META_LONG(key)) {
long val;
fread(&val, sizeof(val), 1, file);
ret = fread(&val, sizeof(val), 1, file);
fwrite(&val, sizeof(val), 1, dst_file);
} else {
while ((c = (char) getc(file))) {
@@ -435,8 +532,10 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
}
}
} else {
while ((getc(file))) {}
skip_meta(file);
}
}
fclose(file);
fclose(dst_file);
}

View File

@@ -2,16 +2,19 @@
#define SIST2_SERIALIZE_H
#include "src/sist.h"
#include <sys/syscall.h>
#include "store.h"
typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]);
#include <sys/syscall.h>
#include <glib.h>
typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
const char *dst_filepath, GHashTable *copy_table);
void write_document(document_t *doc);
void read_index(const char *path, const char[UUID_STR_LEN], const char *type, index_func);
void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);
void incremental_read(GHashTable *table, const char *filepath);

View File

@@ -1,9 +1,10 @@
#include "store.h"
#include "src/ctx.h"
store_t *store_create(char *path) {
store_t *store_create(char *path, size_t chunk_size) {
store_t *store = malloc(sizeof(struct store_t));
store->chunk_size = chunk_size;
pthread_rwlock_init(&store->lock, NULL);
mdb_env_create(&store->env);
@@ -15,11 +16,10 @@ store_t *store_create(char *path) {
);
if (open_ret != 0) {
fprintf(stderr, "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path);
exit(1);
LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path)
}
store->size = (size_t) 1024 * 1024 * 5;
store->size = (size_t) store->chunk_size;
ScanCtx.stat_tn_size = 0;
mdb_env_set_mapsize(store->env, store->size);
@@ -40,12 +40,20 @@ void store_destroy(store_t *store) {
free(store);
}
/**
 * Call mdb_env_sync() on the store's environment with the force flag set.
 */
void store_flush(store_t *store) {
    const int force = TRUE;
    mdb_env_sync(store->env, force);
}
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {
if (LogCtx.very_verbose) {
char uuid_str[UUID_STR_LEN];
uuid_unparse((unsigned char *) key, uuid_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", uuid_str, buf_len)
if (key_len == MD5_DIGEST_LENGTH) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
} else {
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
}
}
MDB_val mdb_key;
@@ -70,7 +78,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
// Cannot resize when there is a opened transaction.
// Resize take effect on the next commit.
pthread_rwlock_wrlock(&store->lock);
store->size += 1024 * 1024 * 50;
store->size += store->chunk_size;
mdb_env_set_mapsize(store->env, store->size);
mdb_txn_begin(store->env, NULL, 0, &txn);
put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
@@ -82,7 +90,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
pthread_rwlock_unlock(&store->lock);
if (put_ret != 0) {
printf("%s\n", mdb_strerror(put_ret));
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
}
@@ -111,3 +119,42 @@ char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen)
return buf;
}
/**
 * Copy every key/value pair of the store into a new hash table.
 *
 * Keys and values are duplicated on the heap and always NUL-terminated:
 * the table hashes and compares keys as C strings (g_str_hash/g_str_equal),
 * and values are handed out as plain char pointers, so neither may rely on
 * the stored bytes already containing a terminator.
 *
 * @return a table that owns its keys and values (both are released with
 *         free() when the table is destroyed). The table is empty when the
 *         read transaction or the cursor cannot be opened.
 */
GHashTable *store_read_all(store_t *store) {
    int count = 0;

    GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);

    MDB_txn *txn = NULL;
    int ret = mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);
    if (ret != 0) {
        LOG_ERROR("store.c", mdb_strerror(ret))
        return table;
    }

    MDB_cursor *cur = NULL;
    ret = mdb_cursor_open(txn, store->dbi, &cur);
    if (ret != 0) {
        LOG_ERROR("store.c", mdb_strerror(ret))
        mdb_txn_abort(txn);
        return table;
    }

    MDB_val key;
    MDB_val value;

    while (mdb_cursor_get(cur, &key, &value, MDB_NEXT) == 0) {
        char *key_str = malloc(key.mv_size + 1);
        memcpy(key_str, key.mv_data, key.mv_size);
        key_str[key.mv_size] = '\0';

        char *val_str = malloc(value.mv_size + 1);
        memcpy(val_str, value.mv_data, value.mv_size);
        val_str[value.mv_size] = '\0';

        g_hash_table_insert(table, key_str, val_str);
        count += 1;
    }

    const char *path;
    mdb_env_get_path(store->env, &path);
    LOG_DEBUGF("store.c", "Read %d entries from %s", count, path);

    mdb_cursor_close(cur);
    mdb_txn_abort(txn);

    return table;
}
/**
 * Create the destination directory (owner read/write/execute) and copy the
 * store's LMDB environment into it.
 */
void store_copy(store_t *store, const char *destination) {
    const mode_t owner_rwx = S_IWUSR | S_IRUSR | S_IXUSR;

    mkdir(destination, owner_rwx);
    mdb_env_copy(store->env, destination);
}

View File

@@ -4,21 +4,32 @@
#include <pthread.h>
#include <lmdb.h>
#include <glib.h>
// Store sizes in bytes. Parenthesized so that the expansion stays a single
// value inside larger expressions (e.g. division or modulo).
// NOTE(review): presumably passed as chunk_size to store_create(); confirm at the call sites.
#define STORE_SIZE_TN (1024 * 1024 * 5)
#define STORE_SIZE_TAG (1024 * 16)
#define STORE_SIZE_META STORE_SIZE_TAG
/* An LMDB-backed key/value store. */
typedef struct store_t {
/* Database handle used for cursor and put operations */
MDB_dbi dbi;
/* LMDB environment that owns the database */
MDB_env *env;
/* Current map size handed to mdb_env_set_mapsize() */
size_t size;
/* Initial map size, and the amount added each time the map is grown */
size_t chunk_size;
/* Taken for writing while the map is being resized */
pthread_rwlock_t lock;
} store_t;
#include "src/sist.h"
store_t *store_create(char *path);
store_t *store_create(char *path, size_t chunk_size);
void store_destroy(store_t *store);
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len);
void store_flush(store_t *store);
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);
GHashTable *store_read_all(store_t *store);
void store_copy(store_t *store, const char *destination);
#endif

View File

@@ -1,5 +1,8 @@
#include "walk.h"
#include "src/ctx.h"
#include "src/parsing/parse.h"
#include <ftw.h>
__always_inline
parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
@@ -15,12 +18,13 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
job->ext = len;
}
job->info = *info;
job->vfile.info = *info;
memset(job->parent, 0, 16);
memset(job->parent, 0, MD5_DIGEST_LENGTH);
job->vfile.filepath = job->filepath;
job->vfile.read = fs_read;
job->vfile.reset = fs_reset;
job->vfile.close = fs_close;
job->vfile.fd = -1;
job->vfile.is_fs_file = TRUE;
@@ -28,8 +32,18 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
return job;
}
// Output vector for pcre_exec(); its element count must be a multiple of 3
int sub_strings[30];
// Non-zero when `str` matches the ScanCtx.exclude pattern. The vector size is
// given as an element count (not a byte count), which is what pcre_exec() expects.
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, (str), (int) strlen(str), 0, 0, sub_strings, (int) (sizeof(sub_strings) / sizeof(sub_strings[0]))) >= 0)
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
if (ftw->level <= ScanCtx.depth && typeflag == FTW_F && S_ISREG(info->st_mode)) {
if (typeflag == FTW_F && S_ISREG(info->st_mode) && ftw->level <= ScanCtx.depth) {
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
return 0;
}
parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
tpool_add_work(ScanCtx.pool, parse, job);
}

View File

@@ -3,8 +3,6 @@
#define _XOPEN_SOURCE 500
#include "src/sist.h"
int walk_directory_tree(const char *);
#endif

View File

@@ -1,15 +1,17 @@
#include "log.h"
#include <pthread.h>
#include <stdarg.h>
const char *log_colors[] = {
"\033[34m", "\033[01;34m", "\033[0m",
"\033[01;33m", "\033[31m", "\033[01;31m"
"\033[34m", "\033[01;34m", "\033[01;33m", "\033[0m", "\033[31m", "\033[01;31m"
};
const char *log_levels[] = {
"DEBUG", "INFO", "WARNING", "ERROR", "FATAL"
};
void sist_logf(char *filepath, int level, char *format, ...) {
void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
static int is_tty = -1;
if (is_tty == -1) {
@@ -31,23 +33,20 @@ void sist_logf(char *filepath, int level, char *format, ...) {
if (is_tty) {
log_len = snprintf(
log_str, sizeof(log_str),
"\033[%dm[%04X]%s [%s] [%s %s] ",
"\033[%dm[%04llX]%s [%s] [%s %s] ",
31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
datetime, log_levels[level], filepath
);
} else {
log_len = snprintf(
log_str, sizeof(log_str),
"[%04X] [%s] [%s %s] ",
"[%04llX] [%s] [%s %s] ",
pid, datetime, log_levels[level], filepath
);
}
va_list ap;
va_start(ap, format);
size_t maxsize = sizeof(log_str) - log_len;
log_len += vsnprintf(log_str + log_len, maxsize, format, ap);
va_end(ap);
if (is_tty) {
log_len += sprintf(log_str + log_len, "\033[0m\n");
@@ -56,10 +55,20 @@ void sist_logf(char *filepath, int level, char *format, ...) {
log_len += 1;
}
write(STDERR_FILENO, log_str, log_len);
int ret = write(STDERR_FILENO, log_str, log_len);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno))
}
}
void sist_log(char *filepath, int level, char *str) {
void sist_logf(const char *filepath, int level, char *format, ...) {
va_list ap;
va_start(ap, format);
vsist_logf(filepath, level, format, ap);
va_end(ap);
}
void sist_log(const char *filepath, int level, char *str) {
static int is_tty = -1;
if (is_tty == -1) {
@@ -81,7 +90,7 @@ void sist_log(char *filepath, int level, char *str) {
if (is_tty) {
log_len = snprintf(
log_str, sizeof(log_str),
"\033[%dm[%04X]%s [%s] [%s %s] %s \033[0m\n",
"\033[%dm[%04llX]%s [%s] [%s %s] %s \033[0m\n",
31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
datetime, log_levels[level], filepath,
str
@@ -89,11 +98,14 @@ void sist_log(char *filepath, int level, char *str) {
} else {
log_len = snprintf(
log_str, sizeof(log_str),
"[%04X] [%s] [%s %s] %s \n",
"[%04llX] [%s] [%s %s] %s \n",
pid, datetime, log_levels[level], filepath,
str
);
}
write(STDERR_FILENO, log_str, log_len);
int ret = write(STDERR_FILENO, log_str, log_len);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
}
}

View File

@@ -1,6 +1,7 @@
#ifndef SIST2_LOG_H
#define SIST2_LOG_H
#define LOG_MAX_LENGTH 8192
#define SIST_DEBUG 0
@@ -36,10 +37,11 @@
sist_log(filepath, SIST_FATAL, str);\
exit(-1);
#include "src/sist.h"
#include "sist.h"
void sist_logf(char *filepath, int level, char *format, ...);
void sist_logf(const char *filepath, int level, char *format, ...);
void vsist_logf(const char *filepath, int level, char *format, va_list ap);
void sist_log(char *filepath, int level, char *str);
void sist_log(const char *filepath, int level, char *str);
#endif

View File

@@ -1,32 +1,43 @@
#include "sist.h"
#include "ctx.h"
#include <third-party/argparse/argparse.h>
#include <locale.h>
#include "cli.h"
#include "io/serialize.h"
#include "io/store.h"
#include "tpool.h"
#include "io/walk.h"
#include "index/elastic.h"
#include "web/serve.h"
#include "parsing/mime.h"
#include "parsing/parse.h"
#include "stats.h"
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "1.2.2";
static const char *const Version = "2.9.0";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
"sist2 web [OPTION]... INDEX...",
"sist2 exec-script [OPTION]... INDEX",
NULL,
};
// Process-wide initialisation of the libraries called below (libcurl, libav logging, libopc).
void global_init() {
// NOTE(review): CURL_GLOBAL_NOTHING presumably skips libcurl's optional subsystem setup - confirm against the libcurl docs.
curl_global_init(CURL_GLOBAL_NOTHING);
// NOTE(review): AV_LOG_QUIET is expected to silence libav log output - confirm.
av_log_set_level(AV_LOG_QUIET);
opcInitLibrary();
}
void init_dir(const char *dirpath) {
char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
uuid_t uuid;
uuid_generate(uuid);
uuid_unparse(uuid, ScanCtx.index.desc.uuid);
unsigned char index_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) ScanCtx.index.desc.name, strlen(ScanCtx.index.desc.name), index_md5);
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
time(&ScanCtx.index.desc.timestamp);
strcpy(ScanCtx.index.desc.version, Version);
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_BIN);
@@ -38,29 +49,151 @@ void scan_print_header() {
LOG_INFOF("main.c", "sist2 v%s", Version)
}
void sist2_scan(scan_args_t *args) {
void _store(char *key, size_t key_len, char *buf, size_t buf_len) {
store_write(ScanCtx.index.store, key, key_len, buf, buf_len);
}
/**
 * Logging callback installed on the parser contexts.
 *
 * - LEVEL_FATAL messages are always emitted and then terminate the process.
 * - Any other level is emitted only when LogCtx.verbose is set;
 *   LEVEL_DEBUG additionally requires LogCtx.very_verbose.
 *
 * @param filepath tag printed with the message
 * @param level    one of the LEVEL_* constants
 * @param str      message text
 */
void _log(const char *filepath, int level, char *str) {
    const int is_fatal = (level == LEVEL_FATAL);
    const int is_enabled = LogCtx.verbose && (level != LEVEL_DEBUG || LogCtx.very_verbose);

    if (is_fatal || is_enabled) {
        sist_log(filepath, level, str);
    }

    if (is_fatal) {
        exit(-1);
    }
}
/**
 * printf-style logging callback installed on the parser contexts.
 *
 * - LEVEL_FATAL messages are always emitted and then terminate the process.
 * - Any other level is emitted only when LogCtx.verbose is set;
 *   LEVEL_DEBUG additionally requires LogCtx.very_verbose.
 *
 * @param filepath tag printed with the message
 * @param level    one of the LEVEL_* constants
 * @param format   printf-style format string, followed by its arguments
 */
void _logf(const char *filepath, int level, char *format, ...) {
    const int is_fatal = (level == LEVEL_FATAL);
    const int is_enabled = LogCtx.verbose && (level != LEVEL_DEBUG || LogCtx.very_verbose);

    va_list ap;
    va_start(ap, format);

    // The argument list is consumed at most once
    if (is_fatal || is_enabled) {
        vsist_logf(filepath, level, format, ap);
    }

    va_end(ap);

    if (is_fatal) {
        exit(-1);
    }
}
void initialize_scan_context(scan_args_t *args) {
// Arc
ScanCtx.arc_ctx.mode = args->archive_mode;
ScanCtx.arc_ctx.log = _log;
ScanCtx.arc_ctx.logf = _logf;
ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
// Comic
ScanCtx.comic_ctx.log = _log;
ScanCtx.comic_ctx.logf = _logf;
ScanCtx.comic_ctx.store = _store;
ScanCtx.comic_ctx.tn_size = args->size;
ScanCtx.comic_ctx.tn_qscale = args->quality;
ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
// Ebook
pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
ScanCtx.ebook_ctx.content_size = args->content_size;
ScanCtx.ebook_ctx.tn_size = args->size;
ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
ScanCtx.ebook_ctx.log = _log;
ScanCtx.ebook_ctx.logf = _logf;
ScanCtx.ebook_ctx.store = _store;
// Font
ScanCtx.font_ctx.enable_tn = args->size > 0;
ScanCtx.font_ctx.log = _log;
ScanCtx.font_ctx.logf = _logf;
ScanCtx.font_ctx.store = _store;
// Media
ScanCtx.media_ctx.tn_qscale = args->quality;
ScanCtx.media_ctx.tn_size = args->size;
ScanCtx.media_ctx.log = _log;
ScanCtx.media_ctx.logf = _logf;
ScanCtx.media_ctx.store = _store;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
init_media();
// OOXML
ScanCtx.ooxml_ctx.content_size = args->content_size;
ScanCtx.ooxml_ctx.log = _log;
ScanCtx.ooxml_ctx.logf = _logf;
ScanCtx.ooxml_ctx.store = _store;
// MOBI
ScanCtx.mobi_ctx.content_size = args->content_size;
ScanCtx.mobi_ctx.log = _log;
ScanCtx.mobi_ctx.logf = _logf;
// TEXT
ScanCtx.text_ctx.content_size = args->content_size;
ScanCtx.text_ctx.log = _log;
ScanCtx.text_ctx.logf = _logf;
// MSDOC
ScanCtx.msdoc_ctx.tn_size = args->size;
ScanCtx.msdoc_ctx.content_size = args->content_size;
ScanCtx.msdoc_ctx.log = _log;
ScanCtx.msdoc_ctx.logf = _logf;
ScanCtx.msdoc_ctx.store = _store;
ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/msword");
ScanCtx.tn_qscale = args->quality;
ScanCtx.tn_size = args->size;
ScanCtx.content_size = args->content_size;
ScanCtx.threads = args->threads;
ScanCtx.depth = args->depth;
ScanCtx.archive_mode = args->archive_mode;
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
ScanCtx.tesseract_lang = args->tesseract_lang;
ScanCtx.fast = args->fast;
init_dir(ScanCtx.index.path);
// Raw
ScanCtx.raw_ctx.tn_qscale = args->quality;
ScanCtx.raw_ctx.tn_size = args->size;
ScanCtx.raw_ctx.log = _log;
ScanCtx.raw_ctx.logf = _logf;
ScanCtx.raw_ctx.store = _store;
}
void sist2_scan(scan_args_t *args) {
ScanCtx.mime_table = mime_get_mime_table();
ScanCtx.ext_table = mime_get_ext_table();
initialize_scan_context(args);
init_dir(ScanCtx.index.path);
char store_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
ScanCtx.index.store = store_create(store_path);
ScanCtx.index.store = store_create(store_path, STORE_SIZE_TN);
snprintf(store_path, PATH_MAX, "%smeta", ScanCtx.index.path);
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
ScanCtx.index.meta_store = store_create(store_path, STORE_SIZE_META);
scan_print_header();
@@ -70,23 +203,32 @@ void sist2_scan(scan_args_t *args) {
DIR *dir = opendir(args->incremental);
if (dir == NULL) {
perror("opendir");
return;
LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno))
}
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
if (strcmp(original_desc.version, Version) != 0) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", original_desc.version,
Version, INDEX_VERSION_EXTERNAL)
}
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name);
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
incremental_read(ScanCtx.original_table, file_path);
}
}
closedir(dir);
printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table));
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
}
ScanCtx.pool = tpool_create(args->threads, thread_cleanup);
ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE);
tpool_start(ScanCtx.pool);
walk_directory_tree(ScanCtx.index.desc.root);
tpool_wait(ScanCtx.pool);
@@ -96,7 +238,7 @@ void sist2_scan(scan_args_t *args) {
char dst_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
snprintf(dst_path, PATH_MAX, "%s_index_original", ScanCtx.index.path);
store_t *source = store_create(store_path);
store_t *source = store_create(store_path, STORE_SIZE_TN);
DIR *dir = opendir(args->incremental);
if (dir == NULL) {
@@ -107,24 +249,34 @@ void sist2_scan(scan_args_t *args) {
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name);
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table);
}
}
closedir(dir);
store_destroy(source);
snprintf(store_path, PATH_MAX, "%stags", args->incremental);
snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
store_copy(source_tags, dst_path);
store_destroy(source_tags);
}
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
store_destroy(ScanCtx.index.store);
}
void sist2_index(index_args_t *args) {
IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index;
IndexCtx.batch_size = args->batch_size;
if (!args->print) {
elastic_init(args->force_reset);
elastic_init(args->force_reset, args->es_mappings, args->es_settings);
}
char descriptor_path[PATH_MAX];
@@ -135,17 +287,25 @@ void sist2_index(index_args_t *args) {
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
if (strcmp(desc.version, Version) != 0 && strcmp(desc.version, INDEX_VERSION_EXTERNAL) != 0) {
fprintf(stderr, "Version mismatch! Index is %s but executable is %s/%s\n",
desc.version, Version, INDEX_VERSION_EXTERNAL);
return;
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", desc.version, Version,
INDEX_VERSION_EXTERNAL)
}
DIR *dir = opendir(args->index_path);
if (dir == NULL) {
perror("opendir");
return;
LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno))
}
char path_tmp[PATH_MAX];
snprintf(path_tmp, sizeof(path_tmp), "%s/tags", args->index_path);
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
IndexCtx.tags = store_read_all(IndexCtx.tag_store);
snprintf(path_tmp, sizeof(path_tmp), "%s/meta", args->index_path);
IndexCtx.meta_store = store_create(path_tmp, STORE_SIZE_META);
IndexCtx.meta = store_read_all(IndexCtx.meta_store);
index_func f;
if (args->print) {
f = print_json;
@@ -153,27 +313,64 @@ void sist2_index(index_args_t *args) {
f = index_json;
}
void (*cleanup)();
if (args->print) {
cleanup = NULL;
} else {
cleanup = elastic_cleanup;
}
IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE);
tpool_start(IndexCtx.pool);
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", args->index_path, de->d_name);
read_index(file_path, desc.uuid, desc.type, f);
read_index(file_path, desc.id, desc.type, f);
}
}
closedir(dir);
tpool_wait(IndexCtx.pool);
tpool_destroy(IndexCtx.pool);
if (!args->print) {
elastic_flush();
destroy_indexer(args->script, desc.uuid);
finish_indexer(args->script, args->async_script, desc.id);
}
store_destroy(IndexCtx.tag_store);
g_hash_table_remove_all(IndexCtx.tags);
g_hash_table_destroy(IndexCtx.tags);
}
/**
 * Entry point of the `exec-script` command: reads the descriptor of the index
 * at args->index_path and runs the user script against it.
 *
 * Forces verbose logging, and releases args->script once the script has run.
 *
 * NOTE(review): only IndexCtx.es_url is populated here, not IndexCtx.es_index -
 * confirm that execute_update_script() does not depend on the latter.
 */
void sist2_exec_script(exec_args_t *args) {
    char desc_file[PATH_MAX];
    index_descriptor_t desc;

    LogCtx.verbose = TRUE;

    snprintf(desc_file, sizeof(desc_file), "%s/descriptor.json", args->index_path);
    desc = read_index_descriptor(desc_file);

    IndexCtx.es_url = args->es_url;

    LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)

    execute_update_script(args->script, args->async_script, desc.id);

    // The script text is owned by args and is no longer needed
    free(args->script);
}
void sist2_web(web_args_t *args) {
WebCtx.es_url = args->es_url;
WebCtx.es_index = args->es_index;
WebCtx.index_count = args->index_count;
WebCtx.b64credentials = args->b64credentials;
WebCtx.auth_user = args->auth_user;
WebCtx.auth_pass = args->auth_pass;
WebCtx.auth_enabled = args->auth_enabled;
WebCtx.tag_auth_enabled = args->tag_auth_enabled;
for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]);
@@ -183,7 +380,11 @@ void sist2_web(web_args_t *args) {
char path_tmp[PATH_MAX];
snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path);
WebCtx.indices[i].store = store_create(path_tmp);
WebCtx.indices[i].store = store_create(path_tmp, STORE_SIZE_TN);
snprintf(path_tmp, PATH_MAX, "%stags", abs_path);
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
WebCtx.indices[i].tag_store = store_create(path_tmp, STORE_SIZE_TAG);
snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path);
WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
@@ -193,21 +394,25 @@ void sist2_web(web_args_t *args) {
free(abs_path);
}
serve(args->bind, args->port);
serve(args->listen_address);
}
int main(int argc, const char *argv[]) {
global_init();
setlocale(LC_ALL, "");
scan_args_t *scan_args = scan_args_create();
index_args_t *index_args = index_args_create();
web_args_t *web_args = web_args_create();
exec_args_t *exec_args = exec_args_create();
int arg_version = 0;
char *common_es_url = NULL;
char *common_es_index = NULL;
char *common_script_path = NULL;
int common_async_script = 0;
int common_threads = 0;
struct argparse_option options[] = {
OPT_HELP(),
@@ -217,7 +422,7 @@ int main(int argc, const char *argv[]) {
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
OPT_GROUP("Scan options"),
OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_FLOAT('q', "quality", &scan_args->quality,
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
OPT_INTEGER(0, "size", &scan_args->size,
@@ -236,20 +441,39 @@ int main(int argc, const char *argv[]) {
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see "
"which are installed on your machine)"),
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
"(see USAGE.md). DEFAULT: 0.0005"),
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
"Maximum memory buffer size per thread in MB for files inside archives "
"(see USAGE.md). DEFAULT: 2000"),
OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
"(You must use this option the first time you use the index command)"),
OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "bind", &web_args->bind, "Listen on this address. DEFAULT=localhost"),
OPT_STRING(0, "port", &web_args->port, "Listen on this port. DEFAULT=4090"),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"),
OPT_GROUP("Exec-script options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
OPT_END(),
};
@@ -261,7 +485,7 @@ int main(int argc, const char *argv[]) {
if (arg_version) {
printf(Version);
exit(0);
goto end;
}
if (LogCtx.very_verbose != 0) {
@@ -270,25 +494,35 @@ int main(int argc, const char *argv[]) {
web_args->es_url = common_es_url;
index_args->es_url = common_es_url;
exec_args->es_url = common_es_url;
web_args->es_index = common_es_index;
index_args->es_index = common_es_index;
exec_args->es_index = common_es_index;
index_args->script_path = common_script_path;
exec_args->script_path = common_script_path;
index_args->threads = common_threads;
scan_args->threads = common_threads;
exec_args->async_script = common_async_script;
index_args->async_script = common_async_script;
if (argc == 0) {
argparse_usage(&argparse);
return 1;
goto end;
} else if (strcmp(argv[0], "scan") == 0) {
int err = scan_args_validate(scan_args, argc, argv);
if (err != 0) {
return err;
goto end;
}
sist2_scan(scan_args);
}
else if (strcmp(argv[0], "index") == 0) {
} else if (strcmp(argv[0], "index") == 0) {
int err = index_args_validate(index_args, argc, argv);
if (err != 0) {
return err;
goto end;
}
sist2_index(index_args);
@@ -296,22 +530,30 @@ int main(int argc, const char *argv[]) {
int err = web_args_validate(web_args, argc, argv);
if (err != 0) {
return err;
goto end;
}
sist2_web(web_args);
} else if (strcmp(argv[0], "exec-script") == 0) {
int err = exec_args_validate(exec_args, argc, argv);
if (err != 0) {
goto end;
}
else {
sist2_exec_script(exec_args);
} else {
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
argparse_usage(&argparse);
return 1;
goto end;
}
printf("\n");
end:
scan_args_destroy(scan_args);
index_args_destroy(index_args);
web_args_destroy(web_args);
exec_args_destroy(exec_args);
return 0;
}

View File

@@ -1,157 +0,0 @@
#include "arc.h"
#include "src/ctx.h"
#define ARC_BUF_SIZE 8192
/**
 * Tells whether a filtered (compressed) file wraps a tar archive, i.e. whether
 * the name with its last extension stripped ends in ".tar" ("x.tar.gz").
 *
 * Only the first `ext - 1` bytes of filepath are considered; the caller must
 * guarantee that filepath is at least that long.
 *
 * The comparison is done in place: the previous implementation copied the
 * stem into a fixed-size stack buffer without checking `ext` against the
 * buffer size (and a negative `ext` became a huge memcpy length).
 *
 * @param filepath path of the file
 * @param ext      offset into filepath; values <= 0 yield FALSE
 * @return TRUE when the stem ends in ".tar", FALSE otherwise
 */
int should_parse_filtered_file(const char *filepath, int ext) {
    static const char tar_suffix[] = ".tar";
    const size_t suffix_len = sizeof(tar_suffix) - 1;

    if (filepath == NULL || ext <= 0) {
        return FALSE;
    }

    const size_t stem_len = (size_t) ext - 1;
    if (stem_len < suffix_len) {
        return FALSE;
    }

    // ".tar" contains a single dot, so "stem ends with .tar" is equivalent to
    // "the text starting at the last dot of the stem is exactly .tar"
    if (memcmp(filepath + stem_len - suffix_len, tar_suffix, suffix_len) == 0) {
        return TRUE;
    }

    return FALSE;
}
/**
 * vfile read implementation for files stored inside an archive: reads up to
 * `size` bytes of the current archive entry into buf.
 *
 * @return the value of archive_read_data(), converted to int
 */
int arc_read(struct vfile *f, void *buf, size_t size) {
    const int bytes_read = (int) archive_read_data(f->arc, buf, size);

    return bytes_read;
}
// User data handed to the libarchive open/read/close callbacks of this file.
typedef struct arc_data {
// Virtual file the archive bytes are read from
vfile_t *f;
// Buffer filled by vfile_read_callback() and exposed to libarchive
char buf[ARC_BUF_SIZE];
} arc_data_f;
int vfile_open_callback(struct archive *a, void *user_data) {
arc_data_f *data = user_data;
if (data->f->is_fs_file && data->f->fd == -1) {
data->f->fd = open(data->f->filepath, O_RDONLY);
}
return ARCHIVE_OK;
}
long vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
arc_data_f *data = user_data;
*buf = data->buf;
return data->f->read(data->f, data->buf, ARC_BUF_SIZE);
}
int vfile_close_callback(struct archive *a, void *user_data) {
arc_data_f *data = user_data;
if (data->f->close != NULL) {
data->f->close(data->f);
}
return ARCHIVE_OK;
}
/**
 * Parses an archive.
 *
 * - Files on disk are opened by name through libarchive.
 * - Archives nested in another archive are only opened when the archive mode
 *   is ARC_MODE_RECURSE, and are streamed through the vfile callbacks.
 *
 * In ARC_MODE_LIST the names of the regular entries are stored, one per line,
 * as the MetaContent of doc. Otherwise every regular entry is handed to
 * parse() as a sub-job whose path is "<archive path>#/<entry path>" and whose
 * parent is doc.
 *
 * Entries whose combined path does not fit in the sub-job path buffer are
 * logged and skipped (previously an unbounded sprintf wrote past the buffer).
 *
 * @param f   virtual file of the archive
 * @param doc document describing the archive
 */
void parse_archive(vfile_t *f, document_t *doc) {
    struct archive *a;
    struct archive_entry *entry;

    arc_data_f data;
    data.f = f;

    int ret = 0;
    if (data.f->is_fs_file) {
        a = archive_read_new();
        archive_read_support_filter_all(a);
        archive_read_support_format_all(a);

        ret = archive_read_open_filename(a, doc->filepath, ARC_BUF_SIZE);
    } else if (ScanCtx.archive_mode == ARC_MODE_RECURSE) {
        a = archive_read_new();
        archive_read_support_filter_all(a);
        archive_read_support_format_all(a);

        ret = archive_read_open(
                a, &data,
                vfile_open_callback,
                vfile_read_callback,
                vfile_close_callback
        );
    } else {
        return;
    }

    if (ret != ARCHIVE_OK) {
        LOG_ERRORF(doc->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a))
        archive_read_free(a);
        return;
    }

    if (ScanCtx.archive_mode == ARC_MODE_LIST) {
        dyn_buffer_t buf = dyn_buffer_create();

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            if (S_ISREG(archive_entry_stat(entry)->st_mode)) {
                char *path = (char *) archive_entry_pathname(entry);

                dyn_buffer_append_string(&buf, path);
                dyn_buffer_write_char(&buf, '\n');
            }
        }
        dyn_buffer_write_char(&buf, '\0');

        // buf.cur already accounts for the terminating NUL written above
        meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
        if (meta_list == NULL) {
            LOG_ERRORF(doc->filepath, "(arc.c) Could not allocate file list for %s", f->filepath)
        } else {
            meta_list->key = MetaContent;
            strcpy(meta_list->strval, buf.buf);
            APPEND_META(doc, meta_list);
        }
        dyn_buffer_destroy(&buf);

    } else {
        // Bytes reserved after the job struct for the sub-job path
        const size_t path_cap = PATH_MAX * 2;

        parse_job_t *sub_job = malloc(sizeof(parse_job_t) + path_cap);
        if (sub_job == NULL) {
            LOG_ERRORF(doc->filepath, "(arc.c) Could not allocate sub-job for %s", f->filepath)
            archive_read_free(a);
            return;
        }

        sub_job->vfile.close = NULL;
        sub_job->vfile.read = arc_read;
        sub_job->vfile.arc = a;
        sub_job->vfile.filepath = sub_job->filepath;
        sub_job->vfile.is_fs_file = FALSE;
        memcpy(sub_job->parent, doc->uuid, sizeof(uuid_t));

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            sub_job->info = *archive_entry_stat(entry);
            if (!S_ISREG(sub_job->info.st_mode)) {
                continue;
            }

            const char *entry_path = archive_entry_pathname(entry);
            if (entry_path == NULL) {
                LOG_ERRORF(doc->filepath, "(arc.c) Skipping archive entry without a path in %s", f->filepath)
                continue;
            }

            int written = snprintf(sub_job->filepath, path_cap, "%s#/%s", f->filepath, entry_path);
            if (written < 0 || (size_t) written >= path_cap) {
                LOG_ERRORF(doc->filepath, "(arc.c) Path too long, skipping archive entry: %s", entry_path)
                continue;
            }

            // The path always contains the "#/" separator, so strrchr() cannot fail here
            sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;

            char *p = strrchr(sub_job->filepath, '.');
            if (p != NULL) {
                sub_job->ext = (int) (p - sub_job->filepath + 1);
            } else {
                sub_job->ext = (int) strlen(sub_job->filepath);
            }

            parse(sub_job);
        }

        free(sub_job);
    }

    archive_read_free(a);
}

View File

@@ -1,12 +0,0 @@
#ifndef SIST2_ARC_H
#define SIST2_ARC_H
#include "src/sist.h"
int should_parse_filtered_file(const char *filepath, int ext);
void parse_archive(vfile_t *f, document_t *doc);
int arc_read(struct vfile * f, void *buf, size_t size);
#endif

View File

@@ -1,107 +0,0 @@
#include "doc.h"
#include "src/ctx.h"
// Walks the children of the current XML element: the text found inside
// elements named "t" is appended to buf, each piece followed by one space;
// every other child element is descended into by calling dump_text() again.
void dump_text(mceTextReader_t *reader, dyn_buffer_t *buf) {
mce_skip_attributes(reader);
mce_start_children(reader) {
// Element named "t": copy its text nodes
mce_start_element(reader, NULL, _X("t")) {
mce_skip_attributes(reader);
mce_start_children(reader) {
mce_start_text(reader) {
char *str = (char *) xmlTextReaderConstValue(reader->reader);
dyn_buffer_append_string(buf, str);
// Separator so that consecutive text runs do not stick together
dyn_buffer_write_char(buf, ' ');
} mce_end_text(reader);
} mce_end_children(reader);
} mce_end_element(reader);
// Any other element: recurse into it
mce_start_element(reader, NULL, NULL) {
dump_text(reader, buf);
} mce_end_element(reader);
} mce_end_children(reader)
}
/**
 * Tells whether a part of an OOXML container holds text worth extracting,
 * based on the part name.
 *
 * @param part part name; may be NULL
 * @return TRUE for the Word/PowerPoint/Excel parts listed below, FALSE otherwise
 */
__always_inline
int should_read_part(opcPart part) {
    if (part == NULL) {
        return FALSE;
    }

    const char *name = (const char *) part;

    // Parts that must match the whole name
    const char *const exact_names[] = {
            "word/document.xml",    // Word
            "xl/sharedStrings.xml", // Excel
            "xl/workbook.xml",      // Excel
    };

    // Parts that are matched by prefix
    const char *const name_prefixes[] = {
            "word/footer",                // Word
            "word/header",                // Word
            "ppt/slides/slide",           // PowerPoint
            "ppt/notesSlides/notesSlide", // PowerPoint
            "xl/worksheets/sheet",        // Excel
    };

    for (size_t i = 0; i < sizeof(exact_names) / sizeof(exact_names[0]); i++) {
        if (strcmp(name, exact_names[i]) == 0) {
            return TRUE;
        }
    }

    for (size_t i = 0; i < sizeof(name_prefixes) / sizeof(name_prefixes[0]); i++) {
        if (strncmp(name, name_prefixes[i], strlen(name_prefixes[i])) == 0) {
            return TRUE;
        }
    }

    return FALSE;
}
// Opens one part of the container as XML and appends the text collected by
// dump_text() to buf. On open failure the error is logged against
// doc->filepath and buf is left untouched.
__always_inline
void read_part(opcContainer *c, dyn_buffer_t *buf, opcPart part, document_t *doc) {
mceTextReader_t reader;
int ret = opcXmlReaderOpen(c, &reader, part, NULL, "UTF-8", 0);
if (ret != OPC_ERROR_NONE) {
LOG_ERRORF(doc->filepath, "(doc.c) opcXmlReaderOpen() returned error code %d", ret);
return;
}
mce_start_document(&reader) {
// Start from the top-level element, whatever its name
mce_start_element(&reader, NULL, NULL) {
dump_text(&reader, buf);
} mce_end_element(&reader);
} mce_end_document(&reader);
mceTextReaderCleanup(&reader);
}
/**
 * Extracts the text of an OOXML document held in memory and attaches it to
 * doc as MetaContent.
 *
 * Nothing is attached when mem is NULL, when the container cannot be opened,
 * or when no text was found.
 *
 * @param mem     document bytes
 * @param mem_len length of mem in bytes
 * @param doc     document receiving the metadata
 */
void parse_doc(void *mem, size_t mem_len, document_t *doc) {
    if (mem == NULL) {
        return;
    }

    opcContainer *c = opcContainerOpenMem(mem, mem_len, OPC_OPEN_READ_ONLY, NULL);
    if (c == NULL) {
        LOG_ERROR(doc->filepath, "(doc.c) Couldn't open document with opcContainerOpenMem()");
        return;
    }

    dyn_buffer_t buf = dyn_buffer_create();

    // Stop as soon as there is no part left. This also covers a container
    // without any part, where the former do/while loop passed a NULL part to
    // opcPartGetNext()
    for (opcPart part = opcPartGetFirst(c); part != NULL; part = opcPartGetNext(c, part)) {
        if (should_read_part(part)) {
            read_part(c, &buf, part, doc);
        }
    }

    opcContainerClose(c, OPC_CLOSE_NOW);

    if (buf.cur > 0) {
        dyn_buffer_write_char(&buf, '\0');

        // buf.cur now includes the terminating NUL
        meta_line_t *meta = malloc(sizeof(meta_line_t) + buf.cur);
        if (meta == NULL) {
            LOG_ERROR(doc->filepath, "(doc.c) Couldn't allocate memory for the document content");
        } else {
            meta->key = MetaContent;
            strcpy(meta->strval, buf.buf);
            APPEND_META(doc, meta)
        }
    }

    dyn_buffer_destroy(&buf);
}

View File

@@ -1,8 +0,0 @@
#ifndef SIST2_DOC_H
#define SIST2_DOC_H
#include "src/sist.h"
void parse_doc(void *buf, size_t buf_len, document_t *doc);
#endif

View File

@@ -1,226 +0,0 @@
#include "font.h"
#include "src/ctx.h"
// FreeType library handle, one per thread (__thread); initialised on first use in parse_font().
__thread FT_Library ft_lib = NULL;
// Size of a rendered line of text, as computed by text_dimension().
typedef struct text_dimensions {
// Sum over all characters of max(advance, bitmap width) plus kerning
unsigned int width;
// Largest ascent plus largest descent
unsigned int height;
// Largest descent of any glyph in the text
unsigned int baseline;
} text_dimensions_t;
// Metrics and bitmap of one rendered glyph, filled in by ft_glyph_to_glyph().
typedef struct glyph {
// bitmap_top of the FreeType glyph slot
int top;
// Number of rows of the glyph bitmap
int height;
// Width of the glyph bitmap
int width;
// max(0, height - top)
int descent;
// max(0, max(top, height) - descent)
int ascent;
// Horizontal advance of the slot divided by 64
int advance_width;
// Bitmap buffer of the FreeType glyph slot (not copied)
unsigned char *pixmap;
} glyph_t;
/**
 * Horizontal kerning, in whole pixels, to apply between the previous
 * character and the current one.
 *
 * FT_Get_Kerning() works on glyph INDICES given in (left, right) order, so
 * the character codes are first mapped with FT_Get_Char_Index() and the
 * previous character is passed as the left glyph. The characters are
 * converted the same way as when they are passed to FT_Load_Char() elsewhere
 * in this file, so the kerning refers to the glyphs that get rendered.
 *
 * @param c    current character (right glyph)
 * @param pc   previous character (left glyph), 0 when there is none
 * @param face font face
 * @return kerning in pixels; 0 when there is no previous character or when
 *         FreeType reports an error
 */
__always_inline
int kerning_offset(char c, char pc, FT_Face face) {
    if (pc == 0) {
        return 0;
    }

    FT_UInt left_glyph = FT_Get_Char_Index(face, (FT_ULong) pc);
    FT_UInt right_glyph = FT_Get_Char_Index(face, (FT_ULong) c);

    FT_Vector kerning = {0, 0};
    if (FT_Get_Kerning(face, left_glyph, right_glyph, FT_KERNING_DEFAULT, &kerning) != 0) {
        return 0;
    }

    // FT_KERNING_DEFAULT yields 26.6 fixed-point values
    return (int) (kerning.x / 64);
}
/**
 * Converts the glyph currently held by a FreeType glyph slot into a glyph_t.
 * The bitmap buffer is referenced, not copied.
 *
 * @param slot glyph slot of a loaded glyph
 * @return the glyph metrics, with ascent and descent clamped to be >= 0
 */
__always_inline
glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
    const int rows = (int) slot->bitmap.rows;
    const int top = slot->bitmap_top;

    // Part of the bitmap lying under the baseline
    const int below_baseline = MAX(0, rows - top);
    const int above_baseline = MAX(0, MAX(top, rows) - below_baseline);

    glyph_t result = {
            .top = top,
            .height = rows,
            .width = (int) slot->bitmap.width,
            .descent = below_baseline,
            .ascent = above_baseline,
            .advance_width = (int) slot->advance.x / 64,
            .pixmap = slot->bitmap.buffer,
    };

    return result;
}
/**
 * Measures the bounding box needed to draw text with the given face.
 *
 * @param text NUL-terminated text
 * @param face font face, with its pixel size already set
 * @return width (advances plus kerning), height (largest ascent plus largest
 *         descent) and baseline (largest descent)
 */
text_dimensions_t text_dimension(char *text, FT_Face face) {
    text_dimensions_t dims;
    dims.width = 0;

    unsigned int tallest_ascent = 0;
    int deepest_descent = 0;
    char prev = 0;

    for (const char *p = text; *p != '\0'; p++) {
        const char cur = *p;

        FT_Load_Char(face, cur, 0);
        glyph_t g = ft_glyph_to_glyph(face->glyph);

        deepest_descent = MAX(deepest_descent, g.descent);
        tallest_ascent = MAX(tallest_ascent, MAX(g.height, g.ascent));

        int kern = kerning_offset(cur, prev, face);
        dims.width += MAX(g.advance_width, g.width) + kern;

        prev = cur;
    }

    dims.height = tallest_ascent + deepest_descent;
    dims.baseline = deepest_descent;

    return dims;
}
/**
 * ORs the pixels of a glyph into an 8-bit canvas at position (x, y).
 * Pixels whose canvas index falls at or beyond the end of the canvas are
 * dropped.
 *
 * @param glyph     glyph to draw
 * @param x         column of the glyph's left edge in the canvas
 * @param y         row of the glyph's top edge in the canvas
 * @param text_info canvas dimensions (width x height)
 * @param bitmap    canvas of width * height bytes
 */
void draw_glyph(glyph_t *glyph, int x, int y, struct text_dimensions text_info, unsigned char *bitmap) {
    const unsigned int canvas_len = text_info.width * text_info.height;
    const unsigned int glyph_w = glyph->width;
    const unsigned int glyph_h = glyph->height;
    const unsigned int origin = y * text_info.width + x;

    for (unsigned int row = 0; row < glyph_h; row++) {
        const unsigned int dst_row = origin + row * text_info.width;
        const unsigned int src_row = row * glyph_w;

        for (unsigned int col = 0; col < glyph_w; col++) {
            const unsigned int dst = dst_row + col;

            if (dst < canvas_len) {
                bitmap[dst] |= glyph->pixmap[src_row + col];
            }
        }
    }
}
/**
 * Serialises an 8-bit grayscale bitmap as a BMP file into buf.
 *
 * Layout: 14-byte file header, 40-byte DIB header, 256-entry colour table
 * (inverted grayscale: index 0 is white), then the pixel rows from bottom to
 * top.
 *
 * Every pixel row is padded to a multiple of 4 bytes RELATIVE TO THE START OF
 * THE ROW. The pixel array starts at offset 14 + 40 + 1024 = 1078, which is
 * not a multiple of 4, so padding on the absolute buffer offset (as was done
 * before) gave the first row a wrong length.
 *
 * @param buf        output buffer; expected to be empty, since the file size
 *                   is patched in at offset 2
 * @param dimensions width and height of bitmap
 * @param bitmap     width * height pixels, top row first
 */
void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned char *bitmap) {

    dyn_buffer_write_short(buf, 0x4D42); // Magic
    dyn_buffer_write_int(buf, 0); // Size placeholder
    dyn_buffer_write_int(buf, 0x5157); //Reserved
    dyn_buffer_write_int(buf, 14 + 40 + 256 * 4); // pixels offset

    dyn_buffer_write_int(buf, 40); // DIB size
    dyn_buffer_write_int(buf, (int) dimensions.width);
    dyn_buffer_write_int(buf, (int) dimensions.height);
    dyn_buffer_write_short(buf, 1); // Color planes
    dyn_buffer_write_short(buf, 8); // bits per pixel
    dyn_buffer_write_int(buf, 0); // compression
    dyn_buffer_write_int(buf, 0); // Ignored
    dyn_buffer_write_int(buf, 3800); // hres
    dyn_buffer_write_int(buf, 3800); // vres
    dyn_buffer_write_int(buf, 256); // Color count
    dyn_buffer_write_int(buf, 0); // Ignored

    // RGBA32 Color table (Grayscale)
    for (int i = 255; i >= 0; i--) {
        dyn_buffer_write_int(buf, i + (i << 8) + (i << 16));
    }

    // Pixel array: write from bottom to top, with rows padded to multiples of 4-bytes
    const unsigned int row_padding = (4 - dimensions.width % 4) % 4;

    for (int y = (int) dimensions.height - 1; y >= 0; y--) {
        for (unsigned int x = 0; x < dimensions.width; x++) {
            dyn_buffer_write_char(buf, (char) bitmap[y * dimensions.width + x]);
        }
        for (unsigned int pad = 0; pad < row_padding; pad++) {
            dyn_buffer_write_char(buf, 0);
        }
    }

    // Size: offset 2 is not int-aligned, so copy the bytes instead of
    // storing through a casted pointer
    int file_size = (int) buf->cur;
    memcpy((char *) buf->buf + 2, &file_size, sizeof(file_size));
}
/**
 * Parses a font held in memory: stores its name ("family style") as
 * MetaFontName on doc and, when thumbnails are enabled (ScanCtx.tn_size > 0),
 * renders that name with the font itself and writes the resulting BMP to the
 * index store under the document's uuid.
 *
 * The face is released on every exit path, and the name is built with
 * bounded copies (family/style names come from the font file).
 *
 * @param buf     font bytes; nothing is done when NULL
 * @param buf_len length of buf in bytes
 * @param doc     document receiving the metadata
 */
void parse_font(const char *buf, size_t buf_len, document_t *doc) {
    if (ft_lib == NULL) {
        FT_Init_FreeType(&ft_lib);
    }

    if (buf == NULL) {
        return;
    }

    FT_Face face;
    FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, &face);
    if (err != 0) {
        LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, ft_error_string(err));
        return;
    }

    unsigned char *bitmap = NULL;

    char font_name[1024];
    const char *family = face->family_name == NULL ? "(null)" : face->family_name;

    if (face->style_name == NULL || *(face->style_name) == '?') {
        snprintf(font_name, sizeof(font_name), "%s", family);
    } else {
        snprintf(font_name, sizeof(font_name), "%s %s", family, face->style_name);
    }

    // +1 for the terminating NUL copied by strcpy()
    meta_line_t *meta_name = malloc(sizeof(meta_line_t) + strlen(font_name) + 1);
    if (meta_name == NULL) {
        LOG_ERRORF(doc->filepath, "(font.c) Could not allocate metadata for font %s", font_name);
        goto cleanup;
    }
    meta_name->key = MetaFontName;
    strcpy(meta_name->strval, font_name);
    APPEND_META(doc, meta_name)

    // Thumbnails disabled
    if (ScanCtx.tn_size <= 0) {
        goto cleanup;
    }

    int pixel = 64;
    int num_chars = (int) strlen(font_name);

    err = FT_Set_Pixel_Sizes(face, 0, pixel);
    if (err != 0) {
        LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, ft_error_string(err))
        goto cleanup;
    }

    text_dimensions_t dimensions = text_dimension(font_name, face);

    bitmap = calloc(dimensions.width * dimensions.height, 1);
    if (bitmap == NULL) {
        LOG_ERRORF(doc->filepath, "(font.c) Could not allocate thumbnail bitmap for font %s", font_name);
        goto cleanup;
    }

    FT_Vector pen;
    pen.x = 0;

    char pc = 0;
    for (int i = 0; i < num_chars; i++) {
        char c = font_name[i];

        err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
        if (err != 0) {
            // Retry with the other letter case before giving up on this character
            c = c >= 'a' && c <= 'z' ? c - 32 : c + 32;
            err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
            if (err != 0) {
                LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, ft_error_string(err));
                continue;
            }
        }
        glyph_t glyph = ft_glyph_to_glyph(face->glyph);

        pen.x += kerning_offset(c, pc, face);
        if (pen.x <= 0) {
            pen.x = ABS(glyph.advance_width - glyph.width);
        }
        pen.y = dimensions.height - glyph.ascent - dimensions.baseline;

        draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap);

        pen.x += glyph.advance_width;
        pc = c;
    }

    dyn_buffer_t bmp_data = dyn_buffer_create();
    bmp_format(&bmp_data, dimensions, bitmap);

    store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) bmp_data.buf, bmp_data.cur);

    dyn_buffer_destroy(&bmp_data);

cleanup:
    free(bitmap);
    FT_Done_Face(face);
}

View File

@@ -1,9 +0,0 @@
#ifndef SIST2_FONT_H
#define SIST2_FONT_H
#include "src/sist.h"
// Parse an in-memory font file of buf_len bytes: append its name to doc's
// metadata and, when thumbnails are enabled, store a rendered preview.
// Does nothing when buf is NULL.
void parse_font(const char * buf, size_t buf_len, document_t *doc);
#endif

View File

@@ -1,381 +0,0 @@
#include "src/sist.h"
#include "src/ctx.h"
#define MIN_SIZE 32
#define AVIO_BUF_SIZE 8192
/**
 * Allocate and open an MJPEG encoder for a single dstW x dstH frame.
 *
 * @param dstW   Output width in pixels.
 * @param dstH   Output height in pixels.
 * @param qscale Value assigned to the encoder's i_quant_factor.
 * @return An opened encoder context that the caller must release with
 *         avcodec_free_context(), or NULL when the context could not be
 *         allocated or opened.
 */
__always_inline
AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
    AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
    AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
    if (jpeg == NULL) {
        printf("Could not allocate jpeg encoder context!\n");
        return NULL;
    }

    jpeg->width = dstW;
    jpeg->height = dstH;
    jpeg->time_base.den = 1000000;
    jpeg->time_base.num = 1;
    jpeg->i_quant_factor = qscale;
    jpeg->pix_fmt = AV_PIX_FMT_YUVJ420P;

    int ret = avcodec_open2(jpeg, jpeg_codec, NULL);
    if (ret != 0) {
        printf("Could not open jpeg encoder: %s!\n", av_err2str(ret));
        // The context is useless once opening failed; free it instead of leaking it
        avcodec_free_context(&jpeg);
        return NULL;
    }

    return jpeg;
}
__always_inline
AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
int dstW;
int dstH;
if (frame->width <= size && frame->height <= size) {
dstW = frame->width;
dstH = frame->height;
} else {
double ratio = (double) frame->width / frame->height;
if (frame->width > frame->height) {
dstW = size;
dstH = (int) (size / ratio);
} else {
dstW = (int) (size * ratio);
dstH = size;
}
}
if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
return NULL;
}
AVFrame *scaled_frame = av_frame_alloc();
struct SwsContext *ctx = sws_getContext(
decoder->width, decoder->height, decoder->pix_fmt,
dstW, dstH, AV_PIX_FMT_YUVJ420P,
SWS_FAST_BILINEAR, 0, 0, 0
);
int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1);
uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len);
av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);
sws_scale(ctx,
(const uint8_t *const *) frame->data, frame->linesize,
0, decoder->height,
scaled_frame->data, scaled_frame->linesize
);
scaled_frame->width = dstW;
scaled_frame->height = dstH;
scaled_frame->format = AV_PIX_FMT_YUV420P;
sws_freeContext(ctx);
return scaled_frame;
}
/**
 * Read packets from pFormatCtx until the decoder yields a frame for the
 * stream `stream_idx`.
 *
 * @return The frame filled by avcodec_receive_frame() (caller frees it), or
 *         NULL when av_read_frame() fails or reaches end of file first.
 */
__always_inline
AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, document_t *doc) {
    AVFrame *frame = av_frame_alloc();

    AVPacket avPacket;
    av_init_packet(&avPacket);

    int receive_ret;
    do {
        // Pull packets until one belonging to the requested stream shows up
        for (;;) {
            int read_frame_ret = av_read_frame(pFormatCtx, &avPacket);
            if (read_frame_ret != 0) {
                if (read_frame_ret != AVERROR_EOF) {
                    LOG_WARNINGF(doc->filepath,
                                 "(media.c) avcodec_read_frame() returned error code [%d] %s",
                                 read_frame_ret, av_err2str(read_frame_ret)
                    )
                }
                av_frame_free(&frame);
                av_packet_unref(&avPacket);
                return NULL;
            }

            if (avPacket.stream_index == stream_idx) {
                break;
            }

            // Packet from an audio/other stream: discard and keep reading
            av_packet_unref(&avPacket);
        }

        // Hand the packet to the decoder, then ask it for a frame
        int decode_ret = avcodec_send_packet(decoder, &avPacket);
        if (decode_ret != 0) {
            LOG_WARNINGF(doc->filepath,
                         "(media.c) avcodec_send_packet() returned error code [%d] %s",
                         decode_ret, av_err2str(decode_ret)
            )
        }
        av_packet_unref(&avPacket);

        receive_ret = avcodec_receive_frame(decoder, frame);
    } while (receive_ret == -EAGAIN); // decoder needs more input

    return frame;
}
// Copy tag_->value through a temporary text buffer into a newly malloc'd
// meta_line_t with key `keyname`, and append it to `doc` with APPEND_META.
// The expansion is a bare sequence of statements that declares the locals
// `tex` and `meta_tag`, so it must be used at most once per scope and always
// inside a braced block. Call sites do not add a trailing semicolon.
#define APPEND_TAG_META(doc, tag_, keyname) \
text_buffer_t tex = text_buffer_create(-1); \
text_buffer_append_string0(&tex, tag_->value); \
text_buffer_terminate_string(&tex); \
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \
meta_tag->key = keyname; \
strcpy(meta_tag->strval, tex.dyn_buffer.buf); \
APPEND_META(doc, meta_tag) \
text_buffer_destroy(&tex);
/**
 * Append artist/genre/title/album_artist/album tags found in the container
 * metadata to `doc`. Tag names are matched case-insensitively.
 *
 * @param pFormatCtx Opened format context whose metadata dictionary is scanned.
 * @param doc        Document receiving the metadata entries.
 */
__always_inline
void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
    AVDictionaryEntry *tag = NULL;
    while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
        // strncpy() does not terminate the destination when the source fills
        // it, so copy one byte less and terminate explicitly; otherwise the
        // lowercase loop below could run past the end of `key`.
        char key[32];
        strncpy(key, tag->key, sizeof(key) - 1);
        key[sizeof(key) - 1] = '\0';

        // tolower() requires a value representable as unsigned char
        for (char *ptr = key; *ptr; ++ptr) {
            *ptr = (char) tolower((unsigned char) *ptr);
        }

        if (strcmp(key, "artist") == 0) {
            APPEND_TAG_META(doc, tag, MetaArtist)
        } else if (strcmp(key, "genre") == 0) {
            APPEND_TAG_META(doc, tag, MetaGenre)
        } else if (strcmp(key, "title") == 0) {
            APPEND_TAG_META(doc, tag, MetaTitle)
        } else if (strcmp(key, "album_artist") == 0) {
            APPEND_TAG_META(doc, tag, MetaAlbumArtist)
        } else if (strcmp(key, "album") == 0) {
            APPEND_TAG_META(doc, tag, MetaAlbum)
        }
    }
}
/**
 * Append metadata for a video or a still image to `doc`.
 *
 * Still images (is_video == 0): the Artist / ImageDescription entries of the
 * frame's metadata dictionary.
 * Videos: duration and bitrate, followed by the title / comment / artist
 * entries of the container metadata.
 *
 * Title and artist entries are only emitted when include_audio_tags is non-zero.
 */
__always_inline
void
append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int include_audio_tags, int is_video) {
    AVDictionaryEntry *entry = NULL;

    if (!is_video) {
        // EXIF metadata
        while ((entry = av_dict_get(frame->metadata, "", entry, AV_DICT_IGNORE_SUFFIX)) != NULL) {
            if (strcmp(entry->key, "ImageDescription") == 0) {
                APPEND_TAG_META(doc, entry, MetaContent)
            } else if (include_audio_tags && strcmp(entry->key, "Artist") == 0) {
                APPEND_TAG_META(doc, entry, MetaArtist)
            }
        }
        return;
    }

    meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
    meta_duration->key = MetaMediaDuration;
    meta_duration->longval = pFormatCtx->duration / AV_TIME_BASE;
    APPEND_META(doc, meta_duration)

    meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
    meta_bitrate->key = MetaMediaBitrate;
    meta_bitrate->longval = pFormatCtx->bit_rate;
    APPEND_META(doc, meta_bitrate)

    while ((entry = av_dict_get(pFormatCtx->metadata, "", entry, AV_DICT_IGNORE_SUFFIX)) != NULL) {
        if (strcmp(entry->key, "comment") == 0) {
            APPEND_TAG_META(doc, entry, MetaContent)
        } else if (include_audio_tags && strcmp(entry->key, "title") == 0) {
            APPEND_TAG_META(doc, entry, MetaTitle)
        } else if (include_audio_tags && strcmp(entry->key, "artist") == 0) {
            APPEND_TAG_META(doc, entry, MetaArtist)
        }
    }
}
/**
 * Collect stream metadata from an opened media file and, when thumbnails are
 * enabled and a video stream exists, decode one frame, scale it, encode it as
 * JPEG and write it to the thumbnail store under doc->uuid.
 *
 * Takes ownership of pFormatCtx: it is closed on every path.
 *
 * @param pFormatCtx Format context already opened with avformat_open_input().
 * @param doc        Document receiving the metadata entries.
 */
void parse_media(AVFormatContext *pFormatCtx, document_t *doc) {
    int video_stream = -1;
    int audio_stream = -1;

    // Declared up front so that every exit can share the cleanup section below
    AVStream *stream = NULL;
    AVCodec *video_codec = NULL;
    AVCodecContext *decoder = NULL;
    AVCodecContext *jpeg_encoder = NULL;
    AVFrame *frame = NULL;
    AVFrame *scaled_frame = NULL;
    AVPacket jpeg_packet;
    int encode_ret;

    avformat_find_stream_info(pFormatCtx, NULL);

    // Streams are walked from last to first; the first audio and the first
    // video stream met in that order are the ones described.
    for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) {
        AVStream *cur = pFormatCtx->streams[i];

        if (cur->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            if (audio_stream == -1) {
                meta_line_t *meta_audio = malloc(sizeof(meta_line_t));
                meta_audio->key = MetaMediaAudioCodec;
                meta_audio->intval = cur->codecpar->codec_id;
                APPEND_META(doc, meta_audio)

                append_audio_meta(pFormatCtx, doc);
                audio_stream = i;
            }
        } else if (cur->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
            if (video_stream == -1) {
                meta_line_t *meta_vid = malloc(sizeof(meta_line_t));
                meta_vid->key = MetaMediaVideoCodec;
                meta_vid->intval = cur->codecpar->codec_id;
                APPEND_META(doc, meta_vid)

                meta_line_t *meta_w = malloc(sizeof(meta_line_t));
                meta_w->key = MetaWidth;
                meta_w->intval = cur->codecpar->width;
                APPEND_META(doc, meta_w)

                meta_line_t *meta_h = malloc(sizeof(meta_line_t));
                meta_h->key = MetaHeight;
                meta_h->intval = cur->codecpar->height;
                APPEND_META(doc, meta_h)

                video_stream = i;
            }
        }
    }

    if (video_stream == -1 || ScanCtx.tn_size <= 0) {
        goto cleanup;
    }

    stream = pFormatCtx->streams[video_stream];
    if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
        goto cleanup;
    }

    // Decoder
    video_codec = avcodec_find_decoder(stream->codecpar->codec_id);
    decoder = avcodec_alloc_context3(video_codec);
    if (decoder == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate decoder context")
        goto cleanup;
    }
    avcodec_parameters_to_context(decoder, stream->codecpar);
    avcodec_open2(decoder, video_codec, NULL);

    // Seek to 10% of the stream so the thumbnail is not the very first frame.
    // NOTE(review): every retry issues the same request — confirm that
    // repeating an identical seek can succeed after an earlier failure.
    if (stream->nb_frames > 1 && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
        int seek_ret = 0;
        for (int i = 20; i >= 0; i--) {
            seek_ret = av_seek_frame(pFormatCtx, video_stream,
                                     stream->duration * 0.10, 0);
            if (seek_ret == 0) {
                break;
            }
        }
    }

    frame = read_frame(pFormatCtx, decoder, video_stream, doc);
    if (frame == NULL) {
        goto cleanup;
    }

    append_video_meta(pFormatCtx, frame, doc, audio_stream == -1, stream->nb_frames > 1);

    // Scale frame
    scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size);
    if (scaled_frame == NULL) {
        goto cleanup;
    }

    // Encode frame to jpeg
    jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ScanCtx.tn_qscale);
    if (jpeg_encoder == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate jpeg encoder")
        goto cleanup;
    }

    encode_ret = avcodec_send_frame(jpeg_encoder, scaled_frame);
    if (encode_ret == 0) {
        av_init_packet(&jpeg_packet);
        encode_ret = avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
    }
    if (encode_ret != 0) {
        // No packet was produced: do not store an empty thumbnail
        LOG_WARNINGF(doc->filepath,
                     "(media.c) Could not encode thumbnail, error code [%d] %s",
                     encode_ret, av_err2str(encode_ret)
        )
        goto cleanup;
    }

    // Save thumbnail
    store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data,
                jpeg_packet.size);
    av_packet_unref(&jpeg_packet);

cleanup:
    av_frame_free(&frame);
    if (scaled_frame != NULL) {
        // The pixel buffer was allocated separately by scale_frame()
        av_free(*scaled_frame->data);
        av_frame_free(&scaled_frame);
    }
    avcodec_free_context(&jpeg_encoder);
    avcodec_free_context(&decoder);

    avformat_close_input(&pFormatCtx);
    avformat_free_context(pFormatCtx);
}
/**
 * Open the media file at `filepath` with libavformat and pass it to
 * parse_media(). Failures are logged against doc->filepath.
 */
void parse_media_filename(const char *filepath, document_t *doc) {
    AVFormatContext *pFormatCtx = avformat_alloc_context();
    if (pFormatCtx == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
        return;
    }

    int open_ret = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
    if (open_ret >= 0) {
        // parse_media() closes the context when it is done
        parse_media(pFormatCtx, doc);
        return;
    }

    LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", open_ret, av_err2str(open_ret))
    avformat_close_input(&pFormatCtx);
    avformat_free_context(pFormatCtx);
}
/**
 * AVIO read callback: forwards the request to the vfile's own read function
 * and maps a zero-byte read to AVERROR_EOF. Any other return value is passed
 * through unchanged.
 *
 * @param ptr      The struct vfile registered as the AVIO opaque pointer.
 * @param buf      Destination buffer.
 * @param buf_size Capacity of buf in bytes.
 */
int vfile_read(void *ptr, uint8_t *buf, int buf_size) {
    struct vfile *vf = ptr;
    int n_read = vf->read(vf, buf, buf_size);

    return n_read == 0 ? AVERROR_EOF : n_read;
}
/**
 * Parse media that is only reachable through a vfile (no path on disk) by
 * feeding libavformat through a custom, read-only AVIO context.
 *
 * @param f   Virtual file whose read callback supplies the data.
 * @param doc Document receiving the metadata; failures are logged against
 *            doc->filepath.
 */
void parse_media_vfile(struct vfile *f, document_t *doc) {
    AVFormatContext *pFormatCtx = avformat_alloc_context();
    if (pFormatCtx == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
        return;
    }

    unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
    if (buffer == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate AVIO buffer with av_malloc()")
        avformat_free_context(pFormatCtx);
        return;
    }

    AVIOContext *io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);
    if (io_ctx == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avio_alloc_context()")
        av_free(buffer);
        avformat_free_context(pFormatCtx);
        return;
    }

    pFormatCtx->pb = io_ctx;
    pFormatCtx->flags |= AVFMT_FLAG_CUSTOM_IO;

    int res = avformat_open_input(&pFormatCtx, "", NULL, NULL);
    if (res < 0) {
        // An I/O error here means the format needs to seek, which this
        // read-only context cannot do; that case is skipped silently.
        if (res != AVERROR(EIO)) {
            LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
        }
        // The internal buffer may have been replaced, so free the one the
        // context currently holds.
        av_free(io_ctx->buffer);
        avio_context_free(&io_ctx);
        avformat_close_input(&pFormatCtx);
        avformat_free_context(pFormatCtx);
        return;
    }

    parse_media(pFormatCtx, doc);
    av_free(io_ctx->buffer);
    avio_context_free(&io_ctx);
}

View File

@@ -1,14 +0,0 @@
#ifndef SIST2_MEDIA_H
#define SIST2_MEDIA_H
#include "src/sist.h"
// Size thresholds in bytes for video and image files. Parenthesized so the
// macros expand to a single value inside any surrounding expression.
#define MIN_VIDEO_SIZE (1024 * 64)
#define MIN_IMAGE_SIZE (1024 * 2)
// Parse the media file located at `filepath` on disk and record what was
// found in `doc`.
void parse_media_filename(const char * filepath, document_t *doc);
// Same as parse_media_filename(), but the data is read through the vfile's
// read callback instead of being opened by path.
void parse_media_vfile(struct vfile *f, document_t *doc);
#endif

View File

@@ -1,14 +1,15 @@
#ifndef SIST2_MIME_H
#define SIST2_MIME_H
#include "src/sist.h"
#include "../sist.h"
#define MAJOR_MIME(mime_id) (mime_id & 0x0FFF0000) >> 16
#define MAJOR_MIME(mime_id) (mime_id & 0x000F0000) >> 16
#define MIME_EMPTY 1
#define MIME_SIST2_SIDECAR 2
#define DONT_PARSE 0x80000000
#define SHOULD_PARSE(mime_id) (mime_id & DONT_PARSE) != DONT_PARSE && mime_id != 0
#define SHOULD_PARSE(mime_id) (ScanCtx.fast == 0 && (mime_id & DONT_PARSE) != DONT_PARSE && mime_id != 0)
#define PDF_MASK 0x40000000
#define IS_PDF(mime_id) (mime_id & PDF_MASK) == PDF_MASK
@@ -25,6 +26,15 @@
// Single-bit flags carried in a mime id, each with a predicate that tests it.
// Arguments and expansions are fully parenthesized so the predicates stay
// correct when negated or when given a compound expression.
#define DOC_MASK 0x04000000
#define IS_DOC(mime_id) (((mime_id) & DOC_MASK) == DOC_MASK)
#define MOBI_MASK 0x02000000
#define IS_MOBI(mime_id) (((mime_id) & MOBI_MASK) == MOBI_MASK)
#define MARKUP_MASK 0x01000000
#define IS_MARKUP(mime_id) (((mime_id) & MARKUP_MASK) == MARKUP_MASK)
#define RAW_MASK 0x00800000
#define IS_RAW(mime_id) (((mime_id) & RAW_MASK) == RAW_MASK)
enum major_mime {
MimeInvalid = 0,
MimeModel = 1,

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,16 @@
#include "parse.h"
#include "src/sist.h"
#include "src/ctx.h"
#include "mime.h"
#include "src/io/serialize.h"
#include "src/parsing/sidecar.h"
__thread magic_t Magic = NULL;
#include <magic.h>
// Size thresholds in bytes for video and image files. Parenthesized so the
// macros expand to a single value inside any surrounding expression.
#define MIN_VIDEO_SIZE (1024 * 64)
#define MIN_IMAGE_SIZE (1024 * 2)
int fs_read(struct vfile *f, void *buf, size_t size) {
@@ -24,62 +33,47 @@ void fs_close(struct vfile *f) {
}
}
void *read_all(parse_job_t *job, const char *buf, int bytes_read) {
void *full_buf;
if (job->info.st_size <= bytes_read) {
full_buf = malloc(job->info.st_size);
memcpy(full_buf, buf, job->info.st_size);
} else {
full_buf = malloc(job->info.st_size);
memcpy(full_buf, buf, bytes_read);
int ret = job->vfile.read(&job->vfile, full_buf + bytes_read, job->info.st_size - bytes_read);
if (ret == -1) {
LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
return NULL;
void fs_reset(struct vfile *f) {
if (f->fd != -1) {
lseek(f->fd, 0, SEEK_SET);
}
}
return full_buf;
}
#define IS_GIT_OBJ (strlen(doc.filepath + doc.base) == 38 && (strstr(doc.filepath, "objects") != NULL))
void parse(void *arg) {
parse_job_t *job = arg;
document_t doc;
int inc_ts = incremental_get(ScanCtx.original_table, job->info.st_ino);
if (inc_ts != 0 && inc_ts == job->info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, job->info.st_ino);
return;
}
if (Magic == NULL) {
Magic = magic_open(MAGIC_MIME_TYPE);
}
doc.filepath = job->filepath;
doc.ext = (short) job->ext;
doc.base = (short) job->base;
char *rel_path = doc.filepath + ScanCtx.index.desc.root_len;
MD5((unsigned char *) rel_path, strlen(rel_path), doc.path_md5);
doc.meta_head = NULL;
doc.meta_tail = NULL;
doc.mime = 0;
doc.size = job->info.st_size;
doc.ino = job->info.st_ino;
doc.mtime = job->info.st_mtim.tv_sec;
doc.size = job->vfile.info.st_size;
doc.mtime = job->vfile.info.st_mtim.tv_sec;
uuid_generate(doc.uuid);
char *buf[PARSE_BUF_SIZE];
if (LogCtx.very_verbose) {
char uuid_str[UUID_STR_LEN];
uuid_unparse(doc.uuid, uuid_str);
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", uuid_str)
int inc_ts = incremental_get(ScanCtx.original_table, doc.path_md5);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, doc.path_md5);
return;
}
if (job->info.st_size == 0) {
char *buf[MAGIC_BUF_SIZE];
if (LogCtx.very_verbose) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex(doc.path_md5, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", path_md5_str)
}
if (job->vfile.info.st_size == 0) {
doc.mime = MIME_EMPTY;
} else if (*(job->filepath + job->ext) != '\0' && (job->ext - job->base != 1)) {
doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
@@ -87,82 +81,113 @@ void parse(void *arg) {
int bytes_read = 0;
if (doc.mime == 0) {
if (doc.mime == 0 && !ScanCtx.fast) {
if (IS_GIT_OBJ) {
goto abort;
}
// Get mime type with libmagic
bytes_read = job->vfile.read(&job->vfile, buf, PARSE_BUF_SIZE);
if (bytes_read == -1) {
LOG_WARNINGF(job->filepath, "read() Error: %s", strerror(errno))
if (!job->vfile.is_fs_file) {
LOG_WARNING(job->filepath,
"Guessing mime type with libmagic inside archive files is not currently supported");
goto abort;
}
bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE);
if (bytes_read < 0) {
if (job->vfile.is_fs_file) {
LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
} else {
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc))
}
CLOSE_FILE(job->vfile)
return;
}
const char *magic_mime_str = magic_buffer(Magic, buf, bytes_read);
magic_t magic = magic_open(MAGIC_MIME_TYPE);
magic_load(magic, NULL);
const char *magic_mime_str = magic_buffer(magic, buf, bytes_read);
if (magic_mime_str != NULL) {
doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
LOG_DEBUGF(job->filepath, "libmagic: %s", magic_mime_str);
if (doc.mime == 0) {
LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str);
}
}
job->vfile.reset(&job->vfile);
magic_close(magic);
}
int mmime = MAJOR_MIME(doc.mime);
if (!(SHOULD_PARSE(doc.mime))) {
} else if (IS_RAW(doc.mime)) {
parse_raw(&ScanCtx.raw_ctx, &job->vfile, &doc);
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
if (job->vfile.is_fs_file) {
parse_media_filename(job->filepath, &doc);
} else {
parse_media_vfile(&job->vfile, &doc);
}
parse_media(&ScanCtx.media_ctx, &job->vfile, &doc);
} else if (IS_PDF(doc.mime)) {
void *pdf_buf = read_all(job, (char *) buf, bytes_read);
parse_pdf(pdf_buf, doc.size, &doc);
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc.mime), &doc);
if (pdf_buf != buf && pdf_buf != NULL) {
free(pdf_buf);
} else if (mmime == MimeText && ScanCtx.text_ctx.content_size > 0) {
if (IS_MARKUP(doc.mime)) {
parse_markup(&ScanCtx.text_ctx, &job->vfile, &doc);
} else {
parse_text(&ScanCtx.text_ctx, &job->vfile, &doc);
}
} else if (mmime == MimeText && ScanCtx.content_size > 0) {
parse_text(bytes_read, &job->vfile, (char *) buf, &doc);
} else if (IS_FONT(doc.mime)) {
void *font_buf = read_all(job, (char *) buf, bytes_read);
parse_font(font_buf, doc.size, &doc);
parse_font(&ScanCtx.font_ctx, &job->vfile, &doc);
if (font_buf != buf && font_buf != NULL) {
free(font_buf);
}
} else if (
ScanCtx.archive_mode != ARC_MODE_SKIP && (
ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
IS_ARC(doc.mime) ||
(IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
)) {
parse_archive(&job->vfile, &doc);
} else if (ScanCtx.content_size > 0 && IS_DOC(doc.mime)) {
void *doc_buf = read_all(job, (char *) buf, bytes_read);
parse_doc(doc_buf, doc.size, &doc);
parse_archive(&ScanCtx.arc_ctx, &job->vfile, &doc);
} else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc.mime)) {
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, &doc);
} else if (is_cbr(&ScanCtx.comic_ctx, doc.mime) || is_cbz(&ScanCtx.comic_ctx, doc.mime)) {
parse_comic(&ScanCtx.comic_ctx, &job->vfile, &doc);
} else if (IS_MOBI(doc.mime)) {
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc);
} else if (doc.mime == MIME_SIST2_SIDECAR) {
parse_sidecar(&job->vfile, &doc);
CLOSE_FILE(job->vfile)
return;
} else if (is_msdoc(&ScanCtx.msdoc_ctx, doc.mime)) {
parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, &doc);
}
if (doc_buf != buf && doc_buf != NULL) {
free(doc_buf);
}
}
abort:
//Parent meta
if (!uuid_is_null(job->parent)) {
char tmp[UUID_STR_LEN];
uuid_unparse(job->parent, tmp);
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
if (!md5_digest_is_null(job->parent)) {
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + MD5_STR_LENGTH);
meta_parent->key = MetaParent;
strcpy(meta_parent->strval, tmp);
buf2hex(job->parent, MD5_DIGEST_LENGTH, meta_parent->str_val);
APPEND_META((&doc), meta_parent)
doc.has_parent = TRUE;
} else {
doc.has_parent = FALSE;
}
write_document(&doc);
CLOSE_FILE(job->vfile)
}
// Cleanup entry point of the parse module; the body is intentionally empty.
void cleanup_parse() {
// noop
}

View File

@@ -1,13 +1,16 @@
#ifndef SIST2_PARSE_H
#define SIST2_PARSE_H
#include "src/sist.h"
#include "../sist.h"
#define PARSE_BUF_SIZE 4096
#define MAGIC_BUF_SIZE 4096 * 6
int fs_read(struct vfile *f, void *buf, size_t size);
void fs_close(struct vfile *f);
void fs_reset(struct vfile *f);
void parse(void *arg);
void cleanup_parse();
#endif

View File

@@ -1,336 +0,0 @@
#include "pdf.h"
#include "src/ctx.h"
#define MIN_OCR_SIZE 128
__thread text_buffer_t thread_buffer;
// Load page 0 of fzdoc, render it so that its longer side is ScanCtx.tn_size
// units, encode the result as PNG and write it to the thumbnail store under
// doc->uuid.
//
// Returns the loaded page on success; the caller then owns it and must drop
// it. Returns NULL when loading, rendering or PNG encoding fails, in which
// case the page has already been dropped here.
fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
int err = 0;
fz_page *cover = NULL;
// cover is assigned inside fz_try and read after it
fz_var(cover);
fz_try(ctx)
cover = fz_load_page(ctx, fzdoc, 0);
fz_catch(ctx)
// Placeholder value: this is not a MuPDF error code
err = 1;
if (err != 0) {
fz_drop_page(ctx, cover);
LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
return NULL;
}
// Uniform scale factor that maps the longer page side onto tn_size
fz_rect bounds = fz_bound_page(ctx, cover);
float scale;
float w = (float) bounds.x1 - bounds.x0;
float h = (float) bounds.y1 - bounds.y0;
if (w > h) {
scale = (float) ScanCtx.tn_size / w;
} else {
scale = (float) ScanCtx.tn_size / h;
}
fz_matrix m = fz_scale(scale, scale);
bounds = fz_transform_rect(bounds, m);
fz_irect bbox = fz_round_rect(bounds);
// RGB pixmap without alpha, every byte preset to 0xFF before drawing
fz_pixmap *pixmap = fz_new_pixmap_with_bbox(ctx, ctx->colorspace->rgb, bbox, NULL, 0);
fz_clear_pixmap_with_value(ctx, pixmap, 0xFF);
fz_device *dev = fz_new_draw_device(ctx, m, pixmap);
fz_var(err);
fz_try(ctx)
{
// Page rendering is serialized through ScanCtx.mupdf_mu; the lock is
// released in fz_always below whether or not fz_run_page() throws.
pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_run_page(ctx, cover, dev, fz_identity, NULL);
}
fz_always(ctx)
{
fz_close_device(ctx, dev);
fz_drop_device(ctx, dev);
pthread_mutex_unlock(&ScanCtx.mupdf_mu);
}
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message)
fz_drop_page(ctx, cover);
fz_drop_pixmap(ctx, pixmap);
return NULL;
}
// Encode the rendered pixmap as PNG
fz_buffer *fzbuf = NULL;
fz_var(fzbuf);
fz_var(err);
fz_try(ctx)
fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params);
fz_catch(ctx)
err = ctx->error.errcode;
if (err == 0) {
unsigned char *tn_buf;
size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf);
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
}
// Buffer and pixmap are released on both the success and the failure path
fz_drop_buffer(ctx, fzbuf);
fz_drop_pixmap(ctx, pixmap);
if (err != 0) {
LOG_WARNINGF(doc->filepath, "fz_new_buffer_from_pixmap_as_png() returned error code [%d] %s", err,
ctx->error.message)
fz_drop_page(ctx, cover);
return NULL;
}
return cover;
}
/**
 * MuPDF warning/error print callback: forwards the message to the log as a
 * warning against the document's path, but only when verbose logging is on.
 *
 * @param user    The document_t registered as print_user.
 * @param message Text supplied by MuPDF.
 */
void fz_err_callback(void *user, UNUSED(const char *message)) {
    if (!LogCtx.verbose) {
        return;
    }

    document_t *doc = (document_t *) user;
    LOG_WARNINGF(doc->filepath, "FZ: %s", message)
}
/**
 * Prepare a freshly created MuPDF context: disable ICC, register the document
 * handlers and route both warnings and errors to fz_err_callback with `doc`
 * as the user pointer.
 */
__always_inline
void init_ctx(fz_context *ctx, document_t *doc) {
    fz_disable_icc(ctx);
    fz_register_document_handlers(ctx);

    // Errors and warnings share one callback
    ctx->error.print = fz_err_callback;
    ctx->error.print_user = doc;

    ctx->warn.print = fz_err_callback;
    ctx->warn.print_user = doc;
}
/**
 * Append every character of a structured-text block to `tex`.
 *
 * @param block Block to read; blocks that are not text blocks are ignored.
 * @param tex   Destination text buffer.
 * @return TEXT_BUF_FULL as soon as the buffer reports it is full, 0 otherwise.
 */
int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
    if (block->type != FZ_STEXT_BLOCK_TEXT) {
        return 0;
    }

    for (fz_stext_line *line = block->u.t.first_line; line != NULL; line = line->next) {
        for (fz_stext_char *ch = line->first_char; ch != NULL; ch = ch->next) {
            if (text_buffer_append_char(tex, ch->c) == TEXT_BUF_FULL) {
                return TEXT_BUF_FULL;
            }
        }
    }

    return 0;
}
/**
 * fill_image device callback used for OCR: images larger than MIN_OCR_SIZE in
 * both dimensions are handed to Tesseract, and the recognized text (minus its
 * last character) is appended to the thread-local text buffer.
 *
 * Only ctx and img are used; the remaining parameters exist to match the
 * device callback signature.
 */
void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
                fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha),
                UNUSED(fz_color_params color_params)) {

    int l2factor = 0;

    if (img->w <= MIN_OCR_SIZE || img->h <= MIN_OCR_SIZE) {
        return;
    }

    fz_pixmap *pix = img->get_pixmap(ctx, img, NULL, img->w, img->h, &l2factor);

    // NOTE(review): img->h is tested a second time here; pix->w may have been
    // intended — confirm before changing the condition.
    if (pix->h > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && img->xres != 0) {
        TessBaseAPI *api = TessBaseAPICreate();

        // Init3 returns 0 on success; skip recognition when it fails
        if (TessBaseAPIInit3(api, TESS_DATAPATH, ScanCtx.tesseract_lang) == 0) {
            TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride);
            TessBaseAPISetSourceResolution(api, pix->xres);

            char *text = TessBaseAPIGetUTF8Text(api);
            if (text != NULL) {
                size_t len = strlen(text);
                // Guard the `len - 1` below against wrapping around on empty output
                if (len > 0) {
                    text_buffer_append_string(&thread_buffer, text, len - 1);
                }
                LOG_DEBUGF(
                        "pdf.c",
                        "(OCR) %dx%d got %dB from tesseract (%s), buffer:%dB",
                        pix->w, pix->h, (int) len, ScanCtx.tesseract_lang, thread_buffer.dyn_buffer.cur
                )
                // The string returned by GetUTF8Text belongs to the caller
                TessDeleteText(text);
            }
            TessBaseAPIEnd(api);
        }

        TessBaseAPIDelete(api);
    }

    // The pixmap is released on every path, not only after a successful OCR
    fz_drop_pixmap(ctx, pix);
}
/**
 * Parse a document held in memory with MuPDF: append its title, render a
 * cover thumbnail when thumbnails are enabled, and extract page text (plus
 * OCR text when a Tesseract language is configured) up to
 * ScanCtx.content_size.
 *
 * @param buf     Document bytes; the function does nothing when NULL.
 * @param buf_len Size of buf in bytes.
 * @param doc     Document receiving the MetaTitle / MetaContent entries.
 */
void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
    if (buf == NULL) {
        return;
    }

    // NOTE(review): this one-time initialization is not synchronized; confirm
    // that the first call cannot happen concurrently from several threads.
    static int mu_is_initialized = 0;
    if (!mu_is_initialized) {
        pthread_mutex_init(&ScanCtx.mupdf_mu, NULL);
        mu_is_initialized = 1;
    }

    fz_context *ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
    init_ctx(ctx, doc);

    int err = 0;
    fz_document *fzdoc = NULL;
    fz_stream *stream = NULL;
    fz_var(fzdoc);
    fz_var(stream);
    fz_var(err);

    fz_try(ctx)
    {
        stream = fz_open_memory(ctx, buf, buf_len);
        fzdoc = fz_open_document_with_stream(ctx, mime_get_mime_text(doc->mime), stream);
    }
    fz_catch(ctx)
    {
        err = ctx->error.errcode;
    }

    if (err) {
        fz_drop_stream(ctx, stream);
        fz_drop_document(ctx, fzdoc);
        fz_drop_context(ctx);
        return;
    }

    // Title lookup is best-effort: a failure simply leaves the title empty
    char title[4096] = {'\0',};
    fz_try(ctx)
    {
        fz_lookup_metadata(ctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title));
    }
    fz_catch(ctx)
    {
    }

    if (strlen(title) > 0) {
        meta_line_t *meta_content = malloc(sizeof(meta_line_t) + strlen(title));
        meta_content->key = MetaTitle;
        strcpy(meta_content->strval, title);
        APPEND_META(doc, meta_content)
    }

    int page_count = -1;
    fz_var(err);
    fz_try(ctx)
    {
        page_count = fz_count_pages(ctx, fzdoc);
    }
    fz_catch(ctx)
    {
        err = ctx->error.errcode;
    }

    if (err) {
        LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, ctx->error.message)
        fz_drop_stream(ctx, stream);
        fz_drop_document(ctx, fzdoc);
        fz_drop_context(ctx);
        return;
    }

    // The first page is loaded exactly once: either as a side effect of
    // rendering the thumbnail, or directly when thumbnails are disabled.
    fz_page *cover = NULL;
    if (ScanCtx.tn_size > 0) {
        cover = render_cover(ctx, doc, fzdoc);
    } else {
        fz_var(cover);
        fz_try(ctx)
        {
            cover = fz_load_page(ctx, fzdoc, 0);
        }
        fz_catch(ctx)
        {
            cover = NULL;
        }
    }

    if (cover == NULL) {
        fz_drop_stream(ctx, stream);
        fz_drop_document(ctx, fzdoc);
        fz_drop_context(ctx);
        return;
    }

    if (ScanCtx.content_size > 0) {
        fz_stext_options opts = {0};
        thread_buffer = text_buffer_create(ScanCtx.content_size);

        for (int current_page = 0; current_page < page_count; current_page++) {
            fz_page *page = NULL;
            if (current_page == 0) {
                // Reuse the already loaded cover; it is dropped with the other pages below
                page = cover;
            } else {
                fz_var(err);
                fz_try(ctx)
                {
                    page = fz_load_page(ctx, fzdoc, current_page);
                }
                fz_catch(ctx)
                {
                    err = ctx->error.errcode;
                }
                if (err != 0) {
                    LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
                    text_buffer_destroy(&thread_buffer);
                    fz_drop_page(ctx, page);
                    fz_drop_stream(ctx, stream);
                    fz_drop_document(ctx, fzdoc);
                    fz_drop_context(ctx);
                    return;
                }
            }

            fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));

            fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
            // Callbacks that are not needed for text extraction are disabled
            dev->stroke_path = NULL;
            dev->stroke_text = NULL;
            dev->clip_text = NULL;
            dev->clip_stroke_path = NULL;
            dev->clip_stroke_text = NULL;

            if (ScanCtx.tesseract_lang != NULL) {
                dev->fill_image = fill_image;
            }

            fz_var(err);
            fz_try(ctx)
            {
                fz_run_page(ctx, page, dev, fz_identity, NULL);
            }
            fz_always(ctx)
            {
                fz_close_device(ctx, dev);
                fz_drop_device(ctx, dev);
            }
            fz_catch(ctx)
            {
                err = ctx->error.errcode;
            }

            if (err != 0) {
                LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message)
                text_buffer_destroy(&thread_buffer);
                fz_drop_page(ctx, page);
                fz_drop_stext_page(ctx, stext);
                fz_drop_stream(ctx, stream);
                fz_drop_document(ctx, fzdoc);
                fz_drop_context(ctx);
                return;
            }

            fz_stext_block *block = stext->first_block;
            while (block != NULL) {
                int ret = read_stext_block(block, &thread_buffer);
                if (ret == TEXT_BUF_FULL) {
                    break;
                }
                block = block->next;
            }
            fz_drop_stext_page(ctx, stext);
            fz_drop_page(ctx, page);

            // Stop reading pages once the content limit has been reached
            if (thread_buffer.dyn_buffer.cur >= thread_buffer.dyn_buffer.size) {
                break;
            }
        }
        text_buffer_terminate_string(&thread_buffer);

        meta_line_t *meta_content = malloc(sizeof(meta_line_t) + thread_buffer.dyn_buffer.cur);
        meta_content->key = MetaContent;
        memcpy(meta_content->strval, thread_buffer.dyn_buffer.buf, thread_buffer.dyn_buffer.cur);
        APPEND_META(doc, meta_content)

        text_buffer_destroy(&thread_buffer);
    } else {
        // No text extraction: the page loop that would have dropped the cover
        // never runs, so release it here.
        fz_drop_page(ctx, cover);
    }

    fz_drop_stream(ctx, stream);
    fz_drop_document(ctx, fzdoc);
    fz_drop_context(ctx);
}

View File

@@ -1,9 +0,0 @@
#ifndef SIST2_PDF_H
#define SIST2_PDF_H
#include "src/sist.h"
// Parse a document held in memory (buf_len bytes at buf) and record its title,
// text content and cover thumbnail for `doc`. Does nothing when buf is NULL.
void parse_pdf(void *buf, size_t buf_len, document_t *doc);
#endif

Some files were not shown because too many files have changed in this diff Show More