Compare commits

...

46 Commits

Author SHA1 Message Date
4e1109c528 Merge pull request #288 from simon987/dev
v2.12.1
2022-04-23 10:30:19 -04:00
f87de89275 Version bump 2022-04-23 10:29:50 -04:00
1205981a11 CURL error handling, fix ES version handling, support for ES8, add --es-insecure-ssl argument 2022-04-23 10:29:31 -04:00
09613eaaf9 import magic database as a blob as last resort to make it work 2022-04-18 12:55:22 -04:00
a74726be55 Merge pull request #285 from simon987/dependabot/npm_and_yarn/sist2-vue/async-2.6.4
Bump async from 2.6.3 to 2.6.4 in /sist2-vue
2022-04-17 13:42:40 -04:00
dependabot[bot]
cb228052d2 Bump async from 2.6.3 to 2.6.4 in /sist2-vue
Bumps [async](https://github.com/caolan/async) from 2.6.3 to 2.6.4.
- [Release notes](https://github.com/caolan/async/releases)
- [Changelog](https://github.com/caolan/async/blob/v2.6.4/CHANGELOG.md)
- [Commits](https://github.com/caolan/async/compare/v2.6.3...v2.6.4)

---
updated-dependencies:
- dependency-name: async
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-04-17 17:41:14 +00:00
fe56da95d5 Merge pull request #284 from simon987/dev
v2.12.0
2022-04-17 13:38:42 -04:00
9f2ad58f78 bump version 2022-04-17 12:30:14 -04:00
84d9bf4323 Fix cmake libmobi build maybe 2022-04-17 12:23:45 -04:00
90aa90f3f3 Update antiword 2022-04-17 11:47:33 -04:00
3fad07360c Merge pull request #283 from simon987/dependabot/npm_and_yarn/sist2-vue/minimist-1.2.6
Bump minimist from 1.2.5 to 1.2.6 in /sist2-vue
2022-04-17 10:12:10 -04:00
dependabot[bot]
00c3a640d0 Bump minimist from 1.2.5 to 1.2.6 in /sist2-vue
Bumps [minimist](https://github.com/substack/minimist) from 1.2.5 to 1.2.6.
- [Release notes](https://github.com/substack/minimist/releases)
- [Commits](https://github.com/substack/minimist/compare/1.2.5...1.2.6)

---
updated-dependencies:
- dependency-name: minimist
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-04-17 12:53:12 +00:00
730e495bde Enable highlight in document info modal, remove /d/ endpoint 2022-04-16 16:11:17 -04:00
54df1dfcf7 Fix spacebar not working in search bar 2022-04-16 13:51:36 -04:00
a75675ecea Fix thumbnail copy bug, update tests 2022-04-16 11:48:43 -04:00
901035da15 Build libmobi with cmake, update to 0.10 2022-04-15 16:01:40 -04:00
ceb7265639 Fix max_analyzed_offset (again?) 2022-04-15 15:35:39 -04:00
036ed9ea1e Update libmagic cmake things 2022-04-15 15:35:20 -04:00
779303a2f7 Print body response when task id cannot be read 2022-04-14 16:24:56 -04:00
23aee14c07 Fix exec-script & fix memory leak in exec_args_validate 2022-04-14 15:43:24 -04:00
50b9201be3 Merge pull request #279 from simon987/dependabot/npm_and_yarn/sist2-vue/minimist-1.2.6
Bump minimist from 1.2.5 to 1.2.6 in /sist2-vue
2022-04-05 20:12:03 -04:00
dependabot[bot]
14cfb15661 Bump minimist from 1.2.5 to 1.2.6 in /sist2-vue
Bumps [minimist](https://github.com/substack/minimist) from 1.2.5 to 1.2.6.
- [Release notes](https://github.com/substack/minimist/releases)
- [Commits](https://github.com/substack/minimist/compare/1.2.5...1.2.6)

---
updated-dependencies:
- dependency-name: minimist
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-03-31 23:28:25 +00:00
125c85d9bb localize tag filter bar 2022-03-18 09:15:07 -04:00
474eb95aff Update antiword 2022-03-17 15:08:55 -04:00
acf7453057 Add test for large msdoc 2022-03-17 15:05:48 -04:00
9a949d2694 Use TRUE rather than 1 2022-03-17 09:13:19 -04:00
dbdc75dcb8 Add filter bar in tag picker 2022-03-17 09:12:43 -04:00
c575fca91d Do not store duration or bitrate when the value is 0 or for images 2022-03-05 21:24:59 -05:00
0bf4244683 Do blank search on page reload when media tab auto-reload is disabled 2022-03-05 20:56:02 -05:00
eea5ce75f3 Fix query args updating outside of the search page 2022-03-05 20:42:13 -05:00
9b81856353 Fix some errors in keyboard handler 2022-03-05 20:33:45 -05:00
a10d6952ba Fix segfault in print_errors() 2022-03-05 20:33:21 -05:00
2b639bd4ac Error handling in get_es_version() 2022-03-05 14:59:37 -05:00
e9f92330fd Cleanup macros 2022-03-05 11:18:07 -05:00
cb37a6e6c1 Fix thumbnail bug in serve 2022-03-05 11:18:07 -05:00
b82c26f0fb Add mt_ int_ prefixes in InfoTable 2022-03-05 11:18:06 -05:00
16a4fb4874 Rework document IDs 2022-03-05 11:18:06 -05:00
cdc4c0ad3d Cap maximum thumbnail count to 1000 2022-03-05 11:18:06 -05:00
d034851ecb Setup keyboard shortcuts for Lightbox, add option to disable animations 2022-03-05 11:18:06 -05:00
ea7dfe7c84 Update to mongoose 7.6 2022-03-05 11:18:05 -05:00
8bfd010f4b Update dev ES docker script 2022-03-05 11:18:05 -05:00
499eb2b2e4 Un-break raw file thumbnails 2022-03-05 11:18:05 -05:00
25ab883063 Merge pull request #263 from simon987/dependabot/npm_and_yarn/sist2-vue/url-parse-1.5.10
Bump url-parse from 1.5.4 to 1.5.10 in /sist2-vue
2022-02-28 09:26:15 -05:00
dependabot[bot]
6ab606203f Bump url-parse from 1.5.4 to 1.5.10 in /sist2-vue
Bumps [url-parse](https://github.com/unshiftio/url-parse) from 1.5.4 to 1.5.10.
- [Release notes](https://github.com/unshiftio/url-parse/releases)
- [Commits](https://github.com/unshiftio/url-parse/compare/1.5.4...1.5.10)

---
updated-dependencies:
- dependency-name: url-parse
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-02-28 04:23:32 +00:00
6ec98046fa Merge pull request #262 from yatli/fix_261
fix #261: inherit index id from base index when using incremental scan
2022-02-26 11:37:16 -05:00
Yatao Li
4fac81ca6a fix #261: new index ids generated for incremental scan 2022-02-27 00:25:23 +08:00
63 changed files with 931 additions and 522 deletions

3
.gitmodules vendored
View File

@@ -7,3 +7,6 @@
[submodule "third-party/libscan/third-party/antiword"]
path = third-party/libscan/third-party/antiword
url = https://github.com/simon987/antiword
[submodule "third-party/libscan/third-party/libmobi"]
path = third-party/libscan/third-party/libmobi
url = https://github.com/bfabiszewski/libmobi

View File

@@ -4,6 +4,7 @@ set(CMAKE_C_STANDARD 11)
project(sist2 C)
option(SIST_DEBUG "Build a debug executable" on)
option(SIST_FAST "Enable more optimisation flags" off)
option(SIST_FAKE_STORE "Disable IO operations of LMDB stores for debugging purposes" 0)
add_compile_definitions(
@@ -54,6 +55,10 @@ find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED)
find_package(CURL CONFIG REQUIRED)
find_library(MAGIC_LIB
NAMES libmagic.so.1 magic
PATHS /usr/lib/x86_64-linux-gnu/ /usr/lib/aarch64-linux-gnu/
)
target_include_directories(
@@ -93,10 +98,22 @@ if (SIST_DEBUG)
PROPERTIES
OUTPUT_NAME sist2_debug
)
elseif (SIST_FAST)
target_compile_options(
sist2
PRIVATE
-Ofast
-march=native
-fno-stack-protector
-fomit-frame-pointer
-freciprocal-math
)
else ()
target_compile_options(
sist2
PRIVATE
-Ofast
-fno-stack-protector
-fomit-frame-pointer
@@ -121,11 +138,12 @@ target_link_libraries(
CURL::libcurl
pthread
magic
c
scan
${MAGIC_LIB}
)
add_custom_target(

View File

@@ -9,7 +9,7 @@ RUN strip sist2 || mv sist2_debug sist2
FROM --platform="linux/amd64" ubuntu:21.10
RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*
RUN apt update && apt install -y curl libasan5 libmagic1 && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \

View File

@@ -52,7 +52,7 @@ sist2 (Simple incremental search tool)
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` *
2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
recommended!)*
3. *(or)* `docker pull simon987/sist2:2.11.7-x64-linux`
3. *(or)* `docker pull simon987/sist2:2.12.1-x64-linux`
1. See [Usage guide](docs/USAGE.md)

View File

@@ -103,7 +103,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
* `--thumbnail-count`
Maximum number of thumbnails to generate. When set to a value >= 2, thumbnails for video previews
will be generated. The actual number of thumbnails generated depends on the length of the video (maximum 1 image
every ~5s). Set to 0 to completely disable thumbnails.
every ~7s). Set to 0 to completely disable thumbnails.
* `--content-size`
Number of bytes of text to be extracted from the content of files (plain text, PDFs etc.).
Repeated whitespace and special characters do not count toward this limit.
@@ -292,7 +292,7 @@ Both the `root` and `rewrite_url` fields are safe to manually modify from the
# Elasticsearch
Elasticsearch versions >=6.8.0, <8.0.0 are supported by sist2.
Elasticsearch versions >=6.8.0, 7.X.X and 8.X.X are supported by sist2.
Using a version >=7.14.0 is recommended to enable the following features:

View File

@@ -3,7 +3,7 @@
"refresh_interval": "30s",
"codec": "best_compression",
"number_of_replicas": 0,
"highlight.max_analyzed_offset": 10000000
"highlight.max_analyzed_offset": 1000000
},
"analysis": {
"tokenizer": {
@@ -16,7 +16,7 @@
"delimiter": "."
},
"my_nGram_tokenizer": {
"type": "nGram",
"type": "ngram",
"min_gram": 3,
"max_gram": 3
}

View File

@@ -5,5 +5,6 @@ rm -rf index.sist2/
python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c
python3 scripts/index_static.py > src/index/static_generated.c
python3 scripts/magic_static.py > src/magic_generated.c
printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h

8
scripts/magic_static.py Normal file
View File

@@ -0,0 +1,8 @@
"""Print the libmagic database as a C array definition.

The build script redirects this script's stdout to src/magic_generated.c.
"""

MAGIC_DATABASE_PATH = "/usr/lib/file/magic.mgc"


def render_c_array(data):
    """Return the C definition of ``magic_database_buffer`` initialised with ``data``.

    ``data`` is a bytes-like object; each byte is written as a decimal literal.
    """
    return "char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(map(str, data)))


def read_magic_database(path=MAGIC_DATABASE_PATH):
    """Return the content of the file at ``path``, or ``b""`` when it cannot be read."""
    try:
        with open(path, "rb") as f:
            return f.read()
    except OSError:
        # Best effort: a missing or unreadable database produces an empty buffer
        # instead of failing the build. Only I/O errors are swallowed, so
        # KeyboardInterrupt and programming errors still surface.
        return b""


if __name__ == "__main__":
    print(render_c_array(read_magic_database()))

View File

@@ -1,2 +1,3 @@
docker run --rm -it -p 9200:9200 -e "discovery.type=single-node" \
docker run --rm -it --name "sist2-dev-es"\
-p 9200:9200 -e "discovery.type=single-node" \
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:7.14.0

3
scripts/start_dev_es_6.sh Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/sh
# Start a disposable single-node Elasticsearch 6.8.0 container for development.
# --rm removes the container on exit; container port 9200 is published on host port 9202.
# Each continuation backslash is preceded by a space so the arguments stay
# separated even if the following line carries no leading whitespace.
docker run --rm -it --name "sist2-dev-es-6" \
    -p 9202:9200 -e "discovery.type=single-node" \
    -e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:6.8.0

3
scripts/start_dev_es_8.sh Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/sh
# Start a disposable single-node Elasticsearch 8.1.2 container for development.
# --rm removes the container on exit; ports 9200 and 9300 are published on the host.
# Each continuation backslash is preceded by a space so the arguments stay
# separated even if the following line carries no leading whitespace.
docker run --rm -it --name "sist2-dev-es" \
    -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" \
    -e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:8.1.2

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -12,7 +12,6 @@
"axios": "^0.25.0",
"bootstrap-vue": "^2.21.2",
"core-js": "^3.6.5",
"crypto-es": "^1.2.7",
"d3": "^5.16.0",
"date-fns": "^2.21.3",
"dom-to-image": "^2.6.0",
@@ -3289,9 +3288,9 @@
}
},
"node_modules/async": {
"version": "2.6.3",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz",
"integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==",
"version": "2.6.4",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.4.tgz",
"integrity": "sha512-mzo5dfJYwAn29PeiJ0zvwTo04zj8HDJj0Mn8TD7sno7q12prdbnasKJHhkm2c1LgrhlJ0teaea8860oxi51mGA==",
"dev": true,
"dependencies": {
"lodash": "^4.17.14"
@@ -5261,11 +5260,6 @@
"node": "*"
}
},
"node_modules/crypto-es": {
"version": "1.2.7",
"resolved": "https://registry.npmjs.org/crypto-es/-/crypto-es-1.2.7.tgz",
"integrity": "sha512-UUqiVJ2gUuZFmbFsKmud3uuLcNP2+Opt+5ysmljycFCyhA0+T16XJmo1ev/t5kMChMqWh7IEvURNCqsg+SjZGQ=="
},
"node_modules/css-color-names": {
"version": "0.0.4",
"resolved": "https://registry.npmjs.org/css-color-names/-/css-color-names-0.0.4.tgz",
@@ -9742,9 +9736,9 @@
}
},
"node_modules/minimist": {
"version": "1.2.5",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==",
"version": "1.2.6",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
"integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==",
"dev": true
},
"node_modules/minipass": {
@@ -14098,9 +14092,9 @@
}
},
"node_modules/url-parse": {
"version": "1.5.4",
"resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.4.tgz",
"integrity": "sha512-ITeAByWWoqutFClc/lRZnFplgXgEZr3WJ6XngMM/N9DMIm4K8zXPCZ1Jdu0rERwO84w1WC5wkle2ubwTA4NTBg==",
"version": "1.5.10",
"resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.10.tgz",
"integrity": "sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ==",
"dev": true,
"dependencies": {
"querystringify": "^2.1.1",
@@ -17943,9 +17937,9 @@
"dev": true
},
"async": {
"version": "2.6.3",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz",
"integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==",
"version": "2.6.4",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.4.tgz",
"integrity": "sha512-mzo5dfJYwAn29PeiJ0zvwTo04zj8HDJj0Mn8TD7sno7q12prdbnasKJHhkm2c1LgrhlJ0teaea8860oxi51mGA==",
"dev": true,
"requires": {
"lodash": "^4.17.14"
@@ -19621,11 +19615,6 @@
"randomfill": "^1.0.3"
}
},
"crypto-es": {
"version": "1.2.7",
"resolved": "https://registry.npmjs.org/crypto-es/-/crypto-es-1.2.7.tgz",
"integrity": "sha512-UUqiVJ2gUuZFmbFsKmud3uuLcNP2+Opt+5ysmljycFCyhA0+T16XJmo1ev/t5kMChMqWh7IEvURNCqsg+SjZGQ=="
},
"css-color-names": {
"version": "0.0.4",
"resolved": "https://registry.npmjs.org/css-color-names/-/css-color-names-0.0.4.tgz",
@@ -23335,9 +23324,9 @@
}
},
"minimist": {
"version": "1.2.5",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==",
"version": "1.2.6",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
"integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==",
"dev": true
},
"minipass": {
@@ -27019,9 +27008,9 @@
}
},
"url-parse": {
"version": "1.5.4",
"resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.4.tgz",
"integrity": "sha512-ITeAByWWoqutFClc/lRZnFplgXgEZr3WJ6XngMM/N9DMIm4K8zXPCZ1Jdu0rERwO84w1WC5wkle2ubwTA4NTBg==",
"version": "1.5.10",
"resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.10.tgz",
"integrity": "sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ==",
"dev": true,
"requires": {
"querystringify": "^2.1.1",

View File

@@ -11,7 +11,6 @@
"axios": "^0.25.0",
"bootstrap-vue": "^2.21.2",
"core-js": "^3.6.5",
"crypto-es": "^1.2.7",
"d3": "^5.16.0",
"date-fns": "^2.21.3",
"dom-to-image": "^2.6.0",

View File

@@ -1,6 +1,5 @@
import axios from "axios";
import {ext, strUnescape, lum} from "./util";
import CryptoES from 'crypto-es';
export interface EsTag {
id: string
@@ -30,7 +29,6 @@ export interface EsHit {
_index: string
_id: string
_score: number
_path_md5: string
_type: string
_tags: Tag[]
_seq: number
@@ -249,11 +247,6 @@ class Sist2Api {
res.hits.hits.forEach((hit: EsHit) => {
hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
hit["_path_md5"] = CryptoES.MD5(
hit["_source"]["path"] +
(hit["_source"]["path"] ? "/" : "") +
hit["_source"]["name"] + ext(hit)
).toString();
this.setHitProps(hit);
this.setHitTags(hit);
@@ -343,10 +336,6 @@ class Sist2Api {
};
}
getDocInfo(docId: string) {
return axios.get(`${this.baseUrl}d/${docId}`);
}
getTags() {
return this.esQuery({
aggs: {
@@ -380,8 +369,7 @@ class Sist2Api {
return axios.post(`${this.baseUrl}tag/` + hit["_source"]["index"], {
delete: false,
name: tag,
doc_id: hit["_id"],
path_md5: hit._path_md5
doc_id: hit["_id"]
});
}
@@ -389,8 +377,7 @@ class Sist2Api {
return axios.post(`${this.baseUrl}tag/` + hit["_source"]["index"], {
delete: true,
name: tag,
doc_id: hit["_id"],
path_md5: hit._path_md5
doc_id: hit["_id"]
});
}

View File

@@ -69,7 +69,7 @@ interface SortMode {
class Sist2Query {
searchQuery(): any {
searchQuery(blankSearch: boolean = false): any {
const getters = store.getters;
@@ -93,22 +93,6 @@ class Sist2Query {
{terms: {index: selectedIndexIds}}
] as any[];
if (sizeMin && sizeMax) {
filters.push({range: {size: {gte: sizeMin, lte: sizeMax}}})
} else if (sizeMin) {
filters.push({range: {size: {gte: sizeMin}}})
} else if (sizeMax) {
filters.push({range: {size: {lte: sizeMax}}})
}
if (dateMin && dateMax) {
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
} else if (dateMin) {
filters.push({range: {mtime: {gte: dateMin}}})
} else if (dateMax) {
filters.push({range: {mtime: {lte: dateMax}}})
}
const fields = [
"name^8",
"content^3",
@@ -128,7 +112,25 @@ class Sist2Query {
fields.push("name.nGram^3");
}
if (!blankSearch) {
if (sizeMin && sizeMax) {
filters.push({range: {size: {gte: sizeMin, lte: sizeMax}}})
} else if (sizeMin) {
filters.push({range: {size: {gte: sizeMin}}})
} else if (sizeMax) {
filters.push({range: {size: {lte: sizeMax}}})
}
if (dateMin && dateMax) {
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
} else if (dateMin) {
filters.push({range: {mtime: {gte: dateMin}}})
} else if (dateMax) {
filters.push({range: {mtime: {lte: dateMax}}})
}
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
if (path !== "") {
filters.push({term: {path: path}})
}
@@ -144,6 +146,7 @@ class Sist2Query {
selectedTags.forEach((tag: string) => filters.push({term: {"tag": tag}}));
}
}
}
let query;
if (getters.optQueryMode === "simple") {
@@ -182,7 +185,7 @@ class Sist2Query {
size: size,
} as any;
if (!empty) {
if (!empty && !blankSearch) {
q.query.bool.must = query;
}
@@ -207,7 +210,7 @@ class Sist2Query {
};
if (!legacyES) {
q.highlight.max_analyzed_offset = 9_999_999;
q.highlight.max_analyzed_offset = 999_999;
}
if (getters.optSearchInPath) {
@@ -237,7 +240,7 @@ class Sist2Query {
}
}
if (!empty) {
if (!empty && !blankSearch) {
q.query.function_score.query.bool.must.push(query);
}
}

View File

@@ -72,6 +72,12 @@ export default {
}
});
Object.keys(src).forEach(key => {
if (key.startsWith("mt_") || key.startsWith("int_")) {
items.push({key: key, value: src[key]});
}
});
// Exif GPS
if ("exif_gps_longitude_dec" in src) {
items.push({

View File

@@ -1,11 +1,13 @@
<template>
<Preloader v-if="loading"></Preloader>
<div v-else-if="content" class="content-div">{{ content }}</div>
<div v-else-if="content" class="content-div" v-html="content"></div>
</template>
<script>
import Sist2Api from "@/Sist2Api";
import Preloader from "@/components/Preloader";
import Sist2Query from "@/Sist2Query";
import store from "@/store";
export default {
name: "LazyContentDiv",
@@ -18,10 +20,72 @@ export default {
}
},
mounted() {
Sist2Api.getDocInfo(this.docId).then(src => {
this.content = src.data.content;
const query = Sist2Query.searchQuery();
if (this.$store.state.optHighlight) {
const fields = this.$store.state.fuzzy
? {"content.nGram": {}}
: {content: {}};
query.highlight = {
pre_tags: ["<mark>"],
post_tags: ["</mark>"],
number_of_fragments: 0,
fields,
};
if (!store.state.sist2Info.esVersionLegacy) {
query.highlight.max_analyzed_offset = 999_999;
}
}
if ("function_score" in query.query) {
query.query = query.query.function_score.query;
}
if (!("must" in query.query.bool)) {
query.query.bool.must = [];
} else if (!Array.isArray(query.query.bool.must)) {
query.query.bool.must = [query.query.bool.must];
}
query.query.bool.must.push({match: {_id: this.docId}});
delete query["sort"];
delete query["aggs"];
delete query["search_after"];
delete query.query["function_score"];
query._source = {
includes: ["content", "name", "path", "extension"]
}
query.size = 1;
Sist2Api.esQuery(query).then(resp => {
this.loading = false;
})
if (resp.hits.hits.length === 1) {
this.content = this.getContent(resp.hits.hits[0]);
} else {
console.log("FIXME: could not get content")
console.log(resp)
}
});
},
methods: {
getContent(doc) {
if (!doc.highlight) {
return doc._source.content;
}
if (doc.highlight["content.nGram"]) {
return doc.highlight["content.nGram"][0];
}
if (doc.highlight.content) {
return doc.highlight.content[0];
}
}
}
}
</script>

View File

@@ -1,6 +1,7 @@
<template>
<div>
<div :class="{'disable-animations': $store.state.optSimpleLightbox}">
<FsLightbox
ref="lightbox"
:key="lightboxKey"
:toggler="showLightbox"
:sources="lightboxSources"
@@ -10,7 +11,7 @@
:source-index="lightboxSlide"
:custom-toolbar-buttons="customButtons"
:slideshow-time="$store.getters.optLightboxSlideDuration * 1000"
:zoom-increment="0.5"
:zoom-increment="0.25"
:load-only-current-source="$store.getters.optLightboxLoadOnlyCurrent"
:on-close="onClose"
:on-open="onShow"
@@ -29,6 +30,7 @@ export default {
components: {FsLightbox},
data() {
return {
disableAnimations: true,
customButtons: [
{
viewBox: "0 0 384.928 384.928",
@@ -64,7 +66,84 @@ export default {
return this.$store.getters["uiLightboxTypes"];
}
},
mounted() {
const listener = document.onkeydown;
document.onkeydown = (e) => {
const ret = this.keyDownListener(e)
if (listener && ret) {
return listener(e);
}
};
},
methods: {
keyDownListener(e) {
const isLightboxOpen = this.$refs.lightbox === undefined || this.$refs.lightbox.$el.tagName === undefined;
if (isLightboxOpen) {
return true;
}
const lightboxStore = this.$refs.lightbox.fsLightboxStore.slice(-1)[0];
switch (e.key) {
case " ": {
e.preventDefault();
e.stopPropagation();
e.stopImmediatePropagation();
// Find video at current slide, toggle play/pause
[...document.getElementsByClassName("fslightbox-absoluted")].forEach(elem => {
if (elem.style.transform === "translate(0px)" || elem.style.transform === "translate(0px, 0px)") {
const vid = elem.getElementsByTagName("video")[0];
if (vid) {
if (vid.paused) {
vid.play();
} else {
vid.pause()
}
}
}
return false;
});
return false;
}
case "ArrowUp":
case "k": {
if (!lightboxStore.data.isThumbing && lightboxStore.core.thumbsToggler) {
lightboxStore.core.thumbsToggler.toggleThumbs();
}
return false;
}
case "ArrowDown":
case "j": {
if (lightboxStore.data.isThumbing && lightboxStore.core.thumbsToggler) {
lightboxStore.core.thumbsToggler.toggleThumbs();
}
return false;
}
case "h": {
if (lightboxStore.core.stageManager.getPreviousSlideIndex) {
lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getPreviousSlideIndex());
}
return false;
}
case "l": {
if (lightboxStore.core.stageManager.getNextSlideIndex) {
lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getNextSlideIndex());
}
return false;
}
}
return true;
},
onDownloadClick() {
const url = this.lightboxSources[this.lightboxSlide];
@@ -125,4 +204,20 @@ export default {
.fslightbox-toolbar-button:nth-child(7) {
order: 7;
}
.disable-animations .fslightbox-container {
background: rgba(30,30,30,.9);
}
.disable-animations .fslightbox-transform-transition {
transition: none;
}
.disable-animations .fslightbox-fade-in-strong {
animation: none;
}
.fslightbox-container video, .fslightbox-container img {
cursor: unset !important;
}
</style>

View File

@@ -1,5 +1,13 @@
<template>
<div>
<b-input-group v-if="showSearchBar" id="tag-picker-filter-bar">
<b-form-input :value="filter"
:placeholder="$t('tagFilter')"
@input="onFilter($event)"></b-form-input>
</b-input-group>
<div id="tagTree"></div>
</div>
</template>
<script>
@@ -112,10 +120,12 @@ function addTag(map, tag, id, count) {
export default {
name: "TagPicker",
props: ["showSearchBar"],
data() {
return {
tagTree: null,
loadedFromArgs: false,
filter: ""
}
},
mounted() {
@@ -129,6 +139,10 @@ export default {
});
},
methods: {
onFilter(value) {
this.filter = value;
this.tagTree.search(value);
},
initializeTree() {
const tagMap = [];
this.tagTree = new InspireTree({
@@ -163,7 +177,8 @@ export default {
});
},
handleTreeClick(node, e) {
if (e === "indeterminate" || e === "collapsed" || e === 'rendered' || e === "focused") {
if (e === "indeterminate" || e === "collapsed" || e === 'rendered' || e === "focused"
|| e === "matched" || e === "hidden") {
return;
}
@@ -180,7 +195,15 @@ export default {
}
</style>
<style>
.inspire-tree .focused>.wholerow {
.inspire-tree .focused > .wholerow {
border: none;
}
#tag-picker-filter-bar {
padding: 10px 4px 4px;
}
.theme-black .inspire-tree .matched > .wholerow {
background: rgba(251, 191, 41, 0.25);
}
</style>

View File

@@ -16,6 +16,7 @@ export default {
pages: "pages",
mimeTypes: "Media types",
tags: "Tags",
tagFilter: "Filter tags",
help: {
simpleSearch: "Simple search",
advancedSearch: "Advanced search",
@@ -72,7 +73,9 @@ export default {
hideLegacy: "Hide the 'legacyES' Elasticsearch notice",
updateMimeMap: "Update the Media Types tree in real time",
useDatePicker: "Use a Date Picker component rather than a slider",
vidPreviewInterval: "Video preview frame duration in ms"
vidPreviewInterval: "Video preview frame duration in ms",
simpleLightbox: "Disable animations in image viewer",
showTagPickerFilter: "Display the tag filter bar"
},
queryMode: {
simple: "Simple",
@@ -182,6 +185,7 @@ export default {
pages: "pages",
mimeTypes: "Types de médias",
tags: "Tags",
tagFilter: "Filtrer les tags",
help: {
simpleSearch: "Recherche simple",
advancedSearch: "Recherche avancée",
@@ -239,7 +243,9 @@ export default {
hideLegacy: "Masquer la notice 'legacyES' Elasticsearch",
updateMimeMap: "Mettre à jour l'arbre de Types de médias en temps réel",
useDatePicker: "Afficher un composant « Date Picker » plutôt qu'un slider",
vidPreviewInterval: "Durée des images d'aperçu video en millisecondes"
vidPreviewInterval: "Durée des images d'aperçu video en millisecondes",
simpleLightbox: "Désactiver les animations du visualiseur d'images",
showTagPickerFilter: "Afficher le filtre dans l'onglet Tags"
},
queryMode: {
simple: "Simple",
@@ -350,6 +356,7 @@ export default {
pages: "页",
mimeTypes: "文件类型",
tags: "标签",
tagFilter: "筛选标签",
help: {
simpleSearch: "简易搜索",
advancedSearch: "高级搜索",
@@ -406,7 +413,9 @@ export default {
hideLegacy: "隐藏'legacyES' Elasticsearch 通知",
updateMimeMap: "媒体类型树的实时更新",
useDatePicker: "使用日期选择器组件而不是滑块",
vidPreviewInterval: "视频预览帧的持续时间,以毫秒为单位"
vidPreviewInterval: "视频预览帧的持续时间,以毫秒为单位",
simpleLightbox: "在图片查看器中,禁用动画",
showTagPickerFilter: "显示标签过滤栏"
},
queryMode: {
simple: "简单",

View File

@@ -4,6 +4,8 @@ import VueRouter, {Route} from "vue-router";
import {EsHit, EsResult, EsTag, Index, Tag} from "@/Sist2Api";
import {deserializeMimes, serializeMimes} from "@/util";
const CONF_VERSION = 2;
Vue.use(Vuex)
export default new Vuex.Store({
@@ -24,7 +26,6 @@ export default new Vuex.Store({
sortMode: "score",
fuzzy: false,
size: 60,
optLang: "en",
optLangIsDefault: true,
@@ -32,6 +33,7 @@ export default new Vuex.Store({
optTheme: "light",
optDisplay: "grid",
optSize: 60,
optHighlight: true,
optTagOrOperator: false,
optFuzzy: true,
@@ -51,6 +53,8 @@ export default new Vuex.Store({
optUpdateMimeMap: false,
optUseDatePicker: false,
optVidPreviewInterval: 700,
optSimpleLightbox: true,
optShowTagPickerFilter: true,
_onLoadSelectedIndices: [] as string[],
_onLoadSelectedMimeTypes: [] as string[],
@@ -149,7 +153,7 @@ export default new Vuex.Store({
setOptSuggestPath: (state, val) => state.optSuggestPath = val,
setOptFragmentSize: (state, val) => state.optFragmentSize = val,
setOptQueryMode: (state, val) => state.optQueryMode = val,
setOptResultSize: (state, val) => state.size = val,
setOptResultSize: (state, val) => state.optSize = val,
setOptTagOrOperator: (state, val) => state.optTagOrOperator = val,
setOptTreemapType: (state, val) => state.optTreemapType = val,
@@ -161,6 +165,8 @@ export default new Vuex.Store({
setOptUpdateMimeMap: (state, val) => state.optUpdateMimeMap = val,
setOptUseDatePicker: (state, val) => state.optUseDatePicker = val,
setOptVidPreviewInterval: (state, val) => state.optVidPreviewInterval = val,
setOptSimpleLightbox: (state, val) => state.optSimpleLightbox = val,
setOptShowTagPickerFilter: (state, val) => state.optShowTagPickerFilter = val,
setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
setOptLightboxSlideDuration: (state, val) => state.optLightboxSlideDuration = val,
@@ -239,6 +245,11 @@ export default new Vuex.Store({
}
},
async updateArgs({state}, router: VueRouter) {
if (router.currentRoute.path !== "/") {
return;
}
await router.push({
query: {
q: state.searchText.trim() ? state.searchText.trim().replace(/\s+/g, " ") : undefined,
@@ -267,6 +278,8 @@ export default new Vuex.Store({
}
});
conf["version"] = CONF_VERSION;
localStorage.setItem("sist2_configuration", JSON.stringify(conf));
},
loadConfiguration({state}) {
@@ -274,6 +287,11 @@ export default new Vuex.Store({
if (confString) {
const conf = JSON.parse(confString);
if (!("version" in conf) || conf["version"] != CONF_VERSION) {
localStorage.removeItem("sist2_configuration");
window.location.reload();
}
Object.keys(state).forEach((key) => {
if (key.startsWith("opt")) {
(state as any)[key] = conf[key];
@@ -335,7 +353,7 @@ export default new Vuex.Store({
searchText: state => state.searchText,
pathText: state => state.pathText,
fuzzy: state => state.fuzzy,
size: state => state.size,
size: state => state.optSize,
sortMode: state => state.sortMode,
lastQueryResult: state => state.lastQueryResults,
lastDoc: function (state): EsHit | null {
@@ -373,10 +391,12 @@ export default new Vuex.Store({
optTreemapColor: state => state.optTreemapColor,
optLightboxLoadOnlyCurrent: state => state.optLightboxLoadOnlyCurrent,
optLightboxSlideDuration: state => state.optLightboxSlideDuration,
optResultSize: state => state.size,
optResultSize: state => state.optSize,
optHideLegacy: state => state.optHideLegacy,
optUpdateMimeMap: state => state.optUpdateMimeMap,
optUseDatePicker: state => state.optUseDatePicker,
optVidPreviewInterval: state => state.optVidPreviewInterval,
optSimpleLightbox: state => state.optSimpleLightbox,
optShowTagPickerFilter: state => state.optShowTagPickerFilter,
}
})

View File

@@ -45,6 +45,16 @@
<b-form-checkbox :checked="optUseDatePicker" @input="setOptUseDatePicker">
{{ $t("opt.useDatePicker") }}
</b-form-checkbox>
<b-form-checkbox :checked="optSimpleLightbox" @input="setOptSimpleLightbox">{{
$t("opt.simpleLightbox")
}}
</b-form-checkbox>
<b-form-checkbox :checked="optShowTagPickerFilter" @input="setOptShowTagPickerFilter">{{
$t("opt.showTagPickerFilter")
}}
</b-form-checkbox>
</b-card>
<br/>
@@ -239,6 +249,8 @@ export default {
"optUpdateMimeMap",
"optUseDatePicker",
"optVidPreviewInterval",
"optSimpleLightbox",
"optShowTagPickerFilter",
]),
clientWidth() {
return window.innerWidth;
@@ -285,6 +297,8 @@ export default {
"setOptUpdateMimeMap",
"setOptUseDatePicker",
"setOptVidPreviewInterval",
"setOptSimpleLightbox",
"setOptShowTagPickerFilter",
]),
onResetClick() {
localStorage.removeItem("sist2_configuration");

View File

@@ -56,6 +56,22 @@ export default Vue.extend({
onThumbnailClick() {
window.open(`/f/${this.doc._id}`, "_blank");
},
findByCustomField(field, id) {
return {
query: {
bool: {
must: [
{
match: {
[field]: id
}
}
]
}
},
size: 1
}
},
findById(id) {
return {
query: {
@@ -103,6 +119,8 @@ export default Vue.extend({
query = this.findById(this.$route.query.byId);
} else if (this.$route.query.byName) {
query = this.findByName(this.$route.query.byName);
} else if (this.$route.query.by && this.$route.query.q) {
query = this.findByCustomField(this.$route.query.by, this.$route.query.q)
}
if (query) {

View File

@@ -32,7 +32,7 @@
<MimePicker></MimePicker>
</b-tab>
<b-tab :title="$t('tags')">
<TagPicker></TagPicker>
<TagPicker :show-search-bar="$store.state.optShowTagPickerFilter"></TagPicker>
</b-tab>
</b-tabs>
</b-col>
@@ -139,7 +139,9 @@ export default Vue.extend({
this.setSist2Info(data);
this.setIndices(data.indices);
Sist2Api.getMimeTypes(Sist2Query.searchQuery()).then(({mimeMap}) => {
const doBlankSearch = !this.$store.state.optUpdateMimeMap;
Sist2Api.getMimeTypes(Sist2Query.searchQuery(doBlankSearch)).then(({mimeMap}) => {
this.$store.commit("setUiMimeMap", mimeMap);
this.uiLoading = false;
this.search(true);
@@ -206,7 +208,7 @@ export default Vue.extend({
this.$store.commit("setUiReachedScrollEnd", false);
},
async handleSearch(resp: EsResult) {
if (resp.hits.hits.length == 0) {
if (resp.hits.hits.length == 0 || resp.hits.hits.length < this.$store.state.optSize) {
this.$store.commit("setUiReachedScrollEnd", true);
}
@@ -246,6 +248,8 @@ export default Vue.extend({
this.$store.commit("setLastQueryResult", resp);
this.docs.push(...resp.hits.hits);
resp.hits.hits.forEach(hit => this.docIds.add(hit._id));
},
getDateRange(): Promise<{ min: number, max: number }> {
return sist2.esQuery({

View File

@@ -81,6 +81,11 @@ void web_args_destroy(web_args_t *args) {
}
/**
 * Release an exec_args_t: frees its index_path string, then the struct
 * itself. `args` must not be NULL.
 */
void exec_args_destroy(exec_args_t *args) {
    // free(NULL) is a no-op, so an unset index_path needs no explicit guard.
    free(args->index_path);
    free(args);
}
@@ -124,6 +129,9 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->tn_count = DEFAULT_THUMBNAIL_COUNT;
} else if (args->tn_count == OPTION_VALUE_DISABLE) {
args->tn_count = 0;
} else if (args->tn_count > 1000) {
// Report the rejected thumbnail count itself (the value tested above), not the unrelated tn_size field.
printf("Invalid value --thumbnail-count argument: %d. Must be <= 1000.\n", args->tn_count);
return 1;
}
if (args->content_size == OPTION_VALUE_UNSPECIFIED) {
@@ -390,6 +398,7 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl)
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
LOG_DEBUGF("cli.c", "arg async_script=%d", args->async_script)
@@ -504,6 +513,7 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl)
LOG_DEBUGF("cli.c", "arg tagline=%s", args->tagline)
LOG_DEBUGF("cli.c", "arg dev=%d", args->dev)
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)

View File

@@ -50,6 +50,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv);
typedef struct index_args {
char *es_url;
char *es_index;
int es_insecure_ssl;
char *index_path;
const char *script_path;
char *script;
@@ -68,6 +69,7 @@ typedef struct index_args {
typedef struct web_args {
char *es_url;
char *es_index;
int es_insecure_ssl;
char *listen_address;
char *credentials;
char *tag_credentials;
@@ -85,7 +87,8 @@ typedef struct web_args {
typedef struct exec_args {
char *es_url;
char *es_index;
const char *index_path;
int es_insecure_ssl;
char *index_path;
const char *script_path;
int async_script;
char *script;

View File

@@ -79,6 +79,7 @@ typedef struct {
typedef struct {
char *es_url;
int es_insecure_ssl;
es_version_t *es_version;
char *es_index;
int batch_size;
@@ -97,6 +98,7 @@ typedef struct {
char *es_url;
es_version_t *es_version;
char *es_index;
int es_insecure_ssl;
int index_count;
char *auth_user;
char *auth_pass;

View File

@@ -21,6 +21,8 @@ void free_queue(int max);
void elastic_flush();
void print_error(response_t *r);
void destroy_indexer(es_indexer_t *indexer) {
if (indexer == NULL) {
@@ -45,13 +47,13 @@ void elastic_cleanup() {
destroy_indexer(Indexer);
}
void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {
void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]) {
cJSON *line = cJSON_CreateObject();
cJSON_AddStringToObject(line, "_id", id_str);
cJSON_AddStringToObject(line, "_index", IndexCtx.es_index);
cJSON_AddStringToObject(line, "_type", "_doc");
// cJSON_AddStringToObject(line, "_type", "_doc");
cJSON_AddItemReferenceToObject(line, "_source", document);
char *json = cJSON_PrintUnformatted(line);
@@ -72,19 +74,19 @@ void delete_document(const char* document_id_str, void* UNUSED(_data)) {
bulk_line->type = ES_BULK_LINE_DELETE;
bulk_line->next = NULL;
memcpy(bulk_line->path_md5_str, document_id_str, MD5_STR_LENGTH);
strcpy(bulk_line->doc_id, document_id_str);
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
}
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
void index_json(cJSON *document, const char doc_id[SIST_DOC_ID_LEN]) {
char *json = cJSON_PrintUnformatted(document);
size_t json_len = strlen(json);
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
bulk_line->type = ES_BULK_LINE_INDEX;
memcpy(bulk_line->line, json, json_len);
memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
strcpy(bulk_line->doc_id, doc_id);
*(bulk_line->line + json_len) = '\n';
*(bulk_line->line + json_len + 1) = '\0';
bulk_line->next = NULL;
@@ -93,7 +95,7 @@ void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
}
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]) {
void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]) {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
@@ -108,16 +110,16 @@ void execute_update_script(const char *script, int async, const char index_id[MD
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
cJSON_AddStringToObject(term_obj, "index", index_id);
char *str = cJSON_Print(body);
char *str = cJSON_PrintUnformatted(body);
char bulk_url[4096];
char url[4096];
if (async) {
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
snprintf(url, sizeof(url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
Indexer->es_index);
} else {
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
snprintf(url, sizeof(url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
}
response_t *r = web_post(bulk_url, str);
response_t *r = web_post(url, str, IndexCtx.es_insecure_ssl);
if (!async) {
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
}
@@ -137,13 +139,18 @@ void execute_update_script(const char *script, int async, const char index_id[MD
if (async) {
cJSON *task = cJSON_GetObjectItem(resp, "task");
if (task == NULL) {
LOG_FATALF("elastic.c", "FIXME: Could not get task id: %s", r->body);
}
LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring);
}
cJSON_Delete(resp);
}
void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
void *create_bulk_buffer(int max, int *count, size_t *buf_len, int legacy) {
es_bulk_line_t *line = Indexer->line_head;
*count = 0;
@@ -164,11 +171,20 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
while (line != NULL && *count < max) {
char action_str[256];
if (line->type == ES_BULK_LINE_INDEX) {
if (legacy) {
snprintf(
action_str, sizeof(action_str),
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
line->path_md5_str, Indexer->es_index
line->doc_id, Indexer->es_index
);
} else {
snprintf(
action_str, sizeof(action_str),
"{\"index\":{\"_id\":\"%s\",\"_index\":\"%s\"}}\n",
line->doc_id, Indexer->es_index
);
}
size_t action_str_len = strlen(action_str);
size_t line_len = strlen(line->line);
@@ -184,7 +200,7 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
snprintf(
action_str, sizeof(action_str),
"{\"delete\":{\"_id\":\"%s\",\"_index\":\"%s\"}}\n",
line->path_md5_str, Indexer->es_index
line->doc_id, Indexer->es_index
);
size_t action_str_len = strlen(action_str);
@@ -212,7 +228,13 @@ void print_errors(response_t *r) {
*(tmp + r->size) = '\0';
cJSON *ret_json = cJSON_Parse(tmp);
if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
cJSON *errors = cJSON_GetObjectItem(ret_json, "errors");
if (errors == NULL) {
char *str = cJSON_Print(ret_json);
LOG_ERRORF("elastic.c", "%s\n", str);
cJSON_free(str);
} else if (errors->valueint != 0) {
cJSON *err;
cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
@@ -250,11 +272,11 @@ void _elastic_flush(int max) {
size_t buf_len;
int count;
void *buf = create_bulk_buffer(max, &count, &buf_len);
void *buf = create_bulk_buffer(max, &count, &buf_len, IS_LEGACY_VERSION(IndexCtx.es_version));
char bulk_url[4096];
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_bulk?pipeline=tie", Indexer->es_url, Indexer->es_index);
response_t *r = web_post(bulk_url, buf);
response_t *r = web_post(bulk_url, buf, IndexCtx.es_insecure_ssl);
if (r->status_code == 0) {
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
@@ -263,7 +285,7 @@ void _elastic_flush(int max) {
if (r->status_code == 413) {
if (max <= 1) {
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->doc_id)
free_response(r);
free(buf);
free_queue(1);
@@ -380,7 +402,7 @@ void finish_indexer(char *script, int async_script, char *index_id) {
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
response_t *r = web_post(url, "");
response_t *r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
@@ -389,36 +411,44 @@ void finish_indexer(char *script, int async_script, char *index_id) {
free(script);
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
}
snprintf(url, sizeof(url), "%s/%s/_forcemerge", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}");
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Set refresh interval <%d>", r->status_code);
free_response(r);
}
es_version_t *elastic_get_version(const char *es_url) {
response_t *r = web_get(es_url, 30);
es_version_t *elastic_get_version(const char *es_url, int insecure) {
response_t *r = web_get(es_url, 30, insecure);
char *tmp = malloc(r->size + 1);
memcpy(tmp, r->body, r->size);
*(tmp + r->size) = '\0';
cJSON *response = cJSON_Parse(tmp);
free(tmp);
free_response(r);
if (response == NULL) {
return NULL;
}
if (cJSON_GetObjectItem(response, "error") != NULL) {
LOG_WARNING("elastic.c", "Could not get Elasticsearch version")
print_error(r);
free_response(r);
return NULL;
}
free_response(r);
if (cJSON_GetObjectItem(response, "version") == NULL ||
cJSON_GetObjectItem(cJSON_GetObjectItem(response, "version"), "number") == NULL) {
cJSON_Delete(response);
@@ -443,7 +473,7 @@ es_version_t *elastic_get_version(const char *es_url) {
void elastic_init(int force_reset, const char *user_mappings, const char *user_settings) {
es_version_t *es_version = elastic_get_version(IndexCtx.es_url);
es_version_t *es_version = elastic_get_version(IndexCtx.es_url, IndexCtx.es_insecure_ssl);
IndexCtx.es_version = es_version;
if (es_version == NULL) {
@@ -452,33 +482,33 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
LOG_INFOF("elastic.c",
"Elasticsearch version is %s (supported=%d, legacy=%d)",
format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), USE_LEGACY_ES_SETTINGS(es_version));
format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), IS_LEGACY_VERSION(es_version));
if (!IS_SUPPORTED_ES_VERSION(es_version)) {
LOG_FATAL("elastic.c", "sist2 only supports Elasticsearch v6.8 or newer")
LOG_FATAL("elastic.c", "This elasticsearch version is not supported!")
}
char *settings = NULL;
if (USE_LEGACY_ES_SETTINGS(es_version)) {
settings = settings_json;
} else {
if (IS_LEGACY_VERSION(es_version)) {
settings = settings_legacy_json;
} else {
settings = settings_json;
}
// Check if index exists
char url[4096];
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
response_t *r = web_get(url, 30);
response_t *r = web_get(url, 30, IndexCtx.es_insecure_ssl);
int index_exists = r->status_code == 200;
free_response(r);
if (!index_exists || force_reset) {
r = web_delete(url);
r = web_delete(url, IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, "");
r = web_put(url, "", IndexCtx.es_insecure_ssl);
if (r->status_code != 200) {
print_error(r);
@@ -489,17 +519,17 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
free_response(r);
snprintf(url, sizeof(url), "%s/%s/_close", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/_ingest/pipeline/tie", IndexCtx.es_url);
r = web_put(url, pipeline_json);
r = web_put(url, pipeline_json, IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, user_settings ? user_settings : settings);
r = web_put(url, user_settings ? user_settings : settings, IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Update ES settings <%d>", r->status_code);
if (r->status_code != 200) {
print_error(r);
@@ -507,8 +537,13 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
}
free_response(r);
if (IS_LEGACY_VERSION(es_version)) {
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, user_mappings ? user_mappings : mappings_json);
} else {
snprintf(url, sizeof(url), "%s/%s/_mappings", IndexCtx.es_url, IndexCtx.es_index);
}
r = web_put(url, user_mappings ? user_mappings : mappings_json, IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Update ES mappings <%d>", r->status_code);
if (r->status_code != 200) {
print_error(r);
@@ -517,7 +552,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
free_response(r);
snprintf(url, sizeof(url), "%s/%s/_open", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
free_response(r);
}
@@ -527,7 +562,7 @@ cJSON *elastic_get_document(const char *id_str) {
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, id_str);
response_t *r = web_get(url, 3);
response_t *r = web_get(url, 3, WebCtx.es_insecure_ssl);
cJSON *json = NULL;
if (r->status_code == 200) {
char *tmp = malloc(r->size + 1);
@@ -545,7 +580,7 @@ char *elastic_get_status() {
snprintf(url, sizeof(url),
"%s/_cluster/state/metadata/%s?filter_path=metadata.indices.*.state", WebCtx.es_url, WebCtx.es_index);
response_t *r = web_get(url, 30);
response_t *r = web_get(url, 30, IndexCtx.es_insecure_ssl);
cJSON *json = NULL;
char *status = malloc(128 * sizeof(char));
status[0] = '\0';

View File

@@ -8,7 +8,7 @@
typedef struct es_bulk_line {
struct es_bulk_line *next;
char path_md5_str[MD5_STR_LENGTH];
char doc_id[SIST_DOC_ID_LEN];
int type;
char line[0];
} es_bulk_line_t;
@@ -20,8 +20,10 @@ typedef struct {
} es_version_t;
#define VERSION_GE(version, maj, min) ((version)->major > (maj) || ((version)->major == (maj) && (version)->minor >= (min)))
#define IS_SUPPORTED_ES_VERSION(es_version) VERSION_GE((es_version), 6, 8)
#define USE_LEGACY_ES_SETTINGS(es_version) (!VERSION_GE((es_version), 7, 14))
#define VERSION_LT(version, maj, min) (!VERSION_GE(version, maj, min))
#define IS_SUPPORTED_ES_VERSION(es_version) ((es_version) != NULL && VERSION_GE((es_version), 6, 8) && VERSION_LT((es_version), 9, 0))
#define IS_LEGACY_VERSION(es_version) ((es_version) != NULL && VERSION_LT((es_version), 7, 14))
__always_inline
static const char *format_es_version(es_version_t *version) {
@@ -40,9 +42,9 @@ typedef struct es_indexer es_indexer_t;
void elastic_index_line(es_bulk_line_t *line);
void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
void print_json(cJSON *document, const char index_id_str[SIST_INDEX_ID_LEN]);
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
void index_json(cJSON *document, const char doc_id[SIST_INDEX_ID_LEN]);
void delete_document(const char *document_id_str, void* data);
@@ -57,8 +59,8 @@ cJSON *elastic_get_document(const char *id_str);
char *elastic_get_status();
es_version_t *elastic_get_version(const char *es_url);
es_version_t *elastic_get_version(const char *es_url, int insecure);
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);
void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]);
#endif

File diff suppressed because one or more lines are too long

View File

@@ -22,7 +22,7 @@ void free_response(response_t *resp) {
free(resp);
}
void web_post_async_poll(subreq_ctx_t* req) {
void web_post_async_poll(subreq_ctx_t *req) {
fd_set fdread;
fd_set fdwrite;
fd_set fdexcep;
@@ -34,7 +34,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd);
if(mc != CURLM_OK) {
if (mc != CURLM_OK) {
req->done = TRUE;
return;
}
@@ -47,7 +47,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
struct timeval timeout = {1, 0};
int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
switch(rc) {
switch (rc) {
case -1:
req->done = TRUE;
break;
@@ -64,6 +64,10 @@ void web_post_async_poll(subreq_ctx_t* req) {
req->response->size = req->response_buf.cur;
curl_easy_getinfo(req->handle, CURLINFO_RESPONSE_CODE, &req->response->status_code);
if (req->response->status_code == 0) {
LOG_ERRORF("web.c", "CURL Error: %s", req->curl_err_buffer)
}
curl_multi_cleanup(req->multi);
curl_easy_cleanup(req->handle);
curl_slist_free_all(req->headers);
@@ -71,7 +75,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
}
}
subreq_ctx_t *web_post_async(const char *url, char *data) {
subreq_ctx_t *web_post_async(const char *url, char *data, int insecure) {
subreq_ctx_t *req = calloc(1, sizeof(subreq_ctx_t));
req->response = calloc(1, sizeof(response_t));
req->data = data;
@@ -84,6 +88,11 @@ subreq_ctx_t *web_post_async(const char *url, char *data) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, req->curl_err_buffer);
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
@@ -100,7 +109,7 @@ subreq_ctx_t *web_post_async(const char *url, char *data) {
return req;
}
response_t *web_get(const char *url, int timeout) {
response_t *web_get(const char *url, int timeout, int insecure) {
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
@@ -112,14 +121,24 @@ response_t *web_get(const char *url, int timeout) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
char err_buffer[CURL_ERROR_SIZE + 1] = {};
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, err_buffer);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
if (resp->status_code == 0) {
LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
}
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
@@ -128,7 +147,7 @@ response_t *web_get(const char *url, int timeout) {
return resp;
}
response_t *web_post(const char *url, const char *data) {
response_t *web_post(const char *url, const char *data, int insecure) {
response_t *resp = malloc(sizeof(response_t));
@@ -141,6 +160,12 @@ response_t *web_post(const char *url, const char *data) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
char err_buffer[CURL_ERROR_SIZE + 1] = {};
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, err_buffer);
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
@@ -151,17 +176,21 @@ response_t *web_post(const char *url, const char *data) {
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
if (resp->status_code == 0) {
LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
}
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
return resp;
}
response_t *web_put(const char *url, const char *data) {
response_t *web_put(const char *url, const char *data, int insecure) {
response_t *resp = malloc(sizeof(response_t));
@@ -175,7 +204,10 @@ response_t *web_put(const char *url, const char *data) {
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4);
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
@@ -194,7 +226,7 @@ response_t *web_put(const char *url, const char *data) {
return resp;
}
response_t *web_delete(const char *url) {
response_t *web_delete(const char *url, int insecure) {
response_t *resp = malloc(sizeof(response_t));
@@ -207,6 +239,9 @@ response_t *web_delete(const char *url) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
struct curl_slist *headers = NULL;

View File

@@ -25,14 +25,15 @@ typedef struct {
response_t *response;
int running_handles;
int done;
char curl_err_buffer[CURL_ERROR_SIZE + 1];
} subreq_ctx_t;
response_t *web_get(const char *url, int timeout);
response_t *web_post(const char * url, const char * data);
response_t *web_get(const char *url, int timeout, int insecure);
response_t *web_post(const char * url, const char * data, int insecure);
void web_post_async_poll(subreq_ctx_t* req);
subreq_ctx_t *web_post_async(const char *url, char *data);
response_t *web_put(const char *url, const char *data);
response_t *web_delete(const char *url);
subreq_ctx_t *web_post_async(const char *url, char *data, int insecure);
response_t *web_put(const char *url, const char *data, int insecure);
response_t *web_delete(const char *url, int insecure);
void free_response(response_t *resp);

View File

@@ -124,9 +124,7 @@ char *build_json_string(document_t *doc) {
cJSON_AddStringToObject(json, "path", "");
}
char md5_str[MD5_STR_LENGTH];
buf2hex(doc->path_md5, MD5_DIGEST_LENGTH, md5_str);
cJSON_AddStringToObject(json, "_id", md5_str);
cJSON_AddStringToObject(json, "_id", doc->doc_id);
// Metadata
meta_line_t *meta = doc->meta_head;
@@ -452,20 +450,19 @@ void read_lines(const char *path, const line_processor_t processor) {
dyn_buffer_destroy(&buf);
fclose(file);
}
void read_index_ndjson(const char *line, void* _data) {
void** data = _data;
const char* index_id = data[0];
void read_index_ndjson(const char *line, void *_data) {
void **data = _data;
const char *index_id = data[0];
index_func func = data[1];
read_index_bin_handle_line(line, index_id, func);
}
void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {
void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func func) {
if (strcmp(type, INDEX_TYPE_NDJSON) == 0) {
read_lines(path, (line_processor_t) {
.data = (void*[2]){(void*)index_id, func} ,
.data = (void *[2]) {(void *) index_id, func},
.func = read_index_ndjson,
});
}
@@ -473,11 +470,11 @@ void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const cha
static __thread GHashTable *IncrementalReadTable = NULL;
void json_put_incremental(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
void json_put_incremental(cJSON *document, UNUSED(const char doc_id[SIST_DOC_ID_LEN])) {
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
const int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;
incremental_put_str(IncrementalReadTable, path_md5_str, mtime);
incremental_put(IncrementalReadTable, path_md5_str, mtime);
}
void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc) {
@@ -490,13 +487,11 @@ static __thread GHashTable *IncrementalNewTable = NULL;
static __thread store_t *IncrementalCopySourceStore = NULL;
static __thread store_t *IncrementalCopyDestinationStore = NULL;
void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
unsigned char path_md5[MD5_DIGEST_LENGTH];
hex2buf(path_md5_str, MD5_STR_LENGTH - 1, path_md5);
const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;
if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get_str(IncrementalCopyTable, path_md5_str)) {
if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get(IncrementalCopyTable, doc_id)) {
// Copy index line
cJSON_DeleteItemFromObject(document, "index");
char *json_str = cJSON_PrintUnformatted(document);
@@ -510,9 +505,9 @@ void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_S
// Copy tn store contents
size_t buf_len;
char *buf = store_read(IncrementalCopySourceStore, (char *) path_md5, sizeof(path_md5), &buf_len);
char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, SIST_DOC_ID_LEN, &buf_len);
if (buf_len != 0) {
store_write(IncrementalCopyDestinationStore, (char *) path_md5, sizeof(path_md5), buf, buf_len);
store_write(IncrementalCopyDestinationStore, (char *) doc_id, SIST_DOC_ID_LEN, buf, buf_len);
free(buf);
}
}
@@ -536,24 +531,24 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc);
}
void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {
char path_md5_n[MD5_STR_LENGTH + 1];
path_md5_n[MD5_STR_LENGTH] = '\0';
path_md5_n[MD5_STR_LENGTH - 1] = '\n';
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
char doc_id_n[SIST_DOC_ID_LEN + 1];
doc_id_n[SIST_DOC_ID_LEN] = '\0';
doc_id_n[SIST_DOC_ID_LEN - 1] = '\n';
const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;
// do not delete archive virtual entries
if (cJSON_GetObjectItem(document, "parent") == NULL
&& !incremental_get_str(IncrementalCopyTable, path_md5_str)
&& !incremental_get_str(IncrementalNewTable, path_md5_str)
&& !incremental_get(IncrementalCopyTable, doc_id)
&& !incremental_get(IncrementalNewTable, doc_id)
) {
memcpy(path_md5_n, path_md5_str, MD5_STR_LENGTH - 1);
zstd_write_string(path_md5_n, MD5_STR_LENGTH);
memcpy(doc_id_n, doc_id, SIST_DOC_ID_LEN - 1);
// Write the newline-terminated copy prepared above; the length stops before its trailing NUL.
zstd_write_string(doc_id_n, SIST_DOC_ID_LEN);
}
}
void incremental_delete(const char *del_filepath, const char* index_filepath,
void incremental_delete(const char *del_filepath, const char *index_filepath,
GHashTable *copy_table, GHashTable *new_table) {
if (WriterCtx.out_file == NULL) {

View File

@@ -12,7 +12,7 @@ typedef struct line_processor {
void (*func)(const char*, void*);
} line_processor_t;
typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);
typedef void(*index_func)(cJSON *, const char[SIST_DOC_ID_LEN]);
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
const char *dst_filepath, GHashTable *copy_table);
@@ -24,7 +24,7 @@ void write_document(document_t *doc);
void read_lines(const char *path, const line_processor_t processor);
void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);
void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func);
void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc);
@@ -42,13 +42,13 @@ index_descriptor_t read_index_descriptor(char *path);
// caller ensures char file_path[PATH_MAX]
#define READ_INDICES(file_path, index_path, action_ok, action_main_fail, cond_original) \
snprintf(file_path, PATH_MAX, "%s_index_main.ndjson.zst", index_path); \
if (0 == access(file_path, R_OK)) { \
if (access(file_path, R_OK) == 0) { \
action_ok; \
} else { \
action_main_fail; \
} \
snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \
if ((cond_original) && (0 == access(file_path, R_OK))) { \
if ((cond_original) && access(file_path, R_OK) == 0) { \
action_ok; \
} \

View File

@@ -52,22 +52,7 @@ void store_flush(store_t *store) {
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {
if (LogCtx.very_verbose) {
if (key_len == MD5_DIGEST_LENGTH) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
} else if (key_len == MD5_DIGEST_LENGTH + sizeof(int)) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF("store.c", "Store write {%s/%d} %lu bytes",
path_md5_str, *(int *) (key + MD5_DIGEST_LENGTH), buf_len);
} else {
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
}
LOG_DEBUGF("store.c", "Store write %s@{%s} %lu bytes", store->path, key, buf_len)
}
#if (SIST_FAKE_STORE != 1)

View File

@@ -22,7 +22,7 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
job->vfile.info = *info;
memset(job->parent, 0, MD5_DIGEST_LENGTH);
job->parent[0] = '\0';
job->vfile.filepath = job->filepath;
job->vfile.read = fs_read;

1
src/magic_generated.c vendored Normal file

File diff suppressed because one or more lines are too long

View File

@@ -38,8 +38,8 @@ static __sighandler_t sigabrt_handler = NULL;
void sig_handler(int signum) {
LogCtx.verbose = 1;
LogCtx.very_verbose = 1;
LogCtx.verbose = TRUE;
LogCtx.very_verbose = TRUE;
LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
@@ -103,7 +103,7 @@ void sig_handler(int signum) {
exit(-1);
}
void init_dir(const char *dirpath) {
void init_dir(const char *dirpath, scan_args_t *args) {
char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
@@ -111,9 +111,18 @@ void init_dir(const char *dirpath) {
strcpy(ScanCtx.index.desc.version, Version);
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON);
if (args->incremental != NULL) {
// copy old index id
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
} else {
// generate new index id based on timestamp
unsigned char index_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
}
write_index_descriptor(path, &ScanCtx.index.desc);
}
@@ -315,9 +324,13 @@ void load_incremental_index(const scan_args_t *args) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version)
}
READ_INDICES(file_path, args->incremental, incremental_read(ScanCtx.original_table, file_path, &original_desc),
READ_INDICES(
file_path,
args->incremental,
incremental_read(ScanCtx.original_table, file_path, &original_desc),
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
1);
TRUE
);
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
}
@@ -378,7 +391,7 @@ void sist2_scan(scan_args_t *args) {
initialize_scan_context(args);
init_dir(ScanCtx.index.path);
init_dir(ScanCtx.index.path, args);
char store_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
@@ -422,8 +435,8 @@ void sist2_scan(scan_args_t *args) {
LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count)
LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count)
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count)
LOG_DEBUGF("main.c", "Thumbnail store size: %d", ScanCtx.stat_tn_size)
LOG_DEBUGF("main.c", "Index size: %d", ScanCtx.stat_index_size)
LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size)
LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size)
if (args->incremental != NULL) {
save_incremental_index(args);
@@ -440,6 +453,7 @@ void sist2_index(index_args_t *args) {
IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index;
IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
IndexCtx.batch_size = args->batch_size;
IndexCtx.needs_es_connection = !args->print;
@@ -525,6 +539,8 @@ void sist2_exec_script(exec_args_t *args) {
IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index;
IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
IndexCtx.needs_es_connection = TRUE;
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
@@ -536,6 +552,7 @@ void sist2_web(web_args_t *args) {
WebCtx.es_url = args->es_url;
WebCtx.es_index = args->es_index;
WebCtx.es_insecure_ssl = args->es_insecure_ssl;
WebCtx.index_count = args->index_count;
WebCtx.auth_user = args->auth_user;
WebCtx.auth_pass = args->auth_pass;
@@ -606,6 +623,7 @@ int main(int argc, const char *argv[]) {
int arg_version = 0;
char *common_es_url = NULL;
int common_es_insecure_ssl = 0;
char *common_es_index = NULL;
char *common_script_path = NULL;
int common_async_script = 0;
@@ -671,6 +689,7 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
@@ -685,6 +704,7 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
@@ -695,6 +715,7 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Exec-script options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
@@ -724,6 +745,10 @@ int main(int argc, const char *argv[]) {
index_args->es_index = common_es_index;
exec_args->es_index = common_es_index;
web_args->es_insecure_ssl = common_es_insecure_ssl;
index_args->es_insecure_ssl = common_es_insecure_ssl;
exec_args->es_insecure_ssl = common_es_insecure_ssl;
index_args->script_path = common_script_path;
exec_args->script_path = common_script_path;
index_args->threads = common_threads;
@@ -767,9 +792,8 @@ int main(int argc, const char *argv[]) {
sist2_exec_script(exec_args);
} else {
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
argparse_usage(&argparse);
goto end;
LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0])
}
printf("\n");

View File

@@ -5,6 +5,7 @@
#include "mime.h"
#include "src/io/serialize.h"
#include "src/parsing/sidecar.h"
#include "src/magic_generated.c"
#include <magic.h>
@@ -69,7 +70,7 @@ void parse(void *arg) {
doc->base = (short) job->base;
char *rel_path = doc->filepath + ScanCtx.index.desc.root_len;
MD5((unsigned char *) rel_path, strlen(rel_path), doc->path_md5);
generate_doc_id(rel_path, doc->doc_id);
doc->meta_head = NULL;
doc->meta_tail = NULL;
@@ -77,10 +78,10 @@ void parse(void *arg) {
doc->size = job->vfile.info.st_size;
doc->mtime = (int) job->vfile.info.st_mtim.tv_sec;
int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5);
int inc_ts = incremental_get(ScanCtx.original_table, doc->doc_id);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
pthread_mutex_lock(&ScanCtx.copy_table_mu);
incremental_mark_file(ScanCtx.copy_table, doc->path_md5);
incremental_mark_file(ScanCtx.copy_table, doc->doc_id);
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
@@ -96,16 +97,14 @@ void parse(void *arg) {
if (ScanCtx.new_table != NULL) {
pthread_mutex_lock(&ScanCtx.copy_table_mu);
incremental_mark_file(ScanCtx.new_table, doc->path_md5);
incremental_mark_file(ScanCtx.new_table, doc->doc_id);
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
}
char *buf[MAGIC_BUF_SIZE];
if (LogCtx.very_verbose) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex(doc->path_md5, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", path_md5_str)
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", doc->doc_id)
}
if (job->vfile.info.st_size == 0) {
@@ -145,7 +144,15 @@ void parse(void *arg) {
}
magic_t magic = magic_open(MAGIC_MIME_TYPE);
magic_load(magic, NULL);
const char *magic_buffers[1] = {magic_database_buffer,};
size_t sizes[1] = {sizeof(magic_database_buffer),};
int load_ret = magic_load_buffers(magic, (void **) &magic_buffers, sizes, 1);
if (load_ret != 0) {
LOG_FATALF("parse.c", "Could not load libmagic database: (%d)", load_ret)
}
const char *magic_mime_str = magic_buffer(magic, buf, bytes_read);
if (magic_mime_str != NULL) {
@@ -218,10 +225,10 @@ void parse(void *arg) {
abort:
//Parent meta
if (!md5_digest_is_null(job->parent)) {
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + MD5_STR_LENGTH);
if (job->parent[0] != '\0') {
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + SIST_INDEX_ID_LEN);
meta_parent->key = MetaParent;
buf2hex(job->parent, MD5_DIGEST_LENGTH, meta_parent->str_val);
strcpy(meta_parent->str_val, job->parent);
APPEND_META((doc), meta_parent)
doc->has_parent = TRUE;

View File

@@ -23,14 +23,17 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
}
char *json_str = cJSON_PrintUnformatted(json);
unsigned char path_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
path_md5);
char assoc_doc_id[SIST_DOC_ID_LEN];
char path_md5_str[MD5_STR_LENGTH];
buf2hex(path_md5, MD5_DIGEST_LENGTH, path_md5_str);
char rel_path[PATH_MAX];
size_t rel_path_len = doc->ext - 1 - ScanCtx.index.desc.root_len;
memcpy(rel_path, vfile->filepath + ScanCtx.index.desc.root_len, rel_path_len);
*(rel_path + rel_path_len) = '\0';
store_write(ScanCtx.index.meta_store, path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
generate_doc_id(rel_path, assoc_doc_id);
store_write(ScanCtx.index.meta_store, assoc_doc_id, sizeof(assoc_doc_id), json_str,
strlen(json_str) + 1);
cJSON_Delete(json);
free(json_str);

View File

@@ -27,10 +27,6 @@
#define UNUSED(x) __attribute__((__unused__)) x
#define MD5_STR_LENGTH 33
#define SHA1_STR_LENGTH 41
#define SHA1_DIGEST_LENGTH 20
#include "util.h"
#include "log.h"
#include "types.h"
@@ -53,14 +49,14 @@
#include <ctype.h>
#include "git_hash.h"
#define VERSION "2.11.7"
#define VERSION "2.12.1"
static const char *const Version = VERSION;
#ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown
#endif
#define EXPECTED_MONGOOSE_VERSION "7.3"
#define EXPECTED_MONGOOSE_VERSION "7.6"
#define Q(x) #x
#define QUOTE(x) Q(x)

View File

@@ -20,7 +20,7 @@ typedef struct {
long count;
} agg_t;
void fill_tables(cJSON *document, UNUSED(const char index_id[MD5_STR_LENGTH])) {
void fill_tables(cJSON *document, UNUSED(const char index_id[SIST_INDEX_ID_LEN])) {
if (cJSON_GetObjectItem(document, "parent") != NULL) {
return;

View File

@@ -4,7 +4,7 @@
#define INDEX_TYPE_NDJSON "ndjson"
typedef struct index_descriptor {
char id[MD5_STR_LENGTH];
char id[SIST_INDEX_ID_LEN];
char version[64];
long timestamp;
char root[PATH_MAX];

View File

@@ -10,8 +10,6 @@
#include "third-party/utf8.h/utf8.h"
#include "libscan/scan.h"
#define MD5_STR_LENGTH 33
char *abspath(const char *path);
@@ -94,40 +92,24 @@ static void buf2hex(const unsigned char *buf, size_t buflen, char *hex_string) {
__always_inline
static int md5_digest_is_null(const unsigned char digest[MD5_DIGEST_LENGTH]) {
return (*(int64_t *) digest) == 0 && (*((int64_t *) digest + 1)) == 0;
/**
 * Derive a document id from a path relative to the index root.
 *
 * Computes the MD5 digest of the NUL-terminated string rel_path and hands it
 * to buf2hex(), which writes the result into doc_id.
 * NOTE(review): doc_id presumably has to hold at least SIST_DOC_ID_LEN bytes
 * -- confirm against buf2hex().
 */
static void generate_doc_id(const char *rel_path, char *doc_id) {
    const size_t path_len = strlen(rel_path);
    unsigned char digest[MD5_DIGEST_LENGTH];

    MD5((const unsigned char *) rel_path, path_len, digest);
    buf2hex(digest, MD5_DIGEST_LENGTH, doc_id);
}
__always_inline
static void incremental_put(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH], int mtime) {
char *ptr = malloc(MD5_STR_LENGTH);
buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
static void incremental_put(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN], int mtime) {
char *ptr = malloc(SIST_DOC_ID_LEN);
strcpy(ptr, doc_id);
g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
}
__always_inline
static void incremental_put_str(GHashTable *table, const char *path_md5, int mtime) {
char *ptr = malloc(MD5_STR_LENGTH);
strcpy(ptr, path_md5);
g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
}
__always_inline
static int incremental_get(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
static int incremental_get(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) {
if (table != NULL) {
char md5_str[MD5_STR_LENGTH];
buf2hex(path_md5, MD5_DIGEST_LENGTH, md5_str);
return GPOINTER_TO_INT(g_hash_table_lookup(table, md5_str));
} else {
return 0;
}
}
__always_inline
static int incremental_get_str(GHashTable *table, const char *path_md5) {
if (table != NULL) {
return GPOINTER_TO_INT(g_hash_table_lookup(table, path_md5));
return GPOINTER_TO_INT(g_hash_table_lookup(table, doc_id));
} else {
return 0;
}
@@ -138,9 +120,9 @@ static int incremental_get_str(GHashTable *table, const char *path_md5) {
* !!Not thread safe.
*/
__always_inline
static int incremental_mark_file(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
char *ptr = malloc(MD5_STR_LENGTH);
buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
static int incremental_mark_file(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) {
char *ptr = malloc(SIST_DOC_ID_LEN);
strcpy(ptr, doc_id);
return g_hash_table_insert(table, ptr, GINT_TO_POINTER(1));
}

View File

@@ -12,6 +12,13 @@
#define HTTP_TEXT_TYPE_HEADER "Content-Type: text/plain;charset=utf-8\r\n"
#define HTTP_REPLY_NOT_FOUND mg_http_reply(nc, 404, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Not found");
/* Options handed to mg_http_serve_file() by the dev-mode handlers in this file:
 * every pointer field set here is NULL and mime_types is an empty string. */
static struct mg_http_serve_opts DefaultServeOpts = {
    .root_dir = NULL,
    .ssi_pattern = NULL,
    .mime_types = "",
    .fs = NULL,
};
static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
mg_printf(
@@ -29,7 +36,7 @@ static void send_response_line(struct mg_connection *nc, int status_code, size_t
index_t *get_index_by_id(const char *index_id) {
for (int i = WebCtx.index_count; i >= 0; i--) {
if (strncmp(index_id, WebCtx.indices[i].desc.id, MD5_STR_LENGTH) == 0) {
if (strncmp(index_id, WebCtx.indices[i].desc.id, SIST_INDEX_ID_LEN) == 0) {
return &WebCtx.indices[i];
}
}
@@ -54,7 +61,7 @@ store_t *get_tag_store(const char *index_id) {
void search_index(struct mg_connection *nc, struct mg_http_message *hm) {
if (WebCtx.dev) {
mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", "text/html", NULL);
mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", &DefaultServeOpts);
} else {
send_response_line(nc, 200, sizeof(index_html), "Content-Type: text/html");
mg_send(nc, index_html, sizeof(index_html));
@@ -63,23 +70,23 @@ void search_index(struct mg_connection *nc, struct mg_http_message *hm) {
void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 4) {
if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
HTTP_REPLY_NOT_FOUND
return;
}
char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
char arg_index_id[SIST_INDEX_ID_LEN];
memcpy(arg_index_id, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
*(arg_index_id + SIST_INDEX_ID_LEN - 1) = '\0';
index_t *index = get_index_by_id(arg_md5);
index_t *index = get_index_by_id(arg_index_id);
if (index == NULL) {
HTTP_REPLY_NOT_FOUND
return;
}
const char *file;
switch (atoi(hm->uri.ptr + 3 + MD5_STR_LENGTH)) {
switch (atoi(hm->uri.ptr + 3 + SIST_INDEX_ID_LEN)) {
case 1:
file = "treemap.csv";
break;
@@ -104,12 +111,13 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
strcpy(full_path, index->path);
strcat(full_path, file);
mg_http_serve_file(nc, hm, full_path, "text/csv", disposition);
struct mg_http_serve_opts opts = {};
mg_http_serve_file(nc, hm, full_path, &opts);
}
void javascript(struct mg_connection *nc, struct mg_http_message *hm) {
if (WebCtx.dev) {
mg_http_serve_file(nc, hm, "sist2-vue/dist/js/index.js", "application/javascript", NULL);
mg_http_serve_file(nc, hm, "sist2-vue/dist/js/index.js", &DefaultServeOpts);
} else {
send_response_line(nc, 200, sizeof(index_js), "Content-Type: application/javascript");
mg_send(nc, index_js, sizeof(index_js));
@@ -118,7 +126,7 @@ void javascript(struct mg_connection *nc, struct mg_http_message *hm) {
void javascript_vendor(struct mg_connection *nc, struct mg_http_message *hm) {
if (WebCtx.dev) {
mg_http_serve_file(nc, hm, "sist2-vue/dist/js/chunk-vendors.js", "application/javascript", NULL);
mg_http_serve_file(nc, hm, "sist2-vue/dist/js/chunk-vendors.js", &DefaultServeOpts);
} else {
send_response_line(nc, 200, sizeof(chunk_vendors_js), "Content-Type: application/javascript");
mg_send(nc, chunk_vendors_js, sizeof(chunk_vendors_js));
@@ -142,28 +150,25 @@ void style_vendor(struct mg_connection *nc, struct mg_http_message *hm) {
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
int parse_tn_num = FALSE;
int has_thumbnail_index = FALSE;
if (hm->uri.len != 68) {
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2) {
if (hm->uri.len != 68 + 4) {
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2 + 4) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
parse_tn_num = TRUE;
has_thumbnail_index = TRUE;
}
char arg_file_md5[MD5_STR_LENGTH];
char arg_index[MD5_STR_LENGTH];
char arg_doc_id[SIST_DOC_ID_LEN];
char arg_index[SIST_INDEX_ID_LEN];
memcpy(arg_index, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_index + MD5_STR_LENGTH - 1) = '\0';
memcpy(arg_file_md5, hm->uri.ptr + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH);
*(arg_file_md5 + MD5_STR_LENGTH - 1) = '\0';
unsigned char md5_buf[MD5_DIGEST_LENGTH];
hex2buf(arg_file_md5, MD5_STR_LENGTH - 1, md5_buf);
memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
store_t *store = get_store(arg_index);
if (store == NULL) {
@@ -175,16 +180,18 @@ void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
char *data;
size_t data_len = 0;
if (parse_tn_num) {
int tn_num = atoi(hm->uri.ptr + 68);
if (has_thumbnail_index) {
const char *tn_index = hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2;
char tn_key[sizeof(md5_buf) + sizeof(int)];
memcpy(tn_key, md5_buf, sizeof(md5_buf));
memcpy(tn_key + sizeof(md5_buf), &tn_num, sizeof(tn_num));
char tn_key[sizeof(arg_doc_id) + sizeof(char) * 4];
memcpy(tn_key, arg_doc_id, sizeof(arg_doc_id));
memcpy(tn_key + sizeof(arg_doc_id) - 1, tn_index, sizeof(char) * 4);
*(tn_key + sizeof(tn_key) - 1) = '\0';
data = store_read(store, (char *) tn_key, sizeof(tn_key), &data_len);
} else {
data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
data = store_read(store, (char *) arg_doc_id, sizeof(arg_doc_id), &data_len);
}
if (data_len != 0) {
@@ -205,7 +212,7 @@ void search(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->body.len == 0) {
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
mg_http_reply(nc, 500, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Invalid request");
mg_http_reply(nc, 400, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Invalid request");
return;
}
@@ -216,7 +223,7 @@ void search(struct mg_connection *nc, struct mg_http_message *hm) {
char url[4096];
snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index);
nc->fn_data = web_post_async(url, body);
nc->fn_data = web_post_async(url, body, WebCtx.es_insecure_ssl);
}
void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
@@ -274,10 +281,18 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
char disposition[8192];
snprintf(disposition, sizeof(disposition),
HTTP_SERVER_HEADER "Content-Disposition: inline; filename=\"%s%s%s\"\r\nAccept-Ranges: bytes\r\n",
HTTP_SERVER_HEADER "Content-Disposition: inline; filename=\"%s%s%s\"\r\n"
"Accept-Ranges: bytes\r\nCache-Control: no-store\r\n",
name, strlen(ext) == 0 ? "" : ".", ext);
mg_http_serve_file(nc, hm, full_path, mime, disposition);
char mime_mapping[1024];
snprintf(mime_mapping, sizeof(mime_mapping), "%s=%s", ext, mime);
struct mg_http_serve_opts opts = {
.extra_headers = disposition,
.mime_types = mime_mapping
};
mg_http_serve_file(nc, hm, full_path, &opts);
}
void cache_es_version() {
@@ -287,7 +302,7 @@ void cache_es_version() {
return;
}
es_version_t *es_version = elastic_get_version(WebCtx.es_url);
es_version_t *es_version = elastic_get_version(WebCtx.es_url, WebCtx.es_insecure_ssl);
if (es_version != NULL) {
WebCtx.es_version = es_version;
is_cached = TRUE;
@@ -298,15 +313,20 @@ void index_info(struct mg_connection *nc) {
cache_es_version();
const char *es_version = "0.0.0";
if (WebCtx.es_version != NULL) {
es_version = format_es_version(WebCtx.es_version);
}
cJSON *json = cJSON_CreateObject();
cJSON *arr = cJSON_AddArrayToObject(json, "indices");
cJSON_AddStringToObject(json, "mongooseVersion", MG_VERSION);
cJSON_AddStringToObject(json, "esIndex", WebCtx.es_index);
cJSON_AddStringToObject(json, "version", Version);
cJSON_AddStringToObject(json, "esVersion", format_es_version(WebCtx.es_version));
cJSON_AddStringToObject(json, "esVersion", es_version);
cJSON_AddBoolToObject(json, "esVersionSupported", IS_SUPPORTED_ES_VERSION(WebCtx.es_version));
cJSON_AddBoolToObject(json, "esVersionLegacy", USE_LEGACY_ES_SETTINGS(WebCtx.es_version));
cJSON_AddBoolToObject(json, "esVersionLegacy", IS_LEGACY_VERSION(WebCtx.es_version));
cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM));
cJSON_AddStringToObject(json, "sist2Hash", Sist2CommitHash);
cJSON_AddStringToObject(json, "lang", WebCtx.lang);
@@ -339,55 +359,19 @@ void index_info(struct mg_connection *nc) {
}
void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
cJSON *doc = elastic_get_document(arg_md5);
cJSON *source = cJSON_GetObjectItem(doc, "_source");
cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
HTTP_REPLY_NOT_FOUND
return;
}
index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) {
cJSON_Delete(doc);
HTTP_REPLY_NOT_FOUND
return;
}
char *json_str = cJSON_PrintUnformatted(source);
send_response_line(nc, 200, (int) strlen(json_str), "Content-Type: application/json");
mg_send(nc, json_str, (int) strlen(json_str));
free(json_str);
cJSON_Delete(doc);
}
void file(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 2) {
if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
char arg_doc_id[SIST_DOC_ID_LEN];
memcpy(arg_doc_id, hm->uri.ptr + 3, SIST_DOC_ID_LEN);
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
const char *next = arg_md5;
const char *next = arg_doc_id;
cJSON *doc = NULL;
cJSON *index_id = NULL;
cJSON *source = NULL;
@@ -438,7 +422,6 @@ void status(struct mg_connection *nc) {
typedef struct {
char *name;
int delete;
char *path_md5_str;
char *doc_id;
} tag_req_t;
@@ -458,12 +441,6 @@ tag_req_t *parse_tag_request(cJSON *json) {
return NULL;
}
cJSON *arg_path_md5 = cJSON_GetObjectItem(json, "path_md5");
if (arg_path_md5 == NULL || !cJSON_IsString(arg_path_md5) ||
strlen(arg_path_md5->valuestring) != MD5_STR_LENGTH - 1) {
return NULL;
}
cJSON *arg_doc_id = cJSON_GetObjectItem(json, "doc_id");
if (arg_doc_id == NULL || !cJSON_IsString(arg_doc_id)) {
return NULL;
@@ -472,22 +449,21 @@ tag_req_t *parse_tag_request(cJSON *json) {
tag_req_t *req = malloc(sizeof(tag_req_t));
req->delete = arg_delete->valueint;
req->name = arg_name->valuestring;
req->path_md5_str = arg_path_md5->valuestring;
req->doc_id = arg_doc_id->valuestring;
return req;
}
void tag(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 4) {
if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
char arg_index[MD5_STR_LENGTH];
memcpy(arg_index, hm->uri.ptr + 5, MD5_STR_LENGTH);
*(arg_index + MD5_STR_LENGTH - 1) = '\0';
char arg_index[SIST_INDEX_ID_LEN];
memcpy(arg_index, hm->uri.ptr + 5, SIST_INDEX_ID_LEN);
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
LOG_DEBUG("serve.c", "Invalid tag request")
@@ -519,7 +495,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
cJSON *arr = NULL;
size_t data_len = 0;
const char *data = store_read(store, arg_req->path_md5_str, MD5_STR_LENGTH, &data_len);
const char *data = store_read(store, arg_req->doc_id, SIST_DOC_ID_LEN, &data_len);
if (data_len == 0) {
arr = cJSON_CreateArray();
} else {
@@ -555,7 +531,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->fn_data = web_post_async(url, buf);
nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
} else {
cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
@@ -575,11 +551,11 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->fn_data = web_post_async(url, buf);
nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
}
char *json_str = cJSON_PrintUnformatted(arr);
store_write(store, arg_req->path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
store_write(store, arg_req->doc_id, SIST_DOC_ID_LEN, json_str, strlen(json_str) + 1);
store_flush(store);
free(arg_req);
@@ -641,8 +617,6 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
return;
}
tag(nc, hm);
} else if (mg_http_match_uri(hm, "/d/*")) {
document_info(nc, hm);
} else {
HTTP_REPLY_NOT_FOUND
}

File diff suppressed because one or more lines are too long

View File

@@ -35,10 +35,20 @@ def sist2_index(files, *args):
path = copy_files(files)
shutil.rmtree("test_i", ignore_errors=True)
sist2("scan", path, "-o", "test_i", *args)
sist2("scan", path, "-o", "test_i", "-t12", *args)
return iter(sist2_index_to_dict("test_i"))
def get_lmdb_contents(path):
    """Return every key/value pair stored in the LMDB environment at ``path``.

    The environment and its read-only transaction are closed before returning,
    so the same directory can be deleted, re-created and opened again later in
    the same process without a stale handle lingering.
    """
    import lmdb

    with lmdb.open(path) as env:
        with env.begin(write=False) as txn:
            # Build the dict while the transaction is still open; the cursor
            # must not be used once the transaction has ended.
            return {key: value for key, value in txn.cursor()}
def sist2_incremental_index(files, func=None, incremental_index=False, *args):
path = copy_files(files)
@@ -46,7 +56,7 @@ def sist2_incremental_index(files, func=None, incremental_index=False, *args):
func(path)
shutil.rmtree("test_i_inc", ignore_errors=True)
sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", *args)
sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", "-t12", *args)
return iter(sist2_index_to_dict("test_i_inc", incremental_index))
@@ -76,9 +86,31 @@ class ScanTest(unittest.TestCase):
pass
file_count = sum(1 for _ in sist2_index(TEST_FILES))
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, remove_files)), file_count - 2)
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)), 3)
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files)), file_count + 3)
lmdb_full = get_lmdb_contents("test_i/thumbs")
# Remove files
num_files_rm1 = len(list(sist2_incremental_index(TEST_FILES, remove_files)))
lmdb_rm1 = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(num_files_rm1, file_count - 2)
self.assertEqual(len(set(lmdb_full.keys() - set(lmdb_rm1.keys()))), 2)
# add files (incremental_index=True)
num_files_add_inc = len(list(sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)))
lmdb_add_inc = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(num_files_add_inc, 3)
self.assertEqual(set(lmdb_full.keys()), set(lmdb_add_inc.keys()))
# add files
num_files_add = len(list(sist2_incremental_index(TEST_FILES, add_files)))
lmdb_add = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(num_files_add, file_count + 3)
self.assertEqual(set(lmdb_full.keys()), set(lmdb_add.keys()))
# (No action)
sist2_incremental_index(TEST_FILES)
lmdb_inc = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(set(lmdb_full.keys()), set(lmdb_inc.keys()))
if __name__ == "__main__":

View File

@@ -6,26 +6,11 @@ set(CMAKE_C_STANDARD 11)
option(BUILD_TESTS "Build tests" on)
add_subdirectory(third-party/antiword)
if (SIST_DEBUG)
add_compile_definitions(
antiword
DEBUG
)
target_compile_options(
antiword
PRIVATE
-g
-fstack-protector
-fno-omit-frame-pointer
-fsanitize=address
-fno-inline
)
else()
add_compile_definitions(
antiword
NDEBUG
)
endif()
set(USE_LIBXML2 OFF CACHE BOOL "" FORCE)
set(USE_XMLWRITER OFF CACHE BOOL "" FORCE)
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
add_subdirectory(third-party/libmobi)
add_library(
scan
@@ -48,6 +33,54 @@ add_library(
libscan/mobi/scan_mobi.c libscan/mobi/scan_mobi.h libscan/raw/raw.c libscan/raw/raw.h)
set_target_properties(scan PROPERTIES LINKER_LANGUAGE C)
# Build-flavour specific compile settings:
#   SIST_DEBUG -> DEBUG definition plus debug/sanitizer flags
#   SIST_FAST  -> NDEBUG definition plus -Ofast with -march=native and -freciprocal-math
#   otherwise  -> NDEBUG definition plus -Ofast without the two machine-specific flags
#
# NOTE(review): add_compile_definitions() takes no target argument, so `antiword`
# below is presumably being added as a preprocessor definition for the whole
# directory rather than naming the antiword target -- confirm whether
# target_compile_definitions(antiword PRIVATE ...) was intended.
# NOTE(review): the debug branch applies its options to `antiword` while the other
# two branches apply theirs to `scan` -- confirm this asymmetry is intentional.
if (SIST_DEBUG)
add_compile_definitions(
antiword
DEBUG
)
target_compile_options(
antiword
PRIVATE
-g
-fstack-protector
-fno-omit-frame-pointer
-fsanitize=address
-fno-inline
)
elseif (SIST_FAST)
add_compile_definitions(
antiword
NDEBUG
)
target_compile_options(
scan
PRIVATE
-Ofast
-march=native
-fno-stack-protector
-fomit-frame-pointer
-freciprocal-math
)
else()
add_compile_definitions(
antiword
NDEBUG
)
# Same as the SIST_FAST flags, with the two options below left commented out.
target_compile_options(
scan
PRIVATE
-Ofast
#-march=native
-fno-stack-protector
-fomit-frame-pointer
#-freciprocal-math
)
endif()
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib .so)
find_package(cJSON CONFIG REQUIRED)
@@ -85,35 +118,15 @@ target_compile_options(
-g
)
include(ExternalProject)
find_program(MAKE_EXE NAMES gmake nmake make)
ExternalProject_Add(
libmobi
GIT_REPOSITORY https://github.com/simon987/libmobi.git
GIT_TAG "public"
UPDATE_COMMAND ""
PATCH_COMMAND ""
TEST_COMMAND ""
CONFIGURE_COMMAND ./autogen.sh && ./configure
INSTALL_COMMAND ""
PREFIX "third-party/ext_libmobi"
SOURCE_DIR "third-party/ext_libmobi/src/libmobi"
BINARY_DIR "third-party/ext_libmobi/src/libmobi"
BUILD_COMMAND ${MAKE_EXE} -j 8 --silent
)
SET(MOBI_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/)
SET(MOBI_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/)
if (SIST_DEBUG)
SET(FFMPEG_DEBUG "--enable-debug=3" "--disable-optimizations")
else()
SET(FFMPEG_DEBUG "")
endif()
include(ExternalProject)
find_program(MAKE_EXE NAMES gmake nmake make)
ExternalProject_Add(
ffmpeg
GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git
@@ -159,10 +172,10 @@ SET(WPD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwp
add_dependencies(
scan
libmobi
ffmpeg
antiword
libwpd
mobi
)
target_link_libraries(
@@ -180,8 +193,6 @@ target_link_libraries(
${MUPDF_LIB}
openjp2
${MOBI_LIB_DIR}/libmobi.a
${WPD_LIB_DIR}/libwpd-0.9.a
${WPD_LIB_DIR}/libwpd-stream-0.9.a
@@ -218,6 +229,7 @@ target_link_libraries(
${GUMBO_LIB}
dl
antiword
mobi
unofficial::pcre::pcre unofficial::pcre::pcre16 unofficial::pcre::pcre32 unofficial::pcre::pcrecpp
)

View File

@@ -202,7 +202,7 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre
sub_job->vfile.logf = ctx->logf;
sub_job->vfile.has_checksum = FALSE;
sub_job->vfile.calculate_checksum = f->calculate_checksum;
memcpy(sub_job->parent, doc->path_md5, MD5_DIGEST_LENGTH);
strcpy(sub_job->parent, doc->doc_id);
while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
sub_job->vfile.info = *archive_entry_stat(entry);

View File

@@ -156,7 +156,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
ctx->store(doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
free(samples);
av_packet_unref(&jpeg_packet);

View File

@@ -232,7 +232,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
bmp_format(&bmp_data, dimensions, bitmap);
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) bmp_data.buf, bmp_data.cur);
ctx->store(doc->doc_id, sizeof(doc->doc_id), (char *) bmp_data.buf, bmp_data.cur);
dyn_buffer_destroy(&bmp_data);
free(bitmap);

View File

@@ -20,8 +20,10 @@
#undef ABS
#define ABS(a) (((a) < 0) ? -(a) : (a))
#define SHA1_STR_LENGTH 41
#define SHA1_DIGEST_LENGTH 20
// SHA-1 digest size in bytes, aliased to SHA_DIGEST_LENGTH.
// NOTE(review): SHA_DIGEST_LENGTH presumably comes from OpenSSL's <openssl/sha.h> - confirm.
#define SHA1_DIGEST_LENGTH SHA_DIGEST_LENGTH
// Buffer sizes derived from the digest sizes: two characters per digest byte
// plus one extra byte (presumably for the terminating NUL of a hex string).
#define SHA1_STR_LENGTH (SHA1_DIGEST_LENGTH * 2 + 1)
#define MD5_STR_LENGTH (MD5_DIGEST_LENGTH * 2 + 1)
#define APPEND_STR_META(doc, keyname, value) \
{meta_line_t *meta_str = malloc(sizeof(meta_line_t) + strlen(value)); \

View File

@@ -4,7 +4,12 @@
#define MIN_SIZE 32
#define AVIO_BUF_SIZE 8192
#define IS_VIDEO(fmt) ((fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0)
/*
 * Evaluates to non-zero when the input format of `fmt` has a name and that
 * name is none of "image2", "jpeg_pipe", "webp_pipe" or "png_pipe"; evaluates
 * to zero otherwise (including when the name is NULL).
 * `fmt` is expanded several times, so pass an expression without side effects.
 */
#define IS_VIDEO(fmt) ( \
(fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0 \
&& strcmp((fmt)->iformat->name, "jpeg_pipe") != 0 \
&& strcmp((fmt)->iformat->name, "webp_pipe") != 0 \
&& strcmp((fmt)->iformat->name, "png_pipe") != 0 \
)
#define STORE_AS_IS ((void*)-1)
@@ -279,6 +284,7 @@ static void
append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int is_video) {
if (is_video) {
if (pFormatCtx->duration / AV_TIME_BASE != 0) {
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
meta_duration->key = MetaMediaDuration;
meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE;
@@ -286,12 +292,15 @@ append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *f
meta_duration->long_val = 0;
}
APPEND_META(doc, meta_duration)
}
if (pFormatCtx->bit_rate != 0) {
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
meta_bitrate->key = MetaMediaBitrate;
meta_bitrate->long_val = pFormatCtx->bit_rate;
APPEND_META(doc, meta_bitrate)
}
}
AVDictionaryEntry *tag = NULL;
if (is_video) {
@@ -459,7 +468,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
if (scaled_frame == STORE_AS_IS) {
return_value = SAVE_THUMBNAIL_OK;
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) frame_and_packet->packet->data,
frame_and_packet->packet->size);
} else {
// Encode frame to jpeg
@@ -473,7 +482,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
// Save thumbnail
if (thumbnail_index == 0) {
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
return_value = SAVE_THUMBNAIL_OK;
} else if (thumbnail_index > 1) {
@@ -482,9 +491,8 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
// I figure out a better fix.
thumbnail_index -= 1;
char tn_key[sizeof(doc->path_md5) + sizeof(int)];
memcpy(tn_key, doc->path_md5, sizeof(doc->path_md5));
memcpy(tn_key + sizeof(doc->path_md5), &thumbnail_index, sizeof(thumbnail_index));
char tn_key[sizeof(doc->doc_id) + sizeof(char) * 4];
snprintf(tn_key, sizeof(tn_key), "%s%04d", doc->doc_id, thumbnail_index);
ctx->store((char *) tn_key, sizeof(tn_key), (char *) jpeg_packet.data, jpeg_packet.size);
} else {
@@ -578,9 +586,10 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
int video_duration_in_seconds = (int) (pFormatCtx->duration / AV_TIME_BASE);
int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF && video_duration_in_seconds >= 15)
// Limit to ~1 thumbnail every 5s
? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 5 + 1), 1) + 1
int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF &&
video_duration_in_seconds >= 15)
// Limit to ~1 thumbnail every 7s
? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 7 + 1), 1) + 1
: 1;
const double seek_increment = thumbnails_to_generate == 1
@@ -845,7 +854,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
if (scaled_frame == STORE_AS_IS) {
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) frame_and_packet->packet->data,
frame_and_packet->packet->size);
} else {
// Encode frame to jpeg
@@ -859,7 +868,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
// Save thumbnail
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);
avcodec_free_context(&jpeg_encoder);

View File

@@ -1,6 +1,6 @@
#include "scan_mobi.h"
#include <mobi.h>
#include "../../third-party/libmobi/src/mobi.h"
#include <errno.h>
#include "stdlib.h"

View File

@@ -191,7 +191,7 @@ void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, s
archive_read_data(a, buf, entry_size);
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), buf, entry_size);
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), buf, entry_size);
free(buf);
}

View File

@@ -7,8 +7,22 @@
#define MIN_SIZE 32
int store_thumbnail_jpeg(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) {
return store_image_thumbnail((scan_media_ctx_t *) ctx, img->data, img->data_size, doc, "x.jpeg");
/**
 * Store a thumbnail taken from a libraw thumbnail record.
 *
 * A temporary scan_media_ctx_t is filled from the raw context's callbacks
 * (store/log/logf) and thumbnail settings (tn_size/tn_qscale) and handed to
 * store_image_thumbnail() together with the thumbnail bytes.
 *
 * @param ctx raw scan context supplying the callbacks and thumbnail settings
 * @param img libraw thumbnail; img.thumb and img.tlength are used as buffer and length
 * @param doc document passed through to store_image_thumbnail()
 * @return the value returned by store_image_thumbnail()
 */
int store_thumbnail_jpeg(scan_raw_ctx_t *ctx, libraw_thumbnail_t img, document_t *doc) {
    // Zero everything first so that any field not set below stays 0/NULL.
    scan_media_ctx_t thumb_ctx = {0};

    // Callbacks and thumbnail settings are taken over from the raw context.
    thumb_ctx.store = ctx->store;
    thumb_ctx.log = ctx->log;
    thumb_ctx.logf = ctx->logf;
    thumb_ctx.tn_size = ctx->tn_size;
    thumb_ctx.tn_qscale = ctx->tn_qscale;

    // Fixed values for the remaining media options.
    thumb_ctx.read_subtitles = FALSE;
    thumb_ctx.tn_count = 1;
    thumb_ctx.max_media_buffer = 0;
    thumb_ctx.tesseract_lang = NULL;
    thumb_ctx.tesseract_path = NULL;

    return store_image_thumbnail(&thumb_ctx, img.thumb, img.tlength, doc, "x.jpeg");
}
int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) {
@@ -70,7 +84,7 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);
av_free(*scaled_frame->data);
@@ -171,6 +185,13 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
return;
}
int tn_ok = 0;
if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_JPEG) {
tn_ok = store_thumbnail_jpeg(ctx, libraw_lib->thumbnail, doc);
} else if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_BITMAP) {
// TODO: technically this should work but is currently untested
int errc = 0;
libraw_processed_image_t *thumb = libraw_dcraw_make_mem_thumb(libraw_lib, &errc);
if (errc != 0) {
@@ -180,16 +201,9 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
return;
}
int tn_ok = 0;
if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_JPEG) {
tn_ok = store_thumbnail_jpeg(ctx, thumb, doc);
} else if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_BITMAP) {
// TODO: technically this should work but is currently untested
tn_ok = store_thumbnail_rgb24(ctx, thumb, doc);
}
libraw_dcraw_clear_mem(thumb);
if (tn_ok == TRUE) {
free(buf);
libraw_close(libraw_lib);
@@ -206,7 +220,7 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
libraw_dcraw_process(libraw_lib);
errc = 0;
int errc = 0;
libraw_processed_image_t *img = libraw_dcraw_make_mem_image(libraw_lib, &errc);
if (errc != 0) {
free(buf);

View File

@@ -48,6 +48,9 @@ typedef int scan_code_t;
#define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1);
#define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1);
// Size of the char buffers that hold a document id (used for document_t.doc_id
// and parse_job_t.parent); equal to MD5_STR_LENGTH.
#define SIST_DOC_ID_LEN MD5_STR_LENGTH
// NOTE(review): presumably the buffer size for an index id - no use is visible here; confirm.
#define SIST_INDEX_ID_LEN MD5_STR_LENGTH
enum metakey {
// String
MetaContent = 1,
@@ -103,7 +106,7 @@ typedef struct meta_line {
typedef struct document {
unsigned char path_md5[MD5_DIGEST_LENGTH];
char doc_id[SIST_DOC_ID_LEN];
unsigned long size;
unsigned int mime;
int mtime;
@@ -159,7 +162,7 @@ typedef struct parse_job_t {
int base;
int ext;
struct vfile vfile;
unsigned char parent[MD5_DIGEST_LENGTH];
char parent[SIST_DOC_ID_LEN];
char filepath[1];
} parse_job_t;

View File

@@ -923,7 +923,6 @@ TEST(Msdoc, Test1Pdf) {
ASSERT_TRUE(strstr(get_meta(&doc, MetaContent)->str_val, "October 2000") != nullptr);
ASSERT_STREQ(get_meta(&doc, MetaTitle)->str_val, "INTERNATIONAL ORGANIZATION FOR STANDARDIZATION");
ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "Oliver Morgan");
ASSERT_EQ(get_meta(&doc, MetaPages)->long_val, 57);
ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), msdoc_ctx.content_size, 4);
ASSERT_NE(size_before, store_size);
@@ -1030,6 +1029,23 @@ TEST(Msdoc, TestUtf8Text) {
cleanup(&doc, &f);
}
// Parses msdoc/test5.doc and checks the extracted content (a Cyrillic phrase),
// the author, the content length against msdoc_ctx.content_size (tolerance 4),
// and that store_size changed while parsing.
TEST(Msdoc, Test5Pdf) {
    vfile_t file;
    document_t document;
    load_doc_file("libscan-test-files/test_files/msdoc/test5.doc", &file, &document);

    const size_t store_size_before = store_size;

    parse_msdoc(&msdoc_ctx, &file, &document);

    const char *content = get_meta(&document, MetaContent)->str_val;

    ASSERT_TRUE(strstr(content, "орган Федеральной") != nullptr);
    ASSERT_STREQ(get_meta(&document, MetaAuthor)->str_val, "uswo");
    ASSERT_NEAR(strlen(content), msdoc_ctx.content_size, 4);
    ASSERT_NE(store_size_before, store_size);

    cleanup(&document, &file);
}
TEST(Msdoc, TestFuzz1) {
vfile_t f;
document_t doc;
@@ -1190,3 +1206,6 @@ int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
// 0x6130000d2580
// "/mnt/Hatchery/m ain/downloads/qbittorrent/downloads/Roskomnadzor/УПРАВЛЕНИЕ РОСКОМНАДЗОРА по РБ.zip#/УПРАВЛЕНИЕ РОСКОМНАДЗОРА по РБ/Лопатин Ю.М/Секнин/2015 год/Обучение по ", <incomplete sequence \320>...