Compare commits

..

32 Commits

Author SHA1 Message Date
4e1109c528 Merge pull request #288 from simon987/dev
v2.12.1
2022-04-23 10:30:19 -04:00
f87de89275 Version bump 2022-04-23 10:29:50 -04:00
1205981a11 CURL error handling, fix ES version handling, support for ES8, add --es-insecure-ssl argument 2022-04-23 10:29:31 -04:00
09613eaaf9 import magic database as a blob as last resort to make it work 2022-04-18 12:55:22 -04:00
a74726be55 Merge pull request #285 from simon987/dependabot/npm_and_yarn/sist2-vue/async-2.6.4
Bump async from 2.6.3 to 2.6.4 in /sist2-vue
2022-04-17 13:42:40 -04:00
dependabot[bot]
cb228052d2 Bump async from 2.6.3 to 2.6.4 in /sist2-vue
Bumps [async](https://github.com/caolan/async) from 2.6.3 to 2.6.4.
- [Release notes](https://github.com/caolan/async/releases)
- [Changelog](https://github.com/caolan/async/blob/v2.6.4/CHANGELOG.md)
- [Commits](https://github.com/caolan/async/compare/v2.6.3...v2.6.4)

---
updated-dependencies:
- dependency-name: async
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-04-17 17:41:14 +00:00
fe56da95d5 Merge pull request #284 from simon987/dev
v2.12.0
2022-04-17 13:38:42 -04:00
9f2ad58f78 bump version 2022-04-17 12:30:14 -04:00
84d9bf4323 Fix cmake libmobi build maybe 2022-04-17 12:23:45 -04:00
90aa90f3f3 Update antiword 2022-04-17 11:47:33 -04:00
3fad07360c Merge pull request #283 from simon987/dependabot/npm_and_yarn/sist2-vue/minimist-1.2.6
Bump minimist from 1.2.5 to 1.2.6 in /sist2-vue
2022-04-17 10:12:10 -04:00
dependabot[bot]
00c3a640d0 Bump minimist from 1.2.5 to 1.2.6 in /sist2-vue
Bumps [minimist](https://github.com/substack/minimist) from 1.2.5 to 1.2.6.
- [Release notes](https://github.com/substack/minimist/releases)
- [Commits](https://github.com/substack/minimist/compare/1.2.5...1.2.6)

---
updated-dependencies:
- dependency-name: minimist
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-04-17 12:53:12 +00:00
730e495bde Enable highlight in document info modal, remove /d/ endpoint 2022-04-16 16:11:17 -04:00
54df1dfcf7 Fix spacebar not working in search bar 2022-04-16 13:51:36 -04:00
a75675ecea Fix thumbnail copy bug, update tests 2022-04-16 11:48:43 -04:00
901035da15 Build libmobi with cmake, update to 0.10 2022-04-15 16:01:40 -04:00
ceb7265639 Fix max_analyzed_offset (again?) 2022-04-15 15:35:39 -04:00
036ed9ea1e Update libmagic cmake things 2022-04-15 15:35:20 -04:00
779303a2f7 Print body response when task id cannot be read 2022-04-14 16:24:56 -04:00
23aee14c07 Fix exec-script & fix memory leak in exec_args_validate 2022-04-14 15:43:24 -04:00
50b9201be3 Merge pull request #279 from simon987/dependabot/npm_and_yarn/sist2-vue/minimist-1.2.6
Bump minimist from 1.2.5 to 1.2.6 in /sist2-vue
2022-04-05 20:12:03 -04:00
dependabot[bot]
14cfb15661 Bump minimist from 1.2.5 to 1.2.6 in /sist2-vue
Bumps [minimist](https://github.com/substack/minimist) from 1.2.5 to 1.2.6.
- [Release notes](https://github.com/substack/minimist/releases)
- [Commits](https://github.com/substack/minimist/compare/1.2.5...1.2.6)

---
updated-dependencies:
- dependency-name: minimist
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-03-31 23:28:25 +00:00
125c85d9bb localize tag filter bar 2022-03-18 09:15:07 -04:00
474eb95aff Update antiword 2022-03-17 15:08:55 -04:00
acf7453057 Add test for large msdoc 2022-03-17 15:05:48 -04:00
9a949d2694 Use TRUE rather than 1 2022-03-17 09:13:19 -04:00
dbdc75dcb8 Add filter bar in tag picker 2022-03-17 09:12:43 -04:00
c575fca91d Do not store duration or bitrate when the value is 0 or for images 2022-03-05 21:24:59 -05:00
0bf4244683 Do blank search on page reload when media tab auto-reload is disabled 2022-03-05 20:56:02 -05:00
eea5ce75f3 Fix query args updating outside of the search page 2022-03-05 20:42:13 -05:00
9b81856353 Fix some errors in keyboard handler 2022-03-05 20:33:45 -05:00
a10d6952ba Fix segfault in print_errors() 2022-03-05 20:33:21 -05:00
45 changed files with 537 additions and 298 deletions

3
.gitmodules vendored
View File

@@ -7,3 +7,6 @@
[submodule "third-party/libscan/third-party/antiword"] [submodule "third-party/libscan/third-party/antiword"]
path = third-party/libscan/third-party/antiword path = third-party/libscan/third-party/antiword
url = https://github.com/simon987/antiword url = https://github.com/simon987/antiword
[submodule "third-party/libscan/third-party/libmobi"]
path = third-party/libscan/third-party/libmobi
url = https://github.com/bfabiszewski/libmobi

View File

@@ -4,6 +4,7 @@ set(CMAKE_C_STANDARD 11)
project(sist2 C) project(sist2 C)
option(SIST_DEBUG "Build a debug executable" on) option(SIST_DEBUG "Build a debug executable" on)
option(SIST_FAST "Enable more optimisation flags" off)
option(SIST_FAKE_STORE "Disable IO operations of LMDB stores for debugging purposes" 0) option(SIST_FAKE_STORE "Disable IO operations of LMDB stores for debugging purposes" 0)
add_compile_definitions( add_compile_definitions(
@@ -54,6 +55,10 @@ find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED) find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED) find_package(unofficial-mongoose CONFIG REQUIRED)
find_package(CURL CONFIG REQUIRED) find_package(CURL CONFIG REQUIRED)
find_library(MAGIC_LIB
NAMES libmagic.so.1 magic
PATHS /usr/lib/x86_64-linux-gnu/ /usr/lib/aarch64-linux-gnu/
)
target_include_directories( target_include_directories(
@@ -93,16 +98,25 @@ if (SIST_DEBUG)
PROPERTIES PROPERTIES
OUTPUT_NAME sist2_debug OUTPUT_NAME sist2_debug
) )
elseif (SIST_FAST)
target_compile_options(
sist2
PRIVATE
-Ofast
-march=native
-fno-stack-protector
-fomit-frame-pointer
-freciprocal-math
)
else () else ()
target_compile_options( target_compile_options(
sist2 sist2
PRIVATE PRIVATE
-Ofast -Ofast
#-march=native
-fno-stack-protector -fno-stack-protector
-fomit-frame-pointer -fomit-frame-pointer
#-freciprocal-math
) )
endif () endif ()
@@ -124,13 +138,12 @@ target_link_libraries(
CURL::libcurl CURL::libcurl
pthread pthread
#magic
c c
scan scan
/usr/lib/x86_64-linux-gnu/libmagic.so.1 ${MAGIC_LIB}
) )
add_custom_target( add_custom_target(

View File

@@ -52,7 +52,7 @@ sist2 (Simple incremental search tool)
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` * Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` *
2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not 2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
recommended!)* recommended!)*
3. *(or)* `docker pull simon987/sist2:2.11.7-x64-linux` 3. *(or)* `docker pull simon987/sist2:2.12.1-x64-linux`
1. See [Usage guide](docs/USAGE.md) 1. See [Usage guide](docs/USAGE.md)

View File

@@ -292,7 +292,7 @@ Both the `root` and `rewrite_url` fields are safe to manually modify from the
# Elasticsearch # Elasticsearch
Elasticsearch versions >=6.8.0, <8.0.0 are supported by sist2. Elasticsearch versions >=6.8.0, 7.X.X and 8.X.X are supported by sist2.
Using a version >=7.14.0 is recommended to enable the following features: Using a version >=7.14.0 is recommended to enable the following features:

View File

@@ -3,7 +3,7 @@
"refresh_interval": "30s", "refresh_interval": "30s",
"codec": "best_compression", "codec": "best_compression",
"number_of_replicas": 0, "number_of_replicas": 0,
"highlight.max_analyzed_offset": 10000000 "highlight.max_analyzed_offset": 1000000
}, },
"analysis": { "analysis": {
"tokenizer": { "tokenizer": {
@@ -16,7 +16,7 @@
"delimiter": "." "delimiter": "."
}, },
"my_nGram_tokenizer": { "my_nGram_tokenizer": {
"type": "nGram", "type": "ngram",
"min_gram": 3, "min_gram": 3,
"max_gram": 3 "max_gram": 3
} }
@@ -55,37 +55,5 @@
] ]
} }
} }
},
"mappings": {
"dynamic_templates": [
{
"keyword_fields": {
"match_mapping_type": "string",
"match": "kw_*",
"mapping": {
"type": "keyword"
}
}
},
{
"integer_fields": {
"match_mapping_type": "*",
"match": "int_*",
"mapping": {
"type": "integer"
}
}
},
{
"meta_fields": {
"match_mapping_type": "*",
"match": "mt_*",
"mapping": {
"type": "keyword",
"index": false
}
}
}
]
} }
} }

View File

@@ -5,5 +5,6 @@ rm -rf index.sist2/
python3 scripts/mime.py > src/parsing/mime_generated.c python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c python3 scripts/serve_static.py > src/web/static_generated.c
python3 scripts/index_static.py > src/index/static_generated.c python3 scripts/index_static.py > src/index/static_generated.c
python3 scripts/magic_static.py > src/magic_generated.c
printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h

8
scripts/magic_static.py Normal file
View File

@@ -0,0 +1,8 @@
try:
with open("/usr/lib/file/magic.mgc", "rb") as f:
data = f.read()
except:
data = bytes([])
print("char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data)))

3
scripts/start_dev_es_6.sh Executable file
View File

@@ -0,0 +1,3 @@
docker run --rm -it --name "sist2-dev-es-6"\
-p 9202:9200 -e "discovery.type=single-node" \
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:6.8.0

3
scripts/start_dev_es_8.sh Executable file
View File

@@ -0,0 +1,3 @@
docker run --rm -it --name "sist2-dev-es"\
-p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" \
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:8.1.2

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -3288,9 +3288,9 @@
} }
}, },
"node_modules/async": { "node_modules/async": {
"version": "2.6.3", "version": "2.6.4",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz", "resolved": "https://registry.npmjs.org/async/-/async-2.6.4.tgz",
"integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==", "integrity": "sha512-mzo5dfJYwAn29PeiJ0zvwTo04zj8HDJj0Mn8TD7sno7q12prdbnasKJHhkm2c1LgrhlJ0teaea8860oxi51mGA==",
"dev": true, "dev": true,
"dependencies": { "dependencies": {
"lodash": "^4.17.14" "lodash": "^4.17.14"
@@ -9736,9 +9736,9 @@
} }
}, },
"node_modules/minimist": { "node_modules/minimist": {
"version": "1.2.5", "version": "1.2.6",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==", "integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==",
"dev": true "dev": true
}, },
"node_modules/minipass": { "node_modules/minipass": {
@@ -17937,9 +17937,9 @@
"dev": true "dev": true
}, },
"async": { "async": {
"version": "2.6.3", "version": "2.6.4",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz", "resolved": "https://registry.npmjs.org/async/-/async-2.6.4.tgz",
"integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==", "integrity": "sha512-mzo5dfJYwAn29PeiJ0zvwTo04zj8HDJj0Mn8TD7sno7q12prdbnasKJHhkm2c1LgrhlJ0teaea8860oxi51mGA==",
"dev": true, "dev": true,
"requires": { "requires": {
"lodash": "^4.17.14" "lodash": "^4.17.14"
@@ -23324,9 +23324,9 @@
} }
}, },
"minimist": { "minimist": {
"version": "1.2.5", "version": "1.2.6",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==", "integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==",
"dev": true "dev": true
}, },
"minipass": { "minipass": {

View File

@@ -336,10 +336,6 @@ class Sist2Api {
}; };
} }
getDocInfo(docId: string) {
return axios.get(`${this.baseUrl}d/${docId}`);
}
getTags() { getTags() {
return this.esQuery({ return this.esQuery({
aggs: { aggs: {

View File

@@ -69,7 +69,7 @@ interface SortMode {
class Sist2Query { class Sist2Query {
searchQuery(): any { searchQuery(blankSearch: boolean = false): any {
const getters = store.getters; const getters = store.getters;
@@ -93,22 +93,6 @@ class Sist2Query {
{terms: {index: selectedIndexIds}} {terms: {index: selectedIndexIds}}
] as any[]; ] as any[];
if (sizeMin && sizeMax) {
filters.push({range: {size: {gte: sizeMin, lte: sizeMax}}})
} else if (sizeMin) {
filters.push({range: {size: {gte: sizeMin}}})
} else if (sizeMax) {
filters.push({range: {size: {lte: sizeMax}}})
}
if (dateMin && dateMax) {
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
} else if (dateMin) {
filters.push({range: {mtime: {gte: dateMin}}})
} else if (dateMax) {
filters.push({range: {mtime: {lte: dateMax}}})
}
const fields = [ const fields = [
"name^8", "name^8",
"content^3", "content^3",
@@ -128,20 +112,39 @@ class Sist2Query {
fields.push("name.nGram^3"); fields.push("name.nGram^3");
} }
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes if (!blankSearch) {
if (path !== "") { if (sizeMin && sizeMax) {
filters.push({term: {path: path}}) filters.push({range: {size: {gte: sizeMin, lte: sizeMax}}})
} } else if (sizeMin) {
filters.push({range: {size: {gte: sizeMin}}})
} else if (sizeMax) {
filters.push({range: {size: {lte: sizeMax}}})
}
if (selectedMimeTypes.length > 0) { if (dateMin && dateMax) {
filters.push({terms: {"mime": selectedMimeTypes}}); filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
} } else if (dateMin) {
filters.push({range: {mtime: {gte: dateMin}}})
} else if (dateMax) {
filters.push({range: {mtime: {lte: dateMax}}})
}
if (selectedTags.length > 0) { const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
if (getters.optTagOrOperator) {
filters.push({terms: {"tag": selectedTags}}); if (path !== "") {
} else { filters.push({term: {path: path}})
selectedTags.forEach((tag: string) => filters.push({term: {"tag": tag}})); }
if (selectedMimeTypes.length > 0) {
filters.push({terms: {"mime": selectedMimeTypes}});
}
if (selectedTags.length > 0) {
if (getters.optTagOrOperator) {
filters.push({terms: {"tag": selectedTags}});
} else {
selectedTags.forEach((tag: string) => filters.push({term: {"tag": tag}}));
}
} }
} }
@@ -182,7 +185,7 @@ class Sist2Query {
size: size, size: size,
} as any; } as any;
if (!empty) { if (!empty && !blankSearch) {
q.query.bool.must = query; q.query.bool.must = query;
} }
@@ -207,7 +210,7 @@ class Sist2Query {
}; };
if (!legacyES) { if (!legacyES) {
q.highlight.max_analyzed_offset = 9_999_999; q.highlight.max_analyzed_offset = 999_999;
} }
if (getters.optSearchInPath) { if (getters.optSearchInPath) {
@@ -237,7 +240,7 @@ class Sist2Query {
} }
} }
if (!empty) { if (!empty && !blankSearch) {
q.query.function_score.query.bool.must.push(query); q.query.function_score.query.bool.must.push(query);
} }
} }

View File

@@ -1,11 +1,13 @@
<template> <template>
<Preloader v-if="loading"></Preloader> <Preloader v-if="loading"></Preloader>
<div v-else-if="content" class="content-div">{{ content }}</div> <div v-else-if="content" class="content-div" v-html="content"></div>
</template> </template>
<script> <script>
import Sist2Api from "@/Sist2Api"; import Sist2Api from "@/Sist2Api";
import Preloader from "@/components/Preloader"; import Preloader from "@/components/Preloader";
import Sist2Query from "@/Sist2Query";
import store from "@/store";
export default { export default {
name: "LazyContentDiv", name: "LazyContentDiv",
@@ -18,10 +20,72 @@ export default {
} }
}, },
mounted() { mounted() {
Sist2Api.getDocInfo(this.docId).then(src => { const query = Sist2Query.searchQuery();
this.content = src.data.content;
if (this.$store.state.optHighlight) {
const fields = this.$store.state.fuzzy
? {"content.nGram": {}}
: {content: {}};
query.highlight = {
pre_tags: ["<mark>"],
post_tags: ["</mark>"],
number_of_fragments: 0,
fields,
};
if (!store.state.sist2Info.esVersionLegacy) {
query.highlight.max_analyzed_offset = 999_999;
}
}
if ("function_score" in query.query) {
query.query = query.query.function_score.query;
}
if (!("must" in query.query.bool)) {
query.query.bool.must = [];
} else if (!Array.isArray(query.query.bool.must)) {
query.query.bool.must = [query.query.bool.must];
}
query.query.bool.must.push({match: {_id: this.docId}});
delete query["sort"];
delete query["aggs"];
delete query["search_after"];
delete query.query["function_score"];
query._source = {
includes: ["content", "name", "path", "extension"]
}
query.size = 1;
Sist2Api.esQuery(query).then(resp => {
this.loading = false; this.loading = false;
}) if (resp.hits.hits.length === 1) {
this.content = this.getContent(resp.hits.hits[0]);
} else {
console.log("FIXME: could not get content")
console.log(resp)
}
});
},
methods: {
getContent(doc) {
if (!doc.highlight) {
return doc._source.content;
}
if (doc.highlight["content.nGram"]) {
return doc.highlight["content.nGram"][0];
}
if (doc.highlight.content) {
return doc.highlight.content[0];
}
}
} }
} }
</script> </script>

View File

@@ -81,7 +81,9 @@ export default {
methods: { methods: {
keyDownListener(e) { keyDownListener(e) {
if (this.$refs.lightbox === undefined) { const isLightboxOpen = this.$refs.lightbox === undefined || this.$refs.lightbox.$el.tagName === undefined;
if (isLightboxOpen) {
return true; return true;
} }
@@ -89,7 +91,6 @@ export default {
switch (e.key) { switch (e.key) {
case " ": { case " ": {
console.log("SPACE")
e.preventDefault(); e.preventDefault();
e.stopPropagation(); e.stopPropagation();
e.stopImmediatePropagation(); e.stopImmediatePropagation();
@@ -98,16 +99,12 @@ export default {
[...document.getElementsByClassName("fslightbox-absoluted")].forEach(elem => { [...document.getElementsByClassName("fslightbox-absoluted")].forEach(elem => {
if (elem.style.transform === "translate(0px)" || elem.style.transform === "translate(0px, 0px)") { if (elem.style.transform === "translate(0px)" || elem.style.transform === "translate(0px, 0px)") {
const vid = elem.getElementsByTagName("video")[0]; const vid = elem.getElementsByTagName("video")[0];
console.log(elem)
console.log(vid)
if (vid) { if (vid) {
if (vid.paused) { if (vid.paused) {
vid.play(); vid.play();
console.log("PLAY")
} else { } else {
vid.pause() vid.pause()
console.log("PAUSE")
} }
} }
} }
@@ -119,24 +116,28 @@ export default {
} }
case "ArrowUp": case "ArrowUp":
case "k": { case "k": {
if (!lightboxStore.data.isThumbing) { if (!lightboxStore.data.isThumbing && lightboxStore.core.thumbsToggler) {
lightboxStore.core.thumbsToggler.toggleThumbs(); lightboxStore.core.thumbsToggler.toggleThumbs();
} }
return false; return false;
} }
case "ArrowDown": case "ArrowDown":
case "j": { case "j": {
if (lightboxStore.data.isThumbing) { if (lightboxStore.data.isThumbing && lightboxStore.core.thumbsToggler) {
lightboxStore.core.thumbsToggler.toggleThumbs(); lightboxStore.core.thumbsToggler.toggleThumbs();
} }
return false; return false;
} }
case "h": { case "h": {
lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getPreviousSlideIndex()); if (lightboxStore.core.stageManager.getPreviousSlideIndex) {
break; lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getPreviousSlideIndex());
}
return false;
} }
case "l": { case "l": {
lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getNextSlideIndex()); if (lightboxStore.core.stageManager.getNextSlideIndex) {
lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getNextSlideIndex());
}
return false; return false;
} }
} }

View File

@@ -1,5 +1,13 @@
<template> <template>
<div id="tagTree"></div> <div>
<b-input-group v-if="showSearchBar" id="tag-picker-filter-bar">
<b-form-input :value="filter"
:placeholder="$t('tagFilter')"
@input="onFilter($event)"></b-form-input>
</b-input-group>
<div id="tagTree"></div>
</div>
</template> </template>
<script> <script>
@@ -112,10 +120,12 @@ function addTag(map, tag, id, count) {
export default { export default {
name: "TagPicker", name: "TagPicker",
props: ["showSearchBar"],
data() { data() {
return { return {
tagTree: null, tagTree: null,
loadedFromArgs: false, loadedFromArgs: false,
filter: ""
} }
}, },
mounted() { mounted() {
@@ -129,6 +139,10 @@ export default {
}); });
}, },
methods: { methods: {
onFilter(value) {
this.filter = value;
this.tagTree.search(value);
},
initializeTree() { initializeTree() {
const tagMap = []; const tagMap = [];
this.tagTree = new InspireTree({ this.tagTree = new InspireTree({
@@ -163,7 +177,8 @@ export default {
}); });
}, },
handleTreeClick(node, e) { handleTreeClick(node, e) {
if (e === "indeterminate" || e === "collapsed" || e === 'rendered' || e === "focused") { if (e === "indeterminate" || e === "collapsed" || e === 'rendered' || e === "focused"
|| e === "matched" || e === "hidden") {
return; return;
} }
@@ -180,7 +195,15 @@ export default {
} }
</style> </style>
<style> <style>
.inspire-tree .focused>.wholerow { .inspire-tree .focused > .wholerow {
border: none; border: none;
} }
#tag-picker-filter-bar {
padding: 10px 4px 4px;
}
.theme-black .inspire-tree .matched > .wholerow {
background: rgba(251, 191, 41, 0.25);
}
</style> </style>

View File

@@ -16,6 +16,7 @@ export default {
pages: "pages", pages: "pages",
mimeTypes: "Media types", mimeTypes: "Media types",
tags: "Tags", tags: "Tags",
tagFilter: "Filter tags",
help: { help: {
simpleSearch: "Simple search", simpleSearch: "Simple search",
advancedSearch: "Advanced search", advancedSearch: "Advanced search",
@@ -74,6 +75,7 @@ export default {
useDatePicker: "Use a Date Picker component rather than a slider", useDatePicker: "Use a Date Picker component rather than a slider",
vidPreviewInterval: "Video preview frame duration in ms", vidPreviewInterval: "Video preview frame duration in ms",
simpleLightbox: "Disable animations in image viewer", simpleLightbox: "Disable animations in image viewer",
showTagPickerFilter: "Display the tag filter bar"
}, },
queryMode: { queryMode: {
simple: "Simple", simple: "Simple",
@@ -183,6 +185,7 @@ export default {
pages: "pages", pages: "pages",
mimeTypes: "Types de médias", mimeTypes: "Types de médias",
tags: "Tags", tags: "Tags",
tagFilter: "Filtrer les tags",
help: { help: {
simpleSearch: "Recherche simple", simpleSearch: "Recherche simple",
advancedSearch: "Recherche avancée", advancedSearch: "Recherche avancée",
@@ -242,6 +245,7 @@ export default {
useDatePicker: "Afficher un composant « Date Picker » plutôt qu'un slider", useDatePicker: "Afficher un composant « Date Picker » plutôt qu'un slider",
vidPreviewInterval: "Durée des images d'aperçu video en millisecondes", vidPreviewInterval: "Durée des images d'aperçu video en millisecondes",
simpleLightbox: "Désactiver les animations du visualiseur d'images", simpleLightbox: "Désactiver les animations du visualiseur d'images",
showTagPickerFilter: "Afficher le filtre dans l'onglet Tags"
}, },
queryMode: { queryMode: {
simple: "Simple", simple: "Simple",
@@ -352,6 +356,7 @@ export default {
pages: "页", pages: "页",
mimeTypes: "文件类型", mimeTypes: "文件类型",
tags: "标签", tags: "标签",
tagFilter: "筛选标签",
help: { help: {
simpleSearch: "简易搜索", simpleSearch: "简易搜索",
advancedSearch: "高级搜索", advancedSearch: "高级搜索",
@@ -410,6 +415,7 @@ export default {
useDatePicker: "使用日期选择器组件而不是滑块", useDatePicker: "使用日期选择器组件而不是滑块",
vidPreviewInterval: "视频预览帧的持续时间,以毫秒为单位", vidPreviewInterval: "视频预览帧的持续时间,以毫秒为单位",
simpleLightbox: "在图片查看器中,禁用动画", simpleLightbox: "在图片查看器中,禁用动画",
showTagPickerFilter: "显示标签过滤栏"
}, },
queryMode: { queryMode: {
simple: "简单", simple: "简单",

View File

@@ -4,6 +4,8 @@ import VueRouter, {Route} from "vue-router";
import {EsHit, EsResult, EsTag, Index, Tag} from "@/Sist2Api"; import {EsHit, EsResult, EsTag, Index, Tag} from "@/Sist2Api";
import {deserializeMimes, serializeMimes} from "@/util"; import {deserializeMimes, serializeMimes} from "@/util";
const CONF_VERSION = 2;
Vue.use(Vuex) Vue.use(Vuex)
export default new Vuex.Store({ export default new Vuex.Store({
@@ -24,7 +26,6 @@ export default new Vuex.Store({
sortMode: "score", sortMode: "score",
fuzzy: false, fuzzy: false,
size: 60,
optLang: "en", optLang: "en",
optLangIsDefault: true, optLangIsDefault: true,
@@ -32,6 +33,7 @@ export default new Vuex.Store({
optTheme: "light", optTheme: "light",
optDisplay: "grid", optDisplay: "grid",
optSize: 60,
optHighlight: true, optHighlight: true,
optTagOrOperator: false, optTagOrOperator: false,
optFuzzy: true, optFuzzy: true,
@@ -52,6 +54,7 @@ export default new Vuex.Store({
optUseDatePicker: false, optUseDatePicker: false,
optVidPreviewInterval: 700, optVidPreviewInterval: 700,
optSimpleLightbox: true, optSimpleLightbox: true,
optShowTagPickerFilter: true,
_onLoadSelectedIndices: [] as string[], _onLoadSelectedIndices: [] as string[],
_onLoadSelectedMimeTypes: [] as string[], _onLoadSelectedMimeTypes: [] as string[],
@@ -150,7 +153,7 @@ export default new Vuex.Store({
setOptSuggestPath: (state, val) => state.optSuggestPath = val, setOptSuggestPath: (state, val) => state.optSuggestPath = val,
setOptFragmentSize: (state, val) => state.optFragmentSize = val, setOptFragmentSize: (state, val) => state.optFragmentSize = val,
setOptQueryMode: (state, val) => state.optQueryMode = val, setOptQueryMode: (state, val) => state.optQueryMode = val,
setOptResultSize: (state, val) => state.size = val, setOptResultSize: (state, val) => state.optSize = val,
setOptTagOrOperator: (state, val) => state.optTagOrOperator = val, setOptTagOrOperator: (state, val) => state.optTagOrOperator = val,
setOptTreemapType: (state, val) => state.optTreemapType = val, setOptTreemapType: (state, val) => state.optTreemapType = val,
@@ -163,6 +166,7 @@ export default new Vuex.Store({
setOptUseDatePicker: (state, val) => state.optUseDatePicker = val, setOptUseDatePicker: (state, val) => state.optUseDatePicker = val,
setOptVidPreviewInterval: (state, val) => state.optVidPreviewInterval = val, setOptVidPreviewInterval: (state, val) => state.optVidPreviewInterval = val,
setOptSimpleLightbox: (state, val) => state.optSimpleLightbox = val, setOptSimpleLightbox: (state, val) => state.optSimpleLightbox = val,
setOptShowTagPickerFilter: (state, val) => state.optShowTagPickerFilter = val,
setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val, setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
setOptLightboxSlideDuration: (state, val) => state.optLightboxSlideDuration = val, setOptLightboxSlideDuration: (state, val) => state.optLightboxSlideDuration = val,
@@ -241,6 +245,11 @@ export default new Vuex.Store({
} }
}, },
async updateArgs({state}, router: VueRouter) { async updateArgs({state}, router: VueRouter) {
if (router.currentRoute.path !== "/") {
return;
}
await router.push({ await router.push({
query: { query: {
q: state.searchText.trim() ? state.searchText.trim().replace(/\s+/g, " ") : undefined, q: state.searchText.trim() ? state.searchText.trim().replace(/\s+/g, " ") : undefined,
@@ -269,6 +278,8 @@ export default new Vuex.Store({
} }
}); });
conf["version"] = CONF_VERSION;
localStorage.setItem("sist2_configuration", JSON.stringify(conf)); localStorage.setItem("sist2_configuration", JSON.stringify(conf));
}, },
loadConfiguration({state}) { loadConfiguration({state}) {
@@ -276,6 +287,11 @@ export default new Vuex.Store({
if (confString) { if (confString) {
const conf = JSON.parse(confString); const conf = JSON.parse(confString);
if (!("version" in conf) || conf["version"] != CONF_VERSION) {
localStorage.removeItem("sist2_configuration");
window.location.reload();
}
Object.keys(state).forEach((key) => { Object.keys(state).forEach((key) => {
if (key.startsWith("opt")) { if (key.startsWith("opt")) {
(state as any)[key] = conf[key]; (state as any)[key] = conf[key];
@@ -337,7 +353,7 @@ export default new Vuex.Store({
searchText: state => state.searchText, searchText: state => state.searchText,
pathText: state => state.pathText, pathText: state => state.pathText,
fuzzy: state => state.fuzzy, fuzzy: state => state.fuzzy,
size: state => state.size, size: state => state.optSize,
sortMode: state => state.sortMode, sortMode: state => state.sortMode,
lastQueryResult: state => state.lastQueryResults, lastQueryResult: state => state.lastQueryResults,
lastDoc: function (state): EsHit | null { lastDoc: function (state): EsHit | null {
@@ -375,11 +391,12 @@ export default new Vuex.Store({
optTreemapColor: state => state.optTreemapColor, optTreemapColor: state => state.optTreemapColor,
optLightboxLoadOnlyCurrent: state => state.optLightboxLoadOnlyCurrent, optLightboxLoadOnlyCurrent: state => state.optLightboxLoadOnlyCurrent,
optLightboxSlideDuration: state => state.optLightboxSlideDuration, optLightboxSlideDuration: state => state.optLightboxSlideDuration,
optResultSize: state => state.size, optResultSize: state => state.optSize,
optHideLegacy: state => state.optHideLegacy, optHideLegacy: state => state.optHideLegacy,
optUpdateMimeMap: state => state.optUpdateMimeMap, optUpdateMimeMap: state => state.optUpdateMimeMap,
optUseDatePicker: state => state.optUseDatePicker, optUseDatePicker: state => state.optUseDatePicker,
optVidPreviewInterval: state => state.optVidPreviewInterval, optVidPreviewInterval: state => state.optVidPreviewInterval,
optSimpleLightbox: state => state.optSimpleLightbox, optSimpleLightbox: state => state.optSimpleLightbox,
optShowTagPickerFilter: state => state.optShowTagPickerFilter,
} }
}) })

View File

@@ -50,6 +50,11 @@
$t("opt.simpleLightbox") $t("opt.simpleLightbox")
}} }}
</b-form-checkbox> </b-form-checkbox>
<b-form-checkbox :checked="optShowTagPickerFilter" @input="setOptShowTagPickerFilter">{{
$t("opt.showTagPickerFilter")
}}
</b-form-checkbox>
</b-card> </b-card>
<br/> <br/>
@@ -245,6 +250,7 @@ export default {
"optUseDatePicker", "optUseDatePicker",
"optVidPreviewInterval", "optVidPreviewInterval",
"optSimpleLightbox", "optSimpleLightbox",
"optShowTagPickerFilter",
]), ]),
clientWidth() { clientWidth() {
return window.innerWidth; return window.innerWidth;
@@ -292,6 +298,7 @@ export default {
"setOptUseDatePicker", "setOptUseDatePicker",
"setOptVidPreviewInterval", "setOptVidPreviewInterval",
"setOptSimpleLightbox", "setOptSimpleLightbox",
"setOptShowTagPickerFilter",
]), ]),
onResetClick() { onResetClick() {
localStorage.removeItem("sist2_configuration"); localStorage.removeItem("sist2_configuration");

View File

@@ -32,7 +32,7 @@
<MimePicker></MimePicker> <MimePicker></MimePicker>
</b-tab> </b-tab>
<b-tab :title="$t('tags')"> <b-tab :title="$t('tags')">
<TagPicker></TagPicker> <TagPicker :show-search-bar="$store.state.optShowTagPickerFilter"></TagPicker>
</b-tab> </b-tab>
</b-tabs> </b-tabs>
</b-col> </b-col>
@@ -139,7 +139,9 @@ export default Vue.extend({
this.setSist2Info(data); this.setSist2Info(data);
this.setIndices(data.indices); this.setIndices(data.indices);
Sist2Api.getMimeTypes(Sist2Query.searchQuery()).then(({mimeMap}) => { const doBlankSearch = !this.$store.state.optUpdateMimeMap;
Sist2Api.getMimeTypes(Sist2Query.searchQuery(doBlankSearch)).then(({mimeMap}) => {
this.$store.commit("setUiMimeMap", mimeMap); this.$store.commit("setUiMimeMap", mimeMap);
this.uiLoading = false; this.uiLoading = false;
this.search(true); this.search(true);
@@ -206,7 +208,7 @@ export default Vue.extend({
this.$store.commit("setUiReachedScrollEnd", false); this.$store.commit("setUiReachedScrollEnd", false);
}, },
async handleSearch(resp: EsResult) { async handleSearch(resp: EsResult) {
if (resp.hits.hits.length == 0) { if (resp.hits.hits.length == 0 || resp.hits.hits.length < this.$store.state.optSize) {
this.$store.commit("setUiReachedScrollEnd", true); this.$store.commit("setUiReachedScrollEnd", true);
} }
@@ -246,6 +248,8 @@ export default Vue.extend({
this.$store.commit("setLastQueryResult", resp); this.$store.commit("setLastQueryResult", resp);
this.docs.push(...resp.hits.hits); this.docs.push(...resp.hits.hits);
resp.hits.hits.forEach(hit => this.docIds.add(hit._id));
}, },
getDateRange(): Promise<{ min: number, max: number }> { getDateRange(): Promise<{ min: number, max: number }> {
return sist2.esQuery({ return sist2.esQuery({

View File

@@ -81,6 +81,11 @@ void web_args_destroy(web_args_t *args) {
} }
void exec_args_destroy(exec_args_t *args) { void exec_args_destroy(exec_args_t *args) {
if (args->index_path != NULL) {
free(args->index_path);
}
free(args); free(args);
} }
@@ -393,6 +398,7 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url) LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index) LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl)
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path) LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path) LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
LOG_DEBUGF("cli.c", "arg async_script=%d", args->async_script) LOG_DEBUGF("cli.c", "arg async_script=%d", args->async_script)
@@ -507,6 +513,7 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url) LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index) LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl)
LOG_DEBUGF("cli.c", "arg tagline=%s", args->tagline) LOG_DEBUGF("cli.c", "arg tagline=%s", args->tagline)
LOG_DEBUGF("cli.c", "arg dev=%d", args->dev) LOG_DEBUGF("cli.c", "arg dev=%d", args->dev)
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address) LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)

View File

@@ -50,6 +50,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv);
typedef struct index_args { typedef struct index_args {
char *es_url; char *es_url;
char *es_index; char *es_index;
int es_insecure_ssl;
char *index_path; char *index_path;
const char *script_path; const char *script_path;
char *script; char *script;
@@ -68,6 +69,7 @@ typedef struct index_args {
typedef struct web_args { typedef struct web_args {
char *es_url; char *es_url;
char *es_index; char *es_index;
int es_insecure_ssl;
char *listen_address; char *listen_address;
char *credentials; char *credentials;
char *tag_credentials; char *tag_credentials;
@@ -85,7 +87,8 @@ typedef struct web_args {
typedef struct exec_args { typedef struct exec_args {
char *es_url; char *es_url;
char *es_index; char *es_index;
const char *index_path; int es_insecure_ssl;
char *index_path;
const char *script_path; const char *script_path;
int async_script; int async_script;
char *script; char *script;

View File

@@ -79,6 +79,7 @@ typedef struct {
typedef struct { typedef struct {
char *es_url; char *es_url;
int es_insecure_ssl;
es_version_t *es_version; es_version_t *es_version;
char *es_index; char *es_index;
int batch_size; int batch_size;
@@ -97,6 +98,7 @@ typedef struct {
char *es_url; char *es_url;
es_version_t *es_version; es_version_t *es_version;
char *es_index; char *es_index;
int es_insecure_ssl;
int index_count; int index_count;
char *auth_user; char *auth_user;
char *auth_pass; char *auth_pass;

View File

@@ -53,7 +53,7 @@ void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]) {
cJSON_AddStringToObject(line, "_id", id_str); cJSON_AddStringToObject(line, "_id", id_str);
cJSON_AddStringToObject(line, "_index", IndexCtx.es_index); cJSON_AddStringToObject(line, "_index", IndexCtx.es_index);
cJSON_AddStringToObject(line, "_type", "_doc"); // cJSON_AddStringToObject(line, "_type", "_doc");
cJSON_AddItemReferenceToObject(line, "_source", document); cJSON_AddItemReferenceToObject(line, "_source", document);
char *json = cJSON_PrintUnformatted(line); char *json = cJSON_PrintUnformatted(line);
@@ -110,16 +110,16 @@ void execute_update_script(const char *script, int async, const char index_id[SI
cJSON *term_obj = cJSON_AddObjectToObject(query, "term"); cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
cJSON_AddStringToObject(term_obj, "index", index_id); cJSON_AddStringToObject(term_obj, "index", index_id);
char *str = cJSON_Print(body); char *str = cJSON_PrintUnformatted(body);
char bulk_url[4096]; char url[4096];
if (async) { if (async) {
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url, snprintf(url, sizeof(url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
Indexer->es_index); Indexer->es_index);
} else { } else {
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index); snprintf(url, sizeof(url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
} }
response_t *r = web_post(bulk_url, str); response_t *r = web_post(url, str, IndexCtx.es_insecure_ssl);
if (!async) { if (!async) {
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code); LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
} }
@@ -139,13 +139,18 @@ void execute_update_script(const char *script, int async, const char index_id[SI
if (async) { if (async) {
cJSON *task = cJSON_GetObjectItem(resp, "task"); cJSON *task = cJSON_GetObjectItem(resp, "task");
if (task == NULL) {
LOG_FATALF("elastic.c", "FIXME: Could not get task id: %s", r->body);
}
LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring); LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring);
} }
cJSON_Delete(resp); cJSON_Delete(resp);
} }
void *create_bulk_buffer(int max, int *count, size_t *buf_len) { void *create_bulk_buffer(int max, int *count, size_t *buf_len, int legacy) {
es_bulk_line_t *line = Indexer->line_head; es_bulk_line_t *line = Indexer->line_head;
*count = 0; *count = 0;
@@ -166,11 +171,20 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
while (line != NULL && *count < max) { while (line != NULL && *count < max) {
char action_str[256]; char action_str[256];
if (line->type == ES_BULK_LINE_INDEX) { if (line->type == ES_BULK_LINE_INDEX) {
snprintf(
action_str, sizeof(action_str), if (legacy) {
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n", snprintf(
line->doc_id, Indexer->es_index action_str, sizeof(action_str),
); "{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
line->doc_id, Indexer->es_index
);
} else {
snprintf(
action_str, sizeof(action_str),
"{\"index\":{\"_id\":\"%s\",\"_index\":\"%s\"}}\n",
line->doc_id, Indexer->es_index
);
}
size_t action_str_len = strlen(action_str); size_t action_str_len = strlen(action_str);
size_t line_len = strlen(line->line); size_t line_len = strlen(line->line);
@@ -214,7 +228,13 @@ void print_errors(response_t *r) {
*(tmp + r->size) = '\0'; *(tmp + r->size) = '\0';
cJSON *ret_json = cJSON_Parse(tmp); cJSON *ret_json = cJSON_Parse(tmp);
if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) { cJSON *errors = cJSON_GetObjectItem(ret_json, "errors");
if (errors == NULL) {
char *str = cJSON_Print(ret_json);
LOG_ERRORF("elastic.c", "%s\n", str);
cJSON_free(str);
} else if (errors->valueint != 0) {
cJSON *err; cJSON *err;
cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) { cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) { if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
@@ -252,11 +272,11 @@ void _elastic_flush(int max) {
size_t buf_len; size_t buf_len;
int count; int count;
void *buf = create_bulk_buffer(max, &count, &buf_len); void *buf = create_bulk_buffer(max, &count, &buf_len, IS_LEGACY_VERSION(IndexCtx.es_version));
char bulk_url[4096]; char bulk_url[4096];
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_bulk?pipeline=tie", Indexer->es_url, Indexer->es_index); snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_bulk?pipeline=tie", Indexer->es_url, Indexer->es_index);
response_t *r = web_post(bulk_url, buf); response_t *r = web_post(bulk_url, buf, IndexCtx.es_insecure_ssl);
if (r->status_code == 0) { if (r->status_code == 0) {
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url) LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
@@ -382,7 +402,7 @@ void finish_indexer(char *script, int async_script, char *index_id) {
char url[4096]; char url[4096];
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index); snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
response_t *r = web_post(url, ""); response_t *r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code); LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r); free_response(r);
@@ -391,24 +411,24 @@ void finish_indexer(char *script, int async_script, char *index_id) {
free(script); free(script);
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index); snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, ""); r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code); LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r); free_response(r);
} }
snprintf(url, sizeof(url), "%s/%s/_forcemerge", IndexCtx.es_url, IndexCtx.es_index); snprintf(url, sizeof(url), "%s/%s/_forcemerge", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, ""); r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code); LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
free_response(r); free_response(r);
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index); snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}"); r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Set refresh interval <%d>", r->status_code); LOG_INFOF("elastic.c", "Set refresh interval <%d>", r->status_code);
free_response(r); free_response(r);
} }
es_version_t *elastic_get_version(const char *es_url) { es_version_t *elastic_get_version(const char *es_url, int insecure) {
response_t *r = web_get(es_url, 30); response_t *r = web_get(es_url, 30, insecure);
char *tmp = malloc(r->size + 1); char *tmp = malloc(r->size + 1);
memcpy(tmp, r->body, r->size); memcpy(tmp, r->body, r->size);
@@ -453,7 +473,7 @@ es_version_t *elastic_get_version(const char *es_url) {
void elastic_init(int force_reset, const char *user_mappings, const char *user_settings) { void elastic_init(int force_reset, const char *user_mappings, const char *user_settings) {
es_version_t *es_version = elastic_get_version(IndexCtx.es_url); es_version_t *es_version = elastic_get_version(IndexCtx.es_url, IndexCtx.es_insecure_ssl);
IndexCtx.es_version = es_version; IndexCtx.es_version = es_version;
if (es_version == NULL) { if (es_version == NULL) {
@@ -462,33 +482,33 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
LOG_INFOF("elastic.c", LOG_INFOF("elastic.c",
"Elasticsearch version is %s (supported=%d, legacy=%d)", "Elasticsearch version is %s (supported=%d, legacy=%d)",
format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), USE_LEGACY_ES_SETTINGS(es_version)); format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), IS_LEGACY_VERSION(es_version));
if (!IS_SUPPORTED_ES_VERSION(es_version)) { if (!IS_SUPPORTED_ES_VERSION(es_version)) {
LOG_FATAL("elastic.c", "sist2 only supports Elasticsearch v6.8 or newer") LOG_FATAL("elastic.c", "This elasticsearch version is not supported!")
} }
char *settings = NULL; char *settings = NULL;
if (USE_LEGACY_ES_SETTINGS(es_version)) { if (IS_LEGACY_VERSION(es_version)) {
settings = settings_json;
} else {
settings = settings_legacy_json; settings = settings_legacy_json;
} else {
settings = settings_json;
} }
// Check if index exists // Check if index exists
char url[4096]; char url[4096];
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index); snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
response_t *r = web_get(url, 30); response_t *r = web_get(url, 30, IndexCtx.es_insecure_ssl);
int index_exists = r->status_code == 200; int index_exists = r->status_code == 200;
free_response(r); free_response(r);
if (!index_exists || force_reset) { if (!index_exists || force_reset) {
r = web_delete(url); r = web_delete(url, IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code); LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code);
free_response(r); free_response(r);
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index); snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, ""); r = web_put(url, "", IndexCtx.es_insecure_ssl);
if (r->status_code != 200) { if (r->status_code != 200) {
print_error(r); print_error(r);
@@ -499,17 +519,17 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
free_response(r); free_response(r);
snprintf(url, sizeof(url), "%s/%s/_close", IndexCtx.es_url, IndexCtx.es_index); snprintf(url, sizeof(url), "%s/%s/_close", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, ""); r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code); LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
free_response(r); free_response(r);
snprintf(url, sizeof(url), "%s/_ingest/pipeline/tie", IndexCtx.es_url); snprintf(url, sizeof(url), "%s/_ingest/pipeline/tie", IndexCtx.es_url);
r = web_put(url, pipeline_json); r = web_put(url, pipeline_json, IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code); LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
free_response(r); free_response(r);
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index); snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, user_settings ? user_settings : settings); r = web_put(url, user_settings ? user_settings : settings, IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Update ES settings <%d>", r->status_code); LOG_INFOF("elastic.c", "Update ES settings <%d>", r->status_code);
if (r->status_code != 200) { if (r->status_code != 200) {
print_error(r); print_error(r);
@@ -517,8 +537,13 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
} }
free_response(r); free_response(r);
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index); if (IS_LEGACY_VERSION(es_version)) {
r = web_put(url, user_mappings ? user_mappings : mappings_json); snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
} else {
snprintf(url, sizeof(url), "%s/%s/_mappings", IndexCtx.es_url, IndexCtx.es_index);
}
r = web_put(url, user_mappings ? user_mappings : mappings_json, IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Update ES mappings <%d>", r->status_code); LOG_INFOF("elastic.c", "Update ES mappings <%d>", r->status_code);
if (r->status_code != 200) { if (r->status_code != 200) {
print_error(r); print_error(r);
@@ -527,7 +552,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
free_response(r); free_response(r);
snprintf(url, sizeof(url), "%s/%s/_open", IndexCtx.es_url, IndexCtx.es_index); snprintf(url, sizeof(url), "%s/%s/_open", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, ""); r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Open index <%d>", r->status_code); LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
free_response(r); free_response(r);
} }
@@ -537,7 +562,7 @@ cJSON *elastic_get_document(const char *id_str) {
char url[4096]; char url[4096];
snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, id_str); snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, id_str);
response_t *r = web_get(url, 3); response_t *r = web_get(url, 3, WebCtx.es_insecure_ssl);
cJSON *json = NULL; cJSON *json = NULL;
if (r->status_code == 200) { if (r->status_code == 200) {
char *tmp = malloc(r->size + 1); char *tmp = malloc(r->size + 1);
@@ -555,7 +580,7 @@ char *elastic_get_status() {
snprintf(url, sizeof(url), snprintf(url, sizeof(url),
"%s/_cluster/state/metadata/%s?filter_path=metadata.indices.*.state", WebCtx.es_url, WebCtx.es_index); "%s/_cluster/state/metadata/%s?filter_path=metadata.indices.*.state", WebCtx.es_url, WebCtx.es_index);
response_t *r = web_get(url, 30); response_t *r = web_get(url, 30, IndexCtx.es_insecure_ssl);
cJSON *json = NULL; cJSON *json = NULL;
char *status = malloc(128 * sizeof(char)); char *status = malloc(128 * sizeof(char));
status[0] = '\0'; status[0] = '\0';

View File

@@ -20,8 +20,10 @@ typedef struct {
} es_version_t; } es_version_t;
#define VERSION_GE(version, maj, min) ((version)->major > (maj) || ((version)->major == (maj) && (version)->minor >= (min))) #define VERSION_GE(version, maj, min) ((version)->major > (maj) || ((version)->major == (maj) && (version)->minor >= (min)))
#define IS_SUPPORTED_ES_VERSION(es_version) ((es_version) != NULL && VERSION_GE((es_version), 6, 8)) #define VERSION_LT(version, maj, min) (!VERSION_GE(version, maj, min))
#define USE_LEGACY_ES_SETTINGS(es_version) ((es_version) != NULL && (!VERSION_GE((es_version), 7, 14)))
#define IS_SUPPORTED_ES_VERSION(es_version) ((es_version) != NULL && VERSION_GE((es_version), 6, 8) && VERSION_LT((es_version), 9, 0))
#define IS_LEGACY_VERSION(es_version) ((es_version) != NULL && VERSION_LT((es_version), 7, 14))
__always_inline __always_inline
static const char *format_es_version(es_version_t *version) { static const char *format_es_version(es_version_t *version) {
@@ -57,7 +59,7 @@ cJSON *elastic_get_document(const char *id_str);
char *elastic_get_status(); char *elastic_get_status();
es_version_t *elastic_get_version(const char *es_url); es_version_t *elastic_get_version(const char *es_url, int insecure);
void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]); void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]);

File diff suppressed because one or more lines are too long

View File

@@ -22,7 +22,7 @@ void free_response(response_t *resp) {
free(resp); free(resp);
} }
void web_post_async_poll(subreq_ctx_t* req) { void web_post_async_poll(subreq_ctx_t *req) {
fd_set fdread; fd_set fdread;
fd_set fdwrite; fd_set fdwrite;
fd_set fdexcep; fd_set fdexcep;
@@ -34,7 +34,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd); CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd);
if(mc != CURLM_OK) { if (mc != CURLM_OK) {
req->done = TRUE; req->done = TRUE;
return; return;
} }
@@ -47,7 +47,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
struct timeval timeout = {1, 0}; struct timeval timeout = {1, 0};
int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout); int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
switch(rc) { switch (rc) {
case -1: case -1:
req->done = TRUE; req->done = TRUE;
break; break;
@@ -64,6 +64,10 @@ void web_post_async_poll(subreq_ctx_t* req) {
req->response->size = req->response_buf.cur; req->response->size = req->response_buf.cur;
curl_easy_getinfo(req->handle, CURLINFO_RESPONSE_CODE, &req->response->status_code); curl_easy_getinfo(req->handle, CURLINFO_RESPONSE_CODE, &req->response->status_code);
if (req->response->status_code == 0) {
LOG_ERRORF("web.c", "CURL Error: %s", req->curl_err_buffer)
}
curl_multi_cleanup(req->multi); curl_multi_cleanup(req->multi);
curl_easy_cleanup(req->handle); curl_easy_cleanup(req->handle);
curl_slist_free_all(req->headers); curl_slist_free_all(req->headers);
@@ -71,7 +75,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
} }
} }
subreq_ctx_t *web_post_async(const char *url, char *data) { subreq_ctx_t *web_post_async(const char *url, char *data, int insecure) {
subreq_ctx_t *req = calloc(1, sizeof(subreq_ctx_t)); subreq_ctx_t *req = calloc(1, sizeof(subreq_ctx_t));
req->response = calloc(1, sizeof(response_t)); req->response = calloc(1, sizeof(response_t));
req->data = data; req->data = data;
@@ -84,6 +88,11 @@ subreq_ctx_t *web_post_async(const char *url, char *data) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_POST, 1); curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2"); curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, req->curl_err_buffer);
struct curl_slist *headers = NULL; struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json"); headers = curl_slist_append(headers, "Content-Type: application/json");
@@ -100,7 +109,7 @@ subreq_ctx_t *web_post_async(const char *url, char *data) {
return req; return req;
} }
response_t *web_get(const char *url, int timeout) { response_t *web_get(const char *url, int timeout, int insecure) {
response_t *resp = malloc(sizeof(response_t)); response_t *resp = malloc(sizeof(response_t));
CURL *curl; CURL *curl;
@@ -112,14 +121,24 @@ response_t *web_get(const char *url, int timeout) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2"); curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout); curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
struct curl_slist *headers = NULL; struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json"); headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
char err_buffer[CURL_ERROR_SIZE + 1] = {};
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, err_buffer);
curl_easy_perform(curl); curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code); curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
if (resp->status_code == 0) {
LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
}
curl_easy_cleanup(curl); curl_easy_cleanup(curl);
curl_slist_free_all(headers); curl_slist_free_all(headers);
@@ -128,7 +147,7 @@ response_t *web_get(const char *url, int timeout) {
return resp; return resp;
} }
response_t *web_post(const char *url, const char *data) { response_t *web_post(const char *url, const char *data, int insecure) {
response_t *resp = malloc(sizeof(response_t)); response_t *resp = malloc(sizeof(response_t));
@@ -141,6 +160,12 @@ response_t *web_post(const char *url, const char *data) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_POST, 1); curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2"); curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
char err_buffer[CURL_ERROR_SIZE + 1] = {};
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, err_buffer);
struct curl_slist *headers = NULL; struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json"); headers = curl_slist_append(headers, "Content-Type: application/json");
@@ -151,17 +176,21 @@ response_t *web_post(const char *url, const char *data) {
curl_easy_perform(curl); curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code); curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf; resp->body = buffer.buf;
resp->size = buffer.cur; resp->size = buffer.cur;
if (resp->status_code == 0) {
LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
}
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
return resp; return resp;
} }
response_t *web_put(const char *url, const char *data) { response_t *web_put(const char *url, const char *data, int insecure) {
response_t *resp = malloc(sizeof(response_t)); response_t *resp = malloc(sizeof(response_t));
@@ -175,7 +204,10 @@ response_t *web_put(const char *url, const char *data) {
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT"); curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2"); curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0); curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 ); curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4);
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
struct curl_slist *headers = NULL; struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json"); headers = curl_slist_append(headers, "Content-Type: application/json");
@@ -194,7 +226,7 @@ response_t *web_put(const char *url, const char *data) {
return resp; return resp;
} }
response_t *web_delete(const char *url) { response_t *web_delete(const char *url, int insecure) {
response_t *resp = malloc(sizeof(response_t)); response_t *resp = malloc(sizeof(response_t));
@@ -207,6 +239,9 @@ response_t *web_delete(const char *url) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE"); curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2"); curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, ""); curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
struct curl_slist *headers = NULL; struct curl_slist *headers = NULL;

View File

@@ -25,14 +25,15 @@ typedef struct {
response_t *response; response_t *response;
int running_handles; int running_handles;
int done; int done;
char curl_err_buffer[CURL_ERROR_SIZE + 1];
} subreq_ctx_t; } subreq_ctx_t;
response_t *web_get(const char *url, int timeout); response_t *web_get(const char *url, int timeout, int insecure);
response_t *web_post(const char * url, const char * data); response_t *web_post(const char * url, const char * data, int insecure);
void web_post_async_poll(subreq_ctx_t* req); void web_post_async_poll(subreq_ctx_t* req);
subreq_ctx_t *web_post_async(const char *url, char *data); subreq_ctx_t *web_post_async(const char *url, char *data, int insecure);
response_t *web_put(const char *url, const char *data); response_t *web_put(const char *url, const char *data, int insecure);
response_t *web_delete(const char *url); response_t *web_delete(const char *url, int insecure);
void free_response(response_t *resp); void free_response(response_t *resp);

View File

@@ -505,9 +505,9 @@ void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_
// Copy tn store contents // Copy tn store contents
size_t buf_len; size_t buf_len;
char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, sizeof(doc_id), &buf_len); char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, SIST_DOC_ID_LEN, &buf_len);
if (buf_len != 0) { if (buf_len != 0) {
store_write(IncrementalCopyDestinationStore, (char *) doc_id, sizeof(doc_id), buf, buf_len); store_write(IncrementalCopyDestinationStore, (char *) doc_id, SIST_DOC_ID_LEN, buf, buf_len);
free(buf); free(buf);
} }
} }

View File

@@ -42,13 +42,13 @@ index_descriptor_t read_index_descriptor(char *path);
// caller ensures char file_path[PATH_MAX] // caller ensures char file_path[PATH_MAX]
#define READ_INDICES(file_path, index_path, action_ok, action_main_fail, cond_original) \ #define READ_INDICES(file_path, index_path, action_ok, action_main_fail, cond_original) \
snprintf(file_path, PATH_MAX, "%s_index_main.ndjson.zst", index_path); \ snprintf(file_path, PATH_MAX, "%s_index_main.ndjson.zst", index_path); \
if (0 == access(file_path, R_OK)) { \ if (access(file_path, R_OK) == 0) { \
action_ok; \ action_ok; \
} else { \ } else { \
action_main_fail; \ action_main_fail; \
} \ } \
snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \ snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \
if ((cond_original) && (0 == access(file_path, R_OK))) { \ if ((cond_original) && access(file_path, R_OK) == 0) { \
action_ok; \ action_ok; \
} \ } \

1
src/magic_generated.c vendored Normal file

File diff suppressed because one or more lines are too long

View File

@@ -38,8 +38,8 @@ static __sighandler_t sigabrt_handler = NULL;
void sig_handler(int signum) { void sig_handler(int signum) {
LogCtx.verbose = 1; LogCtx.verbose = TRUE;
LogCtx.very_verbose = 1; LogCtx.very_verbose = TRUE;
LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n"); LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum)); LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
@@ -103,7 +103,7 @@ void sig_handler(int signum) {
exit(-1); exit(-1);
} }
void init_dir(const char *dirpath, scan_args_t* args) { void init_dir(const char *dirpath, scan_args_t *args) {
char path[PATH_MAX]; char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath); snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
@@ -112,16 +112,16 @@ void init_dir(const char *dirpath, scan_args_t* args) {
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON); strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON);
if (args->incremental != NULL) { if (args->incremental != NULL) {
// copy old index id // copy old index id
char descriptor_path[PATH_MAX]; char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental); snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path); index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id)); memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
} else { } else {
// generate new index id based on timestamp // generate new index id based on timestamp
unsigned char index_md5[MD5_DIGEST_LENGTH]; unsigned char index_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5); MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id); buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
} }
write_index_descriptor(path, &ScanCtx.index.desc); write_index_descriptor(path, &ScanCtx.index.desc);
@@ -324,9 +324,13 @@ void load_incremental_index(const scan_args_t *args) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version) LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version)
} }
READ_INDICES(file_path, args->incremental, incremental_read(ScanCtx.original_table, file_path, &original_desc), READ_INDICES(
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)), file_path,
1); args->incremental,
incremental_read(ScanCtx.original_table, file_path, &original_desc),
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
TRUE
);
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table)) LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
} }
@@ -431,8 +435,8 @@ void sist2_scan(scan_args_t *args) {
LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count) LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count)
LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count) LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count)
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count) LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count)
LOG_DEBUGF("main.c", "Thumbnail store size: %d", ScanCtx.stat_tn_size) LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size)
LOG_DEBUGF("main.c", "Index size: %d", ScanCtx.stat_index_size) LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size)
if (args->incremental != NULL) { if (args->incremental != NULL) {
save_incremental_index(args); save_incremental_index(args);
@@ -449,6 +453,7 @@ void sist2_index(index_args_t *args) {
IndexCtx.es_url = args->es_url; IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index; IndexCtx.es_index = args->es_index;
IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
IndexCtx.batch_size = args->batch_size; IndexCtx.batch_size = args->batch_size;
IndexCtx.needs_es_connection = !args->print; IndexCtx.needs_es_connection = !args->print;
@@ -534,6 +539,8 @@ void sist2_exec_script(exec_args_t *args) {
IndexCtx.es_url = args->es_url; IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index; IndexCtx.es_index = args->es_index;
IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
IndexCtx.needs_es_connection = TRUE;
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type) LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
@@ -545,6 +552,7 @@ void sist2_web(web_args_t *args) {
WebCtx.es_url = args->es_url; WebCtx.es_url = args->es_url;
WebCtx.es_index = args->es_index; WebCtx.es_index = args->es_index;
WebCtx.es_insecure_ssl = args->es_insecure_ssl;
WebCtx.index_count = args->index_count; WebCtx.index_count = args->index_count;
WebCtx.auth_user = args->auth_user; WebCtx.auth_user = args->auth_user;
WebCtx.auth_pass = args->auth_pass; WebCtx.auth_pass = args->auth_pass;
@@ -615,6 +623,7 @@ int main(int argc, const char *argv[]) {
int arg_version = 0; int arg_version = 0;
char *common_es_url = NULL; char *common_es_url = NULL;
int common_es_insecure_ssl = 0;
char *common_es_index = NULL; char *common_es_index = NULL;
char *common_script_path = NULL; char *common_script_path = NULL;
int common_async_script = 0; int common_async_script = 0;
@@ -680,6 +689,7 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Index options"), OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"), OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"), OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."), OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_BOOLEAN(0, "incremental-index", &index_args->incremental, OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
@@ -694,6 +704,7 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Web options"), OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"), OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"), OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"), OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
@@ -704,6 +715,7 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Exec-script options"), OPT_GROUP("Exec-script options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"), OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."), OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."), OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
@@ -733,6 +745,10 @@ int main(int argc, const char *argv[]) {
index_args->es_index = common_es_index; index_args->es_index = common_es_index;
exec_args->es_index = common_es_index; exec_args->es_index = common_es_index;
web_args->es_insecure_ssl = common_es_insecure_ssl;
index_args->es_insecure_ssl = common_es_insecure_ssl;
exec_args->es_insecure_ssl = common_es_insecure_ssl;
index_args->script_path = common_script_path; index_args->script_path = common_script_path;
exec_args->script_path = common_script_path; exec_args->script_path = common_script_path;
index_args->threads = common_threads; index_args->threads = common_threads;
@@ -776,9 +792,8 @@ int main(int argc, const char *argv[]) {
sist2_exec_script(exec_args); sist2_exec_script(exec_args);
} else { } else {
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
argparse_usage(&argparse); argparse_usage(&argparse);
goto end; LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0])
} }
printf("\n"); printf("\n");

View File

@@ -5,6 +5,7 @@
#include "mime.h" #include "mime.h"
#include "src/io/serialize.h" #include "src/io/serialize.h"
#include "src/parsing/sidecar.h" #include "src/parsing/sidecar.h"
#include "src/magic_generated.c"
#include <magic.h> #include <magic.h>
@@ -143,7 +144,15 @@ void parse(void *arg) {
} }
magic_t magic = magic_open(MAGIC_MIME_TYPE); magic_t magic = magic_open(MAGIC_MIME_TYPE);
magic_load(magic, NULL);
const char *magic_buffers[1] = {magic_database_buffer,};
size_t sizes[1] = {sizeof(magic_database_buffer),};
int load_ret = magic_load_buffers(magic, (void **) &magic_buffers, sizes, 1);
if (load_ret != 0) {
LOG_FATALF("parse.c", "Could not load libmagic database: (%d)", load_ret)
}
const char *magic_mime_str = magic_buffer(magic, buf, bytes_read); const char *magic_mime_str = magic_buffer(magic, buf, bytes_read);
if (magic_mime_str != NULL) { if (magic_mime_str != NULL) {

View File

@@ -27,10 +27,6 @@
#define UNUSED(x) __attribute__((__unused__)) x #define UNUSED(x) __attribute__((__unused__)) x
#define MD5_STR_LENGTH 33
#define SHA1_STR_LENGTH 41
#define SHA1_DIGEST_LENGTH 20
#include "util.h" #include "util.h"
#include "log.h" #include "log.h"
#include "types.h" #include "types.h"
@@ -53,7 +49,7 @@
#include <ctype.h> #include <ctype.h>
#include "git_hash.h" #include "git_hash.h"
#define VERSION "2.12.0" #define VERSION "2.12.1"
static const char *const Version = VERSION; static const char *const Version = VERSION;
#ifndef SIST_PLATFORM #ifndef SIST_PLATFORM

View File

@@ -212,7 +212,7 @@ void search(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->body.len == 0) { if (hm->body.len == 0) {
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request") LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
mg_http_reply(nc, 500, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Invalid request"); mg_http_reply(nc, 400, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Invalid request");
return; return;
} }
@@ -223,7 +223,7 @@ void search(struct mg_connection *nc, struct mg_http_message *hm) {
char url[4096]; char url[4096];
snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index); snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index);
nc->fn_data = web_post_async(url, body); nc->fn_data = web_post_async(url, body, WebCtx.es_insecure_ssl);
} }
void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) { void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
@@ -302,7 +302,7 @@ void cache_es_version() {
return; return;
} }
es_version_t *es_version = elastic_get_version(WebCtx.es_url); es_version_t *es_version = elastic_get_version(WebCtx.es_url, WebCtx.es_insecure_ssl);
if (es_version != NULL) { if (es_version != NULL) {
WebCtx.es_version = es_version; WebCtx.es_version = es_version;
is_cached = TRUE; is_cached = TRUE;
@@ -326,7 +326,7 @@ void index_info(struct mg_connection *nc) {
cJSON_AddStringToObject(json, "version", Version); cJSON_AddStringToObject(json, "version", Version);
cJSON_AddStringToObject(json, "esVersion", es_version); cJSON_AddStringToObject(json, "esVersion", es_version);
cJSON_AddBoolToObject(json, "esVersionSupported", IS_SUPPORTED_ES_VERSION(WebCtx.es_version)); cJSON_AddBoolToObject(json, "esVersionSupported", IS_SUPPORTED_ES_VERSION(WebCtx.es_version));
cJSON_AddBoolToObject(json, "esVersionLegacy", USE_LEGACY_ES_SETTINGS(WebCtx.es_version)); cJSON_AddBoolToObject(json, "esVersionLegacy", IS_LEGACY_VERSION(WebCtx.es_version));
cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM)); cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM));
cJSON_AddStringToObject(json, "sist2Hash", Sist2CommitHash); cJSON_AddStringToObject(json, "sist2Hash", Sist2CommitHash);
cJSON_AddStringToObject(json, "lang", WebCtx.lang); cJSON_AddStringToObject(json, "lang", WebCtx.lang);
@@ -359,42 +359,6 @@ void index_info(struct mg_connection *nc) {
} }
void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
char arg_doc_id[SIST_DOC_ID_LEN];
memcpy(arg_doc_id, hm->uri.ptr + 3, SIST_DOC_ID_LEN);
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
cJSON *doc = elastic_get_document(arg_doc_id);
cJSON *source = cJSON_GetObjectItem(doc, "_source");
cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
HTTP_REPLY_NOT_FOUND
return;
}
index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) {
cJSON_Delete(doc);
HTTP_REPLY_NOT_FOUND
return;
}
char *json_str = cJSON_PrintUnformatted(source);
send_response_line(nc, 200, (int) strlen(json_str), "Content-Type: application/json");
mg_send(nc, json_str, (int) strlen(json_str));
free(json_str);
cJSON_Delete(doc);
}
void file(struct mg_connection *nc, struct mg_http_message *hm) { void file(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != SIST_DOC_ID_LEN + 2) { if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
@@ -567,7 +531,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
char url[4096]; char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id); snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->fn_data = web_post_async(url, buf); nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
} else { } else {
cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name)); cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
@@ -587,7 +551,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
char url[4096]; char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id); snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->fn_data = web_post_async(url, buf); nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
} }
char *json_str = cJSON_PrintUnformatted(arr); char *json_str = cJSON_PrintUnformatted(arr);
@@ -653,8 +617,6 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
return; return;
} }
tag(nc, hm); tag(nc, hm);
} else if (mg_http_match_uri(hm, "/d/*")) {
document_info(nc, hm);
} else { } else {
HTTP_REPLY_NOT_FOUND HTTP_REPLY_NOT_FOUND
} }

File diff suppressed because one or more lines are too long

View File

@@ -35,10 +35,20 @@ def sist2_index(files, *args):
path = copy_files(files) path = copy_files(files)
shutil.rmtree("test_i", ignore_errors=True) shutil.rmtree("test_i", ignore_errors=True)
sist2("scan", path, "-o", "test_i", *args) sist2("scan", path, "-o", "test_i", "-t12", *args)
return iter(sist2_index_to_dict("test_i")) return iter(sist2_index_to_dict("test_i"))
def get_lmdb_contents(path):
import lmdb
env = lmdb.open(path)
txn = env.begin(write=False)
return dict((k, v) for k, v in txn.cursor())
def sist2_incremental_index(files, func=None, incremental_index=False, *args): def sist2_incremental_index(files, func=None, incremental_index=False, *args):
path = copy_files(files) path = copy_files(files)
@@ -46,7 +56,7 @@ def sist2_incremental_index(files, func=None, incremental_index=False, *args):
func(path) func(path)
shutil.rmtree("test_i_inc", ignore_errors=True) shutil.rmtree("test_i_inc", ignore_errors=True)
sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", *args) sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", "-t12", *args)
return iter(sist2_index_to_dict("test_i_inc", incremental_index)) return iter(sist2_index_to_dict("test_i_inc", incremental_index))
@@ -76,9 +86,31 @@ class ScanTest(unittest.TestCase):
pass pass
file_count = sum(1 for _ in sist2_index(TEST_FILES)) file_count = sum(1 for _ in sist2_index(TEST_FILES))
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, remove_files)), file_count - 2) lmdb_full = get_lmdb_contents("test_i/thumbs")
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)), 3)
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files)), file_count + 3) # Remove files
num_files_rm1 = len(list(sist2_incremental_index(TEST_FILES, remove_files)))
lmdb_rm1 = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(num_files_rm1, file_count - 2)
self.assertEqual(len(set(lmdb_full.keys() - set(lmdb_rm1.keys()))), 2)
# add files (incremental_index=True)
num_files_add_inc = len(list(sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)))
lmdb_add_inc = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(num_files_add_inc, 3)
self.assertEqual(set(lmdb_full.keys()), set(lmdb_add_inc.keys()))
# add files
num_files_add = len(list(sist2_incremental_index(TEST_FILES, add_files)))
lmdb_add = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(num_files_add, file_count + 3)
self.assertEqual(set(lmdb_full.keys()), set(lmdb_add.keys()))
# (No action)
sist2_incremental_index(TEST_FILES)
lmdb_inc = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(set(lmdb_full.keys()), set(lmdb_inc.keys()))
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -7,6 +7,11 @@ option(BUILD_TESTS "Build tests" on)
add_subdirectory(third-party/antiword) add_subdirectory(third-party/antiword)
set(USE_LIBXML2 OFF CACHE BOOL "" FORCE)
set(USE_XMLWRITER OFF CACHE BOOL "" FORCE)
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
add_subdirectory(third-party/libmobi)
add_library( add_library(
scan scan
libscan/util.c libscan/util.h libscan/util.c libscan/util.h
@@ -42,6 +47,22 @@ if (SIST_DEBUG)
-fsanitize=address -fsanitize=address
-fno-inline -fno-inline
) )
elseif (SIST_FAST)
add_compile_definitions(
antiword
NDEBUG
)
target_compile_options(
scan
PRIVATE
-Ofast
-march=native
-fno-stack-protector
-fomit-frame-pointer
-freciprocal-math
)
else() else()
add_compile_definitions( add_compile_definitions(
antiword antiword
@@ -97,35 +118,15 @@ target_compile_options(
-g -g
) )
include(ExternalProject)
find_program(MAKE_EXE NAMES gmake nmake make)
ExternalProject_Add(
libmobi
GIT_REPOSITORY https://github.com/simon987/libmobi.git
GIT_TAG "public"
UPDATE_COMMAND ""
PATCH_COMMAND ""
TEST_COMMAND ""
CONFIGURE_COMMAND ./autogen.sh && ./configure
INSTALL_COMMAND ""
PREFIX "third-party/ext_libmobi"
SOURCE_DIR "third-party/ext_libmobi/src/libmobi"
BINARY_DIR "third-party/ext_libmobi/src/libmobi"
BUILD_COMMAND ${MAKE_EXE} -j 8 --silent
)
SET(MOBI_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/)
SET(MOBI_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/)
if (SIST_DEBUG) if (SIST_DEBUG)
SET(FFMPEG_DEBUG "--enable-debug=3" "--disable-optimizations") SET(FFMPEG_DEBUG "--enable-debug=3" "--disable-optimizations")
else() else()
SET(FFMPEG_DEBUG "") SET(FFMPEG_DEBUG "")
endif() endif()
include(ExternalProject)
find_program(MAKE_EXE NAMES gmake nmake make)
ExternalProject_Add( ExternalProject_Add(
ffmpeg ffmpeg
GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git
@@ -171,10 +172,10 @@ SET(WPD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwp
add_dependencies( add_dependencies(
scan scan
libmobi
ffmpeg ffmpeg
antiword antiword
libwpd libwpd
mobi
) )
target_link_libraries( target_link_libraries(
@@ -192,8 +193,6 @@ target_link_libraries(
${MUPDF_LIB} ${MUPDF_LIB}
openjp2 openjp2
${MOBI_LIB_DIR}/libmobi.a
${WPD_LIB_DIR}/libwpd-0.9.a ${WPD_LIB_DIR}/libwpd-0.9.a
${WPD_LIB_DIR}/libwpd-stream-0.9.a ${WPD_LIB_DIR}/libwpd-stream-0.9.a
@@ -230,6 +229,7 @@ target_link_libraries(
${GUMBO_LIB} ${GUMBO_LIB}
dl dl
antiword antiword
mobi
unofficial::pcre::pcre unofficial::pcre::pcre16 unofficial::pcre::pcre32 unofficial::pcre::pcrecpp unofficial::pcre::pcre unofficial::pcre::pcre16 unofficial::pcre::pcre32 unofficial::pcre::pcrecpp
) )

View File

@@ -4,7 +4,12 @@
#define MIN_SIZE 32 #define MIN_SIZE 32
#define AVIO_BUF_SIZE 8192 #define AVIO_BUF_SIZE 8192
#define IS_VIDEO(fmt) ((fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0) #define IS_VIDEO(fmt) ( \
(fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0 \
&& strcmp((fmt)->iformat->name, "jpeg_pipe") != 0 \
&& strcmp((fmt)->iformat->name, "webp_pipe") != 0 \
&& strcmp((fmt)->iformat->name, "png_pipe") != 0 \
)
#define STORE_AS_IS ((void*)-1) #define STORE_AS_IS ((void*)-1)
@@ -279,18 +284,22 @@ static void
append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int is_video) { append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int is_video) {
if (is_video) { if (is_video) {
meta_line_t *meta_duration = malloc(sizeof(meta_line_t)); if (pFormatCtx->duration / AV_TIME_BASE != 0) {
meta_duration->key = MetaMediaDuration; meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE; meta_duration->key = MetaMediaDuration;
if (meta_duration->long_val > INT32_MAX) { meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE;
meta_duration->long_val = 0; if (meta_duration->long_val > INT32_MAX) {
meta_duration->long_val = 0;
}
APPEND_META(doc, meta_duration)
} }
APPEND_META(doc, meta_duration)
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t)); if (pFormatCtx->bit_rate != 0) {
meta_bitrate->key = MetaMediaBitrate; meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
meta_bitrate->long_val = pFormatCtx->bit_rate; meta_bitrate->key = MetaMediaBitrate;
APPEND_META(doc, meta_bitrate) meta_bitrate->long_val = pFormatCtx->bit_rate;
APPEND_META(doc, meta_bitrate)
}
} }
AVDictionaryEntry *tag = NULL; AVDictionaryEntry *tag = NULL;
@@ -577,7 +586,8 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
int video_duration_in_seconds = (int) (pFormatCtx->duration / AV_TIME_BASE); int video_duration_in_seconds = (int) (pFormatCtx->duration / AV_TIME_BASE);
int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF && video_duration_in_seconds >= 15) int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF &&
video_duration_in_seconds >= 15)
// Limit to ~1 thumbnail every 7s // Limit to ~1 thumbnail every 7s
? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 7 + 1), 1) + 1 ? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 7 + 1), 1) + 1
: 1; : 1;

View File

@@ -1,6 +1,6 @@
#include "scan_mobi.h" #include "scan_mobi.h"
#include <mobi.h> #include "../../third-party/libmobi/src/mobi.h"
#include <errno.h> #include <errno.h>
#include "stdlib.h" #include "stdlib.h"

View File

@@ -48,7 +48,6 @@ typedef int scan_code_t;
#define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1); #define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1);
#define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1); #define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1);
#define MD5_STR_LENGTH 33
#define SIST_DOC_ID_LEN MD5_STR_LENGTH #define SIST_DOC_ID_LEN MD5_STR_LENGTH
#define SIST_INDEX_ID_LEN MD5_STR_LENGTH #define SIST_INDEX_ID_LEN MD5_STR_LENGTH

View File

@@ -923,7 +923,6 @@ TEST(Msdoc, Test1Pdf) {
ASSERT_TRUE(strstr(get_meta(&doc, MetaContent)->str_val, "October 2000") != nullptr); ASSERT_TRUE(strstr(get_meta(&doc, MetaContent)->str_val, "October 2000") != nullptr);
ASSERT_STREQ(get_meta(&doc, MetaTitle)->str_val, "INTERNATIONAL ORGANIZATION FOR STANDARDIZATION"); ASSERT_STREQ(get_meta(&doc, MetaTitle)->str_val, "INTERNATIONAL ORGANIZATION FOR STANDARDIZATION");
ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "Oliver Morgan"); ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "Oliver Morgan");
ASSERT_EQ(get_meta(&doc, MetaPages)->long_val, 57);
ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), msdoc_ctx.content_size, 4); ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), msdoc_ctx.content_size, 4);
ASSERT_NE(size_before, store_size); ASSERT_NE(size_before, store_size);
@@ -1030,6 +1029,23 @@ TEST(Msdoc, TestUtf8Text) {
cleanup(&doc, &f); cleanup(&doc, &f);
} }
TEST(Msdoc, Test5Pdf) {
vfile_t f;
document_t doc;
load_doc_file("libscan-test-files/test_files/msdoc/test5.doc", &f, &doc);
size_t size_before = store_size;
parse_msdoc(&msdoc_ctx, &f, &doc);
ASSERT_TRUE(strstr(get_meta(&doc, MetaContent)->str_val, "орган Федеральной") != nullptr);
ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "uswo");
ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), msdoc_ctx.content_size, 4);
ASSERT_NE(size_before, store_size);
cleanup(&doc, &f);
}
TEST(Msdoc, TestFuzz1) { TEST(Msdoc, TestFuzz1) {
vfile_t f; vfile_t f;
document_t doc; document_t doc;
@@ -1189,4 +1205,7 @@ int main(int argc, char **argv) {
av_log_set_level(AV_LOG_QUIET); av_log_set_level(AV_LOG_QUIET);
::testing::InitGoogleTest(&argc, argv); ::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS(); return RUN_ALL_TESTS();
} }
// 0x6130000d2580
// "/mnt/Hatchery/m ain/downloads/qbittorrent/downloads/Roskomnadzor/УПРАВЛЕНИЕ РОСКОМНАДЗОРА по РБ.zip#/УПРАВЛЕНИЕ РОСКОМНАДЗОРА по РБ/Лопатин Ю.М/Секнин/2015 год/Обучение по ", <incomplete sequence \320>...