Compare commits

...

32 Commits

Author SHA1 Message Date
4e1109c528 Merge pull request #288 from simon987/dev
v2.12.1
2022-04-23 10:30:19 -04:00
f87de89275 Version bump 2022-04-23 10:29:50 -04:00
1205981a11 CURL error handling, fix ES version handling, support for ES8, add --es-insecure-ssl argument 2022-04-23 10:29:31 -04:00
09613eaaf9 import magic database as a blob as last resort to make it work 2022-04-18 12:55:22 -04:00
a74726be55 Merge pull request #285 from simon987/dependabot/npm_and_yarn/sist2-vue/async-2.6.4
Bump async from 2.6.3 to 2.6.4 in /sist2-vue
2022-04-17 13:42:40 -04:00
dependabot[bot]
cb228052d2 Bump async from 2.6.3 to 2.6.4 in /sist2-vue
Bumps [async](https://github.com/caolan/async) from 2.6.3 to 2.6.4.
- [Release notes](https://github.com/caolan/async/releases)
- [Changelog](https://github.com/caolan/async/blob/v2.6.4/CHANGELOG.md)
- [Commits](https://github.com/caolan/async/compare/v2.6.3...v2.6.4)

---
updated-dependencies:
- dependency-name: async
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-04-17 17:41:14 +00:00
fe56da95d5 Merge pull request #284 from simon987/dev
v2.12.0
2022-04-17 13:38:42 -04:00
9f2ad58f78 bump version 2022-04-17 12:30:14 -04:00
84d9bf4323 Fix cmake libmobi build maybe 2022-04-17 12:23:45 -04:00
90aa90f3f3 Update antiword 2022-04-17 11:47:33 -04:00
3fad07360c Merge pull request #283 from simon987/dependabot/npm_and_yarn/sist2-vue/minimist-1.2.6
Bump minimist from 1.2.5 to 1.2.6 in /sist2-vue
2022-04-17 10:12:10 -04:00
dependabot[bot]
00c3a640d0 Bump minimist from 1.2.5 to 1.2.6 in /sist2-vue
Bumps [minimist](https://github.com/substack/minimist) from 1.2.5 to 1.2.6.
- [Release notes](https://github.com/substack/minimist/releases)
- [Commits](https://github.com/substack/minimist/compare/1.2.5...1.2.6)

---
updated-dependencies:
- dependency-name: minimist
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-04-17 12:53:12 +00:00
730e495bde Enable highlight in document info modal, remove /d/ endpoint 2022-04-16 16:11:17 -04:00
54df1dfcf7 Fix spacebar not working in search bar 2022-04-16 13:51:36 -04:00
a75675ecea Fix thumbnail copy bug, update tests 2022-04-16 11:48:43 -04:00
901035da15 Build libmobi with cmake, update to 0.10 2022-04-15 16:01:40 -04:00
ceb7265639 Fix max_analyzed_offset (again?) 2022-04-15 15:35:39 -04:00
036ed9ea1e Update libmagic cmake things 2022-04-15 15:35:20 -04:00
779303a2f7 Print body response when task id cannot be read 2022-04-14 16:24:56 -04:00
23aee14c07 Fix exec-script & fix memory leak in exec_args_validate 2022-04-14 15:43:24 -04:00
50b9201be3 Merge pull request #279 from simon987/dependabot/npm_and_yarn/sist2-vue/minimist-1.2.6
Bump minimist from 1.2.5 to 1.2.6 in /sist2-vue
2022-04-05 20:12:03 -04:00
dependabot[bot]
14cfb15661 Bump minimist from 1.2.5 to 1.2.6 in /sist2-vue
Bumps [minimist](https://github.com/substack/minimist) from 1.2.5 to 1.2.6.
- [Release notes](https://github.com/substack/minimist/releases)
- [Commits](https://github.com/substack/minimist/compare/1.2.5...1.2.6)

---
updated-dependencies:
- dependency-name: minimist
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-03-31 23:28:25 +00:00
125c85d9bb localize tag filter bar 2022-03-18 09:15:07 -04:00
474eb95aff Update antiword 2022-03-17 15:08:55 -04:00
acf7453057 Add test for large msdoc 2022-03-17 15:05:48 -04:00
9a949d2694 Use TRUE rather than 1 2022-03-17 09:13:19 -04:00
dbdc75dcb8 Add filter bar in tag picker 2022-03-17 09:12:43 -04:00
c575fca91d Do not store duration or bitrate when the value is 0 or for images 2022-03-05 21:24:59 -05:00
0bf4244683 Do blank search on page reload when media tab auto-reload is disabled 2022-03-05 20:56:02 -05:00
eea5ce75f3 Fix query args updating outside of the search page 2022-03-05 20:42:13 -05:00
9b81856353 Fix some errors in keyboard handler 2022-03-05 20:33:45 -05:00
a10d6952ba Fix segfault in print_errors() 2022-03-05 20:33:21 -05:00
45 changed files with 537 additions and 298 deletions

.gitmodules (vendored): 3 changed lines

@@ -7,3 +7,6 @@
[submodule "third-party/libscan/third-party/antiword"]
path = third-party/libscan/third-party/antiword
url = https://github.com/simon987/antiword
[submodule "third-party/libscan/third-party/libmobi"]
path = third-party/libscan/third-party/libmobi
url = https://github.com/bfabiszewski/libmobi


@@ -4,6 +4,7 @@ set(CMAKE_C_STANDARD 11)
project(sist2 C)
option(SIST_DEBUG "Build a debug executable" on)
option(SIST_FAST "Enable more optimisation flags" off)
option(SIST_FAKE_STORE "Disable IO operations of LMDB stores for debugging purposes" 0)
add_compile_definitions(
@@ -54,6 +55,10 @@ find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED)
find_package(CURL CONFIG REQUIRED)
find_library(MAGIC_LIB
NAMES libmagic.so.1 magic
PATHS /usr/lib/x86_64-linux-gnu/ /usr/lib/aarch64-linux-gnu/
)
target_include_directories(
@@ -93,16 +98,25 @@ if (SIST_DEBUG)
PROPERTIES
OUTPUT_NAME sist2_debug
)
elseif (SIST_FAST)
target_compile_options(
sist2
PRIVATE
-Ofast
-march=native
-fno-stack-protector
-fomit-frame-pointer
-freciprocal-math
)
else ()
target_compile_options(
sist2
PRIVATE
-Ofast
#-march=native
-fno-stack-protector
-fomit-frame-pointer
#-freciprocal-math
)
endif ()
@@ -124,13 +138,12 @@ target_link_libraries(
CURL::libcurl
pthread
#magic
c
scan
/usr/lib/x86_64-linux-gnu/libmagic.so.1
${MAGIC_LIB}
)
add_custom_target(


@@ -52,7 +52,7 @@ sist2 (Simple incremental search tool)
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` *
2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
recommended!)*
3. *(or)* `docker pull simon987/sist2:2.11.7-x64-linux`
3. *(or)* `docker pull simon987/sist2:2.12.1-x64-linux`
1. See [Usage guide](docs/USAGE.md)


@@ -292,7 +292,7 @@ Both the `root` and `rewrite_url` fields are safe to manually modify from the
# Elasticsearch
Elasticsearch versions >=6.8.0, <8.0.0 are supported by sist2.
Elasticsearch versions >=6.8.0, 7.X.X and 8.X.X are supported by sist2.
Using a version >=7.14.0 is recommended to enable the following features:


@@ -3,7 +3,7 @@
"refresh_interval": "30s",
"codec": "best_compression",
"number_of_replicas": 0,
"highlight.max_analyzed_offset": 10000000
"highlight.max_analyzed_offset": 1000000
},
"analysis": {
"tokenizer": {
@@ -16,7 +16,7 @@
"delimiter": "."
},
"my_nGram_tokenizer": {
"type": "nGram",
"type": "ngram",
"min_gram": 3,
"max_gram": 3
}
@@ -55,37 +55,5 @@
]
}
}
},
"mappings": {
"dynamic_templates": [
{
"keyword_fields": {
"match_mapping_type": "string",
"match": "kw_*",
"mapping": {
"type": "keyword"
}
}
},
{
"integer_fields": {
"match_mapping_type": "*",
"match": "int_*",
"mapping": {
"type": "integer"
}
}
},
{
"meta_fields": {
"match_mapping_type": "*",
"match": "mt_*",
"mapping": {
"type": "keyword",
"index": false
}
}
}
]
}
}


@@ -5,5 +5,6 @@ rm -rf index.sist2/
python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c
python3 scripts/index_static.py > src/index/static_generated.c
python3 scripts/magic_static.py > src/magic_generated.c
printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h

scripts/magic_static.py (new file): 8 lines

@@ -0,0 +1,8 @@
try:
    with open("/usr/lib/file/magic.mgc", "rb") as f:
        data = f.read()
except:
    data = bytes([])

print("char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data)))

scripts/start_dev_es_6.sh (new executable file): 3 lines

@@ -0,0 +1,3 @@
docker run --rm -it --name "sist2-dev-es-6"\
-p 9202:9200 -e "discovery.type=single-node" \
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:6.8.0

scripts/start_dev_es_8.sh (new executable file): 3 lines

@@ -0,0 +1,3 @@
docker run --rm -it --name "sist2-dev-es"\
-p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" \
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:8.1.2

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -3288,9 +3288,9 @@
}
},
"node_modules/async": {
"version": "2.6.3",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz",
"integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==",
"version": "2.6.4",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.4.tgz",
"integrity": "sha512-mzo5dfJYwAn29PeiJ0zvwTo04zj8HDJj0Mn8TD7sno7q12prdbnasKJHhkm2c1LgrhlJ0teaea8860oxi51mGA==",
"dev": true,
"dependencies": {
"lodash": "^4.17.14"
@@ -9736,9 +9736,9 @@
}
},
"node_modules/minimist": {
"version": "1.2.5",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==",
"version": "1.2.6",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
"integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==",
"dev": true
},
"node_modules/minipass": {
@@ -17937,9 +17937,9 @@
"dev": true
},
"async": {
"version": "2.6.3",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz",
"integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==",
"version": "2.6.4",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.4.tgz",
"integrity": "sha512-mzo5dfJYwAn29PeiJ0zvwTo04zj8HDJj0Mn8TD7sno7q12prdbnasKJHhkm2c1LgrhlJ0teaea8860oxi51mGA==",
"dev": true,
"requires": {
"lodash": "^4.17.14"
@@ -23324,9 +23324,9 @@
}
},
"minimist": {
"version": "1.2.5",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==",
"version": "1.2.6",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
"integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==",
"dev": true
},
"minipass": {


@@ -336,10 +336,6 @@ class Sist2Api {
};
}
getDocInfo(docId: string) {
return axios.get(`${this.baseUrl}d/${docId}`);
}
getTags() {
return this.esQuery({
aggs: {


@@ -69,7 +69,7 @@ interface SortMode {
class Sist2Query {
searchQuery(): any {
searchQuery(blankSearch: boolean = false): any {
const getters = store.getters;
@@ -93,22 +93,6 @@ class Sist2Query {
{terms: {index: selectedIndexIds}}
] as any[];
if (sizeMin && sizeMax) {
filters.push({range: {size: {gte: sizeMin, lte: sizeMax}}})
} else if (sizeMin) {
filters.push({range: {size: {gte: sizeMin}}})
} else if (sizeMax) {
filters.push({range: {size: {lte: sizeMax}}})
}
if (dateMin && dateMax) {
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
} else if (dateMin) {
filters.push({range: {mtime: {gte: dateMin}}})
} else if (dateMax) {
filters.push({range: {mtime: {lte: dateMax}}})
}
const fields = [
"name^8",
"content^3",
@@ -128,20 +112,39 @@ class Sist2Query {
fields.push("name.nGram^3");
}
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
if (path !== "") {
filters.push({term: {path: path}})
}
if (!blankSearch) {
if (sizeMin && sizeMax) {
filters.push({range: {size: {gte: sizeMin, lte: sizeMax}}})
} else if (sizeMin) {
filters.push({range: {size: {gte: sizeMin}}})
} else if (sizeMax) {
filters.push({range: {size: {lte: sizeMax}}})
}
if (selectedMimeTypes.length > 0) {
filters.push({terms: {"mime": selectedMimeTypes}});
}
if (dateMin && dateMax) {
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
} else if (dateMin) {
filters.push({range: {mtime: {gte: dateMin}}})
} else if (dateMax) {
filters.push({range: {mtime: {lte: dateMax}}})
}
if (selectedTags.length > 0) {
if (getters.optTagOrOperator) {
filters.push({terms: {"tag": selectedTags}});
} else {
selectedTags.forEach((tag: string) => filters.push({term: {"tag": tag}}));
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
if (path !== "") {
filters.push({term: {path: path}})
}
if (selectedMimeTypes.length > 0) {
filters.push({terms: {"mime": selectedMimeTypes}});
}
if (selectedTags.length > 0) {
if (getters.optTagOrOperator) {
filters.push({terms: {"tag": selectedTags}});
} else {
selectedTags.forEach((tag: string) => filters.push({term: {"tag": tag}}));
}
}
}
@@ -182,7 +185,7 @@ class Sist2Query {
size: size,
} as any;
if (!empty) {
if (!empty && !blankSearch) {
q.query.bool.must = query;
}
@@ -207,7 +210,7 @@ class Sist2Query {
};
if (!legacyES) {
q.highlight.max_analyzed_offset = 9_999_999;
q.highlight.max_analyzed_offset = 999_999;
}
if (getters.optSearchInPath) {
@@ -237,7 +240,7 @@ class Sist2Query {
}
}
if (!empty) {
if (!empty && !blankSearch) {
q.query.function_score.query.bool.must.push(query);
}
}


@@ -1,11 +1,13 @@
<template>
<Preloader v-if="loading"></Preloader>
<div v-else-if="content" class="content-div">{{ content }}</div>
<div v-else-if="content" class="content-div" v-html="content"></div>
</template>
<script>
import Sist2Api from "@/Sist2Api";
import Preloader from "@/components/Preloader";
import Sist2Query from "@/Sist2Query";
import store from "@/store";
export default {
name: "LazyContentDiv",
@@ -18,10 +20,72 @@ export default {
}
},
mounted() {
Sist2Api.getDocInfo(this.docId).then(src => {
this.content = src.data.content;
const query = Sist2Query.searchQuery();
if (this.$store.state.optHighlight) {
const fields = this.$store.state.fuzzy
? {"content.nGram": {}}
: {content: {}};
query.highlight = {
pre_tags: ["<mark>"],
post_tags: ["</mark>"],
number_of_fragments: 0,
fields,
};
if (!store.state.sist2Info.esVersionLegacy) {
query.highlight.max_analyzed_offset = 999_999;
}
}
if ("function_score" in query.query) {
query.query = query.query.function_score.query;
}
if (!("must" in query.query.bool)) {
query.query.bool.must = [];
} else if (!Array.isArray(query.query.bool.must)) {
query.query.bool.must = [query.query.bool.must];
}
query.query.bool.must.push({match: {_id: this.docId}});
delete query["sort"];
delete query["aggs"];
delete query["search_after"];
delete query.query["function_score"];
query._source = {
includes: ["content", "name", "path", "extension"]
}
query.size = 1;
Sist2Api.esQuery(query).then(resp => {
this.loading = false;
})
if (resp.hits.hits.length === 1) {
this.content = this.getContent(resp.hits.hits[0]);
} else {
console.log("FIXME: could not get content")
console.log(resp)
}
});
},
methods: {
getContent(doc) {
if (!doc.highlight) {
return doc._source.content;
}
if (doc.highlight["content.nGram"]) {
return doc.highlight["content.nGram"][0];
}
if (doc.highlight.content) {
return doc.highlight.content[0];
}
}
}
}
</script>


@@ -81,7 +81,9 @@ export default {
methods: {
keyDownListener(e) {
if (this.$refs.lightbox === undefined) {
const isLightboxOpen = this.$refs.lightbox === undefined || this.$refs.lightbox.$el.tagName === undefined;
if (isLightboxOpen) {
return true;
}
@@ -89,7 +91,6 @@ export default {
switch (e.key) {
case " ": {
console.log("SPACE")
e.preventDefault();
e.stopPropagation();
e.stopImmediatePropagation();
@@ -98,16 +99,12 @@ export default {
[...document.getElementsByClassName("fslightbox-absoluted")].forEach(elem => {
if (elem.style.transform === "translate(0px)" || elem.style.transform === "translate(0px, 0px)") {
const vid = elem.getElementsByTagName("video")[0];
console.log(elem)
console.log(vid)
if (vid) {
if (vid.paused) {
vid.play();
console.log("PLAY")
} else {
vid.pause()
console.log("PAUSE")
}
}
}
@@ -119,24 +116,28 @@ export default {
}
case "ArrowUp":
case "k": {
if (!lightboxStore.data.isThumbing) {
if (!lightboxStore.data.isThumbing && lightboxStore.core.thumbsToggler) {
lightboxStore.core.thumbsToggler.toggleThumbs();
}
return false;
}
case "ArrowDown":
case "j": {
if (lightboxStore.data.isThumbing) {
if (lightboxStore.data.isThumbing && lightboxStore.core.thumbsToggler) {
lightboxStore.core.thumbsToggler.toggleThumbs();
}
return false;
}
case "h": {
lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getPreviousSlideIndex());
break;
if (lightboxStore.core.stageManager.getPreviousSlideIndex) {
lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getPreviousSlideIndex());
}
return false;
}
case "l": {
lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getNextSlideIndex());
if (lightboxStore.core.stageManager.getNextSlideIndex) {
lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getNextSlideIndex());
}
return false;
}
}


@@ -1,5 +1,13 @@
<template>
<div id="tagTree"></div>
<div>
<b-input-group v-if="showSearchBar" id="tag-picker-filter-bar">
<b-form-input :value="filter"
:placeholder="$t('tagFilter')"
@input="onFilter($event)"></b-form-input>
</b-input-group>
<div id="tagTree"></div>
</div>
</template>
<script>
@@ -112,10 +120,12 @@ function addTag(map, tag, id, count) {
export default {
name: "TagPicker",
props: ["showSearchBar"],
data() {
return {
tagTree: null,
loadedFromArgs: false,
filter: ""
}
},
mounted() {
@@ -129,6 +139,10 @@ export default {
});
},
methods: {
onFilter(value) {
this.filter = value;
this.tagTree.search(value);
},
initializeTree() {
const tagMap = [];
this.tagTree = new InspireTree({
@@ -163,7 +177,8 @@ export default {
});
},
handleTreeClick(node, e) {
if (e === "indeterminate" || e === "collapsed" || e === 'rendered' || e === "focused") {
if (e === "indeterminate" || e === "collapsed" || e === 'rendered' || e === "focused"
|| e === "matched" || e === "hidden") {
return;
}
@@ -180,7 +195,15 @@ export default {
}
</style>
<style>
.inspire-tree .focused>.wholerow {
.inspire-tree .focused > .wholerow {
border: none;
}
#tag-picker-filter-bar {
padding: 10px 4px 4px;
}
.theme-black .inspire-tree .matched > .wholerow {
background: rgba(251, 191, 41, 0.25);
}
</style>


@@ -16,6 +16,7 @@ export default {
pages: "pages",
mimeTypes: "Media types",
tags: "Tags",
tagFilter: "Filter tags",
help: {
simpleSearch: "Simple search",
advancedSearch: "Advanced search",
@@ -74,6 +75,7 @@ export default {
useDatePicker: "Use a Date Picker component rather than a slider",
vidPreviewInterval: "Video preview frame duration in ms",
simpleLightbox: "Disable animations in image viewer",
showTagPickerFilter: "Display the tag filter bar"
},
queryMode: {
simple: "Simple",
@@ -183,6 +185,7 @@ export default {
pages: "pages",
mimeTypes: "Types de médias",
tags: "Tags",
tagFilter: "Filtrer les tags",
help: {
simpleSearch: "Recherche simple",
advancedSearch: "Recherche avancée",
@@ -242,6 +245,7 @@ export default {
useDatePicker: "Afficher un composant « Date Picker » plutôt qu'un slider",
vidPreviewInterval: "Durée des images d'aperçu video en millisecondes",
simpleLightbox: "Désactiver les animations du visualiseur d'images",
showTagPickerFilter: "Afficher le filtre dans l'onglet Tags"
},
queryMode: {
simple: "Simple",
@@ -352,6 +356,7 @@ export default {
pages: "页",
mimeTypes: "文件类型",
tags: "标签",
tagFilter: "筛选标签",
help: {
simpleSearch: "简易搜索",
advancedSearch: "高级搜索",
@@ -410,6 +415,7 @@ export default {
useDatePicker: "使用日期选择器组件而不是滑块",
vidPreviewInterval: "视频预览帧的持续时间,以毫秒为单位",
simpleLightbox: "在图片查看器中,禁用动画",
showTagPickerFilter: "显示标签过滤栏"
},
queryMode: {
simple: "简单",


@@ -4,6 +4,8 @@ import VueRouter, {Route} from "vue-router";
import {EsHit, EsResult, EsTag, Index, Tag} from "@/Sist2Api";
import {deserializeMimes, serializeMimes} from "@/util";
const CONF_VERSION = 2;
Vue.use(Vuex)
export default new Vuex.Store({
@@ -24,7 +26,6 @@ export default new Vuex.Store({
sortMode: "score",
fuzzy: false,
size: 60,
optLang: "en",
optLangIsDefault: true,
@@ -32,6 +33,7 @@ export default new Vuex.Store({
optTheme: "light",
optDisplay: "grid",
optSize: 60,
optHighlight: true,
optTagOrOperator: false,
optFuzzy: true,
@@ -52,6 +54,7 @@ export default new Vuex.Store({
optUseDatePicker: false,
optVidPreviewInterval: 700,
optSimpleLightbox: true,
optShowTagPickerFilter: true,
_onLoadSelectedIndices: [] as string[],
_onLoadSelectedMimeTypes: [] as string[],
@@ -150,7 +153,7 @@ export default new Vuex.Store({
setOptSuggestPath: (state, val) => state.optSuggestPath = val,
setOptFragmentSize: (state, val) => state.optFragmentSize = val,
setOptQueryMode: (state, val) => state.optQueryMode = val,
setOptResultSize: (state, val) => state.size = val,
setOptResultSize: (state, val) => state.optSize = val,
setOptTagOrOperator: (state, val) => state.optTagOrOperator = val,
setOptTreemapType: (state, val) => state.optTreemapType = val,
@@ -163,6 +166,7 @@ export default new Vuex.Store({
setOptUseDatePicker: (state, val) => state.optUseDatePicker = val,
setOptVidPreviewInterval: (state, val) => state.optVidPreviewInterval = val,
setOptSimpleLightbox: (state, val) => state.optSimpleLightbox = val,
setOptShowTagPickerFilter: (state, val) => state.optShowTagPickerFilter = val,
setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
setOptLightboxSlideDuration: (state, val) => state.optLightboxSlideDuration = val,
@@ -241,6 +245,11 @@ export default new Vuex.Store({
}
},
async updateArgs({state}, router: VueRouter) {
if (router.currentRoute.path !== "/") {
return;
}
await router.push({
query: {
q: state.searchText.trim() ? state.searchText.trim().replace(/\s+/g, " ") : undefined,
@@ -269,6 +278,8 @@ export default new Vuex.Store({
}
});
conf["version"] = CONF_VERSION;
localStorage.setItem("sist2_configuration", JSON.stringify(conf));
},
loadConfiguration({state}) {
@@ -276,6 +287,11 @@ export default new Vuex.Store({
if (confString) {
const conf = JSON.parse(confString);
if (!("version" in conf) || conf["version"] != CONF_VERSION) {
localStorage.removeItem("sist2_configuration");
window.location.reload();
}
Object.keys(state).forEach((key) => {
if (key.startsWith("opt")) {
(state as any)[key] = conf[key];
@@ -337,7 +353,7 @@ export default new Vuex.Store({
searchText: state => state.searchText,
pathText: state => state.pathText,
fuzzy: state => state.fuzzy,
size: state => state.size,
size: state => state.optSize,
sortMode: state => state.sortMode,
lastQueryResult: state => state.lastQueryResults,
lastDoc: function (state): EsHit | null {
@@ -375,11 +391,12 @@ export default new Vuex.Store({
optTreemapColor: state => state.optTreemapColor,
optLightboxLoadOnlyCurrent: state => state.optLightboxLoadOnlyCurrent,
optLightboxSlideDuration: state => state.optLightboxSlideDuration,
optResultSize: state => state.size,
optResultSize: state => state.optSize,
optHideLegacy: state => state.optHideLegacy,
optUpdateMimeMap: state => state.optUpdateMimeMap,
optUseDatePicker: state => state.optUseDatePicker,
optVidPreviewInterval: state => state.optVidPreviewInterval,
optSimpleLightbox: state => state.optSimpleLightbox,
optShowTagPickerFilter: state => state.optShowTagPickerFilter,
}
})


@@ -50,6 +50,11 @@
$t("opt.simpleLightbox")
}}
</b-form-checkbox>
<b-form-checkbox :checked="optShowTagPickerFilter" @input="setOptShowTagPickerFilter">{{
$t("opt.showTagPickerFilter")
}}
</b-form-checkbox>
</b-card>
<br/>
@@ -245,6 +250,7 @@ export default {
"optUseDatePicker",
"optVidPreviewInterval",
"optSimpleLightbox",
"optShowTagPickerFilter",
]),
clientWidth() {
return window.innerWidth;
@@ -292,6 +298,7 @@ export default {
"setOptUseDatePicker",
"setOptVidPreviewInterval",
"setOptSimpleLightbox",
"setOptShowTagPickerFilter",
]),
onResetClick() {
localStorage.removeItem("sist2_configuration");


@@ -32,7 +32,7 @@
<MimePicker></MimePicker>
</b-tab>
<b-tab :title="$t('tags')">
<TagPicker></TagPicker>
<TagPicker :show-search-bar="$store.state.optShowTagPickerFilter"></TagPicker>
</b-tab>
</b-tabs>
</b-col>
@@ -139,7 +139,9 @@ export default Vue.extend({
this.setSist2Info(data);
this.setIndices(data.indices);
Sist2Api.getMimeTypes(Sist2Query.searchQuery()).then(({mimeMap}) => {
const doBlankSearch = !this.$store.state.optUpdateMimeMap;
Sist2Api.getMimeTypes(Sist2Query.searchQuery(doBlankSearch)).then(({mimeMap}) => {
this.$store.commit("setUiMimeMap", mimeMap);
this.uiLoading = false;
this.search(true);
@@ -206,7 +208,7 @@ export default Vue.extend({
this.$store.commit("setUiReachedScrollEnd", false);
},
async handleSearch(resp: EsResult) {
if (resp.hits.hits.length == 0) {
if (resp.hits.hits.length == 0 || resp.hits.hits.length < this.$store.state.optSize) {
this.$store.commit("setUiReachedScrollEnd", true);
}
@@ -246,6 +248,8 @@ export default Vue.extend({
this.$store.commit("setLastQueryResult", resp);
this.docs.push(...resp.hits.hits);
resp.hits.hits.forEach(hit => this.docIds.add(hit._id));
},
getDateRange(): Promise<{ min: number, max: number }> {
return sist2.esQuery({


@@ -81,6 +81,11 @@ void web_args_destroy(web_args_t *args) {
}
void exec_args_destroy(exec_args_t *args) {
if (args->index_path != NULL) {
free(args->index_path);
}
free(args);
}
@@ -393,6 +398,7 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl)
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
LOG_DEBUGF("cli.c", "arg async_script=%d", args->async_script)
@@ -507,6 +513,7 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl)
LOG_DEBUGF("cli.c", "arg tagline=%s", args->tagline)
LOG_DEBUGF("cli.c", "arg dev=%d", args->dev)
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)


@@ -50,6 +50,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv);
typedef struct index_args {
char *es_url;
char *es_index;
int es_insecure_ssl;
char *index_path;
const char *script_path;
char *script;
@@ -68,6 +69,7 @@ typedef struct index_args {
typedef struct web_args {
char *es_url;
char *es_index;
int es_insecure_ssl;
char *listen_address;
char *credentials;
char *tag_credentials;
@@ -85,7 +87,8 @@ typedef struct web_args {
typedef struct exec_args {
char *es_url;
char *es_index;
const char *index_path;
int es_insecure_ssl;
char *index_path;
const char *script_path;
int async_script;
char *script;


@@ -79,6 +79,7 @@ typedef struct {
typedef struct {
char *es_url;
int es_insecure_ssl;
es_version_t *es_version;
char *es_index;
int batch_size;
@@ -97,6 +98,7 @@ typedef struct {
char *es_url;
es_version_t *es_version;
char *es_index;
int es_insecure_ssl;
int index_count;
char *auth_user;
char *auth_pass;


@@ -53,7 +53,7 @@ void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]) {
cJSON_AddStringToObject(line, "_id", id_str);
cJSON_AddStringToObject(line, "_index", IndexCtx.es_index);
cJSON_AddStringToObject(line, "_type", "_doc");
// cJSON_AddStringToObject(line, "_type", "_doc");
cJSON_AddItemReferenceToObject(line, "_source", document);
char *json = cJSON_PrintUnformatted(line);
@@ -110,16 +110,16 @@ void execute_update_script(const char *script, int async, const char index_id[SI
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
cJSON_AddStringToObject(term_obj, "index", index_id);
char *str = cJSON_Print(body);
char *str = cJSON_PrintUnformatted(body);
char bulk_url[4096];
char url[4096];
if (async) {
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
snprintf(url, sizeof(url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
Indexer->es_index);
} else {
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
snprintf(url, sizeof(url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
}
response_t *r = web_post(bulk_url, str);
response_t *r = web_post(url, str, IndexCtx.es_insecure_ssl);
if (!async) {
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
}
@@ -139,13 +139,18 @@ void execute_update_script(const char *script, int async, const char index_id[SI
if (async) {
cJSON *task = cJSON_GetObjectItem(resp, "task");
if (task == NULL) {
LOG_FATALF("elastic.c", "FIXME: Could not get task id: %s", r->body);
}
LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring);
}
cJSON_Delete(resp);
}
void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
void *create_bulk_buffer(int max, int *count, size_t *buf_len, int legacy) {
es_bulk_line_t *line = Indexer->line_head;
*count = 0;
@@ -166,11 +171,20 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
while (line != NULL && *count < max) {
char action_str[256];
if (line->type == ES_BULK_LINE_INDEX) {
snprintf(
action_str, sizeof(action_str),
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
line->doc_id, Indexer->es_index
);
if (legacy) {
snprintf(
action_str, sizeof(action_str),
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
line->doc_id, Indexer->es_index
);
} else {
snprintf(
action_str, sizeof(action_str),
"{\"index\":{\"_id\":\"%s\",\"_index\":\"%s\"}}\n",
line->doc_id, Indexer->es_index
);
}
size_t action_str_len = strlen(action_str);
size_t line_len = strlen(line->line);
@@ -214,7 +228,13 @@ void print_errors(response_t *r) {
*(tmp + r->size) = '\0';
cJSON *ret_json = cJSON_Parse(tmp);
if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
cJSON *errors = cJSON_GetObjectItem(ret_json, "errors");
if (errors == NULL) {
char *str = cJSON_Print(ret_json);
LOG_ERRORF("elastic.c", "%s\n", str);
cJSON_free(str);
} else if (errors->valueint != 0) {
cJSON *err;
cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
@@ -252,11 +272,11 @@ void _elastic_flush(int max) {
size_t buf_len;
int count;
void *buf = create_bulk_buffer(max, &count, &buf_len);
void *buf = create_bulk_buffer(max, &count, &buf_len, IS_LEGACY_VERSION(IndexCtx.es_version));
char bulk_url[4096];
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_bulk?pipeline=tie", Indexer->es_url, Indexer->es_index);
response_t *r = web_post(bulk_url, buf);
response_t *r = web_post(bulk_url, buf, IndexCtx.es_insecure_ssl);
if (r->status_code == 0) {
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
@@ -382,7 +402,7 @@ void finish_indexer(char *script, int async_script, char *index_id) {
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
response_t *r = web_post(url, "");
response_t *r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
@@ -391,24 +411,24 @@ void finish_indexer(char *script, int async_script, char *index_id) {
free(script);
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
}
snprintf(url, sizeof(url), "%s/%s/_forcemerge", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}");
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Set refresh interval <%d>", r->status_code);
free_response(r);
}
es_version_t *elastic_get_version(const char *es_url) {
response_t *r = web_get(es_url, 30);
es_version_t *elastic_get_version(const char *es_url, int insecure) {
response_t *r = web_get(es_url, 30, insecure);
char *tmp = malloc(r->size + 1);
memcpy(tmp, r->body, r->size);
@@ -453,7 +473,7 @@ es_version_t *elastic_get_version(const char *es_url) {
void elastic_init(int force_reset, const char *user_mappings, const char *user_settings) {
es_version_t *es_version = elastic_get_version(IndexCtx.es_url);
es_version_t *es_version = elastic_get_version(IndexCtx.es_url, IndexCtx.es_insecure_ssl);
IndexCtx.es_version = es_version;
if (es_version == NULL) {
@@ -462,33 +482,33 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
LOG_INFOF("elastic.c",
"Elasticsearch version is %s (supported=%d, legacy=%d)",
format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), USE_LEGACY_ES_SETTINGS(es_version));
format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), IS_LEGACY_VERSION(es_version));
if (!IS_SUPPORTED_ES_VERSION(es_version)) {
LOG_FATAL("elastic.c", "sist2 only supports Elasticsearch v6.8 or newer")
LOG_FATAL("elastic.c", "This elasticsearch version is not supported!")
}
char *settings = NULL;
if (USE_LEGACY_ES_SETTINGS(es_version)) {
settings = settings_json;
} else {
if (IS_LEGACY_VERSION(es_version)) {
settings = settings_legacy_json;
} else {
settings = settings_json;
}
// Check if index exists
char url[4096];
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
response_t *r = web_get(url, 30);
response_t *r = web_get(url, 30, IndexCtx.es_insecure_ssl);
int index_exists = r->status_code == 200;
free_response(r);
if (!index_exists || force_reset) {
r = web_delete(url);
r = web_delete(url, IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, "");
r = web_put(url, "", IndexCtx.es_insecure_ssl);
if (r->status_code != 200) {
print_error(r);
@@ -499,17 +519,17 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
free_response(r);
snprintf(url, sizeof(url), "%s/%s/_close", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/_ingest/pipeline/tie", IndexCtx.es_url);
r = web_put(url, pipeline_json);
r = web_put(url, pipeline_json, IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, user_settings ? user_settings : settings);
r = web_put(url, user_settings ? user_settings : settings, IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Update ES settings <%d>", r->status_code);
if (r->status_code != 200) {
print_error(r);
@@ -517,8 +537,13 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
}
free_response(r);
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, user_mappings ? user_mappings : mappings_json);
if (IS_LEGACY_VERSION(es_version)) {
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
} else {
snprintf(url, sizeof(url), "%s/%s/_mappings", IndexCtx.es_url, IndexCtx.es_index);
}
r = web_put(url, user_mappings ? user_mappings : mappings_json, IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Update ES mappings <%d>", r->status_code);
if (r->status_code != 200) {
print_error(r);
@@ -527,7 +552,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
free_response(r);
snprintf(url, sizeof(url), "%s/%s/_open", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
r = web_post(url, "", IndexCtx.es_insecure_ssl);
LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
free_response(r);
}
@@ -537,7 +562,7 @@ cJSON *elastic_get_document(const char *id_str) {
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, id_str);
response_t *r = web_get(url, 3);
response_t *r = web_get(url, 3, WebCtx.es_insecure_ssl);
cJSON *json = NULL;
if (r->status_code == 200) {
char *tmp = malloc(r->size + 1);
@@ -555,7 +580,7 @@ char *elastic_get_status() {
snprintf(url, sizeof(url),
"%s/_cluster/state/metadata/%s?filter_path=metadata.indices.*.state", WebCtx.es_url, WebCtx.es_index);
response_t *r = web_get(url, 30);
response_t *r = web_get(url, 30, IndexCtx.es_insecure_ssl);
cJSON *json = NULL;
char *status = malloc(128 * sizeof(char));
status[0] = '\0';


@@ -20,8 +20,10 @@ typedef struct {
} es_version_t;
#define VERSION_GE(version, maj, min) ((version)->major > (maj) || ((version)->major == (maj) && (version)->minor >= (min)))
#define IS_SUPPORTED_ES_VERSION(es_version) ((es_version) != NULL && VERSION_GE((es_version), 6, 8))
#define USE_LEGACY_ES_SETTINGS(es_version) ((es_version) != NULL && (!VERSION_GE((es_version), 7, 14)))
#define VERSION_LT(version, maj, min) (!VERSION_GE(version, maj, min))
#define IS_SUPPORTED_ES_VERSION(es_version) ((es_version) != NULL && VERSION_GE((es_version), 6, 8) && VERSION_LT((es_version), 9, 0))
#define IS_LEGACY_VERSION(es_version) ((es_version) != NULL && VERSION_LT((es_version), 7, 14))
__always_inline
static const char *format_es_version(es_version_t *version) {
@@ -57,7 +59,7 @@ cJSON *elastic_get_document(const char *id_str);
char *elastic_get_status();
es_version_t *elastic_get_version(const char *es_url);
es_version_t *elastic_get_version(const char *es_url, int insecure);
void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]);
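As a quick sanity check on the new version gates, here is a small stand-alone sketch; the `es_version_t` struct is simplified to just major/minor for illustration, while the macros themselves are copied from the header diff above.

```c
#include <stdio.h>

/* Simplified stand-in for es_version_t; the real struct lives in elastic.h. */
typedef struct {
    int major;
    int minor;
} es_version_t;

#define VERSION_GE(version, maj, min) ((version)->major > (maj) || ((version)->major == (maj) && (version)->minor >= (min)))
#define VERSION_LT(version, maj, min) (!VERSION_GE(version, maj, min))
#define IS_SUPPORTED_ES_VERSION(es_version) ((es_version) != NULL && VERSION_GE((es_version), 6, 8) && VERSION_LT((es_version), 9, 0))
#define IS_LEGACY_VERSION(es_version) ((es_version) != NULL && VERSION_LT((es_version), 7, 14))

int main(void) {
    es_version_t versions[] = {{6, 8}, {7, 10}, {7, 17}, {8, 1}};

    for (int i = 0; i < 4; i++) {
        es_version_t *v = &versions[i];
        /* 6.8 and 7.10 are supported but "legacy" (< 7.14);
         * 7.17 and 8.1 are supported and use the non-legacy code paths. */
        printf("%d.%d -> supported=%d legacy=%d\n", v->major, v->minor,
               IS_SUPPORTED_ES_VERSION(v), IS_LEGACY_VERSION(v));
    }
    return 0;
}
```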

File diff suppressed because one or more lines are too long


@@ -22,7 +22,7 @@ void free_response(response_t *resp) {
free(resp);
}
void web_post_async_poll(subreq_ctx_t* req) {
void web_post_async_poll(subreq_ctx_t *req) {
fd_set fdread;
fd_set fdwrite;
fd_set fdexcep;
@@ -34,7 +34,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd);
if(mc != CURLM_OK) {
if (mc != CURLM_OK) {
req->done = TRUE;
return;
}
@@ -47,7 +47,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
struct timeval timeout = {1, 0};
int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
switch(rc) {
switch (rc) {
case -1:
req->done = TRUE;
break;
@@ -64,6 +64,10 @@ void web_post_async_poll(subreq_ctx_t* req) {
req->response->size = req->response_buf.cur;
curl_easy_getinfo(req->handle, CURLINFO_RESPONSE_CODE, &req->response->status_code);
if (req->response->status_code == 0) {
LOG_ERRORF("web.c", "CURL Error: %s", req->curl_err_buffer)
}
curl_multi_cleanup(req->multi);
curl_easy_cleanup(req->handle);
curl_slist_free_all(req->headers);
@@ -71,7 +75,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
}
}
subreq_ctx_t *web_post_async(const char *url, char *data) {
subreq_ctx_t *web_post_async(const char *url, char *data, int insecure) {
subreq_ctx_t *req = calloc(1, sizeof(subreq_ctx_t));
req->response = calloc(1, sizeof(response_t));
req->data = data;
@@ -84,6 +88,11 @@ subreq_ctx_t *web_post_async(const char *url, char *data) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, req->curl_err_buffer);
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
@@ -100,7 +109,7 @@ subreq_ctx_t *web_post_async(const char *url, char *data) {
return req;
}
response_t *web_get(const char *url, int timeout) {
response_t *web_get(const char *url, int timeout, int insecure) {
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
@@ -112,14 +121,24 @@ response_t *web_get(const char *url, int timeout) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
char err_buffer[CURL_ERROR_SIZE + 1] = {};
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, err_buffer);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
if (resp->status_code == 0) {
LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
}
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
@@ -128,7 +147,7 @@ response_t *web_get(const char *url, int timeout) {
return resp;
}
response_t *web_post(const char *url, const char *data) {
response_t *web_post(const char *url, const char *data, int insecure) {
response_t *resp = malloc(sizeof(response_t));
@@ -141,6 +160,12 @@ response_t *web_post(const char *url, const char *data) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
char err_buffer[CURL_ERROR_SIZE + 1] = {};
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, err_buffer);
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
@@ -151,17 +176,21 @@ response_t *web_post(const char *url, const char *data) {
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
if (resp->status_code == 0) {
LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
}
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
return resp;
}
response_t *web_put(const char *url, const char *data) {
response_t *web_put(const char *url, const char *data, int insecure) {
response_t *resp = malloc(sizeof(response_t));
@@ -175,7 +204,10 @@ response_t *web_put(const char *url, const char *data) {
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4);
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
@@ -194,7 +226,7 @@ response_t *web_put(const char *url, const char *data) {
return resp;
}
response_t *web_delete(const char *url) {
response_t *web_delete(const char *url, int insecure) {
response_t *resp = malloc(sizeof(response_t));
@@ -207,6 +239,9 @@ response_t *web_delete(const char *url) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
struct curl_slist *headers = NULL;
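The pattern added throughout web.c, a per-request `CURLOPT_ERRORBUFFER` plus an optional `CURLOPT_SSL_VERIFYPEER = 0` when `--es-insecure-ssl` is given, can be illustrated with this minimal stand-alone sketch; the URL and the hard-coded `insecure` flag are placeholders, not values taken from the diff.

```c
#include <curl/curl.h>
#include <stdio.h>

/* Swallow the response body; we only care about the status code here. */
static size_t discard_cb(char *ptr, size_t size, size_t nmemb, void *userdata) {
    (void) ptr;
    (void) userdata;
    return size * nmemb;
}

int main(void) {
    int insecure = 1;  /* in sist2 this would come from --es-insecure-ssl */
    char err_buffer[CURL_ERROR_SIZE + 1] = {0};

    curl_global_init(CURL_GLOBAL_DEFAULT);
    CURL *curl = curl_easy_init();
    curl_easy_setopt(curl, CURLOPT_URL, "https://localhost:9200"); /* placeholder */
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, discard_cb);
    curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, err_buffer);
    if (insecure) {
        /* Skip certificate verification, as web.c now does when insecure is set */
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
    }

    curl_easy_perform(curl);

    long status = 0;
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status);
    if (status == 0) {
        /* Mirrors the new LOG_ERRORF("web.c", "CURL Error: %s", ...) calls */
        fprintf(stderr, "CURL Error: %s\n", err_buffer);
    }

    curl_easy_cleanup(curl);
    curl_global_cleanup();
    return 0;
}
```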


@@ -25,14 +25,15 @@ typedef struct {
response_t *response;
int running_handles;
int done;
char curl_err_buffer[CURL_ERROR_SIZE + 1];
} subreq_ctx_t;
response_t *web_get(const char *url, int timeout);
response_t *web_post(const char * url, const char * data);
response_t *web_get(const char *url, int timeout, int insecure);
response_t *web_post(const char * url, const char * data, int insecure);
void web_post_async_poll(subreq_ctx_t* req);
subreq_ctx_t *web_post_async(const char *url, char *data);
response_t *web_put(const char *url, const char *data);
response_t *web_delete(const char *url);
subreq_ctx_t *web_post_async(const char *url, char *data, int insecure);
response_t *web_put(const char *url, const char *data, int insecure);
response_t *web_delete(const char *url, int insecure);
void free_response(response_t *resp);


@@ -505,9 +505,9 @@ void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_
// Copy tn store contents
size_t buf_len;
char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, sizeof(doc_id), &buf_len);
char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, SIST_DOC_ID_LEN, &buf_len);
if (buf_len != 0) {
store_write(IncrementalCopyDestinationStore, (char *) doc_id, sizeof(doc_id), buf, buf_len);
store_write(IncrementalCopyDestinationStore, (char *) doc_id, SIST_DOC_ID_LEN, buf, buf_len);
free(buf);
}
}


@@ -42,13 +42,13 @@ index_descriptor_t read_index_descriptor(char *path);
// caller ensures char file_path[PATH_MAX]
#define READ_INDICES(file_path, index_path, action_ok, action_main_fail, cond_original) \
snprintf(file_path, PATH_MAX, "%s_index_main.ndjson.zst", index_path); \
if (0 == access(file_path, R_OK)) { \
if (access(file_path, R_OK) == 0) { \
action_ok; \
} else { \
action_main_fail; \
} \
snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \
if ((cond_original) && (0 == access(file_path, R_OK))) { \
if ((cond_original) && access(file_path, R_OK) == 0) { \
action_ok; \
} \

src/magic_generated.c (vendored, new file): 1 line

File diff suppressed because one or more lines are too long


@@ -38,8 +38,8 @@ static __sighandler_t sigabrt_handler = NULL;
void sig_handler(int signum) {
LogCtx.verbose = 1;
LogCtx.very_verbose = 1;
LogCtx.verbose = TRUE;
LogCtx.very_verbose = TRUE;
LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
@@ -103,7 +103,7 @@ void sig_handler(int signum) {
exit(-1);
}
void init_dir(const char *dirpath, scan_args_t* args) {
void init_dir(const char *dirpath, scan_args_t *args) {
char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
@@ -112,16 +112,16 @@ void init_dir(const char *dirpath, scan_args_t* args) {
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON);
if (args->incremental != NULL) {
// copy old index id
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
// copy old index id
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
} else {
// generate new index id based on timestamp
unsigned char index_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
// generate new index id based on timestamp
unsigned char index_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
}
write_index_descriptor(path, &ScanCtx.index.desc);
@@ -324,9 +324,13 @@ void load_incremental_index(const scan_args_t *args) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version)
}
READ_INDICES(file_path, args->incremental, incremental_read(ScanCtx.original_table, file_path, &original_desc),
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
1);
READ_INDICES(
file_path,
args->incremental,
incremental_read(ScanCtx.original_table, file_path, &original_desc),
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
TRUE
);
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
}
@@ -431,8 +435,8 @@ void sist2_scan(scan_args_t *args) {
LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count)
LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count)
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count)
LOG_DEBUGF("main.c", "Thumbnail store size: %d", ScanCtx.stat_tn_size)
LOG_DEBUGF("main.c", "Index size: %d", ScanCtx.stat_index_size)
LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size)
LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size)
if (args->incremental != NULL) {
save_incremental_index(args);
@@ -449,6 +453,7 @@ void sist2_index(index_args_t *args) {
IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index;
IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
IndexCtx.batch_size = args->batch_size;
IndexCtx.needs_es_connection = !args->print;
@@ -534,6 +539,8 @@ void sist2_exec_script(exec_args_t *args) {
IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index;
IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
IndexCtx.needs_es_connection = TRUE;
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
@@ -545,6 +552,7 @@ void sist2_web(web_args_t *args) {
WebCtx.es_url = args->es_url;
WebCtx.es_index = args->es_index;
WebCtx.es_insecure_ssl = args->es_insecure_ssl;
WebCtx.index_count = args->index_count;
WebCtx.auth_user = args->auth_user;
WebCtx.auth_pass = args->auth_pass;
@@ -615,6 +623,7 @@ int main(int argc, const char *argv[]) {
int arg_version = 0;
char *common_es_url = NULL;
int common_es_insecure_ssl = 0;
char *common_es_index = NULL;
char *common_script_path = NULL;
int common_async_script = 0;
@@ -680,6 +689,7 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
@@ -694,6 +704,7 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
@@ -704,6 +715,7 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Exec-script options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
@@ -733,6 +745,10 @@ int main(int argc, const char *argv[]) {
index_args->es_index = common_es_index;
exec_args->es_index = common_es_index;
web_args->es_insecure_ssl = common_es_insecure_ssl;
index_args->es_insecure_ssl = common_es_insecure_ssl;
exec_args->es_insecure_ssl = common_es_insecure_ssl;
index_args->script_path = common_script_path;
exec_args->script_path = common_script_path;
index_args->threads = common_threads;
@@ -776,9 +792,8 @@ int main(int argc, const char *argv[]) {
sist2_exec_script(exec_args);
} else {
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
argparse_usage(&argparse);
goto end;
LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0])
}
printf("\n");


@@ -5,6 +5,7 @@
#include "mime.h"
#include "src/io/serialize.h"
#include "src/parsing/sidecar.h"
#include "src/magic_generated.c"
#include <magic.h>
@@ -143,7 +144,15 @@ void parse(void *arg) {
}
magic_t magic = magic_open(MAGIC_MIME_TYPE);
magic_load(magic, NULL);
const char *magic_buffers[1] = {magic_database_buffer,};
size_t sizes[1] = {sizeof(magic_database_buffer),};
int load_ret = magic_load_buffers(magic, (void **) &magic_buffers, sizes, 1);
if (load_ret != 0) {
LOG_FATALF("parse.c", "Could not load libmagic database: (%d)", load_ret)
}
const char *magic_mime_str = magic_buffer(magic, buf, bytes_read);
if (magic_mime_str != NULL) {


@@ -27,10 +27,6 @@
#define UNUSED(x) __attribute__((__unused__)) x
#define MD5_STR_LENGTH 33
#define SHA1_STR_LENGTH 41
#define SHA1_DIGEST_LENGTH 20
#include "util.h"
#include "log.h"
#include "types.h"
@@ -53,7 +49,7 @@
#include <ctype.h>
#include "git_hash.h"
#define VERSION "2.12.0"
#define VERSION "2.12.1"
static const char *const Version = VERSION;
#ifndef SIST_PLATFORM


@@ -212,7 +212,7 @@ void search(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->body.len == 0) {
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
mg_http_reply(nc, 500, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Invalid request");
mg_http_reply(nc, 400, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Invalid request");
return;
}
@@ -223,7 +223,7 @@ void search(struct mg_connection *nc, struct mg_http_message *hm) {
char url[4096];
snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index);
nc->fn_data = web_post_async(url, body);
nc->fn_data = web_post_async(url, body, WebCtx.es_insecure_ssl);
}
void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
@@ -302,7 +302,7 @@ void cache_es_version() {
return;
}
es_version_t *es_version = elastic_get_version(WebCtx.es_url);
es_version_t *es_version = elastic_get_version(WebCtx.es_url, WebCtx.es_insecure_ssl);
if (es_version != NULL) {
WebCtx.es_version = es_version;
is_cached = TRUE;
@@ -326,7 +326,7 @@ void index_info(struct mg_connection *nc) {
cJSON_AddStringToObject(json, "version", Version);
cJSON_AddStringToObject(json, "esVersion", es_version);
cJSON_AddBoolToObject(json, "esVersionSupported", IS_SUPPORTED_ES_VERSION(WebCtx.es_version));
cJSON_AddBoolToObject(json, "esVersionLegacy", USE_LEGACY_ES_SETTINGS(WebCtx.es_version));
cJSON_AddBoolToObject(json, "esVersionLegacy", IS_LEGACY_VERSION(WebCtx.es_version));
cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM));
cJSON_AddStringToObject(json, "sist2Hash", Sist2CommitHash);
cJSON_AddStringToObject(json, "lang", WebCtx.lang);
@@ -359,42 +359,6 @@ void index_info(struct mg_connection *nc) {
}
void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
char arg_doc_id[SIST_DOC_ID_LEN];
memcpy(arg_doc_id, hm->uri.ptr + 3, SIST_DOC_ID_LEN);
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
cJSON *doc = elastic_get_document(arg_doc_id);
cJSON *source = cJSON_GetObjectItem(doc, "_source");
cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
HTTP_REPLY_NOT_FOUND
return;
}
index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) {
cJSON_Delete(doc);
HTTP_REPLY_NOT_FOUND
return;
}
char *json_str = cJSON_PrintUnformatted(source);
send_response_line(nc, 200, (int) strlen(json_str), "Content-Type: application/json");
mg_send(nc, json_str, (int) strlen(json_str));
free(json_str);
cJSON_Delete(doc);
}
void file(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
@@ -567,7 +531,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->fn_data = web_post_async(url, buf);
nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
} else {
cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
@@ -587,7 +551,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->fn_data = web_post_async(url, buf);
nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
}
char *json_str = cJSON_PrintUnformatted(arr);
@@ -653,8 +617,6 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
return;
}
tag(nc, hm);
} else if (mg_http_match_uri(hm, "/d/*")) {
document_info(nc, hm);
} else {
HTTP_REPLY_NOT_FOUND
}
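Every call site in this file now forwards WebCtx.es_insecure_ssl into web_post_async(). The helper itself is not part of this excerpt; the sketch below only shows how an insecure-SSL flag is commonly mapped onto libcurl options. The function name and its synchronous shape are assumptions, not sist2's actual implementation:

    #include <curl/curl.h>

    /* Illustrative only: sist2's web_post_async() is asynchronous and
     * returns a response context; this shows just the SSL-flag wiring. */
    static CURL *example_prepare_post(const char *url, const char *body, int insecure_ssl) {
        CURL *curl = curl_easy_init();
        curl_easy_setopt(curl, CURLOPT_URL, url);
        curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body);
        if (insecure_ssl) {
            /* Accept self-signed certificates: skip peer and host verification. */
            curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
            curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
        }
        return curl;
    }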

File diff suppressed because one or more lines are too long

View File

@@ -35,10 +35,20 @@ def sist2_index(files, *args):
path = copy_files(files)
shutil.rmtree("test_i", ignore_errors=True)
sist2("scan", path, "-o", "test_i", *args)
sist2("scan", path, "-o", "test_i", "-t12", *args)
return iter(sist2_index_to_dict("test_i"))
def get_lmdb_contents(path):
import lmdb
env = lmdb.open(path)
txn = env.begin(write=False)
return dict((k, v) for k, v in txn.cursor())
def sist2_incremental_index(files, func=None, incremental_index=False, *args):
path = copy_files(files)
@@ -46,7 +56,7 @@ def sist2_incremental_index(files, func=None, incremental_index=False, *args):
func(path)
shutil.rmtree("test_i_inc", ignore_errors=True)
sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", *args)
sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", "-t12", *args)
return iter(sist2_index_to_dict("test_i_inc", incremental_index))
@@ -76,9 +86,31 @@ class ScanTest(unittest.TestCase):
pass
file_count = sum(1 for _ in sist2_index(TEST_FILES))
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, remove_files)), file_count - 2)
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)), 3)
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files)), file_count + 3)
lmdb_full = get_lmdb_contents("test_i/thumbs")
# Remove files
num_files_rm1 = len(list(sist2_incremental_index(TEST_FILES, remove_files)))
lmdb_rm1 = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(num_files_rm1, file_count - 2)
self.assertEqual(len(set(lmdb_full.keys() - set(lmdb_rm1.keys()))), 2)
# add files (incremental_index=True)
num_files_add_inc = len(list(sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)))
lmdb_add_inc = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(num_files_add_inc, 3)
self.assertEqual(set(lmdb_full.keys()), set(lmdb_add_inc.keys()))
# add files
num_files_add = len(list(sist2_incremental_index(TEST_FILES, add_files)))
lmdb_add = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(num_files_add, file_count + 3)
self.assertEqual(set(lmdb_full.keys()), set(lmdb_add.keys()))
# (No action)
sist2_incremental_index(TEST_FILES)
lmdb_inc = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(set(lmdb_full.keys()), set(lmdb_inc.keys()))
if __name__ == "__main__":
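The new get_lmdb_contents() helper snapshots the thumbnail store ("test_i/thumbs") so the incremental-index assertions can compare key sets before and after each run. For reference, the same store can be walked from C with the plain LMDB API; this is a minimal read-only sketch (helper name and printf output are illustrative, error handling omitted for brevity):

    #include <lmdb.h>
    #include <stdio.h>

    /* Walk every key/value pair of an LMDB environment such as "test_i/thumbs"
     * inside a read-only transaction, mirroring the Python helper above. */
    static void example_dump_thumb_keys(const char *path) {
        MDB_env *env;
        MDB_txn *txn;
        MDB_dbi dbi;
        MDB_cursor *cur;
        MDB_val key, val;

        mdb_env_create(&env);
        mdb_env_open(env, path, MDB_RDONLY, 0664);
        mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);
        mdb_dbi_open(txn, NULL, 0, &dbi);
        mdb_cursor_open(txn, dbi, &cur);

        while (mdb_cursor_get(cur, &key, &val, MDB_NEXT) == 0) {
            printf("%.*s (%zu bytes)\n", (int) key.mv_size, (char *) key.mv_data, val.mv_size);
        }

        mdb_cursor_close(cur);
        mdb_txn_abort(txn);
        mdb_env_close(env);
    }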

View File

@@ -7,6 +7,11 @@ option(BUILD_TESTS "Build tests" on)
add_subdirectory(third-party/antiword)
set(USE_LIBXML2 OFF CACHE BOOL "" FORCE)
set(USE_XMLWRITER OFF CACHE BOOL "" FORCE)
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
add_subdirectory(third-party/libmobi)
add_library(
scan
libscan/util.c libscan/util.h
@@ -42,6 +47,22 @@ if (SIST_DEBUG)
-fsanitize=address
-fno-inline
)
elseif (SIST_FAST)
add_compile_definitions(
antiword
NDEBUG
)
target_compile_options(
scan
PRIVATE
-Ofast
-march=native
-fno-stack-protector
-fomit-frame-pointer
-freciprocal-math
)
else()
add_compile_definitions(
antiword
@@ -97,35 +118,15 @@ target_compile_options(
-g
)
include(ExternalProject)
find_program(MAKE_EXE NAMES gmake nmake make)
ExternalProject_Add(
libmobi
GIT_REPOSITORY https://github.com/simon987/libmobi.git
GIT_TAG "public"
UPDATE_COMMAND ""
PATCH_COMMAND ""
TEST_COMMAND ""
CONFIGURE_COMMAND ./autogen.sh && ./configure
INSTALL_COMMAND ""
PREFIX "third-party/ext_libmobi"
SOURCE_DIR "third-party/ext_libmobi/src/libmobi"
BINARY_DIR "third-party/ext_libmobi/src/libmobi"
BUILD_COMMAND ${MAKE_EXE} -j 8 --silent
)
SET(MOBI_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/)
SET(MOBI_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/)
if (SIST_DEBUG)
SET(FFMPEG_DEBUG "--enable-debug=3" "--disable-optimizations")
else()
SET(FFMPEG_DEBUG "")
endif()
include(ExternalProject)
find_program(MAKE_EXE NAMES gmake nmake make)
ExternalProject_Add(
ffmpeg
GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git
@@ -171,10 +172,10 @@ SET(WPD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwp
add_dependencies(
scan
libmobi
ffmpeg
antiword
libwpd
mobi
)
target_link_libraries(
@@ -192,8 +193,6 @@ target_link_libraries(
${MUPDF_LIB}
openjp2
${MOBI_LIB_DIR}/libmobi.a
${WPD_LIB_DIR}/libwpd-0.9.a
${WPD_LIB_DIR}/libwpd-stream-0.9.a
@@ -230,6 +229,7 @@ target_link_libraries(
${GUMBO_LIB}
dl
antiword
mobi
unofficial::pcre::pcre unofficial::pcre::pcre16 unofficial::pcre::pcre32 unofficial::pcre::pcrecpp
)

View File

@@ -4,7 +4,12 @@
#define MIN_SIZE 32
#define AVIO_BUF_SIZE 8192
#define IS_VIDEO(fmt) ((fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0)
#define IS_VIDEO(fmt) ( \
(fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0 \
&& strcmp((fmt)->iformat->name, "jpeg_pipe") != 0 \
&& strcmp((fmt)->iformat->name, "webp_pipe") != 0 \
&& strcmp((fmt)->iformat->name, "png_pipe") != 0 \
)
#define STORE_AS_IS ((void*)-1)
@@ -279,18 +284,22 @@ static void
append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int is_video) {
if (is_video) {
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
meta_duration->key = MetaMediaDuration;
meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE;
if (meta_duration->long_val > INT32_MAX) {
meta_duration->long_val = 0;
if (pFormatCtx->duration / AV_TIME_BASE != 0) {
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
meta_duration->key = MetaMediaDuration;
meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE;
if (meta_duration->long_val > INT32_MAX) {
meta_duration->long_val = 0;
}
APPEND_META(doc, meta_duration)
}
APPEND_META(doc, meta_duration)
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
meta_bitrate->key = MetaMediaBitrate;
meta_bitrate->long_val = pFormatCtx->bit_rate;
APPEND_META(doc, meta_bitrate)
if (pFormatCtx->bit_rate != 0) {
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
meta_bitrate->key = MetaMediaBitrate;
meta_bitrate->long_val = pFormatCtx->bit_rate;
APPEND_META(doc, meta_bitrate)
}
}
AVDictionaryEntry *tag = NULL;
@@ -577,7 +586,8 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
int video_duration_in_seconds = (int) (pFormatCtx->duration / AV_TIME_BASE);
int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF && video_duration_in_seconds >= 15)
int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF &&
video_duration_in_seconds >= 15)
// Limit to ~1 thumbnail every 7s
? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 7 + 1), 1) + 1
: 1;
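The reflowed expression above keeps the original policy: GIFs and clips shorter than 15 seconds get a single thumbnail, while longer videos get roughly one thumbnail every 7 seconds, bounded by ctx->tn_count plus one. A worked example with assumed input values (MIN/MAX spelled out so the arithmetic is self-contained):

    #define EXAMPLE_MIN(a, b) ((a) < (b) ? (a) : (b))
    #define EXAMPLE_MAX(a, b) ((a) > (b) ? (a) : (b))

    /* Assumed inputs, for illustration only. */
    int tn_count = 10;                   /* ctx->tn_count */
    int video_duration_in_seconds = 95;  /* pFormatCtx->duration / AV_TIME_BASE */

    /* 95 / 7 + 1 = 14 (integer division), MIN(10, 14) = 10,
     * MAX(10, 1) = 10, then +1 => 11 thumbnails for a 95-second video. */
    int thumbnails_to_generate =
            EXAMPLE_MAX(EXAMPLE_MIN(tn_count, video_duration_in_seconds / 7 + 1), 1) + 1;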

View File

@@ -1,6 +1,6 @@
#include "scan_mobi.h"
#include <mobi.h>
#include "../../third-party/libmobi/src/mobi.h"
#include <errno.h>
#include "stdlib.h"

View File

@@ -48,7 +48,6 @@ typedef int scan_code_t;
#define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1);
#define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1);
#define MD5_STR_LENGTH 33
#define SIST_DOC_ID_LEN MD5_STR_LENGTH
#define SIST_INDEX_ID_LEN MD5_STR_LENGTH

View File

@@ -923,7 +923,6 @@ TEST(Msdoc, Test1Pdf) {
ASSERT_TRUE(strstr(get_meta(&doc, MetaContent)->str_val, "October 2000") != nullptr);
ASSERT_STREQ(get_meta(&doc, MetaTitle)->str_val, "INTERNATIONAL ORGANIZATION FOR STANDARDIZATION");
ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "Oliver Morgan");
ASSERT_EQ(get_meta(&doc, MetaPages)->long_val, 57);
ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), msdoc_ctx.content_size, 4);
ASSERT_NE(size_before, store_size);
@@ -1030,6 +1029,23 @@ TEST(Msdoc, TestUtf8Text) {
cleanup(&doc, &f);
}
TEST(Msdoc, Test5Pdf) {
vfile_t f;
document_t doc;
load_doc_file("libscan-test-files/test_files/msdoc/test5.doc", &f, &doc);
size_t size_before = store_size;
parse_msdoc(&msdoc_ctx, &f, &doc);
ASSERT_TRUE(strstr(get_meta(&doc, MetaContent)->str_val, "орган Федеральной") != nullptr);
ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "uswo");
ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), msdoc_ctx.content_size, 4);
ASSERT_NE(size_before, store_size);
cleanup(&doc, &f);
}
TEST(Msdoc, TestFuzz1) {
vfile_t f;
document_t doc;
@@ -1189,4 +1205,7 @@ int main(int argc, char **argv) {
av_log_set_level(AV_LOG_QUIET);
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
}
// 0x6130000d2580
// "/mnt/Hatchery/m ain/downloads/qbittorrent/downloads/Roskomnadzor/УПРАВЛЕНИЕ РОСКОМНАДЗОРА по РБ.zip#/УПРАВЛЕНИЕ РОСКОМНАДЗОРА по РБ/Лопатин Ю.М/Секнин/2015 год/Обучение по ", <incomplete sequence \320>...