Compare commits

...

23 Commits

Author SHA1 Message Date
a74726be55 Merge pull request #285 from simon987/dependabot/npm_and_yarn/sist2-vue/async-2.6.4
Bump async from 2.6.3 to 2.6.4 in /sist2-vue
2022-04-17 13:42:40 -04:00
dependabot[bot]
cb228052d2 Bump async from 2.6.3 to 2.6.4 in /sist2-vue
Bumps [async](https://github.com/caolan/async) from 2.6.3 to 2.6.4.
- [Release notes](https://github.com/caolan/async/releases)
- [Changelog](https://github.com/caolan/async/blob/v2.6.4/CHANGELOG.md)
- [Commits](https://github.com/caolan/async/compare/v2.6.3...v2.6.4)

---
updated-dependencies:
- dependency-name: async
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-04-17 17:41:14 +00:00
fe56da95d5 Merge pull request #284 from simon987/dev
v2.12.0
2022-04-17 13:38:42 -04:00
9f2ad58f78 bump version 2022-04-17 12:30:14 -04:00
84d9bf4323 Fix cmake libmobi build maybe 2022-04-17 12:23:45 -04:00
90aa90f3f3 Update antiword 2022-04-17 11:47:33 -04:00
3fad07360c Merge pull request #283 from simon987/dependabot/npm_and_yarn/sist2-vue/minimist-1.2.6
Bump minimist from 1.2.5 to 1.2.6 in /sist2-vue
2022-04-17 10:12:10 -04:00
dependabot[bot]
00c3a640d0 Bump minimist from 1.2.5 to 1.2.6 in /sist2-vue
Bumps [minimist](https://github.com/substack/minimist) from 1.2.5 to 1.2.6.
- [Release notes](https://github.com/substack/minimist/releases)
- [Commits](https://github.com/substack/minimist/compare/1.2.5...1.2.6)

---
updated-dependencies:
- dependency-name: minimist
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-04-17 12:53:12 +00:00
730e495bde Enable highlight in document info modal, remove /d/ endpoint 2022-04-16 16:11:17 -04:00
54df1dfcf7 Fix spacebar not working in search bar 2022-04-16 13:51:36 -04:00
a75675ecea Fix thumbnail copy bug, update tests 2022-04-16 11:48:43 -04:00
901035da15 Build libmobi with cmake, update to 0.10 2022-04-15 16:01:40 -04:00
ceb7265639 Fix max_analyzed_offset (again?) 2022-04-15 15:35:39 -04:00
036ed9ea1e Update libmagic cmake things 2022-04-15 15:35:20 -04:00
779303a2f7 Print body response when task id cannot be read 2022-04-14 16:24:56 -04:00
23aee14c07 Fix exec-script & fix memory leak in exec_args_validate 2022-04-14 15:43:24 -04:00
50b9201be3 Merge pull request #279 from simon987/dependabot/npm_and_yarn/sist2-vue/minimist-1.2.6
Bump minimist from 1.2.5 to 1.2.6 in /sist2-vue
2022-04-05 20:12:03 -04:00
dependabot[bot]
14cfb15661 Bump minimist from 1.2.5 to 1.2.6 in /sist2-vue
Bumps [minimist](https://github.com/substack/minimist) from 1.2.5 to 1.2.6.
- [Release notes](https://github.com/substack/minimist/releases)
- [Commits](https://github.com/substack/minimist/compare/1.2.5...1.2.6)

---
updated-dependencies:
- dependency-name: minimist
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-03-31 23:28:25 +00:00
125c85d9bb localize tag filter bar 2022-03-18 09:15:07 -04:00
474eb95aff Update antiword 2022-03-17 15:08:55 -04:00
acf7453057 Add test for large msdoc 2022-03-17 15:05:48 -04:00
9a949d2694 Use TRUE rather than 1 2022-03-17 09:13:19 -04:00
dbdc75dcb8 Add filter bar in tag picker 2022-03-17 09:12:43 -04:00
34 changed files with 315 additions and 157 deletions

3
.gitmodules vendored
View File

@@ -7,3 +7,6 @@
[submodule "third-party/libscan/third-party/antiword"]
path = third-party/libscan/third-party/antiword
url = https://github.com/simon987/antiword
[submodule "third-party/libscan/third-party/libmobi"]
path = third-party/libscan/third-party/libmobi
url = https://github.com/bfabiszewski/libmobi

View File

@@ -4,6 +4,7 @@ set(CMAKE_C_STANDARD 11)
project(sist2 C)
option(SIST_DEBUG "Build a debug executable" on)
option(SIST_FAST "Enable more optimisation flags" off)
option(SIST_FAKE_STORE "Disable IO operations of LMDB stores for debugging purposes" 0)
add_compile_definitions(
@@ -54,6 +55,10 @@ find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED)
find_package(CURL CONFIG REQUIRED)
find_library(MAGIC_LIB
NAMES libmagic.so.1 magic
PATHS /usr/lib/x86_64-linux-gnu/ /usr/lib/aarch64-linux-gnu/
)
target_include_directories(
@@ -93,16 +98,25 @@ if (SIST_DEBUG)
PROPERTIES
OUTPUT_NAME sist2_debug
)
elseif (SIST_FAST)
target_compile_options(
sist2
PRIVATE
-Ofast
-march=native
-fno-stack-protector
-fomit-frame-pointer
-freciprocal-math
)
else ()
target_compile_options(
sist2
PRIVATE
-Ofast
#-march=native
-fno-stack-protector
-fomit-frame-pointer
#-freciprocal-math
)
endif ()
@@ -124,13 +138,12 @@ target_link_libraries(
CURL::libcurl
pthread
#magic
c
scan
/usr/lib/x86_64-linux-gnu/libmagic.so.1
${MAGIC_LIB}
)
add_custom_target(

View File

@@ -52,7 +52,7 @@ sist2 (Simple incremental search tool)
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` *
2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
recommended!)*
3. *(or)* `docker pull simon987/sist2:2.11.7-x64-linux`
3. *(or)* `docker pull simon987/sist2:2.12.0-x64-linux`
1. See [Usage guide](docs/USAGE.md)

View File

@@ -3,7 +3,7 @@
"refresh_interval": "30s",
"codec": "best_compression",
"number_of_replicas": 0,
"highlight.max_analyzed_offset": 10000000
"highlight.max_analyzed_offset": 1000000
},
"analysis": {
"tokenizer": {

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -3288,9 +3288,9 @@
}
},
"node_modules/async": {
"version": "2.6.3",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz",
"integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==",
"version": "2.6.4",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.4.tgz",
"integrity": "sha512-mzo5dfJYwAn29PeiJ0zvwTo04zj8HDJj0Mn8TD7sno7q12prdbnasKJHhkm2c1LgrhlJ0teaea8860oxi51mGA==",
"dev": true,
"dependencies": {
"lodash": "^4.17.14"
@@ -9736,9 +9736,9 @@
}
},
"node_modules/minimist": {
"version": "1.2.5",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==",
"version": "1.2.6",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
"integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==",
"dev": true
},
"node_modules/minipass": {
@@ -17937,9 +17937,9 @@
"dev": true
},
"async": {
"version": "2.6.3",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz",
"integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==",
"version": "2.6.4",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.4.tgz",
"integrity": "sha512-mzo5dfJYwAn29PeiJ0zvwTo04zj8HDJj0Mn8TD7sno7q12prdbnasKJHhkm2c1LgrhlJ0teaea8860oxi51mGA==",
"dev": true,
"requires": {
"lodash": "^4.17.14"
@@ -23324,9 +23324,9 @@
}
},
"minimist": {
"version": "1.2.5",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==",
"version": "1.2.6",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
"integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==",
"dev": true
},
"minipass": {

View File

@@ -336,10 +336,6 @@ class Sist2Api {
};
}
getDocInfo(docId: string) {
return axios.get(`${this.baseUrl}d/${docId}`);
}
getTags() {
return this.esQuery({
aggs: {

View File

@@ -210,7 +210,7 @@ class Sist2Query {
};
if (!legacyES) {
q.highlight.max_analyzed_offset = 9_999_999;
q.highlight.max_analyzed_offset = 999_999;
}
if (getters.optSearchInPath) {

View File

@@ -1,11 +1,13 @@
<template>
<Preloader v-if="loading"></Preloader>
<div v-else-if="content" class="content-div">{{ content }}</div>
<div v-else-if="content" class="content-div" v-html="content"></div>
</template>
<script>
import Sist2Api from "@/Sist2Api";
import Preloader from "@/components/Preloader";
import Sist2Query from "@/Sist2Query";
import store from "@/store";
export default {
name: "LazyContentDiv",
@@ -18,10 +20,72 @@ export default {
}
},
mounted() {
Sist2Api.getDocInfo(this.docId).then(src => {
this.content = src.data.content;
const query = Sist2Query.searchQuery();
if (this.$store.state.optHighlight) {
const fields = this.$store.state.fuzzy
? {"content.nGram": {}}
: {content: {}};
query.highlight = {
pre_tags: ["<mark>"],
post_tags: ["</mark>"],
number_of_fragments: 0,
fields,
};
if (!store.state.sist2Info.esVersionLegacy) {
query.highlight.max_analyzed_offset = 999_999;
}
}
if ("function_score" in query.query) {
query.query = query.query.function_score.query;
}
if (!("must" in query.query.bool)) {
query.query.bool.must = [];
} else if (!Array.isArray(query.query.bool.must)) {
query.query.bool.must = [query.query.bool.must];
}
query.query.bool.must.push({match: {_id: this.docId}});
delete query["sort"];
delete query["aggs"];
delete query["search_after"];
delete query.query["function_score"];
query._source = {
includes: ["content", "name", "path", "extension"]
}
query.size = 1;
Sist2Api.esQuery(query).then(resp => {
this.loading = false;
})
if (resp.hits.hits.length === 1) {
this.content = this.getContent(resp.hits.hits[0]);
} else {
console.log("FIXME: could not get content")
console.log(resp)
}
});
},
methods: {
getContent(doc) {
if (!doc.highlight) {
return doc._source.content;
}
if (doc.highlight["content.nGram"]) {
return doc.highlight["content.nGram"][0];
}
if (doc.highlight.content) {
return doc.highlight.content[0];
}
}
}
}
</script>

View File

@@ -81,7 +81,9 @@ export default {
methods: {
keyDownListener(e) {
if (this.$refs.lightbox === undefined) {
const isLightboxOpen = this.$refs.lightbox === undefined || this.$refs.lightbox.$el.tagName === undefined;
if (isLightboxOpen) {
return true;
}

View File

@@ -1,5 +1,13 @@
<template>
<div id="tagTree"></div>
<div>
<b-input-group v-if="showSearchBar" id="tag-picker-filter-bar">
<b-form-input :value="filter"
:placeholder="$t('tagFilter')"
@input="onFilter($event)"></b-form-input>
</b-input-group>
<div id="tagTree"></div>
</div>
</template>
<script>
@@ -112,10 +120,12 @@ function addTag(map, tag, id, count) {
export default {
name: "TagPicker",
props: ["showSearchBar"],
data() {
return {
tagTree: null,
loadedFromArgs: false,
filter: ""
}
},
mounted() {
@@ -129,6 +139,10 @@ export default {
});
},
methods: {
onFilter(value) {
this.filter = value;
this.tagTree.search(value);
},
initializeTree() {
const tagMap = [];
this.tagTree = new InspireTree({
@@ -163,7 +177,8 @@ export default {
});
},
handleTreeClick(node, e) {
if (e === "indeterminate" || e === "collapsed" || e === 'rendered' || e === "focused") {
if (e === "indeterminate" || e === "collapsed" || e === 'rendered' || e === "focused"
|| e === "matched" || e === "hidden") {
return;
}
@@ -180,7 +195,15 @@ export default {
}
</style>
<style>
.inspire-tree .focused>.wholerow {
.inspire-tree .focused > .wholerow {
border: none;
}
#tag-picker-filter-bar {
padding: 10px 4px 4px;
}
.theme-black .inspire-tree .matched > .wholerow {
background: rgba(251, 191, 41, 0.25);
}
</style>

View File

@@ -16,6 +16,7 @@ export default {
pages: "pages",
mimeTypes: "Media types",
tags: "Tags",
tagFilter: "Filter tags",
help: {
simpleSearch: "Simple search",
advancedSearch: "Advanced search",
@@ -74,6 +75,7 @@ export default {
useDatePicker: "Use a Date Picker component rather than a slider",
vidPreviewInterval: "Video preview frame duration in ms",
simpleLightbox: "Disable animations in image viewer",
showTagPickerFilter: "Display the tag filter bar"
},
queryMode: {
simple: "Simple",
@@ -183,6 +185,7 @@ export default {
pages: "pages",
mimeTypes: "Types de médias",
tags: "Tags",
tagFilter: "Filtrer les tags",
help: {
simpleSearch: "Recherche simple",
advancedSearch: "Recherche avancée",
@@ -242,6 +245,7 @@ export default {
useDatePicker: "Afficher un composant « Date Picker » plutôt qu'un slider",
vidPreviewInterval: "Durée des images d'aperçu video en millisecondes",
simpleLightbox: "Désactiver les animations du visualiseur d'images",
showTagPickerFilter: "Afficher le filtre dans l'onglet Tags"
},
queryMode: {
simple: "Simple",
@@ -352,6 +356,7 @@ export default {
pages: "页",
mimeTypes: "文件类型",
tags: "标签",
tagFilter: "筛选标签",
help: {
simpleSearch: "简易搜索",
advancedSearch: "高级搜索",
@@ -410,6 +415,7 @@ export default {
useDatePicker: "使用日期选择器组件而不是滑块",
vidPreviewInterval: "视频预览帧的持续时间,以毫秒为单位",
simpleLightbox: "在图片查看器中,禁用动画",
showTagPickerFilter: "显示标签过滤栏"
},
queryMode: {
simple: "简单",

View File

@@ -4,6 +4,8 @@ import VueRouter, {Route} from "vue-router";
import {EsHit, EsResult, EsTag, Index, Tag} from "@/Sist2Api";
import {deserializeMimes, serializeMimes} from "@/util";
const CONF_VERSION = 2;
Vue.use(Vuex)
export default new Vuex.Store({
@@ -24,7 +26,6 @@ export default new Vuex.Store({
sortMode: "score",
fuzzy: false,
size: 60,
optLang: "en",
optLangIsDefault: true,
@@ -32,6 +33,7 @@ export default new Vuex.Store({
optTheme: "light",
optDisplay: "grid",
optSize: 60,
optHighlight: true,
optTagOrOperator: false,
optFuzzy: true,
@@ -52,6 +54,7 @@ export default new Vuex.Store({
optUseDatePicker: false,
optVidPreviewInterval: 700,
optSimpleLightbox: true,
optShowTagPickerFilter: true,
_onLoadSelectedIndices: [] as string[],
_onLoadSelectedMimeTypes: [] as string[],
@@ -150,7 +153,7 @@ export default new Vuex.Store({
setOptSuggestPath: (state, val) => state.optSuggestPath = val,
setOptFragmentSize: (state, val) => state.optFragmentSize = val,
setOptQueryMode: (state, val) => state.optQueryMode = val,
setOptResultSize: (state, val) => state.size = val,
setOptResultSize: (state, val) => state.optSize = val,
setOptTagOrOperator: (state, val) => state.optTagOrOperator = val,
setOptTreemapType: (state, val) => state.optTreemapType = val,
@@ -163,6 +166,7 @@ export default new Vuex.Store({
setOptUseDatePicker: (state, val) => state.optUseDatePicker = val,
setOptVidPreviewInterval: (state, val) => state.optVidPreviewInterval = val,
setOptSimpleLightbox: (state, val) => state.optSimpleLightbox = val,
setOptShowTagPickerFilter: (state, val) => state.optShowTagPickerFilter = val,
setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
setOptLightboxSlideDuration: (state, val) => state.optLightboxSlideDuration = val,
@@ -274,6 +278,8 @@ export default new Vuex.Store({
}
});
conf["version"] = CONF_VERSION;
localStorage.setItem("sist2_configuration", JSON.stringify(conf));
},
loadConfiguration({state}) {
@@ -281,6 +287,11 @@ export default new Vuex.Store({
if (confString) {
const conf = JSON.parse(confString);
if (!("version" in conf) || conf["version"] != CONF_VERSION) {
localStorage.removeItem("sist2_configuration");
window.location.reload();
}
Object.keys(state).forEach((key) => {
if (key.startsWith("opt")) {
(state as any)[key] = conf[key];
@@ -342,7 +353,7 @@ export default new Vuex.Store({
searchText: state => state.searchText,
pathText: state => state.pathText,
fuzzy: state => state.fuzzy,
size: state => state.size,
size: state => state.optSize,
sortMode: state => state.sortMode,
lastQueryResult: state => state.lastQueryResults,
lastDoc: function (state): EsHit | null {
@@ -380,11 +391,12 @@ export default new Vuex.Store({
optTreemapColor: state => state.optTreemapColor,
optLightboxLoadOnlyCurrent: state => state.optLightboxLoadOnlyCurrent,
optLightboxSlideDuration: state => state.optLightboxSlideDuration,
optResultSize: state => state.size,
optResultSize: state => state.optSize,
optHideLegacy: state => state.optHideLegacy,
optUpdateMimeMap: state => state.optUpdateMimeMap,
optUseDatePicker: state => state.optUseDatePicker,
optVidPreviewInterval: state => state.optVidPreviewInterval,
optSimpleLightbox: state => state.optSimpleLightbox,
optShowTagPickerFilter: state => state.optShowTagPickerFilter,
}
})

View File

@@ -50,6 +50,11 @@
$t("opt.simpleLightbox")
}}
</b-form-checkbox>
<b-form-checkbox :checked="optShowTagPickerFilter" @input="setOptShowTagPickerFilter">{{
$t("opt.showTagPickerFilter")
}}
</b-form-checkbox>
</b-card>
<br/>
@@ -245,6 +250,7 @@ export default {
"optUseDatePicker",
"optVidPreviewInterval",
"optSimpleLightbox",
"optShowTagPickerFilter",
]),
clientWidth() {
return window.innerWidth;
@@ -292,6 +298,7 @@ export default {
"setOptUseDatePicker",
"setOptVidPreviewInterval",
"setOptSimpleLightbox",
"setOptShowTagPickerFilter",
]),
onResetClick() {
localStorage.removeItem("sist2_configuration");

View File

@@ -32,7 +32,7 @@
<MimePicker></MimePicker>
</b-tab>
<b-tab :title="$t('tags')">
<TagPicker></TagPicker>
<TagPicker :show-search-bar="$store.state.optShowTagPickerFilter"></TagPicker>
</b-tab>
</b-tabs>
</b-col>
@@ -208,7 +208,7 @@ export default Vue.extend({
this.$store.commit("setUiReachedScrollEnd", false);
},
async handleSearch(resp: EsResult) {
if (resp.hits.hits.length == 0) {
if (resp.hits.hits.length == 0 || resp.hits.hits.length < this.$store.state.optSize) {
this.$store.commit("setUiReachedScrollEnd", true);
}
@@ -248,6 +248,8 @@ export default Vue.extend({
this.$store.commit("setLastQueryResult", resp);
this.docs.push(...resp.hits.hits);
resp.hits.hits.forEach(hit => this.docIds.add(hit._id));
},
getDateRange(): Promise<{ min: number, max: number }> {
return sist2.esQuery({

View File

@@ -81,6 +81,11 @@ void web_args_destroy(web_args_t *args) {
}
void exec_args_destroy(exec_args_t *args) {
if (args->index_path != NULL) {
free(args->index_path);
}
free(args);
}

View File

@@ -85,7 +85,7 @@ typedef struct web_args {
typedef struct exec_args {
char *es_url;
char *es_index;
const char *index_path;
char *index_path;
const char *script_path;
int async_script;
char *script;

View File

@@ -110,16 +110,16 @@ void execute_update_script(const char *script, int async, const char index_id[SI
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
cJSON_AddStringToObject(term_obj, "index", index_id);
char *str = cJSON_Print(body);
char *str = cJSON_PrintUnformatted(body);
char bulk_url[4096];
char url[4096];
if (async) {
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
snprintf(url, sizeof(url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
Indexer->es_index);
} else {
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
snprintf(url, sizeof(url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
}
response_t *r = web_post(bulk_url, str);
response_t *r = web_post(url, str);
if (!async) {
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
}
@@ -139,6 +139,11 @@ void execute_update_script(const char *script, int async, const char index_id[SI
if (async) {
cJSON *task = cJSON_GetObjectItem(resp, "task");
if (task == NULL) {
LOG_FATALF("elastic.c", "FIXME: Could not get task id: %s", r->body);
}
LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring);
}

File diff suppressed because one or more lines are too long

View File

@@ -22,7 +22,7 @@ void free_response(response_t *resp) {
free(resp);
}
void web_post_async_poll(subreq_ctx_t* req) {
void web_post_async_poll(subreq_ctx_t *req) {
fd_set fdread;
fd_set fdwrite;
fd_set fdexcep;
@@ -34,7 +34,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd);
if(mc != CURLM_OK) {
if (mc != CURLM_OK) {
req->done = TRUE;
return;
}
@@ -47,7 +47,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
struct timeval timeout = {1, 0};
int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
switch(rc) {
switch (rc) {
case -1:
req->done = TRUE;
break;
@@ -142,6 +142,9 @@ response_t *web_post(const char *url, const char *data) {
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
char err_buffer[CURL_ERROR_SIZE + 1] = {};
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, err_buffer);
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
@@ -151,12 +154,16 @@ response_t *web_post(const char *url, const char *data) {
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
if (resp->status_code == 0) {
LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
}
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
return resp;
}
@@ -175,7 +182,7 @@ response_t *web_put(const char *url, const char *data) {
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4);
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");

View File

@@ -505,9 +505,9 @@ void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_
// Copy tn store contents
size_t buf_len;
char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, sizeof(doc_id), &buf_len);
char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, SIST_DOC_ID_LEN, &buf_len);
if (buf_len != 0) {
store_write(IncrementalCopyDestinationStore, (char *) doc_id, sizeof(doc_id), buf, buf_len);
store_write(IncrementalCopyDestinationStore, (char *) doc_id, SIST_DOC_ID_LEN, buf, buf_len);
free(buf);
}
}

View File

@@ -42,13 +42,13 @@ index_descriptor_t read_index_descriptor(char *path);
// caller ensures char file_path[PATH_MAX]
#define READ_INDICES(file_path, index_path, action_ok, action_main_fail, cond_original) \
snprintf(file_path, PATH_MAX, "%s_index_main.ndjson.zst", index_path); \
if (0 == access(file_path, R_OK)) { \
if (access(file_path, R_OK) == 0) { \
action_ok; \
} else { \
action_main_fail; \
} \
snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \
if ((cond_original) && (0 == access(file_path, R_OK))) { \
if ((cond_original) && access(file_path, R_OK) == 0) { \
action_ok; \
} \

View File

@@ -38,8 +38,8 @@ static __sighandler_t sigabrt_handler = NULL;
void sig_handler(int signum) {
LogCtx.verbose = 1;
LogCtx.very_verbose = 1;
LogCtx.verbose = TRUE;
LogCtx.very_verbose = TRUE;
LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
@@ -103,7 +103,7 @@ void sig_handler(int signum) {
exit(-1);
}
void init_dir(const char *dirpath, scan_args_t* args) {
void init_dir(const char *dirpath, scan_args_t *args) {
char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
@@ -112,16 +112,16 @@ void init_dir(const char *dirpath, scan_args_t* args) {
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON);
if (args->incremental != NULL) {
// copy old index id
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
// copy old index id
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
} else {
// generate new index id based on timestamp
unsigned char index_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
// generate new index id based on timestamp
unsigned char index_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
}
write_index_descriptor(path, &ScanCtx.index.desc);
@@ -324,9 +324,13 @@ void load_incremental_index(const scan_args_t *args) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version)
}
READ_INDICES(file_path, args->incremental, incremental_read(ScanCtx.original_table, file_path, &original_desc),
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
1);
READ_INDICES(
file_path,
args->incremental,
incremental_read(ScanCtx.original_table, file_path, &original_desc),
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
TRUE
);
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
}
@@ -534,6 +538,7 @@ void sist2_exec_script(exec_args_t *args) {
IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index;
IndexCtx.needs_es_connection = TRUE;
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
@@ -776,9 +781,8 @@ int main(int argc, const char *argv[]) {
sist2_exec_script(exec_args);
} else {
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
argparse_usage(&argparse);
goto end;
LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0])
}
printf("\n");

View File

@@ -27,10 +27,6 @@
#define UNUSED(x) __attribute__((__unused__)) x
#define MD5_STR_LENGTH 33
#define SHA1_STR_LENGTH 41
#define SHA1_DIGEST_LENGTH 20
#include "util.h"
#include "log.h"
#include "types.h"

View File

@@ -359,42 +359,6 @@ void index_info(struct mg_connection *nc) {
}
void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
char arg_doc_id[SIST_DOC_ID_LEN];
memcpy(arg_doc_id, hm->uri.ptr + 3, SIST_DOC_ID_LEN);
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
cJSON *doc = elastic_get_document(arg_doc_id);
cJSON *source = cJSON_GetObjectItem(doc, "_source");
cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
HTTP_REPLY_NOT_FOUND
return;
}
index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) {
cJSON_Delete(doc);
HTTP_REPLY_NOT_FOUND
return;
}
char *json_str = cJSON_PrintUnformatted(source);
send_response_line(nc, 200, (int) strlen(json_str), "Content-Type: application/json");
mg_send(nc, json_str, (int) strlen(json_str));
free(json_str);
cJSON_Delete(doc);
}
void file(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
@@ -653,8 +617,6 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
return;
}
tag(nc, hm);
} else if (mg_http_match_uri(hm, "/d/*")) {
document_info(nc, hm);
} else {
HTTP_REPLY_NOT_FOUND
}

File diff suppressed because one or more lines are too long

View File

@@ -35,10 +35,20 @@ def sist2_index(files, *args):
path = copy_files(files)
shutil.rmtree("test_i", ignore_errors=True)
sist2("scan", path, "-o", "test_i", *args)
sist2("scan", path, "-o", "test_i", "-t12", *args)
return iter(sist2_index_to_dict("test_i"))
def get_lmdb_contents(path):
import lmdb
env = lmdb.open(path)
txn = env.begin(write=False)
return dict((k, v) for k, v in txn.cursor())
def sist2_incremental_index(files, func=None, incremental_index=False, *args):
path = copy_files(files)
@@ -46,7 +56,7 @@ def sist2_incremental_index(files, func=None, incremental_index=False, *args):
func(path)
shutil.rmtree("test_i_inc", ignore_errors=True)
sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", *args)
sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", "-t12", *args)
return iter(sist2_index_to_dict("test_i_inc", incremental_index))
@@ -76,9 +86,31 @@ class ScanTest(unittest.TestCase):
pass
file_count = sum(1 for _ in sist2_index(TEST_FILES))
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, remove_files)), file_count - 2)
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)), 3)
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files)), file_count + 3)
lmdb_full = get_lmdb_contents("test_i/thumbs")
# Remove files
num_files_rm1 = len(list(sist2_incremental_index(TEST_FILES, remove_files)))
lmdb_rm1 = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(num_files_rm1, file_count - 2)
self.assertEqual(len(set(lmdb_full.keys() - set(lmdb_rm1.keys()))), 2)
# add files (incremental_index=True)
num_files_add_inc = len(list(sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)))
lmdb_add_inc = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(num_files_add_inc, 3)
self.assertEqual(set(lmdb_full.keys()), set(lmdb_add_inc.keys()))
# add files
num_files_add = len(list(sist2_incremental_index(TEST_FILES, add_files)))
lmdb_add = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(num_files_add, file_count + 3)
self.assertEqual(set(lmdb_full.keys()), set(lmdb_add.keys()))
# (No action)
sist2_incremental_index(TEST_FILES)
lmdb_inc = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(set(lmdb_full.keys()), set(lmdb_inc.keys()))
if __name__ == "__main__":

View File

@@ -7,6 +7,11 @@ option(BUILD_TESTS "Build tests" on)
add_subdirectory(third-party/antiword)
set(USE_LIBXML2 OFF CACHE BOOL "" FORCE)
set(USE_XMLWRITER OFF CACHE BOOL "" FORCE)
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
add_subdirectory(third-party/libmobi)
add_library(
scan
libscan/util.c libscan/util.h
@@ -42,6 +47,22 @@ if (SIST_DEBUG)
-fsanitize=address
-fno-inline
)
elseif (SIST_FAST)
add_compile_definitions(
antiword
NDEBUG
)
target_compile_options(
scan
PRIVATE
-Ofast
-march=native
-fno-stack-protector
-fomit-frame-pointer
-freciprocal-math
)
else()
add_compile_definitions(
antiword
@@ -97,35 +118,15 @@ target_compile_options(
-g
)
include(ExternalProject)
find_program(MAKE_EXE NAMES gmake nmake make)
ExternalProject_Add(
libmobi
GIT_REPOSITORY https://github.com/simon987/libmobi.git
GIT_TAG "public"
UPDATE_COMMAND ""
PATCH_COMMAND ""
TEST_COMMAND ""
CONFIGURE_COMMAND ./autogen.sh && ./configure
INSTALL_COMMAND ""
PREFIX "third-party/ext_libmobi"
SOURCE_DIR "third-party/ext_libmobi/src/libmobi"
BINARY_DIR "third-party/ext_libmobi/src/libmobi"
BUILD_COMMAND ${MAKE_EXE} -j 8 --silent
)
SET(MOBI_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/)
SET(MOBI_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/)
if (SIST_DEBUG)
SET(FFMPEG_DEBUG "--enable-debug=3" "--disable-optimizations")
else()
SET(FFMPEG_DEBUG "")
endif()
include(ExternalProject)
find_program(MAKE_EXE NAMES gmake nmake make)
ExternalProject_Add(
ffmpeg
GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git
@@ -171,10 +172,10 @@ SET(WPD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwp
add_dependencies(
scan
libmobi
ffmpeg
antiword
libwpd
mobi
)
target_link_libraries(
@@ -192,8 +193,6 @@ target_link_libraries(
${MUPDF_LIB}
openjp2
${MOBI_LIB_DIR}/libmobi.a
${WPD_LIB_DIR}/libwpd-0.9.a
${WPD_LIB_DIR}/libwpd-stream-0.9.a
@@ -230,6 +229,7 @@ target_link_libraries(
${GUMBO_LIB}
dl
antiword
mobi
unofficial::pcre::pcre unofficial::pcre::pcre16 unofficial::pcre::pcre32 unofficial::pcre::pcrecpp
)

View File

@@ -1,6 +1,6 @@
#include "scan_mobi.h"
#include <mobi.h>
#include "../../third-party/libmobi/src/mobi.h"
#include <errno.h>
#include "stdlib.h"

View File

@@ -48,7 +48,6 @@ typedef int scan_code_t;
#define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1);
#define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1);
#define MD5_STR_LENGTH 33
#define SIST_DOC_ID_LEN MD5_STR_LENGTH
#define SIST_INDEX_ID_LEN MD5_STR_LENGTH

View File

@@ -923,7 +923,6 @@ TEST(Msdoc, Test1Pdf) {
ASSERT_TRUE(strstr(get_meta(&doc, MetaContent)->str_val, "October 2000") != nullptr);
ASSERT_STREQ(get_meta(&doc, MetaTitle)->str_val, "INTERNATIONAL ORGANIZATION FOR STANDARDIZATION");
ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "Oliver Morgan");
ASSERT_EQ(get_meta(&doc, MetaPages)->long_val, 57);
ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), msdoc_ctx.content_size, 4);
ASSERT_NE(size_before, store_size);
@@ -1030,6 +1029,23 @@ TEST(Msdoc, TestUtf8Text) {
cleanup(&doc, &f);
}
TEST(Msdoc, Test5Pdf) {
vfile_t f;
document_t doc;
load_doc_file("libscan-test-files/test_files/msdoc/test5.doc", &f, &doc);
size_t size_before = store_size;
parse_msdoc(&msdoc_ctx, &f, &doc);
ASSERT_TRUE(strstr(get_meta(&doc, MetaContent)->str_val, "орган Федеральной") != nullptr);
ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "uswo");
ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), msdoc_ctx.content_size, 4);
ASSERT_NE(size_before, store_size);
cleanup(&doc, &f);
}
TEST(Msdoc, TestFuzz1) {
vfile_t f;
document_t doc;
@@ -1189,4 +1205,7 @@ int main(int argc, char **argv) {
av_log_set_level(AV_LOG_QUIET);
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
}
// 0x6130000d2580
// "/mnt/Hatchery/m ain/downloads/qbittorrent/downloads/Roskomnadzor/УПРАВЛЕНИЕ РОСКОМНАДЗОРА по РБ.zip#/УПРАВЛЕНИЕ РОСКОМНАДЗОРА по РБ/Лопатин Ю.М/Секнин/2015 год/Обучение по ", <incomplete sequence \320>...