Compare commits

...

16 Commits

Author SHA1 Message Date
a74726be55 Merge pull request #285 from simon987/dependabot/npm_and_yarn/sist2-vue/async-2.6.4
Bump async from 2.6.3 to 2.6.4 in /sist2-vue
2022-04-17 13:42:40 -04:00
dependabot[bot]
cb228052d2 Bump async from 2.6.3 to 2.6.4 in /sist2-vue
Bumps [async](https://github.com/caolan/async) from 2.6.3 to 2.6.4.
- [Release notes](https://github.com/caolan/async/releases)
- [Changelog](https://github.com/caolan/async/blob/v2.6.4/CHANGELOG.md)
- [Commits](https://github.com/caolan/async/compare/v2.6.3...v2.6.4)

---
updated-dependencies:
- dependency-name: async
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-04-17 17:41:14 +00:00
fe56da95d5 Merge pull request #284 from simon987/dev
v2.12.0
2022-04-17 13:38:42 -04:00
9f2ad58f78 bump version 2022-04-17 12:30:14 -04:00
84d9bf4323 Fix cmake libmobi build maybe 2022-04-17 12:23:45 -04:00
90aa90f3f3 Update antiword 2022-04-17 11:47:33 -04:00
3fad07360c Merge pull request #283 from simon987/dependabot/npm_and_yarn/sist2-vue/minimist-1.2.6
Bump minimist from 1.2.5 to 1.2.6 in /sist2-vue
2022-04-17 10:12:10 -04:00
dependabot[bot]
00c3a640d0 Bump minimist from 1.2.5 to 1.2.6 in /sist2-vue
Bumps [minimist](https://github.com/substack/minimist) from 1.2.5 to 1.2.6.
- [Release notes](https://github.com/substack/minimist/releases)
- [Commits](https://github.com/substack/minimist/compare/1.2.5...1.2.6)

---
updated-dependencies:
- dependency-name: minimist
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-04-17 12:53:12 +00:00
730e495bde Enable highlight in document info modal, remove /d/ endpoint 2022-04-16 16:11:17 -04:00
54df1dfcf7 Fix spacebar not working in search bar 2022-04-16 13:51:36 -04:00
a75675ecea Fix thumbnail copy bug, update tests 2022-04-16 11:48:43 -04:00
901035da15 Build libmobi with cmake, update to 0.10 2022-04-15 16:01:40 -04:00
ceb7265639 Fix max_analyzed_offset (again?) 2022-04-15 15:35:39 -04:00
036ed9ea1e Update libmagic cmake things 2022-04-15 15:35:20 -04:00
779303a2f7 Print body response when task id cannot be read 2022-04-14 16:24:56 -04:00
23aee14c07 Fix exec-script & fix memory leak in exec_args_validate 2022-04-14 15:43:24 -04:00
29 changed files with 229 additions and 138 deletions

.gitmodules vendored
View File

@@ -7,3 +7,6 @@
 [submodule "third-party/libscan/third-party/antiword"]
 	path = third-party/libscan/third-party/antiword
 	url = https://github.com/simon987/antiword
+[submodule "third-party/libscan/third-party/libmobi"]
+	path = third-party/libscan/third-party/libmobi
+	url = https://github.com/bfabiszewski/libmobi

View File

@@ -4,6 +4,7 @@ set(CMAKE_C_STANDARD 11)
 project(sist2 C)
 option(SIST_DEBUG "Build a debug executable" on)
+option(SIST_FAST "Enable more optimisation flags" off)
 option(SIST_FAKE_STORE "Disable IO operations of LMDB stores for debugging purposes" 0)
 add_compile_definitions(
@@ -54,6 +55,10 @@ find_package(lmdb CONFIG REQUIRED)
 find_package(cJSON CONFIG REQUIRED)
 find_package(unofficial-mongoose CONFIG REQUIRED)
 find_package(CURL CONFIG REQUIRED)
+find_library(MAGIC_LIB
+        NAMES libmagic.so.1 magic
+        PATHS /usr/lib/x86_64-linux-gnu/ /usr/lib/aarch64-linux-gnu/
+        )
 target_include_directories(
@@ -93,16 +98,25 @@ if (SIST_DEBUG)
         PROPERTIES
         OUTPUT_NAME sist2_debug
         )
+elseif (SIST_FAST)
+    target_compile_options(
+            sist2
+            PRIVATE
+            -Ofast
+            -march=native
+            -fno-stack-protector
+            -fomit-frame-pointer
+            -freciprocal-math
+    )
 else ()
     target_compile_options(
             sist2
             PRIVATE
             -Ofast
-            #-march=native
             -fno-stack-protector
             -fomit-frame-pointer
-            #-freciprocal-math
     )
 endif ()
@@ -124,13 +138,12 @@ target_link_libraries(
         CURL::libcurl
         pthread
-        #magic
         c
         scan
-        /usr/lib/x86_64-linux-gnu/libmagic.so.1
+        ${MAGIC_LIB}
 )
 add_custom_target(

View File

@@ -52,7 +52,7 @@ sist2 (Simple incremental search tool)
    Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` *
 2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
    recommended!)*
-3. *(or)* `docker pull simon987/sist2:2.11.7-x64-linux`
+3. *(or)* `docker pull simon987/sist2:2.12.0-x64-linux`
 1. See [Usage guide](docs/USAGE.md)

View File

@@ -3,7 +3,7 @@
"refresh_interval": "30s", "refresh_interval": "30s",
"codec": "best_compression", "codec": "best_compression",
"number_of_replicas": 0, "number_of_replicas": 0,
"highlight.max_analyzed_offset": 10000000 "highlight.max_analyzed_offset": 1000000
}, },
"analysis": { "analysis": {
"tokenizer": { "tokenizer": {

File diff suppressed because one or more lines are too long

View File

@@ -3288,9 +3288,9 @@
       }
     },
     "node_modules/async": {
-      "version": "2.6.3",
-      "resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz",
-      "integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==",
+      "version": "2.6.4",
+      "resolved": "https://registry.npmjs.org/async/-/async-2.6.4.tgz",
+      "integrity": "sha512-mzo5dfJYwAn29PeiJ0zvwTo04zj8HDJj0Mn8TD7sno7q12prdbnasKJHhkm2c1LgrhlJ0teaea8860oxi51mGA==",
       "dev": true,
       "dependencies": {
         "lodash": "^4.17.14"
@@ -17937,9 +17937,9 @@
"dev": true "dev": true
}, },
"async": { "async": {
"version": "2.6.3", "version": "2.6.4",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz", "resolved": "https://registry.npmjs.org/async/-/async-2.6.4.tgz",
"integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==", "integrity": "sha512-mzo5dfJYwAn29PeiJ0zvwTo04zj8HDJj0Mn8TD7sno7q12prdbnasKJHhkm2c1LgrhlJ0teaea8860oxi51mGA==",
"dev": true, "dev": true,
"requires": { "requires": {
"lodash": "^4.17.14" "lodash": "^4.17.14"

View File

@@ -336,10 +336,6 @@ class Sist2Api {
         };
     }
-    getDocInfo(docId: string) {
-        return axios.get(`${this.baseUrl}d/${docId}`);
-    }
     getTags() {
         return this.esQuery({
             aggs: {

View File

@@ -210,7 +210,7 @@ class Sist2Query {
             };
         if (!legacyES) {
-            q.highlight.max_analyzed_offset = 9_999_999;
+            q.highlight.max_analyzed_offset = 999_999;
         }
         if (getters.optSearchInPath) {

View File

@@ -1,11 +1,13 @@
 <template>
   <Preloader v-if="loading"></Preloader>
-  <div v-else-if="content" class="content-div">{{ content }}</div>
+  <div v-else-if="content" class="content-div" v-html="content"></div>
 </template>
 <script>
 import Sist2Api from "@/Sist2Api";
 import Preloader from "@/components/Preloader";
+import Sist2Query from "@/Sist2Query";
+import store from "@/store";
 export default {
   name: "LazyContentDiv",
@@ -18,10 +20,72 @@ export default {
     }
   },
   mounted() {
-    Sist2Api.getDocInfo(this.docId).then(src => {
-      this.content = src.data.content;
+    const query = Sist2Query.searchQuery();
+
+    if (this.$store.state.optHighlight) {
+      const fields = this.$store.state.fuzzy
+          ? {"content.nGram": {}}
+          : {content: {}};
+
+      query.highlight = {
+        pre_tags: ["<mark>"],
+        post_tags: ["</mark>"],
+        number_of_fragments: 0,
+        fields,
+      };
+
+      if (!store.state.sist2Info.esVersionLegacy) {
+        query.highlight.max_analyzed_offset = 999_999;
+      }
+    }
+
+    if ("function_score" in query.query) {
+      query.query = query.query.function_score.query;
+    }
+
+    if (!("must" in query.query.bool)) {
+      query.query.bool.must = [];
+    } else if (!Array.isArray(query.query.bool.must)) {
+      query.query.bool.must = [query.query.bool.must];
+    }
+
+    query.query.bool.must.push({match: {_id: this.docId}});
+
+    delete query["sort"];
+    delete query["aggs"];
+    delete query["search_after"];
+    delete query.query["function_score"];
+
+    query._source = {
+      includes: ["content", "name", "path", "extension"]
+    }
+
+    query.size = 1;
+
+    Sist2Api.esQuery(query).then(resp => {
       this.loading = false;
-    })
+      if (resp.hits.hits.length === 1) {
+        this.content = this.getContent(resp.hits.hits[0]);
+      } else {
+        console.log("FIXME: could not get content")
+        console.log(resp)
+      }
+    });
+  },
+  methods: {
+    getContent(doc) {
+      if (!doc.highlight) {
+        return doc._source.content;
+      }
+      if (doc.highlight["content.nGram"]) {
+        return doc.highlight["content.nGram"][0];
+      }
+      if (doc.highlight.content) {
+        return doc.highlight.content[0];
+      }
+    }
   }
 }
 </script>

View File

@@ -81,7 +81,9 @@ export default {
   methods: {
     keyDownListener(e) {
-      if (this.$refs.lightbox === undefined) {
+      const isLightboxOpen = this.$refs.lightbox === undefined || this.$refs.lightbox.$el.tagName === undefined;
+
+      if (isLightboxOpen) {
         return true;
       }

View File

@@ -26,7 +26,6 @@ export default new Vuex.Store({
sortMode: "score", sortMode: "score",
fuzzy: false, fuzzy: false,
size: 60,
optLang: "en", optLang: "en",
optLangIsDefault: true, optLangIsDefault: true,
@@ -34,6 +33,7 @@ export default new Vuex.Store({
optTheme: "light", optTheme: "light",
optDisplay: "grid", optDisplay: "grid",
optSize: 60,
optHighlight: true, optHighlight: true,
optTagOrOperator: false, optTagOrOperator: false,
optFuzzy: true, optFuzzy: true,
@@ -153,7 +153,7 @@ export default new Vuex.Store({
         setOptSuggestPath: (state, val) => state.optSuggestPath = val,
         setOptFragmentSize: (state, val) => state.optFragmentSize = val,
         setOptQueryMode: (state, val) => state.optQueryMode = val,
-        setOptResultSize: (state, val) => state.size = val,
+        setOptResultSize: (state, val) => state.optSize = val,
         setOptTagOrOperator: (state, val) => state.optTagOrOperator = val,
         setOptTreemapType: (state, val) => state.optTreemapType = val,
@@ -353,7 +353,7 @@ export default new Vuex.Store({
         searchText: state => state.searchText,
         pathText: state => state.pathText,
         fuzzy: state => state.fuzzy,
-        size: state => state.size,
+        size: state => state.optSize,
         sortMode: state => state.sortMode,
         lastQueryResult: state => state.lastQueryResults,
         lastDoc: function (state): EsHit | null {
@@ -391,7 +391,7 @@ export default new Vuex.Store({
         optTreemapColor: state => state.optTreemapColor,
         optLightboxLoadOnlyCurrent: state => state.optLightboxLoadOnlyCurrent,
         optLightboxSlideDuration: state => state.optLightboxSlideDuration,
-        optResultSize: state => state.size,
+        optResultSize: state => state.optSize,
         optHideLegacy: state => state.optHideLegacy,
         optUpdateMimeMap: state => state.optUpdateMimeMap,
         optUseDatePicker: state => state.optUseDatePicker,

View File

@@ -208,7 +208,7 @@ export default Vue.extend({
this.$store.commit("setUiReachedScrollEnd", false); this.$store.commit("setUiReachedScrollEnd", false);
}, },
async handleSearch(resp: EsResult) { async handleSearch(resp: EsResult) {
if (resp.hits.hits.length == 0) { if (resp.hits.hits.length == 0 || resp.hits.hits.length < this.$store.state.optSize) {
this.$store.commit("setUiReachedScrollEnd", true); this.$store.commit("setUiReachedScrollEnd", true);
} }
@@ -248,6 +248,8 @@ export default Vue.extend({
this.$store.commit("setLastQueryResult", resp); this.$store.commit("setLastQueryResult", resp);
this.docs.push(...resp.hits.hits); this.docs.push(...resp.hits.hits);
resp.hits.hits.forEach(hit => this.docIds.add(hit._id));
}, },
getDateRange(): Promise<{ min: number, max: number }> { getDateRange(): Promise<{ min: number, max: number }> {
return sist2.esQuery({ return sist2.esQuery({

View File

@@ -81,6 +81,11 @@ void web_args_destroy(web_args_t *args) {
 }
 void exec_args_destroy(exec_args_t *args) {
+    if (args->index_path != NULL) {
+        free(args->index_path);
+    }
     free(args);
 }

View File

@@ -85,7 +85,7 @@ typedef struct web_args {
 typedef struct exec_args {
     char *es_url;
     char *es_index;
-    const char *index_path;
+    char *index_path;
     const char *script_path;
     int async_script;
     char *script;
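Together with the new free(args->index_path) in exec_args_destroy above, dropping the const qualifier signals that exec_args now owns its index_path buffer. Below is a minimal sketch of that ownership pattern, assuming (the validation code is not part of this diff) that the path is copied with strdup while the arguments are validated:

```c
#include <stdlib.h>
#include <string.h>

typedef struct exec_args {
    char *index_path;   /* owned: allocated during validation, released in destroy */
} exec_args_t;

/* Hypothetical validation step: copy the caller's string so the struct owns it. */
int exec_args_validate(exec_args_t *args, const char *index_path_arg) {
    args->index_path = strdup(index_path_arg);
    return args->index_path != NULL;
}

void exec_args_destroy(exec_args_t *args) {
    if (args->index_path != NULL) {
        free(args->index_path);
    }
    free(args);
}
```

The commit message mentions a leak in exec_args_validate, so the allocation presumably already happened there; the field simply could not be freed cleanly while it was declared const.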

View File

@@ -110,16 +110,16 @@ void execute_update_script(const char *script, int async, const char index_id[SI
cJSON *term_obj = cJSON_AddObjectToObject(query, "term"); cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
cJSON_AddStringToObject(term_obj, "index", index_id); cJSON_AddStringToObject(term_obj, "index", index_id);
char *str = cJSON_Print(body); char *str = cJSON_PrintUnformatted(body);
char bulk_url[4096]; char url[4096];
if (async) { if (async) {
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url, snprintf(url, sizeof(url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
Indexer->es_index); Indexer->es_index);
} else { } else {
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index); snprintf(url, sizeof(url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
} }
response_t *r = web_post(bulk_url, str); response_t *r = web_post(url, str);
if (!async) { if (!async) {
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code); LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
} }
@@ -139,6 +139,11 @@ void execute_update_script(const char *script, int async, const char index_id[SI
     if (async) {
         cJSON *task = cJSON_GetObjectItem(resp, "task");
+        if (task == NULL) {
+            LOG_FATALF("elastic.c", "FIXME: Could not get task id: %s", r->body);
+        }
+
         LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring);
     }
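Two details in the hunks above are worth spelling out: cJSON_Print pretty-prints with newlines and indentation, while cJSON_PrintUnformatted emits the same document compactly, which suits an HTTP request body better; and both return heap-allocated strings the caller must free. A self-contained illustration (the object contents here are made up, not taken from sist2):

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cjson/cJSON.h>   /* include path may differ depending on how cJSON is installed */

int main(void) {
    cJSON *body = cJSON_CreateObject();
    cJSON *query = cJSON_AddObjectToObject(body, "query");
    cJSON *term = cJSON_AddObjectToObject(query, "term");
    cJSON_AddStringToObject(term, "index", "abc123");

    char *pretty = cJSON_Print(body);             /* multi-line, indented */
    char *compact = cJSON_PrintUnformatted(body); /* {"query":{"term":{"index":"abc123"}}} */

    printf("%zu bytes pretty, %zu bytes compact\n", strlen(pretty), strlen(compact));

    free(pretty);    /* cJSON_Print* output is malloc'd; the caller frees it */
    free(compact);
    cJSON_Delete(body);
    return 0;
}
```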

File diff suppressed because one or more lines are too long

View File

@@ -22,7 +22,7 @@ void free_response(response_t *resp) {
     free(resp);
 }
-void web_post_async_poll(subreq_ctx_t* req) {
+void web_post_async_poll(subreq_ctx_t *req) {
     fd_set fdread;
     fd_set fdwrite;
     fd_set fdexcep;
@@ -34,7 +34,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
     CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd);
-    if(mc != CURLM_OK) {
+    if (mc != CURLM_OK) {
         req->done = TRUE;
         return;
     }
@@ -47,7 +47,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
     struct timeval timeout = {1, 0};
     int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
-    switch(rc) {
+    switch (rc) {
         case -1:
             req->done = TRUE;
             break;
@@ -142,6 +142,9 @@ response_t *web_post(const char *url, const char *data) {
     curl_easy_setopt(curl, CURLOPT_POST, 1);
     curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
+    char err_buffer[CURL_ERROR_SIZE + 1] = {};
+    curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, err_buffer);
+
     struct curl_slist *headers = NULL;
     headers = curl_slist_append(headers, "Content-Type: application/json");
     curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
@@ -151,12 +154,16 @@ response_t *web_post(const char *url, const char *data) {
     curl_easy_perform(curl);
     curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
-    curl_easy_cleanup(curl);
-    curl_slist_free_all(headers);
     resp->body = buffer.buf;
     resp->size = buffer.cur;
+    if (resp->status_code == 0) {
+        LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
+    }
+
+    curl_easy_cleanup(curl);
+    curl_slist_free_all(headers);
+
     return resp;
 }
@@ -175,7 +182,7 @@ response_t *web_put(const char *url, const char *data) {
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT"); curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2"); curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0); curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 ); curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4);
struct curl_slist *headers = NULL; struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json"); headers = curl_slist_append(headers, "Content-Type: application/json");
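The error-buffer change above matters because CURLINFO_RESPONSE_CODE stays 0 when the request never completes (connection refused, DNS failure, timeout), which previously failed silently. A standalone sketch of the same libcurl pattern outside of sist2's wrapper code (the URL is only a placeholder):

```c
#include <stdio.h>
#include <curl/curl.h>

int main(void) {
    curl_global_init(CURL_GLOBAL_DEFAULT);
    CURL *curl = curl_easy_init();
    if (curl == NULL) {
        return 1;
    }

    char err_buffer[CURL_ERROR_SIZE + 1] = {0};
    curl_easy_setopt(curl, CURLOPT_URL, "http://localhost:9200/_cluster/health");
    curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, err_buffer);

    CURLcode res = curl_easy_perform(curl);

    long status_code = 0;
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status_code);

    if (res != CURLE_OK || status_code == 0) {
        /* err_buffer holds libcurl's last error message; it may be empty,
         * in which case curl_easy_strerror(res) is the fallback. */
        fprintf(stderr, "CURL Error: %s\n",
                err_buffer[0] != '\0' ? err_buffer : curl_easy_strerror(res));
    }

    curl_easy_cleanup(curl);
    curl_global_cleanup();
    return 0;
}
```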

View File

@@ -505,9 +505,9 @@ void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_
     // Copy tn store contents
     size_t buf_len;
-    char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, sizeof(doc_id), &buf_len);
+    char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, SIST_DOC_ID_LEN, &buf_len);
     if (buf_len != 0) {
-        store_write(IncrementalCopyDestinationStore, (char *) doc_id, sizeof(doc_id), buf, buf_len);
+        store_write(IncrementalCopyDestinationStore, (char *) doc_id, SIST_DOC_ID_LEN, buf, buf_len);
         free(buf);
     }
 }
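The fix above is the classic array-to-pointer decay pitfall: a parameter declared as const char doc_id[SIST_DOC_ID_LEN] is adjusted to const char *, so sizeof(doc_id) evaluates to the pointer size (typically 8) rather than the 33-byte ID length, and the thumbnail copy was presumably keying the store on truncated IDs. A minimal demonstration:

```c
#include <stdio.h>

#define SIST_DOC_ID_LEN 33

/* Despite the array syntax, doc_id is adjusted to `const char *` here,
 * so sizeof(doc_id) is the size of a pointer (compilers warn about this). */
static void print_sizes(const char doc_id[SIST_DOC_ID_LEN]) {
    printf("sizeof in function:  %zu\n", sizeof(doc_id));   /* pointer size, e.g. 8 */
    printf("intended key length: %d\n", SIST_DOC_ID_LEN);   /* 33 */
}

int main(void) {
    char doc_id[SIST_DOC_ID_LEN] = "8a4f2c";                /* placeholder ID */
    printf("sizeof at call site: %zu\n", sizeof(doc_id));   /* 33 */
    print_sizes(doc_id);
    return 0;
}
```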

View File

@@ -42,13 +42,13 @@ index_descriptor_t read_index_descriptor(char *path);
 // caller ensures char file_path[PATH_MAX]
 #define READ_INDICES(file_path, index_path, action_ok, action_main_fail, cond_original) \
     snprintf(file_path, PATH_MAX, "%s_index_main.ndjson.zst", index_path); \
-    if (0 == access(file_path, R_OK)) { \
+    if (access(file_path, R_OK) == 0) { \
         action_ok; \
     } else { \
         action_main_fail; \
     } \
     snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \
-    if ((cond_original) && (0 == access(file_path, R_OK))) { \
+    if ((cond_original) && access(file_path, R_OK) == 0) { \
         action_ok; \
     } \

View File

@@ -103,7 +103,7 @@ void sig_handler(int signum) {
     exit(-1);
 }
-void init_dir(const char *dirpath, scan_args_t* args) {
+void init_dir(const char *dirpath, scan_args_t *args) {
     char path[PATH_MAX];
     snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
@@ -112,16 +112,16 @@ void init_dir(const char *dirpath, scan_args_t* args) {
     strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON);
     if (args->incremental != NULL) {
         // copy old index id
         char descriptor_path[PATH_MAX];
         snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
         index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
         memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
     } else {
         // generate new index id based on timestamp
         unsigned char index_md5[MD5_DIGEST_LENGTH];
         MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
         buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
     }
     write_index_descriptor(path, &ScanCtx.index.desc);
@@ -324,9 +324,13 @@ void load_incremental_index(const scan_args_t *args) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version) LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version)
} }
READ_INDICES(file_path, args->incremental, incremental_read(ScanCtx.original_table, file_path, &original_desc), READ_INDICES(
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)), file_path,
1); args->incremental,
incremental_read(ScanCtx.original_table, file_path, &original_desc),
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
TRUE
);
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table)) LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
} }
@@ -534,6 +538,7 @@ void sist2_exec_script(exec_args_t *args) {
     IndexCtx.es_url = args->es_url;
     IndexCtx.es_index = args->es_index;
+    IndexCtx.needs_es_connection = TRUE;
     LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
@@ -776,9 +781,8 @@ int main(int argc, const char *argv[]) {
         sist2_exec_script(exec_args);
     } else {
-        fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
         argparse_usage(&argparse);
-        goto end;
+        LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0])
     }
     printf("\n");

View File

@@ -27,10 +27,6 @@
 #define UNUSED(x) __attribute__((__unused__)) x
-#define MD5_STR_LENGTH 33
-#define SHA1_STR_LENGTH 41
-#define SHA1_DIGEST_LENGTH 20
 #include "util.h"
 #include "log.h"
 #include "types.h"

View File

@@ -359,42 +359,6 @@ void index_info(struct mg_connection *nc) {
 }
-void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
-
-    if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
-        LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
-        HTTP_REPLY_NOT_FOUND
-        return;
-    }
-
-    char arg_doc_id[SIST_DOC_ID_LEN];
-    memcpy(arg_doc_id, hm->uri.ptr + 3, SIST_DOC_ID_LEN);
-    *(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
-
-    cJSON *doc = elastic_get_document(arg_doc_id);
-    cJSON *source = cJSON_GetObjectItem(doc, "_source");
-
-    cJSON *index_id = cJSON_GetObjectItem(source, "index");
-    if (index_id == NULL) {
-        cJSON_Delete(doc);
-        HTTP_REPLY_NOT_FOUND
-        return;
-    }
-
-    index_t *idx = get_index_by_id(index_id->valuestring);
-    if (idx == NULL) {
-        cJSON_Delete(doc);
-        HTTP_REPLY_NOT_FOUND
-        return;
-    }
-
-    char *json_str = cJSON_PrintUnformatted(source);
-    send_response_line(nc, 200, (int) strlen(json_str), "Content-Type: application/json");
-    mg_send(nc, json_str, (int) strlen(json_str));
-    free(json_str);
-    cJSON_Delete(doc);
-}
-
 void file(struct mg_connection *nc, struct mg_http_message *hm) {
     if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
@@ -653,8 +617,6 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
             return;
         }
         tag(nc, hm);
-    } else if (mg_http_match_uri(hm, "/d/*")) {
-        document_info(nc, hm);
     } else {
         HTTP_REPLY_NOT_FOUND
     }

File diff suppressed because one or more lines are too long

View File

@@ -35,10 +35,20 @@ def sist2_index(files, *args):
     path = copy_files(files)
     shutil.rmtree("test_i", ignore_errors=True)
-    sist2("scan", path, "-o", "test_i", *args)
+    sist2("scan", path, "-o", "test_i", "-t12", *args)
     return iter(sist2_index_to_dict("test_i"))


+def get_lmdb_contents(path):
+    import lmdb
+    env = lmdb.open(path)
+    txn = env.begin(write=False)
+
+    return dict((k, v) for k, v in txn.cursor())
+
+
 def sist2_incremental_index(files, func=None, incremental_index=False, *args):
     path = copy_files(files)
@@ -46,7 +56,7 @@ def sist2_incremental_index(files, func=None, incremental_index=False, *args):
         func(path)
     shutil.rmtree("test_i_inc", ignore_errors=True)
-    sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", *args)
+    sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", "-t12", *args)
     return iter(sist2_index_to_dict("test_i_inc", incremental_index))
@@ -76,9 +86,31 @@ class ScanTest(unittest.TestCase):
             pass

         file_count = sum(1 for _ in sist2_index(TEST_FILES))
+        lmdb_full = get_lmdb_contents("test_i/thumbs")

-        self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, remove_files)), file_count - 2)
-        self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)), 3)
-        self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files)), file_count + 3)
+        # Remove files
+        num_files_rm1 = len(list(sist2_incremental_index(TEST_FILES, remove_files)))
+        lmdb_rm1 = get_lmdb_contents("test_i_inc/thumbs")
+        self.assertEqual(num_files_rm1, file_count - 2)
+        self.assertEqual(len(set(lmdb_full.keys() - set(lmdb_rm1.keys()))), 2)
+
+        # add files (incremental_index=True)
+        num_files_add_inc = len(list(sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)))
+        lmdb_add_inc = get_lmdb_contents("test_i_inc/thumbs")
+        self.assertEqual(num_files_add_inc, 3)
+        self.assertEqual(set(lmdb_full.keys()), set(lmdb_add_inc.keys()))
+
+        # add files
+        num_files_add = len(list(sist2_incremental_index(TEST_FILES, add_files)))
+        lmdb_add = get_lmdb_contents("test_i_inc/thumbs")
+        self.assertEqual(num_files_add, file_count + 3)
+        self.assertEqual(set(lmdb_full.keys()), set(lmdb_add.keys()))
+
+        # (No action)
+        sist2_incremental_index(TEST_FILES)
+        lmdb_inc = get_lmdb_contents("test_i_inc/thumbs")
+        self.assertEqual(set(lmdb_full.keys()), set(lmdb_inc.keys()))


 if __name__ == "__main__":
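The new get_lmdb_contents helper is what lets the test compare thumbnail-store keys across incremental scans. For reference, a rough C equivalent of that helper using the LMDB API directly (an untested sketch with error handling trimmed):

```c
#include <stdio.h>
#include <lmdb.h>

/* Iterate over every key/value pair in an LMDB database, similar to the
 * Python helper's dict((k, v) for k, v in txn.cursor()). */
int dump_store(const char *path) {
    MDB_env *env;
    MDB_txn *txn;
    MDB_dbi dbi;
    MDB_cursor *cursor;
    MDB_val key, val;

    mdb_env_create(&env);
    if (mdb_env_open(env, path, MDB_RDONLY, 0664) != 0) {
        mdb_env_close(env);
        return -1;
    }
    mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);
    mdb_dbi_open(txn, NULL, 0, &dbi);
    mdb_cursor_open(txn, dbi, &cursor);

    while (mdb_cursor_get(cursor, &key, &val, MDB_NEXT) == 0) {
        printf("%.*s -> %zu bytes\n", (int) key.mv_size, (char *) key.mv_data, val.mv_size);
    }

    mdb_cursor_close(cursor);
    mdb_txn_abort(txn);
    mdb_env_close(env);
    return 0;
}
```

Called as dump_store("test_i/thumbs"), it would walk the same store the Python assertions inspect.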

View File

@@ -7,6 +7,11 @@ option(BUILD_TESTS "Build tests" on)
 add_subdirectory(third-party/antiword)

+set(USE_LIBXML2 OFF CACHE BOOL "" FORCE)
+set(USE_XMLWRITER OFF CACHE BOOL "" FORCE)
+set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
+add_subdirectory(third-party/libmobi)
+
 add_library(
         scan
         libscan/util.c libscan/util.h
@@ -42,6 +47,22 @@ if (SIST_DEBUG)
             -fsanitize=address
             -fno-inline
     )
+elseif (SIST_FAST)
+    add_compile_definitions(
+            antiword
+            NDEBUG
+    )
+    target_compile_options(
+            scan
+            PRIVATE
+            -Ofast
+            -march=native
+            -fno-stack-protector
+            -fomit-frame-pointer
+            -freciprocal-math
+    )
 else()
     add_compile_definitions(
             antiword
@@ -97,35 +118,15 @@ target_compile_options(
         -g
 )

-include(ExternalProject)
-find_program(MAKE_EXE NAMES gmake nmake make)
-ExternalProject_Add(
-        libmobi
-        GIT_REPOSITORY https://github.com/simon987/libmobi.git
-        GIT_TAG "public"
-
-        UPDATE_COMMAND ""
-        PATCH_COMMAND ""
-        TEST_COMMAND ""
-        CONFIGURE_COMMAND ./autogen.sh && ./configure
-        INSTALL_COMMAND ""
-
-        PREFIX "third-party/ext_libmobi"
-        SOURCE_DIR "third-party/ext_libmobi/src/libmobi"
-        BINARY_DIR "third-party/ext_libmobi/src/libmobi"
-
-        BUILD_COMMAND ${MAKE_EXE} -j 8 --silent
-)
-SET(MOBI_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/)
-SET(MOBI_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/)
-
 if (SIST_DEBUG)
     SET(FFMPEG_DEBUG "--enable-debug=3" "--disable-optimizations")
 else()
     SET(FFMPEG_DEBUG "")
 endif()

+include(ExternalProject)
+find_program(MAKE_EXE NAMES gmake nmake make)
 ExternalProject_Add(
         ffmpeg
         GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git
@@ -171,10 +172,10 @@ SET(WPD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwp
 add_dependencies(
         scan
-        libmobi
         ffmpeg
         antiword
         libwpd
+        mobi
 )

 target_link_libraries(
@@ -192,8 +193,6 @@ target_link_libraries(
         ${MUPDF_LIB}
         openjp2

-        ${MOBI_LIB_DIR}/libmobi.a
-
         ${WPD_LIB_DIR}/libwpd-0.9.a
         ${WPD_LIB_DIR}/libwpd-stream-0.9.a
@@ -230,6 +229,7 @@ target_link_libraries(
         ${GUMBO_LIB}
         dl
         antiword
+        mobi
         unofficial::pcre::pcre unofficial::pcre::pcre16 unofficial::pcre::pcre32 unofficial::pcre::pcrecpp
 )

View File

@@ -1,6 +1,6 @@
#include "scan_mobi.h" #include "scan_mobi.h"
#include <mobi.h> #include "../../third-party/libmobi/src/mobi.h"
#include <errno.h> #include <errno.h>
#include "stdlib.h" #include "stdlib.h"

View File

@@ -48,7 +48,6 @@ typedef int scan_code_t;
 #define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1);
 #define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1);

+#define MD5_STR_LENGTH 33
 #define SIST_DOC_ID_LEN MD5_STR_LENGTH
 #define SIST_INDEX_ID_LEN MD5_STR_LENGTH
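For context on the constant being moved around here: MD5_STR_LENGTH is 33 because a 16-byte MD5 digest renders as 32 hexadecimal characters plus a terminating NUL, and SIST_DOC_ID_LEN / SIST_INDEX_ID_LEN are simply aliases for that length. A small sketch of the conversion this sizing assumes (digest_to_hex is a hypothetical stand-in for sist2's buf2hex, which is not shown in this diff):

```c
#include <stdio.h>

#define MD5_DIGEST_LENGTH 16
#define MD5_STR_LENGTH (MD5_DIGEST_LENGTH * 2 + 1)   /* 32 hex chars + '\0' = 33 */

/* Hypothetical stand-in for buf2hex(): write each digest byte as two hex digits. */
static void digest_to_hex(const unsigned char digest[MD5_DIGEST_LENGTH],
                          char out[MD5_STR_LENGTH]) {
    for (int i = 0; i < MD5_DIGEST_LENGTH; i++) {
        snprintf(out + i * 2, 3, "%02x", digest[i]);
    }
}

int main(void) {
    unsigned char digest[MD5_DIGEST_LENGTH] = {0xd4, 0x1d, 0x8c, 0xd9};  /* example bytes */
    char id[MD5_STR_LENGTH];
    digest_to_hex(digest, id);
    printf("%s (%zu chars + NUL)\n", id, sizeof(id) - 1);
    return 0;
}
```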