mirror of
https://github.com/simon987/sist2.git
synced 2025-12-20 02:26:08 +00:00
Compare commits
16 Commits
50b9201be3
...
2.12.0
| Author | SHA1 | Date | |
|---|---|---|---|
| a74726be55 | |||
|
|
cb228052d2 | ||
| fe56da95d5 | |||
| 9f2ad58f78 | |||
| 84d9bf4323 | |||
| 90aa90f3f3 | |||
| 3fad07360c | |||
|
|
00c3a640d0 | ||
| 730e495bde | |||
| 54df1dfcf7 | |||
| a75675ecea | |||
| 901035da15 | |||
| ceb7265639 | |||
| 036ed9ea1e | |||
| 779303a2f7 | |||
| 23aee14c07 |
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -7,3 +7,6 @@
|
||||
[submodule "third-party/libscan/third-party/antiword"]
|
||||
path = third-party/libscan/third-party/antiword
|
||||
url = https://github.com/simon987/antiword
|
||||
[submodule "third-party/libscan/third-party/libmobi"]
|
||||
path = third-party/libscan/third-party/libmobi
|
||||
url = https://github.com/bfabiszewski/libmobi
|
||||
|
||||
@@ -4,6 +4,7 @@ set(CMAKE_C_STANDARD 11)
|
||||
project(sist2 C)
|
||||
|
||||
option(SIST_DEBUG "Build a debug executable" on)
|
||||
option(SIST_FAST "Enable more optimisation flags" off)
|
||||
option(SIST_FAKE_STORE "Disable IO operations of LMDB stores for debugging purposes" 0)
|
||||
|
||||
add_compile_definitions(
|
||||
@@ -54,6 +55,10 @@ find_package(lmdb CONFIG REQUIRED)
|
||||
find_package(cJSON CONFIG REQUIRED)
|
||||
find_package(unofficial-mongoose CONFIG REQUIRED)
|
||||
find_package(CURL CONFIG REQUIRED)
|
||||
find_library(MAGIC_LIB
|
||||
NAMES libmagic.so.1 magic
|
||||
PATHS /usr/lib/x86_64-linux-gnu/ /usr/lib/aarch64-linux-gnu/
|
||||
)
|
||||
|
||||
|
||||
target_include_directories(
|
||||
@@ -93,16 +98,25 @@ if (SIST_DEBUG)
|
||||
PROPERTIES
|
||||
OUTPUT_NAME sist2_debug
|
||||
)
|
||||
elseif (SIST_FAST)
|
||||
target_compile_options(
|
||||
sist2
|
||||
PRIVATE
|
||||
|
||||
-Ofast
|
||||
-march=native
|
||||
-fno-stack-protector
|
||||
-fomit-frame-pointer
|
||||
-freciprocal-math
|
||||
)
|
||||
else ()
|
||||
target_compile_options(
|
||||
sist2
|
||||
PRIVATE
|
||||
|
||||
-Ofast
|
||||
#-march=native
|
||||
-fno-stack-protector
|
||||
-fomit-frame-pointer
|
||||
#-freciprocal-math
|
||||
)
|
||||
endif ()
|
||||
|
||||
@@ -124,13 +138,12 @@ target_link_libraries(
|
||||
CURL::libcurl
|
||||
|
||||
pthread
|
||||
#magic
|
||||
|
||||
c
|
||||
|
||||
scan
|
||||
|
||||
/usr/lib/x86_64-linux-gnu/libmagic.so.1
|
||||
${MAGIC_LIB}
|
||||
)
|
||||
|
||||
add_custom_target(
|
||||
|
||||
@@ -52,7 +52,7 @@ sist2 (Simple incremental search tool)
|
||||
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` *
|
||||
2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
|
||||
recommended!)*
|
||||
3. *(or)* `docker pull simon987/sist2:2.11.7-x64-linux`
|
||||
3. *(or)* `docker pull simon987/sist2:2.12.0-x64-linux`
|
||||
|
||||
1. See [Usage guide](docs/USAGE.md)
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
"refresh_interval": "30s",
|
||||
"codec": "best_compression",
|
||||
"number_of_replicas": 0,
|
||||
"highlight.max_analyzed_offset": 10000000
|
||||
"highlight.max_analyzed_offset": 1000000
|
||||
},
|
||||
"analysis": {
|
||||
"tokenizer": {
|
||||
|
||||
2
sist2-vue/dist/js/index.js
vendored
2
sist2-vue/dist/js/index.js
vendored
File diff suppressed because one or more lines are too long
12
sist2-vue/package-lock.json
generated
12
sist2-vue/package-lock.json
generated
@@ -3288,9 +3288,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/async": {
|
||||
"version": "2.6.3",
|
||||
"resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz",
|
||||
"integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==",
|
||||
"version": "2.6.4",
|
||||
"resolved": "https://registry.npmjs.org/async/-/async-2.6.4.tgz",
|
||||
"integrity": "sha512-mzo5dfJYwAn29PeiJ0zvwTo04zj8HDJj0Mn8TD7sno7q12prdbnasKJHhkm2c1LgrhlJ0teaea8860oxi51mGA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"lodash": "^4.17.14"
|
||||
@@ -17937,9 +17937,9 @@
|
||||
"dev": true
|
||||
},
|
||||
"async": {
|
||||
"version": "2.6.3",
|
||||
"resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz",
|
||||
"integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==",
|
||||
"version": "2.6.4",
|
||||
"resolved": "https://registry.npmjs.org/async/-/async-2.6.4.tgz",
|
||||
"integrity": "sha512-mzo5dfJYwAn29PeiJ0zvwTo04zj8HDJj0Mn8TD7sno7q12prdbnasKJHhkm2c1LgrhlJ0teaea8860oxi51mGA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"lodash": "^4.17.14"
|
||||
|
||||
@@ -336,10 +336,6 @@ class Sist2Api {
|
||||
};
|
||||
}
|
||||
|
||||
getDocInfo(docId: string) {
|
||||
return axios.get(`${this.baseUrl}d/${docId}`);
|
||||
}
|
||||
|
||||
getTags() {
|
||||
return this.esQuery({
|
||||
aggs: {
|
||||
|
||||
@@ -210,7 +210,7 @@ class Sist2Query {
|
||||
};
|
||||
|
||||
if (!legacyES) {
|
||||
q.highlight.max_analyzed_offset = 9_999_999;
|
||||
q.highlight.max_analyzed_offset = 999_999;
|
||||
}
|
||||
|
||||
if (getters.optSearchInPath) {
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
<template>
|
||||
<Preloader v-if="loading"></Preloader>
|
||||
<div v-else-if="content" class="content-div">{{ content }}</div>
|
||||
<div v-else-if="content" class="content-div" v-html="content"></div>
|
||||
</template>
|
||||
|
||||
<script>
|
||||
import Sist2Api from "@/Sist2Api";
|
||||
import Preloader from "@/components/Preloader";
|
||||
import Sist2Query from "@/Sist2Query";
|
||||
import store from "@/store";
|
||||
|
||||
export default {
|
||||
name: "LazyContentDiv",
|
||||
@@ -18,10 +20,72 @@ export default {
|
||||
}
|
||||
},
|
||||
mounted() {
|
||||
Sist2Api.getDocInfo(this.docId).then(src => {
|
||||
this.content = src.data.content;
|
||||
const query = Sist2Query.searchQuery();
|
||||
|
||||
if (this.$store.state.optHighlight) {
|
||||
|
||||
const fields = this.$store.state.fuzzy
|
||||
? {"content.nGram": {}}
|
||||
: {content: {}};
|
||||
|
||||
query.highlight = {
|
||||
pre_tags: ["<mark>"],
|
||||
post_tags: ["</mark>"],
|
||||
number_of_fragments: 0,
|
||||
fields,
|
||||
};
|
||||
|
||||
if (!store.state.sist2Info.esVersionLegacy) {
|
||||
query.highlight.max_analyzed_offset = 999_999;
|
||||
}
|
||||
}
|
||||
|
||||
if ("function_score" in query.query) {
|
||||
query.query = query.query.function_score.query;
|
||||
}
|
||||
|
||||
if (!("must" in query.query.bool)) {
|
||||
query.query.bool.must = [];
|
||||
} else if (!Array.isArray(query.query.bool.must)) {
|
||||
query.query.bool.must = [query.query.bool.must];
|
||||
}
|
||||
|
||||
query.query.bool.must.push({match: {_id: this.docId}});
|
||||
|
||||
delete query["sort"];
|
||||
delete query["aggs"];
|
||||
delete query["search_after"];
|
||||
delete query.query["function_score"];
|
||||
|
||||
query._source = {
|
||||
includes: ["content", "name", "path", "extension"]
|
||||
}
|
||||
|
||||
query.size = 1;
|
||||
|
||||
Sist2Api.esQuery(query).then(resp => {
|
||||
this.loading = false;
|
||||
})
|
||||
if (resp.hits.hits.length === 1) {
|
||||
this.content = this.getContent(resp.hits.hits[0]);
|
||||
} else {
|
||||
console.log("FIXME: could not get content")
|
||||
console.log(resp)
|
||||
}
|
||||
});
|
||||
},
|
||||
methods: {
|
||||
getContent(doc) {
|
||||
if (!doc.highlight) {
|
||||
return doc._source.content;
|
||||
}
|
||||
|
||||
if (doc.highlight["content.nGram"]) {
|
||||
return doc.highlight["content.nGram"][0];
|
||||
}
|
||||
if (doc.highlight.content) {
|
||||
return doc.highlight.content[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
@@ -81,7 +81,9 @@ export default {
|
||||
methods: {
|
||||
keyDownListener(e) {
|
||||
|
||||
if (this.$refs.lightbox === undefined) {
|
||||
const isLightboxOpen = this.$refs.lightbox === undefined || this.$refs.lightbox.$el.tagName === undefined;
|
||||
|
||||
if (isLightboxOpen) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -26,7 +26,6 @@ export default new Vuex.Store({
|
||||
sortMode: "score",
|
||||
|
||||
fuzzy: false,
|
||||
size: 60,
|
||||
|
||||
optLang: "en",
|
||||
optLangIsDefault: true,
|
||||
@@ -34,6 +33,7 @@ export default new Vuex.Store({
|
||||
optTheme: "light",
|
||||
optDisplay: "grid",
|
||||
|
||||
optSize: 60,
|
||||
optHighlight: true,
|
||||
optTagOrOperator: false,
|
||||
optFuzzy: true,
|
||||
@@ -153,7 +153,7 @@ export default new Vuex.Store({
|
||||
setOptSuggestPath: (state, val) => state.optSuggestPath = val,
|
||||
setOptFragmentSize: (state, val) => state.optFragmentSize = val,
|
||||
setOptQueryMode: (state, val) => state.optQueryMode = val,
|
||||
setOptResultSize: (state, val) => state.size = val,
|
||||
setOptResultSize: (state, val) => state.optSize = val,
|
||||
setOptTagOrOperator: (state, val) => state.optTagOrOperator = val,
|
||||
|
||||
setOptTreemapType: (state, val) => state.optTreemapType = val,
|
||||
@@ -353,7 +353,7 @@ export default new Vuex.Store({
|
||||
searchText: state => state.searchText,
|
||||
pathText: state => state.pathText,
|
||||
fuzzy: state => state.fuzzy,
|
||||
size: state => state.size,
|
||||
size: state => state.optSize,
|
||||
sortMode: state => state.sortMode,
|
||||
lastQueryResult: state => state.lastQueryResults,
|
||||
lastDoc: function (state): EsHit | null {
|
||||
@@ -391,7 +391,7 @@ export default new Vuex.Store({
|
||||
optTreemapColor: state => state.optTreemapColor,
|
||||
optLightboxLoadOnlyCurrent: state => state.optLightboxLoadOnlyCurrent,
|
||||
optLightboxSlideDuration: state => state.optLightboxSlideDuration,
|
||||
optResultSize: state => state.size,
|
||||
optResultSize: state => state.optSize,
|
||||
optHideLegacy: state => state.optHideLegacy,
|
||||
optUpdateMimeMap: state => state.optUpdateMimeMap,
|
||||
optUseDatePicker: state => state.optUseDatePicker,
|
||||
|
||||
@@ -208,7 +208,7 @@ export default Vue.extend({
|
||||
this.$store.commit("setUiReachedScrollEnd", false);
|
||||
},
|
||||
async handleSearch(resp: EsResult) {
|
||||
if (resp.hits.hits.length == 0) {
|
||||
if (resp.hits.hits.length == 0 || resp.hits.hits.length < this.$store.state.optSize) {
|
||||
this.$store.commit("setUiReachedScrollEnd", true);
|
||||
}
|
||||
|
||||
@@ -248,6 +248,8 @@ export default Vue.extend({
|
||||
this.$store.commit("setLastQueryResult", resp);
|
||||
|
||||
this.docs.push(...resp.hits.hits);
|
||||
|
||||
resp.hits.hits.forEach(hit => this.docIds.add(hit._id));
|
||||
},
|
||||
getDateRange(): Promise<{ min: number, max: number }> {
|
||||
return sist2.esQuery({
|
||||
|
||||
@@ -81,6 +81,11 @@ void web_args_destroy(web_args_t *args) {
|
||||
}
|
||||
|
||||
void exec_args_destroy(exec_args_t *args) {
|
||||
|
||||
if (args->index_path != NULL) {
|
||||
free(args->index_path);
|
||||
}
|
||||
|
||||
free(args);
|
||||
}
|
||||
|
||||
|
||||
@@ -85,7 +85,7 @@ typedef struct web_args {
|
||||
typedef struct exec_args {
|
||||
char *es_url;
|
||||
char *es_index;
|
||||
const char *index_path;
|
||||
char *index_path;
|
||||
const char *script_path;
|
||||
int async_script;
|
||||
char *script;
|
||||
|
||||
@@ -110,16 +110,16 @@ void execute_update_script(const char *script, int async, const char index_id[SI
|
||||
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
|
||||
cJSON_AddStringToObject(term_obj, "index", index_id);
|
||||
|
||||
char *str = cJSON_Print(body);
|
||||
char *str = cJSON_PrintUnformatted(body);
|
||||
|
||||
char bulk_url[4096];
|
||||
char url[4096];
|
||||
if (async) {
|
||||
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
|
||||
snprintf(url, sizeof(url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
|
||||
Indexer->es_index);
|
||||
} else {
|
||||
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
|
||||
snprintf(url, sizeof(url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
|
||||
}
|
||||
response_t *r = web_post(bulk_url, str);
|
||||
response_t *r = web_post(url, str);
|
||||
if (!async) {
|
||||
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
|
||||
}
|
||||
@@ -139,6 +139,11 @@ void execute_update_script(const char *script, int async, const char index_id[SI
|
||||
|
||||
if (async) {
|
||||
cJSON *task = cJSON_GetObjectItem(resp, "task");
|
||||
|
||||
if (task == NULL) {
|
||||
LOG_FATALF("elastic.c", "FIXME: Could not get task id: %s", r->body);
|
||||
}
|
||||
|
||||
LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring);
|
||||
}
|
||||
|
||||
|
||||
2
src/index/static_generated.c
vendored
2
src/index/static_generated.c
vendored
File diff suppressed because one or more lines are too long
@@ -22,7 +22,7 @@ void free_response(response_t *resp) {
|
||||
free(resp);
|
||||
}
|
||||
|
||||
void web_post_async_poll(subreq_ctx_t* req) {
|
||||
void web_post_async_poll(subreq_ctx_t *req) {
|
||||
fd_set fdread;
|
||||
fd_set fdwrite;
|
||||
fd_set fdexcep;
|
||||
@@ -34,7 +34,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
|
||||
|
||||
CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd);
|
||||
|
||||
if(mc != CURLM_OK) {
|
||||
if (mc != CURLM_OK) {
|
||||
req->done = TRUE;
|
||||
return;
|
||||
}
|
||||
@@ -47,7 +47,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
|
||||
struct timeval timeout = {1, 0};
|
||||
int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
|
||||
|
||||
switch(rc) {
|
||||
switch (rc) {
|
||||
case -1:
|
||||
req->done = TRUE;
|
||||
break;
|
||||
@@ -142,6 +142,9 @@ response_t *web_post(const char *url, const char *data) {
|
||||
curl_easy_setopt(curl, CURLOPT_POST, 1);
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
|
||||
char err_buffer[CURL_ERROR_SIZE + 1] = {};
|
||||
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, err_buffer);
|
||||
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
@@ -151,12 +154,16 @@ response_t *web_post(const char *url, const char *data) {
|
||||
curl_easy_perform(curl);
|
||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
||||
|
||||
curl_easy_cleanup(curl);
|
||||
curl_slist_free_all(headers);
|
||||
|
||||
resp->body = buffer.buf;
|
||||
resp->size = buffer.cur;
|
||||
|
||||
if (resp->status_code == 0) {
|
||||
LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
|
||||
}
|
||||
|
||||
curl_easy_cleanup(curl);
|
||||
curl_slist_free_all(headers);
|
||||
|
||||
return resp;
|
||||
}
|
||||
|
||||
@@ -175,7 +182,7 @@ response_t *web_put(const char *url, const char *data) {
|
||||
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
|
||||
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
|
||||
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4);
|
||||
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
|
||||
@@ -505,9 +505,9 @@ void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_
|
||||
|
||||
// Copy tn store contents
|
||||
size_t buf_len;
|
||||
char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, sizeof(doc_id), &buf_len);
|
||||
char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, SIST_DOC_ID_LEN, &buf_len);
|
||||
if (buf_len != 0) {
|
||||
store_write(IncrementalCopyDestinationStore, (char *) doc_id, sizeof(doc_id), buf, buf_len);
|
||||
store_write(IncrementalCopyDestinationStore, (char *) doc_id, SIST_DOC_ID_LEN, buf, buf_len);
|
||||
free(buf);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,13 +42,13 @@ index_descriptor_t read_index_descriptor(char *path);
|
||||
// caller ensures char file_path[PATH_MAX]
|
||||
#define READ_INDICES(file_path, index_path, action_ok, action_main_fail, cond_original) \
|
||||
snprintf(file_path, PATH_MAX, "%s_index_main.ndjson.zst", index_path); \
|
||||
if (0 == access(file_path, R_OK)) { \
|
||||
if (access(file_path, R_OK) == 0) { \
|
||||
action_ok; \
|
||||
} else { \
|
||||
action_main_fail; \
|
||||
} \
|
||||
snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \
|
||||
if ((cond_original) && (0 == access(file_path, R_OK))) { \
|
||||
if ((cond_original) && access(file_path, R_OK) == 0) { \
|
||||
action_ok; \
|
||||
} \
|
||||
|
||||
|
||||
34
src/main.c
34
src/main.c
@@ -103,7 +103,7 @@ void sig_handler(int signum) {
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
void init_dir(const char *dirpath, scan_args_t* args) {
|
||||
void init_dir(const char *dirpath, scan_args_t *args) {
|
||||
char path[PATH_MAX];
|
||||
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
|
||||
|
||||
@@ -112,16 +112,16 @@ void init_dir(const char *dirpath, scan_args_t* args) {
|
||||
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON);
|
||||
|
||||
if (args->incremental != NULL) {
|
||||
// copy old index id
|
||||
char descriptor_path[PATH_MAX];
|
||||
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
|
||||
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
|
||||
memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
|
||||
// copy old index id
|
||||
char descriptor_path[PATH_MAX];
|
||||
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
|
||||
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
|
||||
memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
|
||||
} else {
|
||||
// generate new index id based on timestamp
|
||||
unsigned char index_md5[MD5_DIGEST_LENGTH];
|
||||
MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
|
||||
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
|
||||
// generate new index id based on timestamp
|
||||
unsigned char index_md5[MD5_DIGEST_LENGTH];
|
||||
MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
|
||||
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
|
||||
}
|
||||
|
||||
write_index_descriptor(path, &ScanCtx.index.desc);
|
||||
@@ -324,9 +324,13 @@ void load_incremental_index(const scan_args_t *args) {
|
||||
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version)
|
||||
}
|
||||
|
||||
READ_INDICES(file_path, args->incremental, incremental_read(ScanCtx.original_table, file_path, &original_desc),
|
||||
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
|
||||
1);
|
||||
READ_INDICES(
|
||||
file_path,
|
||||
args->incremental,
|
||||
incremental_read(ScanCtx.original_table, file_path, &original_desc),
|
||||
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
|
||||
TRUE
|
||||
);
|
||||
|
||||
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
|
||||
}
|
||||
@@ -534,6 +538,7 @@ void sist2_exec_script(exec_args_t *args) {
|
||||
|
||||
IndexCtx.es_url = args->es_url;
|
||||
IndexCtx.es_index = args->es_index;
|
||||
IndexCtx.needs_es_connection = TRUE;
|
||||
|
||||
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
|
||||
|
||||
@@ -776,9 +781,8 @@ int main(int argc, const char *argv[]) {
|
||||
sist2_exec_script(exec_args);
|
||||
|
||||
} else {
|
||||
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
|
||||
argparse_usage(&argparse);
|
||||
goto end;
|
||||
LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0])
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
|
||||
@@ -27,10 +27,6 @@
|
||||
|
||||
#define UNUSED(x) __attribute__((__unused__)) x
|
||||
|
||||
#define MD5_STR_LENGTH 33
|
||||
#define SHA1_STR_LENGTH 41
|
||||
#define SHA1_DIGEST_LENGTH 20
|
||||
|
||||
#include "util.h"
|
||||
#include "log.h"
|
||||
#include "types.h"
|
||||
|
||||
@@ -359,42 +359,6 @@ void index_info(struct mg_connection *nc) {
|
||||
}
|
||||
|
||||
|
||||
void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
|
||||
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_doc_id[SIST_DOC_ID_LEN];
|
||||
memcpy(arg_doc_id, hm->uri.ptr + 3, SIST_DOC_ID_LEN);
|
||||
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
|
||||
|
||||
cJSON *doc = elastic_get_document(arg_doc_id);
|
||||
cJSON *source = cJSON_GetObjectItem(doc, "_source");
|
||||
|
||||
cJSON *index_id = cJSON_GetObjectItem(source, "index");
|
||||
if (index_id == NULL) {
|
||||
cJSON_Delete(doc);
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
index_t *idx = get_index_by_id(index_id->valuestring);
|
||||
if (idx == NULL) {
|
||||
cJSON_Delete(doc);
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
char *json_str = cJSON_PrintUnformatted(source);
|
||||
send_response_line(nc, 200, (int) strlen(json_str), "Content-Type: application/json");
|
||||
mg_send(nc, json_str, (int) strlen(json_str));
|
||||
free(json_str);
|
||||
cJSON_Delete(doc);
|
||||
}
|
||||
|
||||
void file(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
|
||||
@@ -653,8 +617,6 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
|
||||
return;
|
||||
}
|
||||
tag(nc, hm);
|
||||
} else if (mg_http_match_uri(hm, "/d/*")) {
|
||||
document_info(nc, hm);
|
||||
} else {
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
}
|
||||
|
||||
2
src/web/static_generated.c
vendored
2
src/web/static_generated.c
vendored
File diff suppressed because one or more lines are too long
@@ -35,10 +35,20 @@ def sist2_index(files, *args):
|
||||
path = copy_files(files)
|
||||
|
||||
shutil.rmtree("test_i", ignore_errors=True)
|
||||
sist2("scan", path, "-o", "test_i", *args)
|
||||
sist2("scan", path, "-o", "test_i", "-t12", *args)
|
||||
return iter(sist2_index_to_dict("test_i"))
|
||||
|
||||
|
||||
def get_lmdb_contents(path):
|
||||
import lmdb
|
||||
|
||||
env = lmdb.open(path)
|
||||
|
||||
txn = env.begin(write=False)
|
||||
|
||||
return dict((k, v) for k, v in txn.cursor())
|
||||
|
||||
|
||||
def sist2_incremental_index(files, func=None, incremental_index=False, *args):
|
||||
path = copy_files(files)
|
||||
|
||||
@@ -46,7 +56,7 @@ def sist2_incremental_index(files, func=None, incremental_index=False, *args):
|
||||
func(path)
|
||||
|
||||
shutil.rmtree("test_i_inc", ignore_errors=True)
|
||||
sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", *args)
|
||||
sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", "-t12", *args)
|
||||
return iter(sist2_index_to_dict("test_i_inc", incremental_index))
|
||||
|
||||
|
||||
@@ -76,9 +86,31 @@ class ScanTest(unittest.TestCase):
|
||||
pass
|
||||
|
||||
file_count = sum(1 for _ in sist2_index(TEST_FILES))
|
||||
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, remove_files)), file_count - 2)
|
||||
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)), 3)
|
||||
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files)), file_count + 3)
|
||||
lmdb_full = get_lmdb_contents("test_i/thumbs")
|
||||
|
||||
# Remove files
|
||||
num_files_rm1 = len(list(sist2_incremental_index(TEST_FILES, remove_files)))
|
||||
lmdb_rm1 = get_lmdb_contents("test_i_inc/thumbs")
|
||||
self.assertEqual(num_files_rm1, file_count - 2)
|
||||
self.assertEqual(len(set(lmdb_full.keys() - set(lmdb_rm1.keys()))), 2)
|
||||
|
||||
# add files (incremental_index=True)
|
||||
num_files_add_inc = len(list(sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)))
|
||||
lmdb_add_inc = get_lmdb_contents("test_i_inc/thumbs")
|
||||
self.assertEqual(num_files_add_inc, 3)
|
||||
self.assertEqual(set(lmdb_full.keys()), set(lmdb_add_inc.keys()))
|
||||
|
||||
# add files
|
||||
num_files_add = len(list(sist2_incremental_index(TEST_FILES, add_files)))
|
||||
lmdb_add = get_lmdb_contents("test_i_inc/thumbs")
|
||||
self.assertEqual(num_files_add, file_count + 3)
|
||||
self.assertEqual(set(lmdb_full.keys()), set(lmdb_add.keys()))
|
||||
|
||||
# (No action)
|
||||
sist2_incremental_index(TEST_FILES)
|
||||
lmdb_inc = get_lmdb_contents("test_i_inc/thumbs")
|
||||
|
||||
self.assertEqual(set(lmdb_full.keys()), set(lmdb_inc.keys()))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
52
third-party/libscan/CMakeLists.txt
vendored
52
third-party/libscan/CMakeLists.txt
vendored
@@ -7,6 +7,11 @@ option(BUILD_TESTS "Build tests" on)
|
||||
|
||||
add_subdirectory(third-party/antiword)
|
||||
|
||||
set(USE_LIBXML2 OFF CACHE BOOL "" FORCE)
|
||||
set(USE_XMLWRITER OFF CACHE BOOL "" FORCE)
|
||||
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
|
||||
add_subdirectory(third-party/libmobi)
|
||||
|
||||
add_library(
|
||||
scan
|
||||
libscan/util.c libscan/util.h
|
||||
@@ -42,6 +47,22 @@ if (SIST_DEBUG)
|
||||
-fsanitize=address
|
||||
-fno-inline
|
||||
)
|
||||
elseif (SIST_FAST)
|
||||
add_compile_definitions(
|
||||
antiword
|
||||
NDEBUG
|
||||
)
|
||||
|
||||
target_compile_options(
|
||||
scan
|
||||
PRIVATE
|
||||
|
||||
-Ofast
|
||||
-march=native
|
||||
-fno-stack-protector
|
||||
-fomit-frame-pointer
|
||||
-freciprocal-math
|
||||
)
|
||||
else()
|
||||
add_compile_definitions(
|
||||
antiword
|
||||
@@ -97,35 +118,15 @@ target_compile_options(
|
||||
-g
|
||||
)
|
||||
|
||||
include(ExternalProject)
|
||||
find_program(MAKE_EXE NAMES gmake nmake make)
|
||||
ExternalProject_Add(
|
||||
libmobi
|
||||
GIT_REPOSITORY https://github.com/simon987/libmobi.git
|
||||
GIT_TAG "public"
|
||||
|
||||
UPDATE_COMMAND ""
|
||||
PATCH_COMMAND ""
|
||||
TEST_COMMAND ""
|
||||
CONFIGURE_COMMAND ./autogen.sh && ./configure
|
||||
INSTALL_COMMAND ""
|
||||
|
||||
PREFIX "third-party/ext_libmobi"
|
||||
SOURCE_DIR "third-party/ext_libmobi/src/libmobi"
|
||||
BINARY_DIR "third-party/ext_libmobi/src/libmobi"
|
||||
|
||||
BUILD_COMMAND ${MAKE_EXE} -j 8 --silent
|
||||
)
|
||||
|
||||
SET(MOBI_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/)
|
||||
SET(MOBI_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/)
|
||||
|
||||
if (SIST_DEBUG)
|
||||
SET(FFMPEG_DEBUG "--enable-debug=3" "--disable-optimizations")
|
||||
else()
|
||||
SET(FFMPEG_DEBUG "")
|
||||
endif()
|
||||
|
||||
include(ExternalProject)
|
||||
find_program(MAKE_EXE NAMES gmake nmake make)
|
||||
|
||||
ExternalProject_Add(
|
||||
ffmpeg
|
||||
GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git
|
||||
@@ -171,10 +172,10 @@ SET(WPD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwp
|
||||
|
||||
add_dependencies(
|
||||
scan
|
||||
libmobi
|
||||
ffmpeg
|
||||
antiword
|
||||
libwpd
|
||||
mobi
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
@@ -192,8 +193,6 @@ target_link_libraries(
|
||||
${MUPDF_LIB}
|
||||
openjp2
|
||||
|
||||
${MOBI_LIB_DIR}/libmobi.a
|
||||
|
||||
${WPD_LIB_DIR}/libwpd-0.9.a
|
||||
${WPD_LIB_DIR}/libwpd-stream-0.9.a
|
||||
|
||||
@@ -230,6 +229,7 @@ target_link_libraries(
|
||||
${GUMBO_LIB}
|
||||
dl
|
||||
antiword
|
||||
mobi
|
||||
unofficial::pcre::pcre unofficial::pcre::pcre16 unofficial::pcre::pcre32 unofficial::pcre::pcrecpp
|
||||
)
|
||||
|
||||
|
||||
2
third-party/libscan/libscan/mobi/scan_mobi.c
vendored
2
third-party/libscan/libscan/mobi/scan_mobi.c
vendored
@@ -1,6 +1,6 @@
|
||||
#include "scan_mobi.h"
|
||||
|
||||
#include <mobi.h>
|
||||
#include "../../third-party/libmobi/src/mobi.h"
|
||||
#include <errno.h>
|
||||
#include "stdlib.h"
|
||||
|
||||
|
||||
1
third-party/libscan/libscan/scan.h
vendored
1
third-party/libscan/libscan/scan.h
vendored
@@ -48,7 +48,6 @@ typedef int scan_code_t;
|
||||
#define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1);
|
||||
#define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1);
|
||||
|
||||
#define MD5_STR_LENGTH 33
|
||||
#define SIST_DOC_ID_LEN MD5_STR_LENGTH
|
||||
#define SIST_INDEX_ID_LEN MD5_STR_LENGTH
|
||||
|
||||
|
||||
2
third-party/libscan/third-party/antiword
vendored
2
third-party/libscan/third-party/antiword
vendored
Submodule third-party/libscan/third-party/antiword updated: b9afdb0561...ddb042143e
1
third-party/libscan/third-party/libmobi
vendored
Submodule
1
third-party/libscan/third-party/libmobi
vendored
Submodule
Submodule third-party/libscan/third-party/libmobi added at 395dbde361
Reference in New Issue
Block a user