Mirror of https://github.com/simon987/sist2.git (synced 2025-04-08 13:06:47 +00:00)
Rework document IDs
This commit is contained in: parent cdc4c0ad3d, commit 16a4fb4874
@@ -97,9 +97,12 @@ else ()
    target_compile_options(
            sist2
            PRIVATE

            -Ofast
            #-march=native
            -fno-stack-protector
            -fomit-frame-pointer
            #-freciprocal-math
    )
endif ()

@@ -121,11 +124,13 @@ target_link_libraries(
        CURL::libcurl

        pthread
-       magic
+       #magic

        c

        scan

+       /usr/lib/x86_64-linux-gnu/libmagic.so.1
)

add_custom_target(
@@ -9,7 +9,7 @@ RUN strip sist2 || mv sist2_debug sist2

FROM --platform="linux/amd64" ubuntu:21.10

-RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*
+RUN apt update && apt install -y curl libasan5 libmagic1 && rm -rf /var/lib/apt/lists/*

RUN mkdir -p /usr/share/tessdata && \
    cd /usr/share/tessdata/ && \
@@ -103,7 +103,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
 * `--thumbnail-count`
   Maximum number of thumbnails to generate. When set to a value >= 2, thumbnails for video previews
   will be generated. The actual number of thumbnails generated depends on the length of the video (maximum 1 image
-  every ~5s). Set to 0 to completely disable thumbnails.
+  every ~7s). Set to 0 to completely disable thumbnails.
 * `--content-size`
   Number of bytes of text to be extracted from the content of files (plain text, PDFs etc.).
   Repeated whitespace and special characters do not count toward this limit.
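Editorial note, not part of the commit: the ~7s figure above mirrors the thumbnail-count formula changed later in this commit in third-party/libscan/libscan/media/media.c (the cap applies to non-GIF videos of roughly 15 seconds or more). A minimal sketch of that calculation; the helper name and sample numbers are hypothetical.

#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Mirrors the formula in parse_media_format_ctx(): the requested --thumbnail-count
 * is capped at roughly one thumbnail per 7 seconds of video, plus one extra frame. */
static int thumbnails_to_generate(int tn_count, int video_duration_in_seconds) {
    return MAX(MIN(tn_count, video_duration_in_seconds / 7 + 1), 1) + 1;
}

int main(void) {
    printf("%d\n", thumbnails_to_generate(50, 60));  /* 60s video: capped to 9 + 1 = 10 */
    printf("%d\n", thumbnails_to_generate(3, 600));  /* long video: tn_count wins, 3 + 1 = 4 */
    return 0;
}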
@@ -55,5 +55,37 @@
        ]
      }
    }
  },
  "mappings": {
    "dynamic_templates": [
      {
        "keyword_fields": {
          "match_mapping_type": "string",
          "match": "kw_*",
          "mapping": {
            "type": "keyword"
          }
        }
      },
      {
        "integer_fields": {
          "match_mapping_type": "*",
          "match": "int_*",
          "mapping": {
            "type": "integer"
          }
        }
      },
      {
        "meta_fields": {
          "match_mapping_type": "*",
          "match": "mt_*",
          "mapping": {
            "type": "keyword",
            "index": false
          }
        }
      }
    ]
  }
}
sist2-vue/dist/css/index.css (vendored, 2 lines changed): file diff suppressed because one or more lines are too long
sist2-vue/dist/js/chunk-vendors.js (vendored, 38 lines changed): file diff suppressed because one or more lines are too long
sist2-vue/dist/js/index.js (vendored, 2 lines changed): file diff suppressed because one or more lines are too long
sist2-vue/package-lock.json (generated, 11 lines changed)
@@ -12,7 +12,6 @@
    "axios": "^0.25.0",
    "bootstrap-vue": "^2.21.2",
    "core-js": "^3.6.5",
-   "crypto-es": "^1.2.7",
    "d3": "^5.16.0",
    "date-fns": "^2.21.3",
    "dom-to-image": "^2.6.0",
@@ -5261,11 +5260,6 @@
        "node": "*"
      }
    },
-   "node_modules/crypto-es": {
-     "version": "1.2.7",
-     "resolved": "https://registry.npmjs.org/crypto-es/-/crypto-es-1.2.7.tgz",
-     "integrity": "sha512-UUqiVJ2gUuZFmbFsKmud3uuLcNP2+Opt+5ysmljycFCyhA0+T16XJmo1ev/t5kMChMqWh7IEvURNCqsg+SjZGQ=="
-   },
    "node_modules/css-color-names": {
      "version": "0.0.4",
      "resolved": "https://registry.npmjs.org/css-color-names/-/css-color-names-0.0.4.tgz",
@@ -19621,11 +19615,6 @@
        "randomfill": "^1.0.3"
      }
    },
-   "crypto-es": {
-     "version": "1.2.7",
-     "resolved": "https://registry.npmjs.org/crypto-es/-/crypto-es-1.2.7.tgz",
-     "integrity": "sha512-UUqiVJ2gUuZFmbFsKmud3uuLcNP2+Opt+5ysmljycFCyhA0+T16XJmo1ev/t5kMChMqWh7IEvURNCqsg+SjZGQ=="
-   },
    "css-color-names": {
      "version": "0.0.4",
      "resolved": "https://registry.npmjs.org/css-color-names/-/css-color-names-0.0.4.tgz",
@@ -11,7 +11,6 @@
    "axios": "^0.25.0",
    "bootstrap-vue": "^2.21.2",
    "core-js": "^3.6.5",
-   "crypto-es": "^1.2.7",
    "d3": "^5.16.0",
    "date-fns": "^2.21.3",
    "dom-to-image": "^2.6.0",
@@ -1,6 +1,5 @@
 import axios from "axios";
 import {ext, strUnescape, lum} from "./util";
-import CryptoES from 'crypto-es';

 export interface EsTag {
     id: string
@@ -30,7 +29,6 @@ export interface EsHit {
     _index: string
     _id: string
     _score: number
-    _path_md5: string
     _type: string
     _tags: Tag[]
     _seq: number
@@ -249,11 +247,6 @@ class Sist2Api {
         res.hits.hits.forEach((hit: EsHit) => {
             hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
             hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
-            hit["_path_md5"] = CryptoES.MD5(
-                hit["_source"]["path"] +
-                (hit["_source"]["path"] ? "/" : "") +
-                hit["_source"]["name"] + ext(hit)
-            ).toString();

             this.setHitProps(hit);
             this.setHitTags(hit);
@@ -380,8 +373,7 @@ class Sist2Api {
         return axios.post(`${this.baseUrl}tag/` + hit["_source"]["index"], {
             delete: false,
             name: tag,
-            doc_id: hit["_id"],
-            path_md5: hit._path_md5
+            doc_id: hit["_id"]
         });
     }

@@ -389,8 +381,7 @@ class Sist2Api {
         return axios.post(`${this.baseUrl}tag/` + hit["_source"]["index"], {
             delete: true,
             name: tag,
-            doc_id: hit["_id"],
-            path_md5: hit._path_md5
+            doc_id: hit["_id"]
         });
     }

@@ -56,6 +56,22 @@ export default Vue.extend({
     onThumbnailClick() {
       window.open(`/f/${this.doc._id}`, "_blank");
     },
+    findByCustomField(field, id) {
+      return {
+        query: {
+          bool: {
+            must: [
+              {
+                match: {
+                  [field]: id
+                }
+              }
+            ]
+          }
+        },
+        size: 1
+      }
+    },
     findById(id) {
       return {
         query: {
@@ -103,6 +119,8 @@ export default Vue.extend({
         query = this.findById(this.$route.query.byId);
       } else if (this.$route.query.byName) {
         query = this.findByName(this.$route.query.byName);
+      } else if (this.$route.query.by && this.$route.query.q) {
+        query = this.findByCustomField(this.$route.query.by, this.$route.query.q)
       }

       if (query) {
@@ -45,7 +45,7 @@ void elastic_cleanup() {
        destroy_indexer(Indexer);
    }

-void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {
+void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]) {

    cJSON *line = cJSON_CreateObject();

@@ -72,19 +72,19 @@ void delete_document(const char* document_id_str, void* UNUSED(_data)) {
    bulk_line->type = ES_BULK_LINE_DELETE;
    bulk_line->next = NULL;

-   memcpy(bulk_line->path_md5_str, document_id_str, MD5_STR_LENGTH);
+   strcpy(bulk_line->doc_id, document_id_str);
    tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
}


-void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
+void index_json(cJSON *document, const char doc_id[SIST_DOC_ID_LEN]) {
    char *json = cJSON_PrintUnformatted(document);

    size_t json_len = strlen(json);
    es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
    bulk_line->type = ES_BULK_LINE_INDEX;
    memcpy(bulk_line->line, json, json_len);
-   memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
+   strcpy(bulk_line->doc_id, doc_id);
    *(bulk_line->line + json_len) = '\n';
    *(bulk_line->line + json_len + 1) = '\0';
    bulk_line->next = NULL;
@@ -93,7 +93,7 @@ void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
    tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
}

-void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]) {
+void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]) {

    if (Indexer == NULL) {
        Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
@@ -167,7 +167,7 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
            snprintf(
                    action_str, sizeof(action_str),
                    "{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
-                   line->path_md5_str, Indexer->es_index
+                   line->doc_id, Indexer->es_index
            );

            size_t action_str_len = strlen(action_str);
@@ -184,7 +184,7 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
            snprintf(
                    action_str, sizeof(action_str),
                    "{\"delete\":{\"_id\":\"%s\",\"_index\":\"%s\"}}\n",
-                   line->path_md5_str, Indexer->es_index
+                   line->doc_id, Indexer->es_index
            );

            size_t action_str_len = strlen(action_str);
@@ -263,7 +263,7 @@ void _elastic_flush(int max) {
    if (r->status_code == 413) {

        if (max <= 1) {
-           LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
+           LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->doc_id)
            free_response(r);
            free(buf);
            free_queue(1);
@@ -8,7 +8,7 @@

typedef struct es_bulk_line {
    struct es_bulk_line *next;
-   char path_md5_str[MD5_STR_LENGTH];
+   char doc_id[SIST_DOC_ID_LEN];
    int type;
    char line[0];
} es_bulk_line_t;
@@ -40,9 +40,9 @@ typedef struct es_indexer es_indexer_t;

void elastic_index_line(es_bulk_line_t *line);

-void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
+void print_json(cJSON *document, const char index_id_str[SIST_INDEX_ID_LEN]);

-void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
+void index_json(cJSON *document, const char doc_id[SIST_INDEX_ID_LEN]);

void delete_document(const char *document_id_str, void* data);

@@ -59,6 +59,6 @@ char *elastic_get_status();

es_version_t *elastic_get_version(const char *es_url);

-void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);
+void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]);

#endif
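Editorial sketch, not part of the commit: after this change the bulk line carries the document ID as a NUL-terminated string in doc_id, with the NDJSON payload in the flexible array member, as in index_json() above. The struct layout and SIST_DOC_ID_LEN come from the hunks in this commit; the helper function and its name are hypothetical.

#include <stdlib.h>
#include <string.h>

#define SIST_DOC_ID_LEN 33  /* 32 hex characters + NUL, per scan.h in this commit */

typedef struct es_bulk_line {
    struct es_bulk_line *next;
    char doc_id[SIST_DOC_ID_LEN]; /* was: char path_md5_str[MD5_STR_LENGTH] */
    int type;
    char line[0];                 /* NDJSON payload follows the struct */
} es_bulk_line_t;

/* Hypothetical helper mirroring index_json(): one allocation holds the header,
 * the JSON payload, a trailing newline for the ES bulk API, and a NUL. */
static es_bulk_line_t *make_bulk_line(const char *doc_id, const char *json) {
    size_t json_len = strlen(json);
    es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);

    strcpy(bulk_line->doc_id, doc_id);  /* plain string copy, no hex conversion step anymore */
    bulk_line->type = 0;                /* the real code sets ES_BULK_LINE_INDEX or _DELETE */
    memcpy(bulk_line->line, json, json_len);
    bulk_line->line[json_len] = '\n';
    bulk_line->line[json_len + 1] = '\0';
    bulk_line->next = NULL;
    return bulk_line;
}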
src/index/static_generated.c (vendored, 2 lines changed): file diff suppressed because one or more lines are too long
@@ -124,9 +124,7 @@ char *build_json_string(document_t *doc) {
        cJSON_AddStringToObject(json, "path", "");
    }

-   char md5_str[MD5_STR_LENGTH];
-   buf2hex(doc->path_md5, MD5_DIGEST_LENGTH, md5_str);
-   cJSON_AddStringToObject(json, "_id", md5_str);
+   cJSON_AddStringToObject(json, "_id", doc->doc_id);

    // Metadata
    meta_line_t *meta = doc->meta_head;
@@ -452,32 +450,31 @@ void read_lines(const char *path, const line_processor_t processor) {

    dyn_buffer_destroy(&buf);
    fclose(file);
}

-void read_index_ndjson(const char *line, void* _data) {
-   void** data = _data;
-   const char* index_id = data[0];
+void read_index_ndjson(const char *line, void *_data) {
+   void **data = _data;
+   const char *index_id = data[0];
    index_func func = data[1];
    read_index_bin_handle_line(line, index_id, func);
}

-void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {
+void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func func) {
    if (strcmp(type, INDEX_TYPE_NDJSON) == 0) {
        read_lines(path, (line_processor_t) {
-               .data = (void*[2]){(void*)index_id, func} ,
-               .func = read_index_ndjson,
+               .data = (void *[2]) {(void *) index_id, func},
+               .func = read_index_ndjson,
        });
    }
}

static __thread GHashTable *IncrementalReadTable = NULL;

-void json_put_incremental(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
+void json_put_incremental(cJSON *document, UNUSED(const char doc_id[SIST_DOC_ID_LEN])) {
    const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
    const int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;

-   incremental_put_str(IncrementalReadTable, path_md5_str, mtime);
+   incremental_put(IncrementalReadTable, path_md5_str, mtime);
}

void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc) {
@@ -490,13 +487,11 @@ static __thread GHashTable *IncrementalNewTable = NULL;
static __thread store_t *IncrementalCopySourceStore = NULL;
static __thread store_t *IncrementalCopyDestinationStore = NULL;

-void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
+void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {

-   const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
-   unsigned char path_md5[MD5_DIGEST_LENGTH];
-   hex2buf(path_md5_str, MD5_STR_LENGTH - 1, path_md5);
+   const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;

-   if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get_str(IncrementalCopyTable, path_md5_str)) {
+   if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get(IncrementalCopyTable, doc_id)) {
        // Copy index line
        cJSON_DeleteItemFromObject(document, "index");
        char *json_str = cJSON_PrintUnformatted(document);
@@ -510,9 +505,9 @@ void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_S

        // Copy tn store contents
        size_t buf_len;
-       char *buf = store_read(IncrementalCopySourceStore, (char *) path_md5, sizeof(path_md5), &buf_len);
+       char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, sizeof(doc_id), &buf_len);
        if (buf_len != 0) {
-           store_write(IncrementalCopyDestinationStore, (char *) path_md5, sizeof(path_md5), buf, buf_len);
+           store_write(IncrementalCopyDestinationStore, (char *) doc_id, sizeof(doc_id), buf, buf_len);
            free(buf);
        }
    }
@@ -536,24 +531,24 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
    read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc);
}

-void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
+void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {

-   char path_md5_n[MD5_STR_LENGTH + 1];
-   path_md5_n[MD5_STR_LENGTH] = '\0';
-   path_md5_n[MD5_STR_LENGTH - 1] = '\n';
-   const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
+   char doc_id_n[SIST_DOC_ID_LEN + 1];
+   doc_id_n[SIST_DOC_ID_LEN] = '\0';
+   doc_id_n[SIST_DOC_ID_LEN - 1] = '\n';
+   const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;

    // do not delete archive virtual entries
    if (cJSON_GetObjectItem(document, "parent") == NULL
-       && !incremental_get_str(IncrementalCopyTable, path_md5_str)
-       && !incremental_get_str(IncrementalNewTable, path_md5_str)
+       && !incremental_get(IncrementalCopyTable, doc_id)
+       && !incremental_get(IncrementalNewTable, doc_id)
    ) {
-       memcpy(path_md5_n, path_md5_str, MD5_STR_LENGTH - 1);
-       zstd_write_string(path_md5_n, MD5_STR_LENGTH);
+       memcpy(doc_id_n, doc_id, SIST_DOC_ID_LEN - 1);
+       zstd_write_string(doc_id, sizeof(doc_id_n));
    }
}

-void incremental_delete(const char *del_filepath, const char* index_filepath,
+void incremental_delete(const char *del_filepath, const char *index_filepath,
                        GHashTable *copy_table, GHashTable *new_table) {

    if (WriterCtx.out_file == NULL) {
@@ -12,7 +12,7 @@ typedef struct line_processor {
    void (*func)(const char*, void*);
} line_processor_t;

-typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);
+typedef void(*index_func)(cJSON *, const char[SIST_DOC_ID_LEN]);

void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
                      const char *dst_filepath, GHashTable *copy_table);
@@ -24,7 +24,7 @@ void write_document(document_t *doc);

void read_lines(const char *path, const line_processor_t processor);

-void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);
+void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func);

void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc);
@@ -52,22 +52,7 @@ void store_flush(store_t *store) {
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {

    if (LogCtx.very_verbose) {
-       if (key_len == MD5_DIGEST_LENGTH) {
-           char path_md5_str[MD5_STR_LENGTH];
-           buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
-
-           LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
-
-       } else if (key_len == MD5_DIGEST_LENGTH + sizeof(int)) {
-           char path_md5_str[MD5_STR_LENGTH];
-           buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
-
-           LOG_DEBUGF("store.c", "Store write {%s/%d} %lu bytes",
-                      path_md5_str, *(int *) (key + MD5_DIGEST_LENGTH), buf_len);
-
-       } else {
-           LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
-       }
+       LOG_DEBUGF("store.c", "Store write %s@{%s} %lu bytes", store->path, key, buf_len)
    }

#if (SIST_FAKE_STORE != 1)
@@ -22,7 +22,7 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,

    job->vfile.info = *info;

-   memset(job->parent, 0, MD5_DIGEST_LENGTH);
+   job->parent[0] = '\0';

    job->vfile.filepath = job->filepath;
    job->vfile.read = fs_read;
@@ -118,7 +118,7 @@ void init_dir(const char *dirpath, scan_args_t* args) {
        index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
        memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
    } else {
-       // genreate new index id based on timestamp
+       // generate new index id based on timestamp
        unsigned char index_md5[MD5_DIGEST_LENGTH];
        MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
        buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
@@ -69,7 +69,7 @@ void parse(void *arg) {
    doc->base = (short) job->base;

    char *rel_path = doc->filepath + ScanCtx.index.desc.root_len;
-   MD5((unsigned char *) rel_path, strlen(rel_path), doc->path_md5);
+   generate_doc_id(rel_path, doc->doc_id);

    doc->meta_head = NULL;
    doc->meta_tail = NULL;
@@ -77,10 +77,10 @@ void parse(void *arg) {
    doc->size = job->vfile.info.st_size;
    doc->mtime = (int) job->vfile.info.st_mtim.tv_sec;

-   int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5);
+   int inc_ts = incremental_get(ScanCtx.original_table, doc->doc_id);
    if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
        pthread_mutex_lock(&ScanCtx.copy_table_mu);
-       incremental_mark_file(ScanCtx.copy_table, doc->path_md5);
+       incremental_mark_file(ScanCtx.copy_table, doc->doc_id);
        pthread_mutex_unlock(&ScanCtx.copy_table_mu);

        pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
@@ -96,16 +96,14 @@ void parse(void *arg) {

    if (ScanCtx.new_table != NULL) {
        pthread_mutex_lock(&ScanCtx.copy_table_mu);
-       incremental_mark_file(ScanCtx.new_table, doc->path_md5);
+       incremental_mark_file(ScanCtx.new_table, doc->doc_id);
        pthread_mutex_unlock(&ScanCtx.copy_table_mu);
    }

    char *buf[MAGIC_BUF_SIZE];

    if (LogCtx.very_verbose) {
-       char path_md5_str[MD5_STR_LENGTH];
-       buf2hex(doc->path_md5, MD5_DIGEST_LENGTH, path_md5_str);
-       LOG_DEBUGF(job->filepath, "Starting parse job {%s}", path_md5_str)
+       LOG_DEBUGF(job->filepath, "Starting parse job {%s}", doc->doc_id)
    }

    if (job->vfile.info.st_size == 0) {
@@ -218,10 +216,10 @@ void parse(void *arg) {
    abort:

    //Parent meta
-   if (!md5_digest_is_null(job->parent)) {
-       meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + MD5_STR_LENGTH);
+   if (job->parent[0] != '\0') {
+       meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + SIST_INDEX_ID_LEN);
        meta_parent->key = MetaParent;
-       buf2hex(job->parent, MD5_DIGEST_LENGTH, meta_parent->str_val);
+       strcpy(meta_parent->str_val, job->parent);
        APPEND_META((doc), meta_parent)

        doc->has_parent = TRUE;
@@ -23,16 +23,19 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
    }
    char *json_str = cJSON_PrintUnformatted(json);

-   unsigned char path_md5[MD5_DIGEST_LENGTH];
-   MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
-       path_md5);
+   char assoc_doc_id[SIST_DOC_ID_LEN];

-   char path_md5_str[MD5_STR_LENGTH];
-   buf2hex(path_md5, MD5_DIGEST_LENGTH, path_md5_str);
+   char rel_path[PATH_MAX];
+   size_t rel_path_len = doc->ext - 1 - ScanCtx.index.desc.root_len;
+   memcpy(rel_path, vfile->filepath + ScanCtx.index.desc.root_len, rel_path_len);
+   *(rel_path + rel_path_len) = '\0';

-   store_write(ScanCtx.index.meta_store, path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
+   generate_doc_id(rel_path, assoc_doc_id);
+
+   store_write(ScanCtx.index.meta_store, assoc_doc_id, sizeof(assoc_doc_id), json_str,
+               strlen(json_str) + 1);

    cJSON_Delete(json);
    free(json_str);
    free(buf);
    }
}
@@ -53,7 +53,7 @@
#include <ctype.h>
#include "git_hash.h"

-#define VERSION "2.11.7"
+#define VERSION "2.12.0"
static const char *const Version = VERSION;

#ifndef SIST_PLATFORM
@@ -20,7 +20,7 @@ typedef struct {
    long count;
} agg_t;

-void fill_tables(cJSON *document, UNUSED(const char index_id[MD5_STR_LENGTH])) {
+void fill_tables(cJSON *document, UNUSED(const char index_id[SIST_INDEX_ID_LEN])) {

    if (cJSON_GetObjectItem(document, "parent") != NULL) {
        return;
@@ -4,7 +4,7 @@
#define INDEX_TYPE_NDJSON "ndjson"

typedef struct index_descriptor {
-   char id[MD5_STR_LENGTH];
+   char id[SIST_INDEX_ID_LEN];
    char version[64];
    long timestamp;
    char root[PATH_MAX];
src/util.h (44 lines changed)
@@ -10,8 +10,6 @@
#include "third-party/utf8.h/utf8.h"
#include "libscan/scan.h"

-#define MD5_STR_LENGTH 33
-
char *abspath(const char *path);

@@ -94,40 +92,24 @@ static void buf2hex(const unsigned char *buf, size_t buflen, char *hex_string) {

__always_inline
-static int md5_digest_is_null(const unsigned char digest[MD5_DIGEST_LENGTH]) {
-    return (*(int64_t *) digest) == 0 && (*((int64_t *) digest + 1)) == 0;
+static void generate_doc_id(const char *rel_path, char *doc_id) {
+    unsigned char md[MD5_DIGEST_LENGTH];
+
+    MD5((unsigned char *) rel_path, strlen(rel_path), md);
+    buf2hex(md, sizeof(md), doc_id);
}


__always_inline
-static void incremental_put(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH], int mtime) {
-    char *ptr = malloc(MD5_STR_LENGTH);
-    buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
+static void incremental_put(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN], int mtime) {
+    char *ptr = malloc(SIST_DOC_ID_LEN);
+    strcpy(ptr, doc_id);
    g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
}

-__always_inline
-static void incremental_put_str(GHashTable *table, const char *path_md5, int mtime) {
-    char *ptr = malloc(MD5_STR_LENGTH);
-    strcpy(ptr, path_md5);
-    g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
-}
-
__always_inline
-static int incremental_get(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
+static int incremental_get(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) {
    if (table != NULL) {
-        char md5_str[MD5_STR_LENGTH];
-        buf2hex(path_md5, MD5_DIGEST_LENGTH, md5_str);
-        return GPOINTER_TO_INT(g_hash_table_lookup(table, md5_str));
-    } else {
-        return 0;
-    }
-}
-
-__always_inline
-static int incremental_get_str(GHashTable *table, const char *path_md5) {
-    if (table != NULL) {
-        return GPOINTER_TO_INT(g_hash_table_lookup(table, path_md5));
+        return GPOINTER_TO_INT(g_hash_table_lookup(table, doc_id));
    } else {
        return 0;
    }
@@ -138,9 +120,9 @@ static int incremental_get_str(GHashTable *table, const char *path_md5) {
 * !!Not thread safe.
 */
__always_inline
-static int incremental_mark_file(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
-    char *ptr = malloc(MD5_STR_LENGTH);
-    buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
+static int incremental_mark_file(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) {
+    char *ptr = malloc(SIST_DOC_ID_LEN);
+    strcpy(ptr, doc_id);
    return g_hash_table_insert(table, ptr, GINT_TO_POINTER(1));
}
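Editorial sketch, not part of the commit: with the helpers above, incremental-scan table entries are keyed by the doc_id string itself, so lookups no longer convert a binary digest to hex first. This assumes the tables are created with string hashing (table creation is not part of this diff); the sample ID and mtime are made up.

#include <glib.h>

#define SIST_DOC_ID_LEN 33

int main(void) {
    /* assumption: a string-keyed table; how sist2 creates its tables is not shown in this diff */
    GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);

    const char doc_id[SIST_DOC_ID_LEN] = "0cc175b9c0f1b6a831c399e269772661"; /* made-up ID */
    int mtime = 1644000000;

    /* incremental_put(): duplicate the ID string and map it to the file's mtime */
    g_hash_table_insert(table, g_strdup(doc_id), GINT_TO_POINTER(mtime));

    /* incremental_get(): direct lookup by the string key; returns 0 when absent */
    int found = GPOINTER_TO_INT(g_hash_table_lookup(table, doc_id));

    g_hash_table_destroy(table);
    return found == mtime ? 0 : 1;
}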
@@ -36,7 +36,7 @@ static void send_response_line(struct mg_connection *nc, int status_code, size_t

index_t *get_index_by_id(const char *index_id) {
    for (int i = WebCtx.index_count; i >= 0; i--) {
-       if (strncmp(index_id, WebCtx.indices[i].desc.id, MD5_STR_LENGTH) == 0) {
+       if (strncmp(index_id, WebCtx.indices[i].desc.id, SIST_INDEX_ID_LEN) == 0) {
            return &WebCtx.indices[i];
        }
    }
@@ -70,23 +70,23 @@ void search_index(struct mg_connection *nc, struct mg_http_message *hm) {

void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {

-   if (hm->uri.len != MD5_STR_LENGTH + 4) {
+   if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
        HTTP_REPLY_NOT_FOUND
        return;
    }

-   char arg_md5[MD5_STR_LENGTH];
-   memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
-   *(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
+   char arg_index_id[SIST_INDEX_ID_LEN];
+   memcpy(arg_index_id, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
+   *(arg_index_id + SIST_INDEX_ID_LEN - 1) = '\0';

-   index_t *index = get_index_by_id(arg_md5);
+   index_t *index = get_index_by_id(arg_index_id);
    if (index == NULL) {
        HTTP_REPLY_NOT_FOUND
        return;
    }

    const char *file;
-   switch (atoi(hm->uri.ptr + 3 + MD5_STR_LENGTH)) {
+   switch (atoi(hm->uri.ptr + 3 + SIST_INDEX_ID_LEN)) {
        case 1:
            file = "treemap.csv";
            break;
@@ -150,28 +150,25 @@ void style_vendor(struct mg_connection *nc, struct mg_http_message *hm) {

void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {

-   int parse_tn_num = FALSE;
+   int has_thumbnail_index = FALSE;

-   if (hm->uri.len != 68) {
+   if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2) {

-       if (hm->uri.len != 68 + 4) {
+       if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2 + 4) {
            LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
            HTTP_REPLY_NOT_FOUND
            return;
        }
-       parse_tn_num = TRUE;
+       has_thumbnail_index = TRUE;
    }

-   char arg_file_md5[MD5_STR_LENGTH];
-   char arg_index[MD5_STR_LENGTH];
+   char arg_doc_id[SIST_DOC_ID_LEN];
+   char arg_index[SIST_INDEX_ID_LEN];

-   memcpy(arg_index, hm->uri.ptr + 3, MD5_STR_LENGTH);
-   *(arg_index + MD5_STR_LENGTH - 1) = '\0';
-   memcpy(arg_file_md5, hm->uri.ptr + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH);
-   *(arg_file_md5 + MD5_STR_LENGTH - 1) = '\0';
-
-   unsigned char md5_buf[MD5_DIGEST_LENGTH];
-   hex2buf(arg_file_md5, MD5_STR_LENGTH - 1, md5_buf);
+   memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
+   *(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
+   memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
+   *(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';

    store_t *store = get_store(arg_index);
    if (store == NULL) {
@@ -183,16 +180,17 @@ void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
    char *data;
    size_t data_len = 0;

-   if (parse_tn_num) {
-       int tn_num = atoi(hm->uri.ptr + 68);
+   if (has_thumbnail_index) {
+       const char *tn_index = hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2;

-       char tn_key[sizeof(md5_buf) + sizeof(int)];
-       memcpy(tn_key, md5_buf, sizeof(md5_buf));
-       memcpy(tn_key + sizeof(md5_buf), &tn_num, sizeof(tn_num));
+       char tn_key[sizeof(arg_doc_id) + sizeof(char) * 4];
+
+       memcpy(tn_key, arg_doc_id, sizeof(arg_doc_id));
+       memcpy(tn_key + sizeof(arg_doc_id) - 1, tn_index, sizeof(char) * 4);

        data = store_read(store, (char *) tn_key, sizeof(tn_key), &data_len);
    } else {
-       data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
+       data = store_read(store, (char *) arg_doc_id, sizeof(arg_doc_id), &data_len);
    }

    if (data_len != 0) {
@@ -357,17 +355,17 @@ void index_info(struct mg_connection *nc) {

void document_info(struct mg_connection *nc, struct mg_http_message *hm) {

-   if (hm->uri.len != MD5_STR_LENGTH + 2) {
+   if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
        LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
        HTTP_REPLY_NOT_FOUND
        return;
    }

-   char arg_md5[MD5_STR_LENGTH];
-   memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
-   *(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
+   char arg_doc_id[SIST_DOC_ID_LEN];
+   memcpy(arg_doc_id, hm->uri.ptr + 3, SIST_DOC_ID_LEN);
+   *(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';

-   cJSON *doc = elastic_get_document(arg_md5);
+   cJSON *doc = elastic_get_document(arg_doc_id);
    cJSON *source = cJSON_GetObjectItem(doc, "_source");

    cJSON *index_id = cJSON_GetObjectItem(source, "index");
@@ -393,17 +391,17 @@ void document_info(struct mg_connection *nc, struct mg_http_message *hm) {

void file(struct mg_connection *nc, struct mg_http_message *hm) {

-   if (hm->uri.len != MD5_STR_LENGTH + 2) {
+   if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
        LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr)
        HTTP_REPLY_NOT_FOUND
        return;
    }

-   char arg_md5[MD5_STR_LENGTH];
-   memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
-   *(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
+   char arg_doc_id[SIST_DOC_ID_LEN];
+   memcpy(arg_doc_id, hm->uri.ptr + 3, SIST_DOC_ID_LEN);
+   *(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';

-   const char *next = arg_md5;
+   const char *next = arg_doc_id;
    cJSON *doc = NULL;
    cJSON *index_id = NULL;
    cJSON *source = NULL;
@@ -454,7 +452,6 @@ void status(struct mg_connection *nc) {
typedef struct {
    char *name;
    int delete;
-   char *path_md5_str;
    char *doc_id;
} tag_req_t;

@@ -474,12 +471,6 @@ tag_req_t *parse_tag_request(cJSON *json) {
        return NULL;
    }

-   cJSON *arg_path_md5 = cJSON_GetObjectItem(json, "path_md5");
-   if (arg_path_md5 == NULL || !cJSON_IsString(arg_path_md5) ||
-       strlen(arg_path_md5->valuestring) != MD5_STR_LENGTH - 1) {
-       return NULL;
-   }
-
    cJSON *arg_doc_id = cJSON_GetObjectItem(json, "doc_id");
    if (arg_doc_id == NULL || !cJSON_IsString(arg_doc_id)) {
        return NULL;
@@ -488,22 +479,21 @@
    tag_req_t *req = malloc(sizeof(tag_req_t));
    req->delete = arg_delete->valueint;
    req->name = arg_name->valuestring;
-   req->path_md5_str = arg_path_md5->valuestring;
    req->doc_id = arg_doc_id->valuestring;

    return req;
}

void tag(struct mg_connection *nc, struct mg_http_message *hm) {
-   if (hm->uri.len != MD5_STR_LENGTH + 4) {
+   if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
        LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
        HTTP_REPLY_NOT_FOUND
        return;
    }

-   char arg_index[MD5_STR_LENGTH];
-   memcpy(arg_index, hm->uri.ptr + 5, MD5_STR_LENGTH);
-   *(arg_index + MD5_STR_LENGTH - 1) = '\0';
+   char arg_index[SIST_INDEX_ID_LEN];
+   memcpy(arg_index, hm->uri.ptr + 5, SIST_INDEX_ID_LEN);
+   *(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';

    if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
        LOG_DEBUG("serve.c", "Invalid tag request")
@@ -535,7 +525,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
    cJSON *arr = NULL;

    size_t data_len = 0;
-   const char *data = store_read(store, arg_req->path_md5_str, MD5_STR_LENGTH, &data_len);
+   const char *data = store_read(store, arg_req->doc_id, SIST_DOC_ID_LEN, &data_len);
    if (data_len == 0) {
        arr = cJSON_CreateArray();
    } else {
@@ -595,7 +585,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
    }

    char *json_str = cJSON_PrintUnformatted(arr);
-   store_write(store, arg_req->path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
+   store_write(store, arg_req->doc_id, SIST_DOC_ID_LEN, json_str, strlen(json_str) + 1);
    store_flush(store);

    free(arg_req);
src/web/static_generated.c (vendored, 2 lines changed): file diff suppressed because one or more lines are too long
third-party/libscan/CMakeLists.txt (vendored, 52 lines changed)
@@ -6,26 +6,6 @@ set(CMAKE_C_STANDARD 11)
option(BUILD_TESTS "Build tests" on)

add_subdirectory(third-party/antiword)
-if (SIST_DEBUG)
-    add_compile_definitions(
-            antiword
-            DEBUG
-    )
-    target_compile_options(
-            antiword
-            PRIVATE
-            -g
-            -fstack-protector
-            -fno-omit-frame-pointer
-            -fsanitize=address
-            -fno-inline
-    )
-else()
-    add_compile_definitions(
-            antiword
-            NDEBUG
-    )
-endif()

add_library(
        scan
@@ -48,6 +28,38 @@ add_library(
        libscan/mobi/scan_mobi.c libscan/mobi/scan_mobi.h libscan/raw/raw.c libscan/raw/raw.h)
set_target_properties(scan PROPERTIES LINKER_LANGUAGE C)

+if (SIST_DEBUG)
+    add_compile_definitions(
+            antiword
+            DEBUG
+    )
+    target_compile_options(
+            antiword
+            PRIVATE
+            -g
+            -fstack-protector
+            -fno-omit-frame-pointer
+            -fsanitize=address
+            -fno-inline
+    )
+else()
+    add_compile_definitions(
+            antiword
+            NDEBUG
+    )
+
+    target_compile_options(
+            scan
+            PRIVATE
+
+            -Ofast
+            #-march=native
+            -fno-stack-protector
+            -fomit-frame-pointer
+            #-freciprocal-math
+    )
+endif()

set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib .so)

find_package(cJSON CONFIG REQUIRED)
third-party/libscan/libscan/arc/arc.c (vendored, 2 lines changed)
@@ -202,7 +202,7 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre
    sub_job->vfile.logf = ctx->logf;
    sub_job->vfile.has_checksum = FALSE;
    sub_job->vfile.calculate_checksum = f->calculate_checksum;
-   memcpy(sub_job->parent, doc->path_md5, MD5_DIGEST_LENGTH);
+   strcpy(sub_job->parent, doc->doc_id);

    while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
        sub_job->vfile.info = *archive_entry_stat(entry);
third-party/libscan/libscan/ebook/ebook.c (vendored, 2 lines changed)
@@ -156,7 +156,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
    avcodec_receive_packet(jpeg_encoder, &jpeg_packet);

    APPEND_LONG_META(doc, MetaThumbnail, 1)
-   ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
+   ctx->store(doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);

    free(samples);
    av_packet_unref(&jpeg_packet);
third-party/libscan/libscan/font/font.c (vendored, 2 lines changed)
@@ -232,7 +232,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
    bmp_format(&bmp_data, dimensions, bitmap);

    APPEND_LONG_META(doc, MetaThumbnail, 1)
-   ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) bmp_data.buf, bmp_data.cur);
+   ctx->store(doc->doc_id, sizeof(doc->doc_id), (char *) bmp_data.buf, bmp_data.cur);

    dyn_buffer_destroy(&bmp_data);
    free(bitmap);
third-party/libscan/libscan/media/media.c (vendored, 17 lines changed)
@@ -459,7 +459,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
    if (scaled_frame == STORE_AS_IS) {
        return_value = SAVE_THUMBNAIL_OK;

-       ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
+       ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) frame_and_packet->packet->data,
                   frame_and_packet->packet->size);
    } else {
        // Encode frame to jpeg
@@ -473,7 +473,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor

        // Save thumbnail
        if (thumbnail_index == 0) {
-           ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
+           ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
            return_value = SAVE_THUMBNAIL_OK;

        } else if (thumbnail_index > 1) {
@@ -482,9 +482,8 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
            // I figure out a better fix.
            thumbnail_index -= 1;

-           char tn_key[sizeof(doc->path_md5) + sizeof(int)];
-           memcpy(tn_key, doc->path_md5, sizeof(doc->path_md5));
-           memcpy(tn_key + sizeof(doc->path_md5), &thumbnail_index, sizeof(thumbnail_index));
+           char tn_key[sizeof(doc->doc_id) + sizeof(char) * 4];
+           snprintf(tn_key, sizeof(tn_key), "%s%04d", doc->doc_id, thumbnail_index);

            ctx->store((char *) tn_key, sizeof(tn_key), (char *) jpeg_packet.data, jpeg_packet.size);
        } else {
@@ -579,8 +578,8 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
    int video_duration_in_seconds = (int) (pFormatCtx->duration / AV_TIME_BASE);

    int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF && video_duration_in_seconds >= 15)
-                                // Limit to ~1 thumbnail every 5s
-                                ? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 5 + 1), 1) + 1
+                                // Limit to ~1 thumbnail every 7s
+                                ? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 7 + 1), 1) + 1
                                 : 1;

    const double seek_increment = thumbnails_to_generate == 1
@@ -845,7 +844,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu

    if (scaled_frame == STORE_AS_IS) {
        APPEND_LONG_META(doc, MetaThumbnail, 1)
-       ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
+       ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) frame_and_packet->packet->data,
                   frame_and_packet->packet->size);
    } else {
        // Encode frame to jpeg
@@ -859,7 +858,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu

    // Save thumbnail
    APPEND_LONG_META(doc, MetaThumbnail, 1)
-   ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
+   ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);

    av_packet_unref(&jpeg_packet);
    avcodec_free_context(&jpeg_encoder);
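Editorial sketch, not part of the commit: the extra video thumbnails also change key format. Instead of the binary MD5 digest followed by a raw int, the store key is now the doc_id string followed by a 4-digit, zero-padded thumbnail index, which is what serve.c reconstructs from the URL earlier in this commit. The helper name and sample ID below are hypothetical.

#include <stdio.h>
#include <string.h>

#define SIST_DOC_ID_LEN 33  /* from scan.h in this commit */

/* Illustrative only: thumbnail 0 is stored under the bare doc_id; thumbnails >= 1
 * are stored under doc_id followed by a 4-digit, zero-padded index, as in media.c above. */
static void make_thumbnail_key(char *tn_key, size_t tn_key_len,
                               const char *doc_id, int thumbnail_index) {
    snprintf(tn_key, tn_key_len, "%s%04d", doc_id, thumbnail_index);
}

int main(void) {
    char tn_key[SIST_DOC_ID_LEN + 4];  /* same size as the tn_key buffer in media.c */
    make_thumbnail_key(tn_key, sizeof(tn_key), "0cc175b9c0f1b6a831c399e269772661", 2);
    printf("%s\n", tn_key);  /* prints the 32-char ID followed by "0002" */
    return 0;
}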
third-party/libscan/libscan/ooxml/ooxml.c (vendored, 2 lines changed)
@@ -191,7 +191,7 @@ void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, s
    archive_read_data(a, buf, entry_size);

    APPEND_LONG_META(doc, MetaThumbnail, 1)
-   ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), buf, entry_size);
+   ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), buf, entry_size);
    free(buf);
}
third-party/libscan/libscan/raw/raw.c (vendored, 2 lines changed)
@@ -84,7 +84,7 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do
    avcodec_receive_packet(jpeg_encoder, &jpeg_packet);

    APPEND_LONG_META(doc, MetaThumbnail, 1)
-   ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
+   ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);

    av_packet_unref(&jpeg_packet);
    av_free(*scaled_frame->data);
third-party/libscan/libscan/scan.h (vendored, 8 lines changed)
@@ -48,6 +48,10 @@ typedef int scan_code_t;
#define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1);
#define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1);

+#define MD5_STR_LENGTH 33
+#define SIST_DOC_ID_LEN MD5_STR_LENGTH
+#define SIST_INDEX_ID_LEN MD5_STR_LENGTH
+
enum metakey {
    // String
    MetaContent = 1,
@@ -103,7 +107,7 @@ typedef struct meta_line {


typedef struct document {
-   unsigned char path_md5[MD5_DIGEST_LENGTH];
+   char doc_id[SIST_DOC_ID_LEN];
    unsigned long size;
    unsigned int mime;
    int mtime;
@@ -159,7 +163,7 @@ typedef struct parse_job_t {
    int base;
    int ext;
    struct vfile vfile;
-   unsigned char parent[MD5_DIGEST_LENGTH];
+   char parent[SIST_DOC_ID_LEN];
    char filepath[1];
} parse_job_t;
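Editorial sketch, not part of the commit: with doc_id and parent now plain strings, "no parent" becomes an empty string rather than an all-zero digest, and archive sub-jobs simply copy the parent's ID (see the fs.c, arc.c and parse.c hunks above). The structs below are trimmed stand-ins, not the real scan.h definitions, and the sample ID is made up.

#include <stdio.h>
#include <string.h>

#define MD5_STR_LENGTH 33
#define SIST_DOC_ID_LEN MD5_STR_LENGTH

/* Stand-in structs, reduced to the ID fields only (illustration). */
typedef struct {
    char parent[SIST_DOC_ID_LEN];  /* was: unsigned char parent[MD5_DIGEST_LENGTH] */
} parse_job_t;

typedef struct {
    char doc_id[SIST_DOC_ID_LEN];  /* was: unsigned char path_md5[MD5_DIGEST_LENGTH] */
} document_t;

int main(void) {
    parse_job_t job;
    document_t archive = {.doc_id = "0cc175b9c0f1b6a831c399e269772661"}; /* made-up ID */

    /* fs.c: a top-level file has no parent, so the string is simply empty */
    job.parent[0] = '\0';

    /* arc.c: a sub-job inherits the archive's doc_id as its parent */
    strcpy(job.parent, archive.doc_id);

    /* parse.c: the "has parent" test is now a plain empty-string check */
    if (job.parent[0] != '\0') {
        printf("parent doc id: %s\n", job.parent);
    }
    return 0;
}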