Compare commits

..

4 Commits

62 changed files with 511 additions and 773 deletions

3
.gitmodules vendored
View File

@@ -7,6 +7,3 @@
[submodule "third-party/libscan/third-party/antiword"]
path = third-party/libscan/third-party/antiword
url = https://github.com/simon987/antiword
[submodule "third-party/libscan/third-party/libmobi"]
path = third-party/libscan/third-party/libmobi
url = https://github.com/bfabiszewski/libmobi

View File

@@ -4,7 +4,6 @@ set(CMAKE_C_STANDARD 11)
project(sist2 C)
option(SIST_DEBUG "Build a debug executable" on)
option(SIST_FAST "Enable more optimisation flags" off)
option(SIST_FAKE_STORE "Disable IO operations of LMDB stores for debugging purposes" 0)
add_compile_definitions(
@@ -55,10 +54,6 @@ find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED)
find_package(CURL CONFIG REQUIRED)
find_library(MAGIC_LIB
NAMES libmagic.so.1 magic
PATHS /usr/lib/x86_64-linux-gnu/ /usr/lib/aarch64-linux-gnu/
)
target_include_directories(
@@ -98,22 +93,10 @@ if (SIST_DEBUG)
PROPERTIES
OUTPUT_NAME sist2_debug
)
elseif (SIST_FAST)
target_compile_options(
sist2
PRIVATE
-Ofast
-march=native
-fno-stack-protector
-fomit-frame-pointer
-freciprocal-math
)
else ()
target_compile_options(
sist2
PRIVATE
-Ofast
-fno-stack-protector
-fomit-frame-pointer
@@ -138,12 +121,11 @@ target_link_libraries(
CURL::libcurl
pthread
magic
c
scan
${MAGIC_LIB}
)
add_custom_target(

View File

@@ -9,7 +9,7 @@ RUN strip sist2 || mv sist2_debug sist2
FROM --platform="linux/amd64" ubuntu:21.10
RUN apt update && apt install -y curl libasan5 libmagic1 && rm -rf /var/lib/apt/lists/*
RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \

View File

@@ -52,7 +52,7 @@ sist2 (Simple incremental search tool)
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` *
2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
recommended!)*
3. *(or)* `docker pull simon987/sist2:2.12.1-x64-linux`
3. *(or)* `docker pull simon987/sist2:2.11.7-x64-linux`
1. See [Usage guide](docs/USAGE.md)

View File

@@ -103,7 +103,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
* `--thumbnail-count`
Maximum number of thumbnails to generate. When set to a value >= 2, thumbnails for video previews
will be generated. The actual number of thumbnails generated depends on the length of the video (maximum 1 image
every ~7s). Set to 0 to completely disable thumbnails.
every ~5s). Set to 0 to completely disable thumbnails.
* `--content-size`
Number of bytes of text to be extracted from the content of files (plain text, PDFs etc.).
Repeated whitespace and special characters do not count toward this limit.
@@ -292,7 +292,7 @@ Both the `root` and `rewrite_url` fields are safe to manually modify from the
# Elasticsearch
Elasticsearch versions >=6.8.0, 7.X.X and 8.X.X are supported by sist2.
Elasticsearch versions >=6.8.0, <8.0.0 are supported by sist2.
Using a version >=7.14.0 is recommended to enable the following features:

View File

@@ -3,7 +3,7 @@
"refresh_interval": "30s",
"codec": "best_compression",
"number_of_replicas": 0,
"highlight.max_analyzed_offset": 1000000
"highlight.max_analyzed_offset": 10000000
},
"analysis": {
"tokenizer": {
@@ -16,7 +16,7 @@
"delimiter": "."
},
"my_nGram_tokenizer": {
"type": "ngram",
"type": "nGram",
"min_gram": 3,
"max_gram": 3
}

View File

@@ -5,6 +5,5 @@ rm -rf index.sist2/
python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c
python3 scripts/index_static.py > src/index/static_generated.c
python3 scripts/magic_static.py > src/magic_generated.c
printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h

View File

@@ -1,8 +0,0 @@
try:
with open("/usr/lib/file/magic.mgc", "rb") as f:
data = f.read()
except:
data = bytes([])
print("char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data)))

View File

@@ -1,3 +0,0 @@
docker run --rm -it --name "sist2-dev-es-6"\
-p 9202:9200 -e "discovery.type=single-node" \
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:6.8.0

View File

@@ -1,3 +0,0 @@
docker run --rm -it --name "sist2-dev-es"\
-p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" \
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:8.1.2

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -12,6 +12,7 @@
"axios": "^0.25.0",
"bootstrap-vue": "^2.21.2",
"core-js": "^3.6.5",
"crypto-es": "^1.2.7",
"d3": "^5.16.0",
"date-fns": "^2.21.3",
"dom-to-image": "^2.6.0",
@@ -3288,9 +3289,9 @@
}
},
"node_modules/async": {
"version": "2.6.4",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.4.tgz",
"integrity": "sha512-mzo5dfJYwAn29PeiJ0zvwTo04zj8HDJj0Mn8TD7sno7q12prdbnasKJHhkm2c1LgrhlJ0teaea8860oxi51mGA==",
"version": "2.6.3",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz",
"integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==",
"dev": true,
"dependencies": {
"lodash": "^4.17.14"
@@ -5260,6 +5261,11 @@
"node": "*"
}
},
"node_modules/crypto-es": {
"version": "1.2.7",
"resolved": "https://registry.npmjs.org/crypto-es/-/crypto-es-1.2.7.tgz",
"integrity": "sha512-UUqiVJ2gUuZFmbFsKmud3uuLcNP2+Opt+5ysmljycFCyhA0+T16XJmo1ev/t5kMChMqWh7IEvURNCqsg+SjZGQ=="
},
"node_modules/css-color-names": {
"version": "0.0.4",
"resolved": "https://registry.npmjs.org/css-color-names/-/css-color-names-0.0.4.tgz",
@@ -9736,9 +9742,9 @@
}
},
"node_modules/minimist": {
"version": "1.2.6",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
"integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==",
"version": "1.2.5",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==",
"dev": true
},
"node_modules/minipass": {
@@ -14092,9 +14098,9 @@
}
},
"node_modules/url-parse": {
"version": "1.5.10",
"resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.10.tgz",
"integrity": "sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ==",
"version": "1.5.4",
"resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.4.tgz",
"integrity": "sha512-ITeAByWWoqutFClc/lRZnFplgXgEZr3WJ6XngMM/N9DMIm4K8zXPCZ1Jdu0rERwO84w1WC5wkle2ubwTA4NTBg==",
"dev": true,
"dependencies": {
"querystringify": "^2.1.1",
@@ -17937,9 +17943,9 @@
"dev": true
},
"async": {
"version": "2.6.4",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.4.tgz",
"integrity": "sha512-mzo5dfJYwAn29PeiJ0zvwTo04zj8HDJj0Mn8TD7sno7q12prdbnasKJHhkm2c1LgrhlJ0teaea8860oxi51mGA==",
"version": "2.6.3",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz",
"integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==",
"dev": true,
"requires": {
"lodash": "^4.17.14"
@@ -19615,6 +19621,11 @@
"randomfill": "^1.0.3"
}
},
"crypto-es": {
"version": "1.2.7",
"resolved": "https://registry.npmjs.org/crypto-es/-/crypto-es-1.2.7.tgz",
"integrity": "sha512-UUqiVJ2gUuZFmbFsKmud3uuLcNP2+Opt+5ysmljycFCyhA0+T16XJmo1ev/t5kMChMqWh7IEvURNCqsg+SjZGQ=="
},
"css-color-names": {
"version": "0.0.4",
"resolved": "https://registry.npmjs.org/css-color-names/-/css-color-names-0.0.4.tgz",
@@ -23324,9 +23335,9 @@
}
},
"minimist": {
"version": "1.2.6",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
"integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==",
"version": "1.2.5",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==",
"dev": true
},
"minipass": {
@@ -27008,9 +27019,9 @@
}
},
"url-parse": {
"version": "1.5.10",
"resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.10.tgz",
"integrity": "sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ==",
"version": "1.5.4",
"resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.4.tgz",
"integrity": "sha512-ITeAByWWoqutFClc/lRZnFplgXgEZr3WJ6XngMM/N9DMIm4K8zXPCZ1Jdu0rERwO84w1WC5wkle2ubwTA4NTBg==",
"dev": true,
"requires": {
"querystringify": "^2.1.1",

View File

@@ -11,6 +11,7 @@
"axios": "^0.25.0",
"bootstrap-vue": "^2.21.2",
"core-js": "^3.6.5",
"crypto-es": "^1.2.7",
"d3": "^5.16.0",
"date-fns": "^2.21.3",
"dom-to-image": "^2.6.0",

View File

@@ -1,5 +1,6 @@
import axios from "axios";
import {ext, strUnescape, lum} from "./util";
import CryptoES from 'crypto-es';
export interface EsTag {
id: string
@@ -29,6 +30,7 @@ export interface EsHit {
_index: string
_id: string
_score: number
_path_md5: string
_type: string
_tags: Tag[]
_seq: number
@@ -247,6 +249,11 @@ class Sist2Api {
res.hits.hits.forEach((hit: EsHit) => {
hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
hit["_path_md5"] = CryptoES.MD5(
hit["_source"]["path"] +
(hit["_source"]["path"] ? "/" : "") +
hit["_source"]["name"] + ext(hit)
).toString();
this.setHitProps(hit);
this.setHitTags(hit);
@@ -336,6 +343,10 @@ class Sist2Api {
};
}
getDocInfo(docId: string) {
return axios.get(`${this.baseUrl}d/${docId}`);
}
getTags() {
return this.esQuery({
aggs: {
@@ -369,7 +380,8 @@ class Sist2Api {
return axios.post(`${this.baseUrl}tag/` + hit["_source"]["index"], {
delete: false,
name: tag,
doc_id: hit["_id"]
doc_id: hit["_id"],
path_md5: hit._path_md5
});
}
@@ -377,7 +389,8 @@ class Sist2Api {
return axios.post(`${this.baseUrl}tag/` + hit["_source"]["index"], {
delete: true,
name: tag,
doc_id: hit["_id"]
doc_id: hit["_id"],
path_md5: hit._path_md5
});
}

View File

@@ -69,7 +69,7 @@ interface SortMode {
class Sist2Query {
searchQuery(blankSearch: boolean = false): any {
searchQuery(): any {
const getters = store.getters;
@@ -93,6 +93,22 @@ class Sist2Query {
{terms: {index: selectedIndexIds}}
] as any[];
if (sizeMin && sizeMax) {
filters.push({range: {size: {gte: sizeMin, lte: sizeMax}}})
} else if (sizeMin) {
filters.push({range: {size: {gte: sizeMin}}})
} else if (sizeMax) {
filters.push({range: {size: {lte: sizeMax}}})
}
if (dateMin && dateMax) {
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
} else if (dateMin) {
filters.push({range: {mtime: {gte: dateMin}}})
} else if (dateMax) {
filters.push({range: {mtime: {lte: dateMax}}})
}
const fields = [
"name^8",
"content^3",
@@ -112,39 +128,20 @@ class Sist2Query {
fields.push("name.nGram^3");
}
if (!blankSearch) {
if (sizeMin && sizeMax) {
filters.push({range: {size: {gte: sizeMin, lte: sizeMax}}})
} else if (sizeMin) {
filters.push({range: {size: {gte: sizeMin}}})
} else if (sizeMax) {
filters.push({range: {size: {lte: sizeMax}}})
}
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
if (path !== "") {
filters.push({term: {path: path}})
}
if (dateMin && dateMax) {
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
} else if (dateMin) {
filters.push({range: {mtime: {gte: dateMin}}})
} else if (dateMax) {
filters.push({range: {mtime: {lte: dateMax}}})
}
if (selectedMimeTypes.length > 0) {
filters.push({terms: {"mime": selectedMimeTypes}});
}
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
if (path !== "") {
filters.push({term: {path: path}})
}
if (selectedMimeTypes.length > 0) {
filters.push({terms: {"mime": selectedMimeTypes}});
}
if (selectedTags.length > 0) {
if (getters.optTagOrOperator) {
filters.push({terms: {"tag": selectedTags}});
} else {
selectedTags.forEach((tag: string) => filters.push({term: {"tag": tag}}));
}
if (selectedTags.length > 0) {
if (getters.optTagOrOperator) {
filters.push({terms: {"tag": selectedTags}});
} else {
selectedTags.forEach((tag: string) => filters.push({term: {"tag": tag}}));
}
}
@@ -185,7 +182,7 @@ class Sist2Query {
size: size,
} as any;
if (!empty && !blankSearch) {
if (!empty) {
q.query.bool.must = query;
}
@@ -210,7 +207,7 @@ class Sist2Query {
};
if (!legacyES) {
q.highlight.max_analyzed_offset = 999_999;
q.highlight.max_analyzed_offset = 9_999_999;
}
if (getters.optSearchInPath) {
@@ -240,7 +237,7 @@ class Sist2Query {
}
}
if (!empty && !blankSearch) {
if (!empty) {
q.query.function_score.query.bool.must.push(query);
}
}

View File

@@ -72,12 +72,6 @@ export default {
}
});
Object.keys(src).forEach(key => {
if (key.startsWith("mt_") || key.startsWith("int_")) {
items.push({key: key, value: src[key]});
}
});
// Exif GPS
if ("exif_gps_longitude_dec" in src) {
items.push({

View File

@@ -1,13 +1,11 @@
<template>
<Preloader v-if="loading"></Preloader>
<div v-else-if="content" class="content-div" v-html="content"></div>
<div v-else-if="content" class="content-div">{{ content }}</div>
</template>
<script>
import Sist2Api from "@/Sist2Api";
import Preloader from "@/components/Preloader";
import Sist2Query from "@/Sist2Query";
import store from "@/store";
export default {
name: "LazyContentDiv",
@@ -20,72 +18,10 @@ export default {
}
},
mounted() {
const query = Sist2Query.searchQuery();
if (this.$store.state.optHighlight) {
const fields = this.$store.state.fuzzy
? {"content.nGram": {}}
: {content: {}};
query.highlight = {
pre_tags: ["<mark>"],
post_tags: ["</mark>"],
number_of_fragments: 0,
fields,
};
if (!store.state.sist2Info.esVersionLegacy) {
query.highlight.max_analyzed_offset = 999_999;
}
}
if ("function_score" in query.query) {
query.query = query.query.function_score.query;
}
if (!("must" in query.query.bool)) {
query.query.bool.must = [];
} else if (!Array.isArray(query.query.bool.must)) {
query.query.bool.must = [query.query.bool.must];
}
query.query.bool.must.push({match: {_id: this.docId}});
delete query["sort"];
delete query["aggs"];
delete query["search_after"];
delete query.query["function_score"];
query._source = {
includes: ["content", "name", "path", "extension"]
}
query.size = 1;
Sist2Api.esQuery(query).then(resp => {
Sist2Api.getDocInfo(this.docId).then(src => {
this.content = src.data.content;
this.loading = false;
if (resp.hits.hits.length === 1) {
this.content = this.getContent(resp.hits.hits[0]);
} else {
console.log("FIXME: could not get content")
console.log(resp)
}
});
},
methods: {
getContent(doc) {
if (!doc.highlight) {
return doc._source.content;
}
if (doc.highlight["content.nGram"]) {
return doc.highlight["content.nGram"][0];
}
if (doc.highlight.content) {
return doc.highlight.content[0];
}
}
})
}
}
</script>

View File

@@ -81,9 +81,7 @@ export default {
methods: {
keyDownListener(e) {
const isLightboxOpen = this.$refs.lightbox === undefined || this.$refs.lightbox.$el.tagName === undefined;
if (isLightboxOpen) {
if (this.$refs.lightbox === undefined) {
return true;
}
@@ -91,6 +89,7 @@ export default {
switch (e.key) {
case " ": {
console.log("SPACE")
e.preventDefault();
e.stopPropagation();
e.stopImmediatePropagation();
@@ -99,12 +98,16 @@ export default {
[...document.getElementsByClassName("fslightbox-absoluted")].forEach(elem => {
if (elem.style.transform === "translate(0px)" || elem.style.transform === "translate(0px, 0px)") {
const vid = elem.getElementsByTagName("video")[0];
console.log(elem)
console.log(vid)
if (vid) {
if (vid.paused) {
vid.play();
console.log("PLAY")
} else {
vid.pause()
console.log("PAUSE")
}
}
}
@@ -116,28 +119,24 @@ export default {
}
case "ArrowUp":
case "k": {
if (!lightboxStore.data.isThumbing && lightboxStore.core.thumbsToggler) {
if (!lightboxStore.data.isThumbing) {
lightboxStore.core.thumbsToggler.toggleThumbs();
}
return false;
}
case "ArrowDown":
case "j": {
if (lightboxStore.data.isThumbing && lightboxStore.core.thumbsToggler) {
if (lightboxStore.data.isThumbing) {
lightboxStore.core.thumbsToggler.toggleThumbs();
}
return false;
}
case "h": {
if (lightboxStore.core.stageManager.getPreviousSlideIndex) {
lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getPreviousSlideIndex());
}
return false;
lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getPreviousSlideIndex());
break;
}
case "l": {
if (lightboxStore.core.stageManager.getNextSlideIndex) {
lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getNextSlideIndex());
}
lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getNextSlideIndex());
return false;
}
}

View File

@@ -1,13 +1,5 @@
<template>
<div>
<b-input-group v-if="showSearchBar" id="tag-picker-filter-bar">
<b-form-input :value="filter"
:placeholder="$t('tagFilter')"
@input="onFilter($event)"></b-form-input>
</b-input-group>
<div id="tagTree"></div>
</div>
<div id="tagTree"></div>
</template>
<script>
@@ -120,12 +112,10 @@ function addTag(map, tag, id, count) {
export default {
name: "TagPicker",
props: ["showSearchBar"],
data() {
return {
tagTree: null,
loadedFromArgs: false,
filter: ""
}
},
mounted() {
@@ -139,10 +129,6 @@ export default {
});
},
methods: {
onFilter(value) {
this.filter = value;
this.tagTree.search(value);
},
initializeTree() {
const tagMap = [];
this.tagTree = new InspireTree({
@@ -177,8 +163,7 @@ export default {
});
},
handleTreeClick(node, e) {
if (e === "indeterminate" || e === "collapsed" || e === 'rendered' || e === "focused"
|| e === "matched" || e === "hidden") {
if (e === "indeterminate" || e === "collapsed" || e === 'rendered' || e === "focused") {
return;
}
@@ -195,15 +180,7 @@ export default {
}
</style>
<style>
.inspire-tree .focused > .wholerow {
.inspire-tree .focused>.wholerow {
border: none;
}
#tag-picker-filter-bar {
padding: 10px 4px 4px;
}
.theme-black .inspire-tree .matched > .wholerow {
background: rgba(251, 191, 41, 0.25);
}
</style>

View File

@@ -16,7 +16,6 @@ export default {
pages: "pages",
mimeTypes: "Media types",
tags: "Tags",
tagFilter: "Filter tags",
help: {
simpleSearch: "Simple search",
advancedSearch: "Advanced search",
@@ -75,7 +74,6 @@ export default {
useDatePicker: "Use a Date Picker component rather than a slider",
vidPreviewInterval: "Video preview frame duration in ms",
simpleLightbox: "Disable animations in image viewer",
showTagPickerFilter: "Display the tag filter bar"
},
queryMode: {
simple: "Simple",
@@ -185,7 +183,6 @@ export default {
pages: "pages",
mimeTypes: "Types de médias",
tags: "Tags",
tagFilter: "Filtrer les tags",
help: {
simpleSearch: "Recherche simple",
advancedSearch: "Recherche avancée",
@@ -245,7 +242,6 @@ export default {
useDatePicker: "Afficher un composant « Date Picker » plutôt qu'un slider",
vidPreviewInterval: "Durée des images d'aperçu video en millisecondes",
simpleLightbox: "Désactiver les animations du visualiseur d'images",
showTagPickerFilter: "Afficher le filtre dans l'onglet Tags"
},
queryMode: {
simple: "Simple",
@@ -356,7 +352,6 @@ export default {
pages: "页",
mimeTypes: "文件类型",
tags: "标签",
tagFilter: "筛选标签",
help: {
simpleSearch: "简易搜索",
advancedSearch: "高级搜索",
@@ -415,7 +410,6 @@ export default {
useDatePicker: "使用日期选择器组件而不是滑块",
vidPreviewInterval: "视频预览帧的持续时间,以毫秒为单位",
simpleLightbox: "在图片查看器中,禁用动画",
showTagPickerFilter: "显示标签过滤栏"
},
queryMode: {
simple: "简单",

View File

@@ -4,8 +4,6 @@ import VueRouter, {Route} from "vue-router";
import {EsHit, EsResult, EsTag, Index, Tag} from "@/Sist2Api";
import {deserializeMimes, serializeMimes} from "@/util";
const CONF_VERSION = 2;
Vue.use(Vuex)
export default new Vuex.Store({
@@ -26,6 +24,7 @@ export default new Vuex.Store({
sortMode: "score",
fuzzy: false,
size: 60,
optLang: "en",
optLangIsDefault: true,
@@ -33,7 +32,6 @@ export default new Vuex.Store({
optTheme: "light",
optDisplay: "grid",
optSize: 60,
optHighlight: true,
optTagOrOperator: false,
optFuzzy: true,
@@ -54,7 +52,6 @@ export default new Vuex.Store({
optUseDatePicker: false,
optVidPreviewInterval: 700,
optSimpleLightbox: true,
optShowTagPickerFilter: true,
_onLoadSelectedIndices: [] as string[],
_onLoadSelectedMimeTypes: [] as string[],
@@ -153,7 +150,7 @@ export default new Vuex.Store({
setOptSuggestPath: (state, val) => state.optSuggestPath = val,
setOptFragmentSize: (state, val) => state.optFragmentSize = val,
setOptQueryMode: (state, val) => state.optQueryMode = val,
setOptResultSize: (state, val) => state.optSize = val,
setOptResultSize: (state, val) => state.size = val,
setOptTagOrOperator: (state, val) => state.optTagOrOperator = val,
setOptTreemapType: (state, val) => state.optTreemapType = val,
@@ -166,7 +163,6 @@ export default new Vuex.Store({
setOptUseDatePicker: (state, val) => state.optUseDatePicker = val,
setOptVidPreviewInterval: (state, val) => state.optVidPreviewInterval = val,
setOptSimpleLightbox: (state, val) => state.optSimpleLightbox = val,
setOptShowTagPickerFilter: (state, val) => state.optShowTagPickerFilter = val,
setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
setOptLightboxSlideDuration: (state, val) => state.optLightboxSlideDuration = val,
@@ -245,11 +241,6 @@ export default new Vuex.Store({
}
},
async updateArgs({state}, router: VueRouter) {
if (router.currentRoute.path !== "/") {
return;
}
await router.push({
query: {
q: state.searchText.trim() ? state.searchText.trim().replace(/\s+/g, " ") : undefined,
@@ -278,8 +269,6 @@ export default new Vuex.Store({
}
});
conf["version"] = CONF_VERSION;
localStorage.setItem("sist2_configuration", JSON.stringify(conf));
},
loadConfiguration({state}) {
@@ -287,11 +276,6 @@ export default new Vuex.Store({
if (confString) {
const conf = JSON.parse(confString);
if (!("version" in conf) || conf["version"] != CONF_VERSION) {
localStorage.removeItem("sist2_configuration");
window.location.reload();
}
Object.keys(state).forEach((key) => {
if (key.startsWith("opt")) {
(state as any)[key] = conf[key];
@@ -353,7 +337,7 @@ export default new Vuex.Store({
searchText: state => state.searchText,
pathText: state => state.pathText,
fuzzy: state => state.fuzzy,
size: state => state.optSize,
size: state => state.size,
sortMode: state => state.sortMode,
lastQueryResult: state => state.lastQueryResults,
lastDoc: function (state): EsHit | null {
@@ -391,12 +375,11 @@ export default new Vuex.Store({
optTreemapColor: state => state.optTreemapColor,
optLightboxLoadOnlyCurrent: state => state.optLightboxLoadOnlyCurrent,
optLightboxSlideDuration: state => state.optLightboxSlideDuration,
optResultSize: state => state.optSize,
optResultSize: state => state.size,
optHideLegacy: state => state.optHideLegacy,
optUpdateMimeMap: state => state.optUpdateMimeMap,
optUseDatePicker: state => state.optUseDatePicker,
optVidPreviewInterval: state => state.optVidPreviewInterval,
optSimpleLightbox: state => state.optSimpleLightbox,
optShowTagPickerFilter: state => state.optShowTagPickerFilter,
}
})

View File

@@ -50,11 +50,6 @@
$t("opt.simpleLightbox")
}}
</b-form-checkbox>
<b-form-checkbox :checked="optShowTagPickerFilter" @input="setOptShowTagPickerFilter">{{
$t("opt.showTagPickerFilter")
}}
</b-form-checkbox>
</b-card>
<br/>
@@ -250,7 +245,6 @@ export default {
"optUseDatePicker",
"optVidPreviewInterval",
"optSimpleLightbox",
"optShowTagPickerFilter",
]),
clientWidth() {
return window.innerWidth;
@@ -298,7 +292,6 @@ export default {
"setOptUseDatePicker",
"setOptVidPreviewInterval",
"setOptSimpleLightbox",
"setOptShowTagPickerFilter",
]),
onResetClick() {
localStorage.removeItem("sist2_configuration");

View File

@@ -56,22 +56,6 @@ export default Vue.extend({
onThumbnailClick() {
window.open(`/f/${this.doc._id}`, "_blank");
},
findByCustomField(field, id) {
return {
query: {
bool: {
must: [
{
match: {
[field]: id
}
}
]
}
},
size: 1
}
},
findById(id) {
return {
query: {
@@ -119,8 +103,6 @@ export default Vue.extend({
query = this.findById(this.$route.query.byId);
} else if (this.$route.query.byName) {
query = this.findByName(this.$route.query.byName);
} else if (this.$route.query.by && this.$route.query.q) {
query = this.findByCustomField(this.$route.query.by, this.$route.query.q)
}
if (query) {

View File

@@ -32,7 +32,7 @@
<MimePicker></MimePicker>
</b-tab>
<b-tab :title="$t('tags')">
<TagPicker :show-search-bar="$store.state.optShowTagPickerFilter"></TagPicker>
<TagPicker></TagPicker>
</b-tab>
</b-tabs>
</b-col>
@@ -139,9 +139,7 @@ export default Vue.extend({
this.setSist2Info(data);
this.setIndices(data.indices);
const doBlankSearch = !this.$store.state.optUpdateMimeMap;
Sist2Api.getMimeTypes(Sist2Query.searchQuery(doBlankSearch)).then(({mimeMap}) => {
Sist2Api.getMimeTypes(Sist2Query.searchQuery()).then(({mimeMap}) => {
this.$store.commit("setUiMimeMap", mimeMap);
this.uiLoading = false;
this.search(true);
@@ -208,7 +206,7 @@ export default Vue.extend({
this.$store.commit("setUiReachedScrollEnd", false);
},
async handleSearch(resp: EsResult) {
if (resp.hits.hits.length == 0 || resp.hits.hits.length < this.$store.state.optSize) {
if (resp.hits.hits.length == 0) {
this.$store.commit("setUiReachedScrollEnd", true);
}
@@ -248,8 +246,6 @@ export default Vue.extend({
this.$store.commit("setLastQueryResult", resp);
this.docs.push(...resp.hits.hits);
resp.hits.hits.forEach(hit => this.docIds.add(hit._id));
},
getDateRange(): Promise<{ min: number, max: number }> {
return sist2.esQuery({

View File

@@ -81,11 +81,6 @@ void web_args_destroy(web_args_t *args) {
}
void exec_args_destroy(exec_args_t *args) {
if (args->index_path != NULL) {
free(args->index_path);
}
free(args);
}
@@ -129,9 +124,6 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->tn_count = DEFAULT_THUMBNAIL_COUNT;
} else if (args->tn_count == OPTION_VALUE_DISABLE) {
args->tn_count = 0;
} else if (args->tn_count > 1000) {
printf("Invalid value --thumbnail-count argument: %d. Must be <= 1000.\n", args->tn_size);
return 1;
}
if (args->content_size == OPTION_VALUE_UNSPECIFIED) {
@@ -398,7 +390,6 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl)
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
LOG_DEBUGF("cli.c", "arg async_script=%d", args->async_script)
@@ -513,7 +504,6 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl)
LOG_DEBUGF("cli.c", "arg tagline=%s", args->tagline)
LOG_DEBUGF("cli.c", "arg dev=%d", args->dev)
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)

View File

@@ -50,7 +50,6 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv);
typedef struct index_args {
char *es_url;
char *es_index;
int es_insecure_ssl;
char *index_path;
const char *script_path;
char *script;
@@ -69,7 +68,6 @@ typedef struct index_args {
typedef struct web_args {
char *es_url;
char *es_index;
int es_insecure_ssl;
char *listen_address;
char *credentials;
char *tag_credentials;
@@ -87,8 +85,7 @@ typedef struct web_args {
typedef struct exec_args {
char *es_url;
char *es_index;
int es_insecure_ssl;
char *index_path;
const char *index_path;
const char *script_path;
int async_script;
char *script;

View File

@@ -79,7 +79,6 @@ typedef struct {
typedef struct {
char *es_url;
int es_insecure_ssl;
es_version_t *es_version;
char *es_index;
int batch_size;
@@ -98,7 +97,6 @@ typedef struct {
char *es_url;
es_version_t *es_version;
char *es_index;
int es_insecure_ssl;
int index_count;
char *auth_user;
char *auth_pass;

View File

@@ -21,8 +21,6 @@ void free_queue(int max);
void elastic_flush();
void print_error(response_t *r);
void destroy_indexer(es_indexer_t *indexer) {
if (indexer == NULL) {
@@ -47,13 +45,13 @@ void elastic_cleanup() {
destroy_indexer(Indexer);
}
void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]) {
void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {
cJSON *line = cJSON_CreateObject();
cJSON_AddStringToObject(line, "_id", id_str);
cJSON_AddStringToObject(line, "_index", IndexCtx.es_index);
// cJSON_AddStringToObject(line, "_type", "_doc");
cJSON_AddStringToObject(line, "_type", "_doc");
cJSON_AddItemReferenceToObject(line, "_source", document);
char *json = cJSON_PrintUnformatted(line);
@@ -74,19 +72,19 @@ void delete_document(const char* document_id_str, void* UNUSED(_data)) {
bulk_line->type = ES_BULK_LINE_DELETE;
bulk_line->next = NULL;
strcpy(bulk_line->doc_id, document_id_str);
memcpy(bulk_line->path_md5_str, document_id_str, MD5_STR_LENGTH);
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
}
void index_json(cJSON *document, const char doc_id[SIST_DOC_ID_LEN]) {
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
char *json = cJSON_PrintUnformatted(document);
size_t json_len = strlen(json);
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
bulk_line->type = ES_BULK_LINE_INDEX;
memcpy(bulk_line->line, json, json_len);
strcpy(bulk_line->doc_id, doc_id);
memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
*(bulk_line->line + json_len) = '\n';
*(bulk_line->line + json_len + 1) = '\0';
bulk_line->next = NULL;
@@ -95,7 +93,7 @@ void index_json(cJSON *document, const char doc_id[SIST_DOC_ID_LEN]) {
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
}
void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]) {
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]) {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
@@ -110,16 +108,16 @@ void execute_update_script(const char *script, int async, const char index_id[SI
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
cJSON_AddStringToObject(term_obj, "index", index_id);
char *str = cJSON_PrintUnformatted(body);
char *str = cJSON_Print(body);
char url[4096];
char bulk_url[4096];
if (async) {
snprintf(url, sizeof(url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
Indexer->es_index);
} else {
snprintf(url, sizeof(url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
}
response_t *r = web_post(url, str, IndexCtx.es_insecure_ssl);
response_t *r = web_post(bulk_url, str);
if (!async) {
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
}
@@ -139,18 +137,13 @@ void execute_update_script(const char *script, int async, const char index_id[SI
if (async) {
cJSON *task = cJSON_GetObjectItem(resp, "task");
if (task == NULL) {
LOG_FATALF("elastic.c", "FIXME: Could not get task id: %s", r->body);
}
LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring);
}
cJSON_Delete(resp);
}
void *create_bulk_buffer(int max, int *count, size_t *buf_len, int legacy) {
void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
es_bulk_line_t *line = Indexer->line_head;
*count = 0;
@@ -171,20 +164,11 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len, int legacy) {
while (line != NULL && *count < max) {
char action_str[256];
if (line->type == ES_BULK_LINE_INDEX) {
if (legacy) {
snprintf(
action_str, sizeof(action_str),
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
line->doc_id, Indexer->es_index
);
} else {
snprintf(
action_str, sizeof(action_str),
"{\"index\":{\"_id\":\"%s\",\"_index\":\"%s\"}}\n",
line->doc_id, Indexer->es_index
);
}
snprintf(
action_str, sizeof(action_str),
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
line->path_md5_str, Indexer->es_index
);
size_t action_str_len = strlen(action_str);
size_t line_len = strlen(line->line);
@@ -200,7 +184,7 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len, int legacy) {
snprintf(
action_str, sizeof(action_str),
"{\"delete\":{\"_id\":\"%s\",\"_index\":\"%s\"}}\n",
line->doc_id, Indexer->es_index
line->path_md5_str, Indexer->es_index
);
size_t action_str_len = strlen(action_str);
@@ -228,13 +212,7 @@ void print_errors(response_t *r) {
*(tmp + r->size) = '\0';
cJSON *ret_json = cJSON_Parse(tmp);
cJSON *errors = cJSON_GetObjectItem(ret_json, "errors");
if (errors == NULL) {
char *str = cJSON_Print(ret_json);
LOG_ERRORF("elastic.c", "%s\n", str);
cJSON_free(str);
} else if (errors->valueint != 0) {
if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
cJSON *err;
cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
@@ -272,11 +250,11 @@ void _elastic_flush(int max) {
size_t buf_len;
int count;
void *buf = create_bulk_buffer(max, &count, &buf_len, IS_LEGACY_VERSION(IndexCtx.es_version));
void *buf = create_bulk_buffer(max, &count, &buf_len);
char bulk_url[4096];
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_bulk?pipeline=tie", Indexer->es_url, Indexer->es_index);
response_t *r = web_post(bulk_url, buf, IndexCtx.es_insecure_ssl);
response_t *r = web_post(bulk_url, buf);
if (r->status_code == 0) {
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
@@ -285,7 +263,7 @@ void _elastic_flush(int max) {
if (r->status_code == 413) {
if (max <= 1) {
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->doc_id)
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
free_response(r);
free(buf);
free_queue(1);
@@ -402,7 +380,7 @@ void finish_indexer(char *script, int async_script, char *index_id) {
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
response_t *r = web_post(url, "", IndexCtx.es_insecure_ssl);
response_t *r = web_post(url, "");
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
@@ -411,44 +389,36 @@ void finish_indexer(char *script, int async_script, char *index_id) {
free(script);
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "", IndexCtx.es_insecure_ssl);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
}
snprintf(url, sizeof(url), "%s/%s/_forcemerge", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "", IndexCtx.es_insecure_ssl);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}", IndexCtx.es_insecure_ssl);
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}");
LOG_INFOF("elastic.c", "Set refresh interval <%d>", r->status_code);
free_response(r);
}
es_version_t *elastic_get_version(const char *es_url, int insecure) {
response_t *r = web_get(es_url, 30, insecure);
es_version_t *elastic_get_version(const char *es_url) {
response_t *r = web_get(es_url, 30);
char *tmp = malloc(r->size + 1);
memcpy(tmp, r->body, r->size);
*(tmp + r->size) = '\0';
cJSON *response = cJSON_Parse(tmp);
free(tmp);
free_response(r);
if (response == NULL) {
return NULL;
}
if (cJSON_GetObjectItem(response, "error") != NULL) {
LOG_WARNING("elastic.c", "Could not get Elasticsearch version")
print_error(r);
free_response(r);
return NULL;
}
free_response(r);
if (cJSON_GetObjectItem(response, "version") == NULL ||
cJSON_GetObjectItem(cJSON_GetObjectItem(response, "version"), "number") == NULL) {
cJSON_Delete(response);
@@ -473,7 +443,7 @@ es_version_t *elastic_get_version(const char *es_url, int insecure) {
void elastic_init(int force_reset, const char *user_mappings, const char *user_settings) {
es_version_t *es_version = elastic_get_version(IndexCtx.es_url, IndexCtx.es_insecure_ssl);
es_version_t *es_version = elastic_get_version(IndexCtx.es_url);
IndexCtx.es_version = es_version;
if (es_version == NULL) {
@@ -482,33 +452,33 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
LOG_INFOF("elastic.c",
"Elasticsearch version is %s (supported=%d, legacy=%d)",
format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), IS_LEGACY_VERSION(es_version));
format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), USE_LEGACY_ES_SETTINGS(es_version));
if (!IS_SUPPORTED_ES_VERSION(es_version)) {
LOG_FATAL("elastic.c", "This elasticsearch version is not supported!")
LOG_FATAL("elastic.c", "sist2 only supports Elasticsearch v6.8 or newer")
}
char *settings = NULL;
if (IS_LEGACY_VERSION(es_version)) {
settings = settings_legacy_json;
} else {
if (USE_LEGACY_ES_SETTINGS(es_version)) {
settings = settings_json;
} else {
settings = settings_legacy_json;
}
// Check if index exists
char url[4096];
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
response_t *r = web_get(url, 30, IndexCtx.es_insecure_ssl);
response_t *r = web_get(url, 30);
int index_exists = r->status_code == 200;
free_response(r);
if (!index_exists || force_reset) {
r = web_delete(url, IndexCtx.es_insecure_ssl);
r = web_delete(url);
LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, "", IndexCtx.es_insecure_ssl);
r = web_put(url, "");
if (r->status_code != 200) {
print_error(r);
@@ -519,17 +489,17 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
free_response(r);
snprintf(url, sizeof(url), "%s/%s/_close", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "", IndexCtx.es_insecure_ssl);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/_ingest/pipeline/tie", IndexCtx.es_url);
r = web_put(url, pipeline_json, IndexCtx.es_insecure_ssl);
r = web_put(url, pipeline_json);
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, user_settings ? user_settings : settings, IndexCtx.es_insecure_ssl);
r = web_put(url, user_settings ? user_settings : settings);
LOG_INFOF("elastic.c", "Update ES settings <%d>", r->status_code);
if (r->status_code != 200) {
print_error(r);
@@ -537,13 +507,8 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
}
free_response(r);
if (IS_LEGACY_VERSION(es_version)) {
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
} else {
snprintf(url, sizeof(url), "%s/%s/_mappings", IndexCtx.es_url, IndexCtx.es_index);
}
r = web_put(url, user_mappings ? user_mappings : mappings_json, IndexCtx.es_insecure_ssl);
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, user_mappings ? user_mappings : mappings_json);
LOG_INFOF("elastic.c", "Update ES mappings <%d>", r->status_code);
if (r->status_code != 200) {
print_error(r);
@@ -552,7 +517,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
free_response(r);
snprintf(url, sizeof(url), "%s/%s/_open", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "", IndexCtx.es_insecure_ssl);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
free_response(r);
}
@@ -562,7 +527,7 @@ cJSON *elastic_get_document(const char *id_str) {
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, id_str);
response_t *r = web_get(url, 3, WebCtx.es_insecure_ssl);
response_t *r = web_get(url, 3);
cJSON *json = NULL;
if (r->status_code == 200) {
char *tmp = malloc(r->size + 1);
@@ -580,7 +545,7 @@ char *elastic_get_status() {
snprintf(url, sizeof(url),
"%s/_cluster/state/metadata/%s?filter_path=metadata.indices.*.state", WebCtx.es_url, WebCtx.es_index);
response_t *r = web_get(url, 30, IndexCtx.es_insecure_ssl);
response_t *r = web_get(url, 30);
cJSON *json = NULL;
char *status = malloc(128 * sizeof(char));
status[0] = '\0';

View File

@@ -8,7 +8,7 @@
typedef struct es_bulk_line {
struct es_bulk_line *next;
char doc_id[SIST_DOC_ID_LEN];
char path_md5_str[MD5_STR_LENGTH];
int type;
char line[0];
} es_bulk_line_t;
@@ -20,10 +20,8 @@ typedef struct {
} es_version_t;
#define VERSION_GE(version, maj, min) ((version)->major > (maj) || ((version)->major == (maj) && (version)->minor >= (min)))
#define VERSION_LT(version, maj, min) (!VERSION_GE(version, maj, min))
#define IS_SUPPORTED_ES_VERSION(es_version) ((es_version) != NULL && VERSION_GE((es_version), 6, 8) && VERSION_LT((es_version), 9, 0))
#define IS_LEGACY_VERSION(es_version) ((es_version) != NULL && VERSION_LT((es_version), 7, 14))
#define IS_SUPPORTED_ES_VERSION(es_version) VERSION_GE((es_version), 6, 8)
#define USE_LEGACY_ES_SETTINGS(es_version) (!VERSION_GE((es_version), 7, 14))
__always_inline
static const char *format_es_version(es_version_t *version) {
@@ -42,9 +40,9 @@ typedef struct es_indexer es_indexer_t;
void elastic_index_line(es_bulk_line_t *line);
void print_json(cJSON *document, const char index_id_str[SIST_INDEX_ID_LEN]);
void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
void index_json(cJSON *document, const char doc_id[SIST_INDEX_ID_LEN]);
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
void delete_document(const char *document_id_str, void* data);
@@ -59,8 +57,8 @@ cJSON *elastic_get_document(const char *id_str);
char *elastic_get_status();
es_version_t *elastic_get_version(const char *es_url, int insecure);
es_version_t *elastic_get_version(const char *es_url);
void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]);
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);
#endif

File diff suppressed because one or more lines are too long

View File

@@ -22,7 +22,7 @@ void free_response(response_t *resp) {
free(resp);
}
void web_post_async_poll(subreq_ctx_t *req) {
void web_post_async_poll(subreq_ctx_t* req) {
fd_set fdread;
fd_set fdwrite;
fd_set fdexcep;
@@ -34,7 +34,7 @@ void web_post_async_poll(subreq_ctx_t *req) {
CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd);
if (mc != CURLM_OK) {
if(mc != CURLM_OK) {
req->done = TRUE;
return;
}
@@ -47,7 +47,7 @@ void web_post_async_poll(subreq_ctx_t *req) {
struct timeval timeout = {1, 0};
int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
switch (rc) {
switch(rc) {
case -1:
req->done = TRUE;
break;
@@ -64,10 +64,6 @@ void web_post_async_poll(subreq_ctx_t *req) {
req->response->size = req->response_buf.cur;
curl_easy_getinfo(req->handle, CURLINFO_RESPONSE_CODE, &req->response->status_code);
if (req->response->status_code == 0) {
LOG_ERRORF("web.c", "CURL Error: %s", req->curl_err_buffer)
}
curl_multi_cleanup(req->multi);
curl_easy_cleanup(req->handle);
curl_slist_free_all(req->headers);
@@ -75,7 +71,7 @@ void web_post_async_poll(subreq_ctx_t *req) {
}
}
subreq_ctx_t *web_post_async(const char *url, char *data, int insecure) {
subreq_ctx_t *web_post_async(const char *url, char *data) {
subreq_ctx_t *req = calloc(1, sizeof(subreq_ctx_t));
req->response = calloc(1, sizeof(response_t));
req->data = data;
@@ -88,11 +84,6 @@ subreq_ctx_t *web_post_async(const char *url, char *data, int insecure) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, req->curl_err_buffer);
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
@@ -109,7 +100,7 @@ subreq_ctx_t *web_post_async(const char *url, char *data, int insecure) {
return req;
}
response_t *web_get(const char *url, int timeout, int insecure) {
response_t *web_get(const char *url, int timeout) {
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
@@ -121,24 +112,14 @@ response_t *web_get(const char *url, int timeout, int insecure) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
char err_buffer[CURL_ERROR_SIZE + 1] = {};
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, err_buffer);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
if (resp->status_code == 0) {
LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
}
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
@@ -147,7 +128,7 @@ response_t *web_get(const char *url, int timeout, int insecure) {
return resp;
}
response_t *web_post(const char *url, const char *data, int insecure) {
response_t *web_post(const char *url, const char *data) {
response_t *resp = malloc(sizeof(response_t));
@@ -160,12 +141,6 @@ response_t *web_post(const char *url, const char *data, int insecure) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
char err_buffer[CURL_ERROR_SIZE + 1] = {};
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, err_buffer);
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
@@ -176,21 +151,17 @@ response_t *web_post(const char *url, const char *data, int insecure) {
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
resp->body = buffer.buf;
resp->size = buffer.cur;
if (resp->status_code == 0) {
LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
}
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
}
response_t *web_put(const char *url, const char *data, int insecure) {
response_t *web_put(const char *url, const char *data) {
response_t *resp = malloc(sizeof(response_t));
@@ -204,10 +175,7 @@ response_t *web_put(const char *url, const char *data, int insecure) {
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4);
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
@@ -226,7 +194,7 @@ response_t *web_put(const char *url, const char *data, int insecure) {
return resp;
}
response_t *web_delete(const char *url, int insecure) {
response_t *web_delete(const char *url) {
response_t *resp = malloc(sizeof(response_t));
@@ -239,9 +207,6 @@ response_t *web_delete(const char *url, int insecure) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
}
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
struct curl_slist *headers = NULL;

View File

@@ -25,15 +25,14 @@ typedef struct {
response_t *response;
int running_handles;
int done;
char curl_err_buffer[CURL_ERROR_SIZE + 1];
} subreq_ctx_t;
response_t *web_get(const char *url, int timeout, int insecure);
response_t *web_post(const char * url, const char * data, int insecure);
response_t *web_get(const char *url, int timeout);
response_t *web_post(const char * url, const char * data);
void web_post_async_poll(subreq_ctx_t* req);
subreq_ctx_t *web_post_async(const char *url, char *data, int insecure);
response_t *web_put(const char *url, const char *data, int insecure);
response_t *web_delete(const char *url, int insecure);
subreq_ctx_t *web_post_async(const char *url, char *data);
response_t *web_put(const char *url, const char *data);
response_t *web_delete(const char *url);
void free_response(response_t *resp);

View File

@@ -124,7 +124,9 @@ char *build_json_string(document_t *doc) {
cJSON_AddStringToObject(json, "path", "");
}
cJSON_AddStringToObject(json, "_id", doc->doc_id);
char md5_str[MD5_STR_LENGTH];
buf2hex(doc->path_md5, MD5_DIGEST_LENGTH, md5_str);
cJSON_AddStringToObject(json, "_id", md5_str);
// Metadata
meta_line_t *meta = doc->meta_head;
@@ -450,31 +452,32 @@ void read_lines(const char *path, const line_processor_t processor) {
dyn_buffer_destroy(&buf);
fclose(file);
}
void read_index_ndjson(const char *line, void *_data) {
void **data = _data;
const char *index_id = data[0];
void read_index_ndjson(const char *line, void* _data) {
void** data = _data;
const char* index_id = data[0];
index_func func = data[1];
read_index_bin_handle_line(line, index_id, func);
}
void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func func) {
void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {
if (strcmp(type, INDEX_TYPE_NDJSON) == 0) {
read_lines(path, (line_processor_t) {
.data = (void *[2]) {(void *) index_id, func},
.func = read_index_ndjson,
.data = (void*[2]){(void*)index_id, func} ,
.func = read_index_ndjson,
});
}
}
static __thread GHashTable *IncrementalReadTable = NULL;
void json_put_incremental(cJSON *document, UNUSED(const char doc_id[SIST_DOC_ID_LEN])) {
void json_put_incremental(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
const int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;
incremental_put(IncrementalReadTable, path_md5_str, mtime);
incremental_put_str(IncrementalReadTable, path_md5_str, mtime);
}
void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc) {
@@ -487,11 +490,13 @@ static __thread GHashTable *IncrementalNewTable = NULL;
static __thread store_t *IncrementalCopySourceStore = NULL;
static __thread store_t *IncrementalCopyDestinationStore = NULL;
void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {
void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
unsigned char path_md5[MD5_DIGEST_LENGTH];
hex2buf(path_md5_str, MD5_STR_LENGTH - 1, path_md5);
if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get(IncrementalCopyTable, doc_id)) {
if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get_str(IncrementalCopyTable, path_md5_str)) {
// Copy index line
cJSON_DeleteItemFromObject(document, "index");
char *json_str = cJSON_PrintUnformatted(document);
@@ -505,9 +510,9 @@ void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_
// Copy tn store contents
size_t buf_len;
char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, SIST_DOC_ID_LEN, &buf_len);
char *buf = store_read(IncrementalCopySourceStore, (char *) path_md5, sizeof(path_md5), &buf_len);
if (buf_len != 0) {
store_write(IncrementalCopyDestinationStore, (char *) doc_id, SIST_DOC_ID_LEN, buf, buf_len);
store_write(IncrementalCopyDestinationStore, (char *) path_md5, sizeof(path_md5), buf, buf_len);
free(buf);
}
}
@@ -531,24 +536,24 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc);
}
void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {
void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
char doc_id_n[SIST_DOC_ID_LEN + 1];
doc_id_n[SIST_DOC_ID_LEN] = '\0';
doc_id_n[SIST_DOC_ID_LEN - 1] = '\n';
const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;
char path_md5_n[MD5_STR_LENGTH + 1];
path_md5_n[MD5_STR_LENGTH] = '\0';
path_md5_n[MD5_STR_LENGTH - 1] = '\n';
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
// do not delete archive virtual entries
if (cJSON_GetObjectItem(document, "parent") == NULL
&& !incremental_get(IncrementalCopyTable, doc_id)
&& !incremental_get(IncrementalNewTable, doc_id)
&& !incremental_get_str(IncrementalCopyTable, path_md5_str)
&& !incremental_get_str(IncrementalNewTable, path_md5_str)
) {
memcpy(doc_id_n, doc_id, SIST_DOC_ID_LEN - 1);
zstd_write_string(doc_id, sizeof(doc_id_n));
memcpy(path_md5_n, path_md5_str, MD5_STR_LENGTH - 1);
zstd_write_string(path_md5_n, MD5_STR_LENGTH);
}
}
void incremental_delete(const char *del_filepath, const char *index_filepath,
void incremental_delete(const char *del_filepath, const char* index_filepath,
GHashTable *copy_table, GHashTable *new_table) {
if (WriterCtx.out_file == NULL) {

View File

@@ -12,7 +12,7 @@ typedef struct line_processor {
void (*func)(const char*, void*);
} line_processor_t;
typedef void(*index_func)(cJSON *, const char[SIST_DOC_ID_LEN]);
typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
const char *dst_filepath, GHashTable *copy_table);
@@ -24,7 +24,7 @@ void write_document(document_t *doc);
void read_lines(const char *path, const line_processor_t processor);
void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func);
void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);
void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc);
@@ -42,13 +42,13 @@ index_descriptor_t read_index_descriptor(char *path);
// caller ensures char file_path[PATH_MAX]
#define READ_INDICES(file_path, index_path, action_ok, action_main_fail, cond_original) \
snprintf(file_path, PATH_MAX, "%s_index_main.ndjson.zst", index_path); \
if (access(file_path, R_OK) == 0) { \
if (0 == access(file_path, R_OK)) { \
action_ok; \
} else { \
action_main_fail; \
} \
snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \
if ((cond_original) && access(file_path, R_OK) == 0) { \
if ((cond_original) && (0 == access(file_path, R_OK))) { \
action_ok; \
} \

View File

@@ -52,7 +52,22 @@ void store_flush(store_t *store) {
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {
if (LogCtx.very_verbose) {
LOG_DEBUGF("store.c", "Store write %s@{%s} %lu bytes", store->path, key, buf_len)
if (key_len == MD5_DIGEST_LENGTH) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
} else if (key_len == MD5_DIGEST_LENGTH + sizeof(int)) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF("store.c", "Store write {%s/%d} %lu bytes",
path_md5_str, *(int *) (key + MD5_DIGEST_LENGTH), buf_len);
} else {
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
}
}
#if (SIST_FAKE_STORE != 1)

View File

@@ -22,7 +22,7 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
job->vfile.info = *info;
job->parent[0] = '\0';
memset(job->parent, 0, MD5_DIGEST_LENGTH);
job->vfile.filepath = job->filepath;
job->vfile.read = fs_read;

File diff suppressed because one or more lines are too long

View File

@@ -38,8 +38,8 @@ static __sighandler_t sigabrt_handler = NULL;
void sig_handler(int signum) {
LogCtx.verbose = TRUE;
LogCtx.very_verbose = TRUE;
LogCtx.verbose = 1;
LogCtx.very_verbose = 1;
LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
@@ -103,7 +103,7 @@ void sig_handler(int signum) {
exit(-1);
}
void init_dir(const char *dirpath, scan_args_t *args) {
void init_dir(const char *dirpath, scan_args_t* args) {
char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
@@ -112,16 +112,16 @@ void init_dir(const char *dirpath, scan_args_t *args) {
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON);
if (args->incremental != NULL) {
// copy old index id
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
// copy old index id
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
} else {
// generate new index id based on timestamp
unsigned char index_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
// genreate new index id based on timestamp
unsigned char index_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
}
write_index_descriptor(path, &ScanCtx.index.desc);
@@ -324,13 +324,9 @@ void load_incremental_index(const scan_args_t *args) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version)
}
READ_INDICES(
file_path,
args->incremental,
incremental_read(ScanCtx.original_table, file_path, &original_desc),
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
TRUE
);
READ_INDICES(file_path, args->incremental, incremental_read(ScanCtx.original_table, file_path, &original_desc),
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
1);
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
}
@@ -435,8 +431,8 @@ void sist2_scan(scan_args_t *args) {
LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count)
LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count)
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count)
LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size)
LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size)
LOG_DEBUGF("main.c", "Thumbnail store size: %d", ScanCtx.stat_tn_size)
LOG_DEBUGF("main.c", "Index size: %d", ScanCtx.stat_index_size)
if (args->incremental != NULL) {
save_incremental_index(args);
@@ -453,7 +449,6 @@ void sist2_index(index_args_t *args) {
IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index;
IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
IndexCtx.batch_size = args->batch_size;
IndexCtx.needs_es_connection = !args->print;
@@ -539,8 +534,6 @@ void sist2_exec_script(exec_args_t *args) {
IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index;
IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
IndexCtx.needs_es_connection = TRUE;
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
@@ -552,7 +545,6 @@ void sist2_web(web_args_t *args) {
WebCtx.es_url = args->es_url;
WebCtx.es_index = args->es_index;
WebCtx.es_insecure_ssl = args->es_insecure_ssl;
WebCtx.index_count = args->index_count;
WebCtx.auth_user = args->auth_user;
WebCtx.auth_pass = args->auth_pass;
@@ -623,7 +615,6 @@ int main(int argc, const char *argv[]) {
int arg_version = 0;
char *common_es_url = NULL;
int common_es_insecure_ssl = 0;
char *common_es_index = NULL;
char *common_script_path = NULL;
int common_async_script = 0;
@@ -689,7 +680,6 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
@@ -704,7 +694,6 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
@@ -715,7 +704,6 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Exec-script options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
@@ -745,10 +733,6 @@ int main(int argc, const char *argv[]) {
index_args->es_index = common_es_index;
exec_args->es_index = common_es_index;
web_args->es_insecure_ssl = common_es_insecure_ssl;
index_args->es_insecure_ssl = common_es_insecure_ssl;
exec_args->es_insecure_ssl = common_es_insecure_ssl;
index_args->script_path = common_script_path;
exec_args->script_path = common_script_path;
index_args->threads = common_threads;
@@ -792,8 +776,9 @@ int main(int argc, const char *argv[]) {
sist2_exec_script(exec_args);
} else {
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
argparse_usage(&argparse);
LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0])
goto end;
}
printf("\n");

View File

@@ -5,7 +5,6 @@
#include "mime.h"
#include "src/io/serialize.h"
#include "src/parsing/sidecar.h"
#include "src/magic_generated.c"
#include <magic.h>
@@ -70,7 +69,7 @@ void parse(void *arg) {
doc->base = (short) job->base;
char *rel_path = doc->filepath + ScanCtx.index.desc.root_len;
generate_doc_id(rel_path, doc->doc_id);
MD5((unsigned char *) rel_path, strlen(rel_path), doc->path_md5);
doc->meta_head = NULL;
doc->meta_tail = NULL;
@@ -78,10 +77,10 @@ void parse(void *arg) {
doc->size = job->vfile.info.st_size;
doc->mtime = (int) job->vfile.info.st_mtim.tv_sec;
int inc_ts = incremental_get(ScanCtx.original_table, doc->doc_id);
int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
pthread_mutex_lock(&ScanCtx.copy_table_mu);
incremental_mark_file(ScanCtx.copy_table, doc->doc_id);
incremental_mark_file(ScanCtx.copy_table, doc->path_md5);
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
@@ -97,14 +96,16 @@ void parse(void *arg) {
if (ScanCtx.new_table != NULL) {
pthread_mutex_lock(&ScanCtx.copy_table_mu);
incremental_mark_file(ScanCtx.new_table, doc->doc_id);
incremental_mark_file(ScanCtx.new_table, doc->path_md5);
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
}
char *buf[MAGIC_BUF_SIZE];
if (LogCtx.very_verbose) {
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", doc->doc_id)
char path_md5_str[MD5_STR_LENGTH];
buf2hex(doc->path_md5, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", path_md5_str)
}
if (job->vfile.info.st_size == 0) {
@@ -144,15 +145,7 @@ void parse(void *arg) {
}
magic_t magic = magic_open(MAGIC_MIME_TYPE);
const char *magic_buffers[1] = {magic_database_buffer,};
size_t sizes[1] = {sizeof(magic_database_buffer),};
int load_ret = magic_load_buffers(magic, (void **) &magic_buffers, sizes, 1);
if (load_ret != 0) {
LOG_FATALF("parse.c", "Could not load libmagic database: (%d)", load_ret)
}
magic_load(magic, NULL);
const char *magic_mime_str = magic_buffer(magic, buf, bytes_read);
if (magic_mime_str != NULL) {
@@ -225,10 +218,10 @@ void parse(void *arg) {
abort:
//Parent meta
if (job->parent[0] != '\0') {
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + SIST_INDEX_ID_LEN);
if (!md5_digest_is_null(job->parent)) {
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + MD5_STR_LENGTH);
meta_parent->key = MetaParent;
strcpy(meta_parent->str_val, job->parent);
buf2hex(job->parent, MD5_DIGEST_LENGTH, meta_parent->str_val);
APPEND_META((doc), meta_parent)
doc->has_parent = TRUE;

View File

@@ -23,19 +23,16 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
}
char *json_str = cJSON_PrintUnformatted(json);
char assoc_doc_id[SIST_DOC_ID_LEN];
unsigned char path_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
path_md5);
char rel_path[PATH_MAX];
size_t rel_path_len = doc->ext - 1 - ScanCtx.index.desc.root_len;
memcpy(rel_path, vfile->filepath + ScanCtx.index.desc.root_len, rel_path_len);
*(rel_path + rel_path_len) = '\0';
char path_md5_str[MD5_STR_LENGTH];
buf2hex(path_md5, MD5_DIGEST_LENGTH, path_md5_str);
generate_doc_id(rel_path, assoc_doc_id);
store_write(ScanCtx.index.meta_store, assoc_doc_id, sizeof(assoc_doc_id), json_str,
strlen(json_str) + 1);
store_write(ScanCtx.index.meta_store, path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
cJSON_Delete(json);
free(json_str);
free(buf);
}
}

View File

@@ -27,6 +27,10 @@
#define UNUSED(x) __attribute__((__unused__)) x
#define MD5_STR_LENGTH 33
#define SHA1_STR_LENGTH 41
#define SHA1_DIGEST_LENGTH 20
#include "util.h"
#include "log.h"
#include "types.h"
@@ -49,7 +53,7 @@
#include <ctype.h>
#include "git_hash.h"
#define VERSION "2.12.1"
#define VERSION "2.11.7"
static const char *const Version = VERSION;
#ifndef SIST_PLATFORM

View File

@@ -20,7 +20,7 @@ typedef struct {
long count;
} agg_t;
void fill_tables(cJSON *document, UNUSED(const char index_id[SIST_INDEX_ID_LEN])) {
void fill_tables(cJSON *document, UNUSED(const char index_id[MD5_STR_LENGTH])) {
if (cJSON_GetObjectItem(document, "parent") != NULL) {
return;

View File

@@ -4,7 +4,7 @@
#define INDEX_TYPE_NDJSON "ndjson"
typedef struct index_descriptor {
char id[SIST_INDEX_ID_LEN];
char id[MD5_STR_LENGTH];
char version[64];
long timestamp;
char root[PATH_MAX];

View File

@@ -10,6 +10,8 @@
#include "third-party/utf8.h/utf8.h"
#include "libscan/scan.h"
#define MD5_STR_LENGTH 33
char *abspath(const char *path);
@@ -92,24 +94,40 @@ static void buf2hex(const unsigned char *buf, size_t buflen, char *hex_string) {
__always_inline
static void generate_doc_id(const char *rel_path, char *doc_id) {
unsigned char md[MD5_DIGEST_LENGTH];
MD5((unsigned char *) rel_path, strlen(rel_path), md);
buf2hex(md, sizeof(md), doc_id);
static int md5_digest_is_null(const unsigned char digest[MD5_DIGEST_LENGTH]) {
return (*(int64_t *) digest) == 0 && (*((int64_t *) digest + 1)) == 0;
}
__always_inline
static void incremental_put(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN], int mtime) {
char *ptr = malloc(SIST_DOC_ID_LEN);
strcpy(ptr, doc_id);
static void incremental_put(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH], int mtime) {
char *ptr = malloc(MD5_STR_LENGTH);
buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
}
__always_inline
static int incremental_get(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) {
static void incremental_put_str(GHashTable *table, const char *path_md5, int mtime) {
char *ptr = malloc(MD5_STR_LENGTH);
strcpy(ptr, path_md5);
g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
}
__always_inline
static int incremental_get(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
if (table != NULL) {
return GPOINTER_TO_INT(g_hash_table_lookup(table, doc_id));
char md5_str[MD5_STR_LENGTH];
buf2hex(path_md5, MD5_DIGEST_LENGTH, md5_str);
return GPOINTER_TO_INT(g_hash_table_lookup(table, md5_str));
} else {
return 0;
}
}
__always_inline
static int incremental_get_str(GHashTable *table, const char *path_md5) {
if (table != NULL) {
return GPOINTER_TO_INT(g_hash_table_lookup(table, path_md5));
} else {
return 0;
}
@@ -120,9 +138,9 @@ static int incremental_get(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]
* !!Not thread safe.
*/
__always_inline
static int incremental_mark_file(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) {
char *ptr = malloc(SIST_DOC_ID_LEN);
strcpy(ptr, doc_id);
static int incremental_mark_file(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
char *ptr = malloc(MD5_STR_LENGTH);
buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
return g_hash_table_insert(table, ptr, GINT_TO_POINTER(1));
}

View File

@@ -36,7 +36,7 @@ static void send_response_line(struct mg_connection *nc, int status_code, size_t
index_t *get_index_by_id(const char *index_id) {
for (int i = WebCtx.index_count; i >= 0; i--) {
if (strncmp(index_id, WebCtx.indices[i].desc.id, SIST_INDEX_ID_LEN) == 0) {
if (strncmp(index_id, WebCtx.indices[i].desc.id, MD5_STR_LENGTH) == 0) {
return &WebCtx.indices[i];
}
}
@@ -70,23 +70,23 @@ void search_index(struct mg_connection *nc, struct mg_http_message *hm) {
void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
if (hm->uri.len != MD5_STR_LENGTH + 4) {
HTTP_REPLY_NOT_FOUND
return;
}
char arg_index_id[SIST_INDEX_ID_LEN];
memcpy(arg_index_id, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
*(arg_index_id + SIST_INDEX_ID_LEN - 1) = '\0';
char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
index_t *index = get_index_by_id(arg_index_id);
index_t *index = get_index_by_id(arg_md5);
if (index == NULL) {
HTTP_REPLY_NOT_FOUND
return;
}
const char *file;
switch (atoi(hm->uri.ptr + 3 + SIST_INDEX_ID_LEN)) {
switch (atoi(hm->uri.ptr + 3 + MD5_STR_LENGTH)) {
case 1:
file = "treemap.csv";
break;
@@ -150,25 +150,28 @@ void style_vendor(struct mg_connection *nc, struct mg_http_message *hm) {
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
int has_thumbnail_index = FALSE;
int parse_tn_num = FALSE;
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2) {
if (hm->uri.len != 68) {
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2 + 4) {
if (hm->uri.len != 68 + 4) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
has_thumbnail_index = TRUE;
parse_tn_num = TRUE;
}
char arg_doc_id[SIST_DOC_ID_LEN];
char arg_index[SIST_INDEX_ID_LEN];
char arg_file_md5[MD5_STR_LENGTH];
char arg_index[MD5_STR_LENGTH];
memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
memcpy(arg_index, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_index + MD5_STR_LENGTH - 1) = '\0';
memcpy(arg_file_md5, hm->uri.ptr + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH);
*(arg_file_md5 + MD5_STR_LENGTH - 1) = '\0';
unsigned char md5_buf[MD5_DIGEST_LENGTH];
hex2buf(arg_file_md5, MD5_STR_LENGTH - 1, md5_buf);
store_t *store = get_store(arg_index);
if (store == NULL) {
@@ -180,18 +183,16 @@ void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
char *data;
size_t data_len = 0;
if (has_thumbnail_index) {
const char *tn_index = hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2;
if (parse_tn_num) {
int tn_num = atoi(hm->uri.ptr + 68);
char tn_key[sizeof(arg_doc_id) + sizeof(char) * 4];
memcpy(tn_key, arg_doc_id, sizeof(arg_doc_id));
memcpy(tn_key + sizeof(arg_doc_id) - 1, tn_index, sizeof(char) * 4);
*(tn_key + sizeof(tn_key) - 1) = '\0';
char tn_key[sizeof(md5_buf) + sizeof(int)];
memcpy(tn_key, md5_buf, sizeof(md5_buf));
memcpy(tn_key + sizeof(md5_buf), &tn_num, sizeof(tn_num));
data = store_read(store, (char *) tn_key, sizeof(tn_key), &data_len);
} else {
data = store_read(store, (char *) arg_doc_id, sizeof(arg_doc_id), &data_len);
data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
}
if (data_len != 0) {
@@ -212,7 +213,7 @@ void search(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->body.len == 0) {
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
mg_http_reply(nc, 400, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Invalid request");
mg_http_reply(nc, 500, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Invalid request");
return;
}
@@ -223,7 +224,7 @@ void search(struct mg_connection *nc, struct mg_http_message *hm) {
char url[4096];
snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index);
nc->fn_data = web_post_async(url, body, WebCtx.es_insecure_ssl);
nc->fn_data = web_post_async(url, body);
}
void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
@@ -282,7 +283,7 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
char disposition[8192];
snprintf(disposition, sizeof(disposition),
HTTP_SERVER_HEADER "Content-Disposition: inline; filename=\"%s%s%s\"\r\n"
"Accept-Ranges: bytes\r\nCache-Control: no-store\r\n",
"Accept-Ranges: bytes\r\nCache-Control: no-store\r\n",
name, strlen(ext) == 0 ? "" : ".", ext);
char mime_mapping[1024];
@@ -302,7 +303,7 @@ void cache_es_version() {
return;
}
es_version_t *es_version = elastic_get_version(WebCtx.es_url, WebCtx.es_insecure_ssl);
es_version_t *es_version = elastic_get_version(WebCtx.es_url);
if (es_version != NULL) {
WebCtx.es_version = es_version;
is_cached = TRUE;
@@ -313,20 +314,15 @@ void index_info(struct mg_connection *nc) {
cache_es_version();
const char *es_version = "0.0.0";
if (WebCtx.es_version != NULL) {
es_version = format_es_version(WebCtx.es_version);
}
cJSON *json = cJSON_CreateObject();
cJSON *arr = cJSON_AddArrayToObject(json, "indices");
cJSON_AddStringToObject(json, "mongooseVersion", MG_VERSION);
cJSON_AddStringToObject(json, "esIndex", WebCtx.es_index);
cJSON_AddStringToObject(json, "version", Version);
cJSON_AddStringToObject(json, "esVersion", es_version);
cJSON_AddStringToObject(json, "esVersion", format_es_version(WebCtx.es_version));
cJSON_AddBoolToObject(json, "esVersionSupported", IS_SUPPORTED_ES_VERSION(WebCtx.es_version));
cJSON_AddBoolToObject(json, "esVersionLegacy", IS_LEGACY_VERSION(WebCtx.es_version));
cJSON_AddBoolToObject(json, "esVersionLegacy", USE_LEGACY_ES_SETTINGS(WebCtx.es_version));
cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM));
cJSON_AddStringToObject(json, "sist2Hash", Sist2CommitHash);
cJSON_AddStringToObject(json, "lang", WebCtx.lang);
@@ -359,19 +355,55 @@ void index_info(struct mg_connection *nc) {
}
void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
cJSON *doc = elastic_get_document(arg_md5);
cJSON *source = cJSON_GetObjectItem(doc, "_source");
cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
HTTP_REPLY_NOT_FOUND
return;
}
index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) {
cJSON_Delete(doc);
HTTP_REPLY_NOT_FOUND
return;
}
char *json_str = cJSON_PrintUnformatted(source);
send_response_line(nc, 200, (int) strlen(json_str), "Content-Type: application/json");
mg_send(nc, json_str, (int) strlen(json_str));
free(json_str);
cJSON_Delete(doc);
}
void file(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
if (hm->uri.len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
char arg_doc_id[SIST_DOC_ID_LEN];
memcpy(arg_doc_id, hm->uri.ptr + 3, SIST_DOC_ID_LEN);
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
const char *next = arg_doc_id;
const char *next = arg_md5;
cJSON *doc = NULL;
cJSON *index_id = NULL;
cJSON *source = NULL;
@@ -422,6 +454,7 @@ void status(struct mg_connection *nc) {
typedef struct {
char *name;
int delete;
char *path_md5_str;
char *doc_id;
} tag_req_t;
@@ -441,6 +474,12 @@ tag_req_t *parse_tag_request(cJSON *json) {
return NULL;
}
cJSON *arg_path_md5 = cJSON_GetObjectItem(json, "path_md5");
if (arg_path_md5 == NULL || !cJSON_IsString(arg_path_md5) ||
strlen(arg_path_md5->valuestring) != MD5_STR_LENGTH - 1) {
return NULL;
}
cJSON *arg_doc_id = cJSON_GetObjectItem(json, "doc_id");
if (arg_doc_id == NULL || !cJSON_IsString(arg_doc_id)) {
return NULL;
@@ -449,21 +488,22 @@ tag_req_t *parse_tag_request(cJSON *json) {
tag_req_t *req = malloc(sizeof(tag_req_t));
req->delete = arg_delete->valueint;
req->name = arg_name->valuestring;
req->path_md5_str = arg_path_md5->valuestring;
req->doc_id = arg_doc_id->valuestring;
return req;
}
void tag(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
if (hm->uri.len != MD5_STR_LENGTH + 4) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
char arg_index[SIST_INDEX_ID_LEN];
memcpy(arg_index, hm->uri.ptr + 5, SIST_INDEX_ID_LEN);
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
char arg_index[MD5_STR_LENGTH];
memcpy(arg_index, hm->uri.ptr + 5, MD5_STR_LENGTH);
*(arg_index + MD5_STR_LENGTH - 1) = '\0';
if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
LOG_DEBUG("serve.c", "Invalid tag request")
@@ -495,7 +535,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
cJSON *arr = NULL;
size_t data_len = 0;
const char *data = store_read(store, arg_req->doc_id, SIST_DOC_ID_LEN, &data_len);
const char *data = store_read(store, arg_req->path_md5_str, MD5_STR_LENGTH, &data_len);
if (data_len == 0) {
arr = cJSON_CreateArray();
} else {
@@ -531,7 +571,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
nc->fn_data = web_post_async(url, buf);
} else {
cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
@@ -551,11 +591,11 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
nc->fn_data = web_post_async(url, buf);
}
char *json_str = cJSON_PrintUnformatted(arr);
store_write(store, arg_req->doc_id, SIST_DOC_ID_LEN, json_str, strlen(json_str) + 1);
store_write(store, arg_req->path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
store_flush(store);
free(arg_req);
@@ -617,6 +657,8 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
return;
}
tag(nc, hm);
} else if (mg_http_match_uri(hm, "/d/*")) {
document_info(nc, hm);
} else {
HTTP_REPLY_NOT_FOUND
}

File diff suppressed because one or more lines are too long

View File

@@ -35,20 +35,10 @@ def sist2_index(files, *args):
path = copy_files(files)
shutil.rmtree("test_i", ignore_errors=True)
sist2("scan", path, "-o", "test_i", "-t12", *args)
sist2("scan", path, "-o", "test_i", *args)
return iter(sist2_index_to_dict("test_i"))
def get_lmdb_contents(path):
import lmdb
env = lmdb.open(path)
txn = env.begin(write=False)
return dict((k, v) for k, v in txn.cursor())
def sist2_incremental_index(files, func=None, incremental_index=False, *args):
path = copy_files(files)
@@ -56,7 +46,7 @@ def sist2_incremental_index(files, func=None, incremental_index=False, *args):
func(path)
shutil.rmtree("test_i_inc", ignore_errors=True)
sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", "-t12", *args)
sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", *args)
return iter(sist2_index_to_dict("test_i_inc", incremental_index))
@@ -86,31 +76,9 @@ class ScanTest(unittest.TestCase):
pass
file_count = sum(1 for _ in sist2_index(TEST_FILES))
lmdb_full = get_lmdb_contents("test_i/thumbs")
# Remove files
num_files_rm1 = len(list(sist2_incremental_index(TEST_FILES, remove_files)))
lmdb_rm1 = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(num_files_rm1, file_count - 2)
self.assertEqual(len(set(lmdb_full.keys() - set(lmdb_rm1.keys()))), 2)
# add files (incremental_index=True)
num_files_add_inc = len(list(sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)))
lmdb_add_inc = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(num_files_add_inc, 3)
self.assertEqual(set(lmdb_full.keys()), set(lmdb_add_inc.keys()))
# add files
num_files_add = len(list(sist2_incremental_index(TEST_FILES, add_files)))
lmdb_add = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(num_files_add, file_count + 3)
self.assertEqual(set(lmdb_full.keys()), set(lmdb_add.keys()))
# (No action)
sist2_incremental_index(TEST_FILES)
lmdb_inc = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(set(lmdb_full.keys()), set(lmdb_inc.keys()))
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, remove_files)), file_count - 2)
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)), 3)
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files)), file_count + 3)
if __name__ == "__main__":

View File

@@ -6,11 +6,26 @@ set(CMAKE_C_STANDARD 11)
option(BUILD_TESTS "Build tests" on)
add_subdirectory(third-party/antiword)
set(USE_LIBXML2 OFF CACHE BOOL "" FORCE)
set(USE_XMLWRITER OFF CACHE BOOL "" FORCE)
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
add_subdirectory(third-party/libmobi)
if (SIST_DEBUG)
add_compile_definitions(
antiword
DEBUG
)
target_compile_options(
antiword
PRIVATE
-g
-fstack-protector
-fno-omit-frame-pointer
-fsanitize=address
-fno-inline
)
else()
add_compile_definitions(
antiword
NDEBUG
)
endif()
add_library(
scan
@@ -33,54 +48,6 @@ add_library(
libscan/mobi/scan_mobi.c libscan/mobi/scan_mobi.h libscan/raw/raw.c libscan/raw/raw.h)
set_target_properties(scan PROPERTIES LINKER_LANGUAGE C)
if (SIST_DEBUG)
add_compile_definitions(
antiword
DEBUG
)
target_compile_options(
antiword
PRIVATE
-g
-fstack-protector
-fno-omit-frame-pointer
-fsanitize=address
-fno-inline
)
elseif (SIST_FAST)
add_compile_definitions(
antiword
NDEBUG
)
target_compile_options(
scan
PRIVATE
-Ofast
-march=native
-fno-stack-protector
-fomit-frame-pointer
-freciprocal-math
)
else()
add_compile_definitions(
antiword
NDEBUG
)
target_compile_options(
scan
PRIVATE
-Ofast
#-march=native
-fno-stack-protector
-fomit-frame-pointer
#-freciprocal-math
)
endif()
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib .so)
find_package(cJSON CONFIG REQUIRED)
@@ -118,15 +85,35 @@ target_compile_options(
-g
)
include(ExternalProject)
find_program(MAKE_EXE NAMES gmake nmake make)
ExternalProject_Add(
libmobi
GIT_REPOSITORY https://github.com/simon987/libmobi.git
GIT_TAG "public"
UPDATE_COMMAND ""
PATCH_COMMAND ""
TEST_COMMAND ""
CONFIGURE_COMMAND ./autogen.sh && ./configure
INSTALL_COMMAND ""
PREFIX "third-party/ext_libmobi"
SOURCE_DIR "third-party/ext_libmobi/src/libmobi"
BINARY_DIR "third-party/ext_libmobi/src/libmobi"
BUILD_COMMAND ${MAKE_EXE} -j 8 --silent
)
SET(MOBI_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/)
SET(MOBI_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/)
if (SIST_DEBUG)
SET(FFMPEG_DEBUG "--enable-debug=3" "--disable-optimizations")
else()
SET(FFMPEG_DEBUG "")
endif()
include(ExternalProject)
find_program(MAKE_EXE NAMES gmake nmake make)
ExternalProject_Add(
ffmpeg
GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git
@@ -172,10 +159,10 @@ SET(WPD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwp
add_dependencies(
scan
libmobi
ffmpeg
antiword
libwpd
mobi
)
target_link_libraries(
@@ -193,6 +180,8 @@ target_link_libraries(
${MUPDF_LIB}
openjp2
${MOBI_LIB_DIR}/libmobi.a
${WPD_LIB_DIR}/libwpd-0.9.a
${WPD_LIB_DIR}/libwpd-stream-0.9.a
@@ -229,7 +218,6 @@ target_link_libraries(
${GUMBO_LIB}
dl
antiword
mobi
unofficial::pcre::pcre unofficial::pcre::pcre16 unofficial::pcre::pcre32 unofficial::pcre::pcrecpp
)

View File

@@ -202,7 +202,7 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre
sub_job->vfile.logf = ctx->logf;
sub_job->vfile.has_checksum = FALSE;
sub_job->vfile.calculate_checksum = f->calculate_checksum;
strcpy(sub_job->parent, doc->doc_id);
memcpy(sub_job->parent, doc->path_md5, MD5_DIGEST_LENGTH);
while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
sub_job->vfile.info = *archive_entry_stat(entry);

View File

@@ -156,7 +156,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store(doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
free(samples);
av_packet_unref(&jpeg_packet);

View File

@@ -232,7 +232,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
bmp_format(&bmp_data, dimensions, bitmap);
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store(doc->doc_id, sizeof(doc->doc_id), (char *) bmp_data.buf, bmp_data.cur);
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) bmp_data.buf, bmp_data.cur);
dyn_buffer_destroy(&bmp_data);
free(bitmap);

View File

@@ -20,10 +20,8 @@
#undef ABS
#define ABS(a) (((a) < 0) ? -(a) : (a))
#define SHA1_DIGEST_LENGTH SHA_DIGEST_LENGTH
#define SHA1_STR_LENGTH (SHA1_DIGEST_LENGTH * 2 + 1)
#define MD5_STR_LENGTH (MD5_DIGEST_LENGTH * 2 + 1)
#define SHA1_STR_LENGTH 41
#define SHA1_DIGEST_LENGTH 20
#define APPEND_STR_META(doc, keyname, value) \
{meta_line_t *meta_str = malloc(sizeof(meta_line_t) + strlen(value)); \

View File

@@ -4,12 +4,7 @@
#define MIN_SIZE 32
#define AVIO_BUF_SIZE 8192
#define IS_VIDEO(fmt) ( \
(fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0 \
&& strcmp((fmt)->iformat->name, "jpeg_pipe") != 0 \
&& strcmp((fmt)->iformat->name, "webp_pipe") != 0 \
&& strcmp((fmt)->iformat->name, "png_pipe") != 0 \
)
#define IS_VIDEO(fmt) ((fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0)
#define STORE_AS_IS ((void*)-1)
@@ -284,22 +279,18 @@ static void
append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int is_video) {
if (is_video) {
if (pFormatCtx->duration / AV_TIME_BASE != 0) {
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
meta_duration->key = MetaMediaDuration;
meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE;
if (meta_duration->long_val > INT32_MAX) {
meta_duration->long_val = 0;
}
APPEND_META(doc, meta_duration)
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
meta_duration->key = MetaMediaDuration;
meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE;
if (meta_duration->long_val > INT32_MAX) {
meta_duration->long_val = 0;
}
APPEND_META(doc, meta_duration)
if (pFormatCtx->bit_rate != 0) {
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
meta_bitrate->key = MetaMediaBitrate;
meta_bitrate->long_val = pFormatCtx->bit_rate;
APPEND_META(doc, meta_bitrate)
}
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
meta_bitrate->key = MetaMediaBitrate;
meta_bitrate->long_val = pFormatCtx->bit_rate;
APPEND_META(doc, meta_bitrate)
}
AVDictionaryEntry *tag = NULL;
@@ -468,7 +459,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
if (scaled_frame == STORE_AS_IS) {
return_value = SAVE_THUMBNAIL_OK;
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) frame_and_packet->packet->data,
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
frame_and_packet->packet->size);
} else {
// Encode frame to jpeg
@@ -482,7 +473,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
// Save thumbnail
if (thumbnail_index == 0) {
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
return_value = SAVE_THUMBNAIL_OK;
} else if (thumbnail_index > 1) {
@@ -491,8 +482,9 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
// I figure out a better fix.
thumbnail_index -= 1;
char tn_key[sizeof(doc->doc_id) + sizeof(char) * 4];
snprintf(tn_key, sizeof(tn_key), "%s%04d", doc->doc_id, thumbnail_index);
char tn_key[sizeof(doc->path_md5) + sizeof(int)];
memcpy(tn_key, doc->path_md5, sizeof(doc->path_md5));
memcpy(tn_key + sizeof(doc->path_md5), &thumbnail_index, sizeof(thumbnail_index));
ctx->store((char *) tn_key, sizeof(tn_key), (char *) jpeg_packet.data, jpeg_packet.size);
} else {
@@ -586,10 +578,9 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
int video_duration_in_seconds = (int) (pFormatCtx->duration / AV_TIME_BASE);
int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF &&
video_duration_in_seconds >= 15)
// Limit to ~1 thumbnail every 7s
? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 7 + 1), 1) + 1
int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF && video_duration_in_seconds >= 15)
// Limit to ~1 thumbnail every 5s
? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 5 + 1), 1) + 1
: 1;
const double seek_increment = thumbnails_to_generate == 1
@@ -854,7 +845,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
if (scaled_frame == STORE_AS_IS) {
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) frame_and_packet->packet->data,
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
frame_and_packet->packet->size);
} else {
// Encode frame to jpeg
@@ -868,7 +859,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
// Save thumbnail
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);
avcodec_free_context(&jpeg_encoder);

View File

@@ -1,6 +1,6 @@
#include "scan_mobi.h"
#include "../../third-party/libmobi/src/mobi.h"
#include <mobi.h>
#include <errno.h>
#include "stdlib.h"

View File

@@ -191,7 +191,7 @@ void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, s
archive_read_data(a, buf, entry_size);
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), buf, entry_size);
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), buf, entry_size);
free(buf);
}

View File

@@ -84,7 +84,7 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);
av_free(*scaled_frame->data);

View File

@@ -48,9 +48,6 @@ typedef int scan_code_t;
#define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1);
#define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1);
#define SIST_DOC_ID_LEN MD5_STR_LENGTH
#define SIST_INDEX_ID_LEN MD5_STR_LENGTH
enum metakey {
// String
MetaContent = 1,
@@ -106,7 +103,7 @@ typedef struct meta_line {
typedef struct document {
char doc_id[SIST_DOC_ID_LEN];
unsigned char path_md5[MD5_DIGEST_LENGTH];
unsigned long size;
unsigned int mime;
int mtime;
@@ -162,7 +159,7 @@ typedef struct parse_job_t {
int base;
int ext;
struct vfile vfile;
char parent[SIST_DOC_ID_LEN];
unsigned char parent[MD5_DIGEST_LENGTH];
char filepath[1];
} parse_job_t;

View File

@@ -923,6 +923,7 @@ TEST(Msdoc, Test1Pdf) {
ASSERT_TRUE(strstr(get_meta(&doc, MetaContent)->str_val, "October 2000") != nullptr);
ASSERT_STREQ(get_meta(&doc, MetaTitle)->str_val, "INTERNATIONAL ORGANIZATION FOR STANDARDIZATION");
ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "Oliver Morgan");
ASSERT_EQ(get_meta(&doc, MetaPages)->long_val, 57);
ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), msdoc_ctx.content_size, 4);
ASSERT_NE(size_before, store_size);
@@ -1029,23 +1030,6 @@ TEST(Msdoc, TestUtf8Text) {
cleanup(&doc, &f);
}
TEST(Msdoc, Test5Pdf) {
vfile_t f;
document_t doc;
load_doc_file("libscan-test-files/test_files/msdoc/test5.doc", &f, &doc);
size_t size_before = store_size;
parse_msdoc(&msdoc_ctx, &f, &doc);
ASSERT_TRUE(strstr(get_meta(&doc, MetaContent)->str_val, "орган Федеральной") != nullptr);
ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "uswo");
ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), msdoc_ctx.content_size, 4);
ASSERT_NE(size_before, store_size);
cleanup(&doc, &f);
}
TEST(Msdoc, TestFuzz1) {
vfile_t f;
document_t doc;
@@ -1205,7 +1189,4 @@ int main(int argc, char **argv) {
av_log_set_level(AV_LOG_QUIET);
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
// 0x6130000d2580
// "/mnt/Hatchery/m ain/downloads/qbittorrent/downloads/Roskomnadzor/УПРАВЛЕНИЕ РОСКОМНАДЗОРА по РБ.zip#/УПРАВЛЕНИЕ РОСКОМНАДЗОРА по РБ/Лопатин Ю.М/Секнин/2015 год/Обучение по ", <incomplete sequence \320>...
}