mirror of
https://github.com/simon987/sist2.git
synced 2025-04-21 19:26:45 +00:00
Compare commits
9 Commits
8ad9fc9e32
...
57a28d781f
Author | SHA1 | Date | |
---|---|---|---|
57a28d781f | |||
6ec98046fa | |||
|
4fac81ca6a | ||
2882741926 | |||
edba9b7917 | |||
e89964d592 | |||
329afcbe4f | |||
2a2664a5cd | |||
0d18637e88 |
@ -3,7 +3,7 @@ MAINTAINER simon987 <me@simon987.net>
|
|||||||
|
|
||||||
WORKDIR /build/
|
WORKDIR /build/
|
||||||
COPY . .
|
COPY . .
|
||||||
RUN cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
|
RUN cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
|
||||||
RUN make -j$(nproc)
|
RUN make -j$(nproc)
|
||||||
RUN strip sist2 || mv sist2_debug sist2
|
RUN strip sist2 || mv sist2_debug sist2
|
||||||
|
|
||||||
|
@ -52,7 +52,7 @@ sist2 (Simple incremental search tool)
|
|||||||
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` *
|
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` *
|
||||||
2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
|
2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
|
||||||
recommended!)*
|
recommended!)*
|
||||||
3. *(or)* `docker pull simon987/sist2:2.11.6-x64-linux`
|
3. *(or)* `docker pull simon987/sist2:2.11.7-x64-linux`
|
||||||
|
|
||||||
1. See [Usage guide](docs/USAGE.md)
|
1. See [Usage guide](docs/USAGE.md)
|
||||||
|
|
||||||
|
122
docs/USAGE.md
122
docs/USAGE.md
@ -13,7 +13,6 @@
|
|||||||
* [options](#web-options)
|
* [options](#web-options)
|
||||||
* [examples](#web-examples)
|
* [examples](#web-examples)
|
||||||
* [rewrite_url](#rewrite_url)
|
* [rewrite_url](#rewrite_url)
|
||||||
* [link to specific indices](#link-to-specific-indices)
|
|
||||||
* [elasticsearch](#elasticsearch)
|
* [elasticsearch](#elasticsearch)
|
||||||
* [exec-script](#exec-script)
|
* [exec-script](#exec-script)
|
||||||
* [tagging](#tagging)
|
* [tagging](#tagging)
|
||||||
@ -26,62 +25,66 @@ Usage: sist2 scan [OPTION]... PATH
|
|||||||
or: sist2 exec-script [OPTION]... INDEX
|
or: sist2 exec-script [OPTION]... INDEX
|
||||||
Lightning-fast file system indexer and search tool.
|
Lightning-fast file system indexer and search tool.
|
||||||
|
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
-v, --version Show version and exit
|
-v, --version Show version and exit
|
||||||
--verbose Turn on logging
|
--verbose Turn on logging
|
||||||
--very-verbose Turn on debug messages
|
--very-verbose Turn on debug messages
|
||||||
|
|
||||||
Scan options
|
Scan options
|
||||||
-t, --threads=<int> Number of threads. DEFAULT=1
|
-t, --threads=<int> Number of threads. DEFAULT=1
|
||||||
-q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=3
|
--mem-throttle=<int> Total memory threshold in MiB for scan throttling. DEFAULT=0
|
||||||
--size=<int> Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
|
-q, --thumbnail-quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=1
|
||||||
--content-size=<int> Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
|
--thumbnail-size=<int> Thumbnail size, in pixels. DEFAULT=500
|
||||||
--incremental=<str> Reuse an existing index and only scan modified files.
|
--thumbnail-count=<int> Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1
|
||||||
-o, --output=<str> Output directory. DEFAULT=index.sist2/
|
--content-size=<int> Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768
|
||||||
--rewrite-url=<str> Serve files from this url instead of from disk.
|
--incremental=<str> Reuse an existing index and only scan modified files.
|
||||||
--name=<str> Index display name. DEFAULT: (name of the directory)
|
-o, --output=<str> Output directory. DEFAULT=index.sist2/
|
||||||
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
|
--rewrite-url=<str> Serve files from this url instead of from disk.
|
||||||
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
|
--name=<str> Index display name. DEFAULT: (name of the directory)
|
||||||
--archive-passphrase=<str> Passphrase for encrypted archive files
|
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
|
||||||
--ocr-lang=<str> Tesseract language (use 'tesseract --list-langs' to see which are installed on your machine)
|
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
|
||||||
--ocr-images Enable OCR'ing of image files.
|
--archive-passphrase=<str> Passphrase for encrypted archive files
|
||||||
--ocr-ebooks Enable OCR'ing of ebook files.
|
--ocr-lang=<str> Tesseract language (use 'tesseract --list-langs' to see which are installed on your machine)
|
||||||
-e, --exclude=<str> Files that match this regex will not be scanned
|
--ocr-images Enable OCR'ing of image files.
|
||||||
--fast Only index file names & mime type
|
--ocr-ebooks Enable OCR'ing of ebook files.
|
||||||
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
|
-e, --exclude=<str> Files that match this regex will not be scanned
|
||||||
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
|
--fast Only index file names & mime type
|
||||||
--read-subtitles Read subtitles from media files.
|
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
|
||||||
--fast-epub Faster but less accurate EPUB parsing (no thumbnails, metadata)
|
--mem-buffer=<int> Maximum memory buffer size per thread in MiB for files inside archives (see USAGE.md). DEFAULT: 2000
|
||||||
--checksums Calculate file checksums when scanning.
|
--read-subtitles Read subtitles from media files.
|
||||||
--list-file=<str> Specify a list of newline-delimited paths to be scanned instead of normal directory traversal. Use '-' to read from stdin.
|
--fast-epub Faster but less accurate EPUB parsing (no thumbnails, metadata)
|
||||||
|
--checksums Calculate file checksums when scanning.
|
||||||
|
--list-file=<str> Specify a list of newline-delimited paths to be scanned instead of normal directory traversal. Use '-' to read from stdin.
|
||||||
|
|
||||||
Index options
|
Index options
|
||||||
-t, --threads=<int> Number of threads. DEFAULT=1
|
-t, --threads=<int> Number of threads. DEFAULT=1
|
||||||
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
|
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
|
||||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||||
-p, --print Just print JSON documents to stdout.
|
-p, --print Just print JSON documents to stdout.
|
||||||
--script-file=<str> Path to user script.
|
--incremental-index Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch.
|
||||||
--mappings-file=<str> Path to Elasticsearch mappings.
|
--script-file=<str> Path to user script.
|
||||||
--settings-file=<str> Path to Elasticsearch settings.
|
--mappings-file=<str> Path to Elasticsearch mappings.
|
||||||
--async-script Execute user script asynchronously.
|
--settings-file=<str> Path to Elasticsearch settings.
|
||||||
--batch-size=<int> Index batch size. DEFAULT: 100
|
--async-script Execute user script asynchronously.
|
||||||
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
|
--batch-size=<int> Index batch size. DEFAULT: 100
|
||||||
|
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
|
||||||
|
|
||||||
Web options
|
Web options
|
||||||
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
||||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||||
--bind=<str> Listen on this address. DEFAULT=localhost:4090
|
--bind=<str> Listen on this address. DEFAULT=localhost:4090
|
||||||
--auth=<str> Basic auth in user:password format
|
--auth=<str> Basic auth in user:password format
|
||||||
--tag-auth=<str> Basic auth in user:password format for tagging
|
--tag-auth=<str> Basic auth in user:password format for tagging
|
||||||
--tagline=<str> Tagline in navbar
|
--tagline=<str> Tagline in navbar
|
||||||
--dev Serve html & js files from disk (for development)
|
--dev Serve html & js files from disk (for development)
|
||||||
--lang=<str> Default UI language. Can be changed by the user
|
--lang=<str> Default UI language. Can be changed by the user
|
||||||
|
|
||||||
Exec-script options
|
Exec-script options
|
||||||
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
||||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||||
--script-file=<str> Path to user script.
|
--script-file=<str> Path to user script.
|
||||||
--async-script Execute user script asynchronously.
|
--async-script Execute user script asynchronously.
|
||||||
|
Made by simon987 <me@simon987.net>. Released under GPL-3.0
|
||||||
```
|
```
|
||||||
|
|
||||||
## Scan
|
## Scan
|
||||||
@ -90,13 +93,21 @@ Exec-script options
|
|||||||
|
|
||||||
* `-t, --threads`
|
* `-t, --threads`
|
||||||
Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-CimInstance Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
|
Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-CimInstance Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
|
||||||
* `-q, --quality`
|
* `--mem-throttle`
|
||||||
|
Total memory threshold in MiB for scan throttling. Worker threads will not start a new parse job
|
||||||
|
until the total memory usage of sist2 is below this threshold. Set to 0 to disable. DEFAULT=0
|
||||||
|
* `-q, --thumbnail-quality`
|
||||||
Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best.
|
Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best.
|
||||||
* `--size`
|
* `--thumbnail-size`
|
||||||
Thumbnail size in pixels.
|
Thumbnail size in pixels.
|
||||||
|
* `--thumbnail-count`
|
||||||
|
Maximum number of thumbnails to generate. When set to a value >= 2, thumbnails for video previews
|
||||||
|
will be generated. The actual number of thumbnails generated depends on the length of the video (maximum 1 image
|
||||||
|
every ~5s). Set to 0 to completely disable thumbnails.
|
||||||
* `--content-size`
|
* `--content-size`
|
||||||
Number of bytes of text to be extracted from the content of files (plain text and PDFs).
|
Number of bytes of text to be extracted from the content of files (plain text, PDFs etc.).
|
||||||
Repeated whitespace and special characters do not count toward this limit.
|
Repeated whitespace and special characters do not count toward this limit.
|
||||||
|
Set to 0 to completely disable content parsing.
|
||||||
* `--incremental`
|
* `--incremental`
|
||||||
Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
|
Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
|
||||||
will be copied to the new index and will not be parsed again.
|
will be copied to the new index and will not be parsed again.
|
||||||
@ -129,13 +140,13 @@ Exec-script options
|
|||||||
In effect, smaller `treemap-threshold` values will yield a more detailed
|
In effect, smaller `treemap-threshold` values will yield a more detailed
|
||||||
(but also a more cluttered and harder to read) visualization.
|
(but also a more cluttered and harder to read) visualization.
|
||||||
|
|
||||||
* `--mem-buffer` Maximum memory buffer size in MB (per thread) for files inside archives. Media files
|
* `--mem-buffer` Maximum memory buffer size in MiB (per thread) for files inside archives. Media files
|
||||||
larger than this number will be read sequentially and no *seek* operations will be supported.
|
larger than this number will be read sequentially and no *seek* operations will be supported.
|
||||||
|
|
||||||
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
|
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
|
||||||
* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
|
* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
|
||||||
* `--fast-epub` Much faster but less accurate EPUB parsing. When enabled, sist2 will use a simple HTML parser to read epub files instead of the MuPDF library. No thumbnails are generated and author/title metadata are not parsed.
|
* `--fast-epub` Much faster but less accurate EPUB parsing. When enabled, sist2 will use a simple HTML parser to read epub files instead of the MuPDF library. No thumbnails are generated and author/title metadata are not parsed.
|
||||||
* `--checksums` Calculate file checksums (sha1) when scanning files. This option does not cause any additional read
|
* `--checksums` Calculate file checksums (SHA1) when scanning files. This option does not cause any additional read
|
||||||
operations. Checksums are not calculated for all file types, unless the file is inside an archive. When enabled, duplicate
|
operations. Checksums are not calculated for all file types, unless the file is inside an archive. When enabled, duplicate
|
||||||
files are hidden in the web UI (this behaviour can be toggled in the Configuration page).
|
files are hidden in the web UI (this behaviour can be toggled in the Configuration page).
|
||||||
|
|
||||||
@ -205,6 +216,9 @@ and values are raw image bytes.
|
|||||||
Elasticsearch index name. DEFAULT=sist2
|
Elasticsearch index name. DEFAULT=sist2
|
||||||
* `-p, --print`
|
* `-p, --print`
|
||||||
Print index in JSON format to stdout.
|
Print index in JSON format to stdout.
|
||||||
|
* `--incremental-index`
|
||||||
|
Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch.
|
||||||
|
Only the new changes since the last scan will be sent.
|
||||||
* `--script-file`
|
* `--script-file`
|
||||||
Path to user script. See [Scripting](scripting.md).
|
Path to user script. See [Scripting](scripting.md).
|
||||||
* `--mappings-file`
|
* `--mappings-file`
|
||||||
|
9
sist2-vue/dist/css/chunk-vendors.css
vendored
Normal file
9
sist2-vue/dist/css/chunk-vendors.css
vendored
Normal file
File diff suppressed because one or more lines are too long
1
sist2-vue/dist/css/index.css
vendored
Normal file
1
sist2-vue/dist/css/index.css
vendored
Normal file
File diff suppressed because one or more lines are too long
3
sist2-vue/dist/index.html
vendored
Normal file
3
sist2-vue/dist/index.html
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=1,user-scalable=no"><title>sist2</title><link href="css/chunk-vendors.css" rel="preload" as="style"><link href="css/index.css" rel="preload" as="style"><link href="js/chunk-vendors.js" rel="preload" as="script"><link href="js/index.js" rel="preload" as="script"><link href="css/chunk-vendors.css" rel="stylesheet"><link href="css/index.css" rel="stylesheet"></head><body><noscript><style>body {
|
||||||
|
height: initial;
|
||||||
|
}</style><div style="text-align: center; margin-top: 100px"><strong>We're sorry but sist2 doesn't work properly without JavaScript enabled. Please enable it to continue.</strong><br><strong>Nous sommes désolés mais sist2 ne fonctionne pas correctement si JavaScript est désactivé. Veuillez l'activer pour continuer.</strong></div></noscript><div id="app"></div><script src="js/chunk-vendors.js"></script><script src="js/index.js"></script></body></html>
|
146
sist2-vue/dist/js/chunk-vendors.js
vendored
Normal file
146
sist2-vue/dist/js/chunk-vendors.js
vendored
Normal file
File diff suppressed because one or more lines are too long
1
sist2-vue/dist/js/index.js
vendored
Normal file
1
sist2-vue/dist/js/index.js
vendored
Normal file
File diff suppressed because one or more lines are too long
21
src/main.c
21
src/main.c
@ -103,7 +103,7 @@ void sig_handler(int signum) {
|
|||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void init_dir(const char *dirpath) {
|
void init_dir(const char *dirpath, scan_args_t* args) {
|
||||||
char path[PATH_MAX];
|
char path[PATH_MAX];
|
||||||
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
|
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
|
||||||
|
|
||||||
@ -111,9 +111,18 @@ void init_dir(const char *dirpath) {
|
|||||||
strcpy(ScanCtx.index.desc.version, Version);
|
strcpy(ScanCtx.index.desc.version, Version);
|
||||||
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON);
|
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON);
|
||||||
|
|
||||||
unsigned char index_md5[MD5_DIGEST_LENGTH];
|
if (args->incremental != NULL) {
|
||||||
MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
|
// copy old index id
|
||||||
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
|
char descriptor_path[PATH_MAX];
|
||||||
|
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
|
||||||
|
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
|
||||||
|
memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
|
||||||
|
} else {
|
||||||
|
// generate new index id based on timestamp
|
||||||
|
unsigned char index_md5[MD5_DIGEST_LENGTH];
|
||||||
|
MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
|
||||||
|
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
|
||||||
|
}
|
||||||
|
|
||||||
write_index_descriptor(path, &ScanCtx.index.desc);
|
write_index_descriptor(path, &ScanCtx.index.desc);
|
||||||
}
|
}
|
||||||
@ -378,7 +387,7 @@ void sist2_scan(scan_args_t *args) {
|
|||||||
|
|
||||||
initialize_scan_context(args);
|
initialize_scan_context(args);
|
||||||
|
|
||||||
init_dir(ScanCtx.index.path);
|
init_dir(ScanCtx.index.path, args);
|
||||||
|
|
||||||
char store_path[PATH_MAX];
|
char store_path[PATH_MAX];
|
||||||
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
|
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
|
||||||
@ -674,7 +683,7 @@ int main(int argc, const char *argv[]) {
|
|||||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||||
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
|
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
|
||||||
OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
|
OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
|
||||||
"Conduct incremental indexing, assumes that the old index is already digested by Elasticsearch."),
|
"Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch."),
|
||||||
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||||
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
|
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
|
||||||
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
|
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
|
||||||
|
10
third-party/libscan/CMakeLists.txt
vendored
10
third-party/libscan/CMakeLists.txt
vendored
@ -11,11 +11,6 @@ if (SIST_DEBUG)
|
|||||||
antiword
|
antiword
|
||||||
DEBUG
|
DEBUG
|
||||||
)
|
)
|
||||||
else()
|
|
||||||
add_compile_definitions(
|
|
||||||
antiword
|
|
||||||
NDEBUG
|
|
||||||
)
|
|
||||||
target_compile_options(
|
target_compile_options(
|
||||||
antiword
|
antiword
|
||||||
PRIVATE
|
PRIVATE
|
||||||
@ -25,6 +20,11 @@ else()
|
|||||||
-fsanitize=address
|
-fsanitize=address
|
||||||
-fno-inline
|
-fno-inline
|
||||||
)
|
)
|
||||||
|
else()
|
||||||
|
add_compile_definitions(
|
||||||
|
antiword
|
||||||
|
NDEBUG
|
||||||
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_library(
|
add_library(
|
||||||
|
8
third-party/libscan/libscan/media/media.c
vendored
8
third-party/libscan/libscan/media/media.c
vendored
@ -251,7 +251,7 @@ void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDic
|
|||||||
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr);
|
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr);
|
||||||
|
|
||||||
__always_inline
|
__always_inline
|
||||||
static void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
|
static void append_audio_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, document_t *doc) {
|
||||||
|
|
||||||
AVDictionaryEntry *tag = NULL;
|
AVDictionaryEntry *tag = NULL;
|
||||||
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||||
@ -269,7 +269,7 @@ static void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
|
|||||||
} else if (strcmp(key, "album") == 0) {
|
} else if (strcmp(key, "album") == 0) {
|
||||||
APPEND_TAG_META(MetaAlbum)
|
APPEND_TAG_META(MetaAlbum)
|
||||||
} else if (strcmp(key, "comment") == 0) {
|
} else if (strcmp(key, "comment") == 0) {
|
||||||
APPEND_TAG_META(MetaContent)
|
append_tag_meta_if_not_exists(ctx, doc, tag, MetaContent);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -437,7 +437,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
|
|||||||
return SAVE_THUMBNAIL_FAILED;
|
return SAVE_THUMBNAIL_FAILED;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx->tesseract_lang != NULL && IS_VIDEO(pFormatCtx)) {
|
if (ctx->tesseract_lang != NULL && IS_VIDEO(pFormatCtx) && thumbnail_index == 0) {
|
||||||
ocr_image(ctx, doc, decoder, frame_and_packet->frame);
|
ocr_image(ctx, doc, decoder, frame_and_packet->frame);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -558,7 +558,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (audio_stream != -1) {
|
if (audio_stream != -1) {
|
||||||
append_audio_meta(pFormatCtx, doc);
|
append_audio_meta(ctx, pFormatCtx, doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (video_stream != -1 && ctx->tn_count > 0) {
|
if (video_stream != -1 && ctx->tn_count > 0) {
|
||||||
|
44
third-party/libscan/libscan/raw/raw.c
vendored
44
third-party/libscan/libscan/raw/raw.c
vendored
@ -7,8 +7,22 @@
|
|||||||
|
|
||||||
#define MIN_SIZE 32
|
#define MIN_SIZE 32
|
||||||
|
|
||||||
int store_thumbnail_jpeg(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) {
|
int store_thumbnail_jpeg(scan_raw_ctx_t *ctx, libraw_thumbnail_t img, document_t *doc) {
|
||||||
return store_image_thumbnail((scan_media_ctx_t *) ctx, img->data, img->data_size, doc, "x.jpeg");
|
|
||||||
|
scan_media_ctx_t media_ctx = {
|
||||||
|
.read_subtitles = FALSE,
|
||||||
|
.tn_count = 1,
|
||||||
|
.max_media_buffer = 0,
|
||||||
|
.store = ctx->store,
|
||||||
|
.log = ctx->log,
|
||||||
|
.logf = ctx->logf,
|
||||||
|
.tn_size = ctx->tn_size,
|
||||||
|
.tn_qscale = ctx->tn_qscale,
|
||||||
|
.tesseract_lang = NULL,
|
||||||
|
.tesseract_path = NULL
|
||||||
|
};
|
||||||
|
|
||||||
|
return store_image_thumbnail(&media_ctx, img.thumb, img.tlength, doc, "x.jpeg");
|
||||||
}
|
}
|
||||||
|
|
||||||
int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) {
|
int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) {
|
||||||
@ -171,25 +185,25 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int errc = 0;
|
|
||||||
libraw_processed_image_t *thumb = libraw_dcraw_make_mem_thumb(libraw_lib, &errc);
|
|
||||||
if (errc != 0) {
|
|
||||||
free(buf);
|
|
||||||
libraw_dcraw_clear_mem(thumb);
|
|
||||||
libraw_close(libraw_lib);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
int tn_ok = 0;
|
int tn_ok = 0;
|
||||||
|
|
||||||
if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_JPEG) {
|
if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_JPEG) {
|
||||||
tn_ok = store_thumbnail_jpeg(ctx, thumb, doc);
|
tn_ok = store_thumbnail_jpeg(ctx, libraw_lib->thumbnail, doc);
|
||||||
} else if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_BITMAP) {
|
} else if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_BITMAP) {
|
||||||
// TODO: technically this should work but is currently untested
|
// TODO: technically this should work but is currently untested
|
||||||
|
|
||||||
|
int errc = 0;
|
||||||
|
libraw_processed_image_t *thumb = libraw_dcraw_make_mem_thumb(libraw_lib, &errc);
|
||||||
|
if (errc != 0) {
|
||||||
|
free(buf);
|
||||||
|
libraw_dcraw_clear_mem(thumb);
|
||||||
|
libraw_close(libraw_lib);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
tn_ok = store_thumbnail_rgb24(ctx, thumb, doc);
|
tn_ok = store_thumbnail_rgb24(ctx, thumb, doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
libraw_dcraw_clear_mem(thumb);
|
|
||||||
|
|
||||||
if (tn_ok == TRUE) {
|
if (tn_ok == TRUE) {
|
||||||
free(buf);
|
free(buf);
|
||||||
libraw_close(libraw_lib);
|
libraw_close(libraw_lib);
|
||||||
@ -206,7 +220,7 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
|||||||
|
|
||||||
libraw_dcraw_process(libraw_lib);
|
libraw_dcraw_process(libraw_lib);
|
||||||
|
|
||||||
errc = 0;
|
int errc = 0;
|
||||||
libraw_processed_image_t *img = libraw_dcraw_make_mem_image(libraw_lib, &errc);
|
libraw_processed_image_t *img = libraw_dcraw_make_mem_image(libraw_lib, &errc);
|
||||||
if (errc != 0) {
|
if (errc != 0) {
|
||||||
free(buf);
|
free(buf);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user