Compare commits

...

1 Commits

Author SHA1 Message Date
9e51d55ca1 Update readme 2019-10-26 15:48:34 -04:00
5 changed files with 48 additions and 35 deletions

View File

@ -5,19 +5,40 @@
sist2 (Simple incremental search tool)
*Warning: sist2 is in early development*
## Features
* Fast, low memory usage
* Portable (all its features are packaged in a single executable)
* Extracts text from common file types\*
* Generates thumbnails\*
* Incremental scanning
\* See [format support](#format-support)
## Getting Started
1. Have an [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) instance running
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases)
*Windows users*: `sist2` runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
*Mac users*: See [#1](https://github.com/simon987/sist2/issues/1)
## Example usage
![demo](demo.gif)
See the help page (`sist2 --help`) for more details.
**Scan a directory**
```bash
sist2 scan ~/Documents -o ./orig_idx/
sist2 scan --threads 4 --content-size 16384 /mnt/Pictures
sist2 scan -i ./orig_idx/ -o ./updated_idx/ ~/Documents
sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
```
**Push index to Elasticsearch or file**
@ -46,22 +67,11 @@ docx, xlsx, pptx | | *planned* | no | *planned* |
## Installation
1. Download runtime dependencies
`apt install curl bzip2`
1. Download binary
Get [the latest release](https://github.com/simon987/sist2/releases) from GitHub
1. (Optional) Add to search path
`mv sist2 /usr/bin/`
## Build from source
You can compile **sist2** yourself if you don't want to use the pre-compiled
binaries.
1. Install compile-time dependencies
*(Debian)*
@ -70,12 +80,7 @@ docx, xlsx, pptx | | *planned* | no | *planned* |
libssl-dev uuid-dev libavformat-dev libswscale-dev \
python3 libmagic-dev libfreetype6-dev libcurl-dev \
libbz2-dev yasm
```
*(Arch Linux)*
```bash
pacman -S git ffmpeg pkg-config cmake openssl curl \
bzip2 yasm libutil-linux
```
2. Build
```bash
git clone --recurse-submodules https://github.com/simon987/sist2

BIN
demo.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 MiB

View File

@ -37,5 +37,4 @@ struct {
} WebCtx;
#endif

View File

@ -94,7 +94,7 @@ void parse(void *arg) {
if (!(SHOULD_PARSE(doc.mime))) {
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) || mmime == MimeAudio || mmime == MimeImage) {
// parse_media(job->filepath, &doc);
parse_media(job->filepath, &doc);
} else if (IS_PDF(doc.mime)) {
void *pdf_buf = read_all(job, (char *) buf, bytes_read, &fd);
@ -105,15 +105,15 @@ void parse(void *arg) {
}
} else if (mmime == MimeText && ScanCtx.content_size > 0) {
// parse_text(bytes_read, &fd, (char *) buf, &doc);
parse_text(bytes_read, &fd, (char *) buf, &doc);
} else if (IS_FONT(doc.mime)) {
// void *font_buf = read_all(job, (char *) buf, bytes_read, &fd);
// parse_font(font_buf, doc.size, &doc);
//
// if (font_buf != buf) {
// free(font_buf);
// }
void *font_buf = read_all(job, (char *) buf, bytes_read, &fd);
parse_font(font_buf, doc.size, &doc);
if (font_buf != buf) {
free(font_buf);
}
}
write_document(&doc);

View File

@ -1,7 +1,7 @@
#include <src/ctx.h>
#include "pdf.h"
#include "src/ctx.h"
__always_inline
fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
fz_page *cover = fz_load_page(ctx, fzdoc, 0);
@ -25,8 +25,12 @@ fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
fz_device *dev = fz_new_draw_device(ctx, m, pixmap);
pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_run_page(ctx, cover, dev, fz_identity, NULL);
pthread_mutex_unlock(&ScanCtx.mupdf_mu);
fz_try(ctx)
fz_run_page(ctx, cover, dev, fz_identity, NULL);
fz_always(ctx)
pthread_mutex_unlock(&ScanCtx.mupdf_mu);
fz_catch(ctx)
fz_rethrow(ctx);
fz_drop_device(ctx, dev);
@ -72,6 +76,7 @@ void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
fzdoc = fz_open_document_with_stream(ctx, mime_get_mime_text(doc->mime), stream);
int page_count = fz_count_pages(ctx, fzdoc);
fz_page *cover = render_cover(ctx, doc, fzdoc);
fz_stext_options opts;
@ -90,8 +95,12 @@ void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_run_page_contents(ctx, page, dev, fz_identity, NULL);
pthread_mutex_unlock(&ScanCtx.mupdf_mu);
fz_try(ctx)
fz_run_page_contents(ctx, page, dev, fz_identity, NULL);
fz_always(ctx)
pthread_mutex_unlock(&ScanCtx.mupdf_mu);
fz_catch(ctx)
fz_rethrow(ctx);
fz_drop_device(ctx, dev);