Update readme

Handle GPS metadata in the UI
Fix meta_key UB problem
2025-12-14 15:59:03 +00:00 · 2021-06-11 20:44:47 -04:00 · 2021-06-11 20:41:05 -04:00 · 2021-06-11 20:19:36 -04:00
9 changed files with 99 additions and 44 deletions
--- a/README.md
+++ b/README.md
@@ -25,14 +25,12 @@ sist2 (Simple incremental search tool)
 * OCR support with tesseract \*\*\*
 * Stats page & disk utilisation visualization
 \* See [format support](#format-support)    
 \*\* See [Archive files](#archive-files)    
 \*\*\* See [OCR](#ocr)
 ![stats](docs/stats.png)
 ## Getting Started
 1. Have an Elasticsearch (>= 6.X.X) instance running
@@ -57,10 +55,8 @@ sist2 (Simple incremental search tool)
 1. See [Usage guide](docs/USAGE.md)
 \* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
 ## Example usage
 See [Usage guide](docs/USAGE.md) for more details
@@ -69,7 +65,6 @@ See [Usage guide](docs/USAGE.md) for more details
 1. Push index to Elasticsearch: `sist2 index ./docs_idx`
 1. Start web interface: `sist2 web ./docs_idx`
 ## Format support
 File type | Library | Content | Thumbnail | Metadata
@@ -78,8 +73,8 @@ pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
 cbz,cbr | *(none)* | - | yes | - |
 `audio/*` | ffmpeg | - | yes | ID3 tags |
 `video/*` | ffmpeg | - | yes | title, comment, artist |
-`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
+`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
-raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf  | LibRaw | - | yes | Common EXIF tags |
+raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf  | LibRaw | - | yes | Common EXIF tags, GPS tags |
 ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
 `text/plain` | *(none)* | yes | no | - |
 html, xml | *(none)* | yes | no | - |
@@ -91,38 +86,37 @@ mobi, azw, azw3 | libmobi | yes | no | author, title |
 \* *See [Archive files](#archive-files)*
 ### Archive files
-**sist2** will scan files stored into archive files (zip, tar, 7z...) as if
+
-they were directly in the file system. Recursive (archives inside archives)
+**sist2** will scan files stored into archive files (zip, tar, 7z...) as if they were directly in the file system.
 Recursive (archives inside archives)
 scan is also supported.
 **Limitations**:
 * Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
  is limited (see `--mem-buffer` option)
 * Archive files are scanned sequentially, by a single thread. On systems where
-**sist2** is not I/O bound, scans might be faster when larger archives are split
+  **sist2** is not I/O bound, scans might be faster when larger archives are split into smaller parts.
-into smaller parts.
 ### OCR
 You can enable OCR support for pdf,xps,fb2,epub file types with the
-`--ocr <lang>` option. Download the language data files with your
+`--ocr <lang>` option. Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
-package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
+directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
 The `simon987/sist2` image comes with common languages
 (hin, jpn, eng, fra, rus, spa) pre-installed.
 Examples
 ```bash
 sist2 scan --ocr jpn ~/Books/Manga/
 sist2 scan --ocr eng ~/Books/Textbooks/
 ```
 ## Build from source
-You can compile **sist2** by yourself if you don't want to use the pre-compiled
+You can compile **sist2** by yourself if you don't want to use the pre-compiled binaries (GCC 7+ required).
-binaries (GCC 7+ required).
 1. Install compile-time dependencies
--- a/schema/mappings.json
+++ b/schema/mappings.json
@@ -165,6 +165,30 @@
    "exif_user_comment": {
      "type": "text"
    },
    "exif_gps_longitude_ref": {
      "type": "keyword",
      "index": false
    },
    "exif_gps_longitude_dms": {
      "type": "keyword",
      "index": false
    },
    "exif_gps_longitude_dec": {
      "type": "keyword",
      "index": false
    },
    "exif_gps_latitude_ref": {
      "type": "keyword",
      "index": false
    },
    "exif_gps_latitude_dms": {
      "type": "keyword",
      "index": false
    },
    "exif_gps_latitude_dec": {
      "type": "keyword",
      "index": false
    },
    "author": {
      "type": "text"
    },
--- a/src/index/static_generated.c
+++ b/src/index/static_generated.c
--- a/src/io/serialize.c
+++ b/src/io/serialize.c
@@ -18,7 +18,7 @@ typedef struct {
 #define META_NEXT 0xFFFF
 void skip_meta(FILE *file) {
-    enum metakey key;
+    enum metakey key = 0;
    fread(&key, sizeof(uint16_t), 1, file);
    while (key != META_NEXT) {
@@ -237,7 +237,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
    FILE *file = fopen(path, "rb");
    while (TRUE) {
        buf.cur = 0;
-        size_t _ = fread((void *) &line, 1, sizeof(line_t), file);
+        size_t _ = fread((void *) &line, sizeof(line_t), 1, file);
        if (feof(file)) {
            break;
        }
@@ -284,8 +284,8 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
            cJSON_AddStringToObject(document, "path", "");
        }
-        enum metakey key;
+        enum metakey key = 0;
-        fread(&key, sizeof(short), 1, file);
+        fread(&key, sizeof(uint16_t), 1, file);
        size_t ret;
        while (key != META_NEXT) {
            switch (key) {
@@ -481,7 +481,7 @@ void incremental_read(GHashTable *table, const char *filepath) {
        incremental_put(table, line.path_md5, line.mtime);
-        while ((getc(file))) {}
+        while ((getc(file)) != 0) {}
        skip_meta(file);
    }
    fclose(file);
@@ -531,7 +531,7 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
                free(buf);
            }
-            enum metakey key;
+            enum metakey key = 0;
            while (1) {
                fread(&key, sizeof(uint16_t), 1, file);
                fwrite(&key, sizeof(uint16_t), 1, dst_file);
--- a/src/static/js/dom.js
+++ b/src/static/js/dom.js
@@ -192,6 +192,19 @@ function makeUserTag(tag, hit) {
    return userTag;
 }
 /**
  * Append an "Exif GPS" row to a document-info table body. The row shows the
  * coordinates as text and links them to Google Maps.
  *
  * @param tbody jQuery-wrapped table body that receives the new row
  * @param latitude latitude value, displayed as-is and embedded in the link
  * @param longitude longitude value, displayed as-is and embedded in the link
  */
 function makeGpsMetaRow(tbody, latitude, longitude) {
    // The coordinates come from indexed file metadata, so encode them before
    // placing them in the query string to keep stray `&`/`#` characters from
    // altering the URL.
    const lat = encodeURIComponent(latitude);
    const lon = encodeURIComponent(longitude);
    const mapUrl = `https://maps.google.com/?q=${lat},${lon}&ll=${lat},${lon}&t=k&z=17`;

    tbody.append($("<tr>")
        .append($("<td>").text("Exif GPS"))
        .append($("<td>")
            .append($("<a>")
                .text(`${latitude}, ${longitude}`)
                .attr("href", mapUrl)
                .attr("target", "_blank")
                // Without rel=noopener the opened page can reach back into
                // this tab through window.opener.
                .attr("rel", "noopener noreferrer")
            )
        )
    );
 }
 function infoButtonCb(hit) {
    return () => {
        getDocumentInfo(hit["_id"]).then(doc => {
@@ -229,13 +242,25 @@ function infoButtonCb(hit) {
                    .text(new Date(doc["mtime"] * 1000).toISOString().split(".")[0].replace("T", " "))
                    .attr("title", doc["mtime"]))
            );
            // Exif GPS
            if ("exif_gps_longitude_dec" in doc) {
                makeGpsMetaRow(tbody, doc["exif_gps_latitude_dec"], doc["exif_gps_longitude_dec"])
            } else if ("exif_gps_longitude_dms" in doc) {
                makeGpsMetaRow(
                    tbody,
                    dmsToDecimal(doc["exif_gps_latitude_dms"], doc["exif_gps_latitude_ref"]),
                    dmsToDecimal(doc["exif_gps_longitude_dms"], doc["exif_gps_longitude_ref"]),
                )
            }
            const displayFields = new Set([
                "mime", "size", "path", "title", "width", "height", "duration", "audioc", "videoc",
                "bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag", "author",
                "modified_by", "pages"
            ]);
            Object.keys(doc)
-                .filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || key.startsWith("exif_"))
+                .filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || (key.startsWith("exif_") && !key.includes("gps")))
                .forEach(key => {
                    tbody.append($("<tr>")
                        .append($("<td>").text(key))
@@ -352,7 +377,7 @@ function createDocCard(hit) {
            audio.setAttribute("src", "f/" + hit["_id"]);
            audio.addEventListener("play", () => {
                // Pause all currently playing audio tags
-                $("audio").each(function(){
+                $("audio").each(function () {
                    if (this !== audio) {
                        this.pause();
                    }
--- a/src/static/js/util.js
+++ b/src/static/js/util.js
@@ -236,3 +236,13 @@ function updateColumnStyle() {
        `
    }
 }
 /**
  * Convert a degrees/minutes/seconds coordinate to signed decimal degrees.
  *
  * @param dms comma-separated list of up to three "numerator:denominator"
  *            rationals (degrees, minutes, seconds), e.g. "45:1, 30:1, 0:1"
  * @param ref hemisphere reference; "S" and "W" yield a negative result,
  *            any other value yields a positive one
  * @returns decimal degrees as a Number
  */
 function dmsToDecimal(dms, ref) {
    // Parse a single "numerator:denominator" token. An absent component counts
    // as 0, an absent denominator counts as 1, and a zero denominator (e.g. the
    // "0:0" some files store for unknown seconds) counts as 0 rather than
    // poisoning the whole coordinate with NaN.
    const parseRational = (token) => {
        if (token === undefined) {
            return 0
        }
        const parts = token.trim().split(":")
        const numerator = Number(parts[0])
        const denominator = parts.length > 1 ? Number(parts[1]) : 1
        return denominator === 0 ? 0 : numerator / denominator
    }

    const tokens = dms.split(",")
    const d = parseRational(tokens[0])
    const m = parseRational(tokens[1])
    const s = parseRational(tokens[2])
    const sign = (ref === "S" || ref === "W") ? -1 : 1
    return (d + (m / 60) + (s / 3600)) * sign
 }
--- a/src/web/static_generated.c
+++ b/src/web/static_generated.c
--- a/tests/test_scan.py
+++ b/tests/test_scan.py
@@ -17,17 +17,19 @@ def copy_files(files):
 def sist2(*args):
    print("./sist2 " + " ".join(args))
    return subprocess.check_output(
-        args=["./sist2_debug", *args],
+        args=["./sist2", *args],
    )
 def sist2_index(files, *args):
    path = copy_files(files)
-    shutil.rmtree("i", ignore_errors=True)
+    shutil.rmtree("test_i", ignore_errors=True)
-    sist2("scan", path, "-o", "i", *args)
+    sist2("scan", path, "-o", "test_i", *args)
-    return iter(sist2_index_to_dict("i"))
+    return iter(sist2_index_to_dict("test_i"))
 def sist2_incremental_index(files, func=None, *args):
@@ -36,14 +38,14 @@ def sist2_incremental_index(files, func=None, *args):
    if func:
        func(path)
-    shutil.rmtree("i_inc", ignore_errors=True)
+    shutil.rmtree("test_i_inc", ignore_errors=True)
-    sist2("scan", path, "-o", "i_inc", "--incremental", "i", *args)
+    sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", *args)
-    return iter(sist2_index_to_dict("i_inc"))
+    return iter(sist2_index_to_dict("test_i_inc"))
 def sist2_index_to_dict(index):
    res = subprocess.check_output(
-        args=["./sist2_debug", "index", "--print", index],
+        args=["./sist2", "index", "--print", index],
    )
    for line in res.splitlines():
--- a/third-party/libscan
+++ b/third-party/libscan
Author	SHA1	Message	Date
simon987	7c46ad632a	Update readme	2021-06-11 20:44:47 -04:00
simon987	5b8c13fd13	Handle GPS metadata in the UI	2021-06-11 20:41:05 -04:00
simon987	efa4a06e56	Fix meta_key UB problem	2021-06-11 20:19:36 -04:00