mirror of
				https://github.com/simon987/sist2.git
				synced 2025-10-31 16:06:53 +00:00 
			
		
		
		
	
						commit
						65c499e477
					
				
							
								
								
									
										30
									
								
								.drone.yml
									
									
									
									
									
								
							
							
						
						
									
										30
									
								
								.drone.yml
									
									
									
									
									
								
							| @ -11,21 +11,6 @@ steps: | ||||
|     image: simon987/sist2-build | ||||
|     commands: | ||||
|       - ./scripts/build.sh | ||||
|   - name: docker | ||||
|     image: plugins/docker | ||||
|     settings: | ||||
|       username: | ||||
|         from_secret: DOCKER_USER | ||||
|       password: | ||||
|         from_secret: DOCKER_PASSWORD | ||||
|       repo: simon987/sist2 | ||||
|       context: ./ | ||||
|       dockerfile: ./Dockerfile | ||||
|       auto_tag: true | ||||
|       auto_tag_suffix: x64-linux | ||||
|       when: | ||||
|         event: | ||||
|           - tag | ||||
|   - name: scp files | ||||
|     image: appleboy/drone-scp | ||||
|     settings: | ||||
| @ -42,6 +27,21 @@ steps: | ||||
|         - ./VERSION | ||||
|         - ./sist2-x64-linux | ||||
|         - ./sist2-x64-linux-debug | ||||
|   - name: docker | ||||
|     image: plugins/docker | ||||
|     settings: | ||||
|       username: | ||||
|         from_secret: DOCKER_USER | ||||
|       password: | ||||
|         from_secret: DOCKER_PASSWORD | ||||
|       repo: simon987/sist2 | ||||
|       context: ./ | ||||
|       dockerfile: ./Dockerfile | ||||
|       auto_tag: true | ||||
|       auto_tag_suffix: x64-linux | ||||
|       when: | ||||
|         event: | ||||
|           - tag | ||||
| 
 | ||||
| --- | ||||
| kind: pipeline | ||||
|  | ||||
							
								
								
									
										4
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -23,4 +23,6 @@ git_hash.h | ||||
| Testing/ | ||||
| test_i | ||||
| test_i_inc | ||||
| node_modules/ | ||||
| node_modules/ | ||||
| .cmake/ | ||||
| i_inc/ | ||||
							
								
								
									
										3
									
								
								.gitmodules
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.gitmodules
									
									
									
									
										vendored
									
									
								
							| @ -1,6 +1,3 @@ | ||||
| [submodule "third-party/libscan"] | ||||
| 	path = third-party/libscan | ||||
| 	url = https://github.com/simon987/libscan | ||||
| [submodule "third-party/argparse"] | ||||
| 	path = third-party/argparse | ||||
| 	url = https://github.com/simon987/argparse | ||||
|  | ||||
| @ -22,9 +22,6 @@ add_subdirectory(third-party/argparse) | ||||
| 
 | ||||
| add_executable(sist2 | ||||
| 
 | ||||
|         # argparse | ||||
|         third-party/argparse/argparse.h third-party/argparse/argparse.c | ||||
| 
 | ||||
|         src/main.c | ||||
|         src/sist.h | ||||
|         src/io/walk.h src/io/walk.c | ||||
| @ -41,7 +38,11 @@ add_executable(sist2 | ||||
|         src/log.c src/log.h | ||||
|         src/cli.c src/cli.h | ||||
|         src/stats.c src/stats.h src/ctx.c | ||||
|         src/parsing/sidecar.c src/parsing/sidecar.h) | ||||
|         src/parsing/sidecar.c src/parsing/sidecar.h | ||||
| 
 | ||||
|         # argparse | ||||
|         third-party/argparse/argparse.h third-party/argparse/argparse.c | ||||
|         ) | ||||
| 
 | ||||
| target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/) | ||||
| set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib) | ||||
| @ -86,6 +87,7 @@ if (SIST_DEBUG) | ||||
|             sist2 | ||||
|             PRIVATE | ||||
|             -fsanitize=address | ||||
|             -static-libasan | ||||
|     ) | ||||
|     set_target_properties( | ||||
|             sist2 | ||||
|  | ||||
							
								
								
									
										56
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										56
									
								
								README.md
									
									
									
									
									
								
							| @ -51,7 +51,7 @@ sist2 (Simple incremental search tool) | ||||
|     1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) * | ||||
|     1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not | ||||
|        recommended!)* | ||||
|     1. *(or)* `docker pull simon987/sist2:2.11.4-x64-linux` | ||||
|     1. *(or)* `docker pull simon987/sist2:2.11.6-x64-linux` | ||||
| 
 | ||||
| 1. See [Usage guide](docs/USAGE.md) | ||||
| 
 | ||||
| @ -67,23 +67,23 @@ See [Usage guide](docs/USAGE.md) for more details | ||||
| 
 | ||||
| ## Format support | ||||
| 
 | ||||
| File type | Library | Content | Thumbnail | Metadata | ||||
| :---|:---|:---|:---|:--- | ||||
| pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title | | ||||
| cbz,cbr | [libscan](https://github.com/simon987/libscan) | - | yes | - | | ||||
| `audio/*` | ffmpeg | - | yes | ID3 tags | | ||||
| `video/*` | ffmpeg | - | yes | title, comment, artist | | ||||
| `image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags | | ||||
| raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf  | LibRaw | - | yes | Common EXIF tags, GPS tags | | ||||
| ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style | | ||||
| `text/plain` | [libscan](https://github.com/simon987/libscan) | yes | no | - | | ||||
| html, xml | [libscan](https://github.com/simon987/libscan) | yes | no | - | | ||||
| tar, zip, rar, 7z, ar ...  | Libarchive | yes\* | - | no | | ||||
| docx, xlsx, pptx | [libscan](https://github.com/simon987/libscan) | yes | if embedded | creator, modified_by, title | | ||||
| doc (MS Word 97-2003) | antiword | yes | yes | author, title | | ||||
| mobi, azw, azw3 | libmobi | yes | no | author, title | | ||||
| wpd (WordPerfect) | libwpd | yes | no | *planned* | | ||||
| json, jsonl, ndjson | [libscan](https://github.com/simon987/libscan) | yes | - | - | | ||||
| | File type                                                                 | Library                                                                      | Content  | Thumbnail   | Metadata                                                                                                                               | | ||||
| |:--------------------------------------------------------------------------|:-----------------------------------------------------------------------------|:---------|:------------|:---------------------------------------------------------------------------------------------------------------------------------------| | ||||
| | pdf,xps,fb2,epub                                                          | MuPDF                                                                        | text+ocr | yes         | author, title                                                                                                                          | | ||||
| | cbz,cbr                                                                   | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | -        | yes         | -                                                                                                                                      | | ||||
| | `audio/*`                                                                 | ffmpeg                                                                       | -        | yes         | ID3 tags                                                                                                                               | | ||||
| | `video/*`                                                                 | ffmpeg                                                                       | -        | yes         | title, comment, artist                                                                                                                 | | ||||
| | `image/*`                                                                 | ffmpeg                                                                       | -        | yes         | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags | | ||||
| | raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw                                                                       | -        | yes         | Common EXIF tags, GPS tags                                                                                                             | | ||||
| | ttf,ttc,cff,woff,fnt,otf                                                  | Freetype2                                                                    | -        | yes, `bmp`  | Name & style                                                                                                                           | | ||||
| | `text/plain`                                                              | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes      | no          | -                                                                                                                                      | | ||||
| | html, xml                                                                 | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes      | no          | -                                                                                                                                      | | ||||
| | tar, zip, rar, 7z, ar ...                                                 | Libarchive                                                                   | yes\*    | -           | no                                                                                                                                     | | ||||
| | docx, xlsx, pptx                                                          | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes      | if embedded | creator, modified_by, title                                                                                                            | | ||||
| | doc (MS Word 97-2003)                                                     | antiword                                                                     | yes      | yes         | author, title                                                                                                                          | | ||||
| | mobi, azw, azw3                                                           | libmobi                                                                      | yes      | no          | author, title                                                                                                                          | | ||||
| | wpd (WordPerfect)                                                         | libwpd                                                                       | yes      | no          | *planned*                                                                                                                              | | ||||
| | json, jsonl, ndjson                                                       | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes      | -           | -                                                                                                                                      | | ||||
| 
 | ||||
| \* *See [Archive files](#archive-files)* | ||||
| 
 | ||||
| @ -102,18 +102,24 @@ scan is also supported. | ||||
| 
 | ||||
| ### OCR | ||||
| 
 | ||||
| You can enable OCR support for pdf,xps,fb2,epub file types with the | ||||
| `--ocr <lang>` option. Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or | ||||
| You can enable OCR support for ebook (pdf,xps,fb2,epub) or image file types with the | ||||
| `--ocr-lang <lang>` option in combination with `--ocr-images` and/or `--ocr-ebooks`. | ||||
| Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or | ||||
| directly [from GitHub](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files). | ||||
| 
 | ||||
| The `simon987/sist2` image comes with common languages | ||||
| (hin, jpn, eng, fra, rus, spa) pre-installed. | ||||
| 
 | ||||
| Examples | ||||
| You can use the `+` separator to specify multiple languages. The language | ||||
| name must be identical to the `*.traineddata` file installed on your system  | ||||
| (use `chi_sim` rather than `chi-sim`). | ||||
| 
 | ||||
| Examples: | ||||
| 
 | ||||
| ```bash | ||||
| sist2 scan --ocr jpn ~/Books/Manga/ | ||||
| sist2 scan --ocr eng ~/Books/Textbooks/ | ||||
| sist2 scan --ocr-ebooks --ocr-lang jpn ~/Books/Manga/ | ||||
| sist2 scan --ocr-images --ocr-lang eng ~/Images/Screenshots/ | ||||
| sist2 scan --ocr-ebooks --ocr-images --ocr-lang eng+chi_sim ~/Chinese-Bilingual/ | ||||
| ``` | ||||
| 
 | ||||
| ## Build from source | ||||
| @ -126,7 +132,7 @@ You can compile **sist2** by yourself if you don't want to use the pre-compiled | ||||
| git clone --recursive https://github.com/simon987/sist2/ | ||||
| cd sist2 | ||||
| docker build . -f ./Dockerfile -t my-sist2-image | ||||
| docker run --rm my-sist2-image cat /root/sist2 > sist2-x64-linux | ||||
| docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux | ||||
| ``` | ||||
| 
 | ||||
| ### On a Linux computer | ||||
| @ -143,7 +149,7 @@ docker run --rm my-sist2-image cat /root/sist2 > sist2-x64-linux | ||||
| 
 | ||||
|     ```bash | ||||
|     vcpkg install curl[core,openssl] | ||||
|     vcpkg install lmdb cjson glib brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libuuid libmagic libraw jasper lcms gumbo | ||||
|     vcpkg install lmdb cjson glib brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw jasper lcms gumbo | ||||
|     ``` | ||||
| 
 | ||||
| 1. Build | ||||
|  | ||||
| @ -43,7 +43,7 @@ Scan options | ||||
|     --depth=<int>                 Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1 | ||||
|     --archive=<str>               Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse | ||||
|     --archive-passphrase=<str>    Passphrase for encrypted archive files | ||||
|     --ocr=<str>                   Tesseract language (use tesseract --list-langs to see which are installed on your machine) | ||||
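|     --ocr-lang=<str>              Tesseract language (use tesseract --list-langs to see which are installed on your machine) | ||||
|     --ocr-images                  Enable OCR for image files (used in combination with --ocr-lang) | ||||
|     --ocr-ebooks                  Enable OCR for ebook files: pdf, xps, fb2, epub (used in combination with --ocr-lang) | ||||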
|     -e, --exclude=<str>           Files that match this regex will not be scanned | ||||
|     --fast                        Only index file names & mime type | ||||
|     --treemap-threshold=<str>     Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005 | ||||
|  | ||||
| @ -6,5 +6,4 @@ python3 scripts/mime.py > src/parsing/mime_generated.c | ||||
| python3 scripts/serve_static.py > src/web/static_generated.c | ||||
| python3 scripts/index_static.py > src/index/static_generated.c | ||||
| 
 | ||||
| printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h | ||||
| printf "static const char *const LibScanCommitHash = \"%s\";\n" $(cd third-party/libscan/ && git rev-parse HEAD) >> src/git_hash.h | ||||
| printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h | ||||
							
								
								
									
										9
									
								
								sist2-vue/dist/css/chunk-vendors.css
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										9
									
								
								sist2-vue/dist/css/chunk-vendors.css
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										1
									
								
								sist2-vue/dist/css/index.css
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								sist2-vue/dist/css/index.css
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										4
									
								
								sist2-vue/dist/js/chunk-vendors.js
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								sist2-vue/dist/js/chunk-vendors.js
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										2
									
								
								sist2-vue/dist/js/index.js
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								sist2-vue/dist/js/index.js
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @ -256,20 +256,31 @@ class Sist2Api { | ||||
|         }); | ||||
|     } | ||||
| 
 | ||||
|     getMimeTypes() { | ||||
|         return this.esQuery({ | ||||
|             aggs: { | ||||
|                 mimeTypes: { | ||||
|                     terms: { | ||||
|                         field: "mime", | ||||
|                         size: 10000 | ||||
|                     } | ||||
|     getMimeTypes(query = undefined) { | ||||
|         const AGGS = { | ||||
|             mimeTypes: { | ||||
|                 terms: { | ||||
|                     field: "mime", | ||||
|                     size: 10000 | ||||
|                 } | ||||
|             }, | ||||
|             size: 0, | ||||
|         }).then(resp => { | ||||
|             } | ||||
|         }; | ||||
| 
 | ||||
|         if (!query) { | ||||
|             query = { | ||||
|                 aggs: AGGS, | ||||
|                 size: 0, | ||||
|             }; | ||||
|         } else { | ||||
|             query.size = 0; | ||||
|             query.aggs = AGGS; | ||||
|         } | ||||
| 
 | ||||
|         return this.esQuery(query).then(resp => { | ||||
|             const mimeMap: any[] = []; | ||||
|             resp["aggregations"]["mimeTypes"]["buckets"].sort((a: any, b: any) => a.key > b.key).forEach((bucket: any) => { | ||||
|             const buckets = resp["aggregations"]["mimeTypes"]["buckets"]; | ||||
| 
 | ||||
|             buckets.sort((a: any, b: any) => a.key > b.key).forEach((bucket: any) => { | ||||
|                 const tmp = bucket["key"].split("/"); | ||||
|                 const category = tmp[0]; | ||||
|                 const mime = tmp[1]; | ||||
| @ -289,11 +300,18 @@ class Sist2Api { | ||||
|                 }); | ||||
| 
 | ||||
|                 if (!category_exists) { | ||||
|                     mimeMap.push({"text": category, children: [child]}); | ||||
|                     mimeMap.push({text: category, children: [child], id: category}); | ||||
|                 } | ||||
|             }) | ||||
| 
 | ||||
|             return mimeMap; | ||||
|             mimeMap.forEach(node => { | ||||
|                 if (node.children) { | ||||
|                     node.children.sort((a, b) => a.id.localeCompare(b.id)); | ||||
|                 } | ||||
|             }) | ||||
|             mimeMap.sort((a, b) => a.id.localeCompare(b.id)) | ||||
| 
 | ||||
|             return {buckets, mimeMap}; | ||||
|         }); | ||||
|     } | ||||
| 
 | ||||
|  | ||||
| @ -15,7 +15,7 @@ | ||||
| 
 | ||||
| <script> | ||||
| import IndexDebugInfo from "@/components/IndexDebugInfo"; | ||||
| import DebugIcon from "@/components/DebugIcon"; | ||||
| import DebugIcon from "@/components/icons/DebugIcon"; | ||||
| 
 | ||||
| export default { | ||||
|   name: "DebugInfo.vue", | ||||
| @ -27,7 +27,6 @@ export default { | ||||
|         {key: "platform", value: this.$store.state.sist2Info.platform}, | ||||
|         {key: "debugBinary", value: this.$store.state.sist2Info.debug}, | ||||
|         {key: "sist2CommitHash", value: this.$store.state.sist2Info.sist2Hash}, | ||||
|         {key: "libscanCommitHash", value: this.$store.state.sist2Info.libscanHash}, | ||||
|         {key: "esIndex", value: this.$store.state.sist2Info.esIndex}, | ||||
|         {key: "tagline", value: this.$store.state.sist2Info.tagline}, | ||||
|         {key: "dev", value: this.$store.state.sist2Info.dev}, | ||||
|  | ||||
| @ -34,9 +34,11 @@ | ||||
|           </svg> | ||||
|         </div> | ||||
| 
 | ||||
|         <img v-if="doc._props.isPlayableImage || doc._props.isPlayableVideo" | ||||
|         <img ref="tn" | ||||
|              v-if="doc._props.isPlayableImage || doc._props.isPlayableVideo" | ||||
|              :src="(doc._props.isGif && hover) ? `f/${doc._id}` : `t/${doc._source.index}/${doc._id}`" | ||||
|              alt="" | ||||
|              :style="{height: (doc._props.isGif && hover) ? `${tnHeight()}px` : undefined}" | ||||
|              class="pointer fit card-img-top" @click="onThumbnailClick()"> | ||||
|         <img v-else :src="`t/${doc._source.index}/${doc._id}`" alt="" | ||||
|              class="fit card-img-top"> | ||||
| @ -122,6 +124,9 @@ export default { | ||||
|     }, | ||||
|     onTnLeave() { | ||||
|       this.hover = false; | ||||
|     }, | ||||
|     tnHeight() { | ||||
|       return this.$refs.tn.height; | ||||
|     } | ||||
|   }, | ||||
| } | ||||
|  | ||||
| @ -1,5 +1,6 @@ | ||||
| <template> | ||||
|   <b-list-group-item class="flex-column align-items-start mb-2" :class="{'sub-document': doc._props.isSubDocument}"> | ||||
|   <b-list-group-item class="flex-column align-items-start mb-2" :class="{'sub-document': doc._props.isSubDocument}" | ||||
|                      @mouseenter="onTnEnter()" @mouseleave="onTnLeave()" > | ||||
| 
 | ||||
|     <!-- Info modal--> | ||||
|     <DocInfoModal :show="showInfo" :doc="doc" @close="showInfo = false"></DocInfoModal> | ||||
| @ -56,7 +57,7 @@ import TagContainer from "@/components/TagContainer"; | ||||
| import DocFileTitle from "@/components/DocFileTitle"; | ||||
| import DocInfoModal from "@/components/DocInfoModal"; | ||||
| import ContentDiv from "@/components/ContentDiv"; | ||||
| import FileIcon from "@/components/FileIcon"; | ||||
| import FileIcon from "@/components/icons/FileIcon"; | ||||
| 
 | ||||
| export default { | ||||
|   name: "DocListItem", | ||||
| @ -85,7 +86,13 @@ export default { | ||||
|         return this.doc.highlight["path.nGram"] + "/" | ||||
|       } | ||||
|       return this.doc._source.path + "/" | ||||
|     } | ||||
|     }, | ||||
|     onTnEnter() { | ||||
|       this.hover = true; | ||||
|     }, | ||||
|     onTnLeave() { | ||||
|       this.hover = false; | ||||
|     }, | ||||
|   } | ||||
| } | ||||
| </script> | ||||
|  | ||||
| @ -133,6 +133,11 @@ export default Vue.extend({ | ||||
|   font-size: 80%; | ||||
| } | ||||
| 
 | ||||
| .theme-black .version-badge { | ||||
|   color: #eee !important; | ||||
|   background: none; | ||||
| } | ||||
| 
 | ||||
| .version-badge { | ||||
|   color: #222 !important; | ||||
|   background: none; | ||||
|  | ||||
| @ -1,6 +1,5 @@ | ||||
| <template> | ||||
|   <div> | ||||
|     <!-- TODO: Set slideshowTime as a configurable option--> | ||||
|     <FsLightbox | ||||
|         :key="lightboxKey" | ||||
|         :toggler="showLightbox" | ||||
| @ -10,7 +9,7 @@ | ||||
|         :types="lightboxTypes" | ||||
|         :source-index="lightboxSlide" | ||||
|         :custom-toolbar-buttons="customButtons" | ||||
|         :slideshow-time="1000 * 10" | ||||
|         :slideshow-time="$store.getters.optLightboxSlideDuration * 1000" | ||||
|         :zoom-increment="0.5" | ||||
|         :load-only-current-source="$store.getters.optLightboxLoadOnlyCurrent" | ||||
|         :on-close="onClose" | ||||
|  | ||||
| @ -7,40 +7,24 @@ import InspireTree from "inspire-tree"; | ||||
| import InspireTreeDOM from "inspire-tree-dom"; | ||||
| 
 | ||||
| import "inspire-tree-dom/dist/inspire-tree-light.min.css"; | ||||
| import {getSelectedTreeNodes} from "@/util"; | ||||
| import {getSelectedTreeNodes, getTreeNodeAttributes} from "@/util"; | ||||
| import Sist2Api from "@/Sist2Api"; | ||||
| import Sist2Query from "@/Sist2Query"; | ||||
| 
 | ||||
| export default { | ||||
|   name: "MimePicker", | ||||
|   data() { | ||||
|     return { | ||||
|       mimeTree: null, | ||||
|       stashedMimeTreeAttributes: null | ||||
|     } | ||||
|   }, | ||||
|   mounted() { | ||||
|     this.$store.subscribe((mutation) => { | ||||
|       if (mutation.type === "setUiMimeMap") { | ||||
|         const mimeMap = mutation.payload.slice(); | ||||
| 
 | ||||
|         const elem = document.getElementById("mimeTree"); | ||||
|         console.log(elem); | ||||
| 
 | ||||
|         this.mimeTree = new InspireTree({ | ||||
|           selection: { | ||||
|             mode: 'checkbox' | ||||
|           }, | ||||
|           data: mimeMap | ||||
|         }); | ||||
|         new InspireTreeDOM(this.mimeTree, { | ||||
|           target: '#mimeTree' | ||||
|         }); | ||||
|         this.mimeTree.on("node.state.changed", this.handleTreeClick); | ||||
|         this.mimeTree.deselect(); | ||||
| 
 | ||||
|         if (this.$store.state._onLoadSelectedMimeTypes.length > 0) { | ||||
|           this.$store.state._onLoadSelectedMimeTypes.forEach(mime => { | ||||
|             this.mimeTree.node(mime).select(); | ||||
|           }); | ||||
|         } | ||||
|       if (mutation.type === "setUiMimeMap" && this.mimeTree === null) { | ||||
|         this.initializeTree(); | ||||
|       } else if (mutation.type === "busSearch") { | ||||
|         this.updateTree(); | ||||
|       } | ||||
|     }); | ||||
|   }, | ||||
| @ -52,6 +36,73 @@ export default { | ||||
| 
 | ||||
|       this.$store.commit("setSelectedMimeTypes", getSelectedTreeNodes(this.mimeTree)); | ||||
|     }, | ||||
|     updateTree() { | ||||
| 
 | ||||
|       if (this.$store.getters.optUpdateMimeMap === false) { | ||||
|         return; | ||||
|       } | ||||
| 
 | ||||
|       if (this.stashedMimeTreeAttributes === null) { | ||||
|         this.stashedMimeTreeAttributes = getTreeNodeAttributes(this.mimeTree); | ||||
|       } | ||||
| 
 | ||||
|       const query = Sist2Query.searchQuery(); | ||||
| 
 | ||||
|       Sist2Api.getMimeTypes(query).then(({buckets, mimeMap}) => { | ||||
|         this.$store.commit("setUiMimeMap", mimeMap); | ||||
|         this.$store.commit("setUiDetailsMimeAgg", buckets); | ||||
| 
 | ||||
|         this.mimeTree.removeAll(); | ||||
|         this.mimeTree.addNodes(mimeMap); | ||||
| 
 | ||||
|         // Restore selected mimes | ||||
|         if (this.stashedMimeTreeAttributes === null) { | ||||
|           // NOTE: This happens when successive fast searches are triggered | ||||
|           this.stashedMimeTreeAttributes = {}; | ||||
|           // Always add the selected mime types | ||||
|           this.$store.state.selectedMimeTypes.forEach(mime => { | ||||
|             this.stashedMimeTreeAttributes[mime] = { | ||||
|               checked: true | ||||
|             } | ||||
|           }); | ||||
|         } | ||||
| 
 | ||||
|         Object.entries(this.stashedMimeTreeAttributes).forEach(([mime, attributes]) => { | ||||
|           if (this.mimeTree.node(mime)) { | ||||
|             if (attributes.checked) { | ||||
|               this.mimeTree.node(mime).select(); | ||||
|             } | ||||
|             if (attributes.collapsed === false) { | ||||
|               this.mimeTree.node(mime).expand(); | ||||
|             } | ||||
|           } | ||||
|         }); | ||||
|         this.stashedMimeTreeAttributes = null; | ||||
|       }); | ||||
|     }, | ||||
| 
 | ||||
|     initializeTree() { | ||||
|       const mimeMap = this.$store.state.uiMimeMap; | ||||
| 
 | ||||
|       this.mimeTree = new InspireTree({ | ||||
|         selection: { | ||||
|           mode: "checkbox" | ||||
|         }, | ||||
|         data: mimeMap | ||||
|       }); | ||||
| 
 | ||||
|       new InspireTreeDOM(this.mimeTree, { | ||||
|         target: "#mimeTree" | ||||
|       }); | ||||
|       this.mimeTree.on("node.state.changed", this.handleTreeClick); | ||||
|       this.mimeTree.deselect(); | ||||
| 
 | ||||
|       if (this.$store.state._onLoadSelectedMimeTypes.length > 0) { | ||||
|         this.$store.state._onLoadSelectedMimeTypes.forEach(mime => { | ||||
|           this.mimeTree.node(mime).select(); | ||||
|         }); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
| </script> | ||||
|  | ||||
| @ -20,7 +20,7 @@ | ||||
| </template> | ||||
| 
 | ||||
| <script> | ||||
| import Sist2Icon from "@/components/Sist2Icon"; | ||||
| import Sist2Icon from "@/components/icons/Sist2Icon"; | ||||
| 
 | ||||
| export default { | ||||
|   name: "NavBar", | ||||
|  | ||||
| @ -3,7 +3,10 @@ | ||||
|     <span>{{ hitCount }} {{ hitCount === 1 ? $t("hit") : $t("hits") }}</span> | ||||
| 
 | ||||
|     <div style="float: right"> | ||||
|       <b-button v-b-toggle.collapse-1 variant="primary" class="not-mobile">{{ $t("details") }}</b-button> | ||||
|       <b-button v-b-toggle.collapse-1 variant="primary" class="not-mobile" @click="onToggle()">{{ | ||||
|           $t("details") | ||||
|         }} | ||||
|       </b-button> | ||||
| 
 | ||||
|       <template v-if="hitCount !== 0"> | ||||
|         <SortSelect class="ml-2"></SortSelect> | ||||
| @ -14,22 +17,42 @@ | ||||
| 
 | ||||
|     <b-collapse id="collapse-1" class="pt-2" style="clear:both;"> | ||||
|       <b-card> | ||||
|         <b-table :items="tableItems" small borderless thead-class="hidden" class="mb-0"></b-table> | ||||
|         <b-table :items="tableItems" small borderless bordered thead-class="hidden" class="mb-0"></b-table> | ||||
| 
 | ||||
|         <br/> | ||||
|         <h4> | ||||
|           {{$t("mimeTypes")}} | ||||
|           <b-button size="sm" variant="primary" class="float-right" @click="onCopyClick"><ClipboardIcon/></b-button> | ||||
|         </h4> | ||||
|         <Preloader v-if="$store.state.uiDetailsMimeAgg == null"></Preloader> | ||||
|         <b-table | ||||
|             v-else | ||||
|             sort-by="doc_count" | ||||
|             :sort-desc="true" | ||||
|             thead-class="hidden" | ||||
|             :items="$store.state.uiDetailsMimeAgg" small bordered class="mb-0" | ||||
|         ></b-table> | ||||
|       </b-card> | ||||
|     </b-collapse> | ||||
|   </b-card> | ||||
| </template> | ||||
| 
 | ||||
| <script lang="ts"> | ||||
| import {EsResult} from "@/Sist2Api"; | ||||
| import Sist2Api, {EsResult} from "@/Sist2Api"; | ||||
| import Vue from "vue"; | ||||
| import {humanFileSize} from "@/util"; | ||||
| import DisplayModeToggle from "@/components/DisplayModeToggle.vue"; | ||||
| import SortSelect from "@/components/SortSelect.vue"; | ||||
| import Preloader from "@/components/Preloader.vue"; | ||||
| import Sist2Query from "@/Sist2Query"; | ||||
| import ClipboardIcon from "@/components/icons/ClipboardIcon.vue"; | ||||
| 
 | ||||
| export default Vue.extend({ | ||||
|   name: "ResultsCard", | ||||
|   components: {SortSelect, DisplayModeToggle}, | ||||
|   components: {ClipboardIcon, Preloader, SortSelect, DisplayModeToggle}, | ||||
|   created() { | ||||
| 
 | ||||
|   }, | ||||
|   computed: { | ||||
|     lastResultsLoaded() { | ||||
|       return this.$store.state.lastQueryResults != null; | ||||
| @ -54,6 +77,39 @@ export default Vue.extend({ | ||||
|     totalSize() { | ||||
|       return humanFileSize((this.$store.state.lastQueryResults as EsResult).aggregations.total_size.value); | ||||
|     }, | ||||
|     onToggle() { | ||||
        // Click handler for the "details" button that toggles #collapse-1.
        // Records the new visibility in the store and, if needed, loads the
        // mime-type aggregation shown inside the details card.
        //
        // `show` is the inverse of the collapse element's current "show" class.
        // NOTE(review): presumably this handler runs before the collapse has
        // actually toggled, which is why the class is negated - confirm.
|       const show = !document.getElementById("collapse-1").classList.contains("show"); | ||||
|       this.$store.commit("setUiShowDetails", show); | ||||
| 
 | ||||
        // Only fetch when the panel is opening, nothing is cached yet
        // (uiDetailsMimeAgg == null) and the optUpdateMimeMap option is off.
|       if (show && this.$store.state.uiDetailsMimeAgg == null && !this.$store.state.optUpdateMimeMap) { | ||||
|         // Mime aggs are not updated automatically, update now | ||||
|         this.forceUpdateMimeAgg(); | ||||
|       } | ||||
|     }, | ||||
|     onCopyClick() { | ||||
        // Copies the mime-type aggregation to the clipboard as tab-separated
        // text ("<key>\t<doc_count>\n" per row) and shows a confirmation toast.
|       let tsvString = ""; | ||||
        // slice() makes a copy so the sort does not reorder the array held in
        // the store; rows are emitted by descending doc_count.
|       this.$store.state.uiDetailsMimeAgg.slice().sort((a,b) => b["doc_count"] - a["doc_count"]).forEach(row => { | ||||
|         tsvString += `${row["key"]}\t${row["doc_count"]}\n`; | ||||
|       }); | ||||
| 
 | ||||
        // NOTE(review): the value returned by writeText() is not used, so the
        // toast below is shown whether or not the copy succeeded. It also looks
        // like navigator.clipboard may be unavailable in non-secure contexts -
        // confirm and consider handling the failure case.
|       navigator.clipboard.writeText(tsvString); | ||||
| 
 | ||||
        // Header is hidden via headerClass; only the body text is displayed,
        // in the bottom-right toaster.
|       this.$bvToast.toast( | ||||
|           this.$t("toast.copiedToClipboard"), | ||||
|           { | ||||
|             title: null, | ||||
|             noAutoHide: false, | ||||
|             toaster: "b-toaster-bottom-right", | ||||
|             headerClass: "hidden", | ||||
|             bodyClass: "toast-body-info", | ||||
|           }); | ||||
|     }, | ||||
|     forceUpdateMimeAgg() { | ||||
        // Requests the mime-type data for the current search query and stores
        // the `buckets` field of the response as uiDetailsMimeAgg.
        // NOTE(review): no rejection handler is attached to the promise here;
        // verify whether Sist2Api.getMimeTypes reports errors itself.
|       const query = Sist2Query.searchQuery(); | ||||
|       Sist2Api.getMimeTypes(query).then(({buckets}) => { | ||||
|         this.$store.commit("setUiDetailsMimeAgg", buckets); | ||||
|       }); | ||||
|     } | ||||
|   }, | ||||
| }); | ||||
| 
 | ||||
|  | ||||
| @ -51,7 +51,7 @@ | ||||
|         >{{ tag.text.split(".").pop() }}</span> | ||||
| 
 | ||||
|         <b-popover :target="hit._id+tag.rawText" triggers="focus blur" placement="top"> | ||||
|           <b-button variant="danger" @click="onTagDeleteClick(tag, $event)">Delete</b-button> | ||||
|           <b-button variant="danger" @click="onTagDeleteClick(tag, $event)">{{$t("deleteTag")}}</b-button> | ||||
|         </b-popover> | ||||
|       </div> | ||||
| 
 | ||||
| @ -63,7 +63,7 @@ | ||||
|     </template> | ||||
| 
 | ||||
|     <!-- Add button --> | ||||
|     <small v-if="showAddButton" class="badge add-tag-button" @click="tagAdd()">Add</small> | ||||
|     <small v-if="showAddButton" class="badge add-tag-button" @click="tagAdd()">{{$t("addTag")}}</small> | ||||
| 
 | ||||
|     <!-- Size tag--> | ||||
|     <small v-else class="text-muted badge-size">{{ | ||||
|  | ||||
| @ -120,7 +120,7 @@ export default { | ||||
|   }, | ||||
|   mounted() { | ||||
|     this.$store.subscribe((mutation) => { | ||||
|       if (mutation.type === "setUiMimeMap") { | ||||
|       if (mutation.type === "setUiMimeMap" && this.tagTree === null) { | ||||
|         this.initializeTree(); | ||||
|         this.updateTree(); | ||||
|       } else if (mutation.type === "busUpdateTags") { | ||||
| @ -147,6 +147,7 @@ export default { | ||||
|       this.tagTree.on("node.state.changed", this.handleTreeClick); | ||||
|     }, | ||||
|     updateTree() { | ||||
|       // TODO: remember which tags are selected and restore? | ||||
|       const tagMap = []; | ||||
|       Sist2Api.getTags().then(tags => { | ||||
|         tags.forEach(tag => addTag(tagMap, tag.id, tag.id, tag.count)); | ||||
|  | ||||
							
								
								
									
										21
									
								
								sist2-vue/src/components/icons/ClipboardIcon.vue
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								sist2-vue/src/components/icons/ClipboardIcon.vue
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,21 @@ | ||||
| <template> | ||||
|   <svg style="width:24px;height:24px" viewBox="0 0 24 24"> | ||||
|     <path | ||||
|         fill="currentColor" | ||||
|         d="M17,9H7V7H17M17,13H7V11H17M14,17H7V15H14M12,3A1,1 0 0,1 13,4A1,1 0 0,1 12,5A1,1 0 0,1 11,4A1,1 0 0,1 12,3M19,3H14.82C14.4,1.84 13.3,1 12,1C10.7,1 9.6,1.84 9.18,3H5A2,2 0 0,0 3,5V19A2,2 0 0,0 5,21H19A2,2 0 0,0 21,19V5A2,2 0 0,0 19,3Z"/> | ||||
|   </svg> | ||||
| </template> | ||||
| 
 | ||||
| <script> | ||||
// Script section of the ClipboardIcon single-file component; it only declares
// the component name.
// NOTE(review): the <svg> in this file's template carries an inline
// style of 24px x 24px, while the scoped stylesheet asks for 20px x 20px.
// Inline styles normally take precedence over stylesheet rules, so the 20px
// rule looks ineffective here - confirm the intended icon size.
| export default { | ||||
|   name: "ClipboardIcon" | ||||
| } | ||||
| </script> | ||||
| 
 | ||||
| <style scoped> | ||||
| svg { | ||||
|   display: inline-block; | ||||
|   width: 20px; | ||||
|   height: 20px; | ||||
| } | ||||
| </style> | ||||
							
								
								
									
										21
									
								
								sist2-vue/src/components/icons/LanguageIcon.vue
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								sist2-vue/src/components/icons/LanguageIcon.vue
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,21 @@ | ||||
| <template> | ||||
|   <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"> | ||||
|     <path | ||||
|         fill="currentColor" | ||||
|         d="M12 0c-6.627 0-12 5.373-12 12s5.373 12 12 12 12-5.373 12-12-5.373-12-12-12zm1 16.057v-3.057h2.994c-.059 1.143-.212 2.24-.456 3.279-.823-.12-1.674-.188-2.538-.222zm1.957 2.162c-.499 1.33-1.159 2.497-1.957 3.456v-3.62c.666.028 1.319.081 1.957.164zm-1.957-7.219v-3.015c.868-.034 1.721-.103 2.548-.224.238 1.027.389 2.111.446 3.239h-2.994zm0-5.014v-3.661c.806.969 1.471 2.15 1.971 3.496-.642.084-1.3.137-1.971.165zm2.703-3.267c1.237.496 2.354 1.228 3.29 2.146-.642.234-1.311.442-2.019.607-.344-.992-.775-1.91-1.271-2.753zm-7.241 13.56c-.244-1.039-.398-2.136-.456-3.279h2.994v3.057c-.865.034-1.714.102-2.538.222zm2.538 1.776v3.62c-.798-.959-1.458-2.126-1.957-3.456.638-.083 1.291-.136 1.957-.164zm-2.994-7.055c.057-1.128.207-2.212.446-3.239.827.121 1.68.19 2.548.224v3.015h-2.994zm1.024-5.179c.5-1.346 1.165-2.527 1.97-3.496v3.661c-.671-.028-1.329-.081-1.97-.165zm-2.005-.35c-.708-.165-1.377-.373-2.018-.607.937-.918 2.053-1.65 3.29-2.146-.496.844-.927 1.762-1.272 2.753zm-.549 1.918c-.264 1.151-.434 2.36-.492 3.611h-3.933c.165-1.658.739-3.197 1.617-4.518.88.361 1.816.67 2.808.907zm.009 9.262c-.988.236-1.92.542-2.797.9-.89-1.328-1.471-2.879-1.637-4.551h3.934c.058 1.265.231 2.488.5 3.651zm.553 1.917c.342.976.768 1.881 1.257 2.712-1.223-.49-2.326-1.211-3.256-2.115.636-.229 1.299-.435 1.999-.597zm9.924 0c.7.163 1.362.367 1.999.597-.931.903-2.034 1.625-3.257 2.116.489-.832.915-1.737 1.258-2.713zm.553-1.917c.27-1.163.442-2.386.501-3.651h3.934c-.167 1.672-.748 3.223-1.638 4.551-.877-.358-1.81-.664-2.797-.9zm.501-5.651c-.058-1.251-.229-2.46-.492-3.611.992-.237 1.929-.546 2.809-.907.877 1.321 1.451 2.86 1.616 4.518h-3.933z"/> | ||||
|   </svg> | ||||
| </template> | ||||
| 
 | ||||
| <script> | ||||
| export default { | ||||
|   name: "LanguageIcon" | ||||
| } | ||||
| </script> | ||||
| 
 | ||||
| <style scoped> | ||||
| svg { | ||||
|   display: inline-block; | ||||
|   width: 20px; | ||||
|   height: 20px; | ||||
| } | ||||
| </style> | ||||
| @ -5,6 +5,8 @@ export default { | ||||
|             advanced: "Advanced search", | ||||
|             fuzzy: "Fuzzy" | ||||
|         }, | ||||
|         addTag: "Add", | ||||
|         deleteTag: "Delete", | ||||
|         download: "Download", | ||||
|         and: "and", | ||||
|         page: "page", | ||||
| @ -64,7 +66,8 @@ export default { | ||||
|             resultSize: "Number of results per page", | ||||
|             tagOrOperator: "Use OR operator when specifying multiple tags.", | ||||
|             hideDuplicates: "Hide duplicate results based on checksum", | ||||
|             hideLegacy: "Hide the 'legacyES' Elasticsearch notice" | ||||
|             hideLegacy: "Hide the 'legacyES' Elasticsearch notice", | ||||
|             updateMimeMap: "Update the Media Types tree in real time" | ||||
|         }, | ||||
|         queryMode: { | ||||
|             simple: "Simple", | ||||
| @ -72,7 +75,8 @@ export default { | ||||
|         }, | ||||
|         lang: { | ||||
|             en: "English", | ||||
|             fr: "Français" | ||||
|             fr: "Français", | ||||
|             "zh-CN": "简体中文", | ||||
|         }, | ||||
|         displayMode: { | ||||
|             grid: "Grid", | ||||
| @ -126,11 +130,13 @@ export default { | ||||
|             esQueryErr: "Could not parse or execute query, please check the Advanced search documentation. " + | ||||
|                 "See server logs for more information.", | ||||
|             dupeTagTitle: "Duplicate tag", | ||||
|             dupeTag: "This tag already exists for this document." | ||||
|             dupeTag: "This tag already exists for this document.", | ||||
|             copiedToClipboard: "Copied to clipboard" | ||||
|         }, | ||||
|         saveTagModalTitle: "Add tag", | ||||
|         saveTagPlaceholder: "Tag name", | ||||
|         confirm: "Confirm", | ||||
|         indexPickerPlaceholder: "Select an index", | ||||
|         sort: { | ||||
|             relevance: "Relevance", | ||||
|             dateAsc: "Date (Older first)", | ||||
| @ -160,6 +166,8 @@ export default { | ||||
|             advanced: "Recherche avancée", | ||||
|             fuzzy: "Approximatif" | ||||
|         }, | ||||
|         addTag: "Ajouter", | ||||
|         deleteTag: "Supprimer", | ||||
|         download: "Télécharger", | ||||
|         and: "et", | ||||
|         page: "page", | ||||
| @ -220,7 +228,8 @@ export default { | ||||
|             resultSize: "Nombre de résultats par page", | ||||
|             tagOrOperator: "Utiliser l'opérateur OU lors de la spécification de plusieurs tags", | ||||
|             hideDuplicates: "Masquer les résultats en double", | ||||
|             hideLegacy: "Masquer la notice 'legacyES' Elasticsearch" | ||||
|             hideLegacy: "Masquer la notice 'legacyES' Elasticsearch", | ||||
|             updateMimeMap: "Mettre à jour l'arbre de Types de médias en temps réel" | ||||
|         }, | ||||
|         queryMode: { | ||||
|             simple: "Simple", | ||||
| @ -228,7 +237,8 @@ export default { | ||||
|         }, | ||||
|         lang: { | ||||
|             en: "English", | ||||
|             fr: "Français" | ||||
|             fr: "Français", | ||||
|             "zh-CN": "简体中文", | ||||
|         }, | ||||
|         displayMode: { | ||||
|             grid: "Grille", | ||||
| @ -283,7 +293,8 @@ export default { | ||||
|             esQueryErr: "Impossible d'analyser ou d'exécuter la requête, veuillez consulter la documentation sur la " + | ||||
|                 "recherche avancée. Voir les journaux du serveur pour plus d'informations.", | ||||
|             dupeTagTitle: "Tag en double", | ||||
|             dupeTag: "Ce tag existe déjà pour ce document." | ||||
|             dupeTag: "Ce tag existe déjà pour ce document.", | ||||
|             copiedToClipboard: "Copié dans le presse-papier" | ||||
|         }, | ||||
|         saveTagModalTitle: "Ajouter un tag", | ||||
|         saveTagPlaceholder: "Nom du tag", | ||||
| @ -311,5 +322,166 @@ export default { | ||||
|             selectedIndex: "indice sélectionné", | ||||
|             selectedIndices: "indices sélectionnés", | ||||
|         }, | ||||
|     } | ||||
| } | ||||
|     }, | ||||
|     "zh-CN": { | ||||
|         searchBar: { | ||||
|             simple: "搜索", | ||||
|             advanced: "高级搜索", | ||||
|             fuzzy: "模糊搜索" | ||||
|         }, | ||||
|         addTag: "添加", | ||||
|         deleteTag: "删除", | ||||
|         download: "下载", | ||||
|         and: "与", | ||||
|         page: "页", | ||||
|         pages: "页", | ||||
|         mimeTypes: "文件类型", | ||||
|         tags: "标签", | ||||
|         help: { | ||||
|             simpleSearch: "简易搜索", | ||||
|             advancedSearch: "高级搜索", | ||||
|             help: "帮助", | ||||
|             term: "<关键词>", | ||||
|             and: "与操作", | ||||
|             or: "或操作", | ||||
|             not: "反选单个关键词", | ||||
|             quotes: "括起来的部分视为一个关键词,保序", | ||||
|             prefix: "在词尾使用时,匹配前缀", | ||||
|             parens: "表达式编组", | ||||
|             tildeTerm: "匹配编辑距离以内的关键词", | ||||
|             tildePhrase: "匹配短语,容忍一些非匹配词", | ||||
|             example1: | ||||
|                 "例如: <code>\"番茄\" +(炒蛋 | 牛腩) -饭</code> 将匹配" + | ||||
|                 "短语 <i>番茄炒蛋</i>、<i>炒蛋</i> 或者 <i>牛腩</i>,而忽略任何带有" + | ||||
|                 "<i>饭</i>的关键词.", | ||||
|             defaultOperator: | ||||
|                 "表达式中无<code>+</code>或者<code>|</code>时,默认使用" + | ||||
|                 "<code>+</code>(与操作)。", | ||||
|             fuzzy: | ||||
|                 "选中<b>模糊搜索</b>选项时,返回部分匹配的结果(3-grams)。", | ||||
|             moreInfoSimple: "详细信息:<a target=\"_blank\" " + | ||||
|                 "rel=\"noreferrer\" href=\"//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html\">Elasticsearch文档</a>", | ||||
|             moreInfoAdvanced: "高级搜索模式文档:<a target=\"_blank\" rel=\"noreferrer\" href=\"//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax\">Elasticsearch文档</a>" | ||||
|         }, | ||||
|         config: "配置", | ||||
|         configDescription: "配置在此浏览器中实时保存。", | ||||
|         configReset: "重置所有设置", | ||||
|         searchOptions: "搜索选项", | ||||
|         treemapOptions: "树状图选项", | ||||
|         displayOptions: "显示选项", | ||||
|         opt: { | ||||
|             lang: "语言", | ||||
|             highlight: "启用高亮", | ||||
|             fuzzy: "默认使用模糊搜索", | ||||
|             searchInPath: "匹配文档路径", | ||||
|             suggestPath: "搜索框启用自动补全", | ||||
|             fragmentSize: "高亮上下文大小", | ||||
|             queryMode: "搜索模式", | ||||
|             displayMode: "显示", | ||||
|             columns: "列数", | ||||
|             treemapType: "树状图类属性", | ||||
|             treemapTiling: "树状图平铺", | ||||
|             treemapColorGroupingDepth: "树状图颜色编组深度(展开)", | ||||
|             treemapColor: "树状图颜色(折叠)", | ||||
|             treemapSize: "树状图大小", | ||||
|             theme: "主题", | ||||
|             lightboxLoadOnlyCurrent: "在图片查看器中,不要预读相邻的全图", | ||||
|             slideDuration: "幻灯片时长", | ||||
|             resultSize: "每页结果数", | ||||
|             tagOrOperator: "使用或操作(OR)匹配多个标签。", | ||||
|             hideDuplicates: "使用校验码隐藏重复结果", | ||||
|             hideLegacy: "隐藏'legacyES' Elasticsearch 通知", | ||||
|             updateMimeMap: "媒体类型树的实时更新" | ||||
|         }, | ||||
|         queryMode: { | ||||
|             simple: "简单", | ||||
|             advanced: "高级", | ||||
|         }, | ||||
|         lang: { | ||||
|             en: "English", | ||||
|             fr: "Français", | ||||
|             "zh-CN": "简体中文", | ||||
|         }, | ||||
|         displayMode: { | ||||
|             grid: "网格", | ||||
|             list: "列表", | ||||
|         }, | ||||
|         columns: { | ||||
|             auto: "自动" | ||||
|         }, | ||||
|         treemapType: { | ||||
|             cascaded: "折叠", | ||||
|             flat: "平铺(紧凑)" | ||||
|         }, | ||||
|         treemapSize: { | ||||
|             small: "小", | ||||
|             medium: "中", | ||||
|             large: "大", | ||||
|             xLarge: "加大", | ||||
|             xxLarge: "加加大", | ||||
|             custom: "自订", | ||||
|         }, | ||||
|         treemapTiling: { | ||||
|             binary: "Binary", | ||||
|             squarify: "Squarify", | ||||
|             slice: "Slice", | ||||
|             dice: "Dice", | ||||
|             sliceDice: "Slice & Dice", | ||||
|         }, | ||||
|         theme: { | ||||
|             light: "亮", | ||||
|             black: "暗" | ||||
|         }, | ||||
|         hit: "命中", | ||||
|         hits: "命中", | ||||
|         details: "详细信息", | ||||
|         stats: "统计信息", | ||||
|         queryTime: "查询时间", | ||||
|         totalSize: "总大小", | ||||
|         pathBar: { | ||||
|             placeholder: "过滤路径", | ||||
|             modalTitle: "选择路径" | ||||
|         }, | ||||
|         debug: "调试信息", | ||||
|         debugDescription: "对调试除错有用的信息。 若您遇到bug或者想建议新功能,请提交新Issue到" + | ||||
|             "<a href='https://github.com/simon987/sist2/issues/new/choose'>这里</a>.", | ||||
|         tagline: "标签栏", | ||||
|         toast: { | ||||
|             esConnErrTitle: "Elasticsearch连接错误", | ||||
|             esConnErr: "sist2 web 模块连接Elasticsearch出错。" + | ||||
|                 "查看服务日志以获取更多信息。", | ||||
|             esQueryErrTitle: "查询错误", | ||||
|             esQueryErr: "无法识别或执行查询,请查阅高级搜索文档。" + | ||||
|                 "查看服务日志以获取更多信息。", | ||||
|             dupeTagTitle: "重复标签", | ||||
|             dupeTag: "该标签已存在于此文档。", | ||||
|             copiedToClipboard: "复制到剪贴板" | ||||
|         }, | ||||
|         saveTagModalTitle: "增加标签", | ||||
|         saveTagPlaceholder: "标签名", | ||||
|         confirm: "确认", | ||||
|         indexPickerPlaceholder: "选择一个索引", | ||||
|         sort: { | ||||
|             relevance: "相关度", | ||||
|             dateAsc: "日期(由旧到新)", | ||||
|             dateDesc: "日期(由新到旧)", | ||||
|             sizeAsc: "大小(从小到大)", | ||||
|             sizeDesc: "大小(从大到小)", | ||||
|             nameAsc: "名字(A-z)", | ||||
|             nameDesc: "名字 (Z-a)", | ||||
|             random: "随机", | ||||
|         }, | ||||
|         d3: { | ||||
|             mimeCount: "各类文件数量分布", | ||||
|             mimeSize: "各类文件大小分布", | ||||
|             dateHistogram: "文件修改时间分布", | ||||
|             sizeHistogram: "文件大小分布", | ||||
|         }, | ||||
|         indexPicker: { | ||||
|             selectNone: "清空", | ||||
|             selectAll: "全选", | ||||
|             selectedIndex: "选中索引", | ||||
|             selectedIndices: "选中索引", | ||||
|         }, | ||||
|     }, | ||||
| } | ||||
|  | ||||
| @ -27,6 +27,7 @@ export default new Vuex.Store({ | ||||
|         size: 60, | ||||
| 
 | ||||
|         optLang: "en", | ||||
|         optLangIsDefault: true, | ||||
|         optHideDuplicates: true, | ||||
|         optTheme: "light", | ||||
|         optDisplay: "grid", | ||||
| @ -47,6 +48,7 @@ export default new Vuex.Store({ | ||||
|         optLightboxLoadOnlyCurrent: false, | ||||
|         optLightboxSlideDuration: 15, | ||||
|         optHideLegacy: false, | ||||
|         optUpdateMimeMap: true, | ||||
| 
 | ||||
|         _onLoadSelectedIndices: [] as string[], | ||||
|         _onLoadSelectedMimeTypes: [] as string[], | ||||
| @ -71,9 +73,14 @@ export default new Vuex.Store({ | ||||
|         uiLightboxSlide: 0, | ||||
|         uiReachedScrollEnd: false, | ||||
| 
 | ||||
|         uiDetailsMimeAgg: null, | ||||
|         uiShowDetails: false, | ||||
| 
 | ||||
|         uiMimeMap: [] as any[] | ||||
|     }, | ||||
|     mutations: { | ||||
|         setUiShowDetails: (state, val) => state.uiShowDetails = val, | ||||
|         setUiDetailsMimeAgg: (state, val) => state.uiDetailsMimeAgg = val, | ||||
|         setUiReachedScrollEnd: (state, val) => state.uiReachedScrollEnd = val, | ||||
|         setTags: (state, val) => state.tags = val, | ||||
|         setPathText: (state, val) => state.pathText = val, | ||||
| @ -82,7 +89,10 @@ export default new Vuex.Store({ | ||||
|         setSist2Info: (state, val) => state.sist2Info = val, | ||||
|         setSeed: (state, val) => state.seed = val, | ||||
|         setOptHideDuplicates: (state, val) => state.optHideDuplicates = val, | ||||
|         setOptLang: (state, val) => state.optLang = val, | ||||
|         setOptLang: (state, val) => { | ||||
|             state.optLang = val; | ||||
|             state.optLangIsDefault = false; | ||||
|         }, | ||||
|         setSortMode: (state, val) => state.sortMode = val, | ||||
|         setIndices: (state, val) => { | ||||
|             state.indices = val; | ||||
| @ -146,8 +156,10 @@ export default new Vuex.Store({ | ||||
|         setOptTreemapSize: (state, val) => state.optTreemapSize = val, | ||||
|         setOptTreemapColor: (state, val) => state.optTreemapColor = val, | ||||
|         setOptHideLegacy: (state, val) => state.optHideLegacy = val, | ||||
|         setOptUpdateMimeMap: (state, val) => state.optUpdateMimeMap = val, | ||||
| 
 | ||||
|         setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val, | ||||
|         setOptLightboxSlideDuration: (state, val) => state.optLightboxSlideDuration = val, | ||||
| 
 | ||||
|         setUiMimeMap: (state, val) => state.uiMimeMap = val, | ||||
| 
 | ||||
| @ -157,8 +169,18 @@ export default new Vuex.Store({ | ||||
|         busUpdateTags: () => { | ||||
|             // noop
 | ||||
|         }, | ||||
|         busSearch: () => { | ||||
|             // noop
 | ||||
|         }, | ||||
|     }, | ||||
|     actions: { | ||||
|         setSist2Info: (store, val) => { | ||||
              // Action wrapper around the setSist2Info mutation: stores the
              // info object and, while the language option is still flagged as
              // default (optLangIsDefault), sets the language to `val.lang`.
|             store.commit("setSist2Info", val); | ||||
| 
 | ||||
              // The setOptLang mutation clears optLangIsDefault, so this branch
              // is not taken again once a language has been set.
|             if (store.state.optLangIsDefault) { | ||||
|                 store.commit("setOptLang", val.lang); | ||||
|             } | ||||
|         }, | ||||
|         loadFromArgs({commit}, route: Route) { | ||||
| 
 | ||||
|             if (route.query.q) { | ||||
| @ -278,6 +300,7 @@ export default new Vuex.Store({ | ||||
|             commit("setUiLightboxTypes", []); | ||||
|             commit("setUiLightboxCaptions", []); | ||||
|             commit("setUiLightboxKey", 0); | ||||
|             commit("setUiDetailsMimeAgg", null); | ||||
|         } | ||||
|     }, | ||||
|     modules: {}, | ||||
| @ -342,5 +365,6 @@ export default new Vuex.Store({ | ||||
|         optLightboxSlideDuration: state => state.optLightboxSlideDuration, | ||||
|         optResultSize: state => state.size, | ||||
|         optHideLegacy: state => state.optHideLegacy, | ||||
|         optUpdateMimeMap: state => state.optUpdateMimeMap, | ||||
|     } | ||||
| }) | ||||
| @ -97,6 +97,30 @@ export function getSelectedTreeNodes(tree: any) { | ||||
|     return Array.from(selectedNodes); | ||||
| } | ||||
| 
 | ||||
| export function getTreeNodeAttributes(tree: any) { | ||||
      // Builds a snapshot of the checked/collapsed state of every node returned
      // by tree.selectable(), as a plain object keyed by a per-node id.
      //
      // @param tree  tree object exposing selectable(); each returned node is
      //              read for text, values, id and itree.state
      // @returns     object mapping id -> {checked, collapsed}
|     const nodes = tree.selectable(); | ||||
|     const attributes = {}; | ||||
| 
 | ||||
|     for (let i = 0; i < nodes.length; i++) { | ||||
| 
 | ||||
|         let id = null; | ||||
| 
 | ||||
          // Nodes whose label contains "(" and that carry `values` are keyed by
          // the last entry of `values`; every other node is keyed by its own id.
          // NOTE(review): presumably "(" marks labels with a count suffix -
          // confirm against the code that builds the tree.
|         if (nodes[i].text.indexOf("(") !== -1 && nodes[i].values) { | ||||
|             id = nodes[i].values.slice(-1)[0]; | ||||
|         } else { | ||||
|             id = nodes[i].id | ||||
|         } | ||||
| 
 | ||||
          // If two nodes resolve to the same id, the later one overwrites the
          // earlier entry.
|         attributes[id] = { | ||||
|             checked: nodes[i].itree.state.checked, | ||||
|             collapsed: nodes[i].itree.state.collapsed, | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return attributes; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| export function serializeMimes(mimes: string[]): string | undefined { | ||||
|     if (mimes.length == 0) { | ||||
|         return undefined; | ||||
|  | ||||
| @ -15,15 +15,8 @@ | ||||
|         <h4>{{ $t("displayOptions") }}</h4> | ||||
| 
 | ||||
|         <b-card> | ||||
|           <b-form-checkbox :checked="optLightboxLoadOnlyCurrent" @input="setOptLightboxLoadOnlyCurrent"> | ||||
|             {{ $t("opt.lightboxLoadOnlyCurrent") }} | ||||
|           </b-form-checkbox> | ||||
| 
 | ||||
|           <b-form-checkbox :checked="optHideLegacy" @input="setOptHideLegacy"> | ||||
|             {{ $t("opt.hideLegacy") }} | ||||
|           </b-form-checkbox> | ||||
| 
 | ||||
|           <label>{{ $t("opt.lang") }}</label> | ||||
|           <label><LanguageIcon/><span style="vertical-align: middle"> {{ $t("opt.lang") }}</span></label> | ||||
|           <b-form-select :options="langOptions" :value="optLang" @input="setOptLang"></b-form-select> | ||||
| 
 | ||||
|           <label>{{ $t("opt.theme") }}</label> | ||||
| @ -34,6 +27,20 @@ | ||||
| 
 | ||||
|           <label>{{ $t("opt.columns") }}</label> | ||||
|           <b-form-select :options="columnsOptions" :value="optColumns" @input="setOptColumns"></b-form-select> | ||||
| 
 | ||||
|           <div style="height: 10px"></div> | ||||
| 
 | ||||
|           <b-form-checkbox :checked="optLightboxLoadOnlyCurrent" @input="setOptLightboxLoadOnlyCurrent"> | ||||
|             {{ $t("opt.lightboxLoadOnlyCurrent") }} | ||||
|           </b-form-checkbox> | ||||
| 
 | ||||
|           <b-form-checkbox :checked="optHideLegacy" @input="setOptHideLegacy"> | ||||
|             {{ $t("opt.hideLegacy") }} | ||||
|           </b-form-checkbox> | ||||
| 
 | ||||
|           <b-form-checkbox :checked="optUpdateMimeMap" @input="setOptUpdateMimeMap"> | ||||
|             {{ $t("opt.updateMimeMap") }} | ||||
|           </b-form-checkbox> | ||||
|         </b-card> | ||||
| 
 | ||||
|         <br/> | ||||
| @ -117,15 +124,15 @@ | ||||
| </template> | ||||
| 
 | ||||
| <script> | ||||
| import Vue from "vue"; | ||||
| import {mapGetters, mapMutations} from "vuex"; | ||||
| import {mapActions, mapGetters, mapMutations} from "vuex"; | ||||
| import DebugInfo from "@/components/DebugInfo.vue"; | ||||
| import Preloader from "@/components/Preloader.vue"; | ||||
| import sist2 from "@/Sist2Api"; | ||||
| import GearIcon from "@/components/GearIcon.vue"; | ||||
| import GearIcon from "@/components/icons/GearIcon.vue"; | ||||
| import LanguageIcon from "@/components/icons/LanguageIcon"; | ||||
| 
 | ||||
| export default { | ||||
|   components: {GearIcon, DebugInfo, Preloader}, | ||||
|   components: {LanguageIcon, GearIcon, DebugInfo, Preloader}, | ||||
|   data() { | ||||
|     return { | ||||
|       loading: true, | ||||
| @ -133,6 +140,7 @@ export default { | ||||
|       langOptions: [ | ||||
|         {value: "en", text: this.$t("lang.en")}, | ||||
|         {value: "fr", text: this.$t("lang.fr")}, | ||||
|         {value: "zh-CN", text: this.$t("lang.zh-CN")}, | ||||
|       ], | ||||
|       queryModeOptions: [ | ||||
|         {value: "simple", text: this.$t("queryMode.simple")}, | ||||
| @ -220,6 +228,7 @@ export default { | ||||
|       "optLang", | ||||
|       "optHideDuplicates", | ||||
|       "optHideLegacy", | ||||
|       "optUpdateMimeMap", | ||||
|     ]), | ||||
|     clientWidth() { | ||||
|       return window.innerWidth; | ||||
| @ -227,7 +236,7 @@ export default { | ||||
|   }, | ||||
|   mounted() { | ||||
|     sist2.getSist2Info().then(data => { | ||||
|       this.$store.commit("setSist2Info", data) | ||||
|       this.setSist2Info(data); | ||||
|       this.loading = false; | ||||
|     }); | ||||
| 
 | ||||
| @ -238,6 +247,9 @@ export default { | ||||
|     }); | ||||
|   }, | ||||
|   methods: { | ||||
|     ...mapActions({ | ||||
|       setSist2Info: "setSist2Info", | ||||
|     }), | ||||
|     ...mapMutations([ | ||||
|       "setOptTheme", | ||||
|       "setOptDisplay", | ||||
| @ -255,12 +267,12 @@ export default { | ||||
|       "setOptTreemapSize", | ||||
|       "setOptLightboxLoadOnlyCurrent", | ||||
|       "setOptLightboxSlideDuration", | ||||
|       "setOptContainerWidth", | ||||
|       "setOptResultSize", | ||||
|       "setOptTagOrOperator", | ||||
|       "setOptLang", | ||||
|       "setOptHideDuplicates", | ||||
|       "setOptHideLegacy" | ||||
|       "setOptHideLegacy", | ||||
|       "setOptUpdateMimeMap" | ||||
|     ]), | ||||
|     onResetClick() { | ||||
|       localStorage.removeItem("sist2_configuration"); | ||||
|  | ||||
| @ -60,7 +60,7 @@ | ||||
| 
 | ||||
| <script lang="ts"> | ||||
| import Preloader from "@/components/Preloader.vue"; | ||||
| import {mapGetters, mapMutations} from "vuex"; | ||||
| import {mapActions, mapGetters, mapMutations} from "vuex"; | ||||
| import sist2 from "../Sist2Api"; | ||||
| import Sist2Api, {EsHit, EsResult} from "../Sist2Api"; | ||||
| import SearchBar from "@/components/SearchBar.vue"; | ||||
| @ -139,7 +139,7 @@ export default Vue.extend({ | ||||
|         this.setSist2Info(data); | ||||
|         this.setIndices(data.indices); | ||||
| 
 | ||||
|         Sist2Api.getMimeTypes().then(mimeMap => { | ||||
|         Sist2Api.getMimeTypes(Sist2Query.searchQuery()).then(({mimeMap}) => { | ||||
|           this.$store.commit("setUiMimeMap", mimeMap); | ||||
|           this.uiLoading = false; | ||||
|           this.search(true); | ||||
| @ -151,8 +151,10 @@ export default Vue.extend({ | ||||
|     }); | ||||
|   }, | ||||
|   methods: { | ||||
|     ...mapMutations({ | ||||
|     ...mapActions({ | ||||
|       setSist2Info: "setSist2Info", | ||||
|     }), | ||||
|     ...mapMutations({ | ||||
|       setIndices: "setIndices", | ||||
|       setDateBoundsMin: "setDateBoundsMin", | ||||
|       setDateBoundsMax: "setDateBoundsMax", | ||||
| @ -183,6 +185,7 @@ export default Vue.extend({ | ||||
|     async searchNow(q: any) { | ||||
|       this.searchBusy = true; | ||||
|       await this.$store.dispatch("incrementQuerySequence"); | ||||
|       this.$store.commit("busSearch"); | ||||
| 
 | ||||
|       Sist2Api.esQuery(q).then(async (resp: EsResult) => { | ||||
|         await this.handleSearch(resp); | ||||
| @ -284,6 +287,11 @@ export default Vue.extend({ | ||||
|   border: none; | ||||
| } | ||||
| 
 | ||||
| .toast-header-info, .toast-body-info { | ||||
|   background: #2196f3; | ||||
|   color: #fff !important; | ||||
| } | ||||
| 
 | ||||
| .toast-header-error, .toast-body-error { | ||||
|   background: #a94442; | ||||
|   color: #f2dede !important; | ||||
|  | ||||
							
								
								
									
										78
									
								
								src/cli.c
									
									
									
									
									
								
							
							
						
						
									
										78
									
								
								src/cli.c
									
									
									
									
									
								
							| @ -22,6 +22,7 @@ | ||||
| const char *TESS_DATAPATHS[] = { | ||||
|         "/usr/share/tessdata/", | ||||
|         "/usr/share/tesseract-ocr/tessdata/", | ||||
|         "/usr/share/tesseract-ocr/4.00/tessdata/", | ||||
|         "./", | ||||
|         NULL | ||||
| }; | ||||
| @ -145,7 +146,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { | ||||
|     if (args->name == NULL) { | ||||
|         args->name = g_path_get_basename(args->output); | ||||
|     } else { | ||||
|         char* tmp = malloc(strlen(args->name) + 1); | ||||
|         char *tmp = malloc(strlen(args->name) + 1); | ||||
|         strcpy(tmp, args->name); | ||||
|         args->name = tmp; | ||||
|     } | ||||
| @ -167,17 +168,50 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { | ||||
|         return 1; | ||||
|     } | ||||
| 
 | ||||
|     if (args->tesseract_lang != NULL) { | ||||
|         TessBaseAPI *api = TessBaseAPICreate(); | ||||
|     if (args->ocr_images && args->tesseract_lang == NULL) { | ||||
|         fprintf(stderr, "You must specify --ocr-lang <LANG> to use --ocr-images"); | ||||
|         return 1; | ||||
|     } | ||||
| 
 | ||||
|         char filename[128]; | ||||
|         sprintf(filename, "%s.traineddata", args->tesseract_lang); | ||||
|         const char *path = find_file_in_paths(TESS_DATAPATHS, filename); | ||||
|         if (path == NULL) { | ||||
|             LOG_FATAL("cli.c", "Could not find tesseract language file!"); | ||||
|     if (args->ocr_ebooks && args->tesseract_lang == NULL) { | ||||
|         fprintf(stderr, "You must specify --ocr-lang <LANG> to use --ocr-ebooks"); | ||||
|         return 1; | ||||
|     } | ||||
| 
 | ||||
|     if (args->tesseract_lang != NULL) { | ||||
| 
 | ||||
|         if (!args->ocr_ebooks && !args->ocr_images) { | ||||
|             fprintf(stderr, "You must specify at least one of --ocr-ebooks, --ocr-images"); | ||||
|             return 1; | ||||
|         } | ||||
| 
 | ||||
|         ret = TessBaseAPIInit3(api, path, args->tesseract_lang); | ||||
|         TessBaseAPI *api = TessBaseAPICreate(); | ||||
| 
 | ||||
|         const char *trained_data_path = NULL; | ||||
|         char *lang = malloc(strlen(args->tesseract_lang) + 1); | ||||
|         strcpy(lang, args->tesseract_lang); | ||||
| 
 | ||||
|         lang = strtok(lang, "+"); | ||||
| 
 | ||||
|         while (lang != NULL) { | ||||
|             char filename[128]; | ||||
|             sprintf(filename, "%s.traineddata", lang); | ||||
| 
 | ||||
|             const char *path = find_file_in_paths(TESS_DATAPATHS, filename); | ||||
|             if (path == NULL) { | ||||
|                 LOG_FATALF("cli.c", "Could not find tesseract language file: %s!", filename); | ||||
|             } | ||||
|             if (trained_data_path != NULL && path != trained_data_path) { | ||||
|                 LOG_FATAL("cli.c", "When specifying more than one tesseract language, all the traineddata " | ||||
|                                    "files must be in the same folder") | ||||
|             } | ||||
|             trained_data_path = path; | ||||
| 
 | ||||
|             lang = strtok(NULL, "+"); | ||||
|         } | ||||
|         free(lang); | ||||
| 
 | ||||
|         ret = TessBaseAPIInit3(api, trained_data_path, args->tesseract_lang); | ||||
|         if (ret != 0) { | ||||
|             fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang); | ||||
|             return 1; | ||||
| @ -185,7 +219,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { | ||||
|         TessBaseAPIEnd(api); | ||||
|         TessBaseAPIDelete(api); | ||||
| 
 | ||||
|         args->tesseract_path = path; | ||||
|         args->tesseract_path = trained_data_path; | ||||
|     } | ||||
| 
 | ||||
|     if (args->exclude_regex != NULL) { | ||||
| @ -218,6 +252,19 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { | ||||
|         args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER; | ||||
|     } | ||||
| 
 | ||||
|     if (args->list_path != NULL) { | ||||
|         if (strcmp(args->list_path, "-") == 0) { | ||||
|             args->list_file = stdin; | ||||
|             LOG_DEBUG("cli.c", "Using stdin as list file") | ||||
|         } else { | ||||
|             args->list_file = fopen(args->list_path, "r"); | ||||
| 
 | ||||
|             if (args->list_file == NULL) { | ||||
|                 LOG_FATALF("main.c", "List file could not be opened: %s (%s)", args->list_path, errno); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     LOG_DEBUGF("cli.c", "arg quality=%f", args->quality) | ||||
|     LOG_DEBUGF("cli.c", "arg size=%d", args->size) | ||||
|     LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size) | ||||
| @ -237,6 +284,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { | ||||
|     LOG_DEBUGF("cli.c", "arg fast_epub=%d", args->fast_epub) | ||||
|     LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold) | ||||
|     LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer) | ||||
|     LOG_DEBUGF("cli.c", "arg list_path=%s", args->list_path) | ||||
| 
 | ||||
|     return 0; | ||||
| } | ||||
| @ -362,15 +410,15 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) { | ||||
|         args->es_index = DEFAULT_ES_INDEX; | ||||
|     } | ||||
| 
 | ||||
|     if (args->lang == NULL) { | ||||
|         args->lang = DEFAULT_LANG; | ||||
|     } | ||||
| 
 | ||||
|     if (args->tagline == NULL) { | ||||
|         args->tagline = DEFAULT_TAGLINE; | ||||
|     } | ||||
| 
 | ||||
|     if (strlen(args->lang) != 2) { | ||||
|     if (args->lang == NULL) { | ||||
|         args->lang = DEFAULT_LANG; | ||||
|     } | ||||
| 
 | ||||
|     if (strlen(args->lang) != 2 && strlen(args->lang) != 5) { | ||||
|         fprintf(stderr, "Invalid --lang value, see usage\n"); | ||||
|         return 1; | ||||
|     } | ||||
|  | ||||
| @ -21,6 +21,8 @@ typedef struct scan_args { | ||||
|     char *archive_passphrase; | ||||
|     char *tesseract_lang; | ||||
|     const char *tesseract_path; | ||||
|     int ocr_images; | ||||
|     int ocr_ebooks; | ||||
|     char *exclude_regex; | ||||
|     int fast; | ||||
|     const char* treemap_threshold_str; | ||||
| @ -29,6 +31,8 @@ typedef struct scan_args { | ||||
|     int read_subtitles; | ||||
|     int fast_epub; | ||||
|     int calculate_checksums; | ||||
|     char *list_path; | ||||
|     FILE *list_file; | ||||
| } scan_args_t; | ||||
| 
 | ||||
| scan_args_t *scan_args_create(); | ||||
|  | ||||
| @ -41,6 +41,7 @@ typedef struct { | ||||
| 
 | ||||
|     GHashTable *original_table; | ||||
|     GHashTable *copy_table; | ||||
|     pthread_mutex_t copy_table_mu; | ||||
| 
 | ||||
|     pcre *exclude; | ||||
|     pcre_extra *exclude_extra; | ||||
| @ -97,7 +98,7 @@ typedef struct { | ||||
|     int tag_auth_enabled; | ||||
|     char *tagline; | ||||
|     struct index_t indices[256]; | ||||
|     char lang[3]; | ||||
|     char lang[10]; | ||||
|     int dev; | ||||
| } WebCtx_t; | ||||
| 
 | ||||
|  | ||||
| @ -38,6 +38,8 @@ char *get_meta_key_text(enum metakey meta_key) { | ||||
|             return "parent"; | ||||
|         case MetaExifMake: | ||||
|             return "exif_make"; | ||||
|         case MetaExifDescription: | ||||
|             return "exif_description"; | ||||
|         case MetaExifSoftware: | ||||
|             return "exif_software"; | ||||
|         case MetaExifExposureTime: | ||||
| @ -150,6 +152,7 @@ char *build_json_string(document_t *doc) { | ||||
|             case MetaFontName: | ||||
|             case MetaParent: | ||||
|             case MetaExifMake: | ||||
|             case MetaExifDescription: | ||||
|             case MetaExifSoftware: | ||||
|             case MetaExifExposureTime: | ||||
|             case MetaExifFNumber: | ||||
|  | ||||
| @ -4,6 +4,8 @@ | ||||
| 
 | ||||
| #include <ftw.h> | ||||
| 
 | ||||
/*
 * Evaluates to non-zero when string x begins with the whole of prefix y.
 * Every character of y takes part in the comparison, so "/data/" does not
 * match "/database/file", and an empty y is a prefix of everything instead of
 * turning strlen(y) - 1 into SIZE_MAX.
 * y is evaluated twice: do not pass an expression with side effects.
 */
#define STR_STARTS_WITH(x, y) (strncmp((y), (x), strlen(y)) == 0)
| 
 | ||||
| __always_inline | ||||
| parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) { | ||||
|     int len = (int) strlen(filepath); | ||||
| @ -77,3 +79,57 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st | ||||
| int walk_directory_tree(const char *dirpath) { | ||||
|     return nftw(dirpath, handle_entry, MAX_FILE_DESCRIPTORS, FTW_PHYS | FTW_ACTIONRETVAL); | ||||
| } | ||||
| 
 | ||||
| int iterate_file_list(void *input_file) { | ||||
| 
 | ||||
|     char buf[PATH_MAX]; | ||||
|     struct stat info; | ||||
| 
 | ||||
|     while (fgets(buf, sizeof(buf), input_file) != NULL) { | ||||
| 
 | ||||
|         // Remove trailing newline
 | ||||
|         *(buf + strlen(buf) - 1) = '\0'; | ||||
| 
 | ||||
|         int stat_ret = stat(buf, &info); | ||||
| 
 | ||||
|         if (stat_ret != 0) { | ||||
|             LOG_ERRORF("walk.c", "Could not stat file %s (%s)", buf, strerror(errno)); | ||||
|             continue; | ||||
|         } | ||||
| 
 | ||||
|         if (!S_ISREG(info.st_mode)) { | ||||
|             LOG_ERRORF("walk.c", "Is not a regular file: %s", buf); | ||||
|             continue; | ||||
|         } | ||||
| 
 | ||||
|         char *absolute_path = canonicalize_file_name(buf); | ||||
| 
 | ||||
|         if (absolute_path == NULL) { | ||||
|             LOG_FATALF("walk.c", "FIXME: Could not get absolute path of %s", buf); | ||||
|         } | ||||
| 
 | ||||
|         if (ScanCtx.exclude != NULL && EXCLUDED(absolute_path)) { | ||||
|             LOG_DEBUGF("walk.c", "Excluded: %s", absolute_path) | ||||
| 
 | ||||
|             if (S_ISREG(info.st_mode)) { | ||||
|                 pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu); | ||||
|                 ScanCtx.dbg_excluded_files_count += 1; | ||||
|                 pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu); | ||||
|             } | ||||
| 
 | ||||
|             continue; | ||||
|         } | ||||
| 
 | ||||
|         if (!STR_STARTS_WITH(absolute_path, ScanCtx.index.desc.root)) { | ||||
|             LOG_FATALF("walk.c", "File is not a children of root folder (%s): %s", ScanCtx.index.desc.root, buf); | ||||
|         } | ||||
| 
 | ||||
|         int base = (int) (strrchr(buf, '/') - buf) + 1; | ||||
| 
 | ||||
|         parse_job_t *job = create_fs_parse_job(absolute_path, &info, base); | ||||
|         free(absolute_path); | ||||
|         tpool_add_work(ScanCtx.pool, parse, job); | ||||
|     } | ||||
| 
 | ||||
|     return 0; | ||||
| } | ||||
| @ -5,4 +5,6 @@ | ||||
| 
 | ||||
| int walk_directory_tree(const char *); | ||||
| 
 | ||||
| int iterate_file_list(void* input_file); | ||||
| 
 | ||||
| #endif | ||||
|  | ||||
							
								
								
									
										40
									
								
								src/main.c
									
									
									
									
									
								
							
							
						
						
									
										40
									
								
								src/main.c
									
									
									
									
									
								
							| @ -14,6 +14,9 @@ | ||||
| #include "parsing/mime.h" | ||||
| #include "parsing/parse.h" | ||||
| 
 | ||||
| #include <signal.h> | ||||
| #include <unistd.h> | ||||
| 
 | ||||
| #include "stats.h" | ||||
| 
 | ||||
| #define DESCRIPTION "Lightning-fast file system indexer and search tool." | ||||
| @ -29,8 +32,6 @@ static const char *const usage[] = { | ||||
|         NULL, | ||||
| }; | ||||
| 
 | ||||
| #include<signal.h> | ||||
| #include<unistd.h> | ||||
| 
 | ||||
| static __sighandler_t sigsegv_handler = NULL; | ||||
| static __sighandler_t sigabrt_handler = NULL; | ||||
| @ -169,6 +170,7 @@ void initialize_scan_context(scan_args_t *args) { | ||||
|     ScanCtx.dbg_current_files = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, NULL); | ||||
|     pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL); | ||||
|     pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL); | ||||
|     pthread_mutex_init(&ScanCtx.copy_table_mu, NULL); | ||||
| 
 | ||||
|     ScanCtx.calculate_checksums = args->calculate_checksums; | ||||
| 
 | ||||
| @ -218,6 +220,11 @@ void initialize_scan_context(scan_args_t *args) { | ||||
|     ScanCtx.media_ctx.store = _store; | ||||
|     ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024; | ||||
|     ScanCtx.media_ctx.read_subtitles = args->read_subtitles; | ||||
| 
 | ||||
|     if (args->ocr_images) { | ||||
|         ScanCtx.media_ctx.tesseract_lang = args->tesseract_lang; | ||||
|         ScanCtx.media_ctx.tesseract_path = args->tesseract_path; | ||||
|     } | ||||
|     init_media(); | ||||
| 
 | ||||
|     // OOXML
 | ||||
| @ -334,10 +341,20 @@ void sist2_scan(scan_args_t *args) { | ||||
|     ScanCtx.writer_pool = tpool_create(1, writer_cleanup, TRUE, FALSE); | ||||
|     tpool_start(ScanCtx.writer_pool); | ||||
| 
 | ||||
|     int walk_ret = walk_directory_tree(ScanCtx.index.desc.root); | ||||
|     if (walk_ret == -1) { | ||||
|         LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno) | ||||
|     if (args->list_path) { | ||||
|         // Scan using file list
 | ||||
|         int list_ret = iterate_file_list(args->list_file); | ||||
|         if (list_ret != 0) { | ||||
|             LOG_FATALF("main.c", "iterate_file_list() failed! (%d)", list_ret) | ||||
|         } | ||||
|     } else { | ||||
|         // Scan directory recursively
 | ||||
|         int walk_ret = walk_directory_tree(ScanCtx.index.desc.root); | ||||
|         if (walk_ret == -1) { | ||||
|             LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno) | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     tpool_wait(ScanCtx.pool); | ||||
|     tpool_destroy(ScanCtx.pool); | ||||
| 
 | ||||
| @ -489,7 +506,7 @@ void sist2_web(web_args_t *args) { | ||||
|     WebCtx.tag_auth_enabled = args->tag_auth_enabled; | ||||
|     WebCtx.tagline = args->tagline; | ||||
|     WebCtx.dev = args->dev; | ||||
|     strcpy(WebCtx.lang, "en"); | ||||
|     strcpy(WebCtx.lang, args->lang); | ||||
| 
 | ||||
|     for (int i = 0; i < args->index_count; i++) { | ||||
|         char *abs_path = abspath(args->indices[i]); | ||||
| @ -564,8 +581,11 @@ int main(int argc, const char *argv[]) { | ||||
|             OPT_STRING(0, "archive-passphrase", &scan_args->archive_passphrase, | ||||
|                        "Passphrase for encrypted archive files"), | ||||
| 
 | ||||
|             OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see " | ||||
|                                                              "which are installed on your machine)"), | ||||
|             OPT_STRING(0, "ocr-lang", &scan_args->tesseract_lang, | ||||
|                        "Tesseract language (use 'tesseract --list-langs' to see " | ||||
|                        "which are installed on your machine)"), | ||||
|             OPT_BOOLEAN(0, "ocr-images", &scan_args->ocr_images, "Enable OCR'ing of image files."), | ||||
|             OPT_BOOLEAN(0, "ocr-ebooks", &scan_args->ocr_ebooks, "Enable OCR'ing of ebook files."), | ||||
|             OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"), | ||||
|             OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"), | ||||
|             OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap " | ||||
| @ -577,6 +597,9 @@ int main(int argc, const char *argv[]) { | ||||
|             OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub, | ||||
|                         "Faster but less accurate EPUB parsing (no thumbnails, metadata)"), | ||||
|             OPT_BOOLEAN(0, "checksums", &scan_args->calculate_checksums, "Calculate file checksums when scanning."), | ||||
|             OPT_STRING(0, "list-file", &scan_args->list_path, "Specify a list of newline-delimited paths to be scanned" | ||||
|                                                               " instead of normal directory traversal. Use '-' to read" | ||||
|                                                               " from stdin."), | ||||
| 
 | ||||
|             OPT_GROUP("Index options"), | ||||
|             OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"), | ||||
| @ -599,6 +622,7 @@ int main(int argc, const char *argv[]) { | ||||
|             OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"), | ||||
|             OPT_STRING(0, "tagline", &web_args->tagline, "Tagline in navbar"), | ||||
|             OPT_BOOLEAN(0, "dev", &web_args->dev, "Serve html & js files from disk (for development)"), | ||||
|             OPT_STRING(0, "lang", &web_args->lang, "Default UI language. Can be changed by the user"), | ||||
| 
 | ||||
|             OPT_GROUP("Exec-script options"), | ||||
|             OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"), | ||||
|  | ||||
| @ -79,7 +79,9 @@ void parse(void *arg) { | ||||
| 
 | ||||
|     int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5); | ||||
|     if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) { | ||||
|         pthread_mutex_lock(&ScanCtx.copy_table_mu); | ||||
|         incremental_mark_file_for_copy(ScanCtx.copy_table, doc->path_md5); | ||||
|         pthread_mutex_unlock(&ScanCtx.copy_table_mu); | ||||
| 
 | ||||
|         pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu); | ||||
|         ScanCtx.dbg_skipped_files_count += 1; | ||||
|  | ||||
| @ -133,6 +133,9 @@ static int incremental_get_str(GHashTable *table, const char *path_md5) { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Not thread safe! | ||||
|  */ | ||||
| __always_inline | ||||
| static int incremental_mark_file_for_copy(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) { | ||||
|     char *ptr = malloc(MD5_STR_LENGTH); | ||||
|  | ||||
| @ -280,7 +280,6 @@ void index_info(struct mg_connection *nc) { | ||||
|     cJSON_AddBoolToObject(json, "esVersionLegacy", USE_LEGACY_ES_SETTINGS(WebCtx.es_version)); | ||||
|     cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM)); | ||||
|     cJSON_AddStringToObject(json, "sist2Hash", Sist2CommitHash); | ||||
|     cJSON_AddStringToObject(json, "libscanHash", LibScanCommitHash); | ||||
|     cJSON_AddStringToObject(json, "lang", WebCtx.lang); | ||||
|     cJSON_AddBoolToObject(json, "dev", WebCtx.dev); | ||||
| #ifdef SIST_DEBUG | ||||
|  | ||||
							
								
								
									
										6
									
								
								src/web/static_generated.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								src/web/static_generated.c
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										29
									
								
								third-party/libscan/libscan/ebook/ebook.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										29
									
								
								third-party/libscan/libscan/ebook/ebook.c
									
									
									
									
										vendored
									
									
								
							| @ -5,9 +5,7 @@ | ||||
| 
 | ||||
| #include "../media/media.h" | ||||
| #include "../arc/arc.h" | ||||
| 
 | ||||
| #define MIN_OCR_SIZE 350 | ||||
| #define MIN_OCR_LEN 10 | ||||
| #include "../ocr/ocr.h" | ||||
| 
 | ||||
| /* fill_image callback doesn't let us pass opaque pointers unless I create my own device */ | ||||
| __thread text_buffer_t thread_buffer; | ||||
| @ -225,7 +223,9 @@ static int read_stext_block(fz_stext_block *block, text_buffer_t *tex) { | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| #define IS_VALID_BPP(d) ((d)==1 || (d)==2 || (d)==4 || (d)==8 || (d)==16 || (d)==24 || (d)==32) | ||||
| static void fill_image_ocr_cb(const char* text, size_t len) { | ||||
|   text_buffer_append_string(&thread_buffer, text, len - 1); | ||||
| } | ||||
| 
 | ||||
| void fill_image(fz_context *fzctx, UNUSED(fz_device *dev), | ||||
|                 fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha), | ||||
| @ -233,26 +233,9 @@ void fill_image(fz_context *fzctx, UNUSED(fz_device *dev), | ||||
| 
 | ||||
|     int l2factor = 0; | ||||
| 
 | ||||
|     if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && IS_VALID_BPP(img->n)) { | ||||
| 
 | ||||
|     if (img->w >= MIN_OCR_WIDTH && img->h >= MIN_OCR_HEIGHT && OCR_IS_VALID_BPP(img->n)) { | ||||
|         fz_pixmap *pix = img->get_pixmap(fzctx, img, NULL, img->w, img->h, &l2factor); | ||||
| 
 | ||||
|         if (pix->h > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && img->xres != 0) { | ||||
|             TessBaseAPI *api = TessBaseAPICreate(); | ||||
|             TessBaseAPIInit3(api, thread_ctx.tesseract_path, thread_ctx.tesseract_lang); | ||||
| 
 | ||||
|             TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride); | ||||
|             TessBaseAPISetSourceResolution(api, pix->xres); | ||||
| 
 | ||||
|             char *text = TessBaseAPIGetUTF8Text(api); | ||||
|             size_t len = strlen(text); | ||||
|             if (len >= MIN_OCR_LEN) { | ||||
|                 text_buffer_append_string(&thread_buffer, text, len - 1); | ||||
|             } | ||||
| 
 | ||||
|             TessBaseAPIEnd(api); | ||||
|             TessBaseAPIDelete(api); | ||||
|         } | ||||
|         ocr_extract_text(thread_ctx.tesseract_path, thread_ctx.tesseract_lang, pix->samples, pix->w, pix->h, pix->n, pix->stride, pix->xres, fill_image_ocr_cb); | ||||
|         fz_drop_pixmap(fzctx, pix); | ||||
|     } | ||||
| } | ||||
|  | ||||
							
								
								
									
										70
									
								
								third-party/libscan/libscan/media/media.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										70
									
								
								third-party/libscan/libscan/media/media.c
									
									
									
									
										vendored
									
									
								
							| @ -1,12 +1,18 @@ | ||||
| #include "media.h" | ||||
| #include "../ocr/ocr.h" | ||||
| #include <ctype.h> | ||||
| 
 | ||||
| #define MIN_SIZE 32 | ||||
| #define AVIO_BUF_SIZE 8192 | ||||
| #define IS_VIDEO(fmt) (fmt->iformat->name && strcmp(fmt->iformat->name, "image2") != 0) | ||||
| #define IS_VIDEO(fmt) ((fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0) | ||||
| 
 | ||||
| #define STREAM_IS_IMAGE (stream->nb_frames <= 1) | ||||
| 
 | ||||
| #define STORE_AS_IS ((void*)-1) | ||||
| 
 | ||||
| // Pointer to document being processed
 | ||||
| __thread document_t *thread_doc; | ||||
| 
 | ||||
| const char *get_filepath_with_ext(document_t *doc, const char *filepath, const char *mime_str) { | ||||
| 
 | ||||
|     int has_extension = doc->ext > doc->base; | ||||
| @ -311,7 +317,7 @@ append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *f | ||||
|             if (strcmp(key, "artist") == 0) { | ||||
|                 append_tag_meta_if_not_exists(ctx, doc, tag, MetaArtist); | ||||
|             } else if (strcmp(key, "imagedescription") == 0) { | ||||
|                 APPEND_TAG_META(MetaContent) | ||||
|                 append_tag_meta_if_not_exists(ctx, doc, tag, MetaContent); | ||||
|             } else if (strcmp(key, "make") == 0) { | ||||
|                 APPEND_TAG_META(MetaExifMake) | ||||
|             } else if (strcmp(key, "model") == 0) { | ||||
| @ -343,6 +349,55 @@ append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *f | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void ocr_image_cb(const char *text, size_t len) { | ||||
|     APPEND_STR_META(thread_doc, MetaContent, text); | ||||
| } | ||||
| 
 | ||||
| #define OCR_PIXEL_FORMAT AV_PIX_FMT_RGB32 | ||||
| #define OCR_BYTES_PER_PIXEL 4 | ||||
| #define OCR_PIXELS_PER_INCH 70 | ||||
| 
 | ||||
| void ocr_image(scan_media_ctx_t *ctx, document_t *doc, const AVCodecContext *decoder, AVFrame *frame) { | ||||
| 
 | ||||
|     // Convert to RGB32
 | ||||
|     AVFrame *rgb_frame = av_frame_alloc(); | ||||
| 
 | ||||
|     struct SwsContext *sws_ctx = sws_getContext( | ||||
|             frame->width, frame->height, decoder->pix_fmt, | ||||
|             frame->width, frame->height, OCR_PIXEL_FORMAT, | ||||
|             SWS_LANCZOS, 0, 0, 0 | ||||
|     ); | ||||
| 
 | ||||
|     int dst_buf_len = av_image_get_buffer_size(OCR_PIXEL_FORMAT, frame->width, frame->height, 1); | ||||
|     uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len * 2); | ||||
| 
 | ||||
|     av_image_fill_arrays(rgb_frame->data, rgb_frame->linesize, dst_buf, OCR_PIXEL_FORMAT, frame->width, frame->height, | ||||
|                          1); | ||||
| 
 | ||||
|     sws_scale(sws_ctx, | ||||
|               (const uint8_t *const *) frame->data, frame->linesize, | ||||
|               0, frame->height, | ||||
|               rgb_frame->data, rgb_frame->linesize | ||||
|     ); | ||||
| 
 | ||||
|     thread_doc = doc; | ||||
|     ocr_extract_text( | ||||
|             ctx->tesseract_path, | ||||
|             ctx->tesseract_lang, | ||||
|             rgb_frame->data[0], | ||||
|             frame->width, | ||||
|             frame->height, | ||||
|             OCR_BYTES_PER_PIXEL, | ||||
|             rgb_frame->linesize[0], | ||||
|             OCR_PIXELS_PER_INCH, | ||||
|             ocr_image_cb | ||||
|     ); | ||||
| 
 | ||||
|     sws_freeContext(sws_ctx); | ||||
|     av_free(*rgb_frame->data); | ||||
|     av_frame_free(&rgb_frame); | ||||
| } | ||||
| 
 | ||||
| void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, document_t *doc) { | ||||
| 
 | ||||
|     int video_stream = -1; | ||||
| @ -419,11 +474,11 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, | ||||
|         avcodec_open2(decoder, video_codec, NULL); | ||||
| 
 | ||||
|         //Seek
 | ||||
|         if (stream->nb_frames > 1 && stream->codecpar->codec_id != AV_CODEC_ID_GIF) { | ||||
|         if (!STREAM_IS_IMAGE && stream->codecpar->codec_id != AV_CODEC_ID_GIF) { | ||||
|             int seek_ret; | ||||
|             for (int i = 20; i >= 0; i--) { | ||||
|                 seek_ret = av_seek_frame(pFormatCtx, video_stream, | ||||
|                                          stream->duration * 0.10, 0); | ||||
|                                          (long) ((double) stream->duration * 0.10), 0); | ||||
|                 if (seek_ret == 0) { | ||||
|                     break; | ||||
|                 } | ||||
| @ -438,6 +493,11 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, | ||||
|             return; | ||||
|         } | ||||
| 
 | ||||
|         if (ctx->tesseract_lang != NULL && STREAM_IS_IMAGE) { | ||||
|             ocr_image(ctx, doc, decoder, frame_and_packet->frame); | ||||
|         } | ||||
| 
 | ||||
|         // NOTE: OCR'd content takes precedence over exif image description
 | ||||
|         append_video_meta(ctx, pFormatCtx, frame_and_packet->frame, doc, IS_VIDEO(pFormatCtx)); | ||||
| 
 | ||||
|         // Scale frame
 | ||||
| @ -534,7 +594,7 @@ long memfile_seek(void *ptr, long offset, int whence) { | ||||
|     memfile_t *mem = ptr; | ||||
| 
 | ||||
|     if (whence == 0x10000) { | ||||
|         return mem->size; | ||||
|         return (long) mem->size; | ||||
|     } | ||||
| 
 | ||||
|     int ret = fseek(mem->file, offset, whence); | ||||
|  | ||||
							
								
								
									
										3
									
								
								third-party/libscan/libscan/media/media.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								third-party/libscan/libscan/media/media.h
									
									
									
									
										vendored
									
									
								
							| @ -19,6 +19,9 @@ typedef struct { | ||||
|     float tn_qscale; | ||||
|     long max_media_buffer; | ||||
|     int read_subtitles; | ||||
| 
 | ||||
|     const char *tesseract_lang; | ||||
|     const char *tesseract_path; | ||||
| } scan_media_ctx_t; | ||||
| 
 | ||||
| __always_inline | ||||
|  | ||||
							
								
								
									
										47
									
								
								third-party/libscan/libscan/ocr/ocr.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								third-party/libscan/libscan/ocr/ocr.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,47 @@ | ||||
| #ifndef OCR_H | ||||
| #define OCR_H | ||||
| 
 | ||||
| #include "../scan.h" | ||||
| #include <tesseract/capi.h> | ||||
| 
 | ||||
| #define MIN_OCR_WIDTH 350 | ||||
| #define MIN_OCR_HEIGHT 100 | ||||
| #define MIN_OCR_LEN 10 | ||||
| 
 | ||||
| #define OCR_IS_VALID_BPP(d)                                                    \ | ||||
|   ((d) == 1 || (d) == 2 || (d) == 4 || (d) == 8 || (d) == 16 || (d) == 24 ||   \ | ||||
|    (d) == 32) | ||||
| 
 | ||||
| typedef void (*ocr_extract_callback_t)(const char *, size_t); | ||||
| 
 | ||||
| __always_inline static void | ||||
| ocr_extract_text(const char *tesseract_path, const char *tesseract_lang, | ||||
|                  const unsigned char *img_buf, const int img_w, const int img_h, | ||||
|                  const int img_bpp, const int img_stride, const int img_xres, | ||||
|                  const ocr_extract_callback_t cb) { | ||||
| 
 | ||||
|     if (img_w < MIN_OCR_WIDTH || img_h < MIN_OCR_HEIGHT || img_xres <= 0 || | ||||
|         !OCR_IS_VALID_BPP(img_bpp)) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     TessBaseAPI *api = TessBaseAPICreate(); | ||||
|     TessBaseAPIInit3(api, tesseract_path, tesseract_lang); | ||||
| 
 | ||||
|     TessBaseAPISetImage(api, img_buf, img_w, img_h, img_bpp, img_stride); | ||||
|     TessBaseAPISetSourceResolution(api, img_xres); | ||||
| 
 | ||||
|     char *text = TessBaseAPIGetUTF8Text(api); | ||||
|     if (text != NULL) { | ||||
|         size_t len = strlen(text); | ||||
|         if (len >= MIN_OCR_LEN) { | ||||
|             cb(text, len); | ||||
|         } | ||||
|         TessDeleteText(text); | ||||
|     } | ||||
| 
 | ||||
|     TessBaseAPIEnd(api); | ||||
|     TessBaseAPIDelete(api); | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										20
									
								
								third-party/libscan/libscan/ooxml/ooxml.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										20
									
								
								third-party/libscan/libscan/ooxml/ooxml.c
									
									
									
									
										vendored
									
									
								
							| @ -15,18 +15,18 @@ static int should_read_part(const char *part) { | ||||
|     } | ||||
| 
 | ||||
|     if (    // Word
 | ||||
|             STR_STARTS_WITH(part, "word/document.xml") | ||||
|             || STR_STARTS_WITH(part, "word/footnotes.xml") | ||||
|             || STR_STARTS_WITH(part, "word/endnotes.xml") | ||||
|             || STR_STARTS_WITH(part, "word/footer") | ||||
|             || STR_STARTS_WITH(part, "word/header") | ||||
|             STR_STARTS_WITH_CONSTANT(part, "word/document.xml") | ||||
|             || STR_STARTS_WITH_CONSTANT(part, "word/footnotes.xml") | ||||
|             || STR_STARTS_WITH_CONSTANT(part, "word/endnotes.xml") | ||||
|             || STR_STARTS_WITH_CONSTANT(part, "word/footer") | ||||
|             || STR_STARTS_WITH_CONSTANT(part, "word/header") | ||||
|             // PowerPoint
 | ||||
|             || STR_STARTS_WITH(part, "ppt/slides/slide") | ||||
|             || STR_STARTS_WITH(part, "ppt/notesSlides/slide") | ||||
|             || STR_STARTS_WITH_CONSTANT(part, "ppt/slides/slide") | ||||
|             || STR_STARTS_WITH_CONSTANT(part, "ppt/notesSlides/slide") | ||||
|             // Excel
 | ||||
|             || STR_STARTS_WITH(part, "xl/worksheets/sheet") | ||||
|             || STR_STARTS_WITH(part, "xl/sharedStrings.xml") | ||||
|             || STR_STARTS_WITH(part, "xl/workbook.xml") | ||||
|             || STR_STARTS_WITH_CONSTANT(part, "xl/worksheets/sheet") | ||||
|             || STR_STARTS_WITH_CONSTANT(part, "xl/sharedStrings.xml") | ||||
|             || STR_STARTS_WITH_CONSTANT(part, "xl/workbook.xml") | ||||
|             ) { | ||||
|         return TRUE; | ||||
|     } | ||||
|  | ||||
							
								
								
									
										10
									
								
								third-party/libscan/libscan/raw/raw.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										10
									
								
								third-party/libscan/libscan/raw/raw.c
									
									
									
									
										vendored
									
									
								
							| @ -143,7 +143,7 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) { | ||||
| 
 | ||||
|     libraw_gps_info_t gps = libraw_lib->other.parsed_gps; | ||||
|     double gps_longitude_dec = | ||||
|             (gps.longtitude[0] + gps.longtitude[1] / 60 + gps.longtitude[2] / 3600) * DMS_REF(gps.longref); | ||||
|             (gps.longitude[0] + gps.longitude[1] / 60 + gps.longitude[2] / 3600) * DMS_REF(gps.longref); | ||||
|     snprintf(tmp, sizeof(tmp), "%.15f", gps_longitude_dec); | ||||
|     if (gps_longitude_dec != 0.0) { | ||||
|         APPEND_STR_META(doc, MetaExifGpsLongitudeDec, tmp) | ||||
| @ -163,7 +163,13 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     libraw_unpack_thumb(libraw_lib); | ||||
|     int unpack_ret = libraw_unpack_thumb(libraw_lib); | ||||
|     if (unpack_ret != 0) { | ||||
|         CTX_LOG_ERRORF(f->filepath, "libraw_unpack_thumb returned error code %d", unpack_ret) | ||||
|         free(buf); | ||||
|         libraw_close(libraw_lib); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     int errc = 0; | ||||
|     libraw_processed_image_t *thumb = libraw_dcraw_make_mem_thumb(libraw_lib, &errc); | ||||
|  | ||||
							
								
								
									
										1
									
								
								third-party/libscan/libscan/scan.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								third-party/libscan/libscan/scan.h
									
									
									
									
										vendored
									
									
								
							| @ -61,6 +61,7 @@ enum metakey { | ||||
|     MetaFontName, | ||||
|     MetaParent, | ||||
|     MetaExifMake, | ||||
|     MetaExifDescription, | ||||
|     MetaExifSoftware, | ||||
|     MetaExifExposureTime, | ||||
|     MetaExifFNumber, | ||||
|  | ||||
							
								
								
									
										2
									
								
								third-party/libscan/libscan/util.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								third-party/libscan/libscan/util.h
									
									
									
									
										vendored
									
									
								
							| @ -7,7 +7,7 @@ | ||||
| #include "../third-party/utf8.h/utf8.h" | ||||
| #include "macros.h" | ||||
| 
 | ||||
| #define STR_STARTS_WITH(x, y) (strncmp(y, x, sizeof(y) - 1) == 0) | ||||
| #define STR_STARTS_WITH_CONSTANT(x, y) (strncmp(y, x, sizeof(y) - 1) == 0) | ||||
| 
 | ||||
| #define TEXT_BUF_FULL (-1) | ||||
| #define INITIAL_BUF_SIZE (1024 * 16) | ||||
|  | ||||
							
								
								
									
										17
									
								
								third-party/libscan/test/main.cpp
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										17
									
								
								third-party/libscan/test/main.cpp
									
									
									
									
										vendored
									
									
								
							| @ -227,7 +227,7 @@ TEST(Ebook, Utf8Pdf) { | ||||
| 
 | ||||
|     parse_ebook(&ebook_500_ctx, &f, "application/pdf", &doc); | ||||
| 
 | ||||
|     ASSERT_TRUE(STR_STARTS_WITH(get_meta(&doc, MetaContent)->str_val, "最後測試 ")); | ||||
|     ASSERT_TRUE(STR_STARTS_WITH_CONSTANT(get_meta(&doc, MetaContent)->str_val, "最後測試 ")); | ||||
|     cleanup(&doc, &f); | ||||
| } | ||||
| 
 | ||||
| @ -245,7 +245,7 @@ TEST(Ebook, Utf8PdfInvalidChars) { | ||||
|     // It should say "HART is a group of highly qualified ..." but the PDF
 | ||||
|     //  text has been intentionally fucked with by the authors
 | ||||
|     // We can at least filter out the non-printable/invalid characters like '<27>' etc
 | ||||
|     ASSERT_TRUE(STR_STARTS_WITH(get_meta(&doc, MetaContent)->str_val, "HART i a g f highl alified ")); | ||||
|     ASSERT_TRUE(STR_STARTS_WITH_CONSTANT(get_meta(&doc, MetaContent)->str_val, "HART i a g f highl alified ")); | ||||
|     cleanup(&doc, &f); | ||||
| } | ||||
| 
 | ||||
| @ -780,6 +780,19 @@ TEST(Arc, EncryptedZip) { | ||||
| } | ||||
| 
 | ||||
| /* RAW */ | ||||
| TEST(RAW, Segfault1) { | ||||
|     vfile_t f; | ||||
|     document_t doc; | ||||
|     load_doc_file("libscan-test-files/test_files/raw/segfault1.dng", &f, &doc); | ||||
| 
 | ||||
|     parse_raw(&raw_ctx, &f, &doc); | ||||
| 
 | ||||
|     ASSERT_EQ(get_meta(&doc, MetaWidth)->long_val, 3840); | ||||
|     ASSERT_EQ(get_meta(&doc, MetaHeight)->long_val, 7680); | ||||
| 
 | ||||
|     cleanup(&doc, &f); | ||||
| } | ||||
| 
 | ||||
| TEST(RAW, Panasonic) { | ||||
|     vfile_t f; | ||||
|     document_t doc; | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user