mirror of
				https://github.com/simon987/sist2.git
				synced 2025-10-30 23:46:52 +00:00 
			
		
		
		
	
						commit
						7493dedc8c
					
				| @ -10,7 +10,7 @@ steps: | ||||
|   - name: build | ||||
|     image: simon987/sist2-build | ||||
|     commands: | ||||
|       - ./ci/build.sh | ||||
|       - ./scripts/build.sh | ||||
|   - name: docker | ||||
|     image: plugins/docker | ||||
|     settings: | ||||
| @ -55,7 +55,7 @@ steps: | ||||
|   - name: build | ||||
|     image: simon987/sist2-build-arm64 | ||||
|     commands: | ||||
|       - ./ci/build_arm64.sh | ||||
|       - ./scripts/build_arm64.sh | ||||
|   - name: scp files | ||||
|     image: appleboy/drone-scp | ||||
|     settings: | ||||
|  | ||||
							
								
								
									
										4
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -10,13 +10,13 @@ Makefile | ||||
| LOG | ||||
| sist2* | ||||
| !sist2-vue/ | ||||
| index.sist2/ | ||||
| *.sist2/ | ||||
| bundle*.css | ||||
| bundle.js | ||||
| *.a | ||||
| vgcore.* | ||||
| build/ | ||||
| third-party/ | ||||
| third-party/argparse | ||||
| *.idx/ | ||||
| VERSION | ||||
| git_hash.h | ||||
|  | ||||
							
								
								
									
										8
									
								
								.gitmodules
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								.gitmodules
									
									
									
									
										vendored
									
									
								
							| @ -3,4 +3,10 @@ | ||||
| 	url = https://github.com/simon987/libscan | ||||
| [submodule "third-party/argparse"] | ||||
| 	path = third-party/argparse | ||||
| 	url = https://github.com/cofyc/argparse | ||||
| 	url = https://github.com/simon987/argparse | ||||
| [submodule "third-party/libscan/third-party/utf8.h"] | ||||
| 	path = third-party/libscan/third-party/utf8.h | ||||
| 	url = https://github.com/sheredom/utf8.h | ||||
| [submodule "third-party/libscan/third-party/antiword"] | ||||
| 	path = third-party/libscan/third-party/antiword | ||||
| 	url = https://github.com/simon987/antiword | ||||
|  | ||||
							
								
								
									
										10
									
								
								Dockerfile
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								Dockerfile
									
									
									
									
									
								
							| @ -6,12 +6,10 @@ COPY . . | ||||
| RUN cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake . | ||||
| RUN make -j$(nproc) | ||||
| RUN strip sist2 | ||||
| RUN ls -lh | ||||
| RUN ls -lh sist2-vue/dist/ | ||||
| 
 | ||||
| FROM ubuntu:20.10 | ||||
| FROM ubuntu:21.10 | ||||
| 
 | ||||
| RUN apt update && apt install -y curl libasan5 | ||||
| RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/* | ||||
| 
 | ||||
| RUN mkdir -p /usr/share/tessdata && \ | ||||
|     cd /usr/share/tessdata/ && \ | ||||
| @ -22,9 +20,9 @@ RUN mkdir -p /usr/share/tessdata && \ | ||||
|     curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\ | ||||
|     curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata | ||||
| 
 | ||||
| COPY --from=build /build/sist2 /root/sist2 | ||||
| ENTRYPOINT ["/root/sist2"] | ||||
| 
 | ||||
| ENV LANG C.UTF-8 | ||||
| ENV LC_ALL C.UTF-8 | ||||
| 
 | ||||
| ENTRYPOINT ["/root/sist2"] | ||||
| COPY --from=build /build/sist2 /root/sist2 | ||||
|  | ||||
| @ -7,9 +7,9 @@ RUN cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE | ||||
| RUN make -j$(nproc) | ||||
| RUN strip sist2 | ||||
| 
 | ||||
| FROM ubuntu:20.10 | ||||
| FROM --platform linux/arm64/v8 ubuntu:21.10 | ||||
| 
 | ||||
| RUN apt update && apt install -y curl libasan5 | ||||
| RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/* | ||||
| 
 | ||||
| RUN mkdir -p /usr/share/tessdata && \ | ||||
|     cd /usr/share/tessdata/ && \ | ||||
| @ -20,9 +20,9 @@ RUN mkdir -p /usr/share/tessdata && \ | ||||
|     curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\ | ||||
|     curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata | ||||
| 
 | ||||
| COPY --from=build /build/sist2 /root/sist2 | ||||
| 
 | ||||
| ENV LANG C.UTF-8 | ||||
| ENV LC_ALL C.UTF-8 | ||||
| 
 | ||||
| ENTRYPOINT ["/root/sist2"] | ||||
| 
 | ||||
| COPY --from=build /build/sist2 /root/sist2 | ||||
| @ -2,7 +2,7 @@ | ||||
| [](https://www.codefactor.io/repository/github/simon987/sist2) | ||||
| [](https://files.simon987.net/.gate/sist2/simon987_sist2/) | ||||
| 
 | ||||
| **Demo**: [sist2.simon987.net](https://sist2.simon987.net/?i=Demo%20files) | ||||
| **Demo**: [sist2.simon987.net](https://sist2.simon987.net/) | ||||
| 
 | ||||
| # sist2 | ||||
| 
 | ||||
| @ -33,12 +33,11 @@ sist2 (Simple incremental search tool) | ||||
| 
 | ||||
| ## Getting Started | ||||
| 
 | ||||
| 1. Have an Elasticsearch (>= 6.X.X) instance running | ||||
| 1. Have an Elasticsearch (>= 6.8.X, ideally >=7.14.0) instance running | ||||
|     1. Download [from official website](https://www.elastic.co/downloads/elasticsearch) | ||||
|     1. *(or)* Run using docker: | ||||
|         ```bash | ||||
|        docker run -d --name es1 --net sist2_net -p 9200:9200 \ | ||||
|             -e "discovery.type=single-node" elasticsearch:7.14.0 | ||||
|         docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.14.0 | ||||
|         ``` | ||||
|     1. *(or)* Run using docker-compose: | ||||
|         ```yaml | ||||
| @ -52,7 +51,7 @@ sist2 (Simple incremental search tool) | ||||
|     1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) * | ||||
|     1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not | ||||
|        recommended!)* | ||||
|     1. *(or)* `docker pull simon987/sist2:2.11.3-x64-linux` | ||||
|     1. *(or)* `docker pull simon987/sist2:2.11.4-x64-linux` | ||||
| 
 | ||||
| 1. See [Usage guide](docs/USAGE.md) | ||||
| 
 | ||||
|  | ||||
| @ -14,6 +14,7 @@ | ||||
|     * [examples](#web-examples) | ||||
|     * [rewrite_url](#rewrite_url) | ||||
|     * [link to specific indices](#link-to-specific-indices) | ||||
| * [elasticsearch](#elasticsearch) | ||||
| * [exec-script](#exec-script) | ||||
| * [tagging](#tagging) | ||||
| * [sidecar files](#sidecar-files) | ||||
| @ -266,9 +267,20 @@ sist2 web index1 index2 index3 index4 | ||||
| When the `rewrite_url` field is not empty, the web module ignores the `root` | ||||
| field and will return an HTTP redirect to `<rewrite_url><path>/<name><extension>` | ||||
| instead of serving the file from disk.  | ||||
| Both the `root` and `rewrite_url` fields are safe to manually modify from the  | ||||
| Both the `root` and `rewrite_url` fields are safe to manually modify from the  | ||||
| `descriptor.json` file. | ||||
| 
 | ||||
| ## Elasticsearch | ||||
| 
 | ||||
| Elasticsearch versions >=6.8.0, <8.0.0 are supported by sist2.  | ||||
| 
 | ||||
| Using a version >=7.14.0 is recommended to enable the following features: | ||||
| 
 | ||||
| - Bug fix for large documents (See #198) | ||||
| 
 | ||||
| When using a legacy version of ES, a notice will be displayed next to the sist2 version in the web UI. | ||||
| If you don't care about the features above, you can ignore it or disable it in the configuration page. | ||||
| 
 | ||||
| ## exec-script | ||||
| 
 | ||||
| The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state. | ||||
|  | ||||
| @ -78,6 +78,7 @@ | ||||
|     "name": { | ||||
|       "analyzer": "content_analyzer", | ||||
|       "type": "text", | ||||
|       "fielddata": true, | ||||
|       "fields": { | ||||
|         "nGram": { | ||||
|           "type": "text", | ||||
|  | ||||
							
								
								
									
										58
									
								
								schema/settings_legacy.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								schema/settings_legacy.json
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,58 @@ | ||||
| { | ||||
|   "index": { | ||||
|     "refresh_interval": "30s", | ||||
|     "codec": "best_compression", | ||||
|     "number_of_replicas": 0 | ||||
|   }, | ||||
|   "analysis": { | ||||
|     "tokenizer": { | ||||
|       "path_tokenizer": { | ||||
|         "type": "path_hierarchy", | ||||
|         "delimiter": "/" | ||||
|       }, | ||||
|       "tag_tokenizer": { | ||||
|         "type": "path_hierarchy", | ||||
|         "delimiter": "." | ||||
|       }, | ||||
|       "my_nGram_tokenizer": { | ||||
|         "type": "nGram", | ||||
|         "min_gram": 3, | ||||
|         "max_gram": 3 | ||||
|       } | ||||
|     }, | ||||
|     "analyzer": { | ||||
|       "path_analyzer": { | ||||
|         "tokenizer": "path_tokenizer", | ||||
|         "filter": [ | ||||
|           "lowercase" | ||||
|         ] | ||||
|       }, | ||||
|       "tag_analyzer": { | ||||
|         "tokenizer": "tag_tokenizer", | ||||
|         "filter": [ | ||||
|           "lowercase" | ||||
|         ] | ||||
|       }, | ||||
|       "case_insensitive_kw_analyzer": { | ||||
|         "tokenizer": "keyword", | ||||
|         "filter": [ | ||||
|           "lowercase" | ||||
|         ] | ||||
|       }, | ||||
|       "my_nGram": { | ||||
|         "tokenizer": "my_nGram_tokenizer", | ||||
|         "filter": [ | ||||
|           "lowercase", | ||||
|           "asciifolding" | ||||
|         ] | ||||
|       }, | ||||
|       "content_analyzer": { | ||||
|         "tokenizer": "standard", | ||||
|         "filter": [ | ||||
|           "lowercase", | ||||
|           "asciifolding" | ||||
|         ] | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
| @ -3,6 +3,7 @@ import json | ||||
| files = [ | ||||
|     "schema/mappings.json", | ||||
|     "schema/settings.json", | ||||
|     "schema/settings_legacy.json", | ||||
|     "schema/pipeline.json", | ||||
| ] | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										2
									
								
								sist2-vue/dist/css/index.css
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								sist2-vue/dist/css/index.css
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										2
									
								
								sist2-vue/dist/js/index.js
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								sist2-vue/dist/js/index.js
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @ -51,6 +51,7 @@ export interface EsHit { | ||||
|         duration: number | ||||
|         tag: string[] | ||||
|         checksum: string | ||||
|         thumbnail: string | ||||
|     } | ||||
|     _props: { | ||||
|         isSubDocument: boolean | ||||
| @ -61,6 +62,8 @@ export interface EsHit { | ||||
|         isPlayableImage: boolean | ||||
|         isAudio: boolean | ||||
|         hasThumbnail: boolean | ||||
|         tnW: number | ||||
|         tnH: number | ||||
|     } | ||||
|     highlight: { | ||||
|         name: string[] | undefined, | ||||
| @ -131,6 +134,8 @@ class Sist2Api { | ||||
| 
 | ||||
|         if ("thumbnail" in hit._source) { | ||||
|             hit._props.hasThumbnail = true; | ||||
|             hit._props.tnW = Number(hit._source.thumbnail.split(",")[0]); | ||||
|             hit._props.tnH = Number(hit._source.thumbnail.split(",")[1]); | ||||
|         } | ||||
| 
 | ||||
|         switch (mimeCategory) { | ||||
|  | ||||
| @ -43,6 +43,20 @@ const SORT_MODES = { | ||||
|             {_tie: {order: "asc"}} | ||||
|         ], | ||||
|         key: (hit: EsHit) => hit._source.size | ||||
|     }, | ||||
|     nameAsc: { | ||||
|         mode: [ | ||||
|             {name: {order: "asc"}}, | ||||
|             {_tie: {order: "asc"}} | ||||
|         ], | ||||
|         key: (hit: EsHit) => hit._source.name | ||||
|     }, | ||||
|     nameDesc: { | ||||
|         mode: [ | ||||
|             {name: {order: "desc"}}, | ||||
|             {_tie: {order: "asc"}} | ||||
|         ], | ||||
|         key: (hit: EsHit) => hit._source.name | ||||
|     } | ||||
| } as any; | ||||
| 
 | ||||
| @ -73,6 +87,8 @@ class Sist2Query { | ||||
|         const selectedMimeTypes = getters.selectedMimeTypes; | ||||
|         const selectedTags = getters.selectedTags; | ||||
| 
 | ||||
|         const legacyES = store.state.sist2Info.esVersionLegacy; | ||||
| 
 | ||||
|         const filters = [ | ||||
|             {terms: {index: selectedIndexIds}} | ||||
|         ] as any[]; | ||||
| @ -187,9 +203,13 @@ class Sist2Query { | ||||
|                     "name.nGram": {}, | ||||
|                     "content.nGram": {}, | ||||
|                     font_name: {}, | ||||
|                 }, | ||||
|                 max_analyzed_offset: 9_999_999 | ||||
|                 } | ||||
|             }; | ||||
| 
 | ||||
|             if (!legacyES) { | ||||
|                 q.highlight.max_analyzed_offset = 9_999_999; | ||||
|             } | ||||
| 
 | ||||
|             if (getters.optSearchInPath) { | ||||
|                 q.highlight.fields["path.text"] = {}; | ||||
|                 q.highlight.fields["path.nGram"] = {}; | ||||
|  | ||||
| @ -5,7 +5,6 @@ | ||||
| 
 | ||||
|     <b-card-body> | ||||
| 
 | ||||
|       <!-- TODO: ES connectivity, Link to GH page --> | ||||
|       <b-table :items="tableItems" small borderless responsive="md" thead-class="hidden" class="mb-0"></b-table> | ||||
| 
 | ||||
|       <hr /> | ||||
| @ -32,6 +31,9 @@ export default { | ||||
|         {key: "esIndex", value: this.$store.state.sist2Info.esIndex}, | ||||
|         {key: "tagline", value: this.$store.state.sist2Info.tagline}, | ||||
|         {key: "dev", value: this.$store.state.sist2Info.dev}, | ||||
|         {key: "esVersion", value: this.$store.state.sist2Info.esVersion}, | ||||
|         {key: "esVersionSupported", value: this.$store.state.sist2Info.esVersionSupported}, | ||||
|         {key: "esVersionLegacy", value: this.$store.state.sist2Info.esVersionLegacy}, | ||||
|       ] | ||||
|     } | ||||
|   } | ||||
|  | ||||
| @ -15,11 +15,15 @@ | ||||
|           <span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span> | ||||
|         </div> | ||||
| 
 | ||||
|         <div v-if="doc._props.isImage && !hover" class="card-img-overlay" :class="{'small-badge': smallBadge}"> | ||||
|         <div | ||||
|             v-if="doc._props.isImage && !hover && doc._props.tnW / doc._props.tnH < 5" | ||||
|             class="card-img-overlay" | ||||
|             :class="{'small-badge': smallBadge}"> | ||||
|           <span class="badge badge-resolution">{{ `${doc._source.width}x${doc._source.height}` }}</span> | ||||
|         </div> | ||||
| 
 | ||||
|         <div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0 && !hover" class="card-img-overlay" | ||||
|         <div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0 && !hover" | ||||
|              class="card-img-overlay" | ||||
|              :class="{'small-badge': smallBadge}"> | ||||
|           <span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span> | ||||
|         </div> | ||||
| @ -39,7 +43,8 @@ | ||||
|       </div> | ||||
| 
 | ||||
|       <!-- Audio player--> | ||||
|       <audio v-if="doc._props.isAudio" ref="audio" preload="none" class="audio-fit fit" controls :type="doc._source.mime" | ||||
|       <audio v-if="doc._props.isAudio" ref="audio" preload="none" class="audio-fit fit" controls | ||||
|              :type="doc._source.mime" | ||||
|              :src="`f/${doc._id}`" | ||||
|              @play="onAudioPlay()"></audio> | ||||
| 
 | ||||
|  | ||||
| @ -7,11 +7,27 @@ | ||||
|         value-field="id"></b-form-select> | ||||
|   </div> | ||||
|   <div v-else> | ||||
|     <b-list-group id="index-picker-desktop"> | ||||
| 
 | ||||
|     <div class="d-flex justify-content-between align-content-center"> | ||||
|       <span> | ||||
|         {{ selectedIndices.length }} | ||||
|         {{ selectedIndices.length === 1 ? $t("indexPicker.selectedIndex") : $t("indexPicker.selectedIndices") }} | ||||
|       </span> | ||||
| 
 | ||||
|       <div> | ||||
|         <b-button variant="link" @click="selectAll()"> {{ $t("indexPicker.selectAll") }}</b-button> | ||||
|         <b-button variant="link" @click="selectNone()"> {{ $t("indexPicker.selectNone") }}</b-button> | ||||
|       </div> | ||||
|     </div> | ||||
| 
 | ||||
|     <b-list-group id="index-picker-desktop" class="unselectable"> | ||||
|       <b-list-group-item | ||||
|           v-for="idx in indices" | ||||
|           @click="toggleIndex(idx)" | ||||
|           class="d-flex justify-content-between align-items-center list-group-item-action pointer"> | ||||
|           @click="toggleIndex(idx, $event)" | ||||
|           @click.shift="shiftClick(idx, $event)" | ||||
|           class="d-flex justify-content-between align-items-center list-group-item-action pointer" | ||||
|           :class="{active: lastClickIndex === idx}" | ||||
|       > | ||||
|         <div class="d-flex"> | ||||
|           <b-checkbox @change="toggleIndex(idx)" :checked="isSelected(idx)"></b-checkbox> | ||||
|           {{ idx.name }} | ||||
| @ -36,6 +52,7 @@ export default Vue.extend({ | ||||
|   data() { | ||||
|     return { | ||||
|       loading: true, | ||||
|       lastClickIndex: null | ||||
|     } | ||||
|   }, | ||||
|   computed: { | ||||
| @ -53,13 +70,50 @@ export default Vue.extend({ | ||||
|     ...mapActions({ | ||||
|       setSelectedIndices: "setSelectedIndices" | ||||
|     }), | ||||
|     shiftClick(index, e) { | ||||
|       if (this.lastClickIndex === null) { | ||||
|         return; | ||||
|       } | ||||
| 
 | ||||
|       const select = this.isSelected(this.lastClickIndex); | ||||
| 
 | ||||
|       let leftBoundary = this.indices.indexOf(this.lastClickIndex); | ||||
|       let rightBoundary = this.indices.indexOf(index); | ||||
| 
 | ||||
|       if (rightBoundary < leftBoundary) { | ||||
|         let tmp = leftBoundary; | ||||
|         leftBoundary = rightBoundary; | ||||
|         rightBoundary = tmp; | ||||
|       } | ||||
| 
 | ||||
|       for (let i = leftBoundary; i <= rightBoundary; i++) { | ||||
|         if (select) { | ||||
|           if (!this.isSelected(this.indices[i])) { | ||||
|             this.setSelectedIndices([this.indices[i], ...this.selectedIndices]); | ||||
|           } | ||||
|         } else { | ||||
|           this.setSelectedIndices(this.selectedIndices.filter(idx => idx !== this.indices[i])); | ||||
|         } | ||||
|       } | ||||
|     }, | ||||
|     selectAll() { | ||||
|       this.setSelectedIndices(this.indices); | ||||
|     }, | ||||
|     selectNone() { | ||||
|       this.setSelectedIndices([]); | ||||
|     }, | ||||
|     onSelect(value) { | ||||
|       this.setSelectedIndices(this.indices.filter(idx => value.includes(idx.id))); | ||||
|     }, | ||||
|     formatIdxDate(timestamp: number): string { | ||||
|       return format(new Date(timestamp * 1000), "yyyy-MM-dd"); | ||||
|     }, | ||||
|     toggleIndex(index) { | ||||
|     toggleIndex(index, e) { | ||||
|       if (e.shiftKey) { | ||||
|         return; | ||||
|       } | ||||
| 
 | ||||
|       this.lastClickIndex = index; | ||||
|       if (this.isSelected(index)) { | ||||
|         this.setSelectedIndices(this.selectedIndices.filter(idx => idx.id != index.id)); | ||||
|       } else { | ||||
| @ -92,4 +146,21 @@ export default Vue.extend({ | ||||
|   overflow-y: auto; | ||||
|   max-height: 132px; | ||||
| } | ||||
| 
 | ||||
| .btn-link:focus { | ||||
|   box-shadow: none; | ||||
| } | ||||
| 
 | ||||
| .unselectable { | ||||
|   user-select: none; | ||||
|   -ms-user-select: none; | ||||
|   -moz-user-select: none; | ||||
|   -webkit-user-select: none; | ||||
| } | ||||
| 
 | ||||
| .list-group-item.active { | ||||
|   z-index: 2; | ||||
|   background-color: inherit; | ||||
|   color: inherit; | ||||
| } | ||||
| </style> | ||||
| @ -21,6 +21,9 @@ export default { | ||||
|       if (mutation.type === "setUiMimeMap") { | ||||
|         const mimeMap = mutation.payload.slice(); | ||||
| 
 | ||||
|         const elem = document.getElementById("mimeTree"); | ||||
|         console.log(elem); | ||||
| 
 | ||||
|         this.mimeTree = new InspireTree({ | ||||
|           selection: { | ||||
|             mode: 'checkbox' | ||||
|  | ||||
| @ -8,7 +8,8 @@ | ||||
|     </b-navbar-brand> | ||||
| 
 | ||||
|     <span class="badge badge-pill version" v-if="$store && $store.state.sist2Info"> | ||||
|       v{{ sist2Version() }}<span v-if="isDebug()">-dbg</span> | ||||
|       v{{ sist2Version() }}<span v-if="isDebug()">-dbg</span><span v-if="isLegacy() && !hideLegacy()">-<a | ||||
|         href="https://github.com/simon987/sist2/blob/master/docs/USAGE.md#elasticsearch" target="_blank">legacyES</a></span> | ||||
|     </span> | ||||
| 
 | ||||
|     <span v-if="$store && $store.state.sist2Info" class="tagline" v-html="tagline()"></span> | ||||
| @ -20,6 +21,7 @@ | ||||
| 
 | ||||
| <script> | ||||
| import Sist2Icon from "@/components/Sist2Icon"; | ||||
| 
 | ||||
| export default { | ||||
|   name: "NavBar", | ||||
|   components: {Sist2Icon}, | ||||
| @ -32,6 +34,12 @@ export default { | ||||
|     }, | ||||
|     isDebug() { | ||||
|       return this.$store.state.sist2Info.debug; | ||||
|     }, | ||||
|     isLegacy() { | ||||
|       return this.$store.state.sist2Info.esVersionLegacy; | ||||
|     }, | ||||
|     hideLegacy() { | ||||
|       return this.$store.state.optHideLegacy; | ||||
|     } | ||||
|   } | ||||
| } | ||||
| @ -95,7 +103,7 @@ export default { | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| .theme-light .btn-link{ | ||||
| .theme-light .btn-link { | ||||
|   color: #222; | ||||
| } | ||||
| </style> | ||||
| @ -5,9 +5,11 @@ | ||||
|     <div style="float: right"> | ||||
|       <b-button v-b-toggle.collapse-1 variant="primary" class="not-mobile">{{ $t("details") }}</b-button> | ||||
| 
 | ||||
|       <template v-if="hitCount !== 0"> | ||||
|         <SortSelect class="ml-2"></SortSelect> | ||||
| 
 | ||||
|         <DisplayModeToggle class="ml-2"></DisplayModeToggle> | ||||
|       </template> | ||||
|     </div> | ||||
| 
 | ||||
|     <b-collapse id="collapse-1" class="pt-2" style="clear:both;"> | ||||
| @ -21,7 +23,7 @@ | ||||
| <script lang="ts"> | ||||
| import {EsResult} from "@/Sist2Api"; | ||||
| import Vue from "vue"; | ||||
| import {humanFileSize, humanTime} from "@/util"; | ||||
| import {humanFileSize} from "@/util"; | ||||
| import DisplayModeToggle from "@/components/DisplayModeToggle.vue"; | ||||
| import SortSelect from "@/components/SortSelect.vue"; | ||||
| 
 | ||||
|  | ||||
| @ -19,6 +19,14 @@ | ||||
|       {{ $t("sort.sizeDesc") }} | ||||
|     </b-dropdown-item> | ||||
| 
 | ||||
|     <b-dropdown-item :class="{'dropdown-active': sort === 'nameDesc'}" @click="onSelect('nameDesc')"> | ||||
|       {{ $t("sort.nameDesc") }} | ||||
|     </b-dropdown-item> | ||||
| 
 | ||||
|     <b-dropdown-item :class="{'dropdown-active': sort === 'nameAsc'}" @click="onSelect('nameAsc')"> | ||||
|       {{ $t("sort.nameAsc") }} | ||||
|     </b-dropdown-item> | ||||
| 
 | ||||
|     <b-dropdown-item :class="{'dropdown-active': sort === 'random'}" @click="onSelect('random')"> | ||||
|       {{ $t("sort.random") }} | ||||
|     </b-dropdown-item> | ||||
|  | ||||
| @ -63,7 +63,8 @@ export default { | ||||
|             slideDuration: "Slide duration", | ||||
|             resultSize: "Number of results per page", | ||||
|             tagOrOperator: "Use OR operator when specifying multiple tags.", | ||||
|             hideDuplicates: "Hide duplicate results based on checksum" | ||||
|             hideDuplicates: "Hide duplicate results based on checksum", | ||||
|             hideLegacy: "Hide the 'legacyES' Elasticsearch notice" | ||||
|         }, | ||||
|         queryMode: { | ||||
|             simple: "Simple", | ||||
| @ -130,13 +131,14 @@ export default { | ||||
|         saveTagModalTitle: "Add tag", | ||||
|         saveTagPlaceholder: "Tag name", | ||||
|         confirm: "Confirm", | ||||
|         indexPickerPlaceholder: "Select indices", | ||||
|         sort: { | ||||
|             relevance: "Relevance", | ||||
|             dateAsc: "Date (Older first)", | ||||
|             dateDesc: "Date (Newer first)", | ||||
|             sizeAsc: "Size (Smaller first)", | ||||
|             sizeDesc: "Size (Larger first)", | ||||
|             nameAsc: "Name (A-z)", | ||||
|             nameDesc: "Name (Z-a)", | ||||
|             random: "Random", | ||||
|         }, | ||||
|         d3: { | ||||
| @ -144,7 +146,13 @@ export default { | ||||
|             mimeSize: "Size distribution by media type", | ||||
|             dateHistogram: "File modification time distribution", | ||||
|             sizeHistogram: "File size distribution", | ||||
|         } | ||||
|         }, | ||||
|         indexPicker: { | ||||
|             selectNone: "Select None", | ||||
|             selectAll: "Select All", | ||||
|             selectedIndex: "selected index", | ||||
|             selectedIndices: "selected indices", | ||||
|         }, | ||||
|     }, | ||||
|     fr: { | ||||
|         searchBar: { | ||||
| @ -211,7 +219,8 @@ export default { | ||||
|             slideDuration: "Durée des diapositives", | ||||
|             resultSize: "Nombre de résultats par page", | ||||
|             tagOrOperator: "Utiliser l'opérateur OU lors de la spécification de plusieurs tags", | ||||
|             hideDuplicates: "Masquer les résultats en double" | ||||
|             hideDuplicates: "Masquer les résultats en double", | ||||
|             hideLegacy: "Masquer la notice 'legacyES' Elasticsearch" | ||||
|         }, | ||||
|         queryMode: { | ||||
|             simple: "Simple", | ||||
| @ -286,6 +295,8 @@ export default { | ||||
|             dateDesc: "Date (Plus récent)", | ||||
|             sizeAsc: "Taille (Plus petit)", | ||||
|             sizeDesc: "Taille (Plus grand)", | ||||
|             nameAsc: "Nom (A-z)", | ||||
|             nameDesc: "Nom (Z-a)", | ||||
|             random: "Aléatoire", | ||||
|         }, | ||||
|         d3: { | ||||
| @ -293,6 +304,12 @@ export default { | ||||
|             mimeSize: "Distribution des tailles de fichiers par type de média", | ||||
|             dateHistogram: "Distribution des dates de modification", | ||||
|             sizeHistogram: "Distribution des tailles de fichier", | ||||
|         } | ||||
|         }, | ||||
|         indexPicker: { | ||||
|             selectNone: "Sélectionner aucun", | ||||
|             selectAll: "Sélectionner tout", | ||||
|             selectedIndex: "indice sélectionné", | ||||
|             selectedIndices: "indices sélectionnés", | ||||
|         }, | ||||
|     } | ||||
| } | ||||
| @ -46,6 +46,7 @@ export default new Vuex.Store({ | ||||
|         optTreemapColor: "PuBuGn", | ||||
|         optLightboxLoadOnlyCurrent: false, | ||||
|         optLightboxSlideDuration: 15, | ||||
|         optHideLegacy: false, | ||||
| 
 | ||||
|         _onLoadSelectedIndices: [] as string[], | ||||
|         _onLoadSelectedMimeTypes: [] as string[], | ||||
| @ -144,6 +145,7 @@ export default new Vuex.Store({ | ||||
|         setOptTreemapColorGroupingDepth: (state, val) => state.optTreemapColorGroupingDepth = val, | ||||
|         setOptTreemapSize: (state, val) => state.optTreemapSize = val, | ||||
|         setOptTreemapColor: (state, val) => state.optTreemapColor = val, | ||||
|         setOptHideLegacy: (state, val) => state.optHideLegacy = val, | ||||
| 
 | ||||
|         setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val, | ||||
| 
 | ||||
| @ -339,5 +341,6 @@ export default new Vuex.Store({ | ||||
|         optLightboxLoadOnlyCurrent: state => state.optLightboxLoadOnlyCurrent, | ||||
|         optLightboxSlideDuration: state => state.optLightboxSlideDuration, | ||||
|         optResultSize: state => state.size, | ||||
|         optHideLegacy: state => state.optHideLegacy, | ||||
|     } | ||||
| }) | ||||
| @ -19,6 +19,10 @@ | ||||
|             {{ $t("opt.lightboxLoadOnlyCurrent") }} | ||||
|           </b-form-checkbox> | ||||
| 
 | ||||
|           <b-form-checkbox :checked="optHideLegacy" @input="setOptHideLegacy"> | ||||
|             {{ $t("opt.hideLegacy") }} | ||||
|           </b-form-checkbox> | ||||
| 
 | ||||
|           <label>{{ $t("opt.lang") }}</label> | ||||
|           <b-form-select :options="langOptions" :value="optLang" @input="setOptLang"></b-form-select> | ||||
| 
 | ||||
| @ -215,6 +219,7 @@ export default { | ||||
|       "optTagOrOperator", | ||||
|       "optLang", | ||||
|       "optHideDuplicates", | ||||
|       "optHideLegacy", | ||||
|     ]), | ||||
|     clientWidth() { | ||||
|       return window.innerWidth; | ||||
| @ -254,7 +259,8 @@ export default { | ||||
|       "setOptResultSize", | ||||
|       "setOptTagOrOperator", | ||||
|       "setOptLang", | ||||
|       "setOptHideDuplicates" | ||||
|       "setOptHideDuplicates", | ||||
|       "setOptHideLegacy" | ||||
|     ]), | ||||
|     onResetClick() { | ||||
|       localStorage.removeItem("sist2_configuration"); | ||||
|  | ||||
| @ -31,7 +31,7 @@ | ||||
|           </b-row> | ||||
|         </b-col> | ||||
|         <b-col> | ||||
|           <b-tabs> | ||||
|           <b-tabs justified> | ||||
|             <b-tab :title="$t('mimeTypes')"> | ||||
|               <MimePicker></MimePicker> | ||||
|             </b-tab> | ||||
| @ -43,9 +43,13 @@ | ||||
|       </b-row> | ||||
|     </b-card> | ||||
| 
 | ||||
|     <Preloader v-if="searchBusy && docs.length === 0" class="mt-3"></Preloader> | ||||
|     <div v-show="docs.length === 0 && !uiLoading"> | ||||
|       <Preloader v-if="searchBusy" class="mt-3"></Preloader> | ||||
| 
 | ||||
|     <div v-else-if="docs.length > 0"> | ||||
|       <ResultsCard></ResultsCard> | ||||
|     </div> | ||||
| 
 | ||||
|     <div v-if="docs.length > 0"> | ||||
|       <ResultsCard></ResultsCard> | ||||
| 
 | ||||
|       <DocCardWall v-if="optDisplay==='grid'" :docs="docs" :append="appendFunc"></DocCardWall> | ||||
| @ -109,10 +113,6 @@ export default Vue.extend({ | ||||
| 
 | ||||
|     }, 350, {leading: false}); | ||||
| 
 | ||||
|     Sist2Api.getMimeTypes().then(mimeMap => { | ||||
|       this.$store.commit("setUiMimeMap", mimeMap); | ||||
|     }); | ||||
| 
 | ||||
|     this.$store.dispatch("loadFromArgs", this.$route).then(() => { | ||||
|       this.$store.subscribe(() => this.$store.dispatch("updateArgs", this.$router)); | ||||
|       this.$store.subscribe((mutation) => { | ||||
| @ -138,9 +138,13 @@ export default Vue.extend({ | ||||
|       sist2.getSist2Info().then(data => { | ||||
|         this.setSist2Info(data); | ||||
|         this.setIndices(data.indices); | ||||
|         this.uiLoading = false; | ||||
| 
 | ||||
|         Sist2Api.getMimeTypes().then(mimeMap => { | ||||
|           this.$store.commit("setUiMimeMap", mimeMap); | ||||
|           this.uiLoading = false; | ||||
|           this.search(true); | ||||
|         }); | ||||
| 
 | ||||
|       }).catch(() => { | ||||
|         this.showErrorToast(); | ||||
|       }); | ||||
|  | ||||
| @ -2,6 +2,7 @@ | ||||
| 
 | ||||
| ScanCtx_t ScanCtx = { | ||||
|         .stat_index_size = 0, | ||||
|         .stat_tn_size = 0, | ||||
|         .dbg_current_files = NULL, | ||||
|         .pool = NULL | ||||
| }; | ||||
|  | ||||
| @ -17,6 +17,7 @@ | ||||
| #include "libscan/wpd/wpd.h" | ||||
| #include "libscan/json/json.h" | ||||
| #include "src/io/store.h" | ||||
| #include "src/index/elastic.h" | ||||
| 
 | ||||
| #include <glib.h> | ||||
| #include <pcre.h> | ||||
| @ -75,6 +76,7 @@ typedef struct { | ||||
| 
 | ||||
| typedef struct { | ||||
|     char *es_url; | ||||
|     es_version_t *es_version; | ||||
|     char *es_index; | ||||
|     int batch_size; | ||||
|     tpool_t *pool; | ||||
| @ -86,6 +88,7 @@ typedef struct { | ||||
| 
 | ||||
| typedef struct { | ||||
|     char *es_url; | ||||
|     es_version_t *es_version; | ||||
|     char *es_index; | ||||
|     int index_count; | ||||
|     char *auth_user; | ||||
|  | ||||
| @ -253,7 +253,7 @@ void _elastic_flush(int max) { | ||||
|     } else { | ||||
| 
 | ||||
|         print_errors(r); | ||||
|         LOG_INFOF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code); | ||||
|         LOG_DEBUGF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code); | ||||
|         delete_queue(max); | ||||
| 
 | ||||
|         if (Indexer->queued != 0) { | ||||
| @ -356,7 +356,65 @@ void finish_indexer(char *script, int async_script, char *index_id) { | ||||
|     free_response(r); | ||||
| } | ||||
| 
 | ||||
| void elastic_init(int force_reset, const char* user_mappings, const char* user_settings) { | ||||
| es_version_t *elastic_get_version(const char *es_url) { | ||||
|     response_t *r = web_get(es_url, 30); | ||||
| 
 | ||||
|     char *tmp = malloc(r->size + 1); | ||||
|     memcpy(tmp, r->body, r->size); | ||||
|     *(tmp + r->size) = '\0'; | ||||
|     cJSON *response = cJSON_Parse(tmp); | ||||
|     free(tmp); | ||||
|     free_response(r); | ||||
| 
 | ||||
|     if (response == NULL) { | ||||
|         return NULL; | ||||
|     } | ||||
| 
 | ||||
|     if (cJSON_GetObjectItem(response, "version") == NULL || | ||||
|         cJSON_GetObjectItem(cJSON_GetObjectItem(response, "version"), "number") == NULL) { | ||||
|         cJSON_Delete(response); | ||||
|         return NULL; | ||||
|     } | ||||
| 
 | ||||
|     char *version_str = cJSON_GetObjectItem(cJSON_GetObjectItem(response, "version"), "number")->valuestring; | ||||
| 
 | ||||
|     es_version_t *version = malloc(sizeof(es_version_t)); | ||||
| 
 | ||||
|     const char *tok = strtok(version_str, "."); | ||||
|     version->major = atoi(tok); | ||||
|     tok = strtok(NULL, "."); | ||||
|     version->minor = atoi(tok); | ||||
|     tok = strtok(NULL, "."); | ||||
|     version->patch = atoi(tok); | ||||
| 
 | ||||
|     cJSON_Delete(response); | ||||
| 
 | ||||
|     return version; | ||||
| } | ||||
| 
 | ||||
| void elastic_init(int force_reset, const char *user_mappings, const char *user_settings) { | ||||
| 
 | ||||
|     es_version_t *es_version = elastic_get_version(IndexCtx.es_url); | ||||
|     IndexCtx.es_version = es_version; | ||||
| 
 | ||||
|     if (es_version == NULL) { | ||||
|         LOG_FATAL("elastic.c", "Could not get ES version") | ||||
|     } | ||||
| 
 | ||||
|     LOG_INFOF("elastic.c", | ||||
|               "Elasticsearch version is %s (supported=%d, legacy=%d)", | ||||
|               format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), USE_LEGACY_ES_SETTINGS(es_version)); | ||||
| 
 | ||||
|     if (!IS_SUPPORTED_ES_VERSION(es_version)) { | ||||
|         LOG_FATAL("elastic.c", "sist2 only supports Elasticsearch v6.8 or newer") | ||||
|     } | ||||
| 
 | ||||
|     char *settings = NULL; | ||||
|     if (USE_LEGACY_ES_SETTINGS(es_version)) { | ||||
|         settings = settings_json; | ||||
|     } else { | ||||
|         settings = settings_legacy_json; | ||||
|     } | ||||
| 
 | ||||
|     // Check if index exists
 | ||||
|     char url[4096]; | ||||
| @ -392,7 +450,7 @@ void elastic_init(int force_reset, const char* user_mappings, const char* user_s | ||||
|         free_response(r); | ||||
| 
 | ||||
|         snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index); | ||||
|         r = web_put(url, user_settings ? user_settings : settings_json); | ||||
|         r = web_put(url, user_settings ? user_settings : settings); | ||||
|         LOG_INFOF("elastic.c", "Update ES settings <%d>", r->status_code); | ||||
|         if (r->status_code != 200) { | ||||
|             print_error(r); | ||||
|  | ||||
| @ -9,6 +9,26 @@ typedef struct es_bulk_line { | ||||
|     char line[0]; | ||||
| } es_bulk_line_t; | ||||
| 
 | ||||
/**
 * Elasticsearch server version split into its numeric components.
 */
typedef struct {
    int major;
    int minor;
    int patch;
} es_version_t;

/*
 * True when `version` is non-NULL and is at least maj.min (the patch level is
 * ignored). A NULL version compares as older than everything, so callers that
 * have not obtained the server version yet do not dereference NULL.
 */
#define VERSION_GE(version, maj, min) \
    ((version) != NULL && \
     ((version)->major > (maj) || ((version)->major == (maj) && (version)->minor >= (min))))
#define IS_SUPPORTED_ES_VERSION(es_version) VERSION_GE((es_version), 6, 8)
#define USE_LEGACY_ES_SETTINGS(es_version) (!VERSION_GE((es_version), 7, 14))

/**
 * Render a version as "major.minor.patch".
 *
 * @param version Version to format; may be NULL.
 * @return "unknown" for a NULL version, otherwise a pointer to a static
 *         buffer that is overwritten by the next call (do not free, do not
 *         keep across calls).
 */
static inline const char *format_es_version(const es_version_t *version) {
    static char buf[64];

    if (version == NULL) {
        return "unknown";
    }

    snprintf(buf, sizeof(buf), "%d.%d.%d", version->major, version->minor, version->patch);

    return buf;
}
| 
 | ||||
| 
 | ||||
| /**
 | ||||
|  * Note: indexer is *not* thread safe | ||||
|  */ | ||||
| @ -31,6 +51,8 @@ cJSON *elastic_get_document(const char *id_str); | ||||
| 
 | ||||
| char *elastic_get_status(); | ||||
| 
 | ||||
| es_version_t *elastic_get_version(const char *es_url); | ||||
| 
 | ||||
| void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]); | ||||
| 
 | ||||
| #endif | ||||
|  | ||||
							
								
								
									
										3
									
								
								src/index/static_generated.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								src/index/static_generated.c
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @ -23,7 +23,6 @@ store_t *store_create(const char *path, size_t chunk_size) { | ||||
|     } | ||||
| 
 | ||||
|     store->size = (size_t) store->chunk_size; | ||||
|     ScanCtx.stat_tn_size = 0; | ||||
|     mdb_env_set_mapsize(store->env, store->size); | ||||
| 
 | ||||
|     // Open dbi
 | ||||
|  | ||||
| @ -43,26 +43,36 @@ int sub_strings[30]; | ||||
| 
 | ||||
| int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) { | ||||
| 
 | ||||
|     if (typeflag == FTW_F && S_ISREG(info->st_mode) && ftw->level <= ScanCtx.depth) { | ||||
|     if (ftw->level > ScanCtx.depth) { | ||||
|         if (typeflag == FTW_D) { | ||||
|             return FTW_SKIP_SUBTREE; | ||||
|         } | ||||
|         return FTW_CONTINUE; | ||||
|     } | ||||
| 
 | ||||
|     if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) { | ||||
|         LOG_DEBUGF("walk.c", "Excluded: %s", filepath) | ||||
| 
 | ||||
|         if (typeflag == FTW_F && S_ISREG(info->st_mode)) { | ||||
|             pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu); | ||||
|             ScanCtx.dbg_excluded_files_count += 1; | ||||
|             pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu); | ||||
|             return 0; | ||||
|         } else if (typeflag == FTW_D) { | ||||
|             return FTW_SKIP_SUBTREE; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (typeflag == FTW_F && S_ISREG(info->st_mode)) { | ||||
|         parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base); | ||||
|         tpool_add_work(ScanCtx.pool, parse, job); | ||||
|     } | ||||
| 
 | ||||
|     return 0; | ||||
|     return FTW_CONTINUE; | ||||
| } | ||||
| 
 | ||||
| #define MAX_FILE_DESCRIPTORS 64 | ||||
| 
 | ||||
| int walk_directory_tree(const char *dirpath) { | ||||
|     return nftw(dirpath, handle_entry, MAX_FILE_DESCRIPTORS, FTW_PHYS | FTW_DEPTH); | ||||
|     return nftw(dirpath, handle_entry, MAX_FILE_DESCRIPTORS, FTW_PHYS | FTW_ACTIONRETVAL); | ||||
| } | ||||
|  | ||||
| @ -433,7 +433,7 @@ void sist2_index(index_args_t *args) { | ||||
|         cleanup = elastic_cleanup; | ||||
|     } | ||||
| 
 | ||||
|     IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE, FALSE); | ||||
|     IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE, TRUE); | ||||
|     tpool_start(IndexCtx.pool); | ||||
| 
 | ||||
|     struct dirent *de; | ||||
|  | ||||
| @ -1,6 +1,8 @@ | ||||
| #ifndef SIST_H | ||||
| #define SIST_H | ||||
| 
 | ||||
| #define _GNU_SOURCE | ||||
| 
 | ||||
| #ifndef	FALSE | ||||
| #define	FALSE	(0) | ||||
| #define BOOL int | ||||
| @ -51,7 +53,7 @@ | ||||
| #include <ctype.h> | ||||
| #include "git_hash.h" | ||||
| 
 | ||||
| #define VERSION "2.11.3" | ||||
| #define VERSION "2.11.4" | ||||
| static const char *const Version = VERSION; | ||||
| 
 | ||||
| #ifndef SIST_PLATFORM | ||||
|  | ||||
| @ -177,7 +177,7 @@ static void *tpool_worker(void *arg) { | ||||
| } | ||||
| 
 | ||||
| void tpool_wait(tpool_t *pool) { | ||||
|     LOG_INFO("tpool.c", "Waiting for worker threads to finish") | ||||
|     LOG_DEBUG("tpool.c", "Waiting for worker threads to finish") | ||||
|     pthread_mutex_lock(&(pool->work_mutex)); | ||||
|     while (TRUE) { | ||||
|         if (pool->done_cnt < pool->work_cnt) { | ||||
|  | ||||
| @ -88,7 +88,7 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size) { | ||||
| 
 | ||||
|     static int last_val = -1; | ||||
|     int val = (int) (percentage * 100); | ||||
|     if (last_val == val || val > 100 || index_size < 1024) { | ||||
|     if (last_val == val || val > 100) { | ||||
|         return; | ||||
|     } | ||||
|     last_val = val; | ||||
|  | ||||
| @ -252,12 +252,32 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s | ||||
|     mg_http_serve_file(nc, hm, full_path, mime, disposition); | ||||
| } | ||||
| 
 | ||||
| void cache_es_version() { | ||||
|     static int is_cached = FALSE; | ||||
| 
 | ||||
|     if (is_cached == TRUE) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     es_version_t *es_version = elastic_get_version(WebCtx.es_url); | ||||
|     if (es_version != NULL) { | ||||
|         WebCtx.es_version = es_version; | ||||
|         is_cached = TRUE; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void index_info(struct mg_connection *nc) { | ||||
| 
 | ||||
|     cache_es_version(); | ||||
| 
 | ||||
|     cJSON *json = cJSON_CreateObject(); | ||||
|     cJSON *arr = cJSON_AddArrayToObject(json, "indices"); | ||||
| 
 | ||||
|     cJSON_AddStringToObject(json, "esIndex", WebCtx.es_index); | ||||
|     cJSON_AddStringToObject(json, "version", Version); | ||||
|     cJSON_AddStringToObject(json, "esVersion", format_es_version(WebCtx.es_version)); | ||||
|     cJSON_AddBoolToObject(json, "esVersionSupported", IS_SUPPORTED_ES_VERSION(WebCtx.es_version)); | ||||
|     cJSON_AddBoolToObject(json, "esVersionLegacy", USE_LEGACY_ES_SETTINGS(WebCtx.es_version)); | ||||
|     cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM)); | ||||
|     cJSON_AddStringToObject(json, "sist2Hash", Sist2CommitHash); | ||||
|     cJSON_AddStringToObject(json, "libscanHash", LibScanCommitHash); | ||||
|  | ||||
							
								
								
									
										10
									
								
								src/web/static_generated.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										10
									
								
								src/web/static_generated.c
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										2
									
								
								third-party/argparse
									
									
									
									
										vendored
									
									
								
							
							
								
								
								
								
								
								
									
									
								
							
						
						
									
										2
									
								
								third-party/argparse
									
									
									
									
										vendored
									
									
								
							| @ -1 +1 @@ | ||||
| Subproject commit ffd9c23427d0cb105e27f27f0cf97b463b6a8bf8 | ||||
| Subproject commit 225141eb3df2fc1711962e3779646423407cb3f5 | ||||
							
								
								
									
										1
									
								
								third-party/libscan
									
									
									
									
										vendored
									
									
								
							
							
								
								
								
								
								
								
									
									
								
							
						
						
									
										1
									
								
								third-party/libscan
									
									
									
									
										vendored
									
									
								
							| @ -1 +0,0 @@ | ||||
| Subproject commit 3787475ecba7453a2a97ab470103606c2cecabb2 | ||||
							
								
								
									
										12
									
								
								third-party/libscan/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								third-party/libscan/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,12 @@ | ||||
| .idea/ | ||||
| cmake_install.cmake | ||||
| Makefile | ||||
| libscan.a | ||||
| libscan.so | ||||
| *.cbp | ||||
| CMakeFiles | ||||
| CMakeCache.txt | ||||
| scan_test | ||||
| third-party/ext_* | ||||
| libscan-test-files | ||||
| scan_*_test | ||||
							
								
								
									
										233
									
								
								third-party/libscan/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										233
									
								
								third-party/libscan/CMakeLists.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,233 @@ | ||||
| cmake_minimum_required(VERSION 3.15) | ||||
| 
 | ||||
| project(scan) | ||||
| set(CMAKE_C_STANDARD 11) | ||||
| 
 | ||||
| option(BUILD_TESTS "Build tests" on) | ||||
| 
 | ||||
| add_subdirectory(third-party/antiword) | ||||
| add_compile_definitions( | ||||
|         antiword | ||||
|         NDEBUG | ||||
| ) | ||||
| 
 | ||||
| add_library( | ||||
|         scan | ||||
|         libscan/util.c libscan/util.h | ||||
|         libscan/scan.h | ||||
|         libscan/macros.h | ||||
| 
 | ||||
|         libscan/text/text.c libscan/text/text.h | ||||
|         libscan/arc/arc.c libscan/arc/arc.h | ||||
|         libscan/ebook/ebook.c libscan/ebook/ebook.h | ||||
|         libscan/comic/comic.c libscan/comic/comic.h | ||||
|         libscan/ooxml/ooxml.c libscan/ooxml/ooxml.h | ||||
|         libscan/media/media.c libscan/media/media.h | ||||
|         libscan/font/font.c libscan/font/font.h | ||||
|         libscan/msdoc/msdoc.c libscan/msdoc/msdoc.h | ||||
|         libscan/json/json.c libscan/json/json.h | ||||
|         libscan/wpd/wpd.c libscan/wpd/wpd.h libscan/wpd/libwpd_c_api.h libscan/wpd/libwpd_c_api.cpp | ||||
| 
 | ||||
|         third-party/utf8.h | ||||
|         libscan/mobi/scan_mobi.c libscan/mobi/scan_mobi.h libscan/raw/raw.c libscan/raw/raw.h) | ||||
| set_target_properties(scan PROPERTIES LINKER_LANGUAGE C) | ||||
| 
 | ||||
| set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib .so) | ||||
| 
 | ||||
| find_package(cJSON CONFIG REQUIRED) | ||||
| find_package(LibArchive REQUIRED) | ||||
| find_package(BZip2 REQUIRED) | ||||
| find_package(lz4 REQUIRED) | ||||
| 
 | ||||
| find_package(Threads REQUIRED) | ||||
| find_package(Tesseract CONFIG REQUIRED) | ||||
| find_package(OpenJPEG CONFIG REQUIRED) | ||||
| find_package(JPEG REQUIRED) | ||||
| find_package(LibXml2 REQUIRED) | ||||
| find_package(LibLZMA REQUIRED) | ||||
| find_package(ZLIB REQUIRED) | ||||
| find_package(unofficial-pcre CONFIG REQUIRED) | ||||
| 
 | ||||
| 
 | ||||
| find_library(JBIG2DEC_LIB NAMES jbig2decd jbig2dec) | ||||
| find_library(HARFBUZZ_LIB NAMES harfbuzz harfbuzzd) | ||||
| find_library(FREETYPE_LIB NAMES freetype freetyped) | ||||
| find_package(unofficial-brotli CONFIG REQUIRED) | ||||
| find_library(LZO2_LIB NAMES lzo2) | ||||
| 
 | ||||
| find_library(RAW_LIB NAMES libraw.a) | ||||
| find_library(MUPDF_LIB NAMES liblibmupdf.a) | ||||
| find_library(CMS_LIB NAMES lcms2) | ||||
| find_library(JAS_LIB NAMES jasper) | ||||
| find_library(GUMBO_LIB NAMES gumbo) | ||||
| find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/) | ||||
| 
 | ||||
| 
 | ||||
| target_compile_options( | ||||
|         scan | ||||
|         PRIVATE | ||||
|         -g | ||||
| ) | ||||
| 
 | ||||
| include(ExternalProject) | ||||
| find_program(MAKE_EXE NAMES gmake nmake make) | ||||
| ExternalProject_Add( | ||||
|         libmobi | ||||
|         GIT_REPOSITORY https://github.com/simon987/libmobi.git | ||||
|         GIT_TAG "public" | ||||
| 
 | ||||
|         UPDATE_COMMAND "" | ||||
|         PATCH_COMMAND "" | ||||
|         TEST_COMMAND "" | ||||
|         CONFIGURE_COMMAND ./autogen.sh && ./configure | ||||
|         INSTALL_COMMAND "" | ||||
| 
 | ||||
|         PREFIX "third-party/ext_libmobi" | ||||
|         SOURCE_DIR "third-party/ext_libmobi/src/libmobi" | ||||
|         BINARY_DIR "third-party/ext_libmobi/src/libmobi" | ||||
| 
 | ||||
|         BUILD_COMMAND ${MAKE_EXE} -j 8 --silent | ||||
| ) | ||||
| 
 | ||||
| SET(MOBI_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/) | ||||
| SET(MOBI_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/) | ||||
| 
 | ||||
| if (SIST_DEBUG) | ||||
|     SET(FFMPEG_DEBUG "--enable-debug=3" "--disable-optimizations") | ||||
| else() | ||||
|     SET(FFMPEG_DEBUG "") | ||||
| endif() | ||||
| 
 | ||||
| ExternalProject_Add( | ||||
|         ffmpeg | ||||
|         GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git | ||||
|         GIT_TAG "n4.4" | ||||
| 
 | ||||
|         UPDATE_COMMAND "" | ||||
|         PATCH_COMMAND "" | ||||
|         TEST_COMMAND "" | ||||
|         CONFIGURE_COMMAND ./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay | ||||
|         --disable-ffprobe --disable-doc --disable-manpages --disable-postproc --disable-avfilter --disable-alsa | ||||
|         --disable-lzma --disable-xlib --disable-vdpau --disable-vaapi --disable-sdl2 | ||||
|         --disable-network  ${FFMPEG_DEBUG} | ||||
|         INSTALL_COMMAND "" | ||||
| 
 | ||||
|         PREFIX "third-party/ext_ffmpeg" | ||||
|         SOURCE_DIR "third-party/ext_ffmpeg/src/ffmpeg" | ||||
|         BINARY_DIR "third-party/ext_ffmpeg/src/ffmpeg" | ||||
| 
 | ||||
|         BUILD_COMMAND ${MAKE_EXE} -j33 --silent | ||||
| ) | ||||
| 
 | ||||
| SET(FFMPEG_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_ffmpeg/src/ffmpeg) | ||||
| SET(FFMPEG_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_ffmpeg/src/ffmpeg) | ||||
| 
 | ||||
| ExternalProject_Add( | ||||
|         libwpd | ||||
|         URL http://prdownloads.sourceforge.net/libwpd/libwpd-0.9.9.tar.gz | ||||
| 
 | ||||
|         UPDATE_COMMAND "" | ||||
|         PATCH_COMMAND "" | ||||
|         TEST_COMMAND "" | ||||
|         CONFIGURE_COMMAND ./configure --without-docs --enable-static --disable-shared | ||||
|         INSTALL_COMMAND "" | ||||
| 
 | ||||
|         PREFIX "third-party/ext_libwpd" | ||||
|         SOURCE_DIR "third-party/ext_libwpd/src/libwpd" | ||||
|         BINARY_DIR "third-party/ext_libwpd/src/libwpd" | ||||
| 
 | ||||
|         BUILD_COMMAND ${MAKE_EXE} -j33 | ||||
| ) | ||||
| SET(WPD_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwpd/src/lib/.libs/) | ||||
| SET(WPD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwpd/inc/) | ||||
| 
 | ||||
| add_dependencies( | ||||
|         scan | ||||
|         libmobi | ||||
|         ffmpeg | ||||
|         antiword | ||||
|         libwpd | ||||
| ) | ||||
| 
 | ||||
| target_link_libraries( | ||||
|         scan | ||||
|         PUBLIC | ||||
| 
 | ||||
|         cjson | ||||
|         ${LibArchive_LIBRARIES} | ||||
|         ZLIB::ZLIB | ||||
|         BZip2::BZip2 | ||||
|         lz4::lz4 | ||||
|         ${LZO2_LIB} | ||||
|         LibLZMA::LibLZMA | ||||
| 
 | ||||
|         ${MUPDF_LIB} | ||||
|         openjp2 | ||||
| 
 | ||||
|         ${MOBI_LIB_DIR}/libmobi.a | ||||
| 
 | ||||
|         ${WPD_LIB_DIR}/libwpd-0.9.a | ||||
|         ${WPD_LIB_DIR}/libwpd-stream-0.9.a | ||||
| 
 | ||||
|         ${FREETYPE_LIB} | ||||
|         ${HARFBUZZ_LIB} | ||||
|         ${JBIG2DEC_LIB} | ||||
| 
 | ||||
|         stdc++ | ||||
| 
 | ||||
|         -Wl,--whole-archive | ||||
|         m | ||||
|         -Wl,--no-whole-archive | ||||
| 
 | ||||
|         ${JPEG_LIBRARIES} | ||||
|         ${Tesseract_LIBRARIES} | ||||
|         ${LIBXML2_LIBRARIES} | ||||
|         ${FREETYPE_LIB} | ||||
|         unofficial::brotli::brotlidec-static | ||||
| 
 | ||||
|         ${FFMPEG_LIB_DIR}/libavformat/libavformat.a | ||||
|         ${FFMPEG_LIB_DIR}/libavcodec/libavcodec.a | ||||
|         ${FFMPEG_LIB_DIR}/libavutil/libavutil.a | ||||
|         ${FFMPEG_LIB_DIR}/libswresample/libswresample.a | ||||
|         ${FFMPEG_LIB_DIR}/libswscale/libswscale.a | ||||
| 
 | ||||
|         z | ||||
| 
 | ||||
|         ${CMAKE_THREAD_LIBS_INIT} | ||||
| 
 | ||||
|         ${RAW_LIB} | ||||
|         ${GOMP_LIB} | ||||
|         ${CMS_LIB} | ||||
|         ${JAS_LIB} | ||||
|         ${GUMBO_LIB} | ||||
|         dl | ||||
|         antiword | ||||
|         unofficial::pcre::pcre unofficial::pcre::pcre16 unofficial::pcre::pcre32 unofficial::pcre::pcrecpp | ||||
| ) | ||||
| 
 | ||||
| target_include_directories( | ||||
|         scan | ||||
|         PUBLIC | ||||
|         ${MUPDF_INC_DIR} | ||||
|         ${JPEG_INCLUDE_DIR} | ||||
|         ${LIBXML2_INCLUDE_DIR} | ||||
|         ${FFMPEG_INCLUDE_DIR} | ||||
|         ${MOBI_INCLUDE_DIR} | ||||
|         ${WPD_INCLUDE_DIR} | ||||
| ) | ||||
| 
 | ||||
| if (BUILD_TESTS) | ||||
|     find_package(GTest CONFIG REQUIRED) | ||||
| 
 | ||||
|     add_executable(scan_ub_test test/main.cpp test/test_util.cpp test/test_util.h) | ||||
|     target_compile_options(scan_ub_test PRIVATE -g -fsanitize=undefined -fno-omit-frame-pointer) | ||||
|     target_link_libraries(scan_ub_test PRIVATE GTest::gtest GTest::gtest_main -fsanitize=undefined scan) | ||||
| 
 | ||||
|     add_executable(scan_a_test test/main.cpp test/test_util.cpp test/test_util.h) | ||||
|     target_compile_options(scan_a_test PRIVATE -g -fsanitize=address -fno-omit-frame-pointer) | ||||
|     target_link_libraries(scan_a_test PRIVATE GTest::gtest GTest::gtest_main -fsanitize=address scan) | ||||
| 
 | ||||
|     add_executable(scan_test test/main.cpp test/test_util.cpp test/test_util.h) | ||||
|     target_compile_options(scan_test PRIVATE -g -fno-omit-frame-pointer) | ||||
|     target_link_libraries(scan_test PRIVATE GTest::gtest GTest::gtest_main scan) | ||||
| endif() | ||||
							
								
								
									
										4
									
								
								third-party/libscan/README.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								third-party/libscan/README.md
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,4 @@ | ||||
| ### Run fuzz tests: | ||||
| ```bash | ||||
| ./scan_a_test --gtest_filter=*Fuzz* --gtest_repeat=100 | ||||
| ``` | ||||
							
								
								
									
										244
									
								
								third-party/libscan/libscan/arc/arc.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										244
									
								
								third-party/libscan/libscan/arc/arc.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,244 @@ | ||||
| #include "arc.h" | ||||
| 
 | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #include <string.h> | ||||
| #include <fcntl.h> | ||||
| #include <openssl/evp.h> | ||||
| #include <pcre.h> | ||||
| 
 | ||||
| 
 | ||||
| int should_parse_filtered_file(const char *filepath, int ext) { | ||||
|     char tmp[PATH_MAX * 2]; | ||||
| 
 | ||||
|     if (ext == 0) { | ||||
|         return FALSE; | ||||
|     } | ||||
| 
 | ||||
|     if (strncmp(filepath + ext, "tgz", 3) == 0) { | ||||
|         return TRUE; | ||||
|     } | ||||
| 
 | ||||
|     memcpy(tmp, filepath, ext - 1); | ||||
|     *(tmp + ext - 1) = '\0'; | ||||
| 
 | ||||
|     char *idx = strrchr(tmp, '.'); | ||||
| 
 | ||||
|     if (idx == NULL) { | ||||
|         return FALSE; | ||||
|     } | ||||
| 
 | ||||
|     if (strcmp(idx, ".tar") == 0) { | ||||
|         return TRUE; | ||||
|     } | ||||
| 
 | ||||
|     return FALSE; | ||||
| } | ||||
| 
 | ||||
| void arc_close(struct vfile *f) { | ||||
|     SHA1_Final(f->sha1_digest, &f->sha1_ctx); | ||||
| 
 | ||||
|     if (f->rewind_buffer != NULL) { | ||||
|         free(f->rewind_buffer); | ||||
|         f->rewind_buffer = NULL; | ||||
|         f->rewind_buffer_size = 0; | ||||
|         f->rewind_buffer_cursor = 0; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| int arc_read(struct vfile *f, void *buf, size_t size) { | ||||
| 
 | ||||
|     int bytes_copied = 0; | ||||
| 
 | ||||
|     if (f->rewind_buffer_size != 0) { | ||||
|         if (size > f->rewind_buffer_size) { | ||||
|             memcpy(buf, f->rewind_buffer + f->rewind_buffer_cursor, f->rewind_buffer_size); | ||||
| 
 | ||||
|             bytes_copied = f->rewind_buffer_size; | ||||
|             size -= f->rewind_buffer_size; | ||||
|             buf += f->rewind_buffer_size; | ||||
|             f->rewind_buffer_size = 0; | ||||
|         } else { | ||||
|             memcpy(buf, f->rewind_buffer + f->rewind_buffer_cursor, size); | ||||
|             f->rewind_buffer_size -= (int) size; | ||||
|             f->rewind_buffer_cursor += (int) size; | ||||
| 
 | ||||
|             return (int) size; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     size_t bytes_read = archive_read_data(f->arc, buf, size); | ||||
| 
 | ||||
|     if (bytes_read != 0 && bytes_read <= size && f->calculate_checksum) { | ||||
|         f->has_checksum = TRUE; | ||||
| 
 | ||||
|         safe_sha1_update(&f->sha1_ctx, (unsigned char *) buf, bytes_read); | ||||
|     } | ||||
| 
 | ||||
|     if (bytes_read != size && archive_errno(f->arc) != 0) { | ||||
|         const char *error_str = archive_error_string(f->arc); | ||||
|         if (error_str != NULL) { | ||||
|             f->logf(f->filepath, LEVEL_ERROR, "Error reading archive file: %s", error_str); | ||||
|         } | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     return (int) bytes_read + bytes_copied; | ||||
| } | ||||
| 
 | ||||
| int arc_read_rewindable(struct vfile *f, void *buf, size_t size) { | ||||
| 
 | ||||
|     if (f->rewind_buffer != NULL) { | ||||
|         fprintf(stderr, "Allocated rewind buffer more than once for %s", f->filepath); | ||||
|         exit(-1); | ||||
|     } | ||||
| 
 | ||||
|     size_t bytes_read = archive_read_data(f->arc, buf, size); | ||||
| 
 | ||||
|     if (bytes_read != size && archive_errno(f->arc) != 0) { | ||||
|         const char *error_str = archive_error_string(f->arc); | ||||
|         if (error_str != NULL) { | ||||
|             f->logf(f->filepath, LEVEL_ERROR, "Error reading archive file: %s", error_str); | ||||
|         } | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     f->rewind_buffer = malloc(size); | ||||
|     f->rewind_buffer_size = (int) size; | ||||
|     f->rewind_buffer_cursor = 0; | ||||
|     memcpy(f->rewind_buffer, buf, size); | ||||
| 
 | ||||
|     return (int) bytes_read; | ||||
| } | ||||
| 
 | ||||
| int arc_open(scan_arc_ctx_t *ctx, vfile_t *f, struct archive **a, arc_data_t *arc_data, int allow_recurse) { | ||||
|     arc_data->f = f; | ||||
| 
 | ||||
|     if (f->is_fs_file) { | ||||
|         *a = archive_read_new(); | ||||
|         archive_read_support_filter_all(*a); | ||||
|         archive_read_support_format_all(*a); | ||||
|         if (ctx->passphrase[0] != 0) { | ||||
|             archive_read_add_passphrase(*a, ctx->passphrase); | ||||
|         } | ||||
| 
 | ||||
|         return archive_read_open_filename(*a, f->filepath, ARC_BUF_SIZE); | ||||
|     } else if (allow_recurse) { | ||||
|         *a = archive_read_new(); | ||||
|         archive_read_support_filter_all(*a); | ||||
|         archive_read_support_format_all(*a); | ||||
|         if (ctx->passphrase[0] != 0) { | ||||
|             archive_read_add_passphrase(*a, ctx->passphrase); | ||||
|         } | ||||
| 
 | ||||
|         return archive_read_open( | ||||
|                 *a, arc_data, | ||||
|                 vfile_open_callback, | ||||
|                 vfile_read_callback, | ||||
|                 vfile_close_callback | ||||
|         ); | ||||
|     } else { | ||||
|         return ARC_SKIPPED; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
// Per-thread output vector handed to pcre_exec() by EXCLUDED()
static __thread int sub_strings[30];
// Non-zero when str matches the `exclude` / `exclude_extra` pattern found in
// the calling scope. pcre_exec() expects the NUMBER OF ELEMENTS of the output
// vector, not its size in bytes.
#define EXCLUDED(str) (pcre_exec(exclude, exclude_extra, (str), (int) strlen(str), 0, 0, sub_strings, (int) (sizeof(sub_strings) / sizeof(sub_strings[0]))) >= 0)
| 
 | ||||
| scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre *exclude, pcre_extra *exclude_extra) { | ||||
| 
 | ||||
|     struct archive *a = NULL; | ||||
|     struct archive_entry *entry = NULL; | ||||
| 
 | ||||
|     arc_data_t arc_data; | ||||
|     arc_data.f = f; | ||||
| 
 | ||||
|     int ret = arc_open(ctx, f, &a, &arc_data, ctx->mode == ARC_MODE_RECURSE); | ||||
|     if (ret == ARC_SKIPPED) { | ||||
|         return SCAN_OK; | ||||
|     } | ||||
| 
 | ||||
|     if (ret != ARCHIVE_OK) { | ||||
|         CTX_LOG_ERRORF(f->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a)) | ||||
|         archive_read_free(a); | ||||
|         return SCAN_ERR_READ; | ||||
|     } | ||||
| 
 | ||||
|     if (ctx->mode == ARC_MODE_LIST) { | ||||
|         dyn_buffer_t buf = dyn_buffer_create(); | ||||
| 
 | ||||
|         while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { | ||||
|             if (S_ISREG(archive_entry_stat(entry)->st_mode)) { | ||||
|                 const char *utf8_name = archive_entry_pathname_utf8(entry); | ||||
|                 const char *file_path = utf8_name == NULL ? archive_entry_pathname(entry) : utf8_name; | ||||
| 
 | ||||
|                 dyn_buffer_append_string(&buf, file_path); | ||||
|                 dyn_buffer_write_char(&buf, ' '); | ||||
|             } | ||||
|         } | ||||
|         dyn_buffer_write_char(&buf, '\0'); | ||||
| 
 | ||||
|         meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur); | ||||
|         meta_list->key = MetaContent; | ||||
|         strcpy(meta_list->str_val, buf.buf); | ||||
|         APPEND_META(doc, meta_list) | ||||
|         dyn_buffer_destroy(&buf); | ||||
| 
 | ||||
|     } else { | ||||
| 
 | ||||
|         parse_job_t *sub_job = malloc(sizeof(parse_job_t) + PATH_MAX * 2); | ||||
| 
 | ||||
|         sub_job->vfile.close = arc_close; | ||||
|         sub_job->vfile.read = arc_read; | ||||
|         sub_job->vfile.read_rewindable = arc_read_rewindable; | ||||
|         sub_job->vfile.reset = NULL; | ||||
|         sub_job->vfile.arc = a; | ||||
|         sub_job->vfile.filepath = sub_job->filepath; | ||||
|         sub_job->vfile.is_fs_file = FALSE; | ||||
|         sub_job->vfile.rewind_buffer_size = 0; | ||||
|         sub_job->vfile.rewind_buffer = NULL; | ||||
|         sub_job->vfile.log = ctx->log; | ||||
|         sub_job->vfile.logf = ctx->logf; | ||||
|         sub_job->vfile.has_checksum = FALSE; | ||||
|         sub_job->vfile.calculate_checksum = f->calculate_checksum; | ||||
|         memcpy(sub_job->parent, doc->path_md5, MD5_DIGEST_LENGTH); | ||||
| 
 | ||||
|         while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { | ||||
|             sub_job->vfile.info = *archive_entry_stat(entry); | ||||
|             if (S_ISREG(sub_job->vfile.info.st_mode)) { | ||||
| 
 | ||||
|                 const char *utf8_name = archive_entry_pathname_utf8(entry); | ||||
| 
 | ||||
|                 if (utf8_name == NULL) { | ||||
|                     sprintf(sub_job->filepath, "%s#/%s", f->filepath, archive_entry_pathname(entry)); | ||||
|                 } else { | ||||
|                     sprintf(sub_job->filepath, "%s#/%s", f->filepath, utf8_name); | ||||
|                 } | ||||
|                 sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1; | ||||
| 
 | ||||
|                 // Handle excludes
 | ||||
|                 if (exclude != NULL && EXCLUDED(sub_job->filepath)) { | ||||
|                     CTX_LOG_DEBUGF("arc.c", "Excluded: %s", sub_job->filepath) | ||||
|                     continue; | ||||
|                 } | ||||
| 
 | ||||
|                 char *p = strrchr(sub_job->filepath, '.'); | ||||
|                 if (p != NULL && (p - sub_job->filepath) > strlen(f->filepath)) { | ||||
|                     sub_job->ext = (int) (p - sub_job->filepath + 1); | ||||
|                 } else { | ||||
|                     sub_job->ext = (int) strlen(sub_job->filepath); | ||||
|                 } | ||||
| 
 | ||||
|                 SHA1_Init(&sub_job->vfile.sha1_ctx); | ||||
| 
 | ||||
|                 ctx->parse(sub_job); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         free(sub_job); | ||||
|     } | ||||
| 
 | ||||
|     archive_read_free(a); | ||||
|     return SCAN_OK; | ||||
| } | ||||
							
								
								
									
										80
									
								
								third-party/libscan/libscan/arc/arc.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								third-party/libscan/libscan/arc/arc.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,80 @@ | ||||
| #ifndef SCAN_ARC_H | ||||
| #define SCAN_ARC_H | ||||
| 
 | ||||
| #include <archive.h> | ||||
| #include <archive_entry.h> | ||||
| #include <fcntl.h> | ||||
| #include <pcre.h> | ||||
| #include "../scan.h" | ||||
| 
 | ||||
/* Returned by arc_open() when the file is not on the filesystem and recursion is not allowed */
# define ARC_SKIPPED (-1)
/* Values of scan_arc_ctx_t.mode */
#define ARC_MODE_SKIP 0
/* parse_archive() only records the entry paths as a MetaContent line */
#define ARC_MODE_LIST 1
#define ARC_MODE_SHALLOW 2
/* parse_archive() also opens archives that are nested in other archives */
#define ARC_MODE_RECURSE 3
typedef int archive_mode_t;

/* Settings and callbacks used by the archive scanner */
typedef struct {
    /* One of the ARC_MODE_* values */
    archive_mode_t mode;

    /* Called by parse_archive() for each regular-file entry */
    parse_callback_t parse;
    log_callback_t log;
    logf_callback_t logf;
    store_callback_t store;
    /* Registered with libarchive by arc_open() unless it is an empty string */
    char passphrase[4096];
} scan_arc_ctx_t;

/* Block size given to archive_read_open_filename() and size of the streaming buffer below */
#define ARC_BUF_SIZE 8192

/* Client data handed to the vfile_*_callback functions */
typedef struct {
    /* File that the archive is read from */
    vfile_t *f;
    /* Buffer that vfile_read_callback() fills and exposes to libarchive */
    char buf[ARC_BUF_SIZE];
} arc_data_t;
| 
 | ||||
| static int vfile_open_callback(struct archive *a, void *user_data) { | ||||
|     arc_data_t *data = (arc_data_t *) user_data; | ||||
| 
 | ||||
|     if (!data->f->is_fs_file) { | ||||
|         SHA1_Init(&data->f->sha1_ctx); | ||||
|     } | ||||
| 
 | ||||
|     return ARCHIVE_OK; | ||||
| } | ||||
| 
 | ||||
| static long vfile_read_callback(struct archive *a, void *user_data, const void **buf) { | ||||
|     arc_data_t *data = (arc_data_t *) user_data; | ||||
| 
 | ||||
|     *buf = data->buf; | ||||
|     long ret = data->f->read(data->f, data->buf, sizeof(data->buf)); | ||||
| 
 | ||||
|     if (!data->f->is_fs_file && ret > 0) { | ||||
|         data->f->has_checksum = TRUE; | ||||
|         safe_sha1_update(&data->f->sha1_ctx, (unsigned char*)data->buf, ret); | ||||
|     } | ||||
| 
 | ||||
|     return ret; | ||||
| } | ||||
| 
 | ||||
| static int vfile_close_callback(struct archive *a, void *user_data) { | ||||
|     arc_data_t *data = (arc_data_t *) user_data; | ||||
| 
 | ||||
|     if (!data->f->is_fs_file) { | ||||
|         SHA1_Final((unsigned char *) data->f->sha1_digest, &data->f->sha1_ctx); | ||||
|     } | ||||
| 
 | ||||
|     return ARCHIVE_OK; | ||||
| } | ||||
| 
 | ||||
/*
 * Create a libarchive reader in `*a` and open `f` with it. Returns ARC_SKIPPED
 * (without creating a reader) when `f` is not on the filesystem and
 * `allow_recurse` is zero, otherwise the libarchive status code.
 */
int arc_open(scan_arc_ctx_t *ctx, vfile_t *f, struct archive **a, arc_data_t *arc_data, int allow_recurse);

/* NOTE(review): no definition is visible next to the other arc functions - confirm that this is still implemented */
int should_parse_filtered_file(const char *filepath, int ext);

/*
 * List or dispatch the entries of the archive `f` depending on ctx->mode.
 * `exclude` may be NULL to disable path exclusion.
 */
scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre *exclude, pcre_extra *exclude_extra);

/* vfile read handler that parse_archive() installs on archive entries */
int arc_read(struct vfile *f, void *buf, size_t size);

/* vfile read_rewindable handler that parse_archive() installs on archive entries */
int arc_read_rewindable(struct vfile *f, void *buf, size_t size);

/* vfile close handler that parse_archive() installs on archive entries */
void arc_close(struct vfile *f);
| 
 | ||||
| #endif | ||||
							
								
								
									
										58
									
								
								third-party/libscan/libscan/comic/comic.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								third-party/libscan/libscan/comic/comic.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,58 @@ | ||||
| #include "comic.h" | ||||
| #include "../media/media.h" | ||||
| #include "../arc/arc.h" | ||||
| 
 | ||||
| #include <stdlib.h> | ||||
| #include <archive.h> | ||||
| 
 | ||||
| static scan_arc_ctx_t arc_ctx = (scan_arc_ctx_t) {.passphrase = {0,}}; | ||||
| 
 | ||||
| void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) { | ||||
|     struct archive *a = NULL; | ||||
|     struct archive_entry *entry = NULL; | ||||
|     arc_data_t arc_data; | ||||
| 
 | ||||
|     if (ctx->tn_size <= 0) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     int ret = arc_open(&arc_ctx, f, &a, &arc_data, TRUE); | ||||
|     if (ret != ARCHIVE_OK) { | ||||
|         CTX_LOG_ERRORF(f->filepath, "(cbr.c) [%d] %s", ret, archive_error_string(a)) | ||||
|         archive_read_free(a); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { | ||||
|         struct stat info = *archive_entry_stat(entry); | ||||
|         if (S_ISREG(info.st_mode)) { | ||||
|             const char *utf8_name = archive_entry_pathname_utf8(entry); | ||||
|             const char *file_path = utf8_name == NULL ? archive_entry_pathname(entry) : utf8_name; | ||||
| 
 | ||||
|             char *p = strrchr(file_path, '.'); | ||||
|             if (p != NULL && (strcmp(p, ".png") == 0 || strcmp(p, ".jpg") == 0 || strcmp(p, ".jpeg") == 0)) { | ||||
|                 size_t entry_size = archive_entry_size(entry); | ||||
|                 void *buf = malloc(entry_size); | ||||
|                 size_t read = archive_read_data(a, buf, entry_size); | ||||
| 
 | ||||
|                 if (read != entry_size) { | ||||
|                     const char *err_str = archive_error_string(a); | ||||
|                     if (err_str) { | ||||
|                         CTX_LOG_ERRORF("comic.c", "Error while reading entry: %s", err_str) | ||||
|                     } | ||||
|                     free(buf); | ||||
|                     break; | ||||
|                 } | ||||
| 
 | ||||
|                 ret = store_image_thumbnail((scan_media_ctx_t *) ctx, buf, entry_size, doc, file_path); | ||||
|                 free(buf); | ||||
| 
 | ||||
|                 if (ret == TRUE) { | ||||
|                     break; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     archive_read_free(a); | ||||
| } | ||||
							
								
								
									
										31
									
								
								third-party/libscan/libscan/comic/comic.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								third-party/libscan/libscan/comic/comic.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,31 @@ | ||||
| #ifndef SCAN_CBR_H | ||||
| #define SCAN_CBR_H | ||||
| 
 | ||||
| #include <stdlib.h> | ||||
| #include "../ebook/ebook.h" | ||||
| 
 | ||||
/*
 * Settings and callbacks used by the comic book scanner.
 * NOTE(review): parse_comic() casts a pointer to this struct to
 * scan_media_ctx_t * - presumably the leading fields must stay compatible
 * with that type; confirm before reordering.
 */
typedef struct {
    log_callback_t log;
    logf_callback_t logf;
    store_callback_t store;

    /* parse_comic() does nothing unless this is positive */
    int tn_size;
    float tn_qscale;

    /* Mime identifiers that is_cbr() / is_cbz() compare against */
    unsigned int cbr_mime;
    unsigned int cbz_mime;
} scan_comic_ctx_t;
| 
 | ||||
| __always_inline | ||||
| static int is_cbr(scan_comic_ctx_t *ctx, unsigned int mime) { | ||||
|     return mime == ctx->cbr_mime; | ||||
| } | ||||
| 
 | ||||
| __always_inline | ||||
| static int is_cbz(scan_comic_ctx_t *ctx, unsigned int mime) { | ||||
|     return mime == ctx->cbz_mime; | ||||
| } | ||||
| 
 | ||||
/* Store a thumbnail made from the first usable image of the comic book archive `f` */
void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc);
| 
 | ||||
| #endif | ||||
							
								
								
									
										495
									
								
								third-party/libscan/libscan/ebook/ebook.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										495
									
								
								third-party/libscan/libscan/ebook/ebook.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,495 @@ | ||||
| #include "ebook.h" | ||||
| #include <mupdf/fitz.h> | ||||
| #include <pthread.h> | ||||
| #include <tesseract/capi.h> | ||||
| 
 | ||||
| #include "../media/media.h" | ||||
| #include "../arc/arc.h" | ||||
| 
 | ||||
/* fill_image() only runs OCR on images whose width and height both exceed this value */
#define MIN_OCR_SIZE 350
/* fill_image() discards OCR output that is shorter than this many bytes */
#define MIN_OCR_LEN 10

/* fill_image callback doesn't let us pass opaque pointers unless I create my own device */
/* Per-thread text buffer: created and destroyed by parse_ebook_mem(), also appended to by fill_image() */
__thread text_buffer_t thread_buffer;
/* Per-thread copy of the scan context, assigned at the start of parse_ebook_mem() */
__thread scan_ebook_ctx_t thread_ctx;

/* Locked by my_fz_lock() / my_fz_unlock() for FZ_LOCK_FREETYPE; initialized from init_fzctx() */
pthread_mutex_t Mutex;
| 
 | ||||
| static void my_fz_lock(UNUSED(void *user), int lock) { | ||||
|     if (lock == FZ_LOCK_FREETYPE) { | ||||
|         pthread_mutex_lock(&Mutex); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void my_fz_unlock(UNUSED(void *user), int lock) { | ||||
|     if (lock == FZ_LOCK_FREETYPE) { | ||||
|         pthread_mutex_unlock(&Mutex); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| int pixmap_is_blank(const fz_pixmap *pixmap) { | ||||
|     int pixmap_size = pixmap->n * pixmap->w * pixmap->h; | ||||
|     const int pixel0 = pixmap->samples[0]; | ||||
|     for (int i = 0; i < pixmap_size; i++) { | ||||
|         if (pixmap->samples[i] != pixel0) { | ||||
|             return FALSE; | ||||
|         } | ||||
|     } | ||||
|     return TRUE; | ||||
| } | ||||
| 
 | ||||
/*
 * Load page number `page` of `fzdoc` into `*cover` and render it to a pixmap
 * whose longest side is scaled to ctx->tn_size.
 *
 * Returns the rendered pixmap, or NULL on failure. On success the caller must
 * drop both the pixmap and `*cover`. When rendering fails after the page was
 * loaded, the page and the pixmap are dropped here before returning NULL.
 */
fz_pixmap *
load_pixmap(scan_ebook_ctx_t *ctx, int page, fz_context *fzctx, fz_document *fzdoc, document_t *doc, fz_page **cover) {

    int err = 0;

    fz_var(cover);
    fz_var(err);
    fz_try(fzctx)*cover = fz_load_page(fzctx, fzdoc, page);
    // The MuPDF error code is not kept here: the warning below always reports 1
    fz_catch(fzctx)err = 1;

    if (err != 0) {
        CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message)
        return NULL;
    }

    fz_rect bounds = fz_bound_page(fzctx, *cover);

    // Uniform scale factor that maps the longest side of the page to tn_size
    float scale;
    float w = bounds.x1 - bounds.x0;
    float h = bounds.y1 - bounds.y0;
    if (w > h) {
        scale = (float) ctx->tn_size / w;
    } else {
        scale = (float) ctx->tn_size / h;
    }
    fz_matrix m = fz_scale(scale, scale);

    bounds = fz_transform_rect(bounds, m);
    fz_irect bbox = fz_round_rect(bounds);
    fz_pixmap *pixmap = fz_new_pixmap_with_bbox(fzctx, fz_device_rgb(fzctx), bbox, NULL, 0);

    // Start from an all-0xFF (white) RGB canvas
    fz_clear_pixmap_with_value(fzctx, pixmap, 0xFF);
    fz_device *dev = fz_new_draw_device(fzctx, m, pixmap);

    fz_var(err);
    fz_try(fzctx) {
                fz_run_page(fzctx, *cover, dev, fz_identity, NULL);
            } fz_always(fzctx) {
            // The device is released whether or not rendering succeeded
            fz_close_device(fzctx, dev);
            fz_drop_device(fzctx, dev);
        } fz_catch(fzctx)err = fzctx->error.errcode;

    if (err != 0) {
        CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message)
        fz_drop_page(fzctx, *cover);
        fz_drop_pixmap(fzctx, pixmap);
        return NULL;
    }

    // render_cover() feeds the samples to swscale as RGB24, so exactly 3 components are required
    if (pixmap->n != 3) {
        CTX_LOG_ERRORF(doc->filepath, "Got unexpected pixmap depth: %d", pixmap->n)
        fz_drop_page(fzctx, *cover);
        fz_drop_pixmap(fzctx, pixmap);
        return NULL;
    }

    return pixmap;
}
| 
 | ||||
| int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_document *fzdoc) { | ||||
| 
 | ||||
|     fz_page *cover = NULL; | ||||
|     fz_pixmap *pixmap = load_pixmap(ctx, 0, fzctx, fzdoc, doc, &cover); | ||||
|     if (pixmap == NULL) { | ||||
|         return FALSE; | ||||
|     } | ||||
| 
 | ||||
|     if (pixmap_is_blank(pixmap)) { | ||||
|         fz_drop_page(fzctx, cover); | ||||
|         fz_drop_pixmap(fzctx, pixmap); | ||||
|         CTX_LOG_DEBUG(doc->filepath, "Cover page is blank, using page 1 instead") | ||||
|         pixmap = load_pixmap(ctx, 1, fzctx, fzdoc, doc, &cover); | ||||
|         if (pixmap == NULL) { | ||||
|             return FALSE; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     // RGB24 -> YUV420p
 | ||||
|     AVFrame *scaled_frame = av_frame_alloc(); | ||||
| 
 | ||||
|     struct SwsContext *sws_ctx = sws_getContext( | ||||
|             pixmap->w, pixmap->h, AV_PIX_FMT_RGB24, | ||||
|             pixmap->w, pixmap->h, AV_PIX_FMT_YUV420P, | ||||
|             SIST_SWS_ALGO, 0, 0, 0 | ||||
|     ); | ||||
| 
 | ||||
|     int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, pixmap->w, pixmap->h, 1); | ||||
|     uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len); | ||||
| 
 | ||||
|     av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, pixmap->w, pixmap->h, | ||||
|                          1); | ||||
| 
 | ||||
|     unsigned char *samples = calloc(1, 1024 * 1024 * 1024); | ||||
|     memcpy(samples, pixmap->samples, pixmap->stride * pixmap->h); | ||||
| 
 | ||||
|     const uint8_t *in_data[1] = {samples,}; | ||||
|     int in_line_size[1] = {(int) pixmap->stride}; | ||||
| 
 | ||||
|     sws_scale(sws_ctx, | ||||
|               in_data, in_line_size, | ||||
|               0, pixmap->h, | ||||
|               scaled_frame->data, scaled_frame->linesize | ||||
|     ); | ||||
| 
 | ||||
|     scaled_frame->width = pixmap->w; | ||||
|     scaled_frame->height = pixmap->h; | ||||
|     scaled_frame->format = AV_PIX_FMT_YUV420P; | ||||
| 
 | ||||
|     sws_freeContext(sws_ctx); | ||||
| 
 | ||||
|     // YUV420p -> JPEG
 | ||||
|     AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(pixmap->w, pixmap->h, ctx->tn_qscale); | ||||
|     avcodec_send_frame(jpeg_encoder, scaled_frame); | ||||
| 
 | ||||
|     AVPacket jpeg_packet; | ||||
|     av_init_packet(&jpeg_packet); | ||||
|     avcodec_receive_packet(jpeg_encoder, &jpeg_packet); | ||||
| 
 | ||||
|     APPEND_TN_META(doc, pixmap->w, pixmap->h) | ||||
|     ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size); | ||||
| 
 | ||||
|     free(samples); | ||||
|     av_packet_unref(&jpeg_packet); | ||||
|     av_free(*scaled_frame->data); | ||||
|     av_frame_free(&scaled_frame); | ||||
|     avcodec_free_context(&jpeg_encoder); | ||||
| 
 | ||||
|     fz_drop_pixmap(fzctx, pixmap); | ||||
|     fz_drop_page(fzctx, cover); | ||||
| 
 | ||||
|     return TRUE; | ||||
| } | ||||
| 
 | ||||
| void fz_err_callback(void *user, const char *message) { | ||||
|     document_t *doc = (document_t *) user; | ||||
| 
 | ||||
|     const scan_ebook_ctx_t *ctx = &thread_ctx; | ||||
|     CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message) | ||||
| } | ||||
| 
 | ||||
| void fz_warn_callback(void *user, const char *message) { | ||||
|     document_t *doc = (document_t *) user; | ||||
| 
 | ||||
|     const scan_ebook_ctx_t *ctx = &thread_ctx; | ||||
|     CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message) | ||||
| } | ||||
| 
 | ||||
| static void init_fzctx(fz_context *fzctx, document_t *doc) { | ||||
|     fz_register_document_handlers(fzctx); | ||||
| 
 | ||||
|     static int mu_is_initialized = FALSE; | ||||
|     if (!mu_is_initialized) { | ||||
|         pthread_mutex_init(&Mutex, NULL); | ||||
|         mu_is_initialized = TRUE; | ||||
|     } | ||||
| 
 | ||||
|     fzctx->warn.print_user = doc; | ||||
|     fzctx->warn.print = fz_warn_callback; | ||||
|     fzctx->error.print_user = doc; | ||||
|     fzctx->error.print = fz_err_callback; | ||||
| 
 | ||||
|     fzctx->locks.lock = my_fz_lock; | ||||
|     fzctx->locks.unlock = my_fz_unlock; | ||||
| } | ||||
| 
 | ||||
| static int read_stext_block(fz_stext_block *block, text_buffer_t *tex) { | ||||
|     if (block->type != FZ_STEXT_BLOCK_TEXT) { | ||||
|         return 0; | ||||
|     } | ||||
| 
 | ||||
|     fz_stext_line *line = block->u.t.first_line; | ||||
|     while (line != NULL) { | ||||
|         text_buffer_append_char(tex, ' '); | ||||
|         fz_stext_char *c = line->first_char; | ||||
|         while (c != NULL) { | ||||
|             if (text_buffer_append_char(tex, c->c) == TEXT_BUF_FULL) { | ||||
|                 return TEXT_BUF_FULL; | ||||
|             } | ||||
|             c = c->next; | ||||
|         } | ||||
|         line = line->next; | ||||
|     } | ||||
|     text_buffer_append_char(tex, ' '); | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
/* Evaluates to 1 when `d` is one of 1, 2, 4, 8, 16, 24 or 32, and to 0 otherwise. `d` is evaluated several times. */
#define IS_VALID_BPP(d) ( \
    (d) == 1 || (d) == 2 || (d) == 4 || (d) == 8 || \
    (d) == 16 || (d) == 24 || (d) == 32)
| 
 | ||||
| void fill_image(fz_context *fzctx, UNUSED(fz_device *dev), | ||||
|                 fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha), | ||||
|                 UNUSED(fz_color_params color_params)) { | ||||
| 
 | ||||
|     int l2factor = 0; | ||||
| 
 | ||||
|     if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && IS_VALID_BPP(img->n)) { | ||||
| 
 | ||||
|         fz_pixmap *pix = img->get_pixmap(fzctx, img, NULL, img->w, img->h, &l2factor); | ||||
| 
 | ||||
|         if (pix->h > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && img->xres != 0) { | ||||
|             TessBaseAPI *api = TessBaseAPICreate(); | ||||
|             TessBaseAPIInit3(api, thread_ctx.tesseract_path, thread_ctx.tesseract_lang); | ||||
| 
 | ||||
|             TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride); | ||||
|             TessBaseAPISetSourceResolution(api, pix->xres); | ||||
| 
 | ||||
|             char *text = TessBaseAPIGetUTF8Text(api); | ||||
|             size_t len = strlen(text); | ||||
|             if (len >= MIN_OCR_LEN) { | ||||
|                 text_buffer_append_string(&thread_buffer, text, len - 1); | ||||
|             } | ||||
| 
 | ||||
|             TessBaseAPIEnd(api); | ||||
|             TessBaseAPIDelete(api); | ||||
|         } | ||||
|         fz_drop_pixmap(fzctx, pix); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void | ||||
| parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mime_str, document_t *doc, int tn_only) { | ||||
| 
 | ||||
|     fz_context *fzctx = fz_new_context(NULL, NULL, FZ_STORE_DEFAULT); | ||||
|     thread_ctx = *ctx; | ||||
| 
 | ||||
|     init_fzctx(fzctx, doc); | ||||
| 
 | ||||
|     int err = 0; | ||||
| 
 | ||||
|     fz_document *fzdoc = NULL; | ||||
|     fz_stream *stream = NULL; | ||||
|     fz_var(fzdoc); | ||||
|     fz_var(stream); | ||||
|     fz_var(err); | ||||
| 
 | ||||
|     fz_try(fzctx) { | ||||
|                 stream = fz_open_memory(fzctx, buf, buf_len); | ||||
|                 fzdoc = fz_open_document_with_stream(fzctx, mime_str, stream); | ||||
|             } fz_catch(fzctx)err = fzctx->error.errcode; | ||||
| 
 | ||||
|     if (err != 0) { | ||||
|         fz_drop_stream(fzctx, stream); | ||||
|         fz_drop_document(fzctx, fzdoc); | ||||
|         fz_drop_context(fzctx); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     int page_count = -1; | ||||
|     fz_var(err); | ||||
|     fz_try(fzctx)page_count = fz_count_pages(fzctx, fzdoc); | ||||
|     fz_catch(fzctx)err = fzctx->error.errcode; | ||||
| 
 | ||||
|     if (err) { | ||||
|         CTX_LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, fzctx->error.message) | ||||
|         fz_drop_stream(fzctx, stream); | ||||
|         fz_drop_document(fzctx, fzdoc); | ||||
|         fz_drop_context(fzctx); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     APPEND_LONG_META(doc, MetaPages, page_count) | ||||
| 
 | ||||
|     if (ctx->tn_size > 0) { | ||||
|         if (render_cover(ctx, fzctx, doc, fzdoc) == FALSE) { | ||||
|             fz_drop_stream(fzctx, stream); | ||||
|             fz_drop_document(fzctx, fzdoc); | ||||
|             fz_drop_context(fzctx); | ||||
|             return; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (tn_only) { | ||||
|         fz_drop_stream(fzctx, stream); | ||||
|         fz_drop_document(fzctx, fzdoc); | ||||
|         fz_drop_context(fzctx); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     char title[8192] = {'\0',}; | ||||
|     fz_try(fzctx)fz_lookup_metadata(fzctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title)); | ||||
|     fz_catch(fzctx); | ||||
| 
 | ||||
|     if (strlen(title) > 0) { | ||||
|         APPEND_UTF8_META(doc, MetaTitle, title) | ||||
|     } | ||||
| 
 | ||||
|     char author[4096] = {'\0',}; | ||||
|     fz_try(fzctx)fz_lookup_metadata(fzctx, fzdoc, FZ_META_INFO_AUTHOR, author, sizeof(author)); | ||||
|     fz_catch(fzctx); | ||||
| 
 | ||||
|     if (strlen(author) > 0) { | ||||
|         APPEND_UTF8_META(doc, MetaAuthor, author) | ||||
|     } | ||||
| 
 | ||||
| 
 | ||||
|     if (ctx->content_size > 0) { | ||||
|         fz_stext_options opts = {0}; | ||||
|         thread_buffer = text_buffer_create(ctx->content_size); | ||||
| 
 | ||||
|         for (int current_page = 0; current_page < page_count; current_page++) { | ||||
|             fz_page *page = NULL; | ||||
|             fz_var(err); | ||||
|             fz_try(fzctx)page = fz_load_page(fzctx, fzdoc, current_page); | ||||
|             fz_catch(fzctx)err = fzctx->error.errcode; | ||||
|             if (err != 0) { | ||||
|                 CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message) | ||||
|                 text_buffer_destroy(&thread_buffer); | ||||
|                 fz_drop_page(fzctx, page); | ||||
|                 fz_drop_stream(fzctx, stream); | ||||
|                 fz_drop_document(fzctx, fzdoc); | ||||
|                 fz_drop_context(fzctx); | ||||
|                 return; | ||||
|             } | ||||
| 
 | ||||
|             fz_stext_page *stext = fz_new_stext_page(fzctx, fz_bound_page(fzctx, page)); | ||||
|             fz_device *dev = fz_new_stext_device(fzctx, stext, &opts); | ||||
|             dev->stroke_path = NULL; | ||||
|             dev->stroke_text = NULL; | ||||
|             dev->clip_text = NULL; | ||||
|             dev->clip_stroke_path = NULL; | ||||
|             dev->clip_stroke_text = NULL; | ||||
| 
 | ||||
|             if (ctx->tesseract_lang != NULL) { | ||||
|                 dev->fill_image = fill_image; | ||||
|             } | ||||
| 
 | ||||
|             fz_var(err); | ||||
|             fz_try(fzctx)fz_run_page(fzctx, page, dev, fz_identity, NULL); | ||||
|             fz_always(fzctx) { | ||||
|                     fz_close_device(fzctx, dev); | ||||
|                     fz_drop_device(fzctx, dev); | ||||
|                 } fz_catch(fzctx)err = fzctx->error.errcode; | ||||
| 
 | ||||
|             if (err != 0) { | ||||
|                 CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message) | ||||
|                 text_buffer_destroy(&thread_buffer); | ||||
|                 fz_drop_page(fzctx, page); | ||||
|                 fz_drop_stext_page(fzctx, stext); | ||||
|                 fz_drop_stream(fzctx, stream); | ||||
|                 fz_drop_document(fzctx, fzdoc); | ||||
|                 fz_drop_context(fzctx); | ||||
|                 return; | ||||
|             } | ||||
| 
 | ||||
|             fz_stext_block *block = stext->first_block; | ||||
|             while (block != NULL) { | ||||
|                 int ret = read_stext_block(block, &thread_buffer); | ||||
|                 if (ret == TEXT_BUF_FULL) { | ||||
|                     break; | ||||
|                 } | ||||
|                 block = block->next; | ||||
|             } | ||||
|             fz_drop_stext_page(fzctx, stext); | ||||
|             fz_drop_page(fzctx, page); | ||||
| 
 | ||||
|             if (thread_buffer.dyn_buffer.cur >= ctx->content_size) { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|         text_buffer_terminate_string(&thread_buffer); | ||||
| 
 | ||||
|         meta_line_t *meta_content = malloc(sizeof(meta_line_t) + thread_buffer.dyn_buffer.cur); | ||||
|         meta_content->key = MetaContent; | ||||
|         memcpy(meta_content->str_val, thread_buffer.dyn_buffer.buf, thread_buffer.dyn_buffer.cur); | ||||
|         APPEND_META(doc, meta_content) | ||||
| 
 | ||||
|         text_buffer_destroy(&thread_buffer); | ||||
|     } | ||||
| 
 | ||||
|     fz_drop_stream(fzctx, stream); | ||||
|     fz_drop_document(fzctx, fzdoc); | ||||
|     fz_drop_context(fzctx); | ||||
| } | ||||
| 
 | ||||
| static scan_arc_ctx_t arc_ctx = (scan_arc_ctx_t) {.passphrase = {0,}}; | ||||
| 
 | ||||
| void parse_epub_fast(scan_ebook_ctx_t *ctx, vfile_t *f, document_t *doc) { | ||||
|     struct archive *a = NULL; | ||||
|     struct archive_entry *entry = NULL; | ||||
|     arc_data_t arc_data; | ||||
| 
 | ||||
|     text_buffer_t content_buffer = text_buffer_create(ctx->content_size); | ||||
| 
 | ||||
|     if (ctx->tn_size <= 0) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     int ret = arc_open(&arc_ctx, f, &a, &arc_data, TRUE); | ||||
|     if (ret != ARCHIVE_OK) { | ||||
|         CTX_LOG_ERRORF(f->filepath, "(ebook.c) [%d] %s", ret, archive_error_string(a)) | ||||
|         archive_read_free(a); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { | ||||
|         struct stat info = *archive_entry_stat(entry); | ||||
|         if (S_ISREG(info.st_mode)) { | ||||
|             const char *utf8_name = archive_entry_pathname_utf8(entry); | ||||
|             const char *file_path = utf8_name == NULL ? archive_entry_pathname(entry) : utf8_name; | ||||
| 
 | ||||
|             char *p = strrchr(file_path, '.'); | ||||
|             if (p != NULL && (strcmp(p, ".html") == 0 || (strcmp(p, ".xhtml") == 0))) { | ||||
|                 size_t entry_size = archive_entry_size(entry); | ||||
|                 void *buf = malloc(entry_size + 1); | ||||
|                 size_t read = archive_read_data(a, buf, entry_size); | ||||
|                 *(char *) (buf + entry_size) = '\0'; | ||||
| 
 | ||||
|                 if (read != entry_size) { | ||||
|                     const char *err_str = archive_error_string(a); | ||||
|                     if (err_str) { | ||||
|                         CTX_LOG_ERRORF("ebook.c", "Error while reading entry: %s", err_str) | ||||
|                     } | ||||
|                     free(buf); | ||||
|                     break; | ||||
|                 } | ||||
| 
 | ||||
|                 ret = text_buffer_append_markup(&content_buffer, buf); | ||||
|                 free(buf); | ||||
| 
 | ||||
|                 if (ret == TEXT_BUF_FULL) { | ||||
|                     break; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     text_buffer_terminate_string(&content_buffer); | ||||
| 
 | ||||
|     meta_line_t *meta_content = malloc(sizeof(meta_line_t) + content_buffer.dyn_buffer.cur); | ||||
|     meta_content->key = MetaContent; | ||||
|     memcpy(meta_content->str_val, content_buffer.dyn_buffer.buf, content_buffer.dyn_buffer.cur); | ||||
|     APPEND_META(doc, meta_content) | ||||
| 
 | ||||
|     text_buffer_destroy(&content_buffer); | ||||
| 
 | ||||
|     archive_read_free(a); | ||||
| } | ||||
| 
 | ||||
| void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char *mime_str, document_t *doc) { | ||||
| 
 | ||||
|     if (ctx->fast_epub_parse && is_epub(mime_str)) { | ||||
|         parse_epub_fast(ctx, f, doc); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     size_t buf_len; | ||||
|     void *buf = read_all(f, &buf_len); | ||||
|     if (buf == NULL) { | ||||
|         CTX_LOG_ERROR(f->filepath, "read_all() failed") | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     parse_ebook_mem(ctx, buf, buf_len, mime_str, doc, FALSE); | ||||
|     free(buf); | ||||
| } | ||||
							
								
								
									
										30
									
								
								third-party/libscan/libscan/ebook/ebook.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								third-party/libscan/libscan/ebook/ebook.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,30 @@ | ||||
| #ifndef SCAN_EBOOK_H | ||||
| #define SCAN_EBOOK_H | ||||
| 
 | ||||
| #include "../scan.h" | ||||
| 
 | ||||
| typedef struct { /* Settings and callbacks passed to parse_ebook() / parse_ebook_mem(). */ | ||||
|     long content_size; /* presumably the cap on extracted text size -- TODO confirm against ebook.c */ | ||||
|     int tn_size; /* presumably the thumbnail size in pixels -- TODO confirm */ | ||||
|     const char *tesseract_lang; | ||||
|     const char *tesseract_path; | ||||
|     pthread_mutex_t mupdf_mutex; | ||||
| 
 | ||||
|     log_callback_t log; | ||||
|     logf_callback_t logf; | ||||
|     store_callback_t store; | ||||
|     int fast_epub_parse; /* non-zero: parse_ebook() sends EPUB files to parse_epub_fast() */ | ||||
|     float tn_qscale; | ||||
| } scan_ebook_ctx_t; | ||||
| 
 | ||||
| void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char *mime_str, document_t *doc); | ||||
| 
 | ||||
| void | ||||
| parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mime_str, document_t *doc, int tn_only); | ||||
| 
 | ||||
/** Returns 1 when mime_string is exactly "application/epub+zip", 0 otherwise. */
__always_inline
static int is_epub(const char *mime_string) {
    static const char epub_mime[] = "application/epub+zip";

    if (strcmp(mime_string, epub_mime) != 0) {
        return 0;
    }
    return 1;
}
| 
 | ||||
| #endif | ||||
							
								
								
									
										246
									
								
								third-party/libscan/libscan/font/font.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										246
									
								
								third-party/libscan/libscan/font/font.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,246 @@ | ||||
| #include "font.h" | ||||
| 
 | ||||
| #include <ft2build.h> | ||||
| #include <freetype/freetype.h> | ||||
| #include "../util.h" | ||||
| 
 | ||||
| 
 | ||||
| __thread FT_Library ft_lib = NULL; /* Per-thread FreeType handle: created on the first parse_font() call, released by cleanup_font(). */ | ||||
| 
 | ||||
| 
 | ||||
| typedef struct text_dimensions { /* Pixel extent of a string, as computed by text_dimension(). */ | ||||
|     unsigned int width; /* sum over glyphs of max(advance, bitmap width) plus kerning */ | ||||
|     unsigned int height; /* largest ascent + largest descent */ | ||||
|     unsigned int baseline; /* largest descent */ | ||||
| } text_dimensions_t; | ||||
| 
 | ||||
| typedef struct glyph { /* Metrics copied out of a FreeType glyph slot by ft_glyph_to_glyph(). */ | ||||
|     int top; /* slot->bitmap_top */ | ||||
|     int height; /* slot->bitmap.rows */ | ||||
|     int width; /* slot->bitmap.width */ | ||||
|     int descent; /* max(0, height - top) */ | ||||
|     int ascent; /* max(0, max(top, height) - descent) */ | ||||
|     int advance_width; /* slot->advance.x / 64 */ | ||||
|     unsigned char *pixmap; /* slot->bitmap.buffer; borrowed from the slot, not copied */ | ||||
| } glyph_t; | ||||
| 
 | ||||
| 
 | ||||
| __always_inline | ||||
| int kerning_offset(char c, char pc, FT_Face face) { | ||||
|     FT_Vector kerning; | ||||
|     FT_Get_Kerning(face, c, pc, FT_KERNING_DEFAULT, &kerning); | ||||
| 
 | ||||
|     return (int) (kerning.x / 64); | ||||
| } | ||||
| 
 | ||||
| __always_inline | ||||
| glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) { | ||||
|     glyph_t glyph; | ||||
| 
 | ||||
|     glyph.pixmap = slot->bitmap.buffer; | ||||
| 
 | ||||
|     glyph.width = (int) slot->bitmap.width; | ||||
|     glyph.height = (int) slot->bitmap.rows; | ||||
|     glyph.top = slot->bitmap_top; | ||||
|     glyph.advance_width = (int) slot->advance.x / 64; | ||||
| 
 | ||||
|     glyph.descent = MAX(0, glyph.height - glyph.top); | ||||
|     glyph.ascent = MAX(0, MAX(glyph.top, glyph.height) - glyph.descent); | ||||
| 
 | ||||
|     return glyph; | ||||
| } | ||||
| 
 | ||||
| text_dimensions_t text_dimension(char *text, FT_Face face) { | ||||
|     text_dimensions_t dimensions; | ||||
| 
 | ||||
|     dimensions.width = 0; | ||||
| 
 | ||||
|     int num_chars = (int) strlen(text); | ||||
| 
 | ||||
|     unsigned int max_ascent = 0; | ||||
|     int max_descent = 0; | ||||
| 
 | ||||
|     char pc = 0; | ||||
|     for (int i = 0; i < num_chars; i++) { | ||||
|         char c = text[i]; | ||||
| 
 | ||||
|         FT_Load_Char(face, c, 0); | ||||
|         glyph_t glyph = ft_glyph_to_glyph(face->glyph); | ||||
| 
 | ||||
|         max_descent = MAX(max_descent, glyph.descent); | ||||
|         max_ascent = MAX(max_ascent, MAX(glyph.height, glyph.ascent)); | ||||
| 
 | ||||
|         int kerning_x = kerning_offset(c, pc, face); | ||||
|         dimensions.width += MAX(glyph.advance_width, glyph.width) + kerning_x; | ||||
| 
 | ||||
|         pc = c; | ||||
|     } | ||||
| 
 | ||||
|     dimensions.height = max_ascent + max_descent; | ||||
|     dimensions.baseline = max_descent; | ||||
| 
 | ||||
|     return dimensions; | ||||
| } | ||||
| 
 | ||||
| void draw_glyph(glyph_t *glyph, int x, int y, struct text_dimensions text_info, unsigned char *bitmap) { | ||||
|     unsigned int src = 0; | ||||
|     unsigned int dst = y * text_info.width + x; | ||||
|     unsigned int row_offset = text_info.width - glyph->width; | ||||
|     unsigned int buf_len = text_info.width * text_info.height; | ||||
| 
 | ||||
|     for (unsigned int sy = 0; sy < glyph->height; sy++) { | ||||
|         for (unsigned int sx = 0; sx < glyph->width; sx++) { | ||||
|             if (dst < buf_len) { | ||||
|                 bitmap[dst] |= glyph->pixmap[src]; | ||||
|             } | ||||
|             src++; | ||||
|             dst++; | ||||
|         } | ||||
|         dst += row_offset; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned char *bitmap) { | ||||
| 
 | ||||
|     dyn_buffer_write_short(buf, 0x4D42); // Magic
 | ||||
|     dyn_buffer_write_int(buf, 0); // Size placeholder
 | ||||
|     dyn_buffer_write_int(buf, 0x5157); //Reserved
 | ||||
|     dyn_buffer_write_int(buf, 14 + 40 + 256 * 4); // pixels offset
 | ||||
| 
 | ||||
|     dyn_buffer_write_int(buf, 40); // DIB size
 | ||||
|     dyn_buffer_write_int(buf, (int) dimensions.width); | ||||
|     dyn_buffer_write_int(buf, (int) dimensions.height); | ||||
|     dyn_buffer_write_short(buf, 1); // Color planes
 | ||||
|     dyn_buffer_write_short(buf, 8); // bits per pixel
 | ||||
|     dyn_buffer_write_int(buf, 0); // compression
 | ||||
|     dyn_buffer_write_int(buf, 0); // Ignored
 | ||||
|     dyn_buffer_write_int(buf, 3800); // hres
 | ||||
|     dyn_buffer_write_int(buf, 3800); // vres
 | ||||
|     dyn_buffer_write_int(buf, 256); // Color count
 | ||||
|     dyn_buffer_write_int(buf, 0); // Ignored
 | ||||
| 
 | ||||
|     // RGBA32 Color table (Grayscale)
 | ||||
|     for (int i = 255; i >= 0; i--) { | ||||
|         dyn_buffer_write_int(buf, i + (i << 8) + (i << 16)); | ||||
|     } | ||||
| 
 | ||||
|     // Pixel array: write from bottom to top, with rows padded to multiples of 4-bytes
 | ||||
|     for (int y = (int) dimensions.height - 1; y >= 0; y--) { | ||||
|         for (unsigned int x = 0; x < dimensions.width; x++) { | ||||
|             dyn_buffer_write_char(buf, (char) bitmap[y * dimensions.width + x]); | ||||
|         } | ||||
|         while (buf->cur % 4 != 0) { | ||||
|             dyn_buffer_write_char(buf, 0); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     // Size
 | ||||
|     *(int *) ((char *) buf->buf + 2) = buf->cur; | ||||
| } | ||||
| 
 | ||||
| void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) { | ||||
|     if (ft_lib == NULL) { | ||||
|         FT_Init_FreeType(&ft_lib); | ||||
|     } | ||||
| 
 | ||||
|     size_t buf_len = 0; | ||||
|     void *buf = read_all(f, &buf_len); | ||||
|     if (buf == NULL) { | ||||
|         CTX_LOG_ERROR(f->filepath, "read_all() failed") | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     FT_Face face; | ||||
|     FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, (int) buf_len, 0, &face); | ||||
|     if (err != 0) { | ||||
|         CTX_LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, | ||||
|                        FT_Error_String(err)) | ||||
|         free(buf); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     char font_name[4096]; | ||||
| 
 | ||||
|     if (face->style_name == NULL || (strcmp(face->style_name, "?") == 0)) { | ||||
|         if (face->family_name == NULL) { | ||||
|             strcpy(font_name, "(null)"); | ||||
|         } else { | ||||
|             strncpy(font_name, face->family_name, sizeof(font_name)); | ||||
|         } | ||||
|     } else { | ||||
|         snprintf(font_name, sizeof(font_name), "%s %s", face->family_name, face->style_name); | ||||
|     } | ||||
| 
 | ||||
|     meta_line_t *meta_name = malloc(sizeof(meta_line_t) + strlen(font_name)); | ||||
|     meta_name->key = MetaFontName; | ||||
|     strcpy(meta_name->str_val, font_name); | ||||
|     APPEND_META(doc, meta_name) | ||||
| 
 | ||||
|     if (ctx->enable_tn == TRUE) { | ||||
|         FT_Done_Face(face); | ||||
|         free(buf); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     int pixel = 64; | ||||
|     int num_chars = (int) strlen(font_name); | ||||
| 
 | ||||
|     err = FT_Set_Pixel_Sizes(face, 0, pixel); | ||||
|     if (err != 0) { | ||||
|         CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, | ||||
|                          FT_Error_String(err)) | ||||
|         FT_Done_Face(face); | ||||
|         free(buf); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     text_dimensions_t dimensions = text_dimension(font_name, face); | ||||
|     unsigned char *bitmap = calloc(dimensions.width * dimensions.height, 1); | ||||
| 
 | ||||
|     FT_Vector pen; | ||||
|     pen.x = 0; | ||||
| 
 | ||||
|     char pc = 0; | ||||
|     for (int i = 0; i < num_chars; i++) { | ||||
|         char c = font_name[i]; | ||||
| 
 | ||||
|         err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER); | ||||
|         if (err != 0) { | ||||
|             c = c >= 'a' && c <= 'z' ? c - 32 : c + 32; | ||||
|             err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER); | ||||
|             if (err != 0) { | ||||
|                 CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, | ||||
|                                  FT_Error_String(err)) | ||||
|                 continue; | ||||
|             } | ||||
|         } | ||||
|         glyph_t glyph = ft_glyph_to_glyph(face->glyph); | ||||
| 
 | ||||
|         pen.x += kerning_offset(c, pc, face); | ||||
|         if (pen.x <= 0) { | ||||
|             pen.x = ABS(glyph.advance_width - glyph.width); | ||||
|         } | ||||
|         pen.y = dimensions.height - glyph.ascent - dimensions.baseline; | ||||
| 
 | ||||
|         draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap); | ||||
| 
 | ||||
|         pen.x += glyph.advance_width; | ||||
|         pc = c; | ||||
|     } | ||||
| 
 | ||||
|     dyn_buffer_t bmp_data = dyn_buffer_create(); | ||||
|     bmp_format(&bmp_data, dimensions, bitmap); | ||||
| 
 | ||||
|     APPEND_TN_META(doc, dimensions.width, dimensions.height) | ||||
|     ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) bmp_data.buf, bmp_data.cur); | ||||
| 
 | ||||
|     dyn_buffer_destroy(&bmp_data); | ||||
|     free(bitmap); | ||||
| 
 | ||||
|     FT_Done_Face(face); | ||||
|     free(buf); | ||||
| } | ||||
| 
 | ||||
| void cleanup_font() { | ||||
|     FT_Done_FreeType(ft_lib); | ||||
| } | ||||
							
								
								
									
										17
									
								
								third-party/libscan/libscan/font/font.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								third-party/libscan/libscan/font/font.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,17 @@ | ||||
| #ifndef SCAN_FONT_H | ||||
| #define SCAN_FONT_H | ||||
| 
 | ||||
| #include "../scan.h" | ||||
| 
 | ||||
| 
 | ||||
| typedef struct { /* Settings and callbacks passed to parse_font(). */ | ||||
|     int enable_tn; /* read by parse_font() to decide whether it stops after extracting the font name */ | ||||
|     log_callback_t log; | ||||
|     logf_callback_t logf; | ||||
|     store_callback_t store; /* receives (doc->path_md5, BMP thumbnail bytes) from parse_font() */ | ||||
| } scan_font_ctx_t; | ||||
| 
 | ||||
| void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc); | ||||
| void cleanup_font(); | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										119
									
								
								third-party/libscan/libscan/json/json.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										119
									
								
								third-party/libscan/libscan/json/json.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,119 @@ | ||||
| #include "json.h" | ||||
| #include "cjson/cJSON.h" | ||||
| 
 | ||||
| 
 | ||||
| #define JSON_MAX_FILE_SIZE (1024 * 1024 * 50) /* 50 MiB: parse_json() skips files larger than this */ | ||||
| 
 | ||||
| int json_extract_text(cJSON *json, text_buffer_t *tex) { | ||||
|     if (cJSON_IsObject(json)) { | ||||
|         for (cJSON *child = json->child; child != NULL; child = child->next) { | ||||
|             if (json_extract_text(child, tex)) { | ||||
|                 return TRUE; | ||||
|             } | ||||
|         } | ||||
|     } else if (cJSON_IsArray(json)) { | ||||
|         cJSON *child; | ||||
|         cJSON_ArrayForEach(child, json) { | ||||
|             if (json_extract_text(child, tex)) { | ||||
|                 return TRUE; | ||||
|             } | ||||
|         } | ||||
|     } else if (cJSON_IsString(json)) { | ||||
|         if (text_buffer_append_string0(tex, json->valuestring) == TEXT_BUF_FULL) { | ||||
|             return TRUE; | ||||
|         } | ||||
|         if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) { | ||||
|             return TRUE; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return FALSE; | ||||
| } | ||||
| 
 | ||||
| scan_code_t parse_json(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc) { | ||||
| 
 | ||||
|     if (f->info.st_size > JSON_MAX_FILE_SIZE) { | ||||
|         CTX_LOG_WARNINGF("json.c", "File larger than maximum allowed [%s]", f->filepath) | ||||
|         return SCAN_ERR_SKIP; | ||||
|     } | ||||
| 
 | ||||
|     size_t buf_len; | ||||
|     char *buf = read_all(f, &buf_len); | ||||
| 
 | ||||
|     if (buf == NULL) { | ||||
|         return SCAN_ERR_READ; | ||||
|     } | ||||
| 
 | ||||
|     buf_len += 1; | ||||
|     buf = realloc(buf, buf_len); | ||||
|     *(buf + buf_len - 1) = '\0'; | ||||
| 
 | ||||
|     cJSON *json = cJSON_ParseWithOpts(buf, NULL, TRUE); | ||||
|     text_buffer_t tex = text_buffer_create(ctx->content_size); | ||||
| 
 | ||||
|     json_extract_text(json, &tex); | ||||
|     text_buffer_terminate_string(&tex); | ||||
| 
 | ||||
|     APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf); | ||||
| 
 | ||||
|     cJSON_Delete(json); | ||||
|     free(buf); | ||||
|     text_buffer_destroy(&tex); | ||||
| 
 | ||||
|     return SCAN_OK; | ||||
| } | ||||
| 
 | ||||
| #define JSON_BUF_SIZE (1024 * 1024 * 5) /* 5 MiB window used by parse_ndjson(); parsing stops at a line that does not fit */ | ||||
| 
 | ||||
| scan_code_t parse_ndjson(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc) { | ||||
| 
 | ||||
|     char *buf = calloc(JSON_BUF_SIZE + 1, sizeof(char)); | ||||
|     *(buf + JSON_BUF_SIZE) = '\0'; | ||||
| 
 | ||||
|     text_buffer_t tex = text_buffer_create(ctx->content_size); | ||||
| 
 | ||||
|     size_t ret; | ||||
|     int eof = FALSE; | ||||
|     const char *parse_end = buf; | ||||
|     size_t to_read; | ||||
|     char *ptr = buf; | ||||
| 
 | ||||
|     while (TRUE) { | ||||
|         cJSON *json; | ||||
| 
 | ||||
|         if (!eof) { | ||||
|             to_read = parse_end == buf ? JSON_BUF_SIZE : parse_end - buf; | ||||
|             ret = f->read(f, ptr, to_read); | ||||
|             if (ret != to_read) { | ||||
|                 eof = TRUE; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         json = cJSON_ParseWithOpts(buf, &parse_end, FALSE); | ||||
| 
 | ||||
|         if (parse_end == buf + JSON_BUF_SIZE) { | ||||
|             CTX_LOG_ERRORF("json.c", "Line too large for buffer [%s]", doc->filepath); | ||||
|             cJSON_Delete(json); | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         if (parse_end == buf) { | ||||
|             cJSON_Delete(json); | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         json_extract_text(json, &tex); | ||||
| 
 | ||||
|         cJSON_Delete(json); | ||||
| 
 | ||||
|         memmove(buf, parse_end, (buf + JSON_BUF_SIZE - parse_end)); | ||||
|         ptr = buf + JSON_BUF_SIZE - parse_end + buf; | ||||
|     } | ||||
| 
 | ||||
|     text_buffer_terminate_string(&tex); | ||||
| 
 | ||||
|     APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf); | ||||
| 
 | ||||
|     free(buf); | ||||
|     text_buffer_destroy(&tex); | ||||
| } | ||||
							
								
								
									
										30
									
								
								third-party/libscan/libscan/json/json.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								third-party/libscan/libscan/json/json.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,30 @@ | ||||
| #ifndef SCAN_JSON_H | ||||
| #define SCAN_JSON_H | ||||
| 
 | ||||
| #include "../scan.h" | ||||
| 
 | ||||
| 
 | ||||
| typedef struct { /* Settings and callbacks passed to parse_json() / parse_ndjson(). */ | ||||
|     long content_size; /* passed to text_buffer_create() for the extracted text */ | ||||
|     log_callback_t log; | ||||
|     logf_callback_t logf; | ||||
|     store_callback_t store; | ||||
|     unsigned int json_mime; /* MIME id that is_json() compares against */ | ||||
|     unsigned int ndjson_mime; /* MIME id that is_ndjson() compares against */ | ||||
| } scan_json_ctx_t; | ||||
| 
 | ||||
| scan_code_t parse_json(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc); | ||||
| 
 | ||||
| scan_code_t parse_ndjson(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc); | ||||
| 
 | ||||
| __always_inline | ||||
| static int is_json(scan_json_ctx_t *ctx, unsigned int mime) { | ||||
|     return mime == ctx->json_mime; | ||||
| } | ||||
| 
 | ||||
| __always_inline | ||||
| static int is_ndjson(scan_json_ctx_t *ctx, unsigned int mime) { | ||||
|     return mime == ctx->ndjson_mime; | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										62
									
								
								third-party/libscan/libscan/macros.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										62
									
								
								third-party/libscan/libscan/macros.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,62 @@ | ||||
| #ifndef FALSE | ||||
| #define FALSE (0) | ||||
| #define BOOL int /* only defined here when FALSE was not already defined */ | ||||
| #endif | ||||
| 
 | ||||
| #ifndef TRUE | ||||
| #define TRUE (!FALSE) | ||||
| #endif | ||||
| 
 | ||||
| #undef MAX | ||||
| #define MAX(a, b)  (((a) > (b)) ? (a) : (b)) /* evaluates its arguments more than once */ | ||||
| 
 | ||||
| #undef MIN | ||||
| #define MIN(a, b)  (((a) < (b)) ? (a) : (b)) /* evaluates its arguments more than once */ | ||||
| 
 | ||||
| #ifndef PATH_MAX | ||||
| #define PATH_MAX 4096 /* fallback when the platform headers do not provide it */ | ||||
| #endif | ||||
| 
 | ||||
| #undef ABS | ||||
| #define ABS(a) (((a) < 0) ? -(a) : (a)) /* evaluates its argument more than once */ | ||||
| 
 | ||||
| #define SHA1_STR_LENGTH 41 /* presumably 40 hex digits plus the NUL terminator -- TODO confirm */ | ||||
| #define SHA1_DIGEST_LENGTH 20 | ||||
| 
 | ||||
/*
 * Appends a copy of the NUL-terminated string `value` to doc's metadata list
 * under `keyname`.
 *
 * The allocation reserves strlen(value) + 1 bytes for the payload so that the
 * terminator written by strcpy() never depends on spare room inside
 * meta_line_t. `value` is evaluated twice.
 */
#define APPEND_STR_META(doc, keyname, value) \
    {meta_line_t *meta_str = malloc(sizeof(meta_line_t) + strlen(value) + 1); \
    meta_str->key = keyname; \
    strcpy(meta_str->str_val, value); \
    APPEND_META(doc, meta_str)}
| 
 | ||||
/*
 * Appends an integer metadata entry (`keyname` -> `value`) to doc's metadata
 * list. The entry is heap-allocated and linked in with APPEND_META.
 */
#define APPEND_LONG_META(doc, keyname, value) \
    { \
        meta_line_t *meta_long = malloc(sizeof(*meta_long)); \
        meta_long->key = keyname; \
        meta_long->long_val = value; \
        APPEND_META(doc, meta_long) \
    }
| 
 | ||||
/*
 * Appends a MetaThumbnail entry holding "WWWW,HHHH" (zero-padded to at least
 * four digits each) to doc's metadata list.
 *
 * The payload is sized for the widest possible pair of ints and written with
 * snprintf(), so dimensions above 9999 cannot overrun the allocation. The
 * arguments are cast to int to match the %d conversions (callers pass
 * unsigned values).
 */
#define APPEND_TN_META(doc, width, height) \
    {meta_line_t *meta_str = malloc(sizeof(meta_line_t) + 32); \
    meta_str->key = MetaThumbnail; \
    snprintf(meta_str->str_val, 32, "%04d,%04d", (int) (width), (int) (height)); \
    APPEND_META(doc, meta_str)}
| 
 | ||||
/*
 * Links `meta` at the tail of doc's singly linked metadata list
 * (doc->meta_head / doc->meta_tail) and clears meta->next.
 *
 * The expansion is a single braced block with parenthesized arguments, so it
 * stays one statement under an unbraced `if` and accepts expressions as
 * arguments. It is still written without a trailing semicolon at call sites.
 * Both arguments are evaluated more than once.
 */
#define APPEND_META(doc, meta) \
    {(meta)->next = NULL;\
    if ((doc)->meta_head == NULL) {\
        (doc)->meta_head = (meta);\
        (doc)->meta_tail = (doc)->meta_head;\
    } else {\
        (doc)->meta_tail->next = (meta);\
        (doc)->meta_tail = (meta);\
    }}
| 
 | ||||
| #define APPEND_UTF8_META(doc, keyname, str) /* Append `str` to doc's metadata under `keyname` after passing it through a text_buffer_t. Declares `tex` and `meta_tag` in the caller's scope, so use it at most once per scope. */ \ | ||||
|     text_buffer_t tex = text_buffer_create(-1); \ | ||||
|     text_buffer_append_string0(&tex, str); \ | ||||
|     text_buffer_terminate_string(&tex); \ | ||||
|     meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); /* payload sized from the buffer length after termination */ \ | ||||
|     meta_tag->key = keyname; \ | ||||
|     strcpy(meta_tag->str_val, tex.dyn_buffer.buf); \ | ||||
|     APPEND_META(doc, meta_tag) \ | ||||
|     text_buffer_destroy(&tex); | ||||
							
								
								
									
										749
									
								
								third-party/libscan/libscan/media/media.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										749
									
								
								third-party/libscan/libscan/media/media.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,749 @@ | ||||
| #include "media.h" | ||||
| #include <ctype.h> | ||||
| 
 | ||||
| #define MIN_SIZE 32 /* scale_frame() returns NULL when either target dimension is <= this */ | ||||
| #define AVIO_BUF_SIZE 8192 | ||||
| #define IS_VIDEO(fmt) (fmt->iformat->name && strcmp(fmt->iformat->name, "image2") != 0) /* true when the input format has a name other than "image2" */ | ||||
| 
 | ||||
| #define STORE_AS_IS ((void*)-1) /* sentinel returned by scale_frame() for small MJPEG/PNG frames; not a real AVFrame pointer */ | ||||
| 
 | ||||
| const char *get_filepath_with_ext(document_t *doc, const char *filepath, const char *mime_str) { | ||||
| 
 | ||||
|     int has_extension = doc->ext > doc->base; | ||||
| 
 | ||||
|     if (!has_extension) { | ||||
|         if (strcmp(mime_str, "image/png") == 0) { | ||||
|             return "file.png"; | ||||
|         } else if (strcmp(mime_str, "image/jpeg") == 0) { | ||||
|             return "file.jpg"; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return filepath; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| __always_inline | ||||
| void *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) { | ||||
| 
 | ||||
|     if (frame->pict_type == AV_PICTURE_TYPE_NONE) { | ||||
|         return NULL; | ||||
|     } | ||||
| 
 | ||||
|     int dstW; | ||||
|     int dstH; | ||||
|     if (frame->width <= size && frame->height <= size) { | ||||
|         if (decoder->codec_id == AV_CODEC_ID_MJPEG || decoder->codec_id == AV_CODEC_ID_PNG) { | ||||
|             return STORE_AS_IS; | ||||
|         } | ||||
| 
 | ||||
|         dstW = frame->width; | ||||
|         dstH = frame->height; | ||||
|     } else { | ||||
|         double ratio = (double) frame->width / frame->height; | ||||
|         if (frame->width > frame->height) { | ||||
|             dstW = size; | ||||
|             dstH = (int) (size / ratio); | ||||
|         } else { | ||||
|             dstW = (int) (size * ratio); | ||||
|             dstH = size; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) { | ||||
|         return NULL; | ||||
|     } | ||||
| 
 | ||||
|     AVFrame *scaled_frame = av_frame_alloc(); | ||||
| 
 | ||||
|     struct SwsContext *sws_ctx = sws_getContext( | ||||
|             decoder->width, decoder->height, decoder->pix_fmt, | ||||
|             dstW, dstH, AV_PIX_FMT_YUVJ420P, | ||||
|             SIST_SWS_ALGO, 0, 0, 0 | ||||
|     ); | ||||
| 
 | ||||
|     int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1); | ||||
|     uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len * 2); | ||||
| 
 | ||||
|     av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1); | ||||
| 
 | ||||
|     sws_scale(sws_ctx, | ||||
|               (const uint8_t *const *) frame->data, frame->linesize, | ||||
|               0, decoder->height, | ||||
|               scaled_frame->data, scaled_frame->linesize | ||||
|     ); | ||||
| 
 | ||||
|     scaled_frame->width = dstW; | ||||
|     scaled_frame->height = dstH; | ||||
|     scaled_frame->format = AV_PIX_FMT_YUV420P; | ||||
| 
 | ||||
|     sws_freeContext(sws_ctx); | ||||
| 
 | ||||
|     return scaled_frame; | ||||
| } | ||||
| 
 | ||||
| typedef struct { /* A decoded frame together with the packet it came from; built by read_frame(), released by frame_and_packet_free(). */ | ||||
|     AVPacket *packet; /* allocated with av_packet_alloc() */ | ||||
|     AVFrame *frame; /* allocated with av_frame_alloc() */ | ||||
| } frame_and_packet_t; | ||||
| 
 | ||||
| static void frame_and_packet_free(frame_and_packet_t *frame_and_packet) { | ||||
|     if (frame_and_packet->packet != NULL) { | ||||
|         av_packet_free(&frame_and_packet->packet); | ||||
|     } | ||||
| 
 | ||||
|     if (frame_and_packet->frame != NULL) { | ||||
|         av_frame_free(&frame_and_packet->frame); | ||||
|     } | ||||
| 
 | ||||
|     free(frame_and_packet->packet); | ||||
|     free(frame_and_packet); | ||||
| } | ||||
| 
 | ||||
| __always_inline | ||||
| static void read_subtitles(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, int stream_idx, document_t *doc) { | ||||
| 
 | ||||
|     text_buffer_t tex = text_buffer_create(-1); | ||||
| 
 | ||||
|     AVPacket packet; | ||||
|     AVSubtitle subtitle; | ||||
| 
 | ||||
|     AVCodec *subtitle_codec = avcodec_find_decoder(pFormatCtx->streams[stream_idx]->codecpar->codec_id); | ||||
|     AVCodecContext *decoder = avcodec_alloc_context3(subtitle_codec); | ||||
|     avcodec_parameters_to_context(decoder, pFormatCtx->streams[stream_idx]->codecpar); | ||||
|     avcodec_open2(decoder, subtitle_codec, NULL); | ||||
| 
 | ||||
|     decoder->sub_text_format = FF_SUB_TEXT_FMT_ASS; | ||||
| 
 | ||||
|     int got_sub; | ||||
| 
 | ||||
|     while (1) { | ||||
|         int read_frame_ret = av_read_frame(pFormatCtx, &packet); | ||||
| 
 | ||||
|         if (read_frame_ret != 0) { | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         if (packet.stream_index != stream_idx) { | ||||
|             av_packet_unref(&packet); | ||||
|             continue; | ||||
|         } | ||||
| 
 | ||||
|         avcodec_decode_subtitle2(decoder, &subtitle, &got_sub, &packet); | ||||
| 
 | ||||
|         if (got_sub) { | ||||
|             for (int i = 0; i < subtitle.num_rects; i++) { | ||||
|                 const char *text = subtitle.rects[i]->ass; | ||||
| 
 | ||||
|                 if (text == NULL) { | ||||
|                     continue; | ||||
|                 } | ||||
| 
 | ||||
|                 char *idx = strstr(text, "\\N"); | ||||
|                 if (idx != NULL && strlen(idx + 2) > 1) { | ||||
|                     text_buffer_append_string0(&tex, idx + 2); | ||||
|                     text_buffer_append_char(&tex, ' '); | ||||
|                 } | ||||
|             } | ||||
|             avsubtitle_free(&subtitle); | ||||
|         } | ||||
| 
 | ||||
|         av_packet_unref(&packet); | ||||
|     } | ||||
| 
 | ||||
|     text_buffer_terminate_string(&tex); | ||||
| 
 | ||||
|     APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf) | ||||
|     text_buffer_destroy(&tex); | ||||
|     avcodec_free_context(&decoder); | ||||
| } | ||||
| 
 | ||||
| __always_inline | ||||
| static frame_and_packet_t * | ||||
| read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, | ||||
|            document_t *doc) { | ||||
| 
 | ||||
|     frame_and_packet_t *result = calloc(1, sizeof(frame_and_packet_t)); | ||||
|     result->packet = av_packet_alloc(); | ||||
|     result->frame = av_frame_alloc(); | ||||
| 
 | ||||
|     av_init_packet(result->packet); | ||||
| 
 | ||||
|     int receive_ret = -EAGAIN; | ||||
|     while (receive_ret == -EAGAIN) { | ||||
|         // Get video frame
 | ||||
|         while (1) { | ||||
|             int read_frame_ret = av_read_frame(pFormatCtx, result->packet); | ||||
| 
 | ||||
|             if (read_frame_ret != 0) { | ||||
|                 if (read_frame_ret != AVERROR_EOF) { | ||||
|                     CTX_LOG_WARNINGF(doc->filepath, | ||||
|                                      "(media.c) avcodec_read_frame() returned error code [%d] %s", | ||||
|                                      read_frame_ret, av_err2str(read_frame_ret) | ||||
|                     ) | ||||
|                 } | ||||
|                 frame_and_packet_free(result); | ||||
|                 return NULL; | ||||
|             } | ||||
| 
 | ||||
|             //Ignore audio/other frames
 | ||||
|             if (result->packet->stream_index != stream_idx) { | ||||
|                 av_packet_unref(result->packet); | ||||
|                 continue; | ||||
|             } | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         // Feed it to decoder
 | ||||
|         int decode_ret = avcodec_send_packet(decoder, result->packet); | ||||
|         if (decode_ret != 0) { | ||||
|             CTX_LOG_ERRORF(doc->filepath, | ||||
|                            "(media.c) avcodec_send_packet() returned error code [%d] %s", | ||||
|                            decode_ret, av_err2str(decode_ret) | ||||
|             ) | ||||
|             frame_and_packet_free(result); | ||||
|             return NULL; | ||||
|         } | ||||
| 
 | ||||
|         receive_ret = avcodec_receive_frame(decoder, result->frame); | ||||
|         if (receive_ret == -EAGAIN && result->packet != NULL) { | ||||
|             av_packet_unref(result->packet); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return result; | ||||
| } | ||||
| 
 | ||||
| void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDictionaryEntry *tag, enum metakey key) { | ||||
| 
 | ||||
|     meta_line_t *meta = doc->meta_head; | ||||
|     while (meta != NULL) { | ||||
|         if (meta->key == key) { | ||||
|             CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s' and '%02x=%s'", | ||||
|                            key, meta->str_val, key, tag->value) | ||||
|             return; | ||||
|         } | ||||
|         meta = meta->next; | ||||
|     } | ||||
| 
 | ||||
|     text_buffer_t tex = text_buffer_create(-1); | ||||
|     text_buffer_append_string0(&tex, tag->value); | ||||
|     text_buffer_terminate_string(&tex); | ||||
|     meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); | ||||
|     meta_tag->key = key; | ||||
|     strcpy(meta_tag->str_val, tex.dyn_buffer.buf); | ||||
| 
 | ||||
|     APPEND_META(doc, meta_tag) | ||||
|     text_buffer_destroy(&tex); | ||||
| } | ||||
| 
 | ||||
| #define APPEND_TAG_META(keyname) /* Shorthand: append tag->value under `keyname`; needs `doc` and `tag` in the caller's scope. */ \ | ||||
|     APPEND_UTF8_META(doc, keyname, tag->value) | ||||
| 
 | ||||
/*
 * Copies `str` into the char array `dst` (truncating if necessary) and
 * lower-cases it in place.
 *
 * `dst` must be an array, not a pointer: its size is taken with sizeof. The
 * copy is always NUL-terminated, and each byte is passed to tolower() as an
 * unsigned char as <ctype.h> requires. Declares `ptr` in the caller's scope,
 * so use it at most once per scope.
 */
#define STRCPY_TOLOWER(dst, str) \
    strncpy(dst, str, sizeof(dst) - 1); \
    dst[sizeof(dst) - 1] = '\0'; \
    char *ptr = dst; \
    for (; *ptr; ++ptr) *ptr = (char) tolower((unsigned char) *ptr);
| 
 | ||||
| __always_inline | ||||
| static void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) { | ||||
|     // Walks every entry of the container-level metadata dictionary and appends the recognised tags to doc.
 | ||||
|     AVDictionaryEntry *tag = NULL; | ||||
|     while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { | ||||
|         char key[256]; | ||||
|         STRCPY_TOLOWER(key, tag->key) | ||||
|         // Keys are compared in lower case, so tag matching is case-insensitive.
 | ||||
|         if (strcmp(key, "artist") == 0) { | ||||
|             APPEND_TAG_META(MetaArtist) | ||||
|         } else if (strcmp(key, "genre") == 0) { | ||||
|             APPEND_TAG_META(MetaGenre) | ||||
|         } else if (strcmp(key, "title") == 0) { | ||||
|             APPEND_TAG_META(MetaTitle) | ||||
|         } else if (strcmp(key, "album_artist") == 0) { | ||||
|             APPEND_TAG_META(MetaAlbumArtist) | ||||
|         } else if (strcmp(key, "album") == 0) { | ||||
|             APPEND_TAG_META(MetaAlbum) | ||||
|         } else if (strcmp(key, "comment") == 0) { | ||||
|             APPEND_TAG_META(MetaContent) | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| __always_inline | ||||
| static void | ||||
| append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int is_video) { | ||||
|     // Videos: records duration and bitrate, then title/comment/artist tags. Otherwise: copies EXIF tags from the frame.
 | ||||
|     if (is_video) { | ||||
|         meta_line_t *meta_duration = malloc(sizeof(meta_line_t)); // NOTE(review): malloc results in this function are not checked.
 | ||||
|         meta_duration->key = MetaMediaDuration; | ||||
|         meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE; // Integer division by AV_TIME_BASE.
 | ||||
|         if (meta_duration->long_val > INT32_MAX) { // Values above INT32_MAX are recorded as 0.
 | ||||
|             meta_duration->long_val = 0; | ||||
|         } | ||||
|         APPEND_META(doc, meta_duration) | ||||
| 
 | ||||
|         meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t)); | ||||
|         meta_bitrate->key = MetaMediaBitrate; | ||||
|         meta_bitrate->long_val = pFormatCtx->bit_rate; | ||||
|         APPEND_META(doc, meta_bitrate) | ||||
|     } | ||||
|     // Videos read the container-level dictionary; everything else reads the decoded frame's dictionary.
 | ||||
|     AVDictionaryEntry *tag = NULL; | ||||
|     if (is_video) { | ||||
|         while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { | ||||
|             char key[256]; | ||||
|             STRCPY_TOLOWER(key, tag->key) | ||||
| 
 | ||||
|             if (strcmp(key, "title") == 0) { | ||||
|                 append_tag_meta_if_not_exists(ctx, doc, tag, MetaTitle); | ||||
|             } else if (strcmp(key, "comment") == 0) { | ||||
|                 append_tag_meta_if_not_exists(ctx, doc, tag, MetaContent); | ||||
|             } else if (strcmp(key, "artist") == 0) { | ||||
|                 append_tag_meta_if_not_exists(ctx, doc, tag, MetaArtist); | ||||
|             } | ||||
|         } | ||||
|     } else { | ||||
|         // EXIF metadata
 | ||||
|         while ((tag = av_dict_get(frame->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { | ||||
|             char key[256]; | ||||
|             STRCPY_TOLOWER(key, tag->key) | ||||
|             // Only "artist" goes through the duplicate check; the other keys are appended via APPEND_TAG_META.
 | ||||
|             if (strcmp(key, "artist") == 0) { | ||||
|                 append_tag_meta_if_not_exists(ctx, doc, tag, MetaArtist); | ||||
|             } else if (strcmp(key, "imagedescription") == 0) { | ||||
|                 APPEND_TAG_META(MetaContent) | ||||
|             } else if (strcmp(key, "make") == 0) { | ||||
|                 APPEND_TAG_META(MetaExifMake) | ||||
|             } else if (strcmp(key, "model") == 0) { | ||||
|                 APPEND_TAG_META(MetaExifModel) | ||||
|             } else if (strcmp(key, "software") == 0) { | ||||
|                 APPEND_TAG_META(MetaExifSoftware) | ||||
|             } else if (strcmp(key, "fnumber") == 0) { | ||||
|                 APPEND_TAG_META(MetaExifFNumber) | ||||
|             } else if (strcmp(key, "focallength") == 0) { | ||||
|                 APPEND_TAG_META(MetaExifFocalLength) | ||||
|             } else if (strcmp(key, "usercomment") == 0) { | ||||
|                 APPEND_TAG_META(MetaExifUserComment) | ||||
|             } else if (strcmp(key, "isospeedratings") == 0) { | ||||
|                 APPEND_TAG_META(MetaExifIsoSpeedRatings) | ||||
|             } else if (strcmp(key, "exposuretime") == 0) { | ||||
|                 APPEND_TAG_META(MetaExifExposureTime) | ||||
|             } else if (strcmp(key, "datetime") == 0) { | ||||
|                 APPEND_TAG_META(MetaExifDateTime) | ||||
|             } else if (strcmp(key, "gpslatitude") == 0) { | ||||
|                 APPEND_TAG_META(MetaExifGpsLatitudeDMS) | ||||
|             } else if (strcmp(key, "gpslatituderef") == 0) { | ||||
|                 APPEND_TAG_META(MetaExifGpsLatitudeRef) | ||||
|             } else if (strcmp(key, "gpslongitude") == 0) { | ||||
|                 APPEND_TAG_META(MetaExifGpsLongitudeDMS) | ||||
|             } else if (strcmp(key, "gpslongituderef") == 0) { | ||||
|                 APPEND_TAG_META(MetaExifGpsLongitudeRef) | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, document_t *doc) { | ||||
|     // Records codec/size metadata, optional subtitles and tags, then stores a thumbnail of one video frame.
    // pFormatCtx is closed with avformat_close_input() on every return path.
 | ||||
|     int video_stream = -1; | ||||
|     int audio_stream = -1; | ||||
|     int subtitle_stream = -1; | ||||
| 
 | ||||
|     avformat_find_stream_info(pFormatCtx, NULL); // NOTE(review): the return value is ignored.
 | ||||
|     // Streams are visited from last to first: the first audio/video stream met (highest index) is kept,
    // while subtitle_stream is overwritten each time and so ends up as the lowest subtitle index.
 | ||||
|     for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) { | ||||
|         AVStream *stream = pFormatCtx->streams[i]; | ||||
| 
 | ||||
|         if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { | ||||
|             if (audio_stream == -1) { | ||||
|                 const AVCodecDescriptor *desc = avcodec_descriptor_get(stream->codecpar->codec_id); | ||||
| 
 | ||||
|                 if (desc != NULL) { | ||||
|                     APPEND_STR_META(doc, MetaMediaAudioCodec, desc->name) | ||||
|                 } | ||||
| 
 | ||||
|                 audio_stream = i; | ||||
|             } | ||||
|         } else if (stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) { | ||||
| 
 | ||||
|             if (video_stream == -1) { | ||||
|                 const AVCodecDescriptor *desc = avcodec_descriptor_get(stream->codecpar->codec_id); | ||||
| 
 | ||||
|                 if (desc != NULL) { | ||||
|                     APPEND_STR_META(doc, MetaMediaVideoCodec, desc->name) | ||||
|                 } | ||||
| 
 | ||||
|                 meta_line_t *meta_w = malloc(sizeof(meta_line_t)); | ||||
|                 meta_w->key = MetaWidth; | ||||
|                 meta_w->long_val = stream->codecpar->width; | ||||
|                 APPEND_META(doc, meta_w) | ||||
| 
 | ||||
|                 meta_line_t *meta_h = malloc(sizeof(meta_line_t)); | ||||
|                 meta_h->key = MetaHeight; | ||||
|                 meta_h->long_val = stream->codecpar->height; | ||||
|                 APPEND_META(doc, meta_h) | ||||
| 
 | ||||
|                 video_stream = i; | ||||
|             } | ||||
|         } else if (stream->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE) { | ||||
|             subtitle_stream = i; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (subtitle_stream != -1 && ctx->read_subtitles) { | ||||
|         read_subtitles(ctx, pFormatCtx, subtitle_stream, doc); | ||||
| 
 | ||||
|         // Reset stream
 | ||||
|         if (video_stream != -1) { | ||||
|             av_seek_frame(pFormatCtx, video_stream, 0, 0); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (audio_stream != -1) { | ||||
|         append_audio_meta(pFormatCtx, doc); | ||||
|     } | ||||
|     // Everything below (including append_video_meta) only runs when thumbnails are enabled (tn_size > 0).
 | ||||
|     if (video_stream != -1 && ctx->tn_size > 0) { | ||||
|         AVStream *stream = pFormatCtx->streams[video_stream]; | ||||
|         // Skip streams whose width or height is not above MIN_SIZE.
 | ||||
|         if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) { | ||||
|             avformat_close_input(&pFormatCtx); | ||||
|             avformat_free_context(pFormatCtx); | ||||
|             return; | ||||
|         } | ||||
| 
 | ||||
|         // Decoder
 | ||||
|         AVCodec *video_codec = avcodec_find_decoder(stream->codecpar->codec_id); | ||||
|         AVCodecContext *decoder = avcodec_alloc_context3(video_codec); | ||||
|         avcodec_parameters_to_context(decoder, stream->codecpar); | ||||
|         avcodec_open2(decoder, video_codec, NULL); // NOTE(review): decoder setup return values are not checked.
 | ||||
| 
 | ||||
|         //Seek
 | ||||
|         if (stream->nb_frames > 1 && stream->codecpar->codec_id != AV_CODEC_ID_GIF) { | ||||
|             int seek_ret; | ||||
|             for (int i = 20; i >= 0; i--) { // Repeats the same seek (to 10% of the stream duration) until it returns 0, at most 21 times.
 | ||||
|                 seek_ret = av_seek_frame(pFormatCtx, video_stream, | ||||
|                                          stream->duration * 0.10, 0); | ||||
|                 if (seek_ret == 0) { | ||||
|                     break; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         frame_and_packet_t *frame_and_packet = read_frame(ctx, pFormatCtx, decoder, video_stream, doc); | ||||
|         if (frame_and_packet == NULL) { | ||||
|             avcodec_free_context(&decoder); | ||||
|             avformat_close_input(&pFormatCtx); | ||||
|             avformat_free_context(pFormatCtx); | ||||
|             return; | ||||
|         } | ||||
| 
 | ||||
|         append_video_meta(ctx, pFormatCtx, frame_and_packet->frame, doc, IS_VIDEO(pFormatCtx)); | ||||
| 
 | ||||
|         // Scale frame
 | ||||
|         AVFrame *scaled_frame = scale_frame(decoder, frame_and_packet->frame, ctx->tn_size); | ||||
| 
 | ||||
|         if (scaled_frame == NULL) { | ||||
|             frame_and_packet_free(frame_and_packet); | ||||
|             avcodec_free_context(&decoder); | ||||
|             avformat_close_input(&pFormatCtx); | ||||
|             avformat_free_context(pFormatCtx); | ||||
|             return; | ||||
|         } | ||||
|         // STORE_AS_IS is a sentinel from scale_frame(): the original packet bytes are stored unchanged.
 | ||||
|         if (scaled_frame == STORE_AS_IS) { | ||||
|             APPEND_TN_META(doc, frame_and_packet->frame->width, frame_and_packet->frame->height) | ||||
|             ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data, | ||||
|                        frame_and_packet->packet->size); | ||||
|         } else { | ||||
|             // Encode frame to jpeg
 | ||||
|             AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, | ||||
|                                                               ctx->tn_qscale); // NOTE(review): alloc_jpeg_encoder() can return NULL; that case is not handled here.
 | ||||
|             avcodec_send_frame(jpeg_encoder, scaled_frame); | ||||
| 
 | ||||
|             AVPacket jpeg_packet; | ||||
|             av_init_packet(&jpeg_packet); | ||||
|             avcodec_receive_packet(jpeg_encoder, &jpeg_packet); | ||||
| 
 | ||||
|             // Save thumbnail
 | ||||
|             APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height) | ||||
|             ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size); | ||||
| 
 | ||||
|             avcodec_free_context(&jpeg_encoder); | ||||
|             av_packet_unref(&jpeg_packet); | ||||
|             av_free(*scaled_frame->data); // Releases the buffer at data[0] before the frame itself.
 | ||||
|             av_frame_free(&scaled_frame); | ||||
|         } | ||||
| 
 | ||||
|         frame_and_packet_free(frame_and_packet); | ||||
|         avcodec_free_context(&decoder); | ||||
|     } | ||||
| 
 | ||||
|     avformat_close_input(&pFormatCtx); | ||||
|     avformat_free_context(pFormatCtx); | ||||
| } | ||||
| 
 | ||||
| // Opens the media file at `filepath` with FFmpeg and passes the opened context to parse_media_format_ctx().
 | ||||
| // Allocation and open failures are logged against doc->filepath.
 | ||||
| void parse_media_filename(scan_media_ctx_t *ctx, const char *filepath, document_t *doc) { | ||||
| 
 | ||||
|     AVFormatContext *format_ctx = avformat_alloc_context(); | ||||
|     if (format_ctx == NULL) { | ||||
|         CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()") | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     int open_status = avformat_open_input(&format_ctx, filepath, NULL, NULL); | ||||
|     if (open_status >= 0) { | ||||
|         // Success: the callee takes over the context from here.
 | ||||
|         parse_media_format_ctx(ctx, format_ctx, doc); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", open_status, av_err2str(open_status)) | ||||
|     avformat_close_input(&format_ctx); | ||||
|     avformat_free_context(format_ctx); | ||||
| } | ||||
| 
 | ||||
| // AVIOContext read callback backed by a vfile: forwards to the vfile's own read function
 | ||||
| // and reports a zero-byte read as AVERROR_EOF.
 | ||||
| int vfile_read(void *ptr, uint8_t *buf, int buf_size) { | ||||
|     struct vfile *source = ptr; | ||||
|     int bytes_read = source->read(source, buf, buf_size); | ||||
| 
 | ||||
|     return bytes_read != 0 ? bytes_read : AVERROR_EOF; | ||||
| } | ||||
| 
 | ||||
| typedef struct { | ||||
|     size_t size; // Length of the data in bytes; memfile_seek() returns it when asked for the stream size.
 | ||||
|     FILE *file; // Read-only stream over buf, created with fmemopen().
 | ||||
|     void *buf; // Backing memory: malloc'ed by memfile_open(), caller-provided in memfile_open_buf().
 | ||||
| } memfile_t; | ||||
| 
 | ||||
| // AVIOContext read callback for a memfile_t: reads up to buf_size bytes from the memory stream.
 | ||||
| // Returns the number of bytes read, or AVERROR_EOF when nothing was read and the stream is at end-of-file.
 | ||||
| int memfile_read(void *ptr, uint8_t *buf, int buf_size) { | ||||
|     FILE *stream = ((memfile_t *) ptr)->file; | ||||
| 
 | ||||
|     size_t bytes_read = fread(buf, 1, buf_size, stream); | ||||
|     if (bytes_read != 0 || !feof(stream)) { | ||||
|         return (int) bytes_read; | ||||
|     } | ||||
| 
 | ||||
|     return AVERROR_EOF; | ||||
| } | ||||
| 
 | ||||
| // AVIOContext seek callback for a memfile_t.
 | ||||
| // whence == AVSEEK_SIZE is FFmpeg's request for the total stream size; any other whence is passed to fseek().
 | ||||
| // Returns the new stream position (or the size), or AVERROR_EOF when fseek() fails.
 | ||||
| long memfile_seek(void *ptr, long offset, int whence) { | ||||
|     memfile_t *mem = ptr; | ||||
| 
 | ||||
|     if (whence == AVSEEK_SIZE) { | ||||
|         return (long) mem->size; | ||||
|     } | ||||
| 
 | ||||
|     if (fseek(mem->file, offset, whence) != 0) { | ||||
|         return AVERROR_EOF; | ||||
|     } | ||||
| 
 | ||||
|     return ftell(mem->file); | ||||
| } | ||||
| 
 | ||||
| // Reads the whole vfile into a heap buffer and exposes it as a read-only stream via fmemopen().
 | ||||
| // When f->calculate_checksum is set, the SHA1 of the fully read content is stored in f->sha1_digest.
 | ||||
| // Returns 0 on success and -1 on failure. On failure nothing stays allocated (mem->buf and mem->file
 | ||||
| // are NULL), so a later memfile_close() has nothing to release.
 | ||||
| int memfile_open(vfile_t *f, memfile_t *mem) { | ||||
|     mem->size = f->info.st_size; | ||||
|     mem->file = NULL; | ||||
| 
 | ||||
|     mem->buf = malloc(mem->size); | ||||
|     if (mem->buf == NULL) { | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     int ret = f->read(f, mem->buf, mem->size); | ||||
|     if (ret < 0 || (size_t) ret != mem->size) { | ||||
|         // Short or failed read: the buffer is incomplete, so neither hash it nor expose it.
 | ||||
|         free(mem->buf); | ||||
|         mem->buf = NULL; | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     if (f->calculate_checksum) { | ||||
|         SHA1_Init(&f->sha1_ctx); | ||||
|         safe_sha1_update(&f->sha1_ctx, mem->buf, mem->size); | ||||
|         SHA1_Final(f->sha1_digest, &f->sha1_ctx); | ||||
|         f->has_checksum = TRUE; | ||||
|     } | ||||
| 
 | ||||
|     mem->file = fmemopen(mem->buf, mem->size, "rb"); | ||||
|     if (mem->file == NULL) { | ||||
|         free(mem->buf); | ||||
|         mem->buf = NULL; | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| // Wraps a caller-owned buffer of buf_len bytes in a read-only stream via fmemopen().
 | ||||
| // The buffer is not copied; mem->buf simply points at it. Returns 0 on success, -1 if fmemopen() fails.
 | ||||
| int memfile_open_buf(void *buf, size_t buf_len, memfile_t *mem) { | ||||
|     // mem->size is a size_t: store the length as-is instead of narrowing it through int.
 | ||||
|     mem->size = buf_len; | ||||
| 
 | ||||
|     mem->buf = buf; | ||||
|     mem->file = fmemopen(mem->buf, mem->size, "rb"); | ||||
| 
 | ||||
|     return mem->file != NULL ? 0 : -1; | ||||
| } | ||||
| 
 | ||||
| // Releases a memfile_t whose buffer is heap-owned (as set up by memfile_open()): closes the stream
 | ||||
| // if there is one, then frees the buffer. Safe to call on a zero-initialised or partially opened memfile_t.
 | ||||
| void memfile_close(memfile_t *mem) { | ||||
|     // The stream may be NULL when opening failed; fclose(NULL) is undefined behaviour.
 | ||||
|     if (mem->file != NULL) { | ||||
|         fclose(mem->file); | ||||
|         mem->file = NULL; | ||||
|     } | ||||
| 
 | ||||
|     free(mem->buf); | ||||
|     mem->buf = NULL; | ||||
| } | ||||
| 
 | ||||
| void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc, const char *mime_str) { | ||||
|     // Parses a vfile through a custom AVIOContext: buffered in memory (seekable) when it fits in
    // ctx->max_media_buffer, otherwise streamed through vfile_read without seek support.
 | ||||
|     AVFormatContext *pFormatCtx = avformat_alloc_context(); | ||||
|     if (pFormatCtx == NULL) { | ||||
|         CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()") | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE); // NOTE(review): allocation result is not checked.
 | ||||
|     AVIOContext *io_ctx = NULL; | ||||
|     memfile_t memfile = {0, 0, 0}; | ||||
| 
 | ||||
|     const char *filepath = get_filepath_with_ext(doc, f->filepath, mime_str); | ||||
|     // Files no larger than max_media_buffer are loaded fully so that a seek callback can be offered.
 | ||||
|     if (f->info.st_size <= ctx->max_media_buffer) { | ||||
|         int ret = memfile_open(f, &memfile); | ||||
|         if (ret == 0) { | ||||
|             CTX_LOG_DEBUGF(f->filepath, "Loading media file in memory (%ldB)", f->info.st_size) | ||||
|             io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, &memfile, memfile_read, NULL, memfile_seek); | ||||
|         } | ||||
|     } | ||||
|     // Fallback (file too large, or the in-memory path did not produce an io_ctx): no seek callback.
 | ||||
|     if (io_ctx == NULL) { | ||||
|         CTX_LOG_DEBUGF(f->filepath, "Reading media file without seek support", f->info.st_size) // NOTE(review): the format string has no conversion for the extra argument.
 | ||||
|         io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL); | ||||
|     } | ||||
| 
 | ||||
|     pFormatCtx->pb = io_ctx; | ||||
| 
 | ||||
|     int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL); | ||||
|     if (res < 0) { | ||||
|         if (res != -5) { // NOTE(review): -5 is presumably AVERROR(EIO), deliberately left unlogged — confirm.
 | ||||
|             CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res)) | ||||
|         } | ||||
|         av_free(io_ctx->buffer); | ||||
|         memfile_close(&memfile); | ||||
|         avio_context_free(&io_ctx); | ||||
|         avformat_close_input(&pFormatCtx); | ||||
|         avformat_free_context(pFormatCtx); | ||||
|         return; | ||||
|     } | ||||
|     // parse_media_format_ctx() closes pFormatCtx; the I/O context and memfile are released here afterwards.
 | ||||
|     parse_media_format_ctx(ctx, pFormatCtx, doc); | ||||
|     av_free(io_ctx->buffer); // Freed via io_ctx->buffer: FFmpeg documents that it may replace the buffer passed to avio_alloc_context().
 | ||||
|     avio_context_free(&io_ctx); | ||||
|     memfile_close(&memfile); | ||||
| } | ||||
| 
 | ||||
| // Entry point for media parsing: filesystem-backed files are opened by path,
 | ||||
| // every other vfile goes through the custom-I/O path.
 | ||||
| void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc, const char *mime_str) { | ||||
| 
 | ||||
|     if (!f->is_fs_file) { | ||||
|         parse_media_vfile(ctx, f, doc, mime_str); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     parse_media_filename(ctx, f->filepath, doc); | ||||
| } | ||||
| 
 | ||||
| void init_media() { | ||||
|     av_log_set_level(AV_LOG_QUIET); // Sets FFmpeg's log level to AV_LOG_QUIET.
 | ||||
| } | ||||
| 
 | ||||
| // Decodes the first frame of the in-memory image `buf` (buf_len bytes), scales it to ctx->tn_size and
 | ||||
| // passes the result to ctx->store() keyed by doc->path_md5. `url` is only handed to avformat_open_input().
 | ||||
| // The caller keeps ownership of `buf`. Returns TRUE when a thumbnail was stored, FALSE otherwise.
 | ||||
| int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, document_t *doc, const char *url) { | ||||
|     memfile_t memfile = {0, 0, 0}; | ||||
|     AVIOContext *io_ctx = NULL; | ||||
|     unsigned char *buffer = NULL; | ||||
|     AVStream *stream = NULL; | ||||
|     const AVCodec *video_codec = NULL; | ||||
|     AVCodecContext *decoder = NULL; | ||||
|     frame_and_packet_t *frame_and_packet = NULL; | ||||
|     AVFrame *scaled_frame = NULL; | ||||
|     int stored = FALSE; | ||||
| 
 | ||||
|     AVFormatContext *pFormatCtx = avformat_alloc_context(); | ||||
|     if (pFormatCtx == NULL) { | ||||
|         CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()") | ||||
|         return FALSE; | ||||
|     } | ||||
| 
 | ||||
|     if (memfile_open_buf(buf, buf_len, &memfile) != 0) { | ||||
|         // memfile.file is NULL here: there is no stream to close and no AVIO buffer yet.
 | ||||
|         goto cleanup; | ||||
|     } | ||||
|     CTX_LOG_DEBUGF(doc->filepath, "Loading media file in memory (%zuB)", buf_len) | ||||
| 
 | ||||
|     buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE); | ||||
|     if (buffer == NULL) { | ||||
|         goto cleanup; | ||||
|     } | ||||
| 
 | ||||
|     io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, &memfile, memfile_read, NULL, memfile_seek); | ||||
|     if (io_ctx == NULL) { | ||||
|         // No AVIOContext took the buffer over, so release it directly.
 | ||||
|         av_free(buffer); | ||||
|         goto cleanup; | ||||
|     } | ||||
| 
 | ||||
|     pFormatCtx->pb = io_ctx; | ||||
| 
 | ||||
|     if (avformat_open_input(&pFormatCtx, url, NULL, NULL) != 0) { | ||||
|         goto cleanup; | ||||
|     } | ||||
| 
 | ||||
|     if (pFormatCtx->nb_streams == 0) { | ||||
|         // Nothing to decode; streams[0] would be out of bounds.
 | ||||
|         goto cleanup; | ||||
|     } | ||||
|     stream = pFormatCtx->streams[0]; | ||||
| 
 | ||||
|     // Decoder
 | ||||
|     video_codec = avcodec_find_decoder(stream->codecpar->codec_id); | ||||
|     decoder = avcodec_alloc_context3(video_codec); | ||||
|     if (decoder == NULL) { | ||||
|         goto cleanup; | ||||
|     } | ||||
|     avcodec_parameters_to_context(decoder, stream->codecpar); | ||||
|     avcodec_open2(decoder, video_codec, NULL); | ||||
| 
 | ||||
|     frame_and_packet = read_frame(ctx, pFormatCtx, decoder, 0, doc); | ||||
|     if (frame_and_packet == NULL) { | ||||
|         goto cleanup; | ||||
|     } | ||||
| 
 | ||||
|     // Scale frame
 | ||||
|     scaled_frame = scale_frame(decoder, frame_and_packet->frame, ctx->tn_size); | ||||
|     if (scaled_frame == NULL) { | ||||
|         goto cleanup; | ||||
|     } | ||||
| 
 | ||||
|     if (scaled_frame == STORE_AS_IS) { | ||||
|         // Sentinel from scale_frame(): store the original packet bytes unchanged.
 | ||||
|         APPEND_TN_META(doc, frame_and_packet->frame->width, frame_and_packet->frame->height) | ||||
|         ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data, | ||||
|                    frame_and_packet->packet->size); | ||||
|         stored = TRUE; | ||||
|     } else { | ||||
|         // Encode frame to jpeg
 | ||||
|         AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, | ||||
|                                                           ctx->tn_qscale); | ||||
|         if (jpeg_encoder != NULL) { | ||||
|             avcodec_send_frame(jpeg_encoder, scaled_frame); | ||||
| 
 | ||||
|             AVPacket jpeg_packet; | ||||
|             av_init_packet(&jpeg_packet); | ||||
|             avcodec_receive_packet(jpeg_encoder, &jpeg_packet); | ||||
| 
 | ||||
|             // Save thumbnail
 | ||||
|             APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height) | ||||
|             ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size); | ||||
| 
 | ||||
|             av_packet_unref(&jpeg_packet); | ||||
|             avcodec_free_context(&jpeg_encoder); | ||||
|             stored = TRUE; | ||||
|         } | ||||
| 
 | ||||
|         av_free(*scaled_frame->data); | ||||
|         av_frame_free(&scaled_frame); | ||||
|     } | ||||
| 
 | ||||
| cleanup: | ||||
|     if (frame_and_packet != NULL) { | ||||
|         frame_and_packet_free(frame_and_packet); | ||||
|     } | ||||
|     avcodec_free_context(&decoder); | ||||
| 
 | ||||
|     // avformat_close_input() frees the context and resets the pointer to NULL, so no separate free is needed.
 | ||||
|     avformat_close_input(&pFormatCtx); | ||||
| 
 | ||||
|     if (io_ctx != NULL) { | ||||
|         // Free via io_ctx->buffer: libavformat may have replaced the buffer originally passed in.
 | ||||
|         av_free(io_ctx->buffer); | ||||
|         avio_context_free(&io_ctx); | ||||
|     } | ||||
|     if (memfile.file != NULL) { | ||||
|         fclose(memfile.file); | ||||
|     } | ||||
| 
 | ||||
|     return stored; | ||||
| } | ||||
							
								
								
									
										52
									
								
								third-party/libscan/libscan/media/media.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								third-party/libscan/libscan/media/media.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,52 @@ | ||||
| #ifndef SIST2_MEDIA_H | ||||
| #define SIST2_MEDIA_H | ||||
| 
 | ||||
| 
 | ||||
| #include "../scan.h" | ||||
| 
 | ||||
| #include "libavformat/avformat.h" | ||||
| #include "libswscale/swscale.h" | ||||
| #include "libswresample/swresample.h" | ||||
| #include "libavcodec/avcodec.h" | ||||
| #include "libavutil/imgutils.h" | ||||
| 
 | ||||
| typedef struct { | ||||
|     log_callback_t log; // Logging callback (presumably used by the CTX_LOG_* macros — confirm).
 | ||||
|     logf_callback_t logf; // Formatted-logging callback (presumably used by the CTX_LOG_*F macros — confirm).
 | ||||
|     store_callback_t store; // Receives the thumbnail bytes, keyed by the document's path_md5.
 | ||||
| 
 | ||||
|     int tn_size; // Thumbnail size passed to scale_frame(); thumbnails are only generated when > 0.
 | ||||
|     float tn_qscale; // Passed to alloc_jpeg_encoder(), which assigns it to i_quant_factor.
 | ||||
|     long max_media_buffer; // Largest st_size for which a non-filesystem vfile is loaded fully into memory.
 | ||||
|     int read_subtitles; // Non-zero: subtitle streams are read with read_subtitles().
 | ||||
| } scan_media_ctx_t; | ||||
| 
 | ||||
| // Allocates and opens an MJPEG encoder for w x h frames in AV_PIX_FMT_YUVJ420P.
 | ||||
| // qscale is assigned to the encoder's i_quant_factor.
 | ||||
| // Returns the opened context (release it with avcodec_free_context()), or NULL on failure.
 | ||||
| __always_inline | ||||
| static AVCodecContext *alloc_jpeg_encoder(int w, int h, float qscale) { | ||||
| 
 | ||||
|     const AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG); | ||||
|     AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec); | ||||
|     if (jpeg == NULL) { | ||||
|         return NULL; | ||||
|     } | ||||
| 
 | ||||
|     jpeg->width = w; | ||||
|     jpeg->height = h; | ||||
|     jpeg->time_base.den = 1000000; | ||||
|     jpeg->time_base.num = 1; | ||||
|     jpeg->i_quant_factor = qscale; | ||||
| 
 | ||||
|     jpeg->pix_fmt = AV_PIX_FMT_YUVJ420P; | ||||
|     int ret = avcodec_open2(jpeg, jpeg_codec, NULL); | ||||
| 
 | ||||
|     if (ret != 0) { | ||||
|         // Do not leak the context when the encoder cannot be opened.
 | ||||
|         avcodec_free_context(&jpeg); | ||||
|         return NULL; | ||||
|     } | ||||
| 
 | ||||
|     return jpeg; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc, const char*mime_str); // Parses a media file: opened by path when f->is_fs_file is set, otherwise read through the vfile.
 | ||||
| 
 | ||||
| void init_media(); // Sets FFmpeg's log level to AV_LOG_QUIET.
 | ||||
| 
 | ||||
| int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, document_t *doc, const char *url); // Stores a thumbnail of the image held in buf; returns TRUE on success, FALSE on failure.
 | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										79
									
								
								third-party/libscan/libscan/mobi/scan_mobi.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										79
									
								
								third-party/libscan/libscan/mobi/scan_mobi.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,79 @@ | ||||
| #include "scan_mobi.h" | ||||
| 
 | ||||
| #include <mobi.h> | ||||
| #include <errno.h> | ||||
| #include "stdlib.h" | ||||
| 
 | ||||
| // Extracts author, title and text content from a MOBI e-book and appends them to doc.
 | ||||
| // The vfile is read fully into memory and handed to libmobi through an fmemopen() stream.
 | ||||
| // Every return path releases the MOBIData handle and the buffers allocated here.
 | ||||
| void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) { | ||||
| 
 | ||||
|     MOBIData *m = mobi_init(); | ||||
|     if (m == NULL) { | ||||
|         CTX_LOG_ERROR(f->filepath, "mobi_init() failed") | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     size_t buf_len; | ||||
|     char* buf = read_all(f, &buf_len); | ||||
|     if (buf == NULL) { | ||||
|         mobi_free(m); | ||||
|         CTX_LOG_ERROR(f->filepath, "read_all() failed") | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     FILE *file = fmemopen(buf, buf_len, "rb"); | ||||
|     if (file == NULL) { | ||||
|         // Log first so that errno still belongs to fmemopen().
 | ||||
|         CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", errno) | ||||
|         mobi_free(m); | ||||
|         free(buf); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     MOBI_RET mobi_ret = mobi_load_file(m, file); | ||||
|     fclose(file); | ||||
|     if (mobi_ret != MOBI_SUCCESS) { | ||||
|         mobi_free(m); | ||||
|         free(buf); | ||||
|         CTX_LOG_ERRORF(f->filepath, "mobi_load_file() returned error code [%d]", mobi_ret) | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     char *author = mobi_meta_get_author(m); | ||||
|     if (author != NULL) { | ||||
|         APPEND_STR_META(doc, MetaAuthor, author) | ||||
|         free(author); | ||||
|     } | ||||
|     char *title = mobi_meta_get_title(m); | ||||
|     if (title != NULL) { | ||||
|         APPEND_STR_META(doc, MetaTitle, title) | ||||
|         free(title); | ||||
|     } | ||||
| 
 | ||||
|     const size_t maxlen = mobi_get_text_maxsize(m); | ||||
|     if (maxlen == MOBI_NOTSET) { | ||||
|         // Release the MOBI handle on this path too, and log against the real file path.
 | ||||
|         mobi_free(m); | ||||
|         free(buf); | ||||
|         CTX_LOG_DEBUGF(f->filepath, "Invalid text maxsize: %zu", maxlen) | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     char *content_str = malloc(maxlen + 1); | ||||
|     if (content_str == NULL) { | ||||
|         mobi_free(m); | ||||
|         free(buf); | ||||
|         CTX_LOG_ERROR(f->filepath, "malloc() failed") | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     size_t length = maxlen; | ||||
|     mobi_ret = mobi_get_rawml(m, content_str, &length); | ||||
|     if (mobi_ret != MOBI_SUCCESS) { | ||||
|         mobi_free(m); | ||||
|         free(content_str); | ||||
|         free(buf); | ||||
|         CTX_LOG_ERRORF(f->filepath, "mobi_get_rawml() returned error code [%d]", mobi_ret) | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     // The raw markup goes through text_buffer_append_markup() before being stored as content.
 | ||||
|     text_buffer_t tex = text_buffer_create(ctx->content_size); | ||||
|     text_buffer_append_markup(&tex, content_str); | ||||
|     text_buffer_terminate_string(&tex); | ||||
| 
 | ||||
|     APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf) | ||||
| 
 | ||||
|     free(content_str); | ||||
|     free(buf); | ||||
|     text_buffer_destroy(&tex); | ||||
|     mobi_free(m); | ||||
| } | ||||
							
								
								
									
										14
									
								
								third-party/libscan/libscan/mobi/scan_mobi.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								third-party/libscan/libscan/mobi/scan_mobi.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,14 @@ | ||||
| #ifndef SCAN_SCAN_MOBI_H | ||||
| #define SCAN_SCAN_MOBI_H | ||||
| 
 | ||||
| #include "../scan.h" | ||||
| 
 | ||||
| typedef struct { | ||||
|     long content_size; // Passed to text_buffer_create() for the extracted text (presumably a size limit — confirm).
 | ||||
|     log_callback_t log; // Logging callback (presumably used by the CTX_LOG_* macros — confirm).
 | ||||
|     logf_callback_t logf; // Formatted-logging callback (presumably used by the CTX_LOG_*F macros — confirm).
 | ||||
| } scan_mobi_ctx_t; | ||||
| 
 | ||||
| void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc); // Extracts author, title and text content from a MOBI file into doc.
 | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										147
									
								
								third-party/libscan/libscan/msdoc/msdoc.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										147
									
								
								third-party/libscan/libscan/msdoc/msdoc.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,147 @@ | ||||
| #include "msdoc.h" | ||||
| #include <errno.h> | ||||
| 
 | ||||
| #include <sys/mman.h> | ||||
| #include "../../third-party/antiword/src/antiword.h" | ||||
| 
 | ||||
| #include "../ebook/ebook.h" | ||||
| 
 | ||||
| // Extracts author, title and plain-text content from a Word document with antiword and appends them to doc.
 | ||||
| // file_in is a stream over the document bytes; it is left open, the caller closes it.
 | ||||
| // buf is released with free() on every return path.
 | ||||
| void parse_msdoc_text(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file_in, void *buf, size_t buf_len) { | ||||
| 
 | ||||
|     // Open word doc
 | ||||
|     options_type *opts = direct_vGetOptions(); | ||||
|     opts->iParagraphBreak = 74; | ||||
|     opts->eConversionType = conversion_text; | ||||
|     opts->bHideHiddenText = 1; | ||||
|     opts->bRemoveRemovedText = 1; | ||||
|     opts->bUseLandscape = 0; | ||||
|     opts->eEncoding = encoding_utf_8; | ||||
|     opts->iPageHeight = 842; // A4
 | ||||
|     opts->iPageWidth = 595; | ||||
|     opts->eImageLevel = level_ps_3; | ||||
| 
 | ||||
|     int doc_word_version = iGuessVersionNumber(file_in, (int) buf_len); | ||||
|     if (doc_word_version < 0 || doc_word_version == 3) { | ||||
|         free(buf); | ||||
|         return; | ||||
|     } | ||||
|     rewind(file_in); | ||||
| 
 | ||||
|     size_t out_len; | ||||
|     char *out_buf; | ||||
| 
 | ||||
|     FILE *file_out = open_memstream(&out_buf, &out_len); | ||||
|     if (file_out == NULL) { | ||||
|         free(buf); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     diagram_type *diag = pCreateDiagram("antiword", NULL, file_out); | ||||
|     if (diag == NULL) { | ||||
|         // file_in belongs to the caller and must not be closed here; release only what this function owns.
 | ||||
|         fclose(file_out); | ||||
|         free(out_buf); | ||||
|         free(buf); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     iInitDocument(file_in, (int) buf_len); | ||||
|     const char *author = szGetAuthor(); | ||||
|     if (author != NULL) { | ||||
|         APPEND_UTF8_META(doc, MetaAuthor, author) | ||||
|     } | ||||
| 
 | ||||
|     const char *title = szGetTitle(); | ||||
|     if (title != NULL) { | ||||
|         APPEND_UTF8_META(doc, MetaTitle, title) | ||||
|     } | ||||
|     vFreeDocument(); | ||||
| 
 | ||||
|     bWordDecryptor(file_in, (int) buf_len, diag); | ||||
|     vDestroyDiagram(diag); | ||||
|     // Closing the memory stream finalises out_buf and out_len.
 | ||||
|     fclose(file_out); | ||||
| 
 | ||||
|     // NOTE(review): this tests the input length; out_len may have been intended — confirm.
 | ||||
|     if (buf_len > 0) { | ||||
|         text_buffer_t tex = text_buffer_create(ctx->content_size); | ||||
|         text_buffer_append_string(&tex, out_buf, out_len); | ||||
|         text_buffer_terminate_string(&tex); | ||||
| 
 | ||||
|         meta_line_t *meta_content = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); | ||||
|         if (meta_content != NULL) { | ||||
|             meta_content->key = MetaContent; | ||||
|             memcpy(meta_content->str_val, tex.dyn_buffer.buf, tex.dyn_buffer.cur); | ||||
|             APPEND_META(doc, meta_content) | ||||
|         } | ||||
| 
 | ||||
|         text_buffer_destroy(&tex); | ||||
|     } | ||||
| 
 | ||||
|     free(buf); | ||||
|     free(out_buf); | ||||
| } | ||||
| 
 | ||||
| // Converts a Word document to PDF in memory with antiword and passes the PDF to parse_ebook_mem().
 | ||||
| // `file` is a stream over the document bytes; it is left open for the caller.
 | ||||
| // buf is released with free() on every return path.
 | ||||
| void parse_msdoc_pdf(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file, void *buf, size_t buf_len) { | ||||
| 
 | ||||
|     scan_ebook_ctx_t ebook_ctx = { | ||||
|             .content_size = ctx->content_size, | ||||
|             .tn_size = ctx->tn_size, | ||||
|             .log = ctx->log, | ||||
|             .logf = ctx->logf, | ||||
|             .store = ctx->store, | ||||
|     }; | ||||
| 
 | ||||
|     // Open word doc
 | ||||
|     options_type *opts = direct_vGetOptions(); | ||||
|     opts->iParagraphBreak = 74; | ||||
|     opts->eConversionType = conversion_pdf; | ||||
|     opts->bHideHiddenText = 1; | ||||
|     opts->bRemoveRemovedText = 1; | ||||
|     opts->bUseLandscape = 0; | ||||
|     opts->eEncoding = encoding_latin_1; | ||||
|     opts->iPageHeight = 842; // A4
 | ||||
|     opts->iPageWidth = 595; | ||||
|     opts->eImageLevel = level_ps_3; | ||||
| 
 | ||||
|     int doc_word_version = iGuessVersionNumber(file, (int) buf_len); | ||||
|     if (doc_word_version < 0 || doc_word_version == 3) { | ||||
|         free(buf); | ||||
|         return; | ||||
|     } | ||||
|     rewind(file); | ||||
| 
 | ||||
|     size_t out_len; | ||||
|     char *out_buf; | ||||
| 
 | ||||
|     FILE *file_out = open_memstream(&out_buf, &out_len); | ||||
|     if (file_out == NULL) { | ||||
|         free(buf); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     diagram_type *diag = pCreateDiagram("antiword", NULL, file_out); | ||||
|     if (diag == NULL) { | ||||
|         // Release the memory stream, its buffer and the input copy instead of leaking them.
 | ||||
|         fclose(file_out); | ||||
|         free(out_buf); | ||||
|         free(buf); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     bWordDecryptor(file, (int) buf_len, diag); | ||||
|     vDestroyDiagram(diag); | ||||
| 
 | ||||
|     // Closing the memory stream finalises out_buf and out_len.
 | ||||
|     fclose(file_out); | ||||
| 
 | ||||
|     parse_ebook_mem(&ebook_ctx, out_buf, out_len, "application/pdf", doc, TRUE); | ||||
| 
 | ||||
|     free(buf); | ||||
|     free(out_buf); | ||||
| } | ||||
| 
 | ||||
| void parse_msdoc(scan_msdoc_ctx_t *ctx, vfile_t *f, document_t *doc) { | ||||
| 
 | ||||
|     size_t buf_len; | ||||
|     char *buf = read_all(f, &buf_len); | ||||
|     if (buf == NULL) { | ||||
|         CTX_LOG_ERROR(f->filepath, "read_all() failed") | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     FILE *file = fmemopen(buf, buf_len, "rb"); | ||||
|     if (file == NULL) { | ||||
|         free(buf); | ||||
|         CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", errno) | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (ctx->tn_size > 0) { | ||||
|         char *buf_pdf = malloc(buf_len); | ||||
|         memcpy(buf_pdf, buf, buf_len); | ||||
|         parse_msdoc_pdf(ctx, doc, file, buf_pdf, buf_len); | ||||
|     } | ||||
|     parse_msdoc_text(ctx, doc, file, buf, buf_len); | ||||
|     fclose(file); | ||||
| } | ||||
							
								
								
									
										24
									
								
								third-party/libscan/libscan/msdoc/msdoc.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								third-party/libscan/libscan/msdoc/msdoc.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,24 @@ | ||||
| #ifndef SCAN_SCAN_MSDOC_H | ||||
| #define SCAN_SCAN_MSDOC_H | ||||
| 
 | ||||
| #include "../scan.h" | ||||
| 
 | ||||
| typedef struct { /* Settings and callbacks used by the legacy Word (.doc) parser. */ | ||||
|     long content_size; /* capacity passed to text_buffer_create() for the extracted text */ | ||||
|     int tn_size; /* the PDF/thumbnail pass runs only when this is > 0 */ | ||||
|     log_callback_t log; /* sink used by the CTX_LOG_* macros */ | ||||
|     logf_callback_t logf; /* printf-style sink used by the CTX_LOG_*F macros */ | ||||
|     store_callback_t store; /* forwarded to the ebook context during the PDF pass */ | ||||
|     unsigned int msdoc_mime; /* mime id that is_msdoc() compares against */ | ||||
| } scan_msdoc_ctx_t; | ||||
| 
 | ||||
| __always_inline | ||||
| static int is_msdoc(scan_msdoc_ctx_t *ctx, unsigned int mime) { | ||||
|     return mime == ctx->msdoc_mime; | ||||
| } | ||||
| 
 | ||||
| void parse_msdoc(scan_msdoc_ctx_t *ctx, vfile_t *f, document_t *doc); /* Parse a legacy Word document read from f and append what is extracted to doc. */ | ||||
| 
 | ||||
| void parse_msdoc_text(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file_in, void* buf, size_t buf_len); /* Text pass over an already opened document stream; the implementation frees buf before returning. */ | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										260
									
								
								third-party/libscan/libscan/ooxml/ooxml.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										260
									
								
								third-party/libscan/libscan/ooxml/ooxml.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,260 @@ | ||||
| #include "ooxml.h" | ||||
| 
 | ||||
| #include <archive.h> | ||||
| #include <archive_entry.h> | ||||
| #include <libxml/xmlstring.h> | ||||
| #include <libxml/parser.h> | ||||
| 
 | ||||
| #define _X(str) ((const xmlChar*)str) | ||||
| 
 | ||||
| __always_inline | ||||
| static int should_read_part(const char *part) { | ||||
| 
 | ||||
|     if (part == NULL) { | ||||
|         return FALSE; | ||||
|     } | ||||
| 
 | ||||
|     if (    // Word
 | ||||
|             STR_STARTS_WITH(part, "word/document.xml") | ||||
|             || STR_STARTS_WITH(part, "word/footnotes.xml") | ||||
|             || STR_STARTS_WITH(part, "word/endnotes.xml") | ||||
|             || STR_STARTS_WITH(part, "word/footer") | ||||
|             || STR_STARTS_WITH(part, "word/header") | ||||
|             // PowerPoint
 | ||||
|             || STR_STARTS_WITH(part, "ppt/slides/slide") | ||||
|             || STR_STARTS_WITH(part, "ppt/notesSlides/slide") | ||||
|             // Excel
 | ||||
|             || STR_STARTS_WITH(part, "xl/worksheets/sheet") | ||||
|             || STR_STARTS_WITH(part, "xl/sharedStrings.xml") | ||||
|             || STR_STARTS_WITH(part, "xl/workbook.xml") | ||||
|             ) { | ||||
|         return TRUE; | ||||
|     } | ||||
| 
 | ||||
|     return FALSE; | ||||
| } | ||||
| 
 | ||||
| int extract_text(scan_ooxml_ctx_t *ctx, xmlDoc *xml, xmlNode *node, text_buffer_t *buf) { | ||||
|     //TODO: Check which nodes are likely to have a 't' child, and ignore nodes that aren't
 | ||||
|     xmlErrorPtr err = xmlGetLastError(); | ||||
|     if (err != NULL) { | ||||
|         if (err->level == XML_ERR_FATAL) { | ||||
|             CTX_LOG_ERRORF("ooxml.c", "Got fatal XML error while parsing document: %s", err->message) | ||||
|             return -1; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     for (xmlNode *child = node; child; child = child->next) { | ||||
|         if (child->name != NULL && *child->name == 't' && *(child->name + 1) == '\0') { | ||||
|             xmlChar *text = xmlNodeListGetString(xml, child->xmlChildrenNode, 1); | ||||
| 
 | ||||
|             if (text) { | ||||
|                 int ret = text_buffer_append_string0(buf, (char *) text); | ||||
|                 text_buffer_append_char(buf, ' '); | ||||
|                 xmlFree(text); | ||||
| 
 | ||||
|                 if (ret == TEXT_BUF_FULL) { | ||||
|                     return ret; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         if (extract_text(ctx, xml, child->children, buf) == TEXT_BUF_FULL) { | ||||
|             return TEXT_BUF_FULL; | ||||
|         } | ||||
|     } | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
/**
 * Read callback given to xmlReadIO(): pulls up to len bytes of the current
 * archive entry into buffer.
 *
 * @param context the struct archive handle being read.
 * @return the archive_read_data() result narrowed to int.
 */
int xml_io_read(void *context, char *buffer, int len) {
    return (int) archive_read_data((struct archive *) context, buffer, len);
}
| 
 | ||||
| int xml_io_close(UNUSED(void *context)) { | ||||
|     //noop
 | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| #define READ_PART_ERR (-2) | ||||
| 
 | ||||
| __always_inline | ||||
| static int read_part(scan_ooxml_ctx_t *ctx, struct archive *a, text_buffer_t *buf, document_t *doc) { | ||||
| 
 | ||||
|     xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL, | ||||
|                             XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET); | ||||
| 
 | ||||
|     if (xml == NULL) { | ||||
|         CTX_LOG_ERROR(doc->filepath, "Could not parse XML") | ||||
|         return READ_PART_ERR; | ||||
|     } | ||||
| 
 | ||||
|     xmlNode *root = xmlDocGetRootElement(xml); | ||||
|     if (root == NULL) { | ||||
|         CTX_LOG_ERROR(doc->filepath, "Empty document") | ||||
|         xmlFreeDoc(xml); | ||||
|         return READ_PART_ERR; | ||||
|     } | ||||
| 
 | ||||
|     int ret = extract_text(ctx, xml, root, buf); | ||||
|     xmlFreeDoc(xml); | ||||
| 
 | ||||
|     return ret; | ||||
| } | ||||
| 
 | ||||
| __always_inline | ||||
| static int read_doc_props_app(scan_ooxml_ctx_t *ctx, struct archive *a, document_t *doc) { | ||||
|     xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL, | ||||
|                             XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET); | ||||
| 
 | ||||
|     if (xml == NULL) { | ||||
|         CTX_LOG_ERROR(doc->filepath, "Could not parse XML") | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     xmlNode *root = xmlDocGetRootElement(xml); | ||||
|     if (root == NULL) { | ||||
|         CTX_LOG_ERROR(doc->filepath, "Empty document") | ||||
|         xmlFreeDoc(xml); | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     if (xmlStrEqual(root->name, _X("Properties"))) { | ||||
|         for (xmlNode *child = root->children; child; child = child->next) { | ||||
|             xmlChar *text = xmlNodeListGetString(xml, child->xmlChildrenNode, 1); | ||||
|             if (text == NULL) { | ||||
|                 continue; | ||||
|             } | ||||
| 
 | ||||
|             if (xmlStrEqual(child->name, _X("Pages"))) { | ||||
|                 APPEND_LONG_META(doc, MetaPages, strtol((char *) text, NULL, 10)) | ||||
|             } | ||||
| 
 | ||||
|             xmlFree(text); | ||||
|         } | ||||
|     } | ||||
|     xmlFreeDoc(xml); | ||||
| 
 | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| __always_inline | ||||
| static int read_doc_props(scan_ooxml_ctx_t *ctx, struct archive *a, document_t *doc) { | ||||
|     xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL, | ||||
|                             XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET); | ||||
| 
 | ||||
|     if (xml == NULL) { | ||||
|         CTX_LOG_ERROR(doc->filepath, "Could not parse XML") | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     xmlNode *root = xmlDocGetRootElement(xml); | ||||
|     if (root == NULL) { | ||||
|         CTX_LOG_ERROR(doc->filepath, "Empty document") | ||||
|         xmlFreeDoc(xml); | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     if (xmlStrEqual(root->name, _X("coreProperties"))) { | ||||
|         for (xmlNode *child = root->children; child; child = child->next) { | ||||
|             xmlChar *text = xmlNodeListGetString(xml, child->xmlChildrenNode, 1); | ||||
|             if (text == NULL) { | ||||
|                 continue; | ||||
|             } | ||||
| 
 | ||||
|             if (xmlStrEqual(child->name, _X("title"))) { | ||||
|                 APPEND_STR_META(doc, MetaTitle, (char *) text) | ||||
|             } else if (xmlStrEqual(child->name, _X("creator"))) { | ||||
|                 APPEND_STR_META(doc, MetaAuthor, (char *) text) | ||||
|             } else if (xmlStrEqual(child->name, _X("lastModifiedBy"))) { | ||||
|                 APPEND_STR_META(doc, MetaModifiedBy, (char *) text) | ||||
|             } | ||||
| 
 | ||||
|             xmlFree(text); | ||||
|         } | ||||
|     } | ||||
|     xmlFreeDoc(xml); | ||||
| 
 | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| #define MAX_TN_SIZE (1024 * 1024 * 15) | ||||
| 
 | ||||
| void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, struct archive_entry *entry) { | ||||
|     size_t entry_size = archive_entry_size(entry); | ||||
| 
 | ||||
|     if (entry_size <= 0 || entry_size > MAX_TN_SIZE) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     char *buf = malloc(entry_size); | ||||
|     archive_read_data(a, buf, entry_size); | ||||
| 
 | ||||
|     APPEND_TN_META(doc, 1, 1) // Size unknown
 | ||||
|     ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), buf, entry_size); | ||||
|     free(buf); | ||||
| } | ||||
| 
 | ||||
| void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) { | ||||
| 
 | ||||
|     size_t buf_len; | ||||
|     void *buf = read_all(f, &buf_len); | ||||
|     if (buf == NULL) { | ||||
|         CTX_LOG_ERROR(f->filepath, "read_all() failed") | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     struct archive *a = archive_read_new(); | ||||
|     archive_read_support_format_zip(a); | ||||
| 
 | ||||
|     int ret = archive_read_open_memory(a, buf, buf_len); | ||||
|     if (ret != ARCHIVE_OK) { | ||||
|         CTX_LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a)) | ||||
|         archive_read_free(a); | ||||
|         free(buf); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     text_buffer_t tex = text_buffer_create(ctx->content_size); | ||||
| 
 | ||||
|     struct archive_entry *entry; | ||||
|     int buffer_full = FALSE; | ||||
|     while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { | ||||
|         if (S_ISREG(archive_entry_stat(entry)->st_mode)) { | ||||
|             const char *path = archive_entry_pathname(entry); | ||||
| 
 | ||||
|             if (!buffer_full && should_read_part(path) && ctx->content_size > 0) { | ||||
|                 ret = read_part(ctx, a, &tex, doc); | ||||
|                 if (ret == READ_PART_ERR) { | ||||
|                     break; | ||||
|                 } else if (ret == TEXT_BUF_FULL) { | ||||
|                     buffer_full = TRUE; | ||||
|                 } | ||||
|             } else if (strcmp(path, "docProps/app.xml") == 0) { | ||||
|                 if (read_doc_props_app(ctx, a, doc) != 0) { | ||||
|                     break; | ||||
|                 } | ||||
|             } else if (strcmp(path, "docProps/core.xml") == 0) { | ||||
|                 if (read_doc_props(ctx, a, doc) != 0) { | ||||
|                     break; | ||||
|                 } | ||||
|             } else if (strcmp(path, "docProps/thumbnail.jpeg") == 0) { | ||||
|                 read_thumbnail(ctx, doc, a, entry); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (tex.dyn_buffer.cur > 0) { | ||||
|         text_buffer_terminate_string(&tex); | ||||
| 
 | ||||
|         meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); | ||||
|         meta->key = MetaContent; | ||||
|         strcpy(meta->str_val, tex.dyn_buffer.buf); | ||||
|         APPEND_META(doc, meta) | ||||
|     } | ||||
| 
 | ||||
|     archive_read_close(a); | ||||
|     archive_read_free(a); | ||||
|     text_buffer_destroy(&tex); | ||||
|     free(buf); | ||||
| } | ||||
							
								
								
									
										16
									
								
								third-party/libscan/libscan/ooxml/ooxml.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								third-party/libscan/libscan/ooxml/ooxml.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,16 @@ | ||||
| #ifndef SCAN_OOXML_H | ||||
| #define SCAN_OOXML_H | ||||
| 
 | ||||
| #include <stdlib.h> | ||||
| #include "../scan.h" | ||||
| 
 | ||||
| typedef struct { /* Settings and callbacks used by the OOXML (docx/pptx/xlsx) parser. */ | ||||
|     long content_size; /* text buffer capacity; text parts are read only when this is > 0 */ | ||||
|     log_callback_t log; /* sink used by the CTX_LOG_* macros */ | ||||
|     logf_callback_t logf; /* printf-style sink used by the CTX_LOG_*F macros */ | ||||
|     store_callback_t store; /* receives the embedded thumbnail, keyed by the document's path_md5 */ | ||||
| } scan_ooxml_ctx_t; | ||||
| 
 | ||||
| void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc); /* Parse an OOXML archive read from f: text content, docProps metadata and embedded thumbnail are appended to / stored for doc. */ | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										218
									
								
								third-party/libscan/libscan/raw/raw.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										218
									
								
								third-party/libscan/libscan/raw/raw.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,218 @@ | ||||
| #include "raw.h" | ||||
| #include <libraw/libraw.h> | ||||
| 
 | ||||
| #include "../media/media.h" | ||||
| #include <unistd.h> | ||||
| 
 | ||||
| 
 | ||||
| #define MIN_SIZE 32 | ||||
| 
 | ||||
| int store_thumbnail_jpeg(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) { | ||||
|     return store_image_thumbnail((scan_media_ctx_t *) ctx, img->data, img->data_size, doc, "x.jpeg"); | ||||
| } | ||||
| 
 | ||||
| int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) { | ||||
| 
 | ||||
|     int dstW; | ||||
|     int dstH; | ||||
| 
 | ||||
|     if (img->width <= ctx->tn_size && img->height <= ctx->tn_size) { | ||||
|         dstW = img->width; | ||||
|         dstH = img->height; | ||||
|     } else { | ||||
|         double ratio = (double) img->width / img->height; | ||||
|         if (img->width > img->height) { | ||||
|             dstW = ctx->tn_size; | ||||
|             dstH = (int) (ctx->tn_size / ratio); | ||||
|         } else { | ||||
|             dstW = (int) (ctx->tn_size * ratio); | ||||
|             dstH = ctx->tn_size; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) { | ||||
|         return FALSE; | ||||
|     } | ||||
| 
 | ||||
|     AVFrame *scaled_frame = av_frame_alloc(); | ||||
| 
 | ||||
|     struct SwsContext *sws_ctx = sws_getContext( | ||||
|             img->width, img->height, AV_PIX_FMT_RGB24, | ||||
|             dstW, dstH, AV_PIX_FMT_YUVJ420P, | ||||
|             SIST_SWS_ALGO, 0, 0, 0 | ||||
|     ); | ||||
| 
 | ||||
|     int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1); | ||||
|     uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len); | ||||
| 
 | ||||
|     av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1); | ||||
| 
 | ||||
|     const uint8_t *in_data[1] = {img->data}; | ||||
|     int in_line_size[1] = {3 * img->width}; | ||||
| 
 | ||||
|     sws_scale(sws_ctx, | ||||
|               in_data, in_line_size, | ||||
|               0, img->height, | ||||
|               scaled_frame->data, scaled_frame->linesize | ||||
|     ); | ||||
| 
 | ||||
|     scaled_frame->width = dstW; | ||||
|     scaled_frame->height = dstH; | ||||
|     scaled_frame->format = AV_PIX_FMT_YUV420P; | ||||
| 
 | ||||
|     sws_freeContext(sws_ctx); | ||||
| 
 | ||||
|     AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, 1.0f); | ||||
|     avcodec_send_frame(jpeg_encoder, scaled_frame); | ||||
| 
 | ||||
|     AVPacket jpeg_packet; | ||||
|     av_init_packet(&jpeg_packet); | ||||
|     avcodec_receive_packet(jpeg_encoder, &jpeg_packet); | ||||
| 
 | ||||
|     APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height) | ||||
|     ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size); | ||||
| 
 | ||||
|     av_packet_unref(&jpeg_packet); | ||||
|     av_free(*scaled_frame->data); | ||||
|     av_frame_free(&scaled_frame); | ||||
|     avcodec_free_context(&jpeg_encoder); | ||||
| 
 | ||||
|     return TRUE; | ||||
| } | ||||
| 
 | ||||
| #define DMS_REF(ref) (((ref) == 'S' || (ref) == 'W') ? -1 : 1) | ||||
| 
 | ||||
| void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) { | ||||
|     libraw_data_t *libraw_lib = libraw_init(0); | ||||
| 
 | ||||
|     if (!libraw_lib) { | ||||
|         CTX_LOG_ERROR("raw.c", "Cannot create libraw handle") | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     size_t buf_len = 0; | ||||
|     void *buf = read_all(f, &buf_len); | ||||
|     if (buf == NULL) { | ||||
|         CTX_LOG_ERROR(f->filepath, "read_all() failed") | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     int ret = libraw_open_buffer(libraw_lib, buf, buf_len); | ||||
|     if (ret != 0) { | ||||
|         CTX_LOG_ERROR(f->filepath, "Could not open raw file") | ||||
|         free(buf); | ||||
|         libraw_close(libraw_lib); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (*libraw_lib->idata.model != '\0') { | ||||
|         APPEND_STR_META(doc, MetaExifModel, libraw_lib->idata.model) | ||||
|     } | ||||
|     if (*libraw_lib->idata.make != '\0') { | ||||
|         APPEND_STR_META(doc, MetaExifMake, libraw_lib->idata.make) | ||||
|     } | ||||
|     if (*libraw_lib->idata.software != '\0') { | ||||
|         APPEND_STR_META(doc, MetaExifSoftware, libraw_lib->idata.software) | ||||
|     } | ||||
|     APPEND_LONG_META(doc, MetaWidth, libraw_lib->sizes.width) | ||||
|     APPEND_LONG_META(doc, MetaHeight, libraw_lib->sizes.height) | ||||
|     char tmp[1024]; | ||||
|     snprintf(tmp, sizeof(tmp), "%g", libraw_lib->other.iso_speed); | ||||
|     APPEND_STR_META(doc, MetaExifIsoSpeedRatings, tmp) | ||||
| 
 | ||||
|     if (*libraw_lib->other.desc != '\0') { | ||||
|         APPEND_STR_META(doc, MetaContent, libraw_lib->other.desc) | ||||
|     } | ||||
|     if (*libraw_lib->other.artist != '\0') { | ||||
|         APPEND_STR_META(doc, MetaArtist, libraw_lib->other.artist) | ||||
|     } | ||||
| 
 | ||||
|     struct tm *time = localtime(&libraw_lib->other.timestamp); | ||||
|     strftime(tmp, sizeof(tmp), "%Y:%m:%d %H:%M:%S", time); | ||||
|     APPEND_STR_META(doc, MetaExifDateTime, tmp) | ||||
| 
 | ||||
|     snprintf(tmp, sizeof(tmp), "%.1f", libraw_lib->other.focal_len); | ||||
|     APPEND_STR_META(doc, MetaExifFocalLength, tmp) | ||||
| 
 | ||||
|     snprintf(tmp, sizeof(tmp), "%.1f", libraw_lib->other.aperture); | ||||
|     APPEND_STR_META(doc, MetaExifFNumber, tmp) | ||||
| 
 | ||||
|     int denominator = (int) roundf(1 / libraw_lib->other.shutter); | ||||
|     snprintf(tmp, sizeof(tmp), "1/%d", denominator); | ||||
|     APPEND_STR_META(doc, MetaExifExposureTime, tmp) | ||||
| 
 | ||||
|     libraw_gps_info_t gps = libraw_lib->other.parsed_gps; | ||||
|     double gps_longitude_dec = | ||||
|             (gps.longtitude[0] + gps.longtitude[1] / 60 + gps.longtitude[2] / 3600) * DMS_REF(gps.longref); | ||||
|     snprintf(tmp, sizeof(tmp), "%.15f", gps_longitude_dec); | ||||
|     if (gps_longitude_dec != 0.0) { | ||||
|         APPEND_STR_META(doc, MetaExifGpsLongitudeDec, tmp) | ||||
|     } | ||||
| 
 | ||||
|     double gps_latitude_dec = (gps.latitude[0] + gps.latitude[1] / 60 + gps.latitude[2] / 3600) * DMS_REF(gps.latref); | ||||
|     snprintf(tmp, sizeof(tmp), "%.15f", gps_latitude_dec); | ||||
|     if (gps_latitude_dec != 0.0) { | ||||
|         APPEND_STR_META(doc, MetaExifGpsLatitudeDec, tmp) | ||||
|     } | ||||
| 
 | ||||
|     APPEND_STR_META(doc, MetaMediaVideoCodec, "raw") | ||||
| 
 | ||||
|     if (ctx->tn_size <= 0) { | ||||
|         free(buf); | ||||
|         libraw_close(libraw_lib); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     libraw_unpack_thumb(libraw_lib); | ||||
| 
 | ||||
|     int errc = 0; | ||||
|     libraw_processed_image_t *thumb = libraw_dcraw_make_mem_thumb(libraw_lib, &errc); | ||||
|     if (errc != 0) { | ||||
|         free(buf); | ||||
|         libraw_dcraw_clear_mem(thumb); | ||||
|         libraw_close(libraw_lib); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     int tn_ok = 0; | ||||
|     if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_JPEG) { | ||||
|         tn_ok = store_thumbnail_jpeg(ctx, thumb, doc); | ||||
|     } else if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_BITMAP) { | ||||
|         // TODO: technically this should work but is currently untested
 | ||||
|         tn_ok = store_thumbnail_rgb24(ctx, thumb, doc); | ||||
|     } | ||||
| 
 | ||||
|     libraw_dcraw_clear_mem(thumb); | ||||
| 
 | ||||
|     if (tn_ok == TRUE) { | ||||
|         free(buf); | ||||
|         libraw_close(libraw_lib); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     ret = libraw_unpack(libraw_lib); | ||||
|     if (ret != 0) { | ||||
|         CTX_LOG_ERROR(f->filepath, "Could not unpack raw file") | ||||
|         free(buf); | ||||
|         libraw_close(libraw_lib); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     libraw_dcraw_process(libraw_lib); | ||||
| 
 | ||||
|     errc = 0; | ||||
|     libraw_processed_image_t *img = libraw_dcraw_make_mem_image(libraw_lib, &errc); | ||||
|     if (errc != 0) { | ||||
|         free(buf); | ||||
|         libraw_dcraw_clear_mem(img); | ||||
|         libraw_close(libraw_lib); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     store_thumbnail_rgb24(ctx, img, doc); | ||||
| 
 | ||||
|     libraw_dcraw_clear_mem(img); | ||||
|     libraw_close(libraw_lib); | ||||
| 
 | ||||
|     free(buf); | ||||
| } | ||||
							
								
								
									
										17
									
								
								third-party/libscan/libscan/raw/raw.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								third-party/libscan/libscan/raw/raw.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,17 @@ | ||||
| #ifndef SIST2_RAW_H | ||||
| #define SIST2_RAW_H | ||||
| 
 | ||||
| #include "../scan.h" | ||||
| 
 | ||||
| typedef struct { /* Settings and callbacks used by the camera RAW parser. */ | ||||
|     log_callback_t log; /* sink used by the CTX_LOG_* macros */ | ||||
|     logf_callback_t logf; /* printf-style sink used by the CTX_LOG_*F macros */ | ||||
|     store_callback_t store; /* receives the encoded thumbnail, keyed by the document's path_md5 */ | ||||
| 
 | ||||
|     int tn_size; /* maximum thumbnail width/height in pixels; no thumbnail is produced when <= 0 */ | ||||
|     float tn_qscale; /* NOTE(review): not referenced directly in raw.c; presumably consumed through the scan_media_ctx_t cast — confirm */ | ||||
| } scan_raw_ctx_t; | ||||
| 
 | ||||
| void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc); /* Parse a camera RAW file read from f: metadata is appended to doc and a thumbnail is stored when ctx->tn_size > 0. */ | ||||
| 
 | ||||
| #endif //SIST2_RAW_H
 | ||||
							
								
								
									
										170
									
								
								third-party/libscan/libscan/scan.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										170
									
								
								third-party/libscan/libscan/scan.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,170 @@ | ||||
| #ifndef SCAN_SCAN_H | ||||
| #define SCAN_SCAN_H | ||||
| 
 | ||||
| #ifndef _GNU_SOURCE | ||||
| #define _GNU_SOURCE | ||||
| #endif | ||||
| 
 | ||||
| #include <stdio.h> | ||||
| #include <sys/stat.h> | ||||
| #include <openssl/md5.h> | ||||
| #include <openssl/sha.h> | ||||
| 
 | ||||
| #include "macros.h" | ||||
| 
 | ||||
| #define SIST_SWS_ALGO SWS_LANCZOS | ||||
| 
 | ||||
| #define UNUSED(x) __attribute__((__unused__))  x | ||||
| 
 | ||||
| typedef void (*store_callback_t)(char *key, size_t key_len, char *buf, size_t buf_len); /* Persists buf_len bytes of buf under key; parsers call it with a document's path_md5 as key and thumbnail bytes as buf. */ | ||||
| 
 | ||||
| typedef void (*logf_callback_t)(const char *filepath, int level, char *format, ...); /* printf-style log sink; level is one of the LEVEL_* constants. */ | ||||
| 
 | ||||
| typedef void (*log_callback_t)(const char *filepath, int level, char *str); /* Plain-string log sink; level is one of the LEVEL_* constants. */ | ||||
| 
 | ||||
| typedef int scan_code_t; | ||||
| #define SCAN_OK (scan_code_t) 0 | ||||
| #define SCAN_ERR_READ (scan_code_t) (-1) | ||||
| #define SCAN_ERR_SKIP (scan_code_t) (-2) | ||||
| 
 | ||||
/* Severity levels passed to the log/logf callbacks, from least to most severe. */
#define LEVEL_DEBUG 0
#define LEVEL_INFO 1
#define LEVEL_WARNING 2
#define LEVEL_ERROR 3
#define LEVEL_FATAL 4

/*
 * Logging helpers. Every macro expects a variable named `ctx` with `log` and
 * `logf` callbacks to be in scope at the call site. The trailing semicolon is
 * part of the expansion, so call sites may omit it. The *F variants need at
 * least one argument after the format string.
 */
#define CTX_LOG_DEBUGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_DEBUG, fmt, __VA_ARGS__);
#define CTX_LOG_DEBUG(filepath, str) ctx->log(filepath, LEVEL_DEBUG, str);

#define CTX_LOG_INFOF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_INFO, fmt, __VA_ARGS__);
#define CTX_LOG_INFO(filepath, str) ctx->log(filepath, LEVEL_INFO, str);

#define CTX_LOG_WARNINGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_WARNING, fmt, __VA_ARGS__);
#define CTX_LOG_WARNING(filepath, str) ctx->log(filepath, LEVEL_WARNING, str);

#define CTX_LOG_ERRORF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_ERROR, fmt, __VA_ARGS__);
#define CTX_LOG_ERROR(filepath, str) ctx->log(filepath, LEVEL_ERROR, str);

/*
 * The fatal variants log and then terminate the process with exit(-1).
 * They are wrapped in do { } while (0) so that both statements stay under an
 * unbraced `if`; otherwise exit() would run unconditionally.
 */
#define CTX_LOG_FATALF(filepath, fmt, ...) do { ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1); } while (0);
#define CTX_LOG_FATAL(filepath, str) do { ctx->log(filepath, LEVEL_FATAL, str); exit(-1); } while (0);
| 
 | ||||
| enum metakey { /* Identifies what a meta_line_t holds. Numbering starts at 1, so 0 is never a valid key. */ | ||||
|     // Keys listed as holding string values
 | ||||
|     MetaContent = 1, | ||||
|     MetaMediaAudioCodec, | ||||
|     MetaMediaVideoCodec, | ||||
|     MetaArtist, | ||||
|     MetaAlbum, | ||||
|     MetaAlbumArtist, | ||||
|     MetaGenre, | ||||
|     MetaTitle, | ||||
|     MetaFontName, | ||||
|     MetaParent, | ||||
|     MetaExifMake, | ||||
|     MetaExifSoftware, | ||||
|     MetaExifExposureTime, | ||||
|     MetaExifFNumber, | ||||
|     MetaExifFocalLength, | ||||
|     MetaExifUserComment, | ||||
|     MetaExifModel, | ||||
|     MetaExifIsoSpeedRatings, | ||||
|     MetaExifDateTime, | ||||
|     MetaAuthor, | ||||
|     MetaModifiedBy, | ||||
|     MetaThumbnail, | ||||
|     MetaChecksum, | ||||
| 
 | ||||
|     // Keys listed as holding numeric values
 | ||||
|     MetaWidth, | ||||
|     MetaHeight, | ||||
|     MetaMediaDuration, | ||||
|     MetaMediaBitrate, | ||||
|     MetaPages, | ||||
| 
 | ||||
|     // GPS keys (value type not stated here; the RAW parser appends the *Dec keys as formatted strings)
 | ||||
|     MetaExifGpsLongitudeDMS, | ||||
|     MetaExifGpsLongitudeRef, | ||||
|     MetaExifGpsLatitudeDMS, | ||||
|     MetaExifGpsLatitudeRef, | ||||
|     MetaExifGpsLatitudeDec, | ||||
|     MetaExifGpsLongitudeDec, | ||||
| }; | ||||
| 
 | ||||
| typedef struct meta_line { /* One metadata entry of a document; entries are chained through `next`. */ | ||||
|     struct meta_line *next; | ||||
|     enum metakey key; /* presumably decides which union member below is meaningful — TODO confirm */ | ||||
|     union { | ||||
|         char str_val[0]; /* zero-length trailing array: string entries are malloc'ed as sizeof(meta_line_t) + length and the text is copied here */ | ||||
|         unsigned long long_val; | ||||
|         double double_val; | ||||
|     }; | ||||
| } meta_line_t; | ||||
| 
 | ||||
| 
 | ||||
| typedef struct document { /* A scanned file together with the metadata gathered for it. */ | ||||
|     unsigned char path_md5[MD5_DIGEST_LENGTH]; /* used by the parsers as the key when storing a thumbnail */ | ||||
|     unsigned long size; | ||||
|     unsigned int mime; | ||||
|     int mtime; | ||||
|     short base; | ||||
|     short ext; | ||||
|     char has_parent; | ||||
|     meta_line_t *meta_head; /* presumably the first entry of the meta_line_t list — confirm against APPEND_META */ | ||||
|     meta_line_t *meta_tail; /* presumably the last entry of that list — confirm against APPEND_META */ | ||||
|     char *filepath; /* passed to the log callbacks to identify the document */ | ||||
| } document_t; | ||||
| 
 | ||||
| typedef struct vfile vfile_t; | ||||
| 
 | ||||
| __attribute__((warn_unused_result)) | ||||
| typedef int (*read_func_t)(struct vfile *, void *buf, size_t size); | ||||
| 
 | ||||
| __attribute__((warn_unused_result)) | ||||
| typedef long (*seek_func_t)(struct vfile *, long offset, int whence); | ||||
| 
 | ||||
| typedef void (*close_func_t)(struct vfile *); | ||||
| 
 | ||||
| typedef void (*reset_func_t)(struct vfile *); | ||||
| 
 | ||||
| typedef struct vfile { /* Readable file abstraction handed to the parsers; I/O goes through the function pointers below. */ | ||||
|     union { /* backing handle; which member is active is decided outside this header */ | ||||
|         int fd; | ||||
|         struct archive *arc; | ||||
|         const void *_test_data; | ||||
|     }; | ||||
| 
 | ||||
|     int is_fs_file; | ||||
|     int has_checksum; | ||||
|     int calculate_checksum; | ||||
|     const char *filepath; /* passed to the log callbacks to identify the file */ | ||||
|     struct stat info; /* the text parsers use info.st_size to bound how much they read */ | ||||
| 
 | ||||
|     SHA_CTX sha1_ctx; | ||||
|     unsigned char sha1_digest[SHA1_DIGEST_LENGTH]; | ||||
| 
 | ||||
|     void *rewind_buffer; | ||||
|     int rewind_buffer_size; | ||||
|     int rewind_buffer_cursor; | ||||
| 
 | ||||
|     read_func_t read; /* called as f->read(f, buf, size); parsers treat a negative result as an error */ | ||||
|     read_func_t read_rewindable; | ||||
|     close_func_t close; | ||||
|     reset_func_t reset; | ||||
|     log_callback_t log; | ||||
|     logf_callback_t logf; | ||||
| } vfile_t; | ||||
| 
 | ||||
| typedef struct parse_job_t { /* Unit of work handed to a parse_callback_t. */ | ||||
|     int base; | ||||
|     int ext; | ||||
|     struct vfile vfile; /* embedded by value, not a pointer */ | ||||
|     unsigned char parent[MD5_DIGEST_LENGTH]; | ||||
|     char filepath[1]; /* NOTE(review): one-element trailing array, presumably over-allocated to hold the whole path — confirm against the allocation site */ | ||||
| } parse_job_t; | ||||
| 
 | ||||
| 
 | ||||
| #include "util.h" | ||||
| 
 | ||||
| typedef void (*parse_callback_t)(parse_job_t *job); | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										64
									
								
								third-party/libscan/libscan/text/text.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								third-party/libscan/libscan/text/text.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,64 @@ | ||||
| #include "text.h" | ||||
| 
 | ||||
| scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) { | ||||
| 
 | ||||
|     int to_read = MIN(ctx->content_size, f->info.st_size); | ||||
| 
 | ||||
|     if (to_read <= 2) { | ||||
|         return SCAN_OK; | ||||
|     } | ||||
| 
 | ||||
|     char *buf = malloc(to_read); | ||||
|     int ret = f->read(f, buf, to_read); | ||||
|     if (ret < 0) { | ||||
|         CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret) | ||||
|         free(buf); | ||||
|         return SCAN_ERR_READ; | ||||
|     } | ||||
| 
 | ||||
|     text_buffer_t tex = text_buffer_create(ctx->content_size); | ||||
| 
 | ||||
|     if ((*(int16_t*)buf) == (int16_t)0xFFFE) { | ||||
|         text_buffer_append_string16_le(&tex, buf + 2, to_read - 2); | ||||
|     } else if((*(int16_t*)buf) == (int16_t)0xFEFF) { | ||||
|         text_buffer_append_string16_be(&tex, buf + 2, to_read - 2); | ||||
|     } else { | ||||
|         text_buffer_append_string(&tex, buf, to_read); | ||||
|     } | ||||
|     text_buffer_terminate_string(&tex); | ||||
| 
 | ||||
|     APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf); | ||||
| 
 | ||||
|     free(buf); | ||||
|     text_buffer_destroy(&tex); | ||||
| 
 | ||||
|     return SCAN_OK; | ||||
| } | ||||
| 
 | ||||
| #define MAX_MARKUP_SIZE (1024 * 1024) | ||||
| 
 | ||||
| scan_code_t parse_markup(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) { | ||||
| 
 | ||||
|     int to_read = MIN(MAX_MARKUP_SIZE, f->info.st_size); | ||||
| 
 | ||||
|     char *buf = malloc(to_read + 1); | ||||
|     int ret = f->read(f, buf, to_read); | ||||
|     if (ret < 0) { | ||||
|         CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret) | ||||
|         free(buf); | ||||
|         return SCAN_ERR_READ; | ||||
|     } | ||||
| 
 | ||||
|     *(buf + to_read) = '\0'; | ||||
| 
 | ||||
|     text_buffer_t tex = text_buffer_create(ctx->content_size); | ||||
|     text_buffer_append_markup(&tex, buf); | ||||
|     text_buffer_terminate_string(&tex); | ||||
| 
 | ||||
|     APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf); | ||||
| 
 | ||||
|     free(buf); | ||||
|     text_buffer_destroy(&tex); | ||||
| 
 | ||||
|     return SCAN_OK; | ||||
| } | ||||
							
								
								
									
										18
									
								
								third-party/libscan/libscan/text/text.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								third-party/libscan/libscan/text/text.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,18 @@ | ||||
| #ifndef SCAN_TEXT_H | ||||
| #define SCAN_TEXT_H | ||||
| 
 | ||||
| #include "../scan.h" | ||||
| #include "../util.h" | ||||
| 
 | ||||
| typedef struct { | ||||
|     long content_size; | ||||
| 
 | ||||
|     log_callback_t log; | ||||
|     logf_callback_t logf; | ||||
| } scan_text_ctx_t; | ||||
| 
 | ||||
| scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc); | ||||
| 
 | ||||
| scan_code_t parse_markup(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc); | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										0
									
								
								third-party/libscan/libscan/util.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								third-party/libscan/libscan/util.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
								
								
									
										361
									
								
								third-party/libscan/libscan/util.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										361
									
								
								third-party/libscan/libscan/util.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,361 @@ | ||||
| #ifndef SCAN_UTIL_H | ||||
| #define SCAN_UTIL_H | ||||
| 
 | ||||
| #include "stdio.h" | ||||
| #include "stdlib.h" | ||||
| #include "string.h" | ||||
| #include "../third-party/utf8.h/utf8.h" | ||||
| #include "macros.h" | ||||
| 
 | ||||
/* True when x begins with y. y must be a string literal or char array:
 * its length is taken with sizeof. */
#define STR_STARTS_WITH(x, y) (strncmp(y, x, sizeof(y) - 1) == 0)

/* Returned by the text_buffer_append_* helpers once max_size is exceeded. */
#define TEXT_BUF_FULL (-1)
/* Initial capacity, in bytes, of a dyn_buffer_t. */
#define INITIAL_BUF_SIZE (1024 * 16)

/*
 * Character filter used when building text buffers. A code point is kept
 * when it is
 *   - between '\'' and ';' (apostrophe, some punctuation, the digits),
 *   - between 'A' and 'z' (letters and the ASCII symbols between them), or
 *   - above 127, except U+00A0 and U+FFFD.
 * Everything else is ignored.
 */
#define SHOULD_KEEP_CHAR(c) (\
    ((c) >= '\'' && (c) <= ';') || \
    ((c) >= 'A' && (c) <= 'z') || \
    ((c) > 127 && (c) != 0x00A0 && (c) != 0xFFFD))
#define SHOULD_IGNORE_CHAR(c) (!(SHOULD_KEEP_CHAR(c)))
| 
 | ||||
| 
 | ||||
/** Growable byte buffer. */
typedef struct dyn_buffer {
    char *buf;    /* heap storage */
    size_t cur;   /* write offset: number of bytes written so far */
    size_t size;  /* current capacity of buf, in bytes */
} dyn_buffer_t;
| 
 | ||||
| typedef struct text_buffer { | ||||
|     long max_size; | ||||
|     int last_char_was_whitespace; | ||||
|     dyn_buffer_t dyn_buffer; | ||||
| } text_buffer_t; | ||||
| 
 | ||||
| static int utf8_validchr2(const char *s) { | ||||
|     if (0x00 == (0x80 & *s)) { | ||||
|         return TRUE; | ||||
|     } else if (0xf0 == (0xf8 & *s)) { | ||||
|         if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) || | ||||
|             (0x80 != (0xc0 & s[3]))) { | ||||
|             return FALSE; | ||||
|         } | ||||
| 
 | ||||
|         if (0x80 == (0xc0 & s[4])) { | ||||
|             return FALSE; | ||||
|         } | ||||
| 
 | ||||
|         if ((0 == (0x07 & s[0])) && (0 == (0x30 & s[1]))) { | ||||
|             return FALSE; | ||||
|         } | ||||
|     } else if (0xe0 == (0xf0 & *s)) { | ||||
|         if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2]))) { | ||||
|             return FALSE; | ||||
|         } | ||||
| 
 | ||||
|         if (0x80 == (0xc0 & s[3])) { | ||||
|             return FALSE; | ||||
|         } | ||||
| 
 | ||||
|         if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1]))) { | ||||
|             return FALSE; | ||||
|         } | ||||
|     } else if (0xc0 == (0xe0 & *s)) { | ||||
|         if (0x80 != (0xc0 & s[1])) { | ||||
|             return FALSE; | ||||
|         } | ||||
| 
 | ||||
|         if (0x80 == (0xc0 & s[2])) { | ||||
|             return FALSE; | ||||
|         } | ||||
| 
 | ||||
|         if (0 == (0x1e & s[0])) { | ||||
|             return FALSE; | ||||
|         } | ||||
|     } else { | ||||
|         return FALSE; | ||||
|     } | ||||
| 
 | ||||
|     return TRUE; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| static dyn_buffer_t dyn_buffer_create() { | ||||
|     dyn_buffer_t buf; | ||||
| 
 | ||||
|     buf.size = INITIAL_BUF_SIZE; | ||||
|     buf.cur = 0; | ||||
|     buf.buf = (char *) malloc(INITIAL_BUF_SIZE); | ||||
| 
 | ||||
|     return buf; | ||||
| } | ||||
| 
 | ||||
| static void grow_buffer(dyn_buffer_t *buf, size_t size) { | ||||
|     if (buf->cur + size > buf->size) { | ||||
|         do { | ||||
|             buf->size *= 2; | ||||
|         } while (buf->cur + size > buf->size); | ||||
| 
 | ||||
|         buf->buf = (char *) realloc(buf->buf, buf->size); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void grow_buffer_small(dyn_buffer_t *buf) { | ||||
|     if (buf->cur + sizeof(long) > buf->size) { | ||||
|         buf->size *= 2; | ||||
|         buf->buf = (char *) realloc(buf->buf, buf->size); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void dyn_buffer_write(dyn_buffer_t *buf, const void *data, size_t size) { | ||||
|     grow_buffer(buf, size); | ||||
| 
 | ||||
|     memcpy(buf->buf + buf->cur, data, size); | ||||
|     buf->cur += size; | ||||
| } | ||||
| 
 | ||||
| static void dyn_buffer_write_char(dyn_buffer_t *buf, char c) { | ||||
|     grow_buffer_small(buf); | ||||
| 
 | ||||
|     *(buf->buf + buf->cur) = c; | ||||
|     buf->cur += sizeof(c); | ||||
| } | ||||
| 
 | ||||
| static void dyn_buffer_write_str(dyn_buffer_t *buf, const char *str) { | ||||
|     dyn_buffer_write(buf, str, strlen(str)); | ||||
|     dyn_buffer_write_char(buf, '\0'); | ||||
| } | ||||
| 
 | ||||
| static void dyn_buffer_append_string(dyn_buffer_t *buf, const char *str) { | ||||
|     dyn_buffer_write(buf, str, strlen(str)); | ||||
| } | ||||
| 
 | ||||
| static void dyn_buffer_write_int(dyn_buffer_t *buf, int d) { | ||||
|     grow_buffer_small(buf); | ||||
| 
 | ||||
|     *(int *) (buf->buf + buf->cur) = d; | ||||
|     buf->cur += sizeof(int); | ||||
| } | ||||
| 
 | ||||
| static void dyn_buffer_write_short(dyn_buffer_t *buf, uint16_t s) { | ||||
|     grow_buffer_small(buf); | ||||
| 
 | ||||
|     *(uint16_t *) (buf->buf + buf->cur) = s; | ||||
|     buf->cur += sizeof(uint16_t); | ||||
| } | ||||
| 
 | ||||
| static void dyn_buffer_write_long(dyn_buffer_t *buf, unsigned long l) { | ||||
|     grow_buffer_small(buf); | ||||
| 
 | ||||
|     *(unsigned long *) (buf->buf + buf->cur) = l; | ||||
|     buf->cur += sizeof(unsigned long); | ||||
| } | ||||
| 
 | ||||
| static void dyn_buffer_destroy(dyn_buffer_t *buf) { | ||||
|     free(buf->buf); | ||||
| } | ||||
| 
 | ||||
| static void text_buffer_destroy(text_buffer_t *buf) { | ||||
|     dyn_buffer_destroy(&buf->dyn_buffer); | ||||
| } | ||||
| 
 | ||||
| static text_buffer_t text_buffer_create(long max_size) { | ||||
|     text_buffer_t text_buf; | ||||
| 
 | ||||
|     text_buf.dyn_buffer = dyn_buffer_create(); | ||||
|     text_buf.max_size = max_size; | ||||
|     text_buf.last_char_was_whitespace = FALSE; | ||||
| 
 | ||||
|     return text_buf; | ||||
| } | ||||
| 
 | ||||
| static int text_buffer_append_char(text_buffer_t *buf, int c) { | ||||
| 
 | ||||
|     if (SHOULD_IGNORE_CHAR(c) || c == ' ') { | ||||
|         if (!buf->last_char_was_whitespace && buf->dyn_buffer.cur != 0) { | ||||
|             dyn_buffer_write_char(&buf->dyn_buffer, ' '); | ||||
|             buf->last_char_was_whitespace = TRUE; | ||||
| 
 | ||||
|             if (buf->max_size > 0 && buf->dyn_buffer.cur > buf->max_size) { | ||||
|                 return TEXT_BUF_FULL; | ||||
|             } | ||||
|         } | ||||
|     } else { | ||||
|         buf->last_char_was_whitespace = FALSE; | ||||
|         grow_buffer_small(&buf->dyn_buffer); | ||||
| 
 | ||||
|         if (((utf8_int32_t) 0xffffff80 & c) == 0) { | ||||
|             *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = (char) c; | ||||
|         } else if (((utf8_int32_t) 0xfffff800 & c) == 0) { | ||||
|             *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xc0 | (char) (c >> 6); | ||||
|             *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f); | ||||
|         } else if (((utf8_int32_t) 0xffff0000 & c) == 0) { | ||||
|             *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xe0 | (char) (c >> 12); | ||||
|             *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 6) & 0x3f); | ||||
|             *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f); | ||||
|         } else { | ||||
|             *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xf0 | (char) (c >> 18); | ||||
|             *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 12) & 0x3f); | ||||
|             *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 6) & 0x3f); | ||||
|             *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f); | ||||
|         } | ||||
| 
 | ||||
|         if (buf->max_size > 0 && buf->dyn_buffer.cur > buf->max_size) { | ||||
|             return TEXT_BUF_FULL; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| static void text_buffer_terminate_string(text_buffer_t *buf) { | ||||
|     if (buf->dyn_buffer.cur > 0 && *(buf->dyn_buffer.buf + buf->dyn_buffer.cur - 1) == ' ') { | ||||
|         *(buf->dyn_buffer.buf + buf->dyn_buffer.cur - 1) = '\0'; | ||||
|     } else { | ||||
|         dyn_buffer_write_char(&buf->dyn_buffer, '\0'); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| // Naive UTF16 -> ascii conversion
 | ||||
| static int text_buffer_append_string16_le(text_buffer_t *buf, const char *str, size_t len) { | ||||
|     int ret = 0; | ||||
|     for (int i = 1; i < len; i += 2) { | ||||
|         ret = text_buffer_append_char(buf, str[i]); | ||||
|     } | ||||
|     return ret; | ||||
| } | ||||
| 
 | ||||
| static int text_buffer_append_string16_be(text_buffer_t *buf, const char *str, size_t len) { | ||||
|     int ret = 0; | ||||
|     for (int i = 0; i < len; i += 2) { | ||||
|         ret = text_buffer_append_char(buf, str[i]); | ||||
|     } | ||||
|     return ret; | ||||
| } | ||||
| 
 | ||||
| #define UTF8_END_OF_STRING \ | ||||
|     (ptr - str >= len || *ptr == 0 || \ | ||||
|     (0xc0 == (0xe0 & *ptr) && ptr - str > len - 2) || \ | ||||
|     (0xe0 == (0xf0 & *ptr) && ptr - str > len - 3) || \ | ||||
|     (0xf0 == (0xf8 & *ptr) && ptr - str > len - 4)) | ||||
| 
 | ||||
| static int text_buffer_append_string(text_buffer_t *buf, const char *str, size_t len) { | ||||
| 
 | ||||
|     const char *ptr = str; | ||||
|     const char *oldPtr = ptr; | ||||
| 
 | ||||
|     if (str == NULL || UTF8_END_OF_STRING) { | ||||
|         return 0; | ||||
|     } | ||||
| 
 | ||||
|     if (len <= 4) { | ||||
|         for (int i = 0; i < len; i++) { | ||||
|             if (((utf8_int32_t) 0xffffff80 & str[i]) == 0 && SHOULD_KEEP_CHAR(str[i])) { | ||||
|                 dyn_buffer_write_char(&buf->dyn_buffer, str[i]); | ||||
|             } | ||||
|         } | ||||
|         return 0; | ||||
|     } | ||||
| 
 | ||||
|     utf8_int32_t c; | ||||
|     char tmp[16] = {0}; | ||||
| 
 | ||||
|     do { | ||||
|         ptr = (char *) utf8codepoint(ptr, &c); | ||||
|         *(int *) tmp = 0x00000000; | ||||
|         memcpy(tmp, oldPtr, ptr - oldPtr); | ||||
|         oldPtr = ptr; | ||||
| 
 | ||||
|         if (!utf8_validchr2(tmp)) { | ||||
|             continue; | ||||
|         } | ||||
| 
 | ||||
|         int ret = text_buffer_append_char(buf, c); | ||||
| 
 | ||||
|         if (ret != 0) { | ||||
|             return ret; | ||||
|         } | ||||
|     } while (!UTF8_END_OF_STRING); | ||||
| 
 | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| static int text_buffer_append_string0(text_buffer_t *buf, const char *str) { | ||||
|     return text_buffer_append_string(buf, str, strlen(str)); | ||||
| } | ||||
| 
 | ||||
| static int text_buffer_append_markup(text_buffer_t *buf, const char *markup) { | ||||
| 
 | ||||
|     int tag_open = TRUE; | ||||
|     const char *ptr = markup; | ||||
|     const char *start = markup; | ||||
| 
 | ||||
|     while (*ptr != '\0') { | ||||
|         if (tag_open) { | ||||
|             if (*ptr == '>') { | ||||
|                 tag_open = FALSE; | ||||
|                 start = ptr + 1; | ||||
|             } | ||||
|         } else { | ||||
|             if (*ptr == '<') { | ||||
|                 tag_open = TRUE; | ||||
|                 if (ptr != start) { | ||||
|                     if (text_buffer_append_string(buf, start, (ptr - start)) == TEXT_BUF_FULL) { | ||||
|                         return TEXT_BUF_FULL; | ||||
|                     } | ||||
|                     if (text_buffer_append_char(buf, ' ') == TEXT_BUF_FULL) { | ||||
|                         return TEXT_BUF_FULL; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         ptr += 1; | ||||
|     } | ||||
| 
 | ||||
|     if (ptr != start) { | ||||
|         if (text_buffer_append_string(buf, start, (ptr - start)) == TEXT_BUF_FULL) { | ||||
|             return TEXT_BUF_FULL; | ||||
|         } | ||||
|         if (text_buffer_append_char(buf, ' ') == TEXT_BUF_FULL) { | ||||
|             return TEXT_BUF_FULL; | ||||
|         } | ||||
|     } | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| static void *read_all(vfile_t *f, size_t *size) { | ||||
|     void *buf = malloc(f->info.st_size); | ||||
|     *size = f->read(f, buf, f->info.st_size); | ||||
| 
 | ||||
|     if (*size != f->info.st_size) { | ||||
|         free(buf); | ||||
|         return NULL; | ||||
|     } | ||||
| 
 | ||||
|     return buf; | ||||
| } | ||||
| 
 | ||||
| #define STACK_BUFFER_SIZE (size_t)(4096 * 8) | ||||
| 
 | ||||
| __always_inline | ||||
| static void safe_sha1_update(SHA_CTX *ctx, void *buf, size_t size) { | ||||
|     unsigned char stack_buf[STACK_BUFFER_SIZE]; | ||||
| 
 | ||||
|     void *sha1_buf; | ||||
|     if (size <= STACK_BUFFER_SIZE) { | ||||
|         sha1_buf = stack_buf; | ||||
|     } else { | ||||
|         void *heap_sha1_buf = malloc(size); | ||||
|         sha1_buf = heap_sha1_buf; | ||||
|     } | ||||
| 
 | ||||
|     memcpy(sha1_buf, buf, size); | ||||
|     SHA1_Update(ctx, (const void *) sha1_buf, size); | ||||
| 
 | ||||
|     if (sha1_buf != stack_buf) { | ||||
|         free(sha1_buf); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										200
									
								
								third-party/libscan/libscan/wpd/libwpd_c_api.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										200
									
								
								third-party/libscan/libscan/wpd/libwpd_c_api.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,200 @@ | ||||
| #include "libwpd_c_api.h" | ||||
| #include "libwpd/libwpd.h" | ||||
| #include "libwpd/WPXProperty.h" | ||||
| #include "libwpd-stream/libwpd-stream.h" | ||||
| 
 | ||||
| class StringDocument : public WPXDocumentInterface { | ||||
| 
 | ||||
| private: | ||||
|     text_buffer_t *tex; | ||||
|     document_t *doc; | ||||
|     bool is_full; | ||||
| public: | ||||
| 
 | ||||
|     StringDocument(text_buffer_t *tex, document_t *doc) { | ||||
|         this->tex = tex; | ||||
|         this->doc = doc; | ||||
|         this->is_full = false; | ||||
|     } | ||||
| 
 | ||||
|     void setDocumentMetaData(const WPXPropertyList &propList) override { | ||||
| 
 | ||||
|         WPXPropertyList::Iter propIter(propList); | ||||
|         for (propIter.rewind(); propIter.next();) { | ||||
|             // TODO: Read metadata here ?!
 | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     void endDocument() override { | ||||
|         text_buffer_terminate_string(this->tex); | ||||
|     } | ||||
| 
 | ||||
|     void closeParagraph() override { | ||||
|         if (!this->is_full) { | ||||
|             if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) { | ||||
|                 this->is_full = true; | ||||
|             }; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     void closeSpan() override { | ||||
|         if (!this->is_full) { | ||||
|             if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) { | ||||
|                 this->is_full = true; | ||||
|             }; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     void closeSection() override { | ||||
|         if (!this->is_full) { | ||||
|             if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) { | ||||
|                 this->is_full = true; | ||||
|             }; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     void insertTab() override { | ||||
|         if (!this->is_full) { | ||||
|             if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) { | ||||
|                 this->is_full = true; | ||||
|             }; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     void insertSpace() override { | ||||
|         if (!this->is_full) { | ||||
|             if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) { | ||||
|                 this->is_full = true; | ||||
|             }; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     void insertText(const WPXString &text) override { | ||||
|         if (!this->is_full) { | ||||
|             if (text_buffer_append_string0(tex, text.cstr()) == TEXT_BUF_FULL) { | ||||
|                 this->is_full = true; | ||||
|             }; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     void insertLineBreak() override { | ||||
|         if (!this->is_full) { | ||||
|             if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) { | ||||
|                 this->is_full = true; | ||||
|             }; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     void definePageStyle(const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void closePageSpan() override { /* noop */ } | ||||
| 
 | ||||
|     void openHeader(const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void closeHeader() override { /* noop */ } | ||||
| 
 | ||||
|     void openFooter(const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void closeFooter() override { /* noop */ } | ||||
| 
 | ||||
|     void | ||||
|     defineParagraphStyle(const WPXPropertyList &propList, const WPXPropertyListVector &tabStops) override { /* noop */ } | ||||
| 
 | ||||
|     void openParagraph(const WPXPropertyList &propList, const WPXPropertyListVector &tabStops) override { /* noop */ } | ||||
| 
 | ||||
|     void defineCharacterStyle(const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void openSpan(const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void | ||||
|     defineSectionStyle(const WPXPropertyList &propList, const WPXPropertyListVector &columns) override { /* noop */ } | ||||
| 
 | ||||
|     void openSection(const WPXPropertyList &propList, const WPXPropertyListVector &columns) override { /* noop */ } | ||||
| 
 | ||||
|     void insertField(const WPXString &type, const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void defineOrderedListLevel(const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void defineUnorderedListLevel(const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void openOrderedListLevel(const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void openUnorderedListLevel(const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void closeOrderedListLevel() override { /* noop */ } | ||||
| 
 | ||||
|     void closeUnorderedListLevel() override { /* noop */ } | ||||
| 
 | ||||
|     void openListElement(const WPXPropertyList &propList, const WPXPropertyListVector &tabStops) override { /* noop */ } | ||||
| 
 | ||||
|     void closeListElement() override { /* noop */ } | ||||
| 
 | ||||
|     void openFootnote(const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void closeFootnote() override { /* noop */ } | ||||
| 
 | ||||
|     void openEndnote(const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void closeEndnote() override { /* noop */ } | ||||
| 
 | ||||
|     void openComment(const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void closeComment() override { /* noop */ } | ||||
| 
 | ||||
|     void openTextBox(const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void closeTextBox() override { /* noop */ } | ||||
| 
 | ||||
|     void openTable(const WPXPropertyList &propList, const WPXPropertyListVector &columns) override { /* noop */ } | ||||
| 
 | ||||
|     void openTableRow(const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void closeTableRow() override { /* noop */ } | ||||
| 
 | ||||
|     void openTableCell(const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void closeTableCell() override { /* noop */ } | ||||
| 
 | ||||
|     void insertCoveredTableCell(const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void closeTable() override { /* noop */ } | ||||
| 
 | ||||
|     void openFrame(const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void closeFrame() override { /* noop */ } | ||||
| 
 | ||||
|     void insertBinaryObject(const WPXPropertyList &propList, const WPXBinaryData &data) override { /* noop */ } | ||||
| 
 | ||||
|     void insertEquation(const WPXPropertyList &propList, const WPXString &data) override { /* noop */ } | ||||
| 
 | ||||
|     void openPageSpan(const WPXPropertyList &propList) override { /* noop */ } | ||||
| 
 | ||||
|     void startDocument() override { /* noop */ }; | ||||
| }; | ||||
| 
 | ||||
| 
 | ||||
| wpd_stream_t wpd_memory_stream_create(const unsigned char *buf, size_t buf_len) { | ||||
|     auto *input = new WPXStringStream(buf, buf_len); | ||||
|     return input; | ||||
| } | ||||
| 
 | ||||
| wpd_confidence_t wpd_is_file_format_supported(wpd_stream_t ptr) { | ||||
|     auto *stream = (WPXStringStream *) ptr; | ||||
|     WPDConfidence confidence = WPDocument::isFileFormatSupported(stream); | ||||
| 
 | ||||
|     return (wpd_confidence_t) confidence; | ||||
| } | ||||
| 
 | ||||
| wpd_result_t wpd_parse(wpd_stream_t ptr, text_buffer_t *tex, document_t *doc) { | ||||
|     auto *stream = (WPXStringStream *) ptr; | ||||
| 
 | ||||
|     auto myDoc = StringDocument(tex, doc); | ||||
|     WPDResult result2 = WPDocument::parse(stream, &myDoc, nullptr); | ||||
| 
 | ||||
|     return (wpd_result_t) result2; | ||||
| } | ||||
| 
 | ||||
| void wpd_memory_stream_destroy(wpd_stream_t ptr) { | ||||
|     auto *stream = (WPXStringStream *) ptr; | ||||
|     delete stream; | ||||
| } | ||||
							
								
								
									
										50
									
								
								third-party/libscan/libscan/wpd/libwpd_c_api.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								third-party/libscan/libscan/wpd/libwpd_c_api.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,50 @@ | ||||
| #ifndef SIST2_LIBWPD_C_API_H | ||||
| #define SIST2_LIBWPD_C_API_H | ||||
| 
 | ||||
| #include "stdlib.h" | ||||
| 
 | ||||
/* Gives the declarations below C linkage when this header is compiled as C++. */
#if defined(__cplusplus)
#define EXTERNC extern "C"
#else
#define EXTERNC
#endif
| 
 | ||||
| #ifdef __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
| #include "../scan.h" | ||||
| #include "../util.h" | ||||
| #ifdef __cplusplus | ||||
| }; | ||||
| #endif | ||||
| 
 | ||||
| 
 | ||||
/** Opaque handle to an input stream, created by wpd_memory_stream_create(). */
typedef void *wpd_stream_t;
| 
 | ||||
/**
 * Result of wpd_is_file_format_supported().
 * NOTE(review): the value is obtained by casting libwpd's WPDConfidence, so
 * the order here presumably has to mirror that enum - confirm against the
 * libwpd headers before reordering.
 */
typedef enum {
    C_WPD_CONFIDENCE_NONE = 0,
    C_WPD_CONFIDENCE_UNSUPPORTED_ENCRYPTION,
    C_WPD_CONFIDENCE_SUPPORTED_ENCRYPTION,
    C_WPD_CONFIDENCE_EXCELLENT
} wpd_confidence_t;
| 
 | ||||
/**
 * Result of wpd_parse().
 * NOTE(review): the value is obtained by casting libwpd's WPDResult, so the
 * order here presumably has to mirror that enum - confirm against the libwpd
 * headers before reordering.
 */
typedef enum {
    C_WPD_OK,
    C_WPD_FILE_ACCESS_ERROR,
    C_WPD_PARSE_ERROR,
    C_WPD_UNSUPPORTED_ENCRYPTION_ERROR,
    C_WPD_PASSWORD_MISSMATCH_ERROR,
    C_WPD_OLE_ERROR,
    C_WPD_UNKNOWN_ERROR
} wpd_result_t;
| 
 | ||||
| 
 | ||||
| EXTERNC wpd_confidence_t wpd_is_file_format_supported(wpd_stream_t stream); | ||||
| 
 | ||||
| EXTERNC wpd_stream_t wpd_memory_stream_create(const unsigned char *buf, size_t buf_len); | ||||
| 
 | ||||
| EXTERNC void wpd_memory_stream_destroy(wpd_stream_t stream); | ||||
| 
 | ||||
| EXTERNC wpd_result_t wpd_parse(wpd_stream_t ptr, text_buffer_t *tex, document_t *doc); | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										41
									
								
								third-party/libscan/libscan/wpd/wpd.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								third-party/libscan/libscan/wpd/wpd.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,41 @@ | ||||
| #include "wpd.h" | ||||
| #include "libwpd_c_api.h" | ||||
| 
 | ||||
| scan_code_t parse_wpd(scan_wpd_ctx_t *ctx, vfile_t *f, document_t *doc) { | ||||
| 
 | ||||
|     size_t buf_len; | ||||
|     void *buf = read_all(f, &buf_len); | ||||
| 
 | ||||
|     void *stream = wpd_memory_stream_create(buf, buf_len); | ||||
|     wpd_confidence_t conf = wpd_is_file_format_supported(stream); | ||||
| 
 | ||||
|     if (conf == C_WPD_CONFIDENCE_SUPPORTED_ENCRYPTION || conf == C_WPD_CONFIDENCE_UNSUPPORTED_ENCRYPTION) { | ||||
|         CTX_LOG_DEBUGF("wpd.c", "File is encrypted! Password-protected WPD files are not supported yet (conf=%d)", conf) | ||||
|         wpd_memory_stream_destroy(stream); | ||||
|         free(buf); | ||||
|         return SCAN_ERR_READ; | ||||
|     } | ||||
| 
 | ||||
|     if (conf != C_WPD_CONFIDENCE_EXCELLENT) { | ||||
|         CTX_LOG_ERRORF("wpd.c", "Unsupported file format! [%s] (conf=%d)", doc->filepath, conf) | ||||
|         wpd_memory_stream_destroy(stream); | ||||
|         free(buf); | ||||
|         return SCAN_ERR_READ; | ||||
|     } | ||||
| 
 | ||||
|     text_buffer_t tex = text_buffer_create(-1); | ||||
|     wpd_result_t res = wpd_parse(stream, &tex, doc); | ||||
| 
 | ||||
|     if (res != C_WPD_OK) { | ||||
|         CTX_LOG_ERRORF("wpd.c", "Error while parsing WPD file [%s] (%d)", | ||||
|                        doc->filepath, res) | ||||
|     } | ||||
| 
 | ||||
|     if (tex.dyn_buffer.cur != 0) { | ||||
|         APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf) | ||||
|     } | ||||
| 
 | ||||
|     text_buffer_destroy(&tex); | ||||
|     wpd_memory_stream_destroy(stream); | ||||
|     free(buf); | ||||
| } | ||||
							
								
								
									
										23
									
								
								third-party/libscan/libscan/wpd/wpd.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								third-party/libscan/libscan/wpd/wpd.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,23 @@ | ||||
| #ifndef SIST2_WPD_H | ||||
| #define SIST2_WPD_H | ||||
| 
 | ||||
| #include "../scan.h" | ||||
| #include "../util.h" | ||||
| 
 | ||||
| typedef struct { | ||||
|     long content_size; | ||||
| 
 | ||||
|     log_callback_t log; | ||||
|     logf_callback_t logf; | ||||
| 
 | ||||
|     unsigned int wpd_mime; | ||||
| } scan_wpd_ctx_t; | ||||
| 
 | ||||
| scan_code_t parse_wpd(scan_wpd_ctx_t *ctx, vfile_t *f, document_t *doc); | ||||
| 
 | ||||
| __always_inline | ||||
| static int is_wpd(scan_wpd_ctx_t *ctx, unsigned int mime) { | ||||
|     return mime == ctx->wpd_mime; | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										1169
									
								
								third-party/libscan/test/main.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1169
									
								
								third-party/libscan/test/main.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										114
									
								
								third-party/libscan/test/test_util.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								third-party/libscan/test/test_util.cpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,114 @@ | ||||
| #include "test_util.h" | ||||
| #include <gtest/gtest.h> | ||||
| 
 | ||||
| #include <unistd.h> | ||||
| #include <fcntl.h> | ||||
| 
 | ||||
// Failure message reported by load_file() when a test file cannot be opened.
#define FILE_NOT_FOUND_ERR "Could not find file, did you clone the test files repo?"
| 
 | ||||
| 
 | ||||
| int fs_read(struct vfile *f, void *buf, size_t size) { | ||||
| 
 | ||||
|     if (f->fd == -1) { | ||||
|         f->fd = open(f->filepath, O_RDONLY); | ||||
|         if (f->fd == -1) { | ||||
|             return -1; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return (int) read(f->fd, buf, size); | ||||
| } | ||||
| 
 | ||||
| //Note: No out of bounds check
 | ||||
| int mem_read(vfile_t *f, void *buf, size_t size) { | ||||
|     memcpy(buf, f->_test_data, size); | ||||
|     f->_test_data = (char *) f->_test_data + size; | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| void fs_close(vfile_t *f) { | ||||
|     if (f->fd != -1) { | ||||
|         close(f->fd); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void load_doc_file(const char *filepath, vfile_t *f, document_t *doc) { | ||||
|     doc->meta_head = nullptr; | ||||
|     doc->meta_tail = nullptr; | ||||
|     load_file(filepath, f); | ||||
| } | ||||
| 
 | ||||
| void load_doc_mem(void *mem, size_t mem_len, vfile_t *f, document_t *doc) { | ||||
|     doc->meta_head = nullptr; | ||||
|     doc->meta_tail = nullptr; | ||||
|     load_mem(mem, mem_len, f); | ||||
| } | ||||
| 
 | ||||
| void cleanup(document_t *doc, vfile_t *f) { | ||||
|     destroy_doc(doc); | ||||
|     CLOSE_FILE((*f)) | ||||
| } | ||||
| 
 | ||||
| void load_file(const char *filepath, vfile_t *f) { | ||||
|     stat(filepath, &f->info); | ||||
|     f->fd = open(filepath, O_RDONLY); | ||||
| 
 | ||||
|     if (f->fd == -1) { | ||||
|         FAIL() << FILE_NOT_FOUND_ERR; | ||||
|     } | ||||
| 
 | ||||
|     f->filepath = filepath; | ||||
|     f->read = fs_read; | ||||
|     f->close = fs_close; | ||||
|     f->is_fs_file = TRUE; | ||||
|     f->calculate_checksum = TRUE; | ||||
|     f->has_checksum = FALSE; | ||||
| } | ||||
| 
 | ||||
| void load_mem(void *mem, size_t size, vfile_t *f) { | ||||
|     f->filepath = "_mem_"; | ||||
|     f->_test_data = mem; | ||||
|     f->info.st_size = (int) size; | ||||
|     f->read = mem_read; | ||||
|     f->close = nullptr; | ||||
|     f->is_fs_file = TRUE; | ||||
| } | ||||
| 
 | ||||
| meta_line_t *get_meta(document_t *doc, metakey key) { | ||||
|     return get_meta_from(doc->meta_head, key); | ||||
| } | ||||
| 
 | ||||
| meta_line_t *get_meta_from(meta_line_t *meta, metakey key) { | ||||
|     while (meta != nullptr) { | ||||
|         if (meta->key == key) { | ||||
|             return meta; | ||||
|         } | ||||
|         meta = meta->next; | ||||
|     } | ||||
|     return nullptr; | ||||
| } | ||||
| 
 | ||||
| void destroy_doc(document_t *doc) { | ||||
|     meta_line_t *meta = doc->meta_head; | ||||
|     while (meta != nullptr) { | ||||
|         meta_line_t *tmp = meta; | ||||
|         meta = tmp->next; | ||||
|         free(tmp); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
/**
 * Randomly corrupt a buffer in place for fuzz-style tests.
 *
 * Performs up to `n` rounds. Each round picks a random offset and then either
 *  - with a probability of `trunc_p` percent, truncates the buffer by lowering
 *    *buf_len to max(offset, 1000), or
 *  - overwrites `width` consecutive bytes starting at the offset with random
 *    values.
 *
 * Truncation never increases *buf_len, so a buffer shorter than 1000 bytes
 * keeps its length. Rounds stop as soon as the buffer is too short to hold a
 * `width`-byte window (*buf_len < width + 2), which also avoids a modulo by
 * zero and unsigned wrap-around when computing the offset range.
 *
 * @param buf     buffer to modify
 * @param buf_len in/out: current buffer length; may shrink, never grows
 * @param width   number of bytes overwritten per round; negative values make
 *                the call a no-op
 * @param n       number of rounds
 * @param trunc_p chance (0-100) that a round truncates instead of overwriting
 */
void fuzz_buffer(char *buf, size_t *buf_len, int width, int n, int trunc_p) {
    if (buf == NULL || buf_len == NULL || width < 0) {
        return;
    }

    const size_t span = (size_t) width;
    const size_t min_truncated_len = 1000;

    for (int i = 0; i < n; i++) {

        // The offset range below is (*buf_len - span - 1); it must be >= 1.
        if (*buf_len < span + 2) {
            break;
        }

        size_t offset = (size_t) rand() % (*buf_len - span - 1);

        if (rand() % 100 < trunc_p) {
            size_t truncated = offset > min_truncated_len ? offset : min_truncated_len;
            // Only ever shrink: growing the length would expose bytes past the real buffer.
            if (truncated < *buf_len) {
                *buf_len = truncated;
            }
            continue;
        }

        for (size_t disp = 0; disp < span; disp++) {
            buf[offset + disp] = (int8_t) rand();
        }
    }
}
							
								
								
									
										46
									
								
								third-party/libscan/test/test_util.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								third-party/libscan/test/test_util.h
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,46 @@ | ||||
| #ifndef SCAN_TEST_UTIL_H | ||||
| #define SCAN_TEST_UTIL_H | ||||
| 
 | ||||
| #include "../libscan/scan.h" | ||||
| #include <fcntl.h> | ||||
| #include <unistd.h> | ||||
| 
 | ||||
| void load_file(const char *filepath, vfile_t *f); | ||||
| void load_mem(void *mem, size_t size, vfile_t *f); | ||||
| void load_doc_mem(void *mem, size_t mem_len, vfile_t *f, document_t *doc); | ||||
| void load_doc_file(const char *filepath, vfile_t *f, document_t *doc); | ||||
| void cleanup(document_t *doc, vfile_t *f); | ||||
| 
 | ||||
/**
 * printf-style log callback that discards everything it is given.
 */
static void noop_logf(const char *filepath, int level, char *format, ...) {
    // Intentionally empty: all arguments are ignored.
    (void) filepath;
    (void) level;
    (void) format;
}
| 
 | ||||
/**
 * Log callback that discards everything it is given.
 */
static void noop_log(const char *filepath, int level, char *str) {
    // Intentionally empty: all arguments are ignored.
    (void) filepath;
    (void) level;
    (void) str;
}
| 
 | ||||
// Running total of the value bytes passed to counter_store().
static size_t store_size = 0;

/**
 * Store callback that only tallies sizes: value_len is added to store_size
 * and nothing is persisted.
 *
 * The key, its length and the value bytes are ignored. (A debugging variant
 * that dumped every value to "<uuid>.jpeg" used to live here as commented-out
 * code.)
 */
static void counter_store(char* key, size_t key_len, char *value, size_t value_len) {
    (void) key;
    (void) key_len;
    (void) value;

    store_size = store_size + value_len;
}
| 
 | ||||
| meta_line_t *get_meta(document_t *doc, metakey key); | ||||
| 
 | ||||
| meta_line_t *get_meta_from(meta_line_t *meta, metakey key); | ||||
| 
 | ||||
| 
 | ||||
// Invoke the close callback of vfile `f` (an lvalue, not a pointer) when one is set.
// The argument is parenthesised so expressions such as *ptr expand correctly.
// The macro expands to a complete statement, trailing ';' included, because
// existing call sites omit the semicolon; do not use it as the unbraced body
// of an if/else.
#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));};
| 
 | ||||
| void destroy_doc(document_t *doc); | ||||
| 
 | ||||
| void fuzz_buffer(char *buf, size_t *buf_len, int width, int n, int trunc_p); | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										1
									
								
								third-party/libscan/third-party/antiword
									
									
									
									
										vendored
									
									
										Submodule
									
								
							
							
								
								
								
								
								
								
									
									
								
							
						
						
									
										1
									
								
								third-party/libscan/third-party/antiword
									
									
									
									
										vendored
									
									
										Submodule
									
								
							| @ -0,0 +1 @@ | ||||
| Subproject commit 62ae66db99e9dd88dfa31999f516f71bb8bdc8b2 | ||||
							
								
								
									
										1
									
								
								third-party/libscan/third-party/utf8.h
									
									
									
									
										vendored
									
									
										Submodule
									
								
							
							
								
								
								
								
								
								
									
									
								
							
						
						
									
										1
									
								
								third-party/libscan/third-party/utf8.h
									
									
									
									
										vendored
									
									
										Submodule
									
								
							| @ -0,0 +1 @@ | ||||
| Subproject commit 146be69f88575d753317d8ef13b16f80e0656fc7 | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user