mirror of
				https://github.com/simon987/sist2.git
				synced 2025-11-03 17:26:52 +00:00 
			
		
		
		
	Compare commits
	
		
			No commits in common. "bb91139ffbf9477310f650589ad4fcdc18dc3702" and "cd71551a225d40ba675994295ab21cbb0ffbae40" have entirely different histories.
		
	
	
		
			bb91139ffb
			...
			cd71551a22
		
	
		
@ -10,7 +10,7 @@ steps:
 | 
			
		||||
  - name: build
 | 
			
		||||
    image: simon987/sist2-build
 | 
			
		||||
    commands:
 | 
			
		||||
      - ./scripts/build.sh
 | 
			
		||||
      - ./ci/build.sh
 | 
			
		||||
  - name: docker
 | 
			
		||||
    image: plugins/docker
 | 
			
		||||
    settings:
 | 
			
		||||
@ -55,7 +55,7 @@ steps:
 | 
			
		||||
  - name: build
 | 
			
		||||
    image: simon987/sist2-build-arm64
 | 
			
		||||
    commands:
 | 
			
		||||
      - ./scripts/build_arm64.sh
 | 
			
		||||
      - ./ci/build_arm64.sh
 | 
			
		||||
  - name: scp files
 | 
			
		||||
    image: appleboy/drone-scp
 | 
			
		||||
    settings:
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										4
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@ -10,13 +10,13 @@ Makefile
 | 
			
		||||
LOG
 | 
			
		||||
sist2*
 | 
			
		||||
!sist2-vue/
 | 
			
		||||
*.sist2/
 | 
			
		||||
index.sist2/
 | 
			
		||||
bundle*.css
 | 
			
		||||
bundle.js
 | 
			
		||||
*.a
 | 
			
		||||
vgcore.*
 | 
			
		||||
build/
 | 
			
		||||
third-party/argparse
 | 
			
		||||
third-party/
 | 
			
		||||
*.idx/
 | 
			
		||||
VERSION
 | 
			
		||||
git_hash.h
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										8
									
								
								.gitmodules
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								.gitmodules
									
									
									
									
										vendored
									
									
								
							@ -3,10 +3,4 @@
 | 
			
		||||
	url = https://github.com/simon987/libscan
 | 
			
		||||
[submodule "third-party/argparse"]
 | 
			
		||||
	path = third-party/argparse
 | 
			
		||||
	url = https://github.com/simon987/argparse
 | 
			
		||||
[submodule "third-party/libscan/third-party/utf8.h"]
 | 
			
		||||
	path = third-party/libscan/third-party/utf8.h
 | 
			
		||||
	url = https://github.com/sheredom/utf8.h
 | 
			
		||||
[submodule "third-party/libscan/third-party/antiword"]
 | 
			
		||||
	path = third-party/libscan/third-party/antiword
 | 
			
		||||
	url = https://github.com/simon987/antiword
 | 
			
		||||
	url = https://github.com/cofyc/argparse
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										10
									
								
								Dockerfile
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								Dockerfile
									
									
									
									
									
								
							@ -6,10 +6,12 @@ COPY . .
 | 
			
		||||
RUN cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
 | 
			
		||||
RUN make -j$(nproc)
 | 
			
		||||
RUN strip sist2
 | 
			
		||||
RUN ls -lh
 | 
			
		||||
RUN ls -lh sist2-vue/dist/
 | 
			
		||||
 | 
			
		||||
FROM ubuntu:21.10
 | 
			
		||||
FROM ubuntu:20.10
 | 
			
		||||
 | 
			
		||||
RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*
 | 
			
		||||
RUN apt update && apt install -y curl libasan5
 | 
			
		||||
 | 
			
		||||
RUN mkdir -p /usr/share/tessdata && \
 | 
			
		||||
    cd /usr/share/tessdata/ && \
 | 
			
		||||
@ -20,9 +22,9 @@ RUN mkdir -p /usr/share/tessdata && \
 | 
			
		||||
    curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
 | 
			
		||||
    curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
 | 
			
		||||
 | 
			
		||||
ENTRYPOINT ["/root/sist2"]
 | 
			
		||||
COPY --from=build /build/sist2 /root/sist2
 | 
			
		||||
 | 
			
		||||
ENV LANG C.UTF-8
 | 
			
		||||
ENV LC_ALL C.UTF-8
 | 
			
		||||
 | 
			
		||||
COPY --from=build /build/sist2 /root/sist2
 | 
			
		||||
ENTRYPOINT ["/root/sist2"]
 | 
			
		||||
 | 
			
		||||
@ -7,9 +7,9 @@ RUN cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE
 | 
			
		||||
RUN make -j$(nproc)
 | 
			
		||||
RUN strip sist2
 | 
			
		||||
 | 
			
		||||
FROM --platform="linux/arm64/v8" ubuntu:21.10
 | 
			
		||||
FROM ubuntu:20.10
 | 
			
		||||
 | 
			
		||||
RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*
 | 
			
		||||
RUN apt update && apt install -y curl libasan5
 | 
			
		||||
 | 
			
		||||
RUN mkdir -p /usr/share/tessdata && \
 | 
			
		||||
    cd /usr/share/tessdata/ && \
 | 
			
		||||
@ -20,9 +20,9 @@ RUN mkdir -p /usr/share/tessdata && \
 | 
			
		||||
    curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
 | 
			
		||||
    curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
 | 
			
		||||
 | 
			
		||||
COPY --from=build /build/sist2 /root/sist2
 | 
			
		||||
 | 
			
		||||
ENV LANG C.UTF-8
 | 
			
		||||
ENV LC_ALL C.UTF-8
 | 
			
		||||
 | 
			
		||||
ENTRYPOINT ["/root/sist2"]
 | 
			
		||||
 | 
			
		||||
COPY --from=build /build/sist2 /root/sist2
 | 
			
		||||
ENTRYPOINT ["/root/sist2"]
 | 
			
		||||
@ -2,7 +2,7 @@
 | 
			
		||||
[](https://www.codefactor.io/repository/github/simon987/sist2)
 | 
			
		||||
[](https://files.simon987.net/.gate/sist2/simon987_sist2/)
 | 
			
		||||
 | 
			
		||||
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
 | 
			
		||||
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/?i=Demo%20files)
 | 
			
		||||
 | 
			
		||||
# sist2
 | 
			
		||||
 | 
			
		||||
@ -33,11 +33,12 @@ sist2 (Simple incremental search tool)
 | 
			
		||||
 | 
			
		||||
## Getting Started
 | 
			
		||||
 | 
			
		||||
1. Have an Elasticsearch (>= 6.8.X, ideally >=7.14.0) instance running
 | 
			
		||||
1. Have an Elasticsearch (>= 6.X.X) instance running
 | 
			
		||||
    1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
 | 
			
		||||
    1. *(or)* Run using docker:
 | 
			
		||||
        ```bash
 | 
			
		||||
        docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.14.0
 | 
			
		||||
       docker run -d --name es1 --net sist2_net -p 9200:9200 \
 | 
			
		||||
            -e "discovery.type=single-node" elasticsearch:7.14.0
 | 
			
		||||
        ```
 | 
			
		||||
    1. *(or)* Run using docker-compose:
 | 
			
		||||
        ```yaml
 | 
			
		||||
@ -51,7 +52,7 @@ sist2 (Simple incremental search tool)
 | 
			
		||||
    1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
 | 
			
		||||
    1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
 | 
			
		||||
       recommended!)*
 | 
			
		||||
    1. *(or)* `docker pull simon987/sist2:2.11.4-x64-linux`
 | 
			
		||||
    1. *(or)* `docker pull simon987/sist2:2.11.3-x64-linux`
 | 
			
		||||
 | 
			
		||||
1. See [Usage guide](docs/USAGE.md)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -14,7 +14,6 @@
 | 
			
		||||
    * [examples](#web-examples)
 | 
			
		||||
    * [rewrite_url](#rewrite_url)
 | 
			
		||||
    * [link to specific indices](#link-to-specific-indices)
 | 
			
		||||
* [elasticsearch](#elasticsearch)
 | 
			
		||||
* [exec-script](#exec-script)
 | 
			
		||||
* [tagging](#tagging)
 | 
			
		||||
* [sidecar files](#sidecar-files)
 | 
			
		||||
@ -267,20 +266,9 @@ sist2 web index1 index2 index3 index4
 | 
			
		||||
When the `rewrite_url` field is not empty, the web module ignores the `root`
 | 
			
		||||
field and will return a HTTP redirect to `<rewrite_url><path>/<name><extension>`
 | 
			
		||||
instead of serving the file from disk. 
 | 
			
		||||
Both the `root` and `rewrite_url` fields are safe to manually modify from the 
 | 
			
		||||
Both the `root` and `rewrite_url` fields are safe to manually modify from the 
 | 
			
		||||
`descriptor.json` file.
 | 
			
		||||
 | 
			
		||||
# Elasticsearch
 | 
			
		||||
 | 
			
		||||
Elasticsearch versions >=6.8.0, <8.0.0 are supported by sist2. 
 | 
			
		||||
 | 
			
		||||
Using a version >=7.14.0 is recommended to enable the following features:
 | 
			
		||||
 | 
			
		||||
- Bug fix for large documents (See #198)
 | 
			
		||||
 | 
			
		||||
When using a legacy version of ES, a notice will be displayed next to the sist2 version in the web UI.
 | 
			
		||||
If you don't care about the features above, you can ignore it or disable it in the configuration page.
 | 
			
		||||
 | 
			
		||||
## exec-script
 | 
			
		||||
 | 
			
		||||
The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.
 | 
			
		||||
 | 
			
		||||
@ -78,7 +78,6 @@
 | 
			
		||||
    "name": {
 | 
			
		||||
      "analyzer": "content_analyzer",
 | 
			
		||||
      "type": "text",
 | 
			
		||||
      "fielddata": true,
 | 
			
		||||
      "fields": {
 | 
			
		||||
        "nGram": {
 | 
			
		||||
          "type": "text",
 | 
			
		||||
 | 
			
		||||
@ -1,58 +0,0 @@
 | 
			
		||||
{
 | 
			
		||||
  "index": {
 | 
			
		||||
    "refresh_interval": "30s",
 | 
			
		||||
    "codec": "best_compression",
 | 
			
		||||
    "number_of_replicas": 0
 | 
			
		||||
  },
 | 
			
		||||
  "analysis": {
 | 
			
		||||
    "tokenizer": {
 | 
			
		||||
      "path_tokenizer": {
 | 
			
		||||
        "type": "path_hierarchy",
 | 
			
		||||
        "delimiter": "/"
 | 
			
		||||
      },
 | 
			
		||||
      "tag_tokenizer": {
 | 
			
		||||
        "type": "path_hierarchy",
 | 
			
		||||
        "delimiter": "."
 | 
			
		||||
      },
 | 
			
		||||
      "my_nGram_tokenizer": {
 | 
			
		||||
        "type": "nGram",
 | 
			
		||||
        "min_gram": 3,
 | 
			
		||||
        "max_gram": 3
 | 
			
		||||
      }
 | 
			
		||||
    },
 | 
			
		||||
    "analyzer": {
 | 
			
		||||
      "path_analyzer": {
 | 
			
		||||
        "tokenizer": "path_tokenizer",
 | 
			
		||||
        "filter": [
 | 
			
		||||
          "lowercase"
 | 
			
		||||
        ]
 | 
			
		||||
      },
 | 
			
		||||
      "tag_analyzer": {
 | 
			
		||||
        "tokenizer": "tag_tokenizer",
 | 
			
		||||
        "filter": [
 | 
			
		||||
          "lowercase"
 | 
			
		||||
        ]
 | 
			
		||||
      },
 | 
			
		||||
      "case_insensitive_kw_analyzer": {
 | 
			
		||||
        "tokenizer": "keyword",
 | 
			
		||||
        "filter": [
 | 
			
		||||
          "lowercase"
 | 
			
		||||
        ]
 | 
			
		||||
      },
 | 
			
		||||
      "my_nGram": {
 | 
			
		||||
        "tokenizer": "my_nGram_tokenizer",
 | 
			
		||||
        "filter": [
 | 
			
		||||
          "lowercase",
 | 
			
		||||
          "asciifolding"
 | 
			
		||||
        ]
 | 
			
		||||
      },
 | 
			
		||||
      "content_analyzer": {
 | 
			
		||||
        "tokenizer": "standard",
 | 
			
		||||
        "filter": [
 | 
			
		||||
          "lowercase",
 | 
			
		||||
          "asciifolding"
 | 
			
		||||
        ]
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
@ -3,7 +3,6 @@ import json
 | 
			
		||||
files = [
 | 
			
		||||
    "schema/mappings.json",
 | 
			
		||||
    "schema/settings.json",
 | 
			
		||||
    "schema/settings_legacy.json",
 | 
			
		||||
    "schema/pipeline.json",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										2
									
								
								sist2-vue/dist/css/index.css
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								sist2-vue/dist/css/index.css
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										2
									
								
								sist2-vue/dist/js/index.js
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								sist2-vue/dist/js/index.js
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							@ -51,7 +51,6 @@ export interface EsHit {
 | 
			
		||||
        duration: number
 | 
			
		||||
        tag: string[]
 | 
			
		||||
        checksum: string
 | 
			
		||||
        thumbnail: string
 | 
			
		||||
    }
 | 
			
		||||
    _props: {
 | 
			
		||||
        isSubDocument: boolean
 | 
			
		||||
@ -62,8 +61,6 @@ export interface EsHit {
 | 
			
		||||
        isPlayableImage: boolean
 | 
			
		||||
        isAudio: boolean
 | 
			
		||||
        hasThumbnail: boolean
 | 
			
		||||
        tnW: number
 | 
			
		||||
        tnH: number
 | 
			
		||||
    }
 | 
			
		||||
    highlight: {
 | 
			
		||||
        name: string[] | undefined,
 | 
			
		||||
@ -134,8 +131,6 @@ class Sist2Api {
 | 
			
		||||
 | 
			
		||||
        if ("thumbnail" in hit._source) {
 | 
			
		||||
            hit._props.hasThumbnail = true;
 | 
			
		||||
            hit._props.tnW = Number(hit._source.thumbnail.split(",")[0]);
 | 
			
		||||
            hit._props.tnH = Number(hit._source.thumbnail.split(",")[1]);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        switch (mimeCategory) {
 | 
			
		||||
 | 
			
		||||
@ -43,20 +43,6 @@ const SORT_MODES = {
 | 
			
		||||
            {_tie: {order: "asc"}}
 | 
			
		||||
        ],
 | 
			
		||||
        key: (hit: EsHit) => hit._source.size
 | 
			
		||||
    },
 | 
			
		||||
    nameAsc: {
 | 
			
		||||
        mode: [
 | 
			
		||||
            {name: {order: "asc"}},
 | 
			
		||||
            {_tie: {order: "asc"}}
 | 
			
		||||
        ],
 | 
			
		||||
        key: (hit: EsHit) => hit._source.name
 | 
			
		||||
    },
 | 
			
		||||
    nameDesc: {
 | 
			
		||||
        mode: [
 | 
			
		||||
            {name: {order: "desc"}},
 | 
			
		||||
            {_tie: {order: "asc"}}
 | 
			
		||||
        ],
 | 
			
		||||
        key: (hit: EsHit) => hit._source.name
 | 
			
		||||
    }
 | 
			
		||||
} as any;
 | 
			
		||||
 | 
			
		||||
@ -87,8 +73,6 @@ class Sist2Query {
 | 
			
		||||
        const selectedMimeTypes = getters.selectedMimeTypes;
 | 
			
		||||
        const selectedTags = getters.selectedTags;
 | 
			
		||||
 | 
			
		||||
        const legacyES = store.state.sist2Info.esVersionLegacy;
 | 
			
		||||
 | 
			
		||||
        const filters = [
 | 
			
		||||
            {terms: {index: selectedIndexIds}}
 | 
			
		||||
        ] as any[];
 | 
			
		||||
@ -203,13 +187,9 @@ class Sist2Query {
 | 
			
		||||
                    "name.nGram": {},
 | 
			
		||||
                    "content.nGram": {},
 | 
			
		||||
                    font_name: {},
 | 
			
		||||
                }
 | 
			
		||||
                },
 | 
			
		||||
                max_analyzed_offset: 9_999_999
 | 
			
		||||
            };
 | 
			
		||||
 | 
			
		||||
            if (!legacyES) {
 | 
			
		||||
                q.highlight.max_analyzed_offset = 9_999_999;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            if (getters.optSearchInPath) {
 | 
			
		||||
                q.highlight.fields["path.text"] = {};
 | 
			
		||||
                q.highlight.fields["path.nGram"] = {};
 | 
			
		||||
 | 
			
		||||
@ -5,6 +5,7 @@
 | 
			
		||||
 | 
			
		||||
    <b-card-body>
 | 
			
		||||
 | 
			
		||||
      <!-- TODO: ES connectivity, Link to GH page -->
 | 
			
		||||
      <b-table :items="tableItems" small borderless responsive="md" thead-class="hidden" class="mb-0"></b-table>
 | 
			
		||||
 | 
			
		||||
      <hr />
 | 
			
		||||
@ -31,9 +32,6 @@ export default {
 | 
			
		||||
        {key: "esIndex", value: this.$store.state.sist2Info.esIndex},
 | 
			
		||||
        {key: "tagline", value: this.$store.state.sist2Info.tagline},
 | 
			
		||||
        {key: "dev", value: this.$store.state.sist2Info.dev},
 | 
			
		||||
        {key: "esVersion", value: this.$store.state.sist2Info.esVersion},
 | 
			
		||||
        {key: "esVersionSupported", value: this.$store.state.sist2Info.esVersionSupported},
 | 
			
		||||
        {key: "esVersionLegacy", value: this.$store.state.sist2Info.esVersionLegacy},
 | 
			
		||||
      ]
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
@ -15,15 +15,11 @@
 | 
			
		||||
          <span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
 | 
			
		||||
        </div>
 | 
			
		||||
 | 
			
		||||
        <div
 | 
			
		||||
            v-if="doc._props.isImage && !hover && doc._props.tnW / doc._props.tnH < 5"
 | 
			
		||||
            class="card-img-overlay"
 | 
			
		||||
            :class="{'small-badge': smallBadge}">
 | 
			
		||||
        <div v-if="doc._props.isImage && !hover" class="card-img-overlay" :class="{'small-badge': smallBadge}">
 | 
			
		||||
          <span class="badge badge-resolution">{{ `${doc._source.width}x${doc._source.height}` }}</span>
 | 
			
		||||
        </div>
 | 
			
		||||
 | 
			
		||||
        <div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0 && !hover"
 | 
			
		||||
             class="card-img-overlay"
 | 
			
		||||
        <div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0 && !hover" class="card-img-overlay"
 | 
			
		||||
             :class="{'small-badge': smallBadge}">
 | 
			
		||||
          <span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
 | 
			
		||||
        </div>
 | 
			
		||||
@ -43,8 +39,7 @@
 | 
			
		||||
      </div>
 | 
			
		||||
 | 
			
		||||
      <!-- Audio player-->
 | 
			
		||||
      <audio v-if="doc._props.isAudio" ref="audio" preload="none" class="audio-fit fit" controls
 | 
			
		||||
             :type="doc._source.mime"
 | 
			
		||||
      <audio v-if="doc._props.isAudio" ref="audio" preload="none" class="audio-fit fit" controls :type="doc._source.mime"
 | 
			
		||||
             :src="`f/${doc._id}`"
 | 
			
		||||
             @play="onAudioPlay()"></audio>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -7,27 +7,11 @@
 | 
			
		||||
        value-field="id"></b-form-select>
 | 
			
		||||
  </div>
 | 
			
		||||
  <div v-else>
 | 
			
		||||
 | 
			
		||||
    <div class="d-flex justify-content-between align-content-center">
 | 
			
		||||
      <span>
 | 
			
		||||
        {{ selectedIndices.length }}
 | 
			
		||||
        {{ selectedIndices.length === 1 ? $t("indexPicker.selectedIndex") : $t("indexPicker.selectedIndices") }}
 | 
			
		||||
      </span>
 | 
			
		||||
 | 
			
		||||
      <div>
 | 
			
		||||
        <b-button variant="link" @click="selectAll()"> {{ $t("indexPicker.selectAll") }}</b-button>
 | 
			
		||||
        <b-button variant="link" @click="selectNone()"> {{ $t("indexPicker.selectNone") }}</b-button>
 | 
			
		||||
      </div>
 | 
			
		||||
    </div>
 | 
			
		||||
 | 
			
		||||
    <b-list-group id="index-picker-desktop" class="unselectable">
 | 
			
		||||
    <b-list-group id="index-picker-desktop">
 | 
			
		||||
      <b-list-group-item
 | 
			
		||||
          v-for="idx in indices"
 | 
			
		||||
          @click="toggleIndex(idx, $event)"
 | 
			
		||||
          @click.shift="shiftClick(idx, $event)"
 | 
			
		||||
          class="d-flex justify-content-between align-items-center list-group-item-action pointer"
 | 
			
		||||
          :class="{active: lastClickIndex === idx}"
 | 
			
		||||
      >
 | 
			
		||||
          @click="toggleIndex(idx)"
 | 
			
		||||
          class="d-flex justify-content-between align-items-center list-group-item-action pointer">
 | 
			
		||||
        <div class="d-flex">
 | 
			
		||||
          <b-checkbox @change="toggleIndex(idx)" :checked="isSelected(idx)"></b-checkbox>
 | 
			
		||||
          {{ idx.name }}
 | 
			
		||||
@ -52,7 +36,6 @@ export default Vue.extend({
 | 
			
		||||
  data() {
 | 
			
		||||
    return {
 | 
			
		||||
      loading: true,
 | 
			
		||||
      lastClickIndex: null
 | 
			
		||||
    }
 | 
			
		||||
  },
 | 
			
		||||
  computed: {
 | 
			
		||||
@ -70,50 +53,13 @@ export default Vue.extend({
 | 
			
		||||
    ...mapActions({
 | 
			
		||||
      setSelectedIndices: "setSelectedIndices"
 | 
			
		||||
    }),
 | 
			
		||||
    shiftClick(index, e) {
 | 
			
		||||
      if (this.lastClickIndex === null) {
 | 
			
		||||
        return;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      const select = this.isSelected(this.lastClickIndex);
 | 
			
		||||
 | 
			
		||||
      let leftBoundary = this.indices.indexOf(this.lastClickIndex);
 | 
			
		||||
      let rightBoundary = this.indices.indexOf(index);
 | 
			
		||||
 | 
			
		||||
      if (rightBoundary < leftBoundary) {
 | 
			
		||||
        let tmp = leftBoundary;
 | 
			
		||||
        leftBoundary = rightBoundary;
 | 
			
		||||
        rightBoundary = tmp;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      for (let i = leftBoundary; i <= rightBoundary; i++) {
 | 
			
		||||
        if (select) {
 | 
			
		||||
          if (!this.isSelected(this.indices[i])) {
 | 
			
		||||
            this.setSelectedIndices([this.indices[i], ...this.selectedIndices]);
 | 
			
		||||
          }
 | 
			
		||||
        } else {
 | 
			
		||||
          this.setSelectedIndices(this.selectedIndices.filter(idx => idx !== this.indices[i]));
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    },
 | 
			
		||||
    selectAll() {
 | 
			
		||||
      this.setSelectedIndices(this.indices);
 | 
			
		||||
    },
 | 
			
		||||
    selectNone() {
 | 
			
		||||
      this.setSelectedIndices([]);
 | 
			
		||||
    },
 | 
			
		||||
    onSelect(value) {
 | 
			
		||||
      this.setSelectedIndices(this.indices.filter(idx => value.includes(idx.id)));
 | 
			
		||||
    },
 | 
			
		||||
    formatIdxDate(timestamp: number): string {
 | 
			
		||||
      return format(new Date(timestamp * 1000), "yyyy-MM-dd");
 | 
			
		||||
    },
 | 
			
		||||
    toggleIndex(index, e) {
 | 
			
		||||
      if (e.shiftKey) {
 | 
			
		||||
        return;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      this.lastClickIndex = index;
 | 
			
		||||
    toggleIndex(index) {
 | 
			
		||||
      if (this.isSelected(index)) {
 | 
			
		||||
        this.setSelectedIndices(this.selectedIndices.filter(idx => idx.id != index.id));
 | 
			
		||||
      } else {
 | 
			
		||||
@ -146,21 +92,4 @@ export default Vue.extend({
 | 
			
		||||
  overflow-y: auto;
 | 
			
		||||
  max-height: 132px;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.btn-link:focus {
 | 
			
		||||
  box-shadow: none;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.unselectable {
 | 
			
		||||
  user-select: none;
 | 
			
		||||
  -ms-user-select: none;
 | 
			
		||||
  -moz-user-select: none;
 | 
			
		||||
  -webkit-user-select: none;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.list-group-item.active {
 | 
			
		||||
  z-index: 2;
 | 
			
		||||
  background-color: inherit;
 | 
			
		||||
  color: inherit;
 | 
			
		||||
}
 | 
			
		||||
</style>
 | 
			
		||||
@ -21,9 +21,6 @@ export default {
 | 
			
		||||
      if (mutation.type === "setUiMimeMap") {
 | 
			
		||||
        const mimeMap = mutation.payload.slice();
 | 
			
		||||
 | 
			
		||||
        const elem = document.getElementById("mimeTree");
 | 
			
		||||
        console.log(elem);
 | 
			
		||||
 | 
			
		||||
        this.mimeTree = new InspireTree({
 | 
			
		||||
          selection: {
 | 
			
		||||
            mode: 'checkbox'
 | 
			
		||||
 | 
			
		||||
@ -8,8 +8,7 @@
 | 
			
		||||
    </b-navbar-brand>
 | 
			
		||||
 | 
			
		||||
    <span class="badge badge-pill version" v-if="$store && $store.state.sist2Info">
 | 
			
		||||
      v{{ sist2Version() }}<span v-if="isDebug()">-dbg</span><span v-if="isLegacy() && !hideLegacy()">-<a
 | 
			
		||||
        href="https://github.com/simon987/sist2/blob/master/docs/USAGE.md#elasticsearch" target="_blank">legacyES</a></span>
 | 
			
		||||
      v{{ sist2Version() }}<span v-if="isDebug()">-dbg</span>
 | 
			
		||||
    </span>
 | 
			
		||||
 | 
			
		||||
    <span v-if="$store && $store.state.sist2Info" class="tagline" v-html="tagline()"></span>
 | 
			
		||||
@ -21,7 +20,6 @@
 | 
			
		||||
 | 
			
		||||
<script>
 | 
			
		||||
import Sist2Icon from "@/components/Sist2Icon";
 | 
			
		||||
 | 
			
		||||
export default {
 | 
			
		||||
  name: "NavBar",
 | 
			
		||||
  components: {Sist2Icon},
 | 
			
		||||
@ -34,12 +32,6 @@ export default {
 | 
			
		||||
    },
 | 
			
		||||
    isDebug() {
 | 
			
		||||
      return this.$store.state.sist2Info.debug;
 | 
			
		||||
    },
 | 
			
		||||
    isLegacy() {
 | 
			
		||||
      return this.$store.state.sist2Info.esVersionLegacy;
 | 
			
		||||
    },
 | 
			
		||||
    hideLegacy() {
 | 
			
		||||
      return this.$store.state.optHideLegacy;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
@ -103,7 +95,7 @@ export default {
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.theme-light .btn-link {
 | 
			
		||||
.theme-light .btn-link{
 | 
			
		||||
  color: #222;
 | 
			
		||||
}
 | 
			
		||||
</style>
 | 
			
		||||
@ -5,11 +5,9 @@
 | 
			
		||||
    <div style="float: right">
 | 
			
		||||
      <b-button v-b-toggle.collapse-1 variant="primary" class="not-mobile">{{ $t("details") }}</b-button>
 | 
			
		||||
 | 
			
		||||
      <template v-if="hitCount !== 0">
 | 
			
		||||
        <SortSelect class="ml-2"></SortSelect>
 | 
			
		||||
      <SortSelect class="ml-2"></SortSelect>
 | 
			
		||||
 | 
			
		||||
        <DisplayModeToggle class="ml-2"></DisplayModeToggle>
 | 
			
		||||
      </template>
 | 
			
		||||
      <DisplayModeToggle class="ml-2"></DisplayModeToggle>
 | 
			
		||||
    </div>
 | 
			
		||||
 | 
			
		||||
    <b-collapse id="collapse-1" class="pt-2" style="clear:both;">
 | 
			
		||||
@ -23,7 +21,7 @@
 | 
			
		||||
<script lang="ts">
 | 
			
		||||
import {EsResult} from "@/Sist2Api";
 | 
			
		||||
import Vue from "vue";
 | 
			
		||||
import {humanFileSize} from "@/util";
 | 
			
		||||
import {humanFileSize, humanTime} from "@/util";
 | 
			
		||||
import DisplayModeToggle from "@/components/DisplayModeToggle.vue";
 | 
			
		||||
import SortSelect from "@/components/SortSelect.vue";
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -19,14 +19,6 @@
 | 
			
		||||
      {{ $t("sort.sizeDesc") }}
 | 
			
		||||
    </b-dropdown-item>
 | 
			
		||||
 | 
			
		||||
    <b-dropdown-item :class="{'dropdown-active': sort === 'nameDesc'}" @click="onSelect('nameDesc')">
 | 
			
		||||
      {{ $t("sort.nameDesc") }}
 | 
			
		||||
    </b-dropdown-item>
 | 
			
		||||
 | 
			
		||||
    <b-dropdown-item :class="{'dropdown-active': sort === 'nameAsc'}" @click="onSelect('nameAsc')">
 | 
			
		||||
      {{ $t("sort.nameAsc") }}
 | 
			
		||||
    </b-dropdown-item>
 | 
			
		||||
 | 
			
		||||
    <b-dropdown-item :class="{'dropdown-active': sort === 'random'}" @click="onSelect('random')">
 | 
			
		||||
      {{ $t("sort.random") }}
 | 
			
		||||
    </b-dropdown-item>
 | 
			
		||||
 | 
			
		||||
@ -63,8 +63,7 @@ export default {
 | 
			
		||||
            slideDuration: "Slide duration",
 | 
			
		||||
            resultSize: "Number of results per page",
 | 
			
		||||
            tagOrOperator: "Use OR operator when specifying multiple tags.",
 | 
			
		||||
            hideDuplicates: "Hide duplicate results based on checksum",
 | 
			
		||||
            hideLegacy: "Hide the 'legacyES' Elasticsearch notice"
 | 
			
		||||
            hideDuplicates: "Hide duplicate results based on checksum"
 | 
			
		||||
        },
 | 
			
		||||
        queryMode: {
 | 
			
		||||
            simple: "Simple",
 | 
			
		||||
@ -131,14 +130,13 @@ export default {
 | 
			
		||||
        saveTagModalTitle: "Add tag",
 | 
			
		||||
        saveTagPlaceholder: "Tag name",
 | 
			
		||||
        confirm: "Confirm",
 | 
			
		||||
        indexPickerPlaceholder: "Select indices",
 | 
			
		||||
        sort: {
 | 
			
		||||
            relevance: "Relevance",
 | 
			
		||||
            dateAsc: "Date (Older first)",
 | 
			
		||||
            dateDesc: "Date (Newer first)",
 | 
			
		||||
            sizeAsc: "Size (Smaller first)",
 | 
			
		||||
            sizeDesc: "Size (Larger first)",
 | 
			
		||||
            nameAsc: "Name (A-z)",
 | 
			
		||||
            nameDesc: "Name (Z-a)",
 | 
			
		||||
            random: "Random",
 | 
			
		||||
        },
 | 
			
		||||
        d3: {
 | 
			
		||||
@ -146,13 +144,7 @@ export default {
 | 
			
		||||
            mimeSize: "Size distribution by media type",
 | 
			
		||||
            dateHistogram: "File modification time distribution",
 | 
			
		||||
            sizeHistogram: "File size distribution",
 | 
			
		||||
        },
 | 
			
		||||
        indexPicker: {
 | 
			
		||||
            selectNone: "Select None",
 | 
			
		||||
            selectAll: "Select All",
 | 
			
		||||
            selectedIndex: "selected index",
 | 
			
		||||
            selectedIndices: "selected indices",
 | 
			
		||||
        },
 | 
			
		||||
        }
 | 
			
		||||
    },
 | 
			
		||||
    fr: {
 | 
			
		||||
        searchBar: {
 | 
			
		||||
@ -219,8 +211,7 @@ export default {
 | 
			
		||||
            slideDuration: "Durée des diapositives",
 | 
			
		||||
            resultSize: "Nombre de résultats par page",
 | 
			
		||||
            tagOrOperator: "Utiliser l'opérateur OU lors de la spécification de plusieurs tags",
 | 
			
		||||
            hideDuplicates: "Masquer les résultats en double",
 | 
			
		||||
            hideLegacy: "Masquer la notice 'legacyES' Elasticsearch"
 | 
			
		||||
            hideDuplicates: "Masquer les résultats en double"
 | 
			
		||||
        },
 | 
			
		||||
        queryMode: {
 | 
			
		||||
            simple: "Simple",
 | 
			
		||||
@ -295,8 +286,6 @@ export default {
 | 
			
		||||
            dateDesc: "Date (Plus récent)",
 | 
			
		||||
            sizeAsc: "Taille (Plus petit)",
 | 
			
		||||
            sizeDesc: "Taille (Plus grand)",
 | 
			
		||||
            nameAsc: "Nom (A-z)",
 | 
			
		||||
            nameDesc: "Nom (Z-a)",
 | 
			
		||||
            random: "Aléatoire",
 | 
			
		||||
        },
 | 
			
		||||
        d3: {
 | 
			
		||||
@ -304,12 +293,6 @@ export default {
 | 
			
		||||
            mimeSize: "Distribution des tailles de fichiers par type de média",
 | 
			
		||||
            dateHistogram: "Distribution des dates de modification",
 | 
			
		||||
            sizeHistogram: "Distribution des tailles de fichier",
 | 
			
		||||
        },
 | 
			
		||||
        indexPicker: {
 | 
			
		||||
            selectNone: "Sélectionner aucun",
 | 
			
		||||
            selectAll: "Sélectionner tout",
 | 
			
		||||
            selectedIndex: "indice sélectionné",
 | 
			
		||||
            selectedIndices: "indices sélectionnés",
 | 
			
		||||
        },
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
@ -46,7 +46,6 @@ export default new Vuex.Store({
 | 
			
		||||
        optTreemapColor: "PuBuGn",
 | 
			
		||||
        optLightboxLoadOnlyCurrent: false,
 | 
			
		||||
        optLightboxSlideDuration: 15,
 | 
			
		||||
        optHideLegacy: false,
 | 
			
		||||
 | 
			
		||||
        _onLoadSelectedIndices: [] as string[],
 | 
			
		||||
        _onLoadSelectedMimeTypes: [] as string[],
 | 
			
		||||
@ -145,7 +144,6 @@ export default new Vuex.Store({
 | 
			
		||||
        setOptTreemapColorGroupingDepth: (state, val) => state.optTreemapColorGroupingDepth = val,
 | 
			
		||||
        setOptTreemapSize: (state, val) => state.optTreemapSize = val,
 | 
			
		||||
        setOptTreemapColor: (state, val) => state.optTreemapColor = val,
 | 
			
		||||
        setOptHideLegacy: (state, val) => state.optHideLegacy = val,
 | 
			
		||||
 | 
			
		||||
        setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
 | 
			
		||||
 | 
			
		||||
@ -341,6 +339,5 @@ export default new Vuex.Store({
 | 
			
		||||
        optLightboxLoadOnlyCurrent: state => state.optLightboxLoadOnlyCurrent,
 | 
			
		||||
        optLightboxSlideDuration: state => state.optLightboxSlideDuration,
 | 
			
		||||
        optResultSize: state => state.size,
 | 
			
		||||
        optHideLegacy: state => state.optHideLegacy,
 | 
			
		||||
    }
 | 
			
		||||
})
 | 
			
		||||
@ -19,10 +19,6 @@
 | 
			
		||||
            {{ $t("opt.lightboxLoadOnlyCurrent") }}
 | 
			
		||||
          </b-form-checkbox>
 | 
			
		||||
 | 
			
		||||
          <b-form-checkbox :checked="optHideLegacy" @input="setOptHideLegacy">
 | 
			
		||||
            {{ $t("opt.hideLegacy") }}
 | 
			
		||||
          </b-form-checkbox>
 | 
			
		||||
 | 
			
		||||
          <label>{{ $t("opt.lang") }}</label>
 | 
			
		||||
          <b-form-select :options="langOptions" :value="optLang" @input="setOptLang"></b-form-select>
 | 
			
		||||
 | 
			
		||||
@ -219,7 +215,6 @@ export default {
 | 
			
		||||
      "optTagOrOperator",
 | 
			
		||||
      "optLang",
 | 
			
		||||
      "optHideDuplicates",
 | 
			
		||||
      "optHideLegacy",
 | 
			
		||||
    ]),
 | 
			
		||||
    clientWidth() {
 | 
			
		||||
      return window.innerWidth;
 | 
			
		||||
@ -259,8 +254,7 @@ export default {
 | 
			
		||||
      "setOptResultSize",
 | 
			
		||||
      "setOptTagOrOperator",
 | 
			
		||||
      "setOptLang",
 | 
			
		||||
      "setOptHideDuplicates",
 | 
			
		||||
      "setOptHideLegacy"
 | 
			
		||||
      "setOptHideDuplicates"
 | 
			
		||||
    ]),
 | 
			
		||||
    onResetClick() {
 | 
			
		||||
      localStorage.removeItem("sist2_configuration");
 | 
			
		||||
 | 
			
		||||
@ -31,7 +31,7 @@
 | 
			
		||||
          </b-row>
 | 
			
		||||
        </b-col>
 | 
			
		||||
        <b-col>
 | 
			
		||||
          <b-tabs justified>
 | 
			
		||||
          <b-tabs>
 | 
			
		||||
            <b-tab :title="$t('mimeTypes')">
 | 
			
		||||
              <MimePicker></MimePicker>
 | 
			
		||||
            </b-tab>
 | 
			
		||||
@ -43,13 +43,9 @@
 | 
			
		||||
      </b-row>
 | 
			
		||||
    </b-card>
 | 
			
		||||
 | 
			
		||||
    <div v-show="docs.length === 0 && !uiLoading">
 | 
			
		||||
      <Preloader v-if="searchBusy" class="mt-3"></Preloader>
 | 
			
		||||
    <Preloader v-if="searchBusy && docs.length === 0" class="mt-3"></Preloader>
 | 
			
		||||
 | 
			
		||||
      <ResultsCard></ResultsCard>
 | 
			
		||||
    </div>
 | 
			
		||||
 | 
			
		||||
    <div v-if="docs.length > 0">
 | 
			
		||||
    <div v-else-if="docs.length > 0">
 | 
			
		||||
      <ResultsCard></ResultsCard>
 | 
			
		||||
 | 
			
		||||
      <DocCardWall v-if="optDisplay==='grid'" :docs="docs" :append="appendFunc"></DocCardWall>
 | 
			
		||||
@ -113,6 +109,10 @@ export default Vue.extend({
 | 
			
		||||
 | 
			
		||||
    }, 350, {leading: false});
 | 
			
		||||
 | 
			
		||||
    Sist2Api.getMimeTypes().then(mimeMap => {
 | 
			
		||||
      this.$store.commit("setUiMimeMap", mimeMap);
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    this.$store.dispatch("loadFromArgs", this.$route).then(() => {
 | 
			
		||||
      this.$store.subscribe(() => this.$store.dispatch("updateArgs", this.$router));
 | 
			
		||||
      this.$store.subscribe((mutation) => {
 | 
			
		||||
@ -138,13 +138,9 @@ export default Vue.extend({
 | 
			
		||||
      sist2.getSist2Info().then(data => {
 | 
			
		||||
        this.setSist2Info(data);
 | 
			
		||||
        this.setIndices(data.indices);
 | 
			
		||||
        this.uiLoading = false;
 | 
			
		||||
 | 
			
		||||
        Sist2Api.getMimeTypes().then(mimeMap => {
 | 
			
		||||
          this.$store.commit("setUiMimeMap", mimeMap);
 | 
			
		||||
          this.uiLoading = false;
 | 
			
		||||
          this.search(true);
 | 
			
		||||
        });
 | 
			
		||||
 | 
			
		||||
        this.search(true);
 | 
			
		||||
      }).catch(() => {
 | 
			
		||||
        this.showErrorToast();
 | 
			
		||||
      });
 | 
			
		||||
@ -213,7 +209,7 @@ export default Vue.extend({
 | 
			
		||||
        resp.hits.hits = resp.hits.hits.filter(hit => {
 | 
			
		||||
 | 
			
		||||
          if (!("checksum" in hit._source)) {
 | 
			
		||||
            return true;
 | 
			
		||||
              return true;
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          const isDupe = !this.docChecksums.has(hit._source.checksum);
 | 
			
		||||
 | 
			
		||||
@ -2,7 +2,6 @@
 | 
			
		||||
 | 
			
		||||
ScanCtx_t ScanCtx = {
 | 
			
		||||
        .stat_index_size = 0,
 | 
			
		||||
        .stat_tn_size = 0,
 | 
			
		||||
        .dbg_current_files = NULL,
 | 
			
		||||
        .pool = NULL
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@ -17,7 +17,6 @@
 | 
			
		||||
#include "libscan/wpd/wpd.h"
 | 
			
		||||
#include "libscan/json/json.h"
 | 
			
		||||
#include "src/io/store.h"
 | 
			
		||||
#include "src/index/elastic.h"
 | 
			
		||||
 | 
			
		||||
#include <glib.h>
 | 
			
		||||
#include <pcre.h>
 | 
			
		||||
@ -76,7 +75,6 @@ typedef struct {
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
    char *es_url;
 | 
			
		||||
    es_version_t *es_version;
 | 
			
		||||
    char *es_index;
 | 
			
		||||
    int batch_size;
 | 
			
		||||
    tpool_t *pool;
 | 
			
		||||
@ -88,7 +86,6 @@ typedef struct {
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
    char *es_url;
 | 
			
		||||
    es_version_t *es_version;
 | 
			
		||||
    char *es_index;
 | 
			
		||||
    int index_count;
 | 
			
		||||
    char *auth_user;
 | 
			
		||||
 | 
			
		||||
@ -253,7 +253,7 @@ void _elastic_flush(int max) {
 | 
			
		||||
    } else {
 | 
			
		||||
 | 
			
		||||
        print_errors(r);
 | 
			
		||||
        LOG_DEBUGF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
 | 
			
		||||
        LOG_INFOF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
 | 
			
		||||
        delete_queue(max);
 | 
			
		||||
 | 
			
		||||
        if (Indexer->queued != 0) {
 | 
			
		||||
@ -356,65 +356,7 @@ void finish_indexer(char *script, int async_script, char *index_id) {
 | 
			
		||||
    free_response(r);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
es_version_t *elastic_get_version(const char *es_url) {
 | 
			
		||||
    response_t *r = web_get(es_url, 30);
 | 
			
		||||
 | 
			
		||||
    char *tmp = malloc(r->size + 1);
 | 
			
		||||
    memcpy(tmp, r->body, r->size);
 | 
			
		||||
    *(tmp + r->size) = '\0';
 | 
			
		||||
    cJSON *response = cJSON_Parse(tmp);
 | 
			
		||||
    free(tmp);
 | 
			
		||||
    free_response(r);
 | 
			
		||||
 | 
			
		||||
    if (response == NULL) {
 | 
			
		||||
        return NULL;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (cJSON_GetObjectItem(response, "version") == NULL ||
 | 
			
		||||
        cJSON_GetObjectItem(cJSON_GetObjectItem(response, "version"), "number") == NULL) {
 | 
			
		||||
        cJSON_Delete(response);
 | 
			
		||||
        return NULL;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    char *version_str = cJSON_GetObjectItem(cJSON_GetObjectItem(response, "version"), "number")->valuestring;
 | 
			
		||||
 | 
			
		||||
    es_version_t *version = malloc(sizeof(es_version_t));
 | 
			
		||||
 | 
			
		||||
    const char *tok = strtok(version_str, ".");
 | 
			
		||||
    version->major = atoi(tok);
 | 
			
		||||
    tok = strtok(NULL, ".");
 | 
			
		||||
    version->minor = atoi(tok);
 | 
			
		||||
    tok = strtok(NULL, ".");
 | 
			
		||||
    version->patch = atoi(tok);
 | 
			
		||||
 | 
			
		||||
    cJSON_Delete(response);
 | 
			
		||||
 | 
			
		||||
    return version;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void elastic_init(int force_reset, const char *user_mappings, const char *user_settings) {
 | 
			
		||||
 | 
			
		||||
    es_version_t *es_version = elastic_get_version(IndexCtx.es_url);
 | 
			
		||||
    IndexCtx.es_version = es_version;
 | 
			
		||||
 | 
			
		||||
    if (es_version == NULL) {
 | 
			
		||||
        LOG_FATAL("elastic.c", "Could not get ES version")
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    LOG_INFOF("elastic.c",
 | 
			
		||||
              "Elasticsearch version is %s (supported=%d, legacy=%d)",
 | 
			
		||||
              format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), USE_LEGACY_ES_SETTINGS(es_version));
 | 
			
		||||
 | 
			
		||||
    if (!IS_SUPPORTED_ES_VERSION(es_version)) {
 | 
			
		||||
        LOG_FATAL("elastic.c", "sist2 only supports Elasticsearch v6.8 or newer")
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    char *settings = NULL;
 | 
			
		||||
    if (USE_LEGACY_ES_SETTINGS(es_version)) {
 | 
			
		||||
        settings = settings_json;
 | 
			
		||||
    } else {
 | 
			
		||||
        settings = settings_legacy_json;
 | 
			
		||||
    }
 | 
			
		||||
void elastic_init(int force_reset, const char* user_mappings, const char* user_settings) {
 | 
			
		||||
 | 
			
		||||
    // Check if index exists
 | 
			
		||||
    char url[4096];
 | 
			
		||||
@ -450,7 +392,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
 | 
			
		||||
        free_response(r);
 | 
			
		||||
 | 
			
		||||
        snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
 | 
			
		||||
        r = web_put(url, user_settings ? user_settings : settings);
 | 
			
		||||
        r = web_put(url, user_settings ? user_settings : settings_json);
 | 
			
		||||
        LOG_INFOF("elastic.c", "Update ES settings <%d>", r->status_code);
 | 
			
		||||
        if (r->status_code != 200) {
 | 
			
		||||
            print_error(r);
 | 
			
		||||
 | 
			
		||||
@ -9,26 +9,6 @@ typedef struct es_bulk_line {
 | 
			
		||||
    char line[0];
 | 
			
		||||
} es_bulk_line_t;
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
    int major;
 | 
			
		||||
    int minor;
 | 
			
		||||
    int patch;
 | 
			
		||||
} es_version_t;
 | 
			
		||||
 | 
			
		||||
#define VERSION_GE(version, maj, min) ((version)->major > (maj) || ((version)->major == (maj) && (version)->minor >= (min)))
 | 
			
		||||
#define IS_SUPPORTED_ES_VERSION(es_version) VERSION_GE((es_version), 6, 8)
 | 
			
		||||
#define USE_LEGACY_ES_SETTINGS(es_version) (!VERSION_GE((es_version), 7, 14))
 | 
			
		||||
 | 
			
		||||
__always_inline
 | 
			
		||||
static const char *format_es_version(es_version_t *version) {
 | 
			
		||||
    static char buf[64];
 | 
			
		||||
 | 
			
		||||
    snprintf(buf, sizeof(buf), "%d.%d.%d", version->major, version->minor, version->patch);
 | 
			
		||||
 | 
			
		||||
    return buf;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Note: indexer is *not* thread safe
 | 
			
		||||
 */
 | 
			
		||||
@ -51,8 +31,6 @@ cJSON *elastic_get_document(const char *id_str);
 | 
			
		||||
 | 
			
		||||
char *elastic_get_status();
 | 
			
		||||
 | 
			
		||||
es_version_t *elastic_get_version(const char *es_url);
 | 
			
		||||
 | 
			
		||||
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										3
									
								
								src/index/static_generated.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								src/index/static_generated.c
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							@ -23,6 +23,7 @@ store_t *store_create(const char *path, size_t chunk_size) {
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    store->size = (size_t) store->chunk_size;
 | 
			
		||||
    ScanCtx.stat_tn_size = 0;
 | 
			
		||||
    mdb_env_set_mapsize(store->env, store->size);
 | 
			
		||||
 | 
			
		||||
    // Open dbi
 | 
			
		||||
 | 
			
		||||
@ -43,36 +43,26 @@ int sub_strings[30];
 | 
			
		||||
 | 
			
		||||
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
 | 
			
		||||
 | 
			
		||||
    if (ftw->level > ScanCtx.depth) {
 | 
			
		||||
        if (typeflag == FTW_D) {
 | 
			
		||||
            return FTW_SKIP_SUBTREE;
 | 
			
		||||
        }
 | 
			
		||||
        return FTW_CONTINUE;
 | 
			
		||||
    }
 | 
			
		||||
    if (typeflag == FTW_F && S_ISREG(info->st_mode) && ftw->level <= ScanCtx.depth) {
 | 
			
		||||
 | 
			
		||||
    if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
 | 
			
		||||
        LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
 | 
			
		||||
        if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
 | 
			
		||||
            LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
 | 
			
		||||
 | 
			
		||||
        if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
 | 
			
		||||
            pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
 | 
			
		||||
            ScanCtx.dbg_excluded_files_count += 1;
 | 
			
		||||
            pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
 | 
			
		||||
            return 0;
 | 
			
		||||
        } else if (typeflag == FTW_D) {
 | 
			
		||||
            return FTW_SKIP_SUBTREE;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
 | 
			
		||||
        parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
 | 
			
		||||
        tpool_add_work(ScanCtx.pool, parse, job);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return FTW_CONTINUE;
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define MAX_FILE_DESCRIPTORS 64
 | 
			
		||||
 | 
			
		||||
int walk_directory_tree(const char *dirpath) {
 | 
			
		||||
    return nftw(dirpath, handle_entry, MAX_FILE_DESCRIPTORS, FTW_PHYS | FTW_ACTIONRETVAL);
 | 
			
		||||
    return nftw(dirpath, handle_entry, MAX_FILE_DESCRIPTORS, FTW_PHYS | FTW_DEPTH);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										20
									
								
								src/log.c
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								src/log.c
									
									
									
									
									
								
							@ -55,14 +55,10 @@ void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
 | 
			
		||||
        log_len += 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (PrintingProgressBar) {
 | 
			
		||||
        PrintingProgressBar = FALSE;
 | 
			
		||||
        memmove(log_str + 1, log_str, log_len);
 | 
			
		||||
        log_str[0] = '\n';
 | 
			
		||||
        log_len += 1;
 | 
			
		||||
    int ret = write(STDERR_FILENO, log_str, log_len);
 | 
			
		||||
    if (ret == -1) {
 | 
			
		||||
        LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno))
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    write(STDERR_FILENO, log_str, log_len);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void sist_logf(const char *filepath, int level, char *format, ...) {
 | 
			
		||||
@ -108,12 +104,8 @@ void sist_log(const char *filepath, int level, char *str) {
 | 
			
		||||
        );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (PrintingProgressBar) {
 | 
			
		||||
        PrintingProgressBar = FALSE;
 | 
			
		||||
        memmove(log_str + 1, log_str, log_len);
 | 
			
		||||
        log_str[0] = '\n';
 | 
			
		||||
        log_len += 1;
 | 
			
		||||
    int ret = write(STDERR_FILENO, log_str, log_len);
 | 
			
		||||
    if (ret == -1) {
 | 
			
		||||
        LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    write(STDERR_FILENO, log_str, log_len);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -433,7 +433,7 @@ void sist2_index(index_args_t *args) {
 | 
			
		||||
        cleanup = elastic_cleanup;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE, args->print == 0);
 | 
			
		||||
    IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE, FALSE);
 | 
			
		||||
    tpool_start(IndexCtx.pool);
 | 
			
		||||
 | 
			
		||||
    struct dirent *de;
 | 
			
		||||
@ -518,8 +518,8 @@ void sist2_web(web_args_t *args) {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
int main(int argc, const char *argv[]) {
 | 
			
		||||
    sigsegv_handler = signal(SIGSEGV, sig_handler);
 | 
			
		||||
    sigabrt_handler = signal(SIGABRT, sig_handler);
 | 
			
		||||
//    sigsegv_handler = signal(SIGSEGV, sig_handler);
 | 
			
		||||
//    sigabrt_handler = signal(SIGABRT, sig_handler);
 | 
			
		||||
 | 
			
		||||
    setlocale(LC_ALL, "");
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -1,8 +1,6 @@
 | 
			
		||||
#ifndef SIST_H
 | 
			
		||||
#define SIST_H
 | 
			
		||||
 | 
			
		||||
#define _GNU_SOURCE
 | 
			
		||||
 | 
			
		||||
#ifndef	FALSE
 | 
			
		||||
#define	FALSE	(0)
 | 
			
		||||
#define BOOL int
 | 
			
		||||
@ -53,7 +51,7 @@
 | 
			
		||||
#include <ctype.h>
 | 
			
		||||
#include "git_hash.h"
 | 
			
		||||
 | 
			
		||||
#define VERSION "2.11.5"
 | 
			
		||||
#define VERSION "2.11.3"
 | 
			
		||||
static const char *const Version = VERSION;
 | 
			
		||||
 | 
			
		||||
#ifndef SIST_PLATFORM
 | 
			
		||||
 | 
			
		||||
@ -177,7 +177,7 @@ static void *tpool_worker(void *arg) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void tpool_wait(tpool_t *pool) {
 | 
			
		||||
    LOG_DEBUG("tpool.c", "Waiting for worker threads to finish")
 | 
			
		||||
    LOG_INFO("tpool.c", "Waiting for worker threads to finish")
 | 
			
		||||
    pthread_mutex_lock(&(pool->work_mutex));
 | 
			
		||||
    while (TRUE) {
 | 
			
		||||
        if (pool->done_cnt < pool->work_cnt) {
 | 
			
		||||
@ -191,9 +191,7 @@ void tpool_wait(tpool_t *pool) {
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    if (pool->print_progress) {
 | 
			
		||||
        progress_bar_print(1.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
 | 
			
		||||
    }
 | 
			
		||||
    progress_bar_print(1.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
 | 
			
		||||
    pthread_mutex_unlock(&(pool->work_mutex));
 | 
			
		||||
 | 
			
		||||
    LOG_INFO("tpool.c", "Worker threads finished")
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										26
									
								
								src/util.c
									
									
									
									
									
								
							
							
						
						
									
										26
									
								
								src/util.c
									
									
									
									
									
								
							@ -84,13 +84,11 @@ char *expandpath(const char *path) {
 | 
			
		||||
    return expanded;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int PrintingProgressBar = 0;
 | 
			
		||||
 | 
			
		||||
void progress_bar_print(double percentage, size_t tn_size, size_t index_size) {
 | 
			
		||||
 | 
			
		||||
    static int last_val = -1;
 | 
			
		||||
    int val = (int) (percentage * 100);
 | 
			
		||||
    if (last_val == val || val > 100) {
 | 
			
		||||
    if (last_val == val || val > 100 || index_size < 1024) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    last_val = val;
 | 
			
		||||
@ -116,21 +114,13 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size) {
 | 
			
		||||
        index_unit = 'M';
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (tn_size == 0 && index_size == 0) {
 | 
			
		||||
        fprintf(stderr,
 | 
			
		||||
                "\r%3d%%[%.*s>%*s]",
 | 
			
		||||
                val, lpad, PBSTR, rpad, ""
 | 
			
		||||
        );
 | 
			
		||||
    } else {
 | 
			
		||||
        fprintf(stderr,
 | 
			
		||||
                "\r%3d%%[%.*s>%*s] TN:%3d%c IDX:%3d%c",
 | 
			
		||||
                val, lpad, PBSTR, rpad, "",
 | 
			
		||||
                (int) tn_size, tn_unit,
 | 
			
		||||
                (int) index_size, index_unit
 | 
			
		||||
        );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    PrintingProgressBar = TRUE;
 | 
			
		||||
    printf(
 | 
			
		||||
            "\r%3d%%[%.*s>%*s] TN:%3d%c IDX:%3d%c",
 | 
			
		||||
            val, lpad, PBSTR, rpad, "",
 | 
			
		||||
            (int) tn_size, tn_unit,
 | 
			
		||||
            (int) index_size, index_unit
 | 
			
		||||
    );
 | 
			
		||||
    fflush(stdout);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
GHashTable *incremental_get_table() {
 | 
			
		||||
 | 
			
		||||
@ -19,8 +19,6 @@ char *expandpath(const char *path);
 | 
			
		||||
 | 
			
		||||
dyn_buffer_t url_escape(char *str);
 | 
			
		||||
 | 
			
		||||
extern int PrintingProgressBar;
 | 
			
		||||
 | 
			
		||||
void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
 | 
			
		||||
 | 
			
		||||
GHashTable *incremental_get_table();
 | 
			
		||||
 | 
			
		||||
@ -252,32 +252,12 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
 | 
			
		||||
    mg_http_serve_file(nc, hm, full_path, mime, disposition);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void cache_es_version() {
 | 
			
		||||
    static int is_cached = FALSE;
 | 
			
		||||
 | 
			
		||||
    if (is_cached == TRUE) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    es_version_t *es_version = elastic_get_version(WebCtx.es_url);
 | 
			
		||||
    if (es_version != NULL) {
 | 
			
		||||
        WebCtx.es_version = es_version;
 | 
			
		||||
        is_cached = TRUE;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void index_info(struct mg_connection *nc) {
 | 
			
		||||
 | 
			
		||||
    cache_es_version();
 | 
			
		||||
 | 
			
		||||
    cJSON *json = cJSON_CreateObject();
 | 
			
		||||
    cJSON *arr = cJSON_AddArrayToObject(json, "indices");
 | 
			
		||||
 | 
			
		||||
    cJSON_AddStringToObject(json, "esIndex", WebCtx.es_index);
 | 
			
		||||
    cJSON_AddStringToObject(json, "version", Version);
 | 
			
		||||
    cJSON_AddStringToObject(json, "esVersion", format_es_version(WebCtx.es_version));
 | 
			
		||||
    cJSON_AddBoolToObject(json, "esVersionSupported", IS_SUPPORTED_ES_VERSION(WebCtx.es_version));
 | 
			
		||||
    cJSON_AddBoolToObject(json, "esVersionLegacy", USE_LEGACY_ES_SETTINGS(WebCtx.es_version));
 | 
			
		||||
    cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM));
 | 
			
		||||
    cJSON_AddStringToObject(json, "sist2Hash", Sist2CommitHash);
 | 
			
		||||
    cJSON_AddStringToObject(json, "libscanHash", LibScanCommitHash);
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										4
									
								
								src/web/static_generated.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								src/web/static_generated.c
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										2
									
								
								third-party/argparse
									
									
									
									
										vendored
									
									
								
							
							
								
								
								
								
								
								
									
									
								
							
						
						
									
										2
									
								
								third-party/argparse
									
									
									
									
										vendored
									
									
								
							@ -1 +1 @@
 | 
			
		||||
Subproject commit 225141eb3df2fc1711962e3779646423407cb3f5
 | 
			
		||||
Subproject commit ffd9c23427d0cb105e27f27f0cf97b463b6a8bf8
 | 
			
		||||
							
								
								
									
										1
									
								
								third-party/libscan
									
									
									
									
										vendored
									
									
										Submodule
									
								
							
							
								
								
								
								
								
								
									
									
								
							
						
						
									
										1
									
								
								third-party/libscan
									
									
									
									
										vendored
									
									
										Submodule
									
								
							@ -0,0 +1 @@
 | 
			
		||||
Subproject commit 3787475ecba7453a2a97ab470103606c2cecabb2
 | 
			
		||||
							
								
								
									
										12
									
								
								third-party/libscan/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										12
									
								
								third-party/libscan/.gitignore
									
									
									
									
										vendored
									
									
								
							@ -1,12 +0,0 @@
 | 
			
		||||
.idea/
 | 
			
		||||
cmake_install.cmake
 | 
			
		||||
Makefile
 | 
			
		||||
libscan.a
 | 
			
		||||
libscan.so
 | 
			
		||||
*.cbp
 | 
			
		||||
CMakeFiles
 | 
			
		||||
CMakeCache.txt
 | 
			
		||||
scan_test
 | 
			
		||||
third-party/ext_*
 | 
			
		||||
libscan-test-files
 | 
			
		||||
scan_*_test
 | 
			
		||||
							
								
								
									
										233
									
								
								third-party/libscan/CMakeLists.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										233
									
								
								third-party/libscan/CMakeLists.txt
									
									
									
									
										vendored
									
									
								
							@ -1,233 +0,0 @@
 | 
			
		||||
cmake_minimum_required(VERSION 3.15)
 | 
			
		||||
 | 
			
		||||
project(scan)
 | 
			
		||||
set(CMAKE_C_STANDARD 11)
 | 
			
		||||
 | 
			
		||||
option(BUILD_TESTS "Build tests" on)
 | 
			
		||||
 | 
			
		||||
add_subdirectory(third-party/antiword)
 | 
			
		||||
add_compile_definitions(
 | 
			
		||||
        antiword
 | 
			
		||||
        NDEBUG
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
add_library(
 | 
			
		||||
        scan
 | 
			
		||||
        libscan/util.c libscan/util.h
 | 
			
		||||
        libscan/scan.h
 | 
			
		||||
        libscan/macros.h
 | 
			
		||||
 | 
			
		||||
        libscan/text/text.c libscan/text/text.h
 | 
			
		||||
        libscan/arc/arc.c libscan/arc/arc.h
 | 
			
		||||
        libscan/ebook/ebook.c libscan/ebook/ebook.h
 | 
			
		||||
        libscan/comic/comic.c libscan/comic/comic.h
 | 
			
		||||
        libscan/ooxml/ooxml.c libscan/ooxml/ooxml.h
 | 
			
		||||
        libscan/media/media.c libscan/media/media.h
 | 
			
		||||
        libscan/font/font.c libscan/font/font.h
 | 
			
		||||
        libscan/msdoc/msdoc.c libscan/msdoc/msdoc.h
 | 
			
		||||
        libscan/json/json.c libscan/json/json.h
 | 
			
		||||
        libscan/wpd/wpd.c libscan/wpd/wpd.h libscan/wpd/libwpd_c_api.h libscan/wpd/libwpd_c_api.cpp
 | 
			
		||||
 | 
			
		||||
        third-party/utf8.h
 | 
			
		||||
        libscan/mobi/scan_mobi.c libscan/mobi/scan_mobi.h libscan/raw/raw.c libscan/raw/raw.h)
 | 
			
		||||
set_target_properties(scan PROPERTIES LINKER_LANGUAGE C)
 | 
			
		||||
 | 
			
		||||
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib .so)
 | 
			
		||||
 | 
			
		||||
find_package(cJSON CONFIG REQUIRED)
 | 
			
		||||
find_package(LibArchive REQUIRED)
 | 
			
		||||
find_package(BZip2 REQUIRED)
 | 
			
		||||
find_package(lz4 REQUIRED)
 | 
			
		||||
 | 
			
		||||
find_package(Threads REQUIRED)
 | 
			
		||||
find_package(Tesseract CONFIG REQUIRED)
 | 
			
		||||
find_package(OpenJPEG CONFIG REQUIRED)
 | 
			
		||||
find_package(JPEG REQUIRED)
 | 
			
		||||
find_package(LibXml2 REQUIRED)
 | 
			
		||||
find_package(LibLZMA REQUIRED)
 | 
			
		||||
find_package(ZLIB REQUIRED)
 | 
			
		||||
find_package(unofficial-pcre CONFIG REQUIRED)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
find_library(JBIG2DEC_LIB NAMES jbig2decd jbig2dec)
 | 
			
		||||
find_library(HARFBUZZ_LIB NAMES harfbuzz harfbuzzd)
 | 
			
		||||
find_library(FREETYPE_LIB NAMES freetype freetyped)
 | 
			
		||||
find_package(unofficial-brotli CONFIG REQUIRED)
 | 
			
		||||
find_library(LZO2_LIB NAMES lzo2)
 | 
			
		||||
 | 
			
		||||
find_library(RAW_LIB NAMES libraw.a)
 | 
			
		||||
find_library(MUPDF_LIB NAMES liblibmupdf.a)
 | 
			
		||||
find_library(CMS_LIB NAMES lcms2)
 | 
			
		||||
find_library(JAS_LIB NAMES jasper)
 | 
			
		||||
find_library(GUMBO_LIB NAMES gumbo)
 | 
			
		||||
find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
target_compile_options(
 | 
			
		||||
        scan
 | 
			
		||||
        PRIVATE
 | 
			
		||||
        -g
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
include(ExternalProject)
 | 
			
		||||
find_program(MAKE_EXE NAMES gmake nmake make)
 | 
			
		||||
ExternalProject_Add(
 | 
			
		||||
        libmobi
 | 
			
		||||
        GIT_REPOSITORY https://github.com/simon987/libmobi.git
 | 
			
		||||
        GIT_TAG "public"
 | 
			
		||||
 | 
			
		||||
        UPDATE_COMMAND ""
 | 
			
		||||
        PATCH_COMMAND ""
 | 
			
		||||
        TEST_COMMAND ""
 | 
			
		||||
        CONFIGURE_COMMAND ./autogen.sh && ./configure
 | 
			
		||||
        INSTALL_COMMAND ""
 | 
			
		||||
 | 
			
		||||
        PREFIX "third-party/ext_libmobi"
 | 
			
		||||
        SOURCE_DIR "third-party/ext_libmobi/src/libmobi"
 | 
			
		||||
        BINARY_DIR "third-party/ext_libmobi/src/libmobi"
 | 
			
		||||
 | 
			
		||||
        BUILD_COMMAND ${MAKE_EXE} -j 8 --silent
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
SET(MOBI_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/)
 | 
			
		||||
SET(MOBI_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/)
 | 
			
		||||
 | 
			
		||||
if (SIST_DEBUG)
 | 
			
		||||
    SET(FFMPEG_DEBUG "--enable-debug=3" "--disable-optimizations")
 | 
			
		||||
else()
 | 
			
		||||
    SET(FFMPEG_DEBUG "")
 | 
			
		||||
endif()
 | 
			
		||||
 | 
			
		||||
ExternalProject_Add(
 | 
			
		||||
        ffmpeg
 | 
			
		||||
        GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git
 | 
			
		||||
        GIT_TAG "n4.4"
 | 
			
		||||
 | 
			
		||||
        UPDATE_COMMAND ""
 | 
			
		||||
        PATCH_COMMAND ""
 | 
			
		||||
        TEST_COMMAND ""
 | 
			
		||||
        CONFIGURE_COMMAND ./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay
 | 
			
		||||
        --disable-ffprobe --disable-doc --disable-manpages --disable-postproc --disable-avfilter --disable-alsa
 | 
			
		||||
        --disable-lzma --disable-xlib --disable-vdpau --disable-vaapi --disable-sdl2
 | 
			
		||||
        --disable-network  ${FFMPEG_DEBUG}
 | 
			
		||||
        INSTALL_COMMAND ""
 | 
			
		||||
 | 
			
		||||
        PREFIX "third-party/ext_ffmpeg"
 | 
			
		||||
        SOURCE_DIR "third-party/ext_ffmpeg/src/ffmpeg"
 | 
			
		||||
        BINARY_DIR "third-party/ext_ffmpeg/src/ffmpeg"
 | 
			
		||||
 | 
			
		||||
        BUILD_COMMAND ${MAKE_EXE} -j33 --silent
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
SET(FFMPEG_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_ffmpeg/src/ffmpeg)
 | 
			
		||||
SET(FFMPEG_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_ffmpeg/src/ffmpeg)
 | 
			
		||||
 | 
			
		||||
ExternalProject_Add(
 | 
			
		||||
        libwpd
 | 
			
		||||
        URL http://prdownloads.sourceforge.net/libwpd/libwpd-0.9.9.tar.gz
 | 
			
		||||
 | 
			
		||||
        UPDATE_COMMAND ""
 | 
			
		||||
        PATCH_COMMAND ""
 | 
			
		||||
        TEST_COMMAND ""
 | 
			
		||||
        CONFIGURE_COMMAND ./configure --without-docs --enable-static --disable-shared
 | 
			
		||||
        INSTALL_COMMAND ""
 | 
			
		||||
 | 
			
		||||
        PREFIX "third-party/ext_libwpd"
 | 
			
		||||
        SOURCE_DIR "third-party/ext_libwpd/src/libwpd"
 | 
			
		||||
        BINARY_DIR "third-party/ext_libwpd/src/libwpd"
 | 
			
		||||
 | 
			
		||||
        BUILD_COMMAND ${MAKE_EXE} -j33
 | 
			
		||||
)
 | 
			
		||||
SET(WPD_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwpd/src/lib/.libs/)
 | 
			
		||||
SET(WPD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwpd/inc/)
 | 
			
		||||
 | 
			
		||||
add_dependencies(
 | 
			
		||||
        scan
 | 
			
		||||
        libmobi
 | 
			
		||||
        ffmpeg
 | 
			
		||||
        antiword
 | 
			
		||||
        libwpd
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
target_link_libraries(
 | 
			
		||||
        scan
 | 
			
		||||
        PUBLIC
 | 
			
		||||
 | 
			
		||||
        cjson
 | 
			
		||||
        ${LibArchive_LIBRARIES}
 | 
			
		||||
        ZLIB::ZLIB
 | 
			
		||||
        BZip2::BZip2
 | 
			
		||||
        lz4::lz4
 | 
			
		||||
        ${LZO2_LIB}
 | 
			
		||||
        LibLZMA::LibLZMA
 | 
			
		||||
 | 
			
		||||
        ${MUPDF_LIB}
 | 
			
		||||
        openjp2
 | 
			
		||||
 | 
			
		||||
        ${MOBI_LIB_DIR}/libmobi.a
 | 
			
		||||
 | 
			
		||||
        ${WPD_LIB_DIR}/libwpd-0.9.a
 | 
			
		||||
        ${WPD_LIB_DIR}/libwpd-stream-0.9.a
 | 
			
		||||
 | 
			
		||||
        ${FREETYPE_LIB}
 | 
			
		||||
        ${HARFBUZZ_LIB}
 | 
			
		||||
        ${JBIG2DEC_LIB}
 | 
			
		||||
 | 
			
		||||
        stdc++
 | 
			
		||||
 | 
			
		||||
        -Wl,--whole-archive
 | 
			
		||||
        m
 | 
			
		||||
        -Wl,--no-whole-archive
 | 
			
		||||
 | 
			
		||||
        ${JPEG_LIBRARIES}
 | 
			
		||||
        ${Tesseract_LIBRARIES}
 | 
			
		||||
        ${LIBXML2_LIBRARIES}
 | 
			
		||||
        ${FREETYPE_LIB}
 | 
			
		||||
        unofficial::brotli::brotlidec-static
 | 
			
		||||
 | 
			
		||||
        ${FFMPEG_LIB_DIR}/libavformat/libavformat.a
 | 
			
		||||
        ${FFMPEG_LIB_DIR}/libavcodec/libavcodec.a
 | 
			
		||||
        ${FFMPEG_LIB_DIR}/libavutil/libavutil.a
 | 
			
		||||
        ${FFMPEG_LIB_DIR}/libswresample/libswresample.a
 | 
			
		||||
        ${FFMPEG_LIB_DIR}/libswscale/libswscale.a
 | 
			
		||||
 | 
			
		||||
        z
 | 
			
		||||
 | 
			
		||||
        ${CMAKE_THREAD_LIBS_INIT}
 | 
			
		||||
 | 
			
		||||
        ${RAW_LIB}
 | 
			
		||||
        ${GOMP_LIB}
 | 
			
		||||
        ${CMS_LIB}
 | 
			
		||||
        ${JAS_LIB}
 | 
			
		||||
        ${GUMBO_LIB}
 | 
			
		||||
        dl
 | 
			
		||||
        antiword
 | 
			
		||||
        unofficial::pcre::pcre unofficial::pcre::pcre16 unofficial::pcre::pcre32 unofficial::pcre::pcrecpp
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
target_include_directories(
 | 
			
		||||
        scan
 | 
			
		||||
        PUBLIC
 | 
			
		||||
        ${MUPDF_INC_DIR}
 | 
			
		||||
        ${JPEG_INCLUDE_DIR}
 | 
			
		||||
        ${LIBXML2_INCLUDE_DIR}
 | 
			
		||||
        ${FFMPEG_INCLUDE_DIR}
 | 
			
		||||
        ${MOBI_INCLUDE_DIR}
 | 
			
		||||
        ${WPD_INCLUDE_DIR}
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
if (BUILD_TESTS)
 | 
			
		||||
    find_package(GTest CONFIG REQUIRED)
 | 
			
		||||
 | 
			
		||||
    add_executable(scan_ub_test test/main.cpp test/test_util.cpp test/test_util.h)
 | 
			
		||||
    target_compile_options(scan_ub_test PRIVATE -g -fsanitize=undefined -fno-omit-frame-pointer)
 | 
			
		||||
    target_link_libraries(scan_ub_test PRIVATE GTest::gtest GTest::gtest_main -fsanitize=undefined scan)
 | 
			
		||||
 | 
			
		||||
    add_executable(scan_a_test test/main.cpp test/test_util.cpp test/test_util.h)
 | 
			
		||||
    target_compile_options(scan_a_test PRIVATE -g -fsanitize=address -fno-omit-frame-pointer)
 | 
			
		||||
    target_link_libraries(scan_a_test PRIVATE GTest::gtest GTest::gtest_main -fsanitize=address scan)
 | 
			
		||||
 | 
			
		||||
    add_executable(scan_test test/main.cpp test/test_util.cpp test/test_util.h)
 | 
			
		||||
    target_compile_options(scan_test PRIVATE -g -fno-omit-frame-pointer)
 | 
			
		||||
    target_link_libraries(scan_test PRIVATE GTest::gtest GTest::gtest_main scan)
 | 
			
		||||
endif()
 | 
			
		||||
							
								
								
									
										4
									
								
								third-party/libscan/README.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								third-party/libscan/README.md
									
									
									
									
										vendored
									
									
								
							@ -1,4 +0,0 @@
 | 
			
		||||
### Run fuzz tests:
 | 
			
		||||
```bash
 | 
			
		||||
./scan_a_test --gtest_filter=*Fuzz* --gtest_repeat=100
 | 
			
		||||
```
 | 
			
		||||
							
								
								
									
										244
									
								
								third-party/libscan/libscan/arc/arc.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										244
									
								
								third-party/libscan/libscan/arc/arc.c
									
									
									
									
										vendored
									
									
								
							@ -1,244 +0,0 @@
 | 
			
		||||
#include "arc.h"
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include <fcntl.h>
 | 
			
		||||
#include <openssl/evp.h>
 | 
			
		||||
#include <pcre.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
int should_parse_filtered_file(const char *filepath, int ext) {
 | 
			
		||||
    char tmp[PATH_MAX * 2];
 | 
			
		||||
 | 
			
		||||
    if (ext == 0) {
 | 
			
		||||
        return FALSE;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (strncmp(filepath + ext, "tgz", 3) == 0) {
 | 
			
		||||
        return TRUE;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    memcpy(tmp, filepath, ext - 1);
 | 
			
		||||
    *(tmp + ext - 1) = '\0';
 | 
			
		||||
 | 
			
		||||
    char *idx = strrchr(tmp, '.');
 | 
			
		||||
 | 
			
		||||
    if (idx == NULL) {
 | 
			
		||||
        return FALSE;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (strcmp(idx, ".tar") == 0) {
 | 
			
		||||
        return TRUE;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return FALSE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void arc_close(struct vfile *f) {
 | 
			
		||||
    SHA1_Final(f->sha1_digest, &f->sha1_ctx);
 | 
			
		||||
 | 
			
		||||
    if (f->rewind_buffer != NULL) {
 | 
			
		||||
        free(f->rewind_buffer);
 | 
			
		||||
        f->rewind_buffer = NULL;
 | 
			
		||||
        f->rewind_buffer_size = 0;
 | 
			
		||||
        f->rewind_buffer_cursor = 0;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
int arc_read(struct vfile *f, void *buf, size_t size) {
 | 
			
		||||
 | 
			
		||||
    int bytes_copied = 0;
 | 
			
		||||
 | 
			
		||||
    if (f->rewind_buffer_size != 0) {
 | 
			
		||||
        if (size > f->rewind_buffer_size) {
 | 
			
		||||
            memcpy(buf, f->rewind_buffer + f->rewind_buffer_cursor, f->rewind_buffer_size);
 | 
			
		||||
 | 
			
		||||
            bytes_copied = f->rewind_buffer_size;
 | 
			
		||||
            size -= f->rewind_buffer_size;
 | 
			
		||||
            buf += f->rewind_buffer_size;
 | 
			
		||||
            f->rewind_buffer_size = 0;
 | 
			
		||||
        } else {
 | 
			
		||||
            memcpy(buf, f->rewind_buffer + f->rewind_buffer_cursor, size);
 | 
			
		||||
            f->rewind_buffer_size -= (int) size;
 | 
			
		||||
            f->rewind_buffer_cursor += (int) size;
 | 
			
		||||
 | 
			
		||||
            return (int) size;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    size_t bytes_read = archive_read_data(f->arc, buf, size);
 | 
			
		||||
 | 
			
		||||
    if (bytes_read != 0 && bytes_read <= size && f->calculate_checksum) {
 | 
			
		||||
        f->has_checksum = TRUE;
 | 
			
		||||
 | 
			
		||||
        safe_sha1_update(&f->sha1_ctx, (unsigned char *) buf, bytes_read);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (bytes_read != size && archive_errno(f->arc) != 0) {
 | 
			
		||||
        const char *error_str = archive_error_string(f->arc);
 | 
			
		||||
        if (error_str != NULL) {
 | 
			
		||||
            f->logf(f->filepath, LEVEL_ERROR, "Error reading archive file: %s", error_str);
 | 
			
		||||
        }
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return (int) bytes_read + bytes_copied;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int arc_read_rewindable(struct vfile *f, void *buf, size_t size) {
 | 
			
		||||
 | 
			
		||||
    if (f->rewind_buffer != NULL) {
 | 
			
		||||
        fprintf(stderr, "Allocated rewind buffer more than once for %s", f->filepath);
 | 
			
		||||
        exit(-1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    size_t bytes_read = archive_read_data(f->arc, buf, size);
 | 
			
		||||
 | 
			
		||||
    if (bytes_read != size && archive_errno(f->arc) != 0) {
 | 
			
		||||
        const char *error_str = archive_error_string(f->arc);
 | 
			
		||||
        if (error_str != NULL) {
 | 
			
		||||
            f->logf(f->filepath, LEVEL_ERROR, "Error reading archive file: %s", error_str);
 | 
			
		||||
        }
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    f->rewind_buffer = malloc(size);
 | 
			
		||||
    f->rewind_buffer_size = (int) size;
 | 
			
		||||
    f->rewind_buffer_cursor = 0;
 | 
			
		||||
    memcpy(f->rewind_buffer, buf, size);
 | 
			
		||||
 | 
			
		||||
    return (int) bytes_read;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Create a libarchive reader for f and open it.
 *
 * Files that live on the file system are opened by name. Any other virtual
 * file is opened through the vfile_*_callback functions, but only when
 * allow_recurse is non-zero.
 *
 * @param ctx           Archive context; a non-empty ctx->passphrase is
 *                      registered with the reader.
 * @param f             File to open.
 * @param a             Receives the new reader; left untouched when the
 *                      file is skipped.
 * @param arc_data      Callback state; its f member is always set to f.
 * @param allow_recurse Whether a non-file-system vfile may be opened.
 * @return ARC_SKIPPED when the file is not opened, otherwise the result of
 *         archive_read_open_filename() / archive_read_open().
 */
int arc_open(scan_arc_ctx_t *ctx, vfile_t *f, struct archive **a, arc_data_t *arc_data, int allow_recurse) {
    arc_data->f = f;

    if (!f->is_fs_file && !allow_recurse) {
        return ARC_SKIPPED;
    }

    /* Reader setup is the same for both ways of opening. */
    struct archive *reader = archive_read_new();
    *a = reader;

    archive_read_support_filter_all(reader);
    archive_read_support_format_all(reader);
    if (ctx->passphrase[0] != 0) {
        archive_read_add_passphrase(reader, ctx->passphrase);
    }

    if (f->is_fs_file) {
        return archive_read_open_filename(reader, f->filepath, ARC_BUF_SIZE);
    }

    return archive_read_open(
            reader, arc_data,
            vfile_open_callback,
            vfile_read_callback,
            vfile_close_callback
    );
}
 | 
			
		||||
 | 
			
		||||
/* Output vector handed to pcre_exec(); one instance per thread. */
static __thread int sub_strings[30];

/*
 * Evaluates to non-zero when str matches the `exclude` pattern.
 * Expects `exclude` and `exclude_extra` to be in scope at the point of use.
 * pcre_exec() takes the output vector size as a number of int ELEMENTS, not
 * bytes, so the element count is passed to keep PCRE inside the array.
 */
#define EXCLUDED(str) (pcre_exec(exclude, exclude_extra, str, (int) strlen(str), 0, 0, sub_strings, (int) (sizeof(sub_strings) / sizeof(sub_strings[0]))) >= 0)
 | 
			
		||||
 | 
			
		||||
/**
 * Process the archive f according to ctx->mode.
 *
 * - ARC_MODE_LIST: the path names of all regular entries are joined with
 *   spaces and attached to doc as a MetaContent meta line.
 * - any other mode: every regular entry that is not excluded is wrapped in a
 *   sub job named "<archive path>#/<entry path>" and handed to ctx->parse().
 *
 * Non-file-system vfiles are only opened when ctx->mode is ARC_MODE_RECURSE;
 * otherwise the archive is skipped and SCAN_OK is returned.
 *
 * Entries whose path name is unavailable, or whose combined path would not
 * fit in the sub job's path buffer, are skipped instead of overflowing it.
 *
 * @param ctx           Archive scan context (mode, callbacks, passphrase).
 * @param f             The archive to read.
 * @param doc           Document describing the archive itself.
 * @param exclude       Compiled exclusion pattern, or NULL for none.
 * @param exclude_extra Study data passed to pcre_exec() with exclude.
 * @return SCAN_OK on success or skip, SCAN_ERR_READ when the archive could
 *         not be opened.
 */
scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre *exclude, pcre_extra *exclude_extra) {

    struct archive *a = NULL;
    struct archive_entry *entry = NULL;

    arc_data_t arc_data;
    arc_data.f = f;

    int ret = arc_open(ctx, f, &a, &arc_data, ctx->mode == ARC_MODE_RECURSE);
    if (ret == ARC_SKIPPED) {
        return SCAN_OK;
    }

    if (ret != ARCHIVE_OK) {
        CTX_LOG_ERRORF(f->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a))
        archive_read_free(a);
        return SCAN_ERR_READ;
    }

    if (ctx->mode == ARC_MODE_LIST) {
        dyn_buffer_t buf = dyn_buffer_create();

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            if (S_ISREG(archive_entry_stat(entry)->st_mode)) {
                const char *utf8_name = archive_entry_pathname_utf8(entry);
                const char *file_path = utf8_name == NULL ? archive_entry_pathname(entry) : utf8_name;

                if (file_path == NULL) {
                    /* No usable name for this entry; nothing to list. */
                    continue;
                }

                dyn_buffer_append_string(&buf, file_path);
                dyn_buffer_write_char(&buf, ' ');
            }
        }
        dyn_buffer_write_char(&buf, '\0');

        meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
        meta_list->key = MetaContent;
        strcpy(meta_list->str_val, buf.buf);
        APPEND_META(doc, meta_list)
        dyn_buffer_destroy(&buf);

    } else {

        /* Space reserved after the job header for the entry path; assumes
         * parse_job_t ends with its filepath storage - TODO confirm. */
        const size_t filepath_cap = PATH_MAX * 2;
        parse_job_t *sub_job = malloc(sizeof(parse_job_t) + filepath_cap);

        sub_job->vfile.close = arc_close;
        sub_job->vfile.read = arc_read;
        sub_job->vfile.read_rewindable = arc_read_rewindable;
        sub_job->vfile.reset = NULL;
        sub_job->vfile.arc = a;
        sub_job->vfile.filepath = sub_job->filepath;
        sub_job->vfile.is_fs_file = FALSE;
        sub_job->vfile.rewind_buffer_size = 0;
        sub_job->vfile.rewind_buffer = NULL;
        sub_job->vfile.log = ctx->log;
        sub_job->vfile.logf = ctx->logf;
        sub_job->vfile.has_checksum = FALSE;
        sub_job->vfile.calculate_checksum = f->calculate_checksum;
        memcpy(sub_job->parent, doc->path_md5, MD5_DIGEST_LENGTH);

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            sub_job->vfile.info = *archive_entry_stat(entry);
            if (S_ISREG(sub_job->vfile.info.st_mode)) {

                const char *utf8_name = archive_entry_pathname_utf8(entry);
                const char *entry_name = utf8_name == NULL ? archive_entry_pathname(entry) : utf8_name;

                if (entry_name == NULL) {
                    /* Neither the UTF-8 nor the native path is available. */
                    continue;
                }

                /* Bounded formatting: entry names come from the archive and
                 * can be arbitrarily long. */
                int written = snprintf(sub_job->filepath, filepath_cap, "%s#/%s", f->filepath, entry_name);
                if (written < 0 || (size_t) written >= filepath_cap) {
                    CTX_LOG_ERRORF(f->filepath, "(arc.c) Entry path is too long, skipping: %s", entry_name)
                    continue;
                }

                sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;

                // Handle excludes
                if (exclude != NULL && EXCLUDED(sub_job->filepath)) {
                    CTX_LOG_DEBUGF("arc.c", "Excluded: %s", sub_job->filepath)
                    continue;
                }

                /* Only a '.' located inside the entry part of the path
                 * counts as the start of an extension. */
                char *p = strrchr(sub_job->filepath, '.');
                if (p != NULL && (p - sub_job->filepath) > strlen(f->filepath)) {
                    sub_job->ext = (int) (p - sub_job->filepath + 1);
                } else {
                    sub_job->ext = (int) strlen(sub_job->filepath);
                }

                SHA1_Init(&sub_job->vfile.sha1_ctx);

                ctx->parse(sub_job);
            }
        }

        free(sub_job);
    }

    archive_read_free(a);
    return SCAN_OK;
}
 | 
			
		||||
							
								
								
									
										80
									
								
								third-party/libscan/libscan/arc/arc.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										80
									
								
								third-party/libscan/libscan/arc/arc.h
									
									
									
									
										vendored
									
									
								
							@ -1,80 +0,0 @@
 | 
			
		||||
#ifndef SCAN_ARC_H
 | 
			
		||||
#define SCAN_ARC_H
 | 
			
		||||
 | 
			
		||||
#include <archive.h>
 | 
			
		||||
#include <archive_entry.h>
 | 
			
		||||
#include <fcntl.h>
 | 
			
		||||
#include <pcre.h>
 | 
			
		||||
#include "../scan.h"
 | 
			
		||||
 | 
			
		||||
# define ARC_SKIPPED (-1)
 | 
			
		||||
#define ARC_MODE_SKIP 0
 | 
			
		||||
#define ARC_MODE_LIST 1
 | 
			
		||||
#define ARC_MODE_SHALLOW 2
 | 
			
		||||
#define ARC_MODE_RECURSE 3
 | 
			
		||||
typedef int archive_mode_t;
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
    archive_mode_t mode;
 | 
			
		||||
 | 
			
		||||
    parse_callback_t parse;
 | 
			
		||||
    log_callback_t log;
 | 
			
		||||
    logf_callback_t logf;
 | 
			
		||||
    store_callback_t store;
 | 
			
		||||
    char passphrase[4096];
 | 
			
		||||
} scan_arc_ctx_t;
 | 
			
		||||
 | 
			
		||||
#define ARC_BUF_SIZE 8192
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
    vfile_t *f;
 | 
			
		||||
    char buf[ARC_BUF_SIZE];
 | 
			
		||||
} arc_data_t;
 | 
			
		||||
 | 
			
		||||
static int vfile_open_callback(struct archive *a, void *user_data) {
 | 
			
		||||
    arc_data_t *data = (arc_data_t *) user_data;
 | 
			
		||||
 | 
			
		||||
    if (!data->f->is_fs_file) {
 | 
			
		||||
        SHA1_Init(&data->f->sha1_ctx);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return ARCHIVE_OK;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static long vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
 | 
			
		||||
    arc_data_t *data = (arc_data_t *) user_data;
 | 
			
		||||
 | 
			
		||||
    *buf = data->buf;
 | 
			
		||||
    long ret = data->f->read(data->f, data->buf, sizeof(data->buf));
 | 
			
		||||
 | 
			
		||||
    if (!data->f->is_fs_file && ret > 0) {
 | 
			
		||||
        data->f->has_checksum = TRUE;
 | 
			
		||||
        safe_sha1_update(&data->f->sha1_ctx, (unsigned char*)data->buf, ret);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int vfile_close_callback(struct archive *a, void *user_data) {
 | 
			
		||||
    arc_data_t *data = (arc_data_t *) user_data;
 | 
			
		||||
 | 
			
		||||
    if (!data->f->is_fs_file) {
 | 
			
		||||
        SHA1_Final((unsigned char *) data->f->sha1_digest, &data->f->sha1_ctx);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return ARCHIVE_OK;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int arc_open(scan_arc_ctx_t *ctx, vfile_t *f, struct archive **a, arc_data_t *arc_data, int allow_recurse);
 | 
			
		||||
 | 
			
		||||
int should_parse_filtered_file(const char *filepath, int ext);
 | 
			
		||||
 | 
			
		||||
scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre *exclude, pcre_extra *exclude_extra);
 | 
			
		||||
 | 
			
		||||
int arc_read(struct vfile *f, void *buf, size_t size);
 | 
			
		||||
 | 
			
		||||
int arc_read_rewindable(struct vfile *f, void *buf, size_t size);
 | 
			
		||||
 | 
			
		||||
void arc_close(struct vfile *f);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										58
									
								
								third-party/libscan/libscan/comic/comic.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										58
									
								
								third-party/libscan/libscan/comic/comic.c
									
									
									
									
										vendored
									
									
								
							@ -1,58 +0,0 @@
 | 
			
		||||
#include "comic.h"
 | 
			
		||||
#include "../media/media.h"
 | 
			
		||||
#include "../arc/arc.h"
 | 
			
		||||
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <archive.h>
 | 
			
		||||
 | 
			
		||||
static scan_arc_ctx_t arc_ctx = (scan_arc_ctx_t) {.passphrase = {0,}};
 | 
			
		||||
 | 
			
		||||
/**
 * Create a thumbnail for a comic archive from one of its images.
 *
 * Regular entries are visited in archive order; the first one ending in
 * ".png", ".jpg" or ".jpeg" for which store_image_thumbnail() returns TRUE
 * ends the search. Does nothing when ctx->tn_size is not positive.
 *
 * Entries without a usable name or with a non-positive recorded size are
 * skipped. A read or allocation failure stops the scan of the archive.
 *
 * @param ctx Comic scan context.
 * @param f   The comic archive.
 * @param doc Document that receives the thumbnail.
 */
void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
    struct archive *a = NULL;
    struct archive_entry *entry = NULL;
    arc_data_t arc_data;

    if (ctx->tn_size <= 0) {
        return;
    }

    int ret = arc_open(&arc_ctx, f, &a, &arc_data, TRUE);
    if (ret != ARCHIVE_OK) {
        CTX_LOG_ERRORF(f->filepath, "(cbr.c) [%d] %s", ret, archive_error_string(a))
        archive_read_free(a);
        return;
    }

    while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
        struct stat info = *archive_entry_stat(entry);
        if (!S_ISREG(info.st_mode)) {
            continue;
        }

        const char *utf8_name = archive_entry_pathname_utf8(entry);
        const char *file_path = utf8_name == NULL ? archive_entry_pathname(entry) : utf8_name;
        if (file_path == NULL) {
            continue;
        }

        const char *p = strrchr(file_path, '.');
        if (p == NULL || (strcmp(p, ".png") != 0 && strcmp(p, ".jpg") != 0 && strcmp(p, ".jpeg") != 0)) {
            continue;
        }

        /* The recorded size is signed; reject empty or invalid sizes before
         * converting it to an allocation size. */
        long long declared_size = (long long) archive_entry_size(entry);
        if (declared_size <= 0) {
            continue;
        }
        size_t entry_size = (size_t) declared_size;

        void *buf = malloc(entry_size);
        if (buf == NULL) {
            CTX_LOG_ERRORF("comic.c", "Could not allocate %lld bytes for entry", declared_size)
            break;
        }

        /* Signed result: negative values are libarchive error codes. */
        la_ssize_t read = archive_read_data(a, buf, entry_size);

        if (read < 0 || (size_t) read != entry_size) {
            const char *err_str = archive_error_string(a);
            if (err_str) {
                CTX_LOG_ERRORF("comic.c", "Error while reading entry: %s", err_str)
            }
            free(buf);
            break;
        }

        ret = store_image_thumbnail((scan_media_ctx_t *) ctx, buf, entry_size, doc, file_path);
        free(buf);

        if (ret == TRUE) {
            break;
        }
    }

    archive_read_free(a);
}
 | 
			
		||||
							
								
								
									
										31
									
								
								third-party/libscan/libscan/comic/comic.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										31
									
								
								third-party/libscan/libscan/comic/comic.h
									
									
									
									
										vendored
									
									
								
							@ -1,31 +0,0 @@
 | 
			
		||||
#ifndef SCAN_CBR_H
 | 
			
		||||
#define SCAN_CBR_H
 | 
			
		||||
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include "../ebook/ebook.h"
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
    log_callback_t log;
 | 
			
		||||
    logf_callback_t logf;
 | 
			
		||||
    store_callback_t store;
 | 
			
		||||
 | 
			
		||||
    int tn_size;
 | 
			
		||||
    float tn_qscale;
 | 
			
		||||
 | 
			
		||||
    unsigned int cbr_mime;
 | 
			
		||||
    unsigned int cbz_mime;
 | 
			
		||||
} scan_comic_ctx_t;
 | 
			
		||||
 | 
			
		||||
__always_inline
 | 
			
		||||
static int is_cbr(scan_comic_ctx_t *ctx, unsigned int mime) {
 | 
			
		||||
    return mime == ctx->cbr_mime;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
__always_inline
 | 
			
		||||
static int is_cbz(scan_comic_ctx_t *ctx, unsigned int mime) {
 | 
			
		||||
    return mime == ctx->cbz_mime;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										495
									
								
								third-party/libscan/libscan/ebook/ebook.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										495
									
								
								third-party/libscan/libscan/ebook/ebook.c
									
									
									
									
										vendored
									
									
								
							@ -1,495 +0,0 @@
 | 
			
		||||
#include "ebook.h"
 | 
			
		||||
#include <mupdf/fitz.h>
 | 
			
		||||
#include <pthread.h>
 | 
			
		||||
#include <tesseract/capi.h>
 | 
			
		||||
 | 
			
		||||
#include "../media/media.h"
 | 
			
		||||
#include "../arc/arc.h"
 | 
			
		||||
 | 
			
		||||
#define MIN_OCR_SIZE 350
 | 
			
		||||
#define MIN_OCR_LEN 10
 | 
			
		||||
 | 
			
		||||
/* fill_image callback doesn't let us pass opaque pointers unless I create my own device */
 | 
			
		||||
__thread text_buffer_t thread_buffer;
 | 
			
		||||
__thread scan_ebook_ctx_t thread_ctx;
 | 
			
		||||
 | 
			
		||||
pthread_mutex_t Mutex;
 | 
			
		||||
 | 
			
		||||
/* MuPDF lock callback: only the FreeType lock is backed by a mutex (Mutex);
 * requests for any other lock are ignored. */
static void my_fz_lock(UNUSED(void *user), int lock) {
    if (lock != FZ_LOCK_FREETYPE) {
        return;
    }
    pthread_mutex_lock(&Mutex);
}
 | 
			
		||||
 | 
			
		||||
/* MuPDF unlock callback: releases Mutex for the FreeType lock and ignores
 * every other lock. */
static void my_fz_unlock(UNUSED(void *user), int lock) {
    if (lock != FZ_LOCK_FREETYPE) {
        return;
    }
    pthread_mutex_unlock(&Mutex);
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
int pixmap_is_blank(const fz_pixmap *pixmap) {
 | 
			
		||||
    int pixmap_size = pixmap->n * pixmap->w * pixmap->h;
 | 
			
		||||
    const int pixel0 = pixmap->samples[0];
 | 
			
		||||
    for (int i = 0; i < pixmap_size; i++) {
 | 
			
		||||
        if (pixmap->samples[i] != pixel0) {
 | 
			
		||||
            return FALSE;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    return TRUE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fz_pixmap *
 | 
			
		||||
load_pixmap(scan_ebook_ctx_t *ctx, int page, fz_context *fzctx, fz_document *fzdoc, document_t *doc, fz_page **cover) {
 | 
			
		||||
 | 
			
		||||
    int err = 0;
 | 
			
		||||
 | 
			
		||||
    fz_var(cover);
 | 
			
		||||
    fz_var(err);
 | 
			
		||||
    fz_try(fzctx)*cover = fz_load_page(fzctx, fzdoc, page);
 | 
			
		||||
    fz_catch(fzctx)err = 1;
 | 
			
		||||
 | 
			
		||||
    if (err != 0) {
 | 
			
		||||
        CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message)
 | 
			
		||||
        return NULL;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fz_rect bounds = fz_bound_page(fzctx, *cover);
 | 
			
		||||
 | 
			
		||||
    float scale;
 | 
			
		||||
    float w = bounds.x1 - bounds.x0;
 | 
			
		||||
    float h = bounds.y1 - bounds.y0;
 | 
			
		||||
    if (w > h) {
 | 
			
		||||
        scale = (float) ctx->tn_size / w;
 | 
			
		||||
    } else {
 | 
			
		||||
        scale = (float) ctx->tn_size / h;
 | 
			
		||||
    }
 | 
			
		||||
    fz_matrix m = fz_scale(scale, scale);
 | 
			
		||||
 | 
			
		||||
    bounds = fz_transform_rect(bounds, m);
 | 
			
		||||
    fz_irect bbox = fz_round_rect(bounds);
 | 
			
		||||
    fz_pixmap *pixmap = fz_new_pixmap_with_bbox(fzctx, fz_device_rgb(fzctx), bbox, NULL, 0);
 | 
			
		||||
 | 
			
		||||
    fz_clear_pixmap_with_value(fzctx, pixmap, 0xFF);
 | 
			
		||||
    fz_device *dev = fz_new_draw_device(fzctx, m, pixmap);
 | 
			
		||||
 | 
			
		||||
    fz_var(err);
 | 
			
		||||
    fz_try(fzctx) {
 | 
			
		||||
                fz_run_page(fzctx, *cover, dev, fz_identity, NULL);
 | 
			
		||||
            } fz_always(fzctx) {
 | 
			
		||||
            fz_close_device(fzctx, dev);
 | 
			
		||||
            fz_drop_device(fzctx, dev);
 | 
			
		||||
        } fz_catch(fzctx)err = fzctx->error.errcode;
 | 
			
		||||
 | 
			
		||||
    if (err != 0) {
 | 
			
		||||
        CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message)
 | 
			
		||||
        fz_drop_page(fzctx, *cover);
 | 
			
		||||
        fz_drop_pixmap(fzctx, pixmap);
 | 
			
		||||
        return NULL;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (pixmap->n != 3) {
 | 
			
		||||
        CTX_LOG_ERRORF(doc->filepath, "Got unexpected pixmap depth: %d", pixmap->n)
 | 
			
		||||
        fz_drop_page(fzctx, *cover);
 | 
			
		||||
        fz_drop_pixmap(fzctx, pixmap);
 | 
			
		||||
        return NULL;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return pixmap;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Render the cover of an opened document to a JPEG thumbnail and pass it to
 * ctx->store(), keyed by doc->path_md5.
 *
 * Page 0 is rendered first; when pixmap_is_blank() reports it as blank, page 1
 * is rendered instead. The RGB pixmap is converted to YUV420P with swscale and
 * encoded with the encoder returned by alloc_jpeg_encoder().
 *
 * @param ctx   scan settings (tn_qscale, store/log callbacks)
 * @param fzctx MuPDF context used to load and drop the page/pixmap
 * @param doc   document receiving the thumbnail metadata
 * @param fzdoc opened MuPDF document
 * @return FALSE when no page could be rendered or the conversion buffers could
 *         not be allocated, TRUE otherwise.
 */
int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_document *fzdoc) {

    // Extra bytes kept after the copied samples, on top of one spare row.
    enum {
        SWS_READ_SLACK = 64
    };

    fz_page *cover = NULL;
    // load_pixmap() drops the page and pixmap itself before returning NULL
    fz_pixmap *pixmap = load_pixmap(ctx, 0, fzctx, fzdoc, doc, &cover);
    if (pixmap == NULL) {
        return FALSE;
    }

    if (pixmap_is_blank(pixmap)) {
        fz_drop_page(fzctx, cover);
        fz_drop_pixmap(fzctx, pixmap);
        CTX_LOG_DEBUG(doc->filepath, "Cover page is blank, using page 1 instead")
        pixmap = load_pixmap(ctx, 1, fzctx, fzdoc, doc, &cover);
        if (pixmap == NULL) {
            return FALSE;
        }
    }

    // RGB24 -> YUV420p
    AVFrame *scaled_frame = av_frame_alloc();

    struct SwsContext *sws_ctx = sws_getContext(
            pixmap->w, pixmap->h, AV_PIX_FMT_RGB24,
            pixmap->w, pixmap->h, AV_PIX_FMT_YUV420P,
            SIST_SWS_ALGO, 0, 0, 0
    );

    int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, pixmap->w, pixmap->h, 1);
    uint8_t *dst_buf = dst_buf_len > 0 ? (uint8_t *) av_malloc(dst_buf_len) : NULL;

    // The samples are copied into a zeroed buffer that is larger than the
    // image -- presumably so that sws_scale() may read a little past the last
    // row (TODO confirm). The buffer is sized from the image (one spare row
    // plus SWS_READ_SLACK bytes) instead of a fixed 1 GiB block.
    const size_t samples_len = (size_t) pixmap->stride * (size_t) pixmap->h;
    unsigned char *samples = calloc(1, samples_len + (size_t) pixmap->stride + SWS_READ_SLACK);

    if (scaled_frame == NULL || sws_ctx == NULL || dst_buf == NULL || samples == NULL) {
        CTX_LOG_ERROR(doc->filepath, "Could not allocate thumbnail conversion buffers")
        // All of these accept NULL
        free(samples);
        av_free(dst_buf);
        sws_freeContext(sws_ctx);
        av_frame_free(&scaled_frame);
        fz_drop_pixmap(fzctx, pixmap);
        fz_drop_page(fzctx, cover);
        return FALSE;
    }

    av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, pixmap->w, pixmap->h,
                         1);

    memcpy(samples, pixmap->samples, samples_len);

    const uint8_t *in_data[1] = {samples,};
    int in_line_size[1] = {(int) pixmap->stride};

    sws_scale(sws_ctx,
              in_data, in_line_size,
              0, pixmap->h,
              scaled_frame->data, scaled_frame->linesize
    );

    scaled_frame->width = pixmap->w;
    scaled_frame->height = pixmap->h;
    scaled_frame->format = AV_PIX_FMT_YUV420P;

    sws_freeContext(sws_ctx);

    // YUV420p -> JPEG
    AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(pixmap->w, pixmap->h, ctx->tn_qscale);
    avcodec_send_frame(jpeg_encoder, scaled_frame);

    AVPacket jpeg_packet;
    av_init_packet(&jpeg_packet);
    avcodec_receive_packet(jpeg_encoder, &jpeg_packet);

    APPEND_TN_META(doc, pixmap->w, pixmap->h)
    ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);

    free(samples);
    av_packet_unref(&jpeg_packet);
    // data[0] is dst_buf (set by av_image_fill_arrays above)
    av_free(*scaled_frame->data);
    av_frame_free(&scaled_frame);
    avcodec_free_context(&jpeg_encoder);

    fz_drop_pixmap(fzctx, pixmap);
    fz_drop_page(fzctx, cover);

    return TRUE;
}
 | 
			
		||||
 | 
			
		||||
void fz_err_callback(void *user, const char *message) {
 | 
			
		||||
    document_t *doc = (document_t *) user;
 | 
			
		||||
 | 
			
		||||
    const scan_ebook_ctx_t *ctx = &thread_ctx;
 | 
			
		||||
    CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void fz_warn_callback(void *user, const char *message) {
 | 
			
		||||
    document_t *doc = (document_t *) user;
 | 
			
		||||
 | 
			
		||||
    const scan_ebook_ctx_t *ctx = &thread_ctx;
 | 
			
		||||
    CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void init_fzctx(fz_context *fzctx, document_t *doc) {
 | 
			
		||||
    fz_register_document_handlers(fzctx);
 | 
			
		||||
 | 
			
		||||
    static int mu_is_initialized = FALSE;
 | 
			
		||||
    if (!mu_is_initialized) {
 | 
			
		||||
        pthread_mutex_init(&Mutex, NULL);
 | 
			
		||||
        mu_is_initialized = TRUE;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fzctx->warn.print_user = doc;
 | 
			
		||||
    fzctx->warn.print = fz_warn_callback;
 | 
			
		||||
    fzctx->error.print_user = doc;
 | 
			
		||||
    fzctx->error.print = fz_err_callback;
 | 
			
		||||
 | 
			
		||||
    fzctx->locks.lock = my_fz_lock;
 | 
			
		||||
    fzctx->locks.unlock = my_fz_unlock;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Append the characters of one structured-text block to a text buffer.
 *
 * Blocks that are not FZ_STEXT_BLOCK_TEXT are ignored. A space is written
 * before each line and once more after the last line.
 *
 * @param block block taken from an fz_stext_page
 * @param tex   destination buffer
 * @return TEXT_BUF_FULL as soon as a character append reports it, 0 otherwise.
 */
static int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
    if (block->type == FZ_STEXT_BLOCK_TEXT) {
        for (fz_stext_line *ln = block->u.t.first_line; ln != NULL; ln = ln->next) {
            // Line separator; its result is deliberately not checked
            text_buffer_append_char(tex, ' ');

            for (fz_stext_char *ch = ln->first_char; ch != NULL; ch = ch->next) {
                if (text_buffer_append_char(tex, ch->c) == TEXT_BUF_FULL) {
                    return TEXT_BUF_FULL;
                }
            }
        }
        text_buffer_append_char(tex, ' ');
    }
    return 0;
}
 | 
			
		||||
 | 
			
		||||
#define IS_VALID_BPP(d) ((d)==1 || (d)==2 || (d)==4 || (d)==8 || (d)==16 || (d)==24 || (d)==32)
 | 
			
		||||
 | 
			
		||||
void fill_image(fz_context *fzctx, UNUSED(fz_device *dev),
 | 
			
		||||
                fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha),
 | 
			
		||||
                UNUSED(fz_color_params color_params)) {
 | 
			
		||||
 | 
			
		||||
    int l2factor = 0;
 | 
			
		||||
 | 
			
		||||
    if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && IS_VALID_BPP(img->n)) {
 | 
			
		||||
 | 
			
		||||
        fz_pixmap *pix = img->get_pixmap(fzctx, img, NULL, img->w, img->h, &l2factor);
 | 
			
		||||
 | 
			
		||||
        if (pix->h > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && img->xres != 0) {
 | 
			
		||||
            TessBaseAPI *api = TessBaseAPICreate();
 | 
			
		||||
            TessBaseAPIInit3(api, thread_ctx.tesseract_path, thread_ctx.tesseract_lang);
 | 
			
		||||
 | 
			
		||||
            TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride);
 | 
			
		||||
            TessBaseAPISetSourceResolution(api, pix->xres);
 | 
			
		||||
 | 
			
		||||
            char *text = TessBaseAPIGetUTF8Text(api);
 | 
			
		||||
            size_t len = strlen(text);
 | 
			
		||||
            if (len >= MIN_OCR_LEN) {
 | 
			
		||||
                text_buffer_append_string(&thread_buffer, text, len - 1);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            TessBaseAPIEnd(api);
 | 
			
		||||
            TessBaseAPIDelete(api);
 | 
			
		||||
        }
 | 
			
		||||
        fz_drop_pixmap(fzctx, pix);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void
 | 
			
		||||
parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mime_str, document_t *doc, int tn_only) {
 | 
			
		||||
 | 
			
		||||
    fz_context *fzctx = fz_new_context(NULL, NULL, FZ_STORE_DEFAULT);
 | 
			
		||||
    thread_ctx = *ctx;
 | 
			
		||||
 | 
			
		||||
    init_fzctx(fzctx, doc);
 | 
			
		||||
 | 
			
		||||
    int err = 0;
 | 
			
		||||
 | 
			
		||||
    fz_document *fzdoc = NULL;
 | 
			
		||||
    fz_stream *stream = NULL;
 | 
			
		||||
    fz_var(fzdoc);
 | 
			
		||||
    fz_var(stream);
 | 
			
		||||
    fz_var(err);
 | 
			
		||||
 | 
			
		||||
    fz_try(fzctx) {
 | 
			
		||||
                stream = fz_open_memory(fzctx, buf, buf_len);
 | 
			
		||||
                fzdoc = fz_open_document_with_stream(fzctx, mime_str, stream);
 | 
			
		||||
            } fz_catch(fzctx)err = fzctx->error.errcode;
 | 
			
		||||
 | 
			
		||||
    if (err != 0) {
 | 
			
		||||
        fz_drop_stream(fzctx, stream);
 | 
			
		||||
        fz_drop_document(fzctx, fzdoc);
 | 
			
		||||
        fz_drop_context(fzctx);
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    int page_count = -1;
 | 
			
		||||
    fz_var(err);
 | 
			
		||||
    fz_try(fzctx)page_count = fz_count_pages(fzctx, fzdoc);
 | 
			
		||||
    fz_catch(fzctx)err = fzctx->error.errcode;
 | 
			
		||||
 | 
			
		||||
    if (err) {
 | 
			
		||||
        CTX_LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, fzctx->error.message)
 | 
			
		||||
        fz_drop_stream(fzctx, stream);
 | 
			
		||||
        fz_drop_document(fzctx, fzdoc);
 | 
			
		||||
        fz_drop_context(fzctx);
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    APPEND_LONG_META(doc, MetaPages, page_count)
 | 
			
		||||
 | 
			
		||||
    if (ctx->tn_size > 0) {
 | 
			
		||||
        if (render_cover(ctx, fzctx, doc, fzdoc) == FALSE) {
 | 
			
		||||
            fz_drop_stream(fzctx, stream);
 | 
			
		||||
            fz_drop_document(fzctx, fzdoc);
 | 
			
		||||
            fz_drop_context(fzctx);
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (tn_only) {
 | 
			
		||||
        fz_drop_stream(fzctx, stream);
 | 
			
		||||
        fz_drop_document(fzctx, fzdoc);
 | 
			
		||||
        fz_drop_context(fzctx);
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    char title[8192] = {'\0',};
 | 
			
		||||
    fz_try(fzctx)fz_lookup_metadata(fzctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title));
 | 
			
		||||
    fz_catch(fzctx);
 | 
			
		||||
 | 
			
		||||
    if (strlen(title) > 0) {
 | 
			
		||||
        APPEND_UTF8_META(doc, MetaTitle, title)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    char author[4096] = {'\0',};
 | 
			
		||||
    fz_try(fzctx)fz_lookup_metadata(fzctx, fzdoc, FZ_META_INFO_AUTHOR, author, sizeof(author));
 | 
			
		||||
    fz_catch(fzctx);
 | 
			
		||||
 | 
			
		||||
    if (strlen(author) > 0) {
 | 
			
		||||
        APPEND_UTF8_META(doc, MetaAuthor, author)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    if (ctx->content_size > 0) {
 | 
			
		||||
        fz_stext_options opts = {0};
 | 
			
		||||
        thread_buffer = text_buffer_create(ctx->content_size);
 | 
			
		||||
 | 
			
		||||
        for (int current_page = 0; current_page < page_count; current_page++) {
 | 
			
		||||
            fz_page *page = NULL;
 | 
			
		||||
            fz_var(err);
 | 
			
		||||
            fz_try(fzctx)page = fz_load_page(fzctx, fzdoc, current_page);
 | 
			
		||||
            fz_catch(fzctx)err = fzctx->error.errcode;
 | 
			
		||||
            if (err != 0) {
 | 
			
		||||
                CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message)
 | 
			
		||||
                text_buffer_destroy(&thread_buffer);
 | 
			
		||||
                fz_drop_page(fzctx, page);
 | 
			
		||||
                fz_drop_stream(fzctx, stream);
 | 
			
		||||
                fz_drop_document(fzctx, fzdoc);
 | 
			
		||||
                fz_drop_context(fzctx);
 | 
			
		||||
                return;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            fz_stext_page *stext = fz_new_stext_page(fzctx, fz_bound_page(fzctx, page));
 | 
			
		||||
            fz_device *dev = fz_new_stext_device(fzctx, stext, &opts);
 | 
			
		||||
            dev->stroke_path = NULL;
 | 
			
		||||
            dev->stroke_text = NULL;
 | 
			
		||||
            dev->clip_text = NULL;
 | 
			
		||||
            dev->clip_stroke_path = NULL;
 | 
			
		||||
            dev->clip_stroke_text = NULL;
 | 
			
		||||
 | 
			
		||||
            if (ctx->tesseract_lang != NULL) {
 | 
			
		||||
                dev->fill_image = fill_image;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            fz_var(err);
 | 
			
		||||
            fz_try(fzctx)fz_run_page(fzctx, page, dev, fz_identity, NULL);
 | 
			
		||||
            fz_always(fzctx) {
 | 
			
		||||
                    fz_close_device(fzctx, dev);
 | 
			
		||||
                    fz_drop_device(fzctx, dev);
 | 
			
		||||
                } fz_catch(fzctx)err = fzctx->error.errcode;
 | 
			
		||||
 | 
			
		||||
            if (err != 0) {
 | 
			
		||||
                CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message)
 | 
			
		||||
                text_buffer_destroy(&thread_buffer);
 | 
			
		||||
                fz_drop_page(fzctx, page);
 | 
			
		||||
                fz_drop_stext_page(fzctx, stext);
 | 
			
		||||
                fz_drop_stream(fzctx, stream);
 | 
			
		||||
                fz_drop_document(fzctx, fzdoc);
 | 
			
		||||
                fz_drop_context(fzctx);
 | 
			
		||||
                return;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            fz_stext_block *block = stext->first_block;
 | 
			
		||||
            while (block != NULL) {
 | 
			
		||||
                int ret = read_stext_block(block, &thread_buffer);
 | 
			
		||||
                if (ret == TEXT_BUF_FULL) {
 | 
			
		||||
                    break;
 | 
			
		||||
                }
 | 
			
		||||
                block = block->next;
 | 
			
		||||
            }
 | 
			
		||||
            fz_drop_stext_page(fzctx, stext);
 | 
			
		||||
            fz_drop_page(fzctx, page);
 | 
			
		||||
 | 
			
		||||
            if (thread_buffer.dyn_buffer.cur >= ctx->content_size) {
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        text_buffer_terminate_string(&thread_buffer);
 | 
			
		||||
 | 
			
		||||
        meta_line_t *meta_content = malloc(sizeof(meta_line_t) + thread_buffer.dyn_buffer.cur);
 | 
			
		||||
        meta_content->key = MetaContent;
 | 
			
		||||
        memcpy(meta_content->str_val, thread_buffer.dyn_buffer.buf, thread_buffer.dyn_buffer.cur);
 | 
			
		||||
        APPEND_META(doc, meta_content)
 | 
			
		||||
 | 
			
		||||
        text_buffer_destroy(&thread_buffer);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fz_drop_stream(fzctx, stream);
 | 
			
		||||
    fz_drop_document(fzctx, fzdoc);
 | 
			
		||||
    fz_drop_context(fzctx);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static scan_arc_ctx_t arc_ctx = (scan_arc_ctx_t) {.passphrase = {0,}};
 | 
			
		||||
 | 
			
		||||
void parse_epub_fast(scan_ebook_ctx_t *ctx, vfile_t *f, document_t *doc) {
 | 
			
		||||
    struct archive *a = NULL;
 | 
			
		||||
    struct archive_entry *entry = NULL;
 | 
			
		||||
    arc_data_t arc_data;
 | 
			
		||||
 | 
			
		||||
    text_buffer_t content_buffer = text_buffer_create(ctx->content_size);
 | 
			
		||||
 | 
			
		||||
    if (ctx->tn_size <= 0) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    int ret = arc_open(&arc_ctx, f, &a, &arc_data, TRUE);
 | 
			
		||||
    if (ret != ARCHIVE_OK) {
 | 
			
		||||
        CTX_LOG_ERRORF(f->filepath, "(ebook.c) [%d] %s", ret, archive_error_string(a))
 | 
			
		||||
        archive_read_free(a);
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
 | 
			
		||||
        struct stat info = *archive_entry_stat(entry);
 | 
			
		||||
        if (S_ISREG(info.st_mode)) {
 | 
			
		||||
            const char *utf8_name = archive_entry_pathname_utf8(entry);
 | 
			
		||||
            const char *file_path = utf8_name == NULL ? archive_entry_pathname(entry) : utf8_name;
 | 
			
		||||
 | 
			
		||||
            char *p = strrchr(file_path, '.');
 | 
			
		||||
            if (p != NULL && (strcmp(p, ".html") == 0 || (strcmp(p, ".xhtml") == 0))) {
 | 
			
		||||
                size_t entry_size = archive_entry_size(entry);
 | 
			
		||||
                void *buf = malloc(entry_size + 1);
 | 
			
		||||
                size_t read = archive_read_data(a, buf, entry_size);
 | 
			
		||||
                *(char *) (buf + entry_size) = '\0';
 | 
			
		||||
 | 
			
		||||
                if (read != entry_size) {
 | 
			
		||||
                    const char *err_str = archive_error_string(a);
 | 
			
		||||
                    if (err_str) {
 | 
			
		||||
                        CTX_LOG_ERRORF("ebook.c", "Error while reading entry: %s", err_str)
 | 
			
		||||
                    }
 | 
			
		||||
                    free(buf);
 | 
			
		||||
                    break;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                ret = text_buffer_append_markup(&content_buffer, buf);
 | 
			
		||||
                free(buf);
 | 
			
		||||
 | 
			
		||||
                if (ret == TEXT_BUF_FULL) {
 | 
			
		||||
                    break;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    text_buffer_terminate_string(&content_buffer);
 | 
			
		||||
 | 
			
		||||
    meta_line_t *meta_content = malloc(sizeof(meta_line_t) + content_buffer.dyn_buffer.cur);
 | 
			
		||||
    meta_content->key = MetaContent;
 | 
			
		||||
    memcpy(meta_content->str_val, content_buffer.dyn_buffer.buf, content_buffer.dyn_buffer.cur);
 | 
			
		||||
    APPEND_META(doc, meta_content)
 | 
			
		||||
 | 
			
		||||
    text_buffer_destroy(&content_buffer);
 | 
			
		||||
 | 
			
		||||
    archive_read_free(a);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Entry point for ebook files.
 *
 * EPUBs go to parse_epub_fast() when ctx->fast_epub_parse is set; everything
 * else is read fully into memory and handed to parse_ebook_mem() with tn_only
 * disabled.
 *
 * @param ctx      scan settings
 * @param f        file to read
 * @param mime_str mime type of the file
 * @param doc      document receiving the metadata
 */
void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char *mime_str, document_t *doc) {

    const int use_fast_path = ctx->fast_epub_parse && is_epub(mime_str);
    if (use_fast_path) {
        parse_epub_fast(ctx, f, doc);
        return;
    }

    size_t contents_len;
    void *contents = read_all(f, &contents_len);

    if (contents != NULL) {
        parse_ebook_mem(ctx, contents, contents_len, mime_str, doc, FALSE);
        free(contents);
    } else {
        CTX_LOG_ERROR(f->filepath, "read_all() failed")
    }
}
 | 
			
		||||
							
								
								
									
										30
									
								
								third-party/libscan/libscan/ebook/ebook.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										30
									
								
								third-party/libscan/libscan/ebook/ebook.h
									
									
									
									
										vendored
									
									
								
							@ -1,30 +0,0 @@
 | 
			
		||||
#ifndef SCAN_EBOOK_H
 | 
			
		||||
#define SCAN_EBOOK_H
 | 
			
		||||
 | 
			
		||||
#include "../scan.h"
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
    long content_size;
 | 
			
		||||
    int tn_size;
 | 
			
		||||
    const char *tesseract_lang;
 | 
			
		||||
    const char *tesseract_path;
 | 
			
		||||
    pthread_mutex_t mupdf_mutex;
 | 
			
		||||
 | 
			
		||||
    log_callback_t log;
 | 
			
		||||
    logf_callback_t logf;
 | 
			
		||||
    store_callback_t store;
 | 
			
		||||
    int fast_epub_parse;
 | 
			
		||||
    float tn_qscale;
 | 
			
		||||
} scan_ebook_ctx_t;
 | 
			
		||||
 | 
			
		||||
void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char *mime_str, document_t *doc);
 | 
			
		||||
 | 
			
		||||
void
 | 
			
		||||
parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mime_str, document_t *doc, int tn_only);
 | 
			
		||||
 | 
			
		||||
/* Returns 1 when mime_string is exactly the EPUB mime type, 0 otherwise. */
__always_inline
static int is_epub(const char *mime_string) {
    static const char epub_mime[] = "application/epub+zip";

    return !strcmp(mime_string, epub_mime);
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										246
									
								
								third-party/libscan/libscan/font/font.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										246
									
								
								third-party/libscan/libscan/font/font.c
									
									
									
									
										vendored
									
									
								
							@ -1,246 +0,0 @@
 | 
			
		||||
#include "font.h"
 | 
			
		||||
 | 
			
		||||
#include <ft2build.h>
 | 
			
		||||
#include <freetype/freetype.h>
 | 
			
		||||
#include "../util.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
__thread FT_Library ft_lib = NULL;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* Size of the bitmap needed to draw a string, as computed by text_dimension(). */
typedef struct text_dimensions {
    /* Sum over all characters of max(advance, bitmap width) plus kerning */
    unsigned int width;
    /* Largest ascent plus largest descent found in the string */
    unsigned int height;
    /* Largest descent found in the string */
    unsigned int baseline;
} text_dimensions_t;
 | 
			
		||||
 | 
			
		||||
/* Metrics and bitmap of one rendered glyph, filled in by ft_glyph_to_glyph(). */
typedef struct glyph {
    /* bitmap_top of the FreeType glyph slot */
    int top;
    /* Bitmap rows */
    int height;
    /* Bitmap width */
    int width;
    /* max(0, height - top) */
    int descent;
    /* max(0, max(top, height) - descent) */
    int ascent;
    /* Slot advance.x divided by 64 */
    int advance_width;
    /* Points at the slot's bitmap buffer; not a copy */
    unsigned char *pixmap;
} glyph_t;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
__always_inline
 | 
			
		||||
int kerning_offset(char c, char pc, FT_Face face) {
 | 
			
		||||
    FT_Vector kerning;
 | 
			
		||||
    FT_Get_Kerning(face, c, pc, FT_KERNING_DEFAULT, &kerning);
 | 
			
		||||
 | 
			
		||||
    return (int) (kerning.x / 64);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
__always_inline
 | 
			
		||||
glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
 | 
			
		||||
    glyph_t glyph;
 | 
			
		||||
 | 
			
		||||
    glyph.pixmap = slot->bitmap.buffer;
 | 
			
		||||
 | 
			
		||||
    glyph.width = (int) slot->bitmap.width;
 | 
			
		||||
    glyph.height = (int) slot->bitmap.rows;
 | 
			
		||||
    glyph.top = slot->bitmap_top;
 | 
			
		||||
    glyph.advance_width = (int) slot->advance.x / 64;
 | 
			
		||||
 | 
			
		||||
    glyph.descent = MAX(0, glyph.height - glyph.top);
 | 
			
		||||
    glyph.ascent = MAX(0, MAX(glyph.top, glyph.height) - glyph.descent);
 | 
			
		||||
 | 
			
		||||
    return glyph;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Measure the bitmap needed to draw a string with the given face.
 *
 * Each character is loaded with FT_Load_Char() (result not checked) and its
 * metrics are accumulated: the width grows by max(advance, bitmap width) plus
 * the kerning against the previous character; the height is the largest
 * ascent plus the largest descent.
 *
 * @param text NUL-terminated string to measure
 * @param face face whose pixel size has already been set
 * @return width, height and baseline of the string
 */
text_dimensions_t text_dimension(char *text, FT_Face face) {
    text_dimensions_t dims;
    unsigned int tallest_ascent = 0;
    int deepest_descent = 0;
    char prev = 0;

    dims.width = 0;

    for (const char *cursor = text; *cursor != '\0'; cursor++) {
        const char cur = *cursor;

        FT_Load_Char(face, cur, 0);
        glyph_t g = ft_glyph_to_glyph(face->glyph);

        deepest_descent = MAX(deepest_descent, g.descent);
        tallest_ascent = MAX(tallest_ascent, MAX(g.height, g.ascent));

        int kern = kerning_offset(cur, prev, face);
        dims.width += MAX(g.advance_width, g.width) + kern;

        prev = cur;
    }

    dims.height = tallest_ascent + deepest_descent;
    dims.baseline = deepest_descent;

    return dims;
}
 | 
			
		||||
 | 
			
		||||
/**
 * OR a glyph bitmap into the text bitmap with its top-left corner at (x, y).
 *
 * The text bitmap is text_info.width * text_info.height bytes, one byte per
 * pixel, row-major. Destination offsets that fall outside of it are skipped.
 *
 * @param glyph     glyph to draw
 * @param x         destination column of the glyph's left edge
 * @param y         destination row of the glyph's top edge
 * @param text_info dimensions of the destination bitmap
 * @param bitmap    destination bitmap
 */
void draw_glyph(glyph_t *glyph, int x, int y, struct text_dimensions text_info, unsigned char *bitmap) {
    const unsigned int canvas_w = text_info.width;
    const unsigned int canvas_len = canvas_w * text_info.height;
    const unsigned int glyph_w = glyph->width;
    const unsigned int glyph_h = glyph->height;
    // Offset of the glyph's first pixel; unsigned arithmetic, as in the bounds check below
    const unsigned int origin = y * canvas_w + x;

    for (unsigned int row = 0; row < glyph_h; row++) {
        for (unsigned int col = 0; col < glyph_w; col++) {
            const unsigned int dst = origin + row * canvas_w + col;

            if (dst < canvas_len) {
                bitmap[dst] |= glyph->pixmap[row * glyph_w + col];
            }
        }
    }
}
 | 
			
		||||
 | 
			
		||||
/**
 * Serialise an 8-bit bitmap as a BMP file into a dynamic buffer.
 *
 * Layout written: 14-byte file header, 40-byte DIB header, 256-entry colour
 * table (inverted grayscale: index 0 is white), then the pixel rows from
 * bottom to top. The buffer is expected to be empty on entry: the file size is
 * patched in at offset 2 of buf->buf.
 *
 * @param buf        destination buffer
 * @param dimensions width and height of the bitmap
 * @param bitmap     width * height bytes, row-major, top row first
 */
void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned char *bitmap) {

    dyn_buffer_write_short(buf, 0x4D42); // Magic
    dyn_buffer_write_int(buf, 0); // Size placeholder
    dyn_buffer_write_int(buf, 0x5157); //Reserved
    dyn_buffer_write_int(buf, 14 + 40 + 256 * 4); // pixels offset

    dyn_buffer_write_int(buf, 40); // DIB size
    dyn_buffer_write_int(buf, (int) dimensions.width);
    dyn_buffer_write_int(buf, (int) dimensions.height);
    dyn_buffer_write_short(buf, 1); // Color planes
    dyn_buffer_write_short(buf, 8); // bits per pixel
    dyn_buffer_write_int(buf, 0); // compression
    dyn_buffer_write_int(buf, 0); // Ignored
    dyn_buffer_write_int(buf, 3800); // hres
    dyn_buffer_write_int(buf, 3800); // vres
    dyn_buffer_write_int(buf, 256); // Color count
    dyn_buffer_write_int(buf, 0); // Ignored

    // RGBA32 Color table (Grayscale)
    for (int i = 255; i >= 0; i--) {
        dyn_buffer_write_int(buf, i + (i << 8) + (i << 16));
    }

    // Pixel array: write from bottom to top, with every row padded to a
    // multiple of 4 bytes. The padding depends on the row length only: the
    // pixel data starts at offset 14 + 40 + 1024 = 1078, which is not a
    // multiple of 4, so aligning the absolute buffer offset would give the
    // first row a different stride from the others.
    const unsigned int row_padding = (4 - dimensions.width % 4) % 4;

    for (int y = (int) dimensions.height - 1; y >= 0; y--) {
        for (unsigned int x = 0; x < dimensions.width; x++) {
            dyn_buffer_write_char(buf, (char) bitmap[y * dimensions.width + x]);
        }
        for (unsigned int pad = 0; pad < row_padding; pad++) {
            dyn_buffer_write_char(buf, 0);
        }
    }

    // Size: offset 2 is not int-aligned, so copy the bytes instead of storing
    // through an int pointer
    const int file_size = (int) buf->cur;
    memcpy((char *) buf->buf + 2, &file_size, sizeof(file_size));
}
 | 
			
		||||
 | 
			
		||||
/**
 * Parse a font file: record its name (MetaFontName) and, when thumbnails are
 * enabled, render the name with the font itself and store it as a BMP.
 *
 * The FreeType library handle is created lazily, once per thread.
 *
 * @param ctx font scan settings (enable_tn, log/store callbacks)
 * @param f   file to read
 * @param doc document receiving the metadata and thumbnail
 */
void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
    if (ft_lib == NULL) {
        FT_Error init_err = FT_Init_FreeType(&ft_lib);
        if (init_err != 0) {
            CTX_LOG_ERRORF(f->filepath, "(font.c) FT_Init_FreeType() returned error code [%d] %s", init_err,
                           FT_Error_String(init_err))
            ft_lib = NULL;
            return;
        }
    }

    size_t buf_len = 0;
    void *buf = read_all(f, &buf_len);
    if (buf == NULL) {
        CTX_LOG_ERROR(f->filepath, "read_all() failed")
        return;
    }

    FT_Face face;
    FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, (int) buf_len, 0, &face);
    if (err != 0) {
        CTX_LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err,
                       FT_Error_String(err))
        free(buf);
        return;
    }

    char font_name[4096];

    // snprintf() always terminates the string, unlike strncpy() on overlong input
    if (face->style_name == NULL || (strcmp(face->style_name, "?") == 0)) {
        if (face->family_name == NULL) {
            strcpy(font_name, "(null)");
        } else {
            snprintf(font_name, sizeof(font_name), "%s", face->family_name);
        }
    } else {
        snprintf(font_name, sizeof(font_name), "%s %s",
                 face->family_name != NULL ? face->family_name : "(null)", face->style_name);
    }

    // +1 for the terminator written by strcpy(); whether sizeof(meta_line_t)
    // already leaves room for it cannot be seen from here.
    meta_line_t *meta_name = malloc(sizeof(meta_line_t) + strlen(font_name) + 1);
    if (meta_name == NULL) {
        CTX_LOG_ERROR(doc->filepath, "(font.c) Could not allocate memory for the font name")
        FT_Done_Face(face);
        free(buf);
        return;
    }
    meta_name->key = MetaFontName;
    strcpy(meta_name->str_val, font_name);
    APPEND_META(doc, meta_name)

    // Nothing left to do when thumbnails are disabled
    if (ctx->enable_tn == FALSE) {
        FT_Done_Face(face);
        free(buf);
        return;
    }

    int pixel = 64;
    int num_chars = (int) strlen(font_name);

    err = FT_Set_Pixel_Sizes(face, 0, pixel);
    if (err != 0) {
        CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err,
                         FT_Error_String(err))
        FT_Done_Face(face);
        free(buf);
        return;
    }

    text_dimensions_t dimensions = text_dimension(font_name, face);
    const size_t bitmap_len = (size_t) dimensions.width * dimensions.height;
    unsigned char *bitmap = calloc(bitmap_len, 1);
    // calloc(0, 1) may legitimately return NULL; only a non-empty bitmap is an error
    if (bitmap == NULL && bitmap_len > 0) {
        CTX_LOG_ERROR(doc->filepath, "(font.c) Could not allocate memory for the thumbnail bitmap")
        FT_Done_Face(face);
        free(buf);
        return;
    }

    FT_Vector pen;
    pen.x = 0;

    char pc = 0;
    for (int i = 0; i < num_chars; i++) {
        char c = font_name[i];

        err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
        if (err != 0) {
            // Retry with the other letter case before giving up on this character
            c = c >= 'a' && c <= 'z' ? c - 32 : c + 32;
            err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
            if (err != 0) {
                CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err,
                                 FT_Error_String(err))
                continue;
            }
        }
        glyph_t glyph = ft_glyph_to_glyph(face->glyph);

        pen.x += kerning_offset(c, pc, face);
        if (pen.x <= 0) {
            pen.x = ABS(glyph.advance_width - glyph.width);
        }
        pen.y = dimensions.height - glyph.ascent - dimensions.baseline;

        draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap);

        pen.x += glyph.advance_width;
        pc = c;
    }

    dyn_buffer_t bmp_data = dyn_buffer_create();
    bmp_format(&bmp_data, dimensions, bitmap);

    APPEND_TN_META(doc, dimensions.width, dimensions.height)
    ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) bmp_data.buf, bmp_data.cur);

    dyn_buffer_destroy(&bmp_data);
    free(bitmap);

    FT_Done_Face(face);
    free(buf);
}
 | 
			
		||||
 | 
			
		||||
void cleanup_font() {
 | 
			
		||||
    FT_Done_FreeType(ft_lib);
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										17
									
								
								third-party/libscan/libscan/font/font.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										17
									
								
								third-party/libscan/libscan/font/font.h
									
									
									
									
										vendored
									
									
								
							@ -1,17 +0,0 @@
 | 
			
		||||
#ifndef SCAN_FONT_H
#define SCAN_FONT_H

#include "../scan.h"

/*
 * Context handed to the font parser.
 */
typedef struct {
    // presumably toggles thumbnail generation -- TODO confirm against font.c
    int enable_tn;
    // Logging callbacks (plain / printf-style)
    log_callback_t log;
    logf_callback_t logf;
    // Callback used to persist generated data (called with the document's path_md5 as key)
    store_callback_t store;
} scan_font_ctx_t;

/* Parses the font file `f` and records the results on `doc`. */
void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc);

/* Releases the FreeType library handle used by the font parser. */
void cleanup_font();

#endif
 | 
			
		||||
							
								
								
									
										119
									
								
								third-party/libscan/libscan/json/json.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										119
									
								
								third-party/libscan/libscan/json/json.c
									
									
									
									
										vendored
									
									
								
							@ -1,119 +0,0 @@
 | 
			
		||||
#include "json.h"
 | 
			
		||||
#include "cjson/cJSON.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define JSON_MAX_FILE_SIZE (1024 * 1024 * 50)
 | 
			
		||||
 | 
			
		||||
/**
 * Walks a cJSON tree depth-first and appends every string value, followed by
 * a single space, to `tex`.
 *
 * Returns TRUE as soon as the text buffer reports TEXT_BUF_FULL (the walk
 * stops there), FALSE otherwise. Nodes that are neither object, array nor
 * string contribute nothing.
 */
int json_extract_text(cJSON *json, text_buffer_t *tex) {
    // Leaf: a string value
    if (cJSON_IsString(json)) {
        return text_buffer_append_string0(tex, json->valuestring) == TEXT_BUF_FULL
               || text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL;
    }

    // Anything that is not a container has no text to offer
    if (!cJSON_IsObject(json) && !cJSON_IsArray(json)) {
        return FALSE;
    }

    // Objects and arrays both keep their members in the `child` linked list
    for (cJSON *item = json->child; item != NULL; item = item->next) {
        if (json_extract_text(item, tex)) {
            return TRUE;
        }
    }

    return FALSE;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Reads the whole JSON document behind `f`, collects every string value it
 * contains and attaches the result to `doc` as MetaContent.
 *
 * Returns SCAN_ERR_SKIP when the file is larger than JSON_MAX_FILE_SIZE,
 * SCAN_ERR_READ when the file cannot be read or the buffer cannot be grown
 * for the terminator, SCAN_OK otherwise (including when the document does not
 * parse: the content is then empty).
 */
scan_code_t parse_json(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc) {

    if (f->info.st_size > JSON_MAX_FILE_SIZE) {
        CTX_LOG_WARNINGF("json.c", "File larger than maximum allowed [%s]", f->filepath)
        return SCAN_ERR_SKIP;
    }

    size_t buf_len;
    char *buf = read_all(f, &buf_len);

    if (buf == NULL) {
        return SCAN_ERR_READ;
    }

    // cJSON_ParseWithOpts() expects a C string: grow by one byte for the NUL.
    char *terminated = realloc(buf, buf_len + 1);
    if (terminated == NULL) {
        // realloc() leaves the original allocation untouched on failure
        free(buf);
        return SCAN_ERR_READ;
    }
    buf = terminated;
    buf[buf_len] = '\0';

    cJSON *json = cJSON_ParseWithOpts(buf, NULL, TRUE);
    text_buffer_t tex = text_buffer_create(ctx->content_size);

    // A NULL tree (parse failure) simply yields no text
    json_extract_text(json, &tex);
    text_buffer_terminate_string(&tex);

    APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);

    cJSON_Delete(json);
    free(buf);
    text_buffer_destroy(&tex);

    return SCAN_OK;
}
 | 
			
		||||
 | 
			
		||||
#define JSON_BUF_SIZE (1024 * 1024 * 5)
 | 
			
		||||
 | 
			
		||||
/**
 * Parses a newline-delimited JSON file in chunks of JSON_BUF_SIZE bytes,
 * extracting the string values of each successive JSON value into a single
 * MetaContent entry on `doc`.
 *
 * Returns SCAN_ERR_READ when the working buffer cannot be allocated,
 * SCAN_OK otherwise.
 */
scan_code_t parse_ndjson(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc) {

    // calloc() zero-fills, so the extra byte at buf[JSON_BUF_SIZE] is the NUL terminator
    char *buf = calloc(JSON_BUF_SIZE + 1, sizeof(char));
    if (buf == NULL) {
        // NOTE(review): no dedicated out-of-memory scan code is visible here; SCAN_ERR_READ is the closest
        return SCAN_ERR_READ;
    }

    text_buffer_t tex = text_buffer_create(ctx->content_size);

    size_t ret;
    int eof = FALSE;
    const char *parse_end = buf;
    size_t to_read;
    char *ptr = buf;

    while (TRUE) {
        cJSON *json;

        if (!eof) {
            // First pass fills the whole buffer; later passes refill what the previous parse consumed
            to_read = parse_end == buf ? JSON_BUF_SIZE : parse_end - buf;
            ret = f->read(f, ptr, to_read);
            if (ret != to_read) {
                eof = TRUE;
            }
        }

        json = cJSON_ParseWithOpts(buf, &parse_end, FALSE);

        if (parse_end == buf + JSON_BUF_SIZE) {
            CTX_LOG_ERRORF("json.c", "Line too large for buffer [%s]", doc->filepath);
            cJSON_Delete(json);
            break;
        }

        // Nothing was consumed: no more values to parse
        if (parse_end == buf) {
            cJSON_Delete(json);
            break;
        }

        json_extract_text(json, &tex);

        cJSON_Delete(json);

        // Shift the unparsed tail to the front; the next read appends right after it
        memmove(buf, parse_end, (buf + JSON_BUF_SIZE - parse_end));
        ptr = buf + JSON_BUF_SIZE - parse_end + buf;
    }

    text_buffer_terminate_string(&tex);

    APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);

    free(buf);
    text_buffer_destroy(&tex);

    // The function is declared scan_code_t; falling off the end left the result undefined
    return SCAN_OK;
}
 | 
			
		||||
							
								
								
									
										30
									
								
								third-party/libscan/libscan/json/json.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										30
									
								
								third-party/libscan/libscan/json/json.h
									
									
									
									
										vendored
									
									
								
							@ -1,30 +0,0 @@
 | 
			
		||||
#ifndef SCAN_JSON_H
#define SCAN_JSON_H

#include "../scan.h"

/*
 * Context handed to the JSON / NDJSON parsers.
 */
typedef struct {
    // Passed to text_buffer_create() as the size of the extracted-content buffer
    long content_size;
    // Logging callbacks (plain / printf-style)
    log_callback_t log;
    logf_callback_t logf;
    store_callback_t store;
    // Mime identifiers that is_json() / is_ndjson() compare against
    unsigned int json_mime;
    unsigned int ndjson_mime;
} scan_json_ctx_t;

/* Extracts the string values of a single JSON document into MetaContent. */
scan_code_t parse_json(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc);

/* Same as parse_json(), for newline-delimited JSON read in chunks. */
scan_code_t parse_ndjson(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc);

/* Non-zero when `mime` is the context's JSON mime identifier. */
__always_inline
static int is_json(scan_json_ctx_t *ctx, unsigned int mime) {
    return ctx->json_mime == mime;
}

/* Non-zero when `mime` is the context's NDJSON mime identifier. */
__always_inline
static int is_ndjson(scan_json_ctx_t *ctx, unsigned int mime) {
    return ctx->ndjson_mime == mime;
}

#endif
 | 
			
		||||
							
								
								
									
										62
									
								
								third-party/libscan/libscan/macros.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										62
									
								
								third-party/libscan/libscan/macros.h
									
									
									
									
										vendored
									
									
								
							@ -1,62 +0,0 @@
 | 
			
		||||
// Boolean helpers (BOOL is only defined together with FALSE)
#ifndef FALSE
#define FALSE (0)
#define BOOL int
#endif

#ifndef TRUE
#define TRUE (!FALSE)
#endif

// NOTE: MAX/MIN/ABS evaluate their arguments more than once
#undef MAX
#define MAX(a, b)  (((a) > (b)) ? (a) : (b))

#undef MIN
#define MIN(a, b)  (((a) < (b)) ? (a) : (b))

#ifndef PATH_MAX
#define PATH_MAX 4096
#endif

#undef ABS
#define ABS(a) (((a) < 0) ? -(a) : (a))

#define SHA1_STR_LENGTH 41
#define SHA1_DIGEST_LENGTH 20

// Appends a string meta entry (a copy of `value`) to the document's meta list.
// One extra byte is allocated for the NUL terminator written by strcpy().
// `value` is evaluated twice.
#define APPEND_STR_META(doc, keyname, value) \
    {meta_line_t *meta_str = malloc(sizeof(meta_line_t) + strlen(value) + 1); \
    meta_str->key = keyname; \
    strcpy(meta_str->str_val, value); \
    APPEND_META(doc, meta_str)}

// Appends an integer meta entry to the document's meta list.
#define APPEND_LONG_META(doc, keyname, value) \
    {meta_line_t *meta_long = malloc(sizeof(meta_line_t)); \
    meta_long->key = keyname; \
    meta_long->long_val = value; \
    APPEND_META(doc, meta_long)}

// Room for two formatted ints (up to 11 characters each), the comma and the NUL
#define TN_META_STR_MAX 24

// Appends the thumbnail dimensions as a "WWWW,HHHH" string meta entry.
// The write is bounded so out-of-range dimensions cannot overflow the entry.
#define APPEND_TN_META(doc, width, height) \
    {meta_line_t *meta_str = malloc(sizeof(meta_line_t) + TN_META_STR_MAX); \
    meta_str->key = MetaThumbnail; \
    snprintf(meta_str->str_val, TN_META_STR_MAX, "%04d,%04d", width, height); \
    APPEND_META(doc, meta_str)}

// Links `meta` at the tail of the document's singly linked meta list.
#define APPEND_META(doc, meta) \
    meta->next = NULL;\
    if (doc->meta_head == NULL) {\
        doc->meta_head = meta;\
        doc->meta_tail = doc->meta_head;\
    } else {\
        doc->meta_tail->next = meta;\
        doc->meta_tail = meta;\
    }

// Runs `str` through a text buffer and appends the result as a string meta
// entry. Declares `tex` and `meta_tag` in the enclosing scope, so it can be
// used at most once per block.
#define APPEND_UTF8_META(doc, keyname, str) \
    text_buffer_t tex = text_buffer_create(-1); \
    text_buffer_append_string0(&tex, str); \
    text_buffer_terminate_string(&tex); \
    meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur + 1); \
    meta_tag->key = keyname; \
    strcpy(meta_tag->str_val, tex.dyn_buffer.buf); \
    APPEND_META(doc, meta_tag) \
    text_buffer_destroy(&tex);
 | 
			
		||||
							
								
								
									
										749
									
								
								third-party/libscan/libscan/media/media.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										749
									
								
								third-party/libscan/libscan/media/media.c
									
									
									
									
										vendored
									
									
								
							@ -1,749 +0,0 @@
 | 
			
		||||
#include "media.h"
 | 
			
		||||
#include <ctype.h>
 | 
			
		||||
 | 
			
		||||
#define MIN_SIZE 32
 | 
			
		||||
#define AVIO_BUF_SIZE 8192
 | 
			
		||||
#define IS_VIDEO(fmt) (fmt->iformat->name && strcmp(fmt->iformat->name, "image2") != 0)
 | 
			
		||||
 | 
			
		||||
#define STORE_AS_IS ((void*)-1)
 | 
			
		||||
 | 
			
		||||
/**
 * Returns the path to use for `doc`: when the document has no extension
 * (doc->ext does not lie past doc->base) and its mime type is PNG or JPEG,
 * a placeholder name carrying the matching extension is returned instead of
 * `filepath`. In every other case `filepath` is returned unchanged.
 */
const char *get_filepath_with_ext(document_t *doc, const char *filepath, const char *mime_str) {

    // The document already carries an extension: nothing to substitute
    if (doc->ext > doc->base) {
        return filepath;
    }

    if (strcmp(mime_str, "image/png") == 0) {
        return "file.png";
    }

    if (strcmp(mime_str, "image/jpeg") == 0) {
        return "file.jpg";
    }

    return filepath;
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
__always_inline
 | 
			
		||||
void *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
 | 
			
		||||
 | 
			
		||||
    if (frame->pict_type == AV_PICTURE_TYPE_NONE) {
 | 
			
		||||
        return NULL;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    int dstW;
 | 
			
		||||
    int dstH;
 | 
			
		||||
    if (frame->width <= size && frame->height <= size) {
 | 
			
		||||
        if (decoder->codec_id == AV_CODEC_ID_MJPEG || decoder->codec_id == AV_CODEC_ID_PNG) {
 | 
			
		||||
            return STORE_AS_IS;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        dstW = frame->width;
 | 
			
		||||
        dstH = frame->height;
 | 
			
		||||
    } else {
 | 
			
		||||
        double ratio = (double) frame->width / frame->height;
 | 
			
		||||
        if (frame->width > frame->height) {
 | 
			
		||||
            dstW = size;
 | 
			
		||||
            dstH = (int) (size / ratio);
 | 
			
		||||
        } else {
 | 
			
		||||
            dstW = (int) (size * ratio);
 | 
			
		||||
            dstH = size;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
 | 
			
		||||
        return NULL;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    AVFrame *scaled_frame = av_frame_alloc();
 | 
			
		||||
 | 
			
		||||
    struct SwsContext *sws_ctx = sws_getContext(
 | 
			
		||||
            decoder->width, decoder->height, decoder->pix_fmt,
 | 
			
		||||
            dstW, dstH, AV_PIX_FMT_YUVJ420P,
 | 
			
		||||
            SIST_SWS_ALGO, 0, 0, 0
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1);
 | 
			
		||||
    uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len * 2);
 | 
			
		||||
 | 
			
		||||
    av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);
 | 
			
		||||
 | 
			
		||||
    sws_scale(sws_ctx,
 | 
			
		||||
              (const uint8_t *const *) frame->data, frame->linesize,
 | 
			
		||||
              0, decoder->height,
 | 
			
		||||
              scaled_frame->data, scaled_frame->linesize
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    scaled_frame->width = dstW;
 | 
			
		||||
    scaled_frame->height = dstH;
 | 
			
		||||
    scaled_frame->format = AV_PIX_FMT_YUV420P;
 | 
			
		||||
 | 
			
		||||
    sws_freeContext(sws_ctx);
 | 
			
		||||
 | 
			
		||||
    return scaled_frame;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
    AVPacket *packet;
 | 
			
		||||
    AVFrame *frame;
 | 
			
		||||
} frame_and_packet_t;
 | 
			
		||||
 | 
			
		||||
static void frame_and_packet_free(frame_and_packet_t *frame_and_packet) {
 | 
			
		||||
    if (frame_and_packet->packet != NULL) {
 | 
			
		||||
        av_packet_free(&frame_and_packet->packet);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (frame_and_packet->frame != NULL) {
 | 
			
		||||
        av_frame_free(&frame_and_packet->frame);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    free(frame_and_packet->packet);
 | 
			
		||||
    free(frame_and_packet);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
__always_inline
 | 
			
		||||
static void read_subtitles(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, int stream_idx, document_t *doc) {
 | 
			
		||||
 | 
			
		||||
    text_buffer_t tex = text_buffer_create(-1);
 | 
			
		||||
 | 
			
		||||
    AVPacket packet;
 | 
			
		||||
    AVSubtitle subtitle;
 | 
			
		||||
 | 
			
		||||
    AVCodec *subtitle_codec = avcodec_find_decoder(pFormatCtx->streams[stream_idx]->codecpar->codec_id);
 | 
			
		||||
    AVCodecContext *decoder = avcodec_alloc_context3(subtitle_codec);
 | 
			
		||||
    avcodec_parameters_to_context(decoder, pFormatCtx->streams[stream_idx]->codecpar);
 | 
			
		||||
    avcodec_open2(decoder, subtitle_codec, NULL);
 | 
			
		||||
 | 
			
		||||
    decoder->sub_text_format = FF_SUB_TEXT_FMT_ASS;
 | 
			
		||||
 | 
			
		||||
    int got_sub;
 | 
			
		||||
 | 
			
		||||
    while (1) {
 | 
			
		||||
        int read_frame_ret = av_read_frame(pFormatCtx, &packet);
 | 
			
		||||
 | 
			
		||||
        if (read_frame_ret != 0) {
 | 
			
		||||
            break;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (packet.stream_index != stream_idx) {
 | 
			
		||||
            av_packet_unref(&packet);
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        avcodec_decode_subtitle2(decoder, &subtitle, &got_sub, &packet);
 | 
			
		||||
 | 
			
		||||
        if (got_sub) {
 | 
			
		||||
            for (int i = 0; i < subtitle.num_rects; i++) {
 | 
			
		||||
                const char *text = subtitle.rects[i]->ass;
 | 
			
		||||
 | 
			
		||||
                if (text == NULL) {
 | 
			
		||||
                    continue;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                char *idx = strstr(text, "\\N");
 | 
			
		||||
                if (idx != NULL && strlen(idx + 2) > 1) {
 | 
			
		||||
                    text_buffer_append_string0(&tex, idx + 2);
 | 
			
		||||
                    text_buffer_append_char(&tex, ' ');
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            avsubtitle_free(&subtitle);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        av_packet_unref(&packet);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    text_buffer_terminate_string(&tex);
 | 
			
		||||
 | 
			
		||||
    APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf)
 | 
			
		||||
    text_buffer_destroy(&tex);
 | 
			
		||||
    avcodec_free_context(&decoder);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
__always_inline
 | 
			
		||||
static frame_and_packet_t *
 | 
			
		||||
read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx,
 | 
			
		||||
           document_t *doc) {
 | 
			
		||||
 | 
			
		||||
    frame_and_packet_t *result = calloc(1, sizeof(frame_and_packet_t));
 | 
			
		||||
    result->packet = av_packet_alloc();
 | 
			
		||||
    result->frame = av_frame_alloc();
 | 
			
		||||
 | 
			
		||||
    av_init_packet(result->packet);
 | 
			
		||||
 | 
			
		||||
    int receive_ret = -EAGAIN;
 | 
			
		||||
    while (receive_ret == -EAGAIN) {
 | 
			
		||||
        // Get video frame
 | 
			
		||||
        while (1) {
 | 
			
		||||
            int read_frame_ret = av_read_frame(pFormatCtx, result->packet);
 | 
			
		||||
 | 
			
		||||
            if (read_frame_ret != 0) {
 | 
			
		||||
                if (read_frame_ret != AVERROR_EOF) {
 | 
			
		||||
                    CTX_LOG_WARNINGF(doc->filepath,
 | 
			
		||||
                                     "(media.c) avcodec_read_frame() returned error code [%d] %s",
 | 
			
		||||
                                     read_frame_ret, av_err2str(read_frame_ret)
 | 
			
		||||
                    )
 | 
			
		||||
                }
 | 
			
		||||
                frame_and_packet_free(result);
 | 
			
		||||
                return NULL;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            //Ignore audio/other frames
 | 
			
		||||
            if (result->packet->stream_index != stream_idx) {
 | 
			
		||||
                av_packet_unref(result->packet);
 | 
			
		||||
                continue;
 | 
			
		||||
            }
 | 
			
		||||
            break;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Feed it to decoder
 | 
			
		||||
        int decode_ret = avcodec_send_packet(decoder, result->packet);
 | 
			
		||||
        if (decode_ret != 0) {
 | 
			
		||||
            CTX_LOG_ERRORF(doc->filepath,
 | 
			
		||||
                           "(media.c) avcodec_send_packet() returned error code [%d] %s",
 | 
			
		||||
                           decode_ret, av_err2str(decode_ret)
 | 
			
		||||
            )
 | 
			
		||||
            frame_and_packet_free(result);
 | 
			
		||||
            return NULL;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        receive_ret = avcodec_receive_frame(decoder, result->frame);
 | 
			
		||||
        if (receive_ret == -EAGAIN && result->packet != NULL) {
 | 
			
		||||
            av_packet_unref(result->packet);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return result;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Appends the value of `tag` to `doc` under `key`, unless the document's meta
 * list already holds an entry with that key (in which case the tag is ignored
 * and a debug message is logged).
 */
void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDictionaryEntry *tag, enum metakey key) {

    // Look for an entry that already uses this key
    for (meta_line_t *existing = doc->meta_head; existing != NULL; existing = existing->next) {
        if (existing->key == key) {
            CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s' and '%02x=%s'",
                           key, existing->str_val, key, tag->value)
            return;
        }
    }

    // Run the value through a text buffer, then copy it into a new meta line
    text_buffer_t utf8 = text_buffer_create(-1);
    text_buffer_append_string0(&utf8, tag->value);
    text_buffer_terminate_string(&utf8);

    meta_line_t *new_meta = malloc(sizeof(meta_line_t) + utf8.dyn_buffer.cur);
    new_meta->key = key;
    strcpy(new_meta->str_val, utf8.dyn_buffer.buf);

    APPEND_META(doc, new_meta)
    text_buffer_destroy(&utf8);
}
 | 
			
		||||
 | 
			
		||||
// Appends the value of the in-scope `tag` to the in-scope `doc` under `keyname`.
#define APPEND_TAG_META(keyname) \
    APPEND_UTF8_META(doc, keyname, tag->value)

// Copies `str` into the char ARRAY `dst` (sizeof(dst) must be its capacity),
// truncating if needed and always NUL-terminating, then lower-cases it in place.
// Declares `ptr` in the enclosing scope, so use it at most once per block.
// The unsigned char cast keeps tolower() defined for bytes >= 0x80.
#define STRCPY_TOLOWER(dst, str) \
    strncpy(dst, str, sizeof(dst) - 1); \
    dst[sizeof(dst) - 1] = '\0'; \
    char *ptr = dst; \
    for (; *ptr; ++ptr) *ptr = (char) tolower((unsigned char) *ptr);
 | 
			
		||||
 | 
			
		||||
__always_inline
 | 
			
		||||
static void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
 | 
			
		||||
 | 
			
		||||
    AVDictionaryEntry *tag = NULL;
 | 
			
		||||
    while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
 | 
			
		||||
        char key[256];
 | 
			
		||||
        STRCPY_TOLOWER(key, tag->key)
 | 
			
		||||
 | 
			
		||||
        if (strcmp(key, "artist") == 0) {
 | 
			
		||||
            APPEND_TAG_META(MetaArtist)
 | 
			
		||||
        } else if (strcmp(key, "genre") == 0) {
 | 
			
		||||
            APPEND_TAG_META(MetaGenre)
 | 
			
		||||
        } else if (strcmp(key, "title") == 0) {
 | 
			
		||||
            APPEND_TAG_META(MetaTitle)
 | 
			
		||||
        } else if (strcmp(key, "album_artist") == 0) {
 | 
			
		||||
            APPEND_TAG_META(MetaAlbumArtist)
 | 
			
		||||
        } else if (strcmp(key, "album") == 0) {
 | 
			
		||||
            APPEND_TAG_META(MetaAlbum)
 | 
			
		||||
        } else if (strcmp(key, "comment") == 0) {
 | 
			
		||||
            APPEND_TAG_META(MetaContent)
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
__always_inline
 | 
			
		||||
static void
 | 
			
		||||
append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int is_video) {
 | 
			
		||||
 | 
			
		||||
    if (is_video) {
 | 
			
		||||
        meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
 | 
			
		||||
        meta_duration->key = MetaMediaDuration;
 | 
			
		||||
        meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE;
 | 
			
		||||
        if (meta_duration->long_val > INT32_MAX) {
 | 
			
		||||
            meta_duration->long_val = 0;
 | 
			
		||||
        }
 | 
			
		||||
        APPEND_META(doc, meta_duration)
 | 
			
		||||
 | 
			
		||||
        meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
 | 
			
		||||
        meta_bitrate->key = MetaMediaBitrate;
 | 
			
		||||
        meta_bitrate->long_val = pFormatCtx->bit_rate;
 | 
			
		||||
        APPEND_META(doc, meta_bitrate)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    AVDictionaryEntry *tag = NULL;
 | 
			
		||||
    if (is_video) {
 | 
			
		||||
        while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
 | 
			
		||||
            char key[256];
 | 
			
		||||
            STRCPY_TOLOWER(key, tag->key)
 | 
			
		||||
 | 
			
		||||
            if (strcmp(key, "title") == 0) {
 | 
			
		||||
                append_tag_meta_if_not_exists(ctx, doc, tag, MetaTitle);
 | 
			
		||||
            } else if (strcmp(key, "comment") == 0) {
 | 
			
		||||
                append_tag_meta_if_not_exists(ctx, doc, tag, MetaContent);
 | 
			
		||||
            } else if (strcmp(key, "artist") == 0) {
 | 
			
		||||
                append_tag_meta_if_not_exists(ctx, doc, tag, MetaArtist);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    } else {
 | 
			
		||||
        // EXIF metadata
 | 
			
		||||
        while ((tag = av_dict_get(frame->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
 | 
			
		||||
            char key[256];
 | 
			
		||||
            STRCPY_TOLOWER(key, tag->key)
 | 
			
		||||
 | 
			
		||||
            if (strcmp(key, "artist") == 0) {
 | 
			
		||||
                append_tag_meta_if_not_exists(ctx, doc, tag, MetaArtist);
 | 
			
		||||
            } else if (strcmp(key, "imagedescription") == 0) {
 | 
			
		||||
                APPEND_TAG_META(MetaContent)
 | 
			
		||||
            } else if (strcmp(key, "make") == 0) {
 | 
			
		||||
                APPEND_TAG_META(MetaExifMake)
 | 
			
		||||
            } else if (strcmp(key, "model") == 0) {
 | 
			
		||||
                APPEND_TAG_META(MetaExifModel)
 | 
			
		||||
            } else if (strcmp(key, "software") == 0) {
 | 
			
		||||
                APPEND_TAG_META(MetaExifSoftware)
 | 
			
		||||
            } else if (strcmp(key, "fnumber") == 0) {
 | 
			
		||||
                APPEND_TAG_META(MetaExifFNumber)
 | 
			
		||||
            } else if (strcmp(key, "focallength") == 0) {
 | 
			
		||||
                APPEND_TAG_META(MetaExifFocalLength)
 | 
			
		||||
            } else if (strcmp(key, "usercomment") == 0) {
 | 
			
		||||
                APPEND_TAG_META(MetaExifUserComment)
 | 
			
		||||
            } else if (strcmp(key, "isospeedratings") == 0) {
 | 
			
		||||
                APPEND_TAG_META(MetaExifIsoSpeedRatings)
 | 
			
		||||
            } else if (strcmp(key, "exposuretime") == 0) {
 | 
			
		||||
                APPEND_TAG_META(MetaExifExposureTime)
 | 
			
		||||
            } else if (strcmp(key, "datetime") == 0) {
 | 
			
		||||
                APPEND_TAG_META(MetaExifDateTime)
 | 
			
		||||
            } else if (strcmp(key, "gpslatitude") == 0) {
 | 
			
		||||
                APPEND_TAG_META(MetaExifGpsLatitudeDMS)
 | 
			
		||||
            } else if (strcmp(key, "gpslatituderef") == 0) {
 | 
			
		||||
                APPEND_TAG_META(MetaExifGpsLatitudeRef)
 | 
			
		||||
            } else if (strcmp(key, "gpslongitude") == 0) {
 | 
			
		||||
                APPEND_TAG_META(MetaExifGpsLongitudeDMS)
 | 
			
		||||
            } else if (strcmp(key, "gpslongituderef") == 0) {
 | 
			
		||||
                APPEND_TAG_META(MetaExifGpsLongitudeRef)
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Extracts metadata, subtitles and a thumbnail from an opened media file.
 *
 * Steps: pick one audio, one video and one subtitle stream; optionally read
 * the subtitles (ctx->read_subtitles); append audio tags; and, when a video
 * stream exists and ctx->tn_size > 0, decode one frame, append the
 * video/image metadata and store a thumbnail through ctx->store.
 *
 * `pFormatCtx` is closed on every path: the caller must not use it afterwards.
 */
void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, document_t *doc) {

    int video_stream = -1;
    int audio_stream = -1;
    int subtitle_stream = -1;

    avformat_find_stream_info(pFormatCtx, NULL);

    // Streams are visited from last to first: the audio/video stream kept is the
    // highest-indexed one of its type, the subtitle stream the lowest-indexed one.
    for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) {
        AVStream *stream = pFormatCtx->streams[i];

        if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            if (audio_stream == -1) {
                const AVCodecDescriptor *desc = avcodec_descriptor_get(stream->codecpar->codec_id);

                if (desc != NULL) {
                    APPEND_STR_META(doc, MetaMediaAudioCodec, desc->name)
                }

                audio_stream = i;
            }
        } else if (stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {

            if (video_stream == -1) {
                const AVCodecDescriptor *desc = avcodec_descriptor_get(stream->codecpar->codec_id);

                if (desc != NULL) {
                    APPEND_STR_META(doc, MetaMediaVideoCodec, desc->name)
                }

                meta_line_t *meta_w = malloc(sizeof(meta_line_t));
                meta_w->key = MetaWidth;
                meta_w->long_val = stream->codecpar->width;
                APPEND_META(doc, meta_w)

                meta_line_t *meta_h = malloc(sizeof(meta_line_t));
                meta_h->key = MetaHeight;
                meta_h->long_val = stream->codecpar->height;
                APPEND_META(doc, meta_h)

                video_stream = i;
            }
        } else if (stream->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE) {
            subtitle_stream = i;
        }
    }

    if (subtitle_stream != -1 && ctx->read_subtitles) {
        read_subtitles(ctx, pFormatCtx, subtitle_stream, doc);

        // Reset stream
        if (video_stream != -1) {
            av_seek_frame(pFormatCtx, video_stream, 0, 0);
        }
    }

    if (audio_stream != -1) {
        append_audio_meta(pFormatCtx, doc);
    }

    if (video_stream != -1 && ctx->tn_size > 0) {
        AVStream *stream = pFormatCtx->streams[video_stream];

        // avformat_close_input() frees the context and resets the pointer,
        // so no separate avformat_free_context() is needed on any exit path.
        if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
            avformat_close_input(&pFormatCtx);
            return;
        }

        // Decoder
        AVCodec *video_codec = avcodec_find_decoder(stream->codecpar->codec_id);
        AVCodecContext *decoder = avcodec_alloc_context3(video_codec);
        if (decoder == NULL) {
            CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate decoder with avcodec_alloc_context3()")
            avformat_close_input(&pFormatCtx);
            return;
        }
        avcodec_parameters_to_context(decoder, stream->codecpar);

        int open_ret = avcodec_open2(decoder, video_codec, NULL);
        if (open_ret < 0) {
            CTX_LOG_ERRORF(doc->filepath, "(media.c) avcodec_open2() returned [%d] %s", open_ret,
                           av_err2str(open_ret))
            avcodec_free_context(&decoder);
            avformat_close_input(&pFormatCtx);
            return;
        }

        //Seek
        if (stream->nb_frames > 1 && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
            int seek_ret;
            // NOTE(review): every attempt uses the same target (10% of the stream duration)
            for (int i = 20; i >= 0; i--) {
                seek_ret = av_seek_frame(pFormatCtx, video_stream,
                                         stream->duration * 0.10, 0);
                if (seek_ret == 0) {
                    break;
                }
            }
        }

        frame_and_packet_t *frame_and_packet = read_frame(ctx, pFormatCtx, decoder, video_stream, doc);
        if (frame_and_packet == NULL) {
            avcodec_free_context(&decoder);
            avformat_close_input(&pFormatCtx);
            return;
        }

        append_video_meta(ctx, pFormatCtx, frame_and_packet->frame, doc, IS_VIDEO(pFormatCtx));

        // Scale frame
        AVFrame *scaled_frame = scale_frame(decoder, frame_and_packet->frame, ctx->tn_size);

        if (scaled_frame == NULL) {
            frame_and_packet_free(frame_and_packet);
            avcodec_free_context(&decoder);
            avformat_close_input(&pFormatCtx);
            return;
        }

        if (scaled_frame == STORE_AS_IS) {
            // Small MJPEG/PNG picture: the encoded packet itself is the thumbnail
            APPEND_TN_META(doc, frame_and_packet->frame->width, frame_and_packet->frame->height)
            ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
                       frame_and_packet->packet->size);
        } else {
            // Encode frame to jpeg
            AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height,
                                                              ctx->tn_qscale);

            AVPacket jpeg_packet;
            av_init_packet(&jpeg_packet);
            // av_init_packet() does not touch data/size
            jpeg_packet.data = NULL;
            jpeg_packet.size = 0;

            int encode_ret = jpeg_encoder == NULL ? AVERROR(ENOMEM)
                                                  : avcodec_send_frame(jpeg_encoder, scaled_frame);
            if (encode_ret == 0) {
                encode_ret = avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
            }

            if (encode_ret == 0) {
                // Save thumbnail
                APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
                ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data,
                           jpeg_packet.size);
            } else {
                CTX_LOG_WARNINGF(doc->filepath, "(media.c) Could not encode thumbnail: [%d] %s", encode_ret,
                                 av_err2str(encode_ret))
            }

            avcodec_free_context(&jpeg_encoder);
            av_packet_unref(&jpeg_packet);
            av_free(*scaled_frame->data);
            av_frame_free(&scaled_frame);
        }

        frame_and_packet_free(frame_and_packet);
        avcodec_free_context(&decoder);
    }

    avformat_close_input(&pFormatCtx);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Open a media file by path and pass the resulting format context to
 * parse_media_format_ctx().
 *
 * Failures are reported through the CTX_LOG_* macros; nothing is returned.
 */
void parse_media_filename(scan_media_ctx_t *ctx, const char *filepath, document_t *doc) {

    AVFormatContext *pFormatCtx = avformat_alloc_context();
    if (pFormatCtx == NULL) {
        CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
        return;
    }

    int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
    if (res >= 0) {
        // Success: hand the opened context over to the common parsing routine
        parse_media_format_ctx(ctx, pFormatCtx, doc);
        return;
    }

    CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
    avformat_close_input(&pFormatCtx);
    avformat_free_context(pFormatCtx);
}
 | 
			
		||||
 | 
			
		||||
int vfile_read(void *ptr, uint8_t *buf, int buf_size) {
 | 
			
		||||
    struct vfile *f = ptr;
 | 
			
		||||
 | 
			
		||||
    int ret = f->read(f, buf, buf_size);
 | 
			
		||||
 | 
			
		||||
    if (ret == 0) {
 | 
			
		||||
        return AVERROR_EOF;
 | 
			
		||||
    }
 | 
			
		||||
    return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * A media file held entirely in memory and exposed as a stdio stream.
 */
typedef struct {
    size_t size;  // number of bytes in buf
    FILE *file;   // stream opened over buf with fmemopen()
    void *buf;    // memory backing the stream
} memfile_t;
 | 
			
		||||
 | 
			
		||||
int memfile_read(void *ptr, uint8_t *buf, int buf_size) {
 | 
			
		||||
    memfile_t *mem = ptr;
 | 
			
		||||
 | 
			
		||||
    size_t ret = fread(buf, 1, buf_size, mem->file);
 | 
			
		||||
 | 
			
		||||
    if (ret == 0 && feof(mem->file)) {
 | 
			
		||||
        return AVERROR_EOF;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return (int) ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
long memfile_seek(void *ptr, long offset, int whence) {
 | 
			
		||||
    memfile_t *mem = ptr;
 | 
			
		||||
 | 
			
		||||
    if (whence == 0x10000) {
 | 
			
		||||
        return mem->size;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    int ret = fseek(mem->file, offset, whence);
 | 
			
		||||
    if (ret != 0) {
 | 
			
		||||
        return AVERROR_EOF;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return ftell(mem->file);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Load the whole content of a vfile in memory and open a read stream over it.
 *
 * When f->calculate_checksum is set, the SHA1 digest of the content is stored
 * in f->sha1_digest once the content has been read completely.
 *
 * On failure every resource acquired here is released and mem is reset
 * (buf == NULL, file == NULL, size == 0), so that a subsequent memfile_close()
 * is a harmless no-op.
 *
 * @return 0 on success, -1 on failure
 */
int memfile_open(vfile_t *f, memfile_t *mem) {
    mem->size = f->info.st_size;
    mem->file = NULL;

    mem->buf = malloc(mem->size);
    if (mem->buf == NULL) {
        mem->size = 0;
        return -1;
    }

    int ret = f->read(f, mem->buf, mem->size);
    if (ret < 0 || (size_t) ret != mem->size) {
        // Short read: the buffer is only partially initialised, do not hash or expose it
        free(mem->buf);
        mem->buf = NULL;
        mem->size = 0;
        return -1;
    }

    if (f->calculate_checksum) {
        SHA1_Init(&f->sha1_ctx);
        safe_sha1_update(&f->sha1_ctx, mem->buf, mem->size);
        SHA1_Final(f->sha1_digest, &f->sha1_ctx);
        f->has_checksum = TRUE;
    }

    mem->file = fmemopen(mem->buf, mem->size, "rb");
    if (mem->file == NULL) {
        free(mem->buf);
        mem->buf = NULL;
        mem->size = 0;
        return -1;
    }

    return 0;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Open a read stream over a caller-provided buffer.
 *
 * The buffer is only referenced by mem, it is not copied.
 *
 * @return 0 on success, -1 when fmemopen() fails (mem->file is NULL in that case)
 */
int memfile_open_buf(void *buf, size_t buf_len, memfile_t *mem) {
    // mem->size is a size_t: assign directly instead of truncating through int
    mem->size = buf_len;
    mem->buf = buf;

    mem->file = fmemopen(mem->buf, mem->size, "rb");

    return mem->file != NULL ? 0 : -1;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Release the stream and the buffer of a memfile_t.
 *
 * Does nothing when mem->buf is NULL. The pointers are reset afterwards so
 * that calling this function twice is harmless.
 */
void memfile_close(memfile_t *mem) {
    if (mem->buf != NULL) {
        // Close the stream before releasing the memory it reads from;
        // file is NULL when fmemopen() failed
        if (mem->file != NULL) {
            fclose(mem->file);
            mem->file = NULL;
        }

        free(mem->buf);
        mem->buf = NULL;
    }
}
 | 
			
		||||
 | 
			
		||||
/**
 * Parse a media file that is only reachable through the vfile interface.
 *
 * Files whose size does not exceed ctx->max_media_buffer are loaded in memory
 * so that libav can seek in them; otherwise (or when loading fails) the file
 * is read through vfile_read() without seek support.
 *
 * Failures are reported through the CTX_LOG_* macros; nothing is returned.
 */
void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc, const char *mime_str) {

    AVFormatContext *pFormatCtx = avformat_alloc_context();
    if (pFormatCtx == NULL) {
        CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
        return;
    }

    unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
    if (buffer == NULL) {
        CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate I/O buffer with av_malloc()")
        avformat_free_context(pFormatCtx);
        return;
    }

    AVIOContext *io_ctx = NULL;
    memfile_t memfile = {0, 0, 0};

    const char *filepath = get_filepath_with_ext(doc, f->filepath, mime_str);

    if (f->info.st_size <= ctx->max_media_buffer) {
        int ret = memfile_open(f, &memfile);
        if (ret == 0) {
            CTX_LOG_DEBUGF(f->filepath, "Loading media file in memory (%ldB)", f->info.st_size)
            io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, &memfile, memfile_read, NULL, memfile_seek);
        }
    }

    if (io_ctx == NULL) {
        CTX_LOG_DEBUGF(f->filepath, "Reading media file without seek support", f->info.st_size)
        io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);
    }

    if (io_ctx == NULL) {
        // Both attempts failed: release everything instead of dereferencing NULL below
        CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate I/O context with avio_alloc_context()")
        av_free(buffer);
        memfile_close(&memfile);
        avformat_free_context(pFormatCtx);
        return;
    }

    pFormatCtx->pb = io_ctx;

    int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
    if (res < 0) {
        // I/O errors are not logged
        if (res != AVERROR(EIO)) {
            CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
        }
        // io_ctx->buffer is freed rather than the local pointer: libav may have replaced the buffer
        av_free(io_ctx->buffer);
        memfile_close(&memfile);
        avio_context_free(&io_ctx);
        avformat_close_input(&pFormatCtx);
        avformat_free_context(pFormatCtx);
        return;
    }

    parse_media_format_ctx(ctx, pFormatCtx, doc);
    av_free(io_ctx->buffer);
    avio_context_free(&io_ctx);
    memfile_close(&memfile);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Entry point of the media parser.
 *
 * Files flagged as filesystem files are opened by path; every other file is
 * read through the vfile interface.
 */
void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc, const char *mime_str) {

    if (!f->is_fs_file) {
        parse_media_vfile(ctx, f, doc, mime_str);
        return;
    }

    parse_media_filename(ctx, f->filepath, doc);
}
 | 
			
		||||
 | 
			
		||||
void init_media() {
 | 
			
		||||
    av_log_set_level(AV_LOG_QUIET);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Decode the image contained in buf, scale it according to ctx->tn_size and
 * store the result as the thumbnail of doc through ctx->store().
 *
 * @param ctx     media scan context (callbacks, thumbnail size and quality)
 * @param buf     encoded image; only read through a memory stream, never freed here
 * @param buf_len size of buf, in bytes
 * @param doc     document that receives the thumbnail and its metadata
 * @param url     name passed to avformat_open_input()
 * @return TRUE when a thumbnail was stored, FALSE otherwise
 */
int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, document_t *doc, const char *url) {
    memfile_t memfile = {0, 0, 0};
    AVIOContext *io_ctx = NULL;
    AVCodecContext *decoder = NULL;
    AVStream *stream = NULL;
    const AVCodec *video_codec = NULL;
    frame_and_packet_t *frame_and_packet = NULL;
    AVFrame *scaled_frame = NULL;
    int stored = FALSE;

    AVFormatContext *pFormatCtx = avformat_alloc_context();
    if (pFormatCtx == NULL) {
        CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
        return FALSE;
    }

    int ret = memfile_open_buf(buf, buf_len, &memfile);
    if (ret != 0) {
        // fmemopen() failed: memfile.file is NULL and must not be passed to fclose()
        avformat_free_context(pFormatCtx);
        return FALSE;
    }

    unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
    if (buffer == NULL) {
        avformat_free_context(pFormatCtx);
        fclose(memfile.file);
        return FALSE;
    }

    CTX_LOG_DEBUGF(doc->filepath, "Loading media file in memory (%ldB)", buf_len)
    io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, &memfile, memfile_read, NULL, memfile_seek);
    if (io_ctx == NULL) {
        av_free(buffer);
        avformat_free_context(pFormatCtx);
        fclose(memfile.file);
        return FALSE;
    }

    pFormatCtx->pb = io_ctx;

    int res = avformat_open_input(&pFormatCtx, url, NULL, NULL);
    if (res != 0) {
        goto cleanup;
    }

    if (pFormatCtx->nb_streams == 0) {
        // Nothing to decode; streams[0] would be out of bounds
        goto cleanup;
    }
    stream = pFormatCtx->streams[0];

    // Decoder
    video_codec = avcodec_find_decoder(stream->codecpar->codec_id);
    if (video_codec == NULL) {
        goto cleanup;
    }
    decoder = avcodec_alloc_context3(video_codec);
    if (decoder == NULL) {
        goto cleanup;
    }
    if (avcodec_parameters_to_context(decoder, stream->codecpar) < 0
        || avcodec_open2(decoder, video_codec, NULL) < 0) {
        goto cleanup;
    }

    frame_and_packet = read_frame(ctx, pFormatCtx, decoder, 0, doc);
    if (frame_and_packet == NULL) {
        goto cleanup;
    }

    // Scale frame
    scaled_frame = scale_frame(decoder, frame_and_packet->frame, ctx->tn_size);
    if (scaled_frame == NULL) {
        goto cleanup;
    }

    if (scaled_frame == STORE_AS_IS) {
        // The original packet is stored untouched
        APPEND_TN_META(doc, frame_and_packet->frame->width, frame_and_packet->frame->height)
        ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
                   frame_and_packet->packet->size);
        stored = TRUE;
    } else {
        // Encode frame to jpeg
        AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height,
                                                          ctx->tn_qscale);
        if (jpeg_encoder != NULL) {
            AVPacket jpeg_packet;
            av_init_packet(&jpeg_packet);
            // av_init_packet() leaves data and size untouched
            jpeg_packet.data = NULL;
            jpeg_packet.size = 0;

            if (avcodec_send_frame(jpeg_encoder, scaled_frame) == 0
                && avcodec_receive_packet(jpeg_encoder, &jpeg_packet) == 0) {
                // Save thumbnail
                APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
                ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
                stored = TRUE;
            }

            av_packet_unref(&jpeg_packet);
            avcodec_free_context(&jpeg_encoder);
        }

        av_free(*scaled_frame->data);
        av_frame_free(&scaled_frame);
    }

cleanup:
    if (frame_and_packet != NULL) {
        frame_and_packet_free(frame_and_packet);
    }
    // The calls below accept a pointer to NULL, which covers the early-failure paths
    avcodec_free_context(&decoder);
    avformat_close_input(&pFormatCtx);

    // io_ctx->buffer is freed rather than the local pointer: libav may have replaced the buffer
    av_free(io_ctx->buffer);
    avio_context_free(&io_ctx);
    fclose(memfile.file);

    return stored;
}
 | 
			
		||||
							
								
								
									
										52
									
								
								third-party/libscan/libscan/media/media.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										52
									
								
								third-party/libscan/libscan/media/media.h
									
									
									
									
										vendored
									
									
								
							@ -1,52 +0,0 @@
 | 
			
		||||
#ifndef SIST2_MEDIA_H
 | 
			
		||||
#define SIST2_MEDIA_H
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#include "../scan.h"
 | 
			
		||||
 | 
			
		||||
#include "libavformat/avformat.h"
 | 
			
		||||
#include "libswscale/swscale.h"
 | 
			
		||||
#include "libswresample/swresample.h"
 | 
			
		||||
#include "libavcodec/avcodec.h"
 | 
			
		||||
#include "libavutil/imgutils.h"
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
    log_callback_t log;
 | 
			
		||||
    logf_callback_t logf;
 | 
			
		||||
    store_callback_t store;
 | 
			
		||||
 | 
			
		||||
    int tn_size;
 | 
			
		||||
    float tn_qscale;
 | 
			
		||||
    long max_media_buffer;
 | 
			
		||||
    int read_subtitles;
 | 
			
		||||
} scan_media_ctx_t;
 | 
			
		||||
 | 
			
		||||
__always_inline
 | 
			
		||||
static AVCodecContext *alloc_jpeg_encoder(int w, int h, float qscale) {
 | 
			
		||||
 | 
			
		||||
    const AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
 | 
			
		||||
    AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
 | 
			
		||||
    jpeg->width = w;
 | 
			
		||||
    jpeg->height = h;
 | 
			
		||||
    jpeg->time_base.den = 1000000;
 | 
			
		||||
    jpeg->time_base.num = 1;
 | 
			
		||||
    jpeg->i_quant_factor = qscale;
 | 
			
		||||
 | 
			
		||||
    jpeg->pix_fmt = AV_PIX_FMT_YUVJ420P;
 | 
			
		||||
    int ret = avcodec_open2(jpeg, jpeg_codec, NULL);
 | 
			
		||||
 | 
			
		||||
    if (ret != 0) {
 | 
			
		||||
        return NULL;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return jpeg;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc, const char*mime_str);
 | 
			
		||||
 | 
			
		||||
void init_media();
 | 
			
		||||
 | 
			
		||||
int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, document_t *doc, const char *url);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										79
									
								
								third-party/libscan/libscan/mobi/scan_mobi.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										79
									
								
								third-party/libscan/libscan/mobi/scan_mobi.c
									
									
									
									
										vendored
									
									
								
							@ -1,79 +0,0 @@
 | 
			
		||||
#include "scan_mobi.h"
 | 
			
		||||
 | 
			
		||||
#include <mobi.h>
 | 
			
		||||
#include <errno.h>
 | 
			
		||||
#include "stdlib.h"
 | 
			
		||||
 | 
			
		||||
/**
 * Extract author, title and text content from a MOBI e-book.
 *
 * The file is read completely in memory, loaded with libmobi and its raw
 * markup is appended to the document as MetaContent. Every exit path releases
 * the MOBIData handle and the buffers allocated here.
 */
void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {

    MOBIData *m = mobi_init();
    if (m == NULL) {
        CTX_LOG_ERROR(f->filepath, "mobi_init() failed")
        return;
    }

    size_t buf_len;
    char* buf = read_all(f, &buf_len);
    if (buf == NULL) {
        mobi_free(m);
        CTX_LOG_ERROR(f->filepath, "read_all() failed")
        return;
    }

    FILE *file = fmemopen(buf, buf_len, "rb");
    if (file == NULL) {
        // Capture errno first: the cleanup calls may overwrite it
        int fmemopen_errno = errno;
        mobi_free(m);
        free(buf);
        CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", fmemopen_errno)
        return;
    }

    MOBI_RET mobi_ret = mobi_load_file(m, file);
    fclose(file);
    if (mobi_ret != MOBI_SUCCESS) {
        mobi_free(m);
        free(buf);
        CTX_LOG_ERRORF(f->filepath, "mobi_load_file() returned error code [%d]", mobi_ret)
        return;
    }

    char *author = mobi_meta_get_author(m);
    if (author != NULL) {
        APPEND_STR_META(doc, MetaAuthor, author)
        free(author);
    }
    char *title = mobi_meta_get_title(m);
    if (title != NULL) {
        APPEND_STR_META(doc, MetaTitle, title)
        free(title);
    }

    const size_t maxlen = mobi_get_text_maxsize(m);
    if (maxlen == MOBI_NOTSET) {
        mobi_free(m);
        free(buf);
        CTX_LOG_DEBUGF(f->filepath, "Invalid text maxsize: %zu", maxlen)
        return;
    }

    char *content_str = malloc(maxlen + 1);
    if (content_str == NULL) {
        mobi_free(m);
        free(buf);
        CTX_LOG_ERROR(f->filepath, "malloc() failed")
        return;
    }

    size_t length = maxlen;
    mobi_ret = mobi_get_rawml(m, content_str, &length);
    if (mobi_ret != MOBI_SUCCESS) {
        mobi_free(m);
        free(content_str);
        free(buf);
        CTX_LOG_ERRORF(f->filepath, "mobi_get_rawml() returned error code [%d]", mobi_ret)
        return;
    }
    // Defensive termination: the markup is consumed as a C string below
    content_str[length <= maxlen ? length : maxlen] = '\0';

    text_buffer_t tex = text_buffer_create(ctx->content_size);
    text_buffer_append_markup(&tex, content_str);
    text_buffer_terminate_string(&tex);

    APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf)

    free(content_str);
    free(buf);
    text_buffer_destroy(&tex);
    mobi_free(m);
}
 | 
			
		||||
							
								
								
									
										14
									
								
								third-party/libscan/libscan/mobi/scan_mobi.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										14
									
								
								third-party/libscan/libscan/mobi/scan_mobi.h
									
									
									
									
										vendored
									
									
								
							@ -1,14 +0,0 @@
 | 
			
		||||
#ifndef SCAN_SCAN_MOBI_H
 | 
			
		||||
#define SCAN_SCAN_MOBI_H
 | 
			
		||||
 | 
			
		||||
#include "../scan.h"
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
    long content_size;
 | 
			
		||||
    log_callback_t log;
 | 
			
		||||
    logf_callback_t logf;
 | 
			
		||||
} scan_mobi_ctx_t;
 | 
			
		||||
 | 
			
		||||
void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										147
									
								
								third-party/libscan/libscan/msdoc/msdoc.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										147
									
								
								third-party/libscan/libscan/msdoc/msdoc.c
									
									
									
									
										vendored
									
									
								
							@ -1,147 +0,0 @@
 | 
			
		||||
#include "msdoc.h"
 | 
			
		||||
#include <errno.h>
 | 
			
		||||
 | 
			
		||||
#include <sys/mman.h>
 | 
			
		||||
#include "../../third-party/antiword/src/antiword.h"
 | 
			
		||||
 | 
			
		||||
#include "../ebook/ebook.h"
 | 
			
		||||
 | 
			
		||||
/**
 * Convert a Word document to text with antiword and append author, title and
 * content metadata to doc.
 *
 * @param ctx     msdoc scan context
 * @param doc     document that receives the metadata
 * @param file_in stream positioned over the document; it is never closed here,
 *                closing it is left to the caller
 * @param buf     buffer that is always freed by this function
 * @param buf_len size of the document, in bytes
 */
void parse_msdoc_text(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file_in, void *buf, size_t buf_len) {

    // Open word doc
    options_type *opts = direct_vGetOptions();
    opts->iParagraphBreak = 74;
    opts->eConversionType = conversion_text;
    opts->bHideHiddenText = 1;
    opts->bRemoveRemovedText = 1;
    opts->bUseLandscape = 0;
    opts->eEncoding = encoding_utf_8;
    opts->iPageHeight = 842; // A4
    opts->iPageWidth = 595;
    opts->eImageLevel = level_ps_3;

    int doc_word_version = iGuessVersionNumber(file_in, (int) buf_len);
    if (doc_word_version < 0 || doc_word_version == 3) {
        free(buf);
        return;
    }
    rewind(file_in);

    size_t out_len = 0;
    char *out_buf = NULL;

    FILE *file_out = open_memstream(&out_buf, &out_len);
    if (file_out == NULL) {
        free(buf);
        return;
    }

    diagram_type *diag = pCreateDiagram("antiword", NULL, file_out);
    if (diag == NULL) {
        // file_in is left open (see above); release only what was acquired here
        fclose(file_out);
        free(out_buf);
        free(buf);
        return;
    }

    iInitDocument(file_in, (int) buf_len);
    const char *author = szGetAuthor();
    if (author != NULL) {
        APPEND_UTF8_META(doc, MetaAuthor, author)
    }

    const char *title = szGetTitle();
    if (title != NULL) {
        APPEND_UTF8_META(doc, MetaTitle, title)
    }
    vFreeDocument();

    bWordDecryptor(file_in, (int) buf_len, diag);
    vDestroyDiagram(diag);
    // Closing the memory stream makes out_buf and out_len final
    fclose(file_out);

    if (buf_len > 0) {
        text_buffer_t tex = text_buffer_create(ctx->content_size);
        text_buffer_append_string(&tex, out_buf, out_len);
        text_buffer_terminate_string(&tex);

        meta_line_t *meta_content = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur);
        meta_content->key = MetaContent;
        memcpy(meta_content->str_val, tex.dyn_buffer.buf, tex.dyn_buffer.cur);
        APPEND_META(doc, meta_content)

        text_buffer_destroy(&tex);
    }

    free(buf);
    free(out_buf);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Convert a Word document to PDF with antiword and pass the result to the
 * ebook parser (parse_ebook_mem).
 *
 * @param ctx     msdoc scan context; its settings are copied in an ebook context
 * @param doc     document that receives the output
 * @param file    stream positioned over the document; not closed here
 * @param buf     buffer that is always freed by this function
 * @param buf_len size of the document, in bytes
 */
void parse_msdoc_pdf(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file, void *buf, size_t buf_len) {

    scan_ebook_ctx_t ebook_ctx = {
            .content_size = ctx->content_size,
            .tn_size = ctx->tn_size,
            .log = ctx->log,
            .logf = ctx->logf,
            .store = ctx->store,
    };

    // Open word doc
    options_type *opts = direct_vGetOptions();
    opts->iParagraphBreak = 74;
    opts->eConversionType = conversion_pdf;
    opts->bHideHiddenText = 1;
    opts->bRemoveRemovedText = 1;
    opts->bUseLandscape = 0;
    opts->eEncoding = encoding_latin_1;
    opts->iPageHeight = 842; // A4
    opts->iPageWidth = 595;
    opts->eImageLevel = level_ps_3;

    int doc_word_version = iGuessVersionNumber(file, (int) buf_len);
    if (doc_word_version < 0 || doc_word_version == 3) {
        free(buf);
        return;
    }
    rewind(file);

    size_t out_len = 0;
    char *out_buf = NULL;

    FILE *file_out = open_memstream(&out_buf, &out_len);
    if (file_out == NULL) {
        free(buf);
        return;
    }

    diagram_type *diag = pCreateDiagram("antiword", NULL, file_out);
    if (diag == NULL) {
        // Release everything acquired here instead of leaking it
        fclose(file_out);
        free(out_buf);
        free(buf);
        return;
    }

    bWordDecryptor(file, (int) buf_len, diag);
    vDestroyDiagram(diag);

    // Closing the memory stream makes out_buf and out_len final
    fclose(file_out);

    parse_ebook_mem(&ebook_ctx, out_buf, out_len, "application/pdf", doc, TRUE);

    free(buf);
    free(out_buf);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Parse a Word document: generate a PDF rendition first when ctx->tn_size is
 * positive, then extract its text and metadata.
 *
 * The file content is read in memory; the buffers handed to
 * parse_msdoc_pdf() and parse_msdoc_text() are freed by those functions.
 */
void parse_msdoc(scan_msdoc_ctx_t *ctx, vfile_t *f, document_t *doc) {

    size_t buf_len;
    char *buf = read_all(f, &buf_len);
    if (buf == NULL) {
        CTX_LOG_ERROR(f->filepath, "read_all() failed")
        return;
    }

    FILE *file = fmemopen(buf, buf_len, "rb");
    if (file == NULL) {
        // Capture errno first: free() may overwrite it
        int fmemopen_errno = errno;
        free(buf);
        CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", fmemopen_errno)
        return;
    }

    if (ctx->tn_size > 0) {
        char *buf_pdf = malloc(buf_len);
        if (buf_pdf != NULL) {
            memcpy(buf_pdf, buf, buf_len);
            parse_msdoc_pdf(ctx, doc, file, buf_pdf, buf_len);
        } else {
            // Text extraction below is still attempted
            CTX_LOG_ERROR(f->filepath, "malloc() failed")
        }
    }
    parse_msdoc_text(ctx, doc, file, buf, buf_len);
    fclose(file);
}
 | 
			
		||||
							
								
								
									
										24
									
								
								third-party/libscan/libscan/msdoc/msdoc.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										24
									
								
								third-party/libscan/libscan/msdoc/msdoc.h
									
									
									
									
										vendored
									
									
								
							@ -1,24 +0,0 @@
 | 
			
		||||
#ifndef SCAN_SCAN_MSDOC_H
 | 
			
		||||
#define SCAN_SCAN_MSDOC_H
 | 
			
		||||
 | 
			
		||||
#include "../scan.h"
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
    long content_size;
 | 
			
		||||
    int tn_size;
 | 
			
		||||
    log_callback_t log;
 | 
			
		||||
    logf_callback_t logf;
 | 
			
		||||
    store_callback_t store;
 | 
			
		||||
    unsigned int msdoc_mime;
 | 
			
		||||
} scan_msdoc_ctx_t;
 | 
			
		||||
 | 
			
		||||
__always_inline
 | 
			
		||||
static int is_msdoc(scan_msdoc_ctx_t *ctx, unsigned int mime) {
 | 
			
		||||
    return mime == ctx->msdoc_mime;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void parse_msdoc(scan_msdoc_ctx_t *ctx, vfile_t *f, document_t *doc);
 | 
			
		||||
 | 
			
		||||
void parse_msdoc_text(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file_in, void* buf, size_t buf_len);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										260
									
								
								third-party/libscan/libscan/ooxml/ooxml.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										260
									
								
								third-party/libscan/libscan/ooxml/ooxml.c
									
									
									
									
										vendored
									
									
								
							@ -1,260 +0,0 @@
 | 
			
		||||
#include "ooxml.h"
 | 
			
		||||
 | 
			
		||||
#include <archive.h>
 | 
			
		||||
#include <archive_entry.h>
 | 
			
		||||
#include <libxml/xmlstring.h>
 | 
			
		||||
#include <libxml/parser.h>
 | 
			
		||||
 | 
			
		||||
#define _X(str) ((const xmlChar*)str)
 | 
			
		||||
 | 
			
		||||
__always_inline
 | 
			
		||||
static int should_read_part(const char *part) {
 | 
			
		||||
 | 
			
		||||
    if (part == NULL) {
 | 
			
		||||
        return FALSE;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (    // Word
 | 
			
		||||
            STR_STARTS_WITH(part, "word/document.xml")
 | 
			
		||||
            || STR_STARTS_WITH(part, "word/footnotes.xml")
 | 
			
		||||
            || STR_STARTS_WITH(part, "word/endnotes.xml")
 | 
			
		||||
            || STR_STARTS_WITH(part, "word/footer")
 | 
			
		||||
            || STR_STARTS_WITH(part, "word/header")
 | 
			
		||||
            // PowerPoint
 | 
			
		||||
            || STR_STARTS_WITH(part, "ppt/slides/slide")
 | 
			
		||||
            || STR_STARTS_WITH(part, "ppt/notesSlides/slide")
 | 
			
		||||
            // Excel
 | 
			
		||||
            || STR_STARTS_WITH(part, "xl/worksheets/sheet")
 | 
			
		||||
            || STR_STARTS_WITH(part, "xl/sharedStrings.xml")
 | 
			
		||||
            || STR_STARTS_WITH(part, "xl/workbook.xml")
 | 
			
		||||
            ) {
 | 
			
		||||
        return TRUE;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return FALSE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Walk node, its following siblings and all their descendants, appending the
 * text of every element named "t" to buf, each followed by a space.
 *
 * @return TEXT_BUF_FULL as soon as the buffer reports it is full, -1 when the
 *         last libxml2 error on entry is fatal, 0 otherwise
 */
int extract_text(scan_ooxml_ctx_t *ctx, xmlDoc *xml, xmlNode *node, text_buffer_t *buf) {
    //TODO: Check which nodes are likely to have a 't' child, and ignore nodes that aren't
    xmlErrorPtr err = xmlGetLastError();
    if (err != NULL && err->level == XML_ERR_FATAL) {
        CTX_LOG_ERRORF("ooxml.c", "Got fatal XML error while parsing document: %s", err->message)
        return -1;
    }

    xmlNode *cur = node;
    while (cur != NULL) {
        const xmlChar *tag = cur->name;

        // Exactly "t": first character 't' immediately followed by the terminator
        if (tag != NULL && tag[0] == 't' && tag[1] == '\0') {
            xmlChar *text = xmlNodeListGetString(xml, cur->xmlChildrenNode, 1);

            if (text != NULL) {
                int append_ret = text_buffer_append_string0(buf, (char *) text);
                text_buffer_append_char(buf, ' ');
                xmlFree(text);

                if (append_ret == TEXT_BUF_FULL) {
                    return append_ret;
                }
            }
        }

        // Only a full buffer stops the walk; other results of the recursion are ignored
        if (extract_text(ctx, xml, cur->children, buf) == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }

        cur = cur->next;
    }
    return 0;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Read callback given to xmlReadIO(): pulls up to len bytes of the current
 * archive entry into buffer.
 *
 * @return the value of archive_read_data(), converted to int
 */
int xml_io_read(void *context, char *buffer, int len) {
    struct archive *ar = context;
    int n_read = (int) archive_read_data(ar, buffer, len);
    return n_read;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Close callback given to xmlReadIO(). It does nothing and always returns 0.
 */
int xml_io_close(UNUSED(void *context)) {
    // Intentionally empty
    return 0;
}
 | 
			
		||||
 | 
			
		||||
#define READ_PART_ERR (-2)
 | 
			
		||||
 | 
			
		||||
__always_inline
 | 
			
		||||
static int read_part(scan_ooxml_ctx_t *ctx, struct archive *a, text_buffer_t *buf, document_t *doc) {
 | 
			
		||||
 | 
			
		||||
    xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL,
 | 
			
		||||
                            XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
 | 
			
		||||
 | 
			
		||||
    if (xml == NULL) {
 | 
			
		||||
        CTX_LOG_ERROR(doc->filepath, "Could not parse XML")
 | 
			
		||||
        return READ_PART_ERR;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    xmlNode *root = xmlDocGetRootElement(xml);
 | 
			
		||||
    if (root == NULL) {
 | 
			
		||||
        CTX_LOG_ERROR(doc->filepath, "Empty document")
 | 
			
		||||
        xmlFreeDoc(xml);
 | 
			
		||||
        return READ_PART_ERR;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    int ret = extract_text(ctx, xml, root, buf);
 | 
			
		||||
    xmlFreeDoc(xml);
 | 
			
		||||
 | 
			
		||||
    return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
__always_inline
 | 
			
		||||
static int read_doc_props_app(scan_ooxml_ctx_t *ctx, struct archive *a, document_t *doc) {
 | 
			
		||||
    xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL,
 | 
			
		||||
                            XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
 | 
			
		||||
 | 
			
		||||
    if (xml == NULL) {
 | 
			
		||||
        CTX_LOG_ERROR(doc->filepath, "Could not parse XML")
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    xmlNode *root = xmlDocGetRootElement(xml);
 | 
			
		||||
    if (root == NULL) {
 | 
			
		||||
        CTX_LOG_ERROR(doc->filepath, "Empty document")
 | 
			
		||||
        xmlFreeDoc(xml);
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (xmlStrEqual(root->name, _X("Properties"))) {
 | 
			
		||||
        for (xmlNode *child = root->children; child; child = child->next) {
 | 
			
		||||
            xmlChar *text = xmlNodeListGetString(xml, child->xmlChildrenNode, 1);
 | 
			
		||||
            if (text == NULL) {
 | 
			
		||||
                continue;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            if (xmlStrEqual(child->name, _X("Pages"))) {
 | 
			
		||||
                APPEND_LONG_META(doc, MetaPages, strtol((char *) text, NULL, 10))
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            xmlFree(text);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    xmlFreeDoc(xml);
 | 
			
		||||
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
__always_inline
 | 
			
		||||
static int read_doc_props(scan_ooxml_ctx_t *ctx, struct archive *a, document_t *doc) {
 | 
			
		||||
    xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL,
 | 
			
		||||
                            XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
 | 
			
		||||
 | 
			
		||||
    if (xml == NULL) {
 | 
			
		||||
        CTX_LOG_ERROR(doc->filepath, "Could not parse XML")
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    xmlNode *root = xmlDocGetRootElement(xml);
 | 
			
		||||
    if (root == NULL) {
 | 
			
		||||
        CTX_LOG_ERROR(doc->filepath, "Empty document")
 | 
			
		||||
        xmlFreeDoc(xml);
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (xmlStrEqual(root->name, _X("coreProperties"))) {
 | 
			
		||||
        for (xmlNode *child = root->children; child; child = child->next) {
 | 
			
		||||
            xmlChar *text = xmlNodeListGetString(xml, child->xmlChildrenNode, 1);
 | 
			
		||||
            if (text == NULL) {
 | 
			
		||||
                continue;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            if (xmlStrEqual(child->name, _X("title"))) {
 | 
			
		||||
                APPEND_STR_META(doc, MetaTitle, (char *) text)
 | 
			
		||||
            } else if (xmlStrEqual(child->name, _X("creator"))) {
 | 
			
		||||
                APPEND_STR_META(doc, MetaAuthor, (char *) text)
 | 
			
		||||
            } else if (xmlStrEqual(child->name, _X("lastModifiedBy"))) {
 | 
			
		||||
                APPEND_STR_META(doc, MetaModifiedBy, (char *) text)
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            xmlFree(text);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    xmlFreeDoc(xml);
 | 
			
		||||
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define MAX_TN_SIZE (1024 * 1024 * 15)
 | 
			
		||||
 | 
			
		||||
/**
 * Store the archive entry that `a` is currently positioned on as the
 * document's thumbnail, keyed by the document's path MD5.
 *
 * Nothing is stored when the entry is empty, larger than MAX_TN_SIZE, when
 * the buffer cannot be allocated, or when reading the entry fails.
 *
 * @param ctx   scan context providing the log and store callbacks
 * @param doc   document the thumbnail belongs to
 * @param a     archive handle positioned on the thumbnail entry
 * @param entry header of that entry (used for its declared size)
 */
void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, struct archive_entry *entry) {
    // Keep the size signed: archive_entry_size() returns a signed 64-bit
    // value, and an unsigned variable would make the "<= 0" test only catch 0.
    long long entry_size = archive_entry_size(entry);

    if (entry_size <= 0 || entry_size > MAX_TN_SIZE) {
        return;
    }

    char *buf = malloc((size_t) entry_size);
    if (buf == NULL) {
        CTX_LOG_ERROR(doc->filepath, "Could not allocate thumbnail buffer")
        return;
    }

    // archive_read_data() returns the number of bytes read, or a negative
    // error code. Never store bytes that were not actually read.
    long long bytes_read = archive_read_data(a, buf, (size_t) entry_size);
    if (bytes_read <= 0) {
        CTX_LOG_ERROR(doc->filepath, "Could not read thumbnail")
        free(buf);
        return;
    }

    APPEND_TN_META(doc, 1, 1) // Size unknown
    ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), buf, (size_t) bytes_read);
    free(buf);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Extract text content, document properties and the embedded thumbnail from
 * an OOXML file (a zip archive).
 *
 * The whole file is read into memory and opened with libarchive. Each regular
 * entry is then dispatched by path:
 *   - parts accepted by should_read_part(): text is appended to a shared text
 *     buffer until it reports TEXT_BUF_FULL (skipped when content_size <= 0)
 *   - docProps/app.xml: page count
 *   - docProps/core.xml: title / author / last modified by
 *   - docProps/thumbnail.jpeg: stored through ctx->store
 * Iteration stops at the first part or property file that fails to parse;
 * whatever text was collected up to that point is still attached to `doc`
 * as MetaContent.
 *
 * @param ctx scan context (content size limit, log and store callbacks)
 * @param f   file to read
 * @param doc document that receives the metadata
 */
void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {

    size_t buf_len;
    void *buf = read_all(f, &buf_len);
    if (buf == NULL) {
        CTX_LOG_ERROR(f->filepath, "read_all() failed")
        return;
    }

    struct archive *a = archive_read_new();
    archive_read_support_format_zip(a);

    int ret = archive_read_open_memory(a, buf, buf_len);
    if (ret != ARCHIVE_OK) {
        CTX_LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a))
        archive_read_free(a);
        free(buf);
        return;
    }

    text_buffer_t tex = text_buffer_create(ctx->content_size);

    struct archive_entry *entry;
    int buffer_full = FALSE;
    while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
        if (S_ISREG(archive_entry_stat(entry)->st_mode)) {
            const char *path = archive_entry_pathname(entry);

            if (!buffer_full && should_read_part(path) && ctx->content_size > 0) {
                ret = read_part(ctx, a, &tex, doc);
                if (ret == READ_PART_ERR) {
                    break;
                } else if (ret == TEXT_BUF_FULL) {
                    // Keep iterating: metadata and thumbnail entries may follow
                    buffer_full = TRUE;
                }
            } else if (strcmp(path, "docProps/app.xml") == 0) {
                if (read_doc_props_app(ctx, a, doc) != 0) {
                    break;
                }
            } else if (strcmp(path, "docProps/core.xml") == 0) {
                if (read_doc_props(ctx, a, doc) != 0) {
                    break;
                }
            } else if (strcmp(path, "docProps/thumbnail.jpeg") == 0) {
                read_thumbnail(ctx, doc, a, entry);
            }
        }
    }

    if (tex.dyn_buffer.cur > 0) {
        text_buffer_terminate_string(&tex);

        // The string is stored inline, directly after the meta_line_t header
        meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur);
        if (meta != NULL) {
            meta->key = MetaContent;
            strcpy(meta->str_val, tex.dyn_buffer.buf);
            APPEND_META(doc, meta)
        } else {
            // Out of memory: drop the content instead of dereferencing NULL
            CTX_LOG_ERROR(doc->filepath, "Could not allocate content metadata")
        }
    }

    archive_read_close(a);
    archive_read_free(a);
    text_buffer_destroy(&tex);
    free(buf);
}
 | 
			
		||||
							
								
								
									
										16
									
								
								third-party/libscan/libscan/ooxml/ooxml.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										16
									
								
								third-party/libscan/libscan/ooxml/ooxml.h
									
									
									
									
										vendored
									
									
								
							@ -1,16 +0,0 @@
 | 
			
		||||
#ifndef SCAN_OOXML_H
 | 
			
		||||
#define SCAN_OOXML_H
 | 
			
		||||
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include "../scan.h"
 | 
			
		||||
 | 
			
		||||
// Configuration and callbacks for the OOXML scanner (see parse_ooxml()).
typedef struct {
    // Passed to text_buffer_create() for the extracted text; document parts
    // are not read at all when this is <= 0.
    long content_size;
    // Logging callbacks invoked through the CTX_LOG_* macros.
    log_callback_t log;
    logf_callback_t logf;
    // Receives the embedded thumbnail bytes, keyed by the document's path MD5.
    store_callback_t store;
} scan_ooxml_ctx_t;
 | 
			
		||||
 | 
			
		||||
void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										218
									
								
								third-party/libscan/libscan/raw/raw.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										218
									
								
								third-party/libscan/libscan/raw/raw.c
									
									
									
									
										vendored
									
									
								
							@ -1,218 +0,0 @@
 | 
			
		||||
#include "raw.h"
 | 
			
		||||
#include <libraw/libraw.h>
 | 
			
		||||
 | 
			
		||||
#include "../media/media.h"
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define MIN_SIZE 32
 | 
			
		||||
 | 
			
		||||
/**
 * Hand an already JPEG-encoded libraw thumbnail to the shared image
 * thumbnail routine of the media scanner.
 *
 * NOTE(review): the cast presumes scan_raw_ctx_t can be used wherever the
 * fields read by store_image_thumbnail() are expected from a
 * scan_media_ctx_t -- confirm against media.h.
 *
 * @return the result of store_image_thumbnail()
 */
int store_thumbnail_jpeg(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) {
    scan_media_ctx_t *media_ctx = (scan_media_ctx_t *) ctx;

    return store_image_thumbnail(media_ctx, img->data, img->data_size, doc, "x.jpeg");
}
 | 
			
		||||
 | 
			
		||||
/**
 * Scale a packed RGB24 libraw image down to thumbnail size, encode it as
 * JPEG and hand it to ctx->store, keyed by the document's path MD5.
 *
 * Images that already fit within tn_size x tn_size keep their dimensions;
 * larger ones are scaled so that the longer side equals tn_size while
 * preserving the aspect ratio.
 *
 * @param ctx scan context (thumbnail size, log and store callbacks)
 * @param img decoded image; data is read as tightly packed RGB24 rows
 * @param doc document that receives the thumbnail metadata
 * @return TRUE when a thumbnail was stored; FALSE when the result would be
 *         MIN_SIZE pixels or less on either side, or when allocation,
 *         scaling setup or JPEG encoding fails.
 */
int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) {

    int dstW;
    int dstH;

    if (img->width <= ctx->tn_size && img->height <= ctx->tn_size) {
        dstW = img->width;
        dstH = img->height;
    } else {
        double ratio = (double) img->width / img->height;
        if (img->width > img->height) {
            dstW = ctx->tn_size;
            dstH = (int) (ctx->tn_size / ratio);
        } else {
            dstW = (int) (ctx->tn_size * ratio);
            dstH = ctx->tn_size;
        }
    }

    if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
        return FALSE;
    }

    // Everything released in the cleanup section is declared (and nulled)
    // up front so that any failure can jump there safely.
    int tn_ok = FALSE;
    int dst_buf_len = 0;
    uint8_t *dst_buf = NULL;
    struct SwsContext *sws_ctx = NULL;
    AVCodecContext *jpeg_encoder = NULL;
    AVPacket jpeg_packet;

    AVFrame *scaled_frame = av_frame_alloc();
    if (scaled_frame == NULL) {
        CTX_LOG_ERROR(doc->filepath, "Could not allocate thumbnail frame")
        goto cleanup;
    }

    // NOTE(review): the scaler targets YUVJ420P while the buffer and frame
    // are described as YUV420P; kept as in the original -- confirm intent.
    sws_ctx = sws_getContext(
            img->width, img->height, AV_PIX_FMT_RGB24,
            dstW, dstH, AV_PIX_FMT_YUVJ420P,
            SIST_SWS_ALGO, 0, 0, 0
    );
    if (sws_ctx == NULL) {
        CTX_LOG_ERROR(doc->filepath, "Could not create scaling context")
        goto cleanup;
    }

    dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1);
    if (dst_buf_len <= 0) {
        CTX_LOG_ERROR(doc->filepath, "Invalid thumbnail buffer size")
        goto cleanup;
    }

    dst_buf = (uint8_t *) av_malloc(dst_buf_len);
    if (dst_buf == NULL) {
        CTX_LOG_ERROR(doc->filepath, "Could not allocate thumbnail buffer")
        goto cleanup;
    }

    av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);

    const uint8_t *in_data[1] = {img->data};
    int in_line_size[1] = {3 * img->width};

    sws_scale(sws_ctx,
              in_data, in_line_size,
              0, img->height,
              scaled_frame->data, scaled_frame->linesize
    );

    scaled_frame->width = dstW;
    scaled_frame->height = dstH;
    scaled_frame->format = AV_PIX_FMT_YUV420P;

    jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, 1.0f);
    if (jpeg_encoder == NULL) {
        CTX_LOG_ERROR(doc->filepath, "Could not allocate JPEG encoder")
        goto cleanup;
    }

    av_init_packet(&jpeg_packet);

    // Only store the packet when the encoder actually produced one
    if (avcodec_send_frame(jpeg_encoder, scaled_frame) == 0
        && avcodec_receive_packet(jpeg_encoder, &jpeg_packet) == 0) {
        APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
        ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
        av_packet_unref(&jpeg_packet);
        tn_ok = TRUE;
    } else {
        CTX_LOG_ERROR(doc->filepath, "Could not encode thumbnail")
    }

cleanup:
    if (sws_ctx != NULL) {
        sws_freeContext(sws_ctx);
    }
    // dst_buf backs scaled_frame->data but is owned here, not by the frame
    av_free(dst_buf);
    av_frame_free(&scaled_frame);
    avcodec_free_context(&jpeg_encoder);

    return tn_ok;
}
 | 
			
		||||
 | 
			
		||||
#define DMS_REF(ref) (((ref) == 'S' || (ref) == 'W') ? -1 : 1)
 | 
			
		||||
 | 
			
		||||
/**
 * Extract EXIF-style metadata and a thumbnail from a camera raw file.
 *
 * The file is read fully into memory and opened with libraw. Make, model,
 * software, dimensions, ISO, description, artist, timestamp, focal length,
 * aperture, exposure time and GPS position are appended to `doc`. When
 * ctx->tn_size > 0 the embedded thumbnail is used if it can be stored;
 * otherwise the raw data is unpacked and processed to build one.
 *
 * @param ctx scan context (thumbnail size, log and store callbacks)
 * @param f   file to read
 * @param doc document that receives the metadata
 */
void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
    libraw_data_t *libraw_lib = libraw_init(0);

    if (!libraw_lib) {
        CTX_LOG_ERROR("raw.c", "Cannot create libraw handle")
        return;
    }

    size_t buf_len = 0;
    void *buf = read_all(f, &buf_len);
    if (buf == NULL) {
        CTX_LOG_ERROR(f->filepath, "read_all() failed")
        // The handle was already created: release it on this path too
        libraw_close(libraw_lib);
        return;
    }

    int ret = libraw_open_buffer(libraw_lib, buf, buf_len);
    if (ret != 0) {
        CTX_LOG_ERROR(f->filepath, "Could not open raw file")
        libraw_close(libraw_lib);
        free(buf);
        return;
    }

    if (*libraw_lib->idata.model != '\0') {
        APPEND_STR_META(doc, MetaExifModel, libraw_lib->idata.model)
    }
    if (*libraw_lib->idata.make != '\0') {
        APPEND_STR_META(doc, MetaExifMake, libraw_lib->idata.make)
    }
    if (*libraw_lib->idata.software != '\0') {
        APPEND_STR_META(doc, MetaExifSoftware, libraw_lib->idata.software)
    }
    APPEND_LONG_META(doc, MetaWidth, libraw_lib->sizes.width)
    APPEND_LONG_META(doc, MetaHeight, libraw_lib->sizes.height)
    char tmp[1024];
    snprintf(tmp, sizeof(tmp), "%g", libraw_lib->other.iso_speed);
    APPEND_STR_META(doc, MetaExifIsoSpeedRatings, tmp)

    if (*libraw_lib->other.desc != '\0') {
        APPEND_STR_META(doc, MetaContent, libraw_lib->other.desc)
    }
    if (*libraw_lib->other.artist != '\0') {
        APPEND_STR_META(doc, MetaArtist, libraw_lib->other.artist)
    }

    // localtime_r() writes into a caller-owned struct (no shared static
    // state) and its result is checked before being passed to strftime().
    struct tm time_parts;
    if (localtime_r(&libraw_lib->other.timestamp, &time_parts) != NULL) {
        strftime(tmp, sizeof(tmp), "%Y:%m:%d %H:%M:%S", &time_parts);
        APPEND_STR_META(doc, MetaExifDateTime, tmp)
    }

    snprintf(tmp, sizeof(tmp), "%.1f", libraw_lib->other.focal_len);
    APPEND_STR_META(doc, MetaExifFocalLength, tmp)

    snprintf(tmp, sizeof(tmp), "%.1f", libraw_lib->other.aperture);
    APPEND_STR_META(doc, MetaExifFNumber, tmp)

    // A zero (unknown) shutter value would divide by zero and make the
    // float-to-int conversion undefined, so no exposure time is emitted then.
    if (libraw_lib->other.shutter > 0) {
        int denominator = (int) roundf(1 / libraw_lib->other.shutter);
        snprintf(tmp, sizeof(tmp), "1/%d", denominator);
        APPEND_STR_META(doc, MetaExifExposureTime, tmp)
    }

    // Degrees/minutes/seconds -> signed decimal degrees; 0.0 is treated as
    // "no position" and not emitted.
    libraw_gps_info_t gps = libraw_lib->other.parsed_gps;
    double gps_longitude_dec =
            (gps.longtitude[0] + gps.longtitude[1] / 60 + gps.longtitude[2] / 3600) * DMS_REF(gps.longref);
    snprintf(tmp, sizeof(tmp), "%.15f", gps_longitude_dec);
    if (gps_longitude_dec != 0.0) {
        APPEND_STR_META(doc, MetaExifGpsLongitudeDec, tmp)
    }

    double gps_latitude_dec = (gps.latitude[0] + gps.latitude[1] / 60 + gps.latitude[2] / 3600) * DMS_REF(gps.latref);
    snprintf(tmp, sizeof(tmp), "%.15f", gps_latitude_dec);
    if (gps_latitude_dec != 0.0) {
        APPEND_STR_META(doc, MetaExifGpsLatitudeDec, tmp)
    }

    APPEND_STR_META(doc, MetaMediaVideoCodec, "raw")

    // Thumbnails disabled: metadata only
    if (ctx->tn_size <= 0) {
        libraw_close(libraw_lib);
        free(buf);
        return;
    }

    libraw_unpack_thumb(libraw_lib);

    int errc = 0;
    libraw_processed_image_t *thumb = libraw_dcraw_make_mem_thumb(libraw_lib, &errc);
    if (errc != 0 || thumb == NULL) {
        if (thumb != NULL) {
            libraw_dcraw_clear_mem(thumb);
        }
        libraw_close(libraw_lib);
        free(buf);
        return;
    }

    int tn_ok = 0;
    if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_JPEG) {
        tn_ok = store_thumbnail_jpeg(ctx, thumb, doc);
    } else if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_BITMAP) {
        // TODO: technically this should work but is currently untested
        tn_ok = store_thumbnail_rgb24(ctx, thumb, doc);
    }

    libraw_dcraw_clear_mem(thumb);

    if (tn_ok == TRUE) {
        libraw_close(libraw_lib);
        free(buf);
        return;
    }

    // Embedded thumbnail unusable: decode the full image instead
    ret = libraw_unpack(libraw_lib);
    if (ret != 0) {
        CTX_LOG_ERROR(f->filepath, "Could not unpack raw file")
        libraw_close(libraw_lib);
        free(buf);
        return;
    }

    libraw_dcraw_process(libraw_lib);

    errc = 0;
    libraw_processed_image_t *img = libraw_dcraw_make_mem_image(libraw_lib, &errc);
    if (errc != 0 || img == NULL) {
        if (img != NULL) {
            libraw_dcraw_clear_mem(img);
        }
        libraw_close(libraw_lib);
        free(buf);
        return;
    }

    store_thumbnail_rgb24(ctx, img, doc);

    libraw_dcraw_clear_mem(img);
    libraw_close(libraw_lib);

    free(buf);
}
 | 
			
		||||
							
								
								
									
										17
									
								
								third-party/libscan/libscan/raw/raw.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										17
									
								
								third-party/libscan/libscan/raw/raw.h
									
									
									
									
										vendored
									
									
								
							@ -1,17 +0,0 @@
 | 
			
		||||
#ifndef SIST2_RAW_H
 | 
			
		||||
#define SIST2_RAW_H
 | 
			
		||||
 | 
			
		||||
#include "../scan.h"
 | 
			
		||||
 | 
			
		||||
// Configuration and callbacks for the raw image scanner (see parse_raw()).
typedef struct {
    // Logging callbacks invoked through the CTX_LOG_* macros.
    log_callback_t log;
    logf_callback_t logf;
    // Receives the encoded thumbnail bytes, keyed by the document's path MD5.
    store_callback_t store;

    // Maximum thumbnail side in pixels; parse_raw() skips thumbnail
    // generation entirely when this is <= 0.
    int tn_size;
    // NOTE(review): store_thumbnail_rgb24() passes a hard-coded 1.0f to
    // alloc_jpeg_encoder() instead of this value -- confirm whether intended.
    float tn_qscale;
} scan_raw_ctx_t;
 | 
			
		||||
 | 
			
		||||
void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc);
 | 
			
		||||
 | 
			
		||||
#endif //SIST2_RAW_H
 | 
			
		||||
							
								
								
									
										170
									
								
								third-party/libscan/libscan/scan.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										170
									
								
								third-party/libscan/libscan/scan.h
									
									
									
									
										vendored
									
									
								
							@ -1,170 +0,0 @@
 | 
			
		||||
#ifndef SCAN_SCAN_H
 | 
			
		||||
#define SCAN_SCAN_H
 | 
			
		||||
 | 
			
		||||
#ifndef _GNU_SOURCE
 | 
			
		||||
#define _GNU_SOURCE
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <sys/stat.h>
 | 
			
		||||
#include <openssl/md5.h>
 | 
			
		||||
#include <openssl/sha.h>
 | 
			
		||||
 | 
			
		||||
#include "macros.h"
 | 
			
		||||
 | 
			
		||||
#define SIST_SWS_ALGO SWS_LANCZOS
 | 
			
		||||
 | 
			
		||||
#define UNUSED(x) __attribute__((__unused__))  x
 | 
			
		||||
 | 
			
		||||
typedef void (*store_callback_t)(char *key, size_t key_len, char *buf, size_t buf_len);
 | 
			
		||||
 | 
			
		||||
typedef void (*logf_callback_t)(const char *filepath, int level, char *format, ...);
 | 
			
		||||
 | 
			
		||||
typedef void (*log_callback_t)(const char *filepath, int level, char *str);
 | 
			
		||||
 | 
			
		||||
typedef int scan_code_t;
 | 
			
		||||
#define SCAN_OK (scan_code_t) 0
 | 
			
		||||
#define SCAN_ERR_READ (scan_code_t) (-1)
 | 
			
		||||
#define SCAN_ERR_SKIP (scan_code_t) (-2)
 | 
			
		||||
 | 
			
		||||
#define LEVEL_DEBUG 0
 | 
			
		||||
#define LEVEL_INFO 1
 | 
			
		||||
#define LEVEL_WARNING 2
 | 
			
		||||
#define LEVEL_ERROR 3
 | 
			
		||||
#define LEVEL_FATAL 4
 | 
			
		||||
 | 
			
		||||
#define CTX_LOG_DEBUGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_DEBUG, fmt, __VA_ARGS__);
 | 
			
		||||
#define CTX_LOG_DEBUG(filepath, str) ctx->log(filepath, LEVEL_DEBUG, str);
 | 
			
		||||
 | 
			
		||||
#define CTX_LOG_INFOF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_INFO, fmt, __VA_ARGS__);
 | 
			
		||||
#define CTX_LOG_INFO(filepath, str) ctx->log(filepath, LEVEL_INFO, str);
 | 
			
		||||
 | 
			
		||||
#define CTX_LOG_WARNINGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_WARNING, fmt, __VA_ARGS__);
 | 
			
		||||
#define CTX_LOG_WARNING(filepath, str) ctx->log(filepath, LEVEL_WARNING, str);
 | 
			
		||||
 | 
			
		||||
#define CTX_LOG_ERRORF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_ERROR, fmt, __VA_ARGS__);
 | 
			
		||||
#define CTX_LOG_ERROR(filepath, str) ctx->log(filepath, LEVEL_ERROR, str);
 | 
			
		||||
 | 
			
		||||
// Log at LEVEL_FATAL, then terminate the process. Both statements are wrapped
// in do/while(0) so they stay together when the macro is the body of an
// unbraced if/else (otherwise exit() would run unconditionally). The trailing
// semicolon is kept on purpose: like the other CTX_LOG_* macros, these are
// invoked without one at the call sites.
#define CTX_LOG_FATALF(filepath, fmt, ...) do { ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1); } while (0);
#define CTX_LOG_FATAL(filepath, str) do { ctx->log(filepath, LEVEL_FATAL, str); exit(-1); } while (0);
 | 
			
		||||
 | 
			
		||||
// Identifies what a meta_line_t holds. Values start at 1 and are assigned
// sequentially, so the order of the enumerators is significant.
enum metakey {
    // String
    MetaContent = 1,
    MetaMediaAudioCodec,
    MetaMediaVideoCodec,
    MetaArtist,
    MetaAlbum,
    MetaAlbumArtist,
    MetaGenre,
    MetaTitle,
    MetaFontName,
    MetaParent,
    MetaExifMake,
    MetaExifSoftware,
    MetaExifExposureTime,
    MetaExifFNumber,
    MetaExifFocalLength,
    MetaExifUserComment,
    MetaExifModel,
    MetaExifIsoSpeedRatings,
    MetaExifDateTime,
    MetaAuthor,
    MetaModifiedBy,
    MetaThumbnail,
    MetaChecksum,

    // Number
    MetaWidth,
    MetaHeight,
    MetaMediaDuration,
    MetaMediaBitrate,
    MetaPages,

    // ?? (value type not stated by the original author; parse_raw() appends
    // the two *Dec keys as formatted strings)
    MetaExifGpsLongitudeDMS,
    MetaExifGpsLongitudeRef,
    MetaExifGpsLatitudeDMS,
    MetaExifGpsLatitudeRef,
    MetaExifGpsLatitudeDec,
    MetaExifGpsLongitudeDec,
};
 | 
			
		||||
 | 
			
		||||
// One metadata entry attached to a document; entries are chained through
// `next`.
typedef struct meta_line {
    struct meta_line *next;
    // Tells which member of the union below is meaningful.
    enum metakey key;
    union {
        // Zero-length array: string values are stored inline, in extra bytes
        // allocated directly after the struct (see the malloc + strcpy in
        // parse_ooxml()).
        char str_val[0];
        unsigned long long_val;
        double double_val;
    };
} meta_line_t;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// A scanned document together with the metadata collected for it.
typedef struct document {
    // Used by the parsers as the key when storing the document's thumbnail.
    unsigned char path_md5[MD5_DIGEST_LENGTH];
    unsigned long size;
    unsigned int mime;
    int mtime;
    // NOTE(review): presumably offsets into filepath for the base name and
    // the extension -- confirm against the code that fills them in.
    short base;
    short ext;
    char has_parent;
    // Metadata entries; presumably the head and tail of the meta_line_t list
    // maintained by APPEND_META -- confirm in macros.h.
    meta_line_t *meta_head;
    meta_line_t *meta_tail;
    // Path reported in log messages.
    char *filepath;
} document_t;
 | 
			
		||||
 | 
			
		||||
typedef struct vfile vfile_t;
 | 
			
		||||
 | 
			
		||||
__attribute__((warn_unused_result))
 | 
			
		||||
typedef int (*read_func_t)(struct vfile *, void *buf, size_t size);
 | 
			
		||||
 | 
			
		||||
__attribute__((warn_unused_result))
 | 
			
		||||
typedef long (*seek_func_t)(struct vfile *, long offset, int whence);
 | 
			
		||||
 | 
			
		||||
typedef void (*close_func_t)(struct vfile *);
 | 
			
		||||
 | 
			
		||||
typedef void (*reset_func_t)(struct vfile *);
 | 
			
		||||
 | 
			
		||||
// Abstract file handle handed to the parsers. Data is accessed through the
// function pointers below rather than through the underlying handle.
typedef struct vfile {
    // Underlying handle; only one member is valid at a time.
    // NOTE(review): presumably selected by is_fs_file -- confirm.
    union {
        int fd;
        struct archive *arc;
        const void *_test_data;
    };

    int is_fs_file;
    int has_checksum;
    int calculate_checksum;
    // Path reported in log messages.
    const char *filepath;
    // parse_text()/parse_markup() use info.st_size to bound how much is read.
    struct stat info;

    SHA_CTX sha1_ctx;
    unsigned char sha1_digest[SHA1_DIGEST_LENGTH];

    void *rewind_buffer;
    int rewind_buffer_size;
    int rewind_buffer_cursor;

    // Callers treat a negative return from read as an error code.
    read_func_t read;
    read_func_t read_rewindable;
    close_func_t close;
    reset_func_t reset;
    log_callback_t log;
    logf_callback_t logf;
} vfile_t;
 | 
			
		||||
 | 
			
		||||
// Unit of work passed to a parse_callback_t.
typedef struct parse_job_t {
    int base;
    int ext;
    struct vfile vfile;
    unsigned char parent[MD5_DIGEST_LENGTH];
    // NOTE(review): one-element trailing array; presumably the struct is
    // over-allocated so the full path can be stored here -- confirm at the
    // allocation site.
    char filepath[1];
} parse_job_t;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#include "util.h"
 | 
			
		||||
 | 
			
		||||
typedef void (*parse_callback_t)(parse_job_t *job);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										64
									
								
								third-party/libscan/libscan/text/text.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										64
									
								
								third-party/libscan/libscan/text/text.c
									
									
									
									
										vendored
									
									
								
							@ -1,64 +0,0 @@
 | 
			
		||||
#include "text.h"
 | 
			
		||||
 | 
			
		||||
/**
 * Read up to ctx->content_size bytes of a plain-text file and attach the
 * cleaned text to `doc` as MetaContent.
 *
 * The first two bytes are inspected for a UTF-16 byte-order mark to select
 * the 16-bit or the 8-bit append routine.
 *
 * @param ctx scan context (content size limit, log callbacks)
 * @param f   file to read
 * @param doc document that receives the content
 * @return SCAN_OK on success or when there are 2 bytes or fewer to read;
 *         SCAN_ERR_READ when the buffer cannot be allocated or the read fails.
 */
scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) {

    int to_read = MIN(ctx->content_size, f->info.st_size);

    if (to_read <= 2) {
        return SCAN_OK;
    }

    // Zero-filled so that bytes past a short read are well-defined instead of
    // uninitialised heap contents.
    char *buf = calloc(1, to_read);
    if (buf == NULL) {
        CTX_LOG_ERROR(doc->filepath, "Could not allocate read buffer")
        return SCAN_ERR_READ;
    }

    int ret = f->read(f, buf, to_read);
    if (ret < 0) {
        CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret)
        free(buf);
        return SCAN_ERR_READ;
    }

    text_buffer_t tex = text_buffer_create(ctx->content_size);

    // NOTE(review): the BOM is read as a native 16-bit integer, so which
    // branch matches a given byte sequence depends on host byte order.
    if ((*(int16_t*)buf) == (int16_t)0xFFFE) {
        text_buffer_append_string16_le(&tex, buf + 2, to_read - 2);
    } else if((*(int16_t*)buf) == (int16_t)0xFEFF) {
        text_buffer_append_string16_be(&tex, buf + 2, to_read - 2);
    } else {
        text_buffer_append_string(&tex, buf, to_read);
    }
    text_buffer_terminate_string(&tex);

    APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);

    free(buf);
    text_buffer_destroy(&tex);

    return SCAN_OK;
}
 | 
			
		||||
 | 
			
		||||
#define MAX_MARKUP_SIZE (1024 * 1024)
 | 
			
		||||
 | 
			
		||||
/**
 * Read up to MAX_MARKUP_SIZE bytes of a markup file, strip the markup via
 * text_buffer_append_markup() and attach the remaining text to `doc` as
 * MetaContent.
 *
 * @param ctx scan context (content size limit, log callbacks)
 * @param f   file to read
 * @param doc document that receives the content
 * @return SCAN_OK on success; SCAN_ERR_READ when the buffer cannot be
 *         allocated or the read fails.
 */
scan_code_t parse_markup(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) {

    int to_read = MIN(MAX_MARKUP_SIZE, f->info.st_size);

    // One extra byte for the terminator. Zero-filled so that a short read
    // yields a cleanly terminated string rather than uninitialised bytes.
    char *buf = calloc(1, to_read + 1);
    if (buf == NULL) {
        CTX_LOG_ERROR(doc->filepath, "Could not allocate read buffer")
        return SCAN_ERR_READ;
    }

    int ret = f->read(f, buf, to_read);
    if (ret < 0) {
        CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret)
        free(buf);
        return SCAN_ERR_READ;
    }

    *(buf + to_read) = '\0';

    text_buffer_t tex = text_buffer_create(ctx->content_size);
    text_buffer_append_markup(&tex, buf);
    text_buffer_terminate_string(&tex);

    APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);

    free(buf);
    text_buffer_destroy(&tex);

    return SCAN_OK;
}
 | 
			
		||||
							
								
								
									
										18
									
								
								third-party/libscan/libscan/text/text.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										18
									
								
								third-party/libscan/libscan/text/text.h
									
									
									
									
										vendored
									
									
								
							@ -1,18 +0,0 @@
 | 
			
		||||
#ifndef SCAN_TEXT_H
 | 
			
		||||
#define SCAN_TEXT_H
 | 
			
		||||
 | 
			
		||||
#include "../scan.h"
 | 
			
		||||
#include "../util.h"
 | 
			
		||||
 | 
			
		||||
// Configuration and callbacks for the text/markup scanner
// (see parse_text() and parse_markup()).
typedef struct {
    // Upper bound on the bytes parse_text() reads from a file; also passed
    // to text_buffer_create() by both parsers.
    long content_size;

    // Logging callbacks invoked through the CTX_LOG_* macros.
    log_callback_t log;
    logf_callback_t logf;
} scan_text_ctx_t;
 | 
			
		||||
 | 
			
		||||
scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc);
 | 
			
		||||
 | 
			
		||||
scan_code_t parse_markup(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										0
									
								
								third-party/libscan/libscan/util.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										0
									
								
								third-party/libscan/libscan/util.c
									
									
									
									
										vendored
									
									
								
							
							
								
								
									
										361
									
								
								third-party/libscan/libscan/util.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										361
									
								
								third-party/libscan/libscan/util.h
									
									
									
									
										vendored
									
									
								
							@ -1,361 +0,0 @@
 | 
			
		||||
#ifndef SCAN_UTIL_H
 | 
			
		||||
#define SCAN_UTIL_H
 | 
			
		||||
 | 
			
		||||
#include "stdio.h"
 | 
			
		||||
#include "stdlib.h"
 | 
			
		||||
#include "string.h"
 | 
			
		||||
#include "../third-party/utf8.h/utf8.h"
 | 
			
		||||
#include "macros.h"
 | 
			
		||||
 | 
			
		||||
#define STR_STARTS_WITH(x, y) (strncmp(y, x, sizeof(y) - 1) == 0)
 | 
			
		||||
 | 
			
		||||
#define TEXT_BUF_FULL (-1)
 | 
			
		||||
#define INITIAL_BUF_SIZE (1024 * 16)
 | 
			
		||||
 | 
			
		||||
#define SHOULD_IGNORE_CHAR(c) !(SHOULD_KEEP_CHAR(c))
 | 
			
		||||
#define SHOULD_KEEP_CHAR(c) (\
 | 
			
		||||
    ((c) >= '\'' && (c) <= ';') || \
 | 
			
		||||
    ((c) >= 'A' && (c) <= 'z') || \
 | 
			
		||||
    ((c) > 127 && (c) != 0x00A0 && (c) && (c) != 0xFFFD))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/**
 * Growable byte buffer backed by heap memory.
 */
typedef struct dyn_buffer {
    char *buf;      // start of the allocation
    size_t cur;     // write offset: number of bytes written so far
    size_t size;    // current capacity of the allocation, in bytes
} dyn_buffer_t;
 | 
			
		||||
 | 
			
		||||
typedef struct text_buffer {
 | 
			
		||||
    long max_size;
 | 
			
		||||
    int last_char_was_whitespace;
 | 
			
		||||
    dyn_buffer_t dyn_buffer;
 | 
			
		||||
} text_buffer_t;
 | 
			
		||||
 | 
			
		||||
static int utf8_validchr2(const char *s) {
 | 
			
		||||
    if (0x00 == (0x80 & *s)) {
 | 
			
		||||
        return TRUE;
 | 
			
		||||
    } else if (0xf0 == (0xf8 & *s)) {
 | 
			
		||||
        if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) ||
 | 
			
		||||
            (0x80 != (0xc0 & s[3]))) {
 | 
			
		||||
            return FALSE;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (0x80 == (0xc0 & s[4])) {
 | 
			
		||||
            return FALSE;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if ((0 == (0x07 & s[0])) && (0 == (0x30 & s[1]))) {
 | 
			
		||||
            return FALSE;
 | 
			
		||||
        }
 | 
			
		||||
    } else if (0xe0 == (0xf0 & *s)) {
 | 
			
		||||
        if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2]))) {
 | 
			
		||||
            return FALSE;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (0x80 == (0xc0 & s[3])) {
 | 
			
		||||
            return FALSE;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1]))) {
 | 
			
		||||
            return FALSE;
 | 
			
		||||
        }
 | 
			
		||||
    } else if (0xc0 == (0xe0 & *s)) {
 | 
			
		||||
        if (0x80 != (0xc0 & s[1])) {
 | 
			
		||||
            return FALSE;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (0x80 == (0xc0 & s[2])) {
 | 
			
		||||
            return FALSE;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (0 == (0x1e & s[0])) {
 | 
			
		||||
            return FALSE;
 | 
			
		||||
        }
 | 
			
		||||
    } else {
 | 
			
		||||
        return FALSE;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return TRUE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
static dyn_buffer_t dyn_buffer_create() {
 | 
			
		||||
    dyn_buffer_t buf;
 | 
			
		||||
 | 
			
		||||
    buf.size = INITIAL_BUF_SIZE;
 | 
			
		||||
    buf.cur = 0;
 | 
			
		||||
    buf.buf = (char *) malloc(INITIAL_BUF_SIZE);
 | 
			
		||||
 | 
			
		||||
    return buf;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Make sure at least `size` more bytes fit after the current write offset.
 * The capacity is doubled until the request fits, then the storage is reallocated once.
 */
static void grow_buffer(dyn_buffer_t *buf, size_t size) {
    const size_t required = buf->cur + size;

    if (required <= buf->size) {
        return;
    }

    size_t capacity = buf->size;
    do {
        capacity *= 2;
    } while (required > capacity);

    buf->size = capacity;
    buf->buf = (char *) realloc(buf->buf, capacity);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Make sure at least sizeof(long) bytes fit after the current write offset,
 * doubling the capacity once if they do not.
 */
static void grow_buffer_small(dyn_buffer_t *buf) {
    if (buf->cur + sizeof(long) <= buf->size) {
        return;
    }

    buf->size *= 2;
    buf->buf = (char *) realloc(buf->buf, buf->size);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Append `size` raw bytes from `data` to the buffer, growing it as needed.
 */
static void dyn_buffer_write(dyn_buffer_t *buf, const void *data, size_t size) {
    grow_buffer(buf, size);

    char *dst = buf->buf + buf->cur;
    memcpy(dst, data, size);

    buf->cur += size;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Append a single byte to the buffer, growing it as needed.
 */
static void dyn_buffer_write_char(dyn_buffer_t *buf, char c) {
    grow_buffer_small(buf);

    buf->buf[buf->cur] = c;
    buf->cur += 1;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Append a C string to the buffer, including its terminating NUL byte.
 */
static void dyn_buffer_write_str(dyn_buffer_t *buf, const char *str) {
    const size_t length = strlen(str);

    dyn_buffer_write(buf, str, length);
    dyn_buffer_write_char(buf, '\0');
}
 | 
			
		||||
 | 
			
		||||
/**
 * Append the characters of a C string to the buffer, without the terminating NUL byte.
 */
static void dyn_buffer_append_string(dyn_buffer_t *buf, const char *str) {
    const size_t length = strlen(str);

    dyn_buffer_write(buf, str, length);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Append the native in-memory representation of an int to the buffer.
 *
 * The value is copied with memcpy because the write offset is an arbitrary byte
 * position: storing through a cast `int *` there would be a misaligned access.
 */
static void dyn_buffer_write_int(dyn_buffer_t *buf, int d) {
    grow_buffer_small(buf);

    memcpy(buf->buf + buf->cur, &d, sizeof(d));
    buf->cur += sizeof(d);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Append the native in-memory representation of a 16-bit value to the buffer.
 *
 * The value is copied with memcpy because the write offset is an arbitrary byte
 * position: storing through a cast `uint16_t *` there would be a misaligned access.
 */
static void dyn_buffer_write_short(dyn_buffer_t *buf, uint16_t s) {
    grow_buffer_small(buf);

    memcpy(buf->buf + buf->cur, &s, sizeof(s));
    buf->cur += sizeof(s);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Append the native in-memory representation of an unsigned long to the buffer.
 *
 * The value is copied with memcpy because the write offset is an arbitrary byte
 * position: storing through a cast `unsigned long *` there would be a misaligned access.
 */
static void dyn_buffer_write_long(dyn_buffer_t *buf, unsigned long l) {
    grow_buffer_small(buf);

    memcpy(buf->buf + buf->cur, &l, sizeof(l));
    buf->cur += sizeof(l);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Release the storage owned by the buffer.
 *
 * The fields are reset afterwards so that destroying the same buffer twice is
 * harmless (free(NULL) is a no-op) instead of a double free.
 */
static void dyn_buffer_destroy(dyn_buffer_t *buf) {
    free(buf->buf);

    buf->buf = NULL;
    buf->cur = 0;
    buf->size = 0;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Release the storage owned by a text buffer.
 */
static void text_buffer_destroy(text_buffer_t *buf) {
    dyn_buffer_t *storage = &buf->dyn_buffer;

    dyn_buffer_destroy(storage);
}
 | 
			
		||||
 | 
			
		||||
static text_buffer_t text_buffer_create(long max_size) {
 | 
			
		||||
    text_buffer_t text_buf;
 | 
			
		||||
 | 
			
		||||
    text_buf.dyn_buffer = dyn_buffer_create();
 | 
			
		||||
    text_buf.max_size = max_size;
 | 
			
		||||
    text_buf.last_char_was_whitespace = FALSE;
 | 
			
		||||
 | 
			
		||||
    return text_buf;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Append one Unicode code point to the text buffer, encoded as UTF-8.
 *
 * Characters rejected by SHOULD_KEEP_CHAR, as well as the space character, are
 * written as a single space; consecutive ones are collapsed and a leading one
 * (empty buffer) is dropped.
 *
 * @return TEXT_BUF_FULL when the write made the buffer exceed a positive max_size,
 *         0 otherwise
 */
static int text_buffer_append_char(text_buffer_t *buf, int c) {
    dyn_buffer_t *out = &buf->dyn_buffer;

    if (SHOULD_IGNORE_CHAR(c) || c == ' ') {
        // Nothing to emit: already after a space, or nothing written yet
        if (buf->last_char_was_whitespace || out->cur == 0) {
            return 0;
        }

        dyn_buffer_write_char(out, ' ');
        buf->last_char_was_whitespace = TRUE;

        return (buf->max_size > 0 && out->cur > buf->max_size) ? TEXT_BUF_FULL : 0;
    }

    buf->last_char_was_whitespace = FALSE;
    // Reserves sizeof(long) bytes, enough for the longest (4-byte) encoding below
    grow_buffer_small(out);

    if (((utf8_int32_t) 0xffffff80 & c) == 0) {
        // 1 byte
        out->buf[out->cur++] = (char) c;
    } else if (((utf8_int32_t) 0xfffff800 & c) == 0) {
        // 2 bytes
        out->buf[out->cur++] = 0xc0 | (char) (c >> 6);
        out->buf[out->cur++] = 0x80 | (char) (c & 0x3f);
    } else if (((utf8_int32_t) 0xffff0000 & c) == 0) {
        // 3 bytes
        out->buf[out->cur++] = 0xe0 | (char) (c >> 12);
        out->buf[out->cur++] = 0x80 | (char) ((c >> 6) & 0x3f);
        out->buf[out->cur++] = 0x80 | (char) (c & 0x3f);
    } else {
        // 4 bytes
        out->buf[out->cur++] = 0xf0 | (char) (c >> 18);
        out->buf[out->cur++] = 0x80 | (char) ((c >> 12) & 0x3f);
        out->buf[out->cur++] = 0x80 | (char) ((c >> 6) & 0x3f);
        out->buf[out->cur++] = 0x80 | (char) (c & 0x3f);
    }

    if (buf->max_size > 0 && out->cur > buf->max_size) {
        return TEXT_BUF_FULL;
    }

    return 0;
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/**
 * NUL-terminate the accumulated text.
 * A trailing space is overwritten by the terminator; otherwise the terminator is appended.
 */
static void text_buffer_terminate_string(text_buffer_t *buf) {
    dyn_buffer_t *out = &buf->dyn_buffer;

    if (out->cur == 0 || out->buf[out->cur - 1] != ' ') {
        dyn_buffer_write_char(out, '\0');
        return;
    }

    out->buf[out->cur - 1] = '\0';
}
 | 
			
		||||
 | 
			
		||||
// Naive UTF16 -> ascii conversion
 | 
			
		||||
static int text_buffer_append_string16_le(text_buffer_t *buf, const char *str, size_t len) {
 | 
			
		||||
    int ret = 0;
 | 
			
		||||
    for (int i = 1; i < len; i += 2) {
 | 
			
		||||
        ret = text_buffer_append_char(buf, str[i]);
 | 
			
		||||
    }
 | 
			
		||||
    return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Naive UTF16 -> ascii conversion: appends every second byte of `str`,
 * starting at offset 0.
 *
 * Stops at the first character that fills the buffer, so that the limit is
 * honoured and TEXT_BUF_FULL is not overwritten by a later iteration.
 *
 * NOTE(review): in big-endian UTF-16 the low (ASCII) byte of a code unit comes
 * second, yet this loop reads the even offsets - confirm what the callers pass in.
 *
 * @param len length of `str` in bytes
 * @return TEXT_BUF_FULL if the buffer limit was reached, 0 otherwise
 */
static int text_buffer_append_string16_be(text_buffer_t *buf, const char *str, size_t len) {
    for (size_t i = 0; i < len; i += 2) {
        int ret = text_buffer_append_char(buf, str[i]);

        if (ret != 0) {
            return ret;
        }
    }
    return 0;
}
 | 
			
		||||
 | 
			
		||||
#define UTF8_END_OF_STRING \
 | 
			
		||||
    (ptr - str >= len || *ptr == 0 || \
 | 
			
		||||
    (0xc0 == (0xe0 & *ptr) && ptr - str > len - 2) || \
 | 
			
		||||
    (0xe0 == (0xf0 & *ptr) && ptr - str > len - 3) || \
 | 
			
		||||
    (0xf0 == (0xf8 & *ptr) && ptr - str > len - 4))
 | 
			
		||||
 | 
			
		||||
/**
 * Append up to `len` bytes of UTF-8 text to the text buffer.
 *
 * Inputs of 4 bytes or less take a shortcut: only ASCII characters accepted by
 * SHOULD_KEEP_CHAR are copied, straight into the underlying buffer. Longer inputs
 * are decoded one code point at a time; malformed sequences are skipped and the
 * rest goes through text_buffer_append_char().
 *
 * @return 0 on success (also for NULL or empty input), or the non-zero value
 *         returned by text_buffer_append_char()
 */
static int text_buffer_append_string(text_buffer_t *buf, const char *str, size_t len) {

    // NOTE: UTF8_END_OF_STRING refers to `ptr`, `str` and `len` by name
    const char *ptr = str;
    const char *sequence_start = ptr;

    if (str == NULL || UTF8_END_OF_STRING) {
        return 0;
    }

    if (len <= 4) {
        for (size_t i = 0; i < len; i++) {
            const char ch = str[i];

            if (((utf8_int32_t) 0xffffff80 & ch) == 0 && SHOULD_KEEP_CHAR(ch)) {
                dyn_buffer_write_char(&buf->dyn_buffer, ch);
            }
        }
        return 0;
    }

    utf8_int32_t c;
    char tmp[16] = {0};

    do {
        ptr = (char *) utf8codepoint(ptr, &c);

        // Copy the raw bytes of this code point into a zeroed scratch area for validation
        memset(tmp, 0, sizeof(int));
        memcpy(tmp, sequence_start, ptr - sequence_start);
        sequence_start = ptr;

        if (utf8_validchr2(tmp)) {
            int ret = text_buffer_append_char(buf, c);

            if (ret != 0) {
                return ret;
            }
        }
    } while (!UTF8_END_OF_STRING);

    return 0;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Append a NUL-terminated UTF-8 string to the text buffer.
 *
 * A NULL string is treated as empty, matching text_buffer_append_string(),
 * instead of being passed to strlen().
 *
 * @return 0 on success, or the non-zero value returned by text_buffer_append_string()
 */
static int text_buffer_append_string0(text_buffer_t *buf, const char *str) {
    if (str == NULL) {
        return 0;
    }
    return text_buffer_append_string(buf, str, strlen(str));
}
 | 
			
		||||
 | 
			
		||||
/**
 * Append the text found between the tags of a markup string.
 *
 * Scanning starts in the "inside a tag" state, so nothing is emitted until the
 * first '>' is seen. Each text run is followed by a space. Whatever lies between
 * the last recorded text start and the end of the string is appended at the end.
 *
 * @return TEXT_BUF_FULL as soon as the buffer limit is reached, 0 otherwise
 */
static int text_buffer_append_markup(text_buffer_t *buf, const char *markup) {

    int inside_tag = TRUE;
    const char *text_start = markup;
    const char *cursor;

    for (cursor = markup; *cursor != '\0'; cursor++) {
        if (inside_tag) {
            if (*cursor == '>') {
                inside_tag = FALSE;
                text_start = cursor + 1;
            }
            continue;
        }

        if (*cursor != '<') {
            continue;
        }

        // A new tag begins: flush the text collected since the previous one
        inside_tag = TRUE;
        if (cursor == text_start) {
            continue;
        }

        if (text_buffer_append_string(buf, text_start, (cursor - text_start)) == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }
        if (text_buffer_append_char(buf, ' ') == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }
    }

    if (cursor != text_start) {
        if (text_buffer_append_string(buf, text_start, (cursor - text_start)) == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }
        if (text_buffer_append_char(buf, ' ') == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }
    }

    return 0;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Read the whole file (f->info.st_size bytes) into a freshly allocated buffer.
 *
 * @param size receives the number of bytes reported by f->read(), or 0 when the
 *             allocation failed
 * @return the buffer, to be released with free(), or NULL when the allocation
 *         failed or fewer bytes than expected were read
 */
static void *read_all(vfile_t *f, size_t *size) {
    void *buf = malloc(f->info.st_size);

    // Do not hand a NULL destination to the read callback
    if (buf == NULL) {
        *size = 0;
        return NULL;
    }

    *size = f->read(f, buf, f->info.st_size);

    if (*size != f->info.st_size) {
        free(buf);
        return NULL;
    }

    return buf;
}
 | 
			
		||||
 | 
			
		||||
#define STACK_BUFFER_SIZE (size_t)(4096 * 8)
 | 
			
		||||
 | 
			
		||||
__always_inline
 | 
			
		||||
static void safe_sha1_update(SHA_CTX *ctx, void *buf, size_t size) {
 | 
			
		||||
    unsigned char stack_buf[STACK_BUFFER_SIZE];
 | 
			
		||||
 | 
			
		||||
    void *sha1_buf;
 | 
			
		||||
    if (size <= STACK_BUFFER_SIZE) {
 | 
			
		||||
        sha1_buf = stack_buf;
 | 
			
		||||
    } else {
 | 
			
		||||
        void *heap_sha1_buf = malloc(size);
 | 
			
		||||
        sha1_buf = heap_sha1_buf;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    memcpy(sha1_buf, buf, size);
 | 
			
		||||
    SHA1_Update(ctx, (const void *) sha1_buf, size);
 | 
			
		||||
 | 
			
		||||
    if (sha1_buf != stack_buf) {
 | 
			
		||||
        free(sha1_buf);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										200
									
								
								third-party/libscan/libscan/wpd/libwpd_c_api.cpp
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										200
									
								
								third-party/libscan/libscan/wpd/libwpd_c_api.cpp
									
									
									
									
										vendored
									
									
								
							@ -1,200 +0,0 @@
 | 
			
		||||
#include "libwpd_c_api.h"
 | 
			
		||||
#include "libwpd/libwpd.h"
 | 
			
		||||
#include "libwpd/WPXProperty.h"
 | 
			
		||||
#include "libwpd-stream/libwpd-stream.h"
 | 
			
		||||
 | 
			
		||||
class StringDocument : public WPXDocumentInterface {
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    text_buffer_t *tex;
 | 
			
		||||
    document_t *doc;
 | 
			
		||||
    bool is_full;
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
    StringDocument(text_buffer_t *tex, document_t *doc) {
 | 
			
		||||
        this->tex = tex;
 | 
			
		||||
        this->doc = doc;
 | 
			
		||||
        this->is_full = false;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void setDocumentMetaData(const WPXPropertyList &propList) override {
 | 
			
		||||
 | 
			
		||||
        WPXPropertyList::Iter propIter(propList);
 | 
			
		||||
        for (propIter.rewind(); propIter.next();) {
 | 
			
		||||
            // TODO: Read metadata here ?!
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void endDocument() override {
 | 
			
		||||
        text_buffer_terminate_string(this->tex);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void closeParagraph() override {
 | 
			
		||||
        if (!this->is_full) {
 | 
			
		||||
            if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) {
 | 
			
		||||
                this->is_full = true;
 | 
			
		||||
            };
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void closeSpan() override {
 | 
			
		||||
        if (!this->is_full) {
 | 
			
		||||
            if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) {
 | 
			
		||||
                this->is_full = true;
 | 
			
		||||
            };
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void closeSection() override {
 | 
			
		||||
        if (!this->is_full) {
 | 
			
		||||
            if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) {
 | 
			
		||||
                this->is_full = true;
 | 
			
		||||
            };
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void insertTab() override {
 | 
			
		||||
        if (!this->is_full) {
 | 
			
		||||
            if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) {
 | 
			
		||||
                this->is_full = true;
 | 
			
		||||
            };
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void insertSpace() override {
 | 
			
		||||
        if (!this->is_full) {
 | 
			
		||||
            if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) {
 | 
			
		||||
                this->is_full = true;
 | 
			
		||||
            };
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void insertText(const WPXString &text) override {
 | 
			
		||||
        if (!this->is_full) {
 | 
			
		||||
            if (text_buffer_append_string0(tex, text.cstr()) == TEXT_BUF_FULL) {
 | 
			
		||||
                this->is_full = true;
 | 
			
		||||
            };
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void insertLineBreak() override {
 | 
			
		||||
        if (!this->is_full) {
 | 
			
		||||
            if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) {
 | 
			
		||||
                this->is_full = true;
 | 
			
		||||
            };
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void definePageStyle(const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void closePageSpan() override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void openHeader(const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void closeHeader() override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void openFooter(const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void closeFooter() override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void
 | 
			
		||||
    defineParagraphStyle(const WPXPropertyList &propList, const WPXPropertyListVector &tabStops) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void openParagraph(const WPXPropertyList &propList, const WPXPropertyListVector &tabStops) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void defineCharacterStyle(const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void openSpan(const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void
 | 
			
		||||
    defineSectionStyle(const WPXPropertyList &propList, const WPXPropertyListVector &columns) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void openSection(const WPXPropertyList &propList, const WPXPropertyListVector &columns) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void insertField(const WPXString &type, const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void defineOrderedListLevel(const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void defineUnorderedListLevel(const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void openOrderedListLevel(const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void openUnorderedListLevel(const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void closeOrderedListLevel() override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void closeUnorderedListLevel() override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void openListElement(const WPXPropertyList &propList, const WPXPropertyListVector &tabStops) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void closeListElement() override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void openFootnote(const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void closeFootnote() override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void openEndnote(const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void closeEndnote() override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void openComment(const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void closeComment() override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void openTextBox(const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void closeTextBox() override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void openTable(const WPXPropertyList &propList, const WPXPropertyListVector &columns) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void openTableRow(const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void closeTableRow() override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void openTableCell(const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void closeTableCell() override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void insertCoveredTableCell(const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void closeTable() override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void openFrame(const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void closeFrame() override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void insertBinaryObject(const WPXPropertyList &propList, const WPXBinaryData &data) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void insertEquation(const WPXPropertyList &propList, const WPXString &data) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void openPageSpan(const WPXPropertyList &propList) override { /* noop */ }
 | 
			
		||||
 | 
			
		||||
    void startDocument() override { /* noop */ };
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/**
 * Wrap a memory range in a libwpd string stream.
 * The returned handle must be released with wpd_memory_stream_destroy().
 */
wpd_stream_t wpd_memory_stream_create(const unsigned char *buf, size_t buf_len) {
    return new WPXStringStream(buf, buf_len);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Ask libwpd how confident it is that the stream holds a supported document.
 *
 * @param ptr handle returned by wpd_memory_stream_create()
 */
wpd_confidence_t wpd_is_file_format_supported(wpd_stream_t ptr) {
    WPXStringStream *input = (WPXStringStream *) ptr;

    return (wpd_confidence_t) WPDocument::isFileFormatSupported(input);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Parse the document in the stream and collect its text into `tex`.
 *
 * @param ptr handle returned by wpd_memory_stream_create()
 * @return the libwpd parse result, converted to wpd_result_t
 */
wpd_result_t wpd_parse(wpd_stream_t ptr, text_buffer_t *tex, document_t *doc) {
    WPXStringStream *input = (WPXStringStream *) ptr;
    StringDocument collector(tex, doc);

    const WPDResult outcome = WPDocument::parse(input, &collector, nullptr);

    return (wpd_result_t) outcome;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Destroy a stream created by wpd_memory_stream_create().
 */
void wpd_memory_stream_destroy(wpd_stream_t ptr) {
    delete static_cast<WPXStringStream *>(ptr);
}
 | 
			
		||||
							
								
								
									
										50
									
								
								third-party/libscan/libscan/wpd/libwpd_c_api.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										50
									
								
								third-party/libscan/libscan/wpd/libwpd_c_api.h
									
									
									
									
										vendored
									
									
								
							@ -1,50 +0,0 @@
 | 
			
		||||
#ifndef SIST2_LIBWPD_C_API_H
 | 
			
		||||
#define SIST2_LIBWPD_C_API_H
 | 
			
		||||
 | 
			
		||||
#include "stdlib.h"
 | 
			
		||||
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
#define EXTERNC extern "C"
 | 
			
		||||
#else
 | 
			
		||||
#define EXTERNC
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
extern "C" {
 | 
			
		||||
#endif
 | 
			
		||||
#include "../scan.h"
 | 
			
		||||
#include "../util.h"
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
};
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
typedef void *wpd_stream_t;
 | 
			
		||||
 | 
			
		||||
/**
 * Result of wpd_is_file_format_supported().
 * wpd_is_file_format_supported() obtains it by casting libwpd's WPDConfidence,
 * so the numeric values must stay in step with that enum.
 */
typedef enum {
    C_WPD_CONFIDENCE_NONE = 0,
    C_WPD_CONFIDENCE_UNSUPPORTED_ENCRYPTION = 1,
    C_WPD_CONFIDENCE_SUPPORTED_ENCRYPTION = 2,
    C_WPD_CONFIDENCE_EXCELLENT = 3
} wpd_confidence_t;
 | 
			
		||||
 | 
			
		||||
/**
 * Result of wpd_parse().
 * wpd_parse() obtains it by casting libwpd's WPDResult, so the numeric values
 * must stay in step with that enum.
 */
typedef enum {
    C_WPD_OK = 0,
    C_WPD_FILE_ACCESS_ERROR = 1,
    C_WPD_PARSE_ERROR = 2,
    C_WPD_UNSUPPORTED_ENCRYPTION_ERROR = 3,
    C_WPD_PASSWORD_MISSMATCH_ERROR = 4,
    C_WPD_OLE_ERROR = 5,
    C_WPD_UNKNOWN_ERROR = 6
} wpd_result_t;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
EXTERNC wpd_confidence_t wpd_is_file_format_supported(wpd_stream_t stream);
 | 
			
		||||
 | 
			
		||||
EXTERNC wpd_stream_t wpd_memory_stream_create(const unsigned char *buf, size_t buf_len);
 | 
			
		||||
 | 
			
		||||
EXTERNC void wpd_memory_stream_destroy(wpd_stream_t stream);
 | 
			
		||||
 | 
			
		||||
EXTERNC wpd_result_t wpd_parse(wpd_stream_t ptr, text_buffer_t *tex, document_t *doc);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										41
									
								
								third-party/libscan/libscan/wpd/wpd.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										41
									
								
								third-party/libscan/libscan/wpd/wpd.c
									
									
									
									
										vendored
									
									
								
							@ -1,41 +0,0 @@
 | 
			
		||||
#include "wpd.h"
 | 
			
		||||
#include "libwpd_c_api.h"
 | 
			
		||||
 | 
			
		||||
/**
 * Extract the text of a WordPerfect document and attach it to `doc` as MetaContent.
 *
 * @param ctx scan context providing the logging callbacks
 * @param f   file to read; its whole content is loaded into memory
 * @param doc document that receives the extracted text
 * @return SCAN_ERR_READ when the file cannot be read, is encrypted or is not a
 *         supported WPD file; SCAN_OK otherwise. A parse error is logged, and any
 *         text collected before it is still attached.
 */
scan_code_t parse_wpd(scan_wpd_ctx_t *ctx, vfile_t *f, document_t *doc) {

    size_t buf_len;
    void *buf = read_all(f, &buf_len);

    // read_all() returns NULL on a short read; do not build a stream on top of it
    if (buf == NULL) {
        CTX_LOG_ERRORF("wpd.c", "Could not read file [%s]", doc->filepath)
        return SCAN_ERR_READ;
    }

    void *stream = wpd_memory_stream_create(buf, buf_len);
    wpd_confidence_t conf = wpd_is_file_format_supported(stream);

    if (conf == C_WPD_CONFIDENCE_SUPPORTED_ENCRYPTION || conf == C_WPD_CONFIDENCE_UNSUPPORTED_ENCRYPTION) {
        CTX_LOG_DEBUGF("wpd.c", "File is encrypted! Password-protected WPD files are not supported yet (conf=%d)", conf)
        wpd_memory_stream_destroy(stream);
        free(buf);
        return SCAN_ERR_READ;
    }

    if (conf != C_WPD_CONFIDENCE_EXCELLENT) {
        CTX_LOG_ERRORF("wpd.c", "Unsupported file format! [%s] (conf=%d)", doc->filepath, conf)
        wpd_memory_stream_destroy(stream);
        free(buf);
        return SCAN_ERR_READ;
    }

    text_buffer_t tex = text_buffer_create(-1);
    wpd_result_t res = wpd_parse(stream, &tex, doc);

    if (res != C_WPD_OK) {
        CTX_LOG_ERRORF("wpd.c", "Error while parsing WPD file [%s] (%d)",
                       doc->filepath, res)
    }

    if (tex.dyn_buffer.cur != 0) {
        APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf)
    }

    text_buffer_destroy(&tex);
    wpd_memory_stream_destroy(stream);
    free(buf);

    // The function is declared to return scan_code_t: falling off the end is undefined behaviour
    return SCAN_OK;
}
 | 
			
		||||
							
								
								
									
										23
									
								
								third-party/libscan/libscan/wpd/wpd.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										23
									
								
								third-party/libscan/libscan/wpd/wpd.h
									
									
									
									
										vendored
									
									
								
							@ -1,23 +0,0 @@
 | 
			
		||||
#ifndef SIST2_WPD_H
 | 
			
		||||
#define SIST2_WPD_H
 | 
			
		||||
 | 
			
		||||
#include "../scan.h"
 | 
			
		||||
#include "../util.h"
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
    long content_size;
 | 
			
		||||
 | 
			
		||||
    log_callback_t log;
 | 
			
		||||
    logf_callback_t logf;
 | 
			
		||||
 | 
			
		||||
    unsigned int wpd_mime;
 | 
			
		||||
} scan_wpd_ctx_t;
 | 
			
		||||
 | 
			
		||||
scan_code_t parse_wpd(scan_wpd_ctx_t *ctx, vfile_t *f, document_t *doc);
 | 
			
		||||
 | 
			
		||||
__always_inline
 | 
			
		||||
static int is_wpd(scan_wpd_ctx_t *ctx, unsigned int mime) {
 | 
			
		||||
    return mime == ctx->wpd_mime;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										1169
									
								
								third-party/libscan/test/main.cpp
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1169
									
								
								third-party/libscan/test/main.cpp
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										114
									
								
								third-party/libscan/test/test_util.cpp
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										114
									
								
								third-party/libscan/test/test_util.cpp
									
									
									
									
										vendored
									
									
								
							@ -1,114 +0,0 @@
 | 
			
		||||
#include "test_util.h"
 | 
			
		||||
#include <gtest/gtest.h>
 | 
			
		||||
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#include <fcntl.h>
 | 
			
		||||
 | 
			
		||||
// Failure message used when a test fixture cannot be opened
#define FILE_NOT_FOUND_ERR "Could not find file, did you clone the test files repo?"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
int fs_read(struct vfile *f, void *buf, size_t size) {
 | 
			
		||||
 | 
			
		||||
    if (f->fd == -1) {
 | 
			
		||||
        f->fd = open(f->filepath, O_RDONLY);
 | 
			
		||||
        if (f->fd == -1) {
 | 
			
		||||
            return -1;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return (int) read(f->fd, buf, size);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//Note: No out of bounds check
 | 
			
		||||
int mem_read(vfile_t *f, void *buf, size_t size) {
 | 
			
		||||
    memcpy(buf, f->_test_data, size);
 | 
			
		||||
    f->_test_data = (char *) f->_test_data + size;
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Close callback for a vfile backed by a path on disk.
 *
 * Closes the descriptor held in f->fd, if any, and resets it to -1 so that a
 * repeated close is a no-op instead of closing a stale (possibly recycled)
 * descriptor, and so that fs_read's "fd == -1 means not open" check holds
 * again afterwards.
 *
 * @param f file to close
 */
void fs_close(vfile_t *f) {
    if (f->fd == -1) {
        return;
    }

    close(f->fd);
    f->fd = -1;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Prepares a document/file pair for a test that reads from disk.
 *
 * Clears the document's metadata list pointers and delegates the file setup
 * to load_file().
 *
 * @param filepath path of the file to open
 * @param f        vfile to initialise
 * @param doc      document whose meta_head / meta_tail are reset
 */
void load_doc_file(const char *filepath, vfile_t *f, document_t *doc) {
    doc->meta_head = doc->meta_tail = nullptr;

    load_file(filepath, f);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Prepares a document/file pair for a test that reads from a memory buffer.
 *
 * Clears the document's metadata list pointers and delegates the file setup
 * to load_mem().
 *
 * @param mem     start of the buffer
 * @param mem_len length of the buffer in bytes
 * @param f       vfile to initialise
 * @param doc     document whose meta_head / meta_tail are reset
 */
void load_doc_mem(void *mem, size_t mem_len, vfile_t *f, document_t *doc) {
    doc->meta_head = doc->meta_tail = nullptr;

    load_mem(mem, mem_len, f);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Releases what a test acquired through load_doc_file() / load_doc_mem().
 *
 * Frees the document's metadata list, then invokes the file's close callback
 * when one is installed.
 *
 * @param doc document whose metadata is destroyed
 * @param f   file to close
 */
void cleanup(document_t *doc, vfile_t *f) {
    destroy_doc(doc);

    // Same effect as CLOSE_FILE((*f)), written out explicitly.
    if (f->close != NULL) {
        f->close(f);
    }
}
 | 
			
		||||
 | 
			
		||||
void load_file(const char *filepath, vfile_t *f) {
 | 
			
		||||
    stat(filepath, &f->info);
 | 
			
		||||
    f->fd = open(filepath, O_RDONLY);
 | 
			
		||||
 | 
			
		||||
    if (f->fd == -1) {
 | 
			
		||||
        FAIL() << FILE_NOT_FOUND_ERR;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    f->filepath = filepath;
 | 
			
		||||
    f->read = fs_read;
 | 
			
		||||
    f->close = fs_close;
 | 
			
		||||
    f->is_fs_file = TRUE;
 | 
			
		||||
    f->calculate_checksum = TRUE;
 | 
			
		||||
    f->has_checksum = FALSE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Initialises a vfile so that it reads from a memory buffer.
 *
 * Installs mem_read as the read callback, leaves the close callback unset and
 * records the buffer size in f->info.st_size (narrowed through int). f->fd is
 * not touched.
 * NOTE(review): is_fs_file is set to TRUE even though the data lives in
 * memory - confirm this is intentional.
 *
 * @param mem  start of the buffer
 * @param size length of the buffer in bytes
 * @param f    vfile to initialise
 */
void load_mem(void *mem, size_t size, vfile_t *f) {
    // Data source
    f->_test_data = mem;
    f->info.st_size = static_cast<int>(size);
    f->filepath = "_mem_";

    // I/O callbacks
    f->read = mem_read;
    f->close = nullptr;

    f->is_fs_file = TRUE;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Looks up the first metadata entry with the given key in a document.
 *
 * @param doc document whose metadata list is searched from meta_head
 * @param key key to look for
 * @return the matching entry, or nullptr when there is none
 */
meta_line_t *get_meta(document_t *doc, metakey key) {
    meta_line_t *first = doc->meta_head;
    return get_meta_from(first, key);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Searches a metadata list, starting at the given entry, for a key.
 *
 * @param meta entry to start from (may be nullptr)
 * @param key  key to look for
 * @return the first entry at or after meta whose key matches, or nullptr
 */
meta_line_t *get_meta_from(meta_line_t *meta, metakey key) {
    for (meta_line_t *node = meta; node != nullptr; node = node->next) {
        if (node->key == key) {
            return node;
        }
    }

    return nullptr;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Frees every entry of a document's metadata list.
 *
 * After the entries are released, meta_head and meta_tail are reset to
 * nullptr so the document no longer points at freed memory; calling this
 * function again, or looking up metadata afterwards, is therefore safe.
 *
 * @param doc document whose metadata list is released
 */
void destroy_doc(document_t *doc) {
    meta_line_t *node = doc->meta_head;

    while (node != nullptr) {
        // Grab the successor before the current entry is released.
        meta_line_t *next = node->next;
        free(node);
        node = next;
    }

    doc->meta_head = nullptr;
    doc->meta_tail = nullptr;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Randomly corrupts a buffer for fuzz testing.
 *
 * Performs up to n rounds. In each round a random offset is drawn; then, with
 * a probability of trunc_p percent, the logical length is truncated,
 * otherwise width consecutive bytes starting at the offset are overwritten
 * with random values.
 *
 * Truncation sets the length to max(offset, 1000) but never increases it:
 * a buffer shorter than 1000 bytes keeps its length, so *buf_len can no
 * longer end up larger than the real buffer.
 *
 * Fuzzing stops early when the buffer is not larger than width + 1 bytes
 * (or width is negative), since no valid offset exists in that case.
 *
 * Randomness comes from rand(); seed it with srand() for reproducible runs.
 *
 * @param buf     buffer to corrupt in place
 * @param buf_len in/out: current logical length of buf; may be reduced
 * @param width   number of consecutive bytes overwritten per round
 * @param n       number of rounds
 * @param trunc_p chance, in percent, that a round truncates instead of overwriting
 */
void fuzz_buffer(char *buf, size_t *buf_len, int width, int n, int trunc_p) {
    const size_t min_truncated_len = 1000;

    for (int i = 0; i < n; i++) {

        // Without this guard "*buf_len - width - 1" is zero or wraps around,
        // which leads to a modulo by zero or an out-of-bounds write.
        if (width < 0 || *buf_len <= (size_t) width + 1) {
            return;
        }

        size_t offset = rand() % (*buf_len - width - 1);

        if (rand() % 100 < trunc_p) {
            size_t truncated = offset > min_truncated_len ? offset : min_truncated_len;
            // Truncation may only shrink the buffer, never grow it.
            if (truncated < *buf_len) {
                *buf_len = truncated;
            }
            continue;
        }

        for (int disp = 0; disp < width; disp++) {
            buf[offset + disp] = (int8_t) rand();
        }
    }
}
 | 
			
		||||
							
								
								
									
										46
									
								
								third-party/libscan/test/test_util.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										46
									
								
								third-party/libscan/test/test_util.h
									
									
									
									
										vendored
									
									
								
							@ -1,46 +0,0 @@
 | 
			
		||||
#ifndef SCAN_TEST_UTIL_H
 | 
			
		||||
#define SCAN_TEST_UTIL_H
 | 
			
		||||
 | 
			
		||||
#include "../libscan/scan.h"
 | 
			
		||||
#include <fcntl.h>
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
 | 
			
		||||
void load_file(const char *filepath, vfile_t *f);
 | 
			
		||||
void load_mem(void *mem, size_t size, vfile_t *f);
 | 
			
		||||
void load_doc_mem(void *mem, size_t mem_len, vfile_t *f, document_t *doc);
 | 
			
		||||
void load_doc_file(const char *filepath, vfile_t *f, document_t *doc);
 | 
			
		||||
void cleanup(document_t *doc, vfile_t *f);
 | 
			
		||||
 | 
			
		||||
/**
 * Formatted-log callback that discards everything it is given.
 *
 * @param filepath ignored
 * @param level    ignored
 * @param format   ignored, as are the variadic arguments
 */
static void noop_logf(const char *filepath, int level, char *format, ...) {
    (void) filepath;
    (void) level;
    (void) format;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Log callback that discards everything it is given.
 *
 * @param filepath ignored
 * @param level    ignored
 * @param str      ignored
 */
static void noop_log(const char *filepath, int level, char *str) {
    (void) filepath;
    (void) level;
    (void) str;
}
 | 
			
		||||
 | 
			
		||||
// Running total of the value sizes handed to counter_store().
static size_t store_size = 0;

/**
 * Store callback that only counts bytes: adds value_len to store_size and
 * ignores the key and the value contents.
 *
 * @param key       ignored
 * @param key_len   ignored
 * @param value     ignored
 * @param value_len number of bytes added to store_size
 */
static void counter_store(char* key, size_t key_len, char *value, size_t value_len) {
    (void) key;
    (void) key_len;
    (void) value;

    store_size = store_size + value_len;

    // Disabled debugging aid: dumps each stored value to "<uuid>.jpeg".
    //    char id[37];
    //    char tmp[PATH_MAX];
    //    uuid_unparse(reinterpret_cast<const unsigned char *>(key), id);
    //    sprintf(tmp, "%s.jpeg", id);
    //    int fd = open(tmp, O_TRUNC|O_WRONLY|O_CREAT, 0777);
    //    write(fd, value, value_len);
    //    close(fd);
}
 | 
			
		||||
 | 
			
		||||
meta_line_t *get_meta(document_t *doc, metakey key);
 | 
			
		||||
 | 
			
		||||
meta_line_t *get_meta_from(meta_line_t *meta, metakey key);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Invokes the close callback of a vfile object (not a pointer) when one is set.
// The argument is parenthesised so expressions such as *ptr expand correctly,
// and the body is wrapped in do/while so the macro behaves as one statement.
// The trailing ';' is kept on purpose: existing call sites invoke the macro
// without a terminating semicolon. Note that f is evaluated more than once.
#define CLOSE_FILE(f) do { if ((f).close != NULL) { (f).close(&(f)); } } while (0);
 | 
			
		||||
 | 
			
		||||
void destroy_doc(document_t *doc);
 | 
			
		||||
 | 
			
		||||
void fuzz_buffer(char *buf, size_t *buf_len, int width, int n, int trunc_p);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										1
									
								
								third-party/libscan/third-party/antiword
									
									
									
									
										vendored
									
									
								
							
							
								
								
								
								
								
								
									
									
								
							
						
						
									
										1
									
								
								third-party/libscan/third-party/antiword
									
									
									
									
										vendored
									
									
								
							@ -1 +0,0 @@
 | 
			
		||||
Subproject commit 62ae66db99e9dd88dfa31999f516f71bb8bdc8b2
 | 
			
		||||
							
								
								
									
										1
									
								
								third-party/libscan/third-party/utf8.h
									
									
									
									
										vendored
									
									
								
							
							
								
								
								
								
								
								
									
									
								
							
						
						
									
										1
									
								
								third-party/libscan/third-party/utf8.h
									
									
									
									
										vendored
									
									
								
							@ -1 +0,0 @@
 | 
			
		||||
Subproject commit 146be69f88575d753317d8ef13b16f80e0656fc7
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user