Compare commits

...

54 Commits

Author SHA1 Message Date
98e0a5fd64 Update CI script 2020-03-06 09:41:33 -05:00
740a49a09f version bump 2020-03-06 09:36:46 -05:00
81be662574 (breaking) update mime list 2020-03-06 09:36:21 -05:00
02fa3f02f5 Fix memory leak with virtual files in parse.c 2020-03-06 09:36:07 -05:00
cfdd7bdd87 Fix memory leak in font.c 2020-03-06 09:35:19 -05:00
7ceb645926 hotfix invalid read in text_buffer 2020-03-06 09:34:41 -05:00
7d0091f647 whoops 2020-03-05 21:54:56 -05:00
b3cd630399 Update README.md 2020-03-05 19:42:06 -05:00
5f7a1acfe3 Merge pull request #36 from simon987/wip-doc
Wip doc
2020-03-05 18:43:56 -05:00
513a21cca2 Undo debug stuff 2020-03-05 18:42:51 -05:00
04dbfb23ab Cleanup warnings 2020-03-05 16:53:30 -05:00
1abddabeec Rewrite doc.c module, fix bad error handling, fix pdf.c memory leaks 2020-03-05 16:12:34 -05:00
9ace5774af Update dependencies 2020-03-05 16:10:45 -05:00
eab6101cf7 make --fast faster 2020-03-05 12:26:43 -05:00
d7cbd5d2b6 wip doc rewrite 2020-03-05 09:13:37 -05:00
641edf2715 Prettier warning messages in main.c 2020-03-04 17:57:49 -05:00
7efb4957bf inline text/util functions 2020-03-04 17:50:31 -05:00
9ae77fdedb Fix css glitch 2020-03-03 16:51:01 -05:00
98c40901ed Disallow incremental scan when version does not match (#33) 2020-03-03 16:36:07 -05:00
363375d5da version bump 2020-03-03 16:25:41 -05:00
149de95d88 (breaking) Upgrade path filter bar 2020-03-03 16:24:24 -05:00
e5bb4856d2 (breaking) Set item depth in ingest pipeline 2020-03-02 17:39:25 -05:00
d78994d427 Ignore --incremental option when the directory does not exist (#31) 2020-03-01 21:16:50 -05:00
f2d68d54df Update README.md 2020-03-01 13:55:08 -05:00
e03625838b Settings menu (#30) and UI tweaks 2020-02-29 19:26:09 -05:00
86840b46f4 Version bump 2020-02-27 09:47:06 -05:00
e57f9916eb Rewrite documentation 2020-02-27 09:45:14 -05:00
565ba6ee76 Fix for #29 2020-02-27 09:44:19 -05:00
d83fc2c373 Fix docker build for 1.2.15 2020-02-27 09:42:18 -05:00
d4da28249e --fast option #27 2020-02-22 18:37:08 -05:00
483a454c8d --exclude argument #26 2020-02-22 16:55:35 -05:00
018ac86640 fix build... 2020-02-22 13:20:41 -05:00
398f1aead4 Support for cbr documents 2020-02-22 13:11:19 -05:00
d19a75926b Fix invalid read in terminate_string() 2020-02-22 13:10:40 -05:00
1ac8b40e3d Code style 2020-02-22 09:02:59 -05:00
a8505cb8c1 Fix for #28 2020-02-20 16:42:13 -05:00
ae8652d86e UI tweaks, search syntax (#25) 2020-02-16 15:24:29 -05:00
849beb09d8 hotfix 2020-02-15 19:33:18 -05:00
e1aaaee617 UI tweak 2020-02-15 09:30:14 -05:00
c02b940945 (I forgot to commit this) 2020-02-14 20:58:10 -05:00
2934ddb07f Add image viewer (#2) 2020-02-14 18:28:55 -05:00
7f6f3c02fa OCR tweaks 2020-02-11 21:13:47 -05:00
7f98d5a682 Fix buffer overflow (whoops) 2020-02-09 18:11:29 -05:00
7eb9c5d7d5 Fix web/index issue with NULL mime types 2020-02-09 17:23:49 -05:00
184439aa38 increase minimum image size for OCR 2020-02-09 14:06:59 -05:00
1ce8b298a1 Display EXIF tags on document info panel, remove march=native on openjp 2020-02-09 13:21:19 -05:00
75f99025d9 add exif dateTime, allow some special characters in text meta 2020-02-09 08:47:13 -05:00
ebe852bd5a Fix rewrite-url arg 2020-02-09 08:23:17 -05:00
402b103c49 Fix total count for ES 7.5 2020-02-08 09:25:00 -05:00
e9b6e1cdc2 Turn off auto optimisation in libtesseract build 2020-02-08 08:32:04 -05:00
ed1ce8ab5e Handle XML errors #18 2020-02-07 10:08:01 -05:00
d1fa4febc4 Improve scroll feature, UI fix 2020-02-07 10:08:01 -05:00
048c55df7b Update README.md 2020-02-06 19:56:29 -05:00
f77bc6a025 Update README.md 2020-02-06 19:55:32 -05:00
84 changed files with 2517 additions and 4312 deletions

View File

@@ -4,6 +4,8 @@ set(CMAKE_C_STANDARD 11)
project(sist2 C)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/CMakeModules")
option(SIST_DEBUG "Build a debug executable" on)
add_executable(
sist2
src/main.c
@@ -26,6 +28,7 @@ add_executable(
src/parsing/arc.c src/parsing/arc.h
src/parsing/doc.c src/parsing/doc.h
src/log.c src/log.h
src/parsing/cbr.h src/parsing/cbr.c
# argparse
argparse/argparse.h argparse/argparse.c
@@ -80,14 +83,40 @@ target_link_directories(
${UUID_LIBRARY_DIRS}
)
target_compile_options(sist2
target_compile_options(
sist2
PRIVATE
-Ofast
# -march=native
-fPIC
-fno-stack-protector
-fomit-frame-pointer
)
)
if (SIST_DEBUG)
target_compile_options(
sist2
PRIVATE
-g
-fstack-protector
-fno-omit-frame-pointer
-fsanitize=address
)
target_link_options(
sist2
PRIVATE
-fsanitize=address
)
set_target_properties(
sist2
PROPERTIES
OUTPUT_NAME sist2_debug
)
else ()
target_compile_options(
sist2
PRIVATE
-Ofast
-fno-stack-protector
-fomit-frame-pointer
)
endif ()
TARGET_LINK_LIBRARIES(
sist2
@@ -114,16 +143,14 @@ TARGET_LINK_LIBRARIES(
m
bz2
${PROJECT_SOURCE_DIR}/lib/libmagic.a
# ${PROJECT_SOURCE_DIR}/lib/libmagic.a
magic
${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
freetype
archive
xml2
${PROJECT_SOURCE_DIR}/lib/libopc/libmce.a
${PROJECT_SOURCE_DIR}/lib/libopc/libopc.a
${PROJECT_SOURCE_DIR}/lib/libopc/libplib.a
${PROJECT_SOURCE_DIR}/lib/libtesseract.a
${PROJECT_SOURCE_DIR}/lib/liblept.a
@@ -136,6 +163,8 @@ TARGET_LINK_LIBRARIES(
${PROJECT_SOURCE_DIR}/lib/libcrypto.a
${PROJECT_SOURCE_DIR}/lib/libssl.a
dl
pcre
)
add_custom_target(

View File

@@ -3,7 +3,7 @@ MAINTAINER simon987 <me@simon987.net>
RUN apt update
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
curl libtiff5 libpng16-16
curl libtiff5 libpng16-16 libpcre3
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \

View File

@@ -8,9 +8,12 @@ sist2 (Simple incremental search tool)
*Warning: sist2 is in early development*
![sist2.png](sist2.png)
## Features
* Fast, low memory usage, multi-threaded
* Mobile-friendly Web interface
* Portable (all its features are packaged in a single executable)
* Extracts text from common file types \*
* Generates thumbnails \*
@@ -26,80 +29,53 @@ sist2 (Simple incremental search tool)
## Getting Started
1. Have an [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) instance running
1.
1. Have an Elasticsearch (>= 6.X.X) instance running
1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
1. *(or)* Run using docker:
```bash
docker run -d --name es1 --net sist2_net -p 9200:9200 \
-e "discovery.type=single-node" elasticsearch:7.5.2
```
1. *(or)* Run using docker-compose:
```yaml
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:7.5.2
environment:
- discovery.type=single-node
- "ES_JAVA_OPTS=-Xms1G -Xmx2G"
```
1. Download sist2 executable
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
1. *(or)* `docker pull simon987/sist2:latest`
1. See [Usage guide](USAGE.md)
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
\* *Mac users*: See [#1](https://github.com/simon987/sist2/issues/1)
## Example usage
See [Usage guide](USAGE.md) for more details
![demo](demo.gif)
See help page `sist2 --help` for more details.
**Scan a directory**
```bash
sist2 scan ~/Documents -o ./orig_idx/
sist2 scan --threads 4 --content-size 16384 /mnt/Pictures
sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
```
**Push index to Elasticsearch or file**
```bash
sist2 index --force-reset ./my_idx
sist2 index --print ./my_idx > raw_documents.ndjson
```
**Start web interface**
```bash
sist2 web --bind 0.0.0.0 --port 4321 ./my_idx1 ./my_idx2 ./my_idx3
```
### Use sist2 with docker
**scan**
```bash
docker run -it \
-v /path/to/files/:/files \
-v $PWD/out/:/out \
simon987/sist2 scan -t 4 /files -o /out/my_idx1
```
**index**
```bash
docker run -it --network host\
-v $PWD/out/:/out \
simon987/sist2 index /out/my_idx1
```
**web**
```bash
docker run --rm --network host -d --name sist2\
-v $PWD/out/my_idx:/idx \
-v $PWD/my/files:/files \
simon987/sist2 web --bind 0.0.0.0 /idx
docker stop sist2
```
1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
1. Push index to Elasticsearch: `sist2 index ./docs_idx`
1. Start web interface: `sist2 web ./docs_idx`
## Format support
File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:---
pdf,xps,cbz,fb2,epub | MuPDF | text+ocr | yes, `png` | title |
pdf,xps,cbz,cbr,fb2,epub | MuPDF | text+ocr | yes, `png` | title |
`audio/*` | ffmpeg | - | yes, `jpeg` | ID3 tags |
`video/*` | ffmpeg | - | yes, `jpeg` | title, comment, artist |
`image/*` | ffmpeg | - | yes, `jpeg` | `EXIF:Artist`, `EXIF:ImageDescription` |
`image/*` | ffmpeg | - | yes, `jpeg` | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - |
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
docx, xlsx, pptx | libOPC | yes | no | no |
docx, xlsx, pptx | *(none)* | yes | no | no |
\* *See [Archive files](#archive-files)*
@@ -120,11 +96,11 @@ To check if a media file can be parsed without *seek*, execute `cat file.mp4 | f
### OCR
You can enable OCR support for pdf,xps,cbz,fb2,epub file types with the
You can enable OCR support for pdf,xps,cbz,cbr,fb2,epub file types with the
`--ocr <lang>` option. Download the language data files with your
package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
The `simon987/sist2` github image comes with common languages
The `simon987/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa) pre-installed.
Examples
@@ -145,8 +121,9 @@ binaries.
```bash
apt install git cmake pkg-config libglib2.0-dev \
libssl-dev uuid-dev python3 libmagic-dev libfreetype6-dev \
libcurl-dev libbz2-dev yasm libharfbuzz-dev ragel \
libarchive-dev libtiff5 libpng16-16 libpango1.0-dev
libcurl4-openssl-dev libbz2-dev yasm libharfbuzz-dev ragel \
libarchive-dev libtiff5 libpng16-16 libpango1.0-dev \
libxml2-dev libopenjp2-7-dev libleptonica-dev
```
2. Build

275
USAGE.md Normal file
View File

@@ -0,0 +1,275 @@
# Usage
*More examples (specifically with docker/compose) are in progress*
* [scan](#scan)
* [options](#scan-options)
* [examples](#scan-examples)
* [index format](#index-format)
* [index](#index)
* [options](#index-options)
* [examples](#index-examples)
* [web](#web)
* [options](#web-options)
* [examples](#web-examples)
* [rewrite_url](#rewrite_url)
* [link to specific indices](#link-to-specific-indices)
```
Usage: sist2 scan [OPTION]... PATH
or: sist2 index [OPTION]... INDEX
or: sist2 web [OPTION]... INDEX...
Lightning-fast file system indexer and search tool.
-h, --help show this help message and exit
-v, --version Show version and exit
--verbose Turn on logging
--very-verbose Turn on debug messages
Scan options
-t, --threads=<int> Number of threads. DEFAULT=1
-q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
--size=<int> Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
--content-size=<int> Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
--incremental=<str> Reuse an existing index and only scan modified files.
-o, --output=<str> Output directory. DEFAULT=index.sist2/
--rewrite-url=<str> Serve files from this url instead of from disk.
--name=<str> Index display name. DEFAULT: (name of the directory)
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
-e, --exclude=<str> Files that match this regex will not be scanned
--fast Only index file names & mime type
Index options
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
-p, --print Just print JSON documents to stdout.
--script-file=<str> Path to user script.
--batch-size=<int> Index batch size. DEFAULT: 100
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
Web options
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--bind=<str> Listen on this address. DEFAULT=localhost
--port=<str> Listen on this port. DEFAULT=4090
--auth=<str> Basic auth in user:password format
Made by simon987 <me@simon987.net>. Released under GPL-3.0
```
## Scan
### Scan options
* `-t, --threads`
Number of threads for file parsing. **Do not set a number higher than `$(nproc)`!**
* `-q, --quality`
Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. *Does not affect PDF thumbnails quality*
* `--size`
Thumbnail size in pixels.
* `--content-size`
Number of bytes of text to be extracted from the content of files (plain text and PDFs).
Repeated whitespace and special characters do not count toward this limit.
* `--incremental`
Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
will be copied to the new index and will not be parsed again.
* `-o, --output` Output directory.
* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
* `--name` Set the `name` option for the web module
* `--depth` Maximum scan depth. Set to 0 to only scan files directly in the root directory, or to -1 for infinite depth.
* `--archive` Archive file mode.
* skip: Don't parse
* list: Only get file names as text
* shallow: Don't parse archives inside archives.
* recurse: Scan archives recursively (default)
* `--ocr` See [OCR](README.md#OCR)
* `-e, --exclude` Regex pattern to exclude files. A file is excluded if the pattern matches any
part of the full absolute path.
Examples:
* `-e ".*\.ttf"`: Ignore ttf files
* `-e ".*\.(ttf|rar)"`: Ignore ttf and rar files
* `-e "^/mnt/backups/"`: Ignore all files in the `/mnt/backups/` directory
* `-e "^/mnt/Data[12]/"`: Ignore all files in the `/mnt/Data1/` and `/mnt/Data2/` directories
* `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"` Exclude the
`/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories
* `--fast` Only index file names and mime type
### Scan examples
Simple scan
```bash
sist2 scan ~/Documents
sist2 scan \
--threads 4 --content-size 16000000 --quality 1.0 --archive shallow \
--name "My Documents" --rewrite-url "http://nas.domain.local/My Documents/" \
~/Documents -o ./documents.idx/
```
Incremental scan
```
sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
```
### Index format
A typical `binary` type index structure looks like this:
```
documents.idx/
├── descriptor.json
├── _index_139965416830720
├── _index_139965425223424
├── _index_139965433616128
├── _index_139965442008832
└── thumbs
├── data.mdb
└── lock.mdb
```
The `_index_*` files contain the raw binary index data and are not meant to be
read by other applications. The format is generally compatible across different
sist2 versions.
The `thumbs/` folder is a [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database)
database containing the thumbnails.
The `descriptor.json` file contains general information about the index. The
following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`.
*Advanced usage*
Instead of using the `scan` module, you can also import an index generated
by a third party application. The 'external' index must have the following format:
```
my_index/
├── descriptor.json
├── _index_0
└── thumbs
├── data.mdb
└── lock.mdb
```
*descriptor.json*:
```json
{
"uuid": "<valid UUID4>",
"version": "_external_v1",
"root": "(optional)",
"name": "<name>",
"rewrite_url": "(optional)",
"type": "json",
"timestamp": 1578971024
}
```
*_index_0*: NDJSON format (One json object per line)
```json
{
"_id": "unique uuid for the file",
"index": "index uuid4 (same one as descriptor.json!)",
"mime": "application/x-cbz",
"size": 14341204,
"mtime": 1578882996,
"extension": "cbz",
"name": "my_book",
"path": "path/to/books",
"content": "text contents of the book",
"title": "Title of the book",
"tag": ["genre.fiction", "author.someguy", "etc..."],
"_keyword": [
{"k": "ISBN", "v": "ABCD34789231"}
],
"_text": [
{"k": "other", "v": "This will be indexed as text"}
]
}
```
You can find the full list of supported fields [here](src/io/serialize.c#L90)
The `_keyword.*` items will be indexed and searchable as **keyword** fields (only full matches allowed).
The `_text.*` items will be indexed and searchable as **text** fields (fuzzy searching allowed)
*thumbs/*:
LMDB key-value store. Keys are **binary** 128-bit UUID4s (`_id` field)
and values are raw image bytes.
Importing an external `binary` type index is technically possible but
it is currently unsupported and comes with no guarantee of backward/forward compatibility.
## Index
### Index options
* `--es-url`
Elasticsearch url and port. If you are using docker, make sure that both containers are on the
same network.
* `-p, --print`
Print index in JSON format to stdout.
* `--script-file`
Path to user script. See [Scripting](scripting/README.md).
* `--batch-size=<int>`
Index batch size. Indexing is generally faster with larger batches, but payloads that
are too large will fail and additional overhead for retrying with smaller sizes may slow
down the process.
* `-f, --force-reset`
Reset Elasticsearch mappings and settings.
**(You must use this option the first time you use the index command)**.
### Index examples
**Push to elasticsearch**
```bash
sist2 index --force-reset --batch-size 1000 --es-url http://localhost:9200 ./my_index/
sist2 index ./my_index/
```
**Save index in JSON format**
```bash
sist2 index --print ./my_index/ > my_index.ndjson
```
**Inspect contents of an index**
```bash
sist2 index --print ./my_index/ | jq | less
```
## Web
### Web options
* `--es-url=<str>` Elasticsearch url.
* `--bind=<str>` Listen on this address.
* `--port=<str>` Listen on this port.
* `--auth=<str>` Basic auth in user:password format
### Web examples
**Single index**
```bash
sist2 web --auth admin:hunter2 --bind 0.0.0.0 --port 8888 my_index
```
**Multiple indices**
```bash
# Indices will be displayed in this order in the web interface
sist2 web index1 index2 index3 index4
```
### rewrite_url
When the `rewrite_url` field is not empty, the web module ignores the `root`
field and will return an HTTP redirect to `<rewrite_url><path>/<name><extension>`
instead of serving the file from disk.
Both the `root` and `rewrite_url` fields are safe to manually modify from the
`descriptor.json` file.
### Link to specific indices
To link to specific indices, you can add a comma-separated list of index names to
the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are
not displayed.

2
cJSON

Submodule cJSON updated: 2d4ad84192...e8077d0150

View File

@@ -2,6 +2,11 @@
# Run the static-library helper script before configuring the build.
./scripts/get_static_libs.sh
cmake .
# Release build. Wipe the previous configuration first so the new SIST_DEBUG
# value starts from a clean state. CMake's cache file is spelled
# "CMakeCache.txt"; the earlier "CmakeCache.txt" spelling matched nothing on
# case-sensitive filesystems, and "rm -rf" hid the miss.
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=off .
make
strip sist2
# Debug build (SIST_DEBUG=on), again configured from a clean cache.
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=on .
make

BIN
demo.gif

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 MiB

View File

@@ -1,53 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**@file config/mce/config.h
*/
#ifndef MCE_CONFIG_H
#define MCE_CONFIG_H
#include <libxml/xmlstring.h>
#include <stdio.h>
#include <plib/plib.h>
#include <assert.h>
#ifdef __cplusplus
extern "C" {
#endif
#define MCE_NAMESPACE_SUBSUMPTION_ENABLED 0
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* MCE_CONFIG_H */

View File

@@ -1,189 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file mce/helper.h
Helper functions needed by mce/textreader.h and mce/textwriter.h to implement MCE:
- mceQNameLevelAdd(), mceQNameLevelLookup() and mceQNameLevelCleanup() maintain a set of mceQNameLevel_t tuples.
- mceQNameLevelPush() and mceQNameLevelPopIfMatch() maintain a stack of mceQNameLevel_t tuples.
- mceCtxInit(), mceCtxCleanup() and mceCtxUnderstandsNamespace() manage a context which holds all information needed to do MCE preprocessing.
*/
#include <mce/config.h>
#ifndef MCE_HELPER_H
#define MCE_HELPER_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Triple (ns, ln, level), plus a flag used by the text writer.
NOTE(review): ns/ln look like a namespace URI and a local name, as in the
mce_match_element() macro arguments -- confirm against the implementation.
*/
typedef struct MCE_QNAME_LEVEL {
xmlChar *ns;
xmlChar *ln;
puint32_t level;
puint32_t flag; // used by mceTextWriter
} mceQNameLevel_t;
/**
State attached to a skip interval (the \c state field of mceSkipItem_t).
NOTE(review): the enumerator names suggest they map to MCE "Ignorable",
"AlternateContent" and matched-"Choice" handling -- confirm against the
implementation before relying on this.
*/
typedef enum MCE_SKIP_STATE_ENUM {
MCE_SKIP_STATE_IGNORE,
MCE_SKIP_STATE_ALTERNATE_CONTENT,
MCE_SKIP_STATE_CHOICE_MATCHED
} mceSkipState_t;
/**
Represents an interval of levels which are "skipped", i.e. ignored.
The interval is described by \c level_start and \c level_end; \c state
records which mceSkipState_t applies to it.
*/
typedef struct MCE_SKIP_ITEM {
puint32_t level_start;
puint32_t level_end;
mceSkipState_t state;
} mceSkipItem_t;
/**
Represents a set of (ns, ln, level) triples, stored as an array of
mceQNameLevel_t entries together with an item count.
NOTE(review): \c max_level is presumably the highest level currently held in
the set -- confirm against the implementation.
*/
typedef struct MCE_QNAME_LEVEL_SET {
mceQNameLevel_t *list_array;
puint32_t list_items;
puint32_t max_level;
} mceQNameLevelSet_t;
/**
The skip stack: an array of mceSkipItem_t entries together with an item
count. It is manipulated through mceSkipStackPush(), mceSkipStackPop(),
mceSkipStackTop() and mceSkipStackSkip().
*/
typedef struct MCE_SKIP_STACK {
mceSkipItem_t *stack_array;
puint32_t stack_items;
} mceSkipStack_t;
/**
Error codes of the MCE processor. The current value is kept in the
\c error field of mceCtx_t; MCE_ERROR_NONE is the first enumerator.
*/
typedef enum MCE_ERROR_ENUM {
MCE_ERROR_NONE,
MCE_ERROR_XML,
MCE_ERROR_MUST_UNDERSTAND,
MCE_ERROR_VALIDATION,
MCE_ERROR_MEMORY
} mceError_t;
/**
Holds all information to do MCE preprocessing: several (ns, ln, level) sets,
the skip stack, the last error, and two processing switches.
*/
typedef struct MCE_CONTEXT {
mceQNameLevelSet_t ignorable_set;
mceQNameLevelSet_t understands_set;
mceQNameLevelSet_t processcontent_set;
mceQNameLevelSet_t suspended_set;
// The subsumption sets exist only when MCE_NAMESPACE_SUBSUMPTION_ENABLED is
// non-zero; mce/config.h defines it as 0.
#if (MCE_NAMESPACE_SUBSUMPTION_ENABLED)
mceQNameLevelSet_t subsume_namespace_set;
mceQNameLevelSet_t subsume_exclude_set;
mceQNameLevelSet_t subsume_prefix_set;
#endif
mceSkipStack_t skip_stack;
mceError_t error;
pbool_t mce_disabled;
puint32_t suspended_level;
} mceCtx_t;
/**
Add a new triple (ns, ln, level) to the triple set \c qname_level_set.
The \c ns_sub string is optional and will not be touched.
*/
pbool_t mceQNameLevelAdd(mceQNameLevelSet_t *qname_level_set, const xmlChar *ns, const xmlChar *ln, puint32_t level);
/**
Look up a triple (ns, ln, level) via \c ns and \c ln. If \c ignore_ln is PTRUE then the first triple matching \c ns will be returned.
*/
mceQNameLevel_t* mceQNameLevelLookup(mceQNameLevelSet_t *qname_level_set, const xmlChar *ns, const xmlChar *ln, pbool_t ignore_ln);
/**
Remove all triples (ns, ln, level) where the level is greater than or equal to \c level.
*/
pbool_t mceQNameLevelCleanup(mceQNameLevelSet_t *qname_level_set, puint32_t level);
/**
Push a new skip interval (level_start, level_end, state) on the stack \c skip_stack.
*/
pbool_t mceSkipStackPush(mceSkipStack_t *skip_stack, puint32_t level_start, puint32_t level_end, mceSkipState_t state);
/**
Pop the top skip interval from the stack \c skip_stack.
*/
void mceSkipStackPop(mceSkipStack_t *skip_stack);
/**
Returns top item or NULL.
*/
mceSkipItem_t *mceSkipStackTop(mceSkipStack_t *skip_stack);
/**
Returns TRUE if the \c level is in the top skip interval.
*/
pbool_t mceSkipStackSkip(mceSkipStack_t *skip_stack, puint32_t level);
/**
Initialize the mceCtx_t \c ctx.
*/
pbool_t mceCtxInit(mceCtx_t *ctx);
/**
Cleanup, i.e. release all resources of the mceCtx_t \c ctx.
*/
pbool_t mceCtxCleanup(mceCtx_t *ctx);
/**
Register the namespace \c ns in \c ctx.
*/
pbool_t mceCtxUnderstandsNamespace(mceCtx_t *ctx, const xmlChar *ns);
/**
Register the namespace \ns in \c ctx.
*/
pbool_t mceCtxSuspendProcessing(mceCtx_t *ctx, const xmlChar *ns, const xmlChar *ln);
#if (MCE_NAMESPACE_SUBSUMPTION_ENABLED)
/**
Subsume namespace \c ns_new with \c ns_old.
*/
pbool_t mceCtxSubsumeNamespace(mceCtx_t *ctx, const xmlChar *prefix_new, const xmlChar *ns_new, const xmlChar *ns_old);
#endif
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* MCE_HELPER_H */

View File

@@ -1,464 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file mce/textreader.h
*/
#ifndef MCE_TEXTREADER_H
#define MCE_TEXTREADER_H
#ifdef __cplusplus
extern "C" {
#endif
/**
A handle to an MCE-aware libxml2 xmlTextReader.
*/
typedef struct MCE_TEXTREADER mceTextReader_t;
#ifdef __cplusplus
} /* extern "C" */
#endif
#include <mce/config.h>
#include <opc/opc.h>
#include <mce/helper.h>
#include <libxml/xmlwriter.h>
#ifdef __cplusplus
extern "C" {
#endif
struct MCE_TEXTREADER {
xmlTextReaderPtr reader;
mceCtx_t mceCtx;
};
/**
Wrapper around a libxml2 xmlTextReaderRead function.
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderRead
*/
int mceTextReaderRead(mceTextReader_t *mceTextReader);
/**
Wrapper around a libxml2 xmlTextReaderNext function.
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderNext
*/
int mceTextReaderNext(mceTextReader_t *mceTextReader);
/**
Creates an mceTextReader from an XmlTextReader.
\code
mceTextReader reader;
mceTextReaderInit(&reader, xmlNewTextReaderFilename("sample.xml"));
// reader is ready to use.
mceTextReaderCleanup(&reader);
\endcode
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlNewTextReaderFilename
*/
int mceTextReaderInit(mceTextReader_t *mceTextReader, xmlTextReaderPtr reader);
/**
Cleanup MCE reader, i.e. free all resources. Also calls xmlTextReaderClose and xmlFreeTextReader.
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderClose
\see http://xmlsoft.org/html/libxml-xmlreader.html#xmlFreeTextReader
*/
int mceTextReaderCleanup(mceTextReader_t *mceTextReader);
/**
Reads all events from \c mceTextReader and pipes them to \c writer.
\code
mceTextReader reader;
mceTextReaderInit(&reader, xmlNewTextReaderFilename("sample.xml"));
mceTextReaderUnderstandsNamespace(&reader, _X("http://myextension"));
xmlTextWriterPtr writer=xmlNewTextWriterFilename("out.xml", 0);
mceTextReaderDump(&reader, writer, P_FALSE);
xmlFreeTextWriter(writer);
mceTextReaderCleanup(&reader);
\endcode
*/
int mceTextReaderDump(mceTextReader_t *mceTextReader, xmlTextWriter *writer, pbool_t fragment);
/**
Registers an MCE namespace.
\see mceTextReaderDump()
*/
int mceTextReaderUnderstandsNamespace(mceTextReader_t *mceTextReader, const xmlChar *ns);
/**
Disable MCE processing.
\return Returns old value.
*/
pbool_t mceTextReaderDisableMCE(mceTextReader_t *mceTextReader, pbool_t flag);
/**
Signal an error to the MCE processor.
*/
void mceRaiseError(xmlTextReader *reader, mceCtx_t *ctx, mceError_t error, const xmlChar *str, ...);
/**
Internal function which does the MCE postprocessing. E.g. mceTextReaderRead() is implemented as
\code
mceTextReaderPostprocess(mceTextReader->reader, &mceTextReader->mceCtx, xmlTextReaderRead(mceTextReader->reader))
\endcode
This function is exposed to make an existing libxml2 xmlTextReader MCE aware.
*/
int mceTextReaderPostprocess(xmlTextReader *reader, mceCtx_t *ctx, int ret);
/**
Get the error code.
*/
mceError_t mceTextReaderGetError(mceTextReader_t *mceTextReader);
/**
Helper macro to declare a start/end document block in a declarative way:
\code
mce_start_document(reader) {
} mce_end_document(reader);
\endcode
\hideinitializer
*/
#define mce_start_document(_reader_) \
if (NULL!=(_reader_)) { \
mceTextReaderRead(_reader_); \
if (0)
/**
\see mce_start_document.
\hideinitializer
*/
#define mce_end_document(_reader_) \
} /* if (NULL!=reader) */ \
/**
Container for mce_start_element and mce_start_attribute declarations.
\see mce_match_element
\see mce_match_attribute
\hideinitializer
*/
#define mce_start_choice(_reader_) \
if (0)
/**
\see mce_start_choice
\hideinitializer
*/
#define mce_end_choice(_reader_)
/**
Skips the attributes.
Expands to an empty mce_start_attributes() / mce_end_attributes() block, so no
attribute is handled. Note that the expansion already ends with a semicolon.
\see mce_match_element.
\hideinitializer
*/
#define mce_skip_attributes(_reader_) \
mce_start_attributes(_reader_) { \
} mce_end_attributes(_reader_);
/**
Skips the children.
Expands to an empty mce_start_children() / mce_end_children() block, so no
child node is handled. Note that the expansion already ends with a semicolon.
\see mce_match_attribute.
\hideinitializer
*/
#define mce_skip_children(_reader_) \
mce_start_children(_reader_) { \
} mce_end_children(_reader_);
/**
Starts the block which handles the children of the current element.
If the current element is not empty (xmlTextReaderIsEmptyElement()), the reader
is advanced once with mceTextReaderRead() and a <tt>do { ... } while</tt> loop is opened.
The expansion ends in a dangling <tt>if (0)</tt> so that the match macros inside the
block can chain onto it as <tt>else if</tt> branches.
Must be closed with mce_end_children().
\see mce_start_element.
\hideinitializer
*/
#define mce_start_children(_reader_) \
if (!xmlTextReaderIsEmptyElement((_reader_)->reader)) { \
mceTextReaderRead(_reader_); do { \
if (0)
/**
Ends the block started with mce_start_children().
Supplies the final <tt>else</tt> branch of the chain: a node which no match macro
handled is skipped with mceTextReaderNext(), unless it is an end element.
The loop repeats until the current node type is XML_READER_TYPE_END_ELEMENT or
XML_READER_TYPE_NONE; afterwards the <tt>if</tt> opened by mce_start_children() is closed.
\see mce_start_element.
\hideinitializer
*/
#define mce_end_children(_reader_) \
else { \
if (XML_READER_TYPE_END_ELEMENT!=xmlTextReaderNodeType((_reader_)->reader)) { \
mceTextReaderNext(_reader_); /*skip unhandled element */ \
} \
} \
} while(XML_READER_TYPE_END_ELEMENT!=xmlTextReaderNodeType((_reader_)->reader) && \
XML_READER_TYPE_NONE!=xmlTextReaderNodeType((_reader_)->reader)); \
} /* if (!xmlTextReaderIsEmptyElement(reader->reader)) */
/**
Helper macro to match an element. Useful for calling code in a separate function:
\code
void handleElement(reader) {
mce_start_choice(reader) {
mce_start_element(reader, _X("ns"), _X("element")) {
} mce_end_element(reader);
} mce_end_choice(reader);
}
void parse(reader) {
mce_start_document(reader) {
mce_start_element(reader, _X("ns"), _X("ln")) {
mce_skip_attributes(reader);
mce_start_children(reader) {
mce_match_element(reader, _X("ns"), _X("element")) {
handleElement(reader);
}
} mce_end_children(reader);
} mce_end_element(reader);
} mce_end_document(reader);
}
\endcode
The macro expands to <tt>} else if (...) {</tt>, so it must be used inside a block
opened by mce_start_document(), mce_start_children() or mce_start_choice().
The branch is taken when the current node is an element whose namespace URI equals
\a ns and whose local name equals \a ln; passing NULL for \a ns or \a ln disables the
respective comparison. Unlike the mce_start_element() / mce_end_element() pair the
macro itself does not advance the reader.
\a ns and \a ln are parenthesized in the expansion so that arbitrary expressions can
be passed, but each of them is evaluated up to two times.
\hideinitializer
*/
#define mce_match_element(_reader_, ns, ln) \
} else if (XML_READER_TYPE_ELEMENT==xmlTextReaderNodeType((_reader_)->reader) \
&& (NULL==(ns) || 0==xmlStrcmp((ns), xmlTextReaderConstNamespaceUri((_reader_)->reader))) \
&& (NULL==(ln) || 0==xmlStrcmp((ln), xmlTextReaderConstLocalName((_reader_)->reader)))) {
/**
Helper macro to declare an element block in a declarative way:
\code
mce_start_element(reader, _X("ns"), _X("ln")) {
mce_start_attributes(reader) {
mce_start_attribute(reader, _X("ns"), _X("lnA")) {
// code for handling lnA.
} mce_end_attribute(reader);
mce_start_attribute(reader, _X("ns"), _X("lnB")) {
// code for handling lnB.
} mce_end_attribute(reader);
} mce_end_attributes(reader);
mce_start_children(reader) {
mce_start_element(reader, _X("ns"), _X("lnA")) {
// code for handling lnA.
} mce_end_element(reader);
mce_start_element(reader, _X("ns"), _X("lnB")) {
// code for handling lnB.
} mce_end_element(reader);
mce_start_text(reader) {
// code for handling text.
} mce_end_text(reader);
} mce_end_children(reader);
} mce_end_element(reader);
\endcode
This macro is a plain alias for mce_match_element(); the difference to a bare
match is that the block is terminated with mce_end_element(), which advances the reader.
\hideinitializer
*/
#define mce_start_element(_reader_, ns, ln) \
mce_match_element(_reader_, ns, ln)
/**
Ends a block started with mce_start_element() by calling mceTextReaderNext() on
\a _reader_. The expansion carries no trailing semicolon; the caller has to supply it.
\see mce_start_element.
\hideinitializer
*/
#define mce_end_element(_reader_) \
mceTextReaderNext(_reader_)
/**
Matches #TEXT without consuming it.
Expands to <tt>} else if (...) {</tt>; the branch is taken when the current node type is
XML_READER_TYPE_TEXT or XML_READER_TYPE_SIGNIFICANT_WHITESPACE.
\hideinitializer
*/
#define mce_match_text(_reader_) \
} else if (XML_READER_TYPE_TEXT==xmlTextReaderNodeType((_reader_)->reader) \
|| XML_READER_TYPE_SIGNIFICANT_WHITESPACE==xmlTextReaderNodeType((_reader_)->reader)) {
/**
Starts a text block. Plain alias for mce_match_text(); terminate the block with
mce_end_text().
\see mce_start_element.
\hideinitializer
*/
#define mce_start_text(_reader_) \
mce_match_text(_reader_)
/**
Ends a block started with mce_start_text() by calling mceTextReaderNext() on
\a _reader_. The expansion carries no trailing semicolon; the caller has to supply it.
\see mce_start_element.
\hideinitializer
*/
#define mce_end_text(_reader_) \
mceTextReaderNext(_reader_)
/**
Starts the block which handles the attributes of the current element.
If xmlTextReaderMoveToFirstAttribute() returns 1, a <tt>do { ... } while</tt> loop over
the attributes is opened. The expansion ends in a dangling <tt>if (0)</tt> so that
mce_start_attribute() / mce_match_attribute() can chain onto it as <tt>else if</tt> branches.
Must be closed with mce_end_attributes().
\see mce_start_element.
\hideinitializer
*/
#define mce_start_attributes(_reader_) \
if (1==xmlTextReaderMoveToFirstAttribute((_reader_)->reader)) { \
do { \
if (0)
/**
Ends the block started with mce_start_attributes().
Supplies an empty final <tt>else</tt> branch for attributes which were not matched,
repeats the loop while xmlTextReaderMoveToNextAttribute() returns 1 and finally
moves the reader back to the owning element with xmlTextReaderMoveToElement().
\see mce_start_element.
\hideinitializer
*/
#define mce_end_attributes(_reader_) \
else { /* skipped attribute */ } \
} while(1==xmlTextReaderMoveToNextAttribute((_reader_)->reader)); \
xmlTextReaderMoveToElement((_reader_)->reader); }
/**
Helper macro to match an attribute. Useful for calling code in a separate function:
\code
void handleA(reader) {
mce_start_choice(reader) {
mce_start_attribute(reader, _X("ns"), _X("attr")) {
} mce_end_attribute(reader);
} mce_end_choice(reader);
}
void parse(reader) {
mce_start_document(reader) {
mce_start_element(reader, _X("ns"), _X("ln")) {
mce_start_attributes(reader) {
mce_match_attribute(reader, _X("ns"), _X("attr")) {
handleA(reader);
}
} mce_end_attributes(reader);
mce_skip_children(reader);
} mce_end_element(reader);
} mce_end_document(reader);
}
\endcode
The macro expands to <tt>} else if (...) {</tt>, so it must be used inside a block
opened by mce_start_attributes() or mce_start_choice().
The branch is taken when the namespace URI of the current node equals \a ns and its
local name equals \a ln; passing NULL for \a ns or \a ln disables the respective
comparison. The node type itself is not checked here.
\a ns and \a ln are parenthesized in the expansion so that arbitrary expressions can
be passed, but each of them is evaluated up to two times.
\hideinitializer
*/
#define mce_match_attribute(_reader_, ns, ln) \
} else if ((NULL==(ns) || 0==xmlStrcmp((ns), xmlTextReaderConstNamespaceUri((_reader_)->reader))) \
&& (NULL==(ln) || 0==xmlStrcmp((ln), xmlTextReaderConstLocalName((_reader_)->reader)))) {
/**
Starts an attribute block. Plain alias for mce_match_attribute(); terminate the
block with mce_end_attribute().
\see mce_start_element.
\hideinitializer
*/
#define mce_start_attribute(_reader_, ns, ln) \
mce_match_attribute(_reader_, ns, ln)
/**
Ends a block started with mce_start_attribute(). Expands to nothing; advancing to
the next attribute is done by the loop in mce_end_attributes().
\see mce_start_element.
\hideinitializer
*/
#define mce_end_attribute(_reader_)
/**
Error handling for MCE parsers.
\code
mce_start_element(&reader, NULL, _X("Default")) {
const xmlChar *ext=NULL;
const xmlChar *type=NULL;
mce_start_attributes(&reader) {
mce_start_attribute(&reader, NULL, _X("Extension")) {
ext=xmlTextReaderConstValue(reader.reader);
} mce_end_attribute(&reader);
mce_start_attribute(&reader, NULL, _X("ContentType")) {
type=xmlTextReaderConstValue(reader.reader);
} mce_end_attribute(&reader);
} mce_end_attributes(&reader);
mce_error_guard_start(&reader) {
mce_error(&reader, NULL==ext || ext[0]==0, MCE_ERROR_VALIDATION, "Missing @Extension attribute!");
mce_error(&reader, NULL==type || type[0]==0, MCE_ERROR_VALIDATION, "Missing @ContentType attribute!");
opcContainerType *ct=insertType(c, type, OPC_TRUE);
mce_error(&reader, NULL==ct, MCE_ERROR_MEMORY, NULL);
opcContainerExtension *ce=opcContainerInsertExtension(c, ext, OPC_TRUE);
mce_error(&reader, NULL==ce, MCE_ERROR_MEMORY, NULL);
mce_errorf(&reader, NULL!=ce->type && 0!=xmlStrcmp(ce->type, type), MCE_ERROR_VALIDATION, "Extension \"%s\" is mapped to type \"%s\" as well as \"%s\"", ext, type, ce->type);
ce->type=ct->type;
} mce_error_guard_end(&reader);
mce_skip_children(&reader);
} mce_end_element(&reader);
\endcode
\hideinitializer
*/
#define mce_error_guard_start(_reader_) if (MCE_ERROR_NONE==(_reader_)->mceCtx.error) do {
/**
Closes the <tt>do { ... } while(0)</tt> block opened by mce_error_guard_start().
The guarded block is only entered while the reader's error code is MCE_ERROR_NONE.
Because the loop condition is the constant 0, the <tt>continue</tt> issued by
mce_error() / mce_errorf() leaves the guarded block immediately.
The expansion carries no trailing semicolon; the caller has to supply it.
\see mce_error_guard_start
\hideinitializer
*/
#define mce_error_guard_end(_reader_) } while(0)
/**
Signal an error if \a guard is true.
Stores \a err in the MCE context of \a _reader_, prints \a msg (or the stringified
\a err expression when \a msg is NULL) to stderr and then executes <tt>continue</tt>.
It is therefore meant to be used inside a mce_error_guard_start() /
mce_error_guard_end() block, which the <tt>continue</tt> leaves.
The message is printed through a fixed "%s" format, so a '%' inside \a msg is
printed literally and never interpreted as a conversion specification.
\hideinitializer
*/
#define mce_error(_reader_, guard, err, msg) if (guard) { (_reader_)->mceCtx.error=(err); fprintf(stderr, "%s", (NULL!=(msg)?(msg):#err)); continue; }
/**
Signal a formatted error if \a guard is true.
Calls mceRaiseError() with the underlying xmlTextReader, the MCE context of
\a _reader_, \a err, \a msg as format string (or the stringified \a err expression
when \a msg is NULL) and the variadic arguments, then executes <tt>continue</tt>.
It is therefore meant to be used inside a mce_error_guard_start() /
mce_error_guard_end() block.
With GCC-compatible compilers the variadic part may be empty (<tt>##__VA_ARGS__</tt>);
the other branch expands to <tt>, __VA_ARGS__</tt> and therefore needs at least one
variadic argument.
\hideinitializer
*/
#if defined(__GNUC__)
#define mce_errorf(_reader_, guard, err, msg, ...) if (guard) { mceRaiseError((_reader_)->reader, &(_reader_)->mceCtx, err, _X((NULL!=msg?msg:#err)), ##__VA_ARGS__ ); continue; }
#else
#define mce_errorf(_reader_, guard, err, msg, ...) if (guard) { mceRaiseError((_reader_)->reader, &(_reader_)->mceCtx, err, _X((NULL!=msg?msg:#err)), __VA_ARGS__ ); continue; }
#endif
/**
Intended to issue the error only when in "strict mode".
NOTE: in this header the macro is a plain alias for mce_error(), i.e. the error is
always issued; no strict-mode check is performed here.
\hideinitializer
*/
#define mce_error_strict mce_error
/**
Formatted variant of mce_error_strict. In this header it is a plain alias for
mce_errorf(), i.e. the error is always issued.
\see mce_error_strict
\hideinitializer
*/
#define mce_error_strictf mce_errorf
/**
Marker for an MCE definition. Expands to nothing; it only annotates the source.
\hideinitializer
*/
#define mce_def
/**
Marker for an MCE reference. Expands to its parenthesized argument <tt>(r)</tt>.
\hideinitializer
*/
#define mce_ref(r) (r)
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* MCE_TEXTREADER_H */

View File

@@ -1,176 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file mce/textwriter.h
*/
#include <mce/config.h>
#include <libxml/xmlwriter.h>
#include <mce/helper.h>
#ifndef MCE_TEXTWRITER_H
#define MCE_TEXTWRITER_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Default flags for an MCE namespace declaration.
*/
#define MCE_DEFAULT 0x0
/**
Flags MCE namespace declaration "ignorable".
*/
#define MCE_IGNORABLE 0x1
/**
Flags MCE namespace declaration "must understand".
*/
#define MCE_MUSTUNDERSTAND 0x2
/**
The MCE text writer context.
*/
typedef struct MCE_TEXTWRITER_STRUCT mceTextWriter;
/**
Create a new MCE text writer.
\see http://xmlsoft.org/html/libxml-xmlIO.html#xmlOutputBufferCreateIO
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlNewTextWriter
*/
mceTextWriter *mceTextWriterCreateIO(xmlOutputWriteCallback iowrite, xmlOutputCloseCallback ioclose, void *ioctx, xmlCharEncodingHandlerPtr encoder);
/**
Helper which create a new MCE text writer for a FILE handle.
*/
mceTextWriter *mceNewTextWriterFile(FILE *file);
/**
Free all resources for \a w.
*/
int mceTextWriterFree(mceTextWriter *w);
/**
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterStartDocument
*/
int mceTextWriterStartDocument(mceTextWriter *w);
/**
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterEndDocument
*/
int mceTextWriterEndDocument(mceTextWriter *w);
/**
Start a new XML element. If ns==NULL then there is no namespace and ""==ns means the default namespace.
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterStartElement
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterStartElementNS
*/
int mceTextWriterStartElement(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln);
/**
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterEndElement
*/
int mceTextWriterEndElement(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln);
/**
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterWriteString
*/
int mceTextWriterWriteString(mceTextWriter *w, const xmlChar *content);
/**
Register a namespace. Must be called before mceTextWriterStartElement.
\see MCE_DEFAULT
\see MCE_IGNORABLE
\see MCE_MUSTUNDERSTAND
*/
const xmlChar *mceTextWriterRegisterNamespace(mceTextWriter *w, const xmlChar *ns, const xmlChar *prefix, int flags);
/**
Register qname (ns, ln) as a "process content" element wrt. MCE. Must be called before mceTextWriterStartElement.
*/
int mceTextWriterProcessContent(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln);
/**
Writes a formatted attribute.
\see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterWriteFormatAttribute
*/
int mceTextWriterAttributeF(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln, const char *value, ...);
/**
Starts an MCE alternate content section.
*/
int mceTextWriterStartAlternateContent(mceTextWriter *w);
/**
Ends an MCE alternate content section.
*/
int mceTextWriterEndAlternateContent(mceTextWriter *w);
/**
Start an MCE choice.
*/
int mceTextWriterStartChoice(mceTextWriter *w, const xmlChar *ns);
/**
Ends an MCE choice.
*/
int mceTextWriterEndChoice(mceTextWriter *w);
/**
Start an MCE fallback.
*/
int mceTextWriterStartFallback(mceTextWriter *w);
/**
Ends an MCE fallback.
*/
int mceTextWriterEndFallback(mceTextWriter *w);
/**
Returns the underlying xmlTextWriter.
*/
xmlTextWriterPtr mceTextWriterIntern(mceTextWriter *w);
/**
Helper which create a new xmlTextWriterPtr for a FILE handle.
*/
xmlTextWriterPtr xmlNewTextWriterFile(FILE *file);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* MCE_TEXTWRITER_H */

View File

@@ -1,189 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**@file config/opc/config.h
*/
#ifndef OPC_CONFIG_H
#define OPC_CONFIG_H
#include <libxml/xmlstring.h>
#include <plib/plib.h>
#include <assert.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
Assert expression e is true. Will be removed entirely in release mode.
\hideinitializer
*/
#define OPC_ASSERT(e) assert(e)
/**
Assert expression e is true. Expression will be executed in release mode too.
\hideinitializer
*/
#ifdef NDEBUG
#define OPC_ENSURE(e) (void)(e)
#else
#define OPC_ENSURE(e) assert(e)
#endif
/**
Constant for boolean true.
\hideinitializer
*/
#define OPC_TRUE (0==0)
/**
Constant for boolean false.
\hideinitializer
*/
#define OPC_FALSE (0==1)
/**
Boolean type.
\hideinitializer
*/
typedef pbool_t opc_bool_t;
/**
Type which represents an offset in e.g. a file.
\hideinitializer
*/
typedef pofs_t opc_ofs_t;
/**
8-bit unsigned integer.
\hideinitializer
*/
typedef puint8_t opc_uint8_t;
/**
16-bit unsigned integer.
\hideinitializer
*/
typedef puint16_t opc_uint16_t;
/**
32-bit unsigned integer.
\hideinitializer
*/
typedef puint32_t opc_uint32_t;
/**
64-bit unsigned integer.
\hideinitializer
*/
typedef puint64_t opc_uint64_t;
/**
8-bit signed integer.
\hideinitializer
*/
typedef pint8_t opc_int8_t;
/**
16-bit signed integer.
\hideinitializer
*/
typedef pint16_t opc_int16_t;
/**
32-bit signed integer.
\hideinitializer
*/
typedef pint32_t opc_int32_t;
/**
64-bit signed integer.
\hideinitializer
*/
typedef pint64_t opc_int64_t;
/**
Default size of the deflate buffer used by zlib.
*/
#define OPC_DEFLATE_BUFFER_SIZE 4096
/**
Max system path len.
*/
#define OPC_MAX_PATH 512
/**
Error codes for the OPC module.
*/
typedef enum OPC_ERROR_ENUM {
OPC_ERROR_NONE,
OPC_ERROR_STREAM,
OPC_ERROR_SEEK, // can't seek
OPC_ERROR_UNSUPPORTED_DATA_DESCRIPTOR,
OPC_ERROR_UNSUPPORTED_COMPRESSION,
OPC_ERROR_DEFLATE,
OPC_ERROR_HEADER,
OPC_ERROR_MEMORY,
OPC_ERROR_XML,
OPC_ERROR_USER // user triggered an abort
} opc_error_t;
/**
Compression options for OPC streams.
*/
typedef enum OPC_COMPRESSIONOPTION_ENUM {
OPC_COMPRESSIONOPTION_NONE,
OPC_COMPRESSIONOPTION_NORMAL,
OPC_COMPRESSIONOPTION_MAXIMUM,
OPC_COMPRESSIONOPTION_FAST,
OPC_COMPRESSIONOPTION_SUPERFAST
} opcCompressionOption_t;
/**
Helper for debug logs.
\hideinitializer
*/
#define opc_logf printf
/**
Abstraction for memset(m, 0, s).
\hideinitializer
*/
#define opc_bzero_mem(m,s) memset(m, 0, s)
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_CONFIG_H */

View File

@@ -1,300 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/container.h
The container.h module has the fundamental methods for dealing with ZIP-based OPC container.
OPC container can be opened in READ-ONLY mode, WRITE-ONLY mode, READ/WRITE mode, TEMPLATE mode and TRANSITION mode.
The most notable mode is the READ/WRITE mode, which gives you concurrent stream-based READ and WRITE access to a
single ZIP-based OPC container. This is achieved without the use of temporary files by taking advantage of the
OPC specific “interleave” mode. \see http://standards.iso.org/ittf/PubliclyAvailableStandards/c051459_ISOIEC_29500-2_2008(E).zip
The TEMPLATE mode allows very fast customized "cloning" of ZIP-based OPC container by using "RAW access" to the ZIP streams.
The TRANSITION mode is a special version of the TEMPLATE mode, which allows transition-based READ/WRITE access to the
ZIP-based OPC container using a temporary file.
*/
#include <opc/config.h>
#include <opc/file.h>
#ifndef OPC_CONTAINER_H
#define OPC_CONTAINER_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Handle to an OPC container created by \ref opcContainerOpen.
\see opcContainerOpen.
*/
typedef struct OPC_CONTAINER_STRUCT opcContainer;
/**
Modes for opcContainerOpen();
\see opcContainerOpen
*/
typedef enum {
/**
Opens the OPC container denoted by \a fileName in READ-ONLY mode. The \a destName parameter must be \a NULL.
\hideinitializer
*/
OPC_OPEN_READ_ONLY=0,
/**
Opens the OPC container denoted by \a fileName in WRITE-ONLY mode. The \a destName parameter must be \a NULL.
\hideinitializer
*/
OPC_OPEN_WRITE_ONLY=1,
/**
Opens the OPC container denoted by \a fileName in READ/WRITE mode. The \a destName parameter must be \a NULL.
\hideinitializer
*/
OPC_OPEN_READ_WRITE=2,
/**
This mode will open the container denoted by \a fileName in READ-ONLY mode and the container denoted by
\a destName in write-only mode. Any modifications will be written to the container denoted by \a destName
and the unmodified streams from \a fileName will be written to \a destName on closing.
\warning Currently not implemented.
\hideinitializer
*/
OPC_OPEN_TEMPLATE=3,
/**
Like the OPC_OPEN_TEMPLATE mode, but the \a destName will be renamed to the \a fileName on closing. If \a destName
is \a NULL, then the name of the temporary file will be generated automatically.
\warning Currently not implemented.
\hideinitializer
*/
OPC_OPEN_TRANSITION=4
} opcContainerOpenMode;
/** Modes for opcContainerClose.
\see opcContainerClose.
*/
typedef enum {
/**
Close the OPC container without any further postprocessing.
\hideinitializer
*/
OPC_CLOSE_NOW = 0,
/**
Close the OPC container and trim the file by removing unused fragments like e.g.
deleted parts.
\hideinitializer
*/
OPC_CLOSE_TRIM = 1,
/**
Close the OPC container like in \a OPC_CLOSE_TRIM mode, but additionally remove any
"interleaved" parts by reordering them.
\warning Currently not implemented. Same semantic as OPC_CLOSE_TRIM.
\hideinitializer
*/
OPC_CLOSE_DEFRAG = 2
} opcContainerCloseMode;
/**
Opens a ZIP-based OPC container.
@param[in] fileName. For more details see \ref opcContainerOpenMode.
@param[in] mode. For more details see \ref opcContainerOpenMode.
@param[in] userContext. Will not be modified by libopc. Can be used to e.g. store the "this" pointer for C++ bindings.
@param[in] destName. For more details see \ref opcContainerOpenMode.
@return \a NULL if failed.
\see opcContainerOpenMode
\see opcContainerDump
*/
opcContainer* opcContainerOpen(const xmlChar *fileName,
opcContainerOpenMode mode,
void *userContext,
const xmlChar *destName);
/**
Opens a ZIP-based OPC container from memory.
@param[in] data.
@param[in] data_len.
@param[in] userContext. Will not be modified by libopc. Can be used to e.g. store the "this" pointer for C++ bindings.
@param[in] mode. For more details see \ref opcContainerOpenMode.
@return \a NULL if failed.
*/
opcContainer* opcContainerOpenMem(const opc_uint8_t *data, opc_uint32_t data_len,
opcContainerOpenMode mode,
void *userContext);
/**
Opens a ZIP-based OPC container through caller-supplied IO callbacks.
@param[in] ioread.
@param[in] iowrite.
@param[in] ioclose.
@param[in] ioseek.
@param[in] iotrim.
@param[in] ioflush.
@param[in] iocontext.
@param[in] file_size.
@param[in] userContext. Will not be modified by libopc. Can be used to e.g. store the "this" pointer for C++ bindings.
@param[in] mode. For more details see \ref opcContainerOpenMode.
@return \a NULL if failed.
*/
opcContainer* opcContainerOpenIO(opcFileReadCallback *ioread,
opcFileWriteCallback *iowrite,
opcFileCloseCallback *ioclose,
opcFileSeekCallback *ioseek,
opcFileTrimCallback *iotrim,
opcFileFlushCallback *ioflush,
void *iocontext,
pofs_t file_size,
opcContainerOpenMode mode,
void *userContext);
/**
Close an OPC container.
@param[in] c. \ref opcContainer opened by \ref opcContainerOpen.
@param[in] mode. For more information see \ref opcContainerCloseMode.
@return Non-zero if successful.
\see opcContainerOpen
\see opcContainerCloseMode
*/
opc_error_t opcContainerClose(opcContainer *c, opcContainerCloseMode mode);
/**
Returns the unmodified user context passed to \ref opcContainerOpen.
\see opcContainerOpen
*/
void *opcContainerGetUserContext(opcContainer *c);
/**
List all types, relations and parts of the container \a c to \a out.
\par Sample:
\include opc_dump.c
*/
opc_error_t opcContainerDump(opcContainer *c, FILE *out);
/**
Exports the OPC container to "Flat OPC" (http://blogs.msdn.com/b/ericwhite/archive/2008/09/29/the-flat-opc-format.aspx).
The flat versions of an OPC file are very important when dealing with e.g XSL(T)-based or Javascript-based transformations.
\see opcContainerFlatImport.
\todo Implementation needed.
*/
int opcContainerFlatExport(opcContainer *c, const xmlChar *fileName);
/**
Imports the flat version of an OPC container.
\see opcContainerFlatExport.
\todo Implementation needed.
*/
int opcContainerFlatImport(opcContainer *c, const xmlChar *fileName);
/**
Iterate all types.
\code
for(const xmlChar *type=opcContentTypeFirst(c);
NULL!=type;
type=opcContentTypeNext(c, type)) {
printf("%s\n", type);
}
\endcode
*/
const xmlChar *opcContentTypeFirst(opcContainer *container);
/**
\see opcContentTypeFirst()
*/
const xmlChar *opcContentTypeNext(opcContainer *container, const xmlChar *type);
/**
Iterate extensions.
\code
for(const xmlChar *ext=opcExtensionFirst(c);
NULL!=ext;
ext=opcExtensionNext(c, ext)) {
printf("%s\n", ext);
}
\endcode
*/
const xmlChar *opcExtensionFirst(opcContainer *container);
/**
\see opcExtensionFirst()
*/
const xmlChar *opcExtensionNext(opcContainer *container, const xmlChar *ext);
/**
Get registered type for extension.
\see opcExtensionRegister()
*/
const xmlChar *opcExtensionGetType(opcContainer *container, const xmlChar *ext);
/**
Register a mime-type and an extension.
\see opcExtensionGetType()
*/
const xmlChar *opcExtensionRegister(opcContainer *container, const xmlChar *ext, const xmlChar *type);
/**
Iterate through all relation types of the container:
\code
for(const xmlChar *type=opcRelationTypeFirst(c);
NULL!=type;
type=opcRelationTypeNext(c, type)) {
printf("%s\n", type);
}
\endcode
*/
const xmlChar *opcRelationTypeFirst(opcContainer *container);
/**
\see opcRelationTypeFirst()
*/
const xmlChar *opcRelationTypeNext(opcContainer *container, const xmlChar *type);
/**
Iterate through all external targets of the container:
\code
for(const xmlChar *target=opcExternalTargetFirst(c);
NULL!=target;
target=opcExternalTargetNext(c, target)) {
printf("%s\n", target);
}
\endcode
*/
const xmlChar *opcExternalTargetFirst(opcContainer *container);
/**
\see opcExternalTargetFirst()
*/
const xmlChar *opcExternalTargetNext(opcContainer *container, const xmlChar *target);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_CONTAINER_H */

View File

@@ -1,200 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/file.h
The opc module contains the file library functions.
*/
#include <opc/config.h>
#ifndef OPC_FILE_H
#define OPC_FILE_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Flag for READ access.
\hideinitializer
*/
#define OPC_FILE_READ (1<<0)
/**
Flag for WRITE access.
\hideinitializer
*/
#define OPC_FILE_WRITE (1<<1)
/**
Flag indicates that file will be truncated when opened.
\hideinitializer
*/
#define OPC_FILE_TRUNC (1<<2)
/**
Abstraction for seek modes.
*/
typedef enum OPC_FILESEEKMODE_ENUM {
opcFileSeekSet = SEEK_SET,
opcFileSeekCur = SEEK_CUR,
opcFileSeekEnd = SEEK_END
} opcFileSeekMode;
/**
Callback to read a file. E.g. for a FILE * context this can be implemented as
\code
static int opcFileRead(void *iocontext, char *buffer, int len) {
return fread(buffer, sizeof(char), len, (FILE*)iocontext);
}
\endcode
*/
typedef int opcFileReadCallback(void *iocontext, char *buffer, int len);
/**
Callback to write a file. E.g. for a FILE * context this can be implemented as
\code
static int opcFileWrite(void *iocontext, const char *buffer, int len) {
return fwrite(buffer, sizeof(char), len, (FILE*)iocontext);
}
\endcode
*/
typedef int opcFileWriteCallback(void *iocontext, const char *buffer, int len);
/**
Callback to close a file. E.g. for a FILE * context this can be implemented as
\code
static int opcFileClose(void *iocontext) {
return fclose((FILE*)iocontext);
}
\endcode
*/
typedef int opcFileCloseCallback(void *iocontext);
/**
Callback to seek a file. E.g. for a FILE * context this can be implemented as
\code
static opc_ofs_t opcFileSeek(void *iocontext, opc_ofs_t ofs) {
int ret=fseek((FILE*)iocontext, ofs, SEEK_SET);
if (ret>=0) {
return ftell((FILE*)iocontext);
} else {
return ret;
}
}
\endcode
*/
typedef opc_ofs_t opcFileSeekCallback(void *iocontext, opc_ofs_t ofs);
/**
Callback to trim a file. E.g. for a FILE * context this can be implemented as
\code
static int opcFileTrim(void *iocontext, opc_ofs_t new_size) {
#ifdef WIN32
return _chsize(fileno((FILE*)iocontext), new_size);
#else
return ftruncate(fileno((FILE*)iocontext), new_size);
#endif
}
\endcode
*/
typedef int opcFileTrimCallback(void *iocontext, opc_ofs_t new_size);
/**
Callback to flush a file. E.g. for a FILE * context this can be implemented as
\code
static int opcFileFlush(void *iocontext) {
return fflush((FILE*)iocontext);
}
\endcode
*/
typedef int opcFileFlushCallback(void *iocontext);
/**
Represents a state of a file, i.e. file position (buf_pos) and error status (err).
*/
typedef struct OPC_FILERAWSTATE_STRUCT {
opc_error_t err;
opc_ofs_t buf_pos; // current pos in file
} opcFileRawState;
/**
File IO context.
*/
typedef struct OPC_IO_STRUCT {
opcFileReadCallback *_ioread;
opcFileWriteCallback *_iowrite;
opcFileCloseCallback *_ioclose;
opcFileSeekCallback *_ioseek;
opcFileTrimCallback *_iotrim;
opcFileFlushCallback *_ioflush;
void *iocontext;
int flags;
opcFileRawState state;
opc_ofs_t file_size;
} opcIO_t;
/**
Initialize an IO context.
*/
opc_error_t opcFileInitIO(opcIO_t *io,
opcFileReadCallback *ioread,
opcFileWriteCallback *iowrite,
opcFileCloseCallback *ioclose,
opcFileSeekCallback *ioseek,
opcFileTrimCallback *iotrim,
opcFileFlushCallback *ioflush,
void *iocontext,
pofs_t file_size,
int flags);
/**
Initialize an IO context for a file.
*/
opc_error_t opcFileInitIOFile(opcIO_t *io, const xmlChar *filename, int flags);
/**
Initialize an IO for memory.
\warning Currently supports READ-ONLY file access.
*/
opc_error_t opcFileInitIOMemory(opcIO_t *io, const opc_uint8_t *data, opc_uint32_t data_len, int flags);
/**
Cleanup an IO context, i.e. release all system resources.
*/
opc_error_t opcFileCleanupIO(opcIO_t *io);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_FILE_H */

View File

@@ -1,60 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/helper.h
Contains helper functions for the opc module.
*/
#include <opc/config.h>
#ifndef OPC_HELPER_H
#define OPC_HELPER_H
#ifdef __cplusplus
extern "C" {
#endif
/* The declarations below must sit inside the extern "C" block so that C++
   translation units including this header see them with C linkage. */
/**
Constructs a segment name.
\note The parameter semantics are not described in this header. Presumably
\c out / \c out_size describe the destination buffer -- TODO confirm
against the implementation.
*/
opc_uint16_t opcHelperAssembleSegmentName(char *out, opc_uint16_t out_size, const xmlChar *name, opc_uint32_t segment_number, opc_uint32_t next_segment_id, opc_bool_t rels_segment, opc_uint16_t *out_max);
/**
Splits a filename into its segment information.
\note \c segment_number, \c last_segment and \c rel_segment are passed by
pointer; presumably they are output parameters -- TODO confirm.
*/
opc_error_t opcHelperSplitFilename(opc_uint8_t *filename, opc_uint32_t filename_length, opc_uint32_t *segment_number, opc_bool_t *last_segment, opc_bool_t *rel_segment);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_HELPER_H */

View File

@@ -1,74 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/inputstream.h
*/
#include <opc/config.h>
#ifndef OPC_INPUTSTREAM_H
#define OPC_INPUTSTREAM_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Internal type which represents a binary input stream.
*/
typedef struct OPC_CONTAINER_INPUTSTREAM_STRUCT opcContainerInputStream;
/**
Opens the part \c name of the \c container for reading.
*/
opcContainerInputStream* opcContainerOpenInputStream(opcContainer *container, const xmlChar *name);
/**
Reads at most \c buffer_len bytes from the input \c stream into \c buffer.
\return The number of bytes read or "0" in case of an error or end-of-stream.
*/
opc_uint32_t opcContainerReadInputStream(opcContainerInputStream* stream, opc_uint8_t *buffer, opc_uint32_t buffer_len);
/**
Closes the input stream and releases all system resources.
*/
opc_error_t opcContainerCloseInputStream(opcContainerInputStream* stream);
/**
Returns the type of compression used for the stream.
*/
opcCompressionOption_t opcContainerGetInputStreamCompressionOption(opcContainerInputStream* stream);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_INPUTSTREAM_H */

View File

@@ -1,73 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/opc.h
The opc module contains the basic library functions.
*/
#include <opc/config.h>
#include <opc/container.h>
#include <opc/part.h>
#include <opc/relation.h>
#include <opc/inputstream.h>
#include <opc/outputstream.h>
#include <opc/zip.h>
#include <opc/xmlreader.h>
#include <opc/xmlwriter.h>
#include <opc/properties.h>
#ifndef OPC_OPC_H
#define OPC_OPC_H
#ifdef __cplusplus
extern "C" {
#endif
/**
* Initialize libopc.
* Sample:
* \include opc_helloworld.c
* @return Non-zero if successful.
* NOTE(review): the return type is opc_error_t; confirm that "non-zero"
* really denotes success for this error type.
*/
opc_error_t opcInitLibrary(void);
/**
* Free libopc. Clean up all resources.
* @return Non-zero if successful.
* NOTE(review): the return type is opc_error_t; confirm that "non-zero"
* really denotes success for this error type.
* \see opcInitLibrary.
*/
opc_error_t opcFreeLibrary(void);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_OPC_H */

View File

@@ -1,71 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/outputstream.h
*/
#include <opc/config.h>
#ifndef OPC_OUTPUTSTREAM_H
#define OPC_OUTPUTSTREAM_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Internal type which represents a binary output stream.
*/
typedef struct OPC_CONTAINER_OUTPUTSTREAM_STRUCT opcContainerOutputStream;
/**
Opens the part \c name for writing in \c container with compression \c compression_option.
\note Make sure the part exists!
\see opcPartCreate.
*/
opcContainerOutputStream* opcContainerCreateOutputStream(opcContainer *container, const xmlChar *name, opcCompressionOption_t compression_option);
/**
Write \c buffer_len bytes from \c buffer to \c stream.
\return Returns the number of bytes written.
*/
opc_uint32_t opcContainerWriteOutputStream(opcContainerOutputStream* stream, const opc_uint8_t *buffer, opc_uint32_t buffer_len);
/**
Close the \c stream and free all associated resources.
*/
opc_error_t opcContainerCloseOutputStream(opcContainerOutputStream* stream);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_OUTPUTSTREAM_H */

View File

@@ -1,118 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/part.h
*/
#include <opc/config.h>
#ifndef OPC_PART_H
#define OPC_PART_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Handle to an OPC part created by \ref opcPartOpen.
\see opcPartOpen.
*/
typedef xmlChar* opcPart;
/**
Represents an invalid (resp. NULL) part.
In relations, OPC_PART_INVALID also represents the root part.
\hideinitializer
*/
#define OPC_PART_INVALID NULL
/**
Finds a part in a \c container by \c absolutePath and/or \c type.
Currently no flags are supported.
*/
opcPart opcPartFind(opcContainer *container,
const xmlChar *absolutePath,
const xmlChar *type,
int flags);
/**
Creates a part in a \c container with \c absolutePath and \c type.
Currently no flags are supported.
*/
opcPart opcPartCreate(opcContainer *container,
const xmlChar *absolutePath,
const xmlChar *type,
int flags);
/**
Returns the type of the \c part in container \c c.
The string is interned and must not be freed.
*/
const xmlChar *opcPartGetType(opcContainer *c, opcPart part);
/**
Returns the type of the \c part in container \c c.
If \c override_only is set, the return value will be NULL for parts not having an override type.
The string is interned and must not be freed.
*/
const xmlChar *opcPartGetTypeEx(opcContainer *c, opcPart part, opc_bool_t override_only);
/**
Deletes the part \c absolutePath in the \c container.
*/
opc_error_t opcPartDelete(opcContainer *container, const xmlChar *absolutePath);
/**
Get the first part.
The following code prints every part together with its type:
\code
for(opcPart part=opcPartGetFirst(c);OPC_PART_INVALID!=part;part=opcPartGetNext(c, part)) {
    printf("%s; %s\n", part, opcPartGetType(c, part));
}
\endcode
*/
opcPart opcPartGetFirst(opcContainer *container);
/**
Get the next part.
\see opcPartGetFirst
*/
opcPart opcPartGetNext(opcContainer *container, opcPart part);
/**
Returns the size in bytes of the \c part.
*/
opc_ofs_t opcPartGetSize(opcContainer *c, opcPart part);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_PART_H */

View File

@@ -1,121 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/properties.h
*/
#include <opc/config.h>
#include <opc/container.h>
#ifndef OPC_PROPERTIES_H
#define OPC_PROPERTIES_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Represents a simple Dublin Core type.
*/
typedef struct OPC_DC_SIMPLE_TYPE {
xmlChar *str;
xmlChar *lang;
} opcDCSimpleType_t;
/**
Represents the core properties of an OPC container.
*/
typedef struct OPC_PROPERTIES_STRUCT {
xmlChar *category; /* xsd:string */
xmlChar *contentStatus; /* xsd:string */
xmlChar *created; /* dc:date */
opcDCSimpleType_t creator; /* dc:any */
opcDCSimpleType_t description; /* dc:any */
opcDCSimpleType_t identifier; /* dc:any */
opcDCSimpleType_t *keyword_array; /* cp:CT_Keywords */
opc_uint32_t keyword_items;
opcDCSimpleType_t language; /* dc:any */
xmlChar *lastModifiedBy; /* xsd:string */
xmlChar *lastPrinted; /* xsd:dateTime */
xmlChar *modified; /* dc:date */
xmlChar *revision; /* xsd:string */
opcDCSimpleType_t subject; /* dc:any */
opcDCSimpleType_t title; /* dc:any */
xmlChar *version; /* xsd:string */
} opcProperties_t;
/**
Initialize the core properties \c cp.
\see opcCorePropertiesSetString
*/
opc_error_t opcCorePropertiesInit(opcProperties_t *cp);
/**
Cleanup the core properties \c cp, i.e. release all resources.
\see opcCorePropertiesSetString
*/
opc_error_t opcCorePropertiesCleanup(opcProperties_t *cp);
/**
Reads the core properties \c cp from the container \c c.
*/
opc_error_t opcCorePropertiesRead(opcProperties_t *cp, opcContainer *c);
/**
Writes/updates the core properties \c cp in the container \c c.
*/
opc_error_t opcCorePropertiesWrite(opcProperties_t *cp, opcContainer *c);
/**
Update a string in the core properties the right way.
\code
opcProperties_t cp;
opcCorePropertiesInit(&cp);
opcCorePropertiesSetString(&cp.revision, "1");
opcCorePropertiesSetStringLang(&cp.creator, "Florian Reuter", NULL);
opcCorePropertiesCleanup(&cp);
\endcode
*/
opc_error_t opcCorePropertiesSetString(xmlChar **prop, const xmlChar *str);
/**
Update a core properties the right way.
\see opcCorePropertiesSetString
*/
opc_error_t opcCorePropertiesSetStringLang(opcDCSimpleType_t *prop, const xmlChar *str, const xmlChar *lang);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_PROPERTIES_H */

View File

@@ -1,140 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/relation.h
*/
#include <opc/config.h>
#ifndef OPC_RELATION_H
#define OPC_RELATION_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Identifier for an OPC relation.
*/
typedef opc_uint32_t opcRelation;
/**
Constant which represents an invalid relation.
*/
#define OPC_RELATION_INVALID (-1)
/**
Find a relation originating from \c part in \c container with \c relationId and/or \c mimeType.
If \c part is OPC_PART_INVALID then part represents the root part.
@param[in] relationId The relationId (e.g. "rId1") or NULL.
@param[in] mimeType The mimeType or NULL.
*/
opcRelation opcRelationFind(opcContainer *container, opcPart part, const xmlChar *relationId, const xmlChar *mimeType);
/**
Deletes the relation from the container.
\see opcRelationFind.
*/
opc_error_t opcRelationDelete(opcContainer *container, opcPart part, const xmlChar *relationId, const xmlChar *mimeType);
/**
Returns the first relation.
The following code will dump all relations:
\code
for(opcPart part=opcPartGetFirst(c);OPC_PART_INVALID!=part;part=opcPartGetNext(c, part)) {
    for(opcRelation rel=opcRelationFirst(c, part);
        OPC_RELATION_INVALID!=rel;
        rel=opcRelationNext(c, part, rel)) {
        opcPart internal_target=opcRelationGetInternalTarget(c, part, rel);
        const xmlChar *external_target=opcRelationGetExternalTarget(c, part, rel);
        const xmlChar *target=(NULL!=internal_target?internal_target:external_target);
        const xmlChar *prefix=NULL;
        opc_uint32_t counter=-1;
        const xmlChar *type=NULL;
        opcRelationGetInformation(c, part, rel, &prefix, &counter, &type);
        if (-1==counter) { // no counter after prefix
            printf("%s;%s;%s;%s\n", part, prefix, target, type);
        } else {
            printf("%s;%s%i;%s;%s\n", part, prefix, counter, target, type);
        }
    }
}
\endcode
*/
opcRelation opcRelationFirst(opcContainer *container, opcPart part);
/**
\see opcRelationFirst
*/
opcRelation opcRelationNext(opcContainer *container, opcPart part, opcRelation relation);
/**
Returns the internal target.
\note To test for an external target use opcRelationGetExternalTarget.
\see opcRelationGetExternalTarget
*/
opcPart opcRelationGetInternalTarget(opcContainer *container, opcPart part, opcRelation relation);
/**
Returns the external target or NULL if it is an internal target.
The string is interned. Must not be freed.
\see opcRelationGetInternalTarget
*/
const xmlChar *opcRelationGetExternalTarget(opcContainer *container, opcPart part, opcRelation relation);
/**
Returns the relations type.
The string is interned. Must not be freed.
*/
const xmlChar *opcRelationGetType(opcContainer *container, opcPart part, opcRelation relation);
/**
Get information about a relation.
\see opcRelationFirst
*/
void opcRelationGetInformation(opcContainer *container, opcPart part, opcRelation relation, const xmlChar **prefix, opc_uint32_t *counter, const xmlChar **type);
/**
Add a relation to \c container from \c src part to \c dest part with id \c rid and type \c type.
*/
opc_uint32_t opcRelationAdd(opcContainer *container, opcPart src, const xmlChar *rid, opcPart dest, const xmlChar *type);
/**
Add an external relation to \c container from \c src part to \c target URL with id \c rid and type \c type.
*/
opc_uint32_t opcRelationAddExternal(opcContainer *container, opcPart src, const xmlChar *rid, const xmlChar *target, const xmlChar *type);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_RELATION_H */

View File

@@ -1,69 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/xmlreader.h
*/
#ifndef OPC_XMLREADER_H
#define OPC_XMLREADER_H
#include <opc/config.h>
#include <libxml/xmlreader.h>
#include <mce/textreader.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
Open an MCE reader for \c partName. Parameters \c URL, \c encoding and \c options will be passed unmodified to
http://xmlsoft.org/html/libxml-xmlreader.html#xmlReaderForIO and they can be NULL, NULL, 0.
\note Make sure the part exists.
\see opcPartFind
*/
opc_error_t opcXmlReaderOpen(opcContainer *container, mceTextReader_t *mceTextReader, const xmlChar *partName, const char * URL, const char * encoding, int options);
/**
Returns a libxml DOM document for \c partName. Parameters \c URL, \c encoding and \c options will be passed unmodified to
http://xmlsoft.org/html/libxml-parser.html#xmlReadIO and they can be NULL, NULL, 0.
\note Make sure the part exists.
\see opcPartFind
*/
xmlDocPtr opcXmlReaderReadDoc(opcContainer *container, const xmlChar *partName, const char * URL, const char * encoding, int options);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_XMLREADER_H */

View File

@@ -1,57 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/xmlwriter.h
*/
#include <opc/config.h>
#include <mce/textwriter.h>
#ifndef OPC_XMLWRITER_H
#define OPC_XMLWRITER_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Create an MCE text writer for \c part in \c container with compression \c compression_option.
\note Make sure the part exists.
\see opcPartFind
*/
mceTextWriter *mceTextWriterOpen(opcContainer *c, opcPart part, opcCompressionOption_t compression_option);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_XMLWRITER_H */

View File

@@ -1,255 +0,0 @@
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** @file opc/zip.h
The ZIP file backend of an OPC container.
*/
#include <opc/config.h>
#include <opc/file.h>
#include <opc/container.h>
#ifndef OPC_ZIP_H
#define OPC_ZIP_H
#ifdef __cplusplus
extern "C" {
#endif
/**
Default growth hint of an OPC stream.
*/
#define OPC_DEFAULT_GROWTH_HINT 512
/**
Handle to a ZIP archive.
\see internal.h
*/
typedef struct OPC_ZIP_STRUCT opcZip;
/**
Handle to a raw ZIP input stream.
\see internal.h
*/
typedef struct OPC_ZIPINPUTSTREAM_STRUCT opcZipInputStream;
/**
Handle to a raw ZIP output stream.
\see internal.h
*/
typedef struct OPC_ZIPOUTPUTSTREAM_STRUCT opcZipOutputStream;
/**
Holds all information of a ZIP segment.
*/
typedef struct OPC_ZIP_SEGMENT_INFO_STRUCT {
xmlChar name[OPC_MAX_PATH];
opc_uint32_t name_len;
opc_uint32_t segment_number;
opc_bool_t last_segment;
opc_bool_t rels_segment;
opc_uint32_t header_size;
opc_uint32_t min_header_size;
opc_uint32_t trailing_bytes;
opc_uint32_t compressed_size;
opc_uint32_t uncompressed_size;
opc_uint16_t bit_flag;
opc_uint32_t data_crc;
opc_uint16_t compression_method;
opc_ofs_t stream_ofs;
opc_uint16_t growth_hint;
} opcZipSegmentInfo_t;
/**
\see opcZipLoader
*/
typedef int opcZipLoaderOpenCallback(void *iocontext);
/**
\see opcZipLoader
*/
typedef int opcZipLoaderSkipCallback(void *iocontext);
/**
\see opcZipLoader
*/
typedef int opcZipLoaderReadCallback(void *iocontext, char *buffer, int len);
/**
\see opcZipLoader
*/
typedef int opcZipLoaderCloseCallback(void *iocontext);
/**
\see opcZipLoader
*/
typedef opc_error_t (opcZipLoaderSegmentCallback_t)(void *iocontext, void *userctx, opcZipSegmentInfo_t *info, opcZipLoaderOpenCallback *open, opcZipLoaderReadCallback *read, opcZipLoaderCloseCallback *close, opcZipLoaderSkipCallback *skip);
/**
Walks every segment in a ZIP archive and calls the \c segmentCallback callback method.
The implementer's \c segmentCallback method must then either use the passed \c open, \c read and \c close methods
to read the stream or the passed \c skip method to skip the stream.
This method can be used to e.g. read a ZIP file in stream mode.
*/
opc_error_t opcZipLoader(opcIO_t *io, void *userctx, opcZipLoaderSegmentCallback_t *segmentCallback);
/**
\see opcZipClose
*/
typedef opc_error_t (opcZipSegmentReleaseCallback)(opcZip *zip, opc_uint32_t segment_id);
/**
Closes the ZIP archive \c zip and will call \c releaseCallback for every segment to give the implementer a chance
to free user resources.
*/
void opcZipClose(opcZip *zip, opcZipSegmentReleaseCallback* releaseCallback);
/**
Creates an empty ZIP archive with the given \c io.
*/
opcZip *opcZipCreate(opcIO_t *io);
/**
Commits all buffers and writes the ZIP archive's local header directories.
If \c trim is true then padding bytes will be removed, i.e. the ZIP file size will be minimized.
*/
opc_error_t opcZipCommit(opcZip *zip, opc_bool_t trim);
/**
Garbage collection on the passed \c zip archive. This will e.g. make deleted files available as free space.
*/
opc_error_t opcZipGC(opcZip *zip);
/**
Load segment information into \c info.
If \c rels_segment is -1 then load the info for part with name \c partName.
Otherwise load the segment information for the ".rels." segment of \c partName.
\return Returns the segment_id.
*/
opc_uint32_t opcZipLoadSegment(opcZip *zip, const xmlChar *partName, opc_bool_t rels_segment, opcZipSegmentInfo_t *info);
/**
Create a segment with the given parameters.
\return Returns the segment_id.
*/
opc_uint32_t opcZipCreateSegment(opcZip *zip,
const xmlChar *partName,
opc_bool_t relsSegment,
opc_uint32_t segment_size,
opc_uint32_t growth_hint,
opc_uint16_t compression_method,
opc_uint16_t bit_flag);
/**
Creates an input stream for the segment with \c segment_id.
\see opcZipLoadSegment
\see opcZipCreateSegment
*/
opcZipInputStream *opcZipOpenInputStream(opcZip *zip, opc_uint32_t segment_id);
/**
Free all resources of the input stream.
*/
opc_error_t opcZipCloseInputStream(opcZip *zip, opcZipInputStream *stream);
/**
Reads at most \c buf_len bytes from the input stream into \c buf.
\return Returns the number of bytes read.
*/
opc_uint32_t opcZipReadInputStream(opcZip *zip, opcZipInputStream *stream, opc_uint8_t *buf, opc_uint32_t buf_len);
/**
Creates an output stream for the segment with \c segment_id.
If \c *segment_id is -1 then a new segment will be created.
Otherwise the segment with \c *segment_id will be overwritten.
*/
opcZipOutputStream *opcZipCreateOutputStream(opcZip *zip,
opc_uint32_t *segment_id,
const xmlChar *partName,
opc_bool_t relsSegment,
opc_uint32_t segment_size,
opc_uint32_t growth_hint,
opc_uint16_t compression_method,
opc_uint16_t bit_flag);
/**
Opens an existing output stream for reading.
The \c *segment_id will be set to -1 and reset on opcZipCloseOutputStream.
\see opcZipCloseOutputStream
*/
opcZipOutputStream *opcZipOpenOutputStream(opcZip *zip, opc_uint32_t *segment_id);
/**
Will close the stream and free all resources. Additionally the new segment id will be stored in \c *segment_id.
\see opcZipOpenOutputStream
*/
opc_error_t opcZipCloseOutputStream(opcZip *zip, opcZipOutputStream *stream, opc_uint32_t *segment_id);
/**
Write \c buf_len bytes from \c buf to \c stream.
\return Returns the number of bytes written.
*/
opc_uint32_t opcZipWriteOutputStream(opcZip *zip, opcZipOutputStream *stream, const opc_uint8_t *buf, opc_uint32_t buf_len);
/**
Returns the first segment id or -1.
Use the following code to iterate through all segments.
\code
for(opc_uint32_t segment_id=opcZipGetFirstSegmentId(zip);
    -1!=segment_id;
    segment_id=opcZipGetNextSegmentId(zip, segment_id)) {
    ...
}
\endcode
\see opcZipGetNextSegmentId
*/
opc_uint32_t opcZipGetFirstSegmentId(opcZip *zip);
/**
Returns the next segment id or -1.
\see opcZipGetFirstSegmentId
*/
opc_uint32_t opcZipGetNextSegmentId(opcZip *zip, opc_uint32_t segment_id);
/**
Returns info about the given segment id.
*/
opc_error_t opcZipGetSegmentInfo(opcZip *zip, opc_uint32_t segment_id, const xmlChar **name, opc_bool_t *rels_segment, opc_uint32_t *crc);
/**
Marks the given segments as deleted.
\see opcZipGC
*/
opc_bool_t opcZipSegmentDelete(opcZip *zip, opc_uint32_t *first_segment, opc_uint32_t *last_segment, opcZipSegmentReleaseCallback* releaseCallback);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* OPC_ZIP_H */

View File

@@ -1,168 +0,0 @@
/* include/plib/plib.h. Generated from plib.h by configure. */
/*
Copyright (c) 2010, Florian Reuter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Florian Reuter nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _PLIB_PLIB_H_
#define _PLIB_PLIB_H_
#ifdef __cplusplus
extern "C" {
#endif
#define HAVE_STDINT_H 1
#define HAVE_STDDEF_H 1
#define HAVE_STDIO_H 1
#define HAVE_STRING_H 1
#define HAVE_LIMITS_H 1
#define HAVE_STDLIB_H 1
/* #undef HAVE_IO_H */
#define HAVE_UNISTD_H 1
#define HAVE_SYS_TYPES_H 1
#define IS_CONFIGURED 1
#if !defined(IS_CONFIGURED)
#if defined(WIN32)
#define HAVE_STRING_H 1
#define HAVE_STDINT_H 1
#define HAVE_LIMITS_H 1
#define HAVE_STDDEF_H 1
#define HAVE_STDIO_H 1
#define HAVE_STDLIB_H 1
#define HAVE_IO_H
#define snprintf _snprintf
#else
#error "configure not executed and we are not on a win32 machine? please run configure or define WIN32 is you are on a WIN32 platform."
#endif
#endif
#ifdef HAVE_STDDEF_H
#include <stddef.h>
typedef size_t pofs_t; // maximum file offset for eg. read write ops
#else
#error "system types can not be determined"
#endif
#ifdef HAVE_STDIO_H
#include <stdio.h>
#else
#error "system io can not be determined"
#endif
#ifdef HAVE_STDINT_H
#include <stdint.h>
typedef int8_t pint8_t;
typedef uint8_t puint8_t;
typedef int16_t pint16_t;
typedef uint16_t puint16_t;
typedef int32_t pint32_t;
typedef uint32_t puint32_t;
typedef int64_t pint64_t;
typedef uint64_t puint64_t;
typedef int pbool_t;
typedef size_t psize_t;
// INTN_MAX, INTN_MIN, UINTN_MAX
#else
#error "system types can not be determined"
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_LIMITS_H
#include <limits.h>
#define PUINT8_MAX UCHAR_MAX
#define PINT32_MAX INT_MAX
#define PINT32_MIN INT_MIN
#define PUINT32_MAX UINT_MAX
#define PUINT32_MIN 0
#define PUINT16_MAX USHRT_MAX
#define PUINT16_MIN 0
#else
#error "limits can not be determined"
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_IO_H
#include <io.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
/**
Converts an ASCII string to a xmlChar string. This only works for ASCII strings.
*/
#ifndef _X
#define _X(s) BAD_CAST(s)
#endif
/**
Converts an xmlChar string to an ASCII string. This only works for ASCII charsets.
*/
#ifndef _X2C
#define _X2C(s) ((char*)(s))
#endif
#define PASSERT(e) assert(e)
#ifdef NDEBUG
#define PENSURE(e) (void)(e)
#else
#define PENSURE(e) assert(e)
#endif
#define PTRUE (0==0)
#define PFALSE (0==1)
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* _PLIB_PLIB_H_ */

Binary file not shown.

Binary file not shown.

Binary file not shown.

217
mime.csv
View File

@@ -2,10 +2,14 @@ application/arj, arj
application/base64, mme
application/binhex, hqx
application/book, boo|book
application/CDFV2-corrupt,
application/CDFV2, sdv
application/clariscad, ccad
application/commonground, dp
application/csv,
application/dicom, dcm
application/drafting, drw
application/epub+zip, epub
application/freeloader, frl
application/futuresplash, spl
application/groupwise, vew
@@ -17,7 +21,6 @@ application/inf, inf
application/java-archive, jar
application/java, class
application/javascript,
application/x-archive, a
application/json, json
application/marc, mrc
application/mbedlet, mbd
@@ -27,7 +30,9 @@ application/msword, doc|dot|w6w|wiz|word
application/netmc, mcp
application/octet-stream, bin|dump|gpg
application/oda, oda
application/ogg, ogv
application/pdf, pdf
application/pgp-keys,
application/pgp-signature, pgp
application/pkcs7-signature, p7s
application/pkix-cert, cer|crt
@@ -43,6 +48,10 @@ application/vda, vda
application/vnd.fdf, fdf
application/vnd.font-fontforge-sfd, sfd
application/vnd.hp-hpgl, hgl|hpg|hpgl
application/vnd.iccprofile, icm
application/vnd.iccprofile, icm
application/vnd.lotus-1-2-3,
application/vnd.ms-cab-compressed, cab
application/vnd.ms-excel, xlb|xlc|xll|xlm|xls|xlw
application/vnd.ms-fontobject, eot
application/vnd.ms-opentype, otf
@@ -54,45 +63,75 @@ application/vnd.ms-project, mpp
application/vnd.oasis.opendocument.base, odb
application/vnd.oasis.opendocument.formula, odf
application/vnd.oasis.opendocument.graphics, odg
application/vnd.oasis.opendocument.presentation, odp
application/vnd.oasis.opendocument.spreadsheet, ods
application/vnd.oasis.opendocument.text, odt
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
application/vnd.symbian.install,
application/vnd.tcpdump.pcap, pcap
application/vnd.wap.wmlc, wmlc
application/vnd.wap.wmlscriptc, wmlsc
application/vnd.xara, web
application/vocaltec-media-desc, vmd
application/vocaltec-media-file, vmf
application/warc, warc
application/winhelp, hlp
application/wordperfect6.0, w60
application/wordperfect6.1, w61
application/wordperfect, wp|wp5|wp6|wpd
application/x-123, wk1
application/x-7z-compressed, 7z
application/x-aim, aim
application/x-apple-diskimage,
application/x-arc,
application/x-archive, a
application/x-atari-7800-rom, a78
application/x-authorware-bin, aab
application/x-authorware-map, aam
application/x-authorware-seg, aas
application/x-avira-qua,
application/x-bcpio, bcpio
application/x-bittorrent, torrent
application/x-bsh, bsh
application/x-bytecode.python, pyc
application/x-bzip2, boz|bz2
application/x-bzip, bz
application/x-cbr, cbr
application/x-cbz, cbz
application/x-cdlink, vcd
application/x-chat, cha|chat
application/x-chrome-extension,
application/x-cocoa, cco
application/x-conference, nsc
application/x-coredump,
application/x-cpio, cpio
application/x-dbf, dbf
application/x-dbt,
application/x-debian-package, deb
application/x-deepv, deepv
application/x-director, dcr|dir|dxr
application/x-dmp, dmp
application/x-dosdriver,
application/x-dosexec, dll
application/x-dvi, dvi
application/x-elc, elc
application/x-empty,
application/x-envoy, env|evy
application/x-esrehber, es
application/x-excel, xla|xld|xlk|xlt|xlv
application/x-executable, exe
application/x-font-gdos,
application/x-font-pf2, pf2
application/x-font-pfm, pfm
application/x-font-sfn,
application/x-font-ttf, ttf|ttc
application/x-fptapplication/x-dbt,
application/x-freelance, pre
application/x-gamecube-rom,
application/x-gdbm,
application/x-gettext-translation,
application/x-git,
application/x-gsp, gsp
application/x-gss, gss
@@ -102,46 +141,67 @@ application/x-hdf, hdf
application/x-helpfile, help
application/x-httpd-imap, imap
application/x-ima, ima
application/x-innosetup,
application/x-internett-signup, ins
application/x-inventor, iv
application/x-ip2, ip
application/x-java-applet,
application/x-java-commerce, jcm
application/x-java-image,
application/x-java-jmod, jmod
application/x-java-keystore,
application/x-kdelnk,
application/x-koan, skd|skm|skp|skt
application/x-latex, latex|ltx
application/x-livescreen, ivy
application/x-lotus, wq1
application/x-lz4+json, jsonlz4
application/x-lz4, lz4
application/x-lz4, lz4
application/x-lzh-compressed,
application/x-lzh, lzh
application/x-lzip, lz
application/x-lzma, lzma
application/x-lzop, lzo
application/x-lzx, lzx
application/x-mach-binary, jnilib|dylib
application/x-mach-executable,
application/x-magic-cap-package-1.0, mc$
application/x-mathcad, mcd
application/x-maxis-dbpf,
application/x-meme, mm
application/x-midi, midi
application/x-mif, mif
application/x-mix-transfer, nix
application/xml, opf
application/x-mobipocket-ebook, mobi
application/x-msaccess, accdb
application/x-ms-compress-szdd, fon
application/x-ms-pdb, pdb
application/x-ms-reader, lit
application/x-n64-rom, z64
application/x-navi-animation, ani
application/x-navidoc, nvd
application/x-navimap, map
application/x-navistyle, stl
application/x-nes-rom, nes
application/x-netcdf, cdf|nc
application/x-newton-compatible-pkg, pkg
application/x-nintendo-ds-rom,
application/x-object, o
application/x-omcdatamaker, omcd
application/x-omc, omc
application/x-omcregerator, omcr
application/x-pagemaker, pm4|pm5
application/x-pcl, pcl
application/x-pgp-keyring,
application/x-pixclscript, plx
application/x-pkcs7-certreqresp, p7r
application/x-pkcs7-signature, p7a
application/x-project, mpc|mpt|mpv|mpx
application/x-qpro, wb1
application/x-rar, rar
application/x-rpm, rpm
application/x-sdp, sdp
application/x-sea, sea
application/x-seelogo, sl
@@ -149,12 +209,17 @@ application/x-setupscript,
application/x-sharedlib, so
application/x-shar, shar
application/x-shockwave-flash, swf
application/x-snappy-framed,
application/x-sprite, spr|sprite
application/x-sqlite3,
application/x-stargallery-thm,
application/x-stuffit, sit
application/x-sv4cpio, sv4cpio
application/x-sv4crc, sv4crc
application/x-tar, tar
application/x-tbook, sbk|tbk
application/x-terminfo,
application/x-terminfo2,
application/x-texinfo, texi|texinfo
application/x-tex-tfm, tfm
application/x-ustar, ustar
@@ -163,16 +228,22 @@ application/x-vnd.audioexplosion.mzz, mzz
application/x-vnd.ls-xpix, xpix
application/x-vrml, vrml
application/x-wais-source, src|wsrc
application/x-wine-extension-ini,
application/x-wintalk, wtk
application/x-world, svr
application/x-wri, wri
application/x-x509-ca-cert, der
application/x-xz, xz
application/x-zip,
application/x-zstd, zst
application/zip, zip
application/zlib, z
!audio/basic, au
audio/it, it
audio/make, funk|my|pfunk
audio/midi, kar
audio/mid, rmi
audio/mp4, m4b
audio/mpeg, m2a|mpa
audio/ogg, ogg
audio/s3m, s3m
@@ -180,7 +251,10 @@ audio/tsp-audio, tsi
audio/tsplayer, tsp
audio/vnd.qcelp, qcp
audio/voxware, vox
audio/x-aiff, aiff|aif
audio/x-flac, flac
audio/x-gsm, gsd|gsm
audio/x-hx-aac-adts,
audio/x-jam, jam
audio/x-liveaudio, lam
audio/x-m4a, m4a
@@ -194,17 +268,24 @@ audio/x-nspaudio, lma
audio/x-pn-realaudio, ram|rm|rmm|rmp
audio/x-psid, sid
audio/x-realaudio, ra
audio/x-s3m,
audio/x-twinvq-plugin, vqe|vql
audio/x-twinvq, vqf
audio/x-voc, voc
audio/x-wav, wav
!audio/x-xbox360-executable, xex
!audio/x-xbox-executable, xbe
font/otf,
font/sfnt,
font/woff2, woff2
font/woff, woff
image/bmp,
image/cmu-raster, rast
image/fif, fif
image/florian, flo|turbot
image/g3fax, g3
image/gif, gif
image/heic, heic
image/ief, ief|iefs
image/jpeg, jfif|jfif-tbnl|jpe|jpeg|jpg
image/jutvision, jut
@@ -213,6 +294,9 @@ image/pict, pic|pict
image/png, png|x-png
!image/svg, svg
!image/svg+xml,
image/tiff,
!image/vnd.adobe.photoshop, psd
!image/vnd.djvu, djvu
image/vnd.fpx, fpx
image/vnd.microsoft.icon,
image/vnd.rn-realflash, rf
@@ -220,9 +304,15 @@ image/vnd.rn-realpix, rp
image/vnd.wap.wbmp, wbmp
image/vnd.xiff, xif
image/webp, webp
image/wmf,
image/x-3ds, 3ds
image/x-award-bioslogo,
image/x-cmu-raster, ras
image/x-cur, tga
image/x-dwg, dwg|dxf|svf
image/x-eps,
image/x-exr, exr
image/x-gem,
image/x-icns,
!image/x-icon, ico
image/x-jg, art
@@ -236,32 +326,30 @@ image/x-portable-graymap, pgm
image/x-portable-pixmap, ppm
image/x-quicktime, qif|qti|qtif
image/x-rgb, rgb
image/x-tga,
image/x-tiff, tif|tiff
image/tiff,
image/x-win-bitmap,
!image/x-xcf, xcf
!image/x-xpixmap, xpm
image/x-xwindowdump, xwd
message/news,
message/rfc822, mht|mhtml|mime
model/vnd.dwf, dwf
model/vnd.gdl, gdl
model/vnd.gs.gdl, gdsl
model/vrml, wrz
model/x-pov, pov
text/asp, asp
text/css, css
text/x-sass, sass
text/x-scss, scss
text/html, acgi|htm|html|htmls|htx|shtml
text/javascript, js
text/mcf, mcf
text/pascal, pas
text/PGP,
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml
text/richtext, rt|rtf|rtx
text/rtf,
text/scriplet, wsc
text/x-awk, awk
!video/x-jng, jng
video/x-mng, mng
image/x-cur, tga
image/x-xwindowdump, xwd
!image/vnd.adobe.photoshop, psd
text/tab-separated-values, tsv
text/troff, man|me|ms|roff|t|tr
text/uri-list, uji|unis|uri|uris
@@ -273,6 +361,7 @@ text/webviewhtml, htt
text/x-Algol68,
text/x-asm, asm|s
text/x-audiosoft-intra, aip
text/x-awk, awk
text/x-bcpl,
text/x-c, c|cc|h
text/x-c++, cpp|cxx|c++
@@ -287,23 +376,31 @@ text/x-makefile, am|mak
text/xml, xml|pom|iml|plist
text/x-m, m
text/x-msdos-batch, bat
text/x-ms-regedit, reg
text/x-objective-c,
text/x-pascal, p
text/x-perl, pl
text/x-php, php
text/x-po, po
text/x-python, py
text/x-ruby, rb
text/x-sass, sass
text/x-scss, scss
text/x-server-parsed-html, ssi
text/x-setext, etx
text/x-sgml, sgm|sgml
text/x-shellscript, sh
text/x-speech, talk
text/x-tcl,
text/x-tex, tex
text/x-uil, uil
text/x-uuencode, uue
text/x-vcalendar, vcs
text/x-vcard, vcf
video/animaflex, afl
video/avi, avi
video/avs-video, avs
video/MP2T,
video/mp4, mp4
video/mpeg, m1v|m2v|mpe|mpeg|mpg
video/quicktime, moov|mov|qt
@@ -318,101 +415,15 @@ video/x-atomic3d-feature, fmf
video/x-dl, dl
video/x-dv, dif|dv
video/x-fli, fli
video/x-flv, flv
video/x-isvideo, isu
!video/x-jng, jng
video/x-m4v, m4v
video/x-matroska, mkv
video/x-mng, mng
video/x-motion-jpeg, mjpg
video/x-ms-asf, asf|asx
video/x-ms-asf, asf|asx|wmv
video/x-msvideo, divx
video/x-qtc, qtc
video/x-sgi-movie, movie|mv
application/x-7z-compressed, 7z
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
text/x-po, po
application/x-rpm, rpm
application/x-debian-package, deb
application/vnd.iccprofile, icm
application/dicom, dcm
image/x-exr, exr
application/vnd.iccprofile, icm
video/x-matroska, mkv
application/x-empty,
model/vnd.gdl, gdl
model/vnd.gs.gdl, gdsl
font/woff, woff
font/woff2, woff2
application/epub+zip, epub
application/x-mobipocket-ebook, mobi
audio/x-flac, flac
application/x-rar, rar
video/x-msvideo, divx
video/x-flv, flv
application/x-kdelnk,
text/x-tcl,
application/ogg, ogv
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
application/vnd.ms-cab-compressed, cab
audio/mp4, m4b
!image/vnd.djvu, djvu
application/x-ms-reader, lit
application/CDFV2-corrupt,
text/x-vcard, vcf
application/x-innosetup,
application/winhelp, hlp
image/x-tga,
application/x-wine-extension-ini,
application/x-cbz, cbz
application/x-cbr, cbr
application/x-ms-compress-szdd, fon
application/x-atari-7800-rom, a78
application/x-nes-rom, nes
application/x-font-pfm, pfm
application/x-gettext-translation,
image/wmf,
application/pgp-keys,
image/x-3ds, 3ds
application/x-lz4, lz4
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
application/vnd.oasis.opendocument.presentation, odp
application/x-msaccess, accdb
application/vnd.oasis.opendocument.spreadsheet, ods
audio/x-aiff, aiff|aif
text/x-ms-regedit, reg
application/x-gamecube-rom,
application/x-nintendo-ds-rom,
text/x-objective-c,
application/x-font-gdos,
application/x-apple-diskimage,
application/x-zstd, zst
video/x-m4v, m4v
message/news,
application/vnd.symbian.install,
application/x-lzh-compressed,
application/x-dosdriver,
application/vnd.tcpdump.pcap, pcap
x-epoc/x-sisx-app,
application/x-avira-qua,
video/MP2T,
application/x-snappy-framed,
application/x-lz4+json, jsonlz4
application/x-dmp, dmp
application/zlib, z
application/x-pgp-keyring,
application/x-gdbm,
application/x-font-pf2, pf2
application/x-zip,
application/x-coredump,
application/x-java-jmod, jmod
application/x-terminfo,
application/x-terminfo2,
application/x-arc,
application/vnd.lotus-1-2-3,
image/x-win-bitmap,
application/x-maxis-dbpf,
text/PGP,
audio/x-hx-aac-adts,
application/x-chrome-extension,
image/heic, heic
image/x-gem,
application/x-lzma, lzma
application/warc, warc
application/x-lz4, lz4
application/x-lzip, lz
application/x-lzop, lzo
1 application/arj arj
2 application/base64 mme
3 application/binhex hqx
4 application/book boo|book
5 application/CDFV2-corrupt
6 application/CDFV2 sdv
7 application/clariscad ccad
8 application/commonground dp
9 application/csv
10 application/dicom dcm
11 application/drafting drw
12 application/epub+zip epub
13 application/freeloader frl
14 application/futuresplash spl
15 application/groupwise vew
21 application/java-archive jar
22 application/java class
23 application/javascript
application/x-archive a
24 application/json json
25 application/marc mrc
26 application/mbedlet mbd
30 application/netmc mcp
31 application/octet-stream bin|dump|gpg
32 application/oda oda
33 application/ogg ogv
34 application/pdf pdf
35 application/pgp-keys
36 application/pgp-signature pgp
37 application/pkcs7-signature p7s
38 application/pkix-cert cer|crt
48 application/vnd.fdf fdf
49 application/vnd.font-fontforge-sfd sfd
50 application/vnd.hp-hpgl hgl|hpg|hpgl
51 application/vnd.iccprofile icm
52 application/vnd.iccprofile icm
53 application/vnd.lotus-1-2-3
54 application/vnd.ms-cab-compressed cab
55 application/vnd.ms-excel xlb|xlc|xll|xlm|xls|xlw
56 application/vnd.ms-fontobject eot
57 application/vnd.ms-opentype otf
63 application/vnd.oasis.opendocument.base odb
64 application/vnd.oasis.opendocument.formula odf
65 application/vnd.oasis.opendocument.graphics odg
66 application/vnd.oasis.opendocument.presentation odp
67 application/vnd.oasis.opendocument.spreadsheet ods
68 application/vnd.oasis.opendocument.text odt
69 application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
70 application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
71 application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
72 application/vnd.symbian.install
73 application/vnd.tcpdump.pcap pcap
74 application/vnd.wap.wmlc wmlc
75 application/vnd.wap.wmlscriptc wmlsc
76 application/vnd.xara web
77 application/vocaltec-media-desc vmd
78 application/vocaltec-media-file vmf
79 application/warc warc
80 application/winhelp hlp
81 application/wordperfect6.0 w60
82 application/wordperfect6.1 w61
83 application/wordperfect wp|wp5|wp6|wpd
84 application/x-123 wk1
85 application/x-7z-compressed 7z
86 application/x-aim aim
87 application/x-apple-diskimage
88 application/x-arc
89 application/x-archive a
90 application/x-atari-7800-rom a78
91 application/x-authorware-bin aab
92 application/x-authorware-map aam
93 application/x-authorware-seg aas
94 application/x-avira-qua
95 application/x-bcpio bcpio
96 application/x-bittorrent torrent
97 application/x-bsh bsh
98 application/x-bytecode.python pyc
99 application/x-bzip2 boz|bz2
100 application/x-bzip bz
101 application/x-cbr cbr
102 application/x-cbz cbz
103 application/x-cdlink vcd
104 application/x-chat cha|chat
105 application/x-chrome-extension
106 application/x-cocoa cco
107 application/x-conference nsc
108 application/x-coredump
109 application/x-cpio cpio
110 application/x-dbf dbf
111 application/x-dbt
112 application/x-debian-package deb
113 application/x-deepv deepv
114 application/x-director dcr|dir|dxr
115 application/x-dmp dmp
116 application/x-dosdriver
117 application/x-dosexec dll
118 application/x-dvi dvi
119 application/x-elc elc
120 application/x-empty
121 application/x-envoy env|evy
122 application/x-esrehber es
123 application/x-excel xla|xld|xlk|xlt|xlv
124 application/x-executable exe
125 application/x-font-gdos
126 application/x-font-pf2 pf2
127 application/x-font-pfm pfm
128 application/x-font-sfn
129 application/x-font-ttf ttf|ttc
130 application/x-fptapplication/x-dbt
131 application/x-freelance pre
132 application/x-gamecube-rom
133 application/x-gdbm
134 application/x-gettext-translation
135 application/x-git
136 application/x-gsp gsp
137 application/x-gss gss
141 application/x-helpfile help
142 application/x-httpd-imap imap
143 application/x-ima ima
144 application/x-innosetup
145 application/x-internett-signup ins
146 application/x-inventor iv
147 application/x-ip2 ip
148 application/x-java-applet
149 application/x-java-commerce jcm
150 application/x-java-image
151 application/x-java-jmod jmod
152 application/x-java-keystore
153 application/x-kdelnk
154 application/x-koan skd|skm|skp|skt
155 application/x-latex latex|ltx
156 application/x-livescreen ivy
157 application/x-lotus wq1
158 application/x-lz4+json jsonlz4
159 application/x-lz4 lz4
160 application/x-lz4 lz4
161 application/x-lzh-compressed
162 application/x-lzh lzh
163 application/x-lzip lz
164 application/x-lzma lzma
165 application/x-lzop lzo
166 application/x-lzx lzx
167 application/x-mach-binary jnilib|dylib
168 application/x-mach-executable
169 application/x-magic-cap-package-1.0 mc$
170 application/x-mathcad mcd
171 application/x-maxis-dbpf
172 application/x-meme mm
173 application/x-midi midi
174 application/x-mif mif
175 application/x-mix-transfer nix
176 application/xml opf
177 application/x-mobipocket-ebook mobi
178 application/x-msaccess accdb
179 application/x-ms-compress-szdd fon
180 application/x-ms-pdb pdb
181 application/x-ms-reader lit
182 application/x-n64-rom z64
183 application/x-navi-animation ani
184 application/x-navidoc nvd
185 application/x-navimap map
186 application/x-navistyle stl
187 application/x-nes-rom nes
188 application/x-netcdf cdf|nc
189 application/x-newton-compatible-pkg pkg
190 application/x-nintendo-ds-rom
191 application/x-object o
192 application/x-omcdatamaker omcd
193 application/x-omc omc
194 application/x-omcregerator omcr
195 application/x-pagemaker pm4|pm5
196 application/x-pcl pcl
197 application/x-pgp-keyring
198 application/x-pixclscript plx
199 application/x-pkcs7-certreqresp p7r
200 application/x-pkcs7-signature p7a
201 application/x-project mpc|mpt|mpv|mpx
202 application/x-qpro wb1
203 application/x-rar rar
204 application/x-rpm rpm
205 application/x-sdp sdp
206 application/x-sea sea
207 application/x-seelogo sl
209 application/x-sharedlib so
210 application/x-shar shar
211 application/x-shockwave-flash swf
212 application/x-snappy-framed
213 application/x-sprite spr|sprite
214 application/x-sqlite3
215 application/x-stargallery-thm
216 application/x-stuffit sit
217 application/x-sv4cpio sv4cpio
218 application/x-sv4crc sv4crc
219 application/x-tar tar
220 application/x-tbook sbk|tbk
221 application/x-terminfo
222 application/x-terminfo2
223 application/x-texinfo texi|texinfo
224 application/x-tex-tfm tfm
225 application/x-ustar ustar
228 application/x-vnd.ls-xpix xpix
229 application/x-vrml vrml
230 application/x-wais-source src|wsrc
231 application/x-wine-extension-ini
232 application/x-wintalk wtk
233 application/x-world svr
234 application/x-wri wri
235 application/x-x509-ca-cert der
236 application/x-xz xz
237 application/x-zip
238 application/x-zstd zst
239 application/zip zip
240 application/zlib z
241 !audio/basic au
242 audio/it it
243 audio/make funk|my|pfunk
244 audio/midi kar
245 audio/mid rmi
246 audio/mp4 m4b
247 audio/mpeg m2a|mpa
248 audio/ogg ogg
249 audio/s3m s3m
251 audio/tsplayer tsp
252 audio/vnd.qcelp qcp
253 audio/voxware vox
254 audio/x-aiff aiff|aif
255 audio/x-flac flac
256 audio/x-gsm gsd|gsm
257 audio/x-hx-aac-adts
258 audio/x-jam jam
259 audio/x-liveaudio lam
260 audio/x-m4a m4a
268 audio/x-pn-realaudio ram|rm|rmm|rmp
269 audio/x-psid sid
270 audio/x-realaudio ra
271 audio/x-s3m
272 audio/x-twinvq-plugin vqe|vql
273 audio/x-twinvq vqf
274 audio/x-voc voc
275 audio/x-wav wav
276 !audio/x-xbox360-executable xex
277 !audio/x-xbox-executable xbe
278 font/otf
279 font/sfnt
280 font/woff2 woff2
281 font/woff woff
282 image/bmp
283 image/cmu-raster rast
284 image/fif fif
285 image/florian flo|turbot
286 image/g3fax g3
287 image/gif gif
288 image/heic heic
289 image/ief ief|iefs
290 image/jpeg jfif|jfif-tbnl|jpe|jpeg|jpg
291 image/jutvision jut
294 image/png png|x-png
295 !image/svg svg
296 !image/svg+xml
297 image/tiff
298 !image/vnd.adobe.photoshop psd
299 !image/vnd.djvu djvu
300 image/vnd.fpx fpx
301 image/vnd.microsoft.icon
302 image/vnd.rn-realflash rf
304 image/vnd.wap.wbmp wbmp
305 image/vnd.xiff xif
306 image/webp webp
307 image/wmf
308 image/x-3ds 3ds
309 image/x-award-bioslogo
310 image/x-cmu-raster ras
311 image/x-cur tga
312 image/x-dwg dwg|dxf|svf
313 image/x-eps
314 image/x-exr exr
315 image/x-gem
316 image/x-icns
317 !image/x-icon ico
318 image/x-jg art
326 image/x-portable-pixmap ppm
327 image/x-quicktime qif|qti|qtif
328 image/x-rgb rgb
329 image/x-tga
330 image/x-tiff tif|tiff
331 image/tiff image/x-win-bitmap
332 !image/x-xcf xcf
333 !image/x-xpixmap xpm
334 image/x-xwindowdump xwd
335 message/news
336 message/rfc822 mht|mhtml|mime
337 model/vnd.dwf dwf
338 model/vnd.gdl gdl
339 model/vnd.gs.gdl gdsl
340 model/vrml wrz
341 model/x-pov pov
342 text/asp asp
343 text/css css
text/x-sass sass
text/x-scss scss
344 text/html acgi|htm|html|htmls|htx|shtml
345 text/javascript js
346 text/mcf mcf
347 text/pascal pas
348 text/PGP
349 text/plain com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml
350 text/richtext rt|rtf|rtx
351 text/rtf
352 text/scriplet wsc
text/x-awk awk
!video/x-jng jng
video/x-mng mng
image/x-cur tga
image/x-xwindowdump xwd
!image/vnd.adobe.photoshop psd
353 text/tab-separated-values tsv
354 text/troff man|me|ms|roff|t|tr
355 text/uri-list uji|unis|uri|uris
361 text/x-Algol68
362 text/x-asm asm|s
363 text/x-audiosoft-intra aip
364 text/x-awk awk
365 text/x-bcpl
366 text/x-c c|cc|h
367 text/x-c++ cpp|cxx|c++
376 text/xml xml|pom|iml|plist
377 text/x-m m
378 text/x-msdos-batch bat
379 text/x-ms-regedit reg
380 text/x-objective-c
381 text/x-pascal p
382 text/x-perl pl
383 text/x-php php
384 text/x-po po
385 text/x-python py
386 text/x-ruby rb
387 text/x-sass sass
388 text/x-scss scss
389 text/x-server-parsed-html ssi
390 text/x-setext etx
391 text/x-sgml sgm|sgml
392 text/x-shellscript sh
393 text/x-speech talk
394 text/x-tcl
395 text/x-tex tex
396 text/x-uil uil
397 text/x-uuencode uue
398 text/x-vcalendar vcs
399 text/x-vcard vcf
400 video/animaflex afl
401 video/avi avi
402 video/avs-video avs
403 video/MP2T
404 video/mp4 mp4
405 video/mpeg m1v|m2v|mpe|mpeg|mpg
406 video/quicktime moov|mov|qt
415 video/x-dl dl
416 video/x-dv dif|dv
417 video/x-fli fli
418 video/x-flv flv
419 video/x-isvideo isu
420 !video/x-jng jng
421 video/x-m4v m4v
422 video/x-matroska mkv
423 video/x-mng mng
424 video/x-motion-jpeg mjpg
425 video/x-ms-asf asf|asx asf|asx|wmv
426 video/x-msvideo divx
427 video/x-qtc qtc
428 video/x-sgi-movie movie|mv
application/x-7z-compressed 7z
application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
text/x-po po
application/x-rpm rpm
application/x-debian-package deb
application/vnd.iccprofile icm
application/dicom dcm
image/x-exr exr
application/vnd.iccprofile icm
video/x-matroska mkv
application/x-empty
model/vnd.gdl gdl
model/vnd.gs.gdl gdsl
font/woff woff
font/woff2 woff2
application/epub+zip epub
application/x-mobipocket-ebook mobi
audio/x-flac flac
application/x-rar rar
video/x-msvideo divx
video/x-flv flv
application/x-kdelnk
text/x-tcl
application/ogg ogv
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
application/vnd.ms-cab-compressed cab
audio/mp4 m4b
!image/vnd.djvu djvu
application/x-ms-reader lit
application/CDFV2-corrupt
text/x-vcard vcf
application/x-innosetup
application/winhelp hlp
image/x-tga
application/x-wine-extension-ini
application/x-cbz cbz
application/x-cbr cbr
application/x-ms-compress-szdd fon
application/x-atari-7800-rom a78
application/x-nes-rom nes
application/x-font-pfm pfm
application/x-gettext-translation
image/wmf
application/pgp-keys
image/x-3ds 3ds
application/x-lz4 lz4
application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
application/vnd.oasis.opendocument.presentation odp
application/x-msaccess accdb
application/vnd.oasis.opendocument.spreadsheet ods
audio/x-aiff aiff|aif
text/x-ms-regedit reg
application/x-gamecube-rom
application/x-nintendo-ds-rom
text/x-objective-c
application/x-font-gdos
application/x-apple-diskimage
application/x-zstd zst
video/x-m4v m4v
message/news
application/vnd.symbian.install
application/x-lzh-compressed
application/x-dosdriver
application/vnd.tcpdump.pcap pcap
429 x-epoc/x-sisx-app
application/x-avira-qua
video/MP2T
application/x-snappy-framed
application/x-lz4+json jsonlz4
application/x-dmp dmp
application/zlib z
application/x-pgp-keyring
application/x-gdbm
application/x-font-pf2 pf2
application/x-zip
application/x-coredump
application/x-java-jmod jmod
application/x-terminfo
application/x-terminfo2
application/x-arc
application/vnd.lotus-1-2-3
image/x-win-bitmap
application/x-maxis-dbpf
text/PGP
audio/x-hx-aac-adts
application/x-chrome-extension
image/heic heic
image/x-gem
application/x-lzma lzma
application/warc warc
application/x-lz4 lz4
application/x-lzip lz
application/x-lzop lzo

View File

@@ -1,13 +1,17 @@
{
"properties": {
"_tie": {
"type": "keyword",
"doc_values": true
},
"_depth": {
"type": "integer"
},
"path": {
"type": "text",
"analyzer": "path_analyzer",
"copy_to": "suggest-path"
},
"suggest-path": {
"type": "completion",
"analyzer": "case_insensitive_kw_analyzer"
"fielddata": true,
"index_prefixes": {}
},
"mime": {
"type": "keyword"
@@ -105,6 +109,30 @@
},
"tag": {
"type": "keyword"
},
"exif_make": {
"type": "text"
},
"exif_model": {
"type": "text"
},
"exif:software": {
"type": "text"
},
"exif_exposure_time": {
"type": "keyword"
},
"exif_fnumber": {
"type": "keyword"
},
"exif_iso_speed_ratings": {
"type": "keyword"
},
"exif_focal_length": {
"type": "keyword"
},
"exif_user_comment": {
"type": "text"
}
}
}

10
schema/pipeline.json Normal file
View File

@@ -0,0 +1,10 @@
{
"description": "Copy _id to _tie, save path depth",
"processors": [
{
"script": {
"source": "ctx._tie = ctx._id; ctx._depth = ctx.path.length() == 0 ? 0 : 1 + ctx.path.length() - ctx.path.replace(\"/\", \"\").length();"
}
}
]
}

View File

@@ -54,6 +54,11 @@ script.painless.regex.enabled: true
```
Or, if you're using docker add `-e "script.painless.regex.enabled=true"`
**Tag color**
You can specify the color for an individual tag by appending a
hexadecimal color code (`#RRGGBBAA`) to the tag name.
### Examples
If `(20XX)` is in the file name, add the `year.<year>` tag:
@@ -115,3 +120,33 @@ if (ctx._source.path != "") {
tags.add("studio." + names[names.length-1]);
}
```
Parse `EXIF:F Number` tag
```Java
if (ctx._source?.exif_fnumber != null) {
String[] values = ctx._source.exif_fnumber.splitOnToken(' ');
String aperture = String.valueOf(Float.parseFloat(values[0]) / Float.parseFloat(values[1]));
if (aperture == "NaN") {
aperture = "0,0";
}
tags.add("Aperture.f/" + aperture.replace(".", ","));
}
```
Display the year and month from the `EXIF:DateTime` tag
```Java
if (ctx._source?.exif_datetime != null) {
SimpleDateFormat parser = new SimpleDateFormat("yyyy:MM:dd HH:mm:ss");
Date date = parser.parse(ctx._source.exif_datetime);
SimpleDateFormat yp = new SimpleDateFormat("yyyy");
SimpleDateFormat mp = new SimpleDateFormat("MMMMMMMMM");
String year = yp.format(date);
String month = mp.format(date);
tags.add("Month." + month);
tags.add("Year." + year);
}
```

View File

@@ -13,7 +13,7 @@ mv mupdf/build/release/libmupdf-third.a .
# openjp2
cd openjpeg
cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3 -march=native -DNDEBUG -fPIC"
cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3 -DNDEBUG -fPIC"
make -j $THREADS
cd ..
mv openjpeg/bin/libopenjp2.a .
@@ -75,7 +75,7 @@ cd tesseract
mkdir build
cd build
cmake -DSTATIC=on -DBUILD_TRAINING_TOOLS=off -DBUILD_TESTS=off -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_CXX_FLAGS="-fPIC" ..
-DCMAKE_CXX_FLAGS="-fPIC" -DAUTO_OPTIMIZE=off ..
make -j $THREADS
cd ../..
mv tesseract/build/libtesseract.a .

View File

@@ -1,6 +1,9 @@
import json
files = [
"schema/mappings.json",
"schema/settings.json",
"schema/pipeline.json",
]
@@ -9,6 +12,6 @@ def clean(filepath):
for file in files:
with open(file, "rb") as f:
data = f.read()
with open(file, "r") as f:
data = json.dumps(json.load(f), separators=(",", ":")).encode()
print("char %s[%d] = {%s};" % (clean(file), len(data), ",".join(str(int(b)) for b in data)))

BIN
sist2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 889 KiB

View File

@@ -1,8 +1,6 @@
#include "cli.h"
#include "ctx.h"
#include <tesseract/capi.h>
#define DEFAULT_OUTPUT "index.sist2/"
#define DEFAULT_CONTENT_SIZE 32768
#define DEFAULT_QUALITY 5
@@ -71,8 +69,8 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->incremental != NULL) {
abs_path = abspath(args->incremental);
if (abs_path == NULL) {
fprintf(stderr, "File not found: %s\n", args->incremental);
return 1;
sist_log("main.c", SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
args->incremental = NULL;
}
}
@@ -162,6 +160,26 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->tesseract_path = path;
}
// Compile the optional --exclude pattern once, up front, so that the
// directory walker only has to execute it for each file.
if (args->exclude_regex != NULL) {
    // Initialised so that they are never read uninitialised: pcre_compile is
    // only documented to fill them in when compilation fails.
    const char *error = NULL;
    int error_offset = 0;

    pcre *re = pcre_compile(args->exclude_regex, 0, &error, &error_offset, 0);
    // A NULL return value is the documented failure signal of pcre_compile.
    if (re == NULL) {
        LOG_FATALF("cli.c", "pcre_compile returned error: %s (offset:%d)", error, error_offset)
    }

    // pcre_study may legitimately return NULL (nothing worth optimising);
    // failure is reported through the error pointer only.
    error = NULL;
    pcre_extra *re_extra = pcre_study(re, 0, &error);
    if (error != NULL) {
        LOG_FATALF("cli.c", "pcre_study returned error: %s", error)
    }

    ScanCtx.exclude = re;
    ScanCtx.exclude_extra = re_extra;
} else {
    // No pattern: clear both handles so no stale study data is left behind.
    ScanCtx.exclude = NULL;
    ScanCtx.exclude_extra = NULL;
}
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
LOG_DEBUGF("cli.c", "arg size=%d", args->size)
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
@@ -175,6 +193,8 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
return 0;
}
@@ -218,7 +238,7 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
args->script = malloc(info.st_size + 1);
res = read(fd, args->script, info.st_size);
if (res == -1) {
if (res < 0) {
fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno));
return 1;
}

View File

@@ -18,10 +18,14 @@ typedef struct scan_args {
archive_mode_t archive_mode;
char *tesseract_lang;
const char *tesseract_path;
char *exclude_regex;
int fast;
} scan_args_t;
scan_args_t *scan_args_create();
void scan_args_destroy(scan_args_t *args);
int scan_args_validate(scan_args_t *args, int argc, const char **argv);
typedef struct index_args {
@@ -45,12 +49,15 @@ typedef struct web_args {
} web_args_t;
index_args_t *index_args_create();
void index_args_destroy(index_args_t *args);
web_args_t *web_args_create();
void web_args_destroy(web_args_t *args);
int index_args_validate(index_args_t *args, int argc, const char **argv);
int web_args_validate(web_args_t *args, int argc, const char **argv);
#endif

View File

@@ -28,7 +28,10 @@ struct {
pthread_mutex_t mupdf_mu;
char * tesseract_lang;
char * tesseract_path;
const char * tesseract_path;
pcre *exclude;
pcre_extra *exclude_extra;
int fast;
} ScanCtx;
struct {

View File

@@ -1,12 +1,6 @@
#include "elastic.h"
#include "src/ctx.h"
#include <stdlib.h>
#include "web.h"
#include <stdio.h>
#include <string.h>
#include <cJSON/cJSON.h>
#include "static_generated.c"
@@ -20,6 +14,8 @@ typedef struct es_indexer {
static es_indexer_t *Indexer;
void delete_queue(int max);
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
cJSON *line = cJSON_CreateObject();
@@ -64,7 +60,7 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
cJSON_AddStringToObject(term_obj, "index", index_id);
char * str = cJSON_Print(body);
char *str = cJSON_Print(body);
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?pretty", Indexer->es_url);
@@ -87,24 +83,18 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
cJSON_Delete(resp);
}
void elastic_flush() {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url);
}
void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
es_bulk_line_t *line = Indexer->line_head;
int count = 0;
*count = 0;
size_t buf_size = 0;
size_t buf_cur = 0;
char *buf = malloc(1);
while (line != NULL) {
while (line != NULL && *count < max) {
char action_str[512];
snprintf(action_str, 512,
"{\"index\":{\"_id\":\"%s\", \"_type\":\"_doc\", \"_index\":\"sist2\"}}\n", line->uuid_str);
"{\"index\":{\"_id\":\"%s\", \"_type\":\"_doc\", \"_index\":\"sist2\"}}\n", line->uuid_str);
size_t action_str_len = strlen(action_str);
size_t line_len = strlen(line->line);
@@ -116,46 +106,105 @@ void elastic_flush() {
memcpy(buf + buf_cur, line->line, line_len);
buf_cur += line_len;
es_bulk_line_t *tmp = line;
line = line->next;
free(tmp);
count++;
(*count)++;
}
buf = realloc(buf, buf_size + 1);
*(buf+buf_cur) = '\0';
*(buf + buf_cur) = '\0';
Indexer->line_head = NULL;
Indexer->line_tail = NULL;
Indexer->queued = 0;
*buf_len = buf_cur;
return buf;
}
/**
 * Log the failures contained in the body of a bulk-index response.
 *
 * For a well-formed bulk response, every entry of "items" whose
 * "index.status" is missing or different from 201 is logged. If the body has
 * no "errors" member (for example a request-level error object), the whole
 * document is logged instead. A body that is not valid JSON is reported as
 * such.
 *
 * @param r response whose body is inspected
 * @return always NULL; the pointer return type is kept for existing callers
 */
void *print_errors(response_t *r) {
    cJSON *ret_json = cJSON_Parse(r->body);
    if (ret_json == NULL) {
        LOG_ERRORF("elastic.c", "Could not parse response body as JSON <%d>\n", r->status_code);
        return NULL;
    }

    const cJSON *errors = cJSON_GetObjectItem(ret_json, "errors");

    if (errors == NULL) {
        // Not shaped like a bulk response: dump it all rather than crash
        // on the missing member.
        char *str = cJSON_Print(ret_json);
        if (str != NULL) {
            LOG_ERRORF("elastic.c", "%s\n", str);
            cJSON_free(str);
        }
    } else if (errors->valueint != 0) {
        cJSON *err;
        cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
            const cJSON *status = cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status");
            // A missing status is treated as a failure so the item is shown.
            if (status == NULL || status->valueint != 201) {
                char *str = cJSON_Print(err);
                if (str != NULL) {
                    LOG_ERRORF("elastic.c", "%s\n", str);
                    cJSON_free(str);
                }
            }
        }
    }

    cJSON_Delete(ret_json);
    return NULL;
}
void _elastic_flush(int max) {
size_t buf_len;
int count;
void *buf = create_bulk_buffer(max, &count, &buf_len);
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_bulk", Indexer->es_url);
snprintf(bulk_url, 4096, "%s/sist2/_bulk?pipeline=tie", Indexer->es_url);
response_t *r = web_post(bulk_url, buf, "Content-Type: application/x-ndjson");
if (r->status_code == 0) {
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
}
LOG_INFOF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_cur / 1024, r->status_code);
if (r->status_code == 413) {
cJSON *ret_json = cJSON_Parse(r->body);
if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
cJSON *err;
cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
char* str = cJSON_Print(err);
LOG_ERRORF("elastic.c", "%s\n", str);
cJSON_free(str);
if (max <= 1) {
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->uuid_str)
free_response(r);
free(buf);
delete_queue(1);
if (Indexer->queued != 0) {
elastic_flush();
}
return;
}
LOG_WARNINGF("elastic.c", "Payload too large, retrying (%d documents)", count);
free_response(r);
free(buf);
_elastic_flush(max / 2);
return;
} else if (r->status_code != 200) {
print_errors(r);
delete_queue(Indexer->queued);
} else {
print_errors(r);
LOG_INFOF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
delete_queue(max);
if (Indexer->queued != 0) {
elastic_flush();
}
}
cJSON_Delete(ret_json);
free_response(r);
free(buf);
}
/**
 * Remove up to `max` lines from the front of the indexer queue and free them.
 *
 * Stops early if the queue runs out before `max` lines were removed. Every
 * removed node is freed, including the last one of the queue, and
 * Indexer->queued is decremented once per removed node.
 *
 * @param max maximum number of queued lines to discard
 */
void delete_queue(int max) {
    for (int i = 0; i < max && Indexer->line_head != NULL; i++) {
        es_bulk_line_t *head = Indexer->line_head;

        Indexer->line_head = head->next;
        if (Indexer->line_head == NULL) {
            // Queue is now empty: the tail must not keep pointing at the
            // node that is about to be freed.
            Indexer->line_tail = NULL;
        }

        free(head);
        Indexer->queued -= 1;
    }
}
/**
 * Flush every queued line, creating the indexer from IndexCtx.es_url first
 * if none exists yet.
 */
void elastic_flush() {
    if (!Indexer) {
        Indexer = create_indexer(IndexCtx.es_url);
    }

    // Try to send the whole queue in a single request.
    _elastic_flush(Indexer->queued);
}
void elastic_index_line(es_bulk_line_t *line) {
if (Indexer == NULL) {
@@ -192,7 +241,7 @@ es_indexer_t *create_indexer(const char *url) {
return indexer;
}
void destroy_indexer(char * script, char index_id[UUID_STR_LEN]) {
void destroy_indexer(char *script, char index_id[UUID_STR_LEN]) {
char url[4096];
@@ -245,6 +294,11 @@ void elastic_init(int force_reset) {
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/_ingest/pipeline/tie", IndexCtx.es_url);
r = web_put(url, pipeline_json, "Content-Type: application/json");
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_settings", IndexCtx.es_url);
r = web_put(url, settings_json, "Content-Type: application/json");
LOG_INFOF("elastic.c", "Update settings <%d>", r->status_code);
@@ -278,7 +332,7 @@ cJSON *elastic_get_document(const char *uuid_str) {
char *elastic_get_status() {
char url[4096];
snprintf(url, 4096,
"%s/_cluster/state/metadata/sist2?filter_path=metadata.indices.*.state", WebCtx.es_url);
"%s/_cluster/state/metadata/sist2?filter_path=metadata.indices.*.state", WebCtx.es_url);
response_t *r = web_get(url);
cJSON *json = NULL;

File diff suppressed because one or more lines are too long

View File

@@ -39,8 +39,8 @@ void write_index_descriptor(char *path, index_descriptor_t *desc) {
cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp);
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
if (fd == -1) {
perror(path);
if (fd < 0) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
}
char *str = cJSON_Print(json);
write(fd, str, strlen(str));
@@ -57,7 +57,7 @@ index_descriptor_t read_index_descriptor(char *path) {
int fd = open(path, O_RDONLY);
if (fd == -1) {
LOG_FATAL("serialize.c", "Invalid/corrupt index (Could not find descriptor)\n")
LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path ,strerror(errno))
}
char *buf = malloc(info.st_size + 1);
@@ -134,6 +134,8 @@ char *get_meta_key_text(enum metakey meta_key) {
return "exif_iso_speed_ratings";
case MetaExifModel:
return "exif_model";
case MetaExifDateTime:
return "exif_datetime";
default:
return NULL;
}
@@ -183,7 +185,7 @@ void write_document(document_t *doc) {
int res = write(index_fd, buf.buf, buf.cur);
if (res == -1) {
perror("write");
LOG_FATALF("serialize.c", "Could not write document: %s", strerror(errno))
}
ScanCtx.stat_index_size += buf.cur;
dyn_buffer_destroy(&buf);
@@ -191,6 +193,8 @@ void write_document(document_t *doc) {
void thread_cleanup() {
close(index_fd);
cleanup_parse();
cleanup_font();
}
@@ -212,7 +216,12 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
char uuid_str[UUID_STR_LEN];
uuid_unparse(line.uuid, uuid_str);
cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime));
const char* mime_text = mime_get_mime_text(line.mime);
if (mime_text == NULL) {
cJSON_AddNullToObject(document, "mime");
} else {
cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime));
}
cJSON_AddNumberToObject(document, "size", (double) line.size);
cJSON_AddNumberToObject(document, "mtime", line.mtime);
@@ -278,6 +287,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
case MetaExifFocalLength:
case MetaExifUserComment:
case MetaExifIsoSpeedRatings:
case MetaExifDateTime:
case MetaExifModel:
case MetaTitle: {
buf.cur = 0;
@@ -326,7 +336,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
char *line = NULL;
size_t len;
size_t read = getline(&line, &len, file);
if (read == -1) {
if (read < 0) {
if (line) {
free(line);
}

View File

@@ -15,8 +15,7 @@ store_t *store_create(char *path) {
);
if (open_ret != 0) {
fprintf(stderr, "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path);
exit(1);
LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path)
}
store->size = (size_t) 1024 * 1024 * 5;
@@ -82,7 +81,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
pthread_rwlock_unlock(&store->lock);
if (put_ret != 0) {
printf("%s\n", mdb_strerror(put_ret));
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
}

View File

@@ -28,8 +28,18 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
return job;
}
// Output vector handed to pcre_exec by EXCLUDED().
int sub_strings[30];
// True when `str` matches the compiled exclude pattern in ScanCtx.
// pcre_exec expects the NUMBER OF ELEMENTS of the output vector, not its size
// in bytes; passing sizeof() would let it write past the end of the array.
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, (str), (int) strlen(str), 0, 0, sub_strings, (int) (sizeof(sub_strings) / sizeof(sub_strings[0]))) >= 0)
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
if (ftw->level <= ScanCtx.depth && typeflag == FTW_F && S_ISREG(info->st_mode)) {
if (typeflag == FTW_F && S_ISREG(info->st_mode) && ftw->level <= ScanCtx.depth) {
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
return 0;
}
parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
tpool_add_work(ScanCtx.pool, parse, job);
}

View File

@@ -6,7 +6,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "1.2.5";
static const char *const Version = "1.3.2";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
@@ -17,7 +17,6 @@ static const char *const usage[] = {
void global_init() {
curl_global_init(CURL_GLOBAL_NOTHING);
av_log_set_level(AV_LOG_QUIET);
opcInitLibrary();
}
void init_dir(const char *dirpath) {
@@ -49,15 +48,19 @@ void sist2_scan(scan_args_t *args) {
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
ScanCtx.tesseract_lang = args->tesseract_lang;
ScanCtx.tesseract_path = args->tesseract_path;
ScanCtx.fast = args->fast;
init_dir(ScanCtx.index.path);
ScanCtx.mime_table = mime_get_mime_table();
ScanCtx.ext_table = mime_get_ext_table();
cbr_init();
char store_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
@@ -71,9 +74,18 @@ void sist2_scan(scan_args_t *args) {
DIR *dir = opendir(args->incremental);
if (dir == NULL) {
perror("opendir");
return;
LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno))
}
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
if (strcmp(original_desc.version, Version) != 0) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", original_desc.version,
Version, INDEX_VERSION_EXTERNAL)
}
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
@@ -84,7 +96,7 @@ void sist2_scan(scan_args_t *args) {
}
closedir(dir);
printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table));
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
}
ScanCtx.pool = tpool_create(args->threads, thread_cleanup);
@@ -136,15 +148,13 @@ void sist2_index(index_args_t *args) {
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
if (strcmp(desc.version, Version) != 0 && strcmp(desc.version, INDEX_VERSION_EXTERNAL) != 0) {
fprintf(stderr, "Version mismatch! Index is %s but executable is %s/%s\n",
desc.version, Version, INDEX_VERSION_EXTERNAL);
return;
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", desc.version, Version,
INDEX_VERSION_EXTERNAL)
}
DIR *dir = opendir(args->index_path);
if (dir == NULL) {
perror("opendir");
return;
LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno))
}
index_func f;
@@ -237,6 +247,8 @@ int main(int argc, const char *argv[]) {
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see "
"which are installed on your machine)"),
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
OPT_GROUP("Index options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
@@ -262,7 +274,7 @@ int main(int argc, const char *argv[]) {
if (arg_version) {
printf(Version);
exit(0);
goto end;
}
if (LogCtx.very_verbose != 0) {
@@ -274,22 +286,20 @@ int main(int argc, const char *argv[]) {
if (argc == 0) {
argparse_usage(&argparse);
return 1;
goto end;
} else if (strcmp(argv[0], "scan") == 0) {
int err = scan_args_validate(scan_args, argc, argv);
if (err != 0) {
return err;
goto end;
}
sist2_scan(scan_args);
}
else if (strcmp(argv[0], "index") == 0) {
} else if (strcmp(argv[0], "index") == 0) {
int err = index_args_validate(index_args, argc, argv);
if (err != 0) {
return err;
goto end;
}
sist2_index(index_args);
@@ -297,20 +307,19 @@ int main(int argc, const char *argv[]) {
int err = web_args_validate(web_args, argc, argv);
if (err != 0) {
return err;
goto end;
}
sist2_web(web_args);
}
else {
} else {
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
argparse_usage(&argparse);
return 1;
goto end;
}
printf("\n");
end:
scan_args_destroy(scan_args);
index_args_destroy(index_args);
web_args_destroy(web_args);

View File

@@ -1,8 +1,6 @@
#include "arc.h"
#include "src/ctx.h"
#define ARC_BUF_SIZE 8192
int should_parse_filtered_file(const char *filepath, int ext) {
char tmp[PATH_MAX * 2];

View File

@@ -2,6 +2,7 @@
#define SIST2_ARC_H
#include "src/sist.h"
#define ARC_BUF_SIZE 8192
int should_parse_filtered_file(const char *filepath, int ext);

52
src/parsing/cbr.c Normal file
View File

@@ -0,0 +1,52 @@
#include "cbr.h"
#include "src/ctx.h"
unsigned int cbr_mime;
unsigned int cbz_mime;
/**
 * Look up and cache the mime ids of the two comic book archive types in
 * ScanCtx.mime_table.
 */
void cbr_init() {
    // The two lookups are independent of each other.
    cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
    cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
}
/**
 * @param mime mime id to test
 * @return non-zero when `mime` equals the id cached in cbr_mime
 */
int is_cbr(unsigned int mime) {
    if (cbr_mime == mime) {
        return 1;
    }
    return 0;
}
/**
 * Parse a RAR comic book archive by repacking it in memory as a zip archive
 * and handing the result to parse_pdf().
 *
 * doc->mime is set to cbz_mime for the duration of the parse_pdf() call and
 * set to cbr_mime afterwards. Nothing is parsed when the input is missing,
 * when memory cannot be allocated, or when the zip archive cannot be written
 * completely (the output buffer is limited to twice the input size).
 *
 * @param buf     RAR archive contents, may be NULL
 * @param buf_len length of `buf` in bytes
 * @param doc     document receiving the parse results
 */
void parse_cbr(void *buf, size_t buf_len, document_t *doc) {
    // The caller passes the result of a read that can fail.
    if (buf == NULL || buf_len == 0) {
        return;
    }

    size_t out_buf_len = buf_len * 2;
    char *out_buf = malloc(out_buf_len);
    if (out_buf == NULL) {
        LOG_ERRORF(doc->filepath, "Could not allocate %zu bytes for cbr conversion", out_buf_len);
        return;
    }
    size_t out_buf_used = 0;

    struct archive *rar_in = archive_read_new();
    archive_read_support_filter_none(rar_in);
    archive_read_support_format_rar(rar_in);

    struct archive *zip_out = archive_write_new();
    archive_write_set_format_zip(zip_out);

    // Cleared as soon as the zip output can no longer be trusted.
    int ok = 1;

    if (archive_read_open_memory(rar_in, buf, buf_len) != ARCHIVE_OK) {
        LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(rar_in));
        ok = 0;
    } else if (archive_write_open_memory(zip_out, out_buf, out_buf_len, &out_buf_used) != ARCHIVE_OK) {
        LOG_ERRORF(doc->filepath, "Could not open output archive: %s", archive_error_string(zip_out));
        ok = 0;
    }

    struct archive_entry *entry;
    while (ok && archive_read_next_header(rar_in, &entry) == ARCHIVE_OK) {

        int hdr_ret = archive_write_header(zip_out, entry);
        if (hdr_ret == ARCHIVE_FATAL) {
            LOG_ERRORF(doc->filepath, "Could not write archive header: %s", archive_error_string(zip_out));
            ok = 0;
            break;
        }
        if (hdr_ret != ARCHIVE_OK && hdr_ret != ARCHIVE_WARN) {
            // This entry cannot be stored; the next header read skips its data.
            LOG_ERRORF(doc->filepath, "Skipping archive entry: %s", archive_error_string(zip_out));
            continue;
        }

        char arc_buf[ARC_BUF_SIZE];
        int len = archive_read_data(rar_in, arc_buf, ARC_BUF_SIZE);
        // A read error (len < 0) only ends the current entry.
        while (len > 0) {
            if (archive_write_data(zip_out, arc_buf, len) < 0) {
                LOG_ERRORF(doc->filepath, "Could not write archive data: %s", archive_error_string(zip_out));
                ok = 0;
                break;
            }
            len = archive_read_data(rar_in, arc_buf, ARC_BUF_SIZE);
        }
    }

    // Closing writes the end of the zip archive; it can fail if the output
    // buffer is exhausted.
    if (ok && archive_write_close(zip_out) != ARCHIVE_OK) {
        LOG_ERRORF(doc->filepath, "Could not finalize output archive: %s", archive_error_string(zip_out));
        ok = 0;
    }
    archive_write_free(zip_out);

    archive_read_close(rar_in);
    archive_read_free(rar_in);

    if (ok) {
        doc->mime = cbz_mime;
        parse_pdf(out_buf, out_buf_used, doc);
        doc->mime = cbr_mime;
    }

    free(out_buf);
}

12
src/parsing/cbr.h Normal file
View File

@@ -0,0 +1,12 @@
// Interface of the comic book archive (cbr) parser.
#ifndef SIST2_CBR_H
#define SIST2_CBR_H
#include "src/sist.h"
// One-time setup; call before is_cbr() or parse_cbr() are used.
void cbr_init();
// Returns non-zero when the given mime id is the cbr mime id.
int is_cbr(unsigned int mime);
// Parses the cbr archive held in buf (buf_len bytes) into doc.
void parse_cbr(void *buf, size_t buf_len, document_t *doc);
#endif

View File

@@ -1,49 +1,31 @@
#include "doc.h"
#include "src/ctx.h"
void dump_text(mceTextReader_t *reader, dyn_buffer_t *buf) {
mce_skip_attributes(reader);
mce_start_children(reader) {
mce_start_element(reader, NULL, _X("t")) {
mce_skip_attributes(reader);
mce_start_children(reader) {
mce_start_text(reader) {
char *str = (char *) xmlTextReaderConstValue(reader->reader);
dyn_buffer_append_string(buf, str);
dyn_buffer_write_char(buf, ' ');
} mce_end_text(reader);
} mce_end_children(reader);
} mce_end_element(reader);
mce_start_element(reader, NULL, NULL) {
dump_text(reader, buf);
} mce_end_element(reader);
} mce_end_children(reader)
}
#define STR_STARTS_WITH(x, y) (strncmp(y, x, sizeof(y) - 1) == 0)
__always_inline
int should_read_part(opcPart part) {
static int should_read_part(const char *part) {
char *part_name = (char *) part;
LOG_DEBUGF("doc.c", "Got part : %s", part)
if (part == NULL) {
return FALSE;
}
if ( // Word
strcmp(part_name, "word/document.xml") == 0
|| strncmp(part_name, "word/footer", sizeof("word/footer") - 1) == 0
|| strncmp(part_name, "word/header", sizeof("word/header") - 1) == 0
STR_STARTS_WITH(part, "word/document.xml")
|| STR_STARTS_WITH(part, "word/footnotes.xml")
|| STR_STARTS_WITH(part, "word/endnotes.xml")
|| STR_STARTS_WITH(part, "word/footer")
|| STR_STARTS_WITH(part, "word/header")
// PowerPoint
|| strncmp(part_name, "ppt/slides/slide", sizeof("ppt/slides/slide") - 1) == 0
|| strncmp(part_name, "ppt/notesSlides/notesSlide", sizeof("ppt/notesSlides/notesSlide") - 1) == 0
|| STR_STARTS_WITH(part, "ppt/slides/slide")
|| STR_STARTS_WITH(part, "ppt/notesSlides/slide")
// Excel
|| strncmp(part_name, "xl/worksheets/sheet", sizeof("xl/worksheets/sheet") - 1) == 0
|| strcmp(part_name, "xl/sharedStrings.xml") == 0
|| strcmp(part_name, "xl/workbook.xml") == 0
|| STR_STARTS_WITH(part, "xl/worksheets/sheet")
|| STR_STARTS_WITH(part, "xl/sharedStrings.xml")
|| STR_STARTS_WITH(part, "xl/workbook.xml")
) {
return TRUE;
}
@@ -51,31 +33,64 @@ int should_read_part(opcPart part) {
return FALSE;
}
/**
 * Depth-first walk over `node`, its siblings and all their descendants that
 * appends the text of every element named "t" to `buf`, each followed by a
 * single space.
 */
static void extract_text_nodes(xmlDoc *xml, xmlNode *node, text_buffer_t *buf) {
    //TODO: Check which nodes are likely to have a 't' child, and ignore nodes that aren't
    for (xmlNode *child = node; child; child = child->next) {
        if (child->name != NULL && child->name[0] == 't' && child->name[1] == '\0') {
            xmlChar *text = xmlNodeListGetString(xml, child->xmlChildrenNode, 1);
            if (text) {
                text_buffer_append_string0(buf, (char *) text);
                text_buffer_append_char(buf, ' ');
                xmlFree(text);
            }
        }

        extract_text_nodes(xml, child->children, buf);
    }
}

/**
 * Extract the text of all "t" elements reachable from `node` into `buf`.
 *
 * The last libxml2 error is inspected once before walking the tree: a fatal
 * error aborts the extraction, any other error is logged and extraction
 * proceeds.
 *
 * @param xml  document that owns `node`
 * @param node first node to visit; its siblings and descendants are visited too
 * @param buf  destination text buffer
 * @return 0 on success, -1 when a fatal XML error was reported
 */
int extract_text(xmlDoc *xml, xmlNode *node, text_buffer_t *buf) {
    xmlErrorPtr err = xmlGetLastError();
    if (err != NULL) {
        if (err->level == XML_ERR_FATAL) {
            LOG_ERRORF("doc.c", "Got fatal XML error while parsing document: %s", err->message)
            return -1;
        } else {
            LOG_ERRORF("doc.c", "Got recoverable XML error while parsing document: %s", err->message)
        }
    }

    // The error state is checked once here, not once per visited node.
    extract_text_nodes(xml, node, buf);

    return 0;
}
/**
 * Read callback for xmlReadIO: `context` is the archive being read, and up to
 * `len` bytes of the current entry are copied into `buffer`.
 *
 * @return the value returned by archive_read_data()
 */
int xml_io_read(void *context, char *buffer, int len) {
    return archive_read_data((struct archive *) context, buffer, len);
}
// Close callback paired with xml_io_read(): it does nothing with the context
// and always reports success.
int xml_io_close(UNUSED(void *context)) {
// Intentionally a no-op
return 0;
}
__always_inline
void read_part(opcContainer *c, dyn_buffer_t *buf, opcPart part, document_t *doc) {
static int read_part(struct archive *a, text_buffer_t *buf, document_t *doc) {
mceTextReader_t reader;
int options;
if (LogCtx.very_verbose) {
options = XML_PARSE_NONET;
} else {
options = XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET;
xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL, XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
if (xml == NULL) {
LOG_ERROR(doc->filepath, "Could not parse XML")
return -1;
}
int ret = opcXmlReaderOpen(c, &reader, part, NULL, "UTF-8", options);
if (ret != OPC_ERROR_NONE) {
LOG_ERRORF(doc->filepath, "(doc.c) opcXmlReaderOpen() returned error code %d", ret);
return;
xmlNode *root = xmlDocGetRootElement(xml);
if (root == NULL) {
LOG_ERROR(doc->filepath, "Empty document")
xmlFreeDoc(xml);
return -1;
}
mce_start_document(&reader) {
mce_start_element(&reader, NULL, NULL) {
dump_text(&reader, buf);
} mce_end_element(&reader);
} mce_end_document(&reader);
extract_text(xml, root, buf);
xmlFreeDoc(xml);
mceTextReaderCleanup(&reader);
return 0;
}
void parse_doc(void *mem, size_t mem_len, document_t *doc) {
@@ -84,31 +99,42 @@ void parse_doc(void *mem, size_t mem_len, document_t *doc) {
return;
}
opcContainer *c = opcContainerOpenMem(mem, mem_len, OPC_OPEN_READ_ONLY, NULL);
if (c == NULL) {
LOG_ERROR(doc->filepath, "(doc.c) Couldn't open document with opcContainerOpenMem()");
struct archive *a = archive_read_new();
archive_read_support_format_zip(a);
int ret = archive_read_open_memory(a, mem, mem_len);
if (ret != ARCHIVE_OK) {
LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a))
archive_read_free(a);
return;
}
dyn_buffer_t buf = dyn_buffer_create();
text_buffer_t buf = text_buffer_create(ScanCtx.content_size);
opcPart part = opcPartGetFirst(c);
do {
if (should_read_part(part)) {
read_part(c, &buf, part, doc);
struct archive_entry *entry;
while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
if (S_ISREG(archive_entry_stat(entry)->st_mode)) {
const char *path = archive_entry_pathname(entry);
if (should_read_part(path)) {
ret = read_part(a, &buf, doc);
if (ret != 0) {
break;
}
}
}
} while ((part = opcPartGetNext(c, part)));
}
opcContainerClose(c, OPC_CLOSE_NOW);
if (buf.dyn_buffer.cur > 0) {
text_buffer_terminate_string(&buf);
if (buf.cur > 0) {
dyn_buffer_write_char(&buf, '\0');
meta_line_t *meta = malloc(sizeof(meta_line_t) + buf.cur);
meta_line_t *meta = malloc(sizeof(meta_line_t) + buf.dyn_buffer.cur);
meta->key = MetaContent;
strcpy(meta->strval, buf.buf);
strcpy(meta->strval, buf.dyn_buffer.buf);
APPEND_META(doc, meta)
}
dyn_buffer_destroy(&buf);
archive_read_close(a);
archive_read_free(a);
text_buffer_destroy(&buf);
}

View File

@@ -140,6 +140,7 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {
if (ft_lib == NULL) {
FT_Init_FreeType(&ft_lib);
}
if (buf == NULL) {
return;
}
@@ -169,6 +170,7 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {
APPEND_META(doc, meta_name)
if (ScanCtx.tn_size <= 0) {
FT_Done_Face(face);
return;
}
@@ -178,6 +180,7 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {
err = FT_Set_Pixel_Sizes(face, 0, pixel);
if (err != 0) {
LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, ft_error_string(err))
FT_Done_Face(face);
return;
}
@@ -224,3 +227,7 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {
FT_Done_Face(face);
}
/**
 * Release the FreeType library handle, if one was created.
 *
 * The handle is reset to NULL afterwards so that the lazy
 * `ft_lib == NULL` check in parse_font() creates a fresh library rather than
 * reusing a destroyed one, and so that calling this twice is harmless.
 */
void cleanup_font() {
    if (ft_lib != NULL) {
        FT_Done_FreeType(ft_lib);
        ft_lib = NULL;
    }
}

View File

@@ -5,5 +5,6 @@
void parse_font(const char * buf, size_t buf_len, document_t *doc);
void cleanup_font();
#endif

View File

@@ -5,7 +5,7 @@
#define AVIO_BUF_SIZE 8192
__always_inline
AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
static AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
@@ -78,7 +78,7 @@ AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int si
}
__always_inline
AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, document_t *doc) {
static AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, document_t *doc) {
AVFrame *frame = av_frame_alloc();
AVPacket avPacket;
@@ -113,10 +113,13 @@ AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int st
// Feed it to decoder
int decode_ret = avcodec_send_packet(decoder, &avPacket);
if (decode_ret != 0) {
LOG_WARNINGF(doc->filepath,
LOG_ERRORF(doc->filepath,
"(media.c) avcodec_send_packet() returned error code [%d] %s",
decode_ret, av_err2str(decode_ret)
)
av_frame_free(&frame);
av_packet_unref(&avPacket);
return NULL;
}
av_packet_unref(&avPacket);
receive_ret = avcodec_receive_frame(decoder, frame);
@@ -135,11 +138,11 @@ AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int st
text_buffer_destroy(&tex);
__always_inline
void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
static void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
AVDictionaryEntry *tag = NULL;
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
char key[32];
char key[256];
strncpy(key, tag->key, sizeof(key));
char *ptr = key;
@@ -160,7 +163,7 @@ void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
}
__always_inline
void
static void
append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int include_audio_tags, int is_video) {
if (is_video) {
@@ -209,6 +212,8 @@ append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc,
APPEND_TAG_META(doc, tag, MetaExifIsoSpeedRatings)
} else if (strcmp(tag->key, "ExposureTime") == 0) {
APPEND_TAG_META(doc, tag, MetaExifExposureTime)
} else if (strcmp(tag->key, "DateTime") == 0) {
APPEND_TAG_META(doc, tag, MetaExifDateTime)
}
}
}

View File

@@ -8,7 +8,7 @@
#define MIME_EMPTY 1
#define DONT_PARSE 0x80000000
#define SHOULD_PARSE(mime_id) (mime_id & DONT_PARSE) != DONT_PARSE && mime_id != 0
#define SHOULD_PARSE(mime_id) (ScanCtx.fast == 0 && (mime_id & DONT_PARSE) != DONT_PARSE && mime_id != 0)
#define PDF_MASK 0x40000000
#define IS_PDF(mime_id) (mime_id & PDF_MASK) == PDF_MASK

File diff suppressed because it is too large Load Diff

View File

@@ -36,8 +36,14 @@ void *read_all(parse_job_t *job, const char *buf, int bytes_read) {
memcpy(full_buf, buf, bytes_read);
int ret = job->vfile.read(&job->vfile, full_buf + bytes_read, job->info.st_size - bytes_read);
if (ret == -1) {
LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
if (ret < 0) {
free(full_buf);
if (job->vfile.is_fs_file) {
LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
} else {
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", ret, archive_error_string(job->vfile.arc))
}
return NULL;
}
}
@@ -58,6 +64,7 @@ void parse(void *arg) {
if (Magic == NULL) {
Magic = magic_open(MAGIC_MIME_TYPE);
magic_load(Magic, NULL);
}
doc.filepath = job->filepath;
@@ -87,11 +94,17 @@ void parse(void *arg) {
int bytes_read = 0;
if (doc.mime == 0) {
if (doc.mime == 0 && !ScanCtx.fast) {
// Get mime type with libmagic
bytes_read = job->vfile.read(&job->vfile, buf, PARSE_BUF_SIZE);
if (bytes_read == -1) {
LOG_WARNINGF(job->filepath, "read() Error: %s", strerror(errno))
if (bytes_read < 0) {
if (job->vfile.is_fs_file) {
LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
} else {
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc))
}
CLOSE_FILE(job->vfile)
return;
}
@@ -99,10 +112,16 @@ void parse(void *arg) {
const char *magic_mime_str = magic_buffer(Magic, buf, bytes_read);
if (magic_mime_str != NULL) {
doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
LOG_DEBUGF(job->filepath, "libmagic: %s", magic_mime_str);
if (doc.mime == 0) {
LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str);
}
}
magic_close(Magic);
Magic = NULL;
}
int mmime = MAJOR_MIME(doc.mime);
@@ -149,6 +168,13 @@ void parse(void *arg) {
if (doc_buf != buf && doc_buf != NULL) {
free(doc_buf);
}
} else if (is_cbr(doc.mime)) {
void *cbr_buf = read_all(job, (char *) buf, bytes_read);
parse_cbr(cbr_buf, doc.size, &doc);
if (cbr_buf != buf && cbr_buf != NULL) {
free(cbr_buf);
}
}
//Parent meta
@@ -166,3 +192,9 @@ void parse(void *arg) {
CLOSE_FILE(job->vfile)
}
/**
 * Close the libmagic handle, if one is open.
 *
 * The handle is reset to NULL so that parse() reopens it on demand through
 * its `Magic == NULL` check, and so that calling this twice is harmless.
 */
void cleanup_parse() {
    if (Magic != NULL) {
        magic_close(Magic);
        Magic = NULL;
    }
}

View File

@@ -10,4 +10,6 @@ void fs_close(struct vfile *f);
void parse(void *arg);
void cleanup_parse();
#endif

View File

@@ -1,16 +1,18 @@
#include "pdf.h"
#include "src/ctx.h"
#define MIN_OCR_SIZE 128
#define MIN_OCR_SIZE 350
#define MIN_OCR_LEN 10
__thread text_buffer_t thread_buffer;
fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
int render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
int err = 0;
fz_page *cover = NULL;
fz_var(cover);
fz_var(err);
fz_try(ctx)
cover = fz_load_page(ctx, fzdoc, 0);
fz_catch(ctx)
@@ -19,7 +21,7 @@ fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
if (err != 0) {
fz_drop_page(ctx, cover);
LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
return NULL;
return FALSE;
}
fz_rect bounds = fz_bound_page(ctx, cover);
@@ -60,7 +62,7 @@ fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message)
fz_drop_page(ctx, cover);
fz_drop_pixmap(ctx, pixmap);
return NULL;
return FALSE;
}
fz_buffer *fzbuf = NULL;
@@ -80,15 +82,15 @@ fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
fz_drop_buffer(ctx, fzbuf);
fz_drop_pixmap(ctx, pixmap);
fz_drop_page(ctx, cover);
if (err != 0) {
LOG_WARNINGF(doc->filepath, "fz_new_buffer_from_pixmap_as_png() returned error code [%d] %s", err,
ctx->error.message)
fz_drop_page(ctx, cover);
return NULL;
return FALSE;
}
return cover;
return TRUE;
}
void fz_err_callback(void *user, UNUSED(const char *message)) {
@@ -99,7 +101,7 @@ void fz_err_callback(void *user, UNUSED(const char *message)) {
}
__always_inline
void init_ctx(fz_context *ctx, document_t *doc) {
static void init_ctx(fz_context *ctx, document_t *doc) {
fz_disable_icc(ctx);
fz_register_document_handlers(ctx);
@@ -109,7 +111,8 @@ void init_ctx(fz_context *ctx, document_t *doc) {
ctx->error.print = fz_err_callback;
}
int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
__always_inline
static int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
if (block->type != FZ_STEXT_BLOCK_TEXT) {
return 0;
}
@@ -128,6 +131,7 @@ int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
return 0;
}
#define IS_VALID_BPP(d) (d==1 || d==2 || d==4 || d==8 || d==16 || d==24 || d==32)
void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha),
@@ -135,7 +139,7 @@ void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
int l2factor = 0;
if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE) {
if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && IS_VALID_BPP(img->n)) {
fz_pixmap *pix = img->get_pixmap(ctx, img, NULL, img->w, img->h, &l2factor);
@@ -148,12 +152,14 @@ void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
char *text = TessBaseAPIGetUTF8Text(api);
size_t len = strlen(text);
text_buffer_append_string(&thread_buffer, text, len - 1);
LOG_DEBUGF(
"pdf.c",
"(OCR) %dx%d got %dB from tesseract (%s), buffer:%dB",
pix->w, pix->h, len, ScanCtx.tesseract_lang, thread_buffer.dyn_buffer.cur
)
if (len >= MIN_OCR_LEN) {
text_buffer_append_string(&thread_buffer, text, len - 1);
LOG_DEBUGF(
"pdf.c",
"(OCR) %dx%d got %dB from tesseract (%s), buffer:%dB",
pix->w, pix->h, len, ScanCtx.tesseract_lang, thread_buffer.dyn_buffer.cur
)
}
TessBaseAPIEnd(api);
TessBaseAPIDelete(api);
@@ -162,7 +168,7 @@ void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
}
}
void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
void parse_pdf(const void *buf, size_t buf_len, document_t *doc) {
if (buf == NULL) {
return;
@@ -193,7 +199,7 @@ void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
fz_catch(ctx)
err = ctx->error.errcode;
if (err) {
if (err != 0) {
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
@@ -228,18 +234,11 @@ void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
return;
}
fz_page *cover = NULL;
if (ScanCtx.tn_size > 0) {
cover = render_cover(ctx, doc, fzdoc);
} else {
fz_var(cover);
fz_try(ctx)
cover = fz_load_page(ctx, fzdoc, 0);
fz_catch(ctx)
cover = NULL;
err = render_cover(ctx, doc, fzdoc);
}
if (cover == NULL) {
if (err == TRUE) {
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
@@ -252,23 +251,19 @@ void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
for (int current_page = 0; current_page < page_count; current_page++) {
fz_page *page = NULL;
if (current_page == 0) {
page = cover;
} else {
fz_var(err);
fz_try(ctx)
page = fz_load_page(ctx, fzdoc, current_page);
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
text_buffer_destroy(&thread_buffer);
fz_drop_page(ctx, page);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_var(err);
fz_try(ctx)
page = fz_load_page(ctx, fzdoc, current_page);
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
text_buffer_destroy(&thread_buffer);
fz_drop_page(ctx, page);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
@@ -285,12 +280,12 @@ void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
fz_var(err);
fz_try(ctx)
fz_run_page(ctx, page, dev, fz_identity, NULL);
fz_run_page(ctx, page, dev, fz_identity, NULL);
fz_always(ctx)
{
fz_close_device(ctx, dev);
fz_drop_device(ctx, dev);
}
{
fz_close_device(ctx, dev);
fz_drop_device(ctx, dev);
}
fz_catch(ctx)
err = ctx->error.errcode;

View File

@@ -4,6 +4,6 @@
#include "src/sist.h"
void parse_pdf(void *buf, size_t buf_len, document_t *doc);
void parse_pdf(const void *buf, size_t buf_len, document_t *doc);
#endif

View File

@@ -31,10 +31,11 @@
#include "freetype/freetype.h"
#include <archive.h>
#include <archive_entry.h>
#include <opc/opc.h>
#include <libxml/xmlstring.h>
#include <libxml/parser.h>
#define BOOL int
#include <tesseract/capi.h>
#include <pcre.h>
#include <onion/onion.h>
#include <onion/handler.h>
@@ -47,6 +48,7 @@
#include "types.h"
#include "tpool.h"
#include "utf8.h/utf8.h"
#include "util.h"
#include "io/store.h"
#include "io/serialize.h"
@@ -59,9 +61,9 @@
#include "parsing/font.h"
#include "parsing/arc.h"
#include "parsing/doc.h"
#include "parsing/cbr.h"
#include "cli.h"
#include "log.h"
#include "utf8.h/utf8.h"
#include "src/index/elastic.h"
#include "index/web.h"

View File

@@ -39,6 +39,7 @@ enum metakey {
MetaExifUserComment = 20 | META_STR_MASK,
MetaExifModel = 21 | META_STR_MASK,
MetaExifIsoSpeedRatings = 22 | META_STR_MASK,
MetaExifDateTime = 23 | META_STR_MASK,
//Note to self: this will break after 31 entries
};
@@ -48,7 +49,7 @@ enum metakey {
typedef struct index_descriptor {
char uuid[UUID_STR_LEN];
char version[6];
char version[64];
long timestamp;
char root[PATH_MAX];
char rewrite_url[8196];
@@ -94,7 +95,6 @@ typedef int (*read_func_t)(struct vfile *, void *buf, size_t size);
typedef void (*close_func_t)(struct vfile *);
typedef struct vfile {
union {
int fd;
struct archive *arc;

View File

@@ -1,235 +1,6 @@
#include "util.h"
#include "src/ctx.h"
/**
 * Build a new dynamic buffer.
 *
 * The buffer starts with INITIAL_BUF_SIZE bytes of storage and its write
 * cursor at offset 0. The malloc() result is stored as-is (not checked).
 *
 * @return the initialised buffer, by value
 */
dyn_buffer_t dyn_buffer_create() {
    dyn_buffer_t created;

    created.buf = malloc(INITIAL_BUF_SIZE);
    created.cur = 0;
    created.size = INITIAL_BUF_SIZE;

    return created;
}
/**
 * Make sure `size` more bytes fit after the write cursor.
 *
 * When they do not fit, the capacity is doubled until they do and the
 * storage is reallocated once to the new capacity.
 *
 * @param buf  buffer to grow
 * @param size number of bytes about to be written
 */
void grow_buffer(dyn_buffer_t *buf, size_t size) {
    if (buf->cur + size <= buf->size) {
        // Already enough room.
        return;
    }

    while (buf->cur + size > buf->size) {
        buf->size *= 2;
    }

    buf->buf = realloc(buf->buf, buf->size);
}
/**
 * Make sure at least sizeof(long) bytes fit after the write cursor.
 *
 * The capacity is doubled (once) and the storage reallocated when they
 * do not fit.
 *
 * @param buf buffer to grow
 */
void grow_buffer_small(dyn_buffer_t *buf) {
    if (buf->cur + sizeof(long) <= buf->size) {
        return;
    }

    buf->size *= 2;
    buf->buf = realloc(buf->buf, buf->size);
}
/**
 * Append `size` raw bytes to the buffer, growing it first if needed.
 *
 * @param buf  destination buffer
 * @param data bytes to copy
 * @param size number of bytes to copy
 */
void dyn_buffer_write(dyn_buffer_t *buf, void *data, size_t size) {
    grow_buffer(buf, size);

    memcpy(&buf->buf[buf->cur], data, size);
    buf->cur = buf->cur + size;
}
/**
 * Append a single byte to the buffer, growing it first if needed.
 *
 * @param buf destination buffer
 * @param c   byte to append
 */
void dyn_buffer_write_char(dyn_buffer_t *buf, char c) {
    grow_buffer_small(buf);

    buf->buf[buf->cur] = c;
    buf->cur += sizeof(char);
}
/**
 * Append a C string to the buffer, including its terminating NUL byte.
 *
 * @param buf destination buffer
 * @param str NUL-terminated string to copy
 */
void dyn_buffer_write_str(dyn_buffer_t *buf, char *str) {
    size_t text_len = strlen(str);

    dyn_buffer_write(buf, str, text_len);
    dyn_buffer_write_char(buf, '\0');
}
/**
 * Append the characters of a C string to the buffer, without the
 * terminating NUL byte.
 *
 * @param buf destination buffer
 * @param str NUL-terminated string to copy
 */
void dyn_buffer_append_string(dyn_buffer_t *buf, char *str) {
    size_t text_len = strlen(str);

    dyn_buffer_write(buf, str, text_len);
}
/**
 * Append the native byte representation of an int to the buffer.
 *
 * The value is copied with memcpy() rather than stored through a cast
 * pointer: the write cursor advances one byte at a time elsewhere (see
 * dyn_buffer_write_char), so the destination is not guaranteed to be
 * suitably aligned for an int store.
 *
 * @param buf destination buffer
 * @param d   value to append
 */
void dyn_buffer_write_int(dyn_buffer_t *buf, int d) {
    grow_buffer_small(buf);
    memcpy(buf->buf + buf->cur, &d, sizeof(int));
    buf->cur += sizeof(int);
}
/**
 * Append the native byte representation of a short to the buffer.
 *
 * The value is copied with memcpy() rather than stored through a cast
 * pointer: the write cursor advances one byte at a time elsewhere (see
 * dyn_buffer_write_char), so the destination is not guaranteed to be
 * suitably aligned for a short store.
 *
 * @param buf destination buffer
 * @param s   value to append
 */
void dyn_buffer_write_short(dyn_buffer_t *buf, short s) {
    grow_buffer_small(buf);
    memcpy(buf->buf + buf->cur, &s, sizeof(short));
    buf->cur += sizeof(short);
}
/**
 * Append the native byte representation of an unsigned long to the buffer.
 *
 * The value is copied with memcpy() rather than stored through a cast
 * pointer: the write cursor advances one byte at a time elsewhere (see
 * dyn_buffer_write_char), so the destination is not guaranteed to be
 * suitably aligned for an unsigned long store.
 *
 * @param buf destination buffer
 * @param l   value to append
 */
void dyn_buffer_write_long(dyn_buffer_t *buf, unsigned long l) {
    grow_buffer_small(buf);
    memcpy(buf->buf + buf->cur, &l, sizeof(unsigned long));
    buf->cur += sizeof(unsigned long);
}
/**
 * Free the storage owned by a dynamic buffer.
 *
 * Only the storage is released; the dyn_buffer_t itself is not touched.
 *
 * @param buf buffer whose storage is released
 */
void dyn_buffer_destroy(dyn_buffer_t *buf) {
    void *storage = buf->buf;

    free(storage);
}
/**
 * Free the storage owned by a text buffer's underlying dynamic buffer.
 *
 * @param buf text buffer to release
 */
void text_buffer_destroy(text_buffer_t *buf) {
    dyn_buffer_t *backing = &buf->dyn_buffer;

    dyn_buffer_destroy(backing);
}
/**
 * Build a new, empty text buffer.
 *
 * @param max_size value stored in the buffer's max_size field; the append
 *                 functions treat a value <= 0 as "no limit"
 * @return the initialised text buffer, by value
 */
text_buffer_t text_buffer_create(int max_size) {
    text_buffer_t created;

    created.last_char_was_whitespace = FALSE;
    created.max_size = max_size;
    created.dyn_buffer = dyn_buffer_create();

    return created;
}
/**
 * NUL-terminate the text held in the buffer.
 *
 * When the last byte written is a space, that space is overwritten with the
 * terminator (trimming the trailing separator); otherwise a terminator is
 * appended. The `cur > 0` guard prevents reading the byte before the start
 * of the storage when nothing has been written yet.
 *
 * @param buf text buffer to terminate
 */
void text_buffer_terminate_string(text_buffer_t *buf) {
    if (buf->dyn_buffer.cur > 0 && *(buf->dyn_buffer.buf + buf->dyn_buffer.cur - 1) == ' ') {
        *(buf->dyn_buffer.buf + buf->dyn_buffer.cur - 1) = '\0';
    } else {
        dyn_buffer_write_char(&buf->dyn_buffer, '\0');
    }
}
/**
 * Check whether the bytes at `s` start with one well-formed UTF-8 sequence.
 *
 * The lead byte selects the expected length (1 to 4 bytes). Depending on
 * that length the function reads up to s[4], so the caller must make sure
 * those bytes are readable.
 *
 * @param s pointer to the first byte of the sequence
 * @return TRUE when the sequence passes all checks, FALSE otherwise
 */
__always_inline
int utf8_validchr(const char *s) {
    if (0x00 == (0x80 & *s)) {
        // High bit clear: single-byte character.
        return TRUE;
    } else if (0xf0 == (0xf8 & *s)) {
        // Lead byte 11110xxx: four-byte sequence.
        // The three following bytes must all be continuation bytes (10xxxxxx).
        if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) ||
            (0x80 != (0xc0 & s[3]))) {
            return FALSE;
        }
        // A further continuation byte means the sequence is too long.
        if (0x80 == (0xc0 & s[4])) {
            return FALSE;
        }
        // Payload bits that would fit a shorter encoding are all zero: overlong.
        if ((0 == (0x07 & s[0])) && (0 == (0x30 & s[1]))) {
            return FALSE;
        }
    } else if (0xe0 == (0xf0 & *s)) {
        // Lead byte 1110xxxx: three-byte sequence.
        if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2]))) {
            return FALSE;
        }
        // A further continuation byte means the sequence is too long.
        if (0x80 == (0xc0 & s[3])) {
            return FALSE;
        }
        // Overlong-encoding check for the three-byte form.
        if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1]))) {
            return FALSE;
        }
    } else if (0xc0 == (0xe0 & *s)) {
        // Lead byte 110xxxxx: two-byte sequence.
        if (0x80 != (0xc0 & s[1])) {
            return FALSE;
        }
        // A further continuation byte means the sequence is too long.
        if (0x80 == (0xc0 & s[2])) {
            return FALSE;
        }
        // Overlong-encoding check for the two-byte form.
        if (0 == (0x1e & s[0])) {
            return FALSE;
        }
    } else {
        // Stray continuation byte or an unsupported lead byte.
        return FALSE;
    }
    return TRUE;
}
/**
 * Append the code points of a length-bounded UTF-8 string to a text buffer.
 *
 * @param buf destination text buffer
 * @param str source bytes; may be NULL
 * @param len number of bytes available at `str`
 * @return always 0; the status returned by text_buffer_append_char()
 *         (e.g. TEXT_BUF_FULL) is not propagated
 */
int text_buffer_append_string(text_buffer_t *buf, char *str, size_t len) {
    utf8_int32_t c;
    // Nothing to do for a NULL/empty input, for an input that starts with a
    // NUL byte, or when the first lead byte announces more bytes than `len`
    // provides.
    if (str == NULL || len < 1 ||
        (0xf0 == (0xf8 & str[0]) && len < 4) ||
        (0xe0 == (0xf0 & str[0]) && len < 3) ||
        (0xc0 == (0xe0 & str[0]) && len == 1) ||
        *(str) == 0) {
        return 0;
    }
    // Decode one code point per iteration. The loop ends on a NUL code point
    // or once the cursor `v` is within 4 bytes of `len`.
    // NOTE(review): utf8_validchr() is applied to `v`, the pointer returned by
    // utf8codepoint() — presumably the position after the code point just
    // decoded into `c`; confirm against utf8.h.
    for (void *v = utf8codepoint(str, &c); c != '\0' && ((char *) v - str + 4) < len; v = utf8codepoint(v, &c)) {
        if (utf8_validchr(v)) {
            text_buffer_append_char(buf, c);
        }
    }
    return 0;
}
/**
 * Append the code points of a NUL-terminated UTF-8 string to a text buffer.
 *
 * Decoding stops at the first NUL code point. A code point is appended only
 * when utf8_validchr() accepts the bytes at the decoder's cursor.
 *
 * @param buf destination text buffer
 * @param str NUL-terminated source string
 * @return always 0 (mirrors text_buffer_append_string); previously the
 *         function fell off its end without returning a value
 */
int text_buffer_append_string0(text_buffer_t *buf, char *str) {
    utf8_int32_t c;
    for (void *v = utf8codepoint(str, &c); c != '\0'; v = utf8codepoint(v, &c)) {
        if (utf8_validchr(v)) {
            text_buffer_append_char(buf, c);
        }
    }
    return 0;
}
/**
 * Append one code point to a text buffer, encoded as UTF-8.
 *
 * Characters rejected by SHOULD_IGNORE_CHAR() and literal spaces are
 * collapsed: a run of them produces a single space, and none is written
 * while the buffer is still empty.
 *
 * @param buf destination text buffer
 * @param c   code point to append
 * @return TEXT_BUF_FULL when max_size is positive and the cursor has
 *         reached it after the write, otherwise 0
 */
int text_buffer_append_char(text_buffer_t *buf, int c) {
    if (SHOULD_IGNORE_CHAR(c) || c == ' ') {
        // Separator: emit at most one space per run, never as first byte.
        if (!buf->last_char_was_whitespace && buf->dyn_buffer.cur != 0) {
            dyn_buffer_write_char(&buf->dyn_buffer, ' ');
            buf->last_char_was_whitespace = TRUE;
            if (buf->max_size > 0 && buf->dyn_buffer.cur >= buf->max_size) {
                return TEXT_BUF_FULL;
            }
        }
    } else {
        buf->last_char_was_whitespace = FALSE;
        // Reserves sizeof(long) bytes, which covers the up-to-4 bytes written below.
        grow_buffer_small(&buf->dyn_buffer);
        if (0 == ((utf8_int32_t) 0xffffff80 & c)) {
            // Below 0x80: one byte.
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = (char) c;
        } else if (0 == ((utf8_int32_t) 0xfffff800 & c)) {
            // Below 0x800: two bytes.
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xc0 | (char) (c >> 6);
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
        } else if (0 == ((utf8_int32_t) 0xffff0000 & c)) {
            // Below 0x10000: three bytes.
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xe0 | (char) (c >> 12);
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
        } else {
            // Everything else: four bytes.
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xf0 | (char) (c >> 18);
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 12) & 0x3f);
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
        }
        if (buf->max_size > 0 && buf->dyn_buffer.cur >= buf->max_size) {
            return TEXT_BUF_FULL;
        }
    }
    return 0;
}
/**
 * Record an inode's mtime in the hash table.
 *
 * Both the inode number and the mtime are stored directly in the pointer
 * slots of the table (no allocation).
 *
 * @param table    destination hash table
 * @param inode_no inode number used as key
 * @param mtime    modification time stored as value
 */
void incremental_put(GHashTable *table, unsigned long inode_no, int mtime) {
    gpointer key = (gpointer) inode_no;
    gpointer value = GINT_TO_POINTER(mtime);

    g_hash_table_insert(table, key, value);
}
/**
 * Look up the value stored for an inode number.
 *
 * @param table    hash table to search; may be NULL
 * @param inode_no inode number used as key
 * @return the stored value converted back to int, or 0 when `table` is NULL
 */
int incremental_get(GHashTable *table, unsigned long inode_no) {
    if (table == NULL) {
        return 0;
    }

    gpointer stored = g_hash_table_lookup(table, (gpointer) inode_no);
    return GPOINTER_TO_INT(stored);
}
/**
 * Flag an inode in the hash table by storing the value 1 under its number.
 *
 * The key is built with the same cast as incremental_put() and
 * incremental_get() so all three agree on the key representation.
 *
 * @param table    destination hash table
 * @param inode_no inode number used as key
 * @return always 0; previously the function fell off its end without
 *         returning a value despite being declared int
 */
int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no) {
    g_hash_table_insert(table, (gpointer) inode_no, GINT_TO_POINTER(1));
    return 0;
}
#define PBSTR "========================================"
#define PBWIDTH 40
@@ -259,8 +30,10 @@ char *abspath(const char *path) {
if (abs == NULL) {
return NULL;
}
abs = realloc(abs, strlen(abs) + 2);
strcat(abs, "/");
if (strlen(abs) > 1) {
abs = realloc(abs, strlen(abs) + 2);
strcat(abs, "/");
}
wordfree(&w);
return abs;

View File

@@ -7,7 +7,7 @@
#define INITIAL_BUF_SIZE 1024 * 16
#define SHOULD_IGNORE_CHAR(c) !(SHOULD_KEEP_CHAR(c))
#define SHOULD_KEEP_CHAR(c) ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'z') || (c > 127))
#define SHOULD_KEEP_CHAR(c) ((c >= '\'' && c <= ';') || (c >= 'A' && c <= 'z') || (c > 127))
typedef struct dyn_buffer {
@@ -32,47 +32,253 @@ dyn_buffer_t url_escape(char *str);
void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
GHashTable *incremental_get_table();
dyn_buffer_t dyn_buffer_create();
__always_inline
static int utf8_validchr2(const char *s) {
if (0x00 == (0x80 & *s)) {
return TRUE;
} else if (0xf0 == (0xf8 & *s)) {
if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) ||
(0x80 != (0xc0 & s[3]))) {
return FALSE;
}
void grow_buffer(dyn_buffer_t *buf, size_t size);
if (0x80 == (0xc0 & s[4])) {
return FALSE;
}
void grow_buffer_small(dyn_buffer_t *buf);
if ((0 == (0x07 & s[0])) && (0 == (0x30 & s[1]))) {
return FALSE;
}
} else if (0xe0 == (0xf0 & *s)) {
if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2]))) {
return FALSE;
}
void dyn_buffer_write(dyn_buffer_t *buf, void *data, size_t size);
if (0x80 == (0xc0 & s[3])) {
return FALSE;
}
void dyn_buffer_write_char(dyn_buffer_t *buf, char c);
if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1]))) {
return FALSE;
}
} else if (0xc0 == (0xe0 & *s)) {
if (0x80 != (0xc0 & s[1])) {
return FALSE;
}
void dyn_buffer_write_str(dyn_buffer_t *buf, char *str);
if (0x80 == (0xc0 & s[2])) {
return FALSE;
}
void dyn_buffer_append_string(dyn_buffer_t *buf, char *str);
if (0 == (0x1e & s[0])) {
return FALSE;
}
} else {
return FALSE;
}
void dyn_buffer_write_int(dyn_buffer_t *buf, int d);
return TRUE;
}
void dyn_buffer_write_short(dyn_buffer_t *buf, short s);
void dyn_buffer_write_long(dyn_buffer_t *buf, unsigned long l);
__always_inline
static dyn_buffer_t dyn_buffer_create() {
dyn_buffer_t buf;
void dyn_buffer_destroy(dyn_buffer_t *buf);
buf.size = INITIAL_BUF_SIZE;
buf.cur = 0;
buf.buf = malloc(INITIAL_BUF_SIZE);
void text_buffer_destroy(text_buffer_t *buf);
return buf;
}
text_buffer_t text_buffer_create(int max_size);
__always_inline
static void grow_buffer(dyn_buffer_t *buf, size_t size) {
if (buf->cur + size > buf->size) {
do {
buf->size *= 2;
} while (buf->cur + size > buf->size);
void text_buffer_terminate_string(text_buffer_t *buf);
buf->buf = realloc(buf->buf, buf->size);
}
}
int text_buffer_append_string(text_buffer_t *buf, char *str, size_t len);
int text_buffer_append_string0(text_buffer_t *buf, char *str);
/**
 * Guarantee room for at least sizeof(long) bytes after the write cursor,
 * doubling the capacity (once) and reallocating when there is not enough.
 *
 * @param buf buffer to grow
 */
__always_inline
static void grow_buffer_small(dyn_buffer_t *buf) {
    size_t needed = buf->cur + sizeof(long);

    if (needed > buf->size) {
        buf->size *= 2;
        buf->buf = realloc(buf->buf, buf->size);
    }
}
int text_buffer_append_char(text_buffer_t *buf, int c);
__always_inline
static void dyn_buffer_write(dyn_buffer_t *buf, void *data, size_t size) {
grow_buffer(buf, size);
void incremental_put(GHashTable *table, unsigned long inode_no, int mtime);
memcpy(buf->buf + buf->cur, data, size);
buf->cur += size;
}
int incremental_get(GHashTable *table, unsigned long inode_no);
/**
 * Append one byte to the buffer, growing it first if needed.
 *
 * @param buf destination buffer
 * @param c   byte to append
 */
__always_inline
static void dyn_buffer_write_char(dyn_buffer_t *buf, char c) {
    grow_buffer_small(buf);

    buf->buf[buf->cur] = c;
    buf->cur += sizeof(char);
}
/**
 * Append a C string to the buffer together with its terminating NUL byte.
 *
 * @param buf destination buffer
 * @param str NUL-terminated string to copy
 */
__always_inline
static void dyn_buffer_write_str(dyn_buffer_t *buf, char *str) {
    size_t text_len = strlen(str);

    dyn_buffer_write(buf, str, text_len);
    dyn_buffer_write_char(buf, '\0');
}
/**
 * Append the characters of a C string to the buffer, leaving out the
 * terminating NUL byte.
 *
 * @param buf destination buffer
 * @param str NUL-terminated string to copy
 */
__always_inline
static void dyn_buffer_append_string(dyn_buffer_t *buf, char *str) {
    size_t text_len = strlen(str);

    dyn_buffer_write(buf, str, text_len);
}
/**
 * Append the native byte representation of an int to the buffer.
 *
 * memcpy() is used instead of a store through a cast pointer: the write
 * cursor advances one byte at a time elsewhere (see dyn_buffer_write_char),
 * so the destination is not guaranteed to be aligned for an int store.
 *
 * @param buf destination buffer
 * @param d   value to append
 */
__always_inline
static void dyn_buffer_write_int(dyn_buffer_t *buf, int d) {
    grow_buffer_small(buf);
    memcpy(buf->buf + buf->cur, &d, sizeof(int));
    buf->cur += sizeof(int);
}
/**
 * Append the native byte representation of a short to the buffer.
 *
 * memcpy() is used instead of a store through a cast pointer: the write
 * cursor advances one byte at a time elsewhere (see dyn_buffer_write_char),
 * so the destination is not guaranteed to be aligned for a short store.
 *
 * @param buf destination buffer
 * @param s   value to append
 */
__always_inline
static void dyn_buffer_write_short(dyn_buffer_t *buf, short s) {
    grow_buffer_small(buf);
    memcpy(buf->buf + buf->cur, &s, sizeof(short));
    buf->cur += sizeof(short);
}
/**
 * Append the native byte representation of an unsigned long to the buffer.
 *
 * memcpy() is used instead of a store through a cast pointer: the write
 * cursor advances one byte at a time elsewhere (see dyn_buffer_write_char),
 * so the destination is not guaranteed to be aligned for an unsigned long
 * store.
 *
 * @param buf destination buffer
 * @param l   value to append
 */
__always_inline
static void dyn_buffer_write_long(dyn_buffer_t *buf, unsigned long l) {
    grow_buffer_small(buf);
    memcpy(buf->buf + buf->cur, &l, sizeof(unsigned long));
    buf->cur += sizeof(unsigned long);
}
/**
 * Free the storage owned by a dynamic buffer. The dyn_buffer_t itself is
 * left untouched.
 *
 * @param buf buffer whose storage is released
 */
__always_inline
static void dyn_buffer_destroy(dyn_buffer_t *buf) {
    void *storage = buf->buf;

    free(storage);
}
/**
 * Free the storage owned by a text buffer's underlying dynamic buffer.
 *
 * @param buf text buffer to release
 */
__always_inline
static void text_buffer_destroy(text_buffer_t *buf) {
    dyn_buffer_t *backing = &buf->dyn_buffer;

    dyn_buffer_destroy(backing);
}
/**
 * Build a new, empty text buffer.
 *
 * @param max_size value stored in the buffer's max_size field; the append
 *                 functions treat a value <= 0 as "no limit"
 * @return the initialised text buffer, by value
 */
__always_inline
static text_buffer_t text_buffer_create(int max_size) {
    text_buffer_t created;

    created.last_char_was_whitespace = FALSE;
    created.max_size = max_size;
    created.dyn_buffer = dyn_buffer_create();

    return created;
}
/**
 * Append one code point to a text buffer, encoded as UTF-8.
 *
 * Characters rejected by SHOULD_IGNORE_CHAR() and literal spaces are
 * collapsed: a run of them produces a single space, and none is written
 * while the buffer is still empty.
 *
 * @param buf destination text buffer
 * @param c   code point to append
 * @return TEXT_BUF_FULL when max_size is positive and the cursor has
 *         reached it after the write, otherwise 0
 */
__always_inline
static int text_buffer_append_char(text_buffer_t *buf, int c) {
    if (SHOULD_IGNORE_CHAR(c) || c == ' ') {
        // Separator: emit at most one space per run, never as first byte.
        if (!buf->last_char_was_whitespace && buf->dyn_buffer.cur != 0) {
            dyn_buffer_write_char(&buf->dyn_buffer, ' ');
            buf->last_char_was_whitespace = TRUE;
            if (buf->max_size > 0 && buf->dyn_buffer.cur >= buf->max_size) {
                return TEXT_BUF_FULL;
            }
        }
    } else {
        buf->last_char_was_whitespace = FALSE;
        // Reserves sizeof(long) bytes, which covers the up-to-4 bytes written below.
        grow_buffer_small(&buf->dyn_buffer);
        if (0 == ((utf8_int32_t) 0xffffff80 & c)) {
            // Below 0x80: one byte.
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = (char) c;
        } else if (0 == ((utf8_int32_t) 0xfffff800 & c)) {
            // Below 0x800: two bytes.
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xc0 | (char) (c >> 6);
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
        } else if (0 == ((utf8_int32_t) 0xffff0000 & c)) {
            // Below 0x10000: three bytes.
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xe0 | (char) (c >> 12);
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
        } else {
            // Everything else: four bytes.
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xf0 | (char) (c >> 18);
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 12) & 0x3f);
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
        }
        if (buf->max_size > 0 && buf->dyn_buffer.cur >= buf->max_size) {
            return TEXT_BUF_FULL;
        }
    }
    return 0;
}
/**
 * NUL-terminate the text held in the buffer.
 *
 * A trailing space (if any byte has been written and the last one is a
 * space) is overwritten by the terminator; otherwise the terminator is
 * appended.
 *
 * @param buf text buffer to terminate
 */
__always_inline
static void text_buffer_terminate_string(text_buffer_t *buf) {
    dyn_buffer_t *backing = &buf->dyn_buffer;

    if (backing->cur == 0 || backing->buf[backing->cur - 1] != ' ') {
        dyn_buffer_write_char(backing, '\0');
        return;
    }

    backing->buf[backing->cur - 1] = '\0';
}
/**
 * Append the code points of a length-bounded UTF-8 string to a text buffer.
 *
 * @param buf destination text buffer
 * @param str source bytes; may be NULL
 * @param len number of bytes available at `str`
 * @return always 0; the status returned by text_buffer_append_char()
 *         (e.g. TEXT_BUF_FULL) is not propagated
 */
__always_inline
static int text_buffer_append_string(text_buffer_t *buf, char *str, size_t len) {
    utf8_int32_t c;
    // Nothing to do for a NULL/empty input, for an input that starts with a
    // NUL byte, or when the first lead byte announces more bytes than `len`
    // provides.
    if (str == NULL || len < 1 ||
        (0xf0 == (0xf8 & str[0]) && len < 4) ||
        (0xe0 == (0xf0 & str[0]) && len < 3) ||
        (0xc0 == (0xe0 & str[0]) && len == 1) ||
        *(str) == 0) {
        return 0;
    }
    // Decode one code point per iteration. The loop ends on a NUL code point
    // or once the cursor `v` is within 4 bytes of `len`.
    // NOTE(review): utf8_validchr2() is applied to `v`, the pointer returned
    // by utf8codepoint() — presumably the position after the code point just
    // decoded into `c`; confirm against utf8.h.
    for (void *v = utf8codepoint(str, &c); c != '\0' && ((char *) v - str + 4) < len; v = utf8codepoint(v, &c)) {
        if (utf8_validchr2(v)) {
            text_buffer_append_char(buf, c);
        }
    }
    return 0;
}
/**
 * Append a NUL-terminated UTF-8 string to a text buffer by measuring it
 * with strlen() and delegating to text_buffer_append_string().
 *
 * @param buf destination text buffer
 * @param str NUL-terminated source string
 * @return whatever text_buffer_append_string() returns
 */
__always_inline
static int text_buffer_append_string0(text_buffer_t *buf, char *str) {
    size_t text_len = strlen(str);

    return text_buffer_append_string(buf, str, text_len);
}
/**
 * Record an inode's mtime in the hash table. Key and value are stored
 * directly in the table's pointer slots.
 *
 * @param table    destination hash table
 * @param inode_no inode number used as key
 * @param mtime    modification time stored as value
 */
__always_inline
static void incremental_put(GHashTable *table, unsigned long inode_no, int mtime) {
    gpointer key = (gpointer) inode_no;
    gpointer value = GINT_TO_POINTER(mtime);

    g_hash_table_insert(table, key, value);
}
/**
 * Look up the value stored for an inode number.
 *
 * @param table    hash table to search; may be NULL
 * @param inode_no inode number used as key
 * @return the stored value converted back to int, or 0 when `table` is NULL
 */
__always_inline
static int incremental_get(GHashTable *table, unsigned long inode_no) {
    if (table == NULL) {
        return 0;
    }

    gpointer stored = g_hash_table_lookup(table, (gpointer) inode_no);
    return GPOINTER_TO_INT(stored);
}
/**
 * Flag an inode in the hash table by storing the value 1 under its number.
 *
 * The key is built with the same cast as incremental_put() and
 * incremental_get() so all three agree on the key representation.
 *
 * @param table    destination hash table
 * @param inode_no inode number used as key
 * @return always 0; previously the function fell off its end without
 *         returning a value despite being declared int
 */
__always_inline
static int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no) {
    g_hash_table_insert(table, (gpointer) inode_no, GINT_TO_POINTER(1));
    return 0;
}
int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no);
const char *find_file_in_paths(const char **paths, const char *filename);

View File

@@ -181,7 +181,12 @@ int chunked_response_file(const char *filename, const char *mime,
}
}
onion_response_set_length(res, length);
onion_response_set_header(res, "Content-Type", mime);
if (mime != NULL) {
onion_response_set_header(res, "Content-Type", mime);
} else {
onion_response_set_header(res, "Content-Type", "application/octet-stream");
}
onion_response_write_headers(res);
if ((onion_request_get_flags(request) & OR_HEAD) == OR_HEAD) {
length = 0;
@@ -221,14 +226,6 @@ int search(UNUSED(void *p), onion_request *req, onion_response *res) {
return OCS_NOT_PROCESSED;
}
char *scroll_param;
const char *scroll = onion_request_get_query(req, "scroll");
if (scroll != NULL) {
scroll_param = "?scroll=3m";
} else {
scroll_param = "";
}
const struct onion_block_t *block = onion_request_get_data(req);
if (block == NULL) {
@@ -236,7 +233,7 @@ int search(UNUSED(void *p), onion_request *req, onion_response *res) {
}
char url[4096];
snprintf(url, 4096, "%s/sist2/_search%s", WebCtx.es_url, scroll_param);
snprintf(url, 4096, "%s/sist2/_search", WebCtx.es_url);
response_t *r = web_post(url, onion_block_data(block), "Content-Type: application/json");
set_default_headers(res);
@@ -246,6 +243,7 @@ int search(UNUSED(void *p), onion_request *req, onion_response *res) {
if (r->status_code == 200) {
onion_response_write(res, r->body, r->size);
} else {
sist_log("serve.c", SIST_WARNING, "ElasticSearch error during query");
onion_response_set_code(res, HTTP_INTERNAL_ERROR);
}
@@ -254,43 +252,6 @@ int search(UNUSED(void *p), onion_request *req, onion_response *res) {
return OCS_PROCESSED;
}
int scroll(UNUSED(void *p), onion_request *req, onion_response *res) {
int flags = onion_request_get_flags(req);
if ((flags & OR_METHODS) != OR_GET) {
return OCS_NOT_PROCESSED;
}
char url[4096];
snprintf(url, 4096, "%s/_search/scroll", WebCtx.es_url);
const char *scroll_id = onion_request_get_query(req, "scroll_id");
cJSON *json = cJSON_CreateObject();
cJSON_AddStringToObject(json, "scroll_id", scroll_id);
cJSON_AddStringToObject(json, "scroll", "3m");
char *json_str = cJSON_PrintUnformatted(json);
response_t *r = web_post(url, json_str, "Content-Type: application/json");
cJSON_Delete(json);
cJSON_free(json_str);
if (r->status_code != 200) {
free_response(r);
return OCS_NOT_PROCESSED;
}
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "application/json");
onion_response_set_header(res, "Content-Disposition", "application/json");
onion_response_set_length(res, r->size);
onion_response_write(res, r->body, r->size);
free_response(r);
return OCS_PROCESSED;
}
int serve_file_from_url(cJSON *json, index_t *idx, onion_request *req, onion_response *res) {
const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
@@ -466,7 +427,6 @@ void serve(const char *hostname, const char *port) {
onion_url_add(urls, "img/sprite-skin-flat.png", img_sprite_skin_flag);
onion_url_add(urls, "es", search);
onion_url_add(urls, "scroll", scroll);
onion_url_add(urls, "status", status);
onion_url_add(
urls,

File diff suppressed because one or more lines are too long

View File

@@ -1,9 +0,0 @@
.autocomplete-suggestions {
text-align: left; cursor: default; border: 1px solid #ccc; border-top: 0; background: #fff; box-shadow: -1px 1px 3px rgba(0,0,0,.1);
/* core styles should not be changed */
position: absolute; display: none; z-index: 9999; max-height: 254px; overflow: hidden; overflow-y: auto; box-sizing: border-box;
}
.autocomplete-suggestion { position: relative; padding: 0 .6em; line-height: 23px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; font-size: 1.02em; color: #333; }
.autocomplete-suggestion b { font-weight: normal; color: #1f8dd6; }
.autocomplete-suggestion.selected { background: #f0f0f0; }

View File

@@ -102,6 +102,7 @@ body {
border-bottom: none;
border-left: none;
border-right: none;
padding: .25rem 0.5rem;
}
.list-group-item:first-child {
@@ -199,7 +200,7 @@ body {
max-width: 100%;
max-height: 175px;
margin: 0 auto 0;
padding: 3px 3px 0 3px;
padding: 3px 3px 0;
width: auto;
height: auto;
}
@@ -208,7 +209,7 @@ body {
display: block;
max-width: 64px;
max-height: 64px;
margin: 0 auto 0;
margin: 0 auto;
width: auto;
height: auto;
}
@@ -243,11 +244,18 @@ body {
}
mark {
background: #fff217;
background: rgba(251, 191, 41, 0.25);
border-radius: 0;
padding: 1px 0;
color: inherit;
}
.content-div mark {
background: rgba(251, 191, 41, 0.40);
color: white;
}
.content-div {
font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
font-size: 13px;
@@ -391,10 +399,6 @@ option {
margin-top: 1em;
}
.list-group-item {
padding: .25rem 0.5rem;
}
.wrapper-sm {
min-width: 64px;
}
@@ -423,3 +427,33 @@ option {
margin-top: -14px;
font-size: 11px;
}
@media (min-width: 800px) {
.small-btn {
display: none;
}
.large-btn {
display: inherit;
}
}
@media (max-width: 801px) {
.small-btn {
display: inherit;
}
.large-btn {
display: none;
}
}
#searchBar {
border-right: none;
}
#pathTree .title {
cursor: pointer;
}
svg {
fill: white;
}

View File

@@ -184,6 +184,7 @@ mark {
background: #fff217;
border-radius: 0;
padding: 1px 0;
color: inherit;
}
.content-div {
@@ -287,3 +288,29 @@ mark {
margin-top: -14px;
font-size: 11px;
}
@media (min-width: 800px) {
.small-btn {
display: none;
}
.large-btn {
display: inherit;
}
}
@media (max-width: 801px) {
.small-btn {
display: inherit;
}
.large-btn {
display: none;
}
}
#searchBar {
border-right: none;
}
#pathTree .title {
cursor: pointer;
}

1
web/css/smartphoto.min.css vendored Normal file

File diff suppressed because one or more lines are too long

5
web/js/1_popper.min.js vendored Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -75,6 +75,10 @@ function shouldPlayVideo(hit) {
return videoc !== "hevc" && videoc !== "mpeg2video" && videoc !== "wmv3";
}
/**
 * Tell whether a search hit should be displayed as a raw image.
 *
 * A hit qualifies when its "mime" field starts with "image/" and its
 * "videoc" field is not "tiff". When "mime" is missing or empty, that
 * falsy value itself is returned.
 *
 * @param hit search hit carrying a "_source" object
 * @returns {boolean|*} true/false, or the falsy "mime" value
 */
function shouldDisplayRawImage(hit) {
    const source = hit["_source"];
    const mime = source["mime"];

    if (!mime) {
        return mime;
    }
    if (!mime.startsWith("image/")) {
        return false;
    }
    return source["videoc"] !== "tiff";
}
function makePlaceholder(w, h, small) {
let calc;
if (small) {
@@ -96,10 +100,14 @@ function makePlaceholder(w, h, small) {
return el;
}
/**
 * Build the file-extension suffix of a search hit.
 *
 * @param hit search hit carrying a "_source" object
 * @returns {string} "." followed by the extension, or "" when the hit has
 *          no own "extension" property or the extension is the empty string
 */
function ext(hit) {
    const source = hit["_source"];

    if (!source.hasOwnProperty("extension") || source["extension"] === "") {
        return "";
    }
    return "." + source["extension"];
}
function makeTitle(hit) {
let title = document.createElement("div");
title.setAttribute("class", "file-title");
let extension = hit["_source"].hasOwnProperty("extension") && hit["_source"]["extension"] !== "" ? "." + hit["_source"]["extension"] : "";
let extension = ext(hit);
applyNameToTitle(hit, title, extension);
@@ -113,7 +121,7 @@ function getTags(hit, mimeCategory) {
switch (mimeCategory) {
case "video":
case "image":
if (hit["_source"].hasOwnProperty("videoc")) {
if (hit["_source"].hasOwnProperty("videoc") && hit["_source"]["videoc"]) {
const formatTag = document.createElement("span");
formatTag.setAttribute("class", "badge badge-pill badge-video");
formatTag.appendChild(document.createTextNode(hit["_source"]["videoc"].replace(" ", "")));
@@ -121,7 +129,7 @@ function getTags(hit, mimeCategory) {
}
break;
case "audio": {
if (hit["_source"].hasOwnProperty("audioc")) {
if (hit["_source"].hasOwnProperty("audioc") && hit["_source"]["audioc"]) {
let formatTag = document.createElement("span");
formatTag.setAttribute("class", "badge badge-pill badge-audio");
formatTag.appendChild(document.createTextNode(hit["_source"]["audioc"]));
@@ -156,7 +164,7 @@ function getTags(hit, mimeCategory) {
function infoButtonCb(hit) {
return () => {
getDocumentInfo(hit["_id"]).then(doc => {
$("#modal-title").text(doc["name"] + (doc["extension"] ? "." + doc["extension"] : ""));
$("#modal-title").text(doc["name"] + ext(hit));
const tbody = $("<tbody>");
$("#modal-body").empty()
@@ -175,7 +183,7 @@ function infoButtonCb(hit) {
"bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag"
]);
Object.keys(doc)
.filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key))
.filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || key.startsWith("exif_"))
.forEach(key => {
tbody.append($("<tr>")
.append($("<td>").text(key))
@@ -369,6 +377,7 @@ function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
|| hit["_source"]["mime"] === "application/pdf"
|| hit["_source"]["mime"] === "application/epub+zip"
|| hit["_source"]["mime"] === "application/x-cbz"
|| hit["_source"]["mime"] === "application/x-cbr"
|| hit["_source"].hasOwnProperty("font_name")
) {
thumbnail = document.createElement("img");
@@ -379,6 +388,15 @@ function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
}
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
if (!hit["_source"]["parent"] && shouldDisplayRawImage(hit)) {
imgWrapper.setAttribute("id", "sp" + hit["_id"]);
imgWrapper.setAttribute("data-src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
imgWrapper.setAttribute("href", `f/${hit["_id"]}`);
imgWrapper.setAttribute("data-caption", hit["_source"]["path"] + "/" + hit["_source"]["name"] + ext(hit));
imgWrapper.setAttribute("data-group", "p" + Math.floor(docCount / SIZE));
imgWrapper.classList.add("sp");
}
const placeholder = makePlaceholder(hit["_source"]["width"], hit["_source"]["height"], small);
imgWrapper.appendChild(placeholder);
@@ -499,8 +517,7 @@ function makePreloader() {
function makePageIndicator(searchResult) {
let pageIndicator = document.createElement("div");
pageIndicator.setAttribute("class", "page-indicator font-weight-light");
const totalHits = searchResult["hits"]["total"].hasOwnProperty("value")
? searchResult["hits"]["total"]["value"] : searchResult["hits"]["total"];
const totalHits = searchResult["aggregations"]["total_count"]["value"];
pageIndicator.appendChild(document.createTextNode(docCount + " / " + totalHits));
return pageIndicator;
}
@@ -529,26 +546,29 @@ function makeStatsCard(searchResult) {
resultMode.appendChild(gridMode);
resultMode.appendChild(listMode);
if (mode === "grid") {
if (CONF.options.display === "grid") {
gridMode.classList.add("active")
} else {
listMode.classList.add("active")
}
gridMode.addEventListener("click", () => {
mode = "grid";
localStorage.setItem("mode", mode);
console.log("what");
console.log(CONF.options);
CONF.options.display = "grid";
console.log(CONF.options);
CONF.save();
console.log(CONF.options);
searchDebounced();
});
listMode.addEventListener("click", () => {
mode = "list";
localStorage.setItem("mode", mode);
CONF.options.display = "list";
CONF.save();
searchDebounced();
});
let stat = document.createElement("span");
const totalHits = searchResult["hits"]["total"].hasOwnProperty("value")
? searchResult["hits"]["total"]["value"] : searchResult["hits"]["total"];
const totalHits = searchResult["aggregations"]["total_count"]["value"];
stat.appendChild(document.createTextNode(totalHits + " results in " + searchResult["took"] + "ms"));
statsCardBody.appendChild(stat);
@@ -568,7 +588,7 @@ function makeStatsCard(searchResult) {
function makeResultContainer() {
let resultContainer = document.createElement("div");
if (mode === "grid") {
if (CONF.options.display === "grid") {
resultContainer.setAttribute("class", "card-columns");
} else {
resultContainer.setAttribute("class", "list-group");

56
web/js/jquery-smartphoto.min.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@@ -6,19 +6,47 @@ let tagTree;
let searchBar = document.getElementById("searchBar");
let pathBar = document.getElementById("pathBar");
let scroll_id = null;
let lastDoc = null;
let reachedEnd = false;
let docCount = 0;
let coolingDown = false;
let searchBusy = true;
let selectedIndices = [];
let indexMap = {};
let mode;
if (localStorage.getItem("mode") === null) {
mode = "grid";
} else {
mode = localStorage.getItem("mode")
const CONF = new Settings();
const _defaults = {
display: "grid",
fuzzy: true,
highlight: true
};
/**
 * User settings persisted under the "options" key of window.localStorage.
 *
 * `options` holds the live values. `load()` reads them from storage and
 * `save()` writes them back; both then refresh the page controls through
 * `_onUpdate()`.
 */
function Settings() {
    this.options = {};

    // Mirror the current options into the controls that display them.
    this._onUpdate = function () {
        $("#fuzzyToggle").prop("checked", this.options.fuzzy);
    };

    // Populate `options` from localStorage, layered on top of the defaults.
    this.load = function () {
        const raw = window.localStorage.getItem("options");

        let stored = null;
        if (raw !== null) {
            try {
                stored = JSON.parse(raw);
            } catch (e) {
                // Corrupt value in storage: ignore it and use the defaults.
                stored = null;
            }
        }
        if (stored === null || typeof stored !== "object" || Array.isArray(stored)) {
            stored = {};
        }

        // Copy instead of aliasing `_defaults`, so later edits to `options`
        // never mutate the defaults, and so keys missing from an older saved
        // payload still receive their default value.
        this.options = Object.assign({}, _defaults, stored);
        this._onUpdate();
    };

    // Persist `options` as JSON and refresh the controls.
    this.save = function () {
        window.localStorage.setItem("options", JSON.stringify(this.options));
        this._onUpdate();
    };
}
function showEsError() {
$.toast({
heading: "Elasticsearch connection error",
@@ -53,6 +81,7 @@ window.onload = () => {
}
window.location.reload();
})
CONF.load();
};
function toggleFuzzy() {
@@ -62,8 +91,8 @@ function toggleFuzzy() {
$.jsonPost("i").then(resp => {
const urlIndices = (new URLSearchParams(location.search)).get("i");
resp["indices"].forEach(idx => {
indexMap[idx.id] = idx.name;
const opt = $("<option>")
.attr("value", idx.id)
.append(idx.name);
@@ -79,6 +108,8 @@ $.jsonPost("i").then(resp => {
}
$("#indices").append(opt);
});
createPathTree("#pathTree");
});
function getDocumentInfo(id) {
@@ -105,6 +136,7 @@ function handleTreeClick(tree) {
}
}
//TODO: filter based on selected indexes, sort mime types
$.jsonPost("es", {
aggs: {
mimeTypes: {
@@ -151,15 +183,10 @@ $.jsonPost("es", {
target: '#mimeTree'
});
mimeTree.on("node.click", handleTreeClick(mimeTree));
mimeTree.select();
mimeTree.node("any").deselect();
mimeTree.deselect();
mimeTree.node("any").select();
});
function leafTag(tag) {
const tokens = tag.split(".");
return tokens[tokens.length - 1]
}
// Tags tree
$.jsonPost("es", {
aggs: {
@@ -221,35 +248,10 @@ function addTag(map, tag, id, count) {
}
}
new autoComplete({
selector: '#pathBar',
minChars: 1,
delay: 400,
renderItem: function (item) {
return '<div class="autocomplete-suggestion" data-val="' + item + '">' + item + '</div>';
},
source: async function (term, suggest) {
term = term.toLowerCase();
const choices = await getPathChoices();
let matches = [];
for (let i = 0; i < choices.length; i++) {
if (~choices[i].toLowerCase().indexOf(term)) {
matches.push(choices[i]);
}
}
suggest(matches);
},
onSelect: function () {
searchDebounced();
}
});
function insertHits(resultContainer, hits) {
for (let i = 0; i < hits.length; i++) {
if (mode === "grid") {
if (CONF.options.display === "grid") {
resultContainer.appendChild(createDocCard(hits[i]));
} else {
resultContainer.appendChild(createDocLine(hits[i]));
@@ -259,41 +261,18 @@ function insertHits(resultContainer, hits) {
}
window.addEventListener("scroll", function () {
if (!coolingDown && !searchBusy) {
if (!searchBusy) {
let threshold = 400;
if ((window.innerHeight + window.scrollY) >= document.body.offsetHeight - threshold) {
coolingDown = true;
doScroll();
if (!reachedEnd) {
coolingDown = true;
search(lastDoc);
}
}
}
});
function doScroll() {
$.get("scroll", {scroll_id: scroll_id})
.then(searchResult => {
let searchResults = document.getElementById("searchResults");
let hits = searchResult["hits"]["hits"];
//Page indicator
let pageIndicator = makePageIndicator(searchResult);
searchResults.appendChild(pageIndicator);
//Result container
let resultContainer = makeResultContainer();
searchResults.appendChild(resultContainer);
insertHits(resultContainer, hits);
if (hits.length === SIZE) {
coolingDown = false;
}
})
.fail(() => {
window.location.reload();
})
}
function getSelectedNodes(tree) {
let selectedNodes = [];
@@ -314,21 +293,25 @@ function getSelectedNodes(tree) {
return selectedNodes
}
function search() {
function search(after = null) {
lastDoc = null;
if (searchBusy) {
return;
}
searchBusy = true;
//Clear old search results
let searchResults = document.getElementById("searchResults");
while (searchResults.firstChild) {
searchResults.removeChild(searchResults.firstChild);
//Clear old search results
let preload;
if (!after) {
while (searchResults.firstChild) {
searchResults.removeChild(searchResults.firstChild);
}
preload = makePreloader();
searchResults.appendChild(preload);
}
const preload = makePreloader();
searchResults.appendChild(preload);
let query = searchBar.value;
let empty = query === "";
let condition = empty ? "should" : "must";
@@ -362,27 +345,40 @@ function search() {
filters.push([{terms: {"tag": tags}}]);
}
$.jsonPost("es?scroll=1", {
let q = {
"_source": {
excludes: ["content"]
excludes: ["content", "_tie"]
},
query: {
bool: {
[condition]: {
multi_match: {
simple_query_string: {
query: query,
type: "most_fields",
fields: fields,
operator: "and"
default_operator: "and"
}
},
filter: filters
}
},
sort: [
"_score"
"sort": [
{"_score": {"order": "desc"}},
{"_tie": {"order": "asc"}}
],
highlight: {
aggs:
{
total_size: {"sum": {"field": "size"}},
total_count: {"value_count": {"field": "size"}}
},
size: SIZE,
};
if (after) {
q.search_after = [after["_score"], after["_id"]];
}
if (CONF.options.highlight) {
q.highlight = {
pre_tags: ["<mark>"],
post_tags: ["</mark>"],
fields: {
@@ -392,25 +388,36 @@ function search() {
"name.nGram": {},
font_name: {},
}
},
aggs: {
total_size: {"sum": {"field": "size"}}
},
size: SIZE,
}).then(searchResult => {
scroll_id = searchResult["_scroll_id"];
};
}
preload.remove();
//Search stats
searchResults.appendChild(makeStatsCard(searchResult));
$.jsonPost("es", q).then(searchResult => {
let hits = searchResult["hits"]["hits"];
if (hits) {
lastDoc = hits[hits.length - 1];
}
if (!after) {
preload.remove();
searchResults.appendChild(makeStatsCard(searchResult));
} else {
let pageIndicator = makePageIndicator(searchResult);
searchResults.appendChild(pageIndicator);
}
//Setup page
let resultContainer = makeResultContainer();
searchResults.appendChild(resultContainer);
docCount = 0;
insertHits(resultContainer, searchResult["hits"]["hits"]);
window.setTimeout(() => {
$(".sp").SmartPhoto({animationSpeed: 0, swipeTopToClose: true, showAnimation: false, forceInterval: 50});
}, 100);
if (!after) {
docCount = 0;
}
reachedEnd = hits.length !== SIZE;
insertHits(resultContainer, hits);
searchBusy = false;
});
}
@@ -471,21 +478,136 @@ function updateIndices() {
document.getElementById("indices").addEventListener("change", updateIndices);
updateIndices();
//Suggest
function getPathChoices() {
return new Promise(getPaths => {
$.jsonPost("es", {
suggest: {
path: {
prefix: pathBar.value,
completion: {
field: "suggest-path",
skip_duplicates: true,
size: 10000
}
// Keyboard shortcut: releasing "/" or "Escape" scrolls to the search bar and focuses it.
window.onkeyup = function (e) {
    const isShortcut = e.key === "/" || e.key === "Escape";
    if (!isShortcut) {
        return;
    }

    const bar = document.getElementById("searchBar");
    bar.scrollIntoView();
    bar.focus();
};
/**
 * Fetch the child directories of a path-tree node.
 *
 * Runs a `terms` aggregation on the `path` field, restricted to documents of
 * `node.index` whose `_depth` equals `node.depth + 1`. For nodes below the
 * root level (`node.depth > 0`) the query is also restricted to paths that
 * start with `node.id`.
 *
 * @param {Object} node Tree node; `id`, `index` and `depth` are read.
 * @returns {Promise} Resolves to `false` when the response has no buckets,
 *     otherwise to an array of child node descriptors sorted by path.
 */
function getNextDepth(node) {
    let q = {
        query: {
            bool: {
                filter: [
                    {term: {index: node.index}},
                    {term: {_depth: node.depth + 1}}
                ]
            }
        },
        aggs: {
            paths: {
                terms: {
                    field: "path",
                    size: 10000
                }
            }
        },
        // Only the aggregation is needed, not the matching documents.
        size: 0
    };

    if (node.depth > 0) {
        q.query.bool.must = {
            prefix: {
                path: node.id,
            }
        };
    }

    return $.jsonPost("es", q).then(resp => {
        const buckets = resp["aggregations"]["paths"]["buckets"];
        if (!buckets) {
            return false;
        }

        return buckets
            // Keep paths longer than the parent's id. Nodes whose id starts with "/"
            // keep every bucket (presumably the per-index root nodes — confirm against caller).
            .filter(bucket => bucket.key.length > node.id.length || node.id.startsWith("/"))
            // A comparator must return a negative/zero/positive number; returning
            // a boolean (a.key > b.key) does not produce a reliable ordering.
            .sort((a, b) => {
                if (a.key < b.key) {
                    return -1;
                }
                return a.key > b.key ? 1 : 0;
            })
            .map(bucket => {
                const i = bucket.key.lastIndexOf("/");
                // Show only the last path segment, unless there is no "/" or the
                // last "/" is at index 1, in which case the whole key is shown.
                const name = (i === -1 || i === 1) ? bucket.key : bucket.key.slice(i + 1);
                return {
                    id: bucket.key,
                    text: `${name}/ (${bucket.doc_count})`,
                    depth: node.depth + 1,
                    index: node.index,
                    children: true,
                };
            });
    });
}
/**
 * Create the "node.click" callback for the path tree.
 *
 * Clicking any node with a non-zero depth copies its id into the path bar,
 * hides the path tree modal and triggers a new search. The tree's own click
 * handler is always invoked afterwards.
 *
 * @param {Object} tree The tree the callback is registered on (not read here).
 * @returns {Function} Callback taking (event, node, handler).
 */
function handlePathTreeClick(tree) {
    return (event, node, handler) => {
        const isTopLevel = node.depth === 0;

        if (!isTopLevel) {
            $("#pathBar").val(node.id);
            $("#pathTreeModal").modal("hide");
            searchDebounced();
        }

        handler();
    };
}
/**
 * Build the lazily-loaded path tree and render it into `target`.
 *
 * One root node is added per entry of `selectedIndices`; children are fetched
 * on demand through `getNextDepth`. Clicks are routed to the callback created
 * by `handlePathTreeClick`.
 *
 * @param {string} target Selector of the element that hosts the tree.
 */
function createPathTree(target) {
    let pathTree = new InspireTree({
        // Children are loaded on demand; the returned promise supplies them.
        data: function (node, resolve, reject) {
            return getNextDepth(node);
        }
    });

    selectedIndices.forEach(index => {
        pathTree.addNode({
            id: "/" + index,
            text: `/[${indexMap[index]}]`,
            index: index,
            depth: 0,
            children: true
        });
    });

    new InspireTreeDOM(pathTree, {
        target: target
    });

    pathTree.on("node.click", handlePathTreeClick(pathTree));

    const button = document.querySelector("#pathBarHelper");
    const tooltip = document.querySelector("#pathTreeTooltip");

    // Attach the popper only when both anchor elements exist in the page.
    if (button && tooltip) {
        // NOTE(review): "trigger" may not be a recognised createPopper option — confirm against the Popper version in use.
        Popper.createPopper(button, tooltip, {
            trigger: "click",
            placement: "right",
        });
    }
}
/**
 * Copy the values of the settings dialog controls into CONF, persist them,
 * re-run the search and show a confirmation toast.
 */
function updateSettings() {
    const display = $("#settingDisplay").val();
    const fuzzy = $("#settingFuzzy").prop("checked");
    const highlight = $("#settingHighlight").prop("checked");

    CONF.options.display = display;
    CONF.options.fuzzy = fuzzy;
    CONF.options.highlight = highlight;
    CONF.save();

    searchDebounced();

    const toastOptions = {
        heading: "Settings updated",
        text: "Settings saved to browser storage",
        stack: 3,
        bgColor: "#00a4bc",
        textColor: "#fff",
        position: 'bottom-right',
        hideAfter: 3000,
        loaderBg: "#08c7e8",
    };
    $.toast(toastOptions);
}
/**
 * Reload CONF from storage and reflect its values in the settings dialog controls.
 */
function loadSettings() {
    CONF.load();

    const current = CONF.options;
    $("#settingDisplay").val(current.display);
    $("#settingFuzzy").prop("checked", current.fuzzy);
    $("#settingHighlight").prop("checked", current.highlight);
}

View File

@@ -11,27 +11,44 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">v1.2.5</span>
<span class="badge badge-pill version">v1.3.2</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" id="theme" class="btn" title="Toggle theme" href="/">Theme</a>
<button style="margin-left: auto" class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings</button>
<a id="theme" class="btn" title="Toggle theme" href="/">Theme</a>
</nav>
<div class="container">
<div class="card">
<div class="card-body">
<div class="form-group">
<input id="pathBar" type="search" class="form-control" placeholder="Filter path">
<div class="input-group">
<div class="input-group-prepend">
<button id="pathBarHelper" class="btn btn-outline-secondary" data-toggle="modal" data-target="#pathTreeModal">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 576 512" width="20px"><path d="M288 224h224a32 32 0 0 0 32-32V64a32 32 0 0 0-32-32H400L368 0h-80a32 32 0 0 0-32 32v64H64V8a8 8 0 0 0-8-8H40a8 8 0 0 0-8 8v392a16 16 0 0 0 16 16h208v64a32 32 0 0 0 32 32h224a32 32 0 0 0 32-32V352a32 32 0 0 0-32-32H400l-32-32h-80a32 32 0 0 0-32 32v64H64V128h192v64a32 32 0 0 0 32 32zm0 96h66.74l32 32H512v128H288zm0-288h66.74l32 32H512v128H288z"/></svg>
</button>
</div>
<input id="pathBar" type="search" class="form-control" placeholder="Filter path">
</div>
</div>
<div class="input-group">
<div class="input-group-prepend">
<div class="input-group-text">
<span title="Toggle fuzzy searching" onclick="document.getElementById('fuzzyToggle').click()">Fuzzy&nbsp</span>
<span title="Toggle fuzzy searching" onclick="document.getElementById('fuzzyToggle').click()">Fuzzy&nbsp</span>
<input title="Toggle fuzzy searching" type="checkbox" id="fuzzyToggle"
onclick="toggleFuzzy()" checked>
</div>
</div>
<input id="searchBar" type="search" class="form-control" placeholder="Search">
<div class="input-group-append">
<button class="btn btn-outline-secondary small-btn" type="button" data-toggle="modal"
data-target="#help">?
</button>
<button class="btn btn-outline-secondary large-btn" type="button" data-toggle="modal"
data-target="#help">Help
</button>
</div>
</div>
<input title="File size" id="sizeSlider" name="size">
@@ -45,10 +62,12 @@
<div class="col" id="treeTabs">
<ul class="nav nav-tabs" role="tablist">
<li class="nav-item">
<a class="nav-link active" data-toggle="tab" href="#mime" role="tab" aria-controls="home" aria-selected="true">Mime Types</a>
<a class="nav-link active" data-toggle="tab" href="#mime" role="tab" aria-controls="home"
aria-selected="true">Mime Types</a>
</li>
<li class="nav-item">
<a class="nav-link" data-toggle="tab" href="#tag" role="tab" aria-controls="profile" aria-selected="false" title="User-defined tags">Tags</a>
<a class="nav-link" data-toggle="tab" href="#tag" role="tab" aria-controls="profile"
aria-selected="false" title="User-defined tags">Tags</a>
</li>
</ul>
<div class="tab-content" id="myTabContent">
@@ -79,6 +98,118 @@
</div>
</div>
<!-- Search help dialog: summarises the query syntax accepted by the search bar. -->
<!-- aria-labelledby must reference the id of the title element, so the title carries a unique id. -->
<div class="modal" id="help" tabindex="-1" role="dialog" aria-labelledby="helpModalTitle" aria-hidden="true">
    <div class="modal-dialog modal-lg modal-dialog-centered" role="document">
        <div class="modal-content">
            <div class="modal-header">
                <h5 class="modal-title" id="helpModalTitle">Search help</h5>
                <button type="button" class="close" data-dismiss="modal" aria-label="Close">
                    <span aria-hidden="true">&times;</span>
                </button>
            </div>
            <div class="modal-body">
                <table class="table">
                    <tbody>
                    <tr>
                        <td><code>+</code></td>
                        <td>signifies AND operation</td>
                    </tr>
                    <tr>
                        <td><code>|</code></td>
                        <td>signifies OR operation</td>
                    </tr>
                    <tr>
                        <td><code>-</code></td>
                        <td>negates a single token</td>
                    </tr>
                    <tr>
                        <td><code>""</code></td>
                        <td>wraps a number of tokens to signify a phrase for searching</td>
                    </tr>
                    <tr>
                        <td><code>*</code></td>
                        <td>at the end of a term signifies a prefix query</td>
                    </tr>
                    <tr>
                        <td><code>(</code> and <code>)</code></td>
                        <td>signify precedence</td>
                    </tr>
                    <tr>
                        <td><code>~N</code></td>
                        <td>after a word signifies edit distance (fuzziness)</td>
                    </tr>
                    <tr>
                        <td><code>~N</code></td>
                        <td>after a phrase signifies slop amount</td>
                    </tr>
                    </tbody>
                </table>
                <p>For example: <code>"fried eggs" +(eggplant | potato) -frittata</code> will match the phrase
                <i>fried eggs</i> and either <i>eggplant</i> or <i>potato</i>, but will ignore results
                containing <i>frittata</i>.</p>
                <p>When neither <code>+</code> or <code>|</code> is specified, the default operator is <code>+</code> (and).</p>
                <p>When the <b>Fuzzy</b> option is checked, partial matches are also returned.</p>
                <br>
                <!-- rel="noopener noreferrer" keeps the opened page from reaching window.opener. -->
                <p>For more information, see <a target="_blank" rel="noopener noreferrer"
                href="//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html">Elasticsearch
                documentation</a></p>
            </div>
        </div>
    </div>
</div>
<!-- Settings dialog: its controls are read by updateSettings() when the button is pressed. -->
<!-- aria-labelledby must reference the id of the title element, so the title carries a unique id. -->
<div class="modal" id="settings" tabindex="-1" role="dialog" aria-labelledby="settingsModalTitle" aria-hidden="true">
    <div class="modal-dialog modal-dialog-centered" role="document">
        <div class="modal-content">
            <div class="modal-header">
                <h5 class="modal-title" id="settingsModalTitle">Settings</h5>
                <button type="button" class="close" data-dismiss="modal" aria-label="Close">
                    <span aria-hidden="true">&times;</span>
                </button>
            </div>
            <div class="modal-body">
                <div class="custom-control custom-checkbox">
                    <input type="checkbox" class="custom-control-input" id="settingHighlight">
                    <label class="custom-control-label" for="settingHighlight">Enable highlighting</label>
                </div>
                <div class="custom-control custom-checkbox">
                    <input type="checkbox" class="custom-control-input" id="settingFuzzy">
                    <label class="custom-control-label" for="settingFuzzy">Set fuzzy search by default</label>
                </div>
                <label for="settingDisplay">Display</label>
                <select id="settingDisplay" class="form-control form-control-sm">
                    <option value="grid">Grid</option>
                    <option value="list">List</option>
                </select>
                <br>
                <button style="float: right" class="btn btn-primary" onclick="updateSettings()">Update settings</button>
            </div>
        </div>
    </div>
</div>
<!-- Path selection dialog: hosts the #pathTree container. -->
<!-- aria-labelledby must reference the id of the title element, so the title carries a unique id. -->
<div class="modal" id="pathTreeModal" tabindex="-1" role="dialog" aria-labelledby="pathTreeModalTitle" aria-hidden="true">
    <div class="modal-dialog modal-lg" role="document">
        <div class="modal-content">
            <div class="modal-header">
                <h5 class="modal-title" id="pathTreeModalTitle">Select path</h5>
                <button type="button" class="close" data-dismiss="modal" aria-label="Close">
                    <span aria-hidden="true">&times;</span>
                </button>
            </div>
            <div class="modal-body">
                <div id="pathTree" class="tree"></div>
            </div>
        </div>
    </div>
</div>
<div id="searchResults"></div>
</div>