mirror of
https://github.com/simon987/sist2.git
synced 2025-04-04 07:52:59 +00:00
wip
This commit is contained in:
parent
f56cfb0f2f
commit
27188b6fa0
@ -63,7 +63,7 @@ add_executable(
|
||||
src/database/database_schema.c
|
||||
src/database/database_fts.c
|
||||
src/web/web_fts.c
|
||||
)
|
||||
src/database/database_embeddings.c)
|
||||
set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C)
|
||||
|
||||
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
|
||||
@ -76,6 +76,7 @@ find_package(unofficial-mongoose CONFIG REQUIRED)
|
||||
find_package(CURL CONFIG REQUIRED)
|
||||
find_library(MAGIC_LIB NAMES libmagic.a REQUIRED)
|
||||
find_package(unofficial-sqlite3 CONFIG REQUIRED)
|
||||
find_package(OpenBLAS CONFIG REQUIRED)
|
||||
|
||||
|
||||
target_include_directories(
|
||||
@ -158,6 +159,7 @@ target_link_libraries(
|
||||
|
||||
${MAGIC_LIB}
|
||||
unofficial::sqlite3::sqlite3
|
||||
OpenBLAS::OpenBLAS
|
||||
)
|
||||
|
||||
add_custom_target(
|
||||
|
249
sist2-vue/package-lock.json
generated
249
sist2-vue/package-lock.json
generated
@ -19,6 +19,7 @@
|
||||
"dom-to-image": "^2.6.0",
|
||||
"fslightbox-vue": "fslightbox-vue.tgz",
|
||||
"nouislider": "^15.2.0",
|
||||
"onnxruntime-web": "^1.15.1",
|
||||
"underscore": "^1.13.1",
|
||||
"vue": "^2.6.12",
|
||||
"vue-color": "^2.8.1",
|
||||
@ -30,6 +31,7 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/polyfill": "^7.12.1",
|
||||
"@types/underscore": "^1.11.6",
|
||||
"@vue/cli-plugin-babel": "~5.0.8",
|
||||
"@vue/cli-plugin-router": "~5.0.8",
|
||||
"@vue/cli-plugin-typescript": "^5.0.8",
|
||||
@ -1956,6 +1958,60 @@
|
||||
"integrity": "sha512-a5Sab1C4/icpTZVzZc5Ghpz88yQtGOyNqYXcZgOssB2uuAr+wF/MvN6bgtW32q7HHrvBki+BsZ0OuNv6EV3K9g==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@protobufjs/aspromise": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
|
||||
"integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ=="
|
||||
},
|
||||
"node_modules/@protobufjs/base64": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
|
||||
"integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg=="
|
||||
},
|
||||
"node_modules/@protobufjs/codegen": {
|
||||
"version": "2.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz",
|
||||
"integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg=="
|
||||
},
|
||||
"node_modules/@protobufjs/eventemitter": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
|
||||
"integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q=="
|
||||
},
|
||||
"node_modules/@protobufjs/fetch": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
|
||||
"integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
|
||||
"dependencies": {
|
||||
"@protobufjs/aspromise": "^1.1.1",
|
||||
"@protobufjs/inquire": "^1.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@protobufjs/float": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
|
||||
"integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ=="
|
||||
},
|
||||
"node_modules/@protobufjs/inquire": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz",
|
||||
"integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q=="
|
||||
},
|
||||
"node_modules/@protobufjs/path": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
|
||||
"integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA=="
|
||||
},
|
||||
"node_modules/@protobufjs/pool": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
|
||||
"integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw=="
|
||||
},
|
||||
"node_modules/@protobufjs/utf8": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
|
||||
"integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw=="
|
||||
},
|
||||
"node_modules/@sideway/address": {
|
||||
"version": "4.1.4",
|
||||
"resolved": "https://registry.npmjs.org/@sideway/address/-/address-4.1.4.tgz",
|
||||
@ -2488,6 +2544,12 @@
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/underscore": {
|
||||
"version": "1.11.6",
|
||||
"resolved": "https://registry.npmjs.org/@types/underscore/-/underscore-1.11.6.tgz",
|
||||
"integrity": "sha512-G2oC64I/sR817KDL2b2Mc7+diXyxcibyUeLMyexU4K/sG8hyt/YMlbBK0TVhx/YQ1ehfzgXhLuq2YQHIL4bXUQ==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/webgl-ext": {
|
||||
"version": "0.0.30",
|
||||
"resolved": "https://registry.npmjs.org/@types/webgl-ext/-/webgl-ext-0.0.30.tgz",
|
||||
@ -6563,6 +6625,11 @@
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/flatbuffers": {
|
||||
"version": "1.12.0",
|
||||
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz",
|
||||
"integrity": "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ=="
|
||||
},
|
||||
"node_modules/follow-redirects": {
|
||||
"version": "1.15.2",
|
||||
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
|
||||
@ -6951,6 +7018,11 @@
|
||||
"integrity": "sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/guid-typescript": {
|
||||
"version": "1.0.9",
|
||||
"resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz",
|
||||
"integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ=="
|
||||
},
|
||||
"node_modules/gzip-size": {
|
||||
"version": "6.0.0",
|
||||
"resolved": "https://registry.npmjs.org/gzip-size/-/gzip-size-6.0.0.tgz",
|
||||
@ -8591,6 +8663,32 @@
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/onnx-proto": {
|
||||
"version": "4.0.4",
|
||||
"resolved": "https://registry.npmjs.org/onnx-proto/-/onnx-proto-4.0.4.tgz",
|
||||
"integrity": "sha512-aldMOB3HRoo6q/phyB6QRQxSt895HNNw82BNyZ2CMh4bjeKv7g/c+VpAFtJuEMVfYLMbRx61hbuqnKceLeDcDA==",
|
||||
"dependencies": {
|
||||
"protobufjs": "^6.8.8"
|
||||
}
|
||||
},
|
||||
"node_modules/onnxruntime-common": {
|
||||
"version": "1.15.1",
|
||||
"resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.15.1.tgz",
|
||||
"integrity": "sha512-Y89eJ8QmaRsPZPWLaX7mfqhj63ny47rSkQe80hIo+lvBQdrdXYR9VO362xvZulk9DFkCnXmGidprvgJ07bKsIQ=="
|
||||
},
|
||||
"node_modules/onnxruntime-web": {
|
||||
"version": "1.15.1",
|
||||
"resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.15.1.tgz",
|
||||
"integrity": "sha512-Ky4AXFLFyiGRu5KQJdDcbhdNcO0f2ND/8IPmTEwcKKIHpCwH6/Q9UoMpcoFz78lxGvnmmy+FFgA/Bs1HjdM6LA==",
|
||||
"dependencies": {
|
||||
"flatbuffers": "^1.12.0",
|
||||
"guid-typescript": "^1.0.9",
|
||||
"long": "^4.0.0",
|
||||
"onnx-proto": "^4.0.4",
|
||||
"onnxruntime-common": "~1.15.1",
|
||||
"platform": "^1.3.6"
|
||||
}
|
||||
},
|
||||
"node_modules/open": {
|
||||
"version": "8.4.0",
|
||||
"resolved": "https://registry.npmjs.org/open/-/open-8.4.0.tgz",
|
||||
@ -8972,6 +9070,11 @@
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/platform": {
|
||||
"version": "1.3.6",
|
||||
"resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
|
||||
"integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg=="
|
||||
},
|
||||
"node_modules/popper.js": {
|
||||
"version": "1.16.1",
|
||||
"resolved": "https://registry.npmjs.org/popper.js/-/popper.js-1.16.1.tgz",
|
||||
@ -9593,6 +9696,31 @@
|
||||
"integrity": "sha1-IS1b/hMYMGpCD2QCuOJv85ZHqEk=",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/protobufjs": {
|
||||
"version": "6.11.3",
|
||||
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.3.tgz",
|
||||
"integrity": "sha512-xL96WDdCZYdU7Slin569tFX712BxsxslWwAfAhCYjQKGTq7dAU91Lomy6nLLhh/dyGhk/YH4TwTSRxTzhuHyZg==",
|
||||
"hasInstallScript": true,
|
||||
"dependencies": {
|
||||
"@protobufjs/aspromise": "^1.1.2",
|
||||
"@protobufjs/base64": "^1.1.2",
|
||||
"@protobufjs/codegen": "^2.0.4",
|
||||
"@protobufjs/eventemitter": "^1.1.0",
|
||||
"@protobufjs/fetch": "^1.1.0",
|
||||
"@protobufjs/float": "^1.0.2",
|
||||
"@protobufjs/inquire": "^1.1.0",
|
||||
"@protobufjs/path": "^1.1.2",
|
||||
"@protobufjs/pool": "^1.1.0",
|
||||
"@protobufjs/utf8": "^1.1.0",
|
||||
"@types/long": "^4.0.1",
|
||||
"@types/node": ">=13.7.0",
|
||||
"long": "^4.0.0"
|
||||
},
|
||||
"bin": {
|
||||
"pbjs": "bin/pbjs",
|
||||
"pbts": "bin/pbts"
|
||||
}
|
||||
},
|
||||
"node_modules/proxy-addr": {
|
||||
"version": "2.0.7",
|
||||
"resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",
|
||||
@ -13273,6 +13401,60 @@
|
||||
"integrity": "sha512-a5Sab1C4/icpTZVzZc5Ghpz88yQtGOyNqYXcZgOssB2uuAr+wF/MvN6bgtW32q7HHrvBki+BsZ0OuNv6EV3K9g==",
|
||||
"dev": true
|
||||
},
|
||||
"@protobufjs/aspromise": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
|
||||
"integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ=="
|
||||
},
|
||||
"@protobufjs/base64": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
|
||||
"integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg=="
|
||||
},
|
||||
"@protobufjs/codegen": {
|
||||
"version": "2.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz",
|
||||
"integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg=="
|
||||
},
|
||||
"@protobufjs/eventemitter": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
|
||||
"integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q=="
|
||||
},
|
||||
"@protobufjs/fetch": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
|
||||
"integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
|
||||
"requires": {
|
||||
"@protobufjs/aspromise": "^1.1.1",
|
||||
"@protobufjs/inquire": "^1.1.0"
|
||||
}
|
||||
},
|
||||
"@protobufjs/float": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
|
||||
"integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ=="
|
||||
},
|
||||
"@protobufjs/inquire": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz",
|
||||
"integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q=="
|
||||
},
|
||||
"@protobufjs/path": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
|
||||
"integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA=="
|
||||
},
|
||||
"@protobufjs/pool": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
|
||||
"integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw=="
|
||||
},
|
||||
"@protobufjs/utf8": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
|
||||
"integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw=="
|
||||
},
|
||||
"@sideway/address": {
|
||||
"version": "4.1.4",
|
||||
"resolved": "https://registry.npmjs.org/@sideway/address/-/address-4.1.4.tgz",
|
||||
@ -13727,6 +13909,12 @@
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"@types/underscore": {
|
||||
"version": "1.11.6",
|
||||
"resolved": "https://registry.npmjs.org/@types/underscore/-/underscore-1.11.6.tgz",
|
||||
"integrity": "sha512-G2oC64I/sR817KDL2b2Mc7+diXyxcibyUeLMyexU4K/sG8hyt/YMlbBK0TVhx/YQ1ehfzgXhLuq2YQHIL4bXUQ==",
|
||||
"dev": true
|
||||
},
|
||||
"@types/webgl-ext": {
|
||||
"version": "0.0.30",
|
||||
"resolved": "https://registry.npmjs.org/@types/webgl-ext/-/webgl-ext-0.0.30.tgz",
|
||||
@ -16894,6 +17082,11 @@
|
||||
"path-exists": "^4.0.0"
|
||||
}
|
||||
},
|
||||
"flatbuffers": {
|
||||
"version": "1.12.0",
|
||||
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz",
|
||||
"integrity": "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ=="
|
||||
},
|
||||
"follow-redirects": {
|
||||
"version": "1.15.2",
|
||||
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
|
||||
@ -17162,6 +17355,11 @@
|
||||
"integrity": "sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA==",
|
||||
"dev": true
|
||||
},
|
||||
"guid-typescript": {
|
||||
"version": "1.0.9",
|
||||
"resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz",
|
||||
"integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ=="
|
||||
},
|
||||
"gzip-size": {
|
||||
"version": "6.0.0",
|
||||
"resolved": "https://registry.npmjs.org/gzip-size/-/gzip-size-6.0.0.tgz",
|
||||
@ -18424,6 +18622,32 @@
|
||||
"mimic-fn": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"onnx-proto": {
|
||||
"version": "4.0.4",
|
||||
"resolved": "https://registry.npmjs.org/onnx-proto/-/onnx-proto-4.0.4.tgz",
|
||||
"integrity": "sha512-aldMOB3HRoo6q/phyB6QRQxSt895HNNw82BNyZ2CMh4bjeKv7g/c+VpAFtJuEMVfYLMbRx61hbuqnKceLeDcDA==",
|
||||
"requires": {
|
||||
"protobufjs": "^6.8.8"
|
||||
}
|
||||
},
|
||||
"onnxruntime-common": {
|
||||
"version": "1.15.1",
|
||||
"resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.15.1.tgz",
|
||||
"integrity": "sha512-Y89eJ8QmaRsPZPWLaX7mfqhj63ny47rSkQe80hIo+lvBQdrdXYR9VO362xvZulk9DFkCnXmGidprvgJ07bKsIQ=="
|
||||
},
|
||||
"onnxruntime-web": {
|
||||
"version": "1.15.1",
|
||||
"resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.15.1.tgz",
|
||||
"integrity": "sha512-Ky4AXFLFyiGRu5KQJdDcbhdNcO0f2ND/8IPmTEwcKKIHpCwH6/Q9UoMpcoFz78lxGvnmmy+FFgA/Bs1HjdM6LA==",
|
||||
"requires": {
|
||||
"flatbuffers": "^1.12.0",
|
||||
"guid-typescript": "^1.0.9",
|
||||
"long": "^4.0.0",
|
||||
"onnx-proto": "^4.0.4",
|
||||
"onnxruntime-common": "~1.15.1",
|
||||
"platform": "^1.3.6"
|
||||
}
|
||||
},
|
||||
"open": {
|
||||
"version": "8.4.0",
|
||||
"resolved": "https://registry.npmjs.org/open/-/open-8.4.0.tgz",
|
||||
@ -18710,6 +18934,11 @@
|
||||
"find-up": "^4.0.0"
|
||||
}
|
||||
},
|
||||
"platform": {
|
||||
"version": "1.3.6",
|
||||
"resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
|
||||
"integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg=="
|
||||
},
|
||||
"popper.js": {
|
||||
"version": "1.16.1",
|
||||
"resolved": "https://registry.npmjs.org/popper.js/-/popper.js-1.16.1.tgz",
|
||||
@ -19118,6 +19347,26 @@
|
||||
"integrity": "sha1-IS1b/hMYMGpCD2QCuOJv85ZHqEk=",
|
||||
"dev": true
|
||||
},
|
||||
"protobufjs": {
|
||||
"version": "6.11.3",
|
||||
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.3.tgz",
|
||||
"integrity": "sha512-xL96WDdCZYdU7Slin569tFX712BxsxslWwAfAhCYjQKGTq7dAU91Lomy6nLLhh/dyGhk/YH4TwTSRxTzhuHyZg==",
|
||||
"requires": {
|
||||
"@protobufjs/aspromise": "^1.1.2",
|
||||
"@protobufjs/base64": "^1.1.2",
|
||||
"@protobufjs/codegen": "^2.0.4",
|
||||
"@protobufjs/eventemitter": "^1.1.0",
|
||||
"@protobufjs/fetch": "^1.1.0",
|
||||
"@protobufjs/float": "^1.0.2",
|
||||
"@protobufjs/inquire": "^1.1.0",
|
||||
"@protobufjs/path": "^1.1.2",
|
||||
"@protobufjs/pool": "^1.1.0",
|
||||
"@protobufjs/utf8": "^1.1.0",
|
||||
"@types/long": "^4.0.1",
|
||||
"@types/node": ">=13.7.0",
|
||||
"long": "^4.0.0"
|
||||
}
|
||||
},
|
||||
"proxy-addr": {
|
||||
"version": "2.0.7",
|
||||
"resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",
|
||||
|
@ -18,6 +18,7 @@
|
||||
"dom-to-image": "^2.6.0",
|
||||
"fslightbox-vue": "fslightbox-vue.tgz",
|
||||
"nouislider": "^15.2.0",
|
||||
"onnxruntime-web": "^1.15.1",
|
||||
"underscore": "^1.13.1",
|
||||
"vue": "^2.6.12",
|
||||
"vue-color": "^2.8.1",
|
||||
@ -29,6 +30,7 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/polyfill": "^7.12.1",
|
||||
"@types/underscore": "^1.11.6",
|
||||
"@vue/cli-plugin-babel": "~5.0.8",
|
||||
"@vue/cli-plugin-router": "~5.0.8",
|
||||
"@vue/cli-plugin-typescript": "^5.0.8",
|
||||
|
@ -103,6 +103,16 @@ class Sist2ElasticsearchQuery {
|
||||
q["highlightContextSize"] = Number(getters.optFragmentSize);
|
||||
}
|
||||
|
||||
if (getters.embeddingText) {
|
||||
q["model"] = getters.embeddingsModel;
|
||||
q["embedding"] = getters.embedding;
|
||||
q["sort"] = "embedding";
|
||||
q["sortAsc"] = false;
|
||||
} else if (getters.sortMode == "embedding") {
|
||||
q["sort"] = "sort"
|
||||
q["sortAsc"] = true;
|
||||
}
|
||||
|
||||
return q;
|
||||
}
|
||||
}
|
||||
|
@ -12,7 +12,7 @@ export default {
|
||||
props: ["span", "text"],
|
||||
methods: {
|
||||
getStyle() {
|
||||
return ModelsRepo.data[this.$store.getters.mlModel.name].labelStyles[this.span.label];
|
||||
return ModelsRepo.data[this.$store.getters.nerModel.name].labelStyles[this.span.label];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -22,7 +22,7 @@ export default {
|
||||
props: ["spans", "text"],
|
||||
computed: {
|
||||
legend() {
|
||||
return Object.entries(ModelsRepo.data[this.$store.state.mlModel.name].legend)
|
||||
return Object.entries(ModelsRepo.data[this.$store.state.nerModel.name].legend)
|
||||
.map(([label, name]) => ({
|
||||
text: name,
|
||||
id: label,
|
||||
|
99
sist2-vue/src/components/EmbeddingsSearchBar.vue
Normal file
99
sist2-vue/src/components/EmbeddingsSearchBar.vue
Normal file
@ -0,0 +1,99 @@
|
||||
<template>
|
||||
<div>
|
||||
<b-progress v-if="modelLoading" :value="modelLoadingProgress" max="1" class="mb-1" variant="warning"
|
||||
show-progress>
|
||||
</b-progress>
|
||||
<b-input-group>
|
||||
<b-form-input :value="embeddingText"
|
||||
:placeholder="$t('embeddingsSearchPlaceholder')"
|
||||
@input="onInput($event)"
|
||||
:disabled="modelLoading"
|
||||
></b-form-input>
|
||||
|
||||
<!-- TODO: dropdown of available models-->
|
||||
<!-- <template #prepend>-->
|
||||
<!-- <b-input-group-text>-->
|
||||
<!-- <b-form-checkbox :checked="fuzzy" title="Toggle fuzzy searching" @change="setFuzzy($event)">-->
|
||||
<!-- {{ $t("searchBar.fuzzy") }}-->
|
||||
<!-- </b-form-checkbox>-->
|
||||
<!-- </b-input-group-text>-->
|
||||
<!-- </template>-->
|
||||
<template #append>
|
||||
<b-input-group-text>
|
||||
<MLIcon></MLIcon>
|
||||
</b-input-group-text>
|
||||
</template>
|
||||
</b-input-group>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script>
|
||||
import {mapGetters, mapMutations} from "vuex";
|
||||
import {CLIPTransformerModel} from "@/ml/CLIPTransformerModel"
|
||||
import _debounce from "lodash/debounce";
|
||||
import MLIcon from "@/components/icons/MlIcon.vue";
|
||||
|
||||
/**
 * Search bar that turns free text into an embedding vector with a CLIP text
 * model running in the browser, and publishes both the text and the vector
 * to the store. The model is downloaded lazily on the first input.
 */
export default {
    components: {MLIcon},
    data() {
        return {
            // True while the model is being downloaded/initialised; the
            // template disables the input and shows the progress bar.
            modelLoading: false,
            // Last progress value reported by model.init(); the progress bar
            // in the template uses max="1".
            modelLoadingProgress: 0,
            // True once init() has completed successfully.
            modelLoaded: false,
            model: null
        }
    },
    computed: {
        ...mapGetters({
            optQueryMode: "optQueryMode",
            embeddingText: "embeddingText",
            fuzzy: "fuzzy",
        }),
    },
    mounted() {
        // The template calls onInput(); debounce it so the model only runs
        // once typing has paused for 300 ms.
        this.onInput = _debounce(this._onInput, 300, {leading: false});
    },
    methods: {
        ...mapMutations({
            setEmbeddingText: "setEmbeddingText",
            setEmbedding: "setEmbedding",
            setEmbeddingModel: "setEmbeddingsModel",
        }),
        /**
         * Download and initialise the CLIP text model.
         *
         * `modelLoading` is always reset, even when the download or the
         * initialisation fails, so the input does not stay disabled forever;
         * `modelLoaded` is only set on success, which lets the next input
         * retry. Errors are propagated to the caller.
         */
        async loadModel() {
            this.modelLoading = true;
            try {
                this.model = new CLIPTransformerModel(
                    // TODO: add a config for this (?)
                    "https://github.com/simon987/sist2-models/raw/main/clip/models/clip-vit-base-patch32-q8.onnx",
                    "https://github.com/simon987/sist2-models/raw/main/clip/models/tokenizer.json",
                );

                await this.model.init(async progress => {
                    this.modelLoadingProgress = progress;
                });
                this.modelLoaded = true;
            } finally {
                this.modelLoading = false;
            }
        },
        /**
         * Debounced input handler: loads the model on first use, then stores
         * the text and its embedding. Empty text clears both.
         *
         * @param {string} text Current value of the input.
         */
        async _onInput(text) {
            if (!this.modelLoaded) {
                await this.loadModel();
                this.setEmbeddingModel(1); // TODO
            }

            if (text.length === 0) {
                this.setEmbeddingText("");
                this.setEmbedding(null);
                return;
            }

            const embeddings = await this.model.predict(text);

            this.setEmbeddingText(text);
            this.setEmbedding(embeddings);
        }
    }
}
|
||||
</script>
|
||||
<style>
|
||||
</style>
|
@ -9,7 +9,7 @@
|
||||
<b-button :disabled="mlPredictionsLoading || mlLoading" @click="mlAnalyze" variant="primary"
|
||||
>{{ $t("ml.analyzeText") }}
|
||||
</b-button>
|
||||
<b-select :disabled="mlPredictionsLoading || mlLoading" class="ml-2" v-model="mlModel">
|
||||
<b-select :disabled="mlPredictionsLoading || mlLoading" class="ml-2" v-model="nerModel">
|
||||
<b-select-option :value="opt.value" v-for="opt of ModelsRepo.getOptions()">{{ opt.text }}
|
||||
</b-select-option>
|
||||
</b-select>
|
||||
@ -57,16 +57,16 @@ export default {
|
||||
modelPredictionProgress: 0,
|
||||
mlPredictionsLoading: false,
|
||||
mlLoading: false,
|
||||
mlModel: null,
|
||||
nerModel: null,
|
||||
analyzedContentSpans: []
|
||||
}
|
||||
},
|
||||
mounted() {
|
||||
|
||||
if (this.$store.getters.optMlDefaultModel) {
|
||||
this.mlModel = this.$store.getters.optMlDefaultModel
|
||||
this.nerModel = this.$store.getters.optMlDefaultModel
|
||||
} else {
|
||||
this.mlModel = ModelsRepo.getDefaultModel();
|
||||
this.nerModel = ModelsRepo.getDefaultModel();
|
||||
}
|
||||
|
||||
Sist2Api
|
||||
@ -86,7 +86,7 @@ export default {
|
||||
computed: {
|
||||
...mapGetters(["optAutoAnalyze"]),
|
||||
modelSize() {
|
||||
const modelData = ModelsRepo.data[this.mlModel];
|
||||
const modelData = ModelsRepo.data[this.nerModel];
|
||||
if (!modelData) {
|
||||
return 0;
|
||||
}
|
||||
@ -110,10 +110,10 @@ export default {
|
||||
}
|
||||
},
|
||||
async getMlModel() {
|
||||
if (this.$store.getters.mlModel.name !== this.mlModel) {
|
||||
if (this.$store.getters.nerModel.name !== this.nerModel) {
|
||||
this.mlLoading = true;
|
||||
this.modelLoadingProgress = 0;
|
||||
const modelInfo = ModelsRepo.data[this.mlModel];
|
||||
const modelInfo = ModelsRepo.data[this.nerModel];
|
||||
|
||||
const model = new BertNerModel(
|
||||
modelInfo.vocabUrl,
|
||||
@ -122,25 +122,25 @@ export default {
|
||||
)
|
||||
|
||||
await model.init(progress => this.modelLoadingProgress = progress);
|
||||
this.$store.commit("setMlModel", {model, name: this.mlModel});
|
||||
this.$store.commit("setNerModel", {model, name: this.nerModel});
|
||||
|
||||
this.mlLoading = false;
|
||||
return model
|
||||
}
|
||||
|
||||
return this.$store.getters.mlModel.model;
|
||||
return this.$store.getters.nerModel.model;
|
||||
},
|
||||
async mlAnalyze() {
|
||||
if (!this.content) {
|
||||
return;
|
||||
}
|
||||
|
||||
const modelInfo = ModelsRepo.data[this.mlModel];
|
||||
const modelInfo = ModelsRepo.data[this.nerModel];
|
||||
if (modelInfo === undefined) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.$store.commit("setOptMlDefaultModel", this.mlModel);
|
||||
this.$store.commit("setOptMlDefaultModel", this.nerModel);
|
||||
await this.$store.dispatch("updateConfiguration");
|
||||
|
||||
const model = await this.getMlModel();
|
||||
|
@ -1,5 +1,5 @@
|
||||
<template>
|
||||
<b-dropdown variant="primary">
|
||||
<b-dropdown variant="primary" :disabled="$store.getters.embeddingText !== ''">
|
||||
<b-dropdown-item :class="{'dropdown-active': sort === 'score'}" @click="onSelect('score')">{{
|
||||
$t("sort.relevance")
|
||||
}}
|
||||
|
@ -210,4 +210,8 @@ export default {
|
||||
.theme-black .inspire-tree .matched > .wholerow {
|
||||
background: rgba(251, 191, 41, 0.25);
|
||||
}
|
||||
#tagTree {
|
||||
max-height: 350px;
|
||||
overflow: auto;
|
||||
}
|
||||
</style>
|
50
sist2-vue/src/components/icons/MlIcon.vue
Normal file
50
sist2-vue/src/components/icons/MlIcon.vue
Normal file
@ -0,0 +1,50 @@
|
||||
<template>
|
||||
<svg height="20px" width="20px" xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 512 512" xml:space="preserve">
|
||||
<g>
|
||||
<path class="st0" d="M167.314,14.993C167.314,6.712,160.602,0,152.332,0h-5.514c-8.27,0-14.982,6.712-14.982,14.993v41.466h35.478
|
||||
V14.993z"/>
|
||||
<path class="st0"
|
||||
d="M238.26,14.993C238.26,6.712,231.549,0,223.278,0h-5.504c-8.271,0-14.982,6.712-14.982,14.993v41.466h35.468 V14.993z"/>
|
||||
<path class="st0"
|
||||
d="M309.207,14.993C309.207,6.712,302.496,0,294.225,0h-5.504c-8.271,0-14.982,6.712-14.982,14.993v41.466h35.468 V14.993z"/>
|
||||
<path class="st0"
|
||||
d="M380.164,14.993C380.164,6.712,373.453,0,365.182,0h-5.514c-8.27,0-14.982,6.712-14.982,14.993v41.466h35.478 V14.993z"/>
|
||||
<path class="st0"
|
||||
d="M131.836,497.007c0,8.282,6.712,14.993,14.982,14.993h5.514c8.27,0,14.982-6.711,14.982-14.993V455.55h-35.478 V497.007z"/>
|
||||
<path class="st0"
|
||||
d="M202.792,497.007c0,8.282,6.712,14.993,14.982,14.993h5.504c8.27,0,14.982-6.711,14.982-14.993V455.55h-35.468 V497.007z"/>
|
||||
<path class="st0"
|
||||
d="M273.739,497.007c0,8.282,6.712,14.993,14.982,14.993h5.504c8.271,0,14.982-6.711,14.982-14.993V455.55 h-35.468V497.007z"/>
|
||||
<path class="st0"
|
||||
d="M344.686,497.007c0,8.282,6.712,14.993,14.982,14.993h5.514c8.271,0,14.982-6.711,14.982-14.993V455.55 h-35.478V497.007z"/>
|
||||
<path class="st0"
|
||||
d="M497.018,131.836H455.55v35.479h41.468c8.27,0,14.982-6.712,14.982-14.993v-5.493 C512,138.548,505.288,131.836,497.018,131.836z"/>
|
||||
<path class="st0"
|
||||
d="M497.018,202.793H455.55v35.468h41.468c8.27,0,14.982-6.712,14.982-14.982v-5.494 C512,209.504,505.288,202.793,497.018,202.793z"/>
|
||||
<path class="st0"
|
||||
d="M497.018,273.739H455.55v35.468h41.468c8.27,0,14.982-6.711,14.982-14.992v-5.494 C512,280.451,505.288,273.739,497.018,273.739z"/>
|
||||
<path class="st0"
|
||||
d="M497.018,344.686H455.55v35.479h41.468c8.27,0,14.982-6.712,14.982-14.993v-5.493 C512,351.398,505.288,344.686,497.018,344.686z"/>
|
||||
<path class="st0"
|
||||
d="M0,146.828v5.493c0,8.281,6.711,14.993,14.982,14.993H56.46v-35.479H14.982C6.711,131.836,0,138.548,0,146.828 z"/>
|
||||
<path class="st0"
|
||||
d="M0,217.785v5.494c0,8.27,6.711,14.982,14.982,14.982H56.46v-35.468H14.982C6.711,202.793,0,209.504,0,217.785z "/>
|
||||
<path class="st0"
|
||||
d="M0,288.721v5.494c0,8.281,6.711,14.992,14.982,14.992H56.46v-35.468H14.982C6.711,273.739,0,280.451,0,288.721 z"/>
|
||||
<path class="st0"
|
||||
d="M0,359.679v5.493c0,8.281,6.711,14.993,14.982,14.993H56.46v-35.479H14.982C6.711,344.686,0,351.398,0,359.679 z"/>
|
||||
<path class="st0"
|
||||
d="M78.628,433.382h354.753V78.628H78.628V433.382z M376.56,120.2c9.18,0,16.635,7.445,16.635,16.634 c0,9.18-7.455,16.624-16.635,16.624c-9.179,0-16.624-7.445-16.624-16.624C359.936,127.644,367.381,120.2,376.56,120.2z M376.56,361.32c9.18,0,16.635,7.445,16.635,16.635c0,9.179-7.455,16.623-16.635,16.623c-9.179,0-16.624-7.444-16.624-16.623 C359.936,368.764,367.381,361.32,376.56,361.32z M184.362,184.362h143.287v143.287H184.362V184.362z M135.439,120.2 c9.19,0,16.635,7.445,16.635,16.634c0,9.169-7.445,16.624-16.635,16.624c-9.178,0-16.623-7.455-16.623-16.624 C118.816,127.644,126.26,120.2,135.439,120.2z M135.439,361.32c9.19,0,16.635,7.445,16.635,16.635 c0,9.169-7.445,16.623-16.635,16.623c-9.178,0-16.623-7.454-16.623-16.623C118.816,368.764,126.26,361.32,135.439,361.32z"/>
|
||||
</g>
|
||||
</svg>
|
||||
</template>
|
||||
|
||||
<script>
|
||||
export default {
|
||||
name: "MLIcon"
|
||||
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
</style>
|
@ -18,6 +18,7 @@ export default {
|
||||
tags: "Tags",
|
||||
tagFilter: "Filter tags",
|
||||
forExample: "For example:",
|
||||
embeddingsSearchPlaceholder: "Embeddings search",
|
||||
help: {
|
||||
simpleSearch: "Simple search",
|
||||
advancedSearch: "Advanced search",
|
||||
|
118
sist2-vue/src/ml/BPETokenizer.js
Normal file
118
sist2-vue/src/ml/BPETokenizer.js
Normal file
@ -0,0 +1,118 @@
|
||||
// Fallback rank used when a symbol pair has no entry in the BPE rank table,
// so that unranked pairs never win the "lowest rank" selection in bpe().
const inf = Number.POSITIVE_INFINITY;
|
||||
// Token id that encode() places at the front of every encoded sequence.
const START_TOK = 49406;
|
||||
// Token id that encode() appends after the (possibly truncated) tokens.
const END_TOK = 49407;
|
||||
|
||||
/**
 * Reduce `array` to a single element using a pairwise predicate.
 *
 * The first element is the initial pick. For every following element,
 * `key(currentPick, candidate)` is evaluated; a truthy result makes the
 * candidate the new pick. With a "current is greater than candidate"
 * predicate this yields the minimum.
 *
 * @param {Array} array Elements to scan. An empty array throws a TypeError
 *                      because the reduction has no initial value.
 * @param {function(*, *): boolean} key Predicate deciding whether the
 *                      candidate should replace the current pick.
 * @returns {*} The element that survived the reduction.
 */
function min(array, key) {
    const pick = (current, candidate) => {
        if (key(current, candidate)) {
            return candidate;
        }
        return current;
    };

    return array.reduce(pick);
}
|
||||
|
||||
/**
 * A Set whose members are arrays of strings ("tuples").
 *
 * Tuples are stored as a single string joined with a back-tick, which gives
 * them value equality instead of the reference equality a plain Set would
 * use for arrays. Elements that themselves contain a back-tick will not
 * round-trip through toList().
 */
class TupleSet extends Set {
    /**
     * Insert a tuple.
     * @param {string[]} elem Tuple to insert.
     * @returns {TupleSet} This set, as with Set.prototype.add.
     */
    add(elem) {
        const joined = elem.join("`");
        return super.add(joined);
    }

    /**
     * Test whether an equal tuple has already been inserted.
     * @param {string[]} elem Tuple to look up.
     * @returns {boolean}
     */
    has(elem) {
        const joined = elem.join("`");
        return super.has(joined);
    }

    /**
     * @returns {string[][]} The stored tuples, in insertion order.
     */
    toList() {
        return Array.from(this, joined => joined.split("`"));
    }
}
|
||||
|
||||
/**
 * Byte-pair-encoding tokenizer producing fixed-length id sequences.
 *
 * `encoder` maps a BPE symbol (e.g. "cat</w>") to its integer id.
 * `bpeRanks` maps a back-tick-joined symbol pair (e.g. "c`a") to its merge
 * rank; lower ranks are merged first.
 */
export class BPETokenizer {

    _encoder = null;
    _bpeRanks = null;

    /**
     * @param {Object<string, number>} encoder  Symbol -> token id table.
     * @param {Object<string, number>} bpeRanks "first`second" -> merge rank table.
     */
    constructor(encoder, bpeRanks) {
        this._encoder = encoder;
        this._bpeRanks = bpeRanks;
    }

    /**
     * List the distinct adjacent symbol pairs of `word`.
     *
     * @param {string[]} word Current symbols of the word.
     * @returns {string[][]} Unique [left, right] pairs in first-seen order;
     *                       empty when `word` has fewer than two symbols.
     */
    getPairs(word) {
        const pairs = new TupleSet();

        let prevChar = word[0];
        for (let i = 1; i < word.length; i++) {
            pairs.add([prevChar, word[i]]);
            prevChar = word[i];
        }

        return pairs.toList();
    }

    /**
     * Apply the BPE merges to a single token.
     *
     * The token is split into characters, the last one is tagged with the
     * end-of-word marker "</w>", and the lowest-ranked adjacent pair is merged
     * repeatedly until no remaining pair has a rank or one symbol is left.
     *
     * @param {string} token A non-empty token produced by encode().
     * @returns {string} The resulting symbols joined by single spaces.
     */
    bpe(token) {
        let word = [...token];
        word[word.length - 1] += "</w>";
        let pairs = this.getPairs(word);

        // Single-symbol token: nothing to merge.
        if (pairs.length === 0) {
            return token + "</w>";
        }

        // Pairs missing from the rank table sort after every ranked pair.
        // (The fallback was previously written inside the index brackets for
        // the second operand, where it could never take effect.)
        const rankOf = pair => this._bpeRanks[pair.join("`")] ?? inf;

        while (true) {
            const bigram = min(pairs, (a, b) => rankOf(a) > rankOf(b));

            // Even the best pair is unranked: no more merges apply.
            if (this._bpeRanks[bigram.join("`")] === undefined) {
                break;
            }

            const [first, second] = bigram;
            const newWord = [];
            let i = 0;

            while (i < word.length) {
                const j = word.indexOf(first, i);
                if (j === -1) {
                    // No further occurrence of `first`: copy the tail as-is.
                    newWord.push(...word.slice(i));
                    break;
                }

                // Copy everything before the occurrence, then inspect it.
                newWord.push(...word.slice(i, j));
                i = j;

                if (word[i] === first && i < word.length - 1 && word[i + 1] === second) {
                    newWord.push(first + second);
                    i += 2;
                } else {
                    newWord.push(word[i]);
                    i += 1;
                }
            }

            word = [...newWord];
            if (word.length === 1) {
                break;
            }
            pairs = this.getPairs(word);
        }

        return word.join(" ");
    }

    /**
     * Encode `text` into a fixed-length array of 77 token ids:
     * START_TOK, up to 75 BPE token ids, END_TOK, then zero padding.
     *
     * Text containing no matchable token (for example only punctuation or
     * whitespace) yields just START_TOK, END_TOK and padding instead of
     * throwing.
     *
     * NOTE(review): the text is not lower-cased here although the token
     * pattern is case-insensitive; presumably the caller or the vocabulary
     * accounts for that - confirm. Symbols missing from the encoder table
     * come out as `undefined` entries.
     *
     * @param {string} text Input text.
     * @returns {Array<number>} Token ids, length 77.
     */
    encode(text) {
        const SEQUENCE_LENGTH = 77;

        let bpeTokens = [];
        text = text.trim();
        text = text.replaceAll(/\s+/g, " ");

        // String.prototype.match returns null (not []) when nothing matches.
        const tokens = text
            .match(/<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[a-zA-Z0-9]+/ig) ?? [];

        tokens.forEach(token => {
            bpeTokens.push(...this.bpe(token).split(" ").map(t => this._encoder[t]));
        });

        bpeTokens.unshift(START_TOK);
        // Leave room for the end token within the fixed sequence length.
        bpeTokens = bpeTokens.slice(0, SEQUENCE_LENGTH - 1);
        bpeTokens.push(END_TOK);
        while (bpeTokens.length < SEQUENCE_LENGTH) {
            bpeTokens.push(0);
        }

        return bpeTokens;
    }
}
|
@ -1,6 +1,8 @@
|
||||
import BertTokenizer from "@/ml/BertTokenizer";
|
||||
import * as tf from "@tensorflow/tfjs";
|
||||
import axios from "axios";
|
||||
import {chunk as _chunk} from "underscore";
|
||||
import * as ort from "onnxruntime-web";
|
||||
import {argMax, downloadToBuffer, ORT_WASM_PATHS} from "@/ml/mlUtils";
|
||||
|
||||
export default class BertNerModel {
|
||||
vocabUrl;
|
||||
@ -29,7 +31,10 @@ export default class BertNerModel {
|
||||
}
|
||||
|
||||
async loadModel(onProgress) {
|
||||
this._model = await tf.loadGraphModel(this.modelUrl, {onProgress});
|
||||
ort.env.wasm.wasmPaths = ORT_WASM_PATHS;
|
||||
const buf = await downloadToBuffer(this.modelUrl, onProgress);
|
||||
|
||||
this._model = await ort.InferenceSession.create(buf.buffer, {executionProviders: ["wasm"]});
|
||||
}
|
||||
|
||||
alignLabels(labels, wordIds, words) {
|
||||
@ -57,21 +62,28 @@ export default class BertNerModel {
|
||||
|
||||
async predict(text, callback) {
|
||||
this._previousWordId = null;
|
||||
const encoded = this._tokenizer.encodeText(text, this.inputSize)
|
||||
const encoded = this._tokenizer.encodeText(text, this.inputSize);
|
||||
|
||||
let i = 0;
|
||||
for (let chunk of encoded.inputChunks) {
|
||||
const rawResult = tf.tidy(() => this._model.execute({
|
||||
input_ids: tf.tensor2d(chunk.inputIds, [1, this.inputSize], "int32"),
|
||||
token_type_ids: tf.tensor2d(chunk.segmentIds, [1, this.inputSize], "int32"),
|
||||
attention_mask: tf.tensor2d(chunk.inputMask, [1, this.inputSize], "int32"),
|
||||
}));
|
||||
|
||||
const labelIds = await tf.argMax(rawResult, -1);
|
||||
const labelIdsArray = await labelIds.array();
|
||||
const labels = labelIdsArray[0].map(id => this.id2label[id]);
|
||||
rawResult.dispose()
|
||||
const results = await this._model.run({
|
||||
input_ids: new ort.Tensor("int32", chunk.inputIds, [1, this.inputSize]),
|
||||
token_type_ids: new ort.Tensor("int32", chunk.segmentIds, [1, this.inputSize]),
|
||||
attention_mask: new ort.Tensor("int32", chunk.inputMask, [1, this.inputSize]),
|
||||
});
|
||||
|
||||
callback(this.alignLabels(labels, chunk.wordIds, encoded.words))
|
||||
const labelIds = _chunk(results["output"].data, this.id2label.length).map(argMax);
|
||||
const labels = labelIds.map(id => this.id2label[id]);
|
||||
|
||||
callback(this.alignLabels(labels, chunk.wordIds, encoded.words));
|
||||
|
||||
i += 1;
|
||||
|
||||
// give browser some time to repaint
|
||||
if (i % 2 === 0) {
|
||||
await new Promise(resolve => setTimeout(resolve, 0));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,4 +1,5 @@
|
||||
import {zip, chunk} from "underscore";
|
||||
import {toInt64} from "@/ml/mlUtils";
|
||||
|
||||
const UNK_INDEX = 100;
|
||||
const CLS_INDEX = 101;
|
||||
|
48
sist2-vue/src/ml/CLIPTransformerModel.js
Normal file
48
sist2-vue/src/ml/CLIPTransformerModel.js
Normal file
@ -0,0 +1,48 @@
|
||||
import * as ort from "onnxruntime-web";
|
||||
import {BPETokenizer} from "@/ml/BPETokenizer";
|
||||
import axios from "axios";
|
||||
import {downloadToBuffer, ORT_WASM_PATHS} from "@/ml/mlUtils";
|
||||
|
||||
/**
 * Text-embedding model: an ONNX model run through onnxruntime-web, fed by a
 * BPE tokenizer whose vocabulary is downloaded from `tokenizerUrl`.
 */
export class CLIPTransformerModel {

    _modelUrl = null;
    _tokenizerUrl = null;
    _model = null;
    _tokenizer = null;

    /**
     * @param {string} modelUrl URL of the ONNX model file
     * @param {string} tokenizerUrl URL of the tokenizer JSON (`encoder`, `bpe_ranks`)
     */
    constructor(modelUrl, tokenizerUrl) {
        this._modelUrl = modelUrl;
        this._tokenizerUrl = tokenizerUrl;
    }

    /**
     * Loads the tokenizer and the model concurrently.
     * @param {Function} [onProgress] forwarded to the model download
     */
    async init(onProgress) {
        const tokenizerReady = this.loadTokenizer();
        const modelReady = this.loadModel(onProgress);

        await Promise.all([tokenizerReady, modelReady]);
    }

    /** Downloads the model and creates a WASM inference session from it. */
    async loadModel(onProgress) {
        ort.env.wasm.wasmPaths = ORT_WASM_PATHS;

        const {buffer} = await downloadToBuffer(this._modelUrl, onProgress);
        const sessionOptions = {executionProviders: ["wasm"]};

        this._model = await ort.InferenceSession.create(buffer, sessionOptions);
    }

    /** Fetches the tokenizer data and builds the BPETokenizer. */
    async loadTokenizer() {
        const {data} = await axios.get(this._tokenizerUrl);

        this._tokenizer = new BPETokenizer(data.encoder, data.bpe_ranks);
    }

    /**
     * Computes the embedding of a text.
     * @param {string} text
     * @returns {Promise<number[]>} data of the model output that holds
     * exactly 512 values
     */
    async predict(text) {
        const inputIds = new ort.Tensor("int32", this._tokenizer.encode(text), [1, 77]);

        const outputs = await this._model.run({input_ids: inputIds});

        // The embedding is picked by element count rather than by output name
        const embedding = Object.values(outputs).find(output => output.size === 512);

        return Array.from(embedding.data);
    }
}
|
47
sist2-vue/src/ml/mlUtils.js
Normal file
47
sist2-vue/src/ml/mlUtils.js
Normal file
@ -0,0 +1,47 @@
|
||||
/**
 * Downloads `url` fully into memory, reporting progress along the way.
 *
 * The buffer is pre-sized from the Content-Length header when one is present,
 * and grown on demand otherwise. Content-Length can be absent (chunked
 * transfer) or smaller than the delivered body (compressed responses), so it
 * is only used as a hint.
 *
 * @param {string} url
 * @param {(fraction: number) => void} [onProgress] called with 0 at start and
 * then with a fraction in [0, 1]; when the total size is unknown it is only
 * called with 0 and, once finished, with 1
 * @returns {Promise<Uint8ClampedArray>} array whose `.buffer` holds exactly
 * the downloaded bytes
 * @throws {Error} when the server answers with a non-2xx status
 */
export async function downloadToBuffer(url, onProgress) {
    const resp = await fetch(url);

    if (!resp.ok) {
        throw new Error(`Failed to download ${url}: HTTP ${resp.status}`);
    }

    const declaredLength = Number(resp.headers.get("Content-Length"));
    const contentLength = Number.isFinite(declaredLength) && declaredLength > 0 ? declaredLength : 0;

    let buf = new Uint8ClampedArray(contentLength);
    const reader = resp.body.getReader();
    let cursor = 0;

    if (onProgress) {
        onProgress(0);
    }

    while (true) {
        const {done, value} = await reader.read();

        if (done) {
            break;
        }

        // Grow geometrically when the body is larger than announced
        const required = cursor + value.length;
        if (required > buf.length) {
            const grown = new Uint8ClampedArray(Math.max(required, buf.length * 2));
            grown.set(buf.subarray(0, cursor));
            buf = grown;
        }

        buf.set(value, cursor);
        cursor = required;

        if (onProgress && contentLength > 0) {
            onProgress(Math.min(cursor / contentLength, 1));
        }
    }

    if (onProgress && contentLength === 0) {
        onProgress(1);
    }

    // Callers read `.buffer`, so the backing ArrayBuffer must not be
    // larger than the data: copy instead of returning a view.
    return cursor === buf.length ? buf : buf.slice(0, cursor);
}
|
||||
|
||||
/**
 * Returns the index of the largest element.
 *
 * Works on plain arrays and on typed arrays. On ties the first occurrence
 * wins.
 *
 * @param {ArrayLike<number>} array
 * @returns {number} index of the maximum, or -1 when `array` is empty
 */
export function argMax(array) {
    let best = -1;

    for (let i = 0; i < array.length; i++) {
        if (best === -1 || array[i] > array[best]) {
            best = i;
        }
    }

    return best;
}
|
||||
|
||||
/**
 * Converts integer values to a BigInt64Array.
 *
 * Accepts plain arrays as well as numeric typed arrays (mapping a typed
 * array with BigInt directly would throw, since a typed array cannot hold
 * BigInt results).
 *
 * @param {ArrayLike<number>|Iterable<number>} array integer values
 * @returns {BigInt64Array}
 * @throws {RangeError} when a value is not an integer
 */
export function toInt64(array) {
    return BigInt64Array.from(array, (value) => BigInt(value));
}
|
||||
|
||||
// CDN directory of the onnxruntime-web WASM binaries (version pinned here)
const ORT_DIST_URL = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.15.1/dist/";

/**
 * Maps each onnxruntime-web WASM file name to the URL it is loaded from;
 * assigned to `ort.env.wasm.wasmPaths` before a session is created.
 */
export const ORT_WASM_PATHS = Object.fromEntries(
    [
        "ort-wasm-simd.wasm",
        "ort-wasm.wasm",
        "ort-wasm-simd-threaded.wasm",
        "ort-wasm-threaded.wasm",
    ].map((file) => [file, ORT_DIST_URL + file])
);
|
@ -23,6 +23,8 @@ export default new Vuex.Store({
|
||||
dateMin: undefined,
|
||||
dateMax: undefined,
|
||||
searchText: "",
|
||||
embeddingText: "",
|
||||
embedding: null,
|
||||
pathText: "",
|
||||
sortMode: "score",
|
||||
|
||||
@ -91,10 +93,11 @@ export default new Vuex.Store({
|
||||
uiMimeMap: [] as any[],
|
||||
|
||||
auth0Token: null,
|
||||
mlModel: {
|
||||
nerModel: {
|
||||
model: null,
|
||||
name: null
|
||||
},
|
||||
embeddingsModel: null
|
||||
},
|
||||
mutations: {
|
||||
setUiShowDetails: (state, val) => state.uiShowDetails = val,
|
||||
@ -129,6 +132,8 @@ export default new Vuex.Store({
|
||||
setDateBoundsMin: (state, val) => state.dateBoundsMin = val,
|
||||
setDateBoundsMax: (state, val) => state.dateBoundsMax = val,
|
||||
setSearchText: (state, val) => state.searchText = val,
|
||||
setEmbeddingText: (state, val) => state.embeddingText = val,
|
||||
setEmbedding: (state, val) => state.embedding= val,
|
||||
setFuzzy: (state, val) => state.fuzzy = val,
|
||||
setLastQueryResult: (state, val) => state.lastQueryResults = val,
|
||||
setFirstQueryResult: (state, val) => state.firstQueryResults = val,
|
||||
@ -212,7 +217,8 @@ export default new Vuex.Store({
|
||||
// noop
|
||||
},
|
||||
setAuth0Token: (state, val) => state.auth0Token = val,
|
||||
setMlModel: (state, val) => state.mlModel = val,
|
||||
setNerModel: (state, val) => state.nerModel = val,
|
||||
setEmbeddingsModel: (state, val) => state.embeddingsModel = val,
|
||||
},
|
||||
actions: {
|
||||
setSist2Info: (store, val) => {
|
||||
@ -370,7 +376,9 @@ export default new Vuex.Store({
|
||||
},
|
||||
modules: {},
|
||||
getters: {
|
||||
mlModel: (state) => state.mlModel,
|
||||
nerModel: (state) => state.nerModel,
|
||||
embeddingsModel: (state) => state.embeddingsModel,
|
||||
embedding: (state) => state.embedding,
|
||||
seed: (state) => state.seed,
|
||||
getPathText: (state) => state.pathText,
|
||||
indices: state => state.indices,
|
||||
@ -389,6 +397,7 @@ export default new Vuex.Store({
|
||||
sizeMin: state => state.sizeMin,
|
||||
sizeMax: state => state.sizeMax,
|
||||
searchText: state => state.searchText,
|
||||
embeddingText: state => state.embeddingText,
|
||||
pathText: state => state.pathText,
|
||||
fuzzy: state => state.fuzzy,
|
||||
size: state => state.optSize,
|
||||
|
@ -13,6 +13,7 @@
|
||||
|
||||
<b-card v-show="!uiLoading && !showEsConnectionError" id="search-panel">
|
||||
<SearchBar @show-help="showHelp=true"></SearchBar>
|
||||
<EmbeddingsSearchBar class="mt-3"></EmbeddingsSearchBar>
|
||||
<b-row>
|
||||
<b-col style="height: 70px;" sm="6">
|
||||
<SizeSlider></SizeSlider>
|
||||
@ -58,16 +59,14 @@
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script lang="ts">
|
||||
<script>
|
||||
import Preloader from "@/components/Preloader.vue";
|
||||
import {mapActions, mapGetters, mapMutations} from "vuex";
|
||||
import sist2 from "../Sist2Api";
|
||||
import Sist2Api, {EsHit, EsResult} from "../Sist2Api";
|
||||
import SearchBar from "@/components/SearchBar.vue";
|
||||
import IndexPicker from "@/components/IndexPicker.vue";
|
||||
import Vue from "vue";
|
||||
import Sist2Query from "@/Sist2ElasticsearchQuery";
|
||||
import _debounce from "lodash/debounce";
|
||||
import {debounce as _debounce} from "underscore";
|
||||
import DocCardWall from "@/components/DocCardWall.vue";
|
||||
import Lightbox from "@/components/Lightbox.vue";
|
||||
import LightboxCaption from "@/components/LightboxCaption.vue";
|
||||
@ -79,11 +78,13 @@ import DateSlider from "@/components/DateSlider.vue";
|
||||
import TagPicker from "@/components/TagPicker.vue";
|
||||
import DocList from "@/components/DocList.vue";
|
||||
import HelpDialog from "@/components/HelpDialog.vue";
|
||||
import Sist2SqliteQuery from "@/Sist2SqliteQuery";
|
||||
import EmbeddingsSearchBar from "@/components/EmbeddingsSearchBar.vue";
|
||||
import Sist2Api from "@/Sist2Api";
|
||||
|
||||
|
||||
export default Vue.extend({
|
||||
components: {
|
||||
EmbeddingsSearchBar,
|
||||
HelpDialog,
|
||||
DocList,
|
||||
TagPicker,
|
||||
@ -93,8 +94,8 @@ export default Vue.extend({
|
||||
data: () => ({
|
||||
loading: false,
|
||||
uiLoading: true,
|
||||
search: undefined as any,
|
||||
docs: [] as EsHit[],
|
||||
search: undefined,
|
||||
docs: [],
|
||||
docIds: new Set(),
|
||||
docChecksums: new Set(),
|
||||
searchBusy: false,
|
||||
@ -108,16 +109,16 @@ export default Vue.extend({
|
||||
mounted() {
|
||||
// Handle touch events
|
||||
window.ontouchend = () => this.$store.commit("busTouchEnd");
|
||||
window.ontouchcancel = this.$store.commit("busTouchEnd");
|
||||
window.ontouchcancel = () => this.$store.commit("busTouchEnd");
|
||||
|
||||
this.search = _debounce(async (clear: boolean) => {
|
||||
this.search = _debounce(async (clear) => {
|
||||
if (clear) {
|
||||
await this.clearResults();
|
||||
}
|
||||
|
||||
await this.searchNow();
|
||||
|
||||
}, 350, {leading: false});
|
||||
}, 350, false);
|
||||
|
||||
this.$store.dispatch("loadFromArgs", this.$route).then(() => {
|
||||
this.$store.subscribe(() => this.$store.dispatch("updateArgs", this.$router));
|
||||
@ -126,6 +127,7 @@ export default Vue.extend({
|
||||
"setSizeMin", "setSizeMax", "setDateMin", "setDateMax", "setSearchText", "setPathText",
|
||||
"setSortMode", "setOptHighlight", "setOptFragmentSize", "setFuzzy", "setSize", "setSelectedIndices",
|
||||
"setSelectedMimeTypes", "setSelectedTags", "setOptQueryMode", "setOptSearchInPath",
|
||||
"setEmbedding"
|
||||
].includes(mutation.type)) {
|
||||
if (this.searchBusy) {
|
||||
return;
|
||||
@ -152,7 +154,7 @@ export default Vue.extend({
|
||||
}).catch(error => {
|
||||
console.log(error);
|
||||
|
||||
if (error.response.status == 503 || error.response.status == 500) {
|
||||
if (error.response.status === 503 || error.response.status === 500) {
|
||||
this.showEsConnectionError = true;
|
||||
this.uiLoading = false;
|
||||
} else {
|
||||
@ -181,7 +183,7 @@ export default Vue.extend({
|
||||
bodyClass: "toast-body-error",
|
||||
});
|
||||
},
|
||||
showSyntaxErrorToast: function (): void {
|
||||
showSyntaxErrorToast: function () {
|
||||
this.$bvToast.toast(
|
||||
this.$t("toast.esQueryErr"),
|
||||
{
|
||||
@ -197,7 +199,7 @@ export default Vue.extend({
|
||||
await this.$store.dispatch("incrementQuerySequence");
|
||||
this.$store.commit("busSearch");
|
||||
|
||||
Sist2Api.search().then(async (resp: EsResult) => {
|
||||
Sist2Api.search().then(async (resp) => {
|
||||
await this.handleSearch(resp);
|
||||
this.searchBusy = false;
|
||||
}).catch(err => {
|
||||
@ -215,8 +217,8 @@ export default Vue.extend({
|
||||
await this.$store.dispatch("clearResults");
|
||||
this.$store.commit("setUiReachedScrollEnd", false);
|
||||
},
|
||||
async handleSearch(resp: EsResult) {
|
||||
if (resp.hits.hits.length == 0 || resp.hits.hits.length < this.$store.state.optSize) {
|
||||
async handleSearch(resp) {
|
||||
if (resp.hits.hits.length === 0 || resp.hits.hits.length < this.$store.state.optSize) {
|
||||
this.$store.commit("setUiReachedScrollEnd", true);
|
||||
}
|
||||
|
||||
|
@ -163,7 +163,8 @@ void database_open(database_t *db) {
|
||||
&db->write_document_sidecar_stmt, NULL));
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
db->db,
|
||||
"REPLACE INTO document (id, mtime, size, json_data, version) VALUES (?, ?, ?, ?, (SELECT max(id) FROM version));", -1,
|
||||
"REPLACE INTO document (id, mtime, size, json_data, version) VALUES (?, ?, ?, ?, (SELECT max(id) FROM version));",
|
||||
-1,
|
||||
&db->write_document_stmt, NULL));
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
db->db,
|
||||
@ -175,6 +176,10 @@ void database_open(database_t *db) {
|
||||
db->db, "SELECT json_data FROM document WHERE id=?", -1,
|
||||
&db->get_document, NULL));
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
db->db, "SELECT * FROM model", -1,
|
||||
&db->get_models, NULL));
|
||||
|
||||
// Create functions
|
||||
sqlite3_create_function(
|
||||
db->db,
|
||||
@ -186,6 +191,17 @@ void database_open(database_t *db) {
|
||||
NULL,
|
||||
NULL
|
||||
);
|
||||
|
||||
sqlite3_create_function(
|
||||
db->db,
|
||||
"embedding_to_json",
|
||||
5,
|
||||
SQLITE_UTF8,
|
||||
NULL,
|
||||
embedding_to_json_func,
|
||||
NULL,
|
||||
NULL
|
||||
);
|
||||
} else if (db->type == IPC_CONSUMER_DATABASE) {
|
||||
|
||||
sqlite3_create_function(
|
||||
@ -248,6 +264,10 @@ void database_open(database_t *db) {
|
||||
db->db, "SELECT tag, count(*) FROM tag GROUP BY tag", -1,
|
||||
&db->fts_get_tags, NULL));
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
db->db, "SELECT size FROM model WHERE id=?", -1,
|
||||
&db->fts_model_size, NULL));
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
db->db, "SELECT path, count FROM path_index"
|
||||
" WHERE (index_id=?1 OR ?1 IS NULL) AND depth BETWEEN ? AND ?"
|
||||
@ -302,6 +322,17 @@ void database_open(database_t *db) {
|
||||
NULL,
|
||||
NULL
|
||||
);
|
||||
|
||||
sqlite3_create_function(
|
||||
db->db,
|
||||
"cosine_sim",
|
||||
3,
|
||||
SQLITE_UTF8,
|
||||
NULL,
|
||||
cosine_sim_func,
|
||||
NULL,
|
||||
NULL
|
||||
);
|
||||
}
|
||||
|
||||
if (db->type == FTS_DATABASE || db->type == INDEX_DATABASE) {
|
||||
@ -463,8 +494,6 @@ database_iterator_t *database_create_document_iterator(database_t *db) {
|
||||
|
||||
sqlite3_stmt *stmt;
|
||||
|
||||
// TODO optimization: remove mtime, size, _id from json_data
|
||||
|
||||
sqlite3_prepare_v2(db->db, "WITH doc (j) AS (SELECT CASE"
|
||||
" WHEN sc.json_data IS NULL THEN"
|
||||
" CASE"
|
||||
@ -800,4 +829,4 @@ cJSON *database_get_document(database_t *db, char *doc_id) {
|
||||
void database_increment_version(database_t *db) {
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
||||
db->db, "INSERT INTO version DEFAULT VALUES", NULL, NULL, NULL));
|
||||
}
|
||||
}
|
@ -41,6 +41,7 @@ typedef enum {
|
||||
FTS_SORT_RANDOM,
|
||||
FTS_SORT_NAME,
|
||||
FTS_SORT_ID,
|
||||
FTS_SORT_EMBEDDING
|
||||
} fts_sort_t;
|
||||
|
||||
typedef struct {
|
||||
@ -83,6 +84,7 @@ typedef struct database {
|
||||
sqlite3_stmt *write_document_sidecar_stmt;
|
||||
sqlite3_stmt *write_thumbnail_stmt;
|
||||
sqlite3_stmt *get_document;
|
||||
sqlite3_stmt *get_models;
|
||||
|
||||
sqlite3_stmt *delete_tag_stmt;
|
||||
sqlite3_stmt *write_tag_stmt;
|
||||
@ -100,6 +102,8 @@ typedef struct database {
|
||||
sqlite3_stmt *fts_get_document;
|
||||
sqlite3_stmt *fts_suggest_tag;
|
||||
sqlite3_stmt *fts_get_tags;
|
||||
sqlite3_stmt *fts_model_size;
|
||||
|
||||
|
||||
char **tag_array;
|
||||
|
||||
@ -210,7 +214,8 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
|
||||
long size_max, long date_min, long date_max, int page_size,
|
||||
char **index_ids, char **mime_types, char **tags, int sort_asc,
|
||||
fts_sort_t sort, int seed, char **after, int fetch_aggregations,
|
||||
int highlight, int highlight_context_size);
|
||||
int highlight, int highlight_context_size, int model,
|
||||
const float *embedding, int embedding_size);
|
||||
|
||||
void database_write_tag(database_t *db, char *doc_id, char *tag);
|
||||
|
||||
@ -228,4 +233,10 @@ cJSON *database_fts_get_tags(database_t *db);
|
||||
|
||||
cJSON *database_get_document(database_t *db, char *doc_id);
|
||||
|
||||
void cosine_sim_func(sqlite3_context *ctx, int argc, sqlite3_value **argv);
|
||||
|
||||
void embedding_to_json_func(sqlite3_context *ctx, int argc, sqlite3_value **argv);
|
||||
|
||||
cJSON *database_get_models(database_t *db);
|
||||
|
||||
#endif
|
94
src/database/database_embeddings.c
Normal file
94
src/database/database_embeddings.c
Normal file
@ -0,0 +1,94 @@
|
||||
#include <openblas/cblas.h>
|
||||
#include "database.h"
|
||||
|
||||
|
||||
/**
 * Cosine similarity of two single-precision vectors of n elements:
 * dot(a, b) / (|a| * |b|).
 *
 * There is no zero-norm guard here: when either vector has a zero norm the
 * quotient is 0/0, i.e. NaN.
 */
static float cosine_sim(int n, const float *a, const float *b) {
    const float dot = cblas_sdot(n, a, 1, b, 1);
    const float len_a = cblas_snrm2(n, a, 1);
    const float len_b = cblas_snrm2(n, b, 1);
    const float denominator = len_a * len_b;

    return dot / denominator;
}
|
||||
|
||||
|
||||
/**
 * SQL function cosine_sim(n, a, b).
 *
 * @param argv[0] number of float elements to compare
 * @param argv[1] first embedding, as a blob of floats
 * @param argv[2] second embedding, as a blob of floats
 *
 * The result is the cosine similarity as a double, or -1 when a blob is
 * NULL, when n is not positive, when a blob holds fewer than n floats, or
 * when the similarity is NaN.
 */
void cosine_sim_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
    if (argc != 3) {
        sqlite3_result_error(ctx, "Invalid parameters", -1);
        // Do not touch argv when the argument count is wrong
        return;
    }

    int n = sqlite3_value_int(argv[0]);

    // Fetch each blob pointer first, then its size (order advised by SQLite)
    const float *a = sqlite3_value_blob(argv[1]);
    int a_bytes = sqlite3_value_bytes(argv[1]);
    const float *b = sqlite3_value_blob(argv[2]);
    int b_bytes = sqlite3_value_bytes(argv[2]);

    if (a == NULL || b == NULL || n <= 0) {
        sqlite3_result_double(ctx, -1);
        return;
    }

    // Never read past the end of either blob
    size_t required_bytes = (size_t) n * sizeof(float);
    if ((size_t) a_bytes < required_bytes || (size_t) b_bytes < required_bytes) {
        sqlite3_result_double(ctx, -1);
        return;
    }

    float result = cosine_sim(n, a, b);

    // NaN is the only value that differs from itself (zero-norm vectors)
    if (result != result) {
        result = -1;
    }

    sqlite3_result_double(ctx, result);
}
|
||||
|
||||
/**
 * SQL function embedding_to_json(emb, type, start, end, size).
 *
 * @param argv[0] embedding, as a blob of floats
 * @param argv[1] embedding type; only "flat" is implemented
 * @param argv[2] start (only meaningful for nested embeddings, unused)
 * @param argv[3] end (only meaningful for nested embeddings, unused)
 * @param argv[4] number of floats in the embedding
 *
 * The result is the embedding as a JSON array text, SQL NULL when the
 * embedding is NULL, or an SQL error for invalid arguments.
 */
void embedding_to_json_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {

    if (argc != 5) {
        sqlite3_result_error(ctx, "Invalid parameters", -1);
        // Do not touch argv when the argument count is wrong
        return;
    }

    // Fetch the blob pointer first, then its size (order advised by SQLite)
    const float *embedding = sqlite3_value_blob(argv[0]);
    int embedding_bytes = sqlite3_value_bytes(argv[0]);
    const char *type = (const char *) sqlite3_value_text(argv[1]);
    int size = sqlite3_value_int(argv[4]);

    // sqlite3_value_text() yields NULL for an SQL NULL argument
    if (type == NULL) {
        sqlite3_result_error(ctx, "Invalid parameters", -1);
        return;
    }

    if (strcmp(type, "flat") != 0) {
        sqlite3_result_error(ctx, "Nested embeddings not implemented yet", -1);
        return;
    }

    if (embedding == NULL) {
        sqlite3_result_null(ctx);
        return;
    }

    // Never read past the end of the blob
    if (size < 0 || (size_t) embedding_bytes < (size_t) size * sizeof(float)) {
        sqlite3_result_error(ctx, "Invalid embedding size", -1);
        return;
    }

    cJSON *json = cJSON_CreateFloatArray(embedding, size);
    if (json == NULL) {
        sqlite3_result_error_nomem(ctx);
        return;
    }

    // size * 22 is only the initial size of the print buffer
    char *json_str = cJSON_PrintBuffered(json, size * 22, FALSE);
    cJSON_Delete(json);

    if (json_str == NULL) {
        sqlite3_result_error_nomem(ctx);
        return;
    }

    // SQLITE_TRANSIENT makes SQLite copy the text, so it can be freed here
    sqlite3_result_text(ctx, json_str, -1, SQLITE_TRANSIENT);
    free(json_str);
}
|
||||
|
||||
/**
 * Lists the rows of the model table as a JSON array of objects with the
 * keys id, name, url, path, size and type.
 *
 * Columns are read by position from the prepared `get_models` statement
 * ("SELECT * FROM model"), so this depends on the column order of the model
 * table (presumably id, name, url, path, size, type -- keep in sync with the
 * schema). The "url" key is omitted when the column is NULL.
 *
 * @return a new array owned by the caller (release with cJSON_Delete)
 */
cJSON *database_get_models(database_t *db) {
    cJSON *json = cJSON_CreateArray();
    sqlite3_stmt *stmt = db->get_models;

    for (;;) {
        int ret = sqlite3_step(stmt);
        CRASH_IF_STMT_FAIL(ret);

        if (ret == SQLITE_DONE) {
            break;
        }

        cJSON *row = cJSON_CreateObject();

        cJSON_AddNumberToObject(row, "id", sqlite3_column_int(stmt, 0));
        cJSON_AddStringToObject(row, "name", (const char *) sqlite3_column_text(stmt, 1));

        // url is a text column and must be read as text
        const char *url = (const char *) sqlite3_column_text(stmt, 2);
        if (url != NULL) {
            cJSON_AddStringToObject(row, "url", url);
        }

        cJSON_AddStringToObject(row, "path", (const char *) sqlite3_column_text(stmt, 3));
        cJSON_AddNumberToObject(row, "size", sqlite3_column_int(stmt, 4));
        cJSON_AddStringToObject(row, "type", (const char *) sqlite3_column_text(stmt, 5));

        cJSON_AddItemToArray(json, row);
    }

    // The statement is shared through db: rewind it for the next call
    sqlite3_reset(stmt);

    return json;
}
|
@ -37,7 +37,7 @@ int database_fts_get_max_path_depth(database_t *db) {
|
||||
|
||||
void database_fts_index(database_t *db) {
|
||||
|
||||
LOG_INFO("database_fts.c", "Creating content table.");
|
||||
LOG_INFO("database_fts.c", "Creating content table");
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
||||
db->db,
|
||||
@ -47,21 +47,12 @@ void database_fts_index(database_t *db) {
|
||||
" document.json_data ->> 'path' as path,"
|
||||
" mtime,"
|
||||
" document.json_data ->> 'mime' as mime,"
|
||||
" CASE"
|
||||
" WHEN sc.json_data IS NULL THEN"
|
||||
" json_set(document.json_data, "
|
||||
" '$._id',document.id,"
|
||||
" '$.size',document.size, "
|
||||
" '$.mtime',document.mtime)"
|
||||
" ELSE json_patch("
|
||||
" json_set(document.json_data,"
|
||||
" '$._id',document.id,"
|
||||
" '$.size',document.size,"
|
||||
" '$.mtime', document.mtime),"
|
||||
" sc.json_data) END"
|
||||
" FROM document"
|
||||
" LEFT JOIN document_sidecar sc ON document.id = sc.id"
|
||||
" GROUP BY document.id)"
|
||||
" )"
|
||||
" INSERT"
|
||||
" INTO fts.document_index (id, index_id, size, name, path, mtime, mime, json_data)"
|
||||
" SELECT * FROM docs WHERE true"
|
||||
@ -69,7 +60,16 @@ void database_fts_index(database_t *db) {
|
||||
" size=excluded.size, mtime=excluded.mtime, mime=excluded.mime, json_data=excluded.json_data;",
|
||||
NULL, NULL, NULL));
|
||||
|
||||
LOG_DEBUG("database_fts.c", "Deleting old documents.");
|
||||
LOG_DEBUG("database_fts.c", "Copying embeddings");
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
||||
db->db,
|
||||
"REPLACE INTO fts.embedding (id, model_id, start, end, embedding)"
|
||||
" SELECT id, model_id, start, end, embedding FROM embedding", NULL, NULL, NULL));
|
||||
|
||||
// TODO: delete old embeddings
|
||||
|
||||
LOG_DEBUG("database_fts.c", "Deleting old documents");
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
||||
db->db,
|
||||
@ -144,7 +144,7 @@ void database_fts_index(database_t *db) {
|
||||
"INSERT INTO path_index (path, index_id, count, depth) SELECT path, index_id, total, depth FROM path_tmp",
|
||||
NULL, NULL, NULL));
|
||||
|
||||
LOG_DEBUG("database_fts.c", "Generating search index.");
|
||||
LOG_DEBUG("database_fts.c", "Generating search index");
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
||||
db->db, "INSERT INTO search(search) VALUES ('delete-all')",
|
||||
@ -157,7 +157,7 @@ void database_fts_index(database_t *db) {
|
||||
}
|
||||
|
||||
void database_fts_optimize(database_t *db) {
|
||||
LOG_INFO("database_fts.c", "Optimizing search index.");
|
||||
LOG_INFO("database_fts.c", "Optimizing search index");
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
||||
db->db,
|
||||
@ -408,6 +408,8 @@ const char *get_sort_var(fts_sort_t sort) {
|
||||
return "doc.name";
|
||||
case FTS_SORT_ID:
|
||||
return "doc.id";
|
||||
case FTS_SORT_EMBEDDING:
|
||||
return "cosine_sim(?7, ?8, emb.embedding)";
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
@ -459,11 +461,36 @@ char *get_after_where(char **after, fts_sort_t sort, int sort_asc) {
|
||||
return "(sort_var, doc.ROWID) < (?3, ?4)";
|
||||
}
|
||||
|
||||
/**
 * Looks up the embedding size of a model using the prepared
 * `fts_model_size` statement.
 *
 * @param model_id id of the model row
 * @return the value of the size column, or -1 when no model has this id
 */
int database_fts_get_model_size(database_t *db, int model_id) {
    sqlite3_stmt *stmt = db->fts_model_size;

    sqlite3_bind_int(stmt, 1, model_id);
    int ret = sqlite3_step(stmt);
    CRASH_IF_STMT_FAIL(ret);

    int size = -1;
    if (ret != SQLITE_DONE) {
        size = sqlite3_column_int(stmt, 0);
    }

    // Rewind on every path, including "not found", so the shared statement
    // is always ready for the next lookup
    sqlite3_reset(stmt);

    return size;
}
|
||||
|
||||
cJSON *database_fts_search(database_t *db, const char *query, const char *path, long size_min,
|
||||
long size_max, long date_min, long date_max, int page_size,
|
||||
char **index_ids, char **mime_types, char **tags, int sort_asc,
|
||||
fts_sort_t sort, int seed, char **after, int fetch_aggregations,
|
||||
int highlight, int highlight_context_size) {
|
||||
int highlight, int highlight_context_size, int model,
|
||||
const float *embedding, int embedding_size) {
|
||||
|
||||
if (embedding) {
|
||||
int model_embedding_size = database_fts_get_model_size(db, model);
|
||||
if (model_embedding_size != embedding_size) {
|
||||
LOG_WARNINGF("database_fts.c", "Received invalid embedding size for model %s: %d, expected %d",
|
||||
model, embedding_size, model_embedding_size);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
char path_glob[PATH_MAX * 2];
|
||||
snprintf(path_glob, sizeof(path_glob), "%s/*", path);
|
||||
@ -502,6 +529,11 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
|
||||
"'$.content')";
|
||||
}
|
||||
|
||||
const char *embedding_join = "";
|
||||
if (embedding) {
|
||||
embedding_join = "LEFT JOIN embedding emb ON emb.id = doc.id AND emb.model_id=?9";
|
||||
}
|
||||
|
||||
char *sql;
|
||||
char *agg_sql;
|
||||
|
||||
@ -512,12 +544,14 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
|
||||
" %s, %s as sort_var, doc.ROWID"
|
||||
" FROM search"
|
||||
" INNER JOIN document_index doc on doc.ROWID = search.ROWID"
|
||||
" %s"
|
||||
" WHERE %s"
|
||||
" ORDER BY sort_var%s, doc.ROWID"
|
||||
" LIMIT ?2",
|
||||
json_object_sql, get_sort_var(sort),
|
||||
embedding_join,
|
||||
where,
|
||||
sort_asc ? "" : "DESC");
|
||||
sort_asc ? "" : " DESC");
|
||||
|
||||
if (fetch_aggregations) {
|
||||
asprintf(&agg_sql,
|
||||
@ -533,10 +567,12 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
|
||||
"SELECT"
|
||||
" %s, %s as sort_var, doc.ROWID"
|
||||
" FROM document_index doc"
|
||||
" %s"
|
||||
" WHERE %s"
|
||||
" ORDER BY sort_var%s,doc.ROWID"
|
||||
" LIMIT ?2",
|
||||
json_object_sql, get_sort_var(sort),
|
||||
embedding_join,
|
||||
where,
|
||||
sort_asc ? "" : " DESC");
|
||||
|
||||
@ -569,7 +605,6 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
|
||||
if (tags) {
|
||||
db->tag_array = tags;
|
||||
}
|
||||
|
||||
if (size_min > 0) {
|
||||
sqlite3_bind_int64(stmt, sqlite3_bind_parameter_index(stmt, "@size_min"), size_min);
|
||||
}
|
||||
@ -602,6 +637,11 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
|
||||
if (highlight) {
|
||||
sqlite3_bind_int(stmt, 6, highlight_context_size);
|
||||
}
|
||||
if (embedding) {
|
||||
sqlite3_bind_int(stmt, 7, embedding_size);
|
||||
sqlite3_bind_blob(stmt, 8, embedding, (int) sizeof(float) * embedding_size, SQLITE_STATIC);
|
||||
sqlite3_bind_int(stmt, 9, model);
|
||||
}
|
||||
|
||||
cJSON *json = cJSON_CreateObject();
|
||||
cJSON *hits_hits = cJSON_CreateArray();
|
||||
|
@ -38,6 +38,25 @@ const char *FtsDatabaseSchema =
|
||||
");"
|
||||
"CREATE INDEX IF NOT EXISTS tag_tag_idx ON tag(tag);"
|
||||
"CREATE INDEX IF NOT EXISTS tag_id_idx ON tag(id);"
|
||||
""
|
||||
"CREATE TABLE IF NOT EXISTS embedding ("
|
||||
" id TEXT REFERENCES document(id),"
|
||||
" model_id INTEGER NOT NULL REFERENCES model(id),"
|
||||
" start INTEGER NOT NULL,"
|
||||
" end INTEGER,"
|
||||
" embedding BLOB NOT NULL,"
|
||||
" PRIMARY KEY (id, model_id, start)"
|
||||
");"
|
||||
""
|
||||
"CREATE TABLE IF NOT EXISTS model ("
|
||||
" id INTEGER PRIMARY KEY,"
|
||||
" name TEXT NOT NULL UNIQUE CHECK ( length(name) < 16 ),"
|
||||
" url TEXT,"
|
||||
" path TEXT NOT NULL UNIQUE,"
|
||||
" size INTEGER NOT NULL,"
|
||||
" type TEXT NOT NULL CHECK ( type IN ('flat', 'nested') )"
|
||||
");"
|
||||
""
|
||||
"CREATE TRIGGER IF NOT EXISTS tag_write_trigger"
|
||||
" AFTER INSERT ON tag"
|
||||
" BEGIN"
|
||||
@ -155,5 +174,14 @@ const char *IndexDatabaseSchema =
|
||||
" mime TEXT NOT NULL,"
|
||||
" size INTEGER NOT NULL,"
|
||||
" count INTEGER NOT NULL"
|
||||
");"
|
||||
""
|
||||
"CREATE TABLE embedding ("
|
||||
" id TEXT REFERENCES document(id),"
|
||||
" model_id INTEGER NOT NULL references model(id),"
|
||||
" start INTEGER NOT NULL,"
|
||||
" end INTEGER,"
|
||||
" embedding BLOB NOT NULL,"
|
||||
" PRIMARY KEY (id, model_id, start)"
|
||||
");";
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#ifndef WALK_H
|
||||
#define WALK_H
|
||||
|
||||
#undef _XOPEN_SOURCE
|
||||
#define _XOPEN_SOURCE 500
|
||||
|
||||
int walk_directory_tree(const char *);
|
||||
|
@ -321,6 +321,8 @@ void sist2_index(index_args_t *args) {
|
||||
strcpy(doc_id, cJSON_GetObjectItem(json, "_id")->valuestring);
|
||||
cJSON_DeleteItemFromObject(json, "_id");
|
||||
|
||||
// TODO: delete tag if empty
|
||||
|
||||
if (args->print) {
|
||||
print_json(json, doc_id);
|
||||
} else {
|
||||
@ -462,6 +464,11 @@ int set_to_negative_if_value_is_zero(UNUSED(struct argparse *self), const struct
|
||||
int main(int argc, const char *argv[]) {
|
||||
setlocale(LC_ALL, "");
|
||||
|
||||
// database_t *db = database_create("clip.sist2", INDEX_DATABASE);
|
||||
// database_open(db);
|
||||
// database_test(db);
|
||||
// exit(0);
|
||||
|
||||
scan_args_t *scan_args = scan_args_create();
|
||||
index_args_t *index_args = index_args_create();
|
||||
web_args_t *web_args = web_args_create();
|
||||
|
@ -87,7 +87,7 @@ static void buf2hex(const unsigned char *buf, size_t buflen, char *hex_string) {
|
||||
*s = '\0';
|
||||
}
|
||||
|
||||
static void md5_hexdigest(void *data, size_t size, char *output) {
|
||||
static void md5_hexdigest(const void *data, size_t size, char *output) {
|
||||
EVP_MD_CTX *md_ctx = EVP_MD_CTX_new();
|
||||
EVP_DigestInit_ex(md_ctx, EVP_md5(), NULL);
|
||||
|
||||
@ -120,7 +120,7 @@ struct timespec timespec_add(struct timespec ts1, long usec);
|
||||
#define pthread_cond_timedwait_ms(cond, mutex, delay_ms) do {\
|
||||
struct timespec now; \
|
||||
clock_gettime(CLOCK_REALTIME, &now); \
|
||||
struct timespec end_time = timespec_add(now, MILLISECOND * delay_ms); \
|
||||
struct timespec end_time = timespec_add(now, MILLISECOND * (delay_ms)); \
|
||||
pthread_cond_timedwait(cond, mutex, &end_time); \
|
||||
} while (0)
|
||||
|
||||
|
@ -28,7 +28,15 @@ static struct mg_http_serve_opts DefaultServeOpts = {
|
||||
.fs = NULL,
|
||||
.ssi_pattern = NULL,
|
||||
.root_dir = NULL,
|
||||
.mime_types = ""
|
||||
.mime_types = HTTP_SERVER_HEADER
|
||||
};
|
||||
|
||||
static struct mg_http_serve_opts IndexServeOpts = {
|
||||
.fs = NULL,
|
||||
.ssi_pattern = NULL,
|
||||
.root_dir = NULL,
|
||||
.mime_types = "",
|
||||
.extra_headers = HTTP_SERVER_HEADER "Cross-Origin-Embedder-Policy: require-corp\r\nCross-Origin-Opener-Policy: same-origin\r\n"
|
||||
};
|
||||
|
||||
void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
@ -67,7 +75,7 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
void serve_index_html(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
if (WebCtx.dev) {
|
||||
mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", &DefaultServeOpts);
|
||||
mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", &IndexServeOpts);
|
||||
} else {
|
||||
web_serve_asset_index_html(nc);
|
||||
}
|
||||
@ -334,6 +342,9 @@ void index_info(struct mg_connection *nc) {
|
||||
cJSON_AddStringToObject(idx_json, "rewriteUrl", idx->desc.rewrite_url);
|
||||
cJSON_AddNumberToObject(idx_json, "timestamp", (double) idx->desc.timestamp);
|
||||
cJSON_AddItemToArray(arr, idx_json);
|
||||
|
||||
cJSON *models = database_get_models(idx->db);
|
||||
cJSON_AddItemToObject(idx_json, "models", models);
|
||||
}
|
||||
|
||||
if (WebCtx.search_backend == SQLITE_SEARCH_BACKEND) {
|
||||
|
@ -32,6 +32,9 @@ typedef struct {
|
||||
int fetch_aggregations;
|
||||
int highlight;
|
||||
int highlight_context_size;
|
||||
int model;
|
||||
float *embedding;
|
||||
int embedding_size;
|
||||
} fts_search_req_t;
|
||||
|
||||
fts_sort_t get_sort_mode(const cJSON *req_sort) {
|
||||
@ -45,11 +48,27 @@ fts_sort_t get_sort_mode(const cJSON *req_sort) {
|
||||
return FTS_SORT_RANDOM;
|
||||
} else if (strcmp(req_sort->valuestring, "name") == 0) {
|
||||
return FTS_SORT_NAME;
|
||||
} else if (strcmp(req_sort->valuestring, "embedding") == 0) {
|
||||
return FTS_SORT_EMBEDDING;
|
||||
}
|
||||
|
||||
return FTS_SORT_INVALID;
|
||||
}
|
||||
|
||||
/**
 * Copies the elements of a cJSON array into a newly allocated float buffer.
 *
 * Each element's `valuedouble` is narrowed to float; elements are not
 * type-checked here, so callers should validate the array beforehand.
 *
 * @param arr  cJSON array to read
 * @param size out: number of elements in the returned buffer; set to 0 when
 *             NULL is returned
 * @return buffer owned by the caller (release with free()), or NULL if the
 *         allocation failed
 */
float *get_float_buffer(cJSON *arr, int *size) {
    int count = cJSON_GetArraySize(arr);
    *size = count;

    float *floats = malloc(sizeof *floats * (size_t) count);
    if (floats == NULL) {
        /* Never report a non-zero length together with a NULL buffer. */
        *size = 0;
        return NULL;
    }

    int i = 0;
    cJSON *elem;
    cJSON_ArrayForEach(elem, arr) {
        floats[i] = (float) elem->valuedouble;
        i += 1;
    }

    return floats;
}
|
||||
|
||||
static json_value get_json_string(cJSON *object, const char *name) {
|
||||
|
||||
@ -89,6 +108,25 @@ static json_value get_json_bool(cJSON *object, const char *name) {
|
||||
return (json_value) {item, FALSE};
|
||||
}
|
||||
|
||||
/**
 * Looks up `name` in `object` and validates it as a non-empty array of numbers.
 *
 * Result, as a json_value {val, invalid} pair:
 *  - member missing or JSON null      -> {NULL, FALSE}
 *  - not an array, empty array, or
 *    any element that is not a number -> {NULL, TRUE}
 *  - otherwise                        -> {the array item, FALSE}
 */
static json_value get_json_float_array(cJSON *object, const char *name) {
    cJSON *array = cJSON_GetObjectItem(object, name);

    if (array == NULL || cJSON_IsNull(array)) {
        return (json_value) {NULL, FALSE};
    }

    /* Start from the shape check, then require every element to be numeric. */
    int usable = cJSON_IsArray(array) && cJSON_GetArraySize(array) != 0;

    if (usable) {
        cJSON *entry;
        cJSON_ArrayForEach(entry, array) {
            if (!cJSON_IsNumber(entry)) {
                usable = 0;
                break;
            }
        }
    }

    if (!usable) {
        return (json_value) {NULL, TRUE};
    }
    return (json_value) {array, FALSE};
}
|
||||
|
||||
static json_value get_json_array(cJSON *object, const char *name) {
|
||||
cJSON *item = cJSON_GetObjectItem(object, name);
|
||||
if (item == NULL || cJSON_IsNull(item)) {
|
||||
@ -131,7 +169,7 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
|
||||
|
||||
json_value req_query, req_path, req_size_min, req_size_max, req_date_min, req_date_max, req_page_size,
|
||||
req_index_ids, req_mime_types, req_tags, req_sort_asc, req_sort, req_seed, req_after,
|
||||
req_fetch_aggregations, req_highlight, req_highlight_context_size;
|
||||
req_fetch_aggregations, req_highlight, req_highlight_context_size, req_embedding, req_model;
|
||||
|
||||
if (!cJSON_IsObject(json) ||
|
||||
(req_query = get_json_string(json, "query")).invalid ||
|
||||
@ -150,6 +188,8 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
|
||||
(req_mime_types = get_json_array(json, "mimeTypes")).invalid ||
|
||||
(req_highlight = get_json_bool(json, "highlight")).invalid ||
|
||||
(req_highlight_context_size = get_json_number(json, "highlightContextSize")).invalid ||
|
||||
(req_embedding = get_json_float_array(json, "embedding")).invalid ||
|
||||
(req_model = get_json_number(json, "model")).invalid ||
|
||||
(req_tags = get_json_array(json, "tags")).invalid) {
|
||||
cJSON_Delete(json);
|
||||
return NULL;
|
||||
@ -190,7 +230,11 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
|
||||
cJSON_Delete(json);
|
||||
return NULL;
|
||||
}
|
||||
if (req_highlight_context_size.val->valueint < 0) {
|
||||
if (req_highlight_context_size.val && req_highlight_context_size.val->valueint < 0) {
|
||||
cJSON_Delete(json);
|
||||
return NULL;
|
||||
}
|
||||
if (req_model.val && !req_embedding.val || !req_model.val && req_embedding.val) {
|
||||
cJSON_Delete(json);
|
||||
return NULL;
|
||||
}
|
||||
@ -216,6 +260,10 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
|
||||
req->highlight_context_size = req_highlight_context_size.val
|
||||
? req_highlight_context_size.val->valueint
|
||||
: DEFAULT_HIGHLIGHT_CONTEXT_SIZE;
|
||||
req->model = req_model.val ? req_model.val->valueint : 0;
|
||||
req->embedding = req_model.val
|
||||
? get_float_buffer(req_embedding.val, &req->embedding_size)
|
||||
: NULL;
|
||||
|
||||
cJSON_Delete(json);
|
||||
|
||||
@ -238,6 +286,10 @@ void destroy_search_req(fts_search_req_t *req) {
|
||||
destroy_array(req->mime_types);
|
||||
destroy_array(req->tags);
|
||||
|
||||
if (req->embedding) {
|
||||
free(req->embedding);
|
||||
}
|
||||
|
||||
free(req);
|
||||
}
|
||||
|
||||
@ -331,7 +383,13 @@ void fts_search(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
req->page_size, req->index_ids, req->mime_types,
|
||||
req->tags, req->sort_asc, req->sort, req->seed,
|
||||
req->after, req->fetch_aggregations, req->highlight,
|
||||
req->highlight_context_size);
|
||||
req->highlight_context_size, req->model,
|
||||
req->embedding, req->embedding_size);
|
||||
|
||||
if (json == NULL) {
|
||||
HTTP_REPLY_BAD_REQUEST
|
||||
return;
|
||||
}
|
||||
|
||||
destroy_search_req(req);
|
||||
mg_send_json(nc, json);
|
||||
|
Loading…
x
Reference in New Issue
Block a user