This commit is contained in:
simon987 2023-07-24 19:36:20 -04:00
parent f56cfb0f2f
commit 27188b6fa0
29 changed files with 1008 additions and 75 deletions

View File

@ -63,7 +63,7 @@ add_executable(
src/database/database_schema.c src/database/database_schema.c
src/database/database_fts.c src/database/database_fts.c
src/web/web_fts.c src/web/web_fts.c
) src/database/database_embeddings.c)
set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C) set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/) target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
@ -76,6 +76,7 @@ find_package(unofficial-mongoose CONFIG REQUIRED)
find_package(CURL CONFIG REQUIRED) find_package(CURL CONFIG REQUIRED)
find_library(MAGIC_LIB NAMES libmagic.a REQUIRED) find_library(MAGIC_LIB NAMES libmagic.a REQUIRED)
find_package(unofficial-sqlite3 CONFIG REQUIRED) find_package(unofficial-sqlite3 CONFIG REQUIRED)
find_package(OpenBLAS CONFIG REQUIRED)
target_include_directories( target_include_directories(
@ -158,6 +159,7 @@ target_link_libraries(
${MAGIC_LIB} ${MAGIC_LIB}
unofficial::sqlite3::sqlite3 unofficial::sqlite3::sqlite3
OpenBLAS::OpenBLAS
) )
add_custom_target( add_custom_target(

View File

@ -19,6 +19,7 @@
"dom-to-image": "^2.6.0", "dom-to-image": "^2.6.0",
"fslightbox-vue": "fslightbox-vue.tgz", "fslightbox-vue": "fslightbox-vue.tgz",
"nouislider": "^15.2.0", "nouislider": "^15.2.0",
"onnxruntime-web": "^1.15.1",
"underscore": "^1.13.1", "underscore": "^1.13.1",
"vue": "^2.6.12", "vue": "^2.6.12",
"vue-color": "^2.8.1", "vue-color": "^2.8.1",
@ -30,6 +31,7 @@
}, },
"devDependencies": { "devDependencies": {
"@babel/polyfill": "^7.12.1", "@babel/polyfill": "^7.12.1",
"@types/underscore": "^1.11.6",
"@vue/cli-plugin-babel": "~5.0.8", "@vue/cli-plugin-babel": "~5.0.8",
"@vue/cli-plugin-router": "~5.0.8", "@vue/cli-plugin-router": "~5.0.8",
"@vue/cli-plugin-typescript": "^5.0.8", "@vue/cli-plugin-typescript": "^5.0.8",
@ -1956,6 +1958,60 @@
"integrity": "sha512-a5Sab1C4/icpTZVzZc5Ghpz88yQtGOyNqYXcZgOssB2uuAr+wF/MvN6bgtW32q7HHrvBki+BsZ0OuNv6EV3K9g==", "integrity": "sha512-a5Sab1C4/icpTZVzZc5Ghpz88yQtGOyNqYXcZgOssB2uuAr+wF/MvN6bgtW32q7HHrvBki+BsZ0OuNv6EV3K9g==",
"dev": true "dev": true
}, },
"node_modules/@protobufjs/aspromise": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
"integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ=="
},
"node_modules/@protobufjs/base64": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
"integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg=="
},
"node_modules/@protobufjs/codegen": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz",
"integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg=="
},
"node_modules/@protobufjs/eventemitter": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
"integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q=="
},
"node_modules/@protobufjs/fetch": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
"integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
"dependencies": {
"@protobufjs/aspromise": "^1.1.1",
"@protobufjs/inquire": "^1.1.0"
}
},
"node_modules/@protobufjs/float": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
"integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ=="
},
"node_modules/@protobufjs/inquire": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz",
"integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q=="
},
"node_modules/@protobufjs/path": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
"integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA=="
},
"node_modules/@protobufjs/pool": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
"integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw=="
},
"node_modules/@protobufjs/utf8": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
"integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw=="
},
"node_modules/@sideway/address": { "node_modules/@sideway/address": {
"version": "4.1.4", "version": "4.1.4",
"resolved": "https://registry.npmjs.org/@sideway/address/-/address-4.1.4.tgz", "resolved": "https://registry.npmjs.org/@sideway/address/-/address-4.1.4.tgz",
@ -2488,6 +2544,12 @@
"@types/node": "*" "@types/node": "*"
} }
}, },
"node_modules/@types/underscore": {
"version": "1.11.6",
"resolved": "https://registry.npmjs.org/@types/underscore/-/underscore-1.11.6.tgz",
"integrity": "sha512-G2oC64I/sR817KDL2b2Mc7+diXyxcibyUeLMyexU4K/sG8hyt/YMlbBK0TVhx/YQ1ehfzgXhLuq2YQHIL4bXUQ==",
"dev": true
},
"node_modules/@types/webgl-ext": { "node_modules/@types/webgl-ext": {
"version": "0.0.30", "version": "0.0.30",
"resolved": "https://registry.npmjs.org/@types/webgl-ext/-/webgl-ext-0.0.30.tgz", "resolved": "https://registry.npmjs.org/@types/webgl-ext/-/webgl-ext-0.0.30.tgz",
@ -6563,6 +6625,11 @@
"node": ">=8" "node": ">=8"
} }
}, },
"node_modules/flatbuffers": {
"version": "1.12.0",
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz",
"integrity": "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ=="
},
"node_modules/follow-redirects": { "node_modules/follow-redirects": {
"version": "1.15.2", "version": "1.15.2",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz", "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
@ -6951,6 +7018,11 @@
"integrity": "sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA==", "integrity": "sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA==",
"dev": true "dev": true
}, },
"node_modules/guid-typescript": {
"version": "1.0.9",
"resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz",
"integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ=="
},
"node_modules/gzip-size": { "node_modules/gzip-size": {
"version": "6.0.0", "version": "6.0.0",
"resolved": "https://registry.npmjs.org/gzip-size/-/gzip-size-6.0.0.tgz", "resolved": "https://registry.npmjs.org/gzip-size/-/gzip-size-6.0.0.tgz",
@ -8591,6 +8663,32 @@
"node": ">=4" "node": ">=4"
} }
}, },
"node_modules/onnx-proto": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/onnx-proto/-/onnx-proto-4.0.4.tgz",
"integrity": "sha512-aldMOB3HRoo6q/phyB6QRQxSt895HNNw82BNyZ2CMh4bjeKv7g/c+VpAFtJuEMVfYLMbRx61hbuqnKceLeDcDA==",
"dependencies": {
"protobufjs": "^6.8.8"
}
},
"node_modules/onnxruntime-common": {
"version": "1.15.1",
"resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.15.1.tgz",
"integrity": "sha512-Y89eJ8QmaRsPZPWLaX7mfqhj63ny47rSkQe80hIo+lvBQdrdXYR9VO362xvZulk9DFkCnXmGidprvgJ07bKsIQ=="
},
"node_modules/onnxruntime-web": {
"version": "1.15.1",
"resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.15.1.tgz",
"integrity": "sha512-Ky4AXFLFyiGRu5KQJdDcbhdNcO0f2ND/8IPmTEwcKKIHpCwH6/Q9UoMpcoFz78lxGvnmmy+FFgA/Bs1HjdM6LA==",
"dependencies": {
"flatbuffers": "^1.12.0",
"guid-typescript": "^1.0.9",
"long": "^4.0.0",
"onnx-proto": "^4.0.4",
"onnxruntime-common": "~1.15.1",
"platform": "^1.3.6"
}
},
"node_modules/open": { "node_modules/open": {
"version": "8.4.0", "version": "8.4.0",
"resolved": "https://registry.npmjs.org/open/-/open-8.4.0.tgz", "resolved": "https://registry.npmjs.org/open/-/open-8.4.0.tgz",
@ -8972,6 +9070,11 @@
"node": ">=8" "node": ">=8"
} }
}, },
"node_modules/platform": {
"version": "1.3.6",
"resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
"integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg=="
},
"node_modules/popper.js": { "node_modules/popper.js": {
"version": "1.16.1", "version": "1.16.1",
"resolved": "https://registry.npmjs.org/popper.js/-/popper.js-1.16.1.tgz", "resolved": "https://registry.npmjs.org/popper.js/-/popper.js-1.16.1.tgz",
@ -9593,6 +9696,31 @@
"integrity": "sha1-IS1b/hMYMGpCD2QCuOJv85ZHqEk=", "integrity": "sha1-IS1b/hMYMGpCD2QCuOJv85ZHqEk=",
"dev": true "dev": true
}, },
"node_modules/protobufjs": {
"version": "6.11.3",
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.3.tgz",
"integrity": "sha512-xL96WDdCZYdU7Slin569tFX712BxsxslWwAfAhCYjQKGTq7dAU91Lomy6nLLhh/dyGhk/YH4TwTSRxTzhuHyZg==",
"hasInstallScript": true,
"dependencies": {
"@protobufjs/aspromise": "^1.1.2",
"@protobufjs/base64": "^1.1.2",
"@protobufjs/codegen": "^2.0.4",
"@protobufjs/eventemitter": "^1.1.0",
"@protobufjs/fetch": "^1.1.0",
"@protobufjs/float": "^1.0.2",
"@protobufjs/inquire": "^1.1.0",
"@protobufjs/path": "^1.1.2",
"@protobufjs/pool": "^1.1.0",
"@protobufjs/utf8": "^1.1.0",
"@types/long": "^4.0.1",
"@types/node": ">=13.7.0",
"long": "^4.0.0"
},
"bin": {
"pbjs": "bin/pbjs",
"pbts": "bin/pbts"
}
},
"node_modules/proxy-addr": { "node_modules/proxy-addr": {
"version": "2.0.7", "version": "2.0.7",
"resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",
@ -13273,6 +13401,60 @@
"integrity": "sha512-a5Sab1C4/icpTZVzZc5Ghpz88yQtGOyNqYXcZgOssB2uuAr+wF/MvN6bgtW32q7HHrvBki+BsZ0OuNv6EV3K9g==", "integrity": "sha512-a5Sab1C4/icpTZVzZc5Ghpz88yQtGOyNqYXcZgOssB2uuAr+wF/MvN6bgtW32q7HHrvBki+BsZ0OuNv6EV3K9g==",
"dev": true "dev": true
}, },
"@protobufjs/aspromise": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
"integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ=="
},
"@protobufjs/base64": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
"integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg=="
},
"@protobufjs/codegen": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz",
"integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg=="
},
"@protobufjs/eventemitter": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
"integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q=="
},
"@protobufjs/fetch": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
"integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
"requires": {
"@protobufjs/aspromise": "^1.1.1",
"@protobufjs/inquire": "^1.1.0"
}
},
"@protobufjs/float": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
"integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ=="
},
"@protobufjs/inquire": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz",
"integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q=="
},
"@protobufjs/path": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
"integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA=="
},
"@protobufjs/pool": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
"integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw=="
},
"@protobufjs/utf8": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
"integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw=="
},
"@sideway/address": { "@sideway/address": {
"version": "4.1.4", "version": "4.1.4",
"resolved": "https://registry.npmjs.org/@sideway/address/-/address-4.1.4.tgz", "resolved": "https://registry.npmjs.org/@sideway/address/-/address-4.1.4.tgz",
@ -13727,6 +13909,12 @@
"@types/node": "*" "@types/node": "*"
} }
}, },
"@types/underscore": {
"version": "1.11.6",
"resolved": "https://registry.npmjs.org/@types/underscore/-/underscore-1.11.6.tgz",
"integrity": "sha512-G2oC64I/sR817KDL2b2Mc7+diXyxcibyUeLMyexU4K/sG8hyt/YMlbBK0TVhx/YQ1ehfzgXhLuq2YQHIL4bXUQ==",
"dev": true
},
"@types/webgl-ext": { "@types/webgl-ext": {
"version": "0.0.30", "version": "0.0.30",
"resolved": "https://registry.npmjs.org/@types/webgl-ext/-/webgl-ext-0.0.30.tgz", "resolved": "https://registry.npmjs.org/@types/webgl-ext/-/webgl-ext-0.0.30.tgz",
@ -16894,6 +17082,11 @@
"path-exists": "^4.0.0" "path-exists": "^4.0.0"
} }
}, },
"flatbuffers": {
"version": "1.12.0",
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz",
"integrity": "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ=="
},
"follow-redirects": { "follow-redirects": {
"version": "1.15.2", "version": "1.15.2",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz", "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
@ -17162,6 +17355,11 @@
"integrity": "sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA==", "integrity": "sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA==",
"dev": true "dev": true
}, },
"guid-typescript": {
"version": "1.0.9",
"resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz",
"integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ=="
},
"gzip-size": { "gzip-size": {
"version": "6.0.0", "version": "6.0.0",
"resolved": "https://registry.npmjs.org/gzip-size/-/gzip-size-6.0.0.tgz", "resolved": "https://registry.npmjs.org/gzip-size/-/gzip-size-6.0.0.tgz",
@ -18424,6 +18622,32 @@
"mimic-fn": "^1.0.0" "mimic-fn": "^1.0.0"
} }
}, },
"onnx-proto": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/onnx-proto/-/onnx-proto-4.0.4.tgz",
"integrity": "sha512-aldMOB3HRoo6q/phyB6QRQxSt895HNNw82BNyZ2CMh4bjeKv7g/c+VpAFtJuEMVfYLMbRx61hbuqnKceLeDcDA==",
"requires": {
"protobufjs": "^6.8.8"
}
},
"onnxruntime-common": {
"version": "1.15.1",
"resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.15.1.tgz",
"integrity": "sha512-Y89eJ8QmaRsPZPWLaX7mfqhj63ny47rSkQe80hIo+lvBQdrdXYR9VO362xvZulk9DFkCnXmGidprvgJ07bKsIQ=="
},
"onnxruntime-web": {
"version": "1.15.1",
"resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.15.1.tgz",
"integrity": "sha512-Ky4AXFLFyiGRu5KQJdDcbhdNcO0f2ND/8IPmTEwcKKIHpCwH6/Q9UoMpcoFz78lxGvnmmy+FFgA/Bs1HjdM6LA==",
"requires": {
"flatbuffers": "^1.12.0",
"guid-typescript": "^1.0.9",
"long": "^4.0.0",
"onnx-proto": "^4.0.4",
"onnxruntime-common": "~1.15.1",
"platform": "^1.3.6"
}
},
"open": { "open": {
"version": "8.4.0", "version": "8.4.0",
"resolved": "https://registry.npmjs.org/open/-/open-8.4.0.tgz", "resolved": "https://registry.npmjs.org/open/-/open-8.4.0.tgz",
@ -18710,6 +18934,11 @@
"find-up": "^4.0.0" "find-up": "^4.0.0"
} }
}, },
"platform": {
"version": "1.3.6",
"resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
"integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg=="
},
"popper.js": { "popper.js": {
"version": "1.16.1", "version": "1.16.1",
"resolved": "https://registry.npmjs.org/popper.js/-/popper.js-1.16.1.tgz", "resolved": "https://registry.npmjs.org/popper.js/-/popper.js-1.16.1.tgz",
@ -19118,6 +19347,26 @@
"integrity": "sha1-IS1b/hMYMGpCD2QCuOJv85ZHqEk=", "integrity": "sha1-IS1b/hMYMGpCD2QCuOJv85ZHqEk=",
"dev": true "dev": true
}, },
"protobufjs": {
"version": "6.11.3",
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.3.tgz",
"integrity": "sha512-xL96WDdCZYdU7Slin569tFX712BxsxslWwAfAhCYjQKGTq7dAU91Lomy6nLLhh/dyGhk/YH4TwTSRxTzhuHyZg==",
"requires": {
"@protobufjs/aspromise": "^1.1.2",
"@protobufjs/base64": "^1.1.2",
"@protobufjs/codegen": "^2.0.4",
"@protobufjs/eventemitter": "^1.1.0",
"@protobufjs/fetch": "^1.1.0",
"@protobufjs/float": "^1.0.2",
"@protobufjs/inquire": "^1.1.0",
"@protobufjs/path": "^1.1.2",
"@protobufjs/pool": "^1.1.0",
"@protobufjs/utf8": "^1.1.0",
"@types/long": "^4.0.1",
"@types/node": ">=13.7.0",
"long": "^4.0.0"
}
},
"proxy-addr": { "proxy-addr": {
"version": "2.0.7", "version": "2.0.7",
"resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",

View File

@ -18,6 +18,7 @@
"dom-to-image": "^2.6.0", "dom-to-image": "^2.6.0",
"fslightbox-vue": "fslightbox-vue.tgz", "fslightbox-vue": "fslightbox-vue.tgz",
"nouislider": "^15.2.0", "nouislider": "^15.2.0",
"onnxruntime-web": "^1.15.1",
"underscore": "^1.13.1", "underscore": "^1.13.1",
"vue": "^2.6.12", "vue": "^2.6.12",
"vue-color": "^2.8.1", "vue-color": "^2.8.1",
@ -29,6 +30,7 @@
}, },
"devDependencies": { "devDependencies": {
"@babel/polyfill": "^7.12.1", "@babel/polyfill": "^7.12.1",
"@types/underscore": "^1.11.6",
"@vue/cli-plugin-babel": "~5.0.8", "@vue/cli-plugin-babel": "~5.0.8",
"@vue/cli-plugin-router": "~5.0.8", "@vue/cli-plugin-router": "~5.0.8",
"@vue/cli-plugin-typescript": "^5.0.8", "@vue/cli-plugin-typescript": "^5.0.8",

View File

@ -103,6 +103,16 @@ class Sist2ElasticsearchQuery {
q["highlightContextSize"] = Number(getters.optFragmentSize); q["highlightContextSize"] = Number(getters.optFragmentSize);
} }
if (getters.embeddingText) {
q["model"] = getters.embeddingsModel;
q["embedding"] = getters.embedding;
q["sort"] = "embedding";
q["sortAsc"] = false;
} else if (getters.sortMode == "embedding") {
q["sort"] = "sort"
q["sortAsc"] = true;
}
return q; return q;
} }
} }

View File

@ -12,7 +12,7 @@ export default {
props: ["span", "text"], props: ["span", "text"],
methods: { methods: {
getStyle() { getStyle() {
return ModelsRepo.data[this.$store.getters.mlModel.name].labelStyles[this.span.label]; return ModelsRepo.data[this.$store.getters.nerModel.name].labelStyles[this.span.label];
} }
} }
} }

View File

@ -22,7 +22,7 @@ export default {
props: ["spans", "text"], props: ["spans", "text"],
computed: { computed: {
legend() { legend() {
return Object.entries(ModelsRepo.data[this.$store.state.mlModel.name].legend) return Object.entries(ModelsRepo.data[this.$store.state.nerModel.name].legend)
.map(([label, name]) => ({ .map(([label, name]) => ({
text: name, text: name,
id: label, id: label,

View File

@ -0,0 +1,99 @@
<template>
<div>
<b-progress v-if="modelLoading" :value="modelLoadingProgress" max="1" class="mb-1" variant="warning"
show-progress>
</b-progress>
<b-input-group>
<b-form-input :value="embeddingText"
:placeholder="$t('embeddingsSearchPlaceholder')"
@input="onInput($event)"
:disabled="modelLoading"
></b-form-input>
<!-- TODO: dropdown of available models-->
<!-- <template #prepend>-->
<!-- <b-input-group-text>-->
<!-- <b-form-checkbox :checked="fuzzy" title="Toggle fuzzy searching" @change="setFuzzy($event)">-->
<!-- {{ $t("searchBar.fuzzy") }}-->
<!-- </b-form-checkbox>-->
<!-- </b-input-group-text>-->
<!-- </template>-->
<template #append>
<b-input-group-text>
<MLIcon></MLIcon>
</b-input-group-text>
</template>
</b-input-group>
</div>
</template>
<script>
import {mapGetters, mapMutations} from "vuex";
import {CLIPTransformerModel} from "@/ml/CLIPTransformerModel"
import _debounce from "lodash/debounce";
import MLIcon from "@/components/icons/MlIcon.vue";
/**
 * Text box that turns the typed query into an embedding vector and stores it
 * in the Vuex store (`setEmbeddingText` / `setEmbedding` / `setEmbeddingsModel`).
 *
 * The CLIP model is downloaded lazily the first time non-empty text is entered;
 * while it loads, the input is disabled and a progress bar is shown.
 */
export default {
    components: {MLIcon},
    data() {
        return {
            // true while the model files are being downloaded/initialised
            modelLoading: false,
            // last progress value reported by the model's init callback (bar max is 1)
            modelLoadingProgress: 0,
            // true once init() has completed successfully
            modelLoaded: false,
            model: null
        }
    },
    computed: {
        ...mapGetters({
            optQueryMode: "optQueryMode",
            embeddingText: "embeddingText",
            fuzzy: "fuzzy",
        }),
    },
    mounted() {
        // Debounce keystrokes so the model only runs once typing pauses for 300 ms.
        this.onInput = _debounce(this._onInput, 300, {leading: false});
    },
    methods: {
        ...mapMutations({
            setEmbeddingText: "setEmbeddingText",
            setEmbedding: "setEmbedding",
            setEmbeddingModel: "setEmbeddingsModel",
        }),
        /**
         * Download and initialise the CLIP text model.
         * Any error from the download/initialisation is re-thrown to the caller,
         * but the loading flag is always cleared so the input does not stay
         * disabled forever after a failed attempt.
         */
        async loadModel() {
            this.modelLoading = true;
            try {
                this.model = new CLIPTransformerModel(
                    // TODO: add a config for this (?)
                    "https://github.com/simon987/sist2-models/raw/main/clip/models/clip-vit-base-patch32-q8.onnx",
                    "https://github.com/simon987/sist2-models/raw/main/clip/models/tokenizer.json",
                );

                await this.model.init(async progress => {
                    this.modelLoadingProgress = progress;
                });

                // Only mark as loaded when init() succeeded, so a later input retries.
                this.modelLoaded = true;
            } finally {
                this.modelLoading = false;
            }
        },
        /**
         * Debounced input handler: compute the embedding of `text` and publish
         * it to the store. Empty text clears the embedding query.
         */
        async _onInput(text) {
            // Handle the "cleared" case first: it needs no model, so it must not
            // trigger the (large) model download.
            if (text.length === 0) {
                this.setEmbeddingText("");
                this.setEmbedding(null);
                return;
            }

            if (!this.modelLoaded) {
                await this.loadModel();
                this.setEmbeddingModel(1); // TODO
            }

            const embeddings = await this.model.predict(text);

            this.setEmbeddingText(text);
            this.setEmbedding(embeddings);
        },
    }
}
</script>
<style>
</style>

View File

@ -9,7 +9,7 @@
<b-button :disabled="mlPredictionsLoading || mlLoading" @click="mlAnalyze" variant="primary" <b-button :disabled="mlPredictionsLoading || mlLoading" @click="mlAnalyze" variant="primary"
>{{ $t("ml.analyzeText") }} >{{ $t("ml.analyzeText") }}
</b-button> </b-button>
<b-select :disabled="mlPredictionsLoading || mlLoading" class="ml-2" v-model="mlModel"> <b-select :disabled="mlPredictionsLoading || mlLoading" class="ml-2" v-model="nerModel">
<b-select-option :value="opt.value" v-for="opt of ModelsRepo.getOptions()">{{ opt.text }} <b-select-option :value="opt.value" v-for="opt of ModelsRepo.getOptions()">{{ opt.text }}
</b-select-option> </b-select-option>
</b-select> </b-select>
@ -57,16 +57,16 @@ export default {
modelPredictionProgress: 0, modelPredictionProgress: 0,
mlPredictionsLoading: false, mlPredictionsLoading: false,
mlLoading: false, mlLoading: false,
mlModel: null, nerModel: null,
analyzedContentSpans: [] analyzedContentSpans: []
} }
}, },
mounted() { mounted() {
if (this.$store.getters.optMlDefaultModel) { if (this.$store.getters.optMlDefaultModel) {
this.mlModel = this.$store.getters.optMlDefaultModel this.nerModel = this.$store.getters.optMlDefaultModel
} else { } else {
this.mlModel = ModelsRepo.getDefaultModel(); this.nerModel = ModelsRepo.getDefaultModel();
} }
Sist2Api Sist2Api
@ -86,7 +86,7 @@ export default {
computed: { computed: {
...mapGetters(["optAutoAnalyze"]), ...mapGetters(["optAutoAnalyze"]),
modelSize() { modelSize() {
const modelData = ModelsRepo.data[this.mlModel]; const modelData = ModelsRepo.data[this.nerModel];
if (!modelData) { if (!modelData) {
return 0; return 0;
} }
@ -110,10 +110,10 @@ export default {
} }
}, },
async getMlModel() { async getMlModel() {
if (this.$store.getters.mlModel.name !== this.mlModel) { if (this.$store.getters.nerModel.name !== this.nerModel) {
this.mlLoading = true; this.mlLoading = true;
this.modelLoadingProgress = 0; this.modelLoadingProgress = 0;
const modelInfo = ModelsRepo.data[this.mlModel]; const modelInfo = ModelsRepo.data[this.nerModel];
const model = new BertNerModel( const model = new BertNerModel(
modelInfo.vocabUrl, modelInfo.vocabUrl,
@ -122,25 +122,25 @@ export default {
) )
await model.init(progress => this.modelLoadingProgress = progress); await model.init(progress => this.modelLoadingProgress = progress);
this.$store.commit("setMlModel", {model, name: this.mlModel}); this.$store.commit("setNerModel", {model, name: this.nerModel});
this.mlLoading = false; this.mlLoading = false;
return model return model
} }
return this.$store.getters.mlModel.model; return this.$store.getters.nerModel.model;
}, },
async mlAnalyze() { async mlAnalyze() {
if (!this.content) { if (!this.content) {
return; return;
} }
const modelInfo = ModelsRepo.data[this.mlModel]; const modelInfo = ModelsRepo.data[this.nerModel];
if (modelInfo === undefined) { if (modelInfo === undefined) {
return; return;
} }
this.$store.commit("setOptMlDefaultModel", this.mlModel); this.$store.commit("setOptMlDefaultModel", this.nerModel);
await this.$store.dispatch("updateConfiguration"); await this.$store.dispatch("updateConfiguration");
const model = await this.getMlModel(); const model = await this.getMlModel();

View File

@ -1,5 +1,5 @@
<template> <template>
<b-dropdown variant="primary"> <b-dropdown variant="primary" :disabled="$store.getters.embeddingText !== ''">
<b-dropdown-item :class="{'dropdown-active': sort === 'score'}" @click="onSelect('score')">{{ <b-dropdown-item :class="{'dropdown-active': sort === 'score'}" @click="onSelect('score')">{{
$t("sort.relevance") $t("sort.relevance")
}} }}

View File

@ -210,4 +210,8 @@ export default {
.theme-black .inspire-tree .matched > .wholerow { .theme-black .inspire-tree .matched > .wholerow {
background: rgba(251, 191, 41, 0.25); background: rgba(251, 191, 41, 0.25);
} }
#tagTree {
max-height: 350px;
overflow: auto;
}
</style> </style>

View File

@ -0,0 +1,50 @@
<template>
<svg height="20px" width="20px" xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 512 512" xml:space="preserve">
<g>
<path class="st0" d="M167.314,14.993C167.314,6.712,160.602,0,152.332,0h-5.514c-8.27,0-14.982,6.712-14.982,14.993v41.466h35.478
V14.993z"/>
<path class="st0"
d="M238.26,14.993C238.26,6.712,231.549,0,223.278,0h-5.504c-8.271,0-14.982,6.712-14.982,14.993v41.466h35.468 V14.993z"/>
<path class="st0"
d="M309.207,14.993C309.207,6.712,302.496,0,294.225,0h-5.504c-8.271,0-14.982,6.712-14.982,14.993v41.466h35.468 V14.993z"/>
<path class="st0"
d="M380.164,14.993C380.164,6.712,373.453,0,365.182,0h-5.514c-8.27,0-14.982,6.712-14.982,14.993v41.466h35.478 V14.993z"/>
<path class="st0"
d="M131.836,497.007c0,8.282,6.712,14.993,14.982,14.993h5.514c8.27,0,14.982-6.711,14.982-14.993V455.55h-35.478 V497.007z"/>
<path class="st0"
d="M202.792,497.007c0,8.282,6.712,14.993,14.982,14.993h5.504c8.27,0,14.982-6.711,14.982-14.993V455.55h-35.468 V497.007z"/>
<path class="st0"
d="M273.739,497.007c0,8.282,6.712,14.993,14.982,14.993h5.504c8.271,0,14.982-6.711,14.982-14.993V455.55 h-35.468V497.007z"/>
<path class="st0"
d="M344.686,497.007c0,8.282,6.712,14.993,14.982,14.993h5.514c8.271,0,14.982-6.711,14.982-14.993V455.55 h-35.478V497.007z"/>
<path class="st0"
d="M497.018,131.836H455.55v35.479h41.468c8.27,0,14.982-6.712,14.982-14.993v-5.493 C512,138.548,505.288,131.836,497.018,131.836z"/>
<path class="st0"
d="M497.018,202.793H455.55v35.468h41.468c8.27,0,14.982-6.712,14.982-14.982v-5.494 C512,209.504,505.288,202.793,497.018,202.793z"/>
<path class="st0"
d="M497.018,273.739H455.55v35.468h41.468c8.27,0,14.982-6.711,14.982-14.992v-5.494 C512,280.451,505.288,273.739,497.018,273.739z"/>
<path class="st0"
d="M497.018,344.686H455.55v35.479h41.468c8.27,0,14.982-6.712,14.982-14.993v-5.493 C512,351.398,505.288,344.686,497.018,344.686z"/>
<path class="st0"
d="M0,146.828v5.493c0,8.281,6.711,14.993,14.982,14.993H56.46v-35.479H14.982C6.711,131.836,0,138.548,0,146.828 z"/>
<path class="st0"
d="M0,217.785v5.494c0,8.27,6.711,14.982,14.982,14.982H56.46v-35.468H14.982C6.711,202.793,0,209.504,0,217.785z "/>
<path class="st0"
d="M0,288.721v5.494c0,8.281,6.711,14.992,14.982,14.992H56.46v-35.468H14.982C6.711,273.739,0,280.451,0,288.721 z"/>
<path class="st0"
d="M0,359.679v5.493c0,8.281,6.711,14.993,14.982,14.993H56.46v-35.479H14.982C6.711,344.686,0,351.398,0,359.679 z"/>
<path class="st0"
d="M78.628,433.382h354.753V78.628H78.628V433.382z M376.56,120.2c9.18,0,16.635,7.445,16.635,16.634 c0,9.18-7.455,16.624-16.635,16.624c-9.179,0-16.624-7.445-16.624-16.624C359.936,127.644,367.381,120.2,376.56,120.2z M376.56,361.32c9.18,0,16.635,7.445,16.635,16.635c0,9.179-7.455,16.623-16.635,16.623c-9.179,0-16.624-7.444-16.624-16.623 C359.936,368.764,367.381,361.32,376.56,361.32z M184.362,184.362h143.287v143.287H184.362V184.362z M135.439,120.2 c9.19,0,16.635,7.445,16.635,16.634c0,9.169-7.445,16.624-16.635,16.624c-9.178,0-16.623-7.455-16.623-16.624 C118.816,127.644,126.26,120.2,135.439,120.2z M135.439,361.32c9.19,0,16.635,7.445,16.635,16.635 c0,9.169-7.445,16.623-16.635,16.623c-9.178,0-16.623-7.454-16.623-16.623C118.816,368.764,126.26,361.32,135.439,361.32z"/>
</g>
</svg>
</template>
<script>
// Component options for the icon; the markup is the inline SVG in this file's <template>.
export default {
name: "MLIcon"
}
</script>
<style scoped>
</style>

View File

@ -18,6 +18,7 @@ export default {
tags: "Tags", tags: "Tags",
tagFilter: "Filter tags", tagFilter: "Filter tags",
forExample: "For example:", forExample: "For example:",
embeddingsSearchPlaceholder: "Embeddings search",
help: { help: {
simpleSearch: "Simple search", simpleSearch: "Simple search",
advancedSearch: "Advanced search", advancedSearch: "Advanced search",

View File

@ -0,0 +1,118 @@
// Fallback rank for a symbol pair that has no entry in the merge-rank table.
const inf = Number.POSITIVE_INFINITY;
// Token id placed at the front of every sequence returned by encode().
const START_TOK = 49406;
// Token id appended after the (possibly truncated) tokens in encode().
const END_TOK = 49407;
/**
 * Fold `array` down to a single "winning" element.
 *
 * `key(current, candidate)` is called with the current winner and the next
 * element; when it returns a truthy value the candidate replaces the winner,
 * otherwise the current winner is kept (so the earliest element wins ties).
 * Like Array.prototype.reduce without an initial value, this throws on an
 * empty array.
 */
function min(array, key) {
    const keepOrReplace = (current, candidate) => {
        if (key(current, candidate)) {
            return candidate;
        }
        return current;
    };
    return array.reduce(keepOrReplace);
}
/**
 * Set of tuples (arrays of strings).
 *
 * Arrays are compared by identity in a plain Set, so each tuple is stored as
 * a single string made of its elements joined by a backtick. Elements must
 * therefore not contain a backtick themselves.
 */
class TupleSet extends Set {

    /** Insert a tuple; returns the set, like Set.prototype.add. */
    add(elem) {
        const packed = elem.join("`");
        return super.add(packed);
    }

    /** True when an equal tuple (same elements, same order) was added before. */
    has(elem) {
        const packed = elem.join("`");
        return super.has(packed);
    }

    /** All stored tuples, in insertion order, unpacked back into arrays. */
    toList() {
        return Array.from(this, packed => packed.split("`"));
    }
}
/**
 * Byte-pair-encoding tokenizer producing fixed-length (77 ids) sequences
 * framed by START_TOK / END_TOK and right-padded with 0.
 */
export class BPETokenizer {

    _encoder = null;
    _bpeRanks = null;

    /**
     * @param encoder  Object mapping a BPE symbol (e.g. "cat</w>") to its integer id.
     * @param bpeRanks Object mapping a symbol pair, written as the two symbols
     *                 joined by a backtick, to its merge rank. The pair with the
     *                 lowest rank is merged first.
     */
    constructor(encoder, bpeRanks) {
        this._encoder = encoder;
        this._bpeRanks = bpeRanks;
    }

    /**
     * List the distinct adjacent symbol pairs of `word`, in order of first
     * appearance. A word with fewer than two symbols yields an empty list.
     *
     * @param word Array of symbols.
     * @returns Array of [left, right] pairs.
     */
    getPairs(word) {
        const pairs = new TupleSet();

        let prevChar = word[0];
        for (let i = 1; i < word.length; i++) {
            pairs.add([prevChar, word[i]])
            prevChar = word[i];
        }

        return pairs.toList();
    }

    /**
     * Apply the BPE merges to a single token.
     *
     * The token is split into characters, "</w>" is appended to the last one,
     * and the lowest-ranked known pair is merged repeatedly until no known pair
     * remains or the word collapses into a single symbol.
     *
     * @param token Non-empty string.
     * @returns The resulting symbols joined by single spaces.
     */
    bpe(token) {
        let word = [...token];
        word[word.length - 1] += "</w>";
        let pairs = this.getPairs(word)

        if (pairs.length === 0) {
            return token + "</w>"
        }

        // Pairs missing from the rank table get an infinite rank so they are
        // never preferred over a known pair. (The fallback must be applied to
        // the looked-up rank, not to the lookup key.)
        const rankOf = pair => this._bpeRanks[pair.join("`")] ?? inf;

        while (true) {
            const bigram = min(pairs, (a, b) => rankOf(a) > rankOf(b));

            // Even the best candidate is unknown: nothing left to merge.
            if (this._bpeRanks[bigram.join("`")] === undefined) {
                break;
            }

            const [first, second] = bigram;
            let newWord = [];
            let i = 0;

            while (i < word.length) {
                const j = word.indexOf(first, i);
                if (j === -1) {
                    // No further occurrence of `first`: copy the tail unchanged.
                    newWord.push(...word.slice(i));
                    break;
                } else {
                    newWord.push(...word.slice(i, j));
                    i = j;
                }

                if (word[i] === first && i < word.length - 1 && word[i + 1] === second) {
                    // Found the pair: replace both symbols with the merged one.
                    newWord.push(first + second);
                    i += 2;
                } else {
                    newWord.push(word[i]);
                    i += 1;
                }
            }

            word = [...newWord]
            if (word.length === 1) {
                break;
            } else {
                pairs = this.getPairs(word);
            }
        }

        return word.join(" ");
    }

    /**
     * Tokenize `text` into exactly 77 ids: START_TOK, at most 75 BPE ids,
     * END_TOK, then 0-padding.
     *
     * Text in which the token pattern matches nothing (empty, whitespace or
     * punctuation only) yields just START_TOK, END_TOK and padding.
     *
     * @param text Input string.
     * @returns Array of 77 ids; a symbol absent from the encoder yields `undefined`.
     */
    encode(text) {
        const contextLength = 77;

        // NOTE(review): the pattern is case-insensitive but the text is not
        // lower-cased here; presumably the caller normalises case or the encoder
        // contains mixed-case symbols - confirm.
        const normalized = text.trim().replaceAll(/\s+/g, " ");

        // String.prototype.match returns null (not an empty array) when nothing matches.
        const tokens = normalized
            .match(/<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[a-zA-Z0-9]+/ig) ?? [];

        let bpeTokens = [START_TOK];
        for (const token of tokens) {
            bpeTokens.push(...this.bpe(token).split(" ").map(t => this._encoder[t]));
        }

        // Truncate so that END_TOK always fits within the context length.
        bpeTokens = bpeTokens.slice(0, contextLength - 1);
        bpeTokens.push(END_TOK);

        while (bpeTokens.length < contextLength) {
            bpeTokens.push(0);
        }

        return bpeTokens;
    }
}

View File

@ -1,6 +1,8 @@
import BertTokenizer from "@/ml/BertTokenizer"; import BertTokenizer from "@/ml/BertTokenizer";
import * as tf from "@tensorflow/tfjs";
import axios from "axios"; import axios from "axios";
import {chunk as _chunk} from "underscore";
import * as ort from "onnxruntime-web";
import {argMax, downloadToBuffer, ORT_WASM_PATHS} from "@/ml/mlUtils";
export default class BertNerModel { export default class BertNerModel {
vocabUrl; vocabUrl;
@ -29,7 +31,10 @@ export default class BertNerModel {
} }
async loadModel(onProgress) { async loadModel(onProgress) {
this._model = await tf.loadGraphModel(this.modelUrl, {onProgress}); ort.env.wasm.wasmPaths = ORT_WASM_PATHS;
const buf = await downloadToBuffer(this.modelUrl, onProgress);
this._model = await ort.InferenceSession.create(buf.buffer, {executionProviders: ["wasm"]});
} }
alignLabels(labels, wordIds, words) { alignLabels(labels, wordIds, words) {
@ -57,21 +62,28 @@ export default class BertNerModel {
async predict(text, callback) { async predict(text, callback) {
this._previousWordId = null; this._previousWordId = null;
const encoded = this._tokenizer.encodeText(text, this.inputSize) const encoded = this._tokenizer.encodeText(text, this.inputSize);
let i = 0;
for (let chunk of encoded.inputChunks) { for (let chunk of encoded.inputChunks) {
const rawResult = tf.tidy(() => this._model.execute({
input_ids: tf.tensor2d(chunk.inputIds, [1, this.inputSize], "int32"),
token_type_ids: tf.tensor2d(chunk.segmentIds, [1, this.inputSize], "int32"),
attention_mask: tf.tensor2d(chunk.inputMask, [1, this.inputSize], "int32"),
}));
const labelIds = await tf.argMax(rawResult, -1); const results = await this._model.run({
const labelIdsArray = await labelIds.array(); input_ids: new ort.Tensor("int32", chunk.inputIds, [1, this.inputSize]),
const labels = labelIdsArray[0].map(id => this.id2label[id]); token_type_ids: new ort.Tensor("int32", chunk.segmentIds, [1, this.inputSize]),
rawResult.dispose() attention_mask: new ort.Tensor("int32", chunk.inputMask, [1, this.inputSize]),
});
callback(this.alignLabels(labels, chunk.wordIds, encoded.words)) const labelIds = _chunk(results["output"].data, this.id2label.length).map(argMax);
const labels = labelIds.map(id => this.id2label[id]);
callback(this.alignLabels(labels, chunk.wordIds, encoded.words));
i += 1;
// give browser some time to repaint
if (i % 2 === 0) {
await new Promise(resolve => setTimeout(resolve, 0));
}
} }
} }
} }

View File

@ -1,4 +1,5 @@
import {zip, chunk} from "underscore"; import {zip, chunk} from "underscore";
import {toInt64} from "@/ml/mlUtils";
const UNK_INDEX = 100; const UNK_INDEX = 100;
const CLS_INDEX = 101; const CLS_INDEX = 101;

View File

@ -0,0 +1,48 @@
import * as ort from "onnxruntime-web";
import {BPETokenizer} from "@/ml/BPETokenizer";
import axios from "axios";
import {downloadToBuffer, ORT_WASM_PATHS} from "@/ml/mlUtils";
/**
 * Text model wrapper: downloads an ONNX model and a BPE tokenizer definition,
 * then turns a text query into an array of numbers via predict().
 */
export class CLIPTransformerModel {

    _modelUrl = null;
    _tokenizerUrl = null;
    _model = null;
    _tokenizer = null;

    /**
     * @param modelUrl URL of the ONNX model file
     * @param tokenizerUrl URL of a JSON document with `encoder` and `bpe_ranks` keys
     */
    constructor(modelUrl, tokenizerUrl) {
        this._modelUrl = modelUrl;
        this._tokenizerUrl = tokenizerUrl;
    }

    /**
     * Loads the tokenizer and the model concurrently.
     * @param onProgress forwarded to the model download
     */
    async init(onProgress) {
        const pending = [this.loadTokenizer(), this.loadModel(onProgress)];
        await Promise.all(pending);
    }

    /** Downloads the model and creates a WASM-backed inference session. */
    async loadModel(onProgress) {
        ort.env.wasm.wasmPaths = ORT_WASM_PATHS;
        const {buffer} = await downloadToBuffer(this._modelUrl, onProgress);
        this._model = await ort.InferenceSession.create(buffer, {executionProviders: ["wasm"]});
    }

    /** Fetches the tokenizer definition and builds the BPETokenizer. */
    async loadTokenizer() {
        const {data} = await axios.get(this._tokenizerUrl);
        this._tokenizer = new BPETokenizer(data.encoder, data.bpe_ranks);
    }

    /**
     * Tokenizes `text`, runs the model and returns the data of the first output
     * tensor whose size is 512 as a plain array (presumably the text embedding -
     * confirm against the exported model).
     */
    async predict(text) {
        const inputIds = new ort.Tensor("int32", this._tokenizer.encode(text), [1, 77]);
        const outputs = await this._model.run({input_ids: inputIds});
        const selected = Object.values(outputs).find(tensor => tensor.size === 512);
        return Array.from(selected.data);
    }
}

View File

@ -0,0 +1,47 @@
/**
 * Downloads `url` into memory while reporting progress.
 *
 * The Content-Length header is only used as a size hint and for progress
 * reporting: the buffer grows when more bytes arrive than announced (missing
 * header, or a compressed transfer where the header describes the encoded size).
 *
 * @param {string} url resource to download
 * @param {function(number): void} [onProgress] called with a fraction in [0, 1]
 * @returns {Promise<Uint8ClampedArray>} the downloaded bytes; the array always
 *          spans its whole underlying `.buffer`, which callers pass on directly
 * @throws {Error} when the server answers with a non-2xx status
 */
export async function downloadToBuffer(url, onProgress) {
    const resp = await fetch(url);
    if (!resp.ok) {
        throw new Error(`Failed to download ${url}: HTTP ${resp.status}`);
    }

    // A missing or invalid header yields 0, i.e. "unknown length"
    const contentLength = Number(resp.headers.get("Content-Length")) || 0;
    let buf = new Uint8ClampedArray(contentLength);
    const reader = resp.body.getReader();
    let cursor = 0;

    if (onProgress) {
        onProgress(0);
    }

    while (true) {
        const {done, value} = await reader.read();
        if (done) {
            break;
        }

        if (cursor + value.length > buf.length) {
            // More data than announced: grow geometrically to keep copies rare
            const grown = new Uint8ClampedArray(Math.max(buf.length * 2, cursor + value.length));
            grown.set(buf.subarray(0, cursor));
            buf = grown;
        }

        buf.set(value, cursor);
        cursor += value.length;

        if (onProgress && contentLength > 0) {
            onProgress(Math.min(cursor / contentLength, 1));
        }
    }

    if (onProgress && contentLength === 0) {
        // Length was unknown, so no intermediate progress could be reported
        onProgress(1);
    }

    // slice() copies into an exactly-sized buffer so that `.buffer` has no slack
    return cursor === buf.length ? buf : buf.slice(0, cursor);
}
/**
 * Returns the index of the largest element of `array`.
 *
 * Works on plain arrays and typed arrays alike. Ties resolve to the first
 * occurrence.
 *
 * @param {ArrayLike<number>} array values to scan
 * @returns {number} index of the maximum, or -1 when `array` is empty
 */
export function argMax(array) {
    let best = -1;
    for (let i = 0; i < array.length; i++) {
        // Strict comparison keeps the earliest index on ties
        if (best === -1 || array[i] > array[best]) {
            best = i;
        }
    }
    return best;
}
/**
 * Converts a sequence of integers to a BigInt64Array.
 *
 * Accepts plain arrays as well as typed arrays (mapping a typed array with
 * `BigInt` directly throws, because its map() must produce numbers).
 *
 * @param {ArrayLike<number>|Iterable<number>} array integer values
 * @returns {BigInt64Array} the same values as 64-bit signed integers
 */
export function toInt64(array) {
    return BigInt64Array.from(array, (value) => BigInt(value));
}
// Maps each onnxruntime-web WASM binary name to its jsDelivr URL.
// The URLs pin version 1.15.1 of the package.
export const ORT_WASM_PATHS = Object.fromEntries(
    [
        "ort-wasm-simd.wasm",
        "ort-wasm.wasm",
        "ort-wasm-simd-threaded.wasm",
        "ort-wasm-threaded.wasm",
    ].map((file) => [file, `https://cdn.jsdelivr.net/npm/onnxruntime-web@1.15.1/dist/${file}`])
);

View File

@ -23,6 +23,8 @@ export default new Vuex.Store({
dateMin: undefined, dateMin: undefined,
dateMax: undefined, dateMax: undefined,
searchText: "", searchText: "",
embeddingText: "",
embedding: null,
pathText: "", pathText: "",
sortMode: "score", sortMode: "score",
@ -91,10 +93,11 @@ export default new Vuex.Store({
uiMimeMap: [] as any[], uiMimeMap: [] as any[],
auth0Token: null, auth0Token: null,
mlModel: { nerModel: {
model: null, model: null,
name: null name: null
}, },
embeddingsModel: null
}, },
mutations: { mutations: {
setUiShowDetails: (state, val) => state.uiShowDetails = val, setUiShowDetails: (state, val) => state.uiShowDetails = val,
@ -129,6 +132,8 @@ export default new Vuex.Store({
setDateBoundsMin: (state, val) => state.dateBoundsMin = val, setDateBoundsMin: (state, val) => state.dateBoundsMin = val,
setDateBoundsMax: (state, val) => state.dateBoundsMax = val, setDateBoundsMax: (state, val) => state.dateBoundsMax = val,
setSearchText: (state, val) => state.searchText = val, setSearchText: (state, val) => state.searchText = val,
setEmbeddingText: (state, val) => state.embeddingText = val,
setEmbedding: (state, val) => state.embedding= val,
setFuzzy: (state, val) => state.fuzzy = val, setFuzzy: (state, val) => state.fuzzy = val,
setLastQueryResult: (state, val) => state.lastQueryResults = val, setLastQueryResult: (state, val) => state.lastQueryResults = val,
setFirstQueryResult: (state, val) => state.firstQueryResults = val, setFirstQueryResult: (state, val) => state.firstQueryResults = val,
@ -212,7 +217,8 @@ export default new Vuex.Store({
// noop // noop
}, },
setAuth0Token: (state, val) => state.auth0Token = val, setAuth0Token: (state, val) => state.auth0Token = val,
setMlModel: (state, val) => state.mlModel = val, setNerModel: (state, val) => state.nerModel = val,
setEmbeddingsModel: (state, val) => state.embeddingsModel = val,
}, },
actions: { actions: {
setSist2Info: (store, val) => { setSist2Info: (store, val) => {
@ -370,7 +376,9 @@ export default new Vuex.Store({
}, },
modules: {}, modules: {},
getters: { getters: {
mlModel: (state) => state.mlModel, nerModel: (state) => state.nerModel,
embeddingsModel: (state) => state.embeddingsModel,
embedding: (state) => state.embedding,
seed: (state) => state.seed, seed: (state) => state.seed,
getPathText: (state) => state.pathText, getPathText: (state) => state.pathText,
indices: state => state.indices, indices: state => state.indices,
@ -389,6 +397,7 @@ export default new Vuex.Store({
sizeMin: state => state.sizeMin, sizeMin: state => state.sizeMin,
sizeMax: state => state.sizeMax, sizeMax: state => state.sizeMax,
searchText: state => state.searchText, searchText: state => state.searchText,
embeddingText: state => state.embeddingText,
pathText: state => state.pathText, pathText: state => state.pathText,
fuzzy: state => state.fuzzy, fuzzy: state => state.fuzzy,
size: state => state.optSize, size: state => state.optSize,

View File

@ -13,6 +13,7 @@
<b-card v-show="!uiLoading && !showEsConnectionError" id="search-panel"> <b-card v-show="!uiLoading && !showEsConnectionError" id="search-panel">
<SearchBar @show-help="showHelp=true"></SearchBar> <SearchBar @show-help="showHelp=true"></SearchBar>
<EmbeddingsSearchBar class="mt-3"></EmbeddingsSearchBar>
<b-row> <b-row>
<b-col style="height: 70px;" sm="6"> <b-col style="height: 70px;" sm="6">
<SizeSlider></SizeSlider> <SizeSlider></SizeSlider>
@ -58,16 +59,14 @@
</div> </div>
</template> </template>
<script lang="ts"> <script>
import Preloader from "@/components/Preloader.vue"; import Preloader from "@/components/Preloader.vue";
import {mapActions, mapGetters, mapMutations} from "vuex"; import {mapActions, mapGetters, mapMutations} from "vuex";
import sist2 from "../Sist2Api";
import Sist2Api, {EsHit, EsResult} from "../Sist2Api";
import SearchBar from "@/components/SearchBar.vue"; import SearchBar from "@/components/SearchBar.vue";
import IndexPicker from "@/components/IndexPicker.vue"; import IndexPicker from "@/components/IndexPicker.vue";
import Vue from "vue"; import Vue from "vue";
import Sist2Query from "@/Sist2ElasticsearchQuery"; import Sist2Query from "@/Sist2ElasticsearchQuery";
import _debounce from "lodash/debounce"; import {debounce as _debounce} from "underscore";
import DocCardWall from "@/components/DocCardWall.vue"; import DocCardWall from "@/components/DocCardWall.vue";
import Lightbox from "@/components/Lightbox.vue"; import Lightbox from "@/components/Lightbox.vue";
import LightboxCaption from "@/components/LightboxCaption.vue"; import LightboxCaption from "@/components/LightboxCaption.vue";
@ -79,11 +78,13 @@ import DateSlider from "@/components/DateSlider.vue";
import TagPicker from "@/components/TagPicker.vue"; import TagPicker from "@/components/TagPicker.vue";
import DocList from "@/components/DocList.vue"; import DocList from "@/components/DocList.vue";
import HelpDialog from "@/components/HelpDialog.vue"; import HelpDialog from "@/components/HelpDialog.vue";
import Sist2SqliteQuery from "@/Sist2SqliteQuery"; import EmbeddingsSearchBar from "@/components/EmbeddingsSearchBar.vue";
import Sist2Api from "@/Sist2Api";
export default Vue.extend({ export default Vue.extend({
components: { components: {
EmbeddingsSearchBar,
HelpDialog, HelpDialog,
DocList, DocList,
TagPicker, TagPicker,
@ -93,8 +94,8 @@ export default Vue.extend({
data: () => ({ data: () => ({
loading: false, loading: false,
uiLoading: true, uiLoading: true,
search: undefined as any, search: undefined,
docs: [] as EsHit[], docs: [],
docIds: new Set(), docIds: new Set(),
docChecksums: new Set(), docChecksums: new Set(),
searchBusy: false, searchBusy: false,
@ -108,16 +109,16 @@ export default Vue.extend({
mounted() { mounted() {
// Handle touch events // Handle touch events
window.ontouchend = () => this.$store.commit("busTouchEnd"); window.ontouchend = () => this.$store.commit("busTouchEnd");
window.ontouchcancel = this.$store.commit("busTouchEnd"); window.ontouchcancel = () => this.$store.commit("busTouchEnd");
this.search = _debounce(async (clear: boolean) => { this.search = _debounce(async (clear) => {
if (clear) { if (clear) {
await this.clearResults(); await this.clearResults();
} }
await this.searchNow(); await this.searchNow();
}, 350, {leading: false}); }, 350, false);
this.$store.dispatch("loadFromArgs", this.$route).then(() => { this.$store.dispatch("loadFromArgs", this.$route).then(() => {
this.$store.subscribe(() => this.$store.dispatch("updateArgs", this.$router)); this.$store.subscribe(() => this.$store.dispatch("updateArgs", this.$router));
@ -126,6 +127,7 @@ export default Vue.extend({
"setSizeMin", "setSizeMax", "setDateMin", "setDateMax", "setSearchText", "setPathText", "setSizeMin", "setSizeMax", "setDateMin", "setDateMax", "setSearchText", "setPathText",
"setSortMode", "setOptHighlight", "setOptFragmentSize", "setFuzzy", "setSize", "setSelectedIndices", "setSortMode", "setOptHighlight", "setOptFragmentSize", "setFuzzy", "setSize", "setSelectedIndices",
"setSelectedMimeTypes", "setSelectedTags", "setOptQueryMode", "setOptSearchInPath", "setSelectedMimeTypes", "setSelectedTags", "setOptQueryMode", "setOptSearchInPath",
"setEmbedding"
].includes(mutation.type)) { ].includes(mutation.type)) {
if (this.searchBusy) { if (this.searchBusy) {
return; return;
@ -152,7 +154,7 @@ export default Vue.extend({
}).catch(error => { }).catch(error => {
console.log(error); console.log(error);
if (error.response.status == 503 || error.response.status == 500) { if (error.response.status === 503 || error.response.status === 500) {
this.showEsConnectionError = true; this.showEsConnectionError = true;
this.uiLoading = false; this.uiLoading = false;
} else { } else {
@ -181,7 +183,7 @@ export default Vue.extend({
bodyClass: "toast-body-error", bodyClass: "toast-body-error",
}); });
}, },
showSyntaxErrorToast: function (): void { showSyntaxErrorToast: function () {
this.$bvToast.toast( this.$bvToast.toast(
this.$t("toast.esQueryErr"), this.$t("toast.esQueryErr"),
{ {
@ -197,7 +199,7 @@ export default Vue.extend({
await this.$store.dispatch("incrementQuerySequence"); await this.$store.dispatch("incrementQuerySequence");
this.$store.commit("busSearch"); this.$store.commit("busSearch");
Sist2Api.search().then(async (resp: EsResult) => { Sist2Api.search().then(async (resp) => {
await this.handleSearch(resp); await this.handleSearch(resp);
this.searchBusy = false; this.searchBusy = false;
}).catch(err => { }).catch(err => {
@ -215,8 +217,8 @@ export default Vue.extend({
await this.$store.dispatch("clearResults"); await this.$store.dispatch("clearResults");
this.$store.commit("setUiReachedScrollEnd", false); this.$store.commit("setUiReachedScrollEnd", false);
}, },
async handleSearch(resp: EsResult) { async handleSearch(resp) {
if (resp.hits.hits.length == 0 || resp.hits.hits.length < this.$store.state.optSize) { if (resp.hits.hits.length === 0 || resp.hits.hits.length < this.$store.state.optSize) {
this.$store.commit("setUiReachedScrollEnd", true); this.$store.commit("setUiReachedScrollEnd", true);
} }

View File

@ -163,7 +163,8 @@ void database_open(database_t *db) {
&db->write_document_sidecar_stmt, NULL)); &db->write_document_sidecar_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2( CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, db->db,
"REPLACE INTO document (id, mtime, size, json_data, version) VALUES (?, ?, ?, ?, (SELECT max(id) FROM version));", -1, "REPLACE INTO document (id, mtime, size, json_data, version) VALUES (?, ?, ?, ?, (SELECT max(id) FROM version));",
-1,
&db->write_document_stmt, NULL)); &db->write_document_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2( CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, db->db,
@ -175,6 +176,10 @@ void database_open(database_t *db) {
db->db, "SELECT json_data FROM document WHERE id=?", -1, db->db, "SELECT json_data FROM document WHERE id=?", -1,
&db->get_document, NULL)); &db->get_document, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT * FROM model", -1,
&db->get_models, NULL));
// Create functions // Create functions
sqlite3_create_function( sqlite3_create_function(
db->db, db->db,
@ -186,6 +191,17 @@ void database_open(database_t *db) {
NULL, NULL,
NULL NULL
); );
sqlite3_create_function(
db->db,
"embedding_to_json",
5,
SQLITE_UTF8,
NULL,
embedding_to_json_func,
NULL,
NULL
);
} else if (db->type == IPC_CONSUMER_DATABASE) { } else if (db->type == IPC_CONSUMER_DATABASE) {
sqlite3_create_function( sqlite3_create_function(
@ -248,6 +264,10 @@ void database_open(database_t *db) {
db->db, "SELECT tag, count(*) FROM tag GROUP BY tag", -1, db->db, "SELECT tag, count(*) FROM tag GROUP BY tag", -1,
&db->fts_get_tags, NULL)); &db->fts_get_tags, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT size FROM model WHERE id=?", -1,
&db->fts_model_size, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2( CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT path, count FROM path_index" db->db, "SELECT path, count FROM path_index"
" WHERE (index_id=?1 OR ?1 IS NULL) AND depth BETWEEN ? AND ?" " WHERE (index_id=?1 OR ?1 IS NULL) AND depth BETWEEN ? AND ?"
@ -302,6 +322,17 @@ void database_open(database_t *db) {
NULL, NULL,
NULL NULL
); );
sqlite3_create_function(
db->db,
"cosine_sim",
3,
SQLITE_UTF8,
NULL,
cosine_sim_func,
NULL,
NULL
);
} }
if (db->type == FTS_DATABASE || db->type == INDEX_DATABASE) { if (db->type == FTS_DATABASE || db->type == INDEX_DATABASE) {
@ -463,8 +494,6 @@ database_iterator_t *database_create_document_iterator(database_t *db) {
sqlite3_stmt *stmt; sqlite3_stmt *stmt;
// TODO optimization: remove mtime, size, _id from json_data
sqlite3_prepare_v2(db->db, "WITH doc (j) AS (SELECT CASE" sqlite3_prepare_v2(db->db, "WITH doc (j) AS (SELECT CASE"
" WHEN sc.json_data IS NULL THEN" " WHEN sc.json_data IS NULL THEN"
" CASE" " CASE"
@ -800,4 +829,4 @@ cJSON *database_get_document(database_t *db, char *doc_id) {
void database_increment_version(database_t *db) { void database_increment_version(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec( CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, "INSERT INTO version DEFAULT VALUES", NULL, NULL, NULL)); db->db, "INSERT INTO version DEFAULT VALUES", NULL, NULL, NULL));
} }

View File

@ -41,6 +41,7 @@ typedef enum {
FTS_SORT_RANDOM, FTS_SORT_RANDOM,
FTS_SORT_NAME, FTS_SORT_NAME,
FTS_SORT_ID, FTS_SORT_ID,
FTS_SORT_EMBEDDING
} fts_sort_t; } fts_sort_t;
typedef struct { typedef struct {
@ -83,6 +84,7 @@ typedef struct database {
sqlite3_stmt *write_document_sidecar_stmt; sqlite3_stmt *write_document_sidecar_stmt;
sqlite3_stmt *write_thumbnail_stmt; sqlite3_stmt *write_thumbnail_stmt;
sqlite3_stmt *get_document; sqlite3_stmt *get_document;
sqlite3_stmt *get_models;
sqlite3_stmt *delete_tag_stmt; sqlite3_stmt *delete_tag_stmt;
sqlite3_stmt *write_tag_stmt; sqlite3_stmt *write_tag_stmt;
@ -100,6 +102,8 @@ typedef struct database {
sqlite3_stmt *fts_get_document; sqlite3_stmt *fts_get_document;
sqlite3_stmt *fts_suggest_tag; sqlite3_stmt *fts_suggest_tag;
sqlite3_stmt *fts_get_tags; sqlite3_stmt *fts_get_tags;
sqlite3_stmt *fts_model_size;
char **tag_array; char **tag_array;
@ -210,7 +214,8 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
long size_max, long date_min, long date_max, int page_size, long size_max, long date_min, long date_max, int page_size,
char **index_ids, char **mime_types, char **tags, int sort_asc, char **index_ids, char **mime_types, char **tags, int sort_asc,
fts_sort_t sort, int seed, char **after, int fetch_aggregations, fts_sort_t sort, int seed, char **after, int fetch_aggregations,
int highlight, int highlight_context_size); int highlight, int highlight_context_size, int model,
const float *embedding, int embedding_size);
void database_write_tag(database_t *db, char *doc_id, char *tag); void database_write_tag(database_t *db, char *doc_id, char *tag);
@ -228,4 +233,10 @@ cJSON *database_fts_get_tags(database_t *db);
cJSON *database_get_document(database_t *db, char *doc_id); cJSON *database_get_document(database_t *db, char *doc_id);
void cosine_sim_func(sqlite3_context *ctx, int argc, sqlite3_value **argv);
void embedding_to_json_func(sqlite3_context *ctx, int argc, sqlite3_value **argv);
cJSON *database_get_models(database_t *db);
#endif #endif

View File

@ -0,0 +1,94 @@
#include <openblas/cblas.h>
#include "database.h"
/**
 * Cosine similarity of two n-element float vectors:
 * dot(a, b) divided by the product of their Euclidean norms.
 *
 * No guard against a zero norm is applied here; the caller in this file
 * checks the result for NaN.
 */
static float cosine_sim(int n, const float *a, const float *b) {
    const float norm_product = cblas_snrm2(n, a, 1) * cblas_snrm2(n, b, 1);

    return cblas_sdot(n, a, 1, b, 1) / norm_product;
}
/**
 * SQLite scalar function cosine_sim(n, a, b).
 *
 * @param ctx  SQLite function context that receives the result
 * @param argc number of SQL arguments, must be 3
 * @param argv [0] element count n, [1] and [2] BLOBs read as arrays of float
 *
 * The result is a double. -1 is returned when either blob is NULL, when a blob
 * holds fewer than n floats, when n is not positive, or when the similarity is NaN.
 */
void cosine_sim_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
    if (argc != 3) {
        sqlite3_result_error(ctx, "Invalid parameters", -1);
        // Do not fall through: argv does not hold the expected values
        return;
    }

    int n = sqlite3_value_int(argv[0]);
    const float *a = sqlite3_value_blob(argv[1]);
    const float *b = sqlite3_value_blob(argv[2]);

    if (a == NULL || b == NULL) {
        sqlite3_result_double(ctx, -1);
        return;
    }

    // Never let BLAS read past the end of a blob shorter than n floats
    // (e.g. an embedding stored for a model with a different size).
    int a_len = sqlite3_value_bytes(argv[1]) / (int) sizeof(float);
    int b_len = sqlite3_value_bytes(argv[2]) / (int) sizeof(float);
    if (n <= 0 || a_len < n || b_len < n) {
        sqlite3_result_double(ctx, -1);
        return;
    }

    float result = cosine_sim(n, a, b);

    // NaN is the only value that differs from itself (zero-norm input)
    if (result != result) {
        result = -1;
    }

    sqlite3_result_double(ctx, result);
}
/**
 * SQLite scalar function embedding_to_json(emb, type, start, end, size).
 *
 * @param ctx  SQLite function context that receives the result
 * @param argc number of SQL arguments, must be 5
 * @param argv [0] embedding BLOB read as an array of float, [1] model type,
 *             [2] start and [3] end (unused for 'flat'), [4] element count
 *
 * For type 'flat' the result is the embedding printed as a JSON array of
 * numbers, or NULL when the blob is NULL or holds fewer than `size` floats.
 * Any other type yields an error because nested embeddings are not implemented.
 */
void embedding_to_json_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
    // emb, type, start, end, size
    if (argc != 5) {
        sqlite3_result_error(ctx, "Invalid parameters", -1);
        // Do not fall through: argv does not hold the expected values
        return;
    }

    const float *embedding = sqlite3_value_blob(argv[0]);
    const char *type = (const char *) sqlite3_value_text(argv[1]);
    int size = sqlite3_value_int(argv[4]);

    // sqlite3_value_text() returns NULL for an SQL NULL; strcmp(NULL, ...) is undefined
    if (type == NULL) {
        sqlite3_result_error(ctx, "Invalid parameters", -1);
        return;
    }

    if (strcmp(type, "flat") != 0) {
        sqlite3_result_error(ctx, "Nested embeddings not implemented yet", -1);
        return;
    }

    // Never read past the end of a blob shorter than `size` floats
    if (embedding == NULL || size < 0
        || sqlite3_value_bytes(argv[0]) / (int) sizeof(float) < size) {
        sqlite3_result_null(ctx);
        return;
    }

    cJSON *json = cJSON_CreateFloatArray(embedding, size);
    if (json == NULL) {
        sqlite3_result_error_nomem(ctx);
        return;
    }

    // Pre-size the print buffer at 22 bytes per element to limit reallocations
    char *json_str = cJSON_PrintBuffered(json, size * 22, FALSE);
    cJSON_Delete(json);

    if (json_str == NULL) {
        sqlite3_result_error_nomem(ctx);
        return;
    }

    // SQLITE_TRANSIENT makes SQLite copy the string, so it can be freed right away
    sqlite3_result_text(ctx, json_str, -1, SQLITE_TRANSIENT);
    free(json_str);
}
/**
 * Reads every row returned by the prepared `get_models` statement.
 *
 * @param db database whose `get_models` statement has been prepared
 * @return a new cJSON array with one object per row, holding the keys
 *         id, name, url (omitted when the column is NULL), path, size and type.
 *         The caller owns the returned array.
 */
cJSON *database_get_models(database_t *db) {
    cJSON *json = cJSON_CreateArray();
    sqlite3_stmt *stmt = db->get_models;

    int ret;
    do {
        ret = sqlite3_step(stmt);
        CRASH_IF_STMT_FAIL(ret);

        if (ret == SQLITE_DONE) {
            break;
        }

        cJSON *row = cJSON_CreateObject();

        cJSON_AddNumberToObject(row, "id", sqlite3_column_int(stmt, 0));
        cJSON_AddStringToObject(row, "name", (const char *) sqlite3_column_text(stmt, 1));

        // url is a nullable TEXT column: read it as text (it used to be read as
        // an integer and cast to a pointer) and skip it when it is NULL.
        const char *url = (const char *) sqlite3_column_text(stmt, 2);
        if (url != NULL) {
            cJSON_AddStringToObject(row, "url", url);
        }

        cJSON_AddStringToObject(row, "path", (const char *) sqlite3_column_text(stmt, 3));
        cJSON_AddNumberToObject(row, "size", sqlite3_column_int(stmt, 4));
        cJSON_AddStringToObject(row, "type", (const char *) sqlite3_column_text(stmt, 5));

        cJSON_AddItemToArray(json, row);
    } while (TRUE);

    // The statement is cached on db and reused: rewind it for the next call
    sqlite3_reset(stmt);

    return json;
}

View File

@ -37,7 +37,7 @@ int database_fts_get_max_path_depth(database_t *db) {
void database_fts_index(database_t *db) { void database_fts_index(database_t *db) {
LOG_INFO("database_fts.c", "Creating content table."); LOG_INFO("database_fts.c", "Creating content table");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec( CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, db->db,
@ -47,21 +47,12 @@ void database_fts_index(database_t *db) {
" document.json_data ->> 'path' as path," " document.json_data ->> 'path' as path,"
" mtime," " mtime,"
" document.json_data ->> 'mime' as mime," " document.json_data ->> 'mime' as mime,"
" CASE"
" WHEN sc.json_data IS NULL THEN"
" json_set(document.json_data, " " json_set(document.json_data, "
" '$._id',document.id," " '$._id',document.id,"
" '$.size',document.size, " " '$.size',document.size, "
" '$.mtime',document.mtime)" " '$.mtime',document.mtime)"
" ELSE json_patch("
" json_set(document.json_data,"
" '$._id',document.id,"
" '$.size',document.size,"
" '$.mtime', document.mtime),"
" sc.json_data) END"
" FROM document" " FROM document"
" LEFT JOIN document_sidecar sc ON document.id = sc.id" " )"
" GROUP BY document.id)"
" INSERT" " INSERT"
" INTO fts.document_index (id, index_id, size, name, path, mtime, mime, json_data)" " INTO fts.document_index (id, index_id, size, name, path, mtime, mime, json_data)"
" SELECT * FROM docs WHERE true" " SELECT * FROM docs WHERE true"
@ -69,7 +60,16 @@ void database_fts_index(database_t *db) {
" size=excluded.size, mtime=excluded.mtime, mime=excluded.mime, json_data=excluded.json_data;", " size=excluded.size, mtime=excluded.mtime, mime=excluded.mime, json_data=excluded.json_data;",
NULL, NULL, NULL)); NULL, NULL, NULL));
LOG_DEBUG("database_fts.c", "Deleting old documents."); LOG_DEBUG("database_fts.c", "Copying embeddings");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"REPLACE INTO fts.embedding (id, model_id, start, end, embedding)"
" SELECT id, model_id, start, end, embedding FROM embedding", NULL, NULL, NULL));
// TODO: delete old embeddings
LOG_DEBUG("database_fts.c", "Deleting old documents");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec( CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, db->db,
@ -144,7 +144,7 @@ void database_fts_index(database_t *db) {
"INSERT INTO path_index (path, index_id, count, depth) SELECT path, index_id, total, depth FROM path_tmp", "INSERT INTO path_index (path, index_id, count, depth) SELECT path, index_id, total, depth FROM path_tmp",
NULL, NULL, NULL)); NULL, NULL, NULL));
LOG_DEBUG("database_fts.c", "Generating search index."); LOG_DEBUG("database_fts.c", "Generating search index");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec( CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, "INSERT INTO search(search) VALUES ('delete-all')", db->db, "INSERT INTO search(search) VALUES ('delete-all')",
@ -157,7 +157,7 @@ void database_fts_index(database_t *db) {
} }
void database_fts_optimize(database_t *db) { void database_fts_optimize(database_t *db) {
LOG_INFO("database_fts.c", "Optimizing search index."); LOG_INFO("database_fts.c", "Optimizing search index");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec( CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, db->db,
@ -408,6 +408,8 @@ const char *get_sort_var(fts_sort_t sort) {
return "doc.name"; return "doc.name";
case FTS_SORT_ID: case FTS_SORT_ID:
return "doc.id"; return "doc.id";
case FTS_SORT_EMBEDDING:
return "cosine_sim(?7, ?8, emb.embedding)";
default: default:
return NULL; return NULL;
} }
@ -459,11 +461,36 @@ char *get_after_where(char **after, fts_sort_t sort, int sort_asc) {
return "(sort_var, doc.ROWID) < (?3, ?4)"; return "(sort_var, doc.ROWID) < (?3, ?4)";
} }
/**
 * Looks up the `size` column of the model with the given id.
 *
 * @param db       database whose `fts_model_size` statement has been prepared
 * @param model_id id of the model to look up
 * @return the model's size, or -1 when no such model exists
 */
int database_fts_get_model_size(database_t *db, int model_id) {
    sqlite3_stmt *stmt = db->fts_model_size;

    sqlite3_bind_int(stmt, 1, model_id);

    int ret = sqlite3_step(stmt);
    CRASH_IF_STMT_FAIL(ret);

    int size = (ret == SQLITE_DONE) ? -1 : sqlite3_column_int(stmt, 0);

    // Reset on every path, including "not found": binding a parameter on a
    // statement that was stepped but not reset fails with SQLITE_MISUSE.
    sqlite3_reset(stmt);

    return size;
}
cJSON *database_fts_search(database_t *db, const char *query, const char *path, long size_min, cJSON *database_fts_search(database_t *db, const char *query, const char *path, long size_min,
long size_max, long date_min, long date_max, int page_size, long size_max, long date_min, long date_max, int page_size,
char **index_ids, char **mime_types, char **tags, int sort_asc, char **index_ids, char **mime_types, char **tags, int sort_asc,
fts_sort_t sort, int seed, char **after, int fetch_aggregations, fts_sort_t sort, int seed, char **after, int fetch_aggregations,
int highlight, int highlight_context_size) { int highlight, int highlight_context_size, int model,
const float *embedding, int embedding_size) {
if (embedding) {
int model_embedding_size = database_fts_get_model_size(db, model);
if (model_embedding_size != embedding_size) {
LOG_WARNINGF("database_fts.c", "Received invalid embedding size for model %s: %d, expected %d",
model, embedding_size, model_embedding_size);
return NULL;
}
}
char path_glob[PATH_MAX * 2]; char path_glob[PATH_MAX * 2];
snprintf(path_glob, sizeof(path_glob), "%s/*", path); snprintf(path_glob, sizeof(path_glob), "%s/*", path);
@ -502,6 +529,11 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
"'$.content')"; "'$.content')";
} }
const char *embedding_join = "";
if (embedding) {
embedding_join = "LEFT JOIN embedding emb ON emb.id = doc.id AND emb.model_id=?9";
}
char *sql; char *sql;
char *agg_sql; char *agg_sql;
@ -512,12 +544,14 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
" %s, %s as sort_var, doc.ROWID" " %s, %s as sort_var, doc.ROWID"
" FROM search" " FROM search"
" INNER JOIN document_index doc on doc.ROWID = search.ROWID" " INNER JOIN document_index doc on doc.ROWID = search.ROWID"
" %s"
" WHERE %s" " WHERE %s"
" ORDER BY sort_var%s, doc.ROWID" " ORDER BY sort_var%s, doc.ROWID"
" LIMIT ?2", " LIMIT ?2",
json_object_sql, get_sort_var(sort), json_object_sql, get_sort_var(sort),
embedding_join,
where, where,
sort_asc ? "" : "DESC"); sort_asc ? "" : " DESC");
if (fetch_aggregations) { if (fetch_aggregations) {
asprintf(&agg_sql, asprintf(&agg_sql,
@ -533,10 +567,12 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
"SELECT" "SELECT"
" %s, %s as sort_var, doc.ROWID" " %s, %s as sort_var, doc.ROWID"
" FROM document_index doc" " FROM document_index doc"
" %s"
" WHERE %s" " WHERE %s"
" ORDER BY sort_var%s,doc.ROWID" " ORDER BY sort_var%s,doc.ROWID"
" LIMIT ?2", " LIMIT ?2",
json_object_sql, get_sort_var(sort), json_object_sql, get_sort_var(sort),
embedding_join,
where, where,
sort_asc ? "" : " DESC"); sort_asc ? "" : " DESC");
@ -569,7 +605,6 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
if (tags) { if (tags) {
db->tag_array = tags; db->tag_array = tags;
} }
if (size_min > 0) { if (size_min > 0) {
sqlite3_bind_int64(stmt, sqlite3_bind_parameter_index(stmt, "@size_min"), size_min); sqlite3_bind_int64(stmt, sqlite3_bind_parameter_index(stmt, "@size_min"), size_min);
} }
@ -602,6 +637,11 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
if (highlight) { if (highlight) {
sqlite3_bind_int(stmt, 6, highlight_context_size); sqlite3_bind_int(stmt, 6, highlight_context_size);
} }
if (embedding) {
sqlite3_bind_int(stmt, 7, embedding_size);
sqlite3_bind_blob(stmt, 8, embedding, (int) sizeof(float) * embedding_size, SQLITE_STATIC);
sqlite3_bind_int(stmt, 9, model);
}
cJSON *json = cJSON_CreateObject(); cJSON *json = cJSON_CreateObject();
cJSON *hits_hits = cJSON_CreateArray(); cJSON *hits_hits = cJSON_CreateArray();

View File

@ -38,6 +38,25 @@ const char *FtsDatabaseSchema =
");" ");"
"CREATE INDEX IF NOT EXISTS tag_tag_idx ON tag(tag);" "CREATE INDEX IF NOT EXISTS tag_tag_idx ON tag(tag);"
"CREATE INDEX IF NOT EXISTS tag_id_idx ON tag(id);" "CREATE INDEX IF NOT EXISTS tag_id_idx ON tag(id);"
""
"CREATE TABLE IF NOT EXISTS embedding ("
" id TEXT REFERENCES document(id),"
" model_id INTEGER NOT NULL REFERENCES model(id),"
" start INTEGER NOT NULL,"
" end INTEGER,"
" embedding BLOB NOT NULL,"
" PRIMARY KEY (id, model_id, start)"
");"
""
"CREATE TABLE IF NOT EXISTS model ("
" id INTEGER PRIMARY KEY,"
" name TEXT NOT NULL UNIQUE CHECK ( length(name) < 16 ),"
" url TEXT,"
" path TEXT NOT NULL UNIQUE,"
" size INTEGER NOT NULL,"
" type TEXT NOT NULL CHECK ( type IN ('flat', 'nested') )"
");"
""
"CREATE TRIGGER IF NOT EXISTS tag_write_trigger" "CREATE TRIGGER IF NOT EXISTS tag_write_trigger"
" AFTER INSERT ON tag" " AFTER INSERT ON tag"
" BEGIN" " BEGIN"
@ -155,5 +174,14 @@ const char *IndexDatabaseSchema =
" mime TEXT NOT NULL," " mime TEXT NOT NULL,"
" size INTEGER NOT NULL," " size INTEGER NOT NULL,"
" count INTEGER NOT NULL" " count INTEGER NOT NULL"
");"
""
"CREATE TABLE embedding ("
" id TEXT REFERENCES document(id),"
" model_id INTEGER NOT NULL references model(id),"
" start INTEGER NOT NULL,"
" end INTEGER,"
" embedding BLOB NOT NULL,"
" PRIMARY KEY (id, model_id, start)"
");"; ");";

View File

@ -1,6 +1,7 @@
#ifndef WALK_H #ifndef WALK_H
#define WALK_H #define WALK_H
#undef _XOPEN_SOURCE
#define _XOPEN_SOURCE 500 #define _XOPEN_SOURCE 500
int walk_directory_tree(const char *); int walk_directory_tree(const char *);

View File

@ -321,6 +321,8 @@ void sist2_index(index_args_t *args) {
strcpy(doc_id, cJSON_GetObjectItem(json, "_id")->valuestring); strcpy(doc_id, cJSON_GetObjectItem(json, "_id")->valuestring);
cJSON_DeleteItemFromObject(json, "_id"); cJSON_DeleteItemFromObject(json, "_id");
// TODO: delete tag if empty
if (args->print) { if (args->print) {
print_json(json, doc_id); print_json(json, doc_id);
} else { } else {
@ -462,6 +464,11 @@ int set_to_negative_if_value_is_zero(UNUSED(struct argparse *self), const struct
int main(int argc, const char *argv[]) { int main(int argc, const char *argv[]) {
setlocale(LC_ALL, ""); setlocale(LC_ALL, "");
// database_t *db = database_create("clip.sist2", INDEX_DATABASE);
// database_open(db);
// database_test(db);
// exit(0);
scan_args_t *scan_args = scan_args_create(); scan_args_t *scan_args = scan_args_create();
index_args_t *index_args = index_args_create(); index_args_t *index_args = index_args_create();
web_args_t *web_args = web_args_create(); web_args_t *web_args = web_args_create();

View File

@ -87,7 +87,7 @@ static void buf2hex(const unsigned char *buf, size_t buflen, char *hex_string) {
*s = '\0'; *s = '\0';
} }
static void md5_hexdigest(void *data, size_t size, char *output) { static void md5_hexdigest(const void *data, size_t size, char *output) {
EVP_MD_CTX *md_ctx = EVP_MD_CTX_new(); EVP_MD_CTX *md_ctx = EVP_MD_CTX_new();
EVP_DigestInit_ex(md_ctx, EVP_md5(), NULL); EVP_DigestInit_ex(md_ctx, EVP_md5(), NULL);
@ -120,7 +120,7 @@ struct timespec timespec_add(struct timespec ts1, long usec);
#define pthread_cond_timedwait_ms(cond, mutex, delay_ms) do {\ #define pthread_cond_timedwait_ms(cond, mutex, delay_ms) do {\
struct timespec now; \ struct timespec now; \
clock_gettime(CLOCK_REALTIME, &now); \ clock_gettime(CLOCK_REALTIME, &now); \
struct timespec end_time = timespec_add(now, MILLISECOND * delay_ms); \ struct timespec end_time = timespec_add(now, MILLISECOND * (delay_ms)); \
pthread_cond_timedwait(cond, mutex, &end_time); \ pthread_cond_timedwait(cond, mutex, &end_time); \
} while (0) } while (0)

View File

@ -28,7 +28,15 @@ static struct mg_http_serve_opts DefaultServeOpts = {
.fs = NULL, .fs = NULL,
.ssi_pattern = NULL, .ssi_pattern = NULL,
.root_dir = NULL, .root_dir = NULL,
.mime_types = "" .mime_types = HTTP_SERVER_HEADER
};
static struct mg_http_serve_opts IndexServeOpts = {
.fs = NULL,
.ssi_pattern = NULL,
.root_dir = NULL,
.mime_types = "",
.extra_headers = HTTP_SERVER_HEADER "Cross-Origin-Embedder-Policy: require-corp\r\nCross-Origin-Opener-Policy: same-origin\r\n"
}; };
void stats_files(struct mg_connection *nc, struct mg_http_message *hm) { void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
@ -67,7 +75,7 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
void serve_index_html(struct mg_connection *nc, struct mg_http_message *hm) { void serve_index_html(struct mg_connection *nc, struct mg_http_message *hm) {
if (WebCtx.dev) { if (WebCtx.dev) {
mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", &DefaultServeOpts); mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", &IndexServeOpts);
} else { } else {
web_serve_asset_index_html(nc); web_serve_asset_index_html(nc);
} }
@ -334,6 +342,9 @@ void index_info(struct mg_connection *nc) {
cJSON_AddStringToObject(idx_json, "rewriteUrl", idx->desc.rewrite_url); cJSON_AddStringToObject(idx_json, "rewriteUrl", idx->desc.rewrite_url);
cJSON_AddNumberToObject(idx_json, "timestamp", (double) idx->desc.timestamp); cJSON_AddNumberToObject(idx_json, "timestamp", (double) idx->desc.timestamp);
cJSON_AddItemToArray(arr, idx_json); cJSON_AddItemToArray(arr, idx_json);
cJSON *models = database_get_models(idx->db);
cJSON_AddItemToObject(idx_json, "models", models);
} }
if (WebCtx.search_backend == SQLITE_SEARCH_BACKEND) { if (WebCtx.search_backend == SQLITE_SEARCH_BACKEND) {

View File

@ -32,6 +32,9 @@ typedef struct {
int fetch_aggregations; int fetch_aggregations;
int highlight; int highlight;
int highlight_context_size; int highlight_context_size;
int model;
float *embedding;
int embedding_size;
} fts_search_req_t; } fts_search_req_t;
fts_sort_t get_sort_mode(const cJSON *req_sort) { fts_sort_t get_sort_mode(const cJSON *req_sort) {
@ -45,11 +48,27 @@ fts_sort_t get_sort_mode(const cJSON *req_sort) {
return FTS_SORT_RANDOM; return FTS_SORT_RANDOM;
} else if (strcmp(req_sort->valuestring, "name") == 0) { } else if (strcmp(req_sort->valuestring, "name") == 0) {
return FTS_SORT_NAME; return FTS_SORT_NAME;
} else if (strcmp(req_sort->valuestring, "embedding") == 0) {
return FTS_SORT_EMBEDDING;
} }
return FTS_SORT_INVALID; return FTS_SORT_INVALID;
} }
/**
 * Copy the elements of a cJSON array into a newly allocated float buffer.
 *
 * Each element's `valuedouble` is narrowed to `float`. Elements are not
 * type-checked here, so the caller is expected to have already verified that
 * the array only contains numbers.
 *
 * @param arr  cJSON array to read from.
 * @param size Out-parameter receiving the number of floats stored in the
 *             returned buffer; set to 0 whenever NULL is returned.
 * @return Heap buffer owned by the caller (release with free()), or NULL when
 *         the array has no elements or the allocation fails.
 */
float *get_float_buffer(cJSON *arr, int *size) {
    int count = cJSON_GetArraySize(arr);

    *size = 0;

    // Avoid malloc(0), whose result is implementation-defined
    if (count <= 0) {
        return NULL;
    }

    float *floats = malloc(sizeof *floats * (size_t) count);
    if (floats == NULL) {
        // Out of memory: report an empty buffer instead of writing through NULL
        return NULL;
    }

    cJSON *elem;
    int i = 0;
    cJSON_ArrayForEach(elem, arr) {
        floats[i] = (float) elem->valuedouble;
        i += 1;
    }

    *size = count;
    return floats;
}
static json_value get_json_string(cJSON *object, const char *name) { static json_value get_json_string(cJSON *object, const char *name) {
@ -89,6 +108,25 @@ static json_value get_json_bool(cJSON *object, const char *name) {
return (json_value) {item, FALSE}; return (json_value) {item, FALSE};
} }
/**
 * Fetch `name` from `object` and validate it as a non-empty array of numbers.
 *
 * Result, as a {value, invalid} pair:
 *  - key absent or JSON null                  -> {NULL, FALSE}
 *  - not an array, or an array with no items  -> {NULL, TRUE}
 *  - array holding any non-number element     -> {NULL, TRUE}
 *  - otherwise                                -> {the array item, FALSE}
 */
static json_value get_json_float_array(cJSON *object, const char *name) {
    cJSON *array = cJSON_GetObjectItem(object, name);

    // A missing or explicitly null field is accepted as "not provided"
    if (array == NULL || cJSON_IsNull(array)) {
        return (json_value) {NULL, FALSE};
    }

    if (cJSON_IsArray(array) && cJSON_GetArraySize(array) > 0) {
        int all_numbers = TRUE;
        cJSON *entry;

        cJSON_ArrayForEach(entry, array) {
            if (!cJSON_IsNumber(entry)) {
                all_numbers = FALSE;
                break;
            }
        }

        if (all_numbers) {
            return (json_value) {array, FALSE};
        }
    }

    // Wrong type, empty array, or at least one non-numeric element
    return (json_value) {NULL, TRUE};
}
static json_value get_json_array(cJSON *object, const char *name) { static json_value get_json_array(cJSON *object, const char *name) {
cJSON *item = cJSON_GetObjectItem(object, name); cJSON *item = cJSON_GetObjectItem(object, name);
if (item == NULL || cJSON_IsNull(item)) { if (item == NULL || cJSON_IsNull(item)) {
@ -131,7 +169,7 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
json_value req_query, req_path, req_size_min, req_size_max, req_date_min, req_date_max, req_page_size, json_value req_query, req_path, req_size_min, req_size_max, req_date_min, req_date_max, req_page_size,
req_index_ids, req_mime_types, req_tags, req_sort_asc, req_sort, req_seed, req_after, req_index_ids, req_mime_types, req_tags, req_sort_asc, req_sort, req_seed, req_after,
req_fetch_aggregations, req_highlight, req_highlight_context_size; req_fetch_aggregations, req_highlight, req_highlight_context_size, req_embedding, req_model;
if (!cJSON_IsObject(json) || if (!cJSON_IsObject(json) ||
(req_query = get_json_string(json, "query")).invalid || (req_query = get_json_string(json, "query")).invalid ||
@ -150,6 +188,8 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
(req_mime_types = get_json_array(json, "mimeTypes")).invalid || (req_mime_types = get_json_array(json, "mimeTypes")).invalid ||
(req_highlight = get_json_bool(json, "highlight")).invalid || (req_highlight = get_json_bool(json, "highlight")).invalid ||
(req_highlight_context_size = get_json_number(json, "highlightContextSize")).invalid || (req_highlight_context_size = get_json_number(json, "highlightContextSize")).invalid ||
(req_embedding = get_json_float_array(json, "embedding")).invalid ||
(req_model = get_json_number(json, "model")).invalid ||
(req_tags = get_json_array(json, "tags")).invalid) { (req_tags = get_json_array(json, "tags")).invalid) {
cJSON_Delete(json); cJSON_Delete(json);
return NULL; return NULL;
@ -190,7 +230,11 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
cJSON_Delete(json); cJSON_Delete(json);
return NULL; return NULL;
} }
if (req_highlight_context_size.val->valueint < 0) { if (req_highlight_context_size.val && req_highlight_context_size.val->valueint < 0) {
cJSON_Delete(json);
return NULL;
}
if (req_model.val && !req_embedding.val || !req_model.val && req_embedding.val) {
cJSON_Delete(json); cJSON_Delete(json);
return NULL; return NULL;
} }
@ -216,6 +260,10 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
req->highlight_context_size = req_highlight_context_size.val req->highlight_context_size = req_highlight_context_size.val
? req_highlight_context_size.val->valueint ? req_highlight_context_size.val->valueint
: DEFAULT_HIGHLIGHT_CONTEXT_SIZE; : DEFAULT_HIGHLIGHT_CONTEXT_SIZE;
req->model = req_model.val ? req_model.val->valueint : 0;
req->embedding = req_model.val
? get_float_buffer(req_embedding.val, &req->embedding_size)
: NULL;
cJSON_Delete(json); cJSON_Delete(json);
@ -238,6 +286,10 @@ void destroy_search_req(fts_search_req_t *req) {
destroy_array(req->mime_types); destroy_array(req->mime_types);
destroy_array(req->tags); destroy_array(req->tags);
if (req->embedding) {
free(req->embedding);
}
free(req); free(req);
} }
@ -331,7 +383,13 @@ void fts_search(struct mg_connection *nc, struct mg_http_message *hm) {
req->page_size, req->index_ids, req->mime_types, req->page_size, req->index_ids, req->mime_types,
req->tags, req->sort_asc, req->sort, req->seed, req->tags, req->sort_asc, req->sort, req->seed,
req->after, req->fetch_aggregations, req->highlight, req->after, req->fetch_aggregations, req->highlight,
req->highlight_context_size); req->highlight_context_size, req->model,
req->embedding, req->embedding_size);
if (json == NULL) {
HTTP_REPLY_BAD_REQUEST
return;
}
destroy_search_req(req); destroy_search_req(req);
mg_send_json(nc, json); mg_send_json(nc, json);