fix(server): use bigrams for cjk (#24285)

* use bigrams for cjk

* update sql

* linting

* actually migrate ocr

* fix backwards test

* use array

* tweaks
This commit is contained in:
Mert
2025-12-01 12:24:37 -05:00
committed by GitHub
parent d8ca210641
commit 95c29a8aea
5 changed files with 203 additions and 47 deletions

View File

@@ -45,12 +45,12 @@ export class OcrRepository {
textScore: DummyValue.NUMBER,
},
],
DummyValue.STRING,
],
})
upsert(assetId: string, ocrDataList: Insertable<AssetOcrTable>[]) {
upsert(assetId: string, ocrDataList: Insertable<AssetOcrTable>[], searchText: string) {
let query = this.db.with('deleted_ocr', (db) => db.deleteFrom('asset_ocr').where('assetId', '=', assetId));
if (ocrDataList.length > 0) {
const searchText = ocrDataList.map((item) => item.text.trim()).join(' ');
(query as any) = query
.with('inserted_ocr', (db) => db.insertInto('asset_ocr').values(ocrDataList))
.with('inserted_search', (db) =>