mirror of
https://github.com/immich-app/immich.git
synced 2026-03-01 18:19:10 +03:00
chore(server): don't insert embeddings if the model has changed (#17885)
* chore(server): don't insert embeddings if the model has changed

  We're moving away from the heuristic of waiting for queues to complete. The job which inserts embeddings can simply check if the model has changed before inserting, rather than attempting to lock the queue.
* more robust dim size update
* use check constraint
* index command cleanup
* add create statement
* update medium test, create appropriate extension
* new line
* set dimension size when running on all assets
* why does it want braces smh
* take 2

---------

Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com>
This commit is contained in:
@@ -12,6 +12,7 @@ import { DatabaseExtension, DatabaseLock, VectorIndex } from 'src/enum';
|
||||
import { ConfigRepository } from 'src/repositories/config.repository';
|
||||
import { LoggingRepository } from 'src/repositories/logging.repository';
|
||||
import { ExtensionVersion, VectorExtension, VectorUpdateResult } from 'src/types';
|
||||
import { vectorIndexQuery } from 'src/utils/database';
|
||||
import { isValidInteger } from 'src/validation';
|
||||
import { DataSource } from 'typeorm';
|
||||
|
||||
@@ -119,12 +120,7 @@ export class DatabaseRepository {
|
||||
await sql`ALTER TABLE ${sql.raw(table)} ALTER COLUMN embedding SET DATA TYPE vector(${sql.raw(String(dimSize))})`.execute(
|
||||
tx,
|
||||
);
|
||||
await sql`SET vectors.pgvector_compatibility=on`.execute(tx);
|
||||
await sql`
|
||||
CREATE INDEX IF NOT EXISTS ${sql.raw(index)} ON ${sql.raw(table)}
|
||||
USING hnsw (embedding vector_cosine_ops)
|
||||
WITH (ef_construction = 300, m = 16)
|
||||
`.execute(tx);
|
||||
await sql.raw(vectorIndexQuery({ vectorExtension: this.vectorExtension, table, indexName: index })).execute(tx);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,7 +6,8 @@ import { DB, Exif } from 'src/db';
|
||||
import { DummyValue, GenerateSql } from 'src/decorators';
|
||||
import { MapAsset } from 'src/dtos/asset-response.dto';
|
||||
import { AssetStatus, AssetType } from 'src/enum';
|
||||
import { anyUuid, asUuid, searchAssetBuilder } from 'src/utils/database';
|
||||
import { ConfigRepository } from 'src/repositories/config.repository';
|
||||
import { anyUuid, asUuid, searchAssetBuilder, vectorIndexQuery } from 'src/utils/database';
|
||||
import { isValidInteger } from 'src/validation';
|
||||
|
||||
export interface SearchResult<T> {
|
||||
@@ -201,7 +202,10 @@ export interface GetCameraMakesOptions {
|
||||
|
||||
@Injectable()
|
||||
export class SearchRepository {
|
||||
constructor(@InjectKysely() private db: Kysely<DB>) {}
|
||||
constructor(
|
||||
@InjectKysely() private db: Kysely<DB>,
|
||||
private configRepository: ConfigRepository,
|
||||
) {}
|
||||
|
||||
@GenerateSql({
|
||||
params: [
|
||||
@@ -446,8 +450,8 @@ export class SearchRepository {
|
||||
async upsert(assetId: string, embedding: string): Promise<void> {
|
||||
await this.db
|
||||
.insertInto('smart_search')
|
||||
.values({ assetId: asUuid(assetId), embedding } as any)
|
||||
.onConflict((oc) => oc.column('assetId').doUpdateSet({ embedding } as any))
|
||||
.values({ assetId, embedding })
|
||||
.onConflict((oc) => oc.column('assetId').doUpdateSet((eb) => ({ embedding: eb.ref('excluded.embedding') })))
|
||||
.execute();
|
||||
}
|
||||
|
||||
@@ -469,19 +473,32 @@ export class SearchRepository {
|
||||
return dimSize;
|
||||
}
|
||||
|
||||
setDimensionSize(dimSize: number): Promise<void> {
|
||||
async setDimensionSize(dimSize: number): Promise<void> {
|
||||
if (!isValidInteger(dimSize, { min: 1, max: 2 ** 16 })) {
|
||||
throw new Error(`Invalid CLIP dimension size: ${dimSize}`);
|
||||
}
|
||||
|
||||
return this.db.transaction().execute(async (trx) => {
|
||||
await sql`truncate ${sql.table('smart_search')}`.execute(trx);
|
||||
// this is done in two transactions to handle concurrent writes
|
||||
await this.db.transaction().execute(async (trx) => {
|
||||
await sql`delete from ${sql.table('smart_search')}`.execute(trx);
|
||||
await trx.schema.alterTable('smart_search').dropConstraint('dim_size_constraint').ifExists().execute();
|
||||
await sql`alter table ${sql.table('smart_search')} add constraint dim_size_constraint check (array_length(embedding::real[], 1) = ${sql.lit(dimSize)})`.execute(
|
||||
trx,
|
||||
);
|
||||
});
|
||||
|
||||
const vectorExtension = this.configRepository.getEnv().database.vectorExtension;
|
||||
await this.db.transaction().execute(async (trx) => {
|
||||
await sql`drop index if exists clip_index`.execute(trx);
|
||||
await trx.schema
|
||||
.alterTable('smart_search')
|
||||
.alterColumn('embedding', (col) => col.setDataType(sql.raw(`vector(${dimSize})`)))
|
||||
.execute();
|
||||
await sql`reindex index clip_index`.execute(trx);
|
||||
await sql.raw(vectorIndexQuery({ vectorExtension, table: 'smart_search', indexName: 'clip_index' })).execute(trx);
|
||||
await trx.schema.alterTable('smart_search').dropConstraint('dim_size_constraint').ifExists().execute();
|
||||
});
|
||||
|
||||
await sql`vacuum analyze ${sql.table('smart_search')}`.execute(this.db);
|
||||
}
|
||||
|
||||
async deleteAllSearchEmbeddings(): Promise<void> {
|
||||
|
||||
Reference in New Issue
Block a user