chore(server): don't insert embeddings if the model has changed (#17885)

* chore(server): don't insert embeddings if the model has changed

We're moving away from the heuristic of waiting for queues to complete. The job
which inserts embeddings can simply check if the model has changed before
inserting, rather than attempting to lock the queue.

* more robust dim size update

* use check constraint

* index command cleanup

* add create statement

* update medium test, create appropriate extension

* new line

* set dimension size when running on all assets

* why does it want braces smh

* take 2

---------

Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com>
This commit is contained in:
Thomas
2025-04-29 19:23:01 +01:00
committed by GitHub
parent 0e4cf9ac57
commit 3ce353393a
11 changed files with 82 additions and 136 deletions

View File

@@ -50,12 +50,6 @@ export class SmartInfoService extends BaseService {
return;
}
const { isPaused } = await this.jobRepository.getQueueStatus(QueueName.SMART_SEARCH);
if (!isPaused) {
await this.jobRepository.pause(QueueName.SMART_SEARCH);
}
await this.jobRepository.waitForQueueCompletion(QueueName.SMART_SEARCH);
if (dimSizeChange) {
this.logger.log(
`Dimension size of model ${newConfig.machineLearning.clip.modelName} is ${dimSize}, but database expects ${dbDimSize}.`,
@@ -67,9 +61,8 @@ export class SmartInfoService extends BaseService {
await this.searchRepository.deleteAllSearchEmbeddings();
}
if (!isPaused) {
await this.jobRepository.resume(QueueName.SMART_SEARCH);
}
// TODO: A job to reindex all assets should be scheduled, though user
// confirmation should probably be requested before doing that.
});
}
@@ -81,7 +74,9 @@ export class SmartInfoService extends BaseService {
}
if (force) {
await this.searchRepository.deleteAllSearchEmbeddings();
const { dimSize } = getCLIPModelInfo(machineLearning.clip.modelName);
// in addition to deleting embeddings, update the dimension size in case it failed earlier
await this.searchRepository.setDimensionSize(dimSize);
}
const assetPagination = usePagination(JOBS_ASSET_PAGINATION_SIZE, (pagination) => {
@@ -126,6 +121,12 @@ export class SmartInfoService extends BaseService {
await this.databaseRepository.wait(DatabaseLock.CLIPDimSize);
}
const newConfig = await this.getConfig({ withCache: true });
if (machineLearning.clip.modelName !== newConfig.machineLearning.clip.modelName) {
// Skip the job if the model has changed since the embedding was generated.
return JobStatus.SKIPPED;
}
await this.searchRepository.upsert(asset.id, embedding);
return JobStatus.SUCCESS;