mirror of
https://github.com/immich-app/immich.git
synced 2026-03-06 10:07:48 +03:00
feat(server): near-duplicate detection (#8228)
* duplicate detection job, entity, config * queueing * job panel, update api * use embedding in db instead of fetching * disable concurrency * only queue visible assets * handle multiple duplicateIds * update concurrent queue check * add provider * add web placeholder, server endpoint, migration, various fixes * update sql * select embedding by default * rename variable * simplify * remove separate entity, handle re-running with different threshold, set default back to 0.02 * fix tests * add tests * add index to entity * formatting * update asset mock * fix `upsertJobStatus` signature * update sql * formatting * default to 0.03 * optimize clustering * use asset's `duplicateId` if present * update sql * update tests * expose admin setting * refactor * formatting * skip if ml is disabled * debug trash e2e * remove from web * remove from sidebar * test if ml is disabled * update sql * separate duplicate detection from clip in config, disable by default for now * fix doc * lower minimum `maxDistance` * update api * Add and Use Duplicate Detection Feature Flag (#9364) * Add Duplicate Detection Flag * Use Duplicate Detection Flag * Attempt Fixes for Failing Checks * lower minimum `maxDistance` * fix tests --------- Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com> * chore: fixes and additions after rebase * chore: update api (remove new Role enum) * fix: left join smart search so getAll works without machine learning * test: trash e2e go back to checking length of assets is zero * chore: regen api after rebase * test: fix tests after rebase * redundant join --------- Co-authored-by: Nicholas Flamy <30300649+NicholasFlamy@users.noreply.github.com> Co-authored-by: Zack Pollard <zackpollard@ymail.com> Co-authored-by: Zack Pollard <zack@futo.org>
This commit is contained in:
@@ -18,6 +18,7 @@ import {
|
||||
AssetStats,
|
||||
AssetStatsOptions,
|
||||
AssetUpdateAllOptions,
|
||||
AssetUpdateDuplicateOptions,
|
||||
AssetUpdateOptions,
|
||||
IAssetRepository,
|
||||
LivePhotoSearchOptions,
|
||||
@@ -73,7 +74,7 @@ export class AssetRepository implements IAssetRepository {
|
||||
await this.exifRepository.upsert(exif, { conflictPaths: ['assetId'] });
|
||||
}
|
||||
|
||||
/**
 * Upserts asset job-status rows through `jobStatusRepository`, resolving
 * conflicts on the `assetId` column.
 *
 * NOTE(review): this hunk is shown in diff form — the first signature line
 * (single `jobStatus` object) appears to be the pre-change version and the
 * second (rest parameter, i.e. an array of partial entities) its replacement;
 * confirm against the actual file before relying on either.
 */
async upsertJobStatus(jobStatus: Partial<AssetJobStatusEntity>): Promise<void> {
|
||||
async upsertJobStatus(...jobStatus: Partial<AssetJobStatusEntity>[]): Promise<void> {
|
||||
// `jobStatus` is forwarded as-is; with the variadic signature this is an array of rows.
await this.jobStatusRepository.upsert(jobStatus, { conflictPaths: ['assetId'] });
|
||||
}
|
||||
|
||||
@@ -257,6 +258,21 @@ export class AssetRepository implements IAssetRepository {
|
||||
await this.repository.update({ id: In(ids) }, options);
|
||||
}
|
||||
|
||||
/**
 * Sets `duplicateId` to `options.targetDuplicateId` on every asset that either
 * already carries one of `options.duplicateIds` or whose id is listed in
 * `options.assetIds`. Both conditions are combined with OR in a single
 * UPDATE statement.
 *
 * @param options target duplicate id plus the duplicate ids / asset ids to re-point
 */
@GenerateSql({
  params: [{ targetDuplicateId: DummyValue.UUID, duplicateIds: [DummyValue.UUID], assetIds: [DummyValue.UUID] }],
})
async updateDuplicates(options: AssetUpdateDuplicateOptions): Promise<void> {
  const { targetDuplicateId, duplicateIds, assetIds } = options;

  const updateQuery = this.repository
    .createQueryBuilder()
    .update()
    .set({ duplicateId: targetDuplicateId })
    // assets that already belong to one of the given duplicate groups ...
    .where({ duplicateId: In(duplicateIds) })
    // ... or that are explicitly listed by id
    .orWhere({ id: In(assetIds) });

  await updateQuery.execute();
}
|
||||
|
||||
@Chunked()
|
||||
async softDeleteAll(ids: string[]): Promise<void> {
|
||||
await this.repository.softDelete({ id: In(ids) });
|
||||
@@ -375,6 +391,18 @@ export class AssetRepository implements IAssetRepository {
|
||||
break;
|
||||
}
|
||||
|
||||
case WithoutProperty.DUPLICATE: {
|
||||
where = {
|
||||
previewPath: Not(IsNull()),
|
||||
isVisible: true,
|
||||
smartSearch: true,
|
||||
jobStatus: {
|
||||
duplicatesDetectedAt: IsNull(),
|
||||
},
|
||||
};
|
||||
break;
|
||||
}
|
||||
|
||||
case WithoutProperty.OBJECT_TAGS: {
|
||||
relations = {
|
||||
smartInfo: true,
|
||||
@@ -614,6 +642,13 @@ export class AssetRepository implements IAssetRepository {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Loads the assets matching `options` with `isDuplicate` forced to `true`,
 * ordered by `asset.duplicateId`.
 *
 * @param options builder filters; any `isDuplicate` value supplied here is overridden
 * @returns the matching asset entities
 */
@GenerateSql({ params: [{ userIds: [DummyValue.UUID, DummyValue.UUID] }] })
getDuplicates(options: AssetBuilderOptions): Promise<AssetEntity[]> {
  const duplicateOptions: AssetBuilderOptions = { ...options, isDuplicate: true };
  const query = this.getBuilder(duplicateOptions).orderBy('asset.duplicateId');
  return query.getMany();
}
|
||||
|
||||
@GenerateSql({ params: [DummyValue.UUID, { minAssetsPerField: 5, maxFields: 12 }] })
|
||||
async getAssetIdByCity(
|
||||
ownerId: string,
|
||||
@@ -673,16 +708,14 @@ export class AssetRepository implements IAssetRepository {
|
||||
}
|
||||
|
||||
private getBuilder(options: AssetBuilderOptions) {
|
||||
const { isArchived, isFavorite, isTrashed, albumId, personId, userIds, withStacked, exifInfo, assetType } = options;
|
||||
|
||||
const builder = this.repository.createQueryBuilder('asset').where('asset.isVisible = true');
|
||||
if (assetType !== undefined) {
|
||||
builder.andWhere('asset.type = :assetType', { assetType });
|
||||
if (options.assetType !== undefined) {
|
||||
builder.andWhere('asset.type = :assetType', { assetType: options.assetType });
|
||||
}
|
||||
|
||||
let stackJoined = false;
|
||||
|
||||
if (exifInfo !== false) {
|
||||
if (options.exifInfo !== false) {
|
||||
stackJoined = true;
|
||||
builder
|
||||
.leftJoinAndSelect('asset.exifInfo', 'exifInfo')
|
||||
@@ -690,34 +723,38 @@ export class AssetRepository implements IAssetRepository {
|
||||
.leftJoinAndSelect('stack.assets', 'stackedAssets');
|
||||
}
|
||||
|
||||
if (albumId) {
|
||||
builder.leftJoin('asset.albums', 'album').andWhere('album.id = :albumId', { albumId });
|
||||
if (options.albumId) {
|
||||
builder.leftJoin('asset.albums', 'album').andWhere('album.id = :albumId', { albumId: options.albumId });
|
||||
}
|
||||
|
||||
if (userIds) {
|
||||
builder.andWhere('asset.ownerId IN (:...userIds )', { userIds });
|
||||
if (options.userIds) {
|
||||
builder.andWhere('asset.ownerId IN (:...userIds )', { userIds: options.userIds });
|
||||
}
|
||||
|
||||
if (isArchived !== undefined) {
|
||||
builder.andWhere('asset.isArchived = :isArchived', { isArchived });
|
||||
if (options.isArchived !== undefined) {
|
||||
builder.andWhere('asset.isArchived = :isArchived', { isArchived: options.isArchived });
|
||||
}
|
||||
|
||||
if (isFavorite !== undefined) {
|
||||
builder.andWhere('asset.isFavorite = :isFavorite', { isFavorite });
|
||||
if (options.isFavorite !== undefined) {
|
||||
builder.andWhere('asset.isFavorite = :isFavorite', { isFavorite: options.isFavorite });
|
||||
}
|
||||
|
||||
if (isTrashed !== undefined) {
|
||||
builder.andWhere(`asset.deletedAt ${isTrashed ? 'IS NOT NULL' : 'IS NULL'}`).withDeleted();
|
||||
if (options.isTrashed !== undefined) {
|
||||
builder.andWhere(`asset.deletedAt ${options.isTrashed ? 'IS NOT NULL' : 'IS NULL'}`).withDeleted();
|
||||
}
|
||||
|
||||
if (personId !== undefined) {
|
||||
if (options.isDuplicate !== undefined) {
|
||||
builder.andWhere(`asset.duplicateId ${options.isDuplicate ? 'IS NOT NULL' : 'IS NULL'}`);
|
||||
}
|
||||
|
||||
if (options.personId !== undefined) {
|
||||
builder
|
||||
.innerJoin('asset.faces', 'faces')
|
||||
.innerJoin('faces.person', 'person')
|
||||
.andWhere('person.id = :personId', { personId });
|
||||
.andWhere('person.id = :personId', { personId: options.personId });
|
||||
}
|
||||
|
||||
if (withStacked) {
|
||||
if (options.withStacked) {
|
||||
if (!stackJoined) {
|
||||
builder.leftJoinAndSelect('asset.stack', 'stack').leftJoinAndSelect('stack.assets', 'stackedAssets');
|
||||
}
|
||||
|
||||
@@ -65,6 +65,10 @@ export const JOBS_TO_QUEUE: Record<JobName, QueueName> = {
|
||||
[JobName.QUEUE_SMART_SEARCH]: QueueName.SMART_SEARCH,
|
||||
[JobName.SMART_SEARCH]: QueueName.SMART_SEARCH,
|
||||
|
||||
// duplicate detection
|
||||
[JobName.QUEUE_DUPLICATE_DETECTION]: QueueName.DUPLICATE_DETECTION,
|
||||
[JobName.DUPLICATE_DETECTION]: QueueName.DUPLICATE_DETECTION,
|
||||
|
||||
// XMP sidecars
|
||||
[JobName.QUEUE_SIDECAR]: QueueName.SIDECAR,
|
||||
[JobName.SIDECAR_DISCOVERY]: QueueName.SIDECAR,
|
||||
|
||||
@@ -10,6 +10,8 @@ import { SmartSearchEntity } from 'src/entities/smart-search.entity';
|
||||
import { DatabaseExtension } from 'src/interfaces/database.interface';
|
||||
import { ILoggerRepository } from 'src/interfaces/logger.interface';
|
||||
import {
|
||||
AssetDuplicateResult,
|
||||
AssetDuplicateSearch,
|
||||
AssetSearchOptions,
|
||||
FaceEmbeddingSearch,
|
||||
FaceSearchResult,
|
||||
@@ -145,6 +147,44 @@ export class SearchRepository implements ISearchRepository {
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
 * Finds assets whose stored embedding lies within `maxDistance` of `embedding`.
 *
 * The inner query (used as a CTE) selects the 64 nearest visible assets owned
 * by any of `userIds`, excluding `assetId` itself, ordered by
 * `search.embedding <=> :embedding`. The outer query then keeps only the rows
 * whose distance is at most `maxDistance`, so the distance filter is applied
 * after the 64-row limit and at most 64 rows are returned.
 *
 * @returns raw rows exposing `assetId`, `duplicateId` and `distance`
 */
// NOTE(review): the sample params below do not include `assetId`, although the
// query binds `:assetId` — confirm whether the generated SQL needs it.
@GenerateSql({
  params: [
    {
      embedding: Array.from({ length: 512 }, Math.random),
      maxDistance: 0.6,
      userIds: [DummyValue.UUID],
    },
  ],
})
searchDuplicates({
  assetId,
  embedding,
  maxDistance,
  userIds,
}: AssetDuplicateSearch): Promise<AssetDuplicateResult[]> {
  // Nearest-neighbour candidates: limited to 64 rows before any distance cut-off.
  const cte = this.assetRepository.createQueryBuilder('asset');
  cte
    .select('search.assetId', 'assetId')
    .addSelect('asset.duplicateId', 'duplicateId')
    .addSelect(`search.embedding <=> :embedding`, 'distance')
    .innerJoin('asset.smartSearch', 'search')
    .where('asset.ownerId IN (:...userIds )')
    .andWhere('asset.id != :assetId')
    .andWhere('asset.isVisible = :isVisible')
    .orderBy('search.embedding <=> :embedding')
    .limit(64)
    .setParameters({ assetId, embedding: asVector(embedding), isVisible: true, userIds });

  // Outer query: drop candidates that are farther away than the threshold.
  const builder = this.assetRepository.manager
    .createQueryBuilder()
    .addCommonTableExpression(cte, 'cte')
    .from('cte', 'res')
    .select('res.*')
    .where('res.distance <= :maxDistance', { maxDistance });

  // Type the raw rows through the generic parameter instead of an `as any as` double assertion.
  return builder.getRawMany<AssetDuplicateResult>();
}
|
||||
|
||||
@GenerateSql({
|
||||
params: [
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user