fix: face and edit handling (#25738)

* fix: handle edits when creating face
This commit is contained in:
Brandon Wees
2026-02-05 13:29:46 -06:00
committed by GitHub
parent cfc5ed5997
commit 37e5968a7a
6 changed files with 938 additions and 34 deletions

View File

@@ -0,0 +1,221 @@
import { mapAsset } from 'src/dtos/asset-response.dto';
import { AssetEditAction } from 'src/dtos/editing.dto';
import { assetStub } from 'test/fixtures/asset.stub';
import { faceStub } from 'test/fixtures/face.stub';
import { personStub } from 'test/fixtures/person.stub';
// Tests for the face-related output of mapAsset: `people` (faces grouped per person)
// and `unassignedFaces`, including how bounding boxes are adjusted when the asset has edits.
describe('mapAsset', () => {
// Covers both the per-person grouping and the coordinate transformation of mapped faces.
describe('peopleWithFaces', () => {
it('should transform all faces when a person has multiple faces in the same image', () => {
// Both faces start from faceStub.primaryFace1; face2 only overrides the id and the
// bounding box, so the test expects them to end up under a single person entry.
const face1 = {
...faceStub.primaryFace1,
boundingBoxX1: 100,
boundingBoxY1: 100,
boundingBoxX2: 200,
boundingBoxY2: 200,
imageWidth: 1000,
imageHeight: 800,
};
const face2 = {
...faceStub.primaryFace1,
id: 'assetFaceId-second',
boundingBoxX1: 300,
boundingBoxY1: 400,
boundingBoxX2: 400,
boundingBoxY2: 500,
imageWidth: 1000,
imageHeight: 800,
};
// `edits` is not overridden here, so whatever edits assetStub.withCropEdit defines are used
// (fixture lives in test/fixtures/asset.stub and is not shown in this file).
const asset = {
...assetStub.withCropEdit,
faces: [face1, face2],
exifInfo: {
exifImageWidth: 1000,
exifImageHeight: 800,
},
};
// Cast to any because the hand-built object is only a partial asset shape.
const result = mapAsset(asset as any);
expect(result.people).toBeDefined();
expect(result.people).toHaveLength(1);
expect(result.people![0].faces).toHaveLength(2);
// Verify that both faces have been transformed (bounding boxes adjusted for crop)
const firstFace = result.people![0].faces[0];
const secondFace = result.people![0].faces[1];
// Expected values assume the stub's crop offset is (x: 216, y: 1512):
// each coordinate is the original value minus that offset.
// Results are NOT clamped to the crop area; faces outside it yield negative coordinates.
expect(firstFace.boundingBoxX1).toBe(-116); // 100 - 216 = -116
expect(firstFace.boundingBoxY1).toBe(-1412); // 100 - 1512 = -1412
expect(firstFace.boundingBoxX2).toBe(-16); // 200 - 216 = -16
expect(firstFace.boundingBoxY2).toBe(-1312); // 200 - 1512 = -1312
expect(secondFace.boundingBoxX1).toBe(84); // 300 - 216
expect(secondFace.boundingBoxY1).toBe(-1112); // 400 - 1512 = -1112
expect(secondFace.boundingBoxX2).toBe(184); // 400 - 216
expect(secondFace.boundingBoxY2).toBe(-1012); // 500 - 1512 = -1012
});
it('should transform unassigned faces with edits and dimensions', () => {
// Built from faceStub.noPerson1 so the face is expected to land in `unassignedFaces`.
const unassignedFace = {
...faceStub.noPerson1,
boundingBoxX1: 100,
boundingBoxY1: 100,
boundingBoxX2: 200,
boundingBoxY2: 200,
imageWidth: 1000,
imageHeight: 800,
};
// The explicit `edits` below replaces the stub's edits with a single crop at (50, 50).
const asset = {
...assetStub.withCropEdit,
faces: [unassignedFace],
exifInfo: {
exifImageWidth: 1000,
exifImageHeight: 800,
},
edits: [
{
action: AssetEditAction.Crop,
parameters: { x: 50, y: 50, width: 500, height: 400 },
},
],
};
const result = mapAsset(asset as any);
expect(result.unassignedFaces).toBeDefined();
expect(result.unassignedFaces).toHaveLength(1);
// Verify that unassigned face has been transformed
const face = result.unassignedFaces![0];
expect(face.boundingBoxX1).toBe(50); // 100 - 50
expect(face.boundingBoxY1).toBe(50); // 100 - 50
expect(face.boundingBoxX2).toBe(150); // 200 - 50
expect(face.boundingBoxY2).toBe(150); // 200 - 50
});
it('should handle multiple people each with multiple faces', () => {
// Two faces for personStub.withName and one for personStub.mergePerson;
// person/personId are set explicitly so the grouping does not depend on the face stubs.
const person1Face1 = {
...faceStub.primaryFace1,
id: 'face-1-1',
person: personStub.withName,
personId: personStub.withName.id,
boundingBoxX1: 100,
boundingBoxY1: 100,
boundingBoxX2: 200,
boundingBoxY2: 200,
imageWidth: 1000,
imageHeight: 800,
};
const person1Face2 = {
...faceStub.primaryFace1,
id: 'face-1-2',
person: personStub.withName,
personId: personStub.withName.id,
boundingBoxX1: 300,
boundingBoxY1: 300,
boundingBoxX2: 400,
boundingBoxY2: 400,
imageWidth: 1000,
imageHeight: 800,
};
const person2Face1 = {
...faceStub.mergeFace1,
id: 'face-2-1',
person: personStub.mergePerson,
personId: personStub.mergePerson.id,
boundingBoxX1: 500,
boundingBoxY1: 100,
boundingBoxX2: 600,
boundingBoxY2: 200,
imageWidth: 1000,
imageHeight: 800,
};
// `edits: []` overrides the stub's edits, so no coordinate transformation is expected.
const asset = {
...assetStub.withCropEdit,
faces: [person1Face1, person1Face2, person2Face1],
exifInfo: {
exifImageWidth: 1000,
exifImageHeight: 800,
},
edits: [],
};
const result = mapAsset(asset as any);
expect(result.people).toBeDefined();
expect(result.people).toHaveLength(2);
// Look people up by id rather than by index so the test does not depend on output order.
const person1 = result.people!.find((p) => p.id === personStub.withName.id);
const person2 = result.people!.find((p) => p.id === personStub.mergePerson.id);
expect(person1).toBeDefined();
expect(person1!.faces).toHaveLength(2);
// No edits, so coordinates should be unchanged
expect(person1!.faces[0].boundingBoxX1).toBe(100);
expect(person1!.faces[0].boundingBoxY1).toBe(100);
expect(person1!.faces[1].boundingBoxX1).toBe(300);
expect(person1!.faces[1].boundingBoxY1).toBe(300);
expect(person2).toBeDefined();
expect(person2!.faces).toHaveLength(1);
expect(person2!.faces[0].boundingBoxX1).toBe(500);
expect(person2!.faces[0].boundingBoxY1).toBe(100);
});
it('should combine faces of the same person into a single entry', () => {
// Two distinct face ids that point at the same person (personStub.withName).
const face1 = {
...faceStub.primaryFace1,
id: 'face-1',
person: personStub.withName,
personId: personStub.withName.id,
boundingBoxX1: 100,
boundingBoxY1: 100,
boundingBoxX2: 200,
boundingBoxY2: 200,
imageWidth: 1000,
imageHeight: 800,
};
const face2 = {
...faceStub.primaryFace1,
id: 'face-2',
person: personStub.withName,
personId: personStub.withName.id,
boundingBoxX1: 300,
boundingBoxY1: 300,
boundingBoxX2: 400,
boundingBoxY2: 400,
imageWidth: 1000,
imageHeight: 800,
};
// Edits are cleared; this test only checks grouping, not coordinates.
const asset = {
...assetStub.withCropEdit,
faces: [face1, face2],
exifInfo: {
exifImageWidth: 1000,
exifImageHeight: 800,
},
edits: [],
};
const result = mapAsset(asset as any);
expect(result.people).toBeDefined();
expect(result.people).toHaveLength(1);
// The single entry must carry the shared person's id and both faces.
const person = result.people![0];
expect(person.id).toBe(personStub.withName.id);
expect(person.faces).toHaveLength(2);
});
});
});

View File

@@ -193,27 +193,30 @@ export type AssetMapOptions = {
auth?: AuthDto;
};
// TODO: this is inefficient
const peopleWithFaces = (
faces?: AssetFace[],
edits?: AssetEditActionItem[],
assetDimensions?: ImageDimensions,
): PersonWithFacesResponseDto[] => {
const result: PersonWithFacesResponseDto[] = [];
if (faces) {
for (const face of faces) {
if (face.person) {
const existingPersonEntry = result.find((item) => item.id === face.person!.id);
if (existingPersonEntry) {
existingPersonEntry.faces.push(face);
} else {
result.push({ ...mapPerson(face.person!), faces: [mapFacesWithoutPerson(face, edits, assetDimensions)] });
}
}
}
if (!faces) {
return [];
}
return result;
const peopleFaces: Map<string, PersonWithFacesResponseDto> = new Map();
for (const face of faces) {
if (!face.person) {
continue;
}
if (!peopleFaces.has(face.person.id)) {
peopleFaces.set(face.person.id, { ...mapPerson(face.person), faces: [] });
}
const mappedFace = mapFacesWithoutPerson(face, edits, assetDimensions);
peopleFaces.get(face.person.id)!.faces.push(mappedFace);
}
return [...peopleFaces.values()];
};
const mapStack = (entity: { stack?: Stack | null }) => {
@@ -275,7 +278,9 @@ export function mapAsset(entity: MapAsset, options: AssetMapOptions = {}): Asset
livePhotoVideoId: entity.livePhotoVideoId,
tags: entity.tags?.map((tag) => mapTag(tag)),
people: peopleWithFaces(entity.faces, entity.edits, assetDimensions),
unassignedFaces: entity.faces?.filter((face) => !face.person).map((a) => mapFacesWithoutPerson(a)),
unassignedFaces: entity.faces
?.filter((face) => !face.person)
.map((a) => mapFacesWithoutPerson(a, entity.edits, assetDimensions)),
checksum: hexOrBufferToBase64(entity.checksum)!,
stack: withStack ? mapStack(entity) : undefined,
isOffline: entity.isOffline,

View File

@@ -44,6 +44,7 @@ import { getDimensions } from 'src/utils/asset.util';
import { ImmichFileResponse } from 'src/utils/file';
import { mimeTypes } from 'src/utils/mime-types';
import { isFacialRecognitionEnabled } from 'src/utils/misc';
import { Point, transformPoints } from 'src/utils/transform';
@Injectable()
export class PersonService extends BaseService {
@@ -634,15 +635,61 @@ export class PersonService extends BaseService {
this.requireAccess({ auth, permission: Permission.PersonRead, ids: [dto.personId] }),
]);
const asset = await this.assetRepository.getById(dto.assetId, { edits: true, exifInfo: true });
if (!asset) {
throw new NotFoundException('Asset not found');
}
const edits = asset.edits || [];
let topLeft: Point = { x: dto.x, y: dto.y };
let bottomRight: Point = { x: dto.x + dto.width, y: dto.y + dto.height };
// the coordinates received from the client are based on the edited preview image
// we need to convert them to the coordinate space of the original unedited image
if (edits.length > 0) {
if (!asset.width || !asset.height || !asset.exifInfo?.exifImageWidth || !asset.exifInfo?.exifImageHeight) {
throw new BadRequestException('Asset does not have valid dimensions');
}
// convert from preview to full dimensions
const scaleFactor = asset.width / dto.imageWidth;
topLeft = { x: topLeft.x * scaleFactor, y: topLeft.y * scaleFactor };
bottomRight = { x: bottomRight.x * scaleFactor, y: bottomRight.y * scaleFactor };
const {
points: [invertedTopLeft, invertedBottomRight],
} = transformPoints(
[topLeft, bottomRight],
edits,
{ width: asset.width, height: asset.height },
{ inverse: true },
);
// make sure topLeft is top-left and bottomRight is bottom-right
topLeft = {
x: Math.min(invertedTopLeft.x, invertedBottomRight.x),
y: Math.min(invertedTopLeft.y, invertedBottomRight.y),
};
bottomRight = {
x: Math.max(invertedTopLeft.x, invertedBottomRight.x),
y: Math.max(invertedTopLeft.y, invertedBottomRight.y),
};
// now coordinates are in original image space
dto.imageHeight = asset.exifInfo.exifImageHeight;
dto.imageWidth = asset.exifInfo.exifImageWidth;
}
await this.personRepository.createAssetFace({
personId: dto.personId,
assetId: dto.assetId,
imageHeight: dto.imageHeight,
imageWidth: dto.imageWidth,
boundingBoxX1: dto.x,
boundingBoxX2: dto.x + dto.width,
boundingBoxY1: dto.y,
boundingBoxY2: dto.y + dto.height,
boundingBoxX1: Math.round(topLeft.x),
boundingBoxX2: Math.round(bottomRight.x),
boundingBoxY1: Math.round(topLeft.y),
boundingBoxY2: Math.round(bottomRight.y),
sourceType: SourceType.Manual,
});
}

View File

@@ -61,7 +61,7 @@ export const createAffineMatrix = (
);
};
type Point = { x: number; y: number };
export type Point = { x: number; y: number };
type TransformState = {
points: Point[];
@@ -73,29 +73,33 @@ type TransformState = {
* Transforms an array of points through a series of edit operations (crop, rotate, mirror).
* Points should be in absolute pixel coordinates relative to the starting dimensions.
*/
const transformPoints = (
export const transformPoints = (
points: Point[],
edits: AssetEditActionItem[],
startingDimensions: ImageDimensions,
{ inverse = false } = {},
): TransformState => {
let currentWidth = startingDimensions.width;
let currentHeight = startingDimensions.height;
let transformedPoints = [...points];
// Handle crop first
const crop = edits.find((edit) => edit.action === 'crop');
if (crop) {
const { x: cropX, y: cropY, width: cropWidth, height: cropHeight } = crop.parameters;
transformedPoints = transformedPoints.map((p) => ({
x: p.x - cropX,
y: p.y - cropY,
}));
currentWidth = cropWidth;
currentHeight = cropHeight;
// Handle crop first if not inverting
if (!inverse) {
const crop = edits.find((edit) => edit.action === 'crop');
if (crop) {
const { x: cropX, y: cropY, width: cropWidth, height: cropHeight } = crop.parameters;
transformedPoints = transformedPoints.map((p) => ({
x: p.x - cropX,
y: p.y - cropY,
}));
currentWidth = cropWidth;
currentHeight = cropHeight;
}
}
// Apply rotate and mirror transforms
for (const edit of edits) {
const editSequence = inverse ? edits.toReversed() : edits;
for (const edit of editSequence) {
let matrix: Matrix = identity();
if (edit.action === 'rotate') {
const angleDegrees = edit.parameters.angle;
@@ -105,7 +109,7 @@ const transformPoints = (
matrix = compose(
translate(newWidth / 2, newHeight / 2),
rotate(angleRadians),
rotate(inverse ? -angleRadians : angleRadians),
translate(-currentWidth / 2, -currentHeight / 2),
);
@@ -125,6 +129,18 @@ const transformPoints = (
transformedPoints = transformedPoints.map((p) => applyToPoint(matrix, p));
}
// Handle crop last if inverting
if (inverse) {
const crop = edits.find((edit) => edit.action === 'crop');
if (crop) {
const { x: cropX, y: cropY } = crop.parameters;
transformedPoints = transformedPoints.map((p) => ({
x: p.x + cropX,
y: p.y + cropY,
}));
}
}
return {
points: transformedPoints,
currentWidth,