feat(web): image-relative overlays with zoom support for faces, OCR, and face editor

This commit is contained in:
midzelis
2026-03-07 19:06:14 +00:00
parent 9dafc8e8e9
commit c852c58940
17 changed files with 1289 additions and 236 deletions

View File

@@ -20,7 +20,7 @@ export {
toColumnarFormat,
} from './timeline/rest-response';
export type { Changes } from './timeline/rest-response';
export type { Changes, FaceData } from './timeline/rest-response';
export { randomImage, randomImageFromString, randomPreview, randomThumbnail } from './timeline/images';

View File

@@ -7,8 +7,10 @@ import {
AssetVisibility,
UserAvatarColor,
type AlbumResponseDto,
type AssetFaceWithoutPersonResponseDto,
type AssetResponseDto,
type ExifResponseDto,
type PersonWithFacesResponseDto,
type TimeBucketAssetResponseDto,
type TimeBucketsResponseDto,
type UserResponseDto,
@@ -284,7 +286,16 @@ const createDefaultOwner = (ownerId: string) => {
* Convert a TimelineAssetConfig to a full AssetResponseDto
* This matches the response from GET /api/assets/:id
*/
export function toAssetResponseDto(asset: MockTimelineAsset, owner?: UserResponseDto): AssetResponseDto {
// Face-recognition payload attached to a mocked asset response.
// `people` are recognized persons together with their face boxes;
// `unassignedFaces` are detected faces not yet linked to any person.
export type FaceData = {
  people: PersonWithFacesResponseDto[];
  unassignedFaces: AssetFaceWithoutPersonResponseDto[];
};
export function toAssetResponseDto(
asset: MockTimelineAsset,
owner?: UserResponseDto,
faceData?: FaceData,
): AssetResponseDto {
const now = new Date().toISOString();
// Default owner if not provided
@@ -338,8 +349,8 @@ export function toAssetResponseDto(asset: MockTimelineAsset, owner?: UserRespons
exifInfo,
livePhotoVideoId: asset.livePhotoVideoId,
tags: [],
people: [],
unassignedFaces: [],
people: faceData?.people ?? [],
unassignedFaces: faceData?.unassignedFaces ?? [],
stack: asset.stack,
isOffline: false,
hasMetadata: true,

View File

@@ -1,5 +1,6 @@
import type { AssetFaceResponseDto, AssetResponseDto, PersonWithFacesResponseDto, SourceType } from '@immich/sdk';
import { BrowserContext } from '@playwright/test';
import { randomThumbnail } from 'src/ui/generators/timeline';
import { type FaceData, randomThumbnail } from 'src/ui/generators/timeline';
// Minimal valid H.264 MP4 (8x8px, 1 frame) that browsers can decode to get videoWidth/videoHeight
const MINIMAL_MP4_BASE64 =
@@ -125,3 +126,84 @@ export const setupFaceEditorMockApiRoutes = async (
});
});
};
// Declarative description of one mocked face: the person it belongs to,
// the face id, and the face's bounding box corners. Box values are paired
// with imageWidth/imageHeight in toBoundingBox, so they are presumably
// source-image pixel coordinates — confirm against the overlay component.
export type MockFaceSpec = {
  personId: string;
  personName: string;
  faceId: string;
  // Top-left corner of the face box.
  boundingBoxX1: number;
  boundingBoxY1: number;
  // Bottom-right corner of the face box.
  boundingBoxX2: number;
  boundingBoxY2: number;
};
// Builds the person portion of a mocked API response from a face spec.
// Uses fixed placeholder values for fields the tests never assert on.
const toPersonResponseDto = (spec: MockFaceSpec) => {
  const { personId, personName } = spec;
  return {
    id: personId,
    name: personName,
    birthDate: null,
    isHidden: false,
    thumbnailPath: `/upload/thumbs/${personId}.jpeg`,
    updatedAt: '2025-01-01T00:00:00.000Z',
  };
};
// Builds the face bounding-box portion of a mocked API response,
// pairing the spec's corner coordinates with the source image size.
const toBoundingBox = (spec: MockFaceSpec, imageWidth: number, imageHeight: number) => {
  const { faceId, boundingBoxX1, boundingBoxY1, boundingBoxX2, boundingBoxY2 } = spec;
  return {
    id: faceId,
    imageWidth,
    imageHeight,
    boundingBoxX1,
    boundingBoxY1,
    boundingBoxX2,
    boundingBoxY2,
  };
};
// Converts face specs into the FaceData shape embedded in an asset
// response: one person per spec, each carrying exactly one face box.
// No unassigned faces are produced by this helper.
export const createMockFaceData = (specs: MockFaceSpec[], imageWidth: number, imageHeight: number): FaceData => {
  const people = specs.map((spec): PersonWithFacesResponseDto => {
    const person = toPersonResponseDto(spec);
    const face = toBoundingBox(spec, imageWidth, imageHeight);
    return { ...person, faces: [face] };
  });
  return { people, unassignedFaces: [] };
};
// Converts face specs into the flat AssetFaceResponseDto[] shape served
// by the GET /api/faces endpoint (box + owning person + source type).
export const createMockAssetFaces = (
  specs: MockFaceSpec[],
  imageWidth: number,
  imageHeight: number,
): AssetFaceResponseDto[] =>
  specs.map((spec) => ({
    ...toBoundingBox(spec, imageWidth, imageHeight),
    person: toPersonResponseDto(spec),
    // All mocked faces are reported as ML-detected.
    sourceType: 'machine-learning' as SourceType,
  }));
// Intercepts GET /api/faces?... and serves the provided faces; every
// other method on the route falls through to other handlers.
export const setupGetFacesMockApiRoute = async (context: BrowserContext, faces: AssetFaceResponseDto[]) => {
  await context.route('**/api/faces?*', async (route, request) => {
    if (request.method() === 'GET') {
      return route.fulfill({
        status: 200,
        contentType: 'application/json',
        json: faces,
      });
    }
    return route.fallback();
  });
};
// Intercepts GET /api/assets/:id for the one asset whose id matches
// assetDto and serves it (including any embedded face data). All other
// requests — different methods or different asset ids — fall back.
export const setupFaceOverlayMockApiRoutes = async (context: BrowserContext, assetDto: AssetResponseDto) => {
  await context.route('**/api/assets/*', async (route, request) => {
    if (request.method() !== 'GET') {
      return route.fallback();
    }
    // The asset id is the last path segment of the request URL.
    const requestedId = new URL(request.url()).pathname.split('/').at(-1);
    return requestedId === assetDto.id
      ? route.fulfill({
          status: 200,
          contentType: 'application/json',
          json: assetDto,
        })
      : route.fallback();
  });
};

View File

@@ -0,0 +1,55 @@
import { faker } from '@faker-js/faker';
import type { AssetOcrResponseDto } from '@immich/sdk';
import { BrowserContext } from '@playwright/test';
// One recognized text region: the text plus a four-corner quad
// (x1,y1)…(x4,y4). Fixture values are fractions below 1, so these
// appear to be image-relative (normalized) coordinates — TODO confirm
// against the OCR overlay component.
export type MockOcrBox = {
  text: string;
  x1: number;
  y1: number;
  x2: number;
  y2: number;
  x3: number;
  y3: number;
  x4: number;
  y4: number;
};
// Expands plain OCR box fixtures into full AssetOcrResponseDto records
// for the given asset, with a random id and fixed confidence scores.
export const createMockOcrData = (assetId: string, boxes: MockOcrBox[]): AssetOcrResponseDto[] =>
  boxes.map(({ text, x1, y1, x2, y2, x3, y3, x4, y4 }) => ({
    id: faker.string.uuid(),
    assetId,
    x1,
    y1,
    x2,
    y2,
    x3,
    y3,
    x4,
    y4,
    // Fixed high-confidence scores; the tests do not assert on these.
    boxScore: 0.95,
    textScore: 0.9,
    text,
  }));
// Intercepts GET **/assets/:id/ocr and serves the OCR records registered
// for that asset id; unknown assets get an empty array, non-GET requests
// fall through to other route handlers.
export const setupOcrMockApiRoutes = async (
  context: BrowserContext,
  ocrDataByAssetId: Map<string, AssetOcrResponseDto[]>,
) => {
  await context.route('**/assets/*/ocr', async (route, request) => {
    if (request.method() !== 'GET') {
      return route.fallback();
    }
    // Asset id is the path segment immediately after "assets".
    const segments = new URL(request.url()).pathname.split('/');
    const assetId = segments[segments.indexOf('assets') + 1];
    return route.fulfill({
      status: 200,
      contentType: 'application/json',
      json: ocrDataByAssetId.get(assetId) ?? [],
    });
  });
};

View File

@@ -149,7 +149,7 @@ test.describe('face-editor', () => {
await expect(page.getByRole('dialog')).toBeVisible();
});
test('Confirming tag calls createFace API and closes editor', async ({ page }) => {
test('Confirming tag calls createFace API with valid coordinates and closes editor', async ({ page }) => {
const asset = selectRandom(fixture.assets, rng);
await openFaceEditor(page, asset);
@@ -163,8 +163,15 @@ test.describe('face-editor', () => {
await expect(page.locator('#face-editor')).toBeHidden();
expect(faceCreateCapture.requests).toHaveLength(1);
expect(faceCreateCapture.requests[0].assetId).toBe(asset.id);
expect(faceCreateCapture.requests[0].personId).toBe(personToTag.id);
const request = faceCreateCapture.requests[0];
expect(request.assetId).toBe(asset.id);
expect(request.personId).toBe(personToTag.id);
expect(request.x).toBeGreaterThanOrEqual(0);
expect(request.y).toBeGreaterThanOrEqual(0);
expect(request.width).toBeGreaterThan(0);
expect(request.height).toBeGreaterThan(0);
expect(request.x + request.width).toBeLessThanOrEqual(request.imageWidth);
expect(request.y + request.height).toBeLessThanOrEqual(request.imageHeight);
});
test('Cancel button closes face editor', async ({ page }) => {
@@ -282,4 +289,39 @@ test.describe('face-editor', () => {
expect(afterDrag.left).toBeGreaterThan(beforeDrag.left + 50);
expect(afterDrag.top).toBeGreaterThan(beforeDrag.top + 20);
});
// Cancelling the tag-confirmation dialog must return the user to the
// face editor without firing any createFace request.
test('Cancel on confirmation dialog keeps face editor open', async ({ page }) => {
  const asset = selectRandom(fixture.assets, rng);
  await openFaceEditor(page, asset);
  // Selecting a person opens the confirmation dialog.
  const personToTag = mockPeople[0];
  await page.locator('#face-selector').getByText(personToTag.name).click();
  await expect(page.getByRole('dialog')).toBeVisible();
  await page
    .getByRole('dialog')
    .getByRole('button', { name: /cancel/i })
    .click();
  // Dialog closes but both the selector and the editor stay open.
  await expect(page.getByRole('dialog')).toBeHidden();
  await expect(page.locator('#face-selector')).toBeVisible();
  await expect(page.locator('#face-editor')).toBeVisible();
  // No createFace API call was captured.
  expect(faceCreateCapture.requests).toHaveLength(0);
});
// A plain click inside the face rectangle must not move it (only drags
// reposition). Allows <3px of measurement jitter in the comparison.
test('Clicking on face rect center does not reposition it', async ({ page }) => {
  const asset = selectRandom(fixture.assets, rng);
  await openFaceEditor(page, asset);
  const beforeClick = await getFaceBoxRect(page);
  const centerX = beforeClick.left + beforeClick.width / 2;
  const centerY = beforeClick.top + beforeClick.height / 2;
  await page.mouse.click(centerX, centerY);
  // Give any (incorrect) reposition animation time to run before measuring.
  await page.waitForTimeout(300);
  const afterClick = await getFaceBoxRect(page);
  expect(Math.abs(afterClick.left - beforeClick.left)).toBeLessThan(3);
  expect(Math.abs(afterClick.top - beforeClick.top)).toBeLessThan(3);
});
});

View File

@@ -0,0 +1,264 @@
import { expect, test } from '@playwright/test';
import { toAssetResponseDto } from 'src/ui/generators/timeline';
import {
createMockAssetFaces,
createMockFaceData,
createMockPeople,
type MockFaceSpec,
setupFaceEditorMockApiRoutes,
setupFaceOverlayMockApiRoutes,
setupGetFacesMockApiRoute,
} from 'src/ui/mock-network/face-editor-network';
import { assetViewerUtils } from '../timeline/utils';
import { ensureDetailPanelVisible, setupAssetViewerFixture } from './utils';
test.describe.configure({ mode: 'parallel' });
// Two fixed faces shared by every suite in this file. Bounding boxes are
// paired with the asset's width/height in setupFaceMocks (3000x4000
// defaults), so they are presumably source-image pixel coordinates.
const FACE_SPECS: MockFaceSpec[] = [
  {
    personId: 'person-alice',
    personName: 'Alice Johnson',
    faceId: 'face-alice',
    boundingBoxX1: 1000,
    boundingBoxY1: 500,
    boundingBoxX2: 1500,
    boundingBoxY2: 1200,
  },
  {
    personId: 'person-bob',
    personName: 'Bob Smith',
    faceId: 'face-bob',
    boundingBoxX1: 2000,
    boundingBoxY1: 800,
    boundingBoxX2: 2400,
    boundingBoxY2: 1600,
  },
];
// Registers the network mocks needed by the face-overlay suites: the
// primary asset is served with FACE_SPECS embedded as face data, and the
// face-editor endpoints are mocked with four generated people.
const setupFaceMocks = async (
  context: import('@playwright/test').BrowserContext,
  fixture: ReturnType<typeof setupAssetViewerFixture>,
) => {
  // Fall back to a 3000x4000 image when the fixture omits dimensions.
  const imageWidth = fixture.primaryAssetDto.width ?? 3000;
  const imageHeight = fixture.primaryAssetDto.height ?? 4000;
  const faceData = createMockFaceData(FACE_SPECS, imageWidth, imageHeight);
  const assetDtoWithFaces = toAssetResponseDto(fixture.primaryAsset, undefined, faceData);
  await setupFaceOverlayMockApiRoutes(context, assetDtoWithFaces);
  await setupFaceEditorMockApiRoutes(context, createMockPeople(4), { requests: [] });
};
// Face overlay rectangles rendered over the asset viewer: aria labels,
// hover border, name tooltip, and hiding while the face editor is open.
test.describe('face overlay bounding boxes', () => {
  const fixture = setupAssetViewerFixture(901);
  test.beforeEach(async ({ context }) => {
    await setupFaceMocks(context, fixture);
  });
  test('face overlay divs render with correct aria labels', async ({ page }) => {
    await page.goto(`/photos/${fixture.primaryAsset.id}`);
    await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset);
    // Overlays are addressable via their "Person: <name>" labels.
    const aliceOverlay = page.getByLabel('Person: Alice Johnson');
    const bobOverlay = page.getByLabel('Person: Bob Smith');
    await expect(aliceOverlay).toBeVisible();
    await expect(bobOverlay).toBeVisible();
  });
  test('face overlay shows border on hover', async ({ page }) => {
    await page.goto(`/photos/${fixture.primaryAsset.id}`);
    await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset);
    const aliceOverlay = page.getByLabel('Person: Alice Johnson');
    await expect(aliceOverlay).toBeVisible();
    // The active (hovered) face is marked by a solid white 3px border element.
    const activeBorder = page.locator('[data-viewer-content] .border-solid.border-white.border-3');
    await expect(activeBorder).toHaveCount(0);
    await aliceOverlay.hover();
    await expect(activeBorder).toHaveCount(1);
  });
  test('face name tooltip appears on hover', async ({ page }) => {
    await page.goto(`/photos/${fixture.primaryAsset.id}`);
    await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset);
    const aliceOverlay = page.getByLabel('Person: Alice Johnson');
    await expect(aliceOverlay).toBeVisible();
    await aliceOverlay.hover();
    const nameTooltip = page.locator('[data-viewer-content]').getByText('Alice Johnson');
    await expect(nameTooltip).toBeVisible();
  });
  test('face overlays hidden in face edit mode', async ({ page }) => {
    await page.goto(`/photos/${fixture.primaryAsset.id}`);
    await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset);
    const aliceOverlay = page.getByLabel('Person: Alice Johnson');
    await expect(aliceOverlay).toBeVisible();
    // Entering face edit mode ("Tag people") must hide the passive overlays.
    await ensureDetailPanelVisible(page);
    await page.getByLabel('Tag people').click();
    await page.locator('#face-selector').waitFor({ state: 'visible' });
    await expect(aliceOverlay).toBeHidden();
  });
  test('face overlay hover works after exiting face edit mode', async ({ page }) => {
    await page.goto(`/photos/${fixture.primaryAsset.id}`);
    await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset);
    const aliceOverlay = page.getByLabel('Person: Alice Johnson');
    await expect(aliceOverlay).toBeVisible();
    // Round-trip through edit mode, then verify hover behaviour still works.
    await ensureDetailPanelVisible(page);
    await page.getByLabel('Tag people').click();
    await page.locator('#face-selector').waitFor({ state: 'visible' });
    await expect(aliceOverlay).toBeHidden();
    await page.getByRole('button', { name: /cancel/i }).click();
    await expect(page.locator('#face-selector')).toBeHidden();
    await expect(aliceOverlay).toBeVisible();
    const activeBorder = page.locator('[data-viewer-content] .border-solid.border-white.border-3');
    await expect(activeBorder).toHaveCount(0);
    await aliceOverlay.hover();
    await expect(activeBorder).toHaveCount(1);
  });
});
// Zoom state must survive entering face edit mode (the editor previously
// reset the image transform).
test.describe('zoom and face editor interaction', () => {
  const fixture = setupAssetViewerFixture(902);
  test.beforeEach(async ({ context }) => {
    await setupFaceMocks(context, fixture);
  });
  test('zoom is preserved when entering face edit mode', async ({ page }) => {
    await page.goto(`/photos/${fixture.primaryAsset.id}`);
    await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset);
    // Zoom in with a wheel event at the viewport center.
    const { width, height } = page.viewportSize()!;
    await page.mouse.move(width / 2, height / 2);
    await page.mouse.wheel(0, -1);
    const imgLocator = page.locator('[data-viewer-content] img[draggable="false"]');
    // Poll until the zoom transform is applied (zoom animates asynchronously).
    await expect(async () => {
      const transform = await imgLocator.evaluate((element) => {
        return getComputedStyle(element.closest('[style*="transform"]') ?? element).transform;
      });
      expect(transform).not.toBe('none');
      expect(transform).not.toBe('');
    }).toPass({ timeout: 2000 });
    await ensureDetailPanelVisible(page);
    await page.getByLabel('Tag people').click();
    await page.locator('#face-selector').waitFor({ state: 'visible' });
    await expect(page.locator('#face-editor')).toBeVisible();
    // The transform must still be present after the editor opened.
    const afterTransform = await imgLocator.evaluate((element) => {
      return getComputedStyle(element.closest('[style*="transform"]') ?? element).transform;
    });
    expect(afterTransform).not.toBe('none');
  });
});
// Hovering a person in the detail panel must highlight that person's
// face overlay in the viewer (mouse, touch pointer, and after a
// round-trip through face edit mode).
test.describe('face overlay via detail panel interaction', () => {
  const fixture = setupAssetViewerFixture(903);
  test.beforeEach(async ({ context }) => {
    await setupFaceMocks(context, fixture);
  });
  test('hovering person in detail panel shows face overlay border', async ({ page }) => {
    await page.goto(`/photos/${fixture.primaryAsset.id}`);
    await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset);
    await ensureDetailPanelVisible(page);
    const personLink = page.locator('#detail-panel a').filter({ hasText: 'Alice Johnson' });
    await expect(personLink).toBeVisible();
    // Active-face marker: a solid white 3px border in the viewer content.
    const activeBorder = page.locator('[data-viewer-content] .border-solid.border-white.border-3');
    await expect(activeBorder).toHaveCount(0);
    await personLink.hover();
    await expect(activeBorder).toHaveCount(1);
  });
  test('touch pointer on person in detail panel shows face overlay border', async ({ page }) => {
    await page.goto(`/photos/${fixture.primaryAsset.id}`);
    await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset);
    await ensureDetailPanelVisible(page);
    const personLink = page.locator('#detail-panel a').filter({ hasText: 'Alice Johnson' });
    await expect(personLink).toBeVisible();
    const activeBorder = page.locator('[data-viewer-content] .border-solid.border-white.border-3');
    await expect(activeBorder).toHaveCount(0);
    // Simulate a touch-type pointerover (the fix changed from onmouseover to onpointerover,
    // which fires for touch pointers unlike mouseover)
    await personLink.dispatchEvent('pointerover', { pointerType: 'touch' });
    await expect(activeBorder).toHaveCount(1);
  });
  test('hovering person in detail panel works after exiting face edit mode', async ({ page }) => {
    await page.goto(`/photos/${fixture.primaryAsset.id}`);
    await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset);
    await ensureDetailPanelVisible(page);
    // Enter and immediately cancel face edit mode before checking hover.
    await page.getByLabel('Tag people').click();
    await page.locator('#face-selector').waitFor({ state: 'visible' });
    await page.getByRole('button', { name: /cancel/i }).click();
    await expect(page.locator('#face-selector')).toBeHidden();
    const personLink = page.locator('#detail-panel a').filter({ hasText: 'Alice Johnson' });
    await expect(personLink).toBeVisible();
    const activeBorder = page.locator('[data-viewer-content] .border-solid.border-white.border-3');
    await personLink.hover();
    await expect(activeBorder).toHaveCount(1);
  });
});
// Hovering a face thumbnail in the "Edit people" side panel must
// highlight the matching overlay. This suite additionally mocks
// GET /api/faces, which backs the side panel's thumbnails.
test.describe('face overlay via edit faces side panel', () => {
  const fixture = setupAssetViewerFixture(904);
  test.beforeEach(async ({ context }) => {
    await setupFaceMocks(context, fixture);
    const assetFaces = createMockAssetFaces(
      FACE_SPECS,
      fixture.primaryAssetDto.width ?? 3000,
      fixture.primaryAssetDto.height ?? 4000,
    );
    await setupGetFacesMockApiRoute(context, assetFaces);
  });
  test('hovering person in edit faces panel shows face overlay border', async ({ page }) => {
    await page.goto(`/photos/${fixture.primaryAsset.id}`);
    await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset);
    await ensureDetailPanelVisible(page);
    await page.getByLabel('Edit people').click();
    const faceThumbnail = page.locator('section div[role="button"]').first();
    await expect(faceThumbnail).toBeVisible();
    const activeBorder = page.locator('[data-viewer-content] .border-solid.border-white.border-3');
    await expect(activeBorder).toHaveCount(0);
    await faceThumbnail.hover();
    await expect(activeBorder).toHaveCount(1);
  });
});

View File

@@ -0,0 +1,300 @@
import type { AssetOcrResponseDto, AssetResponseDto } from '@immich/sdk';
import { expect, test } from '@playwright/test';
import { toAssetResponseDto } from 'src/ui/generators/timeline';
import {
createMockStack,
createMockStackAsset,
MockStack,
setupBrokenAssetMockApiRoutes,
} from 'src/ui/mock-network/broken-asset-network';
import { createMockOcrData, setupOcrMockApiRoutes } from 'src/ui/mock-network/ocr-network';
import { assetViewerUtils } from '../timeline/utils';
import { setupAssetViewerFixture } from './utils';
test.describe.configure({ mode: 'parallel' });
// OCR fixtures: each box is a four-corner quad (x1,y1)…(x4,y4) plus its
// text. Values are fractions below 1, so they appear to be image-relative
// (normalized) coordinates — confirm against the OCR overlay component.
const PRIMARY_OCR_BOXES = [
  { text: 'Hello World', x1: 0.1, y1: 0.1, x2: 0.4, y2: 0.1, x3: 0.4, y3: 0.15, x4: 0.1, y4: 0.15 },
  { text: 'Immich Photo', x1: 0.2, y1: 0.3, x2: 0.6, y2: 0.3, x3: 0.6, y3: 0.36, x4: 0.2, y4: 0.36 },
];
const SECONDARY_OCR_BOXES = [
  { text: 'Second Asset Text', x1: 0.15, y1: 0.2, x2: 0.55, y2: 0.2, x3: 0.55, y3: 0.26, x4: 0.15, y4: 0.26 },
];
// Basic OCR overlay behaviour: the "Text recognition" button toggles the
// boxes on and off, and each box carries its recognized text.
test.describe('OCR bounding boxes', () => {
  const fixture = setupAssetViewerFixture(920);
  test.beforeEach(async ({ context }) => {
    const primaryAssetDto = toAssetResponseDto(fixture.primaryAsset);
    const ocrDataByAssetId = new Map<string, AssetOcrResponseDto[]>([
      [primaryAssetDto.id, createMockOcrData(primaryAssetDto.id, PRIMARY_OCR_BOXES)],
    ]);
    await setupOcrMockApiRoutes(context, ocrDataByAssetId);
  });
  test('OCR bounding boxes appear when clicking OCR button', async ({ page }) => {
    await page.goto(`/photos/${fixture.primaryAsset.id}`);
    await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset);
    const ocrButton = page.getByLabel('Text recognition');
    await expect(ocrButton).toBeVisible();
    await ocrButton.click();
    // One box per fixture entry, in fixture order.
    const ocrBoxes = page.locator('[data-viewer-content] [data-testid="ocr-box"]');
    await expect(ocrBoxes).toHaveCount(2);
    await expect(ocrBoxes.nth(0)).toContainText('Hello World');
    await expect(ocrBoxes.nth(1)).toContainText('Immich Photo');
  });
  test('OCR bounding boxes toggle off on second click', async ({ page }) => {
    await page.goto(`/photos/${fixture.primaryAsset.id}`);
    await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset);
    const ocrButton = page.getByLabel('Text recognition');
    await ocrButton.click();
    await expect(page.locator('[data-viewer-content] [data-testid="ocr-box"]').first()).toBeVisible();
    await ocrButton.click();
    await expect(page.locator('[data-viewer-content] [data-testid="ocr-box"]')).toHaveCount(0);
  });
});
// Switching between assets in a stack must load that asset's own OCR
// data; the overlay is cleared on switch and must be re-enabled.
test.describe('OCR with stacked assets', () => {
  const fixture = setupAssetViewerFixture(921);
  let mockStack: MockStack;
  let primaryAssetDto: AssetResponseDto;
  let secondAssetDto: AssetResponseDto;
  test.beforeAll(async () => {
    // Build a two-asset stack: the fixture's primary asset plus a second one.
    primaryAssetDto = toAssetResponseDto(fixture.primaryAsset);
    secondAssetDto = createMockStackAsset(fixture.adminUserId);
    secondAssetDto.originalFileName = 'second-ocr-asset.jpg';
    mockStack = createMockStack(primaryAssetDto, [secondAssetDto], new Set());
  });
  test.beforeEach(async ({ context }) => {
    await setupBrokenAssetMockApiRoutes(context, mockStack);
    // Each stacked asset gets its own distinct OCR fixture.
    const ocrDataByAssetId = new Map<string, AssetOcrResponseDto[]>([
      [primaryAssetDto.id, createMockOcrData(primaryAssetDto.id, PRIMARY_OCR_BOXES)],
      [secondAssetDto.id, createMockOcrData(secondAssetDto.id, SECONDARY_OCR_BOXES)],
    ]);
    await setupOcrMockApiRoutes(context, ocrDataByAssetId);
  });
  test('different OCR boxes shown for different stacked assets', async ({ page }) => {
    await page.goto(`/photos/${fixture.primaryAsset.id}`);
    await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset);
    const ocrButton = page.getByLabel('Text recognition');
    await expect(ocrButton).toBeVisible();
    await ocrButton.click();
    const ocrBoxes = page.locator('[data-viewer-content] [data-testid="ocr-box"]');
    await expect(ocrBoxes).toHaveCount(2);
    await expect(ocrBoxes.nth(0)).toContainText('Hello World');
    const stackThumbnails = page.locator('#stack-slideshow [data-asset]');
    await expect(stackThumbnails).toHaveCount(2);
    await stackThumbnails.nth(1).click();
    // refreshOcr() clears showOverlay when switching assets, so re-enable it
    await expect(ocrBoxes).toHaveCount(0);
    await expect(ocrButton).toBeVisible();
    await ocrButton.click();
    await expect(ocrBoxes).toHaveCount(1);
    await expect(ocrBoxes.first()).toContainText('Second Asset Text');
  });
});
// OCR boxes are image-relative, so zooming the image must grow the
// rendered boxes proportionally.
test.describe('OCR boxes and zoom', () => {
  const fixture = setupAssetViewerFixture(922);
  test.beforeEach(async ({ context }) => {
    const primaryAssetDto = toAssetResponseDto(fixture.primaryAsset);
    const ocrDataByAssetId = new Map<string, AssetOcrResponseDto[]>([
      [primaryAssetDto.id, createMockOcrData(primaryAssetDto.id, PRIMARY_OCR_BOXES)],
    ]);
    await setupOcrMockApiRoutes(context, ocrDataByAssetId);
  });
  test('OCR boxes scale with zoom', async ({ page }) => {
    await page.goto(`/photos/${fixture.primaryAsset.id}`);
    await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset);
    const ocrButton = page.getByLabel('Text recognition');
    await expect(ocrButton).toBeVisible();
    await ocrButton.click();
    const ocrBox = page.locator('[data-viewer-content] [data-testid="ocr-box"]').first();
    await expect(ocrBox).toBeVisible();
    // Capture pre-zoom size, then zoom in with the mouse wheel.
    const initialBox = await ocrBox.boundingBox();
    expect(initialBox).toBeTruthy();
    const { width, height } = page.viewportSize()!;
    await page.mouse.move(width / 2, height / 2);
    await page.mouse.wheel(0, -3);
    // Zoom animates; poll until the box has visibly grown.
    await expect(async () => {
      const zoomedBox = await ocrBox.boundingBox();
      expect(zoomedBox).toBeTruthy();
      expect(zoomedBox!.width).toBeGreaterThan(initialBox!.width);
      expect(zoomedBox!.height).toBeGreaterThan(initialBox!.height);
    }).toPass({ timeout: 2000 });
  });
});
// OCR boxes are interactive overlays: they are focusable, marked with
// data-overlay-interactive, and pointer/touch gestures that start on
// them (or split across the zoom container) must not pan/zoom the image.
test.describe('OCR text interaction', () => {
  const fixture = setupAssetViewerFixture(923);
  test.beforeEach(async ({ context }) => {
    const primaryAssetDto = toAssetResponseDto(fixture.primaryAsset);
    const ocrDataByAssetId = new Map<string, AssetOcrResponseDto[]>([
      [primaryAssetDto.id, createMockOcrData(primaryAssetDto.id, PRIMARY_OCR_BOXES)],
    ]);
    await setupOcrMockApiRoutes(context, ocrDataByAssetId);
  });
  test('OCR text box has data-overlay-interactive attribute', async ({ page }) => {
    await page.goto(`/photos/${fixture.primaryAsset.id}`);
    await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset);
    await page.getByLabel('Text recognition').click();
    const ocrBox = page.locator('[data-viewer-content] [data-testid="ocr-box"]').first();
    await expect(ocrBox).toBeVisible();
    // The zoom action uses this attribute to exempt overlays from pan/zoom.
    await expect(ocrBox).toHaveAttribute('data-overlay-interactive');
  });
  test('OCR text box receives focus on click', async ({ page }) => {
    await page.goto(`/photos/${fixture.primaryAsset.id}`);
    await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset);
    await page.getByLabel('Text recognition').click();
    const ocrBox = page.locator('[data-viewer-content] [data-testid="ocr-box"]').first();
    await expect(ocrBox).toBeVisible();
    await ocrBox.click();
    await expect(ocrBox).toBeFocused();
  });
  test('dragging on OCR text box does not trigger image pan', async ({ page }) => {
    await page.goto(`/photos/${fixture.primaryAsset.id}`);
    await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset);
    await page.getByLabel('Text recognition').click();
    const ocrBox = page.locator('[data-viewer-content] [data-testid="ocr-box"]').first();
    await expect(ocrBox).toBeVisible();
    // Record the image transform before the drag; it must be unchanged after.
    const imgLocator = page.locator('[data-viewer-content] img[draggable="false"]');
    const initialTransform = await imgLocator.evaluate((element) => {
      return getComputedStyle(element.closest('[style*="transform"]') ?? element).transform;
    });
    const box = await ocrBox.boundingBox();
    expect(box).toBeTruthy();
    const centerX = box!.x + box!.width / 2;
    const centerY = box!.y + box!.height / 2;
    // Drag from the box center; the gesture starts on the overlay, so the
    // zoom action should intercept it and not pan the image.
    await page.mouse.move(centerX, centerY);
    await page.mouse.down();
    await page.mouse.move(centerX + 50, centerY + 30, { steps: 5 });
    await page.mouse.up();
    const afterTransform = await imgLocator.evaluate((element) => {
      return getComputedStyle(element.closest('[style*="transform"]') ?? element).transform;
    });
    expect(afterTransform).toBe(initialTransform);
  });
  test('split touch gesture across zoom container does not trigger zoom', async ({ page }) => {
    await page.goto(`/photos/${fixture.primaryAsset.id}`);
    await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset);
    await page.getByLabel('Text recognition').click();
    const ocrBox = page.locator('[data-viewer-content] [data-testid="ocr-box"]').first();
    await expect(ocrBox).toBeVisible();
    const imgLocator = page.locator('[data-viewer-content] img[draggable="false"]');
    const initialTransform = await imgLocator.evaluate((element) => {
      return getComputedStyle(element.closest('[style*="transform"]') ?? element).transform;
    });
    const viewerContent = page.locator('[data-viewer-content]');
    const viewerBox = await viewerContent.boundingBox();
    expect(viewerBox).toBeTruthy();
    // Dispatch a synthetic split gesture: one touch inside the viewer, one outside
    await page.evaluate(
      ({ viewerCenterX, viewerCenterY, outsideY }) => {
        const viewer = document.querySelector('[data-viewer-content]');
        if (!viewer) {
          return;
        }
        const createTouch = (id: number, x: number, y: number) => {
          return new Touch({
            identifier: id,
            target: viewer,
            clientX: x,
            clientY: y,
          });
        };
        const insideTouch = createTouch(0, viewerCenterX, viewerCenterY);
        const outsideTouch = createTouch(1, viewerCenterX, outsideY);
        // touches > targetTouches signals a gesture split across the container.
        const touchStartEvent = new TouchEvent('touchstart', {
          touches: [insideTouch, outsideTouch],
          targetTouches: [insideTouch],
          changedTouches: [insideTouch, outsideTouch],
          bubbles: true,
          cancelable: true,
        });
        const touchMoveEvent = new TouchEvent('touchmove', {
          touches: [createTouch(0, viewerCenterX, viewerCenterY - 30), createTouch(1, viewerCenterX, outsideY + 30)],
          targetTouches: [createTouch(0, viewerCenterX, viewerCenterY - 30)],
          changedTouches: [
            createTouch(0, viewerCenterX, viewerCenterY - 30),
            createTouch(1, viewerCenterX, outsideY + 30),
          ],
          bubbles: true,
          cancelable: true,
        });
        const touchEndEvent = new TouchEvent('touchend', {
          touches: [],
          targetTouches: [],
          changedTouches: [insideTouch, outsideTouch],
          bubbles: true,
          cancelable: true,
        });
        viewer.dispatchEvent(touchStartEvent);
        viewer.dispatchEvent(touchMoveEvent);
        viewer.dispatchEvent(touchEndEvent);
      },
      {
        viewerCenterX: viewerBox!.x + viewerBox!.width / 2,
        viewerCenterY: viewerBox!.y + viewerBox!.height / 2,
        outsideY: 10, // near the top of the page, outside the viewer
      },
    );
    const afterTransform = await imgLocator.evaluate((element) => {
      return getComputedStyle(element.closest('[style*="transform"]') ?? element).transform;
    });
    expect(afterTransform).toBe(initialTransform);
  });
});

View File

@@ -1,11 +1,17 @@
import { assetViewerManager } from '$lib/managers/asset-viewer-manager.svelte';
import { createZoomImageWheel } from '@zoom-image/core';
export const zoomImageAction = (node: HTMLElement, options?: { disabled?: boolean }) => {
type TouchEventLike = {
touches: Iterable<{ clientX: number; clientY: number }> & { length: number };
targetTouches: ArrayLike<unknown>;
};
const asTouchEvent = (event: Event) => event as unknown as TouchEventLike;
export const zoomImageAction = (node: HTMLElement, options?: { zoomTarget?: HTMLElement }) => {
const zoomInstance = createZoomImageWheel(node, {
maxZoom: 10,
initialState: assetViewerManager.zoomState,
zoomTarget: null,
zoomTarget: options?.zoomTarget,
});
const unsubscribes = [
@@ -13,47 +19,124 @@ export const zoomImageAction = (node: HTMLElement, options?: { disabled?: boolea
zoomInstance.subscribe(({ state }) => assetViewerManager.onZoomChange(state)),
];
const onInteractionStart = (event: Event) => {
if (options?.disabled) {
event.stopImmediatePropagation();
const controller = new AbortController();
const { signal } = controller;
node.addEventListener('pointerdown', () => assetViewerManager.cancelZoomAnimation(), { capture: true, signal });
// Intercept events in capture phase to prevent zoom-image from seeing interactions on
// overlay elements (e.g. OCR text boxes), preserving browser defaults like text selection.
const isOverlayEvent = (event: Event) => !!(event.target as HTMLElement).closest('[data-overlay-interactive]');
const isOverlayAtPoint = (x: number, y: number) =>
!!document.elementFromPoint(x, y)?.closest('[data-overlay-interactive]');
// Pointer event interception: track pointers that start on overlays and intercept the entire gesture.
const overlayPointers = new Set<number>();
const interceptedPointers = new Set<number>();
const interceptOverlayPointerDown = (event: PointerEvent) => {
if (isOverlayEvent(event) || isOverlayAtPoint(event.clientX, event.clientY)) {
overlayPointers.add(event.pointerId);
interceptedPointers.add(event.pointerId);
event.stopPropagation();
} else if (overlayPointers.size > 0) {
// Split gesture (e.g. pinch with one finger on overlay) — intercept entirely.
interceptedPointers.add(event.pointerId);
event.stopPropagation();
}
assetViewerManager.cancelZoomAnimation();
};
const interceptOverlayPointerEvent = (event: PointerEvent) => {
if (interceptedPointers.has(event.pointerId)) {
event.stopPropagation();
}
};
const interceptOverlayPointerEnd = (event: PointerEvent) => {
overlayPointers.delete(event.pointerId);
if (interceptedPointers.delete(event.pointerId)) {
event.stopPropagation();
}
};
node.addEventListener('pointerdown', interceptOverlayPointerDown, { capture: true, signal });
node.addEventListener('pointermove', interceptOverlayPointerEvent, { capture: true, signal });
node.addEventListener('pointerup', interceptOverlayPointerEnd, { capture: true, signal });
node.addEventListener('pointerleave', interceptOverlayPointerEnd, { capture: true, signal });
node.addEventListener('wheel', onInteractionStart, { capture: true });
node.addEventListener('pointerdown', onInteractionStart, { capture: true });
// Touch event interception for overlay touches or split gestures (pinch across container boundary).
// Once intercepted, stays intercepted until all fingers are lifted.
let touchGestureIntercepted = false;
const interceptOverlayTouchEvent = (event: Event) => {
if (touchGestureIntercepted) {
event.stopPropagation();
return;
}
const { touches, targetTouches } = asTouchEvent(event);
if (touches && targetTouches) {
if (touches.length > targetTouches.length) {
touchGestureIntercepted = true;
event.stopPropagation();
return;
}
for (const touch of touches) {
if (isOverlayAtPoint(touch.clientX, touch.clientY)) {
touchGestureIntercepted = true;
event.stopPropagation();
return;
}
}
} else if (isOverlayEvent(event)) {
event.stopPropagation();
}
};
const resetTouchGesture = (event: Event) => {
const { touches } = asTouchEvent(event);
if (touches.length === 0) {
touchGestureIntercepted = false;
}
};
node.addEventListener('touchstart', interceptOverlayTouchEvent, { capture: true, signal });
node.addEventListener('touchmove', interceptOverlayTouchEvent, { capture: true, signal });
node.addEventListener('touchend', resetTouchGesture, { capture: true, signal });
// Suppress Safari's synthetic dblclick on double-tap. Without this, zoom-image's touchstart
// handler zooms to maxZoom (10x), then Safari's synthetic dblclick triggers photo-viewer's
// handler which conflicts. Chrome does not fire synthetic dblclick on touch.
// Wheel and dblclick interception on overlay elements.
// Dblclick also intercepted for all touch double-taps (Safari fires synthetic dblclick
// on double-tap, which conflicts with zoom-image's touch zoom handler).
let lastPointerWasTouch = false;
// Remember whether the most recent pointerdown came from a touch, so the
// dblclick suppressor below can recognize Safari's synthetic double-tap.
const trackPointerType = (event: PointerEvent) => {
  lastPointerWasTouch = event.pointerType === 'touch';
};
// Block dblclick events that originate from touch double-taps (Safari fires a
// synthetic dblclick) so they cannot conflict with the touch zoom handling.
const suppressTouchDblClick = (event: MouseEvent) => {
  if (lastPointerWasTouch) {
    event.stopImmediatePropagation();
  }
};
node.addEventListener('pointerdown', trackPointerType, { capture: true });
node.addEventListener('dblclick', suppressTouchDblClick, { capture: true });
node.addEventListener('pointerdown', (event) => (lastPointerWasTouch = event.pointerType === 'touch'), {
capture: true,
signal,
});
node.addEventListener(
'wheel',
(event) => {
if (isOverlayEvent(event)) {
event.stopPropagation();
}
},
{ capture: true, signal },
);
node.addEventListener(
'dblclick',
(event) => {
if (lastPointerWasTouch || isOverlayEvent(event)) {
event.stopImmediatePropagation();
}
},
{ capture: true, signal },
);
// Allow zoomed content to render outside the container bounds
node.style.overflow = 'visible';
// Prevent browser handling of touch gestures so zoom-image can manage them
node.style.touchAction = 'none';
return {
update(newOptions?: { disabled?: boolean }) {
update(newOptions?: { zoomTarget?: HTMLElement }) {
options = newOptions;
if (newOptions?.zoomTarget !== undefined) {
zoomInstance.setState({ zoomTarget: newOptions.zoomTarget });
}
},
destroy() {
controller.abort();
for (const unsubscribe of unsubscribes) {
unsubscribe();
}
node.removeEventListener('wheel', onInteractionStart, { capture: true });
node.removeEventListener('pointerdown', onInteractionStart, { capture: true });
node.removeEventListener('pointerdown', trackPointerType, { capture: true });
node.removeEventListener('dblclick', suppressTouchDblClick, { capture: true });
zoomInstance.cleanup();
},
};

View File

@@ -149,81 +149,66 @@
(quality.preview === 'success' ? previewElement : undefined) ??
(quality.thumbnail === 'success' ? thumbnailElement : undefined);
});
const zoomTransform = $derived.by(() => {
const { currentZoom, currentPositionX, currentPositionY } = assetViewerManager.zoomState;
if (currentZoom === 1 && currentPositionX === 0 && currentPositionY === 0) {
return undefined;
}
return `translate(${currentPositionX}px, ${currentPositionY}px) scale(${currentZoom})`;
});
</script>
<div class="relative h-full w-full overflow-hidden will-change-transform" bind:this={ref}>
{@render backdrop?.()}
<!-- pointer-events-none so events pass through to the container where zoom-image listens -->
<div
class="absolute inset-0 pointer-events-none"
style:transform={zoomTransform}
style:transform-origin={zoomTransform ? '0 0' : undefined}
>
<div class="absolute" style:left style:top style:width style:height>
{#if show.alphaBackground}
<AlphaBackground />
{/if}
<div class="absolute inset-0 pointer-events-none" style:left style:top style:width style:height>
{#if show.alphaBackground}
<AlphaBackground />
{/if}
{#if show.thumbhash}
{#if asset.thumbhash}
<!-- Thumbhash / spinner layer -->
<canvas use:thumbhash={{ base64ThumbHash: asset.thumbhash }} class="h-full w-full absolute"></canvas>
{:else if show.spinner}
<DelayedLoadingSpinner />
{/if}
{#if show.thumbhash}
{#if asset.thumbhash}
<!-- Thumbhash / spinner layer -->
<canvas use:thumbhash={{ base64ThumbHash: asset.thumbhash }} class="h-full w-full absolute"></canvas>
{:else if show.spinner}
<DelayedLoadingSpinner />
{/if}
{/if}
{#if show.thumbnail}
<ImageLayer
{adaptiveImageLoader}
{width}
{height}
quality="thumbnail"
src={status.urls.thumbnail}
alt=""
role="presentation"
bind:ref={thumbnailElement}
/>
{/if}
{#if show.thumbnail}
<ImageLayer
{adaptiveImageLoader}
{width}
{height}
quality="thumbnail"
src={status.urls.thumbnail}
alt=""
role="presentation"
bind:ref={thumbnailElement}
/>
{/if}
{#if show.brokenAsset}
<BrokenAsset class="text-xl h-full w-full absolute" />
{/if}
{#if show.brokenAsset}
<BrokenAsset class="text-xl h-full w-full absolute" />
{/if}
{#if show.preview}
<ImageLayer
{adaptiveImageLoader}
{alt}
{width}
{height}
{overlays}
quality="preview"
src={status.urls.preview}
bind:ref={previewElement}
/>
{/if}
{#if show.preview}
<ImageLayer
{adaptiveImageLoader}
{alt}
{width}
{height}
{overlays}
quality="preview"
src={status.urls.preview}
bind:ref={previewElement}
/>
{/if}
{#if show.original}
<ImageLayer
{adaptiveImageLoader}
{alt}
{width}
{height}
{overlays}
quality="original"
src={status.urls.original}
bind:ref={originalElement}
/>
{/if}
</div>
{#if show.original}
<ImageLayer
{adaptiveImageLoader}
{alt}
{width}
{height}
{overlays}
quality="original"
src={status.urls.original}
bind:ref={originalElement}
/>
{/if}
</div>
</div>

View File

@@ -176,6 +176,7 @@
onDestroy(() => {
activityManager.reset();
assetViewerManager.closeEditor();
isFaceEditMode.value = false;
syncAssetViewerOpenClass(false);
preloadManager.destroy();
});
@@ -358,15 +359,18 @@
}
};
// Refresh OCR boxes for the current asset: always drop stale results first,
// then re-fetch — except when viewing via a shared link, where no fetch is made.
const refreshOcr = async () => {
  ocrManager.clear();
  if (sharedLink) {
    return;
  }
  await ocrManager.getAssetOcr(asset.id);
};
const refresh = async () => {
await refreshStack();
ocrManager.clear();
if (!sharedLink) {
if (previewStackedAsset) {
await ocrManager.getAssetOcr(previewStackedAsset.id);
}
await ocrManager.getAssetOcr(asset.id);
}
await refreshOcr();
};
$effect(() => {
@@ -375,6 +379,12 @@
untrack(() => handlePromiseError(refresh()));
});
$effect(() => {
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
previewStackedAsset;
untrack(() => ocrManager.clear());
});
let lastCursor = $state<AssetCursor>();
$effect(() => {
@@ -460,7 +470,7 @@
<section
id="immich-asset-viewer"
class="fixed start-0 top-0 grid size-full grid-cols-4 grid-rows-[64px_1fr] overflow-hidden bg-black"
class="fixed start-0 top-0 grid size-full grid-cols-4 grid-rows-[64px_1fr] overflow-hidden bg-black touch-none"
use:focusTrap
bind:this={assetViewerHtmlElement}
>

View File

@@ -7,6 +7,7 @@
import { timeToLoadTheMap } from '$lib/constants';
import { assetViewerManager } from '$lib/managers/asset-viewer-manager.svelte';
import { authManager } from '$lib/managers/auth-manager.svelte';
import { eventManager } from '$lib/managers/event-manager.svelte';
import { featureFlagsManager } from '$lib/managers/feature-flags-manager.svelte';
import AssetChangeDateModal from '$lib/modals/AssetChangeDateModal.svelte';
import { Route } from '$lib/route';
@@ -122,6 +123,7 @@
const handleRefreshPeople = async () => {
asset = await getAssetInfo({ id: asset.id });
eventManager.emit('AssetUpdate', asset);
showEditFaces = false;
};
@@ -233,8 +235,8 @@
href={Route.viewPerson(person, { previousRoute })}
onfocus={() => ($boundingBoxesArray = people[index].faces)}
onblur={() => ($boundingBoxesArray = [])}
onmouseover={() => ($boundingBoxesArray = people[index].faces)}
onmouseleave={() => ($boundingBoxesArray = [])}
onpointerover={() => ($boundingBoxesArray = people[index].faces)}
onpointerleave={() => ($boundingBoxesArray = [])}
>
<div class="relative">
<ImageThumbnail

View File

@@ -1,10 +1,12 @@
<script lang="ts">
import ImageThumbnail from '$lib/components/assets/thumbnail/image-thumbnail.svelte';
import { assetViewerManager } from '$lib/managers/asset-viewer-manager.svelte';
import { assetViewingStore } from '$lib/stores/asset-viewing.store';
import { isFaceEditMode } from '$lib/stores/face-edit.svelte';
import { getPeopleThumbnailUrl } from '$lib/utils';
import { getNaturalSize, scaleToFit } from '$lib/utils/container-utils';
import { handleError } from '$lib/utils/handle-error';
import { scaleFaceRectOnResize } from '$lib/utils/people-utils';
import { createFace, getAllPeople, type PersonResponseDto } from '@immich/sdk';
import { Button, Input, modalManager, toastManager } from '@immich/ui';
import { Canvas, InteractiveFabricObject, Rect } from 'fabric';
@@ -22,6 +24,7 @@
let { htmlElement, containerWidth, containerHeight, assetId }: Props = $props();
let canvasEl: HTMLCanvasElement | undefined = $state();
let containerEl: HTMLDivElement | undefined = $state();
let canvas: Canvas | undefined = $state();
let faceRect: Rect | undefined = $state();
let faceSelectorEl: HTMLDivElement | undefined = $state();
@@ -31,6 +34,8 @@
let searchTerm = $state('');
let faceBoxPosition = $state({ left: 0, top: 0, width: 0, height: 0 });
let userMovedRect = false;
let previousMetrics = { contentWidth: 0, offsetX: 0, offsetY: 0 };
let filteredCandidates = $derived(
searchTerm
@@ -56,7 +61,8 @@
return;
}
canvas = new Canvas(canvasEl);
canvas = new Canvas(canvasEl, { width: containerWidth, height: containerHeight });
canvas.selection = false;
configureControlStyle();
// eslint-disable-next-line tscompat/tscompat
@@ -74,12 +80,49 @@
canvas.add(faceRect);
canvas.setActiveObject(faceRect);
setDefaultFaceRectanglePosition(faceRect);
};
onMount(async () => {
setupCanvas();
await getPeople();
onMount(() => {
void getPeople();
});
$effect(() => {
if (!canvas) {
return;
}
const upperCanvas = canvas.upperCanvasEl;
const controller = new AbortController();
const { signal } = controller;
const handlePointerOnCanvas = (event: PointerEvent) => {
if (!canvas) {
return;
}
const { target } = canvas.findTarget(event);
if (target) {
event.stopPropagation();
return;
}
if (event.type === 'pointerdown' && faceRect) {
event.stopPropagation();
const pointer = canvas.getScenePoint(event);
faceRect.set({ left: pointer.x, top: pointer.y });
faceRect.setCoords();
userMovedRect = true;
canvas.renderAll();
positionFaceSelector();
}
};
for (const type of ['pointerdown', 'pointermove', 'pointerup'] as const) {
upperCanvas.addEventListener(type, handlePointerOnCanvas, { signal });
}
return () => {
controller.abort();
};
});
const imageContentMetrics = $derived.by(() => {
@@ -95,45 +138,46 @@
});
const setDefaultFaceRectanglePosition = (faceRect: Rect) => {
const { offsetX, offsetY } = imageContentMetrics;
const { offsetX, offsetY, contentWidth, contentHeight } = imageContentMetrics;
faceRect.set({
top: offsetY + 200,
left: offsetX + 200,
top: offsetY + contentHeight / 2 - 56,
left: offsetX + contentWidth / 2 - 56,
});
faceRect.setCoords();
positionFaceSelector();
};
$effect(() => {
if (!canvas) {
const { offsetX, offsetY, contentWidth } = imageContentMetrics;
if (contentWidth === 0) {
return;
}
canvas.setDimensions({
width: containerWidth,
height: containerHeight,
});
const isFirstRun = previousMetrics.contentWidth === 0;
if (!faceRect) {
if (isFirstRun && !canvas) {
setupCanvas();
}
if (!canvas || !faceRect) {
return;
}
if (!isFaceRectIntersectingCanvas(faceRect, canvas)) {
if (!isFirstRun) {
canvas.setDimensions({ width: containerWidth, height: containerHeight });
}
if (!isFirstRun && userMovedRect) {
faceRect.set(scaleFaceRectOnResize(faceRect, previousMetrics, offsetX, offsetY, contentWidth));
} else {
setDefaultFaceRectanglePosition(faceRect);
}
});
const isFaceRectIntersectingCanvas = (faceRect: Rect, canvas: Canvas) => {
const faceBox = faceRect.getBoundingRect();
return !(
0 > faceBox.left + faceBox.width ||
0 > faceBox.top + faceBox.height ||
canvas.width < faceBox.left ||
canvas.height < faceBox.top
);
};
faceRect.setCoords();
previousMetrics = { contentWidth, offsetX, offsetY };
canvas.renderAll();
positionFaceSelector();
});
const cancel = () => {
isFaceEditMode.value = false;
@@ -163,11 +207,12 @@
const gap = 15;
const padding = faceRect.padding ?? 0;
const rawBox = faceRect.getBoundingRect();
const { currentZoom, currentPositionX, currentPositionY } = assetViewerManager.zoomState;
const faceBox = {
left: rawBox.left - padding,
top: rawBox.top - padding,
width: rawBox.width + padding * 2,
height: rawBox.height + padding * 2,
left: (rawBox.left - padding) * currentZoom + currentPositionX,
top: (rawBox.top - padding) * currentZoom + currentPositionY,
width: (rawBox.width + padding * 2) * currentZoom,
height: (rawBox.height + padding * 2) * currentZoom,
};
const selectorWidth = faceSelectorEl.offsetWidth;
const chromeHeight = faceSelectorEl.offsetHeight - scrollableListEl.offsetHeight;
@@ -177,20 +222,21 @@
const clampTop = (top: number) => clamp(top, gap, containerHeight - selectorHeight - gap);
const clampLeft = (left: number) => clamp(left, gap, containerWidth - selectorWidth - gap);
const overlapArea = (position: { top: number; left: number }) => {
const selectorRight = position.left + selectorWidth;
const selectorBottom = position.top + selectorHeight;
const faceRight = faceBox.left + faceBox.width;
const faceBottom = faceBox.top + faceBox.height;
const faceRight = faceBox.left + faceBox.width;
const faceBottom = faceBox.top + faceBox.height;
const overlapX = Math.max(0, Math.min(selectorRight, faceRight) - Math.max(position.left, faceBox.left));
const overlapY = Math.max(0, Math.min(selectorBottom, faceBottom) - Math.max(position.top, faceBox.top));
const overlapArea = (position: { top: number; left: number }) => {
const overlapX = Math.max(
0,
Math.min(position.left + selectorWidth, faceRight) - Math.max(position.left, faceBox.left),
);
const overlapY = Math.max(
0,
Math.min(position.top + selectorHeight, faceBottom) - Math.max(position.top, faceBox.top),
);
return overlapX * overlapY;
};
const faceBottom = faceBox.top + faceBox.height;
const faceRight = faceBox.left + faceBox.width;
const positions = [
{ top: clampTop(faceBottom + gap), left: clampLeft(faceBox.left) },
{ top: clampTop(faceBox.top - selectorHeight - gap), left: clampLeft(faceBox.left) },
@@ -212,30 +258,71 @@
}
}
faceSelectorEl.style.top = `${bestPosition.top}px`;
faceSelectorEl.style.left = `${bestPosition.left}px`;
const containerRect = containerEl?.getBoundingClientRect();
const offsetTop = containerRect?.top ?? 0;
const offsetLeft = containerRect?.left ?? 0;
faceSelectorEl.style.top = `${bestPosition.top + offsetTop}px`;
faceSelectorEl.style.left = `${bestPosition.left + offsetLeft}px`;
scrollableListEl.style.height = `${listHeight}px`;
faceBoxPosition = { left: faceBox.left, top: faceBox.top, width: faceBox.width, height: faceBox.height };
faceBoxPosition = faceBox;
};
$effect(() => {
if (!canvas) {
return;
}
const { currentZoom, currentPositionX, currentPositionY } = assetViewerManager.zoomState;
canvas.setViewportTransform([currentZoom, 0, 0, currentZoom, currentPositionX, currentPositionY]);
canvas.renderAll();
positionFaceSelector();
});
$effect(() => {
const rect = faceRect;
if (rect) {
rect.on('moving', positionFaceSelector);
rect.on('scaling', positionFaceSelector);
const onUserMove = () => {
userMovedRect = true;
positionFaceSelector();
};
rect.on('moving', onUserMove);
rect.on('scaling', onUserMove);
return () => {
rect.off('moving', positionFaceSelector);
rect.off('scaling', positionFaceSelector);
rect.off('moving', onUserMove);
rect.off('scaling', onUserMove);
};
}
});
// Svelte action for the face-selector popup: stops key/pointer events inside
// `node` from propagating to the viewer's handlers, and reparents the node to
// <body>. Cleans up listeners and removes the node on destroy.
const trapEvents = (node: HTMLElement) => {
  const stop = (e: Event) => e.stopPropagation();
  const eventTypes = ['keydown', 'pointerdown', 'pointermove', 'pointerup'] as const;
  for (const type of eventTypes) {
    node.addEventListener(type, stop);
  }
  // Move to body so the selector isn't affected by the zoom transform on the container
  document.body.append(node);
  return {
    destroy() {
      for (const type of eventTypes) {
        node.removeEventListener(type, stop);
      }
      node.remove();
    },
  };
};
const getFaceCroppedCoordinates = () => {
if (!faceRect || !htmlElement) {
return;
}
const { left, top, width, height } = faceRect.getBoundingRect();
const scaledWidth = faceRect.getScaledWidth();
const scaledHeight = faceRect.getScaledHeight();
const left = faceRect.left - scaledWidth / 2;
const top = faceRect.top - scaledHeight / 2;
const { offsetX, offsetY, contentWidth, contentHeight } = imageContentMetrics;
const natural = getNaturalSize(htmlElement);
@@ -249,8 +336,8 @@
imageHeight: natural.height,
x: Math.floor(imageX),
y: Math.floor(imageY),
width: Math.floor(width * scaleX),
height: Math.floor(height * scaleY),
width: Math.floor(scaledWidth * scaleX),
height: Math.floor(scaledHeight * scaleY),
};
};
@@ -281,16 +368,16 @@
});
await assetViewingStore.setAssetId(assetId);
isFaceEditMode.value = false;
} catch (error) {
handleError(error, 'Error tagging face');
} finally {
isFaceEditMode.value = false;
}
};
</script>
<div
id="face-editor-data"
bind:this={containerEl}
class="absolute start-0 top-0 z-5 h-full w-full overflow-hidden"
data-face-left={faceBoxPosition.left}
data-face-top={faceBoxPosition.top}
@@ -302,7 +389,9 @@
<div
id="face-selector"
bind:this={faceSelectorEl}
class="absolute top-[calc(50%-250px)] start-[calc(50%-125px)] max-w-[250px] w-[250px] bg-white dark:bg-immich-dark-gray dark:text-immich-dark-fg backdrop-blur-sm px-2 py-4 rounded-xl border border-gray-200 dark:border-gray-800 transition-[top,left] duration-200 ease-out"
class="fixed w-[min(200px,45vw)] min-w-48 bg-white dark:bg-immich-dark-gray dark:text-immich-dark-fg backdrop-blur-sm px-2 py-4 rounded-xl border border-gray-200 dark:border-gray-800 transition-[top,left] duration-200 ease-out"
use:trapEvents
onwheel={(e) => e.stopPropagation()}
>
<p class="text-center text-sm">{$t('select_person_to_tag')}</p>

View File

@@ -1,4 +1,5 @@
<script lang="ts">
import { mediaQueryManager } from '$lib/stores/media-query-manager.svelte';
import type { OcrBox } from '$lib/utils/ocr-utils';
import { calculateBoundingBoxMatrix, calculateFittedFontSize } from '$lib/utils/ocr-utils';
@@ -8,6 +9,7 @@
let { ocrBox }: Props = $props();
const isTouch = $derived(mediaQueryManager.pointerCoarse);
const dimensions = $derived(calculateBoundingBoxMatrix(ocrBox.points));
const transform = $derived(`matrix3d(${dimensions.matrix.join(',')})`);
@@ -15,13 +17,23 @@
calculateFittedFontSize(ocrBox.text, dimensions.width, dimensions.height, ocrBox.verticalMode) + 'px',
);
// When text selection starts on an OCR box, select the element's entire text
// content. Deferred one animation frame so it runs after the browser's own
// selection handling has begun.
// NOTE(review): wired only for coarse pointers at the usage site (isTouch),
// where a default drag-selection is impractical — confirm intent.
const handleSelectStart = (event: Event) => {
  const target = event.currentTarget as HTMLElement;
  requestAnimationFrame(() => {
    const selection = globalThis.getSelection();
    if (selection) {
      selection.selectAllChildren(target);
    }
  });
};
const verticalStyle = $derived.by(() => {
switch (ocrBox.verticalMode) {
case 'cjk': {
return ' writing-mode: vertical-rl;';
return 'writing-mode: vertical-rl;';
}
case 'rotated': {
return ' writing-mode: vertical-rl; text-orientation: sideways;';
return 'writing-mode: vertical-rl; text-orientation: sideways;';
}
default: {
return '';
@@ -30,17 +42,20 @@
});
</script>
<div class="absolute left-0 top-0">
<div
class="absolute flex items-center justify-center text-transparent border-2 border-blue-500 bg-blue-500/10 pointer-events-auto cursor-text select-text transition-colors hover:z-1 hover:text-white hover:bg-black/60 hover:border-blue-600 hover:border-3 focus:z-1 focus:text-white focus:bg-black/60 focus:border-blue-600 focus:border-3 focus:outline-none {ocrBox.verticalMode ===
'none'
? 'px-2 py-1 whitespace-nowrap'
: 'px-1 py-2'}"
style="font-size: {fontSize}; width: {dimensions.width}px; height: {dimensions.height}px; transform: {transform}; transform-origin: 0 0;{verticalStyle}"
tabindex="0"
role="button"
aria-label={ocrBox.text}
>
{ocrBox.text}
</div>
<div
class="absolute left-0 top-0 flex items-center justify-center border-2 border-blue-500 pointer-events-auto cursor-text focus:z-1 focus:border-blue-600 focus:border-3 focus:outline-none {isTouch
? 'text-white bg-black/60 select-all'
: 'select-text text-transparent bg-blue-500/10 transition-colors hover:z-1 hover:text-white hover:bg-black/60 hover:border-blue-600 hover:border-3'} {ocrBox.verticalMode ===
'none'
? 'px-2 py-1 whitespace-nowrap'
: 'px-1 py-2'}"
style="font-size: {fontSize}; width: {dimensions.width}px; height: {dimensions.height}px; transform: {transform}; transform-origin: 0 0; touch-action: none; {verticalStyle}"
data-testid="ocr-box"
data-overlay-interactive
tabindex="0"
role="button"
aria-label={ocrBox.text}
onselectstart={isTouch ? handleSelectStart : undefined}
>
{ocrBox.text}
</div>

View File

@@ -105,12 +105,6 @@
const onPlaySlideshow = () => ($slideshowState = SlideshowState.PlaySlideshow);
$effect(() => {
if (isFaceEditMode.value && assetViewerManager.zoom > 1) {
onZoom();
}
});
// TODO move to action + command palette
const onCopyShortcut = (event: KeyboardEvent) => {
if (globalThis.getSelection()?.type === 'Range') {
@@ -151,6 +145,8 @@
$slideshowState !== SlideshowState.None && $slideshowLook === SlideshowLook.BlurredBackground && !!asset.thumbhash,
);
let adaptiveImage = $state<HTMLDivElement | undefined>();
const faceToNameMap = $derived.by(() => {
// eslint-disable-next-line svelte/prefer-svelte-reactivity
const map = new Map<Faces, string>();
@@ -159,40 +155,17 @@
map.set(face, person.name);
}
}
if (isFaceEditMode.value) {
for (const face of asset.unassignedFaces ?? []) {
map.set(face, '');
}
}
return map;
});
const faces = $derived(Array.from(faceToNameMap.keys()));
const handleImageMouseMove = (event: MouseEvent) => {
$boundingBoxesArray = [];
if (!assetViewerManager.imgRef || !element || isFaceEditMode.value || ocrManager.showOverlay) {
return;
}
const natural = getNaturalSize(assetViewerManager.imgRef);
const scaled = scaleToFit(natural, container);
const { currentZoom, currentPositionX, currentPositionY } = assetViewerManager.zoomState;
const contentOffsetX = (container.width - scaled.width) / 2;
const contentOffsetY = (container.height - scaled.height) / 2;
const containerRect = element.getBoundingClientRect();
const mouseX = (event.clientX - containerRect.left - contentOffsetX * currentZoom - currentPositionX) / currentZoom;
const mouseY = (event.clientY - containerRect.top - contentOffsetY * currentZoom - currentPositionY) / currentZoom;
const faceBoxes = getBoundingBox(faces, overlayMetrics);
for (const [index, box] of faceBoxes.entries()) {
if (mouseX >= box.left && mouseX <= box.left + box.width && mouseY >= box.top && mouseY <= box.top + box.height) {
$boundingBoxesArray.push(faces[index]);
}
}
};
const handleImageMouseLeave = () => {
$boundingBoxesArray = [];
};
const boundingBoxes = $derived(getBoundingBox(faces, overlayMetrics));
const activeBoundingBoxes = $derived(getBoundingBox($boundingBoxesArray, overlayMetrics));
</script>
<AssetViewerEvents {onCopy} {onZoom} />
@@ -213,9 +186,7 @@
bind:clientHeight={containerHeight}
role="presentation"
ondblclick={onZoom}
onmousemove={handleImageMouseMove}
onmouseleave={handleImageMouseLeave}
use:zoomImageAction={{ disabled: isFaceEditMode.value || ocrManager.showOverlay }}
use:zoomImageAction={{ zoomTarget: adaptiveImage }}
{...useSwipe((event) => onSwipe?.(event))}
>
<AdaptiveImage
@@ -233,6 +204,7 @@
onReady?.();
}}
bind:imgRef={assetViewerManager.imgRef}
bind:ref={adaptiveImage}
>
{#snippet backdrop()}
{#if blurredSlideshow}
@@ -243,20 +215,38 @@
{/if}
{/snippet}
{#snippet overlays()}
{#each getBoundingBox($boundingBoxesArray, overlayMetrics) as boundingbox, index (boundingbox.id)}
<div
class="absolute border-solid border-white border-3 rounded-lg"
style="top: {boundingbox.top}px; left: {boundingbox.left}px; height: {boundingbox.height}px; width: {boundingbox.width}px;"
></div>
{#if faceToNameMap.get($boundingBoxesArray[index])}
{#if !isFaceEditMode.value && !ocrManager.showOverlay}
{#each boundingBoxes as boundingbox, index (boundingbox.id)}
{@const face = faces[index]}
{@const name = faceToNameMap.get(face)}
<!-- svelte-ignore a11y_no_static_element_interactions -->
<div
class="absolute bg-white/90 text-black px-2 py-1 rounded text-sm font-medium whitespace-nowrap pointer-events-none shadow-lg"
style="top: {boundingbox.top + boundingbox.height + 4}px; left: {boundingbox.left +
boundingbox.width}px; transform: translateX(-100%);"
>
{faceToNameMap.get($boundingBoxesArray[index])}
</div>
{/if}
class="absolute pointer-events-auto outline-none rounded-lg"
style="top: {boundingbox.top}px; left: {boundingbox.left}px; height: {boundingbox.height}px; width: {boundingbox.width}px;"
aria-label="{$t('person')}: {name || $t('unknown')}"
onpointerenter={() => ($boundingBoxesArray = [face])}
onpointerleave={() => ($boundingBoxesArray = [])}
></div>
{/each}
{/if}
{#each activeBoundingBoxes as boundingbox (boundingbox.id)}
{@const matchedFace = faces.find((f) => f.id === boundingbox.id)}
{@const name = matchedFace ? faceToNameMap.get(matchedFace) : undefined}
<div
class="absolute border-solid border-white border-3 rounded-lg pointer-events-none"
style="top: {boundingbox.top}px; left: {boundingbox.left}px; height: {boundingbox.height}px; width: {boundingbox.width}px;"
>
{#if name}
<div
aria-hidden="true"
class="absolute bg-white/90 text-black px-2 py-1 rounded text-sm font-medium whitespace-nowrap shadow-lg"
style="top: {boundingbox.height + 4}px; right: 0;"
>
{name}
</div>
{/if}
</div>
{/each}
{#each ocrBoxes as ocrBox (ocrBox.id)}

View File

@@ -232,8 +232,8 @@
tabindex={index}
class="absolute start-0 top-0 h-22.5 w-22.5 cursor-default"
onfocus={() => ($boundingBoxesArray = [peopleWithFaces[index]])}
onmouseover={() => ($boundingBoxesArray = [peopleWithFaces[index]])}
onmouseleave={() => ($boundingBoxesArray = [])}
onpointerover={() => ($boundingBoxesArray = [peopleWithFaces[index]])}
onpointerleave={() => ($boundingBoxesArray = [])}
>
<div class="relative">
{#if selectedPersonToCreate[face.id]}

View File

@@ -1,6 +1,6 @@
import type { Faces } from '$lib/stores/people.store';
import type { ContentMetrics } from '$lib/utils/container-utils';
import { getBoundingBox } from '$lib/utils/people-utils';
import { getBoundingBox, scaleFaceRectOnResize, type FaceRectState, type ResizeContext } from '$lib/utils/people-utils';
const makeFace = (overrides: Partial<Faces> = {}): Faces => ({
id: 'face-1',
@@ -97,3 +97,96 @@ describe('getBoundingBox', () => {
expect(boxes[0].left).toBeLessThan(boxes[1].left);
});
});
// Unit tests for scaleFaceRectOnResize: a face rect is stored in container
// coordinates, so on resize it must be re-anchored relative to the rendered
// image content (offsetX/offsetY + contentWidth) and scaled by the ratio of
// new to old content width.
describe('scaleFaceRectOnResize', () => {
  // Factory for a rect state; defaults are overridable per test.
  const makeRect = (overrides: Partial<FaceRectState> = {}): FaceRectState => ({
    left: 300,
    top: 400,
    scaleX: 1,
    scaleY: 1,
    ...overrides,
  });

  // Factory for the layout metrics captured before the resize.
  const makePrevious = (overrides: Partial<ResizeContext> = {}): ResizeContext => ({
    offsetX: 100,
    offsetY: 50,
    contentWidth: 800,
    ...overrides,
  });

  it('should preserve relative position when container doubles in size', () => {
    const rect = makeRect({ left: 300, top: 250 });
    const previous = makePrevious({ offsetX: 100, offsetY: 50, contentWidth: 800 });
    const result = scaleFaceRectOnResize(rect, previous, 200, 100, 1600);
    // imageRelLeft = (300 - 100) * 2 = 400, new left = 200 + 400 = 600
    // imageRelTop = (250 - 50) * 2 = 400, new top = 100 + 400 = 500
    expect(result.left).toBe(600);
    expect(result.top).toBe(500);
    expect(result.scaleX).toBe(2);
    expect(result.scaleY).toBe(2);
  });

  it('should preserve relative position when container halves in size', () => {
    const rect = makeRect({ left: 300, top: 250 });
    const previous = makePrevious({ offsetX: 100, offsetY: 50, contentWidth: 800 });
    const result = scaleFaceRectOnResize(rect, previous, 50, 25, 400);
    // imageRelLeft = (300 - 100) * 0.5 = 100, new left = 50 + 100 = 150
    // imageRelTop = (250 - 50) * 0.5 = 100, new top = 25 + 100 = 125
    expect(result.left).toBe(150);
    expect(result.top).toBe(125);
    expect(result.scaleX).toBe(0.5);
    expect(result.scaleY).toBe(0.5);
  });

  it('should handle no change in dimensions', () => {
    // Identity resize: position and existing scale factors pass through untouched.
    const rect = makeRect({ left: 300, top: 250, scaleX: 1.5, scaleY: 1.5 });
    const previous = makePrevious({ offsetX: 100, offsetY: 50, contentWidth: 800 });
    const result = scaleFaceRectOnResize(rect, previous, 100, 50, 800);
    expect(result.left).toBe(300);
    expect(result.top).toBe(250);
    expect(result.scaleX).toBe(1.5);
    expect(result.scaleY).toBe(1.5);
  });

  it('should handle offset changes without content width change', () => {
    // Letterbox shift only: same content size, new offsets.
    const rect = makeRect({ left: 300, top: 250 });
    const previous = makePrevious({ offsetX: 100, offsetY: 50, contentWidth: 800 });
    const result = scaleFaceRectOnResize(rect, previous, 150, 75, 800);
    // scale = 1, imageRelLeft = 200, imageRelTop = 200
    // new left = 150 + 200 = 350, new top = 75 + 200 = 275
    expect(result.left).toBe(350);
    expect(result.top).toBe(275);
    expect(result.scaleX).toBe(1);
    expect(result.scaleY).toBe(1);
  });

  it('should compound existing scale factors', () => {
    // A rect the user already scaled keeps that scale multiplied by the resize ratio.
    const rect = makeRect({ left: 300, top: 250, scaleX: 2, scaleY: 3 });
    const previous = makePrevious({ contentWidth: 800 });
    const result = scaleFaceRectOnResize(rect, previous, previous.offsetX, previous.offsetY, 1600);
    expect(result.scaleX).toBe(4);
    expect(result.scaleY).toBe(6);
  });

  it('should handle rect at image origin (top-left of content area)', () => {
    const rect = makeRect({ left: 100, top: 50 });
    const previous = makePrevious({ offsetX: 100, offsetY: 50, contentWidth: 800 });
    const result = scaleFaceRectOnResize(rect, previous, 200, 100, 1600);
    // imageRelLeft = (100 - 100) * 2 = 0, new left = 200
    // imageRelTop = (50 - 50) * 2 = 0, new top = 100
    expect(result.left).toBe(200);
    expect(result.top).toBe(100);
  });
});

View File

@@ -37,6 +37,38 @@ export const getBoundingBox = (faces: Faces[], metrics: ContentMetrics): Boundin
return boxes;
};
/**
 * Position and scale state of a face selection rectangle, expressed in
 * container coordinates (`left`/`top`) with multiplicative scale factors.
 */
export type FaceRectState = {
  left: number;
  top: number;
  scaleX: number;
  scaleY: number;
};

/**
 * Layout of the rendered image content at the time the rect was last placed:
 * the content's offset inside the container and its rendered width.
 */
export type ResizeContext = {
  contentWidth: number;
  offsetX: number;
  offsetY: number;
};
/**
 * Re-anchor a face rectangle after the rendered image content resizes or moves.
 *
 * The rect is stored in container coordinates; this converts it to
 * image-relative coordinates using the previous layout, scales by the ratio of
 * new to old content width, and re-applies the new offsets so the rect keeps
 * covering the same region of the image.
 *
 * NOTE(review): assumes `previous.contentWidth > 0` — visible callers skip the
 * first layout pass (contentWidth === 0) before invoking this; confirm.
 *
 * @param faceRect - current rect position and scale in container coordinates
 * @param previous - image-content layout the rect was last positioned against
 * @param newOffsetX - new horizontal offset of the content inside the container
 * @param newOffsetY - new vertical offset of the content inside the container
 * @param newContentWidth - new rendered width of the image content
 * @returns the rect state re-expressed for the new layout
 */
export const scaleFaceRectOnResize = (
  faceRect: FaceRectState,
  previous: ResizeContext,
  newOffsetX: number,
  newOffsetY: number,
  newContentWidth: number,
): FaceRectState => {
  // Uniform zoom ratio between the old and new rendered content widths.
  const ratio = newContentWidth / previous.contentWidth;
  // Position relative to the image content's top-left in the old layout.
  const relativeX = faceRect.left - previous.offsetX;
  const relativeY = faceRect.top - previous.offsetY;
  return {
    left: newOffsetX + relativeX * ratio,
    top: newOffsetY + relativeY * ratio,
    scaleX: faceRect.scaleX * ratio,
    scaleY: faceRect.scaleY * ratio,
  };
};
export const zoomImageToBase64 = async (
face: AssetFaceResponseDto,
assetId: string,