From 4c1e7288c71a001ea778b0b397b76eefd14218a3 Mon Sep 17 00:00:00 2001 From: midzelis Date: Sat, 7 Mar 2026 19:06:14 +0000 Subject: [PATCH] feat(web): image-relative overlays with zoom support for faces, OCR, and face editor --- e2e/src/ui/generators/timeline.ts | 2 +- .../ui/generators/timeline/rest-response.ts | 17 +- .../ui/mock-network/face-editor-network.ts | 84 ++++- e2e/src/ui/mock-network/ocr-network.ts | 55 ++++ .../asset-viewer/face-editor.e2e-spec.ts | 48 ++- .../asset-viewer/face-overlay.e2e-spec.ts | 264 +++++++++++++++ e2e/src/ui/specs/asset-viewer/ocr.e2e-spec.ts | 300 ++++++++++++++++++ web/src/lib/actions/zoom-image.ts | 139 ++++++-- web/src/lib/components/AdaptiveImage.svelte | 122 +++---- .../asset-viewer/asset-viewer.svelte | 26 +- .../asset-viewer/detail-panel.svelte | 6 +- .../face-editor/face-editor.svelte | 248 ++++++++++----- .../asset-viewer/ocr-bounding-box.svelte | 45 ++- .../photo-sphere-viewer-adapter.svelte | 6 +- .../asset-viewer/photo-viewer.svelte | 113 +++---- .../faces-page/person-side-panel.svelte | 4 +- web/src/lib/utils/container-utils.spec.ts | 174 +++++++++- web/src/lib/utils/container-utils.ts | 105 +++++- web/src/lib/utils/ocr-utils.spec.ts | 27 +- web/src/lib/utils/ocr-utils.ts | 25 +- web/src/lib/utils/people-utils.spec.ts | 150 ++++++--- web/src/lib/utils/people-utils.ts | 66 ++-- 22 files changed, 1632 insertions(+), 394 deletions(-) create mode 100644 e2e/src/ui/mock-network/ocr-network.ts create mode 100644 e2e/src/ui/specs/asset-viewer/face-overlay.e2e-spec.ts create mode 100644 e2e/src/ui/specs/asset-viewer/ocr.e2e-spec.ts diff --git a/e2e/src/ui/generators/timeline.ts b/e2e/src/ui/generators/timeline.ts index d4c91d667f..9f926f6b0c 100644 --- a/e2e/src/ui/generators/timeline.ts +++ b/e2e/src/ui/generators/timeline.ts @@ -20,7 +20,7 @@ export { toColumnarFormat, } from './timeline/rest-response'; -export type { Changes } from './timeline/rest-response'; +export type { Changes, FaceData } from 
'./timeline/rest-response'; export { randomImage, randomImageFromString, randomPreview, randomThumbnail } from './timeline/images'; diff --git a/e2e/src/ui/generators/timeline/rest-response.ts b/e2e/src/ui/generators/timeline/rest-response.ts index 0c4bd06dc3..9baadda095 100644 --- a/e2e/src/ui/generators/timeline/rest-response.ts +++ b/e2e/src/ui/generators/timeline/rest-response.ts @@ -7,8 +7,10 @@ import { AssetVisibility, UserAvatarColor, type AlbumResponseDto, + type AssetFaceWithoutPersonResponseDto, type AssetResponseDto, type ExifResponseDto, + type PersonWithFacesResponseDto, type TimeBucketAssetResponseDto, type TimeBucketsResponseDto, type UserResponseDto, @@ -284,7 +286,16 @@ const createDefaultOwner = (ownerId: string) => { * Convert a TimelineAssetConfig to a full AssetResponseDto * This matches the response from GET /api/assets/:id */ -export function toAssetResponseDto(asset: MockTimelineAsset, owner?: UserResponseDto): AssetResponseDto { +export type FaceData = { + people: PersonWithFacesResponseDto[]; + unassignedFaces: AssetFaceWithoutPersonResponseDto[]; +}; + +export function toAssetResponseDto( + asset: MockTimelineAsset, + owner?: UserResponseDto, + faceData?: FaceData, +): AssetResponseDto { const now = new Date().toISOString(); // Default owner if not provided @@ -338,8 +349,8 @@ export function toAssetResponseDto(asset: MockTimelineAsset, owner?: UserRespons exifInfo, livePhotoVideoId: asset.livePhotoVideoId, tags: [], - people: [], - unassignedFaces: [], + people: faceData?.people ?? [], + unassignedFaces: faceData?.unassignedFaces ?? 
[], stack: asset.stack, isOffline: false, hasMetadata: true, diff --git a/e2e/src/ui/mock-network/face-editor-network.ts b/e2e/src/ui/mock-network/face-editor-network.ts index 778f04baf9..df384478d2 100644 --- a/e2e/src/ui/mock-network/face-editor-network.ts +++ b/e2e/src/ui/mock-network/face-editor-network.ts @@ -1,5 +1,6 @@ +import type { AssetFaceResponseDto, AssetResponseDto, PersonWithFacesResponseDto, SourceType } from '@immich/sdk'; import { BrowserContext } from '@playwright/test'; -import { randomThumbnail } from 'src/ui/generators/timeline'; +import { type FaceData, randomThumbnail } from 'src/ui/generators/timeline'; // Minimal valid H.264 MP4 (8x8px, 1 frame) that browsers can decode to get videoWidth/videoHeight const MINIMAL_MP4_BASE64 = @@ -125,3 +126,84 @@ export const setupFaceEditorMockApiRoutes = async ( }); }); }; + +export type MockFaceSpec = { + personId: string; + personName: string; + faceId: string; + boundingBoxX1: number; + boundingBoxY1: number; + boundingBoxX2: number; + boundingBoxY2: number; +}; + +const toPersonResponseDto = (spec: MockFaceSpec) => ({ + id: spec.personId, + name: spec.personName, + birthDate: null, + isHidden: false, + thumbnailPath: `/upload/thumbs/${spec.personId}.jpeg`, + updatedAt: '2025-01-01T00:00:00.000Z', +}); + +const toBoundingBox = (spec: MockFaceSpec, imageWidth: number, imageHeight: number) => ({ + id: spec.faceId, + imageWidth, + imageHeight, + boundingBoxX1: spec.boundingBoxX1, + boundingBoxY1: spec.boundingBoxY1, + boundingBoxX2: spec.boundingBoxX2, + boundingBoxY2: spec.boundingBoxY2, +}); + +export const createMockFaceData = (specs: MockFaceSpec[], imageWidth: number, imageHeight: number): FaceData => { + const people: PersonWithFacesResponseDto[] = specs.map((spec) => ({ + ...toPersonResponseDto(spec), + faces: [toBoundingBox(spec, imageWidth, imageHeight)], + })); + + return { people, unassignedFaces: [] }; +}; + +export const createMockAssetFaces = ( + specs: MockFaceSpec[], + imageWidth: number, 
+ imageHeight: number, +): AssetFaceResponseDto[] => { + return specs.map((spec) => ({ + ...toBoundingBox(spec, imageWidth, imageHeight), + person: toPersonResponseDto(spec), + sourceType: 'machine-learning' as SourceType, + })); +}; + +export const setupGetFacesMockApiRoute = async (context: BrowserContext, faces: AssetFaceResponseDto[]) => { + await context.route('**/api/faces?*', async (route, request) => { + if (request.method() !== 'GET') { + return route.fallback(); + } + return route.fulfill({ + status: 200, + contentType: 'application/json', + json: faces, + }); + }); +}; + +export const setupFaceOverlayMockApiRoutes = async (context: BrowserContext, assetDto: AssetResponseDto) => { + await context.route('**/api/assets/*', async (route, request) => { + if (request.method() !== 'GET') { + return route.fallback(); + } + const url = new URL(request.url()); + const assetId = url.pathname.split('/').at(-1); + if (assetId !== assetDto.id) { + return route.fallback(); + } + return route.fulfill({ + status: 200, + contentType: 'application/json', + json: assetDto, + }); + }); +}; diff --git a/e2e/src/ui/mock-network/ocr-network.ts b/e2e/src/ui/mock-network/ocr-network.ts new file mode 100644 index 0000000000..3b1a2fe62e --- /dev/null +++ b/e2e/src/ui/mock-network/ocr-network.ts @@ -0,0 +1,55 @@ +import { faker } from '@faker-js/faker'; +import type { AssetOcrResponseDto } from '@immich/sdk'; +import { BrowserContext } from '@playwright/test'; + +export type MockOcrBox = { + text: string; + x1: number; + y1: number; + x2: number; + y2: number; + x3: number; + y3: number; + x4: number; + y4: number; +}; + +export const createMockOcrData = (assetId: string, boxes: MockOcrBox[]): AssetOcrResponseDto[] => { + return boxes.map((box) => ({ + id: faker.string.uuid(), + assetId, + x1: box.x1, + y1: box.y1, + x2: box.x2, + y2: box.y2, + x3: box.x3, + y3: box.y3, + x4: box.x4, + y4: box.y4, + boxScore: 0.95, + textScore: 0.9, + text: box.text, + })); +}; + +export const 
setupOcrMockApiRoutes = async ( + context: BrowserContext, + ocrDataByAssetId: Map<string, AssetOcrResponseDto[]>, +) => { + await context.route('**/assets/*/ocr', async (route, request) => { + if (request.method() !== 'GET') { + return route.fallback(); + } + const url = new URL(request.url()); + const segments = url.pathname.split('/'); + const assetIdIndex = segments.indexOf('assets') + 1; + const assetId = segments[assetIdIndex]; + + const ocrData = ocrDataByAssetId.get(assetId) ?? []; + return route.fulfill({ + status: 200, + contentType: 'application/json', + json: ocrData, + }); + }); +}; diff --git a/e2e/src/ui/specs/asset-viewer/face-editor.e2e-spec.ts b/e2e/src/ui/specs/asset-viewer/face-editor.e2e-spec.ts index b1058f646e..a90312c579 100644 --- a/e2e/src/ui/specs/asset-viewer/face-editor.e2e-spec.ts +++ b/e2e/src/ui/specs/asset-viewer/face-editor.e2e-spec.ts @@ -149,7 +149,7 @@ test.describe('face-editor', () => { await expect(page.getByRole('dialog')).toBeVisible(); }); - test('Confirming tag calls createFace API and closes editor', async ({ page }) => { + test('Confirming tag calls createFace API with valid coordinates and closes editor', async ({ page }) => { const asset = selectRandom(fixture.assets, rng); await openFaceEditor(page, asset); @@ -163,8 +163,15 @@ test.describe('face-editor', () => { await expect(page.locator('#face-editor')).toBeHidden(); expect(faceCreateCapture.requests).toHaveLength(1); - expect(faceCreateCapture.requests[0].assetId).toBe(asset.id); - expect(faceCreateCapture.requests[0].personId).toBe(personToTag.id); + const request = faceCreateCapture.requests[0]; + expect(request.assetId).toBe(asset.id); + expect(request.personId).toBe(personToTag.id); + expect(request.x).toBeGreaterThanOrEqual(0); + expect(request.y).toBeGreaterThanOrEqual(0); + expect(request.width).toBeGreaterThan(0); + expect(request.height).toBeGreaterThan(0); + expect(request.x + request.width).toBeLessThanOrEqual(request.imageWidth); + expect(request.y +
request.height).toBeLessThanOrEqual(request.imageHeight); }); test('Cancel button closes face editor', async ({ page }) => { @@ -282,4 +289,39 @@ test.describe('face-editor', () => { expect(afterDrag.left).toBeGreaterThan(beforeDrag.left + 50); expect(afterDrag.top).toBeGreaterThan(beforeDrag.top + 20); }); + + test('Cancel on confirmation dialog keeps face editor open', async ({ page }) => { + const asset = selectRandom(fixture.assets, rng); + await openFaceEditor(page, asset); + + const personToTag = mockPeople[0]; + await page.locator('#face-selector').getByText(personToTag.name).click(); + + await expect(page.getByRole('dialog')).toBeVisible(); + await page + .getByRole('dialog') + .getByRole('button', { name: /cancel/i }) + .click(); + + await expect(page.getByRole('dialog')).toBeHidden(); + await expect(page.locator('#face-selector')).toBeVisible(); + await expect(page.locator('#face-editor')).toBeVisible(); + expect(faceCreateCapture.requests).toHaveLength(0); + }); + + test('Clicking on face rect center does not reposition it', async ({ page }) => { + const asset = selectRandom(fixture.assets, rng); + await openFaceEditor(page, asset); + + const beforeClick = await getFaceBoxRect(page); + const centerX = beforeClick.left + beforeClick.width / 2; + const centerY = beforeClick.top + beforeClick.height / 2; + + await page.mouse.click(centerX, centerY); + await page.waitForTimeout(300); + + const afterClick = await getFaceBoxRect(page); + expect(Math.abs(afterClick.left - beforeClick.left)).toBeLessThan(3); + expect(Math.abs(afterClick.top - beforeClick.top)).toBeLessThan(3); + }); }); diff --git a/e2e/src/ui/specs/asset-viewer/face-overlay.e2e-spec.ts b/e2e/src/ui/specs/asset-viewer/face-overlay.e2e-spec.ts new file mode 100644 index 0000000000..c69503cf11 --- /dev/null +++ b/e2e/src/ui/specs/asset-viewer/face-overlay.e2e-spec.ts @@ -0,0 +1,264 @@ +import { expect, test } from '@playwright/test'; +import { toAssetResponseDto } from 
'src/ui/generators/timeline'; +import { + createMockAssetFaces, + createMockFaceData, + createMockPeople, + type MockFaceSpec, + setupFaceEditorMockApiRoutes, + setupFaceOverlayMockApiRoutes, + setupGetFacesMockApiRoute, +} from 'src/ui/mock-network/face-editor-network'; +import { assetViewerUtils } from '../timeline/utils'; +import { ensureDetailPanelVisible, setupAssetViewerFixture } from './utils'; + +test.describe.configure({ mode: 'parallel' }); + +const FACE_SPECS: MockFaceSpec[] = [ + { + personId: 'person-alice', + personName: 'Alice Johnson', + faceId: 'face-alice', + boundingBoxX1: 1000, + boundingBoxY1: 500, + boundingBoxX2: 1500, + boundingBoxY2: 1200, + }, + { + personId: 'person-bob', + personName: 'Bob Smith', + faceId: 'face-bob', + boundingBoxX1: 2000, + boundingBoxY1: 800, + boundingBoxX2: 2400, + boundingBoxY2: 1600, + }, +]; + +const setupFaceMocks = async ( + context: import('@playwright/test').BrowserContext, + fixture: ReturnType<typeof setupAssetViewerFixture>, +) => { + const mockPeople = createMockPeople(4); + const faceData = createMockFaceData( + FACE_SPECS, + fixture.primaryAssetDto.width ?? 3000, + fixture.primaryAssetDto.height ??
4000, + ); + const assetDtoWithFaces = toAssetResponseDto(fixture.primaryAsset, undefined, faceData); + await setupFaceOverlayMockApiRoutes(context, assetDtoWithFaces); + await setupFaceEditorMockApiRoutes(context, mockPeople, { requests: [] }); +}; + +test.describe('face overlay bounding boxes', () => { + const fixture = setupAssetViewerFixture(901); + + test.beforeEach(async ({ context }) => { + await setupFaceMocks(context, fixture); + }); + + test('face overlay divs render with correct aria labels', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + const aliceOverlay = page.getByLabel('Person: Alice Johnson'); + const bobOverlay = page.getByLabel('Person: Bob Smith'); + + await expect(aliceOverlay).toBeVisible(); + await expect(bobOverlay).toBeVisible(); + }); + + test('face overlay shows border on hover', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + const aliceOverlay = page.getByLabel('Person: Alice Johnson'); + await expect(aliceOverlay).toBeVisible(); + + const activeBorder = page.locator('[data-viewer-content] .border-solid.border-white.border-3'); + await expect(activeBorder).toHaveCount(0); + + await aliceOverlay.hover(); + await expect(activeBorder).toHaveCount(1); + }); + + test('face name tooltip appears on hover', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + const aliceOverlay = page.getByLabel('Person: Alice Johnson'); + await expect(aliceOverlay).toBeVisible(); + + await aliceOverlay.hover(); + + const nameTooltip = page.locator('[data-viewer-content]').getByText('Alice Johnson'); + await expect(nameTooltip).toBeVisible(); + }); + + test('face overlays hidden in face edit mode', async ({ page }) => { + await 
page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + const aliceOverlay = page.getByLabel('Person: Alice Johnson'); + await expect(aliceOverlay).toBeVisible(); + + await ensureDetailPanelVisible(page); + await page.getByLabel('Tag people').click(); + await page.locator('#face-selector').waitFor({ state: 'visible' }); + + await expect(aliceOverlay).toBeHidden(); + }); + + test('face overlay hover works after exiting face edit mode', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + const aliceOverlay = page.getByLabel('Person: Alice Johnson'); + await expect(aliceOverlay).toBeVisible(); + + await ensureDetailPanelVisible(page); + await page.getByLabel('Tag people').click(); + await page.locator('#face-selector').waitFor({ state: 'visible' }); + await expect(aliceOverlay).toBeHidden(); + + await page.getByRole('button', { name: /cancel/i }).click(); + await expect(page.locator('#face-selector')).toBeHidden(); + + await expect(aliceOverlay).toBeVisible(); + + const activeBorder = page.locator('[data-viewer-content] .border-solid.border-white.border-3'); + await expect(activeBorder).toHaveCount(0); + await aliceOverlay.hover(); + await expect(activeBorder).toHaveCount(1); + }); +}); + +test.describe('zoom and face editor interaction', () => { + const fixture = setupAssetViewerFixture(902); + + test.beforeEach(async ({ context }) => { + await setupFaceMocks(context, fixture); + }); + + test('zoom is preserved when entering face edit mode', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + const { width, height } = page.viewportSize()!; + await page.mouse.move(width / 2, height / 2); + await page.mouse.wheel(0, -1); + + const imgLocator = page.locator('[data-viewer-content] 
img[draggable="false"]'); + await expect(async () => { + const transform = await imgLocator.evaluate((element) => { + return getComputedStyle(element.closest('[style*="transform"]') ?? element).transform; + }); + expect(transform).not.toBe('none'); + expect(transform).not.toBe(''); + }).toPass({ timeout: 2000 }); + + await ensureDetailPanelVisible(page); + await page.getByLabel('Tag people').click(); + await page.locator('#face-selector').waitFor({ state: 'visible' }); + + await expect(page.locator('#face-editor')).toBeVisible(); + + const afterTransform = await imgLocator.evaluate((element) => { + return getComputedStyle(element.closest('[style*="transform"]') ?? element).transform; + }); + expect(afterTransform).not.toBe('none'); + }); +}); + +test.describe('face overlay via detail panel interaction', () => { + const fixture = setupAssetViewerFixture(903); + + test.beforeEach(async ({ context }) => { + await setupFaceMocks(context, fixture); + }); + + test('hovering person in detail panel shows face overlay border', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + await ensureDetailPanelVisible(page); + + const personLink = page.locator('#detail-panel a').filter({ hasText: 'Alice Johnson' }); + await expect(personLink).toBeVisible(); + + const activeBorder = page.locator('[data-viewer-content] .border-solid.border-white.border-3'); + await expect(activeBorder).toHaveCount(0); + + await personLink.hover(); + await expect(activeBorder).toHaveCount(1); + }); + + test('touch pointer on person in detail panel shows face overlay border', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + await ensureDetailPanelVisible(page); + + const personLink = page.locator('#detail-panel a').filter({ hasText: 'Alice Johnson' }); + await expect(personLink).toBeVisible(); + + 
const activeBorder = page.locator('[data-viewer-content] .border-solid.border-white.border-3'); + await expect(activeBorder).toHaveCount(0); + + // Simulate a touch-type pointerover (the fix changed from onmouseover to onpointerover, + // which fires for touch pointers unlike mouseover) + await personLink.dispatchEvent('pointerover', { pointerType: 'touch' }); + await expect(activeBorder).toHaveCount(1); + }); + + test('hovering person in detail panel works after exiting face edit mode', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + await ensureDetailPanelVisible(page); + await page.getByLabel('Tag people').click(); + await page.locator('#face-selector').waitFor({ state: 'visible' }); + + await page.getByRole('button', { name: /cancel/i }).click(); + await expect(page.locator('#face-selector')).toBeHidden(); + + const personLink = page.locator('#detail-panel a').filter({ hasText: 'Alice Johnson' }); + await expect(personLink).toBeVisible(); + + const activeBorder = page.locator('[data-viewer-content] .border-solid.border-white.border-3'); + await personLink.hover(); + await expect(activeBorder).toHaveCount(1); + }); +}); + +test.describe('face overlay via edit faces side panel', () => { + const fixture = setupAssetViewerFixture(904); + + test.beforeEach(async ({ context }) => { + await setupFaceMocks(context, fixture); + + const assetFaces = createMockAssetFaces( + FACE_SPECS, + fixture.primaryAssetDto.width ?? 3000, + fixture.primaryAssetDto.height ?? 
4000, + ); + await setupGetFacesMockApiRoute(context, assetFaces); + }); + + test('hovering person in edit faces panel shows face overlay border', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + await ensureDetailPanelVisible(page); + await page.getByLabel('Edit people').click(); + + const faceThumbnail = page.locator('section div[role="button"]').first(); + await expect(faceThumbnail).toBeVisible(); + + const activeBorder = page.locator('[data-viewer-content] .border-solid.border-white.border-3'); + await expect(activeBorder).toHaveCount(0); + + await faceThumbnail.hover(); + await expect(activeBorder).toHaveCount(1); + }); +}); diff --git a/e2e/src/ui/specs/asset-viewer/ocr.e2e-spec.ts b/e2e/src/ui/specs/asset-viewer/ocr.e2e-spec.ts new file mode 100644 index 0000000000..5a442a6081 --- /dev/null +++ b/e2e/src/ui/specs/asset-viewer/ocr.e2e-spec.ts @@ -0,0 +1,300 @@ +import type { AssetOcrResponseDto, AssetResponseDto } from '@immich/sdk'; +import { expect, test } from '@playwright/test'; +import { toAssetResponseDto } from 'src/ui/generators/timeline'; +import { + createMockStack, + createMockStackAsset, + MockStack, + setupBrokenAssetMockApiRoutes, +} from 'src/ui/mock-network/broken-asset-network'; +import { createMockOcrData, setupOcrMockApiRoutes } from 'src/ui/mock-network/ocr-network'; +import { assetViewerUtils } from '../timeline/utils'; +import { setupAssetViewerFixture } from './utils'; + +test.describe.configure({ mode: 'parallel' }); + +const PRIMARY_OCR_BOXES = [ + { text: 'Hello World', x1: 0.1, y1: 0.1, x2: 0.4, y2: 0.1, x3: 0.4, y3: 0.15, x4: 0.1, y4: 0.15 }, + { text: 'Immich Photo', x1: 0.2, y1: 0.3, x2: 0.6, y2: 0.3, x3: 0.6, y3: 0.36, x4: 0.2, y4: 0.36 }, +]; + +const SECONDARY_OCR_BOXES = [ + { text: 'Second Asset Text', x1: 0.15, y1: 0.2, x2: 0.55, y2: 0.2, x3: 0.55, y3: 0.26, x4: 0.15, y4: 0.26 }, +]; + +test.describe('OCR bounding 
boxes', () => { + const fixture = setupAssetViewerFixture(920); + + test.beforeEach(async ({ context }) => { + const primaryAssetDto = toAssetResponseDto(fixture.primaryAsset); + const ocrDataByAssetId = new Map([ + [primaryAssetDto.id, createMockOcrData(primaryAssetDto.id, PRIMARY_OCR_BOXES)], + ]); + + await setupOcrMockApiRoutes(context, ocrDataByAssetId); + }); + + test('OCR bounding boxes appear when clicking OCR button', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + const ocrButton = page.getByLabel('Text recognition'); + await expect(ocrButton).toBeVisible(); + await ocrButton.click(); + + const ocrBoxes = page.locator('[data-viewer-content] [data-testid="ocr-box"]'); + await expect(ocrBoxes).toHaveCount(2); + + await expect(ocrBoxes.nth(0)).toContainText('Hello World'); + await expect(ocrBoxes.nth(1)).toContainText('Immich Photo'); + }); + + test('OCR bounding boxes toggle off on second click', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + const ocrButton = page.getByLabel('Text recognition'); + await ocrButton.click(); + await expect(page.locator('[data-viewer-content] [data-testid="ocr-box"]').first()).toBeVisible(); + + await ocrButton.click(); + await expect(page.locator('[data-viewer-content] [data-testid="ocr-box"]')).toHaveCount(0); + }); +}); + +test.describe('OCR with stacked assets', () => { + const fixture = setupAssetViewerFixture(921); + let mockStack: MockStack; + let primaryAssetDto: AssetResponseDto; + let secondAssetDto: AssetResponseDto; + + test.beforeAll(async () => { + primaryAssetDto = toAssetResponseDto(fixture.primaryAsset); + secondAssetDto = createMockStackAsset(fixture.adminUserId); + secondAssetDto.originalFileName = 'second-ocr-asset.jpg'; + mockStack = createMockStack(primaryAssetDto, [secondAssetDto], new Set()); 
+ }); + + test.beforeEach(async ({ context }) => { + await setupBrokenAssetMockApiRoutes(context, mockStack); + + const ocrDataByAssetId = new Map([ + [primaryAssetDto.id, createMockOcrData(primaryAssetDto.id, PRIMARY_OCR_BOXES)], + [secondAssetDto.id, createMockOcrData(secondAssetDto.id, SECONDARY_OCR_BOXES)], + ]); + + await setupOcrMockApiRoutes(context, ocrDataByAssetId); + }); + + test('different OCR boxes shown for different stacked assets', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + const ocrButton = page.getByLabel('Text recognition'); + await expect(ocrButton).toBeVisible(); + await ocrButton.click(); + + const ocrBoxes = page.locator('[data-viewer-content] [data-testid="ocr-box"]'); + await expect(ocrBoxes).toHaveCount(2); + await expect(ocrBoxes.nth(0)).toContainText('Hello World'); + + const stackThumbnails = page.locator('#stack-slideshow [data-asset]'); + await expect(stackThumbnails).toHaveCount(2); + await stackThumbnails.nth(1).click(); + + // refreshOcr() clears showOverlay when switching assets, so re-enable it + await expect(ocrBoxes).toHaveCount(0); + await expect(ocrButton).toBeVisible(); + await ocrButton.click(); + + await expect(ocrBoxes).toHaveCount(1); + await expect(ocrBoxes.first()).toContainText('Second Asset Text'); + }); +}); + +test.describe('OCR boxes and zoom', () => { + const fixture = setupAssetViewerFixture(922); + + test.beforeEach(async ({ context }) => { + const primaryAssetDto = toAssetResponseDto(fixture.primaryAsset); + const ocrDataByAssetId = new Map([ + [primaryAssetDto.id, createMockOcrData(primaryAssetDto.id, PRIMARY_OCR_BOXES)], + ]); + + await setupOcrMockApiRoutes(context, ocrDataByAssetId); + }); + + test('OCR boxes scale with zoom', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + const ocrButton = 
page.getByLabel('Text recognition'); + await expect(ocrButton).toBeVisible(); + await ocrButton.click(); + + const ocrBox = page.locator('[data-viewer-content] [data-testid="ocr-box"]').first(); + await expect(ocrBox).toBeVisible(); + + const initialBox = await ocrBox.boundingBox(); + expect(initialBox).toBeTruthy(); + + const { width, height } = page.viewportSize()!; + await page.mouse.move(width / 2, height / 2); + await page.mouse.wheel(0, -3); + + await expect(async () => { + const zoomedBox = await ocrBox.boundingBox(); + expect(zoomedBox).toBeTruthy(); + expect(zoomedBox!.width).toBeGreaterThan(initialBox!.width); + expect(zoomedBox!.height).toBeGreaterThan(initialBox!.height); + }).toPass({ timeout: 2000 }); + }); +}); + +test.describe('OCR text interaction', () => { + const fixture = setupAssetViewerFixture(923); + + test.beforeEach(async ({ context }) => { + const primaryAssetDto = toAssetResponseDto(fixture.primaryAsset); + const ocrDataByAssetId = new Map([ + [primaryAssetDto.id, createMockOcrData(primaryAssetDto.id, PRIMARY_OCR_BOXES)], + ]); + + await setupOcrMockApiRoutes(context, ocrDataByAssetId); + }); + + test('OCR text box has data-overlay-interactive attribute', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + await page.getByLabel('Text recognition').click(); + + const ocrBox = page.locator('[data-viewer-content] [data-testid="ocr-box"]').first(); + await expect(ocrBox).toBeVisible(); + await expect(ocrBox).toHaveAttribute('data-overlay-interactive'); + }); + + test('OCR text box receives focus on click', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + await page.getByLabel('Text recognition').click(); + + const ocrBox = page.locator('[data-viewer-content] [data-testid="ocr-box"]').first(); + await expect(ocrBox).toBeVisible(); + + 
await ocrBox.click(); + await expect(ocrBox).toBeFocused(); + }); + + test('dragging on OCR text box does not trigger image pan', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + await page.getByLabel('Text recognition').click(); + + const ocrBox = page.locator('[data-viewer-content] [data-testid="ocr-box"]').first(); + await expect(ocrBox).toBeVisible(); + + const imgLocator = page.locator('[data-viewer-content] img[draggable="false"]'); + const initialTransform = await imgLocator.evaluate((element) => { + return getComputedStyle(element.closest('[style*="transform"]') ?? element).transform; + }); + + const box = await ocrBox.boundingBox(); + expect(box).toBeTruthy(); + const centerX = box!.x + box!.width / 2; + const centerY = box!.y + box!.height / 2; + + await page.mouse.move(centerX, centerY); + await page.mouse.down(); + await page.mouse.move(centerX + 50, centerY + 30, { steps: 5 }); + await page.mouse.up(); + + const afterTransform = await imgLocator.evaluate((element) => { + return getComputedStyle(element.closest('[style*="transform"]') ?? element).transform; + }); + expect(afterTransform).toBe(initialTransform); + }); + + test('split touch gesture across zoom container does not trigger zoom', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + await page.getByLabel('Text recognition').click(); + const ocrBox = page.locator('[data-viewer-content] [data-testid="ocr-box"]').first(); + await expect(ocrBox).toBeVisible(); + + const imgLocator = page.locator('[data-viewer-content] img[draggable="false"]'); + const initialTransform = await imgLocator.evaluate((element) => { + return getComputedStyle(element.closest('[style*="transform"]') ?? 
element).transform; + }); + + const viewerContent = page.locator('[data-viewer-content]'); + const viewerBox = await viewerContent.boundingBox(); + expect(viewerBox).toBeTruthy(); + + // Dispatch a synthetic split gesture: one touch inside the viewer, one outside + await page.evaluate( + ({ viewerCenterX, viewerCenterY, outsideY }) => { + const viewer = document.querySelector('[data-viewer-content]'); + if (!viewer) { + return; + } + + const createTouch = (id: number, x: number, y: number) => { + return new Touch({ + identifier: id, + target: viewer, + clientX: x, + clientY: y, + }); + }; + + const insideTouch = createTouch(0, viewerCenterX, viewerCenterY); + const outsideTouch = createTouch(1, viewerCenterX, outsideY); + + const touchStartEvent = new TouchEvent('touchstart', { + touches: [insideTouch, outsideTouch], + targetTouches: [insideTouch], + changedTouches: [insideTouch, outsideTouch], + bubbles: true, + cancelable: true, + }); + + const touchMoveEvent = new TouchEvent('touchmove', { + touches: [createTouch(0, viewerCenterX, viewerCenterY - 30), createTouch(1, viewerCenterX, outsideY + 30)], + targetTouches: [createTouch(0, viewerCenterX, viewerCenterY - 30)], + changedTouches: [ + createTouch(0, viewerCenterX, viewerCenterY - 30), + createTouch(1, viewerCenterX, outsideY + 30), + ], + bubbles: true, + cancelable: true, + }); + + const touchEndEvent = new TouchEvent('touchend', { + touches: [], + targetTouches: [], + changedTouches: [insideTouch, outsideTouch], + bubbles: true, + cancelable: true, + }); + + viewer.dispatchEvent(touchStartEvent); + viewer.dispatchEvent(touchMoveEvent); + viewer.dispatchEvent(touchEndEvent); + }, + { + viewerCenterX: viewerBox!.x + viewerBox!.width / 2, + viewerCenterY: viewerBox!.y + viewerBox!.height / 2, + outsideY: 10, // near the top of the page, outside the viewer + }, + ); + + const afterTransform = await imgLocator.evaluate((element) => { + return getComputedStyle(element.closest('[style*="transform"]') ?? 
element).transform; + }); + expect(afterTransform).toBe(initialTransform); + }); +}); diff --git a/web/src/lib/actions/zoom-image.ts b/web/src/lib/actions/zoom-image.ts index 35c3d3a106..1616f56cbc 100644 --- a/web/src/lib/actions/zoom-image.ts +++ b/web/src/lib/actions/zoom-image.ts @@ -1,11 +1,17 @@ import { assetViewerManager } from '$lib/managers/asset-viewer-manager.svelte'; import { createZoomImageWheel } from '@zoom-image/core'; -export const zoomImageAction = (node: HTMLElement, options?: { disabled?: boolean }) => { +type TouchEventLike = { + touches: Iterable<{ clientX: number; clientY: number }> & { length: number }; + targetTouches: ArrayLike; +}; +const asTouchEvent = (event: Event) => event as unknown as TouchEventLike; + +export const zoomImageAction = (node: HTMLElement, options?: { zoomTarget?: HTMLElement }) => { const zoomInstance = createZoomImageWheel(node, { maxZoom: 10, initialState: assetViewerManager.zoomState, - zoomTarget: null, + zoomTarget: options?.zoomTarget, }); const unsubscribes = [ @@ -13,47 +19,124 @@ export const zoomImageAction = (node: HTMLElement, options?: { disabled?: boolea zoomInstance.subscribe(({ state }) => assetViewerManager.onZoomChange(state)), ]; - const onInteractionStart = (event: Event) => { - if (options?.disabled) { - event.stopImmediatePropagation(); + const controller = new AbortController(); + const { signal } = controller; + + node.addEventListener('pointerdown', () => assetViewerManager.cancelZoomAnimation(), { capture: true, signal }); + + // Intercept events in capture phase to prevent zoom-image from seeing interactions on + // overlay elements (e.g. OCR text boxes), preserving browser defaults like text selection. 
+ const isOverlayEvent = (event: Event) => !!(event.target as HTMLElement).closest('[data-overlay-interactive]'); + const isOverlayAtPoint = (x: number, y: number) => + !!document.elementFromPoint(x, y)?.closest('[data-overlay-interactive]'); + + // Pointer event interception: track pointers that start on overlays and intercept the entire gesture. + const overlayPointers = new Set(); + const interceptedPointers = new Set(); + const interceptOverlayPointerDown = (event: PointerEvent) => { + if (isOverlayEvent(event) || isOverlayAtPoint(event.clientX, event.clientY)) { + overlayPointers.add(event.pointerId); + interceptedPointers.add(event.pointerId); + event.stopPropagation(); + } else if (overlayPointers.size > 0) { + // Split gesture (e.g. pinch with one finger on overlay) — intercept entirely. + interceptedPointers.add(event.pointerId); + event.stopPropagation(); } - assetViewerManager.cancelZoomAnimation(); }; + const interceptOverlayPointerEvent = (event: PointerEvent) => { + if (interceptedPointers.has(event.pointerId)) { + event.stopPropagation(); + } + }; + const interceptOverlayPointerEnd = (event: PointerEvent) => { + overlayPointers.delete(event.pointerId); + if (interceptedPointers.delete(event.pointerId)) { + event.stopPropagation(); + } + }; + node.addEventListener('pointerdown', interceptOverlayPointerDown, { capture: true, signal }); + node.addEventListener('pointermove', interceptOverlayPointerEvent, { capture: true, signal }); + node.addEventListener('pointerup', interceptOverlayPointerEnd, { capture: true, signal }); + node.addEventListener('pointerleave', interceptOverlayPointerEnd, { capture: true, signal }); - node.addEventListener('wheel', onInteractionStart, { capture: true }); - node.addEventListener('pointerdown', onInteractionStart, { capture: true }); + // Touch event interception for overlay touches or split gestures (pinch across container boundary). + // Once intercepted, stays intercepted until all fingers are lifted. 
+ let touchGestureIntercepted = false; + const interceptOverlayTouchEvent = (event: Event) => { + if (touchGestureIntercepted) { + event.stopPropagation(); + return; + } + const { touches, targetTouches } = asTouchEvent(event); + if (touches && targetTouches) { + if (touches.length > targetTouches.length) { + touchGestureIntercepted = true; + event.stopPropagation(); + return; + } + for (const touch of touches) { + if (isOverlayAtPoint(touch.clientX, touch.clientY)) { + touchGestureIntercepted = true; + event.stopPropagation(); + return; + } + } + } else if (isOverlayEvent(event)) { + event.stopPropagation(); + } + }; + const resetTouchGesture = (event: Event) => { + const { touches } = asTouchEvent(event); + if (touches.length === 0) { + touchGestureIntercepted = false; + } + }; + node.addEventListener('touchstart', interceptOverlayTouchEvent, { capture: true, signal }); + node.addEventListener('touchmove', interceptOverlayTouchEvent, { capture: true, signal }); + node.addEventListener('touchend', resetTouchGesture, { capture: true, signal }); - // Suppress Safari's synthetic dblclick on double-tap. Without this, zoom-image's touchstart - // handler zooms to maxZoom (10x), then Safari's synthetic dblclick triggers photo-viewer's - // handler which conflicts. Chrome does not fire synthetic dblclick on touch. + // Wheel and dblclick interception on overlay elements. + // Dblclick also intercepted for all touch double-taps (Safari fires synthetic dblclick + // on double-tap, which conflicts with zoom-image's touch zoom handler). 
let lastPointerWasTouch = false; - const trackPointerType = (event: PointerEvent) => { - lastPointerWasTouch = event.pointerType === 'touch'; - }; - const suppressTouchDblClick = (event: MouseEvent) => { - if (lastPointerWasTouch) { - event.stopImmediatePropagation(); - } - }; - node.addEventListener('pointerdown', trackPointerType, { capture: true }); - node.addEventListener('dblclick', suppressTouchDblClick, { capture: true }); + node.addEventListener('pointerdown', (event) => (lastPointerWasTouch = event.pointerType === 'touch'), { + capture: true, + signal, + }); + node.addEventListener( + 'wheel', + (event) => { + if (isOverlayEvent(event)) { + event.stopPropagation(); + } + }, + { capture: true, signal }, + ); + node.addEventListener( + 'dblclick', + (event) => { + if (lastPointerWasTouch || isOverlayEvent(event)) { + event.stopImmediatePropagation(); + } + }, + { capture: true, signal }, + ); - // Allow zoomed content to render outside the container bounds node.style.overflow = 'visible'; - // Prevent browser handling of touch gestures so zoom-image can manage them node.style.touchAction = 'none'; return { - update(newOptions?: { disabled?: boolean }) { + update(newOptions?: { zoomTarget?: HTMLElement }) { options = newOptions; + if (newOptions?.zoomTarget !== undefined) { + zoomInstance.setState({ zoomTarget: newOptions.zoomTarget }); + } }, destroy() { + controller.abort(); for (const unsubscribe of unsubscribes) { unsubscribe(); } - node.removeEventListener('wheel', onInteractionStart, { capture: true }); - node.removeEventListener('pointerdown', onInteractionStart, { capture: true }); - node.removeEventListener('pointerdown', trackPointerType, { capture: true }); - node.removeEventListener('dblclick', suppressTouchDblClick, { capture: true }); zoomInstance.cleanup(); }, }; diff --git a/web/src/lib/components/AdaptiveImage.svelte b/web/src/lib/components/AdaptiveImage.svelte index fad4d49d1b..90c9328cf8 100644 --- 
a/web/src/lib/components/AdaptiveImage.svelte +++ b/web/src/lib/components/AdaptiveImage.svelte @@ -7,7 +7,7 @@ import { assetViewerManager } from '$lib/managers/asset-viewer-manager.svelte'; import { getAssetUrls } from '$lib/utils'; import { AdaptiveImageLoader, type QualityList } from '$lib/utils/adaptive-image-loader.svelte'; - import { scaleToCover, scaleToFit } from '$lib/utils/container-utils'; + import { scaleToCover, scaleToFit, type Size } from '$lib/utils/container-utils'; import { getAltText } from '$lib/utils/thumbnail-util'; import { toTimelineAsset } from '$lib/utils/timeline-util'; import type { AssetResponseDto, SharedLinkResponseDto } from '@immich/sdk'; @@ -17,10 +17,7 @@ asset: AssetResponseDto; sharedLink?: SharedLinkResponseDto; objectFit?: 'contain' | 'cover'; - container: { - width: number; - height: number; - }; + container: Size; onUrlChange?: (url: string) => void; onImageReady?: () => void; onError?: () => void; @@ -149,81 +146,66 @@ (quality.preview === 'success' ? previewElement : undefined) ?? (quality.thumbnail === 'success' ? thumbnailElement : undefined); }); - - const zoomTransform = $derived.by(() => { - const { currentZoom, currentPositionX, currentPositionY } = assetViewerManager.zoomState; - if (currentZoom === 1 && currentPositionX === 0 && currentPositionY === 0) { - return undefined; - } - return `translate(${currentPositionX}px, ${currentPositionY}px) scale(${currentZoom})`; - });
{@render backdrop?.()} - -
-
- {#if show.alphaBackground} - - {/if} +
+ {#if show.alphaBackground} + + {/if} - {#if show.thumbhash} - {#if asset.thumbhash} - - - {:else if show.spinner} - - {/if} + {#if show.thumbhash} + {#if asset.thumbhash} + + + {:else if show.spinner} + {/if} + {/if} - {#if show.thumbnail} - - {/if} + {#if show.thumbnail} + + {/if} - {#if show.brokenAsset} - - {/if} + {#if show.brokenAsset} + + {/if} - {#if show.preview} - - {/if} + {#if show.preview} + + {/if} - {#if show.original} - - {/if} -
+ {#if show.original} + + {/if}
diff --git a/web/src/lib/components/asset-viewer/asset-viewer.svelte b/web/src/lib/components/asset-viewer/asset-viewer.svelte index 3f7b048c8f..572af0ff75 100644 --- a/web/src/lib/components/asset-viewer/asset-viewer.svelte +++ b/web/src/lib/components/asset-viewer/asset-viewer.svelte @@ -176,6 +176,7 @@ onDestroy(() => { activityManager.reset(); assetViewerManager.closeEditor(); + isFaceEditMode.value = false; syncAssetViewerOpenClass(false); preloadManager.destroy(); }); @@ -358,15 +359,18 @@ } }; + const refreshOcr = async () => { + ocrManager.clear(); + if (sharedLink) { + return; + } + + await ocrManager.getAssetOcr(asset.id); + }; + const refresh = async () => { await refreshStack(); - ocrManager.clear(); - if (!sharedLink) { - if (previewStackedAsset) { - await ocrManager.getAssetOcr(previewStackedAsset.id); - } - await ocrManager.getAssetOcr(asset.id); - } + await refreshOcr(); }; $effect(() => { @@ -375,6 +379,12 @@ untrack(() => handlePromiseError(refresh())); }); + $effect(() => { + // eslint-disable-next-line @typescript-eslint/no-unused-expressions + previewStackedAsset; + untrack(() => ocrManager.clear()); + }); + let lastCursor = $state(); $effect(() => { @@ -460,7 +470,7 @@
diff --git a/web/src/lib/components/asset-viewer/detail-panel.svelte b/web/src/lib/components/asset-viewer/detail-panel.svelte index e80d376f57..aaeafedafe 100644 --- a/web/src/lib/components/asset-viewer/detail-panel.svelte +++ b/web/src/lib/components/asset-viewer/detail-panel.svelte @@ -7,6 +7,7 @@ import { timeToLoadTheMap } from '$lib/constants'; import { assetViewerManager } from '$lib/managers/asset-viewer-manager.svelte'; import { authManager } from '$lib/managers/auth-manager.svelte'; + import { eventManager } from '$lib/managers/event-manager.svelte'; import { featureFlagsManager } from '$lib/managers/feature-flags-manager.svelte'; import AssetChangeDateModal from '$lib/modals/AssetChangeDateModal.svelte'; import { Route } from '$lib/route'; @@ -122,6 +123,7 @@ const handleRefreshPeople = async () => { asset = await getAssetInfo({ id: asset.id }); + eventManager.emit('AssetUpdate', asset); showEditFaces = false; }; @@ -233,8 +235,8 @@ href={Route.viewPerson(person, { previousRoute })} onfocus={() => ($boundingBoxesArray = people[index].faces)} onblur={() => ($boundingBoxesArray = [])} - onmouseover={() => ($boundingBoxesArray = people[index].faces)} - onmouseleave={() => ($boundingBoxesArray = [])} + onpointerover={() => ($boundingBoxesArray = people[index].faces)} + onpointerleave={() => ($boundingBoxesArray = [])} >
import ImageThumbnail from '$lib/components/assets/thumbnail/image-thumbnail.svelte'; + import { assetViewerManager } from '$lib/managers/asset-viewer-manager.svelte'; import { assetViewingStore } from '$lib/stores/asset-viewing.store'; import { isFaceEditMode } from '$lib/stores/face-edit.svelte'; import { getPeopleThumbnailUrl } from '$lib/utils'; - import { getNaturalSize, scaleToFit } from '$lib/utils/container-utils'; + import { computeContentMetrics, getNaturalSize, mapContentRectToNatural } from '$lib/utils/container-utils'; import { handleError } from '$lib/utils/handle-error'; + import { scaleFaceRectOnResize, type ResizeContext } from '$lib/utils/people-utils'; import { createFace, getAllPeople, type PersonResponseDto } from '@immich/sdk'; import { shortcut } from '$lib/actions/shortcut'; import { Button, Input, modalManager, toastManager } from '@immich/ui'; @@ -23,6 +25,7 @@ let { htmlElement, containerWidth, containerHeight, assetId }: Props = $props(); let canvasEl: HTMLCanvasElement | undefined = $state(); + let containerEl: HTMLDivElement | undefined = $state(); let canvas: Canvas | undefined = $state(); let faceRect: Rect | undefined = $state(); let faceSelectorEl: HTMLDivElement | undefined = $state(); @@ -32,6 +35,8 @@ let searchTerm = $state(''); let faceBoxPosition = $state({ left: 0, top: 0, width: 0, height: 0 }); + let userMovedRect = false; + let previousMetrics: ResizeContext | null = null; let filteredCandidates = $derived( searchTerm @@ -57,7 +62,8 @@ return; } - canvas = new Canvas(canvasEl); + canvas = new Canvas(canvasEl, { width: containerWidth, height: containerHeight }); + canvas.selection = false; configureControlStyle(); // eslint-disable-next-line tscompat/tscompat @@ -75,66 +81,100 @@ canvas.add(faceRect); canvas.setActiveObject(faceRect); - setDefaultFaceRectanglePosition(faceRect); }; - onMount(async () => { - setupCanvas(); - await getPeople(); + onMount(() => { + void getPeople(); }); - const imageContentMetrics = 
$derived.by(() => { - const natural = getNaturalSize(htmlElement); - const container = { width: containerWidth, height: containerHeight }; - const { width: contentWidth, height: contentHeight } = scaleToFit(natural, container); - return { - contentWidth, - contentHeight, - offsetX: (containerWidth - contentWidth) / 2, - offsetY: (containerHeight - contentHeight) / 2, - }; - }); - - const setDefaultFaceRectanglePosition = (faceRect: Rect) => { - const { offsetX, offsetY } = imageContentMetrics; - - faceRect.set({ - top: offsetY + 200, - left: offsetX + 200, - }); - - faceRect.setCoords(); - positionFaceSelector(); - }; - $effect(() => { if (!canvas) { return; } - canvas.setDimensions({ - width: containerWidth, - height: containerHeight, - }); + const upperCanvas = canvas.upperCanvasEl; + const controller = new AbortController(); + const { signal } = controller; - if (!faceRect) { + const stopIfOnTarget = (event: PointerEvent) => { + if (canvas?.findTarget(event).target) { + event.stopPropagation(); + } + }; + + const handlePointerDown = (event: PointerEvent) => { + if (!canvas) { + return; + } + if (canvas.findTarget(event).target) { + event.stopPropagation(); + return; + } + if (faceRect) { + event.stopPropagation(); + const pointer = canvas.getScenePoint(event); + faceRect.set({ left: pointer.x, top: pointer.y }); + faceRect.setCoords(); + userMovedRect = true; + canvas.renderAll(); + positionFaceSelector(); + } + }; + + upperCanvas.addEventListener('pointerdown', handlePointerDown, { signal }); + upperCanvas.addEventListener('pointermove', stopIfOnTarget, { signal }); + upperCanvas.addEventListener('pointerup', stopIfOnTarget, { signal }); + + return () => { + controller.abort(); + }; + }); + + const imageContentMetrics = $derived( + computeContentMetrics(getNaturalSize(htmlElement), { width: containerWidth, height: containerHeight }), + ); + + const setDefaultFaceRectanglePosition = (faceRect: Rect) => { + const { offsetX, offsetY, contentWidth, contentHeight } 
= imageContentMetrics; + + faceRect.set({ + top: offsetY + contentHeight / 2 - 56, + left: offsetX + contentWidth / 2 - 56, + }); + }; + + $effect(() => { + const { offsetX, offsetY, contentWidth } = imageContentMetrics; + + if (contentWidth === 0) { return; } - if (!isFaceRectIntersectingCanvas(faceRect, canvas)) { + const isFirstRun = previousMetrics === null; + + if (isFirstRun && !canvas) { + setupCanvas(); + } + + if (!canvas || !faceRect) { + return; + } + + if (!isFirstRun) { + canvas.setDimensions({ width: containerWidth, height: containerHeight }); + } + + if (!isFirstRun && userMovedRect && previousMetrics) { + faceRect.set(scaleFaceRectOnResize(faceRect, previousMetrics, { contentWidth, offsetX, offsetY })); + } else { setDefaultFaceRectanglePosition(faceRect); } - }); - const isFaceRectIntersectingCanvas = (faceRect: Rect, canvas: Canvas) => { - const faceBox = faceRect.getBoundingRect(); - return !( - 0 > faceBox.left + faceBox.width || - 0 > faceBox.top + faceBox.height || - canvas.width < faceBox.left || - canvas.height < faceBox.top - ); - }; + faceRect.setCoords(); + previousMetrics = { contentWidth, offsetX, offsetY }; + canvas.renderAll(); + positionFaceSelector(); + }); const cancel = () => { isFaceEditMode.value = false; @@ -164,11 +204,12 @@ const gap = 15; const padding = faceRect.padding ?? 
0; const rawBox = faceRect.getBoundingRect(); + const { currentZoom, currentPositionX, currentPositionY } = assetViewerManager.zoomState; const faceBox = { - left: rawBox.left - padding, - top: rawBox.top - padding, - width: rawBox.width + padding * 2, - height: rawBox.height + padding * 2, + left: (rawBox.left - padding) * currentZoom + currentPositionX, + top: (rawBox.top - padding) * currentZoom + currentPositionY, + width: (rawBox.width + padding * 2) * currentZoom, + height: (rawBox.height + padding * 2) * currentZoom, }; const selectorWidth = faceSelectorEl.offsetWidth; const chromeHeight = faceSelectorEl.offsetHeight - scrollableListEl.offsetHeight; @@ -178,20 +219,21 @@ const clampTop = (top: number) => clamp(top, gap, containerHeight - selectorHeight - gap); const clampLeft = (left: number) => clamp(left, gap, containerWidth - selectorWidth - gap); - const overlapArea = (position: { top: number; left: number }) => { - const selectorRight = position.left + selectorWidth; - const selectorBottom = position.top + selectorHeight; - const faceRight = faceBox.left + faceBox.width; - const faceBottom = faceBox.top + faceBox.height; + const faceRight = faceBox.left + faceBox.width; + const faceBottom = faceBox.top + faceBox.height; - const overlapX = Math.max(0, Math.min(selectorRight, faceRight) - Math.max(position.left, faceBox.left)); - const overlapY = Math.max(0, Math.min(selectorBottom, faceBottom) - Math.max(position.top, faceBox.top)); + const overlapArea = (position: { top: number; left: number }) => { + const overlapX = Math.max( + 0, + Math.min(position.left + selectorWidth, faceRight) - Math.max(position.left, faceBox.left), + ); + const overlapY = Math.max( + 0, + Math.min(position.top + selectorHeight, faceBottom) - Math.max(position.top, faceBox.top), + ); return overlapX * overlapY; }; - const faceBottom = faceBox.top + faceBox.height; - const faceRight = faceBox.left + faceBox.width; - const positions = [ { top: clampTop(faceBottom + gap), left: 
clampLeft(faceBox.left) }, { top: clampTop(faceBox.top - selectorHeight - gap), left: clampLeft(faceBox.left) }, @@ -213,45 +255,89 @@ } } - faceSelectorEl.style.top = `${bestPosition.top}px`; - faceSelectorEl.style.left = `${bestPosition.left}px`; + const containerRect = containerEl?.getBoundingClientRect(); + const offsetTop = containerRect?.top ?? 0; + const offsetLeft = containerRect?.left ?? 0; + faceSelectorEl.style.top = `${bestPosition.top + offsetTop}px`; + faceSelectorEl.style.left = `${bestPosition.left + offsetLeft}px`; scrollableListEl.style.height = `${listHeight}px`; - faceBoxPosition = { left: faceBox.left, top: faceBox.top, width: faceBox.width, height: faceBox.height }; + faceBoxPosition = faceBox; }; + $effect(() => { + if (!canvas) { + return; + } + + const { currentZoom, currentPositionX, currentPositionY } = assetViewerManager.zoomState; + canvas.setViewportTransform([currentZoom, 0, 0, currentZoom, currentPositionX, currentPositionY]); + canvas.renderAll(); + positionFaceSelector(); + }); + $effect(() => { const rect = faceRect; if (rect) { - rect.on('moving', positionFaceSelector); - rect.on('scaling', positionFaceSelector); + const onUserMove = () => { + userMovedRect = true; + positionFaceSelector(); + }; + rect.on('moving', onUserMove); + rect.on('scaling', onUserMove); return () => { - rect.off('moving', positionFaceSelector); - rect.off('scaling', positionFaceSelector); + rect.off('moving', onUserMove); + rect.off('scaling', onUserMove); }; } }); + const trapEvents = (node: HTMLElement) => { + const stop = (e: Event) => e.stopPropagation(); + const eventTypes = ['keydown', 'pointerdown', 'pointermove', 'pointerup'] as const; + for (const type of eventTypes) { + node.addEventListener(type, stop); + } + + // Move to body so the selector isn't affected by the zoom transform on the container + document.body.append(node); + + return { + destroy() { + for (const type of eventTypes) { + node.removeEventListener(type, stop); + } + 
node.remove(); + }, + }; + }; + const getFaceCroppedCoordinates = () => { if (!faceRect || !htmlElement) { return; } - const { left, top, width, height } = faceRect.getBoundingRect(); - const { offsetX, offsetY, contentWidth, contentHeight } = imageContentMetrics; + const scaledWidth = faceRect.getScaledWidth(); + const scaledHeight = faceRect.getScaledHeight(); const natural = getNaturalSize(htmlElement); - const scaleX = natural.width / contentWidth; - const scaleY = natural.height / contentHeight; - const imageX = (left - offsetX) * scaleX; - const imageY = (top - offsetY) * scaleY; + const imageRect = mapContentRectToNatural( + { + left: faceRect.left - scaledWidth / 2, + top: faceRect.top - scaledHeight / 2, + width: scaledWidth, + height: scaledHeight, + }, + imageContentMetrics, + natural, + ); return { imageWidth: natural.width, imageHeight: natural.height, - x: Math.floor(imageX), - y: Math.floor(imageY), - width: Math.floor(width * scaleX), - height: Math.floor(height * scaleY), + x: Math.floor(imageRect.left), + y: Math.floor(imageRect.top), + width: Math.floor(imageRect.width), + height: Math.floor(imageRect.height), }; }; @@ -282,10 +368,9 @@ }); await assetViewingStore.setAssetId(assetId); + isFaceEditMode.value = false; } catch (error) { handleError(error, 'Error tagging face'); - } finally { - isFaceEditMode.value = false; } }; @@ -294,6 +379,7 @@
e.stopPropagation()} >

{$t('select_person_to_tag')}

diff --git a/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte b/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte index d5551b9cc5..bc16a2b72d 100644 --- a/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte +++ b/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte @@ -1,4 +1,5 @@ -
-
- {ocrBox.text} -
+
+ {ocrBox.text}
diff --git a/web/src/lib/components/asset-viewer/photo-sphere-viewer-adapter.svelte b/web/src/lib/components/asset-viewer/photo-sphere-viewer-adapter.svelte index 926383d9c2..d46b5e0dc1 100644 --- a/web/src/lib/components/asset-viewer/photo-sphere-viewer-adapter.svelte +++ b/web/src/lib/components/asset-viewer/photo-sphere-viewer-adapter.svelte @@ -128,10 +128,8 @@ } const boxes = getOcrBoundingBoxes(ocrData, { - contentWidth: viewer.state.textureData.panoData.croppedWidth, - contentHeight: viewer.state.textureData.panoData.croppedHeight, - offsetX: 0, - offsetY: 0, + width: viewer.state.textureData.panoData.croppedWidth, + height: viewer.state.textureData.panoData.croppedHeight, }); for (const [index, box] of boxes.entries()) { diff --git a/web/src/lib/components/asset-viewer/photo-viewer.svelte b/web/src/lib/components/asset-viewer/photo-viewer.svelte index 4a6a02cb4a..369cb3b4d6 100644 --- a/web/src/lib/components/asset-viewer/photo-viewer.svelte +++ b/web/src/lib/components/asset-viewer/photo-viewer.svelte @@ -14,7 +14,7 @@ import { SlideshowLook, SlideshowState, slideshowStore } from '$lib/stores/slideshow.store'; import { handlePromiseError } from '$lib/utils'; import { canCopyImageToClipboard, copyImageToClipboard } from '$lib/utils/asset-utils'; - import { getNaturalSize, scaleToFit, type ContentMetrics } from '$lib/utils/container-utils'; + import { getNaturalSize, scaleToFit, type Size } from '$lib/utils/container-utils'; import { handleError } from '$lib/utils/handle-error'; import { getOcrBoundingBoxes } from '$lib/utils/ocr-utils'; import { getBoundingBox } from '$lib/utils/people-utils'; @@ -67,23 +67,15 @@ height: containerHeight, }); - const overlayMetrics = $derived.by((): ContentMetrics => { + const overlaySize = $derived.by((): Size => { if (!assetViewerManager.imgRef || !visibleImageReady) { - return { contentWidth: 0, contentHeight: 0, offsetX: 0, offsetY: 0 }; + return { width: 0, height: 0 }; } - const natural = 
getNaturalSize(assetViewerManager.imgRef); - const scaled = scaleToFit(natural, { width: containerWidth, height: containerHeight }); - - return { - contentWidth: scaled.width, - contentHeight: scaled.height, - offsetX: 0, - offsetY: 0, - }; + return scaleToFit(getNaturalSize(assetViewerManager.imgRef), { width: containerWidth, height: containerHeight }); }); - const ocrBoxes = $derived(ocrManager.showOverlay ? getOcrBoundingBoxes(ocrManager.data, overlayMetrics) : []); + const ocrBoxes = $derived(ocrManager.showOverlay ? getOcrBoundingBoxes(ocrManager.data, overlaySize) : []); const onCopy = async () => { if (!canCopyImageToClipboard() || !assetViewerManager.imgRef) { @@ -105,12 +97,6 @@ const onPlaySlideshow = () => ($slideshowState = SlideshowState.PlaySlideshow); - $effect(() => { - if (isFaceEditMode.value && assetViewerManager.zoom > 1) { - onZoom(); - } - }); - // TODO move to action + command palette const onCopyShortcut = (event: KeyboardEvent) => { if (globalThis.getSelection()?.type === 'Range') { @@ -151,6 +137,8 @@ $slideshowState !== SlideshowState.None && $slideshowLook === SlideshowLook.BlurredBackground && !!asset.thumbhash, ); + let adaptiveImage = $state(); + const faceToNameMap = $derived.by(() => { // eslint-disable-next-line svelte/prefer-svelte-reactivity const map = new Map(); @@ -159,40 +147,18 @@ map.set(face, person.name); } } + if (isFaceEditMode.value) { + for (const face of asset.unassignedFaces ?? 
[]) { + map.set(face, ''); + } + } return map; }); + // Array needed for indexed access in the template (faces[index]) const faces = $derived(Array.from(faceToNameMap.keys())); - - const handleImageMouseMove = (event: MouseEvent) => { - $boundingBoxesArray = []; - if (!assetViewerManager.imgRef || !element || isFaceEditMode.value || ocrManager.showOverlay) { - return; - } - - const natural = getNaturalSize(assetViewerManager.imgRef); - const scaled = scaleToFit(natural, container); - const { currentZoom, currentPositionX, currentPositionY } = assetViewerManager.zoomState; - - const contentOffsetX = (container.width - scaled.width) / 2; - const contentOffsetY = (container.height - scaled.height) / 2; - - const containerRect = element.getBoundingClientRect(); - const mouseX = (event.clientX - containerRect.left - contentOffsetX * currentZoom - currentPositionX) / currentZoom; - const mouseY = (event.clientY - containerRect.top - contentOffsetY * currentZoom - currentPositionY) / currentZoom; - - const faceBoxes = getBoundingBox(faces, overlayMetrics); - - for (const [index, box] of faceBoxes.entries()) { - if (mouseX >= box.left && mouseX <= box.left + box.width && mouseY >= box.top && mouseY <= box.top + box.height) { - $boundingBoxesArray.push(faces[index]); - } - } - }; - - const handleImageMouseLeave = () => { - $boundingBoxesArray = []; - }; + const boundingBoxes = $derived(getBoundingBox(faces, overlaySize)); + const activeBoundingBoxes = $derived(boundingBoxes.filter((box) => $boundingBoxesArray.some((f) => f.id === box.id))); @@ -213,9 +179,7 @@ bind:clientHeight={containerHeight} role="presentation" ondblclick={onZoom} - onmousemove={handleImageMouseMove} - onmouseleave={handleImageMouseLeave} - use:zoomImageAction={{ disabled: isFaceEditMode.value || ocrManager.showOverlay }} + use:zoomImageAction={{ zoomTarget: adaptiveImage }} {...useSwipe((event) => onSwipe?.(event))} > {#snippet backdrop()} {#if blurredSlideshow} @@ -243,20 +208,38 @@ {/if} {/snippet} 
{#snippet overlays()} - {#each getBoundingBox($boundingBoxesArray, overlayMetrics) as boundingbox, index (boundingbox.id)} -
- {#if faceToNameMap.get($boundingBoxesArray[index])} + {#if !isFaceEditMode.value} + {#each boundingBoxes as boundingbox, index (boundingbox.id)} + {@const face = faces[index]} + {@const name = faceToNameMap.get(face)} +
- {faceToNameMap.get($boundingBoxesArray[index])} -
- {/if} + class="absolute pointer-events-auto outline-none rounded-lg" + style="top: {boundingbox.top}px; left: {boundingbox.left}px; height: {boundingbox.height}px; width: {boundingbox.width}px;" + aria-label="{$t('person')}: {name || $t('unknown')}" + onpointerenter={() => ($boundingBoxesArray = [face])} + onpointerleave={() => ($boundingBoxesArray = [])} + >
+ {/each} + {/if} + + {#each activeBoundingBoxes as boundingbox (boundingbox.id)} + {@const face = faces.find((f) => f.id === boundingbox.id)} + {@const name = face ? faceToNameMap.get(face) : undefined} +
+ {#if name} + + {/if} +
{/each} {#each ocrBoxes as ocrBox (ocrBox.id)} diff --git a/web/src/lib/components/faces-page/person-side-panel.svelte b/web/src/lib/components/faces-page/person-side-panel.svelte index cad29706a4..a9f91c733c 100644 --- a/web/src/lib/components/faces-page/person-side-panel.svelte +++ b/web/src/lib/components/faces-page/person-side-panel.svelte @@ -232,8 +232,8 @@ tabindex={index} class="absolute start-0 top-0 h-22.5 w-22.5 cursor-default" onfocus={() => ($boundingBoxesArray = [peopleWithFaces[index]])} - onmouseover={() => ($boundingBoxesArray = [peopleWithFaces[index]])} - onmouseleave={() => ($boundingBoxesArray = [])} + onpointerover={() => ($boundingBoxesArray = [peopleWithFaces[index]])} + onpointerleave={() => ($boundingBoxesArray = [])} >
{#if selectedPersonToCreate[face.id]} diff --git a/web/src/lib/utils/container-utils.spec.ts b/web/src/lib/utils/container-utils.spec.ts index 802ed24e40..c7da5c0364 100644 --- a/web/src/lib/utils/container-utils.spec.ts +++ b/web/src/lib/utils/container-utils.spec.ts @@ -1,4 +1,14 @@ -import { getContentMetrics, getNaturalSize, scaleToFit } from '$lib/utils/container-utils'; +import { + computeContentMetrics, + getContentMetrics, + getNaturalSize, + mapContentRectToNatural, + mapContentToNatural, + mapNormalizedRectToContent, + mapNormalizedToContent, + scaleToCover, + scaleToFit, +} from '$lib/utils/container-utils'; const mockImage = (props: { naturalWidth: number; @@ -92,3 +102,165 @@ describe('getNaturalSize', () => { expect(getNaturalSize(video)).toEqual({ width: 1920, height: 1080 }); }); }); + +describe('scaleToCover', () => { + it('should scale up to cover container when image is smaller', () => { + expect(scaleToCover({ width: 400, height: 300 }, { width: 800, height: 600 })).toEqual({ + width: 800, + height: 600, + }); + }); + + it('should use height scale when image is wider than container', () => { + expect(scaleToCover({ width: 2000, height: 1000 }, { width: 800, height: 600 })).toEqual({ + width: 1200, + height: 600, + }); + }); + + it('should use width scale when image is taller than container', () => { + expect(scaleToCover({ width: 1000, height: 2000 }, { width: 800, height: 600 })).toEqual({ + width: 800, + height: 1600, + }); + }); +}); + +describe('computeContentMetrics', () => { + it('should compute metrics with scaleToFit by default', () => { + expect(computeContentMetrics({ width: 2000, height: 1000 }, { width: 800, height: 600 })).toEqual({ + contentWidth: 800, + contentHeight: 400, + offsetX: 0, + offsetY: 100, + }); + }); + + it('should accept scaleToCover as scale function', () => { + expect(computeContentMetrics({ width: 2000, height: 1000 }, { width: 800, height: 600 }, scaleToCover)).toEqual({ + contentWidth: 1200, + contentHeight: 
600, + offsetX: -200, + offsetY: 0, + }); + }); + + it('should compute zero offsets when aspect ratios match', () => { + expect(computeContentMetrics({ width: 1600, height: 900 }, { width: 800, height: 450 })).toEqual({ + contentWidth: 800, + contentHeight: 450, + offsetX: 0, + offsetY: 0, + }); + }); +}); + +// Coordinate space glossary: +// +// "Normalized" coordinates: values in the 0–1 range, where (0,0) is the top-left +// of the image and (1,1) is the bottom-right. Resolution-independent. +// +// "Content" coordinates: pixel positions within the container, after the image +// has been scaled (scaleToFit/scaleToCover) and offset (centered). This is what +// CSS and DOM layout use for positioning overlays like face boxes and OCR text. +// +// "Natural" coordinates: pixel positions in the original image file at its full +// resolution (e.g. 4000×3000). Used when cropping or drawing on the source image. +// +// "Metadata pixel space": the coordinate system used by face detection / OCR +// models, where positions are in pixels relative to the image dimensions stored +// in metadata (face.imageWidth/imageHeight). These may differ from the natural +// dimensions if the image was resized. To convert to normalized, divide by +// the metadata dimensions (e.g. face.boundingBoxX1 / face.imageWidth). 
+ +describe('mapNormalizedToContent', () => { + const metrics = { contentWidth: 800, contentHeight: 400, offsetX: 0, offsetY: 100 }; + + it('should map top-left corner', () => { + expect(mapNormalizedToContent({ x: 0, y: 0 }, metrics)).toEqual({ x: 0, y: 100 }); + }); + + it('should map bottom-right corner', () => { + expect(mapNormalizedToContent({ x: 1, y: 1 }, metrics)).toEqual({ x: 800, y: 500 }); + }); + + it('should map center point', () => { + expect(mapNormalizedToContent({ x: 0.5, y: 0.5 }, metrics)).toEqual({ x: 400, y: 300 }); + }); + + it('should apply offsets correctly for letterboxed content', () => { + const letterboxed = { contentWidth: 300, contentHeight: 600, offsetX: 250, offsetY: 0 }; + expect(mapNormalizedToContent({ x: 0, y: 0 }, letterboxed)).toEqual({ x: 250, y: 0 }); + expect(mapNormalizedToContent({ x: 1, y: 1 }, letterboxed)).toEqual({ x: 550, y: 600 }); + }); +}); + +describe('mapContentToNatural', () => { + const metrics = { contentWidth: 800, contentHeight: 400, offsetX: 0, offsetY: 100 }; + const natural = { width: 4000, height: 2000 }; + + it('should map content origin to natural origin', () => { + expect(mapContentToNatural({ x: 0, y: 100 }, metrics, natural)).toEqual({ x: 0, y: 0 }); + }); + + it('should map content bottom-right to natural bottom-right', () => { + expect(mapContentToNatural({ x: 800, y: 500 }, metrics, natural)).toEqual({ x: 4000, y: 2000 }); + }); + + it('should map content center to natural center', () => { + expect(mapContentToNatural({ x: 400, y: 300 }, metrics, natural)).toEqual({ x: 2000, y: 1000 }); + }); + + it('should be the inverse of mapNormalizedToContent', () => { + const normalized = { x: 0.3, y: 0.7 }; + const contentPoint = mapNormalizedToContent(normalized, metrics); + const naturalPoint = mapContentToNatural(contentPoint, metrics, natural); + expect(naturalPoint.x).toBeCloseTo(normalized.x * natural.width); + expect(naturalPoint.y).toBeCloseTo(normalized.y * natural.height); + }); +}); + 
+describe('mapNormalizedRectToContent', () => { + const metrics = { contentWidth: 800, contentHeight: 400, offsetX: 0, offsetY: 100 }; + + it('should map a normalized rect to content pixel coordinates', () => { + const rect = mapNormalizedRectToContent({ x: 0.25, y: 0.25 }, { x: 0.75, y: 0.75 }, metrics); + expect(rect).toEqual({ left: 200, top: 200, width: 400, height: 200 }); + }); + + it('should map full image rect', () => { + const rect = mapNormalizedRectToContent({ x: 0, y: 0 }, { x: 1, y: 1 }, metrics); + expect(rect).toEqual({ left: 0, top: 100, width: 800, height: 400 }); + }); + + it('should handle letterboxed content with horizontal offsets', () => { + const letterboxed = { contentWidth: 300, contentHeight: 600, offsetX: 250, offsetY: 0 }; + const rect = mapNormalizedRectToContent({ x: 0, y: 0 }, { x: 1, y: 1 }, letterboxed); + expect(rect).toEqual({ left: 250, top: 0, width: 300, height: 600 }); + }); +}); + +describe('mapContentRectToNatural', () => { + const metrics = { contentWidth: 800, contentHeight: 400, offsetX: 0, offsetY: 100 }; + const natural = { width: 4000, height: 2000 }; + + it('should map a content rect to natural image coordinates', () => { + const rect = mapContentRectToNatural({ left: 200, top: 200, width: 400, height: 200 }, metrics, natural); + expect(rect).toEqual({ left: 1000, top: 500, width: 2000, height: 1000 }); + }); + + it('should map full content rect to full natural dimensions', () => { + const rect = mapContentRectToNatural({ left: 0, top: 100, width: 800, height: 400 }, metrics, natural); + expect(rect).toEqual({ left: 0, top: 0, width: 4000, height: 2000 }); + }); + + it('should be the inverse of mapNormalizedRectToContent', () => { + const normalized = { topLeft: { x: 0.2, y: 0.3 }, bottomRight: { x: 0.8, y: 0.9 } }; + const contentRect = mapNormalizedRectToContent(normalized.topLeft, normalized.bottomRight, metrics); + const naturalRect = mapContentRectToNatural(contentRect, metrics, natural); + 
expect(naturalRect.left).toBeCloseTo(normalized.topLeft.x * natural.width); + expect(naturalRect.top).toBeCloseTo(normalized.topLeft.y * natural.height); + expect(naturalRect.width).toBeCloseTo((normalized.bottomRight.x - normalized.topLeft.x) * natural.width); + expect(naturalRect.height).toBeCloseTo((normalized.bottomRight.y - normalized.topLeft.y) * natural.height); + }); +}); diff --git a/web/src/lib/utils/container-utils.ts b/web/src/lib/utils/container-utils.ts index ffa2fae769..da16cc3df6 100644 --- a/web/src/lib/utils/container-utils.ts +++ b/web/src/lib/utils/container-utils.ts @@ -1,3 +1,27 @@ +// Coordinate spaces used throughout the viewer: +// +// "Normalized": 0–1 range, (0,0) = top-left, (1,1) = bottom-right. Resolution-independent. +// Example: OCR coordinates, or face coords after dividing by metadata dimensions. +// +// "Content": pixel position within the container after scaling (scaleToFit/scaleToCover) +// and centering. Used for DOM overlay positioning (face boxes, OCR text). +// +// "Natural": pixel position in the original full-resolution image file (e.g. 4000×3000). +// Used when cropping or drawing on the source image. +// +// "Metadata pixel space": coordinates from face detection / OCR models, in pixels relative +// to face.imageWidth/imageHeight. Divide by those dimensions to get normalized coords. 
+ +export interface Point { + x: number; + y: number; +} + +export interface Size { + width: number; + height: number; +} + export interface ContentMetrics { contentWidth: number; contentHeight: number; @@ -5,10 +29,7 @@ export interface ContentMetrics { offsetY: number; } -export const scaleToCover = ( - dimensions: { width: number; height: number }, - container: { width: number; height: number }, -): { width: number; height: number } => { +export const scaleToCover = (dimensions: Size, container: Size): Size => { const scaleX = container.width / dimensions.width; const scaleY = container.height / dimensions.height; const scale = Math.max(scaleX, scaleY); @@ -18,10 +39,7 @@ export const scaleToCover = ( }; }; -export const scaleToFit = ( - dimensions: { width: number; height: number }, - container: { width: number; height: number }, -): { width: number; height: number } => { +export const scaleToFit = (dimensions: Size, container: Size): Size => { const scaleX = container.width / dimensions.width; const scaleY = container.height / dimensions.height; const scale = Math.min(scaleX, scaleY); @@ -31,28 +49,83 @@ export const scaleToFit = ( }; }; -const getElementSize = (element: HTMLImageElement | HTMLVideoElement): { width: number; height: number } => { +const getElementSize = (element: HTMLImageElement | HTMLVideoElement): Size => { if (element instanceof HTMLVideoElement) { return { width: element.clientWidth, height: element.clientHeight }; } return { width: element.width, height: element.height }; }; -export const getNaturalSize = (element: HTMLImageElement | HTMLVideoElement): { width: number; height: number } => { +export const getNaturalSize = (element: HTMLImageElement | HTMLVideoElement): Size => { if (element instanceof HTMLVideoElement) { return { width: element.videoWidth, height: element.videoHeight }; } return { width: element.naturalWidth, height: element.naturalHeight }; }; -export const getContentMetrics = (element: HTMLImageElement | 
HTMLVideoElement): ContentMetrics => { - const natural = getNaturalSize(element); - const client = getElementSize(element); - const { width: contentWidth, height: contentHeight } = scaleToFit(natural, client); +export function computeContentMetrics( + imageSize: Size, + containerSize: Size, + scaleFn: (dimensions: Size, container: Size) => Size = scaleToFit, +) { + const { width: contentWidth, height: contentHeight } = scaleFn(imageSize, containerSize); return { contentWidth, contentHeight, - offsetX: (client.width - contentWidth) / 2, - offsetY: (client.height - contentHeight) / 2, + offsetX: (containerSize.width - contentWidth) / 2, + offsetY: (containerSize.height - contentHeight) / 2, }; +} + +export const getContentMetrics = (element: HTMLImageElement | HTMLVideoElement): ContentMetrics => { + const natural = getNaturalSize(element); + const client = getElementSize(element); + return computeContentMetrics(natural, client); }; + +export function mapNormalizedToContent(point: Point, metrics: ContentMetrics): Point { + return { + x: point.x * metrics.contentWidth + metrics.offsetX, + y: point.y * metrics.contentHeight + metrics.offsetY, + }; +} + +export function mapContentToNatural(point: Point, metrics: ContentMetrics, naturalSize: Size): Point { + return { + x: ((point.x - metrics.offsetX) / metrics.contentWidth) * naturalSize.width, + y: ((point.y - metrics.offsetY) / metrics.contentHeight) * naturalSize.height, + }; +} + +export interface Rect { + top: number; + left: number; + width: number; + height: number; +} + +export function mapNormalizedRectToContent(topLeft: Point, bottomRight: Point, metrics: ContentMetrics): Rect { + const tl = mapNormalizedToContent(topLeft, metrics); + const br = mapNormalizedToContent(bottomRight, metrics); + return { + top: tl.y, + left: tl.x, + width: br.x - tl.x, + height: br.y - tl.y, + }; +} + +export function mapContentRectToNatural(rect: Rect, metrics: ContentMetrics, naturalSize: Size): Rect { + const topLeft = 
mapContentToNatural({ x: rect.left, y: rect.top }, metrics, naturalSize); + const bottomRight = mapContentToNatural( + { x: rect.left + rect.width, y: rect.top + rect.height }, + metrics, + naturalSize, + ); + return { + top: topLeft.y, + left: topLeft.x, + width: bottomRight.x - topLeft.x, + height: bottomRight.y - topLeft.y, + }; +} diff --git a/web/src/lib/utils/ocr-utils.spec.ts b/web/src/lib/utils/ocr-utils.spec.ts index c3ce70394d..c88936fddf 100644 --- a/web/src/lib/utils/ocr-utils.spec.ts +++ b/web/src/lib/utils/ocr-utils.spec.ts @@ -1,5 +1,5 @@ import type { OcrBoundingBox } from '$lib/stores/ocr.svelte'; -import type { ContentMetrics } from '$lib/utils/container-utils'; +import type { Size } from '$lib/utils/container-utils'; import { getOcrBoundingBoxes } from '$lib/utils/ocr-utils'; describe('getOcrBoundingBoxes', () => { @@ -21,9 +21,9 @@ describe('getOcrBoundingBoxes', () => { text: 'hello', }, ]; - const metrics: ContentMetrics = { contentWidth: 1000, contentHeight: 500, offsetX: 0, offsetY: 0 }; + const imageSize: Size = { width: 1000, height: 500 }; - const boxes = getOcrBoundingBoxes(ocrData, metrics); + const boxes = getOcrBoundingBoxes(ocrData, imageSize); expect(boxes).toHaveLength(1); expect(boxes[0].id).toBe('box1'); @@ -37,7 +37,7 @@ describe('getOcrBoundingBoxes', () => { ]); }); - it('should apply offsets for letterboxed images', () => { + it('should map full-image box to full display area', () => { const ocrData: OcrBoundingBox[] = [ { id: 'box1', @@ -55,21 +55,20 @@ describe('getOcrBoundingBoxes', () => { text: 'test', }, ]; - const metrics: ContentMetrics = { contentWidth: 600, contentHeight: 400, offsetX: 100, offsetY: 50 }; + const imageSize: Size = { width: 600, height: 400 }; - const boxes = getOcrBoundingBoxes(ocrData, metrics); + const boxes = getOcrBoundingBoxes(ocrData, imageSize); expect(boxes[0].points).toEqual([ - { x: 100, y: 50 }, - { x: 700, y: 50 }, - { x: 700, y: 450 }, - { x: 100, y: 450 }, + { x: 0, y: 0 }, + { x: 600, 
y: 0 }, + { x: 600, y: 400 }, + { x: 0, y: 400 }, ]); }); it('should return empty array for empty input', () => { - const metrics: ContentMetrics = { contentWidth: 800, contentHeight: 600, offsetX: 0, offsetY: 0 }; - expect(getOcrBoundingBoxes([], metrics)).toEqual([]); + expect(getOcrBoundingBoxes([], { width: 800, height: 600 })).toEqual([]); }); it('should handle multiple boxes', () => { @@ -105,9 +104,9 @@ describe('getOcrBoundingBoxes', () => { text: 'second', }, ]; - const metrics: ContentMetrics = { contentWidth: 200, contentHeight: 200, offsetX: 0, offsetY: 0 }; + const imageSize: Size = { width: 200, height: 200 }; - const boxes = getOcrBoundingBoxes(ocrData, metrics); + const boxes = getOcrBoundingBoxes(ocrData, imageSize); expect(boxes).toHaveLength(2); expect(boxes[0].text).toBe('first'); diff --git a/web/src/lib/utils/ocr-utils.ts b/web/src/lib/utils/ocr-utils.ts index c483eb9551..2806e9c801 100644 --- a/web/src/lib/utils/ocr-utils.ts +++ b/web/src/lib/utils/ocr-utils.ts @@ -1,11 +1,7 @@ import type { OcrBoundingBox } from '$lib/stores/ocr.svelte'; -import type { ContentMetrics } from '$lib/utils/container-utils'; +import { mapNormalizedToContent, type ContentMetrics, type Point, type Size } from '$lib/utils/container-utils'; import { clamp } from 'lodash-es'; - -export type Point = { - x: number; - y: number; -}; +export type { Point } from '$lib/utils/container-utils'; const distance = (p1: Point, p2: Point) => Math.hypot(p2.x - p1.x, p2.y - p1.y); @@ -38,7 +34,7 @@ const getVerticalMode = (width: number, height: number, text: string): VerticalM * @param points - Array of 4 corner points of the bounding box * @returns 4x4 matrix to transform the div with text onto the polygon defined by the corner points, and size to set on the source div. 
*/ -export const calculateBoundingBoxMatrix = (points: Point[]): { matrix: number[]; width: number; height: number } => { +export const calculateBoundingBoxMatrix = (points: Point[]): Size & { matrix: number[] } => { const [topLeft, topRight, bottomRight, bottomLeft] = points; const width = Math.max(distance(topLeft, topRight), distance(bottomLeft, bottomRight)); @@ -163,7 +159,13 @@ export const calculateFittedFontSize = ( return clamp(Math.min(scaleFromWidth, scaleFromHeight), MIN_FONT_SIZE, MAX_FONT_SIZE); }; -export const getOcrBoundingBoxes = (ocrData: OcrBoundingBox[], metrics: ContentMetrics): OcrBox[] => { +export const getOcrBoundingBoxes = (ocrData: OcrBoundingBox[], imageSize: Size): OcrBox[] => { + const metrics: ContentMetrics = { + contentWidth: imageSize.width, + contentHeight: imageSize.height, + offsetX: 0, + offsetY: 0, + }; const boxes: OcrBox[] = []; for (const ocr of ocrData) { const points = [ @@ -171,10 +173,7 @@ export const getOcrBoundingBoxes = (ocrData: OcrBoundingBox[], metrics: ContentM { x: ocr.x2, y: ocr.y2 }, { x: ocr.x3, y: ocr.y3 }, { x: ocr.x4, y: ocr.y4 }, - ].map((point) => ({ - x: point.x * metrics.contentWidth + metrics.offsetX, - y: point.y * metrics.contentHeight + metrics.offsetY, - })); + ].map((point) => mapNormalizedToContent(point, metrics)); const boxWidth = Math.max(distance(points[0], points[1]), distance(points[3], points[2])); const boxHeight = Math.max(distance(points[0], points[3]), distance(points[1], points[2])); @@ -188,7 +187,7 @@ export const getOcrBoundingBoxes = (ocrData: OcrBoundingBox[], metrics: ContentM }); } - const rowThreshold = metrics.contentHeight * 0.02; + const rowThreshold = imageSize.height * 0.02; boxes.sort((a, b) => { const yDifference = a.points[0].y - b.points[0].y; if (Math.abs(yDifference) < rowThreshold) { diff --git a/web/src/lib/utils/people-utils.spec.ts b/web/src/lib/utils/people-utils.spec.ts index 80371bd9c4..f34920bf64 100644 --- a/web/src/lib/utils/people-utils.spec.ts +++ 
b/web/src/lib/utils/people-utils.spec.ts @@ -1,6 +1,6 @@ import type { Faces } from '$lib/stores/people.store'; -import type { ContentMetrics } from '$lib/utils/container-utils'; -import { getBoundingBox } from '$lib/utils/people-utils'; +import type { Size } from '$lib/utils/container-utils'; +import { getBoundingBox, scaleFaceRectOnResize, type FaceRectState, type ResizeContext } from '$lib/utils/people-utils'; const makeFace = (overrides: Partial = {}): Faces => ({ id: 'face-1', @@ -16,21 +16,21 @@ const makeFace = (overrides: Partial = {}): Faces => ({ describe('getBoundingBox', () => { it('should scale face coordinates to display dimensions', () => { const face = makeFace(); - const metrics: ContentMetrics = { contentWidth: 800, contentHeight: 600, offsetX: 0, offsetY: 0 }; + const imageSize: Size = { width: 800, height: 600 }; - const boxes = getBoundingBox([face], metrics); + const boxes = getBoundingBox([face], imageSize); expect(boxes).toHaveLength(1); expect(boxes[0]).toEqual({ id: 'face-1', - top: Math.round(600 * (750 / 3000)), - left: Math.round(800 * (1000 / 4000)), - width: Math.round(800 * (2000 / 4000) - 800 * (1000 / 4000)), - height: Math.round(600 * (1500 / 3000) - 600 * (750 / 3000)), + top: 600 * (750 / 3000), + left: 800 * (1000 / 4000), + width: 800 * (2000 / 4000) - 800 * (1000 / 4000), + height: 600 * (1500 / 3000) - 600 * (750 / 3000), }); }); - it('should apply offsets for letterboxed display', () => { + it('should map full-image face to full display area', () => { const face = makeFace({ imageWidth: 1000, imageHeight: 1000, @@ -39,49 +39,21 @@ describe('getBoundingBox', () => { boundingBoxX2: 1000, boundingBoxY2: 1000, }); - const metrics: ContentMetrics = { contentWidth: 600, contentHeight: 600, offsetX: 100, offsetY: 0 }; + const imageSize: Size = { width: 600, height: 600 }; - const boxes = getBoundingBox([face], metrics); + const boxes = getBoundingBox([face], imageSize); expect(boxes[0]).toEqual({ id: 'face-1', top: 0, - left: 100, 
+ left: 0, width: 600, height: 600, }); }); - it('should handle zoom by pre-scaled metrics', () => { - const face = makeFace({ - imageWidth: 1000, - imageHeight: 1000, - boundingBoxX1: 0, - boundingBoxY1: 0, - boundingBoxX2: 500, - boundingBoxY2: 500, - }); - const metrics: ContentMetrics = { - contentWidth: 1600, - contentHeight: 1200, - offsetX: -200, - offsetY: -100, - }; - - const boxes = getBoundingBox([face], metrics); - - expect(boxes[0]).toEqual({ - id: 'face-1', - top: -100, - left: -200, - width: 800, - height: 600, - }); - }); - it('should return empty array for empty faces', () => { - const metrics: ContentMetrics = { contentWidth: 800, contentHeight: 600, offsetX: 0, offsetY: 0 }; - expect(getBoundingBox([], metrics)).toEqual([]); + expect(getBoundingBox([], { width: 800, height: 600 })).toEqual([]); }); it('should handle multiple faces', () => { @@ -89,11 +61,103 @@ describe('getBoundingBox', () => { makeFace({ id: 'face-1', boundingBoxX1: 0, boundingBoxY1: 0, boundingBoxX2: 1000, boundingBoxY2: 1000 }), makeFace({ id: 'face-2', boundingBoxX1: 2000, boundingBoxY1: 1500, boundingBoxX2: 3000, boundingBoxY2: 2500 }), ]; - const metrics: ContentMetrics = { contentWidth: 800, contentHeight: 600, offsetX: 0, offsetY: 0 }; - const boxes = getBoundingBox(faces, metrics); + const boxes = getBoundingBox(faces, { width: 800, height: 600 }); expect(boxes).toHaveLength(2); expect(boxes[0].left).toBeLessThan(boxes[1].left); }); }); + +describe('scaleFaceRectOnResize', () => { + const makeRect = (overrides: Partial = {}): FaceRectState => ({ + left: 300, + top: 400, + scaleX: 1, + scaleY: 1, + ...overrides, + }); + + const makePrevious = (overrides: Partial = {}): ResizeContext => ({ + offsetX: 100, + offsetY: 50, + contentWidth: 800, + ...overrides, + }); + + it('should preserve relative position when container doubles in size', () => { + const rect = makeRect({ left: 300, top: 250 }); + const previous = makePrevious({ offsetX: 100, offsetY: 50, contentWidth: 800 
}); + + const result = scaleFaceRectOnResize(rect, previous, { offsetX: 200, offsetY: 100, contentWidth: 1600 }); + + // imageRelLeft = (300 - 100) * 2 = 400, new left = 200 + 400 = 600 + // imageRelTop = (250 - 50) * 2 = 400, new top = 100 + 400 = 500 + expect(result.left).toBe(600); + expect(result.top).toBe(500); + expect(result.scaleX).toBe(2); + expect(result.scaleY).toBe(2); + }); + + it('should preserve relative position when container halves in size', () => { + const rect = makeRect({ left: 300, top: 250 }); + const previous = makePrevious({ offsetX: 100, offsetY: 50, contentWidth: 800 }); + + const result = scaleFaceRectOnResize(rect, previous, { offsetX: 50, offsetY: 25, contentWidth: 400 }); + + // imageRelLeft = (300 - 100) * 0.5 = 100, new left = 50 + 100 = 150 + // imageRelTop = (250 - 50) * 0.5 = 100, new top = 25 + 100 = 125 + expect(result.left).toBe(150); + expect(result.top).toBe(125); + expect(result.scaleX).toBe(0.5); + expect(result.scaleY).toBe(0.5); + }); + + it('should handle no change in dimensions', () => { + const rect = makeRect({ left: 300, top: 250, scaleX: 1.5, scaleY: 1.5 }); + const previous = makePrevious({ offsetX: 100, offsetY: 50, contentWidth: 800 }); + + const result = scaleFaceRectOnResize(rect, previous, { offsetX: 100, offsetY: 50, contentWidth: 800 }); + + expect(result.left).toBe(300); + expect(result.top).toBe(250); + expect(result.scaleX).toBe(1.5); + expect(result.scaleY).toBe(1.5); + }); + + it('should handle offset changes without content width change', () => { + const rect = makeRect({ left: 300, top: 250 }); + const previous = makePrevious({ offsetX: 100, offsetY: 50, contentWidth: 800 }); + + const result = scaleFaceRectOnResize(rect, previous, { offsetX: 150, offsetY: 75, contentWidth: 800 }); + + // scale = 1, imageRelLeft = 200, imageRelTop = 200 + // new left = 150 + 200 = 350, new top = 75 + 200 = 275 + expect(result.left).toBe(350); + expect(result.top).toBe(275); + expect(result.scaleX).toBe(1); + 
expect(result.scaleY).toBe(1); + }); + + it('should compound existing scale factors', () => { + const rect = makeRect({ left: 300, top: 250, scaleX: 2, scaleY: 3 }); + const previous = makePrevious({ contentWidth: 800 }); + + const result = scaleFaceRectOnResize(rect, previous, { ...previous, contentWidth: 1600 }); + + expect(result.scaleX).toBe(4); + expect(result.scaleY).toBe(6); + }); + + it('should handle rect at image origin (top-left of content area)', () => { + const rect = makeRect({ left: 100, top: 50 }); + const previous = makePrevious({ offsetX: 100, offsetY: 50, contentWidth: 800 }); + + const result = scaleFaceRectOnResize(rect, previous, { offsetX: 200, offsetY: 100, contentWidth: 1600 }); + + // imageRelLeft = (100 - 100) * 2 = 0, new left = 200 + // imageRelTop = (50 - 50) * 2 = 0, new top = 100 + expect(result.left).toBe(200); + expect(result.top).toBe(100); + }); +}); diff --git a/web/src/lib/utils/people-utils.ts b/web/src/lib/utils/people-utils.ts index b8fb8973e6..64f407bae2 100644 --- a/web/src/lib/utils/people-utils.ts +++ b/web/src/lib/utils/people-utils.ts @@ -1,42 +1,58 @@ import type { Faces } from '$lib/stores/people.store'; import { getAssetMediaUrl } from '$lib/utils'; -import type { ContentMetrics } from '$lib/utils/container-utils'; +import { mapNormalizedRectToContent, type ContentMetrics, type Rect, type Size } from '$lib/utils/container-utils'; import { AssetTypeEnum, type AssetFaceResponseDto } from '@immich/sdk'; -export interface BoundingBox { - id: string; - top: number; - left: number; - width: number; - height: number; -} +export type BoundingBox = Rect & { id: string }; -export const getBoundingBox = (faces: Faces[], metrics: ContentMetrics): BoundingBox[] => { +export const getBoundingBox = (faces: Faces[], imageSize: Size): BoundingBox[] => { + const metrics: ContentMetrics = { + contentWidth: imageSize.width, + contentHeight: imageSize.height, + offsetX: 0, + offsetY: 0, + }; const boxes: BoundingBox[] = []; for (const 
face of faces) { - const scaleX = metrics.contentWidth / face.imageWidth; - const scaleY = metrics.contentHeight / face.imageHeight; + const rect = mapNormalizedRectToContent( + { x: face.boundingBoxX1 / face.imageWidth, y: face.boundingBoxY1 / face.imageHeight }, + { x: face.boundingBoxX2 / face.imageWidth, y: face.boundingBoxY2 / face.imageHeight }, + metrics, + ); - const coordinates = { - x1: scaleX * face.boundingBoxX1 + metrics.offsetX, - x2: scaleX * face.boundingBoxX2 + metrics.offsetX, - y1: scaleY * face.boundingBoxY1 + metrics.offsetY, - y2: scaleY * face.boundingBoxY2 + metrics.offsetY, - }; - - boxes.push({ - id: face.id, - top: Math.round(coordinates.y1), - left: Math.round(coordinates.x1), - width: Math.round(coordinates.x2 - coordinates.x1), - height: Math.round(coordinates.y2 - coordinates.y1), - }); + boxes.push({ id: face.id, ...rect }); } return boxes; }; +export type FaceRectState = { + left: number; + top: number; + scaleX: number; + scaleY: number; +}; + +export type ResizeContext = Pick; + +export const scaleFaceRectOnResize = ( + faceRect: FaceRectState, + previous: ResizeContext, + current: ResizeContext, +): FaceRectState => { + const scale = current.contentWidth / previous.contentWidth; + const imageRelativeLeft = (faceRect.left - previous.offsetX) * scale; + const imageRelativeTop = (faceRect.top - previous.offsetY) * scale; + + return { + left: current.offsetX + imageRelativeLeft, + top: current.offsetY + imageRelativeTop, + scaleX: faceRect.scaleX * scale, + scaleY: faceRect.scaleY * scale, + }; +}; + export const zoomImageToBase64 = async ( face: AssetFaceResponseDto, assetId: string,