diff --git a/e2e/src/ui/mock-network/ocr-network.ts b/e2e/src/ui/mock-network/ocr-network.ts new file mode 100644 index 0000000000..3b1a2fe62e --- /dev/null +++ b/e2e/src/ui/mock-network/ocr-network.ts @@ -0,0 +1,55 @@ +import { faker } from '@faker-js/faker'; +import type { AssetOcrResponseDto } from '@immich/sdk'; +import { BrowserContext } from '@playwright/test'; + +export type MockOcrBox = { + text: string; + x1: number; + y1: number; + x2: number; + y2: number; + x3: number; + y3: number; + x4: number; + y4: number; +}; + +export const createMockOcrData = (assetId: string, boxes: MockOcrBox[]): AssetOcrResponseDto[] => { + return boxes.map((box) => ({ + id: faker.string.uuid(), + assetId, + x1: box.x1, + y1: box.y1, + x2: box.x2, + y2: box.y2, + x3: box.x3, + y3: box.y3, + x4: box.x4, + y4: box.y4, + boxScore: 0.95, + textScore: 0.9, + text: box.text, + })); +}; + +export const setupOcrMockApiRoutes = async ( + context: BrowserContext, + ocrDataByAssetId: Map<string, AssetOcrResponseDto[]>, +) => { + await context.route('**/assets/*/ocr', async (route, request) => { + if (request.method() !== 'GET') { + return route.fallback(); + } + const url = new URL(request.url()); + const segments = url.pathname.split('/'); + const assetIdIndex = segments.indexOf('assets') + 1; + const assetId = segments[assetIdIndex]; + + const ocrData = ocrDataByAssetId.get(assetId) ?? 
[]; + return route.fulfill({ + status: 200, + contentType: 'application/json', + json: ocrData, + }); + }); +}; diff --git a/e2e/src/ui/specs/asset-viewer/ocr.e2e-spec.ts b/e2e/src/ui/specs/asset-viewer/ocr.e2e-spec.ts new file mode 100644 index 0000000000..5a442a6081 --- /dev/null +++ b/e2e/src/ui/specs/asset-viewer/ocr.e2e-spec.ts @@ -0,0 +1,300 @@ +import type { AssetOcrResponseDto, AssetResponseDto } from '@immich/sdk'; +import { expect, test } from '@playwright/test'; +import { toAssetResponseDto } from 'src/ui/generators/timeline'; +import { + createMockStack, + createMockStackAsset, + MockStack, + setupBrokenAssetMockApiRoutes, +} from 'src/ui/mock-network/broken-asset-network'; +import { createMockOcrData, setupOcrMockApiRoutes } from 'src/ui/mock-network/ocr-network'; +import { assetViewerUtils } from '../timeline/utils'; +import { setupAssetViewerFixture } from './utils'; + +test.describe.configure({ mode: 'parallel' }); + +const PRIMARY_OCR_BOXES = [ + { text: 'Hello World', x1: 0.1, y1: 0.1, x2: 0.4, y2: 0.1, x3: 0.4, y3: 0.15, x4: 0.1, y4: 0.15 }, + { text: 'Immich Photo', x1: 0.2, y1: 0.3, x2: 0.6, y2: 0.3, x3: 0.6, y3: 0.36, x4: 0.2, y4: 0.36 }, +]; + +const SECONDARY_OCR_BOXES = [ + { text: 'Second Asset Text', x1: 0.15, y1: 0.2, x2: 0.55, y2: 0.2, x3: 0.55, y3: 0.26, x4: 0.15, y4: 0.26 }, +]; + +test.describe('OCR bounding boxes', () => { + const fixture = setupAssetViewerFixture(920); + + test.beforeEach(async ({ context }) => { + const primaryAssetDto = toAssetResponseDto(fixture.primaryAsset); + const ocrDataByAssetId = new Map([ + [primaryAssetDto.id, createMockOcrData(primaryAssetDto.id, PRIMARY_OCR_BOXES)], + ]); + + await setupOcrMockApiRoutes(context, ocrDataByAssetId); + }); + + test('OCR bounding boxes appear when clicking OCR button', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + const ocrButton = page.getByLabel('Text 
recognition'); + await expect(ocrButton).toBeVisible(); + await ocrButton.click(); + + const ocrBoxes = page.locator('[data-viewer-content] [data-testid="ocr-box"]'); + await expect(ocrBoxes).toHaveCount(2); + + await expect(ocrBoxes.nth(0)).toContainText('Hello World'); + await expect(ocrBoxes.nth(1)).toContainText('Immich Photo'); + }); + + test('OCR bounding boxes toggle off on second click', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + const ocrButton = page.getByLabel('Text recognition'); + await ocrButton.click(); + await expect(page.locator('[data-viewer-content] [data-testid="ocr-box"]').first()).toBeVisible(); + + await ocrButton.click(); + await expect(page.locator('[data-viewer-content] [data-testid="ocr-box"]')).toHaveCount(0); + }); +}); + +test.describe('OCR with stacked assets', () => { + const fixture = setupAssetViewerFixture(921); + let mockStack: MockStack; + let primaryAssetDto: AssetResponseDto; + let secondAssetDto: AssetResponseDto; + + test.beforeAll(async () => { + primaryAssetDto = toAssetResponseDto(fixture.primaryAsset); + secondAssetDto = createMockStackAsset(fixture.adminUserId); + secondAssetDto.originalFileName = 'second-ocr-asset.jpg'; + mockStack = createMockStack(primaryAssetDto, [secondAssetDto], new Set()); + }); + + test.beforeEach(async ({ context }) => { + await setupBrokenAssetMockApiRoutes(context, mockStack); + + const ocrDataByAssetId = new Map([ + [primaryAssetDto.id, createMockOcrData(primaryAssetDto.id, PRIMARY_OCR_BOXES)], + [secondAssetDto.id, createMockOcrData(secondAssetDto.id, SECONDARY_OCR_BOXES)], + ]); + + await setupOcrMockApiRoutes(context, ocrDataByAssetId); + }); + + test('different OCR boxes shown for different stacked assets', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + const ocrButton = 
page.getByLabel('Text recognition'); + await expect(ocrButton).toBeVisible(); + await ocrButton.click(); + + const ocrBoxes = page.locator('[data-viewer-content] [data-testid="ocr-box"]'); + await expect(ocrBoxes).toHaveCount(2); + await expect(ocrBoxes.nth(0)).toContainText('Hello World'); + + const stackThumbnails = page.locator('#stack-slideshow [data-asset]'); + await expect(stackThumbnails).toHaveCount(2); + await stackThumbnails.nth(1).click(); + + // refreshOcr() clears showOverlay when switching assets, so re-enable it + await expect(ocrBoxes).toHaveCount(0); + await expect(ocrButton).toBeVisible(); + await ocrButton.click(); + + await expect(ocrBoxes).toHaveCount(1); + await expect(ocrBoxes.first()).toContainText('Second Asset Text'); + }); +}); + +test.describe('OCR boxes and zoom', () => { + const fixture = setupAssetViewerFixture(922); + + test.beforeEach(async ({ context }) => { + const primaryAssetDto = toAssetResponseDto(fixture.primaryAsset); + const ocrDataByAssetId = new Map([ + [primaryAssetDto.id, createMockOcrData(primaryAssetDto.id, PRIMARY_OCR_BOXES)], + ]); + + await setupOcrMockApiRoutes(context, ocrDataByAssetId); + }); + + test('OCR boxes scale with zoom', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + const ocrButton = page.getByLabel('Text recognition'); + await expect(ocrButton).toBeVisible(); + await ocrButton.click(); + + const ocrBox = page.locator('[data-viewer-content] [data-testid="ocr-box"]').first(); + await expect(ocrBox).toBeVisible(); + + const initialBox = await ocrBox.boundingBox(); + expect(initialBox).toBeTruthy(); + + const { width, height } = page.viewportSize()!; + await page.mouse.move(width / 2, height / 2); + await page.mouse.wheel(0, -3); + + await expect(async () => { + const zoomedBox = await ocrBox.boundingBox(); + expect(zoomedBox).toBeTruthy(); + 
expect(zoomedBox!.width).toBeGreaterThan(initialBox!.width); + expect(zoomedBox!.height).toBeGreaterThan(initialBox!.height); + }).toPass({ timeout: 2000 }); + }); +}); + +test.describe('OCR text interaction', () => { + const fixture = setupAssetViewerFixture(923); + + test.beforeEach(async ({ context }) => { + const primaryAssetDto = toAssetResponseDto(fixture.primaryAsset); + const ocrDataByAssetId = new Map([ + [primaryAssetDto.id, createMockOcrData(primaryAssetDto.id, PRIMARY_OCR_BOXES)], + ]); + + await setupOcrMockApiRoutes(context, ocrDataByAssetId); + }); + + test('OCR text box has data-overlay-interactive attribute', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + await page.getByLabel('Text recognition').click(); + + const ocrBox = page.locator('[data-viewer-content] [data-testid="ocr-box"]').first(); + await expect(ocrBox).toBeVisible(); + await expect(ocrBox).toHaveAttribute('data-overlay-interactive'); + }); + + test('OCR text box receives focus on click', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + await page.getByLabel('Text recognition').click(); + + const ocrBox = page.locator('[data-viewer-content] [data-testid="ocr-box"]').first(); + await expect(ocrBox).toBeVisible(); + + await ocrBox.click(); + await expect(ocrBox).toBeFocused(); + }); + + test('dragging on OCR text box does not trigger image pan', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + await page.getByLabel('Text recognition').click(); + + const ocrBox = page.locator('[data-viewer-content] [data-testid="ocr-box"]').first(); + await expect(ocrBox).toBeVisible(); + + const imgLocator = page.locator('[data-viewer-content] img[draggable="false"]'); + const 
initialTransform = await imgLocator.evaluate((element) => { + return getComputedStyle(element.closest('[style*="transform"]') ?? element).transform; + }); + + const box = await ocrBox.boundingBox(); + expect(box).toBeTruthy(); + const centerX = box!.x + box!.width / 2; + const centerY = box!.y + box!.height / 2; + + await page.mouse.move(centerX, centerY); + await page.mouse.down(); + await page.mouse.move(centerX + 50, centerY + 30, { steps: 5 }); + await page.mouse.up(); + + const afterTransform = await imgLocator.evaluate((element) => { + return getComputedStyle(element.closest('[style*="transform"]') ?? element).transform; + }); + expect(afterTransform).toBe(initialTransform); + }); + + test('split touch gesture across zoom container does not trigger zoom', async ({ page }) => { + await page.goto(`/photos/${fixture.primaryAsset.id}`); + await assetViewerUtils.waitForViewerLoad(page, fixture.primaryAsset); + + await page.getByLabel('Text recognition').click(); + const ocrBox = page.locator('[data-viewer-content] [data-testid="ocr-box"]').first(); + await expect(ocrBox).toBeVisible(); + + const imgLocator = page.locator('[data-viewer-content] img[draggable="false"]'); + const initialTransform = await imgLocator.evaluate((element) => { + return getComputedStyle(element.closest('[style*="transform"]') ?? 
element).transform; + }); + + const viewerContent = page.locator('[data-viewer-content]'); + const viewerBox = await viewerContent.boundingBox(); + expect(viewerBox).toBeTruthy(); + + // Dispatch a synthetic split gesture: one touch inside the viewer, one outside + await page.evaluate( + ({ viewerCenterX, viewerCenterY, outsideY }) => { + const viewer = document.querySelector('[data-viewer-content]'); + if (!viewer) { + return; + } + + const createTouch = (id: number, x: number, y: number) => { + return new Touch({ + identifier: id, + target: viewer, + clientX: x, + clientY: y, + }); + }; + + const insideTouch = createTouch(0, viewerCenterX, viewerCenterY); + const outsideTouch = createTouch(1, viewerCenterX, outsideY); + + const touchStartEvent = new TouchEvent('touchstart', { + touches: [insideTouch, outsideTouch], + targetTouches: [insideTouch], + changedTouches: [insideTouch, outsideTouch], + bubbles: true, + cancelable: true, + }); + + const touchMoveEvent = new TouchEvent('touchmove', { + touches: [createTouch(0, viewerCenterX, viewerCenterY - 30), createTouch(1, viewerCenterX, outsideY + 30)], + targetTouches: [createTouch(0, viewerCenterX, viewerCenterY - 30)], + changedTouches: [ + createTouch(0, viewerCenterX, viewerCenterY - 30), + createTouch(1, viewerCenterX, outsideY + 30), + ], + bubbles: true, + cancelable: true, + }); + + const touchEndEvent = new TouchEvent('touchend', { + touches: [], + targetTouches: [], + changedTouches: [insideTouch, outsideTouch], + bubbles: true, + cancelable: true, + }); + + viewer.dispatchEvent(touchStartEvent); + viewer.dispatchEvent(touchMoveEvent); + viewer.dispatchEvent(touchEndEvent); + }, + { + viewerCenterX: viewerBox!.x + viewerBox!.width / 2, + viewerCenterY: viewerBox!.y + viewerBox!.height / 2, + outsideY: 10, // near the top of the page, outside the viewer + }, + ); + + const afterTransform = await imgLocator.evaluate((element) => { + return getComputedStyle(element.closest('[style*="transform"]') ?? 
element).transform; + }); + expect(afterTransform).toBe(initialTransform); + }); +}); diff --git a/web/src/lib/actions/zoom-image.ts b/web/src/lib/actions/zoom-image.ts index 35c3d3a106..07c44569cd 100644 --- a/web/src/lib/actions/zoom-image.ts +++ b/web/src/lib/actions/zoom-image.ts @@ -1,11 +1,18 @@ import { assetViewerManager } from '$lib/managers/asset-viewer-manager.svelte'; import { createZoomImageWheel } from '@zoom-image/core'; -export const zoomImageAction = (node: HTMLElement, options?: { disabled?: boolean }) => { +// Minimal touch shape — avoids importing DOM TouchEvent which isn't available in all TS targets. +type TouchEventLike = { + touches: Iterable<{ clientX: number; clientY: number }> & { length: number }; + targetTouches: ArrayLike<unknown>; +}; +const asTouchEvent = (event: Event) => event as unknown as TouchEventLike; + +export const zoomImageAction = (node: HTMLElement, options?: { zoomTarget?: HTMLElement }) => { const zoomInstance = createZoomImageWheel(node, { maxZoom: 10, initialState: assetViewerManager.zoomState, - zoomTarget: null, + zoomTarget: options?.zoomTarget, }); const unsubscribes = [ @@ -13,47 +20,130 @@ export const zoomImageAction = (node: HTMLElement, options?: { disabled?: boolea zoomInstance.subscribe(({ state }) => assetViewerManager.onZoomChange(state)), ]; - const onInteractionStart = (event: Event) => { - if (options?.disabled) { - event.stopImmediatePropagation(); + const controller = new AbortController(); + const { signal } = controller; + + node.addEventListener('pointerdown', () => assetViewerManager.cancelZoomAnimation(), { capture: true, signal }); + + // Intercept events in capture phase to prevent zoom-image from seeing interactions on + // overlay elements (e.g. OCR text boxes), preserving browser defaults like text selection. 
+ const isOverlayEvent = (event: Event) => !!(event.target as HTMLElement).closest('[data-overlay-interactive]'); + const isOverlayAtPoint = (x: number, y: number) => + !!document.elementFromPoint(x, y)?.closest('[data-overlay-interactive]'); + + // Pointer event interception: track pointers that start on overlays and intercept the entire gesture. + const overlayPointers = new Set<number>(); + const interceptedPointers = new Set<number>(); + const interceptOverlayPointerDown = (event: PointerEvent) => { + if (isOverlayEvent(event) || isOverlayAtPoint(event.clientX, event.clientY)) { + overlayPointers.add(event.pointerId); + interceptedPointers.add(event.pointerId); + event.stopPropagation(); + } else if (overlayPointers.size > 0) { + // Split gesture (e.g. pinch with one finger on overlay) — intercept entirely. + interceptedPointers.add(event.pointerId); + event.stopPropagation(); } - assetViewerManager.cancelZoomAnimation(); }; + const interceptOverlayPointerEvent = (event: PointerEvent) => { + if (interceptedPointers.has(event.pointerId)) { + event.stopPropagation(); + } + }; + const interceptOverlayPointerEnd = (event: PointerEvent) => { + overlayPointers.delete(event.pointerId); + if (interceptedPointers.delete(event.pointerId)) { + event.stopPropagation(); + } + }; + node.addEventListener('pointerdown', interceptOverlayPointerDown, { capture: true, signal }); + node.addEventListener('pointermove', interceptOverlayPointerEvent, { capture: true, signal }); + node.addEventListener('pointerup', interceptOverlayPointerEnd, { capture: true, signal }); + node.addEventListener('pointerleave', interceptOverlayPointerEnd, { capture: true, signal }); - node.addEventListener('wheel', onInteractionStart, { capture: true }); - node.addEventListener('pointerdown', onInteractionStart, { capture: true }); + // Touch event interception for overlay touches or split gestures (pinch across container boundary). + // Once intercepted, stays intercepted until all fingers are lifted. 
+ let touchGestureIntercepted = false; + const interceptOverlayTouchEvent = (event: Event) => { + if (touchGestureIntercepted) { + event.stopPropagation(); + return; + } + const { touches, targetTouches } = asTouchEvent(event); + if (touches && targetTouches) { + if (touches.length > targetTouches.length) { + touchGestureIntercepted = true; + event.stopPropagation(); + return; + } + for (const touch of touches) { + if (isOverlayAtPoint(touch.clientX, touch.clientY)) { + touchGestureIntercepted = true; + event.stopPropagation(); + return; + } + } + } else if (isOverlayEvent(event)) { + event.stopPropagation(); + } + }; + const resetTouchGesture = (event: Event) => { + const { touches } = asTouchEvent(event); + if (touches.length === 0) { + touchGestureIntercepted = false; + } + }; + node.addEventListener('touchstart', interceptOverlayTouchEvent, { capture: true, signal }); + node.addEventListener('touchmove', interceptOverlayTouchEvent, { capture: true, signal }); + node.addEventListener('touchend', resetTouchGesture, { capture: true, signal }); - // Suppress Safari's synthetic dblclick on double-tap. Without this, zoom-image's touchstart - // handler zooms to maxZoom (10x), then Safari's synthetic dblclick triggers photo-viewer's - // handler which conflicts. Chrome does not fire synthetic dblclick on touch. + // Wheel and dblclick interception on overlay elements. + // Dblclick also intercepted for all touch double-taps (Safari fires synthetic dblclick + // on double-tap, which conflicts with zoom-image's touch zoom handler). 
let lastPointerWasTouch = false; - const trackPointerType = (event: PointerEvent) => { - lastPointerWasTouch = event.pointerType === 'touch'; - }; - const suppressTouchDblClick = (event: MouseEvent) => { - if (lastPointerWasTouch) { - event.stopImmediatePropagation(); - } - }; - node.addEventListener('pointerdown', trackPointerType, { capture: true }); - node.addEventListener('dblclick', suppressTouchDblClick, { capture: true }); + node.addEventListener('pointerdown', (event) => (lastPointerWasTouch = event.pointerType === 'touch'), { + capture: true, + signal, + }); + node.addEventListener( + 'wheel', + (event) => { + if (isOverlayEvent(event)) { + event.stopPropagation(); + } + }, + { capture: true, signal }, + ); + node.addEventListener( + 'dblclick', + (event) => { + if (lastPointerWasTouch || isOverlayEvent(event)) { + event.stopImmediatePropagation(); + } + }, + { capture: true, signal }, + ); - // Allow zoomed content to render outside the container bounds + if (options?.zoomTarget) { + options.zoomTarget.style.willChange = 'transform'; + } node.style.overflow = 'visible'; - // Prevent browser handling of touch gestures so zoom-image can manage them node.style.touchAction = 'none'; return { - update(newOptions?: { disabled?: boolean }) { + update(newOptions?: { zoomTarget?: HTMLElement }) { options = newOptions; + if (newOptions?.zoomTarget !== undefined) { + zoomInstance.setState({ zoomTarget: newOptions.zoomTarget }); + } }, destroy() { + controller.abort(); + if (options?.zoomTarget) { + options.zoomTarget.style.willChange = ''; + } for (const unsubscribe of unsubscribes) { unsubscribe(); } - node.removeEventListener('wheel', onInteractionStart, { capture: true }); - node.removeEventListener('pointerdown', onInteractionStart, { capture: true }); - node.removeEventListener('pointerdown', trackPointerType, { capture: true }); - node.removeEventListener('dblclick', suppressTouchDblClick, { capture: true }); zoomInstance.cleanup(); }, }; diff --git 
a/web/src/lib/components/AdaptiveImage.svelte b/web/src/lib/components/AdaptiveImage.svelte index fad4d49d1b..90c9328cf8 100644 --- a/web/src/lib/components/AdaptiveImage.svelte +++ b/web/src/lib/components/AdaptiveImage.svelte @@ -7,7 +7,7 @@ import { assetViewerManager } from '$lib/managers/asset-viewer-manager.svelte'; import { getAssetUrls } from '$lib/utils'; import { AdaptiveImageLoader, type QualityList } from '$lib/utils/adaptive-image-loader.svelte'; - import { scaleToCover, scaleToFit } from '$lib/utils/container-utils'; + import { scaleToCover, scaleToFit, type Size } from '$lib/utils/container-utils'; import { getAltText } from '$lib/utils/thumbnail-util'; import { toTimelineAsset } from '$lib/utils/timeline-util'; import type { AssetResponseDto, SharedLinkResponseDto } from '@immich/sdk'; @@ -17,10 +17,7 @@ asset: AssetResponseDto; sharedLink?: SharedLinkResponseDto; objectFit?: 'contain' | 'cover'; - container: { - width: number; - height: number; - }; + container: Size; onUrlChange?: (url: string) => void; onImageReady?: () => void; onError?: () => void; @@ -149,81 +146,66 @@ (quality.preview === 'success' ? previewElement : undefined) ?? (quality.thumbnail === 'success' ? thumbnailElement : undefined); }); - - const zoomTransform = $derived.by(() => { - const { currentZoom, currentPositionX, currentPositionY } = assetViewerManager.zoomState; - if (currentZoom === 1 && currentPositionX === 0 && currentPositionY === 0) { - return undefined; - } - return `translate(${currentPositionX}px, ${currentPositionY}px) scale(${currentZoom})`; - });
{@render backdrop?.()} - -
-
- {#if show.alphaBackground} - - {/if} +
+ {#if show.alphaBackground} + + {/if} - {#if show.thumbhash} - {#if asset.thumbhash} - - - {:else if show.spinner} - - {/if} + {#if show.thumbhash} + {#if asset.thumbhash} + + + {:else if show.spinner} + {/if} + {/if} - {#if show.thumbnail} - - {/if} + {#if show.thumbnail} + + {/if} - {#if show.brokenAsset} - - {/if} + {#if show.brokenAsset} + + {/if} - {#if show.preview} - - {/if} + {#if show.preview} + + {/if} - {#if show.original} - - {/if} -
+ {#if show.original} + + {/if}
diff --git a/web/src/lib/components/asset-viewer/face-editor/face-editor.svelte b/web/src/lib/components/asset-viewer/face-editor/face-editor.svelte index f2b9c2e157..ddd8f393c4 100644 --- a/web/src/lib/components/asset-viewer/face-editor/face-editor.svelte +++ b/web/src/lib/components/asset-viewer/face-editor/face-editor.svelte @@ -13,12 +13,12 @@ import { onMount } from 'svelte'; import { t } from 'svelte-i18n'; - interface Props { + type Props = { htmlElement: HTMLImageElement | HTMLVideoElement; containerWidth: number; containerHeight: number; assetId: string; - } + }; let { htmlElement, containerWidth, containerHeight, assetId }: Props = $props(); @@ -295,6 +295,7 @@
+ import { mediaQueryManager } from '$lib/stores/media-query-manager.svelte'; import type { OcrBox } from '$lib/utils/ocr-utils'; import { calculateBoundingBoxMatrix, calculateFittedFontSize } from '$lib/utils/ocr-utils'; @@ -8,6 +9,7 @@ let { ocrBox }: Props = $props(); + const isTouch = $derived(mediaQueryManager.pointerCoarse); const dimensions = $derived(calculateBoundingBoxMatrix(ocrBox.points)); const transform = $derived(`matrix3d(${dimensions.matrix.join(',')})`); @@ -15,13 +17,23 @@ calculateFittedFontSize(ocrBox.text, dimensions.width, dimensions.height, ocrBox.verticalMode) + 'px', ); + const handleSelectStart = (event: Event) => { + const target = event.currentTarget as HTMLElement; + requestAnimationFrame(() => { + const selection = globalThis.getSelection(); + if (selection) { + selection.selectAllChildren(target); + } + }); + }; + const verticalStyle = $derived.by(() => { switch (ocrBox.verticalMode) { case 'cjk': { - return ' writing-mode: vertical-rl;'; + return 'writing-mode: vertical-rl;'; } case 'rotated': { - return ' writing-mode: vertical-rl; text-orientation: sideways;'; + return 'writing-mode: vertical-rl; text-orientation: sideways;'; } default: { return ''; @@ -30,17 +42,23 @@ }); -
-
- {ocrBox.text} -
+
+ {ocrBox.text}
diff --git a/web/src/lib/components/asset-viewer/photo-sphere-viewer-adapter.svelte b/web/src/lib/components/asset-viewer/photo-sphere-viewer-adapter.svelte index 926383d9c2..d46b5e0dc1 100644 --- a/web/src/lib/components/asset-viewer/photo-sphere-viewer-adapter.svelte +++ b/web/src/lib/components/asset-viewer/photo-sphere-viewer-adapter.svelte @@ -128,10 +128,8 @@ } const boxes = getOcrBoundingBoxes(ocrData, { - contentWidth: viewer.state.textureData.panoData.croppedWidth, - contentHeight: viewer.state.textureData.panoData.croppedHeight, - offsetX: 0, - offsetY: 0, + width: viewer.state.textureData.panoData.croppedWidth, + height: viewer.state.textureData.panoData.croppedHeight, }); for (const [index, box] of boxes.entries()) { diff --git a/web/src/lib/components/asset-viewer/photo-viewer.svelte b/web/src/lib/components/asset-viewer/photo-viewer.svelte index 4a6a02cb4a..581e715367 100644 --- a/web/src/lib/components/asset-viewer/photo-viewer.svelte +++ b/web/src/lib/components/asset-viewer/photo-viewer.svelte @@ -14,7 +14,7 @@ import { SlideshowLook, SlideshowState, slideshowStore } from '$lib/stores/slideshow.store'; import { handlePromiseError } from '$lib/utils'; import { canCopyImageToClipboard, copyImageToClipboard } from '$lib/utils/asset-utils'; - import { getNaturalSize, scaleToFit, type ContentMetrics } from '$lib/utils/container-utils'; + import { getNaturalSize, scaleToFit, type Size } from '$lib/utils/container-utils'; import { handleError } from '$lib/utils/handle-error'; import { getOcrBoundingBoxes } from '$lib/utils/ocr-utils'; import { getBoundingBox } from '$lib/utils/people-utils'; @@ -25,14 +25,14 @@ import { t } from 'svelte-i18n'; import type { AssetCursor } from './asset-viewer.svelte'; - interface Props { + type Props = { cursor: AssetCursor; element?: HTMLDivElement; sharedLink?: SharedLinkResponseDto; onReady?: () => void; onError?: () => void; onSwipe?: (event: SwipeCustomEvent) => void; - } + }; let { cursor, element = $bindable(), 
sharedLink, onReady, onError, onSwipe }: Props = $props(); @@ -67,23 +67,15 @@ height: containerHeight, }); - const overlayMetrics = $derived.by((): ContentMetrics => { + const overlaySize = $derived.by((): Size => { if (!assetViewerManager.imgRef || !visibleImageReady) { - return { contentWidth: 0, contentHeight: 0, offsetX: 0, offsetY: 0 }; + return { width: 0, height: 0 }; } - const natural = getNaturalSize(assetViewerManager.imgRef); - const scaled = scaleToFit(natural, { width: containerWidth, height: containerHeight }); - - return { - contentWidth: scaled.width, - contentHeight: scaled.height, - offsetX: 0, - offsetY: 0, - }; + return scaleToFit(getNaturalSize(assetViewerManager.imgRef), { width: containerWidth, height: containerHeight }); }); - const ocrBoxes = $derived(ocrManager.showOverlay ? getOcrBoundingBoxes(ocrManager.data, overlayMetrics) : []); + const ocrBoxes = $derived(ocrManager.showOverlay ? getOcrBoundingBoxes(ocrManager.data, overlaySize) : []); const onCopy = async () => { if (!canCopyImageToClipboard() || !assetViewerManager.imgRef) { @@ -151,6 +143,8 @@ $slideshowState !== SlideshowState.None && $slideshowLook === SlideshowLook.BlurredBackground && !!asset.thumbhash, ); + let adaptiveImage = $state(); + const faceToNameMap = $derived.by(() => { // eslint-disable-next-line svelte/prefer-svelte-reactivity const map = new Map(); @@ -181,7 +175,7 @@ const mouseX = (event.clientX - containerRect.left - contentOffsetX * currentZoom - currentPositionX) / currentZoom; const mouseY = (event.clientY - containerRect.top - contentOffsetY * currentZoom - currentPositionY) / currentZoom; - const faceBoxes = getBoundingBox(faces, overlayMetrics); + const faceBoxes = getBoundingBox(faces, overlaySize); for (const [index, box] of faceBoxes.entries()) { if (mouseX >= box.left && mouseX <= box.left + box.width && mouseY >= box.top && mouseY <= box.top + box.height) { @@ -215,7 +209,7 @@ ondblclick={onZoom} onmousemove={handleImageMouseMove} 
onmouseleave={handleImageMouseLeave} - use:zoomImageAction={{ disabled: isFaceEditMode.value || ocrManager.showOverlay }} + use:zoomImageAction={{ zoomTarget: adaptiveImage }} {...useSwipe((event) => onSwipe?.(event))} > {#snippet backdrop()} {#if blurredSlideshow} @@ -243,7 +238,7 @@ {/if} {/snippet} {#snippet overlays()} - {#each getBoundingBox($boundingBoxesArray, overlayMetrics) as boundingbox, index (boundingbox.id)} + {#each getBoundingBox($boundingBoxesArray, overlaySize) as boundingbox, index (boundingbox.id)}
{ expect(getNaturalSize(video)).toEqual({ width: 1920, height: 1080 }); }); }); + +describe('scaleToCover', () => { + it('should scale up to cover container when image is smaller', () => { + expect(scaleToCover({ width: 400, height: 300 }, { width: 800, height: 600 })).toEqual({ + width: 800, + height: 600, + }); + }); + + it('should use height scale when image is wider than container', () => { + expect(scaleToCover({ width: 2000, height: 1000 }, { width: 800, height: 600 })).toEqual({ + width: 1200, + height: 600, + }); + }); + + it('should use width scale when image is taller than container', () => { + expect(scaleToCover({ width: 1000, height: 2000 }, { width: 800, height: 600 })).toEqual({ + width: 800, + height: 1600, + }); + }); +}); + +describe('mapNormalizedToContent', () => { + const metrics = { contentWidth: 800, contentHeight: 400, offsetX: 0, offsetY: 100 }; + + it('should map top-left corner', () => { + expect(mapNormalizedToContent({ x: 0, y: 0 }, metrics)).toEqual({ x: 0, y: 100 }); + }); + + it('should map bottom-right corner', () => { + expect(mapNormalizedToContent({ x: 1, y: 1 }, metrics)).toEqual({ x: 800, y: 500 }); + }); + + it('should map center point', () => { + expect(mapNormalizedToContent({ x: 0.5, y: 0.5 }, metrics)).toEqual({ x: 400, y: 300 }); + }); + + it('should apply offsets correctly for letterboxed content', () => { + const letterboxed = { contentWidth: 300, contentHeight: 600, offsetX: 250, offsetY: 0 }; + expect(mapNormalizedToContent({ x: 0, y: 0 }, letterboxed)).toEqual({ x: 250, y: 0 }); + expect(mapNormalizedToContent({ x: 1, y: 1 }, letterboxed)).toEqual({ x: 550, y: 600 }); + }); + + it('should accept Size (zero offsets)', () => { + const size = { width: 800, height: 400 }; + expect(mapNormalizedToContent({ x: 0, y: 0 }, size)).toEqual({ x: 0, y: 0 }); + expect(mapNormalizedToContent({ x: 1, y: 1 }, size)).toEqual({ x: 800, y: 400 }); + expect(mapNormalizedToContent({ x: 0.5, y: 0.5 }, size)).toEqual({ x: 400, y: 200 }); + 
}); +}); + +describe('mapNormalizedRectToContent', () => { + const metrics = { contentWidth: 800, contentHeight: 400, offsetX: 0, offsetY: 100 }; + + it('should map a normalized rect to content pixel coordinates', () => { + const rect = mapNormalizedRectToContent({ x: 0.25, y: 0.25 }, { x: 0.75, y: 0.75 }, metrics); + expect(rect).toEqual({ left: 200, top: 200, width: 400, height: 200 }); + }); + + it('should map full image rect', () => { + const rect = mapNormalizedRectToContent({ x: 0, y: 0 }, { x: 1, y: 1 }, metrics); + expect(rect).toEqual({ left: 0, top: 100, width: 800, height: 400 }); + }); + + it('should handle letterboxed content with horizontal offsets', () => { + const letterboxed = { contentWidth: 300, contentHeight: 600, offsetX: 250, offsetY: 0 }; + const rect = mapNormalizedRectToContent({ x: 0, y: 0 }, { x: 1, y: 1 }, letterboxed); + expect(rect).toEqual({ left: 250, top: 0, width: 300, height: 600 }); + }); + + it('should accept Size (zero offsets)', () => { + const size = { width: 800, height: 400 }; + const rect = mapNormalizedRectToContent({ x: 0.25, y: 0.25 }, { x: 0.75, y: 0.75 }, size); + expect(rect).toEqual({ left: 200, top: 100, width: 400, height: 200 }); + }); +}); diff --git a/web/src/lib/utils/container-utils.ts b/web/src/lib/utils/container-utils.ts index ffa2fae769..36e260fcc7 100644 --- a/web/src/lib/utils/container-utils.ts +++ b/web/src/lib/utils/container-utils.ts @@ -1,14 +1,35 @@ -export interface ContentMetrics { +// Coordinate spaces used throughout the viewer: +// +// "Normalized": 0–1 range, (0,0) = top-left, (1,1) = bottom-right. Resolution-independent. +// Example: OCR coordinates, or face coords after dividing by metadata dimensions. +// +// "Content": pixel position within the container after scaling (scaleToFit/scaleToCover) +// and centering. Used for DOM overlay positioning (face boxes, OCR text). +// +// "Natural": pixel position in the original full-resolution image file (e.g. 4000×3000). 
+// Used when cropping or drawing on the source image. +// +// "Metadata pixel space": coordinates from face detection / OCR models, in pixels relative +// to face.imageWidth/imageHeight. Divide by those dimensions to get normalized coords. + +export type Point = { + x: number; + y: number; +}; + +export type Size = { + width: number; + height: number; +}; + +export type ContentMetrics = { contentWidth: number; contentHeight: number; offsetX: number; offsetY: number; -} +}; -export const scaleToCover = ( - dimensions: { width: number; height: number }, - container: { width: number; height: number }, -): { width: number; height: number } => { +export const scaleToCover = (dimensions: Size, container: Size): Size => { const scaleX = container.width / dimensions.width; const scaleY = container.height / dimensions.height; const scale = Math.max(scaleX, scaleY); @@ -18,10 +39,7 @@ export const scaleToCover = ( }; }; -export const scaleToFit = ( - dimensions: { width: number; height: number }, - container: { width: number; height: number }, -): { width: number; height: number } => { +export const scaleToFit = (dimensions: Size, container: Size): Size => { const scaleX = container.width / dimensions.width; const scaleY = container.height / dimensions.height; const scale = Math.min(scaleX, scaleY); @@ -31,14 +49,14 @@ export const scaleToFit = ( }; }; -const getElementSize = (element: HTMLImageElement | HTMLVideoElement): { width: number; height: number } => { +const getElementSize = (element: HTMLImageElement | HTMLVideoElement): Size => { if (element instanceof HTMLVideoElement) { return { width: element.clientWidth, height: element.clientHeight }; } return { width: element.width, height: element.height }; }; -export const getNaturalSize = (element: HTMLImageElement | HTMLVideoElement): { width: number; height: number } => { +export const getNaturalSize = (element: HTMLImageElement | HTMLVideoElement): Size => { if (element instanceof HTMLVideoElement) { return { width: 
element.videoWidth, height: element.videoHeight }; } @@ -56,3 +74,38 @@ export const getContentMetrics = (element: HTMLImageElement | HTMLVideoElement): offsetY: (client.height - contentHeight) / 2, }; }; + +export function mapNormalizedToContent(point: Point, sizeOrMetrics: Size | ContentMetrics): Point { + if ('contentWidth' in sizeOrMetrics) { + return { + x: point.x * sizeOrMetrics.contentWidth + sizeOrMetrics.offsetX, + y: point.y * sizeOrMetrics.contentHeight + sizeOrMetrics.offsetY, + }; + } + return { + x: point.x * sizeOrMetrics.width, + y: point.y * sizeOrMetrics.height, + }; +} + +export type Rect = { + top: number; + left: number; + width: number; + height: number; +}; + +export function mapNormalizedRectToContent( + topLeft: Point, + bottomRight: Point, + sizeOrMetrics: Size | ContentMetrics, +): Rect { + const tl = mapNormalizedToContent(topLeft, sizeOrMetrics); + const br = mapNormalizedToContent(bottomRight, sizeOrMetrics); + return { + top: tl.y, + left: tl.x, + width: br.x - tl.x, + height: br.y - tl.y, + }; +} diff --git a/web/src/lib/utils/ocr-utils.spec.ts b/web/src/lib/utils/ocr-utils.spec.ts index c3ce70394d..c88936fddf 100644 --- a/web/src/lib/utils/ocr-utils.spec.ts +++ b/web/src/lib/utils/ocr-utils.spec.ts @@ -1,5 +1,5 @@ import type { OcrBoundingBox } from '$lib/stores/ocr.svelte'; -import type { ContentMetrics } from '$lib/utils/container-utils'; +import type { Size } from '$lib/utils/container-utils'; import { getOcrBoundingBoxes } from '$lib/utils/ocr-utils'; describe('getOcrBoundingBoxes', () => { @@ -21,9 +21,9 @@ describe('getOcrBoundingBoxes', () => { text: 'hello', }, ]; - const metrics: ContentMetrics = { contentWidth: 1000, contentHeight: 500, offsetX: 0, offsetY: 0 }; + const imageSize: Size = { width: 1000, height: 500 }; - const boxes = getOcrBoundingBoxes(ocrData, metrics); + const boxes = getOcrBoundingBoxes(ocrData, imageSize); expect(boxes).toHaveLength(1); expect(boxes[0].id).toBe('box1'); @@ -37,7 +37,7 @@ 
describe('getOcrBoundingBoxes', () => { ]); }); - it('should apply offsets for letterboxed images', () => { + it('should map full-image box to full display area', () => { const ocrData: OcrBoundingBox[] = [ { id: 'box1', @@ -55,21 +55,20 @@ describe('getOcrBoundingBoxes', () => { text: 'test', }, ]; - const metrics: ContentMetrics = { contentWidth: 600, contentHeight: 400, offsetX: 100, offsetY: 50 }; + const imageSize: Size = { width: 600, height: 400 }; - const boxes = getOcrBoundingBoxes(ocrData, metrics); + const boxes = getOcrBoundingBoxes(ocrData, imageSize); expect(boxes[0].points).toEqual([ - { x: 100, y: 50 }, - { x: 700, y: 50 }, - { x: 700, y: 450 }, - { x: 100, y: 450 }, + { x: 0, y: 0 }, + { x: 600, y: 0 }, + { x: 600, y: 400 }, + { x: 0, y: 400 }, ]); }); it('should return empty array for empty input', () => { - const metrics: ContentMetrics = { contentWidth: 800, contentHeight: 600, offsetX: 0, offsetY: 0 }; - expect(getOcrBoundingBoxes([], metrics)).toEqual([]); + expect(getOcrBoundingBoxes([], { width: 800, height: 600 })).toEqual([]); }); it('should handle multiple boxes', () => { @@ -105,9 +104,9 @@ describe('getOcrBoundingBoxes', () => { text: 'second', }, ]; - const metrics: ContentMetrics = { contentWidth: 200, contentHeight: 200, offsetX: 0, offsetY: 0 }; + const imageSize: Size = { width: 200, height: 200 }; - const boxes = getOcrBoundingBoxes(ocrData, metrics); + const boxes = getOcrBoundingBoxes(ocrData, imageSize); expect(boxes).toHaveLength(2); expect(boxes[0].text).toBe('first'); diff --git a/web/src/lib/utils/ocr-utils.ts b/web/src/lib/utils/ocr-utils.ts index c483eb9551..2bf23bff2d 100644 --- a/web/src/lib/utils/ocr-utils.ts +++ b/web/src/lib/utils/ocr-utils.ts @@ -1,23 +1,19 @@ import type { OcrBoundingBox } from '$lib/stores/ocr.svelte'; -import type { ContentMetrics } from '$lib/utils/container-utils'; +import { mapNormalizedToContent, type Point, type Size } from '$lib/utils/container-utils'; import { clamp } from 'lodash-es'; - 
-export type Point = { - x: number; - y: number; -}; +export type { Point } from '$lib/utils/container-utils'; const distance = (p1: Point, p2: Point) => Math.hypot(p2.x - p1.x, p2.y - p1.y); export type VerticalMode = 'none' | 'cjk' | 'rotated'; -export interface OcrBox { +export type OcrBox = { id: string; points: Point[]; text: string; confidence: number; verticalMode: VerticalMode; -} +}; const CJK_PATTERN = /[\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF\uAC00-\uD7AF\uFF00-\uFFEF]/; @@ -38,7 +34,7 @@ const getVerticalMode = (width: number, height: number, text: string): VerticalM * @param points - Array of 4 corner points of the bounding box * @returns 4x4 matrix to transform the div with text onto the polygon defined by the corner points, and size to set on the source div. */ -export const calculateBoundingBoxMatrix = (points: Point[]): { matrix: number[]; width: number; height: number } => { +export const calculateBoundingBoxMatrix = (points: Point[]): Size & { matrix: number[] } => { const [topLeft, topRight, bottomRight, bottomLeft] = points; const width = Math.max(distance(topLeft, topRight), distance(bottomLeft, bottomRight)); @@ -163,7 +159,7 @@ export const calculateFittedFontSize = ( return clamp(Math.min(scaleFromWidth, scaleFromHeight), MIN_FONT_SIZE, MAX_FONT_SIZE); }; -export const getOcrBoundingBoxes = (ocrData: OcrBoundingBox[], metrics: ContentMetrics): OcrBox[] => { +export const getOcrBoundingBoxes = (ocrData: OcrBoundingBox[], imageSize: Size): OcrBox[] => { const boxes: OcrBox[] = []; for (const ocr of ocrData) { const points = [ @@ -171,10 +167,7 @@ export const getOcrBoundingBoxes = (ocrData: OcrBoundingBox[], metrics: ContentM { x: ocr.x2, y: ocr.y2 }, { x: ocr.x3, y: ocr.y3 }, { x: ocr.x4, y: ocr.y4 }, - ].map((point) => ({ - x: point.x * metrics.contentWidth + metrics.offsetX, - y: point.y * metrics.contentHeight + metrics.offsetY, - })); + ].map((point) => mapNormalizedToContent(point, imageSize)); 
const boxWidth = Math.max(distance(points[0], points[1]), distance(points[3], points[2])); const boxHeight = Math.max(distance(points[0], points[3]), distance(points[1], points[2])); @@ -188,7 +181,7 @@ export const getOcrBoundingBoxes = (ocrData: OcrBoundingBox[], metrics: ContentM }); } - const rowThreshold = metrics.contentHeight * 0.02; + const rowThreshold = imageSize.height * 0.02; boxes.sort((a, b) => { const yDifference = a.points[0].y - b.points[0].y; if (Math.abs(yDifference) < rowThreshold) { diff --git a/web/src/lib/utils/people-utils.spec.ts b/web/src/lib/utils/people-utils.spec.ts index 80371bd9c4..f27a1855b5 100644 --- a/web/src/lib/utils/people-utils.spec.ts +++ b/web/src/lib/utils/people-utils.spec.ts @@ -1,5 +1,5 @@ import type { Faces } from '$lib/stores/people.store'; -import type { ContentMetrics } from '$lib/utils/container-utils'; +import type { Size } from '$lib/utils/container-utils'; import { getBoundingBox } from '$lib/utils/people-utils'; const makeFace = (overrides: Partial = {}): Faces => ({ @@ -16,21 +16,21 @@ const makeFace = (overrides: Partial = {}): Faces => ({ describe('getBoundingBox', () => { it('should scale face coordinates to display dimensions', () => { const face = makeFace(); - const metrics: ContentMetrics = { contentWidth: 800, contentHeight: 600, offsetX: 0, offsetY: 0 }; + const imageSize: Size = { width: 800, height: 600 }; - const boxes = getBoundingBox([face], metrics); + const boxes = getBoundingBox([face], imageSize); expect(boxes).toHaveLength(1); expect(boxes[0]).toEqual({ id: 'face-1', - top: Math.round(600 * (750 / 3000)), - left: Math.round(800 * (1000 / 4000)), - width: Math.round(800 * (2000 / 4000) - 800 * (1000 / 4000)), - height: Math.round(600 * (1500 / 3000) - 600 * (750 / 3000)), + top: 600 * (750 / 3000), + left: 800 * (1000 / 4000), + width: 800 * (2000 / 4000) - 800 * (1000 / 4000), + height: 600 * (1500 / 3000) - 600 * (750 / 3000), }); }); - it('should apply offsets for letterboxed display', () 
=> { + it('should map full-image face to full display area', () => { const face = makeFace({ imageWidth: 1000, imageHeight: 1000, @@ -39,49 +39,21 @@ describe('getBoundingBox', () => { boundingBoxX2: 1000, boundingBoxY2: 1000, }); - const metrics: ContentMetrics = { contentWidth: 600, contentHeight: 600, offsetX: 100, offsetY: 0 }; + const imageSize: Size = { width: 600, height: 600 }; - const boxes = getBoundingBox([face], metrics); + const boxes = getBoundingBox([face], imageSize); expect(boxes[0]).toEqual({ id: 'face-1', top: 0, - left: 100, + left: 0, width: 600, height: 600, }); }); - it('should handle zoom by pre-scaled metrics', () => { - const face = makeFace({ - imageWidth: 1000, - imageHeight: 1000, - boundingBoxX1: 0, - boundingBoxY1: 0, - boundingBoxX2: 500, - boundingBoxY2: 500, - }); - const metrics: ContentMetrics = { - contentWidth: 1600, - contentHeight: 1200, - offsetX: -200, - offsetY: -100, - }; - - const boxes = getBoundingBox([face], metrics); - - expect(boxes[0]).toEqual({ - id: 'face-1', - top: -100, - left: -200, - width: 800, - height: 600, - }); - }); - it('should return empty array for empty faces', () => { - const metrics: ContentMetrics = { contentWidth: 800, contentHeight: 600, offsetX: 0, offsetY: 0 }; - expect(getBoundingBox([], metrics)).toEqual([]); + expect(getBoundingBox([], { width: 800, height: 600 })).toEqual([]); }); it('should handle multiple faces', () => { @@ -89,9 +61,8 @@ describe('getBoundingBox', () => { makeFace({ id: 'face-1', boundingBoxX1: 0, boundingBoxY1: 0, boundingBoxX2: 1000, boundingBoxY2: 1000 }), makeFace({ id: 'face-2', boundingBoxX1: 2000, boundingBoxY1: 1500, boundingBoxX2: 3000, boundingBoxY2: 2500 }), ]; - const metrics: ContentMetrics = { contentWidth: 800, contentHeight: 600, offsetX: 0, offsetY: 0 }; - const boxes = getBoundingBox(faces, metrics); + const boxes = getBoundingBox(faces, { width: 800, height: 600 }); expect(boxes).toHaveLength(2); expect(boxes[0].left).toBeLessThan(boxes[1].left); 
diff --git a/web/src/lib/utils/people-utils.ts b/web/src/lib/utils/people-utils.ts index b8fb8973e6..f7f9f4ee42 100644 --- a/web/src/lib/utils/people-utils.ts +++ b/web/src/lib/utils/people-utils.ts @@ -1,37 +1,21 @@ import type { Faces } from '$lib/stores/people.store'; import { getAssetMediaUrl } from '$lib/utils'; -import type { ContentMetrics } from '$lib/utils/container-utils'; +import { mapNormalizedRectToContent, type Rect, type Size } from '$lib/utils/container-utils'; import { AssetTypeEnum, type AssetFaceResponseDto } from '@immich/sdk'; -export interface BoundingBox { - id: string; - top: number; - left: number; - width: number; - height: number; -} +export type BoundingBox = Rect & { id: string }; -export const getBoundingBox = (faces: Faces[], metrics: ContentMetrics): BoundingBox[] => { +export const getBoundingBox = (faces: Faces[], imageSize: Size): BoundingBox[] => { const boxes: BoundingBox[] = []; for (const face of faces) { - const scaleX = metrics.contentWidth / face.imageWidth; - const scaleY = metrics.contentHeight / face.imageHeight; + const rect = mapNormalizedRectToContent( + { x: face.boundingBoxX1 / face.imageWidth, y: face.boundingBoxY1 / face.imageHeight }, + { x: face.boundingBoxX2 / face.imageWidth, y: face.boundingBoxY2 / face.imageHeight }, + imageSize, + ); - const coordinates = { - x1: scaleX * face.boundingBoxX1 + metrics.offsetX, - x2: scaleX * face.boundingBoxX2 + metrics.offsetX, - y1: scaleY * face.boundingBoxY1 + metrics.offsetY, - y2: scaleY * face.boundingBoxY2 + metrics.offsetY, - }; - - boxes.push({ - id: face.id, - top: Math.round(coordinates.y1), - left: Math.round(coordinates.x1), - width: Math.round(coordinates.x2 - coordinates.x1), - height: Math.round(coordinates.y2 - coordinates.y1), - }); + boxes.push({ id: face.id, ...rect }); } return boxes;