From 0e627ba004ef114f3546d619424f5e48fa6f4ba6 Mon Sep 17 00:00:00 2001 From: Mees Frensel Date: Thu, 29 Jan 2026 18:47:35 +0100 Subject: [PATCH] feat(web): show ocr text boxes in panoramas --- .../asset-viewer/asset-viewer.svelte | 1 - .../asset-viewer/ocr-bounding-box.svelte | 27 ++-- .../photo-sphere-viewer-adapter.svelte | 103 ++++++++++++++- web/src/lib/utils/ocr-utils.ts | 122 +++++++++--------- 4 files changed, 170 insertions(+), 83 deletions(-) diff --git a/web/src/lib/components/asset-viewer/asset-viewer.svelte b/web/src/lib/components/asset-viewer/asset-viewer.svelte index 333e76d2ea..1a04f45880 100644 --- a/web/src/lib/components/asset-viewer/asset-viewer.svelte +++ b/web/src/lib/components/asset-viewer/asset-viewer.svelte @@ -424,7 +424,6 @@ const showOcrButton = $derived( $slideshowState === SlideshowState.None && asset.type === AssetTypeEnum.Image && - !(asset.exifInfo?.projectionType === 'EQUIRECTANGULAR') && !assetViewerManager.isShowEditor && ocrManager.hasOcrData, ); diff --git a/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte b/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte index e64b674ac1..6f6caad0fc 100644 --- a/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte +++ b/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte @@ -1,6 +1,6 @@ -
- +
- - -
{ocrBox.text}
diff --git a/web/src/lib/components/asset-viewer/photo-sphere-viewer-adapter.svelte b/web/src/lib/components/asset-viewer/photo-sphere-viewer-adapter.svelte index f671aa1b1c..f4ba6868e0 100644 --- a/web/src/lib/components/asset-viewer/photo-sphere-viewer-adapter.svelte +++ b/web/src/lib/components/asset-viewer/photo-sphere-viewer-adapter.svelte @@ -2,8 +2,10 @@ import { shortcuts } from '$lib/actions/shortcut'; import AssetViewerEvents from '$lib/components/AssetViewerEvents.svelte'; import { assetViewerManager } from '$lib/managers/asset-viewer-manager.svelte'; + import { ocrManager, type OcrBoundingBox } from '$lib/stores/ocr.svelte'; import { boundingBoxesArray, type Faces } from '$lib/stores/people.store'; import { alwaysLoadOriginalFile } from '$lib/stores/preferences.store'; + import { calculateBoundingBoxMatrix, getOcrBoundingBoxesAtSize, type Point } from '$lib/utils/ocr-utils'; import { EquirectangularAdapter, Viewer, @@ -27,6 +29,17 @@ strokeLinejoin: 'round', }; + // Adapted as well as possible from classlist 'border-2 border-blue-500 bg-blue-500/10 hover:border-blue-600 hover:border-3' + const OCR_BOX_SVG_STYLE = { + fill: 'var(--color-blue-500)', + fillOpacity: '0.1', + stroke: 'var(--color-blue-500)', + strokeWidth: '2px', + }; + + const OCR_TOOLTIP_HTML_CLASS = + 'flex items-center justify-center text-white bg-black/50 cursor-text pointer-events-auto whitespace-pre-wrap wrap-break-word select-text'; + type Props = { panorama: string | { source: string }; originalPanorama?: string | { source: string }; @@ -96,6 +109,59 @@ } }); + $effect(() => { + updateOcrBoxes(ocrManager.showOverlay, ocrManager.data); + }); + + /** Use updateOnly=true on zoom, pan, or resize. */ + const updateOcrBoxes = (showOverlay: boolean, ocrData: OcrBoundingBox[], updateOnly = false) => { + if (!viewer || !viewer.state.textureData || !viewer.getPlugin(MarkersPlugin)) { + return; + } + const markersPlugin = viewer.getPlugin(MarkersPlugin); + if (!showOverlay) { + markersPlugin.clearMarkers(); + return; + } + if (!updateOnly) { + markersPlugin.clearMarkers(); + } + + const boxes = getOcrBoundingBoxesAtSize(ocrData, { + width: viewer.state.textureData.panoData.croppedWidth, + height: viewer.state.textureData.panoData.croppedHeight, + }); + + for (const [index, box] of boxes.entries()) { + const points = box.points.map((p) => texturePointToViewerPoint(viewer, p)); + const { matrix, width, height } = calculateBoundingBoxMatrix(points); + + const fontSize = (1.4 * width) / box.text.length; // fits almost all strings within the box, depends on font family + const transform = `matrix3d(${matrix.join(',')})`; + const content = `
${box.text}
`; + + if (updateOnly) { + markersPlugin.updateMarker({ + id: `box_${index}`, + polygonPixels: box.points.map((b) => [b.x, b.y]), + tooltip: { content }, + }); + } else { + markersPlugin.addMarker({ + id: `box_${index}`, + polygonPixels: box.points.map((b) => [b.x, b.y]), + svgStyle: OCR_BOX_SVG_STYLE, + tooltip: { content, trigger: 'click' }, + }); + } + } + }; + + const texturePointToViewerPoint = (viewer: Viewer, point: Point) => { + const spherical = viewer.dataHelper.textureCoordsToSphericalCoords({ textureX: point.x, textureY: point.y }); + return viewer.dataHelper.sphericalCoordsToViewerCoords(spherical); + }; + const onZoom = () => { viewer?.animate({ zoom: assetViewerManager.zoom > 1 ? 50 : 83.3, speed: 250 }); }; @@ -160,7 +226,20 @@ viewer.addEventListener(events.ZoomUpdatedEvent.type, zoomHandler, { passive: true }); } - return () => viewer.removeEventListener(events.ZoomUpdatedEvent.type, zoomHandler); + const onReadyHandler = () => updateOcrBoxes(ocrManager.showOverlay, ocrManager.data, false); + const updateHandler = () => updateOcrBoxes(ocrManager.showOverlay, ocrManager.data, true); + viewer.addEventListener(events.ReadyEvent.type, onReadyHandler); + viewer.addEventListener(events.PositionUpdatedEvent.type, updateHandler); + viewer.addEventListener(events.SizeUpdatedEvent.type, updateHandler); + viewer.addEventListener(events.ZoomUpdatedEvent.type, updateHandler, { passive: true }); + + return () => { + viewer.removeEventListener(events.ReadyEvent.type, onReadyHandler); + viewer.removeEventListener(events.PositionUpdatedEvent.type, updateHandler); + viewer.removeEventListener(events.SizeUpdatedEvent.type, updateHandler); + viewer.removeEventListener(events.ZoomUpdatedEvent.type, updateHandler); + viewer.removeEventListener(events.ZoomUpdatedEvent.type, zoomHandler); + }; }); onDestroy(() => { @@ -176,3 +255,25 @@
+ + diff --git a/web/src/lib/utils/ocr-utils.ts b/web/src/lib/utils/ocr-utils.ts index 97364d06f5..01f118a4e5 100644 --- a/web/src/lib/utils/ocr-utils.ts +++ b/web/src/lib/utils/ocr-utils.ts @@ -12,70 +12,58 @@ const getContainedSize = (img: HTMLImageElement): { width: number; height: numbe return { width, height }; }; +export type Point = { + x: number; + y: number; +}; + export interface OcrBox { id: string; - points: { x: number; y: number }[]; + points: Point[]; text: string; confidence: number; } -export interface BoundingBoxDimensions { - minX: number; - maxX: number; - minY: number; - maxY: number; - width: number; - height: number; - centerX: number; - centerY: number; - rotation: number; - skewX: number; - skewY: number; -} - /** - * Calculate bounding box dimensions and properties from OCR points + * Calculate bounding box transform from OCR points. Result matrix can be used as input for css matrix3d. * @param points - Array of 4 corner points of the bounding box - * @returns Dimensions, rotation, and skew values for the bounding box + * @returns 4x4 matrix to transform the div with text onto the polygon defined by the corner points, and size to set on the source div. */ -export const calculateBoundingBoxDimensions = (points: { x: number; y: number }[]): BoundingBoxDimensions => { +export const calculateBoundingBoxMatrix = (points: Point[]): { matrix: number[]; width: number; height: number } => { const [topLeft, topRight, bottomRight, bottomLeft] = points; - const minX = Math.min(...points.map(({ x }) => x)); - const maxX = Math.max(...points.map(({ x }) => x)); - const minY = Math.min(...points.map(({ y }) => y)); - const maxY = Math.max(...points.map(({ y }) => y)); - const width = maxX - minX; - const height = maxY - minY; - const centerX = (minX + maxX) / 2; - const centerY = (minY + maxY) / 2; - // Calculate rotation angle from the bottom edge (bottomLeft to bottomRight) - const rotation = Math.atan2(bottomRight.y - bottomLeft.y, bottomRight.x - bottomLeft.x) * (180 / Math.PI); + // Approximate width and height to prevent text distortion as much as possible + const distance = (p1: Point, p2: Point) => Math.hypot(p2.x - p1.x, p2.y - p1.y); + const width = Math.max(distance(topLeft, topRight), distance(bottomLeft, bottomRight)); + const height = Math.max(distance(topLeft, bottomLeft), distance(topRight, bottomRight)); - // Calculate skew angles to handle perspective distortion - // SkewX: compare left and right edges - const leftEdgeAngle = Math.atan2(bottomLeft.y - topLeft.y, bottomLeft.x - topLeft.x); - const rightEdgeAngle = Math.atan2(bottomRight.y - topRight.y, bottomRight.x - topRight.x); - const skewX = (rightEdgeAngle - leftEdgeAngle) * (180 / Math.PI); + const dx1 = topRight.x - bottomRight.x; + const dx2 = bottomLeft.x - bottomRight.x; + const dx3 = topLeft.x - topRight.x + bottomRight.x - bottomLeft.x; - // SkewY: compare top and bottom edges - const topEdgeAngle = Math.atan2(topRight.y - topLeft.y, topRight.x - topLeft.x); - const bottomEdgeAngle = Math.atan2(bottomRight.y - bottomLeft.y, bottomRight.x - bottomLeft.x); - const skewY = (bottomEdgeAngle - topEdgeAngle) * (180 / Math.PI); + const dy1 = topRight.y - bottomRight.y; + const dy2 = bottomLeft.y - bottomRight.y; + const dy3 = topLeft.y - topRight.y + bottomRight.y - bottomLeft.y; - return { - minX, - maxX, - minY, - maxY, - width, - height, - centerX, - centerY, - rotation, - skewX, - skewY, - }; + const det = dx1 * dy2 - dx2 * dy1; + const a13 = (dx3 * dy2 - dx2 * dy3) / det; + const a23 = (dx1 * dy3 - dx3 * dy1) / det; + + const a11 = (1 + a13) * topRight.x - topLeft.x; + const a21 = (1 + a23) * bottomLeft.x - topLeft.x; + + const a12 = (1 + a13) * topRight.y - topLeft.y; + const a22 = (1 + a23) * bottomLeft.y - topLeft.y; + + // prettier-ignore + const matrix = [ + a11 / width, a12 / width, 0, a13 / width, + a21 / height, a22 / height, 0, a23 / height, + 0, 0, 1, 0, + topLeft.x, topLeft.y, 0, 1, + ]; + + return { matrix, width, height }; }; /** @@ -87,18 +75,32 @@ export const getOcrBoundingBoxes = ( zoom: ZoomImageWheelState, photoViewer: HTMLImageElement | null, ): OcrBox[] => { - const boxes: OcrBox[] = []; - if (photoViewer === null || !photoViewer.naturalWidth || !photoViewer.naturalHeight) { - return boxes; + return []; } const clientHeight = photoViewer.clientHeight; const clientWidth = photoViewer.clientWidth; const { width, height } = getContainedSize(photoViewer); - const imageWidth = photoViewer.naturalWidth; - const imageHeight = photoViewer.naturalHeight; + const offset = { + x: ((clientWidth - width) / 2) * zoom.currentZoom + zoom.currentPositionX, + y: ((clientHeight - height) / 2) * zoom.currentZoom + zoom.currentPositionY, + }; + + return getOcrBoundingBoxesAtSize( + ocrData, + { width: width * zoom.currentZoom, height: height * zoom.currentZoom }, + offset, + ); +}; + +export const getOcrBoundingBoxesAtSize = ( + ocrData: OcrBoundingBox[], + targetSize: { width: number; height: number }, + offset?: Point, +) => { + const boxes: OcrBox[] = []; for (const ocr of ocrData) { // Convert normalized coordinates (0-1) to actual pixel positions @@ -109,14 +111,8 @@ export const getOcrBoundingBoxes = ( { x: ocr.x3, y: ocr.y3 }, { x: ocr.x4, y: ocr.y4 }, ].map((point) => ({ - x: - (width / imageWidth) * zoom.currentZoom * point.x * imageWidth + - ((clientWidth - width) / 2) * zoom.currentZoom + - zoom.currentPositionX, - y: - (height / imageHeight) * zoom.currentZoom * point.y * imageHeight + - ((clientHeight - height) / 2) * zoom.currentZoom + - zoom.currentPositionY, + x: targetSize.width * point.x + (offset?.x ?? 0), + y: targetSize.height * point.y + (offset?.y ?? 0), })); boxes.push({