-
-
-
{ocrBox.text}
diff --git a/web/src/lib/components/asset-viewer/photo-sphere-viewer-adapter.svelte b/web/src/lib/components/asset-viewer/photo-sphere-viewer-adapter.svelte
index f671aa1b1c..f4ba6868e0 100644
--- a/web/src/lib/components/asset-viewer/photo-sphere-viewer-adapter.svelte
+++ b/web/src/lib/components/asset-viewer/photo-sphere-viewer-adapter.svelte
@@ -2,8 +2,10 @@
import { shortcuts } from '$lib/actions/shortcut';
import AssetViewerEvents from '$lib/components/AssetViewerEvents.svelte';
import { assetViewerManager } from '$lib/managers/asset-viewer-manager.svelte';
+ import { ocrManager, type OcrBoundingBox } from '$lib/stores/ocr.svelte';
import { boundingBoxesArray, type Faces } from '$lib/stores/people.store';
import { alwaysLoadOriginalFile } from '$lib/stores/preferences.store';
+ import { calculateBoundingBoxMatrix, getOcrBoundingBoxesAtSize, type Point } from '$lib/utils/ocr-utils';
import {
EquirectangularAdapter,
Viewer,
@@ -27,6 +29,17 @@
strokeLinejoin: 'round',
};
+ // SVG style for OCR box markers, adapted as well as possible from classlist 'border-2 border-blue-500 bg-blue-500/10 hover:border-blue-600 hover:border-3'; only the fill and stroke are reproduced here, the hover variants are not.
+ const OCR_BOX_SVG_STYLE = {
+ fill: 'var(--color-blue-500)',
+ fillOpacity: '0.1',
+ stroke: 'var(--color-blue-500)',
+ strokeWidth: '2px',
+ };
+
+ const OCR_TOOLTIP_HTML_CLASS =
+ 'flex items-center justify-center text-white bg-black/50 cursor-text pointer-events-auto whitespace-pre-wrap wrap-break-word select-text'; // presumably the class list for the tooltip HTML; its use is not visible in this chunk — TODO confirm
+
type Props = {
panorama: string | { source: string };
originalPanorama?: string | { source: string };
@@ -96,6 +109,59 @@
}
});
+ $effect(() => {
+ updateOcrBoxes(ocrManager.showOverlay, ocrManager.data);
+ });
+
+ /** Adds or refreshes one polygon marker per OCR box (ids `box_<index>`), or clears the markers when showOverlay is false. Use updateOnly=true on zoom, pan, or resize: existing markers are then updated in place instead of being cleared and re-added. */
+ const updateOcrBoxes = (showOverlay: boolean, ocrData: OcrBoundingBox[], updateOnly = false) => {
+ if (!viewer || !viewer.state.textureData || !viewer.getPlugin(MarkersPlugin)) { // nothing to draw until the viewer, its texture data and the markers plugin all exist
+ return;
+ }
+ const markersPlugin = viewer.getPlugin
(MarkersPlugin);
+ if (!showOverlay) { // overlay switched off: clear the markers and stop
+ markersPlugin.clearMarkers();
+ return;
+ }
+ if (!updateOnly) { // full redraw: clear first, the markers are re-added in the loop below
+ markersPlugin.clearMarkers();
+ }
+
+ const boxes = getOcrBoundingBoxesAtSize(ocrData, { // OCR coordinates scaled to the cropped panorama texture size
+ width: viewer.state.textureData.panoData.croppedWidth,
+ height: viewer.state.textureData.panoData.croppedHeight,
+ });
+
+ for (const [index, box] of boxes.entries()) {
+ const points = box.points.map((p) => texturePointToViewerPoint(viewer, p)); // box corners converted from texture to viewer coordinates
+ const { matrix, width, height } = calculateBoundingBoxMatrix(points);
+
+ const fontSize = (1.4 * width) / box.text.length; // fits almost all strings within the box, depends on font family. NOTE(review): an empty box.text makes this a division by zero — confirm OCR never yields empty text
+ const transform = `matrix3d(${matrix.join(',')})`;
+ const content = `${box.text}
`;
+
+ if (updateOnly) { // NOTE(review): presumably updateMarker requires a marker with this id to exist already — confirm against the Markers plugin docs
+ markersPlugin.updateMarker({
+ id: `box_${index}`,
+ polygonPixels: box.points.map((b) => [b.x, b.y]),
+ tooltip: { content },
+ });
+ } else {
+ markersPlugin.addMarker({
+ id: `box_${index}`,
+ polygonPixels: box.points.map((b) => [b.x, b.y]), // polygon is given in texture pixel coordinates
+ svgStyle: OCR_BOX_SVG_STYLE,
+ tooltip: { content, trigger: 'click' },
+ });
+ }
+ }
+ };
+
+ const texturePointToViewerPoint = (viewer: Viewer, point: Point) => { // converts a texture point to viewer coordinates in two steps: texture -> spherical -> viewer
+ const spherical = viewer.dataHelper.textureCoordsToSphericalCoords({ textureX: point.x, textureY: point.y });
+ return viewer.dataHelper.sphericalCoordsToViewerCoords(spherical);
+ };
+
const onZoom = () => {
viewer?.animate({ zoom: assetViewerManager.zoom > 1 ? 50 : 83.3, speed: 250 });
};
@@ -160,7 +226,20 @@
viewer.addEventListener(events.ZoomUpdatedEvent.type, zoomHandler, { passive: true });
}
- return () => viewer.removeEventListener(events.ZoomUpdatedEvent.type, zoomHandler);
+ const onReadyHandler = () => updateOcrBoxes(ocrManager.showOverlay, ocrManager.data, false);
+ const updateHandler = () => updateOcrBoxes(ocrManager.showOverlay, ocrManager.data, true);
+ viewer.addEventListener(events.ReadyEvent.type, onReadyHandler);
+ viewer.addEventListener(events.PositionUpdatedEvent.type, updateHandler);
+ viewer.addEventListener(events.SizeUpdatedEvent.type, updateHandler);
+ viewer.addEventListener(events.ZoomUpdatedEvent.type, updateHandler, { passive: true });
+
+ return () => {
+ viewer.removeEventListener(events.ReadyEvent.type, onReadyHandler);
+ viewer.removeEventListener(events.PositionUpdatedEvent.type, updateHandler);
+ viewer.removeEventListener(events.SizeUpdatedEvent.type, updateHandler);
+ viewer.removeEventListener(events.ZoomUpdatedEvent.type, updateHandler);
+ viewer.removeEventListener(events.ZoomUpdatedEvent.type, zoomHandler);
+ };
});
onDestroy(() => {
@@ -176,3 +255,25 @@
+
+
diff --git a/web/src/lib/utils/ocr-utils.ts b/web/src/lib/utils/ocr-utils.ts
index 97364d06f5..01f118a4e5 100644
--- a/web/src/lib/utils/ocr-utils.ts
+++ b/web/src/lib/utils/ocr-utils.ts
@@ -12,70 +12,58 @@ const getContainedSize = (img: HTMLImageElement): { width: number; height: numbe
return { width, height };
};
+export type Point = {
+ x: number;
+ y: number;
+};
+
export interface OcrBox {
id: string;
- points: { x: number; y: number }[];
+ points: Point[];
text: string;
confidence: number;
}
-export interface BoundingBoxDimensions {
- minX: number;
- maxX: number;
- minY: number;
- maxY: number;
- width: number;
- height: number;
- centerX: number;
- centerY: number;
- rotation: number;
- skewX: number;
- skewY: number;
-}
-
/**
- * Calculate bounding box dimensions and properties from OCR points
+ * Calculate the projective transform that maps a width x height rectangle at the origin onto the quadrilateral given by the OCR corner points (read in the order top-left, top-right, bottom-right, bottom-left). The result matrix can be used as input for css matrix3d.
* @param points - Array of 4 corner points of the bounding box
- * @returns Dimensions, rotation, and skew values for the bounding box
+ * @returns matrix: 16 numbers laid out as matrix3d arguments, transforming the div with text onto the polygon defined by the corner points; width and height: the longer of each pair of opposite edges, to set as the size of the source div. Entries are non-finite (NaN or Infinity) when det, width, or height is 0, i.e. for a degenerate polygon.
*/
-export const calculateBoundingBoxDimensions = (points: { x: number; y: number }[]): BoundingBoxDimensions => {
+export const calculateBoundingBoxMatrix = (points: Point[]): { matrix: number[]; width: number; height: number } => {
const [topLeft, topRight, bottomRight, bottomLeft] = points;
- const minX = Math.min(...points.map(({ x }) => x));
- const maxX = Math.max(...points.map(({ x }) => x));
- const minY = Math.min(...points.map(({ y }) => y));
- const maxY = Math.max(...points.map(({ y }) => y));
- const width = maxX - minX;
- const height = maxY - minY;
- const centerX = (minX + maxX) / 2;
- const centerY = (minY + maxY) / 2;
- // Calculate rotation angle from the bottom edge (bottomLeft to bottomRight)
- const rotation = Math.atan2(bottomRight.y - bottomLeft.y, bottomRight.x - bottomLeft.x) * (180 / Math.PI);
+ // Use the longer of each pair of opposite edges as the source size, to prevent text distortion as much as possible
+ const distance = (p1: Point, p2: Point) => Math.hypot(p2.x - p1.x, p2.y - p1.y);
+ const width = Math.max(distance(topLeft, topRight), distance(bottomLeft, bottomRight));
+ const height = Math.max(distance(topLeft, bottomLeft), distance(topRight, bottomRight));
- // Calculate skew angles to handle perspective distortion
- // SkewX: compare left and right edges
- const leftEdgeAngle = Math.atan2(bottomLeft.y - topLeft.y, bottomLeft.x - topLeft.x);
- const rightEdgeAngle = Math.atan2(bottomRight.y - topRight.y, bottomRight.x - topRight.x);
- const skewX = (rightEdgeAngle - leftEdgeAngle) * (180 / Math.PI);
+ const dx1 = topRight.x - bottomRight.x; // right edge, x component
+ const dx2 = bottomLeft.x - bottomRight.x; // bottom edge, x component
+ const dx3 = topLeft.x - topRight.x + bottomRight.x - bottomLeft.x; // 0 (together with dy3) when the polygon is a parallelogram
- // SkewY: compare top and bottom edges
- const topEdgeAngle = Math.atan2(topRight.y - topLeft.y, topRight.x - topLeft.x);
- const bottomEdgeAngle = Math.atan2(bottomRight.y - bottomLeft.y, bottomRight.x - bottomLeft.x);
- const skewY = (bottomEdgeAngle - topEdgeAngle) * (180 / Math.PI);
+ const dy1 = topRight.y - bottomRight.y; // right edge, y component
+ const dy2 = bottomLeft.y - bottomRight.y; // bottom edge, y component
+ const dy3 = topLeft.y - topRight.y + bottomRight.y - bottomLeft.y;
- return {
- minX,
- maxX,
- minY,
- maxY,
- width,
- height,
- centerX,
- centerY,
- rotation,
- skewX,
- skewY,
- };
+ const det = dx1 * dy2 - dx2 * dy1; // 0 when the right and bottom edges are parallel; the two divisions below then give non-finite values
+ const a13 = (dx3 * dy2 - dx2 * dy3) / det; // perspective term along the width; 0 for a parallelogram
+ const a23 = (dx1 * dy3 - dx3 * dy1) / det; // perspective term along the height; 0 for a parallelogram
+
+ const a11 = (1 + a13) * topRight.x - topLeft.x;
+ const a21 = (1 + a23) * bottomLeft.x - topLeft.x;
+
+ const a12 = (1 + a13) * topRight.y - topLeft.y;
+ const a22 = (1 + a23) * bottomLeft.y - topLeft.y;
+
+ // prettier-ignore
+ const matrix = [
+ a11 / width, a12 / width, 0, a13 / width, // dividing by width lets a source div of that width span the full top edge
+ a21 / height, a22 / height, 0, a23 / height, // likewise for height along the left edge
+ 0, 0, 1, 0, // z is passed through unchanged
+ topLeft.x, topLeft.y, 0, 1, // translation: the source origin lands on topLeft
+ ];
+
+ return { matrix, width, height };
};
/**
@@ -87,18 +75,32 @@ export const getOcrBoundingBoxes = (
zoom: ZoomImageWheelState,
photoViewer: HTMLImageElement | null,
): OcrBox[] => {
- const boxes: OcrBox[] = [];
-
if (photoViewer === null || !photoViewer.naturalWidth || !photoViewer.naturalHeight) {
- return boxes;
+ return [];
}
const clientHeight = photoViewer.clientHeight;
const clientWidth = photoViewer.clientWidth;
const { width, height } = getContainedSize(photoViewer);
- const imageWidth = photoViewer.naturalWidth;
- const imageHeight = photoViewer.naturalHeight;
+ const offset = {
+ x: ((clientWidth - width) / 2) * zoom.currentZoom + zoom.currentPositionX,
+ y: ((clientHeight - height) / 2) * zoom.currentZoom + zoom.currentPositionY,
+ };
+
+ return getOcrBoundingBoxesAtSize(
+ ocrData,
+ { width: width * zoom.currentZoom, height: height * zoom.currentZoom },
+ offset,
+ );
+};
+
+export const getOcrBoundingBoxesAtSize = (
+ ocrData: OcrBoundingBox[],
+ targetSize: { width: number; height: number },
+ offset?: Point,
+) => {
+ const boxes: OcrBox[] = [];
for (const ocr of ocrData) {
// Convert normalized coordinates (0-1) to actual pixel positions
@@ -109,14 +111,8 @@ export const getOcrBoundingBoxes = (
{ x: ocr.x3, y: ocr.y3 },
{ x: ocr.x4, y: ocr.y4 },
].map((point) => ({
- x:
- (width / imageWidth) * zoom.currentZoom * point.x * imageWidth +
- ((clientWidth - width) / 2) * zoom.currentZoom +
- zoom.currentPositionX,
- y:
- (height / imageHeight) * zoom.currentZoom * point.y * imageHeight +
- ((clientHeight - height) / 2) * zoom.currentZoom +
- zoom.currentPositionY,
+ x: targetSize.width * point.x + (offset?.x ?? 0),
+ y: targetSize.height * point.y + (offset?.y ?? 0),
}));
boxes.push({