mirror of
https://github.com/immich-app/immich.git
synced 2026-02-28 01:29:04 +03:00
feat(web): show ocr text boxes in panoramas (#25727)
This commit is contained in:
@@ -425,7 +425,6 @@
|
|||||||
const showOcrButton = $derived(
|
const showOcrButton = $derived(
|
||||||
$slideshowState === SlideshowState.None &&
|
$slideshowState === SlideshowState.None &&
|
||||||
asset.type === AssetTypeEnum.Image &&
|
asset.type === AssetTypeEnum.Image &&
|
||||||
!(asset.exifInfo?.projectionType === 'EQUIRECTANGULAR') &&
|
|
||||||
!assetViewerManager.isShowEditor &&
|
!assetViewerManager.isShowEditor &&
|
||||||
ocrManager.hasOcrData,
|
ocrManager.hasOcrData,
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import type { OcrBox } from '$lib/utils/ocr-utils';
|
import type { OcrBox } from '$lib/utils/ocr-utils';
|
||||||
import { calculateBoundingBoxDimensions } from '$lib/utils/ocr-utils';
|
import { calculateBoundingBoxMatrix } from '$lib/utils/ocr-utils';
|
||||||
|
|
||||||
type Props = {
|
type Props = {
|
||||||
ocrBox: OcrBox;
|
ocrBox: OcrBox;
|
||||||
@@ -8,28 +8,19 @@
|
|||||||
|
|
||||||
let { ocrBox }: Props = $props();
|
let { ocrBox }: Props = $props();
|
||||||
|
|
||||||
const dimensions = $derived(calculateBoundingBoxDimensions(ocrBox.points));
|
const dimensions = $derived(calculateBoundingBoxMatrix(ocrBox.points));
|
||||||
|
|
||||||
const transform = $derived(
|
const transform = $derived(`matrix3d(${dimensions.matrix.join(',')})`);
|
||||||
`translate(${dimensions.minX}px, ${dimensions.minY}px) rotate(${dimensions.rotation}deg) skew(${dimensions.skewX}deg, ${dimensions.skewY}deg)`,
|
// Fits almost all strings within the box, depends on font family
|
||||||
);
|
const fontSize = $derived(
|
||||||
|
`max(var(--text-sm), min(var(--text-6xl), ${(1.4 * dimensions.width) / ocrBox.text.length}px))`,
|
||||||
const transformOrigin = $derived(
|
|
||||||
`${dimensions.centerX - dimensions.minX}px ${dimensions.centerY - dimensions.minY}px`,
|
|
||||||
);
|
);
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div class="absolute group left-0 top-0 pointer-events-none">
|
<div class="absolute left-0 top-0">
|
||||||
<!-- Bounding box with CSS transforms -->
|
|
||||||
<div
|
<div
|
||||||
class="absolute border-2 border-blue-500 bg-blue-500/10 cursor-pointer pointer-events-auto transition-all group-hover:bg-blue-500/30 group-hover:border-blue-600 group-hover:border-[3px]"
|
class="absolute flex items-center justify-center text-transparent text-sm border-2 border-blue-500 bg-blue-500/10 px-2 py-1 pointer-events-auto cursor-text whitespace-pre-wrap wrap-break-word select-text transition-all hover:text-white hover:bg-black/60 hover:border-blue-600 hover:border-3"
|
||||||
style="width: {dimensions.width}px; height: {dimensions.height}px; transform: {transform}; transform-origin: {transformOrigin};"
|
style="font-size: {fontSize}; width: {dimensions.width}px; height: {dimensions.height}px; transform: {transform}; transform-origin: 0 0;"
|
||||||
></div>
|
|
||||||
|
|
||||||
<!-- Text overlay - always rendered but invisible, allows text selection and copy -->
|
|
||||||
<div
|
|
||||||
class="absolute flex items-center justify-center text-transparent text-sm px-2 py-1 pointer-events-auto cursor-text whitespace-pre-wrap wrap-break-word select-text group-hover:text-white group-hover:bg-black/75 group-hover:z-10"
|
|
||||||
style="width: {dimensions.width}px; height: {dimensions.height}px; transform: {transform}; transform-origin: {transformOrigin};"
|
|
||||||
>
|
>
|
||||||
{ocrBox.text}
|
{ocrBox.text}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -2,8 +2,10 @@
|
|||||||
import { shortcuts } from '$lib/actions/shortcut';
|
import { shortcuts } from '$lib/actions/shortcut';
|
||||||
import AssetViewerEvents from '$lib/components/AssetViewerEvents.svelte';
|
import AssetViewerEvents from '$lib/components/AssetViewerEvents.svelte';
|
||||||
import { assetViewerManager } from '$lib/managers/asset-viewer-manager.svelte';
|
import { assetViewerManager } from '$lib/managers/asset-viewer-manager.svelte';
|
||||||
|
import { ocrManager, type OcrBoundingBox } from '$lib/stores/ocr.svelte';
|
||||||
import { boundingBoxesArray, type Faces } from '$lib/stores/people.store';
|
import { boundingBoxesArray, type Faces } from '$lib/stores/people.store';
|
||||||
import { alwaysLoadOriginalFile } from '$lib/stores/preferences.store';
|
import { alwaysLoadOriginalFile } from '$lib/stores/preferences.store';
|
||||||
|
import { calculateBoundingBoxMatrix, getOcrBoundingBoxesAtSize, type Point } from '$lib/utils/ocr-utils';
|
||||||
import {
|
import {
|
||||||
EquirectangularAdapter,
|
EquirectangularAdapter,
|
||||||
Viewer,
|
Viewer,
|
||||||
@@ -27,6 +29,17 @@
|
|||||||
strokeLinejoin: 'round',
|
strokeLinejoin: 'round',
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Adapted as well as possible from classlist 'border-2 border-blue-500 bg-blue-500/10 hover:border-blue-600 hover:border-3'
|
||||||
|
const OCR_BOX_SVG_STYLE = {
|
||||||
|
fill: 'var(--color-blue-500)',
|
||||||
|
fillOpacity: '0.1',
|
||||||
|
stroke: 'var(--color-blue-500)',
|
||||||
|
strokeWidth: '2px',
|
||||||
|
};
|
||||||
|
|
||||||
|
const OCR_TOOLTIP_HTML_CLASS =
|
||||||
|
'flex items-center justify-center text-white bg-black/50 cursor-text pointer-events-auto whitespace-pre-wrap wrap-break-word select-text';
|
||||||
|
|
||||||
type Props = {
|
type Props = {
|
||||||
panorama: string | { source: string };
|
panorama: string | { source: string };
|
||||||
originalPanorama?: string | { source: string };
|
originalPanorama?: string | { source: string };
|
||||||
@@ -96,6 +109,59 @@
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
$effect(() => {
|
||||||
|
updateOcrBoxes(ocrManager.showOverlay, ocrManager.data);
|
||||||
|
});
|
||||||
|
|
||||||
|
/** Replacements for the characters that carry meaning in HTML text and attribute values. */
const OCR_HTML_ESCAPES: Record<string, string> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

/** Escapes recognised text so it is displayed literally when interpolated into the tooltip HTML string. */
const escapeOcrText = (text: string) => text.replace(/[&<>"']/g, (char) => OCR_HTML_ESCAPES[char] ?? char);

/**
 * Synchronises the OCR markers of the panorama viewer with the OCR overlay state.
 *
 * Does nothing until the viewer, its texture data and the markers plugin are available.
 * When the overlay is hidden all markers are cleared. Otherwise one polygon marker per OCR box
 * is added (or updated), with a tooltip containing the recognised text laid out over the box
 * via a `matrix3d` transform computed from the box corners in viewer coordinates.
 *
 * @param showOverlay - Whether the OCR overlay should be visible.
 * @param ocrData - OCR bounding boxes to draw.
 * @param updateOnly - Use `true` on zoom, pan, or resize: the existing `box_<index>` markers are
 * updated in place instead of being cleared and re-added.
 */
const updateOcrBoxes = (showOverlay: boolean, ocrData: OcrBoundingBox[], updateOnly = false) => {
  if (!viewer || !viewer.state.textureData || !viewer.getPlugin(MarkersPlugin)) {
    return;
  }
  const markersPlugin = viewer.getPlugin<MarkersPlugin>(MarkersPlugin);
  if (!showOverlay) {
    markersPlugin.clearMarkers();
    return;
  }
  if (!updateOnly) {
    markersPlugin.clearMarkers();
  }

  // Scale the OCR boxes to the size reported by the loaded panorama texture
  const boxes = getOcrBoundingBoxesAtSize(ocrData, {
    width: viewer.state.textureData.panoData.croppedWidth,
    height: viewer.state.textureData.panoData.croppedHeight,
  });

  for (const [index, box] of boxes.entries()) {
    // The tooltip is positioned in viewer coordinates, the polygon marker in texture coordinates
    const points = box.points.map((p) => texturePointToViewerPoint(viewer, p));
    const { matrix, width, height } = calculateBoundingBoxMatrix(points);

    // Fits almost all strings within the box, depends on font family.
    // The length is clamped to 1 so empty text cannot produce an `Infinitypx` font size.
    const fontSize = (1.4 * width) / Math.max(1, box.text.length);
    const transform = `matrix3d(${matrix.join(',')})`;
    // The tooltip content is an HTML string, so the recognised text must be escaped before interpolation
    const content = `<div class="${OCR_TOOLTIP_HTML_CLASS}" style="font-size: ${fontSize}px; width: ${width}px; height: ${height}px; transform: ${transform}; transform-origin: 0 0;">${escapeOcrText(box.text)}</div>`;

    if (updateOnly) {
      markersPlugin.updateMarker({
        id: `box_${index}`,
        polygonPixels: box.points.map((b) => [b.x, b.y]),
        tooltip: { content },
      });
    } else {
      markersPlugin.addMarker({
        id: `box_${index}`,
        polygonPixels: box.points.map((b) => [b.x, b.y]),
        svgStyle: OCR_BOX_SVG_STYLE,
        tooltip: { content, trigger: 'click' },
      });
    }
  }
};
|
||||||
|
|
||||||
|
/**
 * Converts a point given in panorama texture coordinates to viewer coordinates,
 * going through the viewer's spherical coordinate representation.
 */
const texturePointToViewerPoint = (viewer: Viewer, point: Point) => {
  const { dataHelper } = viewer;
  const { x: textureX, y: textureY } = point;
  return dataHelper.sphericalCoordsToViewerCoords(dataHelper.textureCoordsToSphericalCoords({ textureX, textureY }));
};
|
||||||
|
|
||||||
const onZoom = () => {
|
const onZoom = () => {
|
||||||
viewer?.animate({ zoom: assetViewerManager.zoom > 1 ? 50 : 83.3, speed: 250 });
|
viewer?.animate({ zoom: assetViewerManager.zoom > 1 ? 50 : 83.3, speed: 250 });
|
||||||
};
|
};
|
||||||
@@ -160,7 +226,20 @@
|
|||||||
viewer.addEventListener(events.ZoomUpdatedEvent.type, zoomHandler, { passive: true });
|
viewer.addEventListener(events.ZoomUpdatedEvent.type, zoomHandler, { passive: true });
|
||||||
}
|
}
|
||||||
|
|
||||||
return () => viewer.removeEventListener(events.ZoomUpdatedEvent.type, zoomHandler);
|
const onReadyHandler = () => updateOcrBoxes(ocrManager.showOverlay, ocrManager.data, false);
|
||||||
|
const updateHandler = () => updateOcrBoxes(ocrManager.showOverlay, ocrManager.data, true);
|
||||||
|
viewer.addEventListener(events.ReadyEvent.type, onReadyHandler);
|
||||||
|
viewer.addEventListener(events.PositionUpdatedEvent.type, updateHandler);
|
||||||
|
viewer.addEventListener(events.SizeUpdatedEvent.type, updateHandler);
|
||||||
|
viewer.addEventListener(events.ZoomUpdatedEvent.type, updateHandler, { passive: true });
|
||||||
|
|
||||||
|
return () => {
|
||||||
|
viewer.removeEventListener(events.ReadyEvent.type, onReadyHandler);
|
||||||
|
viewer.removeEventListener(events.PositionUpdatedEvent.type, updateHandler);
|
||||||
|
viewer.removeEventListener(events.SizeUpdatedEvent.type, updateHandler);
|
||||||
|
viewer.removeEventListener(events.ZoomUpdatedEvent.type, updateHandler);
|
||||||
|
viewer.removeEventListener(events.ZoomUpdatedEvent.type, zoomHandler);
|
||||||
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
onDestroy(() => {
|
onDestroy(() => {
|
||||||
@@ -176,3 +255,25 @@
|
|||||||
|
|
||||||
<svelte:document use:shortcuts={[{ shortcut: { key: 'z' }, onShortcut: onZoom, preventDefault: true }]} />
|
<svelte:document use:shortcuts={[{ shortcut: { key: 'z' }, onShortcut: onZoom, preventDefault: true }]} />
|
||||||
<div class="h-full w-full mb-0" bind:this={container}></div>
|
<div class="h-full w-full mb-0" bind:this={container}></div>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
/* Reset the default tooltip styling */
|
||||||
|
:global(.psv-tooltip) {
|
||||||
|
top: 0 !important;
|
||||||
|
left: 0 !important;
|
||||||
|
background: none;
|
||||||
|
box-shadow: none;
|
||||||
|
width: 0;
|
||||||
|
height: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
:global(.psv-tooltip-content) {
|
||||||
|
font: var(--font-normal);
|
||||||
|
padding: 0;
|
||||||
|
text-shadow: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
:global(.psv-tooltip-arrow) {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
|||||||
@@ -12,70 +12,58 @@ const getContainedSize = (img: HTMLImageElement): { width: number; height: numbe
|
|||||||
return { width, height };
|
return { width, height };
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export type Point = {
|
||||||
|
x: number;
|
||||||
|
y: number;
|
||||||
|
};
|
||||||
|
|
||||||
export interface OcrBox {
|
export interface OcrBox {
|
||||||
id: string;
|
id: string;
|
||||||
points: { x: number; y: number }[];
|
points: Point[];
|
||||||
text: string;
|
text: string;
|
||||||
confidence: number;
|
confidence: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface BoundingBoxDimensions {
|
|
||||||
minX: number;
|
|
||||||
maxX: number;
|
|
||||||
minY: number;
|
|
||||||
maxY: number;
|
|
||||||
width: number;
|
|
||||||
height: number;
|
|
||||||
centerX: number;
|
|
||||||
centerY: number;
|
|
||||||
rotation: number;
|
|
||||||
skewX: number;
|
|
||||||
skewY: number;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Calculate bounding box transform from OCR points. Result matrix can be used as input for css matrix3d.
 *
 * The transform is the projective mapping of a `width` x `height` rectangle anchored at the origin
 * onto the quadrilateral described by the corner points, so the source div must use
 * `transform-origin: 0 0`.
 *
 * Degenerate quadrilaterals (collapsed corners, zero width or height) never produce NaN or
 * Infinity entries: the perspective terms fall back to 0 and zero sizes are not used as divisors.
 *
 * @param points - Array of 4 corner points of the bounding box, ordered top-left, top-right, bottom-right, bottom-left
 * @returns 4x4 matrix (16 values in the order expected by css matrix3d) to transform the div with text
 * onto the polygon defined by the corner points, and size to set on the source div.
 */
export const calculateBoundingBoxMatrix = (points: Point[]): { matrix: number[]; width: number; height: number } => {
  const [topLeft, topRight, bottomRight, bottomLeft] = points;

  // Approximate width and height to prevent text distortion as much as possible
  const distance = (p1: Point, p2: Point) => Math.hypot(p2.x - p1.x, p2.y - p1.y);
  const width = Math.max(distance(topLeft, topRight), distance(bottomLeft, bottomRight));
  const height = Math.max(distance(topLeft, bottomLeft), distance(topRight, bottomRight));

  const dx1 = topRight.x - bottomRight.x;
  const dx2 = bottomLeft.x - bottomRight.x;
  const dx3 = topLeft.x - topRight.x + bottomRight.x - bottomLeft.x;

  const dy1 = topRight.y - bottomRight.y;
  const dy2 = bottomLeft.y - bottomRight.y;
  const dy3 = topLeft.y - topRight.y + bottomRight.y - bottomLeft.y;

  // Perspective terms. A zero determinant means the two edges meeting at the bottom-right corner
  // are collinear (or collapsed); use the affine mapping in that case instead of dividing by zero.
  const det = dx1 * dy2 - dx2 * dy1;
  const a13 = det === 0 ? 0 : (dx3 * dy2 - dx2 * dy3) / det;
  const a23 = det === 0 ? 0 : (dx1 * dy3 - dx3 * dy1) / det;

  const a11 = (1 + a13) * topRight.x - topLeft.x;
  const a21 = (1 + a23) * bottomLeft.x - topLeft.x;

  const a12 = (1 + a13) * topRight.y - topLeft.y;
  const a22 = (1 + a23) * bottomLeft.y - topLeft.y;

  // The mapping above is defined for the unit square; dividing by the div size makes it apply to
  // a width x height div. A zero size has nothing to scale, so avoid using it as a divisor.
  const scaleX = width || 1;
  const scaleY = height || 1;

  // prettier-ignore
  const matrix = [
    a11 / scaleX, a12 / scaleX, 0, a13 / scaleX,
    a21 / scaleY, a22 / scaleY, 0, a23 / scaleY,
    0, 0, 1, 0,
    topLeft.x, topLeft.y, 0, 1,
  ];

  return { matrix, width, height };
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -87,18 +75,32 @@ export const getOcrBoundingBoxes = (
|
|||||||
zoom: ZoomImageWheelState,
|
zoom: ZoomImageWheelState,
|
||||||
photoViewer: HTMLImageElement | null,
|
photoViewer: HTMLImageElement | null,
|
||||||
): OcrBox[] => {
|
): OcrBox[] => {
|
||||||
const boxes: OcrBox[] = [];
|
|
||||||
|
|
||||||
if (photoViewer === null || !photoViewer.naturalWidth || !photoViewer.naturalHeight) {
|
if (photoViewer === null || !photoViewer.naturalWidth || !photoViewer.naturalHeight) {
|
||||||
return boxes;
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
const clientHeight = photoViewer.clientHeight;
|
const clientHeight = photoViewer.clientHeight;
|
||||||
const clientWidth = photoViewer.clientWidth;
|
const clientWidth = photoViewer.clientWidth;
|
||||||
const { width, height } = getContainedSize(photoViewer);
|
const { width, height } = getContainedSize(photoViewer);
|
||||||
|
|
||||||
const imageWidth = photoViewer.naturalWidth;
|
const offset = {
|
||||||
const imageHeight = photoViewer.naturalHeight;
|
x: ((clientWidth - width) / 2) * zoom.currentZoom + zoom.currentPositionX,
|
||||||
|
y: ((clientHeight - height) / 2) * zoom.currentZoom + zoom.currentPositionY,
|
||||||
|
};
|
||||||
|
|
||||||
|
return getOcrBoundingBoxesAtSize(
|
||||||
|
ocrData,
|
||||||
|
{ width: width * zoom.currentZoom, height: height * zoom.currentZoom },
|
||||||
|
offset,
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
|
export const getOcrBoundingBoxesAtSize = (
|
||||||
|
ocrData: OcrBoundingBox[],
|
||||||
|
targetSize: { width: number; height: number },
|
||||||
|
offset?: Point,
|
||||||
|
) => {
|
||||||
|
const boxes: OcrBox[] = [];
|
||||||
|
|
||||||
for (const ocr of ocrData) {
|
for (const ocr of ocrData) {
|
||||||
// Convert normalized coordinates (0-1) to actual pixel positions
|
// Convert normalized coordinates (0-1) to actual pixel positions
|
||||||
@@ -109,14 +111,8 @@ export const getOcrBoundingBoxes = (
|
|||||||
{ x: ocr.x3, y: ocr.y3 },
|
{ x: ocr.x3, y: ocr.y3 },
|
||||||
{ x: ocr.x4, y: ocr.y4 },
|
{ x: ocr.x4, y: ocr.y4 },
|
||||||
].map((point) => ({
|
].map((point) => ({
|
||||||
x:
|
x: targetSize.width * point.x + (offset?.x ?? 0),
|
||||||
(width / imageWidth) * zoom.currentZoom * point.x * imageWidth +
|
y: targetSize.height * point.y + (offset?.y ?? 0),
|
||||||
((clientWidth - width) / 2) * zoom.currentZoom +
|
|
||||||
zoom.currentPositionX,
|
|
||||||
y:
|
|
||||||
(height / imageHeight) * zoom.currentZoom * point.y * imageHeight +
|
|
||||||
((clientHeight - height) / 2) * zoom.currentZoom +
|
|
||||||
zoom.currentPositionY,
|
|
||||||
}));
|
}));
|
||||||
|
|
||||||
boxes.push({
|
boxes.push({
|
||||||
|
|||||||
Reference in New Issue
Block a user