Mirror of https://github.com/immich-app/immich.git, synced 2026-02-15 13:28:24 +03:00

Compare commits: perf/optim...feat/fd-gl (1 commit)

| Author | SHA1 | Date |
|---|---|---|
|  | 334ebbfe7d |  |
@@ -73,6 +73,12 @@ RUN --mount=type=cache,id=pnpm-plugins,target=/buildcache/pnpm-store \

 FROM ghcr.io/immich-app/base-server-prod:202601131104@sha256:c649c5838b6348836d27db6d49cadbbc6157feae7a1a237180c3dec03577ba8f

+RUN apt-get update && \
+    apt-get install -y fd-find && \
+    ln -s /usr/bin/fdfind /usr/local/bin/fd && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
 WORKDIR /usr/src/app

 ENV NODE_ENV=production \
     NVIDIA_DRIVER_CAPABILITIES=all \
@@ -18,6 +18,12 @@ WORKDIR /tmp/create-dep-cache
 RUN pnpm fetch && rm -rf /tmp/create-dep-cache && chmod -R o+rw /buildcache
 WORKDIR /usr/src/app

+RUN apt-get update && \
+    apt-get install -y fd-find && \
+    ln -s /usr/bin/fdfind /usr/local/bin/fd && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
 ENV PATH="${PATH}:/usr/src/app/server/bin:/usr/src/app/web/bin" \
     IMMICH_ENV=development \
     NVIDIA_DRIVER_CAPABILITIES=all \
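Both stages install Debian's fd-find package, whose binary is named fdfind, and symlink it to fd. The repository code later in this diff spawns fdfind directly, so a startup probe along these lines could surface a missing binary early. This is a minimal sketch, not part of the diff; checkFdAvailable is a hypothetical helper.

```ts
import { spawn } from 'node:child_process';

// Hypothetical probe (not part of this diff): resolves true when the
// `fdfind` binary used by StorageRepository.crawl() is on PATH.
function checkFdAvailable(): Promise<boolean> {
  return new Promise((resolve) => {
    const probe = spawn('fdfind', ['--version']);
    probe.on('error', () => resolve(false)); // ENOENT: binary not installed
    probe.on('close', (code) => resolve(code === 0));
  });
}

// Usage: checkFdAvailable().then((ok) => { if (!ok) throw new Error('fd-find missing'); });
```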
@@ -58,10 +58,7 @@ export interface CrawlOptionsDto {
   pathsToCrawl: string[];
   includeHidden?: boolean;
   exclusionPatterns?: string[];
-}
-
-export interface WalkOptionsDto extends CrawlOptionsDto {
-  take: number;
+  take?: number;
 }

 export class ValidateLibraryDto {
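Reading this hunk together with the rest of the diff: WalkOptionsDto is deleted and take moves into CrawlOptionsDto as an optional field, so callers that paginate the result themselves can omit it. A sketch of both call shapes, assuming that reading (paths and patterns are illustrative):

```ts
import { CrawlOptionsDto } from 'src/dtos/library.dto';

// take omitted: the caller chunks the returned array itself.
const options: CrawlOptionsDto = {
  pathsToCrawl: ['/photos'],
  includeHidden: false,
  exclusionPatterns: ['**/.thumbnails/**'], // illustrative pattern
};

// take remains type-correct for callers that still want a batch size.
const batched: CrawlOptionsDto = { ...options, take: 1000 };
```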
@@ -1,13 +1,13 @@
 import { Injectable } from '@nestjs/common';
 import archiver from 'archiver';
 import chokidar, { ChokidarOptions } from 'chokidar';
-import { escapePath, glob, globStream } from 'fast-glob';
+import { spawn } from 'node:child_process';
 import { constants, createReadStream, createWriteStream, existsSync, mkdirSync, ReadOptionsWithBuffer } from 'node:fs';
 import fs from 'node:fs/promises';
 import path from 'node:path';
 import { PassThrough, Readable, Writable } from 'node:stream';
 import { createGunzip, createGzip } from 'node:zlib';
-import { CrawlOptionsDto, WalkOptionsDto } from 'src/dtos/library.dto';
+import { CrawlOptionsDto } from 'src/dtos/library.dto';
 import { LoggingRepository } from 'src/repositories/logging.repository';
 import { mimeTypes } from 'src/utils/mime-types';
@@ -198,52 +198,87 @@ export class StorageRepository {
     };
   }

-  crawl(crawlOptions: CrawlOptionsDto): Promise<string[]> {
+  async crawl(crawlOptions: CrawlOptionsDto): Promise<string[]> {
     const { pathsToCrawl, exclusionPatterns, includeHidden } = crawlOptions;
     if (pathsToCrawl.length === 0) {
-      return Promise.resolve([]);
+      return [];
     }

-    const globbedPaths = pathsToCrawl.map((path) => this.asGlob(path));
-
-    return glob(globbedPaths, {
-      absolute: true,
-      caseSensitiveMatch: false,
-      onlyFiles: true,
-      dot: includeHidden,
-      ignore: exclusionPatterns,
-    });
-  }
-
-  async *walk(walkOptions: WalkOptionsDto): AsyncGenerator<string[]> {
-    const { pathsToCrawl, exclusionPatterns, includeHidden } = walkOptions;
-    if (pathsToCrawl.length === 0) {
-      async function* emptyGenerator() {}
-      return emptyGenerator();
-    }
-
-    const globbedPaths = pathsToCrawl.map((path) => this.asGlob(path));
-
-    const stream = globStream(globbedPaths, {
-      absolute: true,
-      caseSensitiveMatch: false,
-      onlyFiles: true,
-      dot: includeHidden,
-      ignore: exclusionPatterns,
-    });
-
-    let batch: string[] = [];
-    for await (const value of stream) {
-      batch.push(value.toString());
-      if (batch.length === walkOptions.take) {
-        yield batch;
-        batch = [];
-      }
-    }
-
-    if (batch.length > 0) {
-      yield batch;
-    }
+    return new Promise((resolve, reject) => {
+      const args: string[] = [
+        '-t',
+        'f', // File type: only files
+        '-a', // Absolute paths
+        '-i', // Case insensitive
+        '.', // Search pattern: match all files
+      ];
+
+      if (includeHidden) {
+        args.push('-H');
+      }
+
+      for (const pattern of exclusionPatterns ?? []) {
+        args.push('-E', pattern);
+      }
+
+      const extensions = mimeTypes.getSupportedFileExtensions();
+      for (const ext of extensions) {
+        // fd expects extensions without the dot
+        args.push('-e', ext.replace(/^\./, ''));
+      }
+
+      args.push(...pathsToCrawl);
+
+      const fdfind = spawn('fdfind', args);
+
+      const files: string[] = [];
+      let buffer = '';
+      let stderr = '';
+
+      fdfind.stdout.on('data', (data) => {
+        buffer += data.toString();
+        const lines = buffer.split('\n');
+        // Keep the last partial line in the buffer
+        buffer = lines.pop() || '';
+
+        for (const line of lines) {
+          const trimmed = line.trim();
+          if (trimmed.length > 0) {
+            files.push(trimmed);
+          }
+        }
+      });
+
+      fdfind.stderr.on('data', (data) => {
+        stderr += data.toString();
+      });
+
+      fdfind.on('close', (code) => {
+        // Process any remaining data in the buffer
+        if (buffer.length > 0) {
+          const trimmed = buffer.trim();
+          if (trimmed.length > 0) {
+            files.push(trimmed);
+          }
+        }
+
+        if (code === 0) {
+          resolve(files);
+        } else {
+          reject(new Error(`fdfind process exited with code ${code}: ${stderr}`));
+        }
+      });
+
+      fdfind.on('error', (error) => {
+        if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
+          reject(
+            new Error('fdfind command not found. Please install fd-find: https://github.com/sharkdp/fd#installation'),
+          );
+        } else {
+          reject(new Error(`Failed to spawn fdfind: ${error.message}`));
+        }
+      });
+    });
   }

   watch(paths: string[], options: ChokidarOptions, events: Partial<WatchEvents>) {
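The subtle part of the new crawl() is the stdout handling: 'data' events deliver arbitrary chunks, so the handler splits on newlines and carries the unterminated tail over to the next chunk, flushing whatever remains on 'close'. A standalone sketch of that pattern (LineSink is an illustrative name, not from the diff):

```ts
// Illustrative line-buffering over a chunked byte stream; the
// pop-the-partial-tail pattern mirrors the handler in the diff.
class LineSink {
  private buffer = '';
  readonly lines: string[] = [];

  push(chunk: Buffer | string): void {
    this.buffer += chunk.toString();
    const parts = this.buffer.split('\n');
    this.buffer = parts.pop() || ''; // retain the unterminated tail
    for (const line of parts) {
      const trimmed = line.trim();
      if (trimmed.length > 0) {
        this.lines.push(trimmed);
      }
    }
  }

  flush(): void {
    const trimmed = this.buffer.trim();
    if (trimmed.length > 0) {
      this.lines.push(trimmed);
    }
    this.buffer = '';
  }
}

// A path split across two chunks is reassembled correctly:
const sink = new LineSink();
sink.push('/photos/a.jpg\n/photos/b');
sink.push('.png\n');
sink.flush();
// sink.lines => ['/photos/a.jpg', '/photos/b.png']
```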
@@ -257,10 +292,4 @@ export class StorageRepository {

     return () => watcher.close();
   }
-
-  private asGlob(pathToCrawl: string): string {
-    const escapedPath = escapePath(pathToCrawl).replaceAll('"', '["]').replaceAll("'", "[']").replaceAll('`', '[`]');
-    const extensions = `*{${mimeTypes.getSupportedFileExtensions().join(',')}}`;
-    return `${escapedPath}/**/${extensions}`;
-  }
 }
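With fd doing the filtering, asGlob and its quote-escaping workaround are no longer needed: extension matching moves from a brace-expansion glob into repeated -e flags. A rough sketch of the correspondence, with a shortened extension list for illustration:

```ts
// Old approach: one glob per import path, extensions brace-expanded.
const exts = ['.jpg', '.png', '.heic']; // illustrative subset
const globPattern = `/photos/**/*{${exts.join(',')}}`;
// => '/photos/**/*{.jpg,.png,.heic}'

// New approach: fd arguments; extensions are passed without the leading dot.
const fdArgs = ['-t', 'f', ...exts.flatMap((e) => ['-e', e.replace(/^\./, '')]), '/photos'];
// => ['-t', 'f', '-e', 'jpg', '-e', 'png', '-e', 'heic', '/photos']
```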
@@ -1,7 +1,6 @@
 import { BadRequestException } from '@nestjs/common';
 import { Stats } from 'node:fs';
 import { defaults, SystemConfig } from 'src/config';
-import { JOBS_LIBRARY_PAGINATION_SIZE } from 'src/constants';
 import { mapLibrary } from 'src/dtos/library.dto';
 import { AssetType, CronJob, ImmichWorker, JobName, JobStatus } from 'src/enum';
 import { LibraryService } from 'src/services/library.service';
@@ -14,10 +13,6 @@ import { factory, newUuid } from 'test/small.factory';
 import { makeStream, newTestService, ServiceMocks } from 'test/utils';
 import { vitest } from 'vitest';

-async function* mockWalk() {
-  yield await Promise.resolve(['/data/user1/photo.jpg']);
-}
-
 describe(LibraryService.name, () => {
   let sut: LibraryService;
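mockWalk existed because walk() was an async generator; crawl() resolves to a plain array, so vitest's mockResolvedValue is enough. A hedged before/after sketch with stand-in mock functions:

```ts
import { vitest } from 'vitest';

const crawl = vitest.fn(); // stand-in for mocks.storage.crawl
const walk = vitest.fn(); // stand-in for the removed mocks.storage.walk

// Before: walk() was an async generator, so the mock had to be one too.
walk.mockImplementation(async function* () {
  yield ['/data/user1/photo.jpg'];
});

// After: crawl() returns Promise<string[]>, so a resolved value suffices.
crawl.mockResolvedValue(['/data/user1/photo.jpg']);
```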
@@ -165,7 +160,7 @@ describe(LibraryService.name, () => {
       const library = factory.library({ importPaths: ['/foo', '/bar'] });

       mocks.library.get.mockResolvedValue(library);
-      mocks.storage.walk.mockImplementation(mockWalk);
+      mocks.storage.crawl.mockResolvedValue(['/data/user1/photo.jpg']);
       mocks.storage.stat.mockResolvedValue({ isDirectory: () => true } as Stats);
       mocks.storage.checkFileExists.mockResolvedValue(true);
       mocks.asset.filterNewExternalAssetPaths.mockResolvedValue(['/data/user1/photo.jpg']);
@@ -206,11 +201,10 @@ describe(LibraryService.name, () => {

       await sut.handleQueueSyncFiles({ id: library.id });

-      expect(mocks.storage.walk).toHaveBeenCalledWith({
+      expect(mocks.storage.crawl).toHaveBeenCalledWith({
         pathsToCrawl: [library.importPaths[1]],
         exclusionPatterns: [],
         includeHidden: false,
-        take: JOBS_LIBRARY_PAGINATION_SIZE,
       });
     });
   });
@@ -220,7 +214,7 @@ describe(LibraryService.name, () => {
       const library = factory.library({ importPaths: ['/foo', '/bar'] });

       mocks.library.get.mockResolvedValue(library);
-      mocks.storage.walk.mockImplementation(mockWalk);
+      mocks.storage.crawl.mockResolvedValue(['/data/user1/photo.jpg']);
       mocks.storage.stat.mockResolvedValue({ isDirectory: () => true } as Stats);
       mocks.storage.checkFileExists.mockResolvedValue(true);
       mocks.asset.filterNewExternalAssetPaths.mockResolvedValue(['/data/user1/photo.jpg']);
@@ -262,11 +256,10 @@ describe(LibraryService.name, () => {

       await sut.handleQueueSyncFiles({ id: library.id });

-      expect(mocks.storage.walk).toHaveBeenCalledWith({
+      expect(mocks.storage.crawl).toHaveBeenCalledWith({
         pathsToCrawl: [library.importPaths[1]],
         exclusionPatterns: [],
         includeHidden: false,
-        take: JOBS_LIBRARY_PAGINATION_SIZE,
       });
     });
   });
@@ -276,7 +269,7 @@ describe(LibraryService.name, () => {
       const library = factory.library();

       mocks.library.get.mockResolvedValue(library);
-      mocks.storage.walk.mockImplementation(async function* generator() {});
+      mocks.storage.crawl.mockResolvedValue([]);
       mocks.asset.getLibraryAssetCount.mockResolvedValue(1);
       mocks.asset.detectOfflineExternalAssets.mockResolvedValue({ numUpdatedRows: 1n });
@@ -294,7 +287,7 @@ describe(LibraryService.name, () => {
       const library = factory.library();

       mocks.library.get.mockResolvedValue(library);
-      mocks.storage.walk.mockImplementation(async function* generator() {});
+      mocks.storage.crawl.mockResolvedValue([]);
       mocks.asset.getLibraryAssetCount.mockResolvedValue(0);
       mocks.asset.detectOfflineExternalAssets.mockResolvedValue({ numUpdatedRows: 1n });
@@ -308,7 +301,7 @@ describe(LibraryService.name, () => {
       const library = factory.library({ importPaths: ['/foo', '/bar'] });

       mocks.library.get.mockResolvedValue(library);
-      mocks.storage.walk.mockImplementation(async function* generator() {});
+      mocks.storage.crawl.mockResolvedValue([]);
       mocks.library.streamAssetIds.mockReturnValue(makeStream([assetStub.external]));
       mocks.asset.getLibraryAssetCount.mockResolvedValue(1);
       mocks.asset.detectOfflineExternalAssets.mockResolvedValue({ numUpdatedRows: 0n });
@@ -394,7 +394,16 @@ export class LibraryService extends BaseService {

   private async processEntity(filePath: string, ownerId: string, libraryId: string) {
     const assetPath = path.normalize(filePath);
-    const stat = await this.storageRepository.stat(assetPath);
+
+    let stat: Stats;
+    try {
+      stat = await this.storageRepository.stat(assetPath);
+    } catch (error: any) {
+      if (error.code === 'ENOENT') {
+        this.logger.error(`File not found during import: ${assetPath} (original path: ${filePath})`);
+      }
+      throw error;
+    }

     return {
       ownerId,
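The catch block casts the error to any in order to read error.code. A stricter alternative, sketched here as an assumption rather than what the diff does, is a type guard over NodeJS.ErrnoException:

```ts
// Hedged alternative to `catch (error: any)`: narrow unknown errors before
// reading `.code`, keeping the catch block type-safe.
function isErrnoException(error: unknown): error is NodeJS.ErrnoException {
  return error instanceof Error && 'code' in error;
}

try {
  // the stat call would go here
} catch (error: unknown) {
  if (isErrnoException(error) && error.code === 'ENOENT') {
    // log "file not found" before rethrowing
  }
  throw error;
}
```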
@@ -636,21 +645,25 @@ export class LibraryService extends BaseService {
       return JobStatus.Skipped;
     }

-    const pathsOnDisk = this.storageRepository.walk({
+    this.logger.log(`Starting disk crawl of ${validImportPaths.length} import path(s) for library ${library.id}...`);
+
+    const crawlStart = Date.now();
+
+    const pathsOnDisk = await this.storageRepository.crawl({
       pathsToCrawl: validImportPaths,
       includeHidden: false,
       exclusionPatterns: library.exclusionPatterns,
-      take: JOBS_LIBRARY_PAGINATION_SIZE,
     });

     let importCount = 0;
-    let crawlCount = 0;

-    this.logger.log(`Starting disk crawl of ${validImportPaths.length} import path(s) for library ${library.id}...`);
+    this.logger.log(
+      `Found ${pathsOnDisk.length} file(s) on disk in ${((Date.now() - crawlStart) / 1000).toFixed(2)}s, queuing for import...`,
+    );

-    for await (const pathBatch of pathsOnDisk) {
-      crawlCount += pathBatch.length;
-      const paths = await this.assetRepository.filterNewExternalAssetPaths(library.id, pathBatch);
+    for (let i = 0; i < pathsOnDisk.length; i += JOBS_LIBRARY_PAGINATION_SIZE) {
+      const pathChunk = pathsOnDisk.slice(i, i + JOBS_LIBRARY_PAGINATION_SIZE);
+      const paths = await this.assetRepository.filterNewExternalAssetPaths(library.id, pathChunk);

       if (paths.length > 0) {
         importCount += paths.length;
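With the whole file list in memory, batching becomes plain array slicing by JOBS_LIBRARY_PAGINATION_SIZE. The loop is equivalent to this small helper (chunk is a hypothetical name, not in the diff):

```ts
// Hypothetical helper equivalent to the slice-based loop in the diff.
function chunk<T>(items: T[], size: number): T[][] {
  const out: T[][] = [];
  for (let i = 0; i < items.length; i += size) {
    out.push(items.slice(i, i + size));
  }
  return out;
}

// chunk(['a', 'b', 'c', 'd', 'e'], 2) => [['a', 'b'], ['c', 'd'], ['e']]
```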
@@ -660,18 +673,18 @@ export class LibraryService extends BaseService {
           data: {
             libraryId: library.id,
             paths,
-            progressCounter: crawlCount,
+            progressCounter: i + pathChunk.length,
           },
         });
       }

       this.logger.log(
-        `Crawled ${crawlCount} file(s) so far: ${paths.length} of current batch of ${pathBatch.length} will be imported to library ${library.id}...`,
+        `Processed ${i + pathChunk.length} file(s): ${paths.length} of current batch of ${pathChunk.length} will be imported to library ${library.id}...`,
       );
     }

     this.logger.log(
-      `Finished disk crawl, ${crawlCount} file(s) found on disk and queued ${importCount} file(s) for import into ${library.id}`,
+      `Finished disk crawl, ${pathsOnDisk.length} file(s) found on disk and queued ${importCount} file(s) for import into ${library.id}`,
     );

     await this.libraryRepository.update(job.id, { refreshedAt: new Date() });