feat: restore database backups (#23978)

* feat: ProcessRepository#createSpawnDuplexStream

* test: write tests for ProcessRepository#createSpawnDuplexStream

* feat: StorageRepository#createGzip,createGunzip,createPlainReadStream

* feat: backups util (args, create, restore, progress)

* feat: wait on maintenance operation lock on boot

* chore: use backup util from backup.service.ts
test: update backup.service.ts tests with new util

* feat: list/delete backups (maintenance services)

* chore: open api
fix: missing action in cli.service.ts

* chore: add missing repositories to MaintenanceModule

* refactor: move logSecret into module init

* feat: initialise StorageCore in maintenance mode

* feat: authenticate websocket requests in maintenance mode

* test: add mock for new storage fns

* feat: add MaintenanceEphemeralStateRepository
refactor: cache the secret in memory

* test: update service worker tests

* feat: add external maintenance mode status

* feat: synchronised status, restore db action

* test: backup restore service tests

* refactor: DRY end maintenance

* feat: list and delete backup routes

* feat: start action on boot

* fix: should set status on restore end

* refactor: add maintenanceStore to hold writables

* feat: sync status to web app

* feat: web impl.

* test: various utils for testing

* test: web e2e tests

* test: e2e maintenance spec

* test: update cli spec

* chore: e2e lint

* chore: lint fixes

* chore: lint fixes

* feat: start restore flow route

* test: update e2e tests

* chore: remove neon lights on maintenance action pages

* fix: use 'startRestoreFlow' on onboarding page

* chore: ignore any library folder in `docker/`

* fix: load status on boot

* feat: upload backups

* refactor: permit any .sql(.gz) to be listed/restored

* feat: download backups from list

* fix: permit uploading just .sql files

* feat: restore just .sql files

* fix: don't show backups list if logged out

* feat: system integrity check in restore flow

* test: not providing failed backups in API anymore

* test: util should also not try to use failedBackups

* fix: actually assign inputStream

* test: correct test backup prep.

* fix: ensure task is defined to show error

* test: fix docker cp command

* test: update e2e web spec to select next button

* test: update e2e api tests

* test: refactor timeouts

* chore: remove `showDelete` from maint. settings

* chore: lint

* chore: lint

* fix: make sure backups are correctly sorted for clean up

* test: update service spec

* test: adjust e2e timeout

* test: increase web timeouts for ci

* chore: move gitignore changes

* chore: additional filename validation

* refactor: better typings for integrity API

* feat: higher accuracy progress tracking

* chore: delay lock retry

* refactor: remove old maintenance settings

* refactor: clean up tailwind classes

* refactor: use while loop rather than recursive calls

* test: update service specs

* chore: check canParse too

* chore: lint

* fix: logic error causing infinite loop

* refactor: use <ProgressBar /> from ui library

* fix: create or overwrite file

* chore: i18n pass, update progress bar

* fix: wrong translation string

* chore: update colour variables

* test: update web test for new maint. page

* chore: format, fix key

* test: update tests to be more linter compliant & use new routines

* chore: update onClick -> onAction, title -> breadcrumbs

* fix: use wrench icon in admin settings sidebar

* chore: add translation strings to accordion

* chore: lint

* refactor: move maintenance worker init into service

* refactor: `maintenanceStatus` -> `getMaintenanceStatus`
refactor: `integrityCheck` -> `detectPriorInstall`
chore: add `v2.4.0` version
refactor: `/backups/list` -> `/backups`
refactor: use sendFile in download route
refactor: use separate backups permissions
chore: correct descriptions
refactor: permit handler that doesn't return promise for sendfile

* refactor: move status impl into service
refactor: add active flag to maintenance status

* refactor: split into database backup controller

* test: split api e2e tests and passing

* fix: move end button into authed default maint page

* fix: also show in restore flow

* fix: import getMaintenanceStatus

* test: split web e2e tests

* refactor: ensure detect install is consistently named

* chore: ensure admin for detect install while out of maint.

* refactor: remove state repository

* test: update maint. worker service spec

* test: split backup service spec

* refactor: rename db backup routes

* refactor: instead of param, allow bulk backup deletion

* test: update sdk use in e2e test

* test: correct deleteBackup call

* fix: correct type for serverinstall response dto

* chore: validate filename for deletion

* test: wip

* test: backups no longer take path param

* refactor: scope util to database-backups instead of backups

* fix: update worker controller with new route

* chore: use new admin page actions

* chore: remove stray comment

* test: rename outdated test

* refactor: getter pattern for maintenance secret

* refactor: `createSpawnDuplexStream` -> `spawnDuplexStream`

* refactor: prefer `Object.assign`

* refactor: remove useless try {} block

* refactor: prefer `type Props`
refactor: prefer arrow function

* refactor: use luxon API for minutesAgo

* chore: remove change to gitignore

* refactor: prefer `type Props`

* refactor: remove async from onMount

* refactor: use luxon toRelative for relative time

* refactor: duplicate logic check

* chore: open api

* refactor: begin moving code into web//services

* refactor: don't use template string with $t

* test: use dialog role to match prompt

* refactor: split actions into flow/restore

* test: fix action value

* refactor: move more service calls into web//services

* chore: should void fn return

* chore: bump 2.4.0 to 2.5.0 in controller

* chore: bump 2.4.0 to 2.5.0 in controller

* refactor: use events for web//services

* chore: open api

* chore: open api

* refactor: don't await returned promise

* refactor: remove redundant check

* refactor: add `type: command` to actions

* refactor: split backup entries into own component

* refactor: split restore flow into separate components

* refactor(web): split BackupDelete event

* chore: stylings

* chore: stylings

* fix: don't log query failure on first boot

* feat: support pg_dumpall backups

* feat: display information about each backup

* chore: i18n

* feat: rollback to restore point on migrations failure

* feat: health check after restore

* chore: format

* refactor: split health check into separate function

* refactor: split health into repository
test: write tests covering rollbacks

* fix: omit 'health' requirement from createDbBackup

* test(e2e): rollback test

* fix: wrap text in backup entry

* fix: don't shrink context menu button

* fix: correct CREATE DB syntax for postgres

* test: rename backups generated by test

* feat: add filesize to backup response dto

* feat: restore list

* feat: ui work

* fix: e2e test

* fix: e2e test

* pr feedback

* pr feedback

---------

Co-authored-by: Alex <alex.tran1502@gmail.com>
Co-authored-by: Jason Rasmussen <jason@rasm.me>
This commit is contained in:
Paul Makles
2026-01-20 15:22:28 +00:00
committed by GitHub
parent ca0d4b283a
commit 61a9d5cbc7
81 changed files with 5585 additions and 391 deletions

View File

@@ -1,13 +1,16 @@
import { Injectable } from '@nestjs/common';
import { DateTime } from 'luxon';
import path from 'node:path';
import semver from 'semver';
import { serverVersion } from 'src/constants';
import { StorageCore } from 'src/cores/storage.core';
import { OnEvent, OnJob } from 'src/decorators';
import { DatabaseLock, ImmichWorker, JobName, JobStatus, QueueName, StorageFolder } from 'src/enum';
import { ArgOf } from 'src/repositories/event.repository';
import { BaseService } from 'src/services/base.service';
import {
createDatabaseBackup,
isFailedDatabaseBackupName,
isValidDatabaseRoutineBackupName,
UnsupportedPostgresError,
} from 'src/utils/database-backups';
import { handlePromiseError } from 'src/utils/misc';
@Injectable()
@@ -53,16 +56,11 @@ export class BackupService extends BaseService {
const backupsFolder = StorageCore.getBaseFolder(StorageFolder.Backups);
const files = await this.storageRepository.readdir(backupsFolder);
const failedBackups = files.filter((file) => file.match(/immich-db-backup-.*\.sql\.gz\.tmp$/));
const backups = files
.filter((file) => {
const oldBackupStyle = file.match(/immich-db-backup-\d+\.sql\.gz$/);
//immich-db-backup-20250729T114018-v1.136.0-pg14.17.sql.gz
const newBackupStyle = file.match(/immich-db-backup-\d{8}T\d{6}-v.*-pg.*\.sql\.gz$/);
return oldBackupStyle || newBackupStyle;
})
.filter((filename) => isValidDatabaseRoutineBackupName(filename))
.toSorted()
.toReversed();
const failedBackups = files.filter((filename) => isFailedDatabaseBackupName(filename));
const toDelete = backups.slice(config.keepLastAmount);
toDelete.push(...failedBackups);
@@ -75,123 +73,27 @@ export class BackupService extends BaseService {
/**
 * Job handler for `JobName.DatabaseBackup` on the `QueueName.BackupDatabase` queue.
 *
 * NOTE(review): this span looks like a diff view with removed and added lines
 * interleaved (no +/- markers). The inline pg_dumpall/gzip pipeline and the
 * `createDatabaseBackup(this.backupRepos)` call are presumably alternatives
 * rather than sequential steps — confirm against the actual file.
 *
 * @returns `JobStatus.Failed` when the PostgreSQL version is outside the supported
 *          range or an `UnsupportedPostgresError` is caught; `JobStatus.Success`
 *          after the backup and cleanup complete.
 * @throws any other error caught around the backup is rethrown.
 */
@OnJob({ name: JobName.DatabaseBackup, queue: QueueName.BackupDatabase })
async handleBackupDatabase(): Promise<JobStatus> {
this.logger.debug(`Database Backup Started`);
const { database } = this.configRepository.getEnv();
const config = database.config;
const isUrlConnection = config.connectionType === 'url';
let connectionUrl: string = isUrlConnection ? config.url : '';
if (URL.canParse(connectionUrl)) {
// remove known bad url parameters for pg_dumpall
const url = new URL(connectionUrl);
url.searchParams.delete('uselibpqcompat');
connectionUrl = url.toString();
}
// Connection arguments for pg_dumpall: either the (sanitised) URL or discrete fields.
const databaseParams = isUrlConnection
? ['--dbname', connectionUrl]
: [
'--username',
config.username,
'--host',
config.host,
'--port',
`${config.port}`,
'--database',
config.database,
];
databaseParams.push('--clean', '--if-exists');
const databaseVersion = await this.databaseRepository.getPostgresVersion();
// The dump is written under a `.tmp` name and only renamed once the pipeline succeeds.
const backupFilePath = path.join(
StorageCore.getBaseFolder(StorageFolder.Backups),
`immich-db-backup-${DateTime.now().toFormat("yyyyLLdd'T'HHmmss")}-v${serverVersion.toString()}-pg${databaseVersion.split(' ')[0]}.sql.gz.tmp`,
);
const databaseSemver = semver.coerce(databaseVersion);
const databaseMajorVersion = databaseSemver?.major;
// Only PostgreSQL versions matching '>=14.0.0 <19.0.0' are accepted.
if (!databaseMajorVersion || !databaseSemver || !semver.satisfies(databaseSemver, '>=14.0.0 <19.0.0')) {
this.logger.error(`Database Backup Failure: Unsupported PostgreSQL version: ${databaseVersion}`);
return JobStatus.Failed;
}
this.logger.log(`Database Backup Starting. Database Version: ${databaseMajorVersion}`);
try {
// Pipeline: pg_dumpall stdout -> gzip stdin, gzip stdout -> backup file write stream.
await new Promise<void>((resolve, reject) => {
// The pg_dumpall binary is selected by the server's major version; the password
// is supplied through the PGPASSWORD environment variable rather than an argument.
const pgdump = this.processRepository.spawn(
`/usr/lib/postgresql/${databaseMajorVersion}/bin/pg_dumpall`,
databaseParams,
{
env: {
PATH: process.env.PATH,
PGPASSWORD: isUrlConnection ? new URL(connectionUrl).password : config.password,
},
},
);
// NOTE: `--rsyncable` is only supported in GNU gzip
const gzip = this.processRepository.spawn(`gzip`, ['--rsyncable']);
pgdump.stdout.pipe(gzip.stdin);
const fileStream = this.storageRepository.createWriteStream(backupFilePath);
gzip.stdout.pipe(fileStream);
pgdump.on('error', (err) => {
this.logger.error(`Backup failed with error: ${err}`);
reject(err);
});
gzip.on('error', (err) => {
this.logger.error(`Gzip failed with error: ${err}`);
reject(err);
});
// stderr of both processes is buffered so it can be logged when they exit.
let pgdumpLogs = '';
let gzipLogs = '';
pgdump.stderr.on('data', (data) => (pgdumpLogs += data));
gzip.stderr.on('data', (data) => (gzipLogs += data));
pgdump.on('exit', (code) => {
if (code !== 0) {
this.logger.error(`Backup failed with code ${code}`);
// NOTE(review): rejects with a plain string rather than an Error instance.
reject(`Backup failed with code ${code}`);
this.logger.error(pgdumpLogs);
return;
}
if (pgdumpLogs) {
this.logger.debug(`pgdump_all logs\n${pgdumpLogs}`);
}
});
// The promise is resolved only from the gzip 'exit' handler.
gzip.on('exit', (code) => {
if (code !== 0) {
this.logger.error(`Gzip failed with code ${code}`);
reject(`Gzip failed with code ${code}`);
this.logger.error(gzipLogs);
return;
}
// NOTE(review): this branch neither resolves nor rejects; settlement then depends
// on the pgdump 'exit'/'error' handlers having rejected — confirm the promise
// cannot be left pending when pgdump.exitCode is not yet 0 at this point.
if (pgdump.exitCode !== 0) {
this.logger.error(`Gzip exited with code 0 but pgdump exited with ${pgdump.exitCode}`);
return;
}
resolve();
});
});
// NOTE(review): a string pattern in `replace` only removes the first '.tmp' found
// anywhere in the path, not necessarily the trailing suffix.
await this.storageRepository.rename(backupFilePath, backupFilePath.replace('.tmp', ''));
await createDatabaseBackup(this.backupRepos);
} catch (error) {
this.logger.error(`Database Backup Failure: ${error}`);
// Best-effort removal of the partial `.tmp` file; a failed unlink is only logged.
await this.storageRepository
.unlink(backupFilePath)
.catch((error) => this.logger.error(`Failed to delete failed backup file: ${error}`));
// An unsupported PostgreSQL version is reported as a failed job instead of rethrown.
if (error instanceof UnsupportedPostgresError) {
return JobStatus.Failed;
}
throw error;
}
this.logger.log(`Database Backup Success`);
await this.cleanupDatabaseBackups();
return JobStatus.Success;
}
/**
 * Collects the logger and the storage, config, process and database repositories
 * into the single object that is passed to `createDatabaseBackup`.
 */
private get backupRepos() {
  const { logger, storageRepository, configRepository, processRepository, databaseRepository } = this;
  const repos = {
    logger,
    storage: storageRepository,
    config: configRepository,
    process: processRepository,
    database: databaseRepository,
  };
  return repos;
}
}