Source code for low_rank_toolbox.krylov.solvers.lyapunov_solvers

"""Krylov-based solvers for the Lyapunov equation.

Author: Benjamin Carrel, University of Geneva, 2022-2023
"""

# Warnings
from typing import Optional, Union
from warnings import warn

# %% Imports
import numpy as np
import scipy.linalg as la
import scipy.sparse as sps
import scipy.sparse.linalg as spsla
from numpy import ndarray
from scipy.sparse import issparse, spmatrix

from ...matrices import LowRankMatrix, QuasiSVD
from ...matrices.low_rank_matrix import LowRankEfficiencyWarning
from ..spaces import ExtendedKrylovSpace, KrylovSpace, RationalKrylovSpace

machine_precision = np.finfo(float).eps
Matrix = ndarray | spmatrix | LowRankMatrix

# %% FUNCTIONS


[docs] def solve_small_lyapunov(A: ndarray | spmatrix, C: ndarray) -> ndarray: "Solve the Lyapunov equation AX + XA = C for small matrices." # Convert sparse A to dense for scipy.linalg.solve_lyapunov A_dense: ndarray = A.toarray() if issparse(A) else A # type: ignore[assignment, union-attr] return la.solve_lyapunov(A_dense, C)
[docs] def solve_sparse_low_rank_symmetric_lyapunov( A: spmatrix, C: LowRankMatrix, tol: float = 1e-12, max_iter: Optional[int] = None, krylov_kwargs: Optional[dict] = None, ) -> QuasiSVD: """ Low-rank solver for the symmetric Lyapunov equation: Find X such that A X + X A = C. NOTE: Matrices A and C must be symmetric, which is exploited to halve the computational cost. Uses a single Krylov space since the solution X is also symmetric. **Performance:** Uses the Lanczos algorithm (3-term recurrence) instead of Arnoldi (full orthogonalization), reducing computational cost by approximately 2x and storage by O(m²) → O(m) for the recurrence coefficients. Parameters ---------- A : spmatrix Symmetric sparse matrix of shape (n, n) C : LowRankMatrix Symmetric low-rank matrix of shape (n, n) tol : float, optional Convergence tolerance, by default 1e-12 max_iter : int, optional Maximum iterations, by default n / rank(C) krylov_kwargs : dict, optional Krylov space configuration with keys: - 'extended' (bool): Use extended Krylov space (default True) Extended Krylov uses both A and A^(-1), providing faster convergence. Uses Lanczos algorithm for symmetric A (efficient 3-term recurrence). - 'invA' (callable): Custom inverse function for A - 'poles' (array_like): Poles for rational Krylov space WARNING: Rational Krylov currently uses Arnoldi even for symmetric A (Lanczos not yet implemented for rational case). Returns ------- QuasiSVD Symmetric low-rank solution X Examples -------- >>> import numpy as np >>> from scipy.sparse import diags >>> from low_rank_toolbox import LowRankMatrix >>> from low_rank_toolbox.krylov import solve_sparse_low_rank_symmetric_lyapunov >>> # Create a symmetric sparse matrix (100x100 tridiagonal) >>> n = 100 >>> A = diags([1, 4, 1], [-1, 0, 1], shape=(n, n), format='csr') >>> # Create a symmetric low-rank right-hand side >>> U = np.zeros((n, 1)) >>> U[0, 0] = 1.0 >>> C = LowRankMatrix(U, U.T) # Rank-1 symmetric matrix >>> # Solve AX + XA = C >>> X = solve_sparse_low_rank_symmetric_lyapunov(A, C, tol=1e-10) >>> # Verify the solution >>> residual = A @ X + X @ A - C >>> residual.norm() < 1e-8 True >>> # X is low-rank >>> X.rank <= n True """ # Check inputs assert isinstance(A, spmatrix), "A must be a sparse matrix" assert isinstance(C, LowRankMatrix), "C must be a low-rank matrix" assert tol > machine_precision, "tol must be larger than machine precision" if max_iter is None: max_iter = max(2, int(A.shape[0] / C.rank)) assert max_iter >= 2, "max_iter must be at least 2" # Parse Krylov kwargs if krylov_kwargs is None: krylov_kwargs = {} extended = krylov_kwargs.get("extended", True) invA = krylov_kwargs.get("invA", None) poles = krylov_kwargs.get("poles", None) if extended and poles is not None: raise ValueError("Cannot use rational Krylov space with extended Krylov space") # Precompute some quantities normA = spsla.norm(A) normC = C.norm() # Define the Krylov space # Ensure initial vector is 2D X0 = C._matrices[0] if X0.ndim == 1: X0 = X0.reshape(-1, 1) if extended: if invA is None: invA = lambda x: spsla.spsolve(A, x) krylov_space: Union[ExtendedKrylovSpace, RationalKrylovSpace, KrylovSpace] = ( ExtendedKrylovSpace(A, X0, invA, is_symmetric=True) ) elif poles is not None: krylov_space = RationalKrylovSpace(A, X0, poles, is_symmetric=True) else: krylov_space = KrylovSpace(A, X0, is_symmetric=True) warn( "Warning: standard Krylov space may not converge. Consider using extended or rational Krylov space." ) # Current basis Uk = krylov_space.Q # SOLVE SMALL PROJECTED LYAPUNOV IN LOOP for k in np.arange(1, max_iter): # SOLVE PROJECTED LYAPUNOV Ak Y + Y Ak = Ck Ak = Uk.T.dot(A.dot(Uk)) # Ck = U^T @ C @ U (use dense output to avoid memory-inefficient intermediate) CUk = C.dot(Uk, dense_output=True) # C @ U (dense) Ck = Uk.T @ CUk # U^T @ (C @ U) Yk = la.solve_lyapunov(Ak, Ck) # CHECK CONVERGENCE Xk = QuasiSVD(Uk, Yk, Uk) AXk = Xk.dot_sparse(A, side="opposite") XkA = Xk.dot_sparse(A) # Compute residual norm residual = C - AXk - XkA # Handle both ndarray and LowRankMatrix types residual_norm = ( la.norm(residual) if isinstance(residual, ndarray) else residual.norm() ) crit = residual_norm / (2 * normA * la.norm(Yk) + normC) if crit < tol: # Truncate up to machine precision since the criterion overestimates the error return Xk.to_svd().truncate() else: krylov_space.augment_basis() Uk = krylov_space.Q warn("No convergence before max_iter") # Need to solve with final basis Ak = Uk.T.dot(A.dot(Uk)) CUk = C.dot(Uk, dense_output=True) # C @ U (dense) Ck = Uk.T @ CUk Yk = la.solve_lyapunov(Ak, Ck) X = QuasiSVD(Uk, Yk, Uk) return X
[docs] def solve_sparse_low_rank_non_symmetric_lyapunov( A: spmatrix, C: LowRankMatrix, tol: float = 1e-12, max_iter: Optional[int] = None, krylov_kwargs: Optional[dict] = None, ) -> QuasiSVD: """ Low-rank solver for the general Lyapunov equation: Find X such that A X + X A^H = C. Uses two Krylov spaces (left for A, right for A^H). The projected problem is solved as a Sylvester equation: Ak Y + Y Bk^H = Ck. Parameters ---------- A : spmatrix Sparse matrix of shape (n, n) C : LowRankMatrix Low-rank right-hand side of shape (n, n) tol : float, optional Convergence tolerance, by default 1e-12 max_iter : int, optional Maximum iterations, by default n / rank(C) krylov_kwargs : dict, optional Krylov space configuration with keys: - 'extended' (bool): Use extended Krylov space (default True) - 'invA' (callable): Custom inverse function for A - 'invAH' (callable): Custom inverse function for A^H - 'poles_A' (array_like): Poles for rational Krylov space for A - 'poles_AH' (array_like): Poles for rational Krylov space for A^H Returns ------- QuasiSVD Low-rank solution X (not necessarily symmetric) """ # Check inputs assert isinstance(A, spmatrix), "A must be a sparse matrix" assert isinstance(C, LowRankMatrix), "C must be a low-rank matrix" assert tol > machine_precision, "tol must be larger than machine precision" if max_iter is None: max_iter = max(2, int(A.shape[0] / C.rank)) assert max_iter >= 2, "max_iter must be at least 2" # Parse Krylov kwargs if krylov_kwargs is None: krylov_kwargs = {} extended = krylov_kwargs.get("extended", True) invA = krylov_kwargs.get("invA", None) invAH = krylov_kwargs.get("invAH", None) poles_A = krylov_kwargs.get("poles_A", None) poles_AH = krylov_kwargs.get("poles_AH", None) if extended and (poles_A is not None or poles_AH is not None): raise ValueError("Cannot use rational Krylov space with extended Krylov space") # Precompute some quantities normA = spsla.norm(A) normC = C.norm() AH = A.T.conj() # Hermitian transpose (conjugate transpose) U, V = C._matrices[0], C._matrices[-1].T.conj() # Ensure U and V are 2D if U.ndim == 1: U = U.reshape(-1, 1) if V.ndim == 1: V = V.reshape(-1, 1) # Define the left Krylov space (for A) if extended: if invA is None: invA = lambda x: spsla.spsolve(A, x) left_space: Union[ExtendedKrylovSpace, RationalKrylovSpace, KrylovSpace] = ( ExtendedKrylovSpace(A, U, invA) ) elif poles_A is not None: left_space = RationalKrylovSpace(A, U, poles_A) else: warn( "Warning: standard Krylov space may not converge. Consider using extended or rational Krylov space." ) left_space = KrylovSpace(A, U) # Define the right Krylov space (for A^H) if extended: if invAH is None: invAH = lambda x: spsla.spsolve(AH, x) right_space: Union[ExtendedKrylovSpace, RationalKrylovSpace, KrylovSpace] = ( ExtendedKrylovSpace(AH, V, invAH) ) elif poles_AH is not None: right_space = RationalKrylovSpace(AH, V, poles_AH) else: right_space = KrylovSpace(AH, V) # Current basis Uk = left_space.Q Vk = right_space.Q # SOLVE SMALL PROJECTED SYLVESTER IN LOOP for k in np.arange(1, max_iter): # SOLVE PROJECTED SYLVESTER Ak Y + Y Bk^H = Ck # where Ak = U^T @ A @ U and Bk = V^T @ A^H @ V Ak = Uk.T.dot(A.dot(Uk)) Bk = Vk.T.dot(AH.dot(Vk)) # Ck = U^T @ C @ V (use dense output to avoid memory-inefficient intermediate) CVk = C.dot(Vk, dense_output=True) # C @ V (dense) Ck = Uk.conj().T @ CVk # U^H @ (C @ V) # Solve Ak Y + Y Bk^H = Ck, which is a Sylvester equation Yk = la.solve_sylvester(Ak, Bk.conj().T, Ck) # CHECK CONVERGENCE Xk = QuasiSVD(Uk, Yk, Vk) AXk = Xk.dot_sparse(A, side="opposite") # A @ Xk XkAH = Xk.dot_sparse(AH) # Xk @ A^H # Compute residual norm residual = C - AXk - XkAH # Handle both ndarray and LowRankMatrix types residual_norm = ( la.norm(residual) if isinstance(residual, ndarray) else residual.norm() ) crit = residual_norm / (2 * normA * la.norm(Yk) + normC) if crit < tol or k == max_iter - 1: # Truncate to machine precision since the criterion overestimates the error return Xk.to_svd().truncate() else: left_space.augment_basis() Uk = left_space.Q right_space.augment_basis() Vk = right_space.Q warn("No convergence before max_iter") # Need to solve with final basis Ak = Uk.T.dot(A.dot(Uk)) Bk = Vk.T.dot(AH.dot(Vk)) CVk = C.dot(Vk, dense_output=True) # C @ V (dense) Ck = Uk.conj().T @ CVk Yk = la.solve_sylvester(Ak, Bk.conj().T, Ck) X = QuasiSVD(Uk, Yk, Vk) return X
[docs] def solve_lyapunov( A: ndarray | spmatrix, C: ndarray | LowRankMatrix, tol: float = 1e-12, max_iter: Optional[int] = None, is_symmetric: Optional[bool] = None, krylov_kwargs: Optional[dict] = None, ) -> ndarray | LowRankMatrix: """ Efficient low-rank compatible solver for the Lyapunov equation. Find X such that AX + XA^H = C. This function is a wrapper that selects the appropriate solver based on the types of A and C. For lower computational cost, if A and C are symmetric, set is_symmetric=True. This enables the use of the Lanczos algorithm (3-term recurrence) instead of Arnoldi (full orthogonalization), approximately halving the computational cost and memory usage per iteration. Parameters ---------- A : ndarray | spmatrix The matrix A of shape (n, n) C : ndarray | LowRankMatrix The matrix C of shape (n, n) tol : float, optional Convergence tolerance for Krylov solver, by default 1e-12 max_iter : int, optional Maximum iterations for Krylov solver, by default None is_symmetric : bool, optional If True, use symmetric solver (only for sparse A and low-rank C). Exploits symmetry using Lanczos algorithm for ~2x speedup. If None, symmetry is auto-detected (with efficiency warning). krylov_kwargs : dict, optional Krylov space configuration (see solve_sparse_low_rank_symmetric_lyapunov or solve_sparse_low_rank_non_symmetric_lyapunov for details) Returns ------- ndarray | LowRankMatrix The solution X, either dense or low-rank and symmetric if and only if C is symmetric. Examples -------- >>> import numpy as np >>> from scipy.sparse import csr_matrix >>> from low_rank_toolbox import LowRankMatrix >>> from low_rank_toolbox.krylov import solve_lyapunov >>> # Small dense case >>> A_small = np.array([[4, 1], [1, 3]]) >>> C_small = np.array([[1, 0], [0, 1]]) >>> X_small = solve_lyapunov(A_small, C_small) >>> # Verify: AX + XA = C >>> np.allclose(A_small @ X_small + X_small @ A_small, C_small) True >>> # Large sparse case with low-rank RHS >>> from scipy.sparse import diags >>> n = 100 >>> A = diags([1, 4, 1], [-1, 0, 1], shape=(n, n), format='csr') >>> U = np.zeros((n, 1)) >>> U[0, 0] = 1.0 >>> C = LowRankMatrix(U, U.T) >>> X = solve_lyapunov(A, C, is_symmetric=True, tol=1e-10) >>> # Solution is low-rank and symmetric >>> type(X).__name__ 'SVD' >>> X.rank <= n # Much lower rank than n True """ # Check Krylov kwargs if krylov_kwargs is None: # Default parameters for Krylov solver krylov_kwargs = {"extended": True} # Check symmetry input if is_symmetric is None: # Check symmetry of low-rank matrix C if isinstance(C, ndarray): is_symmetric = np.allclose(C, C.T.conj()) else: try: warn( "Checking symmetry of low-rank matrix C. For better performance, please provide is_symmetric bool input.", LowRankEfficiencyWarning, ) is_symmetric = C.is_symmetric() except: Cd = C.to_dense() warn( "Checking symmetry of low-rank C via dense conversion. For better performance, please provide is_symmetric bool input.", LowRankEfficiencyWarning, ) is_symmetric = np.allclose(Cd, Cd.T.conj()) # Check symmetry of A if needed if is_symmetric: # Sparse matrix case if isinstance(A, spmatrix): if not (A != A.T).nnz == 0: is_symmetric = False # Dense matrix case elif isinstance(A, ndarray): if not np.allclose(A, A.T.conj()): is_symmetric = False # Low rank solver X: Union[QuasiSVD, ndarray] if isinstance(C, LowRankMatrix): # Convert dense A to sparse if needed if isinstance(A, ndarray): A = sps.csc_matrix(A) if is_symmetric: X = solve_sparse_low_rank_symmetric_lyapunov( A, C, tol, max_iter, krylov_kwargs ) else: X = solve_sparse_low_rank_non_symmetric_lyapunov( A, C, tol, max_iter, krylov_kwargs ) # Dense solver else: X_dense: Union[QuasiSVD, ndarray] = solve_small_lyapunov(A, C) # type: ignore[assignment] X = X_dense return X # type: ignore[return-value]