Source code for low_rank_toolbox.krylov.solvers.sylvester_solvers

"""Krylov-based solvers for the Sylvester equation.

Author: Benjamin Carrel, University of Geneva, 2022-2023
"""

# Warnings
from typing import Optional, Union
from warnings import warn

# %% Imports
import numpy as np
import scipy.linalg as la
import scipy.sparse as sps
import scipy.sparse.linalg as spsla
from numpy import ndarray
from scipy.sparse import issparse, spmatrix

from ...matrices import LowRankMatrix, QuasiSVD
from ..spaces import ExtendedKrylovSpace, KrylovSpace, RationalKrylovSpace

machine_precision = np.finfo(float).eps
Matrix = ndarray | spmatrix | LowRankMatrix

# %% Functions


[docs] def solve_small_sylvester( A: ndarray | spmatrix, B: ndarray | spmatrix, C: ndarray ) -> ndarray: """ Solve the Sylvester equation for small systems. Find X such that AX + XB = C. Wrapper to scipy.linalg.solve_sylvester. For larger systems, use solve_sparse_low_rank_sylvester or solve_sylvester_large_A_small_B. Parameters ---------- A : ndarray | spmatrix Matrix of shape (m, m) B : ndarray | spmatrix Matrix of shape (n, n) C : ndarray Matrix of shape (m, n) Returns ------- ndarray Dense solution of shape (m, n) """ # Convert sparse matrices to dense for scipy.linalg.solve_sylvester A_dense = A.toarray() if issparse(A) else A # type: ignore[assignment, union-attr] B_dense = B.toarray() if issparse(B) else B # type: ignore[assignment, union-attr] return la.solve_sylvester(A_dense, B_dense, C)
[docs] def solve_sylvester_large_A_small_B(A: spmatrix, B: ndarray, C: ndarray) -> ndarray: """ Solve the Sylvester equation when A is large and B is small. Find X such that AX + XB = C using eigenvalue decomposition of B. Reference: Simoncini, V., 2016. Computational Methods for Linear Matrix Equations. SIAM Rev. 58, 377-441. https://doi.org/10.1137/130912839 Parameters ---------- A : spmatrix Sparse matrix of shape (m, m) B : ndarray Dense matrix of shape (n, n) C : ndarray Dense matrix of shape (m, n) Returns ------- ndarray Dense solution of the Sylvester equation of shape (m, n) """ # Check inputs assert isinstance(A, spmatrix), "A must be a sparse matrix" assert isinstance(B, ndarray), "B must be a dense matrix" assert isinstance(C, ndarray), "C must be a dense matrix" S, W = la.eigh(B) C_hat = C.dot(W) X_hat = np.zeros(C.shape) I = sps.eye(*A.shape) for i in np.arange(len(S)): X_hat[:, i] = spsla.spsolve(A + S[i] * I, C_hat[:, i]) return X_hat @ W.T
[docs] def solve_sparse_low_rank_sylvester( A: spmatrix, B: spmatrix, C: LowRankMatrix, tol: float = 1e-12, max_iter: Optional[int] = None, krylov_kwargs: Optional[dict] = None, is_A_symmetric: Optional[bool] = None, is_B_symmetric: Optional[bool] = None, ) -> QuasiSVD: """ Low-rank solver for the Sylvester equation. Find X such that AX + XB = C. Uses two Krylov spaces (left for A, right for B). The projected problem is solved as a small Sylvester equation: Ak Y + Y Bk = Ck. **Algorithm Note:** Each Krylov space independently uses Lanczos if the corresponding matrix is symmetric. If A is symmetric, the left Krylov space uses Lanczos. If B is symmetric, the right Krylov space uses Lanczos. Both can use Lanczos independently. **Special Case:** When A = B and both are symmetric, the problem becomes a Lyapunov equation (AX + XA = C). Use solve_lyapunov() instead to exploit the symmetry of X and use only one Krylov space instead of two. Reference: Simoncini, V., 2016. Computational Methods for Linear Matrix Equations. SIAM Rev. 58, 377-441. https://doi.org/10.1137/130912839 Parameters ---------- A : spmatrix Sparse matrix of shape (m, m) B : spmatrix Sparse matrix of shape (n, n) C : LowRankMatrix Low-rank right-hand side of shape (m, n) tol : float, optional Convergence tolerance, by default 1e-12 max_iter : int, optional Maximum iterations, by default m / rank(C) krylov_kwargs : dict, optional Krylov space configuration with keys: - 'extended' (bool): Use extended Krylov space (default True) - 'invA' (callable): Custom inverse function for A - 'invB' (callable): Custom inverse function for B - 'poles_A' (array_like): Poles for rational Krylov space for A - 'poles_B' (array_like): Poles for rational Krylov space for B is_A_symmetric : bool, optional If True, A is symmetric and Lanczos is used for left Krylov space. If None (default), symmetry is auto-detected. is_B_symmetric : bool, optional If True, B is symmetric and Lanczos is used for right Krylov space. If None (default), symmetry is auto-detected. Returns ------- QuasiSVD Low-rank solution X of shape (m, n) Examples -------- >>> import numpy as np >>> from scipy.sparse import diags >>> from low_rank_toolbox import LowRankMatrix >>> from low_rank_toolbox.krylov import solve_sparse_low_rank_sylvester >>> # Create sparse matrices A and B (larger sizes) >>> n, m = 100, 80 >>> A = diags([1, 4, 1], [-1, 0, 1], shape=(n, n), format='csr') >>> B = diags([1, 2, 1], [-1, 0, 1], shape=(m, m), format='csr') >>> # Create a low-rank right-hand side >>> U = np.zeros((n, 1)) >>> U[0, 0] = 1.0 >>> V = np.zeros((m, 1)) >>> V[0, 0] = 1.0 >>> C = LowRankMatrix(U, V.T) >>> # Solve AX + XB = C >>> X = solve_sparse_low_rank_sylvester(A, B, C, tol=1e-10) >>> # Verify the solution >>> residual = A @ X + X @ B - C >>> residual.norm() < 1e-8 True >>> # X is low-rank >>> X.rank <= 50 True """ # Check inputs assert isinstance(A, spmatrix), "A must be a sparse matrix" assert isinstance(B, spmatrix), "B must be a sparse matrix" assert isinstance(C, LowRankMatrix), "C must be a low-rank matrix" assert tol > machine_precision, "tol must be larger than machine precision" if max_iter is None: max_iter = max(2, int(A.shape[0] / C.rank)) assert max_iter >= 2, "max_iter must be at least 2" # Parse Krylov kwargs if krylov_kwargs is None: krylov_kwargs = {} extended = krylov_kwargs.get("extended", True) invA = krylov_kwargs.get("invA", None) invB = krylov_kwargs.get("invB", None) poles_A = krylov_kwargs.get("poles_A", None) poles_B = krylov_kwargs.get("poles_B", None) if extended and (poles_A is not None or poles_B is not None): raise ValueError("Cannot use rational Krylov space with extended Krylov space") # Check symmetry of A and B if is_A_symmetric is None: # Auto-detect symmetry of A is_A_symmetric = (A != A.T).nnz == 0 if is_B_symmetric is None: # Auto-detect symmetry of B is_B_symmetric = (B != B.T).nnz == 0 # Precompute some quantities normA = spsla.norm(A) normB = spsla.norm(B) normC = C.norm() U, V = C._matrices[0], C._matrices[-1].T # Ensure U and V are 2D if U.ndim == 1: U = U.reshape(-1, 1) if V.ndim == 1: V = V.reshape(-1, 1) # Define the left Krylov space (for A) left_space: Union[ExtendedKrylovSpace, RationalKrylovSpace, KrylovSpace] if extended: if invA is None: invA = lambda x: spsla.spsolve(A, x) left_space = ExtendedKrylovSpace(A, U, invA, is_symmetric=is_A_symmetric) elif poles_A is not None: left_space = RationalKrylovSpace(A, U, poles_A, is_symmetric=is_A_symmetric) else: warn( "Warning: standard Krylov space may not converge. Consider using extended or rational Krylov space." ) left_space = KrylovSpace(A, U, is_symmetric=is_A_symmetric) # Define the right Krylov space (for B) right_space: Union[ExtendedKrylovSpace, RationalKrylovSpace, KrylovSpace] if extended: if invB is None: invB = lambda x: spsla.spsolve(B, x) right_space = ExtendedKrylovSpace(B, V, invB, is_symmetric=is_B_symmetric) elif poles_B is not None: right_space = RationalKrylovSpace(B, V, poles_B, is_symmetric=is_B_symmetric) else: right_space = KrylovSpace(B, V, is_symmetric=is_B_symmetric) # Current basis Uk = left_space.Q Vk = right_space.Q # SOLVE SMALL PROJECTED SYLVESTER IN LOOP for k in np.arange(1, max_iter): # SOLVE PROJECTED SYLVESTER Ak Y + Y Bk = Ck Ak = Uk.T.dot(A.dot(Uk)) Bk = Vk.T.dot(B.dot(Vk)) # Ck = U^T @ C @ V (convert to dense) CVk = C.dot(Vk) # C @ V if isinstance(CVk, LowRankMatrix): CVk = CVk.to_dense() Ck = Uk.T @ CVk # U^T @ (C @ V) Yk = la.solve_sylvester(Ak, Bk, Ck) # CHECK CONVERGENCE Xk = QuasiSVD(Uk, Yk, Vk) AXk = Xk.dot_sparse(A, side="opposite") # A @ Xk XkB = Xk.dot_sparse(B) # Xk @ B # Compute residual norm residual = AXk + XkB - C # Handle both ndarray and LowRankMatrix types residual_norm = ( la.norm(residual) if isinstance(residual, ndarray) else residual.norm() ) crit = residual_norm / ((normA + normB) * la.norm(Yk) + normC) if crit < tol or k == max_iter - 1: # Truncate to machine precision since the criterion overestimates the error return Xk.to_svd().truncate() else: left_space.augment_basis() Uk = left_space.Q right_space.augment_basis() Vk = right_space.Q warn("No convergence before max_iter") # Need to solve with final basis Ak = Uk.T.dot(A.dot(Uk)) Bk = Vk.T.dot(B.dot(Vk)) CVk = C.dot(Vk) if isinstance(CVk, LowRankMatrix): CVk = CVk.to_dense() Ck = Uk.T @ CVk Yk = la.solve_sylvester(Ak, Bk, Ck) X = QuasiSVD(Uk, Yk, Vk) return X
[docs] def solve_sylvester( A: ndarray | spmatrix, B: ndarray | spmatrix, C: ndarray | LowRankMatrix, tol: float = 1e-12, max_iter: Optional[int] = None, krylov_kwargs: Optional[dict] = None, is_A_symmetric: Optional[bool] = None, is_B_symmetric: Optional[bool] = None, ) -> ndarray | LowRankMatrix: """ Efficient low-rank compatible solver for the Sylvester equation. Find X such that AX + XB = C. This function is a wrapper that selects the appropriate solver based on the types of A, B, and C. Parameters ---------- A : ndarray | spmatrix Matrix of shape (m, m) B : ndarray | spmatrix Matrix of shape (n, n) C : ndarray | LowRankMatrix Right-hand side of shape (m, n) tol : float, optional Convergence tolerance for Krylov solver, by default 1e-12 max_iter : int, optional Maximum iterations for Krylov solver, by default None krylov_kwargs : dict, optional Krylov space configuration for sparse low-rank solver is_A_symmetric : bool, optional If True, A is symmetric. Auto-detected if None. is_B_symmetric : bool, optional If True, B is symmetric. Auto-detected if None. Returns ------- ndarray | LowRankMatrix Solution X of shape (m, n), either dense or low-rank depending on inputs Examples -------- >>> import numpy as np >>> from scipy.sparse import csr_matrix >>> from low_rank_toolbox import LowRankMatrix >>> from low_rank_toolbox.krylov import solve_sylvester >>> # Small dense case >>> A_small = np.array([[4, 1], [1, 3]]) >>> B_small = np.array([[2, 1], [1, 1]]) >>> C_small = np.array([[1, 0], [0, 1]]) >>> X_small = solve_sylvester(A_small, B_small, C_small) >>> # Verify: AX + XB = C >>> np.allclose(A_small @ X_small + X_small @ B_small, C_small) True >>> # Large sparse case with low-rank RHS >>> from scipy.sparse import diags >>> n, m = 100, 80 >>> A = diags([1, 4, 1], [-1, 0, 1], shape=(n, n), format='csr') >>> B = diags([1, 2, 1], [-1, 0, 1], shape=(m, m), format='csr') >>> U = np.zeros((n, 1)) >>> U[0, 0] = 1.0 >>> V = np.zeros((m, 1)) >>> V[0, 0] = 1.0 >>> C = LowRankMatrix(U, V.T) >>> X = solve_sylvester(A, B, C, tol=1e-10) >>> # Solution is low-rank >>> type(X).__name__ 'SVD' >>> X.rank <= 50 True """ # Check Krylov kwargs if krylov_kwargs is None: # Default parameters for Krylov solver krylov_kwargs = {"extended": True} # Low rank solver X: Union[QuasiSVD, ndarray] if isinstance(C, LowRankMatrix): # Convert dense A and B to sparse if needed if isinstance(A, ndarray): A = sps.csc_matrix(A) if isinstance(B, ndarray): B = sps.csc_matrix(B) # Both A and B must be sparse for low-rank solver assert isinstance(A, spmatrix) and isinstance( B, spmatrix ), "For low-rank C, both A and B should be sparse (or will be converted)" X = solve_sparse_low_rank_sylvester( A, B, C, tol, max_iter, krylov_kwargs, is_A_symmetric, is_B_symmetric ) # Dense solver else: # C is dense X_result: Union[QuasiSVD, ndarray] if isinstance(A, spmatrix) and isinstance(B, ndarray): # A large/sparse, B small/dense X_result = solve_sylvester_large_A_small_B(A, B, C) # type: ignore[assignment] elif isinstance(A, ndarray) and isinstance(B, spmatrix): # A small/dense, B large/sparse - transpose the problem # AX + XB = C => B^T X^T + X^T A^T = C^T X_T = solve_sylvester_large_A_small_B(B.T, A, C.T) X_result = X_T.T # type: ignore[assignment] else: # Both A and B are small (dense or small sparse) X_result = solve_small_sylvester(A, B, C) # type: ignore[assignment] X = X_result return X # type: ignore[return-value]