"""Krylov-based solvers for the Sylvester equation.
Author: Benjamin Carrel, University of Geneva, 2022-2023
"""
# Warnings
from typing import Optional, Union
from warnings import warn
# %% Imports
import numpy as np
import scipy.linalg as la
import scipy.sparse as sps
import scipy.sparse.linalg as spsla
from numpy import ndarray
from scipy.sparse import issparse, spmatrix
from ...matrices import LowRankMatrix, QuasiSVD
from ..spaces import ExtendedKrylovSpace, KrylovSpace, RationalKrylovSpace
machine_precision = np.finfo(float).eps
Matrix = ndarray | spmatrix | LowRankMatrix
# %% Functions
[docs]
def solve_small_sylvester(
A: ndarray | spmatrix, B: ndarray | spmatrix, C: ndarray
) -> ndarray:
"""
Solve the Sylvester equation for small systems.
Find X such that AX + XB = C. Wrapper to scipy.linalg.solve_sylvester.
For larger systems, use solve_sparse_low_rank_sylvester or solve_sylvester_large_A_small_B.
Parameters
----------
A : ndarray | spmatrix
Matrix of shape (m, m)
B : ndarray | spmatrix
Matrix of shape (n, n)
C : ndarray
Matrix of shape (m, n)
Returns
-------
ndarray
Dense solution of shape (m, n)
"""
# Convert sparse matrices to dense for scipy.linalg.solve_sylvester
A_dense = A.toarray() if issparse(A) else A # type: ignore[assignment, union-attr]
B_dense = B.toarray() if issparse(B) else B # type: ignore[assignment, union-attr]
return la.solve_sylvester(A_dense, B_dense, C)
[docs]
def solve_sylvester_large_A_small_B(A: spmatrix, B: ndarray, C: ndarray) -> ndarray:
"""
Solve the Sylvester equation when A is large and B is small.
Find X such that AX + XB = C using eigenvalue decomposition of B.
Reference:
Simoncini, V., 2016. Computational Methods for Linear Matrix Equations.
SIAM Rev. 58, 377-441. https://doi.org/10.1137/130912839
Parameters
----------
A : spmatrix
Sparse matrix of shape (m, m)
B : ndarray
Dense matrix of shape (n, n)
C : ndarray
Dense matrix of shape (m, n)
Returns
-------
ndarray
Dense solution of the Sylvester equation of shape (m, n)
"""
# Check inputs
assert isinstance(A, spmatrix), "A must be a sparse matrix"
assert isinstance(B, ndarray), "B must be a dense matrix"
assert isinstance(C, ndarray), "C must be a dense matrix"
S, W = la.eigh(B)
C_hat = C.dot(W)
X_hat = np.zeros(C.shape)
I = sps.eye(*A.shape)
for i in np.arange(len(S)):
X_hat[:, i] = spsla.spsolve(A + S[i] * I, C_hat[:, i])
return X_hat @ W.T
[docs]
def solve_sparse_low_rank_sylvester(
A: spmatrix,
B: spmatrix,
C: LowRankMatrix,
tol: float = 1e-12,
max_iter: Optional[int] = None,
krylov_kwargs: Optional[dict] = None,
is_A_symmetric: Optional[bool] = None,
is_B_symmetric: Optional[bool] = None,
) -> QuasiSVD:
"""
Low-rank solver for the Sylvester equation.
Find X such that AX + XB = C.
Uses two Krylov spaces (left for A, right for B). The projected problem is solved
as a small Sylvester equation: Ak Y + Y Bk = Ck.
**Algorithm Note:** Each Krylov space independently uses Lanczos if the corresponding
matrix is symmetric. If A is symmetric, the left Krylov space uses Lanczos.
If B is symmetric, the right Krylov space uses Lanczos. Both can use Lanczos independently.
**Special Case:** When A = B and both are symmetric, the problem becomes a Lyapunov
equation (AX + XA = C). Use solve_lyapunov() instead to exploit the symmetry of X
and use only one Krylov space instead of two.
Reference:
Simoncini, V., 2016. Computational Methods for Linear Matrix Equations.
SIAM Rev. 58, 377-441. https://doi.org/10.1137/130912839
Parameters
----------
A : spmatrix
Sparse matrix of shape (m, m)
B : spmatrix
Sparse matrix of shape (n, n)
C : LowRankMatrix
Low-rank right-hand side of shape (m, n)
tol : float, optional
Convergence tolerance, by default 1e-12
max_iter : int, optional
Maximum iterations, by default m / rank(C)
krylov_kwargs : dict, optional
Krylov space configuration with keys:
- 'extended' (bool): Use extended Krylov space (default True)
- 'invA' (callable): Custom inverse function for A
- 'invB' (callable): Custom inverse function for B
- 'poles_A' (array_like): Poles for rational Krylov space for A
- 'poles_B' (array_like): Poles for rational Krylov space for B
is_A_symmetric : bool, optional
If True, A is symmetric and Lanczos is used for left Krylov space.
If None (default), symmetry is auto-detected.
is_B_symmetric : bool, optional
If True, B is symmetric and Lanczos is used for right Krylov space.
If None (default), symmetry is auto-detected.
Returns
-------
QuasiSVD
Low-rank solution X of shape (m, n)
Examples
--------
>>> import numpy as np
>>> from scipy.sparse import diags
>>> from low_rank_toolbox import LowRankMatrix
>>> from low_rank_toolbox.krylov import solve_sparse_low_rank_sylvester
>>> # Create sparse matrices A and B (larger sizes)
>>> n, m = 100, 80
>>> A = diags([1, 4, 1], [-1, 0, 1], shape=(n, n), format='csr')
>>> B = diags([1, 2, 1], [-1, 0, 1], shape=(m, m), format='csr')
>>> # Create a low-rank right-hand side
>>> U = np.zeros((n, 1))
>>> U[0, 0] = 1.0
>>> V = np.zeros((m, 1))
>>> V[0, 0] = 1.0
>>> C = LowRankMatrix(U, V.T)
>>> # Solve AX + XB = C
>>> X = solve_sparse_low_rank_sylvester(A, B, C, tol=1e-10)
>>> # Verify the solution
>>> residual = A @ X + X @ B - C
>>> residual.norm() < 1e-8
True
>>> # X is low-rank
>>> X.rank <= 50
True
"""
# Check inputs
assert isinstance(A, spmatrix), "A must be a sparse matrix"
assert isinstance(B, spmatrix), "B must be a sparse matrix"
assert isinstance(C, LowRankMatrix), "C must be a low-rank matrix"
assert tol > machine_precision, "tol must be larger than machine precision"
if max_iter is None:
max_iter = max(2, int(A.shape[0] / C.rank))
assert max_iter >= 2, "max_iter must be at least 2"
# Parse Krylov kwargs
if krylov_kwargs is None:
krylov_kwargs = {}
extended = krylov_kwargs.get("extended", True)
invA = krylov_kwargs.get("invA", None)
invB = krylov_kwargs.get("invB", None)
poles_A = krylov_kwargs.get("poles_A", None)
poles_B = krylov_kwargs.get("poles_B", None)
if extended and (poles_A is not None or poles_B is not None):
raise ValueError("Cannot use rational Krylov space with extended Krylov space")
# Check symmetry of A and B
if is_A_symmetric is None:
# Auto-detect symmetry of A
is_A_symmetric = (A != A.T).nnz == 0
if is_B_symmetric is None:
# Auto-detect symmetry of B
is_B_symmetric = (B != B.T).nnz == 0
# Precompute some quantities
normA = spsla.norm(A)
normB = spsla.norm(B)
normC = C.norm()
U, V = C._matrices[0], C._matrices[-1].T
# Ensure U and V are 2D
if U.ndim == 1:
U = U.reshape(-1, 1)
if V.ndim == 1:
V = V.reshape(-1, 1)
# Define the left Krylov space (for A)
left_space: Union[ExtendedKrylovSpace, RationalKrylovSpace, KrylovSpace]
if extended:
if invA is None:
invA = lambda x: spsla.spsolve(A, x)
left_space = ExtendedKrylovSpace(A, U, invA, is_symmetric=is_A_symmetric)
elif poles_A is not None:
left_space = RationalKrylovSpace(A, U, poles_A, is_symmetric=is_A_symmetric)
else:
warn(
"Warning: standard Krylov space may not converge. Consider using extended or rational Krylov space."
)
left_space = KrylovSpace(A, U, is_symmetric=is_A_symmetric)
# Define the right Krylov space (for B)
right_space: Union[ExtendedKrylovSpace, RationalKrylovSpace, KrylovSpace]
if extended:
if invB is None:
invB = lambda x: spsla.spsolve(B, x)
right_space = ExtendedKrylovSpace(B, V, invB, is_symmetric=is_B_symmetric)
elif poles_B is not None:
right_space = RationalKrylovSpace(B, V, poles_B, is_symmetric=is_B_symmetric)
else:
right_space = KrylovSpace(B, V, is_symmetric=is_B_symmetric)
# Current basis
Uk = left_space.Q
Vk = right_space.Q
# SOLVE SMALL PROJECTED SYLVESTER IN LOOP
for k in np.arange(1, max_iter):
# SOLVE PROJECTED SYLVESTER Ak Y + Y Bk = Ck
Ak = Uk.T.dot(A.dot(Uk))
Bk = Vk.T.dot(B.dot(Vk))
# Ck = U^T @ C @ V (convert to dense)
CVk = C.dot(Vk) # C @ V
if isinstance(CVk, LowRankMatrix):
CVk = CVk.to_dense()
Ck = Uk.T @ CVk # U^T @ (C @ V)
Yk = la.solve_sylvester(Ak, Bk, Ck)
# CHECK CONVERGENCE
Xk = QuasiSVD(Uk, Yk, Vk)
AXk = Xk.dot_sparse(A, side="opposite") # A @ Xk
XkB = Xk.dot_sparse(B) # Xk @ B
# Compute residual norm
residual = AXk + XkB - C
# Handle both ndarray and LowRankMatrix types
residual_norm = (
la.norm(residual) if isinstance(residual, ndarray) else residual.norm()
)
crit = residual_norm / ((normA + normB) * la.norm(Yk) + normC)
if crit < tol or k == max_iter - 1:
# Truncate to machine precision since the criterion overestimates the error
return Xk.to_svd().truncate()
else:
left_space.augment_basis()
Uk = left_space.Q
right_space.augment_basis()
Vk = right_space.Q
warn("No convergence before max_iter")
# Need to solve with final basis
Ak = Uk.T.dot(A.dot(Uk))
Bk = Vk.T.dot(B.dot(Vk))
CVk = C.dot(Vk)
if isinstance(CVk, LowRankMatrix):
CVk = CVk.to_dense()
Ck = Uk.T @ CVk
Yk = la.solve_sylvester(Ak, Bk, Ck)
X = QuasiSVD(Uk, Yk, Vk)
return X
[docs]
def solve_sylvester(
A: ndarray | spmatrix,
B: ndarray | spmatrix,
C: ndarray | LowRankMatrix,
tol: float = 1e-12,
max_iter: Optional[int] = None,
krylov_kwargs: Optional[dict] = None,
is_A_symmetric: Optional[bool] = None,
is_B_symmetric: Optional[bool] = None,
) -> ndarray | LowRankMatrix:
"""
Efficient low-rank compatible solver for the Sylvester equation.
Find X such that AX + XB = C.
This function is a wrapper that selects the appropriate solver based on the types of A, B, and C.
Parameters
----------
A : ndarray | spmatrix
Matrix of shape (m, m)
B : ndarray | spmatrix
Matrix of shape (n, n)
C : ndarray | LowRankMatrix
Right-hand side of shape (m, n)
tol : float, optional
Convergence tolerance for Krylov solver, by default 1e-12
max_iter : int, optional
Maximum iterations for Krylov solver, by default None
krylov_kwargs : dict, optional
Krylov space configuration for sparse low-rank solver
is_A_symmetric : bool, optional
If True, A is symmetric. Auto-detected if None.
is_B_symmetric : bool, optional
If True, B is symmetric. Auto-detected if None.
Returns
-------
ndarray | LowRankMatrix
Solution X of shape (m, n), either dense or low-rank depending on inputs
Examples
--------
>>> import numpy as np
>>> from scipy.sparse import csr_matrix
>>> from low_rank_toolbox import LowRankMatrix
>>> from low_rank_toolbox.krylov import solve_sylvester
>>> # Small dense case
>>> A_small = np.array([[4, 1], [1, 3]])
>>> B_small = np.array([[2, 1], [1, 1]])
>>> C_small = np.array([[1, 0], [0, 1]])
>>> X_small = solve_sylvester(A_small, B_small, C_small)
>>> # Verify: AX + XB = C
>>> np.allclose(A_small @ X_small + X_small @ B_small, C_small)
True
>>> # Large sparse case with low-rank RHS
>>> from scipy.sparse import diags
>>> n, m = 100, 80
>>> A = diags([1, 4, 1], [-1, 0, 1], shape=(n, n), format='csr')
>>> B = diags([1, 2, 1], [-1, 0, 1], shape=(m, m), format='csr')
>>> U = np.zeros((n, 1))
>>> U[0, 0] = 1.0
>>> V = np.zeros((m, 1))
>>> V[0, 0] = 1.0
>>> C = LowRankMatrix(U, V.T)
>>> X = solve_sylvester(A, B, C, tol=1e-10)
>>> # Solution is low-rank
>>> type(X).__name__
'SVD'
>>> X.rank <= 50
True
"""
# Check Krylov kwargs
if krylov_kwargs is None:
# Default parameters for Krylov solver
krylov_kwargs = {"extended": True}
# Low rank solver
X: Union[QuasiSVD, ndarray]
if isinstance(C, LowRankMatrix):
# Convert dense A and B to sparse if needed
if isinstance(A, ndarray):
A = sps.csc_matrix(A)
if isinstance(B, ndarray):
B = sps.csc_matrix(B)
# Both A and B must be sparse for low-rank solver
assert isinstance(A, spmatrix) and isinstance(
B, spmatrix
), "For low-rank C, both A and B should be sparse (or will be converted)"
X = solve_sparse_low_rank_sylvester(
A, B, C, tol, max_iter, krylov_kwargs, is_A_symmetric, is_B_symmetric
)
# Dense solver
else:
# C is dense
X_result: Union[QuasiSVD, ndarray]
if isinstance(A, spmatrix) and isinstance(B, ndarray):
# A large/sparse, B small/dense
X_result = solve_sylvester_large_A_small_B(A, B, C) # type: ignore[assignment]
elif isinstance(A, ndarray) and isinstance(B, spmatrix):
# A small/dense, B large/sparse - transpose the problem
# AX + XB = C => B^T X^T + X^T A^T = C^T
X_T = solve_sylvester_large_A_small_B(B.T, A, C.T)
X_result = X_T.T # type: ignore[assignment]
else:
# Both A and B are small (dense or small sparse)
X_result = solve_small_sylvester(A, B, C) # type: ignore[assignment]
X = X_result
return X # type: ignore[return-value]