Source code for low_rank_toolbox.krylov.solvers.lyapunov_solvers

"""Krylov-based solvers for the Lyapunov equation.

Author: Benjamin Carrel, University of Geneva, 2022-2023
"""

# Warnings
from typing import Optional, Union
from warnings import warn

# %% Imports
import numpy as np
import scipy.linalg as la
import scipy.sparse as sps
import scipy.sparse.linalg as spsla
from numpy import ndarray
from scipy.sparse import issparse, spmatrix

from ...matrices import LowRankMatrix, QuasiSVD
from ...matrices.low_rank_matrix import LowRankEfficiencyWarning
from ..spaces import ExtendedKrylovSpace, KrylovSpace, RationalKrylovSpace

machine_precision = np.finfo(float).eps
Matrix = ndarray | spmatrix | LowRankMatrix

# %% FUNCTIONS



[docs]
def solve_small_lyapunov(A: ndarray | spmatrix, C: ndarray) -> ndarray:
    "Solve the Lyapunov equation AX + XA = C for small matrices."
    # Convert sparse A to dense for scipy.linalg.solve_lyapunov
    A_dense: ndarray = A.toarray() if issparse(A) else A  # type: ignore[assignment, union-attr]
    return la.solve_lyapunov(A_dense, C)




[docs]
def solve_sparse_low_rank_symmetric_lyapunov(
    A: spmatrix,
    C: LowRankMatrix,
    tol: float = 1e-12,
    max_iter: Optional[int] = None,
    krylov_kwargs: Optional[dict] = None,
) -> QuasiSVD:
    """
    Low-rank solver for the symmetric Lyapunov equation:

    Find X such that A X + X A = C.

    NOTE: Matrices A and C must be symmetric, which is exploited to halve the computational cost.
    Uses a single Krylov space since the solution X is also symmetric.

    **Performance:** Uses the Lanczos algorithm (3-term recurrence) instead of Arnoldi
    (full orthogonalization), reducing computational cost by approximately 2x and storage
    by O(m²) → O(m) for the recurrence coefficients.

    Parameters
    ----------
    A : spmatrix
        Symmetric sparse matrix of shape (n, n)
    C : LowRankMatrix
        Symmetric low-rank matrix of shape (n, n)
    tol : float, optional
        Convergence tolerance, by default 1e-12
    max_iter : int, optional
        Maximum iterations, by default n / rank(C)
    krylov_kwargs : dict, optional
        Krylov space configuration with keys:
        - 'extended' (bool): Use extended Krylov space (default True)
          Extended Krylov uses both A and A^(-1), providing faster convergence.
          Uses Lanczos algorithm for symmetric A (efficient 3-term recurrence).
        - 'invA' (callable): Custom inverse function for A
        - 'poles' (array_like): Poles for rational Krylov space
          WARNING: Rational Krylov currently uses Arnoldi even for symmetric A
          (Lanczos not yet implemented for rational case).

    Returns
    -------
    QuasiSVD
        Symmetric low-rank solution X

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.sparse import diags
    >>> from low_rank_toolbox import LowRankMatrix
    >>> from low_rank_toolbox.krylov import solve_sparse_low_rank_symmetric_lyapunov
    >>> # Create a symmetric sparse matrix (100x100 tridiagonal)
    >>> n = 100
    >>> A = diags([1, 4, 1], [-1, 0, 1], shape=(n, n), format='csr')
    >>> # Create a symmetric low-rank right-hand side
    >>> U = np.zeros((n, 1))
    >>> U[0, 0] = 1.0
    >>> C = LowRankMatrix(U, U.T)  # Rank-1 symmetric matrix
    >>> # Solve AX + XA = C
    >>> X = solve_sparse_low_rank_symmetric_lyapunov(A, C, tol=1e-10)
    >>> # Verify the solution
    >>> residual = A @ X + X @ A - C
    >>> residual.norm() < 1e-8
    True
    >>> # X is low-rank
    >>> X.rank <= n
    True
    """
    # Check inputs
    assert isinstance(A, spmatrix), "A must be a sparse matrix"
    assert isinstance(C, LowRankMatrix), "C must be a low-rank matrix"
    assert tol > machine_precision, "tol must be larger than machine precision"
    if max_iter is None:
        max_iter = max(2, int(A.shape[0] / C.rank))
    assert max_iter >= 2, "max_iter must be at least 2"

    # Parse Krylov kwargs
    if krylov_kwargs is None:
        krylov_kwargs = {}
    extended = krylov_kwargs.get("extended", True)
    invA = krylov_kwargs.get("invA", None)
    poles = krylov_kwargs.get("poles", None)

    if extended and poles is not None:
        raise ValueError("Cannot use rational Krylov space with extended Krylov space")

    # Precompute some quantities
    normA = spsla.norm(A)
    normC = C.norm()

    # Define the Krylov space
    # Ensure initial vector is 2D
    X0 = C._matrices[0]
    if X0.ndim == 1:
        X0 = X0.reshape(-1, 1)

    if extended:
        if invA is None:
            invA = lambda x: spsla.spsolve(A, x)
        krylov_space: Union[ExtendedKrylovSpace, RationalKrylovSpace, KrylovSpace] = (
            ExtendedKrylovSpace(A, X0, invA, is_symmetric=True)
        )
    elif poles is not None:
        krylov_space = RationalKrylovSpace(A, X0, poles, is_symmetric=True)
    else:
        krylov_space = KrylovSpace(A, X0, is_symmetric=True)
        warn(
            "Warning: standard Krylov space may not converge. Consider using extended or rational Krylov space."
        )

    # Current basis
    Uk = krylov_space.Q

    # SOLVE SMALL PROJECTED LYAPUNOV IN LOOP
    for k in np.arange(1, max_iter):
        # SOLVE PROJECTED LYAPUNOV Ak Y + Y Ak = Ck
        Ak = Uk.T.dot(A.dot(Uk))
        # Ck = U^T @ C @ U (use dense output to avoid memory-inefficient intermediate)
        CUk = C.dot(Uk, dense_output=True)  # C @ U (dense)
        Ck = Uk.T @ CUk  # U^T @ (C @ U)
        Yk = la.solve_lyapunov(Ak, Ck)

        # CHECK CONVERGENCE
        Xk = QuasiSVD(Uk, Yk, Uk)
        AXk = Xk.dot_sparse(A, side="opposite")
        XkA = Xk.dot_sparse(A)
        # Compute residual norm
        residual = C - AXk - XkA
        # Handle both ndarray and LowRankMatrix types
        residual_norm = (
            la.norm(residual) if isinstance(residual, ndarray) else residual.norm()
        )
        crit = residual_norm / (2 * normA * la.norm(Yk) + normC)

        if crit < tol:
            # Truncate up to machine precision since the criterion overestimates the error
            return Xk.to_svd().truncate()
        else:
            krylov_space.augment_basis()
            Uk = krylov_space.Q

    warn("No convergence before max_iter")
    # Need to solve with final basis
    Ak = Uk.T.dot(A.dot(Uk))
    CUk = C.dot(Uk, dense_output=True)  # C @ U (dense)
    Ck = Uk.T @ CUk
    Yk = la.solve_lyapunov(Ak, Ck)
    X = QuasiSVD(Uk, Yk, Uk)
    return X




[docs]
def solve_sparse_low_rank_non_symmetric_lyapunov(
    A: spmatrix,
    C: LowRankMatrix,
    tol: float = 1e-12,
    max_iter: Optional[int] = None,
    krylov_kwargs: Optional[dict] = None,
) -> QuasiSVD:
    """
    Low-rank solver for the general Lyapunov equation:

    Find X such that A X + X A^H = C.

    Uses two Krylov spaces (left for A, right for A^H). The projected problem is solved
    as a Sylvester equation: Ak Y + Y Bk^H = Ck.

    Parameters
    ----------
    A : spmatrix
        Sparse matrix of shape (n, n)
    C : LowRankMatrix
        Low-rank right-hand side of shape (n, n)
    tol : float, optional
        Convergence tolerance, by default 1e-12
    max_iter : int, optional
        Maximum iterations, by default n / rank(C)
    krylov_kwargs : dict, optional
        Krylov space configuration with keys:
        - 'extended' (bool): Use extended Krylov space (default True)
        - 'invA' (callable): Custom inverse function for A
        - 'invAH' (callable): Custom inverse function for A^H
        - 'poles_A' (array_like): Poles for rational Krylov space for A
        - 'poles_AH' (array_like): Poles for rational Krylov space for A^H

    Returns
    -------
    QuasiSVD
        Low-rank solution X (not necessarily symmetric)
    """
    # Check inputs
    assert isinstance(A, spmatrix), "A must be a sparse matrix"
    assert isinstance(C, LowRankMatrix), "C must be a low-rank matrix"
    assert tol > machine_precision, "tol must be larger than machine precision"
    if max_iter is None:
        max_iter = max(2, int(A.shape[0] / C.rank))
    assert max_iter >= 2, "max_iter must be at least 2"

    # Parse Krylov kwargs
    if krylov_kwargs is None:
        krylov_kwargs = {}
    extended = krylov_kwargs.get("extended", True)
    invA = krylov_kwargs.get("invA", None)
    invAH = krylov_kwargs.get("invAH", None)
    poles_A = krylov_kwargs.get("poles_A", None)
    poles_AH = krylov_kwargs.get("poles_AH", None)

    if extended and (poles_A is not None or poles_AH is not None):
        raise ValueError("Cannot use rational Krylov space with extended Krylov space")

    # Precompute some quantities
    normA = spsla.norm(A)
    normC = C.norm()
    AH = A.T.conj()  # Hermitian transpose (conjugate transpose)
    U, V = C._matrices[0], C._matrices[-1].T.conj()

    # Ensure U and V are 2D
    if U.ndim == 1:
        U = U.reshape(-1, 1)
    if V.ndim == 1:
        V = V.reshape(-1, 1)

    # Define the left Krylov space (for A)
    if extended:
        if invA is None:
            invA = lambda x: spsla.spsolve(A, x)
        left_space: Union[ExtendedKrylovSpace, RationalKrylovSpace, KrylovSpace] = (
            ExtendedKrylovSpace(A, U, invA)
        )
    elif poles_A is not None:
        left_space = RationalKrylovSpace(A, U, poles_A)
    else:
        warn(
            "Warning: standard Krylov space may not converge. Consider using extended or rational Krylov space."
        )
        left_space = KrylovSpace(A, U)

    # Define the right Krylov space (for A^H)
    if extended:
        if invAH is None:
            invAH = lambda x: spsla.spsolve(AH, x)
        right_space: Union[ExtendedKrylovSpace, RationalKrylovSpace, KrylovSpace] = (
            ExtendedKrylovSpace(AH, V, invAH)
        )
    elif poles_AH is not None:
        right_space = RationalKrylovSpace(AH, V, poles_AH)
    else:
        right_space = KrylovSpace(AH, V)

    # Current basis
    Uk = left_space.Q
    Vk = right_space.Q

    # SOLVE SMALL PROJECTED SYLVESTER IN LOOP
    for k in np.arange(1, max_iter):
        # SOLVE PROJECTED SYLVESTER Ak Y + Y Bk^H = Ck
        # where Ak = U^T @ A @ U and Bk = V^T @ A^H @ V
        Ak = Uk.T.dot(A.dot(Uk))
        Bk = Vk.T.dot(AH.dot(Vk))
        # Ck = U^T @ C @ V (use dense output to avoid memory-inefficient intermediate)
        CVk = C.dot(Vk, dense_output=True)  # C @ V (dense)
        Ck = Uk.conj().T @ CVk  # U^H @ (C @ V)
        # Solve Ak Y + Y Bk^H = Ck, which is a Sylvester equation
        Yk = la.solve_sylvester(Ak, Bk.conj().T, Ck)

        # CHECK CONVERGENCE
        Xk = QuasiSVD(Uk, Yk, Vk)
        AXk = Xk.dot_sparse(A, side="opposite")  # A @ Xk
        XkAH = Xk.dot_sparse(AH)  # Xk @ A^H
        # Compute residual norm
        residual = C - AXk - XkAH
        # Handle both ndarray and LowRankMatrix types
        residual_norm = (
            la.norm(residual) if isinstance(residual, ndarray) else residual.norm()
        )
        crit = residual_norm / (2 * normA * la.norm(Yk) + normC)

        if crit < tol or k == max_iter - 1:
            # Truncate to machine precision since the criterion overestimates the error
            return Xk.to_svd().truncate()
        else:
            left_space.augment_basis()
            Uk = left_space.Q
            right_space.augment_basis()
            Vk = right_space.Q

    warn("No convergence before max_iter")
    # Need to solve with final basis
    Ak = Uk.T.dot(A.dot(Uk))
    Bk = Vk.T.dot(AH.dot(Vk))
    CVk = C.dot(Vk, dense_output=True)  # C @ V (dense)
    Ck = Uk.conj().T @ CVk
    Yk = la.solve_sylvester(Ak, Bk.conj().T, Ck)
    X = QuasiSVD(Uk, Yk, Vk)
    return X




[docs]
def solve_lyapunov(
    A: ndarray | spmatrix,
    C: ndarray | LowRankMatrix,
    tol: float = 1e-12,
    max_iter: Optional[int] = None,
    is_symmetric: Optional[bool] = None,
    krylov_kwargs: Optional[dict] = None,
) -> ndarray | LowRankMatrix:
    """
    Efficient low-rank compatible solver for the Lyapunov equation.

    Find X such that AX + XA^H = C.

    This function is a wrapper that selects the appropriate solver based on the types of A and C.

    For lower computational cost, if A and C are symmetric, set is_symmetric=True.
    This enables the use of the Lanczos algorithm (3-term recurrence) instead of
    Arnoldi (full orthogonalization), approximately halving the computational cost
    and memory usage per iteration.

    Parameters
    ----------
    A : ndarray | spmatrix
        The matrix A of shape (n, n)
    C : ndarray | LowRankMatrix
        The matrix C of shape (n, n)
    tol : float, optional
        Convergence tolerance for Krylov solver, by default 1e-12
    max_iter : int, optional
        Maximum iterations for Krylov solver, by default None
    is_symmetric : bool, optional
        If True, use symmetric solver (only for sparse A and low-rank C).
        Exploits symmetry using Lanczos algorithm for ~2x speedup.
        If None, symmetry is auto-detected (with efficiency warning).
    krylov_kwargs : dict, optional
        Krylov space configuration (see solve_sparse_low_rank_symmetric_lyapunov
        or solve_sparse_low_rank_non_symmetric_lyapunov for details)

    Returns
    -------
    ndarray | LowRankMatrix
        The solution X, either dense or low-rank and symmetric if and only if C is symmetric.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.sparse import csr_matrix
    >>> from low_rank_toolbox import LowRankMatrix
    >>> from low_rank_toolbox.krylov import solve_lyapunov
    >>> # Small dense case
    >>> A_small = np.array([[4, 1], [1, 3]])
    >>> C_small = np.array([[1, 0], [0, 1]])
    >>> X_small = solve_lyapunov(A_small, C_small)
    >>> # Verify: AX + XA = C
    >>> np.allclose(A_small @ X_small + X_small @ A_small, C_small)
    True
    >>> # Large sparse case with low-rank RHS
    >>> from scipy.sparse import diags
    >>> n = 100
    >>> A = diags([1, 4, 1], [-1, 0, 1], shape=(n, n), format='csr')
    >>> U = np.zeros((n, 1))
    >>> U[0, 0] = 1.0
    >>> C = LowRankMatrix(U, U.T)
    >>> X = solve_lyapunov(A, C, is_symmetric=True, tol=1e-10)
    >>> # Solution is low-rank and symmetric
    >>> type(X).__name__
    'SVD'
    >>> X.rank <= n  # Much lower rank than n
    True
    """
    # Check Krylov kwargs
    if krylov_kwargs is None:
        # Default parameters for Krylov solver
        krylov_kwargs = {"extended": True}
    # Check symmetry input
    if is_symmetric is None:
        # Check symmetry of low-rank matrix C
        if isinstance(C, ndarray):
            is_symmetric = np.allclose(C, C.T.conj())
        else:
            try:
                warn(
                    "Checking symmetry of low-rank matrix C. For better performance, please provide is_symmetric bool input.",
                    LowRankEfficiencyWarning,
                )
                is_symmetric = C.is_symmetric()
            except:
                Cd = C.to_dense()
                warn(
                    "Checking symmetry of low-rank C via dense conversion. For better performance, please provide is_symmetric bool input.",
                    LowRankEfficiencyWarning,
                )
                is_symmetric = np.allclose(Cd, Cd.T.conj())
        # Check symmetry of A if needed
        if is_symmetric:
            # Sparse matrix case
            if isinstance(A, spmatrix):
                if not (A != A.T).nnz == 0:
                    is_symmetric = False
            # Dense matrix case
            elif isinstance(A, ndarray):
                if not np.allclose(A, A.T.conj()):
                    is_symmetric = False

    # Low rank solver
    X: Union[QuasiSVD, ndarray]
    if isinstance(C, LowRankMatrix):
        # Convert dense A to sparse if needed
        if isinstance(A, ndarray):
            A = sps.csc_matrix(A)
        if is_symmetric:
            X = solve_sparse_low_rank_symmetric_lyapunov(
                A, C, tol, max_iter, krylov_kwargs
            )
        else:
            X = solve_sparse_low_rank_non_symmetric_lyapunov(
                A, C, tol, max_iter, krylov_kwargs
            )

    # Dense solver
    else:
        X_dense: Union[QuasiSVD, ndarray] = solve_small_lyapunov(A, C)  # type: ignore[assignment]
        X = X_dense
    return X  # type: ignore[return-value]