Source code for low_rank_toolbox.cssp.osinsky

"""Osinsky's quasi-optimal column subset selection algorithm.

Author: Benjamin Carrel, University of Geneva, 2024
"""

from typing import Union

import numpy as np
import scipy.linalg as la
from numpy import ndarray


[docs] def Osinsky( U: ndarray, return_projector: bool = False, return_inverse: bool = False, **extra_args, ) -> Union[ndarray, tuple]: """ Osinsky's quasi optimal column subset selection algorithm. Reference: "Close to optimal column approximations with a single SVD." by A.I. Osinsky, 2023. Parameters ---------- U : numpy.ndarray Orthonormal real matrix of shape (n, r) defining a row space approximation return_projector : bool If True, return also the matrix U @ inv(U[S, :]) return_inverse : bool If True, return also the inverse matrix inv(U[S, :]) extra_args : dict Additional arguments: solve_kwargs : dict Additional arguments for the solve function Returns ------- J : ndarray Selected column indices (r elements) P_U : ndarray (n x r) (optional) Matrix U @ inv(U[J, :]) where U[J, :] is the (r x r) submatrix. Only returned if return_projector=True. inv_U : ndarray (r x r) (optional) Matrix inv(U[J, :]). Only returned if return_inverse=True (requires return_projector=True). """ n, r = U.shape if r > n: raise ValueError( "Number of columns r must be less than or equal to number of rows n." ) Uk = U.T.copy() P = np.arange(n) l_scores = np.zeros(n) eps = np.finfo(np.float64).eps # Machine epsilon for float64 for k in range(r): # --- Pivot Selection --- current_sub_Uk = Uk[k:r, k:n] norms_sq = np.linalg.norm(current_sub_Uk, axis=0) ** 2 current_l = l_scores[k:n] # Criterion: norms_sq / (1 + l_j), handle small norms/denominators denominators = 1.0 + current_l pivot_vals = np.zeros_like(denominators) valid_mask = norms_sq > eps pivot_vals[valid_mask] = norms_sq[valid_mask] / denominators[valid_mask] best_idx_in_slice = np.argmax(pivot_vals) j_pivot = k + best_idx_in_slice # --- Swap --- if k != j_pivot: idx = np.array([k, j_pivot]) Uk[:, idx] = Uk[:, np.array([j_pivot, k])] P[k], P[j_pivot] = P[j_pivot], P[k] l_scores[k], l_scores[j_pivot] = l_scores[j_pivot], l_scores[k] # --- Householder Reflection --- x = Uk[k:r, k].copy() norm_x = np.linalg.norm(x) d_update = Uk[k, k:n].copy() # Initialize update scores from current row if norm_x > eps: alpha = x[0] v = x v[0] = alpha - np.sign(x[0]) * norm_x norm_v = np.linalg.norm(v) if norm_v > eps: v /= norm_v # Apply reflection: A_sub = A_sub - 2 * v * (v.H @ A_sub) sub_matrix_to_update = Uk[k:r, k:n] vT_Asub = v.T.conj().dot(sub_matrix_to_update) Uk[k:r, k:n] -= 2 * np.outer(v, vT_Asub) d_update = Uk[k, k:n].copy() # --- Update Scores --- # Use absolute value squared for both real and complex matrices l_scores[k:n] += np.abs(d_update) ** 2 # --- End Loop --- if return_projector: P_U = la.solve( U[P[:r], :].T.conj(), U.T.conj(), **extra_args.get("solve_kwargs", {}) ).T.conj() if return_inverse: inv_U = U.T.conj().dot(P_U) return P[:r], P_U, inv_U else: return P[:r], P_U return P[:r]
if __name__ == "__main__": # Example usage - real case np.random.seed(0) ## Tall matrix with orthonormal columns print("Real case -- Tall matrix with orthonormal columns") n, r = 10, 4 U = np.random.randn(n, r) U, _ = la.qr(U, mode="economic") J = Osinsky(U, return_projector=False) print("Selected indices J:", J) print("U[J, :]:\n", U[J, :]) # Example usage - complex case print("\nComplex case -- Tall matrix with orthonormal columns") U_complex = np.random.randn(n, r) + 1j * np.random.randn(n, r) U_complex, _ = la.qr(U_complex, mode="economic") J_complex = Osinsky(U_complex, return_projector=False) print("Selected indices J (complex case):", J_complex) print("U[J, :] (complex case):\n", U_complex[J_complex, :])