Source code for anomalib.models.components.dimensionality_reduction.random_projection
"""This module comprises PatchCore Sampling Methods for the embedding.- Random Sparse Projector Sparse Random Projection using PyTorch Operations"""# Copyright (C) 2022 Intel Corporation# SPDX-License-Identifier: Apache-2.0fromtypingimportOptionalimportnumpyasnpimporttorchfromsklearn.utils.randomimportsample_without_replacementfromtorchimportTensor
class NotFittedError(ValueError, AttributeError):
    """Signal that an estimator was used before it was fitted.

    Derives from both ``ValueError`` and ``AttributeError``, so an
    ``except`` clause for either of those types also catches this error.
    """
class SparseRandomProjection:
    """Sparse Random Projection using PyTorch operations.

    Args:
        eps (float, optional): Minimum distortion rate parameter for calculating
            Johnson-Lindenstrauss minimum dimensions. Defaults to 0.1.
        random_state (Optional[int], optional): Seed used for the
            ``sample_without_replacement`` calls that pick the non-zero columns.
            Defaults to None.
    """

    def __init__(self, eps: float = 0.1, random_state: Optional[int] = None) -> None:
        # Set by fit(); annotated here only so the attribute is declared.
        self.n_components: int
        # Explicitly None until fit() runs, so transform() can detect an
        # unfitted projector instead of failing on a missing attribute.
        self.sparse_random_matrix: Optional[Tensor] = None
        self.eps = eps
        self.random_state = random_state

    def _sparse_random_matrix(self, n_features: int) -> Tensor:
        """Build the random projection matrix.

        Based on https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf.

        Args:
            n_features (int): Dimensionality of the original source space.

        Returns:
            Tensor: Dense tensor of shape (n_components, n_features). Entries are
            zero or ``+/- sqrt(1 / density) / sqrt(n_components)``, where
            ``density = 1 / sqrt(n_features)``. The fully dense case
            (``n_features == 1``) yields the default float dtype; otherwise the
            tensor is float64.
        """
        n_components = int(self.n_components)

        # Density 'auto'. Factorize density
        density = 1 / np.sqrt(n_features)

        # Fair coin mapped to {-1, +1} via ``sample * 2 - 1``.
        sign_dist = torch.distributions.Binomial(total_count=1, probs=0.5)

        if density == 1:
            # skip index generation if totally dense
            signs = sign_dist.sample((n_components, n_features)) * 2 - 1
            # Tensor-first multiply so the result is always a Tensor.
            return signs * float(1 / np.sqrt(self.n_components))

        # Sparse matrix is not being generated here as it is stored as dense anyways
        components = torch.zeros((n_components, n_features), dtype=torch.float64)

        # An int seed handed straight to sample_without_replacement re-seeds on
        # every call, so each row would draw its columns from the same random
        # stream. Build one generator up front and let it advance across rows.
        rng = self.random_state
        if rng is not None and not isinstance(rng, np.random.RandomState):
            rng = np.random.RandomState(rng)

        nnz_dist = torch.distributions.Binomial(total_count=n_features, probs=density)

        for row in range(n_components):
            # number of non-zero components for this row, as a plain int
            nnz = int(nnz_dist.sample().item())

            # pick that many distinct column indices
            # pylint: disable=not-callable
            c_idx = torch.tensor(
                sample_without_replacement(n_population=n_features, n_samples=nnz, random_state=rng),
                dtype=torch.int64,
            )
            data = sign_dist.sample(sample_shape=c_idx.size()) * 2 - 1
            # assign data to only those columns
            components[row, c_idx] = data.double()

        components *= np.sqrt(1 / density) / np.sqrt(self.n_components)
        return components

    def johnson_lindenstrauss_min_dim(self, n_samples: int, eps: float = 0.1):
        """Find a 'safe' number of components to randomly project to.

        Ref eqn 2.1 https://cseweb.ucsd.edu/~dasgupta/papers/jl.pdf

        Args:
            n_samples (int): Number of samples used to compute safe components.
            eps (float, optional): Minimum distortion rate. Defaults to 0.1.

        Returns:
            ``4 * log(n_samples) / (eps**2 / 2 - eps**3 / 3)`` truncated to a
            NumPy ``int64``.
        """
        denominator = (eps**2 / 2) - (eps**3 / 3)
        return (4 * np.log(n_samples) / denominator).astype(np.int64)

    def fit(self, embedding: Tensor) -> "SparseRandomProjection":
        """Generate the projection matrix for the given embedding tensor.

        Args:
            embedding (Tensor): Two-dimensional tensor; its shape is unpacked as
                (n_samples, n_features).

        Returns:
            (SparseRandomProjection): Return self to be used as

            >>> generator = SparseRandomProjection()
            >>> generator = generator.fit(embedding)
        """
        n_samples, n_features = embedding.shape
        device = embedding.device

        self.n_components = self.johnson_lindenstrauss_min_dim(n_samples=n_samples, eps=self.eps)

        # Generate projection matrix
        # torch can't multiply directly on sparse matrix and moving sparse matrix to cuda throws error
        # (Could not run 'aten::empty_strided' with arguments from the 'SparseCsrCUDA' backend)
        # hence sparse matrix is stored as a dense matrix on the device
        self.sparse_random_matrix = self._sparse_random_matrix(n_features=n_features).to(device)

        return self

    def transform(self, embedding: Tensor) -> Tensor:
        """Project the data by using matrix product with the random matrix.

        Args:
            embedding (Tensor): Embedding of shape (n_samples, n_features).
                The input data to project into a smaller dimensional space.

        Returns:
            projected_embedding (Tensor): Dense tensor of shape
            (n_samples, n_components) holding the projected data.

        Raises:
            NotFittedError: If ``fit()`` has not been called yet.
        """
        # getattr also covers instances created without running __init__.
        matrix = getattr(self, "sparse_random_matrix", None)
        if matrix is None:
            raise NotFittedError("`fit()` has not been called on SparseRandomProjection yet.")

        # The stored matrix may be float64; cast to float32 before the product.
        projected_embedding = embedding @ matrix.T.float()
        return projected_embedding