Coverage for cvx/risk/linalg/pca.py: 100%

12 statements  

« prev     ^ index     » next       coverage.py v7.6.8, created at 2025-01-09 10:59 +0000

1# Copyright 2023 Stanford University Convex Optimization Group 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14"""PCA analysis""" 

15 

16from __future__ import annotations 

17 

18from collections import namedtuple 

19 

20import numpy as np 

21import pandas as pd 

22from sklearn.decomposition import PCA as sklearnPCA 

23 

# Result record produced by pca(). Fields, in order:
#   explained_variance - explained-variance ratio of each component
#   factors            - input returns projected onto the components
#   exposure           - component loadings, one row per component
#   cov                - covariance matrix of the factors
#   systematic         - part of the returns reconstructed from the factors
#   idiosyncratic      - residual, i.e. returns minus the systematic part
PCA = namedtuple(
    "PCA",
    "explained_variance factors exposure cov systematic idiosyncratic",
)

28 

29 

def pca(returns, n_components=10):
    """
    Compute the first n principal components for a return matrix

    Args:
        returns: DataFrame of returns, one row per observation and one
            column per series. Its index and columns label the outputs.
        n_components: Number of components, forwarded to sklearn's PCA

    Returns:
        A PCA namedtuple with the fields

        explained_variance: explained-variance ratio of each component
        factors: DataFrame, the returns projected onto the components
        exposure: DataFrame of component loadings (one row per component,
            columns are those of ``returns``)
        cov: covariance matrix of the factors
        systematic: DataFrame shaped like ``returns``, the part of the
            returns reconstructed from the factors
        idiosyncratic: DataFrame shaped like ``returns``, the residual
            ``returns - systematic``
    """

    # Only the fitted attributes (components_, explained_variance_ratio_) are
    # used below, so fit() is enough; the output of fit_transform() would be
    # thrown away.
    sklearn_pca = sklearnPCA(n_components=n_components)
    sklearn_pca.fit(returns)

    exposure = sklearn_pca.components_

    # NOTE: the factors are projections of the *raw* returns. sklearn centers
    # the data internally when fitting, so these values differ from sklearn's
    # own transform() output by a constant offset per factor.
    factors = returns @ np.transpose(exposure)

    # Reconstruct the returns from the factors once and reuse the result for
    # both the systematic part and the residual.
    systematic = factors.values @ exposure

    return PCA(
        explained_variance=sklearn_pca.explained_variance_ratio_,
        factors=factors,
        exposure=pd.DataFrame(data=exposure, columns=returns.columns),
        cov=factors.cov(),
        systematic=pd.DataFrame(data=systematic, index=returns.index, columns=returns.columns),
        idiosyncratic=pd.DataFrame(
            data=returns.values - systematic,
            index=returns.index,
            columns=returns.columns,
        ),
    )