Coverage for cvx/risk/linalg/pca.py: 100%

3# Licensed under the Apache License, Version 2.0 (the "License");

4# you may not use this file except in compliance with the License.

5# You may obtain a copy of the License at

7# http://www.apache.org/licenses/LICENSE-2.0

9# Unless required by applicable law or agreed to in writing, software

10# distributed under the License is distributed on an "AS IS" BASIS,

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12# See the License for the specific language governing permissions and

13# limitations under the License.

14"""PCA analysis"""

16from __future__ import annotations

18from collections import namedtuple

20import numpy as np

21import pandas as pd

22from sklearn.decomposition import PCA as sklearnPCA

# Result container returned by pca(). Fields, in order:
#   explained_variance: explained-variance ratio of each retained component
#   factors:            input data projected onto the component directions
#   exposure:           component loadings, one row per component
#   cov:                covariance matrix of the factors
#   systematic:         part of the input reconstructed from the factors
#   idiosyncratic:      residual, i.e. input minus the systematic part
PCA = namedtuple(
    "PCA",
    ["explained_variance", "factors", "exposure", "cov", "systematic", "idiosyncratic"],
)

def pca(returns, n_components=10):
    """
    Compute the first n principal components for a return matrix

    Args:
        returns: DataFrame of returns; its columns are the variables that are
            decomposed and its index labels the observations
        n_components: Number of components to keep

    Returns:
        A PCA namedtuple with the fields
        explained_variance: explained-variance ratio per component
        factors: returns projected onto the components
        exposure: DataFrame of component loadings (columns as in returns)
        cov: covariance matrix of the factors
        systematic: factors mapped back to the original columns
        idiosyncratic: returns minus the systematic part
    """
    # Only the fitted attributes are used below, so fit() is enough; the
    # transformed data that fit_transform() would return was never used.
    sklearn_pca = sklearnPCA(n_components=n_components)
    sklearn_pca.fit(returns)

    exposure = sklearn_pca.components_
    # Project the returns as given onto the component directions.
    factors = returns @ np.transpose(exposure)

    # Reconstruct once and reuse for both the systematic and residual parts.
    systematic = pd.DataFrame(
        data=factors.values @ exposure,
        index=returns.index,
        columns=returns.columns,
    )

    return PCA(
        explained_variance=sklearn_pca.explained_variance_ratio_,
        factors=factors,
        exposure=pd.DataFrame(data=exposure, columns=returns.columns),
        cov=factors.cov(),
        systematic=systematic,
        idiosyncratic=pd.DataFrame(
            data=returns.values - systematic.values,
            index=returns.index,
            columns=returns.columns,
        ),
    )