Coverage for cvx/risk/linalg/pca.py: 100%
12 statements
« prev ^ index » next coverage.py v7.6.8, created at 2025-01-09 10:59 +0000
« prev ^ index » next coverage.py v7.6.8, created at 2025-01-09 10:59 +0000
1# Copyright 2023 Stanford University Convex Optimization Group
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""PCA analysis"""
16from __future__ import annotations
18from collections import namedtuple
20import numpy as np
21import pandas as pd
22from sklearn.decomposition import PCA as sklearnPCA
# Result container produced by ``pca``. Fields, in positional order:
#   explained_variance: sklearn's ``explained_variance_ratio_`` for the fit
#   factors:            the input returns projected onto the components
#   exposure:           the fitted components, one column per input column
#   cov:                covariance matrix of ``factors``
#   systematic:         ``factors @ exposure``, shaped like the input
#   idiosyncratic:      input returns minus ``systematic``
PCA = namedtuple(
    "PCA",
    "explained_variance factors exposure cov systematic idiosyncratic",
)
def pca(returns, n_components=10):
    """
    Compute the first n principal components for a return matrix

    Args:
        returns: DataFrame of returns, one row per observation and one
            column per series
        n_components: Number of components, passed through to sklearn's PCA

    Returns:
        A ``PCA`` namedtuple with

        - explained_variance: sklearn's ``explained_variance_ratio_``
          (a ratio per component, despite the field name)
        - factors: ``returns`` projected onto the components
        - exposure: DataFrame of the components, columns as in ``returns``
        - cov: covariance matrix of the factors
        - systematic: ``factors @ exposure``, indexed and labelled
          like ``returns``
        - idiosyncratic: ``returns`` minus the systematic part
    """
    # Only the fitted attributes are needed, so fit() is enough; the
    # transformed data that fit_transform() would return was never used.
    sklearn_pca = sklearnPCA(n_components=n_components)
    sklearn_pca.fit(returns)

    exposure = sklearn_pca.components_

    # NOTE: sklearn centers the data internally before fitting, whereas this
    # projection is applied to the returns exactly as they were passed in.
    factors = returns @ np.transpose(exposure)

    # Compute the reconstruction once; it feeds both the systematic part and
    # the residual.
    explained = factors.values @ exposure

    return PCA(
        explained_variance=sklearn_pca.explained_variance_ratio_,
        factors=factors,
        exposure=pd.DataFrame(data=exposure, columns=returns.columns),
        cov=factors.cov(),
        systematic=pd.DataFrame(
            data=explained,
            index=returns.index,
            columns=returns.columns,
        ),
        idiosyncratic=pd.DataFrame(
            data=returns.values - explained,
            index=returns.index,
            columns=returns.columns,
        ),
    )