1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 package org.apache.commons.math.stat.correlation;
18 
19 import org.apache.commons.math.MathRuntimeException;
20 import org.apache.commons.math.exception.util.LocalizedFormats;
21 import org.apache.commons.math.linear.RealMatrix;
22 import org.apache.commons.math.linear.BlockRealMatrix;
23 import org.apache.commons.math.stat.descriptive.moment.Mean;
24 import org.apache.commons.math.stat.descriptive.moment.Variance;
25 
26 /**
27  * Computes covariances for pairs of arrays or columns of a matrix.
28  *
29  * <p>The constructors that take <code>RealMatrix</code> or
30  * <code>double[][]</code> arguments generate covariance matrices.  The
31  * columns of the input matrices are assumed to represent variable values.</p>
32  *
33  * <p>The constructor argument <code>biasCorrected</code> determines whether or
34  * not computed covariances are bias-corrected.</p>
35  *
36  * <p>Unbiased covariances are given by the formula</p>
37  * <code>cov(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
38  * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
39  * is the mean of the <code>Y</code> values.
40  *
 * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>.</p>
42  *
43  * @version $Revision: 983921 $ $Date: 2010-08-10 12:46:06 +0200 (mar. 10 août 2010) $
44  * @since 2.0
45  */
46 public class Covariance {
47 
48     /** covariance matrix */
49     private final RealMatrix covarianceMatrix;
50 
51     /**
52      * Create an empty covariance matrix.
53      */
54     /** Number of observations (length of covariate vectors) */
55     private final int n;
56 
57     /**
58      * Create a Covariance with no data
59      */
Covariance()60     public Covariance() {
61         super();
62         covarianceMatrix = null;
63         n = 0;
64     }
65 
66     /**
67      * Create a Covariance matrix from a rectangular array
68      * whose columns represent covariates.
69      *
70      * <p>The <code>biasCorrected</code> parameter determines whether or not
71      * covariance estimates are bias-corrected.</p>
72      *
73      * <p>The input array must be rectangular with at least two columns
74      * and two rows.</p>
75      *
76      * @param data rectangular array with columns representing covariates
77      * @param biasCorrected true means covariances are bias-corrected
78      * @throws IllegalArgumentException if the input data array is not
79      * rectangular with at least two rows and two columns.
80      */
Covariance(double[][] data, boolean biasCorrected)81     public Covariance(double[][] data, boolean biasCorrected) {
82         this(new BlockRealMatrix(data), biasCorrected);
83     }
84 
85     /**
86      * Create a Covariance matrix from a rectangular array
87      * whose columns represent covariates.
88      *
89      * <p>The input array must be rectangular with at least two columns
90      * and two rows</p>
91      *
92      * @param data rectangular array with columns representing covariates
93      * @throws IllegalArgumentException if the input data array is not
94      * rectangular with at least two rows and two columns.
95      */
Covariance(double[][] data)96     public Covariance(double[][] data) {
97         this(data, true);
98     }
99 
100     /**
101      * Create a covariance matrix from a matrix whose columns
102      * represent covariates.
103      *
104      * <p>The <code>biasCorrected</code> parameter determines whether or not
105      * covariance estimates are bias-corrected.</p>
106      *
107      * <p>The matrix must have at least two columns and two rows</p>
108      *
109      * @param matrix matrix with columns representing covariates
110      * @param biasCorrected true means covariances are bias-corrected
111      * @throws IllegalArgumentException if the input matrix does not have
112      * at least two rows and two columns
113      */
Covariance(RealMatrix matrix, boolean biasCorrected)114     public Covariance(RealMatrix matrix, boolean biasCorrected) {
115        checkSufficientData(matrix);
116        n = matrix.getRowDimension();
117        covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
118     }
119 
120     /**
121      * Create a covariance matrix from a matrix whose columns
122      * represent covariates.
123      *
124      * <p>The matrix must have at least two columns and two rows</p>
125      *
126      * @param matrix matrix with columns representing covariates
127      * @throws IllegalArgumentException if the input matrix does not have
128      * at least two rows and two columns
129      */
Covariance(RealMatrix matrix)130     public Covariance(RealMatrix matrix) {
131         this(matrix, true);
132     }
133 
134     /**
135      * Returns the covariance matrix
136      *
137      * @return covariance matrix
138      */
getCovarianceMatrix()139     public RealMatrix getCovarianceMatrix() {
140         return covarianceMatrix;
141     }
142 
143     /**
144      * Returns the number of observations (length of covariate vectors)
145      *
146      * @return number of observations
147      */
148 
getN()149     public int getN() {
150         return n;
151     }
152 
153     /**
154      * Compute a covariance matrix from a matrix whose columns represent
155      * covariates.
156      * @param matrix input matrix (must have at least two columns and two rows)
157      * @param biasCorrected determines whether or not covariance estimates are bias-corrected
158      * @return covariance matrix
159      */
computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)160     protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected) {
161         int dimension = matrix.getColumnDimension();
162         Variance variance = new Variance(biasCorrected);
163         RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
164         for (int i = 0; i < dimension; i++) {
165             for (int j = 0; j < i; j++) {
166               double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
167               outMatrix.setEntry(i, j, cov);
168               outMatrix.setEntry(j, i, cov);
169             }
170             outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
171         }
172         return outMatrix;
173     }
174 
175     /**
176      * Create a covariance matrix from a matrix whose columns represent
177      * covariates. Covariances are computed using the bias-corrected formula.
178      * @param matrix input matrix (must have at least two columns and two rows)
179      * @return covariance matrix
180      * @see #Covariance
181      */
computeCovarianceMatrix(RealMatrix matrix)182     protected RealMatrix computeCovarianceMatrix(RealMatrix matrix) {
183         return computeCovarianceMatrix(matrix, true);
184     }
185 
186     /**
187      * Compute a covariance matrix from a rectangular array whose columns represent
188      * covariates.
189      * @param data input array (must have at least two columns and two rows)
190      * @param biasCorrected determines whether or not covariance estimates are bias-corrected
191      * @return covariance matrix
192      */
computeCovarianceMatrix(double[][] data, boolean biasCorrected)193     protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected) {
194         return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
195     }
196 
197     /**
198      * Create a covariance matrix from a rectangual array whose columns represent
199      * covariates. Covariances are computed using the bias-corrected formula.
200      * @param data input array (must have at least two columns and two rows)
201      * @return covariance matrix
202      * @see #Covariance
203      */
computeCovarianceMatrix(double[][] data)204     protected RealMatrix computeCovarianceMatrix(double[][] data) {
205         return computeCovarianceMatrix(data, true);
206     }
207 
208     /**
209      * Computes the covariance between the two arrays.
210      *
211      * <p>Array lengths must match and the common length must be at least 2.</p>
212      *
213      * @param xArray first data array
214      * @param yArray second data array
215      * @param biasCorrected if true, returned value will be bias-corrected
216      * @return returns the covariance for the two arrays
217      * @throws  IllegalArgumentException if the arrays lengths do not match or
218      * there is insufficient data
219      */
covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)220     public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
221         throws IllegalArgumentException {
222         Mean mean = new Mean();
223         double result = 0d;
224         int length = xArray.length;
225         if (length != yArray.length) {
226             throw MathRuntimeException.createIllegalArgumentException(
227                   LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length);
228         } else if (length < 2) {
229             throw MathRuntimeException.createIllegalArgumentException(
230                   LocalizedFormats.INSUFFICIENT_DIMENSION, length, 2);
231         } else {
232             double xMean = mean.evaluate(xArray);
233             double yMean = mean.evaluate(yArray);
234             for (int i = 0; i < length; i++) {
235                 double xDev = xArray[i] - xMean;
236                 double yDev = yArray[i] - yMean;
237                 result += (xDev * yDev - result) / (i + 1);
238             }
239         }
240         return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
241     }
242 
243     /**
244      * Computes the covariance between the two arrays, using the bias-corrected
245      * formula.
246      *
247      * <p>Array lengths must match and the common length must be at least 2.</p>
248      *
249      * @param xArray first data array
250      * @param yArray second data array
251      * @return returns the covariance for the two arrays
252      * @throws  IllegalArgumentException if the arrays lengths do not match or
253      * there is insufficient data
254      */
covariance(final double[] xArray, final double[] yArray)255     public double covariance(final double[] xArray, final double[] yArray)
256         throws IllegalArgumentException {
257         return covariance(xArray, yArray, true);
258     }
259 
260     /**
261      * Throws IllegalArgumentException of the matrix does not have at least
262      * two columns and two rows
263      * @param matrix matrix to check
264      */
checkSufficientData(final RealMatrix matrix)265     private void checkSufficientData(final RealMatrix matrix) {
266         int nRows = matrix.getRowDimension();
267         int nCols = matrix.getColumnDimension();
268         if (nRows < 2 || nCols < 2) {
269             throw MathRuntimeException.createIllegalArgumentException(
270                     LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
271                     nRows, nCols);
272         }
273     }
274 }
275