STK++ 0.9.13
STK_LocalVariance.h
Go to the documentation of this file.
1/*--------------------------------------------------------------------*/
2/* Copyright (C) 2004-2016 Serge Iovleff, Université Lille 1, Inria
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with this program; if not, write to the
16 Free Software Foundation, Inc.,
17 59 Temple Place,
18 Suite 330,
19 Boston, MA 02111-1307
20 USA
21
22 Contact : S..._Dot_I..._At_stkpp_Dot_org (see copyright for ...)
23*/
24
25/*
26 * Project: stkpp::AAModels
27 * Purpose: Implementation of the ILinearReduct interface using the local variance.
28 * Author: iovleff, S..._Dot_I..._At_stkpp_Dot_org (see copyright for ...)
29 **/
30
35#ifndef STK_LOCALVARIANCE_H
36#define STK_LOCALVARIANCE_H
37
38#include "STK_Reduct_Util.h"
39#include "STK_ILinearReduct.h"
40
44
45namespace STK
46{
61template<class Array>
62class LocalVariance: public ILinearReduct<Array, VectorX>
63{
64 public:
67 using Base::p_data_;
68 using Base::p_reduced_;
69 using Base::axis_;
77 LocalVariance( Array const* p_data
79 , int nbNeighbor =1);
86 LocalVariance( Array const& data
88 , int nbNeighbor =1);
93
95 inline virtual LocalVariance* clone() const { return new LocalVariance(*this);}
97 virtual ~LocalVariance();
99 inline int nbNeighbor() const { return nbNeighbor_;}
101 inline ArrayXXi const& pred() const { return neighbors_;}
103 inline ArraySquareX const& covariance() const { return covariance_;}
105 inline ArraySquareX const& localCovariance() const { return localCovariance_;}
106
107 protected:
111 virtual void update();
112
122
130 virtual void maximizeStep();
135 virtual void maximizeStep( VectorX const& weights);
136
138 void prim();
140 void minimalDistance();
141
146
147 private:
151 void computeAxis();
158};
159
160/*
161 * Constructor.
162 * @param data the input data set
163 */
164template<class Array>
166 , Reduct::TypeGraph type, int nbNeighbor)
167 : Base(p_data)
168 , type_(type)
169 , nbNeighbor_(nbNeighbor)
170 , neighbors_()
171 , dist_()
172
173{
174 if (!p_data) return;
176 dist_.resize(p_data->rows(), Range(1,nbNeighbor_));
178 // compute minimal proximity graph of the data set
179 switch (type_)
180 {
181 case Reduct::prim_:
182 prim();
183 break;
186 break;
189 break;
190 };
191}
192
193/*
194 * Constructor.
195 * @param data the input data set
196 */
197template<class Array>
199 , Reduct::TypeGraph type, int nbNeighbor)
200 : Base(data)
201 , type_(type)
202 , nbNeighbor_(nbNeighbor)
203 , neighbors_(data.rows(), Range(1,nbNeighbor_))
204 , dist_(data.rows(), Range(1,nbNeighbor_), Arithmetic<Real>::max())
205{
206 // compute minimal proximity graph of the data set
207 switch (type_)
208 {
209 case Reduct::prim_:
210 prim();
211 break;
214 break;
217 break;
218 };
219}
220
224template<class Array>
226 : Base(reducer)
227 , type_(reducer.type_)
228 , nbNeighbor_(reducer.nbNeighbor_)
229 , neighbors_(reducer.neighbors_)
230 , dist_(reducer.dist_)
231{}
232
233/*
234 * Destructor
235 */
236template<class Array>
238
239/*
240 * set the data set to use.
241 */
242template<class Array>
244{
245#ifdef STK_REDUCT_DEBUG
246 if (!p_data_)
248#endif
249 // update dimensions of the containers for the proximity graph
250 neighbors_.resize(p_data_->rows(), Range(1,nbNeighbor_));
251 // compute minimal proximity graph of the data set
252 switch (type_)
253 {
254 case Reduct::prim_:
255 prim();
256 break;
258 minimalDistance();
259 break;
262 break;
263 };
264}
265
266/*
267 * Compute the axis by maximizing the ratio of the local variance on the
268 * total variance of the data set.
269 */
270template<class Array>
272{
273#ifdef STK_REDUCT_DEBUG
274 if (!p_data_)
276#endif
277 // compute covariance matrices
278 computeCovarianceMatrices();
279 // compute the axis
280 computeAxis();
281}
282
283/*
284 * Compute the axis by maximizing the ratio of the local variance on the
285 * total variance of the data set.
286 */
287template<class Array>
289{
290#ifdef STK_REDUCT_DEBUG
291 if (!p_data_)
293#endif
294 // compute covariance matrices
295 computeCovarianceMatrices(weights);
296 // compute the axis
297 computeAxis();
298}
299
300/* compute the covariances matrices of the data set
301 * @param nbNeighbor number of neighbors to look at
302 **/
303template<class Array>
305{
307 // compute the covariance matrix
308 stats_.setData(p_data_);
309 stats_.run();
310 covariance_.move(stats_.covariance());
311 // constants
312 const Real pond = 2* nbNeighbor_ * p_data_->sizeRows();
313
314 // compute local covariance matrix
315 localCovariance_.resize(p_data_->cols());
316 for (int j=p_data_->beginCols(); j<p_data_->endCols(); j++)
317 {
318 for (int k=p_data_->beginCols(); k<p_data_->endCols(); k++)
319 {
320 Real sum = 0.0;
321 for (int i=p_data_->beginRows(); i<p_data_->endRows(); i++)
322 {
323 for (int l = 1; l <= nbNeighbor_; ++l)
324 {
325 sum += ((*p_data_)(i, j) - (*p_data_)(neighbors_(i, l), j))
326 * ((*p_data_)(i, k) - (*p_data_)(neighbors_(i, l), k));
327 }
328 }
329 localCovariance_(j, k) = sum/pond;
330 }
331 }
332}
333
334/* compute the weighted covariances matrices of the data set */
335template<class Array>
337{
339 // compute the covariance matrix
340 stats_.setData(p_data_);
342 covariance_.move(stats_.covariance());
343
344 // get dimensions
345 const Real pond = 2* nbNeighbor_ * p_data_->sizeRows() ;
346 // compute weighted local covariance matrix
347 localCovariance_.resize(p_data_->cols());
348 for (int i=p_data_->beginCols(); i<p_data_->endCols(); i++)
349 {
350 for (int j=p_data_->beginCols(); j<p_data_->endCols(); j++)
351 {
352 Real sum = 0.0;
353 for (int k=p_data_->beginRows(); k<p_data_->endRows(); k++)
354 {
355 for (int l = 1; l <= nbNeighbor_; ++l)
356 {
357 sum += (weights[k] * weights[neighbors_(k, l)])
358 * ((*p_data_)(k, i) - (*p_data_)(neighbors_(k, l), i))
359 * ((*p_data_)(k, j) - (*p_data_)(neighbors_(k, l), j));
360 }
361 }
362 localCovariance_(i, j) = sum / pond;
363 }
364 }
365}
366
367/* compute the axis
368 **/
369template<class Array>
371{
372 // compute the number of axis, cannot be greater than the dimension of the data
373 Range range(p_data_->beginCols(), std::min(this->dim_, p_data_->sizeCols()));
374 // compute the eigenvalues decomposition of the local covariance
375 SymEigen<ArraySquareX>* decomp = new SymEigen<ArraySquareX>(localCovariance_);
376 decomp->run();
377 // compute the generalized inverse of the local covariance
379 decomp->ginv(inv_local);
380 // compute the product
381 ArraySquareX prod = inv_local * covariance_;
382 // compute the eigenvalues decomposition of the product
383 decomp->setData(prod);
384 decomp->run();
385 // save axis and index values
386 axis_.resize(p_data_->cols(), range);
387 idx_values_.resize(range);
388 axis_ = decomp->rotation().col(range);
389 idx_values_ = decomp->eigenValues()[range];
390 // we can safely remove the decomposition
391 delete decomp;
392}
393
394template<class Array>
396{
397 // get dimensions
398 const int begin_ind = p_data_->beginRows();
399 const int last_ind = p_data_->lastIdxRows();
400 /* value vector : store the minimal value. */
401 VectorX value(p_data_->rows(), Arithmetic<Real>::max());
402 /* position of the points */
403 Array1D<int> ipos(p_data_->rows());
404 // Initialization the position array
405 for (int i=begin_ind; i<=last_ind; i++) ipos[i] = i;
406
407 // start Prim algorithm
408 //Initialization of the root
409 value[begin_ind] = 0.0; // the root have value 0.0
410 neighbors_(begin_ind, 1) = begin_ind; // and have itself as predecessor
411 int imin = begin_ind; // the index of the current minimal value
412 Real kmin = 0.0; // the current minimal value
413 // begin iterations
414 for (int iter = last_ind; iter>=begin_ind; iter--)
415 {
416 // put the minimal key at the end of the array key_
417 value.swap(imin, iter); // put the minimal value to the end
418 ipos.swap(imin, iter); // update the position of current minimal point
419 // Update the value for the neighbors points and find minimal value
420 imin = begin_ind;
421 kmin = value[begin_ind];
422 // reference on the current point
423 int icur = ipos[iter];
424 RowVector P(p_data_->row(icur), true);
425 // update distance of the neighbors point
426 for (int i=begin_ind; i<iter; i++)
427 {
428 // check if we have a better distance for the neighbors
429 Real d=dist(P, p_data_->row(ipos[i]));
430 if (d < value[i])
431 {
432 value[i] = d;
433 neighbors_(ipos[i], 1) = icur;
434 }
435 // minimal key
436 if (kmin>value[i]) { imin=i; kmin = value[i];}
437 }
438 }
439}
440
441template<class Array>
443{
444 dist_.resize(p_data_->rows(), Range(1,nbNeighbor_));
445 dist_ = Arithmetic<Real>::max();
446 // get dimensions
447 const int begin_ind = p_data_->beginRows();
448 const int last_ind = p_data_->lastIdxRows();
449 // start minimal distance algorithm
450 for (int j = begin_ind; j<last_ind; j++)
451 {
452 // reference on the current point
453 RowVector curPoint(p_data_->row(j), true);
454 // update distance of the neighbors point
455 for (int i=j+1; i<=last_ind; i++)
456 {
457 // compute distance between point i and point j
458 Real d=dist(curPoint, p_data_->row(i));
459 // check if we get a better distance for the point j
460 if (dist_(i, nbNeighbor_) > d )
461 {
462 // check if we get a better distance for the point i
463 int pos = nbNeighbor_;
464 while (dist_(i, pos) > d && pos-- > 1) {}
465 pos++;
466 // shift values
467 for (int k= nbNeighbor_ -1; k>= pos; k--)
468 {
469 dist_(i, k+1) = dist_(i, k);
470 neighbors_(i, k+1) = neighbors_(i, k);
471 }
472 // set minimal distance in place
473 dist_(i, pos) = d;
474 neighbors_(i, pos) = j;
475 }
476 // check if we get a better distance for the point j
477 if (dist_(j, nbNeighbor_) > d )
478 {
479 // insertion sorting algorihtm
480 int pos = nbNeighbor_;
481 while (dist_(j, pos) > d && pos-- > 1) {}
482 pos++;
483 // shift valuesconst
484 for (int k= nbNeighbor_ -1; k>= pos; k--)
485 {
486 dist_(j, k+1) = dist_(j, k);
487 neighbors_(j, k+1) = neighbors_(j, k);
488 }
489 // set minimal distance in place
490 dist_(j, pos) = d;
491 neighbors_(j, pos) = i;
492 }
493 }
494 }
495}
496
497
498} // namespace STK
499#endif /* STK_LOCALVARIANCE_H */
In this file, we define the final class Array2D.
In this file we define the interface base class ILinearReduct.
#define STKRUNTIME_ERROR_NO_ARG(Where, Error)
Definition STK_Macros.h:138
In this file we define utilities enum and functions for the Reduct project.
In this file we specialize the class Multivariate to Real type.
In this file we define the SymEigen class (for a symmetric matrix).
void swap(int i, int j)
swap two elements: only for vectors and points
Derived & resize(Range const &I, Range const &J)
resize the array.
A ILinearReduct is an interface base class for reduction method using linear reduction.
Array * p_reduced_
The reduced data set.
VectorX idx_values_
The values of the index for each axis.
virtual bool run()
run the computations.
virtual void setData(YArray_ const &y, XArray_ const &x)
set the data set.
Array const * p_data() const
get the data set
Array const * p_data_
A pointer on the original data set.
A LocalVariance is an implementation of the abstract ILinearReduct class.
ILinearReduct< Array, VectorX > Base
void minimalDistance()
compute the minimal distance graph
ArraySquareX const & localCovariance() const
virtual ~LocalVariance()
Destructor.
Reduct::TypeGraph type_
number of neighbors
void computeAxis()
compute the axis using the first eigenvectors of the matrix of covariance and local covariance
virtual LocalVariance * clone() const
clone pattern
ArrayXXi const & pred() const
LocalVariance(Reduct::TypeGraph type=Reduct::distance_, int nbNeighbor=1)
Default constructor.
LocalVariance(Array const *p_data, Reduct::TypeGraph type=Reduct::distance_, int nbNeighbor=1)
Constructor.
ArrayXXi neighbors_
Predecessor array : store the spanning tree or the minimal distance to the neighbors.
ArraySquareX localCovariance_
the local covariance Array
ArraySquareX covariance_
the covariance Array
hidden::Traits< Array >::Row RowVector
virtual void update()
Compute the proximity graph if the data set is modified.
ArraySquareX const & covariance() const
void computeCovarianceMatrices()
compute the covariances matrices of the data set
virtual void maximizeStep()
Compute the axis by maximizing the ratio of the local variance on the total variance of the data set.
void prim()
compute the minimal spanning tree
int nbNeighbor_
number of neighbors
ArrayXX dist_
distance matrix : store the minimal distance to the neighbors
The MultidimRegression class allows to regress a multidimensional output variable among a multivariat...
Computation of the multivariate statistics of a Variable.
Index sub-vector region: Specialization when the size is unknown.
Definition STK_Range.h:265
Real dist(ExprBase< Container1D1 > const &x, ExprBase< Container1D2 > const &y)
Compute the distance between two vectors.
Arrays::SumOp< Lhs, Rhs >::result_type sum(Lhs const &lhs, Rhs const &rhs)
convenience function for summing two arrays
double Real
STK fundamental type of Real values.
hidden::SliceVisitorSelector< Derived, hidden::MaxVisitor, Arrays::by_col_ >::type_result max(Derived const &A)
If A is a row-vector or a column-vector then the function will return the usual maximal value of the ...
TypeGraph
Type of proximity graph to used in order to compute the local variance:
The namespace STK is the main domain space of the Statistical ToolKit project.
Arithmetic properties of STK fundamental types.