STK++ 0.9.13
STK_DiagGaussianBase.h
Go to the documentation of this file.
1/*--------------------------------------------------------------------*/
2/* Copyright (C) 2004-2016 Serge Iovleff
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with this program; if not, write to the
16 Free Software Foundation, Inc.,
17 59 Temple Place,
18 Suite 330,
19 Boston, MA 02111-1307
20 USA
21
22 Contact : S..._DOT_I..._AT_stkpp.org (see copyright for ...)
23*/
24
25/*
26 * Project: stkpp::Clustering
27 * created on: Dec 4, 2013
28 * Authors: Serge Iovleff
29 **/
30
35#ifndef STK_DIAGGAUSSIANBASE_H
36#define STK_DIAGGAUSSIANBASE_H
37
38#include "../STK_IMixtureDensity.h"
39
44#include "../DiagGaussianModels/STK_DiagGaussianParameters.h"
45
46namespace STK
47{
48
52template<class Derived>
53class DiagGaussianBase: public IMixtureDensity<Derived >
54{
55 public:
57 using Base::nbCluster;
58 using Base::nbSample;
59 using Base::param_;
60 using Base::p_data;
61
62 protected:
73
74 public:
76 inline Real const& mean(int k, int j) const { return param_.mean(k,j);}
78 inline Real const& sigma(int k, int j) const { return param_.sigma(k,j);}
79
81 void initializeModelImpl() { param_.resize(p_data()->cols());}
85 Real lnComponentProbability(int i, int k) const;
90 template<class Weights>
91 Real impute(int i, int j, Weights const& pk) const;
95 inline Real rand(int i, int j, int k) const
96 { return Law::Normal::rand(mean(k, j), sigma(k,j));}
101 template<class Array>
107 void writeParameters(CArrayXX const* p_tik, ostream& os) const;
108
109 protected:
113 void randomMean( CArrayXX const* p_tik);
115 bool updateMean( CArrayXX const* p_tik);
116};
117
118/* @return the value of the log-probability of the i-th sample in the k-th
119 * component.
120 * @param i,k indexes of the sample and of the component
121 **/
122template<class Derived>
124{
125 Real sum =0.;
126 for (int j=p_data()->beginCols(); j<p_data()->endCols(); ++j)
127 {
128 if (sigma(k,j))
129 { sum += Law::Normal::lpdf(p_data()->elt(i,j), mean(k, j), sigma(k,j));}
130 }
131 return sum;
132}
133
134/* @return an imputation value for the jth variable of the ith sample
135 * @param i,j indexes of the data to impute
136 * @param pk the probabilities of each class for the ith individual
137 **/
138template<class Derived>
139template<class Weights>
141{
142 Real sum = 0.;
143 for (int k= pk.begin(); k < pk.end(); ++k)
144 { sum += pk[k] * mean(k,j);}
145 return sum;
146}
147
148template<class Derived>
150{
151 // indexes array
152 VectorXi indexes(p_data()->rows());
153 for(int i=indexes.begin(); i< indexes.end(); ++i) { indexes[i] = i;}
154 Range rind(p_data()->rows());
155 // sample without repetition
156 for (int k= p_tik->beginCols(); k < p_tik->endCols(); ++k)
157 {
158 // random number in [0, end-k[
159 int i = Law::UniformDiscrete::rand(rind.begin(), rind.end()-1);
160 // get ith individuals
161 param_.mean_[k] = p_data()->row(indexes[i]);
162 // exchange it with nth
163 indexes.swap(i, rind.lastIdx());
164 // decrease
165 rind.decLast(1);
166 }
167}
168
169/* compute the weighted mean of a Gaussian mixture. */
170template<class Derived>
172{
173 for (int k= p_tik->beginCols(); k < p_tik->endCols(); ++k)
174 {
175 for (int j=p_data()->beginCols(); j< p_data()->endCols(); ++j)
176 { param_.mean_[k][j] = p_data()->col(j).wmean(p_tik->col(k));}
177 }
178 return true;
179}
180
181/* This function is used in order to get the current values of the means
182 * and standard deviations.
183 * @param[out] params the array with the parameters of the mixture.
184 */
185template<class Derived>
186template<class Array>
188{
189 int nbClust = nbCluster();
190 params.resize(2*nbClust, p_data()->cols());
191 for (int k= 0; k < nbClust; ++k)
192 {
193 for (int j= params.beginCols(); j< params.endCols(); ++j)
194 {
195 params(baseIdx+2*k , j) = mean(baseIdx + k, j);
196 params(baseIdx+2*k+1, j) = sigma(baseIdx + k, j);
197 }
198 }
199}
200
201
202/* This function can be used to write summary of parameters to the output stream.
203 * @param p_tik a constant pointer on the posterior probabilities
204 * @param os Stream where you want to write the summary of parameters.
205 */
206template<class Derived>
208{
209 CPointX m(p_data()->cols()), s(p_data()->cols());
210 for (int k= p_tik->beginCols(); k < p_tik->endCols(); ++k)
211 {
212 // store sigma values in an array for a nice output
213 for (int j= s.begin(); j < s.end(); ++j)
214 { m[j] = mean(k,j); s[j] = sigma(k,j);}
215 os << _T("---> Component ") << k << _T("\n");
216 os << _T("mean = ") << m;
217 os << _T("sigma = ")<< s;
218 }
219}
220
221} // namespace STK
222
223#endif /* STK_DiagGaussianBASE_H */
In this file we define the constant Arrays.
This file define methods for displaying Arrays and Expressions.
In this file we define the Normal probability law class.
In this file we implement the uniform (discrete) law.
#define _T(x)
Let x unmodified.
Base class for the diagonal Gaussian models.
DiagGaussianBase(DiagGaussianBase const &model)
copy constructor
Real const & mean(int k, int j) const
void initializeModelImpl()
Initialize the parameters of the model.
Real impute(int i, int j, Weights const &pk) const
Array const *const & p_data() const
Real lnComponentProbability(int i, int k) const
Real const & sigma(int k, int j) const
IMixtureDensity< Derived > Base
Parameters param_
parameters of the derived mixture model.
bool updateMean(CArrayXX const *p_tik)
compute the weighted mean of a Gaussian mixture.
Real rand(int i, int j, int k) const
void writeParameters(CArrayXX const *p_tik, ostream &os) const
This function can be used to write summary of parameters to the output stream.
void getParameters(Array &params) const
This function is used in order to get the current values of the means and standard deviations.
void randomMean(CArrayXX const *p_tik)
sample randomly the mean of each component by sampling randomly a row of the data set.
DiagGaussianBase(int nbCluster)
default constructor
hidden::CSlice< Derived, sizeRows_, 1 >::Result col(int j) const
implement the col operator using a reference on the column of the allocator
Base class for all Mixture densities.
Array const *const & p_data() const
Parameters param_
parameters of the derived mixture model.
hidden::MixtureTraits< Derived >::Array Array
virtual Real lpdf(Real const &x) const
Real rand() const
Generate a pseudo Normal random variate.
virtual int rand() const
Generate a pseudo Uniform random variate.
The MultidimRegression class allows to regress a multidimensional output variable among a multivariat...
Index sub-vector region: Specialization when the size is unknown.
Definition STK_Range.h:265
Arrays::SumOp< Lhs, Rhs >::result_type sum(Lhs const &lhs, Rhs const &rhs)
convenience function for summing two arrays
const int baseIdx
base index of the containers created in STK++.
double Real
STK fundamental type of Real values.
hidden::SliceVisitorSelector< Derived, hidden::MeanVisitor, Arrays::by_col_ >::type_result mean(Derived const &A)
If A is a row-vector or a column-vector then the function will return the usual mean value of the vec...
std::basic_ostream< Char > ostream
ostream for Char
Definition STK_Stream.h:57
The namespace STK is the main domain space of the Statistical ToolKit project.