STK++ 0.9.13
STK_CategoricalBase.h
Go to the documentation of this file.
1/*--------------------------------------------------------------------*/
2/* Copyright (C) 2004-2016 Serge Iovleff
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with this program; if not, write to the
16 Free Software Foundation, Inc.,
17 59 Temple Place,
18 Suite 330,
19 Boston, MA 02111-1307
20 USA
21
22 Contact : S..._DOT_I..._AT_stkpp.org (see copyright for ...)
23*/
24
25/*
26 * Project: stkpp::Clustering
27 * created on: Dec 4, 2013
28 * Authors: Serge Iovleff
29 **/
30
36#ifndef STK_CATEGORICALBASE_H
37#define STK_CATEGORICALBASE_H
38
39#include "../STK_IMixtureDensity.h"
40#include "../CategoricalModels/STK_CategoricalParameters.h"
42
43namespace STK
44{
45
// Base class for the Categorical mixture models. It is parameterised by the
// Derived model type and inherits from IMixtureDensity<Derived>.
//
// NOTE(review): this text is a numbered source listing and the listing numbers
// are not contiguous (53, 57-68, 82, 86, 91, 94, 126 and 131-133 are absent).
// Presumably the missing lines hold the `Base` typedef, the constructors, the
// header of initializeModelImpl(), the declarations of `amin`/`amax`, the
// assignments to nbModalities_[j] and modalities_, the getParameters()
// declaration and the data members -- TODO confirm against the real header.
49template<class Derived>
50class CategoricalBase: public IMixtureDensity<Derived >
51{
52 protected:
// Make the inherited parameter holder and data accessor visible by their
// unqualified names (required for members of a dependent base class).
// NOTE(review): `Base` itself is not declared in the visible lines.
54 using Base::param_;
55 using Base::p_data;
56
69
70 public:
// @return the member nbModalities_ (by const reference).
72 inline PointXi const& nbModalities() const { return nbModalities_;}
// @return the member modalities_ (by const reference).
74 inline Range const& modalities() const { return modalities_;}
// @return param_.proba(k,j,l); presumably the probability of modality l for
// variable j in component k -- the semantics live in the parameter class.
76 inline Real proba(int k, int j, int l) const { return param_.proba(k,j,l);}
// @return param_.proba(k,j), a vector (CVectorX) returned by value.
78 inline CVectorX proba(int k, int j) const { return param_.proba(k,j);}
// NOTE(review): the function header for the body below is not visible; the
// log message inside it names CategoricalBase::initializeModelImpl, so this
// is presumably `void initializeModelImpl()`.
83 {
84 // compute the maximal number of modalities
// one entry of nbModalities_ per column of the data set
85 nbModalities_.resize(p_data()->cols());
// NOTE(review): `amin` and `amax` are updated below but their declaration is
// not visible here; they accumulate the smallest and largest element found
// over all the columns.
87 for (int j= p_data()->beginCols(); j < p_data()->endCols(); ++j)
88 {
89 int min = p_data()->col(j).minElt(), max = p_data()->col(j).maxElt();
90 amin = std::min(amin, min); amax = std::max(amax, max);
92 }
93 // set range of the modalities
// NOTE(review): no statement follows the comment above in the visible lines;
// modalities_ is presumably assigned from (amin, amax) here -- TODO confirm.
95 // resize vectors of probabilities
96 param_.resize(modalities_,p_data()->cols());
// Verbose trace, compiled only when STK_MIXTURE_VERBOSE is defined.
// NOTE(review): the message spells the class name "CatagoricalBase".
97#ifdef STK_MIXTURE_VERBOSE
98 stk_cout << _T("In CatagoricalBase::initializeModelImpl. modalities_ = ")
99 << modalities_ << _T("\n");
100#endif
101 }
// Declared here, defined after the class.
// @param i,k indexes of the sample and of the component
105 Real lnComponentProbability(int i, int k) const;
// Declared here, defined after the class: returns a modality index chosen
// from the weights tk.
// @param i,j indexes of the sample and of the variable
// @param tk weights indexed by component
110 template<class Weights>
111 int impute(int i, int j, Weights const& tk) const;
// @return the result of Law::Categorical::rand() applied to proba(k,j).
// The sample index i is not used by this body.
115 inline int rand(int i, int j, int k) const
116 { return Law::Categorical::rand(proba(k,j));}
// NOTE(review): as listed, `template<class Array>` is immediately followed by
// writeParameters(), which does not use Array. A declaration (presumably
// `void getParameters(Array& params) const;`) appears to be missing between
// the two lines -- TODO confirm.
121 template<class Array>
127 void writeParameters(CArrayXX const* p_tik, ostream& os) const;
128
129 protected:
// NOTE(review): the members nbModalities_ and modalities_ used above are not
// declared in the visible lines; presumably they are declared here.
134};
135
136/* @return the value of the probability of the i-th sample in the k-th component.
137 * @param i,k indexes of the sample and of the component
138 **/
139template<class Derived>
141{
142 Real sum =0.;
143 for (int j=p_data()->beginCols(); j<p_data()->endCols(); ++j)
144 { // what to do if the probability is zero but a sample get this modality
145 // for now, just ignore it (it's possible if tik_(i,k) == 0)
146 Real prob= proba(k, j, p_data()->elt(i,j));
147 if (prob) { sum += std::log(prob);}
148 }
149 return sum;
150}
151
152/* Implementation */
153template<class Derived>
154template<class Weights>
155int CategoricalBase<Derived>::impute(int i, int j, Weights const& tk) const
156{
157 int lmax = modalities_.begin();
159 // compute for each modality the pondered probability of occurrence
160 for (int l=modalities_.begin(); l< modalities_.end(); ++l)
161 {
162 Real p = 0.;
163 for (int k= tk.begin(); k < tk.end(); ++k)
164 { p += tk[k] * proba(k, j, l);}
165
166 if (pmax < p) { pmax = p; lmax = l;}
167 }
168 return lmax;
169}
170
171/* This function can be used to write summary of parameters to the output stream.
172 * @param p_tik a constant pointer on the posterior probabilities
173 * @param os Stream where you want to write the summary of parameters.
174 */
175template<class Derived>
177{
178 ArrayXX p(modalities(), p_data()->cols());
179 for (int k= p_tik->beginCols(); k < p_tik->endCols(); ++k)
180 {
181 // store proba values in an array for a nice output
182 for (int j= p.beginCols(); j < p.endCols(); ++j)
183 {
184 for (int l= modalities().begin(); l < modalities().end(); ++l)
185 { p(l, j) = proba(k,j,l);}
186 }
187 os << _T("---> Component ") << k << _T("\n");
188 os << _T("probabilities =\n") << p << _T("\n");
189 }
190
191}
192
193template<class Derived>
194template<class Array>
196{
197 int nbCluster = this->nbCluster();
198 int nbModalities = modalities().size();
199
200 params.resize(nbModalities * nbCluster, p_data()->cols());
201 for (int k = 0; k < nbCluster; ++k)
202 {
203 for (int j = p_data()->beginCols(); j < p_data()->endCols(); ++j)
204 {
205 for (int l = 0; l < nbModalities; ++l)
206 { params(baseIdx+k * nbModalities + l, j) = proba(baseIdx+k, j, modalities().begin() + l);}
207 }
208 }
209}
210
211
212} // namespace STK
213
214#endif /* STK_CATEGORICALBASE_H */
An Array2DPoint is a one-dimensional horizontal container.
#define _R(first, last)
Utility macro that can be used in a similar way to first:last.
Definition STK_Range.h:53
#define stk_cout
Standard stk output stream.
#define _T(x)
Let x unmodified.
Base class for the Categorical models.
CategoricalBase(CategoricalBase const &model)
copy constructor
CVectorX proba(int k, int j) const
Array const *const & p_data() const
Real proba(int k, int j, int l) const
int rand(int i, int j, int k) const
Parameters param_
parameters of the derived mixture model.
IMixtureDensity< Derived > Base
Range modalities_
range of the modalities
Range const & modalities() const
void initializeModelImpl()
Initialize the model.
CategoricalBase(int nbCluster)
default constructor
int impute(int i, int j, Weights const &tk) const
PointXi nbModalities_
Array with the number of modalities of each column of the data set.
void writeParameters(CArrayXX const *p_tik, ostream &os) const
This function can be used to write summary of parameters to the output stream.
void getParameters(Array &params) const
This function is used in order to get the current values of the parameters in an array.
Real lnComponentProbability(int i, int k) const
PointXi const & nbModalities() const
Derived & resize(Range const &I, Range const &J)
resize the array.
Base class for all Mixture densities.
Array const *const & p_data() const
Parameters param_
parameters of the derived mixture model.
hidden::MixtureTraits< Derived >::Array Array
virtual int rand() const
The MultidimRegression class allows to regress a multidimensional output variable among a multivariat...
Index sub-vector region: Specialization when the size is unknown.
Definition STK_Range.h:265
Arrays::SumOp< Lhs, Rhs >::result_type sum(Lhs const &lhs, Rhs const &rhs)
convenience function for summing two arrays
const int baseIdx
base index of the containers created in STK++.
double Real
STK fundamental type of Real values.
hidden::SliceVisitorSelector< Derived, hidden::MinVisitor, Arrays::by_col_ >::type_result min(Derived const &A)
If A is a row-vector or a column-vector then the function will return the usual minimal value of the ...
hidden::SliceVisitorSelector< Derived, hidden::MaxVisitor, Arrays::by_col_ >::type_result max(Derived const &A)
If A is a row-vector or a column-vector then the function will return the usual maximal value of the ...
std::basic_ostream< Char > ostream
ostream for Char
Definition STK_Stream.h:57
The namespace STK is the main domain space of the Statistical ToolKit project.
Arithmetic properties of STK fundamental types.