STK++ 0.9.13
STK_MixtureSemiLearner.cpp
Go to the documentation of this file.
1/*--------------------------------------------------------------------*/
2/* Copyright (C) 2004-2016 Serge Iovleff
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with this program; if not, write to the
16 Free Software Foundation, Inc.,
17 59 Temple Place,
18 Suite 330,
19 Boston, MA 02111-1307
20 USA
21
22 Contact : S..._Dot_I..._At_stkpp_Dot_org (see copyright for ...)
23*/
24
25/*
26 * Project: stkpp::Clustering
27 * created on: 02 June 2016
28 * Author: iovleff, S..._Dot_I..._At_stkpp_Dot_org (see copyright for ...)
29 **/
30
35#include <cmath>
36
37#if STK_MIXTURE_DEBUG | STK_MIXTURE_VERBOSE | STK_MIXTURE_VERY_VERBOSE
39#endif
40
42
45
46namespace STK
47{
/* Constructor.
 * Forwards nbSample and nbCluster to the MixtureComposer base, starts with an
 * empty knownLabels_ and sizes unknownLabels_ to nbSample entries.
 * @param nbSample,nbCluster number of samples and clusters
 */
48MixtureSemiLearner::MixtureSemiLearner( int nbSample, int nbCluster)
49 : MixtureComposer(nbSample, nbCluster)
50 , knownLabels_()
51 , unknownLabels_(nbSample)
52{
// fill unknownLabels_ with consecutive integers counting up from zi_.begin()
// NOTE(review): presumably zi_.begin() is the first sample index, so that every
// sample starts out as unlabeled -- confirm against the zi_ container.
53 int i = zi_.begin();
54 std::vector<int>::iterator it;
55 for (it = unknownLabels_.begin(); it != unknownLabels_.end(); ++it, ++i)
56 { *it = i;}
57}
58
59
60/* copy constructor */
63 , knownLabels_(model.knownLabels_)
64 , unknownLabels_(model.unknownLabels_)
65{}
66/* destructor */
68
69/* clone pattern */
72
74{
75 // set dimensions
77 p_composer->createSemiLearner(this);
78 p_composer->createComposer(v_mixtures());
79 return p_composer;
80}
81
82
83/* cStep */
85{
86 std::vector<int>::const_iterator it;
87 for (it=unknownLabels_.begin(); it != unknownLabels_.end(); it++)
89 // count the number of individuals in each class
91 return tk_.minElt();
92}
93
94/* simulate zi */
96{
97 // simulate zi
98 std::vector<int>::const_iterator it;
99 for (it=unknownLabels_.begin(); it != unknownLabels_.end(); it++)
101 return cStep();
102}
103
104/* compute tik, default implementation. */
106{
107#ifdef STK_MIXTURE_DEBUG
108 stk_cout << _T("Entering MixtureSemiLearner::eStep()\n");
109#endif
110 Real sum = 0.; tk_ =0.;
111//#ifdef _OPENMP
112//#pragma omp parallel for reduction (+:sum)
113//#endif
114 std::vector<int>::const_iterator it;
115 for(it=unknownLabels_.begin(); it != unknownLabels_.end(); it++)
117 for(it=knownLabels_.begin(); it != knownLabels_.end(); it++)
118 { sum += lnComponentProbability(*it, zi_[*it]) + std::log(pk_[zi_[*it]]);}
119 // update log-likelihood
121 // compute proportions
123#ifdef STK_MIXTURE_DEBUG
124 stk_cout << _T("MixtureSemiLearner::eStep() done\n");
125 stk_cout << _T("lnLikelihood =") << sum << _T("\n");
126#endif
127 return tk_.minElt();
128}
129
130/* Compute Zi using the Map estimate, default implementation. */
132{
133 std::vector<int>::const_iterator it;
134 for (it=unknownLabels_.begin(); it != unknownLabels_.end(); it++)
136}
137
138/* Create the mixture model parameters. This implementation sets each row of
139 * tik_ to 1/K for the unknown labels and, for the known labels, to 0 except
139 * for a 1 in the column of the known class zi_. */
141{
142 std::vector<int>::const_iterator it;
143 for (it=unknownLabels_.begin(); it != unknownLabels_.end(); it++)
144 { tik_.row(*it) = 1./nbCluster();}
145 for (it=knownLabels_.begin(); it != knownLabels_.end(); it++)
146 { tik_.row(*it) = 0.; tik_(*it, zi_[*it]) = 1.;}
147}
148
149/* generate random tik_ */
151{
152 tk_ = 0.;
153 std::vector<int>::const_iterator it;
154 for (it = unknownLabels_.begin(); it != unknownLabels_.end(); ++it)
155 {
156 // create a reference on the i-th row
157 CPointX tikRowi(tik_.row(*it), true);
158 tikRowi.randUnif();
159 tikRowi = tikRowi * pk_;
160 tikRowi /= tikRowi.sum();
161 tk_ += tikRowi;
162 }
163 return tk_.minElt();
164}
165
166/* generate random zi */
168{
170 std::vector<int>::const_iterator it;
171 for (it = unknownLabels_.begin(); it != unknownLabels_.end(); ++it)
172 { zi_[*it] = law.rand();}
173 // create tik and compute nk
174 return cStep();
175}
176
177/* Copy the ingredient of the semi-learner in the creation process */
179{
180 knownLabels_ = p_learner->knownLabels();
181 unknownLabels_ = p_learner->unknownLabels();
182}
183
184/* Constructor.
185 * @param nbCluster,nbSample number of clusters and samples
186 */
188 : MixtureSemiLearner( nbSample, nbCluster)
189{ setNbFreeParameter(0); /* remove the count of the pk parameters */}
190
191/* copy constructor.
192 * @param model the model to copy
193 */
196
197/* Create a composer, but reinitialize the ingredients parameters. */
199{
201 p_composer->createSemiLearner(this);
202 p_composer->createComposer(v_mixtures());
203 /* remove the count of the pk parameters */
204 p_composer->setNbFreeParameter(p_composer->nbFreeParameter()-(nbCluster()-1));
205 return p_composer;
206}
207/* Create a clone of the current model, with ingredients parameters preserved. */
210
211/* Overloading of the pStep() method: the proportions are left at their initial value 1/K. */
213
214
215
216} // namespace STK
217
This file defines methods for displaying Arrays and Expressions.
In this file we define the Categorical distribution.
In this file we define the MixtureSemiLearner class for semi-supervised learning models.
This file contains the functors computing statistics.
#define stk_cout
Standard stk output stream.
#define _T(x)
Let x unmodified.
Type const minElt(int &row, int &col) const
hidden::CSlice< Derived, 1, sizeCols_ >::Result row(int i) const
implement the row operator using a reference on the row of the allocator
virtual int cStep()
Replace tik by zik.
virtual Real eStep()
compute the zi, the lnLikelihood of the current estimates and the next value of the tik.
virtual void mapStep()
Compute zi using the Map estimate.
virtual int sStep()
Simulate zi according to tik, then replace tik by zik by calling cStep().
CArrayXX tik_
The tik probabilities.
CVectorXi zi_
The zi class label.
std::vector< IMixture * > const & v_mixtures() const
CPointX pk_
The proportions of each mixtures.
CPointX tk_
The sum of the columns of tik_.
void setNbFreeParameter(int const &nbFreeParameter)
set the number of free parameters of the model
void setLnLikelihood(Real const &lnLikelihood)
set the log-likelihood of the model
Categorical probability law.
virtual int rand() const
Main class for handling composed mixture models.
void createComposer(std::vector< IMixture * > const &v_mixtures_)
Create the composer using existing data handler and mixtures.
virtual Real lnComponentProbability(int i, int k) const
specialization of the composer for the fixed proportion case.
virtual MixtureSemiLearnerFixedProp * create() const
Create a composer, but reinitialize the mixtures parameters.
MixtureSemiLearnerFixedProp(int nbSample, int nbCluster)
Constructor.
virtual void pStep()
overloading of the pStep() method.
virtual MixtureSemiLearnerFixedProp * clone() const
Create a clone of the current model, with mixtures parameters preserved.
Base class for Learning a Mixture mixed model when some classes are known.
virtual void mapStep()
Compute zi using the Map estimate.
virtual void initializeMixtureParameters()
Create the mixture model parameters tik_ and pk_.
virtual int randomZi()
generate random zi_
MixtureSemiLearner(int nbSample, int nbCluster)
Constructor.
virtual MixtureSemiLearner * create() const
Create a composer, but reinitialize the mixtures parameters.
virtual int randomTik()
generate random tik_
std::vector< int > unknownLabels_
indexes of the unknown labels
virtual ~MixtureSemiLearner()
destructor
void createSemiLearner(MixtureSemiLearner const *const p_learner)
Copy the ingredient of the semi-learner in the creation process.
virtual int sStep()
Simulate zi according to tik, then replace tik by zik by calling cStep().
virtual int cStep()
Replace tik by zik.
virtual Real eStep()
compute the zi, the lnLikelihood of the current estimates and the next value of the tik.
virtual MixtureSemiLearner * clone() const
Create a clone of the current model, with mixtures parameters preserved.
std::vector< int > knownLabels_
indexes of the known labels
The MultidimRegression class allows to regress a multidimensional output variable among a multivariat...
Arrays::SumOp< Lhs, Rhs >::result_type sum(Lhs const &lhs, Rhs const &rhs)
convenience function for summing two arrays
double Real
STK fundamental type of Real values.
hidden::FunctorTraits< Derived, SumOp >::Row sum(Derived const &A)
Compute the sum of A.
hidden::FunctorTraits< Derived, SumOp >::Row sumByCol(Derived const &A)
The namespace STK is the main domain space of the Statistical ToolKit project.