STK++ 0.9.13
STK_PartitionHandler.h
Go to the documentation of this file.
1
2
3#ifndef STK_PARTITIONHANDLER_H
4#define STK_PARTITIONHANDLER_H
5
6#include <Sdk.h>
7
10
11namespace STK
12{
13
19{
20 public:
 /** destructor */
27 inline virtual ~PartitionHandler() {}
28
 /** @return the proportion stored in prop_ */
30 inline Real const& proportion() const { return prop_;}
 /** @return the range of the data set (number of rows) stored in rangeData_ */
32 inline Range const& rangeData() const { return rangeData_;}
 /** @return the partition vector: getPartitions() sends the rows whose entry
  *  equals 1 to the test set and every other row to the learning set */
34 inline CVectorXi const& partitions() const { return partitions_;}
35
 /** run the computations: call partition() and flag the handler as run.
  *  @return always @c true
  **/
36 inline virtual bool run()
37 { partition(); hasRun_ = true; return true;}
38
 /** Store a new proportion and reset hasRun_, so that run() has to be called
  *  again before getPartitions() can succeed.
  *  NOTE(review): some statements of this body are not visible in this
  *  listing; presumably they update rangeData_ and sizeTest_ -- confirm.
  *  @param rangeData the range of the data to set
  *  @param prop the proportion to store in prop_
  **/
39 inline void setData( Range const& rangeData, Real prop)
40 {
43 prop_ = prop;
45 hasRun_ = false;
46 }
 /** Split x into a learning set and a test set following partitions_.
  *  @return @c false if run() has not been called or if the rows of x do not
  *  match rangeData_, @c true otherwise
  **/
48 template<class Data>
49 bool getPartitions( Data const& x, Data& xLearn, Data& xTest);
 /** Split x and y, row by row and in the same way, into learning sets and
  *  test sets following partitions_.
  *  @return @c false if run() has not been called or if the rows of x or y do
  *  not match rangeData_, @c true otherwise
  **/
51 template<class xData, class yData>
52 bool getPartitions( xData const& x, xData& xLearn, xData& xTest
53 , yData const& y, yData& yLearn, yData& yTest);
54
55 protected:
 /** create a random partition: sizeTest_ entries of partitions_ are set to 1
  *  (test), the others to 0 (learn), then the entries are rearranged at random */
57 void partition();
58
59 private:
 // NOTE(review): the data member declarations are not visible in this listing;
 // the code in this file uses rangeData_, prop_, sizeTest_ and partitions_.
68};
69
70/* Constructor. Stores the data range and the proportion, sets the size of
71 * the test set to int(rangeData.size()*prop) and leaves partitions_ empty.
72 * @param rangeData the range of the data to set
73 * @param prop proportion of the rows of the data placed in the test set
 * NOTE(review): the signature line of this definition is not visible in this
 * listing.
 **/
75 : IRunnerBase()
76 , rangeData_(rangeData), prop_(prop)
77 , sizeTest_(int(rangeData_.size()*prop_))
78 , partitions_()
79{
80 // check proportion: prop_ is tested against the bounds 1 and 0
 // NOTE(review): the statements guarded by the two tests below are not visible
 // in this listing -- restore them from the original header.
81 if (prop_>1)
83 if (prop_<0)
85}
86/* get the data set when setting out fold k and test data set */
87template<class Data>
88bool PartitionHandler::getPartitions( Data const& x,Data& xLearn, Data& xTest)
89{
90 // check if partitions are determined
91 if (!hasRun_)
93 return false;
94 }
95 // check dimensions
96 if (x.rows() != rangeData_)
97 { msg_error_ = STKERROR_NO_ARG(PartitionHandler::getKLearn,x.rows()!=rangeData_);
98 return false;
99 }
100 // prepare containers
101 Range xLearnRows = x.rows();
102 xLearnRows.decLast(sizeTest_);
103 xLearn.resize(xLearnRows, x.cols());
104 xTest.resize(sizeTest_, x.cols());
105 // copy data
106 int iLearnRow = xLearn.beginRows(), iTestRow = xTest.beginRows();
107 for (int i = partitions_.begin(); i < partitions_.end(); ++i)
108 {
109 if (partitions_[i] == 1)
110 {
111 xTest.row(iTestRow) = x.row(i);
112 ++iTestRow;
113 }
114 else
115 {
116 xLearn.row(iLearnRow) = x.row(i);
117 ++iLearnRow;
118 }
119 }
120 return true;
121}
122/* get the data set when setting out fold k and test data set */
123template<class xData, class yData>
125 , yData const& y, yData& yLearn, yData& yTest)
126{
127 // check if partitions are determined
128 if (!hasRun_)
129 { msg_error_ = STKERROR_NO_ARG(PartitionHandler::getKLearn,PartitionHandler has to run);
130 return false;
131 }
132 // check dimensions
133 if (x.rows() != rangeData_)
134 { msg_error_ = STKERROR_NO_ARG(PartitionHandler::getKLearn,x.rows()!=rangeData_);
135 return false;
136 }
137 if (y.rows() != rangeData_)
138 { msg_error_ = STKERROR_NO_ARG(PartitionHandler::getKLearn,y.rows()!=rangeData_);
139 return false;
140 }
141 // prepare constainers
142 Range xLearnRows = x.rows();
143 xLearnRows.decLast(sizeTest_);
144 xLearn.resize(xLearnRows, x.cols());
145 xTest.resize(sizeTest_, x.cols());
146 yLearn.resize(xLearnRows, y.cols());
147 yTest.resize(sizeTest_, y.cols());
148 // copy data
149 int iLearnRow = xLearn.beginRows(), iTestRow = xTest.beginRows();
150 for (int i = partitions_.begin(); i < partitions_.end(); ++i)
151 {
152 if (partitions_[i] == 1)
153 {
154 xTest.row(iTestRow) = x.row(i);
155 yTest.row(iTestRow) = y.row(i);
156 ++iTestRow;
157 }
158 else
159 {
160 xLearn.row(iLearnRow) = x.row(i);
161 yLearn.row(iLearnRow) = y.row(i);
162 ++iLearnRow;
163 }
164 }
165 return true;
166}
167
168/* create a random partition: the first sizeTest_ entries of partitions_ are
 * set to 1 (test), the remaining ones to 0 (learn), then the entries are
 * rearranged at random.
 * NOTE(review): the signature line and one statement at the top of the body
 * are not visible in this listing; presumably partitions_ is sized there --
 * confirm against the original header.
 */
170{
 // index one past the last entry flagged as test
172 int endTest = partitions_.begin()+sizeTest_;
173 //fill the container with the index of the partition (1 test, 0 learn)
174 for(int i = partitions_.begin() ; i< endTest ;i++) { partitions_[i] = 1;}
175 for(int i = endTest ; i< partitions_.end() ;i++) { partitions_[i] = 0;}
176 //make a random rearrangement: walk backward and swap each entry with one
 // drawn by Law::UniformDiscrete::rand(begin, i+1)
 // NOTE(review): the loop starts at end()-2, stops before `begin`, and the
 // draw uses i+1 as second bound -- confirm the bounds of rand() and that the
 // resulting permutation is uniform.
177 int begin = partitions_.begin();
178 for (int i=partitions_.end()-2; i>begin; --i)
179 { std::swap(partitions_[i], partitions_[Law::UniformDiscrete::rand(begin, i+1)]);}
180}
181
182} // namespace STK
183
184#endif /* STK_PARTITIONHANDLER_H */
In this file we implement the final class CArrayVector.
In this file we implement the uniform (discrete) law.
#define STKERROR_NO_ARG(Where, Error)
Definition STK_Macros.h:49
#define STKRUNTIME_ERROR_1ARG(Where, Arg, Error)
Definition STK_Macros.h:129
This file includes all the other header files of the project Sdk.
Derived & resize(Range const &I, Range const &J)
resize the Array.
void clear()
clear all allocated memory.
Abstract base class for all classes having a.
Definition STK_IRunner.h:65
String msg_error_
String with the last error message.
Definition STK_IRunner.h:96
bool hasRun_
true if run has been used, false otherwise
Definition STK_IRunner.h:98
virtual int rand() const
Generate a pseudo Uniform random variate.
The MultidimRegression class allows to regress a multidimensional output variable among a multivariat...
CvHanler is a utility function for building the submatrix/subvectors needed when creating learning a...
PartitionHandler(Range const &rangeData, Real prop)
Default constructor.
void partition()
create a random partition
virtual bool run()
run the computations.
virtual ~PartitionHandler()
destructor
CVectorXi partitions_
repartition of the sample into k-folds
Range rangeData_
Range of the data set (number of rows)
Range const & rangeData() const
void setData(Range const &rangeData, Real prop)
bool getPartitions(Data const &x, Data &xLearn, Data &xTest)
get the data set when setting out fold k and test data set
Real const & proportion() const
CVectorXi const & partitions() const
int sizeTest_
size of the test (should
Index sub-vector region: Specialization when the size is unknown.
Definition STK_Range.h:265
int size() const
get the size of the TRange (the number of elements).
Definition STK_Range.h:303
double Real
STK fundamental type of Real values.
The namespace STK is the main domain space of the Statistical ToolKit project.