STK++ 0.9.13
STK_DataHandler.cpp
Go to the documentation of this file.
1/*--------------------------------------------------------------------*/
2/* Copyright (C) 2004-2016 Serge Iovleff
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with this program; if not, write to the
16 Free Software Foundation, Inc.,
17 59 Temple Place,
18 Suite 330,
19 Boston, MA 02111-1307
20 USA
21
22 Contact : S..._Dot_I..._At_stkpp_Dot_org (see copyright for ...)
23*/
24
25/*
26 * Project: stkpp::DManager
27 * created on: 16 nov. 2013
28 * Author: iovleff, S..._Dot_I..._At_stkpp_Dot_org (see copyright for ...)
29 **/
30
35#include "../include/STK_DataHandler.h"
36
37namespace STK
38{
39
40/* Read a data file and register every column it contains under a single
 * (idData, idModel) pair.
 * NOTE(review): the first line of the signature and the declarations of the
 * locals `data` and `desc` are not visible in this listing -- confirm them
 * against the original source file.
 * @param idData id written in the second descriptor entry (baseIdx+1)
 * @param idModel id written in the first descriptor entry (baseIdx)
 * @return false if the read fails or if addInfo() rejects the pair,
 * true otherwise
 */
42 , std::string const& idData
43 , std::string const& idModel)
44{
46 // forward the handler's withNames_ flag (first line with names ?) to the reader
47 data.setWithNames(withNames_);
48 // read the data set; on failure print the reader's error message and give up
49 if (!data.read())
50 {
51 stk_cerr << _T("An error occur when reading the data file.\nWhat: ")
52 << data.error();
53 return false;
54 }
55 // fill the descriptor: model id first, data id second
57 desc[baseIdx] = idModel ; desc[baseIdx+1] = idData;
58 // register the (idData, idModel) pair first: nothing is appended to data_
// or descriptor_ when addInfo() refuses it
59 if (!addInfo(idData, idModel)) return false;
60 data_ += data;
61 // store descriptor : one copy of desc is pushed for each column of the data just read
62 for (int j=data.beginCols(); j< data.endCols(); ++j)
63 { descriptor_.push_back(desc);}
64 return true;
65}
66
67/* Read a data file and its companion descriptor file.
 * For each column the descriptor file is read as holding the model id in its
 * first row and the data id in its second row; each (idData, idModel) pair is
 * registered with addInfo() and the data read are then appended to data_.
 * NOTE(review): the declarations of the locals `rwdata` and `rwdesc`, and
 * listing line 116 (between `data_ += rwdata;` and `return true;`), are not
 * visible in this listing -- confirm them against the original source file.
 * @param datafile data file to read (presumably used to build `rwdata`)
 * @param descriptorfile descriptor file to read (presumably used to build `rwdesc`)
 * @return false if a read fails, if a consistency check fails or if addInfo()
 * rejects a pair; true otherwise
 */
68bool DataHandler::readDataFromCsvFile(std::string const& datafile, std::string descriptorfile)
69{
71 // forward the handler's withNames_ flag (first line with names ?) to the data reader
72 rwdata.setWithNames(withNames_);
73 // read the data set; on failure print the reader's error message and give up
74 if (!rwdata.read())
75 {
76 stk_cerr << _T("An error occur when reading the data file.\nWhat: ")
77 << rwdata.error();
78 return false;
79 }
81 // the descriptor file is always read without names in the first line
82 rwdesc.setWithNames(false);
83 // read the descriptor file; on failure print the reader's error message and give up
84 if (!rwdesc.read())
85 {
86 stk_cerr << _T("An error occur when reading the descriptor file.\nWhat: ")
87 << rwdesc.error();
88 return false;
89 }
90 // consistency checks: both files must have the same size(), the data must
// have at least one row and the descriptor at least two rows
91 if (rwdata.size() != rwdesc.size())
92 {
93 stk_cerr << _T("Data file and descriptor file does not have the same number of column.\n");
94 return false;
95 }
96 if (rwdata.sizeRows() == 0)
97 {
98 stk_cerr << _T("No data.\n");
99 return false;
100 }
101 if (rwdesc.sizeRows() < 2)
102 {
103 stk_cerr << _T("No descriptor.\n");
104 return false;
105 }
106 // parse descriptor file: row firstRow holds idModel, row firstRow+1 holds idData
107 int firstRow = rwdesc.beginRows();
108 for (int j=rwdesc.beginCols(); j< rwdesc.endCols(); j++)
109 {
110 std::string idModel = rwdesc.at(j).at(firstRow);
111 std::string idData = rwdesc.at(j).at(firstRow+1);
// NOTE(review): a failure here returns after the pairs of the previous
// columns have already been passed to addInfo()
112 if (!addInfo(idData, idModel)) return false;
113 }
114 // store data and descriptors
// NOTE(review): only the data are stored in the visible lines; the statement
// storing the descriptors is presumably the missing listing line 116
115 data_ += rwdata;
117 return true;
118}
119
120/* remove the data with the given idData */
121void DataHandler::removeData(std::string const& idData)
122{
123 int rowIdData = descriptor_.beginRows()+1;
124 for (int i = descriptor_.endCols()-1; i >= descriptor_.beginCols(); --i)
125 { if (descriptor_.var(i)[rowIdData] == idData)
126 {
127 data_.eraseColumn(i);
128 descriptor_.eraseColumn(i);
129 }
130 }
131 info_.erase(idData);
132}
133
134/* lookup on the descriptors in order to get the columns of the ReadWriteCsv
135 * with the Id idData.
136 * @param idData id of the data to get
137 **/
138std::vector<int> DataHandler::colIndex(std::string const& idData) const
139{
140 int rowIdData = descriptor_.beginRows()+1;
141 std::vector<int> colindex;
142 for (int i = descriptor_.beginCols(); i <= descriptor_.lastIdxCols(); ++i)
143 { if (descriptor_.var(i).at(rowIdData) == idData) colindex.push_back(i);}
144 return colindex;
145}
146
147
148} // namespace STK
149
150
151
#define stk_cerr
Standard stk error stream.
#define _T(x)
Let x unmodified.
InfoMap info_
Store the information about the mixtures in the form (idData, idModel).
bool addInfo(std::string const &idData, std::string const &idModel)
Add an info descriptor to the data handler.
ReadWriteCsv descriptor_
descriptor files with two rows.
bool withNames_
Does the first line contain names?
ReadWriteCsv data_
data files
void removeData(std::string const &idData)
remove the data with the given idData
std::vector< int > colIndex(std::string const &idData) const
lookup on the descriptors in order to get the columns of the ReadWriteCsv with the Id idData.
bool readDataFromCsvFile(std::string const &datafile, std::string descriptorfile)
read a data file and its companion description file.
ReadWriteCsv const & data() const
get the whole data set
String const & error() const
get the last error message.
Definition STK_IRunner.h:82
The MultidimRegression class allows to regress a multidimensional output variable among a multivariat...
String stringNa
Representation of a Not Available value.
const int baseIdx
base index of the containers created in STK++.
The namespace STK is the main domain space of the Statistical ToolKit project.