STK++ 0.9.13
STK_DataFrame.cpp
Go to the documentation of this file.
1/*--------------------------------------------------------------------*/
2/* Copyright (C) 2004-2016 Serge Iovleff, Université Lille 1, Inria
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with this program; if not, write to the
16 Free Software Foundation, Inc.,
17 59 Temple Place,
18 Suite 330,
19 Boston, MA 02111-1307
20 USA
21
22 Contact : S..._Dot_I..._At_stkpp_Dot_org (see copyright for ...)
23*/
24
25/*
26 * Project: stkpp::DManager
27 * Purpose: Implement the DataFrame (Table) class.
28 * Author: Serge Iovleff, S..._Dot_I..._At_stkpp_Dot_org (see copyright for ...)
29 *
30**/
31
36#include "../include/STK_ExportToCsv.h"
37
38namespace STK
39{
40/* Default constructor . */
42
43/* Copy constructor */
44DataFrame::DataFrame( DataFrame const& T, bool ref)
45 : Base(T.range())
46 , Base2D(T)
47{
48 // we need to copy explicitly the data
49 for (int j=begin(); j<end(); j++) // for all columns
50 if (T[j]) // if there is data
51 elt(j) = T[j]->clone(ref); // set the adress of a clone
52}
53
54/* destructor */
56
57/* clear the object. */
// Fragment of a function body. The signature (listing line 58) is not part of
// this extract; going by the comment right above, this is presumably
// DataFrame::clear() — confirm against the original file.
59{
// release what the frame holds through freeMem()
60 freeMem();
61 // set default range for list
// NOTE(review): listing line 62 is absent from this extract — presumably the
// statement resetting the list range; confirm.
63 // set default range for container2D
// NOTE(review): listing line 64 is absent from this extract — presumably the
// statement resetting the 2D container ranges; confirm.
65}
66
67/* Operator = : overwrite the DataFrame with T. */
// Fragment: body of an assignment from T. The signature (listing line 68) is
// not part of this extract; presumably DataFrame::operator=(DataFrame const& T).
// Each column slot of T is cloned into the matching slot of this frame (an
// empty slot of T gives an empty slot here) and *this is returned.
// NOTE(review): nothing guards against T being this very object; in that case
// elt(j) would be deleted right before T[jt] is read — confirm whether
// self-assignment can reach this code.
69{
70 // Resize if necessary.
// only the number of columns is compared; Base::resize() receives T.cols()
71 if (sizeCols() != T.sizeCols()) Base::resize(T.cols());
72
73 // Copy without overlapping.
// forward walk when T starts at or after the first index of this frame,
// backward walk (from the last indexes) otherwise
74 if ((T.begin()>=begin()))
75 {
76 for (int jt=T.begin(), j=begin(); jt<=T.lastIdx(); j++, jt++)
77 {
78 // clear old mem if any
79 if (elt(j)) delete elt(j);
80 // if there is a variable, create a copy
81 if (T[jt]) elt(j) = T[jt]->clone();
82 else elt(j) = 0;
83 }
84 }
85 else
86 {
87 for (int jt=T.lastIdx(), j=lastIdx(); jt>=T.begin(); j--, jt--)
88 {
89 // clear old mem if any
90 if (elt(j)) delete elt(j);
91 // if there is a variable, create a copy
92 if (T[jt]) elt(j) = T[jt]->clone();
93 else elt(j) = 0;
94 }
95 }
96 return *this;
97}
98
99/* resize the container:
100 * - call @c shift(I.begin(), J.begin()
101 * - call @c popBackCols() (@c insertRows()) and/or @c popBackCols()
102 * (@c popBackRows()).
103 * The implicit assumption made by this method is that it is easier and
104 * faster to add column than add rows to the 2D container.
105 * @param I the new range for the rows of the container
106 * @param J the new range for the columns of the container
107 **/
108void DataFrame::resize( Range const& I, Range const& J)
109{
110 // check if there is something to do
111 if ((rows() == I) && (cols() == J)) return;
112 // translate beg
113 shift(I.begin(), J.begin());
114 // number of rows to del or add
115 int rinc = I.lastIdx() - lastIdxRows();
116 // number of cols to del or add
117 int cinc = J.lastIdx() - lastIdxCols();
118 // check if we add cols
119 if (cinc >=0) // work first on rows
120 {
121 if (rinc < 0) popBackRows(-rinc); // less rows
122 else pushBackRows(rinc); // more rows
123 pushBackCols(cinc); // add columns
124 }
125 else // work first on columns
126 {
127 popBackCols(-cinc); // remove columns
128 if (rinc < 0) popBackRows(-rinc); // less rows
129 else pushBackRows(rinc); // more rows
130 }
131}
132
133/* New beginning index for the object. */
// Single-index overload of shift(); cbeg is presumably the new first column
// index (the two-index overload forwards its cbeg argument to it) — confirm.
// NOTE(review): in this extract the body holds comments only. Listing lines
// 137 and 139 are absent — presumably the List1D shift and the IContainer2D
// column shift announced by the two comments; confirm against the original.
134void DataFrame::shift(int const& cbeg)
135{
136 // list1D shift
138 // IContainer2D shift for Col
140}
141
142/* New beginning index for the object. */
// Two-index overload of shift(): moves the columns through shift(cbeg) and
// calls shift(rbeg) on every allocated variable.
// rbeg: value compared with beginRows() and handed to each variable
// cbeg: value compared with beginCols() and handed to shift(cbeg)
143void DataFrame::shift(int const& rbeg, int const& cbeg)
144{
// nothing to do when both first indexes already have the requested values
145 if (beginRows() == rbeg && beginCols() == cbeg) return;
146 if (isRef())
// NOTE(review): listing line 147 is absent from this extract — presumably the
// statement executed when the frame is a reference (an error, by analogy
// with merge()); the call below is not meant to be guarded by the test above.
148 // list1D shift
149 shift(cbeg);
150 // For each column update Variable
151 for (int j=begin(); j<=lastIdx(); j++)
152 if (elt(j)) { elt(j)->shift(rbeg);}
153 // update range of the rows
// NOTE(review): listing line 154 is absent — presumably the update of the
// row range announced by the comment above; confirm.
155}
156
157
158/* Del last column of the container. */
// Remove the n last columns: the variables stored in these columns are
// deleted, the list is shortened with Base::popBack(n) and freeMem() is
// called when no column is left.
// n: number of columns to remove; nothing is done when n <= 0
159void DataFrame::popBackCols(int const& n)
160{
161 // if n<=0 nothing to do
162 if (n<=0) return;
163 // if there is columns to erase
164 if (sizeCols()<n)
// NOTE(review): listing line 165 is absent from this extract — presumably the
// error raised when more columns are requested than available; the loop
// below is not meant to be guarded by the test above.
166 // for all columns, delete variables
167 for (int j=lastIdxCols() - n +1; j<=lastIdxCols(); j++)
168 if (elt(j)) delete elt(j);
169 // popBack() of List1D
170 Base::popBack(n);
171 // update IContainer2D
// NOTE(review): listing line 172 is absent — presumably the update of the
// column range announced by the comment above; confirm.
173 // if it was the last elt, free mem
174 if (this->sizeCols() == 0) freeMem();
175}
176
177/* Delete n columns at the position pos of the container. */
// Remove the n columns pos, pos+1, ..., pos+n-1: their variables are deleted,
// the list elements are erased and freeMem() is called when no column is left.
// pos: index of the first column to remove
// n: number of columns to remove; nothing is done when n <= 0
178void DataFrame::eraseCols( int pos, int const& n)
179{
180 // if n<=0 nothing to do
181 if (n<=0) return;
182 // check conditions
// NOTE(review): listing lines 184, 186 and 188 are absent from this extract —
// presumably the out-of-range errors raised by each of the three tests
// below; the tests are not meant to be nested.
183 if (pos<begin())
185 if (pos>lastIdx())
187 if (lastIdx() < pos+n-1)
189 // for all columns, delete variables
190 for (int j=pos+n-1; j>=pos; j--)
191 if (elt(j)) { delete elt(j);}
192 // delete elements of the List1D
193 erase(pos, n);
194 // update cols_
// NOTE(review): listing line 195 is absent — presumably the update of the
// column range announced by the comment above; confirm.
196 // if it was the last col, free mem
197 if (this->sizeCols() == 0) freeMem();
198}
199
200/* Insert the variable V at the position pos of the container. */
// Store the variable V in the column pos and make all the columns agree on
// the number of rows by pushing back NA values where rows are lacking.
// pos: index of the column receiving V
// V: the variable to store; the pointer itself is stored, no clone is made
// NOTE(review): V is dereferenced without a null test — confirm that callers
// never hand over a null pointer.
201void DataFrame::insertVariable(int pos, IVariable* const & V)
202{
203 // List1D
// NOTE(review): listing line 204 is absent from this extract — presumably the
// insertion of a new list element at pos; confirm.
205 elt(pos) = V;
206 // the variable have to be in the same range
207 elt(pos)->shift(beginRows());
208 // update horizontal range (the number of column)
// NOTE(review): listing line 209 is absent — presumably the update of the
// column range announced by the comment above; confirm.
210
211 // update rows with NA values
// inc > 0: V is shorter than the frame; inc < 0: V is longer than the frame
212 int inc = sizeRows() - V->size();
213 if (inc == 0) return; // same size
214 if (inc > 0) // V has less rows
215 { // put NA values to the inserted column
216 elt(pos)->pushBackNAValues(inc);
217 }
218 else
219 { // put NA values to the other columns
// columns before pos, then columns after pos: the inserted column is skipped
220 for (int i=this->beginCols(); i <pos; i++)
221 if (elt(i)) { elt(i)->pushBackNAValues(-inc);}
222 for (int i=pos+1; i <=this->lastIdxCols(); i++)
223 if (elt(i)) { elt(i)->pushBackNAValues(-inc);}
224 // update LastVe
225 incLastIdxRows(-inc);
226 }
227}
228
229/* Merge this with a dataframe (horizontally). */
// Fragment of a function body. The signature (listing line 230) is not part
// of this extract; the body appends V as a new last element of the list, so
// this is presumably DataFrame::pushBackVariable(IVariable* const& V) —
// confirm against the original file.
// NOTE(review): V is dereferenced without a null test.
231{
232 // List1D
233 pushBack();
234 back() = V;
235 // update horizontal range (the number of column)
// NOTE(review): listing line 236 is absent from this extract — presumably the
// update of the column range; confirm.
237 // adjust the first index of the inserted variable
// NOTE(review): listing line 238 is absent — presumably the shift of V to the
// first row index of the frame; confirm.
239 // update rows with NA values
// inc > 0: V is shorter than the frame; inc < 0: V is longer than the frame
240 int inc = sizeRows() - V->size();
241 if (inc == 0) return; // same size
242 if (inc > 0) //V has less rows
243 { // put NA values to the inserted columns
// NOTE(review): listing line 244 is absent — presumably the call pushing
// inc NA values back to the appended variable; confirm.
245 }
246 else
247 { // put NA values to the oter columns
// strict comparison: the column lastIdxCols() itself is left untouched
248 for (int i=this->begin(); i <lastIdxCols(); i++)
249 if (elt(i)) { elt(i)->pushBackNAValues(-inc);}
250 // update LastVe
251 incLastIdxRows(-inc);
252 }
253}
254
255/* merge this DataFrame with other. */
// Fragment of a function body. The signature (listing line 256) is not part
// of this extract; the error message below names DataFrame::merge(other).
// Nothing is done when other is empty and an error is raised when *this is a
// reference.
// NOTE(review): other is modified through a const_cast (it is shifted to the
// first indexes of this frame) — callers passing a const object should be
// aware of it.
257{
258 if (other.empty()) return;
259 // is this structure just a pointer?
260 if (this->isRef())
261 { STKRUNTIME_ERROR_NO_ARG(DataFrame::merge(other),*this is a reference.);}
262 // is T just a pointer?
263 if (other.isRef())
// NOTE(review): listing line 264 is absent from this extract — presumably the
// error raised when other is a reference; the statements below are not
// meant to be guarded by the test above.
265 // break const reference
266 DataFrame& otherRef = const_cast<DataFrame&>(other);
267 // update rows with NA values
// pos: first column index following the current columns
// inc > 0: other has fewer rows than this; inc < 0: other has more rows
268 int pos(lastIdx()+1), inc = sizeRows() - otherRef.sizeRows();
269
270 otherRef.shift(beginRows(), beginCols());
// NOTE(review): listing line 271 is absent — presumably the statement that
// actually moves the columns of other into this frame; confirm.
272
273 if (inc == 0) return; // same size
274
275 if (inc > 0) // other has less rows
276 { // put NA values to the merged columns
277 for (int i= lastIdx(); i >=pos; i--)
278 if (elt(i)) { elt(i)->pushBackNAValues(inc);}
279 }
280 else
281 { // put NA values to the existing columns
282 for (int i=this->begin(); i <pos; i++)
283 if (elt(i)) { elt(i)->pushBackNAValues(-inc);}
284
285 // update LastVe
286 incLastIdxRows(-inc);
287 }
288
289}
290
291/* Insert the DataFrame D at the column pos of the container. */
// Insert clones of the columns of D starting at the column pos and make all
// the columns agree on the number of rows by pushing back NA values where
// rows are lacking. Nothing is done when D is empty.
// pos: index of the first inserted column
// D: the DataFrame whose variables are cloned; D itself is not modified here
292void DataFrame::insertDataFrame( int pos, const DataFrame& D)
293{
294 if (D.empty()) return;
295 // is this structure just a pointer?
// NOTE(review): listing lines 297, 299 and 301 are absent from this extract —
// presumably the errors raised by each of the three tests below; the tests
// are not meant to be nested.
296 if (this->isRef())
298 if (begin()>pos)
300 if (lastIdx()+1<pos)
302 // List1D
303 insertElt(pos, D.sizeCols());
304 // insert all columns of D
305 for (int i = D.beginCols(), icol = pos; i <=D.lastIdxCols(); i++, icol++)
306 {
307 if (D.elt(i))
308 {
// the clone is shifted to the first row index of this frame
309 elt(icol) = D.elt(i)->clone();
310 elt(icol)->shift(beginRows());
311 }
312 else { elt(icol) = 0;}
313 }
314 // update LastHo
// NOTE(review): listing line 315 is absent — presumably the update of the
// column range announced by the comment above; confirm.
316 // update rows with NA values
// inc > 0: D has fewer rows than this; inc < 0: D has more rows
317 int inc = sizeRows() - D.sizeRows();
318 if (inc == 0) return; // same size
319 if (inc > 0) // D has less rows
320 { // put NA values to the inserted columns
321 for (int i= pos+D.sizeCols()-1; i >=pos; i--)
322 if (elt(i)) { elt(i)->pushBackNAValues(inc);}
323 }
324 else
325 { // put NA values to the oter columns
// columns before the inserted ones, then columns after them
326 for (int i=this->beginCols(); i <pos; i++)
327 if (elt(i)) { elt(i)->pushBackNAValues(-inc);}
328 for (int i=pos+D.sizeCols(); i <=this->lastIdx(); i++)
329 if (elt(i)) { elt(i)->pushBackNAValues(-inc);}
330 // update LastVe
331 incLastIdxRows(-inc);
332 }
333}
334
335/* Merge this with a dataframe (horizontally).
336*/
// Fragment of a function body. The signature (listing line 337) is not part
// of this extract; the body appends clones of the columns of D after the
// current last column, so this is presumably
// DataFrame::pushBackDataFrame(DataFrame const& D) — confirm.
338{
339 if (D.empty()) return;
340 // is this structure just a pointer?
341 if (this->isRef())
// NOTE(review): listing line 342 is absent from this extract — presumably the
// error raised when *this is a reference; the statements below are not
// meant to be guarded by the test above.
343 // compute pos
344 int pos(lastIdx()+1);
345 // List1D
346 pushBack(D.sizeCols());
347 // insert all columns of D
348 for (int i = D.begin(), icol = pos; i <=D.lastIdx(); i++, icol++)
349 {
// NOTE(review): unlike insertDataFrame(), an empty column of D does not set
// elt(icol) to 0 here; the slot keeps whatever pushBack() produced.
350 if (D.elt(i))
351 {
352 elt(icol) = D.elt(i)->clone();
353 elt(icol)->shift(beginRows());
354 }
355 }
356 // update LastHo
// NOTE(review): listing line 357 is absent — presumably the update of the
// column range announced by the comment above; confirm.
358 // update rows with NA values
// inc > 0: D has fewer rows than this; inc < 0: D has more rows
359 int inc = sizeRows() - D.sizeRows();
360 if (inc == 0) return; // same size
361 if (inc > 0) // D has less rows
362 { // put NA values to the inserted columns
363 for (int i= lastIdx(); i >=pos; i--)
364 if (elt(i)) { elt(i)->pushBackNAValues(inc);}
365 }
366 else
367 { // put NA values to the oter columns
368 for (int i=this->begin(); i <pos; i++)
369 if (elt(i)) { elt(i)->pushBackNAValues(-inc);}
370 // update LastVe
371 incLastIdxRows(-inc);
372 }
373}
374
375/* Add columns to the container. */
// Append n empty columns: the value 0 is inserted over the range of n
// elements starting right after the current last index.
// n: number of columns to add; nothing is done when n <= 0
376void DataFrame::pushBackCols(int const& n)
377{
378 // if n<=0 nothing to do
379 if (n <= 0) return;
380 // add n columns to list1D
381 insert(Range(lastIdx()+1, n), 0);
382 // update IContainer2D
// NOTE(review): listing line 383 is absent from this extract — presumably the
// update of the column range announced by the comment above; confirm.
384}
385
386/* Insert columns at the specified position to the container. */
// Insert n empty columns: the value 0 is inserted over the range of n
// elements starting at pos.
// pos: index of the first inserted column; an out-of-range error is raised
//      when pos > lastIdx()
// n: number of columns to insert; nothing is done when n <= 0
387void DataFrame::insertCols( int pos, int const& n)
388{
389 if (n <= 0) return; // if n<=0 nothing to do
390 // check conditions
391 if (pos<begin())
// NOTE(review): listing line 392 is absent from this extract — presumably the
// out-of-range error raised when pos < begin(); the next test is not meant
// to be nested in this one.
393 if (pos>lastIdx())
394 { STKOUT_OF_RANGE_2ARG(Dataframe::insertCols,pos, n,pos>lastIdx());}
395 // insert n elements in list1D
396 insert(Range(pos, n), 0);
397 // update IContainer2D
// NOTE(review): listing line 398 is absent — presumably the update of the
// column range announced by the comment above; confirm.
399}
400
401/* Add n rows to the container. */
402void DataFrame::pushBackRows(int const& n)
403{
404 // if n<=0 nothing to do
405 if (n<=0) return;
406 // for each column append row
407 for (int j=begin(); j<=lastIdx(); j++)
408 {
409 if (elt(j)) { elt(j)->pushBack(n);}
410 }
411 // update range of the container
412 incRangeRows(n);
413}
414
415/* Insert n rows at the position pos of the container. */
// Insert n rows at the position pos in every allocated variable.
// pos: row index handed to insertElt() of each variable
// n: number of rows to insert; nothing is done when n <= 0
416void DataFrame::insertRows( int pos, int const& n)
417{
418 // if n<=0 nothing to do
419 if (n<=0) return;
// NOTE(review): listing lines 421 and 423 are absent from this extract —
// presumably the out-of-range errors raised by each of the two tests below;
// the tests are not meant to be nested.
420 if (beginRows() > pos)
422 if (lastIdxRows()+1 < pos)
424 // insert rows to each variables
425 for (int j=begin(); j<=lastIdx(); j++)
426 if (elt(j)) { elt(j)->insertElt(pos, n);}
427 // update rows_
// NOTE(review): listing line 428 is absent — presumably the update of the
// row range announced by the comment above; confirm.
429}
430
431/* Delete the n last rows of the container. */
// Remove the n last rows: popBack(n) is called on every allocated variable.
// n: number of rows to remove
// NOTE(review): unlike its siblings, the visible part of this function has no
// early return for n <= 0 — confirm whether that is intended.
432void DataFrame::popBackRows(int const& n)
433{
434 if (sizeRows() < n)
// NOTE(review): listing line 435 is absent from this extract — presumably the
// error raised when more rows are requested than available; the loop below
// is not meant to be guarded by the test above.
436 // del last row to each variable
437 for (int j=begin(); j<=lastIdx(); j++)
438 if (elt(j)) { elt(j)->popBack(n);}
// NOTE(review): listing line 439 is absent — presumably the update of the
// row range of the container; confirm.
440}
441
442/* Delete n rows at the position pos of the container. */
// Remove n rows starting at the position pos: erase(pos, n) is called on
// every allocated variable.
// pos: index of the first row to remove
// n: number of rows to remove; nothing is done when n <= 0
443void DataFrame::eraseRows( int pos, int const& n)
444{
445 // if n<=0 nothing to do
446 if (n<=0) return;
// NOTE(review): listing lines 448, 450 and 452 are absent from this extract —
// presumably the out-of-range errors raised by each of the three tests
// below; the tests are not meant to be nested.
447 if (beginRows() > pos)
449 if (lastIdxRows() < pos)
451 if (lastIdxRows() < pos+n-1)
453 // for each variable erase elts
454 for (int j=begin(); j<=lastIdx(); j++)
455 if (elt(j)) { elt(j)->erase(pos, n);}
456 // update rows_
// NOTE(review): listing line 457 is absent — presumably the update of the
// row range announced by the comment above; confirm.
458}
459
460/* Protected function for memory deallocation. */
// Fragment of a function body. The signature (listing line 461) is not part
// of this extract; going by the comment right above, this is presumably
// DataFrame::freeMem() — confirm against the original file.
462{
// a reference frame releases nothing
463 if (isRef()) return;
464 // liberate variables
465 freeCols();
// NOTE(review): listing line 466 is absent from this extract — presumably the
// deallocation of the list itself; confirm.
// rows and columns go back to their default (argument-less) ranges
467 setRows();
468 setCols();
469}
470
471/* Protected function for rows memory deallocation.*/
// Fragment of a function body. The signature (listing line 472) is not part
// of this extract; freeMem() calls freeCols() to "liberate variables", so this
// is presumably DataFrame::freeCols() — confirm against the original file.
// Every allocated variable is deleted and its slot is reset to 0, then the
// row range goes back to its default value.
473{
// a reference frame releases nothing
474 if (isRef()) return;
475 // for all columns
476 for (int j=begin(); j<=lastIdx(); j++)
477 if (elt(j)) // if there is mem allocated
478 { delete elt(j); elt(j) = 0;}
479 // set default range
480 setRows();
481}
482
483// write a selection
484void DataFrame::writeDataFrame( ostream& os, int const& left
485 , int const& right
486 ) const
487{
488 // Export to csv the DataFrame
489 ExportToCsv csv(*this);
490 // get the csv
491 ReadWriteCsv* pData = csv.p_readWriteCsv();
492 // set delimiters to blank
493 pData->setDelimiters(_T(" "));
494 // write the csv
495 pData->writeSelection(os, beginRows(), lastIdxRows(), left, right);
496}
497
498/* Print a DataFrame. */
// Fragment of a function body. The signature (listing line 499) is not part
// of this extract; the body writes V to the stream s and returns s, so this is
// presumably the output operator of DataFrame — confirm.
500{
// right-adjust the output
501 s << std::right;
// write all the columns of V, from its first to its last column index
502 V.writeDataFrame(s, V.beginCols(), V.lastIdxCols());
503 return s;
504}
505
506} // namespace STK
#define STKOUT_OF_RANGE_1ARG(Where, Arg, Error)
Definition STK_Macros.h:93
#define STKRUNTIME_ERROR_1ARG(Where, Arg, Error)
Definition STK_Macros.h:129
#define STKOUT_OF_RANGE_2ARG(Where, Arg1, Arg2, Error)
Definition STK_Macros.h:102
#define STKRUNTIME_ERROR_NO_ARG(Where, Error)
Definition STK_Macros.h:138
#define STKRUNTIME_ERROR_2ARG(Where, Arg1, Arg2, Error)
Definition STK_Macros.h:120
#define _T(x)
Let x unmodified.
DataFrame is a List of Variable with the same number of rows.
void eraseRows(int pos, int const &n=1)
Delete n rows at the position pos to the container.
bool empty() const
int beginRows() const
void popBackCols(int const &n)
Delete n columns of the container.
int sizeCols() const
void insertRows(int pos, int const &n=1)
Insert n rows at the ith position of the container.
int beginCols() const
IVariable *& elt(int const &i)
access to an element.
void clear()
Clear the object.
void shift(int const &cbeg=1)
New beginning index for the object.
void merge(DataFrame const &D)
merge this DataFrame with D.
void pushBackCols(int const &n=1)
Add columns to the container.
virtual ~DataFrame()
Destructor.
void pushBackVariable(IVariable *const &V)
Append a Variable back.
RowRange const & rows() const
void insertDataFrame(int pos, DataFrame const &D)
Insert a DataFrame at the specified position to the container.
void insertCols(int pos, int const &n=1)
Insert columns at the specified position to the container.
DataFrame & operator=(DataFrame const &T)
Operator = : overwrite the DataFrame with T.
int lastIdxRows() const
void popBackRows(int const &n)
Delete the last rows of the container.
void freeCols()
function for row memory deallocation.
void eraseCols(int pos, int const &n=1)
Delete n columns at the position of the container.
int lastIdxCols() const
int sizeRows() const
void freeMem()
function for memory deallocation.
void insertVariable(int pos, IVariable *const &V)
Insert a Variable at the specified position to the container.
void pushBackRows(int const &n=1)
Add n rows to the container.
void resize(Range const &I, Range const &J)
resize the container:
DataFrame()
Default Constructor, empty table.
void writeDataFrame(ostream &os, int const &left, int const &right) const
write a DataFrame to the output stream os.
ColRange cols() const
void pushBackDataFrame(DataFrame const &D)
Append a DataFrame back.
Export data to a Csv.
void shiftRows(int beg)
Set the first index of the rows.
void shiftCols(int beg)
Shift the columns first index to beg.
void setRows(RowRange const &I=RowRange())
Set the range of the number of rows.
void decLastIdxRows(int dec)
Decrement the end of the number of rows.
void incRangeRows(int inc)
Increment the range of the number of rows.
void setRanges(RowRange const &I=RowRange(), ColRange const &J=ColRange())
Set the ranges of the container.
void decLastIdxCols(int dec)
Decrement the last index of the columns.
void incLastIdxRows(int inc)
Increment the end of the number of rows.
void setCols(ColRange const &J=ColRange())
Set the columns range.
void incLastIdxCols(int inc)
Increment the last index of the columns.
Derived * clone() const
create a leaf using the copy constructor of the Derived class.
Derived & resize(Range const &I=RowRange())
void setRange(RowRange const &I=RowRange())
Set range of the rows of the container.
IVariable is an Interface class for all Variables classes.
virtual void popBack(int n=1)=0
Delete n last elements of the container.
virtual void shift(int beg)=0
New first index for the object.
virtual void pushBackNAValues(int n=1)=0
push back n NA values.
virtual void insertElt(int pos, int n=1)=0
Insert n elements at the position pos of the container.
virtual void erase(int pos, int n=1)=0
Delete n elements at the pos index from the container.
virtual IVariable * clone(bool ref=false) const =0
virtual void pushBack(int n=1)=0
Add n elements at the end of the container.
virtual int size() const =0
void pushBack(int const &n=1)
Add n Elts to the container.
Definition STK_List1D.h:323
void erase(int pos, int const &n=1)
Delete n elts at the pos index to the container.
Definition STK_List1D.h:475
void merge(List1D const &other)
merge this with other.
Definition STK_List1D.h:379
void insertElt(int pos, int const &n=1)
Insert n elts at the position pos of the container.
Definition STK_List1D.h:414
void insert(Range const &I, Type const &v)
Insert element v in the range I of the List1D.
Definition STK_List1D.h:370
void popBack(int const &n=1)
Delete n last elements of the container.
Definition STK_List1D.h:458
int lastIdx() const
void freeMem()
Protected function for deallocation.
Definition STK_List1D.h:651
The MultidimRegression class allows to regress a multidimensional output variable among a multivariat...
Index sub-vector region: Specialization when the size is unknown.
Definition STK_Range.h:265
ostream & operator<<(ostream &s, const CAllocator< Type, SizeRows_, SizeCols_, Orient_ > &V)
output stream for CAllocator.
Definition STK_CArray.h:221
std::basic_ostream< Char > ostream
ostream for Char
Definition STK_Stream.h:57
The namespace STK is the main domain space of the Statistical ToolKit project.
TRange< UnknownSize > Range
Definition STK_Range.h:59
bool isRef() const