STK++ 0.9.13
STK::hidden::PanelBlockProduct< Lhs, Rhs, Result > Struct Template Reference

Methods to use for C=AB with A divided in panels and B divided in blocks. More...

#include <STK_ArrayByArrayProduct.h>

Public Types

typedef Result::Type Type
 
typedef hidden::MultImpl< Type > Cmult
 
typedef hidden::MultCoefImpl< Lhs, Rhs, Result > MultCoeff
 

Static Public Member Functions

static void run (Lhs const &lhs, Rhs const &rhs, Result &res)
 Main method of the matrix multiplication implementation.
 
template<class SubLhs , class SubRhs >
static void multPanelByBlock (SubLhs const &lhs, SubRhs const &rhs, Result &res)
 Default dimension.
 
template<class SubLhs , class SubRhs >
static void multPanelByBlockPart (SubLhs const &lhs, SubRhs const &rhs, Result &res)
 Default dimension.
 

Detailed Description

template<typename Lhs, typename Rhs, typename Result>
struct STK::hidden::PanelBlockProduct< Lhs, Rhs, Result >

Methods to use for C=AB with A divided in panels and B divided in blocks.

The structure PanelBlockProduct contains only static methods and typedefs and should normally not be used directly.

See also
PanelByBlock

Definition at line 458 of file STK_ArrayByArrayProduct.h.

Member Typedef Documentation

◆ Cmult

Definition at line 461 of file STK_ArrayByArrayProduct.h.

◆ MultCoeff

template<typename Lhs , typename Rhs , typename Result >
typedef hidden::MultCoefImpl<Lhs, Rhs, Result> STK::hidden::PanelBlockProduct< Lhs, Rhs, Result >::MultCoeff

Definition at line 462 of file STK_ArrayByArrayProduct.h.

◆ Type

template<typename Lhs , typename Rhs , typename Result >
typedef Result::Type STK::hidden::PanelBlockProduct< Lhs, Rhs, Result >::Type

Definition at line 460 of file STK_ArrayByArrayProduct.h.

Member Function Documentation

◆ multPanelByBlock()

template<typename Lhs , typename Rhs , typename Result >
template<class SubLhs , class SubRhs >
static void STK::hidden::PanelBlockProduct< Lhs, Rhs, Result >::multPanelByBlock ( SubLhs const &  lhs,
SubRhs const &  rhs,
Result &  res 
)
inline static

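Default dimension: accumulates into res the product of a panel of lhs with a block of rhs, with the four inner indices and the four result columns written out explicitly.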

Definition at line 529 of file STK_ArrayByArrayProduct.h.

530 { // Adds to res the product of the lhs sub-array with four consecutive columns of the rhs sub-array.
531 int const k= lhs.beginCols(); // first inner index; the terms below use k, k+1, k+2 and k+3
532 for (int i=lhs.beginRows(); i<lhs.endRows(); ++i) // every row of the lhs sub-array
533 {
534 int j = rhs.beginCols(); // first of four consecutive result columns
535 res.elt(i,j) += lhs(i, k ) * rhs(k , j) + lhs(i, k+1) * rhs(k+1, j) // column j: four-term dot product
536 + lhs(i, k+2) * rhs(k+2, j) + lhs(i, k+3) * rhs(k+3, j);
537 ++j; // second column
538 res.elt(i,j) += lhs(i, k ) * rhs(k , j) + lhs(i, k+1) * rhs(k+1, j)
539 + lhs(i, k+2) * rhs(k+2, j) + lhs(i, k+3) * rhs(k+3, j);
540 ++j; // third column
541 res.elt(i,j) += lhs(i, k ) * rhs(k , j) + lhs(i, k+1) * rhs(k+1, j)
542 + lhs(i, k+2) * rhs(k+2, j) + lhs(i, k+3) * rhs(k+3, j);
543 ++j; // fourth column
544 res.elt(i,j) += lhs(i, k ) * rhs(k , j) + lhs(i, k+1) * rhs(k+1, j)
545 + lhs(i, k+2) * rhs(k+2, j) + lhs(i, k+3) * rhs(k+3, j);
546 } // NOTE(review): the hard-coded 4 columns / 4 inner indices presumably match blockSize_ - confirm
547 }

Referenced by STK::hidden::PanelBlockProduct< Lhs, Rhs, Result >::run().

◆ multPanelByBlockPart()

template<typename Lhs , typename Rhs , typename Result >
template<class SubLhs , class SubRhs >
static void STK::hidden::PanelBlockProduct< Lhs, Rhs, Result >::multPanelByBlockPart ( SubLhs const &  lhs,
SubRhs const &  rhs,
Result &  res 
)
inline static

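Default dimension: accumulates into res the product of a panel of lhs with a partial block of rhs, using four inner indices and looping over however many columns the rhs sub-array has.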

Definition at line 550 of file STK_ArrayByArrayProduct.h.

551 { // Adds to res the product of the lhs sub-array with the rhs sub-array, for any number of rhs columns.
552 const int k= lhs.beginCols(); // first inner index; the terms below use k, k+1, k+2 and k+3
553 for (int i=lhs.beginRows(); i<lhs.endRows(); ++i) // every row of the lhs sub-array
554 for (int j=rhs.beginCols(); j<rhs.endCols(); ++j) // every column of the rhs sub-array
555 res.elt(i,j) += lhs(i, k ) * rhs(k , j) + lhs(i, k+1) * rhs(k+1, j) // four-term dot product
556 + lhs(i, k+2) * rhs(k+2, j) + lhs(i, k+3) * rhs(k+3, j);
557 }

Referenced by STK::hidden::PanelBlockProduct< Lhs, Rhs, Result >::run().

◆ run()

template<typename Lhs , typename Rhs , typename Result >
static void STK::hidden::PanelBlockProduct< Lhs, Rhs, Result >::run ( Lhs const &  lhs,
Rhs const &  rhs,
Result &  res 
)
inline static

Main method of the matrix multiplication implementation.

Note
res must have been resized and initialized to zero before this method is called; the method only accumulates into it.

Definition at line 467 of file STK_ArrayByArrayProduct.h.

468 { // Computes res += lhs * rhs: complete blocks first, then leftover columns/rows, then leftover inner indices.
469#ifdef STK_ARRAYS_DEBUG
470 stk_cout << _T("Entering PanelBlockProduct::run()\n");
471#endif
472 // compute dimensions
473 int nbInnerLoop = lhs.sizeCols()/blockSize_; // = rhs.sizeRows()/blockSize_;
474 int nbBlocks = rhs.sizeCols()/blockSize_; // number of complete column blocks of rhs
475 int nbPanels = lhs.sizeRows()/panelSize_; // number of complete row panels of lhs
476
477 // remaining sizes in the matrices
478 int bSize = rhs.sizeCols() - blockSize_ * nbBlocks; // leftover columns of rhs (0 if evenly divisible)
479 int pSize = lhs.sizeRows() - panelSize_ * nbPanels; // leftover rows of lhs (0 if evenly divisible)
480 int tSize = lhs.sizeCols() - blockSize_ * nbInnerLoop; // leftover inner indices (0 if evenly divisible)
481
482 // index of the remaining positions
483 int kLastPos = lhs.beginCols() + blockSize_ * nbInnerLoop; // first inner index not covered by the k loop
484
485 // start blocks by panel
486 for (int k = 0; k<nbInnerLoop; ++k) // one pass per complete slice of the inner dimension
487 {
488 TRange<blockSize_> innerRange(lhs.beginCols() + k * blockSize_, blockSize_); // inner indices of this slice
489#ifdef _OPENMP
490#pragma omp parallel for
491#endif
492 for (int j = 0; j<nbBlocks; ++j) // NOTE(review): each j presumably updates a distinct column range of res - confirm
493 {
494 TRange<blockSize_> colRange(rhs.beginCols() + j * blockSize_, blockSize_); // columns of the j-th complete block
495 for (int i = 0; i<nbPanels; ++i)
496 {
497 TRange<panelSize_> rowRange(lhs.beginRows() + i * panelSize_, panelSize_); // rows of the i-th complete panel
498 multPanelByBlock( lhs.sub(rowRange, innerRange), rhs.sub(innerRange, colRange), res);
499 }
500 // remaining incomplete
501 Range rowRange(lhs.beginRows() + nbPanels * panelSize_, pSize); // leftover rows; pSize may be 0
502 multPanelByBlock( lhs.sub(rowRange, innerRange), rhs.sub(innerRange, colRange), res);
503 }
504 Range colRange(rhs.beginCols() + nbBlocks * blockSize_, bSize); // leftover columns; bSize may be 0
505#ifdef _OPENMP
506#pragma omp parallel for
507#endif
508 for (int i = 0; i<nbPanels; ++i) // complete panels against the leftover columns
509 {
510 TRange<panelSize_> rowRange(lhs.beginRows() + i * panelSize_, panelSize_);
511 multPanelByBlockPart( lhs.sub(rowRange, innerRange), rhs.sub(innerRange, colRange), res);
512 }
513 // remaining incomplete panels
514 Range rowRange(lhs.beginRows() + nbPanels * panelSize_, pSize); // leftover rows against leftover columns
515 multPanelByBlockPart( lhs.sub(rowRange, innerRange), rhs.sub(innerRange, colRange), res);
516 } // InnerLoop
517 // treat the remaining rows/columns not in outer loop k
518 switch (tSize) // tSize is a remainder of the division by blockSize_
519 {
520 case 0: break;
521 case 1: MultCoeff::mult1Outer(lhs, rhs, res, kLastPos); break;
522 case 2: MultCoeff::mult2Outer(lhs, rhs, res, kLastPos); break;
523 case 3: MultCoeff::mult3Outer(lhs, rhs, res, kLastPos); break;
524 default:break; // NOTE(review): a remainder above 3 is silently ignored - only correct if blockSize_ <= 4; confirm
525 }
526 }
#define stk_cout
Standard stk output stream.
#define _T(x)
Let x unmodified.
const int panelSize_
const int blockSize_
TRange< UnknownSize > Range
Definition STK_Range.h:59
static void mult2Outer(Lhs const &lhs, Rhs const &rhs, Result &res, int k)
multiplication with two outer rows/columns
static void mult1Outer(Lhs const &lhs, Rhs const &rhs, Result &res, int k)
multiplication with one outer rows/columns
static void mult3Outer(Lhs const &lhs, Rhs const &rhs, Result &res, int k)
multiplication with three outer rows/columns
static void multPanelByBlockPart(SubLhs const &lhs, SubRhs const &rhs, Result &res)
Default dimension.
static void multPanelByBlock(SubLhs const &lhs, SubRhs const &rhs, Result &res)
Default dimension.

References _T, STK::blockSize_, STK::hidden::MultCoefImpl< Lhs, Rhs, Result >::mult1Outer(), STK::hidden::MultCoefImpl< Lhs, Rhs, Result >::mult2Outer(), STK::hidden::MultCoefImpl< Lhs, Rhs, Result >::mult3Outer(), STK::hidden::PanelBlockProduct< Lhs, Rhs, Result >::multPanelByBlock(), STK::hidden::PanelBlockProduct< Lhs, Rhs, Result >::multPanelByBlockPart(), STK::panelSize_, and stk_cout.


The documentation for this struct was generated from the following file: