• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  // This file is part of Eigen, a lightweight C++ template library
2  // for linear algebra.
3  //
4  // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
5  //
6  // This Source Code Form is subject to the terms of the Mozilla
7  // Public License v. 2.0. If a copy of the MPL was not distributed
8  // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9  
10  #ifndef EIGEN_PARALLELIZER_H
11  #define EIGEN_PARALLELIZER_H
12  
13  namespace Eigen {
14  
15  namespace internal {
16  
17  /** \internal */
manage_multi_threading(Action action,int * v)18  inline void manage_multi_threading(Action action, int* v)
19  {
20    static EIGEN_UNUSED int m_maxThreads = -1;
21  
22    if(action==SetAction)
23    {
24      eigen_internal_assert(v!=0);
25      m_maxThreads = *v;
26    }
27    else if(action==GetAction)
28    {
29      eigen_internal_assert(v!=0);
30      #ifdef EIGEN_HAS_OPENMP
31      if(m_maxThreads>0)
32        *v = m_maxThreads;
33      else
34        *v = omp_get_max_threads();
35      #else
36      *v = 1;
37      #endif
38    }
39    else
40    {
41      eigen_internal_assert(false);
42    }
43  }
44  
45  }
46  
47  /** Must be call first when calling Eigen from multiple threads */
initParallel()48  inline void initParallel()
49  {
50    int nbt;
51    internal::manage_multi_threading(GetAction, &nbt);
52    std::ptrdiff_t l1, l2;
53    internal::manage_caching_sizes(GetAction, &l1, &l2);
54  }
55  
56  /** \returns the max number of threads reserved for Eigen
57    * \sa setNbThreads */
nbThreads()58  inline int nbThreads()
59  {
60    int ret;
61    internal::manage_multi_threading(GetAction, &ret);
62    return ret;
63  }
64  
65  /** Sets the max number of threads reserved for Eigen
66    * \sa nbThreads */
setNbThreads(int v)67  inline void setNbThreads(int v)
68  {
69    internal::manage_multi_threading(SetAction, &v);
70  }
71  
72  namespace internal {
73  
74  template<typename Index> struct GemmParallelInfo
75  {
GemmParallelInfoGemmParallelInfo76    GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0) {}
77  
78    int volatile sync;
79    int volatile users;
80  
81    Index rhs_start;
82    Index rhs_length;
83  };
84  
85  template<bool Condition, typename Functor, typename Index>
parallelize_gemm(const Functor & func,Index rows,Index cols,bool transpose)86  void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
87  {
88    // TODO when EIGEN_USE_BLAS is defined,
89    // we should still enable OMP for other scalar types
90  #if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
91    // FIXME the transpose variable is only needed to properly split
92    // the matrix product when multithreading is enabled. This is a temporary
93    // fix to support row-major destination matrices. This whole
94    // parallelizer mechanism has to be redisigned anyway.
95    EIGEN_UNUSED_VARIABLE(transpose);
96    func(0,rows, 0,cols);
97  #else
98  
99    // Dynamically check whether we should enable or disable OpenMP.
100    // The conditions are:
101    // - the max number of threads we can create is greater than 1
102    // - we are not already in a parallel code
103    // - the sizes are large enough
104  
105    // 1- are we already in a parallel session?
106    // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
107    if((!Condition) || (omp_get_num_threads()>1))
108      return func(0,rows, 0,cols);
109  
110    Index size = transpose ? cols : rows;
111  
112    // 2- compute the maximal number of threads from the size of the product:
113    // FIXME this has to be fine tuned
114    Index max_threads = std::max<Index>(1,size / 32);
115  
116    // 3 - compute the number of threads we are going to use
117    Index threads = std::min<Index>(nbThreads(), max_threads);
118  
119    if(threads==1)
120      return func(0,rows, 0,cols);
121  
122    Eigen::initParallel();
123    func.initParallelSession();
124  
125    if(transpose)
126      std::swap(rows,cols);
127  
128    Index blockCols = (cols / threads) & ~Index(0x3);
129    Index blockRows = (rows / threads) & ~Index(0x7);
130  
131    GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads];
132  
133    #pragma omp parallel for schedule(static,1) num_threads(threads)
134    for(Index i=0; i<threads; ++i)
135    {
136      Index r0 = i*blockRows;
137      Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows;
138  
139      Index c0 = i*blockCols;
140      Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols;
141  
142      info[i].rhs_start = c0;
143      info[i].rhs_length = actualBlockCols;
144  
145      if(transpose)
146        func(0, cols, r0, actualBlockRows, info);
147      else
148        func(r0, actualBlockRows, 0,cols, info);
149    }
150  
151    delete[] info;
152  #endif
153  }
154  
155  } // end namespace internal
156  
157  } // end namespace Eigen
158  
159  #endif // EIGEN_PARALLELIZER_H
160