GetFEM  5.5
bgeot_sparse_tensors.h
Go to the documentation of this file.
1 /* -*- c++ -*- (enables emacs c++ mode) */
2 /*===========================================================================
3 
4  Copyright (C) 2000-2026 Julien Pommier
5 
6  This file is a part of GetFEM
7 
8  GetFEM is free software; you can redistribute it and/or modify it
9  under the terms of the GNU Lesser General Public License as published
10  by the Free Software Foundation; either version 3 of the License, or
11  (at your option) any later version along with the GCC Runtime Library
12  Exception either version 3.1 or (at your option) any later version.
13  This program is distributed in the hope that it will be useful, but
14  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16  License and GCC Runtime Library Exception for more details.
17  You should have received a copy of the GNU Lesser General Public License
18  along with this program. If not, see https://www.gnu.org/licenses/.
19 
20  As a special exception, you may use this file as it is a part of a free
21  software library without restriction. Specifically, if other files
22  instantiate templates or use macros or inline functions from this file,
23  or you compile this file and link it with other files to produce an
24  executable, this file does not by itself cause the resulting executable
25  to be covered by the GNU Lesser General Public License. This exception
26  does not however invalidate any other reasons why the executable file
27  might be covered by the GNU Lesser General Public License.
28 
29 ===========================================================================*/
30 
31 /**@file bgeot_sparse_tensors.h
32  @author Julien Pommier <Julien.Pommier@insa-toulouse.fr>
33  @date January 2003.
34  @brief Sparse tensors, used during the assembly.
35 
36  "sparse" tensors: these are not handled like sparse matrices
37 
38  As an example, let say that we have a tensor t(i,j,k,l) of
39  dimensions 4x2x3x3, with t(i,j,k,l!=k) == 0.
40 
41  Then the tensor shape will be represented by a set of 3 objects of type
42  'tensor_mask':
43  mask1: {i}, "1111"
44  mask2: {j}, "11"
45  mask3: {k,l}, "100"
46  "010"
47  "001"
48  They contain a binary tensor indicating the non-null elements.
49 
50  The set of these three masks define the shape of the tensor
51  (class tensor_shape)
52 
53  If we add information about the location of the non-null elements
54  (by mean of strides), then we have an object of type 'tensor_ref'
55 
56  Iteration on the data of one or more tensor should be done via the
57  'multi_tensor_iterator', which can iterate over common non-null
58  elements of a set of tensors.
59 
60 
61  maximum (virtual) number of elements in a tensor : 2^31
62  maximum number of dimensions : 254
63 
64  "ought to be enough for anybody"
65 */
66 #ifndef BGEOT_SPARSE_TENSORS
67 #define BGEOT_SPARSE_TENSORS
68 
69 #include "gmm/gmm_except.h"
70 #include "bgeot_config.h"
71 #include "dal_bit_vector.h"
72 // #include "gmm/gmm_kernel.h" // for i/o on vectors it is convenient
73 #include <iostream>
74 #include <bitset>
75 
76 namespace bgeot {
77  typedef gmm::uint32_type index_type;
78  typedef gmm::int32_type stride_type; /* signé! */
79 
80  // typedef std::vector<index_type> tensor_ranges;
81  class tensor_ranges : public std::vector<index_type> {
82  public:
83  tensor_ranges() : std::vector<index_type>() {}
84  tensor_ranges(size_type n) : std::vector<index_type>(n) {}
85  tensor_ranges(size_type n, index_type V) : std::vector<index_type>(n,V) {}
86  bool is_zero_size() const
87  {
88  for (dim_type i=0; i < this->size(); ++i)
89  if ((*this)[i] == 0)
90  return true;
91  return false;
92  }
93  };
94  typedef std::vector<stride_type> tensor_strides;
95  typedef std::vector<dim_type> index_set;
96 
97  typedef scalar_type * TDIter;
98 
99  std::ostream& operator<<(std::ostream& o, const tensor_ranges& r);
100 
101  /* stupid && inefficient loop structure */
102  struct tensor_ranges_loop {
103  tensor_ranges sz;
104  tensor_ranges cnt;
105  bool finished_;
106  public:
107  tensor_ranges_loop(const tensor_ranges& t) : sz(t), cnt(t.size()), finished_(t.size() == 0) {
108  std::fill(cnt.begin(), cnt.end(), 0);
109  }
110  index_type index(dim_type i) { return cnt[i]; }
111  bool finished() const { return finished_; }
112  bool next() {
113  index_type i = 0;
114  while (++cnt[i] >= sz[i]) {
115  cnt[i] = 0; i++; if (i >= sz.size()) { finished_ = true; break; }
116  }
117  return finished_;
118  }
119  };
120 
121  /* handle a binary mask over a given number of indices */
122  class tensor_mask {
123  tensor_ranges r;
124  index_set idxs;
125  std::vector<bool> m;
126  tensor_strides s; /* strides in m */
127  mutable index_type card_; /* finally i should have kept m as a dal::bit_vector ... */
128  mutable bool card_uptodate;
129  public:
130  tensor_mask() { set_card(0); }
131  explicit tensor_mask(const tensor_ranges& r_, const index_set& idxs_) {
132  assign(r_,idxs_);
133  }
134  /* constructeur par fusion */
135  explicit tensor_mask(const tensor_mask& tm1, const tensor_mask& tm2, bool and_op);
136  explicit tensor_mask(const std::vector<const tensor_mask*>& tm);
137  explicit tensor_mask(const std::vector<const tensor_mask*> tm1,
138  const std::vector<const tensor_mask*> tm2, bool and_op);
139  void swap(tensor_mask &tm) {
140  r.swap(tm.r); idxs.swap(tm.idxs);
141  m.swap(tm.m); s.swap(tm.s);
142  std::swap(card_, tm.card_);
143  std::swap(card_uptodate, tm.card_uptodate);
144  }
145  void assign(const tensor_ranges& r_, const index_set& idxs_) {
146  r = r_; idxs = idxs_; eval_strides(); m.assign(size(),false);
147  set_card(0);
148  }
149  void assign(const tensor_mask& tm) {
150  r = tm.r;
151  idxs = tm.idxs;
152  m = tm.m;
153  s = tm.s;
154  card_ = tm.card_; card_uptodate = tm.card_uptodate;
155  }
156  void assign(const std::vector<const tensor_mask* >& tm);
157  void assign(const tensor_mask& tm1, const tensor_mask& tm2, bool and_op);
158 
159  void clear() { r.resize(0); idxs.resize(0); m.clear(); s.resize(0); set_card(0); }
160  const tensor_ranges& ranges() const { return r; }
161  const index_set& indexes() const { return idxs; }
162  const tensor_strides& strides() const { return s; }
163  index_set& indexes() { return idxs; }
164  void eval_strides() {
165  s.resize(r.size()+1); s[0]=1;
166  for (index_type i=0; i < r.size(); ++i) {
167  s[i+1]=s[i]*r[i];
168  }
169  }
170  index_type ndim() const { return index_type(r.size()); }
171  index_type size() const { return s[r.size()]; }
172  void set_card(index_type c) const { card_ = c; card_uptodate = true; }
173  void unset_card() const { card_uptodate = false; }
174  index_type card(bool just_look=false) const {
175  if (!card_uptodate || just_look) {
176  index_type c = index_type(std::count_if(m.begin(), m.end(),
177  [](const auto &x) {return x == true;}));
178  if (just_look) return c;
179  card_ = c;
180  }
181  return card_;
182  }
183  index_type pos(tensor_ranges& global_r) const {
184  index_type p = 0;
185  for (index_type i=0; i < r.size(); ++i)
186  p+= s[i]*global_r[idxs[i]];
187  return p;
188  }
189  index_type lpos(tensor_ranges& local_r) const {
190  index_type p = 0;
191  for (index_type i=0; i < r.size(); ++i)
192  p+= s[i]*local_r[i];
193  return p;
194  }
195  bool operator()(tensor_ranges& global_r) const {
196  return m[pos(global_r)];
197  }
198  bool operator()(stride_type p) const { return m[p]; }
199  void set_mask_val(stride_type p, bool v) { m[p]=v; card_uptodate = false; }
200  struct Slice {
201  dim_type dim;
202  index_type i0;
203  Slice(dim_type d, index_type i0_) : dim(d), i0(i0_) {}
204  };
205 
206  /* cree un masque de tranche */
207  void set_slice(index_type dim, index_type range, index_type islice) {
208  r.resize(1); r[0] = range;
209  idxs.resize(1); idxs[0] = dim_type(dim);
210  m.clear(); m.assign(range,false); m[islice] = 1; set_card(1);
211  eval_strides();
212  }
213  explicit tensor_mask(index_type range, Slice slice) {
214  set_slice(slice.dim, range, slice.i0);
215  }
216 
217  struct Diagonal {
218  dim_type i0, i1;
219  Diagonal(dim_type i0_, dim_type i1_) : i0(i0_), i1(i1_) {}
220  };
221 
222  /* cree un masque diagonal */
223  void set_diagonal(index_type n, index_type i0, index_type i1) {
224  assert(n);
225  r.resize(2); r[0] = r[1] = n;
226  idxs.resize(2); idxs[0] = dim_type(i0); idxs[1] = dim_type(i1);
227  m.assign(n*n, false);
228  for (index_type i=0; i < n; ++i) m[n*i+i]=true;
229  set_card(n);
230  eval_strides();
231  }
232  explicit tensor_mask(index_type n, Diagonal diag) {
233  set_diagonal(n, diag.i0, diag.i1);
234  }
235  void set_triangular(index_type n, index_type i0, index_type i1) {
236  assert(n);
237  r.resize(2); r[0] = r[1] = n;
238  idxs.resize(2); idxs[0] = dim_type(i0); idxs[1] = dim_type(i1);
239  m.assign(n*n,false); unset_card();
240  for (index_type i=0; i < n; ++i)
241  for (index_type j=i; j < n; ++j) m[i*n+j]=true;
242  eval_strides();
243  }
244  void set_full(index_type dim, index_type range) {
245  // assert(range); // not sure if permitting range==0 can have any side effects
246  r.resize(1); r[0] = range;
247  idxs.resize(1); idxs[0] = dim_type(dim);
248  m.assign(range, true); set_card(range);
249  eval_strides();
250  }
251  void set_empty(index_type dim, index_type range) {
252  // assert(range); // not sure if permitting range==0 can have any side effects
253  r.resize(1); r[0] = range;
254  idxs.resize(1); idxs[0] = dim_type(dim);
255  m.assign(range,false); set_card(0);
256  eval_strides();
257  }
258  explicit tensor_mask(index_type dim, index_type range) {
259  set_full(dim, range);
260  }
261  void set_zero() {
262  m.assign(size(),false); set_card(0);
263  }
264  void shift_dim_num_ge(dim_type dim, int shift) {
265  for (dim_type i=0; i < idxs.size(); ++i) {
266  if (idxs[i] >= dim) idxs[i] = dim_type(idxs[i] + shift);
267  }
268  check_assertions();
269  }
270  void gen_mask_pos(tensor_strides& p) const {
271  check_assertions();
272  p.resize(card());
273  index_type i = 0;
274  for (tensor_ranges_loop l(r); !l.finished(); l.next()) {
275  if (m[lpos(l.cnt)]) p[i++] = lpos(l.cnt);
276  }
277  assert(i==card());
278  }
279  void unpack_strides(const tensor_strides& packed, tensor_strides& unpacked) const;
280 
281  /* c'est mieux que celle ci renvoie un int ..
282  ou alors un unsigned mais dim_type c'est dangereux */
283  int max_dim() const {
284  index_set::const_iterator it = std::max_element(idxs.begin(),idxs.end());
285  return (it == idxs.end() ? -1 : *it);
286  }
287  void check_assertions() const;
288  void print(std::ostream &o) const;
289  void print_() const { print(cerr); }
290  };
291 
292 
293 
294  typedef std::vector<tensor_mask> tensor_mask_container;
295 
296  struct tensor_index_to_mask {
297  short_type mask_num;
298  short_type mask_dim;
299  tensor_index_to_mask() : mask_num(short_type(-1)),
300  mask_dim(short_type(-1)) {}
301  bool is_valid() { return mask_num != short_type(-1) &&
302  mask_dim != short_type(-1); }
303  };
304 
305 
306  /*
307  defini une "forme" de tenseur creux
308  la fonction merge permet de faire des unions / intersections entre ces formes
309  */
310  class tensor_shape {
311  mutable std::vector<tensor_index_to_mask> idx2mask;
312  tensor_mask_container masks_;
313 
314  /* verifie si un masque est completement vide,
315  si c'est le cas alors tous les autres masques sont vidés
316  (le tenseur est identiquement nul) */
317  void check_empty_mask() {
318  if (card() == 0) {
319  for (dim_type i=0; i < masks_.size(); ++i) {
320  masks_[i].set_zero();
321  }
322  }
323  }
324 
325  static void find_linked_masks(dim_type mnum, const tensor_shape &ts1, const tensor_shape &ts2,
326  dal::bit_vector& treated1, dal::bit_vector& treated2,
327  std::vector<const tensor_mask*>& lstA,
328  std::vector<const tensor_mask*>& lstB) {
329  // gare aux boucles infinies si aucun des indices n'est valide
330  assert(mnum < ts1.masks().size());
331  assert(!treated1[mnum]);
332  treated1.add(mnum);
333  lstA.push_back(&ts1.mask(mnum));
334  for (dim_type i=0; i < ts1.mask(mnum).indexes().size(); ++i) {
335  dim_type ii = ts1.mask(mnum).indexes()[i];
336  if (ts2.index_is_valid(ii) && !treated2[ts2.index_to_mask_num(ii)])
337  find_linked_masks(ts2.index_to_mask_num(ii),ts2,ts1,treated2,treated1,lstB,lstA);
338  }
339  }
340 
341  protected:
342  dim_type index_to_mask_num(dim_type ii) const {
343  if (index_is_valid(ii))
344  return dim_type(idx2mask[ii].mask_num); else return dim_type(-1);
345  }
346  public:
347  void clear() { masks_.resize(0); idx2mask.resize(0); }
348  void swap(tensor_shape& ts) {
349  idx2mask.swap(ts.idx2mask);
350  masks_.swap(ts.masks_);
351  }
352  dim_type ndim() const { return dim_type(idx2mask.size()); }
353  bool index_is_valid(dim_type ii) const {
354  assert(ii < idx2mask.size()); return idx2mask[ii].is_valid();
355  }
356  const tensor_mask& index_to_mask(dim_type ii) const {
357  assert(index_is_valid(ii)); return masks_[idx2mask[ii].mask_num];
358  }
359  dim_type index_to_mask_dim(dim_type ii) const {
360  assert(index_is_valid(ii)); return dim_type(idx2mask[ii].mask_dim);
361  }
362  index_type dim(dim_type ii) const
363  { assert(index_is_valid(ii)); return index_to_mask(ii).ranges()[index_to_mask_dim(ii)];
364  }
365  tensor_mask_container& masks() { return masks_; }
366  const tensor_mask_container& masks() const { return masks_; }
367  const tensor_mask& mask(dim_type i) const { assert(i<masks_.size()); return masks_[i]; }
368  stride_type card(bool just_look=false) const {
369  stride_type n = 1;
370  for (dim_type i=0; i < masks().size(); ++i)
371  n *= masks()[i].card(just_look);
372  return n;
373  }
374  void push_mask(const tensor_mask& m) { masks_.push_back(m); update_idx2mask(); }
375  void remove_mask(dim_type mdim) { /* be careful with this function.. remove
376  only useless mask ! */
377  masks_.erase(masks_.begin()+mdim);
378  update_idx2mask();
379  }
380  void remove_unused_dimensions() {
381  dim_type nd = 0;
382  for (dim_type i=0; i < ndim(); ++i) {
383  if (index_is_valid(i))
384  masks_[idx2mask[i].mask_num].indexes()[idx2mask[i].mask_dim] = nd++;
385  }
386  set_ndim_noclean(nd);
387  update_idx2mask();
388  }
389 
390  void update_idx2mask() const {
391  /*
392  dim_type N=0;
393  for (dim_type i=0; i < masks_.size(); ++i)
394  N = std::max(N, std::max_element(masks_.indexes().begin(), masks_.indexes.end()));
395  idx2mask.resize(N);
396  */
397 
398  std::fill(idx2mask.begin(), idx2mask.end(), tensor_index_to_mask());
399  for (dim_type i=0; i < masks_.size(); ++i) {
400  for (dim_type j=0; j < masks_[i].indexes().size(); ++j) {
401  dim_type k = masks_[i].indexes()[j];
402  GMM_ASSERT3(k < idx2mask.size() && !idx2mask[k].is_valid(), "");
403  idx2mask[k].mask_num = i; idx2mask[k].mask_dim = j;
404  }
405  }
406  }
407  void assign_shape(const tensor_shape& other) {
408  masks_ = other.masks_;
409  idx2mask = other.idx2mask;
410  // update_idx2mask();
411  }
412  void set_ndim(dim_type n) {
413  clear();
414  idx2mask.resize(n); update_idx2mask();
415  }
416  void set_ndim_noclean(dim_type n) {idx2mask.resize(n);}
417 
418  tensor_shape() {}
419 
420  /* create an "empty" shape of dimensions nd */
421  explicit tensor_shape(dim_type nd) : idx2mask(nd,tensor_index_to_mask()) {
422  masks_.reserve(16);
423  }
424  explicit tensor_shape(const tensor_ranges& r) {
425  masks_.reserve(16);
426  set_full(r);
427  }
428  void set_full(const tensor_ranges& r) {
429  idx2mask.resize(r.size());
430  masks_.resize(r.size());
431  for (dim_type i=0; i < r.size(); ++i) masks_[i].set_full(i,r[i]);
432  update_idx2mask();
433  }
434 
435  void set_empty(const tensor_ranges& r) {
436  idx2mask.resize(r.size());
437  masks_.resize(r.size());
438  for (dim_type i=0; i < r.size(); ++i) masks_[i].set_empty(i,r[i]);
439  update_idx2mask();
440  }
441 
442 
443  /* fusion d'une autre forme */
444  void merge(const tensor_shape &ts2, bool and_op = true) {
445  /* quelques verifs de base */
446  GMM_ASSERT3(ts2.ndim() == ndim(), "");
447  if (ts2.ndim()==0) return; /* c'est un scalaire */
448  for (dim_type i = 0; i < ndim(); ++i)
449  if (index_is_valid(i) && ts2.index_is_valid(i))
450  GMM_ASSERT3(ts2.dim(i) == dim(i), "");
451 
452  tensor_mask_container new_mask;
453  dal::bit_vector mask_treated1; mask_treated1.sup(0,masks().size());
454  dal::bit_vector mask_treated2; mask_treated2.sup(0,ts2.masks().size());
455  std::vector<const tensor_mask*> lstA, lstB; lstA.reserve(10); lstB.reserve(10);
456  for (dim_type i = 0; i < ndim(); ++i) {
457  dim_type i1 = dim_type(index_to_mask_num(i));
458  dim_type i2 = dim_type(ts2.index_to_mask_num(i));
459  lstA.clear(); lstB.clear();
460  if (index_is_valid(i) && !mask_treated1[i1])
461  find_linked_masks(i1, *this, ts2, mask_treated1, mask_treated2,
462  lstA, lstB);
463  else if (ts2.index_is_valid(i) && !mask_treated2[i2])
464  find_linked_masks(i2, ts2, *this, mask_treated2, mask_treated1,
465  lstB, lstA);
466  else continue;
467  GMM_ASSERT3(lstA.size() || lstB.size(), "");
468  new_mask.push_back(tensor_mask(lstA,lstB,and_op));
469  }
470  masks_ = new_mask;
471  update_idx2mask();
472  check_empty_mask();
473  }
474 
475  void shift_dim_num_ge(dim_type dim_num, int shift) {
476  for (dim_type m = 0; m < masks().size(); ++m)
477  masks()[m].shift_dim_num_ge(dim_num,shift);
478  }
479  /* the permutation vector might be greater than the current ndim,
480  in which case some indexes will be unused (when p[i] == dim_type(-1))
481  */
482  void permute(const std::vector<dim_type> p, bool revert=false) {
483  std::vector<dim_type> invp(ndim()); std::fill(invp.begin(), invp.end(), dim_type(-1));
484 
485  /* build the inverse permutation and check that this IS really a permuation */
486  for (dim_type i=0; i < p.size(); ++i) {
487  if (p[i] != dim_type(-1)) {
488  assert(invp[p[i]] == dim_type(-1));
489  invp[p[i]] = i;
490  }
491  }
492  for (dim_type i=0; i < invp.size(); ++i) assert(invp[i] != dim_type(-1));
493 
494  /* do the permutation (quite simple!) */
495  for (dim_type m=0; m < masks().size(); ++m) {
496  for (dim_type i=0; i < masks()[m].indexes().size(); ++i) {
497  if (!revert)
498  masks()[m].indexes()[i] = invp[masks()[m].indexes()[i]];
499  else
500  masks()[m].indexes()[i] = p[masks()[m].indexes()[i]];
501  }
502  }
503  set_ndim_noclean(dim_type(p.size()));
504  update_idx2mask();
505  }
506 
507  /* forme d'une tranche (c'est la forme qu'on applique à un tenseur pour
508  en extraire la tranche) */
509  tensor_shape slice_shape(tensor_mask::Slice slice) const {
510  assert(slice.dim < ndim() && slice.i0 < dim(slice.dim));
511  tensor_shape ts(ndim());
512  ts.push_mask(tensor_mask(dim(slice.dim), slice));
513  ts.merge(*this); /* le masque peut se retrouver brutalement vidé si on a tranché au mauvais endroit! */
514  return ts;
515  }
516 
517  tensor_shape diag_shape(tensor_mask::Diagonal diag) const {
518  assert(diag.i1 != diag.i0 && diag.i0 < ndim() && diag.i1 < ndim());
519  assert(dim(diag.i0) == dim(diag.i1));
520  tensor_shape ts(ndim());
521  ts.push_mask(tensor_mask(dim(diag.i0), diag));
522  ts.merge(*this);
523  return ts;
524  }
525 
526  /*
527  void diag(index_type i0, index_type i1) {
528  assert(i0 < idx.size() && i1 < idx.size());
529  assert(idx[i0].n == idx[i1].n);
530  tensor_shape ts2 = *this;
531  ts2.masks.resize(1);
532  ts2.masks[0].set_diagonal(idx[i0].n, i0, i1);
533  ts2.idx[i0].mask_num = ts2.idx[i1].mask_num = 0;
534  ts2.idx[i0].mask_dim = 0;
535  ts2.idx[i1].mask_dim = 1;
536  }
537  */
538  void print(std::ostream& o) const;
539  void print_() const { print(cerr); }
540  };
541 
542 
543  /* reference to a tensor:
544  - a shape
545  - a data pointer
546  - a set of strides
547  */
548  class tensor_ref : public tensor_shape {
549  std::vector< tensor_strides > strides_;
550  TDIter *pbase_; /* pointeur sur un pointeur qui designe les données
551  ça permet de changer la base pour toute une serie
552  de tensor_ref en un coup */
553 
554  stride_type base_shift_;
555 
556  void remove_mask(dim_type mdim) {
557  tensor_shape::remove_mask(mdim);
558  assert(strides_[mdim].size() == 0 ||
559  (strides_[mdim].size() == 1 && strides_[mdim][0] == 0)); /* sanity check.. */
560  strides_.erase(strides_.begin()+mdim);
561  }
562  public:
563  void swap(tensor_ref& tr) {
564  tensor_shape::swap(tr);
565  strides_.swap(tr.strides_);
566  std::swap(pbase_, tr.pbase_);
567  std::swap(base_shift_, tr.base_shift_);
568  }
569  const std::vector< tensor_strides >& strides() const { return strides_; }
570  std::vector< tensor_strides >& strides() { return strides_; }
571  TDIter base() const { return (pbase_ ? (*pbase_) : 0); }
572  TDIter *pbase() const { return pbase_; }
573  stride_type base_shift() const { return base_shift_; }
574  void set_base(TDIter &new_base) { pbase_ = &new_base; base_shift_ = 0; }
575 
576  void clear() { strides_.resize(0); pbase_ = 0; base_shift_ = 0; tensor_shape::clear(); }
577 
578 
579 
580  /* s'assure que le stride du premier indice est toujours bien égal à zéro */
581  void ensure_0_stride() {
582  for (index_type i=0; i < strides_.size(); ++i) {
583  if (strides_[i].size() >= 1 && strides_[i][0] != 0) {
584  stride_type s = strides_[i][0];
585  base_shift_ += s;
586  for (index_type j=0; j < strides_[i].size(); ++j)
587  strides_[i][j] -= s;
588  }
589  }
590  }
591 
592  /* constructeur à partir d'une forme : ATTENTION ce constructeur n'alloue pas la
593  mémoire nécessaire pour les données !! */
594  explicit tensor_ref(const tensor_shape& ts) : tensor_shape(ts), pbase_(0), base_shift_(0) {
595  strides_.reserve(16);
596  init_strides();
597  }
598  explicit tensor_ref(const tensor_ranges& r, TDIter *pbase__=0)
599  : tensor_shape(r), pbase_(pbase__), base_shift_(0) {
600  strides_.reserve(16);
601  init_strides();
602  }
603  void init_strides() {
604  strides_.resize(masks().size());
605  stride_type s = 1;
606  for (dim_type i = 0; i < strides_.size(); ++i) {
607  index_type n = mask(i).card();
608  strides_[i].resize(n);
609  for (index_type j=0;j<n;++j)
610  strides_[i][j] = j*s;
611  s *= n;
612  }
613  }
614  tensor_ref() : pbase_(0), base_shift_(0) { strides_.reserve(16); }
615 
616  void set_sub_tensor(const tensor_ref& tr, const tensor_shape& sub);
617 
618  /* constructeur à partir d'un sous-tenseur à partir d'un tenseur et d'une forme
619  hypothese: la forme 'sub' doit être un sous-ensemble de la forme du tenseur
620  */
621  explicit tensor_ref(const tensor_ref& tr, const tensor_shape& sub) {
622  set_sub_tensor(tr,sub);
623  }
624 
625  /* slices a tensor_ref, at dimension 'dim', position 'islice'
626  ... not straightforward for sparse tensors !
627  */
628  explicit tensor_ref(const tensor_ref& tr, tensor_mask::Slice slice);
629 
630  /* create a diagonal of another tensor */
631  explicit tensor_ref(const tensor_ref& tr, tensor_mask::Diagonal diag) {
632  set_sub_tensor(tr, tr.diag_shape(diag));
633  ensure_0_stride();
634  }
635 
636  void print(std::ostream& o) const;
637 
638  void print_() const { print(cerr); }
639  };
640 
641  std::ostream& operator<<(std::ostream& o, const tensor_mask& m);
642  std::ostream& operator<<(std::ostream& o, const tensor_shape& ts);
643  std::ostream& operator<<(std::ostream& o, const tensor_ref& tr);
644 
645  /* minimalistic data for iterations */
646  struct packed_range {
647  const stride_type *pinc;
648  const stride_type *begin, *end;
649  index_type n;
650  /* index_type cnt;*/
651  };
652  /* additionnal data */
653  struct packed_range_info {
654  index_type range;
655  dim_type original_masknum;
656  dim_type n;
657  std::vector<stride_type> mask_pos; /* pour l'iteration avec maj de la valeur des indices */
658  bool operator<(const packed_range_info& pi) const {
659  if (n < pi.n) return true;
660  else return false;
661  }
662  stride_type mean_increm; /* valeur moyenne de l'increment (utilisé pour le tri) */
663  tensor_strides inc; /* not strides but increments to the next index value,
664  with inc[range-1] == -sum(inc[0..range-2]) (automatic rewinding!)
665  of course, stride_type MUST be signed
666  */
667  std::bitset<32> have_regular_strides;
668  };
669 
670  /* the big one */
671  class multi_tensor_iterator {
672  index_type N; /* number of simultaneous tensors */
673  std::vector<packed_range> pr;
674  std::vector<packed_range_info> pri;
675 
676  std::vector<index_type> bloc_rank;
677  std::vector<index_type> bloc_nelt;
678 
679  std::vector<TDIter> it;
680  std::vector<TDIter*> pit0;
681  tensor_strides itbase;
682  struct index_value_data {
683  dim_type cnt_num;
684  const stride_type **ppinc; /* pointe vers pr[cnt_num].pinc, initialisé par rewind()
685  et pas avant (à cause de pbs lors de la copie de multi_tensor_iterator sinon)
686  permet de déduire la valeur du compteur: (*ppinc - pincbase) (à diviser par nn=(pri[cnt_num].n-N))
687  */
688  const stride_type *pincbase;
689  const stride_type *pposbase; /* pointe dans pri[cnt_num].mask_pos, retrouve la position dans le masque en fonction
690  du compteur déduit ci-dessus et des champs div et mod ci-dessous */
691  index_type div, mod, nn;
692  stride_type pos_; /* stores the position when the indexe is not part of the pri array
693  (hence the index only has 1 value, and ppos == &pos_, and pcnt = &zero */
694  };
695  std::vector<index_value_data> idxval;
696  std::vector<stride_type> vectorized_strides_; /* if the tensor have regular strides, the mti might be vectorizable */
697  index_type vectorized_size_; /* the size of each vectorizable chunk */
698  index_type vectorized_pr_dim; /* all pr[i], i >= vectorized_pr_dim, can be accessed via vectorized_strides */
699  public:
700  void clear() {
701  N = 0; pr.clear(); pri.clear(); bloc_rank.clear(); bloc_nelt.clear();
702  it.clear(); pit0.clear(); itbase.clear(); idxval.clear();
703  }
704  void swap(multi_tensor_iterator& m) {
705  std::swap(N,m.N); pr.swap(m.pr); pri.swap(m.pri);
706  bloc_rank.swap(m.bloc_rank); bloc_nelt.swap(m.bloc_nelt);
707  it.swap(m.it); pit0.swap(m.pit0); itbase.swap(m.itbase);
708  idxval.swap(m.idxval);
709  }
710  void rewind() {
711  for (dim_type i=0; i < pr.size(); ++i) {
712  pr[i].pinc = pr[i].begin = &pri[i].inc[0];
713  pr[i].end = pr[i].begin+pri[i].inc.size();
714  }
715  for (dim_type n=0; n < N; ++n)
716  it[n] = *(pit0[n]) + itbase[n];
717  for (dim_type i=0; i < idxval.size(); ++i) {
718  if (idxval[i].cnt_num != dim_type(-1)) {
719  idxval[i].ppinc = &pr[idxval[i].cnt_num].pinc;
720  idxval[i].pincbase = &pri[idxval[i].cnt_num].inc[0];
721  idxval[i].pposbase = &pri[idxval[i].cnt_num].mask_pos[0];
722  idxval[i].nn = (N-pri[idxval[i].cnt_num].n);
723  } else {
724  static const stride_type *null=0;
725  idxval[i].ppinc = &null;
726  idxval[i].pincbase = 0;
727  idxval[i].pposbase = &idxval[i].pos_;
728  idxval[i].nn = 1;
729  }
730  }
731  }
732  dim_type ndim() const { return dim_type(idxval.size()); }
733  /* get back the value of an index from then current iterator position */
734  index_type index(dim_type ii) {
735  index_value_data& iv = idxval[ii];
736  index_type cnt = index_type((*iv.ppinc - iv.pincbase)/iv.nn);
737  return ((iv.pposbase[cnt]) % iv.mod)/ iv.div;
738  }
739  index_type vectorized_size() const { return vectorized_size_; }
740  const std::vector<stride_type>& vectorized_strides() const { return vectorized_strides_; }
741  bool next(unsigned i_stop = unsigned(-1), unsigned i0_ = unsigned(-2)) {//=pr.size()-1) {
742  unsigned i0 = unsigned(i0_ == unsigned(-2) ? pr.size()-1 : i0_);
743  while (i0 != i_stop) {
744  for (unsigned n = pr[i0].n; n < N; ++n) {
745  // index_type pos = pr[i0].cnt * (N-pri[i0].n) + (n - pri[i0].n);
746  it[n] += *pr[i0].pinc; pr[i0].pinc++;
747  }
748  if (pr[i0].pinc != pr[i0].end) {
749  return true;
750  } else {
751  pr[i0].pinc = pr[i0].begin;
752  i0--;
753  }
754  }
755  return false;
756  }
757  bool vnext() { return next(unsigned(-1), vectorized_pr_dim); }
758  bool bnext(dim_type b) { return next(bloc_rank[b]-1, bloc_rank[b+1]-1); }
759  bool bnext_useful(dim_type b) { return bloc_rank[b] != bloc_rank[b+1]; }
760  /* version speciale pour itérer sur des tenseurs de même dimensions
761  (doit être un poil plus rapide) */
762  bool qnext1() {
763  if (pr.size() == 0) return false;
764  std::vector<packed_range>::reverse_iterator p_ = pr.rbegin();
765  while (p_!=pr.rend()) {
766  it[0] += *(p_->pinc++);
767  if (p_->pinc != p_->end) {
768  return true;
769  } else {
770  p_->pinc = p_->begin;
771  p_++;
772  }
773  }
774  return false;
775  }
776 
777  bool qnext2() {
778  if (pr.size() == 0) return false;
779  std::vector<packed_range>::reverse_iterator p_ = pr.rbegin();
780  while (p_!=pr.rend()) {
781  it[0] += *(p_->pinc++);
782  it[1] += *(p_->pinc++);
783  if (p_->pinc != p_->end) {
784  return true;
785  } else {
786  p_->pinc = p_->begin;
787  p_++;
788  }
789  }
790  return false;
791  }
792 
793  scalar_type& p(dim_type n) { return *it[n]; }
794 
795  multi_tensor_iterator() {}
796  multi_tensor_iterator(std::vector<tensor_ref> trtab, bool with_index_values) {
797  init(trtab, with_index_values);
798  }
799  void assign(std::vector<tensor_ref> trtab, bool with_index_values) {
800  multi_tensor_iterator m(trtab, with_index_values);
801  swap(m);
802  }
803  multi_tensor_iterator(const tensor_ref& tr0, bool with_index_values) {
804  std::vector<tensor_ref> trtab(1); trtab[0] = tr0;
805  init(trtab, with_index_values);
806  }
807  void assign(const tensor_ref& tr0, bool with_index_values) {
808  multi_tensor_iterator m(tr0, with_index_values);
809  swap(m);
810  }
811  multi_tensor_iterator(const tensor_ref& tr0,
812  const tensor_ref& tr1, bool with_index_values) {
813  std::vector<tensor_ref> trtab(2); trtab[0] = tr0; trtab[1] = tr1;
814  init(trtab, with_index_values);
815  }
816  void assign(const tensor_ref& tr0, const tensor_ref& tr1, bool with_index_values) {
817  multi_tensor_iterator m(tr0, tr1, with_index_values);
818  swap(m);
819  }
820  multi_tensor_iterator(const tensor_ref& tr0,
821  const tensor_ref& tr1,
822  const tensor_ref& tr2, bool with_index_values) {
823  std::vector<tensor_ref> trtab(3); trtab[0] = tr0; trtab[1] = tr1; trtab[2] = tr2;
824  init(trtab, with_index_values);
825  }
826  void assign(const tensor_ref& tr0, const tensor_ref& tr1, const tensor_ref& tr2, bool with_index_values) {
827  multi_tensor_iterator m(tr0, tr1, tr2, with_index_values);
828  swap(m);
829  }
830  void init(std::vector<tensor_ref> trtab, bool with_index_values);
831  void print() const;
832  };
833 
834 
835  /* handles a tree of reductions
836  The tree is used if more than two tensors are reduced, i.e.
837  z(:,:)=t(:,i).u(i,j).v(j,:)
838  in that case, the reduction against j can be performed on u(:,j).v(j,:) = w(:,:)
839  and then, z(:,:) = t(:,i).w(i,:)
840  */
841  struct tensor_reduction {
842  struct tref_or_reduction {
843  tensor_ref tr_;
844  std::shared_ptr<tensor_reduction> reduction;
845  tensor_ref &tr() { return tr_; }
846  const tensor_ref &tr() const { return tr_; }
847  explicit tref_or_reduction(const tensor_ref &tr__, const std::string& s)
848  : tr_(tr__), ridx(s) {}
849  explicit tref_or_reduction(const std::shared_ptr<tensor_reduction> &p, const std::string& s)
850  : reduction(p), ridx(s) {
851  reduction->result(tr_);
852  }
853  bool is_reduction() const { return reduction != 0; }
854  void swap(tref_or_reduction &other) { tr_.swap(other.tr_); std::swap(reduction, other.reduction); }
855  std::string ridx; /* reduction indexes, no index can appear
856  twice in the same tensor */
857  std::vector<dim_type> gdim; /* mapping to the global virtual
858  tensor whose range is the
859  union of the ranges of each
860  reduced tensor */
861  std::vector<dim_type> rdim; /* mapping to the dimensions of the
862  reduced tensor ( = dim_type(-1) for
863  dimensions i s.t. ridx[i] != ' ' ) */
864 
865  };
866  tensor_ranges reduced_range;
867  std::string reduction_chars; /* list of all indexes used for reduction */
868  tensor_ref trres;
869  typedef std::vector<tref_or_reduction>::iterator trtab_iterator;
870  std::vector<tref_or_reduction> trtab;
871  multi_tensor_iterator mti;
872  std::vector<scalar_type> out_data; /* optional storage of output */
873  TDIter pout_data;
874  public:
875  tensor_reduction() { clear(); }
876  virtual ~tensor_reduction() { clear(); }
877  void clear();
878 
879  /* renvoie les formes diagonalisées
880  pour bien faire, il faudrait que cette fonction prenne en argument
881  le required_shape de l'objet ATN_reducted_tensor, et fasse le merge
882  avec ce qu'elle renvoie... non trivial
883  */
884  static void diag_shape(tensor_shape& ts, const std::string& s) {
885  for (index_type i=0; i < s.length(); ++i) {
886  size_type pos = s.find(s[i]);
887  if (s[i] != ' ' && pos != i) // ce n'est pas de l'indice => reduction sur la diagonale
888  ts = ts.diag_shape(tensor_mask::Diagonal(dim_type(pos),dim_type(i)));
889  }
890  }
891 
892  void insert(const tensor_ref& tr_, const std::string& s);
893  void prepare(const tensor_ref* tr_out = NULL);
894  void do_reduction();
895  void result(tensor_ref& res) const {
896  res=trres;
897  res.remove_unused_dimensions();
898  }
899  private:
900  void insert(const tref_or_reduction& tr_, const std::string& s);
901  void update_reduction_chars();
902  void pre_prepare();
903  void make_sub_reductions();
904  size_type find_best_sub_reduction(dal::bit_vector &best_lst, std::string &best_idxset);
905  };
906 
907 } /* namespace bgeot */
908 
909 #endif
defines and typedefs for namespace bgeot
Provide a dynamic bit container.
void clear(L &l)
clear (fill with zeros) a vector or matrix.
Definition: gmm_blas.h:58
Definition of basic exceptions.
Basic Geometric Tools.
gmm::uint16_type short_type
used as the common short type integer in the library
Definition: bgeot_config.h:72
std::ostream & operator<<(std::ostream &o, const convex_structure &cv)
Print the details of the convex structure cvs to the output stream o.
size_t size_type
used as the common size type in the library
Definition: bgeot_poly.h:48