sparseTimesFullContraction.cpp 10 KB
Newer Older
1
// Xerus - A General Purpose Tensor Library
Sebastian Wolf's avatar
Sebastian Wolf committed
2
// Copyright (C) 2014-2016 Benjamin Huber and Sebastian Wolf. 
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// 
// Xerus is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License,
// or (at your option) any later version.
// 
// Xerus is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
// 
// You should have received a copy of the GNU Affero General Public License
// along with Xerus. If not, see <http://www.gnu.org/licenses/>.
//
// For further information on Xerus visit https://libXerus.org 
// or contact us at contact@libXerus.org.

20
21
22
23
/**
 * @file
 * @brief Implementation of sparse matrix times dense matrix wrapper functions.
 */
24
#include <memory>
25

26
#include <xerus/misc/performanceAnalysis.h>
27
#include <xerus/misc/check.h>
28
#include <xerus/misc/stringUtilities.h>
29
#include <xerus/sparseTimesFullContraction.h>
30
#include <xerus/misc/basicArraySupport.h>
31
#include <xerus/misc/internal.h>
32
33

namespace xerus {
34
    
35
    XERUS_force_inline void transpose(double* const __restrict _out, const double* const __restrict _in, const size_t _leftDim, const size_t _rightDim) {
36
37
        for(size_t i = 0; i < _leftDim; ++i) {
            for(size_t j = 0; j < _rightDim; ++j) {
38
                _out[j*_leftDim+i] = _in[i*_rightDim+j];
39
40
41
42
            }
        }
    }
    
43
    XERUS_force_inline std::unique_ptr<double[]> transpose(const double* const _A, const size_t _leftDim, const size_t _rightDim) {
44
45
46
47
48
49
        std::unique_ptr<double[]> AT(new double[_leftDim*_rightDim]);
        transpose(AT.get(), _A, _leftDim, _rightDim);
        return AT;
    }
    
    
50
    XERUS_force_inline void transpose(std::map<size_t, double>& __restrict _out, const std::map<size_t, double>& __restrict _in, const size_t _leftDim, const size_t _rightDim) {
51
        for(const auto& entry : _in) {
52
53
54
            const size_t i = entry.first/_rightDim;
            const size_t j = entry.first%_rightDim;
            _out.emplace(j*_leftDim + i, entry.second);
55
56
57
        }
    }
    
58
    XERUS_force_inline std::map<size_t, double> transpose(const std::map<size_t, double>& _A, const size_t _leftDim, const size_t _rightDim) {
59
60
61
62
63
        std::map<size_t, double> AT;
        transpose(AT, _A, _leftDim, _rightDim);
        return AT;
    }
    
64
65
    // - - - - - - - - - - - - - - - - - - - - - - - - - Mix to Full - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    
66
67
68
69
    void matrix_matrix_product( double* const _C,
                                const size_t _leftDim,
                                const size_t _rightDim,
                                const double _alpha,
70
                                const std::map<size_t, double>& _A,
71
72
                                const bool _transposeA,
                                const size_t _midDim,
73
                                const double* const _B) {
74
		XERUS_PA_START;
75
		
76
        // Prepare output array
77
        misc::set_zero(_C, _leftDim*_rightDim);
78
        
79
80
        // Transposition of A only changes how i and j are calculated
        if(!_transposeA) {
81
            for(const auto& entry : _A) {
82
83
                const size_t i = entry.first/_midDim;
                const size_t j = entry.first%_midDim;
84
                misc::add_scaled(_C+i*_rightDim, _alpha*entry.second, _B+j*_rightDim, _rightDim);
85
86
            }
        } else {
87
            for(const auto& entry : _A) {
88
89
                const size_t i = entry.first%_leftDim;
                const size_t j = entry.first/_leftDim;
90
                misc::add_scaled(_C+i*_rightDim, _alpha*entry.second, _B+j*_rightDim, _rightDim);
91
92
            }
        }
93
        
94
		XERUS_PA_END("Mixed BLAS", "Matrix-Matrix-Multiplication ==> Full", misc::to_string(_leftDim)+"x"+misc::to_string(_midDim)+" * "+misc::to_string(_midDim)+"x"+misc::to_string(_rightDim));
95
96
97
98
99
100
    }
    
    void matrix_matrix_product( double* const _C,
                                const size_t _leftDim,
                                const size_t _rightDim,
                                const double _alpha,
101
                                const std::map<size_t, double>& _A,
102
103
                                const bool _transposeA,
                                const size_t _midDim,
104
                                const double* const _B,
105
                                const bool _transposeB) {
106
107
108
        if(_transposeB) {
            const std::unique_ptr<double[]> BT = transpose(_B, _rightDim, _midDim);
            matrix_matrix_product(_C, _leftDim, _rightDim, _alpha, _A, _transposeA, _midDim, BT.get());
109
        } else {
110
            matrix_matrix_product(_C, _leftDim, _rightDim, _alpha, _A, _transposeA, _midDim, _B);
111
        }
112
113
114
115
116
117
    }
    
    void matrix_matrix_product( double* const _C,
                                const size_t _leftDim,
                                const size_t _rightDim,
                                const double _alpha,
118
                                const double* const _A,
119
120
                                const bool _transposeA,
                                const size_t _midDim,
121
                                const std::map<size_t, double>& _B,
122
                                const bool _transposeB) {
123
124
125
126
        // It is significantly faster to calculate (B^T * A*T)^T
        const std::unique_ptr<double[]> CT(new double[_leftDim*_rightDim]);
        matrix_matrix_product(CT.get(), _rightDim, _leftDim, _alpha, _B, !_transposeB, _midDim, _A, !_transposeA);
        transpose(_C, CT.get(), _rightDim, _leftDim);
127
128
129
130
131
132
    }
    
    
    // - - - - - - - - - - - - - - - - - - - - - - - - - Mix to Sparse - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    
    void matrix_matrix_product( std::map<size_t, double>& _C,
Ben Huber's avatar
Ben Huber committed
133
                                const size_t  /*_leftDim*/,
134
135
136
137
138
                                const size_t _rightDim,
                                const double _alpha,
                                const std::map<size_t, double>& _A,
                                const size_t _midDim,
                                const double* const _B) {
139
		XERUS_PA_START;
140
		
141
142
        size_t currentRow = 0;
        std::unique_ptr<double[]> row(new double[_rightDim]);
143
        misc::set_zero(row.get(), _rightDim);
144
        
145
        for(const auto& entry : _A) {
146
147
148
149
            const size_t i = entry.first/_midDim;
            const size_t j = entry.first%_midDim;
            
            if(i == currentRow) {
150
                misc::add_scaled(row.get(), _alpha*entry.second, _B+j*_rightDim, _rightDim);
151
            } else {
152
                INTERNAL_CHECK(i > currentRow, "Internal Error");
153
154
155
156
157
158
159
160
161
                
                // Copy old row to _C
                for(size_t k = 0; k < _rightDim; ++k) {
                    #pragma GCC diagnostic push
                    #pragma GCC diagnostic ignored "-Wfloat-equal"
                    if(row.get()[k] != 0) {
                        _C.emplace(currentRow*_rightDim + k, row.get()[k]);
                    }
                    #pragma GCC diagnostic pop
162
                }
163
164
165
                
                // Start new row
                currentRow = i;
166
                misc::copy_scaled(row.get(), _alpha*entry.second, _B+j*_rightDim, _rightDim);
167
            }
168
169
        }
        
170
171
172
        // Copy the last row to _C
        for(size_t k = 0; k < _rightDim; ++k) {
            #pragma GCC diagnostic push
173
174
175
176
                #pragma GCC diagnostic ignored "-Wfloat-equal"
                if(row.get()[k] != 0) {
                    _C.emplace(currentRow*_rightDim + k, row.get()[k]);
                }
177
178
            #pragma GCC diagnostic pop
        }
179
        
180
		XERUS_PA_END("Mixed BLAS", "Matrix-Matrix-Multiplication ==> Sparse", "?x"+misc::to_string(_midDim)+" * "+misc::to_string(_midDim)+"x"+misc::to_string(_rightDim));
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
    }
    
    void matrix_matrix_product( std::map<size_t, double>& _C,
                                const size_t _leftDim,
                                const size_t _rightDim,
                                const double _alpha,
                                const std::map<size_t, double>& _A,
                                const bool _transposeA,
                                const size_t _midDim,
                                const double* const _B,
                                const bool _transposeB) {
        if(_transposeA) {
            const std::map<size_t, double> AT = transpose(_A, _midDim, _leftDim);
            if(_transposeB) {
                std::unique_ptr<double[]> BT = transpose(_B, _rightDim, _midDim);
                matrix_matrix_product(_C, _leftDim, _rightDim, _alpha, AT, _midDim, BT.get());
            } else {
                matrix_matrix_product(_C, _leftDim, _rightDim, _alpha, AT, _midDim, _B);
            }
        } else {
            if(_transposeB) {
                std::unique_ptr<double[]> BT = transpose(_B, _rightDim, _midDim);
                matrix_matrix_product(_C, _leftDim, _rightDim, _alpha, _A, _midDim, BT.get());
            } else {
                matrix_matrix_product(_C, _leftDim, _rightDim, _alpha, _A, _midDim, _B);
206
207
            }
        }
208
    }
209
    
210
211
212
213
214
215
216
217
218
    void matrix_matrix_product( std::map<size_t, double>& _C,
                                const size_t _leftDim,
                                const size_t _rightDim,
                                const double _alpha,
                                const double* const _A,
                                const bool _transposeA,
                                const size_t _midDim,
                                const std::map<size_t, double>& _B,
                                const bool _transposeB) {
219
        // It is significantly faster to calculate (B^T * A*T)^T (this is only benchmarked for mix -> Full yet...)
220
221
222
223
        std::map<size_t, double> CT;
        matrix_matrix_product(CT, _rightDim, _leftDim, _alpha, _B, !_transposeB, _midDim, _A, !_transposeA);
        transpose(_C, CT, _rightDim, _leftDim);
    }
224
} // namespace xerus