Commit ba76ff90 authored by Sebastian Wolf

Merge branch 'v3' of git:xerus/xerus into v3

parents 177d73b2 d3d2ecfb
Pipeline #710 passed with stages in 8 minutes and 10 seconds
......@@ -8,6 +8,7 @@ Potentially breaking changes are marked with an exclamation point '!' at the beg
* ! All macros and preprocessor defines now use the XERUS_ prefix. The config.mk file changed accordingly.
* ! TT::find_largest_entry and TT::dyadic_product were moved out of the TT scope.
* ! Tensor::modify_diag_elements renamed to Tensor::modify_diagonal_entries for naming consistency.
* Much faster solving of matrix equations Ax=b by exploiting symmetry and definiteness where possible. This directly speeds up the ALS as well.
* Added a highly optimized minimal version of the ALS algorithm as xALS.
* Some minor bugfixes.
......
......@@ -14,6 +14,7 @@ It is not advisable to do this while developing, as it will be much more difficu
functions, but once you have established that your code works as expected, you might want to try replacing the `libxerus.so` object
used by your project with one compiled with the `-D XERUS_DISABLE_RUNTIME_CHECKS` flag.
## Use C++ instead of Python
## Compiling Xerus with High Optimizations
By default the library already compiles with high optimization settings (roughly corresponding to `-O3`), as there is rarely
......
......@@ -109,12 +109,12 @@ in dense representation. You should thus manually convert overly full sparse Ten
To do this there are a number of ways to interact with the representation of `xerus::Tensor` objects. Above we already saw that
the constructors can be used to explicitly construct sparse (default behaviour) or dense tensors. For already existing objects
you can use the member functions [`.is_sparse()`](\ref xerus::Tensor::is_sparse()) and [`.is_dense()`](\ref xerus::Tensor::is_dense()) to query their representation. To change representations call the
member functions [`.use_dense_representation()`](\ref xerus::Tensor::use_dense_representation()) or [`.use_sparse_representation()`](\ref xerus::Tensor::use_sparse_representation()) to change it in place or [`.dense_copy()`](\ref xerus::Tensor::dense_copy()) or
[`.sparse_copy()`](\ref xerus::Tensor::sparse_copy()) to obtain new tensor objects with the desired representation.
you can use the member functions [.is_sparse()](\ref xerus::Tensor::is_sparse()) and [.is_dense()](\ref xerus::Tensor::is_dense()) to query their representation. To change representations call the
member functions [.use_dense_representation()](\ref xerus::Tensor::use_dense_representation()) or [.use_sparse_representation()](\ref xerus::Tensor::use_sparse_representation()) to change it in place or [.dense_copy()](\ref xerus::Tensor::dense_copy()) or
[.sparse_copy()](\ref xerus::Tensor::sparse_copy()) to obtain new tensor objects with the desired representation.
To make more informed decisions about whether a conversion might be useful the tensor objects can be queried for the number of
defined entries with [`.sparsity()`](\ref xerus::Tensor::sparsity()) or for the number of non-zero entries with [`.count_non_zero_entries()`](\ref xerus::Tensor::count_non_zero_entries()).
defined entries with [.sparsity()](\ref xerus::Tensor::sparsity()) or for the number of non-zero entries with [.count_non_zero_entries()](\ref xerus::Tensor::count_non_zero_entries()).
~~~.cpp
// create a sparse tensor with 100 random entries
W = xerus::Tensor::random({100,100}, 100);
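// hypothetical continuation of this example: query and change the representation
// (W is sparse here, as only 100 of the 10000 entries are defined)
std::cout << W.is_sparse() << " " << W.sparsity() << std::endl;
W.use_dense_representation(); // convert in place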
......
......@@ -125,12 +125,8 @@ namespace xerus {
void rq_destructive( double* const _R, double* const _Q, double* const _A, const size_t _m, const size_t _n);
/* TODO we need test cases for these
///@brief: Solves Ax = b for x
void solve( double* const _x, const double* const _A, const size_t _n, const double* const _b);
///@brief: Solves Ax = b for x, Destroys A and b
void solve_destructive( double* const _bToX, double* const _A, const size_t _n);*/
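///@brief Solves Ax = b for x, where @a _A is an _m x _n matrix and @a _b holds @a _p right-hand sides (row-major).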
void solve(double* _x, const double* _A, size_t _m, size_t _n, const double* _b, size_t _p);
......
......@@ -988,10 +988,18 @@ namespace xerus {
* @param _X Output Tensor for the resulting X.
* @param _A input Tensor A.
* @param _B input Tensor b.
* @param _extraDegree number of dimensions that @a _X and @a _B share and for which the least squares problem is independently solved.
* @param _extraDegree number of modes that @a _X and @a _B share and for which the least squares problem is independently solved.
*/
void solve_least_squares(Tensor& _X, const Tensor& _A, const Tensor& _B, const size_t _extraDegree);
void solve_least_squares(Tensor& _X, const Tensor& _A, const Tensor& _B, const size_t _extraDegree = 0);
/**
* @brief Solves the equation Ax = b for x. If the solution is not unique, the output need not be the minimal norm solution.
* @param _X Output Tensor for the result
* @param _A input Operator A
* @param _B input right-hand-side b
* @param _extraDegree number of modes that @a _X and @a _B share and for which the solution should be computed independently.
*/
void solve(Tensor &_X, const Tensor &_A, const Tensor &_B, size_t _extraDegree = 0);
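/* A minimal usage sketch (not part of this header; assumes a dense 2-mode
* Tensor A and a matching 1-mode Tensor b):
*   xerus::Tensor x;
*   xerus::solve(x, A, b);                // exploits symmetry/definiteness of A
*   xerus::solve_least_squares(x, A, b);  // minimises the residual |Ax - b| instead
*/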
/**
* @brief calculates the entrywise product of two Tensors
......
......@@ -71,6 +71,61 @@ static misc::UnitTest tensor_solve("Tensor", "solve_Ax_equals_b", [](){
TEST((x2[{1,1}]) < 1e-14);
});
static misc::UnitTest solve_vs_lsqr("Tensor", "solve vs least squares", [](){
const size_t N = 500;
Tensor A({N, N});
for (size_t i=0; i<N; ++i) {
if (i>0) A[{i, i-1}] = -1;
if (i<N-1) A[{i, i+1}] = -1;
A[{i,i}] = 2;
}
A[0] = 1;
Tensor B = Tensor::random({N});
Index i,j,k;
// sparse
// LOG(blabla, "sparse start");
Tensor X;
solve(X, A, B);
// LOG(blabla, "sparse end");
MTEST(frob_norm(A(i,j)*X(j)-B(i)) < 1e-10, "1 " << frob_norm(A(i,j)*X(j)-B(i)));
A.use_dense_representation();
// dense (cholesky)
// LOG(blabla, "chol start");
Tensor X2;
solve(X2, A, B);
// LOG(blabla, "chol end");
MTEST(frob_norm(A(i,j)*X(j)-B(i)) < 1e-10, "2 " << frob_norm(A(i,j)*X(j)-B(i)));
MTEST(approx_equal(X, X2, 1e-10), X.frob_norm() << " " << X2.frob_norm() << " diff: " << frob_norm(X-X2));
// dense (LDL)
A[0] = -0.9;
// LOG(blabla, "LDL start");
solve(X, A, B);
// LOG(blabla, "LDL end");
MTEST(frob_norm(A(i,j)*X(j)-B(i)) < 1e-10, "3 " << frob_norm(A(i,j)*X(j)-B(i)));
// dense (LU)
A[1] = -0.9;
// LOG(blabla, "LU start");
solve(X, A, B);
// LOG(blabla, "LU end");
MTEST(frob_norm(A(i,j)*X(j)-B(i)) < 1e-10, "4 " << frob_norm(A(i,j)*X(j)-B(i)));
// dense (SVD)
A[1] = -0.9;
// LOG(blabla, "SVD start");
solve_least_squares(X, A, B);
// LOG(blabla, "SVD end");
MTEST(frob_norm(A(i,j)*X(j)-B(i)) < 1e-10, "5 " << frob_norm(A(i,j)*X(j)-B(i)));
});
static misc::UnitTest tensor_solve_sparse("Tensor", "solve_sparse", [](){
std::mt19937_64 &rnd = xerus::misc::randomEngine;
std::normal_distribution<double> dist(0.0, 1.0);
......
......@@ -19,6 +19,7 @@
#include<xerus.h>
#include "../../include/xerus/misc/internal.h"
#include "../../include/xerus/test/test.h"
using namespace xerus;
......
......@@ -45,7 +45,7 @@ namespace xerus {
Tensor b(_b);
Tensor x;
solve_least_squares(x, A, b, 0);
xerus::solve(x, A, b);
Index i,j,k,l;
if (_data.direction == Increasing) {
......
......@@ -486,37 +486,156 @@ namespace xerus {
}
/* TODO we need test cases for these
/// Solves Ax = b for x
void solve( double* const _x, const double* const _A, const size_t _n, const double* const _b) {
const std::unique_ptr<double[]> tmpA(new double[_n*_n]);
array_copy(tmpA.get(), _A, _n*_n);
array_copy(_x, _b, _n);
///@brief checks whether the _n x _n matrix @a _A is (numerically) symmetric
static bool is_symmetric(const double* const _A, const size_t _n) {
double max = 0;
// scale the symmetry tolerance by the largest entry in absolute value
for (size_t i=0; i<_n*_n; ++i) {
max = std::max(max, std::abs(_A[i]));
}
for (size_t i=0; i<_n; ++i) {
for (size_t j=i+1; j<_n; ++j) {
if (std::abs(_A[i*_n + j] - _A[i + j*_n]) >= 4 * max * std::numeric_limits<double>::epsilon()) {
// LOG(aslkdjj, std::abs(_A[i*_n + j] - _A[i + j*_n]) << " / " << _A[i*_n + j] << " " << max);
return false;
}
}
}
return true;
}
/// @brief checks whether the diagonal of @a _A is either all positive or all negative; returns false otherwise
static bool pos_neg_definite_diagonal(const double* const _A, const size_t _n) {
bool positive = (_A[0] > 0);
const size_t steps=_n+1;
if (positive) {
for (size_t i=1; i<_n; ++i) {
if (_A[i*steps] < std::numeric_limits<double>::epsilon()) {
return false;
}
}
return true;
} else {
for (size_t i=1; i<_n; ++i) {
if (_A[i*steps] > -std::numeric_limits<double>::epsilon()) {
return false;
}
}
return true;
}
solve_destructive(_x, tmpA.get(), _n);
}
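// Note: an all-positive (or all-negative) diagonal is only a necessary condition
// for definiteness; it is used as a cheap heuristic to decide whether attempting
// a Cholesky factorisation is worthwhile before falling back to LDL^T.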
/// Solves Ax = b for x
void solve_destructive( double* const _bToX, double* const _A, const size_t _n) {
void solve(double* const _x, const double* const _A, const size_t _m, const size_t _n, const double* const _b, const size_t _nrhs) {
REQUIRE(_m <= static_cast<size_t>(std::numeric_limits<int>::max()), "Dimension too large for BLAS/Lapack");
REQUIRE(_n <= static_cast<size_t>(std::numeric_limits<int>::max()), "Dimension too large for BLAS/Lapack");
REQUIRE(_nrhs <= static_cast<size_t>(std::numeric_limits<int>::max()), "Dimension too large for BLAS/Lapack");
const std::unique_ptr<double[]> tmpA(new double[_m*_n]);
misc::copy(tmpA.get(), _A, _m*_n);
LOG(debug, "solving with...");
START_TIME;
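// Solver dispatch implemented below: non-square systems fall back to least squares
// (QR/SVD); non-symmetric A uses PLU (dgesv); symmetric A with a definite-looking
// diagonal first attempts Cholesky (dpotrf2/dpotrs); otherwise, or if Cholesky
// fails, the symmetric indefinite LDL^T solver (dsysv) is used.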
// not square -> fall back to least squares (solved via QR or SVD decomposition)
if (_m != _n) {
LOG(debug, "SVD");
const std::unique_ptr<double[]> tmpB(new double[_m*_nrhs]);
misc::copy(tmpB.get(), _b, _m*_nrhs);
solve_least_squares_destructive(_x, tmpA.get(), _m, _n, tmpB.get(), _nrhs);
return;
}
// not symmetric -> LU solver
if (!is_symmetric(_A, _n)) {
LOG(debug, "LU");
XERUS_PA_START;
std::unique_ptr<int[]> pivot(new int[_n]);
misc::copy(_x, _b, _n);
IF_CHECK( int lapackAnswer = ) LAPACKE_dgesv(
LAPACK_ROW_MAJOR,
static_cast<int>(_n), // Dimensions of A (nxn)
static_cast<int>(_nrhs), // Number of rhs b
tmpA.get(), // input: A, output: L and U
static_cast<int>(_n), // LDA
pivot.get(), // output: permutation P
_x, // input: rhs b, output: solution x
static_cast<int>(_nrhs) // ldb
);
CHECK(lapackAnswer == 0, error, "Unable to solve Ax = b (PLU solver). Lapacke says: " << lapackAnswer);
XERUS_PA_END("Dense LAPACK", "Solve (PLU)", misc::to_string(_n)+"x"+misc::to_string(_n)+"x"+misc::to_string(_nrhs));
return;
}
// positive or negative diagonal -> try cholesky
if (pos_neg_definite_diagonal(_A, _n)) {
LOG(debug, "trying cholesky");
int lapackAnswer = 0;
{
XERUS_PA_START;
lapackAnswer = LAPACKE_dpotrf2(
LAPACK_ROW_MAJOR,
'U', // Upper triangle of A is read
static_cast<int>(_n), // dimensions of A
tmpA.get(), // input: A, output: cholesky factorisation
static_cast<int>(_n) // LDA
);
XERUS_PA_END("Dense LAPACK", "Cholesky decomposition", misc::to_string(_n)+"x"+misc::to_string(_n));
}
if (lapackAnswer == 0) {
LOG(debug, "cholesky");
XERUS_PA_START;
misc::copy(_x, _b, _n);
lapackAnswer = LAPACKE_dpotrs(
LAPACK_ROW_MAJOR,
'U', // upper triangle of cholesky decomp is stored in tmpA
static_cast<int>(_n), // dimensions of A
static_cast<int>(_nrhs),// number of rhs
tmpA.get(), // input: cholesky decomp
static_cast<int>(_n), // lda
_x, // input: rhs b, output: solution x
static_cast<int>(_nrhs) // ldb
);
CHECK(lapackAnswer == 0, error, "Unable to solve Ax = b (cholesky solver). Lapacke says: " << lapackAnswer);
XERUS_PA_END("Dense LAPACK", "Solve (Cholesky)", misc::to_string(_n)+"x"+misc::to_string(_n)+"x"+misc::to_string(_nrhs));
return;
} else {
// restore tmpA
misc::copy(tmpA.get(), _A, _m*_n);
}
}
LOG(debug, "LDL");
// non-definite diagonal or Cholesky failed -> fall back to LDL^T decomposition
XERUS_PA_START;
misc::copy(_x, _b, _n);
std::unique_ptr<int[]> pivot(new int[_n]);
int lapackAnswer = LAPACKE_dgesv(
LAPACKE_dsysv(
LAPACK_ROW_MAJOR,
static_cast<int>(_n), // Dimensions of A (nxn)
1, // Number of b's, here always one
_A, // The input matrix A, will be destroyed
static_cast<int>(_n), // LDA
pivot.get(), // Output of the pivot ordering
_bToX, // Input the vector b, output the vector x
1 );
CHECK(lapackAnswer == 0, error, "Unable to solves Ax = b. Lapacke says: " << lapackAnswer);
ADD_CALL("Solve ", to_string(_n)+"x"+to_string(_n));
}*/
'U', // upper triangular part of _A is accessed
static_cast<int>(_n), // dimension of A
static_cast<int>(_nrhs),// number of rhs
tmpA.get(), // input: A, output: part of the LDL decomposition
static_cast<int>(_n), // lda
pivot.get(), // output: details of blockstructure of D
_x, // input: rhs b, output: solution x
static_cast<int>(_nrhs) // ldb
);
XERUS_PA_END("Dense LAPACK", "Solve (LDL)", misc::to_string(_n)+"x"+misc::to_string(_n)+"x"+misc::to_string(_nrhs));
}
void solve_least_squares( double* const _x, const double* const _A, const size_t _m, const size_t _n, const double* const _b, const size_t _p){
......@@ -591,3 +710,4 @@ namespace xerus {
} // namespace blasWrapper
} // namespace xerus
......@@ -73,7 +73,8 @@ namespace xerus {
internal::IndexedTensorMoveable<Tensor> tmpX(new Tensor(), std::move(orderX));
solve_least_squares(*tmpX.tensorObject, reorderedA, reorderedB, extraDims);
//solve_least_squares(*tmpX.tensorObject, reorderedA, reorderedB, extraDims);
solve(*tmpX.tensorObject, reorderedA, reorderedB, extraDims);
return tmpX;
}
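// In user code this path is reached via the division operator on indexed tensors,
// e.g. (sketch): x(j) = b(i) / A(i,j); // solves A x = b for x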
......
......@@ -177,7 +177,7 @@ namespace xerus {
#pragma GCC diagnostic pop
}
XERUS_PA_END("Mixed BLAS", "Matrix-Matrix-Multiplication ==> Sparse", misc::to_string(_leftDim)+"x"+misc::to_string(_midDim)+" * "+misc::to_string(_midDim)+"x"+misc::to_string(_rightDim));
XERUS_PA_END("Mixed BLAS", "Matrix-Matrix-Multiplication ==> Sparse", "?x"+misc::to_string(_midDim)+" * "+misc::to_string(_midDim)+"x"+misc::to_string(_rightDim));
}
void matrix_matrix_product( std::map<size_t, double>& _C,
......
......@@ -1601,7 +1601,7 @@ namespace xerus {
_A.get_unsanitized_dense_data(), m, n,
_B.get_unsanitized_dense_data(), p);
} else {
LOG(warning, "Sparse RHS not yet implemented (casting to dense)"); //TODO
LOG_ONCE(warning, "Sparse RHS not yet implemented (casting to dense)"); //TODO
Tensor Bcpy(_B);
Bcpy.factor = 1.0;
......@@ -1620,6 +1620,60 @@ namespace xerus {
void solve(Tensor& _X, const Tensor& _A, const Tensor& _B, const size_t _extraDegree) {
if (_A.is_sparse()) { // TODO
solve_least_squares(_X, _A, _B, _extraDegree);
return;
}
REQUIRE(&_X != &_B && &_X != &_A, "Not supported yet");
const size_t degM = _B.degree() - _extraDegree;
const size_t degN = _A.degree() - degM;
REQUIRE(_A.degree() == degM+degN, "Inconsistent dimensions.");
REQUIRE(_B.degree() == degM+_extraDegree, "Inconsistent dimensions.");
// Make sure X has right dimensions
if( _X.degree() != degN + _extraDegree
|| !std::equal(_X.dimensions.begin(), _X.dimensions.begin() + degN, _A.dimensions.begin() + degM)
|| !std::equal(_X.dimensions.begin()+ degN, _X.dimensions.end(), _B.dimensions.begin() + degM))
{
Tensor::DimensionTuple newDimX;
newDimX.insert(newDimX.end(), _A.dimensions.begin()+degM, _A.dimensions.end());
newDimX.insert(newDimX.end(), _B.dimensions.begin()+degM, _B.dimensions.end());
_X.reset(std::move(newDimX), Tensor::Representation::Dense, Tensor::Initialisation::None);
}
// Calculate multDimensions
const size_t m = misc::product(_A.dimensions, 0, degM);
const size_t n = misc::product(_A.dimensions, degM, degM+degN);
const size_t p = misc::product(_B.dimensions, degM, degM+_extraDegree);
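// Worked example (hypothetical dimensions): for _A with dimensions {2,3,2,3},
// _B with {2,3,5} and _extraDegree = 1 we get degM = 2, degN = 2 and thus
// m = 6, n = 6, p = 5; _X is (re)set to dimensions {2,3,5}.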
// Note that A is dense here
if(_B.is_dense()) {
blasWrapper::solve(
_X.override_dense_data(),
_A.get_unsanitized_dense_data(), m, n,
_B.get_unsanitized_dense_data(), p);
} else {
LOG_ONCE(warning, "Sparse RHS not yet implemented (casting to dense)"); //TODO
Tensor Bcpy(_B);
Bcpy.factor = 1.0;
Bcpy.use_dense_representation();
blasWrapper::solve(
_X.override_dense_data(),
_A.get_unsanitized_dense_data(), m, n,
Bcpy.get_unsanitized_dense_data(), p);
}
// Propagate the constant factor
_X.factor = _B.factor / _A.factor;
}
Tensor entrywise_product(const Tensor &_A, const Tensor &_B) {
REQUIRE(_A.dimensions == _B.dimensions, "Entrywise product ill-defined for non-equal dimensions.");
if(_A.is_dense() && _B.is_dense()) {
......