Commit 6743ae1b authored by Sebastian Wolf's avatar Sebastian Wolf

Merge branch 'development' of git:xerus/xerus into development

parents 02e4a782 521dd164
Pipeline #939 passed with stages
in 9 minutes and 57 seconds
......@@ -10,7 +10,7 @@ CXX = g++
# C++ Version
#=================================================================================================
# Xerus requires at least C++11 language support, however some features need support of C++14 or
# even C++17 in order to be activated.
# even C++17 in order to be activated.
COMPATIBILITY = -std=c++11
#=================================================================================================
......@@ -24,24 +24,24 @@ COMPATIBILITY = -std=c++11
#=================================================================================================
# Optimization
#=================================================================================================
# We suggest the use of one of the following optimization levels. The first uses basically no
# optimization and is primarily intended for debugging purposes. The second (recommended) level
# activates more or less all optimization options that conform with the ISO C++ Standard.
# The last level activates all optimizations available, including non-ISO C++ conforming optimizations
# We suggest the use of one of the following optimization levels. The first uses basically no
# optimization and is primarily intended for debugging purposes. The second (recommended) level
# activates more or less all optimization options that conform with the ISO C++ Standard.
# The last level activates all optimizations available, including non-ISO C++ conforming optimizations
# and optimizations that may result in a loss of numerical precision; use at your own risk.
# LOW_OPTIMIZATION = TRUE # Activates -O0
HIGH_OPTIMIZATION = TRUE # Activates -O3 -march=native and some others
# DANGEROUS_OPTIMIZATION = TRUE # Activates everything of HIGH_OPTIMIZATION plus basically everything that is said to improve performance including several potentially unsafe optimizations
# LOW_OPTIMIZATION = TRUE # Activates -O0
HIGH_OPTIMIZATION = TRUE # Activates -O3 -march=native and some others
# DANGEROUS_OPTIMIZATION = TRUE # Activates everything of HIGH_OPTIMIZATION plus basically everything that is said to improve performance including several potentially unsafe optimizations
# Additionally Link Time Optimization support can be built into the library by uncommenting the following line.
# USE_LTO = TRUE # Activates -flto
COMPILE_THREADS = 8 # Number of threads to use during link time optimization.
# USE_LTO = TRUE # Activates -flto
COMPILE_THREADS = 8 # Number of threads to use during link time optimization.
# Some parts of Xerus can use parallelization. This can be activated by using OpenMP.
# OTHER += -fopenmp
#=================================================================================================
# Debug and Logging
# Debug and Logging
#=================================================================================================
# Use errors instead of warnings in many places.
# STRICT_WARNINGS = TRUE
......@@ -49,57 +49,57 @@ COMPILE_THREADS = 8 # Number of threads to use during link time optimizatio
# The Xerus library performs a number of runtime checks to ensure a valid input to all routines.
# While not recommended these runtime checks can be completely disabled by uncommenting the following
# line. This slightly improves the performance.
# DEBUG += -D XERUS_DISABLE_RUNTIME_CHECKS # Disable all runtime checks
# DEBUG += -D XERUS_DISABLE_RUNTIME_CHECKS # Disable all runtime checks
# With the following option the callstack that is returned by get_call_stack (and thus all fatal errors)
# includes NO function names, source-filenames and line numbers.
# NOTE: activate this to remove the dependence on the binutils libraries -lbfd -liberty -lz and -ldl
# XERUS_NO_FANCY_CALLSTACK = TRUE # Show simple callstacks only
# XERUS_NO_FANCY_CALLSTACK = TRUE # Show simple callstacks only
# When performing tests it is useful to ensure that all of the code in covered. This is ensured by
# a set of macros including REQUIRE_TEST to mark lines that need to be tested. With the following
# definition this coverage is tested.
# DEBUG += -D XERUS_TEST_COVERAGE # Enable coverage tests
# DEBUG += -D XERUS_TEST_COVERAGE # Enable coverage tests
# Xerus uses many small objects (Indices, vectors of dimensions etc.) for which the standard allocator
# is not very efficient. With the following option, an experimental replacement allocator will be
# is not very efficient. With the following option, an experimental replacement allocator will be
# included in the library, which will _globally_ replace the 'new' and 'delete' operators in your program.
# Note also, that the replacement allocator is not (yet) thread safe!
# DEBUG += -D XERUS_REPLACE_ALLOCATOR
# You can add all kind of debuging options. In the following are some examples
DEBUG += -g # Adds debug symbols
# DEBUG += -D _GLIBCXX_ASSERTIONS # Activate GLIBCXX assertions
# Sanitization
# DEBUG += -fsanitize=undefined # GCC only
# DEBUG += -fsanitize=memory # Clang only
# DEBUG += -fsanitize=address # find out of bounds access
# DEBUG += -pg # adds profiling code for the 'gprof' analyzer
DEBUG += -g # Adds debug symbols
# DEBUG += -D _GLIBCXX_ASSERTIONS # Activate GLIBCXX assertions
# Sanitization
# DEBUG += -fsanitize=undefined # GCC only
# DEBUG += -fsanitize=memory # Clang only
# DEBUG += -fsanitize=address # find out of bounds access
# DEBUG += -pg # adds profiling code for the 'gprof' analyzer
# Xerus has a built-in logging system to provide runtime information. Here you can adjust the logging level used by the library.
# LOGGING += -D XERUS_LOG_DEBUG # Debug information, can significantly slow down the library.
LOGGING += -D XERUS_LOG_INFO # Information that is not linked to any unexpected behaviour but might nevertheless be of interest.
# LOGGING += -D XERUS_LOG_WARNING # Information that is linked to unexpected behaviour which indicates incorrect use of the library but is no error as such.
# LOGGING += -D XERUS_LOG_ERROR # Information about errors that occur, assumedly by incorrect use of the library.
# LOGGING += -D XERUS_LOG_ERROR # Information about errors that occur, assumedly by incorrect use of the library.
# Per default the logs are printed to cerr. Uncomment the following line to print the log messages to the file error.log instead.
# LOGGING += -D XERUS_LOGFILE # Use error file instead of cerr
# LOGGING += -D XERUS_LOGFILE # Use error file instead of cerr
# Print absolute times instead of relative to program time
# LOGGING += -D XERUS_LOG_ABSOLUTE_TIME
# Uncomment the following line to save the last Logs in a circular buffer (without printing them) to allow detailed reports in case of errors.
# Note that this can significantly slow down the library.
# LOGGING += -D XERUS_LOG_BUFFER # Activate the log buffer
# LOGGING += -D XERUS_LOG_BUFFER # Activate the log buffer
# Add time measurements for the relevant low level function calls. This allows using get_analysis() to get a listing of all called low level functions and the time spent on them.
# LOGGING += -D XERUS_PERFORMANCE_ANALYSIS # Enable performance analysis
# LOGGING += -D XERUS_PERFORMANCE_ANALYSIS # Enable performance analysis
#=================================================================================================
......@@ -107,29 +107,30 @@ LOGGING += -D XERUS_LOG_INFO # Information that is not linke
#=================================================================================================
# Set the directories to install libxerus.
#INSTALL_LIB_PATH = /usr/local/lib64/ # Path where to install the libxerus.so shared library.
#INSTALL_HEADER_PATH = /usr/local/include/ # Path where to install the xerus header files.
#INSTALL_PYTHON_PATH = /usr/local/lib64/python2.7/site-packages/ # Path for the installation of the python bindings.
#INSTALL_LIB_PATH = /usr/local/lib64/ # Path where to install the libxerus.so shared library.
#INSTALL_HEADER_PATH = /usr/local/include/ # Path where to install the xerus header files.
#INSTALL_PYTHON_PATH = /usr/local/lib64/python2.7/site-packages/ # Path for the installation of the python bindings.
#=================================================================================================
# External libraries
#=================================================================================================
# Xerus depends on several external libraries, namely blas, cblas, lapack, lapacke and suiteSparse
# and bfd, all of which are available through common GNU/Linux packaging systems. If you want to
# build a shared library or run the unit tests of Xerus you have to provide the corresponding
# libraries here (otherwise only when linking your own program using Xerus).
# and bfd, all of which are available through common GNU/Linux packaging systems. If you want to
# build a shared library or run the unit tests of Xerus you have to provide the corresponding
# libraries here (otherwise only when linking your own program using Xerus).
# Make sure to use a parallel version of blas when compiling with -fopenmp.
# Uncomment or add the appropriate blas libraries
BLAS_LIBRARIES = -lopenblas -lgfortran # Openblas, serial
# BLAS_LIBRARIES = -lopenblasp -lgfortran # Openblas, parallel
# BLAS_LIBRARIES = /usr/lib64/atlas/libsatlas.so -lgfortran # Atlas
# BLAS_LIBRARIES = /usr/lib64/atlas/libf77blas.a /usr/lib64/atlas/libcblas.a /usr/lib64/atlas/libatlas.a -lgfortran # Custom
# Uncomment or add the appropriate blas libraries
BLAS_LIBRARIES = -lopenblas -lgfortran # Openblas, serial
# BLAS_LIBRARIES = -lopenblasp -lgfortran # Openblas, parallel
# BLAS_LIBRARIES = /usr/lib64/atlas/libsatlas.so -lgfortran # Atlas
# BLAS_LIBRARIES = /usr/lib64/atlas/libf77blas.a /usr/lib64/atlas/libcblas.a /usr/lib64/atlas/libatlas.a -lgfortran # Custom
# Uncomment or add the appropriate lapack libraries
LAPACK_LIBRARIES = -llapacke -llapack # Standard Lapack + Lapacke libraries
# LAPACK_LIBRARIES = ../lib/lapack/liblapacke.a ../lib/lapack/liblapack.a # Custom
# Uncomment or add the appropriate lapack libraries
LAPACK_LIBRARIES = -llapacke -llapack # Standard Lapack + Lapacke libraries
# LAPACK_LIBRARIES = ../lib/lapack/liblapacke.a ../lib/lapack/liblapack.a # Custom
# Uncomment or add the appropriate CXSparse library
SUITESPARSE = -lcholmod -lspqr
......
// Xerus - A General Purpose Tensor Library
// Copyright (C) 2014-2018 Benjamin Huber and Sebastian Wolf.
//
// Copyright (C) 2014-2018 Benjamin Huber and Sebastian Wolf.
//
// Xerus is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License,
// or (at your option) any later version.
//
//
// Xerus is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
//
// You should have received a copy of the GNU Affero General Public License
// along with Xerus. If not, see <http://www.gnu.org/licenses/>.
//
// For further information on Xerus visit https://libXerus.org
// For further information on Xerus visit https://libXerus.org
// or contact us at contact@libXerus.org.
/**
......@@ -28,12 +28,14 @@
#include "../blockTT.h"
namespace xerus { namespace uq {
void uq_adf(TTTensor& _x, const UQMeasurementSet& _measurments, const PolynomBasis _basisType, const double _targetEps = 1e-8, const size_t _maxItr = 0);
TTTensor uq_adf(const UQMeasurementSet& _initalMeasurments, const UQMeasurementSet& _measurments, const PolynomBasis _basisType, const std::vector<size_t>& _dimensions, const double _targetEps = 1e-8, const size_t _maxItr = 0);
void uq_ra_adf(TTTensor& _x, const UQMeasurementSet& _measurments, const PolynomBasis _basisType, const double _targetEps = 1e-8, const size_t _maxItr = 0, const double _initalRankEps = 1e-2);
TTTensor uq_ra_adf(const UQMeasurementSet& _measurments, const PolynomBasis _basisType, const std::vector<size_t>& _dimensions, const double _targetEps = 1e-8, const size_t _maxItr = 0);
void uq_adf(TTTensor& _x, const UQMeasurementSet& _measurments, const PolynomBasis _basisType, const double _targetEps = 1e-8, const size_t _maxItr = 0);
TTTensor uq_adf(const UQMeasurementSet& _initalMeasurments, const UQMeasurementSet& _measurments, const PolynomBasis _basisType, const std::vector<size_t>& _dimensions, const double _targetEps = 1e-8, const size_t _maxItr = 0);
void uq_ra_adf(TTTensor& _x, const UQMeasurementSet& _measurments, const PolynomBasis _basisType, const double _targetEps = 1e-8, const size_t _maxItr = 0, const double _initalRankEps = 1e-2);
TTTensor uq_ra_adf(const UQMeasurementSet& _measurments, const PolynomBasis _basisType, const std::vector<size_t>& _dimensions, const double _targetEps = 1e-8, const size_t _maxItr = 0);
TTTensor uq_ra_adf_iv(TTTensor& _x, const UQMeasurementSet& _measurments, const PolynomBasis _basisType, const double _targetEps = 1e-8, const size_t _maxItr = 0);
}}
......@@ -36,434 +36,433 @@
#endif
namespace xerus { namespace uq { namespace impl_uqRaAdf {
const size_t tracking = 10;
template<size_t P>
class InternalSolver {
const size_t N;
const size_t d;
const double targetResidual;
const size_t maxRank = 75;
const double minRankEps = 1e-8;
const double epsDecay = 0.8;
const double convergenceFactor = 0.995;
const size_t maxIterations;
const double controlSetFraction = 0.1;
const std::vector<std::vector<Tensor>> positions;
const std::vector<Tensor>& solutions;
TTTensor& outX;
std::vector<std::vector<size_t>> sets;
std::vector<size_t> controlSet;
double optNorm;
double testNorm;
std::vector<double> setNorms = std::vector<double>(P);
double bestTestResidual = std::numeric_limits<double>::max();
internal::BlockTT bestX;
internal::BlockTT x;
std::vector<std::vector<Tensor>> rightStack; // From corePosition 1 to d-1
std::vector<std::vector<Tensor>> leftIsStack;
std::vector<std::vector<Tensor>> leftOughtStack;
double rankEps;
boost::circular_buffer<std::vector<size_t>> prevRanks;
boost::circular_buffer<double> residuals {tracking, std::numeric_limits<double>::max()};
public:
// Pre-evaluates the polynomial basis at every sample's parameter values.
// For each core position 1..d-1 the result holds one Tensor per sample: the basis
// evaluation of that sample's (corePosition-1)-th parameter in dimension
// _x.dimensions[corePosition]. Entry 0 of the outer vector stays empty since the
// loop starts at 1 (component 0 presumably carries the physical dimension — confirm).
static std::vector<std::vector<Tensor>> create_positions(const TTTensor& _x, const PolynomBasis _basisType, const std::vector<std::vector<double>>& _randomVariables) {
std::vector<std::vector<Tensor>> positions(_x.degree());
for(size_t corePosition = 1; corePosition < _x.degree(); ++corePosition) {
positions[corePosition].reserve(_randomVariables.size());
for(size_t j = 0; j < _randomVariables.size(); ++j) {
positions[corePosition].push_back(polynomial_basis_evaluation(_randomVariables[j][corePosition-1], _basisType, _x.dimensions[corePosition]));
}
}
return positions;
}
// Randomly partitions the N samples into P optimization sets plus one control set.
// Each sample lands in the control set with probability controlSetFraction and is
// otherwise assigned uniformly at random to one of the P sets. The solution norms
// are recomputed afterwards since they depend on the partition.
void shuffle_sets() {
sets = std::vector<std::vector<size_t>>(P);
controlSet.clear();
std::uniform_real_distribution<double> stochDist(0.0, 1.0);
std::uniform_int_distribution<size_t> setDist(0, P-1);
for(size_t j = 0; j < N; ++j) {
if(stochDist(misc::randomEngine) > controlSetFraction) {
sets[setDist(misc::randomEngine)].push_back(j);
} else {
controlSet.push_back(j);
}
}
calc_solution_norms();
}
// Computes the Frobenius norms of the right-hand sides used to normalize residuals:
// setNorms[k] over each optimization set, optNorm over the union of all optimization
// sets, and testNorm over the control set.
void calc_solution_norms() {
optNorm = 0.0;
for(size_t k = 0; k < sets.size(); ++k) {
setNorms[k] = 0.0;
for(const auto j : sets[k]) {
const double sqrNorm = misc::sqr(frob_norm(solutions[j]));
optNorm += sqrNorm;
setNorms[k] += sqrNorm;
}
setNorms[k] = std::sqrt(setNorms[k]);
}
optNorm = std::sqrt(optNorm);
testNorm = 0.0;
for(const auto j : controlSet) {
const double sqrNorm = misc::sqr(frob_norm(solutions[j]));
testNorm += sqrNorm;
}
testNorm = std::sqrt(testNorm);
}
// Sets up the solver state: pre-evaluated basis positions, the d x N contraction
// stacks, the block representation of _x (arguments 0 and P presumably select core
// position and block size — confirm against BlockTT's constructor), the rank-adaption
// epsilon and the rank-tracking history. Finally the samples are shuffled into sets.
// NOTE: outX keeps a reference to _x; the result is written back in finish().
InternalSolver(TTTensor& _x, const UQMeasurementSet& _measurments, const PolynomBasis _basisType, const size_t _maxItr, const double _targetEps, const double _initalRankEps) :
N(_measurments.size()),
d(_x.degree()),
targetResidual(_targetEps),
maxIterations(_maxItr),
positions(create_positions(_x, _basisType, _measurments.parameterVectors)),
solutions(_measurments.solutions),
outX(_x),
x(_x, 0, P),
rightStack(d, std::vector<Tensor>(N)),
leftIsStack(d, std::vector<Tensor>(N)),
leftOughtStack(d, std::vector<Tensor>(N)),
rankEps(_initalRankEps),
prevRanks(tracking+1, _x.ranks())
{
LOG(uqADF, "Set size: " << _measurments.size());
shuffle_sets();
}
void calc_left_stack(const size_t _position) {
REQUIRE(_position+1 < d, "Invalid corePosition");
if(_position == 0) {
Tensor shuffledX = x.get_component(0);
shuffledX.reinterpret_dimensions({x.dimensions[0], x.rank(0)}); // Remove dangling 1-mode
#pragma omp parallel for
for(size_t j = 0; j < N; ++j) {
// NOTE: leftIsStack[0] is always an identity
contract(leftOughtStack[_position][j], solutions[j], shuffledX, 1);
}
} else { // _position > 0
const Tensor shuffledX = reshuffle(x.get_component(_position), {1, 0, 2});
const size_t tracking = 10;
template<size_t P>
class InternalSolver {
const size_t N;
const size_t d;
const double targetResidual;
const size_t maxRank = 50;
const double minRankEps = 1e-8;
const double epsDecay = 0.8;
const double convergenceFactor = 0.995;
const size_t maxIterations;
const double controlSetFraction = 0.1;
const std::vector<std::vector<Tensor>> positions;
const std::vector<Tensor>& solutions;
TTTensor& outX;
std::vector<std::vector<size_t>> sets;
std::vector<size_t> controlSet;
double optNorm;
double testNorm;
std::vector<double> setNorms = std::vector<double>(P);
double bestTestResidual = std::numeric_limits<double>::max();
internal::BlockTT bestX;
internal::BlockTT x;
std::vector<std::vector<Tensor>> rightStack; // From corePosition 1 to d-1
std::vector<std::vector<Tensor>> leftIsStack;
std::vector<std::vector<Tensor>> leftOughtStack;
double rankEps;
boost::circular_buffer<std::vector<size_t>> prevRanks;
boost::circular_buffer<double> residuals {tracking, std::numeric_limits<double>::max()};
public:
// Pre-evaluates the polynomial basis for every sample and every parametric core
// position. evaluated[pos][j] is the basis evaluation of sample j's (pos-1)-th
// parameter in dimension _x.dimensions[pos]; index 0 stays empty because the
// loop starts at core position 1.
static std::vector<std::vector<Tensor>> create_positions(const TTTensor& _x, const PolynomBasis _basisType, const std::vector<std::vector<double>>& _randomVariables) {
	const size_t degree = _x.degree();
	const size_t numSamples = _randomVariables.size();
	std::vector<std::vector<Tensor>> evaluated(degree);
	for(size_t pos = 1; pos < degree; ++pos) {
		auto& target = evaluated[pos];
		target.reserve(numSamples);
		for(const auto& sample : _randomVariables) {
			target.push_back(polynomial_basis_evaluation(sample[pos-1], _basisType, _x.dimensions[pos]));
		}
	}
	return evaluated;
}
// Randomly splits the N samples into P optimization sets and one control set.
// A sample joins the control set with probability controlSetFraction; otherwise a
// uniformly random set receives it. The dependent solution norms are refreshed at
// the end. (The two RNG draws happen in the same order as before, so the resulting
// partition is identical for a given engine state.)
void shuffle_sets() {
	sets = std::vector<std::vector<size_t>>(P);
	controlSet.clear();
	std::uniform_real_distribution<double> uniform01(0.0, 1.0);
	std::uniform_int_distribution<size_t> pickSet(0, P-1);
	for(size_t sample = 0; sample < N; ++sample) {
		const bool optimize = uniform01(misc::randomEngine) > controlSetFraction;
		if(optimize) {
			sets[pickSet(misc::randomEngine)].push_back(sample);
		} else {
			controlSet.push_back(sample);
		}
	}
	calc_solution_norms();
}
// Recomputes the Frobenius norms of the right-hand sides for residual normalization:
// one norm per optimization set (setNorms), the norm over all optimization samples
// (optNorm) and the norm over the control set (testNorm). Accumulation order matches
// the set layout, so results are bit-identical to the previous implementation.
void calc_solution_norms() {
	optNorm = 0.0;
	for(size_t setIdx = 0; setIdx < sets.size(); ++setIdx) {
		double setAccum = 0.0;
		for(const size_t sampleIdx : sets[setIdx]) {
			const double sqrNorm = misc::sqr(frob_norm(solutions[sampleIdx]));
			optNorm += sqrNorm;
			setAccum += sqrNorm;
		}
		setNorms[setIdx] = std::sqrt(setAccum);
	}
	optNorm = std::sqrt(optNorm);

	double controlAccum = 0.0;
	for(const size_t sampleIdx : controlSet) {
		controlAccum += misc::sqr(frob_norm(solutions[sampleIdx]));
	}
	testNorm = std::sqrt(controlAccum);
}
// Sets up the solver state: pre-evaluated basis positions, the d x N contraction
// stacks, the block representation of _x (arguments 0 and P presumably select core
// position and block size — confirm against BlockTT's constructor), the rank-adaption
// epsilon and the rank-tracking history. Finally the samples are shuffled into sets.
// NOTE: outX keeps a reference to _x; the result is written back in finish().
InternalSolver(TTTensor& _x, const UQMeasurementSet& _measurments, const PolynomBasis _basisType, const size_t _maxItr, const double _targetEps, const double _initalRankEps) :
N(_measurments.size()),
d(_x.degree()),
targetResidual(_targetEps),
maxIterations(_maxItr),
positions(create_positions(_x, _basisType, _measurments.parameterVectors)),
solutions(_measurments.solutions),
outX(_x),
x(_x, 0, P),
rightStack(d, std::vector<Tensor>(N)),
leftIsStack(d, std::vector<Tensor>(N)),
leftOughtStack(d, std::vector<Tensor>(N)),
rankEps(_initalRankEps),
prevRanks(tracking+1, _x.ranks())
{
LOG(uqADF, "Set size: " << _measurments.size());
shuffle_sets();
}
// Refreshes the left contraction stacks at _position for all N samples, to be called
// after the component at _position changed while the core moves right.
// leftOughtStack[p][j] carries the sample's solution contracted with components 0..p;
// leftIsStack[p][j] carries the sample's model-model ("is") contraction up to p.
// leftIsStack[0] is never stored since it is always an identity (see NOTE below).
// Requires _position+1 < d.
void calc_left_stack(const size_t _position) {
REQUIRE(_position+1 < d, "Invalid corePosition");
if(_position == 0) {
Tensor shuffledX = x.get_component(0);
shuffledX.reinterpret_dimensions({x.dimensions[0], x.rank(0)}); // Remove dangling 1-mode
#pragma omp parallel for
for(size_t j = 0; j < N; ++j) {
// NOTE: leftIsStack[0] is always an identity
contract(leftOughtStack[_position][j], solutions[j], shuffledX, 1);
}
} else { // _position > 0
// Reorder the component so the basis (external) mode comes first for the contraction.
const Tensor shuffledX = reshuffle(x.get_component(_position), {1, 0, 2});
Tensor measCmp, tmp;
#pragma omp parallel for firstprivate(measCmp, tmp)
for(size_t j = 0; j < N; ++j) {
// measCmp = component evaluated at sample j's basis tensor.
contract(measCmp, positions[_position][j], shuffledX, 1);
if(_position > 1) {
contract(tmp, measCmp, true, leftIsStack[_position-1][j], false, 1);
contract(leftIsStack[_position][j], tmp, measCmp, 1);
} else { // _position == 1
contract(leftIsStack[_position][j], measCmp, true, measCmp, false, 1);
}
contract(leftOughtStack[_position][j], leftOughtStack[_position-1][j], measCmp, 1);
}
}
}
// Refreshes rightStack[_position][j] for all N samples after the component at
// _position changed: the component evaluated at sample j's basis tensor, contracted
// with rightStack[_position+1][j] (or standing alone for the last component, whose
// trailing 1-mode is dropped). Valid for 0 < _position < d.
void calc_right_stack(const size_t _position) {
REQUIRE(_position > 0 && _position < d, "Invalid corePosition");
// Reorder the component so the basis (external) mode comes first for the contraction.
Tensor shuffledX = reshuffle(x.get_component(_position), {1, 0, 2});
if(_position+1 < d) {
Tensor tmp;
#pragma omp parallel for firstprivate(tmp)
for(size_t j = 0; j < N; ++j) {
contract(tmp, positions[_position][j], shuffledX, 1);
contract(rightStack[_position][j], tmp, rightStack[_position+1][j], 1);
}
} else { // _position == d-1
shuffledX.reinterpret_dimensions({shuffledX.dimensions[0], shuffledX.dimensions[1]}); // Remove dangling 1-mode
#pragma omp parallel for
for(size_t j = 0; j < N; ++j) {
contract(rightStack[_position][j], positions[_position][j], shuffledX, 1);
}
}
}
// Computes the update direction ("delta") for the core at _corePosition, restricted
// to the samples of set _setId. Per sample, the current model evaluation ("is" part)
// minus the projected right-hand side ("ought" part) is combined with the dyadic part
// spanning the core's open modes; contributions are summed over the set under an
// omp critical section. Requires the block core to currently sit at _corePosition.
Tensor calculate_delta(const size_t _corePosition, const size_t _setId) const {
REQUIRE(x.corePosition == _corePosition, "IE");
Tensor delta(x.get_core(_setId).dimensions);
Tensor dyadComp, tmp;
if(_corePosition > 0) {
const Tensor shuffledX = reshuffle(x.get_core(_setId), {1, 0, 2});
#pragma omp parallel for firstprivate(dyadComp, tmp)
for(size_t jIdx = 0; jIdx < sets[_setId].size(); ++jIdx) {
const size_t j = sets[_setId][jIdx];
// Calculate common "dyadic part"
Tensor dyadicPart;
if(_corePosition < d-1) {
contract(dyadicPart, positions[_corePosition][j], rightStack[_corePosition+1][j], 0);
} else {
dyadicPart = positions[_corePosition][j];
dyadicPart.reinterpret_dimensions({dyadicPart.dimensions[0], 1}); // Add dangling 1-mode
}
// Calculate "is"
Tensor isPart;
contract(isPart, positions[_corePosition][j], shuffledX, 1);
if(_corePosition < d-1) {
contract(isPart, isPart, rightStack[_corePosition+1][j], 1);
} else {
isPart.reinterpret_dimensions({isPart.dimensions[0]});
}
if(_corePosition > 1) { // NOTE: For _corePosition == 1 leftIsStack is the identity
contract(isPart, leftIsStack[_corePosition-1][j], isPart, 1);
}
// Combine with ought part
contract(dyadComp, isPart - leftOughtStack[_corePosition-1][j], dyadicPart, 0);
#pragma omp critical
{ delta += dyadComp; }
}
} else { // _corePosition == 0
// At position 0 there is no left stack; the residual is formed directly against
// the solutions and expanded with the right stack.
Tensor shuffledX = x.get_core(_setId);
shuffledX.reinterpret_dimensions({shuffledX.dimensions[1], shuffledX.dimensions[2]});
#pragma omp parallel for firstprivate(dyadComp, tmp)
for(size_t jIdx = 0; jIdx < sets[_setId].size(); ++jIdx) {
const size_t j = sets[_setId][jIdx];
contract(dyadComp, shuffledX, rightStack[_corePosition+1][j], 1);
contract(dyadComp, dyadComp - solutions[j], rightStack[_corePosition+1][j], 0);
dyadComp.reinterpret_dimensions({1, dyadComp.dimensions[0], dyadComp.dimensions[1]});
#pragma omp critical
{ delta += dyadComp; }
}
}
return delta;
}
// Computes the norm of the operator applied to the projected gradient _delta,
// restricted to set _setId (used by update_core as the step-size denominator).
// Per sample, _delta is evaluated against the sample's basis tensor and the
// left/right stacks; the squared contributions are accumulated via an OpenMP
// reduction and the square root of the sum is returned.
double calculate_norm_A_projGrad(const Tensor& _delta, const size_t _corePosition, const size_t _setId) const {
double norm = 0.0;
Tensor tmp;
if(_corePosition == 0) {
#pragma omp parallel for firstprivate(tmp) reduction(+:norm)
for(size_t jIdx = 0; jIdx < sets[_setId].size(); ++jIdx) {
const size_t j = sets[_setId][jIdx];
contract(tmp, _delta, rightStack[1][j], 1);
const double normPart = misc::sqr(frob_norm(tmp));
norm += normPart;
}
} else { // _corePosition > 0
// Reorder _delta so the basis (external) mode comes first for the contraction.
Tensor shuffledDelta = reshuffle(_delta, {1, 0, 2});
if(_corePosition+1 == d) {
shuffledDelta.reinterpret_dimensions({shuffledDelta.dimensions[0], shuffledDelta.dimensions[1]}); // Remove dangling 1-mode
}
Tensor rightPart;
#pragma omp parallel for firstprivate(tmp, rightPart) reduction(+:norm)
for(size_t jIdx = 0; jIdx < sets[_setId].size(); ++jIdx) {
const size_t j = sets[_setId][jIdx];
// Current node
contract(tmp, positions[_corePosition][j], shuffledDelta, 1);
if(_corePosition+1 < d) {
contract(rightPart, tmp, rightStack[_corePosition+1][j], 1);
} else {
rightPart = tmp;
}
if(_corePosition > 1) {
contract(tmp, rightPart, leftIsStack[_corePosition-1][j], 1);
contract(tmp, tmp, rightPart, 1);
} else { // NOTE: For _corePosition == 1 leftIsStack is the identity
contract(tmp, rightPart, rightPart, 1);
}
REQUIRE(tmp.size == 1, "IE");
norm += tmp[0];
}
}
return std::sqrt(norm);
}
// Evaluates the current relative residuals using the average core over the P blocks.
// Returns (overall residual over all optimization sets, residual on the control set,
// per-set residuals); each is a Frobenius norm normalized by the corresponding
// solution norm from calc_solution_norms(). Only valid with the core at position 0,
// where the model evaluation reduces to avgCore contracted with rightStack[1].
std::tuple<double, double, std::vector<double>> calc_residuals(const size_t _corePosition) const {
REQUIRE(_corePosition == 0, "Invalid corePosition");
// TODO paralell
const auto avgCore = x.get_average_core();
Tensor tmp;
double optResidual = 0.0;
std::vector<double> setResiduals(sets.size(), 0.0);
for(size_t k = 0; k < sets.size(); ++k) {
for(const auto j : sets[k]) {
contract(tmp, avgCore, rightStack[1][j], 1);
tmp.reinterpret_dimensions({x.dimensions[0]});
tmp -= solutions[j];
const double resSqr = misc::sqr(frob_norm(tmp));
optResidual += resSqr;
setResiduals[k] += resSqr;
}
setResiduals[k] = std::sqrt(setResiduals[k])/setNorms[k];
}
optResidual = std::sqrt(optResidual)/optNorm;
double testResidual = 0.0;
for(const auto j : controlSet) {
contract(tmp, avgCore, rightStack[1][j], 1);
tmp.reinterpret_dimensions({x.dimensions[0]});
tmp -= solutions[j];
const double resSqr = misc::sqr(frob_norm(tmp));
testResidual += resSqr;
}
testResidual = std::sqrt(testResidual)/testNorm;
return std::make_tuple(optResidual, testResidual, setResiduals);
}
// Performs one gradient-type update of the block core at _corePosition. For each of
// the P sets, the delta and its operator norm are computed and the set's slice of the
// core — selected via a dirac tensor on the block mode p — is moved along -delta with
// step size PyR / normAProjGrad^2 (squared delta norm over squared operator norm).
void update_core(const size_t _corePosition) {
const Index left, right, ext, p;
for(size_t setId = 0; setId < P; ++setId) {
const auto delta = calculate_delta(_corePosition, setId);
const auto normAProjGrad = calculate_norm_A_projGrad(delta, _corePosition, setId);
const value_t PyR = misc::sqr(frob_norm(delta));
// Actual update
x.component(_corePosition)(left, ext, p, right) = x.component(_corePosition)(left, ext, p, right)-((PyR/misc::sqr(normAProjGrad))*delta)(left, ext, right)*Tensor::dirac({P}, setId)(p);
}
}
// Writes the best iterate found (bestX) back into the caller's tensor outX: the
// block core is collapsed to the average over the P blocks, all other components
// are copied verbatim. Logs the final residual (the "from" value is still a TODO).
void finish(const size_t _iteration) {
for(size_t i = 0; i < bestX.degree(); i++) {
if(i == bestX.corePosition) {
outX.set_component(i, bestX.get_average_core());
} else {
outX.set_component(i, bestX.get_component(i));
}
}
LOG(ADF, "Residual decrease from " << std::scientific << 0.0 /* TODO */ << " to " << std::scientific << residuals.back() << " in " << _iteration << " iterations.");
}
void solve() {