Skip to content

Commit

Permalink
Fix complex support.
Browse files Browse the repository at this point in the history
And enable complex tests. Fixes #6.
  • Loading branch information
devinamatthews committed Jun 15, 2017
2 parents 1b7bcc3 + 5853789 commit 237c870
Show file tree
Hide file tree
Showing 15 changed files with 135 additions and 166 deletions.
8 changes: 4 additions & 4 deletions src/configs/configs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@ struct blocksize
template <typename T> len_type extent() const { return _extent[type_idx<T>::value]; }

template <template <typename> class BS, typename T> blocksize(const BS<T>&)
: _def {BS<float>::def, BS<double>::def, BS<double>::def, BS<double>::def},
_max {BS<float>::max, BS<double>::max, BS<double>::max, BS<double>::max},
_iota {BS<float>::iota, BS<double>::iota, BS<double>::iota, BS<double>::iota},
_extent{BS<float>::extent, BS<double>::extent, BS<double>::extent, BS<double>::extent} {}
: _def {BS<float>::def, BS<double>::def, BS<scomplex>::def, BS<dcomplex>::def},
_max {BS<float>::max, BS<double>::max, BS<scomplex>::max, BS<dcomplex>::max},
_iota {BS<float>::iota, BS<double>::iota, BS<scomplex>::iota, BS<dcomplex>::iota},
_extent{BS<float>::extent, BS<double>::extent, BS<scomplex>::extent, BS<dcomplex>::extent} {}
};

template <template <typename> class ukr_t>
Expand Down
Binary file modified src/external/stl_ext/.git.bak
Binary file not shown.
25 changes: 12 additions & 13 deletions src/external/stl_ext/include/complex.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,6 @@ using std::complex;
using std::real;
using std::imag;

inline float conj( float x) { return x; }
inline double conj( double x) { return x; }
inline long double conj(long double x) { return x; }
inline double conj( bool x) { return x; }
inline double conj( char x) { return x; }
inline double conj( char16_t x) { return x; }
inline double conj( char32_t x) { return x; }
inline double conj( wchar_t x) { return x; }
inline double conj( short x) { return x; }
inline double conj( int x) { return x; }
inline double conj( long x) { return x; }
inline double conj( long long x) { return x; }

// Type trait giving the real-valued counterpart of a scalar type.
// Primary template: any type T is its own real type.
template <typename T>
struct real_type
{
    using type = T;
};

// Specialization for complex<T>: the real type is the component type T.
template <typename T>
struct real_type<complex<T>>
{
    using type = T;
};
template <typename T>
Expand All @@ -48,6 +35,18 @@ using enable_if_not_complex = enable_if<!is_complex<T>::value,U>;
template <typename T, typename U=void>
using enable_if_not_complex_t = typename enable_if_not_complex<T,U>::type;

// Conjugate of a complex value: the real part is kept and the imaginary
// part is negated. The result has the same type T as the argument.
// NOTE(review): enable_if_complex_t is defined outside this view; presumably
// it restricts this overload to types for which is_complex<T> holds.
template <typename T>
enable_if_complex_t<T,T> conj(T x)
{
    const auto re = x.real();
    const auto im = x.imag();
    return {re, -im};
}

// Conjugate of a non-complex value: the argument is returned unchanged and
// the result has the same type T as the argument. This overload is selected
// only when is_complex<T>::value is false (via enable_if_not_complex_t).
template <typename T>
enable_if_not_complex_t<T,T> conj(T x)
{
return x;
}

template <typename T>
enable_if_complex_t<T,real_type_t<T>> norm2(T x)
{
Expand Down
2 changes: 1 addition & 1 deletion src/external/stl_ext/include/iostream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@ namespace detail

double l = log10(fabs(p.val));
auto d = lrint(l < 0 ? l-1 : l);
if (std::abs(d) > 2)
if (fabs(d) > 2)
{
os << std::scientific << std::setprecision(p.sigfigs) << p.val;
}
Expand Down
Binary file modified src/external/tci/.git.bak
Binary file not shown.
40 changes: 24 additions & 16 deletions src/external/tci/tci/communicator.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,22 @@ void tci_distribute(unsigned n, unsigned idx, uint64_t range,
}
}

/*
 * Distribute a two-dimensional range (range_m x range_n) over `num` workers
 * and report the portion belonging to worker `idx`.
 *
 * The number of workers along each dimension is obtained from
 * tci_partition_2x2, with `num` passed as the limit for both dimensions.
 * The linear index `idx` is then decomposed with the m dimension varying
 * fastest, and each dimension is distributed independently by
 * tci_distribute, which writes its results through the first, last and max
 * output pointers for that dimension.
 */
void tci_distribute_2d(unsigned num, unsigned idx,
                       uint64_t range_m, uint64_t range_n,
                       uint64_t granularity_m, uint64_t granularity_n,
                       uint64_t* first_m, uint64_t* last_m, uint64_t* max_m,
                       uint64_t* first_n, uint64_t* last_n, uint64_t* max_n)
{
    unsigned nt_m, nt_n;
    tci_partition_2x2(num, range_m, num, range_n, num, &nt_m, &nt_n);

    /* Position of this worker along m (fast index) and n (slow index). */
    tci_distribute(nt_m, idx % nt_m, range_m, granularity_m,
                   first_m, last_m, max_m);
    tci_distribute(nt_n, idx / nt_m, range_n, granularity_n,
                   first_n, last_n, max_n);
}

void tci_comm_distribute_over_gangs(tci_comm* comm, uint64_t range,
uint64_t granularity, uint64_t* first,
uint64_t* last, uint64_t* max)
Expand All @@ -207,14 +223,10 @@ void tci_comm_distribute_over_gangs_2d(tci_comm* comm,
uint64_t* first_m, uint64_t* last_m, uint64_t* max_m,
uint64_t* first_n, uint64_t* last_n, uint64_t* max_n)
{
unsigned m, n;
tci_partition_2x2(comm->ngang, range_m, range_n, &m, &n);

unsigned idx_m = comm->gid % m;
unsigned idx_n = comm->gid / m;

tci_distribute(m, idx_m, range_m, granularity_m, first_m, last_m, max_m);
tci_distribute(n, idx_n, range_n, granularity_n, first_n, last_n, max_n);
tci_distribute_2d(comm->ngang, comm->gid, range_m, range_n,
granularity_m, granularity_n,
first_m, last_m, max_m,
first_n, last_n, max_n);
}

void tci_comm_distribute_over_threads_2d(tci_comm* comm,
Expand All @@ -223,12 +235,8 @@ void tci_comm_distribute_over_threads_2d(tci_comm* comm,
uint64_t* first_m, uint64_t* last_m, uint64_t* max_m,
uint64_t* first_n, uint64_t* last_n, uint64_t* max_n)
{
unsigned m, n;
tci_partition_2x2(comm->nthread, range_m, range_n, &m, &n);

unsigned idx_m = comm->tid % m;
unsigned idx_n = comm->tid / m;

tci_distribute(m, idx_m, range_m, granularity_m, first_m, last_m, max_m);
tci_distribute(n, idx_n, range_n, granularity_n, first_n, last_n, max_n);
tci_distribute_2d(comm->nthread, comm->tid, range_m, range_n,
granularity_m, granularity_n,
first_m, last_m, max_m,
first_n, last_n, max_n);
}
31 changes: 31 additions & 0 deletions src/external/tci/tci/communicator.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ void tci_distribute(unsigned n, unsigned idx, uint64_t range,
uint64_t granularity, uint64_t* first, uint64_t* last,
uint64_t* max);

/*
 * Distribute a two-dimensional range (range_m x range_n) over `num` workers
 * and report, through the output pointers, the first/last/max values for
 * worker `idx` along each of the m and n dimensions. granularity_m and
 * granularity_n are forwarded to the per-dimension tci_distribute calls.
 */
void tci_distribute_2d(unsigned num, unsigned idx,
uint64_t range_m, uint64_t range_n,
uint64_t granularity_m, uint64_t granularity_n,
uint64_t* first_m, uint64_t* last_m, uint64_t* max_m,
uint64_t* first_n, uint64_t* last_n, uint64_t* max_n);

void tci_comm_distribute_over_gangs(tci_comm* comm, uint64_t range,
uint64_t granularity, uint64_t* first,
uint64_t* last, uint64_t* max);
Expand Down Expand Up @@ -164,6 +170,31 @@ class communicator
return child;
}

/**
 * Distribute `range` over `nthread` workers without needing a communicator.
 *
 * Thin wrapper around tci_distribute for worker `tid`.
 *
 * @param nthread      Number of workers to distribute over.
 * @param tid          Index of the worker whose portion is requested.
 * @param range        Total extent to distribute.
 * @param granularity  Forwarded to tci_distribute (defaults to 1).
 * @return Tuple (first, last, max) holding the three values tci_distribute
 *         writes through its output pointers, in that order.
 */
static std::tuple<uint64_t,uint64_t,uint64_t>
distribute(unsigned nthread, unsigned tid, uint64_t range,
           uint64_t granularity=1)
{
    uint64_t begin, end, limit;
    tci_distribute(nthread, tid, range, granularity, &begin, &end, &limit);
    return std::tuple<uint64_t,uint64_t,uint64_t>(begin, end, limit);
}

/**
 * Distribute a two-dimensional range over `nthread` workers without needing
 * a communicator.
 *
 * Thin wrapper around tci_distribute_2d for worker `tid`.
 *
 * @param nthread        Number of workers to distribute over.
 * @param tid            Index of the worker whose portion is requested.
 * @param range_m        Total extent along the m dimension.
 * @param range_n        Total extent along the n dimension.
 * @param granularity_m  Forwarded to tci_distribute_2d (defaults to 1).
 * @param granularity_n  Forwarded to tci_distribute_2d (defaults to 1).
 * @return Tuple (first_m, last_m, max_m, first_n, last_n, max_n) holding the
 *         six values tci_distribute_2d writes through its output pointers.
 */
static std::tuple<uint64_t,uint64_t,uint64_t,uint64_t,uint64_t,uint64_t>
distribute_2d(unsigned nthread, unsigned tid,
              uint64_t range_m, uint64_t range_n,
              uint64_t granularity_m=1,
              uint64_t granularity_n=1)
{
    using result_type =
        std::tuple<uint64_t,uint64_t,uint64_t,uint64_t,uint64_t,uint64_t>;

    uint64_t begin_m, end_m, limit_m;
    uint64_t begin_n, end_n, limit_n;

    tci_distribute_2d(nthread, tid, range_m, range_n,
                      granularity_m, granularity_n,
                      &begin_m, &end_m, &limit_m,
                      &begin_n, &end_n, &limit_n);

    return result_type(begin_m, end_m, limit_m, begin_n, end_n, limit_n);
}

std::tuple<uint64_t,uint64_t,uint64_t>
distribute_over_gangs(uint64_t range, uint64_t granularity=1) const
{
Expand Down
26 changes: 19 additions & 7 deletions src/external/tci/tci/parallel.c
Original file line number Diff line number Diff line change
Expand Up @@ -205,17 +205,23 @@ static int ipow(int base, int power)

#endif

void tci_partition_2x2(unsigned nthread, uint64_t work1, uint64_t work2,
void tci_partition_2x2(unsigned nthread,
uint64_t work1, unsigned max1,
uint64_t work2, unsigned max2,
unsigned* nt1, unsigned* nt2)
{
*nt1 = *nt2 = 1;

if (nthread < 4)
{
if (work1 >= work2)
if (work1 >= work2 && max1 >= nthread)
{
*nt1 = nthread;
*nt2 = 1;
}
else
{
*nt1 = 1;
*nt2 = nthread;
}
return;
}

Expand All @@ -224,21 +230,27 @@ void tci_partition_2x2(unsigned nthread, uint64_t work1, uint64_t work2,

#if !TCI_USE_EXPENSIVE_PARTITION

unsigned num1 = 1;
unsigned num2 = 1;

unsigned f;
while ((f = tci_next_prime_factor(&factors)) > 1)
{
if (work1 > work2)
if ((work1 > work2 && num1*f <= max1) || num2*f > max2)
{
work1 /= f;
*nt1 *= f;
num1 *= f;
}
else
{
work2 /= f;
*nt2 *= f;
num2 *= f;
}
}

*nt1 = num1;
*nt2 = num2;

#else

/*
Expand Down
18 changes: 15 additions & 3 deletions src/external/tci/tci/parallel.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ void tci_prime_factorization(unsigned n, tci_prime_factors* factors);

unsigned tci_next_prime_factor(tci_prime_factors* factors);

void tci_partition_2x2(unsigned nthread, uint64_t work1, uint64_t work2,
void tci_partition_2x2(unsigned nthread,
uint64_t work1, unsigned max1,
uint64_t work2, unsigned max2,
unsigned* nt1, unsigned* nt2);

#ifdef __cplusplus
Expand All @@ -47,7 +49,7 @@ void parallelize(Body&& body, unsigned nthread, unsigned arity=0)
tci_parallelize(
[](tci_comm* comm, void* data)
{
Body& body = *static_cast<Body*>(data);
Body body = *static_cast<Body*>(data);
body(*reinterpret_cast<communicator*>(comm));
},
static_cast<void*>(&body), nthread, arity);
Expand All @@ -74,7 +76,17 @@ inline std::pair<unsigned,unsigned>
partition_2x2(unsigned nthreads, uint64_t work1, uint64_t work2)
{
unsigned nt1, nt2;
tci_partition_2x2(nthreads, work1, work2, &nt1, &nt2);
tci_partition_2x2(nthreads, work1, nthreads, work2, nthreads, &nt1, &nt2);
return std::make_pair(nt1, nt2);
}

/**
 * Split `nthreads` between two dimensions, with per-dimension limits.
 *
 * Thin wrapper around tci_partition_2x2 that returns the two thread counts
 * as a pair instead of through output pointers.
 *
 * @param nthreads  Total number of threads to split.
 * @param work1     Amount of work along the first dimension.
 * @param max1      Limit forwarded as max1 to tci_partition_2x2
 *                  (presumably a cap on the first count; confirm there).
 * @param work2     Amount of work along the second dimension.
 * @param max2      Limit forwarded as max2 to tci_partition_2x2.
 * @return Pair (nt1, nt2) as written by tci_partition_2x2.
 */
inline std::pair<unsigned,unsigned>
partition_2x2(unsigned nthreads,
              uint64_t work1, unsigned max1,
              uint64_t work2, unsigned max2)
{
    std::pair<unsigned,unsigned> split;
    tci_partition_2x2(nthreads, work1, max1, work2, max2,
                      &split.first, &split.second);
    return split;
}

Expand Down
6 changes: 5 additions & 1 deletion src/iface/1t/reduce.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,14 @@ void tblis_tensor_reduce(const tblis_comm* comm, const tblis_config* cfg,
result->get<T>() = conj(result->get<T>());
}

if (op == REDUCE_SUM || op == REDUCE_SUM_ABS || op == REDUCE_NORM_2)
if (op == REDUCE_SUM)
{
result->get<T>() *= A->alpha<T>();
}
else if (op == REDUCE_SUM_ABS || op == REDUCE_NORM_2)
{
result->get<T>() *= std::abs(A->alpha<T>());
}
})
}

Expand Down
Loading

0 comments on commit 237c870

Please sign in to comment.