Traceback (most recent call last):
File "E:\earthquake\train_V2\train.py", line 321, in <module>
trainOne("D1_TSLANet")
File "E:\earthquake\train_V2\train.py", line 300, in trainOne
doTrain(configs, model_name, train_loader, valid_loader)
File "E:\earthquake\train_V2\train.py", line 192, in doTrain
outputs = model(inputs)
File "C:\Users\DELL\.conda\envs\torchgpu\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\Users\DELL\.conda\envs\torchgpu\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "E:\earthquake\train_V2\Nets\D1_TSLANet.py", line 172, in forward
x = tsla_blk(x)
File "C:\Users\DELL\.conda\envs\torchgpu\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\Users\DELL\.conda\envs\torchgpu\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "E:\earthquake\train_V2\Nets\D1_TSLANet.py", line 120, in forward
x = x + self.drop_path(self.icb(self.norm2(self.asb(self.norm1(x)))))
File "C:\Users\DELL\.conda\envs\torchgpu\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\Users\DELL\.conda\envs\torchgpu\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "E:\earthquake\train_V2\Nets\D1_TSLANet.py", line 92, in forward
freq_mask = self.create_adaptive_high_freq_mask(x_fft)
File "E:\earthquake\train_V2\Nets\D1_TSLANet.py", line 64, in create_adaptive_high_freq_mask
energy = torch.abs(x_fft).pow(2).sum(dim=-1)
RuntimeError:
#ifdef __HIPCC__
#define ERROR_UNSUPPORTED_CAST ;
// corresponds to aten/src/ATen/native/cuda/thread_constants.h
#define CUDA_OR_ROCM_NUM_THREADS 256
// corresponds to aten/src/ATen/cuda/detail/OffsetCalculator.cuh
#define MAX_DIMS 16
#ifndef __forceinline__
#define __forceinline__ inline __attribute__((always_inline))
#endif
#else
//TODO use _assert_fail, because assert is disabled in non-debug builds
#define ERROR_UNSUPPORTED_CAST assert(false);
#define CUDA_OR_ROCM_NUM_THREADS 128
#define MAX_DIMS 25
#endif
#define POS_INFINITY __int_as_float(0x7f800000)
#define INFINITY POS_INFINITY
#define NEG_INFINITY __int_as_float(0xff800000)
#define NAN __int_as_float(0x7fffffff)
typedef long long int int64_t;
typedef unsigned int uint32_t;
typedef signed char int8_t;
typedef unsigned char uint8_t; // NOTE: this MUST be "unsigned char"! "char" is equivalent to "signed char"
typedef short int16_t;
static_assert(sizeof(int64_t) == 8, "expected size does not match");
static_assert(sizeof(uint32_t) == 4, "expected size does not match");
static_assert(sizeof(int8_t) == 1, "expected size does not match");
constexpr int num_threads = CUDA_OR_ROCM_NUM_THREADS;
constexpr int thread_work_size = 4; // TODO: make template substitution once we decide where those vars live
constexpr int block_work_size = thread_work_size * num_threads;
namespace std {
template <class _Tp>
_Tp&& __declval(int);
template <class _Tp>
_Tp __declval(long);
template <class _Tp>
decltype(__declval<_Tp>(0)) declval() noexcept;
template <class _Tp, _Tp __v>
struct integral_constant {
static const _Tp value = __v;
typedef _Tp value_type;
typedef integral_constant type;
};
typedef integral_constant<bool, true> true_type;
typedef integral_constant<bool, false> false_type;
// is_same, functional
template <class _Tp, class _Up> struct is_same : public false_type {};
template <class _Tp> struct is_same<_Tp, _Tp> : public true_type {};
// is_integral, for some types.
template <class _Tp> struct is_integral
: public integral_constant<bool, false> {};
template <> struct is_integral<bool>
: public integral_constant<bool, true> {};
template <> struct is_integral<char>
: public integral_constant<bool, true> {};
template <> struct is_integral<short>
: public integral_constant<bool, true> {};
template <> struct is_integral<int>
: public integral_constant<bool, true> {};
template <> struct is_integral<long>
: public integral_constant<bool, true> {};
template <> struct is_integral<long long>
: public integral_constant<bool, true> {};
// enable_if, functional
template <bool _C, typename _Tp> struct enable_if{};
template <typename _Tp> struct enable_if<true, _Tp>{
using type = _Tp;
};
template <bool b, class T=void>
using enable_if_t = typename enable_if<b,T>::type;
template <class _Tp> struct remove_const {typedef _Tp type;};
template <class _Tp> struct remove_const<const _Tp> {typedef _Tp type;};
template <class _Tp> using remove_const_t = typename remove_const<_Tp>::type;
template <class _Tp> struct remove_volatile {typedef _Tp type;};
template <class _Tp> struct remove_volatile<volatile _Tp> {typedef _Tp type;};
template <class _Tp> using remove_volatile_t = typename remove_volatile<_Tp>::type;
template <class _Tp> struct remove_cv
{typedef typename remove_volatile<typename remove_const<_Tp>::type>::type type;};
template <class _Tp> using remove_cv_t = typename remove_cv<_Tp>::type;
template <class _Tp> struct __libcpp_is_floating_point : public false_type {};
template <> struct __libcpp_is_floating_point<float> : public true_type {};
template <> struct __libcpp_is_floating_point<double> : public true_type {};
template <> struct __libcpp_is_floating_point<long double> : public true_type {};
template <class _Tp> struct is_floating_point
: public __libcpp_is_floating_point<typename remove_cv<_Tp>::type> {};
template <class _Tp> struct is_arithmetic
: public integral_constant<bool, is_integral<_Tp>::value ||
is_floating_point<_Tp>::value> {};
template <class _Tp>
inline constexpr bool is_arithmetic_v = is_arithmetic<_Tp>::value;
template <class _Tp>
struct __numeric_type
{
static void __test(...);
static float __test(float);
static double __test(char);
static double __test(int);
static double __test(unsigned);
static double __test(long);
static double __test(unsigned long);
static double __test(long long);
static double __test(unsigned long long);
static double __test(double);
static long double __test(long double);
typedef decltype(__test(declval<_Tp>())) type;
static const bool value = !is_same<type, void>::value;
};
template <>
struct __numeric_type<void>
{
static const bool value = true;
};
// __promote
template <class _A1, class _A2 = void, class _A3 = void,
bool = __numeric_type<_A1>::value &&
__numeric_type<_A2>::value &&
__numeric_type<_A3>::value>
class __promote_imp
{
public:
static const bool value = false;
};
template <class _A1, class _A2, class _A3>
class __promote_imp<_A1, _A2, _A3, true>
{
private:
typedef typename __promote_imp<_A1>::type __type1;
typedef typename __promote_imp<_A2>::type __type2;
typedef typename __promote_imp<_A3>::type __type3;
public:
typedef decltype(__type1() + __type2() + __type3()) type;
static const bool value = true;
};
template <class _A1, class _A2>
class __promote_imp<_A1, _A2, void, true>
{
private:
typedef typename __promote_imp<_A1>::type __type1;
typedef typename __promote_imp<_A2>::type __type2;
public:
typedef decltype(__type1() + __type2()) type;
static const bool value = true;
};
template <class _A1>
class __promote_imp<_A1, void, void, true>
{
public:
typedef typename __numeric_type<_A1>::type type;
static const bool value = true;
};
template <class _A1, class _A2 = void, class _A3 = void>
class __promote : public __promote_imp<_A1, _A2, _A3> {};
} // namespace std
namespace std {
using ::signbit;
using ::isfinite;
using ::isinf;
using ::isnan;
using ::abs;
using ::acos;
using ::acosf;
using ::asin;
using ::asinf;
using ::atan;
using ::atanf;
using ::atan2;
using ::atan2f;
using ::ceil;
using ::ceilf;
using ::cos;
using ::cosf;
using ::cosh;
using ::coshf;
using ::exp;
using ::expf;
using ::fabs;
using ::fabsf;
using ::floor;
using ::floorf;
using ::fmod;
using ::fmodf;
using ::frexp;
using ::frexpf;
using ::ldexp;
using ::ldexpf;
using ::log;
using ::logf;
using ::log10;
using ::log10f;
using ::modf;
using ::modff;
using ::pow;
using ::powf;
using ::sin;
using ::sinf;
using ::sinh;
using ::sinhf;
using ::sqrt;
using ::sqrtf;
using ::tan;
using ::tanf;
using ::tanh;
using ::tanhf;
using ::acosh;
using ::acoshf;
using ::asinh;
using ::asinhf;
using ::atanh;
using ::atanhf;
using ::cbrt;
using ::cbrtf;
using ::copysign;
using ::copysignf;
using ::erf;
using ::erff;
using ::erfc;
using ::erfcf;
using ::exp2;
using ::exp2f;
using ::expm1;
using ::expm1f;
using ::fdim;
using ::fdimf;
using ::fmaf;
using ::fma;
using ::fmax;
using ::fmaxf;
using ::fmin;
using ::fminf;
using ::hypot;
using ::hypotf;
using ::ilogb;
using ::ilogbf;
using ::lgamma;
using ::lgammaf;
using ::llrint;
using ::llrintf;
using ::llround;
using ::llroundf;
using ::log1p;
using ::log1pf;
using ::log2;
using ::log2f;
using ::logb;
using ::logbf;
using ::lrint;
using ::lrintf;
using ::lround;
using ::lroundf;
using ::nan;
using ::nanf;
using ::nearbyint;
using ::nearbyintf;
using ::nextafter;
using ::nextafterf;
using ::remainder;
using ::remainderf;
using ::remquo;
using ::remquof;
using ::rint;
using ::rintf;
using ::round;
using ::roundf;
using ::scalbln;
using ::scalblnf;
using ::scalbn;
using ::scalbnf;
using ::tgamma;
using ::tgammaf;
using ::trunc;
using ::truncf;
} // namespace std
// NB: Order matters for this macro; it is relied upon in
// _promoteTypesLookup and the serialization format.
// Note, some types have ctype as void because we don't support them in codegen
#define AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(_) \
_(uint8_t, Byte) /* 0 */ \
_(int8_t, Char) /* 1 */ \
_(int16_t, Short) /* 2 */ \
_(int, Int) /* 3 */ \
_(int64_t, Long) /* 4 */ \
_(at::Half, Half) /* 5 */ \
_(float, Float) /* 6 */ \
_(double, Double) /* 7 */ \
_(std::complex<at::Half>, ComplexHalf) /* 8 */ \
_(std::complex<float>, ComplexFloat) /* 9 */ \
_(std::complex<double>, ComplexDouble) /* 10 */ \
_(bool, Bool) /* 11 */ \
_(void, QInt8) /* 12 */ \
_(void, QUInt8) /* 13 */ \
_(void, QInt32) /* 14 */ \
_(at::BFloat16, BFloat16) /* 15 */ \
#define AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_EXCEPT_QINT(_) \
_(uint8_t, Byte) \
_(int8_t, Char) \
_(int16_t, Short) \
_(int, Int) \
_(int64_t, Long) \
_(at::Half, Half) \
_(float, Float) \
_(double, Double) \
_(std::complex<at::Half>, ComplexHalf) \
_(std::complex<float>, ComplexFloat) \
_(std::complex<double>, ComplexDouble) \
_(bool, Bool) \
_(at::BFloat16, BFloat16)
enum class ScalarType : int8_t {
#define DEFINE_ENUM(_1, n) n,
AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(DEFINE_ENUM)
#undef DEFINE_ENUM
Undefined,
NumOptions
};
template <typename T, int size>
struct Array {
T data[size];
__device__ T operator[](int i) const {
return data[i];
}
__device__ T& operator[](int i) {
return data[i];
}
Array() = default;
Array(const Array&) = default;
Array& operator=(const Array&) = default;
__device__ Array(T x) {
for (int i = 0; i < size; i++) {
data[i] = x;
}
}
};
namespace std {
template<class _Tp> class complex;
template<class _Tp> complex<_Tp> operator*(const complex<_Tp>& __z, const complex<_Tp>& __w);
template<class _Tp> complex<_Tp> operator/(const complex<_Tp>& __x, const complex<_Tp>& __y);
template<class _Tp>
class complex
{
public:
typedef _Tp value_type;
private:
value_type __re_;
value_type __im_;
public:
constexpr
complex(const value_type& __re = value_type(), const value_type& __im = value_type())
: __re_(__re), __im_(__im) {}
template<class _Xp> constexpr
complex(const complex<_Xp>& __c)
: __re_(__c.real()), __im_(__c.imag()) {}
constexpr value_type real() const {return __re_;}
constexpr value_type imag() const {return __im_;}
void real(value_type __re) {__re_ = __re;}
void imag(value_type __im) {__im_ = __im;}
constexpr operator bool() const {
return real() || imag();
}
complex& operator= (const value_type& __re)
{__re_ = __re; __im_ = value_type(); return *this;}
complex& operator+=(const value_type& __re) {__re_ += __re; return *this;}
complex& operator-=(const value_type& __re) {__re_ -= __re; return *this;}
complex& operator*=(const value_type& __re) {__re_ *= __re; __im_ *= __re; return *this;}
complex& operator/=(const value_type& __re) {__re_ /= __re; __im_ /= __re; return *this;}
template<class _Xp> complex& operator= (const complex<_Xp>& __c)
{
__re_ = __c.real();
__im_ = __c.imag();
return *this;
}
template<class _Xp> complex& operator+=(const complex<_Xp>& __c)
{
__re_ += __c.real();
__im_ += __c.imag();
return *this;
}
template<class _Xp> complex& operator-=(const complex<_Xp>& __c)
{
__re_ -= __c.real();
__im_ -= __c.imag();
return *this;
}
template<class _Xp> complex& operator*=(const complex<_Xp>& __c)
{
*this = *this * complex(__c.real(), __c.imag());
return *this;
}
template<class _Xp> complex& operator/=(const complex<_Xp>& __c)
{
*this = *this / complex(__c.real(), __c.imag());
return *this;
}
};
template<> class complex<double>;
template<>
class complex<float>
{
float __re_;
float __im_;
public:
typedef float value_type;
constexpr complex(float __re = 0.0f, float __im = 0.0f)
: __re_(__re), __im_(__im) {}
explicit constexpr complex(const complex<double>& __c);
constexpr float real() const {return __re_;}
constexpr float imag() const {return __im_;}
void real(value_type __re) {__re_ = __re;}
void imag(value_type __im) {__im_ = __im;}
constexpr operator bool() const {
return real() || imag();
}
complex& operator= (float __re)
{__re_ = __re; __im_ = value_type(); return *this;}
complex& operator+=(float __re) {__re_ += __re; return *this;}
complex& operator-=(float __re) {__re_ -= __re; return *this;}
complex& operator*=(float __re) {__re_ *= __re; __im_ *= __re; return *this;}
complex& operator/=(float __re) {__re_ /= __re; __im_ /= __re; return *this;}
template<class _Xp> complex& operator= (const complex<_Xp>& __c)
{
__re_ = __c.real();
__im_ = __c.imag();
return *this;
}
template<class _Xp> complex& operator+=(const complex<_Xp>& __c)
{
__re_ += __c.real();
__im_ += __c.imag();
return *this;
}
template<class _Xp> complex& operator-=(const complex<_Xp>& __c)
{
__re_ -= __c.real();
__im_ -= __c.imag();
return *this;
}
template<class _Xp> complex& operator*=(const complex<_Xp>& __c)
{
*this = *this * complex(__c.real(), __c.imag());
return *this;
}
template<class _Xp> complex& operator/=(const complex<_Xp>& __c)
{
*this = *this / complex(__c.real(), __c.imag());
return *this;
}
};
template<>
class complex<double>
{
double __re_;
double __im_;
public:
typedef double value_type;
constexpr complex(double __re = 0.0, double __im = 0.0)
: __re_(__re), __im_(__im) {}
constexpr complex(const complex<float>& __c);
constexpr double real() const {return __re_;}
constexpr double imag() const {return __im_;}
void real(value_type __re) {__re_ = __re;}
void imag(value_type __im) {__im_ = __im;}
constexpr operator bool() const {
return real() || imag();
}
complex& operator= (double __re)
{__re_ = __re; __im_ = value_type(); return *this;}
complex& operator+=(double __re) {__re_ += __re; return *this;}
complex& operator-=(double __re) {__re_ -= __re; return *this;}
complex& operator*=(double __re) {__re_ *= __re; __im_ *= __re; return *this;}
complex& operator/=(double __re) {__re_ /= __re; __im_ /= __re; return *this;}
template<class _Xp> complex& operator= (const complex<_Xp>& __c)
{
__re_ = __c.real();
__im_ = __c.imag();
return *this;
}
template<class _Xp> complex& operator+=(const complex<_Xp>& __c)
{
__re_ += __c.real();
__im_ += __c.imag();
return *this;
}
template<class _Xp> complex& operator-=(const complex<_Xp>& __c)
{
__re_ -= __c.real();
__im_ -= __c.imag();
return *this;
}
template<class _Xp> complex& operator*=(const complex<_Xp>& __c)
{
*this = *this * complex(__c.real(), __c.imag());
return *this;
}
template<class _Xp> complex& operator/=(const complex<_Xp>& __c)
{
*this = *this / complex(__c.real(), __c.imag());
return *this;
}
};
inline
constexpr
complex<float>::complex(const complex<double>& __c)
: __re_(__c.real()), __im_(__c.imag()) {}
inline
constexpr
complex<double>::complex(const complex<float>& __c)
: __re_(__c.real()), __im_(__c.imag()) {}
// 26.3.6 operators:
template<class _Tp>
inline
complex<_Tp>
operator+(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
complex<_Tp> __t(__x);
__t += __y;
return __t;
}
template<class _Tp>
inline
complex<_Tp>
operator+(const complex<_Tp>& __x, const _Tp& __y)
{
complex<_Tp> __t(__x);
__t += __y;
return __t;
}
template<class _Tp>
inline
complex<_Tp>
operator+(const _Tp& __x, const complex<_Tp>& __y)
{
complex<_Tp> __t(__y);
__t += __x;
return __t;
}
template<class _Tp>
inline
complex<_Tp>
operator-(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
complex<_Tp> __t(__x);
__t -= __y;
return __t;
}
template<class _Tp>
inline
complex<_Tp>
operator-(const complex<_Tp>& __x, const _Tp& __y)
{
complex<_Tp> __t(__x);
__t -= __y;
return __t;
}
template<class _Tp>
inline
complex<_Tp>
operator-(const _Tp& __x, const complex<_Tp>& __y)
{
complex<_Tp> __t(-__y);
__t += __x;
return __t;
}
template<class _Tp>
complex<_Tp>
operator*(const complex<_Tp>& __z, const complex<_Tp>& __w)
{
_Tp __a = __z.real();
_Tp __b = __z.imag();
_Tp __c = __w.real();
_Tp __d = __w.imag();
_Tp __ac = __a * __c;
_Tp __bd = __b * __d;
_Tp __ad = __a * __d;
_Tp __bc = __b * __c;
_Tp __x = __ac - __bd;
_Tp __y = __ad + __bc;
if (isnan(__x) && isnan(__y))
{
bool __recalc = false;
if (isinf(__a) || isinf(__b))
{
__a = copysign(isinf(__a) ? _Tp(1) : _Tp(0), __a);
__b = copysign(isinf(__b) ? _Tp(1) : _Tp(0), __b);
if (isnan(__c))
__c = copysign(_Tp(0), __c);
if (isnan(__d))
__d = copysign(_Tp(0), __d);
__recalc = true;
}
if (isinf(__c) || isinf(__d))
{
__c = copysign(isinf(__c) ? _Tp(1) : _Tp(0), __c);
__d = copysign(isinf(__d) ? _Tp(1) : _Tp(0), __d);
if (isnan(__a))
__a = copysign(_Tp(0), __a);
if (isnan(__b))
__b = copysign(_Tp(0), __b);
__recalc = true;
}
if (!__recalc && (isinf(__ac) || isinf(__bd) ||
isinf(__ad) || isinf(__bc)))
{
if (isnan(__a))
__a = copysign(_Tp(0), __a);
if (isnan(__b))
__b = copysign(_Tp(0), __b);
if (isnan(__c))
__c = copysign(_Tp(0), __c);
if (isnan(__d))
__d = copysign(_Tp(0), __d);
__recalc = true;
}
if (__recalc)
{
__x = _Tp(INFINITY) * (__a * __c - __b * __d);
__y = _Tp(INFINITY) * (__a * __d + __b * __c);
}
}
return complex<_Tp>(__x, __y);
}
template<class _Tp>
inline
complex<_Tp>
operator*(const complex<_Tp>& __x, const _Tp& __y)
{
complex<_Tp> __t(__x);
__t *= __y;
return __t;
}
template<class _Tp>
inline
complex<_Tp>
operator*(const _Tp& __x, const complex<_Tp>& __y)
{
complex<_Tp> __t(__y);
__t *= __x;
return __t;
}
template<class _Tp>
complex<_Tp>
operator/(const complex<_Tp>& __z, const complex<_Tp>& __w)
{
int __ilogbw = 0;
_Tp __a = __z.real();
_Tp __b = __z.imag();
_Tp __c = __w.real();
_Tp __d = __w.imag();
_Tp __logbw = logb(fmax(fabs(__c), fabs(__d)));
if (isfinite(__logbw))
{
__ilogbw = static_cast<int>(__logbw);
__c = scalbn(__c, -__ilogbw);
__d = scalbn(__d, -__ilogbw);
}
_Tp __denom = __c * __c + __d * __d;
_Tp __x = scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
_Tp __y = scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
if (isnan(__x) && isnan(__y))
{
if ((__denom == _Tp(0)) && (!isnan(__a) || !isnan(__b)))
{
__x = copysign(_Tp(INFINITY), __c) * __a;
__y = copysign(_Tp(INFINITY), __c) * __b;
}
else if ((isinf(__a) || isinf(__b)) && isfinite(__c) && isfinite(__d))
{
__a = copysign(isinf(__a) ? _Tp(1) : _Tp(0), __a);
__b = copysign(isinf(__b) ? _Tp(1) : _Tp(0), __b);
__x = _Tp(INFINITY) * (__a * __c + __b * __d);
__y = _Tp(INFINITY) * (__b * __c - __a * __d);
}
else if (isinf(__logbw) && __logbw > _Tp(0) && isfinite(__a) && isfinite(__b))
{
__c = copysign(isinf(__c) ? _Tp(1) : _Tp(0), __c);
__d = copysign(isinf(__d) ? _Tp(1) : _Tp(0), __d);
__x = _Tp(0) * (__a * __c + __b * __d);
__y = _Tp(0) * (__b * __c - __a * __d);
}
}
return complex<_Tp>(__x, __y);
}
template<class _Tp>
inline
complex<_Tp>
operator/(const complex<_Tp>& __x, const _Tp& __y)
{
return complex<_Tp>(__x.real() / __y, __x.imag() / __y);
}
template<class _Tp>
inline
complex<_Tp>
operator/(const _Tp& __x, const complex<_Tp>& __y)
{
complex<_Tp> __t(__x);
__t /= __y;
return __t;
}
template<class _Tp>
inline
complex<_Tp>
operator+(const complex<_Tp>& __x)
{
return __x;
}
template<class _Tp>
inline
complex<_Tp>
operator-(const complex<_Tp>& __x)
{
return complex<_Tp>(-__x.real(), -__x.imag());
}
template<class _Tp>
inline constexpr
bool
operator==(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
return __x.real() == __y.real() && __x.imag() == __y.imag();
}
template<class _Tp>
inline constexpr
bool
operator==(const complex<_Tp>& __x, const _Tp& __y)
{
return __x.real() == __y && __x.imag() == 0;
}
template<class _Tp>
inline constexpr
bool
operator==(const _Tp& __x, const complex<_Tp>& __y)
{
return __x == __y.real() && 0 == __y.imag();
}
template<class _Tp>
inline constexpr
bool
operator!=(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
return !(__x == __y);
}
template<class _Tp>
inline constexpr
bool
operator!=(const complex<_Tp>& __x, const _Tp& __y)
{
return !(__x == __y);
}
template<class _Tp>
inline constexpr
bool
operator!=(const _Tp& __x, const complex<_Tp>& __y)
{
return !(__x == __y);
}
template<class _Tp>
inline constexpr
bool
operator&&(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
return bool(__x) && bool(__y);
}
template<class _Tp>
inline constexpr
bool
isnan(const complex<_Tp>& __x)
{
return isnan(__x.real()) || isnan(__x.imag());
}
template<class _Tp>
inline constexpr
bool
operator||(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
return bool(__x) || bool(__y);
}
// 26.3.7 values:
template <class _Tp, bool = is_integral<_Tp>::value,
bool = is_floating_point<_Tp>::value
>
struct __libcpp_complex_overload_traits {};
// Integral Types
template <class _Tp>
struct __libcpp_complex_overload_traits<_Tp, true, false>
{
typedef double _ValueType;
typedef complex<double> _ComplexType;
};
// Floating point types
template <class _Tp>
struct __libcpp_complex_overload_traits<_Tp, false, true>
{
typedef _Tp _ValueType;
typedef complex<_Tp> _ComplexType;
};
// real
template<class _Tp>
inline constexpr
_Tp
real(const complex<_Tp>& __c)
{
return __c.real();
}
template <class _Tp>
inline constexpr
typename __libcpp_complex_overload_traits<_Tp>::_ValueType
real(_Tp __re)
{
return __re;
}
// imag
template<class _Tp>
inline constexpr
_Tp
imag(const complex<_Tp>& __c)
{
return __c.imag();
}
template <class _Tp>
inline constexpr
typename __libcpp_complex_overload_traits<_Tp>::_ValueType
imag(_Tp)
{
return 0;
}
// abs
template<class _Tp>
inline
_Tp
abs(const complex<_Tp>& __c)
{
return hypot(__c.real(), __c.imag());
}
// arg
template<class _Tp>
inline
_Tp
arg(const complex<_Tp>& __c)
{
return atan2(__c.imag(), __c.real());
}
template<class _Tp>
inline
typename enable_if
<
is_integral<_Tp>::value || is_same<_Tp, double>::value,
double
>::type
arg(_Tp __re)
{
return atan2(0., __re);
}
template <class _Tp>
inline
typename enable_if<
is_same<_Tp, float>::value,
float
>::type
arg(_Tp __re)
{
return atan2f(0.F, __re);
}
}
namespace std {
// norm
template<class _Tp>
inline
_Tp
norm(const complex<_Tp>& __c)
{
if (isinf(__c.real()))
return abs(__c.real());
if (isinf(__c.imag()))
return abs(__c.imag());
return __c.real() * __c.real() + __c.imag() * __c.imag();
}
template <class _Tp>
inline
typename __libcpp_complex_overload_traits<_Tp>::_ValueType
norm(_Tp __re)
{
typedef typename __libcpp_complex_overload_traits<_Tp>::_ValueType _ValueType;
return static_cast<_ValueType>(__re) * __re;
}
// conj
template<class _Tp>
inline
complex<_Tp>
conj(const complex<_Tp>& __c)
{
return complex<_Tp>(__c.real(), -__c.imag());
}
template <class _Tp>
inline
typename __libcpp_complex_overload_traits<_Tp>::_ComplexType
conj(_Tp __re)
{
typedef typename __libcpp_complex_overload_traits<_Tp>::_ComplexType _ComplexType;
return _ComplexType(__re);
}
// proj
template<class _Tp>
inline
complex<_Tp>
proj(const complex<_Tp>& __c)
{
complex<_Tp> __r = __c;
if (isinf(__c.real()) || isinf(__c.imag()))
__r = complex<_Tp>(INFINITY, copysign(_Tp(0), __c.imag()));
return __r;
}
template <class _Tp>
inline
typename enable_if
<
is_floating_point<_Tp>::value,
typename __libcpp_complex_overload_traits<_Tp>::_ComplexType
>::type
proj(_Tp __re)
{
if (isinf(__re))
__re = abs(__re);
return complex<_Tp>(__re);
}
template <class _Tp>
inline
typename enable_if
<
is_integral<_Tp>::value,
typename __libcpp_complex_overload_traits<_Tp>::_ComplexType
>::type
proj(_Tp __re)
{
typedef typename __libcpp_complex_overload_traits<_Tp>::_ComplexType _ComplexType;
return _ComplexType(__re);
}
// polar
template<class _Tp>
complex<_Tp>
polar(const _Tp& __rho, const _Tp& __theta = _Tp())
{
if (isnan(__rho) || signbit(__rho))
return complex<_Tp>(_Tp(NAN), _Tp(NAN));
if (isnan(__theta))
{
if (isinf(__rho))
return complex<_Tp>(__rho, __theta);
return complex<_Tp>(__theta, __theta);
}
if (isinf(__theta))
{
if (isinf(__rho))
return complex<_Tp>(__rho, _Tp(NAN));
return complex<_Tp>(_Tp(NAN), _Tp(NAN));
}
_Tp __x = __rho * cos(__theta);
if (isnan(__x))
__x = 0;
_Tp __y = __rho * sin(__theta);
if (isnan(__y))
__y = 0;
return complex<_Tp>(__x, __y);
}
// log
template<class _Tp>
inline
complex<_Tp>
log(const complex<_Tp>& __x)
{
return complex<_Tp>(log(abs(__x)), arg(__x));
}
// log10
template<class _Tp>
inline
complex<_Tp>
log10(const complex<_Tp>& __x)
{
return log(__x) / log(_Tp(10));
}
// log2
template<class _Tp>
inline
complex<_Tp>
log2(const complex<_Tp>& __x)
{
return log(__x) / log(_Tp(2));
}
// sqrt
template<class _Tp>
complex<_Tp>
sqrt(const complex<_Tp>& __x)
{
if (isinf(__x.imag()))
return complex<_Tp>(_Tp(INFINITY), __x.imag());
if (isinf(__x.real()))
{
if (__x.real() > _Tp(0))
return complex<_Tp>(__x.real(), isnan(__x.imag()) ? __x.imag() : copysign(_Tp(0), __x.imag()));
return complex<_Tp>(isnan(__x.imag()) ? __x.imag() : _Tp(0), copysign(__x.real(), __x.imag()));
}
return polar(sqrt(abs(__x)), arg(__x) / _Tp(2));
}
// exp
template<class _Tp>
complex<_Tp>
exp(const complex<_Tp>& __x)
{
_Tp __i = __x.imag();
if (__i == 0) {
return complex<_Tp>(exp(__x.real()), copysign(_Tp(0), __x.imag()));
}
if (isinf(__x.real()))
{
if (__x.real() < _Tp(0))
{
if (!isfinite(__i))
__i = _Tp(1);
}
else if (__i == 0 || !isfinite(__i))
{
if (isinf(__i))
__i = _Tp(NAN);
return complex<_Tp>(__x.real(), __i);
}
}
_Tp __e = exp(__x.real());
return complex<_Tp>(__e * cos(__i), __e * sin(__i));
}
// pow
template<class _Tp>
inline
complex<_Tp>
pow(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
return exp(__y * log(__x));
}
template<class _Tp, class _Up>
inline
complex<typename __promote<_Tp, _Up>::type>
pow(const complex<_Tp>& __x, const complex<_Up>& __y)
{
typedef complex<typename __promote<_Tp, _Up>::type> result_type;
return std::pow(result_type(__x), result_type(__y));
}
template<class _Tp, class _Up>
inline
typename enable_if
<
is_arithmetic<_Up>::value,
complex<typename __promote<_Tp, _Up>::type>
>::type
pow(const complex<_Tp>& __x, const _Up& __y)
{
typedef complex<typename __promote<_Tp, _Up>::type> result_type;
return std::pow(result_type(__x), result_type(__y));
}
template<class _Tp, class _Up>
inline
typename enable_if
<
is_arithmetic<_Tp>::value,
complex<typename __promote<_Tp, _Up>::type>
>::type
pow(const _Tp& __x, const complex<_Up>& __y)
{
typedef complex<typename __promote<_Tp, _Up>::type> result_type;
return std::pow(result_type(__x), result_type(__y));
}
// __sqr, computes pow(x, 2)
template<class _Tp>
inline
complex<_Tp>
__sqr(const complex<_Tp>& __x)
{
return complex<_Tp>((__x.real() - __x.imag()) * (__x.real() + __x.imag()),
_Tp(2) * __x.real() * __x.imag());
}
// asinh
template<class _Tp>
complex<_Tp>
asinh(const complex<_Tp>& __x)
{
const _Tp __pi(atan2(+0., -0.));
if (isinf(__x.real()))
{
if (isnan(__x.imag()))
return __x;
if (isinf(__x.imag()))
return complex<_Tp>(__x.real(), copysign(__pi * _Tp(0.25), __x.imag()));
return complex<_Tp>(__x.real(), copysign(_Tp(0), __x.imag()));
}
if (isnan(__x.real()))
{
if (isinf(__x.imag()))
return complex<_Tp>(__x.imag(), __x.real());
if (__x.imag() == 0)
return __x;
return complex<_Tp>(__x.real(), __x.real());
}
if (isinf(__x.imag()))
return complex<_Tp>(copysign(__x.imag(), __x.real()), copysign(__pi/_Tp(2), __x.imag()));
complex<_Tp> __z = log(__x + sqrt(__sqr(__x) + _Tp(1)));
return complex<_Tp>(copysign(__z.real(), __x.real()), copysign(__z.imag(), __x.imag()));
}
// acosh
template<class _Tp>
complex<_Tp>
acosh(const complex<_Tp>& __x)
{
const _Tp __pi(atan2(+0., -0.));
if (isinf(__x.real()))
{
if (isnan(__x.imag()))
return complex<_Tp>(abs(__x.real()), __x.imag());
if (isinf(__x.imag()))
{
if (__x.real() > 0)
return complex<_Tp>(__x.real(), copysign(__pi * _Tp(0.25), __x.imag()));
else
return complex<_Tp>(-__x.real(), copysign(__pi * _Tp(0.75), __x.imag()));
}
if (__x.real() < 0)
return complex<_Tp>(-__x.real(), copysign(__pi, __x.imag()));
return complex<_Tp>(__x.real(), copysign(_Tp(0), __x.imag()));
}
if (isnan(__x.real()))
{
if (isinf(__x.imag()))
return complex<_Tp>(abs(__x.imag()), __x.real());
return complex<_Tp>(__x.real(), __x.real());
}
if (isinf(__x.imag()))
return complex<_Tp>(abs(__x.imag()), copysign(__pi/_Tp(2), __x.imag()));
complex<_Tp> __z = log(__x + sqrt(__sqr(__x) - _Tp(1)));
return complex<_Tp>(copysign(__z.real(), _Tp(0)), copysign(__z.imag(), __x.imag()));
}
// atanh
template<class _Tp>
complex<_Tp>
atanh(const complex<_Tp>& __x)
{
const _Tp __pi(atan2(+0., -0.));
if (isinf(__x.imag()))
{
return complex<_Tp>(copysign(_Tp(0), __x.real()), copysign(__pi/_Tp(2), __x.imag()));
}
if (isnan(__x.imag()))
{
if (isinf(__x.real()) || __x.real() == 0)
return complex<_Tp>(copysign(_Tp(0), __x.real()), __x.imag());
return complex<_Tp>(__x.imag(), __x.imag());
}
if (isnan(__x.real()))
{
return complex<_Tp>(__x.real(), __x.real());
}
if (isinf(__x.real()))
{
return complex<_Tp>(copysign(_Tp(0), __x.real()), copysign(__pi/_Tp(2), __x.imag()));
}
if (abs(__x.real()) == _Tp(1) && __x.imag() == _Tp(0))
{
return complex<_Tp>(copysign(_Tp(INFINITY), __x.real()), copysign(_Tp(0), __x.imag()));
}
complex<_Tp> __z = log((_Tp(1) + __x) / (_Tp(1) - __x)) / _Tp(2);
return complex<_Tp>(copysign(__z.real(), __x.real()), copysign(__z.imag(), __x.imag()));
}
// sinh
template<class _Tp>
complex<_Tp>
sinh(const complex<_Tp>& __x)
{
if (isinf(__x.real()) && !isfinite(__x.imag()))
return complex<_Tp>(__x.real(), _Tp(NAN));
if (__x.real() == 0 && !isfinite(__x.imag()))
return complex<_Tp>(__x.real(), _Tp(NAN));
if (__x.imag() == 0 && !isfinite(__x.real()))
return __x;
return complex<_Tp>(sinh(__x.real()) * cos(__x.imag()), cosh(__x.real()) * sin(__x.imag()));
}
// cosh
template<class _Tp>
complex<_Tp>
cosh(const complex<_Tp>& __x)
{
if (isinf(__x.real()) && !isfinite(__x.imag()))
return complex<_Tp>(abs(__x.real()), _Tp(NAN));
if (__x.real() == 0 && !isfinite(__x.imag()))
return complex<_Tp>(_Tp(NAN), __x.real());
if (__x.real() == 0 && __x.imag() == 0)
return complex<_Tp>(_Tp(1), __x.imag());
if (__x.imag() == 0 && !isfinite(__x.real()))
return complex<_Tp>(abs(__x.real()), __x.imag());
return complex<_Tp>(cosh(__x.real()) * cos(__x.imag()), sinh(__x.real()) * sin(__x.imag()));
}
// tanh
template<class _Tp>
complex<_Tp>
tanh(const complex<_Tp>& __x)
{
if (isinf(__x.real()))
{
if (!isfinite(__x.imag()))
return complex<_Tp>(copysign(_Tp(1), __x.real()), _Tp(0));
return complex<_Tp>(copysign(_Tp(1), __x.real()), copysign(_Tp(0), sin(_Tp(2) * __x.imag())));
}
if (isnan(__x.real()) && __x.imag() == 0)
return __x;
_Tp __2r(_Tp(2) * __x.real());
_Tp __2i(_Tp(2) * __x.imag());
_Tp __d(cosh(__2r) + cos(__2i));
_Tp __2rsh(sinh(__2r));
if (isinf(__2rsh) && isinf(__d))
return complex<_Tp>(__2rsh > _Tp(0) ? _Tp(1) : _Tp(-1),
__2i > _Tp(0) ? _Tp(0) : _Tp(-0.));
return complex<_Tp>(__2rsh/__d, sin(__2i)/__d);
}
// asin
template<class _Tp>
complex<_Tp>
asin(const complex<_Tp>& __x)
{
complex<_Tp> __z = asinh(complex<_Tp>(-__x.imag(), __x.real()));
return complex<_Tp>(__z.imag(), -__z.real());
}
// acos
template<class _Tp>
complex<_Tp>
acos(const complex<_Tp>& __x)
{
const _Tp __pi(atan2(+0., -0.));
if (isinf(__x.real()))
{
if (isnan(__x.imag()))
return complex<_Tp>(__x.imag(), __x.real());
if (isinf(__x.imag()))
{
if (__x.real() < _Tp(0))
return complex<_Tp>(_Tp(0.75) * __pi, -__x.imag());
return complex<_Tp>(_Tp(0.25) * __pi, -__x.imag());
}
if (__x.real() < _Tp(0))
return complex<_Tp>(__pi, signbit(__x.imag()) ? -__x.real() : __x.real());
return complex<_Tp>(_Tp(0), signbit(__x.imag()) ? __x.real() : -__x.real());
}
if (isnan(__x.real()))
{
if (isinf(__x.imag()))
return complex<_Tp>(__x.real(), -__x.imag());
return complex<_Tp>(__x.real(), __x.real());
}
if (isinf(__x.imag()))
return complex<_Tp>(__pi/_Tp(2), -__x.imag());
if (__x.real() == 0 && (__x.imag() == 0 || isnan(__x.imag())))
return complex<_Tp>(__pi/_Tp(2), -__x.imag());
complex<_Tp> __z = log(__x + sqrt(__sqr(__x) - _Tp(1)));
if (signbit(__x.imag()))
return complex<_Tp>(abs(__z.imag()), abs(__z.real()));
return complex<_Tp>(abs(__z.imag()), -abs(__z.real()));
}
// atan
template<class _Tp>
complex<_Tp>
atan(const complex<_Tp>& __x)
{
complex<_Tp> __z = atanh(complex<_Tp>(-__x.imag(), __x.real()));
return complex<_Tp>(__z.imag(), -__z.real());
}
// sin
template<class _Tp>
complex<_Tp>
sin(const complex<_Tp>& __x)
{
complex<_Tp> __z = sinh(complex<_Tp>(-__x.imag(), __x.real()));
return complex<_Tp>(__z.imag(), -__z.real());
}
// cos
template<class _Tp>
inline
complex<_Tp>
cos(const complex<_Tp>& __x)
{
return cosh(complex<_Tp>(-__x.imag(), __x.real()));
}
// tan
template<class _Tp>
complex<_Tp>
tan(const complex<_Tp>& __x)
{
complex<_Tp> __z = tanh(complex<_Tp>(-__x.imag(), __x.real()));
return complex<_Tp>(__z.imag(), -__z.real());
}
// Literal suffix for complex number literals [complex.literals]
inline namespace literals
{
inline namespace complex_literals
{
constexpr complex<double> operator""i(long double __im)
{
return { 0.0, static_cast<double>(__im) };
}
constexpr complex<double> operator""i(unsigned long long __im)
{
return { 0.0, static_cast<double>(__im) };
}
constexpr complex<float> operator""if(long double __im)
{
return { 0.0f, static_cast<float>(__im) };
}
constexpr complex<float> operator""if(unsigned long long __im)
{
return { 0.0f, static_cast<float>(__im) };
}
} // namespace complex_literals
} // namespace literals
} // namespace std
namespace c10 {
template <typename T>
struct LoadImpl {
__device__ static T apply(const void *src) {
return *reinterpret_cast<const T*>(src);
}
};
template <>
struct LoadImpl<bool> {
__device__ static bool apply(const void *src) {
static_assert(sizeof(bool) == sizeof(char), "");
return LoadImpl<char>::apply(src);
}
};
template <typename T>
__device__ T load(const void *src) {
return LoadImpl<T>::apply(src);
}
template <typename scalar_t>
__device__ scalar_t load(const scalar_t *src) {
return LoadImpl<scalar_t>::apply(src);
}
} // namespace c10
template <typename scalar_t>
__device__ __inline__ scalar_t load(char* base_ptr, uint32_t offset) {
return c10::load(reinterpret_cast<scalar_t*>(base_ptr) + offset);
}
template<typename scalar_t>
__device__ __inline__ void store(scalar_t value, char *base_ptr, uint32_t offset) {
*(reinterpret_cast<scalar_t *>(base_ptr) + offset) = value;
}
// aligned vector generates vectorized load/store on CUDA
template<typename scalar_t, int vec_size>
struct alignas(sizeof(scalar_t) * vec_size) aligned_vector {
scalar_t val[vec_size];
};
template <int vec_size, typename scalar_t>
__device__ aligned_vector<scalar_t, vec_size> load_vector(const scalar_t *base_ptr, uint32_t offset) {
using vec_t = aligned_vector<scalar_t, vec_size>;
auto *from = reinterpret_cast<const vec_t *>(base_ptr);
return from[offset];
}
template <int vec_size>
__device__ aligned_vector<bool, vec_size> load_vector(const bool *base_ptr, uint32_t offset) {
// See NOTE [Loading boolean values]
auto tmp = load_vector<vec_size>(reinterpret_cast<const uint8_t*>(base_ptr), offset);
aligned_vector<bool, vec_size> ret;
for (int i = 0; i < vec_size; ++i) {
ret.val[i] = bool(tmp.val[i]);
}
return ret;
}
template <typename T> T abs_kernel(T x) { return std::abs(x); }
// TODO: setup grid-stride loop
extern "C" __global__
void abs_kernel_vectorized4_kernel(
const int N,
Array<char*, 1+1> data,
std::complex<float> scalar_val) //[1+1],
{
constexpr int vec_size = 4;
using scalar_t = std::complex<float>;
int remaining = N - block_work_size * blockIdx.x;
int thread_idx = threadIdx.x;
int idx = blockIdx.x;
std::complex<float> arg0[4];
std::complex<float> out0[4];
if (remaining < block_work_size) {
#pragma unroll
for (int j = 0; j < thread_work_size; j++){
if (thread_idx >= remaining) {
break;
}
int linear_idx = thread_idx + block_work_size * idx;
arg0[j] = load<std::complex<float>>(data[1], linear_idx);
thread_idx += num_threads;
}
#pragma unroll
for (int j = 0; j < thread_work_size; j++) {
if ((threadIdx.x + j*num_threads) < remaining) {
out0[j] = abs_kernel<std::complex<float>>(arg0[j] );
}
}
thread_idx = threadIdx.x;
#pragma unroll
for (int j = 0; j < thread_work_size; j++) {
if (thread_idx >= remaining) {
break;
}
int linear_idx = thread_idx + block_work_size * idx;
store<std::complex<float>>(out0[j], data[0], linear_idx);
thread_idx += num_threads;
}
} else {
static constexpr int loop_size = thread_work_size / vec_size;
//actual loading
auto * input0 = reinterpret_cast<const scalar_t*>(data[0+1]) + block_work_size * idx;
#pragma unroll
for (int i = 0; i<loop_size; i++){
const auto vec0 = load_vector<vec_size>(input0, thread_idx);
#pragma unroll
for (int j=0; j < vec_size; j++){
arg0[vec_size * i + j] = vec0.val[j];
}
thread_idx += num_threads;
}
#pragma unroll
for (int j = 0; j < thread_work_size; j++) {
out0[j] = abs_kernel<std::complex<float>>(arg0[j] );
}
using vec_t_output = aligned_vector<std::complex<float>, vec_size>;
vec_t_output* to_0 = reinterpret_cast<vec_t_output*>(data[0]) + block_work_size / vec_size * idx;
int thread_idx = threadIdx.x;
#pragma unroll
for (int i = 0; i<loop_size; i++){
vec_t_output v;
#pragma unroll
for (int j=0; j<vec_size; j++){
v.val[j] = out0[vec_size * i + j];
}
to_0[thread_idx] = v;
thread_idx += num_threads;
}
}
}
nvrtc: error: failed to open nvrtc-builtins64_121.dll.
Make sure that nvrtc-builtins64_121.dll is installed correctly.
分析这个报错,用中文回答
最新发布