coreylowman · coreylowman · Jan 30, 2023 · Jan 26, 2023 · coreylowman · Jan 27, 2023
diff --git a/src/tensor_ops/gelu/gelu.cu b/src/tensor_ops/gelu/gelu.cu
@@ -1,4 +1,6 @@
 #include "unary_op_macros.cuh"
+#define _USE_MATH_DEFINES
+#include <math.h>
 
 struct GeLUKernelOp {};
 
@@ -10,16 +12,16 @@ LONG_UNARY_OP(gelu_forward, gelu_backward, GeLUKernelOp,
 
         float alpha = x + fastCoeff * x_cube;
 
-        float y = 0.5 * x * (1.0 + tanh(M_2_SQRTPI * M_SQRT1_2 * alpha));
+        float y = 0.5 * x * (1.0 + tanhf(M_2_SQRTPI * M_SQRT1_2 * alpha));
         out[i] = y;
     },
     {
-        float kBeta = M_2_SQRTPI * M_SQRT2 * 0.5;                       
+        constexpr float kBeta = M_2_SQRTPI * M_SQRT2 * 0.5;                       
         constexpr float fastCoeff = 0.044715;
         float x_sq = x * x;
         float x_cube = x_sq * x;
         float inner = kBeta * (x + fastCoeff * x_cube);
-        float tanh_inner = tanh(inner);
+        float tanh_inner = tanhf(inner);
 
         float left = 0.5 * x;
         float right = 1.0 + tanh_inner;