1 #ifndef STAN_MATH_REV_MAT_FUN_LOG_SOFTMAX_HPP
2 #define STAN_MATH_REV_MAT_FUN_LOG_SOFTMAX_HPP

#include <cmath>
#include <stdexcept>
18 class log_softmax_elt_vari :
public vari {
26 log_softmax_elt_vari(
double val,
28 const double* softmax_alpha,
33 softmax_alpha_(softmax_alpha),
38 for (
int m = 0; m <
size_; ++m) {
40 alpha_[m]->adj_ += adj_ * (1 - softmax_alpha_[m]);
42 alpha_[m]->adj_ -= adj_ * softmax_alpha_[m];
60 inline Eigen::Matrix<var, Eigen::Dynamic, 1>
61 log_softmax(
const Eigen::Matrix<var, Eigen::Dynamic, 1>& alpha) {
67 if (alpha.size() == 0)
69 "must have size > 0");
70 if (alpha.size() == 0)
72 "must have size > 0");
73 if (alpha.size() == 0)
75 "must have size > 0");
79 =
reinterpret_cast<vari**
>
80 (vari::operator
new(
sizeof(
vari*) * alpha.size()));
81 for (
int i = 0; i < alpha.size(); ++i)
82 alpha_vi_array[i] = alpha(i).vi_;
85 Matrix<double, Dynamic, 1> alpha_d(alpha.size());
86 for (
int i = 0; i < alpha_d.size(); ++i)
87 alpha_d(i) = alpha(i).val();
92 Matrix<double, Dynamic, 1> softmax_alpha_d(alpha_d.size());
93 Matrix<double, Dynamic, 1> log_softmax_alpha_d(alpha_d.size());
95 double max_v = alpha_d.maxCoeff();
98 for (
int i = 0; i < alpha_d.size(); ++i) {
99 softmax_alpha_d(i) =
std::exp(alpha_d(i) - max_v);
100 sum += softmax_alpha_d(i);
103 for (
int i = 0; i < alpha_d.size(); ++i)
104 softmax_alpha_d(i) /=
sum;
107 for (
int i = 0; i < alpha_d.size(); ++i)
108 log_softmax_alpha_d(i) = (alpha_d(i) - max_v) - log_sum;
112 double* softmax_alpha_d_array
113 =
reinterpret_cast<double*
>
114 (vari::operator
new(
sizeof(double) * alpha_d.size()));
116 for (
int i = 0; i < alpha_d.size(); ++i)
117 softmax_alpha_d_array[i] = softmax_alpha_d(i);
119 Matrix<var, Dynamic, 1> log_softmax_alpha(alpha.size());
120 for (
int k = 0; k < log_softmax_alpha.size(); ++k)
122 =
var(
new log_softmax_elt_vari(log_softmax_alpha_d[k],
124 softmax_alpha_d_array,
127 return log_softmax_alpha;
fvar< T > sum(const std::vector< fvar< T > > &m)
Return the sum of the entries of the specified standard vector.
const double * softmax_alpha_
fvar< T > log(const fvar< T > &x)
The variable implementation base class.
Independent (input) and dependent (output) variables for gradients.
Eigen::Matrix< fvar< T >, Eigen::Dynamic, 1 > log_softmax(const Eigen::Matrix< fvar< T >, Eigen::Dynamic, 1 > &alpha)
bool check_nonzero_size(const char *function, const char *name, const T_y &y)
Return true if the specified matrix/vector is of non-zero size.
fvar< T > exp(const fvar< T > &x)
void domain_error(const char *function, const char *name, const T &y, const char *msg1, const char *msg2)
Throw a domain error with a consistently formatted message.
int size(const std::vector< T > &x)
Return the size of the specified standard vector.