diff --git a/stan/math/opencl/kernels/multinomial_logit_glm_lpmf.hpp b/stan/math/opencl/kernels/multinomial_logit_glm_lpmf.hpp new file mode 100644 index 00000000000..20d512642c1 --- /dev/null +++ b/stan/math/opencl/kernels/multinomial_logit_glm_lpmf.hpp @@ -0,0 +1,141 @@ +#ifndef STAN_MATH_OPENCL_KERNELS_MULTINOMIAL_LOGIT_GLM_LPMF_HPP +#define STAN_MATH_OPENCL_KERNELS_MULTINOMIAL_LOGIT_GLM_LPMF_HPP +#ifdef STAN_OPENCL + +#include +#include + +namespace stan { +namespace math { +namespace opencl_kernels { + +// \cond +static constexpr const char* multinomial_logit_glm_kernel_code = STRINGIFY( + // \endcond + /** \ingroup opencl_kernels + * GPU kernel for the Generalized Linear Model (GLM) with multinomial + * distribution and softmax (logit) link function. + * + * Must be run with at least N_instances threads and local size LOCAL_SIZE_. + * Each thread handles one instance n = gid. + * + * The kernel performs two passes over the K classes for instance n: + * 1. find max(eta[n,:]) for numerical stability, + * 2. accumulate sum_exp, S_n, and logp using shifted eta + * (eta[n,k] - max) to avoid catastrophic cancellation; skips + * skips y_nk=0 terms to implement the 0*log(0)=0 convention; + * if need_delta, stash exp(eta[n,k] - max) into delta_global. + * A final loop normalizes delta (if need_delta) and subtracts + * lgamma(y_nk+1) terms (if need_logp_gamma), reading only y_global + * and delta_global, without re-reading x_beta_global or alpha_global. + * + * @param[out] logp_global partial logp sums, one per work group + * @param[out] delta_global residual matrix N_instances x N_classes + * (col-major) + * @param[in] y_global outcome counts, N_instances x N_classes (col-major) + * @param[in] x_beta_global product x*beta, N_instances x N_classes + * (col-major) + * @param[in] alpha_global intercepts: K values if is_alpha_vector, else + * N_instances x N_classes (col-major) + * @param N_instances number of instances + * @param N_classes number of outcome classes + * @param is_alpha_vector 1 if alpha is shared 1xK row, 0 if NxK + * @param need_delta 1 if delta_global should be computed and written + * @param need_logp_gamma 1 if lgamma terms should be included in logp + */ + __kernel void multinomial_logit_glm( + __global double* logp_global, __global double* delta_global, + const __global int* y_global, const __global double* x_beta_global, + const __global double* alpha_global, const int N_instances, + const int N_classes, const int is_alpha_vector, const int need_delta, + const int need_logp_gamma) { + const int gid = get_global_id(0); + const int lid = get_local_id(0); + const int lsize = get_local_size(0); + const int wg_id = get_group_id(0); + + __local double local_storage[LOCAL_SIZE_]; + + double logp = 0; + if (gid < N_instances) { + // Pass 1: row-wise max of eta for numerical stability. + double eta_max = -INFINITY; + for (int k = 0; k < N_classes; k++) { + int nk = k * N_instances + gid; + int alpha_idx = is_alpha_vector ? k : nk; + double eta_k = x_beta_global[nk] + alpha_global[alpha_idx]; + if (eta_k > eta_max) + eta_max = eta_k; + } + + // Pass 2: sum_exp, S_n, logp; if need_delta stash exp_k in + // delta_global. + double sum_exp = 0; + int S_n = 0; + for (int k = 0; k < N_classes; k++) { + int nk = k * N_instances + gid; + int alpha_idx = is_alpha_vector ? 
k : nk; + double shifted_eta_k + = x_beta_global[nk] + alpha_global[alpha_idx] - eta_max; + double exp_k = exp(shifted_eta_k); + sum_exp += exp_k; + int y_nk = y_global[nk]; + S_n += y_nk; + if (y_nk != 0) + logp += y_nk * shifted_eta_k; + if (need_delta) + delta_global[nk] = exp_k; + } + logp -= S_n * log(sum_exp); + + if (need_logp_gamma) + logp += lgamma(S_n + 1.0); + + // Normalize delta and/or subtract lgamma(y_nk+1) in one pass. + if (need_delta || need_logp_gamma) { + for (int k = 0; k < N_classes; k++) { + int nk = k * N_instances + gid; + int y_nk = y_global[nk]; + if (need_logp_gamma) + logp -= lgamma(y_nk + 1.0); + if (need_delta) + delta_global[nk] = y_nk - S_n * delta_global[nk] / sum_exp; + } + } + } + + // Work-group reduction of logp. + local_storage[lid] = logp; + barrier(CLK_LOCAL_MEM_FENCE); + for (int step = lsize / REDUCTION_STEP_SIZE; step > 0; + step /= REDUCTION_STEP_SIZE) { + if (lid < step) { + for (int i = 1; i < REDUCTION_STEP_SIZE; i++) { + local_storage[lid] += local_storage[lid + step * i]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + if (lid == 0) { + logp_global[wg_id] = local_storage[0]; + } + } + // \cond +); +// \endcond + +/** \ingroup opencl_kernels + * See the docs for \link kernels/multinomial_logit_glm_lpmf.hpp + * multinomial_logit_glm() \endlink + */ +const kernel_cl + multinomial_logit_glm("multinomial_logit_glm", + {multinomial_logit_glm_kernel_code}, + {{"REDUCTION_STEP_SIZE", 4}, {"LOCAL_SIZE_", 64}}); + +} // namespace opencl_kernels +} // namespace math +} // namespace stan +#endif +#endif diff --git a/stan/math/opencl/prim.hpp b/stan/math/opencl/prim.hpp index 4ae8ca1649c..2d3d2ef93ef 100644 --- a/stan/math/opencl/prim.hpp +++ b/stan/math/opencl/prim.hpp @@ -195,6 +195,7 @@ #include #include #include +#include #include #include #include diff --git a/stan/math/opencl/prim/multinomial_logit_glm_lpmf.hpp b/stan/math/opencl/prim/multinomial_logit_glm_lpmf.hpp new file mode 100644 index 00000000000..e521c929524 --- /dev/null +++ b/stan/math/opencl/prim/multinomial_logit_glm_lpmf.hpp @@ -0,0 +1,146 @@ +#ifndef STAN_MATH_OPENCL_PRIM_MULTINOMIAL_LOGIT_GLM_LPMF_HPP +#define STAN_MATH_OPENCL_PRIM_MULTINOMIAL_LOGIT_GLM_LPMF_HPP +#ifdef STAN_OPENCL + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +namespace stan { +namespace math { + +/** \ingroup opencl + * Returns the log PMF of the Generalized Linear Model (GLM) + * with multinomial distribution and softmax (logit) link function. + * This is an OpenCL overload of + * `prim/prob/multinomial_logit_glm_lpmf.hpp`. + * Alpha can be either a shared 1×K row vector or an N×K per-instance matrix. 
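+ *
+ * Rough usage sketch, mirroring the unit tests (x, alpha and beta below
+ * stand for host-side Eigen matrices of the documented shapes):
+ * @code
+ * std::vector<std::vector<int>> y = ...;   // N count vectors of length K
+ * matrix_cl<double> x_cl(x), alpha_cl(alpha), beta_cl(beta);
+ * double lp = multinomial_logit_glm_lpmf(y, x_cl, alpha_cl, beta_cl);
+ * @endcode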
+ * + * @tparam T_x type of the design matrix (N×M kernel expression) + * @tparam T_alpha type of the intercept (1×K or N×K kernel expression) + * @tparam T_beta type of the weight matrix (M×K kernel expression) + * @param y outcome count vectors: `y[n]` is a length-K vector of non-negative + * integer counts for instance n + * @param x design matrix (N×M) on OpenCL device + * @param alpha intercept: 1×K broadcast row or N×K per-instance matrix + * @param beta weight matrix (M×K) on OpenCL device + * @return log sum of multinomial log PMFs over all N instances + * @throw std::domain_error if any element of x or beta is infinite or NaN, + * or if alpha contains `+inf` or NaN (`-inf` forces the corresponding softmax + * probability to zero and is allowed) + * @throw std::domain_error if any count in y is negative + * @throw std::invalid_argument if container sizes mismatch + */ +template * = nullptr> +inline return_type_t multinomial_logit_glm_lpmf( + const std::vector>& y, const T_x& x, const T_alpha& alpha, + const T_beta& beta) { + using T_partials_return = partials_return_t; + static constexpr const char* function = "multinomial_logit_glm_lpmf"; + + const int N_instances = x.rows(); + const int N_classes = beta.cols(); + + check_size_match(function, "Rows of", "x", N_instances, "size of", "y", + y.size()); + check_size_match(function, "Columns of", "beta", N_classes, "columns of", + "alpha", alpha.cols()); + check_size_match(function, "Columns of", "x", x.cols(), "rows of", "beta", + beta.rows()); + + const int alpha_rows = alpha.rows(); + const bool is_alpha_vector = alpha_rows == 1; + if (!is_alpha_vector) { + check_size_match(function, "Rows of", "alpha", alpha_rows, "rows of", "x", + N_instances); + } + + if (N_instances == 0) { + return 0; + } + for (int n = 0; n < N_instances; ++n) { + check_size_match(function, "Size of outcome vector", y[n].size(), + "number of classes", N_classes); + check_nonnegative(function, "outcome counts", y[n]); + } + + if constexpr (!include_summand::value) { + return 0; + } + + // Flatten nested y into an N×K matrix for upload. 
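+  // Eigen::MatrixXi is column-major by default, so the uploaded buffer has
+  // the same [k * N_instances + n] layout the kernel uses to read y_global;
+  // x_beta and alpha are passed to the kernel in the same col-major layout.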
+ Eigen::MatrixXi y_mat(N_instances, N_classes); + for (int n = 0; n < N_instances; ++n) + for (int k = 0; k < N_classes; ++k) + y_mat(n, k) = y[n][k]; + matrix_cl y_cl(y_mat); + + const auto& x_val = eval(value_of(x)); + const auto& alpha_val = eval(value_of(alpha)); + const auto& beta_val = eval(value_of(beta)); + + matrix_cl x_beta_cl = x_val * beta_val; + + const int local_size + = opencl_kernels::multinomial_logit_glm.get_option("LOCAL_SIZE_"); + const int wgs = (N_instances + local_size - 1) / local_size; + + constexpr bool need_delta = is_any_autodiff_v; + + matrix_cl logp_cl(wgs, 1); + matrix_cl delta_cl(0, 0); + if constexpr (need_delta) + delta_cl = matrix_cl(N_instances, N_classes); + + try { + opencl_kernels::multinomial_logit_glm( + cl::NDRange(local_size * wgs), cl::NDRange(local_size), logp_cl, + delta_cl, y_cl, x_beta_cl, alpha_val, N_instances, N_classes, + is_alpha_vector, need_delta, !propto); + } catch (const cl::Error& e) { + check_opencl_error(function, e); + } + + T_partials_return logp = sum(from_matrix_cl(logp_cl)); + + if (!std::isfinite(logp)) { + check_cl(function, "Design matrix", x_val, "finite") = isfinite(x_val); + check_cl(function, "Intercept", alpha_val, "finite") = isfinite(alpha_val); + check_cl(function, "Weight matrix", beta_val, "finite") + = isfinite(beta_val); + } + + auto ops_partials = make_partials_propagator(x, alpha, beta); + if constexpr (need_delta) { + // dlogp/deta[n,k] = delta[n,k]; chain rule: dx = delta*beta^T, + // dalpha = delta (or colwise_sum when 1xK), dbeta = x^T*delta. + if constexpr (is_autodiff_v) + partials<0>(ops_partials) = delta_cl * transpose(beta_val); + if constexpr (is_autodiff_v) + partials<1>(ops_partials) + = is_alpha_vector ? colwise_sum(delta_cl) : delta_cl; + if constexpr (is_autodiff_v) + partials<2>(ops_partials) = transpose(x_val) * delta_cl; + } + return ops_partials.build(logp); +} + +} // namespace math +} // namespace stan + +#endif +#endif diff --git a/stan/math/prim/prob.hpp b/stan/math/prim/prob.hpp index 4fae64c1ace..671de48fdaa 100644 --- a/stan/math/prim/prob.hpp +++ b/stan/math/prim/prob.hpp @@ -179,6 +179,7 @@ #include #include #include +#include #include #include #include diff --git a/stan/math/prim/prob/multinomial_logit_glm_lpmf.hpp b/stan/math/prim/prob/multinomial_logit_glm_lpmf.hpp new file mode 100644 index 00000000000..0734a2c91de --- /dev/null +++ b/stan/math/prim/prob/multinomial_logit_glm_lpmf.hpp @@ -0,0 +1,196 @@ +#ifndef STAN_MATH_PRIM_PROB_MULTINOMIAL_LOGIT_GLM_LPMF_HPP +#define STAN_MATH_PRIM_PROB_MULTINOMIAL_LOGIT_GLM_LPMF_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace stan { +namespace math { + +/** \ingroup multivar_dists + * Returns the log PMF of the Generalized Linear Model (GLM) + * with multinomial distribution and softmax (logit) link function. + * Efficiently computes + * \f$\sum_n \mathrm{multinomial\_logit\_lpmf}(y_n \mid \eta_n)\f$ + * with \f$\eta_n = x_n \beta + \alpha_n\f$ and analytically derived gradients. + * + * Writing \f$S_n = \sum_k y_{nk}\f$ for the per-instance count total and + * \f$p_{nk} = \mathrm{softmax}(\eta_n)_k + * = \exp(\eta_{nk}) / \sum_{k'} \exp(\eta_{nk'})\f$ + * for the category probabilities, the log PMF is + * + * \f[ + * \log p(y\,|\,x,\alpha,\beta) = \sum_{n=1}^N \left[ + * \log\Gamma(S_n + 1) - \sum_{k=1}^K \log\Gamma(y_{nk} + 1) + * + \sum_{k=1}^K y_{nk}\,\log p_{nk} + * \right]. 
+ * \f] + * + * Defining the residual + * \f$\delta_{nk} = y_{nk} - S_n\,p_{nk}\f$ + * (so that \f$\partial\log p/\partial\eta_{nk} = \delta_{nk}\f$), the + * gradients are + * + * \f[ + * \frac{\partial\log p}{\partial\alpha_{nk}} = \delta_{nk}, + * \f] + * + * \f[ + * \frac{\partial\log p}{\partial\beta_{mk}} = \sum_n x_{nm}\,\delta_{nk}, + * \f] + * + * \f[ + * \frac{\partial\log p}{\partial x_{nm}} = \sum_k \delta_{nk}\,\beta_{mk}. + * \f] + * + * When \f$\alpha\f$ is broadcast (a single \f$1\times K\f$ row used for all + * instances), \f$\partial\log p/\partial\alpha_k = \sum_n \delta_{nk}\f$. + * + * @tparam T_x type of the design matrix (N x M) + * @tparam T_alpha type of the intercept; either a row vector (1 x K) broadcast + * over all instances, or a matrix (N x K) with per-instance intercepts. + * Analogous to a scalar vs vector alpha in univariate GLMs: the K dimension + * corresponds to the class (output) space, not the instance dimension. + * @tparam T_beta type of the weight matrix (M x K) + * @param y outcome count vectors: `y[n]` is a length-K vector of non-negative + * integer counts for instance n; the counts need not sum to a fixed total + * @param x design matrix (N x M) + * @param alpha intercept; row vector (1 x K) broadcast across all instances, + * or matrix (N x K) with one bias row per instance + * @param beta weight matrix (M x K) + * @return log sum of multinomial log PMFs over all N instances + * @throw std::domain_error if any element of x or beta is infinite or NaN, or + * if alpha contains `+inf` or NaN (`-inf` forces the corresponding softmax + * probability to zero and is allowed; if all classes have `-inf` alpha for + * some instance but that instance has nonzero counts, the result is undefined), + * or if any count in y is negative + * @throw std::invalid_argument if container sizes mismatch + */ +template * = nullptr, + require_matrix_t* = nullptr, + require_matrix_t* = nullptr> +inline return_type_t multinomial_logit_glm_lpmf( + const std::vector>& y, const T_x& x, const T_alpha& alpha, + const T_beta& beta) { + using T_partials_return = partials_return_t; + using Eigen::Array; + using Eigen::Dynamic; + using T_x_ref = ref_type_if_not_constant_t; + using T_alpha_ref = ref_type_if_not_constant_t; + using T_beta_ref = ref_type_if_not_constant_t; + constexpr int T_alpha_rows = T_alpha::RowsAtCompileTime; + constexpr bool need_delta = is_any_autodiff_v; + + const size_t N_instances = x.rows(); + const size_t N_classes = beta.cols(); + + static constexpr const char* function = "multinomial_logit_glm_lpmf"; + check_size_match(function, "Rows of outcome vectors", y.size(), + "number of instances", N_instances); + check_size_match(function, "Columns of intercept", alpha.cols(), + "number of classes", N_classes); + if constexpr (T_alpha_rows != 1) { + check_size_match(function, "Rows of intercept", alpha.rows(), + "rows of design matrix", x.rows()); + } + check_size_match(function, "Columns of design matrix", x.cols(), + "rows of weight matrix", beta.rows()); + + if (size_zero(y)) { + return 0; + } + for (size_t n = 0; n < N_instances; ++n) { + check_size_match(function, "Size of outcome vector", y[n].size(), + "number of classes", N_classes); + check_nonnegative(function, "outcome counts", y[n]); + } + + if constexpr (!include_summand::value) { + return 0; + } + + T_x_ref x_ref = x; + T_alpha_ref alpha_ref = alpha; + T_beta_ref beta_ref = beta; + + const auto& x_val = to_ref_if>(value_of(x_ref)); + const auto& beta_val = to_ref_if>(value_of(beta_ref)); + + 
Array eta; + if constexpr (T_alpha_rows == 1) { + eta = ((x_val * beta_val).rowwise() + value_of(alpha_ref)).array(); + } else { + eta = (x_val * beta_val + value_of(alpha_ref)).array(); + } + + // Row-max shift for numerical stability; cancels in log-softmax. + const Array shifted_eta + = (eta.colwise() - eta.rowwise().maxCoeff()).eval(); + const Array exp_eta = exp(shifted_eta); + const Array softmax_mat + = exp_eta.colwise() / exp_eta.rowwise().sum(); + + const Array y_mat = to_matrix(y).array(); + const Array instance_totals = y_mat.rowwise().sum(); + + // lmultiply implements 0*log(0)=0: classes with softmax=0 and y=0 contribute + // 0. + T_partials_return logp = lmultiply(y_mat, softmax_mat).sum(); + if constexpr (include_summand::value) { + logp += lgamma(instance_totals + 1.0).sum() - lgamma(y_mat + 1.0).sum(); + } + + if (!std::isfinite(logp)) { + check_finite(function, "Weight matrix", beta_ref); + check_finite(function, "Intercept", alpha_ref); + check_finite(function, "Matrix of independent variables", x_ref); + } + + auto ops_partials = make_partials_propagator(x_ref, alpha_ref, beta_ref); + if constexpr (need_delta) { + // δ[n,k] = y[n,k] - S_n·p[n,k] + const Array delta + = y_mat.template cast() + - softmax_mat.colwise() + * instance_totals.template cast(); + + if constexpr (is_autodiff_v) { + if constexpr (T_alpha_rows == 1) + partials<1>(ops_partials) = delta.colwise().sum(); + else + partials<1>(ops_partials) = delta; + } + if constexpr (is_autodiff_v) { + partials<2>(ops_partials) + = x_val.transpose().template cast() + * delta.matrix(); + } + if constexpr (is_autodiff_v) { + edge<0>(ops_partials).partials_ = delta.matrix() * beta_val.transpose(); + } + } + return ops_partials.build(logp); +} + +template +inline return_type_t multinomial_logit_glm_lpmf( + const std::vector>& y, const T_x& x, const T_alpha& alpha, + const T_beta& beta) { + return multinomial_logit_glm_lpmf(y, x, alpha, beta); +} + +} // namespace math +} // namespace stan +#endif diff --git a/test/unit/math/opencl/rev/multinomial_logit_glm_lpmf_test.cpp b/test/unit/math/opencl/rev/multinomial_logit_glm_lpmf_test.cpp new file mode 100644 index 00000000000..ac2f9a005d3 --- /dev/null +++ b/test/unit/math/opencl/rev/multinomial_logit_glm_lpmf_test.cpp @@ -0,0 +1,291 @@ +#ifdef STAN_OPENCL +#include +#include +#include +#include +#include + +using Eigen::Dynamic; +using Eigen::Matrix; +using stan::math::matrix_cl; +using stan::math::var; +using std::vector; + +TEST(ProbDistributionsMultinomialLogitGLM, error_checking) { + int N = 3, M = 2, K = 3; + + vector> y{{1, 2, 0}, {0, 3, 1}, {2, 0, 2}}; + vector> y_bad_n{{1, 2, 0}, {0, 3, 1}}; + vector> y_bad_k{{1, 2}, {0, 3}, {2, 0}}; + vector> y_neg{{-1, 2, 0}, {0, 3, 1}, {2, 0, 2}}; + + Matrix x(N, M); + x << 1.0, -0.5, 0.3, 0.7, -0.2, 1.1; + Matrix x_bad_n(N + 1, M); + Matrix x_bad_m(N, M + 1); + Matrix x_inf = x; + x_inf(0, 0) = stan::math::INFTY; + + Matrix beta(M, K); + beta << 0.3, -0.2, 0.1, -0.1, 0.4, -0.3; + Matrix beta_bad_m(M + 1, K); + Matrix beta_bad_k(M, K + 1); + Matrix beta_inf = beta; + beta_inf(0, 0) = stan::math::INFTY; + + Matrix alpha(1, K); + alpha << 0.1, -0.3, 0.2; + Matrix alpha_bad_k(1, K + 1); + Matrix alpha_inf = alpha; + alpha_inf(0) = stan::math::INFTY; + + Matrix alpha_mat(N, K); + alpha_mat << 0.1, -0.3, 0.2, -0.2, 0.1, 0.3, 0.0, 0.2, -0.1; + Matrix alpha_mat_bad_n(N + 1, K); + Matrix alpha_mat_bad_k(N, K + 1); + + matrix_cl x_cl(x), x_bad_n_cl(x_bad_n), x_bad_m_cl(x_bad_m), + x_inf_cl(x_inf); + matrix_cl beta_cl(beta), 
beta_bad_m_cl(beta_bad_m), + beta_bad_k_cl(beta_bad_k), beta_inf_cl(beta_inf); + matrix_cl alpha_cl(alpha), alpha_bad_k_cl(alpha_bad_k), + alpha_inf_cl(alpha_inf); + matrix_cl alpha_mat_cl(alpha_mat), + alpha_mat_bad_n_cl(alpha_mat_bad_n), alpha_mat_bad_k_cl(alpha_mat_bad_k); + + EXPECT_NO_THROW( + stan::math::multinomial_logit_glm_lpmf(y, x_cl, alpha_cl, beta_cl)); + EXPECT_NO_THROW( + stan::math::multinomial_logit_glm_lpmf(y, x_cl, alpha_mat_cl, beta_cl)); + + EXPECT_THROW( + stan::math::multinomial_logit_glm_lpmf(y_bad_n, x_cl, alpha_cl, beta_cl), + std::invalid_argument); + EXPECT_THROW( + stan::math::multinomial_logit_glm_lpmf(y_bad_k, x_cl, alpha_cl, beta_cl), + std::invalid_argument); + EXPECT_THROW( + stan::math::multinomial_logit_glm_lpmf(y_neg, x_cl, alpha_cl, beta_cl), + std::domain_error); + + EXPECT_THROW( + stan::math::multinomial_logit_glm_lpmf(y, x_bad_n_cl, alpha_cl, beta_cl), + std::invalid_argument); + EXPECT_THROW( + stan::math::multinomial_logit_glm_lpmf(y, x_bad_m_cl, alpha_cl, beta_cl), + std::invalid_argument); + + EXPECT_THROW( + stan::math::multinomial_logit_glm_lpmf(y, x_cl, alpha_bad_k_cl, beta_cl), + std::invalid_argument); + EXPECT_THROW(stan::math::multinomial_logit_glm_lpmf( + y, x_cl, alpha_mat_bad_n_cl, beta_cl), + std::invalid_argument); + EXPECT_THROW(stan::math::multinomial_logit_glm_lpmf( + y, x_cl, alpha_mat_bad_k_cl, beta_cl), + std::invalid_argument); + + EXPECT_THROW( + stan::math::multinomial_logit_glm_lpmf(y, x_cl, alpha_cl, beta_bad_m_cl), + std::invalid_argument); + EXPECT_THROW( + stan::math::multinomial_logit_glm_lpmf(y, x_cl, alpha_cl, beta_bad_k_cl), + std::invalid_argument); + + EXPECT_THROW( + stan::math::multinomial_logit_glm_lpmf(y, x_inf_cl, alpha_cl, beta_cl), + std::domain_error); + EXPECT_THROW( + stan::math::multinomial_logit_glm_lpmf(y, x_cl, alpha_inf_cl, beta_cl), + std::domain_error); + EXPECT_THROW( + stan::math::multinomial_logit_glm_lpmf(y, x_cl, alpha_cl, beta_inf_cl), + std::domain_error); +} + +TEST(ProbDistributionsMultinomialLogitGLM, + opencl_matches_cpu_small_broadcast_alpha) { + int N = 3, M = 2, K = 3; + vector> y{{1, 2, 0}, {0, 3, 1}, {2, 0, 2}}; + Matrix x(N, M); + x << 1.0, -0.5, 0.3, 0.7, -0.2, 1.1; + Matrix alpha(1, K); + alpha << 0.1, -0.3, 0.2; + Matrix beta(M, K); + beta << 0.3, -0.2, 0.1, -0.1, 0.4, -0.3; + + auto f = [&y](const auto& x_, const auto& a_, const auto& b_) { + return stan::math::multinomial_logit_glm_lpmf(y, x_, a_, b_); + }; + auto f_propto = [&y](const auto& x_, const auto& a_, const auto& b_) { + return stan::math::multinomial_logit_glm_lpmf(y, x_, a_, b_); + }; + stan::math::test::compare_cpu_opencl_prim_rev(f, x, alpha, beta); + stan::math::test::compare_cpu_opencl_prim_rev(f_propto, x, alpha, beta); +} + +TEST(ProbDistributionsMultinomialLogitGLM, + opencl_matches_cpu_small_matrix_alpha) { + int N = 3, M = 2, K = 3; + vector> y{{1, 2, 0}, {0, 3, 1}, {2, 0, 2}}; + Matrix x(N, M); + x << 1.0, -0.5, 0.3, 0.7, -0.2, 1.1; + Matrix alpha(N, K); + alpha << 0.1, -0.3, 0.2, -0.2, 0.1, 0.3, 0.0, 0.2, -0.1; + Matrix beta(M, K); + beta << 0.3, -0.2, 0.1, -0.1, 0.4, -0.3; + + auto f = [&y](const auto& x_, const auto& a_, const auto& b_) { + return stan::math::multinomial_logit_glm_lpmf(y, x_, a_, b_); + }; + auto f_propto = [&y](const auto& x_, const auto& a_, const auto& b_) { + return stan::math::multinomial_logit_glm_lpmf(y, x_, a_, b_); + }; + stan::math::test::compare_cpu_opencl_prim_rev(f, x, alpha, beta); + stan::math::test::compare_cpu_opencl_prim_rev(f_propto, x, alpha, beta); +} + 
+TEST(ProbDistributionsMultinomialLogitGLM, opencl_matches_cpu_zero_instances) { + int M = 2, K = 3; + vector> y{}; + Matrix x(0, M); + Matrix alpha(1, K); + alpha << 0.1, -0.3, 0.2; + Matrix beta(M, K); + beta << 0.3, -0.2, 0.1, -0.1, 0.4, -0.3; + + auto f = [&y](const auto& x_, const auto& a_, const auto& b_) { + return stan::math::multinomial_logit_glm_lpmf(y, x_, a_, b_); + }; + auto f_propto = [&y](const auto& x_, const auto& a_, const auto& b_) { + return stan::math::multinomial_logit_glm_lpmf(y, x_, a_, b_); + }; + stan::math::test::compare_cpu_opencl_prim_rev(f, x, alpha, beta); + stan::math::test::compare_cpu_opencl_prim_rev(f_propto, x, alpha, beta); +} + +TEST(ProbDistributionsMultinomialLogitGLM, opencl_matches_cpu_zero_attributes) { + int N = 3, K = 3; + vector> y{{1, 2, 0}, {0, 3, 1}, {2, 0, 2}}; + Matrix x(N, 0); + Matrix alpha(1, K); + alpha << 0.1, -0.3, 0.2; + Matrix beta(0, K); + + auto f = [&y](const auto& x_, const auto& a_, const auto& b_) { + return stan::math::multinomial_logit_glm_lpmf(y, x_, a_, b_); + }; + auto f_propto = [&y](const auto& x_, const auto& a_, const auto& b_) { + return stan::math::multinomial_logit_glm_lpmf(y, x_, a_, b_); + }; + stan::math::test::compare_cpu_opencl_prim_rev(f, x, alpha, beta); + stan::math::test::compare_cpu_opencl_prim_rev(f_propto, x, alpha, beta); +} + +TEST(ProbDistributionsMultinomialLogitGLM, opencl_matches_cpu_single_instance) { + int N = 1, M = 3, K = 4; + vector> y{{2, 0, 3, 1}}; + Matrix x(N, M); + x << 0.5, -1.2, 0.8; + Matrix alpha(1, K); + alpha << 0.1, -0.3, 0.5, -0.2; + Matrix beta(M, K); + beta << 0.2, -0.1, 0.4, 0.0, -0.3, 0.5, -0.2, 0.1, 0.1, 0.0, 0.3, -0.4; + + auto f = [&y](const auto& x_, const auto& a_, const auto& b_) { + return stan::math::multinomial_logit_glm_lpmf(y, x_, a_, b_); + }; + stan::math::test::compare_cpu_opencl_prim_rev(f, x, alpha, beta); +} + +TEST(ProbDistributionsMultinomialLogitGLM, opencl_matches_cpu_all_zeros_y) { + int N = 2, M = 2, K = 3; + vector> y{{0, 0, 0}, {0, 0, 0}}; + Matrix x + = Matrix::Random(N, M); + Matrix alpha = Matrix::Random(1, K); + Matrix beta + = Matrix::Random(M, K); + + auto f = [&y](const auto& x_, const auto& a_, const auto& b_) { + return stan::math::multinomial_logit_glm_lpmf(y, x_, a_, b_); + }; + stan::math::test::compare_cpu_opencl_prim_rev(f, x, alpha, beta); +} + +TEST(ProbDistributionsMultinomialLogitGLM, opencl_matches_cpu_large) { + int N = 153, M = 17, K = 11; + vector> y(N, vector(K)); + for (int n = 0; n < N; ++n) + for (int k = 0; k < K; ++k) + y[n][k] = (n * K + k) % 5; + + Matrix x + = Matrix::Random(N, M); + Matrix alpha = Matrix::Random(1, K); + Matrix beta + = Matrix::Random(M, K); + + auto f = [&y](const auto& x_, const auto& a_, const auto& b_) { + return stan::math::multinomial_logit_glm_lpmf(y, x_, a_, b_); + }; + auto f_propto = [&y](const auto& x_, const auto& a_, const auto& b_) { + return stan::math::multinomial_logit_glm_lpmf(y, x_, a_, b_); + }; + stan::math::test::compare_cpu_opencl_prim_rev(f, x, alpha, beta); + stan::math::test::compare_cpu_opencl_prim_rev(f_propto, x, alpha, beta); +} + +TEST(ProbDistributionsMultinomialLogitGLM, + opencl_matches_cpu_large_matrix_alpha) { + int N = 153, M = 17, K = 11; + vector> y(N, vector(K)); + for (int n = 0; n < N; ++n) + for (int k = 0; k < K; ++k) + y[n][k] = (n * K + k) % 5; + + Matrix x + = Matrix::Random(N, M); + Matrix alpha + = Matrix::Random(N, K); + Matrix beta + = Matrix::Random(M, K); + + auto f = [&y](const auto& x_, const auto& a_, const auto& b_) { + return 
stan::math::multinomial_logit_glm_lpmf(y, x_, a_, b_); + }; + auto f_propto = [&y](const auto& x_, const auto& a_, const auto& b_) { + return stan::math::multinomial_logit_glm_lpmf(y, x_, a_, b_); + }; + stan::math::test::compare_cpu_opencl_prim_rev(f, x, alpha, beta); + stan::math::test::compare_cpu_opencl_prim_rev(f_propto, x, alpha, beta); +} + +TEST(ProbDistributionsMultinomialLogitGLM, opencl_neg_inf_alpha) { + // alpha[n,k]=-inf forces softmax probability to 0; y[n,k]=0 for those + // classes. Result must be finite and match the CPU prim result. + int N = 2, M = 2, K = 3; + vector> y{{2, 1, 0}, {0, 3, 2}}; + + Matrix x(N, M); + x << 1.0, 0.5, 0.3, -0.7; + + Matrix beta(M, K); + beta << 0.3, -0.2, 0.1, -0.1, 0.4, -0.3; + + Matrix alpha(N, K); + alpha << 0.2, -0.1, -stan::math::INFTY, -stan::math::INFTY, 0.4, 0.1; + + const double logp_cpu + = stan::math::multinomial_logit_glm_lpmf(y, x, alpha, beta); + ASSERT_TRUE(std::isfinite(logp_cpu)); + + matrix_cl x_cl(x), alpha_cl(alpha), beta_cl(beta); + const double logp_cl + = stan::math::multinomial_logit_glm_lpmf(y, x_cl, alpha_cl, beta_cl); + + EXPECT_TRUE(std::isfinite(logp_cl)); + EXPECT_FLOAT_EQ(logp_cpu, logp_cl); +} + +#endif diff --git a/test/unit/math/prim/prob/multinomial_logit_glm_lpmf_test.cpp b/test/unit/math/prim/prob/multinomial_logit_glm_lpmf_test.cpp new file mode 100644 index 00000000000..26d82626780 --- /dev/null +++ b/test/unit/math/prim/prob/multinomial_logit_glm_lpmf_test.cpp @@ -0,0 +1,260 @@ +#include +#include +#include +#include + +// ----------------------------------------------------------------------- +// Equivalence with multinomial_logit_lpmf (single instance, N=1) +// ----------------------------------------------------------------------- +TEST(ProbMultinomialLogitGLM, matchesMultinomialLogit) { + // N=1, K=4 classes, M=3 features + Eigen::MatrixXd x(1, 3); + x << 0.5, -1.2, 0.8; + Eigen::RowVectorXd alpha(4); + alpha << 0.1, -0.3, 0.5, -0.2; + Eigen::MatrixXd beta(3, 4); + beta << 0.2, -0.1, 0.4, 0.0, -0.3, 0.5, -0.2, 0.1, 0.1, 0.0, 0.3, -0.4; + + // eta = alpha^T + beta^T x^T (K-vector for multinomial_logit_lpmf) + Eigen::VectorXd eta = alpha.transpose() + beta.transpose() * x.transpose(); + + std::vector ns{3, 1, 0, 2}; + std::vector> y{ns}; + + EXPECT_FLOAT_EQ(stan::math::multinomial_logit_lpmf(ns, eta), + stan::math::multinomial_logit_glm_lpmf(y, x, alpha, beta)); +} + +// ----------------------------------------------------------------------- +// Equivalence with binomial_logit_lpmf (K=2 special case) +// +// With K=2, alpha=[0,0], and beta_mult = [beta_bin | 0_col] (M x 2), +// the linear predictor per instance is lin_n = [x_n·beta_bin, 0]. +// The softmax of [a, 0] is [sigmoid(a), sigmoid(-a)], so the +// multinomial log-PMF reduces to binomial_logit_lpmf(y0 | y0+y1, x·beta_bin). 
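+//
+// Worked out: with p = sigmoid(a) and counts (y0, y1),
+//   multinomial(y0, y1 | p, 1 - p)
+//     = (y0 + y1)! / (y0! y1!) * p^y0 * (1 - p)^y1
+//     = binomial(y0 | y0 + y1, p),
+// so the two lpmf values agree term by term, including the lgamma
+// normalizing constants.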
+// ----------------------------------------------------------------------- +TEST(ProbMultinomialLogitGLM, matchesBinomialLogitLpmf) { + // N=2 instances, M=2 features, K=2 classes + Eigen::MatrixXd x(2, 2); + x << 1.0, -0.5, 0.3, 0.7; + + Eigen::VectorXd beta_bin(2); + beta_bin << 0.4, -0.6; + + // Multinomial parameters: alpha=[0,0], beta_mult = [[beta_bin], [0]] + const Eigen::RowVectorXd alpha_mult = Eigen::RowVectorXd::Zero(2); + Eigen::MatrixXd beta_mult(2, 2); + beta_mult << beta_bin(0), 0.0, beta_bin(1), 0.0; + + const std::vector n_success{3, 2}; + const std::vector n_trials{5, 4}; + + std::vector> y(2); + for (int i = 0; i < 2; ++i) { + y[i] = {n_success[i], n_trials[i] - n_success[i]}; + } + + // Binomial logit uses eta = x * beta_bin (no intercept, matches alpha=[0,0]) + const Eigen::VectorXd eta = x * beta_bin; + EXPECT_FLOAT_EQ( + stan::math::binomial_logit_lpmf(n_success, n_trials, eta), + stan::math::multinomial_logit_glm_lpmf(y, x, alpha_mult, beta_mult)); +} + +// ----------------------------------------------------------------------- +// K=3, N=3 instances, M=2 features. +// GLM result must equal the sum of per-instance multinomial_logit_lpmf calls. +// ----------------------------------------------------------------------- +TEST(ProbMultinomialLogitGLM, K3_N3_manual) { + Eigen::MatrixXd x(3, 2); + x << 1.0, 0.5, -0.5, 1.0, 0.0, -1.0; + + Eigen::RowVectorXd alpha(3); + alpha << 0.2, -0.1, 0.5; + + Eigen::MatrixXd beta(2, 3); + beta << 0.3, -0.2, 0.1, -0.1, 0.4, -0.3; + + std::vector> y{{2, 1, 3}, {0, 4, 1}, {3, 0, 2}}; + + double expected = 0; + for (int n = 0; n < 3; ++n) { + Eigen::VectorXd eta + = alpha.transpose() + beta.transpose() * x.row(n).transpose(); + expected += stan::math::multinomial_logit_lpmf(y[n], eta); + } + + EXPECT_FLOAT_EQ(expected, + stan::math::multinomial_logit_glm_lpmf(y, x, alpha, beta)); +} + +// ----------------------------------------------------------------------- +// Matrix alpha (N x K): per-instance intercepts, full design matrix. +// Result must equal the sum of per-instance multinomial_logit_lpmf calls +// each using its own alpha row. +// ----------------------------------------------------------------------- +TEST(ProbMultinomialLogitGLM, matrixAlpha_fullX) { + const int N = 4, K = 3, M = 2; + Eigen::MatrixXd x(N, M); + x << 1.0, 0.5, -0.5, 1.0, 0.0, -1.0, 0.7, 0.3; + + Eigen::MatrixXd alpha(N, K); + alpha << 0.2, -0.1, 0.5, -0.3, 0.4, 0.1, 0.1, 0.0, -0.2, 0.5, -0.3, 0.2; + + Eigen::MatrixXd beta(M, K); + beta << 0.3, -0.2, 0.1, -0.1, 0.4, -0.3; + + std::vector> y{{2, 1, 3}, {0, 4, 1}, {3, 0, 2}, {1, 2, 1}}; + + double expected = 0; + for (int n = 0; n < N; ++n) { + Eigen::VectorXd eta + = alpha.row(n).transpose() + beta.transpose() * x.row(n).transpose(); + expected += stan::math::multinomial_logit_lpmf(y[n], eta); + } + + EXPECT_FLOAT_EQ(expected, + stan::math::multinomial_logit_glm_lpmf(y, x, alpha, beta)); +} + +// ----------------------------------------------------------------------- +// propto=true: with non-autodiff parameters the entire log PMF is constant +// w.r.t. parameters, so Stan math's early-return yields 0. +// propto=false: full log PMF must be finite and negative. 
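+//
+// (Mechanically: with double-only x, alpha and beta the include_summand
+// check in the lpmf evaluates to false for propto=true, so it returns 0
+// before touching the data; the other GLM lpmfs follow the same convention.)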
+// ----------------------------------------------------------------------- +TEST(ProbMultinomialLogitGLM, propto) { + Eigen::MatrixXd x(2, 2); + x << 1.0, 0.0, 0.0, 1.0; + Eigen::RowVectorXd alpha(3); + alpha << 0.1, 0.2, 0.3; + Eigen::MatrixXd beta(2, 3); + beta << 0.1, 0.2, 0.3, 0.4, 0.5, 0.6; + + std::vector> y{{1, 2, 3}, {2, 1, 0}}; + + const double logp_full + = stan::math::multinomial_logit_glm_lpmf(y, x, alpha, beta); + EXPECT_TRUE(std::isfinite(logp_full)); + EXPECT_LT(logp_full, 0.0); + + // With all-double parameters and propto=true, Stan math elides all terms + // (the full log PMF is a constant) and returns 0. + EXPECT_EQ(stan::math::multinomial_logit_glm_lpmf(y, x, alpha, beta), + 0.0); +} + +// ----------------------------------------------------------------------- +// alpha[n,k] = -inf forces softmax probability to 0 for class k on instance n. +// When y[n,k] = 0 the 0*log(0) term must evaluate to 0 (not NaN). +// ----------------------------------------------------------------------- +TEST(ProbMultinomialLogitGLM, negInfAlpha) { + // N=2, K=3, M=2. Some alpha entries are -inf, forcing p=0 for those classes. + const int N = 2, K = 3, M = 2; + + Eigen::MatrixXd x(N, M); + x << 1.0, 0.5, 0.3, -0.7; + + Eigen::MatrixXd beta(M, K); + beta << 0.3, -0.2, 0.1, -0.1, 0.4, -0.3; + + // Per-instance (N x K) alpha with -inf entries; y is 0 for those classes. + Eigen::MatrixXd alpha(N, K); + alpha << 0.2, -0.1, -stan::math::INFTY, -stan::math::INFTY, 0.4, 0.1; + + std::vector> y{{2, 1, 0}, {0, 3, 2}}; + + const double logp = stan::math::multinomial_logit_glm_lpmf(y, x, alpha, beta); + EXPECT_TRUE(std::isfinite(logp)); + EXPECT_LT(logp, 0.0); + + // Reference: -inf drops the class from the softmax, equivalent to + // multinomial_logit_lpmf over the remaining classes. + // lgamma(0 + 1) = 0 so the normalizing constant is unaffected. 
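+  // Concretely: row 0 of alpha is (0.2, -0.1, -inf) and y[0] = (2, 1, 0), so
+  // the third class contributes 0 * log(0) = 0 and the reference below sums
+  // only the two finite-eta classes; likewise for row 1.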
+ double expected = 0; + for (int n = 0; n < N; ++n) { + const Eigen::VectorXd eta + = beta.transpose() * x.row(n).transpose() + alpha.row(n).transpose(); + std::vector y_in; + std::vector eta_in; + for (int k = 0; k < K; ++k) { + if (std::isfinite(eta(k))) { + y_in.push_back(y[n][k]); + eta_in.push_back(eta(k)); + } + } + expected += stan::math::multinomial_logit_lpmf( + y_in, Eigen::Map(eta_in.data(), eta_in.size())); + } + + EXPECT_FLOAT_EQ(expected, logp); +} + +// ----------------------------------------------------------------------- +// Error handling +// ----------------------------------------------------------------------- +TEST(ProbMultinomialLogitGLM, throwsCorrectly) { + Eigen::MatrixXd x(2, 2); + x << 1.0, 0.5, -0.5, 1.0; + Eigen::RowVectorXd alpha(3); + alpha << 0.1, 0.2, 0.3; + Eigen::MatrixXd beta(2, 3); + beta << 0.1, 0.2, 0.3, 0.4, 0.5, 0.6; + + std::vector> y{{1, 2, 3}, {2, 1, 0}}; + + // Mismatched number of instances (y has 1 row, x has 2) + EXPECT_THROW( + stan::math::multinomial_logit_glm_lpmf({{1, 2, 3}}, x, alpha, beta), + std::invalid_argument); + + // Mismatched number of classes in one outcome vector + EXPECT_THROW(stan::math::multinomial_logit_glm_lpmf({{1, 2}, {2, 1, 0}}, x, + alpha, beta), + std::invalid_argument); + + // Negative counts + EXPECT_THROW(stan::math::multinomial_logit_glm_lpmf({{-1, 2, 3}, {2, 1, 0}}, + x, alpha, beta), + std::domain_error); + + // Infinite alpha + Eigen::RowVectorXd alpha_inf = alpha; + alpha_inf[0] = stan::math::INFTY; + EXPECT_THROW(stan::math::multinomial_logit_glm_lpmf(y, x, alpha_inf, beta), + std::domain_error); + + // Infinite beta + Eigen::MatrixXd beta_inf = beta; + beta_inf(0, 0) = stan::math::INFTY; + EXPECT_THROW(stan::math::multinomial_logit_glm_lpmf(y, x, alpha, beta_inf), + std::domain_error); + + // Infinite x + Eigen::MatrixXd x_inf = x; + x_inf(0, 0) = stan::math::INFTY; + EXPECT_THROW(stan::math::multinomial_logit_glm_lpmf(y, x_inf, alpha, beta), + std::domain_error); + + // alpha cols mismatch with K (beta has 3 cols but alpha has 2 cols) + Eigen::RowVectorXd alpha_bad(2); + alpha_bad << 0.1, 0.2; + EXPECT_THROW(stan::math::multinomial_logit_glm_lpmf(y, x, alpha_bad, beta), + std::invalid_argument); + + // Matrix alpha with wrong number of rows + Eigen::MatrixXd alpha_mat_bad_rows(3, 3); + EXPECT_THROW( + stan::math::multinomial_logit_glm_lpmf(y, x, alpha_mat_bad_rows, beta), + std::invalid_argument); + + // Matrix alpha with wrong number of columns + Eigen::MatrixXd alpha_mat_bad_cols(2, 2); + EXPECT_THROW( + stan::math::multinomial_logit_glm_lpmf(y, x, alpha_mat_bad_cols, beta), + std::invalid_argument); + + // beta rows mismatch with x cols (x has 2 cols, beta has 3 rows) + EXPECT_THROW(stan::math::multinomial_logit_glm_lpmf(y, x, alpha, + Eigen::MatrixXd(3, 3)), + std::invalid_argument); +} diff --git a/test/unit/math/rev/prob/multinomial_logit_glm_lpmf_test.cpp b/test/unit/math/rev/prob/multinomial_logit_glm_lpmf_test.cpp new file mode 100644 index 00000000000..e8f75d29d1f --- /dev/null +++ b/test/unit/math/rev/prob/multinomial_logit_glm_lpmf_test.cpp @@ -0,0 +1,216 @@ +#include +#include +#include +#include + +// Reference: sum multinomial_logit_lpmf over all N instances +template +inline stan::return_type_t +multinomial_logit_glm_simple_lpmf(const std::vector>& y, + const T_x& x, const T_alpha& alpha, + const T_beta& beta) { + using T_return = stan::return_type_t; + const int N = x.rows(); + // alpha is 1xK; as_column_vector_or_scalar+transpose gives a row-vector + // expression whose decay type lets 
rep_matrix deduce its Ret parameter. + const auto& alpha_row + = stan::math::as_column_vector_or_scalar(alpha).transpose(); + auto lin = stan::math::to_ref( + stan::math::multiply(x, beta) + + stan::math::rep_matrix>(alpha_row, + N)); + T_return lpmf = 0; + for (int n = 0; n < N; ++n) + lpmf += stan::math::multinomial_logit_lpmf( + y[n], lin.row(n).transpose().eval()); + return lpmf; +} + +TEST_F(AgradRev, MultinomialLogitGLM_matches_simple_doubles) { + using stan::math::multinomial_logit_glm_lpmf; + const size_t N = 5, M = 2, K = 3; + std::vector> y{ + {2, 1, 3}, {0, 4, 1}, {3, 0, 2}, {1, 2, 0}, {0, 1, 4}}; + Eigen::MatrixXd x(N, M); + x << -1.2, 0.5, 0.3, -0.7, 1.0, 0.2, -0.4, 0.8, 0.6, -1.1; + Eigen::MatrixXd beta(M, K); + beta << 0.3, -0.2, 0.1, -0.1, 0.4, -0.3; + Eigen::RowVectorXd alpha(K); + alpha << 0.2, -0.1, 0.5; + const double eps = 1e-13; + + EXPECT_NEAR(multinomial_logit_glm_simple_lpmf(y, x, alpha, beta), + multinomial_logit_glm_lpmf(y, x, alpha, beta), eps); + EXPECT_NEAR(multinomial_logit_glm_simple_lpmf(y, x, alpha, beta), + multinomial_logit_glm_lpmf(y, x, alpha, beta), eps); +} + +template +class ProbDistributionsMultinomialLogitGLM + : public stan::math::test::VarMatrixTypedTests {}; + +TYPED_TEST_SUITE(ProbDistributionsMultinomialLogitGLM, + stan::math::test::VarMatImpls); + +TYPED_TEST(ProbDistributionsMultinomialLogitGLM, glm_matches_simple_vars) { + using stan::math::multinomial_logit_glm_lpmf; + using stan::math::var; + using matrix_v = typename TypeParam::matrix_v; + using row_vector_v = typename TypeParam::row_vector_v; + const size_t N = 5, M = 2, K = 3; + const double eps = 1e-13; + std::vector> y{ + {2, 1, 3}, {0, 4, 1}, {3, 0, 2}, {1, 2, 0}, {0, 1, 4}}; + Eigen::MatrixXd x_val(N, M); + x_val << -1.2, 0.5, 0.3, -0.7, 1.0, 0.2, -0.4, 0.8, 0.6, -1.1; + Eigen::MatrixXd beta_val(M, K); + beta_val << 0.3, -0.2, 0.1, -0.1, 0.4, -0.3; + Eigen::RowVectorXd alpha_val(K); + alpha_val << 0.2, -0.1, 0.5; + + // Reference always uses Matrix so multinomial_logit_lpmf can extract + // rows. 
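+  // Both results are summed before grad() so a single reverse sweep fills
+  // the adjoints of the reference operands (x1, alpha1, beta1) and of the
+  // GLM operands (x2, alpha2, beta2); the loops below then compare the two
+  // sets of adjoints entry by entry.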
+ Eigen::Matrix x1 = x_val, + beta1 = beta_val; + Eigen::Matrix alpha1 = alpha_val; + matrix_v x2 = x_val; + matrix_v beta2 = beta_val; + row_vector_v alpha2 = alpha_val; + + var res1 = multinomial_logit_glm_simple_lpmf(y, x1, alpha1, beta1); + var res2 = multinomial_logit_glm_lpmf(y, x2, alpha2, beta2); + (res1 + res2).grad(); + EXPECT_NEAR(res1.val(), res2.val(), eps); + for (size_t i = 0; i < M; ++i) { + for (size_t j = 0; j < N; ++j) + EXPECT_NEAR(x1.adj()(j, i), x2.adj()(j, i), eps); + for (size_t j = 0; j < K; ++j) + EXPECT_NEAR(beta1.adj()(i, j), beta2.adj()(i, j), eps); + } + for (size_t i = 0; i < K; ++i) + EXPECT_NEAR(alpha1.adj()[i], alpha2.adj()[i], eps); + + stan::math::set_zero_all_adjoints(); + + res1 = multinomial_logit_glm_simple_lpmf(y, x1, alpha1, beta1); + res2 = multinomial_logit_glm_lpmf(y, x2, alpha2, beta2); + (res1 + res2).grad(); + EXPECT_NEAR(res1.val(), res2.val(), eps); + for (size_t i = 0; i < M; ++i) { + for (size_t j = 0; j < N; ++j) + EXPECT_NEAR(x1.adj()(j, i), x2.adj()(j, i), eps); + for (size_t j = 0; j < K; ++j) + EXPECT_NEAR(beta1.adj()(i, j), beta2.adj()(i, j), eps); + } + for (size_t i = 0; i < K; ++i) + EXPECT_NEAR(alpha1.adj()[i], alpha2.adj()[i], eps); +} + +TYPED_TEST(ProbDistributionsMultinomialLogitGLM, glm_matches_simple_big) { + using stan::math::multinomial_logit_glm_lpmf; + using stan::math::var; + using matrix_v = typename TypeParam::matrix_v; + using row_vector_v = typename TypeParam::row_vector_v; + const size_t N = 37, M = 11, K = 7; + const double eps = 1e-10; + + std::vector> y(N, std::vector(K)); + for (size_t n = 0; n < N; ++n) + for (size_t k = 0; k < K; ++k) + y[n][k] = (n + k) % 5; + + Eigen::MatrixXd x_val = Eigen::MatrixXd::Random(N, M); + Eigen::MatrixXd beta_val = Eigen::MatrixXd::Random(M, K); + Eigen::RowVectorXd alpha_val = Eigen::RowVectorXd::Random(K); + + Eigen::Matrix x1 = x_val, + beta1 = beta_val; + Eigen::Matrix alpha1 = alpha_val; + matrix_v x2 = x_val; + matrix_v beta2 = beta_val; + row_vector_v alpha2 = alpha_val; + + var res1 = multinomial_logit_glm_simple_lpmf(y, x1, alpha1, beta1); + var res2 = multinomial_logit_glm_lpmf(y, x2, alpha2, beta2); + (res1 + res2).grad(); + EXPECT_NEAR(res1.val(), res2.val(), eps); + for (size_t i = 0; i < M; ++i) { + for (size_t j = 0; j < N; ++j) + EXPECT_NEAR(x1.adj()(j, i), x2.adj()(j, i), eps); + for (size_t j = 0; j < K; ++j) + EXPECT_NEAR(beta1.adj()(i, j), beta2.adj()(i, j), eps); + } + for (size_t i = 0; i < K; ++i) + EXPECT_NEAR(alpha1.adj()[i], alpha2.adj()[i], eps); +} + +TYPED_TEST(ProbDistributionsMultinomialLogitGLM, matrix_alpha_grads) { + using stan::math::multinomial_logit_glm_lpmf; + using stan::math::var; + using matrix_v = typename TypeParam::matrix_v; + const size_t N = 4, M = 2, K = 3; + const double eps = 1e-13; + std::vector> y{{2, 1, 3}, {0, 4, 1}, {3, 0, 2}, {1, 2, 0}}; + Eigen::MatrixXd x_val(N, M); + x_val << -1.2, 0.5, 0.3, -0.7, 1.0, 0.2, -0.4, 0.8; + Eigen::MatrixXd alpha_val(N, K); + alpha_val << 0.2, -0.1, 0.5, -0.3, 0.4, 0.1, 0.1, 0.0, -0.2, 0.5, -0.3, 0.2; + Eigen::MatrixXd beta_val(M, K); + beta_val << 0.3, -0.2, 0.1, -0.1, 0.4, -0.3; + // full x + matrix alpha: exercises the T_alpha_rows != 1 gradient path + // Reference uses Matrix so multinomial_logit_lpmf can extract Eigen + // rows. 
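+  // (Per the gradient derivation in the prim doc, the analytic adjoint of
+  // alpha(n, k) is delta(n, k) = y[n][k] - S_n * softmax(eta_n)[k], so a
+  // mismatch in the alpha comparison below would point at the delta
+  // computation in the N x K intercept path.)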
+ Eigen::Matrix x1 = x_val, + alpha1 = alpha_val, + beta1 = beta_val; + matrix_v x2 = x_val; + matrix_v alpha2 = alpha_val; + matrix_v beta2 = beta_val; + + var res1 = 0.0; + { + auto lin = stan::math::to_ref(stan::math::multiply(x1, beta1) + alpha1); + for (size_t n = 0; n < N; ++n) + res1 += stan::math::multinomial_logit_lpmf(y[n], + lin.row(n).transpose().eval()); + } + var res2 = multinomial_logit_glm_lpmf(y, x2, alpha2, beta2); + (res1 + res2).grad(); + EXPECT_NEAR(res1.val(), res2.val(), eps); + for (size_t i = 0; i < M; ++i) { + for (size_t j = 0; j < N; ++j) + EXPECT_NEAR(x1.adj()(j, i), x2.adj()(j, i), eps); + for (size_t j = 0; j < K; ++j) + EXPECT_NEAR(beta1.adj()(i, j), beta2.adj()(i, j), eps); + } + for (size_t n = 0; n < N; ++n) + for (size_t k = 0; k < K; ++k) + EXPECT_NEAR(alpha1.adj()(n, k), alpha2.adj()(n, k), eps); +} + +TYPED_TEST(ProbDistributionsMultinomialLogitGLM, interfaces) { + using stan::math::multinomial_logit_glm_lpmf; + using matrix_v = typename TypeParam::matrix_v; + using row_vector_v = typename TypeParam::row_vector_v; + const size_t N = 3, M = 2, K = 3; + std::vector> y{{1, 2, 0}, {0, 1, 3}, {2, 0, 1}}; + Eigen::MatrixXd x_d(N, M); + x_d << 1.0, -0.5, 0.3, 0.7, -0.2, 0.4; + Eigen::MatrixXd beta_d(M, K); + beta_d << 0.1, 0.2, 0.3, 0.4, 0.5, 0.6; + Eigen::RowVectorXd alpha_d(K); + alpha_d << 0.1, 0.2, 0.3; + + matrix_v x_v = x_d; + matrix_v beta_v = beta_d; + row_vector_v alpha_v = alpha_d; + + EXPECT_NO_THROW(multinomial_logit_glm_lpmf(y, x_d, alpha_d, beta_d)); + EXPECT_NO_THROW(multinomial_logit_glm_lpmf(y, x_v, alpha_d, beta_d)); + EXPECT_NO_THROW(multinomial_logit_glm_lpmf(y, x_d, alpha_v, beta_d)); + EXPECT_NO_THROW(multinomial_logit_glm_lpmf(y, x_d, alpha_d, beta_v)); + EXPECT_NO_THROW(multinomial_logit_glm_lpmf(y, x_v, alpha_v, beta_d)); + EXPECT_NO_THROW(multinomial_logit_glm_lpmf(y, x_v, alpha_d, beta_v)); + EXPECT_NO_THROW(multinomial_logit_glm_lpmf(y, x_d, alpha_v, beta_v)); + EXPECT_NO_THROW(multinomial_logit_glm_lpmf(y, x_v, alpha_v, beta_v)); +}