In NONMEM, three levels of approximation are used. The first level is the Laplacian approximation, the second is the first-order conditional estimation (FOCE) method, and the third is the first-order (FO) method.
Laplacian approximation
Consider a function \(f(x)>0\) to be integrated, and assume it is twice differentiable. Defining \(g(x):=\log f(x)\), \(f(x)\) can be expressed as:
\begin{align}
f(x) = \exp \left( \log (f(x)) \right)= \exp g(x) \nonumber
\end{align}
\(g(x)\) can be approximated by a second-order Taylor expansion around a point \(x_0\) as follows:
\begin{align}
g(x) \approx g(x_0) + (x-x_0)g'(x_0) + \frac{(x-x_0)^2}{2!}g''(x_0) \nonumber
\end{align}
Then the integral \(\int f(x)\, dx\) can be approximated as follows:
\begin{align}
\int f(x) dx &= \int \exp(g(x)) dx \nonumber \\
&\approx \int \exp\left( g(x_0) + (x-x_0)g'(x_0) + \frac{(x-x_0)^2}{2!}g''(x_0) \right) dx \nonumber \\
&=f(x_0)\int \exp\left( (x-x_0)g'(x_0) + \frac{(x-x_0)^2}{2!}g''(x_0) \right) dx \nonumber \\
&=f(x_0)\cdot\sqrt{\frac{2\pi}{-g''(x_0)}}\cdot\exp\left( \frac{-g'(x_0)^2}{2g''(x_0)} \right), \nonumber
\end{align}
where the last equality follows from completing the square in the exponent and applying the Gaussian integral \(\int \exp\left(-\frac{u^2}{2s^2}\right) du = \sqrt{2\pi s^2}\), noting that \(g''(x_0)<0\) near a maximum of \(g\).
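As a quick numerical sanity check (not part of the original derivation), the sketch below applies this univariate formula to a hypothetical integrand \(f(x)=x^3 e^{-x}\), whose exact integral over \((0,\infty)\) is \(\Gamma(4)=6\), and compares it against quadrature.

```python
# Minimal sketch of the univariate Laplace approximation, assuming the
# hypothetical integrand f(x) = x^3 * exp(-x) with exact integral Gamma(4) = 6.
import numpy as np
from scipy.integrate import quad

def f(x):
    return x**3 * np.exp(-x)

# g(x) = log f(x) = 3*log(x) - x, so its derivatives are available in closed form.
def g_prime(x):
    return 3.0 / x - 1.0

def g_double_prime(x):
    return -3.0 / x**2

def laplace_1d(x0):
    """Laplace approximation of the integral of f, expanded around x0."""
    return (f(x0)
            * np.sqrt(2.0 * np.pi / -g_double_prime(x0))
            * np.exp(-g_prime(x0)**2 / (2.0 * g_double_prime(x0))))

exact, _ = quad(f, 0.0, np.inf)
print(exact)              # ~6.00
print(laplace_1d(3.0))    # ~5.84, expanding at the mode of g (g'(3) = 0)
print(laplace_1d(2.5))    # ~4.84, less accurate away from the mode
```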
If \(x\) is a vector of length \(p\), with \(g'(x_0)\) and \(g''(x_0)\) now denoting the gradient and Hessian of \(g\) at \(x_0\), the same argument gives
\begin{align}
\int f(x) dx \approx f(x_0)\cdot\sqrt{\frac{(2\pi)^p}{|-g''(x_0)|}}\cdot\exp\left( -\frac{g'(x_0)^T g''(x_0)^{-1} g'(x_0)}{2} \right). \nonumber
\end{align}
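The multivariate version can be checked the same way; the sketch below uses another hypothetical integrand, \(f(x_1,x_2)=x_1^3 e^{-x_1}\, x_2^2 e^{-x_2}\), whose exact integral over \((0,\infty)^2\) is \(\Gamma(4)\Gamma(3)=12\).

```python
# Sketch of the p-dimensional Laplace approximation, assuming the hypothetical
# 2-D integrand f(x1, x2) = x1^3 exp(-x1) * x2^2 exp(-x2) (exact integral = 12).
import numpy as np

def laplace_nd(f_x0, grad, hess):
    """Laplace approximation given f(x0), the gradient g'(x0), and the Hessian g''(x0)."""
    p = len(grad)
    return (f_x0
            * np.sqrt((2.0 * np.pi)**p / np.linalg.det(-hess))
            * np.exp(-0.5 * grad @ np.linalg.solve(hess, grad)))

# Expand around the mode x0 = (3, 2), where the gradient of g = log f vanishes.
x0 = np.array([3.0, 2.0])
f_x0 = x0[0]**3 * np.exp(-x0[0]) * x0[1]**2 * np.exp(-x0[1])
grad = np.array([3.0 / x0[0] - 1.0, 2.0 / x0[1] - 1.0])      # = (0, 0) at the mode
hess = np.diag([-3.0 / x0[0]**2, -2.0 / x0[1]**2])
print(laplace_nd(f_x0, grad, hess))   # ~11.2 versus the exact value 12
```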
Nonlinear random-effects model
Let \(\theta\) be a length-\(p\) vector of population mean parameters, \(\theta_i\) be the length-\(p\) vector of individual parameters for the \(i\)th subject, assumed to follow \(\theta_i \sim {\cal N}_p(\theta,\Omega)\), and \(y_i\) be the length-\(j\) vector of responses for the \(i\)th subject, which follows \(y_i \sim {\cal N}_j\left(\hat{y}(t_i;\theta_i),\sigma^2 I_j \right)\), where \(\sigma^2\) is the residual variance.
Consider the following marginal likelihood to be maximized:
\begin{align}
p(y_i \mid \theta,\Omega,\sigma) &= \int p(y_i, \theta_i \mid \theta,\Omega,\sigma) d \theta_i \nonumber \\
&= \int p(y_i \mid \theta_i , \theta,\Omega,\sigma) p(\theta_i \mid \theta,\Omega,\sigma) d \theta_i \nonumber \\
&= \int p(y_i \mid \theta_i , \sigma) p(\theta_i \mid \theta,\Omega) d \theta_i. \nonumber
\end{align}
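For a scalar random effect this integral can be evaluated directly by quadrature, which gives a reference value to compare against later. The sketch below does so for a hypothetical one-parameter model \(\hat{y}(t;\theta_i)=e^{-\theta_i t}\) with made-up data and variance components (none of these numbers come from the original text).

```python
# Sketch: marginal likelihood of one subject by quadrature, assuming the
# hypothetical scalar-random-effect model yhat(t; theta_i) = exp(-theta_i * t).
import numpy as np
from scipy.integrate import quad
from scipy.stats import norm

t_i = np.array([0.5, 1.0, 2.0])           # sampling times (made up)
y_i = np.array([0.62, 0.37, 0.16])        # observed responses (made up)
theta, omega, sigma = 1.0, 0.3, 0.05      # population mean, sqrt(Omega), residual SD

def integrand(theta_i):
    # p(y_i | theta_i, sigma) * p(theta_i | theta, Omega)
    yhat = np.exp(-theta_i * t_i)
    lik = np.prod(norm.pdf(y_i, loc=yhat, scale=sigma))
    prior = norm.pdf(theta_i, loc=theta, scale=omega)
    return lik * prior

marginal, _ = quad(integrand, theta - 6 * omega, theta + 6 * omega)
print(np.log(marginal))   # log p(y_i | theta, Omega, sigma) by direct integration
```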
Define \(f(\theta_i):=p(y_i \mid \theta_i , \sigma)\, p(\theta_i \mid \theta,\Omega)\), or equivalently \(g(\theta_i)=\log p(y_i \mid \theta_i , \sigma) + \log p(\theta_i \mid \theta,\Omega)\). Writing \(p'\) for the derivative with respect to \(\theta_i\), the first derivative of \(g\) is:
\begin{align}
\frac{\partial g(\theta_i)}{\partial \theta_i}=\frac{p'(y_i \mid \theta_i , \sigma)}{p(y_i \mid \theta_i , \sigma)} + \frac{p'(\theta_i \mid \theta,\Omega)}{p(\theta_i \mid \theta,\Omega)}.\nonumber
\end{align}
Since \(\theta_i \sim {\cal N}_p(\theta,\Omega)\),
\begin{align}
p(\theta_i \mid \theta,\Omega) = \frac{1}{(2 \pi)^\frac{p}{2}\sqrt{|\Omega|}} \exp \left( -\frac{(\theta_i-\theta)^T\Omega^{-1}(\theta_i-\theta)}{2} \right),\nonumber
\end{align}
and its first derivative w.r.t. \(\theta_i\) is
\begin{align}
\frac{\partial p(\theta_i \mid \theta,\Omega)}{\partial \theta_i} = \frac{1}{(2 \pi)^\frac{p}{2}\sqrt{|\Omega|}} \exp \left( -\frac{(\theta_i-\theta)^T\Omega^{-1}(\theta_i-\theta)}{2} \right) \cdot \Bigl( -\Omega^{-1}(\theta_i-\theta)\Bigr),\nonumber
\end{align}
then
\begin{align}
\frac{p'(\theta_i \mid \theta,\Omega)}{p(\theta_i \mid \theta,\Omega)}= -\Omega^{-1}(\theta_i-\theta). \nonumber
\end{align}
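This gradient is easy to verify numerically; the sketch below (with arbitrary made-up values of \(\theta\), \(\Omega\), and \(\theta_i\)) compares \(-\Omega^{-1}(\theta_i-\theta)\) against central finite differences of the log density.

```python
# Finite-difference check that d/d(theta_i) log p(theta_i | theta, Omega)
# equals -Omega^{-1} (theta_i - theta), using made-up values.
import numpy as np
from scipy.stats import multivariate_normal

theta = np.array([1.0, 2.0])
Omega = np.array([[0.2, 0.05],
                  [0.05, 0.1]])
theta_i = np.array([1.3, 1.8])

analytic = -np.linalg.solve(Omega, theta_i - theta)

def logpdf(x):
    return multivariate_normal.logpdf(x, mean=theta, cov=Omega)

eps = 1e-6
numeric = np.array([(logpdf(theta_i + eps * e) - logpdf(theta_i - eps * e)) / (2 * eps)
                    for e in np.eye(2)])
print(analytic, numeric)   # the two vectors should agree closely
```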
Similarly, since \(y_i \sim {\cal N}_j\left(\hat{y}(t_i;\theta_i),\sigma^2 I_j \right)\),
\begin{align}
p(y_i \mid \theta_i , \sigma) = \frac{1}{(2 \pi)^\frac{j}{2}\sqrt{|\sigma^2 I_j|}} \exp \left(-\frac{\Bigl(y_i-\hat{y}_i(\theta_i)\Bigr)^T\Bigl(y_i-\hat{y}_i(\theta_i)\Bigr)}{2 \sigma^2} \right),\nonumber
\end{align}
and its first derivative w.r.t. \(\theta_i\), writing \(\hat{y}'(\theta_i)\) for the \(j \times p\) Jacobian of \(\hat{y}_i(\theta_i)\) with respect to \(\theta_i\), is
\begin{align}
\frac{\partial p(y_i \mid \theta_i , \sigma)}{\partial \theta_i} = p(y_i \mid \theta_i , \sigma) \cdot \left( \frac{\hat{y}'(\theta_i)^T \bigl(y_i-\hat{y}_i(\theta_i)\bigr)}{\sigma^2} \right),\nonumber
\end{align}
then,
\begin{align}
\frac{p'(y_i \mid \theta_i , \sigma)}{p(y_i \mid \theta_i , \sigma)} = \frac{\hat{y}'(\theta_i)^T \bigl(y_i-\hat{y}_i(\theta_i)\bigr)}{\sigma^2}. \nonumber
\end{align}
To sum up,
\begin{align}
\frac{\partial g(\theta_i)}{\partial \theta_i}=\frac{\hat{y}'(\theta_i)^T \bigl(y_i-\hat{y}_i(\theta_i)\bigr)}{\sigma^2}-\Omega^{-1}(\theta_i-\theta),\nonumber
\end{align}
and
\begin{align}
\frac{\partial^2 g(\theta_i)}{\partial \theta_i\, \partial \theta_i^T}=\frac{\bigl(y_i-\hat{y}_i(\theta_i)\bigr)^T \hat{y}''(\theta_i)}{\sigma^2}-\frac{\hat{y}'(\theta_i)^T\hat{y}'(\theta_i)}{\sigma^2}-\Omega^{-1}.\nonumber
\end{align}
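These two expressions can also be checked concretely. Using the same hypothetical scalar model as before, \(\hat{y}(t;\theta_i)=e^{-\theta_i t}\) (so \(\hat{y}'\) and \(\hat{y}''\) are available in closed form), the sketch below compares the analytic gradient and Hessian of \(g\) with numerical derivatives.

```python
# Check of g'(theta_i) and g''(theta_i) for the hypothetical scalar model
# yhat(t; theta_i) = exp(-theta_i * t), against numerical derivatives of g.
import numpy as np

t = np.array([0.5, 1.0, 2.0])
y = np.array([0.62, 0.37, 0.16])
theta, Omega, sigma2 = 1.0, 0.09, 0.0025       # Omega = 0.3^2, sigma^2 = 0.05^2

def g(th_i):
    # log p(y | theta_i, sigma) + log p(theta_i | theta, Omega), constants dropped
    yhat = np.exp(-th_i * t)
    return -np.sum((y - yhat)**2) / (2 * sigma2) - (th_i - theta)**2 / (2 * Omega)

def g_prime(th_i):
    yhat = np.exp(-th_i * t)
    dyhat = -t * yhat                          # yhat'(theta_i)
    return dyhat @ (y - yhat) / sigma2 - (th_i - theta) / Omega

def g_double_prime(th_i):
    yhat = np.exp(-th_i * t)
    dyhat, d2yhat = -t * yhat, t**2 * yhat     # yhat'(theta_i), yhat''(theta_i)
    return (y - yhat) @ d2yhat / sigma2 - dyhat @ dyhat / sigma2 - 1.0 / Omega

th_i, eps = 1.1, 1e-5
print(g_prime(th_i), (g(th_i + eps) - g(th_i - eps)) / (2 * eps))
print(g_double_prime(th_i), (g(th_i + eps) - 2 * g(th_i) + g(th_i - eps)) / eps**2)
```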
Consider the "log"-likelihood to be maximized,
\begin{align}
\log p(y_i \mid \theta,\Omega,\sigma) &= \log \left(\int p(y_i \mid \theta_i , \sigma) p(\theta_i \mid \theta,\Omega) d \theta_i \right) \nonumber \\
&\approx \log \left(
f(\theta_{0i}) \cdot\sqrt{\frac{(2\pi)^p}{|-g''(\theta_{0i})|}}\cdot\exp\left( -\frac{g'(\theta_{0i})^T g''(\theta_{0i})^{-1} g'(\theta_{0i})}{2} \right)
\right) \nonumber \\
&=\log \left(
f(\theta_{0i}) \cdot\sqrt{\frac{(2\pi)^p}{|-g''(\theta_{0i})|}}
\right)
-\frac{g'(\theta_{0i})^T g''(\theta_{0i})^{-1} g'(\theta_{0i})}{2} \nonumber
\end{align}
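In the Laplacian method, \(\theta_{0i}\) is typically taken to be the value of \(\theta_i\) that maximizes \(g\) (the empirical Bayes estimate), so that \(g'(\theta_{0i})=0\) and the last term vanishes. The sketch below assembles the pieces for the same hypothetical scalar model as above and compares the approximate log-likelihood with the value obtained by quadrature; it is an illustration of the formula only, not NONMEM's implementation.

```python
# Laplace-approximated log marginal likelihood for one subject, assuming the
# hypothetical scalar model yhat(t; theta_i) = exp(-theta_i * t) used above.
import numpy as np
from scipy.integrate import quad
from scipy.optimize import minimize_scalar
from scipy.stats import norm

t = np.array([0.5, 1.0, 2.0])
y = np.array([0.62, 0.37, 0.16])
theta, omega, sigma = 1.0, 0.3, 0.05

def log_f(th_i):
    # g(theta_i) = log p(y | theta_i, sigma) + log p(theta_i | theta, Omega)
    yhat = np.exp(-th_i * t)
    return (np.sum(norm.logpdf(y, loc=yhat, scale=sigma))
            + norm.logpdf(th_i, loc=theta, scale=omega))

def g_double_prime(th_i):
    # Hessian of g from the formula above, specialised to this scalar model
    yhat = np.exp(-th_i * t)
    dyhat, d2yhat = -t * yhat, t**2 * yhat
    return ((y - yhat) @ d2yhat - dyhat @ dyhat) / sigma**2 - 1.0 / omega**2

# Inner step: theta_0i maximizes g, so g'(theta_0i) = 0 and the last term drops.
opt = minimize_scalar(lambda th: -log_f(th), bounds=(0.1, 3.0), method="bounded")
theta_0i = opt.x

laplace_ll = log_f(theta_0i) + 0.5 * np.log(2 * np.pi / -g_double_prime(theta_0i))

reference, _ = quad(lambda th: np.exp(log_f(th)), theta - 6 * omega, theta + 6 * omega)
print(laplace_ll, np.log(reference))   # the two values should be close
```

The population-level step would then maximize the sum of these per-subject approximate log-likelihoods over \(\theta\), \(\Omega\), and \(\sigma\).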
References
Pinheiro, José C., and Douglas M. Bates. "Approximations to the log-likelihood function in the nonlinear mixed-effects model." Journal of Computational and Graphical Statistics 4, no. 1 (1995): 12-35.
Wang, Yaning. "Derivation of various NONMEM estimation methods." Journal of Pharmacokinetics and Pharmacodynamics 34, no. 5 (2007): 575-593.