Introduction to CDsampling

library(CDsampling)

/private/var/folders/vf/_6q3gfkd14v15hhpcwlkrnc00000gn/T/RtmpHJ4lD3/Rbuild14a0c1d107945/CDsampling/vignettes/Intro_to_CDsampling.R

set.seed(123)

/private/var/folders/vf/_6q3gfkd14v15hhpcwlkrnc00000gn/T/RtmpHJ4lD3/Rbuild14a0c1d107945/CDsampling/vignettes/Intro_to_CDsampling.R In the context of paid research studies and clinical trials, budget considerations and patient sampling from available populations are subject to inherent constraints. CDsampling integrates optimal design theories within the framework of constrained sampling.

Reference: Constrained D-optimal Design for Paid Research Study (2024+), Statistica Sinica, to appear, DOI: 10.5705/ss.202022.0414, Yifei Huang, Liping Tong, and Jie Yang

Computation of Fisher information matrix

Example GLM Fisher information matrix

Consider a research study with a simple logistic regression model \[\log(\frac{\mu_i}{1-\mu_i}) = \beta_0 + \beta_1 x_{i1} + \beta_2 x_{i2}\] where \(\mu_i = E(Y_i\mid {\mathbf x}_i)\), \({\mathbf x}_i = (x_{i1}, x_{i2})^\top \in \{(-1, -1), (-1, +1), (+1, -1)\}\) and parameters \(\boldsymbol \beta = (\beta_0, \beta_1, \beta_2) = (0.5, 0.5, 0.5)\). The approximate allocation to the three design points is \(\mathbf w = (1/3,1/3,1/3)^\top\)

To calculate Fisher information matrix of the design with GLM, we can use F_func_GLM in the package.

beta = c(0.5, 0.5, 0.5)
X = matrix(data=c(1,-1,-1,1,-1,1,1,1,-1), byrow=TRUE, nrow=3) 
w = c(1/3,1/3,1/3) #approximate allocation
CDsampling::F_func_GLM(w=w, beta=beta, X=X, link='logit')
#> Dimensions: 3 x 3 
#> Matrix:
#> ------------------------------ 
#>      [,1]        [,2]        [,3]       
#> [1,]  0.23500371 -0.07833457 -0.07833457
#> [2,] -0.07833457  0.23500371 -0.07833457
#> [3,] -0.07833457 -0.07833457  0.23500371
#> ------------------------------

/private/var/folders/vf/_6q3gfkd14v15hhpcwlkrnc00000gn/T/RtmpHJ4lD3/Rbuild14a0c1d107945/CDsampling/vignettes/Intro_to_CDsampling.R

Example of MLM Fisher information matrix

Consider a research study with cumulative npo multinomial logit model with \(J=5\) response levels with covariates \((x_{i1}, x_{i2})=\{(1,0),(2,0),(3,0), (4,0),(1,1),(2,1),(3,1),(4,1)\}\). The model can be written as:\[\log(\frac{\pi_{i1}}{\pi_{i2}+\dots+\pi_{i5}}) = \beta_{11}+\beta_{12}x_{i1}+\beta_{13}x_{i2}\] \[\log(\frac{\pi_{i1}+\pi_{i2}}{\pi_{i3}+\pi_{i4}+\pi_{i5}}) = \beta_{21}+\beta_{22}x_{i1}+\beta_{23}x_{i2}\] \[\log(\frac{\pi_{i1}+\pi_{i2}+\pi_{i3}}{\pi_{i4}+\pi_{i5}}) = \beta_{31}+\beta_{32}x_{i1}+\beta_{33}x_{i2}\] \[\log(\frac{\pi_{i1}+\dots+\pi_{i4}}{\pi_{i5}}) = \beta_{41}+\beta_{42}x_{i1}+\beta_{43}x_{i2}\] where \(i=1,\dots,8\), we assume the parameters \(\boldsymbol \beta = (\beta_{11}, \beta_{12}, \beta_{13}, \beta_{21}, \beta_{22}, \beta_{23}, \beta_{31}, \beta_{32}, \beta_{33}, \beta_{41}, \beta_{42}, \beta_{43})^\top = (-4.047, -0.131, 4.214, -2.225, -0.376,\) \(3.519, -0.302, -0.237, 2.420, 1.386, -0.120, 1.284)^\top\). The approximate allocation for the eight design points is \(\mathbf w = (1/8, 1/8, 1/8, 1/8, 1/8, 1/8, 1/8, 1/8)^\top\).

To calculate the Fisher information matrix with MLM, we can use the F_func_MLM in the package.

J=5; p=12; m=8; 
beta = c(-4.047, -0.131, 4.214, -2.225, -0.376, 3.519, -0.302,
    -0.237,  2.420, 1.386,  -0.120,  1.284)
Xi=rep(0,J*p*m)   
dim(Xi)=c(J,p,m)
Xi[,,1] = rbind(c( 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

Xi[,,2] = rbind(c( 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

Xi[,,3] = rbind(c( 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

 Xi[,,4] = rbind(c( 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

 Xi[,,5] = rbind(c( 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

 Xi[,,6] = rbind(c( 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 1, 2, 1, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 1, 2, 1, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

 Xi[,,7] = rbind(c( 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 1, 3, 1, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 1, 3, 1, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

 Xi[,,8] = rbind(c( 1, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 1, 4, 1, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 1, 4, 1, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 1),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))
 alloc = rep(1/8,m) #approximate allocation
 CDsampling::F_func_MLM(w=alloc, beta=beta, X=Xi, link='cumulative')
#> Dimensions: 12 x 12 
#> Matrix:
#> ------------------------------------------------------------------------------------------------------------------------ 
#>       [,1]        [,2]        [,3]        [,4]        [,5]        [,6]       
#>  [1,]  0.44505694  1.37915564  0.43609135 -0.37247296 -1.21252053 -0.36379349
#>  [2,]  1.37915564  4.78410934  1.35694145 -1.21252053 -4.31436344 -1.19085831
#>  [3,]  0.43609135  1.35694145  0.43609135 -0.36379349 -1.19085831 -0.36379349
#>  [4,] -0.37247296 -1.21252053 -0.36379349  0.51192600  1.55177413  0.48018678
#>  [5,] -1.21252053 -4.31436344 -1.19085831  1.55177413  5.31193908  1.48241981
#>  [6,] -0.36379349 -1.19085831 -0.36379349  0.48018678  1.48241981  0.48018678
#>  [7,]  0.00000000  0.00000000  0.00000000 -0.09154268 -0.22027625 -0.07471168
#>  [8,]  0.00000000  0.00000000  0.00000000 -0.22027625 -0.64323991 -0.18445802
#>  [9,]  0.00000000  0.00000000  0.00000000 -0.07471168 -0.18445802 -0.07471168
#> [10,]  0.00000000  0.00000000  0.00000000  0.00000000  0.00000000  0.00000000
#> [11,]  0.00000000  0.00000000  0.00000000  0.00000000  0.00000000  0.00000000
#> [12,]  0.00000000  0.00000000  0.00000000  0.00000000  0.00000000  0.00000000
#>       [,7]        [,8]        [,9]        [,10]       [,11]       [,12]      
#>  [1,]  0.00000000  0.00000000  0.00000000  0.00000000  0.00000000  0.00000000
#>  [2,]  0.00000000  0.00000000  0.00000000  0.00000000  0.00000000  0.00000000
#>  [3,]  0.00000000  0.00000000  0.00000000  0.00000000  0.00000000  0.00000000
#>  [4,] -0.09154268 -0.22027625 -0.07471168  0.00000000  0.00000000  0.00000000
#>  [5,] -0.22027625 -0.64323991 -0.18445802  0.00000000  0.00000000  0.00000000
#>  [6,] -0.07471168 -0.18445802 -0.07471168  0.00000000  0.00000000  0.00000000
#>  [7,]  0.29320484  0.71978889  0.16205254 -0.10435894 -0.25399393 -0.06194131
#>  [8,]  0.71978889  2.13312603  0.41294396 -0.25399393 -0.74842743 -0.15305771
#>  [9,]  0.16205254  0.41294396  0.16205254 -0.06194131 -0.15305771 -0.06194131
#> [10,] -0.10435894 -0.25399393 -0.06194131  0.17861575  0.45287619  0.06925715
#> [11,] -0.25399393 -0.74842743 -0.15305771  0.45287619  1.37180187  0.17395849
#> [12,] -0.06194131 -0.15305771 -0.06194131  0.06925715  0.17395849  0.06925715
#> ------------------------------------------------------------------------------------------------------------------------

/private/var/folders/vf/_6q3gfkd14v15hhpcwlkrnc00000gn/T/RtmpHJ4lD3/Rbuild14a0c1d107945/CDsampling/vignettes/Intro_to_CDsampling.R

Data: trial_data & Constrained sampling with GLM

trial_data is a simulated dataset that contains \(N=500\) volunteers gender, age, and final efficacy information. The details as below: gender is coded as \(0\) for female and \(1\) for male. Age group is coded as both \(age\_1 = 0\) and \(age\_2 = 0\) for ages \(18\sim25\), \(age\_1 = 1\) for ages \(26\sim64\), and \(age\_2 = 1\) for ages \(65\) and above. The number of design points \(m=6\), that is, the number of combinations of gender and age groups \((x_{gender\_i}, x_{age\_1i}, x_{age\_2i})\) corresponds to \((0,0,0)\), \((0,1,0)\), \((0,0,1)\), \((1,0,0)\), \((1,1,0)\), \((1,0,1)\). We have the number of patients in each category as $(N_1, N_2, , N_6) = (50, 40, 10, $ \(200, 150, 50)\). Suppose that a sample of \(n=200\) participants is required due to budget limit.Our goal is to find the constrained D-optimal allocation \((w_1, w_2, \dots, w_6)\) with feasible allocation \(S = \{(w_1, \ldots, w_m)^T \in S_0 \mid n w_i \leq N_i, i=1, \ldots, m\}\).

We can use constrained lift-one algorithm liftone_constrained_GLM to find the locally D-optimal approximate sampling allocations.

We consider the logistic regression model for \(j=1,\dots,m\), \(i=1,\dots,n_j\) with \(\boldsymbol \beta=(\beta_{1}, \beta_{21}, \beta_{22}= (0,3,3,3)\):

\[\begin{equation}\label{eq:trial_logistic_model} {\rm logit} \{P(Y_{ij}=1 \mid x_{gender\_i}, x_{age\_1i}, x_{age\_2i})\} = \beta_0 + \beta_1 x_{gender\_i} + \beta_{21} x_{age\_1i} + \beta_{22} x_{age\_2i} \end{equation}\]

We use the following R codes to define the beta coefficients, sample size, and design matrix:

beta = c(0, 3, 3, 3) #coefficients 
X=matrix(data=c(1,0,0,0,1,0,1,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,0,1), ncol=4, byrow=TRUE) #design matrix X
nsample=200 #sample size

/private/var/folders/vf/_6q3gfkd14v15hhpcwlkrnc00000gn/T/RtmpHJ4lD3/Rbuild14a0c1d107945/CDsampling/vignettes/Intro_to_CDsampling.R

To run the liftone_constrained_GLM function, we also need to the \(\mathbf W\) matrix from the calculation of Fisher information matrix, we can use the W_func_GLM function in the package:

W_matrix=CDsampling::W_func_GLM(X=X, b=beta, link="logit") #W matrix

/private/var/folders/vf/_6q3gfkd14v15hhpcwlkrnc00000gn/T/RtmpHJ4lD3/Rbuild14a0c1d107945/CDsampling/vignettes/Intro_to_CDsampling.R

Lastly, we also need to define the constraints (number of patients from different gender and age group) and boundaries for constrained sampling (please see the reference for details of lower bound \(r_{i1}\) and upper bound \(r_{i2}\)):

rc = c(50, 40, 10, 200, 150, 50)/nsample
m = 6
g.con = matrix(0,nrow=(2*m+1), ncol=m)
g.con[1,] = rep(1, m)
g.con[2:(m+1),] = diag(m)
g.con[(m+2):(2*m+1), ] = diag(m)
g.dir = c("==", rep("<=", m), rep(">=", m))
g.rhs = c(1, rc, rep(0, m))

lower.bound=function(i, w){
  nsample = 200
  rc = c(50, 40, 10, 200, 150, 50)/nsample
  m=length(w) #num of categories
  temp = rep(0,m)
  temp[w>0]=1-pmin(1,rc[w>0])*(1-w[i])/w[w>0];
  temp[i]=0;
  max(0,temp);
}

upper.bound=function(i, w){
  nsample = 200
  rc = c(50, 40, 10, 200, 150, 50)/nsample
  m=length(w) #num of categories
  rc[i];
  min(1,rc[i])
}

/private/var/folders/vf/_6q3gfkd14v15hhpcwlkrnc00000gn/T/RtmpHJ4lD3/Rbuild14a0c1d107945/CDsampling/vignettes/Intro_to_CDsampling.R

Now, we can run the constrained lift-one algorithm:

set.seed(2025)
approximate_design = CDsampling::liftone_constrained_GLM(X=X, W=W_matrix, g.con=g.con, g.dir=g.dir, 
                                               g.rhs=g.rhs, lower.bound=lower.bound, 
                                               upper.bound=upper.bound, reltol=1e-10, 
                                               maxit=100, random=TRUE, nram=4, w00=NULL, 
                                               epsilon=1e-8)

print(approximate_design)
#> Optimal Sampling Results:
#> ================================================================================
#> w :
#> 0.25, 0.2, 0.05, 0.5, 0.0, 0.0 
#> --------------------------------------------------------------------------------
#> w0 :
#> 0.25, 0.2, 0.05, 0.5, 0.0, 0.0 
#> --------------------------------------------------------------------------------
#> maximum :
#> 2.8813e-08 
#> --------------------------------------------------------------------------------
#> convergence :
#> TRUE 
#> --------------------------------------------------------------------------------
#> itmax :
#> 1.0 
#> --------------------------------------------------------------------------------
#> deriv.ans :
#> 0.0, 3.6017e-08, 4.8528e-07, -1.1525e-07, -1.0310e-07, -7.9507e-08 
#> --------------------------------------------------------------------------------
#> gmax :
#> 0.0 
#> --------------------------------------------------------------------------------
#> reason :
#> "gmax <= 0" 
#> --------------------------------------------------------------------------------

/private/var/folders/vf/_6q3gfkd14v15hhpcwlkrnc00000gn/T/RtmpHJ4lD3/Rbuild14a0c1d107945/CDsampling/vignettes/Intro_to_CDsampling.R

To find the exact allocation (integer value of allocation), we can use the approxtoexact_constrained_func:

exact_design = CDsampling::approxtoexact_constrained_func(n=200, w=approximate_design$w, m=6, 
                                                           beta=beta, link='logit', X=X, 
                                                           Fdet_func=Fdet_func_GLM, 
                                                          iset_func=iset_func_trial) 

print(exact_design)
#> Optimal Sampling Results:
#> ================================================================================
#> allocation :
#> 50.0, 40.0, 10.0, 100.0, 0.0, 0.0 
#> --------------------------------------------------------------------------------
#> allocation.real :
#> 0.25, 0.2, 0.05, 0.5, 0.0, 0.0 
#> --------------------------------------------------------------------------------
#> det.maximum :
#> 46.1012 
#> --------------------------------------------------------------------------------

/private/var/folders/vf/_6q3gfkd14v15hhpcwlkrnc00000gn/T/RtmpHJ4lD3/Rbuild14a0c1d107945/CDsampling/vignettes/Intro_to_CDsampling.R

Note:

If we cannot obtain an approximated value of beta coefficients, we can also try to find the EW D-optimal allocations instead. For EW D-optimal allocation, we can come up with some prior distributions of beta coefficients, and replace the coefficients value with expectation of those prior distributions in the R codes. (Please see reference for more details). 

If there is no knowledge of beta coefficients, we can use constrained uniform design:

w00 = rep(1/200, 6)
unif_design = CDsampling::approxtoexact_constrained_func(n=200, w=w00, m=6, beta=NULL, 
  link=NULL, X=NULL, Fdet_func=Fdet_func_unif, iset_func=iset_func_trial)

print(unif_design)
#> Optimal Sampling Results:
#> ================================================================================
#> allocation :
#> 38.0, 38.0, 10.0, 38.0, 38.0, 38.0 
#> --------------------------------------------------------------------------------
#> allocation.real :
#> 0.005, 0.005, 0.005, 0.005, 0.005, 0.005 
#> --------------------------------------------------------------------------------
#> det.maximum :
#> 792351680.0 
#> --------------------------------------------------------------------------------

/private/var/folders/vf/_6q3gfkd14v15hhpcwlkrnc00000gn/T/RtmpHJ4lD3/Rbuild14a0c1d107945/CDsampling/vignettes/Intro_to_CDsampling.R

Data: trauma_data & Constrained sampling with MLM

trauma_data consists of \(N=802\) trauma patients. After the treatment, there are five outcomes, Death (\(1\)), Vegetative state (\(2\)), Major disability (\(3\)), Minor disability (\(4\)) and Good recovery (\(5\)). We consider two covariates, the symptoms of the trauma (\(x_{i1} = 0\) for mild or \(1\) for moderate /severe), there are \(392\) mild symptoms patients, and \(410\) moderate/severe symptoms patients. The other covariate is the treatment dose levels (\(x_{i2} = 1\) (Placebo), \(2\) (Low dose), \(3\) (Medium dose), and \(4\) (High dose). In total, we have \(m=8\) design points from the combination of levels from the two covariates. In this study, we plan to enroll \(n=600\) patients and the collection of feasible allocation is \(S=\{(w_1, \ldots, w_8)^\top \in S_0\mid n(w_1+w_2+w_3+w_4) \leq 392, n(w_5+w_6+w_7+w_8) \leq 410\}\). The model can be written as:\[\log(\frac{\pi_{i1}}{\pi_{i2}+\dots+\pi_{i5}}) = \beta_{11}+\beta_{12}x_{i1}+\beta_{13}x_{i2}\] \[\log(\frac{\pi_{i1}+\pi_{i2}}{\pi_{i3}+\pi_{i4}+\pi_{i5}}) = \beta_{21}+\beta_{22}x_{i1}+\beta_{23}x_{i2}\] \[\log(\frac{\pi_{i1}+\pi_{i2}+\pi_{i3}}{\pi_{i4}+\pi_{i5}}) = \beta_{31}+\beta_{32}x_{i1}+\beta_{33}x_{i2}\] \[\log(\frac{\pi_{i1}+\dots+\pi_{i4}}{\pi_{i5}}) = \beta_{41}+\beta_{42}x_{i1}+\beta_{43}x_{i2}\]

In this example, we assume the parameter to be \(\boldsymbol\beta = (\hat\beta_{11}, \hat\beta_{21}, \hat\beta_{31}, \hat\beta_{41}, \hat\beta_{12}, \hat\beta_{22}, \hat\beta_{32}, \hat\beta_{42}, \hat\beta_{13}, \hat\beta_{23}, \hat\beta_{33}, \hat\beta_{43})^\top = (-4.047, -2.225, -0.302, 1.386, 4.214, 3.519,\\ 2.420, 1.284, -0.131, -0.376, -0.237, -0.120)^\top\). In total, we have \(m=8\) design points from the combination of levels from the two covariates. In this study, we plan to enroll \(n=600\) patients and the collection of feasible allocation is \(S=\{(w_1, \ldots, w_8)^\top \in S_0\mid n(w_1+w_2+w_3+w_4) \leq 392, n(w_5+w_6+w_7+w_8) \leq 410\}\).

For this example, we can reuse the set-up of design matrix and beta coefficients from the MLM Fisher information matrix:

J=5; p=12; m=8; 
beta = c(-4.047, -0.131, 4.214, -2.225, -0.376, 3.519, -0.302,
    -0.237,  2.420, 1.386,  -0.120,  1.284)
Xi=rep(0,J*p*m)   
dim(Xi)=c(J,p,m)
Xi[,,1] = rbind(c( 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

Xi[,,2] = rbind(c( 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

Xi[,,3] = rbind(c( 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

 Xi[,,4] = rbind(c( 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

 Xi[,,5] = rbind(c( 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

 Xi[,,6] = rbind(c( 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 1, 2, 1, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 1, 2, 1, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

 Xi[,,7] = rbind(c( 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 1, 3, 1, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 1, 3, 1, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

 Xi[,,8] = rbind(c( 1, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 1, 4, 1, 0, 0, 0, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 1, 4, 1, 0, 0, 0),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 1),
                c( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

/private/var/folders/vf/_6q3gfkd14v15hhpcwlkrnc00000gn/T/RtmpHJ4lD3/Rbuild14a0c1d107945/CDsampling/vignettes/Intro_to_CDsampling.R

Similarly to the GLM example, we define the sample size, constraints, and boundaries using the following R codes:

nsample=600 #sample size
constraint = c(392, 410)  #mild:severe

lower.bound <- function(i, w0){
  n = 600
  constraint = c(392,410)
  if(i <= 4){
    a.lower <- (sum(w0[5:8])-(constraint[2]/n)*(1-w0[i]))/(sum(w0[5:8]))
  }
  else{
    a.lower <- (sum(w0[1:4])-(constraint[1]/n)*(1-w0[i]))/(sum(w0[1:4]))
  }
  a.lower
}

upper.bound <- function(i, w0){
  n = 600
  constraint = c(392,410)
  if(i <= 4){
    b.upper <- ((constraint[1]/n)*(1-w0[i]) - (sum(w0[1:4])-w0[i]))/(1-sum(w0[1:4]))
  }
  else{
    b.upper <- ((constraint[2]/n)*(1-w0[i]) - (sum(w0[5:8])-w0[i]))/(1-sum(w0[5:8]))
  }
  b.upper
}

g.con = matrix(0,nrow=length(constraint)+1+m, ncol=m)
g.con[2:3,] = matrix(data=c(1,1,1,1,0,0,0,0,0,0,0,0,1,1,1,1), ncol = m, byrow=TRUE)
g.con[1,] = rep(1, m)
g.con[4:(length(constraint)+1+m), ] = diag(1, nrow=m)
g.dir = c("==", "<=","<=", rep(">=",m))
g.rhs = c(1, ifelse((constraint/nsample<1),constraint/nsample,1), rep(0, m))

/private/var/folders/vf/_6q3gfkd14v15hhpcwlkrnc00000gn/T/RtmpHJ4lD3/Rbuild14a0c1d107945/CDsampling/vignettes/Intro_to_CDsampling.R

Then, we can use the constrained lift-one algorithm to find the locally D-optimal approximate sampling with liftone_constrained_MLM function in the package:

set.seed(123)
approx_design = CDsampling::liftone_constrained_MLM(m=m, p=p, Xi=Xi, J=J, beta=beta, 
                                       lower.bound=lower.bound, upper.bound=upper.bound, 
                                        g.con=g.con, g.dir=g.dir, g.rhs=g.rhs, w00=NULL, 
                                        link='cumulative', Fi.func = Fi_func_MLM, 
                                        reltol=1e-5, maxit=500, delta = 1e-6, 
                                        epsilon=1e-8, random=TRUE, nram=1)

print(approx_design)
#> Optimal Sampling Results:
#> ================================================================================
#> w :
#> 0.2594, 0.0, 0.0, 0.1666, 0.2796, 0.0, 0.0, 0.2944 
#> --------------------------------------------------------------------------------
#> w0 :
#> 0.0, 0.0, 0.3167, 0.0, 0.6833, 0.0, 0.0, 0.0 
#> --------------------------------------------------------------------------------
#> Maximum :
#> 7.4958e-11 
#> --------------------------------------------------------------------------------
#> itmax :
#> 12.0 
#> --------------------------------------------------------------------------------
#> convergence :
#> TRUE 
#> --------------------------------------------------------------------------------
#> deriv.ans :
#> -7.5290e-11, -4.0225e-11, -4.0225e-11, -3.7980e-10, 1.0340e-25, -4.0225e-11, -4.0225e-11, 5.2256e-11 
#> --------------------------------------------------------------------------------
#> gmax :
#> NA 
#> --------------------------------------------------------------------------------
#> reason :
#> "all derivative <=0" 
#> --------------------------------------------------------------------------------

/private/var/folders/vf/_6q3gfkd14v15hhpcwlkrnc00000gn/T/RtmpHJ4lD3/Rbuild14a0c1d107945/CDsampling/vignettes/Intro_to_CDsampling.R

To get the exact allocation, we can again apply the approxtoexact_constrained_func in the package:

exact_design = CDsampling::approxtoexact_constrained_func(n=600, w=approx_design$w, m=8, 
 beta=beta, link='cumulative', X=Xi, Fdet_func=Fdet_func_MLM, 
 iset_func=iset_func_trauma)

print(exact_design)
#> Optimal Sampling Results:
#> ================================================================================
#> allocation :
#> 155.0, 0.0, 0.0, 100.0, 168.0, 0.0, 0.0, 177.0 
#> --------------------------------------------------------------------------------
#> allocation.real :
#> 0.2594, 0.0, 0.0, 0.1666, 0.2796, 0.0, 0.0, 0.2944 
#> --------------------------------------------------------------------------------
#> det.maximum :
#> 1.63163827059162e+23 
#> --------------------------------------------------------------------------------

/private/var/folders/vf/_6q3gfkd14v15hhpcwlkrnc00000gn/T/RtmpHJ4lD3/Rbuild14a0c1d107945/CDsampling/vignettes/Intro_to_CDsampling.R

If there is no coefficients information, we can find the constrained uniform sampling with the folloiwng codes:

iset_func_trauma <- function(allocation){
  iset = rep(1,8)
  if(sum(allocation[1:4])>=392){iset[1:4]=0}
  if(sum(allocation[5:8])>=410){iset[5:8]=0}
  return(iset)
 }

unif_design = CDsampling::approxtoexact_constrained_func(n=600, w=rep(1/600,8), m=8, beta=NULL, 
 link=NULL, X=NULL, Fdet_func=Fdet_func_unif, iset_func=iset_func_trauma)

unif_design
#> Optimal Sampling Results:
#> ================================================================================
#> allocation :
#> 75.0, 75.0, 75.0, 75.0, 75.0, 75.0, 75.0, 75.0 
#> --------------------------------------------------------------------------------
#> allocation.real :
#> 0.0017, 0.0017, 0.0017, 0.0017, 0.0017, 0.0017, 0.0017, 0.0017 
#> --------------------------------------------------------------------------------
#> det.maximum :
#> 1001129150390625 
#> --------------------------------------------------------------------------------

/private/var/folders/vf/_6q3gfkd14v15hhpcwlkrnc00000gn/T/RtmpHJ4lD3/Rbuild14a0c1d107945/CDsampling/vignettes/Intro_to_CDsampling.R