C Analysis

Code to analyse data and determine number (CI) TB notifications in interrupted time series analysis/es.

Rachael Burke and Pete Dodd
2022-07-21

This is the code we used for implementing the analysis. Appendix XX in our paper shows the rationale and mathematical formula and will be useful for understanding this code.

This is in a mixture of tidverse and baseR (data.table); reflecting our collaboration!.

smp_c = bacteriologically confirmed at the clinic at time of starting TB treatment [“smp” = smear pos, but NB. misleading abbreviation as many Bac+ are Xpert pos]
all_f = all form TB

library(tidyverse)
library(lubridate)
library(here)
library(broom)
library(data.table)

acf_start_date <- dmy("01 April 2011") 
acf_end_date <- dmy("30 Sep 2014")

# Data - take out post-ACF time
smp_c <- readRDS(here("data","cnrs_smp_clinic.rds")) %>% filter(acftime!="post-acf") %>% as.data.table() 
all_f <- readRDS(here("data","cnrs_all.rds")) %>% filter(acftime!="post-acf") %>% as.data.table() # all form

# Base graphs (to illustrate model predictions / fit to real data as we go)
load(here("data/cnr_base_plot_micro_clinic.rdata"))
load(here("data/cnr_base_plot_all.rdata"))

t1 <- smp_c[acftime=='acf',min(yq_num)] # t1 = START of ACF
t2 <- smp_c[acftime=='acf',max(yq_num)] # t2 = END of ACF
T <- t2-t1+1  # plus one because goes from START of first quarter to END of last quarter; this is the TIME (in number of quarters) from ACF starting.

ACF area only (no control)

Model

woc = “without control”.

mod.woc <- glm(data=smp_c[acfarea=="b) ACF"],
               family=poisson,
               n ~
                 offset(log(population)) +
                 (1 + yq_num) +           # k_1 + s_1.t
                 acftime + acftime:yq_num # I(t)(a+b.t)
               )

tidy(mod.woc)
# A tibble: 4 × 5
  term              estimate std.error statistic     p.value
  <chr>                <dbl>     <dbl>     <dbl>       <dbl>
1 (Intercept)        -7.41      0.0722  -103.    0          
2 yq_num             -0.0264    0.0155    -1.70  0.0887     
3 acftimeacf          0.696     0.131      5.32  0.000000101
4 yq_num:acftimeacf  -0.0142    0.0170    -0.833 0.405      

Predictions / point estimates for Bac+

Use model and equation to get and plot point estimates for cases and CNR (all form and smear pos at clinic)

# Population during ACF time
P <- smp_c[acfarea=="b) ACF" & yq_num>=t1 & yq_num<=t2,population] # this is the population at any level

# The coefficient that go into estimating theta (= difference in cases / CNRs between "real" and "counterfactual")
theta.woc <- list(
  itz=t1:t2, # itz it a vector of all the "acftime" quarters
  Pop=P,
  k=coef(mod.woc)['(Intercept)'],
  s=coef(mod.woc)['yq_num'],
  a=coef(mod.woc)['acftimeacf'],
  b=coef(mod.woc)['yq_num:acftimeacf']
)

## Make data - numbers of diagnoses ("cases")
tz <- seq(from=min(smp_c$yq_num),to=max(smp_c$yq_num),by=0.1) #time as a near-continous variable
TZ <- smp_c[acfarea=="b) ACF",yq_num] # time as each quarter
It <- ifelse(tz>=t1 & tz<=t2,1,0)  #indicator
nap <- with(data=theta.woc,{exp(k + s*tz)}) #counterfactual: cases per one person per quarter (ie. no offset term)
nap2 <- with(data=theta.woc,{exp(It*(a+b*tz))}) #ACF effect: cases per one person per quarter
napc <- (nap*nap2)[tz%%1==0] # what actually happened (ie. adding on ACF effect): cases per one person per quarter

pop <- smp_c[acfarea=="b) ACF",population] # population in ACF area
popc <- smp_c[acfarea=="a) Non-ACF",population] # population in non-ACF area (not needed now, but create for later 'with control' scenario)

# Note re "with" function (see "nap" / "nap2" / "napc") as I wasn't familar at first
# with = "Evaluate an R expression in an environment constructed from data, possibly modifying (a copy of) the original data."
# This is a line of code to evaluate "k + s*tz" and "It*(a+b*tz)" in an enviroment where those bits are what they are in theta.woc (but presumably also using the global environment, since tz is in global enviroment rather than in 'theta.woc; environment?)

# Make data including CNR
cnr1 <- with(data=theta.woc,{(exp(k + s*tz)) * 100000 * 4}) # cases per one person per quarter * 100,000 people * 4 quarters = CNR.  In counterfactual (no ACF) conditions.
cnr2 <- with(data=theta.woc,{exp(It*(a+b*tz)) * 100000 * 4}) # cases per one person per quarter * 100,000 people * 4 quarters = CNR. Plus effect of ACF.
cnr3 <- (nap*nap2) * 100000 * 4

df_cnr_woc <- tibble(yq_num=tz, 
                     yq= seq(from=min(smp_c$yq), to=max(smp_c$yq + months(3)), length.out = length(tz)), # make 'tz' correspond to real dates
                     cfact=cnr1, 
                     real=cnr3)

df_cases_woc <- tibble(yq_num=TZ, 
                     yq= seq(from=min(smp_c$yq), to=max(smp_c$yq + months(3)), length.out = length(TZ)),
                     cfact=nap[tz%%1==0] * pop, 
                     real=(nap*nap2)[tz%%1==0] * pop)

## Plot using ggplot
cnr_base_plot_micro_clinic +
  geom_line(aes(x=yq, y=cfact, linetype="c'factual", color="b) ACF"), data=df_cnr_woc) +
  geom_line(aes(x=yq, y=real, linetype="observed", color="b) ACF"), data=df_cnr_woc) +
  scale_linetype_manual(values=c(2,1)) +
  scale_color_manual(values=c("#46ACC8","#B40F20","#E58601")) +
  ylim(c(0,450))

Model and predictions / point estimates for all form TB

Less annotation here, it’s the same as above.

# exactly same model, but run with new dataset for all form TB
mod.woc.allf <- glm(data=all_f[acfarea=="b) ACF"],
               family=poisson,
               n ~
                 offset(log(population)) +
                 (1 + yq_num) +           # k_1 + s_1.t
                 acftime + acftime:yq_num # I(t)(a+b.t)
               )

theta.woc.allf <- list(
  itz=t1:t2,
  Pop=P,
  k=coef(mod.woc.allf)['(Intercept)'],
  s=coef(mod.woc.allf)['yq_num'],
  a=coef(mod.woc.allf)['acftimeacf'],
  b=coef(mod.woc.allf)['yq_num:acftimeacf']
)

nap.af <- with(data=theta.woc.allf,{exp(k + s*tz)}) #counterfactual: cases per one person per quarter (ie. no offset term)
nap2.af <- with(data=theta.woc.allf,{exp(It*(a+b*tz))}) #ACF effect: cases per one person per quarter
napc.af <- (nap.af*nap2.af)[tz%%1==0] # what actually happened (ie. adding on ACF effect): cases per one person per quarter

cnr1.af <- with(data=theta.woc.allf,{(exp(k + s*tz)) * 100000 * 4}) # cases per one person per quarter * 100,000 people * 4 quarters = CNR.  In counterfactual conditions.
cnr2.af <- with(data=theta.woc.allf,{exp(It*(a+b*tz)) * 100000 * 4}) # cases per one person per quarter * 100,000 people * 4 quarters = CNR. Plus effect of ACF.
cnr3.af <- (nap.af*nap2.af) * 400000

df_cnr_woc_af <- tibble(yq_num=tz, 
                     yq= seq(from=min(smp_c$yq), to=max(smp_c$yq + months(3)), length.out = length(tz)), # make 'tz' correspond to real dates
                     cfact=cnr1.af, 
                     real=cnr3.af)

df_cases_woc_af <- tibble(yq_num=TZ, 
                     yq= seq(from=min(smp_c$yq), to=max(smp_c$yq + months(3)), length.out = length(TZ)),
                     cfact=nap.af[tz%%1==0] * pop, 
                     real=(nap.af*nap2.af)[tz%%1==0] * pop)

cnr_base_plot_all +
  geom_line(aes(x=yq, y=cfact, linetype="c'factual", color="b) ACF"), data=df_cnr_woc_af) +
  geom_line(aes(x=yq, y=real, linetype="observed", color="b) ACF"), data=df_cnr_woc_af) +
  scale_linetype_manual(values=c(2,1)) +
  scale_color_manual(values=c("#46ACC8","#B40F20","#E58601")) +
  ylim(c(0,850))

Delta method to calculate CIs

Firstly the formula

dpars <- c('k','s','a','b') # this is a list of the parameters in the equation / model [these are the alphanumeric versions of greek letters for the general equation, in the model they get meaningful names]

# expression = "Creates or tests for objects of mode "expression".
# This is the mathematical expression for the difference between the TB diagnoses over the sum of time (see appendix for equation in latex)
# Functions for the difference between real and counterfactual
# Need to differentiate by hand as R not clever enough to handle sum:
# These are all functions of "theta"; when we come to use this function, theta is defined in above chunks.

D.fun <- function(theta){
  list2env(theta,envir = environment())
  sum(Pop*(exp(a+k+(s+b)*itz)-exp(k+s*itz))) # cases
}

dD.fun <- function(theta){
  list2env(theta,envir = environment())
  c(sum(Pop*(exp(a+k+(s+b)*itz)-exp(k+s*itz))),
    sum(Pop*itz*(exp(a+k+(s+b)*itz)-exp(k+s*itz))),
    sum(Pop*(exp(a+k+(s+b)*itz))),
    sum(Pop*itz*(exp(a+k+(s+b)*itz))))
}

# And version without population for CNR effects
Dcnr.fun <- function(theta){
  list2env(theta,envir = environment())
  mean((exp(a+k+(s+b)*itz)-exp(k+s*itz))*1e5*4)
}

dDcnr.fun <- function(theta){
  list2env(theta,envir = environment())
  c(mean((exp(a+k+(s+b)*itz)-exp(k+s*itz))*1e5*4),
    mean(itz*(exp(a+k+(s+b)*itz)-exp(k+s*itz))*1e5*4),
    mean((exp(a+k+(s+b)*itz))*1e5*4),
    mean(itz*(exp(a+k+(s+b)*itz)))*1e5*4)
}

#' Checks: diagnoses / cases
df_cases_woc %>% filter(yq>=acf_start_date) %>% summarise(sum_real=sum(real), sum_cf=sum(cfact)) %>% mutate(diff=sum_real - sum_cf)
# A tibble: 1 × 3
  sum_real sum_cf  diff
     <dbl>  <dbl> <dbl>
1    1255.   775.  480.
sum(pop*((nap2-1)*nap)[tz%%1==0]) #discrete version 
[1] 480.134
D.fun(theta.woc)              #our formula - good!
[1] 480.134
# Checks: CNR
df_cnr_woc %>% filter(yq_num%%1==0) %>% filter(yq>=acf_start_date) %>% summarise(mean_real=mean(real), mean_cf=mean(cfact)) %>% mutate(diff=mean_real - mean_cf)
# A tibble: 1 × 3
  mean_real mean_cf  diff
      <dbl>   <dbl> <dbl>
1      263.    162.  101.
sum(((nap2-1)*nap)[tz%%1==0] * 100000 * 4)/ 14 # multiply by 100,000 people, by 4 quarters per year, divide by 14 months of ACF (as pre-ACF there are 0 extra cases)
[1] 100.977
Dcnr.fun(theta.woc)              # our formula - this is right
[1] 100.977

And now use formulas to calcuate difference, variance and CI

#' The names in the variance-covariance matrix are in the same order as the gradient terms, so:
Sig.woc <- vcov(mod.woc)
rownames(Sig.woc) <- colnames(Sig.woc) <- dpars

#' Function to compute variance using delta approximation:
delta.se.woc <- function(theta,sig){
  g <- dD.fun(theta)
  V <- t(g) %*% sig %*% (g)
  sqrt(V)
}
delta.se.woc(theta.woc, Sig.woc) #test - what is standard error?
         [,1]
[1,] 143.8592
delta.cnr.se.woc <- function(theta, sig){
  g <- dDcnr.fun(theta)
  V <- t(g) %*% sig %*% (g)
  sqrt(V)
}
delta.cnr.se.woc(theta.woc, Sig.woc) #test - what is standard error?
         [,1]
[1,] 29.89557
#' This would give the estimate as 95% CI
woc.result <- c(D.fun(theta.woc),
                D.fun(theta.woc) - 1.96*delta.se.woc(theta.woc, Sig.woc),
                D.fun(theta.woc) + 1.96*delta.se.woc(theta.woc, Sig.woc))
woc.result
[1] 480.1340 198.1700 762.0981
woc.cnr.result <- c(Dcnr.fun(theta.woc),
                   Dcnr.fun(theta.woc) -1.96*delta.cnr.se.woc(theta.woc, Sig.woc),
                   Dcnr.fun(theta.woc) + 1.96*delta.cnr.se.woc(theta.woc, Sig.woc))
woc.cnr.result
[1] 100.97697  42.38166 159.57229
# And for the relative increase (rather than absolute increase)
# From PD email: "In short, the proportion relative increase CIs should be R - 1.96*s/M to R + 1.96*s/M." R = relative difference, M= mean CNR observed, s.e. = st. error

abs <- (df_cnr_woc %>% filter(yq_num%%1==0) %>% filter(yq>=acf_start_date) %>% summarise(mean_real=mean(real), mean_cf=mean(cfact)) %>% mutate(diff=mean_real - mean_cf))$diff
mean_cnr <- (df_cnr_woc %>% filter(yq_num%%1==0) %>% filter(yq>=acf_start_date) %>% summarise(mean_real=mean(real), mean_cf=mean(cfact)) %>% mutate(diff=mean_real - mean_cf))$mean_cf
rel_diff <- abs / mean_cnr
se <- delta.cnr.se.woc(theta.woc, Sig.woc)[1]

# Relative differences
c(rel_diff, (rel_diff - 1.96*se/mean_cnr),(rel_diff + 1.96*se/mean_cnr))
[1] 0.6220157 0.2610700 0.9829615

Repeat for all form TB

Sig.woc.af <- vcov(mod.woc.allf)
rownames(Sig.woc.af) <- colnames(Sig.woc.af) <- dpars

woc.result.af <- c(D.fun(theta.woc.allf),
                D.fun(theta.woc.allf) - 1.96*delta.se.woc(theta.woc.allf, Sig.woc.af),
                D.fun(theta.woc.allf) + 1.96*delta.se.woc(theta.woc.allf, Sig.woc.af))
woc.result.af
[1] 1614.607 1245.178 1984.035
woc.cnr.result.af <- c(Dcnr.fun(theta.woc.allf),
                   Dcnr.fun(theta.woc.allf) -1.96*delta.cnr.se.woc(theta.woc.allf, Sig.woc.af),
                   Dcnr.fun(theta.woc.allf) + 1.96*delta.cnr.se.woc(theta.woc.allf, Sig.woc.af))
woc.cnr.result.af
[1] 337.4240 260.5197 414.3283
# And the relative difference
abs.af <- (df_cnr_woc_af %>% filter(yq_num%%1==0) %>% filter(yq>=acf_start_date) %>% summarise(mean_real=mean(real), mean_cf=mean(cfact)) %>% mutate(diff=mean_real - mean_cf))$diff
mean_cnr.af <- (df_cnr_woc_af %>% filter(yq_num%%1==0) %>% filter(yq>=acf_start_date) %>% summarise(mean_real=mean(real), mean_cf=mean(cfact)) %>% mutate(diff=mean_real - mean_cf))$mean_cf
rel_diff.af <- abs.af / mean_cnr.af
se.af <- delta.cnr.se.woc(theta.woc.allf, Sig.woc.af)[1]

c(rel_diff.af, (rel_diff.af - 1.96*se/mean_cnr),(rel_diff.af + 1.96*se/mean_cnr))
[1] 1.005841 0.644895 1.366787

With control

Model

#' Here, I'm creating the factors by hand so that: 1) we can generate a counterfactual using `predict` to check this formula; 2) I understand the parametrization, rather than trying to reverse-engineer how default R syntax handles things. The notation follows that in the definitions section.

tmp <- copy(smp_c) %>% as.data.table()                       #make a copy of the data
tmp[,com:=ifelse(acfarea=="b) ACF",1,0)] #community indicator 1=ACF
tmp[,It:=ifelse(acftime=="acf",1,0)]     #ACF time indicator (as above)
tmp[,tz:=yq_num]                         #time
tmp[,Ittz:=It*tz]                        #ACF:time
tmp[,Itc:=It*com]                        #ACF:community
tmp[,Ittzc:=It*tz*com]                   #ACF:time:community

#' Regression
mod.wc <- glm(data=tmp,
              family=poisson,
              n ~
                offset(log(population)) +
                1 + 
                com +     # k_c
                tz +      #s.t
                It + 
                Ittz +    # control (A + B.t)*It
                Itc +  
                Ittzc     # ACF com increment (a+b.t)*It
              )

summary(mod.wc)

Call:
glm(formula = n ~ offset(log(population)) + 1 + com + tz + It + 
    Ittz + Itc + Ittzc, family = poisson, data = tmp)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-3.0975  -0.9402  -0.3503   0.8241   4.8351  

Coefficients:
             Estimate Std. Error  z value Pr(>|z|)    
(Intercept) -7.568890   0.046326 -163.381  < 2e-16 ***
com          0.259291   0.050310    5.154 2.55e-07 ***
tz          -0.051830   0.009430   -5.496 3.88e-08 ***
It           0.132776   0.106859    1.243  0.21404    
Ittz         0.023811   0.011255    2.116  0.03438 *  
Itc          0.467066   0.153897    3.035  0.00241 ** 
Ittzc       -0.012593   0.009347   -1.347  0.17790    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 433.48  on 45  degrees of freedom
Residual deviance: 124.99  on 39  degrees of freedom
AIC: 433.6

Number of Fisher Scoring iterations: 4

The point estimates (Bac+)

#' Parameters from the w/control version
dparswc <- c('k0','k','s','A','B','a','b')
beta <- coef(mod.wc)
Sig.v <- Sig.wc <- vcov(mod.wc)
rownames(Sig.v) <- colnames(Sig.v) <- dparswc #same order, diff names

#' Parameters as list:
theta.wc <- list(
  Pop=P, # (Does it matter than here "P" is just for ACF communities? (Defined earlier))
  itz=t1:t2,
  k0=beta['(Intercept)'],k=beta['com'],
  s=beta['tz'],
  ## counter factual effect:
  A=beta['It'],B=beta['Ittz'],
  ## full effect:
  a=beta['Itc'],b=beta['Ittzc'])

# these were defined above, but recreating them here as a reminder that they exist
tz <- seq(from=min(smp_c$yq_num),to=max(smp_c$yq_num),by=0.1) #time
It <- ifelse(tz>=t1 & tz<=t2,1,0)  #indicator

#' Check understanding of parameters - as before these are all per one person per quarter (haven't added on 'pop' term)
## nonACF com
napc <- with(data=theta.wc,{exp(k0 + s*tz + It*(A + B*tz))})
## ACF com - without ACF
napa0 <- with(data=theta.wc,{exp(k0+k + s*tz + It*(A + B*tz))}) # this is our counterfactual
## ACF com - with ACF
napa <- with(data=theta.wc,{exp(It*(a + b*tz))})*napa0

# And make into a tibble;
df_cnr_wc <- tibble(
  yq_num=tz,
  yq= seq(from=min(smp_c$yq), to=max(smp_c$yq + months(3)), length.out = length(tz)),
  real=napa * 1e5 * 4,
  nonacf=napc * 1e5 * 4,
  cfact=napa0 * 1e5 * 4)

df_cases_wc <- tibble(
  yq_num=TZ,
  yq= seq(from=min(smp_c$yq), to=max(smp_c$yq + months(3)), length.out = length(TZ)),
  nonacf=napc[tz%%1==0]*popc,
  cfact=napa0[tz%%1==0]*pop,
  real=napa[tz%%1==0]*pop)

# And plot;
cnr_base_plot_micro_clinic +
  geom_line(aes(x=yq, y=real, linetype="observed", color="b) ACF"), data=df_cnr_wc) +
  geom_line(aes(x=yq, y=nonacf, linetype="observed", color="a) Non-ACF"), data=df_cnr_wc) +
  geom_line(aes(x=yq, y=cfact, linetype="c'fact", color="c) C'fact"), data=df_cnr_wc) +
  scale_linetype_manual(values=c(2,1)) +
  scale_color_manual(values=c("#46ACC8","#B40F20","#E58601")) +
  ylim(c(0,420))
# Looks good!!

The point estimates (all form TB)

tmp2 <- copy(all_f) %>% as.data.table()                       #make a copy of the data
tmp2[,com:=ifelse(acfarea=="b) ACF",1,0)] #community indicator 1=ACF
tmp2[,It:=ifelse(acftime=="acf",1,0)]     #ACF time indicator (as above)
tmp2[,tz:=yq_num]                         #time
tmp2[,Ittz:=It*tz]                        #ACF:time
tmp2[,Itc:=It*com]                        #ACF:community
tmp2[,Ittzc:=It*tz*com]                   #ACF:time:community

mod.wc.allf <- glm(data=tmp2,
              family=poisson,
              n ~
                offset(log(population)) +
                1 + 
                com +     # k_c
                tz +      #s.t
                It + 
                Ittz +    # control (A + B.t)*It
                Itc +  
                Ittzc     # ACF com increment (a+b.t)*It
              )

beta.allf <- coef(mod.wc.allf)
Sig.v.allf <- Sig.wc.allf <- vcov(mod.wc.allf)
rownames(Sig.v.allf) <- colnames(Sig.v.allf) <- dparswc #same order, diff names

# beta <- coef(mod.wc)
# Sig.v <- Sig.wc <- vcov(mod.wc)
# rownames(Sig.v) <- colnames(Sig.v) <- dparswc #same order, diff names

theta.wc.allf <- list(
  Pop=P, # (Does it matter than here "P" is just for ACF communities? (Defined earlier))
  itz=t1:t2,
  k0=beta.allf['(Intercept)'],k=beta.allf['com'],
  s=beta.allf['tz'],
  ## counter factual effect:
  A=beta.allf['It'],B=beta.allf['Ittz'],
  ## full effect:
  a=beta.allf['Itc'],b=beta.allf['Ittzc'])


napc.af <- with(data=theta.wc.allf,{exp(k0 + s*tz + It*(A + B*tz))})
napa0.af <- with(data=theta.wc.allf,{exp(k0+k + s*tz + It*(A + B*tz))}) # this is our counterfactual
napa.af <- with(data=theta.wc.allf,{exp(It*(a + b*tz))})*napa0.af

# And make into a tibble;
df_cnr_wc_af <- tibble(
  yq_num=tz,
  yq= seq(from=min(smp_c$yq), to=max(smp_c$yq + months(3)), length.out = length(tz)),
  real=napa.af * 1e5 * 4,
  nonacf=napc.af * 1e5 * 4,
  cfact=napa0.af * 1e5 * 4)

df_cases_wc_af <- tibble(
  yq_num=TZ,
  yq= seq(from=min(smp_c$yq), to=max(smp_c$yq + months(3)), length.out = length(TZ)),
  nonacf=napc.af[tz%%1==0]*popc,
  cfact=napa0.af[tz%%1==0]*pop,
  real=napa.af[tz%%1==0]*pop)

# And plot;
cnr_base_plot_all +
  geom_line(aes(x=yq, y=real, linetype="observed", color="b) ACF"), data=df_cnr_wc_af) +
  geom_line(aes(x=yq, y=nonacf, linetype="observed", color="a) Non-ACF"), data=df_cnr_wc_af) +
  geom_line(aes(x=yq, y=cfact, linetype="c'fact", color="c) C'fact"), data=df_cnr_wc_af) +
  scale_linetype_manual(values=c(2,1)) +
  scale_color_manual(values=c("#46ACC8","#B40F20","#E58601")) +
  ylim(c(0,850))

The formula for the difference

dparswc <- c('k0','k','s','A','B','a','b')

Dwc.fun <- function(theta){
  list2env(theta,envir = environment())
  sum(Pop*(exp(a+b*itz)-1)*exp(k0+k+A+(s+B)*itz)) # this is for cases
}

## diff by k,s,a,b,abar,bbar
dDwc.fun <- function(theta){
  list2env(theta,envir = environment())
  c(
    sum(Pop*(exp(a+b*itz)-1)*exp(k0+k+A+(s+B)*itz)), #(Again, this definitely right with "Pop" just being ACF area?)
    sum(Pop*(exp(a+b*itz)-1)*exp(k0+k+A+(s+B)*itz)),
    sum(Pop*(exp(a+b*itz)-1)*exp(k0+k+A+(s+B)*itz)*itz),
    sum(Pop*(exp(a+b*itz)-1)*exp(k0+k+A+(s+B)*itz)),
    sum(Pop*(exp(a+b*itz)-1)*exp(k0+k+A+(s+B)*itz)*itz),
    sum(Pop*(exp(a+b*itz))*exp(k0+k+A+(s+B)*itz)),
    sum(Pop*(exp(a+b*itz)*itz)*exp(k0+k+A+(s+B)*itz))
    )
}

#' and the CNR versions without population
Dwc.cnr.fun <- function(theta){
  list2env(theta,envir = environment())
  mean((exp(a+b*itz)-1)*exp(k0+k+A+(s+B)*itz)*1e5*4)
}

dDwc.cnr.fun <- function(theta){
  list2env(theta,envir = environment())
  c(
    mean((exp(a+b*itz)-1)*exp(k0+k+A+(s+B)*itz)*1e5*4),
    mean((exp(a+b*itz)-1)*exp(k0+k+A+(s+B)*itz)*1e5*4),
    mean((exp(a+b*itz)-1)*exp(k0+k+A+(s+B)*itz)*itz*1e5*4),
    mean((exp(a+b*itz)-1)*exp(k0+k+A+(s+B)*itz)*1e5*4),
    mean((exp(a+b*itz)-1)*exp(k0+k+A+(s+B)*itz)*itz*1e5*4),
    mean((exp(a+b*itz))*exp(k0+k+A+(s+B)*itz)*1e5*4),
    mean((exp(a+b*itz)*itz)*exp(k0+k+A+(s+B)*itz)*1e5*4)
  )
}

#' Check formula against predictions: YES
df_cases_wc %>% filter(yq_num%%1==0) %>% filter(yq>=acf_start_date) %>% summarise(real=sum(real), cfact=sum(cfact), nonacf=sum(nonacf)) %>% mutate(diff=real - cfact)
# A tibble: 1 × 4
   real cfact nonacf  diff
  <dbl> <dbl>  <dbl> <dbl>
1 1255.  951.  1638.  304.
Dwc.fun(theta.wc)
[1] 304.1835
sum(pop*(napa-napa0)[tz%%1==0])
[1] 304.1835
#' Check CNRs
df_cnr_wc %>% filter(yq_num%%1==0) %>% filter(yq>=acf_start_date) %>% summarise(real=mean(real), cfact=mean(cfact), nonacf=mean(nonacf)) %>% mutate(diff=real - cfact)
# A tibble: 1 × 4
   real cfact nonacf  diff
  <dbl> <dbl>  <dbl> <dbl>
1  263.  199.   154.  64.1
sum((napa-napa0)[tz%%1==0]) # as above this isn't quite right, but not sure how to fix it.  Probably doesn't matter though.
[1] 0.002242842
Dwc.cnr.fun(theta.wc)
[1] 64.08121

And the SE and CI of difference (Bac+)

#' Function to compute variance using delta approximation:
delta.se.wc <- function(theta, sv){
  g <- dDwc.fun(theta)
  V <- t(g) %*% sv %*% (g)
  sqrt(V)
}
delta.se.wc(theta.wc, Sig.v) #check
        [,1]
[1,] 63.9933
delta.cnr.se.wc <- function(theta, sv){
  g <- dDwc.cnr.fun(theta)
  V <- t(g) %*% sv %*% (g)
  sqrt(V)
}
delta.cnr.se.wc(theta.wc, Sig.v) #check
         [,1]
[1,] 13.41644
#' This would give the estimate as 95% CI
wc.result <- c(Dwc.fun(theta.wc),
               Dwc.fun(theta.wc) - 1.96*delta.se.wc(theta.wc, Sig.v),
               Dwc.fun(theta.wc) + 1.96*delta.se.wc(theta.wc, Sig.v))
wc.result
[1] 304.1835 178.7566 429.6103
#' CNR result
wc.cnr.result <- c(Dwc.cnr.fun(theta.wc),
                   Dwc.cnr.fun(theta.wc) -
                   1.96*delta.cnr.se.wc(theta.wc,Sig.v),
                   Dwc.cnr.fun(theta.wc) +
                   1.96*delta.cnr.se.wc(theta.wc,Sig.v))
wc.cnr.result
[1] 64.08121 37.78498 90.37744
# And relative increase
abs <- wc.cnr.result[1]
M <- (df_cnr_wc %>% filter(yq_num%%1==0) %>% filter(yq>=acf_start_date) %>% summarise(real=mean(real), cfact=mean(cfact), nonacf=mean(nonacf)) %>% mutate(diff=real - cfact))$cfact
R <- abs / M
se <- delta.cnr.se.wc(theta.wc,Sig.v)[1]

(R - 1.96*se/M)
[1] 0.1896512
(R + 1.96*se/M)
[1] 0.4536245
R
[1] 0.3216378

SE and CI of difference (all form)

wc.result.af <- c(Dwc.fun(theta.wc.allf),
                Dwc.fun(theta.wc.allf) - 1.96*delta.se.wc(theta.wc.allf, Sig.v.allf),
                Dwc.fun(theta.wc.allf) + 1.96*delta.se.wc(theta.wc.allf, Sig.v.allf))
wc.result.af
[1] 1142.2298  964.4464 1320.0132
wc.cnr.result.af <- c(Dwc.cnr.fun(theta.wc.allf),
                   Dwc.cnr.fun(theta.wc.allf) -1.96*delta.cnr.se.wc(theta.wc.allf, Sig.v.allf),
                   Dwc.cnr.fun(theta.wc.allf) + 1.96*delta.cnr.se.wc(theta.wc.allf, Sig.v.allf))
wc.cnr.result.af
[1] 239.4112 202.1827 276.6397
# And relative increase
abs.af <- wc.cnr.result.af[1]
M.af <- (df_cnr_wc_af %>% filter(yq_num%%1==0) %>% filter(yq>=acf_start_date) %>% summarise(real=mean(real), cfact=mean(cfact), nonacf=mean(nonacf)) %>% mutate(diff=real - cfact))$cfact
R.af <- abs.af / M.af
se.af <- delta.cnr.se.wc(theta.wc.allf,Sig.v.allf)[1]

(R.af - 1.96*se.af/M.af)
[1] 0.4664203
(R.af + 1.96*se.af/M.af)
[1] 0.6381872
R.af
[1] 0.5523037

Collecting the estimates and their CIs here in one place

For table 2 (see b_tableandfig.Rmd)

a <- df_cases_woc %>% filter(yq>=acf_start_date) %>% summarise(sum_real=sum(real), sum_cf=sum(cfact)) %>% mutate(diff=sum_real - sum_cf)
c <- df_cnr_woc %>% filter(yq_num%%1==0) %>% filter(yq>=acf_start_date) %>% summarise(mean_real=mean(real), mean_cf=mean(cfact)) %>% mutate(diff=mean_real - mean_cf)

a <- round(a)
c <- round(c)
woc.result <- round(woc.result,digits=0)
woc.cnr.result <- round(woc.cnr.result,digits=0)

b <- df_cases_woc_af %>% filter(yq>=acf_start_date) %>% summarise(sum_real=sum(real), sum_cf=sum(cfact)) %>% mutate(diff=sum_real - sum_cf)
d <- df_cnr_woc_af %>% filter(yq_num%%1==0) %>% filter(yq>=acf_start_date) %>% summarise(mean_real=mean(real), mean_cf=mean(cfact)) %>% mutate(diff=mean_real - mean_cf)

b <- round(b)
d <- round(d)
woc.result.af <-round(woc.result.af)
woc.cnr.result.af <- round(woc.cnr.result.af)

woc <- tibble(model=rep("woc", each=4),
              scenario=c("observed", "cfact (woc)", "observed", "cfact (woc)"),
              tb_type=rep(c("smp","allf"), each=2),
              sum_cases=c(a$sum_real, a$sum_cf, b$sum_real, b$sum_cf),
              mean_cnr=c(c$mean_real, c$mean_cf, d$mean_real, d$mean_cf),
              diff_cases=c(
                paste0(woc.result[1]," (",woc.result[2]," to ", woc.result[3],")"),
                paste0(woc.result[1]," (",woc.result[2]," to ", woc.result[3],")"),
                paste0(woc.result.af[1]," (",woc.result.af[2]," to ", woc.result.af[3],")"),
                paste0(woc.result.af[1]," (",woc.result.af[2]," to ", woc.result.af[3],")")),
              diff_cnr=c(
                paste0(woc.cnr.result[1]," (",woc.cnr.result[2]," to ", woc.cnr.result[3],")"),
                paste0(woc.cnr.result[1]," (",woc.cnr.result[2]," to ", woc.cnr.result[3],")"),
                paste0(woc.cnr.result.af[1]," (",woc.cnr.result.af[2]," to ", woc.cnr.result.af[3],")"),
                paste0(woc.cnr.result.af[1]," (",woc.cnr.result.af[2]," to ", woc.cnr.result.af[3],")"))
              )

e <- df_cases_wc %>% filter(yq_num%%1==0) %>% filter(yq>=acf_start_date) %>% summarise(real=sum(real), cfact=sum(cfact), nonacf=sum(nonacf)) %>% mutate(diff=real - cfact)
f <- df_cnr_wc %>% filter(yq_num%%1==0) %>% filter(yq>=acf_start_date) %>% summarise(real=mean(real), cfact=mean(cfact), nonacf=mean(nonacf)) %>% mutate(diff=real - cfact)
g <- df_cases_wc_af %>% filter(yq_num%%1==0) %>% filter(yq>=acf_start_date) %>% summarise(real=sum(real), cfact=sum(cfact), nonacf=sum(nonacf)) %>% mutate(diff=real - cfact)
h <- df_cnr_wc_af %>% filter(yq_num%%1==0) %>% filter(yq>=acf_start_date) %>% summarise(real=mean(real), cfact=mean(cfact), nonacf=mean(nonacf)) %>% mutate(diff=real - cfact)

e <- round(e)
f <- round(f)
g <- round(g)
h <- round(h)

wc.result <- round(wc.result)
wc.result.af <- round(wc.result.af)

wc.cnr.result <- round(wc.cnr.result)
wc.cnr.result.af <-  round(wc.cnr.result.af)

wc <- tibble(model=rep("wc", each=4),
              scenario=c("observed", "cfact (woc)", "observed", "cfact (woc)"),
              tb_type=rep(c("smp","allf"), each=2),
              sum_cases=c(e$real, e$cfact, g$real, g$cfact),
              mean_cnr=c(f$real, f$cfact, h$real, h$cfact),
              diff_cases=c(
                paste0(wc.result[1]," (",wc.result[2]," to ", wc.result[3],")"),
                paste0(wc.result[1]," (",wc.result[2]," to ", wc.result[3],")"),
                paste0(wc.result.af[1]," (",wc.result.af[2]," to ", wc.result.af[3],")"),
                paste0(wc.result.af[1]," (",wc.result.af[2]," to ", wc.result.af[3],")")),
             diff_cnr=c(
                paste0(wc.cnr.result[1]," (",wc.cnr.result[2]," to ", wc.cnr.result[3],")"),
                paste0(wc.cnr.result[1]," (",wc.cnr.result[2]," to ", wc.cnr.result[3],")"),
                paste0(wc.cnr.result.af[1]," (",wc.cnr.result.af[2]," to ", wc.cnr.result.af[3],")"),
                paste0(wc.cnr.result.af[1]," (",wc.cnr.result.af[2]," to ", wc.cnr.result.af[3],")"))
              )


result <- wc %>% rbind(woc)
result
# A tibble: 8 × 7
  model scenario    tb_type sum_cases mean_cnr diff_cases    diff_cnr 
  <chr> <chr>       <chr>       <dbl>    <dbl> <chr>         <chr>    
1 wc    observed    smp          1255      263 304 (179 to … 64 (38 t…
2 wc    cfact (woc) smp           951      199 304 (179 to … 64 (38 t…
3 wc    observed    allf         3213      673 1142 (964 to… 239 (202…
4 wc    cfact (woc) allf         2071      433 1142 (964 to… 239 (202…
5 woc   observed    smp          1255      263 480 (198 to … 101 (42 …
6 woc   cfact (woc) smp           775      162 480 (198 to … 101 (42 …
7 woc   observed    allf         3213      673 1615 (1245 t… 337 (261…
8 woc   cfact (woc) allf         1598      335 1615 (1245 t… 337 (261…

And save the results so that can make a well formatted table in next step. Also save the models to use to draw Fig 3.

saveRDS(result, file=here("data","results_summary.rds"))

saveRDS(mod.woc, file=here("data","mod.woc.rds"))
saveRDS(mod.woc.allf, file=here("data","mod.woc.allf.rds"))
saveRDS(mod.wc, file=here("data","mod.wc.rds"))
saveRDS(mod.wc.allf, file=here("data","mod.wc.allf.rds"))