6 Legacy

Solutions from previous workshops

6.1 Tidyr legacy

library(PKPDmisc)
library(knitr)
library(lazyeval)
library(tidyverse)
#> Loading tidyverse: ggplot2
#> Loading tidyverse: tibble
#> Loading tidyverse: tidyr
#> Loading tidyverse: readr
#> Loading tidyverse: purrr
#> Loading tidyverse: dplyr
#> Conflicts with tidy packages ----------------------------------------------
#> filter():     dplyr, stats
#> is_formula(): purrr, lazyeval
#> lag():        dplyr, stats

eta_cov <- read.csv("../data/ebe_cov_full.csv")

kable(head(eta_cov))

ID	ETA1	ETA2	ETA4	ETA5	ETA6	ETA7	ETA8	ETA9	BW	BMI	AGE	AST	ALT	CRCL	SEX	RACE
1	0.160	-0.067	-0.195	0.058	0.083	0.167	0.204	-0.114	109.4	38.3	48	13	17	131	1	1
4	0.681	0.165	0.276	-0.107	0.099	-1.562	0.355	0.056	120.2	31.3	53	38	77	177	0	1
5	0.480	0.017	-0.302	0.062	-0.287	0.260	-0.152	0.022	83.0	24.5	32	26	35	111	0	1
6	0.339	0.001	-0.105	0.079	-0.228	-0.326	-0.138	0.105	64.2	21.0	33	19	20	97	0	1
7	-0.139	0.187	0.155	0.260	0.122	-1.381	0.220	-0.063	74.4	26.1	47	16	25	93	0	1
8	-0.115	0.060	-0.063	0.230	-0.328	0.317	-0.492	0.076	68.4	21.8	32	15	24	103	0	1


g_eta_cov <- eta_cov %>% 
    gather(cov_name, cov_value, BW:CRCL)

kable(head(g_eta_cov))

ID	ETA1	ETA2	ETA4	ETA5	ETA6	ETA7	ETA8	ETA9	SEX	RACE	cov_name	cov_value
1	0.160	-0.067	-0.195	0.058	0.083	0.167	0.204	-0.114	1	1	BW	109.4
4	0.681	0.165	0.276	-0.107	0.099	-1.562	0.355	0.056	0	1	BW	120.2
5	0.480	0.017	-0.302	0.062	-0.287	0.260	-0.152	0.022	0	1	BW	83.0
6	0.339	0.001	-0.105	0.079	-0.228	-0.326	-0.138	0.105	0	1	BW	64.2
7	-0.139	0.187	0.155	0.260	0.122	-1.381	0.220	-0.063	0	1	BW	74.4
8	-0.115	0.060	-0.063	0.230	-0.328	0.317	-0.492	0.076	0	1	BW	68.4

lazily evaluated function for ggplot plots

eta_cov_scatter <- function(df, xval = "cov_value", yval, cov_name = "cov_name") {
  lazy_plot <- lazyeval::interp(~ggplot(df, aes(x = cov_value, y = ETA1)) +
    geom_point() + facet_wrap(~cov_name, scales="free"),
    cov_value = as.name(xval),
    ETA1 = as.name(yval),
    cov_name = as.name(cov_name))
  return(lazyeval::lazy_eval(lazy_plot))
}

6.1.1 Single plot example

eta_cov_scatter(g_eta_cov, yval = "ETA1")

6.1.2 Iterate through multiple ETA values

lapply(paste0("ETA", 1:4), function(eta, g_eta_cov) {
  eta_cov_scatter(g_eta_cov, yval = eta)
}, g_eta_cov)
#> [[1]]

#> 
#> [[2]]

#> 
#> [[3]]

#> 
#> [[4]]

6.1.3 Double stack

We can actually gather again


g2_eta_cov <- g_eta_cov %>% gather(eta_name, eta_value, ETA1:ETA9 )

kable(head(g2_eta_cov))

ID	SEX	RACE	cov_name	cov_value	eta_name	eta_value
1	1	1	BW	109.4	ETA1	0.160
4	0	1	BW	120.2	ETA1	0.681
5	0	1	BW	83.0	ETA1	0.480
6	0	1	BW	64.2	ETA1	0.339
7	0	1	BW	74.4	ETA1	-0.139
8	0	1	BW	68.4	ETA1	-0.115

kable(tail(g2_eta_cov))

	ID	SEX	RACE	ETHNIC	cov_name	cov_value	eta_name	eta_value
3289	91	0	1	0	CRCL	161	ETA9	0.008
3290	92	0	1	0	CRCL	124	ETA9	0.052
3291	93	1	1	0	CRCL	136	ETA9	0.134
3292	95	0	1	0	CRCL	213	ETA9	0.073
3293	97	0	1	0	CRCL	127	ETA9	-0.007
3294	98	0	1	1	CRCL	86	ETA9	0.026

Then we can split up the plots

split_eta_cov <- g2_eta_cov %>% split(.$cov_name)

6.1.4 plot all releationships

lapply(split_eta_cov, function(x) {
   cov_name <- unique(x$cov_name)
  ggplot(x, aes(x = cov_value, y = eta_value)) +
    geom_point() + facet_wrap(~eta_name, scales = "free") +
    geom_smooth(se = F) +
    ggtitle(cov_name) +
    xlab(cov_name) 
}) 
#> $AGE
#> `geom_smooth()` using method = 'loess'

#> 
#> $ALT
#> `geom_smooth()` using method = 'loess'

#> 
#> $AST
#> `geom_smooth()` using method = 'loess'

#> 
#> $BMI
#> `geom_smooth()` using method = 'loess'

#> 
#> $BW
#> `geom_smooth()` using method = 'loess'

#> 
#> $CRCL
#> `geom_smooth()` using method = 'loess'

6.2 dplyr data manipulation

library(PKPDmisc)
library(knitr)
library(tidyverse)

Objectives:

Import datasets and documents
Perform basic data manipulation upon importing the data.

6.2.1 Task-I

Use the .csv files demog, IV, and Oral provided into the data object folder.

Read in all three csv files and give them descriptive names (not data1, data2, data3)

demog <- read_csv("../data/demog.csv")
#> Parsed with column specification:
#> cols(
#>   ID = col_integer(),
#>   SEX = col_character(),
#>   WT = col_double(),
#>   AGE = col_integer(),
#>   RACE = col_character()
#> )
iv_data <- read_csv("../data/IV.csv")
#> Parsed with column specification:
#> cols(
#>   ID = col_integer(),
#>   TIME = col_double(),
#>   DV = col_character(),
#>   AMT = col_integer(),
#>   DOSE = col_integer()
#> )
oral_data <- read_csv("../data/ORAL.csv")
#> Parsed with column specification:
#> cols(
#>   ID = col_integer(),
#>   TIME = col_double(),
#>   DV = col_character(),
#>   AMT = col_integer(),
#>   DOSE = col_integer()
#> )

The goals of this section:

Use data manipulation tools to prepare the dataset for analysis

6.2.2 Task-II

Rename “DV” column as “COBS”

iv_data <- iv_data %>% rename(COBS = DV)
oral_data <- oral_data %>% rename(COBS = DV)

Add a Formulation column and label IV/Oral for each dataset

iv_data <- iv_data %>% mutate(FORM = "IV")
oral_data <- oral_data %>% mutate(FORM = "ORAL")

Appropriately merge the demographics dataset into the IV and Oral dataset
Create one integrated dataset with both IV and Oral data.

combined_data <- bind_rows(iv_data, oral_data)

## check to see if any ids not in the other
anti_join(combined_data, demog)
#> Joining, by = "ID"
#> # A tibble: 0 x 6
#> # ... with 6 variables: ID <int>, TIME <dbl>, COBS <chr>, AMT <int>,
#> #   DOSE <int>, FORM <chr>
anti_join(demog, combined_data)
#> Joining, by = "ID"
#> # A tibble: 2 x 5
#>      ID    SEX    WT   AGE      RACE
#>   <int>  <chr> <dbl> <int>     <chr>
#> 1    51   Male    60    28 Caucasian
#> 2    52 Female    70    33     Asian

Two individuals do not have any concentration-time data

all_data <- left_join(combined_data, demog)
#> Joining, by = "ID"

Perform the following tasks:
1. Ensure that the following columns are numeric and not text: TIME, COBS, WT, AGE, AMT and DOSEs

all_data %>% select(TIME, COBS, WT, AGE, AMT, DOSE) %>% str
#> Classes 'tbl_df', 'tbl' and 'data.frame':    1200 obs. of  6 variables:
#>  $ TIME: num  0 0.25 0.5 1 2 3 4 6 8 12 ...
#>  $ COBS: chr  NA "1273.5" "995.38" "1254.7" ...
#>  $ WT  : num  56.8 56.8 56.8 56.8 56.8 56.8 56.8 56.8 56.8 56.8 ...
#>  $ AGE : int  28 28 28 28 28 28 28 28 28 28 ...
#>  $ AMT : int  100 NA NA NA NA NA NA NA NA NA ...
#>  $ DOSE: int  100 100 100 100 100 100 100 100 100 100 ...

COBS is a character column, therefore want to find out what character values exist

# check what character values are present
unique_non_numerics(all_data$COBS)
#> [1] "BQL"

b. Change the following:
c. Create a new column called BQLFLAG which takes a value of "0" if there is a numerical value in CObs and "1" if there is "BQL" in CObs.

# if don't manually specify to handle NA COBS, will also get NA values for BQLFLAG
all_data <- all_data %>% mutate(BQLFLAG = ifelse(is.na(COBS), 0, 
                                                 ifelse(COBS == "BQL", 1, 0)),
                                COBS = as_numeric(COBS))
#> Warning in as_numeric(COBS): NAs introduced by coercion

all_data %>% head %>% kable

ID	TIME	COBS	AMT	DOSE	FORM	SEX	WT	AGE	RACE
1	0.00	NA	100	100	IV	Female	56.8	28	Hispanic
1	0.25	1274	NA	100	IV	Female	56.8	28	Hispanic
1	0.50	995	NA	100	IV	Female	56.8	28	Hispanic
1	1.00	1255	NA	100	IV	Female	56.8	28	Hispanic
1	2.00	1038	NA	100	IV	Female	56.8	28	Hispanic
1	3.00	1135	NA	100	IV	Female	56.8	28	Hispanic

all_data %>% filter(BQLFLAG ==1) %>% kable

ID	TIME	COBS	AMT	DOSE	FORM	SEX	WT	AGE	RACE	BQLFLAG
20	24	NA	NA	100	IV	Male	80.9	31	Asian	1
20	24	NA	NA	100	ORAL	Male	80.9	31	Asian	1

d. Filter the dataset such that you remove all rows where BQLFLAG=1
    i. WT from lb to kg 
    iv. CObs from μg/mL to μg/L

f_all_data <- all_data %>% filter(BQLFLAG != 1)
f_all_data_adjunits <- f_all_data %>% mutate(WT = WT/2.2,
                                             COBS = COBS*1000)

f_all_data_adjunits %>% head %>% kable

ID	TIME	COBS	AMT	DOSE	FORM	SEX	WT	AGE	RACE
1	0.00	NA	100	100	IV	Female	25.8	28	Hispanic
1	0.25	1273500	NA	100	IV	Female	25.8	28	Hispanic
1	0.50	995380	NA	100	IV	Female	25.8	28	Hispanic
1	1.00	1254700	NA	100	IV	Female	25.8	28	Hispanic
1	2.00	1037600	NA	100	IV	Female	25.8	28	Hispanic
1	3.00	1135400	NA	100	IV	Female	25.8	28	Hispanic

e. Create a new column called "GENDER" where:
    i. Female = 0
    ii. Male = 1 
f. Create a new column called RACEN where:
    i. Caucasian = 0
    ii. Asian = 1
    iii. Black = 2
    iv. Hispanic = 3
g. Create a new column called "LOGCOBS" where CObs is in the log scale
h. Create a new column called "USUBJID" - unique subject ID as combination of formulation and ID (hint check out `?interaction`)

i. Remove the following columns
i. SEX
ii. RACE

final_data <- f_all_data_adjunits %>% mutate(
  GENDER = ifelse(SEX == "Female", 0, 1),
  RACEN = as.numeric(factor(RACE, levels = c("Caucasian", "Asian", "Black", "Hispanic"))),
  LOGCOBS = log(COBS),
  USUBJID = interaction(ID, FORM)
) %>% select(-SEX, -RACE)

Save the above modifications as a new csv file

write_csv(final_data, "iv_oral_alldat.csv", na = ".")

6.2.3 Summary Statistics

show a summary for all demographic columns

final_data <- final_data %>% 
  mutate(GENDER = as.factor(GENDER),
         RACEN = as.factor(RACEN))
uid_final_data <- final_data %>% distinct(ID, .keep_all = TRUE)

uid_final_data %>% 
  select(WT, AGE, GENDER, RACEN) %>%
 summary %>% kable

WT	AGE	GENDER	RACEN
Min. :23.8	Min. :20.0	0:28	1:17
1st Qu.:26.6	1st Qu.:31.0	1:22	2: 8
Median :29.1	Median :39.5	NA	3:12
Mean :29.1	Mean :38.5	NA	4:13
3rd Qu.:31.3	3rd Qu.:48.0	NA	NA
Max. :36.8	Max. :59.0	NA	NA

Count the number of males/females in the dataset

# be careful only 1 row per id if calculating this way
uid_final_data %>% nrow
#> [1] 50
# or
n_distinct(uid_final_data$ID)
#> [1] 50

Count the number of subjects in each “Race” category

uid_final_data %>%  
  group_by(RACEN) %>% 
  tally
#> # A tibble: 4 x 2
#>    RACEN     n
#>   <fctr> <int>
#> 1      1    17
#> 2      2     8
#> 3      3    12
#> 4      4    13

calculate the min, mean, and max values for WT, AGE:
1. by Gender

uid_final_data %>% 
  select(GENDER, WT, AGE) %>%
  group_by(GENDER) %>% 
  summarize_all(funs(min, mean, max)) %>% 
  kable

GENDER	WT_min	AGE_min	WT_mean	AGE_mean	WT_max	AGE_max
0	23.8	20	27.0	37.0	31.4	51
1	29.2	28	31.8	40.5	36.8	59

b. by Race

uid_final_data %>% select(RACEN, WT, AGE) %>%
  group_by(RACEN) %>% 
  summarize_all(funs(min, mean, max)) %>% 
  kable

RACEN	WT_min	AGE_min	WT_mean	AGE_mean	WT_max	AGE_max
1	23.8	20	28.3	40.1	35.5	51
2	24.1	22	29.4	36.1	36.8	50
3	23.9	26	29.1	36.0	35.0	51
4	25.8	22	30.0	40.2	33.7	59

What is the Average numbers samples(observations) per individual in this dataset. Hint: make sure you are only counting samples, not necessarily all rows are observations!

# don't want dosing observations
final_data %>% filter(is.na(AMT)) %>% group_by(ID) %>% 
  summarize(num_obs = n()) %>%
  summarize(avg_samples = mean(num_obs))
#> # A tibble: 1 x 1
#>   avg_samples
#>         <dbl>
#> 1          22

Calculate the Mean, 5th, and 95th percentile concentration at each time point for each formulation and dose level. hint: you can use ?quantile to calculate various quantiles

final_data %>%
  group_by(TIME) %>% 
  s_quantiles(COBS, probs = c(0.05, 0.5, 0.95)) %>% 
  kable

TIME	COBS_q5	COBS_q50	COBS_q95
0.00	NA	NA	NA
0.25	179528	1013450	6299400
0.50	315901	1339500	6196680
1.00	516881	1602900	4941020
2.00	661580	1556600	4623085
3.00	609477	1407150	4218805
4.00	538884	1237250	3752430
6.00	350257	882890	2881720
8.00	170944	736590	2139750
12.00	86539	372920	1449365
16.00	28623	198495	987036
24.00	3748	81368	550874

6.3 Nonstandard evaluation

library(lazyeval)
library(PKPDdatasets)
library(PKPDmisc)
library(tidyverse)
#> Loading tidyverse: ggplot2
#> Loading tidyverse: tibble
#> Loading tidyverse: tidyr
#> Loading tidyverse: readr
#> Loading tidyverse: purrr
#> Loading tidyverse: dplyr
#> Conflicts with tidy packages ----------------------------------------------
#> filter():     dplyr, stats
#> is_formula(): purrr, lazyeval
#> lag():        dplyr, stats

eta_cov <- read_csv("../data/EtaCov_base.csv")
#> Parsed with column specification:
#> cols(
#>   Scenario = col_character(),
#>   ID = col_integer(),
#>   WT = col_integer(),
#>   AGE = col_integer(),
#>   nV = col_double(),
#>   nCl = col_double(),
#>   nKa = col_double()
#> )

lazyeval::interp()
lazyeval::lazy_eval()

This doesn’t work, as inside aes, ggplot literally evaluates the column names, so will look for the column called xtemplate, instead of Time

x <- "Time"
y <- "Conc"
ggplot(df, 
       aes(x=xtemplate, 
           y=ytemplate, 
           group = group_template)) + 
  geom_line() + 
  geom_point()

conc_time <- function(df, xcol, ycol, group_var) {
  p <- lazyeval::interp(~ggplot(df, 
       aes(x=xtemplate, 
           y=ytemplate, 
           group = group_template)) + 
  geom_line() + 
  geom_point() + theme_bw() + base_theme(),
                        xtemplate = as.name(xcol),
                        ytemplate = as.name(ycol),
                        group_template = as.name(group_var))
    
  return(lazyeval::lazy_eval(p))
}

capitalize_names(Theoph) %>%
  conc_time("TIME", "CONC", "SUBJECT")



capitalize_names(sd_oral_richpk) %>%
  conc_time("TIME", "CONC", "ID") +
  geom_hline(yintercept = 20, color = "red")

eta_vs_cov <- function(df, xcol, ycol, group_var, facet_var) {
  p <- lazyeval::interp(~ggplot(df, 
       aes(x=xtemplate, 
           y=ytemplate, 
           group = group_template))  + 
  geom_point() + facet_wrap(~facet_template) + stat_smooth(se = F) +
    theme_bw() + base_theme(),
                        xtemplate = as.name(xcol),
                        ytemplate = as.name(ycol),
                        group_template = as.name(group_var),
                        facet_template = as.name(facet_var))
    
  return(lazyeval::lazy_eval(p))
}

g_eta_cov <- eta_cov %>% gather(etaname, etaval, nV:nKa)

g2_eta_cov <- g_eta_cov %>% gather(covname, covval, AGE, WT)

eta_cov_list <- g2_eta_cov %>% split(.$covname)

eta_cov_list %>% lapply(eta_vs_cov, "covval", "etaval", group = "etaval", "etaname")
#> $AGE
#> `geom_smooth()` using method = 'loess'

#> 
#> $WT
#> `geom_smooth()` using method = 'loess'

cov_df <- eta_cov %>% select(WT:AGE)
plot_list <- list()
for (name in names(cov_df)) {
   plot_list[[name]] <- g_eta_cov %>% 
    eta_vs_cov(name, 
               "etaval", 
               group = "etaname", 
               facet_var = "etaname")
}


plot_list[["WT"]] + geom_vline(xintercept = 80, color = "red", size = 1.5)
#> `geom_smooth()` using method = 'loess'


lapply(plot_list, function(x) {
    # be aware that hard coded intercepts can run into issues with multiple plots
    #instead should use a list or named vector to setup the xintercept by cov name
  p <- x + geom_vline(xintercept = 80, color = "red", size = 1.5)
  return(p)
})
#> $WT
#> `geom_smooth()` using method = 'loess'

#> 
#> $AGE
#> `geom_smooth()` using method = 'loess'

6.4 Diagnostic Plots

read in the csv datasets:

EtaCov_gathered
Residuals
Theta

library(PKPDmisc)
library(knitr)
library(tidyverse)
#> Loading tidyverse: ggplot2
#> Loading tidyverse: tibble
#> Loading tidyverse: tidyr
#> Loading tidyverse: readr
#> Loading tidyverse: purrr
#> Loading tidyverse: dplyr
#> Conflicts with tidy packages ----------------------------------------------
#> filter(): dplyr, stats
#> lag():    dplyr, stats

resid <- read_phx("../data/Residuals.csv")
theta <- read_phx("../data/Theta.csv")
etacov_gathered <- read_phx("../data/EtaCov_gathered.csv")

From the Theta table, create a reasonable quality output table of the results. Hint, use knitr::kable, in combination with results=‘asis’ in the chunk settings

requires names:

theta %>% 
  select(-one_of(c("Scenario", "Var. Inf. factor"))) %>% 
  kable(digits = 2)

Parameter	Estimate	Units	Stderr	CV%	2.5% CI	97.5% CI
tvKa	0.39	1/hr	0.02	4.05	0.36	0.42
tvV	2.94		0.05	1.83	2.83	3.04
tvCl	0.08		0.00	1.80	0.08	0.08
dVdWT	1.00		0.00	0.00	1.00	1.00
dCldAGE	-0.87		0.11	-12.26	-1.09	-0.66
stdev0	0.10		0.00	2.80	0.09	0.10

clean up columns
clean up column names
units

Create a CWRES vs Time plot with loess fits for the central tendency and the spread (hint abs() is your friend for the spread)

gg_cwres_tad <- function(df) {
df %>%
  ggplot(aes(x = TAD, y = CWRES)) + geom_point() +
  stat_smooth(method = "loess", se=F, color = "red") +
  stat_smooth(data = df %>%
                mutate(CWRES = abs(CWRES)), 
              se = F, color = "blue") +
  stat_smooth(data = df %>%
                mutate(CWRES = -abs(CWRES)), 
              se = F, color = "blue") +
    theme_bw() +
    base_theme()
}

gg_cwres_tad(resid)
#> `geom_smooth()` using method = 'loess'
#> `geom_smooth()` using method = 'loess'

update the CWRES vs Time plot to flag anything with CWRES > 2.5 as a red value


resid %>% 
  mutate(HIGHCWRES = ifelse(abs(CWRES) > 2.5, 1, 0)) %>%
    ggplot(aes(x = TAD, y = CWRES)) +
  geom_point(aes(color = factor(HIGHCWRES))) +
  scale_color_manual(values = c("black", "red"), name = "Outlier", labels = c("not outlier", "outlier")) +
  stat_smooth(method = "loess") +
  stat_smooth(data = resid %>%
                mutate(CWRES = abs(CWRES)), 
              method="loess", color = "red", se = F) +
  stat_smooth(data = resid %>%
                mutate(CWRES = -abs(CWRES)), 
              method="loess", color = "red", se = F)

print a table of key information for all points with CWRES > 2.5

resid %>% 
  mutate(HIGHCWRES = ifelse(abs(CWRES) > 2.5, 1, 0)) %>%
  filter(HIGHCWRES ==1) %>% select(ID, IVAR, TAD, IPRED, DV) %>% kable(digits = 2)

ID	IVAR	TAD	IPRED	DV
4	364	28	28.93	18.62
4	400	64	11.92	13.73
5	48	0	7.26	8.12
9	352	16	39.54	27.48
36	3	3	23.60	17.10
36	364	28	18.01	22.57

Plot individual IPRED and DV vs time

split_resid <- resid %>% filter(TADSeq ==1) %>% mutate(IDBINS = ids_per_plot(ID, 9)) %>% split(.[["IDBINS"]])

p <- function(df) {
  df %>%
  ggplot(aes(x = TAD, y = IPRED, group= TADSeq)) +
  geom_line() + facet_wrap(~ID) + theme_bw() +
    geom_point(aes(x = TAD, y = DV))+
    labs(list(x = "Time after Dose, hrs",
              y = "Individual Predicted and Observed")) 
}
split_resid %>% map(p)
#> $`1`

#> 
#> $`2`

#> 
#> $`3`

#> 
#> $`4`

#> 
#> $`5`

#> 
#> $`6`

As a reminder, map works like lapply, it applies the same function to each element in the list. In this case, it is taking split_resid (which is the residual dataframe split by 9 ids per group) and then applies the plot function to each set of 9.

6b) add the population prediction as a dashed blue line


p <- function(df) {
  df %>%
  ggplot(aes(x = TAD, y = IPRED, group= TADSeq)) +
  geom_line() + facet_wrap(~ID) + theme_bw() +
    geom_point(aes(x = TAD, y = DV))+labs(list(x = "Time after Dose, hrs", y = "Individual Predicted and Observed")) +
    geom_line(aes(x = TAD, y = PRED, group = TADSeq), color = "blue")
}

split_resid %>% map(p)
#> $`1`

#> 
#> $`2`

#> 
#> $`3`

#> 
#> $`4`

#> 
#> $`5`

#> 
#> $`6`

With EtaCov_final create histograms of all the eta distributions


p_etas<- etacov_gathered %>%
  ggplot(aes(x = VALUE, group = ETA)) + 
  geom_histogram(fill = "white", color = "black") + 
  facet_wrap(~ETA, scales = "free") + base_theme()

p_etas
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

add a mean value for each eta overlaid on the above plot

mean_eta <- etacov_gathered %>% 
    group_by(ETA) %>%
  summarize(meanEta = mean(VALUE))

p_etas + 
  geom_vline(data = mean_eta, aes(xintercept = meanEta), size = 1.5, color = "red")
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Create Eta vs Covariate plots for each covariate and all etas

etacov_gathered %>%
    ggplot(aes(x = WT, y = VALUE, group = ETA)) + 
  geom_point() + facet_wrap(~ETA, scales = "free") + 
  stat_smooth(method = "loess", color = "blue", se = F, size = 1.3) + 
  base_theme()


etacov_gathered %>%
    ggplot(aes(x = AGE, y = VALUE, group = ETA)) + 
  geom_point() + facet_wrap(~ETA, scales = "free") +
  stat_smooth(method = "loess", color = "blue", se = F, size = 1.3) + base_theme()

Note in the plot above, the choice of facet_wrap was arbitrary, and potentially a cleaner looking plot can be created with facet_grid, especially for labels, my suggestion is to try both.

Hint: since there is so much duplicated, this would be a good opportunity to turn that into a function that you pass in the covariate to plot for x.

add loess fits to the eta cov plots

done in above plots