Import Observed Data in GastroPlus project

This RMarkdown file reads exposure data, in vitro dissolution data, and solubility data and loads them as observed data into a GastroPlus 10.2 project. The script uses the gastroPlusAPI package to communicate with the GastroPlus X 10.2 service.

To customize your study with this script for a different simulation / project / variables, please make changes to the “set-input-information” code-chunk.

To customize processing of data to GastroPlus format, update the “process-input” code-chunk.

Configure required packages

Load the other packages (tidyverse) required to carry out the data manipulation in the script
Load gastroPlusAPI package
Load gastroPlusRModuLens package

R

library(tidyverse)
library(gastroPlusAPI)
library(gastroPlusRModuLens)

Set working directory

Set the working directory to the folder that contains the file currently open in the source editor

R

# Point the working directory at the folder that holds this file so that the
# relative paths used later in the script resolve. Only attempted when the
# RStudio API is available.
if (rstudioapi::isAvailable()) {
  source_path <- rstudioapi::getSourceEditorContext()$path
  # With no open document the path is NULL, and a document that has never been
  # saved may report an empty path; dirname()/setwd() would fail on either, so
  # the working directory is left untouched in those cases.
  if (!is.null(source_path) && nzchar(source_path)) {
    current_working_directory <- dirname(source_path)
    setwd(current_working_directory)
  }
}

Start GPX Service

R

# Start the GastroPlus service with verbose output turned off and keep the
# returned handle; the handle is used later to query and stop the service.
gpx_service <- start_service(verbose = FALSE)

CODE

✔ Configured the GastroPlus Service

R

# Ask the service handle whether the service is alive (expected to be TRUE).
gpx_service$is_alive()

CODE

[1] TRUE

Setup Input Information

Modify the variables in this chunk to customize your workflow.

project_path: Location of the project

csv_file: File path of the long-form dataset. If the dataset is in wide form, the user must first reshape it into long form so that each record can be assigned to the correct group type.

save_project_with_input_series: TRUE/FALSE flag that controls whether the series read from the data file are saved with the GastroPlus project

R

# GastroPlus project to open; the path is relative to the working directory.
project_path <- "../../ProjectFiles/GPX Run Modes/GPX Run Modes.gpproject"

# Long-form CSV file that holds the observed data to import.
csv_file <- "../../ProjectFiles/GPX Run Modes/LongFormData.csv"

# Set to TRUE to call save_project() at the end of the script.
save_project_with_input_series <- FALSE

Open Project and Read Existing Observed Data

Open specified project and glimpse through the observed series it currently contains

R

# Open the project, then capture and preview the inventory of observed series
# it contains before anything is imported.
open_project(project_path)
current_observed_series <- get_experimental_data_inventory()
glimpse(current_observed_series)

CODE

Rows: 3
Columns: 4
$ group_name  <chr> "Ketoprofen Solubility", "Propranolol HCl Solubility", "Pi…
$ group_type  <chr> "Solubility", "Solubility", "Solubility"
$ series_name <chr> "Solubility vs pH", "Solubility vs pH", "Solubility vs pH"
$ series_type <chr> "pH_SolubilitySeries", "pH_SolubilitySeries", "pH_Solubili…

Retrieve and process data from a CSV file

Read data from CSV file and specify required GastroPlus attributes

R

# Read the long-form CSV (show_col_types = FALSE silences readr's column
# specification message) and preview every column with glimpse().
csv_data <- readr::read_csv(csv_file, show_col_types = FALSE)
glimpse(csv_data)

CODE

Rows: 39
Columns: 17
$ COMPOUND       <chr> "Piroxicam", "Piroxicam", "Piroxicam", "Piroxicam", "Pi…
$ TIME           <dbl> 0.0, 0.5, 1.0, 2.0, 3.0, 6.0, 12.0, 24.0, 48.0, 96.0, 1…
$ DV             <dbl> 0.000, 31.400, 933.900, 2507.600, 2609.400, 2112.400, 1…
$ CV             <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, …
$ UNIT           <chr> "(ng/mL)", "(ng/mL)", "(ng/mL)", "(ng/mL)", "(ng/mL)", …
$ DOSEAMOUNT     <dbl> 20.00, 20.00, 20.00, 20.00, 20.00, 20.00, 20.00, 20.00,…
$ DOSEAMOUNTUNIT <chr> "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "…
$ BW             <dbl> 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 74,…
$ BWUNIT         <chr> "kg", "kg", "kg", "kg", "kg", "kg", "kg", "kg", "kg", "…
$ AMOUNT         <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ AMOUNTUNIT     <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ VOLUME         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ VOLUMEUNIT     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ PH             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ SOLUBILITY     <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ SOLUBILITYUNIT <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ DATASOURCE     <chr> "PlasmaExposure", "PlasmaExposure", "PlasmaExposure", "…

R

# Explore the input: number of records per compound and data source.
count(csv_data, COMPOUND, DATASOURCE)

CODE

# A tibble: 5 × 3
  COMPOUND            DATASOURCE             n
  <chr>               <chr>              <int>
1 Ketoprofen          Solubility             5
2 Metoprolol Tartrate InVitroDissolution     6
3 Piroxicam           PlasmaExposure        13
4 Piroxicam           Solubility             7
5 Propranolol HCl     PlasmaExposure         8

R

# Number of records per reported unit.
count(csv_data, UNIT)

CODE

# A tibble: 4 × 2
  UNIT        n
  <chr>   <int>
1 %           6
2 (ng/mL)    21
3 mg/mL       5
4 ug/mL       7

R

# Number of records per compound, unit and data source combination.
count(csv_data, COMPOUND, UNIT, DATASOURCE)

CODE

# A tibble: 5 × 4
  COMPOUND            UNIT    DATASOURCE             n
  <chr>               <chr>   <chr>              <int>
1 Ketoprofen          mg/mL   Solubility             5
2 Metoprolol Tartrate %       InVitroDissolution     6
3 Piroxicam           (ng/mL) PlasmaExposure        13
4 Piroxicam           ug/mL   Solubility             7
5 Propranolol HCl     (ng/mL) PlasmaExposure         8

R

# put content into format expected by GastroPlus.

# DATASOURCE values handled by the group_type / series_type mappings below.
supported_datasources <- c("PlasmaExposure", "InVitroDissolution", "Solubility")
unsupported_datasources <- setdiff(
  unique(csv_data$DATASOURCE),
  supported_datasources
)

# case_when() returns NA when no condition matches, so rows with any other
# DATASOURCE would end up with NA group/series types. Warn about them instead
# of letting that pass unnoticed.
if (length(unsupported_datasources) > 0) {
  warning(
    "DATASOURCE value(s) without a GastroPlus group/series type mapping: ",
    paste(unsupported_datasources, collapse = ", "),
    call. = FALSE
  )
}

# Derive the series identifiers, units and GastroPlus types for every record.
observed_data <- csv_data %>%
  mutate(

    # Create group name using compound and dose information; records without
    # a dose amount use the compound name alone
    group_name = case_when(
      !is.na(DOSEAMOUNT) ~ paste0(COMPOUND, "_", DOSEAMOUNT, DOSEAMOUNTUNIT),
      TRUE ~ COMPOUND
    ),

    # Create series_name using available columns
    series_name = paste0(COMPOUND, " - ", DATASOURCE),

    # Process units
    dependent_unit = UNIT %>%
      # remove parentheses
      str_remove("^\\(") %>%
      str_remove("\\)$") %>%
      # μg and microgram are recognized by GastroPlus, but not ug.
      str_replace_all("ug", "µg") %>%
      # unit should be blank for %
      str_replace_all("^%$", ""),

    # Solubility series get a blank independent unit; all others use hours
    independent_unit = case_when(
      DATASOURCE == "Solubility" ~ "",
      TRUE ~ "hour"
    ),

    # Map each data source to its GastroPlus observed-data group type
    group_type = case_when(
      DATASOURCE == "PlasmaExposure" ~ ObservedDataGroupType$ExposureData,
      DATASOURCE == "InVitroDissolution" ~ ObservedDataGroupType$InVitroDissolutionRelease,
      DATASOURCE == "Solubility" ~ ObservedDataGroupType$Solubility
    ),

    # Map each data source to its GastroPlus observed-data series type
    series_type = case_when(
      DATASOURCE == "PlasmaExposure" ~ ObservedDataSeriesType$UncertainConcentrationSeries,
      DATASOURCE == "InVitroDissolution" ~ ObservedDataSeriesType$TimeRealSeries,
      DATASOURCE == "Solubility" ~ ObservedDataSeriesType$pH_SolubilitySeries
    )
  )

Set Observed Data

The set_observed_series_data() function in gastroPlusRModuLens is capable of setting multiple collections of observed data at once, optionally including metadata.

R

# Series expected to be created, with the number of records in each.
count(observed_data, group_name, group_type, series_name, series_type)

CODE

# A tibble: 5 × 5
  group_name               group_type              series_name series_type     n
  <chr>                    <chr>                   <chr>       <chr>       <int>
1 Ketoprofen               Solubility              Ketoprofen… pH_Solubil…     5
2 Metoprolol Tartrate      InVitroDissolutionRele… Metoprolol… TimeRealSe…     6
3 Piroxicam                Solubility              Piroxicam … pH_Solubil…     7
4 Piroxicam_20mg           ExposureData            Piroxicam … UncertainC…    13
5 Propranolol HCl_140.28mg ExposureData            Propranolo… UncertainC…     8

R

# Unit combinations that will be sent for each series type.
count(observed_data, series_type, independent_unit, dependent_unit)

CODE

# A tibble: 4 × 4
  series_type                  independent_unit dependent_unit     n
  <chr>                        <chr>            <chr>          <int>
1 TimeRealSeries               "hour"           ""                 6
2 UncertainConcentrationSeries "hour"           "ng/mL"           21
3 pH_SolubilitySeries          ""               "mg/mL"            5
4 pH_SolubilitySeries          ""               "µg/mL"            7

R

# for the exposure data, set with exposure metadata.
# NOTE(review): the dose, infusion time and body mass below are literals that
# are applied to every exposure series in this call, whereas the CSV carries
# DOSEAMOUNT / BW columns (20 mg and 78 kg in its first rows) - confirm that
# these placeholder values are intended before reusing this chunk.
observed_data %>% 
  filter(group_type == "ExposureData") %>% 
  set_observed_series_data(
    # columns of observed_data supplying the series values, units and keys
    independent = TIME,
    dependent = DV,
    uncertainty_percentage = CV,
    independent_unit = independent_unit,
    dependent_unit = dependent_unit,
    group_name = group_name,
    group_type = group_type,
    series_name = series_name,
    series_type = series_type,
    
    # metadata
    module = "Systemic Circulation",
    compartment = CompartmentType$SystemicCirculation,
    state = StateType$ConcentrationPresent,
    dose = 10, 
    dose_unit = "mg",
    infusion_time = 0.5, 
    infusion_time_unit = "h",
    body_mass = 70, 
    body_mass_unit = "kg"
  )

CODE

✔ Set observed data containing 13 unique observations in series: group_name: Piroxicam_20mg, group_type: ExposureData, series_name: Piroxicam - PlasmaExposure, series_type: UncertainConcentrationSeries

CODE

✔ Set exposure series metadata in series: group_name: Piroxicam_20mg, group_type: ExposureData, series_name: Piroxicam - PlasmaExposure, series_type: UncertainConcentrationSeries

CODE

✔ Set observed data containing 8 unique observations in series: group_name: Propranolol HCl_140.28mg, group_type: ExposureData, series_name: Propranolol HCl - PlasmaExposure, series_type: UncertainConcentrationSeries

CODE

✔ Set exposure series metadata in series: group_name: Propranolol HCl_140.28mg, group_type: ExposureData, series_name: Propranolol HCl - PlasmaExposure, series_type: UncertainConcentrationSeries

R

# set remainder of observed data with no metadata.
# NOTE(review): TIME is passed as the independent variable for the
# pH_SolubilitySeries rows as well, although the CSV has a separate PH column
# - confirm which column holds the pH values for those rows.
observed_data %>% 
  filter(group_type != "ExposureData") %>% 
  set_observed_series_data(
    # columns of observed_data supplying the series values, units and keys
    independent = TIME,
    dependent = DV,
    uncertainty_percentage = CV,
    independent_unit = independent_unit,
    dependent_unit = dependent_unit,
    group_name = group_name,
    group_type = group_type,
    series_name = series_name,
    series_type = series_type
  )

CODE

✔ Set observed data containing 5 unique observations in series: group_name: Ketoprofen, group_type: Solubility, series_name: Ketoprofen - Solubility, series_type: pH_SolubilitySeries

CODE

✔ Set observed data containing 6 unique observations in series: group_name: Metoprolol Tartrate, group_type: InVitroDissolutionRelease, series_name: Metoprolol Tartrate - InVitroDissolution, series_type: TimeRealSeries

CODE

✔ Set observed data containing 7 unique observations in series: group_name: Piroxicam, group_type: Solubility, series_name: Piroxicam - Solubility, series_type: pH_SolubilitySeries

R

# observed data is set: list the project's observed series, which should now
# include the series set in the previous chunks.
get_experimental_data_inventory()

CODE

# A tibble: 8 × 4
  group_name                 group_type                series_name   series_type
  <chr>                      <chr>                     <chr>         <chr>      
1 Metoprolol Tartrate        InVitroDissolutionRelease Metoprolol T… TimeRealSe…
2 Ketoprofen Solubility      Solubility                Solubility v… pH_Solubil…
3 Propranolol HCl Solubility Solubility                Solubility v… pH_Solubil…
4 Piroxicam Solubility       Solubility                Solubility v… pH_Solubil…
5 Ketoprofen                 Solubility                Ketoprofen -… pH_Solubil…
6 Piroxicam                  Solubility                Piroxicam - … pH_Solubil…
7 Piroxicam_20mg             ExposureData              Piroxicam - … UncertainC…
8 Propranolol HCl_140.28mg   ExposureData              Propranolol … UncertainC…

R

# Metadata is set: build the key of the Piroxicam exposure series, then fetch
# the exposure metadata stored for it.
piroxicam_exposure_key <- SeriesKey$new(
  group_type = "ExposureData",
  group_name = "Piroxicam_20mg",
  series_type = "UncertainConcentrationSeries",
  series_name = "Piroxicam - PlasmaExposure"
)
get_exposure_series_metadata(piroxicam_exposure_key)

CODE

<ExposureSeriesMetadata>
  Public:
    attribute_list: function () 
    clone: function (deep = FALSE) 
    data_key: DataKey, R6
    exposure_experimental_setup: ExposureSeriesExperimentalSetup, R6
    fromJSON: function (input_json) 
    initialize: function (data_key = NULL, exposure_experimental_setup = NULL)

Repeat with loop workflow

R

# Stop the running service; a new one is started next for the loop workflow.
gpx_service$kill()

CODE

[1] TRUE

R

# Start a new service instance (verbose output off) for the loop workflow.
gpx_service <- start_service(verbose = FALSE)

CODE

✔ Configured the GastroPlus Service

R

# Open the project again in the newly started service.
open_project(project_path)

R

# Series expected to be created, with the number of records in each.
count(observed_data, group_name, group_type, series_name, series_type)

CODE

# A tibble: 5 × 5
  group_name               group_type              series_name series_type     n
  <chr>                    <chr>                   <chr>       <chr>       <int>
1 Ketoprofen               Solubility              Ketoprofen… pH_Solubil…     5
2 Metoprolol Tartrate      InVitroDissolutionRele… Metoprolol… TimeRealSe…     6
3 Piroxicam                Solubility              Piroxicam … pH_Solubil…     7
4 Piroxicam_20mg           ExposureData            Piroxicam … UncertainC…    13
5 Propranolol HCl_140.28mg ExposureData            Propranolo… UncertainC…     8

R

# Unit combinations that will be sent for each series type.
count(observed_data, series_type, independent_unit, dependent_unit)

CODE

# A tibble: 4 × 4
  series_type                  independent_unit dependent_unit     n
  <chr>                        <chr>            <chr>          <int>
1 TimeRealSeries               "hour"           ""                 6
2 UncertainConcentrationSeries "hour"           "ng/mL"           21
3 pH_SolubilitySeries          ""               "mg/mL"            5
4 pH_SolubilitySeries          ""               "µg/mL"            7

R

# Tag every record with a composite label so that the records belonging to the
# same observed series can be pulled out together.
observed_data <- mutate(
  observed_data,
  key = paste(group_name, group_type, series_name, series_type, sep = " - ")
)

keys <- unique(observed_data$key)

# Walk over the series labels and register each series with the project.
for (.key in keys) {
  series_rows <- filter(observed_data, key == .key)

  # Keep only the value columns, renamed to the names the series object uses.
  series_points <- transmute(
    series_rows,
    independent = TIME,
    dependent = DV,
    uncertainty_percentage = CV
  )

  # Drop the uncertainty column when every value in it is missing.
  if (isTRUE(all(is.na(series_points$uncertainty_percentage)))) {
    series_points <- select(series_points, -uncertainty_percentage)
  }

  # Units and key fields are taken from the first record of the series.
  series_info <- ObservedSeriesInformation$new(
    independent_unit = series_rows$independent_unit[[1]],
    dependent_unit = series_rows$dependent_unit[[1]],
    series = series_points
  )

  series_key <- SeriesKey$new(
    group_type = series_rows$group_type[[1]],
    group_name = series_rows$group_name[[1]],
    series_type = series_rows$series_type[[1]],
    series_name = series_rows$series_name[[1]]
  )

  # Add the observed series to the GPX project.
  set_observed_series(series_key, series_info)
}

Check Updated Observed Series in Project

Confirm if the inventory contains newly added series data

R

# Fetch the inventory again and preview it to check that the series added by
# the loop are listed.
updated_observed_series <- get_experimental_data_inventory()
glimpse(updated_observed_series)

CODE

Rows: 8
Columns: 4
$ group_name  <chr> "Metoprolol Tartrate", "Ketoprofen Solubility", "Propranol…
$ group_type  <chr> "InVitroDissolutionRelease", "Solubility", "Solubility", "…
$ series_name <chr> "Metoprolol Tartrate - InVitroDissolution", "Solubility vs…
$ series_type <chr> "TimeRealSeries", "pH_SolubilitySeries", "pH_SolubilitySer…

Save Project if necessary and Kill GPX Service

R

# Save the project only when the user opted in through
# save_project_with_input_series.
if (save_project_with_input_series) {
  save_project()
}

# Stop the service now that the workflow is finished.
gpx_service$kill()

CODE

[1] TRUE