## ----setup, include=FALSE-----------------------------------------------------
# Global chunk defaults: echo code, and render figures as PNG through the
# cairo-png device type.
knitr::opts_chunk$set(
  echo = TRUE,
  dev = "png",
  dev.args = list(type = "cairo-png")
)


library(tidyr)
library(dplyr)
library(HaDeX2)
library(gridExtra)
library(ggplot2)


## ----echo = FALSE-------------------------------------------------------------

# Aggregate raw measurements into one experimental mass per peptide, state and
# exposure, using an intensity-weighted mean within each file.
#
# dat: data frame with at least the columns Center, z, Modification, Inten,
#      Sequence, Start, End, MHP, MaxUptake, State, Exposure, Protein, File.
#      (Presumably Center is the measured m/z centroid and z the charge state,
#      and File identifies one replicate run - confirm against the data source.)
#
# Returns a plain data.frame with one row per
# (Sequence, Start, End, MHP, MaxUptake, State, Exposure, Protein) and columns:
#   mass     - mean of the per-file masses,
#   err_mass - sd of the per-file masses divided by sqrt(number of non-missing
#              per-file masses); 0 when that is NA (e.g. only one file),
#   num      - number of files with a non-missing mass.
# Rows are ordered by Start, End, State, Exposure.
f_calculate_mass <- function(dat){

  proton_mass <- 1.00727647

  dat %>%
    # Mass from the measured value: z * Center minus z proton masses.
    mutate(exp_mass = Center*z - z*proton_mass) %>%
    select(-Center, -z, -Modification) %>%
    # Step 1: one mass per file, weighting each measurement by Inten.
    # NOTE: na.rm = TRUE only drops missing exp_mass values; a missing Inten
    # still makes weighted.mean() return NA for that file, and such files are
    # then ignored by the na.rm = TRUE aggregates in step 2.
    group_by(Sequence, Start, End, MHP, MaxUptake, State, Exposure, Protein, File) %>%
    summarize(avg_exp_mass = weighted.mean(exp_mass, Inten, na.rm = TRUE),
              .groups = "drop") %>%
    # Step 2: average the per-file masses and derive the uncertainty.
    group_by(Sequence, Start, End, MHP, MaxUptake, State, Exposure, Protein) %>%
    summarize(mass = mean(avg_exp_mass, na.rm = TRUE),
              err_mass = coalesce(sd(avg_exp_mass, na.rm = TRUE)/sqrt(sum(!is.na(avg_exp_mass))), 0),
              num = (sum(!is.na(avg_exp_mass))),
              .groups = "drop") %>%
    # The original sort listed Start twice; State is the key that was missing.
    arrange(Start, End, State, Exposure) %>%
    as.data.frame()

}

# Aggregate raw measurements into one experimental mass per peptide, state and
# exposure, using a plain (unweighted) mean within each file; the Inten column
# is not used.
#
# dat: data frame with at least the columns Center, z, Modification,
#      Sequence, Start, End, MHP, MaxUptake, State, Exposure, Protein, File.
#      (Presumably Center is the measured m/z centroid and z the charge state,
#      and File identifies one replicate run - confirm against the data source.)
#
# Returns a plain data.frame with one row per
# (Sequence, Start, End, MHP, MaxUptake, State, Exposure, Protein) and columns:
#   mass     - mean of the per-file masses,
#   err_mass - sd of the per-file masses divided by sqrt(number of non-missing
#              per-file masses); 0 when that is NA (e.g. only one file).
# Rows are ordered by Start, End, State, Exposure.
f_calculate_mass_no_inten <- function(dat){

  proton_mass <- 1.00727647

  dat %>%
    # Mass from the measured value: z * Center minus z proton masses.
    mutate(exp_mass = Center*z - z*proton_mass) %>%
    select(-Center, -z, -Modification) %>%
    # Step 1: one mass per file as the simple mean of its measurements.
    group_by(Sequence, Start, End, MHP, MaxUptake, State, Exposure, Protein, File) %>%
    summarize(avg_exp_mass = mean(exp_mass, na.rm = TRUE),
              .groups = "drop") %>%
    # Step 2: average the per-file masses and derive the uncertainty.
    group_by(Sequence, Start, End, MHP, MaxUptake, State, Exposure, Protein) %>%
    summarize(mass = mean(avg_exp_mass, na.rm = TRUE),
              err_mass = coalesce(sd(avg_exp_mass, na.rm = TRUE)/sqrt(sum(!is.na(avg_exp_mass))), 0),
              .groups = "drop") %>%
    # The original sort listed Start twice; State is the key that was missing.
    arrange(Start, End, State, Exposure) %>%
    as.data.frame()

}


## ----eval=FALSE---------------------------------------------------------------
#  avg_exp_mass = weighted.mean(exp_mass, Inten, na.rm = TRUE)

## ----include=FALSE------------------------------------------------------------
# Masses from the unweighted calculation, tagged with their origin.
# `alpha_dat` is not defined in this script - presumably an example dataset
# provided by one of the attached packages (HaDeX2); confirm.
dat_no_weight <- alpha_dat %>%
  f_calculate_mass_no_inten() %>%
  arrange(Start, End, State, Exposure) %>%
  select(Sequence, Start, End, State, Exposure, mass, err_mass) %>%
  mutate(source = "no_weight")

# Masses from the intensity-weighted calculation, tagged with their origin.
dat_weight <- alpha_dat %>%
  f_calculate_mass() %>%
  arrange(Start, End, State, Exposure) %>%
  select(Sequence, Start, End, State, Exposure, mass, err_mass) %>%
  mutate(source = "weight")

# Long format over the two measures (mass, err_mass), then one column per
# source so the two approaches sit side by side; diff = weight - no_weight.
# gather()/spread() are kept on purpose: they return a plain data.frame, which
# the later `tmp[rows, "diff"]` extractions rely on to get a numeric vector.
tmp <- bind_rows(dat_no_weight, dat_weight) %>%
  gather(key = type, value = value, mass, err_mass) %>%
  spread(key = source, value = value) %>%
  mutate(diff = weight - no_weight)

## ----echo=FALSE---------------------------------------------------------------
# Mass under both approaches for a single example peptide.
tmp %>%
  filter(type == "mass", Sequence == "GFGDLKSPAGL") %>%
  select(-type)

## ----echo=FALSE---------------------------------------------------------------
# Mass uncertainty under both approaches for a single example peptide.
tmp %>%
  filter(type == "err_mass", Sequence == "GFGDLKSPAGL") %>%
  select(-type)

## ----echo=FALSE---------------------------------------------------------------
# Average difference (weight - no_weight) over all mass rows.
mean(tmp[["diff"]][tmp[["type"]] == "mass"])

## ----echo=FALSE---------------------------------------------------------------
# Average difference (weight - no_weight) over all mass-uncertainty rows.
mean(tmp[["diff"]][tmp[["type"]] == "err_mass"])

## ----message=FALSE, warning=FALSE, echo=FALSE---------------------------------
# Histogram of the mass differences between the two approaches.
tmp %>%
  filter(type == "mass") %>%
  ggplot(aes(x = diff)) +
  geom_histogram() +
  labs(
    title = "Differences between mass",
    x = "Difference"
  )

## ----message=FALSE, warning=FALSE, echo=FALSE---------------------------------
# Histogram of the mass-uncertainty differences between the two approaches.
tmp %>%
  filter(type == "err_mass") %>%
  ggplot(aes(x = diff)) +
  geom_histogram() +
  labs(
    title = "Differences between uncertainties of mass",
    x = "Difference"
  )

