Skip to content

Commit

Permalink
Rename protcomp() to human.aa()
Browse files Browse the repository at this point in the history
  • Loading branch information
jedick committed Mar 3, 2024
1 parent a5cc89d commit c524852
Show file tree
Hide file tree
Showing 11 changed files with 119 additions and 93 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Date: 2024-03-02
Date: 2024-03-03
Package: canprot
Version: 1.1.2-28
Version: 1.1.2-29
Title: Chemical Analysis of Proteins
Authors@R: c(
person("Jeffrey", "Dick", email = "[email protected]", role = c("aut", "cre"),
Expand Down
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Exported variables
export(
# 20170612 (canprot 0.1.0)
"protcomp", "CLES",
"human.aa", "CLES",
# metrics added 20191005 - 20191027
"Zc", "nH2O", "GRAVY", "pI",
# 20200204 used in JMDplots package
Expand Down
31 changes: 24 additions & 7 deletions R/cplab.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,45 @@
# Moved from diffplot.R and changed expression() to quote() 20230617

# Plot labels for chemical metrics, keyed by metric name.
# Values are either plain character strings or quote()-ed plotmath calls.
# Naming pattern visible in the entries below:
#   - a "D" prefix marks the Delta (difference) version of a label
#   - a "p" prefix marks the per-protein version of a per-residue label
#     (V0 / pV0, MW / pMW)
# Every name is unique, so each label is reachable with cplab$<name>.
cplab <- list(
  # Carbon oxidation state and stoichiometric oxidation / hydration state
  Zc = quote(italic(Z)[C]),
  DZc = quote(Delta*italic(Z)[C]),
  nO2 = quote(italic(n)[O[2]]),
  DnO2 = quote(Delta*italic(n)[O[2]]),
  nH2O = quote(italic(n)[H[2]*O]),
  DnH2O = quote(Delta*italic(n)[H[2]*O]),
  logfO2 = quote(log~italic("f")[O[2]]),
  logaH2O = quote(log~italic("a")[H[2]*O]),
  # Elemental counts, labelled "/AA"
  nC = quote(italic(n)[C] * "/AA"),
  nH = quote(italic(n)[H] * "/AA"),
  nN = quote(italic(n)[N] * "/AA"),
  nO = quote(italic(n)[O] * "/AA"),
  nS = quote(italic(n)[S] * "/AA"),
  DnC = quote(Delta*italic(n)[C] * "/AA"),
  DnH = quote(Delta*italic(n)[H] * "/AA"),
  DnN = quote(Delta*italic(n)[N] * "/AA"),
  DnO = quote(Delta*italic(n)[O] * "/AA"),
  DnS = quote(Delta*italic(n)[S] * "/AA"),
  # Volume
  V0 = quote("Volume per residue (cm" ^ 3 ~ "mol" ^ -1 * ")"),
  pV0 = quote("Volume per protein (cm" ^ 3 ~ "mol" ^ -1 * ")"),
  DV0 = quote(Delta * "V per residue (cm" ^ 3 ~ "mol" ^ -1 * ")"),
  # Named DpV0 (parallel to pV0 and DpMW) so that it does not collide with DV0;
  # two entries sharing the name DV0 would make this one unreachable by name
  DpV0 = quote(Delta * "V per protein (cm" ^ 3 ~ "mol" ^ -1 * ")"),
  # Number of amino acids, GRAVY, pI
  nAA = quote(italic(n)[AA]),
  DnAA = quote(Delta*italic(n)[AA]),
  GRAVY = "GRAVY",
  DGRAVY = quote(Delta*"GRAVY"),
  pI = "pI",
  DpI = quote(Delta*"pI"),
  # Molecular weight
  MW = quote("MW per residue"),
  DMW = quote(Delta * "MW per residue"),
  pMW = quote("MW per protein"),
  DpMW = quote(Delta * "MW per protein"),
  plength = "Protein length",
  # Elemental ratios; each label is available with and without an underscore
  H_C = "H/C",
  HC = "H/C",
  N_C = "N/C",
  NC = "N/C",
  O_C = "O/C",
  OC = "O/C",
  S_C = "S/C",
  SC = "S/C"
)
36 changes: 36 additions & 0 deletions R/human.aa.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# canprot/human.aa.R
# Get amino acid compositions for human proteins from UniProt IDs
# 20160705 jmd

human.aa <- function(uniprot = NULL, aa_file = NULL, stop.if.missing = FALSE, warn.if.duplicated = FALSE) {
  # Get amino acid compositions of proteins from their UniProt IDs
  #
  # Arguments:
  #   uniprot            UniProt IDs to look up (must not be NULL)
  #   aa_file            optional CSV file with additional amino acid compositions;
  #                      its rows are placed before the built-in ones, so they
  #                      take precedence when an ID occurs in both
  #   stop.if.missing    stop with an error if any ID is not found?
  #   warn.if.duplicated warn if any matched ID occurs more than once in 'uniprot'?
  #
  # Value: rows of the amino acid composition table, one per element of 'uniprot'
  #   and in the same order; IDs that are not found give a row of NA

  # Fail fast, before any file is read
  if(is.null(uniprot)) stop("'uniprot' is NULL")

  # Get amino acid compositions of human proteins
  aa <- get("human_aa", canprot)
  # Add amino acid compositions from external file if specified
  if(!is.null(aa_file)) {
    aa_dat <- read.csv(aa_file, as.is = TRUE)
    print(paste("human.aa: adding", nrow(aa_dat), "proteins from", aa_file))
    aa <- rbind(aa_dat, aa)
  }

  # Find the proteins listed in 'uniprot' - first look at the ID after the | separator
  # (vapply keeps the result a character vector even if the table has no rows)
  protein <- as.character(aa$protein)
  alluni <- vapply(strsplit(protein, "|", fixed = TRUE), "[", character(1), 2)
  # If that is NA (i.e. no | separator is present) use the entire string
  ina <- is.na(alluni)
  alluni[ina] <- protein[ina]
  iuni <- match(uniprot, alluni)

  imissing <- is.na(iuni)
  if(stop.if.missing) {
    # Stop with error if any IDs are not found
    if(any(imissing)) stop(paste("uniprot IDs not found:", paste(uniprot[imissing], collapse = " ")))
  }
  if(warn.if.duplicated) {
    # Warn if any IDs are duplicated
    # Only matched IDs count: several IDs that are not found all map to NA,
    # which does not make them duplicates of each other
    idup <- duplicated(iuni) & !imissing
    if(any(idup)) warning(paste("some uniprot IDs are duplicated:",
      paste(uniprot[idup], collapse = " ")), immediate. = TRUE)
  }

  # Return amino acid compositions
  aa[iuni, ]
}
8 changes: 5 additions & 3 deletions R/metrics.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,14 @@ pI <- function(AAcomp, terminal_H2O = 1, ...) {
iZ <- match(names(AA)[isZ], dimnames(Ztab)[[2]])
# Calculate the total charge as a function of pH
# ... the "else" is in case we have a data frame (used when first writing this function)
if(is.numeric(AA)) Ztot <- Ztab[, iZ] %*% AA[isZ]
else Ztot <- Ztab[, iZ] %*% as.matrix(t(AA[, isZ]))
if(is.numeric(AA)) Ztot <- Ztab[, iZ] %*% AA[isZ] else
Ztot <- Ztab[, iZ] %*% as.matrix(t(AA[, isZ]))
# Find pH where charge is closest to zero
# (absolute charge is minimized)
ipH <- which.min(abs(Ztot))
Ztab[ipH, 1]
pI <- Ztab[ipH, 1]
if(length(pI) == 0) pI <- NA
pI
}
# Number of N- and C-terminal groups
Nterm <- Cterm <- terminal_H2O
Expand Down
35 changes: 0 additions & 35 deletions R/protcomp.R

This file was deleted.

8 changes: 6 additions & 2 deletions inst/NEWS
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
CHANGES IN canprot 1.1.2-27 (2024-03-01)
---------------------------------------
CHANGES IN canprot 1.1.2-29 (2024-03-03)
----------------------------------------

- Objects in cplab are now `quote()`-ed instead of `expression()`s for easier
handling by other functions.
Expand All @@ -26,6 +26,10 @@ CHANGES IN canprot 1.1.2-27 (2024-03-01)

- Add add.cld() (add compact letter display to boxplots).

- Move chem16S::calc_metrics() to calc.metrics().

- Rename protcomp() to human.aa().

CHANGES IN canprot 1.1.2 (2022-01-17)
-------------------------------------

Expand Down
4 changes: 2 additions & 2 deletions man/human.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
\alias{human_additional}
\alias{human_extra}
\alias{uniprot_updates}
\title{Amino Acid Compositions of Human Proteins}
\title{Data for Amino Acid Compositions of Human Proteins}
\description{
Data for amino acid compositions of proteins and conversion from old to new UniProt IDs.
}
Expand Down Expand Up @@ -43,7 +43,7 @@ The \code{protein} column contains UniProt IDs in the format \code{database|acce

\seealso{
Amino acid compositions of non-human proteins are stored under \code{extdata/aa} in directories \code{archaea}, \code{bacteria}, \code{cow}, \code{dog}, \code{mouse}, \code{rat}, and \code{yeast}.
These files can be loaded in \code{\link{protcomp}} via the \code{aa_file} argument.
These files can be loaded in \code{\link{human.aa}} via the \code{aa_file} argument.
}

\examples{
Expand Down
37 changes: 37 additions & 0 deletions man/human.aa.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
\encoding{UTF-8}
\name{human.aa}
\alias{human.aa}
\title{Get Amino Acid Compositions of Human Proteins}
\description{
Get amino acid compositions of human proteins from their UniProt IDs.
}

\usage{
human.aa(uniprot = NULL, aa_file = NULL,
stop.if.missing = FALSE, warn.if.duplicated = FALSE)
}

\arguments{
\item{uniprot}{character, UniProt IDs of proteins}
\item{aa_file}{character, name of a CSV file with additional amino acid compositions}
\item{stop.if.missing}{logical, stop with an error if there are UniProt IDs that can't be found?}
\item{warn.if.duplicated}{logical, emit a warning if duplicate UniProt IDs are detected?}
}
\details{
This function retrieves the amino acid compositions of one or more proteins specified by \code{uniprot}.
This function depends on the amino acid compositions of human proteins, which are stored in the \code{\link{canprot}} environment when the package is attached.
If \code{aa_file} is specified, additional amino acid compositions are read from this file.
This file should be in the same format as \code{\link{human_extra}.csv} in the installation directory of the package.
}
\value{
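The function returns a data frame with amino acid compositions of proteins, with one row for each element of \code{uniprot}, in the same order.
UniProt IDs that are not found yield a row of \code{NA} values unless \code{stop.if.missing} is TRUE.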
}
\examples{
human.aa("P24298")
}
\concept{Amino acid composition}
35 changes: 0 additions & 35 deletions man/protcomp.Rd

This file was deleted.

12 changes: 6 additions & 6 deletions vignettes/introduction.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,11 @@ The vertical line denotes the proposed timing of the Great Oxidation Event (GOE)
## Human proteins in **canprot**

**canprot** has a database of amino acid compositions of human proteins assembled from UniProt.
Use `protcomp()` to get the amino acid composition.
Use `human.aa()` to get the amino acid composition.
This example is for alanine aminotransferase, which has a UniProt ID of [P24298](https://www.uniprot.org/uniprotkb/P24298):
```{r protcomp_ALAT1}
(pc <- protcomp("P24298"))
Zc(pc$aa)
```{r human.aa_ALAT1}
(aa <- human.aa("P24298"))
Zc(aa)
```

Do you have a list of UniProt IDs for a differential expression dataset?
Expand Down Expand Up @@ -107,8 +107,8 @@ down <- c("J3KN67", "Q9Y490", "J3KNQ4", "E7EVA0", "Q01082", "J3KQ32",
With those UniProt IDs for human proteins we can retrieve the amino acid compositions, then calculate a couple of chemical metrics and make some boxplots comparing the groups of differentially regulated proteins.

```{r DKM20_plot, out.width = "75%", fig.align = "center", fig.width = 7, fig.height = 5}
aa_down <- protcomp(down)$aa
aa_up <- protcomp(up)$aa
aa_down <- human.aa(down)
aa_up <- human.aa(up)
bp_names <- paste0(c("Down (", "Up ("), c(nrow(aa_down), nrow(aa_up)), c(")", ")"))
par(mfrow = c(1, 2))
Expand Down

0 comments on commit c524852

Please sign in to comment.