[2] protein.info

## Don't show: 
# (re)load the thermo data object so the examples start from the default database
data(thermo)
thermo$obigt: 1911 aqueous, 3588 total species
## End(Don't show)
# search by name in thermo$protein
ip1 <- pinfo("LYSC_CHICK")
ip2 <- pinfo("LYSC", "CHICK")
# these are the same
stopifnot(all.equal(ip1, ip2))
# two organisms with the same protein name
ip3 <- pinfo("MYG", c("HORSE", "PHYCA"))
# their amino acid compositions
pinfo(ip3)
protein organism ref abbrv chains Ala Cys Asp Glu Phe Gly His Ile Lys Leu Met Asn Pro Gln Arg Ser 121 MYG HORSE UniProt P68082 1 15 0 8 13 7 15 11 9 19 17 2 2 4 6 2 5 7 MYG PHYCA UniProt P02185 1 17 0 7 14 6 11 12 9 19 18 2 1 4 5 4 6 Thr Val Trp Tyr 121 7 7 2 2 7 5 8 2 3
# their thermodynamic properties by group additivity
protein.obigt(ip3)
protein.obigt: found MYG_HORSE (C769H1212N210O218S2, 153 residues) protein.obigt: found MYG_PHYCA (C783H1240N216O216S2, 153 residues) name abbrv formula state ref1 ref2 date G H S Cp V 1 MYG_HORSE NA C769H1212N210O218S2 aq UniProt NA NA -5092861 -12598083 4978.14 8172.949 12568.77 2 MYG_PHYCA NA C783H1240N216O216S2 aq UniProt NA NA -4906543 -12549363 5061.47 8423.000 12845.90 a1.a a2.b a3.c a4.d c1.e c2.f omega.lambda z.T 1 3048.32 460.36 391.76 -512.56 9717.3 -785.6 -5.71 0 2 3098.26 490.84 518.43 -538.56 9957.6 -786.0 -6.87 0
# an example of an unrecognized protein name
ip4 <- pinfo("MYGPHYCA")
stopifnot(is.na(ip4))

## example for chicken lysozyme C
# index in thermo$protein
ip <- pinfo("LYSC_CHICK")
# amino acid composition
pinfo(ip)
protein organism ref abbrv chains Ala Cys Asp Glu Phe Gly His Ile Lys Leu Met Asn Pro Gln Arg Ser 6 LYSC CHICK UniProt P00698 1 12 8 7 2 3 12 1 6 6 8 2 14 2 3 11 10 Thr Val Trp Tyr 6 7 6 6 3
# length and chemical formula
protein.length(ip)
[1] 129
# chemical formula (element counts) of the protein identified by index ip
protein.formula(ip)
C H N O S LYSC_CHICK 613 959 193 185 10
# group additivity for thermodynamic properties and HKF equation-of-state
# parameters of non-ionized protein
protein.obigt(ip)
protein.obigt: found LYSC_CHICK (C613H959N193O185S10, 129 residues) name abbrv formula state ref1 ref2 date G H S Cp V 1 LYSC_CHICK NA C613H959N193O185S10 aq UniProt NA NA -4119738 -10283083 4176.74 6415.553 10420.89 a1.a a2.b a3.c a4.d c1.e c2.f omega.lambda z.T 1 2512.58 345.88 450.87 -409.5 7768.7 -701.5 -7.94 0
# calculation of standard thermodynamic properties
# (subcrt uses the species name, not ip)
subcrt("LYSC_CHICK")
protein.obigt: found LYSC_CHICK (C613H959N193O185S10, 129 residues) subcrt: 1 species at 15 values of T and P (wet) $species name formula state ispecies 3589 LYSC_CHICK C613H959N193O185S10 aq 3589 $out $out$LYSC_CHICK T P rho logK G H S V Cp 1 0.01 1.000000 0.9998289 3217.676 -4021765 -10423733 3685.286 10049.21 4409.319 2 25.00 1.000000 0.9970614 3019.795 -4119738 -10283083 4176.740 10420.95 6415.518 3 50.00 1.000000 0.9880295 2861.401 -4230972 -10113250 4723.386 10600.23 7073.981 4 75.00 1.000000 0.9748643 2734.312 -4355838 -9932209 5262.872 10708.15 7376.581 5 100.00 1.013220 0.9583926 2631.998 -4493930 -9745475 5780.772 10782.93 7548.444 6 125.00 2.320144 0.9390726 2549.289 -4644330 -9554920 6274.142 10840.94 7665.198 7 150.00 4.757169 0.9170577 2482.415 -4806468 -9361481 6743.772 10891.15 7760.881 8 175.00 8.918049 0.8923427 2428.373 -4979619 -9165264 7191.777 10940.05 7856.945 9 200.00 15.536499 0.8647434 2384.804 -5163079 -8965847 7620.991 10994.29 7974.628 10 225.00 25.478603 0.8338733 2349.840 -5356186 -8762181 8035.020 11063.40 8146.453 11 250.00 39.736493 0.7990719 2321.996 -5558337 -8552157 8438.919 11165.02 8439.891 12 275.00 59.431251 0.7592362 2300.099 -5769035 -8331338 8841.260 11338.89 9030.924 13 300.00 85.837843 0.7124075 2283.258 -5987982 -8088790 9260.808 11694.88 10513.444 14 325.00 120.457572 0.6545772 2270.916 -6215390 -7788738 9755.566 12644.61 15882.325 15 350.00 165.211289 0.5746875 2263.284 -6453406 -7222044 10655.544 17027.35 60917.017
# affinity calculation, protein identified by ip
basis("CHNOS+")
C H N O S Z ispecies logact state CO2 1 0 0 2 0 0 1576 -3 aq H2O 0 2 0 1 0 0 1 0 liq NH3 0 3 1 0 0 0 66 -4 aq H2S 0 2 0 0 1 0 67 -7 aq O2 0 0 0 2 0 0 3316 -80 gas H+ 0 1 0 0 0 1 3 -7 aq
# affinity of the protein given by its index ip (no species loaded)
affinity(iprotein=ip)
energy.args: temperature is 25 C energy.args: pressure is Psat subcrt: 27 species at 298.15 K and 1 bar (wet) subcrt: 18 species at 298.15 K and 1 bar (wet) $sout $sout$CO2 logK 1 67.61986 $sout$water logK 2 41.55247 $sout$NH3 logK 1 4.678781 $sout$H2S logK 1 4.891353 $sout$oxygen logK 1 0 $sout$`H+` logK 1 0 $sout$H2O_RESIDUE logK 1 46.50121 $sout$Ala_RESIDUE logK 1 18.59784 $sout$Cys_RESIDUE logK 1 12.38122 $sout$Asp_RESIDUE logK 1 79.94977 $sout$Glu_RESIDUE logK 1 80.34706 $sout$Phe_RESIDUE logK 1 -10.26796 $sout$Gly_RESIDUE logK 1 20.16575 $sout$His_RESIDUE logK 1 -11.00976 $sout$Ile_RESIDUE logK 1 13.59727 $sout$Lys_RESIDUE logK 1 11.56611 $sout$Leu_RESIDUE logK 1 15.21795 $sout$Met_RESIDUE logK 1 9.913842 $sout$Asn_RESIDUE logK 1 45.4838 $sout$Pro_RESIDUE logK 1 7.419493 $sout$Gln_RESIDUE logK 1 46.06214 $sout$Arg_RESIDUE logK 1 -7.467872 $sout$Ser_RESIDUE logK 1 44.33957 $sout$Thr_RESIDUE logK 1 41.33571 $sout$Val_RESIDUE logK 1 16.04698 $sout$Trp_RESIDUE logK 1 -26.8405 $sout$Tyr_RESIDUE logK 1 20.78807 $property [1] "A" $basis C H N O S Z ispecies logact state CO2 1 0 0 2 0 0 1576 -3 aq H2O 0 2 0 1 0 0 1 0 liq NH3 0 3 1 0 0 0 66 -4 aq H2S 0 2 0 0 1 0 67 -7 aq O2 0 0 0 2 0 0 3316 -80 gas H+ 0 1 0 0 0 1 3 -7 aq $species CO2 H2O NH3 H2S O2 H+ ispecies logact state name 1 613 180 193 10 -610.5 0 -6 -3 aq LYSC_CHICK $T [1] 298.15 $P [1] "Psat" $vars character(0) $vals $vals[[1]] [1] NA $values $values$`-6` [1] -598.5468
# affinity calculation, protein loaded as a species
species("LYSC_CHICK")
CO2 H2O NH3 H2S O2 H+ ispecies logact state name 1 613 180 193 10 -610.5 0 3589 -3 aq LYSC_CHICK
# affinity of the currently loaded species, using the current basis
affinity()
energy.args: temperature is 25 C energy.args: pressure is Psat subcrt: 7 species at 298.15 K and 1 bar (wet) subcrt: 18 species at 298.15 K and 1 bar (wet) $sout $sout$CO2 logK 1 67.61986 $sout$water logK 2 41.55247 $sout$NH3 logK 1 4.678781 $sout$H2S logK 1 4.891353 $sout$oxygen logK 1 0 $sout$`H+` logK 1 0 $sout$LYSC_CHICK logK 1 3019.795 $property [1] "A" $basis C H N O S Z ispecies logact state CO2 1 0 0 2 0 0 1576 -3 aq H2O 0 2 0 1 0 0 1 0 liq NH3 0 3 1 0 0 0 66 -4 aq H2S 0 2 0 0 1 0 67 -7 aq O2 0 0 0 2 0 0 3316 -80 gas H+ 0 1 0 0 0 1 3 -7 aq $species CO2 H2O NH3 H2S O2 H+ ispecies logact state name 1 613 180 193 10 -610.5 0 3589 -3 aq LYSC_CHICK $T [1] 298.15 $P [1] "Psat" $vars character(0) $vals $vals[[1]] [1] NA $values $values$`3589` [1] -598.5468
# NB: subcrt() only shows the properties of the non-ionized
# protein, but affinity() uses the properties of the ionized
# protein if the basis species have H+

## these are all the same
protein.formula("P53_PIG")
C H N O S P53_PIG 1873 2946 536 582 18
# same protein, identified by the index that pinfo() returns for its name
protein.formula(pinfo("P53_PIG"))
C H N O S P53_PIG 1873 2946 536 582 18
# same protein again: pinfo() applied to an index gives the amino acid
# composition, which protein.formula() accepts as well
protein.formula(pinfo(pinfo("P53_PIG")))
C H N O S P53_PIG 1873 2946 536 582 18
## using protein.formula: average oxidation state of
## carbon of proteins from different organisms (Dick, 2014)
# get amino acid compositions of microbial proteins
# generated from the RefSeq database
file <- system.file("extdata/refseq/protein_refseq.csv.xz", package="CHNOSZ")
# add.protein() returns the indices of the added proteins
ip <- add.protein(read.csv(file, as.is=TRUE))
add.protein: added 779 new protein(s) to thermo$protein
# only use those organisms with a certain
# number of sequenced bases
ip <- ip[as.numeric(thermo$protein$abbrv[ip]) > 50000]
pf <- protein.formula(thermo$protein[ip, ])
zc <- ZC(pf)
# the organism names we search for
# "" matches all organisms
terms <- c("Natr", "Halo", "Rhodo", "Acido", "Methylo",
  "Chloro", "Nitro", "Desulfo", "Geo", "Methano",
  "Thermo", "Pyro", "Sulfo", "Buchner", "")
tps <- thermo$protein$ref[ip]
# empty plot with one x position per search term; the axis extent follows
# length(terms) so that editing the list of terms keeps the plot consistent
plot(0, 0, xlim=c(1, length(terms)), ylim=c(-0.3, -0.05), pch="",
  ylab=expression(italic(Z)[C]),
  xlab="", xaxt="n", mar=c(6, 3, 1, 1))
# seq_along() is safe even if terms were empty (1:length(terms) would give c(1, 0))
for(i in seq_along(terms)) {
  # proteins whose reference field matches this organism term
  it <- grep(terms[i], tps)
  zct <- zc[it]
  # jitter the x position so overlapping points remain visible
  points(jitter(rep(i, length(zct))), zct, pch=20)
}
# the last term ("") matched everything; label it with the number of proteins
terms[length(terms)] <- paste("all", length(ip))
axis(1, seq_along(terms), terms, las=2)
title(main=paste("Average oxidation state of carbon in proteins",
  "by taxID in NCBI RefSeq (after Dick, 2014)", sep="\n"))

Image proteininfo1

 

## Don't show: 
# save the graphical parameters so they can be restored after the example
opar <- par(no.readonly=TRUE)
## End(Don't show)
# using pinfo() with regexp=TRUE:
# plot ZC and nH2O/residue of HOX proteins
# basis species: glutamine-glutamic acid-cysteine-O2-H2O
basis("QEC")
C H N O S ispecies logact state C5H10N2O3 5 10 2 3 0 1620 -3.2 aq C5H9NO4 5 9 1 4 0 1621 -4.5 aq C3H7NO2S 3 7 1 2 1 1618 -3.6 aq H2O 0 2 0 1 0 1 0.0 liq O2 0 0 0 2 0 3316 -80.0 gas
# device setup
par(mfrow=c(2, 2))
# a red-blue scale from 1-13
col <- ZC.col(1:13)
# axis labels
ZClab <- axis.label("ZC")
nH2Olab <- expression(bar(italic(n))[H[2]*O])
# loop over HOX gene clusters
for(cluster in c("A", "B", "C", "D")) {
  # get protein indices
  pattern <- paste0("^HX", cluster)
  ip <- pinfo(pattern, "HUMAN", regexp=TRUE)
  # calculate ZC and nH2O/residue
  thisZC <- ZC(protein.formula(ip))
  thisH2O <- protein.basis(ip)[, "H2O"] / protein.length(ip)
  # plot lines
  plot(thisZC, thisH2O, type="l", xlab=ZClab, ylab=nH2Olab)
  # the number of the HOX gene
  # (strip all letters from the protein name, leaving the gene number)
  pname <- pinfo(ip)$protein
  nHOX <- as.numeric(gsub("[A-Za-z]*", "", pname))
  # plot colored points
  # (a smaller white point on top leaves a colored ring around the label)
  points(thisZC, thisH2O, pch=19, col=col[nHOX], cex=3.5)
  points(thisZC, thisH2O, pch=19, col="white", cex=2.5)
  # plot the number of the HOX gene
  text(thisZC, thisH2O, nHOX)
  # add title
  title(main=paste0("HOX", cluster))
}
## Don't show: 
# restore the graphical parameters saved before the example
par(opar)
## End(Don't show)
prtn.n>

Image proteininfo2