next up previous
Next: util.misc Up: CHNOSZ examples Previous: util.data

util.formula

utl.fr>   ## Don't show: 
utl.fr> data(thermo)
thermo$obigt has 1800 aqueous, 2925 total species

utl.fr> ## End Don't show
utl.fr>   ## converting among Gibbs, enthalpy, entropy
utl.fr>   GHS("H") # entropy of H (element)
[1] 15.61663

utl.fr>   # calculate enthalpy of formation of arsenopyrite 
utl.fr>   GHS("FeAsS",DG=-33843,S=68.5) 
[1] -20149.05

utl.fr>   # return the value of DG calculated from DH and S
utl.fr>   # cf. -56687.71 from subcrt("water")
utl.fr>   GHS("H2O",DH=-68316.76,S=16.7123)  
[1] -56677.81

utl.fr>   ## mass and entropy of compounds of elements
utl.fr>   element("CH4")
      mass  entropy
1 16.04276 63.83843

utl.fr>   element(c("CH4","H2O"),"mass")
      mass
1 16.04276
2 18.01528

utl.fr>   element("Z")   # charge
  mass   entropy
1    0 -15.61663

utl.fr>   # same mass, opposite entropy as charge
utl.fr>   element("Z-1") # i.e., electron
  mass  entropy
1    0 15.61663

utl.fr>   ## count selected elements in a formula
utl.fr>   m <- makeup("H2O")

utl.fr>   expand.formula(c("H","O"),m)
[1] 2 1

utl.fr>   expand.formula(c("C","H","S"),m)
[1] 0 2 0

utl.fr>   ## calculate the average chemical formula of all of 
utl.fr>   ## the proteins in CHNOSZ's database
utl.fr>   ## this is much faster than a for-loop
utl.fr>   pf <- protein.formula(thermo$protein)

utl.fr>   colSums(pf)/nrow(pf)
         C          H          N          O          S 
1758.99097 2772.69807  481.23111  533.18127   13.19724 

utl.fr>   ## nominal carbon oxidation states
utl.fr>   ZC("CO2")  # 4
[1] 4

utl.fr>   ZC("CH4")  # -4
[1] -4

utl.fr>   ZC("CHNOSZ") # 7
[1] 7

utl.fr>   si <- info(info("LYSC_CHICK"))
protein: found LYSC_CHICK (C613H959N193O185S10, 129 residues)
info: 2926 refers to LYSC_CHICK, C613H959N193O185S10 aq (BBA+03)

utl.fr>   ZC(si$formula)  # 0.01631
[1] 0.01631321

utl.fr>   ## plot ZC of reference protein sequences
utl.fr>   ## for different organisms
utl.fr>   file <- system.file("extdata/refseq/protein_refseq.csv.xz",package="CHNOSZ")

utl.fr>   ip <- add.protein(file)
add.protein: added 3266 of 3266 proteins from /home/jedick/R/x86_64-slackware-linux-gnu-library/2.13/CHNOSZ/extdata/refseq/protein_refseq.csv.xz 

utl.fr>   # only use those organisms with more than a certain
utl.fr>   # number of sequenced bases (here, 100000)
utl.fr>   ip <- ip[as.numeric(thermo$protein$abbrv[ip])>100000]

utl.fr>   pf <- protein.formula(thermo$protein[ip,])

utl.fr>   zc <- ZC(pf)

utl.fr>   # the organism names we search for
utl.fr>   # "" matches all organisms
utl.fr>   terms <- c("Streptomyces","Pseudomonas","Salmonella",
utl.fr+     "Escherichia","Vibrio","Bacteroides","Lactobacillus",
utl.fr+     "Staphylococcus","Streptococcus","Methano","Bacillus","Thermo","")

utl.fr>   tps <- thermo$protein$ref[ip]

utl.fr>   plot(0,0,xlim=c(1,13),ylim=c(-0.3,-0.05),pch="",
utl.fr+     ylab="average oxidation state of carbon in proteins",
utl.fr+     xlab="",xaxt="n",mar=c(6,3,1,1))

utl.fr>   for(i in 1:length(terms)) {
utl.fr+     it <- grep(terms[i],tps)
utl.fr+     zct <- zc[it]
utl.fr+     points(jitter(rep(i,length(zct))),zct,pch=20)
utl.fr+   }

utl.fr>   terms[13] <- paste("all organisms")

utl.fr>   axis(1,1:13,terms,las=2)

utl.fr>   title(main=paste("Average Oxidation State of Carbon:",
utl.fr+     "Total Protein per taxID in NCBI RefSeq",sep="\n"))

\begin{figure}
\includegraphics{pictures/utilformula1}
\end{figure}


next up previous
Next: util.misc Up: CHNOSZ examples Previous: util.data