#! /usr/bin/perl -w
# October 18, 2010.
# This script computes equilibrium concentrations of 3 species of
# nucleic acids:
# S: Single-stranded - a Short oligo(deoxy)ribonucleotide
# T: A Target nucleic acid sequence (such as an mRNA). S and T are
# assumed to be both RNA or both DNA
# ST: The (hetero-)dimer formed when one copy of S hybridizes to one
# copy of T.
# dGS and dGT refer to the free energies of the two single-stranded
# species S and T, respectively. These quantities are the changes in
# free energy between the folded state and the unfolded state. dGS is
# sometimes assumed to be zero (see below), which is a reasonable
# assumption if S is too short to form a stable secondary structure. S
# is assumed to be complementary to a region of T. The complementarity
# does not have to be perfect; in particular, some wobble pairs may
# form. This script simply assumes that S binds to that region of
# T. dGH is the hybridization free energy of S binding to its target
# in T. dGTopen is the free energy of T computed with the restriction
# that the region complementary to S remains single-stranded.
# The initial condition is that all of S and T are
# single-stranded. Both S and T are folded. The final state is that S
# is bound to its target in T. T remains folded except that the region
# complementary to S does not form any intramolecular base pairs. The
# assumption is that (a) S unfolds (if it is folded to begin with),
# that (b) T adjusts its folded state to free the region complementary
# to S and that (c) S then binds to its target in T. Both (a) and (b)
# are unfavorable energetically. (c) is favorable. Hybridization will
# not occur unless the total free energy change is favorable
# (negative). This script deals with "ensemble free energies" which
# means that the total strand concentrations of S and T must be known
# to compute equilibrium concentrations of the different species. The
# ensemble free energy is NOT the average of the free energies of the
# different species. A mixing entropy must be used.
# [S], [T] and [ST] refer to the molar concentrations of [S], [T] and
# [ST] at equilibrium. [S0] and [T0] are the total strand
# concentrations of S and T, respectively. Note that [S] = [S0] and
# [T] = [T0] if no hybridization occurs.
# Conservation of mass yields: [S] + [ST] = S0 and [T] + [ST] = T0.
# In addition, the equilibrium concentrations are governed by
# [ST]
# ------ = K, where -RTln(K) = dGTopen + dGH - dGT - dGS.
# [S][T]
# Concentrations are molar within the script, but for input/output
# purposes the default units are nanoMoles. This default may be
# changed by altering the constant "UNITS" from 1e-9 to any other
# value. For example, 1e-6 would express concentations in
# microMoles. Similarly, the default value for [S0] and [T0] is given
# by CONC, which is 10e-6 (1000 nanoMole). This value may be changed
# at will.
# There are two distinct ways to run this script: regular mode and
# full mode. A symbolic link is created so that the "file"
# Ensemble_Calc_Full.pl is merely a pointer to Ensemble_Calc.pl. If
# one script is changed, so is the other. Running Ensemble_Calc.pl
# gives the regular mode and running Ensemble_Calc_Full.pl gives the
# full mode.
# Regular mode. The command line syntax is strict and unforgiving. It
# is assumed that dGS is zero. 'Ensemble_Calc.pl -h' will give the
# usage. The last two command line variables are [S0] and [T0]
# respectively. If [T0] is missing, it will be set to [S0]. If both
# [T0] and [S0] are missing, they are both set to the default CONC.
# In this mode, concentration units cannot be adjusted. They are fixed
# by the value of UNITS. Running 'Ensemble_Calc.pl -v' will give
# program information only.
# Full mode.
use strict;
use warnings;
use Getopt::Long;
Getopt::Long::Configure 'gnu_getopt', 'no_auto_abbrev', 'no_ignore_case';
use constant RT => 0.61633;
use constant CONC => 1e-6;
use constant UNITS => 1e-9;
our ($ScriptName) = $0 =~ /([^\/]+)$/;
our $Verbose = 'No';
our $Units ;
our $Version = '1.4';
our $Bugs = 'zukerm@rpi.edu';
sub version ($) {
print "$ScriptName $Version\n";
print "By Chikako Ragan & Michael Zuker\n";
print "Copyright © 2010\n";
exit 0;
}
sub verbose () {
our $Verbose ;
$Verbose = 'Yes';
}
sub usage($) {
our $ScriptName;
if ($ScriptName =~ /Full/) {
print <0) || die "Type $ScriptName -h for help.\n";
if ($ScriptName =~ /Full/) {
GetOptions('v|version' => sub { version($ScriptName) },
'h|help' => sub { usage(0) },
'V|verbose' => sub { verbose() },
'U|units=f' => \$Units,
'H|dGh=f' => \$dgh,
'S|dGS=f' => \$dgs,
'T|dGT=f' => \$dgt,
'O|dGTopen=f' => \$dgtopen,
's|S0=f' => \$s0,
't|T0=f' => \$t0
) or &usage(1);
(defined($Units)) || ($Units = UNITS);
if (!defined($s0) && !defined($t0)) {
$s0 = CONC/$Units ;
$t0 = CONC/$Units ;
}
(defined($s0)) || ($s0 = $t0);
(defined($t0)) || ($t0 = $s0);
($s0 > 0) || ($s0 = CONC/$Units);
($t0 > 0) || ($t0 = CONC/$Units);
$s0 *= $Units;
$t0 *= $Units;
} else {
$Units = UNITS;
if ($#ARGV == 0) {
GetOptions('v|version' => sub { version($ScriptName) },
'h|help' => sub { usage(0) },
) or &usage(1);
}
if ($#ARGV < 2) {
&usage(1);
} else {
if ($dgh !~ /^[\+-]*[0-9]*\.*[0-9]*$/ && $dgh !~ /^[\. ]*$/) {
printf "invalid option for dGh (real number expected).\n";
&usage(1);
}
if ($dgtopen !~ /^[\+-]*[0-9]*\.*[0-9]*$/ && $dgtopen !~ /^[\. ]*$/) {
printf "invalid option for dGTopen (real number expected).\n";
&usage(1);
}
if ($dgt !~ /^[\+-]*[0-9]*\.*[0-9]*$/ && $dgt !~ /^[\. ]*$/) {
printf "invalid option for dGT (real number expected).\n";
&usage(1);
}
$dgh = $ARGV[0];
$dgtopen = $ARGV[1];
$dgt = $ARGV[2];
if ($#ARGV == 4) {
$s0 = $ARGV[3]*$Units;
$t0 = $ARGV[4]*$Units;
} elsif ($#ARGV == 3) {
$s0 = $t0 = $ARGV[3]*$Units;
printf "Setting [S0] = [T0] = %.2f nM\n", $s0/$Units;
} elsif ($#ARGV == 2) {
$s0 = $t0 = CONC ;
printf "Setting [S0] = [T0] = %.2f nM [default]\n", CONC/$Units;
} else {
&usage(2);
}
}
}
# Add checks. Any positive free energy forces [ST] = 0 and a warning.
# dGT > dGTopen is nonsense. End with an error message.
if ($dgtopen < $dgt) {
printf "Nonsense input!\tdGTopen (%.2f) is < dGT (%.2f)\n",$dgtopen,$dgt;
exit 3;
} elsif ( ($dgh>0.0) || ($dgtopen>0.0) || ($dgt>0.0) ) {
# Dump input in verbose mode.
if ($Verbose eq 'Yes') {
printf "\nInput: dGh = %.2f, dGS = %.2f, dGTopen = %.2f, dGT = %.2f, [S0] = %.2f, [T0] = %.2f\n\n",
$dgh, $dgs, $dgtopen, $dgt, $s0/$Units, $t0/$Units;
}
printf "Concentration units = %.0e\n",$Units;
printf "[S] = %.3f, [T] = %.3f, [ST] = 0.0, dGchange = 0.0 kcal/mol\n",
$s0/$Units, $t0/$Units ;
printf "Warning!\tThe free energies must be less than or equal to 0.\n";
exit 0;
}
my ($diff, $cmax, $j) = ($t0-$s0, $t0, 0.0);
my $logkst = -($dgtopen - $dgt + $dgh - $dgs)/RT;
if ( $diff > 0 ) {
$j = exp($logkst + log($diff));
} elsif ( $diff < 0 ) {
$cmax = $s0;
$j = -exp($logkst + log(-$diff));
}
# Compute concentrations
my $x = 4 * exp($logkst + log($s0));
my $y = 4 * exp($logkst + log($t0));
my $ws = 1 + $j;
my $wt = 1 - $j;
my ($s, $t, $st);
if ( $ws < 0 ) {
$s = 2 * exp(log($s0) - log($x) + log(sqrt($ws * $ws + $x) - $ws));
$t = 2 * exp(log($t0) - log($wt + sqrt($wt * $wt + $y) ) );
} elsif ( $wt < 0 ) {
$t = 2 * exp(log($t0) - log($y) + log(sqrt($wt * $wt + $y) - $wt));
$s = 2 * exp(log($s0) - log($ws + sqrt($ws * $ws + $x) ) );
} else {
$s = 2 * exp(log($s0) - log($ws + sqrt($ws * $ws + $x) ) );
$t = 2 * exp(log($t0) - log($wt + sqrt($wt * $wt + $y) ) );
}
$st = ( $s0 + $t0 - ($s + $t) )/2.0;
# Compute ensemble free energy
my $mus = $dgs + (RT * log($s/$s0));
my $mut = $dgt + (RT * log($t/$t0));
my $dgens = ($s0 * $mus + $t0 * $mut) / $cmax;
# Compute net free energy change
my $dginitial = (($dgs * $s0) + ($dgt * $t0)) / $cmax;
my $dgchange = $dgens - $dginitial;
# Dump input in verbose mode.
if ($Verbose eq 'Yes') {
printf "\nInput: dGh = %.2f, dGS = %.2f, dGTopen = %.2f, dGT = %.2f, [S0] = %.2f, [T0] = %.2f\n\n",
$dgh, $dgs, $dgtopen, $dgt, $s0/$Units, $t0/$Units ;
}
printf "Concentration units = %.0e\n",$Units;
printf "[S] = %.3f, [T] = %.3f, [ST] = %.3f, dGchange = %.2f kcal/mol\n",
$s/$Units, $t/$Units, $st/$Units, $dgchange;
# Check Solution
# The results below should all be zero
if ($Verbose eq 'Yes') {
printf "\nComputation check: The following 3 quantities should be 0\n";
printf "\$logkst - log(\$st) + log(\$s) + log(\$t) = %.4f\n",
$logkst - log($st) + log($s) + log($t);
printf "\$s/\$s0 + \$st/\$s0 - 1.0 = %.4f\n", $s/$s0 + $st/$s0 - 1.0 ;
printf "\$t/\$t0 + \$st/\$t0 - 1.0 = %.4f\n", $t/$t0 + $st/$t0 - 1.0 ;
}