#!/usr/bin/python

import numpy as np

# import the original energy from Kinney et al
emat_orig = np.genfromtxt('rnap-full-0_emat.txt')
# set conversion factor from AU to k_B T. See "Methods" for details on
# how this number was obtained.
AU_per_kT = 7.08
# average energy value for non-specific binding as computed using the
# original, unmodified energy matrix
avg_nonspecific_AU = 91.3 # AU
# length of the binding site as defined by our matrix.
site_len = 41

# Set the best binding base of each row to zero. This is so that all
# values are in terms of an energy *difference*: the difference
# between that particular base and the best binding base. Remember,
# adding a constant to each element in a particular row has no
# physical meaning.
# For later, we'll need to keep track of how much we've subtracted
# from the original matrix.
sum = 0 # how much we've subtracted
emat_zeroed = np.zeros(emat_orig.shape)
for i,row in enumerate(emat_orig):
    emat_zeroed[i,:] = row - min(row)
    sum = sum + min(row)

# Convert the units of the matrix from AU to k_B T. Now all nonzero
# matrix element are energy *differences* in terms of k_B T.
emat_rescaled = emat_zeroed/AU_per_kT

# Finally, add an offset to the matrix so that the average binding
# across the MG1655 genome is zero. This is for convenience only, and
# has no physical meaning. It's not immediately obvious how to do this
# for the rescaled matrix, but it is easy to see how to do it for the
# original matrix. Since the original matrix has an average
# nonspecific binding of 91.3 AU, we could just subtract 91.3 from all
# elements in a particular row. Or, to make things a bit more
# equitable, we could subtract 91.3/41 = 2.23 AU from each row in the
# matrix. Now that we've figured out how many AU to subtract from each
# row in the original matrix, we realize that we can simply subtract
# 2.23/AU_per_KT = 2.23/7.08 = 0.315 from each row in the rescaled
# matrix to obtain the matrix we want. This is almost right, but
# remember that we've already subtracted the minimum of each each from
# each row. The total amount subtracted is contained in the variable
# <sum> defined above. If we used the emat_zeroed matrix to compute
# the average nonspecific energy, we would obtain 
# <avg_nonspecific_AU - sum> as an answer. So "91.3" needs to be
# replaced with "91.3 - sum" everywhere in the preceding paragraph.

emat_rescaled_zeroed_to_MG1655 = (emat_rescaled - 
                                  ((avg_nonspecific_AU-sum)/site_len)/AU_per_kT)

# save the matrix in a text file
np.savetxt('rnap-full-0_emat_relativetoMG1655background_kT.txt',
           emat_rescaled_zeroed_to_MG1655)