X debug baseframe
import mdna
import mdtraj as md
import numpy as np
import matplotlib.pyplot as plt
from mdna import utils
import os
# load 1kx5.pdb
path_relative = '/Users/thor/surfdrive/Projects/mdna/docs/notebooks/pdbs/1kx5.pdb'
traj = md.load(path_relative)
rigid = mdna.compute_rigid_parameters(traj, chainids=[0, 1], fit_reference=False)
rigid_fit = mdna.compute_rigid_parameters(traj, chainids=[0, 1], fit_reference=True)
def _normalize_parameter_name(parameter_name):
key = ''.join(ch for ch in parameter_name.lower() if ch.isalnum())
alias_map = {
'shift': {'shift', 'dx'},
'slide': {'slide', 'dy'},
'rise': {'rise', 'dz'},
'tilt': {'tilt', 'rx'},
'roll': {'roll', 'ry'},
'twist': {'twist', 'rz'},
'shear': {'shear'},
'stretch': {'stretch'},
'stagger': {'stagger'},
'buckle': {'buckle'},
'propeller': {'propeller', 'propellor', 'prop'},
'opening': {'opening'},
}
for canonical, aliases in alias_map.items():
if key in aliases:
return canonical
return key
def _to_1d(values):
arr = np.asarray(values, dtype=float)
if arr.ndim == 1:
return arr
if arr.ndim == 2:
if arr.shape[0] == 1:
return arr[0]
return arr.mean(axis=0)
return arr.reshape(-1)
def _load_3dna_step_parameters(file_path):
in_step_section = False
columns = None
rows = []
with open(file_path, 'r') as handle:
for line in handle:
stripped = line.strip()
if 'Local base-pair step parameters' in line:
in_step_section = True
continue
if not in_step_section:
continue
if stripped.startswith('****************************************************************************'):
if rows:
break
continue
if stripped.startswith('bp'):
header_left = line.split('#', 1)[0]
header_tokens = header_left.split()
columns = [_normalize_parameter_name(token) for token in header_tokens[1:]]
continue
if columns is None or not stripped or stripped.startswith('#'):
continue
data_left = line.split('#', 1)[0]
tokens = data_left.split()
if len(tokens) < 2 + len(columns):
continue
numeric_tokens = tokens[2 : 2 + len(columns)]
try:
rows.append([float(token) for token in numeric_tokens])
except ValueError:
continue
if not rows or columns is None:
raise ValueError(f'Could not parse base-pair step parameters from {file_path}')
arr = np.asarray(rows, dtype=float)
return {column: np.concatenate([[0.0], arr[:, idx]]) for idx, column in enumerate(columns)}
def _get_mdna_parameter(rigid_object, parameter_name):
canonical = _normalize_parameter_name(parameter_name)
candidates = [parameter_name, canonical, canonical.capitalize()]
if canonical == 'propeller':
candidates.extend(['propellor', 'Propeller', 'Propellor'])
tried = []
for candidate in candidates:
if candidate in tried:
continue
tried.append(candidate)
try:
return rigid_object.get_parameter(candidate)
except Exception:
pass
raise KeyError(f'Parameter `{parameter_name}` not found in mdna rigid parameters (tried: {tried})')
name = 'twist'
out_path = os.path.join(os.path.dirname(path_relative), '1kx5-3dna.out')
para = _to_1d(_get_mdna_parameter(rigid, name))
para_fit = _to_1d(_get_mdna_parameter(rigid_fit, name))
three_dna = _load_3dna_step_parameters(out_path)
name_canonical = _normalize_parameter_name(name)
if name_canonical not in three_dna:
raise KeyError(f'Parameter `{name}` not found in 3DNA step parameters. Available: {sorted(three_dna.keys())}')
para_3dna = _to_1d(three_dna[name_canonical])
n = min(len(para), len(para_fit), len(para_3dna))
x = np.arange(1, n + 1)
plt.figure(figsize=(10, 4))
plt.plot(x, para[:n], label='mdna no fit')
plt.plot(x, para_fit[:n], label='mdna fit')
plt.plot(x, para_3dna[:n], '--', label='3DNA')
plt.xlabel('Base-pair step index')
plt.ylabel(name_canonical.capitalize())
plt.title(f'Rigid parameter comparison: {name_canonical}')
plt.legend()
plt.tight_layout()
para.shape
(147,)
# Parse 3DNA base-pair (intra) parameters from the output file
def _load_3dna_bp_parameters(file_path):
in_section = False
columns = None
rows = []
with open(file_path, 'r') as handle:
for line in handle:
stripped = line.strip()
if 'Simple base-pair parameters' in line:
in_section = True
continue
if not in_section:
continue
if stripped.startswith('****'):
if rows:
break
continue
if stripped.startswith('bp'):
header_left = line.split('#', 1)[0]
header_tokens = header_left.split()
columns = [_normalize_parameter_name(t) for t in header_tokens[1:]]
continue
if columns is None or not stripped or stripped.startswith('#'):
continue
data_left = line.split('#', 1)[0]
tokens = data_left.split()
if len(tokens) < 2 + len(columns):
continue
numeric_tokens = tokens[2:2 + len(columns)]
try:
rows.append([float(t) for t in numeric_tokens])
except ValueError:
continue
if not rows or columns is None:
raise ValueError(f'Could not parse base-pair parameters from {file_path}')
arr = np.asarray(rows, dtype=float)
return {col: arr[:, idx] for idx, col in enumerate(columns)}
bp_3dna = _load_3dna_bp_parameters(out_path)
# Compare all 6 base-pair parameters
fig, axes = plt.subplots(2, 3, figsize=(14, 6))
bp_names = ['shear', 'stretch', 'stagger', 'buckle', 'propeller', 'opening']
for ax, pname in zip(axes.flat, bp_names):
mdna_vals = _to_1d(_get_mdna_parameter(rigid, pname))
ref_vals = bp_3dna[pname]
nn = min(len(mdna_vals), len(ref_vals))
ax.plot(range(1, nn+1), mdna_vals[:nn], label='mdna no fit')
ax.plot(range(1, nn+1), ref_vals[:nn], '--', label='3DNA')
diff = np.abs(mdna_vals[:nn] - ref_vals[:nn])
ax.set_title(f'{pname} (max Δ={diff.max():.2f})')
ax.legend(fontsize=7)
plt.tight_layout()
plt.show()
The notations and configuration parameters for a rigid base and rigid base pair DNA model, as presented in this chapter, have been described in [36] and [41]. 2.1 DNA sequence and configuration of rigid bases In this work we consider a right-handed, double-stranded, linear B form DNA in which bases T, A, C and G are attached to two, oriented, anti-parallel backbone strands and form only the standard Watson-Crick pairs (A, T) and (C, G). Choosing one backbone strand as a reference, a DNA oligomer consisting of n base pairs is identified with a sequence of bases X1X2 · · · Xn, listed in the 5′ to 3′ direction along the strand, where Xa ∈ {T, A, C, G}. The base pairs associated with this sequence are denoted by (X, X)1, (X, X)2, . . . , (X, X)n, where X is defined as the Watson-Crick complement of X in the sense that A = T, T = A, C = G and G = C. The notation (X, X)a for a base pair indicates that base X is attached to the reference strand while X is attached to the complementary strand. In our coarse-grain model of DNA the bases are assumed to be rigid and the configura- tion of an oligomer is described by position (location and orientation) of all its bases. The orientation of a rigid body can be described by fixing an orthonormal frame in it, and the location can be specified by choosing a reference point of that body. This means that the configuration of a rigid base n-mer X1X2 · · · Xn can be given by a set of 2n base reference points with 2n attached base frames. We use the rules of the Tsukuba convention [54] to assign a reference point ra and a right- handed, orthonormal frame {da i } (i = 1, 2, 3) to each base Xa ∈ {T, A, C, G}. The vector da 1 points in the direction of the major groove, da 2 points in the direction of the reference strand and da 3 is perpendicular to the plane of the base Xa and is pointing towards the base Xa+1, as shown in Figure 2.1. Once the reference points and frame of each base are specified, the positions of all the non-hydrogen atoms of the base are known. Likewise, we assign a point ra and frame {da i } to each complementary base Xa. However, because the two strands are anti-parallel, we flip the frame {da i } (change the signs of the vectors d2 and d3 to opposite), so that the two frames overlap when the base pair is in its ideal shape. 5 CHAPTER 2. KINEMATICS OF RIGID BASE AND RIGID BASE PAIR MODELS OF DNA We will also consider matrices Da and Da with vectors {da i } and {da i } as columns, i.e. Da = {da i } = [da 1 da 2 da 3] and Da = {da i } = [da 1 da 2 da 3]. (a)ra+1 da+1 3 da+1 2 da+1 1 da 3 da 1 ra da 2 Xa Xa+1 da 2 Xa+1 da+1 1 da+1 3 da+1 2 da 1 da 3 ra+1 ra Xa (b) Figure 2.1: (a) Rigid bases of DNA with reference points and frames, corresponding to the Tsukuba convention [54]. Base pair and junction frames, as introduced in Section 2.3, are drawn in grey and black respectively. (Thanks to J. Głowacki for this figure.) (b) A schematic view of rigid bases with their reference point and frames with the indicated directions of vectors {da i } and {da i }, i = 1, 2, 3. Note that, despite the two strands of DNA being antiparallel, the frames Da and Da are oriented in the same way. For modelling purposes, it is convenient to describe the configuration of DNA in coor- dinates that do not depend on the absolute translations and rotations of the molecule. In particular, we would like to have a set of parameters and rules, that would still allow us to reconstruct the position and orientation of each base, once the position and orientation of one chosen base is given. To do this, we first need to parametrise rotations between orthonormal frames. 2.2 Parametrisation of rotations To describe rotations we will use Cayley (or Euler - Rodrigues) parameters. One other popular choice is Euler angles, used, for example, in the 3DNA software package [46]. Having a unit axis k and an angle of rotation ϕ ∈ [0, π), the corresponding rotation matrix Q ∈ SO(3) can be obtained using the Euler-Rodrigues formula Q = cos ϕ I + (1 − cos ϕ)k ⊗ k + sin ϕk×, (2.1) where I ∈ R3×3 is an identity matrix, k ⊗ k = kkT is the rank-one outer product matrix and k× is the skew matrix k× = 0 −k3 k2 k3 0 −k1 −k2 k1 0 , (2.2) satisfying (k×)v = k × v for all v ∈ R3. Then Qv gives the absolute coordinates of the vector 6 2.2. PARAMETRISATION OF ROTATIONS v rotated around the axis k by the angle ϕ for all v ∈ R3. Moreover, from (2.1) we get Q − QT = 2 sin ϕ k× and tr Q = 1 + 2 cos ϕ, (2.3) which allows us to compute ϕ and k as ϕ = arccos ( tr Q − 1 2 ) ∈ [0, π) and k = 1 2 sin ϕ Q32 − Q23 Q13 − Q31 Q21 − Q12 = 1 2 sin ϕ vec[Q − QT ], (2.4) when the rotation matrix Q = {Qij } is known. Here vec A for a skew matrix A ∈ R3×3 is defined as vec A = A32 A13 A21 . (2.5) On the other hand, for any two orthonormal frames D = [d1 d2 d3] and ˜D = [˜d1 ˜d2 ˜d3], D, ˜D ∈ R3×3, a rotation from one frame to another can be written as ˜D = QD = [Qd1 Qd2 Qd3], (2.6) where Q = ˜DDT . (2.7) Note that because Q is a proper right-handed rotation matrix (an orthonormal matrix with a determinant 1), it has eigenvalues (1, e−iϕ, eiϕ), where ϕ is the angle of rotation, and the real eigenvector of Q is its rotation axis k. When Q is symmetric, which corresponds to the cases ϕ = 0 and ϕ = π, then Q − QT is a zero matrix, and we can not use (2.4) for computing k. In the case ϕ = 0, Q = I and any unit vector can be chosen to be the rotation axis. In the case ϕ = π, the rotation axis can be computed as an eigenvector of the matrix Q + I, which has eigenvalues (0, 0, 2). However, in B-DNA, which we want to model, rotations through an angle π are very unlikely for relative rotations between adjacent bases. We can choose a scaling f (ϕ) for the rotation axis (where f is an invertible function), to have a three parameter vector f (ϕ)k fully describing the rotation. Then, as k is a unit vector, we can recover the angle ϕ from the relation f (ϕ) = ||f (ϕ)k||. For example, the scaling f (ϕ) used in Curves+ [41] is the value of ϕ in degrees, in [36] f (ϕ) = 2 tan ϕ 2 , and we will use f (ϕ) = 10 tan ϕ 2 as in [24]. We explain our choice of parameter scaling in Section 2.4. If we introduce the Cayley vector η, defined by η = 2 tan ϕ 2 k, (2.8) 7 CHAPTER 2. KINEMATICS OF RIGID BASE AND RIGID BASE PAIR MODELS OF DNA which is well defined for ϕ ∈ [0, π), then (2.1) is equivalent to Q = (I + 1 2 η×)(I − 1 2 η×)−1 =: cay(η). (2.9) For defining DNA configuration parameters, described in the next section, we will also introduce a middle frame between any two frames D and ˜D. We define the middle frame M as M = √QD, (2.10) where √Q is a matrix of half rotation from D to ˜D, i.e. rotation about the same axis k by the angle ϕ 2 . The same rotation as in (2.6) can be expressed multiplying D by a rotation matrix on the right: ˜D = DR = [ 3∑ i=1 Ri1di 3∑ i=1 Ri2di 3∑ i=1 Ri3di ] , (2.11) where R = DT ˜D = DT QD = ˜DT Q ˜D. (2.12) The normalised axial vector kR of the skew matrix R − RT is kR = Dk = ˜Dk, (2.13) i.e. kR is equal to k written in the coordinates of the D frame (which are the same as the coordinates in the ˜D frame, because k is the axis of rotation). Also, the eigenvalues of Q and of R are the same, which means that the two matrices indeed correspond to the same rotation, but expressed in different coordinates. We call R relative rotation matrix and Q the absolute rotation matrix. It will be convenient to use relative rotations in our work. 2.3 Internal coordinates of rigid bases and base pairs Having chosen a parametrisation of rotations, one can replace the absolute coordinates of bases by vectors of relative translations (difference vectors) between the reference points and vectors of relative rotations (Cayley vectors or scaled Cayley vectors) between the reference frames. As we want these coordinates to be independent of global translations and rotations of the whole molecule, they have to be expressed with respect to the frames attached to the molecule, rather than to a fixed lab coordinate frame. We could choose an order of tracing all the bases of an oligomer, for example X1, X1, X2, X2 . . . , Xn, Xn, and compute the vectors of translation and rotation between each consecutive pair of them, expressing both vectors in the reference frame of one of the two bases. However, the values of the configuration parameters of this kind depend on the choice of the reference strand. It would be convenient if the relations between two parameter vectors, corresponding to the two different choices of reference strand, were simple. In order to define relative coordinates with this property, it is helpful to introduce base-pair reference points together with base pair and junction frames. 8 2.3. INTERNAL COORDINATES OF RIGID BASES AND BASE PAIRS We define the base pair frame Ga = {ga i } as the average orientation of the two base frames, Ga = Da√Λa, (2.14) where Λa = (Da)T Da ∈ R3×3, (2.15) is the relative rotation matrix that describes the orientation of the frame {da i } with respect to {da i }. The intra base pair rotational coordinate vector, describing the relative rotation between Xa and Xa, is defined as the Cayley vector of the matrix Λa: ϑa = 2 tr[Λa] + 1 vec[Λa − (Λa)T ]. (2.16) Then ||ϑa|| = 2 sin ϕa 1 + cos ϕa = 2 tan ϕa 2 , (2.17) where ϕa is the angle of rotation corresponding to Λa. Note that the coordinates of this vector are the same with respect to all of the three frames Da, Da and Ga. We used a relative rotation matrix in order to directly get the Cayley vector expressed in these frames. We then use Ga as a reference frame for defining the intra base pair translational coordinate vector between Xa and Xa: ξa = (Ga)T (ra − ra). (2.18) An illustration of intra base pair rotational and translational coordinates is shown in Fig- ure 2.2.Xa Xa+1 ra+1 raXa ra+1 ra Xa+1 Λa Λa+1 ξa ξa+1 (a)Xa Xa+1 ra+1 ra Xa+1 Xa ra+1 ra qa ga+1 3 ga+1 2 qa+1 ga+1 1 ga 3 ga 2 ga 1 (b) Figure 2.2: (a) A schematic illustration of the intra base pair translational and rotational coordinates defined in (2.16) and (2.18). The base frames are shown in red, as in Figure 2.1(b). Note that the intra coordinates are expressed in the base pair frames, shown in (b) and defined in (2.14). To describe the relative rotation and displacement between neighbouring base pairs along the strands we consider the base pair frame Ga and a reference point qa, defined as the 9 CHAPTER 2. KINEMATICS OF RIGID BASE AND RIGID BASE PAIR MODELS OF DNA average position of the two base reference points: qa = 1 2 (ra + ra). (2.19) Similarly, we define a frame Ga+1 = {ga+1 i } and a point qa+1 associated with the base pair (X, X)a+1. Finally, the inter base pair, or junction, rotational coordinate vector, describing the rela- tive rotation between base pairs (X, X)a and (X, X)a+1 is θa = 2 tr[La] + 1 vec[La − (La)T ], (2.20) where La = (Ga)T Ga+1 ∈ R3×3, (2.21) is the relative rotation matrix that describes the orientation of frame {ga i+1} with respect to {ga i }. Note that ||θa|| = 2 sin φa 1 + cos φa = 2 tan φa 2 , (2.22) where φa is the angle of rotation corresponding to La. The inter base pair translational vector, describing the relative rotation between base pairs (X, X)a and (X, X)a+1 is ζa = (Ha)T (qa+1 − qa), (2.23) where the junction frame Ha = {ha i } is defined as Ha = Ga√La. (2.24) An illustration of inter base pair rotational and translational coordinates is shown in Fig- ure 2.3.Xa Xa+1 ra+1 raXa ra+1 ra Xa+1 qa+1 qa ζaLa (a)Xa Xa+1 ra+1 ra Xa+1 Xa ra+1 ra qa+1 qa ha 2 ha 3 ha 1 (b) Figure 2.3: (a) A schematic illustration of the inter base pair (junction) translational and rotational coordinates defined in (2.20) and (2.23). The base frames are shown in red, as in Figure 2.1(b). and the base pair frames in blue, as in Figure 2.2(b). Note that the inter coordinates are expressed in the junction frames, shown in (b) and defined in (2.24). 10 2.3. INTERNAL COORDINATES OF RIGID BASES AND BASE PAIRS Thus the relative rotation and displacement between bases Xa and Xa across the strands is described by the intra-base pair coordinates ya = (ϑ, ξ)a, whereas the relative rotation and displacement between the pairs (X, X)a and (X, X)a+1 along the strands is described by the inter base pair coordinates za = (θ, ζ)a. We define the vector of relative coordinates as w = (y1, z1, y2, z2, ..., yn−1, zn−1, yn) ∈ R12n−6. (2.25) Note that w does not depend on the global translations and rotations of the DNA molecule. The definitions of these coordinates can be shown to satisfy all the qualitative guide- lines set forth in the Cambridge convention [19] for nucleic acid structures, including the symmetry conditions associated with a change of reference strand. Accordingly, we refer to the components of the intra rotational parameters ϑa as Buckle-Propeller-Opening, the in- tra translational parameters ξa as Shear-Stretch-Stagger, the inter rotational parameters θa as Tilt-Roll-Twist, and the inter translational parameters ζa as Shift-Slide-Rise (see Figure 2.4). We remark that ϑa and θa are components of a Cayley rotation vector and so are not conventional angular coordinates about various axes as employed by many authors; however, they can be put into correspondence with conventional angular coordinates, and are nearly identical in the case of small rotations when the angular ones are measured in radians (or fifths of a radian for the scaling described in Section 2.4). Buckle Propeller Opening Tilt Roll Twist Shear Stretch Stagger Shift Slide Rise Figure 2.4: Intra base pair (left) and inter base pair (right) parameters of a coarse-grain DNA model; rotational parameters are on the top, translation parameters on the bottom; bases/base pairs are represented as cuboids. (Thanks to J. Głowacki for this figure.) The complete configuration of a DNA oligomer is specified by introducing six additional coordinates z0 = (θ, ζ)0 for the first base pair frame and reference point with respect to an external, lab-fixed frame. Ignoring these six degrees of freedom exactly corresponds to eliminating the overall symmetry of rigid body motion that exists when there is no external potential field. Given z0 = (θ, ζ)0 ∈ R6, the external base pair frame {g0 i } (e.g. {g0 i } = {ei}), the reference point q0 (e.g. q0 = (0, 0, 0)), and the vector of relative coordinates w ∈ R12n−6, we can recover all the frames and reference points for the bases Xa and Xa, a ∈ 1, . . . , n, using the formulas 11 CHAPTER 2. KINEMATICS OF RIGID BASE AND RIGID BASE PAIR MODELS OF DNA da j = 3∑ i=1 (√Λa)jiga i , ra = qa − 1 2 3∑ i=1 ξa i ga i , (2.26) da j = 3∑ i=1 Λa ij da i , ra = ra + 3∑ i=1 ξa i ga i , (2.27) ga+1 j = 3∑ i=1 La ij ga i , qa+1 = qa + 3∑ i=1 ζa i ha i , (2.28) where Λa = cay[ϑa] is the rotation matrix corresponding to ϑa, La = cay[θa] and {ha i } is defined by (2.24). One could go further in coarse-graining DNA and make an assumption that the base pairs of DNA are rigid bodies. In this case, it is enough to have base pair frames {ga i } and base pair reference points qa for describing the configuration of a molecule. Thus the vector of relative coordinates for the rigid base pair DNA model is wbp = (z1, z2, ..., zn−1) ∈ R6n−6.
dna = mdna.make()
dna.save_pdb('test')
Default sequence: CGCGAATTCGCG Number of base pairs: 12 Start rescaling spline based on requested number of base pairs. This requires recomputation of the control points to match the desired number of base pairs. Spline scaled to match the target number of base pairs: 12