Source code for analyze_foldamers.parameters.angle_distributions

import os

import matplotlib
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import mdtraj as md
import numpy as np
from analyze_foldamers.parameters.bond_distributions import *
from analyze_foldamers.utilities.plot import plot_distribution
from cg_openmm.cg_model.cgmodel import CGModel
from matplotlib.backends.backend_pdf import PdfPages
from openmm import unit
from scipy.optimize import curve_fit

# These functions calculate and plot bond angle and torsion distributions from a CGModel object and pdb trajectory

def assign_angle_types(cgmodel, angle_list, particle_mapping=None):
    """Internal function for assigning angle types"""
    
    ang_types = [] # List of angle types for each angle in angle_list
    
    ang_array = np.zeros((len(angle_list),3))
    
    # Relevant angle types are added to a dictionary as they are discovered 
    ang_dict = {}
    
    # Create an inverse dictionary for getting angle string name from integer type
    inv_ang_dict = {}
    
    # Counter for number of angle types found:
    i_angle_type = 0
    
    # Assign angle types:
    
    for i in range(len(angle_list)):
        ang_array[i,0] = angle_list[i][0]
        ang_array[i,1] = angle_list[i][1]
        ang_array[i,2] = angle_list[i][2]
        
        if particle_mapping is None:
            particle_types = [
                CGModel.get_particle_type_name(cgmodel,angle_list[i][0]),
                CGModel.get_particle_type_name(cgmodel,angle_list[i][1]),
                CGModel.get_particle_type_name(cgmodel,angle_list[i][2])
            ]
        else:
            particle_types = [
                particle_mapping[CGModel.get_particle_type_name(cgmodel,angle_list[i][0])],
                particle_mapping[CGModel.get_particle_type_name(cgmodel,angle_list[i][1])],
                particle_mapping[CGModel.get_particle_type_name(cgmodel,angle_list[i][2])]
            ]
        
        string_name = ""
        reverse_string_name = ""
        for particle in particle_types:
            string_name += f"{particle}_"
        string_name = string_name[:-1]
        for particle in reversed(particle_types):
            reverse_string_name += f"{particle}_"
        reverse_string_name = reverse_string_name[:-1]
            
        if (string_name in ang_dict.keys()) == False:
            # New angle type found, add to angle dictionary
            i_angle_type += 1
            ang_dict[string_name] = i_angle_type
            ang_dict[reverse_string_name] = i_angle_type
            # For inverse dict we will use only the forward name based on first encounter
            inv_ang_dict[str(i_angle_type)] = string_name
            # print(f"adding new angle type {i_angle_type}: {string_name} to dictionary")
            # print(f"adding reverse version {i_angle_type}: {reverse_string_name} to dictionary")
            
        ang_types.append(ang_dict[string_name])
                    
    # Sort angles by type into separate sub arrays for mdtraj compute_angles
    ang_sub_arrays = {}
    for i in range(i_angle_type):
        ang_sub_arrays[str(i+1)] = np.zeros((ang_types.count(i+1),3))
    
    # Counter vector for all angle types
    n_i = np.zeros((i_angle_type,1), dtype=int)
    
    for i in range(len(angle_list)):
        ang_sub_arrays[str(ang_types[i])][n_i[ang_types[i]-1],:] = ang_array[i,:]
        n_i[ang_types[i]-1] += 1
        
    return ang_types, ang_array, ang_sub_arrays, n_i, i_angle_type, ang_dict, inv_ang_dict
    

def assign_torsion_types(cgmodel, torsion_list, particle_mapping=None):
    """Internal function for assigning torsion types"""
    
    torsion_types = [] # List of torsion types for each torsion in torsion_list
    torsion_array = np.zeros((len(torsion_list),4))
    
    # Relevant torsion types are added to a dictionary as they are discovered 
    torsion_dict = {}
    
    # Create an inverse dictionary for getting torsion string name from integer type
    inv_torsion_dict = {}
    
    # Counter for number of torsion types found:
    i_torsion_type = 0  
    
    for i in range(len(torsion_list)):
        torsion_array[i,0] = torsion_list[i][0]
        torsion_array[i,1] = torsion_list[i][1]
        torsion_array[i,2] = torsion_list[i][2]
        torsion_array[i,3] = torsion_list[i][3]
        
        if particle_mapping is None:
            particle_types = [
                CGModel.get_particle_type_name(cgmodel,torsion_list[i][0]),
                CGModel.get_particle_type_name(cgmodel,torsion_list[i][1]),
                CGModel.get_particle_type_name(cgmodel,torsion_list[i][2]),
                CGModel.get_particle_type_name(cgmodel,torsion_list[i][3])
            ]
        else:
            particle_types = [
                particle_mapping[CGModel.get_particle_type_name(cgmodel,torsion_list[i][0])],
                particle_mapping[CGModel.get_particle_type_name(cgmodel,torsion_list[i][1])],
                particle_mapping[CGModel.get_particle_type_name(cgmodel,torsion_list[i][2])],
                particle_mapping[CGModel.get_particle_type_name(cgmodel,torsion_list[i][3])]
            ]
        
        string_name = ""
        reverse_string_name = ""
        for particle in particle_types:
            string_name += f"{particle}_"
        string_name = string_name[:-1]
        for particle in reversed(particle_types):
            reverse_string_name += f"{particle}_"
        reverse_string_name = reverse_string_name[:-1]
            
        if (string_name in torsion_dict.keys()) == False:
            # New torsion type found, add to torsion dictionary
            i_torsion_type += 1
            torsion_dict[string_name] = i_torsion_type
            torsion_dict[reverse_string_name] = i_torsion_type
            # For inverse dict we will use only the forward name based on first encounter
            inv_torsion_dict[str(i_torsion_type)] = string_name
            
            # print(f"adding new torsion type {i_torsion_type}: {string_name} to dictionary")
            # print(f"adding reverse version {i_torsion_type}: {reverse_string_name} to dictionary")
            
            
        torsion_types.append(torsion_dict[string_name])
                        
    # Sort torsions by type into separate sub arrays for mdtraj compute_dihedrals
    torsion_sub_arrays = {}
    for i in range(i_torsion_type):
        torsion_sub_arrays[str(i+1)] = np.zeros((torsion_types.count(i+1),4))
    
    # Counter vector for all angle types
    n_i = np.zeros((i_torsion_type,1), dtype=int) 
    
    for i in range(len(torsion_list)):
        torsion_sub_arrays[str(torsion_types[i])][n_i[torsion_types[i]-1],:] = torsion_array[i,:]
        n_i[torsion_types[i]-1] += 1
        
    return torsion_types, torsion_array, torsion_sub_arrays, n_i, i_torsion_type, torsion_dict, inv_torsion_dict


[docs]def calc_bond_angle_distribution( cgmodel, file_list, nbins=90, frame_start=0, frame_stride=1, frame_end=-1, plot_per_page=2, temperature_list=None, plotfile="angle_hist.pdf", particle_mapping=None, ): """ Calculate and plot all bond angle distributions from a CGModel object and pdb trajectory :param cgmodel: CGModel() object :type cgmodel: class :param file_list: path to pdb or dcd trajectory file(s) :type file_list: str or list(str) :param nbins: number of bins spanning the range of 0 to 180 degrees, default = 90 :type nbins: int :param frame_start: First frame in trajectory file to use for analysis. :type frame_start: int :param frame_stride: Advance by this many frames when reading trajectories. :type frame_stride: int :param frame_end: Last frame in trajectory file to use for analysis. :type frame_end: int :param plot_per_page: number of subplots to display on each page (default=2) :type plot_per_page: int :param temperature_list: list of temperatures corresponding to file_list. If None, file names will be the plot labels. :type temperature_list: list(Quantity()) :param plotfile: Base filename for saving bond angle distribution pdf plots :type plotfile: str :param particle_mapping: dictionary mapping specific particle types to more general bonded types (for example, 'sc1' --> 'sc') (default=None) :type particle_mapping: dict(str:str) :returns: - angle_hist_data ( dict ) """ # Convert file_list to list if a single string: if type(file_list) == str: # Single file file_list = file_list.split() # Get angle list angle_list = CGModel.get_bond_angle_list(cgmodel) # Assign angle types: ang_types, ang_array, ang_sub_arrays, n_i, i_angle_type, ang_dict, inv_ang_dict = \ assign_angle_types(cgmodel, angle_list, particle_mapping=particle_mapping) # Create dictionary for saving angle histogram data: angle_hist_data = {} # Set bin edges: angle_bin_edges = np.linspace(0,180,nbins+1) angle_bin_centers = np.zeros((len(angle_bin_edges)-1,1)) for i in range(len(angle_bin_edges)-1): angle_bin_centers[i] = (angle_bin_edges[i]+angle_bin_edges[i+1])/2 file_index = 0 for file in file_list: # Load in a trajectory file: if file[-3:] == 'dcd': traj = md.load(file,top=md.Topology.from_openmm(cgmodel.topology)) else: traj = md.load(file) # Select frames for analysis: if frame_end == -1: frame_end = traj.n_frames traj = traj[frame_start:frame_end:frame_stride] nframes = traj.n_frames # Create inner dictionary for current file: if temperature_list is not None: file_key = f"{temperature_list[file_index].value_in_unit(unit.kelvin):.2f}" else: file_key = file[:-4] angle_hist_data[file_key] = {} for i in range(i_angle_type): # Compute all angle values in trajectory # This returns an [nframes x n_angles] array ang_val_array = md.compute_angles(traj,ang_sub_arrays[str(i+1)]) # Reshape arrays and convert to degrees: ang_val_array = (180/np.pi)*np.reshape(ang_val_array, (nframes*n_i[i][0],1)) # Histogram and plot results: n_out, bin_edges_out = np.histogram( ang_val_array, bins=angle_bin_edges,density=True) angle_hist_data[file_key][f"{inv_ang_dict[str(i+1)]}_density"]=n_out angle_hist_data[file_key][f"{inv_ang_dict[str(i+1)]}_bin_centers"]=angle_bin_centers file_index += 1 plot_distribution( inv_ang_dict, angle_hist_data, xlabel="Bond angle (degrees)", ylabel="Probability density", xlim=[0,180], figure_title="Angle distributions", file_name=f"{plotfile}", plot_per_page=plot_per_page ) return angle_hist_data
[docs]def calc_torsion_distribution( cgmodel, file_list, nbins=180, frame_start=0, frame_stride=1, frame_end=-1, plot_per_page=2, temperature_list=None, plotfile="torsion_hist.pdf", particle_mapping=None, ): """ Calculate and plot all torsion distributions from a CGModel object and pdb or dcd trajectory :param cgmodel: CGModel() object :type cgmodel: class :param file_list: path to pdb or dcd trajectory file(s) :type file_list: str or list(str) :param nbins: number of bins spanning the range of -180 to 180 degrees, default = 180 :type nbins: int :param frame_start: First frame in trajectory file to use for analysis. :type frame_start: int :param frame_stride: Advance by this many frames when reading trajectories. :type frame_stride: int :param frame_end: Last frame in trajectory file to use for analysis. :type frame_end: int :param plot_per_page: number of subplots to display on each page (default=2) :type plot_per_page: int :param temperature_list: list of temperatures corresponding to file_list. If None, file names will be the plot labels. :type temperature_list: list(Quantity()) :param plotfile: Base filename for saving torsion distribution pdf plots :type plotfile: str :param particle_mapping: dictionary mapping specific particle types to more general bonded types (for example, 'sc1' --> 'sc') (default=None) :type particle_mapping: dict(str:str) :returns: - torsion_hist_data ( dict ) """ # Convert file_list to list if a single string: if type(file_list) == str: # Single file file_list = file_list.split() # Get torsion list torsion_list = CGModel.get_torsion_list(cgmodel) # Assign torsion types torsion_types, torsion_array, torsion_sub_arrays, n_i, i_torsion_type, torsion_dict, inv_torsion_dict = \ assign_torsion_types(cgmodel, torsion_list, particle_mapping=particle_mapping) # Create dictionary for saving torsion histogram data: torsion_hist_data = {} # Set bin edges: torsion_bin_edges = np.linspace(-180,180,nbins+1) torsion_bin_centers = np.zeros((len(torsion_bin_edges)-1,1)) for i in range(len(torsion_bin_edges)-1): torsion_bin_centers[i] = (torsion_bin_edges[i]+torsion_bin_edges[i+1])/2 file_index = 0 for file in file_list: # Load in a trajectory file: if file[-3:] == 'dcd': traj = md.load(file,top=md.Topology.from_openmm(cgmodel.topology)) else: traj = md.load(file) # Select frames for analysis: if frame_end == -1: frame_end = traj.n_frames traj = traj[frame_start:frame_end:frame_stride] nframes = traj.n_frames # Create inner dictionary for current file: if temperature_list is not None: file_key = f"{temperature_list[file_index].value_in_unit(unit.kelvin):.2f}" else: file_key = file[:-4] torsion_hist_data[file_key] = {} for i in range(i_torsion_type): # Compute all torsion values in trajectory # This returns an [nframes x n_torsions] array torsion_val_array = md.compute_dihedrals( traj,torsion_sub_arrays[str(i+1)]) # Reshape arrays and convert to degrees: torsion_val_array = (180/np.pi)*np.reshape(torsion_val_array, (nframes*n_i[i][0],1)) # Histogram and plot results: n_out, bin_edges_out = np.histogram( torsion_val_array, bins=torsion_bin_edges,density=True) torsion_hist_data[file_key][f"{inv_torsion_dict[str(i+1)]}_density"]=n_out torsion_hist_data[file_key][f"{inv_torsion_dict[str(i+1)]}_bin_centers"]=torsion_bin_centers file_index += 1 plot_distribution( inv_torsion_dict, torsion_hist_data, xlabel="Torsion angle (degrees)", ylabel="Probability density", xlim=[-180,180], figure_title="Torsion_distributions", file_name=f"{plotfile}", plot_per_page=plot_per_page, ) return torsion_hist_data
[docs]def calc_2d_distribution( cgmodel, file_list, nbin_xvar=180, nbin_yvar=180, frame_start=0, frame_stride=1, frame_end=-1, plotfile="2d_hist.pdf", xvar_name = "bb_bb_bb", yvar_name = "bb_bb_bb_bb", colormap="nipy_spectral", temperature_list=None, particle_mapping=None, ): """ Calculate and plot 2d histogram for any 2 bonded variables, given a CGModel object and pdb or dcd trajectory. :param cgmodel: CGModel() object :type cgmodel: class :param file_list: path to pdb or dcd trajectory file(s) - can be a list or single string :type file_list: str or list(str) :param nbin_xvar: number of bins for x bonded variable :type nbin_xvar: int :param nbin_yvar: number of bins for y bonded variable :type nbin_yvar: :param frame_start: First frame in trajectory file to use for analysis. :type frame_start: int :param frame_stride: Advance by this many frames when reading trajectories. :type frame_stride: int :param frame_end: Last frame in trajectory file to use for analysis. :type frame_end: int :param plotfile: Filename for saving torsion distribution pdf plots :type plotfile: str :param xvar_name: particle sequence of the x bonded parameter, after any particle type mapping (default="bb_bb_bb") :type xvar_name: str :param yvar_name: particle sequence of the y bonded parameter, after any particle type mapping (default="bb_bb_bb_bb") :type yvar_name: str :param colormap: matplotlib pyplot colormap to use (default='nipy_spectral') :type colormap: str (case sensitive) :param temperature_list: list of temperatures corresponding to file_list. If None, no subplot labels will be used. :type temperature_list: list(Quantity()) :param particle_mapping: dictionary mapping specific particle types to more general bonded types (for example, 'sc1' --> 'sc') (default=None) :type particle_mapping: dict(str:str) :returns: - hist_data ( dict ) - xedges ( dict ) - yedges ( dict ) """ # Convert file_list to list if a single string: if type(file_list) == str: # Single file file_list = file_list.split() # Store angle, torsion values by filename for computing global colormap xvar_val_array = {} yvar_val_array = {} # Store the reverse name of the bonded type (need to check both) # x variable particle_list = [] particle = "" for c in xvar_name: if c == '_': particle_list.append(particle) particle = "" else: particle += c particle_list.append(particle) particle_list_reverse = particle_list[::-1] xvar_name_reverse = "" for par in particle_list_reverse: xvar_name_reverse += par xvar_name_reverse += "_" xvar_name_reverse = xvar_name_reverse[:-1] # y variable particle_list = [] particle = "" for c in yvar_name: if c == '_': particle_list.append(particle) particle = "" else: particle += c particle_list.append(particle) particle_list_reverse = particle_list[::-1] yvar_name_reverse = "" for par in particle_list_reverse: yvar_name_reverse += par yvar_name_reverse += "_" yvar_name_reverse = yvar_name_reverse[:-1] for file in file_list: # Load in a trajectory file: if file[-3:] == 'dcd': traj = md.load(file,top=md.Topology.from_openmm(cgmodel.topology)) else: traj = md.load(file) # Select frames for analysis: if frame_end == -1: frame_end = traj.n_frames traj = traj[frame_start:frame_end:frame_stride] nframes = traj.n_frames # x variable # Determine parameter type of xvar: n_particle_x = xvar_name.count('_')+1 if n_particle_x == 2: # Bond # Get bond list bond_list = CGModel.get_bond_list(cgmodel) # Assign bond types: bond_types, bond_array, bond_sub_arrays, n_i, i_bond_type, bond_dict, inv_bond_dict = \ assign_bond_types(cgmodel, bond_list, particle_mapping=particle_mapping) for i in range(i_bond_type): if inv_bond_dict[str(i+1)] == xvar_name or inv_bond_dict[str(i+1)] == xvar_name_reverse: # Compute all bond length values in trajectory # This returns an [nframes x n_bonds] array xvar_val_array[file] = md.compute_distances(traj,bond_sub_arrays[str(i+1)]) # Get equilibrium value: b_eq = cgmodel.get_bond_length(bond_sub_arrays[str(i+1)][0]) # Set bin edges: # This should be the same across all files - use heuristic from equilibrium bond length b_min = 0.5*b_eq.value_in_unit(unit.nanometer) b_max = 1.5*b_eq.value_in_unit(unit.nanometer) xvar_bin_edges = np.linspace(b_min,b_max,nbin_xvar+1) xvar_bin_centers = np.zeros((len(xvar_bin_edges)-1,1)) for i in range(len(xvar_bin_edges)-1): xvar_bin_centers[i] = (xvar_bin_edges[i]+xvar_bin_edges[i+1])/2 xlabel = f'{xvar_name} distance ({unit.nanometer})' elif n_particle_x == 3: # Angle # Get angle list angle_list = CGModel.get_bond_angle_list(cgmodel) # Assign angle types: ang_types, ang_array, ang_sub_arrays, n_i, i_angle_type, ang_dict, inv_ang_dict = \ assign_angle_types(cgmodel, angle_list, particle_mapping=particle_mapping) # Set bin edges: xvar_bin_edges = np.linspace(0,180,nbin_xvar+1) xvar_bin_centers = np.zeros((len(xvar_bin_edges)-1,1)) for i in range(len(xvar_bin_edges)-1): xvar_bin_centers[i] = (xvar_bin_edges[i]+xvar_bin_edges[i+1])/2 for i in range(i_angle_type): if inv_ang_dict[str(i+1)] == xvar_name or inv_ang_dict[str(i+1)] == xvar_name_reverse: # Compute all angle values in trajectory # This returns an [nframes x n_angles] array xvar_val_array[file] = md.compute_angles(traj,ang_sub_arrays[str(i+1)]) # Convert to degrees: xvar_val_array[file] *= (180/np.pi) xlabel = f'{xvar_name} angle (degrees)' elif n_particle_x == 4: # Torsion # Get torsion list torsion_list = CGModel.get_torsion_list(cgmodel) # Assign torsion types torsion_types, torsion_array, torsion_sub_arrays, n_j, i_torsion_type, torsion_dict, inv_torsion_dict = \ assign_torsion_types(cgmodel, torsion_list, particle_mapping=particle_mapping) # Set bin edges: xvar_bin_edges = np.linspace(-180,180,nbin_xvar+1) xvar_bin_centers = np.zeros((len(xvar_bin_edges)-1,1)) for i in range(len(xvar_bin_edges)-1): xvar_bin_centers[i] = (xvar_bin_edges[i]+xvar_bin_edges[i+1])/2 for i in range(i_torsion_type): if inv_torsion_dict[str(i+1)] == xvar_name or inv_torsion_dict[str(i+1)] == xvar_name_reverse: # Compute all torsion values in trajectory # This returns an [nframes x n_torsions] array xvar_val_array[file] = md.compute_dihedrals( traj,torsion_sub_arrays[str(i+1)]) # Convert to degrees: xvar_val_array[file] *= (180/np.pi) xlabel = f'{xvar_name} angle (degrees)' # y variable # Determine parameter type of yvar: n_particle_y = yvar_name.count('_')+1 if n_particle_y == 2: # Bond # Get bond list bond_list = CGModel.get_bond_list(cgmodel) # Assign bond types: bond_types, bond_array, bond_sub_arrays, n_i, i_bond_type, bond_dict, inv_bond_dict = \ assign_bond_types(cgmodel, bond_list, particle_mapping=particle_mapping) for i in range(i_bond_type): if inv_bond_dict[str(i+1)] == yvar_name or inv_bond_dict[str(i+1)] == yvar_name_reverse: # Compute all bond length values in trajectory # This returns an [nframes x n_bonds] array yvar_val_array[file] = md.compute_distances(traj,bond_sub_arrays[str(i+1)]) # Get equilibrium value: b_eq = cgmodel.get_bond_length(bond_sub_arrays[str(i+1)][0]) # Set bin edges: # This should be the same across all files - use heuristic from equilibrium bond length b_min = 0.5*b_eq.value_in_unit(unit.nanometer) b_max = 1.5*b_eq.value_in_unit(unit.nanometer) yvar_bin_edges = np.linspace(b_min,b_max,nbin_yvar+1) yvar_bin_centers = np.zeros((len(yvar_bin_edges)-1,1)) for i in range(len(yvar_bin_edges)-1): yvar_bin_centers[i] = (yvar_bin_edges[i]+yvar_bin_edges[i+1])/2 ylabel = f'{yvar_name} distance ({unit.nanometer})' elif n_particle_y == 3: # Angle # Get angle list angle_list = CGModel.get_bond_angle_list(cgmodel) # Assign angle types: ang_types, ang_array, ang_sub_arrays, n_i, i_angle_type, ang_dict, inv_ang_dict = \ assign_angle_types(cgmodel, angle_list, particle_mapping=particle_mapping) # Set bin edges: yvar_bin_edges = np.linspace(0,180,nbin_yvar+1) yvar_bin_centers = np.zeros((len(yvar_bin_edges)-1,1)) for i in range(len(yvar_bin_edges)-1): yvar_bin_centers[i] = (yvar_bin_edges[i]+yvar_bin_edges[i+1])/2 for i in range(i_angle_type): if inv_ang_dict[str(i+1)] == yvar_name or inv_ang_dict[str(i+1)] == yvar_name_reverse: # Compute all angle values in trajectory # This returns an [nframes x n_angles] array yvar_val_array[file] = md.compute_angles(traj,ang_sub_arrays[str(i+1)]) # Convert to degrees: yvar_val_array[file] *= (180/np.pi) ylabel = f'{yvar_name} angle (degrees)' elif n_particle_y == 4: # Torsion # Get torsion list torsion_list = CGModel.get_torsion_list(cgmodel) # Assign torsion types torsion_types, torsion_array, torsion_sub_arrays, n_j, i_torsion_type, torsion_dict, inv_torsion_dict = \ assign_torsion_types(cgmodel, torsion_list, particle_mapping=particle_mapping) # Set bin edges: yvar_bin_edges = np.linspace(-180,180,nbin_yvar+1) yvar_bin_centers = np.zeros((len(yvar_bin_edges)-1,1)) for i in range(len(yvar_bin_edges)-1): yvar_bin_centers[i] = (yvar_bin_edges[i]+yvar_bin_edges[i+1])/2 for i in range(i_torsion_type): if inv_torsion_dict[str(i+1)] == yvar_name or inv_torsion_dict[str(i+1)] == yvar_name_reverse: # Compute all torsion values in trajectory # This returns an [nframes x n_torsions] array yvar_val_array[file] = md.compute_dihedrals( traj,torsion_sub_arrays[str(i+1)]) # Convert to degrees: yvar_val_array[file] *= (180/np.pi) ylabel = f'{yvar_name} angle (degrees)' # Since the bonded variables may have different numbers of observables, we can use all # combinations of the 2 parameter observables to create the histograms. xvar_val_array_combo = {} yvar_val_array_combo = {} # Each array of single observables is [n_frames x n_occurances] # x value arrays should be [xval0_y0, xval1_y0, ...xvaln_y0, ... xval0_yn, xval1_yn, xvaln_yn] # y value arrays should be [yval0_x0, yval0_x1, ...yval0_xn, ... yvaln_x0, yvaln_x1, yvaln_xn] for file in file_list: n_occ_x = xvar_val_array[file].shape[1] n_occ_y = yvar_val_array[file].shape[1] xvar_val_array_combo[file] = np.zeros((nframes,n_occ_x*n_occ_y)) yvar_val_array_combo[file] = np.zeros_like(xvar_val_array_combo[file]) for iy in range(n_occ_y): xvar_val_array_combo[file][:,(iy*n_occ_x):((iy+1)*n_occ_x)] = xvar_val_array[file] for ix in range(n_occ_x): yvar_val_array_combo[file][:,ix+iy*n_occ_x] = yvar_val_array[file][:,iy] # Reshape arrays for histogramming: xvar_val_array_combo[file] = np.reshape(xvar_val_array_combo[file], (nframes*n_occ_x*n_occ_y,1)) yvar_val_array_combo[file] = np.reshape(yvar_val_array_combo[file], (nframes*n_occ_x*n_occ_y,1)) # 2d histogram the data and plot: hist_data, xedges, yedges = plot_2d_distribution( file_list, xvar_val_array_combo, yvar_val_array_combo, xvar_bin_edges, yvar_bin_edges, plotfile, colormap, xlabel, ylabel, temperature_list=temperature_list) return hist_data, xedges, yedges
[docs]def calc_ramachandran( cgmodel, file_list, nbin_theta=180, nbin_alpha=180, frame_start=0, frame_stride=1, frame_end=-1, plotfile="ramachandran.pdf", backbone_angle_type="bb_bb_bb", backbone_torsion_type="bb_bb_bb_bb", colormap="nipy_spectral", temperature_list=None, particle_mapping=None, ): """ Calculate and plot ramachandran plot for backbone bond bending-angle and torsion angle, given a CGModel object and pdb or dcd trajectory. :param cgmodel: CGModel() object :type cgmodel: class :param file_list: path to pdb or dcd trajectory file(s) - can be a list or single string :type file_list: str or list(str) :param nbin_theta: number of bins for bond-bending angle (spanning from 0 to 180 degrees) :type nbin_theta: int :param nbin_alpha: number of bins for torsion angle (spanning from -180 to +180 degrees) :type nbin_alpha: :param frame_start: First frame in trajectory file to use for analysis. :type frame_start: int :param frame_stride: Advance by this many frames when reading trajectories. :type frame_stride: int :param frame_end: Last frame in trajectory file to use for analysis. :type frame_end: int :param plotfile: Filename for saving torsion distribution pdf plots :type plotfile: str :param backbone_angle_type: particle sequence of the backbone angles, after any particle type mapping (default="bb_bb_bb") :type backbone_angle_type: str :param backbone_torsion_type: particle sequence of the backbone angles, after any particle type mapping (default="bb_bb_bb_bb") :type backbone_torsion_type: str :param colormap: matplotlib pyplot colormap to use (default='nipy_spectral') :type colormap: str (case sensitive) :param temperature_list: list of temperatures corresponding to file_list. If None, no subplot labels will be used. :type temperature_list: list(Quantity()) :param particle_mapping: dictionary mapping specific particle types to more general bonded types (for example, 'sc1' --> 'sc') (default=None) :type particle_mapping: dict(str:str) :returns: - hist_data ( dict ) - xedges ( dict ) - yedges ( dict ) """ # Convert file_list to list if a single string: if type(file_list) == str: # Single file file_list = file_list.split() # Store angle, torsion values by filename for computing global colormap ang_val_array = {} torsion_val_array = {} for file in file_list: # Load in a trajectory file: if file[-3:] == 'dcd': traj = md.load(file,top=md.Topology.from_openmm(cgmodel.topology)) else: traj = md.load(file) # Select frames for analysis: if frame_end == -1: frame_end = traj.n_frames traj = traj[frame_start:frame_end:frame_stride] nframes = traj.n_frames # Get angle list angle_list = CGModel.get_bond_angle_list(cgmodel) # Assign angle types: ang_types, ang_array, ang_sub_arrays, n_i, i_angle_type, ang_dict, inv_ang_dict = \ assign_angle_types(cgmodel, angle_list, particle_mapping=particle_mapping) # Set bin edges: angle_bin_edges = np.linspace(0,180,nbin_theta+1) angle_bin_centers = np.zeros((len(angle_bin_edges)-1,1)) for i in range(len(angle_bin_edges)-1): angle_bin_centers[i] = (angle_bin_edges[i]+angle_bin_edges[i+1])/2 for i in range(i_angle_type): if inv_ang_dict[str(i+1)] == backbone_angle_type: # Compute all angle values in trajectory # This returns an [nframes x n_angles] array ang_val_array[file] = md.compute_angles(traj,ang_sub_arrays[str(i+1)]) # We will have different numbers of bond-bending angle and torsion angle. # We will set a convention of omitting the last angle value. # Convert to degrees and exclude last angle: ang_val_array[file] = (180/np.pi)*ang_val_array[file][:,:-1] # Reshape array: ang_val_array[file] = np.reshape(ang_val_array[file], (nframes*(n_i[i]-1)[0],1)) # Get torsion list torsion_list = CGModel.get_torsion_list(cgmodel) # Assign torsion types torsion_types, torsion_array, torsion_sub_arrays, n_j, i_torsion_type, torsion_dict, inv_torsion_dict = \ assign_torsion_types(cgmodel, torsion_list, particle_mapping=particle_mapping) # Set bin edges: torsion_bin_edges = np.linspace(-180,180,nbin_alpha+1) torsion_bin_centers = np.zeros((len(torsion_bin_edges)-1,1)) for i in range(len(torsion_bin_edges)-1): torsion_bin_centers[i] = (torsion_bin_edges[i]+torsion_bin_edges[i+1])/2 for i in range(i_torsion_type): if inv_torsion_dict[str(i+1)] == backbone_torsion_type: # Compute all torsion values in trajectory # This returns an [nframes x n_torsions] array torsion_val_array[file] = md.compute_dihedrals( traj,torsion_sub_arrays[str(i+1)]) # Convert to degrees: torsion_val_array[file] *= (180/np.pi) # Reshape array torsion_val_array[file] = np.reshape(torsion_val_array[file], (nframes*n_j[i][0],1)) # 2d histogram the data and plot: hist_data, xedges, yedges = plot_2d_distribution( file_list, torsion_val_array, ang_val_array, torsion_bin_edges, angle_bin_edges, plotfile, colormap, xlabel='Alpha (degrees)', ylabel='Theta (degrees)', temperature_list=temperature_list) return hist_data, xedges, yedges
def plot_2d_distribution(file_list, xvar_val_array, yvar_val_array, xvar_bin_edges, yvar_bin_edges, plotfile, colormap, xlabel, ylabel, temperature_list=None): """Internal function for 2d histogramming bonded observable data and creating 2d plots""" # Store 2d histogram output hist_data = {} xedges = {} yedges = {} image = {} # Determine optimal subplot layout nseries = len(file_list) # This favors more rows instead of more columns: # If not a square number of series, an extra row is needed: nrow = int(np.ceil(np.sqrt(nseries)))+2+(nseries%(np.ceil(np.sqrt(nseries)))!=0) ncol = int(np.ceil(nseries/(nrow-1)))+2 # Initialize plot figure = plt.figure( constrained_layout=True, figsize=(11,8.5), ) # Gridspec width and height ratios: # Add extra space on sides for labels / colorbar widths = np.ones(ncol) widths[0]=0.1 widths[-1]=0.01 heights = np.ones(nrow) heights[0] = 0.1 heights[-1] = 0.1 fig_specs = figure.add_gridspec( ncols=ncol, nrows=nrow, width_ratios=widths, height_ratios=heights ) subplot_id = 1 for file in file_list: # Accounting for blank gridspec at the borders: row = int(np.ceil(subplot_id/(ncol-2))) col = 1+int((subplot_id-1)%(ncol-2)) # axs subplot object is only subscriptable in dimensions it has multiple entries in if nrow > 1 and ncol > 1: ax = figure.add_subplot(fig_specs[row,col]) hist_data_out, xedges_out, yedges_out, image_out = ax.hist2d( xvar_val_array[file][:,0], yvar_val_array[file][:,0], bins=[xvar_bin_edges,yvar_bin_edges], density=True, ) if ncol > 1 and nrow == 1: ax = figure.add_subplot(fig_specs[row,col]) hist_data_out, xedges_out, yedges_out, image_out = ax.hist2d( xvar_val_array[file][:,0], yvar_val_array[file][:,0], bins=[xvar_bin_edges,yvar_bin_edges], density=True, ) if ncol == 1 and nrow == 1: ax = figure.add_subplot(fig_specs[row,col]) hist_data_out, xedges_out, yedges_out, image_out = ax.hist2d( xvar_val_array[file][:,0], yvar_val_array[file][:,0], bins=[xvar_bin_edges,yvar_bin_edges], density=True, ) hist_data[file[:-4]] = hist_data_out xedges[file[:-4]] = xedges_out yedges[file[:-4]] = yedges_out image[file[:-4]] = image_out #axs[row,col].set_title(file) subplot_id += 1 plt.close() # Renormalize data to global maximum: max_global = 0 for key, val in hist_data.items(): max_i = np.amax(val) if max_i > max_global: max_global = max_i # Re-initialize plot figure = plt.figure( constrained_layout=True, figsize=(11,8.5), ) fig_specs = figure.add_gridspec( ncols=ncol, nrows=nrow, width_ratios=widths, height_ratios=heights ) # Update the colormap normalization: cmap=plt.get_cmap(colormap) norm=matplotlib.colors.Normalize(vmin=0,vmax=max_global) subplot_id = 1 for file in file_list: # Renormalize quadmesh images # There should be a way to do this using QuadMesh.set_norm. # For now just replot the data: # Accounting for blank gridspec at the borders: row = int(np.ceil(subplot_id/(ncol-2))) col = 1+int((subplot_id-1)%(ncol-2)) if nrow > 1 and ncol > 1: ax = figure.add_subplot(fig_specs[row,col]) hist_data_out, xedges_out, yedges_out, image_out = ax.hist2d( xvar_val_array[file][:,0], yvar_val_array[file][:,0], bins=[xvar_bin_edges,yvar_bin_edges], density=True, cmap=cmap, norm=norm, ) if temperature_list is not None: xlim = ax.get_xlim() ylim = ax.get_ylim() ax.text( (xlim[0]+0.1*(xlim[1]-xlim[0])), (ylim[0]+0.1*(ylim[1]-ylim[0])), f'{temperature_list[subplot_id-1].value_in_unit(unit.kelvin):<.2f} K', {'fontsize': 10,'color': 'w'}, ) if ncol > 1 and nrow == 1: ax = figure.add_subplot(fig_specs[row,col]) hist_data_out, xedges_out, yedges_out, image_out = ax.hist2d( xvar_val_array[file][:,0], yvar_val_array[file][:,0], bins=[xvar_bin_edges,yvar_bin_edges], density=True, cmap=cmap, norm=norm, ) if temperature_list is not None: xlim = ax.get_xlim() ylim = ax.get_ylim() ax.text( (xlim[0]+0.1*(xlim[1]-xlim[0])), (ylim[0]+0.1*(ylim[1]-ylim[0])), f'{temperature_list[subplot_id-1].value_in_unit(unit.kelvin):<.2f} K', {'fontsize': 10,'color': 'w'}, ) if ncol == 1 and nrow == 1: ax = figure.add_subplot(fig_specs[row,col]) hist_data_out, xedges_out, yedges_out, image_out = ax.hist2d( xvar_val_array[file][:,0], yvar_val_array[file][:,0], bins=[xvar_bin_edges,yvar_bin_edges], density=True, cmap=cmap, norm=norm, ) if temperature_list is not None: xlim = ax.get_xlim() ylim = ax.get_ylim() ax.text( (xlim[0]+0.1*(xlim[1]-xlim[0])), (ylim[0]+0.1*(ylim[1]-ylim[0])), f'{temperature_list[subplot_id-1].value_in_unit(unit.kelvin):<.2f} K', {'fontsize': 10,'color': 'w'}, ) hist_data[file[:-4]] = hist_data_out xedges[file[:-4]] = xedges_out yedges[file[:-4]] = yedges_out image[file[:-4]] = image_out subplot_id += 1 # Add common axis labels to border gridspec axes: ax_west = figure.add_subplot(fig_specs[:,0], frameon=False) ax_east = figure.add_subplot(fig_specs[:,-1], frameon=False) ax_south = figure.add_subplot(fig_specs[-1,1:-1], frameon=False) ax_north = figure.add_subplot(fig_specs[0,1:-1], frameon=False) # Add colorbar to right side: plt.colorbar( matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap), ax=ax_east, shrink=0.5, fraction=1.0, pad=0, label='probability density') ax_south.set_xlabel(xlabel, fontsize=14) ax_south.xaxis.set_label_coords(0.5,0.5) ax_west.set_ylabel(ylabel, fontsize=14) ax_west.yaxis.set_label_coords(0.5,0.5) ax_east.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False) ax_west.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False) ax_south.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False) ax_north.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False) plt.savefig(plotfile) plt.close() return hist_data, xedges, yedges
[docs]def fit_ramachandran_data(hist_data, xedges, yedges): """ Calculate and plot ramachandran plot for backbone bond bending-angle and torsion angle, given a CGModel object and pdb or dcd trajectory. :param hist_data: dictionary containing 2D histogram data for one or more data series :type hist_data: dict {series_name: 2D numpy array} :param xedges: dictionary containing bin edges corresponding to x dimension of hist_data :type xedges: dict {series_name: 1D numpy array} :param yedges: dictionary containing bin edges corresponding to y dimension of hist_data :type yedges: dict {series_name: 1D numpy array} """ # Fit to 2d symmetric gaussian: def two_gauss(xy,a,x0,y0,c): x, y = xy return a*np.exp(-((np.power((x-x0),2) + 0.5*np.power((y-y0),2))/(2*np.power(c,2)))) param_opt = {} param_cov = {} for key,value in hist_data.items(): z_data = value max_xy_index = np.unravel_index(np.argmax(z_data, axis=None), z_data.shape) max_x = (xedges[key][max_xy_index[0]]+xedges[key][max_xy_index[0]+1])/2 max_y = (yedges[key][max_xy_index[1]]+yedges[key][max_xy_index[1]+1])/2 param_guess = [0.01,max_x,max_y,5] x_centers = xedges[key][:-1] + (xedges[key][1]-xedges[key][0])/2 y_centers = yedges[key][:-1] + (yedges[key][1]-yedges[key][0])/2 x_data, y_data = np.meshgrid(x_centers, y_centers) # Ravel x,y data to a pair of 1D arrays xy_data = np.vstack((x_data.ravel(), y_data.ravel())) param_opt[key], param_cov[key] = curve_fit(two_gauss, xy_data, z_data.ravel(), param_guess) return param_opt, param_cov