Source code for analyze_foldamers.parameters.bond_distributions

import os

import matplotlib
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import mdtraj as md
import numpy as np
from analyze_foldamers.utilities.plot import plot_distribution
from cg_openmm.cg_model.cgmodel import CGModel
from matplotlib.backends.backend_pdf import PdfPages
from openmm import unit


def assign_bond_types(cgmodel, bond_list, particle_mapping=None):
    """Internal function for assigning bond types"""
    
    bond_types = []
    
    bond_array = np.zeros((len(bond_list),2))
    
    # Relevant bond types are added to a dictionary as they are discovered
    bond_dict = {}
    
    # Create an inverse dictionary for getting bond string name from integer type
    inv_bond_dict = {}
    
    # Counter for number of bond types found:
    i_bond_type = 0
    
    # Assign bond types:
    
    for i in range(len(bond_list)):
        bond_array[i,0] = bond_list[i][0]
        bond_array[i,1] = bond_list[i][1]
        
        if particle_mapping is None:
            particle_types = [
                CGModel.get_particle_type_name(cgmodel,bond_list[i][0]),
                CGModel.get_particle_type_name(cgmodel,bond_list[i][1])
            ]
        else:
            # Use particle_mapping dictionary to create more general bonded types:
            particle_types = [
                particle_mapping[CGModel.get_particle_type_name(cgmodel,bond_list[i][0])],
                particle_mapping[CGModel.get_particle_type_name(cgmodel,bond_list[i][1])]
            ]
        
        string_name = ""
        reverse_string_name = ""
        for particle in particle_types:
            string_name += f"{particle}_"
        string_name = string_name[:-1]
        for particle in reversed(particle_types):
            reverse_string_name += f"{particle}_"
        reverse_string_name = reverse_string_name[:-1]
            
        if (string_name in bond_dict.keys()) == False:
            # New bond type found, add to bond dictionary
            i_bond_type += 1
            bond_dict[string_name] = i_bond_type
            bond_dict[reverse_string_name] = i_bond_type
            # For inverse dict we will use only the forward name based on first encounter
            inv_bond_dict[str(i_bond_type)] = string_name
            # print(f"adding new bond type {i_bond_type}: {string_name} to dictionary")
            # print(f"adding reverse version {i_bond_type}: {reverse_string_name} to dictionary")
            
        bond_types.append(bond_dict[string_name])

    # Sort bonds by type into separate sub arrays for mdtraj compute_distances
    bond_sub_arrays = {}
    for i in range(i_bond_type):
        bond_sub_arrays[str(i+1)] = np.zeros((bond_types.count(i+1),2))
    
    # Counter vector for all bond types
    n_i = np.zeros((i_bond_type,1), dtype=int)
    
    for i in range(len(bond_list)):
        bond_sub_arrays[str(bond_types[i])][n_i[bond_types[i]-1],:] = bond_array[i,:]
        n_i[bond_types[i]-1] += 1
        
    return bond_types, bond_array, bond_sub_arrays, n_i, i_bond_type, bond_dict, inv_bond_dict        
    
    
[docs]def calc_bond_length_distribution( cgmodel, file_list, nbins=90, frame_start=0, frame_stride=1, frame_end=-1, plot_per_page=2, temperature_list=None, plotfile="bond_hist.pdf", particle_mapping=None, ): """ Calculate and plot all bond length distributions from a CGModel object and trajectory :param cgmodel: CGModel() object :type cgmodel: class :param file_list: path to pdb or dcd trajectory file(s) :type file_list: str or list(str) :param nbins: number of histogram bins :type nbins: int :param frame_start: First frame in trajectory file to use for analysis. :type frame_start: int :param frame_stride: Advance by this many frames when reading trajectories. :type frame_stride: int :param frame_end: Last frame in trajectory file to use for analysis. :type frame_end: int :param plot_per_page: number of subplots to display on each page (default=2) :type plot_per_page: int :param temperature_list: list of temperatures corresponding to file_list. If None, file names will be the plot labels. :type temperature_list: list(Quantity()) :param plotfile: filename for saving bond length distribution pdf plots :type plotfile: str :param particle_mapping: dictionary mapping specific particle types to more general bonded types (for example, 'sc1' --> 'sc') (default=None) :type particle_mapping: dict(str:str) :returns: - bond_hist_data ( dict ) """ # Convert file_list to list if a single string: if type(file_list) == str: # Single file file_list = file_list.split() # Create dictionary for saving bond histogram data: bond_hist_data = {} # Get bond list bond_list = CGModel.get_bond_list(cgmodel) # Assign bond types: bond_types, bond_array, bond_sub_arrays, n_i, i_bond_type, bond_dict, inv_bond_dict = \ assign_bond_types(cgmodel, bond_list, particle_mapping=particle_mapping) file_index = 0 for file in file_list: # Load in a trajectory file: if file[-3:] == 'dcd': traj = md.load(file,top=md.Topology.from_openmm(cgmodel.topology)) else: traj = md.load(file) # Select frames for analysis: if frame_end == -1: frame_end = traj.n_frames traj = traj[frame_start:frame_end:frame_stride] nframes = traj.n_frames # Create inner dictionary for current file: if temperature_list is not None: file_key = f"{temperature_list[file_index].value_in_unit(unit.kelvin):.2f}" else: file_key = file[:-4] bond_hist_data[file_key] = {} for i in range(i_bond_type): # Compute all bond distances in trajectory # This returns an [nframes x n_bonds] array bond_val_array = md.compute_distances(traj,bond_sub_arrays[str(i+1)]) # Reshape arrays: bond_val_array = np.reshape(bond_val_array, (nframes*n_i[i][0],1)) # Histogram and plot results: n_out, bin_edges_out = np.histogram( bond_val_array, bins=nbins, density=True) bond_bin_centers = np.zeros((len(bin_edges_out)-1,1)) for j in range(len(bin_edges_out)-1): bond_bin_centers[j] = (bin_edges_out[j]+bin_edges_out[j+1])/2 bond_hist_data[file_key][f"{inv_bond_dict[str(i+1)]}_density"]=n_out bond_hist_data[file_key][f"{inv_bond_dict[str(i+1)]}_bin_centers"]=bond_bin_centers file_index += 1 plot_distribution( inv_bond_dict, bond_hist_data, xlabel="Bond length (nm)", ylabel="Probability density", figure_title="Bond distributions", file_name=f"{plotfile}", plot_per_page=plot_per_page, ) return bond_hist_data