Source code for analyze_foldamers.parameters.bond_distributions

import os

import matplotlib
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import mdtraj as md
import numpy as np
from analyze_foldamers.utilities.plot import plot_distribution
from cg_openmm.cg_model.cgmodel import CGModel
from matplotlib.backends.backend_pdf import PdfPages
from openmm import unit


def assign_bond_types(cgmodel, bond_list, particle_mapping=None):
    """Internal function for assigning bond types"""
    
    bond_types = []
    
    bond_array = np.zeros((len(bond_list),2))
    
    # Relevant bond types are added to a dictionary as they are discovered
    bond_dict = {}
    
    # Create an inverse dictionary for getting bond string name from integer type
    inv_bond_dict = {}
    
    # Counter for number of bond types found:
    i_bond_type = 0
    
    # Assign bond types:
    
    for i in range(len(bond_list)):
        bond_array[i,0] = bond_list[i][0]
        bond_array[i,1] = bond_list[i][1]
        
        if particle_mapping is None:
            particle_types = [
                CGModel.get_particle_type_name(cgmodel,bond_list[i][0]),
                CGModel.get_particle_type_name(cgmodel,bond_list[i][1])
            ]
        else:
            # Use particle_mapping dictionary to create more general bonded types:
            particle_types = [
                particle_mapping[CGModel.get_particle_type_name(cgmodel,bond_list[i][0])],
                particle_mapping[CGModel.get_particle_type_name(cgmodel,bond_list[i][1])]
            ]
        
        string_name = ""
        reverse_string_name = ""
        for particle in particle_types:
            string_name += f"{particle}_"
        string_name = string_name[:-1]
        for particle in reversed(particle_types):
            reverse_string_name += f"{particle}_"
        reverse_string_name = reverse_string_name[:-1]
            
        if (string_name in bond_dict.keys()) == False:
            # New bond type found, add to bond dictionary
            i_bond_type += 1
            bond_dict[string_name] = i_bond_type
            bond_dict[reverse_string_name] = i_bond_type
            # For inverse dict we will use only the forward name based on first encounter
            inv_bond_dict[str(i_bond_type)] = string_name
            # print(f"adding new bond type {i_bond_type}: {string_name} to dictionary")
            # print(f"adding reverse version {i_bond_type}: {reverse_string_name} to dictionary")
            
        bond_types.append(bond_dict[string_name])

    # Sort bonds by type into separate sub arrays for mdtraj compute_distances
    bond_sub_arrays = {}
    for i in range(i_bond_type):
        bond_sub_arrays[str(i+1)] = np.zeros((bond_types.count(i+1),2))
    
    # Counter vector for all bond types
    n_i = np.zeros((i_bond_type,1), dtype=int)
    
    for i in range(len(bond_list)):
        bond_sub_arrays[str(bond_types[i])][n_i[bond_types[i]-1],:] = bond_array[i,:]
        n_i[bond_types[i]-1] += 1
        
    return bond_types, bond_array, bond_sub_arrays, n_i, i_bond_type, bond_dict, inv_bond_dict        
    
    
[docs]def calc_bond_length_distribution(
    cgmodel, file_list, nbins=90, frame_start=0, frame_stride=1, frame_end=-1,
    plot_per_page=2, temperature_list=None, plotfile="bond_hist.pdf",
    particle_mapping=None,
    ):
    """
    Calculate and plot all bond length distributions from a CGModel object and trajectory

    :param cgmodel: CGModel() object
    :type cgmodel: class
    
    :param file_list: path to pdb or dcd trajectory file(s)
    :type file_list: str or list(str)
    
    :param nbins: number of histogram bins
    :type nbins: int
    
    :param frame_start: First frame in trajectory file to use for analysis.
    :type frame_start: int

    :param frame_stride: Advance by this many frames when reading trajectories.
    :type frame_stride: int

    :param frame_end: Last frame in trajectory file to use for analysis.
    :type frame_end: int
    
    :param plot_per_page: number of subplots to display on each page (default=2)
    :type plot_per_page: int
    
    :param temperature_list: list of temperatures corresponding to file_list. If None, file names will be the plot labels.
    :type temperature_list: list(Quantity())
    
    :param plotfile: filename for saving bond length distribution pdf plots
    :type plotfile: str
    
    :param particle_mapping: dictionary mapping specific particle types to more general bonded types (for example, 'sc1' --> 'sc') (default=None)
    :type particle_mapping: dict(str:str)
    
    :returns:
       - bond_hist_data ( dict )
    """   
    
    # Convert file_list to list if a single string:
    if type(file_list) == str:
        # Single file
        file_list = file_list.split()    
    
    # Create dictionary for saving bond histogram data:
    bond_hist_data = {}

    # Get bond list
    bond_list = CGModel.get_bond_list(cgmodel)
    
    # Assign bond types:
    bond_types, bond_array, bond_sub_arrays, n_i, i_bond_type, bond_dict, inv_bond_dict = \
        assign_bond_types(cgmodel, bond_list, particle_mapping=particle_mapping)
    
    file_index = 0
    for file in file_list:
        # Load in a trajectory file:
        if file[-3:] == 'dcd':
            traj = md.load(file,top=md.Topology.from_openmm(cgmodel.topology))
        else:
            traj = md.load(file)
            
        # Select frames for analysis:    
        if frame_end == -1:
            frame_end = traj.n_frames

        traj = traj[frame_start:frame_end:frame_stride]   
        
        nframes = traj.n_frames
            
        # Create inner dictionary for current file:
        if temperature_list is not None:
            file_key = f"{temperature_list[file_index].value_in_unit(unit.kelvin):.2f}" 
        else:
            file_key = file[:-4]
            
        bond_hist_data[file_key] = {}
                
        for i in range(i_bond_type):
            # Compute all bond distances in trajectory
            # This returns an [nframes x n_bonds] array
            bond_val_array = md.compute_distances(traj,bond_sub_arrays[str(i+1)])
            
            # Reshape arrays:  
            bond_val_array = np.reshape(bond_val_array, (nframes*n_i[i][0],1))
            
            # Histogram and plot results:
            n_out, bin_edges_out = np.histogram(
                bond_val_array, bins=nbins, density=True)
            
            bond_bin_centers = np.zeros((len(bin_edges_out)-1,1))
            for j in range(len(bin_edges_out)-1):
                bond_bin_centers[j] = (bin_edges_out[j]+bin_edges_out[j+1])/2   
            
            bond_hist_data[file_key][f"{inv_bond_dict[str(i+1)]}_density"]=n_out
            bond_hist_data[file_key][f"{inv_bond_dict[str(i+1)]}_bin_centers"]=bond_bin_centers
        
        file_index += 1
        
    plot_distribution(
        inv_bond_dict,
        bond_hist_data,
        xlabel="Bond length (nm)",
        ylabel="Probability density",
        figure_title="Bond distributions",
        file_name=f"{plotfile}",
        plot_per_page=plot_per_page,
    )
        
    return bond_hist_data