Source code for fretraj.grid

#!/usr/bin/env python3
import numpy as np
import numba as nb
import heapq

_dist_list = np.sqrt(np.array([1, 2, 3, 5, 6]))  # reduction to 74 neighbors


[docs]class Grid3D: """Class object holding a 3D grid Parameters ---------- mol_xyzr : ndarray array of x-,y-,z-coordinates and VdW radii with a shape [n_atoms, 4] attach_xyz : ndarray one-dimensional array of x-,y-,z-coordinates of the attachment point (corresponds to the center of the grid) linker_length : float length of the dye linker in Angstrom linker_width : float diameter of the dye linker in Angstrom dye_radii : ndarray([3,1]) array of dye radii in Angstrom with shape [3,1] grid_spacing : float spacing between grid points (in A) Notes ----- Attributes of the LabelLib class are: - discStep : float (the grid spacing) - originXYZ : numpy.array (x-/y-/z-coordinate of the grid origin) - shape : numpy.array (number of grid points in x-/y-/z-direction) - grid : numpy.array (flattened list of grid point values) """ def __init__(self, mol_xyzr, attach_xyz, linker_length, linker_width, dye_radii, grid_spacing, simulation_type): self.discStep = grid_spacing self.attach_xyz = attach_xyz self.grid_3d, self.originXYZ, self.originAdj = self.make_grid(attach_xyz, linker_length, grid_spacing) self.shape = np.array(self.grid_3d.shape) self.halfCubeLength = min(self.shape) * self.discStep * 0.5 self.grid_3d = self.block_molecule(mol_xyzr, 0.5 * linker_width) maxR = 3 * grid_spacing maxR_source = linker_width + grid_spacing self.grid_3d = self.dijkstra_init(maxR, maxR_source) self.grid_3d = self.setAboveTreshold(self.grid_3d, linker_length, -4) self.grid_3d = self.setAboveTreshold(self.grid_3d, 0, 1) if simulation_type == "AV1": dye_radii = [dye_radii[0]] self.grid_3d = self.excludeConcentricSpheres(mol_xyzr, dye_radii, 1) self.grid = self.grid_3d.flatten(order="F")
[docs] @staticmethod @nb.jit(nopython=True) def make_grid(attach_xyz, linker_length, grid_spacing): """Build a 3D grid around the attachment point Parameters ---------- attach_xyz : ndarray one-dimensional array of x-,y-,z-coordinates of the attachment point (corresponds to the center of the grid) linker_length : float length of the dye linker in Angstrom grid_spacing : float spacing between grid points (in A) Returns ------- tuple of ndarray 3-dimensional array of grid points with a shape of 2*ll_padRound+1, minimum of the grid, origin of the grid """ ll_pad = linker_length + 3 * grid_spacing # pad the linker_length ll_padRound = np.ceil(ll_pad / grid_spacing) * grid_spacing # round to next higher grid value xyz_min = attach_xyz - ll_padRound originAdj = xyz_min - 0.5 * grid_spacing gridptsPerEdge = 2 * int(ll_padRound / grid_spacing + 0.5) # + 1 grid_3d = np.full((gridptsPerEdge, gridptsPerEdge, gridptsPerEdge), np.inf) for i in range(gridptsPerEdge): for j in range(gridptsPerEdge): for k in range(gridptsPerEdge): ijk = np.array([i, j, k]) maxRSq = int(linker_length ** 2 / grid_spacing ** 2 + 0.5) ijk0 = (attach_xyz - xyz_min) / grid_spacing dSq = np.sum((ijk - ijk0) ** 2) if dSq > maxRSq: grid_3d[(i, j, k)] = -1 return grid_3d, xyz_min, originAdj
[docs] @staticmethod def _xyz2idx(xyz, originAdj, grid_spacing, decimals=6): """Get the ijk grid indices for a set of xyz values Parameters ---------- xyz : ndarray xyz coordinates originAdj : ndarray origin of the grid grid_spacing : float spacing between grid points (in A) decimals : int decimal to round to Note ---- Round to n-decimal places (here: 4) before casting to integer """ return np.round(((xyz - originAdj) / grid_spacing), decimals).astype(np.int64)
[docs] def block_molecule(self, mol_xyzr, extraClash): """Block the grid points which are within the VdW radius of any atom of the biomolecule plus an extra clash radius Parameters ---------- mol_xyzr : ndarray array of x-,y-,z-coordinates and VdW radii with a shape [n_atoms, 4] extraClash : float clash radius added to the VdW radius Returns ------- ndarray 3-dimensional array of grid points with a shape of 2*adjL+1 """ maxVdW_extraClash = np.max(mol_xyzr[:, 3]) + extraClash neighbor_list = nb.typed.List() [neighbor_list.append(n) for n in self.sortedNeighborIdx(maxVdW_extraClash)] if not neighbor_list: raise ValueError("Neighbor list is empty") ijk_atom = self._xyz2idx(mol_xyzr[:, 0:3], self.originAdj, self.discStep) outDistSq = (self.halfCubeLength + maxVdW_extraClash) ** 2 distSq = np.sum((mol_xyzr[:, 0:3] - self.attach_xyz) ** 2, 1) grid_3d = self._carve_VdWextraClash( self.grid_3d, mol_xyzr, neighbor_list, ijk_atom, extraClash, distSq, outDistSq, self.shape ) return grid_3d
[docs] @staticmethod @nb.jit(nopython=True) def _carve_VdWextraClash(grid_3d, mol_xyzr, neighbor_list, ijk_atom, extraClash, distSq, outDistSq, grid_shape): """Loop through the atoms and assign -1 to all grid values that are within the atoms VdW radius plus an extraClash value Parameters ---------- grid_3d : ndarray 3-dimensional array of grid points with a shape of 2*adjL+1 mol_xyzr : ndarray array of x-,y-,z-coordinates and VdW radii with a shape [n_atoms, 4] neighbor_list : list of 2-tuples of ndarray and float the tuples contain the ijk indices and the distance from the origin (0,0,0) ijk_atom : array ijk grid indices of the atoms of the biomolecule extraClash : float clash radius added to the VdW radius distSq : float square of the distance between the grid points to the attachment point outDistSq : float square of (halfcubelength + maxVdW_extraClash) grid_shape : array number of grid points in x,y and z coordinates """ for m in range(mol_xyzr.shape[0]): if distSq[m] > outDistSq: continue for n in neighbor_list: if n[0] > mol_xyzr[m, 3] + extraClash: break ijk_n = ijk_atom[m] + n[1] i, j, k = ijk_n if not np.any(ijk_n < 0) and not np.any(grid_shape - ijk_n <= 0): grid_3d[i, j, k] = -1 return grid_3d
[docs] def dijkstra_init(self, maxR, maxR_source): """Initialize the Dijkstra search algorithm Parameters ---------- maxR : float radius within to search for neighbors maxRsource : float radius within to search for neighbors in the first round of the Dijkstra algorithm Returns ------- ndarray 3-dimensional array of grid points with a shape of 2*adjL+1 """ ai, aj, ak = self._xyz2idx(self.attach_xyz, self.originAdj, self.discStep) self.grid_3d[ai, aj, ak] = 0 priority_queue = [(0.0, (ai, aj, ak))] priority_queue = nb.typed.List() [priority_queue.append(x) for x in [(0.0, (ai, aj, ak))]] edges_ess = nb.typed.List() [edges_ess.append(x) for x in self.essential_neighbors(self.sortedNeighborIdx(maxR), _dist_list)] edges_src = nb.typed.List() [edges_src.append(x) for x in self.sortedNeighborIdx(maxR_source)] grid_3d = self.dijkstra(self.grid_3d, edges_ess, edges_src, priority_queue, (ai, aj, ak)) return grid_3d
[docs] def sortedNeighborIdx(self, maxR): """Build a neighbor list with a maximal extent given by maxR and sorted by increasing distance from the origin Parameters ---------- maxR : float radius within to search for neighbors Returns ------- list of 2-tuples of ndarray and float the tuples contain the ijk indices and the distance from the origin (0,0,0) """ idxs = self._neighborIdx(maxR, self.discStep) idxs.sort(key=lambda x: x[0]) return idxs
[docs] @staticmethod def _neighborIdx(maxR, grid_spacing): """Build a list of neighboring indices to the origin (0,0,0) Parameters ---------- maxR : float radius within which to search for neighbors grid_spacing : float spacing between grid points (in A) Returns ------- list of 2-tuples of ndarray and float the tuples contain the ijk indices and the distance from the origin (0,0,0), e.g. [(1.0,array([-1,0,0]))] Notes ----- The neighbor list is built from an origin set at (0,0,0). imaxR defines the number of indices along one axis (i,j or k) to build the index cube. For imaxR=3, first a cube of 7*7*7=343 indices is built (1). This is reduced in a second step to 123 indices that have a distance < imaxR from the origin (2). Finally, they are reduced 74 essential neighbors by the distances in _dist_list (3). (this last reduction speeds up the Dikstra algorithm due to the shorter neighbor list without loosing much accuracy) """ idxs = [] maxRSq = maxR ** 2 imaxR = int(maxR / grid_spacing + 0.5) # rounds to next integer # (1) build a cube with (2*imaxR+1)**3 indices for k in range(-imaxR, imaxR + 1, 1): for j in range(-imaxR, imaxR + 1, 1): for i in range(-imaxR, imaxR + 1, 1): ijk = np.array([i, j, k], dtype=int) # (2) only accept indices with 0 < dS < imaxRSq dSq = np.sum(ijk ** 2) * grid_spacing ** 2 if dSq <= maxRSq: # and dSq > 0: d = np.sqrt(dSq) idxs.append((d, ijk)) return idxs
[docs] def essential_neighbors(self, idxs, dist_list): """Reduce the neighbor list to those indices with a distance from the origin featured in dist_list Parameters ---------- idxs : list of 2-tuples of ndarray and float the tuples contain the ijk indices and the distance from the origin (0,0,0) dist_list : list list of distances for neighbor search Notes ----- If dist_list=sqrt([1, 2, 3, 5, 6]) then the the neighbor list is reduced to 74 neighbors which make up a spherical shape and are a good compromise between neighbor space and time efficiency in the Dijkstra algorithm. The smaller this list the faster the Dijkstra algorithm will run at the expense of the accuracy of the resulting accessible volume (i.e. its "sphericalness") """ idxs_ess = [] for e in idxs: minDiff = min(abs(e[0] - dist_list * self.discStep)) if minDiff < (0.01 * self.discStep): idxs_ess.append(e) return idxs_ess
[docs] @staticmethod @nb.jit(nopython=True) def dijkstra(grid_3d, edges_ess, edges_src, priority_queue, start_ijk): """Djikstra algorithm with a priority queue Parameters ---------- grid_3d : ndarray 3-dimensional array of grid points with a shape given by n_xyz edges_ess : nb.typed.List of 2-tuples of ndarray and float list n essential neighbors from the origin (0,0,0) edges_src : list of 2-tuples of ndarray and float nb.typed.List of neighbors in the initialization round of the Dijkstra algorithm priority_queue : list of 2-tuples of ndarray and float list with distance and index of origin start_ijk : array ijk grid indices of the attachment site Returns ------- ndarray 3-dimensional array of grid points with a shape given by n_xyz """ priority_queue = list(priority_queue) while priority_queue: r, idx = heapq.heappop(priority_queue) if r > grid_3d[idx]: continue if idx == start_ijk: edges = edges_src else: edges = edges_ess for e in edges: i = idx[0] + e[1][0] j = idx[1] + e[1][1] k = idx[2] + e[1][2] val = grid_3d[i, j, k] if val < 0: continue t_r = r + e[0] if t_r < val: grid_3d[i, j, k] = t_r heapq.heappush(priority_queue, (t_r, (i, j, k))) return grid_3d
[docs] @staticmethod @nb.jit(nopython=True) def setAboveTreshold(grid_3d, treshold, new_value): """Reassign grid values which are above a specified treshold Parameters ---------- grid_3d : ndarray 3-dimensional array of grid points with a shape given by n_xyz treshold : float grid values above this treshold are reassigned with new_value new_value : float new grid value to be assigned Returns ------- ndarray 3-dimensional array of grid points with a shape given by n_xyz """ for i in range(grid_3d.shape[0]): for j in range(grid_3d.shape[1]): for k in range(grid_3d.shape[2]): if grid_3d[(i, j, k)] > treshold: grid_3d[(i, j, k)] = new_value return grid_3d
[docs] def excludeConcentricSpheres(self, mol_xyzr, dye_radii, maxRho): """Exclude all grid points which are not compatible with any of the dye radii because of clashing with the VdW surface Parameters ---------- mol_xyzr : ndarray array of x-,y-,z-coordinates and VdW radii with a shape [n_atoms, 4] dye_radii : ndarray([3,1]) array of dye radii in Angstrom with shape [3,1] maxRho : float maximum grid value in the free volume Returns ------- ndarray 3-dimensional array of grid points with a shape given by n_xyz """ maxVdW_extraClash = np.max(mol_xyzr[:, 3]) + np.max(dye_radii) neighbor_list = nb.typed.List() [neighbor_list.append(n) for n in self.sortedNeighborIdx(maxVdW_extraClash)] dye_radii_sorted = nb.typed.List() [dye_radii_sorted.append(x) for x in sorted(dye_radii)] rhos = np.linspace(0, maxRho, len(dye_radii) + 1) ijk_atom = self._xyz2idx(mol_xyzr[:, 0:3], self.originAdj, self.discStep) outdistSq = (self.halfCubeLength + maxVdW_extraClash) ** 2 distSq = np.sum((mol_xyzr[:, 0:3] - self.attach_xyz) ** 2, 1) grid_3d = self._assignRho( self.grid_3d, mol_xyzr, neighbor_list, ijk_atom, dye_radii_sorted, rhos, distSq, outdistSq, self.shape ) return grid_3d
[docs] @staticmethod @nb.jit(nopython=True) def _assignRho(grid_3d, mol_xyzr, neighbor_list, ijk_atom, dye_radii_sorted, rhos, distSq, outdistSq, grid_shape): """Loop through the atoms and radii and reassign the grid values with a number 0 < rho < 1. 0.00: grid point is not compatible with any of the three dye radii (clashes with VdW surface) 0.33: grid point is compatible with the smallest dye radius 0.66: grid point is compatible with the smallest two radii 1.00: grid point is compatible with all three dye radii Parameters ---------- grid_3d : ndarray 3-dimensional array of grid points with a shape given by n_xyz mol_xyzr : ndarray array of x-,y-,z-coordinates and VdW radii with a shape [n_atoms, 4] neighbor_list : list of 2-tuples of ndarray and float the tuples contain the ijk indices and the distance from the origin (0,0,0) ijk_atom : array ijk grid indices of the atoms of the biomolecule dye_radii_sorted : array dye radii sorted in ascending order rhos : array array of grid values for assignment, for AV3 [0, 0.33, 0.66, 1] and for AV1 [0, 1] distSq : float square of the distance between the grid points to the attachment point outdistSq : float square of (halfcubelength + maxVdW_extraClash) grid_shape : array number of grid points in x,y and z coordinates """ n = len(neighbor_list) n_radii = len(dye_radii_sorted) for m in range(mol_xyzr.shape[0]): if distSq[m] > outdistSq: continue else: s = 0 for r in range(n_radii): effR = dye_radii_sorted[r] + mol_xyzr[m, 3] while neighbor_list[s][0] <= effR: ijk_n = ijk_atom[m] + neighbor_list[s][1] i, j, k = ijk_n if not np.any(ijk_n < 0) and not np.any(grid_shape - ijk_n <= 0): gridval = grid_3d[i, j, k] grid_3d[i, j, k] = min(rhos[r], gridval) s += 1 if s >= n: break return grid_3d
[docs] def addWeights(self, das_xyzrm): """Reassign those grid values which are part of the contact volume. Parameters ---------- das_xyzrm : ndarray array of xyz coordinates, padded vdW radii and a marking (2.0) Returns ------- ndarray 3-dimensional array of grid points with a shape given by n_xyz """ maxClash = np.max(das_xyzrm[:, 3]) + self.discStep neighbor_list = nb.typed.List() [neighbor_list.append(n) for n in self.sortedNeighborIdx(maxClash)] ijk_atom = self._xyz2idx(das_xyzrm[:, 0:3], self.originAdj, self.discStep) outDistSq = (self.halfCubeLength + maxClash) ** 2 distSq = np.sum((das_xyzrm[:, 0:3] - self.attach_xyz) ** 2, 1) grid_3d = self._assignDensity(self.grid_3d, das_xyzrm, neighbor_list, ijk_atom, distSq, outDistSq, self.shape) return grid_3d
[docs] @staticmethod @nb.jit(nopython=True) def _assignDensity(grid_3d, das_xyzrm, neighbor_list, ijk_atom, distSq, outDistSq, grid_shape): """Loop through the atom of the biomolecule and reassign those grid value which are within das_xyzrm[m, 3] as they belong to the CV. Parameters ---------- grid_3d : ndarray 3-dimensional array of grid points with a shape given by n_xyz das_xyzrm : ndarray array of marked coordinates and padded vdW radii (n_atoms = number of atoms in mdtraj.Trajectory) neighbor_list : list of 2-tuples of ndarray and float the tuples contain the ijk indices and the distance from the origin (0,0,0) ijk_atom : array ijk grid indices of the atoms of the biomolecule distSq : float square of the distance between the grid points to the attachment point outDistSq : float square of (halfcubelength + maxVdW_extraClash) grid_shape : array number of grid points in x,y and z coordinates Returns ------- ndarray 3-dimensional array of grid points with a shape given by n_xyz """ n = len(neighbor_list) for m in range(das_xyzrm.shape[0]): if distSq[m] > outDistSq: continue else: s = 0 while neighbor_list[s][0] <= das_xyzrm[m, 3]: ijk_n = ijk_atom[m] + neighbor_list[s][1] i, j, k = ijk_n if not np.any(ijk_n < 0) and not np.any(grid_shape - ijk_n <= 0): gridval = grid_3d[i, j, k] if gridval > 0: grid_3d[i, j, k] += das_xyzrm[m, 4] s += 1 if s >= n: break return grid_3d