"""Main AlloViz classes: `Protein` and `Delta`
The :class:`~AlloViz.Protein` class is AlloViz's main class, processing a structure and
trajectory(ies) input files and allowing to calculate, analyze and visualize the
allosteric communication networks with its associated methods.
The :class:`~AlloViz.Delta` class takes two :class:`~AlloViz.Protein` objects as input to
calculate the delta-network to highlight the differences in the allosteric communication
between two systems.
Notes
=====
This module is not meant to be used directly, as the classes are imported in the
namespace of AlloViz itself.
"""
import os, io, re
import MDAnalysis as mda
import numpy as np
from multiprocess import Pool
from . import Analysis
from . import Elements
from .utils import rgetattr, rhasattr
from . import utils
from . import trajutils
from .. import Wrappers
class _Store:
r"""Private class that is only used to create empty class' attributes to which other
attributes are added.
"""
pass
[docs]
class Protein:
r"""AlloViz main class.
Objects of the class process the input structure and trajectory files and allow to
:meth:`~AlloViz.Protein.calculate` the different networks, which are stored in
classes of the :mod:`AlloViz.Wrappers` module and can be filtered, analyzed and
visualized with subsequent methods.
Parameters
----------
pdb : str
Filename of the PDB structure to read, process and use.
trajs : str or list
Filename(s) of the MD trajectory (or trajectories) to read and use. File format
must be recognized by MDAnalysis (e.g., xtc).
GPCR : bool or int, default: False
Use `True` if the structure is a GPCR, or pass the ID of a GPCRmd database
dynamics entry, without specifying the `pdb` nor the `trajs` parameters, to
automatically retrieve the files from the database and process them. They will
be downloaded to `path`, which if left undefined will default to the GPCRmd ID.
name : str, default: "protein"
Name string to be used, e.g., for the title of the colorbar shown when
representing a network with nglviewer.
path : str, optional
Path to store results in. It can exist already or not, and a new folder inside
it called `data` will be created to store computation results and other
files. If unspecified, it defaults to "." or the GPCRmd ID in case it is used.
protein_sel : str, default: :attr:`AlloViz.Protein._protein_sel`
MDAnalysis atom selection string to select the protein structure from the
Universe (e.g., in case simulations in biological conditions are used, to avoid
selecting extra chains, water molecules, ions...). It defaults to "(same segid as
protein) and (not segid LIG) and (not chainid L)" and it can be extended using,
e.g., :attr:`AlloViz.Protein._protein_sel` `+ " and {customselection}"`.
Other Parameters
----------------
psf : str, optional
Optional kwarg of the filename of the .psf file corresponding to the pdb used.
It is required alongside the `parameters` file to use the gRINN network
construction method.
parameters : str, optional
Optional kwarg of the filename of the MD simulation force-field parameters file
in NAMD format. It is required alongside the `psf` file to use the gRINN network
construction method.
special_res : dict, optional
Optional kwarg of a dictionary containing a mapping of special residue 3/4-letter
code(s) present in the structure to the corresponding standard 1-letter code(s).
Attributes
----------
pdb
trajs
GPCR
protein : :class:`~MDAnalysis.core.universe.Universe`
Universe of the processed pdb with only the selected `protein_sel` atoms.
u : :class:`~MDAnalysis.core.universe.Universe`
Universe of the processed pdb and trajectory files with only the `protein_sel`
atoms.
Raises
------
FileNotFoundError
If any of the files passed in the `pdb` and `traj` (and `psf` and `parameters`
if provided) parameters cannot be accessed.
See Also
--------
AlloViz.Delta : Class for calculation of the delta-network(s) between two Protein
objects.
Examples
--------
>>> opioidGPCR = AlloViz.Protein(GPCR=169)
>>> print(opioidGPCR.u)
<Universe with 88651 atoms>
"""
_protein_sel = "(same segid as protein) and (not segid LIG) and (not chainid L)"
"""
Class attribute used to select only protein atoms from the input files with
:external:ref:`MDAnalysis selection syntax <selection-commands-label>`. It defaults
to "(same segid as protein) and (not segid LIG) and (not chainid L)" and it can be
extended through the `protein_sel` parameter using, e.g.,
:attr:`AlloViz.Protein._protein_sel` + " and {customselection}"
"""
def __init__(
self,
pdb="",
trajs=[],
GPCR=False,
name=None,
path=None,
protein_sel=None,
**kwargs,
):
self.GPCR = GPCR
# If a GPCRmd ID is passed
if not isinstance(self.GPCR, bool) and isinstance(self.GPCR, int):
self.name = f"{self.GPCR}" if not name else name
self._path = f"{self.GPCR}" if not path else path
os.makedirs(self._path, exist_ok=True)
# Download the files from GPCRmd
if not any(
[
re.search("(pdb$|psf$|xtc$|dcd$|parameters$)", file)
for file in os.listdir(self._path)
]
):
trajutils.download_GPCRmd_files(self.GPCR, self._path)
files = os.listdir(self._path)
# Retrieve filenames from the files downloaded into self._path
get_filename = (
lambda ext: self._path
+ "/"
+ next(file for file in files if re.search(f"{ext}$", file))
)
self.pdb = get_filename("pdb")
self.trajs = list(
sorted(
f"{self._path}/{traj}"
for traj in files
if re.search("^(?!\.).*\.(xtc|dcd)$", traj)
)
)
if any(["psf" in file for file in files]):
self.psf = get_filename("psf")
if any(["parameters" in file for file in files]):
self._paramf = get_filename("parameters")
# If pdb and trajectory files are passed
else:
self.name = pdb.replace(".pdb", "").split("/")[-1] if not name else name
self._path = "." if not path else path
os.makedirs(self._path, exist_ok=True)
self.pdb = pdb
self.trajs = trajs if isinstance(trajs, list) else [trajs]
# Check if a .psf and a force-field parameters files have been passed for gRINN network calculation
passed_psf_params = "parameters" in kwargs and "psf" in kwargs
if passed_psf_params:
self.psf = kwargs["psf"]
self._paramf = kwargs["parameters"]
# Check if all the filehandles passed exist, and raise an error if not
files_to_check = (
self.trajs + [self.pdb]
if not passed_psf_params
else self.trajs + [self.pdb, self.psf, self._paramf]
)
files_exist = {file: os.path.isfile(file) for file in files_to_check}
if any([not file_exist for file_exist in files_exist.values()]):
raise FileNotFoundError(
f"Some of the files could not be found: {files_exist}"
)
self._protein_sel = Protein._protein_sel if not protein_sel else protein_sel
# Set the names of the directories and files that will be creating when processing the input files
self._datadir = f"{self._path}/data"
os.makedirs(self._datadir, exist_ok=True)
self._pdbf = f"{self._datadir}/protein.pdb"
self._trajs = dict(
[
(num + 1, f"{self._datadir}/traj_{num+1}.xtc")
for num in range(len(self.trajs))
]
)
self._psff = self._pdbf.replace("pdb", "psf") if hasattr(self, "psf") else None
# If the processed filenames don't exist yet as files, process the input files; if special_res kwarg is used it will be passed
if any(
[
not os.path.isfile(f)
for f in list(self._trajs.values())
+ [self._pdbf]
]
):
self._pdbf, self._trajs, self._psff = trajutils.process_input(
self.GPCR,
self.pdb,
self._protein_sel,
self._pdbf,
rgetattr(self, "psf"),
self._psff,
self.trajs,
self._trajs,
**{"special_res": kwargs["special_res"]} if "special_res" in kwargs else {},
)
# Set the protein/pdb and trajectory(ies) MDAnalysis' Universes with the processed files
self.protein = mda.Universe(self._pdbf)
""":class:`~MDAnalysis.core.universe.Universe`"""
self.u = mda.Universe(self._pdbf, *list(self._trajs.values()))
""":class:`~MDAnalysis.core.universe.Universe`"""
# Bonded cysteines must be identified to remove them from non-covalent contacts calculations (i.e., PyInteraph2_Contacts)
self._bonded_cys = trajutils.get_bonded_cys(self._pdbf)
# _dihedral_residx returns a list of indices of all the protein's residues in the Universes that are not in the extremes of the chain(s)
# These will be the residues for which Dihedral correlations can be calculated (residues in the extremes have some dihedrals missing)
# MDEntropy_AlphaAngle uses information of residues i-1, i, i+1, i+2; so end=-2 is passed to also exclude the second-to-last residue of the chain(s)
_res_arrays = np.split(
self.protein.residues.resindices,
np.where(np.diff(self.protein.residues.resnums) != 1)[0] + 1,
)
self._dihedral_residx = lambda end=-1: [
elem for arr in _res_arrays for elem in arr[1:end]
]
# _translate_ix returns a translated DataFrame Index (string) or MultiIndex (tuple) element according to the mapper dictionary passed
# The translation function _translate_ix(mapper) is passed as the function used by the .map method of a DataFrame Index or MultiIndex to translate it
self._translate_ix = (
lambda mapper: lambda ix: tuple(mapper[_] for _ in ix)
if isinstance(ix, tuple)
else mapper[ix]
)
def __sub__(self, other):
"""
The result of subtracting an "other" Protein object from "self" is a new object
(delta) which has the attributes that the Proteins have in common (regarding
network construction methods/packages, filtering schemes and elements-analysis),
whose values arise from subtracting the corresponding dataframes (i.e.,
subtracting the networks) to create a delta-network.
"""
# Create an empty class (_Store dummy class) instance to store the results
delta = _Store()
# For each package that the two Proteins have in common; taking them from those attributes in self's __dict__ that are valid package names
for pkg in (
key for key in self.__dict__ if utils.pkgname(key, fail=False) and key in other.__dict__
):
# Set an attribute with the package's name in the empty object
setattr(delta, pkg, _Store())
# For each filtering scheme (taken from self's __dict__ with a valid name) for which results have been analyze in "self" and that are also in "other"
for filtering in (
key
for key in getattr(self, pkg).__dict__
if any([f in key for f in utils.filteringsl])
and key in getattr(other, pkg).__dict__
):
# Also set an attribute with the filtering name
setattr(getattr(delta, pkg), filtering, _Store())
# And finally for each Element (taken from self's __dict__ with a valid name) that is also in "other"
for elem in (
key
for key in rgetattr(self, pkg, filtering).__dict__
if key.lower() in ["nodes", "edges"]
and key in rgetattr(other, pkg, filtering).__dict__
):
# Calculate the Elements' difference exploiting their custom subtraction special method definition
dif = rgetattr(self, pkg, filtering, elem) - rgetattr(
other, pkg, filtering, elem
)
# And save the result as a new Element object (to exploit its view method)
elemclass = eval(f"Elements.{elem.capitalize()}")
setattr(
rgetattr(delta, pkg, filtering),
elem,
elemclass(dif, parent=self._delta),
)
return delta.__dict__
[docs]
def calculate(self, pkgs="all", cores=1, **kwargs):
r"""Calculate rwa edge weights of allosteric networks.
Send the computation of the raw edge weights for the selected network
construction methods.
Parameters
----------
pkgs : str or list, default: "all"
Package(s)/Network construction method(s) for which to send raw edge weight
computation. "all" sends the computation for all available methods within
AlloViz (check :data:`AlloViz.AlloViz.utils.pkgsl`).
cores : int, default: 1
Number of cores to use for parallelization with a `multiprocess` Pool.
Default value only uses 1 core with a custom :class:`AlloViz.utils.dummypool`
that performs computations synchronously.
Other Parameters
----------------
taskcpus : int, optional
Optional kwarg to specify the amount of cores that parallelizable network
construction methods can use (i.e., AlloViz's method, getcontacts, dynetan,
PyInteraph, MDEntropy and gRINN).
stride : int, optional
Optional kwarg to specify the striding to be done on the trajectory(ies) for
computationally-expensive network construction methods: i.e., dynetan and
AlloViz's own method. Default is no striding.
namd : str, optional
Optional kwarg pointing to the namd2 executable location; if the `namd`
command is accessible through the CLI it is automatically retrieved with the
`distutils` package.
chis : int, optional
Optional kwarg to specify the number of side-chain chi dihedral angles (up to
5) to combine when sending the calculation of a child of the Combined_Dihs
Wrappers' base class that includes chi dihedrals in its calculation.
MDEntropy_method : str, optional, {"knn", "grassberger", "chaowangjost"}
Optional kwarg to specify the method to calculate the entropy of the
variables for Mutual Information estimation when using one of the
MDEntropy network construction methods (default: "grassberger").
See Also
--------
AlloViz.Wrappers.Base.Base : Base class to launch and store calculation results.
AlloViz.Protein.filter : Class method to filter the network raw edge
weights with different criteria.
AlloViz.Protein.analyze : Class method to analyze the calculated raw edge weights
with graph theory-based methods.
AlloViz.Protein.view : Class method to visualize the network on the protein
structure.
Notes
-----
Calculation results are stored as new instance attributes with the same name as
the selected packages/network construction methods. If the object is created
providing more than one trajectory file, the average and standard error of the
weights between the replicas are also calculated.
Examples
--------
If we have a 6-core computer and want to use 2 cores to compute the values for
each of the three trajectories of the protein (e.g., GPCRmd stores three replicas
of each structure):
>>> opioidGPCR = AlloViz.Protein(GPCR=169)
>>> opioidGPCR.calculate("dynetan", cores=6, taskcpus=2)
>>> print(opioidGPCR.dynetan.raw.shape)
(41041, 5)
"""
# Calculate for "all" packages or the ones passed as parameter (check that they are on the list of available packages and retrieve their case-sensitive names, else raise an Exception)
pkgs = utils.make_list(pkgs, if_all=utils.pkgsl, apply=utils.pkgname)
combined_dihs = [pkg for pkg in pkgs if "Dihs" in pkg]
# Add the calculations of the individual dihedrals if a combined_dihedral has been passed
bb = ["Phi", "Psi"]
sc = [f"Chi{i+1}" for i in range(4)]
if len(combined_dihs) > 0:
for comb in combined_dihs:
pkg = comb.split("_")[0]
if pkg == "CARDS":
if "Sidechain" in comb or "Backbone" in comb:
pkg = comb.rsplit("_", 2)[0]
else:
pkg = comb.rsplit("_", 1)[0]
dihs = bb if "Backbone" in comb else sc if "Sidechain" in comb else bb+sc
pkgs += [f"{pkg}_{dih}" for dih in dihs]
# Objects from the classes in the Wrappers module need to be passed a dictionary "d" containing all the attributes of the source Protein object and the passed kwargs
d = self.__dict__.copy()
d.update(kwargs)
# Depending on the desired cores, use a dummypool (synchronous calculations) or a `multiprocess` Pool
# Changing it inside the `utils` module allows to share the same one between modules
if cores > 1:
mypool = Pool(cores)
else:
mypool = utils.dummypool()
utils.pool = mypool
if any(["CARDS" in pkg for pkg in pkgs]):
Wrappers.CARDS_w.CARDS(self, d)
for pkg in set(pkgs) - set(combined_dihs):
# Establish the corresponding Wrappers' class
pkgclass = eval(f"Wrappers.{utils.pkgname(pkg)}")
# Setting the class as a new attribute will initialize all calculations asynchronously (synchronously if a dummypool is used)
if not hasattr(self, pkgclass.__name__):
setattr(self, pkgclass.__name__, pkgclass(self, d))
# Close the pool
utils.pool.close()
utils.pool.join()
utils.pool = utils.dummypool()
if len(combined_dihs) > 0:
# Calculate now the combination of dihedrals, which is just a combination of the already-calculated data
for pkg in combined_dihs:
pkgclass = eval(f"Wrappers.{utils.pkgname(pkg)}")
if not hasattr(self, pkgclass.__name__):
setattr(self, pkgclass.__name__, pkgclass(self, d))
return getattr(self, pkgclass.__name__) if len(pkgs) == 1 else None
[docs]
def filter(self, pkgs="all", filterings="All", *, GetContacts_threshold=0, Sequence_Neighbor_distance=5, Interresidue_distance=10):
r"""Filter network edges
Filter the networks according to the selected criteria to perform analyses on
(all or) a subset of the edges. It calls
:meth:`AlloViz.Wrappers.Base.Base.filter` and results are stored in instances
of the :class:`AlloViz.AlloViz.Filtering.Filtering` class. The different filtering
options are detailed in the :mod:`~AlloViz.AlloViz.Filtering` module.
Parameters
----------
pkgs : str or list, default: "all"
Package(s)/Network construction method(s) for which to analyze their raw edge
weights, which must be already calculated and their data saved as instance
attribute. In this case, "all" sends the computation for all available
methodsthat are already calculated and saved as instance attributes.
filterings : str or list of strs and/or lists, default: "All"
Filtering scheme(s) with which to filter the list of network edges before
analysis. It can be a string, or a list of strings and/or lists. A list of
lists (also with or without strings) is used to filter with a combination of
criteria. All available (and combinable) filtering options are functions in
the :mod:`~AlloViz.AlloViz.Filtering` module:
:func:`~AlloViz.AlloViz.Filtering.All`,
:func:`~AlloViz.AlloViz.Filtering.GetContacts_edges`,
:func:`~AlloViz.AlloViz.Filtering.No_Sequence_Neighbors`,
:func:`~AlloViz.AlloViz.Filtering.GPCR_Interhelix`. The default "All" (capitalized)
performs NONE of the available filtering schemes.
"all" (lowercase!) performs all the available filtering schemes (no combinations).
Other Parameters
----------------
GetContacts_threshold : float
Optional kwarg that can be passed to specify the minimum contact frequency
(0-1, default 0) threshold, which will be used to filter out contacts with a
frequency (average) lower than it before analysis.
Sequence_Neighbor_distance : int
Optional kwarg that can be passed to specify the minimum number of sequence
positions/distance between residues of a pair to retain in
`No_Sequence_Neighbors` filtering, which defaults to 5.
Interresidue_distance : int or float
Optional kwarg that can be passed to specify the minimum number of angstroms
that the CA atoms of residue pairs should have between each other in the initial
PDB/structure (default 10 Å) to be considered spatially distant.
See Also
--------
AlloViz.AlloViz.Filtering.Filtering : Filtering class.
AlloViz.Protein.calculate : Class method to calculate the network(s) raw edge
weights with different network construction methods.
AlloViz.Protein.analyze : Class method to analyze the calculated raw edge weights
with graph theory-based methods.
AlloViz.Protein.view : Class method to visualize the network on the protein
structure.
Examples
--------
>>> opioidGPCR = AlloViz.Protein(GPCR=169)
>>> opioidGPCR.calculate(["getcontacts", "dynetan"], cores=6, taskcpus=2)
>>> opioidGPCR.filter("dynetan", ["GetContacts_edges", ["GetContacts_edges", "GPCR_Interhelix"]])
>>> opioidGPCR.dynetan.GetContacts_edges_GPCR_Interhelix
<AlloViz.AlloViz.Filtering.Filtering at 0x7f892c3c0fa0>
"""
# Filter "all" packages (all the available packages that have been previously calculated and are in __dict__)
# or the ones passed as parameter (check that they are on the list of available packages and retrieve their case-sensitive names, else raise an Exception)
pkgs = utils.make_list(pkgs, if_all = [key for key in utils.pkgsl if key in self.__dict__], apply = utils.pkgname)
# Remove "GPCR_Interhelix" from the available filters if the protein is not a GPCR
all_available_filterings = utils.filteringsl.copy()
if not self.GPCR:
print('NOTE: "GPCR_Interhelix" filtering not available as the protein is not marked as a GPCR')
all_available_filterings.remove("GPCR_Interhelix")
# Calculate for all the passed Filterings
filterings = utils.make_list(
filterings,
if_all = all_available_filterings
)
for pkgn in pkgs:
pkg = rgetattr(self, pkgn)
if not pkg:
print(f"{pkgn} calculation results are needed first")
continue
result = pkg.filter(filterings,
GetContacts_threshold=GetContacts_threshold,
Sequence_Neighbor_distance=Sequence_Neighbor_distance,
Interresidue_distance=Interresidue_distance)
return result if (len(pkgs) == 1) else None
[docs]
def analyze(self, pkgs="all", filterings="all", elements="edges", metrics="all", cores=1, nodes_dict=Analysis.nodes_dict, edges_dict=Analysis.edges_dict, **kwargs):
r"""Analyzed filtered network
Analyze the selected (un)filtered networks with the passed elements-metrics. It
calls :meth:`AlloViz.AlloViz.Analysis.analyze` and results are stored in
instances of classes from the :mod:`AlloViz.AlloViz.Elements` module, which
extend the :class:`pandas.DataFrame` class.
Parameters
----------
pkgs : str or list, default: "all"
Package(s)/Network construction method(s) for which to analyze their raw edge
weights, which must be already calculated and their data saved as instance
attribute. In this case, "all" sends the computation for all available
methods that are already calculated and saved as instance attributes.
filterings : str or list, default: "all"
Filtering scheme(s) for which to perform the analyses, which must exist
already for the selected packages. "all" sends the computation for all
available schemes that are already saved.
elements : str or list, {"edges", "nodes"}
Network elements for which to perform the analysis.
metrics : str or list, default: "all"
Network metrics to compute, which must be keys in the `nodes_dict` or
`edges_dict` dictionaries. Default is "all" and it sends the computation for
all the metrics defined in the corresponding dictionary of the selected
elements in `element`.
cores : int, default: 1
Number of cores to use for parallelization with a `multiprocess` Pool.
Default value only uses 1 core with a custom :class:`AlloViz.utils.dummypool`
that performs computations synchronously.
Other Parameters
----------------
nodes_dict, edges_dict : dict
Optional kwarg(s) of the dictionary(ies) that maps network metrics custom names
(e.g., betweenness centrality, "btw") with their corresponding NetworkX
function (e.g., "networkx.algorithms.centrality.betweenness_centrality").
Functions strings must be written as if they were absolute imports, and must
return a dictionary of edges or nodes, depending on the element dictionary in
which they are. The keys of the dictionaries will be used to name the columns
of the analyzed data that the functions produce. Defaults are
:data:`~AlloViz.AlloViz.Analysis.nodes_dict` and
:data:`~AlloViz.AlloViz.Analysis.edges_dict`.
**kwargs
Other optional keyword arguments that will be passed to the NetworkX analysis
function(s) that is(are) used on the method call in case they need extra
parameters. All keyward arguments will be passed to all analysis function
calls, so if the function doesn't accept the arguments there will be an error.
`weight` parameter is already specified by AlloViz.
See Also
--------
AlloViz.AlloViz.Analysis : Module with analysis functions.
AlloViz.Protein.calculate : Class method to calculate the network(s) raw edge
weights with different network construction methods.
AlloViz.Protein.filter : Class method to filter the network(s) raw edge
weights with different criteria.
AlloViz.Protein.view : Class method to visualize the network on the protein
structure.
Examples
--------
>>> opioidGPCR = AlloViz.Protein(GPCR=169)
>>> opioidGPCR.calculate(["getcontacts", "dynetan"], cores=6, taskcpus=2)
>>> opioidGPCR.filter("dynetan", "GetContacts_edges")
>>> opioidGPCR.analyze("dynetan", "GetContacts_edges", "nodes", "btw")
<AlloViz.AlloViz.Elements.Nodes at 0x7f892c3c0fa0>
"""
# Analyze "all" packages (all the available packages that have been previously calculated and are in __dict__)
# or the ones passed as parameter (check that they are on the list of available packages and retrieve their case-sensitive names, else raise an Exception)
pkgs = utils.make_list(pkgs, if_all = [key for key in utils.pkgsl if key in self.__dict__], apply = utils.pkgname)
# # Depending on the desired cores, use a dummypool (synchronous calculations) or a `multiprocess` Pool
# # Changing it inside the `utils` module allows to share the same one between modules
if cores > 1:
mypool = Pool(cores)
else:
mypool = utils.dummypool()
utils.pool = mypool
for pkg in pkgs:
# Analyze for "all" filterings or the ones passed as parameter
# "all": all filterings that have been previously done and are in the pkg's __dict__ and also contain at least one of the utils.filteringsl members (to include combinations)
filterings = utils.make_list(
filterings,
if_all = [filt for filt in rgetattr(self, utils.pkgname(pkg), "__dict__") if any([f in filt for f in utils.filteringsl])]
)
for filtering in filterings:
filtered = rgetattr(self, utils.pkgname(pkg), filtering)
filtered.analyze(elements, metrics, cores=1, nodes_dict=nodes_dict, edges_dict=edges_dict, **kwargs)
# Close the pool
utils.pool.close()
utils.pool.join()
utils.pool = utils.dummypool()
[docs]
def view(
self,
pkg,
metric,
filtering="All",
element="edges",
num: int = 20,
colors: list = ["orange", "turquoise"],
nv=None,
):
r"""Visualize the analyzed networks on the protein structure.
Visualize the network corresponding to the selected package/network construction
method, filtering scheme, network element and network metric or metrics, all of
which must be previously calculated and analyzed. The number of (each of the)
elements to show and the color scale can be specified. It calls
:meth:`AlloViz.Elements.Element.view`.
Parameters
----------
pkg : str
Package/Network construction method for which to show the network.
metric : str
Network metric for which to show the network.
filtering : str
Filtering scheme for which to show the network.
element : str or list, {"edges", "nodes"}
Network element or elements to show on the protein structure representing the
chosen package, filtering scheme and metric.
num : int, default: 20
Number of (each of the) network elements to show on the structure.
colors : list, default: ["orange", "turquoise"]
List of two colors to assign to the minimum and maximum values of the network
to be represented, respectively. Middle value is assigned "white" and it will
be the mean of the network values or 0 if the network has both negative and
positive values.
nv : :class:`nglview.NGLWidget`, optional
A structure representation into which the shapes representing the chosen
network elements will be added.
Returns
-------
:class:`nglview.NGLWidget`
See Also
--------
AlloViz.AlloViz.Elements : Module with classes for storage and visualization of
filtered and analyzed networks.
AlloViz.Protein.calculate : Class method to calculate the network(s) raw edge
weights with different network construction methods.
AlloViz.Protein.filter : Class method to filter the network(s) raw edge
weights with different criteria.
AlloViz.Protein.analyze : Class method to visualize the network on the protein
structure.
Examples
--------
>>> opioidGPCR = AlloViz.Protein(GPCR=169)
>>> opioidGPCR.calculate("dynetan", cores=6, taskcpus=2)
>>> opioidGPCR.filter("dynetan", "GetContacts_edges")
>>> opioidGPCR.analyze("dynetan", "GetContacts_edges", "nodes", "btw")
>>> opioidGPCR.view("dynetan", "btw", "GetContacts_edges", element=["edges", "nodes"])
NGLWidget()
"""
# Get the package name with the correct nomenclature or raise an error if it doesn't exist
pkg = utils.pkgname(pkg)
# Create a list of the passed elements
elements = element if isinstance(element, list) else [element]
# Define a function to retrieve the corresponding Element attribute of the passed elem and check that it exists
get_element = lambda element: rgetattr(
self, pkg, filtering, element
)
if isinstance(get_element(elements[0]), bool):
raise Exception(
f"{elements[0]} analysis with {filtering} filtering needs to be sent first."
)
# Retrieve the NGLWidget from the Visualization module's class method `view`
nv = get_element(elements[0]).view(metric, num, colors, nv)
# If both elements were passed, check that the second Element attribute exists and update the NGLWidget
if len(element) == 2:
if not get_element(elements[1]):
raise Exception(
f"{elements[1]} analysis with {filtering} filtering needs to be sent first."
)
# The NGLWidget object `nv` is an optional argument of the `view` method and it is used to add the shapes to the passed NGLWidget
nv = get_element(elements[1]).view(metric, num, colors, nv)
# Returning of the NGLWidget is needed for its visualization in the notebook
return nv
[docs]
class Delta:
r"""AlloViz class for calculating a delta-network.
Used to calculate the delta-network between two :class:`~AlloViz.Protein` objects,
using all the available combinations of packages/network construction methods,
filterings, and elements-metrics that they have in common. A structural alignment of
the structures is performed with PyMOL with the selected method to find the
corresponding residues between the two structures for subtraction of edge weights.
Parameters
----------
refstate : :class:`AlloViz.Protein`
Object of the :class:`AlloViz.Protein` class to use as reference (values of the
other :class:`AlloViz.Protein` object will be subtracted from this one's values).
state2 : :class:`AlloViz.Protein`
Object of the :class:`AlloViz.Protein` class to compare with the reference one to
build the delta-network (this one's values will be subtracted from the reference
one's values).
pymol_aln : {"super", "align", "cealign"}, default: "super"
PyMOL's method to apply for the structural alignment of the two structures, used
to retrieve the corresponding residues between the two for subtraction of edge
weights. It is performed by :meth:`AlloViz.Delta._make_struct_aln`.
Attributes
----------
refstate
state2
See Also
--------
AlloViz.Protein : AlloViz main class for calculation of protein allosteric
communication networks.
Notes
-----
For PyMOL's structural alignment, `align` starts from a sequence alignment and is
optimal to align structures with identical sequences, but is deeply affected by
sequence differences. `super` performs a sequence-independent dynamic programming
structural alignment and it works well for structures with low sequence identity.
`cealign` uses the Combinatorial Extension (CE) algorithm and it is preferred for
structures with little to no sequence similarity (twilight zone). The methods are
described in `PyMOL's documentation <https://pymolwiki.org/index.php/Main_Page>`_.
Examples
--------
>>> activeB2AR = AlloViz.Protein(GPCR=117, name="Active B2AR")
>>> inactiveB2AR = AlloViz.Protein(GPCR=160, name="Inactive B2AR")
>>> for protein in [activeB2AR, inactiveB2AR]:
>>> protein.calculate("pytraj_CA")
>>> protein.analyze(metrics="btw")
>>> delta = AlloViz.Delta(activeB2AR, inactiveB2AR)
>>> print(delta.pytraj_CA.Whole.edges.df.shape)
(40690, 5)
"""
def __init__(self, refstate, state2, pymol_aln="super"):
# Store the passed Proteins as attributes and as a private list
self.refstate, self.state2 = refstate, state2
self._states = [self.refstate, self.state2]
# Add the present object as a new private attribute of each Protein object
for state in self._states:
setattr(state, "_delta", self)
# Perform a PyMOL structural alignment with the passed method
self._aln = self._make_struct_aln(pymol_aln)
# Future: used source-target nodes for metrics calculation
# nodes_subset = self._add_nodes_subset()
# self.sources_subset, self.targets_subset = nodes_subset.values()
# Calculate delta-network, simply by exploiting the classes' explicit __sub__ special methods
delta = self.refstate - self.state2
# Add the attributes of the calculated delta-network to the present object
self.__dict__.update(delta)
[docs]
def _make_struct_aln(self, pymol_aln):
r"""Perform a PyMOL structural alignment.
Return a :class:`Bio.Align.MultipleSeqAlignment` as a result of parsing the
ClustalW-formatted structural alignment provided by PyMOL. The alignment is also
used to create an `_aln_mapper` attribute in each Protein of the Delta object
that maps the alignment positions to its residues, which can/will be used by the
Protein's private method `_translate_ix`.
Parameters
----------
pymol_aln : {"super", "align", "cealign"}
PyMOL's method to apply for the structural alignment of the two structures,
usedto retrieve the corresponding residues between the two for subtraction of
edge weights.
Returns
-------
:class:`Bio.Align.MultipleSeqAlignment`
Notes
-----
Check `PyMol's documentation <https://pymolwiki.org/index.php/Main_Page>`_ for
descriptions of the structural alignment methods and their fitness for identical
or dissimilar structures.
"""
import pymol2
from Bio import AlignIO
from Bio.SeqUtils import seq1
# Perform PyMOL structural alignment: https://bioinformatics.stackexchange.com/questions/19105/perform-protein-structure-based-sequence-alignment-in-python
with pymol2.PyMOL() as pymol:
pymol.cmd.load(self.refstate._pdbf, "prot1")
pymol.cmd.load(self.state2._pdbf, "prot2")
eval(f"pymol.cmd.{pymol_aln}('prot1', 'prot2', object='aln')")
with pymol.cmd.lockcm:
aln = pymol2._cmd.get_seq_align_str(pymol.cmd._COb, "aln", -1, 0, -1)
# Parse retrieved alignment string with Biopython's AlignIO
with io.StringIO(aln) as f:
alignment = AlignIO.read(f, "clustal")
# Create the _aln_mapper attribute in each Protein passed to the Delta object to map the alignment's positions to its residues
iterator = zip(
self._states, alignment
) # looks like: ((refstate, alignment[0]), (state2, alignment[1]))
for state, aln in iterator:
# Get a list of residues in the Protein in the standardized name used in AlloViz
aas = [f"{res.resname}:{res.resid}" for res in state.protein.residues]
# Make a dictionary/mapper that maps each alignment position to a residue of the Protein
## The 'aln' object from the alignment has a `seq` attribute, which is a string with the same length as the alignment of the two sequences
## In each position of 'aln', there is either a one-letter code of a residue of the sequence or a "-" indicating a gap with respect to the other sequence
## So, each position of the alignment that is not a "-" corresponds to a residue of the Protein that will be in the 'aas' list
## 'aln_mapper' is generated by mapping the "popped first element of aas" to the corresponding alignment position (a pop and a position is skipped when it is "-")
aln_mapper = dict(
(aas.pop(0), pos)
for pos, res in enumerate(aln.seq)
if res != "-" and res == seq1(aas[0].split(":")[0])
)
# Also store the reverse mapping
aln_mapper.update(dict(map(reversed, aln_mapper.items())))
# And set the attribute in the Protein object
setattr(state, "_aln_mapper", aln_mapper)
return alignment
# def _add_nodes_subset(self):
# bfac = lambda atom: f"{atom.tempfactor:.2f}"
# is_wb = lambda atom: atom.name == "N" and -8.1 <= atom.tempfactor <= 8.1 and (bfac(atom) != "1.00" and bfac(atom) != "0.00")
# targets = [3.50, 6.30, 7.49, 7.50, 7.51, 7.52, 7.53]
# # In Ballesteros-Weinstein: Ionic lock 3.50 and 6.30; NPxxY 7.49-7.53
# def get_sources(states):
# resnum_to_wb = lambda nums, state: (bfac(atom)
# for residue in nums
# for atom in state.mdau.select_atoms(f"resnum {residue}")
# if is_wb(atom))
# has_lig = [state if ("LIG" in (seg.segid for seg in state.mdau.segments)) else False for state in self.states]
# if any(has_lig):
# state = next(item for item in has_lig if item != False)
# aas = state.mdau.select_atoms("(same residue as around 4 segid LIG) and protein").residues
# return list(resnum_to_wb(aas.resnums, state))
# else:
# return [3.32] # Conserved position
# nodes_subset = {"sources": [val for val in get_sources(self.states) if val not in targets],
# "targets": list(map(lambda x: f"{x:.2f}", targets))}
# format_res = lambda aa: f"A:{aa.resname}:{aa.resnum}"
# wb_to_aa = lambda wblist, state: list(map(format_res, (atom.residue
# for residue in state.mdau.select_atoms("protein").residues
# for atom in residue.atoms
# if is_wb(atom) and bfac(atom) in wblist)))
# for state in self.states:
# state.sources_subset, state.targets_subset = wb_to_aa(nodes_subset["sources"], state), wb_to_aa(nodes_subset["targets"], state)
# return nodes_subset
[docs]
def view(
self,
pkg,
metric,
filtering="All",
element="edges",
num=20,
colors=["orange", "turquoise"],
nv=None,
):
r"""Visualize the analyzed delta-networks on the protein structure.
Visualize the delta-network corresponding to the selected package/network
construction method, filtering scheme, network element and network metric or
metrics, all of which must be already present in the delta-network object by
having been previously calculated and analyzed in the respective Protein objects,
and then correctly processed during the Delta object creation. Structure of the
reference protein is used for displaying by default. The number of (each of the)
elements to show and the color used for each Protein can be specified (first
color will be used for negative values and thus for the second Protein, while the
second color will be used for positive values and thus the reference Protein).
Parameters
----------
pkg : str, default: "all"
Package/Network construction method for which to show the delta-network.
metric : str, default: "all"
Network metric for which to show the delta-network.
filtering : str
Filtering scheme for which to show the delta-network.
element : str or list, {"edges", "nodes"}
Delta-network element or elements to show on the protein structure
representing the chosen package, filtering scheme and metric.
num : int, default: 20
Number of (each of the) delta-network elements to show on the structure.
colors : list, default: ["orange", "turquoise"]
List of two colors to assign to the minimum and maximum values of the
delta-network to be represented, respectively. Abstractly, the first color
will be used for negative values and thus for elements that have higher value
in the second Protein, while the second color will be used for positive
values, and thus for the reference Protein. Dela-network values are
interpolated setting 0 as the middle value, which is assigned "white".
nv : :class:`nglview.NGLWidget`, optional
A structure representation into which the shapes representing the chosen
delta-network elements will be added.
Returns
-------
:class:`nglview.NGLWidget`
Examples
--------
>>> activeB2AR = AlloViz.Protein(GPCR=117, name="Active B2AR")
>>> inactiveB2AR = AlloViz.Protein(GPCR=160, name="Inactive B2AR")
>>> for protein in [activeB2AR, inactiveB2AR]:
>>> protein.calculate("pytraj_CA")
>>> protein.analyze(metrics="btw")
>>> delta = AlloViz.Delta(activeB2AR, inactiveB2AR)
>>> delta.view("pytraj_CA", "btw")
NGLWidget()
"""
# Function is the same one as the Protein class one but 'self' is passed to use Delta's attributes' data
return Protein.view(self, pkg, metric, filtering, element, num, colors, nv)