# -*- coding: utf-8 -*-
#
# excludedvolume.py - Excluded volume assessment for PDB-IHM
#
# Copyright (C) 2019-2025 Arthur Zalevsky, Sai Ganesan, Benjamin M. Webb, Brinda Vallat
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""
Excluded volume assessment for PDB-IHM
"""
from pathlib import Path
from mmcif_io import GetInputInformation
import ihm
import multiprocessing as mp
import pandas as pd
import numpy as np
from scipy.spatial import KDTree
import math
import pickle
import logging
import mendeleev
[docs]
class GetExcludedVolume(GetInputInformation):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
[docs]
def get_all_spheres(self, fname: str | None = None) -> dict:
""" Get spheres and atoms for each model """
if fname is not None:
system, encoding = utility.parse_ihm_cif(fname)
else:
system = self.system
model_dict = {}
for st in system.state_groups:
for s in st:
for mg in s:
for m in mg:
model_dict[m._id] = list(m.get_spheres()) + list(m.get_atoms())
return model_dict
[docs]
def get_nCr(self, n: int, r: int) -> int:
"""get all combinations"""
f = math.factorial
return int(f(n)/(f(r)*f(n-r)))
[docs]
def get_violation_percentage(self, viols: dict, n: int) -> float:
"""get information on all spheres for each model"""
number_of_violations = sum(viols.values())
number_of_combinations = self.get_nCr(n, 2)
return (1 - number_of_violations / number_of_combinations ) * 100.
[docs]
@staticmethod
def get_radii(atoms: list) -> dict:
""" Get VdW radii for all atom types """
elems = []
radii = {}
for atom in atoms:
if isinstance(atom, ihm.model.Atom):
elem_ = atom.type_symbol
elems.append(elem_)
elems = set(elems)
for elem in elems:
if not isinstance(elem, str):
logging.warning(f"Skipping missing element")
continue
# Try to guess elem:
elem_ = elem
if len(elem) == 1:
elem_ = elem.upper()
else:
elem_ = elem[0].upper() + elem[1].lower()
try:
vdwr = getattr(mendeleev, elem_).vdw_radius
# Convert picometers to angstroms
vdwr /= 100.0
radii[elem] = vdwr
except AttributeError as e:
logging.warning(f"Skipping unknown element {elem}")
return radii
[docs]
def get_xyzr(self, spheres: list) -> pd.DataFrame:
""" get X,Y, Z coords from sphere objects"""
raw = []
vdw = self.get_radii(spheres)
for sphere in spheres:
if isinstance(sphere, ihm.model.Sphere):
data_ = (sphere.x, sphere.y, sphere.z, sphere.radius)
raw.append(data_)
elif isinstance(sphere, ihm.model.Atom):
try:
radius = vdw[sphere.type_symbol]
except KeyError as e:
# We've already put a message in the log
# so just silently skip
continue
data_ = (sphere.x, sphere.y, sphere.z, radius)
raw.append(data_)
model_spheres_df = pd.DataFrame(raw, columns=['X', 'Y', 'Z', 'R'])
return model_spheres_df
[docs]
def get_violation_dict(self, model_spheres_df: pd.DataFrame) -> dict:
""" get violation from model_sphere df"""
viols = {}
# Get coordinates
xyz = model_spheres_df[['X', 'Y', 'Z']].to_numpy()
# Get radii
radii = model_spheres_df[['R']].to_numpy()
# Get maximum radius
maxr = np.max(radii)
# Build tree
t = KDTree(xyz)
# The enumeration is done to preveserve
# compatibility as it's a drop-in replacement
for indx, i in enumerate(range(len(xyz) - 1), 1):
viols_ = 0
# Get neighours in R1+R2 radius, where
# R1 is particle's i radius
# and R2 is the maxium radius
# Thus it's a greedy search
nb = t.query_ball_point(xyz[i], radii[i] + maxr)
# Check each neighbour
for j in nb:
# Only check pairs in a triangle
if j > i:
# np.linalg.norm is slow, but convenient
d = np.linalg.norm(xyz[i] - xyz[j])
if d < (radii[i] + radii[j]):
viols_ += 1
viols[indx] = viols_
return viols
def _get_exc_vol(self, sphere_list: list) -> (float, int):
"""
get violations from cart coords
"""
total = self.get_nCr(len(sphere_list), 2)
df = self.get_xyzr(sphere_list)
violation_dict = self.get_violation_dict(df)
violations = sum(violation_dict.values())
satisfaction = round(
self.get_violation_percentage(violation_dict, len(df)), 2)
return (total, violations, satisfaction)
def _run_exc_vol(self, model_dict: dict) -> dict:
""" get exc vol info in parallel """
complete_list = [
self._get_exc_vol(x) for x in model_dict.values()]
excluded_volume = {'Model ID': list(model_dict.keys()),
'Analysed': [i[0] for i in complete_list],
'Number of violations': [i[1] for i in complete_list],
'Excluded Volume Satisfaction (%)': [i[2] for i in complete_list],
}
return excluded_volume
[docs]
def get_excluded_volume(self):
cache_fn = Path(self.cache, f'{self.stem}.exv.pkl')
data = None
# Check if we already requested the data
if Path(cache_fn).is_file() and not self.nocache:
logging.info(f'Found {self.stem} in cache: {cache_fn}')
with open(cache_fn, 'rb') as f:
data = pickle.load(f)
elif not Path(cache_fn).is_file() or self.nocache:
logging.info("Excluded volume is being calculated...")
model_dict = self.get_all_spheres()
data = self._run_exc_vol(model_dict)
with open(cache_fn, 'wb') as f:
pickle.dump(data, f)
return data