Source code for ihm_validation.em

# -*- coding: utf-8 -*-
#
# em.py - 3DEM validation for PDB-IHM
#
# Copyright (C) 2025 Arthur Zalevsky <aozalevsky@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
3DEM validation for PDB-IHM
"""
# system
import logging
import json
import pickle
import io
import re
import requests
import time
import gzip
import subprocess

# handling files
from pathlib import Path
import shutil
import glob
import tempfile
import os

# numerical
import numpy as np
import pandas as pd

# plotting
from bokeh.plotting import save, figure
from bokeh.layouts import gridplot
from bokeh.models import Range1d
from bokeh.models.widgets import Panel, Tabs
from bokeh.embed import json_item
from bokeh.io import export_svgs
import iqplot

# ihmv
from mmcif_io import GetInputInformation
import utility
import ihm
from molprobity import MyModelDumper, AtomSiteVariant, _stub
import images

# EMDB
import va

# Set global pandas options
pd.options.mode.chained_assignment = None

[docs] class EMValidation(GetInputInformation): VA_TIMEOUT = 900 def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # Only atomic structures are supported so far
[docs] def save_plots(self, plot, title, imageDirName='.'): """Save bokeh plots as svg images and json blobs""" stem = f'{self.ID_f}_{title}' imgpath_json = Path( imageDirName, f'{stem}.json') with open(imgpath_json, 'w') as f: json.dump(json_item(plot, title), f) imgpath_svg = Path( imageDirName, f'{stem}.svg') svgs = export_svgs(plot, filename=imgpath_svg, timeout=15) svgs = [Path(x).name for x in svgs] return ('', imgpath_json, svgs)
[docs] @staticmethod def request_emdb(url: str) -> dict: '''pull data from EMDB''' result = None r = requests.get(url) if not r.ok: # Wait until cold request completes and go to DB cache logging.info(f'Retrying pulling {url} from EMDB') time.sleep(60) r = requests.get(url) if r.ok: try: result = r.json() except JSONDecodeError: pass return result
[docs] def get_emdb_data(self, code): ''' get data from EMDB ''' cache_fn = Path(self.cache, f'{code}.pkl') data = None # Check if we already requested the data if Path(cache_fn).is_file(): logging.info(f'Found {code} in cache! {cache_fn}') with open(cache_fn, 'rb') as f: data = pickle.load(f) elif not Path(cache_fn).is_file(): map_metadata = self.get_emdb_map_metadata(code) map_validation = self.get_emdb_map_validation(code) map_path = Path(self.cache, f'{code}.map') map_path = self.get_emdb_map(code, map_path) map_plots = self.get_emdb_map_images(code, map_validation) if (map_metadata is not None and map_validation is not None and Path(map_path).is_file() and len(map_plots) > 0): data = { 'emdb_id': code, 'map_metadata': map_metadata, 'map_validation': map_validation, 'map_path': map_path, 'map_plots': map_plots } with open(cache_fn, 'wb') as f: pickle.dump(data, f) else: logging.info(f'EMDB data for {code} is incomplete') return data
[docs] @staticmethod def get_emdb_numerical_code(code) -> str: """Extract only numerical part of the EMDB ID""" code_ = re.search('\d+', code).group() return code_
[docs] def get_emdb_map(self, code, fname) -> str: '''download validation data from EMDB''' # Extract only numerical part of the ID code_ = self.get_emdb_numerical_code(code) out = requests.get(f'https://files.wwpdb.org/pub/emdb/structures/EMD-{code_}/map/emd_{code_}.map.gz', stream=True) if out.status_code == 200: stream_ = io.BytesIO() for chunk in out.raw.stream(1024, decode_content=False): stream_.write(chunk) data_ = gzip.decompress(stream_.getvalue()) with open(fname, 'wb') as f: f.write(data_) else: logging.error(f'Could not download EMDB map, status code: {out.status_code}') return fname
[docs] def get_emdb_map_metadata(self, code) -> dict: '''download validation data from EMDB''' out = {} # Extract only numerical part of the ID code_ = self.get_emdb_numerical_code(code) r = requests.get(f'https://www.ebi.ac.uk/emdb/api/entry/{code_}') if r.status_code == 200: out = r.json() else: logging.error(f"Couldn't get map metadata from EMDB for {code}") return out
[docs] def get_emdb_map_images(self, code, map_validation) -> dict: plots = {} try: plots_ = {} for p in ['x', 'y', 'z']: name = f"{p}projection" label = p.upper() plots_[label] = utility.encode_img(self.get_map_image(code, name)) plots['map_proj'] = plots_ except (KeyError, IndexError, ValueError, TypeError) as e: logging.error(e) try: plots_ = {} for p in ['x', 'y', 'z']: name = f"{p}central_slice" ind = map_validation['central_slice']['indices'][p] label = f"{p.upper()} Index: {ind}" plots_[label] = utility.encode_img(self.get_map_image(code, name)) plots['map_central_slice'] = plots_ except (KeyError, IndexError, ValueError, TypeError) as e: logging.error(e) try: plots_ = {} for p in ['x', 'y', 'z']: name = f"{p}largestvariance_slice" ind = map_validation['largest_variance_slice']['indices'][p] label = f"{p.upper()} Index: {ind}" plots_[label] = utility.encode_img(self.get_map_image(code, name)) plots['map_largest_variance_slice'] = plots_ except (KeyError, IndexError, ValueError, TypeError) as e: logging.error(e) try: plots_ = {} for p in ['x', 'y', 'z']: name = f"glow_{p}std" label = p.upper() plots_[label] = utility.encode_img(self.get_map_image(code, name)) plots['map_glow_std'] = plots_ except (KeyError, IndexError, ValueError, TypeError) as e: logging.error(e) try: plots_ = {} for p in ['x', 'y', 'z']: name = f"scaled_{p}surface" label = p.upper() plots_[label] = utility.encode_img(self.get_map_image(code, name)) plots['map_surface'] = plots_ except (KeyError, IndexError, ValueError, TypeError) as e: logging.error(e) # Try rawmap, not always available if 'rawmap_orthogonal_projection' in map_validation: try: plots_ = {} for p in ['x', 'y', 'z']: name = f"{p}projection" plots_[p.upper()] = utility.encode_img(self.get_map_image(code, name, rawmap=True)) plots['rawmap_proj'] = plots_ except (KeyError, IndexError, ValueError, TypeError) as e: logging.error(e) try: plots_ = {} for p in ['x', 'y', 'z']: name = f"{p}central_slice" ind = map_validation['rawmap_central_slice']['indices'][p] label = f"{p.upper()} Index: {ind}" plots_[label] = utility.encode_img(self.get_map_image(code, name, rawmap=True)) plots['rawmap_central_slice'] = plots_ except (KeyError, IndexError, ValueError, TypeError) as e: logging.error(e) try: plots_ = {} for p in ['x', 'y', 'z']: name = f"{p}largestvariance_slice" ind = map_validation['rawmap_largest_variance_slice']['indices'][p] label = f"{p.upper()} Index: {ind}" plots_[label] = utility.encode_img(self.get_map_image(code, name, rawmap=True)) plots['rawmap_largest_variance_slice'] = plots_ except (KeyError, IndexError, ValueError, TypeError) as e: logging.error(e) try: plots_ = {} for p in ['x', 'y', 'z']: name = f"glow_{p}std" label = p.upper() plots_[label] = utility.encode_img(self.get_map_image(code, name, rawmap=True)) plots['rawmap_glow_std'] = plots_ except (KeyError, IndexError, ValueError, TypeError) as e: logging.error(e) try: plots_ = {} for p in ['x', 'y', 'z']: name = f"scaled_{p}surface" label = p.upper() plots_[label] = utility.encode_img(self.get_map_image(code, name, rawmap=True)) plots['rawmap_surface'] = plots_ except (KeyError, IndexError, ValueError, TypeError) as e: logging.error(e) if 'masks' in map_validation: plots['masks'] = {} for k, v in map_validation['masks'].items(): try: plots_ = {} for p in ['x', 'y', 'z']: name = f"{k}_scaled_{p}maskview" label = p.upper() plots_[label] = utility.encode_img(self.get_map_image(code, name)) plots['masks'][k] = plots_ except (KeyError, IndexError, ValueError, TypeError) as e: logging.error(e) return plots
[docs] def get_emdb_map_validation(self, code) -> dict: '''download validation data from EMDB''' # Extract only numerical part of the ID code_ = self.get_emdb_numerical_code(code) r = requests.get(f'https://www.ebi.ac.uk/emdb/api/analysis/{code_}?information=all') out = {} if r.status_code == 200: out = r.json()[code_] else: logging.error(f"Couldn't get data quality from EMDB for {code}") return out
[docs] def get_emdb_ids(self) -> list: ''' get a list of all EMDB ids from entry ''' ids = [] for i, d in enumerate(self.system.orphan_datasets): if isinstance(d, ihm.dataset.EMDensityDataset): if isinstance(d.location, ihm.location.EMDBLocation): try: pid = d.location.access_code ids.append(pid) except AttributeError: pass return ids
[docs] def get_map_image(self, code, name, rawmap=False, extension="jpeg"): img = None code_ = self.get_emdb_numerical_code(code) url = f"https://www.ebi.ac.uk/pdbe/emdb/emdb-entry/emdbva/va-{code_}/va/emd_{code_}.map_{name}.{extension}" if rawmap: url = f"https://www.ebi.ac.uk/pdbe/emdb/emdb-entry/emdbva/va-{code_}/va/emd_{code_}_rawmap.map_{name}.{extension}" r = requests.get(url, stream=True) if r.status_code == 200: buffer = io.BytesIO() for chunk in r.raw.stream(1024, decode_content=False): buffer.write(chunk) img = buffer.getvalue() else: logging.error(f"Couldn't download {url}") return img
[docs] def validate_emdb_data(self, data: dict, imageDirName='.') -> tuple : """Match sequences, residues pairs and return stats""" out = (None, None) data_stats, data_plots, data_stats_plots = {}, {}, {} fit_stats, fit_plots = {}, {} # Unpack emdb data emdbid = data['emdb_id'] emdbid_ = self.get_emdb_numerical_code(emdbid) map_metadata = data['map_metadata'] map_validation = data['map_validation'] resolution = None try: resolution = float(map_validation["resolution"]["value"]) except (KeyError, ValueError, IndexError, TypeError) as e: logging.error(e) recl = None try: recl = float(map_validation['recommended_contour_level']['recl']) except (KeyError, ValueError, IndexError, TypeError) as e: logging.error(e) data_plots = data['map_plots'] # Voxel-value plot try: p = figure( title=f"Voxel-value distribution (Mode={map_validation['density_distribution']['mode']})", x_axis_label='Voxel value', y_axis_label='Number of voxels (log10)', plot_height=350 ) X = map_validation['density_distribution']['x'] Y = map_validation['density_distribution']['y'] p.line(X, Y, color='blue') try: p.line([recl, recl], [0, max(Y)], line_color='red', line_width=3, legend_label=f'Recommended contour level {recl:.2f}') except (KeyError, ValueError, IndexError, TypeError) as e: logging.error(e) p.yaxis.ticker.desired_num_ticks = 3 p.output_backend = "svg" p.title.text_font_size = "14pt" p.xaxis.axis_label_text_font_size = "14pt" p.yaxis.axis_label_text_font_size = "14pt" p.xaxis.major_label_text_font_size = "14pt" p.yaxis.major_label_text_font_size = "14pt" p.xaxis.axis_label_text_font_style = 'normal' p.yaxis.axis_label_text_font_style = 'normal' title = f'{emdbid}_voxel' plots_ = self.save_plots(p, title, imageDirName) data_stats_plots[title] = plots_ except (KeyError, ValueError, IndexError, TypeError) as e: logging.error(e) # Volume estimate vol = None try: vol = map_validation['volume_estimate']['estvolume'] except (KeyError, ValueError, IndexError, TypeError) as e: logging.error(e) try: X = map_validation['volume_estimate']['level'] Y = map_validation['volume_estimate']['volume'] p = figure( title=f"Volume estimate (Estimated volume={vol:.2f} nm³)", x_axis_label = 'Contour level', y_axis_label = 'Volume [nm³]', plot_height=350 ) p.yaxis.ticker.desired_num_ticks = 3 p.output_backend = "svg" p.line(X, Y, color='blue') try: p.line([recl, recl], [0, max(Y)], color='red', legend_label=f'Recommended contour level {recl:.2f}') except (KeyError, ValueError, IndexError, TypeError) as e: logging.error(e) p.line([min(X), max(X)], [vol, vol], color='orange', legend_label=f'Estimated volume {vol:.2f} nm³') title = f'{emdbid}_volume' p.title.text_font_size = "14pt" p.xaxis.axis_label_text_font_size = "14pt" p.yaxis.axis_label_text_font_size = "14pt" p.xaxis.major_label_text_font_size = "14pt" p.yaxis.major_label_text_font_size = "14pt" p.xaxis.axis_label_text_font_style = 'normal' p.yaxis.axis_label_text_font_style = 'normal' plots_ = self.save_plots(p, title, imageDirName) data_stats_plots[title] = plots_ except (KeyError, ValueError, IndexError, TypeError) as e: logging.error(e) # RAPS if 'rotationally_averaged_power_spectrum' in map_validation: try: p = figure( title=f"Rotationally averaged power spectrum", x_axis_label = 'Spatial frequency [Å⁻¹]', y_axis_label = 'Log (I)', plot_height=350 ) p.yaxis.ticker.desired_num_ticks = 3 p.output_backend = "svg" X = map_validation['rotationally_averaged_power_spectrum']['x'] Y = map_validation['rotationally_averaged_power_spectrum']['y'] maxy = np.max(Y) miny = np.min(Y) if 'rawmap_rotationally_averaged_power_spectrum' in map_validation: X_raw = map_validation['rawmap_rotationally_averaged_power_spectrum']['x'] Y_raw = map_validation['rawmap_rotationally_averaged_power_spectrum']['y'] maxy = max(np.max(Y), np.max(Y_raw)) miny = min(np.min(Y), np.min(Y)) p.line(X_raw, Y_raw, color='orange', legend_label='Raw map RAPS') p.line(X, Y, color='blue', legend_label='Primary map RAPS') try: loc = 1 / resolution p.line([loc, loc], [miny, maxy], color='red', legend_label=f'Reported resolution {resolution:.2f}*') except (KeyError, ValueError, IndexError, TypeError) as e: logging.error(e) p.title.text_font_size = "14pt" p.xaxis.axis_label_text_font_size = "14pt" p.yaxis.axis_label_text_font_size = "14pt" p.xaxis.major_label_text_font_size = "14pt" p.yaxis.major_label_text_font_size = "14pt" p.xaxis.axis_label_text_font_style = 'normal' p.yaxis.axis_label_text_font_style = 'normal' title = f'{emdbid}_raps' plots_ = self.save_plots(p, title, imageDirName) data_stats_plots[title] = plots_ except (KeyError, ValueError, IndexError, TypeError) as e: logging.error(e) # FSC if 'fsc' in map_validation: try: p = figure( title=f"FSC", x_axis_label = 'Spatial frequency [Å⁻¹]', y_axis_label = 'Correlation', y_range = (-0.1, 1.1), plot_height=350 ) p.yaxis.ticker.desired_num_ticks = 3 p.output_backend = "svg" if 'load_fsc' in map_validation: X0 = map_validation['load_fsc']['curves']['fscx'] Y0 = map_validation['load_fsc']['curves']['fscy'] p.line(X0, Y0, legend_label='Author provided', color='blue') X = map_validation['fsc']['curves']['level'] Y1 = map_validation['fsc']['curves']['fsc'] p.line(X, Y1, legend_label='Unmasked-calculated FSC', color='orange') Y2 = map_validation['fsc']['curves']['0.143'] p.line(X, Y2, line_dash='dashed', legend_label='0.143', color='green') Y2 = map_validation['fsc']['curves']['0.5'] p.line(X, Y2, line_dash='dashed', legend_label='0.5', color='red') Y3 = map_validation['fsc']['curves']['halfbit'] p.line(X, Y3, line_dash='dashed', legend_label='Half-bit', color='purple') p.line([loc, loc], [miny, maxy], color='black', legend_label=f'Reported resolution {resolution:.2f}*') p.title.text_font_size = "14pt" p.xaxis.axis_label_text_font_size = "14pt" p.yaxis.axis_label_text_font_size = "14pt" p.xaxis.major_label_text_font_size = "14pt" p.yaxis.major_label_text_font_size = "14pt" p.xaxis.axis_label_text_font_style = 'normal' p.yaxis.axis_label_text_font_style = 'normal' title = f'{emdbid}_fsc' plots_ = self.save_plots(p, title, imageDirName) data_stats_plots[title] = plots_ except (KeyError, ValueError, IndexError, TypeError) as e: logging.error(e) # Fit-to-data if self.atomic: mid = int(self.system.get_representative_model().model._id) varoot = Path(self.cache, f"{self.ID_f}_va") vapath = Path(varoot, f"{mid}_{emdbid}") os.makedirs(varoot, exist_ok=True) if Path(vapath).is_dir(): logging.info(f"Found VA for {emdbid} in cache: {vapath}") else: self.run_va(data, mid, vapath) try: plots_ = {} for p in ['x', 'y', 'z']: fname = glob.glob(str(Path(vapath, f'*cif*.map_scaled_{p}surface.jpeg')))[0] b64 = utility.load_img(fname) plots_[p] = b64 fit_plots['map_model'] = plots_ plots_ = {} for p in ['x', 'y', 'z']: fname = glob.glob(str(Path(vapath, f'*cif*.map_scaled_{p}qscoresurface.jpeg')))[0] b64 = utility.load_img(fname) plots_[p.upper()] = b64 fit_plots['model'] = mid fit_plots['map_model_q'] = plots_ fit_plots['q_scale'] = images.QSCORE_SCALE plots_ = {} for p in ['x', 'y', 'z']: fname = glob.glob(str(Path(vapath, f'*cif*.map_scaled_{p}fitsurface.jpeg')))[0] b64 = utility.load_img(fname) plots_[p] = b64 fit_plots['map_model_inclusion'] = plots_ fit_plots['ai_scale'] = images.AI_SCALE fname = glob.glob(str(Path(vapath, f'*.map_all.json')))[0] emdbid_ = self.get_emdb_numerical_code(emdbid) with open(fname, 'r') as f: data_ = json.load(f)[f'EMD-{emdbid_}.map'] fit_stats[mid] = {'ai_score': {}, 'q_score': {}} fit_stats[mid]['ai_score']['chains'] = {} fit_stats[mid]['ai_score']['chains']['All'] = { 'value': data_['atom_inclusion_by_level']['0']['average_ai_model'], 'color': data_['atom_inclusion_by_level']['0']['average_ai_color'], 'numberOfAtoms': data_['atom_inclusion_by_level']['0']['totalNumberOfAtoms'] } fit_stats[mid]['ai_score']['chains'].update(data_['atom_inclusion_by_level']['0']['chainaiscore']) fit_stats[mid]['ai_score']['average'] = data_['atom_inclusion_by_level']['0']['average_ai_model'] p = figure( title=f"Atom inclusion", x_axis_label = 'Contour level', y_axis_label = 'Fraction of atoms inside the map', y_range = (0., 1.1), plot_height=350, ) p.output_backend = "svg" X = data_['atom_inclusion_by_level']['0']['level'] Y1 = data_['atom_inclusion_by_level']['0']['backbone'] Y2 = data_['atom_inclusion_by_level']['0']['all_atom'] ai_recl_backbone = np.round(Y1[np.abs(recl - np.asarray(X)).argmin()] * 100.).astype(int) ai_recl_all = np.round(Y2[np.abs(recl - np.asarray(X)).argmin()] * 100.).astype(int) fit_stats[mid]['ai_score']['ai_recl_backbone'] = ai_recl_backbone fit_stats[mid]['ai_score']['ai_recl_all'] = ai_recl_all p.line(X, Y1, color='blue', legend_label='Backbone atoms') p.line(X, Y2, color='orange', legend_label='All non-hydrogen atoms') p.line([recl, recl], [0, 1.1], color='red', legend_label=f'Recommended contour level {recl:.3f}') p.title.text_font_size = "14pt" p.xaxis.axis_label_text_font_size = "14pt" p.yaxis.axis_label_text_font_size = "14pt" p.xaxis.major_label_text_font_size = "14pt" p.yaxis.major_label_text_font_size = "14pt" p.xaxis.axis_label_text_font_style = 'normal' p.yaxis.axis_label_text_font_style = 'normal' title = f'{mid}_{emdbid}_ai_plot' plots_ = self.save_plots(p, title, imageDirName) fit_plots['ai_plot'] = plots_ fit_stats[mid]['q_score']['chains'] = {} fit_stats[mid]['q_score']['chains']['All'] = { 'value': data_['qscore']['0']['data']['averageqscore'], 'color': data_['qscore']['0']['data']['averageqscore_color'], } fit_stats[mid]['q_score']['chains'].update(data_['qscore']['0']['data']['chainqscore']) fit_stats[mid]['q_score']['average'] = data_['qscore']['0']['data']['averageqscore'] except (FileNotFoundError, OSError, IndexError, KeyError) as e: logging.error(e) fit_stats = {} fit_plots = {} data_stats['specimen_preparation_list'] = map_metadata['structure_determination_list']['structure_determination'][0]['specimen_preparation_list']['specimen_preparation'] data_stats['reconstruction_method'] = self.get_em_reconstruction_method(map_metadata) resolution_estimates, comments = self.get_resolution_estimates(map_metadata, map_validation) data_stats['resolution'] = resolution data_stats['resolution_estimates'] = resolution_estimates if len(resolution_estimates) == 0: data_stats['resolution_estimates'] = None data_stats['resolution_estimates_comments'] = comments data_stats['recommended_level'] = recl if 'rawmap_contour_level' in map_validation: data_stats['rawmap_recommended_level'] = float(map_validation['rawmap_contour_level']['cl']) data_stats['estimated_volume'] = vol out_ = { 'emdbid': emdbid, 'emdbid_': emdbid_, 'data_stats': data_stats, 'data_plots': data_plots, 'data_stats_plots': data_stats_plots, "fit_stats": fit_stats, "fit_plots": fit_plots } return (out_, None, None)
[docs] def validate_all_emdb_data(self, imageDirName='.') -> list: '''perform data quality validation for all crosslinking-MS datasets''' codes = self.get_emdb_ids() outs = [] for code in codes: data = self.get_emdb_data(code) if data is not None: out, _, __ = self.validate_emdb_data(data, imageDirName=imageDirName) if out is not None: outs.append(out) return outs
[docs] @staticmethod def get_chimera_version() -> str: """return chimera version""" version_string = subprocess.check_output(['chimera', '--version', '--nogui']).decode() version = re.search(' (\d+.\d+) ', version_string).groups()[0] return version
[docs] @staticmethod def get_chimerax_version() -> str: """return chimera version""" version_string = subprocess.check_output(['chimerax', '--version', '--nogui']).decode() version = re.search(' (\d+.\d+) ', version_string).groups()[0] return version
[docs] @staticmethod def get_mapq_version() -> str: """return mapq version""" with tempfile.NamedTemporaryFile('w') as f: f.write('from mapq import mapqVersion; print(mapqVersion)') f.flush() version_string = subprocess.check_output(['chimera', '--nogui', '--script', f.name]).decode() version = version_string.strip() return version
[docs] @staticmethod def get_va_version() -> str: """return va version""" version = va.__version__ return version
[docs] def run_va(self, data, mid, outpath): """Run EMDB va validation pipeline for map and model""" # Select representative model mid = int(mid) logging.info(f'Running va from scratch for MODEL {mid}') map_path = data['map_path'] map_metadata = data['map_metadata'] method = self._get_em_reconstrucion_method_va(map_metadata) map_validation = data['map_validation'] resolution = float(map_validation["resolution"]["value"]) recl = float(map_validation['recommended_contour_level']['recl']) fn = self.mmcif_file # tempcif outfn = str(Path(self.cache, f'{self.stem}_temp.cif')) system, encoding = utility.parse_ihm_cif(fn) # Reassign label_asym_id as auth_asym_id # for MolProbity for asym in system.asym_units: asym.auth_seq_id_map = 0 asym._strand_id = asym.id system._check_after_write = _stub with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tempdir: outfn = str(Path(tempdir, f'{self.stem}_temp.cif')) with open(outfn, 'w', encoding=encoding) as f: ihm.dumper.write(f, [system], variant=AtomSiteVariant(mid)) mapfn = str(Path(map_path).name) shutil.copy(map_path, str(Path(tempdir, mapfn))) call = [ # 'xvfb-run', '-a', # Use framebuffer on machines with wayland 'va', '-d', tempdir, '-met', str(method), '-run', 'surface', 'inclusion', 'qscore', '-cl', str(recl), '-s', str(resolution), '-m', Path(mapfn).name, '-f', Path(outfn).name ] try: subprocess.check_call(call, cwd=tempdir, timeout=self.VA_TIMEOUT) except subprocess.TimeoutExpired: logging.error("Could not finish the VA run") shutil.copytree(tempdir, outpath)
[docs] @staticmethod def get_level_from_primary_contour(data: dict) -> float: """Get recommended level for the map""" level = None for contour in data['map']['contour_list']['contour']: if contour['primary']: level = contour['level'] return level
[docs] @staticmethod def get_resolution_estimates(map_data, val_data): estimates = {} comments = {'general': '', 'fsc': ''} resenum = { 'FSC 0.143 CUT-OFF': '0.143', 'FSC 0.5 CUT-OFF': '0.5', 'FSC 1/2 BIT CUT-OFF': 'Half-bit', '0.143': '0.143', '0.5': '0.5', 'halfbit': 'Half-bit' } rresenum = { '0.143': 'FSC 0.143 CUT-OFF', '0.5': 'FSC 0.5 CUT-OFF', 'Half-bit': 'FSC 1/2 BIT CUT-OFF', } try: estimates_ = {} rec = map_data['structure_determination_list']['structure_determination'][0]['image_processing'][0]['final_reconstruction'] res = rec['resolution'] byauthor = res['res_type'] == 'BY AUTHOR' if byauthor: resmethod = rec['resolution_method'] if resmethod == 'OTHER': resenum[resmethod] = 'Other' if resmethod in resenum: resmethod_ = resenum[resmethod] for k, v in resenum.items(): estimates_[v] = '-' estimates_[resmethod_] = f'{float(res["valueOf_"]):.2f}' estimates['Reported by author'] = estimates_ except KeyError: comments['general'] += 'Author provided resolution is not available. ' if 'load_fsc' in val_data: estimates_ = {} for k, v in resenum.items(): estimates_[v] = '-' for k, v in val_data['load_fsc']['intersections'].items(): if k in resenum: estimates_[resenum[k]] = f'{np.round(1 / v["x"], 2):.2f}' estimates['Author-provided FSC curve'] = estimates_ else: comments['general'] += 'Author-provided FSC curve is not available. ' if 'fsc' in val_data: estimates_ = {} for k, v in resenum.items(): estimates_[v] = '-' for k, v in val_data['fsc']['intersections'].items(): if k in resenum: estimates_[resenum[k]] = f'{np.round(1 / v["x"], 2):.2f}' float(res["valueOf_"]) estimates['Unmasked-calculated*'] = estimates_ comments['fsc'] = '*Resolution estimate based on FSC curve calculated by comparison of deposited half-maps. ' for k, v in estimates_.items(): ares = estimates['Reported by author'][k] if v != '-' and ares != '-': if abs(float(ares) - float(v)) > (float(ares) / 10.): comments['fsc'] += f'The value from deposited half-maps intersecting {rresenum[k]} {v} differs from the reported value {ares} by more than 10%. ' return estimates, comments
[docs] @staticmethod def get_em_reconstruction_method(map_metadata): """Get EM reconsruction method from the conrolled vocabulary""" m = utility.NA methods = { 'crystallography': 'CRYSTALLOGRAPHY', 'singleparticle': 'SINGLE PARTICLE', 'tomography': 'TOMOGRAPHY', 'subtomogramaveraging': 'SUBTOMOGRAM AVERAGING', 'helical': 'HELICAL' } m_ = map_metadata['structure_determination_list']['structure_determination'][0]['method'].lower() try: m = methods[m_] except KeyError as e: logging.error(f'Unknown em reconstuction method {e}') return m
@staticmethod def _get_em_reconstrucion_method_va(map_metadata): """Get EM reconstruction methods for the VA run""" m = map_metadata['structure_determination_list']['structure_determination'][0]['method'].lower() return m