Source code for ihm_validation.format_checker

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# format_checker.py - Check residue and atom names in IHMCIF file
#
# Copyright (C) 2025 Arthur Zalevsky <aozalevsky@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""
Check residue and atom names in IHMCIF file
"""

import sys
import logging
import argparse as ag
import ihm, ihm.reader, ihm.util.make_mmcif

# Non-standard histidine names (protonation states)
HISTIDINES = frozenset(('HIP', 'HID', 'HIE'))

def parse_ihm_cif(fname, encoding='utf8') -> tuple:
    """Read the single IHM system stored in an mmCIF file.

    The file is first opened with the requested ``encoding``. If decoding
    fails with ``UnicodeDecodeError``, the file is reopened as ASCII with
    undecodable bytes ignored and parsed again.

    :param fname: path to the input file
    :param encoding: text encoding tried first
    :return: ``(system, encoding)``, where ``encoding`` is the encoding
        that was actually used to read the file
    :raises ValueError: if the reader does not return exactly one system
        (raised by the single-element unpacking)
    """
    def _load(**open_options):
        # Exactly one system is expected; the unpacking fails otherwise
        with open(fname, **open_options) as stream:
            only_system, = ihm.reader.read(stream)
        return only_system

    try:
        return _load(encoding=encoding), encoding
    except UnicodeDecodeError:
        # Best-effort fallback: drop whatever cannot be decoded as ASCII
        fallback = 'ascii'
        return _load(encoding=fallback, errors='ignore'), fallback
def check_entities_histidines(system: ihm.System, histidines=HISTIDINES):
    """Reject systems whose entities use non-standard histidine names.

    Every chemical component in the sequence of every entity is inspected;
    a component is flagged when its ``id`` is a member of ``histidines``.

    :param system: the system whose entities are checked
    :param histidines: collection of component ids that are not allowed
        (defaults to the module-level ``HISTIDINES``)
    :raises ValueError: if at least one flagged component is found. The
        message lists each offending id once, in alphabetical order.
    """
    found = {
        comp.id
        for entity in system.entities
        for comp in entity.sequence
        if comp.id in histidines
    }
    if found:
        # Sort so the message is identical from run to run; joining a bare
        # set would emit the ids in arbitrary order.
        raise ValueError(
            "Non-canonical histidine variant found: "
            f"{', '.join(sorted(found))}")
def check_models(system: ihm.System):
    """Run the python-ihm atom name check on every model of the system.

    Models are visited by walking state groups, then states, then model
    groups, and each one is passed to
    ``ihm.util.make_mmcif._check_atom_names`` with ``check_all=True``.

    :param system: the system whose models are checked
    """
    # NOTE(review): _check_atom_names is a private python-ihm helper;
    # presumably it raises on a non-standard atom name - confirm against
    # the installed python-ihm version.
    every_model = (
        model
        for state_group in system.state_groups
        for state in state_group
        for model_group in state
        for model in model_group
    )
    for model in every_model:
        ihm.util.make_mmcif._check_atom_names(model, check_all=True)
def check_all_exception(system: ihm.System):
    """Run every enabled check; the first failing check raises.

    :param system: the system to validate
    """
    # Disable atom check until python-ihm fixes
    # (the full set would be check_entities_histidines and check_models)
    enabled_checks = (check_entities_histidines,)
    for run_check in enabled_checks:
        run_check(system)
def check_all_log(system: ihm.System) -> int:
    """Run every enabled check and log failures instead of raising.

    A ``ValueError`` raised by a check is written to the log as an error
    and the remaining checks still run.

    :param system: the system to validate
    :return: ``0`` if no check raised ``ValueError``, ``127`` otherwise
    """
    # Disable atom check until python-ihm fixes
    # (the full set would be check_entities_histidines and check_models)
    enabled_checks = (check_entities_histidines,)
    status = 0
    for run_check in enabled_checks:
        try:
            run_check(system)
        except ValueError as problem:
            logging.error(problem)
            status = 127
    return status
def check_file_exception(fname: str):
    """Parse ``fname`` and run all checks, raising if one of them fails.

    :param fname: path to the IHMCIF file
    """
    # Only the parsed system is needed; the detected encoding is dropped
    system, _ = parse_ihm_cif(fname)
    check_all_exception(system)
def check_file_log(fname: str) -> int:
    """Parse ``fname`` and run all checks, logging any failure.

    :param fname: path to the IHMCIF file
    :return: the exit code produced by ``check_all_log`` (non-zero when a
        check failed)
    """
    # Only the parsed system is needed; the detected encoding is dropped
    system, _ = parse_ihm_cif(fname)
    return check_all_log(system)
if __name__ == "__main__":
    # Command-line entry point: validate one file and let any failed check
    # propagate as an exception.
    parser = ag.ArgumentParser(
        description="Check residue and atom names in IHMCIF file")
    # The path is mandatory; without it None would be handed to open() and
    # the script would die with a TypeError instead of a usage message.
    parser.add_argument(
        "-i", "--input_file", required=True,
        help="Path to the input file")
    args = parser.parse_args()
    check_file_exception(args.input_file)