Source code for goodvibes.GoodVibes

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Quasi-harmonic thermochemical corrections for electronic structure calculations."""
import os.path
import sys
from glob import glob
from argparse import ArgumentParser

from .vib_scale_factors import scaling_data_dict, scaling_refs, canonicalize_level
from .io import write_xyz, load_cache, save_cache, qcdata_to_dict, find_spc_file
from .thermo import calc_bbe, get_free_space, FREESPACE_SOLVENTS
from .media import solvents, compute_media_conc, lookup_solvent
from .constants import (
    SUPPORTED_EXTENSIONS, GAS_CONSTANT, ATMOS,
    grimme_mRRHO_ref, grimme_msRRHO_ref, truhlar_ref, head_gordon_ref,
    oniom_scale_ref, gv_banner
)
import logging
from .utils import all_same, setup_logging, fatal, natural_key
from .validation import collect_and_validate_files, check_files, print_check_fails
from .sort import deduplicate, sort_thermo
from .selectivity import (get_boltz, parse_label_args, load_label_yaml,
                            assign_files_to_labels, compute_selectivity,
                            compute_selectivity_scan,
                            compute_selectivity_lowest_only,
                            compute_selectivity_lowest_only_scan)
from .output import (print_results, print_temperature_interval,
                      print_pes_results, print_cpu_time, write_json_results,
                      print_selectivity_results)

log = logging.getLogger('goodvibes')


[docs] def parse_arguments(): """Parse command-line arguments and return (options, args).""" # Get command line inputs. Use -h to list all possible arguments and default values parser = ArgumentParser( prog="goodvibes", description="Compute quasi-harmonic thermochemical corrections from quantum chemistry output files.") state = parser.add_argument_group("Temperature & state") state.add_argument("--temp", dest="temperature", default=298.15, type=float, metavar="TEMP", help="Temperature in Kelvin (default: 298.15)") state.add_argument("--ti", dest="temperature_interval", default=None, metavar="TI", help="Temperature interval as START,END,STEP in Kelvin (e.g. '300,1000,100')") state.add_argument("-c", "--conc", dest="conc", default=None, type=float, metavar="CONC", help="Concentration in mol/L for solution-phase entropy; gas phase (1 atm) if not set") state.add_argument("--media", dest="media", default=None, metavar="solvent", help="Apply standard-state concentration correction for the given solvent " "(e.g. H2O corrects to 55.34 M)") state.add_argument("--freespace", dest="freespace", default=None, type=str, metavar="SOLVENT", help="Apply free-space correction for the given solvent (e.g. H2O, toluene, DMF, " "AcOH, chloroform)") qh = parser.add_argument_group("Quasi-harmonic corrections") qh.add_argument("-q", dest="Q", action="store_true", default=False, help="Apply both quasi-harmonic entropy (Grimme mRRHO) and enthalpy (Head-Gordon) corrections") qh.add_argument("--qh", dest="QH", action="store_true", default=False, help="Apply Head-Gordon quasi-harmonic enthalpy correction") qh.add_argument("--qs", dest="QS", default="grimme", type=str.lower, metavar="QS", choices=('grimme', 'truhlar'), help="Quasi-harmonic entropy method: 'grimme' for mRRHO free-rotor interpolation, " "'truhlar' for frequency raising (default: grimme)") qh.add_argument("-f", "--tau", dest="freq_cutoff", default=100, type=float, metavar="FREQ_CUTOFF", help="Frequency cut-off for both entropy and enthalpy in cm-1 (default: 100)") qh.add_argument("--fh", dest="H_freq_cutoff", default=100.0, type=float, metavar="H_FREQ_CUTOFF", help="Frequency cut-off for enthalpy only in cm-1; overrides -f for H (default: 100)") qh.add_argument("--fs", dest="S_freq_cutoff", default=100.0, type=float, metavar="S_FREQ_CUTOFF", help="Frequency cut-off for entropy only in cm-1; overrides -f for S (default: 100)") qh.add_argument("--bav", dest='inertia', default="global", type=str, choices=['global', 'conf'], help="Moment of inertia for free-rotor entropy: 'global' uses Bav = 10e-44 kg m^2 " "for all molecules, 'conf' computes from rotational constants per file " "(default: global)") freq = parser.add_argument_group("Frequency handling") freq.add_argument("-v", "--vscal", dest="freq_scale_factor", default=None, type=float, metavar="SCALE_FACTOR", help="Vibrational frequency scaling factor for the partition-function frequencies " "(H_vib, S_vib); auto-detected from level of theory via Truhlar's harm_fac if " "not set, falls back to 1.0") freq.add_argument("--zpe-vscal", dest="zpe_scale_factor", default=None, type=float, metavar="ZPE_SCALE_FACTOR", help="Separate scaling factor for the zero-point energy (ZPE); auto-detected from " "level of theory via Truhlar's zpe_fac if not set. If --vscal is set but " "--zpe-vscal is not, ZPE inherits --vscal (back-compat).") freq.add_argument("--vmm", dest="mm_freq_scale_factor", default=None, type=float, metavar="MM_SCALE_FACTOR", help="Frequency scaling factor for the MM region in ONIOM calculations") freq.add_argument("--invert", dest="invert", nargs='?', const=True, default=None, type=float, help="Invert small imaginary frequencies (> -50 cm-1) to positive values; " "optionally provide a custom threshold in cm-1") freq.add_argument("--imag", dest="imag_freq", action="store_true", default=False, help="Print imaginary frequencies for each structure") sym = parser.add_argument_group("Symmetry") sym.add_argument("--symm", dest='symm', action="store_true", default=False, help="Apply external symmetry correction to entropy using point-group detection (pymsym)") sym.add_argument("--pg", dest='pg', action="store_true", default=False, help="Display point group from output file (no entropy correction applied)") sort_grp = parser.add_argument_group("Sorting & deduplication") sort_grp.add_argument("--sort", dest="sort", default=None, nargs='?', const='gibbs', type=str, choices=['energy', 'gibbs'], metavar="KEY", help="Sort output by energy ('energy' for SCF, 'gibbs' for quasi-harmonic G; default when flag used: gibbs)") sort_grp.add_argument("--boltz", dest="boltz", default=None, nargs='?', const='gibbs', type=str, choices=['energy', 'gibbs'], metavar="KEY", help="Print Boltzmann-weighted populations ('energy' for SCF, 'gibbs' for quasi-harmonic G; " "default when flag used: gibbs)") sort_grp.add_argument("--dedup", dest="duplicate", action="store_true", default=False, help="Remove duplicate structures based on energy, rotational constants, and stoichiometry") sort_grp.add_argument("--e_cutoff", dest="e_cutoff", default=0.05, type=float, metavar="KCAL", help="Energy cutoff for duplicate detection in kcal/mol (default: 0.05)") sort_grp.add_argument("--ro_cutoff", dest="ro_cutoff", default=0.01, type=float, metavar="FRAC", help="Rotational constant cutoff for duplicate detection as a fraction (default: 0.01 = 1%%)") sort_grp.add_argument("--rmsd_cutoff", dest="rmsd_cutoff", default=None, type=float, metavar="ANGS", help="RMSD cutoff for duplicate detection in Angstrom (default: off; 0.125 matches CREST)") sel = parser.add_argument_group("Selectivity & PES") sel.add_argument("--label", dest="labels", default=None, action='append', metavar="NAME=PATTERN", help="Repeatable: assign files matching fnmatch PATTERN to species NAME " "for selectivity analysis. Two or more species supported " "(N=2: excess + ΔΔG; N>2: populations only). Example: " "--label R='*P_R_*' --label S='*P_S_*'.") sel.add_argument("--selectivity", dest="selectivity_spec", default=None, metavar="FILE.yaml", help="YAML spec for selectivity analysis. Top-level key 'labels' " "maps species -> fnmatch pattern, or 'files' maps species -> " "explicit list of files. Use instead of --label for many " "species or shareable specs.") sel.add_argument("--ee", dest="ee", default=None, type=str, metavar="patterns", help="DEPRECATED — use --label/--selectivity. Compute 2-species " "selectivity from a colon-delimited glob pair (e.g. '*_R*:*_S*'). " "Will be removed in v5.0.") sel.add_argument("--pes", dest="pes", default=None, metavar="file", help="YAML file defining a reaction pathway for tabulating relative energies") sel.add_argument("--graph", dest='graph', default=None, metavar="file", help="Graph a reaction profile from free energies; provide the PES YAML file") sel.add_argument("--nogconf", dest="gconf", action="store_false", default=True, help="Disable the Gconf correction for multi-conformer ensembles (enabled by default)") sel.add_argument("--lowest-only", dest="lowest_only", action="store_true", default=False, help="PES tables: use only each species' lowest qh-G conformer " "(overrides --nogconf and the default Gconf correction)") inp = parser.add_argument_group("Input") inp.add_argument("--custom_ext", type=str, default='', metavar="exts", help="Additional file extensions to accept, comma-separated " "(e.g. '.qfi,.gaussian'); also settable via GOODVIBES_CUSTOM_EXT env var") inp.add_argument("--exclude", dest="exclude", default=None, type=str, metavar="PATTERN", help="Glob pattern to exclude files from the analysis (e.g. '*_TZ.out')") inp.add_argument("--spc", dest="spc", type=str, default=None, metavar="suffix", help="Single-point correction suffix: reads energy from FILE_SPC.ext " "(e.g. --spc TZ reads from FILE_TZ.log)") inp.add_argument("--import", dest="import_path", default=None, type=str, metavar="PATH", help="Read pre-parsed data from a v1.0 JSON file (or a legacy " "--cache-save envelope) instead of re-parsing the input " "QC outputs. Useful for fast re-analysis at a different " "temperature/concentration without re-reading large outputs.") inp.add_argument("--cache-read", dest="cache_read", default=None, type=str, metavar="FILE", help="(deprecated, use --import instead) Read pre-parsed data from a JSON file.") out = parser.add_argument_group("Output & reports") out.add_argument("--output", dest="output", default="output", metavar="name", help="Base name for the output file, written as GoodVibes_OUTPUT.dat (default: output)") out.add_argument("--dp", dest="dp", default=6, type=int, metavar="DP", help="Number of decimal places for energy values in output (default: 6)") out.add_argument("--json", dest="json_path", default=None, metavar="PATH", help="Write structured results to PATH as JSON (every parsed and computed " "field per file plus the run options). Schema is preview/v0.1.") out.add_argument("--csv", dest="csv_path", default=None, metavar="PATH", help="Write per-file thermochemistry to PATH as CSV (one row per " "structure; columns mirror ThermoResult). Requires pandas; " "install with `pip install goodvibes[full]`.") out.add_argument("--parquet", dest="parquet_path", default=None, metavar="PATH", help="Write per-file thermochemistry to PATH as Parquet (same " "columns as --csv). Requires pandas + a Parquet engine " "(pyarrow); install with `pip install goodvibes[full]`.") out.add_argument("--xyz", dest="xyz", action="store_true", default=False, help="Write optimized Cartesian coordinates to a .xyz file") out.add_argument("--strip-plot", dest="strip_plot_path", default=None, metavar="PATH", help="Save a per-species ΔG strip plot to PATH (PNG/PDF/SVG by " "extension). Requires --label or --selectivity to define " "buckets; matplotlib via `pip install goodvibes[plot]`.") out.add_argument("--pes-plot", dest="pes_plot_path", default=None, metavar="PATH", help="Save a clean reaction-profile diagram to PATH (PNG/PDF/SVG " "by extension). Requires --pes; uses goodvibes.plot.plot_pes " "(simpler than the legacy --graph FILE.yaml, no styling YAML " "needed). matplotlib via `pip install goodvibes[plot]`.") out.add_argument("--export", dest="export_path", default=None, type=str, metavar="PATH", help="Write the unified v1.0 JSON payload to PATH " "(per-file QCData + thermo + run options + selectivity / pes " "blocks). Identical to --json. The same file can be re-loaded " "via --import to skip parsing on a follow-up run.") out.add_argument("--cache-save", dest="cache_save", default=None, type=str, metavar="FILE", help="(deprecated, use --export instead) Save parsed data to a JSON file.") out.add_argument("--cpu", dest="cputime", action="store_true", default=False, help="Print total CPU time from output files") out.add_argument("--check", dest="check", action="store_true", default=False, help="Verify all files use the same program, level of theory, and solvation model; " "also flag potential duplicates") perf = parser.add_argument_group("Performance") perf.add_argument("--jobs", dest="jobs", type=int, default=1, metavar="N", help="Parse and compute thermochemistry in parallel across N " "worker processes (default 1, sequential). 0 = use all " "available CPU cores.") # Parse Arguments (options, args) = parser.parse_known_args() # If requested, turn on head-gordon enthalpy correction if options.Q: options.QH = True # If user has specified different file extensions if options.custom_ext or os.environ.get('GOODVIBES_CUSTOM_EXT', ''): custom_extensions = options.custom_ext.split(',') + os.environ.get('GOODVIBES_CUSTOM_EXT', '').split(',') for ext in custom_extensions: SUPPORTED_EXTENSIONS.add(ext.strip()) # Default value for inverting imaginary frequencies if options.invert is True: options.invert = -50.0 elif options.invert is not None and options.invert > 0: options.invert = -1 * options.invert # Build the command echo from the original CLI invocation. # File-path positional args (often dozens of .log/.out files) are # collapsed to a "<N files>" placeholder so the line stays readable. file_args, flag_args = [], [] for arg in sys.argv[1:]: if os.path.splitext(arg)[1].lower() in SUPPORTED_EXTENSIONS: file_args.append(arg) else: flag_args.append(arg) file_summary = f'<{len(file_args)} file{"" if len(file_args) == 1 else "s"}>' if file_args else '' parts = ['o Requested: goodvibes'] if file_summary: parts.append(file_summary) parts.extend(flag_args) command = ' '.join(parts) print(command) # Collect filenames from the unrecognized positional arguments files = [] for elem in args: if os.path.splitext(elem)[1].lower() not in SUPPORTED_EXTENSIONS: continue for file in glob(elem): if options.spc is not None and options.spc != 'link' and f'_{options.spc}.' in file: continue files.append(file) if options.spc is not None and options.spc != 'link': name, _ = os.path.splitext(file) if find_spc_file(name, options.spc) is None: sys.exit("\nError! No SPC calculation file '{0}_{1}.(log|out)' found! " "The --spc suffix must match the SPC filename exactly " "(e.g. --spc def2_TZVP for filename_def2_TZVP.log), or specify " "link job.\nFor help, use option '-h'\n".format(name, options.spc)) # Exclude files matching the --exclude glob pattern if options.exclude: from fnmatch import fnmatch files = [f for f in files if not fnmatch(f, options.exclude)] # Natural-sort so file order is deterministic across shells (some glob # implementations don't sort) and conf_10 follows conf_9, not conf_1. files.sort(key=natural_key) return options, files
[docs] def resolve_scaling_factor(files, options, level_of_theory): """Look up or validate the vibrational frequency scaling factor. If the user provided --freq_scale_factor, log it. Otherwise, attempt automatic lookup from the Truhlar database based on level of theory. Warns if multiple levels of theory are found. Parameters: files (list): output file paths. options (Namespace): parsed CLI options. Uses: freq_scale_factor, mm_freq_scale_factor, boltz, ee. level_of_theory (list): level of theory strings, one per file. """ if options.freq_scale_factor is not None: if 'ONIOM' not in level_of_theory[0]: log.info(f"\n User-defined vibrational scale factor {options.freq_scale_factor} for {level_of_theory[0]} level of theory") else: log.info(f"\n User-defined vibrational scale factor {options.freq_scale_factor} for QM region of {level_of_theory[0]}") else: # Look for vibrational scaling factor automatically. Truhlar's # database provides separate harm_fac (for partition functions) # and zpe_fac (for ZPE). When auto-detecting, use both; when the # user supplied --vscal explicitly, ZPE inherits it unless they # also passed --zpe-vscal (handled below). if all_same(level_of_theory): level = canonicalize_level(level_of_theory[0]) if level in scaling_data_dict: entry = scaling_data_dict[level] options.freq_scale_factor = entry.harm_fac if options.zpe_scale_factor is None: options.zpe_scale_factor = entry.zpe_fac ref = scaling_refs[entry.harm_ref] log.info("\n\no Found vibrational scaling factors of {:.3f} (harmonic, H/S) and " "{:.3f} (ZPE) for {} level of theory\n {}".format( entry.harm_fac, entry.zpe_fac, level_of_theory[0], ref)) # Warn if different levels of theory are found if not all_same(level_of_theory): print_check_fails(list(level_of_theory), list(files), "levels of theory") # Exit program if a comparison of Boltzmann factors is requested and level of theory is not uniform across all files if not all_same(level_of_theory) and (options.boltz or options.ee is not None): sys.exit("\n\n ✗ FATAL ERROR: Boltzmann factors require all species computed at the same level of theory\n") # Exit program if molecular mechanics scaling factor is given and all files are not ONIOM calculations if options.mm_freq_scale_factor is not None: if all_same(level_of_theory) and 'ONIOM' in level_of_theory[0]: log.info(f"\n\n User-defined vibrational scale factor {options.mm_freq_scale_factor} for MM region of {level_of_theory[0]}") log.info("\n {}".format(oniom_scale_ref)) else: sys.exit("\n Option --vmm is only for use in ONIOM calculation output files.\n " " help use option '-h'\n") if options.freq_scale_factor is None: options.freq_scale_factor = 1.0 # If no scaling factor is found use 1.0
[docs] def warn_orca_prescaled(files): """Warn about ORCA files where pre-scaled frequencies were un-scaled.""" for file in files: if file.endswith('.out'): try: # ORCA can emit non-UTF-8 bytes (paths, internal metadata); # match the io.py parsers and replace bad bytes rather than # crash on UnicodeDecodeError. with open(file, encoding='utf-8', errors='replace') as f: for line in f: if 'Scaling factor for frequencies' in line and 'already applied' in line: try: factor = float(line.split('=')[1].split('(')[0].strip()) except (IndexError, ValueError): factor = None if factor is not None and factor != 1.0: log.info("\n Caution! ORCA applied a frequency scaling factor of {:.6f} in {}. " "Frequencies have been un-scaled to avoid double-scaling.".format( factor, os.path.basename(file))) break except (IOError, OSError): pass
[docs] def validate_and_configure(options, solvation_model): """Validate solvent, print QH/QS configuration, and return (symm_option, vmm_option).""" # Checks to see whether the available free space of a requested solvent is defined if options.freespace is not None: freespace = get_free_space(options.freespace) if freespace != 1000.0: log.info(f"\n Specified solvent {options.freespace}: free volume {freespace / 10.0:.3f} (mol/l) corrects the translational entropy") # Check for implicit solvation if any('smd' in i.lower() or 'cpcm' in i.lower() for i in solvation_model): log.info("\n Caution! Implicit solvation (SMD/CPCM) detected. Enthalpic and entropic terms cannot be " "safely separated. Use them at your own risk!") if options.freq_cutoff != 100.0: options.S_freq_cutoff = options.freq_cutoff options.H_freq_cutoff = options.freq_cutoff # Summary of the quasi-harmonic treatment; print out the relevant reference if options.QS == "grimme": if options.freq_scale_factor == 1.0: log.info(f"\n\n ✔ Using mRRHO entropies with a frequency cut-off value (tau) of {options.S_freq_cutoff} cm-1") qs_ref = grimme_mRRHO_ref else: log.info(f"\n\n ✔ Using msRRHO entropies with a frequency cut-off value (tau) of {options.S_freq_cutoff} cm-1") qs_ref = grimme_msRRHO_ref elif options.QS == "truhlar": log.info(f"\n\n Using an RRHO treatment where low frequencies are adjusted to {options.S_freq_cutoff} cm-1") qs_ref = truhlar_ref else: fatal("\n ✗ FATAL ERROR: Unknown quasi-harmonic model " + options.QS + " specified (QS must = grimme or truhlar).") log.info("\n " + qs_ref) # Check if qh-H correction should be applied if options.QH: log.info(f"\n\n ✔ Enthalpy quasi-harmonic treatment: frequency cut-off value of {options.H_freq_cutoff} wavenumbers will be applied.") log.info("\n QH = Head-Gordon: Using an RRHO treatement with an approximation term for vibrational energy.") qh_ref = head_gordon_ref log.info("\n " + qh_ref + '\n') # Check if entropy symmetry correction should be applied if options.symm: log.info('\n ✔ Point groups and symmetry numbers auto-detected using pymsym: https://github.com/corinwagen/pymsym') else: log.info('\n ✔ Point group symmetry parsed directly from outputs - caution if not provided or incorrect! Use --symm to auto-detect with pymsym') # Whether single-point energies are to be used if options.spc: log.info("\n Combining final single point energy with thermal corrections.") # Solvent correction message if options.media: log.info(f"\n ✔ Applying standard concentration correction (based on density at 20C) to {options.media} solvent.")
def _calc_bbe_worker(args): """Top-level worker for ProcessPoolExecutor (must be picklable, hence not a closure). Takes (file, conc, qcdata, opts_dict) and returns a calc_bbe instance via the v5.0 from_options entry point.""" from .thermo import ThermoOptions file, conc, cached_qcdata, opts = args options = ThermoOptions( QS=opts['QS'], QH=opts['QH'], s_freq_cutoff=opts['S_freq_cutoff'], h_freq_cutoff=opts['H_freq_cutoff'], temperature=opts['temperature'], concentration=conc, freq_scale_factor=opts['freq_scale_factor'], zpe_scale_factor=opts.get('zpe_scale_factor'), solv=opts['freespace'], spc=opts['spc'], invert=opts['invert'], symm=opts['symm'], mm_freq_scale_factor=opts['mm_freq_scale_factor'], inertia=opts['inertia'], ) return calc_bbe.from_options(cached_qcdata if cached_qcdata is not None else file, options)
[docs] def compute_thermochem(files, options, qcdata_cache=None): """Run calc_bbe for each file and collect results. For files matching the --media solvent name, the neat solvent concentration is used instead of options.conc. Parallelized across `options.jobs` worker processes when `>1`; sequential otherwise. Parameters: files (list): output file paths. options (Namespace): parsed CLI options. Uses: QS, QH, S_freq_cutoff, H_freq_cutoff, temperature, conc, freq_scale_factor, freespace, spc, invert, symm, mm_freq_scale_factor, inertia, media, jobs. qcdata_cache (dict, optional): pre-parsed QCData keyed by basename. Returns: dict: file path → calc_bbe mapping (thermo_data). """ # Build per-file argument tuples sequentially (cheap; only the actual # calc_bbe work is parallelized). opts_dict = { 'QS': options.QS, 'QH': options.QH, 'S_freq_cutoff': options.S_freq_cutoff, 'h_freq_cutoff': options.H_freq_cutoff, 'H_freq_cutoff': options.H_freq_cutoff, 'temperature': options.temperature, 'freq_scale_factor': options.freq_scale_factor, 'zpe_scale_factor': getattr(options, 'zpe_scale_factor', None), 'freespace': options.freespace, 'spc': options.spc, 'invert': options.invert, 'symm': options.symm, 'mm_freq_scale_factor': options.mm_freq_scale_factor, 'inertia': options.inertia, } default_conc = options.conc if options.conc else ATMOS / (GAS_CONSTANT * options.temperature) per_file_args = [] for file in files: cached_qcdata = None if qcdata_cache is not None: key = os.path.splitext(os.path.basename(file))[0] cached_qcdata = qcdata_cache.get(key) conc = default_conc if options.media: media_conc = compute_media_conc(options.media, file) if media_conc is not None: conc = media_conc per_file_args.append((file, conc, cached_qcdata, opts_dict)) jobs = getattr(options, 'jobs', 1) or 1 if jobs <= 0: jobs = os.cpu_count() or 1 if jobs == 1 or len(files) <= 1: bbe_vals = [_calc_bbe_worker(args) for args in per_file_args] else: from concurrent.futures import ProcessPoolExecutor with ProcessPoolExecutor(max_workers=jobs) as ex: bbe_vals = list(ex.map(_calc_bbe_worker, per_file_args)) return dict(zip(files, bbe_vals))
[docs] def main(): """CLI entry point: parse arguments, compute thermochemistry, and print results.""" options, files = parse_arguments() # Start logger setup_logging("GoodVibes", options.output) # Print banner log.info(gv_banner) # Fail fast on unknown solvent names — let the user fix the typo before we # parse a thousand files. if options.media is not None: try: lookup_solvent(options.media) except ValueError as exc: fatal(f"\n ✗ FATAL ERROR: {exc}") if options.freespace is not None and options.freespace not in FREESPACE_SOLVENTS: fatal( f"\n ✗ FATAL ERROR: Unknown --freespace solvent {options.freespace!r}. " f"Supported (case-sensitive): {', '.join(FREESPACE_SOLVENTS)}." ) # Selectivity spec: parse --label and --selectivity into a label_patterns # / label_files dict before any heavy work, so typos fail immediately. label_patterns, label_files = None, None if options.labels and options.selectivity_spec: fatal("\n ✗ FATAL ERROR: --label and --selectivity are mutually exclusive.") if options.ee is not None and (options.labels or options.selectivity_spec): fatal("\n ✗ FATAL ERROR: --ee is incompatible with --label / --selectivity. " "Use one selectivity API at a time.") if options.labels: try: label_patterns = parse_label_args(options.labels) except ValueError as exc: fatal(f"\n ✗ FATAL ERROR: {exc}") elif options.selectivity_spec: try: mode, spec = load_label_yaml(options.selectivity_spec) except (ValueError, RuntimeError, FileNotFoundError, OSError) as exc: fatal(f"\n ✗ FATAL ERROR: {exc}") if mode == 'patterns': label_patterns = spec else: label_files = spec # --cache-read is deprecated in v5.0 in favor of --import; redirect with a # one-time warning. --cache-save likewise → --export. if options.cache_read is not None: import warnings warnings.warn( "--cache-read is deprecated; use --import instead. Both accept " "v1.0 JSON payloads and legacy cache envelopes.", DeprecationWarning, stacklevel=2, ) if options.import_path is None: options.import_path = options.cache_read if options.cache_save is not None: import warnings warnings.warn( "--cache-save is deprecated; use --export instead (or --json, " "which is the same writer).", DeprecationWarning, stacklevel=2, ) if options.export_path is None: options.export_path = options.cache_save options.cache_save = None # legacy save_cache block below skipped # --export is just a synonym for --json; if both are set, --json wins so # explicit --json scripts keep working unchanged. if options.export_path is not None and options.json_path is None: options.json_path = options.export_path # Load QCData cache early if requested (needed to determine file list in cache-only mode) qcdata_cache = None cache_only = False if options.import_path: qcdata_cache = load_cache(options.import_path) if not files: # Cache-only mode: derive file list from cache entries cache_only = True files = [key + '.cache' for key in qcdata_cache] # Check if user has specified any files if not files: sys.exit("\n! Specify at least one output files on the command line ¯\\_(ツ)_/¯ \n") # log.info('\n' + command + '\n') if options.temperature_interval is None: log.info(f" Temperature = {options.temperature} Kelvin") # Concentration / pressure if options.conc: log.info(f" Concentration = {options.conc} mol/L") else: log.info(" Pressure = 1 atm") if cache_only: # Cache-only mode: skip file validation (no output files on disk) level_of_theory, solvation_model = [], [] for key, qcdata in qcdata_cache.items(): level_of_theory.append('') # solvation_model may be a string or a list [sorted, display] depending on parser sm = qcdata.solvation_model if isinstance(sm, list): sm = sm[0] solvation_model.append(sm) if options.freq_scale_factor is None: options.freq_scale_factor = 1.0 log.info("\n Reading from QCData cache: " + options.import_path) else: # Collect file data and validate files, level_of_theory, solvation_model = collect_and_validate_files(files, options) # Resolve frequency scaling factor resolve_scaling_factor(files, options, level_of_theory) if options.import_path: log.info("\n Loaded QCData cache from " + options.import_path) # Warn about ORCA files if pre-scaled frequencies were un-scaled warn_orca_prescaled(files) # Validate options, configure and print validate_and_configure(options, solvation_model) # Compute thermochemistry for all files thermo_data = compute_thermochem(files, options, qcdata_cache=qcdata_cache) # Media concentration for display in output (the per-file conc override is handled in compute_thermochem) media_conc = None if options.media and options.media.lower() in solvents: mweight, density = solvents[options.media.lower()] media_conc = (density * 1000) / mweight # Save QCData cache if requested if options.cache_save: cache_dict = {} for file, bbe in thermo_data.items(): key = os.path.splitext(os.path.basename(file))[0] if hasattr(bbe, 'xyz') and bbe.xyz is not None: cache_dict[key] = qcdata_to_dict(bbe.xyz) save_cache(cache_dict, options.cache_save) log.info("\n Saved QCData cache to " + options.cache_save) # Sort output if requested if options.sort: thermo_data = sort_thermo(thermo_data, options.sort) # Deduplicate structures if requested (needed for both standard and PES output) dup_list = deduplicate(thermo_data, e_cutoff=options.e_cutoff, ro_cutoff=options.ro_cutoff, rmsd_cutoff=options.rmsd_cutoff) if options.duplicate else [] # Compute Boltzmann factors once (used by --boltz display and --ee selectivity) boltz_facs = None if options.boltz or options.ee is not None: boltz_key = options.boltz if options.boltz else 'gibbs' boltz_facs = get_boltz(thermo_data, options.temperature, dup_list, key=boltz_key) # Selectivity (new --label / --selectivity API). Computed once for # single-T mode; one entry per temperature in --ti scan mode. Two # methods are computed side-by-side: Boltzmann-averaged across all # conformers in each species, and lowest-conformer-only. selectivity_results = None selectivity_lowest_results = None if label_patterns is not None or label_files is not None: if label_patterns is not None: files_per_label = assign_files_to_labels(list(thermo_data), label_patterns) else: files_per_label = label_files sel_key = options.boltz if options.boltz else 'gibbs' try: if options.temperature_interval is None: selectivity_results = [compute_selectivity( thermo_data, files_per_label, options.temperature, dup_list=dup_list, key=sel_key)] selectivity_lowest_results = [compute_selectivity_lowest_only( thermo_data, files_per_label, options.temperature, dup_list=dup_list, key=sel_key)] else: # Mirror print_temperature_interval's parsing of --ti. ti = [float(x) for x in options.temperature_interval.split(',')] if len(ti) == 2: ti.append((ti[1] - ti[0]) / 10.0) temps = list(range(int(ti[0]), int(ti[1]) + 1, int(ti[2]))) selectivity_results = compute_selectivity_scan( thermo_data, files_per_label, temps, dup_list=dup_list, key=sel_key) selectivity_lowest_results = compute_selectivity_lowest_only_scan( thermo_data, files_per_label, temps, dup_list=dup_list, key=sel_key) except ValueError as exc: fatal(f"\n ✗ FATAL ERROR: {exc}") # Variables populated by temperature-interval mode, used by PES interval_bbe_data, interval, file_list = None, None, None # Standard mode: single temperature if options.temperature_interval is None: print_results(thermo_data, options, media_conc=media_conc, dup_list=dup_list, boltz_facs=boltz_facs) if selectivity_results is not None: print_selectivity_results({ 'Boltzmann-averaged': selectivity_results, 'Lowest conformer only': selectivity_lowest_results, }) # PES: build the v4.2 model once for single-T mode so we can pass it # to both the Rich table renderer and the JSON writer. T-interval mode # still flows through the legacy print_pes_results below. pes_result = None if options.pes and options.temperature_interval is None: from .pes_loader import load_pes pes_result = load_pes(options.pes, thermo_data, temperatures=[options.temperature]) # Structured (JSON) output — v1.0 stable schema. Additive; runs # alongside the .dat output. if options.json_path: # JSON write fires in both single-T and T-interval modes. In # T-interval mode the per-file thermo numbers reflect the base # temperature only; the temperature_interval value is recorded # in `options` for downstream consumers to scan. media_conc_per_file = {} if options.media: for file in thermo_data: mc = compute_media_conc(options.media, file) if mc is not None: media_conc_per_file[file] = mc write_json_results(thermo_data, options, options.json_path, media_conc_per_file=media_conc_per_file, boltz_facs=boltz_facs, selectivity_results=selectivity_results, selectivity_lowest_results=selectivity_lowest_results, pes_result=pes_result) # CSV / Parquet exports (single-T only; one row per structure, # ThermoResult columns). Pandas is in the optional `[full]` extras; # Parquet additionally needs pyarrow. Build the ThermoResult list # once and reuse for both formats. if (options.csv_path or options.parquet_path) and options.temperature_interval is None: from .api import bbe_to_result, to_dataframe, to_parquet try: results = [bbe_to_result(bbe, file) for file, bbe in thermo_data.items()] if options.csv_path: to_dataframe(results).to_csv(options.csv_path, index=False) log.info(f"\n ✔ CSV written to {options.csv_path}") if options.parquet_path: to_parquet(results, options.parquet_path) log.info(f"\n ✔ Parquet written to {options.parquet_path}") except ImportError as exc: fatal(str(exc)) # Selectivity strip plot — needs a SelectivityResult and a # {file: qh_g} lookup. matplotlib via `goodvibes[plot]`. if options.strip_plot_path: if selectivity_results is None or not selectivity_results: fatal("--strip-plot requires --label or --selectivity to define species buckets.") try: from .plot import plot_selectivity_strip except ImportError as exc: fatal(str(exc)) # Use the single-T (or first-T-in-scan) selectivity result. sel = selectivity_results[0] thermo_lookup = {f: bbe.qh_gibbs_free_energy for f, bbe in thermo_data.items()} ax = plot_selectivity_strip(sel, thermo_lookup) ax.figure.savefig(options.strip_plot_path, dpi=200, bbox_inches="tight") log.info(f"\n ✔ Strip plot written to {options.strip_plot_path}\n") # PES profile diagram — clean v5.0 plot driven by PESResult. # Reuses the pes_result that was built earlier for the Rich # tables / JSON in single-T mode. matplotlib via `goodvibes[plot]`. if options.pes_plot_path: if pes_result is None: fatal("--pes-plot requires --pes (a reaction pathway YAML) to define the profile.") try: from .plot import plot_pes except ImportError as exc: fatal(str(exc)) ax = plot_pes(pes_result) ax.figure.savefig(options.pes_plot_path, dpi=200, bbox_inches="tight") log.info(f"\n ✔ PES plot written to {options.pes_plot_path}\n") # Legacy --graph FILE.yaml: the busier matplotlib reaction profile # (bezier connectors, jitter overlays, YAML-driven styling). The # styling YAML can be the same file as --pes — both blocks # (`--- # PES` and `--- # FORMAT`) coexist. Wired here in main() # because v4.2's single-T path doesn't call print_pes_results, # which is where this used to live. if options.graph is not None: try: import matplotlib.pyplot as plt except ImportError: fatal("--graph requires matplotlib. Install with `pip install goodvibes[plot]`.") from .pes import get_pes, graph_reaction_profile graph_data = get_pes(options.graph, thermo_data, options.temperature, options.gconf, options.QH) graph_reaction_profile(graph_data, options, plt) # Perform checks for consistent options if options.check: check_files(thermo_data, options, level_of_theory) # Variable temperature analysis elif options.temperature_interval: print_temperature_interval(thermo_data, options, media_conc=media_conc, qcdata_cache=qcdata_cache) if selectivity_results is not None: print_selectivity_results({ 'Boltzmann-averaged': selectivity_results, 'Lowest conformer only': selectivity_lowest_results, }) # Print CPU usage if requested if options.cputime: print_cpu_time(thermo_data, options.exclude) # Tabulate relative values (PES). New Rich table renderer for the # single-T case; T-interval still uses the legacy text path. if options.pes: if options.temperature_interval is None: from .output import print_pes_tables print_pes_tables(pes_result, options, temperature=options.temperature) else: print_pes_results(thermo_data, options, dup_list, boltz_facs=boltz_facs, interval_bbe_data=interval_bbe_data, interval=interval, file_list=file_list) # Close the log logging.shutdown() # Write xyz file if requested if options.xyz: write_xyz("GoodVibes_"+options.output+".xyz", files, thermo_data) log.info(" ✔ Cartesian coordinates written to GoodVibes_"+options.output+".xyz\n")
if __name__ == "__main__": main()