"""Programmatic API façade for GoodVibes (v4.2 item 5).
A small import-friendly entry point for notebooks and scripts. Replaces
the 15-positional-arg `calc_bbe` constructor with kwargs that have the
same defaults as the CLI, and returns a structured `ThermoResult` rather
than the raw `calc_bbe` instance.
Example::

    import glob
    from goodvibes import compute_thermo, compute_batch

    r = compute_thermo("file.log", QH=True, spc="TZ")
    print(r.qh_gibbs_free_energy)
    rs = compute_batch(glob.glob("*.log"))

Internally just calls `calc_bbe`; no behavior change relative to the CLI.
The underlying `calc_bbe` and `QCData` instances stay accessible via
`result.bbe` / `result.qcdata` for advanced use (e.g. PES analysis,
direct attribute reads not yet promoted to the result dataclass).
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, List, Optional, Sequence
from .constants import ATMOS, GAS_CONSTANT
from .io import parse_qcdata, read_initial
from .thermo import ThermoOptions, calc_bbe
from .utils import display_name
from .vib_scale_factors import canonicalize_level, scaling_data_dict
[docs]
@dataclass(frozen=True)
class ThermoResult:
    """Immutable record of the thermochemistry computed for one structure.

    Units: energies are in Hartree, entropies in Hartree/K and
    frequencies in cm⁻¹. A quantity that is not available for the
    given file (for example `qh_enthalpy` when `QH=False`, or
    `sp_energy` when `spc=None`) is stored as `None` instead of a
    sentinel value.
    """

    # --- identification -------------------------------------------------
    file: str  # path exactly as passed in (may be absolute)
    name: str  # display name: basename without extension

    # --- energies (Hartree) ---------------------------------------------
    scf_energy: Optional[float]
    sp_energy: Optional[float]
    zpe: Optional[float]
    enthalpy: Optional[float]
    qh_enthalpy: Optional[float]

    # --- entropies (Hartree/K) ------------------------------------------
    entropy: Optional[float]
    qh_entropy: Optional[float]

    # --- Gibbs energies (Hartree) ---------------------------------------
    gibbs_free_energy: Optional[float]
    qh_gibbs_free_energy: Optional[float]

    # --- frequencies (cm⁻¹) ---------------------------------------------
    frequency_wn: Optional[List[float]]
    im_frequency_wn: Optional[List[float]]
    inverted_freqs: Optional[List[float]]

    # --- metadata copied from the source QCData -------------------------
    point_group: Optional[str]
    symmno: Optional[int]
    linear_mol: bool
    multiplicity: Optional[int]
    job_type: Optional[str]
    level_of_theory: Optional[str]
    program: Optional[str]

    # --- escape hatches -------------------------------------------------
    # The underlying objects, kept for advanced use (PES analysis, or
    # reading attributes not yet promoted to this result surface).
    bbe: Any
    qcdata: Any

    @property
    def has_thermo(self) -> bool:
        """Whether a Gibbs free energy is present on this result.

        True when calc_bbe found enough information to compute G(T);
        False for SP-only outputs (no frequency block).
        """
        gibbs = self.gibbs_free_energy
        return gibbs is not None
[docs]
def compute_thermo(
    path: Optional[str] = None,
    *,
    qcdata: Any = None,
    QS: str = "grimme",
    QH: bool = False,
    s_freq_cutoff: float = 100.0,
    h_freq_cutoff: float = 100.0,
    temperature: float = 298.15,
    concentration: Optional[float] = None,
    freq_scale_factor: Optional[float] = None,
    zpe_scale_factor: Optional[float] = None,
    solv: Optional[str] = None,
    spc: Optional[str] = None,
    invert: Optional[float] = None,
    symm: bool = False,
    mm_freq_scale_factor: Optional[float] = None,
    inertia: str = "global",
) -> ThermoResult:
    """Compute thermochemistry for one QC output file.

    Pass *either* `path` (a filesystem location) or a pre-parsed
    `qcdata` (skips re-parsing). If both are given, `qcdata` is what is
    handed to `calc_bbe`, while `path` is used for the result's `file`
    field and for the level-of-theory scan. When `concentration` is
    None the gas-phase reference at 1 atm is used (`P / RT`).

    Parameters mirror the CLI:
        path                  QC output file to process.
        qcdata                pre-parsed QCData object (alternative to `path`).
        QS                    'grimme' (default) or 'truhlar' quasi-harmonic
                              entropy
        QH                    apply Head-Gordon quasi-harmonic enthalpy
                              correction
        s_freq_cutoff         entropy cutoff (cm⁻¹)
        h_freq_cutoff         enthalpy cutoff (cm⁻¹) — only used when QH=True
        temperature           K
        concentration         mol/L. None → gas phase 1 atm.
        freq_scale_factor     None → auto-lookup harm_fac from level of
                              theory. Applied to the partition-function
                              frequencies (used in H_vib and S_vib).
        zpe_scale_factor      None → auto-lookup zpe_fac from level of
                              theory. Applied to ZPE only. If
                              `freq_scale_factor` is explicitly set but
                              `zpe_scale_factor` is None, ZPE inherits
                              `freq_scale_factor` (back-compat).
        solv                  None (default; the CLI spells this 'none'), or
                              solvent name for free-space correction
        spc                   None, 'link', or filename suffix for SPC files
        invert                None, or threshold for converting small
                              imag → real
        symm                  apply pymsym symmetry-number correction
        mm_freq_scale_factor  forwarded unchanged to `ThermoOptions`
        inertia               forwarded unchanged to `ThermoOptions`
                              (default "global")

    Returns:
        A `ThermoResult` built from the `calc_bbe` instance.

    Raises:
        ValueError: if neither `path` nor `qcdata` is given, or if
            `temperature` is not positive while the default 1 atm
            concentration has to be derived from it.
    """
    if path is None and qcdata is None:
        raise ValueError("compute_thermo requires either `path` or `qcdata`")

    if concentration is None:
        # P / RT is undefined at T = 0 and negative below it; fail with a
        # clear message instead of a bare ZeroDivisionError or a silently
        # negative concentration.
        if temperature <= 0:
            raise ValueError(
                "temperature must be positive to derive the default 1 atm "
                f"concentration (got {temperature!r})"
            )
        concentration = ATMOS / (GAS_CONSTANT * temperature)

    options = ThermoOptions(
        QS=QS,
        QH=QH,
        s_freq_cutoff=s_freq_cutoff,
        h_freq_cutoff=h_freq_cutoff,
        temperature=temperature,
        concentration=concentration,
        freq_scale_factor=freq_scale_factor,
        zpe_scale_factor=zpe_scale_factor,
        solv=solv,
        spc=spc,
        invert=invert,
        symm=symm,
        mm_freq_scale_factor=mm_freq_scale_factor,
        inertia=inertia,
    )

    # from_options does the Truhlar-DB auto-lookup when freq/zpe scale
    # factors are None, so nothing has to be resolved here.
    bbe = calc_bbe.from_options(qcdata if qcdata is not None else path, options)

    # bbe_to_result scans `path` for the level-of-theory string itself when
    # none is supplied. Doing the same scan here too only duplicated the
    # logic and read the file twice whenever no level of theory was found.
    return bbe_to_result(bbe, path)
[docs]
def compute_batch(
    paths: Sequence[str],
    *,
    jobs: int = 1,
    **kwargs: Any,
) -> List[ThermoResult]:
    """Compute thermochemistry for a list of files.

    Parameters:
        paths: QC output file paths. Any iterable is accepted (list,
            tuple, generator, ...); it is materialised once up front.
            A single bare path (``str`` or ``os.PathLike``) is treated
            as a one-element batch rather than iterated character by
            character.
        jobs: parallelism level. ``1`` (default) is sequential — no
            process-pool overhead. ``> 1`` spawns up to that many worker
            processes via ``concurrent.futures.ProcessPoolExecutor``
            (never more than there are files). ``0`` or negative uses
            ``os.cpu_count()`` (or 1 if unknown).
        **kwargs: forwarded unchanged to every ``compute_thermo`` call.

    Returns:
        Results in input order (matches ``executor.map``'s contract).
        An empty input yields an empty list.

    Workers can't write to the orchestrator's logger, so per-file
    ``log.info`` from inside ``calc_bbe`` is silenced under
    ``jobs > 1``; warnings that surface through ``ThermoResult``
    (e.g. missing-frequency files) still come through unchanged.
    """
    import os

    # Normalise the input to a concrete list so that emptiness and length
    # checks work for generators and array-likes as well as sequences.
    if isinstance(paths, (str, os.PathLike)):
        path_list = [paths]
    else:
        path_list = list(paths)

    if not path_list:
        return []

    if jobs <= 0:
        jobs = os.cpu_count() or 1
    # More workers than files would only sit idle.
    jobs = min(jobs, len(path_list))

    if jobs == 1:
        return [compute_thermo(p, **kwargs) for p in path_list]

    from concurrent.futures import ProcessPoolExecutor
    from functools import partial

    fn = partial(compute_thermo, **kwargs)
    with ProcessPoolExecutor(max_workers=jobs) as ex:
        return list(ex.map(fn, path_list))
[docs]
def to_dataframe(results: Sequence[ThermoResult]):
    """Convert a list of ThermoResults into a pandas DataFrame.

    Pandas is an optional dependency; raises ImportError with an install
    hint if it isn't available. The DataFrame has one row per result
    and columns for every public scalar field on `ThermoResult`
    (omits the `bbe` and `qcdata` references and the frequency lists).

    An empty `results` yields an empty DataFrame that still carries the
    `ThermoResult` scalar columns, so column access on the result does
    not depend on whether any file was processed.

    Parameters:
        results: iterable of `ThermoResult` instances.

    Returns:
        A `pandas.DataFrame` with one row per result.

    Raises:
        ImportError: if pandas is not installed.
    """
    try:
        import pandas as pd
    except ImportError as exc:  # pragma: no cover
        raise ImportError(
            "to_dataframe requires pandas; install with `pip install pandas`."
        ) from exc
    # Local import: the module header only imports `dataclass`.
    from dataclasses import fields

    # Object references and list-valued fields do not fit a flat table.
    skip = {"bbe", "qcdata", "frequency_wn", "im_frequency_wn", "inverted_freqs"}

    rows = [
        {f.name: getattr(r, f.name) for f in fields(r) if f.name not in skip}
        for r in results
    ]
    if rows:
        return pd.DataFrame(rows)

    # No rows to infer columns from: fall back to the dataclass definition
    # so the empty frame has the same columns as a populated one.
    columns = [f.name for f in fields(ThermoResult) if f.name not in skip]
    return pd.DataFrame(columns=columns)
[docs]
def to_parquet(results: Sequence[ThermoResult], path: str) -> None:
    """Serialise a list of ThermoResults to a Parquet file at `path`.

    The table is produced by `to_dataframe`, so the column set is the
    same. Requires pandas plus a Parquet engine (pyarrow or
    fastparquet); install with `pip install goodvibes[full]` or
    `pip install pyarrow`.

    Raises:
        ImportError: if pandas is missing (raised by `to_dataframe`), or
            if writing fails with an ImportError, which is re-raised
            with an install hint.
    """
    frame = to_dataframe(results)
    try:
        frame.to_parquet(path, index=False)
    except ImportError as exc:  # pragma: no cover
        hint = (
            "to_parquet requires a Parquet engine. Install pyarrow "
            "(`pip install pyarrow`) or fastparquet, or use "
            "`pip install goodvibes[full]`."
        )
        raise ImportError(hint) from exc
# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------
[docs]
def bbe_to_result(
    bbe: Any,
    path: Optional[str] = None,
    *,
    level_of_theory: Optional[str] = None,
) -> ThermoResult:
    """Build a `ThermoResult` from an existing `calc_bbe` instance.

    Lets CLI internals (which keep `thermo_data` as a `{path: calc_bbe}`
    dict) reuse the structured API without re-parsing anything.

    Parameters:
        bbe: the `calc_bbe` instance to project. Its `xyz` attribute, if
            present, is taken as the source QCData object.
        path: file the instance was computed from. When None, the
            QCData's `file` attribute is used, falling back to
            "<unknown>".
        level_of_theory: level-of-theory string for the result. When
            None and `path` is given, the file is scanned with
            `read_initial()`; pass it explicitly to skip that scan.

    Returns:
        A `ThermoResult` that also keeps `bbe` and the QCData object.
    """
    qc = getattr(bbe, "xyz", None)

    def _from_qc(attr: str, default: Any = None) -> Any:
        # QCData metadata is only consulted when a truthy QCData is attached.
        return getattr(qc, attr, default) if qc else default

    def _list_or_none(attr: str) -> Any:
        # Empty (or missing) frequency lists are reported as None.
        return getattr(bbe, attr, None) or None

    source = getattr(qc, "file", "<unknown>") if path is None else path

    # Fill in the level of theory from the file when the caller did not
    # provide one, so the field is populated for display. An unreadable
    # file just leaves it as None.
    if path is not None and level_of_theory is None:
        try:
            scanned = read_initial(path)[0]
        except (IOError, OSError):
            scanned = None
        if scanned and scanned != "none":
            level_of_theory = scanned

    # "!" appears to be calc_bbe's placeholder for a missing single-point
    # energy (TODO confirm); the result surface uses None instead.
    sp_energy = getattr(bbe, "sp_energy", None)
    sp_energy = None if sp_energy == "!" else sp_energy

    # calc_bbe leaves qh_enthalpy at 0.0 when CLI --QH is False; report
    # that as None (the raw value is still readable via .bbe.qh_enthalpy).
    qh_enthalpy = getattr(bbe, "qh_enthalpy", None)
    qh_enthalpy = None if qh_enthalpy == 0.0 else qh_enthalpy

    return ThermoResult(
        file=source,
        name=display_name(source),
        scf_energy=getattr(bbe, "scf_energy", None),
        sp_energy=sp_energy,
        zpe=getattr(bbe, "zpe", None),
        enthalpy=getattr(bbe, "enthalpy", None),
        qh_enthalpy=qh_enthalpy,
        entropy=getattr(bbe, "entropy", None),
        qh_entropy=getattr(bbe, "qh_entropy", None),
        gibbs_free_energy=getattr(bbe, "gibbs_free_energy", None),
        qh_gibbs_free_energy=getattr(bbe, "qh_gibbs_free_energy", None),
        frequency_wn=_list_or_none("frequency_wn"),
        im_frequency_wn=_list_or_none("im_frequency_wn"),
        inverted_freqs=_list_or_none("inverted_freqs"),
        point_group=_from_qc("point_group"),
        symmno=_from_qc("symmno"),
        linear_mol=bool(_from_qc("linear_mol", False)),
        multiplicity=_from_qc("multiplicity"),
        job_type=_from_qc("job_type"),
        level_of_theory=level_of_theory,
        program=_from_qc("program"),
        bbe=bbe,
        qcdata=qc,
    )