"""PES data model.
Pure data + arithmetic; no I/O, no parsing, no global state. The model
is what the legacy parser and the new YAML parser produce, and what
the output layer (Rich tables, JSON) consumes.
Three layers:
ConformerSet one species, ≥1 conformers (calc_bbe instances)
Point stoichiometric sum of species at one node of a path
Pathway ordered list of points + a designated zero
PESResult container of pathways + options + temperatures
Arithmetic on ThermoVector collapses the legacy nine-parallel-list
pattern into one expression: rel = point.thermo(T) - zero.thermo(T).
"""
from __future__ import annotations
import math
import re
from dataclasses import dataclass, field
from typing import Any, List, Optional, Tuple
GAS_CONSTANT = 8.3144621 # J / K / mol
J_TO_AU = 4.184 * 627.509541 * 1000.0
KCAL_TO_AU = 627.509541
# ---------------------------------------------------------------------------
# ThermoVector
# ---------------------------------------------------------------------------
[docs]
@dataclass(frozen=True)
class ThermoVector:
"""Bundle of thermo quantities for one species at one temperature.
Energy fields are in Hartree; entropy fields are in Hartree/K (matching
calc_bbe). sp_energy is None when --spc was not used; arithmetic
propagates None (if either operand has sp_energy=None, the result does).
"""
scf_energy: float
zpe: float
enthalpy: float
qh_enthalpy: float
entropy: float
qh_entropy: float
gibbs: float
qh_gibbs: float
sp_energy: Optional[float] = None
def __add__(self, other: "ThermoVector") -> "ThermoVector":
if not isinstance(other, ThermoVector):
return NotImplemented
return ThermoVector(
scf_energy=self.scf_energy + other.scf_energy,
zpe=self.zpe + other.zpe,
enthalpy=self.enthalpy + other.enthalpy,
qh_enthalpy=self.qh_enthalpy + other.qh_enthalpy,
entropy=self.entropy + other.entropy,
qh_entropy=self.qh_entropy + other.qh_entropy,
gibbs=self.gibbs + other.gibbs,
qh_gibbs=self.qh_gibbs + other.qh_gibbs,
sp_energy=_add_opt(self.sp_energy, other.sp_energy),
)
def __sub__(self, other: "ThermoVector") -> "ThermoVector":
if not isinstance(other, ThermoVector):
return NotImplemented
return ThermoVector(
scf_energy=self.scf_energy - other.scf_energy,
zpe=self.zpe - other.zpe,
enthalpy=self.enthalpy - other.enthalpy,
qh_enthalpy=self.qh_enthalpy - other.qh_enthalpy,
entropy=self.entropy - other.entropy,
qh_entropy=self.qh_entropy - other.qh_entropy,
gibbs=self.gibbs - other.gibbs,
qh_gibbs=self.qh_gibbs - other.qh_gibbs,
sp_energy=_sub_opt(self.sp_energy, other.sp_energy),
)
def __mul__(self, k) -> "ThermoVector":
if not isinstance(k, (int, float)):
return NotImplemented
return ThermoVector(
scf_energy=self.scf_energy * k,
zpe=self.zpe * k,
enthalpy=self.enthalpy * k,
qh_enthalpy=self.qh_enthalpy * k,
entropy=self.entropy * k,
qh_entropy=self.qh_entropy * k,
gibbs=self.gibbs * k,
qh_gibbs=self.qh_gibbs * k,
sp_energy=self.sp_energy * k if self.sp_energy is not None else None,
)
__rmul__ = __mul__
[docs]
@classmethod
def zero(cls, with_sp: bool = False) -> "ThermoVector":
"""Identity element for addition."""
return cls(
scf_energy=0.0, zpe=0.0, enthalpy=0.0, qh_enthalpy=0.0,
entropy=0.0, qh_entropy=0.0, gibbs=0.0, qh_gibbs=0.0,
sp_energy=0.0 if with_sp else None,
)
def _add_opt(a: Optional[float], b: Optional[float]) -> Optional[float]:
if a is None or b is None:
return None
return a + b
def _sub_opt(a: Optional[float], b: Optional[float]) -> Optional[float]:
if a is None or b is None:
return None
return a - b
def _bbe_to_vector(bbe: Any) -> ThermoVector:
"""Project a calc_bbe instance into a ThermoVector.
Treats sp_energy=='!' (the sentinel for "SPC requested but missing")
as None — caller must decide whether that's an error.
`calc_bbe` leaves `qh_enthalpy` at 0.0 unless the CLI `--QH` flag was
set; in that case `qh_gibbs_free_energy` is computed from the plain
`enthalpy`. Mirror that fallback here so arithmetic on the model
produces correct relatives for both QH on and off runs.
"""
sp = getattr(bbe, "sp_energy", None)
if sp == "!":
sp = None
qh_h = bbe.qh_enthalpy if bbe.qh_enthalpy else bbe.enthalpy
return ThermoVector(
scf_energy=bbe.scf_energy,
zpe=bbe.zpe,
enthalpy=bbe.enthalpy,
qh_enthalpy=qh_h,
entropy=bbe.entropy,
qh_entropy=bbe.qh_entropy,
gibbs=bbe.gibbs_free_energy,
qh_gibbs=bbe.qh_gibbs_free_energy,
sp_energy=sp,
)
# ---------------------------------------------------------------------------
# ConformerSet
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# Point — stoichiometric sum of species
# ---------------------------------------------------------------------------
# Matches "2*A", "2 * A", or "A". Coefficient is optional; default 1.
_TERM_RE = re.compile(r"""
^\s*
(?:(\d+)\s*\*\s*)? # optional integer coefficient
(.+?) # species name (lazy, trims trailing ws)
\s*$
""", re.VERBOSE)
[docs]
def parse_point_label(label: str) -> List[Tuple[int, str]]:
"""Parse a point label like "2*A + B + C" into [(2,'A'), (1,'B'), (1,'C')].
Whitespace-tolerant. Coefficients must be positive integers. Returns the
terms in source order; duplicate species are allowed (they'll be summed).
"""
if not label or not label.strip():
raise ValueError("empty point label")
terms: List[Tuple[int, str]] = []
for chunk in label.split('+'):
m = _TERM_RE.match(chunk)
if m is None:
raise ValueError(f"could not parse term {chunk!r} in point {label!r}")
coeff_str, name = m.group(1), m.group(2).strip()
if not name:
raise ValueError(f"empty species name in point {label!r}")
coeff = int(coeff_str) if coeff_str else 1
if coeff < 1:
raise ValueError(
f"non-positive coefficient {coeff} in point {label!r}"
)
terms.append((coeff, name))
return terms
[docs]
@dataclass
class Point:
"""One node of a pathway: a stoichiometric sum of ConformerSets."""
label: str # original string, e.g. "2*A + B"
species: List[Tuple[int, ConformerSet]] # parsed: [(coeff, set), ...]
[docs]
def thermo(
self,
T: float,
gconf: bool = True,
QH: bool = True,
lowest_only: bool = False,
) -> ThermoVector:
"""Total thermo at this point: Σ coeff_i × ConformerSet_i.thermo(T).
Per-species rollup precedence:
lowest_only=True → the species' lowest qh-G conformer only
else gconf=True → lowest + Boltzmann adjustment + mixing entropy
else → pure Boltzmann-weighted average
"""
total: Optional[ThermoVector] = None
for coeff, cset in self.species:
if lowest_only:
term = cset.lowest_conformer() * coeff
elif gconf:
term = cset.gconf_corrected(T, QH=QH) * coeff
else:
term = cset.boltzmann_weighted(T) * coeff
total = term if total is None else total + term
if total is None:
raise ValueError(f"Point {self.label!r} has no species")
return total
[docs]
@classmethod
def from_label(cls, label: str, species_map: dict) -> "Point":
"""Build a Point from a label string and {name: ConformerSet} map."""
terms = parse_point_label(label)
resolved: List[Tuple[int, ConformerSet]] = []
missing = [name for _, name in terms if name not in species_map]
if missing:
raise KeyError(
f"unknown species in point {label!r}: {missing} "
f"(available: {sorted(species_map)})"
)
for coeff, name in terms:
resolved.append((coeff, species_map[name]))
return cls(label=label, species=resolved)
# ---------------------------------------------------------------------------
# Pathway / PESResult
# ---------------------------------------------------------------------------
[docs]
@dataclass
class Pathway:
"""Ordered points + a designated zero (defaults to points[0])."""
name: str
points: List[Point]
zero: Point = field(default=None) # type: ignore[assignment]
def __post_init__(self):
if not self.points:
raise ValueError(f"Pathway {self.name!r} has no points")
if self.zero is None:
self.zero = self.points[0]
[docs]
def relative(
self, T: float, gconf: bool = True, QH: bool = True, lowest_only: bool = False,
) -> List[ThermoVector]:
"""Per-point ΔThermo relative to self.zero, in Hartree."""
kw = dict(gconf=gconf, QH=QH, lowest_only=lowest_only)
zero = self.zero.thermo(T, **kw)
return [p.thermo(T, **kw) - zero for p in self.points]
[docs]
@dataclass
class PESOptions:
units: str = "kcal/mol" # 'kcal/mol' or 'kJ/mol'
decimals: int = 2
gconf: bool = True
QH: bool = True
spc_used: bool = False # True when --spc was set
lowest_only: bool = False # True when --lowest-only overrides gconf/Boltzmann
[docs]
def to_user_units(self, hartree: Optional[float]) -> Optional[float]:
if hartree is None:
return None
if self.units == "kJ/mol":
return J_TO_AU / 1000.0 * hartree
return KCAL_TO_AU * hartree # default kcal/mol
[docs]
@dataclass
class PESResult:
"""Top-level container: pathways + options + temperatures."""
pathways: List[Pathway]
options: PESOptions
temperatures: List[float] = field(default_factory=lambda: [298.15])