Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions ChangeLog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ Known issues
Change history
==============

2026-05-21 R2.2.0
-----------------

Modified:

* Use lark for better error reporting from the formula parser

2026-02-27 R2.1.0
-----------------

Expand Down
7 changes: 6 additions & 1 deletion doc/sphinx/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
sys.path.insert(0, os.path.abspath('../..'))
sys.path.insert(0, os.path.abspath('_extensions'))
import periodictable
from periodictable.lark_parse import grammar


# -- General configuration -----------------------------------------------------
Expand Down Expand Up @@ -65,6 +66,11 @@
('py:class', 'numpy._typing._array_like._ScalarT'),
('py:class', 'numpy._typing._nested_sequence._NestedSequence'),
('py:class', 'pyparsing.core.ParserElement'),
('py:class', 'lark.tree.Tree'),
('py:class', 'lark.lexer.Token'),
('py:class', 'lark.visitors.Transformer'),
('py:class', 'lark.visitors._Leaf_T'),
('py:class', 'lark.visitors._Return_T'),

('py:class', 'periodictable.core._AtomBase'),
('py:class', 'periodictable.core.IonSet'),
Expand Down Expand Up @@ -300,4 +306,3 @@
if os.path.exists('rst_prolog'):
with io.open('rst_prolog', encoding='utf-8') as fid:
rst_prolog = fid.read()

1 change: 1 addition & 0 deletions doc/sphinx/genmods.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def genfiles(package, package_name, modules, dir='api'):
#('__init__', 'Top level namespace'),
('core', 'Core table'),
('formulas', 'Chemical formula operations'),
('lark_parse', 'Chemical formula parser'),
('covalent_radius', 'Covalent radius'),
('constants', 'Fundamental constants'),
('crystal_structure', 'Crystal structure'),
Expand Down
106 changes: 77 additions & 29 deletions doc/sphinx/guide/formula_grammar.rst
Original file line number Diff line number Diff line change
Expand Up @@ -159,28 +159,68 @@ The grammar used for parsing formula strings is the following:

::

formula :: compound | mixture | nothing
mixture :: quantity | percentage
quantity :: number unit part ('//' number unit part)*
percentage :: number 'wt%|vol%' part ('//' number '%' part)* '//' part
part :: compound | '(' mixture ')'
compound :: (composite | fasta) density?
fasta :: ('dna' | 'rna' | 'aa') ':' [A-Z -*]+
composite :: group (separator group)*
group :: number element+ | '(' formula ')' number
element :: symbol isotope? ion? number?
symbol :: [A-Z][a-z]*
isotope :: '[' integer ']'
ion :: '{' integer? [+-] '}'
density :: '@' number [ni]?
number :: integer | fraction
integer :: [1-9][0-9]*
fraction :: ([1-9][0-9]* | 0)? '.' [0-9]*
separator :: space? '+'? space?
unit :: mass | volume | length
mass :: 'kg' | 'g' | 'mg' | 'ug' | 'ng'
volume :: 'L' | 'mL' | 'uL' | 'nL'
length :: 'cm' | 'mm' | 'um' | 'nm'
# formula: composite @ density | str:sequence @ density | mixture
formula : compound | mixture
compound : (composite | fasta) [density]
# Density applies to the entire composite, such as "NaCl + 29.2H2O @ 1.07n"
# For the density of a mixture you need parentheses: "(10 wt% NaCl // H2O)@1.07n"

# Activation only cares about total mass, so you can freely mix masses and volumes if
# you have the density for each component. For scattering you need the density of the
# mixture. When this is different from the mixture of densities, use (mixture)@density.
# For thin film samples, allow stacking of layers with the thickness of each layer.
# With density for each layer the relative quantities of each element in the stack can
# be calculated. Convert to mass by multiplying density by thickness (cm) and area (cm²).

# mixture: quantity compound // quantity compound // ...
mixture : byamount | byvolume | byweight | layers
byamount : quantity compound (MIX quantity compound)*
byvolume : volumepct compound (MIX percentage compound)* MIX compound
byweight : weightpct compound (MIX percentage compound)* MIX compound
layers : thickness compound (MIX thickness compound)*
quantity : NUMBER SPACE? (MASS | VOLUME) SPACE
weightpct : NUMBER SPACE? WEIGHTPCT SPACE
volumepct : NUMBER SPACE? VOLUMEPCT SPACE
thickness : NUMBER SPACE? LENGTH SPACE
percentage : NUMBER SPACE? "%" SPACE # Allows "3 % "
MIX : SPACE? "//" SPACE?
WEIGHTPCT : /%w((eigh)?t)?/ | /w((eigh)?t)?%/ | /%m(ass)?/ | /m(ass)?%/
VOLUMEPCT : /%v(ol(ume)?)?/ | /v(ol(ume)?)?%/
MASS : "kg" | "g" | "mg" | "ug" | "μg" | "ng"
VOLUME : "L" | "mL" | "uL" | "μL" | "nL"
LENGTH : "cm" | "mm" | "um" | "μm" | "nm" | "Ang" | "Å"

# FASTA sequence: (rna|dna|aa):SEQUENCE @ density
fasta : FASTA ":" SEQUENCE
FASTA : /[a-z]+/ # str:sequence reports better errors than /dna|rna|aa/:sequence
SEQUENCE : /[-A-Z *]+/

# composite: number group number group ... @density
# group: El count El count ...
composite : [NUMBER] group (SEPARATOR [NUMBER] group)*
group : ((atom | isoatom | "(" formula ")") [COUNT])+
atom : SYMBOL [isotope] [valence]
isoatom : SUPERINT SYMBOL [valence] # For example ²H for deuterium
isotope : "[" INTEGER "]"
valence : "{" [INTEGER] CHARGE "}" | [SUPERINT] SUPERCHARGE
density : SPACE? "@" SPACE? DENSITY [DENSITYMODE]
# could list all elements, but better error reporting if element symbol lookup fails
SYMBOL : /[A-Z][a-z]*/
CHARGE : /[+]+|[-]+/ # allow valence using {++} or {--}
SUPERCHARGE: /\u207A+|\u207B+/ # unicode valence such as Ca⁺⁺ and O²⁻
DENSITY : NUMBER # using alias DENSITY for number for better error reporting
DENSITYMODE: /[ni]/ # n=natural density, i=isotopic density
COUNT : NUMBER | SUBNUM # atom counts can be normal numbers or unicode subscripts
SEPARATOR : SPACE? /[+•·]/ SPACE? | SPACE # For example, CaCO₃·6H₂O

SPACE : /[ \t\n\r]+/
NUMBER : INTEGER | FRACTION
INTEGER : /[1-9][0-9]*/
FRACTION : /([1-9][0-9]*|0)?[.][0-9]*/ # allow all floats?
SUBNUM : SUBINT | SUBFRAC
SUBINT : /(\u2080|[\u2081-\u2089][\u2080-\u2089]*)/
SUBFRAC : /(\u2080|[\u2081-\u2089][\u2080-\u2089]*|)([.][\u2080-\u2089]*)/
SUPERINT : /(\u2070|[\u00B9\u00B2\u00B3\u2074-\u2079][\u2070\u00B9\u00B2\u00B3\u2074-\u2079]*)/

Formulas can also be constructed from atoms or other formulas:

Expand Down Expand Up @@ -259,18 +299,26 @@ following is a 2:1 mixture of water and heavy water:
>>> H2O = formula('H2O',natural_density=1)
>>> D2O = formula('D2O',natural_density=1)
>>> mix = mix_by_volume(H2O,2,D2O,1)
>>> print(f"{mix} {mix.density:.4g}")
(H2O)2D2O 1.037
>>> print(f"{mix} @ {mix.density:.4g}")
(H2O)2D2O @ 1.037

Note that this is different from a 2:1 mixture by weight:
This is different from a 2:1 mixture by weight:

>>> mix = mix_by_weight(H2O,2,D2O,1)
>>> print(f"{mix} {mix.density:.4g}")
(H2O)2.22339D2O 1.035
>>> print(f"{mix} @ {mix.density:.4g}")
(H2O)2.22339D2O @ 1.035

Except in the simplest of cases, the density of the mixture cannot be
computed from the densities of the components, and the resulting density
should be set explicitly.
computed from the densities of the components. Even when the component
densities are known, the resulting density should be set explicitly:

>>> mix = mix_by_weight("NaCl@2.17", 0.1, "H2O@1", 0.9)
>>> print(f"{mix} @ {mix.density:.4g}")
NaCl(H2O)29.1956 @ 1.057
>>> mix = mix_by_weight("NaCl@2.17", 0.1, "H2O@1", 0.9, density=1.07)
>>> print(f"{mix} @ {mix.density:.4g}")
NaCl(H2O)29.1956 @ 1.07


Derived values
--------------
Expand Down
6 changes: 3 additions & 3 deletions periodictable/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,9 +398,9 @@ def __init__(self, element_or_isotope: Element|Isotope):
def __getitem__(self, charge: int) -> Ion:
if charge not in self.ionset:
if charge not in self.element_or_isotope.ions:
raise ValueError("%(charge)d is not a valid charge for %(symbol)s"
% dict(charge=charge,
symbol=self.element_or_isotope.symbol))
valence = f"{abs(charge)}{'-' if charge < 0 else '+'}"
symbol = self.element_or_isotope.symbol
raise ValueError(f"valence {valence} is not valid for {symbol}")
self.ionset[charge] = Ion(self.element_or_isotope, charge)
return self.ionset[charge]

Expand Down
8 changes: 4 additions & 4 deletions periodictable/fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
from collections.abc import Iterator
from typing import IO, cast

from .formulas import formula as parse_formula, Formula, FormulaInput
from .formulas import formula as make_formula, Formula, FormulaInput
from .nsf import neutron_sld
from .xsf import xray_sld
from .core import default_table, Atom
Expand Down Expand Up @@ -177,7 +177,7 @@ def __init__(
elements = default_table()

# Fill in density or cell_volume.
M = parse_formula(formula, natural_density=density)
M = make_formula(formula, natural_density=density)
# CRUFT: use of T rather than H[1] is deprecated since 1.5.3
if elements.T in M.atoms:
warnings.warn("Use of tritium for labile hydrogen is deprecated."
Expand Down Expand Up @@ -274,7 +274,7 @@ def __init__(self, name: str, sequence: str, type: str='aa'):
structure.extend(list(p.labile_formula.structure))
# Add H + OH terminators to the sequence
structure.extend(((2, elements.H[1]), (1, elements.O)))
formula = parse_formula(structure).hill
formula = make_formula(structure).hill

Molecule.__init__(
self, name, formula, cell_volume=cell_volume, charge=charge)
Expand Down Expand Up @@ -356,7 +356,7 @@ def _code_average(bases, code_table) -> tuple[Formula, float, float]:
Note: averaging can lead to a fractional charge on the returned molecule.
"""
n = len(bases)
formula, cell_volume, charge = parse_formula(), 0., 0.
formula, cell_volume, charge = make_formula(), 0., 0.
for c in bases:
base = code_table[c]
formula += base.labile_formula
Expand Down
Loading
Loading