Source code for bsym.interface.pymatgen

from __future__ import annotations

from typing import Callable, Any
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer, PointGroupAnalyzer
from pymatgen.util.coord import coord_list_mapping_pbc, coord_list_mapping
from pymatgen.core.structure import Molecule, Structure
from pymatgen.core.operations import SymmOp

from bsym import SpaceGroup, SymmetryOperation, ConfigurationSpace, PointGroup
from bsym.configuration import load_configurations, save_configurations
from functools import partial
import numpy as np


[docs] def structure_cartesian_coordinates_mapping( structure: Structure, symmop: SymmOp ) -> np.ndarray: """ Maps the coordinates of pymatgen ``Structure`` according to a ``SymmOp`` symmetry operation. Args: structure: The pymatgen ``Structure``. symmop: The pymatgen symmetry operation object. Returns: The mapped Cartesian coordinates. """ return structure.lattice.get_cartesian_coords(symmop.operate_multi(structure.frac_coords))
[docs] def molecule_cartesian_coordinates_mapping( molecule: Molecule, symmop: SymmOp ) -> np.ndarray: """ Maps the coordinates of pymatgen ``Molecule`` according to a ``SymmOp`` symmetry operation. Args: molecule: The pymatgen ``Molecule``. symmop: The pymatgen symmetry operation object. Returns: The mapped Cartesian coordinates. """ return symmop.operate_multi(molecule.cart_coords)
[docs] def structure_mapping_list( new_structure: Structure, mapping_structure: Structure, atol: float ) -> list[int]: """ Gives the index mapping between two pymatgen ``Structure`` objects. Args: new_structure: The new structure. mapping_structure: The structure to map to. atol: Absolute tolerance for coordinate matching. Returns: List of indices such that mapping_structure.sites[indices] == new_structure.sites """ return coord_list_mapping_pbc( # type: ignore[no-any-return] new_structure.frac_coords, mapping_structure.frac_coords, atol=atol )
[docs] def molecule_mapping_list( new_molecule: Molecule, mapping_molecule: Molecule, atol: float ) -> list[int]: """ Gives the index mapping between two pymatgen ``Molecule`` objects. Args: new_molecule: The new molecule. mapping_molecule: The molecule to map to. atol: Absolute tolerance for coordinate matching. Returns: List of indices such that mapping_molecule.sites[indices] == new_molecule.sites """ return coord_list_mapping( # type: ignore[no-any-return] new_molecule.cart_coords, mapping_molecule.cart_coords, atol=atol )
[docs] def unique_symmetry_operations_as_vectors_from_structure( structure: Structure | Molecule, verbose: bool = False, subset: list[int] | None = None, atol: float = 1e-5 ) -> list[list[int]]: """ Uses `pymatgen`_ symmetry analysis to find the minimum complete set of symmetry operations for the space group of a structure. Args: structure: Structure or Molecule to be analysed. verbose: Print verbose output including space/point group information. subset: List of atom indices to be used for generating the symmetry operations. atol: Tolerance factor for the ``pymatgen`` `coordinate mapping`_ under each symmetry operation. Returns: A list of lists, containing the symmetry operations as vector mappings. .. _pymatgen: http://pymatgen.org .. _coordinate mapping: http://pymatgen.org/pymatgen.util.coord_utils.html#pymatgen.util.coord_utils.coord_list_mapping_pbc """ if isinstance(structure, Structure): instantiate_structure: Callable[..., Structure | Molecule] = partial( Structure, lattice=structure.lattice, coords_are_cartesian=True ) coord_mapping: Callable[[Structure | Molecule, SymmOp], np.ndarray] = structure_cartesian_coordinates_mapping # type: ignore[assignment] mapping_list: Callable[[Structure | Molecule, Structure | Molecule, float], list[int]] = structure_mapping_list # type: ignore[assignment] symmetry_analyzer: SpacegroupAnalyzer | PointGroupAnalyzer = SpacegroupAnalyzer(structure) if verbose: if isinstance(symmetry_analyzer, SpacegroupAnalyzer): print(f"The space group for this structure is {symmetry_analyzer.get_space_group_symbol()}") elif isinstance(structure, Molecule): instantiate_structure = Molecule coord_mapping = molecule_cartesian_coordinates_mapping # type: ignore[assignment] mapping_list = molecule_mapping_list # type: ignore[assignment] symmetry_analyzer = PointGroupAnalyzer(structure, tolerance=atol) if verbose: print(f"The point group for this structure is {symmetry_analyzer.sch_symbol}") else: raise ValueError('structure argument should be a Structure or Molecule object') symmetry_operations = symmetry_analyzer.get_symmetry_operations() seen: set[tuple[int, ...]] = set() mappings: list[list[int]] = [] mapping_structure: Structure | Molecule if subset: species_subset = [spec for i, spec in enumerate(structure.species) if i in subset] cart_coords_subset = [coord for i, coord in enumerate(structure.cart_coords) if i in subset] mapping_structure = instantiate_structure(species=species_subset, coords=cart_coords_subset) else: mapping_structure = structure for symmop in symmetry_operations: cart_coords = coord_mapping(mapping_structure, symmop) new_structure = instantiate_structure(species=mapping_structure.species, coords=cart_coords) new_mapping = [x + 1 for x in list(mapping_list(new_structure, mapping_structure, atol))] key = tuple(new_mapping) if key not in seen: seen.add(key) mappings.append(new_mapping) return mappings
[docs] def space_group_symbol_from_structure(structure: Structure) -> str: """ Returns the symbol for the space group defined by this structure. Args: structure: The input structure. Returns: The space group symbol. """ symmetry_analyzer = SpacegroupAnalyzer(structure) symbol = symmetry_analyzer.get_space_group_symbol() return symbol
[docs] def space_group_from_structure( structure: Structure, subset: list[int] | None = None, atol: float = 1e-5 ) -> SpaceGroup: """ Generates a ``SpaceGroup`` object from a `pymatgen` ``Structure``. Args: structure: Structure to be used to define the :any:`SpaceGroup`. subset: List of atom indices to be used for generating the symmetry operations. atol: Tolerance factor for the ``pymatgen`` `coordinate mapping`_ under each symmetry operation. Returns: A new :any:`SpaceGroup` instance .. _coordinate mapping: http://pymatgen.org/pymatgen.util.coord_utils.html#pymatgen.util.coord_utils.coord_list_mapping_pbc """ mappings = unique_symmetry_operations_as_vectors_from_structure(structure, subset=subset, atol=atol) symmetry_operations = [SymmetryOperation.from_vector(m) for m in mappings] return SpaceGroup(symmetry_operations=symmetry_operations)
[docs] def point_group_from_molecule( molecule: Molecule, subset: list[int] | None = None, atol: float = 1e-5 ) -> PointGroup: """ Generates a ``PointGroup`` object from a `pymatgen` ``Molecule``. Args: molecule: Molecule to be used to define the :any:`PointGroup`. subset: List of atom indices to be used for generating the symmetry operations. atol: Tolerance factor for the ``pymatgen`` `coordinate mapping`_ under each symmetry operation. Returns: A new :any:`PointGroup` instance .. _coordinate mapping: http://pymatgen.org/pymatgen.util.coord_utils.html#pymatgen.util.coord_utils.coord_list_mapping """ molecule = Molecule(molecule.species, molecule.cart_coords - molecule.center_of_mass) mappings = unique_symmetry_operations_as_vectors_from_structure(molecule, subset=subset, atol=atol) symmetry_operations = [SymmetryOperation.from_vector(m) for m in mappings] return PointGroup(symmetry_operations=symmetry_operations)
[docs] def configuration_space_from_structure( structure: Structure, subset: list[int] | None = None, atol: float = 1e-5 ) -> ConfigurationSpace: """ Generate a ``ConfigurationSpace`` object from a `pymatgen` ``Structure``. Args: structure: Structure to be used to define the :any:`ConfigurationSpace`. subset: List of atom indices to be used for generating the configuration space. atol: Tolerance factor for the ``pymatgen`` `coordinate mapping`_ under each symmetry operation. Returns: A new :any:`ConfigurationSpace` instance. .. _coordinate mapping: http://pymatgen.org/pymatgen.util.coord_utils.html#pymatgen.util.coord_utils.coord_list_mapping_pbc """ space_group = space_group_from_structure(structure, subset=subset, atol=atol) if subset is None: subset = list(range(1, len(structure) + 1)) config_space = ConfigurationSpace(objects=subset, symmetry_group=space_group) return config_space
[docs] def configuration_space_from_molecule( molecule: Molecule, subset: list[int] | None = None, atol: float = 1e-5 ) -> ConfigurationSpace: """ Generate a ``ConfigurationSpace`` object from a `pymatgen` ``Molecule``. Args: molecule: Molecule to be used to define the :any:`ConfigurationSpace`. subset: List of atom indices to be used for generating the configuration space. atol: Tolerance factor for the ``pymatgen`` `coordinate mapping`_ under each symmetry operation. Returns: A new :any:`ConfigurationSpace` instance. .. _coordinate mapping: http://pymatgen.org/pymatgen.util.coord_utils.html#pymatgen.util.coord_utils.coord_list_mapping """ point_group = point_group_from_molecule(molecule, subset=subset, atol=atol) if subset is None: subset = list(range(1, len(molecule) + 1)) config_space = ConfigurationSpace(objects=subset, symmetry_group=point_group) return config_space
[docs] def unique_structure_substitutions( structure: Structure | Molecule, to_substitute: str, site_distribution: dict[str, int], verbose: bool = False, atol: float = 1e-5, show_progress: bool | str = False ) -> list[Any]: """ Generate all symmetry-unique structures formed by substituting a set of sites in a `pymatgen` structure. Args: structure: The parent structure. to_substitute: Atom label for the sites to be substituted. site_distribution: A dictionary that defines the number of each substituting element. verbose: Verbose output. atol: Tolerance factor for the ``pymatgen`` `coordinate mapping`_ under each symmetry operation. Default=1e-5. show_progress: Show a progress bar. Setting to `True` gives a simple progress bar. Setting to `"notebook"` gives a Jupyter notebook compatible progress bar. Returns: A list of Structure objects for each unique substitution. Notes: The number of symmetry-equivalent configurations for each structure is stored in the `number_of_equivalent_configurations` attribute. If the parent structure was previously generated using this function (as part of a sequence of substitutions) the full configuration degeneracy of each symmetry inequivalent configuration is stored in the `full_configuration_degeneracy` attribute. If the parent structure is a standard Pymatgen Structure object, `number_of_equivalent_configurations` and `full_configuration_degeneracy` will be equal. .. _coordinate mapping: http://pymatgen.org/pymatgen.util.coord_utils.html#pymatgen.util.coord_utils.coord_list_mapping_pbc """ site_substitution_index = list(structure.indices_from_symbol(to_substitute)) if len(site_substitution_index) != sum(site_distribution.values()): raise ValueError("Number of sites from index does not match number from site distribution") if isinstance(structure, Structure): config_space = configuration_space_from_structure(structure, subset=site_substitution_index, atol=atol) elif isinstance(structure, Molecule): structure = Molecule(structure.species, structure.cart_coords - structure.center_of_mass) config_space = configuration_space_from_molecule(structure, subset=site_substitution_index, atol=atol) else: raise ValueError("pymatgen Structure or Molecule object expected") numeric_site_distribution, numeric_site_mapping = parse_site_distribution(site_distribution) unique_configurations = config_space.unique_configurations( numeric_site_distribution, verbose=verbose, show_progress=show_progress ) new_structures = [ new_structure_from_substitution( structure, site_substitution_index, [numeric_site_mapping[k] for k in c.tolist()] ) for c in unique_configurations ] if hasattr(structure, 'number_of_equivalent_configurations'): for s, c in zip(new_structures, unique_configurations): s.number_of_equivalent_configurations = c.count s.full_configuration_degeneracy = c.count * structure.full_configuration_degeneracy # type: ignore[union-attr] else: for s, c in zip(new_structures, unique_configurations): s.number_of_equivalent_configurations = c.count s.full_configuration_degeneracy = c.count return new_structures
[docs] def parse_site_distribution( site_distribution: dict[str, int] ) -> tuple[dict[int, int], dict[int, str]]: """ Converts a site distribution using species labels into one using integer labels. Args: site_distribution: e.g. `{'Mg': 1, 'Li': 3}` Returns: Tuple of (numeric_site_distribution, numeric_site_mapping) e.g. ({0: 3, 1: 1}, {0: 'Li', 1: 'Mg'}) """ numeric_site_distribution: dict[int, int] = {} numeric_site_mapping: dict[int, str] = {} for i, k in enumerate(site_distribution.keys()): numeric_site_distribution[i] = site_distribution[k] numeric_site_mapping[i] = k return numeric_site_distribution, numeric_site_mapping
[docs] def new_structure_from_substitution( parent_structure: Structure | Molecule, site_substitution_index: list[int], new_species_list: list[str] ) -> Any: """ Generate a new pymatgen ``Structure`` from site substitution parameters. Args: parent_structure: The parent pymatgen ``Structure`` or ``Molecule`` object. site_substitution_index: The list of site indices to be substituted. new_species_list: A list of the replacement atomic species. Returns: The new pymatgen ``Structure`` or ``Molecule``. Notes: pymatgen ``Structure`` and ``Molecule`` classes both subclass ``SiteCollection``. This function will also accept a parent ``Molecule`` object, and return a new ``Molecule``. """ if len(site_substitution_index) != len(new_species_list): raise ValueError("site_substitution_index and new_species_list must have same length") if any(i >= len(parent_structure) for i in site_substitution_index): raise ValueError("site_substitution_index contains invalid site indices") s = parent_structure.copy() for i, spec in zip(site_substitution_index, new_species_list): s[i] = spec return s
[docs] def unique_structure_substitutions_by_composition( structure: Structure | Molecule, to_substitute: str, species_list: list[str], bounds: dict[str, tuple[int | None, int | None]] | None = None, verbose: bool = False, atol: float = 1e-5, show_progress: bool | str = False ) -> dict[tuple[int, ...], list[Any]]: """ Generate all symmetry-unique structures for all compositions of substituting species. Args: structure: The parent structure. to_substitute: Atom label for the sites to be substituted. species_list: List of species to substitute, e.g., ['Li', 'Na', 'Mg']. Order determines composition tuple indices. bounds: Occupancy bounds for each species. Keys are species names, values are (min, max) tuples. e.g., {'Li': (1, 3), 'Na': (0, 2)} verbose: Verbose output. atol: Tolerance factor for coordinate mapping. Default=1e-5. show_progress: Show progress bars. Can be True, False, or "notebook". Returns: Mapping from composition tuples to lists of Structure objects. Keys are tuples like (2, 1, 1) corresponding to species_list order. Each Structure has a `number_of_equivalent_configurations` attribute. Example: >>> results = unique_structure_substitutions_by_composition( ... structure, 'X', ['Li', 'Na']) >>> # results[(2, 2)] gives structures with 2 Li and 2 Na """ # Get sites to substitute site_substitution_index = list(structure.indices_from_symbol(to_substitute)) n_sites = len(site_substitution_index) # Create configuration space if isinstance(structure, Structure): config_space = configuration_space_from_structure(structure, subset=site_substitution_index, atol=atol) elif isinstance(structure, Molecule): structure = Molecule(structure.species, structure.cart_coords - structure.center_of_mass) config_space = configuration_space_from_molecule(structure, subset=site_substitution_index, atol=atol) else: raise ValueError("pymatgen Structure or Molecule object expected") # Create species name to index mapping species_to_index = {species: i for i, species in enumerate(species_list)} # Convert bounds from species names to indices if provided bounds_numeric: dict[int, tuple[int | None, int | None]] | None = None if bounds is not None: bounds_numeric = {} for species, (min_val, max_val) in bounds.items(): if species not in species_to_index: raise ValueError(f"Species '{species}' in bounds not found in species_list") index = species_to_index[species] bounds_numeric[index] = (min_val, max_val) # Get unique configurations by composition configs_by_composition = config_space.unique_configurations_by_composition( n_species=len(species_list), bounds=bounds_numeric, verbose=verbose, show_progress=show_progress ) # Convert configurations to structures results: dict[tuple[int, ...], list[Any]] = {} for composition_tuple, configurations in configs_by_composition.items(): structures: list[Any] = [] for config in configurations: # Map configuration indices to species names species_for_sites = [species_list[species_idx] for species_idx in config.tolist()] # Create new structure new_structure = new_structure_from_substitution( structure, site_substitution_index, species_for_sites ) # Add metadata new_structure.number_of_equivalent_configurations = config.count structures.append(new_structure) results[composition_tuple] = structures return results
[docs] def random_unique_structure_substitutions( structure, to_substitute, site_distribution, n, sampling='degeneracy_weighted', seed=None, atol=1e-5, exclude_file=None, output_file=None, ): """ Generate n random symmetry-unique structures by substituting sites in a pymatgen structure. Args: structure (pymatgen.Structure): The parent structure. to_substitute (str): Atom label for the sites to be substituted. site_distribution (dict): Dictionary mapping species to counts, e.g. {'O': 8, 'F': 8}. n (int): Number of unique structures to generate. sampling (str): Sampling method. Either 'degeneracy_weighted' (default) or 'uniform'. 'degeneracy_weighted' samples configurations with probability proportional to their degeneracy. 'uniform' samples uniformly over equivalence classes. seed (int, optional): Random seed for reproducibility. atol (float): Tolerance factor for coordinate mapping. Default=1e-5. exclude_file (str or list[str], optional): Path(s) to JSON file(s) of configurations to exclude. output_file (str, optional): Path to save generated configurations. Returns: list[Structure]: A list of n unique Structure objects. Each has a `number_of_equivalent_configurations` attribute. """ site_substitution_index = list(structure.indices_from_symbol(to_substitute)) n_sites = len(site_substitution_index) config_space = configuration_space_from_structure( structure, subset=site_substitution_index, atol=atol ) numeric_site_distribution, numeric_site_mapping = parse_site_distribution( site_distribution ) # Load excluded configurations exclude = None if exclude_file is not None: exclude = [] if isinstance(exclude_file, str): exclude_file = [exclude_file] for filename in exclude_file: loaded = load_configurations(filename) for config in loaded: if len(config.tolist()) != n_sites: raise ValueError( f"Configuration length {len(config.tolist())} in '{filename}' " f"does not match number of sites {n_sites}" ) exclude.extend(loaded) configurations = config_space.random_unique_configurations( site_distribution=numeric_site_distribution, n=n, sampling=sampling, seed=seed, exclude=exclude, ) # Save configurations if requested if output_file is not None: save_configurations(configurations, output_file) unique_structures = [] for config in configurations: species_for_sites = [numeric_site_mapping[i] for i in config.tolist()] new_structure = new_structure_from_substitution( structure, site_substitution_index, species_for_sites ) new_structure.number_of_equivalent_configurations = config.count unique_structures.append(new_structure) return unique_structures