Source code for hiphive.structure_container

"""
This module provides functionality for storing structures and their fit
matrices together with target forces and displacements
"""

import tarfile
import numpy as np
from collections import OrderedDict
from ase.calculators.singlepoint import SinglePointCalculator

from .io.read_write_files import (add_items_to_tarfile_hdf5,
                                  add_items_to_tarfile_pickle,
                                  add_items_to_tarfile_custom,
                                  add_list_to_tarfile_custom,
                                  read_items_hdf5,
                                  read_items_pickle,
                                  read_list_custom)

from .cluster_space import ClusterSpace
from .force_constant_model import ForceConstantModel
from .io.logging import logger
logger = logger.getChild('structure_container')


[docs]class StructureContainer: """ This class serves as a container for structures as well as associated fit properties and fit matrices. Parameters ----------- cs : ClusterSpace cluster space that is the basis for the container fit_structure_list : list(FitStructure) structures to be added to the container """ def __init__(self, cs, fit_structure_list=None): """ Attributes ----------- _cs : ClusterSpace cluster space that is the basis for the container _structure_list : list(FitStructure) structures to add to container _previous_fcm : ForceConstantModel FCM object used for last fit matrix calculation; check will be carried out to decide if this FCM can be used for a new structure or not, which often enables a considerable speed-up """ self._cs = cs.copy() self._previous_fcm = None # Add atoms from atoms_list self._structure_list = [] if fit_structure_list is not None: for fit_structure in fit_structure_list: if not isinstance(fit_structure, FitStructure): raise TypeError('Can only add FitStructures') self._structure_list.append(fit_structure) def __len__(self): return len(self._structure_list) def __getitem__(self, ind): return self._structure_list[ind] @property def data_shape(self): """ tuple : tuple of integers representing the shape of the fit data matrix """ n_cols = self._cs.n_dofs n_rows = sum(len(fs) * 3 for fs in self) if n_rows == 0: return None return n_rows, n_cols @property def cluster_space(self): """ ClusterSpace : copy of the cluster space the structure container is based on""" return self._cs.copy()
[docs] @staticmethod def read(fileobj, read_structures=True): """Restore a StructureContainer object from file. Parameters ---------- f : str or file object name of input file (str) or stream to load from (file object) read_structures : bool if True the structures will be read; if False only the cluster space will be read """ if isinstance(fileobj, str): tar_file = tarfile.open(mode='r', name=fileobj) else: tar_file = tarfile.open(mode='r', fileobj=fileobj) # Read clusterspace f = tar_file.extractfile('cluster_space') cs = ClusterSpace.read(f) # Read fitstructures fit_structure_list = None if read_structures: fit_structure_list = read_list_custom( tar_file, 'fit_structure', FitStructure.read) # setup StructureContainer sc = StructureContainer(cs, fit_structure_list) # Read previous FCM if it exists if 'previous_fcm' in tar_file.getnames(): f = tar_file.extractfile('previous_fcm') fcm = ForceConstantModel.read(f) sc._previous_fcm = fcm return sc
[docs] def write(self, f): """Write a StructureContainer instance to a file. Parameters ---------- f : str or file object name of input file (str) or stream to write to (file object) """ if isinstance(f, str): tar_file = tarfile.open(mode='w', name=f) else: tar_file = tarfile.open(mode='w', fileobj=f) # save cs and previous_fcm (if it exists) custom_items = dict(cluster_space=self._cs) if self._previous_fcm is not None: custom_items['previous_fcm'] = self._previous_fcm add_items_to_tarfile_custom(tar_file, custom_items) # save fit structures add_list_to_tarfile_custom(tar_file, self._structure_list, 'fit_structure') tar_file.close()
[docs] def add_structure(self, atoms, **meta_data): """Add a structure to the container. Note that custom information about the atoms object may not be stored inside, for example an ASE :class:`SinglePointCalculator` will not be kept. Parameters ---------- atoms : ase.Atoms the structure to be added; the Atoms object must contain supplementary per-atom arrays with displacements and forces meta_data : dict dict with meta_data about the atoms """ atoms_copy = atoms.copy() # atoms object must contain displacements if 'displacements' not in atoms_copy.arrays.keys(): raise ValueError('Atoms must have displacements array') # atoms object must contain forces if 'forces' not in atoms_copy.arrays.keys(): if isinstance(atoms.calc, SinglePointCalculator): atoms_copy.new_array('forces', atoms.get_forces()) else: raise ValueError('Atoms must have forces') # check if an identical atoms object already exists in the container for i, structure in enumerate(self._structure_list): if are_configurations_equal(atoms_copy, structure.atoms): raise ValueError('Atoms is identical to structure {}'.format(i)) logger.info('Adding structure') M = self._compute_fit_matrix(atoms) structure = FitStructure(atoms_copy, M, **meta_data) self._structure_list.append(structure)
[docs] def delete_all_structures(self): """ Remove all current structures in StructureContainer. """ self._structure_list = []
[docs] def get_fit_data(self, structures=None): """Return fit data for structures. The fit matrices and target forces for the structures are stacked into NumPy arrays. Parameters ---------- structures: list, tuple list of integers corresponding to structure indices. Defaults to None and in that case returns all fit data available. Returns ------- numpy.ndarray, numpy.ndarray stacked fit matrices, stacked target forces for the structures """ if structures is None: structures = range(len(self)) M_list, f_list = [], [] for i in structures: M_list.append(self._structure_list[i].fit_matrix) f_list.append(self._structure_list[i].forces.flatten()) if len(M_list) == 0: return None return np.vstack(M_list), np.hstack(f_list)
def __str__(self): if len(self._structure_list) > 0: return self._get_str_structure_list() else: return 'Empty StructureContainer' def __repr__(self): return 'StructureContainer({!r}, {!r})'.format( self._cs, self._structure_list) def _get_str_structure_list(self): """ Return formatted string of the structure list """ def str_structure(index, structure): fields = OrderedDict([ ('index', '{:^4}'.format(index)), ('num-atoms', '{:^5}'.format(len(structure))), ('avg-disp', '{:7.4f}' .format(np.mean([np.linalg.norm(d) for d in structure.displacements]))), ('avg-force', '{:7.4f}' .format(np.mean([np.linalg.norm(f) for f in structure.forces]))), ('max-force', '{:7.4f}' .format(np.max([np.linalg.norm(f) for f in structure.forces])))]) s = [] for name, value in fields.items(): n = max(len(name), len(value)) if index < 0: s += ['{s:^{n}}'.format(s=name, n=n)] else: s += ['{s:^{n}}'.format(s=value, n=n)] return ' | '.join(s) # table width dummy = self._structure_list[0] n = len(str_structure(-1, dummy)) # table header s = [] s.append(' Structure Container '.center(n, '=')) s += ['{:22} : {}'.format('Total number of structures', len(self))] s.append(''.center(n, '-')) s.append(str_structure(-1, dummy)) s.append(''.center(n, '-')) # table body for i, structure in enumerate(self._structure_list): s.append(str_structure(i, structure)) s.append(''.center(n, '=')) return '\n'.join(s) def _compute_fit_matrix(self, atoms): """ Compute fit matrix for a single atoms object """ logger.info('Computing fit matrix') if atoms != getattr(self._previous_fcm, 'atoms', None): logger.info(' Building new FCM object') self._previous_fcm = ForceConstantModel(atoms, self._cs) else: logger.info(' Reusing old FCM object') return self._previous_fcm.get_fit_matrix(atoms.get_array('displacements'))
[docs]class FitStructure: """This class holds a structure with displacements and forces as well as the fit matrix. Parameters ---------- atoms : ase.Atoms supercell structure fit_matrix : numpy.ndarray fit matrix, `N, M` array with `N = 3 * len(atoms)` meta_data : dict any meta data that needs to be stored in the FitStructure """ def __init__(self, atoms, fit_matrix, **meta_data): if 3 * len(atoms) != fit_matrix.shape[0]: raise ValueError('fit matrix not compatible with atoms') self._atoms = atoms self._fit_matrix = fit_matrix self.meta_data = meta_data @property def fit_matrix(self): """ numpy.ndarray : the fit matrix """ return self._fit_matrix @property def atoms(self): """ ase.Atoms : supercell structure """ return self._atoms.copy() @property def forces(self): """ numpy.ndarray : forces """ return self._atoms.get_array('forces') @property def displacements(self): """ numpy.ndarray : atomic displacements """ return self._atoms.get_array('displacements') def __getattr__(self, key): """ Accesses meta_data if possible and returns value. """ if key not in self.meta_data.keys(): return super().__getattribute__(key) return self.meta_data[key] def __len__(self): return len(self._atoms) def __str__(self): s = [] s.append(' FitStructure '.center(65, '=')) s.append('Formula: {}'.format(self.atoms.get_chemical_formula())) s.append(('Cell:' + '\n [{:9.5f} {:9.5f} {:9.5f}]'*3).format( *self.atoms.cell[0], *self.atoms.cell[1], *self.atoms.cell[2])) s.append('Atoms (positions, displacements, forces):') for atom, disp, force in zip(self.atoms, self.displacements, self.forces): array_fmt = '[ {:9.5f} {:9.5f} {:9.5f} ]' row_str = '{:3d} {}'.format(atom.index, atom.symbol) row_str += array_fmt.format(*atom.position) row_str += array_fmt.format(*disp) row_str += array_fmt.format(*force) s.append(row_str) return '\n'.join(s) def __repr__(self): return 'FitStructure({!r}, ..., {!r})'.format(self.atoms, self.meta_data)
[docs] def write(self, fileobj): """ Write the instance to file. Parameters ---------- fileobj : str or file object name of input file (str) or stream to write to (file object) """ if isinstance(fileobj, str): tar_file = tarfile.open(name=fileobj, mode='w') else: tar_file = tarfile.open(fileobj=fileobj, mode='w') items_pickle = dict(atoms=self._atoms, meta_data=self.meta_data) items_hdf5 = dict(fit_matrix=self.fit_matrix) add_items_to_tarfile_pickle(tar_file, items_pickle, 'items.pickle') add_items_to_tarfile_hdf5(tar_file, items_hdf5, 'fit_matrix.hdf5') tar_file.close()
[docs] @staticmethod def read(fileobj, read_fit_matrix=True): """ Read a OrientationFamily instance from a file. Parameters ---------- fileobj : str or file object name of input file (str) or stream to read from (file object) read_fit_matrix : bool whether or not to read the fit_matrix Returns ------- FitStructure instance """ if isinstance(fileobj, str): tar_file = tarfile.open(mode='r', name=fileobj) else: tar_file = tarfile.open(mode='r', fileobj=fileobj) items = read_items_pickle(tar_file, 'items.pickle') fit_matrix = read_items_hdf5(tar_file, 'fit_matrix.hdf5')['fit_matrix'] return FitStructure(items['atoms'], fit_matrix, **items['meta_data'])
[docs]def are_configurations_equal(atoms1, atoms2, tol=1e-10): """ Compare if two configurations are equal within some tolerance. This includes checking all available arrays in the two atoms objects. Parameters ---------- atoms1 : ase.Atoms atoms2 : ase.Atoms Returns ------- bool True if atoms are equal, False otherwise """ # pbc if not all(atoms1.pbc == atoms2.pbc): return False # cell if not np.allclose(atoms1.cell, atoms2.cell, atol=tol, rtol=0.0): return False # arrays if not len(atoms1.arrays.keys()) == len(atoms2.arrays.keys()): return False for key, array1 in atoms1.arrays.items(): if key not in atoms2.arrays.keys(): return False if not np.allclose(array1, atoms2.arrays[key], atol=tol, rtol=0.0): return False # passed all test, atoms must be equal return True