Coverage for hiphive/structure_container.py: 95%
190 statements
« prev ^ index » next coverage.py v7.6.8, created at 2024-11-28 11:20 +0000
« prev ^ index » next coverage.py v7.6.8, created at 2024-11-28 11:20 +0000
1"""
2This module provides functionality for storing structures and their fit
3matrices together with target forces and displacements
4"""
6import tarfile
7import numpy as np
8from collections import OrderedDict
9from ase.calculators.singlepoint import SinglePointCalculator
11from .input_output.read_write_files import (add_items_to_tarfile_hdf5,
12 add_items_to_tarfile_pickle,
13 add_items_to_tarfile_custom,
14 add_list_to_tarfile_custom,
15 read_items_hdf5,
16 read_items_pickle,
17 read_list_custom)
19from .cluster_space import ClusterSpace
20from .force_constant_model import ForceConstantModel
21from .input_output.logging_tools import logger
22logger = logger.getChild('structure_container')
25class StructureContainer:
26 """
27 This class serves as a container for structures as well as associated
28 fit properties and fit matrices.
30 Parameters
31 -----------
32 cs : ClusterSpace
33 cluster space that is the basis for the container
34 fit_structure_list : list(FitStructure)
35 structures to be added to the container
36 """
38 def __init__(self, cs, fit_structure_list=None):
39 """
40 Attributes
41 -----------
42 _cs : ClusterSpace
43 cluster space that is the basis for the container
44 _structure_list : list(FitStructure)
45 structures to add to container
46 _previous_fcm : ForceConstantModel
47 FCM object used for last fit matrix calculation; check will be
48 carried out to decide if this FCM can be used for a new structure
49 or not, which often enables a considerable speed-up
50 """
51 self._cs = cs.copy()
52 self._previous_fcm = None
54 # Add atoms from atoms_list
55 self._structure_list = []
56 if fit_structure_list is not None:
57 for fit_structure in fit_structure_list:
58 if not isinstance(fit_structure, FitStructure): 58 ↛ 59line 58 didn't jump to line 59 because the condition on line 58 was never true
59 raise TypeError('Can only add FitStructures')
60 self._structure_list.append(fit_structure)
62 def __len__(self):
63 return len(self._structure_list)
65 def __getitem__(self, ind):
66 return self._structure_list[ind]
68 @property
69 def data_shape(self):
70 """ tuple : tuple of integers representing the shape of the fit data
71 matrix """
72 n_cols = self._cs.n_dofs
73 n_rows = sum(len(fs) * 3 for fs in self)
74 if n_rows == 0:
75 return None
76 return n_rows, n_cols
78 @property
79 def cluster_space(self):
80 """ ClusterSpace : copy of the cluster space the structure
81 container is based on"""
82 return self._cs.copy()
84 @staticmethod
85 def read(fileobj, read_structures=True):
86 """Restore a StructureContainer object from file.
88 Parameters
89 ----------
90 f : str or file object
91 name of input file (str) or stream to load from (file object)
92 read_structures : bool
93 if True the structures will be read; if False only the cluster
94 space will be read
95 """
96 if isinstance(fileobj, str): 96 ↛ 97line 96 didn't jump to line 97 because the condition on line 96 was never true
97 tar_file = tarfile.open(mode='r', name=fileobj)
98 else:
99 tar_file = tarfile.open(mode='r', fileobj=fileobj)
101 # Read clusterspace
102 f = tar_file.extractfile('cluster_space')
103 cs = ClusterSpace.read(f)
105 # Read fitstructures
106 fit_structure_list = None
107 if read_structures:
108 fit_structure_list = read_list_custom(tar_file, 'fit_structure', FitStructure.read)
110 # setup StructureContainer
111 sc = StructureContainer(cs, fit_structure_list)
113 # Read previous FCM if it exists
114 if 'previous_fcm' in tar_file.getnames():
115 f = tar_file.extractfile('previous_fcm')
116 fcm = ForceConstantModel.read(f)
117 sc._previous_fcm = fcm
119 return sc
121 def write(self, f):
122 """Write a StructureContainer instance to a file.
124 Parameters
125 ----------
126 f : str or file object
127 name of input file (str) or stream to write to (file object)
128 """
130 if isinstance(f, str): 130 ↛ 131line 130 didn't jump to line 131 because the condition on line 130 was never true
131 tar_file = tarfile.open(mode='w', name=f)
132 else:
133 tar_file = tarfile.open(mode='w', fileobj=f)
135 # save cs and previous_fcm (if it exists)
136 custom_items = dict(cluster_space=self._cs)
137 if self._previous_fcm is not None:
138 custom_items['previous_fcm'] = self._previous_fcm
139 add_items_to_tarfile_custom(tar_file, custom_items)
141 # save fit structures
142 add_list_to_tarfile_custom(tar_file, self._structure_list, 'fit_structure')
144 tar_file.close()
146 def add_structure(self, atoms, **meta_data):
147 """Add a structure to the container.
149 Note that custom information about the atoms object may not be
150 stored inside, for example an ASE
151 :class:`SinglePointCalculator` will not be kept.
153 Parameters
154 ----------
155 atoms : ase.Atoms
156 the structure to be added; the Atoms object must contain
157 supplementary per-atom arrays with displacements and forces
158 meta_data : dict
159 dict with meta_data about the atoms
160 """
162 atoms_copy = atoms.copy()
164 # atoms object must contain displacements
165 if 'displacements' not in atoms_copy.arrays.keys():
166 raise ValueError('Atoms must have displacements array')
168 # atoms object must contain forces
169 if 'forces' not in atoms_copy.arrays.keys():
170 if isinstance(atoms.calc, SinglePointCalculator):
171 atoms_copy.new_array('forces', atoms.get_forces())
172 else:
173 raise ValueError('Atoms must have forces')
175 # check if an identical atoms object already exists in the container
176 for i, structure in enumerate(self._structure_list):
177 if are_configurations_equal(atoms_copy, structure.atoms):
178 raise ValueError('Atoms is identical to structure {}'.format(i))
180 logger.debug('Adding structure')
181 M = self._compute_fit_matrix(atoms)
182 structure = FitStructure(atoms_copy, M, **meta_data)
183 self._structure_list.append(structure)
185 def delete_all_structures(self):
186 """ Remove all current structures in StructureContainer. """
187 self._structure_list = []
189 def get_fit_data(self, structures=None):
190 """Return fit data for structures. The fit matrices and target forces
191 for the structures are stacked into NumPy arrays.
193 Parameters
194 ----------
195 structures: list, tuple
196 list of integers corresponding to structure indices. Defaults to
197 None and in that case returns all fit data available.
199 Returns
200 -------
201 numpy.ndarray, numpy.ndarray
202 stacked fit matrices, stacked target forces for the structures
203 """
204 if structures is None:
205 structures = range(len(self))
207 M_list, f_list = [], []
208 for i in structures:
209 M_list.append(self._structure_list[i].fit_matrix)
210 f_list.append(self._structure_list[i].forces.flatten())
212 if len(M_list) == 0:
213 return None
214 return np.vstack(M_list), np.hstack(f_list)
216 def __str__(self):
217 if len(self._structure_list) > 0:
218 return self._get_str_structure_list()
219 else:
220 return 'Empty StructureContainer'
222 def __repr__(self):
223 return 'StructureContainer({!r}, {!r})'.format(
224 self._cs, self._structure_list)
226 def _get_str_structure_list(self):
227 """ Return formatted string of the structure list """
228 def str_structure(index, structure):
229 fields = OrderedDict([
230 ('index', '{:^4}'.format(index)),
231 ('num-atoms', '{:^5}'.format(len(structure))),
232 ('avg-disp', '{:7.4f}'
233 .format(np.mean([np.linalg.norm(d) for d in
234 structure.displacements]))),
235 ('avg-force', '{:7.4f}'
236 .format(np.mean([np.linalg.norm(f) for f in
237 structure.forces]))),
238 ('max-force', '{:7.4f}'
239 .format(np.max([np.linalg.norm(f) for f in
240 structure.forces])))])
241 s = []
242 for name, value in fields.items():
243 n = max(len(name), len(value))
244 if index < 0:
245 s += ['{s:^{n}}'.format(s=name, n=n)]
246 else:
247 s += ['{s:^{n}}'.format(s=value, n=n)]
248 return ' | '.join(s)
250 # table width
251 dummy = self._structure_list[0]
252 n = len(str_structure(-1, dummy))
254 # table header
255 s = []
256 s.append(' Structure Container '.center(n, '='))
257 s += ['{:22} : {}'.format('Total number of structures', len(self))]
258 _, target_forces = self.get_fit_data()
259 s += ['{:22} : {}'.format('Number of force components', len(target_forces))]
260 s.append(''.center(n, '-'))
261 s.append(str_structure(-1, dummy))
262 s.append(''.center(n, '-'))
264 # table body
265 for i, structure in enumerate(self._structure_list):
266 s.append(str_structure(i, structure))
267 s.append(''.center(n, '='))
268 return '\n'.join(s)
270 def _compute_fit_matrix(self, atoms):
271 """ Compute fit matrix for a single atoms object """
272 logger.debug('Computing fit matrix')
273 if atoms != getattr(self._previous_fcm, 'atoms', None):
274 logger.debug(' Building new FCM object')
275 self._previous_fcm = ForceConstantModel(atoms, self._cs)
276 else:
277 logger.debug(' Reusing old FCM object')
278 return self._previous_fcm.get_fit_matrix(atoms.get_array('displacements'))
281class FitStructure:
282 """This class holds a structure with displacements and forces as well as
283 the fit matrix.
285 Parameters
286 ----------
287 atoms : ase.Atoms
288 supercell structure
289 fit_matrix : numpy.ndarray
290 fit matrix, `N, M` array with `N = 3 * len(atoms)`
291 meta_data : dict
292 any meta data that needs to be stored in the FitStructure
293 """
295 def __init__(self, atoms, fit_matrix, **meta_data):
296 if 3 * len(atoms) != fit_matrix.shape[0]: 296 ↛ 297line 296 didn't jump to line 297 because the condition on line 296 was never true
297 raise ValueError('fit matrix not compatible with atoms')
298 self._atoms = atoms
299 self._fit_matrix = fit_matrix
300 self.meta_data = meta_data
302 @property
303 def fit_matrix(self):
304 """ numpy.ndarray : the fit matrix """
305 return self._fit_matrix
307 @property
308 def atoms(self):
309 """ ase.Atoms : supercell structure """
310 return self._atoms.copy()
312 @property
313 def forces(self):
314 """ numpy.ndarray : forces """
315 return self._atoms.get_array('forces')
317 @property
318 def displacements(self):
319 """ numpy.ndarray : atomic displacements """
320 return self._atoms.get_array('displacements')
322 def __getattr__(self, key):
323 """ Accesses meta_data if possible and returns value. """
324 if key not in self.meta_data.keys():
325 return super().__getattribute__(key)
326 return self.meta_data[key]
328 def __len__(self):
329 return len(self._atoms)
331 def __str__(self):
332 s = []
333 s.append(' FitStructure '.center(65, '='))
334 s.append('Formula: {}'.format(self.atoms.get_chemical_formula()))
335 s.append(('Cell:' + '\n [{:9.5f} {:9.5f} {:9.5f}]'*3).format(
336 *self.atoms.cell[0], *self.atoms.cell[1], *self.atoms.cell[2]))
337 s.append('Atoms (positions, displacements, forces):')
338 for atom, disp, force in zip(self.atoms, self.displacements, self.forces):
339 array_fmt = '[ {:9.5f} {:9.5f} {:9.5f} ]'
340 row_str = '{:3d} {}'.format(atom.index, atom.symbol)
341 row_str += array_fmt.format(*atom.position)
342 row_str += array_fmt.format(*disp)
343 row_str += array_fmt.format(*force)
344 s.append(row_str)
345 return '\n'.join(s)
347 def __repr__(self):
348 return 'FitStructure({!r}, ..., {!r})'.format(self.atoms, self.meta_data)
350 def write(self, fileobj):
351 """ Write the instance to file.
353 Parameters
354 ----------
355 fileobj : str or file object
356 name of input file (str) or stream to write to (file object)
357 """
358 if isinstance(fileobj, str): 358 ↛ 359line 358 didn't jump to line 359 because the condition on line 358 was never true
359 tar_file = tarfile.open(name=fileobj, mode='w')
360 else:
361 tar_file = tarfile.open(fileobj=fileobj, mode='w')
363 items_pickle = dict(atoms=self._atoms, meta_data=self.meta_data)
364 items_hdf5 = dict(fit_matrix=self.fit_matrix)
366 add_items_to_tarfile_pickle(tar_file, items_pickle, 'items.pickle')
367 add_items_to_tarfile_hdf5(tar_file, items_hdf5, 'fit_matrix.hdf5')
369 tar_file.close()
371 @staticmethod
372 def read(fileobj, read_fit_matrix=True):
373 """ Read a OrientationFamily instance from a file.
375 Parameters
376 ----------
377 fileobj : str or file object
378 name of input file (str) or stream to read from (file object)
379 read_fit_matrix : bool
380 whether or not to read the fit_matrix
381 Returns
382 -------
383 FitStructure instance
384 """
385 if isinstance(fileobj, str): 385 ↛ 386line 385 didn't jump to line 386 because the condition on line 385 was never true
386 tar_file = tarfile.open(mode='r', name=fileobj)
387 else:
388 tar_file = tarfile.open(mode='r', fileobj=fileobj)
390 items = read_items_pickle(tar_file, 'items.pickle')
391 fit_matrix = read_items_hdf5(tar_file, 'fit_matrix.hdf5')['fit_matrix']
393 return FitStructure(items['atoms'], fit_matrix, **items['meta_data'])
396def are_configurations_equal(atoms1, atoms2, tol=1e-10):
397 """ Compare if two configurations are equal within some tolerance. This
398 includes checking all available arrays in the two atoms objects.
400 Parameters
401 ----------
402 atoms1 : ase.Atoms
403 atoms2 : ase.Atoms
405 Returns
406 -------
407 bool
408 True if atoms are equal, False otherwise
409 """
411 # pbc
412 if not all(atoms1.pbc == atoms2.pbc):
413 return False
415 # cell
416 if not np.allclose(atoms1.cell, atoms2.cell, atol=tol, rtol=0.0):
417 return False
419 # arrays
420 if not len(atoms1.arrays.keys()) == len(atoms2.arrays.keys()):
421 return False
422 for key, array1 in atoms1.arrays.items():
423 if key not in atoms2.arrays.keys():
424 return False
425 if not np.allclose(array1, atoms2.arrays[key], atol=tol, rtol=0.0):
426 return False
428 # passed all test, atoms must be equal
429 return True