Coverage for hiphive/structure_container.py: 95%

190 statements  

« prev     ^ index     » next       coverage.py v7.6.8, created at 2024-11-28 11:20 +0000

1""" 

2This module provides functionality for storing structures and their fit 

3matrices together with target forces and displacements 

4""" 

5 

6import tarfile 

7import numpy as np 

8from collections import OrderedDict 

9from ase.calculators.singlepoint import SinglePointCalculator 

10 

11from .input_output.read_write_files import (add_items_to_tarfile_hdf5, 

12 add_items_to_tarfile_pickle, 

13 add_items_to_tarfile_custom, 

14 add_list_to_tarfile_custom, 

15 read_items_hdf5, 

16 read_items_pickle, 

17 read_list_custom) 

18 

19from .cluster_space import ClusterSpace 

20from .force_constant_model import ForceConstantModel 

21from .input_output.logging_tools import logger 

22logger = logger.getChild('structure_container') 

23 

24 

25class StructureContainer: 

26 """ 

27 This class serves as a container for structures as well as associated 

28 fit properties and fit matrices. 

29 

30 Parameters 

31 ----------- 

32 cs : ClusterSpace 

33 cluster space that is the basis for the container 

34 fit_structure_list : list(FitStructure) 

35 structures to be added to the container 

36 """ 

37 

38 def __init__(self, cs, fit_structure_list=None): 

39 """ 

40 Attributes 

41 ----------- 

42 _cs : ClusterSpace 

43 cluster space that is the basis for the container 

44 _structure_list : list(FitStructure) 

45 structures to add to container 

46 _previous_fcm : ForceConstantModel 

47 FCM object used for last fit matrix calculation; check will be 

48 carried out to decide if this FCM can be used for a new structure 

49 or not, which often enables a considerable speed-up 

50 """ 

51 self._cs = cs.copy() 

52 self._previous_fcm = None 

53 

54 # Add atoms from atoms_list 

55 self._structure_list = [] 

56 if fit_structure_list is not None: 

57 for fit_structure in fit_structure_list: 

58 if not isinstance(fit_structure, FitStructure): 58 ↛ 59line 58 didn't jump to line 59 because the condition on line 58 was never true

59 raise TypeError('Can only add FitStructures') 

60 self._structure_list.append(fit_structure) 

61 

62 def __len__(self): 

63 return len(self._structure_list) 

64 

65 def __getitem__(self, ind): 

66 return self._structure_list[ind] 

67 

68 @property 

69 def data_shape(self): 

70 """ tuple : tuple of integers representing the shape of the fit data 

71 matrix """ 

72 n_cols = self._cs.n_dofs 

73 n_rows = sum(len(fs) * 3 for fs in self) 

74 if n_rows == 0: 

75 return None 

76 return n_rows, n_cols 

77 

78 @property 

79 def cluster_space(self): 

80 """ ClusterSpace : copy of the cluster space the structure 

81 container is based on""" 

82 return self._cs.copy() 

83 

84 @staticmethod 

85 def read(fileobj, read_structures=True): 

86 """Restore a StructureContainer object from file. 

87 

88 Parameters 

89 ---------- 

90 f : str or file object 

91 name of input file (str) or stream to load from (file object) 

92 read_structures : bool 

93 if True the structures will be read; if False only the cluster 

94 space will be read 

95 """ 

96 if isinstance(fileobj, str): 96 ↛ 97line 96 didn't jump to line 97 because the condition on line 96 was never true

97 tar_file = tarfile.open(mode='r', name=fileobj) 

98 else: 

99 tar_file = tarfile.open(mode='r', fileobj=fileobj) 

100 

101 # Read clusterspace 

102 f = tar_file.extractfile('cluster_space') 

103 cs = ClusterSpace.read(f) 

104 

105 # Read fitstructures 

106 fit_structure_list = None 

107 if read_structures: 

108 fit_structure_list = read_list_custom(tar_file, 'fit_structure', FitStructure.read) 

109 

110 # setup StructureContainer 

111 sc = StructureContainer(cs, fit_structure_list) 

112 

113 # Read previous FCM if it exists 

114 if 'previous_fcm' in tar_file.getnames(): 

115 f = tar_file.extractfile('previous_fcm') 

116 fcm = ForceConstantModel.read(f) 

117 sc._previous_fcm = fcm 

118 

119 return sc 

120 

121 def write(self, f): 

122 """Write a StructureContainer instance to a file. 

123 

124 Parameters 

125 ---------- 

126 f : str or file object 

127 name of input file (str) or stream to write to (file object) 

128 """ 

129 

130 if isinstance(f, str): 130 ↛ 131line 130 didn't jump to line 131 because the condition on line 130 was never true

131 tar_file = tarfile.open(mode='w', name=f) 

132 else: 

133 tar_file = tarfile.open(mode='w', fileobj=f) 

134 

135 # save cs and previous_fcm (if it exists) 

136 custom_items = dict(cluster_space=self._cs) 

137 if self._previous_fcm is not None: 

138 custom_items['previous_fcm'] = self._previous_fcm 

139 add_items_to_tarfile_custom(tar_file, custom_items) 

140 

141 # save fit structures 

142 add_list_to_tarfile_custom(tar_file, self._structure_list, 'fit_structure') 

143 

144 tar_file.close() 

145 

146 def add_structure(self, atoms, **meta_data): 

147 """Add a structure to the container. 

148 

149 Note that custom information about the atoms object may not be 

150 stored inside, for example an ASE 

151 :class:`SinglePointCalculator` will not be kept. 

152 

153 Parameters 

154 ---------- 

155 atoms : ase.Atoms 

156 the structure to be added; the Atoms object must contain 

157 supplementary per-atom arrays with displacements and forces 

158 meta_data : dict 

159 dict with meta_data about the atoms 

160 """ 

161 

162 atoms_copy = atoms.copy() 

163 

164 # atoms object must contain displacements 

165 if 'displacements' not in atoms_copy.arrays.keys(): 

166 raise ValueError('Atoms must have displacements array') 

167 

168 # atoms object must contain forces 

169 if 'forces' not in atoms_copy.arrays.keys(): 

170 if isinstance(atoms.calc, SinglePointCalculator): 

171 atoms_copy.new_array('forces', atoms.get_forces()) 

172 else: 

173 raise ValueError('Atoms must have forces') 

174 

175 # check if an identical atoms object already exists in the container 

176 for i, structure in enumerate(self._structure_list): 

177 if are_configurations_equal(atoms_copy, structure.atoms): 

178 raise ValueError('Atoms is identical to structure {}'.format(i)) 

179 

180 logger.debug('Adding structure') 

181 M = self._compute_fit_matrix(atoms) 

182 structure = FitStructure(atoms_copy, M, **meta_data) 

183 self._structure_list.append(structure) 

184 

185 def delete_all_structures(self): 

186 """ Remove all current structures in StructureContainer. """ 

187 self._structure_list = [] 

188 

189 def get_fit_data(self, structures=None): 

190 """Return fit data for structures. The fit matrices and target forces 

191 for the structures are stacked into NumPy arrays. 

192 

193 Parameters 

194 ---------- 

195 structures: list, tuple 

196 list of integers corresponding to structure indices. Defaults to 

197 None and in that case returns all fit data available. 

198 

199 Returns 

200 ------- 

201 numpy.ndarray, numpy.ndarray 

202 stacked fit matrices, stacked target forces for the structures 

203 """ 

204 if structures is None: 

205 structures = range(len(self)) 

206 

207 M_list, f_list = [], [] 

208 for i in structures: 

209 M_list.append(self._structure_list[i].fit_matrix) 

210 f_list.append(self._structure_list[i].forces.flatten()) 

211 

212 if len(M_list) == 0: 

213 return None 

214 return np.vstack(M_list), np.hstack(f_list) 

215 

216 def __str__(self): 

217 if len(self._structure_list) > 0: 

218 return self._get_str_structure_list() 

219 else: 

220 return 'Empty StructureContainer' 

221 

222 def __repr__(self): 

223 return 'StructureContainer({!r}, {!r})'.format( 

224 self._cs, self._structure_list) 

225 

226 def _get_str_structure_list(self): 

227 """ Return formatted string of the structure list """ 

228 def str_structure(index, structure): 

229 fields = OrderedDict([ 

230 ('index', '{:^4}'.format(index)), 

231 ('num-atoms', '{:^5}'.format(len(structure))), 

232 ('avg-disp', '{:7.4f}' 

233 .format(np.mean([np.linalg.norm(d) for d in 

234 structure.displacements]))), 

235 ('avg-force', '{:7.4f}' 

236 .format(np.mean([np.linalg.norm(f) for f in 

237 structure.forces]))), 

238 ('max-force', '{:7.4f}' 

239 .format(np.max([np.linalg.norm(f) for f in 

240 structure.forces])))]) 

241 s = [] 

242 for name, value in fields.items(): 

243 n = max(len(name), len(value)) 

244 if index < 0: 

245 s += ['{s:^{n}}'.format(s=name, n=n)] 

246 else: 

247 s += ['{s:^{n}}'.format(s=value, n=n)] 

248 return ' | '.join(s) 

249 

250 # table width 

251 dummy = self._structure_list[0] 

252 n = len(str_structure(-1, dummy)) 

253 

254 # table header 

255 s = [] 

256 s.append(' Structure Container '.center(n, '=')) 

257 s += ['{:22} : {}'.format('Total number of structures', len(self))] 

258 _, target_forces = self.get_fit_data() 

259 s += ['{:22} : {}'.format('Number of force components', len(target_forces))] 

260 s.append(''.center(n, '-')) 

261 s.append(str_structure(-1, dummy)) 

262 s.append(''.center(n, '-')) 

263 

264 # table body 

265 for i, structure in enumerate(self._structure_list): 

266 s.append(str_structure(i, structure)) 

267 s.append(''.center(n, '=')) 

268 return '\n'.join(s) 

269 

270 def _compute_fit_matrix(self, atoms): 

271 """ Compute fit matrix for a single atoms object """ 

272 logger.debug('Computing fit matrix') 

273 if atoms != getattr(self._previous_fcm, 'atoms', None): 

274 logger.debug(' Building new FCM object') 

275 self._previous_fcm = ForceConstantModel(atoms, self._cs) 

276 else: 

277 logger.debug(' Reusing old FCM object') 

278 return self._previous_fcm.get_fit_matrix(atoms.get_array('displacements')) 

279 

280 

281class FitStructure: 

282 """This class holds a structure with displacements and forces as well as 

283 the fit matrix. 

284 

285 Parameters 

286 ---------- 

287 atoms : ase.Atoms 

288 supercell structure 

289 fit_matrix : numpy.ndarray 

290 fit matrix, `N, M` array with `N = 3 * len(atoms)` 

291 meta_data : dict 

292 any meta data that needs to be stored in the FitStructure 

293 """ 

294 

295 def __init__(self, atoms, fit_matrix, **meta_data): 

296 if 3 * len(atoms) != fit_matrix.shape[0]: 296 ↛ 297line 296 didn't jump to line 297 because the condition on line 296 was never true

297 raise ValueError('fit matrix not compatible with atoms') 

298 self._atoms = atoms 

299 self._fit_matrix = fit_matrix 

300 self.meta_data = meta_data 

301 

302 @property 

303 def fit_matrix(self): 

304 """ numpy.ndarray : the fit matrix """ 

305 return self._fit_matrix 

306 

307 @property 

308 def atoms(self): 

309 """ ase.Atoms : supercell structure """ 

310 return self._atoms.copy() 

311 

312 @property 

313 def forces(self): 

314 """ numpy.ndarray : forces """ 

315 return self._atoms.get_array('forces') 

316 

317 @property 

318 def displacements(self): 

319 """ numpy.ndarray : atomic displacements """ 

320 return self._atoms.get_array('displacements') 

321 

322 def __getattr__(self, key): 

323 """ Accesses meta_data if possible and returns value. """ 

324 if key not in self.meta_data.keys(): 

325 return super().__getattribute__(key) 

326 return self.meta_data[key] 

327 

328 def __len__(self): 

329 return len(self._atoms) 

330 

331 def __str__(self): 

332 s = [] 

333 s.append(' FitStructure '.center(65, '=')) 

334 s.append('Formula: {}'.format(self.atoms.get_chemical_formula())) 

335 s.append(('Cell:' + '\n [{:9.5f} {:9.5f} {:9.5f}]'*3).format( 

336 *self.atoms.cell[0], *self.atoms.cell[1], *self.atoms.cell[2])) 

337 s.append('Atoms (positions, displacements, forces):') 

338 for atom, disp, force in zip(self.atoms, self.displacements, self.forces): 

339 array_fmt = '[ {:9.5f} {:9.5f} {:9.5f} ]' 

340 row_str = '{:3d} {}'.format(atom.index, atom.symbol) 

341 row_str += array_fmt.format(*atom.position) 

342 row_str += array_fmt.format(*disp) 

343 row_str += array_fmt.format(*force) 

344 s.append(row_str) 

345 return '\n'.join(s) 

346 

347 def __repr__(self): 

348 return 'FitStructure({!r}, ..., {!r})'.format(self.atoms, self.meta_data) 

349 

350 def write(self, fileobj): 

351 """ Write the instance to file. 

352 

353 Parameters 

354 ---------- 

355 fileobj : str or file object 

356 name of input file (str) or stream to write to (file object) 

357 """ 

358 if isinstance(fileobj, str): 358 ↛ 359line 358 didn't jump to line 359 because the condition on line 358 was never true

359 tar_file = tarfile.open(name=fileobj, mode='w') 

360 else: 

361 tar_file = tarfile.open(fileobj=fileobj, mode='w') 

362 

363 items_pickle = dict(atoms=self._atoms, meta_data=self.meta_data) 

364 items_hdf5 = dict(fit_matrix=self.fit_matrix) 

365 

366 add_items_to_tarfile_pickle(tar_file, items_pickle, 'items.pickle') 

367 add_items_to_tarfile_hdf5(tar_file, items_hdf5, 'fit_matrix.hdf5') 

368 

369 tar_file.close() 

370 

371 @staticmethod 

372 def read(fileobj, read_fit_matrix=True): 

373 """ Read a OrientationFamily instance from a file. 

374 

375 Parameters 

376 ---------- 

377 fileobj : str or file object 

378 name of input file (str) or stream to read from (file object) 

379 read_fit_matrix : bool 

380 whether or not to read the fit_matrix 

381 Returns 

382 ------- 

383 FitStructure instance 

384 """ 

385 if isinstance(fileobj, str): 385 ↛ 386line 385 didn't jump to line 386 because the condition on line 385 was never true

386 tar_file = tarfile.open(mode='r', name=fileobj) 

387 else: 

388 tar_file = tarfile.open(mode='r', fileobj=fileobj) 

389 

390 items = read_items_pickle(tar_file, 'items.pickle') 

391 fit_matrix = read_items_hdf5(tar_file, 'fit_matrix.hdf5')['fit_matrix'] 

392 

393 return FitStructure(items['atoms'], fit_matrix, **items['meta_data']) 

394 

395 

396def are_configurations_equal(atoms1, atoms2, tol=1e-10): 

397 """ Compare if two configurations are equal within some tolerance. This 

398 includes checking all available arrays in the two atoms objects. 

399 

400 Parameters 

401 ---------- 

402 atoms1 : ase.Atoms 

403 atoms2 : ase.Atoms 

404 

405 Returns 

406 ------- 

407 bool 

408 True if atoms are equal, False otherwise 

409 """ 

410 

411 # pbc 

412 if not all(atoms1.pbc == atoms2.pbc): 

413 return False 

414 

415 # cell 

416 if not np.allclose(atoms1.cell, atoms2.cell, atol=tol, rtol=0.0): 

417 return False 

418 

419 # arrays 

420 if not len(atoms1.arrays.keys()) == len(atoms2.arrays.keys()): 

421 return False 

422 for key, array1 in atoms1.arrays.items(): 

423 if key not in atoms2.arrays.keys(): 

424 return False 

425 if not np.allclose(array1, atoms2.arrays[key], atol=tol, rtol=0.0): 

426 return False 

427 

428 # passed all test, atoms must be equal 

429 return True