Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

""" 

This module provides functionality for storing structures and their fit 

matrices together with target forces and displacements 

""" 

 

import tarfile 

import numpy as np 

from collections import OrderedDict 

from ase.calculators.singlepoint import SinglePointCalculator 

 

from .io.read_write_files import (add_items_to_tarfile_hdf5, 

add_items_to_tarfile_pickle, 

add_items_to_tarfile_custom, 

add_list_to_tarfile_custom, 

read_items_hdf5, 

read_items_pickle, 

read_list_custom) 

 

from .cluster_space import ClusterSpace 

from .force_constant_model import ForceConstantModel 

from .io.logging import logger 

logger = logger.getChild('structure_container') 

 

 

class StructureContainer: 

""" 

This class serves as a container for structures as well as associated 

fit properties and fit matrices. 

 

Parameters 

----------- 

cs : ClusterSpace object 

cluster space that is the basis for the container 

fit_structure_list : list of FitStructure objects (optional) 

structures to be added to the container 

""" 

 

def __init__(self, cs, fit_structure_list=None): 

""" 

Attributes 

----------- 

_cs : ClusterSpace object 

ClusterSpace object which is the basis for the container 

_structure_list : list 

List of FitStructure objects 

_previous_fcm : ForceConstantModel object 

FCM object used for last fit matrix calculation 

_previous_atoms: ASE Atoms object 

atoms object used in last fit_matrix calculation; it is used to 

decided if the previous FCM can be used for a new structure or not, 

which often enables a considerable speed-up 

""" 

self._cs = cs.copy() 

 

self._previous_fcm = None 

self._previous_atoms = None 

 

# Add atoms from atoms_list 

self._structure_list = [] 

if fit_structure_list is not None: 

for fit_structure in fit_structure_list: 

62 ↛ 63line 62 didn't jump to line 63, because the condition on line 62 was never true if not isinstance(fit_structure, FitStructure): 

raise TypeError('Can only add FitStructures') 

self._structure_list.append(fit_structure) 

 

def __len__(self): 

return len(self._structure_list) 

 

def __getitem__(self, ind): 

return self._structure_list[ind] 

 

@property 

def data_shape(self): 

""" tuple : tuple of integers representing the shape of the fit data 

matrix """ 

n_cols = self._cs.number_of_dofs 

n_rows = sum(len(fs) * 3 for fs in self if fs.fit_ready) 

if n_rows == 0: 

return None 

return n_rows, n_cols 

 

@property 

def cluster_space(self): 

""" ClusterSpace object : copy of the cluster space the structure 

container is based on""" 

return self._cs.copy() 

 

def get_structure_indices(self, user_tag): 

""" Get structure indices via user tag. 

 

Parameters 

---------- 

user_tag : string 

user tag used for selecting structures 

 

Returns 

------- 

list 

list of structures with specified user tag 

""" 

return [i for i, s in enumerate(self) if s.user_tag == user_tag] 

 

@staticmethod 

def read(fileobj, read_fit_matrices=True): 

"""Restore a StructureContainer object from file. 

 

Parameters 

---------- 

f : string or file object 

name of input file (string) or stream to load from (file object) 

read_fit_matrices : bool 

Whether or not to load the fit matrices. 

""" 

114 ↛ 115line 114 didn't jump to line 115, because the condition on line 114 was never true if isinstance(fileobj, str): 

tar_file = tarfile.open(mode='r', name=fileobj) 

else: 

tar_file = tarfile.open(mode='r', fileobj=fileobj) 

 

# Read clusterspace 

f = tar_file.extractfile('cluster_space') 

cs = ClusterSpace.read(f) 

 

# Read fitstructures 

fit_structure_list = read_list_custom( 

tar_file, 'fit_structure', FitStructure.read, 

read_fit_matrix=read_fit_matrices) 

 

return StructureContainer(cs, fit_structure_list) 

 

def write(self, f): 

"""Write a StructureContainer instance to a file. 

 

Parameters 

---------- 

f : str or file object 

name of input file (str) or stream to write to (file object) 

""" 

 

139 ↛ 140line 139 didn't jump to line 140, because the condition on line 139 was never true if isinstance(f, str): 

tar_file = tarfile.open(mode='w', name=f) 

else: 

tar_file = tarfile.open(mode='w', fileobj=f) 

 

# save cs 

custom_items = dict(cluster_space=self._cs) 

add_items_to_tarfile_custom(tar_file, custom_items) 

 

# save fit structures 

add_list_to_tarfile_custom(tar_file, self._structure_list, 

'fit_structure') 

 

tar_file.close() 

 

def add_structure(self, atoms, user_tag=None, compute_fit_matrix=True): 

"""Add a structure to the container. 

 

Note that custom information about the atoms object may not be stored 

inside, for example a SinglePointCalculator will not be kept. 

 

Parameters 

---------- 

atoms : ASE Atoms object 

the structure to be added; the Atoms object must contain 

supplementary per-atom arrays with displacements and forces 

user_tag : string 

custom user tag to identify structure 

compute_fit_matrix : boolean 

if True the fit matrix of the structure is computed 

""" 

 

atoms_copy = atoms.copy() 

 

# atoms object must contain displacements and forces 

if 'displacements' not in atoms_copy.arrays.keys(): 

logger.warning('Skipping structure; no displacements found') 

return 

if 'forces' not in atoms_copy.arrays.keys(): 

if isinstance(atoms.calc, SinglePointCalculator): 

atoms_copy.new_array('forces', atoms.get_forces()) 

else: 

logger.warning('Skipping structure; no forces found') 

return 

 

# check if an identical atoms object already exists in the container 

for i, structure in enumerate(self._structure_list): 

if atoms_equals(atoms_copy, structure.atoms): 

logger.warning('Skipping structure; is equal to structure' 

' {}'.format(i)) 

return 

 

logger.info('Adding structure') 

structure = FitStructure(atoms_copy, user_tag) 

if compute_fit_matrix: 

M = self._compute_fit_matrix(structure.atoms) 

structure.set_fit_matrix(M) 

self._structure_list.append(structure) 

 

def update_fit_data(self): 

"""Compute the fit matrices for structures for which they are not yet 

available.""" 

for structure in self._structure_list: 

202 ↛ 201line 202 didn't jump to line 201, because the condition on line 202 was never false if not structure.fit_ready: 

M = self._compute_fit_matrix(structure.atoms) 

structure.set_fit_matrix(M) 

 

def get_fit_data(self, structures=None): 

"""Return fit data for structures. The fit matrices and target forces 

for the structures are stacked into NumPy arrays. 

 

Parameters 

---------- 

structures: list, tuple 

list of integers corresponding to structure indices. Defaults to 

None and in that case returns all fit data available. 

 

Returns 

------- 

NumPy array, NumPy array 

stacked fit matrices, stacked target forces for the structures 

""" 

if structures is None: 

M_list = [s.fit_matrix 

for s in self._structure_list if s.fit_ready] 

f_list = [s.forces.flatten() 

for s in self._structure_list if s.fit_ready] 

else: 

M_list, f_list = [], [] 

for i in structures: 

229 ↛ 230line 229 didn't jump to line 230, because the condition on line 229 was never true if not self._structure_list[i].fit_ready: 

raise ValueError('Structure {} is not fit ready'.format(i)) 

M_list.append(self._structure_list[i].fit_matrix) 

f_list.append(self._structure_list[i].forces.flatten()) 

 

if len(M_list) == 0: 

logger.warning('No available fit data for {}'.format(structures)) 

return None 

return np.vstack(M_list), np.hstack(f_list) 

 

def print_cluster_space(self, include_orbits=False): 

"""Print information concerning the cluster space this structure 

container is based on. 

 

Parameters 

---------- 

include_orbits : boolean 

if True also print the list of orbits associated with the 

cluster space 

""" 

print(self._cs) 

250 ↛ 251line 250 didn't jump to line 251, because the condition on line 250 was never true if include_orbits: 

self._cs.print_orbits() 

 

def __str__(self): 

if len(self._structure_list) > 0: 

return self._get_str_structure_list() 

else: 

return 'Empty StructureContainer' 

 

def __repr__(self): 

return 'StructureContainer({!r}, {!r})'.format( 

self._cs, self._structure_list) 

 

def _get_str_structure_list(self): 

""" Return formatted string of the structure list """ 

def str_structure(index, structure): 

fields = OrderedDict([ 

('index', '{:^4}'.format(index)), 

('user-tag', '{:^16}'.format(structure.user_tag)), 

('num-atoms', '{:^5}'.format(len(structure))), 

('fit-ready', '{:^5}'.format(str(structure.fit_ready))), 

('avg-disp', '{:7.4f}' 

.format(np.mean([np.linalg.norm(d) for d in 

structure.displacements]))), 

('avg-force', '{:7.4f}' 

.format(np.mean([np.linalg.norm(f) for f in 

structure.forces]))), 

('max-force', '{:7.4f}' 

.format(np.max([np.linalg.norm(f) for f in 

structure.forces])))]) 

s = [] 

for name, value in fields.items(): 

n = max(len(name), len(value)) 

if index < 0: 

s += ['{s:^{n}}'.format(s=name, n=n)] 

else: 

s += ['{s:^{n}}'.format(s=value, n=n)] 

return ' | '.join(s) 

 

# table width 

dummy = self._structure_list[0] 

n = len(str_structure(-1, dummy)) 

 

# table header 

s = [] 

s.append(' Structure Container '.center(n, '=')) 

s += ['{:22} : {}'.format('Total number of structures', len(self))] 

s.append(''.center(n, '-')) 

s.append(str_structure(-1, dummy)) 

s.append(''.center(n, '-')) 

 

# table body 

for i, structure in enumerate(self._structure_list): 

s.append(str_structure(i, structure)) 

s.append(''.center(n, '=')) 

return '\n'.join(s) 

 

def _compute_fit_matrix(self, atoms): 

""" Compute fit matrix for a single atoms object """ 

logger.info('Computing fit matrix') 

if atoms != self._previous_atoms: 

logger.info(' Building new FCM object') 

self._previous_atoms = atoms.copy() 

self._previous_fcm = ForceConstantModel(atoms, self._cs) 

else: 

logger.info(' Reusing old FCM object') 

return self._previous_fcm.get_fit_matrix( 

atoms.get_array('displacements')) 

 

 

class FitStructure: 

"""This class holds a structure with displacements and forces as well as 

the fit matrix. 

 

Parameters 

---------- 

atoms : ASE Atoms object 

supercell structure 

user_tag : string 

custom user tag 

fit_matrix : NumPy array 

fit matrix; NumPy (N, M) array with `N = 3 * len(atoms)` 

""" 

 

def __init__(self, atoms, user_tag=None, fit_matrix=None): 

self._fit_ready = False 

self._atoms = atoms 

self._user_tag = user_tag 

self.set_fit_matrix(fit_matrix) 

 

@property 

def fit_matrix(self): 

"""NumPy array : the fit matrix""" 

return self._fit_matrix 

 

@property 

def atoms(self): 

"""ASE Atoms object : supercell structure""" 

return self._atoms 

 

@property 

def forces(self): 

"""NumPy array : forces""" 

return self._atoms.get_array('forces') 

 

@property 

def displacements(self): 

"""NumPy array : atomic displacements""" 

return self._atoms.get_array('displacements') 

 

@property 

def fit_ready(self): 

"""boolean : True if the structure is prepared for fitting, i.e. the 

fit matrix is available""" 

return self._fit_ready 

 

@property 

def user_tag(self): 

return str(self._user_tag) 

 

def set_fit_matrix(self, fit_matrix): 

"""Set the fit matrix. 

 

Parameters 

---------- 

fit_matrix : NumPy array 

fit matrix for this structure; NumPy (N, M) array with 

`N = 3 * len(atoms)` 

""" 

if fit_matrix is not None: 

380 ↛ 384line 380 didn't jump to line 384, because the condition on line 380 was never false if fit_matrix.shape[0] == len(self._atoms) * 3: 

self._fit_matrix = fit_matrix 

self._fit_ready = True 

else: 

logger.warning('fit matrix not compatible with atoms') 

else: 

self._fit_matrix = None 

self._fit_ready = False 

 

def __len__(self): 

return len(self._atoms) 

 

def __str__(self): 

s = [] 

s.append('atoms: {}'.format(self.atoms)) 

s.append('user_tag: {}'.format(self.user_tag)) 

s.append('fit_ready: {}'.format(self.fit_ready)) 

return '\n'.join(s) 

 

def __repr__(self): 

return 'FitStructure({!r}, {!r})'.format(self.atoms, self.user_tag) 

 

def write(self, fileobj): 

403 ↛ 404line 403 didn't jump to line 404, because the condition on line 403 was never true if isinstance(fileobj, str): 

tar_file = tarfile.open(name=fileobj, mode='w') 

else: 

tar_file = tarfile.open(fileobj=fileobj, mode='w') 

 

items_pickle = dict(atoms=self._atoms, user_tag=self.user_tag, 

fit_ready=self.fit_ready) 

items_hdf5 = dict(fit_matrix=self.fit_matrix) 

 

add_items_to_tarfile_pickle(tar_file, items_pickle, 'items.pickle') 

413 ↛ 416line 413 didn't jump to line 416, because the condition on line 413 was never false if self.fit_ready: 

add_items_to_tarfile_hdf5(tar_file, items_hdf5, 'fit_matrix.hdf5') 

 

tar_file.close() 

 

@staticmethod 

def read(fileobj, read_fit_matrix=True): 

420 ↛ 421line 420 didn't jump to line 421, because the condition on line 420 was never true if isinstance(fileobj, str): 

tar_file = tarfile.open(mode='r', name=fileobj) 

else: 

tar_file = tarfile.open(mode='r', fileobj=fileobj) 

 

items = read_items_pickle(tar_file, 'items.pickle') 

if items['fit_ready'] and read_fit_matrix: 

fit_matrix = read_items_hdf5( 

tar_file, 'fit_matrix.hdf5')['fit_matrix'] 

else: 

fit_matrix = None 

 

return FitStructure(items['atoms'], user_tag=items['user_tag'], 

fit_matrix=fit_matrix) 

 

 

def atoms_equals(atoms1, atoms2, tol=1e-10): 

""" Compare if two atoms are equals within some tolerance 

 

Parameters 

---------- 

atoms1 : ASE Atoms object 

atoms object 1 

atoms2 : ASE Atoms object 

atoms object 2 

 

Returns 

------- 

bool 

True if atoms are equal, else False 

""" 

 

# pbc 

if not all(atoms1.pbc == atoms2.pbc): 

return False 

 

# cell 

if not np.allclose(atoms1.cell, atoms2.cell, atol=tol, rtol=0.0): 

return False 

 

# arrays 

if not len(atoms1.arrays.keys()) == len(atoms2.arrays.keys()): 

return False 

for key, array1 in atoms1.arrays.items(): 

if key not in atoms2.arrays.keys(): 

return False 

if not np.allclose(array1, atoms2.arrays[key], atol=tol, rtol=0.0): 

return False 

 

# passed all test, atoms must be equal 

return True