# Interface with scikit-learn

This tutorial demonstrates how hiPhive can be easily interfaced with other Python libraries. Here, we specifically consider scikit-learn, but it is equally straightforward to interface with, e.g., SciPy or TensorFlow.

## Regression

hiPhive provides simplified interfaces to several linear models in scikit-learn. Yet there are many more features of scikit-learn that can be of interest to advanced users. This tutorial demonstrates how to use hiPhive in such situations. The following snippet illustrates for example how to use the HuberRegressor linear model (which is not included among the standard fitting methods) to find optimal parameters.

from sklearn.linear_model import HuberRegressor
# Pull the fit matrix and the target vector out of the structure container.
fit_matrix, targets = sc.get_fit_data()

# NOTE(review): HuberRegressor fits an intercept by default while only coef_
# is handed on below -- confirm whether fit_intercept=False is wanted here.
huber = HuberRegressor().fit(fit_matrix, targets)

# Combine the cluster space with the fitted coefficients.
fcp = ForceConstantPotential(cs, huber.coef_)


## t-SNE

While regression is probably the most common task, other machine learning techniques can also be of interest. This section illustrates the utility of the t-distributed Stochastic Neighbor Embedding (t-SNE) algorithm for visualizing datasets (also see here). Specifically, the example considers the three most common crystalline phases of titanium (HCP, BCC, omega).

## Source code

Structure preparation
tutorial/advanced/interface_with_sklearn/prepare_structures.py
from ase.build import bulk
from hiphive import ClusterSpace, StructureContainer
from tools import get_bcc_structures, get_hcp_structures, get_omega_structures

# parameters
a0 = 3.32389853  # lattice parameter handed to bulk() and the structure helpers
cutoffs = [6.0, 4.0, 4.0]
n_structures = 1  # number of rattled structures generated per phase

# expansion around the BCC structure
prim = bulk('Ti', 'bcc', a=a0)
cs = ClusterSpace(prim, cutoffs)

# generate rattled structures
bcc_rattled = get_bcc_structures(n_structures, a=a0, rattle=0.1)
hcp_rattled = get_hcp_structures(n_structures, a=a0, rattle=0.02)
omega_rattled = get_omega_structures(n_structures, a=a0, rattle=0.02)

# setup StructureContainer; every structure is stored together with a user
# tag naming its origin so that the structures can be selected by tag later
sc = StructureContainer(cs)
user_tag = 'bcc-rattle'
for structure in bcc_rattled:
    sc.add_structure(structure, user_tag=user_tag)

user_tag = 'hcp-rattle'
for structure in hcp_rattled:
    sc.add_structure(structure, user_tag=user_tag)

user_tag = 'omega-rattle'
for structure in omega_rattled:
    sc.add_structure(structure, user_tag=user_tag)

# NOTE(review): md_structures is not defined anywhere in this listing; it has
# to be provided before this loop runs (presumably a list of MD snapshots
# carrying displacements and forces -- TODO confirm where it is loaded from).
user_tag = 'md-T1400'
for structure in md_structures:
    sc.add_structure(structure, user_tag=user_tag)

# save sc
sc.write('structure_container')

t-SNE analysis
tutorial/advanced/interface_with_sklearn/tsne_analysis.py
import numpy as np
from matplotlib import pyplot as plt
from sklearn.manifold import TSNE
from hiphive import StructureContainer

def get_partial_fit_matrix(sc, tags, n_rows):
    """Collect the leading rows of the fit matrix for each user tag.

    Parameters
    ----------
    sc
        container providing ``get_structure_indices(tag)`` and
        ``get_fit_data(indices)``
    tags
        iterable of user tags to include
    n_rows
        maximum number of fit-matrix rows kept per tag

    Returns
    -------
    tuple
        the vertically stacked row blocks and a list holding exactly one
        tag per stacked row
    """
    M_list, tag_list = [], []
    for tag in tags:
        structure_inds = sc.get_structure_indices(tag)
        M, _ = sc.get_fit_data(structure_inds)
        rows = M[:n_rows, :]
        M_list.append(rows)
        # label only the rows actually kept; a tag may supply fewer than
        # n_rows rows, and the labels must stay aligned with the matrix
        tag_list.extend([tag] * len(rows))
    return np.vstack(M_list), tag_list

# parameters
n_rows = 300
perplexity = 30
learning_rate = 50
n_iter = 2000

# read the structure container written by the structure preparation script
sc = StructureContainer.read('structure_container')

# sorted so that the stacking and plotting order (and hence the colors and
# legend entries) are reproducible between runs; key=str tolerates unset tags
unique_tags = sorted(set(fs.user_tag for fs in sc), key=str)
M, tags = get_partial_fit_matrix(sc, unique_tags, n_rows)

# t-SNE transformation
# NOTE(review): recent scikit-learn releases rename the n_iter argument of
# TSNE to max_iter -- confirm against the installed version.
tsne = TSNE(perplexity=perplexity, n_iter=n_iter, learning_rate=learning_rate,
            verbose=1)
XY_tsne = tsne.fit_transform(M)

# plot
ms = 100
alpha = 0.25
fs = 14

for unique_tag in unique_tags:
    # boolean mask selecting the embedded rows that belong to this tag
    indices = [tag == unique_tag for tag in tags]
    plt.scatter(XY_tsne[indices, 0], XY_tsne[indices, 1], ms, alpha=alpha,
                label=unique_tag)

legend = plt.legend(fontsize=fs)
# Matplotlib 3.7 renamed Legend.legendHandles to legend_handles; support both
handles = getattr(legend, 'legend_handles', None)
if handles is None:
    handles = legend.legendHandles
# draw the legend markers fully opaque although the points are translucent
for lh in handles:
    lh.set_alpha(1)

plt.tight_layout()
plt.savefig('Ti_tsne_analysis.svg')

Tools
tutorial/advanced/interface_with_sklearn/tools.py
import numpy as np
from ase import Atoms
from ase.build import bulk
from hiphive.structure_generation import generate_rattled_structures

def get_bcc_structures(n_structures, a=1, rattle=0.05, size=6):
    """Return rattled BCC Ti supercells expressed relative to the ideal cell.

    Every returned structure is a copy of the ideal supercell that carries a
    'displacements' array (rattled minus ideal positions) and an all-zero
    'forces' array.
    """
    reference = bulk('Ti', 'bcc', a=a).repeat(size)
    rattled = generate_rattled_structures(reference, n_structures, rattle)

    def _with_displacements(snapshot):
        # start from the ideal positions and attach the offsets as arrays
        ideal = reference.copy()
        offsets = snapshot.positions - ideal.positions
        zero_forces = np.zeros((len(ideal), 3))
        ideal.new_array('displacements', offsets)
        ideal.new_array('forces', zero_forces)
        return ideal

    return [_with_displacements(snapshot) for snapshot in rattled]

def get_hcp_structures(n_structures, a=1, rattle=0.05, size=(3, 3, 6)):
    """Return rattled structures displaced along the bcc-hcp path.

    The rattling is applied to the structure displaced by 0.2484 along the
    path, while the stored 'displacements' are measured from the undisplaced
    (dx = 0) structure. The 'forces' array is filled with zeros.
    """
    reference = _get_atoms_along_hcp_path(0.0, a, size)
    displaced = _get_atoms_along_hcp_path(0.2484, a, size)
    rattled = generate_rattled_structures(displaced, n_structures, rattle)

    def _with_displacements(snapshot):
        # start from the undisplaced positions and attach the offsets
        ideal = reference.copy()
        offsets = snapshot.positions - ideal.positions
        zero_forces = np.zeros((len(ideal), 3))
        ideal.new_array('displacements', offsets)
        ideal.new_array('forces', zero_forces)
        return ideal

    return [_with_displacements(snapshot) for snapshot in rattled]

def get_omega_structures(n_structures, a=1, rattle=0.05, size=(4, 3, 6)):
    """Return rattled structures displaced along the bcc-omega path.

    The rattling is applied to the structure displaced by 0.48 along the
    bcc-omega path, while the stored 'displacements' are measured from the
    undisplaced (dx = 0) structure of the same path. The 'forces' array is
    filled with zeros.

    Parameters
    ----------
    n_structures
        number of rattled structures to generate
    a
        lattice parameter passed on to the path helper
    rattle
        rattle amplitude passed on to generate_rattled_structures
    size
        repetition of the six-atom cell built by the path helper

    Returns
    -------
    list
        one atoms object per rattled structure
    """
    # use the omega path helper for both structures; building them with the
    # hcp helper would yield a second hcp-like data set under the omega name
    atoms_ideal = _get_atoms_along_omega_path(0.0, a, size)
    atoms_omega = _get_atoms_along_omega_path(0.48, a, size)
    rattled_structures = generate_rattled_structures(
        atoms_omega, n_structures, rattle)
    structures = []
    for structure in rattled_structures:
        atoms = atoms_ideal.copy()
        displacements = structure.positions - atoms.positions
        forces = np.zeros((len(atoms), 3))
        atoms.new_array('displacements', displacements)
        atoms.new_array('forces', forces)
        structures.append(atoms)
    return structures

def _get_atoms_along_omega_path(dx, a, size):
    """Build a six-atom Ti cell, shift two atom pairs by dx and repeat it.

    Atoms 2 and 3 are moved by +dx and atoms 4 and 5 by -dx along the third
    Cartesian axis before the cell is repeated ``size`` times.
    """
    step_x = 1 / np.sqrt(2.0)
    step_y = step_x / np.sqrt(3.0)
    step_z = step_y / np.sqrt(2.0)

    sites = a * np.array([
        [0, 0, 0],
        [step_x, 3*step_y, 0],
        [0, 4*step_y, 1*step_z],
        [step_x, 1*step_y, 1*step_z],
        [0, 2*step_y, 2*step_z],
        [step_x, 5*step_y, 2*step_z]])
    box = a * np.diag([2*step_x, 6*step_y, 3*step_z])

    reference = Atoms(cell=box, positions=sites, symbols=['Ti']*6, pbc=True)
    reference.wrap()

    shifted = reference.copy()
    # (sign, atom indices): the two groups move in opposite directions
    for sign, members in ((1, (2, 3)), (-1, (4, 5))):
        for ind in members:
            shifted[ind].position[2] += sign * dx

    return shifted.repeat(size)

def _get_atoms_along_hcp_path(dx, a, size):
    """Build an eight-atom Ti cell, shuffle two atom groups and repeat it.

    Atoms 0, 2, 5, 7 are moved by (+dx/sqrt(2), -dx/sqrt(2)) in the first two
    Cartesian components and atoms 1, 3, 4, 6 by the opposite amount, before
    the cell is repeated ``size`` times.
    """
    sites = a * np.array([
        [1,   0,   0],
        [0,   0,   0],
        [0,   1,   0],
        [1,   1,   0],
        [1.5, 0.5, 0.5],
        [0.5, 0.5, 0.5],
        [0.5, 1.5, 0.5],
        [1.5, 1.5, 0.5]])
    box = a * np.diag([2, 2, 1])

    reference = Atoms(cell=box, positions=sites, symbols=['Ti']*8, pbc=True)
    reference.wrap()

    # per-component shift shared by both groups
    shift = dx / np.sqrt(2.0)

    shuffled = reference.copy()
    # (sign, atom indices): the two groups move in opposite directions
    for sign, members in ((1, (0, 2, 5, 7)), (-1, (1, 3, 4, 6))):
        for ind in members:
            shuffled[ind].position[0] += sign * shift
            shuffled[ind].position[1] -= sign * shift

    return shuffled.repeat(size)