Interface with scikit-learn

This tutorial demonstrates how hiPhive can be easily interfaced with other Python libraries. Here, we specifically consider scitkit-learn but it is equally straightforward to interface with e.g., SciPy or TensorFlow.

Regression

hiPhive provides simplified interfaces to several linear models in scikit-learn. Yet there are many more features of scikit-learn that can be of interest to advanced users. This tutorial demonstrates how to use hiPhive in such situations. The following snippet illustrates for example how to use the HuberRegressor linear model (which is not included among the standard fitting methods) to find optimal parameters.

from sklearn.linear_model import HuberRegressor
A, y = sc.get_fit_data()
hr = HuberRegressor()
hr.fit(A, y)
fcp = ForceConstantPotential(cs, hr.coef_)

t-SNE

While regression is probably the most common task, other machine learning techniques can also be of interest. This section illustrates the utility of the t-distributed Stochastic Neighbor Embedding (t-SNE) algorithm for visualizing datasets (also see here). Specifically, the example considers the three most common crystalline phases of titanium (HCP, BCC, omega).

../_images/Ti_tsne_analysis.svg

t-SNE analysis of a Ti dataset consisting of BCC, HCP and omega structures as well as configurations from a high temperature molecular dynamics simulation.

Source code

Structure preparation
tutorial/advanced/interface_with_sklearn/prepare_structures.py
from ase.build import bulk
from ase.io import read
from hiphive import ClusterSpace, StructureContainer
from tools import get_bcc_structures, get_hcp_structures, get_omega_structures


# parameters
a0 = 3.32389853
cutoffs = [6.0, 4.0, 4.0]
n_structures = 1


# expansion around the BCC structure
prim = bulk('Ti', 'bcc', a=a0)
cs = ClusterSpace(prim, [6.0, 4.0, 4.0])


# generate rattled structuress
bcc_rattled = get_bcc_structures(n_structures, a=a0, rattle=0.1)
hcp_rattled = get_hcp_structures(n_structures, a=a0, rattle=0.02)
omega_rattled = get_omega_structures(n_structures, a=a0, rattle=0.02)
md_structures = read('Ti_md_structures_T1400.extxyz@:')


# setup StructureContainer
sc = StructureContainer(cs)
user_tag = 'bcc-rattle'
for structure in bcc_rattled:
    sc.add_structure(structure, user_tag=user_tag)

user_tag = 'hcp-rattle'
for structure in hcp_rattled:
    sc.add_structure(structure, user_tag=user_tag)

user_tag = 'omega-rattle'
for structure in omega_rattled:
    sc.add_structure(structure, user_tag=user_tag)

user_tag = 'md-T1400'
for structure in md_structures:
    sc.add_structure(structure, user_tag=user_tag)

# save sc
sc.save_to_file('structure_container')
t-SNE analysis
tutorial/advanced/interface_with_sklearn/tsne_analysis.py
import numpy as np
from matplotlib import pyplot as plt
from sklearn.manifold import TSNE
from hiphive import StructureContainer


def get_partial_fit_matrix(sc, tags, n_rows):
    M_list, tag_list = [], []
    for tag in tags:
        structure_inds = sc.get_structure_indices(tag)
        M, _ = sc.get_fit_data(structure_inds)
        M_list.append(M[:n_rows, :])
        tag_list.extend([tag] * n_rows)
    return np.vstack(M_list), tag_list


# parameters
n_rows = 300
perplexity = 30
learning_rate = 50
n_iter = 2000

# read data
sc = StructureContainer.load_from_file('structure_container')
unique_tags = set(fs.user_tag for fs in sc)
M, tags = get_partial_fit_matrix(sc, unique_tags, n_rows)


# t-SNE transformation
tsne = TSNE(perplexity=perplexity, n_iter=n_iter, learning_rate=learning_rate,
            verbose=1)
XY_tsne = tsne.fit_transform(M)


# plot
ms = 100
alpha = 0.25
fs = 14

for unique_tag in unique_tags:
    indices = [tag == unique_tag for tag in tags]
    plt.scatter(XY_tsne[indices, 0], XY_tsne[indices, 1], ms, alpha=alpha,
                label=unique_tag)

legend = plt.legend(fontsize=fs)
for lh in legend.legendHandles:
    lh.set_alpha(1)

plt.tight_layout()
plt.savefig('Ti_tsne_analysis.svg')
plt.show()
Tools
tutorial/advanced/interface_with_sklearn/tools.py
import numpy as np
from ase import Atoms
from ase.build import bulk
from hiphive.structure_generation import generate_rattled_structures


def get_bcc_structures(n_structures, a=1, rattle=0.05, size=6):
    atoms_ideal = bulk('Ti', 'bcc', a=a).repeat(size)
    rattled_structures = generate_rattled_structures(
        atoms_ideal, n_structures, rattle)
    structures = []
    for structure in rattled_structures:
        atoms = atoms_ideal.copy()
        displacements = structure.positions - atoms.positions
        forces = np.zeros((len(atoms), 3))
        atoms.new_array('displacements', displacements)
        atoms.new_array('forces', forces)
        structures.append(atoms)
    return structures


def get_hcp_structures(n_structures, a=1, rattle=0.05, size=(3, 3, 6)):
    atoms_ideal = _get_atoms_along_hcp_path(0.0, a, size)
    atoms_hcp = _get_atoms_along_hcp_path(0.2484, a, size)
    rattled_structures = generate_rattled_structures(
        atoms_hcp, n_structures, rattle)
    structures = []
    for structure in rattled_structures:
        atoms = atoms_ideal.copy()
        displacements = structure.positions - atoms.positions
        forces = np.zeros((len(atoms), 3))
        atoms.new_array('displacements', displacements)
        atoms.new_array('forces', forces)
        structures.append(atoms)
    return structures


def get_omega_structures(n_structures, a=1, rattle=0.05, size=(4, 3, 6)):
    atoms_ideal = _get_atoms_along_hcp_path(0.0, a, size)
    atoms_omega = _get_atoms_along_hcp_path(0.48, a, size)
    rattled_structures = generate_rattled_structures(
        atoms_omega, n_structures, rattle)
    structures = []
    for structure in rattled_structures:
        atoms = atoms_ideal.copy()
        displacements = structure.positions - atoms.positions
        forces = np.zeros((len(atoms), 3))
        atoms.new_array('displacements', displacements)
        atoms.new_array('forces', forces)
        structures.append(atoms)
    return structures


def _get_atoms_along_omega_path(dx, a, size):
    """ Returns atoms object displaced along bcc-omega mode """

    x = 1 / np.sqrt(2.0)
    y = x / np.sqrt(3.0)
    z = y / np.sqrt(2.0)

    positions = a * np.array([
                [0, 0,     0],
                [x, 3*y,   0],
                [0, 4*y, 1*z],
                [x, 1*y, 1*z],
                [0, 2*y, 2*z],
                [x, 5*y, 2*z]])

    cell = a * np.diag([2*x, 6*y, 3*z])
    bcc = Atoms(cell=cell, positions=positions, symbols=['Ti']*6, pbc=True)
    bcc.wrap()

    indices1 = [2, 3]
    indices2 = [4, 5]

    atoms = bcc.copy()
    for ind in indices1:
        atoms[ind].position[2] += dx
    for ind in indices2:
        atoms[ind].position[2] -= dx

    atoms = atoms.repeat(size)
    return atoms


def _get_atoms_along_hcp_path(dx, a, size):
    """ Returns atoms object displaced along bcc-hcp mode """

    positions = a * np.array([
                [1,   0,   0],
                [0,   0,   0],
                [0,   1,   0],
                [1,   1,   0],
                [1.5, 0.5, 0.5],
                [0.5, 0.5, 0.5],
                [0.5, 1.5, 0.5],
                [1.5, 1.5, 0.5]])

    cell = a * np.diag([2, 2, 1])
    bcc = Atoms(cell=cell, positions=positions, symbols=['Ti']*8, pbc=True)
    bcc.wrap()

    indices1 = [0, 2, 5, 7]
    indices2 = [1, 3, 4, 6]

    atoms = bcc.copy()
    for ind in indices1:
        atoms[ind].position[0] += dx / np.sqrt(2.0)
        atoms[ind].position[1] -= dx / np.sqrt(2.0)
    for ind in indices2:
        atoms[ind].position[0] -= dx / np.sqrt(2.0)
        atoms[ind].position[1] += dx / np.sqrt(2.0)

    atoms = atoms.repeat(size)
    return atoms