import io
import re
import warnings
from collections import defaultdict
from typing import Any, Dict, List, Optional
import numpy as np
from ase import Atoms, units
from ase.calculators.singlepoint import SinglePointCalculator
from ase.constraints import FixAtoms, FixCartesian, FixConstraint
from ase.parallel import paropen
from ase.utils import reader, string2index
[docs]
@reader
def read_castep_castep(fd, index=-1):
"""Read a .castep file and returns an Atoms object.
The calculator information will be stored in the calc attribute.
Notes
-----
This routine will return an atom ordering as found within the castep file.
This means that the species will be ordered by ascending atomic numbers.
The atoms witin a species are ordered as given in the original cell file.
"""
# look for last result, if several CASTEP run are appended
record_start, record_end, end_found, _ = _castep_find_last_record(fd)
if not end_found:
warnings.warn(f'No regular end found in {fd.name} file.')
return # we return here, because the file has no a regular end
# These variables are finally assigned to `SinglePointCalculator`
# for backward compatibility with the `Castep` calculator.
cut_off_energy = None
kpoints = None
total_time = None
peak_memory = None
fd.seek(record_start)
# read header
parameters_header = _read_header(fd)
if 'cut_off_energy' in parameters_header:
cut_off_energy = parameters_header['cut_off_energy']
if 'basis_precision' in parameters_header:
del parameters_header['cut_off_energy'] # avoid conflict
markers_new_iteration = [
'BFGS: starting iteration',
'BFGS: improving iteration',
'Starting MD iteration',
]
images = []
results = {}
species_pot = []
castep_warnings = []
while True:
# TODO: add a switch if we have a geometry optimization: record
# atoms objects for intermediate steps.
try:
line = fd.readline()
if not line or fd.tell() > record_end:
break
elif 'Number of kpoints used' in line:
kpoints = int(line.split('=')[-1].strip())
elif 'Unit Cell' in line:
lattice_real = _read_unit_cell(fd)
elif 'Cell Contents' in line:
while True:
line = fd.readline()
if 'Total number of ions in cell' in line:
n_atoms = int(line.split()[7])
if 'Total number of species in cell' in line:
int(line.split()[7])
fields = line.split()
if len(fields) == 0:
break
elif 'Fractional coordinates of atoms' in line:
species, custom_species, positions_frac = \
_read_fractional_coordinates(fd, n_atoms)
elif 'Files used for pseudopotentials' in line:
while True:
line = fd.readline()
if 'Pseudopotential generated on-the-fly' in line:
continue
fields = line.split()
if len(fields) == 2:
species_pot.append(fields)
else:
break
elif 'k-Points For BZ Sampling' in line:
# TODO: generalize for non-Monkhorst Pack case
# (i.e. kpoint lists) -
# kpoints_offset cannot be read this way and
# is hence always set to None
while True:
line = fd.readline()
if not line.strip():
break
if 'MP grid size for SCF calculation' in line:
# kpoints = ' '.join(line.split()[-3:])
# self.kpoints_mp_grid = kpoints
# self.kpoints_mp_offset = '0. 0. 0.'
# not set here anymore because otherwise
# two calculator objects go out of sync
# after each calculation triggering unnecessary
# recalculation
break
elif 'Final energy' in line:
key = 'energy_without_dispersion_correction'
results[key] = float(line.split()[-2])
elif 'Final free energy' in line:
key = 'free_energy_without_dispersion_correction'
results[key] = float(line.split()[-2])
elif 'NB est. 0K energy' in line:
key = 'energy_zero_without_dispersion_correction'
results[key] = float(line.split()[-2])
# Add support for dispersion correction
# filtering due to SEDC is done in get_potential_energy
elif 'Dispersion corrected final energy' in line:
key = 'energy_with_dispersion_correlation'
results[key] = float(line.split()[-2])
elif 'Dispersion corrected final free energy' in line:
key = 'free_energy_with_dispersion_correlation'
results[key] = float(line.split()[-2])
elif 'NB dispersion corrected est. 0K energy' in line:
key = 'energy_zero_with_dispersion_correlation'
results[key] = float(line.split()[-2])
# check if we had a finite basis set correction
elif 'Total energy corrected for finite basis set' in line:
key = 'energy_with_finite_basis_set_correction'
results[key] = float(line.split()[-2])
# ******************** Forces *********************
# ************** Symmetrised Forces ***************
# ******************** Constrained Forces ********************
# ******************* Unconstrained Forces *******************
elif re.search(r'\**.* Forces \**', line):
forces, constraints = _read_forces(fd, n_atoms)
results['forces'] = np.array(forces)
# ***************** Stress Tensor *****************
# *********** Symmetrised Stress Tensor ***********
elif re.search(r'\**.* Stress Tensor \**', line):
results.update(_read_stress(fd))
elif any(_ in line for _ in markers_new_iteration):
_add_atoms(
images,
lattice_real,
species,
custom_species,
positions_frac,
constraints,
results,
)
# reset for the next step
lattice_real = None
species = None
positions_frac = None
results = {}
# extract info from the Mulliken analysis
elif 'Atomic Populations' in line:
results.update(_read_mulliken_charges(fd))
# extract detailed Hirshfeld analysis (iprint > 1)
elif 'Hirshfeld total electronic charge (e)' in line:
results.update(_read_hirshfeld_details(fd, n_atoms))
elif 'Hirshfeld Analysis' in line:
results.update(_read_hirshfeld_charges(fd))
# There is actually no good reason to get out of the loop
# already at this point... or do I miss something?
# elif 'BFGS: Final Configuration:' in line:
# break
elif 'warn' in line.lower():
castep_warnings.append(line)
# fetch some last info
elif 'Total time' in line:
pattern = r'.*=\s*([\d\.]+) s'
total_time = float(re.search(pattern, line).group(1))
elif 'Peak Memory Use' in line:
pattern = r'.*=\s*([\d]+) kB'
peak_memory = int(re.search(pattern, line).group(1))
except Exception as exception:
msg = f'{line}|-> line triggered exception: {str(exception)}'
raise Exception(msg) from exception
# add the last image
_add_atoms(
images,
lattice_real,
species,
custom_species,
positions_frac,
constraints,
results,
)
for atoms in images:
# these variables are temporarily assigned to `SinglePointCalculator`
# to be assigned to the `Castep` calculator for backward compatibility
atoms.calc._cut_off_energy = cut_off_energy
atoms.calc._kpoints = kpoints
atoms.calc._species_pot = species_pot
atoms.calc._total_time = total_time
atoms.calc._peak_memory = peak_memory
atoms.calc._parameters_header = parameters_header
if castep_warnings:
warnings.warn('WARNING: .castep file contains warnings')
for warning in castep_warnings:
warnings.warn(warning)
if isinstance(index, str):
index = string2index(index)
return images[index]
def _castep_find_last_record(castep_file):
"""Checks wether a given castep file has a regular
ending message following the last banner message. If this
is the case, the line number of the last banner is message
is return, otherwise False.
returns (record_start, record_end, end_found, last_record_complete)
"""
if isinstance(castep_file, str):
castep_file = paropen(castep_file, 'r')
file_opened = True
else:
file_opened = False
record_starts = []
while True:
line = castep_file.readline()
if (('Welcome' in line or 'Materials Studio' in line)
and 'CASTEP' in line):
record_starts = [castep_file.tell()] + record_starts
if not line:
break
if not record_starts:
warnings.warn(
f'Could not find CASTEP label in result file: {castep_file}.'
' Are you sure this is a .castep file?'
)
return None
# search for regular end of file
end_found = False
# start to search from record beginning from the back
# and see if
record_end = -1
for record_nr, record_start in enumerate(record_starts):
castep_file.seek(record_start)
while True:
line = castep_file.readline()
if not line:
break
if 'Finalisation time =' in line:
end_found = True
record_end = castep_file.tell()
break
if end_found:
break
if file_opened:
castep_file.close()
if end_found:
# record_nr == 0 corresponds to the last record here
if record_nr == 0:
return (record_start, record_end, True, True)
else:
return (record_start, record_end, True, False)
else:
return (0, record_end, False, False)
def _read_header(out: io.TextIOBase):
"""Read the header blocks from a .castep file.
Returns
-------
parameters : dict
Dictionary storing keys and values of a .param file.
"""
def _parse_on_off(_: str):
return {'on': True, 'off': False}[_]
read_title = False
parameters: Dict[str, Any] = {}
while True:
line = out.readline()
if len(line) == 0: # end of file
break
if re.search(r'^\s*\*+$', line) and read_title: # end of header
break
if re.search(r'\**.* Title \**', line):
read_title = True
# General Parameters
elif 'output verbosity' in line:
parameters['iprint'] = int(line.split()[-1][1])
elif re.match(r'\stype of calculation\s*:', line):
parameters['task'] = {
'single point energy': 'SinglePoint',
'geometry optimization': 'GeometryOptimization',
'band structure': 'BandStructure',
'molecular dynamics': 'MolecularDynamics',
'optical properties': 'Optics',
'phonon calculation': 'Phonon',
'E-field calculation': 'Efield',
'Phonon followed by E-field': 'Phonon+Efield',
'transition state search': 'TransitionStateSearch',
'Magnetic Resonance': 'MagRes',
'Core level spectra': 'Elnes',
'Electronic Spectroscopy': 'ElectronicSpectroscopy',
}[line.split(':')[-1].strip()]
elif 'stress calculation' in line:
parameters['calculate_stress'] = _parse_on_off(line.split()[-1])
elif 'calculation limited to maximum' in line:
parameters['run_time'] = float(line.split()[-2])
elif re.match(r'\soptimization strategy\s*:', line):
parameters['opt_strategy'] = {
'maximize speed(+++)': 'Speed',
'minimize memory(---)': 'Memory',
'balance speed and memory': 'Default',
}[line.split(':')[-1].strip()]
# Exchange-Correlation Parameters
elif re.match(r'\susing functional\s*:', line):
parameters['xc_functional'] = {
'Local Density Approximation': 'LDA',
'Perdew Wang (1991)': 'PW91',
'Perdew Burke Ernzerhof': 'PBE',
'revised Perdew Burke Ernzerhof': 'RPBE',
'PBE with Wu-Cohen exchange': 'WC',
'PBE for solids (2008)': 'PBESOL',
'Hartree-Fock': 'HF',
'Hartree-Fock +': 'HF-LDA',
'Screened Hartree-Fock': 'sX',
'Screened Hartree-Fock + ': 'sX-LDA',
'hybrid PBE0': 'PBE0',
'hybrid B3LYP': 'B3LYP',
'hybrid HSE03': 'HSE03',
'hybrid HSE06': 'HSE06',
'RSCAN': 'RSCAN',
}[line.split(':')[-1].strip()]
elif 'DFT+D: Semi-empirical dispersion correction' in line:
parameters['sedc_apply'] = _parse_on_off(line.split()[-1])
elif 'SEDC with' in line:
parameters['sedc_scheme'] = {
'OBS correction scheme': 'OBS',
'G06 correction scheme': 'G06',
'D3 correction scheme': 'D3',
'D3(BJ) correction scheme': 'D3-BJ',
'D4 correction scheme': 'D4',
'JCHS correction scheme': 'JCHS',
'TS correction scheme': 'TS',
'TSsurf correction scheme': 'TSSURF',
'TS+SCS correction scheme': 'TSSCS',
'aperiodic TS+SCS correction scheme': 'ATSSCS',
'aperiodic MBD@SCS method': 'AMBD',
'MBD@SCS method': 'MBD',
'aperiodic MBD@rsSCS method': 'AMBD*',
'MBD@rsSCS method': 'MBD*',
'XDM correction scheme': 'XDM',
}[line.split(':')[-1].strip()]
# Basis Set Parameters
elif 'basis set accuracy' in line:
parameters['basis_precision'] = line.split()[-1]
elif 'plane wave basis set cut-off' in line:
parameters['cut_off_energy'] = float(line.split()[-2])
elif re.match(r'\sfinite basis set correction\s*:', line):
parameters['finite_basis_corr'] = {
'none': 0,
'manual': 1,
'automatic': 2,
}[line.split()[-1]]
# Electronic Parameters
elif 'treating system as spin-polarized' in line:
parameters['spin_polarized'] = True
# Electronic Minimization Parameters
elif 'Treating system as non-metallic' in line:
parameters['fix_occupancy'] = True
elif 'total energy / atom convergence tol.' in line:
parameters['elec_energy_tol'] = float(line.split()[-2])
elif 'convergence tolerance window' in line:
parameters['elec_convergence_win'] = int(line.split()[-2])
elif 'max. number of SCF cycles:' in line:
parameters['max_scf_cycles'] = float(line.split()[-1])
elif 'dump wavefunctions every' in line:
parameters['num_dump_cycles'] = float(line.split()[-3])
# Density Mixing Parameters
elif 'density-mixing scheme' in line:
parameters['mixing_scheme'] = line.split()[-1]
return parameters
def _read_unit_cell(out: io.TextIOBase):
"""Read a Unit Cell block from a .castep file."""
while True:
line = out.readline()
fields = line.split()
if len(fields) == 6:
break
lattice_real = []
for _ in range(3):
lattice_real.append([float(f) for f in fields[0:3]])
line = out.readline()
fields = line.split()
return np.array(lattice_real)
def _read_forces(out: io.TextIOBase, n_atoms: int):
"""Read a block for atomic forces from a .castep file."""
constraints: List[FixConstraint] = []
forces = []
while True:
line = out.readline()
fields = line.split()
if len(fields) == 7:
break
for n in range(n_atoms):
consd = np.array([0, 0, 0])
fxyz = [0.0, 0.0, 0.0]
for i, force_component in enumerate(fields[-4:-1]):
if force_component.count("(cons'd)") > 0:
consd[i] = 1
# remove constraint labels in force components
fxyz[i] = float(force_component.replace("(cons'd)", ''))
if consd.all():
constraints.append(FixAtoms(n))
elif consd.any():
constraints.append(FixCartesian(n, consd))
forces.append(fxyz)
line = out.readline()
fields = line.split()
return forces, constraints
def _read_fractional_coordinates(out: io.TextIOBase, n_atoms: int):
"""Read fractional coordinates from a .castep file."""
species: List[str] = []
custom_species: Optional[List[str]] = None # A CASTEP special thing
positions_frac: List[List[float]] = []
while True:
line = out.readline()
fields = line.split()
if len(fields) == 7:
break
for _ in range(n_atoms):
spec_custom = fields[1].split(':', 1)
elem = spec_custom[0]
if len(spec_custom) > 1 and custom_species is None:
# Add it to the custom info!
custom_species = list(species)
species.append(elem)
if custom_species is not None:
custom_species.append(fields[1])
positions_frac.append([float(s) for s in fields[3:6]])
line = out.readline()
fields = line.split()
return species, custom_species, positions_frac
def _read_stress(out: io.TextIOBase):
"""Read a block for the stress tensor from a .castep file."""
while True:
line = out.readline()
fields = line.split()
if len(fields) == 6:
break
results = {}
stress = []
for _ in range(3):
stress.append([float(s) for s in fields[2:5]])
line = out.readline()
fields = line.split()
# stress in .castep file is given in GPa
results['stress'] = np.array(stress) * units.GPa
results['stress'] = results['stress'].reshape(9)[[0, 4, 8, 5, 2, 1]]
line = out.readline()
if "Pressure:" in line:
results['pressure'] = float(line.split()[-2]) * units.GPa
return results
def _add_atoms(
images,
lattice_real,
species,
custom_species,
positions_frac,
constraints,
results,
):
# If all the lattice parameters are fixed,
# the `Unit Cell` block in the .castep file is not printed
# in every ionic step.
# The lattice paramters are therefore taken from the last step.
# This happens:
# - `GeometryOptimization`: `FIX_ALL_CELL : TRUE`
# - `MolecularDynamics`: `MD_ENSEMBLE : NVE or NVT`
if lattice_real is None:
lattice_real = images[-1].cell.copy()
# for highly symmetric systems (where essentially only the
# stress is optimized, but the atomic positions) positions
# are only printed once.
if species is None:
species = images[-1].symbols
if positions_frac is None:
positions_frac = images[-1].get_scaled_positions()
_set_energy_and_free_energy(results)
atoms = Atoms(
species,
cell=lattice_real,
constraint=constraints,
pbc=True,
scaled_positions=positions_frac,
)
if custom_species is not None:
atoms.new_array(
'castep_custom_species',
np.array(custom_species),
)
atoms.calc = SinglePointCalculator(atoms)
atoms.calc.results = results
images.append(atoms)
def _read_mulliken_charges(out: io.TextIOBase):
"""Read a block for Mulliken charges from a .castep file."""
for i in range(3):
line = out.readline()
if i == 1:
spin_polarized = 'Spin' in line
results = defaultdict(list)
while True:
line = out.readline()
fields = line.split()
if len(fields) == 1:
break
if spin_polarized:
if len(fields) != 7: # due to CASTEP 18 outformat changes
results['charges'].append(float(fields[-2]))
results['magmoms'].append(float(fields[-1]))
else:
results['charges'].append(float(fields[-1]))
return {k: np.array(v) for k, v in results.items()}
def _read_hirshfeld_details(out: io.TextIOBase, n_atoms: int):
"""Read the Hirshfeld analysis when iprint > 1 from a .castep file."""
results = defaultdict(list)
for _ in range(n_atoms):
while True:
line = out.readline()
if line.strip() == '':
break # end for each atom
if 'Hirshfeld / free atomic volume :' in line:
line = out.readline()
fields = line.split()
results['hirshfeld_volume_ratios'].append(float(fields[0]))
return {k: np.array(v) for k, v in results.items()}
def _read_hirshfeld_charges(out: io.TextIOBase):
"""Read a block for Hirshfeld charges from a .castep file."""
for i in range(3):
line = out.readline()
if i == 1:
spin_polarized = 'Spin' in line
results = defaultdict(list)
while True:
line = out.readline()
fields = line.split()
if len(fields) == 1:
break
if spin_polarized:
results['hirshfeld_charges'].append(float(fields[-2]))
results['hirshfeld_magmoms'].append(float(fields[-1]))
else:
results['hirshfeld_charges'].append(float(fields[-1]))
return {k: np.array(v) for k, v in results.items()}
def _set_energy_and_free_energy(results: Dict[str, Any]):
"""Set values referred to as `energy` and `free_energy`."""
if 'energy_with_dispersion_correction' in results:
suffix = '_with_dispersion_correction'
else:
suffix = '_without_dispersion_correction'
if 'free_energy' + suffix in results: # metallic
keye = 'energy_zero' + suffix
keyf = 'free_energy' + suffix
else: # non-metallic
# The finite basis set correction is applied to the energy at finite T
# (not the energy at 0 K). We should hence refer to the corrected value
# as `energy` only when the free energy is unavailable, i.e., only when
# FIX_OCCUPANCY : TRUE and thus no smearing is applied.
if 'energy_with_finite_basis_set_correction' in results:
keye = 'energy_with_finite_basis_set_correction'
else:
keye = 'energy' + suffix
keyf = 'energy' + suffix
results['energy'] = results[keye]
results['free_energy'] = results[keyf]