Source code for mdsuite.database.experiment_database

"""
MDSuite: A Zincwarecode package.

License
-------
This program and the accompanying materials are made available under the terms
of the Eclipse Public License v2.0 which accompanies this distribution, and is
available at https://www.eclipse.org/legal/epl-v20.html

SPDX-License-Identifier: EPL-2.0

Copyright Contributors to the Zincwarecode Project.

Contact Information
-------------------
email: zincwarecode@gmail.com
github: https://github.com/zincware
web: https://zincwarecode.com/

Citation
--------
If you use this module please cite us with:

Summary
-------
"""
from __future__ import annotations

import dataclasses
import logging
from typing import TYPE_CHECKING, Dict, List, Union

import numpy as np

import mdsuite.database.scheme as db
from mdsuite.database.simulation_database import MoleculeInfo, SpeciesInfo
from mdsuite.utils.database import get_or_create
from mdsuite.utils.units import Units

if TYPE_CHECKING:
    from mdsuite import Project

log = logging.getLogger(__name__)


class LazyProperty:
    """Descriptor that mirrors a database entry as an instance attribute.

    Reads are served from the instance ``__dict__`` when a value is cached
    there; otherwise the value is fetched with ``instance.get_db(name)`` and
    cached. Writes go to ``instance.set_db(name, value)`` and drop the cached
    copy, so the next read fetches the stored value again.

    The owning instance must provide ``get_db(name)`` and
    ``set_db(name, value)``.

    References
    ----------
    https://realpython.com/python-descriptors/
    """

    def __set_name__(self, owner, name):
        """Remember the attribute name this descriptor is bound to.

        See https://www.python.org/dev/peps/pep-0487/
        """
        self.name = name

    def __get__(self, instance: "ExperimentDatabase", owner=None):
        """Return the value from memory, loading it from the database if needed.

        Parameters
        ----------
        instance:
            The object the attribute is accessed on, or None when the
            attribute is accessed on the class itself.
        owner:
            The class owning the descriptor (unused).

        Returns
        -------
        The cached or freshly loaded value. When accessed on the class
        (``instance is None``) the descriptor itself is returned.
        """
        if instance is None:
            # Class-level access (e.g. introspection): there is no instance
            # __dict__ and no database to query, so hand back the descriptor.
            return self

        cache = instance.__dict__
        if self.name not in cache:
            cache[self.name] = instance.get_db(self.name)
        return cache[self.name]

    def __set__(self, instance: "ExperimentDatabase", value):
        """Write the value to the database and drop the in-memory copy.

        Assigning None is ignored: nothing is written and the cache is kept.
        """
        if value is None:
            return
        instance.set_db(self.name, value)
        # Invalidate the cache so that the next read returns what was stored.
        instance.__dict__.pop(self.name, None)
class ExperimentDatabase:
    """Access layer between an experiment and the project's SQL database.

    Simple attributes are exposed through ``LazyProperty`` descriptors or
    plain properties that read and write via ``get_db`` / ``set_db``.
    Species and molecule information is cached on the instance and the
    cache is cleared whenever the corresponding setter is used.

    For all property setters in this class, assigning None is a no-op.
    """

    temperature = LazyProperty()
    time_step = LazyProperty()
    number_of_configurations = LazyProperty()
    number_of_atoms = LazyProperty()
    sample_rate = LazyProperty()
    property_groups = LazyProperty()

    def __init__(self, project: Project, name):
        """Bind the database view to a project and an experiment name.

        Parameters
        ----------
        project: Project
            The project providing the database ``session``.
        name:
            Name of the experiment inside the database.
        """
        self.project = project
        self.name = name

        # Property cache
        self._species = None
        self._molecules = None

    def export_property_data(self, parameters: dict) -> List[db.Computation]:
        """Removed: formerly exported property data from the SQL database.

        Parameters
        ----------
        parameters : dict
            Unused. Formerly the parameters of the query, i.e.
            {"Analysis": "Green_Kubo_Self_Diffusion", "Subject": "Na",
            "data_range": 500}

        Raises
        ------
        DeprecationWarning
            Always; this function has been replaced by queue_database.
        """
        raise DeprecationWarning(
            "This function has been removed and replaced by queue_database"
        )

    def set_db(self, name: str, value):
        """Store a value in the database.

        Parameters
        ----------
        name: str
            Name of the database entry
        value:
            Any serializable data type that can be written to the database.
            Values that are not a dict are wrapped as
            ``{"serialized_value": value}`` before being stored.
        """
        with self.project.session as ses:
            experiment = get_or_create(ses, db.Experiment, name=self.name)
            if not isinstance(value, dict):
                value = {"serialized_value": value}
            attribute: db.ExperimentAttribute = get_or_create(
                ses, db.ExperimentAttribute, experiment=experiment, name=name
            )
            attribute.data = value
            ses.commit()

    def get_db(self, name: str, default=None):
        """Load a value from the database.

        Parameters
        ----------
        name: str
            Name of the database entry to query
        default: default=None
            Value to return if no entry is present

        Returns
        -------
        Any:
            The entry that was put in the database; can be any json
            serializable data. If the stored dictionary has no
            "serialized_value" key, the dictionary itself is returned.

        Notes
        -----
        Internally the values will be converted to dict, so e.g. tuples or
        sets might be converted to lists.
        """
        with self.project.session as ses:
            experiment = get_or_create(ses, db.Experiment, name=self.name)
            attribute: db.ExperimentAttribute = (
                ses.query(db.ExperimentAttribute)
                .filter(db.ExperimentAttribute.experiment == experiment)
                .filter(db.ExperimentAttribute.name == name)
                .first()
            )
            if attribute is None:
                log.debug(f"Got no database entries for {name}")
                return default
            data = attribute.data

        try:
            # Non-dict values were wrapped by set_db; unwrap them again.
            return data["serialized_value"]
        except KeyError:
            log.debug(f"Returning a dictionary for {name}")
            return data

    @staticmethod
    def _serializable_entries(entries: dict, info_type: type) -> dict:
        """Convert {name: info object or dict} into plain dicts for storage.

        Instances of ``info_type`` are converted with ``dataclasses.asdict``;
        anything else is shallow-copied with ``dict(...)`` so that the
        caller's data is not modified. The keys "name" and "indices" are
        removed from every entry: the name is already used as the key, and
        indices must not be written to the SQL database.
        """
        processed = {}
        for entry_name, entry in entries.items():
            if isinstance(entry, info_type):
                entry_data = dataclasses.asdict(entry)
            else:
                entry_data = dict(entry)
            entry_data.pop("name", None)
            entry_data.pop("indices", None)
            processed[entry_name] = entry_data
        return processed

    @property
    def active(self):
        """Get the state (activated or not) of the experiment"""
        with self.project.session as ses:
            experiment = get_or_create(ses, db.Experiment, name=self.name)
            return experiment.active

    @active.setter
    def active(self, value):
        """Set the state (activated or not) of the experiment"""
        if value is None:
            return
        with self.project.session as ses:
            experiment = get_or_create(ses, db.Experiment, name=self.name)
            experiment.active = value
            ses.commit()

    @property
    def species(self) -> Dict[str, SpeciesInfo]:
        """Get species

        The result is cached on the instance until the setter is used.

        Returns
        -------
        dict[str, SpeciesInfo]:
            A dictionary of species such as {Li: SpeciesInfo}
        """
        if self._species is None:
            with self.project.session as ses:
                experiment = (
                    ses.query(db.Experiment)
                    .filter(db.Experiment.name == self.name)
                    .first()
                )
                self._species = {
                    key: SpeciesInfo(name=key, **val)
                    for key, val in experiment.get_species().items()
                }

        return self._species

    @species.setter
    def species(self, value: dict):
        """Save the SpeciesInfo to the SQL database

        Parameters
        ----------
        value: dict
            A dictionary of {element: SpeciesInfo} or {element: dict}.
            The passed objects are not modified.

        Raises
        ------
        ValueError
            If value is neither None nor a dict.
        """
        if value is None:
            return
        if not isinstance(value, dict):
            raise ValueError(
                "species must be a dict[str, SpeciesInfo] or dict[str, dict]"
            )

        # Do not allow the key "indices" in the SQL database!
        processed_value = self._serializable_entries(value, SpeciesInfo)

        # Drop the cache so that the next read reflects the database.
        self._species = None
        with self.project.session as ses:
            experiment = (
                ses.query(db.Experiment).filter(db.Experiment.name == self.name).first()
            )
            for species_name, species_data in processed_value.items():
                species = get_or_create(
                    ses, db.ExperimentSpecies, name=species_name, experiment=experiment
                )
                species.data = species_data
            ses.commit()

    @property
    def molecules(self) -> Dict[str, MoleculeInfo]:
        """Get the molecules dict

        The result is cached on the instance until the setter is used.

        Returns
        -------
        dict[str, MoleculeInfo]:
            A dictionary of {molecule name: MoleculeInfo}
        """
        if self._molecules is None:
            with self.project.session as ses:
                experiment = (
                    ses.query(db.Experiment)
                    .filter(db.Experiment.name == self.name)
                    .first()
                )
                stored_molecules = experiment.get_molecules()
                # Convert the stored dicts to MoleculeInfo; "properties"
                # defaults to an empty list if it is not stored. The mapping
                # is built completely before it is cached, so a failing
                # conversion cannot leave a half-converted cache behind.
                molecules = {
                    molecule_name: MoleculeInfo(
                        name=molecule_name, **{"properties": [], **molecule_data}
                    )
                    for molecule_name, molecule_data in stored_molecules.items()
                }
            self._molecules = molecules

        return self._molecules

    @molecules.setter
    def molecules(self, value):
        """Save the molecules dict to the database

        Parameters
        ----------
        value: dict
            A dictionary of {molecule name: MoleculeInfo} or
            {molecule name: dict}. The passed objects are not modified.
        """
        if value is None:
            return

        processed_value = self._serializable_entries(value, MoleculeInfo)

        # Drop the cache so that the next read reflects the database.
        self._molecules = None
        with self.project.session as ses:
            experiment = (
                ses.query(db.Experiment).filter(db.Experiment.name == self.name).first()
            )
            for molecule_name, molecule_data in processed_value.items():
                molecule = get_or_create(
                    ses,
                    db.ExperimentSpecies,
                    name=molecule_name,
                    experiment=experiment,
                    molecule=True,
                )
                molecule.data = molecule_data
            ses.commit()

    # Almost Lazy Properties
    @property
    def box_array(self):
        """Get the box_array of the experiment"""
        return self.get_db(name="box_array")

    @box_array.setter
    def box_array(self, value):
        """Set the box_array of the experiment

        numpy arrays are converted to lists before they are stored.
        """
        if value is None:
            return
        if isinstance(value, np.ndarray):
            value = value.tolist()
        self.set_db(name="box_array", value=value)

    @property
    def units(self) -> Union[Units, None]:
        """Get the units of the experiment, or None if none are stored"""
        dict_data = self.get_db(name="units")
        if dict_data is None:
            return None
        return Units(**dict_data)

    @units.setter
    def units(self, value: Units):
        """Set the units of the experiment"""
        if value is None:
            return
        self.set_db(name="units", value=dataclasses.asdict(value))

    @property
    def read_files(self):
        """Get the files that were already added to the database

        Returns
        -------
        read_files: list[str]
            The stored "read_files" entry, or an empty list if there is none
        """
        return self.get_db(name="read_files", default=[])

    @read_files.setter
    def read_files(self, value):
        """Store the "read_files" entry in the database

        The given value replaces the stored entry; it is not appended to it.

        Parameters
        ----------
        value:
            The new value of the "read_files" entry
        """
        if value is None:
            return
        self.set_db(name="read_files", value=value)

    @property
    def simulation_data(self) -> dict:
        """Load the simulation data from the database

        Returns
        -------
        dict:
            The stored simulation data, or an empty dict if there is none
        """
        return self.get_db(name="simulation_data", default={})

    @simulation_data.setter
    def simulation_data(self, value: dict):
        """Store the simulation data in the database

        The given value replaces the stored "simulation_data" entry.

        Parameters
        ----------
        value: dict
            A dictionary containing the (new) simulation data
        """
        if value is None:
            return
        self.set_db(name="simulation_data", value=value)

    @property
    def version(self) -> int:
        """Get the version of the experiment

        Versioning starts at 0 and can be increased by +1 for every added file
        """
        return self.get_db(name="version", default=0)

    @version.setter
    def version(self, value: int):
        """Update the version of the experiment

        Can be used to differentiate between different experiment versions
        in calculations
        """
        if value is None:
            return
        self.set_db(name="version", value=value)

    # On the fly properties
    @property
    def volume(self):
        """Compute the volume as the product of the box_array entries"""
        return np.prod(self.box_array)