latmatcher / backend /db_utils /structure_database.py
AndreiVoicuT's picture
Upload 85 files
1c703f0 verified
raw
history blame
6.36 kB
"""
Structure of our main database:
DB
├── unique_id.json :(dictionary)
│   ├── unique_id [str]
│   ├── formula [str]
│   ├── key_words [list(str)]
│   ├── source: (dictionary)
│   │   ├── name: [str](ex: JARVIS /C2DB /None etc)
│   │   └── source_id:[str]
│   ├── structure:(dictionary)
│   │   ├── atoms [list(int)]
│   │   ├── lattice_vectors [3x3[array]]
│   │   ├── pbc [bool, bool ,bool]
│   │   └── position [nx3[array]] n = number of atoms
│   ├── sub_components: [list(str)] (list of parent ids)
│   ├── propriety: (dictionary)
│   │   ├── energy: (list[prop_dictionary])
│   │   ├── band gap: (list[prop_dictionary])
│   │   ├── band structure:(list[prop_dictionary])
│   │   ├── ....
│   │   └── density of states:(list[prop_dictionary])
│   │
│   ├── citation:"str"
│   └── author: (dictionary)
│       ├── name
│       └── id
├─────────────────────────────────────────────────────────────────────────────────
│
├── prop_dictionary
│   ├── name :(str) energy
│   ├── value: (int float list )
│   └── source: (dictionary)
│       ├── tipe: (DB/computation/author_enforce)
│       ├── citation:"str"
│       └── description: (dictionary)
│           ├── author_id : (str)
│           └── method_name : (str)
│           ├── hyperparameters :(dictionary)
│           └── inputs :(dictionary)
│
└─────────────────────────────────────────────────────────────────────────────────
"""
from .utils import read_json , process_chemical_formula
from .parse_c2db import *
from .parse_jarvis import *
# Constants:
# Location of the JARVIS JSON dump that DBInstance.from_jarvis_json reads when
# the caller does not pass `jarvis_data`.
# NOTE(review): this is an absolute path under a specific user's home
# directory — presumably it only resolves on the original author's machine;
# confirm whether it should come from configuration instead.
JARVIS_DB_path = "/Users/voicutomut/Documents/GitLab/bespoke-interface/DB_experiments/JARVIS2D/d2-12-12-2022.json"
class DBInstance:
    """One entry of the structure database, held in memory.

    An instance starts empty and is filled by one of the ``from_*`` loaders,
    each of which returns ``self`` so calls can be chained, e.g.
    ``DBInstance().from_json_file(path)``.  ``json_descript`` turns the
    instance back into a plain dictionary.

    Attributes:
        unique_id: identifier of the entry (``None`` until loaded).
        formula: chemical formula (``None`` until loaded).
        key_words: list of search key words.
        source: dictionary describing where the entry came from.
        structure: dictionary describing the atomic structure.
        sub_components: list of parent ids.
        propriety: maps a property name to a list of property objects; each
            object must expose ``json_descript()`` (``None`` entries are
            skipped on export).
        author: dictionary with the author's ``name`` and ``id``.
    """

    def __init__(self):
        self.unique_id = None
        self.formula = None
        self.key_words = []
        self.source = None
        self.structure = {}
        self.sub_components = []
        self.propriety = {}
        self.author = {}

    @staticmethod
    def _propriety_from_dict(entry):
        """Rebuild one ``DBPropriety`` (with its ``Source``) from its dict form."""
        # NOTE(review): the schema in the module docstring lists ``citation``
        # and nests ``author_id`` under ``description``, while the keys read
        # here are ``cite`` and a top-level ``author_id`` — confirm against
        # what the property objects actually write out.
        src = entry["source"]
        source = Source(src["tipe"],
                        src["author_id"],
                        src["description"],
                        src["cite"])
        return DBPropriety(entry["name"], entry["value"], source)

    def from_json_file(self, file_name):
        """Fill this instance from a JSON file in the database's own layout.

        Args:
            file_name: path handed to ``read_json``.

        Returns:
            ``self``.

        Raises:
            KeyError: if a required key is missing from the file content.
        """
        data = read_json(file_name)
        self.unique_id = data["unique_id"]
        self.formula = data["formula"]
        self.key_words = data["key_words"]
        self.structure = data["structure"]
        self.sub_components = data["sub_components"]
        self.source = data["source"]
        # Every property name maps to a list of serialized properties; each
        # one is turned back into a property object.
        self.propriety = {
            key: [self._propriety_from_dict(entry) for entry in entries]
            for key, entries in data["propriety"].items()
        }
        self.author = data["author"]
        return self

    def from_c2db_jason(self, file_path):
        """Fill this instance from a C2DB JSON export.

        Args:
            file_path: path handed to ``read_c2db_json``; its last
                ``/``-separated component is recorded as the source file.

        Returns:
            ``self``.
        """
        c2 = read_c2db_json(file_path)
        structure_json = c2['structure.json']

        self.unique_id = from_c2db_unique_id(structure_json)
        self.formula = from_c2db_formula(structure_json)
        self.structure = from_c2db_structure(structure_json)
        self.source = {
            "name": "C2DB",
            "id": from_c2db_unique_id(structure_json),
            "file": file_path.split("/")[-1],
            "cite": "DOI 10.1088/2053-1583/aacfc1",
        }
        self.propriety = {
            "energy": [from_c2db_energy(structure_json)],
            "band_gap": [from_c2db_band_gap(c2)],
            "band_structure": [from_c2db_band_structure(c2)],
            "density_of_states": [from_c2db_density_of_states(c2)],
        }
        # Todo: (left unspecified in the original source)
        self.key_words = process_chemical_formula(self.formula)
        # An imported entry has no parents.  Use an empty list (not a dict)
        # so the type matches __init__ and the documented list-of-ids schema.
        self.sub_components = []
        self.author = {"name": "c2db",
                       "id": "c2db"}
        return self

    def from_jarvis_json(self, jarvis_id, jarvis_data=None):
        """Fill this instance from one record of a JARVIS JSON dump.

        Args:
            jarvis_id: id of the record to load; also used as ``unique_id``.
            jarvis_data: path of the JARVIS dump.  When omitted (``None``) the
                module-level ``JARVIS_DB_path`` is used; it is looked up at
                call time so that reassigning the constant takes effect.

        Returns:
            ``self``.
        """
        if jarvis_data is None:
            jarvis_data = JARVIS_DB_path

        self.unique_id = jarvis_id
        jr_data = read_jarvis(jarvis_data)
        jr = get_jarvis_instance(jr_data, jarvis_id)

        self.formula = from_jarvis_formula(jr)
        self.structure = from_jarvis_structure(jr)
        self.source = {
            "name": "Jarvis",
            "id": jarvis_id,
            "file": jarvis_data.split("/")[-1],
            "cite": "https://jarvis.nist.gov",
        }
        self.propriety = {
            "energy": [from_jarvis_energy(jr)],
            "band_gap": [from_jarvis_band_gap(jr)],
            "band_structure": [from_jarvis_band_structure(jr)],
            "density_of_states": [from_jarvis_density_of_states(jr)],
        }
        self.key_words = process_chemical_formula(self.formula)
        # Same convention as __init__ and the schema: a list of parent ids.
        self.sub_components = []
        self.author = {"name": "jarvis",
                       "id": "jarvis"}
        return self

    def json_descript(self):
        """Return this instance as a plain dictionary.

        Each property object is replaced by the result of its own
        ``json_descript()``; ``None`` placeholders in a property list are
        dropped.  All other attributes are passed through unchanged.
        """
        propriety = {
            key: [prop.json_descript() for prop in props if prop is not None]
            for key, props in self.propriety.items()
        }
        return {
            "unique_id": self.unique_id,
            "formula": self.formula,
            "key_words": self.key_words,
            "source": self.source,
            "structure": self.structure,
            "sub_components": self.sub_components,
            "propriety": propriety,
            "author": self.author,
        }
# inst = DBInstance().from_c2db_jason("/Users/voicutomut/Desktop/WS2-64090c9845f8_data.json")
# print(inst.propriety["energy"][0].value)