Spaces:
Paused
Paused
# huggingface_dataset_manager.py
import logging
from typing import Any, Dict, List

from datasets import Dataset, load_dataset

# Configure the root logger once at import time so that every message emitted
# by this module carries a timestamp and a severity level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
class HuggingFaceDatasetManager:
    """Append row dictionaries to a dataset hosted on the Hugging Face Hub.

    Both public methods load one split of ``dataset_name``, append the given
    rows to it and push the result back to the same repository. Failures are
    logged rather than raised, so callers never see an exception from them.

    Attributes:
        dataset_name: Hub repository id that is loaded from and pushed to.
        split: Name of the split that new rows are appended to.
    """

    def __init__(self, dataset_name: str, split: str = "train"):
        """Remember which repository and split to operate on.

        Args:
            dataset_name: Hub repository id of the dataset.
            split: Split to load and extend. Defaults to ``"train"``.
        """
        self.dataset_name = dataset_name
        self.split = split

    def persist_to_dataset(self, metadata_list: List[Dict[str, Any]]):
        """Append ``metadata_list`` to the dataset and push it to the Hub.

        Args:
            metadata_list: Rows to append, one dictionary per row. The keys
                of the first row define the columns; every other row must
                provide the same keys.

        Returns:
            None. An empty list logs a warning and does nothing; any failure
            is logged with its traceback and swallowed.
        """
        if not metadata_list:
            logging.warning("No metadata to persist.")
            return
        try:
            self._append_and_push(metadata_list)
        except Exception as e:
            # Best-effort by design: report the failure, keep the caller alive.
            logging.exception("Error persisting to dataset: %s", e)
        else:
            logging.info(f"Updated and pushed dataset: {self.dataset_name}")

    def update_dataset(self, new_data: List[Dict[str, Any]]):
        """Append ``new_data`` to the dataset and push it to the Hub.

        Args:
            new_data: Rows to append, one dictionary per row. The keys of the
                first row define the columns; every other row must provide
                the same keys.

        Returns:
            None. An empty list logs a warning and does nothing; any failure
            is logged with its traceback and swallowed.
        """
        if not new_data:
            # Without this guard ``new_data[0]`` would raise IndexError after
            # the dataset had already been downloaded for nothing.
            logging.warning("No new data to update.")
            return
        try:
            self._append_and_push(new_data)
        except Exception as e:
            # Best-effort by design: report the failure, keep the caller alive.
            logging.exception("Error updating Hugging Face dataset: %s", e)
        else:
            logging.info(f"Updated Hugging Face dataset: {self.dataset_name}")

    def _append_and_push(self, rows: List[Dict[str, Any]]) -> None:
        """Load the configured split, append ``rows`` and push the result.

        Raises:
            Exception: Whatever ``datasets`` raises while loading, building,
                concatenating or pushing (e.g. a missing key in a row or a
                schema mismatch). Callers are expected to handle it.
        """
        # Imported here so the block does not depend on a change to the
        # module-level import list.
        from datasets import concatenate_datasets

        # Request an explicit split: without it ``load_dataset`` returns a
        # DatasetDict, which cannot be extended row-wise.
        existing = load_dataset(self.dataset_name, split=self.split)

        # Pivot the list of row dicts into the column-oriented mapping that
        # ``Dataset.from_dict`` expects.
        columns = {key: [row[key] for row in rows] for key in rows[0]}
        additions = Dataset.from_dict(columns)

        # ``Dataset.add_item`` takes a single row dict, so a batch of rows is
        # appended by concatenation instead.
        # NOTE(review): assumes the new rows' inferred schema is compatible
        # with the existing split - confirm for columns with nullable or
        # mixed types.
        combined = concatenate_datasets([existing, additions])
        combined.push_to_hub(self.dataset_name, split=self.split)