Source code for simdb.database.models.file

import uuid
from datetime import datetime as datetime_
from pathlib import Path
from typing import Dict, Optional

from dateutil import parser as date_parser
from sqlalchemy import Column
from sqlalchemy import types as sql_types

from simdb import uri as urilib
from simdb.checksum import sha1_checksum
from simdb.cli.manifest import DataObject
from simdb.config.config import Config
from simdb.docstrings import inherit_docstrings
from simdb.imas.checksum import checksum as imas_checksum
from simdb.imas.utils import imas_files, imas_timestamp
from simdb.remote.models import FileData, FileGetDataResponse, FileInfo

from .base import Base
from .types import URI, UUID
from .utils import checked_get


@inherit_docstrings
class File(Base):
    """
    Class to represent files in the database ORM.

    Each row describes one data object (a plain file or an IMAS entry)
    identified by a UUID and located by a URI, together with its checksum
    and a timestamp.
    """

    __tablename__ = "files"

    # NOTE: several column names below (id, uuid, type, datetime) shadow
    # builtins/modules inside the class body only. Method bodies still resolve
    # ``uuid`` to the imported module, and the stdlib datetime class is
    # imported as ``datetime_`` for the same reason.
    id = Column(sql_types.Integer, primary_key=True)
    uuid = Column(UUID, nullable=False, unique=True, index=True)
    uri: urilib.URI = Column(URI(1024), nullable=True)
    checksum = Column(sql_types.String(64), nullable=True)
    type = Column(sql_types.Enum(DataObject.Type), nullable=True)
    datetime = Column(sql_types.DateTime, nullable=False)

    def __init__(
        self,
        type: DataObject.Type,
        uri: urilib.URI,
        ids_list: Optional[list] = None,
        perform_integrity_check: bool = True,
        config: Optional[Config] = None,
    ) -> None:
        """
        Create a new file record with a freshly generated UUID.

        :param type: Kind of data object the URI refers to.
        :param uri: Location of the data object.
        :param ids_list: IDS names passed to the IMAS checksum; required when
            ``type`` is IMAS and ``perform_integrity_check`` is true.
        :param perform_integrity_check: When true, populate ``datetime`` and
            ``checksum`` from the data object itself. When false, both are
            left unset and the caller is expected to assign them.
        :param config: Optional configuration, consulted for the
            ``development.disable_checksum`` option.
        :raises ValueError: If an IMAS object is checked without ``ids_list``.
        """
        self.uuid = uuid.uuid1()
        self.uri = uri
        self.type = type
        if perform_integrity_check:
            self.datetime = self.get_creation_date()
            if type == DataObject.Type.IMAS and ids_list is None:
                raise ValueError("IDS list is not set")
            self.checksum = self.generate_checksum(config, ids_list or [])

    def __str__(self):
        """Return a multi-line ``name: value`` listing of the main fields."""
        field_names = ("uuid", "uri", "checksum", "type", "datetime")
        # Pad after the colon so that values line up in a single column.
        return "".join(
            " {}:{}{}\n".format(
                name,
                (14 - len(name)) * " ",
                getattr(self, name),
            )
            for name in field_names
        )

    def __repr__(self):
        """Return a short ``<uuid> (<uri>)`` identifier."""
        return f"{self.uuid} ({self.uri})"

    def generate_checksum(self, config, ids_list: list):
        """
        Compute the checksum of the data object behind ``self.uri``.

        :param config: Configuration object or ``None``; when its
            ``development.disable_checksum`` option is truthy an empty string
            is returned instead of a real checksum.
        :param ids_list: IDS names forwarded to the IMAS checksum routine.
        :return: The checksum, or ``""`` when checksums are disabled.
        :raises NotImplementedError: For types other than IMAS and FILE.
        """
        if config and config.get_option("development.disable_checksum", default=False):
            return ""
        if self.type == DataObject.Type.IMAS:
            return imas_checksum(self.uri, ids_list)
        if self.type == DataObject.Type.FILE:
            return sha1_checksum(self.uri)
        raise NotImplementedError(f"Cannot generate checksum for type {self.type}.")

    def get_creation_date(self) -> datetime_:
        """
        Return the timestamp associated with the underlying data object.

        :return: The IMAS timestamp for IMAS objects, or a naive local
            ``datetime`` built from the file's ``st_ctime`` for plain files.
        :raises ValueError: If a FILE object has no path in its URI.
        :raises NotImplementedError: For types other than IMAS and FILE.
        """
        if self.type == DataObject.Type.IMAS:
            return imas_timestamp(self.uri)
        if self.type == DataObject.Type.FILE:
            if self.uri.path is None:
                raise ValueError("Data object uri path not set")
            # NOTE(review): st_ctime is the metadata-change time on Unix and
            # the creation time only on Windows -- confirm this is intended.
            return datetime_.fromtimestamp(Path(self.uri.path).stat().st_ctime)
        raise NotImplementedError(
            f"Cannot get creation date for type {self.type}."
        )

    @classmethod
    def from_data(cls, data: Dict) -> "File":
        """
        Build a ``File`` from a plain dictionary.

        The dictionary must provide ``type`` (enum member name), ``uri``,
        ``checksum`` and ``datetime`` as strings and ``uuid`` as a
        ``uuid.UUID``; the datetime string is parsed with ``dateutil``.
        No integrity check is performed on the referenced data.
        """
        data_type = checked_get(data, "type", str)
        uri = checked_get(data, "uri", str)
        file = File(
            DataObject.Type[data_type], urilib.URI(uri), perform_integrity_check=False
        )
        # Overwrite the UUID generated by __init__ with the stored one.
        file.uuid = checked_get(data, "uuid", uuid.UUID)
        file.checksum = checked_get(data, "checksum", str)
        file.datetime = date_parser.parse(checked_get(data, "datetime", str))
        return file

    @classmethod
    def from_data_model(cls, data: FileData) -> "File":
        """
        Build a ``File`` from a ``FileData`` model.

        The model's ``uuid``, ``checksum`` and ``datetime`` are copied as-is;
        no integrity check is performed on the referenced data.
        """
        file = File(
            DataObject.Type[data.type],
            urilib.URI(data.uri),
            perform_integrity_check=False,
        )
        file.uuid = data.uuid
        file.checksum = data.checksum
        file.datetime = data.datetime
        return file

    def data(self, recurse: bool = False) -> Dict[str, str]:
        """
        Serialise this file to a dictionary.

        ``recurse`` is accepted but not used here. Note that ``uuid`` is
        passed through unchanged (not converted to ``str``), while the URI,
        type name and ISO-formatted datetime are strings.
        """
        return {
            "uuid": self.uuid,
            "uri": str(self.uri),
            "checksum": self.checksum,
            "type": self.type.name,
            "datetime": self.datetime.isoformat(),
        }

    def to_model(self) -> FileData:
        """Convert this file to a ``FileData`` model."""
        return FileData(
            type=self.type.name,
            uri=str(self.uri),
            uuid=self.uuid,
            checksum=self.checksum,
            datetime=self.datetime,
        )

    def to_model_with_path(self) -> FileGetDataResponse:
        """
        Convert this file to a ``FileGetDataResponse`` listing concrete files.

        A FILE object yields a single entry with its own path and stored
        checksum. Any other type is expanded through ``imas_files`` and each
        resulting path gets its own SHA-1 checksum.

        :raises ValueError: If a FILE object has no path in its URI.
        """
        if self.type == DataObject.Type.FILE:
            if self.uri.path is None:
                raise ValueError("File path not set")
            files = [FileInfo(path=self.uri.path, checksum=self.checksum)]
        else:
            # Build a real URI object here: the bare name ``URI`` in this
            # module is the SQLAlchemy column type, not ``urilib.URI``.
            files = [
                FileInfo(
                    path=path,
                    checksum=sha1_checksum(urilib.URI(f"file:{path}")),
                )
                for path in imas_files(self.uri)
            ]
        return FileGetDataResponse(
            type=self.type.name,
            uri=str(self.uri),
            uuid=self.uuid,
            checksum=self.checksum,
            datetime=self.datetime,
            files=files,
        )