import uuid
from datetime import datetime as datetime_
from pathlib import Path
from typing import Dict, Optional
from dateutil import parser as date_parser
from sqlalchemy import Column
from sqlalchemy import types as sql_types
from simdb import uri as urilib
from simdb.checksum import sha1_checksum
from simdb.cli.manifest import DataObject
from simdb.config.config import Config
from simdb.docstrings import inherit_docstrings
from simdb.imas.checksum import checksum as imas_checksum
from simdb.imas.utils import imas_files, imas_timestamp
from simdb.remote.models import FileData, FileGetDataResponse, FileInfo
from .base import Base
from .types import URI, UUID
from .utils import checked_get
@inherit_docstrings
class File(Base):
    """
    Class to represent files in the database ORM.

    A row describes a single data object, identified by its URI, together
    with the checksum and timestamp recorded for it. Two kinds of data object
    are handled by the methods below: ``DataObject.Type.FILE`` and
    ``DataObject.Type.IMAS``.
    """

    __tablename__ = "files"

    # Integer primary key of the row.
    id = Column(sql_types.Integer, primary_key=True)
    # Unique identifier; generated in __init__ or copied from serialised data.
    uuid = Column(UUID, nullable=False, unique=True, index=True)
    # Location of the data object. NOTE: ``URI`` here is the column type from
    # ``.types``; the value stored in the attribute is a ``urilib.URI``.
    uri: urilib.URI = Column(URI(1024), nullable=True)
    # Result of generate_checksum(); "" when checksums are disabled by config.
    checksum = Column(sql_types.String(64), nullable=True)
    # Kind of data object (a DataObject.Type member).
    type = Column(sql_types.Enum(DataObject.Type), nullable=True)
    # Timestamp from get_creation_date() or from serialised data.
    datetime = Column(sql_types.DateTime, nullable=False)

    def __init__(
        self,
        type: DataObject.Type,
        uri: urilib.URI,
        ids_list: Optional[list] = None,
        perform_integrity_check: bool = True,
        config: Optional[Config] = None,
    ) -> None:
        """
        Create a new file record with a freshly generated version 1 UUID.

        :param type: Kind of data object the URI refers to.
        :param uri: Location of the data object.
        :param ids_list: IDS names passed to the IMAS checksum; required when
            ``type`` is ``DataObject.Type.IMAS`` and the integrity check runs.
        :param perform_integrity_check: When True, read the timestamp and
            compute the checksum from the data object. When False, neither
            ``datetime`` nor ``checksum`` is set here and the caller is
            expected to assign them (as ``from_data`` does).
        :param config: Optional configuration, consulted for the
            ``development.disable_checksum`` option.
        :raises ValueError: If an IMAS object is checked without ``ids_list``.
        """
        self.uuid = uuid.uuid1()
        self.uri = uri
        self.type = type
        if perform_integrity_check:
            self.datetime = self.get_creation_date()
            if type == DataObject.Type.IMAS and ids_list is None:
                raise ValueError("IDS list is not set")
            self.checksum = self.generate_checksum(config, ids_list or [])

    def __str__(self):
        """Return one ``name: value`` line per field, values column-aligned."""
        field_names = ("uuid", "uri", "checksum", "type", "datetime")
        # Pad after the colon so every value starts in the same column.
        return "".join(
            " {}:{}{}\n".format(name, (14 - len(name)) * " ", getattr(self, name))
            for name in field_names
        )

    def __repr__(self):
        """Return ``"<uuid> (<uri>)"``."""
        return f"{self.uuid} ({self.uri})"

    def generate_checksum(self, config, ids_list: list):
        """
        Compute the checksum of the data object referenced by ``self.uri``.

        :param config: Configuration object or None. If it is truthy and its
            ``development.disable_checksum`` option is truthy, no checksum is
            computed and an empty string is returned.
        :param ids_list: IDS names forwarded to the IMAS checksum routine;
            unused for plain files.
        :return: The checksum, or ``""`` when checksums are disabled.
        :raises NotImplementedError: For any type other than IMAS or FILE.
        """
        if config and config.get_option("development.disable_checksum", default=False):
            return ""
        if self.type == DataObject.Type.IMAS:
            return imas_checksum(self.uri, ids_list)
        if self.type == DataObject.Type.FILE:
            return sha1_checksum(self.uri)
        raise NotImplementedError(f"Cannot generate checksum for type {self.type}.")

    def get_creation_date(self) -> datetime_:
        """
        Return the timestamp recorded for the data object.

        IMAS objects delegate to ``imas_timestamp``. Plain files use the
        ``st_ctime`` of the path in the URI, converted to a naive local
        ``datetime``.

        :raises ValueError: If a FILE object has no path in its URI.
        :raises NotImplementedError: For any type other than IMAS or FILE.
        """
        if self.type == DataObject.Type.IMAS:
            return imas_timestamp(self.uri)
        if self.type == DataObject.Type.FILE:
            if self.uri.path is None:
                raise ValueError("Data object uri path not set")
            # NOTE: st_ctime is platform dependent (metadata-change time on
            # Unix, creation time on Windows); kept as-is for compatibility.
            return datetime_.fromtimestamp(Path(self.uri.path).stat().st_ctime)
        raise NotImplementedError(
            f"Cannot determine creation date for type {self.type}."
        )

    @classmethod
    def from_data(cls, data: Dict) -> "File":
        """
        Build a record from a dictionary, without touching the data object.

        :param data: Mapping with ``type`` (name of a ``DataObject.Type``
            member), ``uri``, ``checksum`` and ``datetime`` as strings and
            ``uuid`` as a ``uuid.UUID``; each is read through ``checked_get``.
        :return: A record carrying the stored uuid, checksum and datetime.
        """
        data_type = checked_get(data, "type", str)
        uri = checked_get(data, "uri", str)
        # Skip the integrity check: checksum and timestamp come from ``data``.
        file = cls(
            DataObject.Type[data_type], urilib.URI(uri), perform_integrity_check=False
        )
        file.uuid = checked_get(data, "uuid", uuid.UUID)
        file.checksum = checked_get(data, "checksum", str)
        file.datetime = date_parser.parse(checked_get(data, "datetime", str))
        return file

    @classmethod
    def from_data_model(cls, data: FileData) -> "File":
        """
        Build a record from a ``FileData`` model, without touching the data
        object.

        :param data: Model whose ``type`` is the name of a
            ``DataObject.Type`` member and whose ``uri``, ``uuid``,
            ``checksum`` and ``datetime`` are copied onto the record.
        """
        # Skip the integrity check: checksum and timestamp come from ``data``.
        file = cls(
            DataObject.Type[data.type],
            urilib.URI(data.uri),
            perform_integrity_check=False,
        )
        file.uuid = data.uuid
        file.checksum = data.checksum
        file.datetime = data.datetime
        return file

    def data(self, recurse: bool = False) -> Dict[str, str]:
        """
        Return the record as a dictionary.

        The URI is stringified, the type is given by member name and the
        datetime in ISO 8601 form; ``uuid`` and ``checksum`` are passed
        through unchanged (so ``uuid`` is not converted to ``str`` here).

        :param recurse: Not used by this implementation.
        """
        return {
            "uuid": self.uuid,
            "uri": str(self.uri),
            "checksum": self.checksum,
            "type": self.type.name,
            "datetime": self.datetime.isoformat(),
        }

    def to_model(self) -> FileData:
        """Return the record as a ``FileData`` model."""
        return FileData(
            type=self.type.name,
            uri=str(self.uri),
            uuid=self.uuid,
            checksum=self.checksum,
            datetime=self.datetime,
        )

    def to_model_with_path(self) -> FileGetDataResponse:
        """
        Return the record as a ``FileGetDataResponse`` listing its files.

        A FILE record lists its own path with the stored checksum. Any other
        type lists every path returned by ``imas_files`` for the URI, each
        with a checksum computed by ``sha1_checksum``.

        :raises ValueError: If a FILE record has no path in its URI.
        """
        if self.type == DataObject.Type.FILE:
            if self.uri.path is None:
                raise ValueError("File path not set")
            files = [FileInfo(path=self.uri.path, checksum=self.checksum)]
        else:
            # Build a ``urilib.URI`` for each path: the bare name ``URI`` in
            # this module is the SQLAlchemy column type, not a URI value.
            files = [
                FileInfo(
                    path=path,
                    checksum=sha1_checksum(urilib.URI(f"file:{path}")),
                )
                for path in imas_files(self.uri)
            ]
        return FileGetDataResponse(
            type=self.type.name,
            uri=str(self.uri),
            uuid=self.uuid,
            checksum=self.checksum,
            datetime=self.datetime,
            files=files,
        )