"""
Created on 2024-01-27
@author: wf, using ChatGPT-4 prompting
"""
from dataclasses import fields
from collections.abc import Iterable, Mapping
from rdflib import BNode, Graph, Literal, Namespace, URIRef
from rdflib.namespace import RDF
from lodstorage.linkml_gen import PythonTypes, Schema
from typing import Any
[docs]
class RDFDumper:
    """
    A class to convert instances of data models (based on a LinkML schema) into an RDF graph.
    """

    def __init__(self, schema: "Schema", instance: object):
        """
        Initialize the RDFDumper.

        Args:
            schema (Schema): The LinkML schema defining the structure of the data models.
            instance (object): The instance of the data model to be converted into RDF.
        """
        self.schema = schema
        self.instance = instance
        self.graph = Graph()
        # one rdflib Namespace per prefix declared in the schema
        self.namespaces = {
            prefix: Namespace(uri) for prefix, uri in schema.prefixes.items()
        }

    def convert_to_rdf(self):
        """
        Converts the provided instance into RDF triples based on the LinkML schema.

        Nothing is added to the graph when the class name of the instance
        is not listed in the classes of the schema.
        """
        # Process the instance data according to its class in the schema
        instance_class = self.instance.__class__.__name__
        if instance_class in self.schema.classes:
            self.process_class(instance_class, self.instance)

    def serialize(self, rdf_format: str = "turtle") -> str:
        """
        Serializes the RDF graph into a string representation in the specified format.

        Args:
            rdf_format (str): The serialization format (e.g., 'turtle', 'xml', 'json-ld').

        Returns:
            str: The serialized RDF graph.
        """
        return self.graph.serialize(format=rdf_format)

    def value_iterator(self, value: Any):
        """
        Iterates over values in a mapping or iterable.

        Args:
            value: The value to iterate over. It can be a mapping, iterable, or a single value.

        Yields:
            Tuples of (key, value) from the input value. For single values
            and for members of a non-mapping iterable the key is None.
        """
        if isinstance(value, Mapping):
            yield from value.items()
        elif isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
            # str and bytes are iterable but are treated as single values
            yield from ((None, v) for v in value)
        else:
            yield (None, value)

    def _slot_items(self, field_value: Any):
        """
        Yield the (key, item) pairs of a slot value that should become triples.

        Works like value_iterator but drops un-keyed None items, i.e. an
        unset single value or a None member of an iterable. Mapping entries
        are always passed through, even when their value is None, so the
        caller can still link the key.

        Args:
            field_value: The raw value of the slot.

        Yields:
            Tuples of (key, item) as produced by value_iterator.
        """
        for key, item in self.value_iterator(field_value):
            if key is None and item is None:
                continue
            yield key, item

    def _resolve_base_uri(self):
        """
        Look up the namespace of the schema's default prefix and remember it as base_uri.

        Returns:
            The namespace registered for schema.default_prefix.

        Raises:
            KeyError: if the default prefix is not among the schema prefixes.
        """
        self.base_uri = self.namespaces[self.schema.default_prefix]
        return self.base_uri

    def process_class(self, class_name: str, instance_data: object):
        """
        Add the triples for one instance and for the identified instances it refers to.

        The instance is typed with its class and every dataclass field that
        is also a slot of the schema is added to the graph. Items carrying a
        truthy 'identifier' are linked by URI and processed themselves; each
        object is processed at most once per call, so instances that refer
        to each other do not lead to endless recursion.

        Args:
            class_name (str): The name of the class of the instance.
            instance_data (object): The dataclass instance to process.
        """
        # Get the base namespace URI
        self._resolve_base_uri()
        # a fresh visited set per call keeps repeated calls independent
        self._process_instance(class_name, instance_data, set())

    def _process_instance(self, class_name: str, instance_data: object, visited: set):
        """
        Recursive worker of process_class.

        Args:
            class_name (str): The name of the class of the instance.
            instance_data (object): The dataclass instance to process.
            visited (set): ids of the objects already processed in this traversal.
        """
        visited.add(id(instance_data))
        # Construct class_uri using the namespace and class_name with a separator
        # NOTE(review): the ':' separator differs from the Namespace[key]
        # form used for mapping keys below - kept as is since the
        # serialized output depends on it; confirm which form is intended
        class_uri = URIRef(f"{self.base_uri}:{class_name}")
        # Create a unique URI or a Blank Node for the instance
        instance_uri = self.get_instance_uri(instance_data)
        # Type the instance with its class
        self.graph.add((instance_uri, RDF.type, class_uri))
        # loop over all fields of the instance data
        for field_info in fields(instance_data):
            slot_name = field_info.name
            # assure we only work on fields defined
            # in our schema
            slot_obj = self.schema.slots.get(slot_name)
            if not slot_obj:
                continue
            # Combine the namespace with the slot name to form the field URI
            field_uri = URIRef(f"{self.base_uri}:{slot_name}")
            field_value = getattr(instance_data, slot_name, None)
            # unset (None) values are filtered out by _slot_items
            for key, item in self._slot_items(field_value):
                if key is not None:
                    # Handle as a mapping: link the key and attach the value to it
                    key_uri = URIRef(self.base_uri[key])
                    self.graph.add((instance_uri, field_uri, key_uri))
                    if item is not None:
                        # no rdf:value triple for an entry without a value
                        self.graph.add(
                            (key_uri, RDF.value, self.convert_to_literal(item, slot_obj))
                        )
                elif getattr(item, "identifier", None):
                    # an item with an identifier is linked by its URI ...
                    item_uri = self.get_instance_uri(item)
                    self.graph.add((instance_uri, field_uri, item_uri))
                    # ... and described itself unless that already happened
                    if id(item) not in visited:
                        self._process_instance(item.__class__.__name__, item, visited)
                else:
                    # Handle as a single value or an item from an iterable
                    self.graph.add(
                        (
                            instance_uri,
                            field_uri,
                            self.convert_to_literal(item, slot_obj),
                        )
                    )

    def get_instance_uri(self, instance_data):
        """
        Generates a URI for an instance.

        If the instance has a truthy 'identifier' attribute, the identifier
        is appended to the base namespace to form the URI. Otherwise a new
        blank node is returned.

        Args:
            instance_data: The instance to get the URI for.

        Returns:
            A URIRef for identified instances, a BNode otherwise.
        """
        identifier = getattr(instance_data, "identifier", None)
        if not identifier:
            # Fallback to a blank node if no identifier is found
            return BNode()
        # base_uri is only assigned once process_class has run - resolve it
        # here so this method can also be called on its own
        base_uri = getattr(self, "base_uri", None)
        if base_uri is None:
            base_uri = self._resolve_base_uri()
        return URIRef(f"{base_uri}:{identifier}")

    def convert_to_literal(self, value, slot_obj):
        """
        Converts a value to an RDFLib Literal with appropriate datatype.

        Args:
            value: The value to be converted.
            slot_obj: The slot object containing information about the field
                (currently not used for the conversion).

        Returns:
            An RDFLib Literal with the value and appropriate datatype.
        """
        # Determine the datatype based on the Python type of the value
        datatype = PythonTypes.get_rdf_datatype(type(value))
        # Create and return the literal
        return Literal(value, datatype=datatype)