1
1
import logging
2
- from typing import Dict , Iterable , List , Any , Optional
3
- import typing
2
+ from typing import Dict , List , Any
4
3
from collections import defaultdict
5
4
6
5
from linkml .utils .schema_builder import SchemaBuilder
10
9
SlotDefinition ,
11
10
ClassDefinition ,
12
11
)
13
- # from funowl.converters.functional_converter import to_python
14
- # from funowl import *
12
+ from funowl .converters .functional_converter import to_python
13
+ from funowl import *
15
14
16
15
from dataclasses import dataclass , field
17
16
18
17
from linkml_runtime .utils .formatutils import underscore
19
18
from linkml_runtime .utils .introspection import package_schemaview
20
- from rdflib import Graph , RDF , OWL , URIRef , RDFS , SKOS , SDO , Namespace , Literal
19
+ from rdflib import Graph , RDF , OWL , URIRef , RDFS , SKOS , SDO , Namespace
21
20
from schema_automator .importers .import_engine import ImportEngine
22
21
from schema_automator .utils .schemautils import write_schema
23
22
24
23
25
24
HTTP_SDO = Namespace ("http://schema.org/" )
26
25
27
- DEFAULT_METAMODEL_MAPPINGS : Dict [ str , List [ URIRef ]] = {
26
+ DEFAULT_METAMODEL_MAPPINGS = {
28
27
"is_a" : [RDFS .subClassOf , SKOS .broader ],
29
28
"domain_of" : [HTTP_SDO .domainIncludes , SDO .domainIncludes ],
30
- "range " : [HTTP_SDO .rangeIncludes , SDO .rangeIncludes ],
29
+ "rangeIncludes " : [HTTP_SDO .rangeIncludes , SDO .rangeIncludes ],
31
30
"exact_mappings" : [OWL .sameAs , HTTP_SDO .sameAs ],
32
31
ClassDefinition .__name__ : [RDFS .Class , OWL .Class , SKOS .Concept ],
33
32
SlotDefinition .__name__ : [
@@ -44,26 +43,22 @@ class RdfsImportEngine(ImportEngine):
44
43
"""
45
44
An ImportEngine that takes RDFS and converts it to a LinkML schema
46
45
"""
47
- #: View over the LinkML metamodel
48
- metamodel : SchemaView = field (init = False )
49
- #: Mapping from field names in this RDF schema (e.g. `price`) to IRIs (e.g. `http://schema.org/price`)
50
- mappings : Dict [str , URIRef ] = field (default_factory = dict )
51
- #: User-defined mapping from LinkML metamodel slots (such as `domain_of`) to RDFS IRIs (such as http://schema.org/domainIncludes)
52
- initial_metamodel_mappings : Dict [str , URIRef | List [URIRef ]] = field (default_factory = dict )
53
- #: Combined mapping from LinkML metamodel slots to RDFS IRIs
54
- metamodel_mappings : Dict [str , List [URIRef ]] = field (default_factory = lambda : defaultdict (list ))
55
- #: Reverse of `metamodel_mappings`, but supports multiple terms mapping to the same IRI
56
- reverse_metamodel_mappings : Dict [URIRef , List [str ]] = field (default_factory = lambda : defaultdict (list ))
57
- #: The names of LinkML ClassDefinition slots
58
- classdef_slots : set [str ] = field (init = False )
59
- #: The names of LinkML SlotDefinition slot slots
60
- slotdef_slots : set [str ] = field (init = False )
46
+
47
+ mappings : dict = None
48
+ initial_metamodel_mappings : Dict [str , List [URIRef ]] = None
49
+ metamodel_mappings : Dict [str , List [URIRef ]] = None
50
+ reverse_metamodel_mappings : Dict [URIRef , List [str ]] = None
51
+ include_unmapped_annotations = False
52
+ metamodel = None
53
+ metamodel_schemaview : SchemaView = None
54
+ classdef_slots : List [str ] = None
61
55
62
56
def __post_init__ (self ):
63
57
sv = package_schemaview ("linkml_runtime.linkml_model.meta" )
58
+ self .metamodel_schemaview = sv
64
59
self .metamodel = sv
65
-
66
- # Populate the combined metamodel mappings
60
+ self . metamodel_mappings = defaultdict ( list )
61
+ self . reverse_metamodel_mappings = defaultdict ( list )
67
62
for k , vs in DEFAULT_METAMODEL_MAPPINGS .items ():
68
63
self .metamodel_mappings [k ].extend (vs )
69
64
for v in vs :
@@ -76,8 +71,6 @@ def __post_init__(self):
76
71
for v in vs :
77
72
self .reverse_metamodel_mappings [URIRef (v )].append (k )
78
73
logging .info (f"Adding mapping { k } -> { v } " )
79
-
80
- # LinkML fields have some built-in mappings to other ontologies, such as https://w3id.org/linkml/Any -> AnyValue
81
74
for e in sv .all_elements ().values ():
82
75
mappings = []
83
76
for ms in sv .get_mappings (e .name , expand = True ).values ():
@@ -86,18 +79,17 @@ def __post_init__(self):
86
79
mappings .append (uri )
87
80
self .reverse_metamodel_mappings [uri ].append (e .name )
88
81
self .metamodel_mappings [e .name ] = mappings
89
- self .classdef_slots = {s .name for s in sv .class_induced_slots (ClassDefinition .class_name )}
90
- self .slotdef_slots = {s .name for s in sv .class_induced_slots (SlotDefinition .class_name )}
82
+ self .defclass_slots = [s .name for s in sv .class_induced_slots (ClassDefinition .class_name )]
91
83
92
84
def convert (
93
85
self ,
94
86
file : str ,
95
- name : str | None = None ,
96
- format : str | None = "turtle" ,
97
- default_prefix : str | None = None ,
98
- model_uri : str | None = None ,
99
- identifier : str | None = None ,
100
- ** kwargs : Any ,
87
+ name : str = None ,
88
+ format = "turtle" ,
89
+ default_prefix : str = None ,
90
+ model_uri : str = None ,
91
+ identifier : str = None ,
92
+ ** kwargs ,
101
93
) -> SchemaDefinition :
102
94
"""
103
95
Converts an OWL schema-style ontology
@@ -109,6 +101,7 @@ def convert(
109
101
:param kwargs:
110
102
:return:
111
103
"""
104
+ self .mappings = {}
112
105
g = Graph ()
113
106
g .parse (file , format = format )
114
107
if name is not None and default_prefix is None :
@@ -124,123 +117,78 @@ def convert(
124
117
if k == "schema" and v != "http://schema.org/" :
125
118
continue
126
119
sb .add_prefix (k , v , replace_if_present = True )
127
- if default_prefix is not None and schema . prefixes is not None :
120
+ if default_prefix is not None :
128
121
schema .default_prefix = default_prefix
129
- if model_uri is not None and default_prefix not in schema .prefixes :
122
+ if default_prefix not in schema .prefixes :
130
123
sb .add_prefix (default_prefix , model_uri , replace_if_present = True )
131
124
schema .id = schema .prefixes [default_prefix ].prefix_reference
132
125
cls_slots = defaultdict (list )
133
-
134
- for slot in self .generate_rdfs_properties (g , cls_slots ):
135
- sb .add_slot (slot )
136
- for cls in self .process_rdfs_classes (g , cls_slots ):
137
- sb .add_class (cls )
138
-
139
- if identifier is not None :
140
- id_slot = SlotDefinition (identifier , identifier = True , range = "uriorcurie" )
141
- schema .slots [identifier ] = id_slot
142
- for c in schema .classes .values ():
143
- if not c .is_a and not c .mixins :
144
- if identifier not in c .slots :
145
- c .slots .append (identifier )
146
- return schema
147
-
148
- def process_rdfs_classes (
149
- self ,
150
- g : Graph ,
151
- cls_slots : Dict [str , List [str ]],
152
- ) -> Iterable [ClassDefinition ]:
153
- """
154
- Converts the RDFS classes in the graph to LinkML SlotDefinitions
155
- """
156
- rdfs_classes : List [URIRef ] = []
157
-
158
- for rdfs_class_metaclass in self ._rdfs_metamodel_iri (ClassDefinition .__name__ ):
159
- for s in g .subjects (RDF .type , rdfs_class_metaclass ):
160
- if isinstance (s , URIRef ):
161
- rdfs_classes .append (s )
162
-
163
- # implicit classes
164
- for metap in [RDFS .subClassOf ]:
165
- for s , _ , o in g .triples ((None , metap , None )):
166
- if isinstance (s , URIRef ):
167
- rdfs_classes .append (s )
168
- if isinstance (o , URIRef ):
169
- rdfs_classes .append (o )
170
-
171
- for s in set (rdfs_classes ):
172
- cn = self .iri_to_name (s )
173
- init_dict = self ._dict_for_subject (g , s , "class" )
174
- c = ClassDefinition (cn , ** init_dict )
175
- c .slots = cls_slots .get (cn , [])
176
- c .class_uri = str (s .n3 (g .namespace_manager ))
177
- yield c
178
-
179
- def generate_rdfs_properties (
180
- self ,
181
- g : Graph ,
182
- cls_slots : Dict [str , List [str ]]
183
- ) -> Iterable [SlotDefinition ]:
184
- """
185
- Converts the RDFS properties in the graph to LinkML SlotDefinitions
186
- """
187
- props : List [URIRef ] = []
188
-
189
- # Add explicit properties, ie those with a RDF.type mapping
190
- for rdfs_property_metaclass in self ._rdfs_metamodel_iri (SlotDefinition .__name__ ):
126
+ props = []
127
+ for rdfs_property_metaclass in self ._rdfs_metamodel_iri (
128
+ SlotDefinition .__name__
129
+ ):
191
130
for p in g .subjects (RDF .type , rdfs_property_metaclass ):
192
- if isinstance (p , URIRef ):
193
- props .append (p )
194
-
195
- # Add implicit properties, ie those that are the domain or range of a property
131
+ props .append (p )
132
+ # implicit properties
196
133
for metap in (
197
- self .metamodel_mappings ["domain_of" ]
198
- + self .metamodel_mappings ["rangeIncludes" ]
134
+ self .reverse_metamodel_mappings ["domain_of" ]
135
+ + self .reverse_metamodel_mappings ["rangeIncludes" ]
199
136
):
200
137
for p , _ , _o in g .triples ((None , metap , None )):
201
- if isinstance (p , URIRef ):
202
- props .append (p )
203
-
138
+ props .append (p )
204
139
for p in set (props ):
205
140
sn = self .iri_to_name (p )
206
- #: kwargs for SlotDefinition
207
- init_dict = self ._dict_for_subject (g , p , "slot" )
208
-
209
- # Special case for domains and ranges: add them directly as class slots
141
+ init_dict = self ._dict_for_subject (g , p )
210
142
if "domain_of" in init_dict :
211
143
for x in init_dict ["domain_of" ]:
212
144
cls_slots [x ].append (sn )
213
145
del init_dict ["domain_of" ]
214
- if "range" in init_dict :
215
- range = init_dict ["range" ]
216
- # Handle a range of multiple types
217
- if isinstance (range , list ):
218
- init_dict ["any_of" ] = [{"range" : x } for x in init_dict ["rangeIncludes" ]]
219
- del init_dict ["range" ]
146
+ if "rangeIncludes" in init_dict :
147
+ init_dict ["any_of" ] = [{"range" : x } for x in init_dict ["rangeIncludes" ]]
148
+ del init_dict ["rangeIncludes" ]
220
149
slot = SlotDefinition (sn , ** init_dict )
221
150
slot .slot_uri = str (p .n3 (g .namespace_manager ))
222
- yield slot
151
+ sb .add_slot (slot )
152
+ rdfs_classes = []
153
+ for rdfs_class_metaclass in self ._rdfs_metamodel_iri (ClassDefinition .__name__ ):
154
+ for s in g .subjects (RDF .type , rdfs_class_metaclass ):
155
+ rdfs_classes .append (s )
156
+ # implicit classes
157
+ for metap in [RDFS .subClassOf ]:
158
+ for s , _ , o in g .triples ((None , metap , None )):
159
+ rdfs_classes .append (s )
160
+ rdfs_classes .append (o )
161
+ for s in set (rdfs_classes ):
162
+ cn = self .iri_to_name (s )
163
+ init_dict = self ._dict_for_subject (g , s )
164
+ c = ClassDefinition (cn , ** init_dict )
165
+ c .slots = cls_slots .get (cn , [])
166
+ c .class_uri = str (s .n3 (g .namespace_manager ))
167
+ sb .add_class (c )
168
+ if identifier is not None :
169
+ id_slot = SlotDefinition (identifier , identifier = True , range = "uriorcurie" )
170
+ schema .slots [identifier ] = id_slot
171
+ for c in schema .classes .values ():
172
+ if not c .is_a and not c .mixins :
173
+ if identifier not in c .slots :
174
+ c .slots .append (identifier )
175
+ return schema
223
176
224
- def _dict_for_subject (self , g : Graph , s : URIRef , subject_type : typing . Literal [ "slot" , "class" ] ) -> Dict [str , Any ]:
177
+ def _dict_for_subject (self , g : Graph , s : URIRef ) -> Dict [str , Any ]:
225
178
"""
226
179
Looks up triples for a subject and converts to dict using linkml keys.
227
180
228
- :param g: RDFS graph
229
- :param s: property URI in that graph
230
- :return: Dictionary mapping linkml metamodel keys to values
181
+ :param g:
182
+ :param p:
183
+ :return:
231
184
"""
232
185
init_dict = {}
233
- # Each RDFS predicate/object pair corresponds to a LinkML key value pair for the slot
234
186
for pp , obj in g .predicate_objects (s ):
235
187
if pp == RDF .type :
236
188
continue
237
189
metaslot_name = self ._element_from_iri (pp )
238
190
logging .debug (f"Mapping { pp } -> { metaslot_name } " )
239
- # Filter out slots that don't belong in a class definition
240
- if subject_type == "class" and metaslot_name not in self .classdef_slots :
241
- continue
242
- # Filter out slots that don't belong in a slot definition
243
- if subject_type == "slot" and metaslot_name not in self .slotdef_slots :
191
+ if metaslot_name not in self .defclass_slots :
244
192
continue
245
193
if metaslot_name is None :
246
194
logging .warning (f"Not mapping { pp } " )
@@ -261,14 +209,14 @@ def _dict_for_subject(self, g: Graph, s: URIRef, subject_type: typing.Literal["s
261
209
def _rdfs_metamodel_iri (self , name : str ) -> List [URIRef ]:
262
210
return self .metamodel_mappings .get (name , [])
263
211
264
- def _element_from_iri (self , iri : URIRef ) -> str | None :
212
+ def _element_from_iri (self , iri : URIRef ) -> str :
265
213
r = self .reverse_metamodel_mappings .get (iri , [])
266
214
if len (r ) > 0 :
267
215
if len (r ) > 1 :
268
216
logging .debug (f"Multiple mappings for { iri } : { r } " )
269
217
return r [0 ]
270
218
271
- def _object_to_value (self , obj : Any , metaslot : SlotDefinition ) -> Any :
219
+ def _object_to_value (self , obj : Any , metaslot : SlotDefinition = None ) -> Any :
272
220
if isinstance (obj , URIRef ):
273
221
if metaslot .range == "uriorcurie" or metaslot .range == "uri" :
274
222
return str (obj )
@@ -283,9 +231,9 @@ def iri_to_name(self, v: URIRef) -> str:
283
231
self .mappings [n ] = v
284
232
return n
285
233
286
- def _as_name (self , v : URIRef ) -> str :
287
- v_str = str (v )
234
+ def _as_name (self , v : URIRef ):
235
+ v = str (v )
288
236
for sep in ["#" , "/" , ":" ]:
289
- if sep in v_str :
290
- return v_str .split (sep )[- 1 ]
291
- return v_str
237
+ if sep in v :
238
+ return v .split (sep )[- 1 ]
239
+ return v
0 commit comments