1
1
import logging
2
- from typing import Dict , Iterable , List , Any
2
+ from pathlib import Path
3
+ from typing import Dict , Iterable , List , Any , Mapping , TextIO
3
4
import typing
4
- from collections import defaultdict
5
+ from collections import defaultdict , Counter
5
6
7
+ from jsonasobj2 import JsonObj
6
8
from linkml .utils .schema_builder import SchemaBuilder
7
9
from linkml_runtime import SchemaView
8
10
from linkml_runtime .linkml_model import (
9
11
SchemaDefinition ,
10
12
SlotDefinition ,
11
13
ClassDefinition ,
12
14
Prefix ,
13
- Uriorcurie
14
15
)
15
16
16
17
from dataclasses import dataclass , field
24
25
HTTP_SDO = Namespace ("http://schema.org/" )
25
26
26
27
DEFAULT_METAMODEL_MAPPINGS : Dict [str , List [URIRef ]] = {
28
+ # See https://github.com/linkml/linkml/issues/2507
29
+ "description" : [RDFS .comment ],
27
30
"is_a" : [RDFS .subClassOf , SKOS .broader ],
28
31
"domain_of" : [HTTP_SDO .domainIncludes , SDO .domainIncludes , RDFS .domain ],
29
32
"range" : [HTTP_SDO .rangeIncludes , SDO .rangeIncludes , RDFS .range ],
@@ -55,9 +58,12 @@ class RdfsImportEngine(ImportEngine):
55
58
reverse_metamodel_mappings : Dict [URIRef , List [str ]] = field (default_factory = lambda : defaultdict (list ))
56
59
#: The names of LinkML ClassDefinition slots
57
60
classdef_slots : set [str ] = field (init = False )
58
- #: The names of LinkML SlotDefinition slot slots
61
+ #: The names of LinkML SlotDefinition slots
59
62
slotdef_slots : set [str ] = field (init = False )
63
+ #: Every prefix seen in the graph
60
64
seen_prefixes : set [str ] = field (default_factory = set )
65
+ #: The counts of each prefix, used to infer the default prefix
66
+ prefix_counts : Counter [str ] = field (default_factory = Counter )
61
67
62
68
def __post_init__ (self ):
63
69
sv = package_schemaview ("linkml_runtime.linkml_model.meta" )
@@ -91,7 +97,7 @@ def __post_init__(self):
91
97
92
98
def convert (
93
99
self ,
94
- file : str ,
100
+ file : str | Path | TextIO ,
95
101
name : str | None = None ,
96
102
format : str | None = "turtle" ,
97
103
default_prefix : str | None = None ,
@@ -101,23 +107,10 @@ def convert(
101
107
) -> SchemaDefinition :
102
108
"""
103
109
Converts an OWL schema-style ontology
104
-
105
- :param file:
106
- :param name:
107
- :param model_uri:
108
- :param identifier:
109
- :param kwargs:
110
- :return:
111
110
"""
112
111
g = Graph (bind_namespaces = "none" )
113
112
g .parse (file , format = format )
114
- if name is not None and default_prefix is None :
115
- default_prefix = name
116
- if name is None :
117
- name = default_prefix
118
- if name is None :
119
- name = "example"
120
- sb = SchemaBuilder (name = name )
113
+ sb = SchemaBuilder ()
121
114
sb .add_defaults ()
122
115
schema = sb .schema
123
116
for k , v in g .namespaces ():
@@ -153,13 +146,29 @@ def convert(
153
146
# Remove prefixes that aren't used
154
147
if isinstance (schema .imports , list ):
155
148
for imp in schema .imports :
156
- prefix , suffix = imp .split (":" , 1 )
149
+ prefix , _suffix = imp .split (":" , 1 )
157
150
self .seen_prefixes .add (prefix )
158
151
schema .prefixes = {key : value for key , value in schema .prefixes .items () if key in self .seen_prefixes }
159
-
152
+ self . infer_metadata ( schema , name , default_prefix , model_uri )
160
153
self .fix_missing (schema )
161
154
return schema
162
155
156
def infer_metadata(
    self,
    schema: SchemaDefinition,
    name: str | None,
    default_prefix: str | None = None,
    model_uri: str | None = None,
) -> None:
    """
    Sets the schema's name, default prefix and id, inferring whatever was not supplied.

    Inferred values are derived from the prefix with the highest count in
    ``self.prefix_counts``.

    :param schema: the schema to update in place
    :param name: explicit schema name; when falsy, the most common prefix is used
    :param default_prefix: explicit default prefix; when falsy, the most common prefix is used
    :param model_uri: explicit schema id; when falsy, the ``prefix_reference`` of the
        most common prefix is used, provided that prefix is declared in ``schema.prefixes``
    :raises ValueError: if ``self.prefix_counts`` is empty
    """
    top_count = self.prefix_counts.most_common(1)
    if not top_count:
        raise ValueError("No prefixes found in the graph")
    inferred_prefix = top_count[0][0]

    schema.name = name or inferred_prefix
    schema.default_prefix = default_prefix or inferred_prefix

    # An explicit model URI always wins. Previously it was only applied when the
    # inferred prefix happened to resolve to a Prefix, so it could be silently dropped.
    if model_uri:
        schema.id = model_uri
        return

    # schema.prefixes may be a plain mapping or a JsonObj, which expose different getters
    prefix_uri = None
    if isinstance(schema.prefixes, Mapping):
        prefix_uri = schema.prefixes.get(inferred_prefix)
    elif isinstance(schema.prefixes, JsonObj):
        prefix_uri = schema.prefixes._get(inferred_prefix)
    if isinstance(prefix_uri, Prefix):
        schema.id = prefix_uri.prefix_reference
163
172
def fix_missing (self , schema : SchemaDefinition ) -> None :
164
173
"""
165
174
For some properties we have a `subproperty_of` that references a slot that doesn't exist.
def track_uri(self, uri: str, g: Graph) -> None:
    """
    Records the prefix of the given URI as seen and increments that prefix's count
    """
    # compute_qname yields (prefix, namespace, local name); only the prefix is needed
    curie_prefix, _, _ = g.namespace_manager.compute_qname(uri)
    self.prefix_counts[curie_prefix] += 1
    self.seen_prefixes.add(curie_prefix)
186
196
187
197
def process_rdfs_classes (
188
198
self ,
0 commit comments