Skip to content

Commit b854dbc

Browse files
committed
Adding FHIR CodeSystem importer
1 parent 398e7f9 commit b854dbc

File tree

7 files changed

+119
-7
lines changed

7 files changed

+119
-7
lines changed

poetry.lock

Lines changed: 18 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ xmltodict = "^0.13.0"
3232
click-default-group = "^1.2.4"
3333
linkml-runtime = "^1.7.2"
3434
duckdb = "^0.10.1"
35+
duckdb-engine = "^0.11.2"
3536

3637

3738
[tool.poetry.dev-dependencies]

schema_automator/cli.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ def import_dosdps(dpfiles, output, **args):
242242
@schema_name_option
243243
def import_sql(db, output, **args):
244244
"""
245-
Imports a schema by introspecting a relational database
245+
Imports a schema by introspecting a relational database.
246246
247247
See :ref:`importers` for more on the importers framework
248248
"""
@@ -297,13 +297,14 @@ def import_htmltable(url, output, class_name, schema_name, columns,
297297
@click.option('--enum-threshold', default=0.1, help='if the number of distinct values / rows is less than this, do not make an enum')
298298
@click.option('--omit-null/--no-omit-null', default=False, help="if true, ignore null values")
299299
@click.option('--inlined-map', multiple=True, help="SLOT_NAME.KEY pairs indicating which slots are inlined as dict")
300+
@click.option('--index-slot', help="slot to inject for lists of objects")
300301
@click.option('--depluralize/--no-depluralized',
301302
default=True,
302303
show_default=True,
303304
help="Auto-depluralize class names to singular form")
304305
def generalize_json(input, output, schema_name, depluralize: bool, format, omit_null, inlined_map, **kwargs):
305306
"""
306-
Generalizes from a JSON file to a schema
307+
Generalizes from a JSON (or YAML) file to a schema
307308
308309
See :ref:`generalizers` for more on the generalization framework
309310

schema_automator/generalizers/csv_data_generalizer.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -264,17 +264,22 @@ def convert_multiple(self, files: List[str], **kwargs) -> SchemaDefinition:
264264
self.inject_foreign_keys(sv, fks)
265265
return sv.schema
266266

267-
def convert(self, file: str, delimiter=None, **kwargs) -> SchemaDefinition:
    """
    Converts a single delimited text file (TSV or CSV) to a single-class schema

    :param file: path to the input file
    :param delimiter: column delimiter; when None, "," is used if the file
        name ends in ".csv", otherwise ``self.column_separator`` is used
    :param kwargs: passed through unchanged to ``convert_dicts``
    :return: the schema returned by ``convert_dicts`` for the rows of the file
    """
    if delimiter is None:
        delimiter = "," if file.endswith(".csv") else self.column_separator
    with open(file, newline='', encoding='utf-8') as tsv_file:
        # Parse the header with the csv module (not str.split) so that quoted
        # header fields, including ones containing the delimiter, are handled
        # the same way as the data rows. An empty file yields an empty header.
        header_row = next(csv.reader(tsv_file, delimiter=delimiter), [])
        header = [h.strip() for h in header_row]
        rr = csv.DictReader(tsv_file, fieldnames=header, delimiter=delimiter, skipinitialspace=False)
        return self.convert_dicts(list(rr), **kwargs)
279284

280285
def convert_from_dataframe(self, df: pd.DataFrame, **kwargs) -> SchemaDefinition:

schema_automator/generalizers/json_instance_generalizer.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,14 @@ class JsonDataGeneralizer(Generalizer):
3636

3737

3838
def convert(self, input: Union[str, Dict], format: str = 'json',
39+
index_slot: str = None,
3940
container_class_name='Container',
4041
**kwargs) -> SchemaDefinition:
4142
"""
4243
Generalizes from a JSON file
4344
4445
:param input:
45-
:param format:
46+
:param format: json or yaml; use yaml_multi for multiple documents
4647
:param container_class_name:
4748
:param kwargs:
4849
:return:
@@ -62,13 +63,19 @@ def convert(self, input: Union[str, Dict], format: str = 'json',
6263
obj = json.load(stream)
6364
elif format == 'yaml':
6465
obj = yaml.safe_load(stream)
66+
elif format == 'yaml_multi':
67+
obj = list(yaml.safe_load_all(stream))
6568
elif format == 'toml':
6669
obj_str = "".join(stream.readlines())
6770
toml_obj = tomlkit.parse(obj_str)
6871
json_str = json.dumps(toml_obj)
6972
obj = json.loads(json_str)
7073
else:
7174
raise Exception(f'bad format {format}')
75+
if isinstance(obj, list):
76+
if index_slot is None:
77+
index_slot = 'members'
78+
obj = {index_slot: obj}
7279
rows_by_table = defaultdict(list)
7380
self.rows_by_table = rows_by_table
7481
self._convert_obj(obj, table=container_class_name)
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import json
from typing import Any, Dict, List, Optional

from linkml_runtime.linkml_model import SchemaDefinition, EnumDefinition, PermissibleValue
from schema_automator.importers.import_engine import ImportEngine
6+
7+
class FHIRCodeSystemImportEngine(ImportEngine):
    """
    Imports a FHIR CodeSystem resource (JSON) as a schema holding a single enum.

    Every concept of the code system becomes one permissible value of the
    enum ``CodeSystemEnum``.
    """

    def load(self, input: str) -> SchemaDefinition:
        """
        Converts a serialized FHIR CodeSystem into a schema.

        :param input: JSON text of the CodeSystem resource (the document
            itself, not a file path)
        :return: schema named after the resource's ``name`` (default
            ``FHIRCodeSystem``) with id taken from ``url``, containing the
            enum ``CodeSystemEnum``
        """
        data = json.loads(input)

        schema = SchemaDefinition(
            name=data.get('name', 'FHIRCodeSystem'),
            id=data.get('url', 'http://example.org/FHIRCodeSystem'),
        )

        code_system_enum = EnumDefinition(
            name='CodeSystemEnum',
            description=data.get('description', 'A FHIR CodeSystem resource'),
        )

        # `concept` may be absent or null; in both cases the enum stays empty.
        concepts = data.get('concept')
        if concepts:
            # Nested concepts are only recorded as is_a children when the
            # resource explicitly declares its hierarchy to be an is-a one;
            # for other (or undeclared) meanings they are imported flat.
            nested_is_a = data.get('hierarchyMeaning') == 'is-a'
            code_system_enum.permissible_values = self._process_concepts(
                concepts, link_nested=nested_is_a
            )

        schema.enums = {
            'CodeSystemEnum': code_system_enum
        }

        return schema

    def _process_concepts(
        self,
        concepts: List[Dict[str, Any]],
        parent: Optional[str] = None,
        link_nested: bool = False,
    ) -> Dict[str, PermissibleValue]:
        """
        Builds permissible values from a list of CodeSystem concepts.

        Child concepts nested under a concept's own ``concept`` key are
        processed recursively so that they are not lost.

        :param concepts: list of concept objects, each with at least a ``code``
        :param parent: code of the enclosing concept when recursing, else None
        :param link_nested: if true, a nested concept gets ``is_a`` set to its
            enclosing concept's code (an explicit ``subsumedBy`` property
            still takes precedence)
        :return: permissible values keyed by concept code
        :raises KeyError: if a concept has no ``code``
        """
        permissible_values = {}

        for concept in concepts:
            code = concept['code']
            pv = PermissibleValue(
                text=code,
                title=concept.get('display'),
                description=concept.get('definition'),
            )

            if link_nested and parent is not None:
                pv.is_a = parent

            # `property` may be absent or null
            for prop in concept.get('property') or []:
                value = prop.get('valueCode')
                if value is None:
                    # property carries a non-code value (or none); nothing to map
                    continue
                prop_code = prop.get('code')
                if prop_code == 'subsumedBy':
                    pv.is_a = value
                elif prop_code == 'status':
                    pv.status = value

            permissible_values[code] = pv

            children = concept.get('concept')
            if children:
                permissible_values.update(
                    self._process_concepts(children, parent=code, link_nested=link_nested)
                )

        return permissible_values
56+
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import os
2+
3+
from linkml.generators import PythonGenerator
4+
from linkml_runtime import SchemaView
5+
6+
from schema_automator.importers.fhir_codesystem_import_engine import FHIRCodeSystemImportEngine
7+
from schema_automator.utils import write_schema
8+
from tests import INPUT_DIR, OUTPUT_DIR
9+
10+
INPUT_JSON = os.path.join(INPUT_DIR, "CodeSystem-v3-RoleCode.json")
OUT = os.path.join(OUTPUT_DIR, "CodeSystem-v3-RoleCode.linkml.yaml")


def test_fhir_code_system_import():
    """
    Imports the RoleCode CodeSystem fixture and checks the resulting schema.

    The schema is written to OUT, compiled with the Python generator, and
    inspected through SchemaView to confirm the enum was actually produced.
    """
    with open(INPUT_JSON, "r", encoding="utf-8") as f:
        input_data = f.read()

    ie = FHIRCodeSystemImportEngine()
    schema = ie.load(input_data)
    assert schema
    write_schema(schema, OUT)

    py_str = PythonGenerator(OUT).serialize()
    assert py_str

    # Check the content, not just that a schema object came back.
    sv = SchemaView(schema)
    code_system_enum = sv.get_enum("CodeSystemEnum")
    assert code_system_enum is not None
    # NOTE(review): assumes the fixture defines at least one concept - confirm
    assert code_system_enum.permissible_values

0 commit comments

Comments
 (0)