
Commit 8e4764d

Merge branch 'cleanup-deps' into refactor
2 parents 65869ba + f1f2110

File tree

7 files changed (+821, -733 lines)

.github/workflows/build-deploy-documentation.yaml

Lines changed: 0 additions & 35 deletions
This file was deleted.

docs/packages/importers.rst

Lines changed: 7 additions & 0 deletions
@@ -77,6 +77,13 @@ NCI implements a JSON serialization of ISO-11197. You can import this JSON and c
     schemauto import-cadsr "cdes/*.json"
 
 
+Importing from DBML
+--------------------
+
+DBML (Database Markup Language) is a simple DSL for defining database schemas, with a syntax resembling a simplified SQL DDL.
+
+
+
 Packages for importing
 ----------------------
 
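For orientation only (not part of this commit), here is a minimal sketch of the kind of DBML input described above, parsed with PyDBML the same way the new importer does below. The table and column names are invented for the example.

    from pydbml import PyDBML

    # Illustrative DBML text; the table and columns are hypothetical examples.
    DBML_TEXT = """
    Table person {
      id int [pk, note: 'stable identifier']
      name varchar
      age int
    }
    """

    parsed = PyDBML(DBML_TEXT)
    for table in parsed.tables:          # a single table named "person"
        for column in table.columns:
            # prints e.g. ("id", "int", True), ("name", "varchar", False), ("age", "int", False)
            print(column.name, column.type, column.pk)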

poetry.lock

Lines changed: 542 additions & 663 deletions
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 2 additions & 2 deletions
@@ -56,9 +56,10 @@ ruamel-yaml = "^0.18.6"
 rdflib = "^7.1.1"
 jsonasobj2 = "^1.0.4"
 deprecation = "^2.1.0"
+numpy = "<2.0"
+pydbml = "^1.1.2"
 
 [tool.poetry.dev-dependencies]
-mkdocs = ">=1.2.3"
 pytest = ">=7.1.1"
 Sphinx = ">=4.4.0"
 sphinx-pdj-theme = ">=0.2.1"
@@ -86,7 +87,6 @@ extract-schema = "schema_automator.utils.schema_extractor:cli"
 
 [tool.poetry.extras]
 docs = ["Sphinx", "sphinx-pdj-theme", "sphinxcontrib-mermaid"]
-mariadb = ["mariadb"]
 postgres = ["psycopg2-binary"]
 
 [tool.codespell]
Lines changed: 95 additions & 0 deletions
@@ -0,0 +1,95 @@
from schema_automator.importers.import_engine import ImportEngine
from pydbml import PyDBML
from linkml_runtime.linkml_model import SchemaDefinition, ClassDefinition, SlotDefinition
from dataclasses import dataclass


def _map_dbml_type_to_linkml(dbml_type: str) -> str:
    """
    Maps DBML data types to LinkML types.

    :param dbml_type: The DBML column type.
    :return: Corresponding LinkML type.
    """
    type_mapping = {
        "int": "integer",
        "varchar": "string",
        "text": "string",
        "float": "float",
        "boolean": "boolean",
        "date": "date",
        "datetime": "datetime",
    }
    return type_mapping.get(dbml_type.lower(), "string")


@dataclass
class DbmlImportEngine(ImportEngine):
    """
    An ImportEngine that introspects a DBML schema to determine a corresponding LinkML schema.
    """

    def convert(
        self,
        file: str,
        name: str = None,
        model_uri: str = None,
        identifier: str = None,
        **kwargs
    ) -> SchemaDefinition:
        """
        Converts a DBML schema file into a LinkML SchemaDefinition.

        :param file: Path to the DBML schema file.
        :param name: Optional name for the generated LinkML schema.
        :param model_uri: Optional URI for the schema.
        :param identifier: Identifier field for the schema.
        :return: SchemaDefinition object representing the DBML schema.
        """
        # Initialize the schema definition
        schema_name = name or "GeneratedSchema"
        schema = SchemaDefinition(name=schema_name, id=model_uri or f"https://example.org/{schema_name}")

        # Parse the DBML file
        with open(file, 'r', encoding='utf-8') as f:
            dbml_content = f.read()
        parsed_dbml = PyDBML(dbml_content)

        # Process tables
        for table in parsed_dbml.tables:
            class_def = ClassDefinition(
                name=table.name,
                description=table.note or f"Auto-generated class for table '{table.name}'",
                slots=[],
                unique_keys=[],  # Initialize unique keys property
            )
            processed_slots = set()  # Track processed slot names to avoid duplicates

            # Handle primary key and unique constraints
            primary_key_columns = [col for col in table.columns if col.pk]
            unique_columns = [col for col in table.columns if col.unique and not col.pk]

            # Process columns
            for column in table.columns:
                slot_name = column.name
                slot_def = SlotDefinition(
                    name=slot_name,
                    range=_map_dbml_type_to_linkml(column.type),
                    description=column.note or f"Column '{slot_name}'",
                    required=column in primary_key_columns or column.unique,
                    identifier=column in primary_key_columns,  # Mark primary key columns as identifiers
                )
                schema.slots[slot_name] = slot_def
                class_def.slots.append(slot_name)
                processed_slots.add(slot_name)

            # Handle single unique column as primary key if no explicit primary key exists
            if not primary_key_columns and len(unique_columns) == 1:
                unique_column = unique_columns[0]
                schema.slots[unique_column.name].identifier = True
                schema.slots[unique_column.name].required = True

            schema.classes[table.name] = class_def

        return schema
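
A usage sketch, also not part of the commit, showing how the engine above might be driven. The import path is an assumption (the diff view omits the new file's name), "example.dbml" is a hypothetical local file, and yaml_dumper comes from linkml-runtime.

    from linkml_runtime.dumpers import yaml_dumper
    from schema_automator.importers.dbml_import_engine import DbmlImportEngine  # assumed module path

    engine = DbmlImportEngine()
    schema = engine.convert(
        "example.dbml",                               # hypothetical DBML file
        name="ExampleSchema",                         # becomes SchemaDefinition.name
        model_uri="https://example.org/ExampleSchema",
    )

    # Serialize the generated LinkML schema to YAML.
    print(yaml_dumper.dumps(schema))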

0 commit comments
