Skip to content

add stub of a dbml importer #155

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/packages/importers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,13 @@ NCI implements a JSON serialization of ISO-11197. You can import this JSON and c
schemauto import-cadsr "cdes/*.json"
Importing from DBML
--------------------

DBML (Database Markup Language) is a simple DSL for defining database schemas. It is not SQL itself, but its tables, columns, and constraints correspond closely to SQL DDL.



Packages for importing
----------------------

Expand Down
26 changes: 18 additions & 8 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ click-default-group = "^1.2.4"
linkml-runtime = "^1.7.2"
duckdb = "^0.10.1"
numpy = "<2.0"
pydbml = "^1.1.2"

[tool.poetry.dev-dependencies]
pytest = ">=7.1.1"
Expand Down
95 changes: 95 additions & 0 deletions schema_automator/importers/dbml_import_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from schema_automator.importers.import_engine import ImportEngine
from pydbml import PyDBML
from linkml_runtime.linkml_model import SchemaDefinition, ClassDefinition, SlotDefinition
from dataclasses import dataclass


def _map_dbml_type_to_linkml(dbml_type: str) -> str:
"""
Maps DBML data types to LinkML types.

:param dbml_type: The DBML column type.
:return: Corresponding LinkML type.
"""
type_mapping = {
"int": "integer",
"varchar": "string",
"text": "string",
"float": "float",
"boolean": "boolean",
"date": "date",
"datetime": "datetime",
}
return type_mapping.get(dbml_type.lower(), "string")


@dataclass
class DbmlImportEngine(ImportEngine):
    """
    An ImportEngine that introspects a DBML schema to determine a corresponding LinkML schema.

    Every DBML table becomes a LinkML class and every column becomes a slot
    registered on the schema and referenced from its class.
    """

    def convert(
        self,
        file: str,
        name: str = None,
        model_uri: str = None,
        identifier: str = None,
        **kwargs
    ) -> SchemaDefinition:
        """
        Converts a DBML schema file into a LinkML SchemaDefinition.

        A column is marked ``required`` when it is a primary key, unique, or
        declared ``not null``. A column is marked ``identifier`` when it is
        the table's only primary-key column, or, if the table has no primary
        key, its only unique column. Tables with several primary-key columns
        get no identifier, because a LinkML class may have at most one.

        :param file: Path to the DBML schema file.
        :param name: Optional name for the generated LinkML schema.
        :param model_uri: Optional URI for the schema.
        :param identifier: Identifier field for the schema (currently unused).
        :param kwargs: Accepted but currently unused.
        :return: SchemaDefinition object representing the DBML schema.
        """
        # Initialize the schema definition
        schema_name = name or "GeneratedSchema"
        schema = SchemaDefinition(
            name=schema_name,
            id=model_uri or f"https://example.org/{schema_name}",
        )

        # Parse the DBML file
        with open(file, 'r', encoding='utf-8') as f:
            dbml_content = f.read()
        parsed_dbml = PyDBML(dbml_content)

        # Process tables
        for table in parsed_dbml.tables:
            class_def = ClassDefinition(
                name=table.name,
                description=table.note or f"Auto-generated class for table '{table.name}'",
                slots=[],
                unique_keys=[],
            )

            # Work out which single column, if any, identifies rows of this table.
            primary_key_names = [col.name for col in table.columns if col.pk]
            unique_names = [col.name for col in table.columns if col.unique and not col.pk]
            if len(primary_key_names) == 1:
                identifier_name = primary_key_names[0]
            elif not primary_key_names and len(unique_names) == 1:
                # No explicit primary key: promote the lone unique column.
                identifier_name = unique_names[0]
            else:
                # Composite primary key or no candidate at all.
                identifier_name = None

            # Process columns
            for column in table.columns:
                slot_name = column.name
                # NOTE(review): slots are keyed by bare column name on the schema,
                # so same-named columns in different tables overwrite each other.
                schema.slots[slot_name] = SlotDefinition(
                    name=slot_name,
                    range=_map_dbml_type_to_linkml(column.type),
                    description=column.note or f"Column '{slot_name}'",
                    required=bool(column.pk or column.unique or column.not_null),
                    identifier=slot_name == identifier_name,
                )
                class_def.slots.append(slot_name)

            schema.classes[table.name] = class_def

        return schema
72 changes: 72 additions & 0 deletions tests/test_importers/test_dbml_importer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import pytest
from linkml_runtime.linkml_model import SchemaDefinition
from schema_automator.importers.dbml_import_engine import DbmlImportEngine

# Sample DBML content for testing.
# Three tables: Users and Countries each declare one primary-key column,
# Orders declares none. Users.email is the only unique non-key column.
DBML_SAMPLE = """
Table Users {
id int [primary key, not null]
email varchar [unique, not null]
username varchar
}

Table Orders {
order_id int [not null]
user_id int [not null]
product_id int [not null]
quantity int
}

Table Countries {
code varchar [primary key, not null]
name varchar [not null]
}
"""

@pytest.fixture
def dbml_file(tmp_path):
    """
    Fixture to create a temporary DBML file.

    Writes DBML_SAMPLE to ``test.dbml`` inside pytest's ``tmp_path`` and
    returns the resulting path object.
    """
    dbml_path = tmp_path / "test.dbml"
    # Write as UTF-8 explicitly; the importer opens the file with that encoding.
    dbml_path.write_text(DBML_SAMPLE, encoding="utf-8")
    return dbml_path

@pytest.fixture
def importer():
    """Provide a DbmlImportEngine instance built with default arguments."""
    engine = DbmlImportEngine()
    return engine

def test_dbml_to_linkml_conversion(dbml_file, importer):
    """
    Smoke-test the DBML to LinkML conversion.

    Verifies the result type, the presence of the Users and Orders classes,
    and that the ``id`` slot exists and is a required identifier.
    """
    result = importer.convert(file=str(dbml_file), name="TestSchema")

    # The importer must hand back a LinkML schema object.
    assert isinstance(result, SchemaDefinition)

    # Tables surface as classes.
    for table_name in ("Users", "Orders"):
        assert table_name in result.classes

    # Columns surface as schema-level slots.
    assert "id" in result.slots
    id_slot = result.slots["id"]
    assert id_slot.identifier
    assert id_slot.required


def test_primary_key_handling(dbml_file, importer):
    """
    Test correct handling of primary keys and required attributes.

    Covers both tables in the sample that declare a primary key, and checks
    that an ordinary column is not promoted to an identifier.
    """
    schema = importer.convert(file=str(dbml_file), name="TestSchema")

    # Check that primary keys are marked as required and identifiers
    users_class = schema.classes["Users"]
    assert "id" in users_class.slots
    assert schema.slots["id"].identifier
    assert schema.slots["id"].required

    countries_class = schema.classes["Countries"]
    assert "code" in countries_class.slots
    assert schema.slots["code"].identifier
    assert schema.slots["code"].required

    # A plain column without pk/unique must not become an identifier
    assert "username" in users_class.slots
    assert not schema.slots["username"].identifier
Loading