Skip to content

Commit 6c142a6

Browse files
black-tea-lover authored and chaubold committed
AP-23472: Fetch type names of LogicalTypes unknown to Python from Java
1 parent 457e76c commit 6c142a6

File tree

4 files changed

+94
-7
lines changed

4 files changed

+94
-7
lines changed

org.knime.python3.arrow.tests/src/test/python/unittest/test_pandas_extension_type.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -788,13 +788,7 @@ def test_timestamp_columns(self):
788788
) # drop index as it messes up equality
789789

790790
arrow_table = arrow_backend.write_table(df)
791-
knime_ts_ext_str = (
792-
"extension<logical={"
793-
'"value_factory_class":"org.knime.core.data.v2.value.cell.DictEncodedDataCellValueFactory",'
794-
'"data_type":{"cell_class":"org.knime.core.data.date.DateAndTimeCell"}}, '
795-
"storage=struct<extension<logical=structDictEncoded, storage=blob>, "
796-
"extension<logical=structDictEncoded, storage=string>>>"
797-
)
791+
knime_ts_ext_str = "Timestamp"
798792

799793
self.assertEqual(
800794
"<class 'knime.scripting._deprecated._arrow_table.ArrowWriteTable'>",

org.knime.python3.nodes.tests/src/test/python/unittest/test_utilities.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,33 @@ def gateway_property(self):
4242

4343
@property
4444
def jvm(self):
45+
def _getTypeNameForLogicalTypeString(logical_type: str):
46+
if (
47+
logical_type
48+
== '{"value_factory_class":"org.knime.core.data.v2.value.cell.DictEncodedDataCellValueFactory","data_type":{"cell_class":"org.knime.core.data.uri.URIDataCell"}}'
49+
):
50+
return "URI"
51+
elif (
52+
logical_type
53+
== '{"value_factory_class": "org.knime.core.data.v2.value.StringSetValueFactory"}'
54+
):
55+
return "Set (Collection of: String)"
56+
elif (
57+
logical_type
58+
== '{"value_factory_class":"org.knime.core.data.v2.value.SetValueFactory"}'
59+
):
60+
return "Set"
61+
elif (
62+
logical_type
63+
== '{"value_factory_class":"org.knime.core.data.v2.value.cell.DictEncodedDataCellValueFactory","data_type":{"cell_class":"org.knime.core.data.date.DateAndTimeCell"}}'
64+
):
65+
return "Timestamp"
66+
else:
67+
raise ValueError()
68+
4569
mock = MagicMock()
4670
mock.org = MagicMock()
71+
mock.org.knime.core.data.v2.ValueFactoryUtils.getTypeNameForLogicalTypeString = _getTypeNameForLogicalTypeString
4772
return mock
4873

4974

org.knime.python3.tests/src/test/python/unittest/test_knime_schema.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,54 @@ def test_to_str(self):
547547
f"Schema<\n\t{sep.join(str(k.Column(t, n, None)) for t, n in zip(types, names))}>",
548548
)
549549

550+
def test_logical_type_to_str(self):
551+
"""Tests the string representation of LogicalTypes. Fetching types from the gateway (when no
552+
PythonValueFactory is registered) is mocked, so this test validates the recursive logic of handling
553+
LogicalTypes that are collections instead of validating for each type whether the gateway call returns
554+
the correct type name."""
555+
# Registered PythonValueFactory
556+
557+
datetime_type = k.logical(dt.datetime)
558+
datetime_repr = "Local Date and Time"
559+
self.assertEqual(str(datetime_type), datetime_repr)
560+
561+
datetime_list_type = k.ListType(k.logical(dt.datetime))
562+
datetime_list_repr = "List (Collection of: Local Date and Time)"
563+
self.assertEqual(str(datetime_list_type), datetime_list_repr)
564+
565+
# PythonValueFactory not registered and storage_type has no inner_type
566+
567+
uri_type = k.LogicalType(
568+
'{"value_factory_class":"org.knime.core.data.v2.value.cell.DictEncodedDataCellValueFactory",'
569+
'"data_type":{"cell_class":"org.knime.core.data.uri.URIDataCell"}}',
570+
None, # omitted because only relevant if type is a collection
571+
)
572+
uri_repr = "URI"
573+
self.assertEqual(str(uri_type), uri_repr)
574+
575+
string_set_type = k.LogicalType(
576+
'{"value_factory_class": "org.knime.core.data.v2.value.StringSetValueFactory"}',
577+
None,
578+
)
579+
string_set_repr = "Set (Collection of: String)"
580+
self.assertEqual(str(string_set_type), string_set_repr)
581+
582+
# PythonValueFactory not registered and storage_type has inner_type (LogicalType is a collection)
583+
584+
timestamp_set_storage_type = k.ListType(
585+
k.LogicalType(
586+
'{"value_factory_class":"org.knime.core.data.v2.value.cell.DictEncodedDataCellValueFactory",'
587+
'"data_type":{"cell_class":"org.knime.core.data.date.DateAndTimeCell"}}',
588+
None,
589+
)
590+
)
591+
timestamp_set_type = k.LogicalType(
592+
'{"value_factory_class":"org.knime.core.data.v2.value.SetValueFactory"}',
593+
timestamp_set_storage_type,
594+
)
595+
string_timestamp_set_repr = "Set (Collection of: Timestamp)"
596+
self.assertEqual(str(timestamp_set_type), string_timestamp_set_repr)
597+
550598
def test_logical_type_wrapping(self):
551599
types = [
552600
k.int32(),

org.knime.python3/src/main/python/knime/api/schema.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,26 @@ def __str__(self) -> str:
386386
self.logical_type
387387
)
388388
except ValueError:
389+
# try to get the type from Java
390+
import knime._backend._gateway as kg
391+
392+
if kg.client_server is not None:
393+
try:
394+
type_name = kg.client_server.jvm.org.knime.core.data.v2.ValueFactoryUtils.getTypeNameForLogicalTypeString(
395+
self.logical_type
396+
)
397+
398+
# we might want to cache these
399+
400+
# if the logical type is a collection, we also extract the inner type
401+
if hasattr(self.storage_type, "inner_type"):
402+
inner_type = str(self.storage_type.inner_type)
403+
return type_name + " (Collection of: " + inner_type + ")"
404+
else:
405+
return type_name
406+
except Exception:
407+
pass
408+
389409
return (
390410
f"extension<logical={self.logical_type}, storage={self.storage_type}>"
391411
)

0 commit comments

Comments
 (0)