Skip to content

Commit ac5257d

Browse files
authored
Feature: Timestamp support for JSONSchema generated schemas (#95)
Problem ======= Closes: #93 Solution ======== Add checks for `format == 'date-time'` inside the string type check for `string` and `array of string`. Change summary: --------------- * Added format check for string fields (and string arrays) * Check for JSONSchema property `format` with value `date-time` * Updated jsonschema tests and updated the snapshots Steps to Verify: ---------------- 1. Generate a JSON Schema or use an existing one 2. Add `"format":"date-time"` to the field which should have a Date/Time value 3. Ensure that the value is a valid `Date` object 4. Enjoy
1 parent 43732c5 commit ac5257d

5 files changed

+214
-12
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,8 @@ var schema = new parquet.ParquetSchema.fromJsonSchema({
129129
"type": "number"
130130
},
131131
"date": {
132-
"type": "string"
132+
"type": "string",
133+
"format": "date-time"
133134
},
134135
"in_stock": {
135136
"type": "boolean"

lib/jsonSchema.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ const fromJsonSchemaArray = (fieldValue: SupportedJSONSchema4, optionalFieldList
6464

6565
switch (fieldValue.items.type) {
6666
case 'string':
67+
if (fieldValue.items.format && fieldValue.items.format == 'date-time') {
68+
return fields.createListField('TIMESTAMP_MILLIS', optionalFieldList);
69+
}
6770
return fields.createListField('UTF8', optionalFieldList);
6871
case 'integer':
6972
case 'number':
@@ -88,6 +91,9 @@ const fromJsonSchemaField = (jsonSchema: JSONSchema4) => (fieldName: string, fie
8891

8992
switch (fieldValue.type) {
9093
case 'string':
94+
if (fieldValue.format && fieldValue.format == 'date-time') {
95+
return fields.createTimestampField(optional);
96+
}
9197
return fields.createStringField(optional);
9298
case 'integer':
9399
case 'number':

test/jsonSchema.test.ts

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,18 @@ describe("Json Schema Conversion Test File", async function () {
6363
"items": { "type": "string" },
6464
"additionalItems": false
6565
},
66-
"timestamp_field": { "type": "string" },
66+
"timestamp_array_field": {
67+
"type": "array",
68+
"items": {
69+
"type": "string",
70+
"format": "date-time"
71+
},
72+
"additionalItems": false,
73+
},
74+
"timestamp_field": {
75+
"type": "string",
76+
"format": "date-time"
77+
},
6778
"obj_field": {
6879
"type": "object",
6980
"properties": {
@@ -107,7 +118,9 @@ describe("Json Schema Conversion Test File", async function () {
107118
const row1 = {
108119
string_field: 'string value',
109120
int_field: 10n,
110-
timestamp_field: new Date("2023-01-01 GMT").toUTCString(),
121+
timestamp_array_field: { list: [{ element: new Date("2023-01-01 GMT") }] },
122+
123+
timestamp_field: new Date("2023-01-01 GMT"),
111124

112125
array_field: {
113126
list: [{ element: 'array_field val1' }, { element: 'array_field val2' }],
@@ -162,7 +175,6 @@ describe("Json Schema Conversion Test File", async function () {
162175
const row = await cursor.next();
163176
const rowData = {
164177
...row1,
165-
timestamp_field: "Sun, 01 Jan 2023 00:00:00 GMT",
166178
};
167179
assert.deepEqual(row, rowData);
168180
});

test/test-files/json-schema-test-file.result.json

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"type_length": null,
55
"repetition_type": null,
66
"name": "root",
7-
"num_children": 6,
7+
"num_children": 7,
88
"converted_type": null,
99
"scale": null,
1010
"precision": null,
@@ -72,12 +72,48 @@
7272
"logicalType": null
7373
},
7474
{
75-
"type": 6,
75+
"type": null,
76+
"type_length": null,
77+
"repetition_type": 1,
78+
"name": "timestamp_array_field",
79+
"num_children": 1,
80+
"converted_type": 3,
81+
"scale": null,
82+
"precision": null,
83+
"field_id": null,
84+
"logicalType": null
85+
},
86+
{
87+
"type": null,
88+
"type_length": null,
89+
"repetition_type": 2,
90+
"name": "list",
91+
"num_children": 1,
92+
"converted_type": null,
93+
"scale": null,
94+
"precision": null,
95+
"field_id": null,
96+
"logicalType": null
97+
},
98+
{
99+
"type": 2,
100+
"type_length": null,
101+
"repetition_type": 1,
102+
"name": "element",
103+
"num_children": null,
104+
"converted_type": 9,
105+
"scale": null,
106+
"precision": null,
107+
"field_id": null,
108+
"logicalType": null
109+
},
110+
{
111+
"type": 2,
76112
"type_length": null,
77113
"repetition_type": 1,
78114
"name": "timestamp_field",
79115
"num_children": null,
80-
"converted_type": 0,
116+
"converted_type": 9,
81117
"scale": null,
82118
"precision": null,
83119
"field_id": null,

test/test-files/json-schema-test-file.schema.result.json

Lines changed: 152 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,26 @@
2929
}
3030
}
3131
},
32+
"timestamp_array_field": {
33+
"type": "LIST",
34+
"optional": true,
35+
"fields": {
36+
"list": {
37+
"repeated": true,
38+
"fields": {
39+
"element": {
40+
"optional": true,
41+
"type": "TIMESTAMP_MILLIS",
42+
"encoding": "PLAIN",
43+
"compression": "UNCOMPRESSED"
44+
}
45+
}
46+
}
47+
}
48+
},
3249
"timestamp_field": {
3350
"optional": true,
34-
"type": "UTF8",
51+
"type": "TIMESTAMP_MILLIS",
3552
"encoding": "PLAIN",
3653
"compression": "UNCOMPRESSED"
3754
},
@@ -183,10 +200,53 @@
183200
},
184201
"originalType": "LIST"
185202
},
203+
"timestamp_array_field": {
204+
"name": "timestamp_array_field",
205+
"path": [
206+
"timestamp_array_field"
207+
],
208+
"repetitionType": "OPTIONAL",
209+
"rLevelMax": 0,
210+
"dLevelMax": 1,
211+
"isNested": true,
212+
"fieldCount": 1,
213+
"fields": {
214+
"list": {
215+
"name": "list",
216+
"path": [
217+
"timestamp_array_field",
218+
"list"
219+
],
220+
"repetitionType": "REPEATED",
221+
"rLevelMax": 1,
222+
"dLevelMax": 2,
223+
"isNested": true,
224+
"fieldCount": 1,
225+
"fields": {
226+
"element": {
227+
"name": "element",
228+
"primitiveType": "INT64",
229+
"originalType": "TIMESTAMP_MILLIS",
230+
"path": [
231+
"timestamp_array_field",
232+
"list",
233+
"element"
234+
],
235+
"repetitionType": "OPTIONAL",
236+
"encoding": "PLAIN",
237+
"compression": "UNCOMPRESSED",
238+
"rLevelMax": 1,
239+
"dLevelMax": 3
240+
}
241+
}
242+
}
243+
},
244+
"originalType": "LIST"
245+
},
186246
"timestamp_field": {
187247
"name": "timestamp_field",
188-
"primitiveType": "BYTE_ARRAY",
189-
"originalType": "UTF8",
248+
"primitiveType": "INT64",
249+
"originalType": "TIMESTAMP_MILLIS",
190250
"path": [
191251
"timestamp_field"
192252
],
@@ -528,10 +588,97 @@
528588
"rLevelMax": 1,
529589
"dLevelMax": 3
530590
},
591+
{
592+
"name": "timestamp_array_field",
593+
"path": [
594+
"timestamp_array_field"
595+
],
596+
"repetitionType": "OPTIONAL",
597+
"rLevelMax": 0,
598+
"dLevelMax": 1,
599+
"isNested": true,
600+
"fieldCount": 1,
601+
"fields": {
602+
"list": {
603+
"name": "list",
604+
"path": [
605+
"timestamp_array_field",
606+
"list"
607+
],
608+
"repetitionType": "REPEATED",
609+
"rLevelMax": 1,
610+
"dLevelMax": 2,
611+
"isNested": true,
612+
"fieldCount": 1,
613+
"fields": {
614+
"element": {
615+
"name": "element",
616+
"primitiveType": "INT64",
617+
"originalType": "TIMESTAMP_MILLIS",
618+
"path": [
619+
"timestamp_array_field",
620+
"list",
621+
"element"
622+
],
623+
"repetitionType": "OPTIONAL",
624+
"encoding": "PLAIN",
625+
"compression": "UNCOMPRESSED",
626+
"rLevelMax": 1,
627+
"dLevelMax": 3
628+
}
629+
}
630+
}
631+
},
632+
"originalType": "LIST"
633+
},
634+
{
635+
"name": "list",
636+
"path": [
637+
"timestamp_array_field",
638+
"list"
639+
],
640+
"repetitionType": "REPEATED",
641+
"rLevelMax": 1,
642+
"dLevelMax": 2,
643+
"isNested": true,
644+
"fieldCount": 1,
645+
"fields": {
646+
"element": {
647+
"name": "element",
648+
"primitiveType": "INT64",
649+
"originalType": "TIMESTAMP_MILLIS",
650+
"path": [
651+
"timestamp_array_field",
652+
"list",
653+
"element"
654+
],
655+
"repetitionType": "OPTIONAL",
656+
"encoding": "PLAIN",
657+
"compression": "UNCOMPRESSED",
658+
"rLevelMax": 1,
659+
"dLevelMax": 3
660+
}
661+
}
662+
},
663+
{
664+
"name": "element",
665+
"primitiveType": "INT64",
666+
"originalType": "TIMESTAMP_MILLIS",
667+
"path": [
668+
"timestamp_array_field",
669+
"list",
670+
"element"
671+
],
672+
"repetitionType": "OPTIONAL",
673+
"encoding": "PLAIN",
674+
"compression": "UNCOMPRESSED",
675+
"rLevelMax": 1,
676+
"dLevelMax": 3
677+
},
531678
{
532679
"name": "timestamp_field",
533-
"primitiveType": "BYTE_ARRAY",
534-
"originalType": "UTF8",
680+
"primitiveType": "INT64",
681+
"originalType": "TIMESTAMP_MILLIS",
535682
"path": [
536683
"timestamp_field"
537684
],

0 commit comments

Comments
 (0)