Skip to content

Commit 4656afb

Browse files
committed
Prevent double parsing for dates
1 parent 7027a23 commit 4656afb

File tree

1 file changed

+14
-12
lines changed

1 file changed

+14
-12
lines changed

schema_automator/generalizers/csv_data_generalizer.py

Lines changed: 14 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import datetime
12
import click
23
import logging
34
import yaml
@@ -644,13 +645,12 @@ def infer_range(slot: dict, vals: set, types: dict, coerce=True) -> str:
644645
return 'boolean'
645646
if all(isfloat(v) for v in nn_vals):
646647
return 'float'
647-
if all(is_date(v) for v in nn_vals):
648-
if all(
649-
not hasattr(parse(str(v)), 'hour') or
650-
(parse(str(v)).hour == 0 and parse(str(v)).minute == 0 and parse(str(v)).second == 0)
651-
for v in nn_vals
652-
): # Check if values are just dates without time
653-
return 'date'
648+
parsed_datetimes = [is_date_or_datetime(v) for v in nn_vals]
649+
if all(pd in ('date', 'datetime') for pd in parsed_datetimes):
650+
# This selects date when values are mixed which may fail validation
651+
# but is the best we can do... we know it isn't string
652+
return 'date'
653+
if all(pd == 'datetime' for pd in parsed_datetimes):
654654
return 'datetime'
655655
if is_all_measurement(nn_vals):
656656
return 'measurement'
@@ -681,19 +681,21 @@ def get_db(db_id: str) -> Optional[str]:
681681
return parts[0]
682682

683683

684-
def is_date(string, fuzzy=False):
684+
def is_date_or_datetime(string, fuzzy=False):
685685
"""
686-
Return whether the string can be interpreted as a date.
686+
Return whether the string can be interpreted as a date or datetime.
687687
688688
:param string: str, string to check for date
689689
:param fuzzy: bool, ignore unknown tokens in string if True
690690
"""
691691
try:
692-
parse(string, fuzzy=fuzzy)
693-
return True
692+
dt = parse(string, fuzzy=fuzzy)
693+
if dt.hour == 0 and dt.minute == 0 and dt.second == 0:
694+
return 'date'
695+
return 'datetime'
694696
except Exception:
695697
# https://stackoverflow.com/questions/4990718/how-can-i-write-a-try-except-block-that-catches-all-exceptions
696-
# we don't know all the different parse exceptions, we assume any error means this is a date
698+
# we don't know all the different parse exceptions, we assume any error means this is not a date
697699
return False
698700

699701

0 commit comments

Comments
 (0)