Skip to content

Commit f568fa8

Browse files
Merge branch 'main' into new_python_166
2 parents 32e5f4c + da64950 commit f568fa8

File tree

1 file changed

+24
-7
lines changed

1 file changed

+24
-7
lines changed

schema_automator/generalizers/csv_data_generalizer.py

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import datetime
12
import click
23
import logging
34
import yaml
@@ -644,13 +645,11 @@ def infer_range(slot: dict, vals: set, types: dict, coerce=True) -> str:
644645
return 'boolean'
645646
if all(isfloat(v) for v in nn_vals):
646647
return 'float'
647-
if all(is_date(v) for v in nn_vals):
648-
if all(
649-
not hasattr(parse(str(v)), 'hour') or
650-
(parse(str(v)).hour == 0 and parse(str(v)).minute == 0 and parse(str(v)).second == 0)
651-
for v in nn_vals
652-
): # Check if values are just dates without time
653-
return 'date'
648+
parsed_datetimes = [is_date_or_datetime(v) for v in nn_vals]
649+
if all(pd == 'date' for pd in parsed_datetimes):
650+
return 'date'
651+
if all(pd in ('date', 'datetime') for pd in parsed_datetimes):
652+
# This selects datetime when values are mixed which may fail validation
654653
return 'datetime'
655654
if is_all_measurement(nn_vals):
656655
return 'measurement'
@@ -697,6 +696,24 @@ def is_date(string, fuzzy=False):
697696
return False
698697

699698

699+
def is_date_or_datetime(string, fuzzy=False):
    """
    Classify a string as a date, a datetime, or neither.

    :param string: str, string to classify
    :param fuzzy: bool, ignore unknown tokens in string if True
    :return: 'date' if the parsed value has no time-of-day component
        (hour, minute, second and microsecond are all zero), 'datetime' if
        any of them is non-zero, or False if the string cannot be parsed
    """
    try:
        dt = parse(string, fuzzy=fuzzy)
        # Include microseconds in the check: a value such as
        # "2020-01-01 00:00:00.5" carries a time component and must not be
        # reported as a plain date.
        if any((dt.hour, dt.minute, dt.second, dt.microsecond)):
            return 'datetime'
        # NOTE(review): an explicit midnight timestamp ("... 00:00:00") is
        # indistinguishable from a bare date here and is reported as 'date'.
        return 'date'
    except Exception:
        # https://stackoverflow.com/questions/4990718/how-can-i-write-a-try-except-block-that-catches-all-exceptions
        # we don't know all the different parse exceptions, we assume any error means this is not a date
        return False
715+
716+
700717
@dataclass
701718
class Hit:
702719
term_id: str

0 commit comments

Comments
 (0)