Skip to content

Commit

Permalink
PERF: Cython version of Python _TIMEPAT regexp in parsing.pyx (pandas…
Browse files Browse the repository at this point in the history
  • Loading branch information
anmyachev committed Apr 24, 2019
1 parent 94535a3 commit 06a9c76
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 6 deletions.
5 changes: 3 additions & 2 deletions asv_bench/benchmarks/io/csv.py
Expand Up @@ -234,11 +234,12 @@ def mem_parser_chunks(self):


class ReadCSVParseSpecialDate(StringIORewind):
params = (['mY', 'mdY'],)
params = (['mY', 'mdY', 'hm'],)
params_name = ['value']
objects = {
'mY': '01-2019\n10-2019\n02/2000\n',
'mdY': '12/02/2010\n'
'mdY': '12/02/2010\n',
'hm': '21:34\n'
}

def setup(self, value):
Expand Down
39 changes: 35 additions & 4 deletions pandas/_libs/tslibs/parsing.pyx
Expand Up @@ -70,9 +70,8 @@ class DateParseError(ValueError):
_DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0,
second=0, microsecond=0)

cdef object _TIMEPAT = re.compile(r'^([01]?[0-9]|2[0-3]):([0-5][0-9])')

cdef set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'}
cdef:
set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'}

# ----------------------------------------------------------------------
cdef:
Expand Down Expand Up @@ -170,6 +169,38 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst):
raise DateParseError("Invalid date specified ({}/{})".format(month, day))


cdef inline bint does_string_look_like_time(object parse_string):
    """
    Check whether the given string starts with a valid time.

    The string has to begin with either H:MM or HH:MM, and both the hour
    (0-23) and the minute (0-59) values must be valid.

    Parameters
    ----------
    parse_string : str

    Returns
    -------
    bint
        Whether the given string is a time.
    """
    cdef:
        const char* chars
        Py_ssize_t n_chars
        int hh = -1, mm = -1

    chars = get_c_string_buf_and_size(parse_string, &n_chars)
    if n_chars < 4:
        # too short to hold even the H:MM form
        return False

    if chars[1] == b':':
        # single-digit hour: H:MM
        hh = getdigit_ascii(chars[0], -1)
        mm = _parse_2digit(chars + 2)
    elif chars[2] == b':':
        # two-digit hour: HH:MM
        hh = _parse_2digit(chars)
        mm = _parse_2digit(chars + 3)

    # hh/mm stay negative when no colon was found at either position
    if hh < 0 or hh > 23:
        return False
    return 0 <= mm <= 59


def parse_datetime_string(date_string, freq=None, dayfirst=False,
yearfirst=False, **kwargs):
"""parse datetime string, only returns datetime.
Expand All @@ -186,7 +217,7 @@ def parse_datetime_string(date_string, freq=None, dayfirst=False,
if not _does_string_look_like_datetime(date_string):
raise ValueError('Given date string not likely a datetime.')

if _TIMEPAT.match(date_string):
if does_string_look_like_time(date_string):
# use current datetime as default, not pass _DEFAULT_DATETIME
dt = du_parse(date_string, dayfirst=dayfirst,
yearfirst=yearfirst, **kwargs)
Expand Down

0 comments on commit 06a9c76

Please sign in to comment.