@@ -355,6 +355,8 @@ convert_datetimestruct_local_to_utc(pandas_datetimestruct *out_dts_utc,
355355 * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow
356356 * + Accepts special values "NaT" (not a time), "Today", (current
357357 * day according to local time) and "Now" (current time in UTC).
358+ * + ':' separator between hours, minutes, and seconds is optional. When
359+ * omitted, each component must be 2 digits if it appears. (GH-10041)
358360 *
359361 * 'str' must be a NULL-terminated string, and 'len' must be its length.
360362 * 'unit' should contain -1 if the unit is unknown, or the unit
@@ -394,15 +396,21 @@ parse_iso_8601_datetime(char *str, int len,
394396 char * substr , sublen ;
395397 PANDAS_DATETIMEUNIT bestunit ;
396398
397- /* if date components in are separated by one of valid separators
398- * months/days without leadings 0s will be parsed
399+ /* If year-month-day are separated by a valid separator,
400+ * months/days without leading zeroes will be parsed
399401 * (though not iso8601). If the components aren't separated,
400- * an error code will be retuned because the date is ambigous
402+ * 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are
403+ * forbidden here (but parsed as YYMMDD elsewhere).
401404 */
402- int has_sep = 0 ;
403- char sep = '\0' ;
404- char valid_sep [] = {'-' , '.' , '/' , '\\' , ' ' };
405- int valid_sep_len = 5 ;
405+ int has_ymd_sep = 0 ;
406+ char ymd_sep = '\0' ;
407+ char valid_ymd_sep [] = {'-' , '.' , '/' , '\\' , ' ' };
408+ int valid_ymd_sep_len = sizeof (valid_ymd_sep );
409+
410+ /* hour-minute-second may or may not be separated by ':'. If not, then
411+ * each component must be 2 digits. */
412+ int has_hms_sep = 0 ;
413+ int hour_was_2_digits = 0 ;
406414
407415 /* Initialize the output to all zeros */
408416 memset (out , 0 , sizeof (pandas_datetimestruct ));
@@ -550,67 +558,58 @@ parse_iso_8601_datetime(char *str, int len,
550558 /* Check whether it's a leap-year */
551559 year_leap = is_leapyear (out -> year );
552560
553- /* Next character must be a separator, start of month or end */
561+ /* Next character must be a separator, start of month, or end of string */
554562 if (sublen == 0 ) {
555563 if (out_local != NULL ) {
556564 * out_local = 0 ;
557565 }
558566 bestunit = PANDAS_FR_Y ;
559567 goto finish ;
560568 }
561- else if (!isdigit (* substr )) {
562- for (i = 0 ; i < valid_sep_len ; ++ i ) {
563- if (* substr == valid_sep [i ]) {
564- has_sep = 1 ;
565- sep = valid_sep [i ];
566- ++ substr ;
567- -- sublen ;
569+
570+ if (!isdigit (* substr )) {
571+ for (i = 0 ; i < valid_ymd_sep_len ; ++ i ) {
572+ if (* substr == valid_ymd_sep [i ]) {
568573 break ;
569574 }
570575 }
571- if (i == valid_sep_len ) {
576+ if (i == valid_ymd_sep_len ) {
572577 goto parse_error ;
573578 }
574- }
575-
576- /* Can't have a trailing sep */
577- if (sublen == 0 ) {
578- goto parse_error ;
579- }
580-
581-
582- /* PARSE THE MONTH (2 digits) */
583- if (has_sep && ((sublen >= 2 && isdigit (substr [0 ]) && !isdigit (substr [1 ]))
584- || (sublen == 1 && isdigit (substr [0 ])))) {
585- out -> month = (substr [0 ] - '0' );
586-
587- if (out -> month < 1 ) {
588- PyErr_Format (PyExc_ValueError ,
589- "Month out of range in datetime string \"%s\"" , str );
590- goto error ;
591- }
579+ has_ymd_sep = 1 ;
580+ ymd_sep = valid_ymd_sep [i ];
592581 ++ substr ;
593582 -- sublen ;
583+ /* Cannot have trailing separator */
584+ if (sublen == 0 || !isdigit (* substr )) {
585+ goto parse_error ;
586+ }
594587 }
595- else if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
596- out -> month = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
597588
598- if (out -> month < 1 || out -> month > 12 ) {
599- PyErr_Format (PyExc_ValueError ,
600- "Month out of range in datetime string \"%s\"" , str );
601- goto error ;
602- }
603- substr += 2 ;
604- sublen -= 2 ;
589+ /* PARSE THE MONTH */
590+ /* First digit required */
591+ out -> month = (* substr - '0' );
592+ ++ substr ;
593+ -- sublen ;
594+ /* Second digit optional if there was a separator */
595+ if (isdigit (* substr )) {
596+ out -> month = 10 * out -> month + (* substr - '0' );
597+ ++ substr ;
598+ -- sublen ;
605599 }
606- else {
600+ else if (! has_ymd_sep ) {
607601 goto parse_error ;
608602 }
603+ if (out -> month < 1 || out -> month > 12 ) {
604+ PyErr_Format (PyExc_ValueError ,
605+ "Month out of range in datetime string \"%s\"" , str );
606+ goto error ;
607+ }
609608
610- /* Next character must be a '-' or the end of the string */
609+ /* Next character must be the separator, start of day, or end of string */
611610 if (sublen == 0 ) {
612- /* dates of form YYYYMM are not valid */
613- if (!has_sep ) {
611+ /* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */
612+ if (!has_ymd_sep ) {
614613 goto parse_error ;
615614 }
616615 if (out_local != NULL ) {
@@ -619,47 +618,40 @@ parse_iso_8601_datetime(char *str, int len,
619618 bestunit = PANDAS_FR_M ;
620619 goto finish ;
621620 }
622- else if (has_sep && * substr == sep ) {
621+
622+ if (has_ymd_sep ) {
623+ /* Must have separator, but cannot be trailing */
624+ if (* substr != ymd_sep || sublen == 1 ) {
625+ goto parse_error ;
626+ }
623627 ++ substr ;
624628 -- sublen ;
625629 }
626- else if (!isdigit (* substr )) {
627- goto parse_error ;
628- }
629630
630- /* Can't have a trailing '-' */
631- if (sublen == 0 ) {
632- goto parse_error ;
631+ /* PARSE THE DAY */
632+ /* First digit required */
633+ if (!isdigit (* substr )) {
634+ goto parse_error ;
633635 }
634-
635- /* PARSE THE DAY (2 digits) */
636- if (has_sep && ((sublen >= 2 && isdigit (substr [0 ]) && !isdigit (substr [1 ]))
637- || (sublen == 1 && isdigit (substr [0 ])))) {
638- out -> day = (substr [0 ] - '0' );
639-
640- if (out -> day < 1 ) {
641- PyErr_Format (PyExc_ValueError ,
642- "Day out of range in datetime string \"%s\"" , str );
643- goto error ;
644- }
636+ out -> day = (* substr - '0' );
637+ ++ substr ;
638+ -- sublen ;
639+ /* Second digit optional if there was a separator */
640+ if (isdigit (* substr )) {
641+ out -> day = 10 * out -> day + (* substr - '0' );
645642 ++ substr ;
646643 -- sublen ;
647644 }
648- else if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
649- out -> day = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
650-
651- if (out -> day < 1 ||
652- out -> day > days_per_month_table [year_leap ][out -> month - 1 ]) {
653- PyErr_Format (PyExc_ValueError ,
654- "Day out of range in datetime string \"%s\"" , str );
655- goto error ;
656- }
657- substr += 2 ;
658- sublen -= 2 ;
659- }
660- else {
645+ else if (!has_ymd_sep ) {
661646 goto parse_error ;
662647 }
648+ if (out -> day < 1 ||
649+ out -> day > days_per_month_table [year_leap ][out -> month - 1 ])
650+ {
651+ PyErr_Format (PyExc_ValueError ,
652+ "Day out of range in datetime string \"%s\"" , str );
653+ goto error ;
654+ }
663655
664656 /* Next character must be a 'T', ' ', or end of string */
665657 if (sublen == 0 ) {
@@ -669,104 +661,119 @@ parse_iso_8601_datetime(char *str, int len,
669661 bestunit = PANDAS_FR_D ;
670662 goto finish ;
671663 }
672- else if (* substr != 'T' && * substr != ' ' ) {
664+
665+ if ((* substr != 'T' && * substr != ' ' ) || sublen == 1 ) {
673666 goto parse_error ;
674667 }
675- else {
668+ ++ substr ;
669+ -- sublen ;
670+
671+ /* PARSE THE HOURS */
672+ /* First digit required */
673+ if (!isdigit (* substr )) {
674+ goto parse_error ;
675+ }
676+ out -> hour = (* substr - '0' );
677+ ++ substr ;
678+ -- sublen ;
679+ /* Second digit optional */
680+ if (isdigit (* substr )) {
681+ hour_was_2_digits = 1 ;
682+ out -> hour = 10 * out -> hour + (* substr - '0' );
676683 ++ substr ;
677684 -- sublen ;
678- }
679-
680- /* PARSE THE HOURS (2 digits) */
681- if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
682- out -> hour = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
683-
684685 if (out -> hour >= 24 ) {
685686 PyErr_Format (PyExc_ValueError ,
686687 "Hours out of range in datetime string \"%s\"" , str );
687688 goto error ;
688689 }
689- substr += 2 ;
690- sublen -= 2 ;
691- }
692- else if (sublen >= 1 && isdigit (substr [0 ])) {
693- out -> hour = substr [0 ] - '0' ;
694- ++ substr ;
695- -- sublen ;
696- }
697- else {
698- goto parse_error ;
699690 }
700691
701692 /* Next character must be a ':' or the end of the string */
702- if (sublen > 0 && * substr == ':' ) {
693+ if (sublen == 0 ) {
694+ if (!hour_was_2_digits ) {
695+ goto parse_error ;
696+ }
697+ bestunit = PANDAS_FR_h ;
698+ goto finish ;
699+ }
700+
701+ if (* substr == ':' ) {
702+ has_hms_sep = 1 ;
703703 ++ substr ;
704704 -- sublen ;
705+ /* Cannot have a trailing separator */
706+ if (sublen == 0 || !isdigit (* substr )) {
707+ goto parse_error ;
708+ }
705709 }
706- else {
710+ else if (!isdigit (* substr )) {
711+ if (!hour_was_2_digits ) {
712+ goto parse_error ;
713+ }
707714 bestunit = PANDAS_FR_h ;
708715 goto parse_timezone ;
709716 }
710717
711- /* Can't have a trailing ':' */
712- if (sublen == 0 ) {
713- goto parse_error ;
714- }
715-
716- /* PARSE THE MINUTES (2 digits) */
717- if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
718- out -> min = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
719-
718+ /* PARSE THE MINUTES */
719+ /* First digit required */
720+ out -> min = (* substr - '0' );
721+ ++ substr ;
722+ -- sublen ;
723+ /* Second digit optional if there was a separator */
724+ if (isdigit (* substr )) {
725+ out -> min = 10 * out -> min + (* substr - '0' );
726+ ++ substr ;
727+ -- sublen ;
720728 if (out -> min >= 60 ) {
721729 PyErr_Format (PyExc_ValueError ,
722- "Minutes out of range in datetime string \"%s\"" , str );
730+ "Minutes out of range in datetime string \"%s\"" , str );
723731 goto error ;
724732 }
725- substr += 2 ;
726- sublen -= 2 ;
727- }
728- else if (sublen >= 1 && isdigit (substr [0 ])) {
729- out -> min = substr [0 ] - '0' ;
730- ++ substr ;
731- -- sublen ;
732733 }
733- else {
734+ else if (! has_hms_sep ) {
734735 goto parse_error ;
735736 }
736737
737- /* Next character must be a ':' or the end of the string */
738- if (sublen > 0 && * substr == ':' ) {
738+ if (sublen == 0 ) {
739+ bestunit = PANDAS_FR_m ;
740+ goto finish ;
741+ }
742+
743+ /* If we make it through this condition block, then the next
744+ * character is a digit. */
745+ if (has_hms_sep && * substr == ':' ) {
739746 ++ substr ;
740747 -- sublen ;
748+ /* Cannot have a trailing ':' */
749+ if (sublen == 0 || !isdigit (* substr )) {
750+ goto parse_error ;
751+ }
752+ }
753+ else if (!has_hms_sep && isdigit (* substr )) {
741754 }
742755 else {
743756 bestunit = PANDAS_FR_m ;
744757 goto parse_timezone ;
745758 }
746759
747- /* Can't have a trailing ':' */
748- if (sublen == 0 ) {
749- goto parse_error ;
750- }
751-
752- /* PARSE THE SECONDS (2 digits) */
753- if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
754- out -> sec = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
755-
760+ /* PARSE THE SECONDS */
761+ /* First digit required */
762+ out -> sec = (* substr - '0' );
763+ ++ substr ;
764+ -- sublen ;
765+ /* Second digit optional if there was a separator */
766+ if (isdigit (* substr )) {
767+ out -> sec = 10 * out -> sec + (* substr - '0' );
768+ ++ substr ;
769+ -- sublen ;
756770 if (out -> sec >= 60 ) {
757771 PyErr_Format (PyExc_ValueError ,
758- "Seconds out of range in datetime string \"%s\"" , str );
772+ "Seconds out of range in datetime string \"%s\"" , str );
759773 goto error ;
760774 }
761- substr += 2 ;
762- sublen -= 2 ;
763- }
764- else if (sublen >= 1 && isdigit (substr [0 ])) {
765- out -> sec = substr [0 ] - '0' ;
766- ++ substr ;
767- -- sublen ;
768775 }
769- else {
776+ else if (! has_hms_sep ) {
770777 goto parse_error ;
771778 }
772779
0 commit comments