Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions src/undate/converters/combined.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
**Experimental** combined parser. Supports EDTF, Gregorian, Hebrew, and Hijri
where dates are unambiguous. Year-only dates are parsed as EDTF in
Gregorian calendar.
Combined parser. Supports EDTF, Gregorian, Hebrew, Hijri, and Christian
liturgical dates where dates are unambiguous. Year-only dates are parsed
as EDTF in Gregorian calendar.
"""

from typing import Union
Expand All @@ -16,6 +16,7 @@
from undate.converters.calendars.gregorian.transformer import GregorianDateTransformer
from undate.converters.calendars.hebrew.transformer import HebrewDateTransformer
from undate.converters.calendars.islamic.transformer import IslamicDateTransformer
from undate.converters.holidays import HolidayTransformer


class CombinedDateTransformer(Transformer):
Expand All @@ -35,6 +36,7 @@ def start(self, children):
hebrew=HebrewDateTransformer(),
islamic=IslamicDateTransformer(),
gregorian=GregorianDateTransformer(),
holidays=HolidayTransformer(),
)


Expand All @@ -47,14 +49,16 @@ def start(self, children):
class OmnibusDateConverter(BaseDateConverter):
"""
Combination parser that aggregates existing parser grammars.
Currently supports EDTF, Gregorian, Hebrew, and Hijri where dates are unambiguous.
(Year-only dates are parsed as EDTF in Gregorian calendar.)
Supports EDTF, Gregorian, Hebrew, Hijri, and Christian liturgical dates
where dates are unambiguous. Year-only dates are parsed as EDTF in
Gregorian calendar.

Does not support serialization.

Example usage::

Undate.parse("Tammuz 4816", "omnibus")
Undate.parse("Tammuz 4812", "omnibus")
Undate.parse("Easter 1916", "omnibus")

"""

Expand Down
4 changes: 3 additions & 1 deletion src/undate/converters/grammars/combined.lark
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
%import .undate_common.DATE_PUNCTUATION
%ignore DATE_PUNCTUATION

start: (edtf__start | hebrew__hebrew_date | islamic__islamic_date | gregorian__gregorian_date )
start: (edtf__start | hebrew__hebrew_date | islamic__islamic_date | gregorian__gregorian_date | holidays__holiday_date)

// Renaming of the import variables is required, as they receive the namespace of this file.
// See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565
Expand All @@ -30,6 +30,8 @@ start: (edtf__start | hebrew__hebrew_date | islamic__islamic_date | gregorian__
// gregorian calendar, in multiple languages
%import .gregorian.gregorian_date -> gregorian__gregorian_date

// relative import from holidays.lark
%import .holidays.holiday_date -> holidays__holiday_date

// override hebrew date to omit year-only, since year without calendar is ambiguous
// NOTE: potentially support year with calendar label
Expand Down
37 changes: 37 additions & 0 deletions src/undate/converters/grammars/holidays.lark
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
%import common.WS
%ignore WS

%import .undate_common.DATE_PUNCTUATION
%ignore DATE_PUNCTUATION


// a holiday date is a holiday name followed by a year; the year is
// required for movable feasts and optional for fixed-date holidays
holiday_date: movable_feast year | fixed_date year?

// holidays that shift depending on the year
movable_feast: EASTER | EASTER_MONDAY | HOLY_SATURDAY | ASCENSION
    | PENTECOST | WHIT_MONDAY | TRINITY | ASH_WEDNESDAY | SHROVE_TUESDAY

// holidays that are always on the same date
fixed_date: EPIPHANY | CANDLEMAS | ST_PATRICKS | ALL_FOOLS | ST_CYPRIANS

// year is exactly four digits
year: /\d{4}/
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
python - <<'PY'
import datetime

for year in (0, 1):
    try:
        print(year, datetime.date(year, 1, 1).isoformat())
    except Exception as exc:
        print(year, type(exc).__name__, exc)
PY

Repository: dh-tech/undate-python

Length of output: 113


🏁 Script executed:

# Find and examine the holidays.py file
fd -t f holidays.py

Repository: dh-tech/undate-python

Length of output: 137


🏁 Script executed:

# Look at the resolver code around the mentioned lines
find . -name "holidays.py" -type f | head -5 | xargs wc -l

Repository: dh-tech/undate-python

Length of output: 104


🏁 Script executed:

# Search for the resolver code that uses datetime.date
rg -A 5 -B 5 "datetime.date\|holidays.easter" --type py

Repository: dh-tech/undate-python

Length of output: 47


🏁 Script executed:

cat -n src/undate/converters/holidays.py | head -150

Repository: dh-tech/undate-python

Length of output: 6947


🏁 Script executed:

python - <<'PY'
from convertdate import holidays
import datetime

# Test what happens when calling holidays.easter(0)
for year in (0, 1, 9999, 10000):
    try:
        easter_result = holidays.easter(year)
        print(f"holidays.easter({year}) = {easter_result}")
        # Now try to create datetime.date with it
        try:
            d = datetime.date(*easter_result)
            print(f"  datetime.date(*{easter_result}) = {d.isoformat()}")
        except Exception as e:
            print(f"  datetime.date(*{easter_result}) raises {type(e).__name__}: {e}")
    except Exception as e:
        print(f"holidays.easter({year}) raises {type(e).__name__}: {e}")
PY

Repository: dh-tech/undate-python

Length of output: 188


🏁 Script executed:

# Find project configuration files
fd -t f -e "toml" -e "txt" -e "cfg" | grep -i "setup\|requirements\|pyproject"

Repository: dh-tech/undate-python

Length of output: 79


🏁 Script executed:

# Test datetime.date year boundaries
python - <<'PY'
import datetime

# Test valid year boundaries
test_years = [1, 9999, 10000, -1]
for year in test_years:
    try:
        d = datetime.date(year, 1, 1)
        print(f"datetime.date({year}, 1, 1) = {d.isoformat()}")
    except Exception as e:
        print(f"datetime.date({year}, 1, 1) raises {type(e).__name__}: {e}")
PY

Repository: dh-tech/undate-python

Length of output: 279


🏁 Script executed:

cat -n src/undate/converters/grammars/holidays.lark

Repository: dh-tech/undate-python

Length of output: 1407


Reject 0000 before it reaches the movable-feast calculation.

The grammar currently allows year: /\d{4}/ to match 0000, but the movable-feast resolver at line 127 uses datetime.date(*holidays.easter(parts["year"])), which only supports years 1–9999. Parsing inputs like Easter 0000 will therefore result in a ValueError at transformation time instead of a normal parse failure.

Grammar fix
-year: /\d{4}/
+year: /(?!0000)\d{4}/
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
year: /\d{4}/
year: /(?!0000)\d{4}/
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/undate/converters/grammars/holidays.lark` at line 17, The grammar rule
year: /\d{4}/ currently accepts "0000", which later causes
datetime.date(*holidays.easter(parts["year"])) in the movable-feast resolver to
raise ValueError; update the year rule to disallow "0000" (e.g. use a regex that
rejects all-zero year such as a negative lookahead or require the first digit
1-9) so that inputs like "0000" fail during parsing rather than during
transformation.


// all patterns use case-insensitive regex;
// whitespace between the words of a multi-word name is optional

// Fixed-date holidays
EPIPHANY: /epiphany/i
CANDLEMAS: /candlemass?/i // matches both spellings: one s or two
ST_PATRICKS: /st\.?\s*patrick'?s?\s*day/i
ALL_FOOLS: /(april|all)\s*fools?\s*day/i
ST_CYPRIANS: /st\.?\s*cyprian'?s?\s*day/i

// Movable feasts
EASTER: /easter/i
EASTER_MONDAY: /easter\s*monday/i
HOLY_SATURDAY: /holy\s*saturday/i
ASCENSION: /ascension\s*day|ascension/i
PENTECOST: /pentecost/i
WHIT_MONDAY: /whit\s*monday|whitsun\s*monday/i
TRINITY: /trinity\s*sunday|trinity/i
ASH_WEDNESDAY: /ash\s*wednesday/i
SHROVE_TUESDAY: /shrove\s*tuesday/i
166 changes: 166 additions & 0 deletions src/undate/converters/holidays.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
"""
Holiday date Converter: parse Christian liturgical dates and convert to Gregorian.
"""

import datetime

from convertdate import holidays  # type: ignore[import-untyped]
from lark import Lark, Transformer, Tree, Token
from lark.exceptions import UnexpectedInput, VisitError

from undate import Undate, Calendar
from undate.converters.base import BaseDateConverter, GRAMMAR_FILE_PATH

# To add a new holiday:
# 1. Add a name and pattern to holidays.lark grammar file
# 2. Include it in the appropriate section (fixed or movable)
# 3. Add an entry to FIXED_HOLIDAYS or MOVABLE_FEASTS; must match grammar terminal name


# Holidays that fall on the same calendar date every year.
# Each key must match a terminal name in the holidays.lark grammar, since
# holidays are looked up by token type; each value is a tuple of
# numeric (month, day).
FIXED_HOLIDAYS: dict[str, tuple[int, int]] = {
    "EPIPHANY": (1, 6),  # January 6
    "CANDLEMAS": (2, 2),  # February 2; 40th day & end of epiphany
    "ST_PATRICKS": (3, 17),  # March 17
    "ALL_FOOLS": (4, 1),  # All / April fools day, April 1
    "ST_CYPRIANS": (9, 16),  # St. Cyprian's Feast day: September 16
}
Comment on lines +22 to +28
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Typo: "CANDLEMASS" should be "CANDLEMAS".

The feast is spelled Candlemas (single "s"). This terminal name is likely mirrored in holidays.lark, so both the grammar terminal and this key need updating to match.

✏️ Proposed fix
 FIXED_HOLIDAYS = {
     "EPIPHANY": (1, 6),  # January 6
-    "CANDLEMASS": (2, 2),  # February 2; 40th day & end of epiphany
+    "CANDLEMAS": (2, 2),  # February 2; 40th day & end of epiphany
     "ST_PATRICKS": (3, 17),  # March 17
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
FIXED_HOLIDAYS = {
"EPIPHANY": (1, 6), # January 6
"CANDLEMASS": (2, 2), # February 2; 40th day & end of epiphany
"ST_PATRICKS": (3, 17), # March 17
"ALL_FOOLS": (4, 1), # All / April fools day, April 1
"ST_CYPRIANS": (9, 16), # St. Cyprian's Feast day: September 16
}
FIXED_HOLIDAYS = {
"EPIPHANY": (1, 6), # January 6
"CANDLEMAS": (2, 2), # February 2; 40th day & end of epiphany
"ST_PATRICKS": (3, 17), # March 17
"ALL_FOOLS": (4, 1), # All / April fools day, April 1
"ST_CYPRIANS": (9, 16), # St. Cyprian's Feast day: September 16
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/undate/converters/holidays.py` around lines 22 - 28, Rename the
misspelled holiday key "CANDLEMASS" to "CANDLEMAS" in the FIXED_HOLIDAYS dict
and update all corresponding references (grammar terminal in holidays.lark and
any usages in code, tests, or parser rules) to the single‑s spelling so the
terminal name and dict key match; search for "CANDLEMASS" across the repo and
replace with "CANDLEMAS", then run relevant tests/parsing to ensure no remaining
references break.


# Holidays that shift depending on the year.
# Each key must match a terminal name in the holidays.lark grammar; each
# value is the number of days relative to Easter of the same year
# (negative values fall before Easter, positive values after).
MOVABLE_FEASTS: dict[str, int] = {
    "EASTER": 0,  # Easter, no offset
    "HOLY_SATURDAY": -1,  # day before Easter
    "EASTER_MONDAY": 1,  # day after Easter
    "ASCENSION": 39,  # fortieth day of Easter
    "PENTECOST": 49,  # 7 weeks after Easter
    "WHIT_MONDAY": 50,  # Monday after Pentecost
    "TRINITY": 56,  # first Sunday after Pentecost
    "ASH_WEDNESDAY": -46,  # Wednesday of the 7th week before Easter
    "SHROVE_TUESDAY": -47,  # day before Ash Wednesday
}


# Parser for the standalone holiday grammar, created once when this module
# is imported; parsing starts from the ``holiday_date`` rule.
parser = Lark.open(
    str(GRAMMAR_FILE_PATH / "holidays.lark"), rel_to=__file__, start="holiday_date"
)


class HolidayTransformer(Transformer):
    """Transform a parsed holiday date into an :class:`~undate.Undate`.

    Fixed-date holidays are resolved to a month and day through
    ``FIXED_HOLIDAYS``; movable feasts are resolved as an offset in days
    from Easter of the parsed year, using ``MOVABLE_FEASTS``.
    """

    calendar = Calendar.GREGORIAN

    @staticmethod
    def _local_name(token_type: str) -> str:
        """Return a token type without any grammar namespace prefix.

        Token types are prefixed (``namespace__NAME``) when this grammar is
        included in the combined parser; only the last segment matches the
        keys of the holiday lookup tables.
        """
        return token_type.split("__")[-1]

    def year(self, items):
        """Combine the matched year text into a single ``year`` token."""
        value = "".join(str(i) for i in items)
        return Token("year", value)

    def movable_feast(self, items):
        """Pass the feast terminal through unchanged.

        A movable feast day can't be calculated without the year, so
        resolution is deferred until the whole date is assembled.
        """
        return items[0]

    def fixed_date(self, items):
        """Convert a fixed-date holiday terminal into month and day tokens.

        :raises ValueError: if the terminal is not a key of ``FIXED_HOLIDAYS``
        """
        holiday_name = self._local_name(items[0].type)
        try:
            month, day = FIXED_HOLIDAYS[holiday_name]
        except KeyError as err:
            raise ValueError(f"Unknown fixed holiday {holiday_name}") from err
        return Tree("fixed_date", [Token("month", month), Token("day", day)])

    def holiday_date(self, items):
        """Build an :class:`~undate.Undate` from the transformed children."""
        parts = self._get_date_parts(items)
        return Undate(**parts)

    def _get_date_parts(self, items) -> dict[str, int | str]:
        """Collect year, month, and day values from tokens and nested trees.

        Recurses into trees (fixed-date holidays arrive as a ``fixed_date``
        tree holding month and day tokens). When a movable feast terminal is
        present, its month and day are calculated relative to Easter only
        after every child has been processed, so the year is available
        regardless of the order of the children.

        :param items: tokens and/or trees produced by the other callbacks
        :returns: dictionary of date parts for initializing an Undate
        :raises ValueError: if a movable feast has no year, or the year is
            outside the range supported by :class:`datetime.date`
        """
        date_parts = ("year", "month", "day")
        parts: dict[str, int | str] = {}
        movable_feast = None

        for child in items:
            if isinstance(child, Tree):
                # nested tree: recurse on children to get date parts
                if child.children:
                    parts.update(self._get_date_parts(child.children))
            elif isinstance(child, Token):
                if child.type in date_parts:
                    # empty values are skipped; all date parts are integers
                    if child.value:
                        parts[str(child.type)] = int(child.value)
                else:
                    # collect the feast but don't resolve it until the
                    # year is known
                    name = self._local_name(child.type)
                    if name in MOVABLE_FEASTS:
                        movable_feast = name

        # if date is a movable feast, calculate relative to Easter for the year
        if movable_feast is not None:
            try:
                year = int(parts["year"])
            except KeyError as err:
                raise ValueError("Year is required for movable feasts") from err

            # datetime.date only supports years MINYEAR through MAXYEAR, but
            # a four-digit year pattern also matches 0000; fail with a clear
            # message instead of an opaque datetime error
            if not datetime.MINYEAR <= year <= datetime.MAXYEAR:
                raise ValueError(
                    f"Year {year} is out of range for movable feast calculation"
                )

            easter = datetime.date(*holidays.easter(year))
            holiday_date = easter + datetime.timedelta(
                days=MOVABLE_FEASTS[movable_feast]
            )
            parts.update({"month": holiday_date.month, "day": holiday_date.day})

        return parts


class HolidayDateConverter(BaseDateConverter):
    """
    Converter for Christian liturgical dates.

    Supports fixed-date holidays (Epiphany, Candlemas, etc.) and
    Easter-relative movable feasts (Easter, Ash Wednesday, Pentecost, etc.).

    Example usage::

        Undate.parse("Easter 1942", "holidays")
        Undate.parse("Ash Wednesday 1942", "holidays")
        Undate.parse("Epiphany", "holidays")

    Does not support serialization.
    """

    name = "holidays"

    def __init__(self):
        self.transformer = HolidayTransformer()

    def parse(self, value: str) -> Undate:
        """Parse a holiday date string and return an :class:`~undate.Undate`.

        The input text is stored as the label of the returned object.

        :param value: holiday name, optionally followed by a year
        :raises ValueError: if the string is empty, does not match the
            holiday grammar, or cannot be converted to a date
        """
        if not value:
            raise ValueError("Parsing empty string is not supported")

        try:
            parsetree = parser.parse(value)
            # transform the parse tree into an undate
            undate_obj = self.transformer.transform(parsetree)
        except UnexpectedInput as err:
            raise ValueError(f"Could not parse '{value}' as a holiday date") from err
        except VisitError as err:
            # lark wraps any exception raised inside a transformer callback
            # in VisitError; surface it as ValueError like other parse errors
            raise ValueError(
                f"Could not parse '{value}' as a holiday date: {err.orig_exc}"
            ) from err

        # set the input holiday text as a label on the undate object
        undate_obj.label = value
        return undate_obj

    def to_string(self, undate: Undate) -> str:
        """Serialization is not supported; always raises ValueError."""
        raise ValueError("Holiday converter does not support serialization")
14 changes: 13 additions & 1 deletion src/undate/date.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def weekday(self) -> Optional[int]:
thursday_week = self.astype("datetime64[W]")
days_from_thursday = (self - thursday_week).astype(int)
# if monday is 0, thursday is 3
return (days_from_thursday + 3) % 7
return int((days_from_thursday + 3) % 7)

return None

Expand All @@ -280,6 +280,18 @@ def __sub__(self, other):
# NOTE: add should not be subclassed because we want to return a Date, not a delta


class Weekday(IntEnum):
    """Weekday as an integer, compatible with :meth:`datetime.date.weekday`.

    Numbering starts with Monday as 0 and ends with Sunday as 6.
    """

    MONDAY = 0
    TUESDAY = 1
    WEDNESDAY = 2
    THURSDAY = 3
    FRIDAY = 4
    SATURDAY = 5
    SUNDAY = 6


class DatePrecision(IntEnum):
"""date precision, to indicate date precision independent from how much
of the date is known."""
Expand Down
6 changes: 6 additions & 0 deletions tests/test_converters/test_combined_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@
("13 Jan 1602", Undate(1602, 1, 13, calendar="Gregorian")),
("2022 ugu. 4", Undate(2022, 11, 4, calendar="Gregorian")),
("18 avril", Undate(month=4, day=18, calendar="Gregorian")),
# Christian liturgical dates
("Easter 1942", Undate(1942, 4, 5)),
("Epiphany 1921", Undate(1921, 1, 6)),
("Pentecost 2016", Undate(2016, 5, 15)),
("Ash Wednesday 2000", Undate(2000, 3, 8)),
("Whit Monday 2023", Undate(2023, 5, 29)),
]


Expand Down
Loading
Loading