From af17d336eb67d155f8013533a506ead0da9631dd Mon Sep 17 00:00:00 2001 From: David Slusser Date: Sun, 8 Feb 2026 14:17:05 -0800 Subject: [PATCH 1/6] updating meetup event ingestion for online events --- .github/workflows/bandit.yaml | 1 + .github/workflows/isort.yaml | 1 + .github/workflows/mypy.yaml | 1 + .github/workflows/radon.yaml | 1 + .github/workflows/ruff_format.yaml | 1 + .github/workflows/ruff_lint.yaml | 1 + .../web/scripts/ingest_events.py | 7 +- .../web/utilities/scrapers/meetup.py | 85 ++++++++++++++++--- 8 files changed, 86 insertions(+), 12 deletions(-) diff --git a/.github/workflows/bandit.yaml b/.github/workflows/bandit.yaml index 7f52e3c..6176d80 100644 --- a/.github/workflows/bandit.yaml +++ b/.github/workflows/bandit.yaml @@ -9,6 +9,7 @@ jobs: bandit: runs-on: ubuntu-latest name: "bandit" + if: github.event.created == false # Skip if this push created a new branch steps: - uses: davidslusser/actions_python_bandit@v1.0.1 with: diff --git a/.github/workflows/isort.yaml b/.github/workflows/isort.yaml index f7564e3..3e29a4d 100644 --- a/.github/workflows/isort.yaml +++ b/.github/workflows/isort.yaml @@ -9,6 +9,7 @@ jobs: isort: runs-on: ubuntu-latest name: "isort" + if: github.event.created == false # Skip if this push created a new branch steps: - uses: davidslusser/actions_python_isort@v1.0.1 with: diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index 1da7730..d8df3a5 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -9,6 +9,7 @@ jobs: mypy: runs-on: ubuntu-latest name: "mypy" + if: github.event.created == false # Skip if this push created a new branch steps: - uses: davidslusser/actions_python_mypy@v1.0.1 with: diff --git a/.github/workflows/radon.yaml b/.github/workflows/radon.yaml index 8277e48..516014c 100644 --- a/.github/workflows/radon.yaml +++ b/.github/workflows/radon.yaml @@ -9,6 +9,7 @@ jobs: radon: runs-on: ubuntu-latest name: "radon" + if: github.event.created == false # Skip if this 
push created a new branch steps: - uses: actions/checkout@v3 - uses: davidslusser/actions_python_radon@v1.0.0 diff --git a/.github/workflows/ruff_format.yaml b/.github/workflows/ruff_format.yaml index 8bfcd67..f821266 100644 --- a/.github/workflows/ruff_format.yaml +++ b/.github/workflows/ruff_format.yaml @@ -9,6 +9,7 @@ jobs: ruff: runs-on: ubuntu-latest name: "ruff" + if: github.event.created == false # Skip if this push created a new branch steps: - name: actions_python_ruff uses: davidslusser/actions_python_ruff@v1.0.3 diff --git a/.github/workflows/ruff_lint.yaml b/.github/workflows/ruff_lint.yaml index 9728d31..20866de 100644 --- a/.github/workflows/ruff_lint.yaml +++ b/.github/workflows/ruff_lint.yaml @@ -9,6 +9,7 @@ jobs: ruff: runs-on: ubuntu-latest name: "ruff" + if: github.event.created == false # Skip if this push created a new branch steps: - name: actions_python_ruff uses: davidslusser/actions_python_ruff@v1.0.3 diff --git a/src/django_project/web/scripts/ingest_events.py b/src/django_project/web/scripts/ingest_events.py index 789b614..075723e 100644 --- a/src/django_project/web/scripts/ingest_events.py +++ b/src/django_project/web/scripts/ingest_events.py @@ -15,7 +15,10 @@ def get_eventbright_events(): def get_meetup_events() -> None: - tech_group_list = TechGroup.objects.filter(enabled=True, platform__name="Meetup") + # tech_group_list = TechGroup.objects.filter( + # enabled=True, platform__name="Meetup", name="Coeur d'Alene & Spokane WordPress" + # ) + tech_group_list = TechGroup.objects.filter(enabled=True, platform__name="Meetup", name__icontains="python") for group in tech_group_list: print("INFO: getting upcoming events for ", group.name) job = ingest_future_meetup_events.s(group.pk) @@ -23,5 +26,5 @@ def get_meetup_events() -> None: def run(): - get_eventbright_events() + # get_eventbright_events() get_meetup_events() diff --git a/src/django_project/web/utilities/scrapers/meetup.py b/src/django_project/web/utilities/scrapers/meetup.py index 
63d6207..654e252 100644 --- a/src/django_project/web/utilities/scrapers/meetup.py +++ b/src/django_project/web/utilities/scrapers/meetup.py @@ -13,7 +13,7 @@ def get_end_datetime(datetime_string: str, time_string: str) -> datetime | None: Args: datetime_string (str): string representation of a datetime; example: '2025-01-06T07:00:00-08:00' - time_string (str): string representation of a time; example: '8:00 AM PST' + time_string (str): string representation of a time; example: '7:00 PM PST' Returns: datetime: datetime object with timezone information @@ -28,9 +28,21 @@ def get_end_datetime(datetime_string: str, time_string: str) -> datetime | None: offset = timedelta(hours=offset_hours, minutes=offset_minutes) tz = timezone(offset) - # Parse the time string - time_part: str = time_string.split(" ")[0].strip() # Get the time part - time_obj: datetime = datetime.strptime(time_part.replace(" ", ""), "%I:%M %p") # Parse 12-hour format + # Parse the time string using regex to extract the time part + match = re.search(r"(\d{1,2}):(\d{2})\s*([aApP][mM])", time_string) + if not match: + return None + hour = int(match.group(1)) + minute = int(match.group(2)) + period = match.group(3).upper() + + # Convert 12-hour format to 24-hour format + if period == "PM" and hour != 12: + hour += 12 + elif period == "AM" and hour == 12: + hour = 0 + + time_obj = datetime.min.replace(hour=hour, minute=minute) # Combine date and time into a new datetime object combined_datetime: datetime = datetime.combine(datetime.strptime(date_part, "%Y-%m-%d").date(), time_obj.time()) @@ -72,25 +84,78 @@ def get_event_information(url: str) -> dict: if isinstance(description_div, Tag): # Type check for Tag event_info["description"] = "".join(str(child) for child in description_div.children) - time_element: PageElement | Tag | NavigableString | None = soup.find("time") + time_element: PageElement | Tag | NavigableString | None = soup.find("time", class_="block") if time_element: if 
isinstance(time_element, Tag): # Check if time_element is a Tag start_time_string: str | AttributeValueList | None = time_element.get("datetime", None) time_text: str = time_element.get_text(separator=" ").strip() - end_time_string: str = time_text.split(" to ")[-1] if start_time_string: if isinstance(start_time_string, str): # Check if start_time_string is a str - event_info["start_datetime"] = datetime.fromisoformat(start_time_string) - event_info["end_datetime"] = get_end_datetime(start_time_string, end_time_string) + start_dt = datetime.fromisoformat(start_time_string) + event_info["start_datetime"] = start_dt + + # Parse duration from the time text which shows times in UTC + # Format: "Friday, Feb 13 · 2:00 AM to 3:00 AM UTC" + # We calculate the duration and add it to start_dt to preserve timezone + if " to " in time_text: + time_parts = time_text.split(" to ") + if len(time_parts) == 2: + # Extract start time from text (in UTC) + start_match = re.search(r"(\d{1,2}):(\d{2})\s*([APap][Mm])", time_parts[0]) + # Extract end time from text (in UTC) + end_match = re.search(r"(\d{1,2}):(\d{2})\s*([APap][Mm])", time_parts[1]) + + if start_match and end_match: + # Parse start time (UTC) + start_hour = int(start_match.group(1)) + start_minute = int(start_match.group(2)) + start_period = start_match.group(3).upper() + if start_period == "PM" and start_hour != 12: + start_hour += 12 + elif start_period == "AM" and start_hour == 12: + start_hour = 0 + + # Parse end time (UTC) + end_hour = int(end_match.group(1)) + end_minute = int(end_match.group(2)) + end_period = end_match.group(3).upper() + if end_period == "PM" and end_hour != 12: + end_hour += 12 + elif end_period == "AM" and end_hour == 12: + end_hour = 0 + + # Calculate duration in minutes + start_minutes = start_hour * 60 + start_minute + end_minutes = end_hour * 60 + end_minute + + # Handle overnight events + if end_minutes <= start_minutes: + end_minutes += 24 * 60 + + duration_minutes = end_minutes - 
start_minutes + + # Add duration to start_dt to get end_dt in the same timezone + end_dt = start_dt + timedelta(minutes=duration_minutes) + event_info["end_datetime"] = end_dt + else: + event_info["end_datetime"] = None + else: + event_info["end_datetime"] = None + else: + event_info["end_datetime"] = None location_name: str | Any = None match = re.search(r'"__typename":"Venue","id":"\d+","name":"([^"]+)"', page_content) if match: location_name = match.group(1) + if not location_name: + online_p = soup.find("p", class_="ds2-k16 text-ds2-text-fill-primary-enabled", string="Online event") + if online_p: + location_name = "Online event" event_info["location_name"] = location_name - location_address: str | Any = None + location_address: str = "" address_match: re.Match[str] | None = re.search( r'"__typename":"Venue","id":"\d+","name":"[^"]+","address":"([^"]+)","city":"([^"]+)","state":"([^"]+)","country":"([^"]+)"', page_content, @@ -103,7 +168,7 @@ def get_event_information(url: str) -> dict: location_address = f"{street}, {city}, {state}, {country.upper()}" event_info["location_address"] = location_address - map_link: str | Any = None + map_link: str = "" map_link_match: re.Match[str] | None = re.search( r']*data-testid="map-link"[^>]*href="([^"]+)"', page_content ) From 48d7ffbdda5da40395063496b9aa9e3c99d53ba4 Mon Sep 17 00:00:00 2001 From: David Slusser Date: Sun, 8 Feb 2026 14:18:20 -0800 Subject: [PATCH 2/6] revert ingest_events --- src/django_project/web/scripts/ingest_events.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/django_project/web/scripts/ingest_events.py b/src/django_project/web/scripts/ingest_events.py index 075723e..789b614 100644 --- a/src/django_project/web/scripts/ingest_events.py +++ b/src/django_project/web/scripts/ingest_events.py @@ -15,10 +15,7 @@ def get_eventbright_events(): def get_meetup_events() -> None: - # tech_group_list = TechGroup.objects.filter( - # enabled=True, platform__name="Meetup", 
name="Coeur d'Alene & Spokane WordPress" - # ) - tech_group_list = TechGroup.objects.filter(enabled=True, platform__name="Meetup", name__icontains="python") + tech_group_list = TechGroup.objects.filter(enabled=True, platform__name="Meetup") for group in tech_group_list: print("INFO: getting upcoming events for ", group.name) job = ingest_future_meetup_events.s(group.pk) @@ -26,5 +23,5 @@ def get_meetup_events() -> None: def run(): - # get_eventbright_events() + get_eventbright_events() get_meetup_events() From ba607fba2500c9eb0451df9aa0f235a09307308e Mon Sep 17 00:00:00 2001 From: David Slusser Date: Sun, 8 Feb 2026 14:22:59 -0800 Subject: [PATCH 3/6] update python version in workflows --- .github/workflows/bandit.yaml | 2 +- .github/workflows/fawltydeps.yaml | 2 +- .github/workflows/isort.yaml | 2 +- .github/workflows/mypy.yaml | 1 + .github/workflows/pytest.yaml | 2 +- .github/workflows/ruff_format.yaml | 2 +- .github/workflows/ruff_lint.yaml | 2 +- 7 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/bandit.yaml b/.github/workflows/bandit.yaml index 6176d80..6241ca7 100644 --- a/.github/workflows/bandit.yaml +++ b/.github/workflows/bandit.yaml @@ -16,4 +16,4 @@ jobs: src: "src" options: "-c pyproject.toml -r" pip_install_command: "pip install .[dev]" - python_version: "3.11" + python_version: "3.13" diff --git a/.github/workflows/fawltydeps.yaml b/.github/workflows/fawltydeps.yaml index cbd1d42..4f709ad 100644 --- a/.github/workflows/fawltydeps.yaml +++ b/.github/workflows/fawltydeps.yaml @@ -12,7 +12,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v4 with: - python-version: "3.11" + python-version: "3.13" - name: Install dependencies run: | pip install -e .[dev] diff --git a/.github/workflows/isort.yaml b/.github/workflows/isort.yaml index 3e29a4d..5b52f95 100644 --- a/.github/workflows/isort.yaml +++ b/.github/workflows/isort.yaml @@ -15,4 +15,4 @@ jobs: with: src: "src/django_project" options: "--check --diff" - 
python_version: "3.11" + python_version: "3.13" diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index d8df3a5..a9d8de1 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -16,3 +16,4 @@ jobs: src: "src" options: "-v" pip_install_command: "pip install -e .[dev]" + python_version: "3.13" diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index d432e08..5ada98d 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -14,4 +14,4 @@ jobs: src: "" options: "" pip_install_command: "pip install -e .[dev]" - python_version: "3.11" + python_version: "3.13" diff --git a/.github/workflows/ruff_format.yaml b/.github/workflows/ruff_format.yaml index f821266..ea1f7fb 100644 --- a/.github/workflows/ruff_format.yaml +++ b/.github/workflows/ruff_format.yaml @@ -16,4 +16,4 @@ jobs: with: src: "src/django_project" command: ruff format src --check - python_version: "3.11" + python_version: "3.13" diff --git a/.github/workflows/ruff_lint.yaml b/.github/workflows/ruff_lint.yaml index 20866de..6d6545b 100644 --- a/.github/workflows/ruff_lint.yaml +++ b/.github/workflows/ruff_lint.yaml @@ -16,4 +16,4 @@ jobs: with: src: "src/django_project" options: "-v" - python_version: "3.11" + python_version: "3.13" From f6d3d80a69a50a6583e45b0316eb5be9e3545d67 Mon Sep 17 00:00:00 2001 From: David Slusser Date: Sun, 8 Feb 2026 14:27:10 -0800 Subject: [PATCH 4/6] removing log option from mypy --- .github/workflows/mypy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index a9d8de1..ac392b3 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -14,6 +14,6 @@ jobs: - uses: davidslusser/actions_python_mypy@v1.0.1 with: src: "src" - options: "-v" + options: "" pip_install_command: "pip install -e .[dev]" python_version: "3.13" From 1dba4ba4a03c1957ad9d92d01f2d3c4054ddf425 Mon Sep 17 00:00:00 2001 From: David 
Slusser Date: Sun, 8 Feb 2026 14:31:57 -0800 Subject: [PATCH 5/6] mypy error --- src/django_project/web/utilities/scrapers/meetup.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/django_project/web/utilities/scrapers/meetup.py b/src/django_project/web/utilities/scrapers/meetup.py index 654e252..523c37c 100644 --- a/src/django_project/web/utilities/scrapers/meetup.py +++ b/src/django_project/web/utilities/scrapers/meetup.py @@ -5,6 +5,7 @@ from bs4 import BeautifulSoup, Tag from bs4.element import AttributeValueList, NavigableString, PageElement + from web.utilities.html_utils import fetch_content, fetch_content_with_playwright @@ -150,7 +151,9 @@ def get_event_information(url: str) -> dict: if match: location_name = match.group(1) if not location_name: - online_p = soup.find("p", class_="ds2-k16 text-ds2-text-fill-primary-enabled", string="Online event") + online_p = soup.find( + "p", attrs={"class": "ds2-k16 text-ds2-text-fill-primary-enabled"}, string="Online event" + ) if online_p: location_name = "Online event" event_info["location_name"] = location_name From 44a8b40081ae7f9d1971bb63fba5f37dc3a94bf7 Mon Sep 17 00:00:00 2001 From: David Slusser Date: Sun, 8 Feb 2026 15:13:28 -0800 Subject: [PATCH 6/6] resolving mypy --- src/django_project/web/utilities/scrapers/meetup.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/django_project/web/utilities/scrapers/meetup.py b/src/django_project/web/utilities/scrapers/meetup.py index 523c37c..989789d 100644 --- a/src/django_project/web/utilities/scrapers/meetup.py +++ b/src/django_project/web/utilities/scrapers/meetup.py @@ -5,7 +5,6 @@ from bs4 import BeautifulSoup, Tag from bs4.element import AttributeValueList, NavigableString, PageElement - from web.utilities.html_utils import fetch_content, fetch_content_with_playwright @@ -151,10 +150,8 @@ def get_event_information(url: str) -> dict: if match: location_name = match.group(1) if not location_name: - online_p = 
soup.find( - "p", attrs={"class": "ds2-k16 text-ds2-text-fill-primary-enabled"}, string="Online event" - ) - if online_p: + online_p = soup.find("p", class_="ds2-k16 text-ds2-text-fill-primary-enabled") + if online_p and online_p.get_text(strip=True) == "Online event": location_name = "Online event" event_info["location_name"] = location_name