Files
eventlens/backend/app/providers/barclays_arena.py
T
2026-04-18 14:23:24 +02:00

134 lines
4.3 KiB
Python

from datetime import datetime
import re
from urllib.parse import urljoin
import requests
from bs4 import BeautifulSoup
from app.models import RegionScope, WatchType
from app.providers.utils import normalize_search_text
class BarclaysArenaProvider:
    """Event provider that scrapes the Barclays Arena event search page.

    ``search_events`` downloads ``events_url``, walks every ``<h3>`` heading on
    the page and returns one result dict per heading whose title/subtitle
    contains the search term and which is nested inside an event detail link.
    """

    source_name = "barclays_arena"
    events_url = "https://www.barclays-arena.de/events/search"

    # Outcome of the most recent successful ``search_events`` call. Declared
    # on the class so that reading them before the first search (or after a
    # first search that raised) yields ``None`` instead of ``AttributeError``.
    last_status: str | None = None
    last_message: str | None = None

    # Compiled once instead of on every heading / every text fragment.
    _EVENT_HREF_RE = re.compile(r"/events/")
    _WEEKDAY_DATE_RE = re.compile(
        r"(Montag|Dienstag|Mittwoch|Donnerstag|Freitag|Samstag|Sonntag)\s*\|\s*(\d{2}\.\d{2}\.\d{4})"
    )
    _BARE_DATE_RE = re.compile(r"\b(\d{2}\.\d{2}\.\d{4})\b")

    def search_events(
        self,
        term: str,
        watch_type: WatchType,
        region_scope: RegionScope,
    ) -> list[dict]:
        """Return the events on the listing page whose heading matches *term*.

        Args:
            term: Search term. It is normalised with ``normalize_search_text``
                and matched as a substring of the normalised
                ``"<title> <subtitle>"`` text of each heading.
            watch_type: Accepted for interface compatibility; not used here.
            region_scope: Accepted for interface compatibility; not used here.

        Returns:
            One dict per distinct detail link (keyed by the raw ``href``). If
            several headings resolve to the same ``href`` the last one wins.
            A blank term yields an empty list without any HTTP request.

        Raises:
            Whatever ``requests.get`` / ``Response.raise_for_status`` raise
            (for example ``requests.HTTPError``); the exception propagates
            unchanged and ``last_status`` / ``last_message`` are left as they
            were.
        """
        is_blank = not term or not term.strip()
        normalized_term = "" if is_blank else normalize_search_text(term)
        if not normalized_term:
            # An empty needle is a substring of every haystack, so without
            # this guard a blank term would "match" every event on the page.
            self._record_ok(term, 0)
            return []

        response = requests.get(
            self.events_url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=30,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # Keyed by href so repeated cards for the same detail page collapse
        # into one entry (later occurrences overwrite earlier ones).
        unique_results: dict[str, dict] = {}
        for heading in soup.find_all("h3"):
            title = heading.get_text(" ", strip=True)
            if not title:
                continue

            subtitle_el = heading.find_next_sibling("h4")
            subtitle = subtitle_el.get_text(" ", strip=True) if subtitle_el else ""

            # Keep matching local to the actual heading/subtitle pair. Wider
            # parent containers often contain several event cards.
            haystack = normalize_search_text(f"{title} {subtitle}")
            if normalized_term not in haystack:
                continue

            detail_link = self._find_card_link(heading)
            if detail_link is None:
                continue

            date_text = self._find_card_date_text(heading)
            href = detail_link["href"]
            unique_results[href] = {
                "external_id": href,
                "title": title,
                "matched_term": term,
                "venue_name": "Barclays Arena",
                "city": "Hamburg",
                "country_code": "DE",
                "event_date": self._parse_german_date(date_text),
                "ticket_url": urljoin(self.events_url, href),
                "image_url": None,
                "raw_payload": {
                    "title": title,
                    "subtitle": subtitle,
                    "date_text": date_text,
                    "href": href,
                },
            }

        self._record_ok(term, len(unique_results))
        return list(unique_results.values())

    def _record_ok(self, term: str, count: int) -> None:
        """Store the success status and summary message for a finished search."""
        self.last_status = "ok"
        self.last_message = (
            f"Barclays Arena returned {count} matched events for term '{term}'."
        )

    def _find_card_link(self, heading):
        """Return the ``<a href=".../events/...">`` that encloses *heading*.

        Returns ``None`` when the heading is not nested inside such a link.

        The lookup is deliberately limited to ancestors of this exact heading.
        BeautifulSoup compares tags by markup rather than identity, so a
        membership test such as ``heading in link.find_all("h3")`` would also
        accept the link of a *different* card that merely has an identical
        heading, attaching the wrong URL to the event.
        """
        return heading.find_parent("a", href=self._EVENT_HREF_RE)

    def _find_card_date_text(self, heading) -> str | None:
        """Look backwards from *heading* for a ``DD.MM.YYYY`` date string.

        Inspects up to six nodes reached via ``previous_element`` and returns
        the first date found in their text, or ``None`` if none of them holds
        a date or the start of the document is reached first.
        """
        node = heading
        for _ in range(6):
            node = node.previous_element
            if node is None:
                return None
            # Nodes without ``get_text`` are rendered with ``str`` instead.
            get_text = getattr(node, "get_text", None)
            text = get_text(" ", strip=True) if get_text is not None else str(node)
            date_text = self._extract_date_text(text)
            if date_text:
                return date_text
        return None

    def _extract_date_text(self, text: str) -> str | None:
        """Return the first ``DD.MM.YYYY`` date in *text*, or ``None``.

        A date written as ``<German weekday> | DD.MM.YYYY`` takes precedence
        over a bare date appearing elsewhere in the text.
        """
        weekday_match = self._WEEKDAY_DATE_RE.search(text)
        if weekday_match:
            return weekday_match.group(2)
        bare_match = self._BARE_DATE_RE.search(text)
        if bare_match:
            return bare_match.group(1)
        return None

    def _parse_german_date(self, value: str | None) -> datetime | None:
        """Parse ``DD.MM.YYYY`` into a naive ``datetime``.

        Returns ``None`` for an empty value or one that is not a valid
        calendar date in that format.
        """
        if not value:
            return None
        try:
            return datetime.strptime(value, "%d.%m.%Y")
        except ValueError:
            return None