# Listing header from the original paste: 102 lines, 3.1 KiB, Python
import re
from datetime import datetime
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

from app.models import RegionScope, WatchType
from app.providers.utils import normalize_search_text


class BarclaysArenaProvider:
|
|
source_name = "barclays_arena"
|
|
events_url = "https://www.barclays-arena.de/events"
|
|
|
|
def search_events(
|
|
self,
|
|
term: str,
|
|
watch_type: WatchType,
|
|
region_scope: RegionScope,
|
|
) -> list[dict]:
|
|
response = requests.get(
|
|
self.events_url,
|
|
headers={"User-Agent": "Mozilla/5.0"},
|
|
timeout=30,
|
|
)
|
|
response.raise_for_status()
|
|
soup = BeautifulSoup(response.text, "html.parser")
|
|
|
|
normalized_term = normalize_search_text(term)
|
|
results: list[dict] = []
|
|
|
|
headings = soup.find_all("h3")
|
|
for heading in headings:
|
|
title = heading.get_text(" ", strip=True)
|
|
if not title:
|
|
continue
|
|
|
|
subtitle = ""
|
|
subtitle_el = heading.find_next("h4")
|
|
if subtitle_el:
|
|
subtitle = subtitle_el.get_text(" ", strip=True)
|
|
|
|
haystack = normalize_search_text(f"{title} {subtitle}")
|
|
if normalized_term not in haystack:
|
|
continue
|
|
|
|
date_text = self._find_previous_date_text(heading)
|
|
event_date = self._parse_german_date(date_text)
|
|
|
|
link = heading.find_previous("a", href=True)
|
|
if link is None:
|
|
continue
|
|
|
|
results.append(
|
|
{
|
|
"external_id": link["href"],
|
|
"title": title,
|
|
"matched_term": term,
|
|
"venue_name": "Barclays Arena",
|
|
"city": "Hamburg",
|
|
"country_code": "DE",
|
|
"event_date": event_date,
|
|
"ticket_url": urljoin(self.events_url, link["href"]),
|
|
"image_url": None,
|
|
"raw_payload": {
|
|
"title": title,
|
|
"subtitle": subtitle,
|
|
"date_text": date_text,
|
|
"href": link["href"],
|
|
},
|
|
}
|
|
)
|
|
|
|
self.last_status = "ok"
|
|
self.last_message = (
|
|
f"Barclays Arena returned {len(results)} matched events for term '{term}'."
|
|
)
|
|
return results
|
|
|
|
def _find_previous_date_text(self, heading) -> str | None:
|
|
current = heading.previous_sibling
|
|
while current is not None:
|
|
text = getattr(current, "get_text", lambda *args, **kwargs: str(current))(
|
|
" ", strip=True
|
|
)
|
|
if text and "|" in text:
|
|
return text
|
|
current = current.previous_sibling
|
|
return None
|
|
|
|
def _parse_german_date(self, value: str | None) -> datetime | None:
|
|
if not value:
|
|
return None
|
|
parts = [part.strip() for part in value.split("|")]
|
|
if len(parts) < 2:
|
|
return None
|
|
try:
|
|
return datetime.strptime(parts[1], "%d.%m.%Y")
|
|
except ValueError:
|
|
return None
|
|
|