Auf Webseiten-Modell umgestellt

2026-04-18 14:23:24 +02:00
parent 5510d58e5a
commit 6cfbdba0a4
9 changed files with 890 additions and 30 deletions
@@ -114,3 +114,4 @@ sudo docker compose up -d --build
 - Bandsintown benoetigt eine echte, von Bandsintown freigeschaltete App-ID. Ohne diese wird der Provider deaktiviert oder als `blocked` angezeigt.
 - Barclays Arena wird ueber die offizielle Eventseite der Arena abgefragt.
 - Fabrik wird ueber die offizielle Veranstaltungsseite der Fabrik Hamburg abgefragt.
+- Fuer robuste persoenliche Ueberwachung koennen pro Watchlist-Eintrag direkte Quellen-URLs hinterlegt werden. Diese werden beim Sync gezielt per JSON-LD und HTML-Textscan durchsucht.
@@ -3,6 +3,7 @@ const state = {
  events: [],
  notifications: [],
  providerStatuses: [],
+  watchSources: [],
 };

 const watchItemsEl = document.querySelector("#watch-items");
@@ -92,6 +93,12 @@ function renderStats() {
 }

 function prettifyProviderName(value) {
+  if (value?.startsWith("source:")) {
+    const sourceId = Number(value.split(":")[1]);
+    const source = state.watchSources.find((entry) => entry.id === sourceId);
+    return source?.label || "Direkte Quelle";
+  }
+
  const names = {
    ticketmaster: "Ticketmaster",
    bandsintown: "Bandsintown",
@@ -206,12 +213,71 @@ function renderWatchItems() {
            </div>
          </div>
          <p>${escapeHtml(item.notes || "Keine Notiz hinterlegt.")}</p>
+          ${renderSourceList(item.id)}
+          <form class="source-form" data-watch-id="${item.id}">
+            <input
+              name="label"
+              type="text"
+              placeholder="Quelle, z. B. Kuenstlerseite"
+            />
+            <input
+              name="url"
+              type="url"
+              placeholder="https://..."
+              required
+            />
+            <button type="submit" class="action-button success">Quelle hinzufuegen</button>
+          </form>
        </article>
      `
    )
    .join("");
 }

+function renderSourceList(watchItemId) {
+  const sources = state.watchSources.filter((source) => source.watch_item_id === watchItemId);
+  if (!sources.length) {
+    return '<div class="source-list muted">Noch keine direkten Quellen hinterlegt.</div>';
+  }
+
+  return `
+    <div class="source-list">
+      ${sources
+        .map(
+          (source) => `
+            <div class="source-row">
+              <div>
+                <strong>${escapeHtml(source.label || "Quelle")}</strong>
+                <a href="${escapeHtml(source.url)}" target="_blank" rel="noreferrer">
+                  ${escapeHtml(source.url)}
+                </a>
+                <div class="pill-row">
+                  <span class="pill ${
+                    source.last_status === "ok"
+                      ? "success"
+                      : source.last_status === "error"
+                        ? "danger"
+                        : "warning"
+                  }">${escapeHtml(source.last_status)}</span>
+                  <span class="muted">${escapeHtml(source.last_message || "Noch nicht gescannt.")}</span>
+                </div>
+              </div>
+              <div class="action-row">
+                <button class="action-button" data-action="toggle-source" data-id="${source.id}">
+                  ${source.is_active ? "Pausieren" : "Aktivieren"}
+                </button>
+                <button class="action-button danger" data-action="delete-source" data-id="${source.id}">
+                  Loeschen
+                </button>
+              </div>
+            </div>
+          `
+        )
+        .join("")}
+    </div>
+  `;
+}
+
 function getWatchNameById(id) {
  return state.watchItems.find((item) => item.id === id)?.name || `Watch #${id}`;
 }
@@ -270,6 +336,13 @@ function renderEvents() {
              >
                ${event.is_ticket_purchased ? "Ticket entfernen" : "Ticket gekauft"}
              </button>
+              <button
+                class="action-button danger"
+                data-action="delete-event"
+                data-id="${event.id}"
+              >
+                Loeschen
+              </button>
            </div>
          </div>
          <div class="event-meta">
@@ -328,17 +401,19 @@ function updateSyncStatus(message) {
 }

 async function loadData() {
-  const [watchItems, events, notifications, providerStatuses] = await Promise.all([
+  const [watchItems, events, notifications, providerStatuses, watchSources] = await Promise.all([
    apiFetch("/watch-items"),
    apiFetch("/events"),
    apiFetch("/notifications"),
    apiFetch("/provider-statuses"),
+    apiFetch("/watch-sources"),
  ]);

  state.watchItems = watchItems;
  state.events = events;
  state.notifications = notifications;
  state.providerStatuses = providerStatuses;
+  state.watchSources = watchSources;

  renderStats();
  renderWatchItems();
@@ -433,7 +508,60 @@ document.addEventListener("click", async (event) => {
      });
      await loadData();
      showToast("Ticketstatus aktualisiert.");
+      return;
    }
+
+    if (action === "delete-event") {
+      await apiFetch(`/events/${id}`, { method: "DELETE" });
+      await loadData();
+      showToast("Event geloescht.");
+      return;
+    }
+
+    if (action === "delete-source") {
+      await apiFetch(`/watch-sources/${id}`, { method: "DELETE" });
+      await loadData();
+      showToast("Quelle geloescht.");
+      return;
+    }
+
+    if (action === "toggle-source") {
+      const source = state.watchSources.find((entry) => entry.id === Number(id));
+      await apiFetch(`/watch-sources/${id}`, {
+        method: "PATCH",
+        body: JSON.stringify({ is_active: !source.is_active }),
+      });
+      await loadData();
+      showToast("Quellenstatus aktualisiert.");
+    }
+  } catch (error) {
+    showToast(error.message);
+  }
+});
+
+document.addEventListener("submit", async (event) => {
+  const form = event.target.closest(".source-form");
+  if (!form) {
+    return;
+  }
+
+  event.preventDefault();
+  const watchId = form.dataset.watchId;
+  const formData = new FormData(form);
+  const payload = {
+    label: formData.get("label")?.toString().trim() || null,
+    url: formData.get("url")?.toString().trim(),
+    parser_type: "auto",
+  };
+
+  try {
+    await apiFetch(`/watch-items/${watchId}/sources`, {
+      method: "POST",
+      body: JSON.stringify(payload),
+    });
+    form.reset();
+    await loadData();
+    showToast("Quelle hinzugefuegt.");
  } catch (error) {
    showToast(error.message);
  }
@@ -381,6 +381,37 @@ button:hover,
  line-height: 1.55;
 }

+/* Grid container that spaces the direct-source rows of a watch item card. */
+.source-list {
+  display: grid;
+  gap: 10px;
+  margin-top: 16px;
+}
+
+/* One source: details and action buttons pushed to opposite ends of the row. */
+.source-row {
+  display: flex;
+  /* `flex-start` instead of `start`: the bare keyword is not honoured by every
+     browser for flex containers, while `flex-start` is. */
+  align-items: flex-start;
+  justify-content: space-between;
+  gap: 16px;
+  padding: 14px;
+  border-radius: var(--radius-sm);
+  background: rgba(46, 39, 30, 0.05);
+}
+
+/* Source URL: on its own line, width-capped, and allowed to break anywhere so
+   long URLs do not overflow the row. */
+.source-row a {
+  display: block;
+  max-width: 46ch;
+  margin: 4px 0 8px;
+  overflow-wrap: anywhere;
+  color: var(--primary-dark);
+}
+
+/* Inline "add source" form: label input, URL input, submit button. */
+.source-form {
+  display: grid;
+  grid-template-columns: 0.7fr 1.4fr auto;
+  gap: 10px;
+  margin-top: 14px;
+}
+
 .action-button {
  min-height: 38px;
  padding: 0 14px;
@@ -467,6 +498,7 @@ button:hover,
  }

  .watch-form,
+  .source-form,
  .status-panel {
    grid-template-columns: 1fr;
  }
@@ -8,7 +8,7 @@ from sqlalchemy.orm import Session

 from app.config import settings
 from app.database import Base, engine, get_db
-from app.models import TrackedEvent, WatchItem
+from app.models import TrackedEvent, WatchItem, WatchSource
 from app.scheduler import start_scheduler
 from app.schemas import (
    NotificationLogRead,
@@ -19,11 +19,15 @@ from app.schemas import (
    WatchItemCreate,
    WatchItemRead,
    WatchItemUpdate,
+    WatchSourceCreate,
+    WatchSourceRead,
+    WatchSourceUpdate,
 )
 from app.services import (
    list_events,
    list_notifications,
    list_provider_statuses,
+    list_watch_sources,
    list_watch_items,
    run_sync,
 )
@@ -114,6 +118,66 @@ def delete_watch_item(watch_item_id: int, db: Session = Depends(get_db)):
    db.commit()


+# GET /watch-sources: returns the stored watch sources. The optional
+# `watch_item_id` query parameter is passed through to `list_watch_sources`.
+@app.get("/watch-sources", response_model=list[WatchSourceRead])
+def get_watch_sources(watch_item_id: int | None = None, db: Session = Depends(get_db)):
+    return list_watch_sources(db, watch_item_id)
+
+
+# POST /watch-items/{watch_item_id}/sources: stores a new direct source URL for
+# an existing watch item and returns the created row with status 201.
+# Responds 404 when the watch item does not exist.
+@app.post(
+    "/watch-items/{watch_item_id}/sources",
+    response_model=WatchSourceRead,
+    status_code=201,
+)
+def create_watch_source(
+    watch_item_id: int,
+    payload: WatchSourceCreate,
+    db: Session = Depends(get_db),
+):
+    watch_item = db.get(WatchItem, watch_item_id)
+    if watch_item is None:
+        raise HTTPException(status_code=404, detail="Watch item nicht gefunden.")
+
+    # The source is linked through the relationship attribute, not the raw id.
+    source = WatchSource(
+        watch_item=watch_item,
+        label=payload.label,
+        url=payload.url,
+        parser_type=payload.parser_type,
+    )
+    db.add(source)
+    db.commit()
+    # Reload the row after the commit before it is serialized for the response.
+    db.refresh(source)
+    return source
+
+
+@app.patch("/watch-sources/{source_id}", response_model=WatchSourceRead)
+def update_watch_source(
+    source_id: int,
+    payload: WatchSourceUpdate,
+    db: Session = Depends(get_db),
+):
+    source = db.get(WatchSource, source_id)
+    if source is None:
+        raise HTTPException(status_code=404, detail="Quelle nicht gefunden.")
+
+    updates = payload.model_dump(exclude_unset=True)
+    for field_name, value in updates.items():
+        setattr(source, field_name, value)
+    source.updated_at = datetime.utcnow()
+    db.commit()
+    db.refresh(source)
+    return source
+
+
+# DELETE /watch-sources/{source_id}: removes one watch source. Responds 204 with
+# no body on success and 404 when the id is unknown.
+@app.delete("/watch-sources/{source_id}", status_code=204)
+def delete_watch_source(source_id: int, db: Session = Depends(get_db)):
+    source = db.get(WatchSource, source_id)
+    if source is None:
+        raise HTTPException(status_code=404, detail="Quelle nicht gefunden.")
+
+    db.delete(source)
+    db.commit()
+
+
@app.get("/events", response_model=list[TrackedEventRead])
 def get_events(db: Session = Depends(get_db)):
    return list_events(db)
@@ -137,6 +201,16 @@ def update_purchase_status(
    return tracked_event


+# DELETE /events/{event_id}: removes one tracked event. Responds 204 with no
+# body on success and 404 when the id is unknown.
+@app.delete("/events/{event_id}", status_code=204)
+def delete_event(event_id: int, db: Session = Depends(get_db)):
+    tracked_event = db.get(TrackedEvent, event_id)
+    if tracked_event is None:
+        raise HTTPException(status_code=404, detail="Event nicht gefunden.")
+
+    db.delete(tracked_event)
+    db.commit()
+
+
@app.get("/notifications", response_model=list[NotificationLogRead])
 def get_notifications(db: Session = Depends(get_db)):
    return list_notifications(db)
@@ -35,6 +35,13 @@ class ProviderStatusType(str, Enum):
    error = "error"


+# Result of the most recent scan of a watch source, stored in
+# `WatchSource.last_status`.
+class SourceStatusType(str, Enum):
+    pending = "pending"  # column default for a newly created source
+    ok = "ok"  # the sync's scan returned at least one event
+    no_match = "no_match"  # the sync's scan returned no events
+    error = "error"  # the sync's scan raised an exception
+
+
 class WatchItem(Base):
    __tablename__ = "watch_items"

@@ -57,6 +64,36 @@ class WatchItem(Base):
    tracked_events: Mapped[list["TrackedEvent"]] = relationship(
        back_populates="watch_item", cascade="all, delete-orphan"
    )
+    sources: Mapped[list["WatchSource"]] = relationship(
+        back_populates="watch_item", cascade="all, delete-orphan"
+    )
+
+
+class WatchSource(Base):
+    """A direct source URL attached to one watch item, plus its last scan result."""
+
+    __tablename__ = "watch_sources"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
+    # Owning watch item; every source belongs to exactly one.
+    watch_item_id: Mapped[int] = mapped_column(ForeignKey("watch_items.id"), nullable=False)
+    # Optional display name for the source.
+    label: Mapped[str | None] = mapped_column(String(255), nullable=True)
+    url: Mapped[str] = mapped_column(String(1024), nullable=False)
+    # Parser selection; "auto" is the default and the only value visible here.
+    parser_type: Mapped[str] = mapped_column(String(50), default="auto", nullable=False)
+    # Inactive sources are kept but skipped by the sync.
+    is_active: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False)
+    # Outcome, message and timestamp of the most recent scan.
+    last_status: Mapped[SourceStatusType] = mapped_column(
+        SqlEnum(SourceStatusType), default=SourceStatusType.pending, nullable=False
+    )
+    last_message: Mapped[str | None] = mapped_column(Text, nullable=True)
+    last_checked_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
+    # Timestamps are naive datetimes produced by datetime.utcnow.
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime, default=datetime.utcnow, nullable=False
+    )
+    updated_at: Mapped[datetime] = mapped_column(
+        DateTime,
+        default=datetime.utcnow,
+        onupdate=datetime.utcnow,
+        nullable=False,
+    )
+
+    # Counterpart of `WatchItem.sources`.
+    watch_item: Mapped[WatchItem] = relationship(back_populates="sources")


 class TrackedEvent(Base):
@@ -1,4 +1,5 @@
 from datetime import datetime
+import re
 from urllib.parse import urljoin

 import requests
@@ -10,7 +11,7 @@ from app.providers.utils import normalize_search_text

 class BarclaysArenaProvider:
    source_name = "barclays_arena"
-    events_url = "https://www.barclays-arena.de/events"
+    events_url = "https://www.barclays-arena.de/events/search"

    def search_events(
        self,
@@ -29,73 +30,104 @@ class BarclaysArenaProvider:
        normalized_term = normalize_search_text(term)
        results: list[dict] = []

-        headings = soup.find_all("h3")
-        for heading in headings:
+        for heading in soup.find_all("h3"):
            title = heading.get_text(" ", strip=True)
            if not title:
                continue

-            subtitle = ""
-            subtitle_el = heading.find_next("h4")
-            if subtitle_el:
-                subtitle = subtitle_el.get_text(" ", strip=True)
+            subtitle_el = heading.find_next_sibling("h4")
+            subtitle = subtitle_el.get_text(" ", strip=True) if subtitle_el else ""

+            # Keep matching local to the actual heading/subtitle pair. Wider
+            # parent containers often contain several event cards.
            haystack = normalize_search_text(f"{title} {subtitle}")
            if normalized_term not in haystack:
                continue

-            date_text = self._find_previous_date_text(heading)
-            event_date = self._parse_german_date(date_text)
-
-            link = heading.find_previous("a", href=True)
-            if link is None:
+            detail_link = self._find_card_link(heading)
+            if detail_link is None:
                continue

+            date_text = self._find_card_date_text(heading)
+            event_date = self._parse_german_date(date_text)
+            href = detail_link["href"]
+
            results.append(
                {
-                    "external_id": link["href"],
+                    "external_id": href,
                    "title": title,
                    "matched_term": term,
                    "venue_name": "Barclays Arena",
                    "city": "Hamburg",
                    "country_code": "DE",
                    "event_date": event_date,
-                    "ticket_url": urljoin(self.events_url, link["href"]),
+                    "ticket_url": urljoin(self.events_url, href),
                    "image_url": None,
                    "raw_payload": {
                        "title": title,
                        "subtitle": subtitle,
                        "date_text": date_text,
-                        "href": link["href"],
+                        "href": href,
                    },
                }
            )

+        unique_results: dict[str, dict] = {}
+        for result in results:
+            unique_results[result["external_id"]] = result
+
        self.last_status = "ok"
        self.last_message = (
-            f"Barclays Arena returned {len(results)} matched events for term '{term}'."
+            f"Barclays Arena returned {len(unique_results)} matched events for term '{term}'."
        )
-        return results
+        return list(unique_results.values())

-    def _find_previous_date_text(self, heading) -> str | None:
-        current = heading.previous_sibling
-        while current is not None:
+    def _find_card_link(self, heading):
+        """Return the `<a>` tag with an `/events/` href that belongs to `heading`.
+
+        Looks for such a link among the heading's ancestors first. Otherwise it
+        climbs up to five parent levels and, at each level, accepts the first
+        matching link only if `heading` is among that link's `<h3>` descendants.
+        Returns None when no link qualifies.
+        """
+        link = heading.find_parent("a", href=re.compile(r"/events/"))
+        if link is not None:
+            return link
+
+        # NOTE(review): a link whose <h3> descendants include `heading` is an
+        # ancestor of it, which find_parent above should already have returned.
+        # The `in` test presumably compares tags by value, so this loop may only
+        # ever match a different card's look-alike heading — confirm the intent.
+        current = heading
+        for _ in range(5):
+            current = current.parent
+            if current is None:
+                return None
+            link = current.find("a", href=re.compile(r"/events/"))
+            if link is not None and heading in link.find_all("h3"):
+                return link
+        return None
+
+    def _find_card_date_text(self, heading) -> str | None:
+        current = heading
+        for _ in range(6):
+            current = current.previous_element
+            if current is None:
+                return None
            text = getattr(current, "get_text", lambda *args, **kwargs: str(current))(
                " ", strip=True
            )
-            if text and "|" in text:
-                return text
-            current = current.previous_sibling
+            date_text = self._extract_date_text(text)
+            if date_text:
+                return date_text
+        return None
+
+    def _extract_date_text(self, text: str) -> str | None:
+        """Return the first `dd.mm.yyyy` date found in `text`, or None.
+
+        A date written as `<German weekday> | dd.mm.yyyy` is preferred; if that
+        form is absent, any standalone `dd.mm.yyyy` is used. Only the date part
+        is returned, without the weekday.
+        """
+        match = re.search(
+            r"(Montag|Dienstag|Mittwoch|Donnerstag|Freitag|Samstag|Sonntag)\s*\|\s*(\d{2}\.\d{2}\.\d{4})",
+            text,
+        )
+        if match:
+            return match.group(2)
+
+        # Fallback: a bare date without the weekday prefix.
+        match = re.search(r"\b(\d{2}\.\d{2}\.\d{4})\b", text)
+        if match:
+            return match.group(1)
        return None

    def _parse_german_date(self, value: str | None) -> datetime | None:
        if not value:
            return None
-        parts = [part.strip() for part in value.split("|")]
-        if len(parts) < 2:
-            return None
        try:
-            return datetime.strptime(parts[1], "%d.%m.%Y")
+            return datetime.strptime(value, "%d.%m.%Y")
        except ValueError:
            return None
-
@@ -7,6 +7,7 @@ from app.models import (
    NotificationType,
    ProviderStatusType,
    RegionScope,
+    SourceStatusType,
    WatchType,
 )

@@ -39,6 +40,35 @@ class WatchItemRead(BaseModel):
    updated_at: datetime


+class WatchSourceCreate(BaseModel):
+    label: str | None = Field(default=None, max_length=255)
+    url: str = Field(min_length=8, max_length=1024)
+    parser_type: str = "auto"
+
+
+class WatchSourceUpdate(BaseModel):
+    label: str | None = Field(default=None, max_length=255)
+    url: str | None = Field(default=None, min_length=8, max_length=1024)
+    parser_type: str | None = None
+    is_active: bool | None = None
+
+
+# Response shape for a watch source, including the result of its last scan.
+class WatchSourceRead(BaseModel):
+    # Allows the model to be populated from attribute-bearing objects.
+    model_config = ConfigDict(from_attributes=True)
+
+    id: int
+    watch_item_id: int
+    label: str | None
+    url: str
+    parser_type: str
+    is_active: bool
+    # Outcome, message and time of the most recent scan.
+    last_status: SourceStatusType
+    last_message: str | None
+    last_checked_at: datetime | None
+    created_at: datetime
+    updated_at: datetime
+
+
 class PurchaseUpdate(BaseModel):
    is_ticket_purchased: bool

@@ -10,12 +10,15 @@ from app.models import (
    NotificationType,
    ProviderStatus,
    ProviderStatusType,
+    SourceStatusType,
    TrackedEvent,
    WatchItem,
+    WatchSource,
 )
 from app.notifications import send_email_notification
 from app.providers.registry import get_providers
 from app.schemas import SyncResult
+from app.source_scanner import SourceScanner


 logger = logging.getLogger(__name__)
@@ -71,6 +74,13 @@ def list_provider_statuses(db: Session) -> list[ProviderStatus]:
    return list(db.scalars(select(ProviderStatus).order_by(ProviderStatus.provider_name)))


+def list_watch_sources(db: Session, watch_item_id: int | None = None) -> list[WatchSource]:
+    """Return watch sources ordered by creation time.
+
+    When `watch_item_id` is given, only the sources of that watch item are
+    returned; otherwise all sources are returned.
+    """
+    stmt = select(WatchSource).order_by(WatchSource.created_at)
+    if watch_item_id is not None:
+        stmt = stmt.where(WatchSource.watch_item_id == watch_item_id)
+    return list(db.scalars(stmt))
+
+
 def update_provider_status(
    db: Session,
    provider_name: str,
@@ -261,6 +271,7 @@ def upsert_event(

 def run_sync(db: Session) -> SyncResult:
    providers = get_providers()
+    source_scanner = SourceScanner()
    provider_states = {
        provider.source_name: init_provider_sync_state(provider.source_name)
        for provider in providers
@@ -275,6 +286,75 @@ def run_sync(db: Session) -> SyncResult:
    notifications_skipped = 0

    for watch_item in active_items:
+        active_sources = [source for source in watch_item.sources if source.is_active]
+        for source in active_sources:
+            try:
+                events = source_scanner.scan(watch_item, source)
+                source.last_status = (
+                    SourceStatusType.ok if events else SourceStatusType.no_match
+                )
+                source.last_message = (
+                    f"{len(events)} passende Events gefunden."
+                    if events
+                    else "Keine passenden Events auf dieser Quelle gefunden."
+                )
+                source.last_checked_at = datetime.utcnow()
+            except Exception as exc:
+                logger.exception(
+                    "Source scan failed for watch_item=%s source=%s",
+                    watch_item.name,
+                    source.url,
+                )
+                db.rollback()
+                source.last_status = SourceStatusType.error
+                source.last_message = f"Scan fehlgeschlagen: {exc}"
+                source.last_checked_at = datetime.utcnow()
+                db.add(source)
+                db.commit()
+                continue
+
+            for event_data in events:
+                tracked_event, is_new = upsert_event(
+                    db=db,
+                    watch_item=watch_item,
+                    provider_name=f"source:{source.id}",
+                    event_data=event_data,
+                )
+                if is_new:
+                    new_events += 1
+                else:
+                    updated_events += 1
+
+                should_notify = (
+                    is_new
+                    and tracked_event.discovery_notified_at is None
+                    and not has_equivalent_existing_event(db, tracked_event)
+                )
+                if should_notify:
+                    status = send_email_notification(
+                        db=db,
+                        tracked_event=tracked_event,
+                        notification_type=NotificationType.discovery,
+                        subject=f"Neuer Termin fuer {watch_item.name}",
+                        body=(
+                            f"Es wurde ein neuer Termin fuer '{watch_item.name}' gefunden.\n\n"
+                            f"Quelle: {source.label or source.url}\n"
+                            f"Titel: {tracked_event.title}\n"
+                            f"Ort: {tracked_event.venue_name or 'unbekannt'}\n"
+                            f"Stadt: {tracked_event.city or 'unbekannt'}\n"
+                            f"Datum: {tracked_event.event_date or 'unbekannt'}\n"
+                            f"Tickets: {tracked_event.ticket_url or 'keine URL'}\n"
+                        ),
+                    )
+                    if status == NotificationStatus.sent:
+                        tracked_event.discovery_notified_at = datetime.utcnow()
+                        notifications_sent += 1
+                    else:
+                        notifications_skipped += 1
+
+            db.add(source)
+            db.commit()
+
        for provider in providers:
            try:
                events = provider.search_events(
@@ -0,0 +1,446 @@
+import json
+import re
+from datetime import datetime
+from html import unescape
+from urllib.parse import urljoin
+
+import requests
+from bs4 import BeautifulSoup
+
+from app.models import RegionScope, WatchItem, WatchSource, WatchType
+from app.providers.utils import normalize_search_text
+
+
+MONTH_ALIASES = {
+    "jan": 1,
+    "januar": 1,
+    "feb": 2,
+    "februar": 2,
+    "maer": 3,
+    "maerz": 3,
+    "mar": 3,
+    "maerz": 3,
+    "apr": 4,
+    "april": 4,
+    "mai": 5,
+    "jun": 6,
+    "juni": 6,
+    "jul": 7,
+    "juli": 7,
+    "aug": 8,
+    "august": 8,
+    "sep": 9,
+    "sept": 9,
+    "september": 9,
+    "okt": 10,
+    "oktober": 10,
+    "nov": 11,
+    "november": 11,
+    "dez": 12,
+    "dezember": 12,
+}
+
+
+class SourceScanner:
+    headers = {
+        "User-Agent": "eventlens/0.1 (+https://local)",
+        "Accept": "text/html,application/xhtml+xml,application/json",
+        "Accept-Language": "de-DE,de;q=0.9,en;q=0.7",
+    }
+
+    def scan(self, watch_item: WatchItem, source: WatchSource) -> list[dict]:
+        """Fetch `source.url` and return the events on it that match `watch_item`.
+
+        A response whose Content-Type contains ``application/json`` is parsed as
+        JSON and handed to `_scan_json`; anything else is treated as HTML and
+        handed to `_scan_html`.
+
+        Exceptions from the request, from `raise_for_status()` (HTTP error
+        status) and from `response.json()` are not caught here.
+        """
+        # NOTE(review): the URL is user-configured and fetched as-is; if this
+        # service can reach internal hosts, consider restricting targets — confirm.
+        response = requests.get(
+            source.url,
+            headers=self.headers,
+            timeout=30,
+        )
+        response.raise_for_status()
+
+        content_type = response.headers.get("content-type", "")
+        if "application/json" in content_type:
+            return self._scan_json(watch_item, source, response.json())
+
+        return self._scan_html(watch_item, source, response.text)
+
+    def _scan_json(self, watch_item: WatchItem, source: WatchSource, payload) -> list[dict]:
+        """Treat a decoded JSON response as JSON-LD and return its matching events."""
+        events = self._extract_jsonld_events(payload)
+        return self._events_from_jsonld(watch_item, source, events)
+
+    def _scan_html(self, watch_item: WatchItem, source: WatchSource, html: str) -> list[dict]:
+        """Return matching events from an HTML page.
+
+        Embedded ``application/ld+json`` scripts are tried first. Only when they
+        yield no matching event does the method fall back to scanning the
+        page's visible text.
+        """
+        soup = BeautifulSoup(html, "html.parser")
+        jsonld_events = []
+
+        for script in soup.find_all("script", type="application/ld+json"):
+            raw_payload = script.string or script.get_text()
+            if not raw_payload:
+                continue
+            try:
+                # NOTE(review): entities are unescaped before parsing; a script
+                # containing `&quot;` inside a string value would presumably
+                # become invalid JSON and be skipped below — confirm.
+                payload = json.loads(unescape(raw_payload))
+            except json.JSONDecodeError:
+                # A malformed script block is ignored; other blocks are still read.
+                continue
+            jsonld_events.extend(self._extract_jsonld_events(payload))
+
+        jsonld_results = self._events_from_jsonld(watch_item, source, jsonld_events)
+        if jsonld_results:
+            return jsonld_results
+
+        return self._events_from_html_text(watch_item, source, soup)
+
+    def _extract_jsonld_events(self, payload) -> list[dict]:
+        events: list[dict] = []
+        if isinstance(payload, list):
+            for item in payload:
+                events.extend(self._extract_jsonld_events(item))
+            return events
+
+        if not isinstance(payload, dict):
+            return events
+
+        graph = payload.get("@graph")
+        if isinstance(graph, list):
+            for item in graph:
+                events.extend(self._extract_jsonld_events(item))
+
+        item_type = payload.get("@type")
+        if isinstance(item_type, list):
+            is_event = "Event" in item_type
+        else:
+            is_event = item_type == "Event"
+        if is_event:
+            events.append(payload)
+
+        return events
+
+    def _events_from_jsonld(
+        self,
+        watch_item: WatchItem,
+        source: WatchSource,
+        events: list[dict],
+    ) -> list[dict]:
+        results: list[dict] = []
+        normalized_term = normalize_search_text(watch_item.name)
+
+        for event in events:
+            title = event.get("name") or ""
+            performers = self._extract_performer_names(event)
+            haystack = normalize_search_text(" ".join([title] + performers))
+            if normalized_term not in haystack:
+                continue
+
+            location = event.get("location") or {}
+            address = location.get("address") or {}
+            city = address.get("addressLocality") or location.get("name")
+            if watch_item.region_scope == RegionScope.hamburg and normalize_search_text(city) != "hamburg":
+                continue
+
+            event_date = self._parse_datetime(event.get("startDate"))
+            if event_date and event_date.date() < datetime.utcnow().date():
+                continue
+            ticket_url = event.get("url") or source.url
+
+            results.append(
+                {
+                    "external_id": str(event.get("@id") or ticket_url or f"{source.id}:{title}"),
+                    "title": title or watch_item.name,
+                    "matched_term": watch_item.name,
+                    "venue_name": location.get("name") or source.label,
+                    "city": city,
+                    "country_code": "DE",
+                    "event_date": event_date,
+                    "ticket_url": ticket_url,
+                    "image_url": self._extract_image(event),
+                    "raw_payload": event,
+                }
+            )
+
+        return results
+
+    def _events_from_html_text(
+        self,
+        watch_item: WatchItem,
+        source: WatchSource,
+        soup: BeautifulSoup,
+    ) -> list[dict]:
+        """Derive events from the page's visible text when no JSON-LD matched.
+
+        Returns an empty list unless the normalized watch item name occurs in
+        the page text. For each context yielded by `_find_matching_contexts`, an
+        event is produced only if a date is found near the term, that date is
+        not before today (UTC) and, for the Hamburg region scope, the context
+        text mentions "hamburg". Events are de-duplicated per source, normalized
+        title and date.
+        """
+        text = soup.get_text(" ", strip=True)
+        normalized_text = normalize_search_text(text)
+        normalized_term = normalize_search_text(watch_item.name)
+        # Cheap pre-check: skip the per-context work if the term is absent.
+        if normalized_term not in normalized_text:
+            return []
+
+        results: list[dict] = []
+        seen_keys: set[str] = set()
+        for context in self._find_matching_contexts(soup, watch_item):
+            context_text = context.get_text(" ", strip=True)
+            event_date = self._find_nearest_date(context_text, watch_item.name)
+            # A mention without any date is not treated as an event.
+            if event_date is None:
+                continue
+            if event_date.date() < datetime.utcnow().date():
+                continue
+            if (
+                watch_item.region_scope == RegionScope.hamburg
+                and "hamburg" not in normalize_search_text(context_text)
+            ):
+                continue
+
+            title = self._find_title(context, watch_item.name)
+            link = self._find_nearest_link(context, watch_item.name, source.url) or source.url
+            # The same key doubles as the event's external id.
+            key = f"{source.id}:{normalize_search_text(title)}:{event_date.date().isoformat()}"
+            if key in seen_keys:
+                continue
+            seen_keys.add(key)
+
+            results.append(
+                {
+                    "external_id": key,
+                    "title": title,
+                    "matched_term": watch_item.name,
+                    "venue_name": self._find_venue(context_text, source.label),
+                    # The city is not parsed from the text; it is set only when
+                    # the Hamburg scope check above has passed.
+                    "city": "Hamburg" if watch_item.region_scope == RegionScope.hamburg else None,
+                    "country_code": "DE",
+                    "event_date": event_date,
+                    "ticket_url": link,
+                    "image_url": None,
+                    "raw_payload": {
+                        "source_url": source.url,
+                        "parser": "html_text",
+                        # Stored context is capped at 1000 characters.
+                        "context": context_text[:1000],
+                    },
+                }
+            )
+
+        return results
+
+    def _extract_performer_names(self, event: dict) -> list[str]:
+        performer = event.get("performer") or event.get("performers")
+        if isinstance(performer, dict):
+            return [performer.get("name", "")]
+        if isinstance(performer, list):
+            return [item.get("name", "") for item in performer if isinstance(item, dict)]
+        return []
+
+    def _extract_image(self, event: dict) -> str | None:
+        image = event.get("image")
+        if isinstance(image, str):
+            return image
+        if isinstance(image, list):
+            for item in image:
+                if isinstance(item, str):
+                    return item
+        return None
+
+    def _parse_datetime(self, value: str | None) -> datetime | None:
+        """Parse a date/time string into a naive datetime, or return None.
+
+        Tries ISO 8601 first (a trailing "Z" is accepted), then the first ten
+        characters as `dd.mm.yyyy` or `yyyy-mm-dd`. Empty or unparseable input
+        yields None.
+        """
+        if not value:
+            return None
+        try:
+            # The UTC offset is dropped, not converted: the result keeps the
+            # wall-clock time that was written in the string.
+            return datetime.fromisoformat(value.replace("Z", "+00:00")).replace(tzinfo=None)
+        except ValueError:
+            pass
+        # Date-only fallback on the leading ten characters.
+        for fmt in ("%d.%m.%Y", "%Y-%m-%d"):
+            try:
+                return datetime.strptime(value[:10], fmt)
+            except ValueError:
+                continue
+        return None
+
+    def _find_nearest_date(self, text: str, term: str) -> datetime | None:
+        """Pick the most plausible event date mentioned close to ``term``.
+
+        The search is limited to a window of 300 characters before and 500
+        characters after the first occurrence of the term; if the term is not
+        found, the whole text is scanned. Recognised forms are numeric German
+        dates (``12.05.2026``, ``12.05.26``, ``12.05.``) and day/month-name
+        combinations in either order with an optional four-digit year.
+
+        Args:
+            text: Text to scan.
+            term: Search term the date should be close to.
+
+        Returns:
+            The earliest candidate that is today or later (by UTC date); if
+            every candidate lies in the past, the earliest one; ``None`` if no
+            date was recognised.
+        """
+        normalized_term = normalize_search_text(term)
+        normalized_text = normalize_search_text(text)
+        term_index = normalized_text.find(normalized_term)
+        search_area = text
+        if term_index >= 0:
+            # NOTE(review): term_index is an offset into the normalized text
+            # but is applied to the raw text. That is only exact if
+            # normalize_search_text preserves string length -- confirm.
+            start = max(0, term_index - 300)
+            end = min(len(text), term_index + 500)
+            search_area = text[start:end]
+
+        candidates: list[datetime] = []
+        for pattern in (
+            r"\b(\d{1,2}\.\d{1,2}\.\d{4})\b",
+            r"\b(\d{1,2}\.\d{1,2}\.\d{2})\b",
+            # Yearless form. A trailing \b after the dot would require a word
+            # character to follow: "12.05. Hamburg" would never match, while
+            # the "12.05." prefix of "12.05.2027" would, adding a bogus
+            # candidate in the current or next year. Only require that no
+            # digit follows.
+            r"\b(\d{1,2}\.\d{1,2}\.)(?!\d)",
+        ):
+            for match in re.finditer(pattern, search_area):
+                parsed = self._parse_german_date(match.group(1))
+                if parsed:
+                    candidates.append(parsed)
+
+        month_name_pattern = (
+            r"jan(?:uar)?|feb(?:ruar)?|m(?:ae|ä)r(?:z)?|apr(?:il)?|mai|jun(?:i)?|"
+            r"jul(?:i)?|aug(?:ust)?|sep(?:t|tember)?|okt(?:ober)?|nov(?:ember)?|dez(?:ember)?"
+        )
+        # (pattern, group index of the day, group index of the month);
+        # group 3 is always the optional year.
+        named_patterns = (
+            (rf"\b(\d{{1,2}})\.?\s+({month_name_pattern})\.?\s*(\d{{4}})?\b", 1, 2),
+            (rf"\b({month_name_pattern})\.?\s+(\d{{1,2}})\.?\s*(\d{{4}})?\b", 2, 1),
+        )
+        for pattern, day_group, month_group in named_patterns:
+            for match in re.finditer(pattern, search_area, re.IGNORECASE):
+                parsed = self._parse_named_month_date(
+                    match.group(day_group),
+                    match.group(month_group),
+                    match.group(3),
+                )
+                if parsed:
+                    candidates.append(parsed)
+
+        if not candidates:
+            return None
+
+        today = datetime.utcnow().date()
+        future_candidates = [candidate for candidate in candidates if candidate.date() >= today]
+        # Prefer upcoming dates; fall back to the earliest past date.
+        return min(future_candidates) if future_candidates else min(candidates)
+
+    def _parse_german_date(self, value: str) -> datetime | None:
+        """Parse a numeric German date (``D.M.YYYY``, ``D.M.YY`` or ``D.M.``).
+
+        * With an explicit year (four digits, or two digits read as ``20YY``)
+          the date is returned as written, even if it lies in the past.
+        * Without a year, the date is placed in the current year, or in the
+          next year if that would be earlier than today (by UTC date).
+
+        Args:
+            value: Date text; surrounding whitespace is ignored.
+
+        Returns:
+            The parsed ``datetime`` at midnight, or ``None`` if the text is
+            not a valid date in one of the supported forms.
+        """
+        cleaned = value.strip()
+        today = datetime.utcnow().date()
+
+        if re.fullmatch(r"\d{1,2}\.\d{1,2}\.", cleaned):
+            # No year given: take the first of this/next year that is not in
+            # the past. An invalid combination (e.g. 29.02. in a non-leap
+            # year) simply moves on to the next year.
+            for year in (today.year, today.year + 1):
+                try:
+                    parsed = datetime.strptime(f"{cleaned}{year}", "%d.%m.%Y")
+                except ValueError:
+                    continue
+                if parsed.date() >= today:
+                    return parsed
+            return None
+
+        explicit = cleaned
+        if re.fullmatch(r"\d{1,2}\.\d{1,2}\.\d{2}", cleaned):
+            # %Y needs four digits, so expand the two-digit year first.
+            day, month, year = cleaned.split(".")
+            explicit = f"{day}.{month}.20{year}"
+
+        try:
+            # An explicitly written year is honoured regardless of whether the
+            # date has passed, for both the two- and four-digit forms.
+            return datetime.strptime(explicit, "%d.%m.%Y")
+        except ValueError:
+            return None
+
+    def _parse_named_month_date(
+        self,
+        day_value: str,
+        month_value: str,
+        year_value: str | None,
+    ) -> datetime | None:
+        """Build a date from a day number, a month name and an optional year.
+
+        The month name is normalized, stripped of a trailing dot and looked up
+        in ``MONTH_ALIASES``. With an explicit year the date is returned as
+        written; without one, the current and the following year are tried and
+        the first result that is not before today (by UTC date) wins.
+
+        Returns:
+            The resulting ``datetime``, or ``None`` if the month is unknown or
+            no valid, acceptable date could be built.
+        """
+        month_key = normalize_search_text(month_value).rstrip(".")
+        month = MONTH_ALIASES.get(month_key)
+        if month is None:
+            return None
+
+        day = int(day_value)
+        this_year = datetime.utcnow().year
+        if year_value:
+            candidate_years = [int(year_value)]
+        else:
+            candidate_years = [this_year, this_year + 1]
+
+        for candidate_year in candidate_years:
+            try:
+                result = datetime(candidate_year, month, day)
+            except ValueError:
+                # Impossible day/month combination for this year.
+                continue
+            # Only yearless dates are required to lie in the future.
+            if not year_value and result.date() < datetime.utcnow().date():
+                continue
+            return result
+        return None
+
+    def _find_matching_contexts(self, soup: BeautifulSoup, watch_item: WatchItem) -> list:
+        """Find page elements that mention the watch item together with a date.
+
+        A fixed list of CSS selectors is tried in order. An element qualifies
+        when its text contains the normalized watch-item name, is at most 3500
+        characters long and yields a date via ``_find_nearest_date``. Each
+        element is examined only once, even if several selectors match it.
+
+        Returns:
+            All qualifying elements; if there are none, a single-element list
+            with the result of ``_find_best_context``, or an empty list when
+            that returns ``None``.
+        """
+        needle = normalize_search_text(watch_item.name)
+        visited = set()
+        matches = []
+
+        for selector in (
+            "li.card",
+            ".tourplan .row",
+            "[class*=event]",
+            "[class*=termin]",
+            "article",
+            "tr",
+            "li",
+            ".row",
+        ):
+            for element in soup.select(selector):
+                marker = id(element)
+                if marker in visited:
+                    continue
+                visited.add(marker)
+
+                content = element.get_text(" ", strip=True)
+                qualifies = (
+                    needle in normalize_search_text(content)
+                    and len(content) <= 3500
+                    and self._find_nearest_date(content, watch_item.name)
+                )
+                if qualifies:
+                    matches.append(element)
+
+        if not matches:
+            # No structured hit: fall back to the text-node based search.
+            fallback = self._find_best_context(soup, watch_item.name)
+            return [] if fallback is None else [fallback]
+        return matches
+
+    def _find_venue(self, text: str, default: str) -> str:
+        """Return the first short text segment that mentions Hamburg.
+
+        The text is split on runs of two or more whitespace characters and on
+        line breaks. The first non-empty segment whose normalized form contains
+        ``hamburg`` and which is at most 120 characters long is returned;
+        otherwise ``default``.
+        """
+        segments = (piece.strip() for piece in re.split(r"\s{2,}|\n|\r", text))
+        hits = (
+            segment
+            for segment in segments
+            if segment
+            and "hamburg" in normalize_search_text(segment)
+            and len(segment) <= 120
+        )
+        return next(hits, default)
+
+    def _find_best_context(self, soup: BeautifulSoup, term: str):
+        """Choose the best enclosing element for a mention of ``term``.
+
+        Every text node containing the normalized term is lifted to a context
+        element via ``_climb_to_context_with_date``. Contexts that contain a
+        date are preferred; among equals the one with the shortest text wins,
+        and earlier occurrences win ties.
+
+        Returns:
+            The selected element, or ``None`` if the term occurs nowhere.
+        """
+        needle = normalize_search_text(term)
+        ranked = []
+        for text_node in soup.find_all(string=True):
+            if needle not in normalize_search_text(str(text_node)):
+                continue
+            holder = text_node.parent
+            if holder is None:
+                continue
+
+            context = self._climb_to_context_with_date(holder, term)
+            context_text = context.get_text(" ", strip=True)
+            # 0 ranks ahead of 1: contexts with a date come first.
+            missing_date = 1 if not self._find_nearest_date(context_text, term) else 0
+            ranked.append((missing_date, len(context_text), context))
+
+        if ranked:
+            # min() keeps the first of several equally ranked entries.
+            return min(ranked, key=lambda entry: (entry[0], entry[1]))[2]
+        return None
+
+    def _climb_to_context_with_date(self, node, term: str):
+        """Walk up from ``node`` until an element's text contains a date.
+
+        At most six levels are inspected, starting with ``node`` itself.
+
+        Returns:
+            The first inspected element for which ``_find_nearest_date`` finds
+            a date; otherwise the last element that was inspected (or ``node``
+            itself if nothing was inspected).
+        """
+        fallback = node
+        candidate = node
+        remaining = 6
+        while remaining > 0 and candidate is not None:
+            if self._find_nearest_date(candidate.get_text(" ", strip=True), term):
+                return candidate
+            fallback = candidate
+            candidate = candidate.parent
+            remaining -= 1
+        return fallback
+
+    def _find_title(self, soup: BeautifulSoup, term: str) -> str:
+        """Derive an event title for ``term`` from a parsed HTML fragment.
+
+        Lookup order:
+
+        1. The text of the first heading-like tag (``h1``-``h4``, ``strong``,
+           ``b``, ``a``) whose normalized text contains the normalized term.
+        2. A snippet of the full text that contains a numeric date followed by
+           the term.
+        3. A snippet of up to 80 characters on either side of the term.
+        4. The term itself.
+
+        Snippets have their whitespace collapsed to single spaces.
+        """
+        if soup is None:
+            return term
+
+        needle = normalize_search_text(term)
+        for tag in soup.find_all(["h1", "h2", "h3", "h4", "strong", "b", "a"]):
+            heading_text = tag.get_text(" ", strip=True)
+            if needle in normalize_search_text(heading_text):
+                return heading_text
+
+        page_text = soup.get_text(" ", strip=True)
+        escaped_term = re.escape(term)
+        snippet_patterns = (
+            r"(.{0,40}\d{1,2}\.\d{1,2}\.(?:\d{2,4})?.{0,100}" + escaped_term + r".{0,100})",
+            r"(.{0,80}" + escaped_term + r".{0,80})",
+        )
+        for snippet_pattern in snippet_patterns:
+            found = re.search(snippet_pattern, page_text, re.IGNORECASE)
+            if found:
+                return " ".join(found.group(1).split())
+        return term
+
+    def _find_nearest_link(self, soup: BeautifulSoup, term: str, base_url: str) -> str | None:
+        """Return the target of the first link whose text mentions ``term``.
+
+        Only ``<a>`` tags with an ``href`` are considered. The link text and
+        the term are compared in normalized form, and the ``href`` is resolved
+        against ``base_url``.
+
+        Returns:
+            The absolute URL, or ``None`` if no link text contains the term.
+        """
+        needle = normalize_search_text(term)
+        resolved = (
+            urljoin(base_url, anchor["href"])
+            for anchor in soup.find_all("a", href=True)
+            if needle in normalize_search_text(anchor.get_text(" ", strip=True))
+        )
+        return next(resolved, None)