auf Webseiten Modell umgestellt
This commit is contained in:
@@ -114,3 +114,4 @@ sudo docker compose up -d --build
|
||||
- Bandsintown benoetigt eine echte, von Bandsintown freigeschaltete App-ID. Ohne diese wird der Provider deaktiviert oder als `blocked` angezeigt.
|
||||
- Barclays Arena wird ueber die offizielle Eventseite der Arena abgefragt.
|
||||
- Fabrik wird ueber die offizielle Veranstaltungsseite der Fabrik Hamburg abgefragt.
|
||||
- Fuer robuste persoenliche Ueberwachung koennen pro Watchlist-Eintrag direkte Quellen-URLs hinterlegt werden. Diese werden beim Sync gezielt per JSON-LD und HTML-Textscan durchsucht.
|
||||
|
||||
@@ -3,6 +3,7 @@ const state = {
|
||||
events: [],
|
||||
notifications: [],
|
||||
providerStatuses: [],
|
||||
watchSources: [],
|
||||
};
|
||||
|
||||
const watchItemsEl = document.querySelector("#watch-items");
|
||||
@@ -92,6 +93,12 @@ function renderStats() {
|
||||
}
|
||||
|
||||
function prettifyProviderName(value) {
|
||||
if (value?.startsWith("source:")) {
|
||||
const sourceId = Number(value.split(":")[1]);
|
||||
const source = state.watchSources.find((entry) => entry.id === sourceId);
|
||||
return source?.label || "Direkte Quelle";
|
||||
}
|
||||
|
||||
const names = {
|
||||
ticketmaster: "Ticketmaster",
|
||||
bandsintown: "Bandsintown",
|
||||
@@ -206,12 +213,71 @@ function renderWatchItems() {
|
||||
</div>
|
||||
</div>
|
||||
<p>${escapeHtml(item.notes || "Keine Notiz hinterlegt.")}</p>
|
||||
${renderSourceList(item.id)}
|
||||
<form class="source-form" data-watch-id="${item.id}">
|
||||
<input
|
||||
name="label"
|
||||
type="text"
|
||||
placeholder="Quelle, z. B. Kuenstlerseite"
|
||||
/>
|
||||
<input
|
||||
name="url"
|
||||
type="url"
|
||||
placeholder="https://..."
|
||||
required
|
||||
/>
|
||||
<button type="submit" class="action-button success">Quelle hinzufuegen</button>
|
||||
</form>
|
||||
</article>
|
||||
`
|
||||
)
|
||||
.join("");
|
||||
}
|
||||
|
||||
/**
 * Builds the HTML for the direct sources attached to one watch item.
 *
 * Reads `state.watchSources`, keeps the entries whose `watch_item_id` equals
 * `watchItemId`, and renders one row per source with its URL, last scan
 * status/message and the toggle/delete buttons.
 *
 * The stored URL is only turned into a clickable link when it uses the http
 * or https scheme. HTML escaping alone does not neutralise `javascript:` or
 * `data:` URLs inside an `href`, so any other scheme is shown as plain text
 * in an anchor without `href`.
 *
 * @param watchItemId id of the watch item whose sources are rendered
 * @returns HTML string (a placeholder block when there are no sources)
 */
function renderSourceList(watchItemId) {
  const sources = state.watchSources.filter((source) => source.watch_item_id === watchItemId);
  if (!sources.length) {
    return '<div class="source-list muted">Noch keine direkten Quellen hinterlegt.</div>';
  }

  const rows = sources.map((source) => {
    const url = String(source.url ?? "").trim();
    // Only http(s) targets become real links.
    const linkAttributes = /^https?:\/\//i.test(url)
      ? ` href="${escapeHtml(url)}" target="_blank" rel="noreferrer"`
      : "";

    // Map the scan status onto the pill colour; anything unknown is a warning.
    let statusClass = "warning";
    if (source.last_status === "ok") {
      statusClass = "success";
    } else if (source.last_status === "error") {
      statusClass = "danger";
    }

    return `
      <div class="source-row">
        <div>
          <strong>${escapeHtml(source.label || "Quelle")}</strong>
          <a${linkAttributes}>
            ${escapeHtml(url)}
          </a>
          <div class="pill-row">
            <span class="pill ${statusClass}">${escapeHtml(source.last_status)}</span>
            <span class="muted">${escapeHtml(source.last_message || "Noch nicht gescannt.")}</span>
          </div>
        </div>
        <div class="action-row">
          <button class="action-button" data-action="toggle-source" data-id="${source.id}">
            ${source.is_active ? "Pausieren" : "Aktivieren"}
          </button>
          <button class="action-button danger" data-action="delete-source" data-id="${source.id}">
            Loeschen
          </button>
        </div>
      </div>
    `;
  });

  return `
    <div class="source-list">
      ${rows.join("")}
    </div>
  `;
}
|
||||
|
||||
/**
 * Looks up the display name of a watch item in `state.watchItems`.
 * Falls back to `Watch #<id>` when no item matches or its name is empty.
 */
function getWatchNameById(id) {
  const match = state.watchItems.find((item) => item.id === id);
  const name = match?.name;
  return name || `Watch #${id}`;
}
|
||||
@@ -270,6 +336,13 @@ function renderEvents() {
|
||||
>
|
||||
${event.is_ticket_purchased ? "Ticket entfernen" : "Ticket gekauft"}
|
||||
</button>
|
||||
<button
|
||||
class="action-button danger"
|
||||
data-action="delete-event"
|
||||
data-id="${event.id}"
|
||||
>
|
||||
Loeschen
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="event-meta">
|
||||
@@ -328,17 +401,19 @@ function updateSyncStatus(message) {
|
||||
}
|
||||
|
||||
async function loadData() {
|
||||
const [watchItems, events, notifications, providerStatuses] = await Promise.all([
|
||||
const [watchItems, events, notifications, providerStatuses, watchSources] = await Promise.all([
|
||||
apiFetch("/watch-items"),
|
||||
apiFetch("/events"),
|
||||
apiFetch("/notifications"),
|
||||
apiFetch("/provider-statuses"),
|
||||
apiFetch("/watch-sources"),
|
||||
]);
|
||||
|
||||
state.watchItems = watchItems;
|
||||
state.events = events;
|
||||
state.notifications = notifications;
|
||||
state.providerStatuses = providerStatuses;
|
||||
state.watchSources = watchSources;
|
||||
|
||||
renderStats();
|
||||
renderWatchItems();
|
||||
@@ -433,7 +508,60 @@ document.addEventListener("click", async (event) => {
|
||||
});
|
||||
await loadData();
|
||||
showToast("Ticketstatus aktualisiert.");
|
||||
return;
|
||||
}
|
||||
|
||||
if (action === "delete-event") {
|
||||
await apiFetch(`/events/${id}`, { method: "DELETE" });
|
||||
await loadData();
|
||||
showToast("Event geloescht.");
|
||||
return;
|
||||
}
|
||||
|
||||
if (action === "delete-source") {
|
||||
await apiFetch(`/watch-sources/${id}`, { method: "DELETE" });
|
||||
await loadData();
|
||||
showToast("Quelle geloescht.");
|
||||
return;
|
||||
}
|
||||
|
||||
if (action === "toggle-source") {
|
||||
const source = state.watchSources.find((entry) => entry.id === Number(id));
|
||||
await apiFetch(`/watch-sources/${id}`, {
|
||||
method: "PATCH",
|
||||
body: JSON.stringify({ is_active: !source.is_active }),
|
||||
});
|
||||
await loadData();
|
||||
showToast("Quellenstatus aktualisiert.");
|
||||
}
|
||||
} catch (error) {
|
||||
showToast(error.message);
|
||||
}
|
||||
});
|
||||
|
||||
document.addEventListener("submit", async (event) => {
|
||||
const form = event.target.closest(".source-form");
|
||||
if (!form) {
|
||||
return;
|
||||
}
|
||||
|
||||
event.preventDefault();
|
||||
const watchId = form.dataset.watchId;
|
||||
const formData = new FormData(form);
|
||||
const payload = {
|
||||
label: formData.get("label")?.toString().trim() || null,
|
||||
url: formData.get("url")?.toString().trim(),
|
||||
parser_type: "auto",
|
||||
};
|
||||
|
||||
try {
|
||||
await apiFetch(`/watch-items/${watchId}/sources`, {
|
||||
method: "POST",
|
||||
body: JSON.stringify(payload),
|
||||
});
|
||||
form.reset();
|
||||
await loadData();
|
||||
showToast("Quelle hinzugefuegt.");
|
||||
} catch (error) {
|
||||
showToast(error.message);
|
||||
}
|
||||
|
||||
@@ -381,6 +381,37 @@ button:hover,
|
||||
line-height: 1.55;
|
||||
}
|
||||
|
||||
.source-list {
|
||||
display: grid;
|
||||
gap: 10px;
|
||||
margin-top: 16px;
|
||||
}
|
||||
|
||||
.source-row {
|
||||
display: flex;
|
||||
align-items: start;
|
||||
justify-content: space-between;
|
||||
gap: 16px;
|
||||
padding: 14px;
|
||||
border-radius: var(--radius-sm);
|
||||
background: rgba(46, 39, 30, 0.05);
|
||||
}
|
||||
|
||||
.source-row a {
|
||||
display: block;
|
||||
max-width: 46ch;
|
||||
margin: 4px 0 8px;
|
||||
overflow-wrap: anywhere;
|
||||
color: var(--primary-dark);
|
||||
}
|
||||
|
||||
.source-form {
|
||||
display: grid;
|
||||
grid-template-columns: 0.7fr 1.4fr auto;
|
||||
gap: 10px;
|
||||
margin-top: 14px;
|
||||
}
|
||||
|
||||
.action-button {
|
||||
min-height: 38px;
|
||||
padding: 0 14px;
|
||||
@@ -467,6 +498,7 @@ button:hover,
|
||||
}
|
||||
|
||||
.watch-form,
|
||||
.source-form,
|
||||
.status-panel {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
+75
-1
@@ -8,7 +8,7 @@ from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.database import Base, engine, get_db
|
||||
from app.models import TrackedEvent, WatchItem
|
||||
from app.models import TrackedEvent, WatchItem, WatchSource
|
||||
from app.scheduler import start_scheduler
|
||||
from app.schemas import (
|
||||
NotificationLogRead,
|
||||
@@ -19,11 +19,15 @@ from app.schemas import (
|
||||
WatchItemCreate,
|
||||
WatchItemRead,
|
||||
WatchItemUpdate,
|
||||
WatchSourceCreate,
|
||||
WatchSourceRead,
|
||||
WatchSourceUpdate,
|
||||
)
|
||||
from app.services import (
|
||||
list_events,
|
||||
list_notifications,
|
||||
list_provider_statuses,
|
||||
list_watch_sources,
|
||||
list_watch_items,
|
||||
run_sync,
|
||||
)
|
||||
@@ -114,6 +118,66 @@ def delete_watch_item(watch_item_id: int, db: Session = Depends(get_db)):
|
||||
db.commit()
|
||||
|
||||
|
||||
@app.get("/watch-sources", response_model=list[WatchSourceRead])
def get_watch_sources(watch_item_id: int | None = None, db: Session = Depends(get_db)):
    """List watch sources, optionally restricted to one watch item.

    Delegates to ``list_watch_sources``; ``watch_item_id`` is an optional
    query parameter that is passed through unchanged.
    """
    sources = list_watch_sources(db, watch_item_id)
    return sources
|
||||
|
||||
|
||||
@app.post(
    "/watch-items/{watch_item_id}/sources",
    response_model=WatchSourceRead,
    status_code=201,
)
def create_watch_source(
    watch_item_id: int,
    payload: WatchSourceCreate,
    db: Session = Depends(get_db),
):
    """Attach a new direct source to an existing watch item.

    Responds with 404 when the watch item does not exist; otherwise stores
    the source and returns the refreshed row.
    """
    parent = db.get(WatchItem, watch_item_id)
    if parent is None:
        raise HTTPException(status_code=404, detail="Watch item nicht gefunden.")

    new_source = WatchSource(
        watch_item=parent,
        label=payload.label,
        url=payload.url,
        parser_type=payload.parser_type,
    )
    db.add(new_source)
    db.commit()
    # Reload so database-generated values (id, timestamps) are present in the response.
    db.refresh(new_source)
    return new_source
|
||||
|
||||
|
||||
@app.patch("/watch-sources/{source_id}", response_model=WatchSourceRead)
def update_watch_source(
    source_id: int,
    payload: WatchSourceUpdate,
    db: Session = Depends(get_db),
):
    """Partially update a watch source.

    Only fields present in the request body are applied
    (``exclude_unset=True``). ``url``, ``parser_type`` and ``is_active`` map
    to NOT NULL columns, so an explicit JSON ``null`` for one of them is
    rejected with 422 instead of failing later at commit time. ``label`` is
    nullable and may be cleared with ``null``.

    Raises:
        HTTPException: 404 when the source does not exist, 422 when a
            non-nullable field is explicitly set to ``null``.
    """
    source = db.get(WatchSource, source_id)
    if source is None:
        raise HTTPException(status_code=404, detail="Quelle nicht gefunden.")

    updates = payload.model_dump(exclude_unset=True)

    null_fields = [
        field_name
        for field_name in ("url", "parser_type", "is_active")
        if field_name in updates and updates[field_name] is None
    ]
    if null_fields:
        raise HTTPException(
            status_code=422,
            detail=f"Felder duerfen nicht null sein: {', '.join(null_fields)}.",
        )

    for field_name, value in updates.items():
        setattr(source, field_name, value)
    source.updated_at = datetime.utcnow()
    db.commit()
    db.refresh(source)
    return source
|
||||
|
||||
|
||||
@app.delete("/watch-sources/{source_id}", status_code=204)
def delete_watch_source(source_id: int, db: Session = Depends(get_db)):
    """Delete one watch source; responds with 404 when it does not exist."""
    target = db.get(WatchSource, source_id)
    if target is not None:
        db.delete(target)
        db.commit()
        return

    raise HTTPException(status_code=404, detail="Quelle nicht gefunden.")
|
||||
|
||||
|
||||
@app.get("/events", response_model=list[TrackedEventRead])
def get_events(db: Session = Depends(get_db)):
    """Return the tracked events produced by ``list_events``."""
    tracked = list_events(db)
    return tracked
|
||||
@@ -137,6 +201,16 @@ def update_purchase_status(
|
||||
return tracked_event
|
||||
|
||||
|
||||
@app.delete("/events/{event_id}", status_code=204)
def delete_event(event_id: int, db: Session = Depends(get_db)):
    """Delete one tracked event; responds with 404 when it does not exist."""
    target = db.get(TrackedEvent, event_id)
    if target is not None:
        db.delete(target)
        db.commit()
        return

    raise HTTPException(status_code=404, detail="Event nicht gefunden.")
|
||||
|
||||
|
||||
@app.get("/notifications", response_model=list[NotificationLogRead])
def get_notifications(db: Session = Depends(get_db)):
    """Return the notification log entries produced by ``list_notifications``."""
    entries = list_notifications(db)
    return entries
|
||||
|
||||
@@ -35,6 +35,13 @@ class ProviderStatusType(str, Enum):
|
||||
error = "error"
|
||||
|
||||
|
||||
class SourceStatusType(str, Enum):
    """Result of the most recent scan of a direct watch source.

    The members are plain strings, so they serialise as their value.
    """

    # Column default of a source.
    pending = "pending"
    # The sync sets this when a scan returned at least one matching event.
    ok = "ok"
    # The sync sets this when a scan ran but returned no matching event.
    no_match = "no_match"
    # The sync sets this when a scan raised an exception.
    error = "error"
|
||||
|
||||
|
||||
class WatchItem(Base):
|
||||
__tablename__ = "watch_items"
|
||||
|
||||
@@ -57,6 +64,36 @@ class WatchItem(Base):
|
||||
tracked_events: Mapped[list["TrackedEvent"]] = relationship(
|
||||
back_populates="watch_item", cascade="all, delete-orphan"
|
||||
)
|
||||
sources: Mapped[list["WatchSource"]] = relationship(
|
||||
back_populates="watch_item", cascade="all, delete-orphan"
|
||||
)
|
||||
|
||||
|
||||
class WatchSource(Base):
|
||||
__tablename__ = "watch_sources"
|
||||
|
||||
id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
|
||||
watch_item_id: Mapped[int] = mapped_column(ForeignKey("watch_items.id"), nullable=False)
|
||||
label: Mapped[str | None] = mapped_column(String(255), nullable=True)
|
||||
url: Mapped[str] = mapped_column(String(1024), nullable=False)
|
||||
parser_type: Mapped[str] = mapped_column(String(50), default="auto", nullable=False)
|
||||
is_active: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False)
|
||||
last_status: Mapped[SourceStatusType] = mapped_column(
|
||||
SqlEnum(SourceStatusType), default=SourceStatusType.pending, nullable=False
|
||||
)
|
||||
last_message: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
last_checked_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(
|
||||
DateTime, default=datetime.utcnow, nullable=False
|
||||
)
|
||||
updated_at: Mapped[datetime] = mapped_column(
|
||||
DateTime,
|
||||
default=datetime.utcnow,
|
||||
onupdate=datetime.utcnow,
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
watch_item: Mapped[WatchItem] = relationship(back_populates="sources")
|
||||
|
||||
|
||||
class TrackedEvent(Base):
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from datetime import datetime
|
||||
import re
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import requests
|
||||
@@ -10,7 +11,7 @@ from app.providers.utils import normalize_search_text
|
||||
|
||||
class BarclaysArenaProvider:
|
||||
source_name = "barclays_arena"
|
||||
events_url = "https://www.barclays-arena.de/events"
|
||||
events_url = "https://www.barclays-arena.de/events/search"
|
||||
|
||||
def search_events(
|
||||
self,
|
||||
@@ -29,73 +30,104 @@ class BarclaysArenaProvider:
|
||||
normalized_term = normalize_search_text(term)
|
||||
results: list[dict] = []
|
||||
|
||||
headings = soup.find_all("h3")
|
||||
for heading in headings:
|
||||
for heading in soup.find_all("h3"):
|
||||
title = heading.get_text(" ", strip=True)
|
||||
if not title:
|
||||
continue
|
||||
|
||||
subtitle = ""
|
||||
subtitle_el = heading.find_next("h4")
|
||||
if subtitle_el:
|
||||
subtitle = subtitle_el.get_text(" ", strip=True)
|
||||
subtitle_el = heading.find_next_sibling("h4")
|
||||
subtitle = subtitle_el.get_text(" ", strip=True) if subtitle_el else ""
|
||||
|
||||
# Keep matching local to the actual heading/subtitle pair. Wider
|
||||
# parent containers often contain several event cards.
|
||||
haystack = normalize_search_text(f"{title} {subtitle}")
|
||||
if normalized_term not in haystack:
|
||||
continue
|
||||
|
||||
date_text = self._find_previous_date_text(heading)
|
||||
event_date = self._parse_german_date(date_text)
|
||||
|
||||
link = heading.find_previous("a", href=True)
|
||||
if link is None:
|
||||
detail_link = self._find_card_link(heading)
|
||||
if detail_link is None:
|
||||
continue
|
||||
|
||||
date_text = self._find_card_date_text(heading)
|
||||
event_date = self._parse_german_date(date_text)
|
||||
href = detail_link["href"]
|
||||
|
||||
results.append(
|
||||
{
|
||||
"external_id": link["href"],
|
||||
"external_id": href,
|
||||
"title": title,
|
||||
"matched_term": term,
|
||||
"venue_name": "Barclays Arena",
|
||||
"city": "Hamburg",
|
||||
"country_code": "DE",
|
||||
"event_date": event_date,
|
||||
"ticket_url": urljoin(self.events_url, link["href"]),
|
||||
"ticket_url": urljoin(self.events_url, href),
|
||||
"image_url": None,
|
||||
"raw_payload": {
|
||||
"title": title,
|
||||
"subtitle": subtitle,
|
||||
"date_text": date_text,
|
||||
"href": link["href"],
|
||||
"href": href,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
unique_results: dict[str, dict] = {}
|
||||
for result in results:
|
||||
unique_results[result["external_id"]] = result
|
||||
|
||||
self.last_status = "ok"
|
||||
self.last_message = (
|
||||
f"Barclays Arena returned {len(results)} matched events for term '{term}'."
|
||||
f"Barclays Arena returned {len(unique_results)} matched events for term '{term}'."
|
||||
)
|
||||
return results
|
||||
return list(unique_results.values())
|
||||
|
||||
def _find_previous_date_text(self, heading) -> str | None:
|
||||
current = heading.previous_sibling
|
||||
while current is not None:
|
||||
def _find_card_link(self, heading):
    """Find the event detail link that belongs to a card heading.

    First looks for an enclosing ``<a>`` whose ``href`` contains
    ``/events/``. If there is none, walks up to five ancestor levels and, at
    each level, takes the first such link inside the ancestor; that link is
    accepted only when the heading is among its ``<h3>`` elements.

    Returns the link tag, or ``None`` when nothing qualifies.
    """
    events_href = re.compile(r"/events/")

    enclosing = heading.find_parent("a", href=events_href)
    if enclosing is not None:
        return enclosing

    node = heading
    for _ in range(5):
        node = node.parent
        if node is None:
            return None
        candidate = node.find("a", href=events_href)
        if candidate is None:
            continue
        if heading in candidate.find_all("h3"):
            return candidate
    return None
|
||||
|
||||
def _find_card_date_text(self, heading) -> str | None:
|
||||
current = heading
|
||||
for _ in range(6):
|
||||
current = current.previous_element
|
||||
if current is None:
|
||||
return None
|
||||
text = getattr(current, "get_text", lambda *args, **kwargs: str(current))(
|
||||
" ", strip=True
|
||||
)
|
||||
if text and "|" in text:
|
||||
return text
|
||||
current = current.previous_sibling
|
||||
date_text = self._extract_date_text(text)
|
||||
if date_text:
|
||||
return date_text
|
||||
return None
|
||||
|
||||
def _extract_date_text(self, text: str) -> str | None:
|
||||
match = re.search(
|
||||
r"(Montag|Dienstag|Mittwoch|Donnerstag|Freitag|Samstag|Sonntag)\s*\|\s*(\d{2}\.\d{2}\.\d{4})",
|
||||
text,
|
||||
)
|
||||
if match:
|
||||
return match.group(2)
|
||||
|
||||
match = re.search(r"\b(\d{2}\.\d{2}\.\d{4})\b", text)
|
||||
if match:
|
||||
return match.group(1)
|
||||
return None
|
||||
|
||||
def _parse_german_date(self, value: str | None) -> datetime | None:
|
||||
if not value:
|
||||
return None
|
||||
parts = [part.strip() for part in value.split("|")]
|
||||
if len(parts) < 2:
|
||||
return None
|
||||
try:
|
||||
return datetime.strptime(parts[1], "%d.%m.%Y")
|
||||
return datetime.strptime(value, "%d.%m.%Y")
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ from app.models import (
|
||||
NotificationType,
|
||||
ProviderStatusType,
|
||||
RegionScope,
|
||||
SourceStatusType,
|
||||
WatchType,
|
||||
)
|
||||
|
||||
@@ -39,6 +40,35 @@ class WatchItemRead(BaseModel):
|
||||
updated_at: datetime
|
||||
|
||||
|
||||
class WatchSourceCreate(BaseModel):
    """Request body for attaching a direct source URL to a watch item."""

    # Optional display name for the source.
    label: str | None = Field(default=None, max_length=255)
    # The sync fetches this URL server-side and the UI renders it as a link,
    # so only http(s) URLs are accepted.
    url: str = Field(min_length=8, max_length=1024, pattern=r"(?i)^https?://")
    parser_type: str = "auto"
|
||||
|
||||
|
||||
class WatchSourceUpdate(BaseModel):
    """Request body for a partial update of a watch source.

    Every field is optional; the route applies only the fields that were
    actually sent.
    """

    label: str | None = Field(default=None, max_length=255)
    # The sync fetches this URL server-side and the UI renders it as a link,
    # so only http(s) URLs are accepted.
    url: str | None = Field(
        default=None, min_length=8, max_length=1024, pattern=r"(?i)^https?://"
    )
    parser_type: str | None = None
    is_active: bool | None = None
|
||||
|
||||
|
||||
class WatchSourceRead(BaseModel):
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
id: int
|
||||
watch_item_id: int
|
||||
label: str | None
|
||||
url: str
|
||||
parser_type: str
|
||||
is_active: bool
|
||||
last_status: SourceStatusType
|
||||
last_message: str | None
|
||||
last_checked_at: datetime | None
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
|
||||
class PurchaseUpdate(BaseModel):
|
||||
is_ticket_purchased: bool
|
||||
|
||||
|
||||
@@ -10,12 +10,15 @@ from app.models import (
|
||||
NotificationType,
|
||||
ProviderStatus,
|
||||
ProviderStatusType,
|
||||
SourceStatusType,
|
||||
TrackedEvent,
|
||||
WatchItem,
|
||||
WatchSource,
|
||||
)
|
||||
from app.notifications import send_email_notification
|
||||
from app.providers.registry import get_providers
|
||||
from app.schemas import SyncResult
|
||||
from app.source_scanner import SourceScanner
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -71,6 +74,13 @@ def list_provider_statuses(db: Session) -> list[ProviderStatus]:
|
||||
return list(db.scalars(select(ProviderStatus).order_by(ProviderStatus.provider_name)))
|
||||
|
||||
|
||||
def list_watch_sources(db: Session, watch_item_id: int | None = None) -> list[WatchSource]:
    """Return watch sources ordered by creation time.

    When ``watch_item_id`` is given, only sources of that watch item are
    returned.
    """
    query = select(WatchSource)
    if watch_item_id is not None:
        query = query.where(WatchSource.watch_item_id == watch_item_id)
    query = query.order_by(WatchSource.created_at)
    return list(db.scalars(query))
|
||||
|
||||
|
||||
def update_provider_status(
|
||||
db: Session,
|
||||
provider_name: str,
|
||||
@@ -261,6 +271,7 @@ def upsert_event(
|
||||
|
||||
def run_sync(db: Session) -> SyncResult:
|
||||
providers = get_providers()
|
||||
source_scanner = SourceScanner()
|
||||
provider_states = {
|
||||
provider.source_name: init_provider_sync_state(provider.source_name)
|
||||
for provider in providers
|
||||
@@ -275,6 +286,75 @@ def run_sync(db: Session) -> SyncResult:
|
||||
notifications_skipped = 0
|
||||
|
||||
for watch_item in active_items:
|
||||
active_sources = [source for source in watch_item.sources if source.is_active]
|
||||
for source in active_sources:
|
||||
try:
|
||||
events = source_scanner.scan(watch_item, source)
|
||||
source.last_status = (
|
||||
SourceStatusType.ok if events else SourceStatusType.no_match
|
||||
)
|
||||
source.last_message = (
|
||||
f"{len(events)} passende Events gefunden."
|
||||
if events
|
||||
else "Keine passenden Events auf dieser Quelle gefunden."
|
||||
)
|
||||
source.last_checked_at = datetime.utcnow()
|
||||
except Exception as exc:
|
||||
logger.exception(
|
||||
"Source scan failed for watch_item=%s source=%s",
|
||||
watch_item.name,
|
||||
source.url,
|
||||
)
|
||||
db.rollback()
|
||||
source.last_status = SourceStatusType.error
|
||||
source.last_message = f"Scan fehlgeschlagen: {exc}"
|
||||
source.last_checked_at = datetime.utcnow()
|
||||
db.add(source)
|
||||
db.commit()
|
||||
continue
|
||||
|
||||
for event_data in events:
|
||||
tracked_event, is_new = upsert_event(
|
||||
db=db,
|
||||
watch_item=watch_item,
|
||||
provider_name=f"source:{source.id}",
|
||||
event_data=event_data,
|
||||
)
|
||||
if is_new:
|
||||
new_events += 1
|
||||
else:
|
||||
updated_events += 1
|
||||
|
||||
should_notify = (
|
||||
is_new
|
||||
and tracked_event.discovery_notified_at is None
|
||||
and not has_equivalent_existing_event(db, tracked_event)
|
||||
)
|
||||
if should_notify:
|
||||
status = send_email_notification(
|
||||
db=db,
|
||||
tracked_event=tracked_event,
|
||||
notification_type=NotificationType.discovery,
|
||||
subject=f"Neuer Termin fuer {watch_item.name}",
|
||||
body=(
|
||||
f"Es wurde ein neuer Termin fuer '{watch_item.name}' gefunden.\n\n"
|
||||
f"Quelle: {source.label or source.url}\n"
|
||||
f"Titel: {tracked_event.title}\n"
|
||||
f"Ort: {tracked_event.venue_name or 'unbekannt'}\n"
|
||||
f"Stadt: {tracked_event.city or 'unbekannt'}\n"
|
||||
f"Datum: {tracked_event.event_date or 'unbekannt'}\n"
|
||||
f"Tickets: {tracked_event.ticket_url or 'keine URL'}\n"
|
||||
),
|
||||
)
|
||||
if status == NotificationStatus.sent:
|
||||
tracked_event.discovery_notified_at = datetime.utcnow()
|
||||
notifications_sent += 1
|
||||
else:
|
||||
notifications_skipped += 1
|
||||
|
||||
db.add(source)
|
||||
db.commit()
|
||||
|
||||
for provider in providers:
|
||||
try:
|
||||
events = provider.search_events(
|
||||
|
||||
@@ -0,0 +1,446 @@
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime
|
||||
from html import unescape
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from app.models import RegionScope, WatchItem, WatchSource, WatchType
|
||||
from app.providers.utils import normalize_search_text
|
||||
|
||||
|
||||
# German month names and abbreviations mapped to their month number.
# March is listed in its transliterated ("maerz"), plain ("mar") and umlaut
# ("märz") spellings so a lookup succeeds whether or not the caller has
# already folded umlauts.
MONTH_ALIASES = {
    "jan": 1,
    "januar": 1,
    "feb": 2,
    "februar": 2,
    "maer": 3,
    "maerz": 3,
    "mar": 3,
    "mär": 3,
    "märz": 3,
    "apr": 4,
    "april": 4,
    "mai": 5,
    "jun": 6,
    "juni": 6,
    "jul": 7,
    "juli": 7,
    "aug": 8,
    "august": 8,
    "sep": 9,
    "sept": 9,
    "september": 9,
    "okt": 10,
    "oktober": 10,
    "nov": 11,
    "november": 11,
    "dez": 12,
    "dezember": 12,
}
|
||||
|
||||
|
||||
class SourceScanner:
|
||||
headers = {
|
||||
"User-Agent": "eventlens/0.1 (+https://local)",
|
||||
"Accept": "text/html,application/xhtml+xml,application/json",
|
||||
"Accept-Language": "de-DE,de;q=0.9,en;q=0.7",
|
||||
}
|
||||
|
||||
def scan(self, watch_item: WatchItem, source: WatchSource) -> list[dict]:
    """Fetch a source URL and extract events that match the watch item.

    JSON responses (``application/json`` and any ``+json`` media type such
    as ``application/ld+json``) are parsed as JSON-LD; everything else is
    treated as HTML.

    Raises:
        requests.RequestException: on network errors or a non-2xx status.
        ValueError: when a JSON response body cannot be decoded.
    """
    response = requests.get(
        source.url,
        headers=self.headers,
        timeout=30,
    )
    response.raise_for_status()

    # Header values are case-insensitive and may carry parameters
    # ("application/json; charset=utf-8").
    content_type = response.headers.get("content-type", "").lower()
    media_type = content_type.split(";", 1)[0].strip()
    if "application/json" in content_type or media_type.endswith("+json"):
        return self._scan_json(watch_item, source, response.json())

    return self._scan_html(watch_item, source, response.text)
|
||||
|
||||
def _scan_json(self, watch_item: WatchItem, source: WatchSource, payload) -> list[dict]:
    """Extract matching events from an already decoded JSON payload."""
    return self._events_from_jsonld(
        watch_item,
        source,
        self._extract_jsonld_events(payload),
    )
|
||||
|
||||
def _scan_html(self, watch_item: WatchItem, source: WatchSource, html: str) -> list[dict]:
    """Extract matching events from an HTML page.

    Embedded ``application/ld+json`` scripts are tried first. The plain-text
    scan of the page is used only when JSON-LD yields no matching event.
    """
    soup = BeautifulSoup(html, "html.parser")

    collected = []
    for tag in soup.find_all("script", type="application/ld+json"):
        raw = tag.string or tag.get_text()
        if raw:
            try:
                decoded = json.loads(unescape(raw))
            except json.JSONDecodeError:
                # A broken script block must not abort the whole page scan.
                continue
            collected.extend(self._extract_jsonld_events(decoded))

    structured = self._events_from_jsonld(watch_item, source, collected)
    if not structured:
        return self._events_from_html_text(watch_item, source, soup)
    return structured
|
||||
|
||||
def _extract_jsonld_events(self, payload) -> list[dict]:
|
||||
events: list[dict] = []
|
||||
if isinstance(payload, list):
|
||||
for item in payload:
|
||||
events.extend(self._extract_jsonld_events(item))
|
||||
return events
|
||||
|
||||
if not isinstance(payload, dict):
|
||||
return events
|
||||
|
||||
graph = payload.get("@graph")
|
||||
if isinstance(graph, list):
|
||||
for item in graph:
|
||||
events.extend(self._extract_jsonld_events(item))
|
||||
|
||||
item_type = payload.get("@type")
|
||||
if isinstance(item_type, list):
|
||||
is_event = "Event" in item_type
|
||||
else:
|
||||
is_event = item_type == "Event"
|
||||
if is_event:
|
||||
events.append(payload)
|
||||
|
||||
return events
|
||||
|
||||
def _events_from_jsonld(
    self,
    watch_item: WatchItem,
    source: WatchSource,
    events: list[dict],
) -> list[dict]:
    """Turn JSON-LD event nodes into event dicts for the watch item.

    An event is kept when the normalized watch item name occurs in its name
    or performer names, when it passes the Hamburg region filter (if the
    watch item is scoped to Hamburg) and when its start date is not in the
    past. Events without a parseable start date are kept.

    JSON-LD found on real pages is loosely typed: ``location`` may be an
    object, a list or plain text, and ``address`` may be an object or plain
    text. Unexpected shapes are tolerated so one oddly shaped event does not
    abort the scan of the whole source.
    """
    results: list[dict] = []
    normalized_term = normalize_search_text(watch_item.name)
    today = datetime.utcnow().date()

    for event in events:
        raw_name = event.get("name")
        title = raw_name if isinstance(raw_name, str) else ""
        performers = self._extract_performer_names(event)
        haystack = normalize_search_text(" ".join([title] + performers))
        if normalized_term not in haystack:
            continue

        raw_location = event.get("location")
        if isinstance(raw_location, list):
            # Several locations: use the first one.
            raw_location = raw_location[0] if raw_location else None
        if isinstance(raw_location, dict):
            location = raw_location
        elif isinstance(raw_location, str):
            # Plain-text location: treat the text as the place name.
            location = {"name": raw_location}
        else:
            location = {}

        raw_address = location.get("address")
        locality = raw_address.get("addressLocality") if isinstance(raw_address, dict) else None
        city = locality or location.get("name")
        if (
            watch_item.region_scope == RegionScope.hamburg
            and normalize_search_text(city or "") != "hamburg"
        ):
            continue

        raw_start = event.get("startDate")
        event_date = self._parse_datetime(raw_start if isinstance(raw_start, str) else None)
        if event_date and event_date.date() < today:
            continue

        raw_url = event.get("url")
        ticket_url = raw_url if isinstance(raw_url, str) and raw_url else source.url

        results.append(
            {
                "external_id": str(event.get("@id") or ticket_url or f"{source.id}:{title}"),
                "title": title or watch_item.name,
                "matched_term": watch_item.name,
                "venue_name": location.get("name") or source.label,
                "city": city,
                "country_code": "DE",
                "event_date": event_date,
                "ticket_url": ticket_url,
                "image_url": self._extract_image(event),
                "raw_payload": event,
            }
        )

    return results
|
||||
|
||||
def _events_from_html_text(
    self,
    watch_item: WatchItem,
    source: WatchSource,
    soup: BeautifulSoup,
) -> list[dict]:
    """Fallback scan: find the watch item name in the page's visible text.

    Returns nothing when the normalized name does not occur on the page.
    Otherwise each matching context node is inspected. It yields an event
    only if a date is found that is not in the past and, for Hamburg-scoped
    watch items, the context mentions "hamburg". Results are de-duplicated
    by source id, normalized title and date.
    """
    wanted = normalize_search_text(watch_item.name)
    page_text = soup.get_text(" ", strip=True)
    if wanted not in normalize_search_text(page_text):
        return []

    found: list[dict] = []
    emitted: set[str] = set()
    for block in self._find_matching_contexts(soup, watch_item):
        block_text = block.get_text(" ", strip=True)

        when = self._find_nearest_date(block_text, watch_item.name)
        if when is None or when.date() < datetime.utcnow().date():
            continue

        hamburg_only = watch_item.region_scope == RegionScope.hamburg
        if hamburg_only and "hamburg" not in normalize_search_text(block_text):
            continue

        title = self._find_title(block, watch_item.name)
        link = self._find_nearest_link(block, watch_item.name, source.url) or source.url
        key = f"{source.id}:{normalize_search_text(title)}:{when.date().isoformat()}"
        if key in emitted:
            continue
        emitted.add(key)

        found.append(
            {
                "external_id": key,
                "title": title,
                "matched_term": watch_item.name,
                "venue_name": self._find_venue(block_text, source.label),
                "city": "Hamburg" if hamburg_only else None,
                "country_code": "DE",
                "event_date": when,
                "ticket_url": link,
                "image_url": None,
                "raw_payload": {
                    "source_url": source.url,
                    "parser": "html_text",
                    # Keep only the start of the context as a sample.
                    "context": block_text[:1000],
                },
            }
        )

    return found
|
||||
|
||||
def _extract_performer_names(self, event: dict) -> list[str]:
|
||||
performer = event.get("performer") or event.get("performers")
|
||||
if isinstance(performer, dict):
|
||||
return [performer.get("name", "")]
|
||||
if isinstance(performer, list):
|
||||
return [item.get("name", "") for item in performer if isinstance(item, dict)]
|
||||
return []
|
||||
|
||||
def _extract_image(self, event: dict) -> str | None:
|
||||
image = event.get("image")
|
||||
if isinstance(image, str):
|
||||
return image
|
||||
if isinstance(image, list):
|
||||
for item in image:
|
||||
if isinstance(item, str):
|
||||
return item
|
||||
return None
|
||||
|
||||
def _parse_datetime(self, value: str | None) -> datetime | None:
|
||||
if not value:
|
||||
return None
|
||||
try:
|
||||
return datetime.fromisoformat(value.replace("Z", "+00:00")).replace(tzinfo=None)
|
||||
except ValueError:
|
||||
pass
|
||||
for fmt in ("%d.%m.%Y", "%Y-%m-%d"):
|
||||
try:
|
||||
return datetime.strptime(value[:10], fmt)
|
||||
except ValueError:
|
||||
continue
|
||||
return None
|
||||
|
||||
def _find_nearest_date(self, text: str, term: str) -> datetime | None:
    """Find the most plausible event date near ``term`` inside ``text``.

    The search is limited to a window from 300 characters before to 500
    characters after the first occurrence of the term (the whole text when
    the term is not found). Numeric German dates (``12.05.2027``,
    ``12.05.27``, ``12.05.``) and named-month dates (``12. Mai 2027``,
    ``Mai 12``) are collected. The earliest date that is not in the past
    wins; if every candidate is in the past, the earliest one is returned.
    Returns ``None`` when no date is found.
    """
    normalized_term = normalize_search_text(term)
    normalized_text = normalize_search_text(text)
    term_index = normalized_text.find(normalized_term)
    search_area = text
    if term_index >= 0:
        # NOTE(review): the index comes from the normalized text but is applied
        # to the raw text; this assumes normalization keeps character
        # positions roughly stable. Confirm against normalize_search_text.
        start = max(0, term_index - 300)
        end = min(len(text), term_index + 500)
        search_area = text[start:end]

    candidates: list[datetime] = []

    # One pattern for all numeric forms. The optional year is matched greedily
    # (4 digits, then 2, then none) and the lookahead forbids a trailing
    # digit, so "12.05." is never cut out of "12.05.2027" and turned into a
    # second candidate in the wrong year. A yearless date followed by a space
    # or punctuation ("12.05. Hamburg") is matched as well.
    numeric_date = r"\b(\d{1,2}\.\d{1,2}\.(?:\d{4}|\d{2})?)(?!\d)"
    for match in re.finditer(numeric_date, search_area):
        parsed = self._parse_german_date(match.group(1))
        if parsed:
            candidates.append(parsed)

    month_name_pattern = (
        r"jan(?:uar)?|feb(?:ruar)?|m(?:ae|ä)r(?:z)?|apr(?:il)?|mai|jun(?:i)?|"
        r"jul(?:i)?|aug(?:ust)?|sep(?:t|tember)?|okt(?:ober)?|nov(?:ember)?|dez(?:ember)?"
    )
    # "12. Mai 2027" / "12 Mai"
    for match in re.finditer(
        rf"\b(\d{{1,2}})\.?\s+({month_name_pattern})\.?\s*(\d{{4}})?\b",
        search_area,
        re.IGNORECASE,
    ):
        parsed = self._parse_named_month_date(match.group(1), match.group(2), match.group(3))
        if parsed:
            candidates.append(parsed)
    # "Mai 12 2027" / "Mai 12."
    for match in re.finditer(
        rf"\b({month_name_pattern})\.?\s+(\d{{1,2}})\.?\s*(\d{{4}})?\b",
        search_area,
        re.IGNORECASE,
    ):
        parsed = self._parse_named_month_date(match.group(2), match.group(1), match.group(3))
        if parsed:
            candidates.append(parsed)

    if not candidates:
        return None

    today = datetime.utcnow().date()
    future_candidates = [candidate for candidate in candidates if candidate.date() >= today]
    return min(future_candidates) if future_candidates else min(candidates)
|
||||
|
||||
def _parse_german_date(self, value: str) -> datetime | None:
|
||||
cleaned = value.strip()
|
||||
current_year = datetime.utcnow().year
|
||||
candidates = [cleaned]
|
||||
if re.fullmatch(r"\d{1,2}\.\d{1,2}\.", cleaned):
|
||||
candidates.append(f"{cleaned}{current_year}")
|
||||
candidates.append(f"{cleaned}{current_year + 1}")
|
||||
elif re.fullmatch(r"\d{1,2}\.\d{1,2}\.\d{2}", cleaned):
|
||||
day, month, year = cleaned.split(".")
|
||||
candidates.append(f"{day}.{month}.20{year}")
|
||||
|
||||
for candidate in candidates:
|
||||
try:
|
||||
parsed = datetime.strptime(candidate, "%d.%m.%Y")
|
||||
if parsed.date() < datetime.utcnow().date() and candidate != cleaned:
|
||||
continue
|
||||
return parsed
|
||||
except ValueError:
|
||||
continue
|
||||
return None
|
||||
|
||||
def _parse_named_month_date(
    self,
    day_value: str,
    month_value: str,
    year_value: str | None,
) -> datetime | None:
    """Build a ``datetime`` from a day, a month name and an optional year.

    The month name is normalized, stripped of trailing dots and looked up in
    ``MONTH_ALIASES``. With an explicit year the date is returned as-is (past
    dates included). Without one, the current UTC year and then the next year
    are tried and the first date that is not in the past is returned.

    Returns:
        The resulting date at midnight, or ``None`` if the month is unknown or
        no valid, acceptable date could be formed.
    """
    month_key = normalize_search_text(month_value).rstrip(".")
    month = MONTH_ALIASES.get(month_key)
    if month is None:
        return None

    day = int(day_value)
    this_year = datetime.utcnow().year
    has_explicit_year = bool(year_value)
    if has_explicit_year:
        years_to_try = [int(year_value)]
    else:
        years_to_try = [this_year, this_year + 1]

    for year in years_to_try:
        try:
            result = datetime(year, month, day)
        except ValueError:
            # Day does not exist in that month/year (e.g. 30 February).
            continue
        if has_explicit_year:
            return result
        if result.date() >= datetime.utcnow().date():
            return result
    return None
|
||||
|
||||
def _find_matching_contexts(self, soup: BeautifulSoup, watch_item: WatchItem) -> list:
    """Collect page elements that mention the watch item together with a date.

    A fixed list of CSS selectors is tried in order. An element qualifies when
    its text contains the normalized watch-item name, is at most 3500
    characters long, and ``_find_nearest_date`` finds a date in it. Each
    element is inspected only once even if several selectors match it.

    Returns:
        All qualifying elements; if there are none, a one-element list with the
        result of ``_find_best_context``, or an empty list when that is ``None``.
    """
    needle = normalize_search_text(watch_item.name)
    css_selectors = (
        "li.card",
        ".tourplan .row",
        "[class*=event]",
        "[class*=termin]",
        "article",
        "tr",
        "li",
        ".row",
    )
    matches = []
    visited_ids = set()

    for css in css_selectors:
        for element in soup.select(css):
            element_key = id(element)
            if element_key in visited_ids:
                continue
            visited_ids.add(element_key)

            content = element.get_text(" ", strip=True)
            # Skip elements that do not mention the term or are too large to be
            # a single event entry.
            if needle not in normalize_search_text(content) or len(content) > 3500:
                continue
            if self._find_nearest_date(content, watch_item.name):
                matches.append(element)

    if not matches:
        fallback_node = self._find_best_context(soup, watch_item.name)
        return [] if fallback_node is None else [fallback_node]
    return matches
|
||||
|
||||
def _find_venue(self, text: str, default: str) -> str:
    """Return the first short text segment that mentions Hamburg.

    ``text`` is split on runs of two or more whitespace characters and on line
    breaks. The first non-empty, stripped segment of at most 120 characters
    whose normalized form contains ``"hamburg"`` is returned.

    Returns:
        The matching segment, or ``default`` if no segment qualifies.
    """
    for raw_segment in re.split(r"\s{2,}|\n|\r", text):
        segment = raw_segment.strip()
        if not segment:
            continue
        if len(segment) <= 120 and "hamburg" in normalize_search_text(segment):
            return segment
    return default
|
||||
|
||||
def _find_best_context(self, soup: BeautifulSoup, term: str):
    """Choose the best surrounding element for a mention of ``term``.

    For every text node whose normalized content contains the normalized term,
    the node's parent is widened via ``_climb_to_context_with_date``. Contexts
    in which ``_find_nearest_date`` finds a date are preferred; ties are
    broken by shorter context text, then by document order.

    Returns:
        The winning element, or ``None`` if the term does not occur.
    """
    needle = normalize_search_text(term)
    ranked = []
    for text_node in soup.find_all(string=True):
        if needle not in normalize_search_text(str(text_node)):
            continue
        holder = text_node.parent
        if holder is None:
            continue

        context = self._climb_to_context_with_date(holder, term)
        context_text = context.get_text(" ", strip=True)
        # 0 sorts before 1, so contexts containing a date rank first.
        date_penalty = 0 if self._find_nearest_date(context_text, term) else 1
        ranked.append((date_penalty, len(context_text), context))

    if not ranked:
        return None
    # min() returns the first minimal entry, matching a stable sort's head.
    return min(ranked, key=lambda entry: (entry[0], entry[1]))[2]
|
||||
|
||||
def _climb_to_context_with_date(self, node, term: str):
|
||||
current = node
|
||||
best = node
|
||||
for _ in range(6):
|
||||
if current is None:
|
||||
break
|
||||
context_text = current.get_text(" ", strip=True)
|
||||
if self._find_nearest_date(context_text, term):
|
||||
return current
|
||||
best = current
|
||||
current = current.parent
|
||||
return best
|
||||
|
||||
def _find_title(self, soup: BeautifulSoup, term: str) -> str:
|
||||
if soup is None:
|
||||
return term
|
||||
normalized_term = normalize_search_text(term)
|
||||
for heading in soup.find_all(["h1", "h2", "h3", "h4", "strong", "b", "a"]):
|
||||
title = heading.get_text(" ", strip=True)
|
||||
if normalized_term in normalize_search_text(title):
|
||||
return title
|
||||
|
||||
text = soup.get_text(" ", strip=True)
|
||||
dated_match = re.search(
|
||||
r"(.{0,40}\d{1,2}\.\d{1,2}\.(?:\d{2,4})?.{0,100}"
|
||||
+ re.escape(term)
|
||||
+ r".{0,100})",
|
||||
text,
|
||||
re.IGNORECASE,
|
||||
)
|
||||
if dated_match:
|
||||
return " ".join(dated_match.group(1).split())
|
||||
|
||||
match = re.search(r"(.{0,80}" + re.escape(term) + r".{0,80})", text, re.IGNORECASE)
|
||||
if match:
|
||||
return " ".join(match.group(1).split())
|
||||
return term
|
||||
|
||||
def _find_nearest_link(self, soup: BeautifulSoup, term: str, base_url: str) -> str | None:
    """Return the URL of the first link whose text mentions ``term``.

    Only ``<a>`` elements with an ``href`` attribute are considered. The link
    text and the term are compared in normalized form.

    Returns:
        The link target resolved against ``base_url``, or ``None`` if no link
        text contains the term.
    """
    needle = normalize_search_text(term)
    anchor = next(
        (
            candidate
            for candidate in soup.find_all("a", href=True)
            if needle in normalize_search_text(candidate.get_text(" ", strip=True))
        ),
        None,
    )
    if anchor is None:
        return None
    return urljoin(base_url, anchor["href"])
|
||||
Reference in New Issue
Block a user