Webseitensuche verbessert, UI aufgeräumt
This commit is contained in:
@@ -14,6 +14,9 @@ EVENTIM_ENABLED=true
|
||||
POLL_INTERVAL_HOURS=6
|
||||
REMINDER_INTERVAL_HOURS=12
|
||||
|
||||
EVENTLENS_AUTH_USERNAME=
|
||||
EVENTLENS_AUTH_PASSWORD=
|
||||
|
||||
SMTP_HOST=
|
||||
SMTP_PORT=587
|
||||
SMTP_USER=
|
||||
|
||||
@@ -26,15 +26,16 @@ cp .env.example .env
|
||||
docker compose up -d --build
|
||||
```
|
||||
|
||||
Danach ist das Webfrontend lokal unter `http://127.0.0.1:8000` erreichbar.
|
||||
Die Swagger-Oberflaeche liegt unter `http://127.0.0.1:8000/docs`.
|
||||
API-Statusinfo findest du unter `http://127.0.0.1:8000/api`.
|
||||
Danach ist das Webfrontend lokal unter `http://127.0.0.1:8001` erreichbar.
|
||||
Die Swagger-Oberflaeche liegt unter `http://127.0.0.1:8001/docs`.
|
||||
API-Statusinfo findest du unter `http://127.0.0.1:8001/api`.
|
||||
|
||||
## Wichtige Umgebungsvariablen
|
||||
|
||||
- `TICKETMASTER_API_KEY`: Ticketmaster Discovery API
|
||||
- `BANDSINTOWN_APP_ID`: echte Bandsintown App-ID fuer Artist-Events
|
||||
- `EVENTIM_ENABLED`: aktiviert den Eventim-Website-Provider
|
||||
- `EVENTLENS_AUTH_USERNAME`, `EVENTLENS_AUTH_PASSWORD`: optionaler Passwortschutz fuer Webfrontend und API
|
||||
- `NOTIFICATION_EMAIL_TO`: Empfaenger fuer Benachrichtigungen
|
||||
- `SMTP_HOST`, `SMTP_USER`, `SMTP_PASS`: SMTP-Zugang fuer E-Mails
|
||||
|
||||
@@ -43,7 +44,7 @@ API-Statusinfo findest du unter `http://127.0.0.1:8000/api`.
|
||||
1. Watch Item anlegen:
|
||||
|
||||
```bash
|
||||
curl -X POST http://127.0.0.1:8000/watch-items \
|
||||
curl -X POST http://127.0.0.1:8001/watch-items \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"name": "AnnenMayKantereit",
|
||||
@@ -55,28 +56,29 @@ curl -X POST http://127.0.0.1:8000/watch-items \
|
||||
2. Sync manuell anstossen:
|
||||
|
||||
```bash
|
||||
curl -X POST http://127.0.0.1:8000/sync
|
||||
curl -X POST http://127.0.0.1:8001/sync
|
||||
```
|
||||
|
||||
3. Events abfragen:
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8000/events
|
||||
curl http://127.0.0.1:8001/events
|
||||
```
|
||||
|
||||
4. Ticketkauf markieren:
|
||||
|
||||
```bash
|
||||
curl -X PATCH http://127.0.0.1:8000/events/1/purchase \
|
||||
curl -X PATCH http://127.0.0.1:8001/events/1/purchase \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"is_ticket_purchased": true}'
|
||||
```
|
||||
|
||||
## Hinweise fuer Debian 13 und NGINX
|
||||
|
||||
- NGINX kann nativ auf dem Host laufen und auf `127.0.0.1:8000` proxyen.
|
||||
- NGINX kann nativ auf dem Host laufen und auf `127.0.0.1:8001` proxyen.
|
||||
- Das Backend lauscht absichtlich nur auf `127.0.0.1`, damit es nicht direkt aus dem Internet erreichbar ist.
|
||||
- Fuer produktiven Betrieb solltest du TLS im NGINX-Terminator aktivieren.
|
||||
- Setze `EVENTLENS_AUTH_USERNAME` und `EVENTLENS_AUTH_PASSWORD`, wenn Eventlens ueber NGINX, VPN oder Tunnel erreichbar ist.
|
||||
- Das Frontend wird direkt vom FastAPI-Container ausgeliefert, es ist kein Node- oder Build-Container noetig.
|
||||
|
||||
## Bekannte Betriebsfalle
|
||||
|
||||
@@ -25,5 +25,8 @@ class Settings:
|
||||
notification_email_to = os.getenv("NOTIFICATION_EMAIL_TO", "")
|
||||
smtp_starttls = os.getenv("SMTP_STARTTLS", "true").lower() == "true"
|
||||
|
||||
auth_username = os.getenv("EVENTLENS_AUTH_USERNAME", "")
|
||||
auth_password = os.getenv("EVENTLENS_AUTH_PASSWORD", "")
|
||||
|
||||
|
||||
settings = Settings()
|
||||
|
||||
@@ -47,6 +47,47 @@ function formatDate(value) {
|
||||
}).format(date);
|
||||
}
|
||||
|
||||
/**
 * Format an event timestamp as a calendar day in the "de-DE" locale
 * (two-digit day, short month name, numeric year).
 *
 * Falsy input yields the placeholder "unbekannt". Input that cannot be
 * parsed into a valid Date is handed back unchanged.
 */
function formatEventDay(value) {
  if (!value) {
    return "unbekannt";
  }

  // ISO-like timestamps without any offset get a trailing "Z" so that
  // Date parses them as UTC instead of local time.
  const naiveTimestampPattern = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?$/;
  let parseInput = value;
  if (typeof value === "string" && naiveTimestampPattern.test(value)) {
    parseInput = value + "Z";
  }

  const parsed = new Date(parseInput);
  if (Number.isNaN(parsed.getTime())) {
    return value;
  }

  const dayFormatter = new Intl.DateTimeFormat("de-DE", {
    day: "2-digit",
    month: "short",
    year: "numeric",
  });
  return dayFormatter.format(parsed);
}
|
||||
|
||||
/**
 * Format the time-of-day part of an event timestamp in the "de-DE"
 * locale (two-digit hour and minute).
 *
 * Returns an empty string for falsy input and for input that cannot be
 * parsed into a valid Date.
 */
function formatEventTime(value) {
  if (!value) {
    return "";
  }

  // ISO-like timestamps without any offset get a trailing "Z" so that
  // Date parses them as UTC instead of local time.
  const naiveTimestampPattern = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?$/;
  let parseInput = value;
  if (typeof value === "string" && naiveTimestampPattern.test(value)) {
    parseInput = value + "Z";
  }

  const parsed = new Date(parseInput);
  if (Number.isNaN(parsed.getTime())) {
    return "";
  }

  const timeFormatter = new Intl.DateTimeFormat("de-DE", {
    hour: "2-digit",
    minute: "2-digit",
  });
  return timeFormatter.format(parsed);
}
|
||||
|
||||
function escapeHtml(value) {
|
||||
return String(value ?? "")
|
||||
.replaceAll("&", "&")
|
||||
@@ -212,7 +253,11 @@ function renderWatchItems() {
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<p>${escapeHtml(item.notes || "Keine Notiz hinterlegt.")}</p>
|
||||
${
|
||||
item.notes
|
||||
? `<p class="watch-note">${escapeHtml(item.notes)}</p>`
|
||||
: ""
|
||||
}
|
||||
${renderSourceList(item.id)}
|
||||
<form class="source-form" data-watch-id="${item.id}">
|
||||
<input
|
||||
@@ -246,12 +291,9 @@ function renderSourceList(watchItemId) {
|
||||
.map(
|
||||
(source) => `
|
||||
<div class="source-row">
|
||||
<div>
|
||||
<strong>${escapeHtml(source.label || "Quelle")}</strong>
|
||||
<a href="${escapeHtml(source.url)}" target="_blank" rel="noreferrer">
|
||||
${escapeHtml(source.url)}
|
||||
</a>
|
||||
<div class="pill-row">
|
||||
<div class="source-content">
|
||||
<div class="source-title-row">
|
||||
<strong>${escapeHtml(source.label || "Quelle")}</strong>
|
||||
<span class="pill ${
|
||||
source.last_status === "ok"
|
||||
? "success"
|
||||
@@ -259,8 +301,11 @@ function renderSourceList(watchItemId) {
|
||||
? "danger"
|
||||
: "warning"
|
||||
}">${escapeHtml(source.last_status)}</span>
|
||||
<span class="muted">${escapeHtml(source.last_message || "Noch nicht gescannt.")}</span>
|
||||
</div>
|
||||
<a href="${escapeHtml(source.url)}" target="_blank" rel="noreferrer">
|
||||
${escapeHtml(source.url)}
|
||||
</a>
|
||||
<p class="source-message">${escapeHtml(source.last_message || "Noch nicht gescannt.")}</p>
|
||||
</div>
|
||||
<div class="action-row">
|
||||
<button class="action-button" data-action="toggle-source" data-id="${source.id}">
|
||||
@@ -313,48 +358,48 @@ function renderEvents() {
|
||||
.map(
|
||||
(event) => `
|
||||
<article class="event-card">
|
||||
<div class="event-header">
|
||||
<div>
|
||||
<div class="event-date-badge">
|
||||
<strong>${escapeHtml(formatEventDay(event.event_date))}</strong>
|
||||
<span>${escapeHtml(formatEventTime(event.event_date) || "Zeit offen")}</span>
|
||||
</div>
|
||||
<div class="event-content">
|
||||
<div class="event-header">
|
||||
<h3>${escapeHtml(event.title)}</h3>
|
||||
<div class="pill-row">
|
||||
<span class="pill">${escapeHtml(getWatchNameById(event.watch_item_id))}</span>
|
||||
<span class="pill">${escapeHtml(event.city || "ohne Stadt")}</span>
|
||||
<span class="pill ${getProviderClass(event.source)}">
|
||||
${escapeHtml(prettifyProviderName(event.source))}
|
||||
</span>
|
||||
<span class="pill ${event.is_ticket_purchased ? "success" : "warning"}">
|
||||
${event.is_ticket_purchased ? "Ticket markiert" : "ohne Ticket"}
|
||||
</span>
|
||||
</div>
|
||||
<div class="event-meta">
|
||||
<span>${escapeHtml(getWatchNameById(event.watch_item_id))}</span>
|
||||
<span>${escapeHtml(event.city || "ohne Stadt")}</span>
|
||||
<span>${escapeHtml(event.venue_name || "Venue unbekannt")}</span>
|
||||
<span>${escapeHtml(prettifyProviderName(event.source))}</span>
|
||||
</div>
|
||||
<div class="event-footer">
|
||||
<span class="pill ${event.is_ticket_purchased ? "success" : "warning"}">
|
||||
${event.is_ticket_purchased ? "Ticket markiert" : "ohne Ticket"}
|
||||
</span>
|
||||
<div class="event-actions">
|
||||
${
|
||||
event.ticket_url
|
||||
? `<a class="action-button event-link" href="${escapeHtml(event.ticket_url)}" target="_blank" rel="noreferrer">Tickets</a>`
|
||||
: ""
|
||||
}
|
||||
<button
|
||||
class="action-button ${event.is_ticket_purchased ? "" : "success"}"
|
||||
data-action="toggle-ticket"
|
||||
data-id="${event.id}"
|
||||
data-value="${event.is_ticket_purchased ? "false" : "true"}"
|
||||
>
|
||||
${event.is_ticket_purchased ? "Ticket entfernen" : "Ticket gekauft"}
|
||||
</button>
|
||||
<button
|
||||
class="action-button danger"
|
||||
data-action="delete-event"
|
||||
data-id="${event.id}"
|
||||
>
|
||||
Loeschen
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="event-actions">
|
||||
<button
|
||||
class="action-button ${event.is_ticket_purchased ? "" : "success"}"
|
||||
data-action="toggle-ticket"
|
||||
data-id="${event.id}"
|
||||
data-value="${event.is_ticket_purchased ? "false" : "true"}"
|
||||
>
|
||||
${event.is_ticket_purchased ? "Ticket entfernen" : "Ticket gekauft"}
|
||||
</button>
|
||||
<button
|
||||
class="action-button danger"
|
||||
data-action="delete-event"
|
||||
data-id="${event.id}"
|
||||
>
|
||||
Loeschen
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="event-meta">
|
||||
<span><strong>Datum:</strong> ${escapeHtml(formatDate(event.event_date))}</span>
|
||||
<span><strong>Venue:</strong> ${escapeHtml(event.venue_name || "unbekannt")}</span>
|
||||
<span><strong>Quelle:</strong> ${escapeHtml(event.source)}</span>
|
||||
</div>
|
||||
${
|
||||
event.ticket_url
|
||||
? `<p><a class="event-link" href="${escapeHtml(event.ticket_url)}" target="_blank" rel="noreferrer">Ticketlink oeffnen</a></p>`
|
||||
: ""
|
||||
}
|
||||
</article>
|
||||
`
|
||||
)
|
||||
|
||||
@@ -11,10 +11,10 @@
|
||||
--success: #245e3f;
|
||||
--warning: #8d5a13;
|
||||
--danger: #8a2f2f;
|
||||
--shadow: 0 22px 70px rgba(96, 64, 24, 0.14);
|
||||
--radius-lg: 28px;
|
||||
--radius-md: 18px;
|
||||
--radius-sm: 12px;
|
||||
--shadow: 0 16px 42px rgba(72, 55, 36, 0.11);
|
||||
--radius-lg: 8px;
|
||||
--radius-md: 8px;
|
||||
--radius-sm: 8px;
|
||||
--mono: "IBM Plex Mono", monospace;
|
||||
--sans: "Space Grotesk", sans-serif;
|
||||
}
|
||||
@@ -29,8 +29,6 @@ body {
|
||||
font-family: var(--sans);
|
||||
color: var(--text);
|
||||
background:
|
||||
radial-gradient(circle at top left, rgba(194, 77, 44, 0.18), transparent 28%),
|
||||
radial-gradient(circle at 85% 18%, rgba(36, 94, 63, 0.14), transparent 22%),
|
||||
linear-gradient(135deg, var(--bg), #f7f2ea 48%, var(--bg-accent));
|
||||
}
|
||||
|
||||
@@ -49,14 +47,14 @@ body::before {
|
||||
.page-shell {
|
||||
width: min(1280px, calc(100vw - 32px));
|
||||
margin: 0 auto;
|
||||
padding: 32px 0 56px;
|
||||
padding: 24px 0 44px;
|
||||
}
|
||||
|
||||
.hero {
|
||||
display: grid;
|
||||
grid-template-columns: 1.6fr 1fr;
|
||||
gap: 24px;
|
||||
margin-bottom: 24px;
|
||||
gap: 16px;
|
||||
margin-bottom: 16px;
|
||||
}
|
||||
|
||||
.hero-copy,
|
||||
@@ -69,7 +67,7 @@ body::before {
|
||||
}
|
||||
|
||||
.hero-copy {
|
||||
padding: 36px;
|
||||
padding: 28px;
|
||||
border-radius: var(--radius-lg);
|
||||
}
|
||||
|
||||
@@ -93,36 +91,36 @@ p {
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: clamp(2.5rem, 5vw, 4.9rem);
|
||||
line-height: 0.95;
|
||||
max-width: 11ch;
|
||||
font-size: clamp(2.2rem, 4vw, 3.8rem);
|
||||
line-height: 1;
|
||||
max-width: 14ch;
|
||||
}
|
||||
|
||||
.hero-text {
|
||||
max-width: 58ch;
|
||||
margin-top: 18px;
|
||||
font-size: 1.05rem;
|
||||
line-height: 1.6;
|
||||
margin-top: 14px;
|
||||
font-size: 1rem;
|
||||
line-height: 1.5;
|
||||
color: var(--muted);
|
||||
}
|
||||
|
||||
.hero-actions {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 12px;
|
||||
margin-top: 26px;
|
||||
gap: 10px;
|
||||
margin-top: 20px;
|
||||
}
|
||||
|
||||
.status-panel {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(2, minmax(0, 1fr));
|
||||
gap: 14px;
|
||||
padding: 20px;
|
||||
gap: 10px;
|
||||
padding: 14px;
|
||||
border-radius: var(--radius-lg);
|
||||
}
|
||||
|
||||
.stat-card {
|
||||
padding: 18px;
|
||||
padding: 14px;
|
||||
background: rgba(255, 250, 242, 0.86);
|
||||
border-radius: var(--radius-md);
|
||||
border: 1px solid rgba(46, 39, 30, 0.08);
|
||||
@@ -130,7 +128,7 @@ h1 {
|
||||
|
||||
.stat-card strong {
|
||||
display: block;
|
||||
font-size: 2rem;
|
||||
font-size: 1.7rem;
|
||||
line-height: 1.1;
|
||||
}
|
||||
|
||||
@@ -151,12 +149,12 @@ h1 {
|
||||
|
||||
.dashboard-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(2, minmax(0, 1fr));
|
||||
gap: 24px;
|
||||
grid-template-columns: minmax(360px, 0.9fr) minmax(420px, 1.1fr);
|
||||
gap: 16px;
|
||||
}
|
||||
|
||||
.panel {
|
||||
padding: 24px;
|
||||
padding: 18px;
|
||||
border-radius: var(--radius-lg);
|
||||
}
|
||||
|
||||
@@ -168,8 +166,8 @@ h1 {
|
||||
display: flex;
|
||||
align-items: start;
|
||||
justify-content: space-between;
|
||||
gap: 18px;
|
||||
margin-bottom: 18px;
|
||||
gap: 14px;
|
||||
margin-bottom: 14px;
|
||||
}
|
||||
|
||||
.panel-tools {
|
||||
@@ -180,8 +178,8 @@ h1 {
|
||||
.watch-form {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(2, minmax(0, 1fr));
|
||||
gap: 14px;
|
||||
margin-bottom: 20px;
|
||||
gap: 10px;
|
||||
margin-bottom: 16px;
|
||||
}
|
||||
|
||||
.full-width {
|
||||
@@ -206,7 +204,7 @@ input,
|
||||
select,
|
||||
textarea {
|
||||
width: 100%;
|
||||
padding: 14px 16px;
|
||||
padding: 10px 12px;
|
||||
border-radius: var(--radius-sm);
|
||||
border: 1px solid rgba(46, 39, 30, 0.16);
|
||||
background: rgba(255, 255, 255, 0.88);
|
||||
@@ -227,8 +225,8 @@ button,
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
min-height: 48px;
|
||||
padding: 0 18px;
|
||||
min-height: 40px;
|
||||
padding: 0 14px;
|
||||
border: 0;
|
||||
border-radius: 999px;
|
||||
text-decoration: none;
|
||||
@@ -258,11 +256,11 @@ button:hover,
|
||||
.notification-list,
|
||||
.provider-status-list {
|
||||
display: grid;
|
||||
gap: 14px;
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
.empty-state {
|
||||
padding: 28px;
|
||||
padding: 20px;
|
||||
border-radius: var(--radius-md);
|
||||
border: 1px dashed rgba(46, 39, 30, 0.18);
|
||||
color: var(--muted);
|
||||
@@ -292,7 +290,7 @@ button:hover,
|
||||
display: flex;
|
||||
align-items: start;
|
||||
justify-content: space-between;
|
||||
gap: 16px;
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
.pill-row,
|
||||
@@ -302,14 +300,14 @@ button:hover,
|
||||
.notification-meta {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 8px;
|
||||
gap: 6px;
|
||||
}
|
||||
|
||||
.pill {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
min-height: 28px;
|
||||
padding: 0 10px;
|
||||
min-height: 24px;
|
||||
padding: 0 8px;
|
||||
border-radius: 999px;
|
||||
background: rgba(194, 77, 44, 0.09);
|
||||
color: var(--primary-dark);
|
||||
@@ -377,47 +375,74 @@ button:hover,
|
||||
.event-card p,
|
||||
.notification-card p,
|
||||
.provider-status-card p {
|
||||
margin-top: 12px;
|
||||
line-height: 1.55;
|
||||
margin-top: 8px;
|
||||
line-height: 1.45;
|
||||
}
|
||||
|
||||
.watch-note {
|
||||
color: var(--muted);
|
||||
}
|
||||
|
||||
.source-list {
|
||||
display: grid;
|
||||
gap: 10px;
|
||||
margin-top: 16px;
|
||||
gap: 8px;
|
||||
margin-top: 12px;
|
||||
}
|
||||
|
||||
.source-row {
|
||||
display: flex;
|
||||
display: grid;
|
||||
grid-template-columns: minmax(0, 1fr) auto;
|
||||
align-items: start;
|
||||
justify-content: space-between;
|
||||
gap: 16px;
|
||||
padding: 14px;
|
||||
gap: 12px;
|
||||
padding: 10px;
|
||||
border-radius: var(--radius-sm);
|
||||
background: rgba(46, 39, 30, 0.05);
|
||||
}
|
||||
|
||||
.source-content {
|
||||
min-width: 0;
|
||||
}
|
||||
|
||||
.source-title-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.source-row a {
|
||||
display: block;
|
||||
max-width: 46ch;
|
||||
margin: 4px 0 8px;
|
||||
max-width: 100%;
|
||||
margin: 4px 0;
|
||||
overflow-wrap: anywhere;
|
||||
color: var(--primary-dark);
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.source-message {
|
||||
margin: 0;
|
||||
color: var(--muted);
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.source-form {
|
||||
display: grid;
|
||||
grid-template-columns: 0.7fr 1.4fr auto;
|
||||
gap: 10px;
|
||||
margin-top: 14px;
|
||||
gap: 8px;
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
.action-button {
|
||||
min-height: 38px;
|
||||
padding: 0 14px;
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
min-height: 32px;
|
||||
padding: 0 10px;
|
||||
border-radius: 999px;
|
||||
background: rgba(46, 39, 30, 0.08);
|
||||
color: var(--text);
|
||||
text-decoration: none;
|
||||
white-space: nowrap;
|
||||
font-size: 0.92rem;
|
||||
}
|
||||
|
||||
.action-button.danger {
|
||||
@@ -430,16 +455,67 @@ button:hover,
|
||||
color: var(--success);
|
||||
}
|
||||
|
||||
.event-card {
|
||||
display: grid;
|
||||
grid-template-columns: 112px minmax(0, 1fr);
|
||||
gap: 14px;
|
||||
padding: 14px;
|
||||
}
|
||||
|
||||
.event-date-badge {
|
||||
display: grid;
|
||||
align-content: center;
|
||||
min-height: 86px;
|
||||
padding: 10px;
|
||||
border-radius: var(--radius-sm);
|
||||
background: rgba(194, 77, 44, 0.1);
|
||||
color: var(--primary-dark);
|
||||
}
|
||||
|
||||
.event-date-badge strong {
|
||||
font-size: 1rem;
|
||||
line-height: 1.15;
|
||||
}
|
||||
|
||||
.event-date-badge span {
|
||||
margin-top: 4px;
|
||||
color: var(--muted);
|
||||
font-family: var(--mono);
|
||||
font-size: 0.78rem;
|
||||
}
|
||||
|
||||
.event-content {
|
||||
min-width: 0;
|
||||
}
|
||||
|
||||
.event-header h3 {
|
||||
overflow-wrap: anywhere;
|
||||
}
|
||||
|
||||
.event-meta,
|
||||
.notification-meta {
|
||||
margin-top: 14px;
|
||||
margin-top: 8px;
|
||||
color: var(--muted);
|
||||
font-size: 0.94rem;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.event-meta span:not(:last-child)::after {
|
||||
content: "/";
|
||||
margin-left: 8px;
|
||||
color: rgba(101, 89, 77, 0.52);
|
||||
}
|
||||
|
||||
.event-footer {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 10px;
|
||||
margin-top: 12px;
|
||||
}
|
||||
|
||||
.event-link {
|
||||
color: var(--primary-dark);
|
||||
text-decoration-thickness: 2px;
|
||||
background: rgba(194, 77, 44, 0.09);
|
||||
}
|
||||
|
||||
.toast {
|
||||
@@ -514,4 +590,30 @@ button:hover,
|
||||
.notification-header {
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.source-row,
|
||||
.event-card {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
.event-date-badge {
|
||||
min-height: auto;
|
||||
}
|
||||
|
||||
.event-footer,
|
||||
.source-title-row {
|
||||
align-items: flex-start;
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.event-actions,
|
||||
.action-row,
|
||||
.panel-tools {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.event-actions .action-button,
|
||||
.action-row .action-button {
|
||||
flex: 1 1 140px;
|
||||
}
|
||||
}
|
||||
|
||||
+30
-1
@@ -1,8 +1,10 @@
|
||||
import base64
|
||||
import secrets
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import Depends, FastAPI, HTTPException
|
||||
from fastapi.responses import FileResponse
|
||||
from fastapi.responses import FileResponse, Response
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
@@ -44,6 +46,33 @@ static_dir = frontend_dir / "static"
|
||||
app.mount("/static", StaticFiles(directory=static_dir), name="static")
|
||||
|
||||
|
||||
@app.middleware("http")
async def require_basic_auth(request, call_next):
    """Enforce HTTP Basic authentication when credentials are configured.

    If either ``settings.auth_username`` or ``settings.auth_password`` is
    empty, the check is skipped and the request is passed straight on.
    Otherwise the ``Authorization`` header must carry a ``Basic`` scheme
    whose base64 payload decodes to ``username:password`` matching the
    configured values; any other request is answered with 401 and a
    ``WWW-Authenticate`` challenge.
    """
    expected_username = settings.auth_username
    expected_password = settings.auth_password
    if not expected_username or not expected_password:
        return await call_next(request)

    authorization = request.headers.get("authorization", "")
    scheme, _, credentials = authorization.partition(" ")
    if scheme.lower() == "basic" and credentials:
        try:
            decoded = base64.b64decode(credentials).decode("utf-8")
        except (ValueError, UnicodeDecodeError):
            # Malformed base64 or non-UTF-8 payload: treat as no credentials.
            decoded = ""

        username, separator, password = decoded.partition(":")
        # compare_digest() only accepts ASCII-only str and raises TypeError
        # otherwise, so compare the UTF-8 bytes instead. That keeps
        # non-ASCII credentials (e.g. umlauts) from turning into a 500.
        # Both comparisons are always evaluated so a wrong username does
        # not skip the password check.
        username_ok = secrets.compare_digest(
            username.encode("utf-8"), expected_username.encode("utf-8")
        )
        password_ok = secrets.compare_digest(
            password.encode("utf-8"), expected_password.encode("utf-8")
        )
        if separator and username_ok and password_ok:
            return await call_next(request)

    return Response(
        status_code=401,
        headers={"WWW-Authenticate": 'Basic realm="eventlens"'},
    )
|
||||
|
||||
|
||||
@app.on_event("startup")
|
||||
def startup():
|
||||
Base.metadata.create_all(bind=engine)
|
||||
|
||||
+38
-4
@@ -42,7 +42,7 @@ def list_watch_items(db: Session) -> list[WatchItem]:
|
||||
|
||||
|
||||
def list_events(db: Session) -> list[TrackedEvent]:
|
||||
events = list(db.scalars(select(TrackedEvent).order_by(desc(TrackedEvent.event_date))))
|
||||
events = list(db.scalars(select(TrackedEvent)))
|
||||
deduped: list[TrackedEvent] = []
|
||||
|
||||
for event in events:
|
||||
@@ -61,7 +61,16 @@ def list_events(db: Session) -> list[TrackedEvent]:
|
||||
if is_preferred_event(event, deduped[duplicate_index]):
|
||||
deduped[duplicate_index] = event
|
||||
|
||||
return deduped
|
||||
return sorted(deduped, key=event_sort_key)
|
||||
|
||||
|
||||
def event_sort_key(event: TrackedEvent):
    """Return the sort key used to order events for display.

    The key is a ``(bucket, datetime)`` tuple:

    * bucket 0 - events dated today (UTC) or later, ascending by date
    * bucket 1 - events without a date
    * bucket 2 - past events; the date is mirrored against
      ``datetime.max`` so that the most recent past event sorts first
    """
    event_date = event.event_date
    if event_date is None:
        return (1, datetime.max)

    today = datetime.utcnow().date()
    if event_date.date() < today:
        # Mirror the distance from datetime.min so later dates yield
        # smaller keys, i.e. descending order within the past bucket.
        return (2, datetime.max - (event_date - datetime.min))
    return (0, event_date)
|
||||
|
||||
|
||||
def list_notifications(db: Session):
|
||||
@@ -269,6 +278,27 @@ def upsert_event(
|
||||
return tracked_event, is_new
|
||||
|
||||
|
||||
def prune_stale_source_events(
    db: Session,
    watch_item: WatchItem,
    source_name: str,
    seen_external_ids: set[str],
) -> int:
    """Delete events of one source that the latest scan no longer reported.

    Only events belonging to ``watch_item`` with ``source == source_name``
    and without a purchased ticket are considered. Of those, every event
    whose ``external_id`` is not in ``seen_external_ids`` is deleted via
    the session. The caller is responsible for committing.

    Returns the number of events marked for deletion.
    """
    unpurchased_for_source = select(TrackedEvent).where(
        TrackedEvent.watch_item_id == watch_item.id,
        TrackedEvent.source == source_name,
        TrackedEvent.is_ticket_purchased.is_(False),
    )

    # Collect the candidates first; deletion happens only after the
    # query result has been fully consumed.
    stale = []
    for candidate in db.scalars(unpurchased_for_source):
        if candidate.external_id in seen_external_ids:
            continue
        stale.append(candidate)

    removed = 0
    for candidate in stale:
        db.delete(candidate)
        removed += 1
    return removed
|
||||
|
||||
|
||||
def run_sync(db: Session) -> SyncResult:
|
||||
providers = get_providers()
|
||||
source_scanner = SourceScanner()
|
||||
@@ -288,12 +318,14 @@ def run_sync(db: Session) -> SyncResult:
|
||||
for watch_item in active_items:
|
||||
active_sources = [source for source in watch_item.sources if source.is_active]
|
||||
for source in active_sources:
|
||||
source_name = f"source:{source.id}"
|
||||
seen_source_event_ids: set[str] = set()
|
||||
try:
|
||||
events = source_scanner.scan(watch_item, source)
|
||||
source.last_status = (
|
||||
SourceStatusType.ok if events else SourceStatusType.no_match
|
||||
)
|
||||
source.last_message = (
|
||||
source.last_message = source_scanner.last_message or (
|
||||
f"{len(events)} passende Events gefunden."
|
||||
if events
|
||||
else "Keine passenden Events auf dieser Quelle gefunden."
|
||||
@@ -317,9 +349,10 @@ def run_sync(db: Session) -> SyncResult:
|
||||
tracked_event, is_new = upsert_event(
|
||||
db=db,
|
||||
watch_item=watch_item,
|
||||
provider_name=f"source:{source.id}",
|
||||
provider_name=source_name,
|
||||
event_data=event_data,
|
||||
)
|
||||
seen_source_event_ids.add(tracked_event.external_id)
|
||||
if is_new:
|
||||
new_events += 1
|
||||
else:
|
||||
@@ -353,6 +386,7 @@ def run_sync(db: Session) -> SyncResult:
|
||||
notifications_skipped += 1
|
||||
|
||||
db.add(source)
|
||||
prune_stale_source_events(db, watch_item, source_name, seen_source_event_ids)
|
||||
db.commit()
|
||||
|
||||
for provider in providers:
|
||||
|
||||
+246
-21
@@ -1,8 +1,9 @@
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime
|
||||
from difflib import SequenceMatcher
|
||||
from html import unescape
|
||||
from urllib.parse import urljoin
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
@@ -40,6 +41,20 @@ MONTH_ALIASES = {
|
||||
"dezember": 12,
|
||||
}
|
||||
|
||||
FOLLOW_LINK_KEYWORDS = (
|
||||
"event",
|
||||
"gig",
|
||||
"konzert",
|
||||
"live",
|
||||
"rausgegangen",
|
||||
"show",
|
||||
"termin",
|
||||
"ticket",
|
||||
"tour",
|
||||
)
|
||||
|
||||
MAX_FOLLOWED_LINKS = 6
|
||||
|
||||
|
||||
class SourceScanner:
|
||||
headers = {
|
||||
@@ -48,7 +63,11 @@ class SourceScanner:
|
||||
"Accept-Language": "de-DE,de;q=0.9,en;q=0.7",
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
self.last_message = ""
|
||||
|
||||
def scan(self, watch_item: WatchItem, source: WatchSource) -> list[dict]:
|
||||
self.last_message = ""
|
||||
response = requests.get(
|
||||
source.url,
|
||||
headers=self.headers,
|
||||
@@ -58,16 +77,30 @@ class SourceScanner:
|
||||
|
||||
content_type = response.headers.get("content-type", "")
|
||||
if "application/json" in content_type:
|
||||
return self._scan_json(watch_item, source, response.json())
|
||||
results = self._scan_json(watch_item, source, response.json())
|
||||
if not self.last_message:
|
||||
self.last_message = self._build_scan_message(results, "JSON-Daten ausgewertet.")
|
||||
return results
|
||||
|
||||
return self._scan_html(watch_item, source, response.text)
|
||||
results = self._scan_html(watch_item, source, response.text, response.url)
|
||||
if not self.last_message:
|
||||
self.last_message = self._build_scan_message(results, "HTML-Seite ausgewertet.")
|
||||
return results
|
||||
|
||||
def _scan_json(self, watch_item: WatchItem, source: WatchSource, payload) -> list[dict]:
    """Extract JSON-LD events from ``payload`` and convert them to results.

    The payload is passed through ``_extract_jsonld_events`` and the
    extracted events are handed to ``_events_from_jsonld`` together with
    the watch item and source.
    """
    return self._events_from_jsonld(
        watch_item,
        source,
        self._extract_jsonld_events(payload),
    )
|
||||
|
||||
def _scan_html(self, watch_item: WatchItem, source: WatchSource, html: str) -> list[dict]:
|
||||
def _scan_html(
|
||||
self,
|
||||
watch_item: WatchItem,
|
||||
source: WatchSource,
|
||||
html: str,
|
||||
base_url: str | None = None,
|
||||
follow_links: bool = True,
|
||||
) -> list[dict]:
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
source_url = base_url or source.url
|
||||
jsonld_events = []
|
||||
|
||||
for script in soup.find_all("script", type="application/ld+json"):
|
||||
@@ -84,7 +117,16 @@ class SourceScanner:
|
||||
if jsonld_results:
|
||||
return jsonld_results
|
||||
|
||||
return self._events_from_html_text(watch_item, source, soup)
|
||||
html_results = self._events_from_html_text(watch_item, source, soup, source_url)
|
||||
if html_results:
|
||||
return html_results
|
||||
|
||||
if follow_links:
|
||||
linked_results = self._events_from_linked_pages(watch_item, source, soup, source_url)
|
||||
if linked_results:
|
||||
return linked_results
|
||||
|
||||
return []
|
||||
|
||||
def _extract_jsonld_events(self, payload) -> list[dict]:
|
||||
events: list[dict] = []
|
||||
@@ -119,22 +161,28 @@ class SourceScanner:
|
||||
) -> list[dict]:
|
||||
results: list[dict] = []
|
||||
normalized_term = normalize_search_text(watch_item.name)
|
||||
matching_name_count = 0
|
||||
outside_region_count = 0
|
||||
past_count = 0
|
||||
|
||||
for event in events:
|
||||
title = event.get("name") or ""
|
||||
performers = self._extract_performer_names(event)
|
||||
haystack = normalize_search_text(" ".join([title] + performers))
|
||||
if normalized_term not in haystack:
|
||||
if not self._term_matches_normalized(normalized_term, haystack):
|
||||
continue
|
||||
matching_name_count += 1
|
||||
|
||||
location = event.get("location") or {}
|
||||
address = location.get("address") or {}
|
||||
city = address.get("addressLocality") or location.get("name")
|
||||
if watch_item.region_scope == RegionScope.hamburg and normalize_search_text(city) != "hamburg":
|
||||
outside_region_count += 1
|
||||
continue
|
||||
|
||||
event_date = self._parse_datetime(event.get("startDate"))
|
||||
if event_date and event_date.date() < datetime.utcnow().date():
|
||||
past_count += 1
|
||||
continue
|
||||
ticket_url = event.get("url") or source.url
|
||||
|
||||
@@ -153,6 +201,22 @@ class SourceScanner:
|
||||
}
|
||||
)
|
||||
|
||||
if results:
|
||||
self.last_message = f"{len(results)} passende Events in strukturierten Daten gefunden."
|
||||
elif outside_region_count:
|
||||
self.last_message = (
|
||||
f"Strukturierte Daten gefunden, aber kein Termin fuer '{watch_item.name}' "
|
||||
"in Hamburg."
|
||||
)
|
||||
elif past_count:
|
||||
self.last_message = (
|
||||
f"Strukturierte Daten gefunden, aber nur vergangene Termine fuer '{watch_item.name}'."
|
||||
)
|
||||
elif matching_name_count:
|
||||
self.last_message = (
|
||||
f"Strukturierte Daten gefunden, aber keine passenden Termine fuer '{watch_item.name}'."
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
def _events_from_html_text(
|
||||
@@ -160,30 +224,41 @@ class SourceScanner:
|
||||
watch_item: WatchItem,
|
||||
source: WatchSource,
|
||||
soup: BeautifulSoup,
|
||||
base_url: str | None = None,
|
||||
) -> list[dict]:
|
||||
text = soup.get_text(" ", strip=True)
|
||||
normalized_text = normalize_search_text(text)
|
||||
normalized_term = normalize_search_text(watch_item.name)
|
||||
if normalized_term not in normalized_text:
|
||||
if not self._term_matches_normalized(normalized_term, normalized_text):
|
||||
self.last_message = (
|
||||
f"Seite erreichbar, aber der Name '{watch_item.name}' wurde nicht gefunden."
|
||||
)
|
||||
return []
|
||||
|
||||
results: list[dict] = []
|
||||
seen_keys: set[str] = set()
|
||||
contexts_with_date = 0
|
||||
past_contexts = 0
|
||||
outside_region_contexts = 0
|
||||
for context in self._find_matching_contexts(soup, watch_item):
|
||||
context_text = context.get_text(" ", strip=True)
|
||||
event_date = self._find_nearest_date(context_text, watch_item.name)
|
||||
if event_date is None:
|
||||
continue
|
||||
contexts_with_date += 1
|
||||
if event_date.date() < datetime.utcnow().date():
|
||||
past_contexts += 1
|
||||
continue
|
||||
if (
|
||||
watch_item.region_scope == RegionScope.hamburg
|
||||
and "hamburg" not in normalize_search_text(context_text)
|
||||
):
|
||||
outside_region_contexts += 1
|
||||
continue
|
||||
|
||||
title = self._find_title(context, watch_item.name)
|
||||
link = self._find_nearest_link(context, watch_item.name, source.url) or source.url
|
||||
context_url = base_url or source.url
|
||||
link = self._find_nearest_link(context, watch_item.name, context_url) or context_url
|
||||
key = f"{source.id}:{normalize_search_text(title)}:{event_date.date().isoformat()}"
|
||||
if key in seen_keys:
|
||||
continue
|
||||
@@ -201,15 +276,118 @@ class SourceScanner:
|
||||
"ticket_url": link,
|
||||
"image_url": None,
|
||||
"raw_payload": {
|
||||
"source_url": source.url,
|
||||
"source_url": context_url,
|
||||
"parser": "html_text",
|
||||
"context": context_text[:1000],
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
if results:
|
||||
self.last_message = f"{len(results)} passende Events gefunden."
|
||||
elif outside_region_contexts:
|
||||
self.last_message = (
|
||||
f"Seite erreichbar, Termine fuer '{watch_item.name}' gefunden, "
|
||||
"aber keiner in Hamburg."
|
||||
)
|
||||
elif past_contexts:
|
||||
self.last_message = (
|
||||
f"Seite erreichbar, aber nur vergangene Termine fuer '{watch_item.name}' gefunden."
|
||||
)
|
||||
elif contexts_with_date == 0:
|
||||
self.last_message = (
|
||||
f"Seite erreichbar, Name '{watch_item.name}' gefunden, "
|
||||
"aber keine auswertbaren Termine."
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
def _events_from_linked_pages(
    self,
    watch_item: WatchItem,
    source: WatchSource,
    soup: BeautifulSoup,
    base_url: str,
) -> list[dict]:
    """Fetch promising links found on a page and scan each one for events.

    The links come from ``_candidate_follow_links``. Every page that can be
    fetched is scanned as JSON or HTML depending on its Content-Type, and the
    events are merged, de-duplicated by their ``external_id``.

    Args:
        watch_item: The watch item whose events are being searched for.
        source: The source the start page belongs to.
        soup: Parsed start page whose anchors are inspected.
        base_url: URL of the start page, used to resolve relative links.

    Returns:
        The de-duplicated events found on the linked pages (possibly empty).

    Side effects:
        Sets ``self.last_message`` when events were found or when at least
        one link was fetched successfully.
    """
    results: list[dict] = []
    seen_event_keys: set[str] = set()
    checked_links = 0

    for link_url in self._candidate_follow_links(soup, base_url):
        try:
            response = requests.get(link_url, headers=self.headers, timeout=30)
            response.raise_for_status()
        except requests.RequestException:
            # Best effort: an unreachable linked page must not abort the scan.
            continue

        checked_links += 1
        # Media types are case-insensitive, so compare in lower case.
        content_type = response.headers.get("content-type", "").lower()
        if "application/json" in content_type:
            try:
                payload = response.json()
            except ValueError:
                # The server announced JSON but sent an unparsable body.
                # Skip this page instead of failing the whole scan.
                continue
            linked_results = self._scan_json(watch_item, source, payload)
        elif (
            "text/html" in content_type
            or "application/xhtml+xml" in content_type
            or not content_type
        ):
            # follow_links=False: presumably keeps the nested scan from
            # following links again -- confirm against _scan_html.
            linked_results = self._scan_html(
                watch_item,
                source,
                response.text,
                response.url,
                follow_links=False,
            )
        else:
            continue

        for event in linked_results:
            key = event["external_id"]
            if key in seen_event_keys:
                continue
            seen_event_keys.add(key)
            results.append(event)

    if results:
        self.last_message = f"{len(results)} passende Events auf verlinkten Seiten gefunden."
    elif checked_links:
        self.last_message = (
            f"Seite erreichbar, {checked_links} relevante Links geprueft, "
            "aber keine passenden Events gefunden."
        )

    return results
|
||||
|
||||
def _build_scan_message(self, results: list[dict], fallback: str) -> str:
|
||||
if results:
|
||||
return f"{len(results)} passende Events gefunden."
|
||||
return f"{fallback} Keine passenden Events gefunden."
|
||||
|
||||
def _candidate_follow_links(self, soup: BeautifulSoup, base_url: str) -> list[str]:
    """Pick the links on a page that are most likely to lead to event listings.

    Each anchor is resolved against ``base_url`` and scored by how many
    ``FOLLOW_LINK_KEYWORDS`` occur in its normalized text, href, host and
    path. Links on the same host as ``base_url`` get one bonus point, and
    links without any keyword hit are dropped.

    Args:
        soup: Parsed page whose ``<a href>`` elements are inspected.
        base_url: URL of that page, used to resolve relative hrefs.

    Returns:
        At most ``MAX_FOLLOWED_LINKS`` absolute http(s) URLs without their
        fragment, best score first.
    """
    # Local import so this block does not rely on the file-level import list.
    from urllib.parse import urldefrag

    base_host = urlparse(base_url).netloc.lower()
    base_page = urldefrag(base_url).url.rstrip("/")
    scored_links: list[tuple[int, str]] = []
    seen_urls: set[str] = set()

    for link in soup.find_all("a", href=True):
        href = link["href"].strip()
        if not href or href.startswith(("#", "mailto:", "tel:", "javascript:")):
            continue

        # Drop the fragment: "page#a" and "page#b" are the same document, and
        # "<base>#section" is the page that was already scanned.
        link_url = urldefrag(urljoin(base_url, href)).url
        if link_url in seen_urls or link_url.rstrip("/") == base_page:
            continue
        seen_urls.add(link_url)

        parsed = urlparse(link_url)
        if parsed.scheme not in {"http", "https"}:
            continue

        link_text = link.get_text(" ", strip=True)
        haystack = normalize_search_text(" ".join([link_text, href, parsed.netloc, parsed.path]))
        keyword_hits = sum(1 for keyword in FOLLOW_LINK_KEYWORDS if keyword in haystack)
        if keyword_hits == 0:
            continue

        same_host_bonus = 1 if parsed.netloc.lower() == base_host else 0
        scored_links.append((keyword_hits + same_host_bonus, link_url))

    # The sort is stable, so links with equal scores keep document order.
    scored_links.sort(key=lambda item: item[0], reverse=True)
    return [link_url for _, link_url in scored_links[:MAX_FOLLOWED_LINKS]]
|
||||
|
||||
def _extract_performer_names(self, event: dict) -> list[str]:
|
||||
performer = event.get("performer") or event.get("performers")
|
||||
if isinstance(performer, dict):
|
||||
@@ -252,16 +430,22 @@ class SourceScanner:
|
||||
end = min(len(text), term_index + 500)
|
||||
search_area = text[start:end]
|
||||
|
||||
candidates: list[datetime] = []
|
||||
explicit_candidates: list[datetime] = []
|
||||
inferred_candidates: list[datetime] = []
|
||||
|
||||
for pattern in (
|
||||
r"\b(\d{1,2}\.\d{1,2}\.\d{4})\b",
|
||||
r"\b(\d{1,2}\.\d{1,2}\.\d{2})\b",
|
||||
r"\b(\d{1,2}\.\d{1,2}\.)\b",
|
||||
r"\b(\d{1,2}\.\d{1,2}\.\d{2})(?!\d)",
|
||||
):
|
||||
for match in re.finditer(pattern, search_area):
|
||||
parsed = self._parse_german_date(match.group(1))
|
||||
if parsed:
|
||||
candidates.append(parsed)
|
||||
explicit_candidates.append(parsed)
|
||||
|
||||
for match in re.finditer(r"\b(\d{1,2}\.\d{1,2}\.)(?!\d)", search_area):
|
||||
parsed = self._parse_german_date(match.group(1))
|
||||
if parsed:
|
||||
inferred_candidates.append(parsed)
|
||||
|
||||
month_name_pattern = (
|
||||
r"jan(?:uar)?|feb(?:ruar)?|m(?:ae|ä)r(?:z)?|apr(?:il)?|mai|jun(?:i)?|"
|
||||
@@ -274,7 +458,10 @@ class SourceScanner:
|
||||
):
|
||||
parsed = self._parse_named_month_date(match.group(1), match.group(2), match.group(3))
|
||||
if parsed:
|
||||
candidates.append(parsed)
|
||||
if match.group(3):
|
||||
explicit_candidates.append(parsed)
|
||||
else:
|
||||
inferred_candidates.append(parsed)
|
||||
for match in re.finditer(
|
||||
rf"\b({month_name_pattern})\.?\s+(\d{{1,2}})\.?\s*(\d{{4}})?\b",
|
||||
search_area,
|
||||
@@ -282,14 +469,27 @@ class SourceScanner:
|
||||
):
|
||||
parsed = self._parse_named_month_date(match.group(2), match.group(1), match.group(3))
|
||||
if parsed:
|
||||
candidates.append(parsed)
|
||||
if match.group(3):
|
||||
explicit_candidates.append(parsed)
|
||||
else:
|
||||
inferred_candidates.append(parsed)
|
||||
|
||||
if explicit_candidates:
|
||||
future_explicit_candidates = [
|
||||
candidate
|
||||
for candidate in explicit_candidates
|
||||
if candidate.date() >= datetime.utcnow().date()
|
||||
]
|
||||
if future_explicit_candidates:
|
||||
return sorted(future_explicit_candidates)[0]
|
||||
return sorted(explicit_candidates)[0]
|
||||
|
||||
future_candidates = [
|
||||
candidate for candidate in candidates if candidate.date() >= datetime.utcnow().date()
|
||||
candidate for candidate in inferred_candidates if candidate.date() >= datetime.utcnow().date()
|
||||
]
|
||||
if future_candidates:
|
||||
return sorted(future_candidates)[0]
|
||||
return sorted(candidates)[0] if candidates else None
|
||||
return sorted(inferred_candidates)[0] if inferred_candidates else None
|
||||
|
||||
def _parse_german_date(self, value: str) -> datetime | None:
|
||||
cleaned = value.strip()
|
||||
@@ -342,6 +542,7 @@ class SourceScanner:
|
||||
"[class*=event]",
|
||||
"[class*=termin]",
|
||||
"article",
|
||||
"a[href]",
|
||||
"tr",
|
||||
"li",
|
||||
".row",
|
||||
@@ -355,7 +556,7 @@ class SourceScanner:
|
||||
continue
|
||||
seen_nodes.add(id(node))
|
||||
text = node.get_text(" ", strip=True)
|
||||
if normalized_term not in normalize_search_text(text):
|
||||
if not self._term_matches_normalized(normalized_term, normalize_search_text(text)):
|
||||
continue
|
||||
if len(text) > 3500:
|
||||
continue
|
||||
@@ -380,7 +581,7 @@ class SourceScanner:
|
||||
normalized_term = normalize_search_text(term)
|
||||
candidates = []
|
||||
for node in soup.find_all(string=True):
|
||||
if normalized_term in normalize_search_text(str(node)):
|
||||
if self._term_matches_normalized(normalized_term, normalize_search_text(str(node))):
|
||||
parent = node.parent
|
||||
if parent is None:
|
||||
continue
|
||||
@@ -419,7 +620,7 @@ class SourceScanner:
|
||||
normalized_term = normalize_search_text(term)
|
||||
for heading in soup.find_all(["h1", "h2", "h3", "h4", "strong", "b", "a"]):
|
||||
title = heading.get_text(" ", strip=True)
|
||||
if normalized_term in normalize_search_text(title):
|
||||
if self._term_matches_normalized(normalized_term, normalize_search_text(title)):
|
||||
return title
|
||||
|
||||
text = soup.get_text(" ", strip=True)
|
||||
@@ -441,6 +642,30 @@ class SourceScanner:
|
||||
def _find_nearest_link(self, soup: BeautifulSoup, term: str, base_url: str) -> str | None:
    """Return the target of the first anchor whose text matches ``term``.

    Anchor text is normalized with ``normalize_search_text`` and compared
    using ``_term_matches_normalized``.

    Args:
        soup: Parsed markup whose ``<a href>`` elements are searched.
        term: The name to look for in the anchor text.
        base_url: URL used to resolve a relative href.

    Returns:
        The absolute URL of the first matching anchor, or ``None`` when no
        anchor matches.
    """
    normalized_term = normalize_search_text(term)
    for link in soup.find_all("a", href=True):
        link_text = normalize_search_text(link.get_text(" ", strip=True))
        if self._term_matches_normalized(normalized_term, link_text):
            return urljoin(base_url, link["href"])
    return None
|
||||
|
||||
def _term_matches_normalized(self, normalized_term: str, normalized_text: str) -> bool:
|
||||
if not normalized_term or not normalized_text:
|
||||
return False
|
||||
if normalized_term in normalized_text:
|
||||
return True
|
||||
if len(normalized_term) < 8:
|
||||
return False
|
||||
|
||||
term_tokens = normalized_term.split()
|
||||
text_tokens = normalized_text.split()
|
||||
if not term_tokens or len(text_tokens) < len(term_tokens):
|
||||
return False
|
||||
|
||||
window_size = len(term_tokens)
|
||||
for index in range(len(text_tokens) - window_size + 1):
|
||||
candidate = " ".join(text_tokens[index : index + window_size])
|
||||
if SequenceMatcher(None, normalized_term, candidate).ratio() >= 0.9:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
@@ -3,7 +3,7 @@ server {
|
||||
server_name eventlens.example.com;
|
||||
|
||||
location / {
|
||||
proxy_pass http://127.0.0.1:8000;
|
||||
proxy_pass http://127.0.0.1:8001;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
|
||||
Reference in New Issue
Block a user