From 4b2e5beaf6ec46585f1950a8bb4a0ba3332b9eb2 Mon Sep 17 00:00:00 2001
From: Sam <sam@openclaw.local>
Date: Tue, 3 Mar 2026 01:37:43 +0100
Subject: [PATCH] =?UTF-8?q?Fix:=20Tech-Kategorie=20f=C3=BCr=20Golem/Heise/?=
 =?UTF-8?q?Computerbase=20News?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Golem, Heise und Computerbase wurden als "allgemein" kategorisiert weil
die RSS-Feeds keine spezifische Kategorie liefern. Jetzt werden diese
Sources automatisch als "tech" kategorisiert.

- Source-basiertes Mapping in news_service._row_to_dict
- DB-Queries für category=tech filtern nach Source + allgemein
- "allgemein" Filter schließt Tech-Sources aus (kein Doppel)
- get_categories injiziert "tech" wenn Tech-Sources vorhanden

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 server/services/news_service.py | 47 ++++++++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 6 deletions(-)

diff --git a/server/services/news_service.py b/server/services/news_service.py
index 54cb216..cb24c6e 100644
--- a/server/services/news_service.py
+++ b/server/services/news_service.py
@@ -7,12 +7,19 @@ from typing import Any, Dict, List, Optional
 
 from server.db import get_pool
 
+# Sources whose articles should be categorised as "tech" when they have no
+# specific category or are filed under the generic "allgemein" bucket.
+TECH_SOURCES = {"Golem", "Heise", "Computerbase"}
+
 
 def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
     """Convert an asyncpg Record to a plain dictionary with JSON-safe values."""
     d: Dict[str, Any] = dict(row)
     if "published_at" in d and d["published_at"] is not None:
         d["published_at"] = d["published_at"].isoformat()
+    # Override category for known tech sources
+    if d.get("source") in TECH_SOURCES and d.get("category") in (None, "allgemein"):
+        d["category"] = "tech"
     return d
 
 
@@ -35,9 +42,23 @@ async def get_news(
     )
 
     if category is not None:
-        base_query += f" AND category = ${param_idx}"
-        params.append(category)
-        param_idx += 1
+        if category == "tech":
+            # "tech" is a virtual category — match tech sources with allgemein/NULL
+            src_placeholders = ", ".join(f"${param_idx + i}" for i in range(len(TECH_SOURCES)))
+            base_query += f" AND source IN ({src_placeholders}) AND (category IS NULL OR category = 'allgemein')"
+            params.extend(sorted(TECH_SOURCES))
+            param_idx += len(TECH_SOURCES)
+        else:
+            # Exclude tech sources from "allgemein" so they don't appear twice
+            if category == "allgemein":
+                src_placeholders = ", ".join(f"${param_idx + i}" for i in range(len(TECH_SOURCES)))
+                base_query += f" AND category = 'allgemein' AND source NOT IN ({src_placeholders})"
+                params.extend(sorted(TECH_SOURCES))
+                param_idx += len(TECH_SOURCES)
+            else:
+                base_query += f" AND category = ${param_idx}"
+                params.append(category)
+                param_idx += 1
 
     base_query += f" ORDER BY published_at DESC LIMIT ${param_idx} OFFSET ${param_idx + 1}"
     params.append(limit)
@@ -66,8 +87,17 @@ async def get_news_count(
     )
 
     if category is not None:
-        query += f" AND category = ${param_idx}"
-        params.append(category)
+        if category == "tech":
+            src_placeholders = ", ".join(f"${param_idx + i}" for i in range(len(TECH_SOURCES)))
+            query += f" AND source IN ({src_placeholders}) AND (category IS NULL OR category = 'allgemein')"
+            params.extend(sorted(TECH_SOURCES))
+        elif category == "allgemein":
+            src_placeholders = ", ".join(f"${param_idx + i}" for i in range(len(TECH_SOURCES)))
+            query += f" AND category = 'allgemein' AND source NOT IN ({src_placeholders})"
+            params.extend(sorted(TECH_SOURCES))
+        else:
+            query += f" AND category = ${param_idx}"
+            params.append(category)
 
     async with pool.acquire() as conn:
         row = await conn.fetchrow(query, *params)
@@ -90,4 +120,9 @@ async def get_categories(max_age_hours: int = 48) -> List[str]:
     async with pool.acquire() as conn:
         rows = await conn.fetch(query)
 
-    return [row["category"] for row in rows]
+    cats = [row["category"] for row in rows]
+    # Inject "tech" if any tech source has articles
+    if "allgemein" in cats and "tech" not in cats:
+        cats.append("tech")
+        cats.sort()
+    return cats