"""News service — queries market_news from PostgreSQL via shared pool."""

from __future__ import annotations

import asyncpg
from typing import Any, Dict, List, Optional, Tuple

from server.db import get_pool

# Sources whose articles should be categorised as "tech" when they have no
# specific category or are filed under the generic "allgemein" bucket.
TECH_SOURCES = {"Golem", "Heise", "Computerbase"}


def _is_tech_fallback(source: Any, category: Any) -> bool:
    """Return True when a row must be re-labelled as "tech".

    That is the case for rows published by one of ``TECH_SOURCES`` whose
    stored category is missing (``None``) or the generic "allgemein".
    """
    return source in TECH_SOURCES and category in (None, "allgemein")


def _recent_window(max_age_hours: int) -> str:
    """Return the SQL predicate selecting rows newer than *max_age_hours*.

    The value is embedded in an interval literal, so it is coerced with
    ``int()`` first: only an integer's digits (and sign) can reach the SQL
    text. A non-numeric value raises ``ValueError``/``TypeError`` here.
    """
    return f"published_at > NOW() - INTERVAL '{int(max_age_hours)} hours'"


def _category_filter(
    category: Optional[str], first_param: int
) -> Tuple[str, List[Any]]:
    """Build the SQL fragment and bind values for a category filter.

    Args:
        category: Requested category, or ``None`` for "no filter".
        first_param: Index of the first free ``$n`` placeholder.

    Returns:
        A ``(sql, values)`` pair. ``sql`` is either empty or starts with
        " AND " so it can be appended to an existing WHERE clause; ``values``
        are the bind values for the placeholders used, in order.
    """
    if category is None:
        return "", []

    if category not in ("tech", "allgemein"):
        return f" AND category = ${first_param}", [category]

    # Sorted so the placeholder order (and thus the query text) is stable.
    sources = sorted(TECH_SOURCES)
    placeholders = ", ".join(
        f"${idx}" for idx in range(first_param, first_param + len(sources))
    )

    if category == "tech":
        # Mirror _row_to_dict: a row is reported as "tech" either because it
        # is stored that way or because it is a generic/uncategorised article
        # from a tech source.
        sql = (
            " AND (category = 'tech'"
            f" OR (source IN ({placeholders})"
            " AND (category IS NULL OR category = 'allgemein')))"
        )
    else:
        # Exclude tech sources from "allgemein" so they don't appear twice.
        # "source IS NULL" is spelled out because NOT IN yields NULL (i.e. no
        # match) for a NULL source, while such rows are still reported as
        # "allgemein".
        sql = (
            " AND category = 'allgemein'"
            f" AND (source IS NULL OR source NOT IN ({placeholders}))"
        )
    return sql, sources


def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
    """Convert an asyncpg Record to a plain dictionary with JSON-safe values.

    ``published_at`` is replaced by its ``isoformat()`` string when present
    and not ``None``; the category is overridden with "tech" for generic or
    uncategorised articles of known tech sources.
    """
    d: Dict[str, Any] = dict(row)
    if d.get("published_at") is not None:
        d["published_at"] = d["published_at"].isoformat()
    # Override category for known tech sources
    if _is_tech_fallback(d.get("source"), d.get("category")):
        d["category"] = "tech"
    return d


async def get_news(
    limit: int = 20,
    offset: int = 0,
    category: Optional[str] = None,
    max_age_hours: int = 48,
) -> List[Dict[str, Any]]:
    """Fetch recent news articles from the market_news table.

    Args:
        limit: Maximum number of rows to return.
        offset: Number of rows to skip (for pagination).
        category: Optional category filter. "tech" and "allgemein" get the
            tech-source special handling described in ``_category_filter``.
        max_age_hours: Only articles published within this many hours.

    Returns:
        A list of JSON-safe dictionaries, newest article first.
    """
    pool = await get_pool()

    filter_sql, params = _category_filter(category, first_param=1)
    limit_idx = len(params) + 1
    # "id DESC" is a tie-breaker: without a unique sort order, rows sharing a
    # published_at value may repeat or go missing across LIMIT/OFFSET pages.
    query = (
        "SELECT id, source, title, url, category, published_at "
        "FROM market_news "
        f"WHERE {_recent_window(max_age_hours)}"
        f"{filter_sql}"
        " ORDER BY published_at DESC, id DESC"
        f" LIMIT ${limit_idx} OFFSET ${limit_idx + 1}"
    )
    params.extend((limit, offset))

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)
    return [_row_to_dict(row) for row in rows]


async def get_news_count(
    max_age_hours: int = 48,
    category: Optional[str] = None,
) -> int:
    """Return the total count of recent news articles.

    Applies the same age window and category filter as ``get_news`` so the
    count matches the rows that function can page through.
    """
    pool = await get_pool()

    filter_sql, params = _category_filter(category, first_param=1)
    query = (
        "SELECT COUNT(*) AS cnt "
        "FROM market_news "
        f"WHERE {_recent_window(max_age_hours)}"
        f"{filter_sql}"
    )

    async with pool.acquire() as conn:
        row = await conn.fetchrow(query, *params)
    return int(row["cnt"]) if row else 0


async def get_categories(max_age_hours: int = 48) -> List[str]:
    """Return distinct categories from recent news articles.

    Categories are the ones ``_row_to_dict`` would report: generic or
    uncategorised articles from tech sources count as "tech", so "tech" is
    listed only when such an article (or a row stored as "tech") exists, and
    "allgemein" only when a non-tech source has a generic article.

    Returns:
        The category names, sorted with Python's default string ordering.
    """
    pool = await get_pool()
    query = (
        "SELECT DISTINCT source, category "
        "FROM market_news "
        f"WHERE {_recent_window(max_age_hours)}"
    )
    async with pool.acquire() as conn:
        rows = await conn.fetch(query)

    categories = set()
    for row in rows:
        if _is_tech_fallback(row["source"], row["category"]):
            categories.add("tech")
        elif row["category"] is not None:
            categories.add(row["category"])
    return sorted(categories)