From 8bcdf676708a21670273a90920c1b514373e25db Mon Sep 17 00:00:00 2001
From: Wei Zang <goldlabel.apps@gmail.com>
Date: Mon, 13 Apr 2026 13:34:17 +0100
Subject: [PATCH 1/5] Handle linkedin_url input and use search_vector

Accept both linkedin_url and legacy linkedinUrl in request payloads and normalize to linkedin_url. Update the error message to reference linkedin_url. The query logic now checks information_schema for a search_vector column on public.prompt and, if present, adds a full-text match (plainto_tsquery) alongside the COALESCE(data->>'linkedin_url', data->>'linkedinUrl') equality and prompt ILIKE conditions; otherwise it falls back to the COALESCE equality / prompt ILIKE match only. Responses now return linkedin_url (instead of linkedinUrl) in the data payload for consistency, while stored records using either key are still matched by the lookup.
---
 app/api/prompt/linkedin.py | 54 +++++++++++++++++++++++++++++---------
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/app/api/prompt/linkedin.py b/app/api/prompt/linkedin.py
index a054b13..dfcd98b 100644
--- a/app/api/prompt/linkedin.py
+++ b/app/api/prompt/linkedin.py
@@ -9,10 +9,10 @@
 
 @router.post("/prompt/linkedin")
 def linkedin_prompt_success(payload: dict, api_key: str = Depends(get_api_key)) -> dict:
-    """POST /prompt/linkedin: return cached completion for linkedinUrl when available."""
-    linkedin_url = (payload.get("linkedinUrl") or "").strip()
+    """POST /prompt/linkedin: return cached completion for linkedin_url when available."""
+    linkedin_url = (payload.get("linkedin_url") or payload.get("linkedinUrl") or "").strip()
     if not linkedin_url:
-        raise HTTPException(status_code=400, detail="Missing 'linkedinUrl' in request body.")
+        raise HTTPException(status_code=400, detail="Missing 'linkedin_url' in request body.")
 
     conn = None
     cur = None
@@ -21,14 +21,44 @@ def linkedin_prompt_success(payload: dict, api_key: str = Depends(get_api_key))
         cur = conn.cursor()
         cur.execute(
             """
-            SELECT id, completion, time, model, data
-            FROM prompt
-            WHERE (data->>'linkedinUrl' = %s OR prompt ILIKE %s)
-            ORDER BY id DESC
-            LIMIT 1;
-            """,
-            (linkedin_url, f"%{linkedin_url}%"),
+            SELECT EXISTS (
+                SELECT 1
+                FROM information_schema.columns
+                WHERE table_schema = 'public'
+                  AND table_name = 'prompt'
+                  AND column_name = 'search_vector'
+            );
+            """
         )
+        exists_row = cur.fetchone()
+        has_search_vector = bool(exists_row and exists_row[0])
+
+        if has_search_vector:
+            cur.execute(
+                """
+                SELECT id, completion, time, model, data
+                FROM prompt
+                WHERE (
+                    COALESCE(data->>'linkedin_url', data->>'linkedinUrl') = %s
+                    OR search_vector @@ plainto_tsquery('english', %s)
+                    OR prompt ILIKE %s
+                )
+                ORDER BY id DESC
+                LIMIT 1;
+                """,
+                (linkedin_url, linkedin_url, f"%{linkedin_url}%"),
+            )
+        else:
+            cur.execute(
+                """
+                SELECT id, completion, time, model, data
+                FROM prompt
+                WHERE (COALESCE(data->>'linkedin_url', data->>'linkedinUrl') = %s OR prompt ILIKE %s)
+                ORDER BY id DESC
+                LIMIT 1;
+                """,
+                (linkedin_url, f"%{linkedin_url}%"),
+            )
         row = cur.fetchone()
 
         if row:
@@ -37,7 +67,7 @@ def linkedin_prompt_success(payload: dict, api_key: str = Depends(get_api_key))
                 "data": {
                     "cached": True,
                     "id": row[0],
-                    "linkedinUrl": linkedin_url,
+                    "linkedin_url": linkedin_url,
                     "completion": row[1],
                     "time": row[2].isoformat() if row[2] else None,
                     "model": row[3],
@@ -49,7 +79,7 @@ def linkedin_prompt_success(payload: dict, api_key: str = Depends(get_api_key))
             "meta": make_meta("warning", "LinkedIn URL not analysed yet"),
             "data": {
                 "cached": False,
-                "linkedinUrl": linkedin_url,
+                "linkedin_url": linkedin_url,
                 "completion": None,
             },
         }

From 5c5f427002be0068e12d4adfea6d2eea5382f7fb Mon Sep 17 00:00:00 2001
From: Wei Zang <goldlabel.apps@gmail.com>
Date: Mon, 13 Apr 2026 13:45:46 +0100
Subject: [PATCH 2/5] Remove prospect_id and adjust prompt table SQL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the prospect_id foreign key from the prompt CREATE TABLE SQL and adjust column order (model moved before data). Also delete drop_llm_table.sql which contained DROP TABLE IF EXISTS llm. Note: this updates the table creation script only—apply a DB migration if you need to remove the existing column or preserve data.
---
 app/api/prompt/sql/create_table.sql   | 3 +--
 app/api/prompt/sql/drop_llm_table.sql | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)
 delete mode 100644 app/api/prompt/sql/drop_llm_table.sql

diff --git a/app/api/prompt/sql/create_table.sql b/app/api/prompt/sql/create_table.sql
index 70776ce..d6fba35 100644
--- a/app/api/prompt/sql/create_table.sql
+++ b/app/api/prompt/sql/create_table.sql
@@ -6,7 +6,6 @@ CREATE TABLE IF NOT EXISTS prompt (
 	completion TEXT NOT NULL,
 	duration FLOAT,
 	time TIMESTAMPTZ DEFAULT NOW(),
-	data JSONB,
 	model TEXT,
-	prospect_id INTEGER REFERENCES prospects(id)
+	data JSONB
 );
diff --git a/app/api/prompt/sql/drop_llm_table.sql b/app/api/prompt/sql/drop_llm_table.sql
deleted file mode 100644
index 99fbcfb..0000000
--- a/app/api/prompt/sql/drop_llm_table.sql
+++ /dev/null
@@ -1 +0,0 @@
-DROP TABLE IF EXISTS llm;
\ No newline at end of file

From 38af09268f8782f2b18beb96598bb4cb10b61dff Mon Sep 17 00:00:00 2001
From: Wei Zang <goldlabel.apps@gmail.com>
Date: Mon, 13 Apr 2026 13:52:27 +0100
Subject: [PATCH 3/5] Return prompt in LinkedIn prompt responses

Include the original prompt in cached LinkedIn prompt lookups by adding the prompt column to the SELECT queries and updating the row index mappings. Also add a prompt key (None when not cached) to the response payload so callers receive the associated prompt along with completion, time, model, and record data.
---
 app/api/prompt/linkedin.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/app/api/prompt/linkedin.py b/app/api/prompt/linkedin.py
index dfcd98b..43ec403 100644
--- a/app/api/prompt/linkedin.py
+++ b/app/api/prompt/linkedin.py
@@ -36,7 +36,7 @@ def linkedin_prompt_success(payload: dict, api_key: str = Depends(get_api_key))
         if has_search_vector:
             cur.execute(
                 """
-                SELECT id, completion, time, model, data
+                SELECT id, prompt, completion, time, model, data
                 FROM prompt
                 WHERE (
                     COALESCE(data->>'linkedin_url', data->>'linkedinUrl') = %s
@@ -51,7 +51,7 @@ def linkedin_prompt_success(payload: dict, api_key: str = Depends(get_api_key))
         else:
             cur.execute(
                 """
-                SELECT id, completion, time, model, data
+                SELECT id, prompt, completion, time, model, data
                 FROM prompt
                 WHERE (COALESCE(data->>'linkedin_url', data->>'linkedinUrl') = %s OR prompt ILIKE %s)
                 ORDER BY id DESC
@@ -68,10 +68,11 @@ def linkedin_prompt_success(payload: dict, api_key: str = Depends(get_api_key))
                     "cached": True,
                     "id": row[0],
                     "linkedin_url": linkedin_url,
-                    "completion": row[1],
-                    "time": row[2].isoformat() if row[2] else None,
-                    "model": row[3],
-                    "record_data": row[4],
+                    "prompt": row[1],
+                    "completion": row[2],
+                    "time": row[3].isoformat() if row[3] else None,
+                    "model": row[4],
+                    "record_data": row[5],
                 },
             }
 
@@ -80,6 +81,7 @@ def linkedin_prompt_success(payload: dict, api_key: str = Depends(get_api_key))
             "data": {
                 "cached": False,
                 "linkedin_url": linkedin_url,
+                "prompt": None,
                 "completion": None,
             },
         }

From c5da6b1a858b9438350b6b5f1ae79044276e3632 Mon Sep 17 00:00:00 2001
From: Wei Zang <goldlabel.apps@gmail.com>
Date: Mon, 13 Apr 2026 14:06:15 +0100
Subject: [PATCH 4/5] Remove prospect_id support from prompt endpoints

Drop prospect_id filtering and storage from prompt APIs and DB interactions. get_prompt_records no longer accepts or handles prospect_id and always returns paginated prompt records (SELECT no longer includes prospect_id). llm_post no longer reads prospect_id from payload and INSERT into prompt omits the prospect_id column. Error handling and pagination metadata preserved.
---
 app/api/prompt/prompt.py | 115 +++++++++++++--------------------------
 1 file changed, 37 insertions(+), 78 deletions(-)

diff --git a/app/api/prompt/prompt.py b/app/api/prompt/prompt.py
index d507eab..90a8117 100644
--- a/app/api/prompt/prompt.py
+++ b/app/api/prompt/prompt.py
@@ -11,87 +11,47 @@ def get_prompt_records(
     request: Request,
     page: int = Query(1, ge=1, description="Page number (1-based)"),
     page_size: int = Query(10, ge=1, le=100, description="Records per page"),
-    prospect_id: int = Query(None, description="Filter by prospect_id"),
     api_key: str = Depends(get_api_key)
 ) -> dict:
     """GET /prompt: Paginated list of prompt completions."""
     try:
         conn = get_db_connection_direct()
         cur = conn.cursor()
-        if prospect_id is not None:
-            # No pagination for single prospect_id lookup
-            select_query = """
-                SELECT id, prompt, completion, duration, time, data, model, prospect_id
-                FROM prompt
-                WHERE prospect_id = %s
-                ORDER BY id DESC
-            """
-            cur.execute(select_query, (prospect_id,))
-            rows = cur.fetchall()
-            records = [
-                {
-                    "id": row[0],
-                    "prompt": row[1],
-                    "completion": row[2],
-                    "duration": row[3],
-                    "time": row[4].isoformat() if row[4] else None,
-                    "data": row[5],
-                    "model": row[6],
-                    "prospect_id": row[7],
-                }
-                for row in rows
-            ]
-            cur.close()
-            conn.close()
-            if records:
-                meta = make_meta("success", f"Found {len(records)} record(s) for prospect_id {prospect_id}")
-                return {
-                    "meta": meta,
-                    "data": records,
-                }
-            else:
-                meta = make_meta("warning", f"No records found for prospect_id {prospect_id}")
-                return {
-                    "meta": meta,
-                    "data": [],
-                }
-        else:
-            offset = (page - 1) * page_size
-            cur.execute("SELECT COUNT(*) FROM prompt;")
-            count_row = cur.fetchone()
-            total = count_row[0] if count_row and count_row[0] is not None else 0
-            cur.execute("""
-                SELECT id, prompt, completion, duration, time, data, model, prospect_id
-                FROM prompt
-                ORDER BY id DESC
-                LIMIT %s OFFSET %s;
-            """, (page_size, offset))
-            records = [
-                {
-                    "id": row[0],
-                    "prompt": row[1],
-                    "completion": row[2],
-                    "duration": row[3],
-                    "time": row[4].isoformat() if row[4] else None,
-                    "data": row[5],
-                    "model": row[6],
-                    "prospect_id": row[7],
-                }
-                for row in cur.fetchall()
-            ]
-            cur.close()
-            conn.close()
-            meta = make_meta("success", f"Prompt {len(records)} records (page {page})")
-            return {
-                "meta": meta,
-                "data": {
-                    "page": page,
-                    "page_size": page_size,
-                    "total": total,
-                    "pages": (total + page_size - 1) // page_size,
-                    "data": records,
-                },
+        offset = (page - 1) * page_size
+        cur.execute("SELECT COUNT(*) FROM prompt;")
+        count_row = cur.fetchone()
+        total = count_row[0] if count_row and count_row[0] is not None else 0
+        cur.execute("""
+            SELECT id, prompt, completion, duration, time, data, model
+            FROM prompt
+            ORDER BY id DESC
+            LIMIT %s OFFSET %s;
+        """, (page_size, offset))
+        records = [
+            {
+                "id": row[0],
+                "prompt": row[1],
+                "completion": row[2],
+                "duration": row[3],
+                "time": row[4].isoformat() if row[4] else None,
+                "data": row[5],
+                "model": row[6],
             }
+            for row in cur.fetchall()
+        ]
+        cur.close()
+        conn.close()
+        meta = make_meta("success", f"Prompt {len(records)} records (page {page})")
+        return {
+            "meta": meta,
+            "data": {
+                "page": page,
+                "page_size": page_size,
+                "total": total,
+                "pages": (total + page_size - 1) // page_size,
+                "data": records,
+            },
+        }
     except Exception as e:
         meta = make_meta("error", f"DB error: {str(e)}")
         return {"meta": meta, "data": {}}
@@ -100,7 +60,6 @@ def get_prompt_records(
 def llm_post(payload: dict) -> dict:
     """POST /prompt: send prompt to Gemini, returns completion google-genai SDK."""
     prompt = payload.get("prompt")
-    prospect_id = payload.get("prospect_id")
     if not prompt:
         raise HTTPException(status_code=400, detail="Missing 'prompt' in request body.")
     api_key = os.getenv("GEMINI_API_KEY")
@@ -148,11 +107,11 @@ def llm_post(payload: dict) -> dict:
             cur = conn.cursor()
             cur.execute(
                 """
-                INSERT INTO prompt (prompt, completion, duration, data, model, prospect_id)
-                VALUES (%s, %s, %s, %s, %s, %s)
+                INSERT INTO prompt (prompt, completion, duration, data, model)
+                VALUES (%s, %s, %s, %s, %s)
                 RETURNING id;
                 """,
-                (prompt, completion, duration, data_blob, used_model, prospect_id)
+                (prompt, completion, duration, data_blob, used_model)
             )
             record_id_row = cur.fetchone()
             record_id = record_id_row[0] if record_id_row else None

From 1bceae43b4302852f4130b21d17215efb0f870d7 Mon Sep 17 00:00:00 2001
From: Wei Zang <goldlabel.apps@gmail.com>
Date: Mon, 13 Apr 2026 14:12:58 +0100
Subject: [PATCH 5/5] Integrate Gemini for LinkedIn prompt cache miss

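When a LinkedIn URL is not found in the cache, call Google Gemini to generate a profile analysis and store the result. Adds a GEMINI_API_KEY env check (performed before the cache lookup, so a missing key returns HTTP 500 even for already-cached URLs), a default prompt if none is supplied, and a model fallback loop that tries several Gemini models in order until one returns text. Inserts the prompt, completion, model, duration and metadata into the prompt table (populating search_vector when the column exists), closes DB cursors/connections as soon as the cache lookup completes, and returns the generated completion and record id in the response. A failed insert is logged and the completion is still returned with id set to None. The generic error meta message changes from "DB error" to "Gemini API error", and the cache-miss response meta changes from a warning to a success message.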
---
 app/api/prompt/linkedin.py | 113 +++++++++++++++++++++++++++++++++++--
 1 file changed, 108 insertions(+), 5 deletions(-)

diff --git a/app/api/prompt/linkedin.py b/app/api/prompt/linkedin.py
index 43ec403..1dfa2cf 100644
--- a/app/api/prompt/linkedin.py
+++ b/app/api/prompt/linkedin.py
@@ -1,3 +1,5 @@
+import os
+
 from fastapi import APIRouter, Depends, HTTPException
 
 from app.utils.api_key_auth import get_api_key
@@ -9,11 +11,23 @@
 
 @router.post("/prompt/linkedin")
 def linkedin_prompt_success(payload: dict, api_key: str = Depends(get_api_key)) -> dict:
-    """POST /prompt/linkedin: return cached completion for linkedin_url when available."""
+    """POST /prompt/linkedin: return cached completion or create a new Gemini analysis."""
     linkedin_url = (payload.get("linkedin_url") or payload.get("linkedinUrl") or "").strip()
     if not linkedin_url:
         raise HTTPException(status_code=400, detail="Missing 'linkedin_url' in request body.")
 
+    prompt = (payload.get("prompt") or "").strip()
+    if not prompt:
+        prompt = (
+            "Analyse this LinkedIn profile URL and provide a concise summary of the person, "
+            "their role, company, seniority, likely responsibilities, and notable signals. "
+            f"LinkedIn URL: {linkedin_url}"
+        )
+
+    gemini_api_key = os.getenv("GEMINI_API_KEY")
+    if not gemini_api_key:
+        raise HTTPException(status_code=500, detail="Gemini API key not configured.")
+
     conn = None
     cur = None
     try:
@@ -62,6 +76,10 @@ def linkedin_prompt_success(payload: dict, api_key: str = Depends(get_api_key))
         row = cur.fetchone()
 
         if row:
+            cur.close()
+            conn.close()
+            cur = None
+            conn = None
             return {
                 "meta": make_meta("success", "LinkedIn URL already analysed"),
                 "data": {
@@ -76,20 +94,105 @@ def linkedin_prompt_success(payload: dict, api_key: str = Depends(get_api_key))
                 },
             }
 
+        cur.close()
+        conn.close()
+        cur = None
+        conn = None
+
+        import json
+        import logging
+        import time as time_mod
+        from app import __version__
+        from google import genai
+
+        client = genai.Client(api_key=gemini_api_key)
+        model_names = [
+            "models/gemini-flash-latest",
+            "models/gemini-1.5-pro",
+            "models/gemini-1.5-flash",
+            "models/gemini-1.0-pro",
+            "models/gemini-pro",
+            "models/gemini-pro-vision",
+        ]
+        response = None
+        completion = None
+        used_model = None
+        errors = {}
+        start_time = time_mod.time()
+        for model_name in model_names:
+            try:
+                response = client.models.generate_content(model=model_name, contents=prompt)
+                completion = getattr(response, "text", None)
+                if completion:
+                    used_model = model_name
+                    break
+            except Exception as model_exc:
+                errors[model_name] = str(model_exc)
+                continue
+
+        duration = time_mod.time() - start_time
+        if not completion:
+            error_details = " | ".join([f"{name}: {message}" for name, message in errors.items()])
+            raise Exception(
+                "No available Gemini model succeeded for generate_content with your API key. "
+                f"Details: {error_details}"
+            )
+
+        record_id = None
+        record_data = {
+            "version": __version__,
+            "linkedin_url": linkedin_url,
+        }
+        try:
+            conn = get_db_connection_direct()
+            cur = conn.cursor()
+            data_blob = json.dumps(record_data)
+            if has_search_vector:
+                cur.execute(
+                    """
+                    INSERT INTO prompt (prompt, completion, duration, model, data, search_vector)
+                    VALUES (%s, %s, %s, %s, %s, to_tsvector('english', %s || ' ' || %s))
+                    RETURNING id;
+                    """,
+                    (prompt, completion, duration, used_model, data_blob, prompt, completion)
+                )
+            else:
+                cur.execute(
+                    """
+                    INSERT INTO prompt (prompt, completion, duration, model, data)
+                    VALUES (%s, %s, %s, %s, %s)
+                    RETURNING id;
+                    """,
+                    (prompt, completion, duration, used_model, data_blob)
+                )
+            record_id_row = cur.fetchone()
+            record_id = record_id_row[0] if record_id_row else None
+            conn.commit()
+            cur.close()
+            conn.close()
+            cur = None
+            conn = None
+        except Exception as db_exc:
+            logging.error(f"Failed to insert prompt record: {db_exc}")
+
         return {
-            "meta": make_meta("warning", "LinkedIn URL not analysed yet"),
+            "meta": make_meta("success", f"Gemini completion received from {used_model}"),
             "data": {
                 "cached": False,
+                "id": record_id,
                 "linkedin_url": linkedin_url,
-                "prompt": None,
-                "completion": None,
+                "prompt": prompt,
+                "completion": completion,
+                "duration": duration,
+                "model": used_model,
+                "record_data": record_data,
             },
         }
     except HTTPException:
         raise
     except Exception as e:
         return {
-            "meta": make_meta("error", f"DB error: {str(e)}"),
+            "meta": make_meta("error", f"Gemini API error: {str(e)}"),
             "data": {},
         }
     finally: