From 77e67965cc4e80d3559dce659dcc7bf125d45563 Mon Sep 17 00:00:00 2001 From: Mohamad Bader almsaddi alzin Date: Wed, 20 May 2026 19:04:20 +0200 Subject: [PATCH 01/10] ingested api with expo retries, added fetch api records. --- ingest_api.py | 76 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 64 insertions(+), 12 deletions(-) diff --git a/ingest_api.py b/ingest_api.py index 66eb32c..2e7a660 100644 --- a/ingest_api.py +++ b/ingest_api.py @@ -1,9 +1,5 @@ -# Step 2 — Tasks 1 & 2: Error Handling + API Ingestion -# fetch_with_retry handles transient network errors (Task 1). -# fetch_api_records calls it and shapes the response into flat dicts (Task 2). import logging import time - import requests logger = logging.getLogger(__name__) @@ -11,15 +7,48 @@ API_URL = "https://api.open-meteo.com/v1/forecast" -def fetch_with_retry(url: str, params: dict, max_retries: int = 3, timeout: int = 10) -> dict: +def fetch_with_retry( + url: str, params: dict, max_retries: int = 3, timeout: int = 10 +) -> dict: """Fetch url with exponential backoff on transient errors. Retry on: ConnectionError, Timeout, 5xx status codes. Fail immediately on: 4xx status codes. Log each retry attempt with the error and delay. """ - # TODO: implement retry loop with exponential backoff - raise NotImplementedError + for attempt in range(max_retries): + try: + response = requests.get(url, params=params, timeout=timeout) + + response.raise_for_status() + return response.json() + + except requests.exceptions.HTTPError as e: + if response.status_code < 500: + logger.error( + f"Client error {response.status_code}. Failing immediately." + ) + raise + + error_msg = f"Server error {response.status_code}" + error_type = error_msg + + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e: + error_type = type(e).__name__ + + if attempt == max_retries - 1: + logger.error( + f"Max retries reached. Final attempt failed with {error_type}." + ) + raise + + wait_time = 2**attempt + logger.warning( + f"Attempt {attempt + 1} failed ({error_type}). Retrying in {wait_time}s..." + ) + time.sleep(wait_time) + + raise RuntimeError("unreachable") def fetch_api_records() -> list[dict]: @@ -34,8 +63,31 @@ def fetch_api_records() -> list[dict]: "hourly": "temperature_2m,relative_humidity_2m", "forecast_days": 7, } - # TODO: - # - Call fetch_with_retry with API_URL and params - # - The API returns {"hourly": {"time": [...], "temperature_2m": [...], "relative_humidity_2m": [...]}} - # - Flatten to a list of dicts; set station="Open-Meteo Copenhagen" for all records - raise NotImplementedError + + try: + data = fetch_with_retry(API_URL, params=params) + except Exception as e: + logger.error(f"Failed to fetch data from API: {e}") + return [] + + # Task 2: Robust parsing and flattening + hourly = data.get("hourly", {}) + times = hourly.get("time", []) + temps = hourly.get("temperature_2m", []) + humidities = hourly.get("relative_humidity_2m", []) + + if not times or not temps or not humidities: + logger.warning( + "API response structure was missing expected hourly data data arrays." + ) + return [] + + return [ + { + "station": "Open-Meteo Copenhagen", + "timestamp": ts, + "temperature_c": temp, + "humidity_pct": hum, + } + for ts, temp, hum in zip(times, temps, humidities, strict=True) + ] From 21fc4c6f882fbe0a70a8e8910cf68b8de185b329 Mon Sep 17 00:00:00 2001 From: Mohamad Bader almsaddi alzin Date: Wed, 20 May 2026 19:05:09 +0200 Subject: [PATCH 02/10] added read_csv_records using DictReader --- ingest_files.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/ingest_files.py b/ingest_files.py index 8b010ff..ae2b752 100644 --- a/ingest_files.py +++ b/ingest_files.py @@ -16,5 +16,23 @@ def read_csv_records(path: Path) -> list[dict]: - Convert temperature_c to float and humidity_pct to int where possible. - Leave unconvertible values (e.g. "N/A", "") as-is so validation can catch them. """ - # TODO: implement CSV reading and normalization - raise NotImplementedError + with open(path, newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + records = [] + for row in reader: + record = { + "station": row.get("station", "unknown"), + "timestamp": row.get("timestamp", ""), + "temperature_c": row.get("temperature_c", ""), + "humidity_pct": row.get("humidity_pct", ""), + } + try: + record["temperature_c"] = float(record["temperature_c"]) + except ValueError: + pass + try: + record["humidity_pct"] = int(record["humidity_pct"]) + except ValueError: + pass + records.append(record) + return records From 797d30aab7580cec7269a5847c640002d713d20e Mon Sep 17 00:00:00 2001 From: Mohamad Bader almsaddi alzin Date: Wed, 20 May 2026 19:21:52 +0200 Subject: [PATCH 03/10] added validate records with pydantic --- validate.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/validate.py b/validate.py index ada2716..f9388e3 100644 --- a/validate.py +++ b/validate.py @@ -18,5 +18,18 @@ def validate_records( raw_record - the original dict error_details - the Pydantic error list (ValidationError.errors()) """ - # TODO: iterate over records, try WeatherReading(**record), accumulate results - raise NotImplementedError + valid_list: list[WeatherReading] = [] + error_list: list[dict] = [] + for i, record in enumerate(records): + try: + valid_list.append(WeatherReading(**record)) + except ValidationError as e: + error_list.append( + { + "index": i, + "source": source, + "raw_record": record, + "error_details": e.errors(), + } + ) + return valid_list, error_list From 6d3ba8b759709a99fc9899e7ea43c779f4a86725 Mon Sep 17 00:00:00 2001 From: Mohamad Bader almsaddi alzin Date: Wed, 20 May 2026 19:39:41 +0200 Subject: [PATCH 04/10] created tables, implemented insert raw, upsert readings, count readings --- database.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 59 insertions(+), 8 deletions(-) diff --git a/database.py b/database.py index 8322d17..d4a6d09 100644 --- a/database.py +++ b/database.py @@ -24,8 +24,32 @@ def create_tables(conn: sqlite3.Connection) -> None: weather_readings columns: id, station, timestamp, temperature_c, humidity_pct + UNIQUE(station, timestamp) constraint for upserts """ - # TODO: use conn.execute() with CREATE TABLE IF NOT EXISTS statements - raise NotImplementedError + conn.execute( + """ + CREATE TABLE IF NOT EXISTS raw_weather ( + id INTEGER PRIMARY KEY, + station TEXT NOT NULL, + timestamp TEXT NOT NULL, + temperature_c REAL NOT NULL, + humidity_pct REAL NOT NULL, + source TEXT NOT NULL, + ingested_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + """ + ) + conn.execute( + """ + CREATE TABLE IF NOT EXISTS weather_readings ( + id INTEGER PRIMARY KEY, + station TEXT NOT NULL, + timestamp TEXT NOT NULL, + temperature_c REAL NOT NULL, + humidity_pct REAL NOT NULL, + UNIQUE(station, timestamp) + ) + """ + ) + conn.commit() def insert_raw(conn: sqlite3.Connection, records: list[dict], source: str) -> None: @@ -33,8 +57,21 @@ def insert_raw(conn: sqlite3.Connection, records: list[dict], source: str) -> No Use parameterized queries with placeholder syntax; do not build SQL via string formatting. """ - # TODO: implement - raise NotImplementedError + for record in records: + conn.execute( + """ + INSERT INTO raw_weather (station, timestamp, temperature_c, humidity_pct, source) + VALUES (?, ?, ?, ?, ?) + """, + ( + record["station"], + record["timestamp"], + record["temperature_c"], + record["humidity_pct"], + source, + ), + ) + conn.commit() def upsert_readings(conn: sqlite3.Connection, readings: list[WeatherReading]) -> None: @@ -43,11 +80,25 @@ def upsert_readings(conn: sqlite3.Connection, readings: list[WeatherReading]) -> Use the upsert pattern to handle duplicate (station, timestamp) pairs. Use parameterized queries. """ - # TODO: implement - raise NotImplementedError + for reading in readings: + conn.execute( + """ + INSERT INTO weather_readings (station, timestamp, temperature_c, humidity_pct) + VALUES (?, ?, ?, ?) + ON CONFLICT(station, timestamp) DO UPDATE SET + temperature_c = excluded.temperature_c, + humidity_pct = excluded.humidity_pct + """, + ( + reading.station, + reading.timestamp, + reading.temperature_c, + reading.humidity_pct, + ), + ) + conn.commit() def count_readings(conn: sqlite3.Connection) -> int: """Return the total number of rows in weather_readings.""" - # TODO: implement - raise NotImplementedError + return conn.execute("SELECT COUNT(*) FROM weather_readings").fetchone()[0] From 732cb4637b07a17826d0cbe91f3ac9bc7d546826 Mon Sep 17 00:00:00 2001 From: Mohamad Bader almsaddi alzin Date: Wed, 20 May 2026 19:41:10 +0200 Subject: [PATCH 05/10] implemented run pipleline, called all created functions in entry point --- pipeline.py | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/pipeline.py b/pipeline.py index d8dc710..e1b98aa 100644 --- a/pipeline.py +++ b/pipeline.py @@ -6,7 +6,13 @@ import logging from pathlib import Path -from database import count_readings, create_tables, get_connection, insert_raw, upsert_readings +from database import ( + count_readings, + create_tables, + get_connection, + insert_raw, + upsert_readings, +) from ingest_api import fetch_api_records from ingest_files import read_csv_records from validate import validate_records @@ -20,14 +26,38 @@ def run_pipeline() -> None: # TODO — implement each step in order: # - # 1. Fetch records from Open-Meteo API using fetch_api_records() + api_records = fetch_api_records() # 2. Read records from CSV using read_csv_records(CSV_PATH) + csv_records = read_csv_records(CSV_PATH) # 3. Open a DB connection, create tables, insert all raw records (both sources) + conn = get_connection() + create_tables(conn) + insert_raw(conn, api_records, "Open-Meteo") + insert_raw(conn, csv_records, "CSV") # 4. Validate all records — collect valid WeatherReading objects and error dicts + valid_readings, error_dicts = validate_records( + api_records + csv_records, source="combined" + ) # 5. Upsert valid records into weather_readings + upsert_readings(conn, valid_readings) # 6. Save error dicts as JSON to output/error_report.json + with open(OUTPUT_DIR / "error_report.json", "w") as f: + json.dump(error_dicts, f) # 7. Print the pipeline summary in the format below. - # + total_raw = len(api_records) + len(csv_records) + valid_count = len(valid_readings) + invalid_count = len(error_dicts) + database_count = count_readings(conn) + + print("=== Pipeline Summary ===") + print(f"API records fetched: {len(api_records)}") + print(f"CSV records read: {len(csv_records)}") + print(f"Total raw records: {total_raw}") + print(f"Valid records: {valid_count}") + print(f"Invalid records: {invalid_count}") + print(f"Records in database: {database_count}") + print("Error report: output/error_report.json") + # Note: the API count varies by time of day (Open-Meteo returns up to 168 hourly # records for 7 forecast days; the exact number depends on the current UTC hour). # The CSV contributes 6 invalid records and 4 valid ones; the duplicate Copenhagen @@ -43,8 +73,6 @@ def run_pipeline() -> None: # Records in database: 169 # Error report: output/error_report.json - raise NotImplementedError - if __name__ == "__main__": logging.basicConfig(level=logging.INFO) From aa9258ef88dfe04c83f2b0c93e198f1fdcf30e38 Mon Sep 17 00:00:00 2001 From: Mohamad Bader almsaddi alzin Date: Wed, 20 May 2026 19:41:31 +0200 Subject: [PATCH 06/10] added clean station (was not implemented) --- models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/models.py b/models.py index cae42fe..cdd34d4 100644 --- a/models.py +++ b/models.py @@ -13,5 +13,4 @@ class WeatherReading(BaseModel): @field_validator("station") @classmethod def clean_station(cls, v: str) -> str: - # TODO: strip whitespace and convert to title case - raise NotImplementedError + return v.strip().title() From 10399f57f47233ec08a98a06cb52fe69a24bf1ed Mon Sep 17 00:00:00 2001 From: Mohamad Bader almsaddi alzin Date: Wed, 20 May 2026 21:35:03 +0200 Subject: [PATCH 07/10] completed azure compare.md, created script (removed token) to create output file --- azure.py | 23 +++++++++++++++++++++++ output/azure_compare.md | 6 +++--- 2 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 azure.py diff --git a/azure.py b/azure.py new file mode 100644 index 0000000..4938685 --- /dev/null +++ b/azure.py @@ -0,0 +1,23 @@ +import json + +import requests +from pathlib import Path + + +OUTPUT_DIR = Path("output") +OUTPUT_DIR.mkdir(exist_ok=True) + + +token = "token_here" +subscription_id = "1120c89d-2a5f-4a15-a582-2ea34f0bb5c3" # from the portal above + +url = f"https://management.azure.com/subscriptions/{subscription_id}/resourcegroups?api-version=2024-03-01" +headers = {"Authorization": f"Bearer {token}"} + +response = requests.get(url, headers=headers, timeout=10) +response.raise_for_status() +for rg in response.json()["value"]: + print(f"{rg['name']}: {rg['location']}") + + with open(OUTPUT_DIR / "azure_resource_groups.json", "w") as f: + json.dump(response.json(), f, indent=2) diff --git a/output/azure_compare.md b/output/azure_compare.md index 22395b3..29b0aa1 100644 --- a/output/azure_compare.md +++ b/output/azure_compare.md @@ -4,12 +4,12 @@ Fill in each section below (2-3 sentences each) after completing the Task 7 step ## Auth - + ## Schema verbosity - + ## api-version in the URL - + From e01d894b3271dd5d49f3f08022beed0d1b62abfd Mon Sep 17 00:00:00 2001 From: Mohamad Bader almsaddi alzin Date: Wed, 20 May 2026 21:41:08 +0200 Subject: [PATCH 08/10] added AI debug --- AI_DEBUG.md | 77 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 68 insertions(+), 9 deletions(-) diff --git a/AI_DEBUG.md b/AI_DEBUG.md index edf34a5..57fa872 100644 --- a/AI_DEBUG.md +++ b/AI_DEBUG.md @@ -7,20 +7,79 @@ Document the debugging session below. If everything worked first try, introduce ## The Error - + ## The Prompt - - + ## The Solution - + ## Reflection - + From 1a70c1938339a1bfb7a85e563284e4fca2a940d6 Mon Sep 17 00:00:00 2001 From: Mohamad Bader almsaddi alzin Date: Wed, 20 May 2026 21:49:46 +0200 Subject: [PATCH 09/10] added more line breaks to azure_compare --- output/azure_compare.md | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/output/azure_compare.md b/output/azure_compare.md index 29b0aa1..63a9f0d 100644 --- a/output/azure_compare.md +++ b/output/azure_compare.md @@ -4,12 +4,18 @@ Fill in each section below (2-3 sentences each) after completing the Task 7 step ## Auth - + ## Schema verbosity - + ## api-version in the URL - + From 41f53a1670b75dc2e9ae00d2eeb5842251cd886b Mon Sep 17 00:00:00 2001 From: Mohamad Bader almsaddi alzin Date: Thu, 21 May 2026 16:07:11 +0200 Subject: [PATCH 10/10] removed azure_resource_groups from gitignore and commited the file --- .gitignore | 1 - output/azure_resource_groups.json | 13 +++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 output/azure_resource_groups.json diff --git a/.gitignore b/.gitignore index 9e117dc..21451a1 100644 --- a/.gitignore +++ b/.gitignore @@ -26,7 +26,6 @@ env/ # Generated pipeline output (committed templates stay; generated files do not) output/error_report.json -output/azure_resource_groups.json weather.db # Editor and IDE settings diff --git a/output/azure_resource_groups.json b/output/azure_resource_groups.json new file mode 100644 index 0000000..6fd6380 --- /dev/null +++ b/output/azure_resource_groups.json @@ -0,0 +1,13 @@ +{ + "value": [ + { + "id": "/subscriptions/1120c89d-2a5f-4a15-a582-2ea34f0bb5c3/resourceGroups/rg-hyf-students-readonly", + "name": "rg-hyf-students-readonly", + "type": "Microsoft.Resources/resourceGroups", + "location": "westeurope", + "properties": { + "provisioningState": "Succeeded" + } + } + ] +}