diff --git a/computing/config_loader.py b/computing/config_loader.py index 41df82d9..813709b5 100644 --- a/computing/config_loader.py +++ b/computing/config_loader.py @@ -3,7 +3,8 @@ import yaml _CONFIG_PATH = Path(__file__).resolve().parent / "config.yaml" -PROJECT_ROOT = Path(__file__).resolve().parents[1] +# PROJECT_ROOT = Path(__file__).resolve().parents[1] +PROJECT_ROOT = Path("/home/cfpt-jedi/developer/repos/core-stack-backend") def _load(): @@ -149,7 +150,7 @@ def _output_entry(module: str, index: int = 0) -> dict: PAN_INDIA_SLOPE_PERCENTAGE_PATH = PROJECT_ROOT / "data/base_layers/Pan_India_slope_percentage.tif" LOCAL_SLOPE_PERCENTAGE_OUTPUT = PROJECT_ROOT / "data/layers/slope_percentage" -PAN_INDIA_MWS_CONNECTIVITY_PATH = PROJECT_ROOT / "data/layers/mws_connectivity/Pan_India_mws_connectivity.geojson" +PAN_INDIA_MWS_CONNECTIVITY_PATH = PROJECT_ROOT / "data/base_layers/Pan_India_mws_connectivity.geojson" LOCAL_MWS_CONNECTIVITY_OUTPUT = PROJECT_ROOT / "data/layers/mws_connectivity/mws_connectivity_local" LOCAL_MWS_CENTROID_OUTPUT = PROJECT_ROOT / "data/layers/mws_centroid" diff --git a/computing/misc/antyodaya_local_compute.py b/computing/misc/antyodaya_local_compute.py index 9911ba18..cc2e7daf 100644 --- a/computing/misc/antyodaya_local_compute.py +++ b/computing/misc/antyodaya_local_compute.py @@ -26,7 +26,7 @@ from computing.local_compute_helper import ( PROJECT_ROOT, build_output_vector_path, - load_precomputed_watersheds, + load_precomputed_panchayat, read_validated_vector_file, write_vector_output, validate_geometry, @@ -39,20 +39,14 @@ GEOSERVER_WORKSPACE = "antyodaya_2020" -def _compute_antyodaya_for_watersheds(watersheds_gdf, antyodaya_gdf): - """ - Spatially filters Antyodaya features with watershed/ROI boundaries. 
- """ +def _compute_antyodaya_for_panchayat(panchayat_gdf, antyodaya_gdf): if antyodaya_gdf.empty: return antyodaya_gdf - - if watersheds_gdf.crs and antyodaya_gdf.crs and watersheds_gdf.crs != antyodaya_gdf.crs: - antyodaya_gdf = antyodaya_gdf.to_crs(watersheds_gdf.crs) - outer_boundary = watersheds_gdf.geometry.unary_union + outer_boundary = panchayat_gdf.geometry.unary_union - # Precise intersection check - antyodaya_in_roi = antyodaya_gdf[antyodaya_gdf.intersects(outer_boundary)].copy() + # Clip Antyodaya geometries to the panchayat boundary + antyodaya_in_roi = gpd.clip(antyodaya_gdf, outer_boundary).copy() # Final cleanup antyodaya_in_roi = antyodaya_in_roi[~antyodaya_in_roi.geometry.is_empty] @@ -77,7 +71,7 @@ def generate_antyodaya_data_local( ): if state and district and block: layer_name = f"antyodaya20_{valid_gee_text(district.lower())}_{valid_gee_text(block.lower())}" - watersheds_gdf, watershed_source = load_precomputed_watersheds( + panchayat_gdf, watershed_source = load_precomputed_panchayat( state=state, district=district, block=block, @@ -88,22 +82,22 @@ def generate_antyodaya_data_local( if not roi_path or not asset_suffix: raise ValueError("ROI path and asset_suffix are required for custom runs.") layer_name = f"antyodaya20_{valid_gee_text(asset_suffix).lower()}" - watersheds_gdf = read_validated_vector_file(roi_path, f"Invalid ROI file: {roi_path}") + panchayat_gdf = read_validated_vector_file(roi_path, f"Invalid ROI file: {roi_path}") print(f"ROI source: {roi_path}") if not os.path.exists(PAN_INDIA_ANTYODAYA_2020): raise FileNotFoundError(f"PAN INDIA Antyodaya file not found at {PAN_INDIA_ANTYODAYA_2020}") print("Loading Antyodaya data overlapping ROI...") - antyodaya_gdf = gpd.read_file(PAN_INDIA_ANTYODAYA_2020, mask=watersheds_gdf) + antyodaya_gdf = gpd.read_file(PAN_INDIA_ANTYODAYA_2020, mask=panchayat_gdf) antyodaya_gdf = validate_geometry(antyodaya_gdf) if antyodaya_gdf.empty: print("Warning: PAN INDIA Antyodaya file has no valid 
geometries overlapping ROI") else: print(f"Loaded {len(antyodaya_gdf)} Antyodaya features") - result_gdf = _compute_antyodaya_for_watersheds( - watersheds_gdf=watersheds_gdf, + result_gdf = _compute_antyodaya_for_panchayat( + panchayat_gdf=panchayat_gdf, antyodaya_gdf=antyodaya_gdf, ) print(f"Final valid Antyodaya features after spatial filter: {len(result_gdf)}") diff --git a/computing/misc/drainage_density_local_compute.py b/computing/misc/drainage_density_local_compute.py index 3591194b..932bec86 100644 --- a/computing/misc/drainage_density_local_compute.py +++ b/computing/misc/drainage_density_local_compute.py @@ -185,4 +185,4 @@ def drainage_density( update_layer_sync_status(layer_id=layer_id, sync_to_geoserver=True) print(f"Sync Data for layer_id: {layer_id}") - return True + return True \ No newline at end of file diff --git a/computing/misc/facilities_proximity_local_compute.py b/computing/misc/facilities_proximity_local_compute.py index 9f28eb6a..7cd4225d 100644 --- a/computing/misc/facilities_proximity_local_compute.py +++ b/computing/misc/facilities_proximity_local_compute.py @@ -26,27 +26,30 @@ def _compute_proximity_for_panchayat(panchayat_gdf, facilities_gdf): - """ - Filters facilities to strictly those intersecting the panchayat boundaries, - without altering/clipping their geometries. 
- """ if facilities_gdf.empty: return facilities_gdf - # Ensure CRS matches - if panchayat_gdf.crs and facilities_gdf.crs and panchayat_gdf.crs != facilities_gdf.crs: - facilities_gdf = facilities_gdf.to_crs(panchayat_gdf.crs) - outer_boundary = panchayat_gdf.geometry.unary_union # Keep facilities that intersect the boundary, geometries unchanged - facilities_in_roi = facilities_gdf[facilities_gdf.intersects(outer_boundary)].copy() + facilities_in_roi = gpd.clip(facilities_gdf, outer_boundary).copy() # Final cleanup facilities_in_roi = facilities_in_roi[~facilities_in_roi.geometry.is_empty] facilities_in_roi = facilities_in_roi[facilities_in_roi.geometry.is_valid] facilities_in_roi = facilities_in_roi[facilities_in_roi.geometry.notna()] + # Rename NAME to censusname + if "NAME" in facilities_in_roi.columns: + facilities_in_roi = facilities_in_roi.rename(columns={"NAME": "censusname"}) + + # Add state, district, tehsil from panchayat_gdf + for col in ["state", "district", "tehsil"]: + if col in panchayat_gdf.columns: + # Assign the value for the tehsil (taking the first valid row) + first_val = panchayat_gdf[col].dropna().iloc[0] if not panchayat_gdf[col].dropna().empty else None + facilities_in_roi[col] = first_val + return facilities_in_roi diff --git a/computing/misc/livestocks_local_compute.py b/computing/misc/livestocks_local_compute.py index 67120c27..c14e8660 100644 --- a/computing/misc/livestocks_local_compute.py +++ b/computing/misc/livestocks_local_compute.py @@ -11,7 +11,7 @@ from computing.local_compute_helper import ( PROJECT_ROOT, build_output_vector_path, - load_precomputed_watersheds, + load_precomputed_panchayat, read_validated_vector_file, write_vector_output, validate_geometry, @@ -54,22 +54,14 @@ def _coerce_nullable_integer_columns(gdf): gdf[column] = gdf[column].astype("Int64") return gdf -def _compute_livestocks_for_watersheds(watersheds_gdf, livestocks_gdf): - """ - Spatially filters Livestock features with watershed/ROI boundaries. 
- """ +def _compute_livestocks_for_panchayat(panchayat_gdf, livestocks_gdf): if livestocks_gdf.empty: return livestocks_gdf - - if watersheds_gdf.crs and livestocks_gdf.crs and watersheds_gdf.crs != livestocks_gdf.crs: - livestocks_gdf = livestocks_gdf.to_crs(watersheds_gdf.crs) - - outer_boundary = watersheds_gdf.geometry.unary_union - # Precise intersection check - livestocks_in_roi = livestocks_gdf[livestocks_gdf.intersects(outer_boundary)].copy() + outer_boundary = panchayat_gdf.geometry.unary_union - # Final cleanup + # Clip Livestock geometries to the panchayat boundary + livestocks_in_roi = gpd.clip(livestocks_gdf, outer_boundary).copy() livestocks_in_roi = livestocks_in_roi[~livestocks_in_roi.geometry.is_empty] livestocks_in_roi = livestocks_in_roi[livestocks_in_roi.geometry.is_valid] livestocks_in_roi = livestocks_in_roi[livestocks_in_roi.geometry.notna()] @@ -92,33 +84,32 @@ def generate_livestocks_data_local( ): if state and district and block: layer_name = f"livestocks_{valid_gee_text(district.lower())}_{valid_gee_text(block.lower())}" - watersheds_gdf, watershed_source = load_precomputed_watersheds( + panchayat_gdf, watershed_source = load_precomputed_panchayat( state=state, district=district, block=block, precomputed_roi_dir=precomputed_roi_dir, ) - print(f"Watershed boundary source: {watershed_source}") else: if not roi_path or not asset_suffix: raise ValueError("ROI path and asset_suffix are required for custom runs.") layer_name = f"livestocks_{valid_gee_text(asset_suffix).lower()}" - watersheds_gdf = read_validated_vector_file(roi_path, f"Invalid ROI file: {roi_path}") + panchayat_gdf = read_validated_vector_file(roi_path, f"Invalid ROI file: {roi_path}") print(f"ROI source: {roi_path}") if not os.path.exists(PAN_INDIA_LIVESTOCKS): raise FileNotFoundError(f"PAN INDIA Livestocks file not found at {PAN_INDIA_LIVESTOCKS}") print("Loading Livestocks data overlapping ROI...") - livestocks_gdf = gpd.read_file(PAN_INDIA_LIVESTOCKS, mask=watersheds_gdf) + 
livestocks_gdf = gpd.read_file(PAN_INDIA_LIVESTOCKS, mask=panchayat_gdf) livestocks_gdf = validate_geometry(livestocks_gdf) if livestocks_gdf.empty: print("Warning: PAN INDIA Livestocks file has no valid geometries overlapping ROI") else: print(f"Loaded {len(livestocks_gdf)} Livestock features") - result_gdf = _compute_livestocks_for_watersheds( - watersheds_gdf=watersheds_gdf, + result_gdf = _compute_livestocks_for_panchayat( + panchayat_gdf=panchayat_gdf, livestocks_gdf=livestocks_gdf, ) print(f"Final valid Livestock features after spatial filter: {len(result_gdf)}") @@ -158,7 +149,7 @@ def generate_livestocks_data_local( block=block, layer_name=layer_name, asset_id=asset_id, - dataset_name="Livestock Census", + dataset_name="Livestock Census 2019", misc={"is_generated_locally": True}, ) if layer_id: diff --git a/computing/mws/mws_connectivity_local_compute.py b/computing/mws/mws_connectivity_local_compute.py index e1765896..5c94d0a3 100644 --- a/computing/mws/mws_connectivity_local_compute.py +++ b/computing/mws/mws_connectivity_local_compute.py @@ -32,23 +32,15 @@ def _compute_mws_connectivity_for_watersheds(watersheds_gdf, mws_gdf): - mws_in_roi = mws_gdf.copy() - - if mws_in_roi.empty: + if mws_gdf.empty: print("No MWS connectivity found within the outer boundary.") - return mws_in_roi - - print(f"MWS connectivity within outer boundary: {len(mws_in_roi)}") + return mws_gdf - # Step 2: Spatial join to clip results to individual watersheds - mws_in_roi = gpd.sjoin( - mws_in_roi, - watersheds_gdf[["geometry"]], # no uid, no collision - how="inner", - predicate="intersects", - ).drop(columns=["index_right"], errors="ignore") + # Step 1: Clip to the outer boundary of watersheds + outer_boundary = watersheds_gdf.geometry.unary_union + mws_in_roi = gpd.clip(mws_gdf, outer_boundary).copy() - # Step 3: Drop empty/invalid geometries + # Step 2: Drop empty/invalid geometries mws_in_roi = fix_invalid_geometry_in_gdf(mws_in_roi) mws_in_roi = mws_in_roi[ 
mws_in_roi.geometry.notna()