| import numpy as np |
| import pandas as pd |
| import geopandas as gpd |
| from shapely.geometry import Polygon, Point |
| from scipy.spatial import cKDTree |
| import json |
| from typing import Dict, List, Tuple |
| from utils import load_data_and_process |
| from config import DATASET_LIST |
|
|
|
|
| class ExplorationGridSystem: |
| """ |
| Creates and manages a grid system for exploration planning. |
| Each grid cell gets scored on multiple criteria. |
| """ |
| |
| def __init__(self, cell_size_km: float = 5.0, study_bounds: Dict = None): |
| """ |
| Initialize grid system. |
| |
| Args: |
| cell_size_km: Size of each grid cell in kilometers |
| study_bounds: Optional bounds dict with 'min_lat', 'max_lat', 'min_lon', 'max_lon' |
| """ |
| self.cell_size_km = cell_size_km |
| self.cell_size_degrees = cell_size_km / 111.0 |
| |
| |
| self.bounds = study_bounds or { |
| 'min_lat': 50.0, 'max_lat': 62.0, |
| 'min_lon': -8.0, 'max_lon': 3.0 |
| } |
| |
| self.grid_gdf = None |
| self.scores = {} |
| |
| def create_grid(self) -> gpd.GeoDataFrame: |
| """Vectorized grid creation using global land mask for better performance.""" |
| print(f"Creating vectorized offshore grid...") |
| |
| try: |
| from global_land_mask import globe |
| except ImportError: |
| return self.create_grid_fallback() |
| |
| |
| lat_range = self.bounds['max_lat'] - self.bounds['min_lat'] |
| lon_range = self.bounds['max_lon'] - self.bounds['min_lon'] |
| |
| n_lat_cells = int(np.ceil(lat_range / self.cell_size_degrees)) |
| n_lon_cells = int(np.ceil(lon_range / self.cell_size_degrees)) |
| |
| |
| lat_centers = np.linspace( |
| self.bounds['min_lat'] + self.cell_size_degrees/2, |
| self.bounds['max_lat'] - self.cell_size_degrees/2, |
| n_lat_cells |
| ) |
| lon_centers = np.linspace( |
| self.bounds['min_lon'] + self.cell_size_degrees/2, |
| self.bounds['max_lon'] - self.cell_size_degrees/2, |
| n_lon_cells |
| ) |
| |
| |
| lon_grid, lat_grid = np.meshgrid(lon_centers, lat_centers) |
| |
| print(f"Checking {lat_grid.size} grid points with land mask...") |
| |
| |
| is_water_grid = globe.is_ocean(lat_grid, lon_grid) |
| |
| |
| water_indices = np.where(is_water_grid) |
| water_lats = lat_grid[water_indices] |
| water_lons = lon_grid[water_indices] |
| |
| print(f"Found {len(water_lats)} offshore cells from {lat_grid.size} total") |
| |
| |
| grid_cells = [] |
| for idx, (center_lat, center_lon) in enumerate(zip(water_lats, water_lons)): |
| min_lat = center_lat - self.cell_size_degrees/2 |
| max_lat = center_lat + self.cell_size_degrees/2 |
| min_lon = center_lon - self.cell_size_degrees/2 |
| max_lon = center_lon + self.cell_size_degrees/2 |
| |
| cell_polygon = Polygon([ |
| (min_lon, min_lat), (max_lon, min_lat), |
| (max_lon, max_lat), (min_lon, max_lat), |
| (min_lon, min_lat) |
| ]) |
|
|
| grid_cells.append({ |
| 'cell_id': idx, |
| 'grid_i': water_indices[0][idx], |
| 'grid_j': water_indices[1][idx], |
| 'center_lat': center_lat, |
| 'center_lon': center_lon, |
| 'geometry': cell_polygon |
| }) |
|
|
| self.grid_gdf = gpd.GeoDataFrame(grid_cells, crs='EPSG:4326') |
| print(f"Vectorized land mask complete: {len(self.grid_gdf)} offshore cells") |
| |
| return self.grid_gdf |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| def create_sophisticated_offshore_grid(self) -> gpd.GeoDataFrame: |
| """Create grid using existing offshore infrastructure as a mask.""" |
| print(f"Creating sophisticated offshore grid...") |
| |
| |
| full_grid = self.create_full_grid_internal() |
| |
| |
| try: |
| df_wells = load_data_and_process("wells") |
| df_fields = load_data_and_process("offshore_fields") |
| df_pipelines = load_data_and_process("pipelines") |
| |
| |
| utm_crs = full_grid.estimate_utm_crs() |
| |
| |
| wells_utm = df_wells.to_crs(utm_crs) |
| fields_utm = df_fields.to_crs(utm_crs) |
| |
| |
| buffer_distance = 50000 |
| |
| infrastructure_zones = [] |
| |
| |
| wells_buffered = wells_utm.buffer(buffer_distance).unary_union |
| infrastructure_zones.append(wells_buffered) |
| |
| |
| fields_buffered = fields_utm.buffer(buffer_distance).unary_union |
| infrastructure_zones.append(fields_buffered) |
| |
| |
| from shapely.ops import unary_union |
| combined_offshore_zone = unary_union(infrastructure_zones) |
| |
| |
| import geopandas as gpd |
| from shapely.geometry import mapping |
| offshore_zone_gdf = gpd.GeoDataFrame([1], geometry=[combined_offshore_zone], crs=utm_crs) |
| offshore_zone_gdf = offshore_zone_gdf.to_crs('EPSG:4326') |
| |
| |
| grid_utm = full_grid.to_crs(utm_crs) |
| offshore_cells = gpd.sjoin(grid_utm, offshore_zone_gdf.to_crs(utm_crs), predicate='within') |
| |
| |
| self.grid_gdf = offshore_cells.to_crs('EPSG:4326') |
| print(f"Sophisticated filtering: {len(self.grid_gdf)} offshore cells from {len(full_grid)} total") |
| |
| except Exception as e: |
| print(f"Sophisticated filtering failed: {e}, using simple offshore bounds") |
| return self.create_offshore_grid() |
| |
| return self.grid_gdf |
|
|
| def create_full_grid_internal(self) -> gpd.GeoDataFrame: |
| """Internal method to create full grid without filtering.""" |
| |
| lat_range = self.bounds['max_lat'] - self.bounds['min_lat'] |
| lon_range = self.bounds['max_lon'] - self.bounds['min_lon'] |
| |
| n_lat_cells = int(np.ceil(lat_range / self.cell_size_degrees)) |
| n_lon_cells = int(np.ceil(lon_range / self.cell_size_degrees)) |
| |
| grid_cells = [] |
| cell_id = 0 |
| |
| for i in range(n_lat_cells): |
| for j in range(n_lon_cells): |
| min_lat = self.bounds['min_lat'] + i * self.cell_size_degrees |
| max_lat = min_lat + self.cell_size_degrees |
| min_lon = self.bounds['min_lon'] + j * self.cell_size_degrees |
| max_lon = min_lon + self.cell_size_degrees |
| |
| center_lat = (min_lat + max_lat) / 2 |
| center_lon = (min_lon + max_lon) / 2 |
| |
| cell_polygon = Polygon([ |
| (min_lon, min_lat), (max_lon, min_lat), |
| (max_lon, max_lat), (min_lon, max_lat), |
| (min_lon, min_lat) |
| ]) |
| |
| grid_cells.append({ |
| 'cell_id': cell_id, |
| 'grid_i': i, 'grid_j': j, |
| 'center_lat': center_lat, |
| 'center_lon': center_lon, |
| 'geometry': cell_polygon |
| }) |
| cell_id += 1 |
| |
| return gpd.GeoDataFrame(grid_cells, crs='EPSG:4326') |
|
|
|
|
| def calculate_seismic_score(self, radius_km: float = 25.0) -> np.ndarray: |
| """ |
| Calculate seismic risk score for each grid cell. |
| Higher score = more seismic events nearby = higher risk. |
| """ |
| print("Calculating seismic scores...") |
| |
| df_seismic = load_data_and_process("seismic") |
| |
| |
| utm_crs = self.grid_gdf.estimate_utm_crs() |
| grid_utm = self.grid_gdf.to_crs(utm_crs) |
| seismic_utm = df_seismic.to_crs(utm_crs) |
| |
| scores = np.zeros(len(self.grid_gdf)) |
| |
| for idx, cell in grid_utm.iterrows(): |
| |
| print(f"Calculating ecological score for cell number {idx}") |
| cell_center = cell.geometry.centroid |
| buffer = cell_center.buffer(radius_km * 1000) |
| |
| |
| within_buffer = seismic_utm[seismic_utm.geometry.within(buffer)] |
| scores[idx] = len(within_buffer) |
| |
| |
| if scores.max() > 0: |
| scores = scores / scores.max() |
| |
| self.scores['seismic'] = scores |
| print(f"Seismic scoring complete. Max events per cell: {scores.max() * (df_seismic.shape[0] if scores.max() > 0 else 0):.0f}") |
| |
| return scores |
| |
| def calculate_ecological_sensitivity_score(self) -> np.ndarray: |
| """ |
| Calculate ecological sensitivity score. |
| For now, uses proximity to existing offshore fields as proxy. |
| TODO: Replace with actual benthos/habitat data when available. |
| """ |
| print("Calculating ecological sensitivity scores...") |
| |
| |
| df_fields = load_data_and_process("offshore_fields") |
| |
| utm_crs = self.grid_gdf.estimate_utm_crs() |
| grid_utm = self.grid_gdf.to_crs(utm_crs) |
| fields_utm = df_fields.to_crs(utm_crs) |
| |
| scores = np.zeros(len(self.grid_gdf)) |
| |
| for idx, cell in grid_utm.iterrows(): |
| cell_center = cell.geometry.centroid |
| |
| |
| min_distance = float('inf') |
| for _, field in fields_utm.iterrows(): |
| distance = cell_center.distance(field.geometry) |
| min_distance = min(min_distance, distance) |
| |
| |
| if min_distance != float('inf'): |
| |
| sensitivity = max(0, 1 - (min_distance / (10 * 1000))) |
| scores[idx] = sensitivity |
| |
| self.scores['ecological'] = scores |
| print(f"Ecological sensitivity scoring complete. Max sensitivity: {scores.max():.3f}") |
| |
| return scores |
| |
| def calculate_infrastructure_proximity_score(self, radius_km: float = 50.0) -> np.ndarray: |
| """ |
| Calculate infrastructure proximity score. |
| Higher score = more infrastructure nearby = better for economics but worse for environment. |
| """ |
| print("Calculating infrastructure proximity scores...") |
| |
| |
| infrastructure_types = ['wells', 'pipelines', 'offshore_fields'] |
| |
| utm_crs = self.grid_gdf.estimate_utm_crs() |
| grid_utm = self.grid_gdf.to_crs(utm_crs) |
| |
| scores = np.zeros(len(self.grid_gdf)) |
| |
| for infra_type in infrastructure_types: |
| df_infra = load_data_and_process(infra_type) |
| infra_utm = df_infra.to_crs(utm_crs) |
| |
| for idx, cell in grid_utm.iterrows(): |
| cell_center = cell.geometry.centroid |
| buffer = cell_center.buffer(radius_km * 1000) |
| |
| |
| within_buffer = infra_utm[infra_utm.geometry.within(buffer)] |
| scores[idx] += len(within_buffer) |
| |
| |
| if scores.max() > 0: |
| scores = scores / scores.max() |
| |
| self.scores['infrastructure'] = scores |
| print(f"Infrastructure proximity scoring complete. Max infrastructure count: {scores.max() * 100:.0f}") |
| |
| return scores |
| |
| def calculate_all_scores(self) -> Dict[str, np.ndarray]: |
| """Calculate all scoring criteria.""" |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| return self.calculate_all_scores_vectorized() |
| |
| def get_scored_grid(self) -> gpd.GeoDataFrame: |
| """ |
| Get grid with all scores attached. |
| FIXED: Ensures MCDA is run if suitability_score doesn't exist. |
| """ |
| if not self.scores: |
| self.calculate_all_scores_vectorized() |
| |
| |
| if 'suitability_score' not in self.grid_gdf.columns: |
| print("suitability_score not found, running MCDA...") |
| self.run_mcda_analysis() |
| |
| return self.grid_gdf.copy() |
|
|
|
|
| def calculate_seismic_score_optimized(self, radius_km: float = 25.0) -> np.ndarray: |
| """ |
| Optimized seismic risk calculation using spatial indexing. |
| ~100x faster than the buffer approach. |
| """ |
| print("Calculating seismic scores (optimized)...") |
| |
| df_seismic = load_data_and_process("seismic") |
| |
| |
| grid_coords = np.array([[geom.centroid.x, geom.centroid.y] for geom in self.grid_gdf.geometry]) |
| seismic_coords = np.array([[geom.x, geom.y] for geom in df_seismic.geometry]) |
| |
| |
| seismic_tree = cKDTree(seismic_coords) |
| |
| |
| radius_degrees = radius_km / 111.0 |
| |
| scores = np.zeros(len(self.grid_gdf)) |
| |
| |
| for idx, grid_point in enumerate(grid_coords): |
| |
| indices = seismic_tree.query_ball_point(grid_point, radius_degrees) |
| scores[idx] = len(indices) |
| |
| if idx % 1000 == 0: |
| print(f"Processed {idx}/{len(grid_coords)} cells") |
| |
| |
| if scores.max() > 0: |
| scores = scores / scores.max() |
| |
| self.scores['seismic'] = scores |
| print(f"Seismic scoring complete. Max events per cell: {scores.max() * df_seismic.shape[0]:.0f}") |
| |
| return scores |
|
|
| def calculate_ecological_sensitivity_score_optimized(self) -> np.ndarray: |
| """ |
| Optimized ecological sensitivity calculation. |
| """ |
| print("Calculating ecological sensitivity scores (optimized)...") |
| |
| df_fields = load_data_and_process("offshore_fields") |
| |
| |
| grid_coords = np.array([[geom.centroid.x, geom.centroid.y] for geom in self.grid_gdf.geometry]) |
| |
| |
| field_coords = np.array([[geom.centroid.x, geom.centroid.y] for geom in df_fields.geometry]) |
| |
| |
| field_tree = cKDTree(field_coords) |
| |
| scores = np.zeros(len(self.grid_gdf)) |
| |
| |
| distances, _ = field_tree.query(grid_coords) |
| |
| |
| |
| max_sensitivity_distance = 0.09 |
| scores = np.maximum(0, 1 - (distances / max_sensitivity_distance)) |
| |
| self.scores['ecological'] = scores |
| print(f"Ecological sensitivity scoring complete. Max sensitivity: {scores.max():.3f}") |
| |
| return scores |
|
|
| def calculate_infrastructure_proximity_score_optimized(self, radius_km: float = 50.0) -> np.ndarray: |
| """ |
| Optimized infrastructure proximity calculation. |
| """ |
| print("Calculating infrastructure proximity scores (optimized)...") |
| |
| infrastructure_types = ['wells', 'pipelines', 'offshore_fields'] |
| |
| |
| grid_coords = np.array([[geom.centroid.x, geom.centroid.y] for geom in self.grid_gdf.geometry]) |
| |
| radius_degrees = radius_km / 111.0 |
| scores = np.zeros(len(self.grid_gdf)) |
| |
| for infra_type in infrastructure_types: |
| print(f" Processing {infra_type}...") |
| df_infra = load_data_and_process(infra_type) |
| |
| |
| if infra_type == 'pipelines': |
| |
| infra_coords = [] |
| for geom in df_infra.geometry: |
| if hasattr(geom, 'coords'): |
| |
| infra_coords.extend([(x, y) for x, y in geom.coords]) |
| elif hasattr(geom, 'geoms'): |
| |
| for line in geom.geoms: |
| infra_coords.extend([(x, y) for x, y in line.coords]) |
| infra_coords = np.array(infra_coords) |
| else: |
| |
| infra_coords = np.array([[geom.centroid.x, geom.centroid.y] for geom in df_infra.geometry]) |
| |
| if len(infra_coords) == 0: |
| continue |
| |
| |
| infra_tree = cKDTree(infra_coords) |
| |
| |
| for idx, grid_point in enumerate(grid_coords): |
| indices = infra_tree.query_ball_point(grid_point, radius_degrees) |
| scores[idx] += len(indices) |
| |
| |
| if scores.max() > 0: |
| scores = scores / scores.max() |
| |
| self.scores['infrastructure'] = scores |
| print(f"Infrastructure proximity scoring complete. Max count: {scores.max() * 100:.0f}") |
| |
| return scores |
|
|
| |
| def calculate_all_scores_vectorized(self): |
| """ |
| Ultra-fast vectorized approach using GeoPandas spatial operations. |
| """ |
| print("Calculating all scores (vectorized approach)...") |
| |
| if self.grid_gdf is None: |
| self.create_grid() |
| |
| |
| utm_crs = self.grid_gdf.estimate_utm_crs() |
| grid_utm = self.grid_gdf.to_crs(utm_crs) |
| |
| |
| buffer_25km = grid_utm.copy() |
| buffer_25km['geometry'] = buffer_25km.geometry.centroid.buffer(25000) |
| |
| buffer_50km = grid_utm.copy() |
| buffer_50km['geometry'] = buffer_50km.geometry.centroid.buffer(50000) |
| |
| |
| print(" Seismic scoring...") |
| df_seismic = load_data_and_process("seismic") |
| seismic_utm = df_seismic.to_crs(utm_crs) |
| |
| |
| seismic_counts = gpd.sjoin(buffer_25km, seismic_utm, predicate='contains').groupby(level=0).size() |
| seismic_scores = np.zeros(len(grid_utm)) |
| seismic_scores[seismic_counts.index] = seismic_counts.values |
| seismic_scores = seismic_scores / seismic_scores.max() if seismic_scores.max() > 0 else seismic_scores |
| |
| |
| print(" Infrastructure scoring...") |
| infrastructure_types = ['wells', 'pipelines', 'offshore_fields'] |
| infra_scores = np.zeros(len(grid_utm)) |
| |
| for infra_type in infrastructure_types: |
| df_infra = load_data_and_process(infra_type) |
| infra_utm = df_infra.to_crs(utm_crs) |
| |
| infra_counts = gpd.sjoin(buffer_50km, infra_utm, predicate='intersects').groupby(level=0).size() |
| temp_scores = np.zeros(len(grid_utm)) |
| temp_scores[infra_counts.index] = infra_counts.values |
| infra_scores += temp_scores |
| |
| infra_scores = infra_scores / infra_scores.max() if infra_scores.max() > 0 else infra_scores |
| |
| |
| print(" Ecological scoring...") |
| df_fields = load_data_and_process("offshore_fields") |
| fields_utm = df_fields.to_crs(utm_crs) |
| |
| |
| grid_centroids = grid_utm.geometry.centroid |
| ecological_scores = np.zeros(len(grid_utm)) |
| |
| for idx, centroid in enumerate(grid_centroids): |
| min_distance = fields_utm.geometry.distance(centroid).min() |
| |
| ecological_scores[idx] = max(0, 1 - (min_distance / 10000)) |
| |
| |
| self.scores = { |
| 'seismic': seismic_scores, |
| 'infrastructure': infra_scores, |
| 'ecological': ecological_scores |
| } |
| |
| |
| for score_name, score_values in self.scores.items(): |
| self.grid_gdf[f'{score_name}_score'] = score_values |
| |
|
|
| print("All scores calculated (vectorized)!") |
| return self.scores |
|
|
|
|
| def run_mcda_analysis(self, weights: Dict[str, float] = None) -> gpd.GeoDataFrame: |
| """ |
| Run MCDA analysis on grid using calculated scores. |
| FIXED: Ensures suitability_score is properly calculated and added. |
| """ |
| if not self.scores: |
| self.calculate_all_scores_vectorized() |
| |
| |
| if weights is None: |
| weights = { |
| 'seismic': 0.4, |
| 'ecological': 0.4, |
| 'infrastructure': 0.2 |
| } |
| |
| print(f"Running MCDA with weights: {weights}") |
| |
| |
| seismic_weight = weights.get('seismic', 0.4) |
| ecological_weight = weights.get('ecological', 0.4) |
| infrastructure_weight = weights.get('infrastructure', 0.2) |
| |
| suitability_score = ( |
| seismic_weight * self.scores['seismic'] + |
| ecological_weight * self.scores['ecological'] + |
| infrastructure_weight * self.scores['infrastructure'] |
| ) |
| |
| |
| self.grid_gdf['suitability_score'] = suitability_score |
| |
| |
| self.grid_gdf['rank'] = self.grid_gdf['suitability_score'].rank(method='min') |
| |
| |
| ranked_grid = self.grid_gdf.sort_values('suitability_score').reset_index(drop=True) |
| |
| print(f"MCDA analysis complete. Best score: {suitability_score.min():.3f}, Worst: {suitability_score.max():.3f}") |
| |
| return ranked_grid |