Machine Learning

Advanced machine learning applications in Earth Engine.

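This page shows the complete source of the machine learning example: a land cover classification workflow that trains a Random Forest classifier on a Sentinel-2 composite in Earth Engine, assesses its accuracy, and exports the result to Google Drive.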

  1"""
  2Advanced Example 1: Machine Learning Classification
  3===================================================
  4
  5This example demonstrates:
  6- Land cover classification using machine learning
  7- Training data collection and preparation
  8- Random Forest classifier implementation
  9- Accuracy assessment and validation
 10- Large-scale prediction and export
 11
 12Use case: Mapping land cover types using Sentinel-2 imagery
 13"""
 14
 15import ee
 16import numpy as np
 17import pandas as pd
 18from sklearn.metrics import classification_report, confusion_matrix
 19import matplotlib.pyplot as plt
 20import seaborn as sns
 21
 22class LandCoverClassifier:
 23    """
 24    Advanced land cover classification using Google Earth Engine and Machine Learning.
 25    """
 26    
 27    def __init__(self, project_id):
 28        """
 29        Initialize the classifier with Earth Engine project.
 30        """
 31        self.project_id = project_id
 32        self.classifier = None
 33        self.trained_classifier = None
 34        self.class_names = ['Water', 'Forest', 'Urban', 'Agriculture', 'Bare_Soil']
 35        self.class_values = [0, 1, 2, 3, 4]
 36        
 37        # Initialize Earth Engine
 38        try:
 39            ee.Initialize(project=project_id)
 40            print("✓ Earth Engine initialized successfully!")
 41        except Exception as e:
 42            print(f"✗ Error initializing Earth Engine: {e}")
 43            raise
 44    
 45    def create_composite(self, geometry, start_date, end_date, cloud_threshold=10):
 46        """
 47        Create a cloud-free Sentinel-2 composite.
 48        
 49        Args:
 50            geometry: Area of interest
 51            start_date: Start date for image collection
 52            end_date: End date for image collection
 53            cloud_threshold: Maximum cloud cover percentage
 54        
 55        Returns:
 56            ee.Image: Cloud-free composite image
 57        """
 58        print(f"Creating Sentinel-2 composite from {start_date} to {end_date}")
 59        
 60        # Load Sentinel-2 Surface Reflectance collection
 61        collection = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
 62                     .filterDate(start_date, end_date)
 63                     .filterBounds(geometry)
 64                     .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', cloud_threshold)))
 65        
 66        print(f"Found {collection.size().getInfo()} images")
 67        
 68        # Cloud masking function
 69        def mask_clouds(image):
 70            qa = image.select('QA60')
 71            cloud_bit_mask = 1 << 10
 72            cirrus_bit_mask = 1 << 11
 73            mask = qa.bitwiseAnd(cloud_bit_mask).eq(0).And(
 74                   qa.bitwiseAnd(cirrus_bit_mask).eq(0))
 75            return image.updateMask(mask).divide(10000)
 76        
 77        # Apply cloud masking and create median composite
 78        composite = collection.map(mask_clouds).median()
 79        
 80        # Add spectral indices
 81        composite = self.add_spectral_indices(composite)
 82        
 83        return composite
 84    
 85    def add_spectral_indices(self, image):
 86        """
 87        Add spectral indices to improve classification accuracy.
 88        
 89        Args:
 90            image: Sentinel-2 image
 91        
 92        Returns:
 93            ee.Image: Image with additional spectral indices
 94        """
 95        # NDVI (Normalized Difference Vegetation Index)
 96        ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI')
 97        
 98        # NDWI (Normalized Difference Water Index)
 99        ndwi = image.normalizedDifference(['B3', 'B8']).rename('NDWI')
100        
101        # NDBI (Normalized Difference Built-up Index)
102        ndbi = image.normalizedDifference(['B11', 'B8']).rename('NDBI')
103        
104        # EVI (Enhanced Vegetation Index)
105        evi = image.expression(
106            '2.5 * ((NIR - RED) / (NIR + 6 * RED - 7.5 * BLUE + 1))',
107            {
108                'NIR': image.select('B8'),
109                'RED': image.select('B4'),
110                'BLUE': image.select('B2')
111            }
112        ).rename('EVI')
113        
114        # SAVI (Soil Adjusted Vegetation Index)
115        savi = image.expression(
116            '((NIR - RED) / (NIR + RED + 0.5)) * (1.5)',
117            {
118                'NIR': image.select('B8'),
119                'RED': image.select('B4')
120            }
121        ).rename('SAVI')
122        
123        return image.addBands([ndvi, ndwi, ndbi, evi, savi])
124    
125    def create_training_data(self, image, training_points):
126        """
127        Create training dataset from labeled points.
128        
129        Args:
130            image: Composite image for training
131            training_points: ee.FeatureCollection with training points
132        
133        Returns:
134            ee.FeatureCollection: Training features with spectral values
135        """
136        print("Creating training dataset...")
137        
138        # Select bands for training
139        bands = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B11', 'B12',
140                'NDVI', 'NDWI', 'NDBI', 'EVI', 'SAVI']
141        
142        # Sample the image at training points
143        training = image.select(bands).sampleRegions(
144            collection=training_points,
145            properties=['landcover'],
146            scale=10
147        )
148        
149        print(f"Training samples: {training.size().getInfo()}")
150        
151        return training
152    
153    def train_classifier(self, training_data, n_trees=100):
154        """
155        Train Random Forest classifier.
156        
157        Args:
158            training_data: Training feature collection
159            n_trees: Number of trees in Random Forest
160        
161        Returns:
162            ee.Classifier: Trained classifier
163        """
164        print(f"Training Random Forest classifier with {n_trees} trees...")
165        
166        # Select features for training
167        bands = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B11', 'B12',
168                'NDVI', 'NDWI', 'NDBI', 'EVI', 'SAVI']
169        
170        # Create and train classifier
171        classifier = ee.Classifier.smileRandomForest(n_trees).train(
172            features=training_data,
173            classProperty='landcover',
174            inputProperties=bands
175        )
176        
177        self.trained_classifier = classifier
178        print("✓ Classifier training completed!")
179        
180        return classifier
181    
182    def classify_image(self, image, classifier=None):
183        """
184        Classify the input image using trained classifier.
185        
186        Args:
187            image: Image to classify
188            classifier: Trained classifier (optional)
189        
190        Returns:
191            ee.Image: Classified image
192        """
193        if classifier is None:
194            classifier = self.trained_classifier
195        
196        if classifier is None:
197            raise ValueError("No trained classifier available")
198        
199        print("Classifying image...")
200        
201        # Select the same bands used for training
202        bands = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B11', 'B12',
203                'NDVI', 'NDWI', 'NDBI', 'EVI', 'SAVI']
204        
205        classified = image.select(bands).classify(classifier)
206        
207        return classified
208    
209    def assess_accuracy(self, training_data, classifier=None):
210        """
211        Assess classifier accuracy using confusion matrix.
212        
213        Args:
214            training_data: Validation dataset
215            classifier: Trained classifier
216        
217        Returns:
218            dict: Accuracy metrics
219        """
220        if classifier is None:
221            classifier = self.trained_classifier
222        
223        print("Assessing accuracy...")
224        
225        # Get confusion matrix
226        confusion_matrix = classifier.confusionMatrix()
227        
228        # Calculate accuracy metrics
229        overall_accuracy = confusion_matrix.accuracy()
230        kappa = confusion_matrix.kappa()
231        
232        print(f"Overall Accuracy: {overall_accuracy.getInfo():.3f}")
233        print(f"Kappa Coefficient: {kappa.getInfo():.3f}")
234        
235        # Get confusion matrix as array
236        cm_array = confusion_matrix.getInfo()
237        
238        return {
239            'overall_accuracy': overall_accuracy.getInfo(),
240            'kappa': kappa.getInfo(),
241            'confusion_matrix': cm_array
242        }
243    
244    def create_training_points(self, geometry):
245        """
246        Create sample training points for different land cover classes.
247        This is a simplified example - in practice, use field data or 
248        careful visual interpretation.
249        
250        Args:
251            geometry: Area of interest
252        
253        Returns:
254            ee.FeatureCollection: Training points
255        """
256        print("Creating sample training points...")
257        
258        # Example training points (replace with actual training data)
259        water_points = ee.FeatureCollection([
260            ee.Feature(ee.Geometry.Point([-122.0, 37.4]), {'landcover': 0}),
261            ee.Feature(ee.Geometry.Point([-122.1, 37.3]), {'landcover': 0}),
262        ])
263        
264        forest_points = ee.FeatureCollection([
265            ee.Feature(ee.Geometry.Point([-122.2, 37.5]), {'landcover': 1}),
266            ee.Feature(ee.Geometry.Point([-122.3, 37.6]), {'landcover': 1}),
267        ])
268        
269        urban_points = ee.FeatureCollection([
270            ee.Feature(ee.Geometry.Point([-122.4, 37.7]), {'landcover': 2}),
271            ee.Feature(ee.Geometry.Point([-122.5, 37.8]), {'landcover': 2}),
272        ])
273        
274        agriculture_points = ee.FeatureCollection([
275            ee.Feature(ee.Geometry.Point([-122.6, 37.2]), {'landcover': 3}),
276            ee.Feature(ee.Geometry.Point([-122.7, 37.1]), {'landcover': 3}),
277        ])
278        
279        bare_soil_points = ee.FeatureCollection([
280            ee.Feature(ee.Geometry.Point([-122.8, 37.0]), {'landcover': 4}),
281            ee.Feature(ee.Geometry.Point([-122.9, 36.9]), {'landcover': 4}),
282        ])
283        
284        # Merge all training points
285        training_points = water_points.merge(forest_points)\
286                                   .merge(urban_points)\
287                                   .merge(agriculture_points)\
288                                   .merge(bare_soil_points)
289        
290        return training_points
291    
292    def export_classification(self, classified_image, geometry, filename, scale=10):
293        """
294        Export classified image to Google Drive.
295        
296        Args:
297            classified_image: Classified image
298            geometry: Export region
299            filename: Output filename
300            scale: Export scale in meters
301        """
302        print(f"Exporting classification to Google Drive: {filename}")
303        
304        task = ee.batch.Export.image.toDrive(
305            image=classified_image,
306            description=filename,
307            folder='EarthEngine_Exports',
308            fileNamePrefix=filename,
309            region=geometry,
310            scale=scale,
311            maxPixels=1e13
312        )
313        
314        task.start()
315        print(f"Export task started. Check Google Drive folder 'EarthEngine_Exports'")
316        print(f"Task ID: {task.id}")
317        
318        return task
319
def main():
    """
    Run the full example: composite, sample, train, classify, assess, export.
    """
    # Set up the classification pipeline (this also initializes Earth Engine)
    pipeline = LandCoverClassifier('your-project-id')

    # Area of interest (San Francisco Bay Area example)
    aoi = ee.Geometry.Rectangle([-122.5, 37.0, -121.5, 38.0])

    # Summer 2023 composite with spectral indices
    image = pipeline.create_composite(
        geometry=aoi,
        start_date='2023-06-01',
        end_date='2023-08-31',
        cloud_threshold=10,
    )

    # Labeled points (replace with actual training data) sampled on the composite
    points = pipeline.create_training_points(aoi)
    samples = pipeline.create_training_data(image=image, training_points=points)

    # Fit the Random Forest and predict every pixel
    model = pipeline.train_classifier(training_data=samples, n_trees=100)
    land_cover = pipeline.classify_image(image=image, classifier=model)

    # Accuracy assessment
    metrics = pipeline.assess_accuracy(training_data=samples, classifier=model)

    print("\n📊 Classification Results:")
    print(f"Overall Accuracy: {metrics['overall_accuracy']:.1%}")
    print(f"Kappa Coefficient: {metrics['kappa']:.3f}")

    # Start the export to Google Drive
    pipeline.export_classification(
        classified_image=land_cover,
        geometry=aoi,
        filename='land_cover_classification_2023',
        scale=10,
    )

    # Closing summary, one line per print
    summary_lines = (
        "\n🎯 Advanced Classification Analysis Complete!",
        "Key Features Demonstrated:",
        "• Multi-spectral index calculation",
        "• Random Forest classification",
        "• Accuracy assessment",
        "• Large-scale processing and export",
        "• Object-oriented programming approach",
    )
    for line in summary_lines:
        print(line)
384
# Run the example workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()