Machine Learning
Advanced machine learning applications in Earth Engine.
This page documents the machine learning example.
"""
Advanced Example 1: Machine Learning Classification
===================================================

This example demonstrates:
- Land cover classification using machine learning
- Training data collection and preparation
- Random Forest classifier implementation
- Accuracy assessment and validation
- Large-scale prediction and export

Use case: Mapping land cover types using Sentinel-2 imagery
"""
14
15import ee
16import numpy as np
17import pandas as pd
18from sklearn.metrics import classification_report, confusion_matrix
19import matplotlib.pyplot as plt
20import seaborn as sns
21
class LandCoverClassifier:
    """
    Advanced land cover classification using Google Earth Engine and Machine Learning.

    Typical workflow: build a Sentinel-2 composite, sample it at labeled
    points, train a Random Forest, classify the composite, assess accuracy
    and export the classified image to Google Drive.
    """

    # Feature bands shared by sampling, training and prediction. Kept in a
    # single place so the three steps can never drift apart.
    FEATURE_BANDS = (
        'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B11', 'B12',
        'NDVI', 'NDWI', 'NDBI', 'EVI', 'SAVI',
    )

    # Feature property that stores the integer class label.
    LABEL_PROPERTY = 'landcover'

    def __init__(self, project_id):
        """
        Initialize the classifier with Earth Engine project.

        Args:
            project_id: Cloud project passed to ``ee.Initialize``

        Raises:
            Exception: Whatever ``ee.Initialize`` raises is reported on
                stdout and then re-raised unchanged.
        """
        self.project_id = project_id
        self.classifier = None
        self.trained_classifier = None
        self.class_names = ['Water', 'Forest', 'Urban', 'Agriculture', 'Bare_Soil']
        self.class_values = [0, 1, 2, 3, 4]

        # Initialize Earth Engine
        try:
            ee.Initialize(project=project_id)
            print("✓ Earth Engine initialized successfully!")
        except Exception as e:
            print(f"✗ Error initializing Earth Engine: {e}")
            raise

    @staticmethod
    def _mask_clouds(image):
        """Mask pixels flagged as cloud or cirrus in QA60 and rescale the bands."""
        qa = image.select('QA60')
        cloud_bit_mask = 1 << 10
        cirrus_bit_mask = 1 << 11
        # A pixel is kept only when both the cloud and the cirrus bit are 0.
        clear = qa.bitwiseAnd(cloud_bit_mask).eq(0).And(
            qa.bitwiseAnd(cirrus_bit_mask).eq(0))
        # The division applies to every band of the image.
        # NOTE(review): 10000 is presumably the Sentinel-2 reflectance scale
        # factor - confirm against the dataset catalog.
        return image.updateMask(clear).divide(10000)

    def create_composite(self, geometry, start_date, end_date, cloud_threshold=10):
        """
        Create a cloud-free Sentinel-2 composite.

        Args:
            geometry: Area of interest
            start_date: Start date for image collection
            end_date: End date for image collection
            cloud_threshold: Maximum cloud cover percentage

        Returns:
            ee.Image: Cloud-free composite image

        Raises:
            ValueError: If no image matches the filters. A median over an
                empty collection has no bands, so every later step would
                fail with a much less obvious error.
        """
        print(f"Creating Sentinel-2 composite from {start_date} to {end_date}")

        # Load Sentinel-2 Surface Reflectance collection
        collection = (
            ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
            .filterDate(start_date, end_date)
            .filterBounds(geometry)
            .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', cloud_threshold))
        )

        image_count = collection.size().getInfo()
        print(f"Found {image_count} images")
        if image_count == 0:
            raise ValueError(
                f"No Sentinel-2 images found between {start_date} and "
                f"{end_date} with cloud cover below {cloud_threshold}%"
            )

        # Apply cloud masking and create median composite
        composite = collection.map(self._mask_clouds).median()

        # Add spectral indices
        return self.add_spectral_indices(composite)

    def add_spectral_indices(self, image):
        """
        Add spectral indices to improve classification accuracy.

        The additive constants in EVI and SAVI assume band values that have
        already been rescaled, as ``create_composite`` does by dividing by
        10000.

        Args:
            image: Sentinel-2 image

        Returns:
            ee.Image: Image with additional spectral indices
        """
        # NDVI (Normalized Difference Vegetation Index)
        ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI')

        # NDWI (Normalized Difference Water Index)
        ndwi = image.normalizedDifference(['B3', 'B8']).rename('NDWI')

        # NDBI (Normalized Difference Built-up Index)
        ndbi = image.normalizedDifference(['B11', 'B8']).rename('NDBI')

        # EVI (Enhanced Vegetation Index)
        evi = image.expression(
            '2.5 * ((NIR - RED) / (NIR + 6 * RED - 7.5 * BLUE + 1))',
            {
                'NIR': image.select('B8'),
                'RED': image.select('B4'),
                'BLUE': image.select('B2'),
            },
        ).rename('EVI')

        # SAVI (Soil Adjusted Vegetation Index)
        savi = image.expression(
            '((NIR - RED) / (NIR + RED + 0.5)) * (1.5)',
            {
                'NIR': image.select('B8'),
                'RED': image.select('B4'),
            },
        ).rename('SAVI')

        return image.addBands([ndvi, ndwi, ndbi, evi, savi])

    def create_training_data(self, image, training_points):
        """
        Create training dataset from labeled points.

        Args:
            image: Composite image for training
            training_points: ee.FeatureCollection with training points

        Returns:
            ee.FeatureCollection: Training features with spectral values
        """
        print("Creating training dataset...")

        # Sample the feature bands at the labeled points, carrying the
        # class label over to the sampled features.
        training = image.select(list(self.FEATURE_BANDS)).sampleRegions(
            collection=training_points,
            properties=[self.LABEL_PROPERTY],
            scale=10,
        )

        print(f"Training samples: {training.size().getInfo()}")

        return training

    def train_classifier(self, training_data, n_trees=100):
        """
        Train Random Forest classifier.

        The trained classifier is also stored on ``self.trained_classifier``
        so later calls can omit the ``classifier`` argument.

        Args:
            training_data: Training feature collection
            n_trees: Number of trees in Random Forest

        Returns:
            ee.Classifier: Trained classifier
        """
        print(f"Training Random Forest classifier with {n_trees} trees...")

        # Create and train classifier
        classifier = ee.Classifier.smileRandomForest(n_trees).train(
            features=training_data,
            classProperty=self.LABEL_PROPERTY,
            inputProperties=list(self.FEATURE_BANDS),
        )

        self.trained_classifier = classifier
        print("✓ Classifier training completed!")

        return classifier

    def classify_image(self, image, classifier=None):
        """
        Classify the input image using trained classifier.

        Args:
            image: Image to classify
            classifier: Trained classifier (optional, defaults to the one
                stored by ``train_classifier``)

        Returns:
            ee.Image: Classified image

        Raises:
            ValueError: If no classifier is passed and none has been trained.
        """
        if classifier is None:
            classifier = self.trained_classifier

        if classifier is None:
            raise ValueError("No trained classifier available")

        print("Classifying image...")

        # Select the same bands used for training
        return image.select(list(self.FEATURE_BANDS)).classify(classifier)

    def assess_accuracy(self, training_data, classifier=None):
        """
        Assess classifier accuracy using confusion matrix.

        The supplied features are classified and compared with their
        ``landcover`` labels, so the metrics describe exactly the data
        passed in. Passing the samples the classifier was trained on yields
        resubstitution accuracy, which is optimistic; pass held-out samples
        for an independent estimate.

        Args:
            training_data: Validation dataset (features carrying the
                feature-band properties and a ``landcover`` label)
            classifier: Trained classifier (optional, defaults to the one
                stored by ``train_classifier``)

        Returns:
            dict: Accuracy metrics with keys ``overall_accuracy``,
                ``kappa`` and ``confusion_matrix``

        Raises:
            ValueError: If no classifier is passed and none has been trained.
        """
        if classifier is None:
            classifier = self.trained_classifier

        if classifier is None:
            raise ValueError("No trained classifier available")

        print("Assessing accuracy...")

        # Classify the provided samples; predictions land in the default
        # 'classification' property, which is compared with the label.
        predicted = training_data.classify(classifier)
        error_matrix = predicted.errorMatrix(self.LABEL_PROPERTY, 'classification')

        # Fetch each value from the server exactly once.
        overall_accuracy = error_matrix.accuracy().getInfo()
        kappa = error_matrix.kappa().getInfo()
        cm_array = error_matrix.getInfo()

        print(f"Overall Accuracy: {overall_accuracy:.3f}")
        print(f"Kappa Coefficient: {kappa:.3f}")

        return {
            'overall_accuracy': overall_accuracy,
            'kappa': kappa,
            'confusion_matrix': cm_array,
        }

    def create_training_points(self, geometry):
        """
        Create sample training points for different land cover classes.
        This is a simplified example - in practice, use field data or
        careful visual interpretation.

        Args:
            geometry: Area of interest (currently unused: the sample
                coordinates are hard-coded and are not filtered by it)

        Returns:
            ee.FeatureCollection: Training points
        """
        print("Creating sample training points...")

        # Example training points (replace with actual training data),
        # as (class value, [[lon, lat], ...]) in class order.
        sample_coords = [
            (0, [[-122.0, 37.4], [-122.1, 37.3]]),  # Water
            (1, [[-122.2, 37.5], [-122.3, 37.6]]),  # Forest
            (2, [[-122.4, 37.7], [-122.5, 37.8]]),  # Urban
            (3, [[-122.6, 37.2], [-122.7, 37.1]]),  # Agriculture
            (4, [[-122.8, 37.0], [-122.9, 36.9]]),  # Bare soil
        ]

        # Build one collection per class and merge them left to right.
        training_points = None
        for class_value, coords in sample_coords:
            class_points = ee.FeatureCollection([
                ee.Feature(ee.Geometry.Point(lon_lat),
                           {self.LABEL_PROPERTY: class_value})
                for lon_lat in coords
            ])
            if training_points is None:
                training_points = class_points
            else:
                training_points = training_points.merge(class_points)

        return training_points

    def export_classification(self, classified_image, geometry, filename, scale=10):
        """
        Export classified image to Google Drive.

        Args:
            classified_image: Classified image
            geometry: Export region
            filename: Output filename (also used as the task description)
            scale: Export scale in meters

        Returns:
            The started export task.
        """
        print(f"Exporting classification to Google Drive: {filename}")

        task = ee.batch.Export.image.toDrive(
            image=classified_image,
            description=filename,
            folder='EarthEngine_Exports',
            fileNamePrefix=filename,
            region=geometry,
            scale=scale,
            maxPixels=1e13,
        )

        task.start()
        print("Export task started. Check Google Drive folder 'EarthEngine_Exports'")
        print(f"Task ID: {task.id}")

        return task
319
def main():
    """
    Run the end-to-end land cover classification demo.

    Builds a June-August 2023 Sentinel-2 composite over the example
    region, samples it at the bundled example points, trains and applies
    the classifier, prints the accuracy figures and starts a Google Drive
    export.
    """
    pipeline = LandCoverClassifier('your-project-id')

    # Area of interest (San Francisco Bay Area example)
    aoi = ee.Geometry.Rectangle([-122.5, 37.0, -121.5, 38.0])

    # Cloud-filtered composite for the study period
    composite = pipeline.create_composite(
        aoi, '2023-06-01', '2023-08-31', cloud_threshold=10
    )

    # Labeled points (replace with actual training data) and their samples
    labeled_points = pipeline.create_training_points(aoi)
    samples = pipeline.create_training_data(
        image=composite, training_points=labeled_points
    )

    # Fit the model, then predict over the whole composite
    model = pipeline.train_classifier(training_data=samples, n_trees=100)
    land_cover = pipeline.classify_image(image=composite, classifier=model)

    # Accuracy figures for the report below
    metrics = pipeline.assess_accuracy(training_data=samples, classifier=model)

    print("\n📊 Classification Results:")
    print(f"Overall Accuracy: {metrics['overall_accuracy']:.1%}")
    print(f"Kappa Coefficient: {metrics['kappa']:.3f}")

    # Start the Drive export of the classified image
    pipeline.export_classification(
        classified_image=land_cover,
        geometry=aoi,
        filename='land_cover_classification_2023',
        scale=10,
    )

    print("\n🎯 Advanced Classification Analysis Complete!")
    print("Key Features Demonstrated:")
    for feature_line in (
        "• Multi-spectral index calculation",
        "• Random Forest classification",
        "• Accuracy assessment",
        "• Large-scale processing and export",
        "• Object-oriented programming approach",
    ):
        print(feature_line)
384
# Script entry point: run the demo only on direct execution, not on import.
if __name__ == "__main__":
    main()