Image Collection Filtering

Advanced techniques for filtering Earth Engine image collections.

This page documents the image collection filtering example.

  1"""
  2Intermediate Example 2: Image Collection Filtering
  3==================================================
  4
  5This example demonstrates:
  6- Advanced filtering techniques for image collections
  7- Temporal, spatial, and metadata filtering
  8- Quality assessment and cloud filtering
  9- Collection reduction and compositing methods
 10- Working with large datasets efficiently
 11
 12Prerequisites:
 13- Basic understanding of Earth Engine image collections
 14- Familiarity with filtering concepts
 15- Knowledge of satellite data characteristics
 16"""
 17
from datetime import datetime, timedelta, timezone

import ee
import matplotlib.pyplot as plt
import pandas as pd
 22
 23class ImageCollectionFilter:
 24    """Class for advanced image collection filtering operations."""
 25    
 26    def __init__(self, project_id):
 27        """Initialize the filter with Earth Engine project."""
 28        self.project_id = project_id
 29        self.initialize_ee()
 30    
 31    def initialize_ee(self):
 32        """Initialize Earth Engine."""
 33        try:
 34            ee.Initialize(project=self.project_id)
 35            print("✓ Earth Engine initialized successfully!")
 36        except Exception as e:
 37            print(f"✗ Error initializing Earth Engine: {e}")
 38            raise
 39    
 40    def basic_filtering_examples(self):
 41        """Demonstrate basic filtering techniques."""
 42        print("🔍 Basic Filtering Examples")
 43        print("-" * 40)
 44        
 45        # Date filtering
 46        print("1. Date Filtering:")
 47        collection = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
 48        
 49        # Filter by date range
 50        date_filtered = collection.filterDate('2023-01-01', '2023-12-31')
 51        print(f"   Original collection size: {collection.size().getInfo()}")
 52        print(f"   After date filter: {date_filtered.size().getInfo()}")
 53        
 54        # Filter by specific months
 55        summer_images = collection.filter(
 56            ee.Filter.calendarRange(6, 8, 'month')
 57        )
 58        print(f"   Summer months only: {summer_images.size().getInfo()}")
 59        
 60        # Spatial filtering
 61        print("\n2. Spatial Filtering:")
 62        point = ee.Geometry.Point([-122.4, 37.8])
 63        region = ee.Geometry.Rectangle([-123, 37, -122, 38])
 64        
 65        # Filter by point intersection
 66        point_filtered = date_filtered.filterBounds(point)
 67        print(f"   Images containing point: {point_filtered.size().getInfo()}")
 68        
 69        # Filter by region intersection
 70        region_filtered = date_filtered.filterBounds(region)
 71        print(f"   Images intersecting region: {region_filtered.size().getInfo()}")
 72        
 73        return region_filtered
 74    
 75    def metadata_filtering(self, collection):
 76        """Demonstrate filtering by metadata properties."""
 77        print("\n📊 Metadata Filtering")
 78        print("-" * 30)
 79        
 80        # Cloud cover filtering
 81        print("1. Cloud Cover Filtering:")
 82        low_cloud = collection.filter(ee.Filter.lt('CLOUD_COVER', 10))
 83        medium_cloud = collection.filter(
 84            ee.Filter.And(
 85                ee.Filter.gte('CLOUD_COVER', 10),
 86                ee.Filter.lt('CLOUD_COVER', 30)
 87            )
 88        )
 89        print(f"   Low cloud cover (<10%): {low_cloud.size().getInfo()}")
 90        print(f"   Medium cloud cover (10-30%): {medium_cloud.size().getInfo()}")
 91        
 92        # Sun elevation filtering
 93        print("\n2. Sun Elevation Filtering:")
 94        high_sun = collection.filter(ee.Filter.gt('SUN_ELEVATION', 45))
 95        print(f"   High sun elevation (>45°): {high_sun.size().getInfo()}")
 96        
 97        # Acquisition DOY filtering
 98        print("\n3. Day of Year Filtering:")
 99        growing_season = collection.filter(
100            ee.Filter.And(
101                ee.Filter.gte('DAY_OF_YEAR', 120),  # May
102                ee.Filter.lte('DAY_OF_YEAR', 243)   # August
103            )
104        )
105        print(f"   Growing season images: {growing_season.size().getInfo()}")
106        
107        # Satellite path/row filtering
108        print("\n4. Path/Row Filtering:")
109        specific_tile = collection.filter(
110            ee.Filter.And(
111                ee.Filter.eq('WRS_PATH', 44),
112                ee.Filter.eq('WRS_ROW', 34)
113            )
114        )
115        print(f"   Specific Landsat tile: {specific_tile.size().getInfo()}")
116        
117        return low_cloud
118    
119    def advanced_filtering_techniques(self, collection):
120        """Demonstrate advanced filtering techniques."""
121        print("\n🎯 Advanced Filtering Techniques")
122        print("-" * 40)
123        
124        # Custom filter functions
125        def quality_filter(image):
126            """Custom function to filter based on multiple quality criteria."""
127            qa = image.select('QA_PIXEL')
128            
129            # Check for clear conditions
130            clear_conditions = (
131                qa.bitwiseAnd(1 << 3).eq(0).And(  # No cloud shadow
132                qa.bitwiseAnd(1 << 4).eq(0).And(  # No cloud
133                qa.bitwiseAnd(1 << 5).eq(0)))     # No cirrus
134            
135            # Calculate percentage of clear pixels
136            clear_percentage = clear_conditions.reduceRegion(
137                reducer=ee.Reducer.mean(),
138                geometry=image.geometry(),
139                scale=1000,
140                maxPixels=1e6
141            ).values().get(0)
142            
143            # Return image with clear percentage property
144            return image.set('CLEAR_PERCENTAGE', clear_percentage)
145        
146        print("1. Custom Quality Assessment:")
147        # Apply custom filter
148        quality_assessed = collection.map(quality_filter)
149        
150        # Filter by clear percentage
151        high_quality = quality_assessed.filter(
152            ee.Filter.gt('CLEAR_PERCENTAGE', 0.8)
153        )
154        print(f"   High quality images (>80% clear): {high_quality.size().getInfo()}")
155        
156        # Temporal proximity filtering
157        print("\n2. Temporal Proximity Filtering:")
158        target_date = ee.Date('2023-07-15')
159        
160        # Filter images within 30 days of target date
161        temporal_proximity = collection.filter(
162            ee.Filter.And(
163                ee.Filter.gte('system:time_start', target_date.advance(-30, 'day').millis()),
164                ee.Filter.lte('system:time_start', target_date.advance(30, 'day').millis())
165            )
166        )
167        print(f"   Images within 30 days of July 15: {temporal_proximity.size().getInfo()}")
168        
169        # Seasonal filtering
170        print("\n3. Seasonal Filtering:")
171        def get_season_filter(season):
172            """Get filter for specific season."""
173            season_ranges = {
174                'spring': [80, 171],   # March 21 - June 20
175                'summer': [172, 264],  # June 21 - September 21
176                'fall': [265, 354],    # September 22 - December 20
177                'winter': [355, 79]    # December 21 - March 20
178            }
179            
180            if season == 'winter':
181                # Handle year boundary for winter
182                return ee.Filter.Or(
183                    ee.Filter.gte('DAY_OF_YEAR', 355),
184                    ee.Filter.lte('DAY_OF_YEAR', 79)
185                )
186            else:
187                start, end = season_ranges[season]
188                return ee.Filter.And(
189                    ee.Filter.gte('DAY_OF_YEAR', start),
190                    ee.Filter.lte('DAY_OF_YEAR', end)
191                )
192        
193        spring_images = collection.filter(get_season_filter('spring'))
194        summer_images = collection.filter(get_season_filter('summer'))
195        print(f"   Spring images: {spring_images.size().getInfo()}")
196        print(f"   Summer images: {summer_images.size().getInfo()}")
197        
198        return high_quality
199    
200    def multi_sensor_filtering(self):
201        """Demonstrate filtering across multiple sensor collections."""
202        print("\n🛰️ Multi-Sensor Collection Filtering")
203        print("-" * 45)
204        
205        # Define common parameters
206        region = ee.Geometry.Rectangle([-122.5, 37.5, -122.0, 38.0])
207        start_date = '2023-01-01'
208        end_date = '2023-12-31'
209        
210        # Landsat 8
211        landsat8 = (ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
212                   .filterBounds(region)
213                   .filterDate(start_date, end_date)
214                   .filter(ee.Filter.lt('CLOUD_COVER', 20)))
215        
216        # Landsat 9
217        landsat9 = (ee.ImageCollection('LANDSAT/LC09/C02/T1_L2')
218                   .filterBounds(region)
219                   .filterDate(start_date, end_date)
220                   .filter(ee.Filter.lt('CLOUD_COVER', 20)))
221        
222        # Sentinel-2
223        sentinel2 = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
224                    .filterBounds(region)
225                    .filterDate(start_date, end_date)
226                    .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20)))
227        
228        print(f"Landsat 8 images: {landsat8.size().getInfo()}")
229        print(f"Landsat 9 images: {landsat9.size().getInfo()}")
230        print(f"Sentinel-2 images: {sentinel2.size().getInfo()}")
231        
232        # Merge collections
233        merged_landsat = landsat8.merge(landsat9)
234        print(f"Merged Landsat: {merged_landsat.size().getInfo()}")
235        
236        # Filter merged collection
237        best_landsat = merged_landsat.filter(ee.Filter.lt('CLOUD_COVER', 10))
238        print(f"Best Landsat images (<10% cloud): {best_landsat.size().getInfo()}")
239        
240        return {
241            'landsat8': landsat8,
242            'landsat9': landsat9,
243            'sentinel2': sentinel2,
244            'merged_landsat': merged_landsat
245        }
246    
247    def temporal_filtering_strategies(self, collection):
248        """Demonstrate various temporal filtering strategies."""
249        print("\n⏰ Temporal Filtering Strategies")
250        print("-" * 40)
251        
252        # Monthly composites
253        print("1. Monthly Filtering:")
254        months = [1, 4, 7, 10]  # Jan, Apr, Jul, Oct
255        monthly_counts = {}
256        
257        for month in months:
258            monthly = collection.filter(ee.Filter.calendarRange(month, month, 'month'))
259            count = monthly.size().getInfo()
260            monthly_counts[month] = count
261            month_name = datetime(2023, month, 1).strftime('%B')
262            print(f"   {month_name}: {count} images")
263        
264        # Annual time series
265        print("\n2. Annual Time Series:")
266        years = [2020, 2021, 2022, 2023]
267        annual_counts = {}
268        
269        for year in years:
270            annual = collection.filter(ee.Filter.calendarRange(year, year, 'year'))
271            count = annual.size().getInfo()
272            annual_counts[year] = count
273            print(f"   {year}: {count} images")
274        
275        # Regular interval filtering
276        print("\n3. Regular Interval Filtering (16-day):")
277        start_date = ee.Date('2023-01-01')
278        
279        def create_16day_periods():
280            """Create 16-day period filters."""
281            periods = []
282            for i in range(0, 365, 16):
283                period_start = start_date.advance(i, 'day')
284                period_end = period_start.advance(16, 'day')
285                periods.append({
286                    'start': period_start,
287                    'end': period_end,
288                    'day': i + 1
289                })
290            return periods
291        
292        periods = create_16day_periods()
293        period_counts = []
294        
295        for i, period in enumerate(periods[:10]):  # Show first 10 periods
296            period_images = collection.filterDate(period['start'], period['end'])
297            count = period_images.size().getInfo()
298            period_counts.append(count)
299            print(f"   Period {i+1} (Day {period['day']}): {count} images")
300        
301        return {
302            'monthly': monthly_counts,
303            'annual': annual_counts,
304            'periods': period_counts
305        }
306    
307    def collection_reduction_methods(self, collection):
308        """Demonstrate collection reduction and compositing methods."""
309        print("\n📉 Collection Reduction Methods")
310        print("-" * 40)
311        
312        # Basic reductions
313        print("1. Basic Statistical Reductions:")
314        
315        # Median composite
316        median_composite = collection.median()
317        print("   ✓ Median composite created")
318        
319        # Mean composite
320        mean_composite = collection.mean()
321        print("   ✓ Mean composite created")
322        
323        # Min/Max composites
324        min_composite = collection.min()
325        max_composite = collection.max()
326        print("   ✓ Min/Max composites created")
327        
328        # Standard deviation
329        std_composite = collection.reduce(ee.Reducer.stdDev())
330        print("   ✓ Standard deviation composite created")
331        
332        # Advanced reductions
333        print("\n2. Advanced Reduction Methods:")
334        
335        # Quality mosaic (best pixel based on cloud score)
336        def add_cloud_score(image):
337            """Add cloud score to image."""
338            cloud_score = ee.Algorithms.Landsat.simpleCloudScore(image)
339            return image.addBands(cloud_score.select('cloud'))
340        
341        scored_collection = collection.map(add_cloud_score)
342        quality_mosaic = scored_collection.qualityMosaic('cloud')
343        print("   ✓ Quality mosaic created (lowest cloud score)")
344        
345        # Temporal percentiles
346        percentile_composite = collection.reduce(
347            ee.Reducer.percentile([10, 25, 50, 75, 90])
348        )
349        print("   ✓ Percentile composite created")
350        
351        # Custom reduction
352        def custom_reduction(collection):
353            """Custom reduction combining multiple statistics."""
354            return collection.reduce(
355                ee.Reducer.median()
356                .combine(ee.Reducer.stdDev(), sharedInputs=True)
357                .combine(ee.Reducer.count(), sharedInputs=True)
358            )
359        
360        custom_composite = custom_reduction(collection)
361        print("   ✓ Custom composite (median + stddev + count)")
362        
363        return {
364            'median': median_composite,
365            'mean': mean_composite,
366            'quality': quality_mosaic,
367            'percentiles': percentile_composite,
368            'custom': custom_composite
369        }
370    
371    def analyze_collection_temporal_distribution(self, collection):
372        """Analyze temporal distribution of image collection."""
373        print("\n📅 Temporal Distribution Analysis")
374        print("-" * 40)
375        
376        # Get image dates
377        def get_image_date(image):
378            """Extract date from image."""
379            return ee.Feature(None, {
380                'date': image.date(),
381                'timestamp': image.get('system:time_start'),
382                'cloud_cover': image.get('CLOUD_COVER')
383            })
384        
385        # Extract dates
386        dates_collection = collection.map(get_image_date)
387        dates_list = dates_collection.getInfo()['features']
388        
389        # Process dates
390        dates_data = []
391        for feature in dates_list:
392            props = feature['properties']
393            timestamp = props['timestamp']
394            date = datetime.fromtimestamp(timestamp / 1000)
395            
396            dates_data.append({
397                'date': date,
398                'year': date.year,
399                'month': date.month,
400                'day_of_year': date.timetuple().tm_yday,
401                'cloud_cover': props.get('cloud_cover', 0)
402            })
403        
404        df = pd.DataFrame(dates_data)
405        
406        # Temporal statistics
407        print(f"Total images: {len(df)}")
408        print(f"Date range: {df['date'].min()} to {df['date'].max()}")
409        print(f"Average cloud cover: {df['cloud_cover'].mean():.1f}%")
410        
411        # Monthly distribution
412        monthly_dist = df.groupby('month').size()
413        print(f"\nMonthly distribution:")
414        for month, count in monthly_dist.items():
415            month_name = datetime(2023, month, 1).strftime('%B')
416            print(f"   {month_name}: {count} images")
417        
418        # Yearly distribution
419        if len(df['year'].unique()) > 1:
420            yearly_dist = df.groupby('year').size()
421            print(f"\nYearly distribution:")
422            for year, count in yearly_dist.items():
423                print(f"   {year}: {count} images")
424        
425        return df
426
def main(project_id='your-project-id'):
    """Run every filtering demonstration in order and print a summary.

    Args:
        project_id: Google Cloud project used to initialize Earth Engine.
            The default is a placeholder and must be replaced with a real
            project id for the run to succeed.
    """
    # Initialize filter system
    filter_system = ImageCollectionFilter(project_id)

    print("="*60)
    print("🔍 EARTH ENGINE IMAGE COLLECTION FILTERING GUIDE")
    print("="*60)

    # Steps 1-3 chain: each one narrows the collection the next receives.
    basic_collection = filter_system.basic_filtering_examples()
    metadata_filtered = filter_system.metadata_filtering(basic_collection)
    advanced_filtered = filter_system.advanced_filtering_techniques(metadata_filtered)

    # Steps 4-6 are run for their printed reports; their return values are
    # not needed by the summary below.
    filter_system.multi_sensor_filtering()
    filter_system.temporal_filtering_strategies(advanced_filtered)
    filter_system.collection_reduction_methods(advanced_filtered)

    # Step 7: Temporal distribution analysis
    temporal_df = filter_system.analyze_collection_temporal_distribution(advanced_filtered)

    # Summary
    print("\n" + "="*60)
    print("📊 FILTERING SUMMARY")
    print("="*60)

    print("\n🎯 Key Filtering Techniques Demonstrated:")
    print("• Basic temporal and spatial filtering")
    print("• Metadata-based quality filtering")
    print("• Custom filter functions")
    print("• Multi-sensor collection handling")
    print("• Temporal pattern analysis")
    print("• Collection reduction and compositing")

    print("\n📈 Collection Statistics:")
    print(f"• Final filtered collection: {advanced_filtered.size().getInfo()} images")
    print(f"• Temporal range: {len(temporal_df)} images analyzed")
    # An empty result has no cloud-cover values to average.
    if temporal_df.empty or 'cloud_cover' not in temporal_df.columns:
        print("• Average cloud cover: n/a (no images)")
    else:
        print(f"• Average cloud cover: {temporal_df['cloud_cover'].mean():.1f}%")

    print("\n🏆 Best Practices Applied:")
    print("• Combine multiple filtering criteria")
    print("• Use quality assessment metrics")
    print("• Consider temporal distribution")
    print("• Apply appropriate reduction methods")
    print("• Validate filter effectiveness")

    print("\n✅ Image Collection Filtering Guide Complete!")


# Run the walkthrough only when executed as a script, not on import.
if __name__ == "__main__":
    main()