Enterprise Integration Guide for Staggered Selection Utilities
This guide provides detailed instructions for integrating the staggered selection utilities with Enterprise for pulsar timing analysis.
Table of Contents
Overview
The staggered selection utilities are designed to work seamlessly with Enterprise’s Selection class, providing a modern, well-documented API for creating selection functions. This guide covers everything from basic integration to advanced model building scenarios.
Key Benefits
Enterprise Compatibility: Full compatibility with enterprise.signals.selections.Selection
Hierarchical Selection: Support for staggered selection with automatic fallback
Frequency Filtering: Optional frequency band filtering
Type Safety: Complete type hints for better IDE support
Performance: Efficient numpy-based operations
Basic Integration
Simple Selection
from enterprise.signals import white_signals
from enterprise.signals.parameter import Uniform
from enterprise.signals.selections import Selection
from metapulsar.selection_utils import create_staggered_selection

# Create a simple group-based selection
group_sel = create_staggered_selection("efac", {"group": None})

# Wrap with Enterprise Selection
efac_selection = Selection(group_sel)

# Use in Enterprise model: the EFAC prior goes in `efac`, the Selection goes
# in `selection` (MeasurementNoise has no `log10_efac` keyword).
white_signal = white_signals.MeasurementNoise(
    efac=Uniform(0.01, 10.0),
    selection=efac_selection,
)
Staggered Selection
# Create staggered selection with fallback
staggered_sel = create_staggered_selection("ecorr", {("group", "f"): None})

# Wrap with Enterprise Selection
ecorr_selection = Selection(staggered_sel)

# Use in Enterprise model: ECORR is its own signal (EcorrKernelNoise), not a
# keyword of MeasurementNoise; the Selection is passed via `selection`.
white_signal = white_signals.EcorrKernelNoise(
    log10_ecorr=Uniform(-10, -5),
    selection=ecorr_selection,
)
Frequency Band Selection
# Create frequency band selection
band_sel = create_staggered_selection(
    "band",
    {"group": None},
    freq_range=(400, 1000),  # 400-1000 MHz
)

# Wrap with Enterprise Selection
band_selection = Selection(band_sel)

# Use in Enterprise model: the EFAC prior goes in `efac`, the band-limited
# Selection goes in `selection`.
white_signal = white_signals.MeasurementNoise(
    efac=Uniform(0.01, 10.0),
    selection=band_selection,
)
Advanced Usage Patterns
Multiple Selection Criteria
# Create multiple selections for different purposes
efac_sel = create_staggered_selection("efac", {"group": None})
ecorr_sel = create_staggered_selection("ecorr", {("group", "f"): None})
band_sel = create_staggered_selection("band", {"group": None}, freq_range=(400, 1000))

# Wrap with Enterprise Selections
efac_selection = Selection(efac_sel)
ecorr_selection = Selection(ecorr_sel)
band_selection = Selection(band_sel)

# Use in Enterprise model: each noise term is a separate signal carrying its
# own Selection; signals are combined with `+`.
efac_signal = white_signals.MeasurementNoise(
    efac=Uniform(0.01, 10.0),
    selection=efac_selection,
)
ecorr_signal = white_signals.EcorrKernelNoise(
    log10_ecorr=Uniform(-10, -5),
    selection=ecorr_selection,
)
white_signal = efac_signal + ecorr_signal
PTA-Specific Selections
# Create PTA-specific selections
def create_pta_selections():
    """Build one EFAC Selection per PTA and return them keyed by PTA name."""
    # Flag criteria per PTA; the PPTA entry uses a ("pta", "group") fallback.
    criteria_by_pta = {
        'EPTA': {"pta": "EPTA"},
        'PPTA': {("pta", "group"): "PPTA"},
        'NANOGrav': {"pta": "NANOGrav"},
    }
    # Build all selection functions first, then wrap each one for Enterprise.
    selection_funcs = {
        pta_name: create_staggered_selection("efac", criteria)
        for pta_name, criteria in criteria_by_pta.items()
    }
    return {
        pta_name: Selection(sel_func)
        for pta_name, sel_func in selection_funcs.items()
    }
# Use in model: the EFAC prior goes in `efac`, the PTA-specific Selection
# goes in `selection`.
pta_selections = create_pta_selections()
white_signal = white_signals.MeasurementNoise(
    efac=Uniform(0.01, 10.0),
    selection=pta_selections['EPTA'],
)
Complex Staggered Selection
# Create complex staggered selection with multiple fallbacks
complex_sel = create_staggered_selection("efac", {
    ("group", "f", "B"): None,  # Triple fallback
    "pta": "EPTA",              # PTA-specific
})

# Wrap with Enterprise Selection
complex_selection = Selection(complex_sel)

# Use in Enterprise model: the EFAC prior goes in `efac`, the Selection goes
# in `selection`.
white_signal = white_signals.MeasurementNoise(
    efac=Uniform(0.01, 10.0),
    selection=complex_selection,
)
Model Building Examples
Basic Pulsar Model
from enterprise.pulsar import Pulsar
from enterprise.signals import gp_signals, signal_base, utils, white_signals
from enterprise.signals.parameter import Uniform
from enterprise.signals.selections import Selection
from metapulsar.selection_utils import create_staggered_selection

# Load pulsar data
psr = Pulsar(parfile, timfile)

# Create selections
efac_sel = create_staggered_selection("efac", {"group": None})
ecorr_sel = create_staggered_selection("ecorr", {("group", "f"): None})

# Create white noise signal: EFAC and ECORR are separate signals, each with
# its own Selection, combined with `+`.
white_signal = (
    white_signals.MeasurementNoise(
        efac=Uniform(0.01, 10.0),
        selection=Selection(efac_sel),
    )
    + white_signals.EcorrKernelNoise(
        log10_ecorr=Uniform(-10, -5),
        selection=Selection(ecorr_sel),
    )
)

# Create red noise signal: a Fourier-basis GP with a power-law spectrum
red_signal = gp_signals.FourierBasisGP(
    spectrum=utils.powerlaw(
        log10_A=Uniform(-20, -11),
        gamma=Uniform(0, 7),
    ),
)

# Build model: sum the signal classes, instantiate them for the pulsar, and
# hand the result to a PTA object.
model = signal_base.PTA([(white_signal + red_signal)(psr)])
Multi-PTA Model
from enterprise.pulsar import Pulsar
from enterprise.signals import gp_signals, signal_base, utils, white_signals
from enterprise.signals.parameter import Uniform
from enterprise.signals.selections import Selection
from metapulsar.selection_utils import create_staggered_selection


def create_multi_pta_model(psr):
    """Create a model with multi-PTA selections.

    Args:
        psr: Enterprise Pulsar the signals are instantiated for.

    Returns:
        A ``signal_base.PTA`` holding the single-pulsar model
        (EFAC + ECORR + red noise + GWB).
    """
    # Create PTA-specific selections
    pta_efac_sel = create_staggered_selection("efac", {"pta": None})
    pta_ecorr_sel = create_staggered_selection("ecorr", {("pta", "group"): None})

    # NOTE(review): the backend and band selections below are created but not
    # attached to any signal in this example; wire them into additional
    # signals via `selection=Selection(...)` or drop them.
    # Create backend-specific selections
    backend_sel = create_staggered_selection("efac", {("backend", "group"): None})
    # Create frequency band selections
    low_band_sel = create_staggered_selection(
        "band_low", {"group": None}, freq_range=(100, 500)
    )
    high_band_sel = create_staggered_selection(
        "band_high", {"group": None}, freq_range=(1000, 2000)
    )

    # Create white noise signals: EFAC and ECORR are separate signals, each
    # taking its Selection through the `selection` keyword.
    efac_signal = white_signals.MeasurementNoise(
        efac=Uniform(0.01, 10.0),
        selection=Selection(pta_efac_sel),
    )
    ecorr_signal = white_signals.EcorrKernelNoise(
        log10_ecorr=Uniform(-10, -5),
        selection=Selection(pta_ecorr_sel),
    )

    # Create red noise signal
    red_signal = gp_signals.FourierBasisGP(
        spectrum=utils.powerlaw(
            log10_A=Uniform(-20, -11),
            gamma=Uniform(0, 7),
        ),
    )

    # Create GWB signal; it needs its own name so its parameters do not
    # collide with those of the red-noise FourierBasisGP.
    gwb_signal = gp_signals.FourierBasisGP(
        spectrum=utils.powerlaw(
            log10_A=Uniform(-18, -14),
            gamma=Uniform(4, 5),
        ),
        name="gw",
    )

    # Build model: sum the signal classes, instantiate for the pulsar, wrap in a PTA
    signal_model = efac_signal + ecorr_signal + red_signal + gwb_signal
    return signal_base.PTA([signal_model(psr)])


# Use the model
psr = Pulsar(parfile, timfile)
model = create_multi_pta_model(psr)
Custom Selection Factory
class SelectionFactory:
    """Collection of static builders that return Enterprise Selections
    with standard default flag criteria."""

    @staticmethod
    def create_efac_selection(flag_criteria=None, freq_range=None):
        """Return an EFAC Selection; criteria default to ``{"group": None}``."""
        criteria = {"group": None} if flag_criteria is None else flag_criteria
        return Selection(create_staggered_selection("efac", criteria, freq_range))

    @staticmethod
    def create_ecorr_selection(flag_criteria=None, freq_range=None):
        """Return an ECORR Selection; criteria default to the
        ``("group", "f")`` staggered key."""
        criteria = {("group", "f"): None} if flag_criteria is None else flag_criteria
        return Selection(create_staggered_selection("ecorr", criteria, freq_range))

    @staticmethod
    def create_band_selection(band_name, freq_range, flag_criteria=None):
        """Return a Selection named ``band_name`` restricted to ``freq_range``;
        criteria default to ``{"group": None}``."""
        criteria = {"group": None} if flag_criteria is None else flag_criteria
        return Selection(create_staggered_selection(band_name, criteria, freq_range))
# Use the factory
factory = SelectionFactory()

# Create standardized selections
efac_sel = factory.create_efac_selection()
ecorr_sel = factory.create_ecorr_selection()
low_band_sel = factory.create_band_selection("low_band", (100, 500))
high_band_sel = factory.create_band_selection("high_band", (1000, 2000))

# Use in model: EFAC and ECORR are separate signals; each takes its prior as
# the parameter keyword and its Selection through `selection`.
white_signal = (
    white_signals.MeasurementNoise(
        efac=Uniform(0.01, 10.0),
        selection=efac_sel,
    )
    + white_signals.EcorrKernelNoise(
        log10_ecorr=Uniform(-10, -5),
        selection=ecorr_sel,
    )
)
Performance Optimization
Memory Optimization
# For large datasets, consider chunked processing
def process_large_dataset(psr, chunk_size=10000):
    """Return one EFAC Selection per chunk of ``chunk_size`` TOAs.

    Args:
        psr: Object exposing ``toas``; only ``len(psr.toas)`` is read.
        chunk_size: Number of TOAs per chunk.

    Returns:
        A list with one Selection for each chunk start in
        ``range(0, len(psr.toas), chunk_size)``.
    """
    n_toas = len(psr.toas)
    # NOTE(review): the chunk bounds are never passed to
    # create_staggered_selection, so every chunk gets an identical group-based
    # selection. Confirm whether chunk-specific criteria were intended.
    return [
        Selection(create_staggered_selection("efac", {"group": None}))
        for _ in range(0, n_toas, chunk_size)
    ]
Computational Optimization
# Pre-compute selections for repeated use
class CachedSelectionFactory:
    """Factory that caches Selections so repeated requests reuse one object."""

    def __init__(self):
        # Maps (name, str(flag_criteria), freq_range-as-tuple) -> Selection
        self._cache = {}

    def get_selection(self, name, flag_criteria, freq_range=None):
        """Get the cached Selection for these arguments, creating it on first use.

        Args:
            name: Selection name passed to create_staggered_selection.
            flag_criteria: Flag criteria dict; its string form is part of the
                cache key, so dicts with a different insertion order are
                cached separately.
            freq_range: Optional (low, high) pair; any sequence is accepted.

        Returns:
            The Selection stored for this (name, criteria, freq_range) key.
        """
        # A list freq_range is unhashable and would make the key lookup raise
        # TypeError; normalise it to a tuple for the key only.
        range_key = None if freq_range is None else tuple(freq_range)
        cache_key = (name, str(flag_criteria), range_key)
        if cache_key not in self._cache:
            sel_func = create_staggered_selection(name, flag_criteria, freq_range)
            self._cache[cache_key] = Selection(sel_func)
        return self._cache[cache_key]
# Use cached factory: the cache lives on the instance, so reuse one factory
# object for every request that should share cached selections.
factory = CachedSelectionFactory()
# The first request for a given (name, criteria, freq_range) creates and stores
# the Selection; later identical requests return the stored object.
efac_sel = factory.get_selection("efac", {"group": None})
Batch Processing
# Process multiple pulsars efficiently
def process_multiple_pulsars(pulsar_list):
    """Build a white-noise model for every pulsar from shared selections.

    Args:
        pulsar_list: Iterable of Enterprise Pulsar objects.

    Returns:
        A list with one instantiated signal model per pulsar, in input
        order (suitable for passing to ``signal_base.PTA``).
    """
    # Create shared selections
    efac_sel = create_staggered_selection("efac", {"group": None})
    ecorr_sel = create_staggered_selection("ecorr", {("group", "f"): None})

    # Wrap with Enterprise Selections
    efac_selection = Selection(efac_sel)
    ecorr_selection = Selection(ecorr_sel)

    # The signal classes do not depend on the pulsar: build them once and
    # instantiate per pulsar. EFAC and ECORR are separate signals, each
    # taking its Selection through the `selection` keyword.
    white_signal = (
        white_signals.MeasurementNoise(
            efac=Uniform(0.01, 10.0),
            selection=efac_selection,
        )
        + white_signals.EcorrKernelNoise(
            log10_ecorr=Uniform(-10, -5),
            selection=ecorr_selection,
        )
    )

    # Create models for all pulsars
    return [white_signal(psr) for psr in pulsar_list]
Troubleshooting
Common Issues
1. Selection Not Working
Problem: Selection returns empty results
result = sel_func(flags, freqs)  # Returns: {} (an empty dict, i.e. no selection matched)
Debugging:
# Step 1: list the flag names present and the distinct values of each
print(f"Available flags: {list(flags.keys())}")
for flag_name, values in flags.items():
    print(f"{flag_name}: {np.unique(values)}")

# Step 2: show the span of observing frequencies
print(f"Frequency range: {freqs.min():.1f} - {freqs.max():.1f}")

# Step 3: retry with the simplest criteria to see whether anything matches
simple_sel = create_staggered_selection("test", {"group": None})
result = simple_sel(flags, freqs)
print(f"Simple selection result: {result}")
2. Enterprise Integration Issues
Problem: Selection doesn’t work with Enterprise
# TypeError: selection_function() missing 1 required positional argument
Solution:
# The selection function must take exactly (flags, freqs)
def test_selection_function(flags, freqs):
    """Forward ``flags`` and ``freqs`` to ``sel_func`` and return its result."""
    # NOTE(review): Enterprise appears to fill these arguments by name from
    # the Pulsar object, so the parameter names matter -- confirm.
    return sel_func(flags, freqs)


# Wrap the correctly-shaped function for Enterprise
selection = Selection(test_selection_function)
3. Performance Issues
Problem: Slow selection performance
# Selection takes too long
Solutions:
# Use specific values instead of None
specific_sel = create_staggered_selection("efac", {"group": "ASP_430"})

# Pre-filter data: compute the band mask once and reuse it for the
# frequencies and for every flag array
band_mask = (freqs >= 400) & (freqs < 1000)
filtered_freqs = freqs[band_mask]
filtered_flags = {k: v[band_mask] for k, v in flags.items()}

# Use cached selections
factory = CachedSelectionFactory()
efac_sel = factory.get_selection("efac", {"group": None})
Debugging Tools
def debug_selection(sel_func, flags, freqs):
    """Print diagnostics for a selection function and return its result.

    Args:
        sel_func: Callable taking ``(flags, freqs)`` and returning a dict
            that maps selection keys to boolean masks.
        flags: Dict mapping flag names to arrays of per-TOA values.
        freqs: Array of per-TOA frequencies; may be empty.

    Returns:
        The dict returned by ``sel_func(flags, freqs)``, unchanged.
    """
    print("=== Selection Debug Info ===")
    print(f"Flags: {list(flags.keys())}")
    print(f"Frequencies: {len(freqs)} TOAs")
    # min()/max() raise ValueError on an empty array, so guard the range line.
    if len(freqs) > 0:
        print(f"Frequency range: {freqs.min():.1f} - {freqs.max():.1f}")
    else:
        print("Frequency range: n/a (no TOAs)")
    for flag_name, values in flags.items():
        unique_values = np.unique(values)
        print(f"{flag_name}: {len(unique_values)} unique values: {unique_values}")
    result = sel_func(flags, freqs)
    print(f"Result: {len(result)} selections")
    for key, mask in result.items():
        print(f" {key}: {mask.sum()} TOAs selected")
    return result
# Use debug function: prints a summary of the inputs and per-key TOA counts,
# then returns the selection result unchanged
result = debug_selection(sel_func, flags, freqs)
Best Practices
1. Selection Design
Use realistic flag names: Use actual flag names from your data
Test with real data: Always test selections with your actual pulsar data
Document selections: Document the purpose and behavior of each selection
Use fallback wisely: Use staggered selection for robust multi-PTA analysis
2. Performance
Minimize flag criteria: Use only necessary flag criteria
Pre-filter data: Apply frequency filtering early when possible
Cache selections: Cache frequently used selections
Batch processing: Process multiple pulsars in batches
3. Error Handling
Validate inputs: Check flag names and values before creating selections
Handle edge cases: Test with empty data, missing flags, etc.
Provide feedback: Give clear error messages for common issues
4. Integration
Test Enterprise compatibility: Always test with Enterprise Selection class
Use type hints: Leverage type hints for better IDE support
Follow conventions: Follow Enterprise naming conventions
5. Documentation
Document selections: Document the purpose and behavior of each selection
Provide examples: Include realistic usage examples
Update documentation: Keep documentation up to date with code changes
Conclusion
The staggered selection utilities provide a powerful and flexible API for creating Enterprise-compatible selection functions. By following this guide, you can effectively integrate them into your pulsar timing analysis workflows while maintaining good performance and reliability.
For more information, see: