Black Box Model Integration Guide
Overview
The Black Box Model Builder provides a flexible system for wrapping arbitrary functions into Atlas-compatible models. This guide covers common usage patterns and best practices.
Key Components
1. BlackBoxModelConfig
Defines the complete specification of your model including:
Required and optional data variables
Dimensions (time, geography, etc.)
Output specifications
Optimization layer mappings
Metadata and preprocessing steps
2. BlackBoxModelConfigBuilder
A fluent API for building configurations:
config = (BlackBoxModelConfigBuilder()
.model_info(name="my_model", version="1.0.0")
.add_required_var("spend", DataVarType.CONTINUOUS)
.add_dimension("time")
.add_output("revenue", dims=["time"])
.build()
)
3. BlackBoxModel
The actual model wrapper that:
Validates inputs against configuration
Applies preprocessing
Calls your prediction function
Calculates contributions
Quick Start Examples
Example 1: Simple Linear Model
import xarray as xr
import numpy as np
from atlas import create_simple_blackbox_model
# Define prediction function
def linear_predict(data: xr.Dataset) -> xr.DataArray:
return xr.DataArray(
0.5 * data["tv_spend"].values + 0.3 * data["digital_spend"].values,
dims=data["tv_spend"].dims,
name="revenue"
)
# Create model with minimal configuration
model = create_simple_blackbox_model(
predict_fn=linear_predict,
input_vars=["tv_spend", "digital_spend"],
output_name="revenue",
dimensions=["time"]
)
# Use with optimizer
from atlas import OptimizerFactory
optimizer = OptimizerFactory.create("scipy", model=model)
result = optimizer.optimize(initial_budget, constraints)
Example 2: Marketing Mix Model with Saturation
from atlas import (
BlackBoxModelBuilder,
BlackBoxModelConfigBuilder,
DataVarType,
OptimizationLayer
)
def mmm_with_saturation(data: xr.Dataset) -> xr.DataArray:
"""Marketing mix model with diminishing returns."""
# Hill saturation function
def hill_transform(x, alpha=2.5, gamma=0.5):
return x**alpha / (x**alpha + gamma**alpha)
# Apply saturation to each channel
tv_effect = hill_transform(data["tv_spend"].values) * 0.4
digital_effect = hill_transform(data["digital_spend"].values) * 0.6
# Add base sales
base_sales = 1000
revenue = base_sales + tv_effect + digital_effect
return xr.DataArray(revenue, dims=data["tv_spend"].dims, name="revenue")
# Build comprehensive configuration
config = (BlackBoxModelConfigBuilder()
.model_info(
name="mmm_saturation",
version="1.0.0",
description="Marketing mix model with Hill saturation",
author="Data Science Team"
)
# Define variables with bounds and metadata
.add_required_var(
"tv_spend",
DataVarType.CONTINUOUS,
bounds=(0, 500_000),
units="USD",
optimization_layer=OptimizationLayer.BUDGET_ALLOCATION,
description="Television advertising spend"
)
.add_required_var(
"digital_spend",
DataVarType.CONTINUOUS,
bounds=(0, 500_000),
units="USD",
optimization_layer=OptimizationLayer.BUDGET_ALLOCATION,
description="Digital advertising spend"
)
# Add dimensions
.add_dimension("week", required=True, coordinate_type="time")
# Define output
.add_output(
"revenue",
dims=["week"],
units="USD",
aggregation_method="sum"
)
# Map optimization layers
.map_budget_allocation(["tv_spend", "digital_spend"])
# Add metadata
.add_metadata("saturation_alpha", 2.5)
.add_metadata("saturation_gamma", 0.5)
.build()
)
# Create model
model = BlackBoxModel(mmm_with_saturation, config)
Example 3: Multi-Geography Model with Constraints
def geo_aware_model(data: xr.Dataset) -> xr.DataArray:
"""Model with geography-specific effectiveness and constraints."""
# Geography-specific multipliers
geo_effectiveness = {
"US": {"digital": 1.2, "tv": 1.0},
"UK": {"digital": 1.0, "tv": 1.1},
"DE": {"digital": 0.9, "tv": 0.8}
}
# Calculate revenue by geography and channel
revenue = xr.zeros_like(data["digital_spend"])
for geo in data["geography"].values:
geo_idx = data["geography"] == geo
digital_mult = geo_effectiveness.get(str(geo), {}).get("digital", 1.0)
tv_mult = geo_effectiveness.get(str(geo), {}).get("tv", 1.0)
# Apply market size constraints if available
if "market_size" in data:
market_cap = data["market_size"].sel(geography=geo).values
else:
market_cap = float('inf')
geo_revenue = (
data["digital_spend"].sel(geography=geo) * digital_mult * 0.15 +
data["tv_spend"].sel(geography=geo) * tv_mult * 0.10
)
# Cap by market size
revenue.loc[{"geography": geo}] = np.minimum(geo_revenue, market_cap)
return revenue.rename("revenue")
# Configuration with optional constraint variable
config = (BlackBoxModelConfigBuilder()
.model_info("geo_model", "1.0.0")
# Budget variables
.add_required_var(
"digital_spend",
optimization_layer=OptimizationLayer.BUDGET_ALLOCATION
)
.add_required_var(
"tv_spend",
optimization_layer=OptimizationLayer.BUDGET_ALLOCATION
)
# Optional constraint
.add_optional_var(
"market_size",
DataVarType.CONTINUOUS,
default_value=float('inf'),
optimization_layer=OptimizationLayer.CONSTRAINT_LAYER,
description="Maximum market potential by geography"
)
# Multiple dimensions
.add_dimension("geography", required=True, coordinate_type="geography")
.add_dimension("time", required=True, coordinate_type="time")
# Output
.add_output("revenue", dims=["geography", "time"])
.build()
)
model = BlackBoxModel(geo_aware_model, config)
Advanced Features
Custom Contribution Calculation
By default, the black box model calculates proportional contributions. You can provide a custom contribution function:
def custom_contributions(data: xr.Dataset) -> xr.Dataset:
"""Calculate marginal contributions using finite differences."""
predictions = model.predict(data)
contributions = xr.Dataset()
# Calculate marginal contribution for each variable
for var in ["tv_spend", "digital_spend"]:
# Create perturbed data
delta = 1000 # $1000 change
data_plus = data.copy()
data_plus[var] = data[var] + delta
# Calculate marginal effect
pred_plus = model.predict(data_plus)
marginal = (pred_plus - predictions) / delta
contributions[f"{var}_marginal"] = marginal
contributions[f"{var}_total"] = marginal * data[var]
return contributions
# Use with model
model = BlackBoxModel(
predict_fn=mmm_with_saturation,
config=config,
contribution_fn=custom_contributions
)
Preprocessing and Transformations
Add preprocessing steps to your configuration:
config = (BlackBoxModelConfigBuilder()
# ... other configuration ...
# Add variable-level transformations
.add_required_var(
"price",
transformation="log", # Will apply log(x + 1e-8)
description="Log-transformed price"
)
# Add custom preprocessing steps
.add_preprocessing({
"type": "scale",
"factor": 1000,
"variables": ["spend_in_thousands"]
})
.add_preprocessing({
"type": "normalize",
"method": "z-score",
"variables": ["temperature", "humidity"]
})
.build()
)
Working with Time Series
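Handle time-based features and carryover effects (this example also needs import pandas as pd, in addition to the xarray and numpy imports shown earlier):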
def time_series_model(data: xr.Dataset) -> xr.DataArray:
"""Model with time-based features."""
# Extract time features
time_coords = pd.to_datetime(data["time"].values)
# Seasonal factors
month = time_coords.month
seasonal_factor = 1 + 0.2 * np.sin(2 * np.pi * month / 12)
# Trend component
days_since_start = (time_coords - time_coords[0]).days
trend = 1 + 0.001 * days_since_start
# Adstock transformation for carryover
def adstock(x, decay=0.7):
result = np.zeros_like(x)
result[0] = x[0]
for i in range(1, len(x)):
result[i] = x[i] + decay * result[i-1]
return result
# Apply transformations
tv_adstock = adstock(data["tv_spend"].values)
# Calculate revenue
revenue = (
1000 * trend * seasonal_factor + # Base with trend and seasonality
0.1 * tv_adstock # Media effect with carryover
)
return xr.DataArray(revenue, dims=["time"], name="revenue")
# Configuration
config = (BlackBoxModelConfigBuilder()
.model_info("time_series_model", "1.0.0")
.add_required_var("tv_spend", optimization_layer=OptimizationLayer.BUDGET_ALLOCATION)
.add_dimension("time", required=True, coordinate_type="time")
.add_output("revenue", dims=["time"])
.add_metadata("adstock_decay", 0.7)
.add_metadata("has_seasonality", True)
.add_metadata("has_trend", True)
.build()
)
Integration with Atlas Optimization
Using with Different Optimizers
# Create model
model = BlackBoxModel(predict_fn, config)
# Use with SciPy optimizer
scipy_optimizer = OptimizerFactory.create("scipy", model=model)
scipy_result = scipy_optimizer.optimize(initial_budget, constraints)
# Use with Optuna optimizer for black-box optimization
optuna_optimizer = OptimizerFactory.create(
"optuna",
model=model,
config={
"n_trials": 1000,
"sampler": "TPE",
"n_jobs": -1
}
)
optuna_result = optuna_optimizer.optimize(initial_budget, constraints)
Multi-Objective Optimization
# Create multiple models for different objectives
revenue_model = BlackBoxModel(revenue_predict_fn, revenue_config)
awareness_model = BlackBoxModel(awareness_predict_fn, awareness_config)
# Use multi-objective optimizer
from atlas.optimizers import MultiObjectiveOptimizer
multi_optimizer = MultiObjectiveOptimizer(
models={"revenue": revenue_model, "awareness": awareness_model},
weights={"revenue": 0.7, "awareness": 0.3}
)
pareto_results = multi_optimizer.optimize(initial_budget, constraints)
Best Practices
1. Validate Your Functions
Always test your prediction function with sample data before wrapping:
# Test function directly
test_data = xr.Dataset({...})
test_output = my_predict_fn(test_data)
assert isinstance(test_output, xr.DataArray)
2. Use Meaningful Variable Names
Choose descriptive names that match your business domain:
.add_required_var("tv_grp", description="Television Gross Rating Points")
.add_required_var("digital_impressions", description="Digital ad impressions")
3. Document Metadata
Include important model information:
.add_metadata("training_period", "2023-01-01 to 2023-12-31")
.add_metadata("model_assumptions", ["linear additivity", "no interaction effects"])
.add_metadata("data_sources", ["Nielsen", "Google Analytics"])
4. Set Reasonable Bounds
Define realistic bounds for optimization:
.add_required_var(
"spend",
bounds=(0, 1_000_000), # Realistic budget constraints
description="Channel spend in USD"
)
5. Save and Version Configurations
# Save configuration
config.save("models/configs/mmm_v2.1.0.yaml")
# Load and reuse
loaded_config = BlackBoxModelConfig.load("models/configs/mmm_v2.1.0.yaml")
model = BlackBoxModel(predict_fn, loaded_config)
Troubleshooting
Common Issues
Missing Variables Error
# Error: Missing required variables: {'var1', 'var2'}
# Solution: Ensure all required variables are in your input dataset
Dimension Mismatch
# Error: Missing required dimensions: {'time'}
# Solution: Add required dimensions to your data
data = data.expand_dims({"time": [0]})
Output Shape Issues
# Ensure output has correct dimensions
return xr.DataArray(
    predictions,
    dims=data[input_var].dims,  # Match input dimensions
    name=output_name
)
Next Steps
Start with the create_simple_blackbox_model function for quick prototypes
Build comprehensive configurations for production models
Integrate with Atlas optimizers for budget optimization
Use the configuration system to document and version your models
Implement custom contribution functions for better interpretability
For more examples, see the test file test_blackbox_model_builder.py, which includes additional patterns and use cases.