AutoML
Train machine learning models automatically using AutoGluon integration.
Overview
Strongly's AutoML automatically:
- Selects the best algorithms for your data
- Performs hyperparameter tuning
- Handles feature engineering
- Produces a ranked leaderboard of models
Basic Usage
import pandas as pd
from strongly_python.mlops import automl
# Load your data
df = pd.read_csv("data.csv")
# Create AutoML job
job = automl.create_job(
name="my-model",
data=df,
target_column="label",
problem_type="binary",
time_limit=300 # 5 minutes
)
# Wait for completion
job.wait()
# View results
print(job.get_leaderboard())
Creating Jobs
From DataFrame
Upload data directly from a pandas DataFrame:
import pandas as pd
from strongly_python.mlops import automl
df = pd.DataFrame({
"feature1": [1.0, 2.0, 3.0, 4.0, 5.0],
"feature2": [0.1, 0.2, 0.3, 0.4, 0.5],
"target": [0, 0, 1, 1, 1]
})
job = automl.create_job(
name="churn-prediction",
data=df,
target_column="target",
problem_type="binary",
time_limit=300
)
From Volume Path
Use data stored in your workspace volume:
job = automl.create_job(
name="sales-forecast",
volume_path="/project/data/sales.csv",
target_column="revenue",
problem_type="regression",
time_limit=600
)
Problem Types
| Type | Description | Use Case |
|---|---|---|
| `binary` | Binary classification | Yes/No, True/False predictions |
| `multiclass` | Multi-class classification | Category prediction |
| `regression` | Continuous value prediction | Price, quantity forecasting |
# Binary classification
job = automl.create_job(
name="churn-model",
data=df,
target_column="churned",
problem_type="binary"
)
# Multi-class classification
job = automl.create_job(
name="category-model",
data=df,
target_column="category",
problem_type="multiclass"
)
# Regression
job = automl.create_job(
name="price-model",
data=df,
target_column="price",
problem_type="regression"
)
Presets
Control the quality/speed trade-off with presets:
| Preset | Description |
|---|---|
| `best_quality` | Maximum accuracy, longest training |
| `high_quality` | High accuracy, moderate training time |
| `medium_quality` | Balanced accuracy and speed |
| `optimize_for_deployment` | Fast inference, smaller models |
# For highest accuracy
job = automl.create_job(
name="production-model",
data=df,
target_column="label",
problem_type="binary",
time_limit=3600,
preset="best_quality"
)
# For fast deployment
job = automl.create_job(
name="edge-model",
data=df,
target_column="label",
problem_type="binary",
time_limit=300,
preset="optimize_for_deployment"
)
Job Management
Monitoring Progress
# Check status
print(f"Status: {job.status}")
print(f"Job ID: {job.job_id}")
# Wait for completion
job.wait()
# Or poll manually (jobs start as "pending" before moving to "running")
import time
while job.status in ("pending", "running"):
    print(f"Status: {job.status}")
    time.sleep(30)
    job.refresh()  # update status before the next check
Getting Results
# Get model leaderboard
leaderboard = job.get_leaderboard()
for i, entry in enumerate(leaderboard[:5]):
print(f"{i+1}. {entry.model_name}: score={entry.score:.4f}")
# Get best model info
best = job.get_best_model()
print(f"Best Model: {best.model_name}")
print(f"Score: {best.score:.4f}")
# Download model for deployment
model_path = job.download_model()
print(f"Model saved to: {model_path}")
Complete Example
import pandas as pd
from strongly_python.mlops import automl
def main():
# Create sample dataset
print("Creating sample dataset...")
df = pd.DataFrame({
"feature1": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
"feature2": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
"feature3": [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
"target": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1]
})
# Create AutoML job
print("Starting AutoML job...")
job = automl.create_job(
name="churn-prediction-model",
data=df,
target_column="target",
problem_type="binary",
time_limit=300,
preset="medium_quality"
)
print(f"Job ID: {job.job_id}")
print(f"Status: {job.status}")
# Wait for completion
print("\nWaiting for AutoML to complete...")
job.wait()
# Get results
print("\n--- AutoML Results ---")
print(f"Status: {job.status}")
# View leaderboard
print("\n--- Model Leaderboard ---")
leaderboard = job.get_leaderboard()
for i, entry in enumerate(leaderboard[:5]):
print(f"{i+1}. {entry.model_name}: score={entry.score:.4f}")
# Get best model
best = job.get_best_model()
print(f"\nBest Model: {best.model_name}")
print(f"Score: {best.score:.4f}")
# Download model
print("\nDownloading model...")
model_path = job.download_model()
print(f"Model saved to: {model_path}")
if __name__ == "__main__":
main()
API Reference
Job Creation
automl.create_job(
    name: str,                      # Job name
    target_column: str,             # Target column name
    problem_type: str,              # binary, multiclass, regression
    data: pd.DataFrame = None,      # Data as DataFrame
    volume_path: str = None,        # Or path to CSV in volume
    time_limit: int = 300,          # Training time in seconds
    preset: str = "medium_quality"  # Quality preset
)
Job Object
| Method/Property | Description |
|---|---|
| `job.job_id` | Unique job identifier |
| `job.status` | Current status |
| `job.wait()` | Block until completion |
| `job.refresh()` | Update status |
| `job.get_leaderboard()` | Get ranked models |
| `job.get_best_model()` | Get best model info |
| `job.download_model()` | Download model artifact |
Job Status Values
| Status | Description |
|---|---|
| `pending` | Job queued |
| `running` | Training in progress |
| `completed` | Training finished |
| `failed` | Training failed |