Skip to main content

AutoML

Train machine learning models automatically using AutoGluon integration.

Overview

Strongly's AutoML automatically:

  • Selects the best algorithms for your data
  • Performs hyperparameter tuning
  • Handles feature engineering
  • Produces a ranked leaderboard of models

Basic Usage

import pandas as pd
from strongly_python.mlops import automl

# Read the training data into a DataFrame
df = pd.read_csv("data.csv")

# Submit the AutoML job; time_limit is given in seconds (300 s = 5 minutes)
job = automl.create_job(
    name="my-model",
    data=df,
    target_column="label",
    problem_type="binary",
    time_limit=300,
)

# Block until the job has finished
job.wait()

# Show the ranked models
leaderboard = job.get_leaderboard()
print(leaderboard)

Creating Jobs

From DataFrame

Upload data directly from a pandas DataFrame:

import pandas as pd
from strongly_python.mlops import automl

# Small in-memory dataset: two numeric features and a 0/1 target column
columns = {
    "feature1": [1.0, 2.0, 3.0, 4.0, 5.0],
    "feature2": [0.1, 0.2, 0.3, 0.4, 0.5],
    "target": [0, 0, 1, 1, 1],
}
df = pd.DataFrame(columns)

# Pass the DataFrame directly as the job's training data
job = automl.create_job(
    name="churn-prediction",
    data=df,
    target_column="target",
    problem_type="binary",
    time_limit=300,
)

From Volume Path

Use data stored in your workspace volume:

# Point the job at a CSV file in the workspace volume instead of passing a DataFrame
job = automl.create_job(
    name="sales-forecast",
    volume_path="/project/data/sales.csv",
    target_column="revenue",
    problem_type="regression",
    time_limit=600,
)

Problem Types

| Type | Description | Use Case |
|------|-------------|----------|
| `binary` | Binary classification | Yes/No, True/False predictions |
| `multiclass` | Multi-class classification | Category prediction |
| `regression` | Continuous value prediction | Price, quantity forecasting |

# Binary classification: the target has two classes
job = automl.create_job(
    name="churn-model",
    data=df,
    target_column="churned",
    problem_type="binary",
)

# Multi-class classification: the target is one of several categories
job = automl.create_job(
    name="category-model",
    data=df,
    target_column="category",
    problem_type="multiclass",
)

# Regression: the target is a continuous value
job = automl.create_job(
    name="price-model",
    data=df,
    target_column="price",
    problem_type="regression",
)

Presets

Control the quality/speed trade-off with presets:

| Preset | Description |
|--------|-------------|
| `best_quality` | Maximum accuracy, longest training |
| `high_quality` | High accuracy, moderate training time |
| `medium_quality` | Balanced accuracy and speed |
| `optimize_for_deployment` | Fast inference, smaller models |

# Highest accuracy: one-hour budget with the best_quality preset
job = automl.create_job(
    name="production-model",
    data=df,
    target_column="label",
    problem_type="binary",
    time_limit=3600,
    preset="best_quality",
)

# Fast deployment: five-minute budget with the optimize_for_deployment preset
job = automl.create_job(
    name="edge-model",
    data=df,
    target_column="label",
    problem_type="binary",
    time_limit=300,
    preset="optimize_for_deployment",
)

Job Management

Monitoring Progress

import time

# Check status
print(f"Status: {job.status}")
print(f"Job ID: {job.job_id}")

# Option 1: block until the job finishes
job.wait()

# Option 2: poll manually. Keep polling while the job is queued ("pending")
# or training ("running"); testing only for "running" would leave the loop
# immediately for a job that has not started yet.
while job.status in ("pending", "running"):
    print(f"Status: {job.status}")
    time.sleep(30)
    job.refresh()  # update job.status before the next check

Getting Results

# Get the model leaderboard and print the top five entries, numbered from 1
leaderboard = job.get_leaderboard()
for rank, entry in enumerate(leaderboard[:5], start=1):
    print(f"{rank}. {entry.model_name}: score={entry.score:.4f}")

# Get best model info
best = job.get_best_model()
print(f"Best Model: {best.model_name}")
print(f"Score: {best.score:.4f}")

# Download model for deployment
model_path = job.download_model()
print(f"Model saved to: {model_path}")

Complete Example

import pandas as pd
from strongly_python.mlops import automl

def main():
    """Run an end-to-end AutoML example.

    Builds a small sample dataset, submits a binary-classification AutoML
    job, waits for it, prints the top leaderboard entries and the best
    model, and downloads the model artifact.
    """
    # Create sample dataset
    print("Creating sample dataset...")
    df = pd.DataFrame({
        "feature1": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
        "feature2": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
        "feature3": [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
        "target": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
    })

    # Create AutoML job
    print("Starting AutoML job...")
    job = automl.create_job(
        name="churn-prediction-model",
        data=df,
        target_column="target",
        problem_type="binary",
        time_limit=300,
        preset="medium_quality",
    )
    print(f"Job ID: {job.job_id}")
    print(f"Status: {job.status}")

    # Wait for completion
    print("\nWaiting for AutoML to complete...")
    job.wait()

    # Get results
    print("\n--- AutoML Results ---")
    print(f"Status: {job.status}")

    # View leaderboard (top five entries, numbered from 1)
    print("\n--- Model Leaderboard ---")
    leaderboard = job.get_leaderboard()
    for rank, entry in enumerate(leaderboard[:5], start=1):
        print(f"{rank}. {entry.model_name}: score={entry.score:.4f}")

    # Get best model
    best = job.get_best_model()
    print(f"\nBest Model: {best.model_name}")
    print(f"Score: {best.score:.4f}")

    # Download model
    print("\nDownloading model...")
    model_path = job.download_model()
    print(f"Model saved to: {model_path}")


if __name__ == "__main__":
    main()

API Reference

Job Creation

# Signature summary in reference notation (not runnable code).
# Training data is supplied through either `data` or `volume_path`.
automl.create_job(
name: str, # Job name
data: pd.DataFrame = None, # Training data as an in-memory pandas DataFrame
volume_path: str = None, # Alternative to `data`: path to a CSV file in the workspace volume
target_column: str, # Name of the column to predict
problem_type: str, # One of "binary", "multiclass", "regression"
time_limit: int = 300, # Training time in seconds
preset: str = "medium_quality" # Quality preset: best_quality, high_quality, medium_quality, optimize_for_deployment
)

Job Object

| Method/Property | Description |
|-----------------|-------------|
| `job.job_id` | Unique job identifier |
| `job.status` | Current status |
| `job.wait()` | Block until completion |
| `job.refresh()` | Update status |
| `job.get_leaderboard()` | Get ranked models |
| `job.get_best_model()` | Get best model info |
| `job.download_model()` | Download model artifact |

Job Status Values

| Status | Description |
|--------|-------------|
| `pending` | Job queued |
| `running` | Training in progress |
| `completed` | Training finished |
| `failed` | Training failed |