
AutoML

Train machine learning models automatically using AutoGluon integration.

Overview

Strongly's AutoML automatically:

  • Selects the best algorithms for your data
  • Performs hyperparameter tuning
  • Handles feature engineering
  • Produces a ranked leaderboard of models

Basic Usage

import pandas as pd
from strongly.mlops import automl

# Load your data
df = pd.read_csv("data.csv")

# Create AutoML job
job = automl.create_job(
name="my-model",
data=df,
target_column="label",
problem_type="binary",
time_limit=300 # 5 minutes
)

# Wait for completion
job.wait()

# View results
print(job.get_leaderboard())

Creating Jobs

From DataFrame

Upload data directly from a pandas DataFrame:

import pandas as pd
from strongly.mlops import automl

df = pd.DataFrame({
"feature1": [1.0, 2.0, 3.0, 4.0, 5.0],
"feature2": [0.1, 0.2, 0.3, 0.4, 0.5],
"target": [0, 0, 1, 1, 1]
})

job = automl.create_job(
name="churn-prediction",
data=df,
target_column="target",
problem_type="binary",
time_limit=300
)

From Volume Path

Use data stored in your workspace volume:

job = automl.create_job(
name="sales-forecast",
volume_path="/project/data/sales.csv",
target_column="revenue",
problem_type="regression",
time_limit=600
)

Problem Types

| Type | Description | Use Case |
|------|-------------|----------|
| `binary` | Binary classification | Yes/No, True/False predictions |
| `multiclass` | Multi-class classification | Category prediction |
| `regression` | Continuous value prediction | Price, quantity forecasting |
# Binary classification
job = automl.create_job(
name="churn-model",
data=df,
target_column="churned",
problem_type="binary"
)

# Multi-class classification
job = automl.create_job(
name="category-model",
data=df,
target_column="category",
problem_type="multiclass"
)

# Regression
job = automl.create_job(
name="price-model",
data=df,
target_column="price",
problem_type="regression"
)

Presets

Control the quality/speed trade-off with presets:

| Preset | Description |
|--------|-------------|
| `best_quality` | Maximum accuracy, longest training |
| `high_quality` | High accuracy, moderate training time |
| `medium_quality` | Balanced accuracy and speed |
| `optimize_for_deployment` | Fast inference, smaller models |
# For highest accuracy
job = automl.create_job(
name="production-model",
data=df,
target_column="label",
problem_type="binary",
time_limit=3600,
preset="best_quality"
)

# For fast deployment
job = automl.create_job(
name="edge-model",
data=df,
target_column="label",
problem_type="binary",
time_limit=300,
preset="optimize_for_deployment"
)

Job Management

Monitoring Progress

# Check status
print(f"Status: {job.status}")
print(f"Job ID: {job.job_id}")

# Wait for completion
job.wait()

# Or poll manually
import time
while job.status == "running":
    print(f"Status: {job.status}")
    time.sleep(30)
    job.refresh()

Getting Results

# Get model leaderboard
leaderboard = job.get_leaderboard()
for i, entry in enumerate(leaderboard[:5]):
    print(f"{i+1}. {entry.model_name}: score={entry.score:.4f}")

# Get best model info
best = job.get_best_model()
print(f"Best Model: {best.model_name}")
print(f"Score: {best.score:.4f}")

# Download model for deployment
model_path = job.download_model()
print(f"Model saved to: {model_path}")

Complete Example

import pandas as pd
from strongly.mlops import automl

def main():
    # Create sample dataset
    print("Creating sample dataset...")
    df = pd.DataFrame({
        "feature1": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
        "feature2": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
        "feature3": [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
        "target": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1]
    })

    # Create AutoML job
    print("Starting AutoML job...")
    job = automl.create_job(
        name="churn-prediction-model",
        data=df,
        target_column="target",
        problem_type="binary",
        time_limit=300,
        preset="medium_quality"
    )
    print(f"Job ID: {job.job_id}")
    print(f"Status: {job.status}")

    # Wait for completion
    print("\nWaiting for AutoML to complete...")
    job.wait()

    # Get results
    print("\n--- AutoML Results ---")
    print(f"Status: {job.status}")

    # View leaderboard
    print("\n--- Model Leaderboard ---")
    leaderboard = job.get_leaderboard()
    for i, entry in enumerate(leaderboard[:5]):
        print(f"{i+1}. {entry.model_name}: score={entry.score:.4f}")

    # Get best model
    best = job.get_best_model()
    print(f"\nBest Model: {best.model_name}")
    print(f"Score: {best.score:.4f}")

    # Download model
    print("\nDownloading model...")
    model_path = job.download_model()
    print(f"Model saved to: {model_path}")

if __name__ == "__main__":
    main()

Resource API — client.automl

In addition to the module-level convenience functions above, the SDK provides a full resource API for managing AutoML jobs programmatically:

from strongly import Strongly

client = Strongly()

# List AutoML jobs with filters
for job in client.automl.list_jobs(status="completed"):
    print(f"{job.name}: {job.status}")

# Get AutoML statistics
stats = client.automl.stats()
print(f"Total jobs: {stats.total}, Running: {stats.running}")

# List available datasets
datasets = client.automl.datasets()
for ds in datasets:
    print(ds)

# Create a job via the resource API
job = client.automl.create_job({
"name": "churn-model",
"target_column": "churned",
"problem_type": "binary",
"time_limit": 300,
})

# Retrieve a job
job = client.automl.retrieve_job("job-abc123")
print(f"{job.name}: {job.status}")

# Stop a running job
client.automl.stop_job("job-abc123")

# Deploy the best model from a completed job
client.automl.deploy_best_model("job-abc123")

# Get training logs
logs = client.automl.job_logs("job-abc123", lines=50)
print(logs)

# Delete a job
client.automl.delete_job("job-abc123")

Resource Method Reference

| Method | Description | Returns |
|--------|-------------|---------|
| `list_jobs(*, status=None, search=None, limit=50)` | List AutoML jobs with filters | `SyncPaginator[AutoMLJob]` |
| `stats()` | Get job statistics | `AutoMLStats` |
| `datasets()` | List available datasets | `List[dict]` |
| `create_job(body)` | Create an AutoML job | `AutoMLJob` |
| `retrieve_job(job_id)` | Get a job by ID | `AutoMLJob` |
| `delete_job(job_id)` | Delete a job | `dict` |
| `stop_job(job_id)` | Stop a running job | `dict` |
| `deploy_best_model(job_id, **kwargs)` | Deploy the best model from a job | `dict` |
| `job_logs(job_id, *, lines=None, since=None)` | Get training logs | `dict` |

Module-Level API Reference

Job Creation (via helpers)

automl.create_job(
name: str, # Job name
data: pd.DataFrame = None, # Data as DataFrame
volume_path: str = None, # Or path to CSV in volume
target_column: str, # Target column name
problem_type: str, # binary, multiclass, regression
time_limit: int = 300, # Training time in seconds
preset: str = "medium_quality" # Quality preset
)

Job Object (from helpers)

| Method/Property | Description |
|-----------------|-------------|
| `job.job_id` | Unique job identifier |
| `job.status` | Current status |
| `job.wait()` | Block until completion |
| `job.refresh()` | Update status |
| `job.get_leaderboard()` | Get ranked models |
| `job.get_best_model()` | Get best model info |
| `job.download_model()` | Download model artifact |

Job Status Values

| Status | Description |
|--------|-------------|
| `pending` | Job queued |
| `running` | Training in progress |
| `completed` | Training finished |
| `failed` | Training failed |