Skip to main content
The samay.utils module provides utility functions for working with time series datasets and data formats.

Dataset Management

get_gifteval_datasets

Get hierarchical and direct datasets from the GIFT-Eval benchmark.
from samay.utils import get_gifteval_datasets

datasets = get_gifteval_datasets(path)
path
str
required
Path to the directory containing GIFT-Eval datasets
datasets
dict
Dictionary mapping dataset paths to tuples of (frequency, size_in_MB)

Example

from samay.utils import get_gifteval_datasets

datasets = get_gifteval_datasets("/path/to/gifteval")
for path, (freq, size) in datasets.items():
    print(f"{path}: {freq} ({size:.2f} MB)")

get_monash_datasets

Get datasets from the Monash Time Series Forecasting Archive.
from samay.utils import get_monash_datasets

datasets = get_monash_datasets(path)
path
str
required
Path to the directory containing Monash datasets
datasets
dict
Dictionary mapping dataset paths to tuples of (inferred_frequency, size_in_MB), sorted by file size

Example

from samay.utils import get_monash_datasets

datasets = get_monash_datasets("/path/to/monash")
for path, (freq, size) in datasets.items():
    print(f"{path}: {freq} ({size:.2f} MB)")

get_tsb_ad_datasets

Read TSB-AD (Time Series Benchmark for Anomaly Detection) datasets stored as CSV files.
from samay.utils import get_tsb_ad_datasets

datasets = get_tsb_ad_datasets(path)
path
str
required
Path to the directory containing TSB-AD CSV files
datasets
dict
Dictionary mapping absolute file paths to tuples of (inferred_freq_or_None, size_in_MB)

Example

from samay.utils import get_tsb_ad_datasets

datasets = get_tsb_ad_datasets("/path/to/TSB-AD-U")
for path, (freq, size) in datasets.items():
    freq_str = freq if freq else "unknown"
    print(f"{path}: {freq_str} ({size:.2f} MB)")

Data Conversion

ts_to_csv

Convert a .ts file (time series format) to a .csv file.
from samay.utils import ts_to_csv

ts_to_csv(ts_file, csv_file, replace_missing_vals_with="NaN")
ts_file
str
required
Path to the input .ts file
csv_file
str
required
Path to the output .csv file
replace_missing_vals_with
str
default:"NaN"
Value used to replace missing values found in the .ts file

Example

from samay.utils import ts_to_csv

ts_to_csv(
    ts_file="data/ECG5000_TRAIN.ts",
    csv_file="data/ECG5000_TRAIN.csv",
    replace_missing_vals_with="NaN"
)

arrow_to_csv

Convert Arrow format datasets to CSV.
from samay.utils import arrow_to_csv

arrow_to_csv(arrow_dir, freq=None)
arrow_dir
str
required
Path to the directory containing Arrow format data
freq
str
Frequency string for the time series (e.g., "1H", "1D", "1M")

Example

from samay.utils import arrow_to_csv

arrow_to_csv(
    arrow_dir="/path/to/arrow/data",
    freq="1H"  # Hourly frequency
)

get_multivariate_data

Extract multivariate time series data from a DataFrame.
from samay.utils import get_multivariate_data

data, labels = get_multivariate_data(dataframe, label_col="label")
dataframe
pd.DataFrame
required
DataFrame containing the multivariate time series data
label_col
str
default:"label"
Name of the column containing labels
data
np.ndarray
Multivariate data array of shape (num_samples, num_channels, num_timesteps)
labels
np.ndarray
Array of labels

Example

import pandas as pd
from samay.utils import get_multivariate_data

df = pd.read_csv("data.csv")
data, labels = get_multivariate_data(df, label_col="label")

print(f"Data shape: {data.shape}")  # (num_samples, num_channels, num_timesteps)
print(f"Labels shape: {labels.shape}")

Configuration Management

load_args

Load arguments from a JSON file.
from samay.utils import load_args

args = load_args(file_path)
file_path
str
required
Path to the JSON file containing arguments
args
dict
Dictionary of loaded arguments

read_yaml

Read a YAML configuration file.
from samay.utils import read_yaml

config = read_yaml(file_path)
file_path
str
required
Path to the YAML file
config
dict
Dictionary containing the YAML configuration

prep_finetune_config

Prepare fine-tuning configuration from a YAML file or dictionary.
from samay.utils import prep_finetune_config

config = prep_finetune_config(file_path=None, config=None)
file_path
str
Path to the YAML configuration file
config
dict
Configuration dictionary (alternative to file_path)
config
dict
Processed configuration dictionary with keys:
  • batch_size: Batch size for training
  • max_epochs: Maximum number of epochs
  • seed: Random seed
  • tf32: TF32 setting
  • mod_torch: PyTorch trainer modifications
Either file_path or config must be provided, but not both.

GPU Utilities

get_least_used_gpu

Get the index of the GPU device with the least memory usage.
from samay.utils import get_least_used_gpu

gpu_id = get_least_used_gpu()
gpu_id
int
Index of the least-used GPU device, or -1 if no GPU is available or an error occurs

Example

import torch
from samay.utils import get_least_used_gpu

gpu_id = get_least_used_gpu()
if gpu_id >= 0:
    device = torch.device(f"cuda:{gpu_id}")
    print(f"Using GPU {gpu_id}")
else:
    device = torch.device("cpu")
    print("Using CPU")

DataLoader Utilities

cleanup_dataloader

Best-effort shutdown for PyTorch DataLoader workers to prevent resource leaks.
from samay.utils import cleanup_dataloader

cleanup_dataloader(loader)
loader
torch.utils.data.DataLoader
required
PyTorch DataLoader to clean up

Example

from torch.utils.data import DataLoader
from samay.utils import cleanup_dataloader

# Create and use dataloader
loader = DataLoader(dataset, batch_size=32, num_workers=4)

# Train/evaluate...

# Clean up when done
cleanup_dataloader(loader)
This function stops worker processes and queues, preventing semaphore leaks in multi-process DataLoaders.

Build docs developers (and LLMs) love