Skip to main content

Class Signature

class TimesFM_2p5_Dataset(BaseDataset):
    def __init__(
        self,
        name: str | None = None,
        datetime_col: str | None = None,
        path: str | None = None,
        batch_size: int = 16,
        mode: str = "train",
        boundaries: list[int] = [0, 0, 0],
        task_name: str = "evaluation",
        stride: int = 10,
        context_len: int = 512,
        horizon_len: int = 96,
        normalize: bool = True,
        **kwargs,
    )
Dataset wrapper for preparing time-series data for the TimesFM 2.5 model.

Parameters

name
str
default:"None"
Dataset name (e.g., “ett”, “etth1”).
datetime_col
str
default:"None"
Name of the datetime column in the CSV file.
path
str
default:"None"
Path to the CSV file containing time-series data.
batch_size
int
default:"16"
Batch size for dataloaders.
mode
str
default:"train"
Dataset mode: “train” or “test”.
boundaries
list[int]
default:"[0, 0, 0]"
Train/val/test split boundaries. If [0,0,0], uses 50%/20%/30% split.
task_name
str
default:"evaluation"
Task name: “evaluation” or “finetuning”.
stride
int
default:"10"
Stride for windowing when creating sequences.
context_len
int
default:"512"
Length of historical context to use for forecasting.
horizon_len
int
default:"96"
Forecast horizon length. Automatically capped at 30% of dataset length.
normalize
bool
default:"True"
Whether to normalize the data.

Attributes

n_channels
int
Number of channels (columns) in the time series, computed as df.shape[1] - 1.

Methods

get_data_loader()

Returns a PyTorch DataLoader for the dataset.
def get_data_loader()
return
DataLoader
PyTorch DataLoader configured with the specified batch size.

__len__()

Returns the number of samples in the dataset, so the built-in len() works on it.
def __len__()
return
int
Total number of samples available.

Usage Example

from samay.dataset import TimesFM_2p5_Dataset

# Create dataset for evaluation
eval_dataset = TimesFM_2p5_Dataset(
    name="ett",
    path="data/ETTh1.csv",
    datetime_col="date",
    mode="test",
    task_name="evaluation",
    context_len=512,
    horizon_len=96,
    batch_size=32,
    normalize=True
)

# Create dataset for training
train_dataset = TimesFM_2p5_Dataset(
    name="ett",
    path="data/ETTh1.csv",
    datetime_col="date",
    mode="train",
    task_name="finetuning",
    context_len=512,
    horizon_len=96,
    batch_size=16,
    normalize=True,
    stride=10
)

# Get dataloader
dataloader = eval_dataset.get_data_loader()

print(f"Dataset size: {len(eval_dataset)}")
print(f"Number of channels: {eval_dataset.n_channels}")
print(f"Context length: {eval_dataset.context_len}")
print(f"Horizon length: {eval_dataset.horizon_len}")

Notes

  • Horizon length is automatically capped at 30% of the dataset length
  • Default boundaries use 50%/20%/30% split for train/val/test if not specified
  • Data normalization is applied by default for better model performance
  • Datetime column is dropped after reading if specified
  • Supports both evaluation and fine-tuning tasks
  • Windowing with stride creates overlapping sequences for training

Build docs developers (and LLMs) love