Use this file to discover all available pages before exploring further.
Phoenix automatically versions your datasets, ensuring reproducibility and enabling you to track changes over time. Every modification to a dataset creates a new version, while experiments remain linked to their original versions.
# Example: list the version history of a dataset.
from phoenix.client import Client

client = Client()

# Get all versions (latest first)
versions = client.datasets.get_dataset_versions(
    dataset="qa-dataset",
    limit=100,  # Get up to 100 most recent versions
)

# Display version history
for version in versions:
    print(f"Version: {version['version_id']}")
    print(f" Created: {version['created_at']}")
    # Description and metadata are optional, so only print them when set.
    if version.get('description'):
        print(f" Description: {version['description']}")
    if version.get('metadata'):
        print(f" Metadata: {version['metadata']}")
    print()
# Example: fetch the latest dataset version and a pinned version, then
# compare their sizes.
from phoenix.client import Client

client = Client()

# Get latest version (default)
latest = client.datasets.get_dataset(dataset="qa-dataset")
print(f"Latest version: {latest.version_id} with {len(latest)} examples")

# Get specific version
specific = client.datasets.get_dataset(
    dataset="qa-dataset",
    version_id="v_abc123def456",
)
print(f"Specific version: {specific.version_id} with {len(specific)} examples")

# Compare versions
print(f"Examples added: {len(latest) - len(specific)}")
Experiments are permanently linked to specific dataset versions:
# Example: run experiments against two dataset versions and re-run an
# improved task against the older version.
#
# NOTE: `my_task` and `improved_task` are not defined in this snippet; they
# are assumed to be task callables you have defined elsewhere.
from phoenix.client import Client
from phoenix.experiments import run_experiment

client = Client()

# Get dataset at current version
dataset_v1 = client.datasets.get_dataset(dataset="qa-dataset")

# Run experiment on v1
experiment_v1 = run_experiment(
    dataset=dataset_v1,
    task=my_task,
    experiment_name="test-on-v1",
)
print(f"Experiment dataset version: {dataset_v1.version_id}")

# Add more examples (creates v2)
dataset_v2 = client.datasets.add_examples_to_dataset(
    dataset="qa-dataset",
    inputs=[{"question": "New question"}],
    outputs=[{"answer": "New answer"}],
)

# Run experiment on v2
experiment_v2 = run_experiment(
    dataset=dataset_v2,
    task=my_task,
    experiment_name="test-on-v2",
)
print(f"Experiment v1 still references: {dataset_v1.version_id}")
print(f"Experiment v2 references: {dataset_v2.version_id}")

# You can re-run on old versions
rerun_v1 = run_experiment(
    dataset=client.datasets.get_dataset(
        dataset="qa-dataset",
        version_id=dataset_v1.version_id,
    ),
    task=improved_task,
    experiment_name="improved-on-v1",
)
Convert datasets to pandas DataFrames for analysis or storage:
# Example: export a dataset to a pandas DataFrame and save it to disk.
from phoenix.client import Client
import pandas as pd

client = Client()
dataset = client.datasets.get_dataset(dataset="qa-dataset")

# Export to DataFrame
df = dataset.to_dataframe()
print(df.head())
print(df.columns)
# Index(['input', 'output', 'metadata'], dtype='object')
# Index name: 'example_id'

# Save to CSV
df.to_csv("dataset_export.csv")

# Save to Parquet
df.to_parquet("dataset_export.parquet")
# Example: export every version of a dataset to its own CSV file.
from phoenix.client import Client
import pandas as pd

client = Client()

# Get all versions
versions = client.datasets.get_dataset_versions(dataset="qa-dataset")

# Export each version
for version in versions:
    dataset = client.datasets.get_dataset(
        dataset="qa-dataset",
        version_id=version['version_id'],
    )

    # Save with version in filename
    filename = f"dataset_{version['version_id']}.csv"
    df = dataset.to_dataframe()
    df.to_csv(filename)
    print(f"Exported {filename}: {len(dataset)} examples")
While Phoenix keeps all versions, you can implement your own archival strategy:
# Example: find dataset versions older than 90 days as a starting point for
# a custom archival strategy.
from datetime import datetime, timedelta, timezone

from phoenix.client import Client


def _as_aware_datetime(value):
    """Return *value* as a timezone-aware ``datetime``.

    Accepts a ``datetime`` or an ISO-8601 string. Aware datetimes are
    returned unchanged.
    """
    if isinstance(value, str):
        # datetime.fromisoformat() rejects a trailing "Z" before Python 3.11.
        if value.endswith("Z"):
            value = value[:-1] + "+00:00"
        value = datetime.fromisoformat(value)
    if value.tzinfo is None:
        # NOTE(review): naive timestamps are assumed to be UTC -- confirm
        # against the server's behaviour.
        value = value.replace(tzinfo=timezone.utc)
    return value


client = Client()

# Find versions older than 90 days
cutoff = datetime.now(timezone.utc) - timedelta(days=90)
versions = client.datasets.get_dataset_versions(dataset="qa-dataset")

# Normalise created_at before comparing: comparing a string or a naive
# datetime against the timezone-aware cutoff would raise TypeError.
old_versions = [
    v for v in versions
    if _as_aware_datetime(v['created_at']) < cutoff
]
print(f"Found {len(old_versions)} versions older than 90 days")