Kubernetes Deployment

Overview

Neurenix provides native Kubernetes integration for deploying, scaling, and managing ML models in production. The framework includes support for:

Deployments: Scalable model serving with rolling updates
Pods: Individual container instances
Services: Load balancing and service discovery
ConfigMaps & Secrets: Configuration and credential management
Jobs: Batch inference and training

Prerequisites

Kubernetes cluster (1.19+)
kubectl installed and configured (kubeconfig) to access the target cluster
Docker images built and pushed to a registry

# Verify kubectl
kubectl version --client

Quick Start

Deploy a Model

from neurenix.kubernetes import Deployment, DeploymentConfig

# Create deployment configuration
config = DeploymentConfig(
    name="neurenix-model",
    image="myregistry.com/neurenix-model:latest",
    replicas=3,
    namespace="default",
    ports=[{"containerPort": 8000, "protocol": "TCP"}],
    env={
        "MODEL_PATH": "/app/model.nx",
        "DEVICE": "cpu"
    },
    resources={
        "requests": {"cpu": "500m", "memory": "1Gi"},
        "limits": {"cpu": "2", "memory": "4Gi"}
    }
)

# Create and deploy
deployment = Deployment(name="neurenix-model", namespace="default")
deployment.create(config)

print("Deployment created successfully")

Expose via Service

from neurenix.kubernetes import Service, ServiceConfig

service_config = ServiceConfig(
    name="neurenix-model-service",
    namespace="default",
    selector={"app": "neurenix-model"},
    ports=[{"port": 80, "targetPort": 8000, "protocol": "TCP"}],
    type="LoadBalancer"
)

service = Service(name="neurenix-model-service", namespace="default")
service.create(service_config)

print("Service created successfully")

Deployments

DeploymentConfig

Comprehensive deployment configuration:

from neurenix.kubernetes import DeploymentConfig

config = DeploymentConfig(
    name="ml-inference",
    image="neurenix-model:v1.0",
    replicas=5,
    namespace="production",
    labels={
        "app": "ml-inference",
        "version": "v1.0",
        "component": "model-serving"
    },
    annotations={
        "prometheus.io/scrape": "true",
        "prometheus.io/port": "8000"
    },
    env={
        "MODEL_PATH": "/models/model.nx",
        "LOG_LEVEL": "info",
        "WORKERS": "4"
    },
    ports=[{"containerPort": 8000, "name": "http"}],
    resources={
        "requests": {
            "cpu": "1",
            "memory": "2Gi"
        },
        "limits": {
            "cpu": "4",
            "memory": "8Gi"
        }
    },
    liveness_probe={
        "httpGet": {
            "path": "/health",
            "port": 8000
        },
        "initialDelaySeconds": 30,
        "periodSeconds": 10
    },
    readiness_probe={
        "httpGet": {
            "path": "/ready",
            "port": 8000
        },
        "initialDelaySeconds": 10,
        "periodSeconds": 5
    },
    strategy={
        "type": "RollingUpdate",
        "rollingUpdate": {
            "maxSurge": 1,
            "maxUnavailable": 0
        }
    }
)

Deployment Operations

from neurenix.kubernetes import Deployment

deployment = Deployment(name="ml-inference", namespace="production")

# Create deployment
deployment.create(config)

# Check if exists
if deployment.exists():
    print("Deployment is running")

# Get deployment info
info = deployment.get()
print(f"Replicas: {info['spec']['replicas']}")

# Scale deployment
deployment.scale(replicas=10)

# Restart deployment (rolling restart)
deployment.restart()

# Get deployment status
status = deployment.status()
print(f"Available replicas: {status.get('availableReplicas', 0)}")

# View logs
logs = deployment.logs(tail=100)
print(logs)

# Execute command in deployment
output = deployment.exec(["curl", "http://localhost:8000/health"])
print(output)

# Update image
deployment.update_image("neurenix-model:v2.0")

# Delete deployment
deployment.delete(wait=True)

Neurenix-Specific Deployment

Simplified deployment creation:

deployment = Deployment(name="neurenix-prod", namespace="ml-models")

deployment.create_neurenix_deployment(
    image="neurenix-model:latest",
    model_path="/models/classifier.nx",
    replicas=3,
    gpu=False,
    memory="4Gi",
    cpu="2",
    port=8000,
    env={
        "BATCH_SIZE": "32",
        "TIMEOUT": "30"
    }
)

GPU Deployments

config = DeploymentConfig(
    name="gpu-inference",
    image="neurenix-model:cuda",
    replicas=2,
    namespace="gpu-workloads",
    resources={
        "requests": {
            "cpu": "4",
            "memory": "16Gi",
            "nvidia.com/gpu": "1"
        },
        "limits": {
            "cpu": "8",
            "memory": "32Gi",
            "nvidia.com/gpu": "1"
        }
    },
    node_selector={
        "accelerator": "nvidia-tesla-v100"
    },
    tolerations=[
        {
            "key": "nvidia.com/gpu",
            "operator": "Exists",
            "effect": "NoSchedule"
        }
    ]
)

Pods

PodConfig

from neurenix.kubernetes import PodConfig

config = PodConfig(
    name="inference-pod",
    image="neurenix-model:latest",
    namespace="default",
    labels={"app": "inference"},
    env={"MODEL_PATH": "/app/model.nx"},
    ports=[{"containerPort": 8000}],
    resources={
        "requests": {"cpu": "1", "memory": "2Gi"},
        "limits": {"cpu": "2", "memory": "4Gi"}
    },
    restart_policy="Always",
    volumes=[
        {
            "name": "model-storage",
            "persistentVolumeClaim": {"claimName": "model-pvc"}
        }
    ],
    volume_mounts=[
        {"name": "model-storage", "mountPath": "/app/models"}
    ]
)

Pod Operations

from neurenix.kubernetes import Pod

pod = Pod(name="inference-pod", namespace="default")

# Create pod
pod.create(config)

# Check status
status = pod.status()
print(f"Pod status: {status}")

# Get pod info
info = pod.get()
print(f"IP: {info['status']['podIP']}")

# View logs
logs = pod.logs(tail=50)
print(logs)

# Follow logs
logs = pod.logs(follow=True)

# Execute command
output = pod.exec(["ls", "-la", "/app"])
print(output)

# Port forwarding
port_forward_process = pod.port_forward(local_port=8080, remote_port=8000)
print("Access at http://localhost:8080")
# ... use the service ...
port_forward_process.terminate()

# Copy files
pod.copy_to("./model.nx", "/app/model.nx")
pod.copy_from("/app/output.json", "./output.json")

# Delete pod
pod.delete(force=True)

Create Neurenix Pod

pod = Pod(name="neurenix-worker", namespace="ml-jobs")

pod.create_neurenix_pod(
    image="neurenix-model:latest",
    model_path="/models/model.nx",
    gpu=True,
    memory="8Gi",
    cpu="4",
    port=8000,
    env={"DEVICE": "cuda"},
    command=["python"],
    args=["inference.py"]
)

Services

ServiceConfig

from neurenix.kubernetes import ServiceConfig

# ClusterIP (internal)
cluster_config = ServiceConfig(
    name="internal-service",
    namespace="default",
    selector={"app": "neurenix-model"},
    ports=[{"port": 80, "targetPort": 8000}],
    type="ClusterIP"
)

# NodePort (external access via node IP)
node_config = ServiceConfig(
    name="nodeport-service",
    namespace="default",
    selector={"app": "neurenix-model"},
    ports=[{
        "port": 80,
        "targetPort": 8000,
        "nodePort": 30080
    }],
    type="NodePort"
)

# LoadBalancer (cloud provider LB)
lb_config = ServiceConfig(
    name="lb-service",
    namespace="default",
    selector={"app": "neurenix-model"},
    ports=[{"port": 80, "targetPort": 8000}],
    type="LoadBalancer",
    external_traffic_policy="Local"
)

Service Operations

from neurenix.kubernetes import Service

service = Service(name="neurenix-service", namespace="default")

# Create service
service.create(lb_config)

# Check if exists
if service.exists():
    print("Service is running")

# Get service info
info = service.get()
print(f"Type: {info['spec']['type']}")

# Get external IP (LoadBalancer)
external_ip = service.get_external_ip()
if external_ip:
    print(f"Access at http://{external_ip}")

# Get cluster IP
cluster_ip = service.get_cluster_ip()
print(f"Internal IP: {cluster_ip}")

# Get node port
node_port = service.get_node_port(port=80)
if node_port:
    print(f"NodePort: {node_port}")

# Get endpoints
endpoints = service.get_endpoints()
print(f"Endpoints: {endpoints}")

# Port forward
port_forward = service.port_forward(local_port=8080, remote_port=80)
print("Forwarded to localhost:8080")

# Delete service
service.delete()

Create Neurenix Service

service = Service(name="neurenix-api", namespace="production")

service.create_neurenix_service(
    port=80,
    target_port=8000,
    type="LoadBalancer",
    selector={"app": "neurenix-model", "version": "v1"},
    external_traffic_policy="Local"
)

Complete Production Deployment

from neurenix.kubernetes import (
    Deployment, DeploymentConfig,
    Service, ServiceConfig,
    ConfigMap, Secret
)

# 1. Create ConfigMap for configuration
config_map = ConfigMap(
    name="model-config",
    namespace="production",
    data={
        "model.conf": "batch_size=32\ntimeout=30",
        "logging.conf": "level=info\nformat=json"
    }
)

# 2. Create Secret for credentials
secret = Secret(
    name="model-secrets",
    namespace="production",
    data={
        "api-key": "base64-encoded-key",
        "db-password": "base64-encoded-password"
    }
)

# 3. Create Deployment
deployment_config = DeploymentConfig(
    name="neurenix-production",
    image="myregistry.com/neurenix-model:v2.0",
    replicas=5,
    namespace="production",
    labels={
        "app": "neurenix",
        "version": "v2.0",
        "tier": "api"
    },
    env={
        "MODEL_PATH": "/models/model.nx",
        "CONFIG_PATH": "/etc/config"
    },
    env_from=[
        {"configMapRef": {"name": "model-config"}},
        {"secretRef": {"name": "model-secrets"}}
    ],
    ports=[{"containerPort": 8000, "name": "http"}],
    resources={
        "requests": {"cpu": "2", "memory": "4Gi"},
        "limits": {"cpu": "4", "memory": "8Gi"}
    },
    liveness_probe={
        "httpGet": {"path": "/health", "port": 8000},
        "initialDelaySeconds": 30,
        "periodSeconds": 10,
        "timeoutSeconds": 5,
        "failureThreshold": 3
    },
    readiness_probe={
        "httpGet": {"path": "/ready", "port": 8000},
        "initialDelaySeconds": 10,
        "periodSeconds": 5
    },
    strategy={
        "type": "RollingUpdate",
        "rollingUpdate": {
            "maxSurge": 1,
            "maxUnavailable": 0
        }
    }
)

deployment = Deployment(name="neurenix-production", namespace="production")
deployment.create(deployment_config)

# 4. Create Service
service_config = ServiceConfig(
    name="neurenix-api",
    namespace="production",
    selector={"app": "neurenix"},
    ports=[{"port": 80, "targetPort": 8000}],
    type="LoadBalancer",
    annotations={
        "service.beta.kubernetes.io/aws-load-balancer-type": "nlb"
    }
)

service = Service(name="neurenix-api", namespace="production")
service.create(service_config)

print("Production deployment complete!")
print(f"External IP: {service.get_external_ip()}")

YAML Export

Export configurations to YAML files:

# Export deployment YAML
yaml_content = deployment_config.to_yaml()
with open("deployment.yaml", "w") as f:
    f.write(yaml_content)

# Export service YAML
service_yaml = service_config.to_yaml()
with open("service.yaml", "w") as f:
    f.write(service_yaml)

# Apply with kubectl
import subprocess
subprocess.run(["kubectl", "apply", "-f", "deployment.yaml"])
subprocess.run(["kubectl", "apply", "-f", "service.yaml"])

Best Practices

Resource Limits: Always set CPU and memory requests and limits (as in the examples above) to prevent resource exhaustion
Health Checks: Implement liveness and readiness probes for reliability
Rolling Updates: Use rolling updates with maxUnavailable=0 and maxSurge of at least 1 (both cannot be 0), combined with readiness probes, for zero-downtime deployments
Horizontal Pod Autoscaling: Configure HPA for automatic scaling based on metrics
Pod Disruption Budgets: Protect availability during cluster maintenance
Namespaces: Use separate namespaces for different environments
Labels and Selectors: Use consistent labeling for service discovery and monitoring
Secrets Management: Use Kubernetes secrets or external secret managers
Monitoring: Integrate with Prometheus and Grafana for observability
Logging: Use structured logging with centralized log aggregation

Troubleshooting

Check kubectl Installation

try:
    deployment = Deployment("test", "default")
except RuntimeError as e:
    print(f"kubectl error: {e}")
    # Install kubectl or configure kubeconfig

Debug Deployment Issues

# Check deployment status
status = deployment.status()
print(f"Desired replicas: {status.get('replicas')}")
print(f"Available replicas: {status.get('availableReplicas')}")

# View logs
logs = deployment.logs(tail=100)
print(logs)

# Describe deployment (using kubectl)
import subprocess
subprocess.run(["kubectl", "describe", "deployment", "neurenix-model"])

# Check events
subprocess.run(["kubectl", "get", "events", "--sort-by=.metadata.creationTimestamp"])

Get Started

Core Concepts

AI Agents

Reinforcement Learning

Advanced Features

Specialized Modules

Hardware Support

Deployment

Kubernetes Deployment

Overview

Prerequisites

Quick Start

Deploy a Model

Expose via Service

Deployments

DeploymentConfig

Deployment Operations

Neurenix-Specific Deployment

GPU Deployments

Pods

PodConfig

Pod Operations

Create Neurenix Pod

Services

ServiceConfig

Service Operations

Create Neurenix Service

Complete Production Deployment

YAML Export

Best Practices

Troubleshooting

Check kubectl Installation

Debug Deployment Issues

Next Steps

Build docs developers (and LLMs) love

Get Started

Core Concepts

AI Agents

Reinforcement Learning

Advanced Features

Specialized Modules

Hardware Support

Deployment

Documentation Index

Overview

Prerequisites

Quick Start

Deploy a Model

Expose via Service

Deployments

DeploymentConfig

Deployment Operations

Neurenix-Specific Deployment

GPU Deployments

Pods

PodConfig

Pod Operations

Create Neurenix Pod

Services

ServiceConfig

Service Operations

Create Neurenix Service

Complete Production Deployment

YAML Export

Best Practices

Troubleshooting

Check kubectl Installation

Debug Deployment Issues

Next Steps

Build docs developers (and LLMs) love

Overview

Prerequisites

Quick Start

Deploy a Model

Expose via Service

Deployments

DeploymentConfig

Deployment Operations

Neurenix-Specific Deployment

GPU Deployments

Pods

PodConfig

Pod Operations

Create Neurenix Pod

Services

ServiceConfig

Service Operations

Create Neurenix Service

Complete Production Deployment

YAML Export

Best Practices

Troubleshooting

Check kubectl Installation

Debug Deployment Issues

Next Steps