<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:news="http://www.google.com/schemas/sitemap-news/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
    
  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/api/llm</loc>
    <lastmod>2026-03-04T00:10:23.304Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/api/request-output</loc>
    <lastmod>2026-03-04T00:10:23.300Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/api/sampling-params</loc>
    <lastmod>2026-03-04T00:10:23.298Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/api/tokenizer</loc>
    <lastmod>2026-03-04T00:10:23.299Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/cli/trtllm-bench</loc>
    <lastmod>2026-03-04T00:10:23.303Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/cli/trtllm-build</loc>
    <lastmod>2026-03-04T00:10:23.309Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/cli/trtllm-eval</loc>
    <lastmod>2026-03-04T00:10:23.307Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/cli/trtllm-prune</loc>
    <lastmod>2026-03-04T00:10:23.306Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/cli/trtllm-refit</loc>
    <lastmod>2026-03-04T00:10:23.308Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/cli/trtllm-serve</loc>
    <lastmod>2026-03-04T00:10:25.017Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/concepts/architecture</loc>
    <lastmod>2026-03-04T00:10:25.011Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/concepts/backends</loc>
    <lastmod>2026-03-04T00:10:25.018Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/concepts/optimization-techniques</loc>
    <lastmod>2026-03-04T00:10:25.014Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/config/llm-args</loc>
    <lastmod>2026-03-04T00:10:25.020Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/config/model-config</loc>
    <lastmod>2026-03-04T00:10:25.020Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/config/runtime-config</loc>
    <lastmod>2026-03-04T00:10:25.015Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/deployment/distributed-inference</loc>
    <lastmod>2026-03-04T00:10:25.021Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/deployment/llm-api</loc>
    <lastmod>2026-03-04T00:10:25.016Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/deployment/production</loc>
    <lastmod>2026-03-04T00:10:25.019Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/deployment/trtllm-serve</loc>
    <lastmod>2026-03-04T00:10:26.830Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/developer/adding-models</loc>
    <lastmod>2026-03-04T00:10:26.826Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/developer/autodeploy</loc>
    <lastmod>2026-03-04T00:10:26.825Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/developer/build-from-source</loc>
    <lastmod>2026-03-04T00:10:26.833Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/developer/ci-overview</loc>
    <lastmod>2026-03-04T00:10:26.827Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/developer/coding-guidelines</loc>
    <lastmod>2026-03-04T00:10:26.832Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/developer/contributing</loc>
    <lastmod>2026-03-04T00:10:26.831Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/developer/custom-kernels</loc>
    <lastmod>2026-03-04T00:10:26.824Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/developer/disaggregated-serving</loc>
    <lastmod>2026-03-04T00:10:26.832Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/developer/plugins</loc>
    <lastmod>2026-03-04T00:10:26.829Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/features/attention-mechanisms</loc>
    <lastmod>2026-03-04T00:10:28.327Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/features/kv-cache</loc>
    <lastmod>2026-03-04T00:10:28.321Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/features/lora</loc>
    <lastmod>2026-03-04T00:10:28.319Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/features/multimodal</loc>
    <lastmod>2026-03-04T00:10:28.322Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/features/parallelism</loc>
    <lastmod>2026-03-04T00:10:28.323Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/features/quantization</loc>
    <lastmod>2026-03-04T00:10:28.325Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/features/speculative-decoding</loc>
    <lastmod>2026-03-04T00:10:28.332Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/installation</loc>
    <lastmod>2026-03-04T00:10:28.324Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/introduction</loc>
    <lastmod>2026-03-04T00:10:28.329Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/models/custom-models</loc>
    <lastmod>2026-03-04T00:10:28.328Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/models/model-configuration</loc>
    <lastmod>2026-03-04T00:10:30.180Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/models/supported-models</loc>
    <lastmod>2026-03-04T00:10:30.181Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/performance/benchmarking</loc>
    <lastmod>2026-03-04T00:10:30.176Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/performance/optimization-guide</loc>
    <lastmod>2026-03-04T00:10:30.182Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/performance/profiling</loc>
    <lastmod>2026-03-04T00:10:30.179Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/NVIDIA/TensorRT-LLM/quickstart</loc>
    <lastmod>2026-03-04T00:10:30.181Z</lastmod>
  </url>
</urlset>