<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:news="http://www.google.com/schemas/sitemap-news/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
    
  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/advanced/goal-conditioned-rl</loc>
    <lastmod>2026-06-14T02:15:14.763Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/advanced/imitation-learning</loc>
    <lastmod>2026-06-14T02:15:14.806Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/advanced/mbpo</loc>
    <lastmod>2026-06-14T02:15:14.812Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/advanced/mpc</loc>
    <lastmod>2026-06-14T02:15:14.742Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/advanced/multi-agent</loc>
    <lastmod>2026-06-14T02:15:14.777Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/advanced/offline-rl</loc>
    <lastmod>2026-06-14T02:15:14.801Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/algorithms/actor-critic</loc>
    <lastmod>2026-06-14T02:15:14.789Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/algorithms/ddpg</loc>
    <lastmod>2026-06-14T02:15:14.749Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/algorithms/dqn</loc>
    <lastmod>2026-06-14T02:15:14.782Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/algorithms/policy-gradient</loc>
    <lastmod>2026-06-14T02:15:17.057Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/algorithms/ppo</loc>
    <lastmod>2026-06-14T02:15:17.052Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/algorithms/sac</loc>
    <lastmod>2026-06-14T02:15:17.056Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/foundations/dynamic-programming</loc>
    <lastmod>2026-06-14T02:15:17.052Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/foundations/gym-environment</loc>
    <lastmod>2026-06-14T02:15:17.051Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/foundations/markov-decision-process</loc>
    <lastmod>2026-06-14T02:15:17.053Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/foundations/stateless-bandits</loc>
    <lastmod>2026-06-14T02:15:17.055Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/introduction</loc>
    <lastmod>2026-06-14T02:15:17.054Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/methods/dynaq</loc>
    <lastmod>2026-06-14T02:15:17.050Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/methods/temporal-difference</loc>
    <lastmod>2026-06-14T02:15:18.401Z</lastmod>
  </url>

  <url>
    <loc>https://mintlify.wiki/lansinuote/Simple_Reinforcement_Learning/setup</loc>
    <lastmod>2026-06-14T02:15:18.400Z</lastmod>
  </url>
</urlset>