<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Model Types - Cheat Sheet</title>
  <link rel="stylesheet" href="../css/style.css">
</head>
<body>

<nav>
  <div class="nav-inner">
    <a href="../index.html" class="nav-brand">AI Cheat Sheet</a>
    <div class="nav-links">
      <a href="terminology.html">Terminology</a>
      <a href="techniques.html">Techniques</a>
      <a href="use-cases.html">Use Cases</a>
      <a href="model-types.html" class="active">Model Types</a>
      <a href="prompts.html">Prompt Guide</a>
      <a href="math.html">Math &amp; Concepts</a>
      <a href="chat.html">Chat</a>
      <a href="image-gen.html">Image Gen</a>
    </div>
    <button class="dark-toggle" id="darkToggle" aria-label="Toggle dark mode">🌙</button>
  </div>
</nav>

<button class="menu-toggle" id="menuToggle" aria-label="Toggle menu">☰</button>
<div class="sidebar-backdrop" id="sidebarBackdrop"></div>
<script>
(function(){
  // Dark mode: honor a saved choice, otherwise fall back to the OS preference.
  var btn = document.getElementById('darkToggle');
  var saved = localStorage.getItem('theme');
  if(saved === 'dark' || (!saved && window.matchMedia('(prefers-color-scheme: dark)').matches)){
    document.documentElement.setAttribute('data-theme','dark');
    btn.textContent = '☀️';
  }
  btn.addEventListener('click', function(){
    var isDark = document.documentElement.getAttribute('data-theme') === 'dark';
    if(isDark){
      document.documentElement.removeAttribute('data-theme');
      btn.textContent = '🌙';
      localStorage.setItem('theme','light');
    } else {
      document.documentElement.setAttribute('data-theme','dark');
      btn.textContent = '☀️';
      localStorage.setItem('theme','dark');
    }
  });

  // Mobile sidebar: ☰ toggles the nav; the backdrop or any outside click closes it.
  var menuToggle = document.getElementById('menuToggle');
  var nav = document.querySelector('nav');
  var backdrop = document.getElementById('sidebarBackdrop');
  if(menuToggle && nav){
    menuToggle.addEventListener('click', function(){
      nav.classList.toggle('sidebar-open');
      var isOpen = nav.classList.contains('sidebar-open');
      menuToggle.textContent = isOpen ? '✕' : '☰';
      if(backdrop){
        backdrop.classList.toggle('visible', isOpen);
      }
    });
    if(backdrop){
      backdrop.addEventListener('click', function(){
        nav.classList.remove('sidebar-open');
        menuToggle.textContent = '☰';
        backdrop.classList.remove('visible');
      });
    }
    document.addEventListener('click', function(e){
      if(nav.classList.contains('sidebar-open') && !nav.contains(e.target) && e.target !== menuToggle){
        nav.classList.remove('sidebar-open');
        menuToggle.textContent = '☰';
        if(backdrop) backdrop.classList.remove('visible');
      }
    });
  }
})();
</script>

<div class="hero">
  <h1>Model Types</h1>
  <p>Architectures and families of AI models — what they are and what they do.</p>
</div>

<div class="container">

<h2 class="section-title">Language Models</h2>
<div class="def-card">
  <span class="category">Transformer</span>
  <h3>LLM (Large Language Model)</h3>
  <p>Neural networks based on the transformer architecture, trained on massive text corpora. They predict the next token given a sequence, enabling fluent performance across language tasks.</p>
  <div class="example"><strong>Examples:</strong> GPT-4, Claude, Gemini, Llama 3, Mistral, Qwen</div>
  <button class="llm-btn" onclick="explainModel('💬 LLM (Large Language Model)', 'Neural networks based on the transformer architecture, trained on massive text corpora. They predict the next token given a sequence.')"><span class="icon">💬</span> Explain</button>
</div>
<div class="def-card">
  <span class="category">Transformer</span>
  <h3>Encoder-Only Models</h3>
  <p>Transformers designed to understand input (not generate text). Used for classification, sentiment analysis, and embedding generation.</p>
  <div class="example"><strong>Examples:</strong> BERT, RoBERTa, DeBERTa</div>
  <button class="llm-btn" onclick="explainModel('💬 Encoder-Only Models', 'Transformers designed to understand input (not generate text). Used for classification, sentiment analysis, and embedding generation.')"><span class="icon">💬</span> Explain</button>
</div>
<div class="def-card">
  <span class="category">Transformer</span>
  <h3>Decoder-Only Models</h3>
  <p>Transformers designed to generate text autoregressively — the dominant architecture for modern LLMs.</p>
  <div class="example"><strong>Examples:</strong> GPT series, Claude, Llama, Mistral</div>
  <button class="llm-btn" onclick="explainModel('💬 Decoder-Only Models', 'Transformers designed to generate text autoregressively — the dominant architecture for modern LLMs.')"><span class="icon">💬</span> Explain</button>
</div>
<div class="def-card">
  <span class="category">Transformer</span>
  <h3>Encoder-Decoder Models</h3>
  <p>Transformers with both encoder and decoder, used for tasks that transform input to output (translation, summarization).</p>
  <div class="example"><strong>Examples:</strong> T5, BART, Flan-T5</div>
  <button class="llm-btn" onclick="explainModel('💬 Encoder-Decoder Models', 'Transformers with both encoder and decoder, used for tasks that transform input to output.')"><span class="icon">💬</span> Explain</button>
</div>

<h2 class="section-title">Vision Models</h2>
<div class="def-card">
  <span class="category">Vision</span>
  <h3>CNN (Convolutional Neural Network)</h3>
  <p>Neural networks with layers that scan images with small filters, detecting edges, textures, and patterns hierarchically. The backbone of computer vision for years.</p>
  <div class="example"><strong>Examples:</strong> ResNet, EfficientNet, VGG</div>
  <button class="llm-btn" onclick="explainModel('💬 CNN', 'Neural networks with layers that scan images with small filters, detecting edges, textures, and patterns hierarchically.')"><span class="icon">💬</span> Explain</button>
</div>
<div class="def-card">
  <span class="category">Vision</span>
  <h3>ViT (Vision Transformer)</h3>
  <p>Applies the transformer architecture to images by treating image patches as tokens. At scale, ViTs often outperform CNNs.</p>
  <div class="example"><strong>Examples:</strong> CLIP, DINOv2, ViT-Base</div>
  <button class="llm-btn" onclick="explainModel('💬 ViT', 'Applying the transformer architecture to images by treating image patches as tokens.')"><span class="icon">💬</span> Explain</button>
</div>
<div class="def-card">
  <span class="category">Vision</span>
  <h3>Diffusion Models</h3>
  <p>Models that generate images by iteratively denoising random noise. The architecture behind most state-of-the-art image generators.</p>
  <div class="example"><strong>Examples:</strong> Stable Diffusion, DALL-E 3, Midjourney</div>
  <button class="llm-btn" onclick="explainModel('💬 Diffusion Models', 'Models that generate images by iteratively denoising random noise.')"><span class="icon">💬</span> Explain</button>
</div>
<div class="def-card">
  <span class="category">Vision</span>
  <h3>Multimodal Models</h3>
  <p>Models that process multiple input types — text, images, audio — and can generate outputs across modalities.</p>
  <div class="example"><strong>Examples:</strong> GPT-4V (vision), Claude 3, Gemini, Qwen-VL</div>
  <button class="llm-btn" onclick="explainModel('💬 Multimodal Models', 'Models that process multiple input types — text, images, audio — and can generate outputs across modalities.')"><span class="icon">💬</span> Explain</button>
</div>

<h2 class="section-title">Generative Models</h2>
<div class="def-card">
  <span class="category">Generative</span>
  <h3>GAN (Generative Adversarial Network)</h3>
  <p>Two networks compete: a generator creates fake data, and a discriminator tries to detect fakes. Over time, both improve until the generator's outputs are indistinguishable from real data.</p>
  <div class="example"><strong>Example:</strong> Creating photorealistic faces that don't exist (StyleGAN).</div>
  <button class="llm-btn" onclick="explainModel('💬 GAN', 'Two networks compete: a generator creates fake data, and a discriminator tries to detect fakes.')"><span class="icon">💬</span> Explain</button>
</div>
<div class="def-card">
  <span class="category">Generative</span>
  <h3>VQ-VAE (Vector Quantized VAE)</h3>
  <p>Combines autoencoders with discrete codebooks to learn compressed representations. Used as a foundation for autoregressive generation.</p>
  <div class="example"><strong>Example:</strong> MusicGen (music generation), SoundStream (audio compression)</div>
  <button class="llm-btn" onclick="explainModel('💬 VQ-VAE', 'Combines autoencoders with discrete codebooks to learn compressed representations.')"><span class="icon">💬</span> Explain</button>
</div>
<div class="def-card">
  <span class="category">Generative</span>
  <h3>Flow Models</h3>
  <p>Models that learn a reversible transformation between data and noise, enabling exact likelihood computation and fast generation.</p>
  <div class="example"><strong>Examples:</strong> Glow, RealNVP, Rectified Flow</div>
  <button class="llm-btn" onclick="explainModel('💬 Flow Models', 'Models that learn a reversible transformation between data and noise.')"><span class="icon">💬</span> Explain</button>
</div>

<h2 class="section-title">Other Architectures</h2>
<div class="def-card">
  <span class="category">Architecture</span>
  <h3>RNN / LSTM</h3>
  <p>Recurrent networks that process sequences step-by-step, maintaining a hidden state. Largely replaced by transformers but still used in some applications.</p>
  <div class="example"><strong>Use case:</strong> Time series prediction, speech recognition</div>
  <button class="llm-btn" onclick="explainModel('💬 RNN/LSTM', 'Recurrent networks that process sequences step-by-step, maintaining a hidden state.')"><span class="icon">💬</span> Explain</button>
</div>
<div class="def-card">
  <span class="category">Architecture</span>
  <h3>Mixture of Experts (MoE)</h3>
  <p>A model with multiple "expert" subnetworks. A routing mechanism selects which experts to use for each input, enabling large models that are computationally efficient at inference.</p>
  <div class="example"><strong>Examples:</strong> Mixtral 8x7B, Google's Switch Transformer, Grok-1</div>
  <button class="llm-btn" onclick="explainModel('💬 Mixture of Experts (MoE)', 'A model with multiple expert subnetworks. A routing mechanism selects which experts to use for each input.')"><span class="icon">💬</span> Explain</button>
</div>
<div class="def-card">
  <span class="category">Architecture</span>
  <h3>Retrieval Models</h3>
  <p>Models designed specifically for semantic search — finding the most relevant documents for a query from a large corpus.</p>
  <div class="example"><strong>Examples:</strong> BGE, E5, Cohere embed models</div>
  <button class="llm-btn" onclick="explainModel('💬 Retrieval Models', 'Models designed specifically for semantic search — finding the most relevant documents for a query.')"><span class="icon">💬</span> Explain</button>
</div>
<div class="def-card">
  <span class="category">Architecture</span>
  <h3>Small Language Models (SLMs)</h3>
  <p>Compact language models (under 7B parameters) optimized for edge devices and low-latency applications. They are becoming remarkably capable for their size.</p>
  <div class="example"><strong>Examples:</strong> Phi-3, Gemma 2B, Qwen 1.5B, MicroLlama</div>
  <button class="llm-btn" onclick="explainModel('💬 Small Language Models (SLMs)', 'Compact language models (under 7B parameters) optimized for edge devices and low-latency applications.')"><span class="icon">💬</span> Explain</button>
</div>

<h2 class="section-title">Model Comparison</h2>
<table class="glossary-table">
  <thead>
    <tr><th>Model</th><th>Type</th><th>Best For</th></tr>
  </thead>
  <tbody>
    <tr><td>GPT-4 / GPT-4o</td><td>Decoder LLM</td><td>General-purpose reasoning, coding, multimodal</td></tr>
    <tr><td>Claude 3.5</td><td>Decoder LLM</td><td>Long-context analysis, coding, writing</td></tr>
    <tr><td>Gemini 1.5 Pro</td><td>Decoder LLM</td><td>Massive context windows, multimodal</td></tr>
    <tr><td>Llama 3</td><td>Decoder LLM</td><td>Open-source, self-hosting, fine-tuning</td></tr>
    <tr><td>Mistral Large</td><td>Dense LLM</td><td>High-quality reasoning, multilingual</td></tr>
    <tr><td>Stable Diffusion</td><td>Diffusion</td><td>Image generation, open-source</td></tr>
    <tr><td>CLIP</td><td>Encoder (Vision+Text)</td><td>Image-text matching, embeddings</td></tr>
    <tr><td>BERT</td><td>Encoder</td><td>Text classification, search, NLU</td></tr>
    <tr><td>Whisper</td><td>Encoder-Decoder</td><td>Speech recognition, transcription</td></tr>
    <tr><td>TTS models</td><td>Decoder</td><td>Text-to-speech, voice synthesis</td></tr>
  </tbody>
</table>

<h2 class="section-title">AI Assistant</h2>
<div class="def-card">
  <span class="category">Interactive</span>
  <h3>🤖 Which Model Should I Use?</h3>
  <p>Describe your task, constraints, and goals — the AI will recommend the best model architecture and specific model from the comparison table.</p>
  <div class="llm-mini-chat" id="model-chat">
    <div class="llm-mini-chat-header"><h4>🤖 Model Advisor</h4><button class="llm-close-btn" onclick="this.closest('.llm-mini-chat').classList.remove('visible')">✕</button></div>
    <div class="llm-mini-chat-input-row">
      <input class="llm-mini-chat-input" id="model-input" placeholder="e.g., I need to classify customer reviews by sentiment, processing 10,000 per day..." />
      <button class="llm-mini-chat-send" onclick="askModelAdvisor()">Ask</button>
    </div>
    <div class="llm-mini-chat-output" id="model-output" style="margin-top: 0.8rem;"></div>
  </div>
</div>

</div>

<footer>AI Cheat Sheet — A learning reference for artificial intelligence</footer>

<div class="search-results-dropdown" id="searchResultsContainer"></div>

<script src="../lib/modal.js"></script>
<script src="../lib/llm.js"></script>
<script src="../lib/search.js"></script>
<script>Search.init();</script>

<script>
(function(){
  // Streams an explanation of a model card into the shared LLM modal.
  function explainModel(title, definition) {
    LLMModal.open(title);
    var messages = [
      { role: 'system', content: 'You are an AI educator explaining model architectures. Explain how this model type works, what makes it unique, when to use it vs alternatives, and concrete examples. Use analogies and keep it practical.' },
      { role: 'user', content: 'Explain this AI model architecture: ' + title + '. ' + definition + '. Compare it to similar architectures and explain when you would choose this one over alternatives.' }
    ];

    var fullText = '';
    LLM.callAPI(
      messages,
      function(chunk) {   // streaming chunk: append and re-render
        fullText += chunk;
        LLMModal.update(fullText);
      },
      function() {},      // completion: nothing extra to do
      function(err) {     // error: show it in the modal
        LLMModal.error(err);
      }
    );
  }

  // "Which model should I use?" advisor: sends the user's task plus the
  // comparison table to the LLM and renders the recommendation.
  function askModelAdvisor() {
    var input = document.getElementById('model-input');
    var output = document.getElementById('model-output');
    var text = input.value.trim();
    if (!text) return;

    output.innerHTML = '<span class="llm-loading">Analyzing...</span>';

    // Plain-text copy of the comparison table, passed as context.
    var comparisonTable = `
Model Comparison:
- GPT-4 / GPT-4o: Decoder LLM - General-purpose reasoning, coding, multimodal
- Claude 3.5: Decoder LLM - Long-context analysis, coding, writing
- Gemini 1.5 Pro: Decoder LLM - Massive context windows, multimodal
- Llama 3: Decoder LLM - Open-source, self-hosting, fine-tuning
- Mistral Large: Dense LLM - High-quality reasoning, multilingual
- Stable Diffusion: Diffusion - Image generation, open-source
- CLIP: Encoder (Vision+Text) - Image-text matching, embeddings
- BERT: Encoder - Text classification, search, NLU
- Whisper: Encoder-Decoder - Speech recognition, transcription
- TTS models: Decoder - Text-to-speech, voice synthesis`;

    var messages = [
      { role: 'system', content: 'You are an AI model advisor. Help users choose the right AI model for their task. Consider: 1) The task type (text, image, audio, structured data), 2) Scale/volume requirements, 3) Latency needs, 4) Budget constraints, 5) Open-source vs proprietary, 6) Deployment environment. Recommend specific models from the comparison table and explain why.' },
      { role: 'user', content: 'I need help choosing an AI model for my task: ' + text + '. Here are the available model options:\n' + comparisonTable + '\nWhich model should I use and why?' }
    ];

    LLM.chatWithHistory('model-output', messages).catch(function() {}); // rejections intentionally ignored
  }

  // Enter submits the advisor question.
  var modelInput = document.getElementById('model-input');
  if (modelInput) {
    modelInput.addEventListener('keydown', function(e) {
      if (e.key === 'Enter') {
        e.preventDefault();
        askModelAdvisor();
      }
    });
  }

  // Expose for the inline onclick handlers above.
  window.explainModel = explainModel;
  window.askModelAdvisor = askModelAdvisor;
})();
</script>

</body>
</html>