Skip to content

LLM

2025個人認為還做不到的事

推特AI 取暖會

ihower

李宏毅

Prompt

使用這些來源:https://www.cna.com.tw/, https://www.dcard.tw

產生和老婆聊天的10組有趣話題

實際動手好用案例

  • 第一個過程中全部透過agent mode的案例
    • Keycloak + LDAP
    • Vaultwarden
  • NotebookLM
    • 了解巴金森氏症

Agentic AI

LangChain/LangGraph

Deepagent

Deepagent CLI

curl -LsSf https://raw.githubusercontent.com/langchain-ai/deepagents/refs/heads/main/libs/cli/scripts/install.sh | bash

There is an OpenAI-compatible API on http://10.184.28.123:8000 , follow the Configuration - Compatible APIs - Docs by LangChain

the model name can be checked by

curl http://10.184.28.123:8000/v1/models \
    -H "Content-Type: application/json" \
    | python3 -m json.tool

~/.deepagents/config.toml

# Model registry for the deepagents CLI.
[models]
# Most recently selected model, in "provider:model-id" form.
recent = "openai:nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16"

# OpenAI-compatible provider pointing at the local vLLM server.
[models.providers.openai]
base_url = "http://10.184.28.123:8000/v1"
api_key_env = "EXAMPLE_API_KEY"  # env var holding the API key; vLLM here accepts any value
models = [
    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
]

EXAMPLE_API_KEY=dummy deepagents

Claude Agent SDK

OpenAI Agent SDK

NemoClaw

export NEMOCLAW_EXPERIMENTAL=1                         # opt in to the experimental CLI
curl -fsSL https://www.nvidia.com/nemoclaw.sh | bash   # installer (review the script before piping to bash)
NEMOCLAW_EXPERIMENTAL=1 nemoclaw onboard               # one-time onboarding / sandbox creation
openshell term                                         # open a terminal into the sandbox
nemoclaw my-assistant connect                          # attach to the named assistant
openshell gateway destroy --name nemoclaw              # teardown: remove the gateway again

/opt/nemoclaw-blueprint/policies/openclaw-sandbox.yaml

# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Default policy for the OpenClaw sandbox.
# Principle: deny by default, allow only what's needed for core functionality.
# Dynamic updates (network_policies, inference) can be applied post-creation
# via `openshell policy set`. Static fields are effectively creation-locked.
#
# Policy tiers (future):
#   default — this file. Minimum for onboard + basic agent operation.
#   relaxed — adds third-party model providers, broader web access.
#
# To add endpoints: update this file and re-run `nemoclaw onboard`
# or apply dynamically via `openshell policy set`.

# Schema version of this policy file.
version: 1

# Filesystem visibility rules for the sandbox.
# NOTE(review): /sandbox is listed read_write while /sandbox/.openclaw is
# read_only — presumably the more specific path wins; confirm enforcement order.
filesystem_policy:
  include_workdir: true             # expose the invoking working directory inside the sandbox
  read_only:
    - /usr
    - /lib
    - /proc
    - /dev/urandom
    - /app
    - /etc
    - /var/log
    - /sandbox/.openclaw            # Immutable gateway config — prevents agent
                                    # from tampering with auth tokens or CORS.
                                    # Writable state (agents, plugins) lives in
                                    # /sandbox/.openclaw-data via symlinks.
                                    # Ref: https://github.com/NVIDIA/NemoClaw/issues/514
  read_write:
    - /sandbox
    - /tmp
    - /dev/null
    - /sandbox/.openclaw-data       # Writable agent/plugin state (symlinked from .openclaw)

# Landlock LSM enforcement mode: degrade gracefully on kernels without
# (full) Landlock support instead of refusing to start.
landlock:
  compatibility: best_effort

# Privilege drop: sandboxed processes run as the unprivileged sandbox user/group.
process:
  run_as_user: sandbox
  run_as_group: sandbox

# Per-policy network egress allowlists. Each policy binds a set of endpoints
# to the binaries allowed to reach them; everything else is denied by default.
network_policies:
  # Claude Code CLI: Anthropic API plus its telemetry/crash-reporting hosts.
  claude_code:
    name: claude_code
    endpoints:
      - host: api.anthropic.com
        port: 443
        protocol: rest
        enforcement: enforce
        tls: terminate              # TLS is intercepted so the HTTP rules below can apply
        rules:
          - allow: { method: "*", path: "/**" }
      # NOTE(review): the two entries below omit protocol/enforcement/tls,
      # unlike api.anthropic.com — confirm the defaults are what's intended.
      - host: statsig.anthropic.com
        port: 443
        rules:
          - allow: { method: "*", path: "/**" }
      - host: sentry.io
        port: 443
        rules:
          - allow: { method: "*", path: "/**" }
    binaries:
      - { path: /usr/local/bin/claude }

  # NVIDIA inference endpoints, reachable from both the claude and openclaw binaries.
  nvidia:
    name: nvidia
    endpoints:
      - host: integrate.api.nvidia.com
        port: 443
        protocol: rest
        enforcement: enforce
        tls: terminate
        rules:
          - allow: { method: "*", path: "/**" }
      - host: inference-api.nvidia.com
        port: 443
        protocol: rest
        enforcement: enforce
        tls: terminate
        rules:
          - allow: { method: "*", path: "/**" }
    binaries:
      - { path: /usr/local/bin/claude }
      - { path: /usr/local/bin/openclaw }

  # GitHub for git/gh. `access: full` grants unrestricted access to these
  # hosts — no per-method/path rules (git's smart-HTTP protocol is not
  # plain REST), so this is broader than the rule-based policies above.
  github:
    name: github
    endpoints:
      - host: github.com
        port: 443
        access: full
      - host: api.github.com
        port: 443
        access: full
    binaries:
      - { path: /usr/bin/gh }
      - { path: /usr/bin/git }

  # ── OpenClaw "phone home" ────────────────────────────────────────────
  # Minimum viable set for OpenClaw to authenticate, discover plugins,
  # and reach ClawHub. Binary-restricted to openclaw only.
  # Docs access is read-only (GET). ClawHub and openclaw.ai are
  # restricted to GET+POST (auth flows, plugin discovery).

  # ClawHub plugin registry: GET (discovery) + POST (auth flows) only,
  # restricted to the openclaw binary.
  clawhub:
    name: clawhub
    endpoints:
      - host: clawhub.com
        port: 443
        protocol: rest
        enforcement: enforce
        tls: terminate
        rules:
          - allow: { method: GET, path: "/**" }
          - allow: { method: POST, path: "/**" }
    binaries:
      - { path: /usr/local/bin/openclaw }

  # openclaw.ai API: GET+POST only, openclaw binary only.
  openclaw_api:
    name: openclaw_api
    endpoints:
      - host: openclaw.ai
        port: 443
        protocol: rest
        enforcement: enforce
        tls: terminate
        rules:
          - allow: { method: GET, path: "/**" }
          - allow: { method: POST, path: "/**" }
    binaries:
      - { path: /usr/local/bin/openclaw }

  # Documentation site: read-only (GET), openclaw binary only.
  openclaw_docs:
    name: openclaw_docs
    endpoints:
      - host: docs.openclaw.ai
        port: 443
        protocol: rest
        enforcement: enforce
        tls: terminate
        rules:
          - allow: { method: GET, path: "/**" }
    binaries:
      - { path: /usr/local/bin/openclaw }

  # npm registry — needed for `openclaw plugins install` and `npm install`.
  # `access: full` (no method/path filtering) for both openclaw and npm.
  npm_registry:
    name: npm_registry
    endpoints:
      - host: registry.npmjs.org
        port: 443
        access: full
    binaries:
      - { path: /usr/local/bin/openclaw }
      - { path: /usr/local/bin/npm }

  # ── Messaging — pre-allowed for OpenClaw agent notifications ────
  # Restricted to node processes to prevent arbitrary data exfiltration
  # via curl, wget, python, etc. (See: #272)
  # Telegram Bot API, limited to the /bot<token>/... path space so only
  # bot-API calls (not arbitrary telegram.org paths) are reachable.
  telegram:
    name: telegram
    endpoints:
      - host: api.telegram.org
        port: 443
        protocol: rest
        enforcement: enforce
        tls: terminate
        rules:
          - allow: { method: GET, path: "/bot*/**" }
          - allow: { method: POST, path: "/bot*/**" }
    binaries:
      - { path: /usr/local/bin/node }

  # Discord: REST API + websocket gateway (GET/POST) and CDN (GET only,
  # e.g. attachments/avatars). Node-only, like telegram above.
  discord:
    name: discord
    endpoints:
      - host: discord.com
        port: 443
        protocol: rest
        enforcement: enforce
        tls: terminate
        rules:
          - allow: { method: GET, path: "/**" }
          - allow: { method: POST, path: "/**" }
      - host: gateway.discord.gg
        port: 443
        protocol: rest
        enforcement: enforce
        tls: terminate
        rules:
          - allow: { method: GET, path: "/**" }
          - allow: { method: POST, path: "/**" }
      - host: cdn.discordapp.com
        port: 443
        protocol: rest
        enforcement: enforce
        tls: terminate
        rules:
          - allow: { method: GET, path: "/**" }
    binaries:
      - { path: /usr/local/bin/node }

OpenShell

MCP

機器學習

Claude Code

應用

Computer Use

Browser Use

Cursor

  • Cursor Docs
  • Cursor Enterprise – Kickoff Guide
  • Cursor AI Tutorial for Beginners [2025 Edition] - YouTube
  • Best practices for coding with agents · Cursor
    • agent harness
      • Instructions: The system prompt and rules that guide agent behavior
      • Tools: File editing, codebase search, terminal execution, and more
      • User messages: Your prompts and follow-ups that direct the work
    • planning before coding
      • Using Plan Mode
        • Research your codebase to find relevant files
        • Ask clarifying questions about your requirements
        • Create a detailed implementation plan with file paths and code references
        • Wait for your approval before building
      • Not every task needs a detailed plan. For quick changes or tasks you've done many times before, jumping straight to the agent is fine.
    • Let the agent find context
      • You don't need to manually tag every file in your prompt.
      • Keep it simple: if you know the exact file, tag it. If not, the agent will find it.
    • When to start a new conversation
      • You're moving to a different task or feature
      • The agent seems confused or keeps making the same mistakes
      • You've finished one logical unit of work
      • Long conversations can cause the agent to lose focus.
    • Reference past work
    • Cursor provides two main ways to customize agent behavior
      • Rules for static context that applies to every conversation
        • Think of them as always-on context that the agent sees at the start of every conversation.
        • Create rules as markdown files in .cursor/rules/:
        • Start simple. Add rules only when you notice the agent making the same mistake repeatedly. Don't over-optimize before you understand your patterns.
      • Skills for dynamic capabilities the agent can use when relevant
        • extend what agents can do.
        • Skills package domain-specific knowledge, workflows, and scripts that agents can invoke when relevant.
        • Skills are defined in SKILL.md files
          • Custom commands: Reusable workflows triggered with / in the agent input
          • Hooks: Scripts that run before or after agent actions
          • Domain knowledge: Instructions for specific tasks the agent can pull in on demand
        • Unlike Rules which are always included, Skills are loaded dynamically when the agent decides they're relevant. This keeps your context window clean while giving the agent access to specialized capabilities.
        • Beyond coding, you can connect the agent to other tools you use daily. MCP (Model Context Protocol) lets the agent read Slack messages, investigate Datadog logs, debug errors from Sentry, query databases, and more.
    • The agent can process images directly from your prompts.
    • Common agent patterns
      • Test-driven development
        • The agent can write code, run tests, and iterate automatically
      • Codebase understanding
        • When onboarding to a new codebase, use the agent for learning and exploration. Ask the same questions you would ask a teammate
      • Git workflows
    • Reviewing code
    • Running agents in parallel
      • We've found that having multiple models attempt the same problem and picking the best result significantly improves the final output, especially for harder tasks.
      • Cursor automatically creates and manages git worktrees for parallel agents. Each agent runs in its own worktree with isolated files and changes, so agents can edit, build, and test code without stepping on each other.
      • A powerful pattern is running the same prompt across multiple models simultaneously.
    • Delegating to cloud agents
    • Debug Mode for tricky bugs
      • Instead of guessing at fixes, Debug Mode:
        • Generates multiple hypotheses about what could be wrong
        • Instruments your code with logging statements
        • Asks you to reproduce the bug while collecting runtime data
        • Analyzes actual behavior to pinpoint the root cause
        • Makes targeted fixes based on evidence
      • This works best for:
        • Bugs you can reproduce but can't figure out
        • Race conditions and timing issues
        • Performance problems and memory leaks
        • Regressions where something used to work
    • effective way
      • write specific prompts
        • :warning: "add tests for auth.ts"
        • :+1: "Write a test case for auth.ts covering the logout edge case, using the patterns in tests/ and avoiding mocks."
      • Start simple
      • review carefully
      • provide verifiable goals
      • treat agents as capable collaborators
  • Reviewing Code with Cursor | Cursor Docs

agent cmd-K tab

cursor rule cursor command MCP

Gemini

Inference engine

vLLM

  • nemotron-3-nano-30b-a3b Model by NVIDIA | NVIDIA NIM

    # Serve the 30B model with a 256K context. `vllm serve` takes the model
    # as a POSITIONAL argument — recent vLLM rejects `--model` here.
    # `--enable-auto-tool-choice` added: `--tool-call-parser` has no effect
    # without it (matches the equivalent docker run command below).
    vllm serve nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16 \
      --max-num-seqs 8 \
      --tensor-parallel-size 1 \
      --max-model-len 262144 \
      --port 8000 \
      --trust-remote-code \
      --enable-auto-tool-choice \
      --tool-call-parser qwen3_coder \
      --reasoning-parser-plugin nano_v3_reasoning_parser.py \
      --reasoning-parser nano_v3
    

  • https://github.com/NVIDIA/NemoClaw/issues/315#issuecomment-4090919603

    # Serve the 4B FP8 model. `vllm serve` takes the model as a POSITIONAL
    # argument — recent vLLM rejects `--model` here.
    vllm serve nvidia/NVIDIA-Nemotron-3-Nano-4B-FP8 \
      --enable-auto-tool-choice \
      --tool-call-parser qwen3_coder \
      --reasoning-parser nemotron_v3 \
      --max-model-len 32768 \
      --max-num-seqs 1 \
      --trust-remote-code \
      --gpu-memory-utilization 0.85 \
      --kv-cache-dtype fp8 \
      --host 0.0.0.0
    

  • Using Docker - vLLM

  • Installing the NVIDIA Container Toolkit — NVIDIA Container Toolkit

nvidia/NVIDIA-Nemotron-3-Nano-4B-FP8

# Serve the FP8 4B model via the official vLLM OpenAI-compatible container.
# ~/.cache/huggingface is mounted so downloaded weights persist across runs;
# --ipc=host gives the container the shared memory PyTorch needs.
docker run -d --runtime nvidia --gpus all \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  -p 8000:8000 \
  --ipc=host \
  vllm/vllm-openai:latest \
  --model nvidia/NVIDIA-Nemotron-3-Nano-4B-FP8 \
  --enable-auto-tool-choice \
  --tool-call-parser qwen3_coder \
  --reasoning-parser nemotron_v3 \
  --max-model-len 32768 \
  --max-num-seqs 1 \
  --trust-remote-code \
  --gpu-memory-utilization 0.85 \
  --kv-cache-dtype fp8 \
  --host 0.0.0.0

nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16

# Same as above but with the BF16 4B checkpoint.
# NOTE(review): keeps --kv-cache-dtype fp8 even though the weights are BF16 —
# FP8 KV-cache quantization is valid with BF16 weights, but confirm it's intended.
docker run -d --runtime nvidia --gpus all \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  -p 8000:8000 \
  --ipc=host \
  vllm/vllm-openai:latest \
  --model nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16 \
  --enable-auto-tool-choice \
  --tool-call-parser qwen3_coder \
  --reasoning-parser nemotron_v3 \
  --max-model-len 32768 \
  --max-num-seqs 1 \
  --trust-remote-code \
  --gpu-memory-utilization 0.85 \
  --kv-cache-dtype fp8 \
  --host 0.0.0.0

nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16

# 30B model in the container, with the full 256K context window.
# NOTE(review): --port 8000 is redundant (vLLM's default) given -p 8000:8000.
docker run -d --runtime nvidia --gpus all \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  -p 8000:8000 \
  --ipc=host \
  vllm/vllm-openai:latest \
  --model nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16 \
  --max-num-seqs 8 \
  --tensor-parallel-size 1 \
  --max-model-len 262144 \
  --port 8000 \
  --trust-remote-code \
  --tool-call-parser qwen3_coder \
  --reasoning-parser nemotron_v3 \
  --enable-auto-tool-choice \
  --host 0.0.0.0

validation

curl http://localhost:8000/v1/models \
    -H "Content-Type: application/json"
{
    "object": "list",
    "data": [
        {
            "id": "nvidia/NVIDIA-Nemotron-3-Nano-4B-FP8",
            "object": "model",
            "created": 1774539878,
            "owned_by": "vllm",
            "root": "nvidia/NVIDIA-Nemotron-3-Nano-4B-FP8",
            "parent": null,
            "max_model_len": 32768,
            "permission": [
                {
                    "id": "modelperm-bd89d55791b3a278",
                    "object": "model_permission",
                    "created": 1774539878,
                    "allow_create_engine": false,
                    "allow_sampling": true,
                    "allow_logprobs": true,
                    "allow_search_indices": false,
                    "allow_view": true,
                    "allow_fine_tuning": false,
                    "organization": "*",
                    "group": null,
                    "is_blocking": false
                }
            ]
        }
    ]
}
curl http://localhost:8000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "nvidia/NVIDIA-Nemotron-3-Nano-4B-FP8",
        "messages":[{"role": "user", "content": "Write a haiku about GPUs"}],
        "chat_template_kwargs": {"enable_thinking": true}
    }' | python3 -m json.tool

curl http://localhost:8000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "nvidia/NVIDIA-Nemotron-3-Nano-4B-FP8",
        "messages":[{"role": "user", "content": "expalin the message from vllm log: Engine 000: Avg prompt throughput: 2.3 tokens/s, Avg generation throughput: 49.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 0.0%"}],
        "chat_template_kwargs": {"enable_thinking": true}
    }' | python3 -m json.tool
  • Open WebUI - vLLM
    # Open WebUI front end pointed at the host's vLLM server.
    # Fixed: http://0.0.0.0:8000 resolves to the Open WebUI container itself,
    # where no vLLM is listening. Use host.docker.internal (mapped to the
    # host gateway via --add-host) so the container reaches vLLM on the host.
    docker run -d \
        --name open-webui \
        -p 3000:8080 \
        -v open-webui:/app/backend/data \
        --add-host=host.docker.internal:host-gateway \
        -e OPENAI_API_BASE_URL=http://host.docker.internal:8000/v1 \
        --restart always \
        ghcr.io/open-webui/open-webui:main
    
# docker-compose equivalent of the two `docker run` commands above:
# vLLM OpenAI-compatible server + Open WebUI front end on one network,
# so Open WebUI can reach vLLM by service name instead of a host IP.
services:
  vllm:
    image: vllm/vllm-openai:latest
    container_name: vllm
    restart: unless-stopped
    runtime: nvidia                 # NVIDIA Container Toolkit runtime for GPU access
    ipc: host                       # shared memory PyTorch needs for tensor transport
    ports:
      - "8000:8000"
    volumes:
      - ~/.cache/huggingface:/root/.cache/huggingface  # persist downloaded weights
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Arguments appended to the image entrypoint (folded into one line);
    # mirrors the standalone 4B-FP8 `docker run` command above.
    command: >
      --model nvidia/NVIDIA-Nemotron-3-Nano-4B-FP8
      --enable-auto-tool-choice
      --tool-call-parser qwen3_coder
      --reasoning-parser nemotron_v3
      --max-model-len 32768
      --max-num-seqs 1
      --trust-remote-code
      --gpu-memory-utilization 0.85
      --kv-cache-dtype fp8
      --host 0.0.0.0

  open-webui:
    image: ghcr.io/open-webui/open-webui:main
    container_name: open-webui
    restart: always
    ports:
      - "3000:8080"
    volumes:
      - open-webui-data:/app/backend/data
    environment:
      - OPENAI_API_BASE_URL=http://vllm:8000/v1  # reach vLLM via compose DNS
    depends_on:
      - vllm
    # NOTE(review): depends_on only orders container start; Open WebUI may
    # come up before vLLM finishes loading the model — confirm it retries.

volumes:
  open-webui-data: