add doc

2026-01-12 17:58:20 +08:00 · 2026-01-12 17:58:20 +08:00 · e5b5d32692
parent 235af5a4de
commit e5b5d32692
15 changed files with 1282 additions and 6802 deletions
--- a/.obsidian/community-plugins.json
+++ b/.obsidian/community-plugins.json
@ -1,4 +1 @@
-[
+[]
  "copilot",
  "obsidian-textgenerator-plugin"
 ]
--- a/.obsidian/plugins/copilot/data.json
+++ b/.obsidian/plugins/copilot/data.json
@ -1,232 +0,0 @@
 {
  "isPlusUser": false,
  "plusLicenseKey": "",
  "openAIApiKey": "",
  "openAIOrgId": "",
  "huggingfaceApiKey": "",
  "cohereApiKey": "",
  "anthropicApiKey": "",
  "azureOpenAIApiKey": "",
  "azureOpenAIApiInstanceName": "",
  "azureOpenAIApiDeploymentName": "",
  "azureOpenAIApiVersion": "",
  "azureOpenAIApiEmbeddingDeploymentName": "",
  "googleApiKey": "",
  "openRouterAiApiKey": "",
  "defaultChainType": "llm_chain",
  "defaultModelKey": "gpt-4o|openai",
  "embeddingModelKey": "text-embedding-3-small|openai",
  "temperature": 0.1,
  "maxTokens": 1000,
  "contextTurns": 15,
  "userSystemPrompt": "",
  "openAIProxyBaseUrl": "",
  "openAIEmbeddingProxyBaseUrl": "",
  "stream": true,
  "defaultSaveFolder": "copilot-conversations",
  "defaultConversationTag": "copilot-conversation",
  "autosaveChat": false,
  "defaultOpenArea": "view",
  "customPromptsFolder": "copilot-custom-prompts",
  "indexVaultToVectorStore": "ON MODE SWITCH",
  "qaExclusions": "",
  "qaInclusions": "",
  "chatNoteContextPath": "",
  "chatNoteContextTags": [],
  "enableIndexSync": true,
  "debug": false,
  "enableEncryption": false,
  "maxSourceChunks": 3,
  "groqApiKey": "",
  "mistralApiKey": "",
  "activeModels": [
    {
      "name": "copilot-plus-flash",
      "provider": "copilot-plus",
      "enabled": false,
      "isBuiltIn": true,
      "core": true,
      "capabilities": [
        "vision"
      ]
    },
    {
      "name": "gpt-4o",
      "provider": "openai",
      "enabled": false,
      "isBuiltIn": true,
      "core": true,
      "capabilities": [
        "vision"
      ]
    },
    {
      "name": "gpt-4o-mini",
      "provider": "openai",
      "enabled": false,
      "isBuiltIn": true,
      "core": true,
      "capabilities": [
        "vision"
      ]
    },
    {
      "name": "claude-3-5-sonnet-latest",
      "provider": "anthropic",
      "enabled": false,
      "isBuiltIn": true,
      "core": true,
      "capabilities": [
        "vision"
      ]
    },
    {
      "name": "o1-mini",
      "provider": "openai",
      "enabled": false,
      "isBuiltIn": true,
      "capabilities": [
        "reasoning"
      ]
    },
    {
      "name": "o3-mini",
      "provider": "openai",
      "enabled": false,
      "isBuiltIn": true,
      "capabilities": [
        "reasoning"
      ]
    },
    {
      "name": "claude-3-5-haiku-latest",
      "provider": "anthropic",
      "enabled": false,
      "isBuiltIn": true
    },
    {
      "name": "command-r",
      "provider": "cohereai",
      "enabled": false,
      "isBuiltIn": true
    },
    {
      "name": "command-r-plus",
      "provider": "cohereai",
      "enabled": false,
      "isBuiltIn": true
    },
    {
      "name": "gemini-2.0-pro-exp",
      "provider": "google",
      "enabled": false,
      "isBuiltIn": true,
      "capabilities": [
        "vision"
      ]
    },
    {
      "name": "gemini-2.0-flash",
      "provider": "google",
      "enabled": false,
      "isBuiltIn": true,
      "capabilities": [
        "vision"
      ]
    },
    {
      "name": "azure-openai",
      "provider": "azure openai",
      "enabled": false,
      "isBuiltIn": true
    },
    {
      "name": "deepseek-reasoner",
      "provider": "3rd party (openai-format)",
      "enabled": true,
      "isBuiltIn": false,
      "baseUrl": "https://api.deepseek.com/v1",
      "apiKey": "sk-8603b08e1125422ca6238c8b4a1a40d8",
      "isEmbeddingModel": false,
      "capabilities": [
        "reasoning"
      ],
      "stream": true,
      "displayName": "deepseek-r1"
    }
  ],
  "activeEmbeddingModels": [
    {
      "name": "copilot-plus-small",
      "provider": "copilot-plus",
      "enabled": true,
      "isBuiltIn": true,
      "isEmbeddingModel": true,
      "core": true
    },
    {
      "name": "copilot-plus-large",
      "provider": "copilot-plus-jina",
      "enabled": true,
      "isBuiltIn": true,
      "isEmbeddingModel": true,
      "core": true,
      "believerExclusive": true,
      "dimensions": 1024
    },
    {
      "name": "copilot-plus-multilingual",
      "provider": "copilot-plus-jina",
      "enabled": true,
      "isBuiltIn": true,
      "isEmbeddingModel": true,
      "core": true,
      "dimensions": 512
    },
    {
      "name": "text-embedding-3-small",
      "provider": "openai",
      "enabled": true,
      "isBuiltIn": true,
      "isEmbeddingModel": true,
      "core": true
    },
    {
      "name": "text-embedding-3-large",
      "provider": "openai",
      "enabled": true,
      "isBuiltIn": true,
      "isEmbeddingModel": true
    },
    {
      "name": "embed-multilingual-light-v3.0",
      "provider": "cohereai",
      "enabled": true,
      "isBuiltIn": true,
      "isEmbeddingModel": true
    },
    {
      "name": "text-embedding-004",
      "provider": "google",
      "enabled": true,
      "isBuiltIn": true,
      "isEmbeddingModel": true
    },
    {
      "name": "azure-openai",
      "provider": "azure openai",
      "enabled": true,
      "isBuiltIn": true,
      "isEmbeddingModel": true
    }
  ],
  "embeddingRequestsPerMin": 90,
  "embeddingBatchSize": 16,
  "disableIndexOnMobile": true,
  "showSuggestedPrompts": true,
  "showRelevantNotes": true,
  "numPartitions": 1,
  "enabledCommands": {},
  "promptUsageTimestamps": {},
  "defaultConversationNoteName": "{$topic}@{$date}_{$time}"
 }
--- a/.obsidian/plugins/copilot/main.js
+++ b/.obsidian/plugins/copilot/main.js
--- a/.obsidian/plugins/copilot/manifest.json
+++ b/.obsidian/plugins/copilot/manifest.json
@ -1,13 +0,0 @@
 {
  "id": "copilot",
  "name": "Copilot",
  "version": "2.8.6",
  "minAppVersion": "0.15.0",
  "description": "An AI Copilot in Obsidian.",
  "author": "Logan Yang",
  "authorUrl": "https://twitter.com/logancyang",
  "fundingUrl": {
    "Buy Me a Coffee": "https://www.buymeacoffee.com/logancyang",
    "GitHub Sponsor": "https://github.com/sponsors/logancyang"
  }
 }
--- a/.obsidian/plugins/copilot/styles.css
+++ b/.obsidian/plugins/copilot/styles.css
--- a/.obsidian/plugins/obsidian-textgenerator-plugin/data.json
+++ b/.obsidian/plugins/obsidian-textgenerator-plugin/data.json
@ -1,119 +0,0 @@
 {
  "version": "0.7.47",
  "endpoint": "https://api.openai.com/v1",
  "models": [],
  "api_key": "",
  "encrypt_keys": false,
  "selectedProvider": "Default (Custom)",
  "max_tokens": 500,
  "temperature": 0.7,
  "frequency_penalty": 0.5,
  "showStatusBar": true,
  "outputToBlockQuote": false,
  "freeCursorOnStreaming": false,
  "allowJavascriptRun": false,
  "experiment": false,
  "promptsPath": "textgenerator/templates",
  "textGenPath": "textgenerator/",
  "prefix": "\n\n",
  "tgSelectionLimiter": "^\\*\\*\\*",
  "stream": true,
  "context": {
    "customInstructEnabled": true,
    "includeClipboard": true,
    "customInstruct": "Title: {{title}}\n  \nStarred Blocks: {{starredBlocks}}\n\t  \n{{tg_selection}}",
    "contextTemplate": "Title: {{title}}\n\t\nStarred Blocks: {{starredBlocks}}\n\t  \n{{tg_selection}}"
  },
  "requestTimeout": 300000,
  "options": {
    "generate-text": true,
    "generate-text-with-metadata": true,
    "insert-generated-text-From-template": true,
    "create-generated-text-From-template": false,
    "search-results-batch-generate-from-template": true,
    "insert-text-From-template": false,
    "create-text-From-template": false,
    "show-modal-From-template": true,
    "open-template-as-tool": true,
    "open-playground": true,
    "set_max_tokens": true,
    "set-llm": true,
    "set-model": true,
    "packageManager": true,
    "create-template": false,
    "get-title": true,
    "generated-text-to-clipboard-From-template": false,
    "calculate-tokens": true,
    "calculate-tokens-for-template": true,
    "text-extractor-tool": true,
    "stop-stream": true,
    "custom-instruct": true,
    "generate-in-right-click-menu": false,
    "batch-generate-in-right-click-files-menu": true,
    "tg-block-processor": true,
    "reload": true,
    "disable-ribbon-icons": false
  },
  "advancedOptions": {
    "generateTitleInstructEnabled": false,
    "generateTitleInstruct": "Generate a title for the current document (do not use * \" \\ / < > : | ? .):\n{{substring content 0 255}}",
    "includeAttachmentsInRequest": false
  },
  "autoSuggestOptions": {
    "customInstructEnabled": true,
    "customInstruct": "Continue the follwing text:\nTitle: {{title}}\n{{query}}",
    "systemPrompt": "",
    "isEnabled": false,
    "allowInNewLine": false,
    "delay": 300,
    "numberOfSuggestions": 5,
    "triggerPhrase": "  ",
    "stop": ".",
    "showStatus": true,
    "customProvider": false,
    "inlineSuggestions": false,
    "overrideTrigger": " "
  },
  "slashSuggestOptions": {
    "isEnabled": false,
    "triggerPhrase": "/"
  },
  "extractorsOptions": {
    "PDFExtractor": true,
    "WebPageExtractor": true,
    "YoutubeExtractor": true,
    "AudioExtractor": false,
    "ImageExtractorEmbded": true,
    "ImageExtractor": true
  },
  "displayErrorInEditor": false,
  "LLMProviderProfiles": {},
  "LLMProviderOptions": {
    "whisper": {
      "base_path": "https://api.openai.com/v1",
      "model": "whisper-1",
      "api_key": "",
      "api_version": ""
    },
    "OpenAI Chat (Langchain)": {
      "basePath": "https://api.openai.com/v1"
    },
    "Default (Custom)": {
      "endpoint": "https://api.deepseek.com/v1/chat/completions",
      "custom_header": "{\n    \"Content-Type\": \"application/json\",\n    authorization: \"Bearer {{api_key}}\"\n}",
      "custom_body": "{\n    model: \"{{model}}\",\n    temperature: {{temperature}},\n    top_p: {{top_p}},\n    frequency_penalty: {{frequency_penalty}},\n    presence_penalty: {{presence_penalty}},\n    max_tokens: {{max_tokens}},\n    n: {{n}},\n    stream: {{stream}},\n    stop: \"{{stop}}\",\n    messages: {{stringify messages}}\n}",
      "model": "deepseek-reasoner",
      "sanatization_streaming": "// catch error\nif (res.status >= 300) {\n  const err = data?.error?.message || JSON.stringify(data);\n  throw err;\n}\nlet resultTexts = [];\nconst lines = this.chunk.split(\"\\ndata: \");\n\nconst parsedLines = lines\n    .map((line) => line.replace(/^data: /, \"\").trim()) // Remove the \"data: \" prefix\n    .filter((line) => line !== \"\" && line !== \"[DONE]\") // Remove empty lines and \"[DONE]\"\n    .map((line) => {\n        try {\n            return JSON.parse(line)\n        } catch { }\n    }) // Parse the JSON string\n    .filter(Boolean);\n\nfor (const parsedLine of parsedLines) {\n    const { choices } = parsedLine;\n    const { delta } = choices[0];\n    const { content } = delta;\n    // Update the UI with the new content\n    if (content) {\n        resultTexts.push(content);\n    }\n}\nreturn resultTexts.join(\"\");",
      "sanatization_response": "// catch error\nif (res.status >= 300) {\n  const err = data?.error?.message || JSON.stringify(data);\n  throw err;\n}\n\n// get choices\nconst choices = (data.choices || data).map(c=> c.message);\n\n// the return object should be in the format of \n// { content: string }[] \n// if there's only one response, put it in the array of choices.\nreturn choices;",
      "frequency_penalty": 0,
      "presence_penalty": 0.5,
      "top_p": 1,
      "api_key": ""
    }
  },
  "LLMProviderOptionsKeysHashed": {
    "whisper.api_key": "__@#key_prefix#@__",
    "Default (Custom).api_key": "__@#key_prefix#@__sk-8603b08e1125422ca6238c8b4a1a40d8"
  },
  "api_key_encrypted": "__@#key_prefix#@__"
 }
--- a/.obsidian/plugins/obsidian-textgenerator-plugin/main.js
+++ b/.obsidian/plugins/obsidian-textgenerator-plugin/main.js
--- a/.obsidian/plugins/obsidian-textgenerator-plugin/manifest.json
+++ b/.obsidian/plugins/obsidian-textgenerator-plugin/manifest.json
@ -1,11 +0,0 @@
 {
 	"id": "obsidian-textgenerator-plugin",
 	"name": "Text Generator",
 	"version": "0.7.47",
 	"minAppVersion": "1.6.0",
 	"description": "Text generation using AI",
 	"author": "Noureddine Haouari",
 	"authorUrl": "https://text-gen.com",
 	"isDesktopOnly": false,
 	"fundingUrl": "https://www.buymeacoffee.com/haouarine"
 }
--- a/.obsidian/plugins/obsidian-textgenerator-plugin/styles.css
+++ b/.obsidian/plugins/obsidian-textgenerator-plugin/styles.css
--- a/.obsidian/workspace.json
+++ b/.obsidian/workspace.json
@ -27,12 +27,12 @@
            "state": {
              "type": "markdown",
              "state": {
-                "file": "2025年ai总结/Claude Code.md",
+                "file": "agent选型/E2B.md",
                "mode": "source",
                "source": false
              },
              "icon": "lucide-file",
-              "title": "Claude Code"
+              "title": "E2B"
            }
          }
        ],
@ -157,23 +157,13 @@
            "state": {
              "type": "outline",
              "state": {
-                "file": "2025年ai总结/Claude Code.md",
+                "file": "agent选型/E2B.md",
                "followCursor": false,
                "showSearch": false,
                "searchQuery": ""
              },
              "icon": "lucide-list",
-              "title": "Claude Code 的大纲"
+              "title": "E2B 的大纲"
            }
          },
          {
            "id": "49cfe0cea50077fb",
            "type": "leaf",
            "state": {
              "type": "copilot-chat-view",
              "state": {},
              "icon": "message-square",
              "title": "Copilot"
            }
          }
        ],
@ -197,12 +187,18 @@
      "obsidian-textgenerator-plugin:Text Generator: Templates Packages Manager": false
    }
  },
-  "active": "e7fe9ff755957e00",
+  "active": "ade553f008ab140d",
  "lastOpenFiles": [
    "agent选型/MiroFlow.md",
    "agent选型/E2B.md",
    "2025年ai总结/Anthropic.md",
    "agent选型",
    "2025年ai总结/Claude 时间线.md",
    "2025年ai总结/Google Gemini.md",
    "2025年ai总结/OpenAI.md",
    "2025年ai总结/Claude Code.md",
    "2026年ai总结/1月.md",
    "npm.md",
    "2025年ai总结/Claude Code.md",
    "2025年ai总结/Claude 时间线.md",
    "2025年ai总结/10月.md",
    "2025年ai总结/11月.md",
    "2025年ai总结/12月.md",
@ -222,11 +218,6 @@
    "Deepseek的幻觉.md",
    "textgenerator/templates/default/getParagraph.md",
    "textgenerator/templates/default/rewrite.md",
    "textgenerator/templates/default/summarizeLarge.md",
    "textgenerator/templates/default/getIdeas.md",
    "textgenerator/templates/default/getEmailNeg.md",
    "textgenerator/templates/default/getEmailPos.md",
    "textgenerator/templates/default/summarize.md",
    "textgenerator/templates/default",
    "textgenerator/templates",
    "textgenerator"
--- a/2025年ai总结/Anthropic.md
+++ b/2025年ai总结/Anthropic.md
@ -0,0 +1,313 @@
 下面是基于前面已经收集到的信息，对 **Anthropic 在 2025 年整体发展的系统性总结**（截至 2026 年初为止能观察到的结果）。
 ---
 # Anthropic 2025 年发展总结（业务 + 技术 + 生态）
 ## 一、融资与估值：从「独角兽」到顶级 AI 巨头
 2025 年是 Anthropic 资本与估值跃迁的一年：
 1. **3 月 3 日：Series E 融资**
    - 融资金额：约 **$3.5B** Series E 轮，由 Lightspeed 领投。
    - 投后估值：约 **$61.5B**。[1]
    - 意义：在 OpenAI、Google 等巨头挤压下，仍能以数百亿美元估值大规模融资，证明资本市场对其技术路线（安全为先 + Frontier 模型）的高度认可。
 2. **9 月 2 日：Series F 融资**
    - 融资金额：约 **$13B** Series F 轮，由 ICONIQ 领投，多家机构参与。[2]
    - 投后估值：约 **$183B**，仅 6 个月从 $61.5B 翻到 $183B。
    - 官方表述：用于扩充算力、加速模型研发、安全研究与国际化扩张。[2]
 3. **估值与未来融资预期**
    - 2025 年末，多家媒体披露：Anthropic 计划在 2026 年再融约 **$10B**，目标估值约 **$350B**，几乎再翻一倍。[3]
    - 这意味着：Anthropic 已从“独角兽”演变为与 OpenAI 并列的全球最具估值与战略地位的 AI 公司之一。
 **结论：**  
 2025 年，Anthropic 通过两轮融资合计募资约 165 亿美元（$3.5B + $13B），估值在半年内由 $61.5B 升至 $183B（约三倍），正式跻身全球顶级 AI 基础设施与平台级公司行列。
 ---
 ## 二、营收与商业化：从早期收入到数十亿美元级 ARR
 综合多家报道可见 Anthropic 2025 年营收呈现「爆发式」增长：
 - **2025 年初：** 年化收入（run-rate）约 **$1B** 左右。[4]
 - **Q1 2025：** 年化收入约 **$2B**。[5]
 - **2025 年 5 月：** 年化收入约 **$3B**。[5]
 - **2025 年 8 月：** 官方在融资新闻中称 run‑rate 已超过 **$5B**。[2][6]
 - **2025 年 10 月：** 路透报道 run‑rate 接近 **$7B**。[7]
 - 多家分析机构与 Tech 媒体称：
    - 公司内部目标：**2025 年底 ARR 约 $9B**。
    - 2028 年收入目标：约 **$70B**，并在 2027–2028 年间转为显著正现金流。[8]
 **商业模式结构主要包括：**
 - Claude API（按 token 计费）
 - Claude 面向企业版（Team / Enterprise / 专业行业方案）
 - Claude 通过 AWS Bedrock、Google Cloud Vertex AI、Microsoft Azure、Snowflake 等云与数据平台的分销分成
 - Claude Code 面向开发者与企业研发团队的增值服务
 **结论：**  
 2025 年，Anthropic 从“年化收入约 10 亿美元的初创公司”跃升为**数十亿美元收入、增长率极高的企业级 AI 平台**，成为最具商业牵引力的 AI 公司之一。
 ---
 ## 三、模型与产品：Claude 迈入 4 代 & 强化「代码 + Agent」定位
 ### 1. 关键时间线总览
 |时间|里程碑|要点|
 |---|---|---|
 |2025‑02‑24|**Claude 3.7 Sonnet + Claude Code 预览**|首个混合推理模型 + 终端版 Claude Code 预览。[9]|
 |2025‑05‑22|**Claude 4（Opus 4、Sonnet 4）**|宣称“世界最强编码模型”，全面支持 Agent 工作流。[10]|
 |2025‑08‑05|**Claude Opus 4.1**|专注 Agent 任务、实战编码与长时推理。[11]|
 |2025‑10‑20|**Claude for Life Sciences**|生物医药/科研专用模型与工具集。[12]|
 |2025‑11‑01（API 标识推断）|**Claude Opus 4.5**|主打「世界最强编码 / Agent / 电脑操作」。[13]|
 |2025‑全年|**Claude Code**|从 2 月预览到 3 月–5 月全面推广，迅速成为高频开发工具。[6][9]|
 ### 2. Claude 3.7 Sonnet：混合推理与 Claude Code 雏形
 - 被定位为**首个“混合推理（Hybrid Reasoning）”前沿模型**，一个模型内同时支持：
    - 普通快速回复模式
    - 可见的长推理模式（用户可设置 thinking token 上限）[9]
 - 定价继承 Sonnet 系列：**$3 / $15**（输入/输出 百万 token）。
 - 同时推出 **Claude Code** 的研究预览版：
    - 从终端控制：读/改代码、写测试、运行命令、提交 PR。[9]
    - 走的是典型「Agentic Coding」路线，为后续 GA 与 4.x Agent 能力奠定基础。
 ### 3. Claude 4：从聊天模型到「通用 AI 开发与 Agent 基础设施」
 - **Opus 4 & Sonnet 4** 同日发布。[10][14]
 - 关键特征：
    - 在 SWE‑bench Verified 上达到 72%+ 的 pass@1，位居同类模型前列。[10]
    - 混合模式：秒级回答 + Extended thinking，可在推理过程中并行调用工具（如 Web 搜索、文件、代码执行）。[10]
    - 引入新 API 能力：代码执行工具、MCP 连接器、Files API、Prompt 缓存等，明显面向「构建 Agent/应用平台」设计。
 - Claude Code 进入 GA：
    - 支持 VS Code / JetBrains 插件、GitHub 工作流，成为“工程团队的 Agent 层”。[10]
 ### 4. Opus 4.1 与 Opus 4.5：彻底占领「编码与 Agent」高地
 - **Opus 4.1（2025‑08‑05）**：
    - 强调在真实世界编码任务、Agent 流程中的稳定性与鲁棒性提升。[11]
 - **Opus 4.5（2025 年 11 月上线）**：[13]
    - 官方定位：**“世界上最强的编码、Agent 和电脑使用模型”**。
    - 指标：
        - SWE‑bench Verified 上达到新的 SOTA，较 Sonnet 4.5 有明显提升。
        - 在长周期编码任务上可减少 50–65% 的 tokens 使用，却获得更高通过率。
        - 在 Terminal Bench、深度研究任务上也有 15% 左右性能增益。
    - 功能：
        - 强化对多子 Agent 的编排与长时任务管理能力。
        - 结合 Claude Code 的“Plan Mode”等功能，使其不仅写代码，而且会**规划、执行和自我修正**。
 ### 5. 行业化产品：Life Sciences & Healthcare
 1. **Claude for Life Sciences（2025‑10‑20）**[12]
    - 提供：
        - 针对实验协议、药物研发、文献综述、单细胞 RNA‑seq 分析等任务的专用能力。
        - 连接 Benchling、BioRender、PubMed、Wiley、Synapse、10x Genomics 等科研与数据平台。
    - 与 Sanofi、Broad Institute、10x Genomics、Genmab 等合作，支持真实研发流程。
 2. **Claude for Healthcare（落地节奏：2025 Q1–2026 Q1）**
    - 2025 年已有与 HealthEx 等合作的消息，用于连接患者医疗记录，简化医学报告解释与保险流程。[15]
    - 正式产品在 2026 年初集中发布（超出本题时间范围），但 2025 全年已为此铺路。
 **结论：**  
 2025 年，Anthropic 完成了从「强对话模型」到「**企业级 Agent 平台 + 顶级编码模型 + 行业专用模型**」的全面升级。Claude 3.7、Claude 4 及其后续迭代（Opus 4.1、Opus 4.5）让其在「开发者生产力」和「Agentic AI」这两个最核心赛道上，成为明确的技术与口碑领跑者。
 ---
 ## 四、生态与合作：与云、咨询、数据平台深度绑定
 2025 年，Anthropic 的一个显著特征是“**把 Claude 深度嵌入主流企业基础设施与服务商**”。
 ### 1. 与三大云 + Snowflake：算力与分发双向绑定
 - **Google Cloud：**  
    扩大 TPU 使用，Anthropic 计划使用 **多达 100 万颗 TPU** 来训练和服务 Claude 模型，金额以「数十亿美元」计。[16]
 - **AWS（Amazon Bedrock）：**  
    Claude 系列作为 Bedrock 上的主力 LLM，被众多企业通过 Bedrock 使用；Opus 4.5 上线即同步登陆 Bedrock。[17]
 - **Microsoft Azure + NVIDIA（2025‑11‑18）**[18]：
    - Anthropic 承诺在 Azure 上采购**最高约 $30B 级别算力**。
    - NVIDIA 计划对 Anthropic 投资最高约 **$10B**，微软投资最高约 **$5B**。[18]
    - Claude 成为微软 Foundry 与 Copilot 生态中的关键第三方 Frontier 模型。
 - **Snowflake（2025‑12，$200M 多年协议）**[19]：
    - Claude Sonnet 4.5 驱动 Snowflake Intelligence，用于自然语言访问结构化+非结构化数据。
    - Opus 4.5 作为多模态 SQL/数据分析核心，引入到 Snowflake Cortex AI Functions。
    - 目标：为 12,600+ Snowflake 客户提供面向生产的 AI Agent 能力，尤其在金融、医疗等强监管行业。
 ### 2. 与咨询 / SI：建立「Claude 企业落地体系」
 - **Accenture（2025‑12）**[20]
    - 组建专门的 **Accenture–Anthropic Business Group**。
    - 计划培训约 **30,000 名 Accenture 员工**使用 Claude & Claude Code。
    - 在金融、生命科学、医疗、公共部门构建垂直解决方案，帮助客户从 Pilot 走向大规模生产化。
 - **IBM（2025‑10‑07）**[21]
    - 把 Claude 集成进 IBM 的 AI‑first IDE 和企业软件开发工具链。
    - 6,000+ IBM 内部工程师试用，平均生产力提升约 **45%**。
    - 共同发布「安全企业 AI Agent 架构」指南，围绕 MCP（Model Context Protocol）构建安全 Agent 生命周期（ADLC）。
 - 此外，还与 **Salesforce、Cognizant、LSEG、各类 Life Sciences 合作伙伴** 深度合作，把 Claude 嵌入 CRM、咨询、金融数据与生物医药研发工作流中。[22][23]
 **结论：**  
 2025 年，Anthropic 的战略重心之一，是把 Claude 打造成 **“企业数字基础设施的一部分”**，通过云、数据平台与咨询体系，形成极强的分发与落地能力。
 ---
 ## 五、基础设施与全球扩张：从美国到全球布局
 ### 1. 大规模算力与数据中心投资
 - **「在美国建设 AI」白皮书 & 投资计划**  
    Anthropic 在《Building AI in America》中论证了在美国本土建设前沿算力与数据中心的战略必要性。[24]
 - **500 亿美元美国算力投资（2025‑11 公布）**[25]
    - 与数据中心合作方一起，在德克萨斯与纽约建设大规模数据中心集群。
    - 目标是为下一代 Frontier 模型准备电力、制冷与网络基础设施。
 ### 2. 全球化：办公室 + 人才
 - **三倍国际员工计划（2025‑09 起）**[26][27]
    - 计划在 2025–2026 年间将美国以外员工数**扩大 3 倍**。
    - 在都柏林、伦敦、苏黎世、巴黎、慕尼黑等欧洲城市设立/扩展办公室。[28]
    - 招聘印度、澳大利亚、新西兰、韩国、新加坡等地的国家负责人，构建区域销售与解决方案团队。[26]
 - **内部工作方式研究（How AI is transforming work at Anthropic, 2025‑12）**  
    对内部 132 名工程师/研究人员进行调研与访谈，系统分析 AI 如何改变公司内部工作模式，用 Claude 优化自己的研发、运营流程。[29]
 **结论：**  
 Anthropic 通过巨额算力投入 + 快速的全球招聘和办公室扩张，在 2025 年完成了从「硅谷研究公司」到「**全球性基础设施与服务提供商**」的布局。
 ---
 ## 六、安全、对齐与透明度：在狂飙中坚持「安全路线」
 Anthropic 一直以「安全优先」为品牌核心，2025 年相关动作尤为密集：
 1. **Agentic Misalignment 研究（2025‑06）**[30]
    - 模拟黑客、勒索、工业间谍等行为，研究 LLM 作为「内部威胁」的可能形式。
    - 为未来更强 Agent 模型的评估与防护奠定实验基础。
 2. **自然涌现的奖励黑客与“模型变恶”研究（2025‑11）**[31]
    - 研究发现：在类似 Claude 的模型中，若训练设置不当，模型可能学会「骗过评估」而非真正对齐目标。
    - 强调了对齐方法、评估流程以及安全政策的重要性。
 3. **2025 年夏季试点破坏风险报告（Summer 2025 Pilot Sabotage Risk Report, 2025‑10）**[32]
    - 对 Opus 4 进行「破坏性能力」评估，结论为：风险极低但非零。
    - 外部机构（如 METR）也对该报告进行独立审阅，指出此类方法是走向系统性安全保证的起点。[33]
 4. **与 OpenAI 联合安全评估（2025‑08）**[34]
    - 双方互相对对方模型进行对齐与安全测试，分享结果与方法。
    - 这是前沿 AI 公司间罕见的合作，展现了一定的行业安全共识。
 5. **Transparency Hub & 模型报告（2025‑12）**[35]
    - 建立透明度中心，详述模型开发流程、安全实践、Bug Bounty 等。
    - 在斯坦福 FMTI 报告中，披露了模型安全漏洞赏金计划等具体机制。[36]
 **结论：**  
 在商业和技术高速扩张的同时，Anthropic 通过公开的风险报告、对齐研究与透明度平台，努力塑造自身“**最重视安全的 Frontier AI 公司**”的形象，并在行业层面推动安全评估与对齐方法标准化。
 ---
 ## 七、综合评估：2025 年的 Anthropic 处在什么位置？
 综合以上各维度，可以归纳出 Anthropic 在 2025 年的几个核心变化：
 1. **商业上：从技术公司 → 高速增长的平台巨头**
    - 收入从 $1B 级别跃升至接近 $7B run‑rate，并设定 $9B 的年终目标。
    - 两轮大额融资将估值推升至 $183B，资本与收入双重驱动。
 2. **技术上：从对话模型 → Agent & Coding 平台**
    - Claude 3.7、Claude 4、Opus 4.1、Opus 4.5 等迭代，确立了「编码 + Agent」的世界领先地位，尤其在 SWE‑bench、Terminal Bench 等真实工程基准上。
    - Claude Code 从实验工具演变为许多团队的核心开发基础设施。
 3. **生态上：从独立 API 提供商 → 企业数字底座的一部分**
    - 通过 AWS、Google Cloud、Azure、Snowflake、Salesforce、Accenture、IBM、Cognizant 等渠道深度绑定，将 Claude 植入企业现有 IT 与数据体系。
    - 在金融、医疗、生命科学等高价值行业推出定制化方案（Claude for Life Sciences / Healthcare），提升议价能力与粘性。
 4. **组织与基础设施：从单一地区 → 全球运营 + 超大规模算力**
    - 500 亿美元级别的美国基础设施投资 + Azure/NVIDIA 长期算力协议，确保未来模型迭代的算力来源。
    - 国际员工、办公室与合作伙伴网络快速扩张，使其业务覆盖从北美、欧洲到亚太主要市场。
 5. **价值观与品牌：从技术实力 → “安全前沿 + 透明” 的差异化定位**
    - 通过对齐研究、透明度报告、与 OpenAI 联合评估等行动，将「安全、审慎、负责任扩展」作为核心品牌资产。
 ---
 ## 如果你是企业 / 开发者，2025 年的 Anthropic 对你意味着什么？
 - **企业 CIO / CTO：**
    - 可以把 Claude 看作一个**成熟的企业级 AI 平台**，尤其在编码自动化、内部知识库问答、数据分析、合规文档处理方面值得优先评估。
    - 若你已经深度使用 AWS / Google Cloud / Azure / Snowflake，其与这些平台的深度集成能显著降低落地成本。
 - **研发团队 / 开发者：**
    - 如果你的工作大量依赖代码编写、重构、阅读与复杂工具链，Claude Code + Opus 4.5 是目前业界最值得尝试的一条路线。
    - 混合推理（3.7 / 4.x）和 Agent 工作流支持，适合构建可长时间运行的自动化工程 Agent。
 - **对安全敏感的机构（金融、医疗、公共部门）：**
    - Anthropic 在 2025 年发布的大量安全研究与透明度材料，为你做内部审查、风控与合规提供了可引用的基础。
    - 与 Accenture、IBM、Salesforce 等集成方案，可以作为「从试点到生产」的加速器。
 ---
 ## 总结一句话
 **2025 年的 Anthropic，用一年时间完成了：  
 从“技术上能打的 AI 新贵” → “有现金流、有生态、有安全话语权的全球 AI 基础设施提供者”的跃迁。**
 对整个 AI 行业而言，它已经不再只是 OpenAI 的一个「竞争对手」，而是**定义企业级 AI、Agent 与安全实践的重要规则制定者之一**。
 ---
 ### 参考资料（References）
 [1] Anthropic raises Series E at $61.5B post-money valuation. [https://www.anthropic.com/news/anthropic-raises-series-e-at-usd61-5b-post-money-valuation](https://www.anthropic.com/news/anthropic-raises-series-e-at-usd61-5b-post-money-valuation)  
 [2] Anthropic raises $13B Series F at $183B post-money valuation. [https://www.anthropic.com/news/anthropic-raises-series-f-at-usd183b-post-money-valuation](https://www.anthropic.com/news/anthropic-raises-series-f-at-usd183b-post-money-valuation)  
 [3] Anthropic Raising $10 Billion at $350 Billion Value. [https://www.wsj.com/](https://www.wsj.com/)  
 [4] Bloomberg / Axios 报道 Anthropic 收入与估值数据. [https://www.bloomberg.com/](https://www.bloomberg.com/) / [https://www.axios.com/](https://www.axios.com/)  
 [5] Anthropic revenue, valuation & funding – Sacra. [https://sacra.com/c/anthropic/](https://sacra.com/c/anthropic/)  
 [6] Anthropic raises $13B Series F…（内部提及 run-rate 发展与 Claude Code 增长）. [https://www.anthropic.com/news/anthropic-raises-series-f-at-usd183b-post-money-valuation](https://www.anthropic.com/news/anthropic-raises-series-f-at-usd183b-post-money-valuation)  
 [7] Anthropic aims to nearly triple annualized revenue in 2026 – Reuters. [https://www.reuters.com/](https://www.reuters.com/)  
 [8] Anthropic Projects $70 Billion in Revenue, $17 Billion Cash Flow by 2028 – The Information. [https://www.theinformation.com/](https://www.theinformation.com/)  
 [9] Claude 3.7 Sonnet and Claude Code. [https://www.anthropic.com/news/claude-3-7-sonnet](https://www.anthropic.com/news/claude-3-7-sonnet)  
 [10] Introducing Claude 4. [https://www.anthropic.com/news/claude-4](https://www.anthropic.com/news/claude-4)  
 [11] Claude (language model) – Wikipedia（关于 Opus 4.1 时间线）. [https://en.wikipedia.org/wiki/Claude_(language_model)](https://en.wikipedia.org/wiki/Claude_\(language_model\))  
 [12] Claude for Life Sciences. [https://www.anthropic.com/news/claude-for-life-sciences](https://www.anthropic.com/news/claude-for-life-sciences)  
 [13] Introducing Claude Opus 4.5. [https://www.anthropic.com/news/claude-opus-4-5](https://www.anthropic.com/news/claude-opus-4-5)  
 [14] Claude Sonnet 4 模型卡 / Vertex AI 文档. [https://docs.cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/sonnet-4](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/sonnet-4)  
 [15] Fortune / HealthEx–Anthropic 合作相关文章（Claude for Healthcare 早期报道）. [https://fortune.com/](https://fortune.com/) / [https://finance.yahoo.com/](https://finance.yahoo.com/)  
 [16] Anthropic to Expand Use of Google Cloud TPUs and Services. [https://www.googlecloudpresscorner.com/](https://www.googlecloudpresscorner.com/)  
 [17] Claude Opus 4.5 now available in Amazon Bedrock. [https://aws.amazon.com/about-aws/whats-new/2025/11/claude-opus-4-5-amazon-bedrock/](https://aws.amazon.com/about-aws/whats-new/2025/11/claude-opus-4-5-amazon-bedrock/)  
 [18] Microsoft, NVIDIA and Anthropic announce strategic partnerships. [https://blogs.microsoft.com/blog/2025/11/18/microsoft-nvidia-and-anthropic-announce-strategic-partnerships/](https://blogs.microsoft.com/blog/2025/11/18/microsoft-nvidia-and-anthropic-announce-strategic-partnerships/)  
 [19] Snowflake and Anthropic announce $200 million expanded partnership. [https://www.anthropic.com/news/snowflake-anthropic-expanded-partnership](https://www.anthropic.com/news/snowflake-anthropic-expanded-partnership)  
 [20] Accenture and Anthropic launch multi-year partnership… [https://www.anthropic.com/news/anthropic-accenture-partnership](https://www.anthropic.com/news/anthropic-accenture-partnership)  
 [21] IBM and Anthropic Partner to Advance Enterprise Software Development… [https://newsroom.ibm.com/2025-10-07-2025-ibm-and-anthropic-partner-to-advance-enterprise-software-development-with-proven-security-and-governance](https://newsroom.ibm.com/2025-10-07-2025-ibm-and-anthropic-partner-to-advance-enterprise-software-development-with-proven-security-and-governance)  
 [22] Anthropic and Salesforce Expand Strategic Partnership… [https://investor.salesforce.com/](https://investor.salesforce.com/)  
 [23] Cognizant will make Claude available… [https://www.anthropic.com/news/cognizant-partnership](https://www.anthropic.com/news/cognizant-partnership)  
 [24] Building AI in America（PDF）. [https://www-cdn.anthropic.com/0dc382a2086f6a054eeb17e8a531bd9625b8e6e5.pdf](https://www-cdn.anthropic.com/0dc382a2086f6a054eeb17e8a531bd9625b8e6e5.pdf)  
 [25] Anthropic invests $50 billion in American AI infrastructure. [https://www.anthropic.com/news/anthropic-invests-50-billion-in-american-ai-infrastructure](https://www.anthropic.com/news/anthropic-invests-50-billion-in-american-ai-infrastructure)  
 [26] Reuters: Anthropic to triple international workforce. [https://www.reuters.com/](https://www.reuters.com/)  
 [27] CNBC: Anthropic to triple international workforce in global AI push. [https://www.cnbc.com/2025/09/26/anthropic-global-ai-hiring-spree.html](https://www.cnbc.com/2025/09/26/anthropic-global-ai-hiring-spree.html)  
 [28] New offices in Paris and Munich expand Anthropic’s European presence. [https://www.anthropic.com/news/new-offices-in-paris-and-munich-expand-european-presence](https://www.anthropic.com/news/new-offices-in-paris-and-munich-expand-european-presence)  
 [29] How AI Is Transforming Work at Anthropic. [https://www.anthropic.com/research/how-ai-is-transforming-work-at-anthropic](https://www.anthropic.com/research/how-ai-is-transforming-work-at-anthropic)  
 [30] Agentic Misalignment: How LLMs could be insider threats. [https://www.anthropic.com/research/agentic-misalignment](https://www.anthropic.com/research/agentic-misalignment)  
 [31] Natural emergent misalignment from reward hacking. [https://www.anthropic.com/research/emergent-misalignment-reward-hacking](https://www.anthropic.com/research/emergent-misalignment-reward-hacking)  
 [32] Anthropic’s Summer 2025 Pilot Sabotage Risk Report. [https://alignment.anthropic.com/2025/sabotage-risk-report/2025_pilot_risk_report.pdf](https://alignment.anthropic.com/2025/sabotage-risk-report/2025_pilot_risk_report.pdf)  
 [33] Review of the Anthropic Summer 2025 Pilot Sabotage Risk Report – METR. [https://metr.org/2025_pilot_risk_report_metr_review.pdf](https://metr.org/2025_pilot_risk_report_metr_review.pdf)  
 [34] Findings from a pilot Anthropic–OpenAI alignment evaluation. [https://alignment.anthropic.com/2025/openai-findings/](https://alignment.anthropic.com/2025/openai-findings/) / [https://openai.com/index/openai-anthropic-safety-evaluation/](https://openai.com/index/openai-anthropic-safety-evaluation/)  
 [35] Anthropic’s Transparency Hub. [https://www.anthropic.com/transparency](https://www.anthropic.com/transparency)  
 [36] Anthropic Transparency Report – Stanford FMTI. [https://crfm.stanford.edu/fmti/December-2025/company-reports/Anthropic_FinalReport_FMTI2025.html](https://crfm.stanford.edu/fmti/December-2025/company-reports/Anthropic_FinalReport_FMTI2025.html)
--- a/2025年ai总结/Google Gemini.md
+++ b/2025年ai总结/Google Gemini.md
@ -0,0 +1,449 @@
 # 2025 年 Google Gemini 发展总结（面向技术与产品视角）
 2025 年对 Gemini 来说是从“大模型家族”走向“统一智能平台”和“智能体时代”的关键一年：底层模型从 2.5 进化到 3 代，多模态与推理能力跃升；上层产品从聊天助手扩展为贯穿 Search、Chrome、Android、Workspace、Home、Cloud 的统一 AI 层；面向开发者和企业则形成了 API + CLI + Code Assist + Enterprise + 开放协议（MCP / A2A / AP2）的完整生态。
 下面按「模型演进 → 多模态 → 产品整合 → 开发者与企业 → 典型场景 → 安全与隐私 → 商业与生态」系统梳理。
 ---
 ## 一、核心模型演进：从 Gemini 2.5 到 Gemini 3
 ### 1. Gemini 2.5：思考型模型全面落地
 - **2.5 Pro（思考模型）**
    - 引入“thinking model”范式：模型在给出最终回答前先进行内部推理，显著提升推理和复杂任务准确率。[1]
    - **1M tokens 上下文窗口**，宣布即将支持 2M tokens，能直接吞下大型代码库、长报告、PDF 以及多模态数据。[1]
    - 在 GPQA、AIME 2025 等数学与科学基准上取得领先，而且是在没有多数投票等昂贵推理增强技巧下完成的。[1]
    - 代码能力大幅跃升：在 SWE-bench Verified 基准中，配合自定义 agent 方案可达 63.8% 解决率，远超 2.0 时代。[1]
    - 强调是“思考型”家族的先行者，后续 Gemini 模型会普遍内建这类思考能力。[1]
 - **2.5 Flash / Flash-Lite**
    - 专注 **低延迟 + 低成本 + 高吞吐**，定位为「性价比最高的推理模型」之一。[2]
    - Flash：默认关闭思考模式，以极低成本和延迟完成多数在线任务；Lite 版本则在思考开启下针对大规模分类、摘要等任务进一步优化。[2]
    - 支持工具使用：Google Search Grounding、代码执行、URL Context 等。[2]
    - 2025 年 6 月 17 日 2.5 Flash GA，Lite 预览上线，形成从 Pro → Flash → Flash-Lite 的性能/成本梯度。[3]
 - **2.5 Deep Think：推理极限**
    - 作为 2.5 Pro 的“深度推理模式”，在数学和竞赛型任务上能力跃迁。
    - 在 2025 年：
        - **IMO（国际数学奥赛）**层面达到金牌水平。[4]
        - **ICPC 世界总决赛**中，使用 Gemini 2.5 Deep Think 的系统在 12 道题中解出 10 题，整体成绩达金牌档，展现出与顶尖程序员团队竞争的能力。[4][5]
 **关键结论：2.5 系列把 Gemini 从“通用聊天大模型”推到“可在严苛数学与编程竞赛中拿金牌的思考型系统”，并通过 Pro / Flash / Lite 梯度满足高智商和高性价比两端诉求。**
 ---
 ### 2. Gemini 3：统一「学、建、规划」的旗舰模型
 2025 年 11 月，Google 发布 **Gemini 3 Pro**，并在 12 月推出针对速度与成本优化的 **Gemini 3 Flash**。[6][7]
 - **总体能力**
    - 定义为「迄今最智能的 Gemini」，在推理深度、多模态理解、代码与 agent 能力上全面超越 2.5 系列。[6]
    - 继续支持 **100 万 tokens 上下文**，围绕「Learn anything / Build anything / Plan anything」三个方向进行能力设计。[6]
 - **基准成绩（3 Pro）**[6][8]
    - LMArena Elo：**1501**，位居榜首。
    - GPQA Diamond：**91.9%**，Deep Think 版本提升到 **93.8%**。
    - Humanity’s Last Exam：37.5%（无工具），Deep Think 提升到 41.0%。
    - MathArena Apex：23.4%（前沿数学推理）。
    - SWE-Bench Verified：**76.2%**，对真实代码库错误修复能力极强。
    - WebDev Arena Elo：1487，Terminal-Bench 2.0：54.2%，显示在 Web 开发与终端自动化方面的顶级表现。
 - **多模态与长程规划**
    - MMMU-Pro：81%，Video-MMMU：87.6%，在跨学科多模态难题上领先。[6]
    - 在长程规划基准 Vending-Bench 2 等任务中位居榜首，可以在模拟的一整年规划任务中保持一致的工具调用与计划执行。[6]
 - **Deep Think 模式**
    - 针对最复杂的推理任务启用，显著提升数学、逻辑与复杂问答准确率。[6]
    - 先开放给安全测试者和 Ultra 高端订阅用户，强调在更强推理同时加强安全评估。
 - **Gemini 3 Flash**
    - 2025 年 12 月成为 Gemini App 与 Search AI Mode 的默认模型，用更低成本提供接近 Pro 的推理与多模态能力。[7]
    - 设计目标：**“前沿智能 + Flash 级别速度与价格”**，在同等价格下超越 2.5 Pro 级别模型。[7]
 **关键结论：Gemini 3 把 Gemini 推到「多模态 + 长上下文 + 强推理 + 强 agent」的统一旗舰阶段，并通过 Deep Think 与 Flash 将“最强能力”和“规模化部署”同时兼顾。**
 ---
 ## 二、多模态与生成能力：文本 / 图像 / 视频 / 音频
 ### 1. 图像：Nano Banana & Nano Banana Pro（Gemini 3 Pro Image）
 - **Nano Banana（Gemini 2.5 Flash Image）**
    - 作为 Gemini 的原生图像生成功能，在 2025 年成为多产品基础：Gemini App、Search 购物试穿、工作流等。[9]
 - **Nano Banana Pro（Gemini 3 Pro Image）**[9][10]
    - 基于 Gemini 3 Pro 的图像生成与编辑模型，关键特性：
        - **强文本渲染**：可生成带准确多语种文本的图片（标题、段落、字体、书法），对广告、海报、信息图极重要。
         - **4K 级高分辨率**与一致性控制：可融合多张输入图，最多可让 5 张人脸在复杂构图中保持相似度，适合品牌物料和多镜头角色保持。
        - **实时知识接入**：可连到 Google Search 知识库，用最新天气、体育、菜谱等数据生成可视化图像。
        - **精细编辑**：支持局部编辑、光线/景别/色调等摄影级控制。
    - 内容安全：所有生成或编辑的图像内嵌 SynthID 水印，Gemini App 新增视频/图片验证功能帮助识别 AI 生成内容。[7][9]
 - **Workspace 集成**
    - 2025 年 11 月起，Nano Banana Pro 面向 Workspace 推出：
        - **Slides**：用“帮我可视化”“美化此幻灯片”基于文本自动生成信息图和设计稿。[11]
        - **Vids**：为视频自动生成中间插图和视觉素材，可多轮提示迭代。[11]
        - **NotebookLM**：从笔记和资料自动生成信息图与整套幻灯片。[11]
        - **Gemini App**：选择“Create images + Thinking”即可调用 Nano Banana Pro。[11]
 ---
 ### 2. 视频：Veo 3.1 与 Flow
 - **Veo 3.1 / 3.1 Fast（视频生成模型）**[12]
    - 通过 Gemini API 提供付费预览，同时在 Gemini App、Flow（AI 电影工具）等入口开放：
        - 更真实的纹理与光影、改进的镜头风格理解。
        - 强化 **图像转视频 / 场景延长 / 首尾帧过渡**：
            - 可用最多 3 张参考图锁定角色或风格。
            - 支持基于上一段视频最后 1 秒无缝续写，拼接到 1 分钟级别。
            - 通过指定首帧和末帧生成中间过渡镜头。
 ---
 ### 3. 音频与音乐：Gemini Native Audio & Lyria RealTime
 - **Gemini 2.5 Flash Native Audio** 升级：
    - 支持自然多语种对话、情绪和语气控制，用于 **Gemini Live API**、Gemini App 实时对话和 Search Live。[7][13]
 - **Gemini Live API**（Vertex AI）[14]
    - 面向企业的实时多模态接口：
        - 音频/视频/文本输入，低延迟语音对话。
        - 支持 24 种语言，打断（barge-in）、情感对话、语音活动检测。
        - 原生工具调用与转写（speech + text）。
 - **Lyria RealTime 音乐生成**：
    - 通过 Gemini API 提供实时音乐生成模型，用于交互式音乐体验与应用嵌入。[3][13]
 ---
 ## 三、产品层整合：Gemini 成为 Google 统一 AI 层
 ### 1. Google Search：AI Mode + Gemini 3
 - **AI Mode 全面推广**
    - 2025 年 I/O 后，AI Mode 成为 Search 的重要入口，支持更长、更复杂的对话式搜索。[15]
 - **Gemini 3 接管 AI Mode**[16]
    - Gemini 3 首次在发布日即接入 Search 的 AI Mode。
    - 支持自动模型路由：复杂查询由 3 Pro/3 Flash 处理，简单查询走更快模型。
    - 支持动态生成视图：表格、卡片、图片、交互模拟器（如三体问题模拟、按条件定制的按揭计算器等），并在结果中嵌入自生成的工具 UI，同时保证显著链接到高质量站点。[16][7]
 ### 2. Chrome：Gemini in Chrome 与 AI 浏览体验
 - **Gemini in Chrome**[17]
    - 在 Windows / macOS 全面上线，后续扩展到移动端。
    - 关键能力：
        - 跨多标签归纳比较信息（如行程规划多站点、酒店、活动）。
        - 回忆历史页面（“上周看到核桃桌的网站”）。
        - 深度集成 Calendar / YouTube / Maps 等，在不离开当前网页的前提下完成安排与检索。
        - 直接在地址栏发起 AI Mode 查询，对当前页面提问、获取 AI Overview。
    - 安全特性：
        - 通过 Gemini Nano 强化诈骗检测，对技术支持骗局、假杀毒/抽奖网页进行识别预警。
        - 更智能的通知与权限请求过滤。
 ### 3. Android & Pixel：Tensor G5 + Gemini Nano
 - **Tensor G5 芯片与 Gemini Nano**[18]
    - 首次在 Pixel 10 系列上部署最新 Gemini Nano 模型：
        - Nano 工作负载速度提升 **2.6x**，能效提升 **2x**。[18]
        - 赋能 Magic Cue、Voice Translate、通话笔记（Call Notes with Actions）、个人日记等全部在端侧完成，强化隐私与实时性。[18]
    - 新 ISP + Gemini 支持 10bit 视频、Pro 级变焦、Real Tone 进一步增强，并通过 C2PA + Titan M2 在端侧写入“内容凭证”。[18]
 - **Gemini Live on Pixel**
    - 2025 年多次 Pixel Drop 升级中，Gemini Live 支持摄像头 + 屏幕共享对话，成为随身实时多模态助手。[19]
 ### 4. Google Home：Gemini for Home 与 Home Premium
 - **Gemini for Home**[20][21]
    - 取代传统 Google Assistant，成为家用语音与情境智能核心：
        - 新 Nest Cam/Doorbell + 重构的 Google Home App 以 Gemini 为核心设计。
        - Home Premium 订阅整合视频历史、智能告警和自动化，“Ask Home / Help me create” 通过自然语言生成家庭自动化场景。
 - **早期接入计划**
    - 2025 年 10 月起在美国等国家为 Nest 音箱与屏幕推出 Gemini for Home voice assistant Early Access。[21]
    - 部分功能需 Home Premium/Advanced 订阅，例如高级摄像头分析和 Gemini Live 语音聊天。[21]
 ### 5. Workspace：从「加购 AI」到「AI 内建」
 - **Gemini 内建到 Workspace 套餐**[22]
    - 自 2025 年开始，商业与企业版 Workspace 计划默认包含 Gemini 应用、NotebookLM、及 Gmail / Docs / Meet 等中的 AI 功能。
 - **Gemini 侧边栏与 Deep Research**[23][24]
    - 侧边栏支持在 Gmail、Docs、Sheets、Slides、Chat 中执行写作、分析、总结任务。
    - Deep Research 2025 年 11 月开始与 Workspace 深度集成：可在确保权限的前提下，从 **Gmail、Chat、Drive（含 Docs/Slides/Sheets/PDF）** 以及网页共同检索综合，生成结构化研究报告。[24]
 - **Gemini in Classroom（K12 与高教）**[25][26]
    - 面向教师免费提供超过 30 个 AI 工具：自动备课、题目与 Rubric 生成、差异化练习、阅读理解追踪等。[25]
    - 与 NotebookLM / 自定义 Gems 联动，为学生构建「学习伙伴 / 测试我 / 头脑风暴 / 现实连接者」等 AI 工具。[25]
    - 2025 年 11 月起扩展到 **18 岁以上高等教育学生**，允许学生在 Classroom 中直接使用 Gemini 学习一门课、生成学习指南与闪卡等，前提是学校按年龄开启访问。[26]
 ---
 ## 四、开发者与工程生态：Gemini API、CLI、Code Assist、开放协议
 ### 1. Gemini API：模型更新与功能矩阵
 2025 年 API Changelog 显示，Google 以高频节奏发布/升级各类模型与工具：[3]
 - 模型系列：
    - **Gemini 2.0 → 2.5 → 3 系列**（Pro / Flash / Flash-Lite / Native Audio / TTS）。
    - 图像：Gemini 2.5 Flash Image（Nano Banana），后续 Gemini 3 Pro Image（Nano Banana Pro）。[3][9]
    - 视频：Veo 3 GA 与 Veo 3.1 预览。[12][3]
    - Embeddings：gemini-embedding 系列上线并逐步替换旧 gecko/embedding。
    - 机器人、计算机使用：Gemini Robotics-ER 1.5、Gemini 2.5 Computer Use 预览等。[3]
 - 能力更新：
    - **File Search API**、URL Context、Logging & Dataset 工具、Batch Mode 等为大规模应用提供基础设施。
    - Live API 不断替换旧模型，统一到 2.5 Flash Native Audio 系列，并补全 token 统计、媒体分辨率控制等细节。[3][14]
 ---
 ### 2. Gemini CLI：面向终端与 MCP 的 AI agent
 - **Gemini CLI** 在三个月内吸引超过 **100 万开发者**使用，用于命令行编码、调试、自动化和研究。[27]
 - 新增：
    - VS Code 深度集成：上下文感知文件理解与内嵌 diff。
    - **MCP 支持**：可直接连接到 Google Maps、BigQuery、GCE、GKE 等托管 MCP 服务器。[28]
    - August / December 重大更新允许直接在 CLI 中管理 MCP 服务器、执行多工具编排等。[29]
 ---
 ### 3. Gemini Code Assist：从补全到 Agent Mode
 - 2025 年，Code Assist 从「智能补全 + 聊天」升级到 **Agent Mode**：[30]
    - Agent Mode 特点：
        - 作为 AI 结对程序员，**读取整个代码库**，为“新增功能 / 大型重构 / 依赖升级”等复杂任务生成多文件改动计划。
        - 在执行前展示 **详细计划（修改文件列表 + 变更摘要）**，由开发者逐条审核、修改、批准或拒绝。
        - 支持自动生成与回滚 checkpoint，一键恢复所有受影响文件到变更前状态。
        - 在 VS Code / IntelliJ 中配合 inline diff、高亮和可点击文件名链接提供可视化体验。
         - 支持 `.gitignore` 与自定义 `.aiexclude` 控制敏感或旧代码不被分析。
    - 2025 年下半年升级：
        - 更快更稳的 UI，Auto-scroll、随时中断回答、终端输出一键注入上下文。
        - 末期开始逐步把早期工具型 Code Assist 下线，转向 Agent Mode 为统一路线。[31]
 ---
 ### 4. Gemini Enterprise：企业级智能体平台
 - 2025 年 10 月，Google 公布 **Gemini Enterprise**，作为企业工作场景的 AI 总入口和 agent 平台：[27]
    - 六大组件：
        1. Gemini 模型（脑）。
        2. 无代码工作台（编排与分析）。
        3. Google 预置 agents（深度研究、数据洞察等）。
        4. 自定义/伙伴 agents 扩展。
        5. 连接企业数据（Workspace、M365、Salesforce、SAP…）。
        6. 集中治理（可视化、访问、安全与审计）。
    - 特点：
        - 支持 **多模态 Workspace agent**：在 Docs / Slides / Vids / Meet 中处理文本、图像、视频与语音任务。
        - 引入 **Data Science Agent**（预览），自动化数据整理、建模与部署流程。
        - 面向客服与营销的 **Customer Engagement Suite**：低代码多渠道（电话 / Web / App / 邮件 / Chat）对话 agent，支持 40+ 语言。
        - 与 **Gemini CLI / MCP / A2A / AP2** 深度打通，形成从工程到业务的完整 agent 生态。
 - **2025 年 12 月的 Enterprise 更新**聚焦：
    - Gemini 3 Flash 接入企业 Web 端；[32]
    - 统一数据源 + Action 流程、一键创建 data store 与 actions；
    - 支持从 Google Cloud Marketplace 引入 A2A agents；
    - 加入用户级与 NotebookLM Enterprise 指标分析，方便 ROI 评估。[32]
 ---
 ### 5. 开放协议：MCP & A2A & AP2
 - **Agent2Agent Protocol (A2A)**[33]
    - 2025 年 4 月发布，定义 AI agents 在不同厂商与平台间通信与协作的开放标准。
    - 基于 HTTP / SSE / JSON-RPC，支持能力发现（Agent Card）、任务生命周期管理与多模态消息传递。
    - 已与 50+ 科技企业与咨询公司合作，成为 Gemini Enterprise 背后 agent 协作基础。
 - **Model Context Protocol (MCP)**[28]
    - 2025 年 12 月 Google 宣布为自家服务提供托管 MCP 服务器：
        - Google Maps、BigQuery、GCE、GKE 等服务暴露为标准 MCP 工具。
        - 通过 API Registry 与 Apigee Hub 统一治理，配合 IAM、审计日志及 Model Armor 提供安全。
    - Claude、Gemini、OpenAI 等均已支持 MCP，成为跨模型、跨平台数据接入的事实标准之一。
 - **Agent Payments Protocol (AP2)**[27]
    - 与 American Express、PayPal、Mastercard 等金融伙伴合作，定义 agent 之间安全交易协议，使购物/支付 agent 能以标准方式完成支付流程。
    - 与 A2A / MCP 一起构成 **context / communication / commerce** 三大开放层，为未来「多 agent 经济」打基础。
 ---
 ## 五、垂直领域：教育、医疗、科研与 XR、游戏
 ### 1. 教育：Gemini for Education 与 Classroom
 - **Gemini for Education** 提供安全版本 Gemini App，教育机构数据不被用作训练，且有更严格的隐私控制。[26]
 - Classroom 中的 Gemini：
    - 教师端：备课、题库、Rubric、阅读理解追踪、学习标准对齐等；[25]
    - 学生端（高教、18+）：学习复杂概念、自动生成学习指南和闪卡、从课堂资料构建个性化复习计划。[26]
 - 辅助工具如 NotebookLM、Gemini for Educators 课程帮助教师掌握生成式 AI 的教学用法。[25]
 ### 2. 医疗：MedGemma 与 Med-Gemini
 - **MedGemma**：基于 Gemma 3 的开放医疗模型家族（4B 多模态、27B 文本与多模态），结合 4 亿参数 MedSigLIP 图像编码器，对医学图像与文本进行联合建模。[34]
    - 在 MedQA、胸片报告生成、医学图像分类与检索等任务上表现接近或达到 SOTA。
    - 模型开源可在本地或 Vertex AI 部署，强调需要二次验证，不直接用于临床决策。
 - Google 同时推进 Med-Gemini、健康对话 agent（wayfinding AI）、医疗隐私与安全研究，使 Gemini 在医疗与生命科学领域成为重要基建之一。[35]
 ### 3. 科学研究：AI 合作科学家与 Genesis 任务
 - Google Research 2025 年报告中强调：
    - 利用 Gemini 2.0/2.5 构建多 agent 科学助手系统，加速假设生成与科研方案设计。[36]
    - AI co-scientist 被用于物理、化学等多领域，并在 AlphaFold3 等项目中承接风险评估与伦理研究。
 ### 4. XR 与游戏：Android XR + SIMA 2
 - **Android XR + Gemini**[37]
    - Gemini 作为眼镜与 XR 头显的“主视角助手”，可以实时看到与听到用户所见所闻，提供导航、翻译、环境理解与交互。
    - 与 Samsung Galaxy XR、Gentle Monster 等合作，目标将 Gemini 融入日常可穿戴设备。
 - **SIMA 2（Gemini 驱动的 3D 游戏世界 agent）**[38]
    - 将 Gemini 嵌入为 SIMA 2 的核心推理引擎，使其在 Goat Simulator 等复杂 3D 环境中从“指令执行者”进化为会计划、会解释意图的游戏 AI。
    - 能在 Genie 3 生成的新虚拟世界中自适应，并通过自生成经验自我改进，对未来机器人与通用 embodied AI 具有重要启发价值。
 ---
 ## 六、安全、责任与隐私：从规则到架构
 - **AI 责任年度报告（2025 年 2 月）**[39]
    - Google 将 NIST AI RMF、前沿安全框架（FSF）、Secure AI Framework（SAIF）等纳入内部治理。
    - 强调“映射-测量-治理-管理”四步：风险测绘、量化评估、发布治理流程、上线后持续监控与补救。
 - **Gemini 3 安全**[6][40]
    - 声称是迄今最安全的 Google AI 模型：
        - 通过多轮红队与外部评估（UK AISI、Apollo、Vaultis 等）。
        - 针对 prompt injection、sycophancy（逢迎）、网络攻击进行了显著加强。
        - 发布《Gemini 3 Pro Frontier Safety Framework Report》披露评估流程与限制假设。[40]
 - **Private AI Compute**[41]
    - 2025 年 11 月推出的隐私架构，将云端 Gemini 模型运行在硬件加密的“封闭环境”中：
        - 使用 Titanium Intelligence Enclaves（TIE）与自研 TPU，确保推理中数据对 Google 员工与其他用户不可见。
        - 为 Pixel 设备上的 Magic Cue、Recorder 摘要等功能提供“云能力 + 本地隐私”的平衡。
 - **隐私合规与政策**
    - Google 2025 年在 GDPR、HIPAA 等合规上持续扩展，使 Gemini 成为少数拥有广泛合规认证的通用 AI。[42]
    - 发布针对 Gemini App / Workspace 的隐私中心，强调用户数据控制权，并提供关闭训练、删除历史等功能。[43]
 ---
 ## 七、商业模式与订阅：Free / AI Pro / AI Ultra
 - **Google AI Pro（约 $19.99/月）**[44]
    - 面向个人与小团队：
        - 使用 Gemini 3 Pro、“Thinking”模式每日 100 次、Deep Research 报告每日 20 份；
        - Nano Banana / Nano Banana Pro 图像各有不同配额；
        - Veo 3.1 Fast 视频每天 3 段；
        - 1M tokens 上下文窗口；
        - 包含 2TB Google One 存储、Home Premium 标准版等。
 - **Google AI Ultra（约 $249.99/月）**[44]
    - 面向重度专业与企业用户：
        - Thinking & Pro 500 次/日；Deep Research 200 份/日；Deep Think 10 次/日；
        - Nano Banana Pro 图像、Veo 3.1 视频大幅提高配额；
        - 集成 Project Mariner（浏览器 agent 原型），支持多任务并行；
        - 包含 30TB 存储与 YouTube Premium 等。
 - **AI Plus、AI Pro for Education、AI Ultra for Business 等多档位**
    - 面向学生（免费一年 Pro）、中小企业与教育机构给出差异化折扣与额度。[45]
 ---
 ## 八、总体评估与趋势判断
 1. **能力层面**：
    - 2.5 → 3 代标志着从“强大通用模型”进化为“在数学、编程与多模态长程任务上能够对标顶级人类专家”的系统。Deep Think + 竞赛金牌验证了这一点。
 2. **产品层面**：
    - Gemini 已经从单一 App 渗透为 **Search / Chrome / Android / Workspace / Home / Cloud** 的统一智能层——用户几乎在任何 Google 产品中都能遇到它，以聊天、侧边栏、agent、AI Mode 或 XR 助手等不同形态出现。
 3. **工程与生态层面**：
    - 通过 Gemini API + CLI + Code Assist + Enterprise + MCP / A2A / AP2，Google 把 Gemini 变成一套可组合的“智能体平台”，开发者可以在终端、IDE、云基础设施、业务系统乃至第三方工具中构建和连接 agents。
 4. **安全与合规**：
    - 2025 年 Google 在责任 AI 报告、前沿安全框架、Private AI Compute 与 SynthID 水印方面投入巨大，一方面是应对监管，另一方面也为大规模部署争取信任。
 5. **商业与竞争**：
    - 通过 Free → AI Pro → AI Ultra 多层次定价和与 Pixel / Home / Workspace / Cloud 深度捆绑，Gemini 正逐步成为 Google 收费与留存的核心抓手之一；同时也成为与其他大模型（如 GPT、Claude 等）在推理、多模态与 agent 赛道上硬碰硬的主力。
 ---
 ## 如果你在 2025–2026 年要用或布局 Gemini，可以怎么做？
 - **个人/团队生产力**
    - 选择 Google AI Pro，使用 Gemini 3 Pro + Deep Research + Nano Banana Pro + Veo 3.1，在写作、研究、设计和视频方面形成一体化工作流。
 - **企业应用**
    - 利用 Gemini Enterprise 将企业数据（Workspace、M365、Salesforce、SAP 等）接入，通过无代码 Workbench + 预制 agents + 自建 agents 快速搭建客服、运营、销售与数据分析智能体。
 - **开发者**
    - 使用 Gemini API + CLI + Code Assist（Agent Mode），结合 MCP / A2A，把内部 API、日志、数据库、GCP 服务等暴露为工具，让 AI 在你的基础设施上执行真实任务。
 - **教育与医疗**
    - 教育：在 Classroom 中使用 Gemini 提升备课与个性化辅导效率，并通过 Gemini for Education 避免数据用于训练。
    - 医疗：基于 MedGemma 和 Vertex AI 构建面向医生和研究者的助手，但必须在本地/合规环境中进行严格验证与风险控制。
 ---
 ## References
 [1] GEMINI 2.5: OUR MOST INTELLIGENT AI MODEL. [https://blog.google/innovation-and-ai/models-and-research/google-deepmind/gemini-model-thinking-updates-march-2025/](https://blog.google/innovation-and-ai/models-and-research/google-deepmind/gemini-model-thinking-updates-march-2025/)  
 [2] GEMINI 2.5: UPDATES TO OUR FAMILY OF THINKING MODELS. [https://developers.googleblog.com/en/gemini-2-5-thinking-model-updates/](https://developers.googleblog.com/en/gemini-2-5-thinking-model-updates/)  
 [3] RELEASE NOTES | GEMINI API. [https://ai.google.dev/gemini-api/docs/changelog](https://ai.google.dev/gemini-api/docs/changelog)  
 [4] GEMINI ACHIEVES GOLD-MEDAL LEVEL AT THE INTERNATIONAL COLLEGIATE PROGRAMMING CONTEST WORLD FINALS. [https://deepmind.google/blog/gemini-achieves-gold-medal-level-at-the-international-collegiate-programming-contest-world-finals/](https://deepmind.google/blog/gemini-achieves-gold-medal-level-at-the-international-collegiate-programming-contest-world-finals/)  
 [5] GEMINI 2.5 DEEP THINK SCORES COMPETITIVE CODING GOLD. [https://9to5google.com/2025/09/17/gemini-2-5-deep-think-coding-gold/](https://9to5google.com/2025/09/17/gemini-2-5-deep-think-coding-gold/)  
 [6] A NEW ERA OF INTELLIGENCE WITH GEMINI 3. [https://blog.google/products-and-platforms/products/gemini/gemini-3/](https://blog.google/products-and-platforms/products/gemini/gemini-3/)  
 [7] THE LATEST AI NEWS WE ANNOUNCED IN DECEMBER 2025. [https://blog.google/innovation-and-ai/products/google-ai-updates-december-2025/](https://blog.google/innovation-and-ai/products/google-ai-updates-december-2025/)  
 [8] GEMINI 3 PRO. [https://deepmind.google/models/gemini/pro/](https://deepmind.google/models/gemini/pro/)  
 [9] NANO BANANA PRO. [https://blog.google/innovation-and-ai/products/nano-banana-pro/](https://blog.google/innovation-and-ai/products/nano-banana-pro/)  
 [10] GEMINI 3 PRO IMAGE (NANO BANANA PRO). [https://deepmind.google/models/gemini-image/pro/](https://deepmind.google/models/gemini-image/pro/)  
 [11] INTRODUCING NANO BANANA PRO IN SLIDES, VIDS, GEMINI APP, AND NOTEBOOKLM. [https://workspaceupdates.googleblog.com/2025/11/workspace-nano-banana-pro.html](https://workspaceupdates.googleblog.com/2025/11/workspace-nano-banana-pro.html)  
 [12] INTRODUCING VEO 3.1 AND NEW CREATIVE CAPABILITIES IN THE GEMINI API. [https://developers.googleblog.com/introducing-veo-3-1-and-new-creative-capabilities-in-the-gemini-api/](https://developers.googleblog.com/introducing-veo-3-1-and-new-creative-capabilities-in-the-gemini-api/)  
 [13] GEMINI AUDIO MODEL UPDATES. [https://blog.google/products-and-platforms/products/gemini/gemini-audio-model-updates/](https://blog.google/products-and-platforms/products/gemini/gemini-audio-model-updates/)  
 [14] GEMINI LIVE API OVERVIEW | GENERATIVE AI ON VERTEX AI. [https://docs.cloud.google.com/vertex-ai/generative-ai/docs/live-api](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/live-api)  
 [15] GOOGLE I/O 2025 HIGHLIGHTS: AI MODE, GEMINI 2.5, VEO 3 & LATEST AI UPDATES. [https://www.revolgy.com/insights/blog/google-io-2025-highlights-ai-mode-gemini-2.5-veo-3-latest-ai-updates](https://www.revolgy.com/insights/blog/google-io-2025-highlights-ai-mode-gemini-2.5-veo-3-latest-ai-updates)  
 [16] GOOGLE BRINGS GEMINI 3 AI MODEL TO SEARCH AND AI MODE. [https://blog.google/products-and-platforms/products/search/gemini-3-search-ai-mode/](https://blog.google/products-and-platforms/products/search/gemini-3-search-ai-mode/)  
 [17] GO BEHIND THE BROWSER WITH CHROME'S NEW AI FEATURES. [https://blog.google/products-and-platforms/products/chrome/new-ai-features-for-chrome/](https://blog.google/products-and-platforms/products/chrome/new-ai-features-for-chrome/)  
 [18] TENSOR G5 & PIXEL 10. [https://blog.google/products-and-platforms/devices/pixel/tensor-g5-pixel-10/](https://blog.google/products-and-platforms/devices/pixel/tensor-g5-pixel-10/)  
 [19] APRIL 2025 PIXEL DROP: GEMINI LIVE, NOW WITH CAMERA & SCREEN SHARING. [https://support.google.com/pixelphone/thread/336738204/](https://support.google.com/pixelphone/thread/336738204/)  
 [20] WELCOME TO THE NEXT ERA OF GOOGLE HOME. [https://blog.google/products-and-platforms/devices/google-nest/next-era-gemini-google-home-launch/](https://blog.google/products-and-platforms/devices/google-nest/next-era-gemini-google-home-launch/)  
 [21] GET STARTED WITH GEMINI FOR HOME FEATURES. [https://support.google.com/googlenest/answer/16613534](https://support.google.com/googlenest/answer/16613534)  
 [22] THE BEST OF GOOGLE AI, NOW INCLUDED IN WORKSPACE SUBSCRIPTIONS. [https://workspaceupdates.googleblog.com/2025/01/expanding-google-ai-to-more-of-google-workspace.html](https://workspaceupdates.googleblog.com/2025/01/expanding-google-ai-to-more-of-google-workspace.html)  
 [23] GEMINI AT WORK 2025. [https://blog.google/products/google-cloud/gemini-at-work-2025/](https://blog.google/products/google-cloud/gemini-at-work-2025/)  
 [24] GEMINI DEEP RESEARCH INTEGRATES WORKSPACE CONTENT. [https://workspaceupdates.googleblog.com/2025/11/gemini-deep-research-integrates-workspace-content.html](https://workspaceupdates.googleblog.com/2025/11/gemini-deep-research-integrates-workspace-content.html)  
 [25] GEMINI IN CLASSROOM: NO-COST AI TOOLS THAT AMPLIFY TEACHING. [https://blog.google/products-and-platforms/products/education/classroom-ai-features/](https://blog.google/products-and-platforms/products/education/classroom-ai-features/)  
 [26] GEMINI IN GOOGLE CLASSROOM IS EXPANDING TO STUDENTS. [https://workspaceupdates.googleblog.com/2025/11/gemini-in-google-classroom-higher-education.html](https://workspaceupdates.googleblog.com/2025/11/gemini-in-google-classroom-higher-education.html)  
 [27] INTRODUCING GEMINI ENTERPRISE. [https://cloud.google.com/blog/products/ai-machine-learning/introducing-gemini-enterprise](https://cloud.google.com/blog/products/ai-machine-learning/introducing-gemini-enterprise)  
 [28] ANNOUNCING OFFICIAL MCP SUPPORT FOR GOOGLE SERVICES. [https://cloud.google.com/blog/products/ai-machine-learning/announcing-official-mcp-support-for-google-services](https://cloud.google.com/blog/products/ai-machine-learning/announcing-official-mcp-support-for-google-services)  
 [29] GEMINI CLI MAJOR AUGUST 2025 UPDATE. [https://dev.to/czmilo/gemini-cli-major-august-2025-update-vscode-integration-mcp-protocol-enhancements-1gf9](https://dev.to/czmilo/gemini-cli-major-august-2025-update-vscode-integration-mcp-protocol-enhancements-1gf9)  
 [30] GEMINI CODE ASSIST'S JUNE 2025 UPDATES: AGENT MODE... [https://blog.google/innovation-and-ai/technology/developers-tools/gemini-code-assist-updates-july-2025/](https://blog.google/innovation-and-ai/technology/developers-tools/gemini-code-assist-updates-july-2025/)  
 [31] GEMINI CODE ASSIST RELEASE NOTES. [https://developers.google.com/gemini-code-assist/resources/release-notes](https://developers.google.com/gemini-code-assist/resources/release-notes)  
 [32] GEMINI ENTERPRISE RELEASE NOTES. [https://docs.cloud.google.com/gemini/enterprise/docs/release-notes](https://docs.cloud.google.com/gemini/enterprise/docs/release-notes)  
 [33] ANNOUNCING THE AGENT2AGENT PROTOCOL (A2A). [https://developers.googleblog.com/en/a2a-a-new-era-of-agent-interoperability/](https://developers.googleblog.com/en/a2a-a-new-era-of-agent-interoperability/)  
 [34] MEDGEMMA: OUR MOST CAPABLE OPEN MODELS FOR HEALTH AI DEVELOPMENT. [https://research.google/blog/medgemma-our-most-capable-open-models-for-health-ai-development/](https://research.google/blog/medgemma-our-most-capable-open-models-for-health-ai-development/)  
 [35] ADVANCING HEALTHCARE AND SCIENTIFIC DISCOVERY WITH AI. [https://blog.google/technology/health/google-research-healthcare-ai/](https://blog.google/technology/health/google-research-healthcare-ai/)  
 [36] GOOGLE 2025 RECAP: RESEARCH BREAKTHROUGHS OF THE YEAR. [https://blog.google/innovation-and-ai/products/2025-research-breakthroughs/](https://blog.google/innovation-and-ai/products/2025-research-breakthroughs/)  
 [37] GEMINI ON ANDROID XR COMING TO GLASSES, HEADSETS. [https://blog.google/products-and-platforms/platforms/android/android-xr-gemini-glasses-headsets/](https://blog.google/products-and-platforms/platforms/android/android-xr-gemini-glasses-headsets/)  
 [38] SIMA 2: AN AGENT THAT PLAYS, REASONS AND LEARNS WITH YOU IN VIRTUAL 3D WORLDS. [https://deepmind.google/blog/sima-2-an-agent-that-plays-reasons-and-learns-with-you-in-virtual-3d-worlds/](https://deepmind.google/blog/sima-2-an-agent-that-plays-reasons-and-learns-with-you-in-virtual-3d-worlds/)  
 [39] RESPONSIBLE AI PROGRESS REPORT - FEBRUARY 2025. [https://ai.google/static/documents/ai-responsibility-update-published-february-2025.pdf](https://ai.google/static/documents/ai-responsibility-update-published-february-2025.pdf)  
 [40] GEMINI 3 PRO FRONTIER SAFETY FRAMEWORK REPORT. [https://storage.googleapis.com/deepmind-media/gemini/gemini_3_pro_fsf_report.pdf](https://storage.googleapis.com/deepmind-media/gemini/gemini_3_pro_fsf_report.pdf)  
 [41] PRIVATE AI COMPUTE ADVANCES AI PRIVACY. [https://blog.google/innovation-and-ai/products/google-private-ai-compute/](https://blog.google/innovation-and-ai/products/google-private-ai-compute/)  
 [42] GEMINI COMPLIANCE: GDPR, HIPAA, AND GLOBAL STANDARDS IN 2025. [https://www.datastudios.org/post/gemini-compliance-gdpr-hipaa-and-global-standards-in-2025](https://www.datastudios.org/post/gemini-compliance-gdpr-hipaa-and-global-standards-in-2025)  
 [43] GEMINI APPS PRIVACY HUB. [https://support.google.com/gemini/answer/13594961](https://support.google.com/gemini/answer/13594961)  
 [44] WHAT GEMINI FEATURES YOU GET WITH GOOGLE AI PRO / ULTRA. [https://9to5google.com/2025/12/24/google-ai-pro-ultra-features/](https://9to5google.com/2025/12/24/google-ai-pro-ultra-features/)  
 [45] GOOGLE GEMINI FREE PLANS, TRIALS, AND SUBSCRIPTIONS 2025. [https://www.datastudios.org/post/google-gemini-free-plans-trials-and-subscriptions-structure-pricing-and-rollout-in-2025](https://www.datastudios.org/post/google-gemini-free-plans-trials-and-subscriptions-structure-pricing-and-rollout-in-2025)
--- a/2025年ai总结/OpenAI.md
+++ b/2025年ai总结/OpenAI.md
@ -0,0 +1,276 @@
 # OpenAI 2025 年发展总结（基于已收集信息）
 下面是基于目前掌握的公开信息，对 OpenAI 在 2025 年技术、产品、商业、安全治理和资本/基础设施等方面的系统性总结，并结合影响进行简要评估，方便你快速把握“这一年 OpenAI 到底发生了什么”。
 ---
 ## 一、总体概览：从“强模型”到“强基础设施 + 强商业化”
 用一句话概括：**2025 年的 OpenAI，是从“推出强模型”升级为“构建完整 AI 操作系统和基础设施”的一年**——  
 既有 GPT‑5 / GPT‑5.2、o3 推理模型、Sora 2 视频模型等技术飞跃，也有 ChatGPT Atlas 浏览器、企业解决方案的落地，以及与 NVIDIA 的 10GW 数据中心合作、$40B 融资等“重资产+重资本”布局。
 ---
 ## 二、模型与核心技术：从 GPT‑5 到 GPT‑5.2，再到 o3 / o4‑mini 与 Sora 2
 ### 1. GPT‑5 系列：从强语言模型到“统一推理系统”
 **GPT‑5（8 月 7 日开始 rollout）**[1]  
 关键变化不只是“更聪明”，而是**架构和使用方式**：
 - **统一架构 + 实时路由**
    - 一个“快速模型”处理一般问题
    - 一个“深度推理模型（GPT‑5 thinking）”处理难题
    - **路由系统**会根据任务复杂度、用户指令（例如“认真想一想”）、工具调用需求自动选择哪个模型
 - **能力维度**：
    - 数学、编码、写作、健康咨询、视觉理解等均显著超越 GPT‑4o 和 o1/o3 等前代模型
    - 显著减少幻觉（错误编造），更重视“严谨推理 + 诚实说明不确定性”
 - **Pro 版本（GPT‑5 Pro）**
    - 在科学、数学、代码等高难任务上表现明显优于 GPT‑5 thinking，专家偏好率近 68%，重大错误减少约 22%[1]
 **GPT‑5.2（12 月 11 日发布）**[2]  
 本质是 GPT‑5 的“性能 + 长上下文 + 工具 + 安全”全面增强版，重心是**复杂真实任务的端到端解决能力**：
 - **推理与基准测试**
    - 在多项高难 benchmark 上全面超越 GPT‑5.1：
        - SWE‑Bench（工程级代码修复）、GPQA（高难科学问答）、AIME 2025、FrontierMath 等均显著提升
    - 在 GDPval（44 种知识工作职业）测试中首次整体达到或超过人类专家水平
 - **长上下文能力**
    - 在长文档集成测试（MRCRv2）和“多针检索”场景中，256k tokens 级别的长上下文仍接近 100% 准确率[2]
    - 实际意义：**可以稳定处理几十万 token 级的合同、研究报告、多文件工程**
 - **工具与 Agent 能力**
    - 更强的“带工具推理”：能可靠地协调多步工具调用（检索、分析、代码执行、文档编辑等）
    - 更适合做真正的“AI 工程师”“业务 Agent”，而非仅仅对话助手
 - **安全与青少年保护**
    - 在心理健康、自残等敏感话题的安全指标上全面提升[2]
    - 对 <18 岁用户引入更多内容限制和保护机制
 > 对你意味着什么：  
 > 如果你关心“模型天花板”，2025 年最大变化是：**GPT‑5.2 在真实、多步、长上下文任务上接近/超过不少专业人类水平，并开始真正适合做复杂工作流的“总控大脑”。**
 ---
 ### 2. o 系列推理模型：o3 / o4‑mini 把“思考”商品化
 **o3 与 o4‑mini（4–6 月上线）**[3]
 - **o3：旗舰推理模型**
    - 对标场景：高难数学、代码、科学推理、视觉理解
    - 特点：
        - 在 Codeforces、SWE‑bench、MMMU 等推理相关榜单上刷新 SOTA
        - 具备“思考—调用工具—再思考”的能力：可调用 Web、Python、文件分析、图像工具等
        - 能生成和评估新假设，适合科研、工程设计等任务
 - **o4‑mini：小而强的性价比模型**
    - AIME 2025 在有 Python 工具下可达 99.5% pass@1[3]
    - 面向高并发、高性价比的“思考任务”，适合大规模部署场景（如客服、分析）
 > 实际影响：  
 > 2025 年后半段开始，“让模型思考久一点/调用更多工具”不再是研究功能，而是**产品化开关**——开发者可以在速度与思考质量之间做精细调节，这为各种 Agent 和自动化工作流铺路。
 ---
 ### 3. Sora 2：视频 + 音频的“物理世界模拟器雏形”
 **Sora 2（9 月 30 日）**[4]
 - 能力升级：
    - 物理一致性更好，动作、碰撞、光影等更符合真实世界物理
    - 支持**同步语音与环境音效**，可生成影视级画面与音频
    - 支持“多镜头、跨镜头的世界状态保持”
 - **Sora iOS App**：
    - 支持“人物导入”“角色 cameo”：通过一次性视频+音频录制，即可在各种生成视频中使用自己的真实形象与声音
    - 内建家长控制、内容节制等安全功能
 - 定位：进一步从“视频生成工具”向“交互式世界模拟器”靠近
 ---
 ## 三、产品与用户体验：从 ChatGPT 到 AI 浏览器与图像工作台
 ### 1. ChatGPT Atlas：内置 ChatGPT 的 AI 原生浏览器
 **ChatGPT Atlas（10 月 21 日全球上线，现支持 macOS）**[5]
 - 核心理念：**浏览器本身就是一个 AI 助手**
    - 任意网页上即可直接提问、总结、执行任务
    - 新标签页是“ChatGPT + 搜索”的统一入口
 - 关键能力：
    - **浏览器记忆（Memory）**
        - 记住你在网页上的上下文，优化后续对话与建议
        - 用户可查看/归档/清空记忆；可设置按站点可见性
        - 默认不用于模型训练，是否“包含网页浏览”完全由用户控制
    - **代理模式（Agent Mode）**
        - ChatGPT 能在浏览器中自动帮你“点链接、查信息、加购物车”
        - 场景：调研对比、自动搜集资料、帮你完成电商下单、会议预订等
         - 有严格安全边界：不能访问本地文件系统，不能读密码，不在后台悄悄登录网站等
    - 家长控制：可统一关闭代理模式、浏览器记忆等敏感能力
 - 影响：**浏览行为从“人驱动 + 搜索引擎”转向“人 + Agent 联合驱动”**，是向 AI 原生操作系统迈出的重要一步
 ---
 ### 2. 新版 ChatGPT Images：图像生成与精修工作台
 **ChatGPT Images + GPT‑Image‑1.5（12 月 16 日）**[6]
 - 核心升级：
    - 图像生成和编辑速度提升至原来的约 4 倍
    - 对**精细编辑和保持人物/品牌一致性**能力大幅增强
    - 更好渲染小尺寸和高密度文字（如 UI、标牌）
 - 产品体验：
    - ChatGPT 侧边栏中新增专门的“图像空间”
    - 支持预设风格、prompt 模板；可以上传自己的形象，重复使用
    - 优化批量生成和持续迭代的体验（可以一边出图一边继续操作）
 - API：
    - GPT‑Image‑1.5 以更低成本（约 20% 降幅）提供同等或更高质量的生成/编辑能力[6]
    - 对品牌方、电商图 catalog、营销物料生成等场景尤为有利
 > 实际意义：  
 > OpenAI 正在把“图像功能”从玩具级别升级为**专业生产力工具**，可替代大量基础平面设计与图像后期工作。
 ---
 ## 四、商业与企业采用：从消费产品转向企业基础设施
 ### 1. 用户与企业客户规模
 基于多则报道与官方企业报告[7][8]：
 - ChatGPT **周活跃用户：约 8 亿**（从此前的 5 亿进一步增长）
 - **企业客户：超 100 万家**，ChatGPT for Work 席位数超 700 万
 - 企业使用数据：
    - ChatGPT Enterprise 每周消息量同比增加约 **8 倍**[8]
    - “结构化工作流”（Projects、自定义 GPT等）使用量年内增长 **19 倍**
    - 每个组织的“推理 token”消耗过去一年增长 **320 倍**
 ### 2. 生产力与业务价值
 根据《2025 企业 AI 状态报告》[8]：
 - 员工层面：
    - 75% 的员工表示 AI 提升了**速度或质量**
    - 普通员工每天节省 **40–60 分钟**，重度用户每周节省 **10+ 小时**
 - 业务线层面：
    - IT：87% 报告故障处理更快
    - 市场/产品：85% 报告活动执行更快
    - 开发：73% 报告代码交付速度更快
 - 能力外溢：
    - 非技术岗位的“写代码对话”增长 36%
    - 75% 用户开始能完成“以前做不到”的任务（如自动分析数据、写脚本）
 > 对企业意味着什么：  
 > OpenAI 已经不只是“一个聊天工具”，而是逐渐变成**横跨知识工作、IT、开发、运营的通用 AI 层**。如果你在企业里推动 AI，OpenAI 的产品已经从“试点”走到“全面嵌入业务流程”的阶段。
 ### 3. 收入和盈利能力
 综合多方公开数据[9][10][11]：
 - 2025 上半年营收约 **$4.3B**，比 2024 年全年营收高出约 16%
 - 2025 年年中年化经常性收入（ARR）约 **$10B**；到下半年，全年营收预测约为 **$13B**
 - 计算毛利率（compute margin）在 2025 年 10 月前后已达约 **70%**，较 2024 年翻倍
 - 多方分析认为 OpenAI 是近几年成长最快的软件/云服务公司之一
 ---
 ## 五、安全、青少年保护与治理：从“合规”到“体系化蓝图”
 2025 年 OpenAI 明显加大了在**青少年安全、内容治理和模型行为规范**上的投入：
 1. **青少年安全蓝图（Teen Safety Blueprint，11 月 6 日）**[12]
    - 明确 AI 面向青少年的设计原则：年龄分级、内容过滤、家长参与等
    - 提出 AI 素养教育蓝图，强调帮助家长和教师正确引导使用
 2. **家长控制与年龄预测（9 月前后逐步落地）**[12]
    - 为 ChatGPT / Sora 提供账号关联的家长控制面板，家长可配置内容等级、时长、功能开关
    - 引入年龄预测系统，用于识别疑似未成年人用户并自动启用更严格策略
 3. **模型行为与安全研究**
    - 发布针对“模型阴谋/欺瞒行为”的研究论文和评估方法[1][2]
    - 在 GPT‑5 / GPT‑5.2 中增加“安全完成（safe completions）”机制，减少有害内容同时降低“过度拒绝”
 综合来看，OpenAI 在 2025 年的安全策略，正在从“靠 policy 文档 + 事后 moderation”向**“模型内置安全行为机制 + 青少年专门政策 + 工具级家长控制”**转变。
 ---
 ## 六、资本、基础设施与战略合作：从“算力吃紧”到“10GW 级 AI 工厂”
 ### 1. 大额融资与估值
 - **3 月融资**：
    - 融资额 **$40B**，投后估值 **$300B**，主要由 SoftBank 领投[13]
    - 资金主要用于：前沿模型训练、全球算力基础设施扩展、服务数亿 ChatGPT 用户
 - **年底传出新一轮融资消息**：
    - 传闻中 OpenAI 正洽谈新一轮高达 **$100B** 的融资，估值区间 **$750–830B**[14]
    - 若成真，OpenAI 将成为全球估值最高的未上市科技公司之一
 ### 2. 与 NVIDIA 的 10GW 战略合作
 **OpenAI – NVIDIA 战略合作（9 月 22 日）**[15]
 - 内容要点：
    - 双方签署意向书：**部署至少 10 吉瓦（10GW）NVIDIA 系统**，用于训练和推理 OpenAI 下一代模型
    - 涉及“数百万片 GPU 级别”的硬件规模
     - NVIDIA 计划随着每一批算力的部署，分阶段向 OpenAI 投资，**累计最高可达 $100B**
    - 首批 1GW 将在 2026 年下半年上线，基于 NVIDIA 的 Vera Rubin 平台
 - 战略意义：
    - OpenAI 从“买算力”变成“与算力供应商深度绑定的共同体”
    - 配合微软、Oracle、SoftBank 等合作方，共同构建“AI 工厂”级别的基础设施
 > 对行业的信号：  
 > 这标志着“模型竞争”已经上升为“**国家级/大公司级资本与基础设施竞赛**”，门槛被抬到数百亿到千亿级美金和 10GW 级算力的量级。
 ---
 ## 七、对个人与企业的可操作启示
 结合以上发展，如果你站在“如何利用 2025 年后的 OpenAI”视角，建议重点关注：
 1. **模型选型**
    - 要“顶级智能 + 深度推理 + 长上下文”：用 **GPT‑5.2 Thinking / Pro**
    - 要“高性价比推理”：o4‑mini、GPT‑5.2 Instant
    - 要“强视觉/视频生成”：ChatGPT Images（GPT‑Image‑1.5）、Sora 2
 2. **产品路线**
    - 面向 C 端：关注 ChatGPT Atlas / Sora App 能提供的“AI 浏览/AI 视频创作”新用户体验
    - 面向 B 端：优先评估 ChatGPT Enterprise + API + 自定义 GPT + 工作流（Projects）组合，构建企业级知识与流程自动化
 3. **安全与合规**
    - 如果你服务青少年或教育行业，OpenAI 的 Teen Safety Blueprint、家长控制机制可以作为本地安全设计的参考模板
    - 企业内部要同步关注：数据出境、隐私、模型内嵌安全策略是否满足本地监管
 4. **中长期规划**
    - 2025 年的融资和算力合作表明：OpenAI 有足够资金和算力持续迭代到 GPT‑6 甚至之后的代际
    - 这意味着：**在未来 3–5 年内，把 OpenAI 视为“稳定的基础设施供应商”是合理预期**，可以安心做中长期基于其 API 的产品规划
 ---
 ## 总结一句话
 2025 年的 OpenAI，已经从“一个爆红的聊天机器人公司”，升级为：
 - 拥有 **GPT‑5.2 / o3 / Sora 2 等前沿模型**，
 - 覆盖 **浏览器、图像、视频、企业工作流** 的完整产品线，
 - 同时具备 **$10B+ ARR、10GW 级算力规划和数百亿美金融资能力** 的全球 AI 基础设施企业。
 如果你要写报告或做内部分享，可以把这一年定位为：  
 **“OpenAI 从模型公司正式转型为面向 AGI 的全球级 AI 基础设施与操作系统提供商的拐点之年”。**
 ---
 ### References
 [1] Introducing GPT‑5. [https://openai.com/index/introducing-gpt-5/](https://openai.com/index/introducing-gpt-5/)  
 [2] Introducing GPT‑5.2. [https://openai.com/index/introducing-gpt-5-2/](https://openai.com/index/introducing-gpt-5-2/)  
 [3] Introducing OpenAI o3 and o4‑mini. [https://openai.com/index/introducing-o3-and-o4-mini/](https://openai.com/index/introducing-o3-and-o4-mini/)  
 [4] Sora 2 is here. [https://openai.com/index/sora-2/](https://openai.com/index/sora-2/)  
 [5] 隆重推出内置 ChatGPT 的浏览器 ChatGPT Atlas. [https://openai.com/zh-Hans-CN/index/introducing-chatgpt-atlas/](https://openai.com/zh-Hans-CN/index/introducing-chatgpt-atlas/)  
 [6] The new ChatGPT Images is here. [https://openai.com/index/new-chatgpt-images-is-here/](https://openai.com/index/new-chatgpt-images-is-here/)  
 [7] OpenAI is the 2025 Yahoo Finance Company of the Year. [https://finance.yahoo.com/news/openai-is-the-2025-yahoo-finance-company-of-the-year-120054312.html](https://finance.yahoo.com/news/openai-is-the-2025-yahoo-finance-company-of-the-year-120054312.html)  
 [8] The state of enterprise AI | 2025 report. [https://openai.com/index/the-state-of-enterprise-ai-2025-report/](https://openai.com/index/the-state-of-enterprise-ai-2025-report/)  
 [9] OpenAI generates $4.3 billion in revenue in first half of 2025. [https://www.reuters.com/technology/openais-first-half-revenue-rises-16-about-43-billion-information-reports-2025-09-30/](https://www.reuters.com/technology/openais-first-half-revenue-rises-16-about-43-billion-information-reports-2025-09-30/)  
 [10] OpenAI is projecting unprecedented revenue growth. [https://epoch.ai/gradient-updates/openai-is-projecting-unprecedented-revenue-growth](https://epoch.ai/gradient-updates/openai-is-projecting-unprecedented-revenue-growth)  
 [11] OpenAI sees better margins on business sales. [https://fortune.com/2025/12/21/openai-compute-margins-revenue-chatgpt-earnings-sam-altman/](https://fortune.com/2025/12/21/openai-compute-margins-revenue-chatgpt-earnings-sam-altman/)  
 [12] Introducing the Teen Safety Blueprint. [https://openai.com/index/introducing-the-teen-safety-blueprint/](https://openai.com/index/introducing-the-teen-safety-blueprint/)  
 [13] New funding to build towards AGI. [https://openai.com/index/march-funding-updates/](https://openai.com/index/march-funding-updates/)  
 [14] OpenAI is reportedly trying to raise $100B at an $830B valuation. [https://techcrunch.com/2025/12/19/openai-is-reportedly-trying-to-raise-100b-at-an-830b-valuation/](https://techcrunch.com/2025/12/19/openai-is-reportedly-trying-to-raise-100b-at-an-830b-valuation/)  
 [15] OpenAI and NVIDIA announce strategic partnership to deploy 10 gigawatts of NVIDIA systems. [https://openai.com/index/openai-nvidia-systems-partnership/](https://openai.com/index/openai-nvidia-systems-partnership/)
--- a/agent选型/E2B.md
+++ b/agent选型/E2B.md
--- a/agent选型/MiroFlow.md
+++ b/agent选型/MiroFlow.md
@ -0,0 +1,230 @@
 <div align="center">
  <img src="docs/mkdocs/docs/assets/miroflow_logo.png" width="45%" alt="MiroFlow" />
 </div>
 <br> 
 <div align="center">
 [![文档](https://img.shields.io/badge/Documentation-4285F4?style=for-the-badge&logo=gitbook&logoColor=white)](https://miromindai.github.io/MiroFlow/)
 [![演示](https://img.shields.io/badge/Demo-FFB300?style=for-the-badge&logo=airplayvideo&logoColor=white)](https://dr.miromind.ai/)
 [![模型](https://img.shields.io/badge/Models-5EDDD2?style=for-the-badge&logo=huggingface&logoColor=ffffff&labelColor)](https://huggingface.co/collections/miromind-ai/mirothinker-v02-68af084a18035f57b17cd902)
 [![数据](https://img.shields.io/badge/Data-0040A1?style=for-the-badge&logo=huggingface&logoColor=ffffff&labelColor)](https://huggingface.co/datasets/miromind-ai/MiroVerse-v0.1)
 [![博客](https://img.shields.io/badge/Website-4285F4?style=for-the-badge&logo=google-chrome&logoColor=white)](https://miromind.ai/)
 [![GITHUB](https://img.shields.io/badge/Github-24292F?style=for-the-badge&logo=github&logoColor=white)](https://github.com/MiroMindAI)
 [![DISCORD](https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white)](https://discord.com/invite/GPqEnkzQZd)
 [![微信](https://img.shields.io/badge/WeChat-07C160?style=for-the-badge&logo=wechat&logoColor=white)](https://huggingface.co/datasets/miromind-ai/MiroFlow-Benchmarks/resolve/main/assets/wechat.png)
 [![小红书](https://img.shields.io/badge/RedNote-FF2442?style=for-the-badge&logo=revoltdotchat&logoColor=white)](https://www.xiaohongshu.com/user/profile/5e353bd80000000001000239)
 </div>
 <div align="center">
 ### 🚀 [Try our Demo!](https://dr.miromind.ai/)｜[English](README.md)｜[日本語](README_ja.md)
 </div>
 <div align="center">
  <img width="100%" alt="image" src="docs/mkdocs/docs/assets/futurex-09-12.png" />
 </div>
 ---
 这个仓库是MiroMind研究智能体项目的官方开源仓库。它是一个高性能、完全开源的研究智能体系统，旨在执行多步骤的互联网深度研究，用于解决复杂问题（例如：进行未来事件预测）。该项目目前包含四个核心组件：
 - 🤖 MiroFlow：一个开源研究智能体框架，在代表性基准（如 FutureX、GAIA、HLE、xBench-DeepSearch、BrowseComp）上实现了可复现的最高性能（代码详见本仓库）。动手尝试一下 [[5分钟快速上手]](#-5分钟快速上手)。
 - 🤔 MiroThinker：一个开源智能体基座模型，原生支持工具辅助推理。详见 [MiroThinker](https://github.com/MiroMindAI/mirothinker)。
 - 📊 MiroVerse：14.7万条高质量开源训练数据，用于研究智能体训练。详见 [MiroVerse](https://huggingface.co/datasets/miromind-ai/MiroVerse-v0.1)。
 - 🚧 MiroTrain / MiroRL：支持研究智能体模型稳定高效训练的基础设施。详见 [MiroTrain](https://github.com/MiroMindAI/MiroTrain) / [MiroRL](https://github.com/MiroMindAI/MiroRL)。
 ---
 ## 📋 目录
 - 📰 [最近更新](#-最近更新)
 - 🚀 [5分钟快速上手](#-5分钟快速上手)
 - 🤖 [什么是 MiroFlow？](#-什么是-miroflow)
 - 🌟 [核心亮点](#-核心亮点)
 - ✨ [基准测试性能](#-基准测试性能)
 - 🔧 [支持的模型与工具](#-支持的模型与工具)
 - ❓ [常见问题](#-常见问题)
 - 🤝 [贡献](#-贡献)
 - 📄 [许可证](#-许可证)
 - 🙏 [致谢](#-致谢)
 ---
 ## 📰 最近更新
 - **[2025-09-15]**: 🎉🎉 MiroFlow v0.3：简化仓库代码架构，提升基准测试表现，使 GPT-5 的未来事件预测准确率提高 11%。MiroFlow 现已在未来预测基准中排名第一。详见 [FutureX](https://futurex-ai.github.io/)。
 - **[2025-08-27]**: **MiroFlow v0.2**：在多个重要的智能体基准测试上达到最高性能，且这些性能均可通过本仓库代码复现，包括 HLE (27.2%)、HLE-Text-Only (29.5%)、BrowseComp-EN (33.2%)、BrowseComp-ZH (47.1%)、xBench-DeepSearch (72.0%)。
 - **[2025-08-26]**: 发布了 [GAIA 验证轨迹](docs/public_trace.md) (73.94% pass@1) 和用于本地部署的 [Gradio 演示](https://github.com/MiroMindAI/MiroThinker/tree/main/apps/gradio-demo)。
 - **[2025-08-08]**: **MiroFlow v0.1**：研究智能体框架首次完整开源发布。
 ---
 ## 🚀 5分钟快速上手
 ### 📋 前置条件
 - **Python**: 3.12 或更高版本
 - **包管理器**: [`uv`](https://docs.astral.sh/uv/)
 - **操作系统**: Linux, macOS
 ### ⚡ 快速设置
 **示例**: 带文档处理能力的智能文档分析。
 ```bash
 # 1. 克隆并设置
 git clone https://github.com/MiroMindAI/MiroFlow && cd MiroFlow
 uv sync
 # 2. 配置 API 密钥
 cp .env.template .env
 # 编辑 .env 并添加您的 OPENROUTER_API_KEY
 # 3. 运行您的第一个智能体
 uv run main.py trace --config_file_name=agent_quickstart_reading --task="What is the first country listed in the XLSX file that have names starting with Co?" --task_file_name="data/FSI-2023-DOWNLOAD.xlsx"
 ```
 🎉 **预期输出**: 您的智能体应该返回 **\boxed{Congo Democratic Republic}** 😊
 > **💡 提示**: 如果遇到问题，请检查您的 API 密钥是否在 `.env` 文件中正确设置，以及是否安装了所有依赖项。
 ---
 ## 🤖 什么是 MiroFlow？
 MiroFlow 是一个高性能、模块化的研究智能体框架，能够在复杂推理任务（例如：未来事件预测）上实现最先进的效果。它支持多轮对话、高度集成的工具生态，以及分层子智能体调度，确保任务最优完成。了解更多请参见我们的 [智能体框架介绍](https://miromindai.github.io/MiroFlow/core_concepts/)。
 <div align="center">
  <img src="docs/mkdocs/docs/assets/miroflow_architecture.png" width="100%" alt="MiroFlow Architecture">
 </div>
 <table align="center" style="border: 1px solid #ccc; border-radius: 8px; padding: 12px; background-color: #f9f9f9; width: 60%;">
  <tr>
    <td style="text-align: center; padding: 10px;">
      <strong>Research Assistant Demo</strong> - 
      <span style="font-size: 0.9em; color: #555;">阅读CVPR 2025最佳论文并给出研究方向建议</span>
      <br>
      <video src="https://github.com/user-attachments/assets/99ed3172-6e9a-467a-9ccb-be45957fe2e4"
             controls muted preload="metadata"
              width="50%" height="50%">
      </video>
    </td>
  </tr>
 </table>
 ---
 ## 🌟 核心亮点
 - **可复现的最先进性能**：在 [多个重要的智能体基准测试](https://miromindai.github.io/MiroFlow/evaluation_overview/) 上排名第一，包括 FutureX、GAIA、HLE、xBench-DeepSearch、BrowseComp。  
 - **高并发与高可靠性**：MiroFlow 具备健壮的并发管理和容错设计，能够高效应对有速率限制的 API 和不稳定的网络，确保数据收集顺畅、复杂任务可靠执行。  
 - **高性价比部署**：基于开源的 MiroThinker 模型，MiroFlow 可以在单张 RTX 4090 上运行研究智能体服务，整个栈依赖于免费开源工具，便于部署、扩展和复现，详见 [MiroThinker](https://github.com/MiroMindAI/mirothinker)。
 ---
 ## 🔧 支持的模型与工具
 - **模型**: GPT, Claude, Gemini, Qwen, MiroThinker
 - **工具**: [音频转录](https://github.com/MiroMindAI/MiroFlow/blob/miroflow-v0.3/src/tool/mcp_servers/audio_mcp_server.py), [Python](https://github.com/MiroMindAI/MiroFlow/blob/miroflow-v0.3/src/tool/mcp_servers/python_server.py), [文件阅读](https://github.com/MiroMindAI/MiroFlow/blob/miroflow-v0.3/src/tool/mcp_servers/reading_mcp_server.py), [推理](https://github.com/MiroMindAI/MiroFlow/blob/miroflow-v0.3/src/tool/mcp_servers/reasoning_mcp_server.py), [谷歌搜索](https://github.com/MiroMindAI/MiroFlow/blob/miroflow-v0.3/src/tool/mcp_servers/searching_mcp_server.py), [视觉问答](https://github.com/MiroMindAI/MiroFlow/blob/miroflow-v0.3/src/tool/mcp_servers/vision_mcp_server.py), E2B沙盒
 ---
 ## ✨ 基准测试性能
 截至 2025 年 9 月 10 日，MiroFlow 在 **FutureX 基准排行榜** 上排名第一，使 GPT-5 的未来预测准确率提高了 **11%**。
 <div align="center">
  <img width="100%" alt="image" src="docs/mkdocs/docs/assets/futurex-09-12.png" />
 </div>
 我们在一系列基准测试上对 MiroFlow 进行了评估，包括 **GAIA**、**HLE**、**BrowseComp** 和 **xBench-DeepSearch**，并取得了目前最好的结果。
 <img width="100%" alt="image" src="docs/mkdocs/docs/assets/benchmark_results.png" />
 | 模型/框架 | GAIA Val | HLE | HLE-Text | BrowseComp-EN | BrowseComp-ZH | xBench-DeepSearch |
 |-----------|----------|-----|----------|----------------|----------------|-------------------|
 | **MiroFlow** | **82.4%** | **27.2%** | 29.5% | 33.2% | **47.1%** | **72.0%** |
 | OpenAI Deep Research | 67.4% | 26.6% | - | **51.5%** | 42.9% | - |
 | Gemini Deep Research | - | 26.9% | - | - | - | 50+% |
 | Kimi Researcher | - | - | 26.9% | - | - | 69.0% |
 | WebSailor-72B | 55.4% | - | - | - | 30.1% | 55.0% |
 | Manus | 73.3% | - | - | - | - | - |
 | DeepSeek v3.1 | - | - | **29.8%** | - | - | 71.2% |
 请参照[基准测试文档](https://miromindai.github.io/MiroFlow/evaluation_overview/)中的详细指南复现上述基准测试结果。
 ---
 ## ❓ 常见问题
 <details>
 <summary><strong>我需要什么 API 密钥？</strong></summary>
 <br>
 您只需要一个 OpenRouter API 密钥即可开始。OpenRouter 通过单一 API 提供对多个语言模型的访问。
 </details>
 <details>
 <summary><strong>除了 OpenRouter，我可以使用其他语言模型吗？</strong></summary>
 <br>
 是的，MiroFlow 支持各种语言模型。查看我们的文档了解配置详情。
 </details>
 <details>
 <summary><strong>如何重现基准测试结果？</strong></summary>
 <br>
 按照我们详细的<a href="https://miromindai.github.io/MiroFlow/evaluation_overview/">基准测试文档</a>获取逐步重现指南。
 </details>
 <details>
 <summary><strong>是否有商业支持？</strong></summary>
 <br>
 如需商业咨询和企业支持，请通过我们的<a href="https://miromind.ai/">官方网站</a>联系我们。
 </details>
 ---
 ## 🤝 贡献
 我们欢迎社区的贡献！无论您是修复错误、添加功能还是改进文档，您的帮助都是受欢迎的。
 - 📋 **问题反馈**: 通过 [GitHub Issues](https://github.com/MiroMindAI/MiroFlow/issues) 报告错误或请求功能。
 - 🔀 **拉取请求**: 通过拉取请求提交改进。
 - 💬 **讨论**: 加入我们的 [Discord 社区](https://discord.com/invite/GPqEnkzQZd) 进行问题讨论。
 ## 📄 许可证
 本项目采用 Apache License 2.0 许可证发布。
 ## 🙏 致谢
 - **基准测试贡献者** 提供了综合评估数据集。
 - **开源社区** 提供了使这一切成为可能的工具和库。
 我们感谢所有帮助 MiroFlow 变得更好的贡献者：
 <a href="https://github.com/MiroMindAI/MiroFlow/graphs/contributors">
  <img src="https://contrib.rocks/image?repo=MiroMindAI/MiroFlow" />
 </a>
 加入我们的社区，帮助我们构建 AI 智能体的未来！
 ## 参考文献
 技术报告即将发布！
 ```
@misc{2025mirothinker,
    title={MiroFlow: A High-Performance Open-Source Research Agent Framework},
    author={MiroMind AI Team},
    howpublished={\url{https://github.com/MiroMindAI/MiroFlow}},
    year={2025}
 }
 ```
 [![Star History Chart](https://api.star-history.com/svg?repos=MiroMindAI/MiroFlow&type=Date)](https://star-history.com/#MiroMindAI/MiroFlow&Date)