{
  "kind": "tag",
  "slug": "multimodal-ai-topic",
  "id": 17730948119041167,
  "name": "Multimodal AI",
  "type": "topic",
  "aliases": [
    "audio-language models",
    "multimodal_ai",
    "multimodal AI",
    "vision_language_models",
    "vision-language models"
  ],
  "diffbot_id": null,
  "story_count_14d": 889,
  "cooccurring_tags": [
    {
      "id": 17723038993834764,
      "slug": "artificial-intelligence-topic",
      "name": "Artificial Intelligence",
      "type": "topic",
      "count": 466
    },
    {
      "id": 17723038994323052,
      "slug": "arxiv-organization",
      "name": "arXiv",
      "type": "organization",
      "count": 396
    },
    {
      "id": 17791452099123760,
      "slug": "llm-evals-topic",
      "name": "LLM Evals",
      "type": "topic",
      "count": 186
    },
    {
      "id": 17791452097663640,
      "slug": "ai-agents-topic",
      "name": "AI Agents",
      "type": "topic",
      "count": 140
    },
    {
      "id": 17791452099463022,
      "slug": "reasoning-models-topic",
      "name": "Reasoning Models",
      "type": "topic",
      "count": 82
    },
    {
      "id": 17791452103823983,
      "slug": "ai-infrastructure-topic",
      "name": "AI Infrastructure",
      "type": "topic",
      "count": 72
    },
    {
      "id": 17791452102628180,
      "slug": "inference-optimization-topic",
      "name": "Inference Optimization",
      "type": "topic",
      "count": 52
    },
    {
      "id": 17731007202817379,
      "slug": "zhipu-ai-organization",
      "name": "Zhipu AI",
      "type": "organization",
      "count": 48
    },
    {
      "id": 17791452100779851,
      "slug": "document-ai-topic",
      "name": "Document AI",
      "type": "topic",
      "count": 48
    },
    {
      "id": 17730931225185240,
      "slug": "tool-use-topic",
      "name": "Tool Use",
      "type": "topic",
      "count": 38
    },
    {
      "id": 17731005482466606,
      "slug": "model-security-topic",
      "name": "Model Security",
      "type": "topic",
      "count": 34
    },
    {
      "id": 17730928368970588,
      "slug": "synthetic-data-topic",
      "name": "Synthetic Data",
      "type": "topic",
      "count": 30
    },
    {
      "id": 17733518056319805,
      "slug": "github-organization",
      "name": "GitHub",
      "type": "organization",
      "count": 30
    },
    {
      "id": 17733572989242363,
      "slug": "ion-organization",
      "name": "ION",
      "type": "organization",
      "count": 19
    },
    {
      "id": 17791452098785214,
      "slug": "multi-agent-systems-topic",
      "name": "Multi-Agent Systems",
      "type": "topic",
      "count": 18
    }
  ],
  "top_sources": [
    {
      "name": "arxiv-multimodal-document-ai",
      "slug": "arxiv-multimodal-document-ai",
      "count": 144
    },
    {
      "name": "arxiv-ai-agents-tool-use",
      "slug": "arxiv-ai-agents-tool-use",
      "count": 131
    },
    {
      "name": "zhipu-ai-release-notes",
      "slug": "zhipu-ai-release-notes",
      "count": 48
    },
    {
      "name": "arxiv-model-efficiency-engineering",
      "slug": "arxiv-model-efficiency-engineering",
      "count": 37
    },
    {
      "name": "arxiv-rag-search-knowledge",
      "slug": "arxiv-rag-search-knowledge",
      "count": 36
    },
    {
      "name": "arxiv-frontier-methods-select",
      "slug": "arxiv-frontier-methods-select",
      "count": 34
    },
    {
      "name": "huggingface-nlp-blog",
      "slug": "huggingface-nlp-blog",
      "count": 12
    },
    {
      "name": "minimax-ai-news",
      "slug": "minimax-ai-news",
      "count": 10
    },
    {
      "name": "moonshot-ai-kimi-blog",
      "slug": "moonshot-ai-kimi-blog",
      "count": 8
    },
    {
      "name": "surge-ai-blog",
      "slug": "surge-ai-blog",
      "count": 6
    }
  ],
  "recent_stories": [
    {
      "id": 1780315021147458089,
      "slug": "deepimagesearch-benchmarking-multimodal-agents-for-context-a-7458089",
      "headline": "DeepImageSearch: Benchmarking Multimodal Agents for Context-Aware Image Retrieval in Visual Histories",
      "source": "arxiv-rag-search-knowledge",
      "home_domain": "engineering-technology",
      "published_date": "2026-06-01"
    },
    {
      "id": 1780315020846422031,
      "slug": "deepimagesearch-benchmarking-multimodal-agents-for-context-a-6422031",
      "headline": "DeepImageSearch: Benchmarking Multimodal Agents for Context-Aware Image Retrieval in Visual Histories",
      "source": "arxiv-rag-search-knowledge",
      "home_domain": "engineering-technology",
      "published_date": "2026-06-01"
    },
    {
      "id": 1780312489482545838,
      "slug": "multimodal-fusion-via-self-consistent-task-gradient-fields-2545838",
      "headline": "Multimodal Fusion via Self-Consistent Task-Gradient Fields",
      "source": "arxiv-multimodal-document-ai",
      "home_domain": "engineering-technology",
      "published_date": "2026-06-01"
    },
    {
      "id": 1780312489682474385,
      "slug": "multimodal-fusion-via-self-consistent-task-gradient-fields-2474385",
      "headline": "Multimodal Fusion via Self-Consistent Task-Gradient Fields",
      "source": "arxiv-multimodal-document-ai",
      "home_domain": "engineering-technology",
      "published_date": "2026-06-01"
    },
    {
      "id": 1780314580790791885,
      "slug": "lightweight-sar-ship-detection-via-contrastive-distillation-0791885",
      "headline": "Lightweight SAR Ship Detection via Contrastive Distillation",
      "source": "arxiv-multimodal-document-ai",
      "home_domain": "engineering-technology",
      "published_date": "2026-06-01"
    },
    {
      "id": 1780314581153984853,
      "slug": "lightweight-sar-ship-detection-via-contrastive-distillation-3984853",
      "headline": "Lightweight SAR Ship Detection via Contrastive Distillation",
      "source": "arxiv-multimodal-document-ai",
      "home_domain": "engineering-technology",
      "published_date": "2026-06-01"
    },
    {
      "id": 1780314510094664086,
      "slug": "generating-reports-or-repeating-templates-measuring-and-miti-4664086",
      "headline": "Generating Reports or Repeating Templates? Measuring and Mitigating Template Collapse in 3D CT Report Generation",
      "source": "arxiv-rag-search-knowledge",
      "home_domain": "engineering-technology",
      "published_date": "2026-06-01"
    },
    {
      "id": 1780314510403570095,
      "slug": "generating-reports-or-repeating-templates-measuring-and-miti-3570095",
      "headline": "Generating Reports or Repeating Templates? Measuring and Mitigating Template Collapse in 3D CT Report Generation",
      "source": "arxiv-rag-search-knowledge",
      "home_domain": "engineering-technology",
      "published_date": "2026-06-01"
    },
    {
      "id": 1780314508587875497,
      "slug": "consensus-multi-agent-collaboration-for-multimodal-sensing-7875497",
      "headline": "ConSensus: Multi-Agent Collaboration for Multimodal Sensing",
      "source": "arxiv-ai-agents-tool-use",
      "home_domain": "engineering-technology",
      "published_date": "2026-06-01"
    },
    {
      "id": 1780314508957555139,
      "slug": "consensus-multi-agent-collaboration-for-multimodal-sensing-7555139",
      "headline": "ConSensus: Multi-Agent Collaboration for Multimodal Sensing",
      "source": "arxiv-ai-agents-tool-use",
      "home_domain": "engineering-technology",
      "published_date": "2026-06-01"
    },
    {
      "id": 1780314143267971220,
      "slug": "probing-collision-grounding-in-vision-language-models-for-sa-7971220",
      "headline": "Probing Collision Grounding in Vision-Language Models for Safe Human-Robot Collaboration",
      "source": "arxiv-rag-search-knowledge",
      "home_domain": "engineering-technology",
      "published_date": "2026-06-01"
    },
    {
      "id": 1780314142923361003,
      "slug": "probing-collision-grounding-in-vision-language-models-for-sa-3361003",
      "headline": "Probing Collision Grounding in Vision-Language Models for Safe Human-Robot Collaboration",
      "source": "arxiv-rag-search-knowledge",
      "home_domain": "engineering-technology",
      "published_date": "2026-06-01"
    }
  ]
}