{
  "source": "arxiv",
  "fetched_at": "2026-05-30T12:07:49.625Z",
  "count": 50,
  "items": [
    {
      "id": "arxiv_2605.30353v1",
      "title": "Physics Is All You Need? A Case Study in Physicist-Supervised AI Development of Scientific Software",
      "url": "http://arxiv.org/abs/2605.30353v1",
      "summary": "Are AI agents tools, co-authors, or researchers? We present a quantified case study ($N=1$): a physicist supervising an AI coding agent (Claude Code, Sonnet and Opus models) over 12 work days and 57 s",
      "authors": [
        "Nhat-Minh Nguyen"
      ],
      "published": "2026-05-28T17:59:59Z"
    },
    {
      "id": "arxiv_2605.30351v1",
      "title": "VideoMLA: Low-Rank Latent KV Cache for Minute-Scale Autoregressive Video Diffusion",
      "url": "http://arxiv.org/abs/2605.30351v1",
      "summary": "Long-rollout causal video diffusion has converged on a fixed-size sliding-window KV cache, with recent progress innovating within this layout by changing which tokens occupy the window or how their po",
      "authors": [
        "Hidir Yesiltepe",
        "Jiazhen Hu",
        "Tuna Han Salih Meral",
        "Adil Kaan Akan"
      ],
      "published": "2026-05-28T17:59:57Z"
    },
    {
      "id": "arxiv_2605.30348v1",
      "title": "LLMSurgeon: Diagnosing Data Mixture of Large Language Models",
      "url": "http://arxiv.org/abs/2605.30348v1",
      "summary": "The pretraining data mixture of Large Language Models (LLMs) constitutes their \"digital DNA\", shaping model behaviors, capabilities, and failure modes. Yet this composition is rarely disclosed, making",
      "authors": [
        "Yaxin Luo",
        "Jiacheng Cui",
        "Xiaohan Zhao",
        "Xinyi Shang"
      ],
      "published": "2026-05-28T17:59:53Z"
    },
    {
      "id": "arxiv_2605.30345v1",
      "title": "SchGen: PCB Schematic Generation with Semantic-Grounded Code Representations",
      "url": "http://arxiv.org/abs/2605.30345v1",
      "summary": "Printed circuit board (PCB) schematic design defines nearly all electronic hardware, but it remains manual and expertise-intensive. While generative AI has advanced digital and analog IC design, PCB s",
      "authors": [
        "Qinpei Luo",
        "Ruichun Ma",
        "Xinyu Zhang",
        "Lili Qiu"
      ],
      "published": "2026-05-28T17:59:50Z"
    },
    {
      "id": "arxiv_2605.30344v1",
      "title": "Tiny but Trusted: Efficient Vision-Language Reasoning for Time-Series Anomaly Detection",
      "url": "http://arxiv.org/abs/2605.30344v1",
      "summary": "Recent advances in Vision-Language Models (VLMs) have achieved impressive performance across many tasks, yet prior studies report unsatisfactory performance when applying large language or multimodal ",
      "authors": [
        "Xiaona Zhou",
        "Muntasir Wahed",
        "Tianjiao Yu",
        "Constantin Brif"
      ],
      "published": "2026-05-28T17:59:50Z"
    },
    {
      "id": "arxiv_2605.30343v1",
      "title": "Unlocking the Working Memory of Large Language Models for Latent Reasoning",
      "url": "http://arxiv.org/abs/2605.30343v1",
      "summary": "To improve the reasoning capabilities of large language models, test-time compute is typically scaled by generating intermediate tokens before the final answer. However, this couples reasoning to auto",
      "authors": [
        "Lukas Aichberger",
        "Sepp Hochreiter"
      ],
      "published": "2026-05-28T17:59:49Z"
    },
    {
      "id": "arxiv_2605.30341v1",
      "title": "GPIC: A Giant Permissive Image Corpus for Visual Generation",
      "url": "http://arxiv.org/abs/2605.30341v1",
      "summary": "Studying scalable methods for visual generative modeling requires large, accessible, and stable datasets. We introduce GPIC, a Giant Permissive Image Corpus of approximately 28 trillion pixels. GPIC c",
      "authors": [
        "Keshigeyan Chandrasegaran",
        "Kyle Sargent",
        "Suchir Agarwal",
        "Michael Jang"
      ],
      "published": "2026-05-28T17:59:26Z"
    },
    {
      "id": "arxiv_2605.30335v1",
      "title": "Locally Coherent, Globally Incoherent: Bounding Compositional Incoherence in Multi-Component LLM Agents",
      "url": "http://arxiv.org/abs/2605.30335v1",
      "summary": "Multi-component LLM agents assemble probabilistic claims from components that each see only part of a joint problem; the composition can violate basic probability axioms even when every component is l",
      "authors": [
        "Anany Kotawala"
      ],
      "published": "2026-05-28T17:58:55Z"
    },
    {
      "id": "arxiv_2605.30334v1",
      "title": "Demystifying Data Organization for Enhanced LLM Training",
      "url": "http://arxiv.org/abs/2605.30334v1",
      "summary": "Large Language Models (LLMs) have revolutionized various fields, yet their training efficiency is heavily reliant on effective data curation. While data selection has been widely studied, the strategi",
      "authors": [
        "Yalun Dai",
        "Yangyu Huang",
        "Tongshen Yang",
        "Yonghan Wang"
      ],
      "published": "2026-05-28T17:58:53Z"
    },
    {
      "id": "arxiv_2605.30327v1",
      "title": "Reasoning with Sampling: Cutting at Decision Points",
      "url": "http://arxiv.org/abs/2605.30327v1",
      "summary": "Frontier reasoning models are produced by posttraining base language models with reinforcement learning. Recent work has challenged this by showing that sampling from a sharpened version of the base m",
      "authors": [
        "Felix Zhou",
        "Anay Mehrotra",
        "Quanquan C. Liu"
      ],
      "published": "2026-05-28T17:57:32Z"
    },
    {
      "id": "arxiv_2605.30326v1",
      "title": "RoboWits: Unexpected Challenges for Robotic Creative Problem Solving",
      "url": "http://arxiv.org/abs/2605.30326v1",
      "summary": "The ability to reason, adapt, and creatively solve problems under unexpected challenges is essential for robots operating in real-world environments. However, current robotic benchmarks primarily emph",
      "authors": [
        "Chunru Lin",
        "Hongxin Zhang",
        "Fenghao Yu",
        "Zhehuan Chen"
      ],
      "published": "2026-05-28T17:57:15Z"
    },
    {
      "id": "arxiv_2605.30324v1",
      "title": "On Language Generation in the Limit with Bounded Memory",
      "url": "http://arxiv.org/abs/2605.30324v1",
      "summary": "We study language generation in the limit under bounded memory. In this task, a learner observes examples from an unknown target language one at a time and must eventually output only new valid exampl",
      "authors": [
        "Jon Kleinberg",
        "Anay Mehrotra",
        "Amin Saberi",
        "Grigoris Velegkas"
      ],
      "published": "2026-05-28T17:57:03Z"
    },
    {
      "id": "arxiv_2605.30323v1",
      "title": "In-Context Reward Adaptation for Robust Preference Modeling",
      "url": "http://arxiv.org/abs/2605.30323v1",
      "summary": "Reinforcement Learning from Human Feedback (RLHF) typically relies on static reward models to align Large Language Models with human preferences. However, human values are inherently diverse and heter",
      "authors": [
        "Zhenyu Sun",
        "Zheng Xu",
        "Ermin Wei"
      ],
      "published": "2026-05-28T17:56:54Z"
    },
    {
      "id": "arxiv_2605.30322v1",
      "title": "Gram: Assessing sabotage propensities via automated alignment auditing",
      "url": "http://arxiv.org/abs/2605.30322v1",
      "summary": "We introduce Gram, an automated alignment auditing framework to assess the propensity of AI agents to engage in sabotage. We evaluate Gemini models across 17 simulated agentic deployment scenarios tha",
      "authors": [
        "David Lindner",
        "Victoria Krakovna",
        "Sebastian Farquhar"
      ],
      "published": "2026-05-28T17:56:18Z"
    },
    {
      "id": "arxiv_2605.30319v1",
      "title": "Improved Guarantees for Heterogeneous Treatment-Effect Estimation via Matrix Completion",
      "url": "http://arxiv.org/abs/2605.30319v1",
      "summary": "A central goal of modern causal inference is estimating heterogeneous treatment effects to answer questions like \"how does an intervention affect each unit,\" rather than only on average. We study this",
      "authors": [
        "Anay Mehrotra",
        "Phuc Tran",
        "Van H. Vu",
        "Manolis Zampetakis"
      ],
      "published": "2026-05-28T17:55:23Z"
    },
    {
      "id": "arxiv_2605.30318v1",
      "title": "Before the Shutter: Aesthetic and Actionable Portrait Photography Planning in 3D Scenes",
      "url": "http://arxiv.org/abs/2605.30318v1",
      "summary": "Portrait photography is largely decided before the shutter opens: the subject's pose, the camera configuration, and the lighting devices must be coordinated within the surrounding 3D scene. In contras",
      "authors": [
        "Ruixiang Jiang",
        "Chang Wen Chen"
      ],
      "published": "2026-05-28T17:55:09Z"
    },
    {
      "id": "arxiv_2605.30311v1",
      "title": "Archon: A Unified Multimodal Model for Holistic Digital Human Generation",
      "url": "http://arxiv.org/abs/2605.30311v1",
      "summary": "Digital humans are fundamental to immersive interaction, yet creating a unified model for holistic modalities, including text, audio, motion, and visual content, remains an open challenge. In this pap",
      "authors": [
        "Chong Bao",
        "Shichen Liu",
        "Lijun Yu",
        "David Futschik"
      ],
      "published": "2026-05-28T17:53:27Z"
    },
    {
      "id": "arxiv_2605.30310v1",
      "title": "City-Mesh3R: Simulation-Ready City-Scale 3D Mesh Reconstruction from Multi-View Images",
      "url": "http://arxiv.org/abs/2605.30310v1",
      "summary": "City-scale 3D surface reconstruction from multiview images for downstream 3D simulation, poses highly challenging problems due to the scale and complexity of urban scenes. Existing city-scale 3D recon",
      "authors": [
        "Sayan Paul",
        "Sourav Ghosh",
        "Siddharth Katageri",
        "Soumyadip Maity"
      ],
      "published": "2026-05-28T17:53:26Z"
    },
    {
      "id": "arxiv_2605.30295v1",
      "title": "MedCase-Structured: A Text-to-FHIR Dataset for Benchmarking Diagnostic Reasoning in Clinically Realistic EHR Settings",
      "url": "http://arxiv.org/abs/2605.30295v1",
      "summary": "Large language models (LLMs) show promise for clinical reasoning and decision support, but evaluation in realistic, electronic health record-congruent settings remains limited. Existing benchmarks oft",
      "authors": [
        "Valentina Bui Muti",
        "Eugénie Dulout",
        "Ziquan Fu"
      ],
      "published": "2026-05-28T17:42:43Z"
    },
    {
      "id": "arxiv_2605.30290v1",
      "title": "Self-Trained Verification for Training- and Test-Time Self-Improvement",
      "url": "http://arxiv.org/abs/2605.30290v1",
      "summary": "Self-improvement at scale has been a longstanding goal for reasoning models, and there are two natural places to do it: at test time, through verification-refinement (V-R) loops; and at training time,",
      "authors": [
        "Chen Henry Wu",
        "Aditi Raghunathan"
      ],
      "published": "2026-05-28T17:40:45Z"
    },
    {
      "id": "arxiv_2605.30288v1",
      "title": "MIRA: Mid-training Rubric Anchoring for Source-Aware Data Selection",
      "url": "http://arxiv.org/abs/2605.30288v1",
      "summary": "Mid-training has become an important stage in modern LLM development, using large-scale curated mixtures to strengthen capabilities before final post-training. Its data selection problem is distinct: ",
      "authors": [
        "Haowen Wang",
        "Yaxin Du",
        "Jian Yang",
        "Jiajun Wu"
      ],
      "published": "2026-05-28T17:40:40Z"
    },
    {
      "id": "arxiv_2605.30284v1",
      "title": "ProjectionBench: Evaluating Scientific Hypothesis Generation in LLMs Under Progressive Information Disclosure",
      "url": "http://arxiv.org/abs/2605.30284v1",
      "summary": "Scientific discovery is an inherently creative and uncertain process, requiring reasoning beyond the recall of known knowledge. While many benchmarks have been proposed to evaluate large language mode",
      "authors": [
        "A. J. Lew",
        "Y. Cao",
        "M. J. Buehler"
      ],
      "published": "2026-05-28T17:38:19Z"
    },
    {
      "id": "arxiv_2605.30283v1",
      "title": "mcp-proto-okn: Natural-language access to open scientific knowledge graphs through the Model Context Protocol",
      "url": "http://arxiv.org/abs/2605.30283v1",
      "summary": "MCP Server Proto-OKN (mcp-proto-okn) is a Python-based Model Context Protocol server that enables AI assistants to discover, inspect, query and integrate scientific knowledge graphs through natural la",
      "authors": [
        "Peter W. Rose",
        "Benjamin M. Good",
        "Amanda M. Saravia-Butler",
        "Charlotte A. Nelson"
      ],
      "published": "2026-05-28T17:37:54Z"
    },
    {
      "id": "arxiv_2605.30280v1",
      "title": "Qwen-VLA: Unifying Vision-Language-Action Modeling across Tasks, Environments, and Robot Embodiments",
      "url": "http://arxiv.org/abs/2605.30280v1",
      "summary": "Embodied intelligence is often studied through specialized models for individual tasks such as manipulation or navigation, resulting in fragmented capabilities and limited generalization across tasks,",
      "authors": [
        "Qiuyue Wang",
        "Mingsheng Li",
        "Jian Guan",
        "Jinhui Ye"
      ],
      "published": "2026-05-28T17:36:31Z"
    },
    {
      "id": "arxiv_2605.30274v1",
      "title": "Loong: A Human-Like Long Document Translation Agent with Observe-and-Act Adaptive Context Selection",
      "url": "http://arxiv.org/abs/2605.30274v1",
      "summary": "Document-level translation remains one of the most challenging tasks for large language models, which are constrained by limited context windows that impede global cohesion, while simultaneously suffe",
      "authors": [
        "Yutong Wang",
        "Xuebo Liu",
        "Derek F. Wong",
        "Zhilin Li"
      ],
      "published": "2026-05-28T17:32:25Z"
    },
    {
      "id": "arxiv_2605.30273v1",
      "title": "LLUMI: Improving LLM Writing Assistance for Mental Health Support with Online Community Feedback",
      "url": "http://arxiv.org/abs/2605.30273v1",
      "summary": "Large language models (LLMs) show promise in generating supportive responses for mental health queries, but improving their usefulness, empathy, and safety often requires substantial compute, expert i",
      "authors": [
        "Jiwon Kim",
        "Maya Ajit",
        "Sherry Gong",
        "Soorya Ram Shimgekar"
      ],
      "published": "2026-05-28T17:30:57Z"
    },
    {
      "id": "arxiv_2605.30268v1",
      "title": "PhyGenHOI: Physically-Aware 4D Generation of Dynamic Human-Object Interactions",
      "url": "http://arxiv.org/abs/2605.30268v1",
      "summary": "We address the task of generating physically accurate and visually faithful 4D Human-Object Interaction (HOI). Given a static 3D human and target object represented as 3D Gaussian Splats (3DGS), our g",
      "authors": [
        "Omer Benishu",
        "Gal Fiebelman",
        "Sagie Benaim"
      ],
      "published": "2026-05-28T17:29:19Z"
    },
    {
      "id": "arxiv_2605.30260v1",
      "title": "How LoRA Remembers? A Parametric Memory Law for LLM Finetuning",
      "url": "http://arxiv.org/abs/2605.30260v1",
      "summary": "Large Language Models (LLMs) must continuously learn and update knowledge to remain effective in dynamic real-world environments. While Low-Rank Adaptation (LoRA) is widely used for such memory update",
      "authors": [
        "Ziwen Xu",
        "Haiwen Hong",
        "Linsong Yu",
        "Benglei Cui"
      ],
      "published": "2026-05-28T17:22:24Z"
    },
    {
      "id": "arxiv_2605.30251v1",
      "title": "Same Evidence, Different Answers: Canonical-Context On-Policy Distillation for Multi-Turn Language Models",
      "url": "http://arxiv.org/abs/2605.30251v1",
      "summary": "Large language models (LLMs) often solve a task when all instructions are given in a single prompt, but fail when the same information is revealed gradually across turns. When a clean FULL prompt and ",
      "authors": [
        "Zizhuo Lin",
        "Quanling Liu",
        "Jinsheng Quan",
        "Chao Zhang"
      ],
      "published": "2026-05-28T17:14:29Z"
    },
    {
      "id": "arxiv_2605.30244v1",
      "title": "Reinforcement Learning with Robust Rubric Rewards",
      "url": "http://arxiv.org/abs/2605.30244v1",
      "summary": "While Reinforcement Learning with Verifiable Rewards (RLVR) is effective for deterministically checkable tasks, many vision-language tasks are partially verifiable, demanding multi-criteria supervisio",
      "authors": [
        "Ya-Qi Yu",
        "Hao Wang",
        "Fangyu Hong",
        "Xiangyang Qu"
      ],
      "published": "2026-05-28T17:11:03Z"
    },
    {
      "id": "arxiv_2605.30233v1",
      "title": "Do Language Models Track Entities Across State Changes?",
      "url": "http://arxiv.org/abs/2605.30233v1",
      "summary": "Entity tracking (ET), the ability to keep track of states, is a fundamental skill that underlies complex reasoning. An increasing amount of work investigates how transformer language models (LMs) solv",
      "authors": [
        "Zilu Tang",
        "Qiao Zhao",
        "Gabriel Franco",
        "Derry Wijaya"
      ],
      "published": "2026-05-28T17:03:42Z"
    },
    {
      "id": "arxiv_2605.30231v1",
      "title": "Beyond 3D VQAs: Injecting 3D Spatial Priors into Vision-Language Models for Enhanced Geometric Reasoning",
      "url": "http://arxiv.org/abs/2605.30231v1",
      "summary": "Vision-Language Models (VLMs) often struggle with robust 3D spatial reasoning. Prevailing methods that rely on fine-tuning with 3D visual question-answering (VQA) datasets may overfit dataset-specific",
      "authors": [
        "Chun-Hsiao Yeh",
        "Shengyi Qian",
        "Manchen Wang",
        "Yi Ma"
      ],
      "published": "2026-05-28T17:00:52Z"
    },
    {
      "id": "arxiv_2605.30227v1",
      "title": "Unifying Temporal and Structural Credit Assignment in LLM-Based Multi-Agent Prompt Optimization",
      "url": "http://arxiv.org/abs/2605.30227v1",
      "summary": "While Multi-Agent Systems (MAS) empower Large Language Models to tackle complex reasoning tasks through collaborative interaction, optimizing their dynamics remains a formidable challenge due to the d",
      "authors": [
        "Wenwu Li",
        "Yuran Song",
        "Mingze Zhao",
        "Bo Jin"
      ],
      "published": "2026-05-28T16:57:57Z"
    },
    {
      "id": "arxiv_2605.30226v1",
      "title": "BORA: Bridging Offline Reinforcement Learning and Online Residual Adaptation for Real-World Dexterous VLA Models",
      "url": "http://arxiv.org/abs/2605.30226v1",
      "summary": "Vision-Language-Action (VLA) models have emerged as a promising paradigm for grounding visual-language understanding into real-world robotic manipulation. However, dexterous manipulation remains chall",
      "authors": [
        "Zhongxi Chen",
        "Yifan Han",
        "Yanming Shao",
        "Huanming Liu"
      ],
      "published": "2026-05-28T16:57:47Z"
    },
    {
      "id": "arxiv_2605.30219v1",
      "title": "When Should Models Change Their Minds? Contextual Belief Management in Large Language Models",
      "url": "http://arxiv.org/abs/2605.30219v1",
      "summary": "Long-horizon interactions require language models to manage accumulating information: when to update their state, when to preserve their state, and what to ignore. We study this challenge as \\textbf{C",
      "authors": [
        "Haoming Xu",
        "Weihong Xu",
        "Zongrui Li",
        "Mengru Wang"
      ],
      "published": "2026-05-28T16:52:04Z"
    },
    {
      "id": "arxiv_2605.30208v1",
      "title": "Automating Low-Risk Code Review at Meta: RADAR, Risk Calibration, and Review Efficiency",
      "url": "http://arxiv.org/abs/2605.30208v1",
      "summary": "AI-assisted coding tools have altered software production. At Meta, significant lines of code per human-landed diff grew by 105.9% year over year and per-developer diff volume rose 51%, with agentic A",
      "authors": [
        "Chris Adams",
        "Arjun Singh Banga",
        "Parveen Bansal",
        "Souvik Bhattacharya"
      ],
      "published": "2026-05-28T16:44:07Z"
    },
    {
      "id": "arxiv_2605.30207v1",
      "title": "Persona Conditioning of Brand Recommendations in Retrieval-Augmented Commercial Chat: A Prominence-Stratified Cross-Provider Audit",
      "url": "http://arxiv.org/abs/2605.30207v1",
      "summary": "The same prompt -- \"best CRM software\" -- reaches AI assistants from buyers in widely different contexts: a solo founder, an enterprise VP, a UK SMB owner. We audit how strongly that contextual variat",
      "authors": [
        "Will Jack",
        "Noah Lehman",
        "Keller Maloney",
        "Sarah Xu"
      ],
      "published": "2026-05-28T16:43:38Z"
    },
    {
      "id": "arxiv_2605.30201v1",
      "title": "HPO: Hysteretic Policy Optimization for Stable and Efficient Training under Sparse-Reward Regime",
      "url": "http://arxiv.org/abs/2605.30201v1",
      "summary": "We investigate a narrow but common failure mode of GRPO-style reinforcement learning in the context of sparse verifiable rewards: early updates contain more responses with negative advantages than tho",
      "authors": [
        "Mohamed Sana",
        "Nicola Piovesan",
        "Antonio De Domenico",
        "Fadhel Ayed"
      ],
      "published": "2026-05-28T16:38:21Z"
    },
    {
      "id": "arxiv_2605.30200v1",
      "title": "Double-Edged Sword or Sharp Tool? Designing and Evaluating Triadic LLM-Teacher Collaboration for K-12 Writing at Scale",
      "url": "http://arxiv.org/abs/2605.30200v1",
      "summary": "The double-edged sword of integrating Large Language Models (LLMs) requires an effective triadic collaboration mechanism among LLMs, teachers and students, especially for K-12 education. By developing",
      "authors": [
        "Canran Wang",
        "Yuwen Yang",
        "Zhen Wang",
        "Ming Ma"
      ],
      "published": "2026-05-28T16:37:00Z"
    },
    {
      "id": "arxiv_2605.30195v1",
      "title": "What drives performance in molecular MPNNs? An operator-level factorial benchmark",
      "url": "http://arxiv.org/abs/2605.30195v1",
      "summary": "Message-passing neural networks (MPNNs) are widely used for molecular property prediction, but their deployment as monolithic architectures makes it difficult to identify how specific message-passing ",
      "authors": [
        "Panyu Jiao",
        "Shuizhou Chen",
        "Yiheng Shen",
        "Yuyang Wang"
      ],
      "published": "2026-05-28T16:34:53Z"
    },
    {
      "id": "arxiv_2605.30189v1",
      "title": "Token-Level Generalization in LoRA Adapter Backdoors: Attack Characterization and Behavioral Detection",
      "url": "http://arxiv.org/abs/2605.30189v1",
      "summary": "We show that LoRA adapters, the dominant distribution format for fine-tuned LLMs, can be reliably backdoored through training data poisoning while preserving baseline task performance. On a Qwen 2.5 1",
      "authors": [
        "Travis Lelle"
      ],
      "published": "2026-05-28T16:32:25Z"
    },
    {
      "id": "arxiv_2605.30188v1",
      "title": "CalArena: A Large-Scale Post-Hoc Calibration Benchmark",
      "url": "http://arxiv.org/abs/2605.30188v1",
      "summary": "Reliable probability estimates are critical in many machine learning applications, yet modern classifiers are often poorly calibrated. Post-hoc calibration provides a simple and widely used solution, ",
      "authors": [
        "Eugène Berta",
        "David Holzmüller",
        "Francis Bach",
        "Michael I. Jordan"
      ],
      "published": "2026-05-28T16:31:36Z"
    },
    {
      "id": "arxiv_2605.30187v1",
      "title": "Modularizing Educational LLM-Agency for Fostering Responsible Learning Assistance",
      "url": "http://arxiv.org/abs/2605.30187v1",
      "summary": "The widespread adoption of AI chatbots in education will drastically change learning, making responsible deployment a critical concern. While large language models (LLMs) might have access to sources ",
      "authors": [
        "Julius Gabelmann",
        "Felix Jahn",
        "Kevin Baum",
        "Sophie van Rossum"
      ],
      "published": "2026-05-28T16:31:32Z"
    },
    {
      "id": "arxiv_2605.30179v1",
      "title": "iLoRA: Bayesian Low-Rank Adaptation with Latent Interaction Graphs for Microbiome Diagnosis",
      "url": "http://arxiv.org/abs/2605.30179v1",
      "summary": "Parameter-efficient adaptation has made LLMs practical for domain prediction, but standard LoRA still relies on a static low-rank update and does not expose the latent interactions that often drive sc",
      "authors": [
        "Yang Song",
        "Yixuan Zhang",
        "Lingfa Meng",
        "Tongyuan Hu"
      ],
      "published": "2026-05-28T16:26:06Z"
    },
    {
      "id": "arxiv_2605.30169v1",
      "title": "Dissociative Identity: Language Model Agents Lack Grounding for Reputation Mechanisms",
      "url": "http://arxiv.org/abs/2605.30169v1",
      "summary": "As autonomous language model agents proliferate, forming an emerging agentic web with real-world consequences, what credibility signals can you use to decide whether to trust an unfamiliar agent in th",
      "authors": [
        "Botao Amber Hu",
        "Helena Rong",
        "Max Van Kleek"
      ],
      "published": "2026-05-28T16:20:19Z"
    },
    {
      "id": "arxiv_2605.30162v1",
      "title": "BioRefusalAudit: Auditing Biosecurity Refusal Depth Using General and Domain-Fine-Tuned Sparse Autoencoders",
      "url": "http://arxiv.org/abs/2605.30162v1",
      "summary": "Biosecurity evaluations of language models typically ask whether models produce hazardous output. This paper asks a complementary question: when a model refuses, is that refusal structurally sound, or",
      "authors": [
        "Caleb DeLeeuw"
      ],
      "published": "2026-05-28T16:18:07Z"
    },
    {
      "id": "arxiv_2605.30160v1",
      "title": "On Distributional Reinforcement Learning in Chaotic Dynamical Systems",
      "url": "http://arxiv.org/abs/2605.30160v1",
      "summary": "Chaotic dynamical systems pose a fundamental challenge for Reinforcement Learning (RL): exponential sensitivity to initial conditions induces high-variance bootstrap targets and poorly conditioned gra",
      "authors": [
        "James Rudd-Jones",
        "Mirco Musolesi",
        "María Pérez-Ortiz"
      ],
      "published": "2026-05-28T16:17:32Z"
    },
    {
      "id": "arxiv_2605.30159v1",
      "title": "Meta-Cognitive Memory Policy Optimization for Long-Horizon LLM Agents",
      "url": "http://arxiv.org/abs/2605.30159v1",
      "summary": "Memory-augmented LLM agents tackle complex long-horizon tasks by recursively summarizing interaction trajectories into compact memory. However, existing approaches typically train these memory policie",
      "authors": [
        "Ziyan Liu",
        "Zhezheng Hao",
        "Yeqiu Chen",
        "Hong Wang"
      ],
      "published": "2026-05-28T16:17:19Z"
    },
    {
      "id": "arxiv_2605.30155v1",
      "title": "Neural Network Verification using Partial Multi-Neuron Relaxation",
      "url": "http://arxiv.org/abs/2605.30155v1",
      "summary": "The increasing integration of deep neural networks in critical systems has spawned a theoretical and practical interest in formally guaranteeing safety properties about their behavior. To achieve this",
      "authors": [
        "Ido Shmuel",
        "Guy Katz"
      ],
      "published": "2026-05-28T16:15:13Z"
    },
    {
      "id": "arxiv_2605.30152v1",
      "title": "Do Proactive Agents Really Need an LLM to Decide When to Wake and What to Anchor?",
      "url": "http://arxiv.org/abs/2605.30152v1",
      "summary": "Proactive agents read user activity as text and call an LLM on every event to decide whether to act. But user activity is not natively text: it is a structured event stream of (actor, verb, object, ti",
      "authors": [
        "Xiaoze Liu",
        "Ruowang Zhang",
        "Amir H. Abdi",
        "Michel Galley"
      ],
      "published": "2026-05-28T16:10:32Z"
    }
  ]
}