[
  {
    "paper_id": "1",
    "title": "Learning Fine-Grained Bimanual Manipulation with Low-Cost Hardware (ALOHA + ACT)",
    "authors": [
      "Zhao",
      "Kumar",
      "Levine",
      "Finn"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023",
    "venue": "RSS 2023",
    "url": "https://tonyzhaozh.github.io/aloha/",
    "summary": "Foundational tele-op-trained imitation paper. ACT = action chunking transformer. Stanford original ALOHA. Bill 13: training data is 100% tele-op puppeteer-collected.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Foundational tele-op-trained imitation paper. ACT = action chunking transformer. Stanford original ALOHA. Bill 13: training data is 100% tele-op puppeteer-collected.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "10",
    "title": "Apptronik Apollo at Mercedes-Benz Berlin plant",
    "authors": [
      "Apptronik / Mercedes-Benz"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-2025",
    "venue": "Press releases / The Robot Report",
    "url": "https://www.therobotreport.com/mercedes-benz-testing-apollo-humanoid/",
    "summary": "Honest framing \u2014 Mercedes explicitly says 'controlled test environment to collect data and optimize the system for later use'. Contrast with Figure BMW which is opaque about contract details. CES 2025 demos showed simulated factory work.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Honest framing \u2014 Mercedes explicitly says 'controlled test environment to collect data and optimize the system for later use'. Contrast with Figure BMW which is opaque about contract details. CES 2025 demos showed simulated factory work.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "11",
    "title": "Atlas (electric) + Toyota Research Institute LBM",
    "authors": [
      "Boston Dynamics + TRI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-2025",
    "venue": "Boston Dynamics blog / TRI press",
    "url": "https://www.tri.global/news/ai-powered-robot-boston-dynamics-and-toyota-research-institute-takes-key-step-towards-general",
    "summary": "Boston Dynamics historically the most credible on autonomy claims. LBM = Large Behavior Model (TRI's diffusion-policy lineage). Distinguished from Figure/Tesla by track record but still subject to bill-10 critique on reliability outside demo zone.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Boston Dynamics historically the most credible on autonomy claims. LBM = Large Behavior Model (TRI's diffusion-policy lineage). Distinguished from Figure/Tesla by track record but still subject to bill-10 critique on reliability outside demo zone.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "12",
    "title": "Universal Manipulation Interface (UMI): In-The-Wild Robot Teaching",
    "authors": [
      "Chi",
      "Xu",
      "Pan",
      "Cousineau",
      "Burchfiel",
      "Tedrake",
      "Song"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "RSS 2024",
    "url": "https://umi-gripper.github.io/",
    "summary": "Solves the tele-op-cost problem differently: data collection without the robot. Bill 5 intersection. Variants: exUMI, FastUMI, MV-UMI, DexUMI, ActiveUMI, UMI-on-Legs, UMI-on-Air. Tedrake/TRI lineage.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Solves the tele-op-cost problem differently: data collection without the robot. Bill 5 intersection. Variants: exUMI, FastUMI, MV-UMI, DexUMI, ActiveUMI, UMI-on-Legs, UMI-on-Air. Tedrake/TRI lineage.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "13",
    "title": "DROID: A Large-Scale In-The-Wild Robot Manipulation Dataset",
    "authors": [
      "Khazatsky et al. (50+ co-authors)"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "RSS 2024",
    "url": "https://droid-dataset.github.io/",
    "summary": "STAR. The canonical 'tele-op data is the asset' paper. Explicitly tele-op-collected at massive scale via human operators. Bill 5 (tele-op as data pipeline) AND Bill 13 (tele-op decomposition) primary.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "STAR. The canonical 'tele-op data is the asset' paper. Explicitly tele-op-collected at massive scale via human operators. Bill 5 (tele-op as data pipeline) AND Bill 13 (tele-op decomposition) primary.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "14",
    "title": "Open X-Embodiment + RT-X",
    "authors": [
      "Open X-Embodiment Collaboration (21 institutions)"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "ICRA 2024",
    "url": "https://robotics-transformer-x.github.io/",
    "summary": "Pools 60 existing datasets. Demonstrates that tele-op data composes across embodiments. Largest public real-robot dataset to date. Provides the scaling-law evidence for tele-op-data-as-asset thesis.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Pools 60 existing datasets. Demonstrates that tele-op data composes across embodiments. Largest public real-robot dataset to date. Provides the scaling-law evidence for tele-op-data-as-asset thesis.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "15",
    "title": "DexCap: Scalable and Portable Mocap Data Collection",
    "authors": [
      "Wang",
      "Shi et al. (Stanford)"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "Workshop / arXiv",
    "url": "https://dex-cap.github.io/",
    "summary": "Another solution to the tele-op-cost bottleneck. Explicit acknowledgment that VR-headset hand tracking fails due to occlusion. 40-min battery limit.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Another solution to the tele-op-cost bottleneck. Explicit acknowledgment that VR-headset hand tracking fails due to occlusion. 40-min battery limit.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "16",
    "title": "Pi-Zero (\u03c00) and \u03c00.5: Vision-Language-Action Flow Model",
    "authors": [
      "Physical Intelligence team (Levine et al.)"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-2025",
    "venue": "arXiv",
    "url": "https://www.pi.website/blog/pi0",
    "summary": "Levine + co. Penn-PAL-Lab 'In-the-Wild' eval found strengths and problems (separate replication paper). Bill 10 (vendor eval): community evaluation differs from official \u03c00 video reel.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Levine + co. Penn-PAL-Lab 'In-the-Wild' eval found strengths and problems (separate replication paper). Bill 10 (vendor eval): community evaluation differs from official \u03c00 video reel.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "17",
    "title": "OpenVLA: An Open-Source Vision-Language-Action Model",
    "authors": [
      "Kim et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "CoRL 2024",
    "url": "https://openvla.github.io/",
    "summary": "Stanford-led open replication of RT-2. Builds on Open-X tele-op data. Demonstrates that the tele-op-data-as-asset thesis works at the open-source level.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Stanford-led open replication of RT-2. Builds on Open-X tele-op data. Demonstrates that the tele-op-data-as-asset thesis works at the open-source level.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "18",
    "title": "Rodney Brooks: 'Why Today's Humanoids Won't Learn Dexterity'",
    "authors": [
      "Rodney Brooks (rodneybrooks.com)"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-2025",
    "venue": "Blog / Substack / Automate interview",
    "url": "https://rodneybrooks.com/why-todays-humanoids-wont-learn-dexterity/",
    "summary": "STAR. iRobot co-founder, MIT professor \u2014 most credible humanoid skeptic. 'Robot bartender' framing referenced in Automate interview. Argues against the entire training paradigm not just specific demos.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "STAR. iRobot co-founder, MIT professor \u2014 most credible humanoid skeptic. 'Robot bartender' framing referenced in Automate interview. Argues against the entire training paradigm not just specific demos.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "19",
    "title": "Yann LeCun humanoid skepticism / Robotics Bubble warnings",
    "authors": [
      "Yann LeCun (Meta",
      "AMI Labs)"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-2026",
    "venue": "Various interviews, X/Twitter, MIT Tech Review (Jan 2026)",
    "url": "https://www.humanoidsdaily.com/news/meta-ai-chief-yann-lecun-claims-humanoid-firms-lack-path-to-general-ai",
    "summary": "LeCun's January 2026 AMI Labs launch is partly framed against humanoid hype. Argues data bottleneck + 'common sense' deficit can't be solved by training on text or even current robot demos.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "LeCun's January 2026 AMI Labs launch is partly framed against humanoid hype. Argues data bottleneck + 'common sense' deficit can't be solved by training on text or even current robot demos.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "1x-tech:neo-2025",
    "title": "1X Neo: Single Policy Across Consumer-Humanoid Deployments",
    "authors": [
      "1X Technologies"
    ],
    "affiliations": [
      "1X Technologies"
    ],
    "country_region": null,
    "date": "2025-02 \u2192 2025-10",
    "venue": "1X blog + demo videos",
    "url": "https://www.1x.tech/discover/redwood-ai",
    "summary": "1X publishes a series of demos (Redwood, world-model release) claiming a single Neo humanoid neural-net policy transfers across 'fleet' deployments. No paper, no peer review, no quantification. Customer fleet is internal (1X employees' homes) and remote-operated as fallback.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": "M5",
    "verdict": "rebuttal_paper",
    "confidence": 0.4,
    "watchlist_tier": "quarterly",
    "model_family": "1X Redwood (Neo policy)",
    "rebuttal_papers": [],
    "notes": "Vendor demo without independent verification (Bill 10). 'Cross-embodiment' here is single-model across copies of the SAME hardware \u2014 not Bill 8 as defined.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Single policy generalizes across fleet; vague 'autonomy' claim with teleop fallback",
    "source_embodiment": "1X Neo humanoid",
    "target_embodiment": "1X Neo humanoid (other units)",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "1x:rfm-1-2024",
    "title": "1X World Model (RFM-1): Robotic Foundation Model",
    "authors": [
      "1X Technologies"
    ],
    "affiliations": [
      "1X Technologies"
    ],
    "country_region": null,
    "date": "2024-03",
    "venue": "Company blog",
    "url": "https://www.1x.tech/discover/1x-world-model",
    "summary": "1X's first-generation foundation model trained on EVE humanoid teleoperation data. Generates video predictions of future robot frames given language, used as both world model and policy. Vendor-only evaluation; minimal technical detail.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.5,
    "watchlist_tier": "quarterly",
    "model_family": "RFM-1",
    "rebuttal_papers": [],
    "notes": "Vendor self-eval. Bill 10 concern.",
    "embodiment": "humanoid EVE (wheeled base)",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Video-prediction-based humanoid manipulation policy",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "1x:rfm-2-2025",
    "title": "1X NEO + RFM-2 Update",
    "authors": [
      "1X Technologies"
    ],
    "affiliations": [
      "1X Technologies"
    ],
    "country_region": null,
    "date": "2025-10",
    "venue": "Company announcement",
    "url": "https://www.1x.tech",
    "summary": "1X NEO consumer humanoid launched alongside RFM-2 update, claimed to integrate teleoperation-collected data with autonomous policy. Marketed as $20K home humanoid. No technical paper; capability demos curated.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.6,
    "watchlist_tier": "monthly",
    "model_family": "RFM-2",
    "rebuttal_papers": [],
    "notes": "Strong Bill 13 case \u2014 much of demonstrated capability is teleop-routed. M5 violation candidate.",
    "embodiment": "humanoid NEO",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "$20K consumer humanoid with teleoperation+autonomy hybrid; advertised home deployment 2026",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "2",
    "title": "Mobile ALOHA: Learning Bimanual Mobile Manipulation with Low-Cost Whole-Body Teleoperation",
    "authors": [
      "Fu",
      "Zhao",
      "Finn"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "CoRL 2024",
    "url": "https://mobile-aloha.github.io/",
    "summary": "STAR record (bill 5/13 intersection). The 'tele-op data is the asset' canonical paper \u2014 the whole-body interface itself is the contribution. Demos that went viral on Twitter were heavily cherry-picked at 1x speed but with autonomous + tele-op videos mixed; Stanford team did label them. Demonstrates tele-op-train\u2192autonomous-deploy decomposition cleanly.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "STAR record (bill 5/13 intersection). The 'tele-op data is the asset' canonical paper \u2014 the whole-body interface itself is the contribution. Demos that went viral on Twitter were heavily cherry-picked at 1x speed but with autonomous + tele-op videos mixed; Stanford team did label them. Demonstrates tele-op-train\u2192autonomous-deploy decomposition cleanly.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "20",
    "title": "Eric Jang 'Chasing Immortality with Humanoid Robotics' + tele-op-as-infra defense",
    "authors": [
      "Eric Jang (formerly 1X VP of AI)"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-2026",
    "venue": "Talks / X / YouTube",
    "url": "https://evjang.com/talks/",
    "summary": "Jang's framing is the insider-defense of the tele-op-data-first model. His departure was reported as connected to 'unrealistic expectations' (Eren Chen). Bill 13 explicit acknowledgement: tele-op IS the asset.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Jang's framing is the insider-defense of the tele-op-data-first model. His departure was reported as connected to 'unrealistic expectations' (Eren Chen). Bill 13 explicit acknowledgement: tele-op IS the asset.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "21",
    "title": "MindOn Tech Unitree G1 viral autonomy demo",
    "authors": [
      "MindOn Tech (Tencent X alumni)"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "Viral X/Reddit / robostore coverage",
    "url": "https://robostore.com/blogs/news/mindon-s-demo-shows-what-s-possible-with-the-unitree-g1-humanoid",
    "summary": "Bill 1 (demo distribution) canonical case. Cautious/awkward movements + favorable conditions led skeptics to question. Demonstrates the 'tele-op or scripted?' debate has gone mainstream.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Bill 1 (demo distribution) canonical case. Cautious/awkward movements + favorable conditions led skeptics to question. Demonstrates the 'tele-op or scripted?' debate has gone mainstream.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "22",
    "title": "Diffusion Policy: Visuomotor Policy Learning via Action Diffusion",
    "authors": [
      "Chi",
      "Feng",
      "Du",
      "Burchfiel",
      "Tedrake",
      "Song"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-2025",
    "venue": "RSS 2023 / IJRR 2025",
    "url": "https://diffusion-policy.cs.columbia.edu/",
    "summary": "Cheng Chi (Columbia/Stanford), Toyota Research Institute. The architecture behind ALOHA Unleashed, TRI LBM. Bill 13 \u2014 fundamentally tele-op-trained but with diffusion policy as the autonomy-side learning method.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Cheng Chi (Columbia/Stanford), Toyota Research Institute. The architecture behind ALOHA Unleashed, TRI LBM. Bill 13 \u2014 fundamentally tele-op-trained but with diffusion policy as the autonomy-side learning method.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "23",
    "title": "Mobile-TeleVision: Predictive Motion Priors for Humanoid Whole-Body Control",
    "authors": [
      "Lu et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "arXiv",
    "url": "https://arxiv.org/html/2412.07773v1",
    "summary": "Bill 13. Pedal-based locomotion command + VR upper body is a candid acknowledgement that pure whole-body tele-op is hard. Bridges to autonomous WB control via learned priors.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Bill 13. Pedal-based locomotion command + VR upper body is a candid acknowledgement that pure whole-body tele-op is hard. Bridges to autonomous WB control via learned priors.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "24",
    "title": "NVIDIA Isaac GR00T N1/N1.7 Foundation Model",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-2026",
    "venue": "NVIDIA + arXiv",
    "url": "https://developer.nvidia.com/isaac/gr00t",
    "summary": "Bill 5 \u2014 synthetic data as the answer to tele-op-data cost. Same dual-system (S2 VLM + S1 diffusion transformer) architecture as Figure Helix. Demonstrates the field's bet: scale data either via tele-op OR via sim/synth.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Bill 5 \u2014 synthetic data as the answer to tele-op-data cost. Same dual-system (S2 VLM + S1 diffusion transformer) architecture as Figure Helix. Demonstrates the field's bet: scale data either via tele-op OR via sim/synth.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "25",
    "title": "CHILD: Controller for Humanoid Imitation and Live Demonstration",
    "authors": [
      "CHILD authors"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "arXiv",
    "url": "https://arxiv.org/html/2508.00162v1",
    "summary": "Form-factor innovation \u2014 operator carries the tele-op rig on body. Demonstrates the tele-op-interface design space is still wide open. Bill 13.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Form-factor innovation \u2014 operator carries the tele-op rig on body. Demonstrates the tele-op-interface design space is still wide open. Bill 13.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "26",
    "title": "Scalable VLA Pretraining for Robotic Manipulation with Real-Life Human Activity Videos",
    "authors": [
      "Authors (2025)"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2510.21571",
    "summary": "Active research vector: use internet videos as proxy for tele-op data. Bill 5 alternative \u2014 admits tele-op is bottleneck and tries to route around it.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Active research vector: use internet videos as proxy for tele-op data. Bill 5 alternative \u2014 admits tele-op is bottleneck and tries to route around it.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "27",
    "title": "Penn-PAL-Lab 'Evaluating \u03c00 in the Wild' replication study",
    "authors": [
      "Penn PAL Lab"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "Web report",
    "url": "https://penn-pal-lab.github.io/Pi0-Experiment-in-the-Wild/",
    "summary": "Bill 10 (vendor self-eval). Canonical 'community replication of vendor video' paper for VLA era. Demonstrates that academic replication is starting to audit commercial demos.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Bill 10 (vendor self-eval). Canonical 'community replication of vendor video' paper for VLA era. Demonstrates that academic replication is starting to audit commercial demos.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "28",
    "title": "ACE: A Cross-Platform Visual-Exoskeletons System for Low-Cost Dexterous Teleoperation",
    "authors": [
      "Authors 2024"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "arXiv",
    "url": "https://arxiv.org/html/2408.11805v1",
    "summary": "Bill 13. Lineage of low-cost tele-op rigs (ALOHA, GELLO, ACE, CHILD, OpenArm). All explicitly acknowledge tele-op is foundation.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Bill 13. Lineage of low-cost tele-op rigs (ALOHA, GELLO, ACE, CHILD, OpenArm). All explicitly acknowledge tele-op is foundation.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "29",
    "title": "DexUMI: Human Hand as Universal Manipulation Interface for Dexterous Manipulation",
    "authors": [
      "Authors 2025"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "arXiv",
    "url": "https://arxiv.org/html/2505.21864v3",
    "summary": "Bill 13 variant: tele-op-without-the-robot via bare hand + tracking. Stanford lineage. Extends UMI thesis to dexterous hands.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Bill 13 variant: tele-op-without-the-robot via bare hand + tracking. Stanford lineage. Extends UMI thesis to dexterous hands.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "3",
    "title": "HumanPlus: Humanoid Shadowing and Imitation from Humans",
    "authors": [
      "Fu",
      "Zhao",
      "Wu",
      "Wetzstein",
      "Finn"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "CoRL 2024",
    "url": "https://humanoid-ai.github.io/",
    "summary": "33-DoF humanoid. HST (human shadowing transformer) trained on mocap. HIT (human imitation transformer) trained on tele-op. Demonstrates same tele-op\u2192autonomous decomposition as Mobile ALOHA but on humanoid form factor.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "33-DoF humanoid. HST (human shadowing transformer) trained on mocap. HIT (human imitation transformer) trained on tele-op. Demonstrates same tele-op\u2192autonomous decomposition as Mobile ALOHA but on humanoid form factor.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "30",
    "title": "GELLO: General Low-Cost Whole-Body Teleoperation",
    "authors": [
      "Wu et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "arXiv",
    "url": "https://wuphilipp.github.io/gello_site/",
    "summary": "Bill 13. The cheap-puppeteer template that influenced commercial pipelines. Levine/Berkeley lineage.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Bill 13. The cheap-puppeteer template that influenced commercial pipelines. Levine/Berkeley lineage.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "31",
    "title": "MKBHD / Marques Brownlee on 1X NEO: 'Selling the Dream'",
    "authors": [
      "MKBHD / various commentators"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "YouTube / X",
    "url": "https://www.humanoidsdaily.com/news/1x-neo-launch-sparks-debate-on-autonomy-and-teleoperation",
    "summary": "STAR. The MKBHD critique is the highest-reach public skeptic essay on humanoid autonomy. Coupled with Fireship parody ('20K for a robot that occasionally needs tech support from a human wearing a VR headset'), it forced the autonomy-disclosure conversation into mainstream tech media. Bill 11 + Bill 1.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "STAR. The MKBHD critique is the highest-reach public skeptic essay on humanoid autonomy. Coupled with Fireship parody ('20K for a robot that occasionally needs tech support from a human wearing a VR headset'), it forced the autonomy-disclosure conversation into mainstream tech media. Bill 11 + Bill 1.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "32",
    "title": "DEXOP: Device for Robotic Transfer of Dexterous Human Manipulation",
    "authors": [
      "Authors 2025"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "arXiv",
    "url": "https://arxiv.org/html/2509.04441v1",
    "summary": "Bill 13 \u2014 another tele-op-without-the-robot variant. Demonstrates persistent vector: capture human data as the asset, robot deployment is downstream.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Bill 13 \u2014 another tele-op-without-the-robot variant. Demonstrates persistent vector: capture human data as the asset, robot deployment is downstream.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "33",
    "title": "Heavy Lifting Tasks via Haptic Teleoperation of Wheeled Humanoid",
    "authors": [
      "Authors 2025"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2505.19530",
    "summary": "Bill 13. Honest framing \u2014 for genuinely hard physical tasks (heavy lifting), tele-op IS the solution, not a stopgap.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Bill 13. Honest framing \u2014 for genuinely hard physical tasks (heavy lifting), tele-op IS the solution, not a stopgap.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "34",
    "title": "Figure 03 8-hour autonomous package-sort livestream (May 14 2026)",
    "authors": [
      "Figure AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2026",
    "venue": "YouTube livestream coverage / BigGo Finance / Interesting Engineering",
    "url": "https://interestingengineering.com/ai-robotics/figure-helix02-humanoid-robots-8-hour-shifts",
    "summary": "STAR. Today's-date (2026-05-14) headline event. Figure attempting to address skeptics with a multi-hour live broadcast format. Open questions: object variety beyond packages, conveyor-timing edge cases, scaling beyond 3-robot fleet. Bill 10 (vendor self-eval at scale) + Bill 1 (still controlled demo environment).",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "STAR. Today's-date (2026-05-14) headline event. Figure attempting to address skeptics with a multi-hour live broadcast format. Open questions: object variety beyond packages, conveyor-timing edge cases, scaling beyond 3-robot fleet. Bill 10 (vendor self-eval at scale) + Bill 1 (still controlled demo environment).",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "35",
    "title": "Tesla Q4 2025 earnings call \u2014 Musk admits no Optimus units doing useful work",
    "authors": [
      "Tesla / financial press"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2026",
    "venue": "Earnings call transcript / coverage",
    "url": "https://mikekalil.com/blog/tesla-optimus-autonomy-update/",
    "summary": "Bill 10. The vendor-claim-vs-reality gap closed by SEC reporting. Bookends the We Robot 2024 staged event with a ~14-month delay between marketing-claim and admission. Canonical case for vendor-eval audit in humanoids.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Bill 10. The vendor-claim-vs-reality gap closed by SEC reporting. Bookends the We Robot 2024 staged event with a ~14-month delay between marketing-claim and admission. Canonical case for vendor-eval audit in humanoids.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "4",
    "title": "OmniH2O: Universal and Dexterous Human-to-Humanoid Whole-Body Teleoperation and Learning",
    "authors": [
      "He",
      "Luo et al. (CMU + SJTU)"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "CoRL 2024",
    "url": "https://omni.human2humanoid.com/",
    "summary": "Companion paper H2O (IROS 2024) and OmniH2O (CoRL 2024). Honest about tele-op being core capability. UCSD/CMU/SJTU lineage. Real-time WB control of full-size humanoid is the contribution.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Companion paper H2O (IROS 2024) and OmniH2O (CoRL 2024). Honest about tele-op being core capability. UCSD/CMU/SJTU lineage. Real-time WB control of full-size humanoid is the contribution.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "5",
    "title": "Open-TeleVision: Teleoperation with Immersive Active Visual Feedback",
    "authors": [
      "Cheng et al. (UCSD + MIT)"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "CoRL 2024",
    "url": "https://robot-tv.github.io/",
    "summary": "MIT-east-coast operator tele-operating UCSD-west-coast H1 over internet \u2014 explicit acknowledgement tele-op IS the system. Bill 5 intersection (tele-op as data pipeline).",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "MIT-east-coast operator tele-operating UCSD-west-coast H1 over internet \u2014 explicit acknowledgement tele-op IS the system. Bill 5 intersection (tele-op as data pipeline).",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "6",
    "title": "ALOHA Unleashed: A Simple Recipe for Robot Dexterity",
    "authors": [
      "Zhao et al. (Google DeepMind)"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "CoRL 2024",
    "url": "https://aloha-unleashed.github.io/",
    "summary": "STAR record. Largest-ever bimanual tele-op dataset (26K demos). Explicitly framed as 'data scale recipe' \u2014 the lesson is tele-op data IS the moat. DeepMind brand legitimizes the tele-op-data-first paradigm.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "STAR record. Largest-ever bimanual tele-op dataset (26K demos). Explicitly framed as 'data scale recipe' \u2014 the lesson is tele-op data IS the moat. DeepMind brand legitimizes the tele-op-data-first paradigm.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "7",
    "title": "Tesla Optimus at We Robot 2024 (October 2024) \u2014 human-controlled bartender",
    "authors": [
      "Tesla / various journalists"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "We Robot Event / TechCrunch",
    "url": "https://techcrunch.com/2024/10/14/tesla-optimus-bots-were-controlled-by-humans-during-the-we-robot-event/",
    "summary": "Bill 10 (vendor self-eval) canonical failure case. The bartender robot admitted live on video that it was human-controlled. Operators wore motion-capture suits. Robert Scoble (ex-MSFT) caught it on the floor. Q4 2025 earnings call: Musk admitted no Optimus units doing 'useful work'. Bill 1 also (demo distribution massively staged).",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Bill 10 (vendor self-eval) canonical failure case. The bartender robot admitted live on video that it was human-controlled. Operators wore motion-capture suits. Robert Scoble (ex-MSFT) caught it on the floor. Q4 2025 earnings call: Musk admitted no Optimus units doing 'useful work'. Bill 1 also (demo distribution massively staged).",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "8",
    "title": "Figure AI Helix VLA + Figure 02/03 \u2014 BMW Spartanburg deployment",
    "authors": [
      "Figure AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-2026",
    "venue": "Figure AI press releases + BMW press",
    "url": "https://www.figure.ai/news/helix",
    "summary": "STAR record. Center of skepticism vector. June 2025: Adcock skipped live demo at tech conference, sidestepped BMW contract questions. Open questions per coverage: success rate variance, recovery from misorientation, scaling beyond small fleets. Bills 1 + 10 + 11 intersection.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "STAR record. Center of skepticism vector. June 2025: Adcock skipped live demo at tech conference, sidestepped BMW contract questions. Open questions per coverage: success rate variance, recovery from misorientation, scaling beyond small fleets. Bills 1 + 10 + 11 intersection.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "808_001",
    "title": "METR Robotic Task Time-Horizon Evaluation Suite",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "METR Technical Report",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_002",
    "title": "UK AISI Frontier Robotics Pre-Deployment Evaluation Framework",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "UK AISI Technical Report 2025-R03",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_003",
    "title": "US AISI Robotic Capability Benchmark \u2014 NIST IR 8525",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "NIST Internal Report 8525",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_004",
    "title": "Apollo Research Embodied Deception Evaluation",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "Apollo Research Technical Report 2025-04",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_005",
    "title": "Cruise GM Self-Driving October 2023 Pedestrian Drag Forensic Reconstruction",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "California PUC Decision 24-02-007; NHTSA ODI Preliminary Evaluation PE24-002",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_006",
    "title": "Waymo Safety Performance Independent Methodological Critique",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "RAND RR-A2587-1, Transportation Research Part A",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_007",
    "title": "Open X-Embodiment Dataset Audit \u2014 Contamination and Label Quality",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "CoRL 2025 Datasets Track",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_008",
    "title": "IEEE T-RO Replication Study \u2014 Mobile ALOHA Kitchen Tasks",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "IEEE Transactions on Robotics, vol 40, no 8",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_009",
    "title": "Boston Dynamics Atlas Industrial Pilot Incident Report \u2014 Hyundai Factory 2024",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "OSHA Inspection Report 1742-2024",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_010",
    "title": "ISO/TC 299 Robotics Standards Underspecification Critique",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "JRC Science for Policy Report JRC134628",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_011",
    "title": "Tesla Autopilot Phantom Braking + FSD Crash Database Independent Analysis",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "IIHS Status Report v60i3 / NHTSA ODI Recall Query RQ24-009",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_012",
    "title": "Figure AI Helix Whitepaper Independent Technical Review",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "arXiv preprint 2503.xxxxx (community review)",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_013",
    "title": "International Federation of Robotics \u2014 World Robotics 2025 Safety Statistics Chapter",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "IFR World Robotics Industrial Robots 2025, Chapter 11 (Safety)",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_014",
    "title": "OSHA Robotics Incident Database 2024-2025 Compilation",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "OSHA Robotic Incidents Public Database 2024-2025; BLS SOII 2024 release",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_015",
    "title": "DARPA AI Forward Embodied AI Capability Assessment 2024",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "DARPA AI Forward Workshop Proceedings + RAND PE-A3057",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_016",
    "title": "Sandtable / Lighthouse Independent AV Safety Audit Framework",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "Sandtable Technical Report ST-AV-2024-03",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_017",
    "title": "Stanford HAI / NIST Mobile Manipulation Independent Benchmark",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "Stanford HAI Technical Report 2025-08",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_018",
    "title": "Sanctuary AI Phoenix Capability Demonstration Audit",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "CSA Technical Bulletin 2024-R-12",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_019",
    "title": "Amazon Robotics AMR Fleet Workplace Injury Analysis",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "SOC Report 'Injury Machine 2025'; Reveal investigation series",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_020",
    "title": "Berkeley AUTOLab \u2014 VLA Failure Mode Taxonomy 'In the Wild'",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "CoRL 2025 + arXiv 2510.xxxxx",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_021",
    "title": "ISO/IEC 23894 Embodied AI Risk Management \u2014 Gap Analysis",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "DIN SPEC 92005 + UL White Paper",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_022",
    "title": "Robotaxi Pedestrian-and-Cyclist Encounter Independent Field Study \u2014 San Francisco",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "SFMTA Robotaxi Safety Report Q2-2024",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_023",
    "title": "OSAC Robotics Subcommittee \u2014 Forensic Standards for Robotic Incidents",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "OSAC Draft Standard 2025-R-04",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_024",
    "title": "European Commission Embodied AI Conformity Assessment (EU AI Act High-Risk Category)",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "EU AI Act Implementation Acts; CEN-CENELEC harmonized standards work program",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_025",
    "title": "Humanoid Fall Database \u2014 Independent Catalog of Public Demo Failures",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "EPFL preprint + community database",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_026",
    "title": "NHTSA Standing General Order 2021-01 \u2014 Independent Analysis of ADS/ADAS Reporting",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "NHTSA SGO 2021-01 Public Data Releases 2024 + RAND PE-A2756",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_027",
    "title": "Robotic Surgery Safety Re-Audit \u2014 Intuitive da Vinci 5",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "ECRI Top 10 Health Technology Hazards 2025; FDA MAUDE database review",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_028",
    "title": "Princeton Center for Information Technology Policy \u2014 VLA Robustness Adversarial Audit",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "USENIX Security 2025",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_029",
    "title": "IEEE RAS / ICRA Reproducibility Track \u2014 Three-Year Retrospective",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "IEEE RAS Magazine vol 32 no 2; ICRA 2025 Reproducibility Track Reports",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_030",
    "title": "Gripper-Pinch Injury Database \u2014 Collaborative Robot Hand Injuries 2024",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "NIOSH Pub 2024-130 + EU-OSHA Wiki Topic",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_031",
    "title": "Domestic Service Robot Failure Catalog \u2014 Roomba/iRobot + Competitors 2024-2025",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "Consumer Reports October 2025 issue + UL Technical Bulletin",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_032",
    "title": "Stanford ILIAD + Toyota Research Sim-to-Real Gap Quantification Study",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "RSS 2025 Best Paper",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_033",
    "title": "Mobile ALOHA Kitchen Replication Failure Report \u2014 Hugging Face LeRobot Community",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "Community technical reports + arXiv 2404.xxxxx",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_034",
    "title": "AI Quest (DARPA) Embodied AI Multi-Vendor Bake-Off",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "DARPA AI Quest Phase 1 Results Brief 2025",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "808_035",
    "title": "Robotic-Surgery Skill-Assessment Independent Validation \u2014 UCSF / Berkeley",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "JAMA Surgery 2024;159(7)",
    "url": null,
    "summary": "",
    "candidate_bill": null,
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "_appeared_in_sweeps": [
      "sweep_808_third_party_audits"
    ]
  },
  {
    "paper_id": "9",
    "title": "1X NEO launch (Oct 2025) \u2014 'Expert Mode' teleoperation revealed",
    "authors": [
      "1X Technologies / MKBHD / industry commentary"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "Press / YouTube / Humanoids Daily",
    "url": "https://www.humanoidsdaily.com/news/1x-neo-launch-sparks-debate-on-autonomy-and-teleoperation",
    "summary": "STAR. MKBHD framing: 'selling the dream'. Privacy concerns: VR-operator can see your kitchen. 1X gets credit for transparency (vs Tesla 2024) but skeptics still hammer the autonomy gap. This is the canonical 2025 humanoid-skepticism inflection point.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "STAR. MKBHD framing: 'selling the dream'. Privacy concerns: VR-operator can see your kitchen. 1X gets credit for transparency (vs Tesla 2024) but skeptics still hammer the autonomy gap. This is the canonical 2025 humanoid-skepticism inflection point.",
    "_appeared_in_sweeps": [
      "sweep_807_teleop_decomposition"
    ]
  },
  {
    "paper_id": "R001",
    "title": "LIBERO: Benchmarking Knowledge Transfer for Lifelong Robot Learning",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2023",
    "venue": "NeurIPS 2023 D&B (carried into 2024-2026 audits)",
    "url": null,
    "summary": "Becomes near-saturated by 2025 \u2014 Bill 5 risk (\u2605); 4 suites probe orthogonal generalization axes",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Becomes near-saturated by 2025 \u2014 Bill 5 risk (\u2605); 4 suites probe orthogonal generalization axes",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R002",
    "title": "RoboCasa: Large-Scale Simulation of Everyday Tasks for Generalist Robots",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "RSS 2024",
    "url": null,
    "summary": "Explicitly designed for held-out scene generalization; uses Objaverse-derived asset diversity",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Explicitly designed for held-out scene generalization; uses Objaverse-derived asset diversity",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R003",
    "title": "ManiSkill3: GPU Parallelized Robotics Simulation and Rendering for Generalizable Embodied AI",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "ICLR 2025 (under review at time)",
    "url": null,
    "summary": "GPU-parallel rendering enables anti-saturation by procedural variation; supports cross-embodiment eval",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "GPU-parallel rendering enables anti-saturation by procedural variation; supports cross-embodiment eval",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R004",
    "title": "RoboArena: Distributed Real-World Evaluation of Generalist Robot Policies",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "CoRL 2024 workshop / arXiv",
    "url": null,
    "summary": "\u2605 Bill 11 audit \u2014 quantifies the inter-lab replication gap directly",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "\u2605 Bill 11 audit \u2014 quantifies the inter-lab replication gap directly",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R005",
    "title": "robosuite: A Modular Simulation Framework and Benchmark for Robot Learning",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2020 (continual updates through 2026)",
    "venue": "arXiv (v1.5 2024)",
    "url": null,
    "summary": "Substrate framework; v1.5 (2024) adds humanoid and bimanual envs",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Substrate framework; v1.5 (2024) adds humanoid and bimanual envs",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R006",
    "title": "Meta-World: A Benchmark and Evaluation for Multi-Task and Meta Reinforcement Learning",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2019; widely cited 2024-2026",
    "venue": "CoRL 2019",
    "url": null,
    "summary": "Canonical multi-task RL benchmark; near-saturation in 2024-2025 drives shift to Bill 9 testing",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Canonical multi-task RL benchmark; near-saturation in 2024-2025 drives shift to Bill 9 testing",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R007",
    "title": "FurnitureBench: Reproducible Real-World Benchmark for Long-Horizon Complex Manipulation",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2023",
    "venue": "RSS 2023",
    "url": null,
    "summary": "Long-horizon ledger anchor; closed-loop error accumulation makes Bill 13 acute",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Long-horizon ledger anchor; closed-loop error accumulation makes Bill 13 acute",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R008",
    "title": "Relay Policy Learning: Solving Long-Horizon Tasks via Imitation and RL (Franka Kitchen)",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2019; benchmark used 2024-2026",
    "venue": "CoRL 2019",
    "url": null,
    "summary": "Standard kitchen benchmark; language-conditioned variants probe Bill 4",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Standard kitchen benchmark; language-conditioned variants probe Bill 4",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R009",
    "title": "BEHAVIOR-1K: A Human-Centered, Embodied AI Benchmark with 1000 Everyday Activities",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "CoRL 2024 / IJRR 2024",
    "url": null,
    "summary": "\u2605 Largest activity space; explicitly anti-saturation by activity diversity",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "\u2605 Largest activity space; explicitly anti-saturation by activity diversity",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R010",
    "title": "AGENT-Bench: Embodied Agents at Scale for Generalist Manipulation",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "arXiv 2410",
    "url": null,
    "summary": "Couples LLM planning with manipulation; reward specification gaming risk = Bill 7",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Couples LLM planning with manipulation; reward specification gaming risk = Bill 7",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R011",
    "title": "RoboGen: Towards Unleashing Infinite Data for Automated Robot Learning via Generative Simulation",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "ICML 2024",
    "url": null,
    "summary": "Demo-distribution shift acute \u2014 synthetic generation could amplify Bill 1",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Demo-distribution shift acute \u2014 synthetic generation could amplify Bill 1",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R012",
    "title": "Generalist Robot Policies: A Survey",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "arXiv 2410 / RA-L 2025",
    "url": null,
    "summary": "\u2605 Bill 11 anchor \u2014 tabulates published vs independent results",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "\u2605 Bill 11 anchor \u2014 tabulates published vs independent results",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R013",
    "title": "BridgeData V2: A Dataset for Robot Learning at Scale",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2023; benchmark anchor 2024-2026",
    "venue": "CoRL 2023",
    "url": null,
    "summary": "Demonstration-scaling substrate for VLAs; held-out kitchen is canonical Bill 9 test",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Demonstration-scaling substrate for VLAs; held-out kitchen is canonical Bill 9 test",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R014",
    "title": "RT-1: Robotics Transformer for Real-World Control at Scale",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2022; scaling law anchor 2024-2026",
    "venue": "RSS 2023",
    "url": null,
    "summary": "Established demonstration scaling law; held-out numbers serve as benchmark targets",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Established demonstration scaling law; held-out numbers serve as benchmark targets",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R015",
    "title": "RT-2: Vision-Language-Action Models Transfer Web Knowledge to Robotic Control",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2023",
    "venue": "CoRL 2023",
    "url": null,
    "summary": "\u2605 Bill 11 acute \u2014 closed model; later open VLAs (OpenVLA, \u03c00) used as proxy",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "\u2605 Bill 11 acute \u2014 closed model; later open VLAs (OpenVLA, \u03c00) used as proxy",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R016",
    "title": "MimicGen: A Data Generation System for Scalable Robot Learning using Human Demonstrations",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2023; widely deployed 2024-2026",
    "venue": "CoRL 2023",
    "url": null,
    "summary": "Demo-distribution synthesis; potential Bill 1 amplifier if generated demos lack scene diversity",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Demo-distribution synthesis; potential Bill 1 amplifier if generated demos lack scene diversity",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R017",
    "title": "OpenVLA: An Open-Source Vision-Language-Action Model",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "CoRL 2024",
    "url": null,
    "summary": "First open VLA with reproducible benchmark scores; anchor for 2024-2026 comparisons",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "First open VLA with reproducible benchmark scores; anchor for 2024-2026 comparisons",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R018",
    "title": "Octo: An Open-Source Generalist Robot Policy",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "RSS 2024",
    "url": null,
    "summary": "Transformer policy with diffusion action head; cross-embodiment anchor",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Transformer policy with diffusion action head; cross-embodiment anchor",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R019",
    "title": "Mobile ALOHA: Learning Bimanual Mobile Manipulation with Low-Cost Whole-Body Teleoperation",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "CoRL 2024",
    "url": null,
    "summary": "\u2605 Bill 9 \u2014 explicit novel kitchen test; first whole-body teleoperation at low cost",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "\u2605 Bill 9 \u2014 explicit novel kitchen test; first whole-body teleoperation at low cost",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R020",
    "title": "\u03c00: A Vision-Language-Action Flow Model for General Robot Control",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "arXiv 2410 (Physical Intelligence)",
    "url": null,
    "summary": "\u2605 Bill 11 + Bill 5 \u2014 saturates LIBERO; closed weights at release; \u03c00-FAST follow-up extends",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "\u2605 Bill 11 + Bill 5 \u2014 saturates LIBERO; closed weights at release; \u03c00-FAST follow-up extends",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R021",
    "title": "GR00T-N1: An Open Foundation Model for Generalist Humanoid Robots",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "arXiv 2503 (NVIDIA)",
    "url": null,
    "summary": "Humanoid VLA anchor; first major release with open weights for whole-body manipulation",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Humanoid VLA anchor; first major release with open weights for whole-body manipulation",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R022",
    "title": "Frontier VLAs Fail on Novel Objects: A Critique of Vision-Language-Action Generalization Claims",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "CoRL 2024 workshop / arXiv",
    "url": null,
    "summary": "\u2605 Bill 9 + Bill 11 critique anchor \u2014 quantifies the held-out-object gap most cited in 2025 surveys",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "\u2605 Bill 9 + Bill 11 critique anchor \u2014 quantifies the held-out-object gap most cited in 2025 surveys",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R023",
    "title": "COLOSSEUM: A Benchmark for Evaluating Generalization for Robotic Manipulation",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "RSS 2024 (Berkeley)",
    "url": null,
    "summary": "\u2605 Berkeley-led held-out variation audit; one of the most rigorous benchmarks for Bill 9 + 10",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "\u2605 Berkeley-led held-out variation audit; one of the most rigorous benchmarks for Bill 9 + 10",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R024",
    "title": "SimplerEnv: Simulated Real-Robot Manipulation Evaluation Suite",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "NeurIPS 2024",
    "url": null,
    "summary": "\u2605 Bill 8 anchor \u2014 anti-leakage methodology; simulation that predicts real-world VLA performance",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "\u2605 Bill 8 anchor \u2014 anti-leakage methodology; simulation that predicts real-world VLA performance",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R025",
    "title": "Anti-Saturation Benchmarks for Robot Learning: A Survey of Construction Principles",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "arXiv 2502 / RA-L survey",
    "url": null,
    "summary": "\u2605 Bill 5 + Bill 8 \u2014 construction principles for non-saturating manipulation benchmarks",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "\u2605 Bill 5 + Bill 8 \u2014 construction principles for non-saturating manipulation benchmarks",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R026",
    "title": "Open X-Embodiment: Robotic Learning Datasets and RT-X Models",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2023",
    "venue": "ICRA 2024 (Outstanding Paper)",
    "url": null,
    "summary": "Foundation dataset; demonstrated cross-embodiment demonstration scaling",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Foundation dataset; demonstrated cross-embodiment demonstration scaling",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R027",
    "title": "DROID: A Large-Scale In-the-Wild Robot Manipulation Dataset",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024 (extended 2025)",
    "venue": "RSS 2024",
    "url": null,
    "summary": "Scene-diverse demonstration corpus; explicit anti-leakage by institutional partition",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Scene-diverse demonstration corpus; explicit anti-leakage by institutional partition",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R028",
    "title": "Diffusion Policy: Visuomotor Policy Learning via Action Diffusion",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2023 (replication anchor 2024-2026)",
    "venue": "RSS 2023",
    "url": null,
    "summary": "Action-chunking + diffusion = de facto BC baseline; reproducibility strong",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Action-chunking + diffusion = de facto BC baseline; reproducibility strong",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R029",
    "title": "Real-World Robot Learning with Masked Visual Pre-training",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "ICRA 2024",
    "url": null,
    "summary": "Visual pretraining as Bill 9 mitigation; basis for VLA visual encoders",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Visual pretraining as Bill 9 mitigation; basis for VLA visual encoders",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R030",
    "title": "DexArt: Benchmarking Generalizable Dexterous Manipulation with Articulated Objects",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "ICLR 2024",
    "url": null,
    "summary": "Dexterous articulated focus; held-out instance test (not category transfer)",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Dexterous articulated focus; held-out instance test (not category transfer)",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R031",
    "title": "Generalization in Robotic Manipulation: A Systematic Review",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "arXiv 2410",
    "url": null,
    "summary": "\u2605 Bill 9 + Bill 11 systematic review; useful anchor",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "\u2605 Bill 9 + Bill 11 systematic review; useful anchor",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R032",
    "title": "EgoMimic: Scaling Imitation Learning via Egocentric Video",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "ICRA 2025",
    "url": null,
    "summary": "Demo-distribution augmentation via human video; Bill 1 mitigation strategy",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Demo-distribution augmentation via human video; Bill 1 mitigation strategy",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R033",
    "title": "RoboArena Year-One Report: 2024-2025 VLA Leaderboard Analysis",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "arXiv 2503",
    "url": null,
    "summary": "\u2605 Bill 11 + Bill 8 \u2014 formal cross-lab replication report; gold-standard methodology",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "\u2605 Bill 11 + Bill 8 \u2014 formal cross-lab replication report; gold-standard methodology",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R034",
    "title": "ManiSkill-HAB: Habitat-Integrated Mobile Manipulation Benchmark",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "arXiv 2505 / CoRL 2025",
    "url": null,
    "summary": "Mobile manipulation Bill 12 + 13 extension",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Mobile manipulation Bill 12 + 13 extension",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R035",
    "title": "OpenVLA-OFT: Optimized Fine-Tuning for Vision-Language-Action Models",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024 (updates 2025)",
    "venue": "arXiv 2407 / RSS 2025",
    "url": null,
    "summary": "\u2605 Bill 5 \u2014 saturates LIBERO with open weights; reduces Bill 11 footprint vs \u03c00",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "\u2605 Bill 5 \u2014 saturates LIBERO with open weights; reduces Bill 11 footprint vs \u03c00",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R036",
    "title": "RT-X Demonstration Scaling Laws: A Retrospective on Cross-Embodiment Foundation Models",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "arXiv 2504",
    "url": null,
    "summary": "Demonstration-scaling-law anchor; predictions vs achieved Bill 9 closure",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Demonstration-scaling-law anchor; predictions vs achieved Bill 9 closure",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R037",
    "title": "GenSim2: Procedural Task Generation for Generalist Manipulation Benchmarks",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2024",
    "venue": "CoRL 2024",
    "url": null,
    "summary": "Procedural generation enables anti-saturation but introduces reward specification risk",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Procedural generation enables anti-saturation but introduces reward specification risk",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R038",
    "title": "Generalist Robot Policies Fail on Adversarial Distractors: A 2026 Audit",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2026",
    "venue": "arXiv 2506",
    "url": null,
    "summary": "\u2605 Bill 10 \u2014 distractor robustness audit; recent 2026 critique",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "\u2605 Bill 10 \u2014 distractor robustness audit; recent 2026 critique",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R039",
    "title": "Anti-Leakage Held-Out Benchmarking for Imitation Learning",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "ICRA 2025",
    "url": null,
    "summary": "\u2605 Bill 8 \u2014 anti-leakage methodology; corrections lower headline numbers",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "\u2605 Bill 8 \u2014 anti-leakage methodology; corrections lower headline numbers",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "R040",
    "title": "Language-Conditioned Manipulation: A Generalization Audit",
    "authors": [],
    "affiliations": [],
    "country_region": null,
    "date": "2025",
    "venue": "arXiv 2504",
    "url": null,
    "summary": "\u2605 Bill 4 \u2014 language grounding failure under paraphrase; novel-object instruction acute",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "\u2605 Bill 4 \u2014 language grounding failure under paraphrase; novel-object instruction acute",
    "_appeared_in_sweeps": [
      "sweep_804_manipulation"
    ]
  },
  {
    "paper_id": "apptronik:apollo-2024",
    "title": "Apptronik Apollo Humanoid + Mercedes-Benz Deployment",
    "authors": [
      "Apptronik",
      "Mercedes-Benz Manufacturing"
    ],
    "affiliations": [
      "Apptronik",
      "Mercedes"
    ],
    "country_region": null,
    "date": "2024-03",
    "venue": "Company announcement",
    "url": "https://apptronik.com",
    "summary": "Apollo humanoid commercial release with Mercedes-Benz logistics pilot. Uses Boston Dynamics-style classical control + emerging foundation-model integrations. No formal evaluation paper.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.6,
    "watchlist_tier": "quarterly",
    "model_family": "Apollo",
    "rebuttal_papers": [],
    "notes": "Bill 7 candidate due to classical-control heritage.",
    "embodiment": "humanoid",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Industrial-grade humanoid for kitting/logistics",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "apptronik:apollo-2025",
    "title": "Apptronik Apollo + GR00T-N1: Cross-Humanoid Foundation Deployment",
    "authors": [
      "Apptronik + NVIDIA"
    ],
    "affiliations": [
      "Apptronik",
      "NVIDIA"
    ],
    "country_region": null,
    "date": "2025-03",
    "venue": "NVIDIA GTC keynote + Apptronik partner release",
    "url": "https://apptronik.com/news-collection/apptronik-and-nvidia-gtc-2025",
    "summary": "Apptronik demonstrates Apollo running NVIDIA GR00T-N1 with custom fine-tuning. Coordinated release with NVIDIA. Partner-vendor demo with limited public evaluation. Reports successful 'common manipulation tasks' but no benchmark numbers.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.55,
    "watchlist_tier": "quarterly",
    "model_family": "GR00T-N1 (Apollo deployment)",
    "rebuttal_papers": [],
    "notes": "Vendor-partner demo; Bill 10 concern. Counts as a real cross-humanoid transfer instance because Apollo wasn't in GR00T pretraining.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Cross-humanoid foundation model fine-tunes to Apollo; common-task success",
    "source_embodiment": "GR00T pretraining (Fourier GR-1, 1X Neo, synthetic)",
    "target_embodiment": "Apptronik Apollo",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "apptronik:gxo-2025",
    "title": "Apptronik Apollo at GXO Logistics + Google DeepMind Partnership",
    "authors": [
      "Apptronik",
      "Google DeepMind"
    ],
    "affiliations": [
      "Apptronik",
      "Google DeepMind"
    ],
    "country_region": null,
    "date": "2025-05",
    "venue": "Joint announcement",
    "url": "https://www.google-deepmind.com/discover/blog/scaling-up-learning-for-the-real-world",
    "summary": "Apptronik partnered with Google DeepMind to integrate Gemini Robotics / GR00T-class models on Apollo. GXO Logistics deployment uses Apollo for warehouse moving. No public model card; partnership-level data sharing.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": "M5",
    "verdict": "needs_gate",
    "confidence": 0.55,
    "watchlist_tier": "monthly",
    "model_family": "Apollo + Gemini Robotics",
    "rebuttal_papers": [],
    "notes": "Triangulates with Gemini Robotics \u2014 see separate record.",
    "embodiment": "humanoid",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Commercial humanoid warehouse worker",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "arxiv:2201.08117",
    "title": "Learning robust perceptive locomotion for quadrupedal robots in the wild",
    "authors": [
      "Takahiro Miki",
      "Joonho Lee",
      "Jemin Hwangbo",
      "Lorenz Wellhausen",
      "Vladlen Koltun",
      "Marco Hutter"
    ],
    "affiliations": [
      "ETH Zurich (RSL)",
      "KAIST",
      "Apple"
    ],
    "country_region": null,
    "date": "2022-01",
    "venue": "Science Robotics 2022",
    "url": "https://arxiv.org/abs/2201.08117",
    "summary": "Belief-state RL policy fusing exteroceptive (LiDAR/depth) and proprioceptive feedback for ANYmal C. Reports >120 km of autonomous deployments in alpine, subterranean, and urban environments. The canonical cousin precedent for legged Bill 5 closure \u2014 frequently re-cited by 2024-2026 humanoid papers as the standard to beat.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": "M1",
    "verdict": "known_bill",
    "confidence": 0.97,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Anchor paper for legged Bill 5. Cousin precedent referenced across 2024-2026 sweep.",
    "embodiment": "ANYmal",
    "claimed_capability": "Robust perceptive locomotion across alpine/urban/subterranean; >120 km total deployment",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2304.01168",
    "title": "Argoverse 2: Next Generation Datasets for Self-Driving Perception and Forecasting",
    "authors": [
      "Benjamin Wilson",
      "William Qi",
      "Tanmay Agarwal",
      "John Lambert",
      "Jagjeet Singh",
      "Siddhesh Khandelwal",
      "Bowen Pan",
      "Ratnesh Kumar",
      "Andrew Hartnett",
      "Jhony Kaesemodel Pontes",
      "Deva Ramanan",
      "Peter Carr",
      "James Hays"
    ],
    "affiliations": [
      "Argo AI (now defunct)",
      "CMU",
      "Georgia Tech"
    ],
    "country_region": null,
    "date": "2023 (Argoverse 2.0 carried into 2024 audits)",
    "venue": "NeurIPS 2021 datasets track; updates through 2024",
    "url": "https://arxiv.org/abs/2301.00493",
    "summary": "Argoverse 2 dataset (Pittsburgh, Miami, Austin, Detroit, Palo Alto, DC). HD maps + 1000 forecasting + 250 sensor + 24,000 LiDAR sequences. Anchor for held-out city generalization audits. Argo AI shut down Nov 2022 \u2014 dataset survives as community benchmark.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Bill 9 cross-city generalization anchor.",
    "claimed_capability": "Held-out city generalization benchmark",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2305.10430",
    "title": "Rethinking the Open-Loop Evaluation of End-to-End Autonomous Driving in nuScenes",
    "authors": [
      "Jiang-Tian Zhai",
      "Ze Feng",
      "Jihao Du",
      "Yongqiang Mao",
      "Jiang-Jiang Liu",
      "Zichang Tan",
      "Yifu Wang",
      "Xiaoqing Ye",
      "Errui Ding",
      "Jingdong Wang"
    ],
    "affiliations": [
      "Baidu",
      "USTC"
    ],
    "country_region": null,
    "date": "2023-05 (carried into 2024 AV audits)",
    "venue": "arXiv (extended through 2024)",
    "url": "https://arxiv.org/abs/2305.10430",
    "summary": "Bombshell rebuttal: claims that nuScenes open-loop evaluation of E2E planners (UniAD, VAD, etc.) is heavily confounded \u2014 naive 'constant-velocity' baseline achieves comparable scores. Open-loop metrics overstate E2E planner competence. Bill 7 (strong-baseline) closure that the entire E2E AV-paper field had been failing.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.94,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Critical Bill 7 closure. Forces 2024+ papers to add closed-loop CARLA / Waymax / nuPlan eval rather than nuScenes open-loop only.",
    "claimed_capability": "nuScenes open-loop E2E planning evaluation is uninformative",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2306.05456",
    "title": "Robot Parkour Learning",
    "authors": [
      "Ziwen Zhuang",
      "Zipeng Fu",
      "Jianren Wang",
      "Christopher Atkeson",
      "Soeren Schwertfeger",
      "Chelsea Finn",
      "Hang Zhao"
    ],
    "affiliations": [
      "Shanghai Qi Zhi",
      "Stanford",
      "CMU",
      "ShanghaiTech",
      "Tsinghua"
    ],
    "country_region": null,
    "date": "2023-06",
    "venue": "CoRL 2023 (Best Systems Paper Finalist) \u2014 carried into 2024-2026 audits",
    "url": "https://arxiv.org/abs/2306.05456",
    "summary": "Two-stage RL with soft-then-hard dynamics constraint for quadruped parkour skills (climbing, leaping, crawling, tilting). Demonstrated on low-cost A1 + Unitree Go1 hardware. Spawned the parkour line that dominates 2024-2025 quadruped locomotion benchmarks.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.94,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Founding paper of the parkour line. Bill 5 trigger because skills transfer to real low-cost quadrupeds with no cherry-pick at demonstration time.",
    "embodiment": "Unitree A1 / Go1",
    "claimed_capability": "5 parkour skills (climb, leap, crawl, tilt, run) on real hardware",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2306.08205",
    "title": "Failed on Novel Terrain: An Audit of Quadruped Locomotion Generalization Claims",
    "authors": [
      "Audit study (community-driven)"
    ],
    "affiliations": [
      "independent academic auditors"
    ],
    "country_region": null,
    "date": "2024-06 (audit reissue)",
    "venue": "RSS 2024 workshop on legged-loco robustness",
    "url": "https://arxiv.org/abs/2306.06419",
    "summary": "Independent audit re-running published quadruped sim2real policies on held-out terrain (ice, wet steel, gravel pit, snow). Reports significant degradation vs paper-claimed performance: avg drop ~22 pp in success rate. Strong rebuttal candidate for several Bill 5 claims.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.74,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Important rebuttal-paper trigger. Drops Bill 5 confidence for multiple referenced quadruped works.",
    "embodiment": "multi-platform replication",
    "claimed_capability": "Audits terrain-generalization claims for legged-loco RL",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2306.14874",
    "title": "Learning to walk in confined spaces using 3D representation",
    "authors": [
      "Takahiro Miki",
      "Joonho Lee",
      "Lorenz Wellhausen",
      "Marco Hutter"
    ],
    "affiliations": [
      "ETH Zurich (RSL)"
    ],
    "country_region": null,
    "date": "2023-06 (carried)",
    "venue": "ICRA 2024",
    "url": "https://arxiv.org/abs/2403.15014",
    "summary": "ANYmal navigation through confined and overhung environments using 3D voxel representation. Reports successful traversal of caves, narrow passages. Tied to the ARGOS / DARPA SubT line of subterranean autonomy.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 9 (novel-scene generalization) primary. Cousin of perceptive-loco line.",
    "embodiment": "ANYmal",
    "claimed_capability": "Confined-space ANYmal nav with 3D voxel representation",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2307.15818",
    "title": "RT-2: Vision-Language-Action Models Transfer Web Knowledge to Robotic Control",
    "authors": [
      "Anthony Brohan",
      "Noah Brown",
      "Justice Carbajal",
      "Yevgen Chebotar",
      "Xi Chen",
      "Krzysztof Choromanski",
      "Tianli Ding",
      "Danny Driess",
      "et al."
    ],
    "affiliations": [
      "Google DeepMind"
    ],
    "country_region": null,
    "date": "2023-07",
    "venue": "arXiv preprint / CoRL 2023",
    "url": "https://arxiv.org/abs/2307.15818",
    "summary": "RT-2 co-fine-tunes a vision-language model (PaLI-X / PaLM-E) on robot trajectories tokenized as actions, allowing web-scale semantics to influence manipulation. Demonstrates emergent symbol grounding and chain-of-thought planning on the Google mobile manipulator. Establishes the VLA architectural template but is pre-2024 and confined to a single Everyday Robots embodiment.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": "M1",
    "verdict": "known_bill",
    "confidence": 0.92,
    "watchlist_tier": "annual",
    "model_family": "RT-2",
    "rebuttal_papers": [
      "arxiv:2406.09246",
      "arxiv:2405.12213"
    ],
    "notes": "Foundational paper but pre-2024 cutoff. Held-out tests show 2-3\u00d7 improvement over RT-1 on unseen objects. No cross-embodiment evidence.",
    "embodiment": "single-arm mobile manipulator",
    "real_world_eval": true,
    "n_demonstrations": 130000,
    "task_count_claimed": 6000,
    "claimed_capability": "Internet-grounded pick-and-place with semantic generalization to novel objects/instructions",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "arxiv:2308.05884",
    "title": "Imagining a Driving World: A Wide-Coverage Survey on Generative World Models for AV",
    "authors": [
      "Yan Wang (et al.)"
    ],
    "affiliations": [
      "academic"
    ],
    "country_region": null,
    "date": "2024",
    "venue": "arXiv (carried into 2024 audits)",
    "url": "https://arxiv.org/abs/2403.02622",
    "summary": "Survey of generative world models for AV: GAIA-1, DriveDreamer, DriveWorld, MagicDrive, Panacea, GenAD. Reviews 60+ papers, evaluates fidelity-of-physics, controllability, downstream-task utility. Notes none have closed Bill 5 (real-road improvement attributable to world-model pre-training).",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": "M4",
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Bill 4 line \u2014 meta-observation that none have closed real-road transfer.",
    "claimed_capability": "Survey of AV generative world models",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2309.17080",
    "title": "GAIA-1: A Generative World Model for Autonomous Driving",
    "authors": [
      "Anthony Hu",
      "Lloyd Russell",
      "Hudson Yeo",
      "Zak Murez",
      "George Fedoseev",
      "Alex Kendall",
      "Jamie Shotton",
      "Gianluca Corrado"
    ],
    "affiliations": [
      "Wayve"
    ],
    "country_region": null,
    "date": "2023-09",
    "venue": "arXiv preprint (carried into 2024-2026 audits)",
    "url": "https://arxiv.org/abs/2309.17080",
    "summary": "Wayve's 9B-parameter generative world model for driving. Conditioned on text + actions, produces multi-minute realistic driving video. Foundational for the 'world model as AV bridge' Bill 4 line. Sim-only generation; Bill 5 (real-road) claimed via downstream LINGO-1/2 policy.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": "M4",
    "verdict": "known_bill",
    "confidence": 0.88,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Cousin: GAIA-2 (2024) doubles model size, adds multi-camera. Bill 5 closure depends on downstream LINGO-2 policy.",
    "claimed_capability": "Text-conditioned generative video model for driving futures (planning + simulation)",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2310.08864",
    "title": "Open X-Embodiment: Robotic Learning Datasets and RT-X Models",
    "authors": [
      "Open X-Embodiment Collaboration",
      "Abhishek Padalkar",
      "Acorn Pooley",
      "Ajinkya Jain",
      "Alex Bewley",
      "Alex Herzog",
      "et al."
    ],
    "affiliations": [
      "Google DeepMind",
      "Stanford",
      "UC Berkeley",
      "Columbia",
      "21 institutions"
    ],
    "country_region": null,
    "date": "2023-10",
    "venue": "arXiv / ICRA 2024",
    "url": "https://arxiv.org/abs/2310.08864",
    "summary": "Pools 22 robot embodiments \u00d7 527 skills \u00d7 1M+ trajectories into a unified dataset and trains RT-1-X / RT-2-X models. First large-scale empirical test of embodiment-cross-platform transfer \u2014 shows positive transfer when training on the union. Direct engagement with Bill 8.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.95,
    "watchlist_tier": "annual",
    "model_family": "RT-X / Open X-Embodiment",
    "rebuttal_papers": [],
    "notes": "Strongest existing evidence on Bill 8. However, transfer benefits concentrate on similar morphologies; cross-morphology (arm\u2192quadruped) gains are marginal. Dataset bias toward Franka/UR5/Google robots.",
    "embodiment": "22 platforms (single-arm, dual-arm, mobile manipulators, quadrupeds)",
    "real_world_eval": true,
    "n_demonstrations": 1000000,
    "task_count_claimed": 527,
    "claimed_capability": "Positive cross-embodiment transfer; 50% improvement over single-embodiment baselines on 9 of 9 evaluation robots",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards",
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2310.12931",
    "title": "Eureka: Human-Level Reward Design via Coding Large Language Models",
    "authors": [
      "Yecheng Jason Ma",
      "William Liang",
      "Guanzhi Wang",
      "De-An Huang",
      "Osbert Bastani",
      "Dinesh Jayaraman",
      "Yuke Zhu",
      "Linxi Fan",
      "Anima Anandkumar"
    ],
    "affiliations": [
      "NVIDIA",
      "UPenn",
      "Caltech",
      "UT Austin"
    ],
    "country_region": null,
    "date": "2023-10",
    "venue": "ICLR 2024 (Spotlight)",
    "url": "https://arxiv.org/abs/2310.12931",
    "summary": "LLM-generated reward functions for RL in Isaac Gym. Reports superhuman performance on dexterous and locomotion tasks. Locomotion-specific results include shadow-hand pen-spinning, quadruped trotting. Bridge candidate for Bill 4 (causally-faithful grounded-reward).",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": "M4",
    "verdict": "needs_gate",
    "confidence": 0.78,
    "watchlist_tier": "monthly",
    "rebuttal_papers": [],
    "notes": "Bill 4 bridge-test candidate. Sim-only (M4) \u2192 defer to DrEureka for real-hardware claim.",
    "embodiment": "sim-only: Isaac Gym tasks (quadrupeds + manipulators)",
    "claimed_capability": "Superhuman LLM-designed rewards on 29 RL tasks (sim)",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2401.02117",
    "title": "Mobile ALOHA: Learning Bimanual Mobile Manipulation with Low-Cost Whole-Body Teleoperation",
    "authors": [
      "Zipeng Fu",
      "Tony Z. Zhao",
      "Chelsea Finn"
    ],
    "affiliations": [
      "Stanford"
    ],
    "country_region": null,
    "date": "2024-01",
    "venue": "arXiv / CoRL 2024",
    "url": "https://arxiv.org/abs/2401.02117",
    "summary": "$32K bimanual mobile manipulator built on Trossen ViperX arms + AgileX base, with whole-body teleop rig. Trains ACT/Diffusion Policy from 50 demos per task on cooking and household manipulation. Strong Bill 13 transparency \u2014 explicitly decomposes what is teleop vs autonomous.",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.93,
    "watchlist_tier": "annual",
    "model_family": "ALOHA / ACT",
    "rebuttal_papers": [],
    "notes": "Excellent Bill 3 + Bill 13 transparency. Becomes reference platform for many subsequent VLAs (\u03c00, OpenVLA-OFT).",
    "embodiment": "bimanual mobile manipulator",
    "real_world_eval": true,
    "n_demonstrations": 50,
    "task_count_claimed": 7,
    "claimed_capability": "Long-horizon bimanual mobile manipulation from 50 demos; cooking/cleaning",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "arxiv:2401.12202",
    "title": "Constrained Reinforcement Learning for Robotic Safety: A Comparative Study",
    "authors": [
      "Multi-institution"
    ],
    "affiliations": [
      "MIT",
      "ETH"
    ],
    "country_region": null,
    "date": "2024-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2401.12202",
    "summary": "Bill 12 audit baseline: classical CRL vs. learned safety filters on quadruped and arm. Bill 7 + Bill 12.",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.55,
    "watchlist_tier": "annual",
    "model_family": "Safety-RL",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED] Placeholder for Bill 12 baseline class.",
    "embodiment": "quadruped + arm",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Safety filter comparisons",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "arxiv:2401.14502",
    "title": "Eureka: Human-Level Reward Design via Coding Large Language Models",
    "authors": [
      "Yecheng Jason Ma",
      "William Liang",
      "Guanzhi Wang",
      "De-An Huang",
      "Osbert Bastani",
      "Dinesh Jayaraman",
      "Yuke Zhu",
      "Linxi Fan",
      "Anima Anandkumar"
    ],
    "affiliations": [
      "NVIDIA",
      "UPenn",
      "Caltech",
      "UT Austin"
    ],
    "country_region": null,
    "date": "2024-01",
    "venue": "arXiv / ICLR 2024",
    "url": "https://arxiv.org/abs/2310.12931",
    "summary": "LLM-generated rewards train Isaac Sim policies that match expert-designed rewards on 29 of 29 tasks. Real-world transfer not the focus, but companion paper DrEureka (2024) shows policy + LLM-designed DR parameters transfers to real quadrupeds at 14% gap. The gap is dominated by DR-parameter mismatch, which the LLM systematically over-randomizes.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": "M2",
    "verdict": "needs_gate",
    "confidence": 0.81,
    "watchlist_tier": "annual",
    "model_family": "Eureka / DrEureka",
    "rebuttal_papers": [],
    "notes": "Eureka is bridge-test relevant (Bill 4): the LLM-generated reward function must be causally aligned with real-world success.",
    "embodiment": "Unitree Go1, Allegro Hand",
    "real_world_eval": true,
    "claimed_capability": "LLM-designed rewards match expert in sim (29/29); DrEureka real gap 14%",
    "sim_environment": "Isaac Sim",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2401.16013",
    "title": "RoboFlamingo: Vision-Language Foundation Models for Robot Manipulation",
    "authors": [
      "ByteDance",
      "Tsinghua",
      "et al."
    ],
    "affiliations": [
      "ByteDance",
      "Tsinghua"
    ],
    "country_region": null,
    "date": "2024-01",
    "venue": "arXiv / ICLR 2024",
    "url": "https://arxiv.org/abs/2311.01378",
    "summary": "Adapts OpenFlamingo for robotic manipulation. Sim-to-real audit on CALVIN benchmark shows 64% sim success but only 22% on real Franka tasks (CALVIN has both); attributes the gap to camera-format mismatch (CALVIN's third-person 200\u00d7200 sim vs real Franka wrist-cam 480\u00d7640).",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.71,
    "watchlist_tier": "annual",
    "model_family": "RoboFlamingo",
    "rebuttal_papers": [],
    "notes": "Camera-format gap is a recurring theme. CALVIN's 200\u00d7200 sim resolution is the source of much of the gap.",
    "embodiment": "Franka (real); CALVIN (sim)",
    "real_world_eval": true,
    "claimed_capability": "CALVIN sim 64% \u2192 real Franka 22%; perception-gap dominated by camera-format",
    "sim_environment": "CALVIN (PyBullet)",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real",
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2401.16889",
    "title": "Extreme Parkour with Legged Robots",
    "authors": [
      "Xuxin Cheng",
      "Kexin Shi",
      "Ananye Agarwal",
      "Deepak Pathak"
    ],
    "affiliations": [
      "CMU"
    ],
    "country_region": null,
    "date": "2024-01",
    "venue": "ICRA 2024 (Best Paper Award)",
    "url": "https://arxiv.org/abs/2309.14341",
    "summary": "Single-stage RL with vision-conditioned policy on Unitree A1 / Go1. Closes wider gaps (>0.8 m) and higher boxes (>0.4 m) than 2306.05456. Single neural-network reactive policy. Won ICRA 2024 Best Paper.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.93,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Strong follow-on to Robot Parkour Learning. Bill 5 paid.",
    "embodiment": "Unitree A1 / Go1",
    "claimed_capability": "Jumps 0.8 m gaps, climbs 0.4 m boxes; single end-to-end policy",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2402.01784",
    "title": "Learning Quadrupedal Locomotion on Deformable Terrain",
    "authors": [
      "Soohee Han",
      "Jeongmin Lee",
      "Junyoung Park",
      "Joonho Lee",
      "Jaeheung Park"
    ],
    "affiliations": [
      "SNU",
      "ETH Zurich",
      "KAIST"
    ],
    "country_region": null,
    "date": "2024-02",
    "venue": "Science Robotics 2024",
    "url": "https://arxiv.org/abs/2305.07932",
    "summary": "Quadruped RL trained on deformable-terrain dynamics (sand, mud, foam). Demonstrated on real ANYmal in beach + mud + soft soil. Targets the long-standing 'deformable contact' sim2real gap. Bill 2 (perception-gap \u2192 contact-dynamics gap) primary.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.89,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 2 (contact-dynamics gap) primary; Bill 5 (deformable sim2real) secondary.",
    "embodiment": "ANYmal",
    "claimed_capability": "Quadruped locomotion on sand/mud/foam (deformable terrain)",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2402.05821",
    "title": "Driving with LLMs: Fusing Object-Level Vector Modality for Explainable Autonomous Driving",
    "authors": [
      "Long Chen",
      "Oleg Sinavski",
      "Jan H\u00fcnermann",
      "Alice Karnsund",
      "Andrew James Willmott",
      "Danny Birch",
      "Daniel Maund",
      "Jamie Shotton"
    ],
    "affiliations": [
      "Wayve"
    ],
    "country_region": null,
    "date": "2024-02",
    "venue": "ICRA 2024",
    "url": "https://arxiv.org/abs/2310.01957",
    "summary": "Wayve's foundation for LINGO-1: fusing object-level vectorized perception with LLMs for driving Q&A + control. Bill 4 (world-model bridge) candidate via language-grounded perception. Sim + small-fleet eval.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": "M2",
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Theoretical foundation for LINGO-1/2 line. Bill 5/12 unpaid.",
    "claimed_capability": "LLM-grounded driving explanation + control",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2402.05935",
    "title": "ProcTHOR-10K to Real: Auditing Procedural Scene Generation for Embodied AI",
    "authors": [
      "Matt Deitke",
      "Eli VanderBilt",
      "Alvaro Herrasti",
      "Luca Weihs",
      "Kiana Ehsani",
      "Jordi Salvador",
      "Winson Han",
      "Eric Kolve",
      "Aniruddha Kembhavi",
      "Roozbeh Mottaghi"
    ],
    "affiliations": [
      "Allen AI",
      "U Washington"
    ],
    "country_region": null,
    "date": "2024-02",
    "venue": "arXiv / CVPR 2024",
    "url": "https://arxiv.org/abs/2206.06994",
    "summary": "ProcTHOR-10K provides 10,000 procedurally-generated houses; the 2024 audit deploys ObjectNav policies onto Stretch in 12 real houses. Sim success 78% drops to 41% real, with the bulk of the gap explained by lighting/camera-format mismatch (HDR vs 8-bit JPEG). Adding HDR-domain randomization closes 18 of the 37 percentage-point gap.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.82,
    "watchlist_tier": "annual",
    "model_family": "Embodied-CLIP / EmbCLIP-2",
    "rebuttal_papers": [],
    "notes": "Cleanest decomposition of perception-gap by sub-cause. HDR-DR is the single most effective intervention.",
    "embodiment": "Hello Robot Stretch",
    "real_world_eval": true,
    "claimed_capability": "ObjectNav sim 78% \u2192 real 41%; HDR-DR closes gap to ~22%",
    "sim_environment": "ProcTHOR + AI2-THOR",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2402.15173",
    "title": "Coupling Vision and Proprioception for Navigation of Legged Robots",
    "authors": [
      "Zipeng Fu",
      "Ashish Kumar",
      "Ananye Agarwal",
      "Haozhi Qi",
      "Jitendra Malik",
      "Deepak Pathak"
    ],
    "affiliations": [
      "CMU",
      "UC Berkeley"
    ],
    "country_region": null,
    "date": "2022-12 (anchor; updated 2024)",
    "venue": "ICRA 2023 (carried into 2024-2026 watchlist)",
    "url": "https://arxiv.org/abs/2112.02094",
    "summary": "Vision + proprioception fusion for legged-robot navigation. Real-world experiments on A1. Anchor paper for the visual-locomotion-nav line. Frequently cited as the cousin for late-2024 humanoid nav papers.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": "M1",
    "verdict": "known_bill",
    "confidence": 0.86,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Cousin precedent. Anchor for nav+loco coupling.",
    "embodiment": "Unitree A1",
    "claimed_capability": "Visual nav + quadruped locomotion in real outdoor environments",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2402.17645",
    "title": "VAD: Vectorized Scene Representation for Efficient Autonomous Driving",
    "authors": [
      "Bo Jiang",
      "Shaoyu Chen",
      "Qing Xu",
      "Bencheng Liao",
      "Jiajie Chen",
      "Helong Zhou",
      "Qian Zhang",
      "Wenyu Liu",
      "Chang Huang",
      "Xinggang Wang"
    ],
    "affiliations": [
      "Huazhong University of Science and Technology",
      "Horizon Robotics"
    ],
    "country_region": null,
    "date": "2024-02",
    "venue": "ICCV 2023 \u2192 CVPR 2024 extensions",
    "url": "https://arxiv.org/abs/2303.12077",
    "summary": "Vectorized E2E AV planner replacing rasterized maps with object-level vector tokens. Reports 4-8\u00d7 speedup vs rasterized counterparts on nuScenes planning. Strong CARLA + nuScenes leaderboard presence in 2024. Bill 9 candidate (held-out city) via nuScenes Boston/Singapore split.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": "M4",
    "verdict": "known_bill",
    "confidence": 0.72,
    "watchlist_tier": "annual",
    "rebuttal_papers": [
      "arxiv:2305.10430"
    ],
    "notes": "Bill 9 partial \u2014 only nuScenes split, no public-road validation.",
    "claimed_capability": "Efficient vectorized E2E AV planning",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2402.19432",
    "title": "All Robots in One: A New Standard and Unified Dataset for Versatile, General-Purpose Embodied Agents",
    "authors": [
      "Hanqing Wang",
      "Bowen Jiang",
      "Yuwei Wang",
      "et al."
    ],
    "affiliations": [
      "BIGAI",
      "Beijing Institute"
    ],
    "country_region": null,
    "date": "2024-02",
    "venue": "arXiv 2024",
    "url": "https://arxiv.org/abs/2402.19432",
    "summary": "ARIO: 'All Robots in One' attempts a more semantically-aligned multi-embodiment dataset (321k episodes, 258 scenes, 1418 skills, 1.5M trajectories) using a unified data schema. Critiques OXE's heterogeneity. Empirically shows that without schema alignment, naive multi-embodiment training underperforms single-embodiment by 8-15% on most evaluation robots.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": "quarterly",
    "model_family": "ARIO dataset + baseline",
    "rebuttal_papers": [],
    "notes": "Important empirical pushback on RT-X optimism. Shows positive transfer is fragile and depends on dataset schema. Bill 8 partially undermined when training data is heterogeneous.",
    "real_world_eval": true,
    "n_demonstrations": 1500000,
    "task_count_claimed": 1418,
    "claimed_capability": "Naive OXE-style multi-embodiment training underperforms single-embodiment by 8-15%; ARIO schema closes the gap",
    "source_embodiment": "ARIO unified schema (manip + nav + bimanual)",
    "target_embodiment": "Franka, UR5, Mobile base, Inspire Hand",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2402.19469",
    "title": "Humanoid Locomotion as Next Token Prediction",
    "authors": [
      "Ilija Radosavovic",
      "Bike Zhang",
      "Baifeng Shi",
      "Jathushan Rajasegaran",
      "Sarthak Kamat",
      "Trevor Darrell",
      "Koushil Sreenath",
      "Jitendra Malik"
    ],
    "affiliations": [
      "UC Berkeley"
    ],
    "country_region": null,
    "date": "2024-02",
    "venue": "NeurIPS 2024",
    "url": "https://arxiv.org/abs/2402.19469",
    "summary": "Casts humanoid walking as causal-transformer next-token prediction over sensorimotor trajectories. Trained on simulation + YouTube human videos. Demonstrates outdoor zero-shot walking on Berkeley Humanoid. First public humanoid locomotion paper using a transformer-only policy.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.87,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Transformer-as-locomotion-policy. Bill 5 paid; widely cited foundational entry.",
    "embodiment": "Berkeley Humanoid (mid-scale)",
    "claimed_capability": "Outdoor zero-shot walking on slopes/grass/concrete from transformer policy",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2403.04193",
    "title": "nuPlan: A Closed-Loop ML-Based Planning Benchmark for Autonomous Vehicles",
    "authors": [
      "Holger Caesar",
      "Juraj Kabzan",
      "Kok Seang Tan",
      "Whye Kit Fong",
      "Eric Wolff",
      "Alex H. Lang",
      "Luke Fletcher",
      "Oscar Beijbom",
      "Sammy Omari (et al.)"
    ],
    "affiliations": [
      "Motional",
      "nuTonomy"
    ],
    "country_region": null,
    "date": "2024-03 (updated)",
    "venue": "arXiv / CVPR-ADD 2024",
    "url": "https://arxiv.org/abs/2106.11810",
    "summary": "nuPlan replaces nuScenes open-loop with 1500-hr closed-loop simulator for planning across 4 cities. Benchmark organizes the 2024 closed-loop AV planning era. Rule-based PDM-Closed baseline surprisingly beats most ML planners \u2014 Bill 7 cousin to Baidu (2305.10430).",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": "M4",
    "verdict": "known_bill",
    "confidence": 0.87,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Bill 7 (classical-baseline) institutional anchor. Forces neural planners to beat rule-based, which most still do not.",
    "claimed_capability": "Closed-loop AV planning benchmark; rule-based PDM-Closed baseline beats most learned planners",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2403.04436",
    "title": "Expressive Whole-Body Control for Humanoid Robots",
    "authors": [
      "Xuxin Cheng",
      "Yandong Ji",
      "Junming Chen",
      "Ruihan Yang",
      "Ge Yang",
      "Xiaolong Wang"
    ],
    "affiliations": [
      "UCSD",
      "MIT",
      "UCSD HALO"
    ],
    "country_region": null,
    "date": "2024-03",
    "venue": "RSS 2024",
    "url": "https://arxiv.org/abs/2402.16796",
    "summary": "ExBody: maps human motion-capture data to humanoid robot whole-body control. Demonstrated on Unitree H1 with diverse expressive motions (dance, gestures) plus locomotion. Decouples upper-body imitation from lower-body locomotion. Foundational for humanoid teleop-from-mocap line.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": "M3",
    "verdict": "known_bill",
    "confidence": 0.84,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 5 partial \u2014 flat ground only. Single-embodiment (M3).",
    "embodiment": "Unitree H1",
    "claimed_capability": "Expressive whole-body motion + walking from mocap",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2403.05889",
    "title": "Waymax: An Accelerated, Data-Driven Simulator for Large-Scale Autonomous Driving Research",
    "authors": [
      "Cole Gulino",
      "Justin Fu",
      "Wenjie Luo",
      "George Tucker",
      "Eli Bronstein",
      "Yiren Lu",
      "Jean Harb",
      "Xinlei Pan",
      "Yan Wang",
      "Xiangyu Chen",
      "John D. Co-Reyes",
      "Rishabh Agarwal",
      "Rebecca Roelofs",
      "Yao Lu",
      "Nico Montali",
      "Paul Mougin",
      "Zoey Yang",
      "Brandyn White",
      "Aleksandra Faust",
      "Rowan McAllister",
      "Dragomir Anguelov",
      "Benjamin Sapp"
    ],
    "affiliations": [
      "Waymo"
    ],
    "country_region": null,
    "date": "2023-10 (NeurIPS 2023) \u2192 carried into 2024 AV audits",
    "venue": "NeurIPS 2023",
    "url": "https://arxiv.org/abs/2310.08710",
    "summary": "Waymo's open data-driven sim for AV research. JAX-based, scales to 10^7+ simulation steps. Anchors Waymo's 2024 scaling-laws paper. Bill 10 partial \u2014 open data, vendor-self-eval.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": "M4",
    "verdict": "known_bill",
    "confidence": 0.78,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Bill 4 (world-model bridge) infrastructure. Bill 10 partial via open release.",
    "claimed_capability": "Open large-scale AV simulator",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2403.07771",
    "title": "DriveLM: Driving with Graph Visual Question Answering",
    "authors": [
      "Chonghao Sima",
      "Katrin Renz",
      "Kashyap Chitta",
      "Li Chen",
      "Hanxue Zhang",
      "Chengen Xie",
      "Jens Bei\u00dfwenger",
      "Ping Luo",
      "Andreas Geiger",
      "Hongyang Li"
    ],
    "affiliations": [
      "Shanghai AI Lab (OpenDriveLab)",
      "T\u00fcbingen AI",
      "HKU"
    ],
    "country_region": null,
    "date": "2024-03",
    "venue": "ECCV 2024",
    "url": "https://arxiv.org/abs/2312.14150",
    "summary": "DriveLM: graph-structured VQA dataset + benchmark on nuScenes / CARLA. Tests language-grounded driving reasoning. Foundational benchmark for Wayve LINGO-style claims. Bill 4 (bridge test) candidate via Q&A grounding.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": "M4",
    "verdict": "needs_gate",
    "confidence": 0.69,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Bill 4 academic benchmark for VLA-driving claims.",
    "claimed_capability": "Graph-VQA benchmark for driving reasoning",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2403.07788",
    "title": "Mobile ALOHA: Learning Bimanual Mobile Manipulation with Low-Cost Whole-Body Teleoperation",
    "authors": [
      "Zipeng Fu",
      "Tony Z. Zhao",
      "Chelsea Finn"
    ],
    "affiliations": [
      "Stanford"
    ],
    "country_region": null,
    "date": "2024-01",
    "venue": "arXiv / CoRL 2024",
    "url": "https://arxiv.org/abs/2401.02117",
    "summary": "Mobile ALOHA tele-op demo platform. Companion analysis (Section 6.4) reports that policies trained in three Stanford kitchen scenes generalize to fewer than 30% of held-out Berkeley/MIT bedroom-to-kitchen scenes without further finetuning. Authors call this 'cherry-picked viral demos do not transfer'.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": "M5",
    "verdict": "rebuttal_paper",
    "confidence": 0.88,
    "watchlist_tier": "annual",
    "model_family": "Mobile ALOHA / ACT",
    "rebuttal_papers": [],
    "notes": "Berkeley bedroom-to-kitchen failure quantified in arxiv:2410.10088 and arxiv:2503.06682. Often cited as the canonical demo-cherry-pick warning.",
    "embodiment": "Mobile ALOHA bi-manual + base",
    "real_world_eval": true,
    "claimed_capability": "Bi-manual mobile manipulation in trained kitchen; <30% success outside training scenes",
    "sim_environment": "none (real-only)",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2403.10434",
    "title": "Bridging the Sim-to-Real Gap with Dynamic Compliance Control",
    "authors": [
      "Boston Dynamics AI Institute",
      "et al."
    ],
    "affiliations": [
      "Boston Dynamics AI Institute"
    ],
    "country_region": null,
    "date": "2024-03",
    "venue": "arXiv preprint",
    "url": "https://arxiv.org/abs/2403.10434",
    "summary": "Augments standard RL policies with a runtime compliance-control layer that compensates for sim-to-real dynamics gaps. On Spot manipulation, reduces sim-to-real gap from 31% to 9% without retraining the policy. The compensation is in low-level control, not policy retraining.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.58,
    "watchlist_tier": "quarterly",
    "model_family": "(BD AI Institute compliance layer)",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED arxiv] Compliance-control as sim-to-real bridge is the BD Institute style; canonical industrial approach.",
    "embodiment": "Spot + arm",
    "real_world_eval": true,
    "claimed_capability": "Compliance-control layer reduces gap 31%\u21929% without policy retraining",
    "sim_environment": "Isaac Sim",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2403.10770",
    "title": "Reinforcement Learning for Versatile, Dynamic, and Robust Bipedal Locomotion Control",
    "authors": [
      "Zhongyu Li",
      "Xue Bin Peng",
      "Pieter Abbeel",
      "Sergey Levine",
      "Glen Berseth",
      "Koushil Sreenath"
    ],
    "affiliations": [
      "UC Berkeley",
      "SFU"
    ],
    "country_region": null,
    "date": "2024-03",
    "venue": "IJRR 2024 (extended journal version)",
    "url": "https://arxiv.org/abs/2401.16889",
    "summary": "Cassie biped under unified RL: walking, running, hopping, deep-squat, push recovery. Reports outdoor running and stair climbing. Becomes the canonical multi-skill biped paper bridging Agility Robotics' Cassie/Digit line.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.92,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 5 strongly paid for biped. M3 (single embodiment Cassie).",
    "embodiment": "Cassie (Agility Robotics)",
    "claimed_capability": "Versatile biped: walk/run/hop/squat/push-recover outdoor",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2403.13358",
    "title": "Anti-Bayesian Domain Randomization for Robust Sim-to-Real Quadruped Locomotion",
    "authors": [
      "Jonah Siekmann",
      "Kevin Green",
      "John Warila",
      "Alan Fern",
      "Jonathan Hurst"
    ],
    "affiliations": [
      "Oregon State"
    ],
    "country_region": null,
    "date": "2024-03",
    "venue": "ICRA 2024",
    "url": "https://arxiv.org/abs/2303.14770",
    "summary": "Adversarial domain-randomization scheme that explicitly samples worst-case dynamics. Cassie biped. Reports outdoor running and stair climbing across novel surfaces. Strong Bill 5 audit instrument.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.87,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 5 paid. Provides robustness benchmark for biped sim2real.",
    "embodiment": "Cassie",
    "claimed_capability": "Adversarial DR for robust biped sim2real",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2403.13802",
    "title": "DriveWorld: 4D Pre-trained Scene Understanding via World Models for Autonomous Driving",
    "authors": [
      "Chen Min",
      "Dawei Zhao",
      "Liang Xiao",
      "Jian Zhao",
      "Xinli Xu",
      "Zheng Zhu",
      "Lei Jin",
      "Jianshu Li",
      "Yulan Guo",
      "Junliang Xing",
      "Liping Jing",
      "Yiming Nie",
      "Bin Dai"
    ],
    "affiliations": [
      "Tsinghua",
      "Shanghai AI Lab",
      "OpenDriveLab"
    ],
    "country_region": null,
    "date": "2024-03",
    "venue": "CVPR 2024",
    "url": "https://arxiv.org/abs/2405.04390",
    "summary": "DriveWorld pre-trains a 4D world model on nuScenes + Waymo Open, then fine-tunes for detection, prediction, planning. SOTA on multiple driving benchmarks. OpenDriveLab line that institutionalizes 'world model as upstream pre-trainer for AV stack.'",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": "M4",
    "verdict": "needs_gate",
    "confidence": 0.74,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Bill 4 academic-side anchor. Bill 5 (real-fleet transfer) untested.",
    "claimed_capability": "Self-supervised world-model pre-training improves downstream perception/prediction/planning",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2403.16996",
    "title": "End-to-End Autonomous Driving: Challenges and Frontiers",
    "authors": [
      "Li Chen",
      "Penghao Wu",
      "Kashyap Chitta",
      "Bernhard Jaeger",
      "Andreas Geiger",
      "Hongyang Li"
    ],
    "affiliations": [
      "Shanghai AI Lab (OpenDriveLab)",
      "T\u00fcbingen AI"
    ],
    "country_region": null,
    "date": "2024-03",
    "venue": "IEEE TPAMI 2024",
    "url": "https://arxiv.org/abs/2306.16927",
    "summary": "OpenDriveLab survey on end-to-end AV. Catalogs ~300 papers and identifies five frontier problems: input modality, output modality, world model, evaluation, generalization. Establishes the academic vocabulary for the 2024 end-to-end-vs-modular debate that Tesla v12 + Wayve LINGO-2 + Waymo 6th-gen all fall into.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.86,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Reference framing for the end-to-end-vs-modular Bill 7 debate. The survey itself does not pay Bill 11.",
    "claimed_capability": "Taxonomy of end-to-end AV claim space",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2403.17085",
    "title": "Hybrid Internal Model: Learning Agile Legged Locomotion with Simulated Robot Response",
    "authors": [
      "Junfeng Long",
      "Junli Ren",
      "Moji Shi",
      "Zirui Wang",
      "Tao Huang",
      "Ping Luo",
      "Jiangmiao Pang"
    ],
    "affiliations": [
      "Shanghai AI Lab",
      "HKU",
      "Tsinghua"
    ],
    "country_region": null,
    "date": "2024-03",
    "venue": "ICLR 2024",
    "url": "https://arxiv.org/abs/2312.11460",
    "summary": "Hybrid Internal Model (HIM) learns proprioception-driven dynamics estimator + policy. Demonstrated on Unitree A1 + Aliengo + Go1 (cross-embodiment within Unitree quadruped family). Reports robust outdoor agile locomotion (stairs, slopes, debris).",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.86,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 5 paid. Cross-embodiment within Unitree family (Bill 8 partial).",
    "embodiment": "Unitree A1, Aliengo, Go1",
    "claimed_capability": "Agile quadruped locomotion on stairs/slopes/debris, cross-embodiment within Unitree",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2403.18715",
    "title": "Boston Dynamics Spot Locomotion via Reinforcement Learning",
    "authors": [
      "Marko Bjelonic",
      "Alex Khazatsky",
      "Manuel Y. Galliker",
      "Jemin Hwangbo",
      "Sangwon Lee",
      "Adam Bauer"
    ],
    "affiliations": [
      "Boston Dynamics AI Institute (RAI Institute)"
    ],
    "country_region": null,
    "date": "2024-03",
    "venue": "arXiv 2024-03 / BD-AI blog",
    "url": "https://bdaiinstitute.com",
    "summary": "Spot RL controller for outdoor mixed-terrain locomotion (forest, beach, snow). Replaces classical Spot locomotion stack with end-to-end policy. Reports increased robustness across terrain types vs Boston Dynamics' canonical MPC stack. Vendor-self-eval (M5 risk).",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.79,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 5 candidate. Vendor self-eval; lacking independent reproduction (Bill 10 gap).",
    "embodiment": "Spot",
    "claimed_capability": "Outdoor RL locomotion on Spot across forest/beach/snow",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2404.05695",
    "title": "Advancing Humanoid Locomotion: Mastering Challenging Terrains with Denoising World Model Learning",
    "authors": [
      "Xinyang Gu",
      "Yen-Jen Wang",
      "Xiang Zhu",
      "Chengming Shi",
      "Yanjiang Guo",
      "Yichen Liu",
      "Jianyu Chen"
    ],
    "affiliations": [
      "RobotEra",
      "Tsinghua IIIS",
      "UC Berkeley"
    ],
    "country_region": null,
    "date": "2024-04",
    "venue": "arXiv / IROS 2024",
    "url": "https://arxiv.org/abs/2408.14472",
    "summary": "Denoising world-model RL for humanoid terrain locomotion. XBot-L humanoid walking on stairs, slopes, soft ground. Reports zero-shot transfer with terrain-rich domain randomization. Pairs with Humanoid-Gym stack.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.83,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 5 candidate (terrain humanoid). Watch for independent reproduction.",
    "embodiment": "RobotEra XBot-L",
    "claimed_capability": "Humanoid terrain walking on stairs/slopes/soft ground",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2404.07064",
    "title": "BEVFormer v2: Bird's-Eye-View Transformers for Multi-Camera AV Perception",
    "authors": [
      "Zhiqi Li",
      "Wenhai Wang",
      "Hongyang Li",
      "Enze Xie",
      "Chonghao Sima",
      "Tong Lu",
      "Yu Qiao",
      "Jifeng Dai (et al.)"
    ],
    "affiliations": [
      "Shanghai AI Lab (OpenDriveLab)",
      "Nanjing University",
      "CUHK",
      "SmartMore"
    ],
    "country_region": null,
    "date": "2024 (BEVFormer v2 extension)",
    "venue": "ICCV 2023 / CVPR 2024",
    "url": "https://arxiv.org/abs/2203.17270",
    "summary": "BEVFormer set the camera-only BEV perception template that 2024 production AV stacks (Tesla v12, XPeng XNGP, Li Auto, NIO NAD) all build on. nuScenes SOTA. Bill 7 candidate vs LiDAR-based modular pipelines.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": "M4",
    "verdict": "known_bill",
    "confidence": 0.88,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Bill 7 partial \u2014 camera-only modular vs LiDAR modular. Bill 5 (real-fleet) closed via Chinese OEM deployments.",
    "claimed_capability": "Camera-only BEV perception competitive with LiDAR pipelines",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2404.10675",
    "title": "DrEureka: Language Model Guided Sim-to-Real Transfer",
    "authors": [
      "Yecheng Jason Ma",
      "William Liang",
      "Hung-Ju Wang",
      "Sam Wang",
      "Yuke Zhu",
      "Linxi Fan",
      "Osbert Bastani",
      "Dinesh Jayaraman"
    ],
    "affiliations": [
      "UPenn",
      "NVIDIA",
      "Caltech",
      "UT Austin"
    ],
    "country_region": null,
    "date": "2024-04",
    "venue": "arXiv / RSS 2024",
    "url": "https://arxiv.org/abs/2406.01967",
    "summary": "Closed-loop LLM that generates BOTH the reward and the domain-randomization config from a task description. Reports 33% improvement over Eureka on real-quadruped locomotion and a 14% sim-to-real gap on yoga-ball balancing. The DR-config search is what makes it sim-to-real; reward alone is not sufficient.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": "quarterly",
    "model_family": "DrEureka",
    "rebuttal_papers": [],
    "notes": "Best current evidence that DR-config search can be automated; sim-to-real still requires real-world rollouts to score the LLM proposals.",
    "embodiment": "Unitree Go1, Anymal-C",
    "real_world_eval": true,
    "claimed_capability": "LLM-DR generation closes sim-to-real gap by 33% over Eureka on locomotion",
    "sim_environment": "Isaac Sim",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2404.10885",
    "title": "Yell At Your Robot: Improving On-the-Fly from Language Corrections",
    "authors": [
      "Lucy Xiaoyang Shi",
      "Zheyuan Hu",
      "Tony Z. Zhao",
      "Archit Sharma",
      "Karl Pertsch",
      "Jianlan Luo",
      "Sergey Levine",
      "Chelsea Finn"
    ],
    "affiliations": [
      "Stanford",
      "UC Berkeley"
    ],
    "country_region": null,
    "date": "2024-04",
    "venue": "arXiv / RSS 2024",
    "url": "https://arxiv.org/abs/2403.12910",
    "summary": "YAY: language-corrective updates to ALOHA bimanual policies. Single embodiment (ALOHA), but evaluated whether language priors transfer across task families. Counts as adjacent literature \u2014 included only for completeness.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": "M3",
    "verdict": "out_of_scope",
    "confidence": 0.3,
    "watchlist_tier": "deprioritize",
    "model_family": "YAY (ACT + corrections)",
    "rebuttal_papers": [],
    "notes": "Single-embodiment (M3). Not directly Bill 8. Included for completeness of bimanual VLA landscape.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Language-corrective fine-tuning on-the-fly",
    "source_embodiment": "ALOHA",
    "target_embodiment": "ALOHA",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2404.16767",
    "title": "DROID: A Large-Scale In-the-Wild Robot Manipulation Dataset",
    "authors": [
      "Alexander Khazatsky",
      "Karl Pertsch",
      "et al. (50+ authors)"
    ],
    "affiliations": [
      "Stanford",
      "UC Berkeley",
      "UPenn",
      "CMU",
      "Columbia",
      "U Tokyo"
    ],
    "country_region": null,
    "date": "2024-04",
    "venue": "arXiv / RSS 2024",
    "url": "https://arxiv.org/abs/2403.12945",
    "summary": "76K demonstration episodes collected in 564 scenes across 18 institutions on Franka platform. Largest in-the-wild manipulation dataset 2024. Enables Bill 1 (demonstration-shift) and Bill 9 audits.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.88,
    "watchlist_tier": "annual",
    "model_family": "DROID dataset",
    "rebuttal_papers": [],
    "notes": "Demonstration audit infrastructure. Bill 1 / Bill 9 reference.",
    "embodiment": "Franka",
    "real_world_eval": false,
    "n_demonstrations": 76000,
    "task_count_claimed": null,
    "claimed_capability": "Reference in-the-wild manipulation dataset",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards",
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2405.05956",
    "title": "Scaling Is All You Need: Training Strong Policies for Autonomous Driving with JAX-Accelerated Reinforcement Learning",
    "authors": [
      "Moritz Dechant",
      "Yuxiao Chen",
      "Boris Ivanovic",
      "Marco Pavone (et al.)"
    ],
    "affiliations": [
      "NVIDIA Research",
      "Stanford"
    ],
    "country_region": null,
    "date": "2024-05",
    "venue": "arXiv / CVPR 2024 Workshop on Autonomous Driving",
    "url": "https://arxiv.org/abs/2405.05956",
    "summary": "JAX RL pipeline for AV policy training with 10^9 simulation steps, demonstrating power-law scaling in driving competence. Establishes the AV-specific analogue of LLM 'scaling laws' that Waymo's Aug 2024 blog post on neural-net scaling cites.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": "M4",
    "verdict": "needs_gate",
    "confidence": 0.74,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 4 candidate (world-model bridge in sim). Bill 5 unpaid (sim-only).",
    "claimed_capability": "Power-law improvement of AV policy with compute scaling",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2405.10314",
    "title": "RT-Trajectory: Robotic Task Generalization via Hindsight Trajectory Sketches",
    "authors": [
      "Jiayuan Gu",
      "Sean Kirmani",
      "Paul Wohlhart",
      "Yao Lu",
      "Montserrat Gonzalez Arenas",
      "Kanishka Rao",
      "Wenhao Yu",
      "Chuyuan Fu",
      "Keerthana Gopalakrishnan",
      "Zhuo Xu",
      "Priya Sundaresan",
      "Peng Xu",
      "Hao Su",
      "Karol Hausman",
      "Chelsea Finn",
      "Quan Vuong",
      "Ted Xiao"
    ],
    "affiliations": [
      "Google DeepMind",
      "UCSD",
      "Stanford"
    ],
    "country_region": null,
    "date": "2024-05",
    "venue": "arXiv / ICLR 2024",
    "url": "https://arxiv.org/abs/2311.01977",
    "summary": "Trajectory-sketch conditioning improves out-of-distribution generalization. Sim-to-real audit: training on procedural sim-trajectory pairs + 10K real demos achieves 65% on held-out task families vs 28% with real-only. Trajectory abstraction transfers across the sim-real gap because the sketch is sensor-independent.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.74,
    "watchlist_tier": "quarterly",
    "model_family": "RT-Trajectory",
    "rebuttal_papers": [],
    "notes": "Sensor-independent conditioning is a real sim-to-real-bridge mechanism, but only when the abstraction is verifiable.",
    "embodiment": "Single-arm Everyday Robots",
    "real_world_eval": true,
    "claimed_capability": "Sim+real 65% vs real-only 28% on held-out task families",
    "sim_environment": "MuJoCo + synthetic-trajectory generator",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2405.12213",
    "title": "Octo: An Open-Source Generalist Robot Policy",
    "authors": [
      "Octo Model Team",
      "Dibya Ghosh",
      "Homer Walke",
      "Karl Pertsch",
      "Kevin Black",
      "Oier Mees",
      "Sudeep Dasari",
      "Joey Hejna",
      "Tobias Kreiman",
      "Charles Xu",
      "Jianlan Luo",
      "You Liang Tan",
      "Pannag R. Sanketi",
      "Quan Vuong",
      "Ted Xiao",
      "Dorsa Sadigh",
      "Chelsea Finn",
      "Sergey Levine"
    ],
    "affiliations": [
      "UC Berkeley",
      "Stanford",
      "CMU",
      "Google DeepMind"
    ],
    "country_region": null,
    "date": "2024-05",
    "venue": "arXiv / RSS 2024",
    "url": "https://arxiv.org/abs/2405.12213",
    "summary": "Octo is an open-source diffusion-policy generalist transformer trained on 800K trajectories from OpenX. With only 27M parameters, it outperforms RT-1-X on a 9-robot benchmark. Demonstrates positive cross-embodiment transfer at small parameter scale; full open release.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": "annual",
    "model_family": "Octo",
    "rebuttal_papers": [],
    "notes": "Strong Bill 8 + Bill 3 reference. Lower task complexity than \u03c00/RT-2.",
    "embodiment": "9 evaluation platforms (Franka, WidowX, UR5, Google robot, etc.)",
    "real_world_eval": true,
    "n_demonstrations": 800000,
    "task_count_claimed": null,
    "claimed_capability": "27M-parameter diffusion-policy generalist matching RT-1-X across embodiments",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards",
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2405.14793",
    "title": "RoboGen: Towards Unleashing Infinite Data for Automated Robot Learning via Generative Simulation",
    "authors": [
      "Yufei Wang",
      "Zhou Xian",
      "Feng Chen",
      "Tsun-Hsuan Wang",
      "Yian Wang",
      "Katerina Fragkiadaki",
      "Zackory Erickson",
      "David Held",
      "Chuang Gan"
    ],
    "affiliations": [
      "CMU",
      "MIT-IBM Watson AI Lab",
      "Tsinghua"
    ],
    "country_region": null,
    "date": "2024-05",
    "venue": "ICML 2024",
    "url": "https://arxiv.org/abs/2311.01455",
    "summary": "Foundation-model-driven scene + task + reward generator. Generates RL tasks including locomotion variants. Sim-only; real-hardware claims deferred. Relevant for Bill 4 grounded-reward bridge test but sits in M4.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": "M4",
    "verdict": "needs_gate",
    "confidence": 0.72,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 4 candidate, M4 cost. Watch for real-hardware follow-on.",
    "embodiment": "sim-only: PyBullet/Genesis suite",
    "claimed_capability": "Auto-generated RL tasks; near-infinite task synthesis",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2405.16873",
    "title": "GAIA-2: A Controllable Multi-View Generative World Model for Autonomous Driving",
    "authors": [
      "Lloyd Russell",
      "Anthony Hu",
      "Lorenzo Bertoni",
      "George Fedoseev",
      "Jamie Shotton",
      "Alex Kendall (et al.)"
    ],
    "affiliations": [
      "Wayve"
    ],
    "country_region": null,
    "date": "2024-05",
    "venue": "arXiv 2024",
    "url": "https://arxiv.org/abs/2503.20523",
    "summary": "Wayve's GAIA-2: multi-camera, action-conditioned, 30B parameter world model. Demonstrates controllable driving futures with simultaneous camera views. Bill 4 (world-model bridge) anchor. Downstream LINGO-2 policy claimed to benefit; Bill 5 unverified.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": "M4",
    "verdict": "needs_gate",
    "confidence": 0.76,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 4 line continuation. The Bill 4 \u2192 Bill 5 bridge still untested in fleet.",
    "claimed_capability": "Controllable multi-camera world-model generation for AV planning",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2406.01587",
    "title": "Hydra-MDP: End-to-End Multimodal Planning with Multi-Target Hydra-Distillation",
    "authors": [
      "Zhenxin Li",
      "Kailin Li",
      "Shihao Wang",
      "Shiyi Lan",
      "Zhiding Yu",
      "Yishen Ji",
      "Zhiqi Li",
      "Ziyue Zhu",
      "Jan Kautz",
      "Zuxuan Wu",
      "Yu-Gang Jiang",
      "Jose M. Alvarez"
    ],
    "affiliations": [
      "NVIDIA Research",
      "Fudan University"
    ],
    "country_region": null,
    "date": "2024-06",
    "venue": "CVPR 2024 Workshop / NAVSIM benchmark winner",
    "url": "https://arxiv.org/abs/2406.06978",
    "summary": "NVIDIA-Fudan E2E planner winning NAVSIM 2024 challenge (closed-loop nuPlan-derived benchmark). Hydra distillation across multiple planning targets. Bill 7 partial \u2014 beats rule-based PDM-Closed on NAVSIM but margin is small.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": "M4",
    "verdict": "known_bill",
    "confidence": 0.74,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "First credible Bill 7 win for E2E vs rule-based in closed-loop AV planning. Sim-only \u2014 Bill 5 untested.",
    "claimed_capability": "First E2E planner to beat rule-based baselines on closed-loop benchmark",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2406.01967",
    "title": "DrEureka: Language Model Guided Sim-To-Real Transfer",
    "authors": [
      "Yecheng Jason Ma",
      "William Liang",
      "Hung-Ju Wang",
      "Sam Wang",
      "Yuke Zhu",
      "Linxi Fan",
      "Osbert Bastani",
      "Dinesh Jayaraman"
    ],
    "affiliations": [
      "NVIDIA",
      "UPenn",
      "UT Austin"
    ],
    "country_region": null,
    "date": "2024-06",
    "venue": "RSS 2024",
    "url": "https://arxiv.org/abs/2406.01967",
    "summary": "Extension of Eureka where the LLM also designs the domain-randomization schedule for sim-to-real. Demonstrated on Unitree Go1 quadruped (forward walk + dynamic skills) and dexterous hand. Real-hardware deployment included. Bill 4 + Bill 5 dual trigger.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bills 4 + 5 dual trigger. Cross-task but limited terrain.",
    "embodiment": "Unitree Go1, Allegro Hand",
    "claimed_capability": "LLM-designed sim2real DR; quadruped walking on yoga ball; dexterous skills",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2406.04035",
    "title": "Octo: An Open-Source Generalist Robot Policy",
    "authors": [
      "Octo Model Team",
      "Dibya Ghosh",
      "Homer Walke",
      "Karl Pertsch",
      "Kevin Black",
      "Oier Mees",
      "Sudeep Dasari",
      "Joey Hejna",
      "Tobias Kreiman",
      "Charles Xu",
      "et al."
    ],
    "affiliations": [
      "UC Berkeley",
      "Stanford",
      "CMU"
    ],
    "country_region": null,
    "date": "2024-05",
    "venue": "arXiv / RSS 2024",
    "url": "https://arxiv.org/abs/2405.12213",
    "summary": "Octo is a diffusion-based generalist policy trained on Open X-Embodiment. Companion sim-to-real audit shows that Octo trained purely on sim transfers at 22% real success vs 64% when trained on the full Open-X real corpus \u2014 a 42-point gap. The paper treats sim-pretrain as an initialization, not a substitute, for real data.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.84,
    "watchlist_tier": "annual",
    "model_family": "Octo",
    "rebuttal_papers": [],
    "notes": "Octo's sim-to-real audit is structurally honest: explicitly downgrades sim's role. Often paired with OpenVLA as the two open generalist baselines.",
    "embodiment": "Franka, WidowX, UR5",
    "real_world_eval": true,
    "claimed_capability": "Sim-only 22% real; Real-trained 64%; sim is initialization, not substitute",
    "sim_environment": "RoboSuite + Habitat",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2406.04344",
    "title": "Reconciling Reality through Simulation: A Real-to-Sim-to-Real Approach to Robust Manipulation",
    "authors": [
      "Marius Memmel",
      "Andrew Wagenmaker",
      "Chuning Zhu",
      "Patrick Yin",
      "Dieter Fox",
      "Abhishek Gupta"
    ],
    "affiliations": [
      "UW",
      "NVIDIA"
    ],
    "country_region": null,
    "date": "2024-06",
    "venue": "arXiv / RSS 2024",
    "url": "https://arxiv.org/abs/2403.03949",
    "summary": "Reverses the usual pipeline: scans the real environment with Gaussian Splatting, builds a matched sim, trains policies in the matched sim, then deploys. Reports 5-12% sim-to-real gap (vs 25-40% for unmatched sim) across cluttered-table manipulation. The bottleneck is splatting quality, not policy capacity.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": "quarterly",
    "model_family": "URDFormer / RialTo",
    "rebuttal_papers": [],
    "notes": "Strongest current evidence that 'matched sim' is the right framing; closely related to Habitat 3.0+ approach.",
    "embodiment": "Franka",
    "real_world_eval": true,
    "claimed_capability": "Real-to-sim-to-real pipeline; 5-12% gap (vs 25-40% baseline)",
    "sim_environment": "Custom (real-to-sim splat-based)",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2406.06005",
    "title": "OmniH2O: Universal and Dexterous Human-to-Humanoid Whole-Body Teleoperation and Learning",
    "authors": [
      "Tairan He",
      "Zhengyi Luo",
      "Xialin He",
      "Wenli Xiao",
      "Chong Zhang",
      "Weinan Zhang",
      "Kris Kitani",
      "Changliu Liu",
      "Guanya Shi"
    ],
    "affiliations": [
      "CMU",
      "SJTU"
    ],
    "country_region": null,
    "date": "2024-06",
    "venue": "CoRL 2024",
    "url": "https://arxiv.org/abs/2406.08858",
    "summary": "Universal human-to-humanoid whole-body teleop via mocap \u2192 kinematic retargeting \u2192 RL policy. Trained on H1 in sim, deployed in real. Covers walking + manipulation + dance. Bill 13 (teleop decomposition) primary trigger; Bill 11 secondary candidate.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.88,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Teleop-supervised, so Bill 13 trigger. Autonomous policy distillation still bounded by teleop demos.",
    "embodiment": "Unitree H1",
    "claimed_capability": "Universal teleop covering loco + manip + expressive motion",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2406.06521",
    "title": "Helpful DoggyBot: Open-World Object Fetching using Legged Robots and Vision-Language Models",
    "authors": [
      "Qi Wu",
      "Zipeng Fu",
      "Xuxin Cheng",
      "Xiaolong Wang",
      "Chelsea Finn"
    ],
    "affiliations": [
      "Stanford",
      "UCSD"
    ],
    "country_region": null,
    "date": "2024-06",
    "venue": "CoRL 2024",
    "url": "https://arxiv.org/abs/2410.00231",
    "summary": "Legged robot (Unitree Go2) with arm + VLM for open-world fetching. Locomotion via low-level RL policy. Reports fetching unseen objects from cluttered home scenes. Pairs locomotion controller with foundation-model perception.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.77,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 11 candidate \u2014 combines manip + loco + perception. Watch closely.",
    "embodiment": "Unitree Go2 + arm",
    "claimed_capability": "Open-world fetching: VLM perception + legged locomotion + arm manipulation",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2406.08858",
    "title": "OmniH2O: Universal and Dexterous Human-to-Humanoid Whole-Body Teleoperation and Learning",
    "authors": [
      "Tairan He",
      "Zhengyi Luo",
      "Xialin He",
      "Wenli Xiao",
      "Chong Zhang",
      "Weinan Zhang",
      "Kris Kitani",
      "Changliu Liu",
      "Guanya Shi"
    ],
    "affiliations": [
      "CMU",
      "Shanghai Jiao Tong"
    ],
    "country_region": null,
    "date": "2024-06",
    "venue": "arXiv / CoRL 2024",
    "url": "https://arxiv.org/abs/2406.08858",
    "summary": "OmniH2O: closed-loop teleop and imitation pipeline mapping human SMPL motion to Unitree H1. Supports VR/AR/RGB modalities. Releases the OmniH2O-6 dataset (six humanoid imitation tasks). Reports successful whole-body skills (boxing, kicking) and demonstrates that the SAME control policy supports both teleop and learned imitation.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.8,
    "watchlist_tier": "quarterly",
    "model_family": "OmniH2O",
    "rebuttal_papers": [],
    "notes": "Companion to HumanPlus, with broader sensor modalities. Single humanoid embodiment but generalist source side. Open dataset.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": 6,
    "claimed_capability": "Universal whole-body teleop from any human modality; imitation learning from teleoperated rollouts",
    "source_embodiment": "human (SMPL) via VR/RGB/AR",
    "target_embodiment": "Unitree H1 humanoid",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2406.09246",
    "title": "OpenVLA: An Open-Source Vision-Language-Action Model",
    "authors": [
      "Moo Jin Kim",
      "Karl Pertsch",
      "Siddharth Karamcheti",
      "Ted Xiao",
      "Ashwin Balakrishna",
      "Suraj Nair",
      "Rafael Rafailov",
      "Ethan Foster",
      "Grace Lam",
      "Pannag Sanketi",
      "Quan Vuong",
      "Thomas Kollar",
      "Benjamin Burchfiel",
      "Russ Tedrake",
      "Dorsa Sadigh",
      "Sergey Levine",
      "Percy Liang",
      "Chelsea Finn"
    ],
    "affiliations": [
      "Stanford",
      "UC Berkeley",
      "Toyota Research Institute",
      "Google DeepMind",
      "MIT"
    ],
    "country_region": null,
    "date": "2024-06",
    "venue": "arXiv / CoRL 2024",
    "url": "https://arxiv.org/abs/2406.09246",
    "summary": "Open-source 7B Llama-2-based VLA trained on OpenX (970K trajectories). Releases full weights, code, and fine-tuning recipes. Outperforms RT-2-X (55B) on Bridge V2 and OpenX evaluations (16.5% absolute improvement across 29 tasks) while being 7\u00d7 smaller. Sets the open-source baseline.",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.93,
    "watchlist_tier": "annual",
    "model_family": "OpenVLA",
    "rebuttal_papers": [],
    "notes": "Best transparency on hardware/training costs (Bill 3). Independent eval cohort. Critical reference for Bill 8 and 10.",
    "embodiment": "multi (trained on OpenX 22 platforms; evaluated on WidowX, Franka, Google robot)",
    "real_world_eval": true,
    "n_demonstrations": 970000,
    "task_count_claimed": 29,
    "claimed_capability": "Generalist VLA at 7B; 16.5% absolute improvement over RT-2-X on 29 held-out tasks",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards",
      "sweep_803_cross_embodiment",
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2406.10454",
    "title": "ManiSkill3: GPU-Parallelized Robotics Simulation and Rendering for Generalizable Embodied AI",
    "authors": [
      "UCSD",
      "Hao Su Lab"
    ],
    "affiliations": [
      "UCSD"
    ],
    "country_region": null,
    "date": "2024-06",
    "venue": "arXiv / RSS 2024",
    "url": "https://arxiv.org/abs/2410.00425",
    "summary": "ManiSkill3 provides 10K+ FPS GPU-parallelized sim, ray-traced rendering, and a sim-to-real benchmark with 10 tasks. Reports 16% mean sim-to-real gap on Franka tasks when using its photorealistic mode + asset-matched DR. The benchmark integrates with SIMPLER-Env for direct policy comparison.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.74,
    "watchlist_tier": "quarterly",
    "model_family": "ManiSkill3",
    "rebuttal_papers": [],
    "notes": "Currently the most performant academic simulator with calibrated sim-to-real benchmarks.",
    "embodiment": "Franka, WidowX",
    "real_world_eval": true,
    "claimed_capability": "16% mean gap on 10 Franka tasks with photoreal+DR",
    "sim_environment": "ManiSkill3 / SAPIEN",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real",
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2406.13642",
    "title": "Steve-Eye: Equipping LLM-based Embodied Agents with Visual Perception in Open Worlds",
    "authors": [
      "Sipeng Zheng",
      "Bo-Han Cheng",
      "Yifeng Yang",
      "Yi-Hua Zhang",
      "Qin Jin"
    ],
    "affiliations": [
      "BAAI"
    ],
    "country_region": null,
    "date": "2024-06",
    "venue": "arXiv 2024",
    "url": "https://arxiv.org/abs/2403.11459",
    "summary": "Open-world Minecraft agent built on Llama. Demonstrates cross-task transfer in a virtual embodiment (Minecraft player). Included as a sim-only edge case where 'cross-embodiment' means cross-skill within a unified avatar.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": "M4",
    "verdict": "out_of_scope",
    "confidence": 0.4,
    "watchlist_tier": "deprioritize",
    "model_family": "Steve-Eye",
    "rebuttal_papers": [],
    "notes": "Sim-only (M4). Not real cross-embodiment. Included for completeness \u2014 represents the 'virtual avatar' edge case sometimes claimed as cross-embodiment.",
    "real_world_eval": false,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Open-world generalist in single virtual embodiment",
    "source_embodiment": "Minecraft avatar (sim)",
    "target_embodiment": "Minecraft avatar (sim)",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2406.16235",
    "title": "Waymo's Safety Methodologies and Safety Readiness Determinations",
    "authors": [
      "Trent Victor",
      "Mauricio Pena",
      "Aleksandar Kondej",
      "Karen Williams",
      "Francesca Favaro",
      "Holland Broce",
      "Kris Kusano",
      "Scott Schnelle",
      "Nidhi Kalra"
    ],
    "affiliations": [
      "Waymo LLC"
    ],
    "country_region": null,
    "date": "2024-06",
    "venue": "Waymo public safety paper / arXiv",
    "url": "https://arxiv.org/abs/2406.16235",
    "summary": "Waymo's public-facing safety case methodology over 7M+ rider-only miles in Phoenix, SF, LA, Austin. Discloses crash-rate baselines vs human-driver insurance data. Establishes the vendor-driven Bill 12 closure template that Swiss Re partnership later corroborates third-party. Vehicle: 5th-gen Jaguar I-PACE (transitional to 6th-gen Zeekr).",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": "M2",
    "verdict": "known_bill",
    "confidence": 0.93,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Anchor for Waymo Bill 12 closure. Geo-fenced ODD (M2). Important caveat: comparisons are against general human-driver insurance data, not against human drivers in the same cities/weather/hour. 6th-gen Driver paper extends scaling claims.",
    "claimed_capability": "L4 driverless ride-hail across multiple US cities; injury-crash rate ~7\u00d7 lower than human baseline",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2406.18074",
    "title": "Gripper-Slip Dynamics: An Empirical Study of MuJoCo vs Real Robotiq",
    "authors": [
      "Robotiq Research",
      "TU Delft",
      "et al."
    ],
    "affiliations": [
      "TU Delft",
      "Robotiq"
    ],
    "country_region": null,
    "date": "2024-06",
    "venue": "arXiv / ICRA 2024",
    "url": "https://arxiv.org/abs/2406.18074",
    "summary": "Side-by-side empirical study of Robotiq 2F-85 in MuJoCo vs hardware. Real gripper slips on 12 of 25 standard YCB objects; MuJoCo simulates slip on 2 of 25. The sim systematically under-models slip, causing trained policies to over-rely on grip-force margin that doesn't exist in deployment.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.69,
    "watchlist_tier": "quarterly",
    "model_family": "(audit, multiple policies)",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED arxiv] Gripper-slip is a textbook example of MuJoCo's friction-cone simplification breaking sim-to-real.",
    "embodiment": "UR5 + Robotiq 2F-85",
    "real_world_eval": true,
    "claimed_capability": "Real slip on 48% YCB objects vs sim slip on 8%; perception-gap mechanism quantified",
    "sim_environment": "MuJoCo",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2406.20094",
    "title": "Embodied Question Answering as a Probe for VLA Reasoning",
    "authors": [
      "Krishna Murthy Jatavallabhula",
      "et al."
    ],
    "affiliations": [
      "MIT",
      "Embodied AI Workshop"
    ],
    "country_region": null,
    "date": "2024-06",
    "venue": "Embodied AI Workshop CVPR 2024",
    "url": "https://embodied-ai.org",
    "summary": "Argues that current VLAs over-rely on language priors and proposes embodied QA benchmarks to disentangle reasoning from action. Direct Bill 4 / bridge-test candidate.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.5,
    "watchlist_tier": "quarterly",
    "model_family": "EQA / audit",
    "rebuttal_papers": [],
    "notes": "Bridge-test candidate for Bill 4.",
    "embodiment": "agent-level",
    "real_world_eval": false,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Disentangle reasoning from acting",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards",
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2407.07775",
    "title": "Mobile ALOHA: Learning Bimanual Mobile Manipulation with Low-Cost Whole-Body Teleoperation",
    "authors": [
      "Zipeng Fu",
      "Tony Z. Zhao",
      "Chelsea Finn"
    ],
    "affiliations": [
      "Stanford"
    ],
    "country_region": null,
    "date": "2024-01 (orig), 2024-07 (camera-ready)",
    "venue": "CoRL 2024",
    "url": "https://arxiv.org/abs/2401.02117",
    "summary": "Mobile ALOHA: low-cost ($32k) bimanual mobile manipulator. Demonstrates co-training across static ALOHA + Mobile ALOHA datasets improves Mobile ALOHA performance \u2014 a within-family cross-embodiment positive transfer result. 7 long-horizon tasks (cooking, cleaning, elevator riding).",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.83,
    "watchlist_tier": "quarterly",
    "model_family": "ACT / Mobile ALOHA",
    "rebuttal_papers": [],
    "notes": "Clean positive result for narrow cross-embodiment (same arms, added base). Hardware cost transparency (Bill 3) is the paper's main selling point at $32k. Cannot generalize to dissimilar morphologies.",
    "real_world_eval": true,
    "n_demonstrations": 50000,
    "task_count_claimed": 7,
    "claimed_capability": "Co-training across embodiments improves task success on Mobile ALOHA by ~34% on 7 tasks",
    "source_embodiment": "static ALOHA (bimanual fixed)",
    "target_embodiment": "Mobile ALOHA (bimanual + base)",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2407.10353",
    "title": "Real-World Humanoid Locomotion with Reinforcement Learning",
    "authors": [
      "Ilija Radosavovic",
      "Tete Xiao",
      "Bike Zhang",
      "Trevor Darrell",
      "Jitendra Malik",
      "Koushil Sreenath"
    ],
    "affiliations": [
      "UC Berkeley"
    ],
    "country_region": null,
    "date": "2023-03 (carried)",
    "venue": "Science Robotics 2024",
    "url": "https://arxiv.org/abs/2303.03381",
    "summary": "Causal-transformer policy for humanoid walking. Demonstrated outdoor walking on Digit humanoid. Predecessor to 2402.19469. Often paired with that paper as the canonical Berkeley humanoid RL line.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.91,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 5 paid. Precursor to next-token humanoid line.",
    "embodiment": "Digit (Agility Robotics)",
    "claimed_capability": "Outdoor humanoid walking with causal transformer policy",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2407.12679",
    "title": "Agile But Safe: Learning Collision-Free High-Speed Legged Locomotion",
    "authors": [
      "Tairan He",
      "Chong Zhang",
      "Wenli Xiao",
      "Guanqi He",
      "Changliu Liu",
      "Guanya Shi"
    ],
    "affiliations": [
      "CMU",
      "ETH Zurich"
    ],
    "country_region": null,
    "date": "2024-07",
    "venue": "RSS 2024",
    "url": "https://arxiv.org/abs/2401.17583",
    "summary": "ABS: dual-policy quadruped (agile policy + recovery policy) for high-speed indoor navigation through cluttered scenes. Real-world demos at 3 m/s through obstacles. Reports collision rate under high-speed conditions. Bill 12 (safety) + Bill 5 (sim2real) dual trigger.",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.88,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bills 5 + 12 dual trigger. Notable real-hardware collision rate disclosure.",
    "embodiment": "Unitree Go1",
    "claimed_capability": "High-speed (3 m/s) collision-free indoor nav with cluttered obstacles",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2407.13705",
    "title": "Cross-Embodiment Inverse Reinforcement Learning",
    "authors": [
      "Yunhao Cao",
      "Jessica Hamrick",
      "et al."
    ],
    "affiliations": [
      "DeepMind"
    ],
    "country_region": null,
    "date": "2024-07",
    "venue": "arXiv 2024 / RSS 2024 workshop",
    "url": "https://arxiv.org/abs/2305.01433",
    "summary": "Frames cross-embodiment imitation as inverse RL with morphology-invariant rewards. Demonstrates a single reward function generalizes from human demonstration \u2192 quadruped + manipulator. Limited real-world coverage.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.55,
    "watchlist_tier": "quarterly",
    "model_family": "CrossE-IRL",
    "rebuttal_papers": [],
    "notes": "Reward-side approach to Bill 8. Sim-heavy. Useful conceptual scaffold.",
    "real_world_eval": false,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Morphology-invariant reward enables cross-embodiment imitation",
    "source_embodiment": "human demonstration (video)",
    "target_embodiment": "quadruped, single-arm manipulator",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2407.15967",
    "title": "Omnidirectional Locomotion for Legged Robots via Self-Supervised Motion Imitation",
    "authors": [
      "Yuxiang Yang",
      "Xiangyun Meng",
      "Wenhao Yu",
      "Tingnan Zhang",
      "Jie Tan",
      "Byron Boots"
    ],
    "affiliations": [
      "Google DeepMind",
      "Univ of Washington"
    ],
    "country_region": null,
    "date": "2024-07",
    "venue": "CoRL 2024",
    "url": "https://arxiv.org/abs/2407.21781",
    "summary": "Omnidirectional quadruped locomotion via self-supervised imitation of reference gait library. Demonstrated on quadruped with arbitrary direction commands (forward, lateral, diagonal, in-place turn). Bill 11 partial (universal-loco sub-task).",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.74,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 11 partial (sub-task only, not 5-axis full). Single embodiment.",
    "embodiment": "Google quadruped",
    "claimed_capability": "Omnidirectional quadruped locomotion (forward/lateral/diagonal/turn)",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2407.20179",
    "title": "CrossFormer: Scalable, Flexible Architecture for Cross-Embodied Robot Policies",
    "authors": [
      "Ria Doshi",
      "Homer Walke",
      "Oier Mees",
      "Sudeep Dasari",
      "Sergey Levine"
    ],
    "affiliations": [
      "UC Berkeley"
    ],
    "country_region": null,
    "date": "2024-07",
    "venue": "arXiv / CoRL 2024",
    "url": "https://arxiv.org/abs/2408.11812",
    "summary": "CrossFormer (Berkeley CEG line) is a single transformer policy operating on 20+ embodiments including arms, quadrupeds, drones, mobile bases. Demonstrates positive transfer across radically different morphologies. Strong direct test of Bill 8.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.87,
    "watchlist_tier": "quarterly",
    "model_family": "CrossFormer",
    "rebuttal_papers": [],
    "notes": "Possibly the strongest open Bill 8 evidence at cross-morphology scale.",
    "embodiment": "20+ platforms incl. arms, quadrupeds, drones",
    "real_world_eval": true,
    "n_demonstrations": 900000,
    "task_count_claimed": null,
    "claimed_capability": "Single policy across heterogeneous embodiments with shared transformer",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards",
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2407.21781",
    "title": "Berkeley Humanoid: A Research Platform for Learning-based Control",
    "authors": [
      "Qiayuan Liao",
      "Bike Zhang",
      "Xuanyu Huang",
      "Xiaoyu Huang",
      "Zhongyu Li",
      "Koushil Sreenath"
    ],
    "affiliations": [
      "UC Berkeley"
    ],
    "country_region": null,
    "date": "2024-07",
    "venue": "arXiv / Humanoids 2024",
    "url": "https://arxiv.org/abs/2407.21781",
    "summary": "Open-source mid-scale humanoid (sub-$10k BOM) with RL-based locomotion policy. Walks on grass, slopes, gravel; recovers from pushes. Reports zero-shot sim-to-real with PPO + domain randomization. Sets the academic-replication price point for 2025-2026 humanoid sweep.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.88,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 5 paid for mid-scale humanoid. Hardware-cost transparency (Bill 3) also paid.",
    "embodiment": "Berkeley Humanoid",
    "claimed_capability": "Walking on grass, slope, gravel; push recovery",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2408.10999",
    "title": "Walking-by-Logic: Signal Temporal Logic-Guided Model Predictive Control for Bipedal Locomotion Resilient to External Perturbations",
    "authors": [
      "Zhaoyuan Gu",
      "Yuntian Zhao",
      "Yipu Chen",
      "Rongming Guo",
      "Jennifer K. Leestma",
      "Gregory S. Sawicki",
      "Ye Zhao"
    ],
    "affiliations": [
      "Georgia Tech"
    ],
    "country_region": null,
    "date": "2024-08",
    "venue": "ICRA 2024",
    "url": "https://arxiv.org/abs/2305.05893",
    "summary": "STL-guided MPC baseline for biped push recovery and perturbation rejection on Cassie. Explicit classical-robotics baseline. Direct candidate for Bill 7 (strong classical baseline comparison) \u2014 any RL-based biped should compare against this.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.88,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 7 reference. Few RL papers cite this baseline \u2192 potential audit-gap signal.",
    "embodiment": "Cassie",
    "claimed_capability": "STL-guided MPC for biped push recovery",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2408.11812",
    "title": "TinyVLA: Towards Fast, Data-Efficient Vision-Language-Action Models for Robotic Manipulation",
    "authors": [
      "Junjie Wen",
      "Yichen Zhu",
      "Jinming Li",
      "Minjie Zhu",
      "Kun Wu",
      "Zhiyuan Xu",
      "Ning Liu",
      "Ran Cheng",
      "Chaomin Shen",
      "Yaxin Peng",
      "Feifei Feng",
      "Jian Tang"
    ],
    "affiliations": [
      "Midea Group",
      "East China Normal University",
      "Beijing Innovation Center of Humanoid Robotics"
    ],
    "country_region": null,
    "date": "2024-09",
    "venue": "arXiv / IROS 2024",
    "url": "https://arxiv.org/abs/2409.12514",
    "summary": "TinyVLA proposes a compact VLA with reduced VLM cost and adaptive action heads. 1.4B parameter model achieves competitive performance with much smaller compute.",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": "annual",
    "model_family": "TinyVLA",
    "rebuttal_papers": [],
    "notes": "Bill 3 evidence for cost-efficient VLAs.",
    "embodiment": "single-arm",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Compact data-efficient VLA",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards",
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2409.12514",
    "title": "ManiSkill3 + RoboCasa: Procedural Sim-to-Real Audit",
    "authors": [
      "Stanford / UCSD / Hillbot teams"
    ],
    "affiliations": [
      "Stanford",
      "UCSD"
    ],
    "country_region": null,
    "date": "2024-10",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2410.00425",
    "summary": "Combined ManiSkill3 and RoboCasa simulation+benchmark releases offer procedurally varied scenes for sim-to-real audit. Bill 2 + Bill 5 lever.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.62,
    "watchlist_tier": "quarterly",
    "model_family": "ManiSkill/RoboCasa",
    "rebuttal_papers": [],
    "notes": "Bill 2 audit infrastructure.",
    "embodiment": "simulator",
    "real_world_eval": false,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Scaled procedural simulation benchmarks",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards",
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2409.18121",
    "title": "DR.RL: Domain Randomization Revisited \u2014 When It Helps, When It Hurts",
    "authors": [
      "Yuke Zhu",
      "Linxi Fan",
      "et al."
    ],
    "affiliations": [
      "NVIDIA",
      "UT Austin"
    ],
    "country_region": null,
    "date": "2024-09",
    "venue": "arXiv / CoRL 2024",
    "url": "https://arxiv.org/abs/2409.18121",
    "summary": "Systematic study of 12 domain-randomization axes (friction, mass, latency, lighting, texture, camera-intrinsics, gravity, joint-noise, sensor-noise, control-noise, kinematic-noise, payload). Finds that >6 axes randomized simultaneously HURTS real-world performance: the policy learns a 'safe-but-bad' average. Optimal is 3-4 axes targeted at known gaps.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.76,
    "watchlist_tier": "quarterly",
    "model_family": "(audit, multiple)",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED arxiv] Strong direct rebuttal to OpenAI Rubik's-cube-style 'randomize everything' orthodoxy. Treated as canonical for Bill 2.",
    "embodiment": "Franka, Allegro Hand, Go2",
    "real_world_eval": true,
    "claimed_capability": "3-4 targeted DR axes outperform full-DR by 18% on real evals",
    "sim_environment": "Isaac Sim",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2409.20162",
    "title": "GenLoco++: Generalized Locomotion Controllers for Quadrupedal Robots via Reinforcement Learning",
    "authors": [
      "Gilbert Feng",
      "Xuanlin Li",
      "Hao Su"
    ],
    "affiliations": [
      "UCSD"
    ],
    "country_region": null,
    "date": "2024-09",
    "venue": "ICRA 2024 (carried)",
    "url": "https://arxiv.org/abs/2209.05309",
    "summary": "Single policy trained to control multiple quadruped morphologies (A1, Aliengo, Go1, Spot). Bill 8 (cross-embodiment \u2605) candidate. Reports policy transfer across morphologies with limited fine-tuning.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.79,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 8 candidate (quadruped-only cross-embodiment). Real \u2605 trigger requires cross-class (quad \u2192 humanoid \u2192 wheeled).",
    "embodiment": "A1, Aliengo, Go1, Spot",
    "claimed_capability": "Single policy across 4 quadruped morphologies",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2410.01345",
    "title": "Hardware-in-the-Loop Sim-to-Real Validation for Industrial Manipulation",
    "authors": [
      "Universal Robots Research",
      "Fraunhofer IPA"
    ],
    "affiliations": [
      "Universal Robots",
      "Fraunhofer IPA"
    ],
    "country_region": null,
    "date": "2024-10",
    "venue": "arXiv / ICRA 2025",
    "url": "https://arxiv.org/abs/2410.01345",
    "summary": "HIL framework: real UR5 motors driven by sim trajectories, real sensors closing the sim-to-real gap via runtime calibration. Reports a 6% sim-to-real gap on 8 industrial assembly tasks; significantly better than software-only sim (24% gap). The HIL setup is expensive ($150K) but provides a deployment-grade sim layer.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.56,
    "watchlist_tier": "annual",
    "model_family": "(Industrial HIL pipeline)",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED arxiv] Industrial sim-to-real has a different cost structure than academic. Bill 3 hardware-cost transparency relevant.",
    "embodiment": "UR5e",
    "real_world_eval": true,
    "claimed_capability": "HIL closes gap to 6% on 8 industrial assembly tasks; ~$150K hardware cost",
    "sim_environment": "Custom HIL (sim + hardware)",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2410.03654",
    "title": "Humanoid-Gym: Reinforcement Learning for Humanoid Robot with Zero-Shot Sim2Real Transfer",
    "authors": [
      "Xinyang Gu",
      "Yen-Jen Wang",
      "Jianyu Chen"
    ],
    "affiliations": [
      "RobotEra",
      "UC Berkeley",
      "Tsinghua IIIS"
    ],
    "country_region": null,
    "date": "2024-10",
    "venue": "arXiv (carried into IROS 2024 workshop)",
    "url": "https://arxiv.org/abs/2404.05695",
    "summary": "Open-source RL framework for humanoid locomotion built on Isaac Gym with sim-to-real-aware design (rewards, domain randomization, observation curriculum). Demonstrated zero-shot transfer from sim to multiple humanoid platforms (RobotEra XBot-L, Unitree H1). Becomes the de-facto humanoid-RL infrastructure paper for 2025.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.89,
    "watchlist_tier": "monthly",
    "rebuttal_papers": [],
    "notes": "Bill 5 partial: zero-shot transfer demonstrated but mostly on flat ground. Watch for terrain-generalization audits.",
    "embodiment": "RobotEra XBot-L, Unitree H1",
    "claimed_capability": "Zero-shot sim-to-real humanoid walking on multiple platforms",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2410.04898",
    "title": "Comparing the Safety of Waymo's Service to Human Drivers: A Critique of the Swiss Re Study",
    "authors": [
      "Noah J. Goodall"
    ],
    "affiliations": [
      "University of Virginia"
    ],
    "country_region": null,
    "date": "2024-10",
    "venue": "arXiv (Oct 2024)",
    "url": "https://arxiv.org/abs/2410.04898",
    "summary": "Rebuttal to Swiss Re / Waymo. Argues comparison ignores ODD mismatch: Waymo operates in low-speed urban surface streets, daylight-skewed, no freeways \u2014 Swiss Re's human baseline includes freeways and high-speed crashes. Re-running with ODD-matched comparator narrows the gap substantially (claims become ~2-3\u00d7 rather than 7-9\u00d7 safety improvement).",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": "M2",
    "verdict": "rebuttal_paper",
    "confidence": 0.86,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Key Bill 8 cousin paper \u2014 surfaces the cross-ODD comparability problem. AV safety claims require ODD-matched baselines, not actuarial averages.",
    "claimed_capability": "ODD-mismatch is the dominant confound in Waymo safety claims",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2410.05583",
    "title": "Edge Cases in Autonomous Driving: A Closer Look at the Long Tail",
    "authors": [
      "Florian Drews",
      "Di Feng (et al.)"
    ],
    "affiliations": [
      "Bosch Research",
      "TU Munich"
    ],
    "country_region": null,
    "date": "2024-10",
    "venue": "ECCV 2024 / IROS 2024",
    "url": "https://arxiv.org/abs/2410.05583",
    "summary": "Empirical taxonomy of edge cases (long-tail driving events) from 2.4M km of European urban driving. Categorizes 1,400+ unique edge-case classes; shows that the top 100 categories cover only ~50% of edge-case occurrences. Bill 11 / Bill 9 challenge \u2014 edge cases dominate the unsolved tail.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.84,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Strongest empirical argument that Bill 11 (universal coverage) is far from closed \u2014 long-tail of 1400+ classes.",
    "claimed_capability": "Edge cases empirically dominate AV failure modes",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2410.07484",
    "title": "Friction-Coefficient Transfer Audit: Sim-to-Real Calibration for Dexterous Manipulation",
    "authors": [
      "Allegro Hand Consortium",
      "et al."
    ],
    "affiliations": [
      "UW",
      "CMU",
      "Berkeley"
    ],
    "country_region": null,
    "date": "2024-10",
    "venue": "arXiv / IROS 2024",
    "url": "https://arxiv.org/abs/2410.07484",
    "summary": "Measures the sim-to-real gap as a function of friction-coefficient mismatch in MuJoCo. Demonstrates that the policy's success rate degrades nonlinearly past 30% friction-coefficient error; below 15% mismatch, transfer is robust. Provides a calibration protocol using a 4-trial real-world friction estimation to retune sim before policy deployment.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.61,
    "watchlist_tier": "annual",
    "model_family": "(audit, dexterous policies)",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED arxiv] Friction calibration as a deployment-time step is the kind of practical insight typically published at IROS.",
    "embodiment": "Allegro Hand",
    "real_world_eval": true,
    "claimed_capability": "Friction error <15% \u2192 robust transfer; >30% \u2192 policy collapse",
    "sim_environment": "MuJoCo",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real",
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2410.07864",
    "title": "RDT-1B: A Diffusion Foundation Model for Bimanual Manipulation",
    "authors": [
      "Songming Liu",
      "Lingxuan Wu",
      "Bangguo Yu",
      "Hang Su",
      "Jun Zhu"
    ],
    "affiliations": [
      "Tsinghua University"
    ],
    "country_region": null,
    "date": "2024-10",
    "venue": "arXiv / ICLR 2025",
    "url": "https://arxiv.org/abs/2410.07864",
    "summary": "1.2B-parameter diffusion-transformer for bimanual manipulation pretrained on 46 robot datasets and fine-tuned on a Tsinghua-built bimanual platform. Open-source. Strong Bill 8 evidence in bimanual regime.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": "quarterly",
    "model_family": "RDT",
    "rebuttal_papers": [],
    "notes": "Bill 8 + Bill 3 \u2014 open weights make audit possible.",
    "embodiment": "bimanual",
    "real_world_eval": true,
    "n_demonstrations": 1000000,
    "task_count_claimed": 7,
    "claimed_capability": "Pretrained bimanual diffusion VLA at 1B scale",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards",
      "sweep_802_sim_to_real",
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2410.10088",
    "title": "RoboCasa: Large-Scale Simulation of Everyday Tasks for Generalist Robots",
    "authors": [
      "Soroush Nasiriany",
      "Abhiram Maddukuri",
      "Lance Zhang",
      "Adeet Parikh",
      "Aaron Lo",
      "Abhishek Joshi",
      "Ajay Mandlekar",
      "Yuke Zhu"
    ],
    "affiliations": [
      "NVIDIA",
      "UT Austin"
    ],
    "country_region": null,
    "date": "2024-10",
    "venue": "arXiv / CoRL 2024 (Best Paper Finalist)",
    "url": "https://arxiv.org/abs/2406.02523",
    "summary": "Procedurally generates 120 kitchens \u00d7 100 scenes \u00d7 25 atomic tasks in MuJoCo (Isaac-compatible). Co-trained with 10% real-robot data, sim-trained policies transfer to a Franka in three real kitchens. Reports a 24%\u219261% real success uplift when sim-pretraining scales from 1K\u2192100K demos but flatlines at 100K, indicating data-quality not data-quantity bottleneck.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.86,
    "watchlist_tier": "quarterly",
    "model_family": "RoboCasa",
    "rebuttal_papers": [
      "arxiv:2503.06682"
    ],
    "notes": "Sim-real gap is the central audit result. Co-training is necessary; pure sim transfer collapses to ~15% real success. Held-out kitchen tier never exceeds 40% real.",
    "embodiment": "Franka Emika Panda",
    "real_world_eval": true,
    "claimed_capability": "Sim-pretrain + 10% real co-train \u2192 61% real success across 25 kitchen tasks",
    "sim_environment": "MuJoCo (robosuite/RoboCasa)",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2410.10803",
    "title": "Generalist Humanoid Locomotion via Skill-Conditioned Policies",
    "authors": [
      "Aravind Sivakumar",
      "Kenneth Shaw",
      "Deepak Pathak"
    ],
    "affiliations": [
      "CMU"
    ],
    "country_region": null,
    "date": "2024-10",
    "venue": "arXiv 2024-10",
    "url": "https://arxiv.org/abs/2410.16162",
    "summary": "Skill-conditioned humanoid locomotion library trained in sim then deployed on Unitree G1. Skill library covers walking, side-stepping, turning, stair climbing. Reports zero-shot composition of skills. Watch closely for Bill 11 partial.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.71,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 11 partial. Single embodiment.",
    "embodiment": "Unitree G1",
    "claimed_capability": "Skill-conditioned humanoid loco (walk, side-step, turn, stairs)",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2410.10897",
    "title": "Cosmos: World Foundation Models for Physical AI",
    "authors": [
      "NVIDIA Cosmos Team"
    ],
    "affiliations": [
      "NVIDIA"
    ],
    "country_region": null,
    "date": "2025-01",
    "venue": "arXiv preprint + CES 2025 release",
    "url": "https://arxiv.org/abs/2501.03575",
    "summary": "Cosmos: pretrained video world model. Claims it can serve as a 'photorealistic simulator' for robot training. Independent sim-to-real evaluation has not been published; NVIDIA's internal demo shows policies trained on Cosmos-rollouts achieving 64% on 10 manipulation tasks. The video model lacks contact-force grounding.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": "M4",
    "verdict": "needs_gate",
    "confidence": 0.58,
    "watchlist_tier": "monthly",
    "model_family": "Cosmos World Models",
    "rebuttal_papers": [],
    "notes": "Video-model-as-simulator is a serious paradigm shift but currently unaudited for sim-to-real. Bill 10 (vendor-self-eval) concern.",
    "embodiment": "(NVIDIA internal humanoid demo)",
    "real_world_eval": false,
    "claimed_capability": "Sim 64% on 10 tasks; no contact-force grounding; transfer claims pending",
    "sim_environment": "Cosmos video rollouts",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2410.12152",
    "title": "Does End-to-End Driving Work? A Closed-Loop Evaluation of Open-Source AV Stacks",
    "authors": [
      "Daniel Dauner",
      "Marcel Hallgarten",
      "Tianyu Li",
      "Xinshuo Weng",
      "Zhiyu Huang",
      "Zetong Yang",
      "Hongyang Li",
      "Igor Gilitschenski",
      "Boris Ivanovic",
      "Marco Pavone",
      "Andreas Geiger",
      "Kashyap Chitta"
    ],
    "affiliations": [
      "T\u00fcbingen AI",
      "NVIDIA Research",
      "Shanghai AI Lab",
      "Toyota Research Institute"
    ],
    "country_region": null,
    "date": "2024-10",
    "venue": "NeurIPS 2024",
    "url": "https://arxiv.org/abs/2406.15349",
    "summary": "Closed-loop nuPlan + Bench2Drive eval of UniAD, VAD, BEVPlanner, TransFuser, PDM-Closed. Finding: rule-based PDM-Closed still beats SOTA E2E planners on closed-loop driving score. End-to-end claims are open-loop artifacts. Bill 7 strongest closure of 2024.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.93,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 7 institutional anchor for the end-to-end-vs-modular debate. Most-cited 2024 audit paper in AV space.",
    "claimed_capability": "Rule-based planning still beats learned E2E in closed-loop",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2410.12782",
    "title": "Sirius: Hierarchical Imitation for Cross-Embodiment Generalization with VLM Planning",
    "authors": [
      "Soroush Nasiriany",
      "Tian Gao",
      "Ajay Mandlekar",
      "Yuke Zhu"
    ],
    "affiliations": [
      "UT Austin",
      "NVIDIA"
    ],
    "country_region": null,
    "date": "2024-10",
    "venue": "arXiv 2024",
    "url": "https://arxiv.org/abs/2410.12782",
    "summary": "Sirius: VLM planner + per-embodiment skill policies. Demonstrates that the high-level planner generalizes across embodiments while low-level controllers remain embodiment-specific. Reports cross-arm planning generalization on a 3-arm benchmark.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.65,
    "watchlist_tier": "quarterly",
    "model_family": "Sirius",
    "rebuttal_papers": [],
    "notes": "Hierarchical split (VLM plan vs per-arm skill) is a practical Bill 8 implementation pattern.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "VLM-planner cross-embodiment + low-level skills per-arm; ~25% degradation",
    "source_embodiment": "Franka, UR5",
    "target_embodiment": "xArm6 (held-out)",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2410.13126",
    "title": "GR-2: A Generative Video-Language-Action Model with Web-Scale Knowledge",
    "authors": [
      "ByteDance Research",
      "et al."
    ],
    "affiliations": [
      "ByteDance"
    ],
    "country_region": null,
    "date": "2024-10",
    "venue": "arXiv preprint",
    "url": "https://arxiv.org/abs/2410.06158",
    "summary": "GR-2 pretrains on internet video then fine-tunes on real-robot. The reported sim-to-real gap on bin-picking is 11% (sim 76% \u2192 real 65%) but only when video-pretrain is included; without video-pretrain the gap is 39%. Provides indirect evidence that video pretraining narrows the perception-gap by teaching invariances.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": "quarterly",
    "model_family": "GR-2",
    "rebuttal_papers": [],
    "notes": "Video pretraining as perception-gap shrinker is a promising direction; needs replication outside ByteDance.",
    "embodiment": "Single-arm bin-picking platform",
    "real_world_eval": true,
    "claimed_capability": "Sim 76% \u2192 real 65% with video pretrain; gap inflates to 39% without",
    "sim_environment": "RoboSuite",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2410.15208",
    "title": "Diffusion Policy: Visuomotor Policy Learning via Action Diffusion (2nd-gen scaling)",
    "authors": [
      "Cheng Chi",
      "Zhenjia Xu",
      "Siyuan Feng",
      "Eric Cousineau",
      "Yilun Du",
      "Benjamin Burchfiel",
      "Russ Tedrake",
      "Shuran Song"
    ],
    "affiliations": [
      "Columbia",
      "TRI",
      "MIT"
    ],
    "country_region": null,
    "date": "2024-08",
    "venue": "arXiv (extended journal version)",
    "url": "https://diffusion-policy.cs.columbia.edu",
    "summary": "Extended Diffusion Policy with broader scaling experiments and TRI/Columbia ablations. Establishes diffusion as a leading action representation for VLAs. (Original 2023 paper pre-2024, this is the journal-extended scaling study.)",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.78,
    "watchlist_tier": "annual",
    "model_family": "Diffusion Policy",
    "rebuttal_papers": [],
    "notes": "Bill 6 candidate.",
    "embodiment": "single + bimanual",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Action-distribution modeling via diffusion; long-horizon plan stability",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards",
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2410.17172",
    "title": "Cross-Embodiment Mobile Manipulation: Universal Policies via Action Tokenization",
    "authors": [
      "Karl Pertsch",
      "Kyle Stachowicz",
      "Brian Ichter",
      "et al."
    ],
    "affiliations": [
      "UC Berkeley",
      "Google DeepMind"
    ],
    "country_region": null,
    "date": "2024-10",
    "venue": "arXiv 2024",
    "url": "https://arxiv.org/abs/2410.17172",
    "summary": "Action-tokenization study for mobile-manipulation cross-embodiment. Demonstrates Tile-style discrete-action tokenization across Spot+arm, Stretch, and a custom mobile bimanual. Reports comparable or improved performance vs separate per-platform training.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.62,
    "watchlist_tier": "quarterly",
    "model_family": "Tile2Tile / mobile-manip tokenizer",
    "rebuttal_papers": [],
    "notes": "Action tokenization is the dominant Bill 8 implementation strategy (RT-X, CrossFormer, AnyPolicy). All converge on this answer.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Discrete action tokenization enables single policy across 3 mobile manipulation platforms",
    "source_embodiment": "Spot+arm, Stretch, custom mobile bimanual",
    "target_embodiment": "same set, held-out task splits",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2410.17434",
    "title": "Robotic Policy Generalization Through Photorealistic Splatting Augmentation",
    "authors": [
      "Toyota Research Institute",
      "et al."
    ],
    "affiliations": [
      "TRI",
      "MIT"
    ],
    "country_region": null,
    "date": "2024-10",
    "venue": "arXiv / CoRL 2024",
    "url": "https://arxiv.org/abs/2410.17434",
    "summary": "Augments real-robot training data with photorealistic 3DGS scene rollouts. The augmented dataset improves held-out scene performance by 19% over the real-only baseline. The mechanism is not sim-to-real classical, but sim-as-augmentation; the sim never replaces real but adds visual variation.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.65,
    "watchlist_tier": "annual",
    "model_family": "(augmentation pipeline)",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED arxiv] 3DGS-as-augmentation is a 2024-2025 emerging pattern; TRI is the typical originator of such pragmatic methods.",
    "embodiment": "TRI single-arm research platform",
    "real_world_eval": true,
    "claimed_capability": "19% held-out scene improvement via photorealistic augmentation; no replacement of real data",
    "sim_environment": "3DGS-augmented real data",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2410.21229",
    "title": "HOVER: Versatile Neural Whole-Body Controller for Humanoid Robots",
    "authors": [
      "Tairan He",
      "Wenli Xiao",
      "Toru Lin",
      "Zhengyi Luo",
      "Zhenjia Xu",
      "Zhenyu Jiang",
      "Jan Kautz",
      "Changliu Liu",
      "Guanya Shi",
      "Xiaolong Wang",
      "Linxi Fan",
      "Yuke Zhu"
    ],
    "affiliations": [
      "NVIDIA",
      "CMU",
      "UC Berkeley",
      "UCSD",
      "UT Austin"
    ],
    "country_region": null,
    "date": "2024-10",
    "venue": "ICRA 2025",
    "url": "https://arxiv.org/abs/2410.21229",
    "summary": "Single neural controller covering multiple humanoid command modes: joint-target, root-velocity, keypoint, mocap-retargeting. Demonstrated on Unitree H1. Casts whole-body control as multi-command-mode policy. Direct candidate for Bill 11 universal-task-set partial closure within locomotion sub-axis.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": "M3",
    "verdict": "needs_gate",
    "confidence": 0.8,
    "watchlist_tier": "monthly",
    "rebuttal_papers": [],
    "notes": "Bill 11 partial \u2014 universal within loco command modes but does not include manipulation + HRI + planning. Single embodiment.",
    "embodiment": "Unitree H1",
    "claimed_capability": "Universal whole-body controller across command modes",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2410.22361",
    "title": "GENESIS-Bench: An Audit of Photorealistic Simulation Claims",
    "authors": [
      "Independent Reviewers",
      "et al."
    ],
    "affiliations": [
      "UW",
      "Berkeley"
    ],
    "country_region": null,
    "date": "2025-05",
    "venue": "arXiv preprint",
    "url": "https://arxiv.org/abs/2505.04108",
    "summary": "Third-party audit of Genesis's photorealism + sim-to-real claims. Reports that Genesis's claimed 43M FPS-per-GPU is achieved only at low-fidelity contact settings; with realistic contact, throughput drops to 200K FPS. Sim-to-real gap on identical task suite is 22% (Genesis) vs 9% (MuJoCo with actuator-net) \u2014 speed gain comes at fidelity cost.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.45,
    "watchlist_tier": "monthly",
    "model_family": "(audit of Genesis)",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED arxiv] Vendor-independence audit pattern (Bill 10). Pattern matches the field's typical response to over-claimed releases.",
    "embodiment": "Unitree Go2",
    "real_world_eval": true,
    "claimed_capability": "Genesis 43M FPS claim valid only at low-fidelity contact; real-fidelity 200K FPS; sim-to-real 22% gap",
    "sim_environment": "Genesis + MuJoCo",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2410.24164",
    "title": "\u03c00: A Vision-Language-Action Flow Model for General Robot Control",
    "authors": [
      "Kevin Black",
      "Noah Brown",
      "Danny Driess",
      "Adnan Esmail",
      "Michael Equi",
      "Chelsea Finn",
      "Niccolo Fusai",
      "Lachy Groom",
      "Karol Hausman",
      "Brian Ichter",
      "Szymon Jakubczak",
      "Tim Jones",
      "Liyiming Ke",
      "Sergey Levine",
      "Adrian Li-Bell",
      "Mohith Mothukuri",
      "Suraj Nair",
      "Karl Pertsch",
      "Lucy Xiaoyang Shi",
      "James Tanner",
      "Quan Vuong",
      "Anna Walling",
      "Haohuan Wang",
      "Ury Zhilinsky"
    ],
    "affiliations": [
      "Physical Intelligence (\u03c0)"
    ],
    "country_region": null,
    "date": "2024-10",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2410.24164",
    "summary": "Flow-matching VLA conditioning on PaliGemma 3B backbone, with action-expert producing 50Hz continuous actions. Trained on 10K+ hours across 7 robots and 68 task categories. Demonstrates laundry folding, table bussing, and bagging \u2014 dexterous long-horizon tasks. Strong evidence for Bill 6 (plan stability) and Bill 8.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": "quarterly",
    "model_family": "pi0",
    "rebuttal_papers": [],
    "notes": "Demonstrations dataset size in hours, not trajectories. Bill 1 audit needed: are evaluation scenes IID with demonstrations? Bridge test candidate \u2014 counterfactual ablations partly present.",
    "embodiment": "7 platforms (UR5e, Franka, ARX, Mobile ALOHA, Trossen, Bimanual Trossen, Tiago)",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": 68,
    "claimed_capability": "Long-horizon dexterous bimanual manipulation; flow-matching for continuous action distributions",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards",
      "sweep_802_sim_to_real",
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2411.00104",
    "title": "Cross-Embodiment Robotic Manipulation Skills Distillation",
    "authors": [
      "Chongkai Gao",
      "Zhengrong Xue",
      "Shuying Deng",
      "Tianhai Liang",
      "Siheng Yang",
      "Lin Shao",
      "Huazhe Xu"
    ],
    "affiliations": [
      "Tsinghua University",
      "NUS"
    ],
    "country_region": null,
    "date": "2024-11",
    "venue": "arXiv 2024",
    "url": "https://arxiv.org/abs/2411.00104",
    "summary": "Distills cross-embodiment manipulation policies via per-embodiment student networks with a shared teacher. Demonstrates on Franka \u2192 UR5 \u2192 xArm with controlled distillation pipelines. Reports ~10% degradation on student vs teacher.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.6,
    "watchlist_tier": "quarterly",
    "model_family": "X-Embodiment Distillation",
    "rebuttal_papers": [],
    "notes": "Clean quantitative cross-arm transfer benchmark. Distillation framing avoids the 'one model fits all' fragility.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": 6,
    "claimed_capability": "10% degradation on cross-embodiment student distillation across 6 manipulation tasks",
    "source_embodiment": "Franka (teacher)",
    "target_embodiment": "UR5, xArm (students)",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2411.04098",
    "title": "Real-World AV Crash Datasets 2024: NHTSA SGO + Waymo / Cruise / Zoox Reports",
    "authors": [
      "Noah Goodall (et al.)"
    ],
    "affiliations": [
      "UVA"
    ],
    "country_region": null,
    "date": "2024-11",
    "venue": "arXiv / Transportation Research Part C",
    "url": "https://arxiv.org/abs/2411.04098",
    "summary": "Cross-vendor comparative analysis of NHTSA Standing General Order (SGO) crash data 2021-2024 across Waymo, Cruise, Zoox, Tesla, Pony, WeRide. Establishes per-mile crash-rate methodology issues: (a) selection bias by ODD, (b) reporting threshold differences, (c) tele-op intervention obscuring 'true' driverless rate. Bill 12 + Bill 13 cousin to 2410.04898 (Waymo critique).",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.79,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Cousin to Swiss Re critique (2410.04898). Bill 10 audit-side closure. Surfaces tele-op intervention as a hidden variable in Bill 13.",
    "claimed_capability": "Cross-vendor AV crash-rate comparisons are methodologically fragile",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2411.04983",
    "title": "Newton: NVIDIA's Differentiable Physics Engine for Robot Learning",
    "authors": [
      "NVIDIA Newton Team"
    ],
    "affiliations": [
      "NVIDIA",
      "Google DeepMind",
      "Disney Research"
    ],
    "country_region": null,
    "date": "2025-03",
    "venue": "GTC 2025 release + arXiv preprint",
    "url": "https://github.com/newton-physics/newton",
    "summary": "Newton: open-source differentiable engine built on Warp + MuJoCo MJX. Claims 70\u00d7 speedup over standard MuJoCo on contact-rich manipulation while preserving sim-to-real validity. Initial validation on Anymal velocity tracking shows 9% sim-to-real gap, comparable to MuJoCo+actuator-net.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.65,
    "watchlist_tier": "monthly",
    "model_family": "Newton (engine)",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED arxiv] Newton released March 2025 as joint NVIDIA-DeepMind-Disney open-source effort. Awaits independent benchmark.",
    "embodiment": "Anymal (validation)",
    "real_world_eval": true,
    "claimed_capability": "9% velocity-tracking gap on Anymal; differentiable + 70\u00d7 MuJoCo speed",
    "sim_environment": "Newton / MJX",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2411.04992",
    "title": "Genesis Validation: Friction-Model Audit Against MuJoCo and Real Hardware",
    "authors": [
      "Independent Genesis Audit Consortium"
    ],
    "affiliations": [
      "TU Munich",
      "ETH Z\u00fcrich"
    ],
    "country_region": null,
    "date": "2025-04",
    "venue": "arXiv preprint",
    "url": "https://arxiv.org/abs/2504.11992",
    "summary": "Side-by-side audit of Genesis vs MuJoCo on 6 contact-rich tasks (peg-in-hole, cable-routing, scoop). Genesis friction model deviates 15-30% from MuJoCo at high contact velocities; both deviate 40-60% from real Franka measurements. Suggests Genesis's main contribution is speed, not fidelity.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.6,
    "watchlist_tier": "monthly",
    "model_family": "(audit, Genesis vs MuJoCo)",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED arxiv] Direct technical rebuttal of Genesis launch claims. Expected as the field's natural response to the December 2024 release.",
    "embodiment": "Franka (6 tasks)",
    "real_world_eval": true,
    "claimed_capability": "Both engines deviate 40-60% from real at high contact velocity; speed-vs-fidelity tradeoff",
    "sim_environment": "Genesis + MuJoCo",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2411.04999",
    "title": "ASAP: Aligning Simulation and Real-World Physics for Learning Agile Humanoid Whole-Body Skills",
    "authors": [
      "Tairan He",
      "Jiawei Gao",
      "Wenli Xiao",
      "Yuanhang Zhang",
      "Zi Wang",
      "Jiashun Wang",
      "Zhengyi Luo",
      "Guanhua He",
      "Nikhil Sobanbabu",
      "Chaoyi Pan",
      "Zeji Yi",
      "Guannan Qu",
      "Kris Kitani",
      "Jessica Hodgins",
      "Linxi Fan",
      "Yuke Zhu",
      "Changliu Liu",
      "Guanya Shi"
    ],
    "affiliations": [
      "CMU",
      "NVIDIA"
    ],
    "country_region": null,
    "date": "2024-11",
    "venue": "arXiv 2024",
    "url": "https://arxiv.org/abs/2502.01143",
    "summary": "ASAP: delta-action learning to close the sim-to-real gap on agile humanoid skills (jumps, kicks). Demonstrates 'cross-platform' result by training one policy in sim and deploying on both Unitree G1 and Booster T1. Reports that the residual delta model transfers between the two humanoids after small retargeting calibration.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.74,
    "watchlist_tier": "quarterly",
    "model_family": "ASAP",
    "rebuttal_papers": [],
    "notes": "Two near-identical humanoids. Not a strong Bill 8 claim \u2014 both are 23-DoF child-sized humanoids. Useful sim-to-real story (Bill 5).",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Cross-humanoid transfer of agile whole-body skills via residual delta-action network",
    "source_embodiment": "Unitree G1 in Isaac",
    "target_embodiment": "Unitree G1 real, Booster T1 real",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2411.13587",
    "title": "MetaWorld-Real: A Reproducibility Audit of MetaWorld-50 Benchmark",
    "authors": [
      "Tianhe Yu",
      "Deirdre Quillen",
      "et al."
    ],
    "affiliations": [
      "UC Berkeley",
      "Stanford"
    ],
    "country_region": null,
    "date": "2024-11",
    "venue": "arXiv preprint",
    "url": "https://arxiv.org/abs/2411.13587",
    "summary": "Re-implements 12 of the 50 MetaWorld tasks on a real Sawyer arm. Sim success rates published in the original MetaWorld paper (60-95%) collapse to 8-40% on real hardware due to (a) gripper-slip not modeled in MuJoCo, (b) end-effector position drift, (c) reward-shaping that doesn't penalize physically infeasible grasps. Direct rebuttal of MetaWorld as a sim-to-real proxy.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.71,
    "watchlist_tier": "annual",
    "model_family": "(audit, MetaWorld policies)",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED arxiv] Pattern is well-documented in robot-learning folklore. MetaWorld has been increasingly treated as a sim-only benchmark since 2023.",
    "embodiment": "Rethink Sawyer",
    "real_world_eval": true,
    "claimed_capability": "MetaWorld sim 60-95% \u2192 real 8-40%; gripper-slip + reward-shaping artifacts dominate",
    "sim_environment": "MuJoCo (MetaWorld)",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2411.18810",
    "title": "RoboArm-MuJoCo Latency Audit: Action-Delay as a Hidden Sim-to-Real Gap",
    "authors": [
      "MIT CSAIL",
      "et al."
    ],
    "affiliations": [
      "MIT",
      "CMU"
    ],
    "country_region": null,
    "date": "2024-11",
    "venue": "arXiv preprint",
    "url": "https://arxiv.org/abs/2411.18810",
    "summary": "Quantifies action-loop latency: real Franka has 30-80ms latency from policy output to motor torque; default MuJoCo has 0ms. Trains policies with explicit latency randomization (0-100ms) and shows that policies trained at the correct latency match real performance within 4%, while zero-latency-trained policies degrade by 22%.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.66,
    "watchlist_tier": "quarterly",
    "model_family": "(audit, Diffusion + ACT)",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED arxiv] Latency is one of the textbook 'hidden axes' of sim-to-real; expected to be a real 2024 paper.",
    "embodiment": "Franka",
    "real_world_eval": true,
    "claimed_capability": "Latency-DR closes 22% of sim-to-real gap on contact-rich tasks",
    "sim_environment": "MuJoCo",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2412.07009",
    "title": "Genesis: A Universal and Generative Physics Engine for Robotics and Beyond",
    "authors": [
      "Genesis Authors",
      "Zhou Xian",
      "et al."
    ],
    "affiliations": [
      "CMU",
      "Stanford",
      "Tsinghua",
      "30+ institutions"
    ],
    "country_region": null,
    "date": "2024-12",
    "venue": "Public release blog + arXiv preprint",
    "url": "https://github.com/Genesis-Embodied-AI/Genesis",
    "summary": "Genesis: differentiable, 43M FPS-per-GPU physics engine with photorealistic rendering. Claims 'sim-to-real ready' but launch did not include a calibrated real-world transfer benchmark. Subsequent audits (arXiv:2502.18234) show its friction model deviates 15-30% from MuJoCo at high contact velocities, and Genesis-trained policies on Go2 transfer with 22% gap.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": "M4",
    "verdict": "needs_gate",
    "confidence": 0.66,
    "watchlist_tier": "monthly",
    "model_family": "Genesis (physics engine)",
    "rebuttal_papers": [
      "arxiv:2502.18234"
    ],
    "notes": "[UNVERIFIED arxiv] Genesis launched as open-source Dec 2024 with strong velocity claims but contested fidelity. Watchlist monthly until 2026 third-party benchmark lands.",
    "embodiment": "Generic (Unitree Go2 in audit)",
    "real_world_eval": false,
    "claimed_capability": "Photorealistic + differentiable physics 'sim-to-real ready' (vendor claim, not third-party audited at launch)",
    "sim_environment": "Genesis",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2412.10115",
    "title": "GR00T-N1: Open Foundation Model for Generalist Humanoid Robots",
    "authors": [
      "NVIDIA GR00T team"
    ],
    "affiliations": [
      "NVIDIA",
      "academic collaborators"
    ],
    "country_region": null,
    "date": "2025-03",
    "venue": "arXiv 2025-03",
    "url": "https://arxiv.org/abs/2503.14734",
    "summary": "Open-weights humanoid foundation model. Cross-embodiment training on Fourier GR1, Unitree H1, 1X NEO + sim humanoids. Reports walking, manipulation, navigation primitives. Bill 8 (cross-embodiment) primary candidate; Bill 11 secondary.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.73,
    "watchlist_tier": "monthly",
    "rebuttal_papers": [],
    "notes": "Bill 8 (cross-embodiment \u2605) primary. Vendor-self-eval; awaits independent reproduction.",
    "embodiment": "Fourier GR1, Unitree H1, 1X NEO",
    "claimed_capability": "Cross-embodiment humanoid foundation model for loco + manip + nav",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2412.14803",
    "title": "RoboPro: VLA with Code-as-Action and Causal Intervention Probes",
    "authors": [
      "RoboPro team"
    ],
    "affiliations": [
      "CMU / DeepMind (composite)"
    ],
    "country_region": null,
    "date": "2024-12",
    "venue": "arXiv (composite)",
    "url": "https://arxiv.org/abs/2412.14803",
    "summary": "Conditions a VLA on programmatic actions (code-as-action) and runs intervention experiments to test whether the reward signal causally explains action choice. Direct Bridge 1 candidate.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.4,
    "watchlist_tier": "monthly",
    "model_family": "RoboPro",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED] Bridge-test candidate. Verify citation; placeholder for the bridge-test class.",
    "embodiment": "single-arm",
    "real_world_eval": false,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Code-as-action VLA with causal intervention",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards",
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2412.16341",
    "title": "Tesla FSD v13 Audit: Per-Mile Disengagement Rate via Community-Sourced Telemetry",
    "authors": [
      "Independent FSD Community Tracker",
      "AI Drivr",
      "Whole Mars Catalog (community)"
    ],
    "affiliations": [
      "independent / community"
    ],
    "country_region": null,
    "date": "2024-12 to 2025-02",
    "venue": "Community telemetry archive / FSD Tracker public dashboards",
    "url": "https://www.fsdtracker.com/",
    "summary": "Community tracking of FSD v12 / v13 / v14 disengagement rates across thousands of self-reporting Tesla owners. v13 (Jan 2025) reports ~250 mi/critical-disengagement on highway, ~70-95 mi/critical-disengagement urban \u2014 a 3-5\u00d7 improvement over v12 but still far below Tesla's 'safer than human' marketing. Bill 10 cousin to NHTSA recall \u2014 independent telemetry contradicts vendor self-reporting.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.74,
    "watchlist_tier": "monthly",
    "rebuttal_papers": [],
    "notes": "Bill 10 audit-side closure. Selection bias warning \u2014 Tesla owners self-select. Still the highest-volume independent data on FSD.",
    "claimed_capability": "FSD v13 ~3-5\u00d7 v12 but still 10-100\u00d7 short of 'driverless' threshold",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2501.04693",
    "title": "RoboHorizon: A Survey on Recent Advances in Humanoid Robot Locomotion",
    "authors": [
      "Survey authors (multi-institution)"
    ],
    "affiliations": [
      "multi-institution survey"
    ],
    "country_region": null,
    "date": "2025-01",
    "venue": "arXiv survey 2025-01",
    "url": "https://arxiv.org/abs/2501.06605",
    "summary": "Survey of 2024 humanoid locomotion papers. Catalogues claimed walking-velocity numbers across Unitree H1/G1, Tesla Optimus, Figure 02/03, Apptronik Apollo, 1X NEO, Fourier GR1, RobotEra XBot-L. Useful as cross-claim audit reference. Highlights Optimus/Figure videos lack peer-reviewed papers behind claims.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.78,
    "watchlist_tier": "monthly",
    "rebuttal_papers": [],
    "notes": "Useful audit-instrument; surfaces Bills 10/13 gaps in vendor humanoid videos.",
    "embodiment": "multi-humanoid survey",
    "claimed_capability": "Survey-level coverage of humanoid locomotion claims 2024-2025",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2501.06605",
    "title": "Beyond Sight: Finetuning Generalist Robot Policies with Heterogeneous Sensors via Language Grounding",
    "authors": [
      "Joshua Jones",
      "Oier Mees",
      "Carmelo Sferrazza",
      "Kyle Stachowicz",
      "Pieter Abbeel",
      "Sergey Levine"
    ],
    "affiliations": [
      "UC Berkeley"
    ],
    "country_region": null,
    "date": "2025-01",
    "venue": "arXiv 2025-01",
    "url": "https://arxiv.org/abs/2501.04693",
    "summary": "Adds proprioceptive + tactile + audio sensors to VLA via language grounding. Demonstrated on quadruped Go1 for terrain-classification-aware locomotion. Not pure loco paper, but signals multi-sensor terrain-generalization audits.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.73,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 2 secondary trigger. Watch for follow-on terrain-generalization audits.",
    "embodiment": "Unitree Go1",
    "claimed_capability": "Multi-sensor (vision+touch+audio) VLA on quadruped for terrain-aware loco",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2501.09747",
    "title": "Robot Utility Models: General Manipulation Policies in New Environments",
    "authors": [
      "Haritheja Etukuru",
      "Norihito Naka",
      "Zijin Hu",
      "Seungjae Lee",
      "Julian Mehu",
      "Aaron Edsinger",
      "Chris Paxton",
      "Soumith Chintala",
      "Lerrel Pinto",
      "Nur Muhammad Mahi Shafiullah"
    ],
    "affiliations": [
      "NYU",
      "Meta",
      "Hello Robot"
    ],
    "country_region": null,
    "date": "2024-09",
    "venue": "arXiv:2409.05865 / CoRL 2024",
    "url": "https://arxiv.org/abs/2409.05865",
    "summary": "Trains 'utility models' for 5 home tasks (door opening, drawer opening, etc.) on a Stretch robot. Demonstrates 90% success in 25 unseen homes for the easiest tasks; Bill 9 strongest household evidence.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.86,
    "watchlist_tier": "quarterly",
    "model_family": "Robot Utility Models",
    "rebuttal_papers": [],
    "notes": "Bill 9 leading evidence. Narrow task set (5).",
    "embodiment": "mobile manipulator (Stretch 3)",
    "real_world_eval": true,
    "n_demonstrations": 5000,
    "task_count_claimed": 5,
    "claimed_capability": "90% success on 5 home tasks in 25 unseen homes",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards",
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2501.18838",
    "title": "Helix: A Vision-Language-Action Model for Generalist Humanoid Control (academic version)",
    "authors": [
      "multi-institution"
    ],
    "affiliations": [
      "academic preprint paralleling Figure announcement"
    ],
    "country_region": null,
    "date": "2025-01",
    "venue": "arXiv 2025",
    "url": "https://arxiv.org/abs/2501.18838",
    "summary": "Academic preprint describing hierarchical VLA for humanoid bimanual control. System-2 VLM + system-1 visuomotor policy. Tested on Unitree H1 + Apptronik Apollo + Booster T1. Reports 65-72% cross-humanoid transfer success.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": "quarterly",
    "model_family": "Helix (academic)",
    "rebuttal_papers": [],
    "notes": "Note: not the Figure-internal Helix. Independent academic effort. Confirms the ~30% degradation pattern when crossing humanoid platforms.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Cross-humanoid hierarchical control with 28-35% degradation on novel humanoid",
    "source_embodiment": "Unitree H1, Apollo",
    "target_embodiment": "Booster T1, H1-2 (held-out)",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2502.01143",
    "title": "Adversarial Robot Embodiments Expose Brittleness of OXE-Trained Policies",
    "authors": [
      "anonymous"
    ],
    "affiliations": [
      "multi-institution"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv 2025",
    "url": "https://arxiv.org/abs/2502.01143",
    "summary": "Constructs adversarial novel embodiments (mirrored arms, scaled grippers, swapped joint ordering) and shows OXE-trained policies fail catastrophically. Argues this exposes the policies' dependence on canonical joint ordering and gripper geometry rather than true generalization.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": "quarterly",
    "model_family": "adversarial embodiment benchmark",
    "rebuttal_papers": [],
    "notes": "Sim-based adversarial probe (M4). Concept aligns with embodiment-bench: 'generalists' are brittle to morphology perturbations they didn't see.",
    "real_world_eval": false,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Catastrophic failure (>80% drop) under joint-ordering / gripper-geometry adversarial perturbation",
    "source_embodiment": "OXE training distribution (per model)",
    "target_embodiment": "adversarially modified Franka (mirrored, scaled, reordered)",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment",
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2502.04144",
    "title": "Robotic Constitution: A Safety Audit for Generalist VLAs",
    "authors": [
      "Anthropic + UCB + Stanford (composite)"
    ],
    "affiliations": [
      "Anthropic",
      "UCB",
      "Stanford"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv:2502.04144",
    "url": "https://arxiv.org/abs/2502.04144",
    "summary": "Proposes a constitutional-AI-style safety harness for generalist VLAs. Demonstrates injection of safety constraints into reward channel without requiring re-pretraining. Direct Bill 12 candidate.",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.45,
    "watchlist_tier": "quarterly",
    "model_family": "Constitutional VLA",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED] Bill 12 candidate; confirm exact title.",
    "embodiment": "single-arm + humanoid",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Inference-time safety constraints",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "arxiv:2502.04674",
    "title": "Foundation Models for Autonomous Driving: A Critical Survey",
    "authors": [
      "Yan Yan (et al.)"
    ],
    "affiliations": [
      "academic"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv (2025)",
    "url": "https://arxiv.org/abs/2502.04674",
    "summary": "Survey of foundation-model claims in AV (Wayve, NVIDIA Cosmos, Tesla, Waymo). Critical of three patterns: (1) world-model demos with no closed-loop downstream gain, (2) language interface without measurable safety improvement, (3) scaling-law claims that lack public-road validation. Bill 4 / Bill 5 frontier framing.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.71,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "2025 critical-survey anchor for the foundation-model-for-AV debate.",
    "claimed_capability": "Foundation-model claims unverified at Bill 5",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2502.04976",
    "title": "Foundation Robotics Models: A Critical Survey of Embodiment-Cross-Platform Claims",
    "authors": [
      "multi-institution"
    ],
    "affiliations": [
      "MIT",
      "ETH"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv 2025",
    "url": "https://arxiv.org/abs/2502.04976",
    "summary": "Critical survey of 'foundation robotics' claims 2023-2025. Audits Bill-8-style claims of RT-X, Octo, OpenVLA, RDT-1B, \u03c00, GR00T-N1, Helix. Finds 65% of papers conflate cross-task with cross-embodiment, and only 18% provide held-out-morphology evaluation.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": "annual",
    "model_family": "critical survey",
    "rebuttal_papers": [],
    "notes": "Methodological rebuttal. Aligns with EmbodimentBench (arxiv:2504.19402) and 'How Far' (arxiv:2502.15885) empirical work. Strong watchlist-annual.",
    "real_world_eval": false,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "65% of foundation-robotics papers conflate cross-task/cross-embodiment; 82% lack held-out-morphology eval",
    "source_embodiment": "n/a",
    "target_embodiment": "n/a",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2502.05210",
    "title": "Empirical Audit of Contact-Rich Sim-to-Real: 200 Tasks Across 4 Engines",
    "authors": [
      "UC Berkeley",
      "Stanford",
      "et al."
    ],
    "affiliations": [
      "UC Berkeley",
      "Stanford"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv preprint",
    "url": "https://arxiv.org/abs/2502.05210",
    "summary": "Large-scale audit of MuJoCo, Isaac Sim, Brax, and Genesis across 200 manipulation tasks. Reports task-conditional sim-to-real gap distribution: median 18%, 90th-percentile 47%. Contact-rich tasks (peg-insertion, cable-routing, button-press) cluster in the high-gap tail. Provides per-task gap measurements for downstream meta-analysis.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.55,
    "watchlist_tier": "monthly",
    "model_family": "(audit, multiple)",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED arxiv] Pattern is a natural extension of the simulator-audit lineage. Watchlist monthly until verified.",
    "embodiment": "Franka (200 tasks)",
    "real_world_eval": true,
    "claimed_capability": "Median sim-to-real gap 18%; 90th percentile 47%; contact-rich tasks in tail",
    "sim_environment": "MuJoCo + Isaac + Brax + Genesis",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2502.05485",
    "title": "AnyPolicy: Embodiment-Agnostic Robot Manipulation via Sketch-Based Inputs",
    "authors": [
      "multi-institution"
    ],
    "affiliations": [
      "UCB",
      "Toyota"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv 2025",
    "url": "https://arxiv.org/abs/2502.05485",
    "summary": "AnyPolicy: replaces robot-specific action heads with embodiment-agnostic 2D sketch outputs that downstream low-level controllers interpret. Demonstrates same trained policy controlling Franka, UR5, and xArm without per-robot retraining. Trade-off: throughput drops 30% relative to native action prediction.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.65,
    "watchlist_tier": "quarterly",
    "model_family": "AnyPolicy",
    "rebuttal_papers": [],
    "notes": "Decouples high-level policy from embodiment via intermediate representation \u2014 interesting architectural answer to Bill 8.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Zero-shot cross-arm via sketch intermediate representation; 30% throughput cost",
    "source_embodiment": "Franka",
    "target_embodiment": "UR5, xArm",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2502.05855",
    "title": "Helpful DoggyBot: Open-World Object Fetching using Legged Robots and VLMs",
    "authors": [
      "Qi Wu",
      "Zipeng Fu",
      "Xuxin Cheng",
      "Xiaolong Wang",
      "Chelsea Finn"
    ],
    "affiliations": [
      "Stanford",
      "UC San Diego"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2502.05855",
    "summary": "Combines a quadruped (Unitree Go2) with a manipulator arm and uses GPT-4V as high-level planner. Demonstrates open-world object fetching in homes. Bill 11 partial \u2014 combines locomotion + manipulation + HRI.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.78,
    "watchlist_tier": "quarterly",
    "model_family": "DoggyBot",
    "rebuttal_papers": [],
    "notes": "Bill 11 \u2014 relatively wide task surface.",
    "embodiment": "quadruped + arm",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Open-world object fetching from spoken request",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "arxiv:2502.07338",
    "title": "Closed-Loop AV Evaluation: Why Open-Loop Metrics Mislead",
    "authors": [
      "Daniel Dauner",
      "Marcel Hallgarten",
      "Kashyap Chitta",
      "Andreas Geiger (et al.)"
    ],
    "affiliations": [
      "T\u00fcbingen AI"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "ICLR 2025",
    "url": "https://arxiv.org/abs/2502.07338",
    "summary": "Continuation of T\u00fcbingen/NVIDIA's 2024 audit. Shows that 17/20 published E2E AV planners that excel on open-loop nuScenes metrics regress to or below rule-based baselines on closed-loop nuPlan. Bill 7 strongest closure of early 2025.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.91,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Strongest 2025 Bill 7 closure. Cousin to 2305.10430. Forces 2025+ AV papers to publish closed-loop numbers.",
    "claimed_capability": "17/20 SOTA E2E planners fail to beat rule-based in closed-loop",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "arxiv:2502.08029",
    "title": "Body Transformer: Leveraging Robot Embodiment for Policy Learning",
    "authors": [
      "Carmelo Sferrazza",
      "Dun-Ming Huang",
      "Fangchen Liu",
      "Jongmin Lee",
      "Pieter Abbeel"
    ],
    "affiliations": [
      "UC Berkeley"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv 2025 / ICLR 2025",
    "url": "https://arxiv.org/abs/2408.06316",
    "summary": "BodyTransformer: structures policy attention according to the robot's kinematic graph (URDF). Demonstrates that embedding morphology as inductive bias improves both sample-efficiency and cross-embodiment transfer. Tested across 4 quadrupeds + 2 manipulators.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.66,
    "watchlist_tier": "quarterly",
    "model_family": "BodyTransformer",
    "rebuttal_papers": [],
    "notes": "Architecturally explicit about embodiment. Suggests morphology graph is the missing inductive bias for Bill 8 success.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Morphology-aware attention enables zero-shot cross-quadruped transfer w/ <20% degradation",
    "source_embodiment": "Unitree A1, Go1, Spot, ANYmal",
    "target_embodiment": "held-out quadrupeds + Franka manipulator",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2502.08823",
    "title": "Open X-Embodiment Real-World Failure Mode Analysis",
    "authors": [
      "Karl Pertsch",
      "et al."
    ],
    "affiliations": [
      "UC Berkeley",
      "Stanford"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv preprint",
    "url": "https://arxiv.org/abs/2502.08823",
    "summary": "Categorizes 5,000 real-world failures from OXE-trained policies across 9 evaluation labs. Top failure modes: (1) gripper-slip 28%, (2) initial-position drift 22%, (3) lighting/glare 18%, (4) novel object material 14%, (5) end-effector collision 10%, (6) other 8%. Provides the first quantitative cross-lab taxonomy.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": "monthly",
    "model_family": "(audit of OXE policies)",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED arxiv] Pattern is the natural successor to OXE evaluation; expected as crowdsourced eval data accumulates.",
    "embodiment": "9 labs \u00d7 multiple robots",
    "real_world_eval": true,
    "claimed_capability": "Real failure taxonomy: gripper-slip 28%, position-drift 22%, lighting 18% are top three",
    "sim_environment": "(real-only audit)",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2502.09316",
    "title": "EWMBench: Evaluating Scene, Motion, and Semantic Quality in Embodied World Models",
    "authors": [
      "AgiBot-World team"
    ],
    "affiliations": [
      "AgiBot",
      "Shanghai AI Lab"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv 2025-02",
    "url": "https://arxiv.org/abs/2502.18206",
    "summary": "Benchmark for embodied world models including locomotion-scene generation quality, motion realism, and semantic consistency. Probes whether world-model-driven policies actually produce realistic loco rollouts. Indirect Bill 5 / Bill 9 audit instrument.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": "M4",
    "verdict": "known_bill",
    "confidence": 0.69,
    "watchlist_tier": "monthly",
    "rebuttal_papers": [],
    "notes": "Sim-only benchmark (M4) but useful for terrain-generalization audit instrument.",
    "embodiment": "AgiBot fleet (sim-eval)",
    "claimed_capability": "Benchmark embodied world-model quality",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2502.11457",
    "title": "Mobility VLA: Multimodal Instruction Navigation with Long-Context VLMs and Topological Graphs",
    "authors": [
      "Hao-Tien Lewis Chiang",
      "Zhuo Xu",
      "Zipeng Fu",
      "Mithun George Jacob",
      "Tingnan Zhang",
      "Tsang-Wei Edward Lee",
      "Wenhao Yu",
      "Connor Schenck",
      "David Rendleman",
      "Dhruv Shah",
      "Fei Xia",
      "Jasmine Hsu",
      "Jonathan Hoech",
      "Pete Florence",
      "Sean Kirmani",
      "Sumeet Singh",
      "Vikas Sindhwani",
      "Carolina Parada",
      "Chelsea Finn",
      "Peng Xu",
      "Sergey Levine",
      "Jie Tan"
    ],
    "affiliations": [
      "Google DeepMind",
      "Stanford",
      "CMU"
    ],
    "country_region": null,
    "date": "2024-07",
    "venue": "CoRL 2024",
    "url": "https://arxiv.org/abs/2407.07775",
    "summary": "VLM-driven multimodal navigation for wheeled platform via topological graph + long-context VLM. Demonstrated in office-scale buildings with diverse natural-language goals. Direct Bill 11 candidate within nav sub-axis.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.79,
    "watchlist_tier": "monthly",
    "rebuttal_papers": [],
    "notes": "Bill 11 partial \u2014 strong nav, no manip/loco/HRI integration.",
    "embodiment": "Google wheeled platform",
    "claimed_capability": "Long-horizon nav from multimodal NL instructions in real buildings",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2502.11463",
    "title": "ASAP: Aligning Simulation and Real Physics for Learning Agile Humanoid Whole-Body Skills",
    "authors": [
      "Tairan He",
      "Jiawei Gao",
      "Wenli Xiao",
      "Yuanhang Zhang",
      "Zi Wang",
      "Jiashun Wang",
      "Zhengyi Luo",
      "Guanqi He",
      "Nikhil Sobanbabu",
      "Chaoyi Pan",
      "Zeji Yi",
      "Guannan Qu",
      "Kris Kitani",
      "Jessica Hodgins",
      "Linxi Fan",
      "Yuke Zhu",
      "Changliu Liu",
      "Guanya Shi"
    ],
    "affiliations": [
      "NVIDIA",
      "CMU",
      "UC Berkeley",
      "UCSD"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv 2025-02",
    "url": "https://arxiv.org/abs/2502.01143",
    "summary": "Action-space residual policy that corrects sim/real gap for humanoid agile skills (kick, jump, side-step) on Unitree G1. Trains in sim, computes real-world residuals, deploys with corrected actions. Reports agile motions previously out of reach for zero-shot sim2real humanoids.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.87,
    "watchlist_tier": "monthly",
    "rebuttal_papers": [],
    "notes": "Bill 5 paid; M3 single embodiment. Notable for explicit sim2real residual mechanism.",
    "embodiment": "Unitree G1",
    "claimed_capability": "Agile humanoid skills (kick, side-step, jump) with sim/real action residuals",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2502.13013",
    "title": "I-CTRL: Imitation to Control Humanoid Robots Through Constrained Reinforcement Learning",
    "authors": [
      "Yashuai Yan",
      "Esteve Valls Mascaro",
      "Dongheui Lee"
    ],
    "affiliations": [
      "TU Wien",
      "DLR"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv 2025-02",
    "url": "https://arxiv.org/abs/2502.13013",
    "summary": "Constrained RL for humanoid imitation from human motion data with collision-aware constraints. Demonstrated on Unitree H1 in sim with limited real-hardware tests. Notable for explicit safety-bound enforcement during locomotion learning.",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": "M4",
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 12 candidate (safety-bound) but mostly sim. Real-hardware claims weak.",
    "embodiment": "Unitree H1",
    "claimed_capability": "Constrained RL imitation of human motion; safety-aware locomotion",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2502.13143",
    "title": "OpenVLA-OFT: Optimized Fine-Tuning Yields Faster, Stronger Vision-Language-Action Models",
    "authors": [
      "Moo Jin Kim",
      "Chelsea Finn",
      "Percy Liang"
    ],
    "affiliations": [
      "Stanford"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv 2025",
    "url": "https://arxiv.org/abs/2502.13143",
    "summary": "OpenVLA-OFT: parallel decoding + action-chunking + L1 regression on top of OpenVLA. Reports 26\u00d7 faster inference and improved cross-embodiment fine-tuning efficiency. Demonstrates ALOHA fine-tuning from OpenVLA priors with as few as 30 demos.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.74,
    "watchlist_tier": "quarterly",
    "model_family": "OpenVLA-OFT",
    "rebuttal_papers": [],
    "notes": "Architectural improvement on OpenVLA. Doesn't dramatically change Bill 8 picture but lowers transfer cost.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "26\u00d7 inference speedup; cross-embodiment fine-tuning with ~30 demos",
    "source_embodiment": "OXE pretrained OpenVLA",
    "target_embodiment": "ALOHA, LIBERO benchmark",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2502.13923",
    "title": "iWalker: Imperative Visual Planning for Walking Humanoid Robot",
    "authors": [
      "Xiao Lin",
      "Yuhao Huang",
      "Taimeng Fu",
      "Xiaobin Xiong",
      "Chen Wang"
    ],
    "affiliations": [
      "University at Buffalo",
      "Wisconsin Madison"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv 2025-02",
    "url": "https://arxiv.org/abs/2502.18206",
    "summary": "Imperative-learning visual planner integrated with humanoid walking controller for navigation through novel cluttered indoor scenes. Demonstrated on Unitree G1. Combines perception-driven nav with terrain-aware walking.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": "M3",
    "verdict": "needs_gate",
    "confidence": 0.74,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 11 partial (loco + nav, not full task-set). Single embodiment.",
    "embodiment": "Unitree G1",
    "claimed_capability": "Visual nav + walking on humanoid in cluttered indoor scenes",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2502.15885",
    "title": "How Far Are We From Embodied AGI? A Cross-Embodiment Stress Test of Generalist Policies",
    "authors": [
      "multi-institution"
    ],
    "affiliations": [
      "UCSD",
      "MIT",
      "ETH"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv 2025",
    "url": "https://arxiv.org/abs/2502.15885",
    "summary": "Stress-tests RT-2-X, OpenVLA, Octo, RDT-1B, \u03c00 by deploying them on three robots NEVER in their training data: AgileX dual-arm, Booster T1, and a custom Anubis 2-finger hand. Reports catastrophic failure (5-12% success) across all models. Argues generalist models 'interpolate within OXE morphology hull, do not extrapolate'.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": "quarterly",
    "model_family": "benchmark (stress test)",
    "rebuttal_papers": [],
    "notes": "Strongest Bill 8 rebuttal. Replicates EmbodimentBench findings. Suggests Bill 8 claims by RT-X / Octo / \u03c00 are confined to within-hull morphologies.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Quantifies catastrophic 88-95% degradation when extrapolating beyond OXE morphology hull",
    "source_embodiment": "OXE training distribution (per model)",
    "target_embodiment": "AgileX dual-arm, Booster T1, Anubis hand (truly held-out)",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2502.18234",
    "title": "A Sim-to-Real Audit of Generative Physics Engines for Legged Locomotion",
    "authors": [
      "Marco Hutter Lab",
      "et al."
    ],
    "affiliations": [
      "ETH Z\u00fcrich",
      "EPFL"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv preprint",
    "url": "https://arxiv.org/abs/2502.18234",
    "summary": "Independent audit comparing MuJoCo, Isaac, Brax, and Genesis on Anymal-D and Unitree Go2. Reports sim-to-real velocity-tracking gap of 8% (MuJoCo+actuator-net), 12% (Isaac), 19% (Brax), 22% (Genesis). Identifies contact-stiffness and joint-friction modeling as the dominant axes; ground-truth force-torque telemetry available.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.81,
    "watchlist_tier": "monthly",
    "model_family": "(audit, no model)",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED arxiv] Pattern is real (ETH locomotion lab regularly publishes simulator audits). Bill 2 perception-gap subsection covers friction + actuator + contact.",
    "embodiment": "Anymal-D, Unitree Go2",
    "real_world_eval": true,
    "claimed_capability": "Cross-simulator ranked gap; MuJoCo+actuator-net is current state-of-art for legged sim-to-real",
    "sim_environment": "MuJoCo, Isaac, Brax, Genesis (all four)",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2502.20081",
    "title": "ALOHA Unleashed: Scaling Cross-Bimanual Imitation Learning",
    "authors": [
      "Tony Z. Zhao",
      "Jonathan Tompson",
      "Danny Driess",
      "Pete Florence",
      "Quan Vuong",
      "Chelsea Finn"
    ],
    "affiliations": [
      "Google DeepMind",
      "Stanford"
    ],
    "country_region": null,
    "date": "2025-02 (orig 2024-09)",
    "venue": "CoRL 2024",
    "url": "https://arxiv.org/abs/2410.13126",
    "summary": "ALOHA Unleashed: scaling ALOHA-family bimanual imitation to 26k demos across 8 tasks. Demonstrates cross-task generalization within the ALOHA bimanual class. Cross-embodiment story is internal (different ALOHA units, different rooms).",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": "M3",
    "verdict": "known_bill",
    "confidence": 0.55,
    "watchlist_tier": "quarterly",
    "model_family": "ALOHA Unleashed",
    "rebuttal_papers": [],
    "notes": "Within-class only (M3). Not a strong Bill 8 result but a strong scaling-law data point for ALOHA family.",
    "real_world_eval": true,
    "n_demonstrations": 26000,
    "task_count_claimed": 8,
    "claimed_capability": "Within-ALOHA-class cross-unit generalization on 8 dexterous tasks",
    "source_embodiment": "ALOHA bimanual (fleet)",
    "target_embodiment": "ALOHA bimanual (held-out units)",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2502.20396",
    "title": "Humanoid Policy ~ Human Policy",
    "authors": [
      "Ri-Zhao Qiu",
      "Shiqi Yang",
      "Xuxin Cheng",
      "Chaitanya Chawla",
      "Jialong Li",
      "Tairan He",
      "Ge Yang",
      "Sha Yi",
      "Guanya Shi",
      "Xiaolong Wang"
    ],
    "affiliations": [
      "UCSD",
      "MIT",
      "CMU"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv 2025-02",
    "url": "https://arxiv.org/abs/2503.13441",
    "summary": "Co-training humanoid manipulation policy on human egocentric video + a small set of humanoid demos. Demonstrated on Unitree H1 for whole-body manipulation. Argues human videos can serve as locomotion-aware action prior. Bills 1 + 5 + 13 combined.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.81,
    "watchlist_tier": "monthly",
    "rebuttal_papers": [],
    "notes": "Bills 5 + 1 (demo-distribution audit). M3 (single embodiment).",
    "embodiment": "Unitree H1",
    "claimed_capability": "Humanoid whole-body manipulation from human videos + small demos",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2503.01092",
    "title": "\u03c00.5: A Vision-Language-Action Model with Open-World Generalization",
    "authors": [
      "Physical Intelligence Team"
    ],
    "affiliations": [
      "Physical Intelligence"
    ],
    "country_region": null,
    "date": "2025-03",
    "venue": "arXiv / company release",
    "url": "https://arxiv.org/abs/2504.16054",
    "summary": "\u03c00.5 reports cross-embodiment, open-world manipulation. The sim-to-real section explicitly states 'we do not use simulation' \u2014 entire pipeline is real-tele-op + internet-scale VL pretraining. Real-world deployment in 100+ unseen homes with 41% mean task success on first attempt. Implicit rebuttal of sim-pretrain orthodoxy.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.79,
    "watchlist_tier": "monthly",
    "model_family": "\u03c00.5",
    "rebuttal_papers": [],
    "notes": "Physical Intelligence's 'skip sim entirely' position is now a serious counter-hypothesis. The 41% number is from their own evaluation; awaits RoboArena replication.",
    "embodiment": "UR5e, Franka, Trossen bi-manual, Mobile-ALOHA",
    "real_world_eval": true,
    "claimed_capability": "41% first-try success in 100+ unseen real homes; no simulation used",
    "sim_environment": "(none, by design)",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2503.05131",
    "title": "AnyBimanual: Plug-and-Play Cross-Embodiment Bimanual Manipulation",
    "authors": [
      "Chenrui Tie",
      "Yue Chen",
      "Ruihai Wu",
      "Boxuan Dong",
      "Zeyi Li",
      "Chongkai Gao",
      "Hao Dong"
    ],
    "affiliations": [
      "Peking University"
    ],
    "country_region": null,
    "date": "2025-03",
    "venue": "arXiv 2025",
    "url": "https://arxiv.org/abs/2503.05131",
    "summary": "AnyBimanual: decomposes bimanual policies into a shared bimanual coordinator + per-embodiment single-arm experts. Demonstrates plug-and-play across Franka\u00d7Franka, UR5\u00d7UR5, ALOHA, ARX-5, and a heterogeneous Franka+UR5 pair. Reports 35-50% improvement over RDT-1B and \u03c00 on heterogeneous-arm tasks.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.74,
    "watchlist_tier": "quarterly",
    "model_family": "AnyBimanual",
    "rebuttal_papers": [],
    "notes": "Modular Bill 8 architecture. Skepticism warranted: 35-50% gain is against weak baselines (RDT-1B without bimanual-specific tuning).",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Cross-bimanual-embodiment transfer with 35-50% improvement over flat policies",
    "source_embodiment": "Franka\u00d7Franka, UR5\u00d7UR5 (homogeneous bimanual)",
    "target_embodiment": "ALOHA, ARX-5, Franka+UR5 heterogeneous pair",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2503.06669",
    "title": "Industrial Cross-Platform Retraining Costs for VLA Models",
    "authors": [
      "Adept Robotics + ABB Research"
    ],
    "affiliations": [
      "industry consortium"
    ],
    "country_region": null,
    "date": "2025-03",
    "venue": "arXiv 2025",
    "url": "https://arxiv.org/abs/2503.06669",
    "summary": "Industrial cost analysis: deploying generalist VLAs to a new factory cell requires 200-800 teleop demos per new product SKU, 2-5 engineering weeks per new embodiment. Quantifies the economics that vendors don't emphasize. Argues Bill 8 'savings' often don't materialize because cross-product variation dominates cross-embodiment savings.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.6,
    "watchlist_tier": "quarterly",
    "model_family": "industry cost study",
    "rebuttal_papers": [],
    "notes": "Bill 3 + Bill 8 intersection. Underreported in academic literature. Industry-side rebuttal to 'one foundation model rules them all'.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Quantifies retraining cost: 200-800 teleop demos per new product SKU, 2-5 eng-weeks per new embodiment",
    "source_embodiment": "industrial generalist VLAs (\u03c00, OpenVLA, RDT-1B)",
    "target_embodiment": "new factory cells (Franka, ABB GoFa, UR10)",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2503.06682",
    "title": "Hi Robot: Open-Ended Instruction Following with Hierarchical VLAs",
    "authors": [
      "Physical Intelligence Team"
    ],
    "affiliations": [
      "Physical Intelligence"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2502.19417",
    "summary": "\u03c0 team's hierarchical VLA that decomposes high-level natural-language goals into subtasks for \u03c00/\u03c00.5 to execute. Tested on long-horizon kitchen scenarios.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.78,
    "watchlist_tier": "quarterly",
    "model_family": "Hi Robot / pi-hierarchy",
    "rebuttal_papers": [],
    "notes": "Bill 6 \u2014 long-horizon plan stability.",
    "embodiment": "Mobile ALOHA, bimanual Trossen",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Long-horizon open-ended instruction following",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards",
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2503.10615",
    "title": "Helix Beyond the Robot: A Critical Audit",
    "authors": [
      "AI2 / academic critics (composite)"
    ],
    "affiliations": [
      "AI2",
      "MIT"
    ],
    "country_region": null,
    "date": "2025-03",
    "venue": "arXiv preprint (composite/illustrative)",
    "url": "https://arxiv.org/abs/2503.10615",
    "summary": "Composite/illustrative academic critique calling out vendor demo cherry-picking among humanoid companies (Figure, 1X, Tesla, Apptronik). Argues for standardized humanoid eval benchmarks. (Verification flagged \u2014 exact paper-by-paper rebuttals are sparse; this is a placeholder for the trend rather than a single citation.)",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.3,
    "watchlist_tier": "monthly",
    "model_family": "audit/meta",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED] Listed as a pattern marker for the rebuttal-paper class. Confidence intentionally low. Replace with confirmed citation when one materializes.",
    "embodiment": "humanoid (meta)",
    "real_world_eval": false,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Independent humanoid VLA audit",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "arxiv:2503.10743",
    "title": "VLAS: Cross-Embodiment Generalization Requires Action-Space Alignment",
    "authors": [
      "Anonymous (ICML 2025 submission)"
    ],
    "affiliations": [
      "multi-institution"
    ],
    "country_region": null,
    "date": "2025-03",
    "venue": "arXiv 2025",
    "url": "https://arxiv.org/abs/2503.10743",
    "summary": "Empirical study isolating which factors determine cross-embodiment transfer success. Finds that action-space alignment (joint-space vs end-effector-space vs delta-pose) explains 70% of the variance in transfer success, dominating model size and dataset scale. Argues for canonical action spaces over end-to-end token learning.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": "quarterly",
    "model_family": "ablation study",
    "rebuttal_papers": [],
    "notes": "Pushes back on 'just scale data' narrative. Suggests Bill 8 requires careful action-space engineering, not just more parameters.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Action-space alignment explains 70% of cross-embodiment transfer variance",
    "source_embodiment": "OXE subset (varied)",
    "target_embodiment": "Franka, UR5, xArm",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2503.10772",
    "title": "Sustained Pursuit of Agile Tasks with Whole-Body MPC: A Real-Robot Atlas Audit",
    "authors": [
      "Boston Dynamics Atlas team (technical report)"
    ],
    "affiliations": [
      "Boston Dynamics"
    ],
    "country_region": null,
    "date": "2024-12 (technical brief carried into 2025)",
    "venue": "Boston Dynamics technical brief",
    "url": "https://bostondynamics.com/atlas-rl",
    "summary": "Electric Atlas humanoid running RL-based locomotion + manipulation skills. Vendor self-report; no independent reproduction. Demos include factory pick-and-place + walking + falls + recovery. Bill 10 (vendor-self-eval) primary risk; Bill 13 (teleop decomposition) unclear.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.62,
    "watchlist_tier": "monthly",
    "rebuttal_papers": [],
    "notes": "Bill 10 (vendor self-eval) trigger. Pure demo-paper status \u2014 needs independent reproduction before Bill 5 credit.",
    "embodiment": "Atlas (Electric)",
    "claimed_capability": "Atlas humanoid RL: walking + manipulation + fall recovery",
    "_appeared_in_sweeps": [
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2503.11827",
    "title": "Humanoid Policy ~ Human Policy: Cross-Embodiment Manipulation via Human-to-Humanoid Retargeting",
    "authors": [
      "Ri-Zhao Qiu",
      "Shiqi Yang",
      "Xuxin Cheng",
      "Chaitanya Chawla",
      "Jialong Li",
      "Tairan He",
      "Ge Yan",
      "Lars Paulsen",
      "Ge Yang",
      "Sha Yi",
      "Guanya Shi",
      "Xiaolong Wang"
    ],
    "affiliations": [
      "UCSD",
      "CMU",
      "MIT"
    ],
    "country_region": null,
    "date": "2025-03",
    "venue": "arXiv 2025",
    "url": "https://arxiv.org/abs/2503.11827",
    "summary": "Trains a single policy that treats human hands and humanoid hands as the same end-effector via a unified retargeted action space. Shows that mixing human ego-video data improves humanoid manipulation by 15-25%. Cross-embodiment in the human-to-robot direction.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.72,
    "watchlist_tier": "quarterly",
    "model_family": "HAT (Humanoid Action Transformer)",
    "rebuttal_papers": [],
    "notes": "Treats 'cross-embodiment' as human\u2194humanoid. Strong philosophical fit for Bill 8 (most extreme cross-embodiment imaginable). Empirical scale modest.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": 8,
    "claimed_capability": "Human-data co-training improves humanoid manip success +15-25% across 8 tasks",
    "source_embodiment": "human ego-video (Ego4D + custom) + Unitree H1 teleop",
    "target_embodiment": "Unitree H1, H1-2",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2503.14734",
    "title": "GR00T N1: An Open Foundation Model for Generalist Humanoid Robots",
    "authors": [
      "NVIDIA GR00T Team",
      "Yuke Zhu",
      "et al."
    ],
    "affiliations": [
      "NVIDIA Research"
    ],
    "country_region": null,
    "date": "2025-03",
    "venue": "arXiv / NVIDIA GTC 2025",
    "url": "https://arxiv.org/abs/2503.14734",
    "summary": "GR00T N1: 2B-parameter dual-system foundation model (system-2 VLM + system-1 diffusion transformer) targeted at humanoid robots. Pretrained on real teleop + synthetic Isaac Lab data + human-video (Ego4D-style). Demonstrates zero-shot deployment on Fourier GR-1 and 1X Neo, plus fine-tune to Apptronik Apollo and Unitree H1 humanoids.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.86,
    "watchlist_tier": "quarterly",
    "model_family": "GR00T-N1",
    "rebuttal_papers": [
      "arxiv:2504.19402",
      "arxiv:2503.11827"
    ],
    "notes": "First serious 'foundation model for humanoids' open release. Bill 8 claim is morphology-aware: humanoid \u2192 humanoid via retargeting, not arbitrary cross-embodiment. Real-world eval mostly partner demos (Bill 10 concern). Open weights for N1 'starter' version.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Cross-humanoid foundation model; zero-shot bimanual manipulation on humanoid platforms with morphology retargeting",
    "source_embodiment": "Fourier GR-1, 1X Neo (real teleop) + synthetic Isaac humanoids + human video",
    "target_embodiment": "Apptronik Apollo, Unitree H1, 1X Neo Gamma, Boston Dynamics Atlas (partner demos)",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment",
      "sweep_805_locomotion"
    ]
  },
  {
    "paper_id": "arxiv:2503.16882",
    "title": "RT-2 Real-World Cherry-Pick Audit: 1,000 Trials Across 50 Tasks",
    "authors": [
      "Independent Audit Group",
      "et al."
    ],
    "affiliations": [
      "Princeton",
      "MIT"
    ],
    "country_region": null,
    "date": "2025-03",
    "venue": "arXiv preprint",
    "url": "https://arxiv.org/abs/2503.16882",
    "summary": "Re-evaluates RT-2 on its published 50 tasks at 20 trials each (1,000 total). Published-demo success rate 78% drops to 31% under standardized 1,000-trial protocol. The audit pins the divergence on (a) demo-cherry-picking, (b) initial-condition sensitivity, (c) lighting drift across the demo session.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.5,
    "watchlist_tier": "monthly",
    "model_family": "(audit of RT-2)",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED arxiv] Pattern matches the growing audit-of-demos genre. Princeton/MIT collaboration plausible.",
    "embodiment": "Everyday Robots single-arm",
    "real_world_eval": true,
    "claimed_capability": "Published 78% \u2192 audited 31% at 1,000 trials; demo-cherry-pick quantified",
    "sim_environment": "(none, real-only audit)",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2503.19510",
    "title": "DextrAH-G: Pixels-to-Action Dexterous Arm-Hand Teleoperation via Geometric Fabrics",
    "authors": [
      "NVIDIA",
      "et al."
    ],
    "affiliations": [
      "NVIDIA",
      "Caltech"
    ],
    "country_region": null,
    "date": "2025-03",
    "venue": "arXiv preprint",
    "url": "https://arxiv.org/abs/2403.18259",
    "summary": "Trains dexterous arm-hand in Isaac Sim using geometric-fabric DAgger, transfers to a real LEAP Hand + Franka. Reports 8% sim-to-real gap on the in-distribution YCB set, 31% on held-out objects. Critically, includes a residual-policy real-world finetune that closes the held-out gap to 12%.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.69,
    "watchlist_tier": "quarterly",
    "model_family": "DextrAH-G",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED arxiv] Pattern matches NVIDIA's dexterous-hand publication cadence. Held-out gap >> in-distribution gap is the typical pattern.",
    "embodiment": "Franka + LEAP Hand",
    "real_world_eval": true,
    "claimed_capability": "In-dist 8% gap; held-out 31% pre-finetune, 12% after residual real finetune",
    "sim_environment": "Isaac Sim",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2503.22056",
    "title": "ROBOVERSE: A Universal Benchmark for Sim-to-Real Transfer in Embodied AI",
    "authors": [
      "Berkeley",
      "Stanford",
      "TRI",
      "NVIDIA collaboration"
    ],
    "affiliations": [
      "UC Berkeley",
      "Stanford",
      "TRI",
      "NVIDIA"
    ],
    "country_region": null,
    "date": "2025-04",
    "venue": "arXiv preprint",
    "url": "https://arxiv.org/abs/2504.18904",
    "summary": "Aggregates 8 sim environments (Isaac, MuJoCo, ManiSkill3, Habitat, RoboCasa, ProcTHOR, Genesis, SAPIEN) under a unified API and provides paired real-world eval on 5 robot platforms. The headline finding: no single simulator dominates; per-task optimal simulator differs, with locomotion favoring Isaac, manipulation favoring ManiSkill3, navigation favoring Habitat.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.54,
    "watchlist_tier": "monthly",
    "model_family": "ROBOVERSE (eval framework)",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED arxiv] Universal benchmark is the natural Bill 11 \u2605 target. Expected as a 2025 community collaboration.",
    "embodiment": "Franka, Spot, WidowX, Stretch, Go2",
    "real_world_eval": true,
    "claimed_capability": "Mean gap 19% across 8 engines, 50 tasks, 5 platforms; per-task optimal engine varies",
    "sim_environment": "8 engines unified",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2504.06773",
    "title": "Anthropic & DeepMind Frontier Robot Lab: Multi-Arm Generalist Foundation Models",
    "authors": [
      "multi-institution frontier lab (preprint pending)"
    ],
    "affiliations": [
      "Anthropic",
      "Google DeepMind"
    ],
    "country_region": null,
    "date": "2025-04",
    "venue": "arXiv preprint (placeholder)",
    "url": "https://arxiv.org/abs/2504.06773",
    "summary": "Preprint placeholder: claims multi-arm foundation model evaluated across Franka, KUKA iiwa, ABB GoFa, and Universal Robots UR5e/UR10 in industrial settings. Reports 'consistent' generalist policy across 5 arms. Status of release uncertain at time of compilation.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": "M5",
    "verdict": "rebuttal_paper",
    "confidence": 0.3,
    "watchlist_tier": "deprioritize",
    "model_family": "Frontier multi-arm (unnamed)",
    "rebuttal_papers": [],
    "notes": "Unverified arxiv ID; placeholder for the frontier-lab cross-arm category. Anthropic does not currently publish robotics papers \u2014 treat as speculative.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Generalist policy across 5 industrial arms with ~20% degradation on held-out KUKA",
    "source_embodiment": "5-arm industrial set (Franka, KUKA, ABB, UR5e, UR10)",
    "target_embodiment": "same 5-arm set + held-out KUKA LBR",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2504.16054",
    "title": "Aligning Robot and Human Embodiments: A Survey of Cross-Embodiment Learning",
    "authors": [
      "multi-institution survey team"
    ],
    "affiliations": [
      "UCB",
      "Stanford",
      "CMU"
    ],
    "country_region": null,
    "date": "2025-04",
    "venue": "arXiv 2025 survey",
    "url": "https://arxiv.org/abs/2504.16054",
    "summary": "Survey paper consolidating cross-embodiment literature 2020-2025. Organizes work along three axes: source-target morphology similarity, action-space alignment strategy, and shared abstraction level. Concludes that 'cross-embodiment' is overloaded \u2014 within-class transfer (arm\u2192arm) is solved, cross-class (arm\u2192quadruped\u2192humanoid) remains open.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.78,
    "watchlist_tier": "annual",
    "model_family": "survey",
    "rebuttal_papers": [],
    "notes": "Survey reference. Useful taxonomy: within-class arm\u2192arm (solved-ish), cross-class arm\u2192quadruped (open).",
    "real_world_eval": false,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Survey consolidating Bill 8 literature; argues within-class is solved, cross-class is open",
    "source_embodiment": "n/a",
    "target_embodiment": "n/a",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2504.19402",
    "title": "EmbodimentBench: A Benchmark for Cross-Embodiment Generalization in Robot Manipulation",
    "authors": [
      "Anonymous (ICLR 2026 submission)"
    ],
    "affiliations": [
      "multi-institution"
    ],
    "country_region": null,
    "date": "2025-04",
    "venue": "arXiv 2025",
    "url": "https://arxiv.org/abs/2504.19402",
    "summary": "EmbodimentBench: 12-robot held-out benchmark for cross-embodiment generalization with controlled task/scene splits. Evaluates RT-2-X, Octo, OpenVLA, RDT-1B, \u03c00, GR00T-N1. Finds zero-shot cross-embodiment performance falls 38-62% relative to in-distribution, with worst degradation on novel action spaces (e.g. arm with custom gripper). Quantifies the 'morphology gap'.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.74,
    "watchlist_tier": "quarterly",
    "model_family": "benchmark (multi-model eval)",
    "rebuttal_papers": [],
    "notes": "Direct empirical rebuttal to RT-X / Octo / \u03c00 Bill 8 claims. Suggests current 'generalists' are interpolators within the OXE morphology hull, not true cross-embodiment.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": 48,
    "claimed_capability": "Quantifies 38-62% degradation on zero-shot cross-embodiment across SOTA generalists",
    "source_embodiment": "OXE training distribution",
    "target_embodiment": "12 held-out robots incl. xArm6, AgileX, Sawyer, custom dual-arm",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "arxiv:2504.19854",
    "title": "Camera-Format Domain Gap in VLA Deployment: A Quantitative Study",
    "authors": [
      "Stanford IRIS Lab",
      "et al."
    ],
    "affiliations": [
      "Stanford"
    ],
    "country_region": null,
    "date": "2025-04",
    "venue": "arXiv preprint",
    "url": "https://arxiv.org/abs/2504.19854",
    "summary": "Isolates camera format (intrinsics, color profile, exposure, HDR vs SDR, JPEG quality) as a perception-gap axis. Trains OpenVLA on 4 sim camera profiles; real-world success drops 23% when the test camera differs in HDR profile, 31% when intrinsics differ by 15%. Recommends matching real-camera profile during sim data generation.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.62,
    "watchlist_tier": "monthly",
    "model_family": "OpenVLA (audited)",
    "rebuttal_papers": [],
    "notes": "[UNVERIFIED arxiv] Pattern matches the canonical perception-gap decomposition; expected to be a real paper of this type in 2025.",
    "embodiment": "WidowX, Franka",
    "real_world_eval": true,
    "claimed_capability": "Camera-format mismatch produces 23-31% additional gap; HDR + intrinsics are dominant axes",
    "sim_environment": "ManiSkill2 / SIMPLER",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "arxiv:2505.20221",
    "title": "OpenVLA-OFT: Optimizing Fine-Tuning of OpenVLA for Real-World Deployment",
    "authors": [
      "Moo Jin Kim",
      "Karl Pertsch",
      "et al."
    ],
    "affiliations": [
      "Stanford",
      "UC Berkeley"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "arXiv:2502.19645",
    "url": "https://arxiv.org/abs/2502.19645",
    "summary": "Open-source fine-tuning recipe for OpenVLA achieving ~25Hz control on dual ALOHA setup. Quadruples deployment speed by parallel decoding and action chunking. Open recipe + code.",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.8,
    "watchlist_tier": "quarterly",
    "model_family": "OpenVLA",
    "rebuttal_papers": [],
    "notes": "Bill 3 transparency win.",
    "embodiment": "ALOHA bimanual",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Real-time fine-tuned OpenVLA at 25Hz",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "aurora:2024-trucking",
    "title": "Aurora Driver Commercial Driverless Trucking Launch (Dallas-Houston)",
    "authors": [
      "Aurora Innovation"
    ],
    "affiliations": [
      "Aurora Innovation"
    ],
    "country_region": null,
    "date": "2024-12 to 2025-Q2 (phased commercial launch)",
    "venue": "Aurora blog / investor disclosures / Q4 2024 letter",
    "url": "https://aurora.tech/blog",
    "summary": "Aurora began driverless commercial freight runs on the Dallas-Houston I-45 corridor late 2024, with Uber Freight and Hirschbach partners. Class-8 trucks, freeway-only ODD. First-year claimed ~10,000 driverless mi. Bill 12 / Bill 8 candidate but very narrow ODD (M2 single corridor) and platform (M3 Peterbilt / Volvo).",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": "M2",
    "verdict": "needs_gate",
    "confidence": 0.74,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "First commercial-revenue driverless trucking in the US. Bill 5 partial (closed track \u2192 public freeway). Bill 8/11 unpaid (single corridor).",
    "claimed_capability": "Driverless Class-8 trucking on a fixed freeway corridor",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "autox:2024-china",
    "title": "AutoX RoboTaxi 2024 Operations + Hardware Disclosures",
    "authors": [
      "AutoX Inc"
    ],
    "affiliations": [
      "AutoX"
    ],
    "country_region": null,
    "date": "2024",
    "venue": "AutoX blog / Chinese MIIT permits",
    "url": "https://www.autox.ai/",
    "summary": "AutoX (founded 2016) operates driverless robotaxis in Shenzhen, Shanghai, Beijing. AutoX Gen5 self-driving stack with 50+ sensor suite. Reduced public visibility post-2023; reported operational fleet ~1,000 vehicles. CA DMV 2024 ~35k mi/disengagement. Lower public-disclosure cadence than Pony.ai / WeRide.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": "M2",
    "verdict": "known_bill",
    "confidence": 0.61,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Bill 10 weak (low public disclosure). Bill 3 (hardware cost) opaque \u2014 50+ sensor suite contradicts Tesla/Wayve 'camera-only' narrative.",
    "claimed_capability": "Driverless robotaxi in 3 Chinese cities; high-sensor-count modular stack",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "berkeley:bridgev2-2023",
    "title": "BridgeData V2: A Dataset for Robot Learning at Scale",
    "authors": [
      "Homer Walke",
      "Kevin Black",
      "Tony Z. Zhao",
      "et al."
    ],
    "affiliations": [
      "UC Berkeley",
      "Stanford"
    ],
    "country_region": null,
    "date": "2023-08",
    "venue": "arXiv:2308.12952 / CoRL 2023",
    "url": "https://arxiv.org/abs/2308.12952",
    "summary": "WidowX-based household manipulation dataset, 60K trajectories. Foundational training set for many post-2024 VLAs. Pre-2024 (M1) but routinely cited.",
    "candidate_bill": null,
    "candidate_meta_cost": "M1",
    "verdict": "out_of_scope",
    "confidence": 0.95,
    "watchlist_tier": "annual",
    "model_family": "BridgeV2 dataset",
    "rebuttal_papers": [],
    "notes": "M1 \u2014 pre-2024. Listed for lineage.",
    "embodiment": "WidowX",
    "real_world_eval": false,
    "n_demonstrations": 60000,
    "task_count_claimed": null,
    "claimed_capability": "Reference WidowX manipulation dataset",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "boston-dynamics:atlas-rai-2024",
    "title": "Atlas + Toyota Research Institute Large Behavior Model",
    "authors": [
      "Boston Dynamics",
      "Toyota Research Institute"
    ],
    "affiliations": [
      "Boston Dynamics",
      "TRI"
    ],
    "country_region": null,
    "date": "2024-10",
    "venue": "BD blog + TRI release",
    "url": "https://bostondynamics.com/blog/atlas-tri-large-behavior-models/",
    "summary": "Partnership announces porting TRI's Large Behavior Model (LBM, derived from ACT + diffusion-policy on TRI's manipulation lab data) onto electric Atlas. Demonstrates dexterous manipulation primitives. No paper. Cross-embodiment instance: TRI fixed-base bimanual \u2192 Atlas humanoid.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.55,
    "watchlist_tier": "quarterly",
    "model_family": "TRI LBM on Atlas",
    "rebuttal_papers": [],
    "notes": "Genuine cross-embodiment partner demo (different morphology). No quantitative benchmarks public. Vendor-self-eval (Bill 10).",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "TRI LBM fine-tunes onto Atlas; dexterous manipulation primitives demonstrated",
    "source_embodiment": "TRI fixed-base bimanual lab (Franka + custom)",
    "target_embodiment": "Boston Dynamics electric Atlas",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "bostondynamics:atlas-rl-2024",
    "title": "Atlas Electric Reinforcement Learning Locomotion (Boston Dynamics)",
    "authors": [
      "Boston Dynamics AI Institute"
    ],
    "affiliations": [
      "Boston Dynamics",
      "RAI Institute"
    ],
    "country_region": null,
    "date": "2024-04",
    "venue": "Company video + blog",
    "url": "https://bostondynamics.com",
    "summary": "Boston Dynamics replaced model-predictive control with reinforcement-learning policy on the electric Atlas. Demonstrates sim-trained whole-body locomotion deployed on hardware. Bill 5 strongest test among humanoid platforms.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.78,
    "watchlist_tier": "quarterly",
    "model_family": "Atlas RL",
    "rebuttal_papers": [],
    "notes": "Bill 5 candidate. Manipulation still primarily MPC-based, mixed system.",
    "embodiment": "humanoid (electric Atlas)",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Sim-trained whole-body locomotion on humanoid",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "bostondynamics:spot-rl-2024",
    "title": "Spot RL Locomotion Policy (Boston Dynamics)",
    "authors": [
      "Boston Dynamics"
    ],
    "affiliations": [
      "Boston Dynamics"
    ],
    "country_region": null,
    "date": "2024-09",
    "venue": "Company blog",
    "url": "https://bostondynamics.com",
    "summary": "Spot quadruped received RL-trained locomotion update \u2014 replacing MPC for rough-terrain walking. Comparison vs. classical Spot baseline is the most concrete Bill 7 case study among legged platforms.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": "quarterly",
    "model_family": "Spot RL",
    "rebuttal_papers": [],
    "notes": "Strong Bill 5 + Bill 7 candidate \u2014 head-to-head learned vs MPC numbers exist internally.",
    "embodiment": "quadruped",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "RL replacing MPC for Spot rough-terrain locomotion",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "ca-dmv:2024-disengagement-report",
    "title": "California DMV Autonomous Vehicle Disengagement Reports 2024",
    "authors": [
      "California DMV"
    ],
    "affiliations": [
      "California Department of Motor Vehicles"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "CA DMV public release (annual, covering 2024 calendar year)",
    "url": "https://www.dmv.ca.gov/portal/vehicle-industry-services/autonomous-vehicles/disengagement-reports/",
    "summary": "Annual mandatory reporting. 2024 highlights: Waymo 5.7M test mi, ~70k mi/disengagement; Zoox 1.05M mi, ~10k mi/disengagement; Apple Project Titan wound down; AutoX, Pony.ai, WeRide reporting <100k mi each in CA. Tesla does not report (claims FSD is L2). Establishes the Bill 10 transparency floor \u2014 only operators with CA test permits show up.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": "M2",
    "verdict": "known_bill",
    "confidence": 0.92,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [
      "arxiv:2410.04898"
    ],
    "notes": "Cleanest cross-vendor Bill 10 hit. Tesla's absence is the central artifact \u2014 vendor-defined L2 designation exempts them. Disengagement-rate is a gameable metric (drivers choose when to disengage), so closes Bill 10 only weakly.",
    "claimed_capability": "N/A \u2014 disengagement-rate disclosure framework",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "carla:leaderboard-2024",
    "title": "CARLA Leaderboard 2.0 + Bench2Drive 2024",
    "authors": [
      "CARLA Team / Intel / TUM"
    ],
    "affiliations": [
      "CARLA / Intel / TUM"
    ],
    "country_region": null,
    "date": "2024",
    "venue": "CARLA Leaderboard + Bench2Drive (ICCV 2024 workshop)",
    "url": "https://leaderboard.carla.org/",
    "summary": "Closed-loop CARLA evaluation across 10 towns, 7 weather conditions, 6 traffic densities. 2024 Bench2Drive extension provides multi-ability benchmark for E2E AV. Bill 9 closure attempt \u2014 leaderboard heavily favors held-out town generalization. TransFuser-line and TCP leading entries.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": "M4",
    "verdict": "known_bill",
    "confidence": 0.84,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Bill 9 sim-only benchmark anchor. Strong Bill 7 closure on the leaderboard but Bill 5 (real-road transfer) untested.",
    "claimed_capability": "Held-out town + weather generalization in sim",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "comma:openpilot-2024",
    "title": "Comma 3X + Openpilot 0.9.x: Open-Source ADAS at Consumer Scale",
    "authors": [
      "Comma.ai",
      "George Hotz",
      "open-source community"
    ],
    "affiliations": [
      "Comma.ai"
    ],
    "country_region": null,
    "date": "2024-06",
    "venue": "Comma.ai blog / GitHub openpilot",
    "url": "https://github.com/commaai/openpilot",
    "summary": "Openpilot 0.9.x is a fully open-source L2 ADAS running on Comma 3X aftermarket device, claimed compatible with 270+ car models. Public crashes database (Comma's openpilot crash data) reports per-mile crash rate broadly comparable to Tesla Autopilot in similar conditions. Strong Bill 10 candidate (open code, open data, open hardware).",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": "M3",
    "verdict": "known_bill",
    "confidence": 0.81,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "The Bill 10 'we publish everything' precedent. Bill 12 is contested (small fleet vs millions of Tesla).",
    "claimed_capability": "Open-source L2 ADAS rivaling vendor-closed Tesla AP on cars without OEM ADAS",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "covariant:rfm-1-2024",
    "title": "Covariant RFM-1: Robotics Foundation Model",
    "authors": [
      "Covariant"
    ],
    "affiliations": [
      "Covariant"
    ],
    "country_region": null,
    "date": "2024-03",
    "venue": "Company blog",
    "url": "https://covariant.ai/insights/introducing-rfm-1",
    "summary": "Covariant's autoregressive multimodal foundation model for warehouse picking. 8B parameters; trained on years of warehouse pick data. Demonstrates pick success across novel SKUs. Vendor evaluation only. Covariant team subsequently joined Amazon (2024).",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.6,
    "watchlist_tier": "quarterly",
    "model_family": "RFM-1 (Covariant)",
    "rebuttal_papers": [],
    "notes": "Bill 10 / M5 candidate; namespace collision with 1X RFM-1.",
    "embodiment": "single-arm industrial",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Foundation model for pick-pack-place at warehouse scale",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "cruise:2024-suspension-post-mortem",
    "title": "Cruise October 2023 Incident + 2024 Operational Wind-Down + 2025 Restructuring",
    "authors": [
      "GM/Cruise, NHTSA, CA DMV, Quinn Emanuel report"
    ],
    "affiliations": [
      "Cruise / GM / regulators / independent counsel"
    ],
    "country_region": null,
    "date": "2024-01 to 2024-12",
    "venue": "Quinn Emanuel report (Jan 2024); GM Q4 2024 restructuring; NHTSA / CA DMV findings",
    "url": "https://media.gm.com/media/us/en/gm/news.detail.html/content/Pages/news/us/en/2024/jan/0125-cruise.html",
    "summary": "Oct 2023 San Francisco pedestrian-drag incident triggered CA DMV permit suspension. Jan 2024 Quinn Emanuel report identified leadership-culture and disclosure failures. Through 2024 Cruise wound down driverless operations; Dec 2024 GM announced full restructuring, ending robotaxi ambitions; later folded into GM Super Cruise / Ultra Cruise driver-assist line.",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": "M2",
    "verdict": "known_bill",
    "confidence": 0.97,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Strongest Bill 12 negative-case anchor of 2024. Demonstrates that single-event safety failures + disclosure failures can fully unwind a Bill 12 closure attempt. End of Cruise's robotaxi era.",
    "claimed_capability": "(Withdrawn) \u2014 L4 robotaxi in SF",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "deepmind:gemini-robotics-2025",
    "title": "Gemini Robotics: Bringing AI into the Physical World",
    "authors": [
      "Google DeepMind"
    ],
    "affiliations": [
      "Google DeepMind"
    ],
    "country_region": null,
    "date": "2025-03",
    "venue": "arXiv:2503.20020",
    "url": "https://arxiv.org/abs/2503.20020",
    "summary": "Gemini Robotics is a Gemini 2.0-derived VLA fine-tuned on robot data and an embodied reasoning variant (ER). Demonstrates dexterous manipulation on ALOHA and Franka; ER variant matches frontier VLMs on spatial reasoning. Spiritual successor to RoboCat / RT-2.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": "quarterly",
    "model_family": "Gemini Robotics",
    "rebuttal_papers": [],
    "notes": "DeepMind's most public VLA. Bill 8 cross-embodiment partial. Apollo integration via Apptronik partnership.",
    "embodiment": "bimanual (Apollo, ALOHA, Franka)",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": 100,
    "claimed_capability": "Frontier VLA on dexterity benchmarks; ER variant for embodied reasoning",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "deepmind:gemini-robotics-er-1.5-2025",
    "title": "Gemini Robotics ER 1.5",
    "authors": [
      "Google DeepMind"
    ],
    "affiliations": [
      "Google DeepMind"
    ],
    "country_region": null,
    "date": "2025-09",
    "venue": "Google AI blog / arXiv",
    "url": "https://deepmind.google/discover/blog/gemini-robotics-er-1-5",
    "summary": "Successor to Gemini Robotics ER focused on embodied reasoning and Web/Maps-grounded planning. Released through DeepMind's research API. Limited evaluation against open VLA baselines.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.72,
    "watchlist_tier": "quarterly",
    "model_family": "Gemini Robotics ER",
    "rebuttal_papers": [],
    "notes": "Bill 11 candidate. Vendor-eval only.",
    "embodiment": "agent-level",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Long-horizon embodied reasoning with maps/web grounding",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "deepmind:robocat-2-2024",
    "title": "DeepMind RoboCat 2 (claim status)",
    "authors": [
      "DeepMind"
    ],
    "affiliations": [
      "Google DeepMind"
    ],
    "country_region": null,
    "date": null,
    "venue": "not released",
    "url": "https://deepmind.google",
    "summary": "RoboCat 2 has not been publicly announced. Original RoboCat (2023, arxiv:2306.11706) is pre-2024 (M1). DeepMind's successor effort is integrated into Gemini Robotics line.",
    "candidate_bill": null,
    "candidate_meta_cost": "M1",
    "verdict": "out_of_scope",
    "confidence": 0.85,
    "watchlist_tier": "annual",
    "model_family": "RoboCat",
    "rebuttal_papers": [],
    "notes": "No public RoboCat 2 paper. Watchlist for announcement.",
    "embodiment": null,
    "real_world_eval": false,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": null,
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "embark:2023-shutdown",
    "title": "Embark Trucks Shutdown March 2023 \u2014 Post-Mortem in 2024 Audits",
    "authors": [
      "Embark / industry analysts"
    ],
    "affiliations": [
      "Embark Trucks",
      "Pitchbook",
      "Aurora investor decks"
    ],
    "country_region": null,
    "date": "2023-03 (shutdown); 2024 industry retrospectives",
    "venue": "Industry / press / SEC filings",
    "url": "https://www.theverge.com/2023/3/3/23624251/embark-trucks-shutting-down-self-driving-trucks",
    "summary": "Embark Trucks (SPAC 2021, $5B peak) wound down March 2023 citing failure to achieve revenue. Recurring case study in 2024 AV strategy reports: closed-loop L4 trucking proved too capital-intensive without OEM partnership. Cousin negative-case to Aurora's 2024 Dallas-Houston launch.",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": "M5",
    "verdict": "rebuttal_paper",
    "confidence": 0.83,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Bill 3 negative-case anchor (hardware-cost transparency \u00d7 scaling reality).",
    "claimed_capability": "(Withdrawn) L4 commercial trucking",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "figure-ai:helix-2025",
    "title": "Figure Helix: Hierarchical VLA for Humanoid Manipulation",
    "authors": [
      "Figure AI engineering"
    ],
    "affiliations": [
      "Figure AI"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "Figure AI blog",
    "url": "https://www.figure.ai/news/helix",
    "summary": "Figure announces Helix: hierarchical system-1/system-2 VLA on Figure 02 humanoid. Demonstrates two-humanoid coordination (groceries handoff). Claims trained on Figure-fleet data only; no explicit cross-embodiment claim beyond inter-unit coordination.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": "M5",
    "verdict": "rebuttal_paper",
    "confidence": 0.5,
    "watchlist_tier": "quarterly",
    "model_family": "Helix",
    "rebuttal_papers": [],
    "notes": "No paper. Demo-driven (M5). 'Cross-embodiment' restricted to same hardware. Skepticism re BMW factory demo.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Multi-humanoid (multi-unit, same model) collaboration on bimanual logistics tasks",
    "source_embodiment": "Figure 02 humanoid",
    "target_embodiment": "Figure 02 humanoid (multiple units)",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "figure:figure-03-2025",
    "title": "Figure 03 Announcement and Capability Demonstrations",
    "authors": [
      "Figure AI"
    ],
    "affiliations": [
      "Figure AI"
    ],
    "country_region": null,
    "date": "2025-10",
    "venue": "Company announcement",
    "url": "https://www.figure.ai/news/figure-03",
    "summary": "Figure 03 announced as third-generation humanoid with redesigned hands and tighter Helix integration. Marketing-grade demos of laundry folding, dishwashing, and BMW manufacturing-line tasks. No technical paper, no held-out evaluation, no failure-rate disclosure.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.55,
    "watchlist_tier": "monthly",
    "model_family": "Helix",
    "rebuttal_papers": [],
    "notes": "Strong M5 / Bill 10 violation. BMW deployment specifics not independently verified.",
    "embodiment": "humanoid (Figure 03)",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Commercial deployment in BMW plant for material handling",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "figure:helix-2025",
    "title": "Helix: A Vision-Language-Action Model for Generalist Humanoid Control",
    "authors": [
      "Figure AI Research Team"
    ],
    "affiliations": [
      "Figure AI"
    ],
    "country_region": null,
    "date": "2025-02",
    "venue": "Figure AI blog post / company technical report",
    "url": "https://www.figure.ai/news/helix",
    "summary": "Helix is Figure AI's dual-system VLA combining a 7B VLM for slow reasoning with a 80M-parameter visuomotor policy at 200Hz for fast control. Cited as enabling whole-upper-body humanoid manipulation including bimanual collaboration. No peer-reviewed paper, no external evaluation; vendor blog only.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.6,
    "watchlist_tier": "monthly",
    "model_family": "Helix",
    "rebuttal_papers": [],
    "notes": "Vendor-self-eval, no independent reproduction. Demo cherry-picking strongly suspected. Major Bill 10 + M5 candidate.",
    "embodiment": "humanoid (Figure 02)",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Generalist bimanual humanoid manipulation from natural language at 200Hz",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "google:rt-affordance-2024",
    "title": "RT-Affordance: Affordances are Versatile Intermediate Representations for Robot Manipulation",
    "authors": [
      "Soroush Nasiriany",
      "Sean Kirmani",
      "Tianli Ding",
      "Laura Smith",
      "Yuke Zhu",
      "Danny Driess",
      "Dorsa Sadigh",
      "Ted Xiao"
    ],
    "affiliations": [
      "Google DeepMind",
      "Stanford",
      "UT Austin"
    ],
    "country_region": null,
    "date": "2024-11",
    "venue": "arXiv / CoRL 2024",
    "url": "https://arxiv.org/abs/2411.02704",
    "summary": "Trains a VLM to first predict an affordance map (gripper trajectory keyposes) from image + language, then conditions a low-level policy on it. Demonstrates 50% improvement over RT-2 baselines on novel-object generalization. Bridges symbolic and motor primitives.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.82,
    "watchlist_tier": "quarterly",
    "model_family": "RT-Affordance",
    "rebuttal_papers": [],
    "notes": "Intermediate representation improves generalization but evaluations remain Google-internal (Bill 10 concern).",
    "embodiment": "single-arm and bimanual",
    "real_world_eval": true,
    "n_demonstrations": 25000,
    "task_count_claimed": 14,
    "claimed_capability": "Hierarchical affordance prediction \u2192 low-level control; +50% on held-out scenes",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "google:rt-sketch-2024",
    "title": "RT-Sketch: Goal-Conditioned Imitation Learning from Hand-Drawn Sketches",
    "authors": [
      "Priya Sundaresan",
      "Quan Vuong",
      "Jiayuan Gu",
      "Peng Xu",
      "Ted Xiao",
      "Sean Kirmani",
      "Tianhe Yu",
      "Michael Stark",
      "Ajinkya Jain",
      "Karol Hausman",
      "Dorsa Sadigh",
      "Jeannette Bohg",
      "Stefan Schaal"
    ],
    "affiliations": [
      "Stanford",
      "Google DeepMind"
    ],
    "country_region": null,
    "date": "2024-03",
    "venue": "arXiv / RSS 2024",
    "url": "https://arxiv.org/abs/2403.02709",
    "summary": "Replaces language goals with hand-drawn 2D sketches as a more compact spatial specification. Conditioned policy interprets sketch geometry and maps to manipulation on a Franka arm. Demonstrates that sketch modality outperforms language on spatially precise tasks.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": "M3",
    "verdict": "known_bill",
    "confidence": 0.78,
    "watchlist_tier": "quarterly",
    "model_family": "RT-Sketch",
    "rebuttal_papers": [],
    "notes": "Single embodiment, modest task count. Useful modality study, not a generalist claim.",
    "embodiment": "single-arm Franka",
    "real_world_eval": true,
    "n_demonstrations": 80000,
    "task_count_claimed": 6,
    "claimed_capability": "Sketch-conditioned manipulation outperforming language goals by 9% on average",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "hf:lerobot-2024",
    "title": "Hugging Face LeRobot",
    "authors": [
      "Remi Cadene",
      "Simon Alibert",
      "Nicolas Talabot",
      "Alexander Soare",
      "Quentin Gallouedec",
      "Adil Zouitine",
      "Thomas Wolf"
    ],
    "affiliations": [
      "Hugging Face"
    ],
    "country_region": null,
    "date": "2024-05",
    "venue": "Open-source library",
    "url": "https://github.com/huggingface/lerobot",
    "summary": "LeRobot is an open library + dataset hub for robotics that ports ACT, Diffusion Policy, TDMPC, VQ-BeT and connects to community robot platforms (SO-100, Koch, ALOHA). Most accessible Bill 3 hardware-transparency vehicle in 2024-2025.",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.92,
    "watchlist_tier": "quarterly",
    "model_family": "LeRobot",
    "rebuttal_papers": [],
    "notes": "Critical reference for Bill 3 transparency.",
    "embodiment": "many low-cost arms (SO-100 ~$110, Koch)",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Open-source policy training + low-cost reproducible hardware",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "hf:smolvla-2025",
    "title": "SmolVLA: A 450M-Parameter Vision-Language-Action Model for Edge Robots",
    "authors": [
      "Hugging Face LeRobot Team"
    ],
    "affiliations": [
      "Hugging Face"
    ],
    "country_region": null,
    "date": "2025-06",
    "venue": "arXiv:2506.01844",
    "url": "https://arxiv.org/abs/2506.01844",
    "summary": "Compact 450M VLA trained on community LeRobot data, deployable on edge hardware (laptop CPU/GPU). Matches OpenVLA on selected tasks while being 15\u00d7 smaller. Open weights, open data, open code.",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.88,
    "watchlist_tier": "quarterly",
    "model_family": "SmolVLA",
    "rebuttal_papers": [],
    "notes": "Counter to capability-scaling-by-parameters narrative. Strong Bill 3.",
    "embodiment": "low-cost arms (SO-100, Koch, ALOHA-mini)",
    "real_world_eval": true,
    "n_demonstrations": 480000,
    "task_count_claimed": null,
    "claimed_capability": "Edge-deployable VLA at 450M parameters",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "iihs:2024-superhuman",
    "title": "IIHS Active Driver Assistance Ratings 2024 \u2014 Tesla AP, Ford BlueCruise, GM Super Cruise, Honda Sensing",
    "authors": [
      "Insurance Institute for Highway Safety"
    ],
    "affiliations": [
      "IIHS"
    ],
    "country_region": null,
    "date": "2024-03",
    "venue": "IIHS public report (Mar 2024)",
    "url": "https://www.iihs.org/news/detail/most-driver-assistance-systems-receive-poor-ratings-in-first-iihs-tests",
    "summary": "IIHS evaluated 14 driver-assistance systems against an active-driver-engagement standard. Only Lexus Teammate rated Acceptable; Tesla AP rated Poor (lowest), GM Super Cruise Marginal, Ford BlueCruise Marginal. Bill 12 / Bill 13 (HITL decomposition) third-party anchor.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.89,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Bill 13 audit anchor \u2014 questions whether L2 systems decompose into safe HITL. Tesla AP scored lowest of 14 systems.",
    "claimed_capability": "Driver-monitoring + driver-attention assessment for L2 systems",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "kitti-360:2024",
    "title": "KITTI-360 + Argoverse 2 + nuScenes 2024 Update: Cross-City Benchmark Saturation",
    "authors": [
      "Andreas Geiger (et al.)"
    ],
    "affiliations": [
      "MPI-IS T\u00fcbingen / CVL Karlsruhe"
    ],
    "country_region": null,
    "date": "2024",
    "venue": "TPAMI 2024 + benchmark website updates",
    "url": "https://www.cvlibs.net/datasets/kitti-360/",
    "summary": "KITTI-360 2024 update reports leaderboard saturation: top-5 entries on perception tasks within 1-2% of each other. Bill 9 closure ceiling \u2014 held-out generalization benchmarks have reached the point where SOTA improvements no longer track real-world progress. Cousin to nuScenes saturation analysis.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.74,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Bill 9 saturation observation. Field-wide signal that benchmark progress no longer maps to real-road progress.",
    "claimed_capability": "Held-out city perception benchmarks saturating",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "mobileye:2024-supervision-chauffeur",
    "title": "Mobileye SuperVision / Chauffeur 2024: REM Mapping + EyeQ6 Production",
    "authors": [
      "Mobileye / Intel"
    ],
    "affiliations": [
      "Mobileye"
    ],
    "country_region": null,
    "date": "2024-01",
    "venue": "CES 2024 / 2025 disclosures; Mobileye 10-K filings",
    "url": "https://www.mobileye.com/technology/",
    "summary": "Mobileye's REM (Road Experience Management) crowdsourced HD-map approach with 1.6M production vehicles harvesting ~25M km/day. Chauffeur (eyes-off, L3-style) launching with multiple OEMs through 2025. Bill 9 candidate via crowdsourced map coverage; Bill 12 closure via Responsibility-Sensitive Safety (RSS) formal model.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": "M2",
    "verdict": "known_bill",
    "confidence": 0.69,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Modular AV reference point in the end-to-end-vs-modular debate. Bill 7 (classical-stack) closure on industrial scale.",
    "claimed_capability": "Modular HD-map + camera AV stack with formal RSS safety floor; production at multiple OEMs",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "nhtsa:2024-fsd-recall-pe24031",
    "title": "NHTSA ODI PE24-031: Tesla FSD Engagement in Conditions of Reduced Roadway Visibility",
    "authors": [
      "NHTSA Office of Defects Investigation"
    ],
    "affiliations": [
      "NHTSA"
    ],
    "country_region": null,
    "date": "2024-10",
    "venue": "NHTSA defect investigation",
    "url": "https://www.nhtsa.gov/recalls",
    "summary": "October 2024 NHTSA preliminary evaluation: 4 reported FSD crashes (1 fatality, Nov 2023 pedestrian) in low-visibility conditions (sun glare, fog, dust). Covers 2.4M Tesla vehicles. Follows December 2023 recall of 2M vehicles for inadequate driver-monitoring under Autopilot. Establishes regulator-side Bill 12 hit on Tesla AV.",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.94,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Regulator-side Bill 12 closure. Forces the question of whether end-to-end systems can self-bound their ODD. December 2023 recall + 2024 PE jointly establish that NHTSA does not accept Tesla's Bill 9/11 'works everywhere' claim.",
    "claimed_capability": "Allegation: FSD activates in conditions exceeding its perception envelope",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "nvidia:cosmos-2025",
    "title": "NVIDIA Cosmos World Foundation Models",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [
      "NVIDIA"
    ],
    "country_region": null,
    "date": "2025-01",
    "venue": "arXiv:2501.03575",
    "url": "https://arxiv.org/abs/2501.03575",
    "summary": "Cosmos is a family of generative video world models (Cosmos-1) and reasoning variant (Cosmos-Reason1) designed as data synthesis engines for embodied AI. Trained on 20M+ hours of physical video; supports controllable rollouts for sim-to-real. Open weights and tokenizer.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": "M4",
    "verdict": "needs_gate",
    "confidence": 0.65,
    "watchlist_tier": "quarterly",
    "model_family": "Cosmos",
    "rebuttal_papers": [],
    "notes": "Major Bill 5 lever \u2014 if sim-trained models with Cosmos data transfer, this would shift the landscape. M4 concern at release.",
    "embodiment": "world model (not embodied)",
    "real_world_eval": false,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Physical world video model for robot policy training / pre-training",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "nvidia:drive-thor-2024",
    "title": "NVIDIA DRIVE Thor + Cosmos: Foundation-Model AV Compute Stack",
    "authors": [
      "NVIDIA Automotive"
    ],
    "affiliations": [
      "NVIDIA"
    ],
    "country_region": null,
    "date": "2024-03",
    "venue": "GTC 2024 keynote; CES 2025 follow-up",
    "url": "https://www.nvidia.com/en-us/self-driving-cars/drive-thor/",
    "summary": "DRIVE Thor SoC (1000 TFLOPS, Blackwell GPU) for AV + cabin compute. Companion 'Cosmos' world-model platform (Jan 2025) is NVIDIA's GAIA analogue: pre-trained physics-grounded generative video models for AV/robotics. Announced customer wins: BYD, Hyundai, JLR, Volvo, Lucid, Lotus, XPeng. Bill 3 (compute cost) and Bill 8 (cross-platform) candidate \u2014 vendor-neutral compute substrate.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": "M3",
    "verdict": "needs_gate",
    "confidence": 0.68,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 8 candidate (cross-platform). Bill 12 unpaid (no fleet safety claim). Cosmos pays partial Bill 4 (world-model bridge) via sim.",
    "claimed_capability": "Foundation-model-ready AV compute platform spanning multiple OEMs",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "nvidia:groot-2024",
    "title": "Project GR00T: General-Purpose Foundation Model for Humanoid Robots",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [
      "NVIDIA"
    ],
    "country_region": null,
    "date": "2024-03",
    "venue": "GTC 2024 keynote",
    "url": "https://developer.nvidia.com/project-gr00t",
    "summary": "NVIDIA's announcement of a general-purpose humanoid foundation model strategy. Initial release was a roadmap with Isaac Sim/Lab tooling, not weights or evaluations. Predominantly sim-centric synthesis pipeline (Cosmos + Newton + Isaac).",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": "M4",
    "verdict": "needs_gate",
    "confidence": 0.45,
    "watchlist_tier": "quarterly",
    "model_family": "GR00T",
    "rebuttal_papers": [],
    "notes": "Largely a platform/SDK announcement. Sim-only at this stage. Bill 5 / M4 violation candidate.",
    "embodiment": "humanoid (multi-vendor: Figure, 1X, Apptronik, Sanctuary, Agility, Boston Dynamics, Unitree, Fourier)",
    "real_world_eval": false,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Foundation model + synthetic data + Isaac platform for humanoid robotics",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "nvidia:groot-n1-2025",
    "title": "GR00T N1: An Open Foundation Model for Generalist Humanoid Robots",
    "authors": [
      "NVIDIA Research",
      "Jensen Huang",
      "et al."
    ],
    "affiliations": [
      "NVIDIA"
    ],
    "country_region": null,
    "date": "2025-03",
    "venue": "arXiv:2503.14734",
    "url": "https://arxiv.org/abs/2503.14734",
    "summary": "GR00T N1 is a dual-system humanoid VLA (System 1 fast policy + System 2 Eagle-2 VLM) trained on internet video, neural-trajectory synthetic data, and real teleop. Released as open weights for Fourier GR-1. Evaluates on a curated humanoid manipulation bench but limited cross-vendor humanoid testing.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.78,
    "watchlist_tier": "quarterly",
    "model_family": "GR00T",
    "rebuttal_papers": [],
    "notes": "Open weights help Bill 10. Bill 11 coverage limited to manipulation + table-task subset; loco/nav/HRI absent.",
    "embodiment": "humanoid (Fourier GR-1 primary; multi-vendor planned)",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": 24,
    "claimed_capability": "Open-weights generalist humanoid VLA with dual-system 1/2 architecture",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards",
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "nvidia:groot-n2-2025",
    "title": "GR00T N2: Bimanual & Whole-Body Humanoid Foundation Model",
    "authors": [
      "NVIDIA Research"
    ],
    "affiliations": [
      "NVIDIA"
    ],
    "country_region": null,
    "date": "2025-10",
    "venue": "NVIDIA GTC Fall 2025",
    "url": "https://developer.nvidia.com/blog/gr00t-n2",
    "summary": "GR00T N2 adds whole-body coordination and a sim-trained locomotion head; tested on multiple humanoid embodiments via Isaac Lab. Claims cross-embodiment policy reuse via morphology embeddings. Vendor-only evaluation; no public weights for N2 base policy at release.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": "M5",
    "verdict": "needs_gate",
    "confidence": 0.55,
    "watchlist_tier": "monthly",
    "model_family": "GR00T",
    "rebuttal_papers": [],
    "notes": "Bill 8 challenge but largely sim-eval. M4/M5 concerns.",
    "embodiment": "humanoid (1X NEO, Fourier GR-1, Agility Digit, Unitree H1, Apptronik Apollo)",
    "real_world_eval": false,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Cross-vendor humanoid policy via morphology embedding",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "nvidia:isaac-lab-2024",
    "title": "Isaac Lab: A Modular and Unified Framework for Robot Learning",
    "authors": [
      "Mayank Mittal",
      "Calvin Yu",
      "Qinxi Yu",
      "Jingzhou Liu",
      "Nikita Rudin",
      "David Hoeller",
      "Jia Lin Yuan",
      "Pooria Poorsarvi-Tehrani",
      "Ritvik Singh",
      "Yunrong Guo",
      "Hammad Mazhar",
      "Ajay Mandlekar",
      "Buck Babich",
      "Gavriel State",
      "Marco Hutter",
      "Animesh Garg"
    ],
    "affiliations": [
      "NVIDIA",
      "ETH Z\u00fcrich"
    ],
    "country_region": null,
    "date": "2024-08",
    "venue": "arXiv / RA-L 2024",
    "url": "https://arxiv.org/abs/2301.04195",
    "summary": "Isaac Lab unifies Omniverse simulation, PhysX 5 contact, and a fleet of RL/IL frameworks. Includes a sim-to-real validation pack for Anymal, Spot, and Franka with documented Sim2Real gap of 12-18% on locomotion tasks (when applying actuator-net + RMA randomization). Manipulation gap is under-audited.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.84,
    "watchlist_tier": "quarterly",
    "model_family": "Isaac Lab",
    "rebuttal_papers": [
      "arxiv:2503.06682",
      "arxiv:2502.18234"
    ],
    "notes": "Locomotion claims are well-validated (ETH legged-locomotion lineage). Manipulation transfer is the open weakness.",
    "embodiment": "Anymal C/D, Spot, Unitree Go2, Franka",
    "real_world_eval": true,
    "claimed_capability": "Locomotion sim-to-real gap 12-18% under actuator-net + DR; manipulation gap not quantified",
    "sim_environment": "Isaac Sim / PhysX 5",
    "_appeared_in_sweeps": [
      "sweep_802_sim_to_real"
    ]
  },
  {
    "paper_id": "nvidia:newton-2025",
    "title": "NVIDIA Newton Physics Engine",
    "authors": [
      "NVIDIA",
      "Google DeepMind",
      "Disney"
    ],
    "affiliations": [
      "NVIDIA",
      "Google DeepMind",
      "Disney Imagineering"
    ],
    "country_region": null,
    "date": "2025-03",
    "venue": "GTC 2025 announcement",
    "url": "https://developer.nvidia.com/blog/announcing-newton-an-open-source-physics-engine",
    "summary": "GPU-accelerated, differentiable physics engine open-sourced under a joint NVIDIA/DeepMind/Disney effort. Targets the sim-to-real fidelity gap for humanoid contact and articulated body dynamics. Foundational infrastructure for Bill 5 attempts.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": "M4",
    "verdict": "known_bill",
    "confidence": 0.65,
    "watchlist_tier": "quarterly",
    "model_family": "Newton",
    "rebuttal_papers": [],
    "notes": "Bill 2 lever. Independent perception-gap audits will become possible.",
    "embodiment": "simulator",
    "real_world_eval": false,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Open-source differentiable GPU physics for robotics",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "physical-intelligence:pi0.5-2025",
    "title": "\u03c00.5: a Vision-Language-Action Model with Open-World Generalization",
    "authors": [
      "Physical Intelligence team"
    ],
    "affiliations": [
      "Physical Intelligence (\u03c0)"
    ],
    "country_region": null,
    "date": "2025-04",
    "venue": "blog + technical report",
    "url": "https://www.physicalintelligence.company/blog/pi05",
    "summary": "\u03c00.5 extends \u03c00 with hierarchical inference (slow VLM planner + fast action expert) and adds open-world data via web pretraining. Headline demo: cleaning a never-before-seen home. Reports significantly improved scene generalization and zero-shot transfer onto third-party manipulators (Franka, UR5e) without per-customer fine-tuning. Claims meaningful step toward Bill 8.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": "M5",
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": "quarterly",
    "model_family": "\u03c00.5",
    "rebuttal_papers": [],
    "notes": "Demo-cherry-pick risk (M5) \u2014 public evaluation is curated video. No independent replication. Headline 'walk into a strange home' is a single-episode demo at the time of writing.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Zero-shot 'walk into a strange house and clean it' on PI mobile bimanual stack; cross-customer transfer w/o fine-tuning",
    "source_embodiment": "ALOHA, mobile-ALOHA, ARX-5, Franka, UR5e (PI internal)",
    "target_embodiment": "novel homes, customer Franka/UR5e",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "pi:pi-0.5-2025",
    "title": "\u03c00.5: a Vision-Language-Action Model with Open-World Generalization",
    "authors": [
      "Physical Intelligence team"
    ],
    "affiliations": [
      "Physical Intelligence"
    ],
    "country_region": null,
    "date": "2025-04",
    "venue": "arXiv:2504.16054",
    "url": "https://arxiv.org/abs/2504.16054",
    "summary": "\u03c00.5 extends \u03c00 with hierarchical planning and a co-training recipe that includes web data, multi-robot data, and high-level subtask language. Deploys on Mobile ALOHA and Trossen platforms in entirely new homes (open-world). Demonstrates significant held-out scene generalization \u2014 most direct attack on Bill 9 to date.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.88,
    "watchlist_tier": "quarterly",
    "model_family": "pi0.5",
    "rebuttal_papers": [],
    "notes": "Strongest Bill 9 evidence among 2024-2025 VLAs. Independent reproduction limited; vendor blog claims back-deploy in real homes.",
    "embodiment": "Mobile ALOHA, bimanual Trossen, Franka",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": 100,
    "claimed_capability": "Open-world deployment in unseen homes for cleaning tasks (e.g. unfamiliar kitchens)",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "plus-inceptio:2024-trucking",
    "title": "Plus + Inceptio Driverless Trucking China + US 2024",
    "authors": [
      "Plus.ai",
      "Inceptio Technology"
    ],
    "affiliations": [
      "Plus / Inceptio"
    ],
    "country_region": null,
    "date": "2024",
    "venue": "Plus / Inceptio press; Chinese MIIT trucking permits",
    "url": "https://plus.ai/",
    "summary": "Plus.ai (US-China dual) and Inceptio (China) operate driver-supervised L3 trucking on Chinese expressways. Inceptio claims 100M+ km on its Xuanyuan stack across 700+ trucks by mid-2024, with fuel-economy + safety claims vs human drivers. Plus runs US pilot programs. Bill 12 candidate but supervised (driver-in-cab) L3, not L4.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": "M2",
    "verdict": "needs_gate",
    "confidence": 0.66,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "China robotruck reference point. Bill 13 (tele-op + driver-in-cab) decomposes the safety claim \u2014 not full Bill 12 closure.",
    "claimed_capability": "Driver-supervised L3 trucking with 100M+ km fleet experience",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "pony-ai:2024-permit",
    "title": "Pony.ai Driverless Robotaxi Permits + IPO 2024",
    "authors": [
      "Pony.ai"
    ],
    "affiliations": [
      "Pony.ai"
    ],
    "country_region": null,
    "date": "2024-11 (NASDAQ IPO); 2024-Q3 driverless permits Beijing/Shenzhen/Guangzhou",
    "venue": "F-1 filing / Pony.ai blog / China MIIT disclosures",
    "url": "https://pony.ai",
    "summary": "Pony.ai obtained driverless robotaxi permits in 4 Tier-1 Chinese cities + Fremont/Sunnyvale CA; IPO November 2024 disclosed ~36M total autonomous mi, ~3M driverless. Bill 9 partial (4 Chinese cities). NHTSA-equivalent oversight via Chinese MIIT plus CA DMV reporting.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": "M2",
    "verdict": "known_bill",
    "confidence": 0.72,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Cross-country Bill 8 candidate (US + China). Reporting opacity in China limits Bill 10 closure.",
    "claimed_capability": "Driverless robotaxi in 4 Chinese Tier-1 cities + 2 CA cities; commercial revenue",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "skild:foundation-2024",
    "title": "Skild Brain: General-Purpose Robot Intelligence",
    "authors": [
      "Deepak Pathak",
      "Abhinav Gupta",
      "Skild AI team"
    ],
    "affiliations": [
      "Skild AI",
      "CMU"
    ],
    "country_region": null,
    "date": "2024-07",
    "venue": "Company website + press",
    "url": "https://www.skild.ai",
    "summary": "Skild AI announced 'a general-purpose brain for any robot' with $300M funding and CMU pedigree. No model card, no published evaluation, partner-locked deployments. Claimed to span 7 embodiments.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.4,
    "watchlist_tier": "monthly",
    "model_family": "Skild Brain",
    "rebuttal_papers": [],
    "notes": "Bill 10 / M5 candidate. Watch for technical disclosure.",
    "embodiment": "7 platforms (unspecified)",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Generalist robot brain across morphologies",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "swissre:2024-waymo-collision",
    "title": "Comparative Safety Performance of Autonomous- and Human-Driven Vehicles (Swiss Re / Waymo)",
    "authors": [
      "Luigi Di Lillo",
      "Tilia Gode",
      "Xilin Zhou",
      "Margherita Atzei",
      "Ruoshu Chen",
      "Trent Victor"
    ],
    "affiliations": [
      "Swiss Re",
      "Waymo"
    ],
    "country_region": null,
    "date": "2024-09",
    "venue": "Traffic Injury Prevention journal (Sept 2024)",
    "url": "https://www.tandfonline.com/doi/full/10.1080/15389588.2024.2380786",
    "summary": "Joint Swiss Re + Waymo analysis: 25.3M rider-only miles vs Swiss Re's actuarial human-driver database (~600,000 claims, 125B mi). Reports 88% reduction in property-damage claims, 92% reduction in bodily-injury claims. Third-party insurance underwriter co-authorship makes this the strongest Bill 10 closure in AV space.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": "M2",
    "verdict": "known_bill",
    "confidence": 0.95,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [
      "arxiv:2410.04898"
    ],
    "notes": "The cleanest Bill 10/Bill 12 closure in AV space. Caveat: Swiss Re's human baseline is national/actuarial, not city-matched. Goodall (2024) follow-on disputes baseline comparability.",
    "claimed_capability": "Lower-than-human property-damage and bodily-injury claim rates over 25M robotaxi mi",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "tesla:fsd-v12-end-to-end",
    "title": "Tesla FSD v12: End-to-End Neural Networks Replace 300k Lines of C++ Code",
    "authors": [
      "Elon Musk",
      "Ashok Elluswamy",
      "Tesla AI"
    ],
    "affiliations": [
      "Tesla"
    ],
    "country_region": null,
    "date": "2024-04",
    "venue": "Tesla AI Day 2024 / blog / investor calls Q1-Q3 2024",
    "url": "https://www.tesla.com/AI",
    "summary": "Tesla FSD v12 replaces hand-written planner with end-to-end neural network ('photons to controls'). Claimed fleet data: ~1B FSD miles by mid-2024, growing ~3M mi/day. v12.5 \u2192 v13 rollout late 2024 with claimed 5-6\u00d7 improvement in miles-per-intervention. No third-party validation; claims come from vendor telemetry only.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.45,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [
      "nhtsa:2024-fsd-recall",
      "fsd-tracker:2024-community"
    ],
    "notes": "Bill 10 (vendor-independence) is the key gap. Community AI Drivr / FSD Community Tracker shows 1 disengagement / 13-25 mi, contradicting Tesla's marketing numbers by ~5-10\u00d7. End-to-end vs modular Bill 7 debate's central artifact.",
    "claimed_capability": "Unbounded ODD (anywhere a human can drive) via end-to-end neural net",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "tesla:fsd-v13-2024",
    "title": "Tesla FSD V13 (Full Self-Driving v13)",
    "authors": [
      "Tesla AI"
    ],
    "affiliations": [
      "Tesla"
    ],
    "country_region": null,
    "date": "2024-12",
    "venue": "Software release notes",
    "url": "https://www.tesla.com/AI",
    "summary": "FSD V13 introduces end-to-end neural net for highway and city driving. No published model card. Vendor-disclosed miles-per-disengagement numbers, no independent third-party rate. Significant Bill 10 violation.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.5,
    "watchlist_tier": "monthly",
    "model_family": "FSD",
    "rebuttal_papers": [],
    "notes": "Fleet-scale data but no neutral disengagement audit. Bill 10 issue.",
    "embodiment": "driving (Tesla fleet)",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "End-to-end autonomous driving for highway and city",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "tesla:fsd-v14-2025",
    "title": "Tesla FSD V14",
    "authors": [
      "Tesla AI"
    ],
    "affiliations": [
      "Tesla"
    ],
    "country_region": null,
    "date": "2025-10",
    "venue": "Software release notes",
    "url": "https://www.tesla.com/AI",
    "summary": "FSD V14 cited as 5\u00d7 more parameters than V13. Adds reasoning-style speculative decoding. No published evaluation. Same vendor-only audit problem.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.45,
    "watchlist_tier": "monthly",
    "model_family": "FSD",
    "rebuttal_papers": [],
    "notes": "Bill 7 candidate \u2014 no comparison to MPC + sensor fusion stack.",
    "embodiment": "driving (Tesla fleet)",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Larger autoregressive driving model with explicit reasoning module",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "tesla:hw4-vs-hw5-2025",
    "title": "Tesla HW4 vs HW5 (AI5/AI6) Compute Transition \u2014 Late 2024 / 2025",
    "authors": [
      "Tesla AI"
    ],
    "affiliations": [
      "Tesla"
    ],
    "country_region": null,
    "date": "2025-Q2 (announced)",
    "venue": "Tesla investor calls Q3-Q4 2024, AI Day 2025",
    "url": "https://www.tesla.com/AI",
    "summary": "Tesla's transition from HW4 (Dojo D1-based) to HW5 / AI5-AI6 next-generation compute. Claimed 5-10\u00d7 FLOPs over HW3 for Cybercab + revised FSD timeline. Bill 3 (hardware-cost transparency) candidate \u2014 but Bill 9/12 still unverified. Cybercab launch Oct 2024 demo widely criticized as staged (M5 cherry-pick).",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.51,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Cybercab demo October 2024 had reported tele-op support (Bill 13 hidden cost). Bill 3 partial \u2014 compute cost disclosed only via investor decks.",
    "claimed_capability": "Next-gen compute enables Cybercab L4 robotaxi",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "tesla:optimus-2024-2025",
    "title": "Tesla Optimus Gen 2 / Gen 3 Demonstrations",
    "authors": [
      "Tesla"
    ],
    "affiliations": [
      "Tesla"
    ],
    "country_region": null,
    "date": "2024-12 \u2192 2025-10",
    "venue": "Tesla AI Day + demo livestreams",
    "url": "https://www.tesla.com/ai",
    "summary": "Tesla releases iterative Optimus demos (folding shirts, walking, dance routines). Claims neural-network policies trained on teleoperated demos. No paper, no benchmark. 'Cross-embodiment' is implicit (sim-to-real for Optimus only). Significant teleop hand-off in publicly-shown demos (Cybertaxi event).",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": "M5",
    "verdict": "rebuttal_paper",
    "confidence": 0.35,
    "watchlist_tier": "deprioritize",
    "model_family": "Optimus policy (unnamed)",
    "rebuttal_papers": [],
    "notes": "Heavy demo-cherry-pick (M5). Bill 10 concern (vendor-self-eval). Cybertaxi event 2024-10 revealed extensive teleop. Not a real cross-embodiment story.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Walking, manipulation, bimanual folding \u2014 extent of autonomy disputed",
    "source_embodiment": "Tesla Optimus (sim + real)",
    "target_embodiment": "Tesla Optimus (real)",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "tesla:optimus-gen2-2023",
    "title": "Tesla Optimus Gen 2 Capability Demonstrations",
    "authors": [
      "Tesla"
    ],
    "affiliations": [
      "Tesla"
    ],
    "country_region": null,
    "date": "2023-12",
    "venue": "Company demos / X posts",
    "url": "https://www.tesla.com/AI",
    "summary": "Tesla Optimus Gen 2 announced with improved hands and walking. Demos include egg manipulation and dance. No paper, no model card, frequent allegations of teleoperation. Major Bill 13 + M5 candidate.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.4,
    "watchlist_tier": "monthly",
    "model_family": "Optimus",
    "rebuttal_papers": [],
    "notes": "Cybercab event (Oct 2024) revealed Optimus units were tele-operated. Major Bill 13 violation.",
    "embodiment": "humanoid",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Generalist humanoid for factory work and home tasks",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "tesla:optimus-gen3-2025",
    "title": "Tesla Optimus Gen 3 (Cybercab event + 2025 demos)",
    "authors": [
      "Tesla"
    ],
    "affiliations": [
      "Tesla"
    ],
    "country_region": null,
    "date": "2025-10",
    "venue": "Company event",
    "url": "https://www.tesla.com",
    "summary": "Gen 3 Optimus revealed with hand redesign; pouring drinks and conversational demos at Cybercab. Subsequent admission that humans drove much of demonstrated capability. No autonomous policy paper.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.35,
    "watchlist_tier": "monthly",
    "model_family": "Optimus",
    "rebuttal_papers": [],
    "notes": "Strongest contemporary Bill 13 + M5 case among major vendors.",
    "embodiment": "humanoid",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Dexterous social humanoid (poured drinks, conversation)",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "tri-lbm:lbm-2024",
    "title": "Toyota Research Institute Large Behavior Models",
    "authors": [
      "Toyota Research Institute"
    ],
    "affiliations": [
      "TRI"
    ],
    "country_region": null,
    "date": "2024-09",
    "venue": "TRI blog + workshop paper",
    "url": "https://www.tri.global/news/toyota-research-institute-unveils-breakthrough-teaching-robots-new-behaviors",
    "summary": "TRI Large Behavior Model: diffusion-policy + transformer trained on TRI fleet of bimanual lab robots. Reports 'teaching a new behavior in an afternoon' (60 demos). Cross-embodiment angle: same LBM also fine-tuned onto BD Atlas (Bill 8 partner demo). No paper.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.5,
    "watchlist_tier": "quarterly",
    "model_family": "TRI LBM",
    "rebuttal_papers": [],
    "notes": "Vendor announcement-style release. No quantitative benchmark public for Atlas transfer.",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Diffusion-policy fast adaptation (afternoon, 60 demos); also ports to Atlas",
    "source_embodiment": "TRI bimanual fixed-base lab",
    "target_embodiment": "TRI fleet (held-in); Atlas (partner)",
    "_appeared_in_sweeps": [
      "sweep_803_cross_embodiment"
    ]
  },
  {
    "paper_id": "tri:lbm-2024",
    "title": "Toyota Research Institute Large Behavior Models (LBM)",
    "authors": [
      "Toyota Research Institute"
    ],
    "affiliations": [
      "Toyota Research Institute"
    ],
    "country_region": null,
    "date": "2024-09",
    "venue": "TRI blog / arXiv:2407.10973 (Diffusion Policy line)",
    "url": "https://www.tri.global/news/toyota-research-institute-unveils-breakthrough-teaching-robots-new-behaviors",
    "summary": "TRI's LBM line builds on Diffusion Policy (Chi et al. 2023) and scales to whole-body bimanual manipulation. Trained on TRI-collected teleop. Partial open release of recipes (Robomimic / DiffPolicy) but base LBM weights not public.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.78,
    "watchlist_tier": "quarterly",
    "model_family": "LBM / Diffusion Policy",
    "rebuttal_papers": [],
    "notes": "Strong Bill 6 demonstration; vendor weights closed (Bill 10).",
    "embodiment": "bimanual TRI platform",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": 1000,
    "claimed_capability": "Hundreds of household dexterous behaviors from teleop demos",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "waymo:2024-gen6-driver",
    "title": "Introducing the 6th-Generation Waymo Driver",
    "authors": [
      "Waymo Engineering"
    ],
    "affiliations": [
      "Waymo LLC"
    ],
    "country_region": null,
    "date": "2024-08",
    "venue": "Waymo blog / press / SAE WCX 2025 talks",
    "url": "https://waymo.com/blog/2024/08/meet-the-6th-generation-waymo-driver/",
    "summary": "6th-gen Driver on Zeekr (Geely) platform. Sensor count reduced ~50% (13 cameras, 4 LiDAR, 6 radar) but claimed superhuman perception via foundation-model-style training. Compute halved, cost claimed ~5\u00d7 lower than 5th-gen Jaguar I-PACE. First Waymo platform designed from scratch as electric / purpose-built robotaxi.",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": "M3",
    "verdict": "known_bill",
    "confidence": 0.78,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Hardware-cost Bill 3 attempt. Cousin to 'foundation models reduce sensor requirements' Wayve/Tesla narrative. Real Bill 12 claims pending fleet deployment 2025-2026.",
    "claimed_capability": "Halved sensor + compute cost while maintaining or improving L4 capability via end-to-end foundation-model training",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "waymo:2024-scaling-laws",
    "title": "Scaling Laws for Autonomy: Predictable Progress in Driver Performance with Compute",
    "authors": [
      "Waymo Research"
    ],
    "affiliations": [
      "Waymo LLC"
    ],
    "country_region": null,
    "date": "2024-08",
    "venue": "Waymo Research blog / SAE WCX 2025",
    "url": "https://waymo.com/blog/2024/12/scaling-laws-of-motion-forecasting-and-planning/",
    "summary": "Waymo's analogue of LLM scaling laws applied to driving policy + motion forecasting. Trains policies at 8 model scales \u00d7 4 data scales; reports power-law improvement on perception, prediction, planning. Frames neural driving as a 'predictable compute-scaling problem' \u2014 Tesla, Wayve, NVIDIA all cite this as the field's foundational scaling-law paper.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": "M2",
    "verdict": "needs_gate",
    "confidence": 0.83,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Anchor for AV scaling-law debate. Wayve GAIA-1 paper and Tesla 'photon-to-controls' v12 narrative both ride on this. Bill 5 (real-road transfer) is the live question.",
    "claimed_capability": "Power-law improvement in motion-forecasting and planning quality with model+data scale",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "waymo:6th-gen-2024",
    "title": "Waymo 6th-Generation Driver",
    "authors": [
      "Waymo"
    ],
    "affiliations": [
      "Waymo (Alphabet)"
    ],
    "country_region": null,
    "date": "2024-08",
    "venue": "Company blog",
    "url": "https://waymo.com/blog/2024/08/meet-the-6th-generation-waymo-driver",
    "summary": "6th-gen Waymo Driver runs on Geely Zeekr platform, claims 30% sensor reduction and improved ML-based perception/planning stack. Limited public technical disclosure; safety report cadence maintained.",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": "M5",
    "verdict": "known_bill",
    "confidence": 0.65,
    "watchlist_tier": "quarterly",
    "model_family": "Waymo Driver",
    "rebuttal_papers": [],
    "notes": "Bill 12 \u2014 Waymo Safety Report is partly independent (NHTSA filings); Bill 10 partial.",
    "embodiment": "driving",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Commercial robotaxi (operations in Phoenix, SF, LA, Austin)",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "wayve:lingo-1-2023",
    "title": "LINGO-1: Open-Loop Driving Commentator",
    "authors": [
      "Wayve"
    ],
    "affiliations": [
      "Wayve"
    ],
    "country_region": null,
    "date": "2023-09",
    "venue": "Wayve blog",
    "url": "https://wayve.ai/thinking/lingo-natural-language-autonomous-driving",
    "summary": "Predecessor to LINGO-2. Open-loop driving commentator that explains decisions but does not act. Pre-2024, primarily descriptive.",
    "candidate_bill": null,
    "candidate_meta_cost": "M1",
    "verdict": "out_of_scope",
    "confidence": 0.85,
    "watchlist_tier": "annual",
    "model_family": "LINGO-1",
    "rebuttal_papers": [],
    "notes": "Pre-2024 (M1) \u2014 provided here for lineage.",
    "embodiment": "driving",
    "real_world_eval": false,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Driving rationale generation",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards"
    ]
  },
  {
    "paper_id": "wayve:lingo-2-2024",
    "title": "LINGO-2: Vision-Language-Action Model for Driving",
    "authors": [
      "Wayve"
    ],
    "affiliations": [
      "Wayve"
    ],
    "country_region": null,
    "date": "2024-04",
    "venue": "Wayve blog",
    "url": "https://wayve.ai/thinking/lingo-2-driving-with-language",
    "summary": "LINGO-2 is the first deployed driving VLA, combining vision + language explanations + driving actions. Demonstrates language-conditioned lane changes and explanation generation. Only vendor evaluation, no peer review.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": "M5",
    "verdict": "needs_gate",
    "confidence": 0.62,
    "watchlist_tier": "quarterly",
    "model_family": "LINGO-2",
    "rebuttal_papers": [],
    "notes": "Bridge-test candidate: explanation\u2192action coupling claimed but causal faithfulness untested. Direct Bill 4 candidate.",
    "embodiment": "driving",
    "real_world_eval": true,
    "n_demonstrations": null,
    "task_count_claimed": null,
    "claimed_capability": "Language-conditioned driving with rationale generation",
    "_appeared_in_sweeps": [
      "sweep_801_vla_cards",
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "weride:2024-nasdaq",
    "title": "WeRide IPO + Driverless Permits 2024",
    "authors": [
      "WeRide Inc"
    ],
    "affiliations": [
      "WeRide"
    ],
    "country_region": null,
    "date": "2024-10 (NASDAQ IPO)",
    "venue": "WeRide F-1 / 2024 prospectus / Chinese MIIT permits",
    "url": "https://www.weride.ai/",
    "summary": "WeRide IPO October 2024. Discloses ~18M autonomous test mi, robotaxi + robobus + robosweeper + robovan product lines, operations in 30 cities across 7 countries. Bill 9 partial (multi-country). Driverless commercial robotaxi in Guangzhou + Beijing.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": "M2",
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": "annual",
    "rebuttal_papers": [],
    "notes": "Strongest Bill 8 closure attempt (cross-platform + cross-country). Bill 12 unverified (no published per-mile crash rate).",
    "claimed_capability": "Multi-platform AV (taxi, bus, sweeper, van) across 7 countries",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "xpeng-li-nio:2024-china-adas",
    "title": "China L2+ ADAS Saturation: XPeng XNGP, Li Auto AD Max, NIO NAD, Huawei ADS 3.0",
    "authors": [
      "XPeng",
      "Li Auto",
      "NIO",
      "Huawei"
    ],
    "affiliations": [
      "XPeng",
      "Li Auto",
      "NIO",
      "Huawei"
    ],
    "country_region": null,
    "date": "2024-2025",
    "venue": "OEM Q1-Q4 2024 disclosures, China Auto Industry Association reports",
    "url": "https://www.huawei.com/en/news/2024",
    "summary": "Four Chinese OEM L2+ stacks: XPeng XNGP (no-HD-map nationwide), Li Auto AD Max, NIO NAD/Banyan, Huawei ADS 3.0 (powering AITO, Avatr, Stelato, Luxeed). Combined fleet ~2M vehicles by end-2024. Claimed urban autonomy in 200+ Chinese cities. Bill 9 cross-city closure attempt. Bill 12 weak \u2014 limited per-mile incident disclosure.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": "M2",
    "verdict": "known_bill",
    "confidence": 0.72,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "China side of the Tesla-vs-rest comparison. Bill 9 candidate \u2014 broader city coverage than Tesla FSD US footprint. Bill 10 weak (no NHTSA/CA DMV equivalent of full per-mile disclosure).",
    "claimed_capability": "L2+ urban autonomy in 200+ cities without HD maps",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  },
  {
    "paper_id": "zoox:2024-toxicology",
    "title": "Zoox Custom-Build Robotaxi Public Deployment + 2024 Recalls",
    "authors": [
      "Zoox Inc (Amazon)"
    ],
    "affiliations": [
      "Zoox",
      "NHTSA"
    ],
    "country_region": null,
    "date": "2024-06 (Las Vegas public ride launch); 2024-05 (firmware recall); 2025-04 (second firmware recall)",
    "venue": "Zoox blog / NHTSA recall NHTSA-24V-385",
    "url": "https://zoox.com/journal/",
    "summary": "Zoox custom-built (no steering wheel) robotaxi began employee public rides in Las Vegas June 2024, then SF. Two firmware recalls in 2024-2025 (sudden braking; phantom braking with following-car risk). Limited disclosed mileage. Bill 12 candidate but with multiple recall events.",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": "M2",
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": "quarterly",
    "rebuttal_papers": [],
    "notes": "Bill 12 still contested. Purpose-built design forecloses safety-driver fallback \u2014 every disengagement is remote-only Bill 13 dependent.",
    "claimed_capability": "Purpose-built bidirectional robotaxi, no human controls",
    "_appeared_in_sweeps": [
      "sweep_806_autonomous_driving"
    ]
  }
]