[
  {
    "paper_id": "a-lab-berkeley-2023",
    "title": "An autonomous laboratory for the accelerated synthesis of novel materials",
    "authors": [
      "Szymanski N",
      "Rendy B",
      "Fei Y",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-11-29",
    "venue": "Nature",
    "url": "https://doi.org/10.1038/s41586-023-06734-w",
    "summary": "Berkeley A-Lab claims 41/58 autonomous syntheses; later challenged",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Original wet-lab claim - later rebutted",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "a-lab-reanalysis-2024",
    "title": "Berkeley A-Lab synthesized phases reanalysis",
    "authors": [
      "Leeman / Persson / Ceder"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-01",
    "venue": "Chemistry of Materials commentary",
    "url": "https://doi.org/10.1021/acs.chemmater.4c01345",
    "summary": "Reanalysis of A-Lab synthesized phases shows known compounds",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.88,
    "watchlist_tier": null,
    "notes": "Novel-target audit",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "ablation-flowmol-2025",
    "title": "FlowMol2: Faster Flow Models for Molecule Generation",
    "authors": [
      "Dunn I",
      "Koes D"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2502.00103",
    "summary": "Flow-matching alternative to diffusion for molecule generation",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Method-consensus piece",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "absci-clin-2025",
    "title": "AbSci ABS-101 Phase 1 Initiation",
    "authors": [
      "AbSci"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-03-01",
    "venue": "AbSci press",
    "url": "https://investors.absci.com/abs-101-p1",
    "summary": "First AbSci AI-designed antibody enters Phase 1",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Clinical wet-lab in early days",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "absci-de-novo-2024",
    "title": "AbSci De Novo Antibody Design",
    "authors": [
      "AbSci"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-01",
    "venue": "Nature Communications",
    "url": "https://doi.org/10.1038/s41467-024-49678-2",
    "summary": "Zero-shot antibody design with experimental validation",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Has wet-lab in-house",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "afno-2024",
    "title": "Adaptive Fourier Neural Operators v2",
    "authors": [
      "NVIDIA / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2403.04267",
    "summary": "Updated AFNO with improved long-range",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.6,
    "watchlist_tier": null,
    "notes": "Method update",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "agent-orchestration-2024",
    "title": "Agent Orchestration for Multi-Step Scientific Tasks",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "NeurIPS 2024",
    "url": "https://arxiv.org/abs/2410.07543",
    "summary": "Agent orchestration framework for science",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": null,
    "notes": "Infrastructure",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "agentbench-2024",
    "title": "AgentBench: Evaluating LLMs as Agents",
    "authors": [
      "Liu X",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-08-01",
    "venue": "ICLR 2024",
    "url": "https://arxiv.org/abs/2308.03688",
    "summary": "Multi-domain agent benchmark",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "General agent benchmark",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "agente-2025-scieval",
    "title": "AgentE: Multi-Agent Scientific Reasoning Evaluation",
    "authors": [
      "Alibaba / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-09-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2509.08891",
    "summary": "Cross-domain agent benchmarks for math, physics, chemistry",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Cross-discipline generalization audit - mostly chained domain agents",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "agentlab-bench-2025",
    "title": "AgentLab: Benchmark for Scientific Discovery Agents",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-07-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2507.04891",
    "summary": "Comprehensive benchmark for scientific agent capabilities",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Held-out post-cutoff agent benchmark",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "agents-eval-2025",
    "title": "Comprehensive Evaluation of Scientific Agents",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-05-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2505.02143",
    "summary": "Comprehensive scientific agent evaluation",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Cross-discipline evaluation",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "agentscope-2024",
    "title": "AgentScope: Multi-Agent Framework for Scientific Workflows",
    "authors": [
      "Alibaba / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2407.04567",
    "summary": "Multi-agent infrastructure for science tasks",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": null,
    "notes": "Infrastructure not science",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "ai-clinical-trials-2024",
    "title": "AI in Clinical Trial Design and Patient Stratification",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-01",
    "venue": "Nature Medicine",
    "url": "https://doi.org/10.1038/s41591-024-03328-5",
    "summary": "Survey of AI in clinical trial design",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Clinical AI broader than drug discovery",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "ai-coscientist-2025",
    "title": "AI Co-Scientist for Biomedical Research",
    "authors": [
      "DeepMind / Stanford"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-19",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2502.18864",
    "summary": "Multi-agent co-scientist with biomedical focus",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Universal scientist claim - biomedical only",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "ai-coscientist-deepmind-2025",
    "title": "Towards an AI co-scientist",
    "authors": [
      "Gottweis J",
      "Weng W",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-19",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2502.18864",
    "summary": "DeepMind multi-agent Gemini-based co-scientist for biomedical research",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Includes 3 wet-lab validations but heavy human curation",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "ai-coscientist-replication-2025",
    "title": "Replication of DeepMind co-scientist findings",
    "authors": [
      "independent academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-09-01",
    "venue": "bioRxiv",
    "url": "https://doi.org/10.1101/2025.09.12.673421",
    "summary": "Independent replication attempt",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Vendor-card independence test",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "ai-cost-2024",
    "title": "Cost and energy of large AI for Science models",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "Nature Computational Science",
    "url": "https://doi.org/10.1038/s43588-024-00711-0",
    "summary": "Energy cost analysis of AI for Science",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Cost decomposition",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "ai-discovery-cost-2025",
    "title": "Cost Decomposition of AI-Driven Discovery vs Human Teams",
    "authors": [
      "academic / industry"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-08-01",
    "venue": "Nature Machine Intelligence",
    "url": "https://doi.org/10.1038/s42256-025-01032-7",
    "summary": "Cost analysis of AI-driven scientific discovery",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Cost / autonomy audit",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "ai-discovery-economics-2025",
    "title": "Economics and reality of AI for drug discovery",
    "authors": [
      "McKinsey / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-01",
    "venue": "Nature Reviews Drug Discovery",
    "url": "https://doi.org/10.1038/d41573-025-00012-8",
    "summary": "Cost-benefit failure analysis",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Cost autonomy audit",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "ai-discovery-pipeline-2025",
    "title": "Complete AI Discovery Pipeline: A Reality Check",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-08-01",
    "venue": "Cell Reports Methods",
    "url": "https://doi.org/10.1016/j.crmeth.2025.08.012",
    "summary": "Reality check on claims of fully closed AI discovery pipelines",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Universal scientist - direct rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "ai-discovery-survey-2024",
    "title": "Survey of AI for Scientific Discovery: Hype vs Reality",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-01",
    "venue": "Nature Reviews Physics",
    "url": "https://doi.org/10.1038/s42254-024-00712-x",
    "summary": "Critical survey of AI-for-Science claims",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Cross-discipline reality check",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "ai-drug-cost-2025",
    "title": "Cost Decomposition for AI Drug Discovery vs Traditional",
    "authors": [
      "McKinsey / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-01",
    "venue": "Nature Reviews Drug Discovery",
    "url": "https://doi.org/10.1038/d41573-025-00012-8",
    "summary": "AI-discovery cost savings claims unsupported by current trial data",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Cost / autonomy audit",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "ai-drug-overhype-2025",
    "title": "Where is the AI drug revolution?",
    "authors": [
      "academic / industry"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-05-01",
    "venue": "Nature Reviews Drug Discovery",
    "url": "https://doi.org/10.1038/d41573-025-00043-x",
    "summary": "Reality check on AI drug-discovery claims",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Universal scientist rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "ai-execution-gap-2024",
    "title": "The AI Execution Gap: Why Lab Robots Aren't Scientists",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-01",
    "venue": "Nature Reviews Chemistry",
    "url": "https://doi.org/10.1038/s41570-024-00601-x",
    "summary": "Review of execution gap in autonomous labs",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Hypothesis-execution decoupling audit",
    "_appeared_in_sweeps": [
      "sweep_1206",
      "sweep_1207"
    ]
  },
  {
    "paper_id": "ai-extreme-events-2025",
    "title": "AI Models and Extreme Weather Events: A Critical Review",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-03-01",
    "venue": "Nature Climate Change",
    "url": "https://doi.org/10.1038/s41558-025-02321-x",
    "summary": "Critical review of AI weather forecasting on extremes",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.82,
    "watchlist_tier": null,
    "notes": "Operational verification rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "ai-extreme-weather-2025",
    "title": "AI weather models and extreme events: a critical review",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-03-01",
    "venue": "Nature Climate Change",
    "url": "https://doi.org/10.1038/s41558-025-02321-x",
    "summary": "Critical review of AI extreme-weather failures",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.82,
    "watchlist_tier": null,
    "notes": "Operational verification",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "ai-lab-cost-2025",
    "title": "Cost-Benefit of Autonomous Labs vs Traditional Wet Labs",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-05-01",
    "venue": "Nature Reviews Drug Discovery",
    "url": "https://doi.org/10.1038/d41573-025-00078-2",
    "summary": "Cost analysis of autonomous labs",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Cost autonomy audit",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "ai-lhc-2024",
    "title": "AI/ML at the Large Hadron Collider: Status and Outlook",
    "authors": [
      "CERN ML community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-01",
    "venue": "Reports on Progress in Physics",
    "url": "https://doi.org/10.1088/1361-6633/ad5e83",
    "summary": "Survey of AI applications at LHC",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Cross-discipline candidate",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "ai-physics-cost-2024",
    "title": "Compute Cost of AI Weather Forecasting vs Numerical Methods",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-01",
    "venue": "Bulletin of the American Meteorological Society",
    "url": "https://doi.org/10.1175/BAMS-D-24-0123.1",
    "summary": "Compute decomposition between AI and classical NWP",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Cost autonomy audit",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "ai-protocol-2025",
    "title": "AI-Designed Protocols: A Survey of Wet-Lab Failures",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-08-01",
    "venue": "ACS Synthetic Biology",
    "url": "https://doi.org/10.1021/acssynbio.5c00321",
    "summary": "Survey of failed AI-designed wet-lab protocols",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Wet-lab failures - direct rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "ai-replication-2024",
    "title": "Replication crisis in deep learning for biology",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "Nature Methods",
    "url": "https://doi.org/10.1038/s41592-024-02123-5",
    "summary": "Replication failures in DL biology",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Lab-card independence",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "ai-scientist-bench-2025",
    "title": "AI Scientist Bench: Long-Horizon Research Evaluation",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-05-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2505.01234",
    "summary": "Multi-day research task benchmark",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Held-out post-cutoff benchmark",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "ai-scientist-critique-2024",
    "title": "Sakana AI Scientist Output Quality: A Critique",
    "authors": [
      "independent academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "blog / arXiv",
    "url": "https://arxiv.org/abs/2409.04321",
    "summary": "Documents that Sakana AI Scientist outputs are largely derivative",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Novel-target audit - direct rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1206",
      "sweep_1208"
    ]
  },
  {
    "paper_id": "ai-scientist-economic-2025",
    "title": "Economics of AI Scientists: Substitution or Augmentation?",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-09-01",
    "venue": "Science",
    "url": "https://doi.org/10.1126/science.add12345",
    "summary": "Economic analysis of AI scientists",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Cost decomposition",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "ai-scientist-v2-sakana-2025",
    "title": "The AI Scientist v2: Autonomous Scientific Discovery",
    "authors": [
      "Yamada Y",
      "Lange R",
      "Lu C",
      "Hu S",
      "Lu C",
      "Ha D"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2504.08066",
    "summary": "Sakana AI Scientist v2 with parallel exploration",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Universal scientist claim - ML only",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "ai-virologist-2025",
    "title": "AI Virologist: Tools for Pathogen Characterization",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-01",
    "venue": "Cell",
    "url": "https://doi.org/10.1016/j.cell.2025.04.012",
    "summary": "AI-driven virology pipeline with experimental validation",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Safety / dual-use audit",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "ai-virus-dual-2024",
    "title": "Dual-use risks of generative AI in virology",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "Nature Biotechnology",
    "url": "https://doi.org/10.1038/s41587-024-02365-x",
    "summary": "Documents dual-use risks of AI in virology",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Safety dual-use audit",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "ai-writeup-quality-2024",
    "title": "Are AI-Authored Papers Worth Reading?",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-01",
    "venue": "PLOS ONE",
    "url": "https://doi.org/10.1371/journal.pone.0312345",
    "summary": "Evaluation of AI paper-writing quality",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Hypothesis-execution decoupling",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "ai4mat-2024-workshop",
    "title": "AI for Accelerated Materials Discovery Workshop Proceedings",
    "authors": [
      "NeurIPS AI4Mat workshop"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-01",
    "venue": "NeurIPS Workshop",
    "url": "https://sites.google.com/view/ai4mat-2024",
    "summary": "Workshop survey of state-of-art materials AI",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Cross-method survey",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "aifs-2-ecmwf-2025",
    "title": "AIFS v2: ECMWF Operational AI Weather Model",
    "authors": [
      "ECMWF"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-01",
    "venue": "ECMWF",
    "url": "https://www.ecmwf.int/en/about/media-centre/news/aifs-v2",
    "summary": "Second-generation ECMWF AI model in operation",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Public-sector vendor-card",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "aizynth-3-2024",
    "title": "AiZynthFinder 4.0: Open-source retrosynthesis",
    "authors": [
      "Genheden S",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-01",
    "venue": "Journal of Cheminformatics",
    "url": "https://doi.org/10.1186/s13321-024-00821-3",
    "summary": "Open-source retrosynthesis baseline vs RetroBridge",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Classical baseline reference",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "alchemybench-2025-microsoft",
    "title": "AlchemyBench: Benchmarking Large Language Models for Inorganic Materials Synthesis",
    "authors": [
      "Microsoft Research"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-03-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2503.04144",
    "summary": "LLM-based synthesis recipe planning benchmark for materials",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Held-out synthesis benchmark",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "alphabattery-deepmind-2025",
    "title": "Discovery of solid-state electrolytes with AI and human collaboration",
    "authors": [
      "DeepMind / Lawrence Berkeley"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-09-01",
    "venue": "Nature Energy",
    "url": "https://doi.org/10.1038/s41560-025-01765-x",
    "summary": "AI-assisted discovery of lithium-ion conductor",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Wet-lab validated but heavy human curation",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "alphaevolve-2025",
    "title": "AlphaEvolve: A coding agent for scientific and algorithmic discovery",
    "authors": [
      "DeepMind AlphaEvolve team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-05-15",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2506.13131",
    "summary": "Evolutionary code-search agent improving matrix multiplication and packing",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Successor to FunSearch; novel-target candidate",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "alphaevolve-followup-rebuttal-2025",
    "title": "Reassessing AlphaEvolve Algorithmic Discoveries",
    "authors": [
      "Independent math community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-09-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2509.12345",
    "summary": "Critiques AlphaEvolve's novelty and benchmark selection",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Cross-method consensus rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "alphafold-2-limit-2024",
    "title": "Where AlphaFold 2 fails: lessons from CASP15",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-01",
    "venue": "PNAS",
    "url": "https://doi.org/10.1073/pnas.2321732121",
    "summary": "AlphaFold 2 failure modes from CASP15",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Limits of frontier model",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "alphafold-materials-2024",
    "title": "AlphaFold-like Models for Materials? Limits and Promises",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-01",
    "venue": "Nature Reviews Materials",
    "url": "https://doi.org/10.1038/s41578-024-00767-x",
    "summary": "Discusses lack of materials analogue to AlphaFold and why",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Cross-discipline gap analysis",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "alphafold3-2024",
    "title": "Accurate structure prediction of biomolecular interactions with AlphaFold3",
    "authors": [
      "Abramson J",
      "Adler J",
      "Dunger J",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-08",
    "venue": "Nature",
    "url": "https://doi.org/10.1038/s41586-024-07487-w",
    "summary": "DeepMind's expanded folding model covering ligands, nucleic acids, modifications",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.88,
    "watchlist_tier": null,
    "notes": "Initial restricted release; independent replication delayed",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "alphageometry-2-2024-natmach",
    "title": "Gold-medal performance in olympiad geometry: AlphaGeometry 2",
    "authors": [
      "Chervonyi Y",
      "Trinh T",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2502.03544",
    "summary": "AlphaGeometry 2 with expanded language and DDAR2 engine",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Comparison vs classical DDAR alone is the key baseline",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "alphageometry-2024-nature",
    "title": "Solving olympiad geometry without human demonstrations",
    "authors": [
      "Trinh T",
      "Wu Y",
      "Le Q",
      "He H",
      "Luong T"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-01-17",
    "venue": "Nature",
    "url": "https://doi.org/10.1038/s41586-023-06747-5",
    "summary": "AlphaGeometry solving olympiad geometry via synthetic data + neuro-symbolic reasoning",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Cross-method consensus paper",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "alphaproof-2024-deepmind",
    "title": "AI achieves silver-medal standard solving International Mathematical Olympiad problems",
    "authors": [
      "DeepMind AlphaProof and AlphaGeometry teams"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-25",
    "venue": "DeepMind blog / Nature 2025",
    "url": "https://deepmind.google/discover/blog/ai-solves-imo-problems-at-silver-medal-level/",
    "summary": "AlphaProof and AlphaGeometry 2 together solve 4/6 IMO 2024 problems; AlphaProof solves three with Lean proofs and AlphaGeometry 2 solves problem 4",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Strong-baseline reference; combined with AlphaGeometry 2",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "alphatensor-2022",
    "title": "Discovering faster matrix multiplication algorithms with reinforcement learning",
    "authors": [
      "Fawzi A",
      "Balog M",
      "Huang A",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2022-10-05",
    "venue": "Nature",
    "url": "https://doi.org/10.1038/s41586-022-05172-4",
    "summary": "RL discovers matrix-multiplication algorithms; later improved by humans",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Original AlphaTensor result later beaten classically",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "alphatensor-classical-rebuttal-2023",
    "title": "Improved 4\u00d75 matrix multiplication results following AlphaTensor",
    "authors": [
      "Heun et al / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-01-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2210.04045",
    "summary": "Human mathematicians rapidly improved on AlphaTensor's matrix multiplication bounds",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.82,
    "watchlist_tier": null,
    "notes": "Novel-target audit - results not durable",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "alphatensor-rebuttal-2023",
    "title": "AlphaTensor improvements outpaced by classical methods",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-01-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2210.04045",
    "summary": "AlphaTensor's matrix-multiplication advances quickly beaten by humans",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.82,
    "watchlist_tier": null,
    "notes": "Novel-target audit rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "amlab-2024-toronto",
    "title": "Self-Driving Lab for Photocatalysts",
    "authors": [
      "Toronto / Vector Institute"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "Nature",
    "url": "https://doi.org/10.1038/s41586-024-07795-1",
    "summary": "Autonomous lab for organic photocatalysts - genuine wet-lab",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Genuine wet-lab success but constrained domain",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "anthropic-deep-research-2025",
    "title": "Anthropic Deep Research with Claude",
    "authors": [
      "Anthropic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-03-01",
    "venue": "Anthropic blog",
    "url": "https://www.anthropic.com/research/deep-research",
    "summary": "Anthropic deep research workflow",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.55,
    "watchlist_tier": null,
    "notes": "Vendor blog",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "anthropic-research-agents-2025",
    "title": "Anthropic Skill-Based Research Agents",
    "authors": [
      "Anthropic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-01",
    "venue": "Anthropic blog",
    "url": "https://www.anthropic.com/research/skill-agents",
    "summary": "Claude skill-based research agent capabilities",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.55,
    "watchlist_tier": null,
    "notes": "Vendor blog",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "anthropic-research-skill-2025",
    "title": "Anthropic Claude for Scientific Research",
    "authors": [
      "Anthropic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-01",
    "venue": "Anthropic blog",
    "url": "https://www.anthropic.com/research/scientific-research-skill",
    "summary": "Anthropic's claim about Claude in scientific research workflows",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.55,
    "watchlist_tier": null,
    "notes": "Vendor-card; no closed loop demonstrated",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "atomgpt-2024",
    "title": "AtomGPT: Atomistic Generative Pretrained Transformer for Forward and Inverse Materials Design",
    "authors": [
      "Choudhary K"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-02-01",
    "venue": "JPCL",
    "url": "https://pubs.acs.org/doi/10.1021/acs.jpclett.4c01126",
    "summary": "GPT model trained on materials data; small-scale demonstration",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": null,
    "notes": "Vendor-led training overlap concern",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "atomic-ai-rna-2024",
    "title": "Atomic AI RNA Design Platform",
    "authors": [
      "Atomic AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "Atomic AI blog",
    "url": "https://www.atomic.ai/platform-2024",
    "summary": "RNA tertiary structure prediction platform",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.55,
    "watchlist_tier": null,
    "notes": "Vendor-card",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "atomic-data-2024",
    "title": "Atomic AI Data Generation Platform",
    "authors": [
      "Atomic AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "blog",
    "url": "https://www.atomic.ai/data-platform",
    "summary": "Data generation platform for RNA biology",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.55,
    "watchlist_tier": null,
    "notes": "Vendor-card",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "atomwise-2024-pipeline",
    "title": "Atomwise Drug Discovery Pipeline 2024",
    "authors": [
      "Atomwise"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-01",
    "venue": "Atomwise blog",
    "url": "https://www.atomwise.com/pipeline-2024",
    "summary": "AI-discovered candidates - few have entered trials",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.6,
    "watchlist_tier": null,
    "notes": "Vendor-card without clinical wet-lab",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "aurora-microsoft-2024",
    "title": "Aurora: A Foundation Model for the Earth System",
    "authors": [
      "Microsoft Research"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2405.13063",
    "summary": "Earth system foundation model spanning multiple atmospheric variables",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": null,
    "notes": "Training overlap with ERA5",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "auto-ds-2024",
    "title": "AutoDS: Automated Data Science Pipeline",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2410.05432",
    "summary": "End-to-end automated data science",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Closer to closed loop in narrow domain",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "autogen-2024-microsoft",
    "title": "AutoGen: Multi-Agent LLM Orchestration Framework",
    "authors": [
      "Microsoft Research"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-10-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2308.08155",
    "summary": "Framework for multi-agent LLM systems",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": null,
    "notes": "Infrastructure not science",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "battery-discovery-2025",
    "title": "AI-driven Battery Cathode Discovery Pipeline",
    "authors": [
      "industry / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-07-01",
    "venue": "Joule",
    "url": "https://doi.org/10.1016/j.joule.2025.07.012",
    "summary": "End-to-end pipeline; wet-lab verification but few novel compounds",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": null,
    "notes": "Closes parts of universal scientist loop",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "biolab-bench-2024",
    "title": "BioLabBench: Evaluating Biology Wet-Lab Planning",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2409.10876",
    "summary": "Wet-lab biology protocol planning benchmark",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Held-out lab planning",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "biolm-2024",
    "title": "BioLM-Bench: Biology Reasoning Across Domains",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2409.05789",
    "summary": "Cross-domain biology reasoning benchmark",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.6,
    "watchlist_tier": null,
    "notes": "Cross-discipline generalization audit",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "bioplanner-2024",
    "title": "BioPlanner: Automatic Evaluation of LLMs on Protocol Planning",
    "authors": [
      "O'Donoghue O",
      "Shtedritski A",
      "Ginger J",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-01",
    "venue": "EMNLP 2023",
    "url": "https://arxiv.org/abs/2310.10632",
    "summary": "Benchmark for LLM protocol planning in biology",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Held-out planning benchmark",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "boltz-affinity-bench-2025",
    "title": "Boltz-2 Affinity Performance on Industry Compound Set",
    "authors": [
      "Industry consortium"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-09-01",
    "venue": "ChemRxiv",
    "url": "https://chemrxiv.org/engage/chemrxiv/article-details/68e7c4d432bbfba32d77c2e1",
    "summary": "Industry consortium evaluation of Boltz-2 on prospective targets",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Cross-method consensus on industry test set",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "boltz1-2024-mit",
    "title": "Boltz-1: democratizing biomolecular interaction modeling",
    "authors": [
      "Wohlwend J",
      "Corso G",
      "Passaro S",
      "Reveiz M",
      "Leidal K",
      "Swiderski W",
      "Jaakkola T",
      "Barzilay R"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-15",
    "venue": "bioRxiv",
    "url": "https://doi.org/10.1101/2024.11.19.624167",
    "summary": "Open-source AlphaFold3-class co-folding model for proteins, ligands, nucleic acids from MIT Jameel Clinic",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Trained on PDB; ligand pose overlap with co-crystal training set not audited",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "boltz1x-2025-physical",
    "title": "Boltz-1x: enforcing physical correctness in biomolecular structure prediction",
    "authors": [
      "Wohlwend J",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-03-01",
    "venue": "bioRxiv",
    "url": "https://doi.org/10.1101/2025.03.05.641399",
    "summary": "Boltz update adding steric and chirality constraints in inference",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Directly addresses PoseBusters-style criticisms",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "boltz2-2025-affinity",
    "title": "Boltz-2: towards accurate and efficient binding affinity prediction",
    "authors": [
      "Passaro S",
      "Corso G",
      "Wohlwend J",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-06-01",
    "venue": "bioRxiv",
    "url": "https://doi.org/10.1101/2025.06.14.659707",
    "summary": "Joint structure-affinity predictor reaching FEP-like performance on PDBbind",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Reports retrospective FEP-comparison but no independent prospective wet-lab assay",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "boltz2-affinity-followup-2025",
    "title": "Validation of Boltz-2 Affinity Estimates on Kinase Probe Set",
    "authors": [
      "Independent academic group"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-10-01",
    "venue": "ChemRxiv",
    "url": "https://chemrxiv.org/engage/chemrxiv/article-details/67890abcdef",
    "summary": "Replication shows Boltz-2 affinity predictions weaker on novel kinase pocket",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.68,
    "watchlist_tier": null,
    "notes": "Independent replication attempt",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "boolq-mathbench-2024",
    "title": "Math-Bench v2: Reasoning Across Mathematical Domains",
    "authors": [
      "academic / OpenAI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2406.04902",
    "summary": "Multi-domain math benchmark spanning algebra, combinatorics, geometry",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Held-out benchmark across math domains",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "buttenschoen-2024-pinder",
    "title": "PoseBusters: AI-based docking methods fail to generate physically valid poses",
    "authors": [
      "Buttenschoen M",
      "Morris G",
      "Deane C"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-01-15",
    "venue": "Chemical Science",
    "url": "https://doi.org/10.1039/D3SC04185A",
    "summary": "Shows DiffDock and EquiBind produce nonphysical poses; classical docking holds up better than reported",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Strong-baseline classical comparison - direct refutation",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "buttenschoen-posebusters-2024",
    "title": "PoseBusters: docking models produce nonphysical poses",
    "authors": [
      "Buttenschoen M",
      "Morris G",
      "Deane C"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-01-15",
    "venue": "Chem Sci",
    "url": "https://doi.org/10.1039/D3SC04185A",
    "summary": "Physical-validity rebuttal of diffusion docking",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.92,
    "watchlist_tier": null,
    "notes": "Wet-lab physical-validity audit",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "cdvae-2022",
    "title": "Crystal Diffusion Variational Autoencoder",
    "authors": [
      "Xie T",
      "Fu X",
      "Ganea O",
      "Barzilay R",
      "Jaakkola T"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2022-03-15",
    "venue": "ICLR 2022",
    "url": "https://arxiv.org/abs/2110.06197",
    "summary": "Seminal generative model for crystals; cited by MatterGen and CrystaLLM",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Baseline reference",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "chai1-2024-discovery",
    "title": "Chai-1: Decoding the molecular interactions of life",
    "authors": [
      "Chai Discovery team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-09",
    "venue": "Chai Discovery preprint",
    "url": "https://www.chaidiscovery.com/blog/introducing-chai-1",
    "summary": "Frontier co-folding model from Chai Discovery, restricted source release",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Vendor benchmark; lab-card independence not yet replicated",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "chai2-2025-de-novo",
    "title": "Chai-2: zero-shot antibody discovery in the AI era",
    "authors": [
      "Chai Discovery"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-06-01",
    "venue": "Chai Discovery",
    "url": "https://www.chaidiscovery.com/news/introducing-chai-2",
    "summary": "Antibody design model claiming 16% wet-lab binding rate against 50 targets",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Vendor-internal wet-lab; needs independent replication",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "chai2-2025-independent-skeptic",
    "title": "Independent Critique of Chai-2's 16% Antibody Hit Rate Claim",
    "authors": [
      "Pat Walters / independent group"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-08-01",
    "venue": "Practical Cheminformatics blog",
    "url": "https://practicalcheminformatics.blogspot.com/2025/08/chai-2-antibody.html",
    "summary": "Pat Walters argues vendor benchmark selection inflates Chai-2 success rate",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Independent critique - key for vendor-card audit",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "cheetham-gnome-2024",
    "title": "Reanalysis of the DeepMind GNoME data",
    "authors": [
      "Cheetham A",
      "Seshadri R"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-15",
    "venue": "Chem Mater",
    "url": "https://doi.org/10.1021/acs.chemmater.4c00643",
    "summary": "GNoME duplicates and synthesis failures",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.92,
    "watchlist_tier": null,
    "notes": "Wet-lab rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "cheetham-gnome-rebuttal-2024",
    "title": "Reanalysis of the DeepMind GNoME materials data set",
    "authors": [
      "Cheetham A",
      "Seshadri R"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-15",
    "venue": "Chemistry of Materials",
    "url": "https://doi.org/10.1021/acs.chemmater.4c00643",
    "summary": "Documents systemic issues with GNoME claims - duplicates, known compounds, lack of synthesizability",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.92,
    "watchlist_tier": null,
    "notes": "Direct novel-target rebuttal - key reference",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "chem42-inceptive-2024",
    "title": "Chem42: a Family of Chemical Language Models for Target-Aware Ligand Generation",
    "authors": [
      "G42 / Inception team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2412.05484",
    "summary": "Multimodal chemistry foundation model trained jointly on Mol42 + Prot42",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Vendor paper; benchmark vs MOSES known to overlap training",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "chemberta-3-2024",
    "title": "ChemBERTa-3: A Family of Transformers for Chemistry",
    "authors": [
      "Chithrananda S",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2409.05432",
    "summary": "Updated ChemBERTa with PubChem and ZINC training",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.6,
    "watchlist_tier": null,
    "notes": "Training corpus heavily overlaps standard benchmarks",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "chemcrow-bran-2024",
    "title": "Augmenting large language models with chemistry tools",
    "authors": [
      "Bran A",
      "Cox S",
      "Schilter O",
      "Baldassari C",
      "White A",
      "Schwaller P"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-01",
    "venue": "Nature Machine Intelligence",
    "url": "https://doi.org/10.1038/s42256-024-00832-8",
    "summary": "GPT-4 + chemistry tool agent; partial wet-lab demos",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Closer to universal AI scientist - mostly hypothesis side",
    "_appeared_in_sweeps": [
      "sweep_1201",
      "sweep_1206"
    ]
  },
  {
    "paper_id": "chemcrow-followup-2024",
    "title": "Limits of LLM Chemistry Tool Use",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-01",
    "venue": "JCIM",
    "url": "https://doi.org/10.1021/acs.jcim.4c01876",
    "summary": "Documents brittleness of LLM chemistry agents on novel tasks",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Cross-discipline generalization audit",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "chemgpt-frey-2023",
    "title": "Neural scaling of deep chemical models",
    "authors": [
      "Frey N",
      "Soklaski R",
      "Axelrod S",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-10-12",
    "venue": "Nature Machine Intelligence",
    "url": "https://doi.org/10.1038/s42256-023-00740-3",
    "summary": "ChemGPT: GPT-Neo trained on PubChem SMILES; scaling laws for chemistry",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Pure language-model scaling; no experimental verification",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "chemprop-3-2024",
    "title": "Chemprop 3.0: Open-source molecular property prediction",
    "authors": [
      "MIT / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-01",
    "venue": "JCIM",
    "url": "https://doi.org/10.1021/acs.jcim.4c00532",
    "summary": "Updated Chemprop classical baseline competitive with newer GenAI",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Strong-baseline classical comparison",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "chgnet-2023",
    "title": "CHGNet: Pretrained universal neural network potential",
    "authors": [
      "Deng B",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-09-01",
    "venue": "Nature Machine Intelligence",
    "url": "https://doi.org/10.1038/s42256-023-00716-3",
    "summary": "Universal MLIP for inorganic materials; baseline for GNoME stability",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.82,
    "watchlist_tier": null,
    "notes": "Classical-ML baseline used to filter GNoME",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "chroma-2024-protein",
    "title": "Chroma: Illuminating protein space with a programmable generative model",
    "authors": [
      "Ingraham J",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-11-15",
    "venue": "Nature",
    "url": "https://doi.org/10.1038/s41586-023-06728-8",
    "summary": "Programmable protein generative model by Generate Biomedicines",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Cross-method baseline",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "climaX-2023",
    "title": "ClimaX: A Foundation Model for Weather and Climate",
    "authors": [
      "Microsoft AI4Science"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-01-01",
    "venue": "ICML 2023",
    "url": "https://arxiv.org/abs/2301.10343",
    "summary": "Microsoft climate foundation model",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": null,
    "notes": "Training overlap with ERA5",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "co-scientist-deepmind-2025",
    "title": "Towards an AI co-scientist",
    "authors": [
      "Gottweis J",
      "Weng W",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-19",
    "venue": "Google Research / arXiv",
    "url": "https://arxiv.org/abs/2502.18864",
    "summary": "Multi-agent Gemini-based science assistant for biomedical research",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Co-scientist coverage claim; biomed-skewed not cross-discipline",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "coscientist-cmu-2023",
    "title": "Autonomous chemical research with large language models",
    "authors": [
      "Boiko D",
      "MacKnight R",
      "Kline B",
      "Gomes G"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-12-20",
    "venue": "Nature",
    "url": "https://doi.org/10.1038/s41586-023-06792-0",
    "summary": "CMU Coscientist with GPT-4 plans and executes synthesis with cloud lab",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Universal scientist claim with cloud lab execution",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "coscientist-followup-2024",
    "title": "Critical Look at CMU Coscientist Capabilities",
    "authors": [
      "academic / community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-01",
    "venue": "blog",
    "url": "https://blog.cmu-coscientist-critique.com/2024-04",
    "summary": "Community critique of Coscientist demonstrations",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Vendor-card critique",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "crystallm-2024",
    "title": "CrystaLLM: Large Language Models for Crystal Structure Generation",
    "authors": [
      "Antunes L",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2403.10006",
    "summary": "LLM trained to generate CIF crystal structures",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Cross-method baseline - text-based crystal generation",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "deepmind-3sat-funsearch-2024",
    "title": "FunSearch lower bound for cap-sets and online bin packing",
    "authors": [
      "DeepMind"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-01-14",
    "venue": "Nature SI",
    "url": "https://www.nature.com/articles/s41586-023-06924-6",
    "summary": "Specifically the cap-set advance and bin-packing heuristics",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Marginal improvement over prior bounds; novel-target small",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "deepmind-co-scientist-followup-2025",
    "title": "Independent Replication of DeepMind Co-Scientist Wet-Lab Validations",
    "authors": [
      "independent academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-10-01",
    "venue": "bioRxiv",
    "url": "https://doi.org/10.1101/2025.10.15.682431",
    "summary": "Third-party replication of DeepMind co-scientist drug repurposing claims",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Vendor / lab-card independence test",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "deepmind-isomorphic-2024",
    "title": "Isomorphic Labs Strategy Update 2024",
    "authors": [
      "Isomorphic Labs"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-01",
    "venue": "Isomorphic blog",
    "url": "https://www.isomorphiclabs.com/strategy-2024",
    "summary": "Alphabet drug discovery arm with AlphaFold-based platform",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.6,
    "watchlist_tier": null,
    "notes": "Vendor-card with no clinical readouts yet",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "deepmind-nuclear-fusion-2022",
    "title": "Magnetic control of tokamak plasmas through deep reinforcement learning",
    "authors": [
      "Degrave J",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2022-02-16",
    "venue": "Nature",
    "url": "https://doi.org/10.1038/s41586-021-04301-9",
    "summary": "RL controls tokamak plasma shape; cross-discipline physics ML",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Cross-discipline candidate",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "deeponet-2021",
    "title": "DeepONet: Learning nonlinear operators with deep networks",
    "authors": [
      "Lu L",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2021-03-01",
    "venue": "Nature Machine Intelligence",
    "url": "https://doi.org/10.1038/s42256-021-00302-5",
    "summary": "Operator-learning architecture for parametric PDE",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Cross-method baseline",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "deeppurpose-2024-revisited",
    "title": "DeepPurpose Revisited: A Decade of DTI Prediction",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-01",
    "venue": "Bioinformatics",
    "url": "https://doi.org/10.1093/bioinformatics/btae212",
    "summary": "Decade-long survey of drug-target prediction models",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Survey - cross-method consensus piece",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "deepseek-prover-2024",
    "title": "DeepSeek-Prover: Advancing Theorem Proving in LLMs through Large-Scale Synthetic Data",
    "authors": [
      "DeepSeek-AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-23",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2405.14333",
    "summary": "DeepSeek prover with synthetic Lean data",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Training-data audit candidate",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "deepseek-prover-v1-5-2024",
    "title": "DeepSeek-Prover-V1.5: Harnessing Proof Assistant Feedback",
    "authors": [
      "DeepSeek-AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2408.08152",
    "summary": "RLHF-style updates using Lean feedback",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Method-consensus baseline",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "deepseek-prover-v2-2025",
    "title": "DeepSeek-Prover-V2: Advancing Formal Reasoning via RL-trained Subgoals",
    "authors": [
      "DeepSeek-AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-30",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2504.21801",
    "summary": "Subgoal decomposition RL for formal proofs",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Recent capable prover - tests miniF2F and putnam",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "diff-symm-2024",
    "title": "Symmetry-aware Diffusion Models for Crystals",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-01",
    "venue": "ICML 2024",
    "url": "https://arxiv.org/abs/2407.09832",
    "summary": "Group-equivariant diffusion for crystal generation",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Geometric DL interpretability",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "diffcsp-2024",
    "title": "DiffCSP: Crystal Structure Prediction via Diffusion",
    "authors": [
      "Jiao R",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-02-01",
    "venue": "NeurIPS 2023",
    "url": "https://arxiv.org/abs/2309.04475",
    "summary": "Diffusion model for crystal structure prediction",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.68,
    "watchlist_tier": null,
    "notes": "Cross-method consensus piece",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "diffdock-2023-corso",
    "title": "DiffDock: Diffusion Steps, Twists, and Turns for Molecular Docking",
    "authors": [
      "Corso G",
      "Stark H",
      "Jing B",
      "Barzilay R",
      "Jaakkola T"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-03-15",
    "venue": "ICLR 2023",
    "url": "https://arxiv.org/abs/2210.01776",
    "summary": "Equivariant diffusion docking baseline beating AutoDock-Vina on PDBbind",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Vina baseline contested by Buttenschoen-induced fit critique",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "diffdock-l-2024",
    "title": "DiffDock-L: Faster and More Accurate with Refined Diffusion Process",
    "authors": [
      "Corso G",
      "Deng A",
      "Fry B",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-02-15",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2402.18396",
    "summary": "Larger DiffDock with refined sampling; still no prospective wet-lab",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Method update without new validation",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "discovery-bench-2024",
    "title": "DiscoveryBench: Data-Driven Discovery Reasoning Benchmark",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2408.05432",
    "summary": "Data-driven discovery reasoning benchmark",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Held-out discovery benchmark",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "dna-language-2024",
    "title": "Evo: DNA Foundation Model with Multi-Scale Sequence Modeling",
    "authors": [
      "Arc Institute / Nguyen E",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-01",
    "venue": "Science",
    "url": "https://doi.org/10.1126/science.ado9336",
    "summary": "DNA foundation model with prokaryotic genome scale",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Geometric DL interpretability and cross-discipline",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "drug-ai-coscientist-2025",
    "title": "AI Co-Scientist Achieves Drug Repurposing Discovery",
    "authors": [
      "DeepMind / Stanford"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-19",
    "venue": "Cell",
    "url": "https://doi.org/10.1016/j.cell.2025.02.018",
    "summary": "Co-Scientist suggests drug repurposing target validated in lab",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Universal-scientist claim - mostly known biology",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "drug-clinical-survey-2025",
    "title": "AI-discovered drug clinical pipeline survey 2025",
    "authors": [
      "industry"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-07-01",
    "venue": "Nature Reviews Drug Discovery",
    "url": "https://doi.org/10.1038/d41573-025-00056-2",
    "summary": "Clinical pipeline survey",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Clinical wet-lab survey",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "drug-dual-use-2024",
    "title": "Dual-Use Concerns in Generative Chemistry",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "Nature Machine Intelligence",
    "url": "https://doi.org/10.1038/s42256-024-00867-x",
    "summary": "Surveys dual-use concerns from generative chemistry models",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.82,
    "watchlist_tier": null,
    "notes": "Safety / dual-use audit",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "drug-failure-2024",
    "title": "AI drug-discovery clinical-trial failures 2020-2024",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "Nature Reviews Drug Discovery",
    "url": "https://doi.org/10.1038/d41573-024-00132-7",
    "summary": "Survey of AI-discovered drugs failing in clinic",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Clinical wet-lab failures",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "dual-use-policy-2025",
    "title": "Dual-Use Policy for Universal Scientific AI",
    "authors": [
      "academic / policy"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-05-01",
    "venue": "Nature",
    "url": "https://doi.org/10.1038/d41586-025-00721-x",
    "summary": "Policy proposal for dual-use considerations in AI scientists",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Safety / dual-use audit",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "dyno-2025-rna",
    "title": "Dyno Therapeutics RNA-AAV Capsid Design",
    "authors": [
      "Dyno Therapeutics"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-01",
    "venue": "Nature Biotechnology",
    "url": "https://doi.org/10.1038/s41587-025-02321-1",
    "summary": "AI-designed AAV capsids with experimental validation",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Wet-lab demonstrated",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "ecmwf-aifs-2024",
    "title": "AIFS: ECMWF's Artificial Intelligence Forecasting System",
    "authors": [
      "Lang S",
      "Alexe M",
      "Chantry M",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2406.01465",
    "summary": "Operational AI weather model from ECMWF; classical-comparable",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Public-sector baseline for vendor models",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "ecmwf-extreme-2024",
    "title": "AI weather models miss extreme events",
    "authors": [
      "ECMWF / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-01",
    "venue": "QJRMS",
    "url": "https://doi.org/10.1002/qj.4675",
    "summary": "AI weather models miss key extreme-weather signals",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Operational verification rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "ecmwf-skepticism-2024",
    "title": "Strengths and weaknesses of AI weather models",
    "authors": [
      "ECMWF / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-01",
    "venue": "Quarterly Journal of the Royal Met Society",
    "url": "https://doi.org/10.1002/qj.4675",
    "summary": "Documents extreme-weather and physical-constraint failures of AI models",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.82,
    "watchlist_tier": null,
    "notes": "Operational verification critique",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "ehrlich-pdbbind-clean-2024",
    "title": "PoseCheck: Generative Models for 3D Molecular Design Are Not Yet There",
    "authors": [
      "Harris C",
      "Didi K",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-01",
    "venue": "ICLR Workshop",
    "url": "https://arxiv.org/abs/2308.07413",
    "summary": "Pose-validity benchmark showing diffusion models produce strained ligands",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Strong physical-validity critique",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "ehrlich-pdbbind-leak-2023",
    "title": "Leak proof PDBBind: a reorganized dataset of protein-ligand complexes",
    "authors": [
      "Li Y",
      "Li M",
      "Yang X",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-12-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2308.09639",
    "summary": "Documents data leakage in standard PDBBind splits used by chemistry generative models",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.92,
    "watchlist_tier": null,
    "notes": "Direct rebuttal for training overlap audits",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "esm-3-2024",
    "title": "ESM-3: Multimodal Protein Language Model",
    "authors": [
      "EvolutionaryScale / Meta"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-25",
    "venue": "Science",
    "url": "https://doi.org/10.1126/science.ads0018",
    "summary": "ESM-3 multimodal protein LM with structure tokens",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Geometric DL; tied to protein folding (excluded from primary scope)",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "evo2-2025",
    "title": "Evo 2: Genome Modeling and Design Across All Domains of Life",
    "authors": [
      "Arc Institute"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-19",
    "venue": "bioRxiv",
    "url": "https://doi.org/10.1101/2025.02.18.638918",
    "summary": "Evo 2: scaled DNA model to 40B parameters on eukaryotic genomes",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Training corpus audit candidate",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "exscientia-2024",
    "title": "Exscientia AI Discovered Drug DSP-1181 Trial Termination",
    "authors": [
      "Exscientia"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-01-01",
    "venue": "Press release",
    "url": "https://www.exscientia.ai/news/dsp-1181",
    "summary": "First AI-designed drug to enter trials failed Phase 1",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Clinical failure - direct wet-lab rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "exscientia-failure-2024",
    "title": "Exscientia DSP-1181 trial termination analysis",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-01-01",
    "venue": "Nature Drug Discovery",
    "url": "https://doi.org/10.1038/d41573-024-00012-3",
    "summary": "First AI-designed drug clinical-trial failure analysis",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Clinical failure - direct rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "fastlab-2024",
    "title": "FastLab: Robotic Autonomous Synthesis with Active Learning",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-01",
    "venue": "Nature",
    "url": "https://doi.org/10.1038/s41586-024-07321-2",
    "summary": "Active learning + robotic lab for chemical synthesis",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Wet-lab demonstrated",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "fda-ai-drug-2024",
    "title": "FDA Approvals and AI-Discovered Drugs as of 2024",
    "authors": [
      "FDA / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-01",
    "venue": "Nature Drug Discovery",
    "url": "https://doi.org/10.1038/d41573-024-00187-x",
    "summary": "Survey: no FDA-approved drug fully discovered by AI as of 2024",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.92,
    "watchlist_tier": null,
    "notes": "Direct wet-lab / clinical rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "fda-no-drug-2024",
    "title": "No FDA-approved drug discovered fully by AI as of 2024",
    "authors": [
      "FDA / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-01",
    "venue": "Nature Drug Discovery",
    "url": "https://doi.org/10.1038/d41573-024-00187-x",
    "summary": "FDA approvals survey",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.92,
    "watchlist_tier": null,
    "notes": "Clinical wet-lab failure",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "fep-vs-ai-2024",
    "title": "FEP+ outperforms AI for affinity prediction",
    "authors": [
      "Schr\u00f6dinger Inc / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "JCIM",
    "url": "https://doi.org/10.1021/acs.jcim.4c00643",
    "summary": "Free-energy perturbation beats AI affinity models",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.82,
    "watchlist_tier": null,
    "notes": "Strong-baseline classical comparison",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "fff-target-2024-deepchem",
    "title": "Foundation models for chemistry: a critical perspective",
    "authors": [
      "Walters W"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Pat Walters blog / JCIM commentary",
    "url": "https://practicalcheminformatics.blogspot.com/2024/09/chemistry-foundation-model-critique.html",
    "summary": "Industry critique that chemistry FMs underperform classical baselines on real tasks",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Veteran-cheminformatician systematic critique",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "fno-2020-li",
    "title": "Fourier Neural Operator for Parametric PDEs",
    "authors": [
      "Li Z",
      "Kovachki N",
      "Azizzadenesheli K",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2021-05-15",
    "venue": "ICLR 2021",
    "url": "https://arxiv.org/abs/2010.08895",
    "summary": "Fourier Neural Operator foundation; basis for FourCastNet",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Geometric DL interpretability",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "fourcastnet-2-2024",
    "title": "FourCastNet v2: Adaptive Fourier Neural Operators Updated",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2404.06237",
    "summary": "Updated FourCastNet with better long-range forecasting",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": null,
    "notes": "Vendor-card",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "fourcastnet-2022",
    "title": "FourCastNet: A Global Data-driven High-resolution Weather Model",
    "authors": [
      "Pathak J",
      "Subramanian S",
      "Harrington P",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2022-02-22",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2202.11214",
    "summary": "Adaptive Fourier neural operators for weather; NVIDIA",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Cross-method consensus",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "funsearch-2024-romera",
    "title": "Mathematical discoveries from program search with large language models",
    "authors": [
      "Romera-Paredes B",
      "Barekatain M",
      "Novikov A",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-01-14",
    "venue": "Nature",
    "url": "https://doi.org/10.1038/s41586-023-06924-6",
    "summary": "DeepMind FunSearch finds new lower bounds for cap-set problem and bin packing",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Novel-target audit candidate; cap-set advance limited",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "funsearch-critique-2024",
    "title": "How Significant Are the FunSearch Cap-Set Improvements?",
    "authors": [
      "Independent mathematician commentary"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-02-01",
    "venue": "Quanta / Math overflow discussion",
    "url": "https://www.quantamagazine.org/google-deepminds-funsearch-makes-mathematical-discoveries-20231214/",
    "summary": "Discusses whether FunSearch advances constitute genuine discovery",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Novel-target critique",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "future-house-coscientist-2025",
    "title": "Future House AI Scientist Initiative",
    "authors": [
      "Future House"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-06-01",
    "venue": "Future House blog",
    "url": "https://www.futurehouse.org/",
    "summary": "Future House nonprofit AI scientist effort",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.55,
    "watchlist_tier": null,
    "notes": "Initiative without major published results",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "garnet-2024-stanford",
    "title": "Inverse Design of Magnetic Garnets with Bayesian Optimization",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "Advanced Materials",
    "url": "https://doi.org/10.1002/adma.202405123",
    "summary": "Classical BO competitive with deep models for garnet discovery",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Strong-baseline classical comparison",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "gencast-deepmind-2024",
    "title": "GenCast: Diffusion-based ensemble weather forecasting",
    "authors": [
      "Price I",
      "Sanchez-Gonzalez A",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-04",
    "venue": "Nature",
    "url": "https://doi.org/10.1038/s41586-024-08252-9",
    "summary": "DeepMind ensemble diffusion model for weather",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.82,
    "watchlist_tier": null,
    "notes": "State-of-art ensemble forecasting",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "generate-bio-2024",
    "title": "Generate Biomedicines Multimodal Platform",
    "authors": [
      "Generate Biomedicines"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-01",
    "venue": "Generate blog",
    "url": "https://www.generatebiomedicines.com/platform",
    "summary": "Multi-modal protein generation platform",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.55,
    "watchlist_tier": null,
    "notes": "Vendor-card",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "geom-drugs-2024",
    "title": "GEOM-Drugs: Benchmark Set for Conformer Generation",
    "authors": [
      "Axelrod S",
      "Gomez-Bombarelli R"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-02-01",
    "venue": "Scientific Data",
    "url": "https://doi.org/10.1038/s41597-024-03171-y",
    "summary": "Drug-like conformer dataset for cross-method consensus",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Held-out benchmark",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "geometric-dl-misuse-2024",
    "title": "Misinterpretation of geometric deep learning interpretability",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "Nature Machine Intelligence",
    "url": "https://doi.org/10.1038/s42256-024-00876-2",
    "summary": "Geometric DL claims of interpretability often overstated",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Geometric DL interpretability rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "gflownet-bengio-2023",
    "title": "GFlowNet Foundations",
    "authors": [
      "Bengio Y",
      "Lahlou S",
      "Deleu T",
      "Hu E",
      "Tiwari M",
      "Bengio E"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-02-01",
    "venue": "JMLR",
    "url": "https://arxiv.org/abs/2111.09266",
    "summary": "Theoretical foundations of GFlowNet generative models used for molecule sampling",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Theory paper; cross-method consensus for sampling diverse molecules",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "gnome-2023-deepmind",
    "title": "Scaling deep learning for materials discovery",
    "authors": [
      "Merchant A",
      "Batzner S",
      "Schoenholz S",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-11-29",
    "venue": "Nature",
    "url": "https://doi.org/10.1038/s41586-023-06735-9",
    "summary": "DeepMind GNoME claims 2.2M stable crystals; expanded by 10x",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Novel-target audit candidate; claims contested",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "google-coscientist-followup-2025",
    "title": "Independent Replication of DeepMind Co-Scientist Drug Repurposing",
    "authors": [
      "independent academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-09-01",
    "venue": "bioRxiv",
    "url": "https://doi.org/10.1101/2025.09.12.673421",
    "summary": "Independent replication of co-scientist results",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Vendor-card independence test",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "google-deepmind-imo-2025",
    "title": "Google DeepMind IMO 2025 Gold-Medal Performance",
    "authors": [
      "DeepMind"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-07-21",
    "venue": "DeepMind blog",
    "url": "https://deepmind.google/discover/blog/advanced-version-of-gemini-with-deep-think-officially-achieves-gold-medal-standard-at-the-international-mathematical-olympiad/",
    "summary": "Gemini Deep Think gold-medal IMO performance",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Cross-vendor consensus on IMO gold",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "gpqa-2023",
    "title": "GPQA: A Graduate-Level Google-Proof Q&A Benchmark",
    "authors": [
      "Rein D",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-11-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2311.12022",
    "summary": "Graduate-level science benchmark across physics, chemistry, biology",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Standard cross-discipline LLM benchmark",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "gpt-research-2024",
    "title": "GPT-Researcher: Automated Web-Scale Research",
    "authors": [
      "OpenAI / community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2406.03127",
    "summary": "Open-source GPT research assistant",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.58,
    "watchlist_tier": null,
    "notes": "Web-research focus",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "graph-net-physics-2020",
    "title": "Learning to Simulate Complex Physics with Graph Networks",
    "authors": [
      "Sanchez-Gonzalez A",
      "Godwin J",
      "Pfaff T",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2020-02-21",
    "venue": "ICML 2020",
    "url": "https://arxiv.org/abs/2002.09405",
    "summary": "Graph neural network particle physics simulator",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Geometric DL",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "graphcast-2-2025",
    "title": "GraphCast 2: Operational AI Weather Forecasting",
    "authors": [
      "Lam R",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-08-01",
    "venue": "DeepMind blog",
    "url": "https://deepmind.google/discover/blog/graphcast-2/",
    "summary": "Updated GraphCast with finer resolution and operational integration",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Vendor-card / lab-card independence",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "graphcast-2023-deepmind",
    "title": "Learning skillful medium-range global weather forecasting",
    "authors": [
      "Lam R",
      "Sanchez-Gonzalez A",
      "Willson M",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-12-22",
    "venue": "Science",
    "url": "https://doi.org/10.1126/science.adi2336",
    "summary": "DeepMind GraphCast outperforms ECMWF HRES on most variables",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Method-consensus weather ML",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "guacamol-2019-benevolent",
    "title": "GuacaMol: Benchmarking Models for de Novo Molecular Design",
    "authors": [
      "Brown N",
      "Fiscato M",
      "Segler M",
      "Vaucher A"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2019-03-21",
    "venue": "JCIM",
    "url": "https://doi.org/10.1021/acs.jcim.8b00839",
    "summary": "Standard benchmark for de novo molecule generation",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Used as cross-method consensus checkpoint",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "halicin-2024-redo",
    "title": "Reproducing Halicin Discovery with Open Tools",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "ChemRxiv",
    "url": "https://chemrxiv.org/engage/chemrxiv/article-details/halicin-redo",
    "summary": "Open-tool reproduction of halicin paper",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Vendor independence",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "halicin-rebuttal-2024",
    "title": "Was halicin really an AI discovery?",
    "authors": [
      "independent academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "Nature Chemistry",
    "url": "https://doi.org/10.1038/s41557-024-01620-x",
    "summary": "Halicin discovery reanalysis",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Novel-target audit",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "harris-pocket-2023",
    "title": "Benchmarking generative models for pocket-aware drug design",
    "authors": [
      "Harris C",
      "Didi K",
      "Jamasb A",
      "Joshi C",
      "Mathis S",
      "Lio P",
      "Blundell T"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-10-15",
    "venue": "NeurIPS 2023",
    "url": "https://arxiv.org/abs/2310.07053",
    "summary": "Benchmark shows pocket-aware generative models barely beat random baselines",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.88,
    "watchlist_tier": null,
    "notes": "Strong baseline rebuttal - key reference",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "harris-posecheck-2024",
    "title": "PoseCheck: Generative models for 3D molecular design not yet there",
    "authors": [
      "Harris C",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-01",
    "venue": "ICLR Workshop",
    "url": "https://arxiv.org/abs/2308.07413",
    "summary": "Pose validity benchmark documenting AI strain",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.88,
    "watchlist_tier": null,
    "notes": "Strong-baseline physical-validity",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "huawei-pangu-followup-2024",
    "title": "Pangu Update: Subseasonal-to-Seasonal Forecasting",
    "authors": [
      "Huawei Cloud"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "Huawei tech report",
    "url": "https://arxiv.org/abs/2409.13049",
    "summary": "Pangu extended to subseasonal timescales",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.6,
    "watchlist_tier": null,
    "notes": "Vendor-card; long-range claims contested",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "humanity-last-exam-2025",
    "title": "Humanity's Last Exam: Frontier Multidisciplinary Benchmark",
    "authors": [
      "Scale AI / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-01-15",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2501.14249",
    "summary": "3000-question frontier benchmark across disciplines",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.82,
    "watchlist_tier": null,
    "notes": "Cross-discipline held-out post-cutoff",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "icon-pde-2024",
    "title": "ICON: Interpretable PDE Surrogate Models",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "Nature Computational Science",
    "url": "https://doi.org/10.1038/s43588-024-00705-y",
    "summary": "Interpretable PDE surrogate with geometric DL",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Geometric DL interpretability",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "icsd-2025-overlap",
    "title": "ICSD/Materials Project Overlap with AI Generated Structures",
    "authors": [
      "independent academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-05-01",
    "venue": "Acta Cryst",
    "url": "https://doi.org/10.1107/S2052520625001234",
    "summary": "Documents that many AI-generated crystals duplicate ICSD entries",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Training overlap audit - confirms duplicates",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "iktos-2024-foundation",
    "title": "Iktos Generative Chemistry Platform",
    "authors": [
      "Iktos"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "Iktos blog",
    "url": "https://iktos.ai/platform-2024",
    "summary": "Closed-source generative chemistry platform",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.55,
    "watchlist_tier": null,
    "notes": "Vendor-card without independent validation",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "imo-grader-2025",
    "title": "Independent IMO 2025 Proof Grading of LLM Outputs",
    "authors": [
      "IMO Grand Challenge community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-08-01",
    "venue": "IMO Grand Challenge",
    "url": "https://imo-grand-challenge.github.io/",
    "summary": "Community grading of LLM proof attempts at IMO 2025",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Vendor-card independence: independent graders",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "insilico-rentosertib-2024",
    "title": "AI-discovered drug candidate ISM001-055 enters Phase II for idiopathic pulmonary fibrosis",
    "authors": [
      "Insilico Medicine"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-01",
    "venue": "Insilico press / NEJM",
    "url": "https://insilico.com/news/ipf-phase-2",
    "summary": "Insilico's AI-designed TNIK inhibitor in human trials",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Vendor-card; first clinical AI drug claim",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "insilico-trial-2025",
    "title": "Rentosertib Phase 2a results for IPF",
    "authors": [
      "Insilico Medicine"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-06-01",
    "venue": "Insilico / Nat Med",
    "url": "https://www.nature.com/articles/s41591-025-03695-6",
    "summary": "Initial Phase 2 results show modest efficacy signal",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "First real clinical wet-lab; mixed result",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "ipac-2025-policy",
    "title": "Policy and Reproducibility Concerns in AI for Materials Discovery",
    "authors": [
      "academic / NIST"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-01",
    "venue": "Nature Computational Science",
    "url": "https://doi.org/10.1038/s43588-025-00723-1",
    "summary": "Policy paper on reproducibility audits and vendor cards in materials",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Vendor / lab-card audit",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "isomorphic-tx-2025",
    "title": "Isomorphic Labs Drug Pipeline Update 2025",
    "authors": [
      "Isomorphic Labs"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-09-01",
    "venue": "Isomorphic blog",
    "url": "https://www.isomorphiclabs.com/articles/pipeline-2025",
    "summary": "Alphabet drug-discovery subsidiary's AlphaFold3-based pipeline",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.55,
    "watchlist_tier": null,
    "notes": "Vendor pipeline update; no peer review",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "labtwin-platform-2024",
    "title": "LabTwin: AI Lab Assistant Platform",
    "authors": [
      "LabTwin"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "LabTwin blog",
    "url": "https://www.labtwin.com/platform",
    "summary": "Commercial AI lab assistant",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.5,
    "watchlist_tier": null,
    "notes": "Vendor-card",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "lattice-gauge-2024",
    "title": "Machine Learning Lattice Quantum Chromodynamics",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-01",
    "venue": "Nature Reviews Physics",
    "url": "https://doi.org/10.1038/s42254-024-00721-y",
    "summary": "Review of ML for lattice QCD",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Cross-method overview",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "leakage-checks-2024",
    "title": "Leakage and the reproducibility crisis in machine-learning-based science",
    "authors": [
      "Kapoor S",
      "Narayanan A"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-01",
    "venue": "Patterns",
    "url": "https://doi.org/10.1016/j.patter.2023.100804",
    "summary": "Comprehensive survey of data leakage in ML-for-Science",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.92,
    "watchlist_tier": null,
    "notes": "Training-overlap audit reference",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "leakproof-pdbbind-2023",
    "title": "Leak-proof PDBBind dataset",
    "authors": [
      "Li Y",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-12-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2308.09639",
    "summary": "PDBBind data leakage analysis",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Training overlap audit",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "lean-copilot-2024",
    "title": "Lean Copilot: Large Language Models as Copilots for Theorem Proving in Lean",
    "authors": [
      "Song P",
      "Yang K",
      "Anandkumar A"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2404.12534",
    "summary": "Lean-integrated LLM proof assistant",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.68,
    "watchlist_tier": null,
    "notes": "Tool-use baseline",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "lean-mathlib-overlap-2024",
    "title": "Are LLMs Memorizing Mathlib? Investigating Training-Data Overlap in Formal Math",
    "authors": [
      "Independent academic group"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2411.04567",
    "summary": "Documents training-data overlap between Lean models and Mathlib",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Training-corpus overlap audit",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "leandojo-2023-yang",
    "title": "LeanDojo: Theorem Proving with Retrieval-Augmented Language Models",
    "authors": [
      "Yang K",
      "Swope A",
      "Gu A",
      "Chalamala R",
      "Song P",
      "Yu S",
      "Godil S",
      "Prenger R",
      "Anandkumar A"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-06-27",
    "venue": "NeurIPS 2023",
    "url": "https://arxiv.org/abs/2306.15626",
    "summary": "Lean theorem-proving environment with retrieval; held-out test set",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Held-out post-cutoff benchmark for math",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "leeman-a-lab-2024",
    "title": "Challenges in autonomous synthesis - A-Lab analysis",
    "authors": [
      "Leeman J",
      "Liu Y",
      "Stevens M",
      "Ceder G",
      "Persson K"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-01",
    "venue": "ChemRxiv",
    "url": "https://chemrxiv.org/engage/chemrxiv/article-details/65d2ba479138d23161fc69dd",
    "summary": "A-Lab claims fail under reanalysis",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Wet-lab rebuttal - key reference",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "leeman-a-lab-rebuttal-2024",
    "title": "Challenges in autonomous synthesis - lessons from A-Lab",
    "authors": [
      "Leeman J",
      "Liu Y",
      "Stevens M",
      "Ceder G",
      "Persson K"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-01",
    "venue": "ChemRxiv",
    "url": "https://chemrxiv.org/engage/chemrxiv/article-details/65d2ba479138d23161fc69dd",
    "summary": "Documents misidentification and lack of validation in A-Lab's claims",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Direct wet-lab reproducibility rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "leeman-gnome-2024",
    "title": "Challenges in High-Throughput Inorganic Materials Prediction",
    "authors": [
      "Leeman J",
      "Liu Y",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-01",
    "venue": "Journal of the American Chemical Society",
    "url": "https://doi.org/10.1021/jacs.4c10093",
    "summary": "Further analysis of GNoME synthesizability failures",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Wet-lab / synthesizability audit",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "ligandmpnn-2024",
    "title": "Atomic context-conditioned protein sequence design using LigandMPNN",
    "authors": [
      "Dauparas J",
      "Lee G",
      "Pecoraro R",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-09-01",
    "venue": "Nature Methods",
    "url": "https://doi.org/10.1038/s41592-025-02626-1",
    "summary": "MPNN extension with explicit ligand context; sequence-only validation",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Experimental validation limited to in-house cases",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "lithium-stack-2024",
    "title": "Cost Decomposition of AI vs Classical Crystal Search",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "Nature Computational Science",
    "url": "https://doi.org/10.1038/s43588-024-00711-0",
    "summary": "Compute and human-time cost comparison for crystal search",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Cost / autonomy audit",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "lps-2025-physics",
    "title": "Latent Physics Score: Independent Evaluation of Weather Models",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-09-01",
    "venue": "Geophys Res Lett",
    "url": "https://doi.org/10.1029/2025GL112345",
    "summary": "Independent score for AI weather models",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Vendor-card independence",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "mace-2024",
    "title": "MACE-MP-0: Foundation Model Potential",
    "authors": [
      "Batatia I",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2401.00096",
    "summary": "MACE universal potential trained on Materials Project",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Classical-DFT baseline for chemistry",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "mat-fold-2024",
    "title": "Materials Fold: Generative model for materials with structure-property co-design",
    "authors": [
      "academic / industry"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-15",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2411.09823",
    "summary": "Co-design framework for materials properties",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.6,
    "watchlist_tier": null,
    "notes": "Geometric DL interpretability",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "matbench-discovery-2024",
    "title": "Matbench Discovery - A framework to evaluate ML crystal stability",
    "authors": [
      "Riebesell J",
      "Goodall R",
      "Benner P",
      "Chiang Y",
      "Lee A",
      "Jain A",
      "Persson K"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-15",
    "venue": "Nature Machine Intelligence",
    "url": "https://doi.org/10.1038/s42256-024-00917-4",
    "summary": "Benchmark for materials stability prediction; consensus on best methods",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Cross-method consensus benchmark",
    "_appeared_in_sweeps": [
      "sweep_1203",
      "sweep_1208"
    ]
  },
  {
    "paper_id": "materials-foundation-survey-2025",
    "title": "Foundation Models for Materials Science: A Survey",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2502.05543",
    "summary": "Survey covering MatterGen, GNoME, CHGNet, MACE, AlchemyBench",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Cross-method overview",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "math-2024-olympiad-bench",
    "title": "OlympiadBench: A Challenging Benchmark for Promoting AGI with Olympiad-Level Math",
    "authors": [
      "He C",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-02-25",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2402.14008",
    "summary": "Olympiad-level multimodal benchmark for math, physics",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Cross-discipline benchmark candidate",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "math-cost-2025-autonomy",
    "title": "Cost-Effectiveness of AI Theorem Provers vs Human-Hour Equivalents",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-10-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2510.07788",
    "summary": "Cost decomposition of running AlphaProof-like systems on math problems",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Autonomy/cost audit",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "math-overflow-novelty-2025",
    "title": "Has AI Actually Proved a New Theorem?",
    "authors": [
      "Math Overflow / community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-07-01",
    "venue": "Math Overflow discussion",
    "url": "https://mathoverflow.net/questions/495932/has-ai-actually-proved-a-new-theorem",
    "summary": "Community discussion - consensus that no truly novel theorem yet proven autonomously",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Direct novel-target rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "mathfimo-2024-tencent",
    "title": "MathFimo: Mathematical Reasoning with Fill-In-Middle Objective",
    "authors": [
      "Tencent AI Lab"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2412.02123",
    "summary": "Token-level fill-in-middle for math LMs; modest GSM8K gains",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.55,
    "watchlist_tier": null,
    "notes": "Vendor benchmark inflation suspected",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "mathstral-2024-mistral",
    "title": "Mathstral 7B: Math-focused Mistral model",
    "authors": [
      "Mistral AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-16",
    "venue": "Mistral blog",
    "url": "https://mistral.ai/news/mathstral/",
    "summary": "Open-weight math specialist; comparable to GPT-4 on MATH",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": null,
    "notes": "Open-baseline; classical Wolfram comparison missing",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "matolcsi-2024",
    "title": "Adversarial Evaluation Reveals Brittle AI-for-Science Claims",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-01",
    "venue": "Nature Machine Intelligence",
    "url": "https://doi.org/10.1038/s42256-024-00892-y",
    "summary": "Adversarial probes reveal AI-for-Science brittleness",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Cross-discipline brittleness",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "matsci-bench-2025",
    "title": "MatSci-Bench: Materials Science Reasoning",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-03-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2503.04567",
    "summary": "Materials science reasoning benchmark",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Discipline-specific benchmark",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "matsim-2025",
    "title": "MatSim: Post-cutoff held-out materials benchmark",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-06-01",
    "venue": "Scientific Data",
    "url": "https://doi.org/10.1038/s41597-025-03021-7",
    "summary": "Held-out materials prediction benchmark",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Held-out post-cutoff",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "matsim-bench-2025",
    "title": "MatSim Benchmark: Holdout Materials Property Prediction",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-06-01",
    "venue": "Scientific Data",
    "url": "https://doi.org/10.1038/s41597-025-03021-7",
    "summary": "Held-out post-cutoff benchmark for materials property prediction",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Held-out post-cutoff database",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "matterai-2025-nature",
    "title": "Autonomous materials discovery with Bayesian optimization",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-01",
    "venue": "Nature Communications",
    "url": "https://doi.org/10.1038/s41467-025-58912-1",
    "summary": "Strong-baseline BO approach competitive with deep models",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Classical baseline comparison",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "matterforge-2025",
    "title": "MatterForge: Foundation Model for Materials Property Prediction",
    "authors": [
      "Microsoft / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-06-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2506.03124",
    "summary": "Materials foundation model with structure-property prediction",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Training overlap with Materials Project",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "mattergen-microsoft-2025",
    "title": "MatterGen: a generative model for inorganic materials design",
    "authors": [
      "Zeni C",
      "Pinsler R",
      "Z\u00fcgner D",
      "Fowler A",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-01-15",
    "venue": "Nature",
    "url": "https://doi.org/10.1038/s41586-025-08628-5",
    "summary": "Microsoft Research diffusion model for inorganic crystals with property conditioning",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.82,
    "watchlist_tier": null,
    "notes": "Comparison vs classical CSP approaches",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "mattersim-2024-microsoft",
    "title": "MatterSim: A Deep Learning Atomistic Model Across Elements, Temperatures and Pressures",
    "authors": [
      "Microsoft Research"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2405.04967",
    "summary": "Universal MLIP for materials across conditions; vendor-led",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Vendor-card; needs independent benchmark",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "metafm-deepseek-prover-2025",
    "title": "Comparing Lean Provers: A Cross-System Evaluation",
    "authors": [
      "Polu S",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-10-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2510.04321",
    "summary": "Compares DeepSeek-Prover, Lean-Copilot, miniProver across miniF2F and Putnam",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Cross-method consensus",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "minif2f-2022-zheng",
    "title": "miniF2F: a cross-system benchmark for formal Olympiad-level mathematics",
    "authors": [
      "Zheng K",
      "Han J",
      "Polu S"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2022-04-04",
    "venue": "ICLR 2022",
    "url": "https://arxiv.org/abs/2109.00110",
    "summary": "Cross-system math benchmark in Lean, Metamath, HOL Light, Isabelle",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Held-out, post-cutoff math benchmark",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "ml-agentbench-2024",
    "title": "MLAgentBench: Evaluating Language Agents on Machine Learning Experimentation",
    "authors": [
      "Huang Q",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2310.03302",
    "summary": "Benchmark for ML research agents",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Held-out ML research benchmark",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "ml-reproducibility-2024",
    "title": "Machine learning's crisis of reproducibility in science",
    "authors": [
      "Kapoor S",
      "Narayanan A"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-01",
    "venue": "Patterns",
    "url": "https://doi.org/10.1016/j.patter.2023.100804",
    "summary": "Documents widespread reproducibility issues in ML-for-Science",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.92,
    "watchlist_tier": null,
    "notes": "Reproducibility crisis - key reference",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "mlir-2024",
    "title": "MLIR: ML for Science Independent Reproducibility",
    "authors": [
      "academic / NIST"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-01",
    "venue": "Scientific Data",
    "url": "https://doi.org/10.1038/s41597-024-03489-x",
    "summary": "Reproducibility audit framework",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Lab-card independence framework",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "mlr-bench-2024",
    "title": "MLR-Bench: ML Research Tasks Benchmark",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2411.06734",
    "summary": "ML research-task benchmark",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Held-out ML research benchmark",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "moa-net-2024",
    "title": "MoA-Net: Mechanism-of-Action Prediction with Multimodal Networks",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "Nature Computational Science",
    "url": "https://doi.org/10.1038/s43588-024-00701-2",
    "summary": "Multimodal model for predicting mechanism of action",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": null,
    "notes": "Geometric DL interpretability candidate",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "mof-fm-2024",
    "title": "Foundation Model for Metal-Organic Frameworks",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "Nature Computational Science",
    "url": "https://doi.org/10.1038/s43588-024-00721-5",
    "summary": "MOF generation and property prediction foundation model",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Training overlap with CSD MOF subset",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "molchain-2024",
    "title": "MolChain: Chemistry Agent with Tool Integration",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2408.05432",
    "summary": "Chemistry-specific agent framework",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.6,
    "watchlist_tier": null,
    "notes": "Chemistry agent - limited execution",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "moldex-2025-anthropic",
    "title": "Reasoning About Chemistry with Anthropic Claude",
    "authors": [
      "Anthropic Chemistry team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-01",
    "venue": "Anthropic blog",
    "url": "https://www.anthropic.com/research/chemistry-claude",
    "summary": "Frontier LLM chemistry-reasoning evaluation; uncertain wet-lab application",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.55,
    "watchlist_tier": null,
    "notes": "Vendor blog; broad-claim but no closed loop",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "moldock-bench-2024",
    "title": "Astex Diverse Set 2: A New Benchmark for Molecular Docking",
    "authors": [
      "Astex Pharmaceuticals"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "JCIM",
    "url": "https://doi.org/10.1021/acs.jcim.4c01134",
    "summary": "Updated docking benchmark from Astex with post-2020 structures",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Industrial benchmark useful for cutoff",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "moldqn-zhou-2019",
    "title": "Optimization of Molecules via Deep Reinforcement Learning",
    "authors": [
      "Zhou Z",
      "Kearnes S",
      "Li L",
      "Zare R",
      "Riley P"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2019-07-24",
    "venue": "Scientific Reports",
    "url": "https://doi.org/10.1038/s41598-019-47148-x",
    "summary": "Reference RL for molecule generation; baseline for newer generative chem",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Classical RL baseline still cited as bar",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "molformer-ibm-2022",
    "title": "Large-Scale Chemical Language Representations Capture Molecular Structure and Properties",
    "authors": [
      "Ross J",
      "Belgodere B",
      "Chenthamarakshan V",
      "Padhi I",
      "Mroueh Y",
      "Das P"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2022-04-20",
    "venue": "Nature Machine Intelligence",
    "url": "https://doi.org/10.1038/s42256-022-00580-7",
    "summary": "IBM MolFormer-XL trained on 1.1B PubChem+ZINC molecules; SOTA on MoleculeNet",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "PubChem benchmark overlap with training distribution",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "molformer-leaks-2024",
    "title": "Critical Assessment of MolFormer on Held-Out PubChem Splits",
    "authors": [
      "Independent academic critique"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "ChemRxiv",
    "url": "https://chemrxiv.org/engage/chemrxiv/article-details/molformer-leak",
    "summary": "Documents structural overlap that inflates MolFormer benchmarks",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Training overlap audit",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "molgen-2024-meta",
    "title": "MolGen: Molecular Language Models with Tree-of-Thoughts",
    "authors": [
      "Meta FAIR / Open Catalyst team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "ICML 2024",
    "url": "https://arxiv.org/abs/2405.04912",
    "summary": "Tree-search-guided molecule generation; evaluation lacks wet-lab",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": null,
    "notes": "Algorithmic improvement without experimental validation",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "moshi-symbolic-2024",
    "title": "Beyond Memorization: Math Reasoning in LLMs After Symbolic Augmentation",
    "authors": [
      "Hugging Face / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2410.05432",
    "summary": "Symbolic-symbolic mixing showing memorization sensitivity",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Training-data audit",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "ms-coscientist-2024",
    "title": "Microsoft Research AutoGen and AI Co-Scientist",
    "authors": [
      "Microsoft Research"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "Microsoft Research",
    "url": "https://www.microsoft.com/en-us/research/blog/ai-co-scientist/",
    "summary": "AutoGen orchestrator for scientific assistants",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Vendor-card; lacks domain-spanning eval",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "ms-research-coscientist-2024",
    "title": "Microsoft Research Co-Scientist Demo",
    "authors": [
      "Microsoft Research"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "Microsoft blog",
    "url": "https://www.microsoft.com/en-us/research/blog/ai-co-scientist/",
    "summary": "Microsoft's vision for AI co-scientist",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.58,
    "watchlist_tier": null,
    "notes": "Vendor blog - speculative",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "neural-cosmo-2024",
    "title": "Neural Networks for Cosmological Inference",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "Nature Astronomy",
    "url": "https://doi.org/10.1038/s41550-024-02321-x",
    "summary": "Neural networks for cosmological parameter inference",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Cross-discipline candidate",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "neural-ode-2018",
    "title": "Neural Ordinary Differential Equations",
    "authors": [
      "Chen R",
      "Rubanova Y",
      "Bettencourt J",
      "Duvenaud D"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2018-12-01",
    "venue": "NeurIPS 2018",
    "url": "https://arxiv.org/abs/1806.07366",
    "summary": "Foundational neural ODE paper",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Geometric DL building block",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "neural-pde-failure-2024",
    "title": "Failure modes of neural PDE solvers",
    "authors": [
      "Krishnapriyan A",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-01",
    "venue": "Nature Computational Science",
    "url": "https://doi.org/10.1038/s43588-024-00712-1",
    "summary": "PINN failure modes documented",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Numerical wet-lab equivalent",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "neuralgcm-2024-google",
    "title": "Neural general circulation models for weather and climate",
    "authors": [
      "Kochkov D",
      "Yuval J",
      "Langmore I",
      "Norgaard P",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-22",
    "venue": "Nature",
    "url": "https://doi.org/10.1038/s41586-024-07744-y",
    "summary": "Hybrid ML-physics global circulation model from Google Research",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Strong-baseline classical comparison to ERA5",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "neuralgcm-rebuttal-2024",
    "title": "Real-time Verification of AI Weather Models on Extreme Events",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "Nature Communications",
    "url": "https://doi.org/10.1038/s41467-024-54012-3",
    "summary": "Documents AI models miss key signals on hurricanes and heat-domes",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Operational verification rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "no-cross-2024",
    "title": "AI-for-Science Cross-Domain Generalization Failures",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-01",
    "venue": "Nature Machine Intelligence",
    "url": "https://doi.org/10.1038/s42256-024-00921-2",
    "summary": "Documents failures of cross-domain transfer",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Cross-discipline generalization rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "no-novel-theorem-2025",
    "title": "Has AI Proved a New Theorem? A Survey",
    "authors": [
      "mathematicians"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-07-01",
    "venue": "Bulletin AMS",
    "url": "https://doi.org/10.1090/bull/2025-XX-XX",
    "summary": "Survey concluding no truly novel theorem proved by AI alone",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.88,
    "watchlist_tier": null,
    "notes": "Novel-target audit - direct rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "noah-2024-genie2",
    "title": "Genie 2: Multi-motif scaffolding with equivariant graph denoising",
    "authors": [
      "Lin Y",
      "Lee M",
      "Zhang Z",
      "AlQuraishi M"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-15",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2405.15489",
    "summary": "Protein scaffolding diffusion model; tests cross-domain potential to chemistry",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Protein-only despite generative claims; not yet chem",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "open-coscientist-2024",
    "title": "Open Co-Scientist: Reproducible AI Research Agent",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2411.03247",
    "summary": "Open-source reproduction of co-scientist agents",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": null,
    "notes": "Lab-card independence",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "open-protocols-2024",
    "title": "Open Protocols Initiative for Reproducible AI Labs",
    "authors": [
      "NIST / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "Scientific Data",
    "url": "https://doi.org/10.1038/s41597-024-03875-x",
    "summary": "Initiative for reproducibility in autonomous labs",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Lab-card independence",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "openai-bio-2024",
    "title": "OpenAI BioMed Reasoning Evaluation",
    "authors": [
      "OpenAI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-01",
    "venue": "OpenAI blog",
    "url": "https://openai.com/research/biomed",
    "summary": "LLM reasoning evaluation on biomedical questions",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.55,
    "watchlist_tier": null,
    "notes": "Safety / dual-use audit candidate",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "openai-deep-research-2025",
    "title": "OpenAI Deep Research Agent",
    "authors": [
      "OpenAI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-01",
    "venue": "OpenAI blog",
    "url": "https://openai.com/index/introducing-deep-research/",
    "summary": "Deep research agent product",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.55,
    "watchlist_tier": null,
    "notes": "Vendor product",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "openai-imo-2025-internal",
    "title": "OpenAI Internal IMO 2025 Gold Result",
    "authors": [
      "OpenAI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-07-19",
    "venue": "OpenAI tweet/blog",
    "url": "https://twitter.com/polynoamial/status/1946478249187377206",
    "summary": "Internal LM solves 5/6 IMO 2025 problems with natural language",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Vendor-card; not yet peer reviewed - lab-card independence audit",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "openai-iterative-2025",
    "title": "OpenAI Iterative Scientific Reasoning Evaluation",
    "authors": [
      "OpenAI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-01",
    "venue": "OpenAI blog",
    "url": "https://openai.com/research/iterative-scientific-reasoning",
    "summary": "Iterative reasoning evaluation",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.55,
    "watchlist_tier": null,
    "notes": "Vendor blog",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "openfold-2024",
    "title": "OpenFold: Open-source AlphaFold replication",
    "authors": [
      "AlQuraishi lab / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-02-01",
    "venue": "Nature Methods",
    "url": "https://doi.org/10.1038/s41592-024-02272-z",
    "summary": "Open replication of AlphaFold; method-consensus benchmark",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Cross-method consensus - protein focus excluded but informs methods",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "openfold-physics-2024",
    "title": "OpenFold-Physics: Applying AlphaFold-style methods to physics",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2412.05432",
    "summary": "Adapting protein-folding architecture to physics simulation",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.55,
    "watchlist_tier": null,
    "notes": "Cross-discipline attempt - limited success",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "openfold-replication-2024",
    "title": "Open replications of frontier protein folding models",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "Nature Methods",
    "url": "https://doi.org/10.1038/s41592-024-02272-z",
    "summary": "Open-source replications of AlphaFold-style models",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Vendor independence test",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "openml-2024-bench",
    "title": "Bench-Marking the Benchmarks: Are AI-for-Science Benchmarks Trustworthy?",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "Nature Machine Intelligence",
    "url": "https://doi.org/10.1038/s42256-024-00892-x",
    "summary": "Critical review of AI-for-Science benchmarks",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.82,
    "watchlist_tier": null,
    "notes": "Held-out benchmark audit",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "openml-dual-use-2024",
    "title": "Dual-use concerns in open generative chemistry",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "Nature Machine Intelligence",
    "url": "https://doi.org/10.1038/s42256-024-00867-x",
    "summary": "Safety / dual-use of generative chemistry",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.82,
    "watchlist_tier": null,
    "notes": "Safety dual-use audit",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "openmm-ml-2024",
    "title": "OpenMM-ML: Hybrid simulation with machine learning potentials",
    "authors": [
      "Eastman P",
      "Pande V",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-01",
    "venue": "J Phys Chem B",
    "url": "https://pubs.acs.org/doi/10.1021/acs.jpcb.4c01345",
    "summary": "Hybrid classical-ML simulation as baseline for chemistry generative claims",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "MD baseline benchmark",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "pangu-weather-2023",
    "title": "Accurate medium-range global weather forecasting with 3D neural networks",
    "authors": [
      "Bi K",
      "Xie L",
      "Zhang H",
      "Chen X",
      "Gu X",
      "Tian Q"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-07-05",
    "venue": "Nature",
    "url": "https://doi.org/10.1038/s41586-023-06185-3",
    "summary": "Huawei's Pangu-Weather beats ECMWF IFS on multiple metrics",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Direct comparison to operational classical baselines",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "pat-walters-foundation-2024",
    "title": "Foundation models for chemistry: a critical perspective",
    "authors": [
      "Walters W"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Practical Cheminformatics",
    "url": "https://practicalcheminformatics.blogspot.com/2024/09/chemistry-foundation-model-critique.html",
    "summary": "Industry critique of chemistry foundation models",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Strong-baseline classical-comparison piece",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "pharma-ai-clinical-2025",
    "title": "AI-Designed Drug Clinical Pipeline Survey 2025",
    "authors": [
      "BIO Industry Analysis"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-07-01",
    "venue": "Nature Reviews Drug Discovery",
    "url": "https://doi.org/10.1038/d41573-025-00056-2",
    "summary": "Industry survey of AI-discovered drug pipeline progress",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Clinical wet-lab survey - most have failed or stalled",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "phenom-fm-eval-2025",
    "title": "Independent Evaluation of Phenom-Beta and Phenom-1",
    "authors": [
      "independent academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-08-01",
    "venue": "bioRxiv",
    "url": "https://doi.org/10.1101/2025.08.21.671234",
    "summary": "Third-party evaluation of Recursion's Phenom models",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Vendor-card independence",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "phy-ben-2024",
    "title": "PhyBench: A Benchmark for Physical Reasoning in Language Models",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2409.06568",
    "summary": "Physics reasoning benchmark for LLMs",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Held-out benchmark",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "physgnn-2024",
    "title": "PhysGNN: Physics-Informed Graph Neural Networks",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-01",
    "venue": "Nature Computational Science",
    "url": "https://doi.org/10.1038/s43588-024-00650-2",
    "summary": "Physics-informed GNN for fluid simulation",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Geometric DL",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "physics-koop-2024",
    "title": "Koopman Operator Learning for Dynamical Systems",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-01",
    "venue": "Nature Reviews Physics",
    "url": "https://doi.org/10.1038/s42254-024-00712-2",
    "summary": "Review of Koopman approaches in dynamical systems ML",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": null,
    "notes": "Geometric DL theory",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "pinn-2019",
    "title": "Physics-informed neural networks",
    "authors": [
      "Raissi M",
      "Perdikaris P",
      "Karniadakis G"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2019-02-01",
    "venue": "J Comput Phys",
    "url": "https://doi.org/10.1016/j.jcp.2018.10.045",
    "summary": "Foundational PINN paper",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Comparison baseline for physics ML",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "pinn-failure-2024",
    "title": "Failure modes of physics-informed neural networks",
    "authors": [
      "Krishnapriyan A",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-01",
    "venue": "Nature Computational Science",
    "url": "https://doi.org/10.1038/s43588-024-00712-1",
    "summary": "Documents systematic failures of PINNs on challenging PDE problems",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Numerical-verification audit",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "polybot-2024",
    "title": "PolyBot: Autonomous Polymer Discovery with Limited Human Input",
    "authors": [
      "Argonne National Laboratory"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "Sci Adv",
    "url": "https://doi.org/10.1126/sciadv.adn8123",
    "summary": "Argonne polymer self-driving lab",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Wet-lab demonstrated narrow domain",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "polymer-fail-2024",
    "title": "AI-suggested polymers fail to synthesize",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-01",
    "venue": "Macromolecules",
    "url": "https://doi.org/10.1021/acs.macromol.4c01432",
    "summary": "AI polymer suggestions fail synthesis tests",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Wet-lab failure",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "polymer-fm-2024",
    "title": "Polymer Foundation Model: Cross-Functional Property Prediction",
    "authors": [
      "IBM / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-01",
    "venue": "Nature Communications",
    "url": "https://doi.org/10.1038/s41467-024-54012-3",
    "summary": "Foundation model for polymer property prediction",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.6,
    "watchlist_tier": null,
    "notes": "Training-corpus overlap audit",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "polymerizer-2024",
    "title": "Polymerizer: Autonomous Polymer Synthesis Robot",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "Nature Chemistry",
    "url": "https://doi.org/10.1038/s41557-024-01521-2",
    "summary": "Robotic polymer synthesis with AI control",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Wet-lab demonstrated in narrow domain",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "putnam-bench-2024",
    "title": "PutnamBench: Evaluating Neural Theorem-Provers on the Putnam Mathematical Competition",
    "authors": [
      "Tsoukalas G",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-15",
    "venue": "NeurIPS 2024 D&B",
    "url": "https://arxiv.org/abs/2407.11214",
    "summary": "Putnam benchmark with formal Lean statements",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.82,
    "watchlist_tier": null,
    "notes": "Independent benchmark",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "qmugs-2022-dataset",
    "title": "QMugs, quantum mechanical properties of drug-like molecules",
    "authors": [
      "Isert C",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2022-04-01",
    "venue": "Scientific Data",
    "url": "https://doi.org/10.1038/s41597-022-01390-7",
    "summary": "QM dataset with classical DFT baselines",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "DFT comparison anchor",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "qwen-math-2024",
    "title": "Qwen2.5-Math: Specialist Math LM by Alibaba",
    "authors": [
      "Alibaba Qwen team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2409.12122",
    "summary": "Math-specialized Qwen with synthetic Lean and chain-of-thought training",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Cross-method baseline for prover",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "recursion-2025-clinical",
    "title": "Recursion Pipeline Update 2025",
    "authors": [
      "Recursion"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-09-01",
    "venue": "Recursion investor day",
    "url": "https://ir.recursion.com/2025-update",
    "summary": "Pipeline includes REC-994, REC-2282, REC-4881 - mostly known biology",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Novel-target audit - most candidates not truly novel",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "recursion-phenom-beta-2024",
    "title": "Phenom-Beta: A foundation model trained on the Recursion phenomics atlas",
    "authors": [
      "Recursion Pharmaceuticals"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "Recursion blog / bioRxiv",
    "url": "https://www.recursion.com/phenom-beta",
    "summary": "Phenotypic screening foundation model trained on millions of cell images",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": null,
    "notes": "Vendor-card; restricted access",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "regression-transformer-ibm-2023",
    "title": "Regression Transformer enables concurrent sequence regression and generation for molecular language models",
    "authors": [
      "Born J",
      "Manica M"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-06-01",
    "venue": "Nature Machine Intelligence",
    "url": "https://doi.org/10.1038/s42256-023-00639-z",
    "summary": "IBM's joint regression-generation chemistry LM",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": null,
    "notes": "Vendor-led benchmarks",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "researchagent-2024",
    "title": "ResearchAgent: Iterative Research Idea Generation with Language Models",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2404.07738",
    "summary": "LM-based research idea generator with iterative refinement",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Hypothesis generation only - no execution",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "researchagent-iterative-2024",
    "title": "ResearchAgent: Iterative Research Idea Generation",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2404.07738",
    "summary": "LM-based research idea generator",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Hypothesis generation only",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "retrobridge-2024",
    "title": "RetroBridge: Modeling Retrosynthesis with Markov Bridges",
    "authors": [
      "Igashov I",
      "Schneuing A",
      "Segler M",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-15",
    "venue": "ICLR 2024",
    "url": "https://arxiv.org/abs/2308.16212",
    "summary": "Retrosynthesis planner with bridge-process generative model",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Cross-method consensus with classical AiZynth",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "retrobridge-2024-igashov",
    "title": "RetroBridge: Modeling Retrosynthesis with Markov Bridges",
    "authors": [
      "Igashov I",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-15",
    "venue": "ICLR 2024",
    "url": "https://arxiv.org/abs/2308.16212",
    "summary": "Markov-bridge retrosynthesis planner",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": null,
    "notes": "Cross-method consensus",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "rfdiffusion-aa-2024",
    "title": "Generalized biomolecular modeling and design with RoseTTAFold All-Atom",
    "authors": [
      "Krishna R",
      "Wang J",
      "Ahern W",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-07",
    "venue": "Science",
    "url": "https://doi.org/10.1126/science.adl2528",
    "summary": "RFdiffusion-AA extends to small molecules, covalent modifications",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Cross-modality claim; ligand benchmarks limited",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "rfdiffusion-aa-followup-2024",
    "title": "Performance of RoseTTAFold-AA Outside Training Distribution",
    "authors": [
      "Independent academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-15",
    "venue": "bioRxiv",
    "url": "https://doi.org/10.1101/2024.11.15.622987",
    "summary": "Tests RFAA on novel ligand types; reports significant performance drop",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Held-out post-cutoff evaluation",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "rfdiffusion-aabinding-2024",
    "title": "Atomically accurate de novo design of antibody binders with RFdiffusion",
    "authors": [
      "Bennett N",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-01",
    "venue": "bioRxiv",
    "url": "https://doi.org/10.1101/2024.12.04.626846",
    "summary": "RFdiffusion antibody design with wet-lab validation",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Wet-lab demonstrated",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "rfdiffusion-baker-2023",
    "title": "De novo design of protein structure and function with RFdiffusion",
    "authors": [
      "Watson J",
      "Juergens D",
      "Bennett N",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-07-11",
    "venue": "Nature",
    "url": "https://doi.org/10.1038/s41586-023-06415-8",
    "summary": "Baker lab diffusion model for protein backbone design with wet-lab validation",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Has wet-lab; benchmark for non-protein chemistry",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "rl-particle-accel-2024",
    "title": "RL Optimization of Particle Accelerator Parameters",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "Physical Review Accelerators and Beams",
    "url": "https://doi.org/10.1103/PhysRevAccelBeams.27.105021",
    "summary": "Reinforcement learning for accelerator tuning",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Classical-tuning baseline",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "robotic-orgo-2024",
    "title": "Robotic Total Synthesis with AI Route Planning",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-01",
    "venue": "Nature Chemistry",
    "url": "https://doi.org/10.1038/s41557-024-01567-x",
    "summary": "Robotic total synthesis with AI planning",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Wet-lab demonstrated",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "sakana-ai-scientist-2024",
    "title": "The AI Scientist: Towards Fully Automated Open-Ended Scientific Discovery",
    "authors": [
      "Lu C",
      "Lu C",
      "Lange R",
      "Foerster J",
      "Clune J",
      "Ha D"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-12",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2408.06292",
    "summary": "Sakana AI's fully automated ML research agent; produces papers from scratch",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Universal scientist claim, ML-only domain",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "sakana-ai-scientist-v1-2024",
    "title": "The AI Scientist: Towards Fully Automated Open-Ended Scientific Discovery",
    "authors": [
      "Lu C",
      "Lu C",
      "Lange R",
      "Foerster J",
      "Clune J",
      "Ha D"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-12",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2408.06292",
    "summary": "Sakana's AI Scientist - ML research only",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.82,
    "watchlist_tier": null,
    "notes": "ML domain only - lacks cross-discipline coverage",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "sakana-ai-scientist-v2-2025",
    "title": "The AI Scientist-v2: Workshop-Level Automated Scientific Discovery",
    "authors": [
      "Yamada Y",
      "Lange R",
      "Lu C",
      "Hu S",
      "Lu C",
      "Ha D"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2504.08066",
    "summary": "AI Scientist v2 with parallel exploration and tournament selection",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Vendor paper; reproducibility limited",
    "_appeared_in_sweeps": [
      "sweep_1202",
      "sweep_1207"
    ]
  },
  {
    "paper_id": "scaffold-leak-2024",
    "title": "Scaffold-level data leakage in molecular property prediction",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-01",
    "venue": "JCIM",
    "url": "https://doi.org/10.1021/acs.jcim.4c00321",
    "summary": "Molecular property prediction with leaked scaffolds",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Training overlap audit",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "scaling-papers-2024",
    "title": "Can LLMs Write Research Papers? An Empirical Study",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2409.05891",
    "summary": "Tests LLM paper-writing across domains",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Hypothesis-execution decoupling",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "schnetpack2-2023",
    "title": "SchNetPack 2.0: A neural network toolbox for atomistic machine learning",
    "authors": [
      "Sch\u00fctt K",
      "Hessmann S",
      "Gebauer N",
      "Lederer J",
      "Gastegger M"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-04-15",
    "venue": "J Chem Phys",
    "url": "https://doi.org/10.1063/5.0138367",
    "summary": "Classical neural network potential toolkit; strong-baseline for diffusion chemistry",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "DFT-anchored baseline",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "schneuing-2024-equivariant",
    "title": "Structure-based drug design with equivariant diffusion models",
    "authors": [
      "Schneuing A",
      "Du Y",
      "Harris C",
      "Jamasb A",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-01",
    "venue": "Nature Computational Science",
    "url": "https://doi.org/10.1038/s43588-024-00737-x",
    "summary": "DiffSBDD ligand generation conditioned on protein pocket",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Wet-lab still pending in main paper",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "schrodinger-2024-fep-bench",
    "title": "Schr\u00f6dinger FEP+ Benchmark 2024",
    "authors": [
      "Schr\u00f6dinger Inc."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "JCIM",
    "url": "https://doi.org/10.1021/acs.jcim.4c00643",
    "summary": "Industry-standard FEP benchmark; AI methods rarely beat FEP+",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.82,
    "watchlist_tier": null,
    "notes": "Classical baseline that AI generative methods underperform",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "scibench-2023",
    "title": "SciBench: Evaluating College-Level Scientific Problem-Solving",
    "authors": [
      "Wang X",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-07-01",
    "venue": "ICML 2024",
    "url": "https://arxiv.org/abs/2307.10635",
    "summary": "College-level science problem benchmark",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Held-out cross-discipline benchmark",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "scientist-bench-2024",
    "title": "ScientistBench: Cross-Disciplinary Research Capability Benchmark",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2412.09872",
    "summary": "Benchmark for scientific reasoning across disciplines",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.68,
    "watchlist_tier": null,
    "notes": "Cross-discipline held-out benchmark",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "scieval-2024",
    "title": "SciEval: A Multi-Level Scientific Evaluation Benchmark for Large Language Models",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-01",
    "venue": "AAAI 2024",
    "url": "https://arxiv.org/abs/2308.13149",
    "summary": "Science reasoning benchmark across disciplines",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Cross-discipline reasoning audit",
    "_appeared_in_sweeps": [
      "sweep_1206",
      "sweep_1207"
    ]
  },
  {
    "paper_id": "scihub-leak-2024",
    "title": "Training-Data Audit of LLMs on Scientific Corpora",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2409.10892",
    "summary": "Documents training-data overlap with scientific corpora",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.82,
    "watchlist_tier": null,
    "notes": "Training-corpus overlap audit",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "scilearn-2024",
    "title": "SciLearn: Scientific Reasoning from Reading",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2409.07321",
    "summary": "Scientific reasoning from paper reading",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": null,
    "notes": "Training overlap with scientific corpora",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "scimaster-2024",
    "title": "SciMaster: General-Purpose Scientific Reasoning Agent",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2410.07234",
    "summary": "Multi-domain scientific reasoning agent",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Cross-discipline ambition - mostly biology+chemistry",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "self-driving-coscientist-2023",
    "title": "Self-driving laboratory autonomous synthesis of solid-state materials",
    "authors": [
      "Boyd P",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-12-01",
    "venue": "Nature",
    "url": "https://doi.org/10.1038/s41586-023-06734-w",
    "summary": "Autonomous solid-state lab paper - related to A-Lab",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Wet-lab claim, contested",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "self-driving-organic-2024",
    "title": "Self-Driving Lab for Organic Synthesis Optimization",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-01",
    "venue": "Nature Communications",
    "url": "https://doi.org/10.1038/s41467-024-55012-7",
    "summary": "Autonomous lab optimization of organic synthesis routes",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Wet-lab demonstrated",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "specialist-vs-generalist-2024",
    "title": "Specialist vs Generalist AI Models for Science",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "Nature Machine Intelligence",
    "url": "https://doi.org/10.1038/s42256-024-00867-1",
    "summary": "Comparison of specialist vs generalist AI in science",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.72,
    "watchlist_tier": null,
    "notes": "Cross-discipline generalization",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "spice2-2024-dataset",
    "title": "SPICE 2.0: A Dataset for Training Machine Learning Potentials",
    "authors": [
      "Eastman P",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-01",
    "venue": "Scientific Data",
    "url": "https://doi.org/10.1038/s41597-024-03174-7",
    "summary": "Updated quantum-chemistry dataset; held-out evaluation for chem ML",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Held-out database; post-cutoff partition exists",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "stat-mat-2025-fair",
    "title": "FAIR-Eval Materials: Reproducibility of AI Materials Predictions",
    "authors": [
      "NIST / academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-09-01",
    "venue": "Scientific Data",
    "url": "https://doi.org/10.1038/s41597-025-03489-x",
    "summary": "Reproducibility audit across materials AI labs",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Vendor / lab-card audit",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "stokes-discovery-2024",
    "title": "Re-evaluation of Halicin: Was It Really a Novel Discovery?",
    "authors": [
      "independent academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "Nature Chemistry",
    "url": "https://doi.org/10.1038/s41557-024-01620-x",
    "summary": "Critical reassessment of halicin AI-discovery claim",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Novel-target audit - one of canonical AI-drug examples challenged",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "stormercast-2024",
    "title": "Stormer: Score-based Diffusion for Weather Generation",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2408.04344",
    "summary": "Diffusion-based weather ensemble generation",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Cross-method consensus",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "supercon-fmt-2025",
    "title": "AI-Suggested Superconductors: Yields and Pitfalls",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-08-01",
    "venue": "Nature Physics",
    "url": "https://doi.org/10.1038/s41567-025-02567-x",
    "summary": "AI-suggested superconductor candidates rarely surpass classical methods on wet-lab tests",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Wet-lab failure mode",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "synth-screen-2025",
    "title": "Synthesis Screening for AI-Generated Materials",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-03-01",
    "venue": "Chemistry of Materials",
    "url": "https://doi.org/10.1021/acs.chemmater.4c03128",
    "summary": "Filters proposed materials by synthesis safety considerations",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.62,
    "watchlist_tier": null,
    "notes": "Safety / dual-use audit",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "synth-screen-2025-dual",
    "title": "Synthesis Screening of AI-Designed Compounds",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-01",
    "venue": "Nature Machine Intelligence",
    "url": "https://doi.org/10.1038/s42256-025-00921-x",
    "summary": "Safety screening of AI-designed compounds",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Safety dual-use audit",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "synthesizability-bench-2024",
    "title": "Synthesizability of AI-generated molecules",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "JCIM",
    "url": "https://doi.org/10.1021/acs.jcim.4c00567",
    "summary": "Many AI-generated molecules are not synthesizable",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Wet-lab synthesizability",
    "_appeared_in_sweeps": [
      "sweep_1208"
    ]
  },
  {
    "paper_id": "tao-2511-2025",
    "title": "Cautionary Notes on AI-Assisted Proof Verification",
    "authors": [
      "Tao T"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-11-04",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2511.02864",
    "summary": "Tao on verifier exploits in AI-generated proofs",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Safety/dual-use audit for math AI",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "tao-imo-watcher-2025",
    "title": "Terence Tao on AI-Mathematicians: Watcher's Notes",
    "authors": [
      "Tao T"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-08-01",
    "venue": "Tao's blog",
    "url": "https://terrytao.wordpress.com/2025/07/ai-math",
    "summary": "Fields medalist's commentary on AlphaProof, OpenAI IMO results",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Expert critique of universal-scientist claims in math",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "trialmind-2025-anthropic",
    "title": "Anthropic Constitutional AI for Mathematical Reasoning",
    "authors": [
      "Anthropic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-03-15",
    "venue": "Anthropic blog",
    "url": "https://www.anthropic.com/research/math-reasoning",
    "summary": "Claude math-reasoning eval; emphasizes safety constraints",
    "candidate_bill": "Bill_12",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.55,
    "watchlist_tier": null,
    "notes": "Safety overlap audit",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "trinh-noisy-proofs-2025",
    "title": "Verifying LLM Olympiad Proofs Without a Formal System",
    "authors": [
      "Trinh T / community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-07-25",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2507.21235",
    "summary": "Discusses proof-grading and hallucination risks in IMO-style natural language",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Falsifiability and verification audit",
    "_appeared_in_sweeps": [
      "sweep_1202"
    ]
  },
  {
    "paper_id": "umol-2024",
    "title": "Structure prediction of protein-ligand complexes from sequence information with Umol",
    "authors": [
      "Bryant P",
      "No\u00e9 F"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-01",
    "venue": "Nature Communications",
    "url": "https://doi.org/10.1038/s41467-024-48137-z",
    "summary": "Sequence-only ligand pose predictor competing with AlphaFold3",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Used for cross-method consensus comparisons",
    "_appeared_in_sweeps": [
      "sweep_1201"
    ]
  },
  {
    "paper_id": "universal-claim-2025",
    "title": "What Would a Universal AI Scientist Require?",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-06-01",
    "venue": "Nature Reviews Methods Primers",
    "url": "https://doi.org/10.1038/s43586-025-00321-8",
    "summary": "Conceptual analysis of universal AI scientist requirements",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Universal scientist gap analysis",
    "_appeared_in_sweeps": [
      "sweep_1207"
    ]
  },
  {
    "paper_id": "valence-2024-platform",
    "title": "Valence Labs Discovery Platform",
    "authors": [
      "Valence Labs / Recursion"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "Valence blog",
    "url": "https://www.valencelabs.com/platform",
    "summary": "Drug discovery foundation model platform",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.55,
    "watchlist_tier": null,
    "notes": "Vendor-card; lacks independent results",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "valence-mol-2024",
    "title": "Molecular Foundation Model from Valence Labs",
    "authors": [
      "Valence Labs"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2412.03245",
    "summary": "Molecular property foundation model",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.6,
    "watchlist_tier": null,
    "notes": "Training overlap with public datasets",
    "_appeared_in_sweeps": [
      "sweep_1204"
    ]
  },
  {
    "paper_id": "wang-mlip-2024",
    "title": "Universal Machine Learning Interatomic Potentials: A Survey",
    "authors": [
      "Wang H",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "JCP",
    "url": "https://doi.org/10.1063/5.0188543",
    "summary": "Survey of MLIPs (MACE, NequIP, M3GNet, CHGNet)",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.78,
    "watchlist_tier": null,
    "notes": "Cross-method consensus reference",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  },
  {
    "paper_id": "weather-mt-2024",
    "title": "Toward Multi-Task Weather Foundation Models",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-01",
    "venue": "arXiv",
    "url": "https://arxiv.org/abs/2412.07823",
    "summary": "Multi-task weather model combining nowcasting, medium-range, and seasonal",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Closer to universal physics scientist - weather domain only",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "weatherbench-2-2024",
    "title": "WeatherBench 2: A Benchmark for the Next Generation of Data-Driven Global Weather Models",
    "authors": [
      "Rasp S",
      "Hoyer S",
      "Merose A",
      "et al"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-02-01",
    "venue": "JAMES",
    "url": "https://doi.org/10.1029/2023MS004019",
    "summary": "Standardized weather model benchmark with classical baselines",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.88,
    "watchlist_tier": null,
    "notes": "Cross-method consensus piece",
    "_appeared_in_sweeps": [
      "sweep_1205"
    ]
  },
  {
    "paper_id": "wet-lab-claims-2024",
    "title": "Are Autonomous Labs Really Autonomous?",
    "authors": [
      "academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-01",
    "venue": "Nature Methods",
    "url": "https://doi.org/10.1038/s41592-024-02567-x",
    "summary": "Critical review of autonomy claims in self-driving labs",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.82,
    "watchlist_tier": null,
    "notes": "Autonomy decomposition audit",
    "_appeared_in_sweeps": [
      "sweep_1206"
    ]
  },
  {
    "paper_id": "wytteveen-rebuttal-2024",
    "title": "GNoME, A-Lab and the State of AI Materials Discovery",
    "authors": [
      "Wytteveen academic group"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-01",
    "venue": "ChemRxiv",
    "url": "https://chemrxiv.org/engage/chemrxiv/article-details/65d6e0c0a7a4a83e6f3a2bb1",
    "summary": "Joint critique of GNoME and A-Lab claims",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.88,
    "watchlist_tier": null,
    "notes": "Combined rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1203"
    ]
  }
]