[
  {
    "paper_id": "aa-deepseek-r1-providers-2025",
    "title": "AA DeepSeek R1 provider variance",
    "authors": [
      "AA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-15",
    "venue": "AA",
    "url": "https://artificialanalysis.ai/models/deepseek-r1",
    "summary": "DeepSeek R1 across providers shows variance, AISI/METR flagged quality differences.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B5",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "aa-deepseek-v3-providers-2025",
    "title": "AA DeepSeek V3 provider variance",
    "authors": [
      "AA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-15",
    "venue": "AA",
    "url": "https://artificialanalysis.ai",
    "summary": "DeepSeek V3 hosted by 15+ providers. >10x variance.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B5 closes",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "aa-llama-3-providers-2024",
    "title": "ArtificialAnalysis Llama 3 provider leaderboard",
    "authors": [
      "ArtificialAnalysis.ai"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "AA dashboard",
    "url": "https://artificialanalysis.ai/models/llama-3-70b-instruct/providers",
    "summary": "Real-time cross-provider latency/cost/output-quality. >5x variance.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.95,
    "watchlist_tier": null,
    "notes": "Third-party; closes B5 with strong evidence of empty space",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "aa-multi-vendor-coverage-2025",
    "title": "AA multi-vendor coverage for Llama 4 / Qwen 3 / Mistral",
    "authors": [
      "AA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-15",
    "venue": "AA",
    "url": "https://artificialanalysis.ai",
    "summary": "Universal coverage uneven across closed vendors.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B11 \u2014 empty-space",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "aa-pricing-history-2024",
    "title": "AA pricing history analysis Q4 2024",
    "authors": [
      "AA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-15",
    "venue": "AA",
    "url": "https://artificialanalysis.ai",
    "summary": "Provider pricing volatility tracked. Same model price varies 10x.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B2",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "aa-provider-cost-variance-2025",
    "title": "AA Llama 3.3 70B provider cost analysis",
    "authors": [
      "AA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-01-15",
    "venue": "AA",
    "url": "https://artificialanalysis.ai/models/llama-3-3-instruct-70b/providers",
    "summary": "Same model price varies $0.04 to $1.00 per M tokens.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B2/B5",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "aa-provider-quality-eval-2024",
    "title": "ArtificialAnalysis output quality comparison",
    "authors": [
      "AA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "AA",
    "url": "https://artificialanalysis.ai",
    "summary": "Some providers show slight output-quality differences for same model name.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B8 \u2014 quantization differs across providers",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "aa-tps-stability-2025",
    "title": "AA TPS stability cross-vendor 2025 Q1",
    "authors": [
      "AA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-03-15",
    "venue": "AA",
    "url": "https://artificialanalysis.ai",
    "summary": "Quarterly variance >20% same model same provider.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.95,
    "watchlist_tier": null,
    "notes": "B5 \u2014 strong empty-space",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "aa-tps-variance-2024",
    "title": "AA TPS day-over-day variance analysis",
    "authors": [
      "AA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-15",
    "venue": "AA",
    "url": "https://artificialanalysis.ai",
    "summary": "Provider TPS varies 50-200% day-over-day for some closed vendors.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B5 strong empty-space evidence",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "aisi-deepseek-eval-2025",
    "title": "UK AISI DeepSeek model evaluation",
    "authors": [
      "UK AISI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-15",
    "venue": "AISI",
    "url": "https://www.aisi.gov.uk",
    "summary": "Independent capability evaluation including provider-specific differences.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B8",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "aisi-frontier-eval-2024",
    "title": "AISI frontier model independence",
    "authors": [
      "UK AISI + US AISI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "AISI report",
    "url": "https://www.aisi.gov.uk",
    "summary": "Pre-deployment evaluation methodology. Vendor-self-eval independence.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B9",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "aisi-quant-eval-2024",
    "title": "UK AISI evaluating quantized models",
    "authors": [
      "UK AISI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-15",
    "venue": "AISI report",
    "url": "https://www.aisi.gov.uk",
    "summary": "AISI evaluation methodology touches quantization. Notes capability drops under int4.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B8 \u2014 third-party capability eval",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "alibaba-pangu-2024",
    "title": "Alibaba Pangu Hanguang 800 NPU",
    "authors": [
      "Alibaba T-Head"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Alibaba conference",
    "url": "https://www.t-head.cn",
    "summary": "Internal cloud NPU. Limited external availability.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "M4 commercialization gap",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "amazon-bedrock-inferentia-2024",
    "title": "Amazon Bedrock Claude 3 Haiku on Inferentia2",
    "authors": [
      "AWS"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-15",
    "venue": "AWS blog",
    "url": "https://aws.amazon.com/blogs/machine-learning/anthropic-claude-on-aws/",
    "summary": "Claude 3 Haiku served on Inferentia2. Cost-per-token not transparent.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B2",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "amd-mi300x-launch-2023",
    "title": "AMD Instinct MI300X launch",
    "authors": [
      "AMD"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-12-06",
    "venue": "AMD AI day",
    "url": "https://www.amd.com/en/products/accelerators/instinct/mi300/mi300x.html",
    "summary": "192GB HBM3. Claims 1.3x H100 on inference at lower price.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "M1 vendor-supplied; ROCm immaturity caveats",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "amd-mi300x-mlperf-2024",
    "title": "AMD MI300X MLPerf Inference",
    "authors": [
      "AMD + MLCommons"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-04",
    "venue": "MLPerf",
    "url": "https://mlcommons.org",
    "summary": "MI300X submission Llama 2 70B. Underperforms H100 in some scenarios.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Third-party-formatted but vendor-submitter",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "amd-mi300x-mlperf-llama-2-2024",
    "title": "MI300X Llama 2 70B independent benchmark",
    "authors": [
      "TensorWave / Hot Aisle"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Cloud-provider blog",
    "url": "https://tensorwave.com",
    "summary": "Independent MI300X benchmarks. Shows variance vs vendor claims.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B5 \u2014 variance",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "amd-mi300x-power-2024",
    "title": "AMD MI300X 750W TDP",
    "authors": [
      "AMD datasheet"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-12-06",
    "venue": "AMD",
    "url": "https://www.amd.com",
    "summary": "750W TDP. Power-per-token measurements not published per task.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "B13",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "amd-mi325x-2024",
    "title": "AMD MI325X launch",
    "authors": [
      "AMD"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-10",
    "venue": "AMD Advancing AI 2024",
    "url": "https://www.amd.com/en/newsroom/press-releases/2024-10-10-amd-extends-leadership-ai-portfolio.html",
    "summary": "256GB HBM3e on MI325X. Claims competitive with H200/B200.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "M1 vendor",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "amd-mi350-roadmap-2024",
    "title": "AMD MI350 roadmap announcement",
    "authors": [
      "AMD"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-10",
    "venue": "AMD Advancing AI 2024",
    "url": "https://www.amd.com",
    "summary": "MI350/MI400 roadmap. 2025/2026 delivery. No product.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "M4 commercialization gap",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "anthropic-bedrock-vs-trainium2-2024",
    "title": "Claude on Bedrock GPU vs Trainium2",
    "authors": [
      "AWS reports"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-15",
    "venue": "AWS blog",
    "url": "https://aws.amazon.com",
    "summary": "Trainium2 reportedly cheaper for Claude vs H100. Specifics undisclosed.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "B7 \u2014 closed silicon vs open GPU stack",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "anthropic-claude-cost-2024",
    "title": "Anthropic Claude API pricing transparency",
    "authors": [
      "Anthropic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-15",
    "venue": "Anthropic pricing",
    "url": "https://www.anthropic.com/pricing",
    "summary": "Token pricing; underlying infra cost not disclosed.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B2",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "anthropic-trainium-2024-aws-deal",
    "title": "Anthropic Trainium2 commitment",
    "authors": [
      "AWS + Anthropic press"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-22",
    "venue": "AWS press release",
    "url": "https://www.anthropic.com/news/anthropic-amazon-trainium",
    "summary": "Anthropic commits to Project Rainier with up to 1M Trainium2 chips.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Compute-procurement commitment; per-token cost economics not public",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "anyscale-bench-2024-vllm-vs-tgi",
    "title": "Anyscale: vLLM vs TGI head-to-head",
    "authors": [
      "Anyscale Inc."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-15",
    "venue": "Anyscale blog",
    "url": "https://www.anyscale.com/blog/continuous-batching-llm-inference",
    "summary": "Continuous batching benchmark vs TGI. Cross-framework variance shown.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Self-interested (Anyscale ships vLLM); not third-party",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "anyscale-cost-bench-2024",
    "title": "Anyscale LLM serving cost benchmark",
    "authors": [
      "Anyscale"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "Anyscale blog",
    "url": "https://www.anyscale.com/blog/continuous-batching-llm-inference",
    "summary": "Cost comparisons across stacks. Self-interested.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "M2 self-interested",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "apollo-research-quant-2024",
    "title": "Apollo Research: Hidden costs of quantization",
    "authors": [
      "Apollo Research"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Apollo blog",
    "url": "https://www.apolloresearch.ai",
    "summary": "Examines deception/scheming evals under FP4 vs FP16. Shows behavior changes.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B8 \u2014 non-capability fidelity dimensions",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "atom-low-bit-2024-isca",
    "title": "Atom: Low-bit Quantization for Efficient LLM Serving",
    "authors": [
      "Yilong Zhao",
      "Chien-Yu Lin",
      "Kan Zhu",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "MLSys 2024",
    "url": "https://arxiv.org/abs/2310.19102",
    "summary": "4-bit weight + 4-bit activation. Llama fidelity within 1-2% PPL.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Peer-reviewed MLSys",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "awq-2023-han-mit",
    "title": "AWQ: Activation-aware Weight Quantization for LLM",
    "authors": [
      "Ji Lin",
      "Jiaming Tang",
      "Haotian Tang",
      "Song Han",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-06-15",
    "venue": "MLSys 2024",
    "url": "https://arxiv.org/abs/2306.00978",
    "summary": "INT4 weight-only quant. Claims minimal MMLU degradation on 7B-70B.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Peer-reviewed; foundational",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "aws-inferentia2-llama-2024",
    "title": "AWS Inferentia2 Llama benchmark",
    "authors": [
      "AWS"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-15",
    "venue": "AWS blog",
    "url": "https://aws.amazon.com/blogs/machine-learning/scaling-llm-inference-with-aws-inferentia2/",
    "summary": "Inferentia2 Llama 2 70B claims. Limited 2024 model coverage.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "M1 vendor; outdated model",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "aws-inferentia3-2025-rumor",
    "title": "AWS Inferentia3 development reports",
    "authors": [
      "industry analysts"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-03-15",
    "venue": "SemiAnalysis + industry reports",
    "url": "https://www.semianalysis.com",
    "summary": "Reports of Inferentia3 in development. No public benchmark.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "M4 \u2014 research-preview gap",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "aws-trainium-bedrock-claude-2024",
    "title": "Claude on Bedrock + Trainium2 launch",
    "authors": [
      "AWS + Anthropic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-03",
    "venue": "re:Invent 2024",
    "url": "https://www.anthropic.com/news/trainium2-and-distillation-on-bedrock",
    "summary": "Claude 3.5 Haiku on Trainium2. Throughput claims; cost-per-token implicit only.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B2 cost-transparency gap",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "aws-trainium2-2024-reinvent",
    "title": "AWS Trainium2 launch at re:Invent 2024",
    "authors": [
      "AWS Annapurna Labs"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-03",
    "venue": "AWS re:Invent 2024 keynote",
    "url": "https://aws.amazon.com/ec2/instance-types/trn2/",
    "summary": "Trainium2 with 4x perf vs Trainium1. Claims faster Llama 3.1 405B inference than H100.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "M1 vendor-supplied; no independent cross-cloud measurement",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "azure-maia-aoai-2024",
    "title": "Azure OpenAI Service Maia migration",
    "authors": [
      "Microsoft Azure"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "Microsoft Azure blog",
    "url": "https://azure.microsoft.com",
    "summary": "Reports of GPT-4 inference partially on Maia. No public benchmark.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Internal-only; M5",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "bentoml-bench-2024",
    "title": "BentoML LLM inference backend benchmark",
    "authors": [
      "BentoML"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "BentoML blog",
    "url": "https://www.bentoml.com/blog/benchmarking-llm-inference-backends",
    "summary": "Direct apples-to-apples comparison. 5x variance.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B5",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "biren-br104-2024-discontinued",
    "title": "Biren BR104 update",
    "authors": [
      "Biren Technology"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "Industry reports",
    "url": "https://www.birentech.com",
    "summary": "BR104 + BR200 roadmap. US sanctions impact.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Commercial constraints",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "blackwell-1200w-2024",
    "title": "Blackwell B200 1200W TDP",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-18",
    "venue": "NVIDIA datasheet",
    "url": "https://www.nvidia.com",
    "summary": "B200 at 1200W per chip. Liquid cooling required. Efficiency-per-token unknown for real workloads.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B13",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "blackwell-mlperf-2024",
    "title": "Blackwell B200 MLPerf Inference v4.1",
    "authors": [
      "NVIDIA + MLCommons"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-04",
    "venue": "MLPerf",
    "url": "https://mlcommons.org/benchmarks/inference-datacenter/",
    "summary": "B200 submission. Claims 4x H100. Single submitter (NVIDIA).",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "M1 vendor self-eval via MLPerf",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "bytedance-trainium2-2025",
    "title": "ByteDance Trainium2 deal",
    "authors": [
      "AWS + ByteDance reports"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-01-15",
    "venue": "Bloomberg",
    "url": "https://www.bloomberg.com",
    "summary": "ByteDance commits to Trainium2. Workload details opaque.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "B2 transparency",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "cambricon-mlu590-2024",
    "title": "Cambricon MLU590 / Siyuan 590 launch",
    "authors": [
      "Cambricon"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "Cambricon press",
    "url": "https://www.cambricon.com",
    "summary": "Chinese-market accelerator. Limited LLM benchmarks.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Commercial-availability gap outside China",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "cerebras-aa-bench-2024",
    "title": "ArtificialAnalysis Cerebras Inference benchmark",
    "authors": [
      "ArtificialAnalysis.ai"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-05",
    "venue": "ArtificialAnalysis.ai blog",
    "url": "https://artificialanalysis.ai/providers/cerebras",
    "summary": "Confirms peak tok/s but shows high variance under load.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Direct B5 evidence \u2014 variance well above 10%",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "cerebras-deepseek-2025-distill",
    "title": "Cerebras DeepSeek R1 distilled 70B",
    "authors": [
      "Cerebras Systems"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-01-30",
    "venue": "Cerebras blog",
    "url": "https://cerebras.ai/blog/cerebras-becomes-the-worlds-fastest-host-for-deepseek-r1-distill-llama-70b",
    "summary": "Claims 1500+ tok/s on distilled R1 70B. Original DSV3 MoE not supported.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "MoE not supported by SRAM-only architecture; B11 empty-space supported",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "cerebras-energy-efficiency-2024",
    "title": "Cerebras WSE-3 power efficiency claim",
    "authors": [
      "Cerebras"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-15",
    "venue": "Cerebras blog",
    "url": "https://cerebras.ai/blog/wafer-scale-engine-3",
    "summary": "Claims 100kW/system for 1800 tok/s. Independent verification absent.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "M1 vendor-supplied",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "cerebras-inference-2024-tps-blog",
    "title": "Cerebras Inference 1800 tok/s Llama 3.1 70B",
    "authors": [
      "Cerebras Systems"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-27",
    "venue": "Cerebras blog",
    "url": "https://cerebras.ai/blog/introducing-cerebras-inference-ai-at-instant-speed",
    "summary": "Claims 1800 tok/s on Llama 3.1 70B, 450 tok/s on 405B.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Vendor benchmark; ArtificialAnalysis later confirmed peak but flagged variance",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "cerebras-vs-groq-vs-sambanova-leaderboard-2024",
    "title": "ArtificialAnalysis closed-vendor leaderboard 2024",
    "authors": [
      "AA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-15",
    "venue": "AA",
    "url": "https://artificialanalysis.ai/providers",
    "summary": "Variance >10x across closed vendors on same model.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B5 closure",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "cerebras-vs-h200-aa-2024",
    "title": "Cerebras 1.8K tok/s vs H200 vLLM 100 tok/s",
    "authors": [
      "ArtificialAnalysis.ai"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "AA",
    "url": "https://artificialanalysis.ai",
    "summary": "Cerebras dominates latency but available capacity is constrained.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B7 \u2014 capacity vs latency tradeoff",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "cerebras-vs-mlx-7b-2024",
    "title": "Cerebras 70B fast inference vs local MLX 7B",
    "authors": [
      "Various"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Community",
    "url": "https://artificialanalysis.ai",
    "summary": "Different design points; not directly comparable.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "B7 \u2014 design-point mismatch",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "cerebras-wse3-2024-launch",
    "title": "Cerebras WSE-3 architecture announcement",
    "authors": [
      "Cerebras Systems"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-13",
    "venue": "Cerebras whitepaper",
    "url": "https://www.cerebras.ai/blog/wafer-scale-engine-3",
    "summary": "4 trillion transistors, 900,000 cores, 44 GB on-chip SRAM. Claims 8x Nvidia H100 perf.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Vendor-only; no independent reproduction under matched comparison",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "chunked-kv-cache-2024",
    "title": "Chunked KV cache offloading",
    "authors": [
      "Various vLLM contributors"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "vLLM design docs",
    "url": "https://docs.vllm.ai",
    "summary": "Offload KV cache to CPU/disk. Latency tradeoff.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "B6 \u2014 latency cost; fidelity preserved",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "deepseek-fp8-training-2024",
    "title": "DeepSeek V3 FP8 training and inference",
    "authors": [
      "DeepSeek-AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-26",
    "venue": "DeepSeek V3 technical report",
    "url": "https://arxiv.org/abs/2412.19437",
    "summary": "Trained natively in FP8. Fidelity verified at training time.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Strong technical report",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "deepseek-h800-2024-arxiv",
    "title": "DeepSeek H800 infrastructure paper",
    "authors": [
      "DeepSeek-AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-26",
    "venue": "arXiv 2412.19437",
    "url": "https://arxiv.org/abs/2412.19437",
    "summary": "DeepSeek V3 trained on H800 cluster. 2048 H800 GPUs. Cost-efficiency claims.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Reasonable transparency claim",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "deepseek-v3-h800-vs-groq-2025",
    "title": "DeepSeek V3 671B on H800 vs Cerebras R1 distill",
    "authors": [
      "DeepSeek / Cerebras / community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-15",
    "venue": "Various",
    "url": "https://artificialanalysis.ai",
    "summary": "Open-stack H800 handles full 671B; Cerebras runs distilled 70B only.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B7/B11 \u2014 architecture coverage favors open",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "deepseek-v3-mla-2024",
    "title": "DeepSeek V3 Multi-head Latent Attention (MLA)",
    "authors": [
      "DeepSeek-AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-26",
    "venue": "DeepSeek tech report",
    "url": "https://arxiv.org/abs/2412.19437",
    "summary": "MLA compresses KV cache by 7x. Fidelity verified at training.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Native attention design, not post-hoc compression",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "deepspeed-mii-2024-microsoft",
    "title": "DeepSpeed-MII: Token-level scheduling for LLM serving",
    "authors": [
      "Microsoft DeepSpeed team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-11-01",
    "venue": "Microsoft Research blog",
    "url": "https://github.com/microsoft/DeepSpeed-MII",
    "summary": "Persistent micro-batching. Claims 2.3x throughput vs vLLM baseline.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Older comparison; vLLM since rewrote scheduler. Cross-framework variance documented unstable.",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "distserve-2024-osdi",
    "title": "DistServe: Disaggregating Prefill and Decoding",
    "authors": [
      "Yinmin Zhong",
      "Shengyu Liu",
      "Junda Chen",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-15",
    "venue": "OSDI 2024",
    "url": "https://arxiv.org/abs/2401.09670",
    "summary": "Goodput-oriented inference scheduling. Cross-vendor not measured.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Independent academic, peer-reviewed; framework-agnostic implementation",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "draft-model-fidelity-2024",
    "title": "Draft model fidelity in speculative decoding",
    "authors": [
      "Various academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "arXiv 2405.04304",
    "url": "https://arxiv.org/abs/2405.04304",
    "summary": "Acceptance rate degrades for some long-tail domains.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "B6 \u2014 fidelity caveat",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "dynamo-2025-gtc-keynote",
    "title": "NVIDIA Dynamo: AI Inference Operating System",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-03-18",
    "venue": "NVIDIA GTC 2025 keynote + GitHub release",
    "url": "https://github.com/ai-dynamo/dynamo",
    "summary": "Disaggregated prefill/decode inference. Claims 30x throughput vs vLLM baseline on GB200 NVL72.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Vendor self-eval on vendor hardware. Comparison config not matched.",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "eagle-2024-icml",
    "title": "EAGLE: Speculative Sampling Requires Rethinking Feature Uncertainty",
    "authors": [
      "Yuhui Li",
      "Fangyun Wei",
      "Chao Zhang",
      "Hongyang Zhang"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "ICML 2024",
    "url": "https://arxiv.org/abs/2401.15077",
    "summary": "Feature-level speculation. ~3x speedup on Llama 70B with full fidelity.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.95,
    "watchlist_tier": null,
    "notes": "Strong peer-reviewed",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "eagle2-2024-arxiv",
    "title": "EAGLE-2: Dynamic Draft Trees",
    "authors": [
      "Yuhui Li",
      "Fangyun Wei",
      "Chao Zhang",
      "Hongyang Zhang"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "arXiv 2406.16858",
    "url": "https://arxiv.org/abs/2406.16858",
    "summary": "Dynamic tree-based draft. 4x speedup. Lossless wrt target dist.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B6 closure",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "epoch-ai-compute-trends-2024",
    "title": "Epoch AI: Compute trends in frontier AI",
    "authors": [
      "Epoch AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-15",
    "venue": "Epoch AI report",
    "url": "https://epoch.ai/blog/training-compute-of-frontier-ai-models-grows-by-4-5x-per-year",
    "summary": "Compute scaling analysis. Inference cost data sparse.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B2 partial",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "epoch-ai-quant-cost-2024",
    "title": "Epoch AI: Quantization cost reductions and capability tradeoff",
    "authors": [
      "Epoch AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Epoch AI report",
    "url": "https://epoch.ai",
    "summary": "Analysis of cost-per-token reductions from quantization vs capability impact.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Third-party; B2 closer",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "epoch-deepseek-cost-2025",
    "title": "Epoch AI: DeepSeek V3 training cost analysis",
    "authors": [
      "Epoch AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-01-15",
    "venue": "Epoch AI",
    "url": "https://epoch.ai/blog/deepseek-v3-development",
    "summary": "DeepSeek V3 $6M claim analyzed. Includes inference at scale.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B2",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "epoch-frontier-cost-decline-2024",
    "title": "Epoch AI: GPT-3.5 to GPT-4o cost decline",
    "authors": [
      "Epoch AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Epoch AI",
    "url": "https://epoch.ai",
    "summary": "Per-token cost dropped 100x from GPT-3.5 to GPT-4o.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B2 partial",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "epoch-frontier-inference-2025",
    "title": "Epoch AI: Frontier inference cost projections",
    "authors": [
      "Epoch AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-03-15",
    "venue": "Epoch AI",
    "url": "https://epoch.ai",
    "summary": "Per-task inference cost trends for frontier models.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B2",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "epoch-gpu-availability-2024",
    "title": "Epoch AI: GPU supply analysis",
    "authors": [
      "Epoch AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-15",
    "venue": "Epoch AI",
    "url": "https://epoch.ai",
    "summary": "GPU supply chain analysis.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B10",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "epoch-inference-cost-2024",
    "title": "Epoch AI: Inference cost trends",
    "authors": [
      "Epoch AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "Epoch AI",
    "url": "https://epoch.ai",
    "summary": "Inference cost declining ~10x/year. Hardware + software contributions.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B2",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "epoch-quantization-cost-2024",
    "title": "Epoch AI: Quantization driving inference cost down",
    "authors": [
      "Epoch AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-15",
    "venue": "Epoch AI",
    "url": "https://epoch.ai",
    "summary": "Quantization contributes ~30% of inference cost reduction.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B8 third-party",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "etched-funding-2024-primary",
    "title": "Etched $120M Series A",
    "authors": [
      "Etched press release"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-25",
    "venue": "Press releases",
    "url": "https://www.etched.com/announcing-etched",
    "summary": "Funding announcement; no production silicon or independent benchmark.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Pre-product; cherry-pick risk extreme",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "etched-paper-projection-2024",
    "title": "Etched Sohu vs B200 projection",
    "authors": [
      "Etched"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-25",
    "venue": "Etched blog",
    "url": "https://www.etched.com",
    "summary": "20x B200 cost-perf projection. Pre-silicon.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B7 \u2014 vendor projection vs realized",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "etched-sohu-2024-launch",
    "title": "Etched Sohu: transformer-specialized ASIC",
    "authors": [
      "Etched Labs"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-25",
    "venue": "Etched.com whitepaper",
    "url": "https://www.etched.com/announcing-etched",
    "summary": "Transformer-only ASIC. Claims 500K+ tok/s Llama 70B at 1/10 cost. No silicon shipped.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Pre-silicon vendor projection; M4 commercialization-vs-research",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "etched-sohu-hot-chips-2024",
    "title": "Etched Sohu architecture at Hot Chips 2024",
    "authors": [
      "Etched Labs"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-25",
    "venue": "Hot Chips 2024 talk",
    "url": "https://www.hotchips.org/hc36",
    "summary": "Transformer-only ASIC; no MoE/other arch support. Pre-silicon.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Architecture-locked; universal-platform coverage fails",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "etched-vs-blackwell-projection-2024",
    "title": "Etched Sohu vs Blackwell B200 hypothetical",
    "authors": [
      "Industry analyses"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "SemiAnalysis",
    "url": "https://www.semianalysis.com",
    "summary": "Industry skepticism on Etched perf claims pre-silicon.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B7 \u2014 closed vendor unverified",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "etched-vs-h100-projection-2024",
    "title": "Etched Sohu vs H100 projection",
    "authors": [
      "Etched"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-25",
    "venue": "Etched whitepaper",
    "url": "https://www.etched.com",
    "summary": "Claims 20x H100 cost-perf. Pre-silicon, unverifiable.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B7 \u2014 closed vendor pre-product cherry-pick",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "exl2-quant-perplexity-2024",
    "title": "EXL2 quant ablation across model sizes",
    "authors": [
      "turboderp / community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "exllamav2 GitHub",
    "url": "https://github.com/turboderp/exllamav2",
    "summary": "Mixed-precision EXL2 perplexity by bits-per-weight.",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Community PPL only",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "exllamav2-2024-turboderp",
    "title": "ExLlamaV2 EXL2 quantization format",
    "authors": [
      "turboderp"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-15",
    "venue": "ExLlamaV2 GitHub",
    "url": "https://github.com/turboderp/exllamav2",
    "summary": "Mixed-precision quant format. Community benchmarks limited to perplexity.",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Independent open-source; PPL-only fidelity reporting",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "fastertransformer-2024-archive",
    "title": "FasterTransformer deprecation in favor of TensorRT-LLM",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-12-01",
    "venue": "GitHub archive",
    "url": "https://github.com/NVIDIA/FasterTransformer",
    "summary": "Legacy NVIDIA inference framework, now archived. Inherits vendor-lock-in concerns.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "M5 vendor-platform-only; not portable",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "fireworks-vs-together-2024",
    "title": "Fireworks vs Together AI vs Anyscale cost",
    "authors": [
      "AA / community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-15",
    "venue": "AA",
    "url": "https://artificialanalysis.ai/providers",
    "summary": "Same model 2-3x cost variance across hosts.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B5",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "flash-attention-3-2024",
    "title": "FlashAttention-3: Fast Async Attention",
    "authors": [
      "Jay Shah",
      "Ganesh Bikshandi",
      "Ying Zhang",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-15",
    "venue": "arXiv 2407.08608",
    "url": "https://arxiv.org/abs/2407.08608",
    "summary": "FA3 with H100 async + FP8. Numerical fidelity claims maintained.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Independent",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "flashinfer-2024-yi-tay",
    "title": "FlashInfer: Efficient and Customizable Attention Engine for LLM Inference Serving",
    "authors": [
      "Zihao Ye",
      "Lequn Chen",
      "Ruihang Lai",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-01-08",
    "venue": "arXiv 2401.08671",
    "url": "https://arxiv.org/abs/2401.08671",
    "summary": "Block-sparse attention kernels integrated into vLLM/SGLang. Reports 2.3x perf vs FlashAttention.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Peer-reviewed, reproducible kernel benchmarks",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "flexgen-2023-icml",
    "title": "FlexGen: High-Throughput Generative Inference with Single GPU",
    "authors": [
      "Ying Sheng",
      "Lianmin Zheng",
      "Binhang Yuan",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-06-15",
    "venue": "ICML 2023",
    "url": "https://arxiv.org/abs/2303.06865",
    "summary": "Offloading-based inference for single-GPU large models. Pre-vLLM-era.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Batch-only; streaming workload unsuitable. Historical baseline.",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "fp4-blackwell-mxfp4-2024",
    "title": "MXFP4/MXFP6 Microscaling FP formats",
    "authors": [
      "Open Compute Project + NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-15",
    "venue": "OCP MX spec",
    "url": "https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf",
    "summary": "OCP standard for FP4 microscaling. Used in Blackwell hardware.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Spec; capability fidelity assessment still pending",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "fp4-training-quant-2024",
    "title": "FP4 Training of LLMs",
    "authors": [
      "Various Microsoft + DeepSeek"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-15",
    "venue": "arXiv 2410.19313",
    "url": "https://arxiv.org/abs/2410.19313",
    "summary": "FP4 native training. Surprising fidelity claims.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "G2 \u2014 claims strong but not yet replicated at scale",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "ggml-2024-quant-ablation-readme",
    "title": "GGML quant format ablation summaries",
    "authors": [
      "ggml contributors"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "ggml repo",
    "url": "https://github.com/ggerganov/ggml",
    "summary": "Aggregated community PPL measurements for Q2-Q8 quants. No reasoning-task fidelity.",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Community-led; not formal eval",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "google-cloud-cost-models-2024",
    "title": "Google Cloud per-token Gemini pricing model",
    "authors": [
      "Google Cloud"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "GCloud pricing",
    "url": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
    "summary": "Token pricing for Gemini. Underlying infra cost not disclosed.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B2",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "google-ironwood-vs-trillium-2025",
    "title": "Ironwood vs Trillium TPU benchmarks",
    "authors": [
      "Google Cloud"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-09",
    "venue": "Google Cloud Next 2025",
    "url": "https://cloud.google.com/blog/products/compute/whats-new-with-ai-hypercomputer",
    "summary": "5x perf claim. Inference-only TPU. Internal benchmarks.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "M1 self-eval",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "google-tpu-jetstream-2024",
    "title": "Google JetStream: TPU-native LLM serving",
    "authors": [
      "Google DeepMind / Cloud"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-15",
    "venue": "GitHub",
    "url": "https://github.com/AI-Hypercomputer/JetStream",
    "summary": "TPU-only inference engine. Targets Llama/Gemma on TPU v5e+.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "TPU-only; B11 fails",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "google-vertex-gemini-vs-anthropic-2024",
    "title": "Vertex Gemini TPU vs Anthropic Claude on GPU/TPU",
    "authors": [
      "GCloud"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Vertex pricing",
    "url": "https://cloud.google.com",
    "summary": "Multiple inference paths; cross-vendor variance significant.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "B5",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "google-vertex-vs-bedrock-2024",
    "title": "Vertex AI vs Bedrock model performance gaps",
    "authors": [
      "Various developer reports"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Developer forums",
    "url": "https://news.ycombinator.com",
    "summary": "Reports of latency/throughput differences between Vertex (TPU/GPU) and Bedrock (Inferentia/GPU).",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Cross-cloud variance documented anecdotally",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "gptq-2023-frantar",
    "title": "GPTQ: Accurate Post-Training Quantization for Generative Pre-trained Transformers",
    "authors": [
      "Elias Frantar",
      "Saleh Ashkboos",
      "Torsten Hoefler",
      "Dan Alistarh"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-03-15",
    "venue": "ICLR 2023",
    "url": "https://arxiv.org/abs/2210.17323",
    "summary": "INT4/INT3 weight-only PTQ. Llama 65B INT4 with minimal perplexity drop.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.95,
    "watchlist_tier": null,
    "notes": "Peer-reviewed; baseline",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "gqa-llama-2-2023",
    "title": "Grouped-Query Attention (GQA)",
    "authors": [
      "Joshua Ainslie",
      "James Lee-Thorp",
      "Michiel de Jong",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-12-15",
    "venue": "EMNLP 2023",
    "url": "https://arxiv.org/abs/2305.13245",
    "summary": "GQA reduces KV cache by 8x. Fidelity-preserving in design.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.95,
    "watchlist_tier": null,
    "notes": "Native arch design; Llama 2/3 use",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "graphcore-bow-2024-end-of-line",
    "title": "Graphcore Bow IPU end-of-line announcement",
    "authors": [
      "Graphcore"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-15",
    "venue": "Graphcore press",
    "url": "https://www.graphcore.ai",
    "summary": "Acquired by Softbank. Limited LLM inference success.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Commercial-availability gap; closed shop",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "groq-availability-vs-aws-2024",
    "title": "Groq availability vs AWS Bedrock",
    "authors": [
      "Industry"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-15",
    "venue": "Provider docs",
    "url": "https://aws.amazon.com/bedrock/",
    "summary": "Groq capacity-constrained; AWS Bedrock production-stable.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B10 \u2014 commercial availability gap",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "groq-batch-mode-2024",
    "title": "Groq batch API launch",
    "authors": [
      "Groq Inc."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-15",
    "venue": "Groq blog",
    "url": "https://groq.com",
    "summary": "Batch API offered with cost reduction. Throughput-per-batch not characterized vs streaming.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "B4 partial \u2014 streaming vs batch differentiation but no fidelity comparison",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "groq-batch-throughput-2024",
    "title": "Groq batch-vs-streaming behavior anecdotes",
    "authors": [
      "Various developer reports"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "Developer forums",
    "url": "https://github.com/groq/groq-python/issues",
    "summary": "Developers report batched throughput well below single-stream marketing.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Anecdotal but consistent; B4 batch-vs-streaming discrepancy",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "groq-deepseek-r1-2025",
    "title": "Groq DeepSeek R1 distilled support",
    "authors": [
      "Groq Inc."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-15",
    "venue": "Groq blog",
    "url": "https://groq.com/groqcloud-makes-deepseek-r1-distill-llama-70b-available/",
    "summary": "DeepSeek R1 distilled to Llama 70B for Groq. Original DSV3 not supported (MoE constraint).",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Universal coverage fails \u2014 model architecture constraints",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "groq-energy-2024",
    "title": "Groq per-token energy claim",
    "authors": [
      "Groq Inc."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-02-15",
    "venue": "Groq blog",
    "url": "https://wow.groq.com",
    "summary": "Claims 1-3 J/token. Independent verification absent.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "M1 vendor-supplied",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "groq-llama-3.3-70b-2024",
    "title": "Groq Llama 3.3 70B benchmark",
    "authors": [
      "Groq Inc."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-10",
    "venue": "Groq blog",
    "url": "https://groq.com/inferences-on-llama-3-3/",
    "summary": "Claims 276 tok/s on Llama 3.3 70B.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Vendor-supplied; M1",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "groq-llama3-tps-2024",
    "title": "Groq Llama 3 70B throughput claims",
    "authors": [
      "Groq Inc."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-22",
    "venue": "Groq blog",
    "url": "https://wow.groq.com/groq-2-4x-faster-than-the-fastest/",
    "summary": "Claims 877 tok/s on Llama 3 70B. Single-stream latency-optimized.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Single-stream; multi-tenant throughput economics unstated",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "groq-llama4-2025-claim",
    "title": "Groq Llama 4 support announcement",
    "authors": [
      "Groq Inc."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-15",
    "venue": "Groq blog",
    "url": "https://groq.com/llama-4-on-groq/",
    "summary": "Claims fastest Llama 4 inference. MoE architecture-specific limitations.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Architecture coverage caveat; MoE may not run as efficiently as dense",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "groq-lpu-2024-launch-paper",
    "title": "Groq LPU: Language Processing Unit architecture",
    "authors": [
      "Groq Inc."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-02-19",
    "venue": "Groq.com whitepaper",
    "url": "https://wow.groq.com/lpu-inference-engine/",
    "summary": "Single-chip deterministic inference. Claims 500+ tok/s on Llama 2 70B.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Vendor whitepaper; M1 disqualification. Workload-narrow benchmark.",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "groq-mixtral-2024-2x-claim",
    "title": "Groq Mixtral 8x7B claim",
    "authors": [
      "Groq Inc."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-15",
    "venue": "Groq blog",
    "url": "https://wow.groq.com",
    "summary": "Claims 480 tok/s on Mixtral 8x7B.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Single MoE model; cross-architecture coverage limited",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "groq-no-spec-decode-2024",
    "title": "Groq deterministic batch=1 vs speculative debate",
    "authors": [
      "Groq engineering / blog"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-15",
    "venue": "Groq blog",
    "url": "https://wow.groq.com",
    "summary": "Groq's deterministic architecture doesn't use speculative decoding the same way.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Architecture asymmetry",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "groq-quant-2024-fp16-claim",
    "title": "Groq FP16-only inference vs FP8 competitors",
    "authors": [
      "Groq"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "Groq materials",
    "url": "https://groq.com",
    "summary": "Groq runs FP16; claims fidelity advantage over FP8 competitors.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "M1 vendor framing",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "groq-spec-decode-2024",
    "title": "Groq speculative decoding claim",
    "authors": [
      "Groq Inc."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-15",
    "venue": "Groq blog",
    "url": "https://wow.groq.com/news_press/groq-speculative-decoding/",
    "summary": "Claims speculative decoding boosts throughput. Fidelity vs FP16 baseline not characterized.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "M1 vendor-supplied; fidelity audit absent",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "groq-spec-decode-claim-2024",
    "title": "Groq + EAGLE speculative decoding claim",
    "authors": [
      "Groq"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-15",
    "venue": "Groq blog",
    "url": "https://groq.com",
    "summary": "Claims integration of EAGLE on LPU.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "M1 vendor",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "groq-tractive-cost-2024",
    "title": "Groq commercial pricing model",
    "authors": [
      "Groq Inc."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "Groq pricing page",
    "url": "https://groq.com/pricing/",
    "summary": "Tokens per dollar quoted but underlying infra cost opaque.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B2 cost transparency challenged",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "groq-vs-cerebras-aa-leaderboard-2024",
    "title": "ArtificialAnalysis cross-provider leaderboard",
    "authors": [
      "ArtificialAnalysis.ai"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-15",
    "venue": "ArtificialAnalysis.ai",
    "url": "https://artificialanalysis.ai/models/llama-3-3-instruct-70b/providers",
    "summary": "Shows Groq, Cerebras, SambaNova within 2-3x of each other but with high variance day-over-day.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Direct evidence B5 empty-space \u2014 variance well above 10%",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "groq-vs-cerebras-llama3-2024",
    "title": "Groq Llama 3 70B vs Cerebras Llama 3.1 70B",
    "authors": [
      "AA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "ArtificialAnalysis",
    "url": "https://artificialanalysis.ai",
    "summary": "Cerebras 1.8K tok/s vs Groq 280 tok/s. Different design points.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B5 \u2014 within closed-stack variance 6x",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "groq-vs-h100-aa-2024",
    "title": "ArtificialAnalysis Groq vs H100 cross-vendor",
    "authors": [
      "ArtificialAnalysis.ai"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-15",
    "venue": "ArtificialAnalysis.ai",
    "url": "https://artificialanalysis.ai/providers",
    "summary": "Independent comparison showing Groq fast at small batch but variable across providers.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Third-party measurement supporting B5 empty-space",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "groq-vs-h100-comparison-2024",
    "title": "Groq vs H100 vs MI300X cross-vendor variance",
    "authors": [
      "Industry analyses"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "Various",
    "url": "https://artificialanalysis.ai",
    "summary": "Aggregated cross-vendor data shows >10x variance in some configs.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B5 closure",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "groq-vs-sambanova-2024",
    "title": "Groq vs SambaNova head-to-head Llama 3.1 70B",
    "authors": [
      "AA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-15",
    "venue": "AA",
    "url": "https://artificialanalysis.ai",
    "summary": "Groq 280 tok/s vs SambaNova 460 tok/s on Llama 3.1 70B.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B5",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "groq-vs-vllm-deepseek-r1-2025",
    "title": "Groq DSR1 distilled 70B vs vLLM full DSR1 671B",
    "authors": [
      "AA + community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-15",
    "venue": "AA",
    "url": "https://artificialanalysis.ai",
    "summary": "Groq distill faster but smaller model; vLLM full reasoning trace.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B7/B11 \u2014 open-stack coverage advantage",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "groq-vs-vllm-h100-cost-2024",
    "title": "Cost-per-token Groq vs vLLM H100 comparison",
    "authors": [
      "TensorWave / industry"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "Industry analyses",
    "url": "https://artificialanalysis.ai/providers",
    "summary": "Groq lists $0.59/M tokens; vLLM-providers $0.50-$0.80/M. Comparable at scale.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B2/B7 \u2014 comparable but variable",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "groq-vs-vllm-llama4-2025",
    "title": "Llama 4 Groq vs vLLM cross-vendor",
    "authors": [
      "AA + community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-15",
    "venue": "ArtificialAnalysis",
    "url": "https://artificialanalysis.ai",
    "summary": "Groq Llama 4 fastest single-stream; vLLM H200 cheaper at scale.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B7",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "groq-vs-vllm-multilora-2024",
    "title": "Multi-tenant LoRA: Groq vs vLLM Punica",
    "authors": [
      "Industry comparison"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Various",
    "url": "https://github.com/vllm-project/vllm",
    "summary": "Groq doesn't support multi-LoRA the same way; vLLM Punica enables it natively.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B11 \u2014 universal coverage favors open",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "groq-vs-vllm-streaming-2024",
    "title": "Groq vs vLLM streaming behavior under load",
    "authors": [
      "Various developer reports"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-15",
    "venue": "GitHub issues",
    "url": "https://github.com/vllm-project/vllm/issues",
    "summary": "Groq excels at single-stream; degrades under high concurrency. vLLM opposite.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B4 \u2014 batch vs streaming asymmetry",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "h100-fp8-llama-405b-perf-2024",
    "title": "NVIDIA H100 FP8 Llama 3.1 405B",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-23",
    "venue": "NVIDIA blog",
    "url": "https://developer.nvidia.com/blog/scaling-llama-3-1-405b-with-tensorrt-llm/",
    "summary": "FP8 405B on 8x H100. Claims throughput. Fidelity vs FP16 not deeply measured.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "M1 vendor; B8 fidelity audit incomplete",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "h100-vs-h200-mosaic-2024",
    "title": "Databricks MosaicML H200 measurements",
    "authors": [
      "Databricks/MosaicML"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-15",
    "venue": "Databricks blog",
    "url": "https://www.databricks.com/blog",
    "summary": "Third-party H200 inference for Llama 3 70B.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Strong-baseline measurement",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "h100-vs-h200-real-perf-2024",
    "title": "Independent H100 vs H200 Llama 3 70B benchmark",
    "authors": [
      "ArtificialAnalysis.ai"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "AA + Modal",
    "url": "https://artificialanalysis.ai",
    "summary": "H200 shows ~1.5x throughput on Llama 3 70B (not 2x claimed).",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Third-party measurement: ~1.5x measured vs 2x vendor claim (about 25% below)",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "h200-vs-mi300x-aa-2024",
    "title": "ArtificialAnalysis H200 vs MI300X providers",
    "authors": [
      "ArtificialAnalysis.ai"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-15",
    "venue": "ArtificialAnalysis.ai",
    "url": "https://artificialanalysis.ai/providers",
    "summary": "Independent benchmark across providers. MI300X providers show wider variance.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B5 third-party \u2014 variance documented",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "h2o-kv-cache-2023",
    "title": "H2O: Heavy-Hitter Oracle for Efficient Generative Inference",
    "authors": [
      "Zhenyu Zhang",
      "Ying Sheng",
      "Tianyi Zhou",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-12-15",
    "venue": "NeurIPS 2023",
    "url": "https://arxiv.org/abs/2306.14048",
    "summary": "Aggressive KV cache eviction. ~20% capability drop on long contexts.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B6 \u2014 KV cache compression has fidelity cost",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "huawei-ascend-910c-2024",
    "title": "Huawei Ascend 910C launch",
    "authors": [
      "Huawei"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Huawei Connect 2024",
    "url": "https://www.huawei.com/en/news",
    "summary": "China-domestic alternative to H100. Limited international availability.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "M4 \u2014 geo-restricted",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "huawei-ascend-cluster-2024",
    "title": "Huawei Atlas Ascend 910C cluster announcement",
    "authors": [
      "Huawei"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Huawei Connect 2024",
    "url": "https://www.huawei.com",
    "summary": "Ascend 910C in datacenter cluster. China-only market.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Coverage limited; B11 fails outside ecosystem",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "huawei-ascend-r1-2025-claim",
    "title": "Huawei Ascend 910C running DeepSeek R1",
    "authors": [
      "Huawei"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-15",
    "venue": "Huawei press / SCMP",
    "url": "https://www.huawei.com",
    "summary": "Huawei claims Ascend 910C runs DeepSeek R1 at competitive throughput.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "M1; no independent verification",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "huggingface-quanto-2024",
    "title": "HuggingFace Quanto FP8/INT8/INT4 toolkit",
    "authors": [
      "HuggingFace"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-15",
    "venue": "HF blog",
    "url": "https://huggingface.co/blog/quanto-introduction",
    "summary": "Quanto Python toolkit. Fidelity claims limited to standard benchmarks.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "M1 quasi-vendor; capability eval narrow",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "kivi-kv-cache-2024",
    "title": "KIVI: A Tuning-Free Asymmetric 2bit Quantization for KV Cache",
    "authors": [
      "Zirui Liu",
      "Jiayi Yuan",
      "Hongye Jin",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-02-15",
    "venue": "arXiv 2402.02750",
    "url": "https://arxiv.org/abs/2402.02750",
    "summary": "2-bit KV cache. 2.6x throughput. Fidelity claims.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B6 partial",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "ktransformers-2024-tsinghua",
    "title": "KTransformers: heterogeneous CPU+GPU inference",
    "authors": [
      "Tsinghua MADSys lab"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "KTransformers GitHub",
    "url": "https://github.com/kvcache-ai/ktransformers",
    "summary": "Heterogeneous offload for MoE models. Claims DeepSeek-V2 on 24GB consumer GPU.",
    "candidate_bill": "Bill_4",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Consumer-grade fidelity; streaming-vs-batch behavior under contention not measured",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "kuleshov-q4-degradation-2024",
    "title": "Quantization degradation on reasoning tasks",
    "authors": [
      "Kuleshov et al. (Cornell)"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "arXiv preprint",
    "url": "https://arxiv.org/abs/2410.09083",
    "summary": "INT4 quant shows ~10% drop on reasoning tasks vs FP16.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B8 \u2014 strong evidence quant degrades real capability",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "kv-cache-deepspeed-fastgen-2024",
    "title": "DeepSpeed-FastGen dynamic SplitFuse",
    "authors": [
      "Microsoft DeepSpeed"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-11-15",
    "venue": "Microsoft Research blog",
    "url": "https://github.com/microsoft/DeepSpeed",
    "summary": "Token-level scheduling with KV cache reuse.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Vendor implementation",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "kv-cache-quant-kvquant-2024",
    "title": "KVQuant: Towards 10 Million Context Length",
    "authors": [
      "Coleman Hooper",
      "Sehoon Kim",
      "Hiva Mohammadzadeh",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-02-15",
    "venue": "arXiv 2401.18079",
    "url": "https://arxiv.org/abs/2401.18079",
    "summary": "Per-token + per-channel KV cache quant. 4-bit KV with minimal degradation.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Independent",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "kv-cache-storage-cost-2024",
    "title": "KV Cache as a percentage of inference cost",
    "authors": [
      "SemiAnalysis"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "SemiAnalysis",
    "url": "https://www.semianalysis.com",
    "summary": "Estimates KV cache memory dominates inference cost at long context.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B2 \u2014 cost transparency around KV cache",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "kwon-2023-vllm-pagedattention-sosp",
    "title": "Efficient Memory Management for Large Language Model Serving with PagedAttention",
    "authors": [
      "Woosuk Kwon",
      "Zhuohan Li",
      "Siyuan Zhuang",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-10-15",
    "venue": "SOSP 2023",
    "url": "https://arxiv.org/abs/2309.06180",
    "summary": "Original PagedAttention paper. Demonstrates 2-4x throughput improvements at matched latency.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.95,
    "watchlist_tier": null,
    "notes": "Peer-reviewed, widely reproduced. Baseline reference.",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "lightmatter-2025-funding",
    "title": "Lightmatter Series D funding for Passage 3D",
    "authors": [
      "Lightmatter Inc."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-15",
    "venue": "Lightmatter press release",
    "url": "https://lightmatter.co/news/lightmatter-passage-launch/",
    "summary": "Continued funding; no commercial LLM inference benchmark.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Research-preview",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "lightmatter-envise-2024",
    "title": "Lightmatter Envise: photonic inference chip",
    "authors": [
      "Lightmatter Inc."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-12-15",
    "venue": "Lightmatter announcements",
    "url": "https://lightmatter.co/products/envise/",
    "summary": "Photonic compute claims; no public LLM benchmark.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Research-preview gap",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "lightmatter-passage-2024-natelectronics",
    "title": "Lightmatter Passage optical interconnect",
    "authors": [
      "Lightmatter Inc."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-26",
    "venue": "Nature Electronics + Lightmatter blog",
    "url": "https://lightmatter.co/products/passage/",
    "summary": "Photonic interposer for high-bandwidth chip-to-chip. No commercial inference benchmark yet.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "M4 commercialization-vs-research; no available product",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "llama-cpp-quant-tests-2024",
    "title": "llama.cpp K-quant accuracy ablation (community)",
    "authors": [
      "IK_llama and community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-15",
    "venue": "llama.cpp wiki",
    "url": "https://github.com/ggerganov/llama.cpp/wiki/Tensor-Encoding-Schemes",
    "summary": "Q2_K through Q8_0 quantization with PPL deltas. Q4_K_M: ~3% PPL increase from FP16.",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "PPL-only; not capability/reasoning eval",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "llamacpp-2024-q4km-readme",
    "title": "llama.cpp: GGUF Q4_K_M quantization characterization",
    "authors": [
      "Georgi Gerganov",
      "llama.cpp contributors"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "llama.cpp GitHub",
    "url": "https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/README.md",
    "summary": "Documents perplexity deltas for K-quant formats vs FP16 across Llama 2/3.",
    "candidate_bill": "Bill_3",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Perplexity != downstream capability; many follow-up reports show large divergence on reasoning",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "llamacpp-vs-mlx-vs-ollama-2024",
    "title": "Local inference comparison: llama.cpp vs MLX vs Ollama",
    "authors": [
      "Community benchmarks"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Hacker News / r/LocalLLaMA",
    "url": "https://news.ycombinator.com",
    "summary": "On M3 Max: MLX 1.3x faster than llama.cpp for 7B. Ollama wraps llama.cpp.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "B7 \u2014 open-stack variance",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "llamacpp-vs-vllm-cpu-2024",
    "title": "llama.cpp CPU vs vLLM GPU at 7B",
    "authors": [
      "Community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "GitHub discussions",
    "url": "https://github.com/ggerganov/llama.cpp",
    "summary": "llama.cpp on Threadripper competitive with vLLM on consumer GPU at 7B.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "B7",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "llm-compressor-vllm-fp8-2024",
    "title": "llm-compressor library (vLLM team)",
    "authors": [
      "vLLM team / Neural Magic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-15",
    "venue": "GitHub",
    "url": "https://github.com/vllm-project/llm-compressor",
    "summary": "FP8/INT4/INT8 quant library for vLLM. Recipe-based.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Vendor toolkit",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "llm-int8-2022-dettmers",
    "title": "LLM.int8(): 8-bit Matrix Multiplication",
    "authors": [
      "Tim Dettmers",
      "Mike Lewis",
      "Younes Belkada",
      "Luke Zettlemoyer"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2022-08-15",
    "venue": "NeurIPS 2022",
    "url": "https://arxiv.org/abs/2208.07339",
    "summary": "Foundational LLM int8 quant. Mixed-precision for outliers.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.95,
    "watchlist_tier": null,
    "notes": "Peer-reviewed baseline",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "lmcache-2024-sglang-vllm",
    "title": "LMCache: Cross-instance KV caching",
    "authors": [
      "LMCache team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "arXiv + LMCache GitHub",
    "url": "https://github.com/LMCache/LMCache",
    "summary": "Persistent KV cache across vLLM/SGLang instances.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Mechanism plausible; large-scale prod data not yet public",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "lmdeploy-2024-internlm-rls",
    "title": "LMDeploy with TurboMind backend",
    "authors": [
      "InternLM team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "LMDeploy GitHub",
    "url": "https://github.com/InternLM/lmdeploy",
    "summary": "Claims faster than vLLM on InternLM models. Cross-vendor at matched workload not reported.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Self-reported, vendor-favorable model selection",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "lmstudio-2024-rls-readme",
    "title": "LM Studio runtime engines",
    "authors": [
      "LM Studio Inc."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "LM Studio docs",
    "url": "https://lmstudio.ai",
    "summary": "GUI wrapper over llama.cpp/MLX runtimes. No published independent perf measurements.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Marketing-grade benchmark transparency only",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "lookahead-decoding-2024-mit",
    "title": "Lookahead Decoding for Parallel LLM Generation",
    "authors": [
      "Yichao Fu",
      "Peter Bailis",
      "Ion Stoica",
      "Hao Zhang"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-02-15",
    "venue": "ICML 2024",
    "url": "https://arxiv.org/abs/2402.02057",
    "summary": "No draft model needed. Parallel n-gram speculative.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Independent",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "lookahead-spec-decode-mlx-2024",
    "title": "MLX speculative decoding integration",
    "authors": [
      "Apple ML"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "MLX examples",
    "url": "https://github.com/ml-explore/mlx-examples",
    "summary": "MLX supports speculative decoding. Apple Silicon-only.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "B11 \u2014 platform-limited",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "marlin-vllm-int4-2024",
    "title": "Marlin: Mixed Precision Inference Kernels",
    "authors": [
      "IST Austria / Alistarh group"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "arXiv 2408.11743",
    "url": "https://arxiv.org/abs/2408.11743",
    "summary": "INT4 W4A16 kernels for A100/H100. Throughput claims.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Academic",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "medusa-2024-mlsys",
    "title": "Medusa: Simple LLM Inference Acceleration Framework",
    "authors": [
      "Tianle Cai",
      "Yuhong Li",
      "Zhengyang Geng",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-01-15",
    "venue": "arXiv 2401.10774",
    "url": "https://arxiv.org/abs/2401.10774",
    "summary": "Multi-head speculative draft. Self-draft. 2.3x speedup.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Independent; Vicuna eval",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "meta-mtia-v2-2024",
    "title": "Meta MTIA v2 inference chip",
    "authors": [
      "Meta"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-10",
    "venue": "Meta engineering blog",
    "url": "https://ai.meta.com/blog/next-generation-meta-training-inference-accelerator-AI-MTIA/",
    "summary": "Internal-only inference chip for Meta workloads.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "M5 platform-lock; M1 self-eval",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "metr-frontier-2025",
    "title": "METR frontier model autonomy evaluations",
    "authors": [
      "METR"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-03-15",
    "venue": "METR",
    "url": "https://metr.org",
    "summary": "Quantization-impact on agentic task completion.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B8",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "metr-quant-eval-2024",
    "title": "METR: Evaluating quantized model capabilities",
    "authors": [
      "METR (Megan Kinniment et al.)"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "METR blog",
    "url": "https://metr.org",
    "summary": "Quantization impact on agentic capability. Reveals downstream degradation invisible to MMLU.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B8 \u2014 fidelity audit shows quant degrades agentic capability",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "mi300x-vs-h100-semianalysis-2024",
    "title": "SemiAnalysis: MI300X vs H100 real-world",
    "authors": [
      "SemiAnalysis (Dylan Patel)"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-15",
    "venue": "SemiAnalysis Substack",
    "url": "https://www.semianalysis.com/p/mi300x-vs-h100-vs-h200-benchmark",
    "summary": "Independent extensive benchmark. MI300X advertised but ROCm gaps in real workload.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.95,
    "watchlist_tier": null,
    "notes": "Strong third-party analysis; partially closes B7 with caveats",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "microsoft-maia-100-2023-ignite",
    "title": "Microsoft Maia 100 launch",
    "authors": [
      "Microsoft Azure"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-11-15",
    "venue": "Microsoft Ignite 2023",
    "url": "https://news.microsoft.com/source/features/ai/in-house-chips-silicon-to-service-to-meet-ai-demand/",
    "summary": "First Microsoft AI accelerator. Internal use for OpenAI workloads.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "M1 vendor-only; M5 platform-lock",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "microsoft-maia-200-2025-rumor",
    "title": "Microsoft Maia 200 (Athena) reports",
    "authors": [
      "industry analysts"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-15",
    "venue": "The Information + SemiAnalysis",
    "url": "https://www.theinformation.com",
    "summary": "Reports of Maia 200 in development. No public benchmark.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Research-preview",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "minference-2024-microsoft",
    "title": "MInference: Dynamic Sparse Attention",
    "authors": [
      "Huiqiang Jiang",
      "Yucheng Li",
      "Chengruidong Zhang",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-15",
    "venue": "NeurIPS 2024",
    "url": "https://arxiv.org/abs/2407.02490",
    "summary": "Dynamic sparse attention for long context. ~10x speedup.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B6",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "minicache-kv-2024",
    "title": "MiniCache: KV Cache Compression in Depth",
    "authors": [
      "Akide Liu",
      "Jing Liu",
      "Zizheng Pan",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-15",
    "venue": "arXiv 2405.14366",
    "url": "https://arxiv.org/abs/2405.14366",
    "summary": "Layer-merging KV cache. 5x KV cache reduction.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B6",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "mlc-mlx-quant-2024",
    "title": "MLX 4-bit quant on Apple Silicon",
    "authors": [
      "Apple ML"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-15",
    "venue": "MLX docs",
    "url": "https://github.com/ml-explore/mlx-examples",
    "summary": "4-bit quant for on-device Apple Silicon. Limited fidelity audit.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "B8",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "mlperf-inf-v4.1-tpu-2024",
    "title": "MLPerf Inference v4.1 TPU submissions",
    "authors": [
      "MLCommons"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-04",
    "venue": "MLPerf",
    "url": "https://mlcommons.org/benchmarks/inference-datacenter/",
    "summary": "Google submitted TPU v5e results. Closed division.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Vendor-submitter bias",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "mlperf-inference-v4-1-submitters-2024",
    "title": "MLPerf v4.1 submitter analysis",
    "authors": [
      "MLCommons"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-04",
    "venue": "MLPerf",
    "url": "https://mlcommons.org",
    "summary": "Most submissions by NVIDIA, AMD, Intel-collaborators. Closed vendors absent.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B9 \u2014 coverage skewed",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "mlperf-inference-v5-2025",
    "title": "MLPerf Inference v5.0 results",
    "authors": [
      "MLCommons"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-02",
    "venue": "MLPerf",
    "url": "https://mlcommons.org/benchmarks/inference-datacenter/",
    "summary": "v5.0 includes Llama 3.1 405B. NVIDIA Blackwell dominant.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B9 \u2014 vendor self-eval dominance",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "mlperf-llama2-70b-2024-vendor-coverage",
    "title": "MLPerf Llama 2 70B vendor coverage gap",
    "authors": [
      "MLCommons"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-04",
    "venue": "MLPerf",
    "url": "https://mlcommons.org",
    "summary": "Many vendors absent from Llama 2 70B server scenario.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B9",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "mlperf-power-2024",
    "title": "MLPerf Inference Power category",
    "authors": [
      "MLCommons"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-04",
    "venue": "MLPerf",
    "url": "https://mlcommons.org",
    "summary": "Power category limited submitters; standardization difficult.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B13 audit gap",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "mlx-2024-apple-paper-arxiv",
    "title": "MLX: A unified array framework for Apple silicon",
    "authors": [
      "Apple ML Research"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-12",
    "venue": "Apple ML Research blog + GitHub",
    "url": "https://github.com/ml-explore/mlx",
    "summary": "Unified memory framework, lazy compute, M2/M3 optimization. No cross-vendor variance metrics published.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Apple-silicon-only; not directly comparable to CUDA stack at matched compute",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "mlx-vs-llamacpp-q4_k_m-2024",
    "title": "MLX Q4 vs llama.cpp Q4_K_M on M3 Max",
    "authors": [
      "Community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "GitHub discussions",
    "url": "https://github.com/ml-explore/mlx",
    "summary": "MLX 1.2-1.5x faster than llama.cpp on same hardware.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "B5",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "mlx-vs-vllm-7b-2024",
    "title": "Local MLX vs cloud vLLM 7B comparison",
    "authors": [
      "Community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "Hacker News",
    "url": "https://news.ycombinator.com",
    "summary": "MLX on M3 Max comparable to cloud H100 for batch=1 7B.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "B7 \u2014 surprising; reproducible",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "model-extraction-attack-2024",
    "title": "Model extraction attacks via inference APIs",
    "authors": [
      "Various security researchers"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-15",
    "venue": "arXiv 2407.06146",
    "url": "https://arxiv.org/abs/2407.06146",
    "summary": "Model extraction via inference API logit access.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B13 \u2014 safety/extraction angle",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "modular-vs-vllm-vs-trt-2024",
    "title": "Modular MAX vs vLLM vs TRT-LLM",
    "authors": [
      "Modular Inc."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Modular blog",
    "url": "https://www.modular.com",
    "summary": "Self-promotional; cross-framework variance documented.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "M1 self-interested but data documented",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "moonshot-deepseek-h800-2024",
    "title": "Moonshot/Mooncake on H800 (export-controlled)",
    "authors": [
      "Moonshot AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-24",
    "venue": "arXiv 2407.00079",
    "url": "https://arxiv.org/abs/2407.00079",
    "summary": "H800 fleet for Kimi. Cross-hardware portability not benchmarked.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Single-architecture",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "moreh-amd-2024-llama-3",
    "title": "Moreh AMD MI300X Llama 3 70B benchmark",
    "authors": [
      "Moreh AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-15",
    "venue": "Moreh blog",
    "url": "https://moreh.io",
    "summary": "Third-party MI300X inference measurements.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Limited third-party data",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "moreh-mi300-2024-benchmark",
    "title": "Moreh + AMD Llama 3 405B run",
    "authors": [
      "Moreh + AMD"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "Moreh blog",
    "url": "https://moreh.io",
    "summary": "Joint MI300X 405B run. Vendor-friendly.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "M1",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "negative-results-llm-bench-2024",
    "title": "Negative-results LLM benchmark hardware analysis",
    "authors": [
      "Various academic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-15",
    "venue": "Workshop on negative results",
    "url": "https://arxiv.org",
    "summary": "Highlights variability of LLM benchmark results across hardware.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "B1",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "neuralmagic-fp8-llama-2024",
    "title": "Neural Magic: FP8 W8A8 Llama 3 fidelity",
    "authors": [
      "Neural Magic / Red Hat"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-15",
    "venue": "Neural Magic blog",
    "url": "https://neuralmagic.com",
    "summary": "FP8 quant of Llama 3.1 70B and 405B. Claims <1% drop on standard benchmarks.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "M1 vendor; benchmark-narrow",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "neuron-sdk-2024-aws-rls",
    "title": "AWS Neuron SDK for Trainium/Inferentia",
    "authors": [
      "AWS Annapurna"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "AWS Neuron docs",
    "url": "https://awsdocs-neuron.readthedocs-hosted.com",
    "summary": "AWS-specific compiler/runtime. Llama 3 70B support. No cross-cloud portability.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Universal-platform fails \u2014 AWS-lock",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "nvidia-b200-shipping-2024-delays",
    "title": "Blackwell B200 shipping delays reports",
    "authors": [
      "The Information / Reuters"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "The Information",
    "url": "https://www.theinformation.com",
    "summary": "Reports of 3-month Blackwell shipping delays due to design flaw.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "M4 commercialization-vs-research-preview",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "nvidia-blackwell-b100-2024-gtc",
    "title": "NVIDIA Blackwell B100/B200 GTC 2024 launch",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-18",
    "venue": "NVIDIA GTC 2024",
    "url": "https://nvidianews.nvidia.com/news/nvidia-blackwell-platform-arrives-to-power-a-new-era-of-computing",
    "summary": "Blackwell B100 (192GB) / B200 (192GB) with FP4 native. Claims up to 30x LLM inference perf and up to 25x lower cost/energy vs H100 (GB200 NVL72).",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "M1 vendor-supplied; first-deliverable Q4 2024",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "nvidia-deepseek-r1-h200-2025",
    "title": "NVIDIA NIM DeepSeek R1 671B on H200",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-01-31",
    "venue": "NVIDIA blog",
    "url": "https://blogs.nvidia.com/blog/deepseek-r1-nim-microservice/",
    "summary": "Full 671B DeepSeek R1 on 8x H200. Vendor benchmark.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "M1 vendor self-eval",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "nvidia-fp8-fidelity-claim-blog",
    "title": "NVIDIA FP8 fidelity case studies",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "NVIDIA dev blog",
    "url": "https://developer.nvidia.com",
    "summary": "Aggregated FP8 fidelity claims. MMLU-centric.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "M1; downstream reasoning fidelity not measured",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "nvidia-gb200-nvl72-2024",
    "title": "GB200 NVL72 rack-scale launch",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-18",
    "venue": "NVIDIA GTC 2024",
    "url": "https://www.nvidia.com/en-us/data-center/gb200-nvl72/",
    "summary": "72-GPU NVL72 rack with NVLink5. Claims 30x perf for trillion-param inference.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "M1; deliveries Q1 2025",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "nvidia-h200-2023-launch",
    "title": "NVIDIA H200 launch announcement",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-11-13",
    "venue": "NVIDIA press release",
    "url": "https://nvidianews.nvidia.com/news/nvidia-supercharges-hopper-the-worlds-leading-ai-computing-platform",
    "summary": "H200 with 141GB HBM3e. Claims 2x Llama 2 70B inference perf vs H100.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "M1 vendor-supplied",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "nvidia-h200-mlperf-2024",
    "title": "H200 MLPerf Inference 4.1 results",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-04",
    "venue": "MLPerf",
    "url": "https://mlcommons.org/benchmarks/inference-datacenter/",
    "summary": "H200 results. NVIDIA-only submitter for some categories.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Vendor-submitter",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "nvidia-power-h100-2023",
    "title": "NVIDIA H100 power and thermals",
    "authors": [
      "NVIDIA datasheet"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-03-15",
    "venue": "NVIDIA datasheet",
    "url": "https://www.nvidia.com/en-us/data-center/h100/",
    "summary": "700W TDP H100 SXM. Power-per-token characterization not standardized.",
    "candidate_bill": "Bill_13",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "B13 power efficiency audit gap",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "nvidia-rubin-2025-roadmap",
    "title": "NVIDIA Rubin roadmap reveal",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-02",
    "venue": "Computex 2024",
    "url": "https://www.nvidia.com",
    "summary": "Rubin GPU + Vera CPU roadmap. 2026 delivery.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Pre-product",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "nvidia-tensorrt-llm-fp4-2024",
    "title": "TensorRT-LLM FP4 on Blackwell",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-15",
    "venue": "NVIDIA blog",
    "url": "https://developer.nvidia.com/blog/nvidia-blackwell-platform-accelerates-llm-inference-with-fp4/",
    "summary": "FP4 quantization on Blackwell. Claims 2x speedup vs FP8. Fidelity claims for MMLU only.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "M1 vendor; B8 narrow eval",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "ollama-2024-rls-notes",
    "title": "Ollama release notes 0.3+",
    "authors": [
      "Ollama team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-20",
    "venue": "Ollama GitHub releases",
    "url": "https://github.com/ollama/ollama/releases",
    "summary": "Wraps llama.cpp; no independent throughput benchmarks vs vLLM/SGLang at matched precision.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Distribution layer; inherits llama.cpp characteristics. Commercial-availability vs research-preview overlap.",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "open-source-int4-llama3-eval-2024",
    "title": "Open eval: Llama 3 INT4 across HumanEval/MATH/GSM8k",
    "authors": [
      "Eleuther + LM Eval Harness community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "GitHub + arXiv",
    "url": "https://github.com/EleutherAI/lm-evaluation-harness",
    "summary": "Llama 3 8B INT4 shows -5 to -8% on MATH and GSM8k vs FP16.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B8 \u2014 independent third-party reveals reasoning gap",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "open-vs-closed-mlperf-coverage-2024",
    "title": "MLPerf Inference: open vs closed vendor coverage",
    "authors": [
      "MLCommons"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-04",
    "venue": "MLPerf",
    "url": "https://mlcommons.org",
    "summary": "NVIDIA dominant in submitter count; AMD/Intel small participation; Groq/Cerebras/SambaNova absent.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B9 \u2014 vendor coverage biased",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "openai-api-cost-2024",
    "title": "OpenAI API pricing transparency",
    "authors": [
      "OpenAI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "OpenAI pricing",
    "url": "https://openai.com/api/pricing/",
    "summary": "Token pricing; closed shop infra costs.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B2",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "openai-broadcom-2025-rumor",
    "title": "OpenAI custom chip taping out Q2 2026",
    "authors": [
      "FT + Reuters reports"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-15",
    "venue": "FT",
    "url": "https://www.ft.com/content/8c1ec0a5",
    "summary": "Reports of OpenAI tape-out via Broadcom + TSMC. No specs.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Pre-silicon",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "openai-custom-silicon-2024-reuters",
    "title": "OpenAI partnering with Broadcom for custom inference chip",
    "authors": [
      "Reuters / industry"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-29",
    "venue": "Reuters",
    "url": "https://www.reuters.com/technology/artificial-intelligence/openai-builds-first-chip-with-broadcom-tsmc-scales-back-foundry-ambition-2024-10-29/",
    "summary": "Broadcom + TSMC partnership for custom OpenAI inference chip. Targeted 2026.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "M4 commercialization gap; pre-silicon",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "openai-stargate-2025-jan",
    "title": "OpenAI Stargate compute commitment ($500B)",
    "authors": [
      "OpenAI press / Reuters"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-01-21",
    "venue": "White House announcement",
    "url": "https://www.reuters.com/technology/artificial-intelligence/openai-oracle-softbank-launch-stargate-joint-venture-build-out-ai-infrastructure-2025-01-21/",
    "summary": "OpenAI+Oracle+SoftBank $500B compute commitment. Hardware mix unspecified.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Procurement, not benchmark",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "openrouter-cost-leaderboard-2024",
    "title": "OpenRouter LLM provider price comparison",
    "authors": [
      "OpenRouter"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-15",
    "venue": "OpenRouter",
    "url": "https://openrouter.ai/rankings",
    "summary": "Live cost/throughput ranking. Same model varies 10x across providers.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B2/B5 \u2014 third-party",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "openrouter-throughput-2025",
    "title": "OpenRouter throughput analytics",
    "authors": [
      "OpenRouter"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-03-15",
    "venue": "OpenRouter",
    "url": "https://openrouter.ai/rankings",
    "summary": "Live throughput by provider. >10x variance same model.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B5",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "outlier-aware-quant-2024",
    "title": "OmniQuant: Omnidirectionally Calibrated Quantization",
    "authors": [
      "Wenqi Shao",
      "Mengzhao Chen",
      "Zhaoyang Zhang",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-15",
    "venue": "ICLR 2024",
    "url": "https://arxiv.org/abs/2308.13137",
    "summary": "Outlier-aware quant. Llama-2-70B W2A16 with reasonable fidelity.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "ICLR peer-reviewed",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "paged-attention-validation-2024",
    "title": "PagedAttention correctness validation",
    "authors": [
      "vLLM community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-15",
    "venue": "vLLM GitHub issues",
    "url": "https://github.com/vllm-project/vllm",
    "summary": "Reports of minor numerical issues in PagedAttention vs FA2 reference. Mostly resolved.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "B6 \u2014 kernel fidelity issues historically present",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "perplexity-tput-blog-2024",
    "title": "Perplexity inference stack characterization",
    "authors": [
      "Perplexity AI engineering"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "Perplexity blog",
    "url": "https://www.perplexity.ai/hub",
    "summary": "Mentions vLLM-derivative + custom kernels for production. No published variance data.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Proprietary; not reproducible",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "qkv-projection-merging-2024",
    "title": "QKV projection fusing on H100",
    "authors": [
      "Various"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "Triton kernel community",
    "url": "https://github.com/openai/triton",
    "summary": "Kernel-level optimization for QKV. Numerical fidelity vs reference checked.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Kernel work",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "qlora-2023-dettmers",
    "title": "QLoRA: Efficient Finetuning of Quantized LLMs",
    "authors": [
      "Tim Dettmers",
      "Artidoro Pagnoni",
      "Ari Holtzman",
      "Luke Zettlemoyer"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-12-15",
    "venue": "NeurIPS 2023",
    "url": "https://arxiv.org/abs/2305.14314",
    "summary": "NF4 4-bit + LoRA. Demonstrates fine-tuning works on 4-bit base.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.95,
    "watchlist_tier": null,
    "notes": "Peer-reviewed",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "qserve-2024-mit-han",
    "title": "QServe: W4A8KV4 Quantization with INT4 KV cache",
    "authors": [
      "Yujun Lin",
      "Haotian Tang",
      "Shang Yang",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-15",
    "venue": "arXiv 2405.04532",
    "url": "https://arxiv.org/abs/2405.04532",
    "summary": "Full INT4 quant including KV cache. 2.4x throughput vs FP16. Fidelity claims.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Academic; B6/B8 both relevant",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "qstar-quantize-2024",
    "title": "Quantizing LLMs for Reasoning: A Survey",
    "authors": [
      "Various authors"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-15",
    "venue": "arXiv preprint",
    "url": "https://arxiv.org/abs/2410.00037",
    "summary": "Survey of quant reasoning impacts. Shows capability gaps invisible to PPL.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B8 \u2014 capability vs PPL divergence",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "quantization-impact-agentic-2024",
    "title": "Quantization impact on agentic tool-use capability",
    "authors": [
      "METR + community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-15",
    "venue": "METR / community evals",
    "url": "https://metr.org",
    "summary": "INT4 quantized models show step-tier degradation on agentic tasks, even when MMLU is preserved.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Strong B8 evidence \u2014 capability fidelity gap",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "quarot-2024-microsoft",
    "title": "QuaRot: Outlier-free 4-bit Inference",
    "authors": [
      "Saleh Ashkboos",
      "Amirkeivan Mohtashami",
      "Maximilian Croci",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-15",
    "venue": "arXiv 2404.00456",
    "url": "https://arxiv.org/abs/2404.00456",
    "summary": "Hadamard rotation enables 4-bit weight + activation + KV cache.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Microsoft academic",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "rain-ai-2024-altman-funding",
    "title": "Rain Neuromorphics analog-AI chip",
    "authors": [
      "Rain AI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-08-15",
    "venue": "Rain.ai company materials",
    "url": "https://rain.ai",
    "summary": "Analog neuromorphic for inference. No public LLM benchmark.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Research-preview only",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "rain-ai-fundraising-2025",
    "title": "Rain.ai $150M Series B",
    "authors": [
      "Rain.ai press"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-01-10",
    "venue": "Press",
    "url": "https://rain.ai",
    "summary": "Funding without delivered hardware benchmark.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Pre-product",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "rayllm-2024-anyscale",
    "title": "RayLLM (now Anyscale): vLLM at scale",
    "authors": [
      "Anyscale"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-01",
    "venue": "Anyscale blog",
    "url": "https://docs.anyscale.com/llms/serving/intro/",
    "summary": "Production wrapper over vLLM. Cost-per-token transparency in commercial service only.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Commercial wrapper; not independent benchmark",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "rest-spec-decode-2024",
    "title": "REST: Retrieval-Based Speculative Decoding",
    "authors": [
      "Zhenyu He",
      "Zexuan Zhong",
      "Tianle Cai",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-15",
    "venue": "NAACL 2024",
    "url": "https://arxiv.org/abs/2311.08252",
    "summary": "Retrieval-augmented speculative drafts. Speedups for code domain.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Peer-reviewed",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "rocm-vllm-2024-amd-blog",
    "title": "ROCm 6 + vLLM AMD MI300X support",
    "authors": [
      "AMD ROCm team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "AMD blog",
    "url": "https://www.amd.com/en/developer/resources/rocm-revision-history.html",
    "summary": "vLLM full support on MI300X. ROCm stack maturity claims.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Software stack still less mature than CUDA",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "rocm-vllm-mi300x-vs-cuda-2024",
    "title": "ROCm vLLM MI300X vs CUDA H100",
    "authors": [
      "SemiAnalysis"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-15",
    "venue": "SemiAnalysis",
    "url": "https://www.semianalysis.com/p/mi300x-vs-h100-vs-h200-benchmark",
    "summary": "Real-world vLLM ROCm vs CUDA shows software gap impacting MI300X.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B7/B11 \u2014 universal coverage fails across vendors",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "sambanova-405b-2024",
    "title": "SambaNova Llama 3.1 405B service",
    "authors": [
      "SambaNova"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-10",
    "venue": "SambaNova blog",
    "url": "https://sambanova.ai/blog/llama-3-1-405b",
    "summary": "Claims 132 tok/s on full Llama 3.1 405B.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Vendor-supplied; M1",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "sambanova-coherent-2024",
    "title": "SambaNova Composition-of-Experts agent serving",
    "authors": [
      "SambaNova"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-15",
    "venue": "SambaNova blog",
    "url": "https://sambanova.ai/blog/composition-of-experts",
    "summary": "Multi-model agent dispatch. Cross-vendor variance not measured.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Vendor-platform-only",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "sambanova-fast-api-2024",
    "title": "SambaNova Cloud Fast API launch",
    "authors": [
      "SambaNova Systems"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-10",
    "venue": "SambaNova blog",
    "url": "https://sambanova.ai/press/sambanova-launches-the-worlds-fastest-ai-platform",
    "summary": "Claims 132 tok/s Llama 3.1 405B. ArtificialAnalysis later challenged sustained throughput.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Cross-vendor variance vs Groq/Cerebras unclear; M1",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "sambanova-llama-405b-cost-vs-h200-2024",
    "title": "SambaNova 405B per-token cost vs H200 cluster",
    "authors": [
      "Industry analyses"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "SemiAnalysis-style",
    "url": "https://www.semianalysis.com",
    "summary": "SambaNova premium pricing; H200 cluster lower per-token at scale.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B2/B7",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "sambanova-sn40l-2024-paper",
    "title": "SambaNova SN40L: 1.5 TB/s memory bandwidth dataflow chip",
    "authors": [
      "SambaNova Systems"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-09-19",
    "venue": "Hot Chips 2023 + SambaNova whitepaper",
    "url": "https://sambanova.ai/blog/sn40l-chip-the-engine-of-the-sn40l-system",
    "summary": "Reconfigurable dataflow. Claims Llama 3.1 405B at 132 tok/s on 16 chips.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Vendor-supplied; M1",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "sambanova-vs-vllm-fast-api-2024",
    "title": "SambaNova Fast API vs vLLM-hosting providers",
    "authors": [
      "SambaNova + reviews"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-15",
    "venue": "Industry",
    "url": "https://sambanova.ai",
    "summary": "SambaNova fast API premium; vLLM providers cheaper but more variable.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B7",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "sambanova-vs-vllm-tgi-2024",
    "title": "SambaNova Llama 3.1 405B vs open-stack 405B",
    "authors": [
      "SambaNova + Industry analyses"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Various",
    "url": "https://artificialanalysis.ai",
    "summary": "SambaNova 132 tok/s vs ~25-40 tok/s for 8x H100 vLLM. ~3x faster, premium pricing.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B7 \u2014 closed wins latency, open wins generality",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "sarathi-serve-2024-microsoft-osdi",
    "title": "Sarathi-Serve: Chunked prefill for LLM serving",
    "authors": [
      "Amey Agrawal",
      "Nitin Kedia",
      "Ashish Panwar",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-15",
    "venue": "OSDI 2024",
    "url": "https://arxiv.org/abs/2403.02310",
    "summary": "Chunked prefill scheduling. Claims 5.6x improvement in 99th percentile TTFT.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Peer-reviewed OSDI paper; independent reproduction available",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "scaling-laws-quant-2024-iclr",
    "title": "Scaling Laws for Precision",
    "authors": [
      "Tanishq Kumar",
      "Zachary Ankner",
      "Benjamin Spector",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-15",
    "venue": "arXiv 2411.04330",
    "url": "https://arxiv.org/abs/2411.04330",
    "summary": "Quantization at low precision degrades larger models more. Counter to vendor claims.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B8 strong rebuttal",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "scissorhands-kv-2023",
    "title": "Scissorhands: Exploiting the Persistence of Importance Hypothesis",
    "authors": [
      "Zichang Liu",
      "Aditya Desai",
      "Fangshuo Liao",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-12-15",
    "venue": "NeurIPS 2023",
    "url": "https://arxiv.org/abs/2305.17118",
    "summary": "Window-based KV cache. Capability tradeoffs documented.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B6 \u2014 fidelity loss documented",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "self-speculative-2024",
    "title": "Self-Speculative Decoding",
    "authors": [
      "Jun Zhang",
      "Jue Wang",
      "Huan Li",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-15",
    "venue": "ACL 2024",
    "url": "https://arxiv.org/abs/2309.08168",
    "summary": "Use layer-skipping for draft. No external draft model.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Peer-reviewed",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "semianalysis-blackwell-2024",
    "title": "SemiAnalysis: Blackwell shipping reality",
    "authors": [
      "SemiAnalysis"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-15",
    "venue": "SemiAnalysis",
    "url": "https://www.semianalysis.com",
    "summary": "Blackwell scaling pains, shipping reality.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B10",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "semianalysis-blackwell-shipping-2024",
    "title": "SemiAnalysis: Blackwell shipping reality H1 2025",
    "authors": [
      "SemiAnalysis"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-01-15",
    "venue": "SemiAnalysis",
    "url": "https://www.semianalysis.com",
    "summary": "Real Blackwell shipments lag announcement by 6+ months.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B10",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "semianalysis-cerebras-2024",
    "title": "SemiAnalysis: Cerebras Inference economics",
    "authors": [
      "SemiAnalysis"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "SemiAnalysis",
    "url": "https://www.semianalysis.com/p/cerebras-inference-cracking-the-llm",
    "summary": "Examines Cerebras unit economics; questions sustainability.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B2 third-party",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "semianalysis-cerebras-2024-analysis",
    "title": "SemiAnalysis: Cerebras inference economics",
    "authors": [
      "Dylan Patel et al., SemiAnalysis"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "SemiAnalysis Substack",
    "url": "https://www.semianalysis.com/p/cerebras-inference-cracking-the-llm",
    "summary": "Analysis of cost-per-token economics; raises questions about sustained margins.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Third-party analysis; B2 cost-per-token transparency challenged",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "semianalysis-deepseek-h800-2025",
    "title": "SemiAnalysis: DeepSeek H800 cluster real cost",
    "authors": [
      "SemiAnalysis"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-15",
    "venue": "SemiAnalysis",
    "url": "https://www.semianalysis.com/p/deepseek-debates",
    "summary": "DeepSeek claimed $6M training; actual H800 cluster cost much higher.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B2 third-party",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "semianalysis-gpu-cloud-cost-2024",
    "title": "SemiAnalysis: GPU cloud rental cost analysis",
    "authors": [
      "SemiAnalysis"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "SemiAnalysis",
    "url": "https://www.semianalysis.com",
    "summary": "H100 rental rates by provider. Lambda/CoreWeave/Together/AWS.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B2",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "semianalysis-mi300x-2024",
    "title": "SemiAnalysis: MI300X vs H100 vs H200 deep dive",
    "authors": [
      "Dylan Patel et al., SemiAnalysis"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-15",
    "venue": "SemiAnalysis Substack",
    "url": "https://www.semianalysis.com/p/mi300x-vs-h100-vs-h200-benchmark",
    "summary": "Extensive five-month benchmark study. ROCm immaturity exposed.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.95,
    "watchlist_tier": null,
    "notes": "Strong third-party; closes B7 partially",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "semianalysis-rubin-2025",
    "title": "SemiAnalysis: NVIDIA Rubin roadmap analysis",
    "authors": [
      "SemiAnalysis"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "SemiAnalysis",
    "url": "https://www.semianalysis.com",
    "summary": "Pre-product Rubin roadmap analysis.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Pre-product",
    "_appeared_in_sweeps": [
      "sweep_1308"
    ]
  },
  {
    "paper_id": "sglang-radix-blog-2024",
    "title": "SGLang RadixAttention deep-dive",
    "authors": [
      "LMSYS team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-25",
    "venue": "LMSYS blog",
    "url": "https://lmsys.org/blog/2024-07-25-sglang-llama3/",
    "summary": "Up to 6.4x throughput on Llama 3 70B for structured/multi-turn workloads.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Workload-specific; not generalizable. Cross-vendor variance not measured.",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "sglang-spec-decode-2024",
    "title": "SGLang speculative decoding support",
    "authors": [
      "LMSYS"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "SGLang blog",
    "url": "https://lmsys.org",
    "summary": "SGLang supports speculative decoding. EAGLE/Medusa integration.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Mechanism standard",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "sglang-vs-cerebras-2024-llama3.1",
    "title": "SGLang H100 cluster vs Cerebras Inference",
    "authors": [
      "Independent benchmarks"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Modal/Anyscale blogs",
    "url": "https://modal.com/blog",
    "summary": "Cerebras single-stream ~3x faster; SGLang multi-tenant 10x cheaper at scale.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B7 \u2014 economics fundamentally differ",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "sglang-vs-trt-llm-vs-cerebras-2024",
    "title": "Cross-framework cross-vendor latency variance",
    "authors": [
      "BentoML / Anyscale"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "BentoML blog",
    "url": "https://www.bentoml.com/blog/benchmarking-llm-inference-backends",
    "summary": "Comprehensive comparison. ~5x variance across configurations.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B5/B7 \u2014 variance well above 10%",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "smoothquant-2023-icml",
    "title": "SmoothQuant: Accurate and Efficient Post-Training Quantization for LLMs",
    "authors": [
      "Guangxuan Xiao",
      "Ji Lin",
      "Mickael Seznec",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-06-15",
    "venue": "ICML 2023",
    "url": "https://arxiv.org/abs/2211.10438",
    "summary": "W8A8 PTQ. Foundational paper. Fidelity within 1% on Llama 2.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.95,
    "watchlist_tier": null,
    "notes": "Peer-reviewed; well-established",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "snapkv-2024",
    "title": "SnapKV: LLM Knows What You are Looking for",
    "authors": [
      "Yuhong Li",
      "Yingbing Huang",
      "Bowen Yang",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-15",
    "venue": "NeurIPS 2024",
    "url": "https://arxiv.org/abs/2404.14469",
    "summary": "KV cache compression via attention pattern. 3.6x throughput.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Peer-reviewed",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "spec-bench-2024",
    "title": "SpecBench: Benchmarking Speculative Decoding Methods",
    "authors": [
      "Heming Xia",
      "Tao Ge",
      "Si-Qing Chen",
      "Furu Wei"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-15",
    "venue": "arXiv 2401.07851",
    "url": "https://arxiv.org/abs/2401.07851",
    "summary": "Benchmark across 6 spec-decode methods on different domains.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Third-party",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "spec-decode-distillation-medusa2-2024",
    "title": "Medusa-2: training scheme for draft heads",
    "authors": [
      "Tianle Cai et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "Medusa GitHub",
    "url": "https://github.com/FasterDecoding/Medusa",
    "summary": "Improved training for draft heads. Lossless wrt target.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Academic",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "spec-decode-leviathan-2023",
    "title": "Fast Inference from Transformers via Speculative Decoding",
    "authors": [
      "Yaniv Leviathan",
      "Matan Kalman",
      "Yossi Matias"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-05-15",
    "venue": "ICML 2023",
    "url": "https://arxiv.org/abs/2211.17192",
    "summary": "Google's speculative decoding paper. 2-3x speedup. Distribution-preserving.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.95,
    "watchlist_tier": null,
    "notes": "Peer-reviewed ICML; baseline",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "spec-decode-mod-eagle3-2025",
    "title": "EAGLE-3 enhanced draft model architecture",
    "authors": [
      "Yuhui Li et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-03-15",
    "venue": "arXiv 2503.01840",
    "url": "https://arxiv.org/abs/2503.01840",
    "summary": "EAGLE-3 improvements with multi-step draft head.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Academic",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "spec-decode-throughput-2024",
    "title": "Speculative Decoding Throughput Reality vs Marketing",
    "authors": [
      "Community benchmarks"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Anyscale/BentoML blogs",
    "url": "https://www.bentoml.com",
    "summary": "Real-world speedups often 1.3-1.7x vs 2-3x claimed.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Closes B6 \u2014 workload variance",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "specinfer-2024-osdi",
    "title": "SpecInfer: Tree-based Speculative Inference",
    "authors": [
      "Xupeng Miao",
      "Gabriele Oliaro",
      "Zhihao Zhang",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-15",
    "venue": "ASPLOS 2024",
    "url": "https://arxiv.org/abs/2305.09781",
    "summary": "Tree-based speculative inference with multiple drafts. 2.8x.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "ASPLOS peer-reviewed",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "speculative-decoding-2023-chen",
    "title": "Accelerating Large Language Model Decoding with Speculative Sampling",
    "authors": [
      "Charlie Chen",
      "Sebastian Borgeaud",
      "Geoffrey Irving",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-02-15",
    "venue": "DeepMind tech report",
    "url": "https://arxiv.org/abs/2302.01318",
    "summary": "Original speculative sampling. Lossless wrt target distribution.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.95,
    "watchlist_tier": null,
    "notes": "Foundational; mathematically lossless",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "speculative-distillation-2024",
    "title": "Speculative Knowledge Distillation",
    "authors": [
      "Wenda Xu",
      "Rujun Han",
      "Zifeng Wang",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-15",
    "venue": "arXiv 2410.11325",
    "url": "https://arxiv.org/abs/2410.11325",
    "summary": "Distill verifier model for speculative decoding. Lossless wrt target.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Academic",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "spinquant-2024-meta",
    "title": "SpinQuant: LLM Quantization with Learned Rotations",
    "authors": [
      "Zechun Liu",
      "Changsheng Zhao",
      "Igor Fedorov",
      "et al. (Meta)"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-15",
    "venue": "arXiv 2405.16406",
    "url": "https://arxiv.org/abs/2405.16406",
    "summary": "Learned rotation for INT4 quant. Llama 3 70B INT4 W4A4 with low drop.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Meta paper",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "splitwise-2024-microsoft-isca",
    "title": "Splitwise: Efficient generative LLM inference using phase splitting",
    "authors": [
      "Pratyush Patel",
      "Esha Choukse",
      "Chaojie Zhang",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "ISCA 2024",
    "url": "https://arxiv.org/abs/2311.18677",
    "summary": "Separates prefill and decode onto different hardware. Inspired Dynamo disaggregation.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Peer-reviewed ISCA; methodologically sound",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "streaming-llm-2024",
    "title": "Efficient Streaming Language Models with Attention Sinks",
    "authors": [
      "Guangxuan Xiao",
      "Yuandong Tian",
      "Beidi Chen",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-15",
    "venue": "ICLR 2024",
    "url": "https://arxiv.org/abs/2309.17453",
    "summary": "Attention-sink based streaming inference. Long-context performance tradeoffs.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Peer-reviewed",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "tencent-int4-bench-2024",
    "title": "Tencent: Llama 3 INT4 capability ablation",
    "authors": [
      "Tencent / various"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "Tencent tech blog",
    "url": "https://github.com/Tencent",
    "summary": "Reports INT4 keeps MMLU but loses on math reasoning.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "B8 \u2014 capability gap revealed",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "tencent-qingfeng-2024",
    "title": "Tencent Zixiao AI accelerator",
    "authors": [
      "Tencent"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "Tencent press",
    "url": "https://www.tencent.com",
    "summary": "Internal Tencent inference silicon. Limited public details.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Closed shop",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "tensorrt-llm-2024-h100-perf-blog",
    "title": "TensorRT-LLM v0.10 H100 Performance",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-04",
    "venue": "NVIDIA developer blog",
    "url": "https://developer.nvidia.com/blog/optimizing-inference-on-llms-with-tensorrt-llm-now-publicly-available/",
    "summary": "Vendor-published benchmarks for Llama 3 70B on H100. Claims 2x throughput improvement vs prior release.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "M1 vendor-supplied. No independent measurement against same workload.",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "tensorrt-llm-2025-rls-h200",
    "title": "TensorRT-LLM Llama 3.1 405B H200 perf",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-26",
    "venue": "NVIDIA blog",
    "url": "https://developer.nvidia.com/blog/post-training-quantization-of-llama-3-models-with-nvidia-tensorrt-model-optimizer/",
    "summary": "FP8 quantization of Llama 3.1 405B; claims <1% MMLU degradation.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Vendor-supplied; M1 disqualification; quantization fidelity beyond MMLU not measured",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "tensorrt-llm-int4-awq-2024",
    "title": "TensorRT-LLM INT4-AWQ on Llama 3",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-01",
    "venue": "NVIDIA developer blog",
    "url": "https://developer.nvidia.com/blog/post-training-quantization-of-llama-3-models-with-nvidia-tensorrt-model-optimizer/",
    "summary": "INT4 weight + INT8 activation; claims <0.5% MMLU drop.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Vendor-supplied; M1; downstream reasoning eval missing",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "tensorrt-llm-multilora-blog-2024",
    "title": "TensorRT-LLM Multi-LoRA support",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-15",
    "venue": "NVIDIA dev blog",
    "url": "https://developer.nvidia.com/blog/tune-and-deploy-lora-llms-with-nvidia-tensorrt-llm/",
    "summary": "Vendor implementation. Cross-framework variance unknown.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "M1 vendor-supplied; no matched config across SGLang/vLLM/TRT-LLM",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "tensorrt-llm-vs-vllm-h100-2024",
    "title": "TRT-LLM vs vLLM head-to-head on H100",
    "authors": [
      "Various community benchmarks"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "BentoML benchmarks, others",
    "url": "https://bentoml.com",
    "summary": "Mixed results. TRT-LLM faster on Llama 3 70B at decode; vLLM competitive elsewhere.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B5 \u2014 variance",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "tenstorrent-blackhole-2024",
    "title": "Tenstorrent Blackhole architecture",
    "authors": [
      "Tenstorrent"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "Tenstorrent blog",
    "url": "https://tenstorrent.com/hardware/blackhole",
    "summary": "Second-gen chip. Targets datacenter inference. No third-party benchmarks yet.",
    "candidate_bill": "Bill_10",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Hardware shipping but software stack maturity gap",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "tenstorrent-blackhole-arxiv-2025",
    "title": "Tenstorrent open source kernels for transformer inference",
    "authors": [
      "Tenstorrent engineering"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-01-15",
    "venue": "GitHub + blogs",
    "url": "https://github.com/tenstorrent/tt-metal",
    "summary": "Open RISC-V tensor kernel framework. Llama support preview only.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.65,
    "watchlist_tier": null,
    "notes": "Promising but unverified at scale",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "tenstorrent-wormhole-2024-launch",
    "title": "Tenstorrent Wormhole and Grayskull launch",
    "authors": [
      "Tenstorrent / Jim Keller"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-07-15",
    "venue": "Tenstorrent docs",
    "url": "https://tenstorrent.com/hardware/wormhole",
    "summary": "RISC-V + tensor cores. Claims competitive perf-per-dollar; limited LLM benchmarks.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Limited published benchmarks; cross-vendor matched comparison absent",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "tgi-huggingface-2024-rls",
    "title": "HuggingFace TGI release notes",
    "authors": [
      "HuggingFace"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-01",
    "venue": "TGI GitHub",
    "url": "https://github.com/huggingface/text-generation-inference",
    "summary": "Production inference server. Limited cross-framework benchmarks against vLLM/SGLang.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Cross-vendor stability not published",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "tgi-quant-cookbook-2024",
    "title": "TGI quantization cookbook",
    "authors": [
      "HuggingFace TGI"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "HF docs",
    "url": "https://huggingface.co/docs/text-generation-inference",
    "summary": "TGI supports EETQ/GPTQ/AWQ. Latency claims; fidelity not deeply audited.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "B8 \u2014 vendor cookbook",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "tinygrad-comma-amd-2024",
    "title": "Tinygrad George Hotz AMD test reports",
    "authors": [
      "George Hotz / tinygrad"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "tinygrad GitHub",
    "url": "https://github.com/tinygrad/tinygrad",
    "summary": "Reports ROCm driver instability vs claimed AMD perf.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B7 \u2014 vendor-closed at matched compute reveals real-world gap",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "tpu-jax-pallas-2024",
    "title": "Pallas: JAX kernels for TPU",
    "authors": [
      "Google DeepMind"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-04-15",
    "venue": "JAX docs",
    "url": "https://jax.readthedocs.io/en/latest/pallas/index.html",
    "summary": "Low-level TPU kernel API. Not portable to GPU.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Platform-lock",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "tpu-pathways-2024-deepmind",
    "title": "Pathways system for TPU scaling",
    "authors": [
      "Google DeepMind"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-15",
    "venue": "DeepMind blog",
    "url": "https://deepmind.google",
    "summary": "Pathways system enables Gemini at scale on TPU. Closed.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Platform-lock",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "tpu-v5p-2023-google-launch",
    "title": "Google TPU v5p launch",
    "authors": [
      "Google Cloud"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2023-12-06",
    "venue": "Google Cloud blog",
    "url": "https://cloud.google.com/blog/products/ai-machine-learning/introducing-cloud-tpu-v5p-and-ai-hypercomputer",
    "summary": "TPU v5p for Gemini training/inference. Internal benchmarks only.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Vendor-only; M5 platform-lock",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "tpu-v5p-gemini-cost-2024",
    "title": "Google Gemini per-token cost TPU advantage",
    "authors": [
      "industry analysts"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "SemiAnalysis + analyses",
    "url": "https://www.semianalysis.com",
    "summary": "Estimates of TPU cost advantage; not verifiable independently.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "B2 transparency missing",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "tpu-v6e-trillium-2024",
    "title": "Google Trillium (TPU v6e) launch",
    "authors": [
      "Google Cloud"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-14",
    "venue": "Google I/O 2024",
    "url": "https://cloud.google.com/blog/products/compute/introducing-trillium-6th-gen-tpus",
    "summary": "4.7x compute vs v5e. Inference-optimized.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Vendor-supplied; M1",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "tpu-v7-ironwood-2025",
    "title": "Google Ironwood TPU (v7) at Google Cloud Next 2025",
    "authors": [
      "Google Cloud"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-09",
    "venue": "Google Cloud Next 2025",
    "url": "https://cloud.google.com/blog/products/compute/whats-new-with-ai-hypercomputer",
    "summary": "Inference-only TPU. Claims 5x perf vs v5p. 9216-chip pods.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Vendor-only; cross-cloud variance unmeasured",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "tpu-vs-h100-llama-2024-bench",
    "title": "TPU v5p vs H100 Llama 2 70B cross-cloud test",
    "authors": [
      "Independent benchmark teams"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "MosaicML, Modular blog",
    "url": "https://www.databricks.com/blog",
    "summary": "Indicates variance 2-3x across clouds at matched cost basis.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "B5 \u2014 cross-vendor stability fails",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "tpu-vs-h100-mlperf-2024",
    "title": "MLPerf Inference TPU vs H100 head-to-head",
    "authors": [
      "MLCommons"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-04",
    "venue": "MLPerf",
    "url": "https://mlcommons.org",
    "summary": "Direct submitter-to-submitter comparison constrained by MLPerf workload definition.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "M3 \u2014 workload mismatch with real production",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "trainium2-anthropic-rainier-2025",
    "title": "Project Rainier: 400K Trainium2 build",
    "authors": [
      "AWS + Anthropic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-03-15",
    "venue": "AWS blog",
    "url": "https://aws.amazon.com",
    "summary": "Large-scale Trainium2 deployment. Independent throughput data not public.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Procurement; no benchmark",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "trainium2-llm-benchmarks-aa-2025",
    "title": "ArtificialAnalysis Trainium2 Llama 3.3 benchmark",
    "authors": [
      "ArtificialAnalysis.ai"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-04-15",
    "venue": "ArtificialAnalysis.ai",
    "url": "https://artificialanalysis.ai",
    "summary": "Independent measurement of Trainium2-hosted Llama 3.3 70B.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Third-party measurement, partially closes B5",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "trillium-claude-2024-blog",
    "title": "Anthropic Claude on Google TPU Trillium reports",
    "authors": [
      "Google + Anthropic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-30",
    "venue": "Google Cloud blog",
    "url": "https://cloud.google.com/blog/products/ai-machine-learning/anthropic-deepens-partnership-with-google-cloud",
    "summary": "Anthropic uses TPUs for training/inference. Per-platform cost-per-token not public.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B2 transparency challenged",
    "_appeared_in_sweeps": [
      "sweep_1303"
    ]
  },
  {
    "paper_id": "triton-server-2024-mlperf-results",
    "title": "Triton Inference Server v24.x MLPerf submissions",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-04",
    "venue": "MLPerf Inference v4.1",
    "url": "https://mlcommons.org/benchmarks/inference-datacenter/",
    "summary": "NVIDIA-submitted MLPerf results for Llama-2-70B server scenario.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "MLPerf format constrains workload; vendor submitter problem (M3 partially)",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "trt-llm-spec-decode-2024",
    "title": "TensorRT-LLM speculative decoding for Medusa/EAGLE",
    "authors": [
      "NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "NVIDIA blog",
    "url": "https://developer.nvidia.com/blog/optimizing-llm-inference-for-low-latency-applications/",
    "summary": "TRT-LLM supports speculative decoding. Vendor-supplied benchmarks.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "Vendor implementation",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "trt-vs-vllm-mi300x-2024",
    "title": "MI300X vLLM ROCm gap analysis",
    "authors": [
      "Hot Aisle Inc."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-15",
    "venue": "Hot Aisle blog",
    "url": "https://hotaisle.xyz",
    "summary": "Third-party MI300X cloud provider analysis.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B5",
    "_appeared_in_sweeps": [
      "sweep_1304"
    ]
  },
  {
    "paper_id": "vllm-2024-paged-attention-v0.6",
    "title": "vLLM v0.6: 2.7x throughput improvement",
    "authors": [
      "vLLM team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-05",
    "venue": "vLLM blog + GitHub release notes",
    "url": "https://blog.vllm.ai/2024/09/05/perf-update.html",
    "summary": "Major scheduling rewrite, TTFT improvements with claims of 2.7x throughput at matched latency.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Self-reported; no independent reproduction under matched closed-vendor workload",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "vllm-2024-prefix-caching",
    "title": "vLLM prefix caching feature analysis",
    "authors": [
      "vLLM team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-15",
    "venue": "vLLM docs",
    "url": "https://docs.vllm.ai/en/latest/automatic_prefix_caching/apc.html",
    "summary": "Automatic prefix caching for shared system prompts. Throughput claims workload-dependent.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Reasonable claim, mechanism validated independently",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "vllm-async-engine-2025",
    "title": "vLLM v1 AsyncEngine architecture",
    "authors": [
      "vLLM team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-01-27",
    "venue": "vLLM blog",
    "url": "https://blog.vllm.ai/2025/01/27/v1-alpha-release.html",
    "summary": "Lock-free async engine. Claims latency reductions; independent reproduction limited.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "M3 vendor-self-eval",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "vllm-cost-trans-2024",
    "title": "vLLM open-source cost transparency",
    "authors": [
      "vLLM community"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "vLLM docs",
    "url": "https://docs.vllm.ai",
    "summary": "Self-hosted vLLM cost depends on GPU rental. Per-token deeply transparent.",
    "candidate_bill": "Bill_2",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B2 \u2014 open wins transparency",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "vllm-fp8-2024-ampere-hopper",
    "title": "vLLM FP8 quantization support",
    "authors": [
      "vLLM team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "vLLM blog",
    "url": "https://blog.vllm.ai/2024/05/14/vllm-fp8.html",
    "summary": "Native FP8 on H100/H200. Claims <1% MMLU drop.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Benchmark-only quantization fidelity; reasoning eval not in scope",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "vllm-fp8-mmlu-blog-2024",
    "title": "Neural Magic FP8 MMLU on Llama 3 405B",
    "authors": [
      "Neural Magic"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-01",
    "venue": "Neural Magic blog",
    "url": "https://neuralmagic.com/blog/we-ran-over-half-a-million-evaluations-on-quantized-llms-heres-what-we-found/",
    "summary": "500K evals showing W8A8 retains 99% MMLU; INT4 ~96-97%.",
    "candidate_bill": "Bill_8",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Vendor-supplied but extensive eval; B8 partially closed but reasoning gap remains",
    "_appeared_in_sweeps": [
      "sweep_1305"
    ]
  },
  {
    "paper_id": "vllm-h200-2024-update",
    "title": "vLLM H200 + FP8 Llama 3.1 405B blog",
    "authors": [
      "vLLM team + NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-22",
    "venue": "vLLM blog",
    "url": "https://blog.vllm.ai/2024/10/17/spec-decode.html",
    "summary": "Llama 3.1 405B FP8 on H200; documents throughput vs H100 only.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.75,
    "watchlist_tier": null,
    "notes": "Vendor-friendly; no cross-vendor matched comparison",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "vllm-h200-vs-groq-llama3.3-2024",
    "title": "vLLM H200 Llama 3.3 70B vs Groq",
    "authors": [
      "Industry"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-15",
    "venue": "AA",
    "url": "https://artificialanalysis.ai",
    "summary": "Groq still 3-5x faster at single-stream; vLLM cheaper at scale.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B7",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "vllm-mlperf-vs-groq-mlperf-2024",
    "title": "MLPerf Inference v4.1 \u2014 vLLM submissions vs Groq",
    "authors": [
      "MLCommons"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-04",
    "venue": "MLPerf",
    "url": "https://mlcommons.org/benchmarks/inference-datacenter/",
    "summary": "Groq did not participate; vLLM submissions only via NVIDIA/AMD/AWS.",
    "candidate_bill": "Bill_9",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B7/B9 \u2014 closed vendors avoid standardized benchmarks",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "vllm-mooncake-2024",
    "title": "Mooncake: A KVCache-centric architecture",
    "authors": [
      "Ruoyu Qin",
      "Zheming Li",
      "Weiran He",
      "et al. (Moonshot)"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-24",
    "venue": "arXiv 2407.00079",
    "url": "https://arxiv.org/abs/2407.00079",
    "summary": "Disaggregated KV cache pooling. Production scale at Moonshot AI Kimi.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Production paper; cross-framework portability claims unverified",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "vllm-multilora-punica-2024",
    "title": "Punica: Multi-LoRA serving",
    "authors": [
      "Lequn Chen",
      "Zihao Ye",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-03-12",
    "venue": "MLSys 2024",
    "url": "https://arxiv.org/abs/2310.18547",
    "summary": "Multi-tenant LoRA serving. Independent academic. Cross-vendor not measured.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Peer-reviewed MLSys",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "vllm-multinode-2024-disagg",
    "title": "vLLM disaggregated prefill (experimental)",
    "authors": [
      "vLLM team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-02-15",
    "venue": "vLLM docs",
    "url": "https://docs.vllm.ai/en/latest/design/v1/prefix_caching.html",
    "summary": "Experimental disaggregated mode. No public throughput-at-matched-latency numbers.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.6,
    "watchlist_tier": null,
    "notes": "G2 \u2014 not enough public data",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "vllm-spec-decode-2024",
    "title": "vLLM speculative decoding production support",
    "authors": [
      "vLLM team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-17",
    "venue": "vLLM blog",
    "url": "https://blog.vllm.ai/2024/10/17/spec-decode.html",
    "summary": "Production EAGLE + ngram + draft model speculative. Up to 1.5x speedup.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Vendor-supplied; mechanism is mathematically lossless",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "vllm-v1-2025-engine-rewrite",
    "title": "vLLM v1: Major engine rewrite",
    "authors": [
      "vLLM team"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-01-27",
    "venue": "vLLM blog",
    "url": "https://blog.vllm.ai/2025/01/27/v1-alpha-release.html",
    "summary": "Async scheduler, persistent batching. Claims 1.7x speedup over v0.6.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Self-reported; independent reproduction limited at release",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "vllm-vs-groq-aa-llama3-2024",
    "title": "ArtificialAnalysis vLLM-providers vs Groq head-to-head",
    "authors": [
      "ArtificialAnalysis.ai"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-08-15",
    "venue": "ArtificialAnalysis.ai",
    "url": "https://artificialanalysis.ai/models/llama-3-3-instruct-70b/providers",
    "summary": "Groq significantly faster at low batch; vLLM-providers more stable at high concurrency.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "B7 \u2014 direct strong-baseline comparison; closed wins on latency, open wins on stability",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "vllm-vs-sglang-2024-llm-d-blog",
    "title": "LLM-d: K8s inference scheduling layer",
    "authors": [
      "Red Hat, Google, IBM, NVIDIA"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2025-05-01",
    "venue": "llm-d.ai launch",
    "url": "https://llm-d.ai",
    "summary": "Cross-framework K8s scheduling. Cross-vendor matched-workload measurements forthcoming.",
    "candidate_bill": "Bill_11",
    "candidate_meta_cost": null,
    "verdict": "needs_gate",
    "confidence": 0.6,
    "watchlist_tier": null,
    "notes": "Brand-new project; promises cross-framework but no data yet",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  },
  {
    "paper_id": "vllm-vs-sglang-h100-2024",
    "title": "vLLM v0.6 vs SGLang on H100 Llama 3 70B",
    "authors": [
      "LMSYS"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-09-15",
    "venue": "LMSYS blog",
    "url": "https://lmsys.org/blog/2024-07-25-sglang-llama3/",
    "summary": "SGLang faster on structured workloads; vLLM faster on simple workloads.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B5 \u2014 within open-stack variance",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "vllm-vs-tensorrt-llm-h200-2024",
    "title": "vLLM v0.6 vs TensorRT-LLM on H200",
    "authors": [
      "BentoML"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-10-15",
    "venue": "BentoML blog",
    "url": "https://www.bentoml.com/blog/benchmarking-llm-inference-backends",
    "summary": "TRT-LLM 1.2-1.5x faster than vLLM at low concurrency. Reversed at high.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "B5 \u2014 workload-dependent",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "vllm-vs-tgi-mixtral-2024",
    "title": "vLLM vs TGI Mixtral 8x7B on H100",
    "authors": [
      "BentoML"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-06-15",
    "venue": "BentoML blog",
    "url": "https://www.bentoml.com/blog/benchmarking-llm-inference-backends",
    "summary": "Roughly equivalent at high concurrency; vLLM slightly better TTFT.",
    "candidate_bill": "Bill_5",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.8,
    "watchlist_tier": null,
    "notes": "B5",
    "_appeared_in_sweeps": [
      "sweep_1307"
    ]
  },
  {
    "paper_id": "wormhole-llm-tt-metal-2024",
    "title": "Tenstorrent Metalium LLM kernels",
    "authors": [
      "Tenstorrent"
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-11-15",
    "venue": "Tenstorrent docs + GitHub",
    "url": "https://github.com/tenstorrent/tt-metal",
    "summary": "Open kernels for Wormhole; Llama 3 70B preview. No matched cross-vendor data.",
    "candidate_bill": "Bill_7",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.7,
    "watchlist_tier": null,
    "notes": "Open source but immature stack; matched comparison missing",
    "_appeared_in_sweeps": [
      "sweep_1302"
    ]
  },
  {
    "paper_id": "yoco-kvcache-2024",
    "title": "YOCO: You Only Cache Once",
    "authors": [
      "Yutao Sun",
      "Li Dong",
      "Yi Zhu",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-05-15",
    "venue": "arXiv 2405.05254",
    "url": "https://arxiv.org/abs/2405.05254",
    "summary": "Cross-decoder KV-cache reuse. 6.4x throughput.",
    "candidate_bill": "Bill_6",
    "candidate_meta_cost": null,
    "verdict": "known_bill",
    "confidence": 0.85,
    "watchlist_tier": null,
    "notes": "Microsoft research",
    "_appeared_in_sweeps": [
      "sweep_1306"
    ]
  },
  {
    "paper_id": "zheng-2024-sglang-arxiv",
    "title": "SGLang: Efficient Execution of Structured Language Model Programs",
    "authors": [
      "Lianmin Zheng",
      "Liangsheng Yin",
      "Zhiqiang Xie",
      "et al."
    ],
    "affiliations": [],
    "country_region": null,
    "date": "2024-12-05",
    "venue": "NeurIPS 2024",
    "url": "https://arxiv.org/abs/2312.07104",
    "summary": "RadixAttention and structured generation. Claims 6.4x throughput vs Guidance baseline.",
    "candidate_bill": "Bill_1",
    "candidate_meta_cost": null,
    "verdict": "rebuttal_paper",
    "confidence": 0.9,
    "watchlist_tier": null,
    "notes": "Strong baseline disclosure; cross-vendor variance not directly measured",
    "_appeared_in_sweeps": [
      "sweep_1301"
    ]
  }
]