{
  "schema_version": "frozen-retrieval/v1",
  "artifact_filename": "frozen_retrieval_topK_500q.v1.jsonl",
  "artifact_sha256": "b308568994dd58608e46bbce8945330de74b8f9defc8fd06411980a99604f9e8",
  "artifact_size_bytes": 5943379,
  "build_utc": "2026-04-25T11:59:17Z",
  "benchmark": {
    "name": "LongMemEval-S",
    "n_questions": 500,
    "split": "standard (xiaowu0162/longmemeval / longmemeval_s)",
    "qtype_counts": {
      "single-session-user": 70,
      "multi-session": 133,
      "single-session-preference": 30,
      "temporal-reasoning": 133,
      "knowledge-update": 78,
      "single-session-assistant": 56
    }
  },
  "retrieval_contract": {
    "id": "warrant.hybrid.v1",
    "description": "BGE-large-en-v1.5 + QNDN v0 + BM25 -> RRF k=60 fusion -> MixK rerank -> top-K chunks delivered to reader. K_published=10.",
    "encoders": [
      "BGE-large-en-v1.5",
      "QNDN v0",
      "BM25"
    ],
    "fusion": "RRF k=60",
    "rerank": "MixK",
    "k_published_per_question": 10,
    "chunking": {
      "unit": "session-turn",
      "target_chars": 700,
      "hard_char_cap": 1600,
      "max_chunks_per_question": 1500,
      "header_format": "[Session -- {date}]"
    }
  },
  "retrieval_metrics": {
    "n_with_gold": 500,
    "R_at_1": 0.878,
    "R_at_5": 0.962,
    "R_at_10": 0.98,
    "hit_counts": {
      "at_1": 439,
      "at_5": 481,
      "at_10": 490
    }
  },
  "chunk_text_stats": {
    "n_chunks_total": 5000,
    "mean_chars": 979,
    "p50_chars": 452,
    "p95_chars": 2819,
    "p99_chars": 3361,
    "max_chars": 4325,
    "min_chars": 66
  },
  "row_schema": {
    "qid": "string -- LongMemEval question_id",
    "qtype": "string -- single-session-user / multi-session / temporal-reasoning / knowledge-update / single-session-preference / single-session-assistant",
    "question": "string -- the question shown to the reader",
    "gold": "string|number -- gold answer key",
    "question_date": "string -- LME-S question timestamp formatted 'YYYY/MM/DD (DDD) HH:MM', where DDD is the three-letter weekday abbreviation, e.g. '2023/05/30 (Tue) 23:40'",
    "answer_session_ids": "array<string> -- sorted gold session ids",
    "retrieval_contract_id": "string -- identifier of the retrieval contract this row is pinned to (see retrieval_contract.id)",
    "n_chunks_in_pool": "int -- candidate pool size before top-K selection",
    "mixk_weight": "float -- MixK rerank weight at retrieval time",
    "top_k": "array<object> -- ordered top-10 chunks delivered to the reader. Each object: rank, score, scores{bge,qndn,bm25,mixk}, session_id, chunk_text.",
    "retrieval_hit": "object -- {at_1, at_5, at_10} bool flags vs answer_session_ids",
    "top1_cos": "float -- top-1 BGE cosine score"
  },
  "reader_contract": {
    "input_to_reader": "All 10 chunk_text strings, in rank order, joined by a blank line, with rank-1 first. Reader must cite chunks by 1-indexed rank.",
    "prompt_template": "warrant-leaderboard/artifacts/benchmark_prompt.v1.md",
    "submission_schema": "warrant-leaderboard/artifacts/submission_schema.json",
    "judge": "GPT-4o, 5 seeds, 3-of-5 majority vote (canonical leaderboard judge)."
  },
  "license": "LongMemEval license applies to question and haystack content. Retrieval results, chunking, and contract metadata are released under MIT.",
  "source_data": {
    "lme_s_dataset": "xiaowu0162/longmemeval (Hugging Face) -- file: longmemeval_s",
    "retrieval_run": "phase91_a50 / gemma_stack_answers.jsonl (Apr 2026 internal run)"
  },
  "reproducibility_notes": [
    "chunk_text was reconstructed deterministically from the LongMemEval-S standard split using the verbatim chunk_haystack_turns(target_chars=700, hard_char_cap=1600) function from the harness; chunks were matched back to retrieval-time previews by (session_id, longest-prefix-match).",
    "Top-K ordering, scores, and session ids are taken verbatim from the canonical retrieval run; they are byte-identical to the cwfix run's top_k_preview.",
    "If you re-run retrieval against this corpus, you should reproduce the same top-K session ids (top_k[].session_id) and cosine scores within 1e-3 of the published values; minor float drift is acceptable."
  ]
}