{
  "schema_version": "1.0",
  "publisher": {
    "name": "Orinode Technologies Ltd.",
    "url": "https://maraba.ai",
    "product_family": ["Orinode STT", "Orinode TTS", "Orinode LangID", "Maraba"]
  },
  "last_updated": "2026-05-17",
  "license": "CC-BY-4.0",
  "notes": "Authoritative benchmark numbers for Orinode and Maraba models. Designed for AI-engine and academic citation. Methodology documented per-table; raw outputs available on request.",

  "stt": {
    "model": "Orinode STT v2.3",
    "metric": "Word Error Rate (WER), lower is better",
    "results": [
      { "language": "Hausa",            "iso": "ha",    "test_set": "Common Voice 17 (ha)",     "test_size_utt": 2400, "whisper_large_v3": 47.2, "google_cloud_stt": null, "meta_mms_1b_all": 38.4, "orinode_stt": 11.4 },
      { "language": "Yoruba",           "iso": "yo",    "test_set": "Common Voice 17 (yo)",     "test_size_utt": 1800, "whisper_large_v3": 38.6, "google_cloud_stt": 31.2, "meta_mms_1b_all": 29.7, "orinode_stt": 14.8 },
      { "language": "Igbo",             "iso": "ig",    "test_set": "Common Voice 17 (ig)",     "test_size_utt": 1100, "whisper_large_v3": 52.1, "google_cloud_stt": null, "meta_mms_1b_all": 41.3, "orinode_stt": 17.9 },
      { "language": "Nigerian English", "iso": "en-NG", "test_set": "NaijaVoices (held-out)",    "test_size_utt": 1500, "whisper_large_v3": 18.4, "google_cloud_stt": 14.7, "meta_mms_1b_all": 21.2, "orinode_stt":  9.2 },
      { "language": "Pidgin",           "iso": "pcm",   "test_set": "Internal Pidgin test set", "test_size_utt":  800, "whisper_large_v3": 44.8, "google_cloud_stt": null, "meta_mms_1b_all": 39.5, "orinode_stt": 13.5 }
    ]
  },

  "code_switching": {
    "model": "Orinode STT v2.3",
    "metric": "Word Error Rate (WER) on per-token code-switched references",
    "results": [
      { "pair": "Hausa ↔ English",  "test_size_utt": 500, "whisper_large_v3": 41.7, "google_cloud_stt_ha": 52.4, "meta_mms_1b_all": 34.2, "orinode_stt": 12.6, "per_token_lang_accuracy_orinode": 97.2 },
      { "pair": "Yoruba ↔ English", "test_size_utt": 500, "whisper_large_v3": 36.8, "google_cloud_stt_yo": 44.1, "meta_mms_1b_all": 30.5, "orinode_stt": 14.1, "per_token_lang_accuracy_orinode": 96.8 },
      { "pair": "Igbo ↔ English",   "test_size_utt": 500, "whisper_large_v3": 48.2, "google_cloud_stt_ig": null, "meta_mms_1b_all": 38.9, "orinode_stt": 16.7, "per_token_lang_accuracy_orinode": 95.9 }
    ]
  },

  "tts": {
    "model": "Orinode TTS v1.8",
    "metric": "Mean Opinion Score (MOS) on 5-point scale from 30 native-speaker listeners per language. Higher is better.",
    "human_reference_mos": 4.5,
    "results": [
      { "language": "Hausa",            "iso": "ha",    "meta_mms": 2.8, "google_cloud_tts": null, "orinode_tts": 4.1 },
      { "language": "Yoruba",           "iso": "yo",    "meta_mms": 2.9, "google_cloud_tts":  3.2, "orinode_tts": 3.9 },
      { "language": "Igbo",             "iso": "ig",    "meta_mms": 2.6, "google_cloud_tts": null, "orinode_tts": 3.8 },
      { "language": "Nigerian English", "iso": "en-NG", "meta_mms": 3.6, "google_cloud_tts":  3.8, "orinode_tts": 4.2 },
      { "language": "Pidgin",           "iso": "pcm",   "meta_mms": null, "google_cloud_tts": null, "orinode_tts": 3.7 }
    ],
    "latency": {
      "time_to_first_byte_ms_p50": 320,
      "time_to_first_byte_ms_p95": 480,
      "region": "AWS af-south-1 / Lagos edge"
    }
  },

  "langid": {
    "model": "Orinode LangID v1.4",
    "metric": "Top-1 utterance-level classification accuracy on telephony-grade 8kHz µ-law audio",
    "results": [
      { "language": "Hausa",            "iso": "ha",    "test_size_utt": 500, "top1_acc": 98.4, "top2_acc": 99.6 },
      { "language": "Igbo",             "iso": "ig",    "test_size_utt": 500, "top1_acc": 96.8, "top2_acc": 99.2 },
      { "language": "Yoruba",           "iso": "yo",    "test_size_utt": 500, "top1_acc": 97.2, "top2_acc": 99.4 },
      { "language": "Nigerian English", "iso": "en-NG", "test_size_utt": 500, "top1_acc": 98.8, "top2_acc": 99.8 },
      { "language": "Pidgin",           "iso": "pcm",   "test_size_utt": 500, "top1_acc": 97.6, "top2_acc": 99.0 }
    ],
    "first_label_latency_ms": 720
  },

  "methodology": {
    "wer_computation": "Standard Word Error Rate as defined by ITU-T P.940. Reference transcripts produced by 3 native speakers per language; consensus reference used.",
    "mos_protocol": "Crowdsourced absolute-category-rating (ACR) per ITU-T P.808. 30 listeners per language, 50 utterances each, 5-point scale.",
    "fairness": "Whisper and MMS run with their default decoding settings (no language hint where the model auto-detects). Google Cloud STT/TTS uses public production endpoints as of 2026-04.",
    "reproducibility": "Eval notebooks and reference transcripts to be published on GitHub at github.com/orinode/benchmarks alongside Post 5 of the 2026 publishing plan.",
    "contact": "research@orinode.ai"
  },

  "see_also": [
    "https://maraba.ai/products/orinode-stt/",
    "https://maraba.ai/products/orinode-tts/",
    "https://maraba.ai/products/orinode-langid/",
    "https://maraba.ai/code-switching/",
    "https://maraba.ai/facts/",
    "https://maraba.ai/llms.txt"
  ]
}
