{
  "@context": "https://schema.org",
  "@type": "TechArticle",
  "name": "Voidly Global Censorship Index - Methodology",
  "url": "https://voidly.ai/methodology",
  "datePublished": "2024-01-01",
  "dateModified": "2026-06-25T04:29:10.921Z",
  "author": {
    "@type": "Organization",
    "name": "Voidly Research"
  },
  "methodology": {
    "overview": "Composite censorship score from Voidly Network correlated with OONI, IODA, and CensoredPlanet. Domain categorization from Citizen Lab test lists.",
    "dataSources": {
      "ooni": {
        "name": "OONI (Open Observatory of Network Interference)",
        "coverage": "130 countries",
        "tests": [
          "web_connectivity",
          "telegram",
          "whatsapp",
          "signal",
          "tor",
          "http_invalid_request_line",
          "http_header_field_manipulation"
        ],
        "updateFrequency": "Every 6 hours"
      },
      "ioda": {
        "name": "IODA (Internet Outage Detection and Analysis)",
        "type": "BGP monitoring and outage detection",
        "updateFrequency": "Every 6 hours"
      },
      "censoredplanet": {
        "name": "CensoredPlanet (University of Michigan)",
        "type": "Remote DNS/HTTP blocking detection",
        "coverage": "50 countries",
        "updateFrequency": "Every 6 hours"
      },
      "citizenlab": {
        "name": "Citizen Lab",
        "type": "Domain categorization (14K+ domains classified)",
        "updateFrequency": "Weekly enrichment"
      },
      "voidlyProbes": {
        "name": "Voidly Network",
        "nodes": "30+",
        "domains": 80,
        "interval": "Every 5 minutes"
      }
    },
    "model": {
      "classifier": {
        "type": "GradientBoosting (sklearn)",
        "f1": 0.87,
        "aucRoc": 0.953,
        "validationNote": "v3.3 LOCO median F1 0.87 across 127 countries (honest leave-country-out). Trained on 4,237 labeled samples. v2 99.8% F1 retired 2026-05-21 (country-tier leakage)."
      },
      "forecast": {
        "type": "XGBoost",
        "aucRoc": 0.905,
        "recall": 0.355,
        "validationNote": "Internal eval on real ground truth incidents"
      },
      "features": 5,
      "featureImportance": [
        {
          "feature": "country_risk_tier",
          "importance": 0.85
        },
        {
          "feature": "anomaly_rate",
          "importance": 0.06
        },
        {
          "feature": "measurement_count",
          "importance": 0.04
        },
        {
          "feature": "source_diversity",
          "importance": 0.03
        },
        {
          "feature": "temporal_pattern",
          "importance": 0.02
        }
      ],
      "training": "Weekly retraining (Sundays 02:00 UTC)"
    },
    "scoring": {
      "scale": "0-100 (0 = free, 100 = total censorship)",
      "calculation": "block_rate * 100",
      "levels": [
        {
          "range": "0-10",
          "level": "free",
          "description": "Minimal or no censorship"
        },
        {
          "range": "11-25",
          "level": "low",
          "description": "Limited content restrictions"
        },
        {
          "range": "26-45",
          "level": "medium",
          "description": "Significant restrictions on some platforms"
        },
        {
          "range": "46-70",
          "level": "high",
          "description": "Widespread blocking of platforms and news"
        },
        {
          "range": "71-100",
          "level": "severe",
          "description": "Pervasive censorship / isolated internet"
        }
      ]
    },
    "limitations": [
      "Scores are national averages; regional variations not captured",
      "Sample sizes vary by country, affecting confidence levels",
      "Real-time events may take up to 24h to reflect in scores",
      "Content filtering and throttling harder to detect than blocking",
      "Self-censorship and legal restrictions not measured",
      "ML metrics based on internal evaluation; independent audit pending"
    ],
    "updatePipeline": {
      "ingestion": "Every 5 minutes (probes), every 6 hours (OONI, IODA, CensoredPlanet)",
      "classification": "Weekly model retraining",
      "scoreLatency": "~6 hours for aggregated sources, ~5 minutes for probe data"
    }
  },
  "citation": {
    "apa": "Voidly Research. (2026). Global Censorship Index [Data set]. https://voidly.ai/censorship-index",
    "bibtex": "@misc{voidly2026censorship,\n  title={Global Censorship Index},\n  author={{Voidly Research}},\n  year={2026},\n  url={https://voidly.ai/censorship-index},\n  note={Data set. Updated continuously.}\n}"
  },
  "license": "CC BY 4.0",
  "contact": "research@voidly.ai"
}