Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 149 additions & 0 deletions scripts/pypi_riscv64_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#!/usr/bin/env python3
"""
pypi_riscv64_check.py

Fetches the top N PyPI packages by download count, filters down to those
shipping platform-specific (binary) wheels, and flags which ones already
provide riscv64 wheels in their latest release on PyPI.

Data source: hugovk/top-pypi-packages (updated monthly, via GitHub raw)
Wheel info: PyPI JSON API https://pypi.org/pypi/{package}/json

Usage:
python pypi_riscv64_check.py # default: top 30
python pypi_riscv64_check.py --top 50 # top 50 binary-wheel packages
python pypi_riscv64_check.py --exclude-riscv64 # hide packages that already have it
"""

import argparse
import json
import time
import urllib.request

# Ranking dataset from hugovk/top-pypi-packages (minified JSON, fetched
# from GitHub raw); provides the download-ranked list of projects to scan.
TOP_PACKAGES_URL = (
    "https://raw.githubusercontent.com/hugovk/top-pypi-packages/"
    "main/top-pypi-packages.min.json"
)
# PyPI JSON API endpoint; "{package}" is filled in with the project name.
PYPI_JSON_URL = "https://pypi.org/pypi/{package}/json"
REQUEST_DELAY = 0.5  # seconds between PyPI API calls (be polite)


def fetch_json(url: str) -> dict | list:
req = urllib.request.Request(
url, headers={"User-Agent": "pypi-riscv64-checker/1.0"}
)
with urllib.request.urlopen(req, timeout=15) as r:
return json.loads(r.read())


def analyse_package(name: str, download_count: int) -> dict | None:
    """Summarise the wheels listed by the PyPI JSON API for *name*.

    A binary wheel is any ``.whl`` file whose filename does NOT end with
    ``none-any.whl`` (the platform-independent tag). A riscv64 wheel is a
    binary wheel whose *platform tag* (the last ``-``-separated field of
    the filename) contains ``riscv64``.

    Args:
        name: PyPI project name, substituted into ``PYPI_JSON_URL``.
        download_count: Download figure from the ranking dataset; copied
            unchanged into the result.

    Returns:
        ``None`` if the metadata could not be fetched or the project lists
        no binary wheel (pure-Python, or no wheels at all). Otherwise a
        dict with the keys ``project``, ``download_count``,
        ``binary_wheel_count``, ``has_riscv64`` and ``riscv64_wheel_count``.
    """
    try:
        info = fetch_json(PYPI_JSON_URL.format(package=name))
    except Exception as exc:
        # Deliberately best-effort: one unreachable package must not abort
        # the whole scan, so report it and move on.
        print(f" [WARN] Could not fetch {name}: {exc}")
        return None

    # Tolerate a non-object payload or a missing/null "urls" entry.
    files = (info.get("urls") or []) if isinstance(info, dict) else []
    wheel_files = [
        entry["filename"]
        for entry in files
        if entry.get("packagetype") == "bdist_wheel" and "filename" in entry
    ]

    binary_wheels = [f for f in wheel_files if not f.endswith("none-any.whl")]
    if not binary_wheels:
        return None  # pure-Python or no wheels at all

    # Inspect only the platform tag: a plain substring test on the whole
    # filename would also match a distribution name or version that happens
    # to contain "riscv64".
    riscv64_wheels = [
        f
        for f in binary_wheels
        if "riscv64" in f.lower().removesuffix(".whl").rsplit("-", 1)[-1]
    ]

    return {
        "project": name,
        "download_count": download_count,
        "binary_wheel_count": len(binary_wheels),
        "has_riscv64": bool(riscv64_wheels),
        "riscv64_wheel_count": len(riscv64_wheels),
    }


def fmt_count(n: int) -> str:
    """Format a count compactly, using M/B suffixes for large numbers.

    Values of at least one billion are shown with two decimals and a ``B``
    suffix, values of at least one million with one decimal and an ``M``
    suffix, and anything smaller as a plain integer string.

    Values just below one billion that would round up to ``1000.0M`` are
    promoted to the billions form (``1.00B``) instead.
    """
    if n < 1_000_000:
        return str(n)
    if n < 1_000_000_000:
        millions = f"{n / 1_000_000:.1f}"
        # Rounding can push e.g. 999_999_999 to "1000.0"; fall through to
        # the billions branch rather than emit "1000.0M".
        if millions != "1000.0":
            return f"{millions}M"
    return f"{n / 1_000_000_000:.2f}B"


def main(argv: list[str] | None = None) -> None:
    """Run the command-line scan and print the riscv64 wheel report.

    Walks the ranking dataset in order, analyses each project via
    ``analyse_package`` and stops once ``--top`` binary-wheel packages have
    been collected or the dataset is exhausted. With ``--exclude-riscv64``,
    packages that already have riscv64 wheels are reported as skipped and
    do not count towards ``--top``.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # Keep the usage examples of the module docstring on separate lines
        # instead of letting argparse re-wrap them into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--top", type=int, default=30,
        help="Number of binary-wheel packages to collect (default: 30)"
    )
    parser.add_argument(
        "--exclude-riscv64", action="store_true",
        help="Exclude packages that already have riscv64 wheels"
    )
    args = parser.parse_args(argv)

    print("Fetching top-packages dataset …")
    dataset = fetch_json(TOP_PACKAGES_URL)
    all_packages = dataset["rows"]
    last_update = dataset.get("last_update", "unknown")
    print(f" Dataset last updated: {last_update}")
    print(f" Scanning for top {args.top} binary-wheel packages …\n")

    results = []
    scanned = 0

    for pkg in all_packages:
        if len(results) >= args.top:
            break

        # Throttle every PyPI request (except the first), including those
        # for packages that turn out to be pure-Python or get skipped.
        if scanned:
            time.sleep(REQUEST_DELAY)

        name = pkg["project"]
        dl = pkg["download_count"]
        result = analyse_package(name, dl)
        scanned += 1

        if result is None:
            continue

        if args.exclude_riscv64 and result["has_riscv64"]:
            print(f" SKIP (riscv64 exists): {name}")
            continue

        results.append(result)
        riscv_tag = "✓ riscv64" if result["has_riscv64"] else "✗ NO riscv64"
        print(f" [{len(results):2d}] {name:<40s} {riscv_tag}")

    # --- Print table ---
    print()
    print(f"{'Rank':<5} {'Package':<35} {'Downloads':>12} {'Binary Wheels':>14} {'riscv64':>10}")
    print("-" * 80)
    for i, r in enumerate(results, 1):
        riscv_col = f"✓ ({r['riscv64_wheel_count']})" if r["has_riscv64"] else "✗"
        print(
            f"{i:<5} {r['project']:<35} "
            f"{fmt_count(r['download_count']):>12} "
            f"{r['binary_wheel_count']:>14} "
            f"{riscv_col:>10}"
        )
    print()

    # --- Summary of packages still lacking riscv64 wheels ---
    no_riscv = [r["project"] for r in results if not r["has_riscv64"]]
    print(f"Packages WITHOUT riscv64 wheels ({len(no_riscv)}/{len(results)}):")
    for name in no_riscv:
        print(f" - {name}")

    print(f"\nScanned {scanned} packages total.")


# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()