Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
277 changes: 205 additions & 72 deletions Cachyos/Scripts/WIP/gh/git-fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
import os
import queue
import sys
import tempfile
import unittest
from unittest import mock
import urllib.parse
import urllib.request
import urllib.error
Expand Down Expand Up @@ -92,6 +95,205 @@
raise urllib.error.URLError(f"{e} (url: {url})") from e


class _FakeResponse:
"""Test helper: minimal fake HTTPResponse for process_single_download tests."""

def __init__(
self,
status: int,
body: bytes = b"",
headers: dict[str, str] | None = None,
chunks: list[bytes] | None = None,
) -> None:
self.status = status
self._headers = {k.lower(): v for k, v in (headers or {}).items()}
if chunks is not None:
self._chunks: list[bytes] = list(chunks)
else:
self._chunks = [body] if body else [b""]

def getheader(self, name: str, default: str | None = None) -> str | None:
return self._headers.get(name.lower(), default)

def read(self, _: int = -1) -> bytes:
if not self._chunks:
return b""
return self._chunks.pop(0)


class _FakeConnection:
    """Test helper: minimal stand-in for HTTPSConnection used by the download tests."""

    def __init__(self, responses: list[_FakeResponse]) -> None:
        # Responses are handed out in FIFO order, one per getresponse() call.
        self._responses: list[_FakeResponse] = list(responses)
        # Every (method, url, headers) triple passed to request() is recorded
        # so tests can count attempts and inspect what was sent.
        self.request_calls: list[tuple[str, str, dict[str, str] | None]] = []
        self.closed = False

    def request(self, method: str, url: str, headers: dict[str, str] | None = None) -> None:
        """Record the request instead of performing any network I/O."""
        call = (method, url, headers)
        self.request_calls.append(call)

    def getresponse(self) -> _FakeResponse:
        """Return the next queued fake response."""
        return self._responses.pop(0)

    def close(self) -> None:
        """Mark the connection as closed; tests assert on this flag."""
        self.closed = True


class ProcessSingleDownloadTests(unittest.TestCase):
    """Unit tests for process_single_download retry and connection handling."""

    def test_200_writes_file(self) -> None:
        payload = b"hello world"
        fake_conn = _FakeConnection([_FakeResponse(200, chunks=[payload, b""])])
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / "file.txt"
            returned = process_single_download(
                conn=fake_conn,
                host="example.com",
                url_path="/path",
                local_path=target,
                display_path="file.txt",
                headers={},
            )
            self.assertTrue(target.exists())
            self.assertEqual(target.read_bytes(), payload)
            # A 200 without "Connection: close" must reuse the same connection.
            self.assertIs(returned, fake_conn)

    def test_5xx_and_429_retries_then_succeeds(self) -> None:
        payload = b"ok"
        # First attempt 500, second 429, third succeeds with a body.
        queued = [
            _FakeResponse(500, chunks=[b"", b""]),
            _FakeResponse(429, chunks=[b"", b""]),
            _FakeResponse(200, chunks=[payload, b""]),
        ]
        fake_conn = _FakeConnection(queued)
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / "file.txt"
            returned = process_single_download(
                conn=fake_conn,
                host="example.com",
                url_path="/retry",
                local_path=target,
                display_path="file.txt",
                headers={},
            )
            self.assertTrue(target.exists())
            self.assertEqual(target.read_bytes(), payload)
            # One request per attempt: 500, 429, then the successful 200.
            self.assertEqual(len(fake_conn.request_calls), 3)
            self.assertIs(returned, fake_conn)

    def test_connection_close_triggers_new_httpsconnection(self) -> None:
        payload = b"data"
        closing_response = _FakeResponse(
            200,
            chunks=[payload, b""],
            headers={"Connection": "close"},
        )
        initial_conn = _FakeConnection([closing_response])
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / "file.txt"
            with mock.patch("http.client.HTTPSConnection") as patched_cls:
                replacement = mock.Mock(spec=http.client.HTTPSConnection)
                patched_cls.return_value = replacement
                returned = process_single_download(
                    conn=initial_conn,
                    host="example.com",
                    url_path="/close",
                    local_path=target,
                    display_path="file.txt",
                    headers={},
                )
            # The file must still be written correctly.
            self.assertTrue(target.exists())
            self.assertEqual(target.read_bytes(), payload)
            # The old connection is closed and a fresh HTTPSConnection is opened.
            self.assertTrue(initial_conn.closed)
            patched_cls.assert_called_once_with("example.com", timeout=30)
            self.assertIs(returned, replacement)

# Single place for the connection timeout previously hard-coded as 30 in
# four separate call sites.
_HTTP_TIMEOUT = 30


def _fresh_connection(host: str) -> http.client.HTTPSConnection:
    """Open a new HTTPS connection to *host* with the standard timeout."""
    return http.client.HTTPSConnection(host, timeout=_HTTP_TIMEOUT)


def _maybe_reconnect(
    conn: http.client.HTTPSConnection, host: str, should_close: bool
) -> http.client.HTTPSConnection:
    """Close *conn* and open a replacement if the server asked us to close.

    Returns the connection to use for subsequent requests (the original one
    when no close was requested).
    """
    if should_close:
        conn.close()
        conn = _fresh_connection(host)
    return conn


def _write_body(resp: http.client.HTTPResponse, local_path: Path) -> None:
    """Stream the response body to *local_path* in 64 KiB chunks."""
    # Create parent directory to avoid FileNotFoundError.
    local_path.parent.mkdir(parents=True, exist_ok=True)
    with open(local_path, "wb") as f:
        while True:
            chunk = resp.read(65536)
            if not chunk:
                break
            f.write(chunk)


def process_single_download(
    conn: http.client.HTTPSConnection,
    host: str,
    url_path: str,
    local_path: Path,
    display_path: str,
    headers: dict[str, str],
) -> http.client.HTTPSConnection:
    """Download one file over a persistent connection, with retries.

    Retries up to 3 times on transient server errors (5xx), rate limiting
    (429), and connection-level failures. Client errors (401/403/404),
    redirects, and any other status fail fast without retrying.

    Returns the connection to reuse for the next file — a fresh one if the
    server sent ``Connection: close`` or the old one errored out.
    """
    retries = 3
    while retries > 0:
        try:
            conn.request("GET", url_path, headers=headers)
            resp = conn.getresponse()

            # Check if the server wants to close the connection.
            should_close = resp.getheader("Connection", "").lower() == "close"

            if resp.status == 200:
                _write_body(resp, local_path)
                print(f"✓ {display_path}")
                conn = _maybe_reconnect(conn, host, should_close)
                break

            if resp.status in (301, 302, 307, 308):
                loc = resp.getheader("Location")
                resp.read()  # Consume body so the connection stays usable
                if loc:
                    print(f"✗ {display_path}: Redirect to {loc} not handled in persistent mode")
                else:
                    print(f"✗ {display_path}: HTTP {resp.status}")
                conn = _maybe_reconnect(conn, host, should_close)
                break

            print(f"✗ {display_path}: HTTP {resp.status}")
            resp.read()  # Consume body
            conn = _maybe_reconnect(conn, host, should_close)

            # Non-retriable client errors: fail fast.
            if resp.status in (401, 403, 404):
                break

            # Retry on transient server errors and rate limiting.
            if 500 <= resp.status < 600 or resp.status == 429:
                retries -= 1
                if retries > 0:
                    continue
                break  # Out of retries, give up.

            # Default: treat other statuses as non-retriable.
            break
        except (http.client.HTTPException, OSError) as e:
            # Connection might have been closed by the server unexpectedly;
            # drop it and retry on a fresh one while retries remain.
            conn.close()
            retries -= 1
            if retries > 0:
                conn = _fresh_connection(host)
            else:
                print(f"✗ {display_path}: {e}")

    return conn
Comment on lines +217 to +294
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

While this refactoring is a good step in reducing complexity in download_worker, the extracted process_single_download function itself has some maintainability issues that could be improved:

  1. Magic Number: The timeout value 30 is hardcoded in multiple places (here and elsewhere in the file). This should be defined as a module-level constant, e.g., HTTP_TIMEOUT = 30, for better maintainability.

  2. Duplicated Logic (DRY violation): The logic for closing and reopening a connection when should_close is true is repeated three times within this function.

    if should_close:
      conn.close()
      conn = http.client.HTTPSConnection(host, timeout=30)

    This could be extracted into a small, private helper function (e.g., _reconnect(conn, host)) to reduce code duplication and make the intent clearer.



def download_worker(host: str, file_q: queue.Queue, headers: dict[str, str]) -> None:
"""Worker thread: process files from queue using a persistent connection."""
# Make a per-thread copy so we don't mutate a shared headers dict.
Expand All @@ -111,78 +313,9 @@

try:
url_path, local_path, display_path = item

# Process download
retries = 3
while retries > 0:
try:
conn.request("GET", url_path, headers=headers)
resp = conn.getresponse()

# Check if the server wants to close the connection
connection_header = resp.getheader("Connection", "").lower()
should_close = connection_header == "close"

if resp.status == 200:
# Create parent directory to avoid FileNotFoundError
local_path.parent.mkdir(parents=True, exist_ok=True)
with open(local_path, "wb") as f:
while True:
chunk = resp.read(65536)
if not chunk:
break
f.write(chunk)
print(f"✓ {display_path}")

if should_close:
conn.close()
# Reconnect for the next file in the queue
conn = http.client.HTTPSConnection(host, timeout=30)

break
elif resp.status in (301, 302, 307, 308):
loc = resp.getheader("Location")
resp.read() # Consume body
if loc:
print(
f"✗ {display_path}: Redirect to {loc} not handled in persistent mode"
)
else:
print(f"✗ {display_path}: HTTP {resp.status}")

if should_close:
conn.close()
conn = http.client.HTTPSConnection(host, timeout=30)
break
else:
print(f"✗ {display_path}: HTTP {resp.status}")
resp.read() # Consume body
if should_close:
conn.close()
conn = http.client.HTTPSConnection(host, timeout=30)

# Non-retriable client errors: fail fast
if resp.status in (401, 403, 404):
break

# Retry on transient server errors and rate limiting
if 500 <= resp.status < 600 or resp.status == 429:
retries -= 1
if retries > 0:
continue
# Out of retries, give up
break

# Default: treat other statuses as non-retriable
break
except (http.client.HTTPException, OSError) as e:
# Connection might have been closed by server unexpectedly
conn.close()
retries -= 1
if retries > 0:
conn = http.client.HTTPSConnection(host, timeout=30)
else:
print(f"✗ {display_path}: {e}")
conn = process_single_download(
conn, host, url_path, local_path, display_path, headers
)
finally:
file_q.task_done()
finally:
Expand Down
Loading