44import asyncio
55import logging
66import traceback
7+ from urllib .parse import urlparse
78from socksio .exceptions import SOCKSError
89from contextlib import asynccontextmanager
10+ from radixtarget import RadixTarget
911
1012from bbot .core .engine import EngineServer
1113from bbot .core .helpers .misc import bytes_to_human , human_to_bytes , get_exception_chain , truncate_string
@@ -36,6 +38,32 @@ def __init__(self, socket_path, target, config={}, debug=False):
3638 self .web_clients = {}
3739 self .web_client = self .AsyncClient (persist_cookies = False )
3840
41+ # proxy exclusion support
42+ self .has_proxy = bool (self .web_config .get ("http_proxy" , "" ))
43+ proxy_exclusions = self .web_config .get ("http_proxy_exclude" , [])
44+ self .noproxy_web_clients = {}
45+ self .proxy_bypass_all = False
46+ if self .has_proxy and proxy_exclusions :
47+ normalized = []
48+ for pattern in proxy_exclusions :
49+ pattern = str (pattern ).strip ()
50+ if pattern == "*" :
51+ self .proxy_bypass_all = True
52+ break
53+ # normalize NO_PROXY conventions for radixtarget
54+ # ".example.com" and "*.example.com" both mean "example.com + subdomains"
55+ if pattern .startswith ("*." ):
56+ pattern = pattern [2 :]
57+ elif pattern .startswith ("." ):
58+ pattern = pattern [1 :]
59+ if pattern :
60+ normalized .append (pattern )
61+ self .proxy_exclusion_target = RadixTarget (* normalized ) if normalized else RadixTarget ()
62+ self .noproxy_web_client = self ._AsyncClient_noproxy (persist_cookies = False )
63+ else :
64+ self .proxy_exclusion_target = RadixTarget ()
65+ self .noproxy_web_client = None
66+
3967 def AsyncClient (self , * args , ** kwargs ):
4068 # cache by retries to prevent unwanted accumulation of clients
4169 # (they are not garbage-collected)
@@ -49,12 +77,44 @@ def AsyncClient(self, *args, **kwargs):
4977 self .web_clients [client .retries ] = client
5078 return client
5179
80+ def _AsyncClient_noproxy (self , * args , ** kwargs ):
81+ """Create/cache a BBOTAsyncClient with proxy disabled, for excluded hosts."""
82+ retries = kwargs .get ("retries" , 1 )
83+ try :
84+ return self .noproxy_web_clients [retries ]
85+ except KeyError :
86+ from .client import BBOTAsyncClient
87+
88+ noproxy_config = dict (self .config )
89+ noproxy_web = dict (noproxy_config .get ("web" , {}))
90+ noproxy_web ["http_proxy" ] = None
91+ noproxy_config ["web" ] = noproxy_web
92+ client = BBOTAsyncClient .from_config (noproxy_config , self .target , * args , ** kwargs )
93+ self .noproxy_web_clients [client .retries ] = client
94+ return client
95+
96+ def _get_client_for_url (self , url , client = None ):
97+ """Return the appropriate client based on proxy exclusion rules.
98+
99+ If no explicit client is provided and the URL matches an exclusion pattern,
100+ returns the no-proxy client. Otherwise returns the given client or default.
101+ """
102+ if client is not None :
103+ return client
104+ if self .noproxy_web_client is not None and url :
105+ if self .proxy_bypass_all :
106+ return self .noproxy_web_client
107+ hostname = urlparse (str (url )).hostname
108+ if hostname and self .proxy_exclusion_target .get (hostname ):
109+ return self .noproxy_web_client
110+ return self .web_client
111+
52112 async def request (self , * args , ** kwargs ):
53113 raise_error = kwargs .pop ("raise_error" , False )
54114 # TODO: use this
55115 cache_for = kwargs .pop ("cache_for" , None ) # noqa
56116
57- client = kwargs .get ("client" , self . web_client )
117+ explicit_client = kwargs .pop ("client" , None )
58118
59119 # allow vs follow, httpx why??
60120 allow_redirects = kwargs .pop ("allow_redirects" , None )
@@ -79,6 +139,8 @@ async def request(self, *args, **kwargs):
79139
80140 if client_kwargs :
81141 client = self .AsyncClient (** client_kwargs )
142+ else :
143+ client = self ._get_client_for_url (url , explicit_client )
82144
83145 try :
84146 async with self ._acatch (url , raise_error ):
@@ -144,7 +206,8 @@ async def stream_request(self, url, **kwargs):
144206 chunk_size = 8192
145207 chunks = []
146208
147- async with self ._acatch (url , raise_error = True ), self .web_client .stream (url = url , ** kwargs ) as response :
209+ stream_client = self ._get_client_for_url (url )
210+ async with self ._acatch (url , raise_error = True ), stream_client .stream (url = url , ** kwargs ) as response :
148211 agen = response .aiter_bytes (chunk_size = chunk_size )
149212 async for chunk in agen :
150213 _chunk_size = len (chunk )
0 commit comments