From 3350bdeb87072a3117375625275da66bb065e467 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sat, 18 May 2024 12:23:22 +1200 Subject: [PATCH] refactoring and add http error test --- test/test_http_proxy.py | 19 ++++++++ yt_dlp/networking/_curlcffi.py | 2 +- yt_dlp/networking/_helper.py | 59 +++++++++++++++++++++++- yt_dlp/networking/_websockets.py | 79 ++++---------------------------- 4 files changed, 87 insertions(+), 72 deletions(-) diff --git a/test/test_http_proxy.py b/test/test_http_proxy.py index 94198163e..386f38b83 100644 --- a/test/test_http_proxy.py +++ b/test/test_http_proxy.py @@ -46,6 +46,11 @@ def do_proxy_auth(self, username, password): except Exception: return self.proxy_auth_error() + if auth_username == 'http_error': + self.send_response(404) + self.end_headers() + return False + if auth_username != (username or '') or auth_password != (password or ''): return self.proxy_auth_error() return True @@ -327,6 +332,14 @@ def test_http_bad_auth(self, handler, ctx): assert exc_info.value.response.status == 407 exc_info.value.response.close() + def test_http_error(self, handler, ctx): + with ctx.http_server(HTTPProxyHandler, username='http_error', password='test') as server_address: + with handler(proxies={ctx.REQUEST_PROTO: f'http://http_error:test@{server_address}'}) as rh: + with pytest.raises(HTTPError) as exc_info: + ctx.proxy_info_request(rh) + assert exc_info.value.response.status == 404 + exc_info.value.response.close() + def test_http_source_address(self, handler, ctx): with ctx.http_server(HTTPProxyHandler) as server_address: source_address = f'127.0.0.{random.randint(5, 255)}' @@ -398,6 +411,12 @@ def test_http_connect_bad_auth(self, handler, ctx): with pytest.raises(ProxyError): ctx.proxy_info_request(rh) + def test_http_connect_http_error(self, handler, ctx): + with ctx.http_server(HTTPConnectProxyHandler, username='http_error', password='test') as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: 
f'http://http_error:test@{server_address}'}) as rh: + with pytest.raises(ProxyError): + ctx.proxy_info_request(rh) + def test_http_connect_source_address(self, handler, ctx): with ctx.http_server(HTTPConnectProxyHandler) as server_address: source_address = f'127.0.0.{random.randint(5, 255)}' diff --git a/yt_dlp/networking/_curlcffi.py b/yt_dlp/networking/_curlcffi.py index f2df399e3..12fdc9b07 100644 --- a/yt_dlp/networking/_curlcffi.py +++ b/yt_dlp/networking/_curlcffi.py @@ -222,7 +222,7 @@ def _send(self, request: Request): elif ( e.code == CurlECode.PROXY - or (e.code == CurlECode.RECV_ERROR and 'Received HTTP code 407 from proxy after CONNECT' in str(e)) + or (e.code == CurlECode.RECV_ERROR and 'from proxy after CONNECT' in str(e)) ): raise ProxyError(cause=e) from e else: diff --git a/yt_dlp/networking/_helper.py b/yt_dlp/networking/_helper.py index 8e678b26a..25dedf469 100644 --- a/yt_dlp/networking/_helper.py +++ b/yt_dlp/networking/_helper.py @@ -1,5 +1,6 @@ from __future__ import annotations +import base64 import contextlib import functools import os @@ -9,8 +10,9 @@ import typing import urllib.parse import urllib.request +from http.client import HTTPConnection, HTTPResponse -from .exceptions import RequestError, UnsupportedRequest +from .exceptions import ProxyError, RequestError, UnsupportedRequest from ..dependencies import certifi from ..socks import ProxyType, sockssocket from ..utils import format_field, traverse_obj @@ -285,3 +287,58 @@ def create_connection( # Explicitly break __traceback__ reference cycle # https://bugs.python.org/issue36820 err = None + + +class NoCloseHTTPResponse(HTTPResponse): + def begin(self): + super().begin() + # Revert the default behavior of closing the connection after reading the response + if not self._check_close() and not self.chunked and self.length is None: + self.will_close = False + + +def create_http_connect_connection( + proxy_host, + proxy_port, + connect_host, + connect_port, + timeout=None, + 
ssl_context=None, + source_address=None, + username=None, + password=None, +): + + proxy_headers = dict() + + if username is not None or password is not None: + proxy_headers['Proxy-Authorization'] = 'Basic ' + base64.b64encode( + f'{username or ""}:{password or ""}'.encode('utf-8')).decode('utf-8') + + conn = HTTPConnection(proxy_host, port=proxy_port, timeout=timeout) + conn.response_class = NoCloseHTTPResponse + + if hasattr(conn, '_create_connection'): + conn._create_connection = create_connection + + if source_address is not None: + conn.source_address = (source_address, 0) + + try: + conn.connect() + if ssl_context: + conn.sock = ssl_context.wrap_socket(conn.sock, server_hostname=proxy_host) + conn.request( + method='CONNECT', + url=f'{connect_host}:{connect_port}', + headers=proxy_headers) + response = conn.getresponse() + except OSError as e: + conn.close() + raise ProxyError('Unable to connect to proxy', cause=e) from e + + if response.status == 200: + return conn.sock + else: + conn.close() + raise ProxyError(f'Got HTTP Error {response.status} with CONNECT: {response.reason}') diff --git a/yt_dlp/networking/_websockets.py b/yt_dlp/networking/_websockets.py index f2277ae25..46d5f88af 100644 --- a/yt_dlp/networking/_websockets.py +++ b/yt_dlp/networking/_websockets.py @@ -1,16 +1,17 @@ from __future__ import annotations -import base64 + import contextlib import io import logging import ssl import sys import urllib.parse -from http.client import HTTPConnection, HTTPResponse + from ._helper import ( create_connection, + create_http_connect_connection, create_socks_proxy_socket, make_socks_proxy_opts, select_proxy, @@ -30,7 +31,7 @@ from ..dependencies import urllib3, websockets from ..socks import ProxyError as SocksProxyError from ..utils import int_or_none -from ..utils.networking import HTTPHeaderDict + if not websockets: raise ImportError('websockets is not installed') @@ -164,9 +165,11 @@ def _make_sock(self, proxy, url, timeout): ) elif 
parsed_proxy_url.scheme in ('http', 'https'): - return create_http_connect_conn( - proxy_url=proxy, - url=url, + return create_http_connect_connection( + proxy_port=parsed_proxy_url.port, + proxy_host=parsed_proxy_url.hostname, + connect_port=parsed_url.port, + connect_host=parsed_url.host, timeout=timeout, ssl_context=self._make_sslcontext() if parsed_proxy_url.scheme == 'https' else None, source_address=self.source_address, @@ -229,14 +232,6 @@ def _send(self, request): raise TransportError(cause=e) from e -class NoCloseHTTPResponse(HTTPResponse): - def begin(self): - super().begin() - # Revert the default behavior of closing the connection after reading the response - if not self._check_close() and not self.chunked and self.length is None: - self.will_close = False - - if urllib3_supported: from urllib3.util.ssltransport import SSLTransport @@ -273,59 +268,3 @@ def wrap_socket(self, sock, server_hostname=None): if isinstance(sock, ssl.SSLSocket): return WebsocketsSSLTransport(sock, self.ssl_context, server_hostname=server_hostname) return self.ssl_context.wrap_socket(sock, server_hostname=server_hostname) - - -def create_http_connect_conn( - proxy_url, - url, - timeout=None, - ssl_context=None, - source_address=None, - username=None, - password=None, -): - - proxy_headers = HTTPHeaderDict() - - if username is not None or password is not None: - proxy_headers['Proxy-Authorization'] = 'Basic ' + base64.b64encode( - f'{username or ""}:{password or ""}'.encode('utf-8')).decode('utf-8') - - proxy_url_parsed = urllib.parse.urlparse(proxy_url) - request_url_parsed = parse_uri(url) - - conn = HTTPConnection(proxy_url_parsed.hostname, port=proxy_url_parsed.port, timeout=timeout) - conn.response_class = NoCloseHTTPResponse - - if hasattr(conn, '_create_connection'): - conn._create_connection = create_connection - - if source_address is not None: - conn.source_address = (source_address, 0) - - try: - conn.connect() - if ssl_context: - conn.sock = 
ssl_context.wrap_socket(conn.sock, server_hostname=proxy_url_parsed.hostname) - conn.request( - method='CONNECT', - url=f'{request_url_parsed.host}:{request_url_parsed.port}', - headers=proxy_headers) - response = conn.getresponse() - except OSError as e: - conn.close() - raise ProxyError('Unable to connect to proxy', cause=e) from e - - if response.status == 200: - return conn.sock - elif response.status == 407: - conn.close() - raise ProxyError('Got HTTP Error 407 with CONNECT: Proxy Authentication Required') - else: - conn.close() - res_adapter = Response( - fp=io.BytesIO(b''), - url=proxy_url, headers=response.headers, - status=response.status, - reason=response.reason) - raise HTTPError(response=res_adapter)