Files
LAIC/venv/Lib/site-packages/werkzeug/sansio/utils.py
T
2026-05-06 17:30:35 +03:00

225 lines
7.3 KiB
Python

from __future__ import annotations
import re
import typing as t
from urllib.parse import quote
from .._internal import _plain_int
from ..exceptions import SecurityError
from ..http import parse_set_header
from ..urls import uri_to_iri
_host_re = re.compile(
r"""
(
[a-z0-9.-]+ # domain or ipv4
|
\[[a-f0-9]*:[a-f0-9.:]+] # ipv6
)
(?::[0-9]+)? # optional port
""",
flags=re.ASCII | re.IGNORECASE | re.VERBOSE,
)
def host_is_trusted(
hostname: str | None, trusted_list: t.Collection[str] | None = None
) -> bool:
"""Perform some checks on a ``Host`` header ``host:port``. The host must be
made up of valid characters, but this does not check validity beyond that.
If a list of trusted domains is given, the domain must match one.
:param hostname: The ``Host`` header ``host:port`` to check.
:param trusted_list: A list of trusted domains to match. These should
already be IDNA encoded, but will be encoded if needed. The port is
ignored for this check. If a name starts with a dot it will match as a
suffix, accepting all subdomains. If empty or ``None``, all domains are
allowed.
.. versionchanged:: 3.2
The value's characters are validated.
.. versionchanged:: 3.2
``trusted_list`` defaults to ``None``.
.. versionadded:: 0.9
"""
if not hostname:
return False
if _host_re.fullmatch(hostname) is None:
return False
hostname = hostname.partition(":")[0]
if not trusted_list:
return True
if isinstance(trusted_list, str):
trusted_list = [trusted_list]
for ref in trusted_list:
if ref.startswith("."):
ref = ref[1:]
suffix_match = True
else:
suffix_match = False
try:
ref = ref.partition(":")[0].encode("idna").decode("ascii")
except UnicodeEncodeError:
return False
if ref == hostname or (suffix_match and hostname.endswith(f".{ref}")):
return True
return False
def get_host(
scheme: str,
host_header: str | None,
server: tuple[str, int | None] | None = None,
trusted_hosts: t.Collection[str] | None = None,
) -> str:
"""Get and validate a request's ``host:port`` based on the given values.
The ``Host`` header sent by the client is preferred. Otherwise, the server's
configured address is used. The port is omitted if it matches the standard
HTTP or HTTPS ports.
The value is passed through :func:`host_is_trusted`. The host must be made
up of valid characters, but this does not check validity beyond that. If a
list of trusted domains is given, the domain must match one.
If the host header is not available, such as for HTTP/0.9 and 1.0, or it has
invalid characters, the empty string is returned. Subdomain and host
routing, and external URL building, will not work in these cases.
:param scheme: The protocol of the request. Used to omit the standard ports
80 and 443.
:param host_header: The ``Host`` header value.
:param server: The server's configured address ``(host, port)``. The server
may be using a Unix socket and give ``(path, None)``; this is ignored as
it would not produce a useful host value.
:param trusted_hosts: A list of trusted domains to match. These should
already be IDNA encoded, but will be encoded if needed. The port is
ignored for this check. If a name starts with a dot it will match as a
suffix, accepting all subdomains. If empty or ``None``, all domains are
allowed.
:return: Host, with port if necessary.
:raise .SecurityError: If the host is not trusted.
.. versionchanged:: 3.1.8
The empty string is again returned if no host header value is available,
or if the characters are invalid.
.. versionchanged:: 3.1.7
The characters of the host value are validated. The empty string is no
longer allowed if no header value is available.
.. versionchanged:: 3.2
When using the server address, Unix sockets are ignored.
.. versionchanged:: 3.1.3
If ``SERVER_NAME`` is IPv6, it is wrapped in ``[]``.
"""
if host_header is not None:
host = host_header
# The port server[1] will be None for a Unix socket. Ignore in that case.
elif server is not None and server[1] is not None:
host = server[0]
# If SERVER_NAME is IPv6, wrap it in [] to match Host header.
# Check for : because domain or IPv4 can't have that.
if ":" in host and host[0] != "[":
host = f"[{host}]"
host = f"{host}:{server[1]}"
else:
# Pass through empty host from HTTP/0.9 and 1.0.
return ""
if scheme in {"http", "ws"}:
host = host.removesuffix(":80")
elif scheme in {"https", "wss"}:
host = host.removesuffix(":443")
if not host_is_trusted(host, trusted_hosts):
if trusted_hosts:
raise SecurityError(f"Host {host!r} is not trusted.")
# Invalid characters, treat as empty.
return ""
return host
def get_current_url(
scheme: str,
host: str,
root_path: str | None = None,
path: str | None = None,
query_string: bytes | None = None,
) -> str:
"""Recreate the URL for a request. If an optional part isn't
provided, it and subsequent parts are not included in the URL.
The URL is an IRI, not a URI, so it may contain Unicode characters.
Use :func:`~werkzeug.urls.iri_to_uri` to convert it to ASCII.
:param scheme: The protocol the request used, like ``"https"``.
:param host: The host the request was made to. See :func:`get_host`.
:param root_path: Prefix that the application is mounted under. This
is prepended to ``path``.
:param path: The path part of the URL after ``root_path``.
:param query_string: The portion of the URL after the "?".
"""
url = [scheme, "://", host]
if root_path is None:
url.append("/")
return uri_to_iri("".join(url))
# safe = https://url.spec.whatwg.org/#url-path-segment-string
# as well as percent for things that are already quoted
url.append(quote(root_path.rstrip("/"), safe="!$&'()*+,/:;=@%"))
url.append("/")
if path is None:
return uri_to_iri("".join(url))
url.append(quote(path.lstrip("/"), safe="!$&'()*+,/:;=@%"))
if query_string:
url.append("?")
url.append(quote(query_string, safe="!$&'()*+,/:;=?@%"))
return uri_to_iri("".join(url))
def get_content_length(
http_content_length: str | None = None,
http_transfer_encoding: str | None = None,
) -> int | None:
"""Return the ``Content-Length`` header value as an int. If the header is not given
or the ``Transfer-Encoding`` header is ``chunked``, ``None`` is returned to indicate
a streaming request. If the value is not an integer, or negative, 0 is returned.
:param http_content_length: The Content-Length HTTP header.
:param http_transfer_encoding: The Transfer-Encoding HTTP header.
.. versionadded:: 2.2
"""
if (
http_transfer_encoding is not None
and "chunked" in parse_set_header(http_transfer_encoding)
) or http_content_length is None:
return None
try:
return max(0, _plain_int(http_content_length))
except ValueError:
return 0