-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrequest.py
More file actions
127 lines (102 loc) · 4.4 KB
/
request.py
File metadata and controls
127 lines (102 loc) · 4.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""
request.py - HTTP/1.1 Request Parser

Responsible for turning raw bytes off the socket into a structured Python
object. The HTTP/1.1 spec (RFC 7230) defines the message format as:

    <Method> <Request-URI> <HTTP-Version>\\r\\n
    <Header-Name>: <Header-Value>\\r\\n
    ...
    \\r\\n
    [optional body]

We rely on the \\r\\n\\r\\n separator to split headers from the body, which is
the canonical boundary defined by the protocol.
"""
import logging
from dataclasses import dataclass, field
from typing import Dict, Optional
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# The double CRLF that marks the end of HTTP headers per RFC 7230 §3.
# parse_request() splits the raw bytes on the first occurrence of this value.
HEADER_BODY_SEPARATOR: bytes = b"\r\n\r\n"
@dataclass
class HTTPRequest:
    """A single HTTP/1.1 request broken out into its component parts.

    The first three fields mirror the request line; ``headers`` and ``body``
    carry everything that followed it. ``error`` is the only optional field
    and defaults to ``None``.
    """

    # Request-line components, in the order they appear on the wire.
    method: str
    path: str
    version: str

    # Header fields keyed by name, and the bytes after the header section.
    headers: Dict[str, str]
    body: bytes

    # Parse failures are reported through this field rather than by raising,
    # so the caller can build the matching HTTP error response directly
    # instead of unwinding through several stack frames.
    error: Optional[int] = None  # None = no error; 400 = bad request
def parse_request(raw_bytes: bytes) -> HTTPRequest:
    """
    Parse raw bytes received from a TCP socket into an HTTPRequest.

    HTTP separates headers from the body with \\r\\n\\r\\n. We locate that
    boundary first; if it is absent the bytes are not a valid HTTP message
    and we return a 400 error object instead of raising so the server can
    respond gracefully.

    The request is rejected (error=400) when:
      * the input is empty or has no header/body separator,
      * the request line does not consist of exactly three space-separated
        tokens,
      * the method is not upper-case, the path does not start with "/", or
        the version token does not start with "HTTP/".

    Individual header lines that are malformed (no colon, or an empty field
    name) are logged at debug level and skipped rather than failing the
    whole request. When a header name repeats, the last value wins.

    Args:
        raw_bytes: The complete raw data read from the client socket.

    Returns:
        HTTPRequest with error=None on success, or error=400 on malformed input.
        On success, header names are lower-cased and both names and values
        are stripped of surrounding whitespace; body is everything after the
        first \\r\\n\\r\\n, unmodified.
    """
    if not raw_bytes:
        logger.warning("Received empty request bytes")
        return _bad_request()

    # ── Split on the mandatory blank line between headers and body ──────────
    # partition() scans once for the first separator; an empty middle element
    # means the separator was not found.
    header_section, separator, body = raw_bytes.partition(HEADER_BODY_SEPARATOR)
    if not separator:
        logger.warning("Request missing header/body separator (\\r\\n\\r\\n)")
        return _bad_request()

    # ISO-8859-1 assigns a code point to every byte value, so this decode
    # cannot fail and needs no UnicodeDecodeError handling.
    header_text: str = header_section.decode("iso-8859-1")

    # str.split() always yields at least one element, so the request line is
    # always present (possibly as an empty string, which is rejected below).
    request_line, *header_lines = header_text.split("\r\n")

    # ── Parse the Request-Line (first line) ─────────────────────────────────
    parts = request_line.split(" ")
    if len(parts) != 3:
        logger.warning("Malformed request line: %r", request_line)
        return _bad_request()
    method, path, version = parts

    # Basic sanity checks so we don't forward garbage to handlers
    if not method.isupper() or not path.startswith("/"):
        logger.warning("Invalid method or path in request line: %r", request_line)
        return _bad_request()
    if not version.startswith("HTTP/"):
        logger.warning("Invalid HTTP version in request line: %r", request_line)
        return _bad_request()

    # ── Parse headers into a dict ────────────────────────────────────────────
    headers: Dict[str, str] = {}
    for line in header_lines:
        name, colon, value = line.partition(":")
        # Header names are case-insensitive per RFC 7230 §3.2; normalise to
        # lower-case so handlers can use a single consistent key form.
        name = name.strip().lower()
        if not colon or not name:
            # A header line without a colon, or with nothing before it, is
            # technically malformed but many real clients are lenient, so we
            # log and skip rather than reject. Skipping also keeps an empty
            # string from ever becoming a header key.
            logger.debug("Skipping malformed header line: %r", line)
            continue
        headers[name] = value.strip()

    logger.debug("Parsed request: %s %s %s", method, path, version)
    return HTTPRequest(
        method=method,
        path=path,
        version=version,
        headers=headers,
        body=body,
    )
# ── Internal helpers ────────────────────────────────────────────────────────
def _bad_request() -> HTTPRequest:
    """Build the placeholder HTTPRequest that stands for a 400 Bad Request.

    Every data field is empty and ``error`` is set to 400. A new object,
    with its own empty headers dict, is created on each call.
    """
    blank_fields = dict(method="", path="", version="", headers={}, body=b"")
    return HTTPRequest(error=400, **blank_fields)