Coverage for amazonorders/session.py: 90.77%
195 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-24 18:41 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-24 18:41 +0000
1import json
2import logging
3import os
4from io import BytesIO
5from typing import Optional, Any, Dict
6from urllib.parse import urlparse
8import requests
9from PIL import Image
10from amazoncaptcha import AmazonCaptcha
11from bs4 import BeautifulSoup, Tag
12from requests import Session, Response
13from requests.utils import dict_from_cookiejar
15from amazonorders.conf import DEFAULT_COOKIE_JAR_PATH, DEFAULT_OUTPUT_DIR
16from amazonorders.exception import AmazonOrdersAuthError
# Package metadata.
__author__ = "Alex Laird"
__copyright__ = "Copyright 2024, Alex Laird"
__version__ = "1.0.4"

logger = logging.getLogger(__name__)

# Root of every Amazon URL built in this module.
BASE_URL = "https://www.amazon.com"

# Browser-like headers applied to every request made through the session.
BASE_HEADERS = {
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.7"
    ),
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "Cache-Control": "max-age=0",
    "Content-Type": "application/x-www-form-urlencoded",
    "Origin": BASE_URL,
    "Referer": f"{BASE_URL}/ap/signin",
    "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "macOS",
    "Sec-Ch-Viewport-Width": "1393",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Viewport-Width": "1393",
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}

# Names, ids, and classes used to recognize which auth page was returned.
SIGN_IN_FORM_NAME = "signIn"
MFA_DEVICE_SELECT_FORM_ID = "auth-select-device-form"
MFA_FORM_ID = "auth-mfa-form"
CAPTCHA_1_DIV_ID = "cvf-page-content"
CAPTCHA_1_FORM_CLASS = "cvf-widget-form-captcha"
CAPTCHA_2_INPUT_ID = "captchacharacters"
CAPTCHA_OTP_FORM_ID = "verification-code-form"
class IODefault:
    """
    Console-backed input/output handler for the application. Pass a subclass
    (or a compatible object) when constructing an :class:`AmazonSession` to
    route messages and prompts somewhere other than the console.
    """

    def echo(self, msg, **kwargs):
        """
        Write a message to standard output.

        :param msg: The data to send to output.
        :param kwargs: Accepted for interface compatibility; ignored here.
        """
        print(msg)

    def prompt(self, msg, type=None, **kwargs):
        """
        Ask the user for a value on the console.

        :param msg: The text shown as the prompt, followed by ``": "``.
        :param type: Accepted for interface compatibility; ignored here.
        :param kwargs: Accepted for interface compatibility; ignored here.
        :return: The string entered by the user.
        """
        answer = input(f"{msg}: ")
        return answer
class AmazonSession:
    """
    An interface for interacting with Amazon and authenticating an underlying :class:`requests.Session`. Utilizing
    this class means session data is maintained between requests. Session data is also persisted after each request,
    meaning it will also be maintained between separate instantiations of the class or application.

    To get started, call the :func:`login` function.
    """

    def __init__(self,
                 username: str,
                 password: str,
                 debug: bool = False,
                 max_auth_attempts: int = 10,
                 cookie_jar_path: Optional[str] = None,
                 io: Optional[IODefault] = None,
                 output_dir: Optional[str] = None) -> None:
        """
        Create the session and load any cookies previously persisted at ``cookie_jar_path``.

        :param username: An Amazon username.
        :param password: An Amazon password.
        :param debug: Enable ``DEBUG`` logging and write each response body to ``output_dir``.
        :param max_auth_attempts: The maximum number of auth flow steps :func:`login` will take.
        :param cookie_jar_path: Where cookies are persisted, defaults to ``conf.DEFAULT_COOKIE_JAR_PATH``.
        :param io: The I/O handler for echoes and prompts, defaults to a new :class:`IODefault`.
        :param output_dir: Where output files are written, defaults to ``conf.DEFAULT_OUTPUT_DIR``.
        """
        if not cookie_jar_path:
            cookie_jar_path = DEFAULT_COOKIE_JAR_PATH
        if not output_dir:
            output_dir = DEFAULT_OUTPUT_DIR

        #: An Amazon username.
        self.username: str = username
        #: An Amazon password.
        self.password: str = password

        #: Set logger ``DEBUG``, send output to ``stderr``, and write an HTML file for each request made on the session.
        self.debug: bool = debug
        if self.debug:
            logger.setLevel(logging.DEBUG)
        #: Will continue in :func:`login()`'s auth flow this many times (successes and failures).
        self.max_auth_attempts: int = max_auth_attempts
        #: The path to persist session cookies, defaults to ``conf.DEFAULT_COOKIE_JAR_PATH``.
        self.cookie_jar_path: str = cookie_jar_path
        #: The I/O handler for echoes and prompts.
        self.io: IODefault = io if io is not None else IODefault()
        #: The directory where any output files will be produced, defaults to ``conf.DEFAULT_OUTPUT_DIR``.
        self.output_dir = output_dir

        #: The shared session to be used across all requests.
        self.session: Session = Session()
        #: The last response executed on the Session.
        self.last_response: Optional[Response] = None
        #: A parsed representation of the last response executed on the Session.
        self.last_response_parsed: Optional[Tag] = None
        #: If :func:`login()` has been executed and successfully logged in the session.
        self.is_authenticated: bool = False

        # A bare filename has an empty dirname, and os.makedirs("") raises, so
        # only create the directory when there is one to create.
        cookie_dir = os.path.dirname(self.cookie_jar_path)
        if cookie_dir:
            os.makedirs(cookie_dir, exist_ok=True)
        if os.path.exists(self.cookie_jar_path):
            with open(self.cookie_jar_path, "r", encoding="utf-8") as f:
                data = json.load(f)
                cookies = requests.utils.cookiejar_from_dict(data)
                self.session.cookies.update(cookies)

    def request(self,
                method: str,
                url: str,
                **kwargs: Any) -> Response:
        """
        Execute the request against Amazon with base headers, parsing and storing the response
        and persisting response cookies.

        :param method: The request method to execute.
        :param url: The URL to execute ``method`` on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`requests.request`.
        :return: The Response from the executed request.
        """
        # Build a fresh dict so the caller's headers object is never mutated
        # (and ``headers=None`` is tolerated). ``BASE_HEADERS`` is applied last,
        # so it takes precedence over any caller-supplied header of the same name.
        merged_headers = dict(kwargs.get("headers") or {})
        merged_headers.update(BASE_HEADERS)
        kwargs["headers"] = merged_headers

        logger.debug("%s request to %s", method, url)

        self.last_response = self.session.request(method, url, **kwargs)
        self.last_response_parsed = BeautifulSoup(self.last_response.text,
                                                  "html.parser")

        # Persist the session's cookies after every request.
        cookies = dict_from_cookiejar(self.session.cookies)
        if os.path.exists(self.cookie_jar_path):
            os.remove(self.cookie_jar_path)
        with open(self.cookie_jar_path, "w", encoding="utf-8") as f:
            f.write(json.dumps(cookies))

        logger.debug("Response: %s - %s",
                     self.last_response.url,
                     self.last_response.status_code)

        if self.debug:
            # The output directory may not exist yet; create it rather than
            # failing the request just because the debug dump can't be written.
            os.makedirs(self.output_dir, exist_ok=True)
            page_name = self._get_page_from_url(self.last_response.url)
            with open(os.path.join(self.output_dir, page_name), "w", encoding="utf-8") as html_file:
                logger.debug("Response written to file: %s", html_file.name)
                html_file.write(self.last_response.text)

        return self.last_response

    def get(self,
            url: str,
            **kwargs: Any) -> Response:
        """
        Perform a GET request.

        :param url: The URL to GET on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed GET request.
        """
        return self.request("GET", url, **kwargs)

    def post(self,
             url: str,
             **kwargs: Any) -> Response:
        """
        Perform a POST request.

        :param url: The URL to POST on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed POST request.
        """
        return self.request("POST", url, **kwargs)

    def auth_cookies_stored(self) -> bool:
        """
        Check whether the session holds non-empty ``session-token`` and ``x-main`` cookies.

        :return: ``True`` if both cookies are present and non-empty.
        """
        cookies = dict_from_cookiejar(self.session.cookies)
        return bool(cookies.get("session-token") and cookies.get("x-main"))

    def login(self) -> None:
        """
        Execute an Amazon login process. This will include the sign-in page, and may also include Captcha challenges
        and OTP pages (if 2FA authentication is enabled on your account).

        If successful, ``is_authenticated`` will be set to ``True``.

        Session cookies are persisted, and if existing session data is found during this auth flow, it will be
        skipped entirely and flagged as authenticated.

        :raises AmazonOrdersAuthError: If the returned page matches no known auth flow step, or if the session
            is still not authenticated after ``max_auth_attempts`` steps.
        """
        self.get("{}/gp/sign-in.html".format(BASE_URL))

        attempts = 0
        while not self.is_authenticated:
            # Check for success first, so the result of the final permitted
            # attempt is still evaluated before giving up.
            if self.auth_cookies_stored() or \
                    ("Hello, sign in" not in self.last_response.text and
                     "nav-item-signout" in self.last_response.text):
                self.is_authenticated = True
                break

            if attempts >= self.max_auth_attempts:
                raise AmazonOrdersAuthError(
                    "Max authentication flow attempts reached.")

            # Dispatch on whichever known form the last response contains.
            if self._is_field_found(SIGN_IN_FORM_NAME):
                self._sign_in()
            elif self._is_field_found(CAPTCHA_1_FORM_CLASS, field_key="class"):
                self._captcha_1_submit()
            elif self.last_response_parsed.find(
                    "input",
                    id=lambda value: value and value.startswith(CAPTCHA_2_INPUT_ID)):
                self._captcha_2_submit()
            elif self._is_field_found(MFA_DEVICE_SELECT_FORM_ID,
                                      field_key="id"):
                self._mfa_device_select()
            elif self._is_field_found(MFA_FORM_ID, field_key="id"):
                self._mfa_submit()
            elif self._is_field_found(CAPTCHA_OTP_FORM_ID, field_key="id"):
                self._captcha_otp_submit()
            else:
                raise AmazonOrdersAuthError(
                    "An error occurred, this is an unknown page, or its parsed contents don't match a known auth flow: {}. To capture the page to a file, set the `debug` flag.".format(
                        self.last_response.url))

            attempts += 1

    def logout(self) -> None:
        """
        Logout and close the existing Amazon session and clear cookies.
        """
        self.get("{}/gp/sign-out.html".format(BASE_URL))

        if os.path.exists(self.cookie_jar_path):
            os.remove(self.cookie_jar_path)

        self.session.close()
        self.session = Session()

        self.is_authenticated = False

    def _sign_in(self) -> None:
        """
        Submit the sign-in form with the stored credentials. An error box on the resulting page is fatal.
        """
        form = self.last_response_parsed.find("form",
                                              {"name": SIGN_IN_FORM_NAME})
        data = self._build_from_form(form,
                                     additional_attrs={"email": self.username,
                                                       "password": self.password,
                                                       "rememberMe": "true"})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors(critical=True)

    def _mfa_device_select(self) -> None:
        """
        List the available one-time passcode devices, prompt for a choice, and submit it.
        """
        form = self.last_response_parsed.find("form",
                                              {"id": MFA_DEVICE_SELECT_FORM_ID})
        contexts = form.find_all("input", {"name": "otpDeviceContext"})

        # Devices are presented to the user numbered from 1.
        for i, field in enumerate(contexts, start=1):
            self.io.echo("{}: {}".format(i, field.attrs["value"].strip()))
        otp_device = int(
            self.io.prompt("--> Enter where you would like your one-time passcode sent", type=int))
        self.io.echo("")

        data = self._build_from_form(form,
                                     additional_attrs={
                                         "otpDeviceContext": contexts[otp_device - 1].attrs["value"]})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _mfa_submit(self) -> None:
        """
        Prompt for the one-time passcode and submit the MFA form.
        """
        otp = self.io.prompt("--> Enter the one-time passcode sent to your device")
        self.io.echo("")

        form = self.last_response_parsed.find("form", id=MFA_FORM_ID)
        data = self._build_from_form(form,
                                     additional_attrs={"otpCode": otp,
                                                       "rememberDevice": ""})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _captcha_1_submit(self) -> None:
        """
        Solve and submit the Captcha form identified by ``CAPTCHA_1_FORM_CLASS``.
        """
        captcha_div = self.last_response_parsed.find("div",
                                                     {"id": CAPTCHA_1_DIV_ID})

        solution = self._solve_captcha(
            captcha_div.find("img", {"alt": "captcha"}).attrs["src"])

        form = self.last_response_parsed.find("form",
                                              {"class": CAPTCHA_1_FORM_CLASS})
        data = self._build_from_form(form,
                                     additional_attrs={
                                         "cvf_captcha_input": solution})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix="{}/ap/cvf/".format(BASE_URL)),
                     data=data)

        self._handle_errors("cvf-widget-alert", "class")

    def _captcha_2_submit(self) -> None:
        """
        Solve and submit the Captcha form containing an input whose id starts with ``CAPTCHA_2_INPUT_ID``.
        """
        captcha_input = self.last_response_parsed.find(
            "input",
            id=lambda value: value and value.startswith(CAPTCHA_2_INPUT_ID))
        form = captcha_input.find_parent("form")

        solution = self._solve_captcha(form.find("img").attrs["src"])

        data = self._build_from_form(form,
                                     additional_attrs={
                                         "field-keywords": solution})

        # This form's fields are sent as query parameters rather than a body.
        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix=BASE_URL),
                     params=data)

        self._handle_errors("a-alert-info", "class")

    def _captcha_otp_submit(self) -> None:
        """
        Prompt for the one-time passcode and submit the verification code form.
        """
        otp = self.io.prompt("--> Enter the one-time passcode sent to your device")
        self.io.echo("")

        form = self.last_response_parsed.find("form", id=CAPTCHA_OTP_FORM_ID)
        data = self._build_from_form(form,
                                     additional_attrs={"otpCode": otp})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix=BASE_URL),
                     data=data)

        self._handle_errors()

    def _build_from_form(self,
                         form: Tag,
                         additional_attrs: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Collect the ``name``/``value`` pairs of every ``input`` in the form.

        :param form: The form to read inputs from.
        :param additional_attrs: Extra entries merged in last, overriding form values of the same name.
        :return: The data to submit for the form.
        """
        data = {}
        for field in form.find_all("input"):
            try:
                data[field["name"]] = field["value"]
            except KeyError:
                # Inputs missing either attribute contribute nothing.
                continue
        if additional_attrs:
            data.update(additional_attrs)
        return data

    def _get_form_action(self,
                         form: Tag,
                         prefix: Optional[str] = None) -> str:
        """
        Resolve the URL the form should be submitted to.

        :param form: The form whose ``action`` attribute is read.
        :param prefix: Prepended to the action when it does not start with ``http``.
        :return: The form's action, or the last response's URL when the form has none.
        """
        action = form.attrs.get("action")
        if not action:
            return self.last_response.url
        # TODO: we should be able to clean this up, and even get it from the current URL (same as a browser does)
        if prefix and not action.startswith("http"):
            action = prefix + action
        return action

    def _is_field_found(self,
                        field_value: str,
                        field_type: str = "form",
                        field_key: str = "name") -> bool:
        """
        Check whether the last parsed response contains a matching element.

        :param field_value: The attribute value to match.
        :param field_type: The tag name to search for.
        :param field_key: The attribute name to match ``field_value`` against.
        :return: ``True`` if such an element exists.
        """
        return self.last_response_parsed.find(field_type, {
            field_key: field_value}) is not None

    def _get_page_from_url(self,
                           url: str) -> str:
        """
        Derive a filename in ``output_dir`` that is not yet in use for the page at ``url``.

        :param url: The URL whose final path segment names the page.
        :return: A filename of the form ``<page>_<n>.html``, where ``n`` is the first index not already
            present in ``output_dir``.
        """
        page_name = os.path.basename(urlparse(url).path)
        # Remove only a trailing ".html"; str.strip(".html") would strip any of
        # those characters from both ends (e.g. "homepage.html" -> "omepage").
        if page_name.endswith(".html"):
            page_name = page_name[:-len(".html")]

        i = 0
        while os.path.isfile(os.path.join(self.output_dir,
                                          "{}_{}.html".format(page_name, i))):
            i += 1
        return "{}_{}.html".format(page_name, i)

    def _handle_errors(self,
                       error_div: str = "auth-error-message-box",
                       attr_name: str = "id",
                       critical: bool = False) -> None:
        """
        Report an error box found on the last parsed response, if any.

        :param error_div: The attribute value identifying the error ``div``.
        :param attr_name: The attribute name ``error_div`` is matched against.
        :param critical: If ``True``, raise instead of echoing the error.
        :raises AmazonOrdersAuthError: If an error is found and ``critical`` is ``True``.
        """
        error_tag = self.last_response_parsed.find("div",
                                                   {attr_name: error_div})
        if not error_tag:
            return

        error_msg = "An error occurred: {}\n".format(error_tag.text.strip())

        if critical:
            raise AmazonOrdersAuthError(error_msg)
        self.io.echo(error_msg, fg="red")

    def _solve_captcha(self,
                       url: str) -> str:
        """
        Solve the Captcha image at ``url``, falling back to asking the user when auto-solving fails.

        :param url: The URL of the Captcha image.
        :return: The Captcha solution.
        """
        captcha_response = AmazonCaptcha.fromlink(url).solve()
        if not captcha_response or captcha_response.lower() == "not solved":
            # Show the image so the user can read and type the characters.
            img_response = self.session.get(url)
            img = Image.open(BytesIO(img_response.content))
            img.show()
            self.io.echo("Info: The Captcha couldn't be auto-solved.")
            captcha_response = self.io.prompt("--> Enter the characters shown in the image")
            self.io.echo("")

        return captcha_response