Coverage for amazonorders/session.py: 90.11%
182 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-18 21:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-18 21:57 +0000
1import json
2import logging
3import os
4from io import BytesIO
5from typing import Optional, Any, Dict
6from urllib.parse import urlparse
8import requests
9from PIL import Image
10from amazoncaptcha import AmazonCaptcha
11from bs4 import BeautifulSoup, Tag
12from requests import Session, Response
13from requests.utils import dict_from_cookiejar
15from amazonorders.exception import AmazonOrdersAuthError
__author__ = "Alex Laird"
__copyright__ = "Copyright 2024, Alex Laird"
__version__ = "1.0.3"

logger = logging.getLogger(__name__)

# Root of every Amazon URL built in this module.
BASE_URL = "https://www.amazon.com"

# Headers merged into every request made through ``AmazonSession.request``.
# Key order is kept as originally declared.
BASE_HEADERS = {
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.7"
    ),
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "Cache-Control": "max-age=0",
    "Content-Type": "application/x-www-form-urlencoded",
    "Origin": BASE_URL,
    "Referer": BASE_URL + "/ap/signin",
    "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "macOS",
    "Sec-Ch-Viewport-Width": "1393",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Viewport-Width": "1393",
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}

# Markers used to recognise which page of the auth flow a response contains.
SIGN_IN_FORM_NAME = "signIn"
MFA_DEVICE_SELECT_FORM_ID = "auth-select-device-form"
MFA_FORM_ID = "auth-mfa-form"
CAPTCHA_1_DIV_ID = "cvf-page-content"
CAPTCHA_1_FORM_CLASS = "cvf-widget-form"
CAPTCHA_2_INPUT_ID = "captchacharacters"

# Where session cookies are persisted when no explicit path is given.
DEFAULT_COOKIE_JAR_PATH = os.path.join(
    os.path.expanduser("~"),
    ".config",
    "amazon-orders",
    "cookies.json",
)
class IODefault:
    """
    Console-backed input/output handler for the application.

    Messages are written with ``print`` and answers are read with ``input``. The class
    exists so that it can be overridden when constructing an :class:`AmazonSession` if
    input/output should be handled another way.
    """

    def echo(self, msg: Any, **kwargs: Any) -> None:
        """
        Write a message to the console.

        :param msg: The data to send to output.
        :param kwargs: Accepted for compatibility with other handlers; ignored here.
        """
        print(msg)

    def prompt(self, msg: Any, type: Any = None, **kwargs: Any) -> str:
        """
        Ask the user for input on the console.

        :param msg: The text shown before the input cursor; ``": "`` is appended to it.
        :param type: Accepted for compatibility with other handlers; ignored here.
        :param kwargs: Accepted for compatibility with other handlers; ignored here.
        :return: The text entered by the user, unconverted.
        """
        return input(f"{msg}: ")
class AmazonSession:
    """
    An interface for interacting with Amazon and authenticating an underlying :class:`requests.Session`. Utilizing
    this class means session data is maintained between requests. Session cookies are also written to
    ``cookie_jar_path`` after each request, so they are picked up again by later instantiations of the class.

    To get started, call the :func:`login` function.
    """

    def __init__(self,
                 username: str,
                 password: str,
                 debug: bool = False,
                 max_auth_attempts: int = 10,
                 cookie_jar_path: Optional[str] = None,
                 io: IODefault = IODefault()) -> None:
        """
        Create the session and restore any cookies previously persisted at ``cookie_jar_path``.

        :param username: An Amazon username.
        :param password: An Amazon password.
        :param debug: Set the module logger to ``DEBUG`` and write an HTML file for each response.
        :param max_auth_attempts: The maximum number of auth flow steps :func:`login` will take.
        :param cookie_jar_path: Where to persist cookies; falls back to ``DEFAULT_COOKIE_JAR_PATH`` when falsy.
        :param io: The handler used for echoes and prompts.
        """
        if not cookie_jar_path:
            cookie_jar_path = DEFAULT_COOKIE_JAR_PATH

        #: An Amazon username.
        self.username: str = username
        #: An Amazon password.
        self.password: str = password

        #: Set the module logger to ``DEBUG`` and write an HTML file for each request made on the session.
        self.debug: bool = debug
        if self.debug:
            logger.setLevel(logging.DEBUG)
        #: Will continue in :func:`login()`'s auth flow this many times.
        self.max_auth_attempts: int = max_auth_attempts
        #: The path to persist session cookies, defaults to ``DEFAULT_COOKIE_JAR_PATH``.
        self.cookie_jar_path: str = cookie_jar_path
        #: The I/O handler for echoes and prompts.
        self.io: IODefault = io

        #: The shared session to be used across all requests.
        self.session: Session = Session()
        #: The last response executed on the Session.
        self.last_response: Optional[Response] = None
        #: A parsed representation of the last response executed on the Session.
        self.last_response_parsed: Optional[Tag] = None
        #: If :func:`login()` has been executed and successfully logged in the session.
        self.is_authenticated: bool = False

        cookie_dir = os.path.dirname(self.cookie_jar_path)
        # A bare filename has no directory part, and ``os.makedirs("")`` raises.
        # ``exist_ok`` avoids a check-then-create race.
        if cookie_dir:
            os.makedirs(cookie_dir, exist_ok=True)
        if os.path.exists(self.cookie_jar_path):
            with open(self.cookie_jar_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            cookies = requests.utils.cookiejar_from_dict(data)
            self.session.cookies.update(cookies)

    def request(self,
                method: str,
                url: str,
                **kwargs: Any) -> Response:
        """
        Execute the request against Amazon with base headers, parsing and storing the response
        and persisting the session's cookies.

        :param method: The request method to execute.
        :param url: The URL to execute ``method`` on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`requests.Session.request`.
        :return: The Response from the executed request.
        """
        # Work on a copy so the caller's dict is never mutated. ``BASE_HEADERS`` still
        # overrides caller-supplied values for the same key, as it always has.
        headers = dict(kwargs.get("headers") or {})
        headers.update(BASE_HEADERS)
        kwargs["headers"] = headers

        logger.debug("%s request to %s", method, url)

        self.last_response = self.session.request(method, url, **kwargs)
        self.last_response_parsed = BeautifulSoup(self.last_response.text,
                                                  "html.parser")

        # Mode "w" truncates any existing file, so no explicit removal is needed first.
        cookies = dict_from_cookiejar(self.session.cookies)
        with open(self.cookie_jar_path, "w", encoding="utf-8") as f:
            json.dump(cookies, f)

        logger.debug("Response: %s - %s",
                     self.last_response.url,
                     self.last_response.status_code)

        if self.debug:
            page_name = self._get_page_from_url(self.last_response.url)
            with open(page_name, "w", encoding="utf-8") as html_file:
                logger.debug("Response written to file: %s", html_file.name)
                html_file.write(self.last_response.text)

        return self.last_response

    def get(self,
            url: str,
            **kwargs: Any) -> Response:
        """
        Perform a GET request.

        :param url: The URL to GET on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed GET request.
        """
        return self.request("GET", url, **kwargs)

    def post(self,
             url: str,
             **kwargs: Any) -> Response:
        """
        Perform a POST request.

        :param url: The URL to POST on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed POST request.
        """
        return self.request("POST", url, **kwargs)

    def auth_cookies_stored(self) -> bool:
        """
        Check whether the session holds both cookies this class treats as proof of authentication.

        :return: ``True`` if non-empty ``session-token`` and ``x-main`` cookies are both present.
        """
        cookies = dict_from_cookiejar(self.session.cookies)
        return bool(cookies.get("session-token") and cookies.get("x-main"))

    def login(self) -> None:
        """
        Execute an Amazon login process. This will include the sign-in page, and may also include Captcha challenges
        and OTP pages (if 2FA authentication is enabled on your account).

        If successful, ``is_authenticated`` will be set to ``True``.

        Session cookies are persisted, and if existing session data is found during this auth flow, it will be
        skipped entirely and flagged as authenticated.

        :raises AmazonOrdersAuthError: If an unrecognized page is encountered, or if the session is still not
            authenticated after ``max_auth_attempts`` steps.
        """
        self.get("{}/gp/sign-in.html".format(BASE_URL))

        attempts = 0
        while not self.is_authenticated:
            # Check for success first, so the outcome of the last permitted step is
            # evaluated before the attempt limit is enforced.
            if self.auth_cookies_stored() or \
                    ("Hello, sign in" not in self.last_response.text and
                     "nav-item-signout" in self.last_response.text):
                self.is_authenticated = True
                break

            if attempts >= self.max_auth_attempts:
                raise AmazonOrdersAuthError(
                    "Max authentication flow attempts reached.")

            if self._is_field_found(SIGN_IN_FORM_NAME):
                self._sign_in()
            elif self._is_field_found(CAPTCHA_1_FORM_CLASS, field_key="class"):
                self._captcha_1_submit()
            elif self._find_captcha_2_input():
                self._captcha_2_submit()
            elif self._is_field_found(MFA_DEVICE_SELECT_FORM_ID,
                                      field_key="id"):
                self._mfa_device_select()
            elif self._is_field_found(MFA_FORM_ID, field_key="id"):
                self._mfa_submit()
            else:
                raise AmazonOrdersAuthError(
                    "An error occurred, this is an unknown page: {}. To capture the page to a file, set the `debug` flag.".format(
                        self.last_response.url))

            attempts += 1

    def logout(self) -> None:
        """
        Logout and close the existing Amazon session and clear cookies.

        The persisted cookie file is removed and a fresh :class:`requests.Session` replaces the closed one.
        """
        self.get("{}/gp/sign-out.html".format(BASE_URL))

        # ``request`` has just rewritten the cookie file, so remove it after the sign-out call.
        if os.path.exists(self.cookie_jar_path):
            os.remove(self.cookie_jar_path)

        self.session.close()
        self.session = Session()

        self.is_authenticated = False

    def _sign_in(self) -> None:
        """
        Submit the sign-in form from the last response with the stored credentials.

        :raises AmazonOrdersAuthError: If the resulting page contains the auth error box.
        """
        form = self.last_response_parsed.find("form",
                                              {"name": SIGN_IN_FORM_NAME})
        data = self._build_from_form(form,
                                     additional_attrs={"email": self.username,
                                                       "password": self.password,
                                                       "rememberMe": "true"})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors(critical=True)

    def _mfa_device_select(self) -> None:
        """
        List the one-time passcode devices offered by the last response, prompt for a choice, and submit it.

        :raises AmazonOrdersAuthError: If the entered number does not match one of the listed devices.
        :raises ValueError: If the entered value is not an integer.
        """
        form = self.last_response_parsed.find("form",
                                              {"id": MFA_DEVICE_SELECT_FORM_ID})
        contexts = form.find_all("input", {"name": "otpDeviceContext"})

        for i, field in enumerate(contexts, start=1):
            self.io.echo("{}: {}".format(i, field.attrs["value"].strip()))
        otp_device = int(
            self.io.prompt("--> Enter where you would like your one-time passcode sent", type=int))
        self.io.echo("")

        # Reject 0 and negatives explicitly; negative indexing would otherwise
        # silently select a device the user did not choose.
        if not 1 <= otp_device <= len(contexts):
            raise AmazonOrdersAuthError(
                "Invalid device selection: {}. Enter a number between 1 and {}.".format(
                    otp_device, len(contexts)))

        data = self._build_from_form(form,
                                     additional_attrs={
                                         "otpDeviceContext": contexts[otp_device - 1].attrs["value"]})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _mfa_submit(self) -> None:
        """
        Prompt for the one-time passcode and submit the MFA form from the last response.
        """
        otp = self.io.prompt("--> Enter the one-time passcode sent to your device")
        self.io.echo("")

        form = self.last_response_parsed.find("form", id=MFA_FORM_ID)
        data = self._build_from_form(form,
                                     additional_attrs={"otpCode": otp,
                                                       "rememberDevice": ""})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _captcha_1_submit(self) -> None:
        """
        Solve and submit the first Captcha variant, identified by the ``cvf-widget-form`` form class.
        """
        captcha_div = self.last_response_parsed.find("div",
                                                     {"id": CAPTCHA_1_DIV_ID})

        solution = self._solve_captcha(
            captcha_div.find("img", {"alt": "captcha"}).attrs["src"])

        form = self.last_response_parsed.find("form",
                                              {"class": CAPTCHA_1_FORM_CLASS})
        data = self._build_from_form(form,
                                     additional_attrs={
                                         "cvf_captcha_input": solution})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix="{}/ap/cvf/".format(BASE_URL)),
                     data=data)

        self._handle_errors("cvf-widget-alert", "class")

    def _captcha_2_submit(self) -> None:
        """
        Solve and submit the second Captcha variant, identified by its ``captchacharacters`` input.

        The form fields are sent as query parameters rather than as a request body.
        """
        form = self._find_captcha_2_input().find_parent("form")

        solution = self._solve_captcha(form.find("img").attrs["src"])

        data = self._build_from_form(form,
                                     additional_attrs={
                                         "field-keywords": solution})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix=BASE_URL),
                     params=data)

        self._handle_errors("a-alert-info", "class")

    def _find_captcha_2_input(self) -> Optional[Tag]:
        """
        Find the input of the second Captcha variant in the last response.

        :return: The first ``input`` whose ``id`` starts with ``CAPTCHA_2_INPUT_ID``, or ``None``.
        """
        return self.last_response_parsed.find(
            "input",
            id=lambda value: value and value.startswith(CAPTCHA_2_INPUT_ID))

    def _build_from_form(self,
                         form: Tag,
                         additional_attrs: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Collect the ``name``/``value`` pairs of every ``input`` in a form into a dict.

        :param form: The form to read inputs from.
        :param additional_attrs: Extra entries merged in last, overriding inputs of the same name.
        :return: The data to submit for the form.
        """
        data = {}
        for field in form.find_all("input"):
            attrs = field.attrs
            # Only inputs that carry both attributes can be submitted.
            if "name" in attrs and "value" in attrs:
                data[attrs["name"]] = attrs["value"]
        if additional_attrs:
            data.update(additional_attrs)
        return data

    def _get_form_action(self,
                         form: Tag,
                         prefix: Optional[str] = None) -> str:
        """
        Determine the URL a form should be submitted to.

        :param form: The form whose ``action`` attribute is read.
        :param prefix: Prepended to the action when given and the action does not start with ``http``.
        :return: The form's action, or the URL of the last response when the form has none.
        """
        action = form.attrs.get("action")
        if not action:
            action = self.last_response.url
        if prefix and not action.startswith("http"):
            action = prefix + action
        return action

    def _is_field_found(self,
                        field_value: str,
                        field_type: str = "form",
                        field_key: str = "name") -> bool:
        """
        Check whether the last response contains a matching element.

        :param field_value: The attribute value to look for.
        :param field_type: The tag name to search.
        :param field_key: The attribute that must equal ``field_value``.
        :return: ``True`` if such an element exists in the parsed last response.
        """
        return self.last_response_parsed.find(field_type, {
            field_key: field_value}) is not None

    def _get_page_from_url(self,
                           url: str) -> str:
        """
        Build an unused ``<page>_<n>.html`` filename, in the current working directory, for a debug dump.

        :param url: The URL whose final path segment names the page.
        :return: The filename with the lowest ``n`` (starting at 0) that is not already a file.
        """
        page_name = os.path.basename(urlparse(url).path)
        # Drop only a trailing ".html". ``str.strip`` treats its argument as a set
        # of characters and would also eat leading and trailing letters of the name.
        suffix = ".html"
        if page_name.endswith(suffix):
            page_name = page_name[:-len(suffix)]

        i = 0
        while os.path.isfile("{}_{}.html".format(page_name, i)):
            i += 1
        return "{}_{}.html".format(page_name, i)

    def _handle_errors(self,
                       error_div: str = "auth-error-message-box",
                       attr_name: str = "id",
                       critical: bool = False) -> None:
        """
        Report an error box found in the last response, if any.

        :param error_div: The attribute value identifying the error ``div``.
        :param attr_name: The attribute that must equal ``error_div``.
        :param critical: Raise instead of echoing the message.
        :raises AmazonOrdersAuthError: If an error box is found and ``critical`` is ``True``.
        """
        error_tag = self.last_response_parsed.find("div",
                                                   {attr_name: error_div})
        if not error_tag:
            return

        error_msg = "An error occurred: {}\n".format(error_tag.text.strip())

        if critical:
            raise AmazonOrdersAuthError(error_msg)
        self.io.echo(error_msg, fg="red")

    def _solve_captcha(self,
                       url: str) -> str:
        """
        Solve the Captcha image at ``url``, asking the user when it cannot be solved automatically.

        :param url: The URL of the Captcha image.
        :return: The solution from ``AmazonCaptcha``, or the user's answer when that solution is empty or
            ``"not solved"``.
        """
        captcha_response = AmazonCaptcha.fromlink(url).solve()
        if not captcha_response or captcha_response.lower() == "not solved":
            # Fall back to a human: fetch the image, display it, and ask for the characters.
            img_response = self.session.get(url)
            img = Image.open(BytesIO(img_response.content))
            img.show()
            self.io.echo("Info: The Captcha couldn't be auto-solved.")
            captcha_response = self.io.prompt("--> Enter the characters shown in the image")
            self.io.echo("")

        return captcha_response