Coverage for amazonorders/session.py: 89.34%
197 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-25 22:50 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-25 22:50 +0000
1import json
2import logging
3import os
4from io import BytesIO
5from typing import Optional, Any, Dict
6from urllib.parse import urlparse
8import requests
9from PIL import Image
10from amazoncaptcha import AmazonCaptcha
11from bs4 import BeautifulSoup, Tag
12from requests import Session, Response
13from requests.utils import dict_from_cookiejar
15from amazonorders.conf import DEFAULT_COOKIE_JAR_PATH, DEFAULT_OUTPUT_DIR
16from amazonorders.exception import AmazonOrdersAuthError
18__author__ = "Alex Laird"
19__copyright__ = "Copyright 2024, Alex Laird"
20__version__ = "1.0.6"
22logger = logging.getLogger(__name__)
24BASE_URL = "https://www.amazon.com"
25SIGN_IN_URL = "{}/ap/signin".format(BASE_URL)
26BASE_HEADERS = {
27 "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
28 "Accept-Encoding": "gzip, deflate, br",
29 "Accept-Language": "en-US,en;q=0.9",
30 "Cache-Control": "max-age=0",
31 "Content-Type": "application/x-www-form-urlencoded",
32 "Origin": BASE_URL,
33 "Referer": SIGN_IN_URL,
34 "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
35 "Sec-Ch-Ua-Mobile": "?0",
36 "Sec-Ch-Ua-Platform": "macOS",
37 "Sec-Ch-Viewport-Width": "1393",
38 "Sec-Fetch-Dest": "document",
39 "Sec-Fetch-Mode": "navigate",
40 "Sec-Fetch-Site": "same-origin",
41 "Sec-Fetch-User": "?1",
42 "Viewport-Width": "1393",
43 "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
44}
45SIGN_IN_FORM_SELECTOR = "form[name='signIn']"
46MFA_DEVICE_SELECT_FORM_SELECTOR = "form[id='auth-select-device-form']"
47MFA_FORM_SELECTOR = "form[id='auth-mfa-form']"
48CAPTCHA_1_FORM_SELECTOR = "form[class*='cvf-widget-form-captcha']"
49CAPTCHA_2_FORM_SELECTOR = "form:has(input[id^='captchacharacters'])"
50CAPTCHA_OTP_FORM_SELECTOR = "form[id='verification-code-form']"
class IODefault:
    """
    Handles input/output from the application. By default, this uses console commands, but
    this class exists so that it can be overridden when constructing an :class:`AmazonSession`
    if input/output should be handled another way.
    """

    def echo(self,
             msg: Any,
             **kwargs: Any) -> None:
        """
        Echo a message to the console.

        :param msg: The data to send to output.
        :param kwargs: Unused by the default implementation.
        """
        print(msg)

    def prompt(self,
               msg: Any,
               type: Any = None,
               **kwargs: Any) -> str:
        """
        Prompt to the console for user input.

        :param msg: The data to use as the input prompt.
        :param type: Unused by the default implementation.
        :param kwargs: Unused by the default implementation.
        :return: The user input result.
        """
        return input("{}: ".format(msg))
class AmazonSession:
    """
    An interface for interacting with Amazon and authenticating an underlying :class:`requests.Session`. Utilizing
    this class means session data is maintained between requests. Session data is also persisted after each request,
    meaning it will also be maintained between separate instantiations of the class or application.

    To get started, call the :func:`login` function.
    """

    def __init__(self,
                 username: str,
                 password: str,
                 debug: bool = False,
                 max_auth_attempts: int = 10,
                 cookie_jar_path: Optional[str] = None,
                 io: Optional[IODefault] = None,
                 output_dir: Optional[str] = None) -> None:
        """
        Create the session and load any cookies already persisted at ``cookie_jar_path``.

        :param username: An Amazon username.
        :param password: An Amazon password.
        :param debug: Set the module logger to ``DEBUG`` and write each response to an HTML file.
        :param max_auth_attempts: The maximum number of auth flow steps :func:`login` will take.
        :param cookie_jar_path: Where cookies are persisted, defaults to ``conf.DEFAULT_COOKIE_JAR_PATH``.
        :param io: The I/O handler for echoes and prompts, defaults to a new :class:`IODefault`.
        :param output_dir: Where output files are written, defaults to ``conf.DEFAULT_OUTPUT_DIR``.
        """
        if not cookie_jar_path:
            cookie_jar_path = DEFAULT_COOKIE_JAR_PATH
        if not output_dir:
            output_dir = DEFAULT_OUTPUT_DIR
        if io is None:
            # Built per instance rather than once in the signature's default
            io = IODefault()

        #: An Amazon username.
        self.username: str = username
        #: An Amazon password.
        self.password: str = password

        #: Set logger ``DEBUG`` and write an HTML file for each request made on the session.
        self.debug: bool = debug
        if self.debug:
            logger.setLevel(logging.DEBUG)
        #: Will continue in :func:`login()`'s auth flow this many times (successes and failures).
        self.max_auth_attempts: int = max_auth_attempts
        #: The path to persist session cookies, defaults to ``conf.DEFAULT_COOKIE_JAR_PATH``.
        self.cookie_jar_path: str = cookie_jar_path
        #: The I/O handler for echoes and prompts.
        self.io: IODefault = io
        #: The directory where any output files will be produced, defaults to ``conf.DEFAULT_OUTPUT_DIR``.
        self.output_dir: str = output_dir

        #: The shared session to be used across all requests.
        self.session: Session = Session()
        #: The last response executed on the Session.
        self.last_response: Optional[Response] = None
        #: A parsed representation of the last response executed on the Session.
        self.last_response_parsed: Optional[Tag] = None
        #: If :func:`login()` has been executed and successfully logged in the session.
        self.is_authenticated: bool = False

        cookie_dir = os.path.dirname(self.cookie_jar_path)
        # A bare filename has no directory component, and os.makedirs("") would raise
        if cookie_dir:
            os.makedirs(cookie_dir, exist_ok=True)
        if os.path.exists(self.cookie_jar_path):
            with open(self.cookie_jar_path, "r", encoding="utf-8") as f:
                data = json.load(f)
                cookies = requests.utils.cookiejar_from_dict(data)
                self.session.cookies.update(cookies)

    def request(self,
                method: str,
                url: str,
                **kwargs: Any) -> Response:
        """
        Execute the request against Amazon with base headers, parsing and storing the response
        and persisting response cookies.

        :param method: The request method to execute.
        :param url: The URL to execute ``method`` on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`requests.request`. Any ``headers``
            given are copied, and a base header wins over a passed header with the same key.
        :return: The Response from the executed request.
        """
        # Work on a copy so the caller's dict is never mutated; also tolerates headers=None
        headers = dict(kwargs.get("headers") or {})
        headers.update(BASE_HEADERS)
        kwargs["headers"] = headers

        logger.debug("%s request to %s", method, url)

        self.last_response = self.session.request(method, url, **kwargs)
        self.last_response_parsed = BeautifulSoup(self.last_response.text,
                                                  "html.parser")

        cookies = dict_from_cookiejar(self.session.cookies)
        # Mode "w" truncates any existing jar, so no separate removal is needed
        with open(self.cookie_jar_path, "w", encoding="utf-8") as f:
            json.dump(cookies, f)

        logger.debug("Response: %s - %s", self.last_response.url,
                     self.last_response.status_code)

        if self.debug:
            os.makedirs(self.output_dir, exist_ok=True)
            page_name = self._get_page_from_url(self.last_response.url)
            with open(os.path.join(self.output_dir, page_name), "w",
                      encoding="utf-8") as html_file:
                logger.debug("Response written to file: %s", html_file.name)
                html_file.write(self.last_response.text)

        return self.last_response

    def get(self,
            url: str,
            **kwargs: Any) -> Response:
        """
        Perform a GET request.

        :param url: The URL to GET on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed GET request.
        """
        return self.request("GET", url, **kwargs)

    def post(self,
             url: str,
             **kwargs: Any) -> Response:
        """
        Perform a POST request.

        :param url: The URL to POST on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed POST request.
        """
        return self.request("POST", url, **kwargs)

    def auth_cookies_stored(self) -> bool:
        """
        Check whether the session holds both the ``session-token`` and ``x-main`` cookies.

        :return: ``True`` if both cookies are present with non-empty values.
        """
        cookies = dict_from_cookiejar(self.session.cookies)
        return bool(cookies.get("session-token") and cookies.get("x-main"))

    def login(self) -> None:
        """
        Execute an Amazon login process. This will include the sign-in page, and may also include Captcha challenges
        and OTP pages (if 2FA authentication is enabled on your account).

        If successful, ``is_authenticated`` will be set to ``True``.

        Session cookies are persisted, and if existing session data is found during this auth flow, it will be
        skipped entirely and flagged as authenticated.

        :raises AmazonOrdersAuthError: If the page is not part of a known auth flow, if sign-in is rejected,
            or if the session is still not authenticated after ``max_auth_attempts`` steps.
        """
        self.get("{}/gp/sign-in.html".format(BASE_URL))

        # If our local session data is stale, Amazon will redirect us to the sign in page
        if self.auth_cookies_stored() and self.last_response.url.split("?")[0] == SIGN_IN_URL:
            self.logout()
            self.get("{}/gp/sign-in.html".format(BASE_URL))

        attempts = 0
        while not self.is_authenticated and attempts < self.max_auth_attempts:
            if self._is_signed_in():
                self.is_authenticated = True
                break

            if self.last_response_parsed.select_one(SIGN_IN_FORM_SELECTOR):
                self._sign_in()
            elif self.last_response_parsed.select_one(CAPTCHA_1_FORM_SELECTOR):
                self._captcha_submit(CAPTCHA_1_FORM_SELECTOR,
                                     "cvf_captcha_input",
                                     "cvf-widget-alert")
            elif self.last_response_parsed.select_one(CAPTCHA_2_FORM_SELECTOR):
                self._captcha_submit(CAPTCHA_2_FORM_SELECTOR,
                                     "field-keywords",
                                     "a-alert-info")
            elif self.last_response_parsed.select_one(
                    MFA_DEVICE_SELECT_FORM_SELECTOR):
                self._mfa_device_select()
            elif self.last_response_parsed.select_one(MFA_FORM_SELECTOR):
                self._mfa_submit()
            elif self.last_response_parsed.select_one(
                    CAPTCHA_OTP_FORM_SELECTOR):
                self._captcha_otp_submit()
            else:
                raise AmazonOrdersAuthError(
                    "An error occurred, this is an unknown page, or its parsed contents don't match a known auth flow: {}. To capture the page to a file, set the `debug` flag.".format(
                        self.last_response.url))

            attempts += 1

        # The last permitted step may itself have completed the sign-in, so check once more before giving up
        if not self.is_authenticated and self._is_signed_in():
            self.is_authenticated = True

        if not self.is_authenticated:
            raise AmazonOrdersAuthError(
                "Max authentication flow attempts reached.")

    def logout(self) -> None:
        """
        Logout and close the existing Amazon session and clear cookies.
        """
        self.get("{}/gp/sign-out.html".format(BASE_URL))

        if os.path.exists(self.cookie_jar_path):
            os.remove(self.cookie_jar_path)

        self.session.close()
        self.session = Session()

        self.is_authenticated = False

    def _is_signed_in(self) -> bool:
        """
        Decide whether the session currently looks signed in.

        :return: ``True`` if the auth cookies are stored, or if the last response contains the sign-out nav
            item and not the "Hello, sign in" greeting.
        """
        if self.auth_cookies_stored():
            return True
        page = self.last_response.text
        return "Hello, sign in" not in page and "nav-item-signout" in page

    def _sign_in(self) -> None:
        """
        Submit the sign-in form with the username and password.

        :raises AmazonOrdersAuthError: If the resulting page contains the auth error box.
        """
        form = self.last_response_parsed.select_one(SIGN_IN_FORM_SELECTOR)
        data = self._build_from_form(form,
                                     additional_attrs={"email": self.username,
                                                       "password": self.password,
                                                       "rememberMe": "true"})

        self._submit_form(form, data)

        self._handle_errors(critical=True)

    def _mfa_device_select(self) -> None:
        """
        List the devices offered for the one-time passcode, prompt for a choice, and submit it.

        An invalid choice is reported through ``io`` and nothing is submitted, so the next pass of
        :func:`login` presents the same prompt again.
        """
        form = self.last_response_parsed.select_one(
            MFA_DEVICE_SELECT_FORM_SELECTOR)
        contexts = form.select("input[name='otpDeviceContext']")

        for i, field in enumerate(contexts, start=1):
            self.io.echo("{}: {}".format(i, field["value"].strip()))

        try:
            otp_device = int(
                self.io.prompt(
                    "--> Enter where you would like your one-time passcode sent",
                    type=int))
        except (TypeError, ValueError):
            # Fall through to the range check below, which rejects 0
            otp_device = 0
        self.io.echo("")

        # Reject 0 and negatives explicitly: they would otherwise index from the end of the list
        if not 1 <= otp_device <= len(contexts):
            self.io.echo(
                "An error occurred: Invalid selection, enter a number between 1 and {}\n".format(
                    len(contexts)),
                fg="red")
            return

        data = self._build_from_form(form,
                                     additional_attrs={"otpDeviceContext":
                                                       contexts[otp_device - 1]["value"]})

        self._submit_form(form, data)

        self._handle_errors()

    def _mfa_submit(self) -> None:
        """
        Prompt for the one-time passcode and submit the MFA form.
        """
        otp = self.io.prompt(
            "--> Enter the one-time passcode sent to your device")
        self.io.echo("")

        form = self.last_response_parsed.select_one(MFA_FORM_SELECTOR)
        data = self._build_from_form(form,
                                     additional_attrs={"otpCode": otp,
                                                       "rememberDevice": ""})

        self._submit_form(form, data)

        self._handle_errors()

    def _captcha_submit(self,
                        form_selector: str,
                        solution_attr_key: str,
                        error_div_class: str) -> None:
        """
        Solve the Captcha image found beside the matched form and submit the solution.

        :param form_selector: The CSS selector of the Captcha form.
        :param solution_attr_key: The form field name the solution is submitted under.
        :param error_div_class: The ``class`` of the ``div`` that carries an error on the resulting page.
        :raises AmazonOrdersAuthError: If no Captcha image with a ``src`` is found.
        """
        form = self.last_response_parsed.select_one(form_selector)

        img = form.find_parent().select_one("img")
        if img is None or not img.get("src"):
            raise AmazonOrdersAuthError(
                "An error occurred, the Captcha image could not be found on the page: {}".format(
                    self.last_response.url))

        solution = self._solve_captcha(img["src"])

        data = self._build_from_form(form,
                                     additional_attrs={
                                         solution_attr_key: solution})

        self._submit_form(form, data)

        self._handle_errors(error_div_class, "class")

    def _captcha_otp_submit(self) -> None:
        """
        Prompt for the one-time passcode and submit the verification code form.
        """
        otp = self.io.prompt(
            "--> Enter the one-time passcode sent to your device")
        self.io.echo("")

        form = self.last_response_parsed.select_one(CAPTCHA_OTP_FORM_SELECTOR)
        data = self._build_from_form(form,
                                     additional_attrs={"otpCode": otp})

        self._submit_form(form, data)

        self._handle_errors()

    def _build_from_form(self,
                         form: Tag,
                         additional_attrs: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Collect the ``name``/``value`` pairs of the form's ``input`` elements.

        :param form: The form to read inputs from.
        :param additional_attrs: Extra fields, applied last so they override inputs of the same name.
        :return: The data to submit with the form.
        """
        data = {}
        for field in form.select("input"):
            name = field.get("name")
            value = field.get("value")
            # An input missing either attribute is skipped
            if name is None or value is None:
                continue
            data[name] = value
        if additional_attrs:
            data.update(additional_attrs)
        return data

    def _get_form_action(self,
                         form: Tag) -> str:
        """
        Resolve the form's ``action`` to an absolute URL, relative to the last response's URL.

        :param form: The form whose ``action`` should be resolved.
        :return: The absolute URL, or the last response's URL if the form has no ``action``.
        """
        from urllib.parse import urljoin

        action = form.get("action")
        if not action:
            return self.last_response.url
        # urljoin covers absolute, root-relative and path-relative actions, and ignores any
        # "/" inside the current URL's query string when working out the base path
        return urljoin(self.last_response.url, action)

    def _get_page_from_url(self,
                           url: str) -> str:
        """
        Build a filename, not yet present in ``output_dir``, for saving the page at ``url``.

        :param url: The URL of the page.
        :return: A name of the form ``<page>_<i>.html`` with the lowest unused ``i``.
        """
        page_name = os.path.basename(urlparse(url).path)
        # Drop only a trailing ".html"; str.strip would strip individual characters from both ends
        if page_name.endswith(".html"):
            page_name = page_name[:-len(".html")]
        i = 0
        while os.path.isfile(os.path.join(self.output_dir,
                                          "{}_{}.html".format(page_name, i))):
            i += 1
        return "{}_{}.html".format(page_name, i)

    def _handle_errors(self,
                       error_div: str = "auth-error-message-box",
                       attr_name: str = "id",
                       critical: bool = False) -> None:
        """
        Look for an error ``div`` on the last parsed response and report it.

        :param error_div: The attribute value identifying the error ``div``.
        :param attr_name: The attribute ``error_div`` is matched against.
        :param critical: If ``True``, raise instead of echoing the error.
        :raises AmazonOrdersAuthError: If an error is found and ``critical`` is ``True``.
        """
        error_tag = self.last_response_parsed.select_one(
            "div[{}='{}']".format(attr_name, error_div))
        if not error_tag:
            return

        error_msg = "An error occurred: {}\n".format(error_tag.text.strip())
        if critical:
            raise AmazonOrdersAuthError(error_msg)
        self.io.echo(error_msg, fg="red")

    def _solve_captcha(self,
                       url: str) -> str:
        """
        Solve the Captcha at ``url``, asking the user when it can't be solved automatically.

        :param url: The URL of the Captcha image.
        :return: The Captcha solution.
        """
        captcha_response = AmazonCaptcha.fromlink(url).solve()
        if not captcha_response or captcha_response.lower() == "not solved":
            # Show the image so the user can read it and type the answer
            img_response = self.session.get(url)
            img = Image.open(BytesIO(img_response.content))
            img.show()
            self.io.echo("Info: The Captcha couldn't be auto-solved.")
            captcha_response = self.io.prompt(
                "--> Enter the characters shown in the image")
            self.io.echo("")

        return captcha_response

    def _submit_form(self,
                     form: Tag,
                     data: Dict[str, Any]) -> None:
        """
        Submit ``data`` to the form's action using the form's method, which defaults to ``GET``.

        :param form: The form being submitted.
        :param data: The fields to send, as query params for ``GET`` and as the body otherwise.
        """
        method = form.get("method", "GET").upper()
        action = self._get_form_action(form)
        request_data = {"params" if method == "GET" else "data": data}
        self.request(method,
                     action,
                     **request_data)
431 action,
432 **request_data)