Coverage for amazonorders/session.py: 89.94%
179 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-18 14:34 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-18 14:34 +0000
1import json
2import logging
3import os
4from io import BytesIO
5from typing import Optional, Any, Dict
6from urllib.parse import urlparse
8import requests
9from PIL import Image
10from amazoncaptcha import AmazonCaptcha
11from bs4 import BeautifulSoup, Tag
12from requests import Session, Response
13from requests.utils import dict_from_cookiejar
15from amazonorders.exception import AmazonOrdersAuthError
# Package metadata.
__author__ = "Alex Laird"
__copyright__ = "Copyright 2024, Alex Laird"
__version__ = "1.0.2"

# Module-level logger; ``AmazonSession`` raises its level to DEBUG when asked.
logger = logging.getLogger(__name__)

# Root of every URL this module requests.
BASE_URL = "https://www.amazon.com"

# Headers merged into every request issued through ``AmazonSession.request``.
# Long values are split with implicit string concatenation purely for
# readability; the resulting header values are single unbroken strings.
BASE_HEADERS = {
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.7"
    ),
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "Cache-Control": "max-age=0",
    "Content-Type": "application/x-www-form-urlencoded",
    "Origin": BASE_URL,
    "Referer": f"{BASE_URL}/ap/signin",
    "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "macOS",
    "Sec-Ch-Viewport-Width": "1393",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Viewport-Width": "1393",
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}

# Markers used by ``AmazonSession.login`` to recognise which page of the auth
# flow the last response landed on.
SIGN_IN_FORM_NAME = "signIn"
MFA_DEVICE_SELECT_FORM_ID = "auth-select-device-form"
MFA_FORM_ID = "auth-mfa-form"
CAPTCHA_1_DIV_ID = "cvf-page-content"
CAPTCHA_1_FORM_CLASS = "cvf-widget-form"
CAPTCHA_2_INPUT_ID = "captchacharacters"

# Where session cookies are persisted when no explicit path is given.
DEFAULT_COOKIE_JAR_PATH = os.path.join(
    os.path.expanduser("~"),
    ".config",
    "amazon-orders",
    "cookies.json",
)
class IODefault:
    """
    Console-backed input/output handler.

    :class:`AmazonSession` routes every message and prompt through an instance
    of this class. Subclass it and pass the instance to :class:`AmazonSession`
    when input/output should be handled some other way than the console.
    """

    def echo(self, msg):
        """
        Write a message to standard output.

        :param msg: The data to send to output.
        """
        print(msg)

    def prompt(self, msg, type=None):
        """
        Ask the user for a line of input on the console.

        :param msg: The text shown as the prompt; ``": "`` is appended to it.
        :param type: Unused by the default implementation.
        :return: The line the user entered.
        """
        prompt_text = f"{msg}: "
        return input(prompt_text)
class AmazonSession:
    """
    A wrapper around :class:`requests.Session` that drives Amazon's sign-in flow.

    Cookies found at ``cookie_jar_path`` are loaded into the underlying session
    on construction, and the session's cookies are written back to that file
    after every request, so session data carries over between instances.

    :param username: An Amazon username.
    :param password: An Amazon password.
    :param debug: Set the module logger to ``DEBUG`` and write each response body to an HTML file.
    :param max_auth_attempts: How many pages of the auth flow :func:`login()` will process before giving up.
    :param cookie_jar_path: Where cookies are persisted; falls back to ``DEFAULT_COOKIE_JAR_PATH``.
    :param io: The handler for echoes and prompts; a new :class:`IODefault` is used when omitted.
    """

    def __init__(self,
                 username: str,
                 password: str,
                 debug: bool = False,
                 max_auth_attempts: int = 10,
                 cookie_jar_path: Optional[str] = None,
                 io: Optional[IODefault] = None) -> None:
        if not cookie_jar_path:
            cookie_jar_path = DEFAULT_COOKIE_JAR_PATH
        # Build the default handler per instance instead of sharing one object
        # created when the function was defined.
        if io is None:
            io = IODefault()

        #: An Amazon username.
        self.username: str = username
        #: An Amazon password.
        self.password: str = password

        #: Set the module logger to ``DEBUG`` and write an HTML file for each response received on the session.
        self.debug: bool = debug
        if self.debug:
            logger.setLevel(logging.DEBUG)
        #: Will continue in :func:`login()`'s auth flow this many times.
        self.max_auth_attempts: int = max_auth_attempts
        #: The path to persist session cookies, defaults to ``DEFAULT_COOKIE_JAR_PATH``.
        self.cookie_jar_path: str = cookie_jar_path
        #: The I/O handler for echoes and prompts.
        self.io: IODefault = io

        #: The shared session to be used across all requests.
        self.session: Session = Session()
        #: The last response executed on the Session.
        self.last_response: Optional[Response] = None
        #: A parsed representation of the last response executed on the Session.
        self.last_response_parsed: Optional[Tag] = None
        #: If :func:`login()` has been executed and successfully logged in the session.
        self.is_authenticated: bool = False

        cookie_dir = os.path.dirname(self.cookie_jar_path)
        # A bare filename has no directory part, and ``os.makedirs("")`` raises.
        if cookie_dir:
            os.makedirs(cookie_dir, exist_ok=True)
        if os.path.exists(self.cookie_jar_path):
            with open(self.cookie_jar_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            cookies = requests.utils.cookiejar_from_dict(data)
            self.session.cookies.update(cookies)

    def request(self,
                method: str,
                url: str,
                **kwargs: Any) -> Response:
        """
        Execute the request against Amazon with base headers, parsing and storing the response
        and persisting the session's cookies.

        :param method: The request method to execute.
        :param url: The URL to execute ``method`` on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`requests.Session.request`.
        :return: The Response from the executed request.
        """
        if "headers" not in kwargs:
            kwargs["headers"] = {}
        # Applied last, so BASE_HEADERS win over caller-supplied headers of the same name.
        kwargs["headers"].update(BASE_HEADERS)

        logger.debug("{} request to {}".format(method, url))

        self.last_response = self.session.request(method, url, **kwargs)
        self.last_response_parsed = BeautifulSoup(self.last_response.text,
                                                  "html.parser")

        # Rewrite the cookie file from the session's current cookie jar.
        cookies = dict_from_cookiejar(self.session.cookies)
        if os.path.exists(self.cookie_jar_path):
            os.remove(self.cookie_jar_path)
        with open(self.cookie_jar_path, "w", encoding="utf-8") as f:
            f.write(json.dumps(cookies))

        logger.debug("Response: {} - {}".format(self.last_response.url,
                                                self.last_response.status_code))

        if self.debug:
            # Dump the response body to a file in the current working directory.
            page_name = self._get_page_from_url(self.last_response.url)
            with open(page_name, "w", encoding="utf-8") as html_file:
                logger.debug(
                    "Response written to file: {}".format(html_file.name))
                html_file.write(self.last_response.text)

        return self.last_response

    def get(self,
            url: str,
            **kwargs: Any) -> Response:
        """
        Perform a GET request.

        :param url: The URL to GET on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed GET request.
        """
        return self.request("GET", url, **kwargs)

    def post(self,
             url: str,
             **kwargs: Any) -> Response:
        """
        Perform a POST request.

        :param url: The URL to POST on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed POST request.
        """
        return self.request("POST", url, **kwargs)

    def auth_cookies_stored(self) -> bool:
        """
        Check whether the session holds both cookies this class treats as proof of authentication.

        :return: ``True`` if non-empty ``session-token`` and ``x-main`` cookies are both present.
        """
        cookies = dict_from_cookiejar(self.session.cookies)
        return bool(cookies.get("session-token") and cookies.get("x-main"))

    def login(self) -> None:
        """
        Execute an Amazon login process. This will include the sign-in page, and may also include Captcha challenges
        and OTP pages (if 2FA authentication is enabled on your account).

        If successful, ``is_authenticated`` will be set to ``True``.

        Session cookies are persisted, and if existing session data is found during this auth flow, it will be
        skipped entirely and flagged as authenticated.

        :raises AmazonOrdersAuthError: If an unrecognized page is reached, or the flow does not
            authenticate within ``max_auth_attempts`` pages.
        """
        self.get("{}/gp/sign-in.html".format(BASE_URL))

        attempts = 0
        while not self.is_authenticated and attempts < self.max_auth_attempts:
            if self.auth_cookies_stored() or \
                    ("Hello, sign in" not in self.last_response.text and
                     "nav-item-signout" in self.last_response.text):
                self.is_authenticated = True
                break

            # Dispatch on whichever auth page the last response landed on.
            if self._is_field_found(SIGN_IN_FORM_NAME):
                self._sign_in()
            elif self._is_field_found(CAPTCHA_1_FORM_CLASS, field_key="class"):
                self._captcha_1_submit()
            elif self._find_captcha_2_input():
                self._captcha_2_submit()
            elif self._is_field_found(MFA_DEVICE_SELECT_FORM_ID,
                                      field_key="id"):
                self._mfa_device_select()
            elif self._is_field_found(MFA_FORM_ID, field_key="id"):
                self._mfa_submit()
            else:
                raise AmazonOrdersAuthError(
                    "An error occurred, this is an unknown page: {}. To capture the page to a file, set the `debug` flag.".format(
                        self.last_response.url))

            attempts += 1

        # Test the outcome itself, not the counter, so any exit from the loop
        # without authenticating is reported.
        if not self.is_authenticated:
            raise AmazonOrdersAuthError(
                "Max authentication flow attempts reached.")

    def logout(self) -> None:
        """
        Logout and close the existing Amazon session and clear cookies.
        """
        self.get("{}/gp/sign-out.html".format(BASE_URL))

        if os.path.exists(self.cookie_jar_path):
            os.remove(self.cookie_jar_path)

        self.session.close()
        self.session = Session()

        self.is_authenticated = False

    def _sign_in(self) -> None:
        """
        Submit the sign-in form with the stored credentials; a reported error is raised.
        """
        form = self.last_response_parsed.find("form",
                                              {"name": SIGN_IN_FORM_NAME})
        data = self._build_from_form(form,
                                     additional_attrs={"email": self.username,
                                                       "password": self.password,
                                                       "rememberMe": "true"})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors(critical=True)

    def _mfa_device_select(self) -> None:
        """
        List the OTP device options, prompt for a 1-based choice, and submit the selection form.
        """
        form = self.last_response_parsed.find("form",
                                              {"id": MFA_DEVICE_SELECT_FORM_ID})
        contexts = form.find_all("input", {"name": "otpDeviceContext"})

        for i, field in enumerate(contexts, start=1):
            self.io.echo("{}: {}".format(i, field.attrs["value"].strip()))
        otp_device = int(
            self.io.prompt("Enter where you would like your one-time passcode sent", type=int))

        # The menu is 1-based; convert back to a list index.
        data = self._build_from_form(form,
                                     additional_attrs={
                                         "otpDeviceContext": contexts[otp_device - 1].attrs["value"]})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _mfa_submit(self) -> None:
        """
        Prompt for the one-time passcode and submit the MFA form.
        """
        otp = self.io.prompt("Enter the one-time passcode sent to your device")

        form = self.last_response_parsed.find("form", id=MFA_FORM_ID)
        data = self._build_from_form(form,
                                     additional_attrs={"otpCode": otp,
                                                       "rememberDevice": ""})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _captcha_1_submit(self) -> None:
        """
        Solve and submit the Captcha form identified by ``CAPTCHA_1_FORM_CLASS``.
        """
        captcha_div = self.last_response_parsed.find("div",
                                                     {"id": CAPTCHA_1_DIV_ID})

        solution = self._solve_captcha(
            captcha_div.find("img", {"alt": "captcha"}).attrs["src"])

        form = self.last_response_parsed.find("form",
                                              {"class": CAPTCHA_1_FORM_CLASS})
        data = self._build_from_form(form,
                                     additional_attrs={
                                         "cvf_captcha_input": solution})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix="{}/ap/cvf/".format(
                                               BASE_URL)),
                     data=data)

        self._handle_errors("cvf-widget-alert", "class")

    def _captcha_2_submit(self) -> None:
        """
        Solve and submit the Captcha form that encloses the ``CAPTCHA_2_INPUT_ID`` input.
        """
        form = self._find_captcha_2_input().find_parent("form")

        solution = self._solve_captcha(form.find("img").attrs["src"])

        data = self._build_from_form(form,
                                     additional_attrs={
                                         "field-keywords": solution})

        # Unlike the other forms, this one is sent as query parameters.
        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix=BASE_URL),
                     params=data)

        self._handle_errors("a-alert-info", "class")

    def _find_captcha_2_input(self) -> Optional[Tag]:
        """
        Find the first ``input`` in the last response whose ``id`` starts with ``CAPTCHA_2_INPUT_ID``.

        :return: The matching tag, or ``None`` if there is none.
        """
        return self.last_response_parsed.find(
            "input",
            id=lambda value: value and value.startswith(CAPTCHA_2_INPUT_ID))

    def _build_from_form(self,
                         form: Tag,
                         additional_attrs: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Collect the ``name``/``value`` pairs of a form's ``input`` tags into a dict.

        :param form: The form whose inputs should be collected.
        :param additional_attrs: Entries added afterwards, overriding inputs of the same name.
        :return: The data to submit for the form.
        """
        data = {}
        for field in form.find_all("input"):
            try:
                data[field["name"]] = field["value"]
            except KeyError:
                # Inputs missing either attribute are skipped.
                pass
        if additional_attrs:
            data.update(additional_attrs)
        return data

    def _get_form_action(self,
                         form: Tag,
                         prefix: Optional[str] = None) -> str:
        """
        Resolve the URL a form should be submitted to.

        :param form: The form to read the ``action`` attribute from.
        :param prefix: Prepended to the action when it does not start with ``http``.
        :return: The form's action, or the last response's URL when the form has none.
        """
        action = form.attrs.get("action")
        if not action:
            action = self.last_response.url
        if prefix and not action.startswith("http"):
            action = prefix + action
        return action

    def _is_field_found(self,
                        field_value: str,
                        field_type: str = "form",
                        field_key: str = "name") -> bool:
        """
        Check whether the last response contains a tag with the given attribute value.

        :param field_value: The attribute value to look for.
        :param field_type: The tag name to search for.
        :param field_key: The attribute to match ``field_value`` against.
        :return: ``True`` if a matching tag exists.
        """
        return self.last_response_parsed.find(field_type, {
            field_key: field_value}) is not None

    def _get_page_from_url(self,
                           url: str) -> str:
        """
        Build a filename, not yet present in the working directory, for dumping a response.

        :param url: The URL whose final path segment names the file.
        :return: ``<page>_<i>.html`` for the lowest ``i`` with no existing file of that name.
        """
        page_name = os.path.basename(urlparse(url).path)
        # ``str.strip(".html")`` strips a *set of characters* from both ends
        # (turning "thanks.html" into "anks"); remove the suffix explicitly.
        suffix = ".html"
        if page_name.endswith(suffix):
            page_name = page_name[:-len(suffix)]

        # Probe the same name that will be returned, advancing the index, so
        # earlier dumps are not overwritten and the loop terminates.
        i = 0
        while os.path.isfile("{}_{}.html".format(page_name, i)):
            i += 1
        return "{}_{}.html".format(page_name, i)

    def _handle_errors(self,
                       error_div: str = "auth-error-message-box",
                       attr_name: str = "id",
                       critical: bool = False) -> None:
        """
        Report an error message found in the last response, if any.

        :param error_div: The attribute value identifying the error ``div``.
        :param attr_name: The attribute ``error_div`` is matched against.
        :param critical: Raise the error instead of echoing it.
        :raises AmazonOrdersAuthError: If an error is found and ``critical`` is ``True``.
        """
        error_tag = self.last_response_parsed.find("div",
                                                   {attr_name: error_div})
        if not error_tag:
            return

        error_msg = "An error occurred: {}".format(error_tag.text.strip())
        if critical:
            raise AmazonOrdersAuthError(error_msg)
        self.io.echo(error_msg)

    def _solve_captcha(self,
                       url: str) -> str:
        """
        Solve the Captcha image at ``url``, prompting the user when auto-solving fails.

        :param url: The URL of the Captcha image.
        :return: The Captcha solution.
        """
        captcha_response = AmazonCaptcha.fromlink(url).solve()
        if not captcha_response or captcha_response.lower() == "not solved":
            # Show the image to the user and ask them to type the answer.
            img_response = self.session.get(url)
            img = Image.open(BytesIO(img_response.content))
            img.show()
            self.io.echo("The Captcha couldn't be auto-solved.")
            captcha_response = self.io.prompt("Enter the characters shown in the image")

        return captcha_response