# Coverage for amazonorders/session.py: 93.98% (166 statements)
# Report generated by coverage.py v7.4.0, created at 2024-01-17 01:52 +0000
import json
import logging
import os
from io import BytesIO
from typing import Optional, Any, Dict
from urllib.parse import urlparse

import requests
from PIL import Image
from amazoncaptcha import AmazonCaptcha
from bs4 import BeautifulSoup, Tag
from requests import Session, Response

from amazonorders.exception import AmazonOrdersAuthError
16__author__ = "Alex Laird"
17__copyright__ = "Copyright 2024, Alex Laird"
18__version__ = "1.0.0"
20logger = logging.getLogger(__name__)
22BASE_URL = "https://www.amazon.com"
23BASE_HEADERS = {
24 "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
25 "Accept-Encoding": "gzip, deflate, br",
26 "Accept-Language": "en-US,en;q=0.9",
27 "Cache-Control": "max-age=0",
28 "Content-Type": "application/x-www-form-urlencoded",
29 "Origin": BASE_URL,
30 "Referer": "{}/ap/signin".format(BASE_URL),
31 "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
32 "Sec-Ch-Ua-Mobile": "?0",
33 "Sec-Ch-Ua-Platform": "macOS",
34 "Sec-Ch-Viewport-Width": "1393",
35 "Sec-Fetch-Dest": "document",
36 "Sec-Fetch-Mode": "navigate",
37 "Sec-Fetch-Site": "same-origin",
38 "Sec-Fetch-User": "?1",
39 "Viewport-Width": "1393",
40 "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
41}
42SIGN_IN_FORM_NAME = "signIn"
43MFA_DEVICE_SELECT_FORM_ID = "auth-select-device-form"
44MFA_FORM_ID = "auth-mfa-form"
45CAPTCHA_1_DIV_ID = "cvf-page-content"
46CAPTCHA_1_FORM_CLASS = "cvf-widget-form"
47CAPTCHA_2_INPUT_ID = "captchacharacters"
49DEFAULT_COOKIE_JAR_PATH = os.path.join(os.path.expanduser("~"), ".config", "amazon-orders", "cookies.json")
class AmazonSession:
    """
    An interface for authenticating with Amazon and issuing requests through a single underlying
    :class:`requests.Session`.

    Cookies are loaded from ``cookie_jar_path`` when the object is created (if the file exists) and are written
    back to that file after every request made through :func:`request`.
    """

    def __init__(self,
                 username: str,
                 password: str,
                 debug: bool = False,
                 max_auth_attempts: int = 10,
                 cookie_jar_path: Optional[str] = None) -> None:
        """
        :param username: The value submitted as ``email`` on the sign-in form.
        :param password: The value submitted as ``password`` on the sign-in form.
        :param debug: If ``True``, set the module logger to ``DEBUG`` and write each response body to an HTML file.
        :param max_auth_attempts: The maximum number of authentication pages :func:`login` will process.
        :param cookie_jar_path: The file in which cookies are persisted, defaults to ``DEFAULT_COOKIE_JAR_PATH``.
        """
        if not cookie_jar_path:
            cookie_jar_path = DEFAULT_COOKIE_JAR_PATH

        #: The username submitted on the sign-in form.
        self.username: str = username
        #: The password submitted on the sign-in form.
        self.password: str = password

        #: Set logger ``DEBUG`` and write an HTML file for each response received on the session.
        self.debug: bool = debug
        if self.debug:
            logger.setLevel(logging.DEBUG)
        #: The maximum number of authentication pages :func:`login` will process before giving up.
        self.max_auth_attempts: int = max_auth_attempts
        #: The path to persist session cookies, defaults to ``DEFAULT_COOKIE_JAR_PATH``.
        self.cookie_jar_path: str = cookie_jar_path

        #: The underlying session through which every request is made.
        self.session: Session = Session()
        #: The most recent response received through :func:`request`.
        self.last_response: Optional[Response] = None
        #: ``last_response`` parsed with ``html.parser``.
        self.last_response_parsed: Optional[Tag] = None
        #: If :func:`login()` has been executed and successfully logged in the session.
        self.is_authenticated: bool = False

        cookie_dir = os.path.dirname(self.cookie_jar_path)
        # A bare filename has no directory component, and ``os.makedirs("")`` would raise
        if cookie_dir:
            os.makedirs(cookie_dir, exist_ok=True)
        if os.path.exists(self.cookie_jar_path):
            with open(self.cookie_jar_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            cookies = requests.utils.cookiejar_from_dict(data)
            self.session.cookies.update(cookies)

    def request(self,
                method: str,
                url: str,
                **kwargs: Any) -> Response:
        """
        Execute a request on the session, then record the response, persist the session's cookies, and (when
        ``debug`` is set) write the response body to an HTML file in the current working directory.

        :param method: The request method to execute.
        :param url: The URL to execute ``method`` on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`requests.request`.
        :return: The Response from the executed request.
        """
        # Work on a copy so the caller's dict is never mutated; ``BASE_HEADERS`` still take precedence
        headers = dict(kwargs.get("headers") or {})
        headers.update(BASE_HEADERS)
        kwargs["headers"] = headers

        logger.debug("{} request to {}".format(method, url))

        self.last_response = self.session.request(method, url, **kwargs)
        self.last_response_parsed = BeautifulSoup(self.last_response.text,
                                                  "html.parser")

        # Opening with "w" truncates any previous cookie file, so it does not need to be removed first
        cookies = requests.utils.dict_from_cookiejar(self.session.cookies)
        with open(self.cookie_jar_path, "w", encoding="utf-8") as f:
            json.dump(cookies, f)

        logger.debug("Response: {} - {}".format(self.last_response.url,
                                                self.last_response.status_code))

        if self.debug:
            page_name = self._get_page_from_url(self.last_response.url)
            with open(page_name, "w", encoding="utf-8") as html_file:
                logger.debug(
                    "Response written to file: {}".format(html_file.name))
                html_file.write(self.last_response.text)

        return self.last_response

    def get(self,
            url: str,
            **kwargs: Any) -> Response:
        """
        Execute a GET request on the session.

        :param url: The URL to GET on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed GET request.
        """
        return self.request("GET", url, **kwargs)

    def post(self,
             url: str,
             **kwargs: Any) -> Response:
        """
        Execute a POST request on the session.

        :param url: The URL to POST on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed POST request.
        """
        return self.request("POST", url, **kwargs)

    def login(self) -> None:
        """
        Execute an Amazon login process. This will include the sign-in page, and may also include Captcha challenges
        and OTP pages (if 2FA authentication is enabled on your account).

        If successful, ``is_authenticated`` will be set to ``True``.

        :raises AmazonOrdersAuthError: If an unrecognized page is encountered, or if ``max_auth_attempts`` pages
            were processed without reaching a signed-in page.
        """
        self.get("{}/gp/sign-in.html".format(BASE_URL))

        attempts = 0
        while not self.is_authenticated and attempts < self.max_auth_attempts:
            if self._is_signed_in_page():
                self.is_authenticated = True
                break

            if self._is_field_found(SIGN_IN_FORM_NAME):
                self._sign_in()
            elif self._is_field_found(CAPTCHA_1_FORM_CLASS, field_key="class"):
                self._captcha_1_submit()
            elif self._find_captcha_2_input():
                self._captcha_2_submit()
            elif self._is_field_found(MFA_DEVICE_SELECT_FORM_ID,
                                      field_key="id"):
                self._mfa_device_select()
            elif self._is_field_found(MFA_FORM_ID, field_key="id"):
                self._mfa_submit()
            else:
                raise AmazonOrdersAuthError(
                    "An error occurred, this is an unknown page: {}. To capture the page to a file, set the `debug` flag.".format(
                        self.last_response.url))

            attempts += 1

        # The final permitted step may itself have landed on a signed-in page, so inspect the last response
        # once more before reporting a failure
        if not self.is_authenticated and self._is_signed_in_page():
            self.is_authenticated = True

        if not self.is_authenticated:
            raise AmazonOrdersAuthError(
                "Max authentication flow attempts reached.")

    def logout(self) -> None:
        """
        Request the sign-out page, close the underlying session, and mark this session as no longer authenticated.
        """
        self.get("{}/gp/sign-out.html".format(BASE_URL))

        self.close()

        self.is_authenticated = False

    def close(self) -> None:
        """
        Close the underlying :class:`requests.Session`.
        """
        self.session.close()

    def _is_signed_in_page(self) -> bool:
        # A page counts as signed in when it has the sign-out nav item and lacks the sign-in greeting
        page = self.last_response.text
        return "Hello, sign in" not in page and "nav-item-signout" in page

    def _find_captcha_2_input(self) -> Optional[Tag]:
        # The second Captcha variant is recognised by an input whose id starts with ``CAPTCHA_2_INPUT_ID``
        return self.last_response_parsed.find(
            "input",
            id=lambda value: value and value.startswith(CAPTCHA_2_INPUT_ID))

    def _sign_in(self) -> None:
        # Submit the sign-in form with the credentials; an error on this page is fatal
        form = self.last_response_parsed.find("form",
                                              {"name": SIGN_IN_FORM_NAME})
        data = self._build_from_form(form,
                                     additional_attrs={"email": self.username,
                                                       "password": self.password,
                                                       "rememberMe": "true"})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors(critical=True)

    def _mfa_device_select(self) -> None:
        # List the devices offered by the form, prompt for a 1-based choice, and submit it
        form = self.last_response_parsed.find("form",
                                              {"id": MFA_DEVICE_SELECT_FORM_ID})
        contexts = form.find_all("input", {"name": "otpDeviceContext"})
        for i, field in enumerate(contexts, start=1):
            print("{}: {}".format(i, field.attrs["value"].strip()))

        selection = input("Where would you like your one-time passcode sent? ")
        try:
            otp_device = int(selection)
        except ValueError:
            otp_device = 0
        # Reject anything outside the printed range; 0 or a negative number would otherwise silently index
        # from the end of the list
        if not 1 <= otp_device <= len(contexts):
            raise AmazonOrdersAuthError(
                "Invalid device selection: {}".format(selection))

        data = self._build_from_form(form,
                                     additional_attrs={"otpDeviceContext":
                                                       contexts[otp_device - 1].attrs["value"]})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _mfa_submit(self) -> None:
        # Prompt for the one-time passcode and submit the MFA form
        otp = input("Enter the one-time passcode sent to your device: ")

        form = self.last_response_parsed.find("form", id=MFA_FORM_ID)
        data = self._build_from_form(form,
                                     additional_attrs={"otpCode": otp,
                                                       "rememberDevice": ""})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _captcha_1_submit(self) -> None:
        # Solve the Captcha image inside the ``CAPTCHA_1_DIV_ID`` div and submit the widget form
        captcha_div = self.last_response_parsed.find("div",
                                                     {"id": CAPTCHA_1_DIV_ID})

        solution = self._solve_captcha(
            captcha_div.find("img", {"alt": "captcha"}).attrs["src"])

        form = self.last_response_parsed.find("form",
                                              {"class": CAPTCHA_1_FORM_CLASS})
        data = self._build_from_form(form,
                                     additional_attrs={
                                         "cvf_captcha_input": solution})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix="{}/ap/cvf/".format(
                                               BASE_URL)),
                     data=data)

        self._handle_errors("cvf-widget-alert", "class")

    def _captcha_2_submit(self) -> None:
        # Solve the Captcha image in the form enclosing the Captcha input; the fields are sent as query parameters
        form = self._find_captcha_2_input().find_parent("form")

        solution = self._solve_captcha(form.find("img").attrs["src"])

        data = self._build_from_form(form,
                                     additional_attrs={
                                         "field-keywords": solution})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix=BASE_URL),
                     params=data)

        self._handle_errors("a-alert-info", "class")

    def _build_from_form(self,
                         form: Tag,
                         additional_attrs: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        # Collect name/value pairs from the form's inputs, then overlay ``additional_attrs``
        data = {}
        for field in form.find_all("input"):
            try:
                data[field["name"]] = field["value"]
            except KeyError:
                # Inputs missing a ``name`` or a ``value`` contribute nothing to the payload
                continue
        if additional_attrs:
            data.update(additional_attrs)
        return data

    def _get_form_action(self,
                         form: Tag,
                         prefix: Optional[str] = None) -> str:
        # Fall back to the current page's URL when the form declares no action; ``prefix`` is only
        # prepended to actions that do not already start with "http"
        action = form.attrs.get("action")
        if not action:
            action = self.last_response.url
        if prefix and not action.startswith("http"):
            action = prefix + action
        return action

    def _is_field_found(self,
                        field_value: str,
                        field_type: str = "form",
                        field_key: str = "name") -> bool:
        # True if the last parsed response has a ``field_type`` tag whose ``field_key`` attribute matches
        return self.last_response_parsed.find(field_type, {
            field_key: field_value}) is not None

    def _get_page_from_url(self,
                           url: str) -> str:
        # Build a filename of the form ``<page>_<i>.html`` that does not yet exist in the working directory
        page_name = os.path.basename(urlparse(url).path)
        # Remove the extension as a suffix; ``str.strip(".html")`` would strip a *set of characters*
        # from both ends instead (e.g. turning "themes.html" into "emes")
        if page_name.endswith(".html"):
            page_name = page_name[:-len(".html")]
        i = 0
        # Probe the same name that will be returned, so earlier dumps are never overwritten
        while os.path.isfile("{}_{}.html".format(page_name, i)):
            i += 1
        return "{}_{}.html".format(page_name, i)

    def _handle_errors(self,
                       error_div: str = "auth-error-message-box",
                       attr_name: str = "id",
                       critical: bool = False) -> None:
        # Look for an error ``div`` in the last response; raise if ``critical``, otherwise print its text
        error_tag = self.last_response_parsed.find("div",
                                                   {attr_name: error_div})
        if error_tag:
            error_msg = "An error occurred: {}".format(error_tag.text.strip())

            if critical:
                raise AmazonOrdersAuthError(error_msg)
            else:
                print(error_msg)

    def _solve_captcha(self,
                       url: str) -> str:
        # Attempt an automatic solve first; if that fails, show the image and ask the user to type it in
        captcha_response = AmazonCaptcha.fromlink(url).solve()
        if not captcha_response or captcha_response.lower() == "not solved":
            img_response = self.session.get(url)
            img = Image.open(BytesIO(img_response.content))
            img.show()
            captcha_response = input(
                "The Captcha couldn't be auto-solved, enter the characters shown in the image: ")

        return captcha_response