Coverage for amazonorders/session.py: 73.28%
116 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-07 21:56 +0000
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-07 21:56 +0000
1import json
2import logging
3import os
4from typing import Optional, Any
5from urllib.parse import urlparse
7import requests
8from bs4 import BeautifulSoup, Tag
9from requests import Session, Response
10from requests.utils import dict_from_cookiejar
12from amazonorders import constants
13from amazonorders.conf import DEFAULT_COOKIE_JAR_PATH, DEFAULT_OUTPUT_DIR
14from amazonorders.exception import AmazonOrdersAuthError
15from amazonorders.forms import SignInForm, MfaDeviceSelectForm, MfaForm, CaptchaForm
__author__ = "Alex Laird"
__copyright__ = "Copyright 2024, Alex Laird"
__version__ = "1.0.9"

logger = logging.getLogger(__name__)

# The auth-flow forms that AmazonSession.login() iterates over, in order; the first form whose
# select_form() matches the last parsed response is filled and submitted.
AUTH_FORMS = [
    SignInForm(),
    MfaDeviceSelectForm(),
    MfaForm(),
    CaptchaForm(),
    CaptchaForm(constants.CAPTCHA_2_FORM_SELECTOR, constants.CAPTCHA_2_ERROR_SELECTOR, "field-keywords"),
    MfaForm(constants.CAPTCHA_OTP_FORM_SELECTOR),
]
class IODefault:
    """
    Handles input/output for the application. This default implementation talks to the console; the
    class exists so that it can be overridden when constructing an :class:`AmazonSession` if
    input/output should be handled another way.
    """

    def echo(self,
             msg: str,
             **kwargs: Any):
        """
        Write a message to the console.

        :param msg: The data to send to output.
        :param kwargs: Ignored by the default implementation.
        """
        print(msg)

    def prompt(self,
               msg: str,
               type: str = None,
               **kwargs: Any):
        """
        Ask for user input on the console.

        :param msg: The text shown as the input prompt.
        :param type: Ignored by the default implementation.
        :param kwargs: Ignored by the default implementation.
        :return: The text entered by the user.
        """
        prompt_text = f"--> {msg}: "
        return input(prompt_text)
class AmazonSession:
    """
    An interface for interacting with Amazon and authenticating an underlying :class:`requests.Session`. Utilizing
    this class means session data is maintained between requests. Session data is also persisted after each request,
    meaning it will also be maintained between separate instantiations of the class or application.

    To get started, call the :func:`login` function.
    """

    def __init__(self,
                 username: str,
                 password: str,
                 debug: bool = False,
                 max_auth_attempts: int = 10,
                 cookie_jar_path: Optional[str] = None,
                 io: Optional[IODefault] = None,
                 output_dir: Optional[str] = None) -> None:
        """
        Build the session and load any cookies previously persisted at ``cookie_jar_path``.

        :param username: An Amazon username.
        :param password: An Amazon password.
        :param debug: Set logger ``DEBUG`` and write an HTML file for each request made on the session.
        :param max_auth_attempts: The maximum number of auth forms :func:`login` will submit.
        :param cookie_jar_path: Where to persist cookies, defaults to ``conf.DEFAULT_COOKIE_JAR_PATH``.
        :param io: The I/O handler for echoes and prompts, defaults to a new :class:`IODefault`.
        :param output_dir: Where output files are written, defaults to ``conf.DEFAULT_OUTPUT_DIR``.
        """
        if not cookie_jar_path:
            cookie_jar_path = DEFAULT_COOKIE_JAR_PATH
        if not output_dir:
            output_dir = DEFAULT_OUTPUT_DIR
        if io is None:
            # Created per instance rather than once in the signature, so sessions never share a handler.
            io = IODefault()

        #: An Amazon username.
        self.username: str = username
        #: An Amazon password.
        self.password: str = password

        #: Set logger ``DEBUG``, send output to ``stderr``, and write an HTML file for each request made on the session.
        self.debug: bool = debug
        if self.debug:
            logger.setLevel(logging.DEBUG)
        #: Will continue in :func:`login`'s auth flow this many times (successes and failures).
        self.max_auth_attempts: int = max_auth_attempts
        #: The path to persist session cookies, defaults to ``conf.DEFAULT_COOKIE_JAR_PATH``.
        self.cookie_jar_path: str = cookie_jar_path
        #: The I/O handler for echoes and prompts.
        self.io: IODefault = io
        #: The directory where any output files will be produced, defaults to ``conf.DEFAULT_OUTPUT_DIR``.
        self.output_dir: str = output_dir

        #: The shared session to be used across all requests.
        self.session: Session = Session()
        #: The last response executed on the Session.
        self.last_response: Optional[Response] = None
        #: A parsed representation of the last response executed on the Session.
        self.last_response_parsed: Optional[Tag] = None
        #: If :func:`login` has been executed and successfully logged in the session.
        self.is_authenticated: bool = False

        # A bare filename has an empty dirname, and os.makedirs("") raises, so only create real directories.
        cookie_dir = os.path.dirname(self.cookie_jar_path)
        if cookie_dir:
            os.makedirs(cookie_dir, exist_ok=True)

        if os.path.exists(self.cookie_jar_path):
            with open(self.cookie_jar_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            self.session.cookies.update(requests.utils.cookiejar_from_dict(data))

    def request(self,
                method: str,
                url: str,
                **kwargs: Any) -> Response:
        """
        Execute the request against Amazon with base headers, parsing and storing the response
        and persisting response cookies.

        :param method: The request method to execute.
        :param url: The URL to execute ``method`` on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`requests.request`.
        :return: The Response from the executed request.
        """
        # Merge into a copy so the caller's headers dict is never mutated; base headers still take precedence.
        headers = dict(kwargs.get("headers") or {})
        headers.update(constants.BASE_HEADERS)
        kwargs["headers"] = headers

        logger.debug("%s request to %s", method, url)

        self.last_response = self.session.request(method, url, **kwargs)
        self.last_response_parsed = BeautifulSoup(self.last_response.text,
                                                  "html.parser")

        # Persist the session's cookies after every request; mode "w" replaces any previous file contents.
        cookies = dict_from_cookiejar(self.session.cookies)
        with open(self.cookie_jar_path, "w", encoding="utf-8") as f:
            f.write(json.dumps(cookies))

        logger.debug("Response: %s - %s",
                     self.last_response.url,
                     self.last_response.status_code)

        if self.debug:
            # Make sure the output directory exists before writing the captured page into it.
            os.makedirs(self.output_dir, exist_ok=True)
            page_name = self._get_page_from_url(self.last_response.url)
            with open(os.path.join(self.output_dir, page_name), "w",
                      encoding="utf-8") as html_file:
                logger.debug("Response written to file: %s", html_file.name)
                html_file.write(self.last_response.text)

        return self.last_response

    def get(self,
            url: str,
            **kwargs: Any) -> Response:
        """
        Perform a GET request.

        :param url: The URL to GET on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed GET request.
        """
        return self.request("GET", url, **kwargs)

    def post(self,
             url: str,
             **kwargs: Any) -> Response:
        """
        Perform a POST request.

        :param url: The URL to POST on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed POST request.
        """
        return self.request("POST", url, **kwargs)

    def auth_cookies_stored(self) -> bool:
        """
        Check whether the session holds both the ``session-token`` and ``x-main`` cookies.

        :return: ``True`` if both cookies are present with non-empty values.
        """
        cookies = dict_from_cookiejar(self.session.cookies)
        return bool(cookies.get("session-token") and cookies.get("x-main"))

    def login(self) -> None:
        """
        Execute an Amazon login process. This will include the sign-in page, and may also include Captcha challenges
        and OTP pages (if 2FA authentication is enabled on your account).

        If successful, ``is_authenticated`` will be set to ``True``.

        Session cookies are persisted, and if existing session data is found during this auth flow, it will be
        skipped entirely and flagged as authenticated.

        :raises AmazonOrdersAuthError: If the current page matches no known auth form, or if the session is
            still not signed in after ``max_auth_attempts`` form submissions.
        """
        self.get(constants.SIGN_IN_URL)

        # If our local session data is stale, Amazon will redirect us to the signin page
        if self.auth_cookies_stored() and self.last_response.url.split("?")[0] == constants.SIGN_IN_REDIRECT_URL:
            self.logout()
            self.get(constants.SIGN_IN_URL)

        if self.is_authenticated:
            return

        for _ in range(self.max_auth_attempts):
            if self._is_signed_in():
                self.is_authenticated = True
                return

            for form in AUTH_FORMS:
                if form.select_form(self, self.last_response_parsed):
                    form.fill_form()
                    form.submit()
                    break
            else:
                # No known form matched the current page, so the flow cannot continue.
                self._raise_auth_error()

        # The final submission may have been the one that signed us in, so check once more before giving up.
        if self._is_signed_in():
            self.is_authenticated = True
            return

        raise AmazonOrdersAuthError(
            "Max authentication flow attempts reached.")

    def logout(self) -> None:
        """
        Logout and close the existing Amazon session and clear cookies.
        """
        self.get(constants.SIGN_OUT_URL)

        if os.path.exists(self.cookie_jar_path):
            os.remove(self.cookie_jar_path)

        self.session.close()
        self.session = Session()

        self.is_authenticated = False

    def _is_signed_in(self) -> bool:
        """
        Decide whether the last response indicates a signed-in session.

        :return: ``True`` if the auth cookies are stored, or the last page contains the sign-out nav item and
            not the "Hello, sign in" greeting.
        """
        if self.auth_cookies_stored():
            return True

        # TODO: BeautifulSoup doesn't let us query for #nav-item-signout, maybe because it's dynamic on the
        #  page, but we should find a better way to do this
        page_text = self.last_response.text
        return "Hello, sign in" not in page_text and "nav-item-signout" in page_text

    def _get_page_from_url(self,
                           url: str) -> str:
        """
        Build a filename for a captured page that does not yet exist in ``output_dir``.

        :param url: The URL the page was fetched from.
        :return: A filename of the form ``<page>_<n>.html`` (not a full path), where ``<page>`` is the last path
            segment of ``url`` without its extension (or ``index`` if empty) and ``<n>`` is the lowest unused
            counter.
        """
        page_name = os.path.splitext(os.path.basename(urlparse(url).path))[0]
        if not page_name:
            page_name = "index"

        i = 0
        filename_frmt = "{}_{}.html"
        # Look inside output_dir, where the caller writes the file, so earlier captures are not overwritten.
        while os.path.isfile(os.path.join(self.output_dir, filename_frmt.format(page_name, i))):
            i += 1
        return filename_frmt.format(page_name, i)

    def _raise_auth_error(self) -> None:
        """
        Raise an :class:`AmazonOrdersAuthError` describing why the auth flow could not continue.

        :raises AmazonOrdersAuthError: Always, with a message built from the last response.
        """
        debug_str = " To capture the page to a file, set the `debug` flag." if not self.debug else ""
        if self.last_response.ok:
            error_msg = ("An error occurred, this is an unknown page, or its parsed contents don't match a "
                         "known auth flow: {}.{}").format(self.last_response.url, debug_str)
        else:
            error_msg = "An error occurred, the page {} returned {}.".format(self.last_response.url,
                                                                             self.last_response.status_code)
            if 500 <= self.last_response.status_code < 600:
                error_msg += (" Amazon had an issue on their end, or may be temporarily blocking your requests. "
                              "Wait a bit before trying again.")

            error_msg += debug_str

        raise AmazonOrdersAuthError(error_msg)