Coverage for amazonorders/session.py: 89.34%

197 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-25 22:50 +0000

1import json 

2import logging 

3import os 

4from io import BytesIO 

5from typing import Optional, Any, Dict 

6from urllib.parse import urlparse 

7 

8import requests 

9from PIL import Image 

10from amazoncaptcha import AmazonCaptcha 

11from bs4 import BeautifulSoup, Tag 

12from requests import Session, Response 

13from requests.utils import dict_from_cookiejar 

14 

15from amazonorders.conf import DEFAULT_COOKIE_JAR_PATH, DEFAULT_OUTPUT_DIR 

16from amazonorders.exception import AmazonOrdersAuthError 

17 

__author__ = "Alex Laird"
__copyright__ = "Copyright 2024, Alex Laird"
__version__ = "1.0.6"

logger = logging.getLogger(__name__)

# Root of every Amazon URL built in this module.
BASE_URL = "https://www.amazon.com"
# The sign-in endpoint; also sent as the ``Referer`` header below.
SIGN_IN_URL = BASE_URL + "/ap/signin"

# Headers applied to every request made through ``AmazonSession.request``.
BASE_HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "Cache-Control": "max-age=0",
    "Content-Type": "application/x-www-form-urlencoded",
    "Origin": BASE_URL,
    "Referer": SIGN_IN_URL,
    "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "macOS",
    "Sec-Ch-Viewport-Width": "1393",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Viewport-Width": "1393",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
}

# CSS selectors used by ``AmazonSession.login`` to recognize which auth page was returned.
SIGN_IN_FORM_SELECTOR = "form[name='signIn']"
MFA_DEVICE_SELECT_FORM_SELECTOR = "form[id='auth-select-device-form']"
MFA_FORM_SELECTOR = "form[id='auth-mfa-form']"
CAPTCHA_1_FORM_SELECTOR = "form[class*='cvf-widget-form-captcha']"
CAPTCHA_2_FORM_SELECTOR = "form:has(input[id^='captchacharacters'])"
CAPTCHA_OTP_FORM_SELECTOR = "form[id='verification-code-form']"

51 

52 

class IODefault:
    """
    Default input/output handler for the application, backed by the console.

    Pass a subclass that overrides :func:`echo` and/or :func:`prompt` when constructing an
    :class:`AmazonSession` if input/output should be handled another way.
    """

    def echo(self,
             msg,
             **kwargs):
        """
        Write a message to the console.

        :param msg: The data to send to output.
        :param kwargs: Accepted for compatibility with other handlers; ignored here.
        """
        print(msg)

    def prompt(self,
               msg,
               type=None,
               **kwargs):
        """
        Ask the user for input on the console.

        :param msg: The text shown as the input prompt (a trailing ``": "`` is appended).
        :param type: Accepted for compatibility with other handlers; ignored here.
        :param kwargs: Accepted for compatibility with other handlers; ignored here.
        :return: The raw string the user entered.
        """
        prompt_text = "{}: ".format(msg)
        return input(prompt_text)

84 

85 

class AmazonSession:
    """
    An interface for interacting with Amazon and authenticating an underlying :class:`requests.Session`. Utilizing
    this class means session data is maintained between requests. Session cookies are also persisted to disk after
    each request, meaning they will also be maintained between separate instantiations of the class or application.

    To get started, call the :func:`login` function.
    """

    def __init__(self,
                 username: str,
                 password: str,
                 debug: bool = False,
                 max_auth_attempts: int = 10,
                 cookie_jar_path: Optional[str] = None,
                 io: IODefault = IODefault(),
                 output_dir: Optional[str] = None) -> None:
        """
        Create the session and load any cookies previously persisted at ``cookie_jar_path``.

        :param username: An Amazon username.
        :param password: An Amazon password.
        :param debug: Enable ``DEBUG`` logging and write each response body to ``output_dir``.
        :param max_auth_attempts: The number of auth flow steps :func:`login` will take before giving up.
        :param cookie_jar_path: Where session cookies are persisted, defaults to ``conf.DEFAULT_COOKIE_JAR_PATH``.
        :param io: The handler used for echoes and prompts.
        :param output_dir: Where output files are written, defaults to ``conf.DEFAULT_OUTPUT_DIR``.
        """
        if not cookie_jar_path:
            cookie_jar_path = DEFAULT_COOKIE_JAR_PATH
        if not output_dir:
            output_dir = DEFAULT_OUTPUT_DIR

        #: An Amazon username.
        self.username: str = username
        #: An Amazon password.
        self.password: str = password

        #: Set logger ``DEBUG`` and write an HTML file for each request made on the session.
        self.debug: bool = debug
        if self.debug:
            logger.setLevel(logging.DEBUG)
        #: Will continue in :func:`login()`'s auth flow this many times (successes and failures).
        self.max_auth_attempts: int = max_auth_attempts
        #: The path to persist session cookies, defaults to ``conf.DEFAULT_COOKIE_JAR_PATH``.
        self.cookie_jar_path: str = cookie_jar_path
        #: The I/O handler for echoes and prompts.
        self.io: IODefault = io
        #: The directory where any output files will be produced, defaults to ``conf.DEFAULT_OUTPUT_DIR``.
        self.output_dir: str = output_dir

        #: The shared session to be used across all requests.
        self.session: Session = Session()
        #: The last response executed on the Session.
        self.last_response: Optional[Response] = None
        #: A parsed representation of the last response executed on the Session.
        self.last_response_parsed: Optional[Tag] = None
        #: If :func:`login()` has been executed and successfully logged in the session.
        self.is_authenticated: bool = False

        cookie_dir = os.path.dirname(self.cookie_jar_path)
        # A bare file name has no directory component, and ``os.makedirs("")`` raises.
        if cookie_dir:
            os.makedirs(cookie_dir, exist_ok=True)
        if os.path.exists(self.cookie_jar_path):
            with open(self.cookie_jar_path, "r", encoding="utf-8") as f:
                data = json.load(f)
                cookies = requests.utils.cookiejar_from_dict(data)
                self.session.cookies.update(cookies)

    def request(self,
                method: str,
                url: str,
                **kwargs: Any) -> Response:
        """
        Execute the request against Amazon with base headers, parsing and storing the response
        and persisting response cookies.

        :param method: The request method to execute.
        :param url: The URL to execute ``method`` on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`requests.request`.
        :return: The Response from the executed request.
        """
        # Work on a copy so a ``headers`` dict supplied by the caller is never mutated (and ``headers=None`` is
        # tolerated). As before, the base headers win over caller-supplied values for the same key.
        headers = dict(kwargs.get("headers") or {})
        headers.update(BASE_HEADERS)
        kwargs["headers"] = headers

        logger.debug("%s request to %s", method, url)

        self.last_response = self.session.request(method, url, **kwargs)
        self.last_response_parsed = BeautifulSoup(self.last_response.text,
                                                  "html.parser")

        # Opening with "w" truncates any previous cookie file, so no explicit removal is needed.
        cookies = dict_from_cookiejar(self.session.cookies)
        with open(self.cookie_jar_path, "w", encoding="utf-8") as f:
            json.dump(cookies, f)

        logger.debug("Response: %s - %s",
                     self.last_response.url,
                     self.last_response.status_code)

        if self.debug:
            # The output directory is not created anywhere else, so make sure it exists before writing.
            os.makedirs(self.output_dir, exist_ok=True)
            page_name = self._get_page_from_url(self.last_response.url)
            with open(os.path.join(self.output_dir, page_name), "w",
                      encoding="utf-8") as html_file:
                logger.debug("Response written to file: %s", html_file.name)
                html_file.write(self.last_response.text)

        return self.last_response

    def get(self,
            url: str,
            **kwargs: Any) -> Response:
        """
        Perform a GET request.

        :param url: The URL to GET on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed GET request.
        """
        return self.request("GET", url, **kwargs)

    def post(self,
             url: str,
             **kwargs: Any) -> Response:
        """
        Perform a POST request.

        :param url: The URL to POST on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed POST request.
        """
        return self.request("POST", url, **kwargs)

    def auth_cookies_stored(self) -> bool:
        """
        Check whether the session's cookie jar holds the cookies this class treats as proof of authentication.

        :return: ``True`` if both the ``session-token`` and ``x-main`` cookies are present and non-empty.
        """
        cookies = dict_from_cookiejar(self.session.cookies)
        return bool(cookies.get("session-token") and cookies.get("x-main"))

    def login(self) -> None:
        """
        Execute an Amazon login process. This will include the sign-in page, and may also include Captcha challenges
        and OTP pages (if 2FA authentication is enabled on your account).

        If successful, ``is_authenticated`` will be set to ``True``.

        Session cookies are persisted, and if existing session data is found during this auth flow, it will be
        skipped entirely and flagged as authenticated.

        :raises AmazonOrdersAuthError: If an unrecognized page is returned, sign-in is rejected, or
            ``max_auth_attempts`` steps are taken without the session becoming authenticated.
        """
        self.get("{}/gp/sign-in.html".format(BASE_URL))

        # If our local session data is stale, Amazon will redirect us to the sign in page
        if self.auth_cookies_stored() and self.last_response.url.split("?")[0] == SIGN_IN_URL:
            self.logout()
            self.get("{}/gp/sign-in.html".format(BASE_URL))

        attempts = 0
        while not self.is_authenticated and attempts < self.max_auth_attempts:
            if self._is_signed_in():
                self.is_authenticated = True
                break

            if self.last_response_parsed.select_one(SIGN_IN_FORM_SELECTOR):
                self._sign_in()
            elif self.last_response_parsed.select_one(CAPTCHA_1_FORM_SELECTOR):
                self._captcha_submit(CAPTCHA_1_FORM_SELECTOR,
                                     "cvf_captcha_input",
                                     "cvf-widget-alert")
            elif self.last_response_parsed.select_one(CAPTCHA_2_FORM_SELECTOR):
                self._captcha_submit(CAPTCHA_2_FORM_SELECTOR,
                                     "field-keywords",
                                     "a-alert-info")
            elif self.last_response_parsed.select_one(
                    MFA_DEVICE_SELECT_FORM_SELECTOR):
                self._mfa_device_select()
            elif self.last_response_parsed.select_one(MFA_FORM_SELECTOR):
                self._mfa_submit()
            elif self.last_response_parsed.select_one(
                    CAPTCHA_OTP_FORM_SELECTOR):
                self._captcha_otp_submit()
            else:
                raise AmazonOrdersAuthError(
                    "An error occurred, this is an unknown page, or its parsed contents don't match a known auth flow: {}. To capture the page to a file, set the `debug` flag.".format(
                        self.last_response.url))

            attempts += 1

        if self.is_authenticated:
            return

        # The loop only inspects the current page at the top of an iteration, so the response produced by the
        # final permitted attempt has not been examined yet. Check it before declaring failure.
        if self._is_signed_in():
            self.is_authenticated = True
            return

        raise AmazonOrdersAuthError(
            "Max authentication flow attempts reached.")

    def logout(self) -> None:
        """
        Logout and close the existing Amazon session and clear cookies.
        """
        self.get("{}/gp/sign-out.html".format(BASE_URL))

        if os.path.exists(self.cookie_jar_path):
            os.remove(self.cookie_jar_path)

        self.session.close()
        self.session = Session()

        self.is_authenticated = False

    def _is_signed_in(self) -> bool:
        # True when auth cookies are stored, or the last page shows the signed-in navigation markers.
        if self.auth_cookies_stored():
            return True
        page = self.last_response.text
        return "Hello, sign in" not in page and "nav-item-signout" in page

    def _sign_in(self) -> None:
        # Submit the sign-in form with the stored credentials; any error shown afterwards is fatal.
        form = self.last_response_parsed.select_one(SIGN_IN_FORM_SELECTOR)
        data = self._build_from_form(form,
                                     additional_attrs={"email": self.username,
                                                       "password": self.password,
                                                       "rememberMe": "true"})

        self._submit_form(form, data)

        self._handle_errors(critical=True)

    def _mfa_device_select(self) -> None:
        # List the available OTP devices, ask the user to pick one (1-based), and submit that choice.
        form = self.last_response_parsed.select_one(
            MFA_DEVICE_SELECT_FORM_SELECTOR)
        contexts = form.select("input[name='otpDeviceContext']")

        for i, field in enumerate(contexts, start=1):
            self.io.echo("{}: {}".format(i, field["value"].strip()))

        answer = self.io.prompt(
            "--> Enter where you would like your one-time passcode sent",
            type=int)
        self.io.echo("")

        try:
            otp_device = int(answer)
        except (TypeError, ValueError):
            otp_device = 0

        if not 1 <= otp_device <= len(contexts):
            # Without this check, 0 or a negative number would silently select a device counted from the end
            # of the list. Report the problem and return without submitting; the page is unchanged, so the
            # login flow will present the choice again on its next attempt.
            self.io.echo(
                "An error occurred: Invalid selection, enter a number between 1 and {}\n".format(
                    len(contexts)),
                fg="red")
            return

        data = self._build_from_form(form,
                                     additional_attrs={"otpDeviceContext":
                                                       contexts[otp_device - 1]["value"]})

        self._submit_form(form, data)

        self._handle_errors()

    def _mfa_submit(self) -> None:
        # Prompt for the one-time passcode and submit the MFA form.
        otp = self.io.prompt(
            "--> Enter the one-time passcode sent to your device")
        self.io.echo("")

        form = self.last_response_parsed.select_one(MFA_FORM_SELECTOR)
        data = self._build_from_form(form,
                                     additional_attrs={"otpCode": otp,
                                                       "rememberDevice": ""})

        self._submit_form(form, data)

        self._handle_errors()

    def _captcha_submit(self,
                        form_selector: str,
                        solution_attr_key: str,
                        error_div_class: str) -> None:
        # Solve the Captcha image found alongside the form and submit the solution under ``solution_attr_key``.
        form = self.last_response_parsed.select_one(form_selector)

        solution = self._solve_captcha(
            form.find_parent().select_one("img")["src"])

        data = self._build_from_form(form,
                                     additional_attrs={
                                         solution_attr_key: solution})

        self._submit_form(form, data)

        self._handle_errors(error_div_class, "class")

    def _captcha_otp_submit(self) -> None:
        # Prompt for the one-time passcode and submit the verification-code form.
        otp = self.io.prompt(
            "--> Enter the one-time passcode sent to your device")
        self.io.echo("")

        form = self.last_response_parsed.select_one(CAPTCHA_OTP_FORM_SELECTOR)
        data = self._build_from_form(form,
                                     additional_attrs={"otpCode": otp})

        self._submit_form(form, data)

        self._handle_errors()

    def _build_from_form(self,
                         form: Tag,
                         additional_attrs: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        # Collect name/value pairs from the form's inputs, then overlay ``additional_attrs``.
        data = {}
        for field in form.select("input"):
            try:
                data[field["name"]] = field["value"]
            except KeyError:
                # Inputs lacking a ``name`` or ``value`` attribute carry nothing to submit.
                continue
        if additional_attrs:
            data.update(additional_attrs)
        return data

    def _get_form_action(self,
                         form: Tag) -> str:
        # Resolve the form's ``action`` to an absolute URL, relative to the page the form was served from.
        from urllib.parse import urljoin

        action = form.get("action")
        if not action:
            return self.last_response.url
        # ``urljoin`` returns absolute actions unchanged and resolves root-relative, path-relative and
        # protocol-relative ones against the current URL.
        return urljoin(self.last_response.url, action)

    def _get_page_from_url(self,
                           url: str) -> str:
        # Derive an unused ``<page>_<n>.html`` file name within ``output_dir`` from the URL's last path segment.
        page_name = os.path.basename(urlparse(url).path)
        # Remove only a literal ".html" suffix; ``str.strip`` would eat matching characters from both ends.
        if page_name.endswith(".html"):
            page_name = page_name[:-len(".html")]

        i = 0
        while os.path.isfile(os.path.join(self.output_dir,
                                          "{}_{}.html".format(page_name, i))):
            i += 1
        return "{}_{}.html".format(page_name, i)

    def _handle_errors(self,
                       error_div: str = "auth-error-message-box",
                       attr_name: str = "id",
                       critical: bool = False) -> None:
        # Look for an error ``div`` on the last page; raise if ``critical``, otherwise echo its text.
        error_tag = self.last_response_parsed.select_one(
            "div[{}='{}']".format(attr_name, error_div))
        if not error_tag:
            return

        error_msg = "An error occurred: {}\n".format(error_tag.text.strip())

        if critical:
            raise AmazonOrdersAuthError(error_msg)

        self.io.echo(error_msg, fg="red")

    def _solve_captcha(self,
                       url: str) -> str:
        # Try to auto-solve the Captcha at ``url``; if that fails, show the image and ask the user.
        captcha_response = AmazonCaptcha.fromlink(url).solve()
        if not captcha_response or captcha_response.lower() == "not solved":
            img_response = self.session.get(url)
            img = Image.open(BytesIO(img_response.content))
            img.show()
            self.io.echo("Info: The Captcha couldn't be auto-solved.")
            captcha_response = self.io.prompt(
                "--> Enter the characters shown in the image")
            self.io.echo("")

        return captcha_response

    def _submit_form(self,
                     form: Tag,
                     data: Dict[str, Any]) -> None:
        # Send ``data`` to the form's action using the form's method (query params for GET, body otherwise).
        method = form.get("method", "GET").upper()
        action = self._get_form_action(form)
        request_data = {"params" if method == "GET" else "data": data}
        self.request(method,
                     action,
                     **request_data)

432 **request_data)