Coverage for amazonorders/session.py: 90.77%

195 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-24 18:41 +0000

1import json 

2import logging 

3import os 

4from io import BytesIO 

5from typing import Optional, Any, Dict 

6from urllib.parse import urlparse 

7 

8import requests 

9from PIL import Image 

10from amazoncaptcha import AmazonCaptcha 

11from bs4 import BeautifulSoup, Tag 

12from requests import Session, Response 

13from requests.utils import dict_from_cookiejar 

14 

15from amazonorders.conf import DEFAULT_COOKIE_JAR_PATH, DEFAULT_OUTPUT_DIR 

16from amazonorders.exception import AmazonOrdersAuthError 

17 

# Package metadata.
__author__ = "Alex Laird"
__copyright__ = "Copyright 2024, Alex Laird"
__version__ = "1.0.4"

logger = logging.getLogger(__name__)

# Root URL that every request and every relative form action is resolved against.
BASE_URL = "https://www.amazon.com"

# Headers sent with every request made through ``AmazonSession.request``.
BASE_HEADERS = {
    "Accept": ("text/html,application/xhtml+xml,application/xml;q=0.9,"
               "image/avif,image/webp,image/apng,*/*;q=0.8,"
               "application/signed-exchange;v=b3;q=0.7"),
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "Cache-Control": "max-age=0",
    "Content-Type": "application/x-www-form-urlencoded",
    "Origin": BASE_URL,
    "Referer": BASE_URL + "/ap/signin",
    "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "macOS",
    "Sec-Ch-Viewport-Width": "1393",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Viewport-Width": "1393",
    "User-Agent": ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                   "AppleWebKit/537.36 (KHTML, like Gecko) "
                   "Chrome/120.0.0.0 Safari/537.36"),
}

# Identifiers (form name / element id / CSS class) used to recognise each page of the auth flow.
SIGN_IN_FORM_NAME = "signIn"
MFA_DEVICE_SELECT_FORM_ID = "auth-select-device-form"
MFA_FORM_ID = "auth-mfa-form"
CAPTCHA_1_DIV_ID = "cvf-page-content"
CAPTCHA_1_FORM_CLASS = "cvf-widget-form-captcha"
CAPTCHA_2_INPUT_ID = "captchacharacters"
CAPTCHA_OTP_FORM_ID = "verification-code-form"

51 

52 

class IODefault:
    """
    Console-backed input/output handler for the application. It exists as a class so that a
    different handler can be supplied when constructing an :class:`AmazonSession`, should
    input/output need to be routed somewhere other than the console.
    """

    def echo(self, msg, **kwargs):
        """
        Print a message to the console.

        :param msg: The data to send to output.
        :param kwargs: Accepted for interface compatibility; ignored here.
        """
        print(msg)

    def prompt(self, msg, type=None, **kwargs):
        """
        Ask for user input on the console, showing ``msg`` followed by ``": "``.

        :param msg: The data to use as the input prompt.
        :param type: Accepted for interface compatibility; ignored here.
        :param kwargs: Accepted for interface compatibility; ignored here.
        :return: The user input result.
        """
        label = "{}: ".format(msg)
        return input(label)

84 

85 

class AmazonSession:
    """
    An interface for interacting with Amazon and authenticating an underlying :class:`requests.Session`. Utilizing
    this class means session data is maintained between requests. Session data is also persisted after each request,
    meaning it will also be maintained between separate instantiations of the class or application.

    To get started, call the :func:`login` function.
    """

    def __init__(self,
                 username: str,
                 password: str,
                 debug: bool = False,
                 max_auth_attempts: int = 10,
                 cookie_jar_path: Optional[str] = None,
                 io: IODefault = IODefault(),
                 output_dir: Optional[str] = None) -> None:
        """
        Build the session and load any cookies previously persisted at ``cookie_jar_path``.

        :param username: An Amazon username.
        :param password: An Amazon password.
        :param debug: Set the logger to ``DEBUG`` and write an HTML file for each response.
        :param max_auth_attempts: The maximum number of auth flow steps :func:`login` will perform.
        :param cookie_jar_path: Where cookies are persisted, defaults to ``conf.DEFAULT_COOKIE_JAR_PATH``.
        :param io: The I/O handler used for echoes and prompts.
        :param output_dir: Where output files are written, defaults to ``conf.DEFAULT_OUTPUT_DIR``.
        """
        if not cookie_jar_path:
            cookie_jar_path = DEFAULT_COOKIE_JAR_PATH
        if not output_dir:
            output_dir = DEFAULT_OUTPUT_DIR

        #: An Amazon username.
        self.username: str = username
        #: An Amazon password.
        self.password: str = password

        #: Set logger ``DEBUG``, send output to ``stderr``, and write an HTML file for each request made on the session.
        self.debug: bool = debug
        if self.debug:
            logger.setLevel(logging.DEBUG)
        #: Will continue in :func:`login()`'s auth flow this many times (successes and failures).
        self.max_auth_attempts: int = max_auth_attempts
        #: The path to persist session cookies, defaults to ``conf.DEFAULT_COOKIE_JAR_PATH``.
        self.cookie_jar_path: str = cookie_jar_path
        #: The I/O handler for echoes and prompts.
        self.io: IODefault = io
        #: The directory where any output files will be produced, defaults to ``conf.DEFAULT_OUTPUT_DIR``.
        self.output_dir: str = output_dir

        #: The shared session to be used across all requests.
        self.session: Session = Session()
        #: The last response executed on the Session.
        self.last_response: Optional[Response] = None
        #: A parsed representation of the last response executed on the Session.
        self.last_response_parsed: Optional[Tag] = None
        #: If :func:`login()` has been executed and successfully logged in the session.
        self.is_authenticated: bool = False

        cookie_dir = os.path.dirname(self.cookie_jar_path)
        # ``dirname`` is empty when the jar path is a bare filename, and ``makedirs("")`` raises;
        # ``exist_ok`` avoids the check-then-create race.
        if cookie_dir:
            os.makedirs(cookie_dir, exist_ok=True)
        if os.path.exists(self.cookie_jar_path):
            with open(self.cookie_jar_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            cookies = requests.utils.cookiejar_from_dict(data)
            self.session.cookies.update(cookies)

    def request(self,
                method: str,
                url: str,
                **kwargs: Any) -> Response:
        """
        Execute the request against Amazon with base headers, parsing and storing the response
        and persisting response cookies.

        :param method: The request method to execute.
        :param url: The URL to execute ``method`` on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`requests.request`.
        :return: The Response from the executed request.
        """
        # Work on a copy so the caller's dict is never mutated (and ``headers=None`` is tolerated).
        # ``BASE_HEADERS`` is applied last, so it still takes precedence over caller-supplied values.
        headers = dict(kwargs.get("headers") or {})
        headers.update(BASE_HEADERS)
        kwargs["headers"] = headers

        logger.debug("%s request to %s", method, url)

        self.last_response = self.session.request(method, url, **kwargs)
        self.last_response_parsed = BeautifulSoup(self.last_response.text,
                                                  "html.parser")

        # Persist the session's cookies after every request, replacing any previous jar file
        cookies = dict_from_cookiejar(self.session.cookies)
        if os.path.exists(self.cookie_jar_path):
            os.remove(self.cookie_jar_path)
        with open(self.cookie_jar_path, "w", encoding="utf-8") as f:
            f.write(json.dumps(cookies))

        logger.debug("Response: %s - %s",
                     self.last_response.url,
                     self.last_response.status_code)

        if self.debug:
            # The output directory may not exist yet; create it rather than failing on ``open``
            if self.output_dir:
                os.makedirs(self.output_dir, exist_ok=True)
            page_name = self._get_page_from_url(self.last_response.url)
            with open(os.path.join(self.output_dir, page_name), "w", encoding="utf-8") as html_file:
                logger.debug("Response written to file: %s", html_file.name)
                html_file.write(self.last_response.text)

        return self.last_response

    def get(self,
            url: str,
            **kwargs: Any) -> Response:
        """
        Perform a GET request.

        :param url: The URL to GET on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed GET request.
        """
        return self.request("GET", url, **kwargs)

    def post(self,
             url: str,
             **kwargs: Any) -> Response:
        """
        Perform a POST request.

        :param url: The URL to POST on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed POST request.
        """
        return self.request("POST", url, **kwargs)

    def auth_cookies_stored(self) -> bool:
        """
        Check whether the session holds both the ``session-token`` and ``x-main`` cookies.

        :return: ``True`` if both cookies are present with non-empty values.
        """
        cookies = dict_from_cookiejar(self.session.cookies)
        return bool(cookies.get("session-token") and cookies.get("x-main"))

    def login(self) -> None:
        """
        Execute an Amazon login process. This will include the sign-in page, and may also include Captcha challenges
        and OTP pages (if 2FA authentication is enabled on your account).

        If successful, ``is_authenticated`` will be set to ``True``.

        Session cookies are persisted, and if existing session data is found during this auth flow, it will be
        skipped entirely and flagged as authenticated.

        :raises AmazonOrdersAuthError: If the current page matches no known auth flow step, sign-in is
            rejected, or ``max_auth_attempts`` steps pass without the session becoming authenticated.
        """
        self.get("{}/gp/sign-in.html".format(BASE_URL))

        attempts = 0
        while not self.is_authenticated and attempts < self.max_auth_attempts:
            if self._is_session_authenticated():
                self.is_authenticated = True
                break

            # Dispatch on whichever known auth page the last response turned out to be
            if self._is_field_found(SIGN_IN_FORM_NAME):
                self._sign_in()
            elif self._is_field_found(CAPTCHA_1_FORM_CLASS, field_key="class"):
                self._captcha_1_submit()
            elif self._find_captcha_2_input():
                self._captcha_2_submit()
            elif self._is_field_found(MFA_DEVICE_SELECT_FORM_ID,
                                      field_key="id"):
                self._mfa_device_select()
            elif self._is_field_found(MFA_FORM_ID, field_key="id"):
                self._mfa_submit()
            elif self._is_field_found(CAPTCHA_OTP_FORM_ID, field_key="id"):
                self._captcha_otp_submit()
            else:
                raise AmazonOrdersAuthError(
                    "An error occurred, this is an unknown page, or its parsed contents don't match a known auth flow: {}. To capture the page to a file, set the `debug` flag.".format(
                        self.last_response.url))

            attempts += 1

        if not self.is_authenticated:
            # The final permitted step may itself have completed the flow, so re-check the session
            # before concluding that the attempts were exhausted.
            if self._is_session_authenticated():
                self.is_authenticated = True
            else:
                raise AmazonOrdersAuthError(
                    "Max authentication flow attempts reached.")

    def logout(self) -> None:
        """
        Logout and close the existing Amazon session and clear cookies.
        """
        self.get("{}/gp/sign-out.html".format(BASE_URL))

        if os.path.exists(self.cookie_jar_path):
            os.remove(self.cookie_jar_path)

        # Replace the closed session with a fresh one so no cookies are carried over
        self.session.close()
        self.session = Session()

        self.is_authenticated = False

    def _is_session_authenticated(self) -> bool:
        # True if auth cookies are stored, or the last page shows the sign-out nav item and no sign-in greeting.
        page = self.last_response.text
        return bool(self.auth_cookies_stored() or
                    ("Hello, sign in" not in page and
                     "nav-item-signout" in page))

    def _find_captcha_2_input(self) -> Optional[Tag]:
        # Find the ``input`` whose id starts with ``CAPTCHA_2_INPUT_ID`` in the last response, if any.
        return self.last_response_parsed.find(
            "input",
            id=lambda value: value and value.startswith(CAPTCHA_2_INPUT_ID))

    def _sign_in(self) -> None:
        # Submit the sign-in form with the credentials; an error box on the resulting page is fatal.
        form = self.last_response_parsed.find("form",
                                              {"name": SIGN_IN_FORM_NAME})
        data = self._build_from_form(form,
                                     additional_attrs={"email": self.username,
                                                       "password": self.password,
                                                       "rememberMe": "true"})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors(critical=True)

    def _mfa_device_select(self) -> None:
        # List the one-time passcode device options, prompt for one, and submit the selection form.
        form = self.last_response_parsed.find("form",
                                              {"id": MFA_DEVICE_SELECT_FORM_ID})
        contexts = form.find_all("input", {"name": "otpDeviceContext"})

        for i, field in enumerate(contexts, start=1):
            self.io.echo("{}: {}".format(i, field.attrs["value"].strip()))
        choice = self.io.prompt("--> Enter where you would like your one-time passcode sent", type=int)
        self.io.echo("")

        try:
            otp_device = int(choice)
        except (TypeError, ValueError):
            otp_device = 0
        # Reject anything outside the listed range: 0 or a negative number would otherwise pick a
        # device through negative indexing, and a too-large number would raise ``IndexError``.
        # Nothing is submitted, so the same page is still current and ``login`` will prompt again.
        if not 1 <= otp_device <= len(contexts):
            self.io.echo("An error occurred: {} is not a valid selection\n".format(choice), fg="red")
            return

        data = self._build_from_form(form,
                                     additional_attrs={"otpDeviceContext":
                                                       contexts[otp_device - 1].attrs["value"]})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _mfa_submit(self) -> None:
        # Prompt for the one-time passcode and submit the MFA form.
        otp = self.io.prompt("--> Enter the one-time passcode sent to your device")
        self.io.echo("")

        form = self.last_response_parsed.find("form", id=MFA_FORM_ID)
        data = self._build_from_form(form,
                                     additional_attrs={"otpCode": otp,
                                                       "rememberDevice": ""})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _captcha_1_submit(self) -> None:
        # Solve the Captcha image inside the ``CAPTCHA_1_DIV_ID`` div and submit the Captcha form.
        captcha_div = self.last_response_parsed.find("div",
                                                     {"id": CAPTCHA_1_DIV_ID})

        solution = self._solve_captcha(
            captcha_div.find("img", {"alt": "captcha"}).attrs["src"])

        form = self.last_response_parsed.find("form",
                                              {"class": CAPTCHA_1_FORM_CLASS})
        data = self._build_from_form(form,
                                     additional_attrs={
                                         "cvf_captcha_input": solution})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix="{}/ap/cvf/".format(
                                               BASE_URL)),
                     data=data)

        self._handle_errors("cvf-widget-alert", "class")

    def _captcha_2_submit(self) -> None:
        # Solve the Captcha image in the form enclosing the Captcha input; values are sent as query params.
        form = self._find_captcha_2_input().find_parent("form")

        solution = self._solve_captcha(form.find("img").attrs["src"])

        data = self._build_from_form(form,
                                     additional_attrs={
                                         "field-keywords": solution})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix=BASE_URL),
                     params=data)

        self._handle_errors("a-alert-info", "class")

    def _captcha_otp_submit(self) -> None:
        # Prompt for the one-time passcode and submit the verification code form.
        otp = self.io.prompt("--> Enter the one-time passcode sent to your device")
        self.io.echo("")

        form = self.last_response_parsed.find("form", id=CAPTCHA_OTP_FORM_ID)
        data = self._build_from_form(form,
                                     additional_attrs={"otpCode": otp})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix=BASE_URL),
                     data=data)

        self._handle_errors()

    def _build_from_form(self,
                         form: Tag,
                         additional_attrs: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        # Collect name/value pairs from the form's inputs, then overlay ``additional_attrs``.
        data = {}
        for field in form.find_all("input"):
            # Inputs lacking either a ``name`` or a ``value`` attribute are skipped
            try:
                data[field["name"]] = field["value"]
            except KeyError:
                continue
        if additional_attrs:
            data.update(additional_attrs)
        return data

    def _get_form_action(self,
                         form: Tag,
                         prefix: Optional[str] = None) -> str:
        # Return the form's ``action``, falling back to the last response's URL when it is missing or empty.
        action = form.attrs.get("action")
        if not action:
            action = self.last_response.url
        # TODO: we should be able to clean this up, and even get it from the current URL (same as a browser does)
        if prefix and not action.startswith("http"):
            action = prefix + action
        return action

    def _is_field_found(self,
                        field_value: str,
                        field_type: str = "form",
                        field_key: str = "name") -> bool:
        # True if the last response contains a ``field_type`` tag whose ``field_key`` attribute matches.
        return self.last_response_parsed.find(field_type, {
            field_key: field_value}) is not None

    def _get_page_from_url(self,
                           url: str) -> str:
        # Build an unused ``<page>_<n>.html`` filename (relative to ``output_dir``) from the URL's last path segment.
        page_name = os.path.basename(urlparse(url).path)
        # Remove only a trailing ".html"; ``str.strip(".html")`` would strip any of those
        # characters from both ends (e.g. "help.html" -> "e").
        suffix = ".html"
        if page_name.endswith(suffix):
            page_name = page_name[:-len(suffix)]

        # Advance the index until the name is free in the directory the file will be written to
        i = 0
        while os.path.isfile(os.path.join(self.output_dir,
                                          "{}_{}.html".format(page_name, i))):
            i += 1
        return "{}_{}.html".format(page_name, i)

    def _handle_errors(self,
                       error_div: str = "auth-error-message-box",
                       attr_name: str = "id",
                       critical: bool = False) -> None:
        # Look for an error ``div`` in the last response; raise if ``critical``, otherwise echo it.
        error_tag = self.last_response_parsed.find("div",
                                                   {attr_name: error_div})
        if not error_tag:
            return

        error_msg = "An error occurred: {}\n".format(error_tag.text.strip())
        if critical:
            raise AmazonOrdersAuthError(error_msg)
        self.io.echo(error_msg, fg="red")

    def _solve_captcha(self,
                       url: str) -> str:
        # Try to auto-solve the Captcha at ``url``; if that fails, show the image and prompt for the answer.
        captcha_response = AmazonCaptcha.fromlink(url).solve()
        if not captcha_response or captcha_response.lower() == "not solved":
            img_response = self.session.get(url)
            img = Image.open(BytesIO(img_response.content))
            img.show()
            self.io.echo("Info: The Captcha couldn't be auto-solved.")
            captcha_response = self.io.prompt("--> Enter the characters shown in the image")
            self.io.echo("")

        return captcha_response