Coverage for amazonorders/session.py: 90.11%

182 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-18 21:57 +0000

1import json 

2import logging 

3import os 

4from io import BytesIO 

5from typing import Optional, Any, Dict 

6from urllib.parse import urlparse 

7 

8import requests 

9from PIL import Image 

10from amazoncaptcha import AmazonCaptcha 

11from bs4 import BeautifulSoup, Tag 

12from requests import Session, Response 

13from requests.utils import dict_from_cookiejar 

14 

15from amazonorders.exception import AmazonOrdersAuthError 

16 

__author__ = "Alex Laird"
__copyright__ = "Copyright 2024, Alex Laird"
__version__ = "1.0.3"

logger = logging.getLogger(__name__)

# Root of every Amazon URL this module requests.
BASE_URL = "https://www.amazon.com"

# Headers sent with every request made through ``AmazonSession.request``.
BASE_HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "Cache-Control": "max-age=0",
    "Content-Type": "application/x-www-form-urlencoded",
    "Origin": BASE_URL,
    "Referer": BASE_URL + "/ap/signin",
    "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "macOS",
    "Sec-Ch-Viewport-Width": "1393",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Viewport-Width": "1393",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
}

# Form/field identifiers used to recognise which page of the auth flow was returned.
SIGN_IN_FORM_NAME = "signIn"
MFA_DEVICE_SELECT_FORM_ID = "auth-select-device-form"
MFA_FORM_ID = "auth-mfa-form"
CAPTCHA_1_DIV_ID = "cvf-page-content"
CAPTCHA_1_FORM_CLASS = "cvf-widget-form"
CAPTCHA_2_INPUT_ID = "captchacharacters"

# Where session cookies are persisted when no explicit path is given.
DEFAULT_COOKIE_JAR_PATH = os.path.join(
    os.path.expanduser("~"),
    ".config",
    "amazon-orders",
    "cookies.json",
)

51 

52 

class IODefault:
    """
    Default input/output handler for the application, backed by the console.

    Subclass this and pass an instance to :class:`AmazonSession` when echoes and
    prompts should be routed somewhere other than ``stdout``/``stdin``.
    """

    def echo(self, msg, **kwargs):
        """
        Write a message to the console.

        :param msg: The data to send to output.
        :param kwargs: Accepted for interface compatibility; ignored here.
        """
        print(msg)

    def prompt(self, msg, type=None, **kwargs):
        """
        Ask the user for input on the console.

        :param msg: The text shown as the prompt (a trailing ``": "`` is appended).
        :param type: Accepted for interface compatibility; ignored here.
        :param kwargs: Accepted for interface compatibility; ignored here.
        :return: The raw string the user entered.
        """
        prompt_text = "{}: ".format(msg)
        return input(prompt_text)

84 

85 

class AmazonSession:
    """
    An interface for interacting with Amazon and authenticating an underlying :class:`requests.Session`. Utilizing
    this class means session data is maintained between requests. Session data is also persisted after each request,
    meaning it will also be maintained between separate instantiations of the class or application.

    To get started, call the :func:`login` function.
    """

    def __init__(self,
                 username: str,
                 password: str,
                 debug: bool = False,
                 max_auth_attempts: int = 10,
                 cookie_jar_path: Optional[str] = None,
                 io: Optional[IODefault] = None) -> None:
        """
        Build the session and load any previously persisted cookies.

        :param username: An Amazon username.
        :param password: An Amazon password.
        :param debug: Enable ``DEBUG`` logging and write each response to an HTML file.
        :param max_auth_attempts: Upper bound on auth-flow steps taken by :func:`login`.
        :param cookie_jar_path: Where cookies are persisted; falls back to ``DEFAULT_COOKIE_JAR_PATH``.
        :param io: The I/O handler for echoes and prompts; a fresh :class:`IODefault` when omitted.
        """
        if not cookie_jar_path:
            cookie_jar_path = DEFAULT_COOKIE_JAR_PATH
        # Instantiate per session rather than sharing one default evaluated at definition time.
        if io is None:
            io = IODefault()

        #: An Amazon username.
        self.username: str = username
        #: An Amazon password.
        self.password: str = password

        #: Set logger ``DEBUG``, send output to ``stderr``, and write an HTML file for each request made on the session.
        self.debug: bool = debug
        if self.debug:
            logger.setLevel(logging.DEBUG)
        #: Will continue in :func:`login()`'s auth flow this many times.
        self.max_auth_attempts: int = max_auth_attempts
        #: The path to persist session cookies, defaults to ``DEFAULT_COOKIE_JAR_PATH``.
        self.cookie_jar_path: str = cookie_jar_path
        #: The I/O handler for echoes and prompts.
        self.io: IODefault = io

        #: The shared session to be used across all requests.
        self.session: Session = Session()
        #: The last response executed on the Session.
        self.last_response: Optional[Response] = None
        #: A parsed representation of the last response executed on the Session.
        self.last_response_parsed: Optional[Tag] = None
        #: If :func:`login()` has been executed and successfully logged in the session.
        self.is_authenticated: bool = False

        cookie_dir = os.path.dirname(self.cookie_jar_path)
        # A bare filename has no directory component, and ``os.makedirs("")`` would raise.
        if cookie_dir:
            os.makedirs(cookie_dir, exist_ok=True)
        if os.path.exists(self.cookie_jar_path):
            with open(self.cookie_jar_path, "r", encoding="utf-8") as f:
                data = json.loads(f.read())
            cookies = requests.utils.cookiejar_from_dict(data)
            self.session.cookies.update(cookies)

    def request(self,
                method: str,
                url: str,
                **kwargs: Any) -> Response:
        """
        Execute the request against Amazon with base headers, parsing and storing the response
        and persisting response cookies.

        :param method: The request method to execute.
        :param url: The URL to execute ``method`` on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`requests.request`.
        :return: The Response from the executed request.
        """
        # Work on a copy so the caller's dict is never mutated (and ``headers=None`` is tolerated).
        # ``BASE_HEADERS`` are applied last, so they still take precedence as before.
        headers = dict(kwargs.get("headers") or {})
        headers.update(BASE_HEADERS)
        kwargs["headers"] = headers

        logger.debug("{} request to {}".format(method, url))

        self.last_response = self.session.request(method, url, **kwargs)
        self.last_response_parsed = BeautifulSoup(self.last_response.text,
                                                  "html.parser")

        # Persist the full cookie jar after every request so later instantiations can reuse it.
        cookies = dict_from_cookiejar(self.session.cookies)
        if os.path.exists(self.cookie_jar_path):
            os.remove(self.cookie_jar_path)
        with open(self.cookie_jar_path, "w", encoding="utf-8") as f:
            f.write(json.dumps(cookies))

        logger.debug("Response: {} - {}".format(self.last_response.url,
                                                self.last_response.status_code))

        if self.debug:
            page_name = self._get_page_from_url(self.last_response.url)
            with open(page_name, "w", encoding="utf-8") as html_file:
                logger.debug(
                    "Response written to file: {}".format(html_file.name))
                html_file.write(self.last_response.text)

        return self.last_response

    def get(self,
            url: str,
            **kwargs: Any) -> Response:
        """
        Perform a GET request.

        :param url: The URL to GET on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed GET request.
        """
        return self.request("GET", url, **kwargs)

    def post(self,
             url: str,
             **kwargs: Any) -> Response:
        """
        Perform a POST request.

        :param url: The URL to POST on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed POST request.
        """
        return self.request("POST", url, **kwargs)

    def auth_cookies_stored(self) -> bool:
        """
        Check whether the session's cookie jar holds both cookies this class treats as proof of a login.

        :return: ``True`` if non-empty ``session-token`` and ``x-main`` cookies are both present.
        """
        cookies = dict_from_cookiejar(self.session.cookies)
        return bool(cookies.get("session-token") and cookies.get("x-main"))

    def login(self) -> None:
        """
        Execute an Amazon login process. This will include the sign-in page, and may also include Captcha challenges
        and OTP pages (if 2FA authentication is enabled on your account).

        If successful, ``is_authenticated`` will be set to ``True``.

        Session cookies are persisted, and if existing session data is found during this auth flow, it will be
        skipped entirely and flagged as authenticated.

        :raises AmazonOrdersAuthError: If an unrecognised page is returned, or the session is still not
            authenticated after ``max_auth_attempts`` steps.
        """
        self.get("{}/gp/sign-in.html".format(BASE_URL))

        attempts = 0
        while not self.is_authenticated and attempts < self.max_auth_attempts:
            if self._is_signed_in():
                self.is_authenticated = True
                break

            if self._is_field_found(SIGN_IN_FORM_NAME):
                self._sign_in()
            elif self._is_field_found(CAPTCHA_1_FORM_CLASS, field_key="class"):
                self._captcha_1_submit()
            elif self.last_response_parsed.find("input",
                                                id=lambda value: value and value.startswith(CAPTCHA_2_INPUT_ID)):
                self._captcha_2_submit()
            elif self._is_field_found(MFA_DEVICE_SELECT_FORM_ID,
                                      field_key="id"):
                self._mfa_device_select()
            elif self._is_field_found(MFA_FORM_ID, field_key="id"):
                self._mfa_submit()
            else:
                raise AmazonOrdersAuthError(
                    "An error occurred, this is an unknown page: {}. To capture the page to a file, set the `debug` flag.".format(
                        self.last_response.url))

            attempts += 1

        # The loop only inspects the response at the top of an iteration, so the step taken on the
        # final attempt has not been checked yet; look once more before declaring failure.
        if not self.is_authenticated and self._is_signed_in():
            self.is_authenticated = True

        if not self.is_authenticated:
            raise AmazonOrdersAuthError(
                "Max authentication flow attempts reached.")

    def _is_signed_in(self) -> bool:
        """
        Report whether stored cookies or the last response indicate a signed-in session.

        :return: ``True`` if the auth cookies are stored, or the last page shows the sign-out nav item
            and not the "Hello, sign in" greeting.
        """
        if self.auth_cookies_stored():
            return True
        page = self.last_response.text
        return "Hello, sign in" not in page and "nav-item-signout" in page

    def logout(self) -> None:
        """
        Logout and close the existing Amazon session and clear cookies.
        """
        self.get("{}/gp/sign-out.html".format(BASE_URL))

        if os.path.exists(self.cookie_jar_path):
            os.remove(self.cookie_jar_path)

        # Replace the session outright so no in-memory cookies survive the logout.
        self.session.close()
        self.session = Session()

        self.is_authenticated = False

    def _sign_in(self) -> None:
        """
        Submit the sign-in form from the last response with the stored credentials.

        :raises AmazonOrdersAuthError: If the resulting page contains the auth error box.
        """
        form = self.last_response_parsed.find("form",
                                              {"name": SIGN_IN_FORM_NAME})
        data = self._build_from_form(form,
                                     additional_attrs={"email": self.username,
                                                       "password": self.password,
                                                       "rememberMe": "true"})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors(critical=True)

    def _mfa_device_select(self) -> None:
        """
        List the OTP delivery options from the last response, prompt for one, and submit the choice.

        :raises AmazonOrdersAuthError: If the entered number does not match a listed option.
        """
        form = self.last_response_parsed.find("form",
                                              {"id": MFA_DEVICE_SELECT_FORM_ID})
        contexts = form.find_all("input", {"name": "otpDeviceContext"})

        for i, field in enumerate(contexts, start=1):
            self.io.echo("{}: {}".format(i, field.attrs["value"].strip()))
        otp_device = int(
            self.io.prompt("--> Enter where you would like your one-time passcode sent", type=int))
        self.io.echo("")

        # Reject 0/negatives explicitly: negative indexing would otherwise silently pick a device.
        if not 1 <= otp_device <= len(contexts):
            raise AmazonOrdersAuthError(
                "Invalid device selection: {}. Expected a number between 1 and {}.".format(
                    otp_device, len(contexts)))

        data = self._build_from_form(form,
                                     additional_attrs={"otpDeviceContext": contexts[otp_device - 1].attrs["value"]})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _mfa_submit(self) -> None:
        """
        Prompt for the one-time passcode and submit the MFA form from the last response.
        """
        otp = self.io.prompt("--> Enter the one-time passcode sent to your device")
        self.io.echo("")

        form = self.last_response_parsed.find("form", id=MFA_FORM_ID)
        data = self._build_from_form(form,
                                     additional_attrs={"otpCode": otp,
                                                       "rememberDevice": ""})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _captcha_1_submit(self) -> None:
        """
        Solve and submit the first Captcha variant (the ``cvf`` widget form) from the last response.
        """
        captcha_div = self.last_response_parsed.find("div",
                                                     {"id": CAPTCHA_1_DIV_ID})

        solution = self._solve_captcha(
            captcha_div.find("img", {"alt": "captcha"}).attrs["src"])

        form = self.last_response_parsed.find("form",
                                              {"class": CAPTCHA_1_FORM_CLASS})
        data = self._build_from_form(form,
                                     additional_attrs={"cvf_captcha_input": solution})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix="{}/ap/cvf/".format(BASE_URL)),
                     data=data)

        self._handle_errors("cvf-widget-alert", "class")

    def _captcha_2_submit(self) -> None:
        """
        Solve and submit the second Captcha variant, located through its ``captchacharacters`` input.
        """
        form = self.last_response_parsed.find("input",
                                              id=lambda value: value and value.startswith(
                                                  CAPTCHA_2_INPUT_ID)).find_parent("form")

        solution = self._solve_captcha(form.find("img").attrs["src"])

        data = self._build_from_form(form,
                                     additional_attrs={"field-keywords": solution})

        # This variant sends the form fields as query parameters rather than a request body.
        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix=BASE_URL),
                     params=data)

        self._handle_errors("a-alert-info", "class")

    def _build_from_form(self,
                         form: Tag,
                         additional_attrs: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Collect the name/value pairs of a form's ``input`` elements into a dict.

        :param form: The form whose inputs should be collected.
        :param additional_attrs: Extra entries merged in last, overriding same-named inputs.
        :return: The data to submit for the form.
        """
        data = {}
        for field in form.find_all("input"):
            try:
                data[field["name"]] = field["value"]
            except KeyError:
                # Inputs missing a ``name`` or ``value`` attribute contribute nothing.
                continue
        if additional_attrs:
            data.update(additional_attrs)
        return data

    def _get_form_action(self,
                         form: Tag,
                         prefix: Optional[str] = None) -> str:
        """
        Resolve the URL a form should be submitted to.

        :param form: The form being submitted.
        :param prefix: Prepended to the action when it does not already start with ``http``.
        :return: The form's ``action``, or the last response's URL when the form has none.
        """
        action = form.attrs.get("action")
        if not action:
            action = self.last_response.url
        if prefix and not action.startswith("http"):
            action = prefix + action
        return action

    def _is_field_found(self,
                        field_value: str,
                        field_type: str = "form",
                        field_key: str = "name") -> bool:
        """
        Check whether the last parsed response contains a matching element.

        :param field_value: The attribute value to look for.
        :param field_type: The tag name to search.
        :param field_key: The attribute that must equal ``field_value``.
        :return: ``True`` if such an element exists.
        """
        return self.last_response_parsed.find(field_type, {
            field_key: field_value}) is not None

    def _get_page_from_url(self,
                           url: str) -> str:
        """
        Derive a not-yet-existing HTML filename (in the working directory) for dumping a response.

        :param url: The URL of the response being written.
        :return: ``<page>_<i>.html`` with the lowest ``i`` for which no such file exists.
        """
        page_name = os.path.basename(urlparse(url).path)
        # Remove the suffix itself; ``str.strip(".html")`` would strip any of those characters
        # from both ends (turning e.g. "thankyou.html" into "ankyou").
        suffix = ".html"
        if page_name.endswith(suffix):
            page_name = page_name[:-len(suffix)]
        i = 0
        while os.path.isfile("{}_{}.html".format(page_name, i)):
            i += 1
        return "{}_{}.html".format(page_name, i)

    def _handle_errors(self,
                       error_div: str = "auth-error-message-box",
                       attr_name: str = "id",
                       critical: bool = False) -> None:
        """
        Surface an error box from the last response, if one is present.

        :param error_div: The attribute value identifying the error ``div``.
        :param attr_name: The attribute that must equal ``error_div``.
        :param critical: Raise instead of echoing the message.
        :raises AmazonOrdersAuthError: If an error box is found and ``critical`` is ``True``.
        """
        error_tag = self.last_response_parsed.find("div",
                                                   {attr_name: error_div})
        if not error_tag:
            return

        error_msg = "An error occurred: {}\n".format(error_tag.text.strip())

        if critical:
            raise AmazonOrdersAuthError(error_msg)
        self.io.echo(error_msg, fg="red")

    def _solve_captcha(self,
                       url: str) -> str:
        """
        Solve the Captcha image at ``url``, falling back to asking the user.

        :param url: The URL of the Captcha image.
        :return: The auto-solved text, or what the user typed if auto-solving failed.
        """
        captcha_response = AmazonCaptcha.fromlink(url).solve()
        if not captcha_response or captcha_response.lower() == "not solved":
            # Auto-solve failed: show the image and let the user type the characters.
            img_response = self.session.get(url)
            img = Image.open(BytesIO(img_response.content))
            img.show()
            self.io.echo("Info: The Captcha couldn't be auto-solved.")
            captcha_response = self.io.prompt("--> Enter the characters shown in the image")
            self.io.echo("")

        return captcha_response