Coverage for amazonorders/session.py: 89.94%

179 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-18 14:34 +0000

1import json 

2import logging 

3import os 

4from io import BytesIO 

5from typing import Optional, Any, Dict 

6from urllib.parse import urlparse 

7 

8import requests 

9from PIL import Image 

10from amazoncaptcha import AmazonCaptcha 

11from bs4 import BeautifulSoup, Tag 

12from requests import Session, Response 

13from requests.utils import dict_from_cookiejar 

14 

15from amazonorders.exception import AmazonOrdersAuthError 

16 

# Package metadata.
__author__ = "Alex Laird"
__copyright__ = "Copyright 2024, Alex Laird"
__version__ = "1.0.2"

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Root URL that the header values and request URLs in this module are built from.
BASE_URL = "https://www.amazon.com"

# Browser-like headers that are merged into the headers of every request.
# Long values are split with implicit string concatenation purely for readability.
BASE_HEADERS = {
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.7"
    ),
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "Cache-Control": "max-age=0",
    "Content-Type": "application/x-www-form-urlencoded",
    "Origin": BASE_URL,
    "Referer": BASE_URL + "/ap/signin",
    "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "macOS",
    "Sec-Ch-Viewport-Width": "1393",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Viewport-Width": "1393",
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}

# Markers used to recognise which page of the auth flow a response contains.
SIGN_IN_FORM_NAME = "signIn"
MFA_DEVICE_SELECT_FORM_ID = "auth-select-device-form"
MFA_FORM_ID = "auth-mfa-form"
CAPTCHA_1_DIV_ID = "cvf-page-content"
CAPTCHA_1_FORM_CLASS = "cvf-widget-form"
CAPTCHA_2_INPUT_ID = "captchacharacters"

# Default location of the JSON file in which session cookies are persisted.
DEFAULT_COOKIE_JAR_PATH = os.path.join(
    os.path.expanduser("~"),
    ".config",
    "amazon-orders",
    "cookies.json",
)

51 

52 

class IODefault:
    """
    Default input/output handler for the application, backed by the console.

    The class exists so that it can be overridden and passed to :class:`AmazonSession`
    when messages and prompts should be handled some other way.
    """

    def echo(self, msg):
        """
        Write a message to the console.

        :param msg: The data to send to output.
        """
        print(msg)

    def prompt(self, msg, type=None):
        """
        Ask for user input on the console.

        :param msg: The data to use as the input prompt; ``": "`` is appended to it.
        :param type: Unused by the default implementation.
        :return: The user input result.
        """
        prompt_text = "{}: ".format(msg)
        return input(prompt_text)

77 

78 

class AmazonSession:
    """
    A :class:`requests.Session` wrapper for talking to Amazon.

    Every request is sent with ``BASE_HEADERS``, the last response is kept both raw and parsed,
    and the session cookies are written to a JSON file after each request so they can be reloaded
    the next time a session is constructed. :func:`login()` drives the sign-in flow, dispatching
    on the page that was returned (sign-in form, Captcha challenges, MFA device selection, MFA code).
    """

    def __init__(self,
                 username: str,
                 password: str,
                 debug: bool = False,
                 max_auth_attempts: int = 10,
                 cookie_jar_path: Optional[str] = None,
                 io: Optional[IODefault] = None) -> None:
        """
        :param username: An Amazon username.
        :param password: An Amazon password.
        :param debug: Enable ``DEBUG`` logging and write each response to an HTML file.
        :param max_auth_attempts: Maximum number of auth flow steps :func:`login()` will take.
        :param cookie_jar_path: Where to persist cookies, defaults to ``DEFAULT_COOKIE_JAR_PATH``.
        :param io: The I/O handler for echoes and prompts, defaults to a new :class:`IODefault`.
        """
        if not cookie_jar_path:
            cookie_jar_path = DEFAULT_COOKIE_JAR_PATH
        # Build the default handler per instance rather than once at function definition time.
        if io is None:
            io = IODefault()

        #: An Amazon username.
        self.username: str = username
        #: An Amazon password.
        self.password: str = password

        #: Set logger ``DEBUG`` and write an HTML file for each request made on the session.
        self.debug: bool = debug
        if self.debug:
            logger.setLevel(logging.DEBUG)
        #: Will continue in :func:`login()`'s auth flow this many times.
        self.max_auth_attempts: int = max_auth_attempts
        #: The path to persist session cookies, defaults to ``DEFAULT_COOKIE_JAR_PATH``.
        self.cookie_jar_path: str = cookie_jar_path
        #: The I/O handler for echoes and prompts.
        self.io: IODefault = io

        #: The shared session to be used across all requests.
        self.session: Session = Session()
        #: The last response executed on the Session.
        self.last_response: Optional[Response] = None
        #: A parsed representation of the last response executed on the Session.
        self.last_response_parsed: Optional[Tag] = None
        #: If :func:`login()` has been executed and successfully logged in the session.
        self.is_authenticated: bool = False

        # A bare filename has an empty dirname, and os.makedirs("") raises, so only
        # create the directory when there is one. exist_ok avoids a check-then-create race.
        cookie_dir = os.path.dirname(self.cookie_jar_path)
        if cookie_dir:
            os.makedirs(cookie_dir, exist_ok=True)

        # Restore cookies persisted by a previous session, if any.
        if os.path.exists(self.cookie_jar_path):
            with open(self.cookie_jar_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            self.session.cookies.update(requests.utils.cookiejar_from_dict(data))

    def request(self,
                method: str,
                url: str,
                **kwargs: Any) -> Response:
        """
        Execute the request against Amazon with base headers, parsing and storing the response
        and persisting response cookies.

        :param method: The request method to execute.
        :param url: The URL to execute ``method`` on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`requests.request`.
        :return: The Response from the executed request.
        """
        # Work on a copy so the caller's dict is never mutated. BASE_HEADERS are applied
        # last, so they win over a caller-supplied header of the same name.
        headers = dict(kwargs.get("headers") or {})
        headers.update(BASE_HEADERS)
        kwargs["headers"] = headers

        logger.debug("%s request to %s", method, url)

        self.last_response = self.session.request(method, url, **kwargs)
        self.last_response_parsed = BeautifulSoup(self.last_response.text,
                                                  "html.parser")

        # Persist the cookie jar after every request, replacing any previous file.
        cookies = dict_from_cookiejar(self.session.cookies)
        if os.path.exists(self.cookie_jar_path):
            os.remove(self.cookie_jar_path)
        with open(self.cookie_jar_path, "w", encoding="utf-8") as f:
            json.dump(cookies, f)

        logger.debug("Response: %s - %s",
                     self.last_response.url,
                     self.last_response.status_code)

        if self.debug:
            # Dump the page into the current working directory for inspection.
            page_name = self._get_page_from_url(self.last_response.url)
            with open(page_name, "w", encoding="utf-8") as html_file:
                logger.debug("Response written to file: %s", html_file.name)
                html_file.write(self.last_response.text)

        return self.last_response

    def get(self,
            url: str,
            **kwargs: Any) -> Response:
        """
        Perform a GET request.

        :param url: The URL to GET on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed GET request.
        """
        return self.request("GET", url, **kwargs)

    def post(self,
             url: str,
             **kwargs: Any) -> Response:
        """
        Perform a POST request.

        :param url: The URL to POST on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed POST request.
        """
        return self.request("POST", url, **kwargs)

    def auth_cookies_stored(self) -> bool:
        """
        Check whether the session's cookie jar holds both auth cookies.

        :return: ``True`` if non-empty ``session-token`` and ``x-main`` cookies are present.
        """
        cookies = dict_from_cookiejar(self.session.cookies)
        return bool(cookies.get("session-token") and cookies.get("x-main"))

    def login(self) -> None:
        """
        Execute an Amazon login process. This will include the sign-in page, and may also include Captcha challenges
        and OTP pages (if 2FA authentication is enabled on your account).

        If successful, ``is_authenticated`` will be set to ``True``.

        Session cookies are persisted, and if existing session data is found during this auth flow, it will be
        skipped entirely and flagged as authenticated.

        :raises AmazonOrdersAuthError: If an unrecognised page is returned, or if the session is still not
            authenticated after ``max_auth_attempts`` steps of the auth flow.
        """
        self.get("{}/gp/sign-in.html".format(BASE_URL))

        attempts = 0
        while not self.is_authenticated:
            # Check for success before checking the budget, so that a final step
            # which did authenticate is not reported as a failure.
            if self.auth_cookies_stored() or \
                    ("Hello, sign in" not in self.last_response.text and
                     "nav-item-signout" in self.last_response.text):
                self.is_authenticated = True
                break

            if attempts >= self.max_auth_attempts:
                raise AmazonOrdersAuthError(
                    "Max authentication flow attempts reached.")

            # Dispatch on whichever auth page the last response contains.
            if self._is_field_found(SIGN_IN_FORM_NAME):
                self._sign_in()
            elif self._is_field_found(CAPTCHA_1_FORM_CLASS, field_key="class"):
                self._captcha_1_submit()
            elif self._find_captcha_2_input() is not None:
                self._captcha_2_submit()
            elif self._is_field_found(MFA_DEVICE_SELECT_FORM_ID,
                                      field_key="id"):
                self._mfa_device_select()
            elif self._is_field_found(MFA_FORM_ID, field_key="id"):
                self._mfa_submit()
            else:
                raise AmazonOrdersAuthError(
                    "An error occurred, this is an unknown page: {}. To capture the page to a file, set the `debug` flag.".format(
                        self.last_response.url))

            attempts += 1

    def logout(self) -> None:
        """
        Logout and close the existing Amazon session and clear cookies.
        """
        self.get("{}/gp/sign-out.html".format(BASE_URL))

        # The sign-out request above re-wrote the cookie file, so remove it afterwards.
        if os.path.exists(self.cookie_jar_path):
            os.remove(self.cookie_jar_path)

        self.session.close()
        self.session = Session()

        self.is_authenticated = False

    def _sign_in(self) -> None:
        """
        Submit the sign-in form with the username and password.

        :raises AmazonOrdersAuthError: If the resulting page contains the auth error box.
        """
        form = self.last_response_parsed.find("form",
                                              {"name": SIGN_IN_FORM_NAME})
        data = self._build_from_form(form,
                                     additional_attrs={"email": self.username,
                                                       "password": self.password,
                                                       "rememberMe": "true"})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors(critical=True)

    def _mfa_device_select(self) -> None:
        """
        List the available one-time passcode devices, prompt for a choice, and submit it.
        """
        form = self.last_response_parsed.find("form",
                                              {"id": MFA_DEVICE_SELECT_FORM_ID})
        contexts = form.find_all("input", {"name": "otpDeviceContext"})

        # Devices are presented to the user numbered from 1.
        for number, field in enumerate(contexts, start=1):
            self.io.echo("{}: {}".format(number, field.attrs["value"].strip()))
        otp_device = int(
            self.io.prompt("Enter where you would like your one-time passcode sent", type=int))

        # NOTE(review): the choice is not range-checked; 0 or a negative number indexes
        # from the end of the list, and a too-large number raises IndexError.
        data = self._build_from_form(form,
                                     additional_attrs={"otpDeviceContext":
                                                       contexts[otp_device - 1].attrs["value"]})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _mfa_submit(self) -> None:
        """
        Prompt for the one-time passcode and submit the MFA form.
        """
        otp = self.io.prompt("Enter the one-time passcode sent to your device")

        form = self.last_response_parsed.find("form", id=MFA_FORM_ID)
        data = self._build_from_form(form,
                                     additional_attrs={"otpCode": otp,
                                                       "rememberDevice": ""})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _captcha_1_submit(self) -> None:
        """
        Solve and submit the first Captcha variant (the ``cvf-widget-form`` page).
        """
        captcha_div = self.last_response_parsed.find("div",
                                                     {"id": CAPTCHA_1_DIV_ID})

        solution = self._solve_captcha(
            captcha_div.find("img", {"alt": "captcha"}).attrs["src"])

        form = self.last_response_parsed.find("form",
                                              {"class": CAPTCHA_1_FORM_CLASS})
        data = self._build_from_form(form,
                                     additional_attrs={
                                         "cvf_captcha_input": solution})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix="{}/ap/cvf/".format(BASE_URL)),
                     data=data)

        self._handle_errors("cvf-widget-alert", "class")

    def _captcha_2_submit(self) -> None:
        """
        Solve and submit the second Captcha variant (the ``captchacharacters`` input page).
        """
        form = self._find_captcha_2_input().find_parent("form")

        solution = self._solve_captcha(form.find("img").attrs["src"])

        data = self._build_from_form(form,
                                     additional_attrs={
                                         "field-keywords": solution})

        # This form's fields are sent as query-string parameters rather than a body.
        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix=BASE_URL),
                     params=data)

        self._handle_errors("a-alert-info", "class")

    def _find_captcha_2_input(self) -> Optional[Tag]:
        """
        Find the ``<input>`` whose id starts with ``CAPTCHA_2_INPUT_ID`` in the last parsed response.

        :return: The matching tag, or ``None`` if the page has no such input.
        """
        return self.last_response_parsed.find(
            "input",
            id=lambda value: value and value.startswith(CAPTCHA_2_INPUT_ID))

    def _build_from_form(self,
                         form: Tag,
                         additional_attrs: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Collect the name/value pairs of a form's ``<input>`` fields.

        :param form: The form to read the inputs from.
        :param additional_attrs: Extra entries to add, overriding any input of the same name.
        :return: The data to submit for the form.
        """
        data = {}
        for field in form.find_all("input"):
            try:
                data[field["name"]] = field["value"]
            except KeyError:
                # Inputs missing a ``name`` or a ``value`` attribute are skipped.
                continue
        if additional_attrs:
            data.update(additional_attrs)
        return data

    def _get_form_action(self,
                         form: Tag,
                         prefix: Optional[str] = None) -> str:
        """
        Determine the URL a form should be submitted to.

        :param form: The form to read the ``action`` attribute from.
        :param prefix: Prepended to the action when the action does not start with ``http``.
        :return: The form's action, or the URL of the last response if the form has none.
        """
        action = form.attrs.get("action")
        if not action:
            action = self.last_response.url
        if prefix and not action.startswith("http"):
            action = prefix + action
        return action

    def _is_field_found(self,
                        field_value: str,
                        field_type: str = "form",
                        field_key: str = "name") -> bool:
        """
        Check whether the last parsed response contains a matching element.

        :param field_value: The attribute value to look for.
        :param field_type: The tag name to search for.
        :param field_key: The attribute that should equal ``field_value``.
        :return: ``True`` if such an element exists.
        """
        return self.last_response_parsed.find(field_type, {
            field_key: field_value}) is not None

    def _get_page_from_url(self,
                           url: str) -> str:
        """
        Build a filename, not yet present in the working directory, for dumping the page at ``url``.

        :param url: The URL of the page.
        :return: ``<page>_<i>.html``, where ``<page>`` is the last path segment of the URL without
            a trailing ``.html`` and ``<i>`` is the lowest index for which no such file exists.
        """
        page_name = os.path.basename(urlparse(url).path)
        # Remove the extension as a suffix; str.strip(".html") would instead strip any of
        # the characters ".", "h", "t", "m", "l" from both ends (e.g. "mfa" -> "fa").
        suffix = ".html"
        if page_name.endswith(suffix):
            page_name = page_name[:-len(suffix)]

        # Probe the exact filename that will be written, advancing the index until it is free.
        i = 0
        while os.path.isfile("{}_{}.html".format(page_name, i)):
            i += 1
        return "{}_{}.html".format(page_name, i)

    def _handle_errors(self,
                       error_div: str = "auth-error-message-box",
                       attr_name: str = "id",
                       critical: bool = False) -> None:
        """
        Report an error message found in the last parsed response, if any.

        :param error_div: The attribute value identifying the ``<div>`` that holds the error.
        :param attr_name: The attribute that should equal ``error_div``.
        :param critical: If ``True`` the error is raised, otherwise it is echoed to the I/O handler.
        :raises AmazonOrdersAuthError: If an error is found and ``critical`` is ``True``.
        """
        error_tag = self.last_response_parsed.find("div",
                                                   {attr_name: error_div})
        if not error_tag:
            return

        error_msg = "An error occurred: {}".format(error_tag.text.strip())
        if critical:
            raise AmazonOrdersAuthError(error_msg)
        self.io.echo(error_msg)

    def _solve_captcha(self,
                       url: str) -> str:
        """
        Solve the Captcha image at ``url``, falling back to asking the user.

        :param url: The URL of the Captcha image.
        :return: The Captcha solution.
        """
        captcha_response = AmazonCaptcha.fromlink(url).solve()
        if not captcha_response or captcha_response.lower() == "not solved":
            # Auto-solve failed, so show the image and let the user type the answer.
            img_response = self.session.get(url)
            img = Image.open(BytesIO(img_response.content))
            img.show()
            self.io.echo("The Captcha couldn't be auto-solved.")
            captcha_response = self.io.prompt("Enter the characters shown in the image")

        return captcha_response