Coverage for amazonorders/session.py: 93.98%

166 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-17 01:52 +0000

1import json 

2import logging 

3import os 

4from io import BytesIO 

5from typing import Optional, Any, Dict 

6from urllib.parse import urlparse 

7 

8import requests 

9from PIL import Image 

10from amazoncaptcha import AmazonCaptcha 

11from bs4 import BeautifulSoup, Tag 

12from requests import Session, Response 

13 

14from amazonorders.exception import AmazonOrdersAuthError 

15 

__author__ = "Alex Laird"
__copyright__ = "Copyright 2024, Alex Laird"
__version__ = "1.0.0"

# Module-level logger; raised to DEBUG by ``AmazonSession`` when its ``debug`` flag is set.
logger = logging.getLogger(__name__)

# Root URL every sign-in / sign-out request in this module is built from.
BASE_URL = "https://www.amazon.com"

# Headers merged into every request made through ``AmazonSession.request``. The values imitate a
# desktop Chrome 120 browser on macOS.
BASE_HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "Cache-Control": "max-age=0",
    "Content-Type": "application/x-www-form-urlencoded",
    "Origin": BASE_URL,
    "Referer": "{}/ap/signin".format(BASE_URL),
    "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "macOS",
    "Sec-Ch-Viewport-Width": "1393",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Viewport-Width": "1393",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
}

# Markers used to recognise which page of the authentication flow the last response landed on.
SIGN_IN_FORM_NAME = "signIn"  # ``name`` attribute of the sign-in <form>
MFA_DEVICE_SELECT_FORM_ID = "auth-select-device-form"  # ``id`` of the OTP device selection <form>
MFA_FORM_ID = "auth-mfa-form"  # ``id`` of the OTP entry <form>
CAPTCHA_1_DIV_ID = "cvf-page-content"  # ``id`` of the <div> holding the first Captcha variant's image
CAPTCHA_1_FORM_CLASS = "cvf-widget-form"  # ``class`` of the first Captcha variant's <form>
CAPTCHA_2_INPUT_ID = "captchacharacters"  # ``id`` prefix of the second Captcha variant's <input>

# Where session cookies are persisted as JSON when no explicit path is given.
DEFAULT_COOKIE_JAR_PATH = os.path.join(os.path.expanduser("~"), ".config", "amazon-orders", "cookies.json")

50 

51 

class AmazonSession:
    """
    An HTTP session against Amazon (``BASE_URL``) that can drive the sign-in flow.

    The session wraps a :class:`requests.Session`, adds ``BASE_HEADERS`` to every request, keeps the
    most recent response (raw and parsed) on the instance, and persists the session's cookies to a
    JSON file after each request so they can be reloaded the next time a session is constructed.

    :func:`login()` may be interactive: OTP device selection, OTP entry, and Captchas that could not
    be auto-solved are all prompted for on ``stdin``.
    """

    def __init__(self,
                 username: str,
                 password: str,
                 debug: bool = False,
                 max_auth_attempts: int = 10,
                 cookie_jar_path: Optional[str] = None) -> None:
        """
        :param username: The value submitted as ``email`` on the sign-in form.
        :param password: The value submitted as ``password`` on the sign-in form.
        :param debug: Set the logger to ``DEBUG`` and write each response to an HTML file.
        :param max_auth_attempts: How many authentication flow steps :func:`login()` may take.
        :param cookie_jar_path: The path to persist session cookies, defaults to
            ``DEFAULT_COOKIE_JAR_PATH``.
        """
        if not cookie_jar_path:
            cookie_jar_path = DEFAULT_COOKIE_JAR_PATH

        #: The value submitted as ``email`` on the sign-in form.
        self.username: str = username
        #: The value submitted as ``password`` on the sign-in form.
        self.password: str = password

        #: Set logger ``DEBUG`` and write an HTML file for each request made on the session.
        self.debug: bool = debug
        if self.debug:
            logger.setLevel(logging.DEBUG)
        #: The number of authentication flow steps :func:`login()` may take before giving up.
        self.max_auth_attempts: int = max_auth_attempts
        #: The path to persist session cookies, defaults to ``DEFAULT_COOKIE_JAR_PATH``.
        self.cookie_jar_path: str = cookie_jar_path

        #: The underlying ``requests`` session all requests are executed on.
        self.session: "Session" = Session()
        #: The response of the most recent request, ``None`` until a request has been made.
        self.last_response: Optional["Response"] = None
        #: ``last_response`` parsed with ``html.parser``, ``None`` until a request has been made.
        self.last_response_parsed: Optional["Tag"] = None
        #: If :func:`login()` has been executed and successfully logged in the session.
        self.is_authenticated: bool = False

        cookie_dir = os.path.dirname(self.cookie_jar_path)
        # A bare filename has no directory component, and ``os.makedirs("")`` raises. ``exist_ok``
        # also avoids failing when the directory appears between a check and the creation.
        if cookie_dir:
            os.makedirs(cookie_dir, exist_ok=True)
        if os.path.exists(self.cookie_jar_path):
            with open(self.cookie_jar_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            cookies = requests.utils.cookiejar_from_dict(data)
            self.session.cookies.update(cookies)

    def request(self,
                method: str,
                url: str,
                **kwargs: Any) -> "Response":
        """
        Execute the request on the session, store the response on ``last_response`` (and its parsed
        form on ``last_response_parsed``), and persist the session's cookies to ``cookie_jar_path``.
        When ``debug`` is set, the response body is also written to an HTML file in the current
        working directory.

        :param method: The request method to execute.
        :param url: The URL to execute ``method`` on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`requests.request`.
        :return: The Response from the executed request.
        """
        # Build a fresh dict so a ``headers`` dict owned by the caller is never mutated in place, and
        # an explicit ``headers=None`` is tolerated.
        # NOTE(review): ``BASE_HEADERS`` still override caller-supplied headers of the same name, as
        # before - confirm that precedence is intended.
        headers = dict(kwargs.get("headers") or {})
        headers.update(BASE_HEADERS)
        kwargs["headers"] = headers

        logger.debug("{} request to {}".format(method, url))

        self.last_response = self.session.request(method, url, **kwargs)
        self.last_response_parsed = BeautifulSoup(self.last_response.text,
                                                  "html.parser")

        cookies = requests.utils.dict_from_cookiejar(self.session.cookies)
        # Mode "w" truncates an existing file, so it does not need to be removed first.
        with open(self.cookie_jar_path, "w", encoding="utf-8") as f:
            json.dump(cookies, f)

        logger.debug("Response: {} - {}".format(self.last_response.url,
                                                self.last_response.status_code))

        if self.debug:
            page_name = self._get_page_from_url(self.last_response.url)
            with open(page_name, "w", encoding="utf-8") as html_file:
                logger.debug(
                    "Response written to file: {}".format(html_file.name))
                html_file.write(self.last_response.text)

        return self.last_response

    def get(self,
            url: str,
            **kwargs: Any) -> "Response":
        """
        Execute a GET request on the session.

        :param url: The URL to GET on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed GET request.
        """
        return self.request("GET", url, **kwargs)

    def post(self,
             url: str,
             **kwargs: Any) -> "Response":
        """
        Execute a POST request on the session.

        :param url: The URL to POST on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed POST request.
        """
        return self.request("POST", url, **kwargs)

    def login(self) -> None:
        """
        Execute an Amazon login process. This will include the sign-in page, and may also include Captcha challenges
        and OTP pages (if 2FA authentication is enabled on your account).

        If successful, ``is_authenticated`` will be set to ``True``.

        :raises AmazonOrdersAuthError: If the flow lands on a page that is not recognised, or if the
            session is still not signed in after ``max_auth_attempts`` steps.
        """
        self.get("{}/gp/sign-in.html".format(BASE_URL))

        attempts = 0
        while not self.is_authenticated and attempts < self.max_auth_attempts:
            if self._is_signed_in_page():
                self.is_authenticated = True
                break

            # Dispatch on whichever known form / input the current page contains.
            if self._is_field_found(SIGN_IN_FORM_NAME):
                self._sign_in()
            elif self._is_field_found(CAPTCHA_1_FORM_CLASS, field_key="class"):
                self._captcha_1_submit()
            elif self._find_captcha_2_input():
                self._captcha_2_submit()
            elif self._is_field_found(MFA_DEVICE_SELECT_FORM_ID,
                                      field_key="id"):
                self._mfa_device_select()
            elif self._is_field_found(MFA_FORM_ID, field_key="id"):
                self._mfa_submit()
            else:
                raise AmazonOrdersAuthError(
                    "An error occurred, this is an unknown page: {}. To capture the page to a file, set the `debug` flag.".format(
                        self.last_response.url))

            attempts += 1

        # The loop only inspects a page at the start of an iteration, so the response produced by the
        # final allowed step has not been looked at yet. Check it before declaring failure.
        if not self.is_authenticated and self._is_signed_in_page():
            self.is_authenticated = True

        if not self.is_authenticated:
            raise AmazonOrdersAuthError(
                "Max authentication flow attempts reached.")

    def logout(self) -> None:
        """
        Request Amazon's sign-out page, mark the session as no longer authenticated, and close it.
        """
        self.get("{}/gp/sign-out.html".format(BASE_URL))

        # Keep the flag truthful so a later :func:`login()` runs the authentication flow again.
        self.is_authenticated = False

        self.close()

    def close(self) -> None:
        """
        Close the underlying ``requests`` session.
        """
        self.session.close()

    def _is_signed_in_page(self) -> bool:
        # True when the last response carries the sign-out nav item and no "Hello, sign in" prompt.
        page_text = self.last_response.text
        return "Hello, sign in" not in page_text and "nav-item-signout" in page_text

    def _find_captcha_2_input(self) -> Optional["Tag"]:
        # Locate the <input> whose id starts with ``CAPTCHA_2_INPUT_ID`` on the last parsed page.
        return self.last_response_parsed.find(
            "input",
            id=lambda value: value and value.startswith(CAPTCHA_2_INPUT_ID))

    def _sign_in(self) -> None:
        # Submit the sign-in form with the credentials; an error box on the result is fatal.
        form = self.last_response_parsed.find("form",
                                              {"name": SIGN_IN_FORM_NAME})
        data = self._build_from_form(form,
                                     additional_attrs={"email": self.username,
                                                       "password": self.password,
                                                       "rememberMe": "true"})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors(critical=True)

    def _mfa_device_select(self) -> None:
        # List the OTP devices offered by the page, prompt for one, and submit the selection.
        form = self.last_response_parsed.find("form",
                                              {"id": MFA_DEVICE_SELECT_FORM_ID})
        contexts = form.find_all("input", {"name": "otpDeviceContext"})
        for i, field in enumerate(contexts, start=1):
            print("{}: {}".format(i, field.attrs["value"].strip()))
        otp_device = int(
            input("Where would you like your one-time passcode sent? "))

        # Without this check, 0 or a negative number would silently select a device counted from
        # the end of the list instead of being rejected like a too-large number is.
        if not 1 <= otp_device <= len(contexts):
            raise IndexError(
                "Device selection {} is out of range, expected a number from 1 to {}".format(
                    otp_device, len(contexts)))

        data = self._build_from_form(form,
                                     additional_attrs={
                                         "otpDeviceContext": contexts[otp_device - 1].attrs["value"]})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _mfa_submit(self) -> None:
        # Prompt for the one-time passcode and submit the OTP form with it.
        otp = input("Enter the one-time passcode sent to your device: ")

        form = self.last_response_parsed.find("form", id=MFA_FORM_ID)
        data = self._build_from_form(form,
                                     additional_attrs={"otpCode": otp,
                                                       "rememberDevice": ""})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _captcha_1_submit(self) -> None:
        # Solve the Captcha image inside the ``CAPTCHA_1_DIV_ID`` div and submit its form.
        captcha_div = self.last_response_parsed.find("div",
                                                     {"id": CAPTCHA_1_DIV_ID})

        solution = self._solve_captcha(
            captcha_div.find("img", {"alt": "captcha"}).attrs["src"])

        form = self.last_response_parsed.find("form",
                                              {"class": CAPTCHA_1_FORM_CLASS})
        data = self._build_from_form(form,
                                     additional_attrs={
                                         "cvf_captcha_input": solution})

        # A non-absolute action on this form is prefixed with the ``/ap/cvf/`` path.
        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix="{}/ap/cvf/".format(
                                               BASE_URL)),
                     data=data)

        self._handle_errors("cvf-widget-alert", "class")

    def _captcha_2_submit(self) -> None:
        # Solve the Captcha image in the form enclosing the ``CAPTCHA_2_INPUT_ID`` input and submit it.
        form = self._find_captcha_2_input().find_parent("form")

        solution = self._solve_captcha(form.find("img").attrs["src"])

        data = self._build_from_form(form,
                                     additional_attrs={
                                         "field-keywords": solution})

        # Unlike the other forms, the fields are sent as query parameters rather than as a body.
        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix=BASE_URL),
                     params=data)

        self._handle_errors("a-alert-info", "class")

    def _build_from_form(self,
                         form: "Tag",
                         additional_attrs: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        # Collect ``name`` -> ``value`` for every <input> of the form that has both attributes, then
        # overlay ``additional_attrs`` (which win over the values found on the form).
        data = {}
        for field in form.find_all("input"):
            name = field.get("name")
            value = field.get("value")
            # Inputs missing either attribute cannot contribute a form field, skip them.
            if name is None or value is None:
                continue
            data[name] = value
        if additional_attrs:
            data.update(additional_attrs)
        return data

    def _get_form_action(self,
                         form: "Tag",
                         prefix: Optional[str] = None) -> str:
        # Resolve the URL to submit the form to: its ``action``, or the URL of the last response when
        # it has none. ``prefix`` is prepended to an action that does not start with "http".
        action = form.attrs.get("action")
        if not action:
            action = self.last_response.url
        if prefix and not action.startswith("http"):
            action = prefix + action
        return action

    def _is_field_found(self,
                        field_value: str,
                        field_type: str = "form",
                        field_key: str = "name") -> bool:
        # Whether the last parsed page has a ``field_type`` tag whose ``field_key`` is ``field_value``.
        return self.last_response_parsed.find(field_type, {
            field_key: field_value}) is not None

    def _get_page_from_url(self,
                           url: str) -> str:
        # Derive a not-yet-existing ``<page>_<n>.html`` filename (relative to the current working
        # directory) from the last path segment of the URL.
        page_name = os.path.basename(urlparse(url).path)
        # Remove the extension as a suffix; ``str.strip(".html")`` would instead strip any of the
        # characters ".", "h", "t", "m", "l" from both ends of the name.
        suffix = ".html"
        if page_name.endswith(suffix):
            page_name = page_name[:-len(suffix)]
        i = 0
        # Probe the exact filename that would be returned so earlier dumps are not overwritten.
        while os.path.isfile("{}_{}.html".format(page_name, i)):
            i += 1
        return "{}_{}.html".format(page_name, i)

    def _handle_errors(self,
                       error_div: str = "auth-error-message-box",
                       attr_name: str = "id",
                       critical: bool = False) -> None:
        # Look for an error <div> (matched by ``attr_name`` == ``error_div``) on the last parsed page.
        # Its text is raised as ``AmazonOrdersAuthError`` when ``critical``, otherwise printed.
        error_tag = self.last_response_parsed.find("div",
                                                   {attr_name: error_div})
        if error_tag:
            error_msg = "An error occurred: {}".format(error_tag.text.strip())

            if critical:
                raise AmazonOrdersAuthError(error_msg)
            else:
                print(error_msg)

    def _solve_captcha(self,
                       url: str) -> str:
        # Try to auto-solve the Captcha image at ``url``; when that fails, show the image and prompt
        # for the characters instead.
        captcha_response = AmazonCaptcha.fromlink(url).solve()
        if not captcha_response or captcha_response.lower() == "not solved":
            img_response = self.session.get(url)
            img = Image.open(BytesIO(img_response.content))
            img.show()
            captcha_response = input(
                "The Captcha couldn't be auto-solved, enter the characters shown in the image: ")

        return captcha_response