Coverage for amazonorders/session.py: 93.87%

163 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-16 23:55 +0000

1import json 

2import logging 

3import os 

4from io import BytesIO 

5from typing import Optional, Any, Dict 

6from urllib.parse import urlparse 

7 

8import requests 

9from PIL import Image 

10from amazoncaptcha import AmazonCaptcha 

11from bs4 import BeautifulSoup, Tag 

12from requests import Session, Response 

13 

14from amazonorders.exception import AmazonOrdersAuthError 

15 

__author__ = "Alex Laird"
__copyright__ = "Copyright 2024, Alex Laird"
__version__ = "0.0.7"

logger = logging.getLogger(__name__)

# Root of every URL this module builds (sign-in, sign-out, captcha form actions).
BASE_URL = "https://www.amazon.com"

# Headers merged into the headers of every request made through
# ``AmazonSession.request()``. The values imitate a desktop Chrome 120 browser
# on macOS.
# NOTE(review): "br" in Accept-Encoding can only be decoded by requests/urllib3
# when a Brotli package is installed -- confirm that is true for this project.
BASE_HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "Cache-Control": "max-age=0",
    "Content-Type": "application/x-www-form-urlencoded",
    "Origin": BASE_URL,
    "Referer": "{}/ap/signin".format(BASE_URL),
    "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "macOS",
    "Sec-Ch-Viewport-Width": "1393",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Viewport-Width": "1393",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
}

# Identifiers used to recognise which page of the authentication flow was
# returned. Each one is matched against a different HTML attribute:
SIGN_IN_FORM_NAME = "signIn"  # <form name=...> of the sign-in page
MFA_DEVICE_SELECT_FORM_ID = "auth-select-device-form"  # <form id=...>
MFA_FORM_ID = "auth-mfa-form"  # <form id=...>
CAPTCHA_1_DIV_ID = "cvf-page-content"  # <div id=...> wrapping the captcha image
CAPTCHA_1_FORM_CLASS = "cvf-widget-form"  # <form class=...>
CAPTCHA_2_INPUT_ID = "captchacharacters"  # prefix of the <input id=...>

48 

49 

class AmazonSession:
    """
    An HTTP session that walks Amazon's sign-in flow and persists its cookies.

    Every request goes through :meth:`request`, which stores the raw and the
    parsed response on the instance and rewrites the cookie jar file.
    :meth:`login` then inspects the last parsed page and submits whichever
    form it recognises (sign-in, captcha, MFA device selection, MFA code)
    until the page looks signed in. MFA and unsolved captchas prompt on
    stdin via ``input()``.
    """

    def __init__(self,
                 username: str,
                 password: str,
                 debug: bool = False,
                 max_auth_attempts: int = 10,
                 cookie_jar_path: str = os.path.join(os.path.expanduser("~"), ".config",
                                                     "amazon-orders", "cookies.json")) -> None:
        """
        Create the session and load previously persisted cookies, if any.

        :param username: Value submitted as the ``email`` field of the sign-in form.
        :param password: Value submitted as the ``password`` field of the sign-in form.
        :param debug: When ``True``, sets the module logger to ``DEBUG`` and
            makes :meth:`request` write every response body to an HTML file.
        :param max_auth_attempts: Maximum number of form submissions
            :meth:`login` performs before giving up.
        :param cookie_jar_path: JSON file the session's cookies are read from
            here and written to after every request.
        """
        self.username: str = username
        self.password: str = password

        self.debug: bool = debug
        if self.debug:
            logger.setLevel(logging.DEBUG)
        self.max_auth_attempts: int = max_auth_attempts
        self.cookie_jar_path: str = cookie_jar_path

        self.session: Session = Session()
        self.last_response: Optional[Response] = None
        self.last_response_parsed: Optional[Tag] = None
        self.is_authenticated: bool = False

        # dirname() is "" for a bare filename, and makedirs("") raises, so
        # only create a directory when the path actually has one.
        cookie_dir = os.path.dirname(self.cookie_jar_path)
        if cookie_dir:
            os.makedirs(cookie_dir, exist_ok=True)
        if os.path.exists(self.cookie_jar_path):
            with open(self.cookie_jar_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            cookies = requests.utils.cookiejar_from_dict(data)
            self.session.cookies.update(cookies)

    def request(self,
                method: str,
                url: str,
                **kwargs: Any) -> "Response":
        """
        Send a request, remember the response, and persist the cookies.

        :param method: HTTP method passed to the underlying session.
        :param url: URL to request.
        :param kwargs: Forwarded to the underlying session's ``request()``.
            ``BASE_HEADERS`` is merged over any ``headers`` given and wins
            on conflicting keys.
        :return: The response, also stored as ``last_response``; its parsed
            form is stored as ``last_response_parsed``.
        """
        # Merge into a copy so the caller's dict is never mutated; this also
        # tolerates an explicit ``headers=None``.
        headers = dict(kwargs.get("headers") or {})
        headers.update(BASE_HEADERS)
        kwargs["headers"] = headers

        logger.debug("%s request to %s", method, url)

        self.last_response = self.session.request(method, url, **kwargs)
        self.last_response_parsed = BeautifulSoup(self.last_response.text,
                                                  "html.parser")

        # Mode "w" truncates an existing file, so no prior removal is needed.
        cookies = requests.utils.dict_from_cookiejar(self.session.cookies)
        with open(self.cookie_jar_path, "w", encoding="utf-8") as f:
            json.dump(cookies, f)

        logger.debug("Response: %s - %s",
                     self.last_response.url,
                     self.last_response.status_code)

        if self.debug:
            page_name = self._get_page_from_url(self.last_response.url)
            with open(page_name, "w", encoding="utf-8") as html_file:
                logger.debug("Response written to file: %s", html_file.name)
                html_file.write(self.last_response.text)

        return self.last_response

    def get(self,
            url: str,
            **kwargs: Any) -> "Response":
        """Shorthand for :meth:`request` with the ``GET`` method."""
        return self.request("GET", url, **kwargs)

    def post(self,
             url: str,
             **kwargs: Any) -> "Response":
        """Shorthand for :meth:`request` with the ``POST`` method."""
        return self.request("POST", url, **kwargs)

    def login(self) -> None:
        """
        Drive the authentication flow until the session looks signed in.

        Requests the sign-in page, then repeatedly submits whichever known
        form the last response contains, at most ``max_auth_attempts`` times.

        :raises AmazonOrdersAuthError: If the last response contains none of
            the known forms, if sign-in itself reports an error, or if the
            session is still not signed in after ``max_auth_attempts``
            submissions.
        """
        self.get("{}/gp/sign-in.html".format(BASE_URL))

        attempts = 0
        while not self.is_authenticated and attempts < self.max_auth_attempts:
            if self._is_signed_in():
                self.is_authenticated = True
                break

            if self._is_field_found(SIGN_IN_FORM_NAME):
                self._sign_in()
            elif self._is_field_found(CAPTCHA_1_FORM_CLASS, field_key="class"):
                self._captcha_1_submit()
            elif self._find_captcha_2_input():
                self._captcha_2_submit()
            elif self._is_field_found(MFA_DEVICE_SELECT_FORM_ID,
                                      field_key="id"):
                self._mfa_device_select()
            elif self._is_field_found(MFA_FORM_ID, field_key="id"):
                self._mfa_submit()
            else:
                raise AmazonOrdersAuthError(
                    "An error occurred, this is an unknown page: {}. To capture the page to a file, set the `debug` flag.".format(
                        self.last_response.url))

            attempts += 1

        if not self.is_authenticated:
            # The response to the final allowed submission has not been
            # inspected yet; it may well be the signed-in page.
            if self._is_signed_in():
                self.is_authenticated = True
            else:
                raise AmazonOrdersAuthError(
                    "Max authentication flow attempts reached.")

    def logout(self) -> None:
        """Request the sign-out page, clear the signed-in flag and close the session."""
        self.get("{}/gp/sign-out.html".format(BASE_URL))

        # Without this a later login() would skip the flow entirely.
        self.is_authenticated = False

        self.close()

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self.session.close()

    def _is_signed_in(self) -> bool:
        """Return whether the last response's text looks like a signed-in page."""
        text = self.last_response.text
        return "Hello, sign in" not in text and "nav-item-signout" in text

    def _find_captcha_2_input(self) -> Optional["Tag"]:
        """Return the input whose id starts with ``CAPTCHA_2_INPUT_ID``, if any."""
        return self.last_response_parsed.find(
            "input",
            id=lambda value: value and value.startswith(CAPTCHA_2_INPUT_ID))

    def _sign_in(self) -> None:
        """
        Submit the sign-in form with the stored credentials.

        :raises AmazonOrdersAuthError: If the response contains the default
            error box.
        """
        form = self.last_response_parsed.find("form",
                                              {"name": SIGN_IN_FORM_NAME})
        data = self._build_from_form(form,
                                     additional_attrs={"email": self.username,
                                                       "password": self.password,
                                                       "rememberMe": "true"})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors(critical=True)

    def _mfa_device_select(self) -> None:
        """
        Print the available one-time-passcode devices, prompt for one, and
        submit the device selection form.

        :raises AmazonOrdersAuthError: If the entered value is not the number
            of one of the listed devices.
        """
        form = self.last_response_parsed.find("form",
                                              {"id": MFA_DEVICE_SELECT_FORM_ID})
        contexts = form.find_all("input", {"name": "otpDeviceContext"})
        for i, field in enumerate(contexts, start=1):
            print("{}: {}".format(i, field.attrs["value"].strip()))
        choice = input("Where would you like your one-time passcode sent? ")

        # Reject anything outside the printed range: 0 or a negative number
        # would otherwise silently index from the end of the list.
        try:
            otp_device = int(choice)
        except ValueError:
            otp_device = 0
        if not 1 <= otp_device <= len(contexts):
            raise AmazonOrdersAuthError(
                "Invalid one-time passcode device selection: {}".format(choice))

        data = self._build_from_form(form,
                                     additional_attrs={
                                         "otpDeviceContext":
                                             contexts[otp_device - 1].attrs["value"]})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _mfa_submit(self) -> None:
        """Prompt for the one-time passcode and submit the MFA form."""
        otp = input("Enter the one-time passcode sent to your device: ")

        form = self.last_response_parsed.find("form", id=MFA_FORM_ID)
        data = self._build_from_form(form,
                                     additional_attrs={"otpCode": otp,
                                                       "rememberDevice": ""})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _captcha_1_submit(self) -> None:
        """Solve the captcha image inside the ``CAPTCHA_1_DIV_ID`` div and submit its form."""
        captcha_div = self.last_response_parsed.find("div",
                                                     {"id": CAPTCHA_1_DIV_ID})

        solution = self._solve_captcha(
            captcha_div.find("img", {"alt": "captcha"}).attrs["src"])

        form = self.last_response_parsed.find("form",
                                              {"class": CAPTCHA_1_FORM_CLASS})
        data = self._build_from_form(form,
                                     additional_attrs={
                                         "cvf_captcha_input": solution})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix="{}/ap/cvf/".format(
                                               BASE_URL)),
                     data=data)

        self._handle_errors("cvf-widget-alert", "class")

    def _captcha_2_submit(self) -> None:
        """
        Solve the captcha in the form that encloses the ``CAPTCHA_2_INPUT_ID``
        input and submit it, sending the fields as query parameters.
        """
        form = self._find_captcha_2_input().find_parent("form")

        solution = self._solve_captcha(form.find("img").attrs["src"])

        data = self._build_from_form(form,
                                     additional_attrs={
                                         "field-keywords": solution})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix=BASE_URL),
                     params=data)

        self._handle_errors("a-alert-info", "class")

    def _build_from_form(self,
                         form: "Tag",
                         additional_attrs: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Collect the ``name``/``value`` pairs of a form's ``input`` elements.

        :param form: The form whose inputs are read.
        :param additional_attrs: Extra entries applied last, overriding any
            input with the same name.
        :return: The submission data. Inputs lacking a ``name`` or a
            ``value`` attribute are skipped.
        """
        data = {}
        for field in form.find_all("input"):
            name = field.get("name")
            value = field.get("value")
            if name is None or value is None:
                continue
            data[name] = value
        if additional_attrs:
            data.update(additional_attrs)
        return data

    def _get_form_action(self,
                         form: "Tag",
                         prefix: Optional[str] = None) -> str:
        """
        Resolve the URL a form should be submitted to.

        :param form: The form whose ``action`` attribute is read.
        :param prefix: Prepended to the action when the action does not start
            with ``http``.
        :return: The form's action, or the last response's URL when the form
            has no action.
        """
        action = form.attrs.get("action")
        if not action:
            action = self.last_response.url
        if prefix and not action.startswith("http"):
            action = prefix + action
        return action

    def _is_field_found(self,
                        field_value: str,
                        field_type: str = "form",
                        field_key: str = "name") -> bool:
        """
        Return whether the last parsed response contains a ``field_type``
        element whose ``field_key`` attribute matches ``field_value``.
        """
        return self.last_response_parsed.find(field_type, {
            field_key: field_value}) is not None

    def _get_page_from_url(self,
                           url: str) -> str:
        """
        Build an unused ``<page>_<n>.html`` filename for a response URL.

        :param url: The URL whose final path segment names the page.
        :return: ``<page>_<n>.html`` for the smallest ``n`` (from 0) for
            which no such file exists in the current working directory.
        """
        page_name = os.path.basename(urlparse(url).path)
        # Remove the extension as a suffix. str.strip(".html") would strip
        # *characters*, turning "sign-out.html" into "sign-ou".
        if page_name.endswith(".html"):
            page_name = page_name[:-len(".html")]

        # Probe the same filename that will be returned, so earlier dumps of
        # the same page are kept rather than overwritten.
        i = 0
        while os.path.isfile("{}_{}.html".format(page_name, i)):
            i += 1
        return "{}_{}.html".format(page_name, i)

    def _handle_errors(self,
                       error_div: str = "auth-error-message-box",
                       attr_name: str = "id",
                       critical: bool = False) -> None:
        """
        Report an error box found in the last parsed response.

        :param error_div: Attribute value identifying the error ``div``.
        :param attr_name: Attribute the value is matched against.
        :param critical: Raise instead of printing the message.
        :raises AmazonOrdersAuthError: If the box is present and ``critical``.
        """
        error_tag = self.last_response_parsed.find("div",
                                                   {attr_name: error_div})
        if error_tag:
            error_msg = "An error occurred: {}".format(error_tag.text.strip())

            if critical:
                raise AmazonOrdersAuthError(error_msg)
            else:
                print(error_msg)

    def _solve_captcha(self,
                       url: str) -> str:
        """
        Solve the captcha image at ``url``.

        Tries ``AmazonCaptcha`` first; if that yields nothing or
        ``"not solved"``, downloads the image, displays it, and prompts for
        the characters on stdin.

        :param url: URL of the captcha image.
        :return: The captcha solution.
        """
        captcha_response = AmazonCaptcha.fromlink(url).solve()
        if not captcha_response or captcha_response.lower() == "not solved":
            img_response = self.session.get(url)
            img = Image.open(BytesIO(img_response.content))
            img.show()
            captcha_response = input(
                "The Captcha couldn't be auto-solved, enter the characters shown in the image: ")

        return captcha_response