Coverage for amazonorders/session.py: 93.83%

162 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-16 21:17 +0000

1import json 

2import logging 

3import os 

4from io import BytesIO 

5from urllib.parse import urlparse 

6 

7import requests 

8from PIL import Image 

9from amazoncaptcha import AmazonCaptcha 

10from bs4 import BeautifulSoup 

11from requests import Session 

12 

13from amazonorders.exception import AmazonOrdersAuthError 

14 

# Package metadata.
__author__ = "Alex Laird"
__copyright__ = "Copyright 2024, Alex Laird"
__version__ = "0.0.6"

# Module-level logger; AmazonSession raises its level to DEBUG when asked to.
logger = logging.getLogger(__name__)

# Root of every URL this module builds.
BASE_URL = "https://www.amazon.com"

# Headers merged into every request made through AmazonSession.request().
# The values mimic a desktop Chrome 120 browser on macOS.
BASE_HEADERS = {
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.7"
    ),
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "Cache-Control": "max-age=0",
    "Content-Type": "application/x-www-form-urlencoded",
    "Origin": BASE_URL,
    "Referer": BASE_URL + "/ap/signin",
    "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "macOS",
    "Sec-Ch-Viewport-Width": "1393",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Viewport-Width": "1393",
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}

# Selectors used to recognise which page of the auth flow was returned.
SIGN_IN_FORM_NAME = "signIn"  # <form name="...">
MFA_DEVICE_SELECT_FORM_ID = "auth-select-device-form"  # <form id="...">
MFA_FORM_ID = "auth-mfa-form"  # <form id="...">
CAPTCHA_1_DIV_ID = "cvf-page-content"  # <div id="..."> wrapping the captcha image
CAPTCHA_1_FORM_CLASS = "cvf-widget-form"  # <form class="...">
CAPTCHA_2_INPUT_ID = "captchacharacters"  # prefix of the <input id="...">

47 

48 

class AmazonSession:
    """
    An HTTP session against Amazon that drives the interactive sign-in flow
    (sign-in form, captchas, MFA device selection, MFA code) and persists the
    session's cookies to a JSON file after every request.

    State kept on the instance:

    * ``session`` - the underlying ``requests.Session``.
    * ``last_response`` - the response of the most recent request.
    * ``last_response_parsed`` - ``last_response.text`` parsed by
      ``BeautifulSoup`` with ``html.parser``.
    * ``is_authenticated`` - set to ``True`` by :meth:`login` once a signed-in
      page is seen, and reset by :meth:`logout`.
    """

    def __init__(self,
                 username,
                 password,
                 debug=False,
                 max_auth_attempts=10,
                 cookie_jar_path=os.path.join(os.path.expanduser("~"), ".config",
                                              "amazon-orders", "cookies.json")) -> None:
        """
        :param username: Value submitted as ``email`` on the sign-in form.
        :param password: Value submitted as ``password`` on the sign-in form.
        :param debug: When true, the module logger is set to ``DEBUG`` and
            every response body is written to an HTML file in the current
            working directory.
        :param max_auth_attempts: Maximum number of auth-flow steps
            :meth:`login` performs before giving up.
        :param cookie_jar_path: JSON file that cookies are loaded from here
            and saved to after each request.
        """
        self.username = username
        self.password = password

        self.debug = debug
        if self.debug:
            logger.setLevel(logging.DEBUG)
        self.max_auth_attempts = max_auth_attempts
        self.cookie_jar_path = cookie_jar_path

        self.session = Session()
        self.last_response = None
        self.last_response_parsed = None
        self.is_authenticated = False

        # dirname() is "" for a bare filename, and os.makedirs("") raises, so
        # only create the directory when the path actually has one.
        cookie_dir = os.path.dirname(self.cookie_jar_path)
        if cookie_dir:
            os.makedirs(cookie_dir, exist_ok=True)

        if os.path.exists(self.cookie_jar_path):
            try:
                with open(self.cookie_jar_path, "r", encoding="utf-8") as f:
                    data = json.load(f)
            except ValueError:
                # json.JSONDecodeError subclasses ValueError. An unreadable
                # cookie file should not make the session unusable; start
                # without persisted cookies and let the next request rewrite
                # the file.
                logger.warning("Ignoring unreadable cookie jar: %s",
                               self.cookie_jar_path)
            else:
                cookies = requests.utils.cookiejar_from_dict(data)
                self.session.cookies.update(cookies)

    def request(self, method, url, **kwargs):
        """
        Send a request through the session, record and parse the response,
        and persist the session's cookies.

        :param method: HTTP method name passed to ``Session.request``.
        :param url: URL to request.
        :param kwargs: Forwarded to ``Session.request``. Any ``headers`` are
            merged with ``BASE_HEADERS``; on a key conflict the
            ``BASE_HEADERS`` value is used.
        :return: The response, also stored as ``last_response``.
        """
        # Merge into a copy so the caller's dict is never mutated, and so an
        # explicit ``headers=None`` is tolerated.
        headers = dict(kwargs.get("headers") or {})
        headers.update(BASE_HEADERS)
        kwargs["headers"] = headers

        logger.debug("%s request to %s", method, url)

        self.last_response = self.session.request(method, url, **kwargs)
        self.last_response_parsed = BeautifulSoup(self.last_response.text,
                                                  "html.parser")

        # Mode "w" truncates an existing file, so no separate removal is
        # needed before rewriting the cookie jar.
        cookies = requests.utils.dict_from_cookiejar(self.session.cookies)
        with open(self.cookie_jar_path, "w", encoding="utf-8") as f:
            json.dump(cookies, f)

        logger.debug("Response: %s - %s", self.last_response.url,
                     self.last_response.status_code)

        if self.debug:
            page_name = self._get_page_from_url(self.last_response.url)
            with open(page_name, "w", encoding="utf-8") as html_file:
                logger.debug("Response written to file: %s", html_file.name)
                html_file.write(self.last_response.text)

        return self.last_response

    def get(self, url, **kwargs):
        """Shorthand for ``request("GET", url, **kwargs)``."""
        return self.request("GET", url, **kwargs)

    def post(self, url, **kwargs):
        """Shorthand for ``request("POST", url, **kwargs)``."""
        return self.request("POST", url, **kwargs)

    def login(self):
        """
        Request the sign-in page and work through whichever auth-flow page is
        returned (sign-in, captcha, MFA) until a signed-in page is seen.

        :raises AmazonOrdersAuthError: If an unrecognised page is returned,
            or if ``max_auth_attempts`` steps were performed without reaching
            a signed-in page.
        """
        self.get("{}/gp/sign-in.html".format(BASE_URL))

        attempts = 0
        while not self.is_authenticated:
            # Check the current page BEFORE testing the attempt budget, so a
            # final step that succeeded is recognised instead of being
            # reported as "max attempts reached".
            page_text = self.last_response.text
            if "Hello, sign in" not in page_text and "nav-item-signout" in page_text:
                self.is_authenticated = True
                break

            if attempts >= self.max_auth_attempts:
                raise AmazonOrdersAuthError(
                    "Max authentication flow attempts reached.")

            if self._is_field_found(SIGN_IN_FORM_NAME):
                self._sign_in()
            elif self._is_field_found(CAPTCHA_1_FORM_CLASS, field_key="class"):
                self._captcha_1_submit()
            elif self._find_captcha_2_input():
                self._captcha_2_submit()
            elif self._is_field_found(MFA_DEVICE_SELECT_FORM_ID,
                                      field_key="id"):
                self._mfa_device_select()
            elif self._is_field_found(MFA_FORM_ID, field_key="id"):
                self._mfa_submit()
            else:
                raise AmazonOrdersAuthError(
                    "An error occurred, this is an unknown page: {}. To capture the page to a file, set the `debug` flag.".format(
                        self.last_response.url))

            attempts += 1

    def logout(self):
        """Request the sign-out page, close the session, and clear the
        authenticated flag."""
        self.get("{}/gp/sign-out.html".format(BASE_URL))

        self.close()

        # The flag would otherwise keep claiming the session is signed in.
        self.is_authenticated = False

    def close(self):
        """Close the underlying ``requests.Session``."""
        self.session.close()

    def _sign_in(self):
        """Submit the sign-in form with the stored credentials; an error box
        in the response raises ``AmazonOrdersAuthError``."""
        form = self.last_response_parsed.find("form",
                                              {"name": SIGN_IN_FORM_NAME})
        data = self._build_from_form(form,
                                     additional_attrs={"email": self.username,
                                                       "password": self.password,
                                                       "rememberMe": "true"})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors(critical=True)

    def _mfa_device_select(self):
        """
        Print the available one-time-passcode devices, prompt for a choice on
        stdin, and submit the device-selection form.

        :raises AmazonOrdersAuthError: If the entered choice is not a number
            between 1 and the number of listed devices.
        """
        form = self.last_response_parsed.find("form",
                                              {"id": MFA_DEVICE_SELECT_FORM_ID})
        contexts = form.find_all("input", {"name": "otpDeviceContext"})
        for number, field in enumerate(contexts, start=1):
            print("{}: {}".format(number, field.attrs["value"].strip()))

        answer = input("Where would you like your one-time passcode sent? ")
        try:
            otp_device = int(answer)
        except ValueError:
            otp_device = 0  # falls through to the range check below
        # Reject 0 and negatives explicitly: Python's negative indexing would
        # otherwise silently pick a device from the end of the list.
        if not 1 <= otp_device <= len(contexts):
            raise AmazonOrdersAuthError(
                "Invalid one-time passcode device selection: {}".format(answer))

        data = self._build_from_form(form,
                                     additional_attrs={
                                         "otpDeviceContext":
                                             contexts[otp_device - 1].attrs["value"]})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _mfa_submit(self):
        """Prompt on stdin for the one-time passcode and submit the MFA
        form."""
        otp = input("Enter the one-time passcode sent to your device: ")

        form = self.last_response_parsed.find("form", id=MFA_FORM_ID)
        data = self._build_from_form(form,
                                     additional_attrs={"otpCode": otp,
                                                       "rememberDevice": ""})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _captcha_1_submit(self):
        """Solve the captcha image inside the ``CAPTCHA_1_DIV_ID`` div and
        submit the ``CAPTCHA_1_FORM_CLASS`` form with the solution."""
        captcha_div = self.last_response_parsed.find("div",
                                                     {"id": CAPTCHA_1_DIV_ID})

        solution = self._solve_captcha(
            captcha_div.find("img", {"alt": "captcha"}).attrs["src"])

        form = self.last_response_parsed.find("form",
                                              {"class": CAPTCHA_1_FORM_CLASS})
        data = self._build_from_form(form,
                                     additional_attrs={
                                         "cvf_captcha_input": solution})

        # A non-absolute action on this form is resolved against /ap/cvf/.
        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix="{}/ap/cvf/".format(
                                               BASE_URL)),
                     data=data)

        self._handle_errors("cvf-widget-alert", "class")

    def _captcha_2_submit(self):
        """Solve the captcha image in the form that contains the
        ``CAPTCHA_2_INPUT_ID`` input and submit the solution as query
        parameters."""
        form = self._find_captcha_2_input().find_parent("form")

        solution = self._solve_captcha(form.find("img").attrs["src"])

        data = self._build_from_form(form,
                                     additional_attrs={
                                         "field-keywords": solution})

        # Unlike the other steps, the fields are sent as ``params`` (the
        # query string) rather than as ``data`` (the request body).
        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix=BASE_URL),
                     params=data)

        self._handle_errors("a-alert-info", "class")

    def _find_captcha_2_input(self):
        """Return the first ``<input>`` of the last parsed page whose ``id``
        starts with ``CAPTCHA_2_INPUT_ID``, or ``None``."""
        return self.last_response_parsed.find(
            "input",
            id=lambda value: value and value.startswith(CAPTCHA_2_INPUT_ID))

    def _build_from_form(self, form, additional_attrs=None):
        """
        Collect the ``name``/``value`` pairs of every ``<input>`` in ``form``.

        :param form: Parsed form element to read inputs from.
        :param additional_attrs: Optional dict merged over the collected
            values; its entries win on a key conflict.
        :return: Dict of field names to values.
        """
        data = {}
        for field in form.find_all("input"):
            try:
                data[field["name"]] = field["value"]
            except KeyError:
                # Inputs lacking a name or a value are skipped.
                continue
        if additional_attrs:
            data.update(additional_attrs)
        return data

    def _get_form_action(self, form, prefix=None):
        """
        Work out the URL a form should be submitted to.

        :param form: Parsed form element.
        :param prefix: Optional string prepended to an action that does not
            start with ``http``.
        :return: The form's ``action``, or the URL of the last response when
            the form has none, with ``prefix`` applied where relevant.
        """
        action = form.attrs.get("action")
        if not action:
            action = self.last_response.url
        if prefix and not action.startswith("http"):
            action = prefix + action
        return action

    def _is_field_found(self, field_value, field_type="form", field_key="name"):
        """Return whether the last parsed page has a ``field_type`` element
        whose ``field_key`` attribute matches ``field_value``."""
        return self.last_response_parsed.find(field_type, {
            field_key: field_value}) is not None

    def _get_page_from_url(self, url):
        """
        Build an unused ``<page>_<n>.html`` filename for a debug dump of
        ``url``.

        :param url: URL whose last path segment names the page.
        :return: A filename, relative to the current working directory, that
            does not currently exist as a file.
        """
        page_name = os.path.basename(urlparse(url).path)
        # Remove the ".html" SUFFIX only. str.strip(".html") would strip any
        # of the characters ".", "h", "t", "m", "l" from both ends.
        if page_name.endswith(".html"):
            page_name = page_name[:-len(".html")]

        # Probe the same filenames that are actually written, advancing the
        # index until one is free.
        i = 0
        while os.path.isfile("{}_{}.html".format(page_name, i)):
            i += 1
        return "{}_{}.html".format(page_name, i)

    def _handle_errors(self, error_div="auth-error-message-box", attr_name="id",
                       critical=False):
        """
        Report an error box found in the last parsed page, if any.

        :param error_div: Attribute value identifying the error ``<div>``.
        :param attr_name: Attribute to match ``error_div`` against.
        :param critical: When true the error is raised; otherwise it is
            printed.
        :raises AmazonOrdersAuthError: If an error box is found and
            ``critical`` is true.
        """
        error_tag = self.last_response_parsed.find("div",
                                                   {attr_name: error_div})
        if not error_tag:
            return

        error_msg = "An error occurred: {}".format(error_tag.text.strip())
        if critical:
            raise AmazonOrdersAuthError(error_msg)
        print(error_msg)

    def _solve_captcha(self, url):
        """
        Solve the captcha image at ``url``, automatically when possible.

        :param url: URL of the captcha image.
        :return: The ``AmazonCaptcha`` solution, or - when that is empty or
            "not solved" - the characters typed on stdin after the image has
            been displayed.
        """
        captcha_response = AmazonCaptcha.fromlink(url).solve()
        if not captcha_response or captcha_response.lower() == "not solved":
            img_response = self.session.get(url)
            img = Image.open(BytesIO(img_response.content))
            img.show()
            captcha_response = input(
                "The Captcha couldn't be auto-solved, enter the characters shown in the image: ")

        return captcha_response