Coverage for amazonorders/session.py: 80.77%

104 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-01-29 14:37 +0000

1import json 

2import logging 

3import os 

4from typing import Optional, Any 

5from urllib.parse import urlparse 

6 

7import requests 

8from bs4 import BeautifulSoup, Tag 

9from requests import Session, Response 

10from requests.utils import dict_from_cookiejar 

11 

12from amazonorders import constants 

13from amazonorders.conf import DEFAULT_COOKIE_JAR_PATH, DEFAULT_OUTPUT_DIR 

14from amazonorders.exception import AmazonOrdersAuthError 

15from amazonorders.forms import SignInForm, MfaDeviceSelectForm, MfaForm, CaptchaForm 

16 

# Package metadata.
__author__ = "Alex Laird"
__copyright__ = "Copyright 2024, Alex Laird"
__version__ = "1.0.7"

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Candidate forms for the authentication flow. During login they are tried in
# this order against the most recently parsed page, and the first form that
# selects successfully is filled in and submitted.
AUTH_FORMS = [
    SignInForm(),
    MfaDeviceSelectForm(),
    MfaForm(),
    CaptchaForm(),
    CaptchaForm(constants.CAPTCHA_2_FORM_SELECTOR,
                constants.CAPTCHA_2_ERROR_SELECTOR,
                "field-keywords"),
    MfaForm(constants.CAPTCHA_OTP_FORM_SELECTOR),
]

29 

30 

class IODefault:
    """
    Handles input/output from the application. By default, this uses console commands, but
    this class exists so that it can be overridden when constructing an :class:`AmazonSession`
    if input/output should be handled another way.
    """

    def echo(self,
             msg: str,
             **kwargs: Any) -> None:
        """
        Echo a message to the console.

        :param msg: The data to send to output.
        :param kwargs: Unused by the default implementation.
        """
        print(msg)

    def prompt(self,
               msg: str,
               type: Optional[str] = None,
               **kwargs: Any) -> str:
        """
        Prompt to the console for user input.

        :param msg: The data to use as the input prompt; ``": "`` is appended to it.
        :param type: Unused by the default implementation.
        :param kwargs: Unused by the default implementation.
        :return: The user input result.
        """
        # ``type`` shadows the builtin, but the name is part of the public
        # signature that overriding classes rely on, so it is kept as-is.
        return input("{}: ".format(msg))

62 

63 

class AmazonSession:
    """
    An interface for interacting with Amazon and authenticating an underlying :class:`requests.Session`. Utilizing
    this class means session data is maintained between requests. Session data is also persisted after each request,
    meaning it will also be maintained between separate instantiations of the class or application.

    To get started, call the :func:`login` function.
    """

    def __init__(self,
                 username: str,
                 password: str,
                 debug: bool = False,
                 max_auth_attempts: int = 10,
                 cookie_jar_path: Optional[str] = None,
                 io: IODefault = IODefault(),
                 output_dir: Optional[str] = None) -> None:
        """
        Create the session and load any cookies previously persisted to ``cookie_jar_path``.

        :param username: An Amazon username.
        :param password: An Amazon password.
        :param debug: Enable ``DEBUG`` logging and write each response to an HTML file.
        :param max_auth_attempts: Maximum number of auth forms :func:`login` will submit.
        :param cookie_jar_path: Where to persist cookies, defaults to ``conf.DEFAULT_COOKIE_JAR_PATH``.
        :param io: The I/O handler for echoes and prompts.
        :param output_dir: Where output files are written, defaults to ``conf.DEFAULT_OUTPUT_DIR``.
        """
        if not cookie_jar_path:
            cookie_jar_path = DEFAULT_COOKIE_JAR_PATH
        if not output_dir:
            output_dir = DEFAULT_OUTPUT_DIR

        #: An Amazon username.
        self.username: str = username
        #: An Amazon password.
        self.password: str = password

        #: Set logger ``DEBUG``, send output to ``stderr``, and write an HTML file for each request made on the session.
        self.debug: bool = debug
        if self.debug:
            logger.setLevel(logging.DEBUG)
        #: Will continue in :func:`login`'s auth flow this many times (successes and failures).
        self.max_auth_attempts: int = max_auth_attempts
        #: The path to persist session cookies, defaults to ``conf.DEFAULT_COOKIE_JAR_PATH``.
        self.cookie_jar_path: str = cookie_jar_path
        #: The I/O handler for echoes and prompts.
        self.io: IODefault = io
        #: The directory where any output files will be produced, defaults to ``conf.DEFAULT_OUTPUT_DIR``.
        self.output_dir: str = output_dir

        #: The shared session to be used across all requests.
        self.session: Session = Session()
        #: The last response executed on the Session.
        self.last_response: Optional[Response] = None
        #: A parsed representation of the last response executed on the Session.
        self.last_response_parsed: Optional[Tag] = None
        #: If :func:`login` has been executed and successfully logged in the session.
        self.is_authenticated: bool = False

        # A bare filename has an empty dirname, and ``os.makedirs("")`` raises,
        # so only create the directory when there is one. ``exist_ok`` avoids a
        # race between checking for the directory and creating it.
        cookie_dir = os.path.dirname(self.cookie_jar_path)
        if cookie_dir:
            os.makedirs(cookie_dir, exist_ok=True)
        if os.path.exists(self.cookie_jar_path):
            with open(self.cookie_jar_path, "r", encoding="utf-8") as f:
                data = json.load(f)
                cookies = requests.utils.cookiejar_from_dict(data)
                self.session.cookies.update(cookies)

    def request(self,
                method: str,
                url: str,
                **kwargs: Any) -> Response:
        """
        Execute the request against Amazon with base headers, parsing and storing the response
        and persisting response cookies.

        :param method: The request method to execute.
        :param url: The URL to execute ``method`` on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`requests.request`.
        :return: The Response from the executed request.
        """
        # Build a fresh dict so the caller's ``headers`` object is never mutated
        # (and ``headers=None`` is tolerated). Base headers still take precedence
        # over caller-supplied ones, as before.
        headers = dict(kwargs.get("headers") or {})
        headers.update(constants.BASE_HEADERS)
        kwargs["headers"] = headers

        logger.debug("{} request to {}".format(method, url))

        self.last_response = self.session.request(method, url, **kwargs)
        self.last_response_parsed = BeautifulSoup(self.last_response.text,
                                                  "html.parser")

        # Persist the session's cookies after every request; mode "w" replaces
        # any previous contents of the cookie jar file.
        cookies = dict_from_cookiejar(self.session.cookies)
        with open(self.cookie_jar_path, "w", encoding="utf-8") as f:
            json.dump(cookies, f)

        logger.debug("Response: {} - {}".format(self.last_response.url,
                                                self.last_response.status_code))

        if self.debug:
            # The output directory may not exist yet on first use.
            os.makedirs(self.output_dir, exist_ok=True)
            page_name = self._get_page_from_url(self.last_response.url)
            with open(os.path.join(self.output_dir, page_name), "w",
                      encoding="utf-8") as html_file:
                logger.debug(
                    "Response written to file: {}".format(html_file.name))
                html_file.write(self.last_response.text)

        return self.last_response

    def get(self,
            url: str,
            **kwargs: Any) -> Response:
        """
        Perform a GET request.

        :param url: The URL to GET on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed GET request.
        """
        return self.request("GET", url, **kwargs)

    def post(self,
             url: str,
             **kwargs: Any) -> Response:
        """
        Perform a POST request.

        :param url: The URL to POST on.
        :param kwargs: Remaining ``kwargs`` will be passed to :func:`AmazonSession.request`.
        :return: The Response from the executed POST request.
        """
        return self.request("POST", url, **kwargs)

    def auth_cookies_stored(self) -> bool:
        """
        Check whether the session currently holds the cookies this class treats as proof of authentication.

        :return: ``True`` if both the ``session-token`` and ``x-main`` cookies are present and non-empty.
        """
        cookies = dict_from_cookiejar(self.session.cookies)
        return bool(cookies.get("session-token") and cookies.get("x-main"))

    def login(self) -> None:
        """
        Execute an Amazon login process. This will include the sign-in page, and may also include Captcha challenges
        and OTP pages (if 2FA authentication is enabled on your account).

        If successful, ``is_authenticated`` will be set to ``True``.

        Session cookies are persisted, and if existing session data is found during this auth flow, it will be
        skipped entirely and flagged as authenticated.

        :raises AmazonOrdersAuthError: If a page in the flow matches none of the known auth forms, or if
            ``max_auth_attempts`` forms have been submitted without the session becoming authenticated.
        """
        self.get(constants.SIGN_IN_URL)

        # If our local session data is stale, Amazon will redirect us to the signin page
        if self.auth_cookies_stored() and self.last_response.url.split("?")[0] == constants.SIGN_IN_REDIRECT_URL:
            self.logout()
            self.get(constants.SIGN_IN_URL)

        attempts = 0
        while not self.is_authenticated:
            # Check before enforcing the attempt limit, so that the page returned
            # by the final permitted form submission is still inspected.
            if self._is_signed_in():
                self.is_authenticated = True
                break

            if attempts >= self.max_auth_attempts:
                raise AmazonOrdersAuthError(
                    "Max authentication flow attempts reached.")

            for form in AUTH_FORMS:
                if form.select_form(self, self.last_response_parsed):
                    form.fill_form()
                    form.submit()
                    break
            else:
                # No known form matched the current page.
                raise AmazonOrdersAuthError(
                    "An error occurred, this is an unknown page, or its parsed contents don't match a known auth flow: {}. To capture the page to a file, set the `debug` flag.".format(
                        self.last_response.url))

            attempts += 1

    def logout(self) -> None:
        """
        Logout and close the existing Amazon session and clear cookies.
        """
        self.get(constants.SIGN_OUT_URL)

        # The sign-out request above re-persisted the cookie jar, so remove it
        # afterwards to leave no stored session behind.
        if os.path.exists(self.cookie_jar_path):
            os.remove(self.cookie_jar_path)

        self.session.close()
        self.session = Session()

        self.is_authenticated = False

    def _is_signed_in(self) -> bool:
        """
        Decide whether the session looks authenticated, from stored cookies or the last response's text.
        """
        if self.auth_cookies_stored():
            return True

        # TODO: BeautifulSoup doesn't let us query for #nav-item-signout, maybe because it's dynamic on the page, but we should find a better way to do this
        page_text = self.last_response.text
        return "Hello, sign in" not in page_text and "nav-item-signout" in page_text

    def _get_page_from_url(self,
                           url: str) -> str:
        """
        Derive a not-yet-used HTML file name in ``output_dir`` from the last path segment of ``url``.

        :param url: The URL of the page being written.
        :return: A file name of the form ``<page>_<i>.html``, using the lowest ``i`` not already on disk.
        """
        page_name = os.path.basename(urlparse(url).path)
        # Remove only a trailing ".html" extension; ``str.strip(".html")`` would
        # strip any of those characters from both ends of the name.
        suffix = ".html"
        if page_name.endswith(suffix):
            page_name = page_name[:-len(suffix)]

        # Probe for the actual file that would be written, inside the output directory.
        i = 0
        while os.path.isfile(os.path.join(self.output_dir,
                                          "{}_{}.html".format(page_name, i))):
            i += 1
        return "{}_{}.html".format(page_name, i)

258 i += 1 

259 return "{}_{}.html".format(page_name, i)