Coverage for src/tyora/session.py: 90%
71 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-24 14:35 -0400
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-24 14:35 -0400
1import importlib.metadata
2import logging
3import os
4import sys
5from typing import AnyStr, Optional
6from urllib.parse import urljoin
8import html5lib
9import requests
10from requests_toolbelt import user_agent
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Look up the version of the "tyora" distribution from its package metadata;
# when no such metadata can be found, use the placeholder "unknown" instead.
try:
    __version__ = importlib.metadata.version("tyora")
except importlib.metadata.PackageNotFoundError:
    __version__ = "unknown"
class MoocfiCsesSession(requests.Session):
    """A ``requests.Session`` that can log into the site at ``base_url``.

    The session keeps the credentials and base URL it is given, optionally
    merges caller-supplied cookies into its cookie jar, and sends a
    ``User-Agent`` header built from the running script's file name and the
    package version.
    """

    def __init__(
        self,
        username: str,
        password: str,
        base_url: str,
        cookies: Optional[dict] = None,
        *args,
        **kwargs,
    ):
        """Create the session.

        Args:
            username: Account name; also searched for in the account link
                text to decide whether the session is logged in.
            password: Password submitted with the login form.
            base_url: URL that the relative path ``list`` is joined to.
            cookies: Optional cookies merged into the session's cookie jar.
            *args: Forwarded to ``requests.Session.__init__``.
            **kwargs: Forwarded to ``requests.Session.__init__``.
        """
        super().__init__(*args, **kwargs)

        self.username = username
        self.password = password
        self.base_url = base_url

        if cookies:
            self.cookies.update(cookies)

        self.headers.update(
            {"User-Agent": user_agent(os.path.basename(sys.argv[0]), __version__)}
        )

    @staticmethod
    def _log_response(res) -> None:
        """Log a response's URL, status code and body text at debug level."""
        # Lazy %-style arguments: the (potentially large) HTML body is only
        # formatted into the message when debug logging is actually enabled.
        logger.debug(
            "url: %s, status: %s\nhtml:\n%s", res.url, res.status_code, res.text
        )

    @property
    def is_logged_in(self) -> bool:
        """Whether the ``list`` page shows the username in its account link.

        Performs a GET request on every access.

        Raises:
            requests.HTTPError: If the ``list`` page returns an error status.
        """
        res = self.get(urljoin(self.base_url, "list"))
        res.raise_for_status()
        login_link = find_link(res.text, './/a[@class="account"]')
        login_text = login_link.get("text") or ""
        return self.username in login_text

    def login(self) -> None:
        """Log into the site using webscraping

        Steps:
        - checks if already logged in
        - retrieves the ``list`` page under the base URL
        - finds and retrieves login URL
        - finds and submits login form
        - checks if logged in

        Raises:
            ValueError: If the login link or login form cannot be found, or
                if the session is still not logged in after submitting.
            requests.HTTPError: If the ``list`` page returns an error status.
        """
        if self.is_logged_in:
            return

        res = self.get(urljoin(self.base_url, "list"))
        res.raise_for_status()
        login_link = find_link(res.text, './/a[@class="account"]')
        if not login_link:
            self._log_response(res)
            raise ValueError("Failed to find login url")
        login_url = urljoin(res.url, login_link.get("href"))

        res = self.get(login_url, headers={"referer": res.url})
        login_form = parse_form(res.text, ".//form")
        if not login_form:
            self._log_response(res)
            raise ValueError("Failed to find login form")
        # "_action" is the form's target, not a field to submit.
        action = login_form.pop("_action")

        login_form["session[login]"] = self.username
        login_form["session[password]"] = self.password

        # Keep the POST response so that a failed login logs what the server
        # answered to the login attempt, not the earlier login-form page.
        res = self.post(
            url=urljoin(res.url, action),
            headers={"referer": res.url},
            data=login_form,
        )

        if not self.is_logged_in:
            self._log_response(res)
            raise ValueError("Login failed")
def find_link(html: AnyStr, xpath: str) -> dict[str, Optional[str]]:
    """Return the ``href`` and ``text`` of the first element matching *xpath*.

    The markup is parsed with ``html5lib`` (HTML namespacing disabled) and
    searched with the parsed tree's ``find``.

    Args:
        html: HTML markup to parse.
        xpath: Path expression passed to ``find`` to locate the link element.

    Returns:
        A dict with keys ``"href"`` (the element's ``href`` attribute, or
        ``None`` if absent) and ``"text"`` (the element's ``.text``), or an
        empty dict when nothing matches.
    """
    document = html5lib.parse(html, namespaceHTMLElements=False)
    anchor = document.find(xpath)
    if anchor is not None:
        return {"href": anchor.get("href"), "text": anchor.text}
    return {}
def parse_form(html: AnyStr, xpath: str = ".//form") -> dict:
    """Return the action and named input values of the first form matching *xpath*.

    The markup is parsed with ``html5lib`` (HTML namespacing disabled) and
    searched with the parsed tree's ``find``.

    Args:
        html: HTML markup to parse.
        xpath: Path expression used to locate the form element.

    Returns:
        An empty dict when no form matches. Otherwise a dict holding the
        form's ``action`` attribute under ``"_action"`` (``None`` if the
        attribute is absent) plus one ``name -> value`` entry for every
        ``<input>`` inside the form that has a non-empty ``name``; a missing
        ``value`` is stored as ``""``.
    """
    form_element = html5lib.parse(html, namespaceHTMLElements=False).find(xpath)
    if form_element is None:
        return {}

    form_data = {"_action": form_element.get("action")}
    for form_input in form_element.iter("input"):
        form_key = form_input.get("name")
        if not form_key:
            # Controls without a name are not part of a form submission;
            # storing them under "" would post a bogus empty-named field.
            continue
        form_data[form_key] = form_input.get("value") or ""

    return form_data