Coverage for src/epublib/parse.py: 100%

43 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-06 15:17 -0300

1from zipfile import ZipInfo 

2 

3from epublib.exceptions import NotEPUBError 

4from epublib.package.manifest import ManifestItem 

5from epublib.package.resource import PackageDocument 

6from epublib.resources import Resource, XMLResource 

7from epublib.resources.create import create_resource 

8from epublib.source import SourceProtocol 

9from epublib.util import attr_to_str 

10 

11 

def parse_container_file(source: SourceProtocol):
    """
    Load 'META-INF/container.xml' from the source and locate the package
    document it points to.

    Only the first ``rootfile`` element found in the container is used.

    Returns a pair ``(container, package_document_filename)`` where
    ``container`` is the parsed container file as an ``XMLResource`` and
    ``package_document_filename`` is the value of the rootfile's
    ``full-path`` attribute.

    Raises ``NotEPUBError`` when the container file is missing, when it
    has no ``rootfile`` element, or when that element has no (or an
    empty) ``full-path`` attribute.
    """

    try:
        container_info = source.getinfo("META-INF/container.xml")
    except KeyError as exc:
        raise NotEPUBError("Missing 'META-INF/container.xml'") from exc

    container = XMLResource(source.open(container_info), container_info)

    rootfile_tag = container.soup.select_one("rootfile")
    if not rootfile_tag:
        raise NotEPUBError("Can't find rootfile in container.xml")

    # A missing attribute is treated the same way as an empty one.
    raw_full_path = rootfile_tag.attrs.get("full-path", "")
    full_path = attr_to_str(raw_full_path)
    if not full_path:
        raise NotEPUBError("rootfile in container.xml has no full-path")

    return container, full_path

35 

36 

def parse_package_document(source: SourceProtocol, filename: str):
    """
    Parse the package document stored at ``filename`` in the source.

    ``filename`` is the path of the package document inside the source,
    i.e. the value of the ``full-path`` attribute of the rootfile declared
    in 'META-INF/container.xml' (see ``parse_container_file``). It is not
    the container file itself.

    Returns a ``PackageDocument`` built from the opened entry and its
    info object.

    No error translation happens here: whatever ``source.getinfo`` raises
    for an unknown filename (``parse_container_file`` expects a
    ``KeyError`` from it) propagates to the caller unchanged.
    """

    info = source.getinfo(filename)
    return PackageDocument(source.open(info), info)

42 

43 

def init_resource(
    source: SourceProtocol,
    info: ZipInfo,
    manifest_item: ManifestItem | None,
):
    """
    Initialize a Resource object from a ZipInfo and optional ManifestItem.

    When ``manifest_item`` is ``None`` the entry is wrapped in a plain
    ``Resource``. Otherwise the concrete resource is built by
    ``create_resource`` from the item's media type, with ``is_nav`` set
    when the item carries the "nav" property.

    The entry is opened through ``source.open(info)`` exactly once,
    whichever branch is taken.
    """

    # Open the archive member a single time so that no unused handle is
    # left behind when the resource is created from a manifest item.
    file = source.open(info)

    if manifest_item is None:
        return Resource(file, info)

    return create_resource(
        file,
        info,
        manifest_item.media_type,
        is_nav=manifest_item.has_property("nav"),
    )

62 

63 

def parse(
    source: SourceProtocol,
) -> tuple[XMLResource, PackageDocument, list[Resource]]:
    """
    Read the EPUB held by ``source`` and parse its contents.

    Returns a triple ``(container_file, package_document, resources)``.
    ``resources`` has one entry per non-directory item reported by
    ``source.infolist()``, in that order; the already parsed container
    file and package document are reused for their own entries rather
    than being parsed a second time.
    """
    container_file, package_document_filename = parse_container_file(source)
    package_document = parse_package_document(source, package_document_filename)

    resources: list[Resource] = []
    for entry in source.infolist():
        # Directory entries carry no content of their own.
        if entry.is_dir():
            continue

        item = package_document.manifest.get(entry.filename)

        if entry.filename == package_document.filename:
            resource = package_document
        elif entry.filename == container_file.filename:
            resource = container_file
        else:
            resource = init_resource(source, entry, item)

        resources.append(resource)

    return container_file, package_document, resources