Coverage for src/epublib/parse.py: 100%

45 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-18 16:07 -0300

1from zipfile import ZipInfo 

2 

3from epublib.exceptions import NotEPUBError 

4from epublib.mediatype import MediaType 

5from epublib.package.manifest import ManifestItem 

6from epublib.package.resource import PackageDocument 

7from epublib.resources import Resource, XMLResource 

8from epublib.resources.create import create_resource 

9from epublib.source import SourceProtocol 

10from epublib.util import attr_to_str 

11 

12 

def parse_container_file(source: SourceProtocol):
    """
    Load ``META-INF/container.xml`` and locate the package document.

    Only the first ``rootfile`` element found in the container is used.

    Returns a tuple ``(container, package_document_filename)``: the
    container wrapped in an ``XMLResource`` and that rootfile's
    ``full-path`` attribute as converted by ``attr_to_str``.

    Raises ``NotEPUBError`` when the container file is not in the source,
    when it has no ``rootfile`` element, or when the rootfile's
    ``full-path`` is absent or empty.
    """

    try:
        container_info = source.getinfo("META-INF/container.xml")
    except KeyError as error:
        raise NotEPUBError("Missing 'META-INF/container.xml'") from error

    container = XMLResource(source.open(container_info), container_info)

    first_rootfile = container.soup.select_one("rootfile")
    if not first_rootfile:
        raise NotEPUBError("Can't find rootfile in container.xml")

    # A missing attribute falls back to "" so it fails the same emptiness
    # check as an attribute that is present but blank.
    full_path = first_rootfile.attrs.get("full-path", "")
    package_document_filename = attr_to_str(full_path)
    if not package_document_filename:
        raise NotEPUBError("rootfile in container.xml has no full-path")

    return container, package_document_filename

36 

37 

def parse_package_document(source: SourceProtocol, filename: str):
    """
    Open the entry named ``filename`` in the source and wrap it in a
    ``PackageDocument``.

    ``filename`` is the path of the package document inside the source
    (in ``parse`` it is the rootfile ``full-path`` read from
    ``META-INF/container.xml``); it is not the container file itself.

    No error translation is done here: whatever ``source.getinfo`` raises
    for an unknown name propagates to the caller.
    """

    package_info = source.getinfo(filename)
    return PackageDocument(source.open(package_info), package_info)

43 

44 

def init_resource(
    source: SourceProtocol,
    info: ZipInfo,
    manifest_item: ManifestItem | None,
):
    """
    Initialize a Resource object from a ZipInfo and optional ManifestItem.

    When ``manifest_item`` is ``None`` a plain ``Resource`` is returned.
    Otherwise the item's media type is passed through
    ``MediaType.coalesce`` and the resource is built by
    ``create_resource``, with ``is_nav`` set from whether the item has the
    ``"nav"`` property.

    The entry is opened exactly once per call, on whichever branch is
    taken, so no stream is opened and then discarded.
    """

    if manifest_item is None:
        return Resource(source.open(info), info)

    media_type = MediaType.coalesce(manifest_item.media_type)

    return create_resource(
        source.open(info),
        info,
        media_type,
        is_nav=manifest_item.has_property("nav"),
    )

65 

66 

def parse(
    source: SourceProtocol,
) -> tuple[XMLResource, PackageDocument, list[Resource]]:
    """
    Read and parse the EPUB file from the source.

    Returns ``(container_file, package_document, resources)``.
    ``resources`` holds one object per non-directory entry of
    ``source.infolist()``, in that order. The entries whose filenames match
    the package document and the container file are represented by those
    already-parsed objects rather than being initialized again.
    """
    container_file, package_document_filename = parse_container_file(source)
    package_document = parse_package_document(source, package_document_filename)

    resources: list[Resource] = []
    for entry in source.infolist():
        if entry.is_dir():
            continue

        entry_name = entry.filename
        manifest_item = package_document.manifest.get(entry_name)

        # Reuse the two documents parsed above; everything else is built
        # from its entry and (possibly absent) manifest item.
        if entry_name == package_document.filename:
            resource = package_document
        elif entry_name == container_file.filename:
            resource = container_file
        else:
            resource = init_resource(source, entry, manifest_item)

        resources.append(resource)

    return container_file, package_document, resources