Recipes
Recipes source code is in the /recipes
directory of odfdo
sources.
Most recipes are autonomous scripts doing actual modifications of ODF sample files; you can check the results in the recipes/recipes_output
directory.
How to write hello world in a text document
Create a basic text document containing a “Hello World” paragraph.
recipes/how_to_write_hello_world_in_a_text_document.py
#!/usr/bin/env python
"""Create a basic text document containing a "Hello World" paragraph.
"""
import os
from pathlib import Path
from odfdo import Document, Paragraph
_DOC_SEQUENCE = 3
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_hello"
TARGET = "document.odt"
def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` inside OUTPUT_DIR, creating the directory first."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def main():
    """Build a text document holding one "Hello World" paragraph, check it, save it."""
    document = Document("text")
    content = document.body
    # Empty the body of the new document before adding our own paragraph.
    content.clear()
    content.append(Paragraph("Hello World"))

    test_unit(document)
    save_new(document, TARGET)
def test_unit(document: Document) -> None:
    """Assert the body text; does nothing unless ODFDO_TESTING is in the environment."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        rendered = str(document.body)
        print(rendered)
        assert rendered == "Hello World\n"
if __name__ == "__main__":
main()
How to write hello world in a spreadsheet document
Create a basic spreadsheet with “Hello World” in the first cell.
recipes/how_to_write_hello_world_in_a_spreadsheet_document.py
#!/usr/bin/env python
"""Create a basic spreadsheet with "Hello World" in the first cell.
"""
import os
from pathlib import Path
from odfdo import Document, Table
_DOC_SEQUENCE = 5
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_ods"
TARGET = "document.ods"
def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` inside OUTPUT_DIR, creating the directory first."""
    target_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", target_path)
    document.save(target_path, pretty=True)
def main():
    """Build a spreadsheet whose single table has "Hello World" in A1, check it, save it."""
    document = Document("spreadsheet")
    content = document.body
    content.clear()

    sheet = Table("Empty Table")
    sheet.set_value("A1", "Hello World")
    content.append(sheet)

    test_unit(document)
    save_new(document, TARGET)
def test_unit(document: Document) -> None:
    """Assert the content of the first cell; active only when ODFDO_TESTING is set."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        first_table = document.body.get_table(0)
        first_cell = first_table.get_cell((0, 0))
        text = first_cell.value.strip()
        print(text)
        assert text == "Hello World"
if __name__ == "__main__":
main()
Basic presentation hello world
Write a basic “Hello World” in the middle of the first page of a presentation.
recipes/basic_presentation_hello_world.py
#!/usr/bin/env python
"""Write a basic "Hello World" in the middle of the first page
of a presentation.
"""
import os
from pathlib import Path
from odfdo import Document, DrawPage, Frame
_DOC_SEQUENCE = 7
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_odp"
TARGET = "hello.odp"
def save_new(document: Document, name: str) -> None:
    """Write the recipe result ``document`` to OUTPUT_DIR / ``name``."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def add_text_frame(document: Document, text: str) -> None:
    """Replace the body content by one page holding a single text frame with ``text``."""
    body = document.body
    body.clear()

    frame = Frame.text_frame(
        text,
        size=("7cm", "5cm"),
        position=("11cm", "8cm"),
        style="Standard",
        text_style="Standard",
    )
    first_page = DrawPage("page1", name="Page 1")
    first_page.append(frame)
    body.append(first_page)
def main() -> None:
    """Create the presentation, add the greeting frame, run the checks and save."""
    presentation = Document("presentation")
    add_text_frame(presentation, "Hello world!")
    test_unit(presentation)
    save_new(presentation, TARGET)
def test_unit(document: Document) -> None:
    """Assert there is exactly one frame with the greeting; active only under ODFDO_TESTING."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        found = document.body.get_frames()
        assert len(found) == 1
        assert str(found[0]).strip() == "Hello world!"
if __name__ == "__main__":
main()
Create a basic text document
Create a basic text document with headers and paragraphs.
recipes/create_a_basic_text_document.py
#!/usr/bin/env python
"""Create a basic text document with headers and paragraphs.
"""
import os
from pathlib import Path
from odfdo import Document, Header, Paragraph
_DOC_SEQUENCE = 10
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_text"
TARGET = "document.odt"
def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` inside OUTPUT_DIR, creating the directory first."""
    out_file = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", out_file)
    document.save(out_file, pretty=True)
def create_basic_document():
    """Return a new text document made of one level-1 header and two titled sections."""
    preparation_text = (
        "Sous le consulat de Lucius Domitius et d'Appius Claudius, "
        "César, quittant les quartiers d'hiver pour aller en Italie, "
        "comme il avait coutume de le faire chaque année, ordonne aux "
        "lieutenants qu'il laissait à la tête des légions de construire, "
        "pendant l'hiver, le plus de vaisseaux qu'il serait possible, "
        "et de réparer les anciens."
    )
    island_text = (
        "Cette île est de forme triangulaire ; l'un des côtés regarde "
        "la Gaule. Des deux angles de ce côté, l'un est au levant, "
        "vers le pays de Cantium, où abordent presque tous les vaisseaux "
        "gaulois ; l'autre, plus bas, est au midi. La longueur de ce côté "
        "est d'environ cinq cent mille pas. "
    )

    document = Document("text")
    body = document.body
    body.clear()

    # Elements are appended in reading order.
    elements = (
        Header(1, "De la Guerre des Gaules - Livre V"),
        Header(2, "Préparatifs d'expédition en Bretagne"),
        Paragraph(preparation_text),
        Header(2, "La Bretagne"),
        Paragraph(island_text),
    )
    for element in elements:
        body.append(element)
    return document
def main():
    """Generate the basic document, run the test-suite check and save the result."""
    result = create_basic_document()
    test_unit(result)
    save_new(result, TARGET)
def test_unit(document: Document) -> None:
    """Assert how the paragraph at position 1 starts; active only under ODFDO_TESTING."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        paragraph = document.body.get_paragraph(position=1)
        text = str(paragraph)
        print(text)
        assert text.startswith("Cette île est de forme triangulaire")
if __name__ == "__main__":
main()
How to add a paragraph to a text document
Minimal example of how to add a paragraph.
recipes/how_to_add_a_paragraph_to_a_text_document.py
"""Minimal example of how to add a paragraph.
"""
from odfdo import Document, Paragraph
_DOC_SEQUENCE = 12
def main():
    """Append a "Hello World" paragraph to the body of a new text document."""
    document = Document("text")
    # Create the paragraph with its content and attach it to the body in one go.
    document.body.append(Paragraph("Hello World"))
if __name__ == "__main__":
main()
Create a basic text document with a list
Create a basic text document with a list.
recipes/create_a_basic_text_document_with_a_list.py
#!/usr/bin/env python
"""Create a basic text document with a list.
"""
import os
from pathlib import Path
from odfdo import Document, List, ListItem
_DOC_SEQUENCE = 20
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_list"
TARGET = "document.odt"
def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` inside OUTPUT_DIR, creating the directory first."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def main():
    """Generate the document with its list, run the test-suite check and save it."""
    result = generate_document()
    test_unit(result)
    save_new(result, TARGET)
def generate_document():
    """Return a new text document whose body holds a four-item list."""
    document = Document("text")

    # A List is built here from a Python list of strings.
    names = List(["Arthur", "Ford", "Trillian"])
    # The list is attached to the body first and extended afterwards.
    document.body.append(names)
    names.append_item(ListItem("Marvin"))

    # Show the formatted text of the document; the list should read:
    # - Arthur
    # - Ford
    # - Trillian
    # - Marvin
    print(document.get_formatted_text())
    return document
def test_unit(document: Document) -> None:
    """Assert the stripped string form of the document; active only under ODFDO_TESTING."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        rendered = str(document).strip()
        assert rendered == "- Arthur\n- Ford\n- Trillian\n- Marvin"
if __name__ == "__main__":
main()
Create a basic text document with list and sublists
Create a basic text document with list and sublists.
recipes/create_a_basic_text_document_with_list_and_sublists.py
#!/usr/bin/env python
"""Create a basic text document with list and sublists.
"""
import os
from pathlib import Path
from odfdo import Document, List, ListItem
_DOC_SEQUENCE = 25
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_sublist"
TARGET = "document.odt"
def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` inside OUTPUT_DIR, creating the directory first."""
    target_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", target_path)
    document.save(target_path, pretty=True)
def main():
    """Generate the document with list and sublist, run the check and save it."""
    result = generate_document()
    test_unit(result)
    save_new(result, TARGET)
def generate_document():
    """Return a text document with a list that gets a sublist and inserted items."""
    document = Document("text")

    # Build the list, with "Marvin" as a separate item we keep a handle on.
    names = List(["Arthur", "Ford", "Trillian"])
    marvin_item = ListItem("Marvin")
    names.append_item(marvin_item)
    document.body.append(names)

    # A sublist is simply a list appended to an item of another list.
    sublist = List(["Paranoid Android", "older than the universe"])
    marvin_item.append(sublist)

    # Intermediate result:
    # - Arthur
    # - Ford
    # - Trillian
    # - Marvin
    # - Paranoid Android
    # - older than the universe
    print(document.get_formatted_text())

    # In case you forgot an item, insert it at a given position...
    names.insert_item("some dolphins", position=1)
    # ...or relative to another item, looked up here by its content.
    anchor = names.get_item(content="Marvin")
    names.insert_item("Zaphod", before=anchor)
    names.insert_item("and many others", after=anchor)

    # Final result:
    # - Arthur
    # - some dolphins
    # - Ford
    # - Trillian
    # - Zaphod
    # - Marvin
    # - Paranoid Android
    # - older than the universe
    # - and many others
    print(document.get_formatted_text())
    return document
def test_unit(document: Document) -> None:
    """Assert the formatted text of the document; active only under ODFDO_TESTING."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        expected = (
            "- Arthur\n"
            "- some dolphins\n"
            "- Ford\n"
            "- Trillian\n"
            "- Zaphod\n"
            "- Marvin\n"
            " \n"
            " - Paranoid Android\n"
            " - older than the universe\n"
            "- and many others"
        )
        assert document.get_formatted_text().strip() == expected
if __name__ == "__main__":
main()
How to add a sublist to a list
Minimal example of how to add a sublist to a list.
recipes/how_to_add_a_sublist_to_a_list.py
"""Minimal example of how to add a sublist to a list.
"""
from odfdo import Document, List, ListItem
_DOC_SEQUENCE = 27
def main():
    """Build a list, give its last item a sublist and print the serialized body."""
    document = Document("text")
    body = document.body

    drinks = List(["chocolat", "café"])
    body.append(drinks)

    tea = ListItem("thé")
    drinks.append(tea)
    # A sublist is simply a list appended to an item of another list.
    tea.append(List(["thé vert", "thé rouge"]))

    print(body.serialize(True))
if __name__ == "__main__":
main()
How to insert a new item within a list
Minimal example of how to insert a new item within a list.
recipes/how_to_insert_a_new_item_within_a_list.py
"""Minimal example of how to insert a new item within a list.
"""
from odfdo import List
_DOC_SEQUENCE = 28
def main():
    """Show the three ways of inserting an item: by position, before and after an item."""
    drinks = List(["chocolat", "café"])

    # In case you forgot to insert an important item, give its position:
    drinks.insert_item("Chicorée", position=1)

    # An existing item, looked up by content, can also serve as the anchor.
    anchor = drinks.get_item(content="café")
    drinks.insert_item("Chicorée", before=anchor)
    drinks.insert_item("Chicorée", after=anchor)
if __name__ == "__main__":
main()
How to add an item to a list
Minimal example of how to add an item to a list.
recipes/how_to_add_an_item_to_a_list.py
"""Minimal example of how to add an item to a list.
"""
from odfdo import List, ListItem
_DOC_SEQUENCE = 28
def main():
    """Append a new ListItem to an existing List."""
    drinks = List(["chocolat", "café"])
    drinks.append(ListItem("thé"))
if __name__ == "__main__":
main()
Get text content from odt file
Read the text content from an .odt file.
recipes/get_text_content_from_odt_file.py
#!/usr/bin/env python
"""Read the text content from an .odt file."""
import os
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 30
DATA = Path(__file__).parent / "data"
# ODF export of Wikipedia article Hitchhiker's Guide to the Galaxy (CC-By-SA) :
SOURCE = "collection2.odt"
def read_source_document() -> Document:
    """Return the Document named by the first CLI argument, or DATA / SOURCE if none."""
    if len(sys.argv) > 1:
        source = sys.argv[1]
    else:
        source = DATA / SOURCE
    return Document(source)
def read_text_content(document: Document) -> str:
    """Print the document type, text size and first 320 characters; return the text."""
    # just verify what type of document it is:
    doc_type = document.get_type()
    print("Type of document:", doc_type)

    # A quick way to get the text content:
    content = document.get_formatted_text()
    print("Size :", len(content))

    # Let's show the beginning :
    beginning = content[:320]
    print(beginning)
    return content
def main() -> None:
    """Read the source document, extract its text and run the test-suite check."""
    source_document = read_source_document()
    content = read_text_content(source_document)
    test_unit(content)
def test_unit(text: str) -> None:
    """Assert the length of the extracted text; active only under ODFDO_TESTING."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        assert len(text) == 56828
if __name__ == "__main__":
main()
Create a basic text document with a table of content
Create a basic text document with a table of content.
recipes/create_a_basic_text_document_with_a_table_of_content.py
#!/usr/bin/env python
"""Create a basic text document with a table of content.
"""
import os
from pathlib import Path
from odfdo import TOC, Document, Header, Paragraph
_DOC_SEQUENCE = 35
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_toc"
DATA = Path(__file__).parent / "data"
LOREM = (DATA / "lorem.txt").read_text(encoding="utf8")
TARGET = "document.odt"
def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` inside OUTPUT_DIR, creating the directory first."""
    out_file = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", out_file)
    document.save(out_file, pretty=True)
def main():
    """Create a text document, fill it with content and a table of content, save it."""
    new_document = Document("text")
    make_toc(new_document)
    save_new(new_document, TARGET)
def make_toc(document):
    """Add a titled table of content plus headers and paragraphs, then fill the TOC."""
    body = document.body

    # Create the Table Of Content, changing its default title.
    toc = TOC()
    toc.title = "My Table of Content"
    # Do not forget to add the component to the document:
    body.append(toc)

    # Add some content with headers, using slices of the LOREM text as titles.
    body.append(Header(1, LOREM[:70]))
    for idx in range(3):
        start = idx * 5
        body.append(Header(2, LOREM[start : start + 70]))
        body.append(Paragraph(LOREM))

    # Beware, update the TOC with the actual content. If not done there,
    # the reader will need to "update the table of content" later.
    toc.fill()

    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        third_line = str(toc).split("\n")[2]
        assert third_line == (
            "1.1. Lorem ipsum dolor sit amet, consectetuer "
            "adipiscing elit. Sed non risu"
        )
if __name__ == "__main__":
main()
How to add a table of content to a document
Adding a table of content to an existing text document.
recipes/how_to_add_a_table_of_content_to_a_document.py
#!/usr/bin/env python
"""Adding a table of content to an existing text document.
"""
from pathlib import Path
from odfdo import TOC, Document, Paragraph, Style
_DOC_SEQUENCE = 37
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "add_toc"
TARGET = "document.odt"
DATA = Path(__file__).parent / "data"
SOURCE = DATA / "collection.odt"
def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` inside OUTPUT_DIR, creating the directory first."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def main():
    """Insert a page-break paragraph and a filled TOC at the top of SOURCE, then save."""
    document = Document(SOURCE)
    body = document.body

    # here is a way to insert a page break: a paragraph style with
    # "fo:break-before" set to "page", used by an empty paragraph.
    break_style = Style("paragraph", name="page_break")
    break_style.set_properties({"fo:break-before": "page"})
    document.insert_style(break_style)
    body.insert(Paragraph("", style="page_break"), 0)

    # The TOC goes at position 0, so before the paragraph inserted above.
    # to put the TOC at the end, just do:
    # body.append(toc)
    toc = TOC()
    body.insert(toc, 0)
    # fill the toc with current content of document:
    toc.fill()

    save_new(document, TARGET)
if __name__ == "__main__":
main()
Update a text document with a table of content
Update the table of contents of a document.
recipes/update_a_text_document_with_a_table_of_content.py
#!/usr/bin/env python
"""Update the table of contents of a document.
"""
from pathlib import Path
from odfdo import Document, Header, Paragraph
_DOC_SEQUENCE = 38
DATA = Path(__file__).parent / "data"
SOURCE = "doc_with_toc.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "modified_toc"
TARGET = "document.odt"
def save_new(document: Document, name: str) -> None:
    """Save ``document`` as ``name`` inside OUTPUT_DIR, creating the directory first."""
    target_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", target_path)
    document.save(target_path, pretty=True)
def main() -> None:
    """Load DATA / SOURCE, run the TOC update scenario and save the result."""
    loaded = Document(DATA / SOURCE)
    update_toc(loaded)
    save_new(loaded, TARGET)
def update_toc(document: Document) -> None:
    """Run the scenario: each modification step is followed by its TOC check."""
    scenario = (
        check_toc_v1,
        add_some_header,
        check_toc_v2,
        change_toc_title,
        check_toc_v3,
        change_toc_title_to_empty,
        check_toc_v4,
        remove_second_header_1b,
        check_toc_v5,
        add_toc_title,
        check_toc_v6,
    )
    for step in scenario:
        step(document)
def check_toc_v1(document: Document) -> None:
    """Assert the initial TOC text: a title line followed by four entries."""
    lines = str(document.body.toc).split("\n")
    expected_starts = (
        "Table of Contents",
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1B",
        "1.3. Lorem 1C",
    )
    assert len(lines) == 5
    for line, start in zip(lines, expected_starts):
        assert line.startswith(start)
def add_some_header(document: Document) -> None:
    """Append a level-1 header and a paragraph to the body, then refill the TOC."""
    body = document.body
    body.append(Header(1, "New header"))
    body.append(Paragraph("Some text after the new header."))
    # update the table of contents
    document.body.toc.fill(document)
def check_toc_v2(document: Document) -> None:
    """Assert the TOC text now ends with the "2. New header" entry."""
    lines = str(document.body.toc).split("\n")
    expected_starts = (
        "Table of Contents",
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1B",
        "1.3. Lorem 1C",
        "2. New header",
    )
    assert len(lines) == 6
    for line, start in zip(lines, expected_starts):
        assert line.startswith(start)
def change_toc_title(document: Document) -> None:
    """Set the TOC title to "Another title" and refill the TOC."""
    table_of_content = document.body.toc
    table_of_content.set_toc_title("Another title")
    table_of_content.fill(document)
def check_toc_v3(document: Document) -> None:
    """Assert the TOC text still has six lines and starts with the new title."""
    lines = str(document.body.toc).split("\n")
    assert len(lines) == 6
    first_line = lines[0]
    assert first_line.startswith("Another title")
def change_toc_title_to_empty(document: Document) -> None:
    """Set an empty TOC title and refill the TOC."""
    table_of_content = document.body.toc
    # that will remove the title
    table_of_content.set_toc_title("")
    table_of_content.fill(document)
def check_toc_v4(document: Document) -> None:
    """Assert the TOC text holds only the five entries, without a title line."""
    lines = str(document.body.toc).split("\n")
    expected_starts = (
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1B",
        "1.3. Lorem 1C",
        "2. New header",
    )
    assert len(lines) == 5
    for line, start in zip(lines, expected_starts):
        assert line.startswith(start)
def remove_second_header_1b(document: Document) -> None:
    """Delete the header found at position 2 of the body, then refill the TOC."""
    # The header returned here is attached to the document, so
    # deleting it removes the element from the document.
    document.body.get_header(position=2).delete()
    document.body.toc.fill(document)
def check_toc_v5(document: Document) -> None:
    """Assert the TOC text after the deletion: four entries, "Lorem 1C" now 1.2."""
    lines = str(document.body.toc).split("\n")
    expected_starts = (
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1C",
        "2. New header",
    )
    assert len(lines) == 4
    for line, start in zip(lines, expected_starts):
        assert line.startswith(start)
def add_toc_title(document: Document) -> None:
    """Set the TOC title to "A new title" and refill the TOC."""
    table_of_content = document.body.toc
    table_of_content.set_toc_title("A new title")
    table_of_content.fill(document)
def check_toc_v6(document: Document) -> None:
    """Assert the final TOC text: the new title line followed by four entries."""
    lines = str(document.body.toc).split("\n")
    expected_starts = (
        "A new title",
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1C",
        "2. New header",
    )
    assert len(lines) == 5
    for line, start in zip(lines, expected_starts):
        assert line.startswith(start)
if __name__ == "__main__":
main()
Create a basic text document with annotations
Create a basic text document with annotations.
recipes/create_a_basic_text_document_with_annotations.py
#!/usr/bin/env python
"""Create a basic text document with annotations.
"""
import os
from pathlib import Path
from odfdo import Document, Header, Paragraph
_DOC_SEQUENCE = 40
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_annotations"
DATA = Path(__file__).parent / "data"
LOREM = (DATA / "lorem.txt").read_text(encoding="utf8")
TARGET = "document.odt"
def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` inside OUTPUT_DIR, creating the directory first."""
    out_file = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", out_file)
    document.save(out_file, pretty=True)
def main():
    """Create a text document, add annotated paragraphs, run the check and save."""
    new_document = Document("text")
    make_annotations(new_document)
    test_unit(new_document)
    save_new(new_document, TARGET)
def make_annotations(document):
    """Add a main title and three titled paragraphs, each carrying one annotation."""
    body = document.body
    body.append(Header(1, "Main title"))

    for index in range(3):
        body.append(Header(2, f"title {index}"))
        paragraph = Paragraph(LOREM[:240])

        # Annotations are notes that don't appear in the document but
        # typically on a side bar in a desktop application. So they are
        # not printed. The 4th word of the paragraph is chosen as anchor.
        anchor_word = str(paragraph).split()[3]
        paragraph.insert_annotation(
            # The word after which the annotation is inserted.
            after=anchor_word,
            # The annotation text itself.
            body="It's so easy!",
            # The author of the annotation.
            creator="Bob",
            # A date= datetime value can also be given; per the original
            # recipe note it defaults to datetime.now().
        )
        body.append(paragraph)
def test_unit(document: Document) -> None:
    """Assert three annotations by "Bob" exist; active only under ODFDO_TESTING."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        bob_annotations = document.body.get_annotations(creator="Bob")
        assert len(bob_annotations) == 3
if __name__ == "__main__":
main()
Create a basic text document with footnotes
Create a basic text document with footnotes.
recipes/create_a_basic_text_document_with_footnotes.py
#!/usr/bin/env python
"""Create a basic text document with footnotes.
"""
import os
from pathlib import Path
from odfdo import Document, Header, Paragraph
_DOC_SEQUENCE = 45
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_footnotes"
DATA = Path(__file__).parent / "data"
LOREM = (DATA / "lorem.txt").read_text(encoding="utf8")
TARGET = "document.odt"
def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` inside OUTPUT_DIR, creating the directory first."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def main():
    """Create a text document, add paragraphs with footnotes, run the check and save."""
    new_document = Document("text")
    make_footnotes(new_document)
    test_unit(new_document)
    save_new(new_document, TARGET)
def make_footnotes(document):
    """Add a main title and three titled paragraphs, each carrying one footnote.

    Every footnote gets its own note_id ("note0", "note1", "note2") and its
    own citation symbol ("1", "2", "3"), so the three citations can be told
    apart in the text.
    """
    body = document.body

    # Add content (See Create_a_basic_document.py)
    body.append(Header(1, "Main title"))

    for index in range(3):
        body.append(Header(2, f"title {index}"))
        paragraph = Paragraph(LOREM[:240])

        # Notes are quite complex so they deserve a dedicated API on
        # paragraphs. The 4th word of the paragraph is chosen as anchor.
        some_word = str(paragraph).split()[3]
        paragraph.insert_note(
            # The word after which the citation is inserted.
            after=some_word,
            # The unique identifier of the note in the document.
            note_id=f"note{index}",
            # The symbol the user sees to follow the footnote; numbered
            # from 1 so that each footnote has a distinct citation.
            citation=str(index + 1),
            # The footnote itself, at the end of the page.
            body=(
                f'Author{index}, A. (2007). "How to cite references", Sample Editions.'
            ),
        )
        body.append(paragraph)
def test_unit(document: Document) -> None:
    """Assert the body holds three notes; active only under ODFDO_TESTING."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        notes = document.body.get_notes()
        assert len(notes) == 3
if __name__ == "__main__":
main()
How to add footnote to a text document
Minimal example of how to add a footnote to a text document.
recipes/how_to_add_footnote_to_a_text_document.py
"""Minimal example of how to add a footnote to a text document.
"""
from odfdo import Document, Paragraph
_DOC_SEQUENCE = 47
def main():
    """Create a one-paragraph text document and attach a footnote to the paragraph."""
    document = Document("text")
    content = document.body
    content.clear()

    paragraph = Paragraph("A paragraph with a footnote about some references.")
    content.append(paragraph)

    # Notes are quite complex so they deserve a dedicated API on paragraphs.
    # The arguments are:
    #
    # after    => The word after which the "¹" citation is inserted.
    # note_id  => The unique identifier of the note in the document.
    # citation => The symbol the user sees to follow the footnote.
    # body     => The footnote itself, at the end of the page.
    #
    # odfdo creates footnotes by default. To create endnotes (notes
    # that appear at the end of the document), give the
    # note_class='endnote' parameter.
    note_arguments = {
        "after": "graph",
        "note_id": "note1",
        "citation": "1",
        "body": 'Author, A. (2007). "How to cite references" New York: McGraw-Hill.',
    }
    paragraph.insert_note(**note_arguments)
if __name__ == "__main__":
main()
Create a text document with tables in it
Build a commercial document, with numerical values displayed in both the text and in a table.
recipes/create_a_text_document_with_tables_in_it.py
#!/usr/bin/env python
"""Build a commercial document, with numerical values displayed in
both the text and in a table.
"""
import os
from pathlib import Path
from odfdo import (
Cell,
Document,
Header,
List,
ListItem,
Paragraph,
Row,
Table,
create_table_cell_style,
make_table_cell_border_string,
)
_DOC_SEQUENCE = 50
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "commercial"
TARGET = "commercial.odt"
TAX_RATE = 0.20
class Product:
    """Minimalistic product: a reference, a display name and a unit price."""

    def __init__(self, reference: int, name: str, price: float) -> None:
        """Store the fields; the stored name is ``name`` prefixed by "Product "."""
        self.reference = reference
        self.price = price
        self.name = f"Product {name}"
class OrderLine:
    """One line of an order: a product reference and the ordered quantity."""

    def __init__(self, reference: int, quantity: int) -> None:
        """Store the product ``reference`` and the ``quantity``."""
        self.quantity = quantity
        self.reference = reference
def make_product_catalog() -> list[Product]:
    """Return five products, referenced 0 to 4 and named with the letters A to E.

    Prices start at 10.0 and increase by 10.5 from one product to the next.
    """
    first_letter = 65  # code point of "A"
    return [
        Product(index, chr(first_letter + index), 10.0 + 10.5 * index)
        for index in range(5)
    ]
def make_order(catalog: list[Product]) -> list[OrderLine]:
    """Return one OrderLine per catalog product, with growing quantities.

    The quantity starts from 1 and is multiplied by 2.5, then truncated to
    an int, before each line is created.
    """
    lines: list[OrderLine] = []
    current_quantity = 1
    for item in catalog:
        current_quantity = int(current_quantity * 2.5)
        lines.append(OrderLine(item.reference, current_quantity))
    return lines
def save_new(document: Document, name: str) -> None:
    """Write the recipe result ``document`` to OUTPUT_DIR / ``name``."""
    target_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", target_path)
    document.save(target_path, pretty=True)
def create_header_cell_style(doc: Document) -> str:
    """Insert a bordered, grey-background cell style in ``doc`` and return its name."""
    border = make_table_cell_border_string(thick="0.03cm", color="black")
    # The same border string is used on all four sides of the cell.
    borders = {
        f"border_{side}": border for side in ("right", "left", "bottom", "top")
    }
    header_style = create_table_cell_style(
        color="black",
        background_color=(210, 210, 210),
        **borders,
    )
    return doc.insert_style(style=header_style, automatic=True)
def add_top_content(doc: Document, catalog: list[Product]) -> None:
    """Append the titles, an intro paragraph and the list of products to ``doc``."""
    body = doc.body
    body.append(Header(1, "Basic commercial document"))
    body.append(Header(2, "Available products"))
    body.append(Paragraph("Here the list:"))

    # The products are shown as an odfdo.List, one item per product.
    listing = List()
    body.append(listing)
    for product in catalog:
        label = f"{product.name:<10}, price: {product.price:.2f} €"
        listing.append(ListItem(label))
def add_order_table(
    doc: Document, catalog: list[Product], order: list[OrderLine]
) -> None:
    """Append a "Your order" header and the order table to the body of ``doc``."""
    body = doc.body
    body.append(Header(2, "Your order"))

    # The header cell style must be inserted in the document before use.
    header_style_name = create_header_cell_style(doc)
    body.append(make_order_table(catalog, order, header_style_name))
def make_order_table(
    catalog: list[Product],
    order: list[OrderLine],
    style_name: str,
) -> Table:
    """Build and return the order table.

    Layout: a header row, one row per order line, an empty spanned row,
    a "Total:" row and a "Total with tax:" row. ``style_name`` is applied
    to every cell of the header row.
    """

    def euro_cell(amount) -> Cell:
        """Return a float cell for ``amount``, displayed with 2 decimals and "€"."""
        money = Cell()
        money.set_value(
            amount,
            text=f"{amount:.2f} €",
            currency="EUR",
            cell_type="float",
        )
        return money

    table = Table("Table")

    # Header of table (the row could also be addressed as row 0).
    header = Row()
    header.set_values(["Product", "Price", "Quantity", "Amount"])
    table.set_row("A1", header)

    # One row per order line, right below the header. Columns can be
    # addressed by letter ("A") or by index (0).
    row_number = 0
    for row_number, line in enumerate(order, start=1):
        product = catalog[line.reference]
        line_row = Row()
        line_row.set_value("A", product.name)
        line_row.set_cell("B", euro_cell(product.price))
        line_row.set_value("C", line.quantity)
        line_row.set_cell("D", euro_cell(product.price * line.quantity))
        table.set_row(row_number, line_row)

    # An empty row spanning the four columns separates lines from totals.
    row_number += 1
    table.set_row(row_number, Row())
    table.set_span((0, row_number, 3, row_number))

    # Total line: sum of the "Amount" column, skipping the header cell and
    # the empty row just added. The original recipe notes that this total
    # is a Decimal.
    row_number += 1
    total = sum(table.get_column_values(3)[1:-1])
    total_row = Row()
    total_row.set_value(0, "Total:")
    total_row.set_cell(3, euro_cell(total))
    table.set_row(row_number, total_row)
    # merge the 3 first columns for this row:
    table.set_span((0, row_number, 2, row_number), merge=True)

    # VAT line: the total is converted to float before applying TAX_RATE.
    row_number += 1
    total_vat = float(total) * (1 + TAX_RATE)
    vat_row = Row()
    vat_row.set_value(0, "Total with tax:")
    vat_row.set_cell(3, euro_cell(total_vat))
    table.set_row(row_number, vat_row)
    table.set_span((0, row_number, 2, row_number), merge=True)

    # Let's add some style on header row: each cell is restyled, written
    # back in the row, and the row is written back in the table.
    styled_header = table.get_row(0)
    for cell in styled_header.traverse():
        cell.style = style_name
        styled_header.set_cell(x=cell.x, cell=cell)
    table.set_row(styled_header.y, styled_header)

    return table
def generate_commercial(catalog: list[Product], order: list[OrderLine]) -> Document:
    """Generate a text Document with the product list and the order table in it."""
    commercial = Document("text")
    add_top_content(commercial, catalog)
    add_order_table(commercial, catalog, order)
    return commercial
def main() -> None:
    """Build catalog and order, generate the document, run the check and save it."""
    catalog = make_product_catalog()
    document = generate_commercial(catalog, make_order(catalog))
    test_unit(document)
    save_new(document, TARGET)
def test_unit(document: Document) -> None:
    """Assert some cell values of the table named "Table"; active only under ODFDO_TESTING."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        table = document.body.get_table(name="Table")
        assert isinstance(table, Table)
        expected_values = {
            "A1": "Product",
            "A2": "Product A",
            "A8": "Total:",
            "B1": "Price",
            "C1": "Quantity",
            "C2": 2,
            "D1": "Amount",
        }
        for coordinates, expected in expected_values.items():
            assert table.get_cell(coordinates).value == expected
if __name__ == "__main__":
main()
How to add a table to a document
Minimal example of how to add a table to a text document.
recipes/how_to_add_a_table_to_a_document.py
"""Minimal example of how to add a table to a text document."""
import os
from odfdo import Document, Header, Paragraph, Table
_DOC_SEQUENCE = 55
def generate_document() -> Document:
    """Return a new text document with a header, a paragraph and a 3x3 table."""
    document = Document("text")
    content = document.body

    # A section title and a short introduction make the document clearer.
    content.append(Header(1, "Tables"))
    content.append(Paragraph("A 3x3 table:"))

    # Create the table with its dimensions and attach it to the body.
    content.append(Table("Table 1", width=3, height=3))
    return document
def main() -> None:
    """Generate the document and run the test-suite check on it."""
    test_unit(generate_document())
def test_unit(document: Document) -> None:
    """Assert the first table has size (3, 3); active only under ODFDO_TESTING."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        first_table = document.body.get_table(0)
        assert first_table.size == (3, 3)
if __name__ == "__main__":
main()
Create a text document from plain text with layout
Create a document with styles.
We want to:
-
remove standard styles from the document
-
set some styles grabbed from a styles.xml ODF file (or generated)
-
insert plain “python” text, containing some \t, \n, and spaces
recipes/create_a_text_document_from_plain_text_with_layout.py
#!/usr/bin/env python
"""Create a document with styles.
We want to:
- remove standard styles from the document
- set some styles grabbed from a styles.xml ODF file (or generated)
- insert plain "python" text, containing some \t , \n, and spaces
"""
from pathlib import Path
from odfdo import Document, Element, Paragraph, Style
_DOC_SEQUENCE = 60
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled2"
TARGET = "document.odt"
# Element is the base class of all odfdo classes.
# Element.from_tag permits the creation of any ODF XML tag from its
# serialized form; it is used below to build styles from raw XML.
#
# Font-face declarations: OpenSymbol, Liberation Serif (generic family
# "roman", variable pitch) and Liberation Sans (generic family "swiss",
# variable pitch).
_style_font_1 = Element.from_tag(
'<style:font-face style:name="OpenSymbol" svg:font-family="OpenSymbol"/>'
)
_style_font_2 = Element.from_tag(
'<style:font-face style:name="Liberation Serif" '
'svg:font-family="Liberation Serif" '
'style:font-family-generic="roman" '
'style:font-pitch="variable"/>'
)
_style_font_3 = Element.from_tag(
'<style:font-face style:name="Liberation Sans" '
'svg:font-family="Liberation Sans" '
'style:font-family-generic="swiss" '
'style:font-pitch="variable"/>'
)
# Page layout named "MyLayout": a 21.00cm x 29.70cm portrait page with
# custom margins (1.7cm top, 1.5cm bottom, 1.6cm left and right), a
# footnote separator definition and a footer style.
_style_page = Element.from_tag(
'<style:page-layout style:name="MyLayout">'
'<style:page-layout-properties fo:page-width="21.00cm" '
'fo:page-height="29.70cm" style:num-format="1" '
'style:print-orientation="portrait" fo:margin-top="1.7cm" '
'fo:margin-bottom="1.5cm" fo:margin-left="1.6cm" '
'fo:margin-right="1.6cm" style:writing-mode="lr-tb" '
'style:footnote-max-height="0cm"><style:footnote-sep '
'style:width="0.018cm" style:distance-before-sep="0.10cm" '
'style:distance-after-sep="0.10cm" style:line-style="solid" '
'style:adjustment="left" style:rel-width="25%" '
'style:color="#000000"/> </style:page-layout-properties>'
"<style:footer-style> "
'<style:header-footer-properties fo:min-height="0.6cm" '
'fo:margin-left="0cm" fo:margin-right="0cm" '
'fo:margin-top="0.3cm" style:dynamic-spacing="false"/> '
"</style:footer-style></style:page-layout>"
)
# Master page named "Standard", bound to the "MyLayout" page layout.
# Its footer is one paragraph in the "Footer" style holding two tabs
# followed by "<current page number> / <page count>".
_style_master = Element.from_tag(
'<style:master-page style:name="Standard" '
'style:page-layout-name="MyLayout"><style:footer>'
'<text:p text:style-name="Footer"> '
"<text:tab/><text:tab/><text:page-number "
'text:select-page="current"/> / <text:page-count '
'style:num-format="1">15</text:page-count>'
"</text:p></style:footer> "
"</style:master-page>"
)
# Paragraph style named "Footer": Liberation Sans 7pt, with a centered
# tab stop at 8.90cm and a right-aligned tab stop at 17.80cm.
_style_footer = Element.from_tag(
'<style:style style:name="Footer" '
'style:family="paragraph" style:class="extra" '
'style:master-page-name="">'
'<style:paragraph-properties style:page-number="auto" '
'text:number-lines="false" text:line-number="0">'
"<style:tab-stops>"
'<style:tab-stop style:position="8.90cm" '
'style:type="center"/>'
'<style:tab-stop style:position="17.80cm" style:type="right"/>'
"</style:tab-stops>"
"</style:paragraph-properties>"
"<style:text-properties "
'style:font-name="Liberation Sans" '
'fo:font-size="7pt"/></style:style>'
)
# Paragraph style named "description": Liberation Sans 11pt, no
# indentation, 0.35cm top margin and 0.10cm bottom margin.
_style_description = Element.from_tag(
'<style:style style:name="description" '
'style:family="paragraph" '
'style:class="text" style:master-page-name="">'
"<style:paragraph-properties "
'fo:margin="100%" fo:margin-left="0cm" fo:margin-right="0cm" '
'fo:margin-top="0.35cm" fo:margin-bottom="0.10cm" '
'style:contextual-spacing="false" '
'fo:text-indent="0cm" '
'style:auto-text-indent="false" '
'style:page-number="auto"/>'
"<style:text-properties "
'style:font-name="Liberation Sans" '
'fo:font-size="11pt"/>'
"</style:style>"
)
# some text style using Liberation Serif font
_style_small_serif = Element.from_tag(
'<style:style style:name="smallserif" '
'style:family="paragraph" style:class="text">'
'<style:paragraph-properties fo:margin="100%" '
'fo:margin-left="1.20cm" '
'fo:margin-right="0cm" fo:margin-top="0cm" '
'fo:margin-bottom="0.10cm" '
'style:contextual-spacing="false" '
'fo:text-indent="0cm" '
'style:auto-text-indent="false"/>'
'<style:text-properties style:font-name="Liberation Serif" '
'fo:font-size="9pt" '
'fo:font-weight="normal"/>'
"</style:style>"
)
# Paragraph style named "line": the only border set is a 0.06pt solid
# black bottom border, so an empty paragraph in this style presumably
# renders as a thin horizontal separator.
_style_line = Element.from_tag(
'<style:style style:name="line" '
'style:family="paragraph" style:class="text">'
'<style:paragraph-properties fo:margin="100%" '
'fo:margin-left="0cm" '
'fo:margin-right="0cm" fo:margin-top="0cm" '
'fo:margin-bottom="0.15cm" '
'style:contextual-spacing="false" fo:text-indent="0cm" '
'style:auto-text-indent="false" fo:padding="0cm" '
'fo:border-left="none" '
'fo:border-right="none" fo:border-top="none" '
'fo:border-bottom="0.06pt solid #000000"/>'
'<style:text-properties style:font-name="Liberation Sans" '
'fo:font-size="9pt"/>'
"</style:style>"
)
# Text style named "bolder", generated with the odfdo Style class
# instead of raw XML; it is the style applied to the bold spans.
_style_bold = Style("text", name="bolder", bold=True)
def save_new(document: Document, name: str):
    """Save ``document`` under ``name`` in OUTPUT_DIR, creating the directory."""
    OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)
def main():
    """Build a styled text document from plain Python strings and save it.

    The default styles of the new document are removed and replaced by the
    module-level styles, then several paragraphs are created from strings
    containing tabs, newlines and runs of spaces. Separator paragraphs in
    the "line" style are placed between them.
    """
    # Some plain text :
    text_1 = (
        "Lorem ipsum dolor sit amet,\n\t"
        "consectetuer adipiscing elit.\n\tSed"
        "non risus.\n\tSuspendisse lectus tortor,\n"
        "ndignissim sit amet, \nadipiscing nec,"
        "\nultricies sed, dolor.\n\n"
        " Cras elementum ultrices diam. Maecenas ligula massa,"
        "varius a,semper congue, euismod non,"
        " mi. Proin porttitor, orci nec nonummy"
        "molestie, enim est eleifend mi,"
        " non fermentum diam nisl sit amet erat."
    )
    text_2 = (
        "Vestibulum "
        "ante "
        "ipsum primis\n"
        "in faucibus orci luctus et ultrices "
        "posuere cubilia Curae; Aliquam nibh."
    )
    text_3 = (
        "Duis semper. \n\tDuis arcu massa,"
        " \n\t\tscelerisque vitae, \n"
        "\t\t\tconsequat in, \n"
        "\t\t\t\tpretium a, enim. \n"
        "\t\t\t\t\tPellentesque congue. \n"
        "Ut in risus volutpat libero pharetra "
        "tempor. Cras vestibulum bibendum augue."
        "Praesent egestas leo in pede. Praesent "
        "blandit odio eu enim. Pellentesque sed"
    )
    document = Document("text")
    # remove default styles
    document.delete_styles()
    # add our styles
    document.insert_style(_style_font_1, default=True)
    document.insert_style(_style_font_2, default=True)
    document.insert_style(_style_font_3, default=True)
    document.insert_style(_style_page, automatic=True)
    document.insert_style(_style_master)
    document.insert_style(_style_footer)
    document.insert_style(_style_description)
    document.insert_style(_style_small_serif)
    # The "line" style is referenced by every separator paragraph below, so
    # it must be registered too, otherwise those paragraphs point at a style
    # that does not exist in the document.
    document.insert_style(_style_line)
    document.insert_style(_style_bold)
    body = document.body
    # Since version 3.8.14 the append_plain_text() mode is the default for
    # paragraph creation, so the text can be given directly to Paragraph().
    # The explicit form of the same operation is:
    #     paragraph = Paragraph("", style="description")
    #     paragraph.append_plain_text(text_1)
    #     body.append(paragraph)
    paragraph = Paragraph(text_1, style="description")
    body.append(paragraph)
    paragraph = Paragraph(style="line")
    body.append(paragraph)
    paragraph = Paragraph(text_2, style="smallserif")
    body.append(paragraph)
    paragraph = Paragraph(style="line")
    body.append(paragraph)
    paragraph = Paragraph("A: " + text_3, style="description")
    # span offset become complex after inserting <CR> and <TAB> in a text
    paragraph.set_span("bolder", offset=5, length=6)  # TEXT position 5, 6 chars
    paragraph.set_span("bolder", offset=18, length=4)  # TEXT position 18, 4 chars
    # No length given: from TEXT position 49 to the end of the text block.
    paragraph.set_span("bolder", offset=49)
    paragraph.set_span("bolder", regex=r"Praes\w+\s\w+")  # regex: Praes. + next word
    body.append(paragraph)
    paragraph = Paragraph(style="line")
    body.append(paragraph)
    # it is possible to add the content without the original layout (\n, tab, spaces)
    paragraph = Paragraph("B: " + text_3, style="description", formatted=False)
    body.append(paragraph)
    paragraph = Paragraph(style="line")
    body.append(paragraph)
    # text can also be appended after paragraph creation
    paragraph = Paragraph(style="description")
    paragraph.append("C: " + text_3)
    body.append(paragraph)
    save_new(document, TARGET)
if __name__ == "__main__":
main()
Add a custom footer to a text document
Minimal example of setting a page footer using Style.set_page_footer().
Note: the created footer uses the current footer style, to change that footer style, use the method set_footer_style() on the ‘page-layout’ style family.
recipes/add_a_custom_footer_to_a_text_document.py
#!/usr/bin/env python
"""Minimal example of setting a page footer using Style.set_page_footer().
Note: the created footer uses the current footer style, to change that
footer style, use the method set_footer_style() on the 'page-layout'
style family.
"""
import os
from pathlib import Path
from odfdo import Document, Header, Paragraph, Tab, VarPageNumber
_DOC_SEQUENCE = 62
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled4"
TARGET = "document.odt"
def save_new(document: Document, name: str) -> None:
    """Write the recipe result ``document`` as ``name`` inside OUTPUT_DIR."""
    OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
    target_path = OUTPUT_DIR / name
    print("Saving:", target_path)
    document.save(target_path, pretty=True)
def make_document() -> Document:
    """Return a short document whose master page carries a two-line footer."""
    sample_text = (
        "Lorem ipsum dolor sit amet,\n\t"
        "consectetuer adipiscing elit.\n\tSed"
        "non risus.\n\tSuspendisse lectus tortor,\n"
        "ndignissim sit amet, \nadipiscing nec,"
        "\nultricies sed, dolor.\n\n"
        " Cras elementum ultrices diam. Maecenas ligula massa,"
        "varius a,semper congue, euismod non,"
        " mi. Proin porttitor, orci nec nonummy"
        "molestie, enim est eleifend mi,"
        " non fermentum diam nisl sit amet erat."
    )
    document = Document("text")
    content = document.body
    content.clear()
    content.append(Header(1, "Some Title"))
    content.append(Paragraph(sample_text))

    # Fetch the current "master-page" style, which is probably named
    # "Standard". If it is not found, list the candidates with:
    #   print([s for s in document.get_styles() if s.family == "master-page"])
    master_page = document.get_style("master-page", "Standard")

    # The footer may be one Paragraph or a list of Paragraphs.
    footer_top = Paragraph("\tA first footer line")
    footer_bottom = Paragraph("Second line")
    # Two tabs, then the page number field, then a closing dot.
    for piece in (Tab(), Tab(), VarPageNumber(), "."):
        footer_bottom.append(piece)
    master_page.set_page_footer([footer_top, footer_bottom])

    # Important: the modified style must be inserted again.
    document.insert_style(master_page)
    return document
def main() -> None:
    """Build the document, run the optional self-check, then save it."""
    result = make_document()
    test_unit(result)
    save_new(result, TARGET)
def test_unit(document: Document) -> None:
    """Verify the footer of the "Standard" master page (test suite only)."""
    if "ODFDO_TESTING" not in os.environ:
        return
    from odfdo import Style

    master_pages = [s for s in document.get_styles() if s.family == "master-page"]
    assert len(master_pages) >= 1
    standard = document.get_style("master-page", "Standard")
    assert isinstance(standard, Style)
    serialized = standard.get_page_footer().serialize()
    # Both footer lines must show up in the serialized footer.
    assert "A first footer" in serialized
    assert "Second line" in serialized
if __name__ == "__main__":
main()
How to add a picture to a text document
Create an empty text document and add a picture in a frame.
recipes/how_to_add_a_picture_to_a_text_document.py
#!/usr/bin/env python
"""Create an empty text document and add a picture in a frame.
"""
from pathlib import Path
from odfdo import Document, Frame, Paragraph
_DOC_SEQUENCE = 65
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_picture"
TARGET = "document.odt"
DATA = Path(__file__).parent / "data"
IMAGE = DATA / "newlogo.png"
def save_new(document: Document, name: str):
    """Store ``document`` as ``name`` in OUTPUT_DIR (created when missing)."""
    OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
    out_file = OUTPUT_DIR / name
    print("Saving:", out_file)
    document.save(out_file, pretty=True)
def main():
    """Create a text document containing one picture in a frame and save it."""
    document = Document("text")
    body = document.body
    # IMAGE is already built from DATA, so it is used as is. Joining it to
    # DATA a second time gives a wrong path whenever DATA is relative.
    image_path = str(IMAGE)
    # add_file() stores the file in the document and returns its URI.
    uri = document.add_file(image_path)
    image_frame = Frame.image_frame(
        uri,
        size=("6cm", "4cm"),
        position=("5cm", "10cm"),
    )
    # put image frame in a paragraph:
    paragraph = Paragraph("")
    paragraph.append(image_frame)
    body.append(paragraph)
    save_new(document, TARGET)
if __name__ == "__main__":
main()
How to add a right aligned picture to a text document
Create an empty text document and add a picture in a frame, aligned to the right or to the left.
Aligning an image requires applying a style to the frame. To do this, use the default frame position style and customize it. The frame position style allows you to choose alignment relative to the paragraph (default) or the page.
recipes/how_to_add_a_right_aligned_picture_to_a_text_document.py
#!/usr/bin/env python
"""Create an empty text document and add a picture in a frame,
aligned to the right or to the left.
Aligning an image requires applying a style to the frame. To do
this, use the default frame position style and customize it. The
frame position style allows you to choose alignment relative to
the paragraph (default) or the page.
"""
import os
from pathlib import Path
from odfdo import Document, Frame, Paragraph, default_frame_position_style
_DOC_SEQUENCE = 66
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_picture_right"
TARGET = "document.odt"
DATA = Path(__file__).parent / "data"
IMAGE = DATA / "newlogo.png"
def save_new(document: Document, name: str) -> None:
    """Save the recipe result ``document`` as ``name`` under OUTPUT_DIR."""
    OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
    saved_as = OUTPUT_DIR / name
    print("Saving:", saved_as)
    document.save(saved_as, pretty=True)
def make_document() -> Document:
    """Generate a document containing two instances of an image,
    one aligned to the right, the other to the left.

    Each alignment comes from its own frame position style, which is
    inserted in the document and referenced by name from the image frame.
    """
    document = Document("text")
    # Add the image to the document and remember its URI. IMAGE is already
    # built from DATA, so it is used as is. Joining it to DATA a second
    # time gives a wrong path whenever DATA is relative.
    image_path = str(IMAGE)
    uri = document.add_file(image_path)
    # add a frame style to the document, right alignment
    right_style = default_frame_position_style(
        name="right_frame", horizontal_pos="right"
    )
    document.insert_style(right_style)
    # add a frame style to the document, left alignment relative to the page
    left_style = default_frame_position_style(
        name="left_frame", horizontal_pos="left", horizontal_rel="page"
    )
    document.insert_style(left_style)
    # make the image frames using the URI and the style names defined above
    image_frame_right = Frame.image_frame(
        uri,
        size=("6cm", "4cm"),
        position=("0cm", "5cm"),
        style=right_style.name,
    )
    image_frame_left = Frame.image_frame(
        uri,
        size=("9cm", "6cm"),
        position=("0cm", "12cm"),
        style=left_style.name,
    )
    # put both image frames in a paragraph:
    paragraph = Paragraph("")
    paragraph.append(image_frame_right)
    paragraph.append(image_frame_left)
    document.body.append(paragraph)
    return document
def main() -> None:
    """Build the document, run the optional self-check, then save it."""
    result = make_document()
    test_unit(result)
    save_new(result, TARGET)
def test_unit(document: Document) -> None:
    """Verify both frame styles are registered (test suite only)."""
    if "ODFDO_TESTING" not in os.environ:
        return
    graphic_styles = [s for s in document.get_styles() if s.family == "graphic"]
    assert len(graphic_styles) >= 2
    names = [style.name for style in document.get_styles("graphic")]
    assert "right_frame" in names
    assert "left_frame" in names
if __name__ == "__main__":
main()
How to add a title to a text document
Minimal example of how to add a Header of first level to a text document.
recipes/how_to_add_a_title_to_a_text_document.py
"""Minimal example of how to add a Header of first level to a text document.
"""
from odfdo import Document, Header
_DOC_SEQUENCE = 67
def main():
    """Create a text document and append a first-level header to its body."""
    document = Document("text")
    heading = Header(1, "The Title")
    document.body.append(heading)
if __name__ == "__main__":
main()
Accessing a single element
Example of methods and properties to analyse a document.
These methods return a single element (or None):
- `body.get_note(position)`
- `body.get_paragraph(position)`
- `body.get_header(position)`
recipes/accessing_a_single_element.py
#!/usr/bin/env python
"""Example of methods and properties to analyse a document.
These methods return a single element (or None):
- `body.get_note(position)`
- `body.get_paragraph(position)`
- `body.get_header(position)`
"""
# Expected result on stdout:
# - Content of the first footnote:
# 1. [Gaiman, Neil](http://en.wikipedia.org/w/index.php?title=Neil_Gaiman)
# (2003). Don't Panic: Douglas Adams and the "Hitchhiker's Guide to the
# Galaxy". Titan Books. pp 144-145. ISBN 1-84023-742-2.
#
# - Content of the paragraph with the word 'Fish'
# In So Long, and Thanks for All the Fish (published in 1984), Arthur
# returns home to Earth, rather surprisingly since it was destroyed when
# he left. He meets and falls in love with a girl named
# [Fenchurch](http://en.wikipedia.org/w/index.php?title=Minor_characters_from_The_Hitchhiker%27s_Guide_to_the_Galaxy%23Fenchurch), and discovers this Earth is a replacement provided by the [dolphin](http://en.wikipedia.org/w/index.php?title=Dolphin)s in their Save the Humans campaign. Eventually he rejoins Ford, who claims to have saved the Universe in the meantime, to hitch-hike one last time and see God's Final Message to His Creation. Along the way, they are joined by Marvin, the Paranoid Android, who, although 37 times older than the universe itself (what with time
# travel and all), has just enough power left in his failing body to read
# the message and feel better about it all before expiring.
#
# - Content of the first Title:
# The Hitchhiker's Guide to the Galaxy
#
# - Content of the last Title:
# Official sites
import os
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 70
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
def read_source_document() -> Document:
    """Open the document named on the command line, else the bundled sample."""
    if len(sys.argv) > 1:
        return Document(sys.argv[1])
    return Document(DATA / SOURCE)
def method_demo(document: Document) -> None:
    """Show some method examples."""
    # The body is the XML element from which the other elements are reached.
    content = document.body

    # Accessing a single element.
    # The get_xxx() methods used here select one element by position or by a
    # regular expression on its content.
    print("- Content of the first footnote:")
    first_note = content.get_note(position=0)
    print(str(first_note))
    print()

    print("- Content of the paragraph with the word 'Fish'")
    fish_paragraph = content.get_paragraph(content="Fish")
    print(str(fish_paragraph))

    print("- Content of the first Title:")
    first_title = content.get_header(position=0)
    print(str(first_title))

    print("- Content of the last Title:")
    last_title = content.get_header(position=-1)
    print(str(last_title))
def main() -> None:
    """Load the source document, print the demo, run the optional check."""
    source_document = read_source_document()
    method_demo(source_document)
    test_unit(source_document)
def test_unit(document: Document) -> None:
    """Check the text of the accessed elements (test suite only)."""
    if "ODFDO_TESTING" not in os.environ:
        return
    content = document.body
    note_text = str(content.get_note(position=0))
    assert note_text.startswith(
        "1. [Gaiman, Neil](http://en.wikipedia.org/w/index.php?title=Neil_Gaiman) (2003)"
    )
    fish_text = str(content.get_paragraph(content="Fish"))
    assert fish_text.endswith("all before expiring.\n")
    first_title = str(content.get_header(position=0))
    assert first_title.startswith("The Hitchhiker's Guide")
    last_title = str(content.get_header(position=-1))
    assert last_title.startswith("Official sites")
if __name__ == "__main__":
main()
Accessing a list of elements
Example of methods and properties to analyse a document.
These methods or properties return a list of elements:
- `body.headers`
- `body.images`
- `body.paragraphs`
- `body.get_links()`
- `body.get_notes()`
- `body.tables`
- `body.get_paragraphs(content)`
recipes/accessing_a_list_of_elements.py
#!/usr/bin/env python
"""Example of methods and properties to analyse a document.
These methods or properties return a list of elements:
- `body.headers`
- `body.images`
- `body.paragraphs`
- `body.get_links()`
- `body.get_notes()`
- `body.tables`
- `body.get_paragraphs(content)`
"""
# Expected result on stdout:
# 96 get methods are available
# number of headings: 29
# number of images stored: 0
# number of paragraphs: 175
# number of links (URLs): 352
# number of footnotes: 49
# number of tables: 0
# Paragraphs with 'Fish': 4
# Paragraphs with 'answer' and '42': 1
import os
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 75
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
def read_source_document() -> Document:
    """Return the document given as first CLI argument, else the sample one."""
    cli_args = sys.argv[1:]
    source = cli_args[0] if cli_args else DATA / SOURCE
    return Document(source)
def analysis(document: Document) -> dict[str, int]:
    """Return a few counters describing the content of ``document``."""
    # The body is the XML element from which the other elements are reached.
    content = document.body
    # To get every element of a kind, use the plural accessors: properties
    # when no parameter is needed (headers, images, paragraphs, tables),
    # get_xxx() methods otherwise. Most of the methods accept filters, such
    # as a regular expression on the content.
    return {
        # Number of "get_" occurrences among the attribute names of the body.
        "methods": " ".join(dir(content)).count("get_"),
        # How complex is the sample wikipedia document:
        "headings": len(content.headers),
        "images": len(content.images),
        "paragraphs": len(content.paragraphs),
        "links": len(content.get_links()),
        "footnotes": len(content.get_notes()),
        # The sample document has no table.
        "tables": len(content.tables),
        # Filtering paragraphs by content with regular expressions.
        "fish": len(content.get_paragraphs(content=r"Fish")),
        "answer": len(content.get_paragraphs(content=r"answer.*42")),
    }
def display_analysis(stats: dict[str, int]) -> None:
    """Write one line per counter of ``stats`` on stdout."""
    print(f"{stats['methods']} get methods are available")
    # Every remaining line has the form "<label>: <value>".
    labelled_keys = (
        ("number of headings", "headings"),
        ("number of images stored", "images"),
        ("number of paragraphs", "paragraphs"),
        ("number of links (URLs)", "links"),
        ("number of footnotes", "footnotes"),
        ("number of tables", "tables"),
        ("Paragraphs with 'Fish'", "fish"),
        ("Paragraphs with 'answer' and '42'", "answer"),
    )
    for label, key in labelled_keys:
        print(f"{label}: {stats[key]}")
def main() -> None:
    """Analyse the source document, print the counters, run the self-check."""
    counters = analysis(read_source_document())
    display_analysis(counters)
    test_unit(counters)
def test_unit(stats: dict[str, int]) -> None:
    """Compare ``stats`` with the values expected for the sample document.

    The comparison only runs when ODFDO_TESTING is set in the environment.
    """
    if "ODFDO_TESTING" not in os.environ:
        return
    expected = {
        "methods": 96,
        "headings": 29,
        "images": 0,
        "paragraphs": 175,
        "links": 352,
        "footnotes": 49,
        "tables": 0,
        "fish": 4,
        "answer": 1,
    }
    # Checked in this fixed order, one counter at a time.
    for key, value in expected.items():
        assert stats[key] == value
if __name__ == "__main__":
main()
Accessing other element from element like list
Accessing elements from element-like list.
Any fetched element is an XML tree context that can be queried, but only on the subtree it contains. Here are quick examples of iteration on Paragraphs and Lists from the document.
recipes/accessing_other_element_from_element_like_list.py
#!/usr/bin/env python
"""Accessing elements from element-like list.
Any fetched element is a XML tree context that can be queried, but only on the subtree it
contains. Here are quick examples of iteration on `Paragraphs` and `Lists` from the document.
"""
# Expected result on stdout:
# Number of available lists in the document: 5
#
# The 4th list contains 9 paragraphs
#
# 1 : [BBC Cult website](http://www.bbc.co.uk/cult/hitchhikers/),
# official website for the [TV show version](http://en.wikipedia.org/w/index.php?title=The_Hitchhiker%27s_Guide_to_the_Galaxy_%28TV_series%29)
# (includes information, links and downloads)
#
# 2 : [BBC Radio 4 website for the 2004-2005
# series](http://www.bbc.co.uk/radio4/hitchhikers/)
#
# 3 : [Official Movie Site](http://hitchhikers.movies.go.com/)
#
# 4 : [The Hitchhiker's Guide to the Galaxy
# (2005 movie)](http://www.imdb.com/title/tt0371724/)at the
# [Internet Movie Database](http://en.wikipedia.org/w/index.php?title=Internet_Movie_Database)
#
# 5 : [The Hitch Hikers Guide to the Galaxy
# (1981 TV series)](http://www.imdb.com/title/tt0081874/)at the
# [Internet Movie Database](http://en.wikipedia.org/w/index.php?title=Internet_Movie_Database)
#
# 6 : [h2g2](http://www.bbc.co.uk/h2g2/guide/)
#
# 7 : [Encyclopedia of Television](http://www.museum.tv/archives/etv/H/htmlH/hitch-hickers/hitch-hickers.htm)
#
# 8 : [British Film Institute Screen Online](http://www.screenonline.org.uk/tv/id/560180/index.html)
# page devoted to the TV series
#
# 9 : [DC Comics H2G2 site](http://www.dccomics.com/graphic_novels/?gn=1816)
import os
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 80
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
def read_source_document() -> Document:
    """Load the document passed on the command line, else the sample file."""
    source = sys.argv[1] if len(sys.argv) >= 2 else DATA / SOURCE
    return Document(source)
def analyse_list(document: Document):
    """Print the number of lists, then the paragraphs of the list at position 4."""
    # The body is the XML element from which the other elements are reached.
    content = document.body
    # Any element is a context for navigating, but only within the subtree
    # it contains. The body works the same way; since it holds the whole
    # content the restriction was not noticeable until now.
    print("Number of available lists in the document:", len(content.lists))
    print()
    selected = content.get_list(position=4)
    print(f"The 4th list contains {len(selected.paragraphs)} paragraphs")
    print()
    # Now print the list content, numbered from 1.
    for number, paragraph in enumerate(selected.paragraphs, start=1):
        print(number, ":", paragraph)
def main():
    """Load the source document, print the list analysis, run the self-check."""
    source_document = read_source_document()
    analyse_list(source_document)
    test_unit(source_document)
def test_unit(document: Document) -> None:
    """Check list and paragraph counts of the sample (test suite only)."""
    if "ODFDO_TESTING" not in os.environ:
        return
    content = document.body
    selected = content.get_list(position=4)
    items = selected.paragraphs
    assert len(content.lists) == 5
    assert len(selected.paragraphs) == 9
    first_item, last_item = str(items[0]), str(items[8])
    assert first_item.startswith("[BBC Cult website](http")
    assert last_item.startswith("[DC Comics H2G2 site](http")
if __name__ == "__main__":
main()
How to add a list to a text document
Create an empty text document and add a list.
recipes/how_to_add_a_list_to_a_text_document.py
#!/usr/bin/env python
"""Create an empty text document and add a list.
"""
import os
from pathlib import Path
# Lists are a dedicated object List
from odfdo import Document, List
_DOC_SEQUENCE = 90
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "add_list"
TARGET = "document.odt"
def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` in OUTPUT_DIR, creating it when absent."""
    OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
    output_file = OUTPUT_DIR / name
    print("Saving:", output_file)
    document.save(output_file, pretty=True)
def main():
    """Create an empty text document, add a three-item list and save it."""
    document = Document("text")
    content = document.body
    content.clear()
    # The List factory accepts a Python list of strings and list items.
    items = ["chocolate", "tea", "coffee"]
    content.append(List(items))
    test_unit(document)
    save_new(document, TARGET)
def test_unit(document: Document) -> None:
    """Check the formatted text of the document (test suite only)."""
    if "ODFDO_TESTING" in os.environ:
        rendered = document.get_formatted_text()
        assert rendered.strip() == "- chocolate\n- tea\n- coffee"
if __name__ == "__main__":
main()
How to add a manual page break
Adding a manual page break to a text document.
recipes/how_to_add_a_manual_page_break.py
#!/usr/bin/env python
"""Adding a manual page break to a text document.
"""
from pathlib import Path
from odfdo import Document, PageBreak, Paragraph, Style
_DOC_SEQUENCE = 95
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "page_break"
TARGET = "document.odt"
def save_new(document: Document, name: str):
    """Write ``document`` to OUTPUT_DIR / ``name``, creating the directory."""
    OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
    result_path = OUTPUT_DIR / name
    print("Saving:", result_path)
    document.save(result_path, pretty=True)
def main():
    """Create a document showing two ways of inserting a manual page break."""
    document = Document()
    content = document.body
    content.clear()

    # First way, using the odfdo shortcuts: register the page break style,
    # then put a PageBreak element between two paragraphs.
    document.add_page_break_style()
    for element in (
        Paragraph("First paragraph"),
        PageBreak(),
        Paragraph("Second paragraph"),
    ):
        content.append(element)

    # Second way: a paragraph style carrying the "fo:break-before" property,
    # applied to an empty paragraph.
    break_before = Style("paragraph", name="page_break_before")
    break_before.set_properties({"fo:break-before": "page"})
    document.insert_style(break_before)
    content.append(Paragraph("", style="page_break_before"))
    content.append(Paragraph("Third paragraph"))

    save_new(document, TARGET)
if __name__ == "__main__":
main()
Create a basic drawing
Insert a circle and a lot of lines (a fractal) in a text document.
recipes/create_a_basic_drawing.py
#!/usr/bin/env python
"""Insert a circle and a lot of lines (a fractal) in a text document.
"""
import cmath
from pathlib import Path
from typing import Union
from odfdo import Document, EllipseShape, Header, LineShape, Paragraph
_DOC_SEQUENCE = 100
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_drawing"
TARGET = "koch.odt"
CYCLES = 4 # beware, 5 is big, 6 is too big to display...
def save_new(document: Document, name: str):
    """Save ``document`` as ``name`` under OUTPUT_DIR (created if needed)."""
    OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
    drawing_path = OUTPUT_DIR / name
    print("Saving:", drawing_path)
    document.save(drawing_path, pretty=True)
def cm(x: float) -> str:
    """Format ``x`` with two decimals followed by the "cm" unit."""
    return format(x, ".2f") + "cm"
# some graphic computations
class Vector:
    """Segment going from point ``a`` to point ``b`` of the complex plane."""

    def __init__(self, a: Union[float, complex], b: Union[float, complex]):
        self.a = a
        self.b = b

    def koch_split(self) -> list:
        """Return the four segments replacing this one in a Koch refinement.

        The segment is cut at one third (c) and two thirds (d). The extra
        point e is the midpoint shifted by (d - c) multiplied by -1j.
        """
        span = self.b - self.a
        c = self.a + 1.0 / 3.0 * span
        d = self.a + 2.0 / 3.0 * span
        m = 0.5 * (self.a + self.b)
        e = m + (d - c) * complex(0, -1)
        # Chain the points a -> c -> e -> d -> b into consecutive segments.
        points = [self.a, c, e, d, self.b]
        return [Vector(start, end) for start, end in zip(points, points[1:])]

    def centimeter(self, val) -> tuple:
        """Return one end point as a pair of "cm" strings.

        ``val`` equal to 0 selects ``a``; any other value selects ``b``.
        """
        m = self.a if val == 0 else self.b
        return (cm(m.real), cm(m.imag))
def koch(vector_list, cycle=2):
    """Apply ``cycle`` rounds of koch_split() to every item of ``vector_list``.

    When ``cycle`` is zero or negative the given list is returned unchanged.
    """
    current = vector_list
    while cycle > 0:
        refined = []
        for vector in current:
            refined.extend(vector.koch_split())
        current = refined
        cycle -= 1
    return current
def make_fractal_coords(side, vpos):
    """Return the center of a triangle and its Koch-refined edges.

    The triangle has a horizontal first edge of length ``side`` starting at
    vertical position ``vpos``. Its edges are refined CYCLES times.
    """
    # NOTE(review): 17 is presumably the usable page width in cm, used to
    # center the figure horizontally -- confirm.
    origin = complex((17 - side) / 2.0, vpos)
    second = origin + complex(side, 0)
    third = origin + cmath.rect(side, cmath.pi / 3)
    edges = [Vector(origin, second), Vector(second, third), Vector(third, origin)]
    center = (origin + second + third) / 3
    return center, koch(edges, cycle=CYCLES)
def generate_document():
    """Return a text document holding a circle and the Koch fractal lines."""
    document = Document("text")
    content = document.body
    print("Making some Koch fractal")
    content.append(Header(1, "Some Koch fractal"))

    # Set stroke and fill colors on the "graphic" style of the document.
    graphic_style = document.get_style("graphic")
    graphic_style.set_properties({"svg:stroke_color": "#0000ff"})
    graphic_style.set_properties(fill_color="#ffffcc")

    # All the shapes are appended to this single paragraph.
    paragraph = Paragraph("")
    content.append(paragraph)

    # Compute the coordinates of the fractal.
    center, vector_list = make_fractal_coords(side=12.0, vpos=8.0)

    # Create a circle centered on the fractal.
    radius = 8.0
    diameter = cm(radius * 2)
    corner = center - complex(radius, radius)
    paragraph.append(
        EllipseShape(
            size=(diameter, diameter),
            position=(cm(corner.real), cm(corner.imag)),
        )
    )

    # Create the drawing itself: one LineShape per segment.
    paragraph.append(f"number of lines: {len(vector_list)}")
    for segment in vector_list:
        start, end = segment.centimeter(0), segment.centimeter(1)
        paragraph.append(LineShape(p1=start, p2=end))
    return document
def main():
    """Generate the fractal document and save it as TARGET."""
    save_new(generate_document(), TARGET)
if __name__ == "__main__":
main()
Add private annotations to a document
Add not printable annotations to a document.
Annotations are notes that do not appear in the document but typically on a side bar in a desktop application. So they are not printed.
recipes/add_private_annotations_to_a_document.py
#!/usr/bin/env python
"""Add not printable annotations to a document.
Annotations are notes that do not appear in the document but typically
on a side bar in a desktop application. So they are not printed.
"""
import os
from pathlib import Path
from odfdo import Document, Header, Paragraph
_DOC_SEQUENCE = 110
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "annotated"
TARGET = "annotated_document.odt"
def save_new(document: Document, name: str) -> None:
    """Save ``document`` as ``OUTPUT_DIR / name``.

    OUTPUT_DIR is created when missing and the destination path is printed
    before saving (with ``pretty=True``).
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)
def base_document() -> Document:
    """Return a new text document holding three headers and two paragraphs."""
    expedition_text = (
        "Sous le consulat de Lucius Domitius et d'Appius Claudius, "
        "César, quittant les quartiers d'hiver pour aller en Italie, "
        "comme il avait coutume de le faire chaque année, ordonne aux "
        "lieutenants qu'il laissait à la tête des légions de construire, "
        "pendant l'hiver, le plus de vaisseaux qu'il serait possible, "
        "et de réparer les anciens."
    )
    island_text = (
        "Cette île est de forme triangulaire ; l'un des côtés regarde "
        "la Gaule. Des deux angles de ce côté, l'un est au levant, "
        "vers le pays de Cantium, où abordent presque tous les vaisseaux "
        "gaulois ; l'autre, plus bas, est au midi. La longueur de ce côté "
        "est d'environ cinq cent mille pas. "
    )

    document = Document("text")
    body = document.body
    # Elements are appended in reading order.
    for element in (
        Header(1, "De la Guerre des Gaules - Livre V"),
        Header(2, "Préparatifs d'expédition en Bretagne"),
        Paragraph(expedition_text),
        Header(2, "La Bretagne"),
        Paragraph(island_text),
    ):
        body.append(element)
    return document
def insert_annotation(document: Document) -> None:
    """Attach an annotation to the paragraph containing "consulat".

    The paragraph is looked up with ``get_paragraph(content="consulat")``
    and the annotation is inserted after the word "Domitius".
    """
    target = document.body.get_paragraph(content="consulat")

    # Annotations are inserted like notes but they are simpler.
    # Annotation arguments:
    #   after   => The word after which the annotation is inserted.
    #   body    => The annotation itself, at the end of the page.
    #   creator => The author of the annotation.
    #   date    => A datetime value, by default datetime.now().
    annotation = {
        "after": "Domitius",
        "body": "Talking about Lucius Domitius",
        "creator": "Luis",
    }
    target.insert_annotation(**annotation)
def main() -> None:
    """Build the sample document, annotate it, self-check and save it."""
    annotated = base_document()
    insert_annotation(annotated)
    test_unit(annotated)
    save_new(annotated, TARGET)
def test_unit(document: Document) -> None:
    """Assert the expected result; a no-op unless ODFDO_TESTING is set."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    found = document.body.get_annotations(creator="Luis")
    assert len(found) == 1
if __name__ == "__main__":
main()
Accessibility check on a document
Basic Accessibility test: check, for every picture in a document, if there is:
- a title (svg_title),
- a description (svg_description)
or, at least, some caption text.
See the test file planes.odt
and the result of the script.
recipes/accessibility_check_on_a_document.py
#!/usr/bin/env python
"""Basic Accessibility test: check, for every picture in a document, if
there is:
- a title (svg_title),
- a description (svg_description)
or, at least, some caption text.
See test file `planes.odt` file and the result of the script.
"""
# Expected result on stdout:
# The document displays 3 pictures:
# - pictures with a title: 2
# - pictures with a description: 1
# - pictures with a caption: 0
# Image: 100000000000013B000000D345859F604DCE636A.jpg
# Name: graphics2, Title: Spitfire, general view, Description:Green spitfire in a hall, view from left front., Caption:None
# Image: 100000000000013B000000D3F908DA0A939D2F4B.jpg
# Name: graphics3, Title: Spitfire, detail, Description:None, Caption:None
# Image: 100000000000013B000000D375CEBFD6D7CB7CE9.jpg
# Name: graphics1, Title: None, Description:None, Caption:None
import os
import sys
from pathlib import Path
from typing import Any
from odfdo import Document
_DOC_SEQUENCE = 200
DATA = Path(__file__).parent / "data"
SOURCE = "planes.odt"
def read_source_document() -> Document:
    """Open the document given as first command-line argument.

    Falls back to ``DATA / SOURCE`` when no argument was passed.
    """
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def accessibility_evaluator(document: Document) -> dict[str, Any]:
    """Collect, for every image of the body, its title, description and caption.

    Returns a dict with:
      - "images": one formatted report string per image,
      - "titles", "descriptions", "captions": how many images have a
        truthy value for that field.
    """
    reports: list[str] = []
    counters = {"titles": 0, "descriptions": 0, "captions": 0}

    # We want the images of the document.
    for image in document.body.images:
        filename = image.url.rpartition("/")[2]
        frame = image.parent
        name = frame.name
        title = frame.svg_title
        description = frame.svg_description
        link = frame.parent
        # this part requires some ODF know how: the caption is read from
        # the "office:name" attribute of an enclosing "draw:a" element.
        if link.tag == "draw:a":
            caption = link.get_attribute("office:name")
        else:
            caption = None

        reports.append(
            f"Image: {filename}\n"
            f" Name: {name}, Title: {title}, "
            f"Description:{description}, Caption:{caption}"
        )
        # bool is an int subclass: adds 1 only for truthy values.
        counters["titles"] += bool(title)
        counters["descriptions"] += bool(description)
        counters["captions"] += bool(caption)

    return {"images": reports, **counters}
def display_accessibilty(stats: dict[str, Any]) -> None:
    """Print the summary counters, a blank line, then one report per image."""
    summary = (
        f"The document displays {len(stats['images'])} pictures:",
        f" - pictures with a title: {stats['titles']}",
        f" - pictures with a description: {stats['descriptions']}",
        f" - pictures with a caption: {stats['captions']}",
        "",
    )
    for line in summary:
        print(line)
    for report in stats["images"]:
        print(report)
def main() -> None:
    """Evaluate the source document, print the statistics and self-check."""
    stats = accessibility_evaluator(read_source_document())
    display_accessibilty(stats)
    test_unit(stats)
def test_unit(stats: dict[str, Any]) -> None:
# only for test suite:
if "ODFDO_TESTING" not in os.environ:
return
assert len(stats["images"]) == 3
assert stats["titles"] == 2
assert stats["descriptions"] == 1
assert stats["captions"] == 0
if __name__ == "__main__":
main()
Add logo on presentation
Insert an image (e.g. the logo of an event, organization or a Creative Commons
attribution) with size x,y
at position x2,y2
on a number of slides in a
presentation slide deck.
recipes/add_logo_on_presentation.py
#!/usr/bin/env python
"""Insert an image (e.g. the logo of an event, organization or a Creative Commons
attribution) with size `x,y` at position `x2,y2` on a number of slides in a
presentation slide deck.
"""
import os
import sys
from pathlib import Path
# reading image size requires a graphic library
from PIL import Image
from odfdo import Document, Frame
_DOC_SEQUENCE = 250
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "add_logo"
TARGET = "presentation.odp"
DATA = Path(__file__).parent / "data"
SOURCE = "presentation_wo_logo.odp"
LOGO = DATA / "newlogo.png"
def read_source_document() -> Document:
    """Open the document given as first command-line argument.

    Falls back to ``DATA / SOURCE`` when no argument was passed.
    """
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def save_new(document: Document, name: str) -> None:
    """Save ``document`` as ``OUTPUT_DIR / name``.

    OUTPUT_DIR is created when missing and the destination path is printed
    before saving (with ``pretty=True``).
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)
def make_image_size(path: Path, size: float) -> tuple[str, str]:
    """Return the display size ``(width, height)`` of an image as "cm" strings.

    The pixel dimensions read from ``path`` are scaled by a common factor
    so that the largest one equals ``size``; each value is formatted with
    two decimals followed by "cm".
    """
    # The context manager releases the file handle opened by Pillow, which
    # a bare ``Image.open(path).size`` leaves to the garbage collector.
    with Image.open(path) as image:
        width, height = image.size
    ratio = max(width / size, height / size)
    return (f"{width / ratio:.2f}cm", f"{height / ratio:.2f}cm")
def add_logo(presentation: Document) -> None:
    """Append the LOGO image, with title and description, to every slide.

    The image file is added once to the document; each draw page then gets
    its own frame referencing it.
    """
    logo_size = make_image_size(LOGO, 4.0)
    slides_body = presentation.body
    logo_uri = presentation.add_file(LOGO)

    for slide in slides_body.get_draw_pages():
        # Create a frame for the image
        frame = Frame.image_frame(
            image=logo_uri,
            text="",  # Text over the image object
            size=logo_size,  # Display size of image
            anchor_type="page",
            page_number=None,
            position=("1.50cm", "1.50cm"),
            style=None,
        )
        frame.svg_title = "New Logo"
        frame.svg_description = "The new logo with blue background"
        slide.append(frame)
def main() -> None:
    """Read the source presentation, add the logo, self-check and save."""
    presentation = read_source_document()
    add_logo(presentation)
    test_unit(presentation)
    save_new(presentation, TARGET)
def test_unit(document: Document) -> None:
    """Assert 11 slides with one image each; a no-op unless ODFDO_TESTING is set."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    pages = document.body.get_draw_pages()
    assert len(pages) == 11
    for page in pages:
        assert len(page.get_images()) == 1
if __name__ == "__main__":
main()
Get pictures from document odt
Get all the pictures embedded in an .odt file.
recipes/get_pictures_from_document_odt.py
#!/usr/bin/env python
"""Get all the pictures embeded in an .odt file.
"""
import sys
from pathlib import Path
from pprint import pformat
from odfdo import Document
_DOC_SEQUENCE = 260
DATA = Path(__file__).parent / "data"
# ODF export of Wikipedia article Hitchhiker's Guide to the Galaxy (CC-By-SA)
# Remark: the document is badly made: the pictures are not displayed in the
# text, but are still inside the document!
SOURCE = "collection.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "found_pics"
def read_source_document():
    """Open the document given as first command-line argument.

    Falls back to ``DATA / SOURCE`` when no argument was passed.
    """
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def main():
    """List the parts and images of the document and copy the images out.

    Every image referenced in the body is written into OUTPUT_DIR under
    the last component of its URL; the resulting files are then listed.
    """
    doc = read_source_document()

    # show the list of the document parts
    parts = doc.parts
    print("Parts:")
    print(pformat(parts))
    print()

    # We want the images of the document.
    pictures = doc.body.images
    print("Pics :")
    print(pformat(pictures))
    print()

    # get_part() returns the actual content of the image, which is copied
    # out of the .odt file:
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    for picture in pictures:
        # the URL tells where the image content is stored in the file
        url = picture.url
        content = doc.get_part(url)
        destination = OUTPUT_DIR / Path(url).name
        destination.write_bytes(content)

    print(f"Files in {OUTPUT_DIR}:")
    for entry in OUTPUT_DIR.glob("*"):
        print(entry.name)
if __name__ == "__main__":
main()
Change image in many documents
Change an image in many ODF files.
This recipe is suitable for the scenario where an organization is moving from one company logo to another and needs to replace the logo in several hundred existing documents.
recipes/change_image_in_many_documents.py
#!/usr/bin/env python
"""Change an image in many ODF files.
This recipe is suitable for the scenario where an organization
is moving from one company logo to another and needs to replace
the logo in several hundred existing documents.
"""
import os
from hashlib import sha256
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 270
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "new_logo"
DATA = Path(__file__).parent / "data"
OLD_PRESENTATIONS = DATA / "old_presentations"
OLD_LOGO = OLD_PRESENTATIONS / "oldlogo.png"
NEW_LOGO = DATA / "newlogo.png"
def save_modified(document: Document) -> None:
    """Save ``document`` into OUTPUT_DIR, keeping its original file name.

    OUTPUT_DIR is created when missing and the destination is printed.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / Path(document.path).name
    print("Saving:", destination)
    document.save(destination)
def footprint(content: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of ``content``."""
    hasher = sha256()
    hasher.update(content)
    return hasher.hexdigest()
def update_document_logo(
    path: Path,
    old_hash: str,
    new_content: bytes,
    stats: dict[str, int],
) -> None:
    """Replace, inside the document at ``path``, every image matching ``old_hash``.

    ``stats`` is updated in place: "files" for every path seen, "odf_files"
    for every path that could be opened as a Document, "updated_files" for
    every document that was changed and saved through ``save_modified``.
    """
    stats["files"] += 1
    if not path.suffix.lower().startswith(".od"):
        return
    try:
        document = Document(path)
    except Exception:
        # Best effort: anything that cannot be opened is skipped silently.
        return
    stats["odf_files"] += 1

    replaced = 0
    for image in document.body.images:
        image_url = image.url
        if not image_url:
            continue
        try:
            image_content = document.get_part(image_url)
        except KeyError:
            print("- not found inside document:", path, end=" ")
            print(" image URL:", image_url)
            continue
        if footprint(image_content) != old_hash:
            continue
        document.set_part(image_url, new_content)
        replaced += 1

    if not replaced:
        return
    save_modified(document)
    stats["updated_files"] += 1
def update_logos() -> dict[str, int]:
    """Update the logo in every path found under OLD_PRESENTATIONS.

    Returns the counters "files", "odf_files" and "updated_files".
    """
    stats: dict[str, int] = dict.fromkeys(
        ("files", "odf_files", "updated_files"), 0
    )
    old_hash = footprint(OLD_LOGO.read_bytes())

    # making the new image content: the new logo is added to a scratch
    # document and read back as a part.
    scratch = Document("text")
    new_content = scratch.get_part(scratch.add_file(str(NEW_LOGO)))

    for path in OLD_PRESENTATIONS.glob("**/*"):
        update_document_logo(path, old_hash, new_content, stats)
    return stats
def main() -> None:
    """Update all the logos, print the counters and self-check."""
    stats = update_logos()
    labels = (
        ("Files", "files"),
        ("ODF files", "odf_files"),
        ("Updated files", "updated_files"),
    )
    for label, key in labels:
        print(f"{label}: {stats[key]}")
    test_unit(stats)
def test_unit(stats: dict[str, int]) -> None:
# only for test suite:
if "ODFDO_TESTING" not in os.environ:
return
assert (stats["files"]) == 3
assert (stats["odf_files"]) == 2
assert (stats["updated_files"]) == 2
if __name__ == "__main__":
main()
Concatenate presentations
Concatenate several presentations (including presentations found in sub directories), possibly merge styles and images. Result for style may vary.
recipes/concatenate_presentations.py
#!/usr/bin/env python
"""Concatenate several presentations (including presentations found in sub
directories), possibly merge styles and images. Result for style may vary.
"""
import os
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 280
DATA = Path(__file__).parent / "data"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "concatenate"
TARGET = "presentation.odp"
def save_new(document: Document, name: str) -> None:
    """Save ``document`` as ``OUTPUT_DIR / name``.

    OUTPUT_DIR is created when missing and the destination path is printed
    before saving (with ``pretty=True``).
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)
def concatenate_presentations(path: Path) -> Document:
    """Return a presentation holding a copy of every ``*.odp`` found under ``path``.

    The search is recursive (``**/*.odp``). A summary line with the number
    of files visited and the resulting number of slides is printed.
    """
    merged = Document("presentation")
    merged.body.clear()
    merged.delete_styles()

    count = 0  # stays 0 when no presentation is found
    for count, presentation_path in enumerate(path.glob("**/*.odp"), start=1):
        add_presentation(merged, presentation_path)

    nb_slides = len(merged.body.get_draw_pages())
    print(f"{count} presentations concatenated, {nb_slides} slides.")
    return merged
def add_presentation(concat_presentation: Document, path: Path) -> None:
    """Copy the styles, slides and slide images of ``path`` into ``concat_presentation``.

    A file that cannot be opened as a Document is skipped silently.
    """
    try:
        source = Document(path)
    except Exception:
        return
    concat_presentation.merge_styles_from(source)

    target_body = concat_presentation.body
    target_manifest = concat_presentation.manifest
    source_manifest = source.manifest
    source_slides = source.body.get_draw_pages()
    print(f"- {path.name} has {len(source_slides)} slides")

    # add all slides
    for original in source_slides:
        copied = original.clone
        # dont forget images: declare each one in the manifest and copy
        # its content.
        for image in copied.images:
            uri = image.url
            media_type = source_manifest.get_media_type(uri)
            target_manifest.add_full_path(uri, media_type)
            concat_presentation.set_part(uri, source.get_part(uri))
        # append slide, expecting nothing good about its final style
        target_body.append(copied)
def main() -> None:
    """Concatenate the presentations found under DATA, self-check and save."""
    merged = concatenate_presentations(DATA)
    test_unit(merged)
    save_new(merged, TARGET)
def test_unit(document: Document) -> None:
    """Assert the document has 38 slides; a no-op unless ODFDO_TESTING is set."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    pages = document.body.get_draw_pages()
    assert len(pages) == 38
if __name__ == "__main__":
main()
Make a presentation from pictures of a text document
Open a .odt file with pictures in it, find and analyse all the images, create a new .odp presentation, display all the pictures in the presentation, one image per frame.
recipes/make_a_presentation_from_pictures_of_a_text_document.py
#!/usr/bin/env python
"""Open a .odt file with pictures in it, find and analyse all the images,
create a new .odp presentation, display all the pictures in the presentation,
one image per frame.
"""
import os
from pathlib import Path
from tempfile import mkstemp
# analyzing embedded image need Pillow library
from PIL import Image
from odfdo import Document, DrawPage, Frame
_DOC_SEQUENCE = 285
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "presentation_images_in_odt"
TARGET = "presentation.odp"
DATA = Path(__file__).parent / "data"
SOURCE = DATA / "collection.odt"
def embedded_image_ratio(href, part):
    """Return the width / height ratio of an embedded image.

    href: path of the image inside the document; only used in the printed
        report line.
    part: raw bytes of the image.

    The pixel size is printed as a side effect.
    """
    from io import BytesIO  # local import: only this helper needs it

    # Read the image straight from memory. The previous temporary-file
    # approach leaked the file if writing or opening failed, and never
    # closed the image handle before unlinking the file.
    with Image.open(BytesIO(part)) as image:
        width, height = image.size
    print(f"image {href} , size : {width}x{height}")
    return width / height
def save_new(document: Document, name: str):
    """Save ``document`` as ``OUTPUT_DIR / name``.

    OUTPUT_DIR is created when missing and the destination path is printed
    before saving (with ``pretty=True``).
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)
def main():
    """Generate the presentation and save it under the TARGET name."""
    save_new(generate_document(), TARGET)
_expected_result = """
image Pictures/12918371211855030272.jpe , size : 333x386
image Pictures/12918371212102410240.jpe , size : 200x350
image Pictures/12918371212184750080.jpe , size : 384x552
image Pictures/12918371212196450304.jpe , size : 373x576
image Pictures/12918371212450449408.jpe , size : 400x596
image Pictures/12918371212536940544.jpe , size : 800x1195
image Pictures/12918371212580190208.jpe , size : 561x282
image Pictures/12918371212597118976.jpe , size : 660x515
image Pictures/12918371212741570560.jpe , size : 328x504
"""
def generate_document():
    """Return a presentation with one slide per image of the SOURCE document.

    Each slide holds a text frame with a legend and an image frame whose
    greatest side is ``max_border`` centimetres. The image content is
    copied from the source document into the presentation.
    """
    # Open the input document
    doc_source = Document(SOURCE)

    # Making of the output Presentation document:
    presentation = Document("presentation")
    # Presentation got a body in which elements are stored
    presentation_body = presentation.body
    presentation_body.clear()
    presentation_manifest = presentation.manifest

    # First, get all image elements available in the source document:
    images_source = doc_source.body.images
    manifest_source = doc_source.manifest

    max_border = 16.0  # max size of the greatest border, in cm

    # For each image, we create a page in the presentation and display the
    # image and some text on it.
    for image in images_source:
        # get_part() returns the actual content of the image, given the
        # URI link to the image
        uri = image.url
        part = doc_source.get_part(uri)  # actual image content
        name = uri.split("/")[-1]  # lets make a file name for image

        # Compute the display size of the image on the final page: the
        # greatest side is max_border, the other follows the ratio.
        ratio = embedded_image_ratio(uri, part)
        a = max_border * ratio
        b = max_border
        if ratio > 1.0:
            a /= ratio
            b /= ratio

        # Create an underlying page for the image and the text
        page = DrawPage("page " + name)

        # Create a frame for the image
        image_frame = Frame.image_frame(
            image=uri,
            text="",  # Text over the image object
            size=(f"{a}cm", f"{b}cm"),  # Display size of image
            anchor_type="page",
            page_number=None,
            # Lengths must not contain whitespace ("3.5 cm" was a typo).
            position=("3.5cm", "3.5cm"),
            style=None,
        )

        # Add some text object somewhere on the page, with a text frame
        legend = f"Image {name} from Wikipedia document / {SOURCE.name}"
        text_frame = Frame.text_frame(
            legend,
            size=("26cm", "2cm"),
            position=("0.5cm", "0.5cm"),
            style="Standard",
            text_style="Standard",
        )

        # Append all the components
        page.append(text_frame)
        page.append(image_frame)
        presentation_body.append(page)

        # Do not forget to copy the actual image content: declare it in
        # the manifest and store it with set_part(). (For an image coming
        # from the local filesystem, Document.add_file() does the same.)
        media_type = manifest_source.get_media_type(uri)
        presentation_manifest.add_full_path(uri, media_type)
        presentation.set_part(uri, part)  # reuse the content read above

    return presentation
if __name__ == "__main__":
main()
Make presentation from images
Create a presentation from some images in a given directory, where each image is put at the center of its own page, scaled to either the maximum available size, the preferred maximum size, or to cover the full page and lose some information.
recipes/make_presentation_from_images.py
#!/usr/bin/env python
"""Create a presentation from a some images in a given directory,
where each image is put on the center of its own page scaled to either
the maximum available size, prefered maximum size, or cover the full
page and lose some info.
"""
from pathlib import Path
# analyzing embedded image need Pillow library
from PIL import Image
from odfdo import Document, DrawPage, Frame
_DOC_SEQUENCE = 286
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "presentation_from_images"
TARGET = "presentation.odp"
IMAGES = Path(__file__).parent / "data" / "images"
MAX_SIZE = 15.0 # feel free to customize
CROP_SIZE = False # feel free to customize
# Size (in cm) of a slide : (default page-layout)
SLIDE_W, SLIDE_H = 28.0, 21.0 # 4/3 screen
# FIXME: this is the default page-layout.
# - Changing the style of the page-layout by program is not done in this script
# - an other way, merging with external page-layout/master-page requires
# extra files, out of the scope for this script.
def save_new(document: Document, name: str):
    """Save ``document`` as ``OUTPUT_DIR / name``.

    OUTPUT_DIR is created when missing and the destination path is printed
    before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination)
def main():
    """Build the presentation from the images and save it as TARGET.

    When no presentation could be built, a message is printed and the
    script stops with exit status 0.
    """
    presentation = make_presentation()
    if presentation is None:
        print("Something went wrong.")
        # ``exit()`` is injected by the ``site`` module and may be missing
        # (e.g. ``python -S``); raising SystemExit is always available and
        # keeps the same exit status.
        raise SystemExit(0)
    save_new(presentation, TARGET)
def make_presentation():
    """Return a presentation with one slide per collected image.

    Returns None (after printing a message) when no image was found.
    """
    # Collecting images
    images_pool = collect_images()
    if not images_pool:  # unable to find images
        print("No image found !")
        return None

    # Creation of the output Presentation document; its body stores the
    # content.
    presentation = Document("presentation")
    slides = presentation.body
    slides.clear()

    # For each image, we create a page in the presentation and display the
    # image on it.
    for info in images_pool:
        filename = info.path.name
        # add the file to the document
        uri = presentation.add_file(str(info.path))
        # Create an underlying page for the image
        page = DrawPage("Page " + filename)
        # Create a frame for the image and append it to the page
        page.append(
            Frame.image_frame(
                image=uri,
                name=filename,
                text="",  # Text over the image object
                size=(info.disp_w, info.disp_h),  # Display size of image
                anchor_type="page",
                page_number=None,
                position=(info.pos_x, info.pos_y),
                style=None,
            )
        )
        slides.append(page)
    return presentation
# Principle :
# - original image are left unmodified by the script
# - only the size they should appear is computed
# - later, the display engine (say LibreOffice) will merge this display
# information with other informations, like the size of the page
# (page-layout) and should act like a mask against the "big" croped image.
class ImageInfo:
    """Display geometry of one image file centred on a slide.

    Attributes:
        path: path of the image file.
        size: (width, height) in pixels, or None when the file could not
            be opened as an image.
        disp_w, disp_h: display width and height as "cm" strings.
        pos_x, pos_y: position of the frame as "cm" strings, computed so
            that the image is centred on a SLIDE_W x SLIDE_H slide.

    Everything except ``path`` is None until ``adjust()`` succeeds.
    """

    def __init__(self, path: Path):
        self.path = path
        self.size = None
        self.disp_w = self.disp_h = None
        self.pos_x = self.pos_y = None

    def adjust(self):
        """Read the image size and compute its display size and position.

        Scaling mode, in priority order:
          - MAX_SIZE truthy: the largest side measures MAX_SIZE cm;
          - CROP_SIZE truthy: the image covers the whole slide and may
            overflow it in one direction;
          - otherwise: the image fits entirely inside the slide.
        """
        try:
            # The context manager closes the file handle opened by Pillow.
            with Image.open(self.path) as image:
                self.size = image.size
        except OSError:
            # Not an image ?
            self.size = None
            return

        width, height = self.size
        if MAX_SIZE:
            ratio = max(width / MAX_SIZE, height / MAX_SIZE)
        elif CROP_SIZE:
            ratio = min(width / SLIDE_W, height / SLIDE_H)
        else:
            ratio = max(width / SLIDE_W, height / SLIDE_H)
        display_w = width / ratio
        display_h = height / ratio

        # Two decimals (".2f"); the former ":2f" spec was a minimum field
        # width of 2 and printed six decimals.
        self.disp_w = f"{display_w:.2f}cm"
        self.disp_h = f"{display_h:.2f}cm"
        self.pos_x = f"{(SLIDE_W - display_w) / 2:.2f}cm"
        self.pos_y = f"{(SLIDE_H - display_h) / 2:.2f}cm"
        print(self.path.name, self.disp_w, self.disp_h)
def collect_images():
    """Return an ImageInfo for every file under IMAGES whose size could be read.

    The search is recursive; entries that are not files are ignored, as are
    files left without a ``size`` after ``adjust()``.
    """
    pool = []
    for path in IMAGES.glob("**/*"):
        if path.is_file():
            info = ImageInfo(path)
            info.adjust()
            if info.size:
                pool.append(info)
    return pool
if __name__ == "__main__":
main()
Make a presentation from text with different styles
Each line of the text becomes a slide of the presentation; the style changes depending on the length of the text line.
recipes/make_a_presentation_from_text_with_different_styles.py
#!/usr/bin/env python
"""Each line of the text becomes a slide of the presentation, we change of style
depending on the length of text line.
"""
import os
from pathlib import Path
from odfdo import Document, DrawPage, Frame, Style
_DOC_SEQUENCE = 287
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled_prez"
TARGET = "presentation.odp"
CONTENT = """123
azertyuiop
azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
end.
""".splitlines()
def save_new(document: Document, name: str) -> None:
    """Save ``document`` as ``OUTPUT_DIR / name``.

    OUTPUT_DIR is created when missing and the destination path is printed
    before saving (with ``pretty=True``).
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)
def create_style() -> Style:
    """Return the base "graphic" style, named "Gloup48", used by the slides.

    The style is built in three steps: graphic properties at creation,
    then paragraph properties, then text properties.
    """
    graphic_properties = {
        "name": "Gloup48",
        "parent": "standard",
        "stroke": "none",
        "fill_color": "#b3b3b3",
        "textarea_vertical_align": "middle",
        "padding_top": "1cm",
        "padding_bottom": "1cm",
        "padding_left": "1cm",
        "padding_right": "1cm",
        "line_distance": "0cm",
        "guide_overhang": "0cm",
        "guide_distance": "0cm",
    }
    text_properties = {
        "color": "#dd0000",
        "text_outline": "false",
        "font": "Liberation Sans",
        "font_family": "Liberation Sans",  # compatibility
        "font_style_name": "Bold",
        "family_generic": "swiss",
        "size": "48pt",
        "weight": "bold",
    }

    base_style = Style("graphic", **graphic_properties)
    base_style.set_properties(area="paragraph", align="center")
    base_style.set_properties(area="text", **text_properties)
    return base_style
def generate_document() -> Document:
    """Return a presentation with one slide per line of CONTENT.

    The longer the line, the smaller the font: each slide uses the style
    "Gloup<size>" picked from the thresholds below, 48pt being the default.
    """
    presentation = Document("presentation")
    body = presentation.body
    body.clear()

    base_style = create_style()
    presentation.insert_style(base_style)

    # (text length threshold, font size in pt), checked in this order: the
    # first threshold strictly exceeded by the text length gives the size.
    thresholds = (
        (95, 10),
        (80, 11),
        (65, 14),
        (50, 16),
        (40, 20),
        (30, 24),
        (20, 32),
        (10, 40),
        (5, 44),
    )

    # One variant of the base style per font size.
    for _, points in thresholds:
        variant = base_style.clone
        variant.set_attribute("style:name", f"Gloup{points}")
        variant.set_properties(area="text", size=f"{points}pt")
        presentation.insert_style(variant)

    for number, text in enumerate(CONTENT, start=1):
        page = DrawPage(f"{number} - {text[:10]}")
        # choosing some style:
        points = next(
            (pts for limit, pts in thresholds if len(text) > limit),
            48,
        )
        style_name = f"Gloup{points}"
        page.append(
            Frame.text_frame(
                text,
                size=("24cm", "2cm"),
                position=("2cm", "8cm"),
                style=style_name,
                text_style=style_name,
            )
        )
        body.append(page)
    return presentation
def main() -> None:
    """Generate the styled presentation, self-check and save it."""
    presentation = generate_document()
    test_unit(presentation)
    save_new(presentation, TARGET)
def test_unit(document: Document) -> None:
    """Assert slide count and first/last slide text; a no-op unless ODFDO_TESTING is set."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    children = document.body.children
    nb_pages = sum(1 for item in children if isinstance(item, DrawPage))
    assert nb_pages == len(CONTENT)
    assert str(children[0]).strip() == CONTENT[0].strip()
    assert str(children[-1]).strip() == CONTENT[-1].strip()
if __name__ == "__main__":
main()
Extract and reorder slides
Create a new presentation from a previous one by extracting some slides, in a different order.
recipes/extract_and_reorder_slides.py
#!/usr/bin/env python
"""Create a new presentation from a previous one by extracting some slides,
in a different order.
"""
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 290
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "presentation_extracted"
TARGET = "presentation.odp"
DATA = Path(__file__).parent / "data"
SOURCE = DATA / "presentation_base.odp"
def save_new(document: Document, name: str):
    """Save ``document`` as ``OUTPUT_DIR / name``.

    OUTPUT_DIR is created when missing and the destination path is printed
    before saving (with ``pretty=True``).
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)
def main():
    """Build a presentation from slides 3, 5, 2 and 2 of SOURCE and save it.

    Slide numbers are 1-based; a slide that cannot be fetched is skipped.
    """
    new_order = (3, 5, 2, 2)
    presentation_base = Document(SOURCE)

    extracted = Document("presentation")
    # Important, copy styles too:
    extracted.delete_styles()
    extracted.merge_styles_from(presentation_base)
    extracted.body.clear()

    for slide_number in new_order:
        try:
            slide = presentation_base.body.get_draw_page(
                position=slide_number - 1
            )
        except Exception:  # noqa: S112
            continue
        if slide is not None:
            extracted.body.append(slide.clone)

    save_new(extracted, TARGET)
if __name__ == "__main__":
main()
Change values of a chart inside a document
Open a text document with an embedded chart and change some values.
recipes/change_values_of_a_chart_inside_a_document.py
#!/usr/bin/env python
"""Open a text document with an embedded chart and change some values."""
import os
import sys
from pathlib import Path
# for cell style
from odfdo import Document
_DOC_SEQUENCE = 295
DATA = Path(__file__).parent / "data"
SOURCE = "chart.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "modified_chart"
TARGET = "modified_chart.odt"
def read_source_document() -> Document:
    """Open the document given as first command-line argument.

    Falls back to ``DATA / SOURCE`` when no argument was passed.
    """
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def save_new(document: Document, name: str) -> None:
    """Save ``document`` as ``OUTPUT_DIR / name``.

    OUTPUT_DIR is created when missing and the destination path is printed
    before saving (with ``pretty=True``).
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)
def change(document: Document) -> None:
    """Change three cells of the first table of the embedded chart.

    The chart content is read from the part "Object 1/content.xml". The
    list of parts and the current table values are printed for information.
    """
    # list the parts if needed
    print(document.parts)
    # -> ['mimetype', 'ObjectReplacements/Object 1', 'Object 1/meta.xml', 'Object 1/styles.xml', 'Object 1/content.xml', ...
    chart_content = document.get_part("Object 1/content.xml")
    table = chart_content.body.get_table(0)

    # if needed, get the values:
    print(table.get_values())
    # -> [
    #     [None, "", "Column 2", "Column 3"],
    #     ["Row 1", Decimal("NaN"), 10, 20],
    #     ["Row 2", Decimal("NaN"), 30, 40],
    #     ["Row 3", Decimal("NaN"), 50, 360],
    #     ["Row 4", Decimal("NaN"), Decimal("9.02"), Decimal("6.2")],
    # ]

    # change some values
    new_values = (
        ("A2", "label changed"),
        ("D3", 4000),
        ("D4", 4321),
    )
    for coord, value in new_values:
        table.set_value(coord, value)
def main() -> None:
    """Read the source document, change the chart values, self-check and save."""
    chart_document = read_source_document()
    change(chart_document)
    test_unit(chart_document)
    save_new(chart_document, TARGET)
def test_unit(document: Document) -> None:
    """Assert the expected cell values; a no-op unless ODFDO_TESTING is set."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    table = document.get_part("Object 1/content.xml").body.get_table(0)
    expected = (
        ("A3", "Row 2"),
        ("A2", "label changed"),
        ("D3", 4000),
        ("D4", 4321),
    )
    for coord, value in expected:
        assert table.get_value(coord) == value
if __name__ == "__main__":
main()
Add text span styles
Transform an unstyled document into a multi-styled document by changing the size and color of each word.
recipes/add_text_span_styles.py
#!/usr/bin/env python
"""Transform a not styled document into a multi styled document,
by changing size and color of each parts of words.
"""
import os
import sys
from itertools import chain
from pathlib import Path
from odfdo import Document, Style
_DOC_SEQUENCE = 300
DATA = Path(__file__).parent / "data"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled3"
SOURCE = "dormeur_notstyled.odt"
TARGET = "dormeur_styled.odt"
RANDOM_SEED = 1234
def read_source_document() -> Document:
    """Open the document given as first command-line argument.

    Falls back to ``DATA / SOURCE`` when no argument was passed.
    """
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def save_new(document: Document, name: str) -> None:
    """Save ``document`` as ``OUTPUT_DIR / name``.

    OUTPUT_DIR is created when missing and the destination path is printed
    before saving (with ``pretty=True``).
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)
class SimpleRandom:
    """Quick and dirty reproducible random generator for tests.

    The state follows ``current = 16807 * current % (2**31 - 1)``, so a
    given seed always produces the same sequence.
    """

    MODULUS = 2**31 - 1
    MAXI = 2**31 - 2

    def __init__(self) -> None:
        self.current = 16807

    def _next_number(self) -> None:
        """Advance the internal state by one step."""
        self.current = (16807 * self.current) % self.MODULUS

    def set_seed(self, seed: int = 16807) -> None:
        """Reset the internal state to ``seed``."""
        self.current = seed

    def randint(self, max_value: int) -> int:
        """Advance the state and scale it to an integer starting at 1.

        The result is ``int(current * max_value / MAXI + 1)``.
        """
        self._next_number()
        scaled = self.current * max_value / self.MAXI
        return int(scaled + 1)
def color_hex(r: int, g: int, b: int) -> str:
    """Return "#" followed by each of r, g, b as at least two uppercase hex digits."""
    channels = (r, g, b)
    return "#" + "".join(f"{channel:02X}" for channel in channels)
def style_name_index(index: int) -> str:
    """Return the style name for ``index``: "rnd_" followed by the index."""
    return "rnd_{}".format(index)
def generate_random_styles(document: Document, rnd: SimpleRandom) -> None:
    """Generate 64 random text styles named rnd_1 .. rnd_64.

    Each style gets a random color and a size growing with its index, and is
    inserted into *document*.
    """
    # range end is exclusive: 65 is needed to really create 64 styles, so
    # that every index drawn with rnd.randint(64) refers to an existing style.
    for index in range(1, 65):
        style = Style(
            "text",
            name=style_name_index(index),
            color=color_hex(rnd.randint(256), rnd.randint(256), rnd.randint(256)),
            size=f"{8 + index / 5}",
        )
        document.insert_style(style)
def add_styles(document: Document) -> None:
    """Give every distinct word of the body a randomly chosen text style."""
    rnd = SimpleRandom()
    body = document.body
    generate_random_styles(document, rnd)
    unique_words = set(str(body).split())
    for word in sorted(unique_words):
        chosen_style = style_name_index(rnd.randint(64))
        # the word itself is used as the regular expression to match
        for element in chain(body.paragraphs, body.headers):
            element.set_span(chosen_style, regex=word)
def main():
    """Run the recipe: load the source, style it, check it and save it."""
    styled = read_source_document()
    add_styles(styled)
    test_unit(styled)
    save_new(styled, TARGET)
def test_unit(document: Document) -> None:
    """Check the number of spans; active only when ODFDO_TESTING is set."""
    running_in_test_suite = "ODFDO_TESTING" in os.environ
    if running_in_test_suite:
        assert len(document.body.spans) == 157
if __name__ == "__main__":
main()
How to copy some style from another document
Minimal example of copy of a style from another document.
recipes/how_to_copy_some_style_from_another_document.py
"""Minimal example of copy of a style from another document.
"""
from odfdo import Document, Style
_DOC_SEQUENCE = 310
def main():
    """Insert a "Yellow Highlight" text style into a new text document.

    The style is fetched from sample_styles.odt when possible; otherwise a
    very simple replacement style is created.
    """
    document = Document("text")
    body = document.body
    body.clear()

    # Let's imagine the sample_styles.odt document contains an interesting
    # style, so let's first try to fetch it:
    highlight = None
    try:
        odfdo_styles = Document("sample_styles.odt")
        highlight = odfdo_styles.get_style("text", display_name="Yellow Highlight")
    except Exception:
        # Deliberate best effort: any failure to read the sample document
        # leads to the fallback style below.
        highlight = None

    if highlight is None:
        # The file was unusable or did not contain the style (presumably
        # get_style returns None in that case -- verify against the odfdo
        # API): let's create some *very simple* text style.
        highlight = Style(
            "text", display_name="Yellow Highlight", color="blue", italic=True
        )

    # We made some assumptions here:
    #
    # 'text'             : The family of the style, text styles apply on
    #                      individual characters.
    # "Yellow Highlight" : The name of the style as we see it in a desktop
    #                      application.
    # display_name       : Styles have an internal name ("Yellow_20_Highlight"
    #                      in this example) but we gave the display_name
    #                      instead.
    #
    # We now have a style object that we add to our own collection:
    document.insert_style(highlight, automatic=True)
if __name__ == "__main__":
main()
Copy style from another document
Copy the styles from an existing document.
For more advanced version, see the odfdo-style script.
recipes/copy_style_from_another_document.py
#!/usr/bin/env python
"""Copy the styles from an existing document.
For more advanced version, see the odfdo-style script.
"""
import os
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 320
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
# copied here from the odfdo package:
STYLE_SOURCE = DATA / "lpod_styles.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled1"
TARGET = "document.odt"
def read_source_document() -> Document:
    """Load the document named on the command line, or the bundled sample."""
    extra_args = sys.argv[1:]
    source = extra_args[0] if extra_args else DATA / SOURCE
    return Document(source)
def save_new(document: Document, name: str) -> None:
    """Store the recipe result as OUTPUT_DIR / name."""
    target_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", target_path)
    document.save(target_path, pretty=True)
def merge_styles(document: Document) -> None:
    """Replace all styles of *document* by those of the STYLE_SOURCE file."""
    # The document at STYLE_SOURCE holds the styles we are interested in,
    # so it is opened first:
    donor = Document(STYLE_SOURCE)
    # Only some styles could be changed, but a clean basis is wanted here:
    document.delete_styles()
    # The actual style change:
    document.merge_styles_from(donor)
def main() -> None:
    """Run the recipe: load the source, swap its styles, check and save."""
    restyled = read_source_document()
    merge_styles(restyled)
    test_unit(restyled)
    save_new(restyled, TARGET)
def test_unit(document: Document) -> None:
    """Check the number of styles; active only when ODFDO_TESTING is set."""
    running_in_test_suite = "ODFDO_TESTING" in os.environ
    if running_in_test_suite:
        assert len(document.get_styles()) == 75
if __name__ == "__main__":
main()
Create basic text styles
Create basic text styles.
recipes/create_basic_text_styles.py
#!/usr/bin/env python
"""Create basic text styles.
"""
import os
from pathlib import Path
from odfdo import Document, Header, Paragraph, Style
_DOC_SEQUENCE = 330
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_styles"
TARGET = "document.odt"
def save_new(document: Document, name: str) -> None:
    """Write *document* as OUTPUT_DIR / name, creating the directory if needed."""
    out_file = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", out_file)
    document.save(out_file, pretty=True)
def _create_style_header_blue(document: Document) -> None:
    """Insert the "header_blue" paragraph style, outline level 1."""
    settings = {
        "family": "paragraph",
        "name": "header_blue",
        "display_name": "header_blue",
        "parent_style": "Heading",
        "area": "text",
        "bold": True,
        "color": "blue",
        "size": "160%",
    }
    header_blue = Style(**settings)
    header_blue.set_attribute("style:default-outline-level", "1")
    document.insert_style(header_blue)
def _create_style_header_navy(document: Document) -> None:
    """Insert the "header_navy" paragraph style, outline level 2."""
    settings = {
        "family": "paragraph",
        "name": "header_navy",
        "display_name": "header_navy",
        "parent_style": "Heading",
        "area": "text",
        "bold": True,
        "color": "navy",
        "size": "120%",
    }
    header_navy = Style(**settings)
    header_navy.set_attribute("style:default-outline-level", "2")
    document.insert_style(header_navy)
def _create_style_steel(document: Document) -> None:
    """Insert the "steel" paragraph style: yellow text on a dark blue fill."""
    steel = Style(
        family="paragraph",
        area="text",
        name="steel",
        display_name="steel",
        color="yellow",
        background_color="darkblue",
    )
    # graphic properties are set separately from the text properties above
    graphic_properties = {
        "draw:fill": "solid",
        "draw:fill-color": "darkblue",
    }
    steel.set_properties(area="graphic", properties=graphic_properties)
    document.insert_style(steel)
def _create_style_special(document: Document) -> None:
    """Insert the "special" paragraph style: centered Courier New text."""
    special = Style(
        family="paragraph",
        area="text",
        name="special",
        display_name="special",
        font="Courier New",
        font_family="Courier New",
        font_style_name="Regular",
        font_pitch="fixed",
        background_color="AntiqueWhite",
    )
    # margins, line height and alignment belong to the paragraph area
    paragraph_properties = {
        "fo:margin-left": "2cm",
        "fo:margin-right": "2cm",
        "fo:line-height": "150%",
        "fo:text-align": "center",
    }
    special.set_properties(area="paragraph", properties=paragraph_properties)
    document.insert_style(special)
def _create_style_bold_gold(document: Document) -> None:
    """Insert the "bold_gold" text style."""
    settings = {
        "family": "text",
        "name": "bold_gold",
        "display_name": "bold_gold",
        "bold": True,
        "color": "darkgoldenrod",
    }
    document.insert_style(Style(**settings))
def _create_style_italic_lime(document: Document) -> None:
    """Insert the "italic_lime" text style."""
    settings = {
        "family": "text",
        "name": "italic_lime",
        "display_name": "italic_lime",
        "italic": True,
        "size": "120%",
        "color": "lime",
    }
    document.insert_style(Style(**settings))
def add_styles(document: Document) -> None:
    """Insert all the recipe styles into *document*."""
    style_builders = (
        _create_style_header_blue,
        _create_style_header_navy,
        _create_style_steel,
        _create_style_special,
        _create_style_bold_gold,
        _create_style_italic_lime,
    )
    for build in style_builders:
        build(document)
def add_content(document: Document) -> None:
    """Append headers and paragraphs that use the recipe styles."""
    body = document.body

    body.append(Header(1, "First level header", style="header_blue"))

    # First section: default paragraph style, spans added by regex
    body.append(Header(2, "First sub header", style="header_navy"))
    first_para = Paragraph(
        "Lorem ipsum dolor sit amet, consectetuer "
        "adipiscing elit. Sed non risus. "
        "Suspendisse lectus tortor, dignissim sit amet, "
        "adipiscing nec, ultricies sed, dolor."
    )
    first_para.set_span("bold_gold", regex="dolor")
    first_para.set_span("italic_lime", regex=r"\w+ing")
    body.append(first_para)

    # Second section: "steel" paragraph style plus one span
    body.append(Header(2, "Second sub header", style="header_navy"))
    second_para = Paragraph(
        "Cras elementum ultrices diam. Maecenas ligula massa, "
        "varius a, semper congue, euismod non, mi. Proin porttitor, "
        "orci nec nonummy molestie, enim est eleifend mi, non "
        "fermentum diam nisl sit amet erat. Duis semper.",
        style="steel",
    )
    second_para.set_span("italic_lime", regex="semper")
    body.append(second_para)

    # Third section: "special" paragraph style only
    body.append(Header(2, "Third sub header", style="header_navy"))
    third_para = Paragraph(
        "Duis arcu massa, scelerisque vitae, consequat in, pretium a, "
        "enim. Pellentesque congue. Ut in risus volutpat libero "
        "pharetra tempor. Cras vestibulum bibendum augue. Praesent "
        "egestas leo in pede. Praesent blandit odio eu enim. "
        "Pellentesque sed dui ut augue blandit sodales.",
        style="special",
    )
    body.append(third_para)
def create_document() -> Document:
    """Build a new document containing the recipe styles and content."""
    new_document = Document()
    new_document.body.clear()
    add_styles(new_document)
    add_content(new_document)
    return new_document
def main() -> None:
    """Run the recipe: build the document, check it and save it."""
    result = create_document()
    test_unit(result)
    save_new(result, TARGET)
def test_unit(document: Document) -> None:
    """Check the serialized styles; active only when ODFDO_TESTING is set."""
    if "ODFDO_TESTING" not in os.environ:
        return
    # (family, style name, fragments expected in the serialized style)
    expectations = (
        (
            "paragraph",
            "header_blue",
            (
                'name="header_blue"',
                'color="#0000FF"',
                'font-weight="bold"',
                'font-size="160%"',
            ),
        ),
        (
            "paragraph",
            "header_navy",
            (
                'name="header_navy"',
                'color="#000080"',
                'font-weight="bold"',
                'font-size="120%"',
            ),
        ),
        (
            "paragraph",
            "steel",
            (
                'name="steel"',
                'color="#FFFF00"',
                "graphic-properties",
                'draw:fill-color="#00008B"',
            ),
        ),
        (
            "paragraph",
            "special",
            (
                'name="special"',
                'background-color="#FAEBD7"',
                "Courier",
                'line-height="150%"',
                'margin-left="2cm"',
                'margin-right="2cm"',
                'text-align="center"',
            ),
        ),
        (
            "text",
            "bold_gold",
            (
                'name="bold_gold"',
                'color="#B8860B"',
                'font-weight="bold"',
            ),
        ),
        (
            "text",
            "italic_lime",
            (
                'name="italic_lime"',
                'color="#00FF00"',
                'font-style="italic"',
                'font-size="120%"',
            ),
        ),
    )
    for family, style_name, fragments in expectations:
        serialized = document.get_style(family, style_name).serialize()
        for fragment in fragments:
            assert fragment in serialized
if __name__ == "__main__":
main()
How to apply a style to a paragraph
Minimal example of how to add a styled paragraph to a document.
recipes/how_to_apply_a_style_to_a_paragraph.py
"""Minimal example of how to add a styled paragraph to a document.
"""
from odfdo import Document, Paragraph
_DOC_SEQUENCE = 335
def main():
    """Append a paragraph using the "highlight" style to a new text document."""
    document = Document("text")
    body = document.body
    body.clear()
    # we know we have a style of name "highlight":
    styled_paragraph = Paragraph("Highlighting the word", style="highlight")
    body.append(styled_paragraph)
if __name__ == "__main__":
main()
Change paragraph styles methods
Many examples of how to change paragraph (and in-paragraph) styles, either by changing the paragraph style itself or by using Span to select parts of the paragraph. Includes several ways to create or import styles.
recipes/change_paragraph_styles_methods.py
#!/usr/bin/env python
"""Many examples of how to change paragraph (and in-paragraph) styles, either
by changing the paragraph style itself or by using Span to select parts
of the paragraph. Includes several ways to create or import styles.
"""
import os
from collections.abc import Iterator
from itertools import cycle
from pathlib import Path
from odfdo import Document, Element, Header, Paragraph, Style
_DOC_SEQUENCE = 340
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "change_styles"
DATA = Path(__file__).parent / "data"
LOREM = (DATA / "lorem.txt").read_text(encoding="utf8")
STYLED_SOURCE = "lpod_styles.odt"
TARGET_BEFORE = "document_before.odt"
TARGET_AFTER = "document_after.odt"
def save_new(document: Document, name: str) -> None:
    """Store the recipe result as OUTPUT_DIR / name."""
    saved_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", saved_path)
    document.save(saved_path, pretty=True)
def iter_lorem() -> Iterator[str]:
    """Return an endless iterator over the "."-separated pieces of LOREM."""
    single_line = LOREM.replace("\n", " ")
    pieces = (chunk.strip() for chunk in single_line.split("."))
    return cycle(pieces)
def make_base_document() -> Document:
    """Build a document of headers and paragraphs from lorem ipsum pieces."""
    document = Document("odt")
    body = document.body
    body.clear()
    sentences = iter_lorem()
    # one level 1 title, then 3 sections of 5 two-sentence paragraphs
    body.append(Header(1, next(sentences)))
    for _section in range(3):
        body.append(Header(2, next(sentences)))
        for _para in range(5):
            first = next(sentences)
            second = next(sentences)
            body.append(Paragraph(first + ". " + second + "."))
    return document
def add_some_styles(document) -> None:
    """Insert programmatically built automatic styles into the document."""
    # Always simpler to copy styles from an actual existing .odt file, but
    # styles can also be described by their keyword arguments:
    style_definitions = (
        {
            "family": "paragraph",
            "area": "text",
            "display_name": "bold-blue",
            "color": "blue",
            "bold": True,
        },
        {
            "family": "paragraph",
            "area": "text",
            "display_name": "italic-red",
            "color": "red",
            "bold": True,
            "italic": True,
        },
        {
            "family": "text",
            "area": "text",
            "display_name": "green",
            "background_color": "green",
        },
        {
            "family": "text",
            "area": "text",
            "display_name": "bold-yellow-blue",
            "color": "yellow",
            "background_color": "blue",
            "bold": True,
        },
        {
            "family": "text",
            "area": "text",
            "display_name": "bold-white-black",
            "color": "white",
            "background_color": "black",
            "bold": True,
        },
        {
            "family": "text",
            "area": "text",
            "display_name": "italic-red-yellow",
            "color": "red",
            "background_color": "yellow",
            "bold": True,
            "italic": True,
        },
    )
    for definition in style_definitions:
        document.insert_style(Style(**definition), automatic=True)
def add_style_from_xml(document: Document) -> None:
    """Insert the "custom" paragraph style, described by raw XML."""
    # Styles can be defined by an XML definition
    custom_style_xml = (
        '<style:style style:name="custom" '
        'style:display-name="custom" '
        'style:family="paragraph" '
        'style:parent-style-name="Text">'
        '<style:paragraph-properties fo:margin-left="2cm"/>'
        '<style:text-properties fo:color="#808080" loext:opacity="100%" '
        'fo:font-size="16pt" fo:font-style="normal" '
        'style:text-underline-style="solid" '
        'style:text-underline-width="auto" '
        'style:text-underline-color="font-color" '
        'fo:font-weight="bold"/>'
        "</style:style>"
    )
    custom_style = Element.from_tag(custom_style_xml)
    document.insert_style(custom_style)
def import_style_from_other_doc(document: Document) -> None:
    """Copy the "Yellow Highlight" text style from the STYLED_SOURCE file."""
    donor = Document(DATA / STYLED_SOURCE)
    document.insert_style(
        donor.get_style("text", display_name="Yellow Highlight"),
        automatic=True,
    )
def apply_styles(document: Document) -> None:
    """Apply some style changes to the document.

    Each nested helper demonstrates one technique; they are called in
    sequence at the end of this function.
    """

    def change_all_headers() -> None:
        """Wrap the text of every header in a span using the "green" style."""
        style = document.get_style(family="text", display_name="green")
        # header styles should include some hints about the numeration level
        # So, here we just prefer to apply style with a span
        for header in document.body.headers:
            header.set_span(style.name, offset=0)

    def change_all_paragraphs() -> None:
        """Set the "bold-blue" paragraph style on every paragraph."""
        style = document.get_style(family="paragraph", display_name="bold-blue")
        for para in document.body.paragraphs:
            para.style = style.name

    def change_some_paragraph() -> None:
        """Set the "italic-red" style on the paragraphs at positions 3, 5, 7."""
        style = document.get_style(family="paragraph", display_name="italic-red")
        document.body.get_paragraph(3).style = style.name
        document.body.get_paragraph(5).style = style.name
        document.body.get_paragraph(7).style = style.name

    def apply_span_regex() -> None:
        """Add spans on the text matching regular expressions."""
        yellow = document.get_style(family="text", display_name="bold-yellow-blue")
        white = document.get_style(family="text", display_name="bold-white-black")
        for para in document.body.paragraphs:
            para.set_span(yellow.name, regex=r"tortor|ipsum")
            para.set_span(white.name, regex=r"A\w+")

    def apply_span_offset() -> None:
        """Add a span selected by offset and length in paragraph 2."""
        red = document.get_style(family="text", display_name="italic-red-yellow")
        para = document.body.get_paragraph(2)
        para.set_span(red.name, offset=9, length=22)

    def apply_custom_style() -> None:
        """Set the "custom" style on paragraph 13."""
        para = document.body.get_paragraph(13)
        para.style = "custom"

    def apply_imported_style() -> None:
        """Highlight each word of paragraph 14 with the imported style."""
        para = document.body.get_paragraph(14)
        style = document.get_style(family="text", display_name="Yellow Highlight")
        # feature: to not highlight spaces, make as many Spans as required:
        for start, end in para.search_all(r"\w+"):
            length = end - start
            para.set_span(style.name, offset=start, length=length)

    change_all_headers()
    change_all_paragraphs()
    change_some_paragraph()
    apply_span_regex()
    apply_span_offset()
    apply_custom_style()
    apply_imported_style()
def main() -> None:
    """Run the recipe, saving the document before and after styling."""
    doc = make_base_document()
    save_new(doc, TARGET_BEFORE)
    # gather styles from three different origins, then use them
    for add_styles_step in (
        add_some_styles,
        add_style_from_xml,
        import_style_from_other_doc,
    ):
        add_styles_step(doc)
    apply_styles(doc)
    test_unit(doc)
    save_new(doc, TARGET_AFTER)
def test_unit(document: Document) -> None:
    """Check paragraphs and style usage; active only when ODFDO_TESTING is set."""
    if "ODFDO_TESTING" not in os.environ:
        return
    assert len(list(document.body.paragraphs)) == 15
    # every listed style must be used by at least one element
    used_styles = (
        ("paragraph", ("bold-blue", "italic-red", "custom")),
        (
            "text",
            ("green", "bold-yellow-blue", "bold-white-black", "Yellow Highlight"),
        ),
    )
    for family, display_names in used_styles:
        for display_name in display_names:
            style = document.get_style(family=family, display_name=display_name)
            assert document.get_styled_elements(style.name)
    highlight = document.get_style(family="text", display_name="Yellow Highlight")
    assert len(document.get_styled_elements(highlight.name)) == 21
if __name__ == "__main__":
main()
Delete parts of a text document
Idea coming from issue #49: Deleting content from one point to another in a .odt document.
recipes/delete_parts_of_a_text_document.py
#!/usr/bin/env python
"""Idea comming from issue #49:
Deleting content from one point to another in a .odt document.
"""
import os
from pathlib import Path
from odfdo import Document, Element, Header, Paragraph
_DOC_SEQUENCE = 400
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "delete_content"
TARGET_INITIAL = "document_initial.odt"
TARGET_FINAL = "document_final.odt"
class KeepingState:
    """Mutable holder of the current step of the element filtering."""

    def __init__(self, initial: str):
        # name of the current step; keep_element() reads and updates it
        self.step = initial
def save_new(document: Document, name: str):
    """Write *document* as OUTPUT_DIR / name, creating the directory if needed."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def create_base_document():
    """Return a text document with one title and four parts (A to D)."""
    document = Document("text")
    body = document.body
    body.clear()
    body.append(Header(1, "Some title"))
    body.append(Header(2, "part A"))
    body.append(
        Paragraph(
            "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Sed non risus."
        )
    )
    body.append(
        Paragraph(
            "Suspendisse lectus tortor, dignissim sit amet, adipiscing "
            "nec, ultricies sed, dolor. Cras elementum ultrices diam. "
            "Maecenas ligula massa, varius a, semper congue, euismod non, mi."
        )
    )
    body.append(Header(2, "part B"))
    body.append(
        Paragraph(
            "Proin porttitor, orci nec nonummy molestie, enim est eleifend "
            "mi, non fermentum diam nisl sit amet erat. Duis semper. "
            "Duis arcu massa, scelerisque vitae, consequat in, pretium a, "
            "enim. Pellentesque congue. Ut in risus volutpat libero pharetra tempor."
        )
    )
    body.append(
        Paragraph(
            "Cras vestibulum bibendum augue. Praesent egestas leo in pede. "
            "Praesent blandit odio eu enim. Pellentesque sed dui ut augue "
            "blandit sodales. Vestibulum ante ipsum primis in faucibus orci "
            "luctus et ultrices posuere cubilia Curae; Aliquam nibh."
        )
    )
    body.append(Header(2, "part C"))
    body.append(
        Paragraph(
            "Mauris ac mauris sed pede pellentesque fermentum. "
            "Maecenas adipiscing ante non diam sodales hendrerit. Ut "
            "velit mauris, egestas sed, gravida nec, ornare ut, mi."
        )
    )
    body.append(
        Paragraph(
            "Aenean ut orci vel massa suscipit pulvinar. Nulla sollicitudin. "
            "Fusce varius, ligula non tempus aliquam, nunc turpis "
            "ullamcorper nibh, in tempus sapien eros vitae ligula. "
            "Pellentesque rhoncus nunc et augue. Integer id felis. Curabitur "
            "aliquet pellentesque diam. Integer quis metus vitae elit "
            "lobortis egestas."
        )
    )
    body.append(Header(2, "part D"))
    body.append(
        Paragraph(
            "Morbi vel erat non mauris convallis vehicula. Nulla et sapien. "
            "Integer tortor tellus, aliquam faucibus, convallis id, congue "
            # trailing space added: the two sentences were glued together
            "eu, quam. Mauris ullamcorper felis vitae erat. "
            "Proin feugiat, augue non elementum posuere, metus purus "
            "iaculis lectus, et tristique ligula justo vitae magna. Aliquam "
            "convallis sollicitudin purus."
        )
    )
    body.append(
        Paragraph(
            "Praesent aliquam, enim at fermentum mollis, ligula massa "
            "adipiscing nisl, ac euismod nibh nisl eu lectus. Fusce "
            "vulputate sem at sapien. Vivamus leo. Aliquam euismod "
            "libero eu enim. Nulla nec felis sed leo placerat imperdiet."
        )
    )
    body.append(
        Paragraph(
            "Aenean suscipit nulla in justo. Suspendisse cursus rutrum augue. "
            "Nulla tincidunt tincidunt mi. Curabitur iaculis, lorem vel "
            "rhoncus faucibus, felis magna fermentum augue, et ultricies "
            "lacus lorem varius purus. Curabitur eu amet."
        )
    )
    return document
def keep_element(state: KeepingState, elem: Element) -> bool:
    """Return True if *elem* must be kept, updating *state* as a side effect.

    The state goes from "before" to "deleting" to "after"; elements seen
    while the state is "deleting" are the ones to drop.
    """
    # keep everything until "part B"
    if state.step == "before":
        if isinstance(elem, Header) and "part B" in str(elem):
            state.step = "deleting"
    # delete everything until the paragraph starting with "Aenean"
    # (checked in the same call, right after a possible switch to "deleting")
    if state.step == "deleting":
        if isinstance(elem, Paragraph) and str(elem).startswith("Aenean"):
            state.step = "after"
    return state.step != "deleting"
def main():
    """Run the recipe: save the full document, then the trimmed one."""
    document = create_base_document()
    save_new(document, TARGET_INITIAL)
    # Removing part B and half the part C
    state = KeepingState("before")
    body = document.body
    kept = [elem for elem in body.children if keep_element(state, elem)]
    document.body.clear()
    document.body.extend(kept)
    save_new(document, TARGET_FINAL)
    test_unit(document)
def test_unit(document: Document) -> None:
    """Check the remaining paragraphs; active only when ODFDO_TESTING is set."""
    if "ODFDO_TESTING" not in os.environ:
        return
    body = document.body
    first_text = str(body.get_paragraph(position=0))
    print(first_text)
    assert first_text.startswith("Lorem")
    fourth_text = str(body.get_paragraph(position=3))
    print(fourth_text)
    assert fourth_text.startswith("Morbi")
if __name__ == "__main__":
main()
Create color chart in spreadsheet
Create some color chart in a spreadsheet using cells styles. (adapted from the odfdo library test cases)
recipes/create_color_chart_in_spreadsheet.py
#!/usr/bin/env python
"""Create some color chart in a spreadsheet using cells styles.
(adapted from the odfdo library test cases)
"""
from pathlib import Path
from odfdo import (
Cell,
Document,
Row,
Style,
Table,
__version__,
create_table_cell_style,
make_table_cell_border_string,
rgb2hex,
)
_DOC_SEQUENCE = 420
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "chart"
TARGET = "color_chart.ods"
def save_new(document: Document, name: str):
    """Write *document* as OUTPUT_DIR / name, creating the directory if needed."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination)
def hello_messages():
    """Print the odfdo version and the name of the file being generated."""
    banner = (
        "odfdo installation test",
        f" Version : {__version__}",
        "",
        f"Generating color chart in {TARGET}",
        "...",
    )
    for line in banner:
        print(line)
def generate_chart():
    """Return a spreadsheet document holding a table of colored cells.

    Each cell shows the hexadecimal value of its own background color.
    """
    document = Document("spreadsheet")
    body = document.body
    body.clear()
    table = Table("chart")

    # The border strings are the same for every cell: build them once.
    border_rl = make_table_cell_border_string(thick="0.20cm", color="white")
    border_bt = make_table_cell_border_string(
        thick="0.80cm",
        color="white",
    )

    for y in range(0, 256, 8):
        row = Row()
        for x in range(0, 256, 32):
            cell_value = (x, y, (x + y) % 256)
            style = create_table_cell_style(
                color="grey",
                background_color=cell_value,
                border_right=border_rl,
                border_left=border_rl,
                border_bottom=border_bt,
                border_top=border_bt,
            )
            name = document.insert_style(style=style, automatic=True)
            cell = Cell(value=rgb2hex(cell_value), style=name)
            row.append_cell(cell)
        table.append_row(row)

    row_style = Style("table-row", height="1.80cm")
    name_style_row = document.insert_style(style=row_style, automatic=True)
    for row in table.rows:
        row.style = name_style_row
        table.set_row(row.y, row)

    col_style = Style("table-column", width="3.6cm")
    name_style_col = document.insert_style(style=col_style, automatic=True)
    for column in table.columns:
        # use the inserted style name, exactly as done for the rows above
        column.style = name_style_col
        table.set_column(column.x, column)

    body.append(table)
    return document
def main():
    """Run the recipe: print the banner, build the chart and save it."""
    hello_messages()
    chart = generate_chart()
    save_new(chart, TARGET)
if __name__ == "__main__":
main()
Get cell background color
Read the background color of a table cell.
recipes/get_cell_background_color.py
#!/usr/bin/env python
"""Read the background color of a table cell.
"""
import os
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 440
DATA = Path(__file__).parent / "data"
SOURCE = "cell_color.ods"
def read_source_document():
    """Load the document named on the command line, or the bundled sample."""
    cli_args = sys.argv[1:]
    source = cli_args[0] if cli_args else DATA / SOURCE
    return Document(source)
def main():
    """Print the background color of several cells of the first sheet."""
    doc = read_source_document()
    # Each entry: printed label, then the arguments following the table
    # index 0 (first sheet): the cell coordinates and, optionally, the
    # default color to return (the default is "#ffffff").
    queries = (
        ("Color for B2", ("b2",)),
        ("Color for B3", ("b3",)),
        ("Color for C3", ("c3",)),
        ("Color for D3", ("d3",)),
        # set another default
        ("Color for e3", ("e3", "#123456")),
        # read very far cell
        ("Color for far", ((1000, 10000),)),
    )
    colors = []
    for label, args in queries:
        color = doc.get_cell_background_color(0, *args)
        print(label, color)
        colors.append(color)

    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    expected_colors = (
        "#2a6099",
        "#ff4000",
        "#ffff00",
        "#ffffff",
        "#123456",
        "#ffffff",
    )
    for color, expected in zip(colors, expected_colors):
        assert color == expected
if __name__ == "__main__":
main()
Extract a sub table from some big table
Create a table of 1000 lines and 100 columns, extract a sub table of 100 lines 26 columns, save the result in a spreadsheet document.
recipes/extract_a_sub_table_from_some_big_table.py
#!/usr/bin/env python
"""Create a table of 1000 lines and 100 columns, extract a sub table
of 100 lines 26 columns, save the result in a spreadsheet document.
"""
import os
from pathlib import Path
from odfdo import Document, Row, Table
_DOC_SEQUENCE = 450
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "extract_table"
TARGET = "document.ods"
def save_new(document: Document, name: str):
    """Write *document* as OUTPUT_DIR / name, creating the directory if needed."""
    out_file = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", out_file)
    document.save(out_file, pretty=True)
def syracuse(n: int) -> int:
    """Return the term following *n*: n // 2 if n is even, else 3 * n + 1."""
    is_odd = n % 2 != 0
    return 3 * n + 1 if is_odd else n // 2
def generate_big_table(table_name) -> Document:
    """Return a spreadsheet with one table of 1000 rows and 100 columns.

    Row number `line` holds the first 100 terms of the syracuse sequence
    starting at `line`.
    """
    spreadsheet = Document("spreadsheet")
    body = spreadsheet.body
    body.clear()
    table = Table(table_name)
    body.append(table)
    lines = 1000
    cols = 100
    for line in range(lines):
        values = []
        term = line
        for _ in range(cols):
            values.append(term)
            term = syracuse(term)
        row = Row()
        row.set_values(values)
        table.append(row)
    return spreadsheet
def main():
    """Extract the same sub table twice, with two different methods."""
    table_name = "Big Table"
    spreadsheet = generate_big_table(table_name)
    body = spreadsheet.body
    big_table = body.get_table(name=table_name)
    print("Size of Big Table :", big_table.size)

    # now extract 100 rows of 26 columns, row by row:
    table1 = Table("Extract 1")
    for row_index in range(800, 900):
        source_row = big_table.get_row(row_index)
        picked = [source_row.get_value(col) for col in range(50, 76)]
        new_row = Row()
        new_row.set_values(picked)
        table1.append(new_row)
    body.append(table1)
    print("Size of extracted table 1 :", table1.size)

    # other method: copy the whole area of cells at once
    table2 = Table("Extract 2")
    area_cells = big_table.get_cells(coord=(50, 800, 75, 899))
    table2.set_cells(coord=(0, 0), cells=area_cells)
    body.append(table2)
    print("Size of extracted table 2 :", table2.size)

    test_unit(spreadsheet)
    save_new(spreadsheet, TARGET)
# Text matching the three "Size of ..." lines printed by main().
_expected_result = """
Size of Big Table : (100, 1000)
Size of extracted table 1 : (26, 100)
Size of extracted table 2 : (26, 100)
"""
def test_unit(spreadsheet: Document) -> None:
    """Check the size of the first two tables; only when ODFDO_TESTING is set."""
    if "ODFDO_TESTING" not in os.environ:
        return
    body = spreadsheet.body
    first_table = body.get_table(position=0)
    assert first_table.size == (100, 1000)
    second_table = body.get_table(position=1)
    assert second_table.size == (26, 100)
if __name__ == "__main__":
main()
Make a basic spreadsheet
Create a spreadsheet with one table and a few data, strip the table and compute the table size.
recipes/make_a_basic_spreadsheet.py
#!/usr/bin/env python
"""Create a spreadsheet with one table and a few data, strip the table
and compute the table size.
"""
from pathlib import Path
from odfdo import Document, Table
_DOC_SEQUENCE = 460
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_ods"
TARGET = "spreadsheet.ods"
def save_new(document: Document, name: str):
    """Write *document* as OUTPUT_DIR / name, creating the directory if needed."""
    target_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", target_path)
    document.save(target_path, pretty=True)
def main():
    """Run the recipe: build the spreadsheet and save it."""
    spreadsheet = generate_document()
    save_new(spreadsheet, TARGET)
def generate_document():
    """Return a spreadsheet with one table, printing its size along the way."""
    # creating an empty spreadsheet document:
    document = Document("spreadsheet")

    # Each sheet of a spreadsheet is a table:
    # setting from the beginning the width (columns) and height (rows)
    # is not mandatory, but a good practice, since odfdo doesn't check
    # the actual existence of cells
    body = document.body
    body.clear()
    table = Table("First Table", width=20, height=3)
    body.append(table)

    # A table contains rows, we can append some more.
    for _ in range(2):
        table.append_row()
    print("rows in the table (3+2):", len(table.rows))

    # A row contains cells
    for row in table.rows:
        print("row, nb of cells ", row.y, len(row.cells))

    last_row = table.get_row(-1)
    print("nb of cells of the last row:", len(last_row.cells))

    # cells can have different kinds of values: text in the first 3 rows,
    for row_nb in range(3):
        for col_nb in range(10):
            table.set_value((col_nb, row_nb), f"cell {col_nb} {row_nb}")
    # numbers in the next 2 rows
    for row_nb in range(3, 5):
        for col_nb in range(10):
            table.set_value((col_nb, row_nb), col_nb * 100 + row_nb)

    # Before saving the document, we can strip the unused columns:
    print("table size:", table.size)
    table.rstrip()
    print("table size after strip:", table.size)
    print("nb of cells of the last row:", len(table.get_row(-1).cells))
    print("Content of the table:")
    print(table.to_csv())

    return document
if __name__ == "__main__":
main()
Make spreadsheet with named ranges
Create a spreadsheet with two tables, using some named ranges.
recipes/make_spreadsheet_with_named_ranges.py
#!/usr/bin/env python
"""Create a spreadsheet with two tables, using some named ranges.
"""
from pathlib import Path
from odfdo import Document, Table
_DOC_SEQUENCE = 470
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "named_range"
TARGET = "spreadsheet.ods"
def save_new(document: Document, name: str):
    """Write *document* as OUTPUT_DIR / name, creating the directory if needed."""
    saved_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", saved_path)
    document.save(saved_path, pretty=True)
def main():
    """Run the recipe: build the spreadsheet and save it."""
    spreadsheet = generate_document()
    save_new(spreadsheet, TARGET)
def generate_document():
    """Return a spreadsheet with two tables sharing two named ranges.

    The named ranges are defined on the first table and read back from the
    second one; the content of both tables is printed as CSV.
    """
    document = Document("spreadsheet")
    body = document.body
    body.clear()
    table = Table("First Table")
    body.append(table)
    # populate the table :
    for index in range(10):
        table.set_value((1, index), (index + 1) ** 2)
    table.set_value("A11", "Total:")

    # lets define a named range for the 10 values :
    range_squares = "B1:B10"
    name = "squares_values"
    table_name = table.name
    table.set_named_range(name, range_squares, table_name)

    # we can define a single cell range, using notation "B11" or (1, 10) :
    table.set_named_range("total", (1, 10), table_name)

    # get named range values :
    values = table.get_named_range("squares_values").get_values(flat=True)

    # set named range value :
    result = sum(values)
    table.get_named_range("total").set_value(result)

    # lets use the named ranges from a second table :
    table2 = Table("Second Table")
    body.append(table2)

    named_range1 = table2.get_named_range("total")
    table2.set_value("A1", "name:")
    table2.set_value("B1", named_range1.name)
    table2.set_value("A2", "range:")
    table2.set_value("B2", str(named_range1.crange))
    table2.set_value("A3", "from table:")
    table2.set_value("B3", named_range1.table_name)
    table2.set_value("A4", "content:")
    table2.set_value("B4", named_range1.get_value())

    named_range2 = table2.get_named_range("squares_values")
    table2.set_value("D1", "name:")
    table2.set_value("E1", named_range2.name)
    table2.set_value("D2", "range:")
    table2.set_value("E2", str(named_range2.crange))
    table2.set_value("D3", "from table:")
    table2.set_value("E3", named_range2.table_name)
    table2.set_value("D4", "content:")
    # using "E4:4" notation is a little hack for the area starting at E4 on row 4
    table2.set_values(values=[named_range2.get_values(flat=True)], coord="E4:4")

    print("Content of the table1:")
    print(table.name)
    print(table.to_csv())
    print(table2.name)
    print(table2.to_csv())

    # of course named ranges are stored in the document :
    return document
if __name__ == "__main__":
main()
Introspecting elements
Demo of quick introspecting of a document’s elements.
recipes/introspecting_elements.py
#!/usr/bin/env python
"""Demo of quick introspecting of a document's elements.
"""
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 480
DATA = Path(__file__).parent / "data"
# ODF export of Wikipedia article Hitchhiker's Guide to the Galaxy (CC-By-SA) :
SOURCE = "collection2.odt"
def read_source_document():
    """Open the document to inspect.

    The first command-line argument is used as the source when one is
    given; otherwise the SOURCE file of the DATA directory is opened.
    """
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def main():
    """Print the neighbours of a paragraph and the XML form of a link."""
    # Every lookup starts from the body, an XML element giving access to
    # the other elements of the document.
    content = read_source_document().body

    # Elements are nodes of an XML tree: a paragraph exposes both its
    # children and its parent.
    paragraph = content.get_paragraph(position=42)
    print("Children of the praragraph:\n ", paragraph.children)
    print("\nParent of the paragraph:\n ", paragraph.parent)

    # Any element can be dumped as serialized XML, which is not the same
    # thing as its text content.
    first_link = content.get_link(position=0)
    print("\nContent of the serialization link:")
    print(" ", first_link.serialize())
    print("\nWhich is different from the text content of the link:")
    print(" ", str(first_link))
if __name__ == "__main__":
main()
Show meta data
Print the metadata information of an ODF file.
recipes/show_meta_data.py
#!/usr/bin/env python
"""Print the metadata informations of a ODF file.
"""
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 490
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
def read_source_document():
    """Return the Document whose metadata will be displayed.

    A path given as first command-line argument takes precedence over the
    default DATA / SOURCE sample.
    """
    if len(sys.argv) > 1:
        return Document(sys.argv[1])
    return Document(DATA / SOURCE)
def main():
    """Print every metadata field of the source document, then a summary."""
    document = read_source_document()
    # The metadata are reachable through the meta part; document.meta is
    # the shortcut for document.get_part("meta.xml").
    meta = document.meta

    # Each value below is read through a property of the meta part.
    # (Notice that odfdo doesn't increment editing cycles nor statistics
    # when saving the document.)
    # According to the recipe's original notes: dates and durations are
    # decoded from (and serialized back to) strings by dedicated datatypes,
    # strings are decoded as unicode and numeric values as Decimal.
    labelled_properties = (
        ("Title :", "title"),
        ("creator :", "creator"),
        ("creation date :", "creation_date"),
        ("modification date :", "date"),
        ("initial creator :", "initial_creator"),
        ("subject :", "subject"),
        ("description :", "description"),
        ("editing cycles :", "editing_cycles"),
        ("editing duration :", "editing_duration"),
        ("generator :", "generator"),
        ("language :", "language"),
        ("keywords :", "keyword"),
    )
    print(f"Meta data of {document.container.path}")
    for label, property_name in labelled_properties:
        print(label, getattr(meta, property_name))

    print("statistics ")
    statistics = meta.statistic
    if statistics is not None:
        for key, value in statistics.items():
            # The first 5 characters of each key are dropped for display.
            print(f" {key[5:]:<18}: {value}")

    user_defined = meta.user_defined_metadata
    if user_defined:
        print("user defined metadata")
        for key, value in user_defined.items():
            # NOTE(review): the same 5-character strip is applied here;
            # confirm that user-defined keys really carry such a prefix.
            print(f" {key[5:]:<18}: {value}")

    # A quick way to have all of this information at once:
    print("-" * 70)
    print(document.get_formated_meta())
if __name__ == "__main__":
main()
Move link to footnote
Remove all links from a document, turning each link's information (URL, text) into a footnote. Links already inside notes are simply removed, keeping the URL as plain text. (Side note: most office suites dislike notes within notes.)
recipes/move_link_to_footnote.py
#!/usr/bin/env python
"""Remove all links from a document, transforming each link information (URL,
text) into a footnote. Of course, removing links already inside notes, just
keeping plain text URL. (Side note: most office suite dislike notes in notes)
"""
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 500
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "footnote1"
TARGET = "document.odt"
def save_new(document: Document, name: str):
    """Save *document* under OUTPUT_DIR as *name*.

    OUTPUT_DIR (and any missing parent) is created first; the destination
    path is printed before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR.joinpath(name)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def remove_links(element):
    """Remove every "text:a" element found below *element*.

    The work is delegated to _tree_remove_tag(), which rebuilds *element*
    in place when something was removed. Nothing is returned.
    """
    # Context is (tag to remove, protection tag, protection flag).
    # A real None (rather than the string "None") states explicitly that
    # no tag protects its children from the removal.
    context = ("text:a", None, False)
    _tree_remove_tag(element, context)
def main():
    """Replace each link of the document by a footnote and save the result.

    The source is the first command-line argument, or DATA / SOURCE.
    Links already inside notes are replaced by their URL in plain text;
    every other link found in a paragraph yields a new footnote holding
    its text and URL. The link elements themselves are then removed.
    """
    try:
        source = Path(sys.argv[1])
    except IndexError:
        source = DATA / SOURCE
    document = Document(str(source))
    body = document.body
    print("Moving links to footnotes from", source)
    print("links occurrences:", len(body.get_links()))
    print("footnotes occurences:", len(body.get_notes()))

    counter_links_in_notes = 0
    for note in body.get_notes():
        for link in note.get_links():
            counter_links_in_notes += 1
            url = link.get_attribute("xlink:href")
            # A link followed by no text has no tail (None): fall back to
            # "" so the literal text "None" is never written in the note.
            tail = link.tail or ""
            link.tail = f" (link: {url}) {tail}"
        # Strip the links once per note, after all tails are annotated:
        # removing them rebuilds the note content, so it must not happen
        # while links of this note are still waiting to be processed.
        remove_links(note)
    print("links in notes:", counter_links_in_notes)

    counter_added_note = 0  # added notes counter
    for paragraph in body.paragraphs:
        for link in paragraph.get_links():
            url = link.get_attribute("xlink:href")
            text = link.inner_text
            counter_added_note += 1
            paragraph.insert_note(
                after=link,  # citation is inserted after current link
                note_id=f"my_note_{counter_added_note}",
                citation="1",  # The symbol the user sees to follow the footnote.
                # The footnote itself, at the end of the page:
                body=(f". {text}, link: {url}"),
            )
        # Links are only needed as anchors for insert_note(): drop them
        # once all notes of the paragraph are inserted.
        remove_links(paragraph)

    print("links occurrences:", len(body.get_links()))
    print("footnotes occurences:", len(body.get_notes()))
    save_new(document, TARGET)
def _tree_remove_tag(element, context):
"""Remove tag in the element, recursive.
- context: tuple (tag to remove, protection tag, protection flag)
where protection tag protect from change sub elements one sub
level depth"""
buffer = element.clone
modified = False
sub_elements = []
tag, keep_inside_tag, protected = context
if keep_inside_tag and element.tag == keep_inside_tag:
protect_below = True
else:
protect_below = False
for child in buffer.children:
striped, is_modified = _tree_remove_tag(
child, (tag, keep_inside_tag, protect_below)
)
if is_modified:
modified = True
if isinstance(striped, list):
for item in striped:
sub_elements.append(item)
else:
sub_elements.append(striped)
if not protected and element.tag == tag:
element = []
modified = True
else:
if not modified:
# no change in element sub tree, no change on element
return (element, False)
element.clear()
try:
for key, value in buffer.attributes.items():
element.set_attribute(key, value)
except ValueError:
print("Incorrect attribute in", buffer)
text = buffer.text
tail = buffer.tail
if text is not None:
element.append(text)
for child in sub_elements:
element.append(child)
if tail is not None:
if isinstance(element, list):
element.append(tail)
else:
element.tail = tail
return (element, True)
if __name__ == "__main__":
main()
Remove http links
Remove the links (the text:a tag), keeping the inner text.
recipes/remove_http_links.py
#!/usr/bin/env python
"""Remove the links (the text:a tag), keeping the inner text."""
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 510
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "nolink"
TARGET = "document.odt"
def save_new(document: Document, name: str):
    """Store *document* in OUTPUT_DIR under the file name *name*.

    The output directory is created when missing and the target path is
    printed before the document is saved.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    target_path = OUTPUT_DIR.joinpath(name)
    print("Saving:", target_path)
    document.save(target_path, pretty=True)
def main():
    """Remove the links of the source document and save it as TARGET.

    The source is the first command-line argument, or DATA / SOURCE. The
    number of links is printed before and after the removal.
    """
    source = Path(sys.argv[1]) if len(sys.argv) > 1 else DATA / SOURCE
    document = Document(str(source))
    content = document.body
    print("Removing links from", source)
    print("'text:a' occurrences:", len(content.get_links()))
    remove_links(content)
    print("'text:a' occurrences after removal:", len(content.get_links()))
    save_new(document, TARGET)
def remove_links(element):
    """Remove every "text:a" element found below *element*.

    The inner text of the links is kept: _tree_remove_tag() puts the text,
    children and tail of each removed element back into its parent.
    Nothing is returned.
    """
    # Context is (tag to remove, protection tag, protection flag).
    # A real None (rather than the string "None") states explicitly that
    # no tag protects its children from the removal.
    context = ("text:a", None, False)
    _tree_remove_tag(element, context)
def _tree_remove_tag(element, context):
"""Remove tag in the element, recursive.
- context: a tuple (tag to remove, protection tag, protection flag)
where protection tag protect from change sub elements one sub level depth
"""
buffer = element.clone
modified = False
sub_elements = []
tag, keep_inside_tag, protected = context
if keep_inside_tag and element.tag == keep_inside_tag:
protect_below = True
else:
protect_below = False
for child in buffer.children:
striped, is_modified = _tree_remove_tag(
child, (tag, keep_inside_tag, protect_below)
)
if is_modified:
modified = True
if isinstance(striped, list):
for item in striped:
sub_elements.append(item)
else:
sub_elements.append(striped)
if not protected and element.tag == tag:
element = []
modified = True
else:
if not modified:
# no change in element sub tree, no change on element
return (element, False)
element.clear()
try:
for key, value in buffer.attributes.items():
element.set_attribute(key, value)
except ValueError:
print("Bad attribute in", buffer)
text = buffer.text
tail = buffer.tail
if text is not None:
element.append(text)
for child in sub_elements:
element.append(child)
if tail is not None:
if isinstance(element, list):
element.append(tail)
else:
element.tail = tail
return (element, True)
if __name__ == "__main__":
main()
Remove span styles
Remove span styles (like some words in bold in a paragraph), except in titles.
recipes/remove_span_styles.py
#!/usr/bin/env python
"""Remove span styles (like some words in bold in a paragraph),
except in titles.
"""
import os
import sys
from pathlib import Path
from odfdo import Body, Document, Element
_DOC_SEQUENCE = 520
DATA = Path(__file__).parent / "data"
SOURCE = "dormeur.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "nostyle"
TARGET = "document.odt"
def read_source_document() -> Document:
    """Return the source Document.

    The path comes from the first command-line argument when present,
    else the DATA / SOURCE sample is used.
    """
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def save_new(document: Document, name: str) -> None:
    """Save a recipe result Document as OUTPUT_DIR / name.

    The output directory is created when missing; the destination is
    printed before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR.joinpath(name)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def remove_text_span(body: Body) -> None:
    """Remove the "text:span" elements below *body*, except in titles.

    "text:h" is given as protection tag, so spans that are direct children
    of a heading are left alone.
    """
    # (tag to remove, protection tag, protection flag)
    context = ("text:span", "text:h", False)
    _tree_remove_tag(body, context)
def _tree_remove_tag(element: Element, context: tuple) -> Element:
"""Send back a copy of the element, without span styles. Element should be
either paragraph or heading.
- context: a tuple (tag to remove, protection tag, protection flag)
where protection tag protects from change any sub elements one level depth
"""
buffer = element.clone
modified = False
sub_elements = []
tag, keep_inside_tag, protected = context
if keep_inside_tag and element.tag == keep_inside_tag:
protect_below = True
else:
protect_below = False
for child in buffer.children:
striped, is_modified = _tree_remove_tag(
child, (tag, keep_inside_tag, protect_below)
)
if is_modified:
modified = True
if isinstance(striped, list):
for item in striped:
sub_elements.append(item)
else:
sub_elements.append(striped)
if not protected and element.tag == tag:
element = []
modified = True
else:
if not modified:
# no change in element sub tree, no change on element
return (element, False)
element.clear()
try:
for key, value in buffer.attributes.items():
element.set_attribute(key, value)
except ValueError:
print("Bad attribute in", buffer)
text = buffer.text
tail = buffer.tail
if text is not None:
element.append(text)
for child in sub_elements:
element.append(child)
if tail is not None:
if isinstance(element, list):
element.append(tail)
else:
element.tail = tail
return (element, True)
def clean_document(document: Document) -> None:
    """Remove span styles from a Document.

    The number of "text:span" elements of the body is printed before and
    after the removal.
    """
    content = document.body
    spans_before = len(content.spans)
    print("'text:span' occurrences:", spans_before)
    remove_text_span(content)
    spans_after = len(content.spans)
    print("'text:span' occurrences after removal:", spans_after)
def main() -> None:
    """Run the recipe: load the source, strip the spans, check and save."""
    loaded = read_source_document()
    clean_document(loaded)
    # The self-check does nothing outside of the test suite.
    test_unit(loaded)
    save_new(loaded, TARGET)
def test_unit(document: Document) -> None:
    """Check the cleaned document; active only for the test suite.

    Nothing is checked unless ODFDO_TESTING is set in the environment.
    """
    if os.environ.get("ODFDO_TESTING") is None:
        return
    remaining_spans = len(document.body.spans)
    assert remaining_spans == 1
if __name__ == "__main__":
main()
Retrieve all pictures from odf files
Analyse a list of files and directories (recursively), open all ODF documents, and copy the pictures they contain into a directory.
recipes/retrieve_all_pictures_from_ODF_files.py
#!/usr/bin/env python
"""Analyse a list of files and directory (recurse), open all ODF documents
and copy pictures from documents in a directory.
"""
import sys
import time
from hashlib import sha256
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 530
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "collected_pics"
DATA = Path(__file__).parent / "data"
# encoding = "UTF8"
known_images = set()
counter_image = 0
counter_odf = 0
counter_outside = 0
def store_image(path, name, content):
    """Write an image in OUTPUT_DIR as "<odffile>_<n>_<imagename>".

    <odffile> is the document file name with dots replaced by underscores
    and <n> is the first number, counting from 1, giving a file name that
    does not exist yet. counter_image is incremented once the file is
    written.
    """
    global counter_image
    if not OUTPUT_DIR.is_dir():
        OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    prefix = path.name.replace(".", "_")
    rank = 1
    while (target := OUTPUT_DIR / f"{prefix}_{rank}_{name}").exists():
        rank += 1
    target.write_bytes(content)
    counter_image += 1
def parse_odf_pics(path: Path):
    """Copy the not yet seen images of an ODF document to the output.

    Using odfdo for:
    - opening a possible ODF document: Document
    - finding the images of the document: body.images, image.url
    - reading an image content: get_part

    Files whose suffix does not start with ".od", and files that cannot be
    opened as a Document, are silently skipped.
    """
    global counter_odf, counter_outside
    suffix = path.suffix.lower()
    if not suffix.startswith(".od"):
        return
    try:
        document = Document(path)
    except Exception:
        # Best effort: whatever cannot be opened is simply ignored.
        return
    counter_odf += 1
    for picture in document.body.images:
        url = picture.url
        if not url:
            continue
        try:
            payload = document.get_part(url)
        except KeyError:
            # The URL does not designate a part stored in the document.
            print("- not found inside document:", path)
            print(" image URL:", url)
            counter_outside += 1
            continue
        file_name = url.rsplit("/", 1)[-1]
        if known_pic(payload):
            continue
        store_image(path, file_name, payload)
def known_pic(content) -> bool:
    """Tell whether this content was already seen, and remember it.

    Contents are identified by their sha256 digest, kept in the
    module-level set known_images.
    """
    digest = sha256(content).digest()
    already_seen = digest in known_images
    if not already_seen:
        known_images.add(digest)
    return already_seen
def analyse_document(source):
    """Run parse_odf_pics() on every file found below *source*."""
    for entry in source.rglob("*"):
        if not entry.is_file():
            continue
        parse_odf_pics(entry)
def main():
    """Collect the pictures found below the source and print a summary.

    The source is the first command-line argument, or the DATA directory.
    """
    source = sys.argv[1] if len(sys.argv) > 1 else DATA
    started_at = time.time()
    analyse_document(Path(source))
    elapsed = time.time() - started_at
    summary = (
        f"{counter_image} images copied ({counter_outside} not found) from "
        f"{counter_odf} ODF files to {OUTPUT_DIR} in {elapsed:.2f}sec."
    )
    print(summary)
if __name__ == "__main__":
main()
Read document from bytesio
Read a document from BytesIO.
recipes/read_document_from_bytesio.py
#!/usr/bin/env python
"""Read a document from BytesIO.
"""
import io
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 600
DATA = Path(__file__).parent / "data"
SOURCE = "lorem.odt"
def main():
    """Load DATA / SOURCE through a BytesIO and check a known sentence.

    Raises ValueError when the sentence is not found in the document body.
    """
    # The bytes come from a file here; they could come from some network.
    raw = (DATA / SOURCE).read_bytes()
    # A BytesIO created from initial bytes is positioned at its start,
    # ready to be read by odfdo.Document.
    with io.BytesIO(raw) as stream:
        document = Document(stream)
        # check :
        found = document.body.search("Lorem ipsum dolor sit amet")
        if found is None:
            raise ValueError("string not found")
if __name__ == "__main__":
main()
Save document as bytesio
Save a document as BytesIO.
recipes/save_document_as_bytesio.py
#!/usr/bin/env python
"""Save a document as BytesIO.
"""
import io
from pathlib import Path
from odfdo import Document, Paragraph
_DOC_SEQUENCE = 605
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "bytes"
TARGET = "document.odt"
def make_document():
    """Return a new text document with a "Hello World" paragraph appended."""
    text_document = Document("text")
    text_document.body.append(Paragraph("Hello World"))
    return text_document
def main():
    """Save a document into a BytesIO, then write these bytes to a file."""
    document = make_document()
    with io.BytesIO() as stream:
        document.save(stream)
        # Now use the BytesIO in some way.
        # In a network context, typically:
        #     response.write(stream.getvalue())
        payload = stream.getvalue()
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    (OUTPUT_DIR / TARGET).write_bytes(payload)
if __name__ == "__main__":
main()
Export tables to csv format
Export tables to CSV format.
recipes/export_tables_to_csv_format.py
#!/usr/bin/env python
"""Export tables to CSV format."""
import os
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 610
DATA = Path(__file__).parent / "data"
SOURCE = "two_sheets.ods"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "csv"
def read_source_document() -> Document:
    """Return the source Document.

    The first command-line argument is the path of the document; without
    argument the DATA / SOURCE sample is opened.
    """
    if len(sys.argv) > 1:
        return Document(sys.argv[1])
    return Document(DATA / SOURCE)
def export_tables_to_csv(document: Document) -> None:
    """Export each table of the document body to its own CSV file.

    The table at position N is written to OUTPUT_DIR / "content_N.csv".
    """
    for position, sheet in enumerate(document.body.tables):
        OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        # Default parameters produce an "excel" CSV format,
        # see the Python csv library for options.
        sheet.to_csv(OUTPUT_DIR / f"content_{position}.csv")
def main() -> None:
    """Run the recipe: export the tables of the source, then self-check."""
    source_document = read_source_document()
    export_tables_to_csv(source_document)
    # The self-check does nothing outside of the test suite.
    test_unit(source_document)
def test_unit(document: Document) -> None:
    """Compare the CSV form of the first two tables with expected text.

    Active only for the test suite: nothing is checked unless
    ODFDO_TESTING is set in the environment.
    """
    if os.environ.get("ODFDO_TESTING") is None:
        return
    expected_by_table = (
        "col A,col B,col C\r\n1,2,3\r\na text,,another\r\n",
        ",,,\r\n,col B,col C,col D\r\n,1,2,3\r\n,a text,,another\r\n",
    )
    for position, expected in enumerate(expected_by_table):
        content = document.body.tables[position].to_csv()
        assert content == expected
if __name__ == "__main__":
main()
Import csv content into a table
Import CSV content into a table.
recipes/import_csv_content_into_a_table.py
#!/usr/bin/env python
"""Import CSV content into a table."""
import os
import sys
from pathlib import Path
from odfdo import Document, Table
_DOC_SEQUENCE = 615
DATA = Path(__file__).parent / "data"
SOURCE = "some_csv.csv"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "csv2"
TARGET = "document.ods"
def save_new(document: Document, name: str) -> None:
    """Save a recipe result Document as OUTPUT_DIR / name.

    The output directory is created when missing; the target path is
    printed before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    target_path = OUTPUT_DIR.joinpath(name)
    print("Saving:", target_path)
    document.save(target_path, pretty=True)
def read_text_document() -> str:
    """Return the text content of the source file.

    The file is the first command-line argument when present, else
    DATA / SOURCE.
    """
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Path(source).read_text()
def import_csv() -> Document:
    """Return a document containing an imported CSV content.

    The body of a new "ods" document is emptied, then receives a single
    table named "Sheet name" built from the CSV text.
    """
    csv_text = read_text_document()
    spreadsheet = Document("ods")
    sheet = Table.from_csv(csv_text, "Sheet name")
    content = spreadsheet.body
    content.clear()
    content.append(sheet)
    return spreadsheet
def main() -> None:
    """Run the recipe: import the CSV, self-check, save as TARGET."""
    imported = import_csv()
    # The self-check does nothing outside of the test suite.
    test_unit(imported)
    save_new(imported, TARGET)
def test_unit(document: Document) -> None:
    """Check the name and the CSV form of the imported table.

    Active only for the test suite: nothing is checked unless
    ODFDO_TESTING is set in the environment.
    """
    if os.environ.get("ODFDO_TESTING") is None:
        return
    sheet = document.body.get_table(0)
    assert sheet.name == "Sheet name"
    exported = sheet.to_csv()
    assert exported == ",,,\r\n,col B,col C,col D\r\n,1,2,3\r\n,a text,,another\r\n"
if __name__ == "__main__":
main()
Search and replace words
Search and replace words in a text document.
recipes/search_and_replace_words.py
#!/usr/bin/env python
"""Search and replace words in a text document.
"""
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 700
DATA = Path(__file__).parent / "data"
SOURCE = "lorem.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "replaced_text"
TARGET = "lorem_replaced.odt"
def save_new(document: Document, name: str):
    """Save *document* under OUTPUT_DIR as *name*.

    OUTPUT_DIR is created when missing; the destination path is printed
    before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR.joinpath(name)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def search_replace(document):
    """Demonstrate replace() on the body, on paragraphs and on headers."""

    def replace_in_each(elements, pattern, replacement):
        # Apply the same replacement to every element of a collection.
        for item in elements:
            item.replace(pattern, replacement)

    content = document.body
    # replace a string in the full document
    content.replace("Lorem", "(Lorem replaced)")
    # replace in paragraphs only
    replace_in_each(content.paragraphs, "ipsum", "(ipsum in paragraph)")
    # replace in headers
    replace_in_each(content.headers, "ipsum", "(ipsum in header)")
    # pattern is a regular expression
    content.replace(r"\S+lit ", "(...lit) ")
    content.replace(r"pul[a-z]+", "(pulvinar)")
def main():
    """Open DATA / SOURCE, apply the replacements and save as TARGET."""
    source_path = DATA / SOURCE
    document = Document(source_path)
    search_replace(document)
    save_new(document, TARGET)
if __name__ == "__main__":
main()
Spreadsheet with words frequency from a text
Load an ODF text, store the frequency of words in a spreadsheet, make requests on the table, by regex or value.
recipes/spreadsheet_with_words_frequency_from_a_text.py
#!/usr/bin/env python
"""Load an ODF text, store the frequency of words in a spreadsheet,
make requests on the table, by regex or value.
"""
import sys
from pathlib import Path
from odfdo import Document, Table
_DOC_SEQUENCE = 710
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "freq"
SOURCE = "collection2.odt"
DATA = Path(__file__).parent / "data"
TARGET = "frequency.ods"
def save_new(document: Document, name: str):
    """Store *document* in OUTPUT_DIR under the file name *name*.

    The output directory is created when missing and the target path is
    printed before the document is saved.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    target_path = OUTPUT_DIR.joinpath(name)
    print("Saving:", target_path)
    document.save(target_path, pretty=True)
def read_source_document():
    """Open the text document whose words will be counted.

    A path given as first command-line argument takes precedence over the
    default DATA / SOURCE sample.
    """
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def main():
    """Build the frequency spreadsheet and save it as TARGET."""
    save_new(generate_document(), TARGET)
# Sample of the console output of this recipe, presumably for the default
# source document (collection2.odt). The functions of this recipe do not
# read this string.
_expected_result = """
Word frequency analysis of collection2.odt
Nb of words: 9128
Unique words found: 2337
Rows in the table : 2337
Words corresponding to the regex: ^the
word: the occurences: 644
word: they occurences: 15
word: their occurences: 11
word: then occurences: 10
word: there occurences: 7
word: these occurences: 4
word: them occurences: 4
word: themselves occurences: 2
word: theme occurences: 2
word: themed occurences: 1
word: theatrical occurences: 1
List of words of frequency 15: two, they, release, one, its, his, film,
episodes, but, adaptation, UK, Radio, J, 0
"""
def frequence_count(document):
    """Return a dict mapping each word of the document body to its count.

    The text of the body is split on whitespace after the punctuation
    characters listed below are replaced by spaces. Words are case
    sensitive. The file name of the document, the number of words and the
    number of unique words are printed along the way.
    """
    from collections import Counter

    print("Word frequency analysis of", Path(document.container.path).name)
    separators = "():;!.,[]{}#@/\\=-_+*#@`\"'"
    # One translate() pass replaces every separator by a space, instead of
    # one full str.replace() scan of the text per character.
    to_spaces = str.maketrans(separators, " " * len(separators))
    words = str(document.body).translate(to_spaces).split()
    print("Nb of words:", len(words))
    # Counter keeps the words in order of first appearance; callers get a
    # plain dict.
    frequences = dict(Counter(words))
    print("Unique words found:", len(frequences))
    return frequences
def generate_document():
    """Return a spreadsheet listing the words of the source by frequency.

    The table "Frequency Table" holds one row (word, count) per unique
    word, most frequent first. Two sample queries are then printed: the
    rows matching a regular expression and the words seen exactly 15 times.
    """
    document_source = read_source_document()
    spreadsheet = Document("spreadsheet")
    frequences = frequence_count(document_source)
    # Populate the table in the spreadsheet
    body = spreadsheet.body
    body.clear()
    table = Table("Frequency Table")
    body.append(table)
    # Rank by decreasing (count, word): the pairs must really be sorted, a
    # bare reversed() would only flip the order of first appearance.
    ranked = sorted(
        ((count, word) for word, count in frequences.items()), reverse=True
    )
    # One solution would be to build a Row per word (set_value(0, word),
    # set_value(1, count), the cell type being guessed) and to add it with
    # table.append_row(). Another solution sets all the values at once:
    table.set_values([(word, count) for count, word in ranked])
    print("Rows in the table :", len(table.rows))
    # frequency of word:
    regex_query = "^the"
    print("Words corresponding to the regex:", regex_query)
    for row in table.get_rows(content=regex_query):
        print(f" word: {row.get_value(0):<20} occurences: {row.get_value(1)}")
    # list of words of frequency = 15
    found = [word for word, freq in table.iter_values() if freq == 15]
    print("List of words of frequency 15:", ", ".join(found))
    return spreadsheet
if __name__ == "__main__":
main()
Transpose table
Transpose a table. Create a spreadsheet table (example: 50 rows and 20 columns), and subsequently create a new table in a separate sheet where the columns and rows are now swapped (e.g. 20 rows and 50 columns).
recipes/transpose_table.py
#!/usr/bin/env python
"""Transpose a table. Create a spreadsheet table (example: 50 rows and 20
columns), and subsequently create a new table in a separate sheet where the
columns and rows are now swapped (e.g. 20 rows and 50 columns).
"""
from pathlib import Path
from odfdo import Document, Row, Table
_DOC_SEQUENCE = 800
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "transpose"
TARGET = "transposed.ods"
def save_new(document: Document, name: str):
    """Save *document* under OUTPUT_DIR as *name*.

    OUTPUT_DIR is created when missing; the destination path is printed
    before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR.joinpath(name)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def main():
    """Build the spreadsheet with its transposed tables, save it as TARGET."""
    save_new(generate_document(), TARGET)
def generate_document():
    """Return a spreadsheet holding a table and two transposed versions.

    "Table" has 50 rows and 20 columns, each cell containing its own
    coordinate ("A1", "B1", ...). "Symetry" is rebuilt column by column;
    "Transpose" is a clone of the first table passed through transpose().
    """
    spreadsheet = Document("spreadsheet")
    # Populate the table in the spreadsheet
    body = spreadsheet.body
    body.clear()
    table = Table("Table")
    body.append(table)
    lines = 50
    cols = 20
    column_letters = [chr(65 + index) for index in range(cols)]
    for line_number in range(1, lines + 1):
        row = Row()
        for index, letter in enumerate(column_letters):
            row.set_value(index, f"{letter}{line_number}")
        table.append(row)
    print("Size of Table :", table.size)

    # building the symmetric table using the classical method: each column
    # of the source becomes the row of same index in the new table.
    table2 = Table("Symetry")
    for index in range(cols):
        table2.set_row_values(index, table.get_column_values(index))
    body.append(table2)
    print("Size of symetric table 2 :", table2.size)

    # a simpler solution with the table.transpose() method:
    table3 = table.clone
    table3.transpose()
    table3.name = "Transpose"
    body.append(table3)
    print("Size of symetric table 3 :", table3.size)
    return spreadsheet
if __name__ == "__main__":
main()