Skip to content

Recipes

The source code of the recipes is in the /recipes directory of the odfdo sources. Most recipes are standalone scripts that make actual modifications to ODF sample files; you can check the results in the recipes/recipes_output directory.

How to write hello world in a text document

Create a basic text document with a “Hello World” paragraph.

recipes/how_to_write_hello_world_in_a_text_document.py
#!/usr/bin/env python
"""Create a basic text document with a "Hello World" paragraph.
"""
import os
from pathlib import Path

from odfdo import Document, Paragraph

_DOC_SEQUENCE = 3
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_hello"
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Save `document` as `name` inside OUTPUT_DIR, creating the directory first."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Build a text document holding one "Hello World" paragraph, then save it."""
    document = Document("text")
    content = document.body
    # start from an empty body
    content.clear()
    content.append(Paragraph("Hello World"))

    test_unit(document)
    save_new(document, TARGET)


def test_unit(document: Document) -> None:
    """Check the text of the body; does nothing unless ODFDO_TESTING is set."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        text = str(document.body)
        print(text)
        assert text == "Hello World\n"


if __name__ == "__main__":
    main()

How to write hello world in a spreadsheet document

Create a basic spreadsheet with “Hello World” in the first cell.

recipes/how_to_write_hello_world_in_a_spreadsheet_document.py
#!/usr/bin/env python
"""Create a basic spreadsheet with "Hello World" in the first cell.
"""
import os
from pathlib import Path

from odfdo import Document, Table

_DOC_SEQUENCE = 5
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_ods"
TARGET = "document.ods"


def save_new(document: Document, name: str):
    """Write `document` to OUTPUT_DIR / `name`; the directory is created if missing."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    target_path = OUTPUT_DIR / name
    print("Saving:", target_path)
    document.save(target_path, pretty=True)


def main():
    """Build a spreadsheet whose cell A1 holds "Hello World", then save it."""
    document = Document("spreadsheet")
    content = document.body
    content.clear()

    sheet = Table("Empty Table")
    sheet.set_value("A1", "Hello World")
    content.append(sheet)
    test_unit(document)
    save_new(document, TARGET)


def test_unit(document: Document) -> None:
    """Check the first cell of the first table; does nothing unless ODFDO_TESTING is set."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        first_table = document.body.get_table(0)
        first_cell = first_table.get_cell((0, 0))
        text = first_cell.value.strip()
        print(text)
        assert text == "Hello World"


if __name__ == "__main__":
    main()

Basic presentation hello world

Write a basic “Hello World” in the middle of the first page of a presentation.

recipes/basic_presentation_hello_world.py
#!/usr/bin/env python
"""Write a basic "Hello World" in the middle of the first page
of a presentation.
"""

import os
from pathlib import Path

from odfdo import Document, DrawPage, Frame

_DOC_SEQUENCE = 7
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_odp"
TARGET = "hello.odp"


def save_new(document: Document, name: str) -> None:
    """Store a recipe result Document as `name` in OUTPUT_DIR (created if needed)."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / name
    print("Saving:", output_path)
    document.save(output_path, pretty=True)


def add_text_frame(document: Document, text: str) -> None:
    """Replace the presentation body with one page showing `text` in a text frame."""
    content = document.body
    content.clear()

    # 7cm x 5cm frame positioned at (11cm, 8cm) on the page
    frame = Frame.text_frame(
        text,
        size=("7cm", "5cm"),
        position=("11cm", "8cm"),
        style="Standard",
        text_style="Standard",
    )
    slide = DrawPage("page1", name="Page 1")
    slide.append(frame)
    content.append(slide)


def main() -> None:
    """Create the "Hello world!" presentation, check it, then save it."""
    presentation = Document("presentation")
    add_text_frame(presentation, "Hello world!")
    test_unit(presentation)
    save_new(presentation, TARGET)


def test_unit(document: Document) -> None:
    """Check that the body has one frame reading "Hello world!"; needs ODFDO_TESTING."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        found = document.body.get_frames()
        assert len(found) == 1
        assert str(found[0]).strip() == "Hello world!"


if __name__ == "__main__":
    main()

Create a basic text document

Create a basic text document with headers and paragraphs.

recipes/create_a_basic_text_document.py
#!/usr/bin/env python
"""Create a basic text document with headers and paragraphs.
"""
import os
from pathlib import Path

from odfdo import Document, Header, Paragraph

_DOC_SEQUENCE = 10
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_text"
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Save `document` under OUTPUT_DIR with file name `name`."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    saved_path = OUTPUT_DIR / name
    print("Saving:", saved_path)
    document.save(saved_path, pretty=True)


def create_basic_document():
    """Return a new text document: one level 1 header, then two level 2
    headers each followed by a paragraph."""
    document = Document("text")
    body = document.body
    body.clear()

    expedition_text = (
        "Sous le consulat de Lucius Domitius et d'Appius Claudius, "
        "César, quittant les quartiers d'hiver pour aller en Italie, "
        "comme il avait coutume de le faire chaque année, ordonne aux "
        "lieutenants qu'il laissait à la tête des légions de construire, "
        "pendant l'hiver, le plus de vaisseaux qu'il serait possible, "
        "et de réparer les anciens."
    )
    island_text = (
        "Cette île est de forme triangulaire ; l'un des côtés regarde "
        "la Gaule. Des deux angles de ce côté, l'un est au levant, "
        "vers le pays de Cantium, où abordent presque tous les vaisseaux "
        "gaulois ; l'autre, plus bas, est au midi. La longueur de ce côté "
        "est d'environ cinq cent mille pas. "
    )

    # elements are appended to the body in reading order
    elements = (
        Header(1, "De la Guerre des Gaules - Livre V"),
        Header(2, "Préparatifs d'expédition en Bretagne"),
        Paragraph(expedition_text),
        Header(2, "La Bretagne"),
        Paragraph(island_text),
    )
    for element in elements:
        body.append(element)
    return document


def main():
    """Create the basic document, check it, then save it."""
    basic_document = create_basic_document()
    test_unit(basic_document)
    save_new(basic_document, TARGET)


def test_unit(document: Document) -> None:
    """Check the start of the paragraph at position 1; needs ODFDO_TESTING."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        paragraph = document.body.get_paragraph(position=1)
        text = str(paragraph)
        print(text)
        assert text.startswith("Cette île est de forme triangulaire")


if __name__ == "__main__":
    main()

How to add a paragraph to a text document

Minimal example of how to add a paragraph.

recipes/how_to_add_a_paragraph_to_a_text_document.py
"""Minimal example of how to add a paragraph.
"""

from odfdo import Document, Paragraph

_DOC_SEQUENCE = 12


def main():
    """Append a "Hello World" paragraph to the body of a new text document."""
    document = Document("text")

    # a new paragraph with some content, added at the end of the body:
    hello = Paragraph("Hello World")
    document.body.append(hello)


if __name__ == "__main__":
    main()

Create a basic text document with a list

Create a basic text document with a list.

recipes/create_a_basic_text_document_with_a_list.py
#!/usr/bin/env python
"""Create a basic text document with a list.
"""
import os
from pathlib import Path

from odfdo import Document, List, ListItem

_DOC_SEQUENCE = 20
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_list"
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Save `document` as OUTPUT_DIR / `name`, creating OUTPUT_DIR when absent."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Generate the document with a list, check it, then save it."""
    list_document = generate_document()
    test_unit(list_document)
    save_new(list_document, TARGET)


def generate_document():
    """Return a text document holding a four-item list; its formatted text is printed."""
    document = Document("text")

    # A List is built from a Python list of strings and list items.
    names = List(["Arthur", "Ford", "Trillian"])

    # The list is attached to the body first, even though it is modified afterwards:
    document.body.append(names)

    # One more item, added to the already attached list
    names.append_item(ListItem("Marvin"))

    # The text printed below should be:
    # - Arthur
    # - Ford
    # - Trillian
    # - Marvin
    print(document.get_formatted_text())

    return document


def test_unit(document: Document) -> None:
    """Check the text rendering of the document; does nothing unless ODFDO_TESTING is set."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        rendered = str(document).strip()
        assert rendered == "- Arthur\n- Ford\n- Trillian\n- Marvin"


if __name__ == "__main__":
    main()

Create a basic text document with list and sublists

Create a basic text document with list and sublists.

recipes/create_a_basic_text_document_with_list_and_sublists.py
#!/usr/bin/env python
"""Create a basic text document with list and sublists.
"""
import os
from pathlib import Path

from odfdo import Document, List, ListItem

_DOC_SEQUENCE = 25
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_sublist"
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Write `document` into OUTPUT_DIR under the file name `name`."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    target_path = OUTPUT_DIR / name
    print("Saving:", target_path)
    document.save(target_path, pretty=True)


def main():
    """Generate the document with list and sublists, check it, then save it."""
    sublist_document = generate_document()
    test_unit(sublist_document)
    save_new(sublist_document, TARGET)


def generate_document():
    """Return a text document with a list containing a sublist.

    The formatted text is printed twice: after the sublist is added and
    after three more items are inserted.
    """
    document = Document("text")

    # Adding the list; the "Marvin" item is kept in a variable because
    # the sublist is attached to it below.
    names = List(["Arthur", "Ford", "Trillian"])
    marvin_item = ListItem("Marvin")
    names.append_item(marvin_item)
    document.body.append(names)

    # Adding a sublist
    # A sublist is simply a list as an item of another list:
    sublist = List(["Paranoid Android", "older than the universe"])
    marvin_item.append(sublist)

    # The text printed below should be:
    # - Arthur
    # - Ford
    # - Trillian
    # - Marvin
    #   - Paranoid Android
    #   - older than the universe
    print(document.get_formatted_text())

    # Inserting list items
    # In case you forgot to insert an item, give its position:
    names.insert_item("some dolphins", position=1)

    # Or insert it relative to another item, before or after:
    anchor = names.get_item(content="Marvin")
    names.insert_item("Zaphod", before=anchor)
    names.insert_item("and many others", after=anchor)

    # The text printed below should be:
    # - Arthur
    # - some dolphins
    # - Ford
    # - Trillian
    # - Zaphod
    # - Marvin
    #   - Paranoid Android
    #   - older than the universe
    # - and many others
    #
    print(document.get_formatted_text())

    return document


def test_unit(document: Document) -> None:
    """Compare the formatted text with the expected listing; needs ODFDO_TESTING."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        expected_lines = [
            "- Arthur",
            "- some dolphins",
            "- Ford",
            "- Trillian",
            "- Zaphod",
            "- Marvin",
            "  ",
            "  - Paranoid Android",
            "  - older than the universe",
            "- and many others",
        ]
        assert document.get_formatted_text().strip() == "\n".join(expected_lines)


if __name__ == "__main__":
    main()

How to add a sublist to a list

Minimal example of how to add a sublist to a list.

recipes/how_to_add_a_sublist_to_a_list.py
"""Minimal example of how to add a sublist to a list.
"""

from odfdo import Document, List, ListItem

_DOC_SEQUENCE = 27


def main():
    """Build a list with a nested sublist in a text document and print the body."""
    document = Document("text")
    body = document.body

    drinks = List(["chocolat", "café"])
    body.append(drinks)

    tea = ListItem("thé")
    drinks.append(tea)

    # A sublist is simply a list as an item of another list
    tea_kinds = List(["thé vert", "thé rouge"])
    tea.append(tea_kinds)

    print(body.serialize(True))


if __name__ == "__main__":
    main()

How to insert a new item within a list

Minimal example of how to insert a new item within a list.

recipes/how_to_insert_a_new_item_within_a_list.py
"""Minimal example of how to insert a new item within a list.
"""

from odfdo import List

_DOC_SEQUENCE = 28


def main():
    """Show three ways of inserting an item into a List: by position, before, after."""
    drinks = List(["chocolat", "café"])

    # In case you forgot to insert an important item, give its position:
    drinks.insert_item("Chicorée", position=1)

    # Or insert it relative to another item, before or after:
    anchor = drinks.get_item(content="café")
    drinks.insert_item("Chicorée", before=anchor)
    drinks.insert_item("Chicorée", after=anchor)


if __name__ == "__main__":
    main()

How to add an item to a list

Minimal example of how to add an item to a list.

recipes/how_to_add_an_item_to_a_list.py
"""Minimal example of how to add an item to a list.
"""

from odfdo import List, ListItem

_DOC_SEQUENCE = 28


def main():
    """Append a ListItem to an existing List."""
    drinks = List(["chocolat", "café"])
    drinks.append(ListItem("thé"))


if __name__ == "__main__":
    main()

Get text content from odt file

Read the text content from an .odt file.

recipes/get_text_content_from_odt_file.py
#!/usr/bin/env python
"""Read the text content from an .odt file."""

import os
import sys
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 30
DATA = Path(__file__).parent / "data"
# ODF export of Wikipedia article Hitchhiker's Guide to the Galaxy (CC-By-SA) :
SOURCE = "collection2.odt"


def read_source_document() -> Document:
    """Return the source Document.

    The path is the first command line argument when one is given,
    otherwise DATA / SOURCE.
    """
    if len(sys.argv) > 1:
        source = sys.argv[1]
    else:
        source = DATA / SOURCE
    return Document(source)


def read_text_content(document: Document) -> str:
    """Print the document type, the text size and the first 320 characters.

    Returns the formatted text of the document.
    """
    # just verify what type of document it is:
    print("Type of document:", document.get_type())

    # A quick way to get the text content:
    content = document.get_formatted_text()
    print("Size :", len(content))

    # Let's show the beginning :
    beginning = content[:320]
    print(beginning)

    return content


def main() -> None:
    """Read the source document, print its text summary, then run the check."""
    source_document = read_source_document()
    content = read_text_content(source_document)
    test_unit(content)


def test_unit(text: str) -> None:
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return

    assert len(text) == 56828


if __name__ == "__main__":
    main()

Create a basic text document with a table of content

Create a basic text document with a table of content.

recipes/create_a_basic_text_document_with_a_table_of_content.py
#!/usr/bin/env python
"""Create a basic text document with a table of content.
"""
import os
from pathlib import Path

from odfdo import TOC, Document, Header, Paragraph

_DOC_SEQUENCE = 35
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_toc"
DATA = Path(__file__).parent / "data"
LOREM = (DATA / "lorem.txt").read_text(encoding="utf8")
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Save `document` as `name` in OUTPUT_DIR, creating the directory if needed."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / name
    print("Saving:", output_path)
    document.save(output_path, pretty=True)


def main():
    """Create a text document, add a table of content and content to it, then save it."""
    toc_document = Document("text")
    make_toc(toc_document)
    save_new(toc_document, TARGET)


def make_toc(document):
    """Append a table of content, headers and paragraphs to `document`, then fill the TOC."""
    body = document.body

    # Create the Table Of Content, changing the default "Table Of Content" title.
    toc = TOC()
    toc.title = "My Table of Content"

    # Do not forget to add the component to the document:
    body.append(toc)

    # Add some content with headers: one level 1 title, then three
    # level 2 titles each followed by a paragraph.
    body.append(Header(1, LOREM[:70]))
    for idx in range(3):
        start = idx * 5
        body.append(Header(2, LOREM[start : 70 + start]))
        body.append(Paragraph(LOREM))

    # Beware, update the TOC with the actual content. If not done there,
    # the reader will need to "update the table of content" later.
    toc.fill()

    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        third_line = str(toc).split("\n")[2]
        assert third_line == (
            "1.1. Lorem ipsum dolor sit amet, consectetuer "
            "adipiscing elit. Sed non risu"
        )


if __name__ == "__main__":
    main()

How to add a table of content to a document

Adding a table of content to an existing text document.

recipes/how_to_add_a_table_of_content_to_a_document.py
#!/usr/bin/env python
"""Adding a table of content to an existing text document.
"""
from pathlib import Path

from odfdo import TOC, Document, Paragraph, Style

_DOC_SEQUENCE = 37
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "add_toc"
TARGET = "document.odt"
DATA = Path(__file__).parent / "data"
SOURCE = DATA / "collection.odt"


def save_new(document: Document, name: str):
    """Write `document` to OUTPUT_DIR / `name`; OUTPUT_DIR is created when missing."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    saved_path = OUTPUT_DIR / name
    print("Saving:", saved_path)
    document.save(saved_path, pretty=True)


def main():
    """Insert a table of content, followed by a page break, at the top of SOURCE and save."""
    document = Document(SOURCE)
    body = document.body

    # here is a way to insert a page break: an empty paragraph using a
    # style whose "fo:break-before" property is "page"
    break_style = Style("paragraph", name="page_break")
    break_style.set_properties({"fo:break-before": "page"})
    document.insert_style(break_style)
    body.insert(Paragraph("", style="page_break"), 0)

    # The TOC element comes from the toc module.
    # It is inserted at position 0; to put the TOC at the end, just do:
    # body.append(toc)
    toc = TOC()
    body.insert(toc, 0)
    # fill the toc with current content of document:
    toc.fill()

    save_new(document, TARGET)


if __name__ == "__main__":
    main()

Update a text document with a table of content

Update the table of contents of a document.

recipes/update_a_text_document_with_a_table_of_content.py
#!/usr/bin/env python
"""Update the table of contents of a document.
"""

from pathlib import Path

from odfdo import Document, Header, Paragraph

_DOC_SEQUENCE = 38
DATA = Path(__file__).parent / "data"
SOURCE = "doc_with_toc.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "modified_toc"
TARGET = "document.odt"


def save_new(document: Document, name: str) -> None:
    """Save `document` as `name` in OUTPUT_DIR, creating the directory if needed."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main() -> None:
    """Load the source document, run the TOC updates, then save the result."""
    source_document = Document(DATA / SOURCE)
    update_toc(source_document)
    save_new(source_document, TARGET)


def update_toc(document: Document) -> None:
    """Run each modification of the document, with a check before and after each one."""
    steps = (
        check_toc_v1,
        add_some_header,
        check_toc_v2,
        change_toc_title,
        check_toc_v3,
        change_toc_title_to_empty,
        check_toc_v4,
        remove_second_header_1b,
        check_toc_v5,
        add_toc_title,
        check_toc_v6,
    )
    for step in steps:
        step(document)


def check_toc_v1(document: Document) -> None:
    """Assert the TOC has 5 lines: the "Table of Contents" title and four entries."""
    lines = str(document.body.toc).split("\n")
    expected_prefixes = (
        "Table of Contents",
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1B",
        "1.3. Lorem 1C",
    )
    assert len(lines) == 5
    for line, prefix in zip(lines, expected_prefixes):
        assert line.startswith(prefix)


def add_some_header(document: Document) -> None:
    """Append a level 1 header and a paragraph to the body, then refill the TOC."""
    body = document.body
    body.append(Header(1, "New header"))
    body.append(Paragraph("Some text after the new header."))
    # update the table of contents
    document.body.toc.fill(document)


def check_toc_v2(document: Document) -> None:
    """Assert the TOC has 6 lines, the last one being the "2. New header" entry."""
    lines = str(document.body.toc).split("\n")
    expected_prefixes = (
        "Table of Contents",
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1B",
        "1.3. Lorem 1C",
        "2. New header",
    )
    assert len(lines) == 6
    for line, prefix in zip(lines, expected_prefixes):
        assert line.startswith(prefix)


def change_toc_title(document: Document) -> None:
    """Set the TOC title to "Another title" and refill the TOC."""
    table_of_content = document.body.toc
    table_of_content.set_toc_title("Another title")
    table_of_content.fill(document)


def check_toc_v3(document: Document) -> None:
    """Assert the TOC still has 6 lines and now starts with "Another title"."""
    lines = str(document.body.toc).split("\n")
    assert len(lines) == 6
    title_line = lines[0]
    assert title_line.startswith("Another title")


def change_toc_title_to_empty(document: Document) -> None:
    """Set the TOC title to the empty string, which removes it, and refill the TOC."""
    table_of_content = document.body.toc
    # that will remove the title
    table_of_content.set_toc_title("")
    table_of_content.fill(document)


def check_toc_v4(document: Document) -> None:
    """Assert the TOC has 5 lines, all of them entries (no title line)."""
    lines = str(document.body.toc).split("\n")
    expected_prefixes = (
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1B",
        "1.3. Lorem 1C",
        "2. New header",
    )
    assert len(lines) == 5
    for line, prefix in zip(lines, expected_prefixes):
        assert line.startswith(prefix)


def remove_second_header_1b(document: Document) -> None:
    """Delete the header found at position 2 of the body, then refill the TOC."""
    # NOTE(review): per the function name this is presumably the "Lorem 1B" header.
    target = document.body.get_header(position=2)
    # 'target' is attached to the document, so deleting it
    # removes the element from the document
    target.delete()

    document.body.toc.fill(document)


def check_toc_v5(document: Document) -> None:
    """Assert the TOC has 4 lines, with "Lorem 1C" now numbered 1.2."""
    lines = str(document.body.toc).split("\n")
    expected_prefixes = (
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1C",
        "2. New header",
    )
    assert len(lines) == 4
    for line, prefix in zip(lines, expected_prefixes):
        assert line.startswith(prefix)


def add_toc_title(document: Document) -> None:
    """Set the TOC title to "A new title" and refill the TOC."""
    table_of_content = document.body.toc
    table_of_content.set_toc_title("A new title")
    table_of_content.fill(document)


def check_toc_v6(document: Document) -> None:
    """Assert the TOC has 5 lines: the "A new title" title and four entries."""
    lines = str(document.body.toc).split("\n")
    expected_prefixes = (
        "A new title",
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1C",
        "2. New header",
    )
    assert len(lines) == 5
    for line, prefix in zip(lines, expected_prefixes):
        assert line.startswith(prefix)


if __name__ == "__main__":
    main()

Create a basic text document with annotations

Create a basic text document with annotations.

recipes/create_a_basic_text_document_with_annotations.py
#!/usr/bin/env python
"""Create a basic text document with annotations.
"""
import os
from pathlib import Path

from odfdo import Document, Header, Paragraph

_DOC_SEQUENCE = 40
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_annotations"
DATA = Path(__file__).parent / "data"
LOREM = (DATA / "lorem.txt").read_text(encoding="utf8")
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Save `document` under OUTPUT_DIR with the file name `name`."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    target_path = OUTPUT_DIR / name
    print("Saving:", target_path)
    document.save(target_path, pretty=True)


def main():
    """Create a text document, add annotated paragraphs, check it, then save it."""
    annotated = Document("text")
    make_annotations(annotated)
    test_unit(annotated)
    save_new(annotated, TARGET)


def make_annotations(document):
    """Append a main title and three titled paragraphs, each carrying one annotation."""
    body = document.body
    body.append(Header(1, "Main title"))
    for index in range(3):
        body.append(Header(2, f"title {index}"))
        paragraph = Paragraph(LOREM[:240])

        # Adding Annotation
        # Annotations are notes that don't appear in the document but
        # typically on a side bar in a desktop application. So they are not printed.

        # The annotation is inserted after the 4th word of the paragraph.
        words = str(paragraph).split()
        anchor_word = words[3]

        # Arguments of insert_annotation:
        #   after   => The word after what the annotation is inserted.
        #   body    => The annotation itself, at the end of the page.
        #   creator => The author of the annotation.
        #   date    => A datetime value, by default datetime.now() (not given here).
        paragraph.insert_annotation(
            after=anchor_word,
            body="It's so easy!",
            creator="Bob",
        )

        body.append(paragraph)


def test_unit(document: Document) -> None:
    """Check that Bob created three annotations; does nothing unless ODFDO_TESTING is set."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        by_bob = document.body.get_annotations(creator="Bob")
        assert len(by_bob) == 3


if __name__ == "__main__":
    main()

Create a basic text document with footnotes

Create a basic text document with footnotes.

recipes/create_a_basic_text_document_with_footnotes.py
#!/usr/bin/env python
"""Create a basic text document with footnotes.
"""
import os
from pathlib import Path

from odfdo import Document, Header, Paragraph

_DOC_SEQUENCE = 45
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_footnotes"
DATA = Path(__file__).parent / "data"
LOREM = (DATA / "lorem.txt").read_text(encoding="utf8")
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Write `document` into OUTPUT_DIR as `name`, creating OUTPUT_DIR if needed."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / name
    print("Saving:", output_path)
    document.save(output_path, pretty=True)


def main():
    """Create a text document, add paragraphs with footnotes, check it, then save it."""
    noted = Document("text")
    make_footnotes(noted)
    test_unit(noted)
    save_new(noted, TARGET)


def make_footnotes(document):
    """Append a main title and three titled paragraphs, each carrying one footnote."""
    body = document.body

    # Add content (See Create_a_basic_document.py)
    body.append(Header(1, "Main title"))
    for index in range(3):
        body.append(Header(2, f"title {index}"))
        paragraph = Paragraph(LOREM[:240])

        # Adding Footnote
        # Notes are quite complex so they deserve a dedicated API on paragraphs.
        # The note is inserted after the 4th word of the paragraph.
        words = str(paragraph).split()
        anchor_word = words[3]

        # Arguments of insert_note:
        #   after    => The word after what the “¹” citation is inserted.
        #   note_id  => The unique identifier of the note in the document.
        #   citation => The symbol the user sees to follow the footnote.
        #   body     => The footnote itself, at the end of the page.
        note_text = (
            f'Author{index}, A. (2007). "How to cite references", Sample Editions.'
        )
        paragraph.insert_note(
            after=anchor_word,
            note_id=f"note{index}",
            citation="1",
            body=note_text,
        )

        body.append(paragraph)


def test_unit(document: Document) -> None:
    """Check that the body holds three notes; does nothing unless ODFDO_TESTING is set."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        notes = document.body.get_notes()
        assert len(notes) == 3


if __name__ == "__main__":
    main()

How to add footnote to a text document

Minimal example of how to add a footnote to a text document.

recipes/how_to_add_footnote_to_a_text_document.py
"""Minimal example of how to add a footnote to a text document.
"""

from odfdo import Document, Paragraph

_DOC_SEQUENCE = 47


def main():
    """Add a paragraph with one footnote to an emptied text document."""
    document = Document("text")
    content = document.body
    content.clear()

    text = Paragraph("A paragraph with a footnote about some references.")
    content.append(text)

    # Notes are quite complex so they deserve a dedicated API on paragraphs.
    # The arguments in detail:
    #
    # after    =>   The word after what the “¹” citation is inserted.
    # note_id  =>   The unique identifier of the note in the document.
    # citation =>   The symbol the user sees to follow the footnote.
    # body     =>   The footnote itself, at the end of the page.
    #
    # odfdo creates footnotes by default. To create endnotes (notes
    # that appear at the end of the document), give the
    # note_class='endnote' parameter.
    reference = 'Author, A. (2007). "How to cite references" New York: McGraw-Hill.'
    text.insert_note(
        after="graph",
        note_id="note1",
        citation="1",
        body=reference,
    )


if __name__ == "__main__":
    main()

Create a text document with tables in it

Build a commercial document, with numerical values displayed in both the text and in a table.

recipes/create_a_text_document_with_tables_in_it.py
#!/usr/bin/env python
"""Build a commercial document, with numerical values displayed in
both the text and in a table.
"""

import os
from pathlib import Path

from odfdo import (
    Cell,
    Document,
    Header,
    List,
    ListItem,
    Paragraph,
    Row,
    Table,
    create_table_cell_style,
    make_table_cell_border_string,
)

_DOC_SEQUENCE = 50
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "commercial"
TARGET = "commercial.odt"
TAX_RATE = 0.20


class Product:
    """Minimalistic Product: a reference, a display name and a price."""

    def __init__(self, reference: int, name: str, price: float) -> None:
        # the stored name is the given name prefixed with "Product "
        self.name = "Product " + f"{name}"
        self.reference, self.price = reference, price


class OrderLine:
    """Line of an Order: a product reference and the ordered quantity."""

    def __init__(self, reference: int, quantity: int) -> None:
        self.reference, self.quantity = reference, quantity


def make_product_catalog() -> list[Product]:
    """Generate a list of five Products named "A" to "E".

    Prices start at 10.0 and increase by 10.5 from one product to the next.
    """
    names = "ABCDE"
    return [
        Product(index, letter, 10.0 + 10.5 * index)
        for index, letter in enumerate(names)
    ]


def make_order(catalog: list[Product]) -> list[OrderLine]:
    """Generate purchase order list, one OrderLine per product of the catalog.

    Each quantity is int(previous quantity * 2.5), starting from 1.
    """
    lines: list[OrderLine] = []
    current_quantity = 1
    for item in catalog:
        current_quantity = int(current_quantity * 2.5)
        lines.append(OrderLine(item.reference, current_quantity))
    return lines


def save_new(document: Document, name: str) -> None:
    """Store a recipe result Document as `name` in OUTPUT_DIR (created if needed)."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    saved_path = OUTPUT_DIR / name
    print("Saving:", saved_path)
    document.save(saved_path, pretty=True)


def create_header_cell_style(doc: Document) -> str:
    """Create a Cell style, insert it in Document as automatic style, return its name.

    The style has black text, a light grey background and the same thin
    black border on its four sides.
    """
    border = make_table_cell_border_string(thick="0.03cm", color="black")
    # same border string for each side
    sides = ("right", "left", "bottom", "top")
    borders = {f"border_{side}": border for side in sides}
    cell_style = create_table_cell_style(
        color="black",
        background_color=(210, 210, 210),
        **borders,
    )
    return doc.insert_style(style=cell_style, automatic=True)


def add_top_content(doc: Document, catalog: list[Product]) -> None:
    """Add some descriptive content to the document: titles and the product list."""
    body = doc.body

    introduction = (
        Header(1, "Basic commercial document"),
        Header(2, "Available products"),
        Paragraph("Here the list:"),
    )
    for element in introduction:
        body.append(element)

    # List of products in a list (odfdo.List), one item per product:
    product_list = List()
    body.append(product_list)
    for product in catalog:
        description = f"{product.name:<10}, price: {product.price:.2f} €"
        product_list.append(ListItem(description))


def add_order_table(
    doc: Document, catalog: list[Product], order: list[OrderLine]
) -> None:
    """Add a "Your order" header and a table with order lines to the document."""
    content = doc.body
    content.append(Header(2, "Your order"))

    # the header cell style must be inserted in the document before use
    header_style = create_header_cell_style(doc)
    content.append(make_order_table(catalog, order, header_style))


def _euro_cell(amount) -> Cell:
    """Return a new float Cell for `amount`, shown with two decimals and a euro sign."""
    cell = Cell()
    cell.set_value(
        amount,
        text=f"{amount:.2f} €",
        currency="EUR",
        cell_type="float",
    )
    return cell


def make_order_table(
    catalog: list[Product],
    order: list[OrderLine],
    style_name: str,
) -> Table:
    """Build the order table.

    Layout: a header row, one row per order line, an empty merged row,
    a "Total:" row and a "Total with tax:" row.

    Args:
        catalog: products; each order line reference is used as an index
            into this list.
        order: the order lines to display.
        style_name: name of the cell style applied to the header row.

    Returns:
        The populated Table, named "Table".
    """
    table = Table("Table")

    # Header of table
    row = Row()
    row.set_values(["Product", "Price", "Quantity", "Amount"])
    table.set_row("A1", row)
    # or: table.set_row(0, row)

    # Add a row for each order line; row 0 is the header, so lines start at 1.
    # row_number is initialised so that it is defined when order is empty.
    row_number = 0
    for row_number, line in enumerate(order, start=1):
        product = catalog[line.reference]

        row = Row()

        row.set_value("A", product.name)
        # or : row.set_value(0, product.name)

        row.set_cell("B", _euro_cell(product.price))
        # or : row.set_cell(1, cell)

        row.set_value("C", line.quantity)
        # or : row.set_value(2, line.quantity)

        row.set_cell("D", _euro_cell(product.price * line.quantity))

        table.set_row(row_number, row)

    # Total lines

    # add a merged empty row
    row = Row()
    row_number += 1
    table.set_row(row_number, row)
    table.set_span((0, row_number, 3, row_number))

    # compute total line: sum the "Amount" column, skipping its first value
    # (the header) and its last one (the empty row just added)
    row = Row()
    row_number += 1
    row.set_value(0, "Total:")
    total = sum(table.get_column_values(3)[1:-1])
    # note: total is a Decimal
    row.set_cell(3, _euro_cell(total))
    table.set_row(row_number, row)
    # merge the 3 first columns for this row:
    table.set_span((0, row_number, 2, row_number), merge=True)

    # compute VAT line
    row = Row()
    row_number += 1
    row.set_value(0, "Total with tax:")
    # total is converted to float before being multiplied by the float TAX_RATE
    total_vat = float(total) * (1 + TAX_RATE)
    row.set_cell(3, _euro_cell(total_vat))
    table.set_row(row_number, row)
    table.set_span((0, row_number, 2, row_number), merge=True)

    # Let's add some style on header row
    row = table.get_row(0)
    for cell in row.traverse():
        cell.style = style_name
        row.set_cell(x=cell.x, cell=cell)
    table.set_row(row.y, row)

    return table


def generate_commercial(catalog: list[Product], order: list[OrderLine]) -> Document:
    """Generate a text document with a table in it.

    A new text document is filled by add_top_content() and then by
    add_order_table(), in that order, and returned.
    """
    commercial = Document("text")
    add_top_content(commercial, catalog)
    add_order_table(commercial, catalog, order)
    return commercial


def main() -> None:
    """Build the catalog and the order, generate the document, check and save it."""
    catalog = make_product_catalog()
    document = generate_commercial(catalog, make_order(catalog))
    test_unit(document)
    save_new(document, TARGET)


def test_unit(document: Document) -> None:
    """Check some cells of the generated table (test suite only).

    Nothing is checked unless the ODFDO_TESTING environment variable is set.
    """
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return

    table = document.body.get_table(name="Table")
    assert isinstance(table, Table)
    # cell coordinate -> expected value, checked in this order
    expected_values = {
        "A1": "Product",
        "A2": "Product A",
        "A8": "Total:",
        "B1": "Price",
        "C1": "Quantity",
        "C2": 2,
        "D1": "Amount",
    }
    for coordinate, expected in expected_values.items():
        assert table.get_cell(coordinate).value == expected


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

How to add a table to a document

Minimal example of how to add a table to a text document.

recipes/how_to_add_a_table_to_a_document.py
"""Minimal example of how to add a table to a text document."""

import os

from odfdo import Document, Header, Paragraph, Table

# NOTE(review): presumably the rank of this recipe in the generated
# documentation; it is not used by the script itself -- confirm.
_DOC_SEQUENCE = 55


def generate_document() -> Document:
    """Return a new text document ending with a title, a caption and a 3x3 table."""
    document = Document("text")
    body = document.body

    # A header and a short caption introduce the table, which is created
    # with 3 columns and 3 rows and appended last.
    new_content = (
        Header(1, "Tables"),
        Paragraph("A 3x3 table:"),
        Table("Table 1", width=3, height=3),
    )
    for element in new_content:
        body.append(element)
    return document


def main() -> None:
    """Generate the sample document and run the test-suite check on it."""
    test_unit(generate_document())


def test_unit(document: Document) -> None:
    """Check the size of the first table of the document (test suite only)."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        first_table = document.body.get_table(0)
        assert first_table.size == (3, 3)


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Create a text document from plain text with layout

Create a document with styles.

We want to:

  • remove standard styles from the document

  • set some styles grabbed from a styles.xml ODF file (or generated)

  • insert plain “python” text, containing some \t, \n, and spaces

recipes/create_a_text_document_from_plain_text_with_layout.py
#!/usr/bin/env python
"""Create a document with styles.

 We want to:

  - remove standard styles from the document

  - set some styles grabed from a styles.xml ODF file (or generated)

  - insert plain "python" text, containing some \t , \n, and spaces
"""
from pathlib import Path

from odfdo import Document, Element, Paragraph, Style

# NOTE(review): presumably the rank of this recipe in the generated
# documentation; it is not used by the script itself -- confirm.
_DOC_SEQUENCE = 60
# Directory, next to this script, where save_new() writes the result.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled2"
# File name of the generated document.
TARGET = "document.odt"


# Element is the base class of all odfdo classes.
# Element.from_tag permits the creation of any ODF XML tag, here it is used
# to build the style elements from their XML source.

# Font face declarations, one per font used by the styles below:
_style_font_1 = Element.from_tag(
    '<style:font-face style:name="OpenSymbol" svg:font-family="OpenSymbol"/>'
)

_style_font_2 = Element.from_tag(
    '<style:font-face style:name="Liberation Serif" '
    'svg:font-family="Liberation Serif" '
    'style:font-family-generic="roman" '
    'style:font-pitch="variable"/>'
)

_style_font_3 = Element.from_tag(
    '<style:font-face style:name="Liberation Sans" '
    'svg:font-family="Liberation Sans" '
    'style:font-family-generic="swiss" '
    'style:font-pitch="variable"/>'
)

# Page layout style named "MyLayout" (page size, margins and footer area)
_style_page = Element.from_tag(
    '<style:page-layout style:name="MyLayout">'
    '<style:page-layout-properties fo:page-width="21.00cm" '
    'fo:page-height="29.70cm" style:num-format="1" '
    'style:print-orientation="portrait" fo:margin-top="1.7cm" '
    'fo:margin-bottom="1.5cm" fo:margin-left="1.6cm" '
    'fo:margin-right="1.6cm" style:writing-mode="lr-tb" '
    'style:footnote-max-height="0cm"><style:footnote-sep '
    'style:width="0.018cm" style:distance-before-sep="0.10cm" '
    'style:distance-after-sep="0.10cm" style:line-style="solid" '
    'style:adjustment="left" style:rel-width="25%" '
    'style:color="#000000"/> </style:page-layout-properties>'
    "<style:footer-style> "
    '<style:header-footer-properties fo:min-height="0.6cm" '
    'fo:margin-left="0cm" fo:margin-right="0cm" '
    'fo:margin-top="0.3cm" style:dynamic-spacing="false"/> '
    "</style:footer-style></style:page-layout>"
)

# Master page "Standard": refers to the "MyLayout" page layout above and
# declares a footer paragraph (style "Footer") with page number / page count
_style_master = Element.from_tag(
    '<style:master-page style:name="Standard" '
    'style:page-layout-name="MyLayout"><style:footer>'
    '<text:p text:style-name="Footer"> '
    "<text:tab/><text:tab/><text:page-number "
    'text:select-page="current"/> / <text:page-count '
    'style:num-format="1">15</text:page-count>'
    "</text:p></style:footer> "
    "</style:master-page>"
)

# Paragraph style "Footer", referenced by the footer of the master page
_style_footer = Element.from_tag(
    '<style:style style:name="Footer" '
    'style:family="paragraph" style:class="extra" '
    'style:master-page-name="">'
    '<style:paragraph-properties style:page-number="auto" '
    'text:number-lines="false" text:line-number="0">'
    "<style:tab-stops>"
    '<style:tab-stop style:position="8.90cm" '
    'style:type="center"/>'
    '<style:tab-stop style:position="17.80cm" style:type="right"/>'
    "</style:tab-stops>"
    "</style:paragraph-properties>"
    "<style:text-properties "
    'style:font-name="Liberation Sans" '
    'fo:font-size="7pt"/></style:style>'
)

# Paragraph style "description", using the Liberation Sans font (11pt)
_style_description = Element.from_tag(
    '<style:style style:name="description" '
    'style:family="paragraph" '
    'style:class="text" style:master-page-name="">'
    "<style:paragraph-properties "
    'fo:margin="100%" fo:margin-left="0cm" fo:margin-right="0cm" '
    'fo:margin-top="0.35cm" fo:margin-bottom="0.10cm" '
    'style:contextual-spacing="false" '
    'fo:text-indent="0cm" '
    'style:auto-text-indent="false" '
    'style:page-number="auto"/>'
    "<style:text-properties "
    'style:font-name="Liberation Sans" '
    'fo:font-size="11pt"/>'
    "</style:style>"
)

# Paragraph style "smallserif", using the Liberation Serif font (9pt)
_style_small_serif = Element.from_tag(
    '<style:style style:name="smallserif" '
    'style:family="paragraph" style:class="text">'
    '<style:paragraph-properties fo:margin="100%" '
    'fo:margin-left="1.20cm" '
    'fo:margin-right="0cm" fo:margin-top="0cm" '
    'fo:margin-bottom="0.10cm" '
    'style:contextual-spacing="false" '
    'fo:text-indent="0cm" '
    'style:auto-text-indent="false"/>'
    '<style:text-properties style:font-name="Liberation Serif" '
    'fo:font-size="9pt" '
    'fo:font-weight="normal"/>'
    "</style:style>"
)

# Paragraph style "line": only a bottom border, to have a stylish line
# in the text
_style_line = Element.from_tag(
    '<style:style style:name="line" '
    'style:family="paragraph" style:class="text">'
    '<style:paragraph-properties fo:margin="100%" '
    'fo:margin-left="0cm" '
    'fo:margin-right="0cm" fo:margin-top="0cm" '
    'fo:margin-bottom="0.15cm" '
    'style:contextual-spacing="false" fo:text-indent="0cm" '
    'style:auto-text-indent="false" fo:padding="0cm" '
    'fo:border-left="none" '
    'fo:border-right="none" fo:border-top="none" '
    'fo:border-bottom="0.06pt solid #000000"/>'
    '<style:text-properties style:font-name="Liberation Sans" '
    'fo:font-size="9pt"/>'
    "</style:style>"
)

# A style generated with the odfdo Style class rather than from XML
# (text style "bolder", for bold Span)
_style_bold = Style("text", name="bolder", bold=True)


def save_new(document: Document, name: str):
    """Save the document under OUTPUT_DIR with the given file name.

    The output directory is created when missing and the destination path
    is printed before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR.joinpath(name)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Build a styled text document from plain text and save it.

    The default styles of a new text document are removed and replaced by
    the module-level ``_style_*`` definitions, then paragraphs are created
    from plain text containing tabs, line breaks and runs of spaces. The
    result is saved as TARGET in OUTPUT_DIR.
    """
    # Some plain text :
    text_1 = (
        "Lorem ipsum dolor sit amet,\n\t"
        "consectetuer adipiscing elit.\n\tSed"
        "non risus.\n\tSuspendisse lectus tortor,\n"
        "ndignissim sit amet, \nadipiscing nec,"
        "\nultricies sed, dolor.\n\n"
        " Cras elementum ultrices diam. Maecenas ligula massa,"
        "varius a,semper congue, euismod non,"
        " mi. Proin porttitor, orci nec nonummy"
        "molestie, enim est eleifend mi,"
        " non fermentum diam nisl sit amet erat."
    )

    text_2 = (
        "Vestibulum                 "
        "ante               "
        "ipsum             primis\n"
        "in faucibus orci luctus et ultrices "
        "posuere cubilia Curae; Aliquam nibh."
    )

    text_3 = (
        "Duis semper. \n\tDuis arcu massa,"
        " \n\t\tscelerisque vitae, \n"
        "\t\t\tconsequat in, \n"
        "\t\t\t\tpretium a, enim. \n"
        "\t\t\t\t\tPellentesque congue. \n"
        "Ut in risus volutpat libero pharetra "
        "tempor. Cras vestibulum bibendum augue."
        "Praesent egestas leo in pede. Praesent "
        "blandit odio eu enim. Pellentesque sed"
    )

    document = Document("text")
    # remove default styles
    document.delete_styles()
    # add our styles
    document.insert_style(_style_font_1, default=True)
    document.insert_style(_style_font_2, default=True)
    document.insert_style(_style_font_3, default=True)
    document.insert_style(_style_page, automatic=True)
    document.insert_style(_style_master)
    document.insert_style(_style_footer)
    document.insert_style(_style_description)
    document.insert_style(_style_small_serif)
    # the "line" style is referenced by the separator paragraphs created
    # below, so it must be part of the document like the other styles
    document.insert_style(_style_line)
    document.insert_style(_style_bold)

    body = document.body

    # since version 3.8.14, the append_plain_text() mode is the default for
    # paragraph creation, so the text is given directly to Paragraph().
    # Before that, one had to create the Paragraph with its style only and
    # then call paragraph.append_plain_text(text).

    paragraph = Paragraph(text_1, style="description")
    body.append(paragraph)

    paragraph = Paragraph(style="line")
    body.append(paragraph)

    paragraph = Paragraph(text_2, style="smallserif")
    body.append(paragraph)

    paragraph = Paragraph(style="line")
    body.append(paragraph)

    paragraph = Paragraph("A: " + text_3, style="description")
    # span offset become complex after inserting <CR> and <TAB> in a text
    paragraph.set_span("bolder", offset=5, length=6)  # find TEXT position 5 : 6
    paragraph.set_span("bolder", offset=18, length=4)  # find TEXT position 18 : 4
    # no length given: from TEXT position 49 to the end of the text block
    paragraph.set_span("bolder", offset=49)
    paragraph.set_span("bolder", regex=r"Praes\w+\s\w+")  # regex: Praes. + next word

    body.append(paragraph)

    paragraph = Paragraph(style="line")
    body.append(paragraph)

    # it is possible to add the content without the original layout (\n, tab, spaces)
    paragraph = Paragraph("B: " + text_3, style="description", formatted=False)
    body.append(paragraph)

    paragraph = Paragraph(style="line")
    body.append(paragraph)

    # text can also be appended after paragraph creation
    paragraph = Paragraph(style="description")
    paragraph.append("C: " + text_3)
    body.append(paragraph)

    save_new(document, TARGET)


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Minimal example of setting a page footer using Style.set_page_footer().

Note: the created footer uses the current footer style, to change that footer style, use the method set_footer_style() on the ‘page-layout’ style family.

recipes/add_a_custom_footer_to_a_text_document.py
#!/usr/bin/env python
"""Minimal example of setting a page footer using Style.set_page_footer().

Note: the created footer uses the current footer style, to change that
footer style, use the method  set_footer_style() on the 'page-layout'
style family.
"""

import os
from pathlib import Path

from odfdo import Document, Header, Paragraph, Tab, VarPageNumber

# NOTE(review): presumably the rank of this recipe in the generated
# documentation; it is not used by the script itself -- confirm.
_DOC_SEQUENCE = 62
# Directory, next to this script, where save_new() writes the result.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled4"
# File name of the generated document.
TARGET = "document.odt"


def save_new(document: Document, name: str) -> None:
    """Write a recipe result Document as OUTPUT_DIR / name.

    The output directory is created when missing and the destination path
    is printed before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR.joinpath(name)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def make_document() -> Document:
    """Return a short text document (title and one paragraph) with a page footer."""
    lorem = (
        "Lorem ipsum dolor sit amet,\n\t"
        "consectetuer adipiscing elit.\n\tSed"
        "non risus.\n\tSuspendisse lectus tortor,\n"
        "ndignissim sit amet, \nadipiscing nec,"
        "\nultricies sed, dolor.\n\n"
        " Cras elementum ultrices diam. Maecenas ligula massa,"
        "varius a,semper congue, euismod non,"
        " mi. Proin porttitor, orci nec nonummy"
        "molestie, enim est eleifend mi,"
        " non fermentum diam nisl sit amet erat."
    )

    document = Document("text")
    content = document.body
    content.clear()
    for element in (Header(1, "Some Title"), Paragraph(lorem)):
        content.append(element)

    # The footer belongs to the current "master-page" style, which is
    # probably named "Standard". If it is not found, look for candidates with:
    # print([s for s in document.get_styles() if s.family == "master-page"])
    master_page = document.get_style("master-page", "Standard")

    # A footer is a Paragraph or a list of Paragraphs, here two lines. The
    # second one is completed with two tabs, the page number and a dot.
    footer_top = Paragraph("\tA first footer line")
    footer_bottom = Paragraph("Second line")
    for part in (Tab(), Tab(), VarPageNumber(), "."):
        footer_bottom.append(part)

    master_page.set_page_footer([footer_top, footer_bottom])

    # important: insert again the modified style
    document.insert_style(master_page)

    return document


def main() -> None:
    """Generate the document with its footer, check it and save it."""
    footer_document = make_document()
    test_unit(footer_document)
    save_new(footer_document, TARGET)


def test_unit(document: Document) -> None:
    """Check the footer of the "Standard" master page (test suite only)."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    from odfdo import Style

    master_pages = [
        style for style in document.get_styles() if style.family == "master-page"
    ]
    assert len(master_pages) >= 1
    standard = document.get_style("master-page", "Standard")
    assert isinstance(standard, Style)
    serialized = standard.get_page_footer().serialize()
    # both footer lines must be found in the serialized footer
    for fragment in ("A first footer", "Second line"):
        assert fragment in serialized


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

How to add a picture to a text document

Create an empty text document and add a picture in a frame.

recipes/how_to_add_a_picture_to_a_text_document.py
#!/usr/bin/env python
"""Create an empty text document and add a picture in a frame.
"""
from pathlib import Path

from odfdo import Document, Frame, Paragraph

# NOTE(review): presumably the rank of this recipe in the generated
# documentation; it is not used by the script itself -- confirm.
_DOC_SEQUENCE = 65
# Directory, next to this script, where save_new() writes the result.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_picture"
# File name of the generated document.
TARGET = "document.odt"
# "data" directory located next to this script.
DATA = Path(__file__).parent / "data"
# Path of the image file; note that it already starts with DATA.
IMAGE = DATA / "newlogo.png"


def save_new(document: Document, name: str):
    """Save the document under OUTPUT_DIR with the given file name.

    The output directory is created when missing and the destination path
    is printed before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR.joinpath(name)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Create a text document showing the IMAGE file in a frame and save it.

    The image is added to the document with add_file(), displayed through
    an image frame placed in a paragraph, and the document is saved as
    TARGET in OUTPUT_DIR.
    """
    document = Document("text")
    body = document.body
    # IMAGE already starts with DATA: joining it with DATA again changes
    # nothing for an absolute path and repeats the "data" directory for a
    # relative one, so the path is used as is.
    image_path = str(IMAGE)
    uri = document.add_file(image_path)
    image_frame = Frame.image_frame(
        uri,
        size=("6cm", "4cm"),
        position=("5cm", "10cm"),
    )

    # put image frame in a paragraph:
    paragraph = Paragraph("")
    paragraph.append(image_frame)
    body.append(paragraph)

    save_new(document, TARGET)


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

How to add a right aligned picture to a text document

Create an empty text document and add a picture in a frame, aligned to the right or to the left.

Aligning an image requires applying a style to the frame. To do this, use the default frame position style and customize it. The frame position style allows you to choose alignment relative to the paragraph (default) or the page.

recipes/how_to_add_a_right_aligned_picture_to_a_text_document.py
#!/usr/bin/env python
"""Create an empty text document and add a picture in a frame,
aligned to the right or to the left.

Aligning an image requires applying a style to the frame. To do
this, use the default frame position style and customize it. The
frame position style allows you to choose alignment relative to
the paragraph (default) or the page.
"""

import os
from pathlib import Path

from odfdo import Document, Frame, Paragraph, default_frame_position_style

# NOTE(review): presumably the rank of this recipe in the generated
# documentation; it is not used by the script itself -- confirm.
_DOC_SEQUENCE = 66
# Directory, next to this script, where save_new() writes the result.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_picture_right"
# File name of the generated document.
TARGET = "document.odt"
# "data" directory located next to this script.
DATA = Path(__file__).parent / "data"
# Path of the image file; note that it already starts with DATA.
IMAGE = DATA / "newlogo.png"


def save_new(document: Document, name: str) -> None:
    """Write a recipe result Document as OUTPUT_DIR / name.

    The output directory is created when missing and the destination path
    is printed before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR.joinpath(name)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def make_document() -> Document:
    """Generate a document containing two instances of an image,
    align one left, the other right.

    Each frame gets its own style built with default_frame_position_style():
    "right_frame" with the default horizontal reference and "left_frame"
    relative to the page.
    """
    document = Document("text")

    # add an image to the document, remember its URI.
    # IMAGE already starts with DATA: joining it with DATA again changes
    # nothing for an absolute path and repeats the "data" directory for a
    # relative one, so the path is used as is.
    image_path = str(IMAGE)
    uri = document.add_file(image_path)

    # add a frame style to the document, right alignment
    right_style = default_frame_position_style(
        name="right_frame", horizontal_pos="right"
    )
    document.insert_style(right_style)

    # add a frame style to the document, left alignment
    left_style = default_frame_position_style(
        name="left_frame", horizontal_pos="left", horizontal_rel="page"
    )
    document.insert_style(left_style)

    # make the image frames using the URI and the style names
    image_frame_right = Frame.image_frame(
        uri,
        size=("6cm", "4cm"),
        position=("0cm", "5cm"),
        style=right_style.name,
    )
    image_frame_left = Frame.image_frame(
        uri,
        size=("9cm", "6cm"),
        position=("0cm", "12cm"),
        style=left_style.name,
    )

    # put both image frames in the same paragraph:
    paragraph = Paragraph("")
    paragraph.append(image_frame_right)
    paragraph.append(image_frame_left)
    document.body.append(paragraph)

    return document


def main() -> None:
    """Generate the document with the two aligned images, check it and save it."""
    picture_document = make_document()
    test_unit(picture_document)
    save_new(picture_document, TARGET)


def test_unit(document: Document) -> None:
    """Check that both frame styles are in the document (test suite only)."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return

    graphic_count = sum(
        1 for style in document.get_styles() if style.family == "graphic"
    )
    assert graphic_count >= 2
    graphic_style_names = [style.name for style in document.get_styles("graphic")]
    for expected_name in ("right_frame", "left_frame"):
        assert expected_name in graphic_style_names


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

How to add a title to a text document

Minimal example of how to add a Header of first level to a text document.

recipes/how_to_add_a_title_to_a_text_document.py
"""Minimal example of how to add a Header of first level to a text document.
"""

from odfdo import Document, Header

# NOTE(review): presumably the rank of this recipe in the generated
# documentation; it is not used by the script itself -- confirm.
_DOC_SEQUENCE = 67


def main():
    """Create a text document and append a first level Header to its body.

    The document is not saved.
    """
    document = Document("text")
    document.body.append(Header(1, "The Title"))


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Accessing a single element

Example of methods and properties to analyse a document.

These methods return a single element (or None):

- `body.get_note(position)`
- `body.get_paragraph(position)`
- `body.get_header(position)`
recipes/accessing_a_single_element.py
#!/usr/bin/env python
"""Example of methods and properties to analyse a document.

These methods return a single element (or None):

    - `body.get_note(position)`
    - `body.get_paragraph(position)`
    - `body.get_header(position)`
"""

# Expected result on stdout:
# - Content of the first footnote:
# 1. [Gaiman, Neil](http://en.wikipedia.org/w/index.php?title=Neil_Gaiman)
# (2003). Don't Panic: Douglas Adams and the "Hitchhiker's Guide to the
# Galaxy". Titan Books. pp 144-145. ISBN 1-84023-742-2.
#
# - Content of the paragraph with the word 'Fish'
# In So Long, and Thanks for All the Fish (published in 1984), Arthur
# returns home to Earth, rather surprisingly since it was destroyed when
# he left. He meets and falls in love with a girl named
# [Fenchurch](http://en.wikipedia.org/w/index.php?title=Minor_characters_from_The_Hitchhiker%27s_Guide_to_the_Galaxy%23Fenchurch), and discovers this Earth is a replacement provided by the [dolphin](http://en.wikipedia.org/w/index.php?title=Dolphin)s in their Save the Humans campaign. Eventually he rejoins Ford, who claims to have saved the Universe in the meantime, to hitch-hike one last time and see God's Final Message to His Creation. Along the way, they are joined by Marvin, the Paranoid Android, who, although 37 times older than the universe itself (what with time
# travel and all), has just enough power left in his failing body to read
# the message and feel better about it all before expiring.
#
# - Content of the first Title:
# The Hitchhiker's Guide to the Galaxy
#
# - Content of the last Title:
# Official sites

import os
import sys
from pathlib import Path

from odfdo import Document

# NOTE(review): presumably the rank of this recipe in the generated
# documentation; it is not used by the script itself -- confirm.
_DOC_SEQUENCE = 70
# "data" directory located next to this script.
DATA = Path(__file__).parent / "data"
# File name, inside DATA, of the document read when no path is given on
# the command line.
SOURCE = "collection2.odt"


def read_source_document() -> Document:
    """Return the source Document.

    The path is the first command line argument when there is one,
    otherwise the SOURCE file of the DATA directory.
    """
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)


def method_demo(document: Document) -> None:
    """Show some examples of methods returning a single element."""
    # The body object is an XML element from which we can access one or several
    # other elements we are looking for.
    body = document.body

    # Accessing a single element: each getter below takes one criterion,
    # either a position (negative values count from the end) or a content
    # to find.
    print("- Content of the first footnote:")
    print(body.get_note(position=0))
    print()

    lookups = (
        (
            "- Content of the paragraph with the word 'Fish'",
            body.get_paragraph,
            {"content": "Fish"},
        ),
        ("- Content of the first Title:", body.get_header, {"position": 0}),
        ("- Content of the last Title:", body.get_header, {"position": -1}),
    )
    for title, getter, criterion in lookups:
        print(title)
        print(getter(**criterion))


def main() -> None:
    """Read the source document, print the examples and run the checks."""
    source_document = read_source_document()
    method_demo(source_document)
    test_unit(source_document)


def test_unit(document: Document) -> None:
    """Check the text of the elements shown by method_demo() (test suite only)."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return

    body = document.body
    first_note = str(body.get_note(position=0))
    assert first_note.startswith(
        "1. [Gaiman, Neil](http://en.wikipedia.org/w/index.php?title=Neil_Gaiman) (2003)"
    )
    fish_paragraph = str(body.get_paragraph(content="Fish"))
    assert fish_paragraph.endswith("all before expiring.\n")
    first_title = str(body.get_header(position=0))
    assert first_title.startswith("The Hitchhiker's Guide")
    last_title = str(body.get_header(position=-1))
    assert last_title.startswith("Official sites")


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Accessing a list of elements

Example of methods and properties to analyse a document.

These methods or properties return a list of elements:

- `body.headers`
- `body.images`
- `body.paragraphs`
- `body.get_links()`
- `body.get_notes()`
- `body.tables`
- `body.get_paragraphs(content)`
recipes/accessing_a_list_of_elements.py
#!/usr/bin/env python
"""Example of methods and properties to analyse a document.

These methods or properties return a list of elements:

    - `body.headers`
    - `body.images`
    - `body.paragraphs`
    - `body.get_links()`
    - `body.get_notes()`
    - `body.tables`
    - `body.get_paragraphs(content)`
"""

# Expected result on stdout:
# 96 get methods are available
# number of headings: 29
# number of images stored: 0
# number of paragraphs: 175
# number of links (URLs): 352
# number of footnotes: 49
# number of tables: 0
# Paragraphs with 'Fish': 4
# Paragraphs with 'answer' and '42': 1

import os
import sys
from pathlib import Path

from odfdo import Document

# NOTE(review): presumably the rank of this recipe in the generated
# documentation; it is not used by the script itself -- confirm.
_DOC_SEQUENCE = 75
# "data" directory located next to this script.
DATA = Path(__file__).parent / "data"
# File name, inside DATA, of the document read when no path is given on
# the command line.
SOURCE = "collection2.odt"


def read_source_document() -> Document:
    """Return the source Document.

    The path is the first command line argument when there is one,
    otherwise the SOURCE file of the DATA directory.
    """
    arguments = sys.argv[1:]
    source = arguments[0] if arguments else DATA / SOURCE
    return Document(source)


def analysis(document: Document) -> dict[str, int]:
    """Returns some statistics about the document.

    The result maps the keys "methods", "headings", "images", "paragraphs",
    "links", "footnotes", "tables", "fish" and "answer" to a count.
    """
    # The body object is an XML element from which we can access one or several
    # other elements we are looking for.
    body = document.body

    # Lists of elements are read either through a property (headers, images,
    # paragraphs, tables) or through a get_xxx() method, which may take
    # filtering parameters such as a regular expression on the content.
    return {
        # number of occurrences of "get_" in the attribute names of the body
        "methods": " ".join(dir(body)).count("get_"),
        # Some figures that can be checked against the content of the odt file:
        "headings": len(body.headers),
        "images": len(body.images),
        "paragraphs": len(body.paragraphs),
        "links": len(body.get_links()),
        "footnotes": len(body.get_notes()),
        # Our sample document has no table:
        "tables": len(body.tables),
        # paragraphs selected by a regular expression on their content
        "fish": len(body.get_paragraphs(content=r"Fish")),
        "answer": len(body.get_paragraphs(content=r"answer.*42")),
    }


def display_analysis(stats: dict[str, int]) -> None:
    """Print the stats on stdout, one line per statistic."""
    # one template per printed line, filled with the values of `stats`
    report_lines = (
        "{methods} get methods are available",
        "number of headings: {headings}",
        "number of images stored: {images}",
        "number of paragraphs: {paragraphs}",
        "number of links (URLs): {links}",
        "number of footnotes: {footnotes}",
        "number of tables: {tables}",
        "Paragraphs with 'Fish': {fish}",
        "Paragraphs with 'answer' and '42': {answer}",
    )
    for template in report_lines:
        print(template.format_map(stats))


def main() -> None:
    """Compute the statistics of the source document, print and check them."""
    stats = analysis(read_source_document())
    display_analysis(stats)
    test_unit(stats)


def test_unit(stats: dict[str, int]) -> None:
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return

    assert stats["methods"] == 96
    assert stats["headings"] == 29
    assert stats["images"] == 0
    assert stats["paragraphs"] == 175
    assert stats["links"] == 352
    assert stats["footnotes"] == 49
    assert stats["tables"] == 0
    assert stats["fish"] == 4
    assert stats["answer"] == 1


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Accessing other element from element like list

Accessing elements from element-like list.

Any fetched element is an XML tree context that can be queried, but only on the subtree it contains. Here are quick examples of iteration on Paragraphs and Lists from the document.

recipes/accessing_other_element_from_element_like_list.py
#!/usr/bin/env python
"""Accessing elements from element-like list.

Any fetched element is a XML tree context that can be queried, but only on the subtree it
contains. Here are quick examples of iteration on `Paragraphs` and `Lists` from the document.
"""

# Expected result on stdout:
# Number of available lists in the document: 5
#
# The 4th list contains 9 paragraphs
#
# 1 : [BBC Cult website](http://www.bbc.co.uk/cult/hitchhikers/),
# official website for the [TV show version](http://en.wikipedia.org/w/index.php?title=The_Hitchhiker%27s_Guide_to_the_Galaxy_%28TV_series%29)
# (includes information, links and downloads)
#
# 2 : [BBC Radio 4 website for the 2004-2005
# series](http://www.bbc.co.uk/radio4/hitchhikers/)
#
# 3 : [Official Movie Site](http://hitchhikers.movies.go.com/)
#
# 4 : [The Hitchhiker's Guide to the Galaxy
# (2005 movie)](http://www.imdb.com/title/tt0371724/)at the
# [Internet Movie Database](http://en.wikipedia.org/w/index.php?title=Internet_Movie_Database)
#
# 5 : [The Hitch Hikers Guide to the Galaxy
# (1981 TV series)](http://www.imdb.com/title/tt0081874/)at the
# [Internet Movie Database](http://en.wikipedia.org/w/index.php?title=Internet_Movie_Database)
#
# 6 : [h2g2](http://www.bbc.co.uk/h2g2/guide/)
#
# 7 : [Encyclopedia of Television](http://www.museum.tv/archives/etv/H/htmlH/hitch-hickers/hitch-hickers.htm)
#
# 8 : [British Film Institute Screen Online](http://www.screenonline.org.uk/tv/id/560180/index.html)
# page devoted to the TV series
#
# 9 : [DC Comics H2G2 site](http://www.dccomics.com/graphic_novels/?gn=1816)

import os
import sys
from pathlib import Path

from odfdo import Document

# NOTE(review): presumably the rank of this recipe in the generated
# documentation; it is not used by the script itself -- confirm.
_DOC_SEQUENCE = 80
# "data" directory located next to this script.
DATA = Path(__file__).parent / "data"
# File name, inside DATA, of the document read when no path is given on
# the command line.
SOURCE = "collection2.odt"


def read_source_document() -> Document:
    """Return the source Document.

    The path is the first command line argument when there is one,
    otherwise the SOURCE file of the DATA directory.
    """
    if len(sys.argv) > 1:
        return Document(sys.argv[1])
    return Document(DATA / SOURCE)


def analyse_list(document: Document):
    """Print the number of lists of the document, then the paragraphs of
    the list found at position 4.
    """
    # The body object is an XML element from which we can access one or several
    # other elements we are looking for.
    body = document.body

    # Any element is a context for navigating, but only on the subtree it
    # contains: the lists are searched in the whole body, the paragraphs
    # only inside the selected list.
    print("Number of available lists in the document:", len(body.lists), end="\n\n")

    selected = body.get_list(position=4)
    print(f"The 4th list contains {len(selected.paragraphs)} paragraphs", end="\n\n")

    # Now print the list content, numbered from 1
    for rank, paragraph in enumerate(selected.paragraphs, start=1):
        print(rank, ":", paragraph)


def main():
    """Read the source document, print the list analysis and run the checks."""
    source_document = read_source_document()
    analyse_list(source_document)
    test_unit(source_document)


def test_unit(document: Document) -> None:
    """Check the lists of the document and the list at position 4 (test suite only)."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return

    body = document.body
    selected = body.get_list(position=4)
    items = selected.paragraphs
    assert len(body.lists) == 5
    assert len(selected.paragraphs) == 9
    # expected beginning of the first and of the last paragraph of the list
    expected_starts = ((0, "[BBC Cult website](http"), (8, "[DC Comics H2G2 site](http"))
    for index, prefix in expected_starts:
        assert str(items[index]).startswith(prefix)


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

How to add a list to a text document

Create an empty text document and add a list.

recipes/how_to_add_a_list_to_a_text_document.py
#!/usr/bin/env python
"""Create an empty text document and add a list.
"""
import os
from pathlib import Path

# Lists are a dedicated object List
from odfdo import Document, List

# NOTE(review): presumably the rank of this recipe in the generated
# documentation; it is not used by the script itself -- confirm.
_DOC_SEQUENCE = 90
# Directory, next to this script, where save_new() writes the result.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "add_list"
# File name of the generated document.
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Save the document under OUTPUT_DIR with the given file name.

    The output directory is created when missing and the destination path
    is printed before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR.joinpath(name)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Create a text document containing only a three-item list and save it."""
    document = Document("text")
    content = document.body
    content.clear()
    # The list factory accepts a Python list of strings and list items.
    content.append(List(["chocolate", "tea", "coffee"]))

    test_unit(document)

    save_new(document, TARGET)


def test_unit(document: Document) -> None:
    """Self-check for the test suite; does nothing unless ODFDO_TESTING is set."""
    if os.environ.get("ODFDO_TESTING") is None:
        return
    expected = "- chocolate\n- tea\n- coffee"
    rendered = document.get_formatted_text().strip()
    assert rendered == expected


if __name__ == "__main__":
    main()

How to add a manual page break

Adding a manual page break to a text document.

recipes/how_to_add_a_manual_page_break.py
#!/usr/bin/env python
"""Adding a manual page break to a text document.
"""
from pathlib import Path

from odfdo import Document, PageBreak, Paragraph, Style

_DOC_SEQUENCE = 95
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "page_break"
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Write the recipe result `document` as OUTPUT_DIR / `name`."""
    target_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", target_path)
    document.save(target_path, pretty=True)


def main():
    """Build a document with two kinds of manual page break and save it."""
    document = Document()
    content = document.body
    content.clear()

    # First way: the odfdo shortcut, a PageBreak element relying on the
    # style registered by add_page_break_style().
    document.add_page_break_style()
    for element in (
        Paragraph("First paragraph"),
        PageBreak(),
        Paragraph("Second paragraph"),
    ):
        content.append(element)

    # Second way: a hand-made paragraph style that breaks the page before
    # the (empty) paragraph using it.
    style_name = "page_break_before"
    break_style = Style("paragraph", name=style_name)
    break_style.set_properties({"fo:break-before": "page"})
    document.insert_style(break_style)
    content.append(Paragraph("", style=style_name))
    content.append(Paragraph("Third paragraph"))

    save_new(document, TARGET)


if __name__ == "__main__":
    main()

Create a basic drawing

Insert a circle and a lot of lines (a fractal) in a text document.

recipes/create_a_basic_drawing.py
#!/usr/bin/env python
"""Insert a circle and a lot of lines (a fractal) in a text document.
"""
import cmath
from pathlib import Path
from typing import Union

from odfdo import Document, EllipseShape, Header, LineShape, Paragraph

_DOC_SEQUENCE = 100
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_drawing"
TARGET = "koch.odt"

CYCLES = 4  # beware, 5 is big, 6 is too big to display...


def save_new(document: Document, name: str):
    """Store `document` as `name` inside OUTPUT_DIR (created on demand)."""
    output_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", output_path)
    document.save(output_path, pretty=True)


def cm(x: float) -> str:
    """Format `x` as a length string in centimeters with two decimals."""
    return f"{x:.2f}cm"


# some graphic computations
class Vector:
    """A segment between two points `a` and `b` of the complex plane."""

    def __init__(self, a: Union[float, complex], b: Union[float, complex]):
        self.a = a
        self.b = b

    def koch_split(self) -> list:
        """Return the four sub-segments of one Koch subdivision step.

        The segment is cut in three; the middle third is replaced by the two
        other sides of a triangle built on it.
        """
        span = self.b - self.a
        first_third = self.a + 1.0 / 3.0 * span
        second_third = self.a + 2.0 / 3.0 * span
        middle = 0.5 * (self.a + self.b)
        # Apex: the middle third rotated by a quarter turn, from the midpoint.
        apex = middle + (second_third - first_third) * complex(0, -1)
        corners = [self.a, first_third, apex, second_third, self.b]
        return [Vector(start, end) for start, end in zip(corners, corners[1:])]

    def centimeter(self, val) -> tuple:
        """Return the (x, y) centimeter strings of `a` if val == 0, else of `b`."""
        point = self.a if val == 0 else self.b
        return (cm(point.real), cm(point.imag))


def koch(vector_list, cycle=2):
    """Apply `cycle` Koch subdivision steps to every vector of `vector_list`.

    Each step replaces every vector by the result of its koch_split().
    With cycle <= 0 the list is returned unchanged.
    """
    segments = vector_list
    remaining = cycle
    while remaining > 0:
        segments = [piece for vector in segments for piece in vector.koch_split()]
        remaining -= 1
    return segments


def make_fractal_coords(side, vpos):
    """Return (center, vectors) of a Koch fractal built on a triangle.

    The triangle has sides of length `side`; its first corner is placed at
    x = (17 - side) / 2 and y = `vpos`. CYCLES subdivision steps are applied.
    """
    origin = complex((17 - side) / 2.0, vpos)
    base = Vector(origin, origin + complex(side, 0))
    right = Vector(base.b, origin + cmath.rect(side, cmath.pi / 3))
    left = Vector(right.b, origin)
    # Centroid of the three corners of the triangle.
    center = (base.a + base.b + right.b) / 3
    return center, koch([base, right, left], cycle=CYCLES)


def generate_document():
    """Return a text document showing a circle and a Koch fractal made of lines."""
    document = Document("text")
    content = document.body

    print("Making some Koch fractal")
    content.append(Header(1, "Some Koch fractal"))

    # Blue strokes and a light yellow fill for the graphic shapes.
    graphic_style = document.get_style("graphic")
    graphic_style.set_properties({"svg:stroke_color": "#0000ff"})
    graphic_style.set_properties(fill_color="#ffffcc")

    # Every shape is anchored in this single empty paragraph.
    holder = Paragraph("")
    content.append(holder)

    # some computation of coordinates
    center, vector_list = make_fractal_coords(side=12.0, vpos=8.0)

    # The circle: a square-sized ellipse positioned by its top-left corner.
    radius = 8.0
    corner = center - complex(radius, radius)
    diameter = radius * 2
    holder.append(
        EllipseShape(
            size=(cm(diameter), cm(diameter)),
            position=(cm(corner.real), cm(corner.imag)),
        )
    )

    # The fractal: one line shape per vector.
    holder.append(f"number of lines: {len(vector_list)}")
    for segment in vector_list:
        holder.append(LineShape(p1=segment.centimeter(0), p2=segment.centimeter(1)))

    return document


def main():
    """Generate the fractal document and save it."""
    save_new(generate_document(), TARGET)


if __name__ == "__main__":
    main()

Add private annotations to a document

Add not printable annotations to a document.

Annotations are notes that do not appear in the document but typically on a side bar in a desktop application. So they are not printed.

recipes/add_private_annotations_to_a_document.py
#!/usr/bin/env python
"""Add not printable annotations to a document.

Annotations are notes that do not appear in the document but typically
on a side bar in a desktop application. So they are not printed.
"""

import os
from pathlib import Path

from odfdo import Document, Header, Paragraph

_DOC_SEQUENCE = 110
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "annotated"
TARGET = "annotated_document.odt"


def save_new(document: Document, name: str) -> None:
    """Write a recipe result Document as OUTPUT_DIR / `name`."""
    result_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", result_path)
    document.save(result_path, pretty=True)


def base_document() -> Document:
    """Return a small text document: a title, two sections and their text."""
    preparations = (
        "Sous le consulat de Lucius Domitius et d'Appius Claudius, "
        "César, quittant les quartiers d'hiver pour aller en Italie, "
        "comme il avait coutume de le faire chaque année, ordonne aux "
        "lieutenants qu'il laissait à la tête des légions de construire, "
        "pendant l'hiver, le plus de vaisseaux qu'il serait possible, "
        "et de réparer les anciens."
    )
    island = (
        "Cette île est de forme triangulaire ; l'un des côtés regarde "
        "la Gaule. Des deux angles de ce côté, l'un est au levant, "
        "vers le pays de Cantium, où abordent presque tous les vaisseaux "
        "gaulois ; l'autre, plus bas, est au midi. La longueur de ce côté "
        "est d'environ cinq cent mille pas. "
    )
    elements = [
        Header(1, "De la Guerre des Gaules - Livre V"),
        Header(2, "Préparatifs d'expédition en Bretagne"),
        Paragraph(preparations),
        Header(2, "La Bretagne"),
        Paragraph(island),
    ]

    document = Document("text")
    content = document.body
    for element in elements:
        content.append(element)
    return document


def insert_annotation(document: Document) -> None:
    """Attach a non-printable annotation to the paragraph mentioning "consulat"."""
    target = document.body.get_paragraph(content="consulat")
    # Annotations are inserted like notes, only simpler. Arguments:
    #   after   => the word after which the annotation is inserted
    #   body    => the annotation text itself
    #   creator => the author of the annotation
    #   date    => a datetime value, datetime.now() by default
    target.insert_annotation(
        after="Domitius",
        body="Talking about Lucius Domitius",
        creator="Luis",
    )


def main() -> None:
    """Build the sample document, annotate it, self-check and save it."""
    annotated = base_document()
    insert_annotation(annotated)
    test_unit(annotated)
    save_new(annotated, TARGET)


def test_unit(document: Document) -> None:
    """Self-check for the test suite; does nothing unless ODFDO_TESTING is set."""
    if os.environ.get("ODFDO_TESTING") is None:
        return

    found = document.body.get_annotations(creator="Luis")
    assert len(found) == 1


if __name__ == "__main__":
    main()

Accessibility check on a document

Basic Accessibility test: check, for every picture in a document, if there is:

  • a title (svg_title),
  • a description (svg_description)

or, at least, some caption text.

See the test file planes.odt and the result of the script.

recipes/accessibility_check_on_a_document.py
#!/usr/bin/env python
"""Basic Accessibility test: check, for every picture in a document, if
there is:

  - a title (svg_title),
  - a description (svg_description)

or, at least, some caption text.

See the test file `planes.odt` and the result of the script.
"""

# Expected result on stdout:
# The document displays 3 pictures:
#  - pictures with a title: 2
#  - pictures with a description: 1
#  - pictures with a caption: 0

# Image: 100000000000013B000000D345859F604DCE636A.jpg
#   Name: graphics2, Title: Spitfire, general view, Description:Green spitfire in a hall, view from left front., Caption:None
# Image: 100000000000013B000000D3F908DA0A939D2F4B.jpg
#   Name: graphics3, Title: Spitfire, detail, Description:None, Caption:None
# Image: 100000000000013B000000D375CEBFD6D7CB7CE9.jpg
#   Name: graphics1, Title: None, Description:None, Caption:None

import os
import sys
from pathlib import Path
from typing import Any

from odfdo import Document

_DOC_SEQUENCE = 200
DATA = Path(__file__).parent / "data"
SOURCE = "planes.odt"


def read_source_document() -> Document:
    """Open the document named by the first CLI argument, else DATA / SOURCE."""
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)


def accessibility_evaluator(document: Document) -> dict[str, Any]:
    """Collect title, description and caption statistics for every image.

    Returns a dict with a printable report line per image under "images",
    and the number of images having a title, a description and a caption
    under "titles", "descriptions" and "captions".
    """
    reports = []
    titles = descriptions = captions = 0

    for image in document.body.images:
        _, _, filename = image.url.rpartition("/")
        frame = image.parent
        name = frame.name
        title = frame.svg_title
        description = frame.svg_description
        link = frame.parent
        # ODF know-how: a caption is only looked up when the frame is
        # wrapped in a draw:a element, in its office:name attribute.
        caption = link.get_attribute("office:name") if link.tag == "draw:a" else None

        reports.append(
            f"Image: {filename}\n"
            f"  Name: {name}, Title: {title}, "
            f"Description:{description}, Caption:{caption}"
        )
        if title:
            titles += 1
        if description:
            descriptions += 1
        if caption:
            captions += 1

    return {
        "images": reports,
        "titles": titles,
        "descriptions": descriptions,
        "captions": captions,
    }


def display_accessibilty(stats: dict[str, Any]) -> None:
    """Print the summary counters, a blank line, then one report per image."""
    lines = [
        f"The document displays {len(stats['images'])} pictures:",
        f" - pictures with a title: {stats['titles']}",
        f" - pictures with a description: {stats['descriptions']}",
        f" - pictures with a caption: {stats['captions']}",
        "",
        *stats["images"],
    ]
    print(*lines, sep="\n")


def main() -> None:
    """Evaluate the source document, print the statistics, then self-check."""
    stats = accessibility_evaluator(read_source_document())
    display_accessibilty(stats)
    test_unit(stats)


def test_unit(stats: dict[str, Any]) -> None:
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return

    assert len(stats["images"]) == 3
    assert stats["titles"] == 2
    assert stats["descriptions"] == 1
    assert stats["captions"] == 0


if __name__ == "__main__":
    main()

Add logo on presentation

Insert an image (e.g. the logo of an event, organization or a Creative Commons attribution) with size x,y at position x2,y2 on a number of slides in a presentation slide deck.

recipes/add_logo_on_presentation.py
#!/usr/bin/env python
"""Insert an image (e.g. the logo of an event, organization or a Creative Commons
attribution) with size `x,y` at position `x2,y2` on a number of slides in a
presentation slide deck.
"""

import os
import sys
from pathlib import Path

# reading image size requires a graphic library
from PIL import Image

from odfdo import Document, Frame

_DOC_SEQUENCE = 250
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "add_logo"
TARGET = "presentation.odp"
DATA = Path(__file__).parent / "data"
SOURCE = "presentation_wo_logo.odp"
LOGO = DATA / "newlogo.png"


def read_source_document() -> Document:
    """Open the presentation given on the command line, else DATA / SOURCE."""
    cli_args = sys.argv[1:]
    source = cli_args[0] if cli_args else DATA / SOURCE
    return Document(source)


def save_new(document: Document, name: str) -> None:
    """Save the recipe result `document` as `name` in OUTPUT_DIR."""
    saved_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", saved_path)
    document.save(saved_path, pretty=True)


def make_image_size(path: Path, size: float) -> tuple[str, str]:
    """Return the display size (width, height) of an image as cm strings.

    The pixel dimensions are scaled, keeping the aspect ratio, so that the
    largest one equals `size`.

    Args:
        path: path of the image file.
        size: length, in cm, given to the largest dimension of the image.

    Returns:
        The (width, height) pair, each formatted with two decimals and a
        "cm" suffix.
    """
    # Image.open() keeps the file open; the context manager closes it as
    # soon as the dimensions are read.
    with Image.open(path) as image:
        width, height = image.size
    ratio = max(width / size, height / size)
    return (f"{width / ratio:.2f}cm", f"{height / ratio:.2f}cm")


def add_logo(presentation: Document) -> None:
    """Put the LOGO image, with a title and a description, on every slide."""
    logo_position = ("1.50cm", "1.50cm")
    logo_title = "New Logo"
    logo_description = "The new logo with blue background"

    logo_size = make_image_size(LOGO, 4.0)
    slides = presentation.body
    uri = presentation.add_file(LOGO)

    for slide in slides.get_draw_pages():
        # A new frame per slide, all pointing to the same embedded file.
        frame = Frame.image_frame(
            image=uri,
            text="",  # Text over the image object
            size=logo_size,  # Display size of image
            anchor_type="page",
            page_number=None,
            position=logo_position,
            style=None,
        )
        frame.svg_title = logo_title
        frame.svg_description = logo_description
        slide.append(frame)


def main() -> None:
    """Read the presentation, add the logo, self-check and save the result."""
    presentation = read_source_document()
    add_logo(presentation)
    test_unit(presentation)
    save_new(presentation, TARGET)


def test_unit(document: Document) -> None:
    """Self-check for the test suite; does nothing unless ODFDO_TESTING is set."""
    if os.environ.get("ODFDO_TESTING") is None:
        return

    pages = document.body.get_draw_pages()
    assert len(pages) == 11
    for page in pages:
        images = page.get_images()
        assert len(images) == 1


if __name__ == "__main__":
    main()

Get pictures from document odt

Get all the pictures embedded in an .odt file.

recipes/get_pictures_from_document_odt.py
#!/usr/bin/env python
"""Get all the pictures embeded in an .odt file.
"""
import sys
from pathlib import Path
from pprint import pformat

from odfdo import Document

_DOC_SEQUENCE = 260
DATA = Path(__file__).parent / "data"
# ODF export of Wikipedia article Hitchhiker's Guide to the Galaxy (CC-By-SA)
# Remark: the document is badly made: the pictures are not displayed in the
# text, but are still inside the document!
SOURCE = "collection.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "found_pics"


def read_source_document():
    """Open the document given on the command line, else DATA / SOURCE."""
    if len(sys.argv) > 1:
        return Document(sys.argv[1])
    return Document(DATA / SOURCE)


def main():
    """List the parts and images of the document and copy the images out."""
    doc = read_source_document()

    # Show the list of the parts of the document.
    print("Parts:")
    print(pformat(doc.parts))
    print()

    # We want the images of the document.
    pictures = doc.body.images
    print("Pics :")
    print(pformat(pictures))
    print()

    # get_part() returns the actual content of an image from its URL inside
    # the .odt file; each one is written in OUTPUT_DIR under its file name.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    for picture in pictures:
        inner_url = picture.url
        content = doc.get_part(inner_url)
        target = OUTPUT_DIR / Path(inner_url).name
        target.write_bytes(content)

    print(f"Files in {OUTPUT_DIR}:")
    for entry in OUTPUT_DIR.glob("*"):
        print(entry.name)


if __name__ == "__main__":
    main()

Change image in many documents

Change an image in many ODF files.

This recipe is suitable for the scenario where an organization is moving from one company logo to another and needs to replace the logo in several hundred existing documents.

recipes/change_image_in_many_documents.py
#!/usr/bin/env python
"""Change an image in many ODF files.

This recipe is suitable for the scenario where an organization
is moving from one company logo to another and needs to replace
the logo in several hundred existing documents.
"""

import os
from hashlib import sha256
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 270
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "new_logo"
DATA = Path(__file__).parent / "data"
OLD_PRESENTATIONS = DATA / "old_presentations"
OLD_LOGO = OLD_PRESENTATIONS / "oldlogo.png"
NEW_LOGO = DATA / "newlogo.png"


def save_modified(document: Document) -> None:
    """Save `document` in OUTPUT_DIR, keeping the file name of its own path."""
    destination = OUTPUT_DIR / Path(document.path).name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination)


def footprint(content: bytes) -> str:
    """Return the hexadecimal sha256 digest of `content`."""
    hasher = sha256()
    hasher.update(content)
    return hasher.hexdigest()


def update_document_logo(
    path: Path,
    old_hash: str,
    new_content: bytes,
    stats: dict[str, int],
) -> None:
    """Replace the old logo by the new one inside the document at `path`.

    Every image whose sha256 footprint equals `old_hash` gets `new_content`
    as content; the document is saved only if at least one image changed.
    The counters "files", "odf_files" and "updated_files" of `stats` are
    incremented along the way.
    """
    stats["files"] += 1
    # Only suffixes starting with ".od" are tried as ODF documents.
    if not path.suffix.lower().startswith(".od"):
        return
    try:
        document = Document(path)
    except Exception:
        # Best effort: a file that cannot be loaded is skipped.
        return
    stats["odf_files"] += 1

    changed = False
    for image in document.body.images:
        url = image.url
        if not url:
            continue
        try:
            current_content = document.get_part(url)
        except KeyError:
            print("- not found inside document:", path, end=" ")
            print("  image URL:", url)
            continue
        if footprint(current_content) != old_hash:
            continue
        document.set_part(url, new_content)
        changed = True

    if not changed:
        return
    save_modified(document)
    stats["updated_files"] += 1


def update_logos() -> dict[str, int]:
    """Update the logo in every document found under OLD_PRESENTATIONS.

    Returns the counters of visited paths, loaded ODF files and updated files.
    """
    counters: dict[str, int] = dict.fromkeys(
        ("files", "odf_files", "updated_files"), 0
    )
    old_hash = footprint(OLD_LOGO.read_bytes())

    # The new image content is obtained by adding the file to a scratch
    # document and reading the resulting part back.
    scratch = Document("text")
    new_content = scratch.get_part(scratch.add_file(str(NEW_LOGO)))

    for candidate in OLD_PRESENTATIONS.glob("**/*"):
        update_document_logo(candidate, old_hash, new_content, counters)
    return counters


def main() -> None:
    """Update the logos, print the counters, then self-check."""
    stats = update_logos()
    labels = (
        ("Files", "files"),
        ("ODF files", "odf_files"),
        ("Updated files", "updated_files"),
    )
    for label, key in labels:
        print(f"{label}: {stats[key]}")
    test_unit(stats)


def test_unit(stats: dict[str, int]) -> None:
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return

    assert (stats["files"]) == 3
    assert (stats["odf_files"]) == 2
    assert (stats["updated_files"]) == 2


if __name__ == "__main__":
    main()

Concatenate presentations

Concatenate several presentations (including presentations found in sub directories), possibly merge styles and images. Result for style may vary.

recipes/concatenate_presentations.py
#!/usr/bin/env python
"""Concatenate several presentations (including presentations found in sub
directories), possibly merge styles and images. Result for style may vary.
"""

import os
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 280
DATA = Path(__file__).parent / "data"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "concatenate"
TARGET = "presentation.odp"


def save_new(document: Document, name: str) -> None:
    """Write the resulting Document as OUTPUT_DIR / `name`."""
    out_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", out_path)
    document.save(out_path, pretty=True)


def concatenate_presentations(path: Path) -> Document:
    """Return one presentation holding a copy of every .odp found under `path`.

    Sub directories are searched too.
    """
    merged = Document("presentation")
    # Start from a blank presentation: no content, no styles.
    merged.body.clear()
    merged.delete_styles()

    count = 0
    for count, presentation_path in enumerate(path.glob("**/*.odp"), start=1):
        add_presentation(merged, presentation_path)

    nb_slides = len(merged.body.get_draw_pages())
    print(f"{count} presentations concatenated, {nb_slides} slides.")

    return merged


def add_presentation(concat_presentation: Document, path: Path) -> None:
    """Copy the styles, slides and images of the .odp at `path` into
    `concat_presentation`; a file that cannot be opened is skipped."""
    try:
        source = Document(path)
    except Exception:
        return
    concat_presentation.merge_styles_from(source)

    target_body = concat_presentation.body
    target_manifest = concat_presentation.manifest
    source_manifest = source.manifest
    source_slides = source.body.get_draw_pages()
    print(f"- {path.name} has {len(source_slides)} slides")
    for original in source_slides:
        copied = original.clone
        # The images of the slide must be copied too: both the manifest
        # entry and the content.
        for image in copied.images:
            uri = image.url
            target_manifest.add_full_path(uri, source_manifest.get_media_type(uri))
            concat_presentation.set_part(uri, source.get_part(uri))
        # append slide, expecting nothing good about its final style
        target_body.append(copied)


def main() -> None:
    """Concatenate the presentations found under DATA, self-check and save."""
    merged = concatenate_presentations(DATA)
    test_unit(merged)
    save_new(merged, TARGET)


def test_unit(document: Document) -> None:
    """Self-check for the test suite; does nothing unless ODFDO_TESTING is set."""
    if os.environ.get("ODFDO_TESTING") is None:
        return

    slides = document.body.get_draw_pages()
    assert len(slides) == 38


if __name__ == "__main__":
    main()

Make a presentation from pictures of a text document

Open a .odt file with pictures in it, find and analyse all the images, create a new .odp presentation, display all the pictures in the presentation, one image per frame.

recipes/make_a_presentation_from_pictures_of_a_text_document.py
#!/usr/bin/env python
"""Open a .odt file with pictures in it, find and analyse all the images,
create a new .odp presentation, display all the pictures in the presentation,
one image per frame.
"""
import os
from pathlib import Path
from tempfile import mkstemp

# analyzing embedded image need Pillow library
from PIL import Image

from odfdo import Document, DrawPage, Frame

_DOC_SEQUENCE = 285
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "presentation_images_in_odt"
TARGET = "presentation.odp"
DATA = Path(__file__).parent / "data"
SOURCE = DATA / "collection.odt"


def embedded_image_ratio(href, part):
    """Return the width / height ratio of an image embedded in a document.

    The content is written to a temporary file so that the graphic library
    can read it; the file is always removed, even when the content cannot
    be written or decoded.

    Args:
        href: path of the image inside the document; it is printed, and its
            file suffix is given to the temporary file.
        part: the image content, as bytes.

    Returns:
        The ratio width / height, as a float.
    """
    # Path.suffix gives "" for a name without a dot, where splitting on "."
    # would produce a suffix made of the whole path.
    fd, tmp_file = mkstemp(suffix=Path(href).suffix)
    try:
        with os.fdopen(fd, "wb") as tmp_file_handler:
            tmp_file_handler.write(part)
        # The context manager closes the image file before it is unlinked.
        with Image.open(tmp_file) as image:
            width, height = image.size
    finally:
        os.unlink(tmp_file)
    print(f"image {href} , size : {width}x{height}")
    return width / height


def save_new(document: Document, name: str):
    """Save `document` as `name` in OUTPUT_DIR, creating the folder if needed."""
    presentation_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", presentation_path)
    document.save(presentation_path, pretty=True)


def main():
    """Generate the presentation from the pictures of SOURCE and save it."""
    document = generate_document()
    save_new(document, TARGET)

    # Expected result printed while generating the document:
    #
    # image Pictures/12918371211855030272.jpe , size : 333x386
    # image Pictures/12918371212102410240.jpe , size : 200x350
    # image Pictures/12918371212184750080.jpe , size : 384x552
    # image Pictures/12918371212196450304.jpe , size : 373x576
    # image Pictures/12918371212450449408.jpe , size : 400x596
    # image Pictures/12918371212536940544.jpe , size : 800x1195
    # image Pictures/12918371212580190208.jpe , size : 561x282
    # image Pictures/12918371212597118976.jpe , size : 660x515
    # image Pictures/12918371212741570560.jpe , size : 328x504


def generate_document():
    """Build a presentation displaying each image of SOURCE on its own page.

    Every image found in the body of the source text document is copied
    into a new presentation (content and manifest entry) and displayed on a
    dedicated page, under a short legend.

    Returns:
        The generated presentation Document.
    """
    # Open the input document
    doc_source = Document(SOURCE)

    # Making of the output Presentation document :
    presentation = Document("presentation")

    # Presentation got a body in which elements are stored
    presentation_body = presentation.body
    presentation_body.clear()
    presentation_manifest = presentation.manifest

    # For each image, we create a page in the presentation and display the image
    # and some text on this frame
    # First, get all image elements available in document:
    images_source = doc_source.body.images
    manifest_source = doc_source.manifest

    max_border = 16.0  # max size of the greatest border, in cm

    for image in images_source:
        # we use the get_part function from odfdo to get the actual content
        # of the images, with the URI link to the image as argument
        uri = image.url
        part = doc_source.get_part(uri)  # actual image content
        name = uri.split("/")[-1]  # lets make a file name for image

        # Compute the display size of the image on the final page: the
        # greatest border is max_border, the other one follows the ratio.
        ratio = embedded_image_ratio(uri, part)
        a = max_border * ratio
        b = max_border
        if ratio > 1.0:
            a /= ratio
            b /= ratio

        # Create an underlying page for the image and the text
        page = DrawPage("page " + name)

        # Create a frame for the image
        image_frame = Frame.image_frame(
            image=uri,
            text="",  # Text over the image object
            size=(f"{a}cm", f"{b}cm"),  # Display size of image
            anchor_type="page",
            page_number=None,
            # An ODF length has no space between the number and the unit.
            position=("3.5cm", "3.5cm"),
            style=None,
        )

        # Add some text object somewhere on the frame, with a text frame
        legend = f"Image {name} from Wikipedia document / {SOURCE.name}"
        text_frame = Frame.text_frame(
            legend,
            size=("26cm", "2cm"),
            position=("0.5cm", "0.5cm"),
            style="Standard",
            text_style="Standard",
        )

        # Append all the components, do not forget to add the actual image
        # file into the presentation file with set_part
        page.append(text_frame)
        page.append(image_frame)
        presentation_body.append(page)
        # for the same operation from a local filesystem image, just use:
        # presentation.add_file(path)
        media_type = manifest_source.get_media_type(uri)
        presentation_manifest.add_full_path(uri, media_type)
        # `part` already holds the image content, no need to read it again.
        presentation.set_part(uri, part)

    return presentation


if __name__ == "__main__":
    main()

Make presentation from images

Create a presentation from some images in a given directory, where each image is put on the center of its own page scaled to either the maximum available size, preferred maximum size, or cover the full page and lose some info.

recipes/make_presentation_from_images.py
#!/usr/bin/env python
"""Create a presentation from a some images in a given directory,
where each image is put on the center of its own page scaled to either
the maximum available size, preferred maximum size, or cover the full
page and lose some info.
"""
from pathlib import Path

# analyzing embedded image need Pillow library
from PIL import Image

from odfdo import Document, DrawPage, Frame

_DOC_SEQUENCE = 286
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "presentation_from_images"
TARGET = "presentation.odp"
IMAGES = Path(__file__).parent / "data" / "images"
MAX_SIZE = 15.0  # feel free to customize
CROP_SIZE = False  # feel free to customize

# Size (in cm) of a slide : (default page-layout)
SLIDE_W, SLIDE_H = 28.0, 21.0  # 4/3 screen
# FIXME: this is the default page-layout.
# - Changing the style of the page-layout by program is not done in this script
# - an other way, merging with external page-layout/master-page requires
#   extra files, out of the scope for this script.


def save_new(document: Document, name: str):
    """Save `document` as `name` in OUTPUT_DIR, creating the folder if needed."""
    final_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", final_path)
    document.save(final_path)


def main():
    """Build the presentation from the images and save it.

    Raises:
        SystemExit: with status 1 when no presentation could be built.
    """
    presentation = make_presentation()
    if presentation is None:
        print("Something went wrong.")
        # The bare exit() helper is injected by the site module and is not
        # always available; a failure is also reported with a non-zero status.
        raise SystemExit(1)
    save_new(presentation, TARGET)


def make_presentation():
    """Return a presentation with one page per collected image.

    Returns None when no image was found.
    """
    pool = collect_images()
    if not pool:  # unable to find images
        print("No image found !")
        return None

    # The output presentation; its default content is discarded.
    presentation = Document("presentation")
    slides = presentation.body
    slides.clear()

    for info in pool:
        file_name = info.path.name
        # add the file to the document
        uri = presentation.add_file(str(info.path))

        # One page per image, holding a single image frame.
        page = DrawPage("Page " + file_name)
        page.append(
            Frame.image_frame(
                image=uri,
                name=file_name,
                text="",  # Text over the image object
                size=(info.disp_w, info.disp_h),  # Display size of image
                anchor_type="page",
                page_number=None,
                position=(info.pos_x, info.pos_y),
                style=None,
            )
        )
        slides.append(page)

    return presentation


# Principle :
# - original images are left unmodified by the script
# - only the size at which they should appear is computed
# - later, the display engine (say LibreOffice) will merge this display
#   information with other information, like the size of the page
#   (page-layout) and should act like a mask against the "big" cropped image.
class ImageInfo:
    """Display geometry of an image centered on a slide.

    `size` is the pixel size of the image; `disp_w`, `disp_h`, `pos_x` and
    `pos_y` are length strings in cm. All of them are None until adjust()
    succeeds.
    """

    def __init__(self, path: Path):
        self.path = path
        self.size = None
        self.disp_w = self.disp_h = None
        self.pos_x = self.pos_y = None

    def adjust(self):
        """Read the image size and compute its display size and position.

        If the file cannot be read as an image, `size` stays None and
        nothing else is computed.
        """
        try:
            # The context manager closes the file once the size is read.
            with Image.open(self.path) as image:
                self.size = image.size
        except OSError:
            # Not an image ?
            self.size = None
            return
        width, height = self.size
        if MAX_SIZE:
            # The largest dimension is displayed with length MAX_SIZE.
            ratio = max(width / MAX_SIZE, height / MAX_SIZE)
        elif CROP_SIZE:
            # The image covers the whole slide, some of it is lost.
            ratio = min(width / SLIDE_W, height / SLIDE_H)
        else:
            # The whole image fits in the slide.
            ratio = max(width / SLIDE_W, height / SLIDE_H)
        display_w = width / ratio
        display_h = height / ratio
        # Lengths are written with two decimals.
        self.disp_w = f"{display_w:.2f}cm"
        self.disp_h = f"{display_h:.2f}cm"
        self.pos_x = f"{(SLIDE_W - display_w) / 2:.2f}cm"
        self.pos_y = f"{(SLIDE_H - display_h) / 2:.2f}cm"
        print(self.path.name, self.disp_w, self.disp_h)


def collect_images():
    """Return an ImageInfo for every readable image file found under IMAGES."""
    found = []
    for candidate in IMAGES.glob("**/*"):
        if candidate.is_file():
            info = ImageInfo(candidate)
            info.adjust()
            # adjust() leaves size empty when the file is not an image.
            if info.size:
                found.append(info)
    return found


if __name__ == "__main__":
    main()

Make a presentation from text with different styles

Each line of the text becomes a slide of the presentation; the style changes depending on the length of the text line.

recipes/make_a_presentation_from_text_with_different_styles.py
#!/usr/bin/env python
"""Each line of the text becomes a slide of the presentation, we change of style
depending on the length of text line.
"""

import os
from pathlib import Path

from odfdo import Document, DrawPage, Frame, Style

_DOC_SEQUENCE = 287
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled_prez"
TARGET = "presentation.odp"

CONTENT = """123
azertyuiop
azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
end.
""".splitlines()


def save_new(document: Document, name: str) -> None:
    """Write *document* to OUTPUT_DIR / name, creating OUTPUT_DIR if needed."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def create_style() -> Style:
    """Build the base graphic style "Gloup48".

    The style carries graphic properties (no stroke, grey fill, 1cm padding),
    a centered paragraph alignment and bold red 48pt text properties.
    """
    graphic_options = {
        "name": "Gloup48",
        "parent": "standard",
        "stroke": "none",
        "fill_color": "#b3b3b3",
        "textarea_vertical_align": "middle",
        "padding_top": "1cm",
        "padding_bottom": "1cm",
        "padding_left": "1cm",
        "padding_right": "1cm",
        "line_distance": "0cm",
        "guide_overhang": "0cm",
        "guide_distance": "0cm",
    }
    text_options = {
        "color": "#dd0000",
        "text_outline": "false",
        "font": "Liberation Sans",
        "font_family": "Liberation Sans",  # compatibility
        "font_style_name": "Bold",
        "family_generic": "swiss",
        "size": "48pt",
        "weight": "bold",
    }
    style = Style("graphic", **graphic_options)
    style.set_properties(area="paragraph", align="center")
    style.set_properties(area="text", **text_options)
    return style


def generate_document() -> Document:
    """Return a presentation with one slide per line of CONTENT.

    The longer the line, the smaller the font size of the style applied to
    the text frame of its slide.
    """
    presentation = Document("presentation")
    body = presentation.body
    body.clear()

    base_style = create_style()
    presentation.insert_style(base_style)

    # Making a lot of variations: one clone of the base style per font size
    font_sizes = [10, 11, 14, 16, 20, 24, 32, 40, 44]
    length_limits = [95, 80, 65, 50, 40, 30, 20, 10, 5]
    for font_size in font_sizes:
        variant = base_style.clone
        variant.set_attribute("style:name", f"Gloup{font_size}")
        variant.set_properties(area="text", size=f"{font_size}pt")
        presentation.insert_style(variant)

    for number, line in enumerate(CONTENT, start=1):
        page = DrawPage(f"{number} - {line[:10]}")
        # choosing some style: the first limit exceeded by the line length
        # gives the font size, shorter lines keep the 48pt base style
        chosen_size = next(
            (
                font_size
                for font_size, limit in zip(font_sizes, length_limits)
                if len(line) > limit
            ),
            48,
        )
        style_name = f"Gloup{chosen_size}"
        page.append(
            Frame.text_frame(
                line,
                size=("24cm", "2cm"),
                position=("2cm", "8cm"),
                style=style_name,
                text_style=style_name,
            )
        )
        body.append(page)

    return presentation


def main() -> None:
    """Generate the presentation, run the optional checks, then save it."""
    presentation = generate_document()
    test_unit(presentation)
    save_new(presentation, TARGET)


def test_unit(document: Document) -> None:
    """Check slide count and first/last slide text (test suite only)."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return

    children = document.body.children
    pages = [child for child in children if isinstance(child, DrawPage)]
    assert len(pages) == len(CONTENT)
    assert str(children[0]).strip() == CONTENT[0].strip()
    assert str(children[-1]).strip() == CONTENT[-1].strip()


if __name__ == "__main__":
    main()

Extract and reorder slides

Create a new presentation from a previous one by extracting some slides, in a different order.

recipes/extract_and_reorder_slides.py
#!/usr/bin/env python
"""Create a new presentation from a previous one by extracting some slides,
in a different order.
"""
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 290
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "presentation_extracted"
TARGET = "presentation.odp"
DATA = Path(__file__).parent / "data"
SOURCE = DATA / "presentation_base.odp"


def save_new(document: Document, name: str):
    """Write *document* to OUTPUT_DIR / name, creating OUTPUT_DIR if needed."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Copy slides 3, 5, 2 and 2 (in that order) of SOURCE into a new file.

    Slides that cannot be fetched are silently skipped.
    """
    wanted_slides = (3, 5, 2, 2)
    source = Document(SOURCE)
    extracted = Document("presentation")

    # Important, copy styles too:
    extracted.delete_styles()
    extracted.merge_styles_from(source)
    extracted.body.clear()

    for slide_number in wanted_slides:
        # slide numbers start at 1, positions at 0
        position = slide_number - 1
        try:
            slide = source.body.get_draw_page(position=position)
        except Exception:  # noqa: S112
            continue
        if slide is not None:
            extracted.body.append(slide.clone)

    save_new(extracted, TARGET)


if __name__ == "__main__":
    main()

Change values of a chart inside a document

Open a text document with an embedded chart and change some values.

recipes/change_values_of_a_chart_inside_a_document.py
#!/usr/bin/env python
"""Open a text document with an embedded chart and change some values."""

import os
import sys
from pathlib import Path

# for cell style
from odfdo import Document

_DOC_SEQUENCE = 295
DATA = Path(__file__).parent / "data"
SOURCE = "chart.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "modified_chart"
TARGET = "modified_chart.odt"


def read_source_document() -> Document:
    """Open the document given as first CLI argument, else DATA / SOURCE."""
    if len(sys.argv) > 1:
        source = sys.argv[1]
    else:
        source = DATA / SOURCE
    return Document(source)


def save_new(document: Document, name: str) -> None:
    """Write *document* to OUTPUT_DIR / name, creating OUTPUT_DIR if needed."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def change(document: Document) -> None:
    """Modify three cells of the first table of the embedded chart object."""
    # list the parts if needed
    print(document.parts)
    # -> ['mimetype', 'ObjectReplacements/Object 1', 'Object 1/meta.xml', 'Object 1/styles.xml', 'Object 1/content.xml', ...

    chart_content = document.get_part("Object 1/content.xml")
    table = chart_content.body.get_table(0)

    # if needed, get the values:
    print(table.get_values())
    # -> [
    #     [None, "", "Column 2", "Column 3"],
    #     ["Row 1", Decimal("NaN"), 10, 20],
    #     ["Row 2", Decimal("NaN"), 30, 40],
    #     ["Row 3", Decimal("NaN"), 50, 360],
    #     ["Row 4", Decimal("NaN"), Decimal("9.02"), Decimal("6.2")],
    # ]

    # change some values
    updates = (
        ("A2", "label changed"),
        ("D3", 4000),
        ("D4", 4321),
    )
    for coord, new_value in updates:
        table.set_value(coord, new_value)


def main() -> None:
    """Load the source, change the chart values, check, then save."""
    chart_document = read_source_document()
    change(chart_document)
    test_unit(chart_document)
    save_new(chart_document, TARGET)


def test_unit(document: Document) -> None:
    """Check the cells of the embedded chart table (test suite only)."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return

    table = document.get_part("Object 1/content.xml").body.get_table(0)
    expected = {
        "A3": "Row 2",
        "A2": "label changed",
        "D3": 4000,
        "D4": 4321,
    }
    for coord, value in expected.items():
        assert table.get_value(coord) == value


if __name__ == "__main__":
    main()

Add text span styles

Transform an unstyled document into a multi-styled document by changing the size and color of parts of its words.

recipes/add_text_span_styles.py
#!/usr/bin/env python
"""Transform a not styled document into a multi styled document,
by changing size and color of each parts of words.
"""

import os
import sys
from itertools import chain
from pathlib import Path

from odfdo import Document, Style

_DOC_SEQUENCE = 300
DATA = Path(__file__).parent / "data"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled3"
SOURCE = "dormeur_notstyled.odt"
TARGET = "dormeur_styled.odt"
RANDOM_SEED = 1234


def read_source_document() -> Document:
    """Open the document given as first CLI argument, else DATA / SOURCE."""
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)


def save_new(document: Document, name: str) -> None:
    """Write *document* to OUTPUT_DIR / name, creating OUTPUT_DIR if needed."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


class SimpleRandom:
    """Small deterministic number generator, so test results are repeatable.

    The state is advanced with: current = (16807 * current) % MODULUS.
    """

    MODULUS = 2**31 - 1
    MAXI = 2**31 - 2

    def __init__(self) -> None:
        self.current = 16807

    def set_seed(self, seed: int = 16807) -> None:
        """Restart the sequence from *seed*."""
        self.current = seed

    def _next_number(self) -> None:
        """Advance the internal state by one step."""
        advanced = 16807 * self.current
        self.current = advanced % self.MODULUS

    def randint(self, max_value: int) -> int:
        """Advance the state and scale it to an integer starting at 1."""
        self._next_number()
        scaled = self.current * max_value / self.MAXI
        return int(scaled + 1)


def color_hex(r: int, g: int, b: int) -> str:
    """Return "#" followed by each channel as (at least) two uppercase hex digits."""
    return "#" + "".join(f"{channel:02X}" for channel in (r, g, b))


def style_name_index(index: int) -> str:
    """Return the style name associated with *index*: "rnd_<index>"."""
    return "rnd_{}".format(index)


def generate_random_styles(document: Document, rnd: SimpleRandom) -> None:
    """Insert 64 text styles, named rnd_1 to rnd_64, into the document.

    Each style gets a pseudo-random color drawn from *rnd* and a size
    growing with its index.

    Args:
        document: the Document receiving the styles.
        rnd: the generator providing the color channel values.
    """

    def channel() -> int:
        # rnd.randint(256) starts at 1 and can reach 256 (even 257 at the
        # very end of the generator range): shift and clamp to 0..255 so the
        # channel always fits in two hex digits.
        return min(rnd.randint(256) - 1, 255)

    # 1..64 inclusive: every name drawn with rnd.randint(64) must exist.
    for index in range(1, 65):
        style = Style(
            "text",
            name=style_name_index(index),
            color=color_hex(channel(), channel(), channel()),
            size=f"{8 + index / 5}",
        )
        document.insert_style(style)


def add_styles(document: Document) -> None:
    """Apply a randomly picked generated style to every distinct word."""
    rnd = SimpleRandom()
    body = document.body

    generate_random_styles(document, rnd)

    distinct_words = sorted({*str(body).split()})
    for word in distinct_words:
        picked_style = style_name_index(rnd.randint(64))
        # the word itself is used as the regex selecting the text to style,
        # in every paragraph and header
        for text_block in chain(body.paragraphs, body.headers):
            text_block.set_span(picked_style, regex=word)


def main():
    """Load the source, style its words, check, then save."""
    styled_document = read_source_document()
    add_styles(styled_document)
    test_unit(styled_document)
    save_new(styled_document, TARGET)


def test_unit(document: Document) -> None:
    """Check the number of spans of the document (test suite only)."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        span_count = len(document.body.spans)
        assert span_count == 157


if __name__ == "__main__":
    main()

How to copy some style from another document

Minimal example of copy of a style from another document.

recipes/how_to_copy_some_style_from_another_document.py
"""Minimal example of copy of a style from another document.
"""

from odfdo import Document, Style

_DOC_SEQUENCE = 310


def main():
    """Fetch a text style from another document and add it to a new one.

    When the source document cannot be opened, or does not contain the
    requested style, a very simple style is created locally instead.
    """
    document = Document("text")
    body = document.body
    body.clear()

    # Let's imagine the sample_styles.odt document contains an interesting style.
    #
    # So let's first fetch the style:
    try:
        odfdo_styles = Document("sample_styles.odt")
        highlight = odfdo_styles.get_style("text", display_name="Yellow Highlight")
    except Exception:
        highlight = None
    if highlight is None:
        # The document could not be read, or the style was not found in it:
        # let's create some *very simple* text style.
        highlight = Style(
            "text", display_name="Yellow Highlight", color="blue", italic=True
        )

    # We made some assumptions here:
    #
    # 'text'              : The family of the style, text styles apply on
    #                       individual characters.
    # "Yellow Highlight"  : The name of the style as we see it in a desktop
    #                       application.
    # display_name        : Styles have an internal name ("Yellow_20_Highlight"
    #                       in this example) but we gave the display_name
    #                       instead.
    #
    # We now have a style object that we add to our own collection:
    document.insert_style(highlight, automatic=True)


if __name__ == "__main__":
    main()

Copy style from another document

Copy the styles from an existing document.

For a more advanced version, see the odfdo-style script.

recipes/copy_style_from_another_document.py
#!/usr/bin/env python
"""Copy the styles from an existing document.

For more advanced version, see the odfdo-style script.
"""

import os
import sys
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 320
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
# copied here from the odfdo package:
STYLE_SOURCE = DATA / "lpod_styles.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled1"
TARGET = "document.odt"


def read_source_document() -> Document:
    """Open the document given as first CLI argument, else DATA / SOURCE."""
    cli_arguments = sys.argv[1:]
    source = cli_arguments[0] if cli_arguments else DATA / SOURCE
    return Document(source)


def save_new(document: Document, name: str) -> None:
    """Write *document* to OUTPUT_DIR / name, creating OUTPUT_DIR if needed."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def merge_styles(document: Document) -> None:
    """Replace all the styles of *document* by those of STYLE_SOURCE."""
    # The styles we are interested in live in the STYLE_SOURCE document,
    # so that document is opened first:
    styles_provider = Document(STYLE_SOURCE)

    # Only some styles could be changed, but here a clean basis is wanted:
    document.delete_styles()

    # The actual style change:
    document.merge_styles_from(styles_provider)


def main() -> None:
    """Load the source, replace its styles, check, then save."""
    restyled = read_source_document()
    merge_styles(restyled)
    test_unit(restyled)
    save_new(restyled, TARGET)


def test_unit(document: Document) -> None:
    """Check the number of styles of the document (test suite only)."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        style_count = len(document.get_styles())
        assert style_count == 75


if __name__ == "__main__":
    main()

Create basic text styles

Create basic text styles.

recipes/create_basic_text_styles.py
#!/usr/bin/env python
"""Create basic text styles.
"""
import os
from pathlib import Path

from odfdo import Document, Header, Paragraph, Style

_DOC_SEQUENCE = 330
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_styles"
TARGET = "document.odt"


def save_new(document: Document, name: str) -> None:
    """Write *document* to OUTPUT_DIR / name, creating OUTPUT_DIR if needed."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def _create_style_header_blue(document: Document) -> None:
    """Insert "header_blue": bold blue 160% text, default outline level 1."""
    options = {
        "family": "paragraph",
        "name": "header_blue",
        "display_name": "header_blue",
        "parent_style": "Heading",
        "area": "text",
        "bold": True,
        "color": "blue",
        "size": "160%",
    }
    header_style = Style(**options)
    header_style.set_attribute("style:default-outline-level", "1")
    document.insert_style(header_style)


def _create_style_header_navy(document: Document) -> None:
    """Insert "header_navy": bold navy 120% text, default outline level 2."""
    options = {
        "family": "paragraph",
        "name": "header_navy",
        "display_name": "header_navy",
        "parent_style": "Heading",
        "area": "text",
        "bold": True,
        "color": "navy",
        "size": "120%",
    }
    header_style = Style(**options)
    header_style.set_attribute("style:default-outline-level", "2")
    document.insert_style(header_style)


def _create_style_steel(document: Document) -> None:
    """Insert "steel": yellow text on dark blue, with a solid dark blue fill."""
    text_options = {
        "family": "paragraph",
        "area": "text",
        "name": "steel",
        "display_name": "steel",
        "color": "yellow",
        "background_color": "darkblue",
    }
    graphic_properties = {
        "draw:fill": "solid",
        "draw:fill-color": "darkblue",
    }
    steel = Style(**text_options)
    steel.set_properties(area="graphic", properties=graphic_properties)
    document.insert_style(steel)


def _create_style_special(document: Document) -> None:
    """Insert "special": fixed-pitch font, centered with 2cm side margins."""
    text_options = {
        "family": "paragraph",
        "area": "text",
        "name": "special",
        "display_name": "special",
        "font": "Courier New",
        "font_family": "Courier New",
        "font_style_name": "Regular",
        "font_pitch": "fixed",
        "background_color": "AntiqueWhite",
    }
    paragraph_properties = {
        "fo:margin-left": "2cm",
        "fo:margin-right": "2cm",
        "fo:line-height": "150%",
        "fo:text-align": "center",
    }
    special = Style(**text_options)
    special.set_properties(area="paragraph", properties=paragraph_properties)
    document.insert_style(special)


def _create_style_bold_gold(document: Document) -> None:
    """Insert the "bold_gold" text style (bold, dark goldenrod)."""
    options = {
        "family": "text",
        "name": "bold_gold",
        "display_name": "bold_gold",
        "bold": True,
        "color": "darkgoldenrod",
    }
    document.insert_style(Style(**options))


def _create_style_italic_lime(document: Document) -> None:
    """Insert the "italic_lime" text style (italic, 120%, lime)."""
    options = {
        "family": "text",
        "name": "italic_lime",
        "display_name": "italic_lime",
        "italic": True,
        "size": "120%",
        "color": "lime",
    }
    document.insert_style(Style(**options))


def add_styles(document: Document) -> None:
    """Insert the six styles of the recipe into the document."""
    style_builders = (
        _create_style_header_blue,
        _create_style_header_navy,
        _create_style_steel,
        _create_style_special,
        _create_style_bold_gold,
        _create_style_italic_lime,
    )
    for build_style in style_builders:
        build_style(document)


def add_content(document: Document) -> None:
    """Append one title and three sub-sections using the recipe styles."""
    sub_header_style = "header_navy"

    # default paragraph style, with spans on some words
    first_para = Paragraph(
        "Lorem ipsum dolor sit amet, consectetuer "
        "adipiscing elit. Sed non risus. "
        "Suspendisse lectus tortor, dignissim sit amet, "
        "adipiscing nec, ultricies sed, dolor."
    )
    first_para.set_span("bold_gold", regex="dolor")
    first_para.set_span("italic_lime", regex=r"\w+ing")

    # "steel" paragraph style, plus a span
    second_para = Paragraph(
        "Cras elementum ultrices diam. Maecenas ligula massa, "
        "varius a, semper congue, euismod non, mi. Proin porttitor, "
        "orci nec nonummy molestie, enim est eleifend mi, non "
        "fermentum diam nisl sit amet erat. Duis semper.",
        style="steel",
    )
    second_para.set_span("italic_lime", regex="semper")

    # "special" paragraph style only
    third_para = Paragraph(
        "Duis arcu massa, scelerisque vitae, consequat in, pretium a, "
        "enim. Pellentesque congue. Ut in risus volutpat libero "
        "pharetra tempor. Cras vestibulum bibendum augue. Praesent "
        "egestas leo in pede. Praesent blandit odio eu enim. "
        "Pellentesque sed dui ut augue blandit sodales.",
        style="special",
    )

    body = document.body
    elements = (
        Header(1, "First level header", style="header_blue"),
        Header(2, "First sub header", style=sub_header_style),
        first_para,
        Header(2, "Second sub header", style=sub_header_style),
        second_para,
        Header(2, "Third sub header", style=sub_header_style),
        third_para,
    )
    for element in elements:
        body.append(element)


def create_document() -> Document:
    """Return a new document holding the recipe styles and content."""
    new_document = Document()
    new_document.body.clear()
    add_styles(new_document)
    add_content(new_document)
    return new_document


def main() -> None:
    """Create the document, run the optional checks, then save it."""
    styled_document = create_document()
    test_unit(styled_document)
    save_new(styled_document, TARGET)


def test_unit(document: Document) -> None:
    """Check the serialization of the six styles (test suite only)."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    # (family, style name, fragments expected in the serialized style)
    expectations = (
        (
            "paragraph",
            "header_blue",
            (
                'name="header_blue"',
                'color="#0000FF"',
                'font-weight="bold"',
                'font-size="160%"',
            ),
        ),
        (
            "paragraph",
            "header_navy",
            (
                'name="header_navy"',
                'color="#000080"',
                'font-weight="bold"',
                'font-size="120%"',
            ),
        ),
        (
            "paragraph",
            "steel",
            (
                'name="steel"',
                'color="#FFFF00"',
                "graphic-properties",
                'draw:fill-color="#00008B"',
            ),
        ),
        (
            "paragraph",
            "special",
            (
                'name="special"',
                'background-color="#FAEBD7"',
                "Courier",
                'line-height="150%"',
                'margin-left="2cm"',
                'margin-right="2cm"',
                'text-align="center"',
            ),
        ),
        (
            "text",
            "bold_gold",
            (
                'name="bold_gold"',
                'color="#B8860B"',
                'font-weight="bold"',
            ),
        ),
        (
            "text",
            "italic_lime",
            (
                'name="italic_lime"',
                'color="#00FF00"',
                'font-style="italic"',
                'font-size="120%"',
            ),
        ),
    )
    for family, style_name, fragments in expectations:
        serialized = document.get_style(family, style_name).serialize()
        for fragment in fragments:
            assert fragment in serialized


if __name__ == "__main__":
    main()

How to apply a style to a paragraph

Minimal example of how to add a styled paragraph to a document.

recipes/how_to_apply_a_style_to_a_paragraph.py
"""Minimal example of how to add a styled paragraph to a document.
"""

from odfdo import Document, Paragraph

_DOC_SEQUENCE = 335


def main():
    """Append a paragraph using the style named "highlight" to a new document."""
    document = Document("text")
    content = document.body
    content.clear()

    # we know we have a style of name "highlight" :
    styled_paragraph = Paragraph("Highlighting the word", style="highlight")
    content.append(styled_paragraph)


if __name__ == "__main__":
    main()

Change paragraph styles methods

Many examples of how to change paragraph (and in-paragraph) styles, either by changing the paragraph style itself or by using Span to select parts of the paragraph. Includes several ways to create or import styles.

recipes/change_paragraph_styles_methods.py
#!/usr/bin/env python
"""Many examples of how to change paragraph (and in-paragraph) styles, either
by changing the paragraph style itself or by using Span to select parts
of the paragraph. Includes several ways to create or import styles.
"""

import os
from collections.abc import Iterator
from itertools import cycle
from pathlib import Path

from odfdo import Document, Element, Header, Paragraph, Style

_DOC_SEQUENCE = 340
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "change_styles"
DATA = Path(__file__).parent / "data"
LOREM = (DATA / "lorem.txt").read_text(encoding="utf8")
STYLED_SOURCE = "lpod_styles.odt"
TARGET_BEFORE = "document_before.odt"
TARGET_AFTER = "document_after.odt"


def save_new(document: Document, name: str) -> None:
    """Write *document* to OUTPUT_DIR / name, creating OUTPUT_DIR if needed."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def iter_lorem() -> Iterator[str]:
    """Cycle endlessly over the stripped "."-separated chunks of LOREM."""
    single_line = LOREM.replace("\n", " ")
    chunks = (chunk.strip() for chunk in single_line.split("."))
    return cycle(chunks)


def make_base_document() -> Document:
    """Return a document of 1 title and 3 sections of 5 paragraphs each.

    All the texts are taken, in order, from the Lorem Ipsum content.
    """
    document = Document("odt")
    body = document.body
    body.clear()
    # Add some content with headers
    sentences = iter_lorem()
    body.append(Header(1, next(sentences)))
    for _section in range(3):
        body.append(Header(2, next(sentences)))
        for _paragraph in range(5):
            # each paragraph is made of two consecutive sentences
            first = next(sentences)
            second = next(sentences)
            body.append(Paragraph(f"{first}. {second}."))
    return document


def add_some_styles(document) -> None:
    """Insert six generated styles as automatic styles of the document.

    Two paragraph styles ("bold-blue", "italic-red") and four text styles
    ("green", "bold-yellow-blue", "bold-white-black", "italic-red-yellow").
    """
    # Always simpler to copy styles from an actual .odt existing file, but:
    generated_styles = (
        {
            "family": "paragraph",
            "area": "text",
            "display_name": "bold-blue",
            "color": "blue",
            "bold": True,
        },
        {
            "family": "paragraph",
            "area": "text",
            "display_name": "italic-red",
            "color": "red",
            "bold": True,
            "italic": True,
        },
        {
            "family": "text",
            "area": "text",
            "display_name": "green",
            "background_color": "green",
        },
        {
            "family": "text",
            "area": "text",
            "display_name": "bold-yellow-blue",
            "color": "yellow",
            "background_color": "blue",
            "bold": True,
        },
        {
            "family": "text",
            "area": "text",
            "display_name": "bold-white-black",
            "color": "white",
            "background_color": "black",
            "bold": True,
        },
        {
            "family": "text",
            "area": "text",
            "display_name": "italic-red-yellow",
            "color": "red",
            "background_color": "yellow",
            "bold": True,
            "italic": True,
        },
    )
    for options in generated_styles:
        document.insert_style(Style(**options), automatic=True)


def add_style_from_xml(document: Document) -> None:
    """Insert the "custom" paragraph style, built from its XML definition."""
    # Styles can be defined by XML definition
    custom_style_xml = (
        '<style:style style:name="custom" '
        'style:display-name="custom" '
        'style:family="paragraph" '
        'style:parent-style-name="Text">'
        '<style:paragraph-properties fo:margin-left="2cm"/>'
        '<style:text-properties fo:color="#808080" loext:opacity="100%" '
        'fo:font-size="16pt" fo:font-style="normal" '
        'style:text-underline-style="solid" '
        'style:text-underline-width="auto" '
        'style:text-underline-color="font-color" '
        'fo:font-weight="bold"/>'
        "</style:style>"
    )
    custom_style = Element.from_tag(custom_style_xml)
    document.insert_style(custom_style)


def import_style_from_other_doc(document: Document) -> None:
    """Copy the "Yellow Highlight" text style of STYLED_SOURCE into the document."""
    provider = Document(DATA / STYLED_SOURCE)
    document.insert_style(
        provider.get_style("text", display_name="Yellow Highlight"),
        automatic=True,
    )


def apply_styles(document: Document) -> None:
    """Apply the previously inserted styles to headers and paragraphs.

    Each nested helper shows one way of applying a style; they are run in
    the order of their definition.
    """

    def name_of(family: str, display_name: str) -> str:
        # internal name of the style known by its display name
        return document.get_style(family=family, display_name=display_name).name

    def change_all_headers() -> None:
        green = name_of("text", "green")
        # header styles should include some hints about the numeration level
        # So, here we just prefer to apply style with a span
        for header in document.body.headers:
            header.set_span(green, offset=0)

    def change_all_paragraphs() -> None:
        bold_blue = name_of("paragraph", "bold-blue")
        for paragraph in document.body.paragraphs:
            paragraph.style = bold_blue

    def change_some_paragraph() -> None:
        italic_red = name_of("paragraph", "italic-red")
        for position in (3, 5, 7):
            document.body.get_paragraph(position).style = italic_red

    def apply_span_regex() -> None:
        yellow = name_of("text", "bold-yellow-blue")
        white = name_of("text", "bold-white-black")
        for paragraph in document.body.paragraphs:
            paragraph.set_span(yellow, regex=r"tortor|ipsum")
            paragraph.set_span(white, regex=r"A\w+")

    def apply_span_offset() -> None:
        red = name_of("text", "italic-red-yellow")
        document.body.get_paragraph(2).set_span(red, offset=9, length=22)

    def apply_custom_style() -> None:
        document.body.get_paragraph(13).style = "custom"

    def apply_imported_style() -> None:
        paragraph = document.body.get_paragraph(14)
        highlight = name_of("text", "Yellow Highlight")
        # feature: to not highlight spaces, make as many Spans as required:
        for start, end in paragraph.search_all(r"\w+"):
            paragraph.set_span(highlight, offset=start, length=end - start)

    steps = (
        change_all_headers,
        change_all_paragraphs,
        change_some_paragraph,
        apply_span_regex,
        apply_span_offset,
        apply_custom_style,
        apply_imported_style,
    )
    for step in steps:
        step()


def main() -> None:
    """Save the unstyled document, style it, check, then save it again."""
    document = make_base_document()
    save_new(document, TARGET_BEFORE)
    steps = (
        add_some_styles,
        add_style_from_xml,
        import_style_from_other_doc,
        apply_styles,
        test_unit,
    )
    for step in steps:
        step(document)
    save_new(document, TARGET_AFTER)


def test_unit(document: Document) -> None:
    """Check that every style is used by some element (test suite only)."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return

    assert len(list(document.body.paragraphs)) == 15
    used_styles = (
        ("paragraph", ("bold-blue", "italic-red", "custom")),
        (
            "text",
            ("green", "bold-yellow-blue", "bold-white-black", "Yellow Highlight"),
        ),
    )
    for family, display_names in used_styles:
        for display_name in display_names:
            style = document.get_style(family=family, display_name=display_name)
            assert document.get_styled_elements(style.name)
    highlight = document.get_style(family="text", display_name="Yellow Highlight")
    assert len(document.get_styled_elements(highlight.name)) == 21


if __name__ == "__main__":
    main()

Delete parts of a text document

Idea coming from issue #49: deleting content from one point to another in a .odt document.

recipes/delete_parts_of_a_text_document.py
#!/usr/bin/env python
"""Idea coming from issue #49:
Deleting content from one point to another in a .odt document.
"""

import os
from pathlib import Path

from odfdo import Document, Element, Header, Paragraph

_DOC_SEQUENCE = 400
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "delete_content"
TARGET_INITIAL = "document_initial.odt"
TARGET_FINAL = "document_final.odt"


class KeepingState:
    """Mutable holder of the name of the current filtering step."""

    def __init__(self, initial: str):
        # name of the step the filtering starts in
        self.step = initial


def save_new(document: Document, name: str):
    """Write *document* to OUTPUT_DIR / name, creating OUTPUT_DIR if needed."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)


def create_base_document():
    """Return a text document made of a title and four parts (A to D).

    Parts A, B and C hold two paragraphs each, part D holds three.
    """
    document = Document("text")
    body = document.body
    body.clear()
    body.append(Header(1, "Some title"))
    body.append(Header(2, "part A"))
    body.append(
        Paragraph(
            "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Sed non risus."
        )
    )
    body.append(
        Paragraph(
            "Suspendisse lectus tortor, dignissim sit amet, adipiscing "
            "nec, ultricies sed, dolor. Cras elementum ultrices diam. "
            "Maecenas ligula massa, varius a, semper congue, euismod non, mi."
        )
    )
    body.append(Header(2, "part B"))
    body.append(
        Paragraph(
            "Proin porttitor, orci nec nonummy molestie, enim est eleifend "
            "mi, non fermentum diam nisl sit amet erat. Duis semper. "
            "Duis arcu massa, scelerisque vitae, consequat in, pretium a, "
            "enim. Pellentesque congue. Ut in risus volutpat libero pharetra tempor."
        )
    )
    body.append(
        Paragraph(
            "Cras vestibulum bibendum augue. Praesent egestas leo in pede. "
            "Praesent blandit odio eu enim. Pellentesque sed dui ut augue "
            "blandit sodales. Vestibulum ante ipsum primis in faucibus orci "
            "luctus et ultrices posuere cubilia Curae; Aliquam nibh."
        )
    )

    body.append(Header(2, "part C"))
    body.append(
        Paragraph(
            "Mauris ac mauris sed pede pellentesque fermentum. "
            "Maecenas adipiscing ante non diam sodales hendrerit. Ut "
            "velit mauris, egestas sed, gravida nec, ornare ut, mi."
        )
    )
    body.append(
        Paragraph(
            "Aenean ut orci vel massa suscipit pulvinar. Nulla sollicitudin. "
            "Fusce varius, ligula non tempus aliquam, nunc turpis "
            "ullamcorper nibh, in tempus sapien eros vitae ligula. "
            "Pellentesque rhoncus nunc et augue. Integer id felis. Curabitur "
            "aliquet pellentesque diam. Integer quis metus vitae elit "
            "lobortis egestas."
        )
    )
    body.append(Header(2, "part D"))
    body.append(
        Paragraph(
            "Morbi vel erat non mauris convallis vehicula. Nulla et sapien. "
            "Integer tortor tellus, aliquam faucibus, convallis id, congue "
            # trailing space needed: the next literal starts a new sentence
            "eu, quam. Mauris ullamcorper felis vitae erat. "
            "Proin feugiat, augue non elementum posuere, metus purus "
            "iaculis lectus, et tristique ligula justo vitae magna. Aliquam "
            "convallis sollicitudin purus."
        )
    )
    body.append(
        Paragraph(
            "Praesent aliquam, enim at fermentum mollis, ligula massa "
            "adipiscing nisl, ac euismod nibh nisl eu lectus. Fusce "
            "vulputate sem at sapien. Vivamus leo. Aliquam euismod "
            "libero eu enim. Nulla nec felis sed leo placerat imperdiet."
        )
    )
    body.append(
        Paragraph(
            "Aenean suscipit nulla in justo. Suspendisse cursus rutrum augue. "
            "Nulla tincidunt tincidunt mi. Curabitur iaculis, lorem vel "
            "rhoncus faucibus, felis magna fermentum augue, et ultricies "
            "lacus lorem varius purus. Curabitur eu amet."
        )
    )
    return document


def keep_element(state: KeepingState, elem: Element) -> bool:
    """Tell whether ``elem`` must be kept, updating ``state.step`` on the way.

    ``state.step`` goes from "before" to "deleting" on the header containing
    "part B", then from "deleting" to "after" on the first paragraph whose
    text starts with "Aenean". Elements seen while the step is "deleting"
    are rejected; that paragraph itself is kept.
    """
    entering_deleted_zone = (
        state.step == "before"
        and isinstance(elem, Header)
        and "part B" in str(elem)
    )
    if entering_deleted_zone:
        state.step = "deleting"

    leaving_deleted_zone = (
        state.step == "deleting"
        and isinstance(elem, Paragraph)
        and str(elem).startswith("Aenean")
    )
    if leaving_deleted_zone:
        state.step = "after"

    return state.step != "deleting"


def main():
    """Save the base document, drop part B and the start of part C, save again."""
    document = create_base_document()
    save_new(document, TARGET_INITIAL)

    # Filter the direct children of the body: the state object remembers
    # whether we are before, inside or after the zone to delete.
    body = document.body
    tracker = KeepingState("before")
    survivors = [child for child in body.children if keep_element(tracker, child)]
    body.clear()
    body.extend(survivors)

    save_new(document, TARGET_FINAL)
    test_unit(document)


def test_unit(document: Document) -> None:
    """Check the start of two paragraphs; active only when ODFDO_TESTING is set."""
    if "ODFDO_TESTING" not in os.environ:
        return
    # (paragraph position, expected first word)
    for position, first_word in ((0, "Lorem"), (3, "Morbi")):
        text = str(document.body.get_paragraph(position=position))
        print(text)
        assert text.startswith(first_word)


if __name__ == "__main__":
    main()

Create color chart in spreadsheet

Create a color chart in a spreadsheet using cell styles (adapted from the odfdo library test cases).

recipes/create_color_chart_in_spreadsheet.py
#!/usr/bin/env python
"""Create some color chart in a spreadsheet using cells styles.
(adapted from the odfdo library test cases)
"""

from pathlib import Path

from odfdo import (
    Cell,
    Document,
    Row,
    Style,
    Table,
    __version__,
    create_table_cell_style,
    make_table_cell_border_string,
    rgb2hex,
)

_DOC_SEQUENCE = 420
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "chart"
TARGET = "color_chart.ods"


def save_new(document: Document, name: str):
    """Save ``document`` under ``name`` in OUTPUT_DIR, creating the directory if needed."""
    OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
    destination = OUTPUT_DIR.joinpath(name)
    print("Saving:", destination)
    document.save(destination)


def hello_messages():
    """Print the banner: odfdo version and name of the generated file."""
    banner = (
        "odfdo installation test",
        f" Version           : {__version__}",
        "",
        f"Generating color chart in {TARGET}",
        "...",
    )
    for line in banner:
        print(line)


def generate_chart():
    """Build and return a spreadsheet document holding the color chart.

    The table "chart" gets 32 rows (y from 0 to 248, step 8) of 8 cells
    (x from 0 to 224, step 32). Each cell shows the hexadecimal code of the
    color (x, y, (x + y) % 256) and uses that color as its background,
    through a dedicated automatic cell style.
    """
    document = Document("spreadsheet")
    body = document.body
    body.clear()
    table = Table("chart")

    # The borders are the same for every cell: compute the strings only once.
    border_rl = make_table_cell_border_string(thick="0.20cm", color="white")
    border_bt = make_table_cell_border_string(thick="0.80cm", color="white")

    for y in range(0, 256, 8):
        row = Row()
        for x in range(0, 256, 32):
            cell_value = (x, y, (x + y) % 256)
            # One automatic style per cell, since each background differs.
            style = create_table_cell_style(
                color="grey",
                background_color=cell_value,
                border_right=border_rl,
                border_left=border_rl,
                border_bottom=border_bt,
                border_top=border_bt,
            )
            name = document.insert_style(style=style, automatic=True)
            cell = Cell(value=rgb2hex(cell_value), style=name)
            row.append_cell(cell)
        table.append_row(row)

    # Row and column styles are shared by the whole table: create them once,
    # when the table is complete, instead of once per appended row.
    row_style = Style("table-row", height="1.80cm")
    row_style_name = document.insert_style(style=row_style, automatic=True)
    for row in table.rows:
        row.style = row_style_name
        table.set_row(row.y, row)

    col_style = Style("table-column", width="3.6cm")
    col_style_name = document.insert_style(style=col_style, automatic=True)
    for column in table.columns:
        column.style = col_style_name
        table.set_column(column.x, column)

    body.append(table)

    return document


def main():
    """Print the banner, generate the color chart and save it as TARGET."""
    hello_messages()
    chart_document = generate_chart()
    save_new(chart_document, TARGET)


if __name__ == "__main__":
    main()

Get cell background color

Read the background color of a table cell.

recipes/get_cell_background_color.py
#!/usr/bin/env python
"""Read the background color of a table cell.
"""
import os
import sys
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 440
DATA = Path(__file__).parent / "data"
SOURCE = "cell_color.ods"


def read_source_document():
    """Open the document given as first command line argument, else DATA / SOURCE."""
    cli_args = sys.argv[1:]
    source = cli_args[0] if cli_args else DATA / SOURCE
    return Document(source)


def main():
    """Print the background color of a few cells of the first sheet.

    When ODFDO_TESTING is set in the environment, the colors read are also
    checked against the values expected for the sample file.
    """
    document = read_source_document()
    first_sheet = 0  # table 0 is the first sheet

    b2 = document.get_cell_background_color(first_sheet, "b2")
    print("Color for B2", b2)

    b3 = document.get_cell_background_color(first_sheet, "b3")
    print("Color for B3", b3)

    c3 = document.get_cell_background_color(first_sheet, "c3")
    print("Color for C3", c3)

    # without explicit default, "#ffffff" is returned
    d3 = document.get_cell_background_color(first_sheet, "d3")
    print("Color for D3", d3)

    # with another default value
    e3 = document.get_cell_background_color(first_sheet, "e3", "#123456")
    print("Color for e3", e3)

    # a cell very far away, addressed by a coordinate tuple
    far = document.get_cell_background_color(first_sheet, (1000, 10000))
    print("Color for far", far)

    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        checks = (
            (b2, "#2a6099"),
            (b3, "#ff4000"),
            (c3, "#ffff00"),
            (d3, "#ffffff"),
            (e3, "#123456"),
            (far, "#ffffff"),
        )
        for found, wanted in checks:
            assert found == wanted


if __name__ == "__main__":
    main()

Extract a sub table from some big table

Create a table of 1000 lines and 100 columns, extract a sub table of 100 lines 26 columns, save the result in a spreadsheet document.

recipes/extract_a_sub_table_from_some_big_table.py
#!/usr/bin/env python
"""Create a table of 1000 lines and 100 columns, extract a sub table
of 100 lines 26 columns, save the result in a spreadsheet document.
"""
import os
from pathlib import Path

from odfdo import Document, Row, Table

_DOC_SEQUENCE = 450
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "extract_table"
TARGET = "document.ods"


def save_new(document: Document, name: str):
    """Save ``document`` (pretty printed) as ``name`` inside OUTPUT_DIR."""
    OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
    target_path = OUTPUT_DIR.joinpath(name)
    print("Saving:", target_path)
    document.save(target_path, pretty=True)


def syracuse(n: int) -> int:
    """Return the term following ``n`` in the Syracuse (Collatz) sequence.

    Even numbers are halved, odd numbers become ``3 * n + 1``.
    """
    half, remainder = divmod(n, 2)
    return 3 * n + 1 if remainder else half


def generate_big_table(table_name) -> Document:
    """Return a spreadsheet whose single table has 1000 rows of 100 values.

    Row number ``k`` holds the first 100 terms of the Syracuse sequence
    starting at ``k``.
    """
    spreadsheet = Document("spreadsheet")
    body = spreadsheet.body
    body.clear()
    table = Table(table_name)
    body.append(table)

    lines = 1000
    cols = 100

    for start in range(lines):
        values = []
        current = start
        while len(values) < cols:
            values.append(current)
            current = syracuse(current)
        row = Row()
        row.set_values(values)
        table.append(row)

    return spreadsheet


def main():
    """Extract the same sub table from the big table in two ways, then save.

    The area copied is columns 50 to 75 of rows 800 to 899, i.e. 100 rows of
    26 columns.
    """
    table_name = "Big Table"
    spreadsheet = generate_big_table(table_name)
    body = spreadsheet.body
    big_table = body.get_table(name=table_name)
    print("Size of Big Table :", big_table.size)

    # First method: read the values row by row and build new rows.
    first_extract = Table("Extract 1")
    for row_index in range(800, 900):
        source_row = big_table.get_row(row_index)
        picked = [source_row.get_value(col) for col in range(50, 76)]
        copy_row = Row()
        copy_row.set_values(picked)
        first_extract.append(copy_row)
    body.append(first_extract)
    print("Size of extracted table 1 :", first_extract.size)

    # Second method: copy the whole area of cells at once.
    second_extract = Table("Extract 2")
    area = big_table.get_cells(coord=(50, 800, 75, 899))
    second_extract.set_cells(coord=(0, 0), cells=area)
    body.append(second_extract)
    print("Size of extracted table 2 :", second_extract.size)

    test_unit(spreadsheet)

    save_new(spreadsheet, TARGET)

    # Output expected on the console, kept here as a reminder:
    _expected_result = """
Size of Big Table : (100, 1000)
Size of extracted table 1 : (26, 100)
Size of extracted table 2 : (26, 100)
"""


def test_unit(spreadsheet: Document) -> None:
    """Check the size of the first two tables; active only under ODFDO_TESTING."""
    if "ODFDO_TESTING" not in os.environ:
        return
    body = spreadsheet.body
    # (table position, expected size): the big table, then the first extract
    for position, expected_size in ((0, (100, 1000)), (1, (26, 100))):
        table = body.get_table(position=position)
        assert table.size == expected_size


if __name__ == "__main__":
    main()

Make a basic spreadsheet

Create a spreadsheet with one table and some data, strip the table, and compute the table size.

recipes/make_a_basic_spreadsheet.py
#!/usr/bin/env python
"""Create a spreadsheet with one table and a few data, strip the table
and compute the table size.
"""
from pathlib import Path

from odfdo import Document, Table

_DOC_SEQUENCE = 460
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_ods"
TARGET = "spreadsheet.ods"


def save_new(document: Document, name: str):
    """Write ``document`` to OUTPUT_DIR / ``name``, pretty printed."""
    OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
    output_path = OUTPUT_DIR.joinpath(name)
    print("Saving:", output_path)
    document.save(output_path, pretty=True)


def main():
    """Generate the sample spreadsheet and save it as TARGET."""
    spreadsheet = generate_document()
    save_new(spreadsheet, TARGET)


def generate_document():
    """Return a spreadsheet with one table filled with text and numbers.

    Along the way, print the number of rows and cells, then the table size
    before and after stripping, and finally the table content as CSV.
    """
    # Start from an empty spreadsheet document.
    document = Document("spreadsheet")
    body = document.body
    body.clear()

    # Each sheet of a spreadsheet is a table. Giving the width (columns) and
    # the height (rows) from the beginning is not mandatory, but it is a good
    # practice since odfdo does not check the actual existence of cells.
    table = Table("First Table", width=20, height=3)
    body.append(table)

    # A table contains rows, and more can be appended.
    remaining = 2
    while remaining:
        table.append_row()
        remaining -= 1
    print("rows in the table (3+2):", len(table.rows))

    # A row contains cells.
    for current_row in table.rows:
        print("row, nb of cells ", current_row.y, len(current_row.cells))

    print("nb of cells of the last row:", len(table.get_row(-1).cells))

    # Cells can hold different kinds of values: text in the first three
    # rows, integers in the last two.
    for row_nb in range(5):
        for col_nb in range(10):
            if row_nb < 3:
                content = f"cell {col_nb} {row_nb}"
            else:
                content = col_nb * 100 + row_nb
            table.set_value((col_nb, row_nb), content)

    # Before saving the document, the unused columns can be stripped.
    print("table size:", table.size)
    table.rstrip()
    print("table size after strip:", table.size)
    print("nb of cells of the last row:", len(table.get_row(-1).cells))
    print("Content of the table:")
    print(table.to_csv())

    return document


if __name__ == "__main__":
    main()

Make spreadsheet with named ranges

Create a spreadsheet with two tables, using some named ranges.

recipes/make_spreadsheet_with_named_ranges.py
#!/usr/bin/env python
"""Create a spreadsheet with two tables, using some named ranges.
"""
from pathlib import Path

from odfdo import Document, Table

_DOC_SEQUENCE = 470
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "named_range"
TARGET = "spreadsheet.ods"


def save_new(document: Document, name: str):
    """Store ``document`` (pretty printed) as ``name`` in OUTPUT_DIR."""
    OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
    saved_path = OUTPUT_DIR.joinpath(name)
    print("Saving:", saved_path)
    document.save(saved_path, pretty=True)


def main():
    """Generate the spreadsheet using named ranges and save it as TARGET."""
    spreadsheet = generate_document()
    save_new(spreadsheet, TARGET)


def generate_document():
    """Return a spreadsheet with two tables sharing two named ranges.

    The first table holds ten squares in B1:B10 (named "squares_values") and
    their sum in B11 (named "total"). The second table describes both named
    ranges (name, range, table of origin, content). Both tables are printed
    as CSV.
    """
    document = Document("spreadsheet")
    body = document.body
    body.clear()
    table = Table("First Table")
    body.append(table)

    # Populate column B with the squares of 1..10, then label the total.
    for index in range(10):
        number = index + 1
        table.set_value((1, index), number**2)
    table.set_value("A11", "Total:")

    # A named range covering the ten values...
    table.set_named_range("squares_values", "B1:B10", table.name)
    # ... and a single cell range, written "B11" or (1, 10).
    table.set_named_range("total", (1, 10), table.name)

    # Read the values through one named range, write their sum through
    # the other one.
    values = table.get_named_range("squares_values").get_values(flat=True)
    table.get_named_range("total").set_value(sum(values))

    # The named ranges can be used from a second table.
    table2 = Table("Second Table")
    body.append(table2)

    total_range = table2.get_named_range("total")
    table2.set_value("A1", "name:")
    table2.set_value("B1", total_range.name)
    table2.set_value("A2", "range:")
    table2.set_value("B2", str(total_range.crange))
    table2.set_value("A3", "from table:")
    table2.set_value("B3", total_range.table_name)
    table2.set_value("A4", "content:")
    table2.set_value("B4", total_range.get_value())

    squares_range = table2.get_named_range("squares_values")
    table2.set_value("D1", "name:")
    table2.set_value("E1", squares_range.name)
    table2.set_value("D2", "range:")
    table2.set_value("E2", str(squares_range.crange))
    table2.set_value("D3", "from table:")
    table2.set_value("E3", squares_range.table_name)
    table2.set_value("D4", "content:")
    # The "E4:4" notation is a little hack for the area starting at E4 on
    # row 4.
    flat_squares = squares_range.get_values(flat=True)
    table2.set_values(values=[flat_squares], coord="E4:4")

    print("Content of the table1:")
    print(table.name)
    print(table.to_csv())
    print(table2.name)
    print(table2.to_csv())

    # The named ranges are stored in the document itself.
    return document


if __name__ == "__main__":
    main()

Introspecting elements

Demo of quick introspecting of a document’s elements.

recipes/introspecting_elements.py
#!/usr/bin/env python
"""Demo of quick introspecting of a document's elements.
"""
import sys
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 480
DATA = Path(__file__).parent / "data"
# ODF export of Wikipedia article Hitchhiker's Guide to the Galaxy (CC-By-SA) :
SOURCE = "collection2.odt"


def read_source_document():
    """Load the document named on the command line, or DATA / SOURCE by default."""
    if len(sys.argv) > 1:
        source = sys.argv[1]
    else:
        source = DATA / SOURCE
    return Document(source)


def main():
    """Show the children and parent of a paragraph, then a link as XML and as text."""
    document = read_source_document()

    # The body is an XML element giving access to the elements it contains.
    body = document.body

    # Elements are part of an XML tree: they know their children and parent.
    paragraph = body.get_paragraph(position=42)
    print("Children of the praragraph:\n   ", paragraph.children)
    print("\nParent of the paragraph:\n   ", paragraph.parent)

    # Any element can be inspected as serialized XML.
    first_link = body.get_link(position=0)
    print("\nContent of the serialization link:")
    print("   ", first_link.serialize())
    print("\nWhich is different from the text content of the link:")
    print("   ", str(first_link))

if __name__ == "__main__":
    main()

Show meta data

Print the metadata information of an ODF file.

recipes/show_meta_data.py
#!/usr/bin/env python
"""Print the metadata informations of a ODF file.
"""
import sys
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 490
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"


def read_source_document():
    """Return the Document given as first script argument, else DATA / SOURCE."""
    arguments = sys.argv[1:]
    if arguments:
        return Document(arguments[0])
    return Document(DATA / SOURCE)


def main():
    """Print the metadata of the source document.

    Each field is printed from the corresponding attribute of the meta part,
    followed by the statistics and the user defined metadata when present,
    and finally by the ready-made summary of ``get_formated_meta()``.
    """
    document = read_source_document()

    # Shortcut for: document.get_part("meta.xml")
    meta = document.meta

    def show_entries(entries):
        # Keys are printed without their first five characters.
        for key, value in entries.items():
            print(f"   {key[5:]:<18}: {value}")

    # (label, attribute of the meta part). Each attribute replaces the
    # former get_xxx() method of the same name; the modification date is
    # "date" and the keywords are "keyword".
    fields = (
        ("Title                :", "title"),
        ("creator              :", "creator"),
        ("creation date        :", "creation_date"),
        ("modification date    :", "date"),
        ("initial creator      :", "initial_creator"),
        ("subject              :", "subject"),
        ("description          :", "description"),
        ("editing cycles       :", "editing_cycles"),
        ("editing duration     :", "editing_duration"),
        ("generator            :", "generator"),
        ("language             :", "language"),
        ("keywords             :", "keyword"),
    )

    print(f"Meta data of {document.container.path}")
    for label, attribute in fields:
        print(label, getattr(meta, attribute))

    print("statistics    ")
    if meta.statistic is not None:
        show_entries(meta.statistic)

    user_defined = meta.user_defined_metadata
    if user_defined:
        print("user defined metadata")
        show_entries(user_defined)

    # A quick way to get all this information at once:
    print("-" * 70)
    print(document.get_formated_meta())


if __name__ == "__main__":
    main()

Remove all links from a document, transforming each link's information (URL, text) into a footnote. Links that are already inside notes are removed too, keeping only the URL as plain text. (Side note: most office suites dislike notes in notes.)

recipes/move_link_to_footnote.py
#!/usr/bin/env python
"""Remove all links from a document, transforming each link information (URL,
text) into a footnote. Of course, removing links already inside notes, just
keeping plain text URL. (Side note: most office suite dislike notes in notes)
"""

import sys
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 500
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "footnote1"
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Save ``document`` (pretty printed) under OUTPUT_DIR with the file name ``name``."""
    OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
    result_path = OUTPUT_DIR.joinpath(name)
    print("Saving:", result_path)
    document.save(result_path, pretty=True)


def remove_links(element):
    """Remove the "text:a" tags found below ``element``, keeping their content.

    The sub tree of ``element`` is rewritten in place. ``element`` itself is
    expected not to be a link: in that case the replacement content computed
    by ``_tree_remove_tag`` is not attached anywhere.
    """
    # No tag protects its children here, so the protection tag is None
    # (a real None, not the string "None").
    _tree_remove_tag(element, ("text:a", None, False))


def main():
    """Replace every link of the document by a footnote, then save the result.

    - Links found inside existing notes are turned into plain text: the URL
      is added after the link text, since notes in notes are not welcome.
    - Each link found in a paragraph gets a footnote inserted right after
      it, containing the link text and URL; the link tag is then removed.

    The source is the first command line argument, or DATA / SOURCE.
    """
    try:
        source = Path(sys.argv[1])
    except IndexError:
        source = DATA / SOURCE

    document = Document(str(source))
    body = document.body

    print("Moving links to footnotes from", source)
    print("links occurrences:", len(body.get_links()))
    print("footnotes occurences:", len(body.get_notes()))

    counter_links_in_notes = 0
    for note in body.get_notes():
        links = note.get_links()
        for link in links:
            counter_links_in_notes += 1
            url = link.get_attribute("xlink:href")
            # A link followed by no text has no tail: do not print "None".
            tail = link.tail or ""
            link.tail = f" (link: {url}) {tail}"
        # Remove the tags only when every link of the note has been
        # processed: remove_links() rebuilds the content of the note from
        # clones, so the link objects listed above are no longer part of
        # the document afterwards.
        if links:
            remove_links(note)

    print("links in notes:", counter_links_in_notes)

    counter_added_note = 0  # added notes counter
    for paragraph in body.paragraphs:
        for link in paragraph.get_links():
            url = link.get_attribute("xlink:href")
            text = link.inner_text
            counter_added_note += 1
            paragraph.insert_note(
                after=link,  # citation is inserted after current link
                note_id=f"my_note_{counter_added_note}",
                citation="1",  # The symbol the user sees to follow the footnote.
                # The footnote itself, at the end of the page:
                body=(f". {text}, link: {url}"),
            )
        remove_links(paragraph)

    print("links occurrences:", len(body.get_links()))
    print("footnotes occurences:", len(body.get_notes()))

    save_new(document, TARGET)


def _tree_remove_tag(element, context):
    """Remove a tag from the sub tree of the element, recursively.

    The content (text, children, tail) of each removed element is kept and
    takes the place of the element in its parent.

    - element: the element to process. Unless it is itself removed, it is
      rebuilt in place when something changed below it.
    - context: tuple (tag to remove, protection tag, protection flag). The
      direct children of an element whose tag is the protection tag are not
      removed; the protection flag tells whether the current element is such
      a protected child.

    Return a tuple (result, modified): result is the element, or, when the
    element itself was removed, the list of the items replacing it.
    """
    # Work on a copy: the element may be cleared below, and the children
    # handled by the recursion belong to the copy.
    buffer = element.clone
    modified = False
    sub_elements = []
    tag, keep_inside_tag, protected = context
    # The protection applies to the direct children only: it is computed
    # again from the tag of each element.
    if keep_inside_tag and element.tag == keep_inside_tag:
        protect_below = True
    else:
        protect_below = False
    for child in buffer.children:
        striped, is_modified = _tree_remove_tag(
            child, (tag, keep_inside_tag, protect_below)
        )
        if is_modified:
            modified = True
        # A removed child comes back as a list of its content, to be
        # flattened among its siblings.
        if isinstance(striped, list):
            for item in striped:
                sub_elements.append(item)
        else:
            sub_elements.append(striped)
    if not protected and element.tag == tag:
        # The element itself is removed: its content is collected in a list.
        element = []
        modified = True
    else:
        if not modified:
            # no change in element sub tree, no change on element
            return (element, False)
        # Rebuild the element: empty it, then restore the attributes saved
        # in the copy.
        element.clear()
        try:
            for key, value in buffer.attributes.items():
                element.set_attribute(key, value)
        except ValueError:
            print("Incorrect attribute in", buffer)
    text = buffer.text
    tail = buffer.tail
    # From here "element" is either the emptied element or a plain list:
    # both accept append().
    if text is not None:
        element.append(text)
    for child in sub_elements:
        element.append(child)
    if tail is not None:
        if isinstance(element, list):
            # The tail of a removed element becomes the last item of its
            # replacement content.
            element.append(tail)
        else:
            element.tail = tail
    return (element, True)


if __name__ == "__main__":
    main()

Remove the links (the text:a tag), keeping the inner text.

recipes/remove_http_links.py
#!/usr/bin/env python
"""Remove the links (the text:a tag), keeping the inner text."""

import sys
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 510
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "nolink"
TARGET = "document.odt"


def save_new(document: Document, name: str):
    """Save ``document`` in OUTPUT_DIR as ``name`` (pretty printed XML)."""
    OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
    file_path = OUTPUT_DIR.joinpath(name)
    print("Saving:", file_path)
    document.save(file_path, pretty=True)


def main():
    """Remove the links of the source document and save the result.

    The source is the first command line argument, or DATA / SOURCE. The
    number of links is printed before and after the removal.
    """
    if len(sys.argv) > 1:
        source = Path(sys.argv[1])
    else:
        source = DATA / SOURCE

    document = Document(str(source))
    body = document.body

    print("Removing links from", source)
    links_before = len(body.get_links())
    print("'text:a' occurrences:", links_before)

    remove_links(body)
    links_after = len(body.get_links())
    print("'text:a' occurrences after removal:", links_after)

    save_new(document, TARGET)


def remove_links(element):
    """Remove the "text:a" tags found below ``element``, keeping their content.

    The sub tree of ``element`` is rewritten in place. ``element`` itself is
    expected not to be a link: in that case the replacement content computed
    by ``_tree_remove_tag`` is not attached anywhere.
    """
    # No tag protects its children here, so the protection tag is None
    # (a real None, not the string "None").
    _tree_remove_tag(element, ("text:a", None, False))


def _tree_remove_tag(element, context):
    """Remove a tag from the sub tree of the element, recursively.

    The content (text, children, tail) of each removed element is kept and
    takes the place of the element in its parent.

    - element: the element to process. Unless it is itself removed, it is
      rebuilt in place when something changed below it.
    - context: a tuple (tag to remove, protection tag, protection flag). The
      direct children of an element whose tag is the protection tag are not
      removed; the protection flag tells whether the current element is such
      a protected child.

    Return a tuple (result, modified): result is the element, or, when the
    element itself was removed, the list of the items replacing it.
    """
    # Work on a copy: the element may be cleared below, and the children
    # handled by the recursion belong to the copy.
    buffer = element.clone
    modified = False
    sub_elements = []
    tag, keep_inside_tag, protected = context
    # The protection applies to the direct children only: it is computed
    # again from the tag of each element.
    if keep_inside_tag and element.tag == keep_inside_tag:
        protect_below = True
    else:
        protect_below = False
    for child in buffer.children:
        striped, is_modified = _tree_remove_tag(
            child, (tag, keep_inside_tag, protect_below)
        )
        if is_modified:
            modified = True
        # A removed child comes back as a list of its content, to be
        # flattened among its siblings.
        if isinstance(striped, list):
            for item in striped:
                sub_elements.append(item)
        else:
            sub_elements.append(striped)
    if not protected and element.tag == tag:
        # The element itself is removed: its content is collected in a list.
        element = []
        modified = True
    else:
        if not modified:
            # no change in element sub tree, no change on element
            return (element, False)
        # Rebuild the element: empty it, then restore the attributes saved
        # in the copy.
        element.clear()
        try:
            for key, value in buffer.attributes.items():
                element.set_attribute(key, value)
        except ValueError:
            print("Bad attribute in", buffer)
    text = buffer.text
    tail = buffer.tail
    # From here "element" is either the emptied element or a plain list:
    # both accept append().
    if text is not None:
        element.append(text)
    for child in sub_elements:
        element.append(child)
    if tail is not None:
        if isinstance(element, list):
            # The tail of a removed element becomes the last item of its
            # replacement content.
            element.append(tail)
        else:
            element.tail = tail
    return (element, True)


if __name__ == "__main__":
    main()

Remove span styles

Remove span styles (like some words in bold in a paragraph), except in titles.

recipes/remove_span_styles.py
#!/usr/bin/env python
"""Remove span styles (like some words in bold in a paragraph),
except in titles.
"""

import os
import sys
from pathlib import Path

from odfdo import Body, Document, Element

_DOC_SEQUENCE = 520
DATA = Path(__file__).parent / "data"
SOURCE = "dormeur.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "nostyle"
TARGET = "document.odt"


def read_source_document() -> Document:
    """Open the Document named by the first script argument, else DATA / SOURCE."""
    extra_args = sys.argv[1:]
    source = extra_args[0] if extra_args else DATA / SOURCE
    return Document(source)


def save_new(document: Document, name: str) -> None:
    """Write the resulting Document as ``name`` in OUTPUT_DIR (created if missing)."""
    OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
    written_path = OUTPUT_DIR.joinpath(name)
    print("Saving:", written_path)
    document.save(written_path, pretty=True)


def remove_text_span(body: Body) -> None:
    """Strip the "text:span" tags below ``body``, except directly inside titles.

    The spans that are direct children of a "text:h" element are kept.
    """
    span_context = ("text:span", "text:h", False)
    _tree_remove_tag(body, span_context)


def _tree_remove_tag(element: Element, context: tuple) -> "tuple[Element | list, bool]":
    """Remove a tag from the sub tree of the element, recursively.

    The content (text, children, tail) of each removed element is kept and
    takes the place of the element in its parent.

    - element: the element to process. Unless it is itself removed, it is
      rebuilt in place when something changed below it.
    - context: a tuple (tag to remove, protection tag, protection flag). The
      direct children of an element whose tag is the protection tag are not
      removed; the protection flag tells whether the current element is such
      a protected child.

    Return a tuple (result, modified): result is the element, or, when the
    element itself was removed, the list of the items replacing it.
    """
    # Work on a copy: the element may be cleared below, and the children
    # handled by the recursion belong to the copy.
    buffer = element.clone
    modified = False
    sub_elements = []
    tag, keep_inside_tag, protected = context
    # The protection applies to the direct children only: it is computed
    # again from the tag of each element.
    if keep_inside_tag and element.tag == keep_inside_tag:
        protect_below = True
    else:
        protect_below = False
    for child in buffer.children:
        striped, is_modified = _tree_remove_tag(
            child, (tag, keep_inside_tag, protect_below)
        )
        if is_modified:
            modified = True
        # A removed child comes back as a list of its content, to be
        # flattened among its siblings.
        if isinstance(striped, list):
            for item in striped:
                sub_elements.append(item)
        else:
            sub_elements.append(striped)
    if not protected and element.tag == tag:
        # The element itself is removed: its content is collected in a list.
        element = []
        modified = True
    else:
        if not modified:
            # no change in element sub tree, no change on element
            return (element, False)
        # Rebuild the element: empty it, then restore the attributes saved
        # in the copy.
        element.clear()
        try:
            for key, value in buffer.attributes.items():
                element.set_attribute(key, value)
        except ValueError:
            print("Bad attribute in", buffer)
    text = buffer.text
    tail = buffer.tail
    # From here "element" is either the emptied element or a plain list:
    # both accept append().
    if text is not None:
        element.append(text)
    for child in sub_elements:
        element.append(child)
    if tail is not None:
        if isinstance(element, list):
            # The tail of a removed element becomes the last item of its
            # replacement content.
            element.append(tail)
        else:
            element.tail = tail
    return (element, True)


def clean_document(document: Document) -> None:
    """Remove the span styles of a Document, printing the span count before and after."""
    body = document.body

    spans_before = len(body.spans)
    print("'text:span' occurrences:", spans_before)
    remove_text_span(body)
    spans_after = len(body.spans)
    print("'text:span' occurrences after removal:", spans_after)


def main() -> None:
    """Read the source document, remove its span styles, check and save it."""
    source_document = read_source_document()
    clean_document(source_document)
    test_unit(source_document)
    save_new(source_document, TARGET)


def test_unit(document: Document) -> None:
    """Check that a single span remains; active only when ODFDO_TESTING is set."""
    if os.environ.get("ODFDO_TESTING") is None:
        return

    remaining_spans = len(document.body.spans)
    assert remaining_spans == 1


if __name__ == "__main__":
    main()

Retrieve all pictures from odf files

Recursively analyse a directory of files, open every ODF document found, and copy the pictures of these documents into a single directory.

recipes/retrieve_all_pictures_from_ODF_files.py
#!/usr/bin/env python
"""Analyse a list of files and directory (recurse), open all ODF documents
and copy pictures from documents in a directory.
"""
import sys
import time
from hashlib import sha256
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 530
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "collected_pics"
DATA = Path(__file__).parent / "data"

# Module level state shared by the functions below:
# sha256 digests of the images already met, to store each picture only once
known_images = set()
# number of images written to OUTPUT_DIR
counter_image = 0
# number of ODF documents successfully opened
counter_odf = 0
# number of image URLs that are not a part of their document
counter_outside = 0


def store_image(path, name, content):
    """Write ``content`` to OUTPUT_DIR as "<odf file>_<n>_<image name>".

    The dots of the ODF file name are replaced by "_", and <n> is the first
    integer, starting at 1, for which no such file exists yet. The global
    counter of stored images is incremented.
    """
    global counter_image

    if not OUTPUT_DIR.is_dir():
        OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    prefix = path.name.replace(".", "_")
    index = 1
    while True:
        target = OUTPUT_DIR / f"{prefix}_{index}_{name}"
        if not target.exists():
            break
        index += 1

    target.write_bytes(content)
    counter_image += 1


def parse_odf_pics(path: Path):
    """Store the not yet known images of the ODF document at ``path``.

    Files whose suffix does not start with ".od", and files that cannot be
    opened as a Document, are silently ignored. An image whose URL is not a
    part of the document is reported and counted in ``counter_outside``.
    """
    global counter_odf
    global counter_outside

    suffix = path.suffix.lower()
    if not suffix.startswith(".od"):
        return
    try:
        document = Document(path)
    except Exception:
        # best effort: whatever the reason, this is not a usable ODF file
        return

    counter_odf += 1
    for image in document.body.images:
        image_url = image.url
        if not image_url:
            continue
        try:
            image_content = document.get_part(image_url)
        except KeyError:
            print("- not found inside document:", path)
            print("  image URL:", image_url)
            counter_outside += 1
        else:
            image_name = image_url.rsplit("/", 1)[-1]
            if known_pic(image_content):
                continue
            store_image(path, image_name, image_content)


def known_pic(content) -> bool:
    """Tell whether ``content`` was already seen, remembering its sha256 digest."""
    footprint = sha256(content).digest()
    already_seen = footprint in known_images
    if not already_seen:
        known_images.add(footprint)
    return already_seen


def analyse_document(source):
    """Look for pictures in every file found below the ``source`` directory."""
    regular_files = (entry for entry in source.glob("**/*") if entry.is_file())
    for entry in regular_files:
        parse_odf_pics(entry)


def main():
    """Collect the pictures found below the source directory and print a summary.

    The source is the first command line argument, or DATA by default.
    """
    source = sys.argv[1] if len(sys.argv) > 1 else DATA

    started = time.time()
    analyse_document(Path(source))
    elapsed = time.time() - started

    summary = (
        f"{counter_image} images copied ({counter_outside} not found) from "
        f"{counter_odf} ODF files to {OUTPUT_DIR} in {elapsed:.2f}sec."
    )
    print(summary)


if __name__ == "__main__":
    main()

Read document from bytesio

Read a document from BytesIO.

recipes/read_document_from_bytesio.py
#!/usr/bin/env python
"""Read a document from BytesIO.
"""
import io
from pathlib import Path

from odfdo import Document

_DOC_SEQUENCE = 600
DATA = Path(__file__).parent / "data"
SOURCE = "lorem.odt"


def main():
    """Load an ODF document from an in-memory BytesIO and check its text.

    Raises:
        ValueError: if the expected "Lorem ipsum" sentence is not found.
    """
    # the bytes come from a file here, they could come from some network
    raw_bytes = (DATA / SOURCE).read_bytes()
    # a BytesIO built from initial bytes is positioned at its start
    with io.BytesIO(raw_bytes) as bytes_content:
        # Create the odfdo.Document from the BytesIO
        document = Document(bytes_content)
        # check :
        match = document.body.search("Lorem ipsum dolor sit amet")
        if match is None:
            raise ValueError("string not found")


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Save document as BytesIO

Save a document as BytesIO.

recipes/save_document_as_bytesio.py
#!/usr/bin/env python
"""Save a document as BytesIO.
"""
import io
from pathlib import Path

from odfdo import Document, Paragraph

# NOTE(review): not read by this recipe; presumably a sequence number used
# by the documentation tooling — confirm.
_DOC_SEQUENCE = 605
# Directory receiving the file written by this recipe.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "bytes"
# File name of the document written in OUTPUT_DIR.
TARGET = "document.odt"


def make_document():
    """Return a new text document with a "Hello World" paragraph appended."""
    document = Document("text")
    document.body.append(Paragraph("Hello World"))
    return document


def main():
    """Save a document into a BytesIO, then dump the bytes to a file."""
    document = make_document()
    with io.BytesIO() as bytes_content:
        document.save(bytes_content)
        # Now use the BytesIO in some way:
        # In a network context, typically:
        #    response.write(bytes_content.getvalue())
        OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        destination = OUTPUT_DIR / TARGET
        destination.write_bytes(bytes_content.getvalue())


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Export tables to CSV format

Export tables to CSV format.

recipes/export_tables_to_csv_format.py
#!/usr/bin/env python
"""Export tables to CSV format."""

import os
import sys
from pathlib import Path

from odfdo import Document

# NOTE(review): not read by this recipe; presumably a sequence number used
# by the documentation tooling — confirm.
_DOC_SEQUENCE = 610
# Directory of the recipe input files, next to this script.
DATA = Path(__file__).parent / "data"
# Default spreadsheet, used when no path is given on the command line.
SOURCE = "two_sheets.ods"
# Directory receiving the exported CSV files.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "csv"


def read_source_document() -> Document:
    """Open the source Document.

    The path is the first command-line argument when one is given,
    DATA / SOURCE otherwise.
    """
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)


def export_tables_to_csv(document: Document) -> None:
    """Write each table of the document body to its own CSV file.

    Files are named ``content_<index>.csv`` in OUTPUT_DIR, where the index
    is the position of the table in the document body.
    """
    for position, sheet in enumerate(document.body.tables):
        OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        destination = OUTPUT_DIR / f"content_{position}.csv"
        # default parameters produce an "excel" CSV format,
        # see Python csv library for options.
        sheet.to_csv(destination)


def main() -> None:
    """Export the tables of the source document, then run the self-check."""
    source_document = read_source_document()
    export_tables_to_csv(source_document)
    test_unit(source_document)


def test_unit(document: Document) -> None:
    """Check the CSV rendering of the first two tables.

    Does nothing unless the ODFDO_TESTING environment variable is set.
    """
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return

    expectations = (
        "col A,col B,col C\r\n1,2,3\r\na text,,another\r\n",
        ",,,\r\n,col B,col C,col D\r\n,1,2,3\r\n,a text,,another\r\n",
    )
    for position, expected in enumerate(expectations):
        content = document.body.tables[position].to_csv()
        assert content == expected


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Import CSV content into a table

Import CSV content into a table.

recipes/import_csv_content_into_a_table.py
#!/usr/bin/env python
"""Import CSV content into a table."""

import os
import sys
from pathlib import Path

from odfdo import Document, Table

# NOTE(review): not read by this recipe; presumably a sequence number used
# by the documentation tooling — confirm.
_DOC_SEQUENCE = 615
# Directory of the recipe input files, next to this script.
DATA = Path(__file__).parent / "data"
# Default CSV file, used when no path is given on the command line.
SOURCE = "some_csv.csv"
# Directory receiving the document written by this recipe.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "csv2"
# File name of the spreadsheet written in OUTPUT_DIR.
TARGET = "document.ods"


def save_new(document: Document, name: str) -> None:
    """Write the recipe result as ``name`` in OUTPUT_DIR.

    The directory is created when missing and the destination path is
    printed before saving.
    """
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def read_text_document() -> str:
    """Return the text content of the source file.

    The file is the first command-line argument when one is given,
    DATA / SOURCE otherwise.
    """
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Path(source).read_text()


def import_csv() -> Document:
    """Build a spreadsheet whose only body content is the imported CSV.

    The CSV text comes from ``read_text_document`` and becomes a table
    named "Sheet name".
    """
    csv_text = read_text_document()
    spreadsheet = Document("ods")
    sheet = Table.from_csv(csv_text, "Sheet name")
    body = spreadsheet.body
    body.clear()
    body.append(sheet)
    return spreadsheet


def main() -> None:
    """Import the CSV source into a spreadsheet, self-check it and save it."""
    spreadsheet = import_csv()
    test_unit(spreadsheet)
    save_new(spreadsheet, TARGET)


def test_unit(document: Document) -> None:
    """Check the name and the CSV rendering of the imported table.

    Does nothing unless the ODFDO_TESTING environment variable is set.
    """
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        imported = document.body.get_table(0)
        assert imported.name == "Sheet name"
        csv_content = imported.to_csv()
        assert csv_content == (
            ",,,\r\n,col B,col C,col D\r\n,1,2,3\r\n,a text,,another\r\n"
        )


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Search and replace words

Search and replace words in a text document.

recipes/search_and_replace_words.py
#!/usr/bin/env python
"""Search and replace words in a text document.
"""
from pathlib import Path

from odfdo import Document

# NOTE(review): not read by this recipe; presumably a sequence number used
# by the documentation tooling — confirm.
_DOC_SEQUENCE = 700
# Directory of the recipe input files, next to this script.
DATA = Path(__file__).parent / "data"
# File name of the input document, looked up in DATA.
SOURCE = "lorem.odt"
# Directory receiving the document written by this recipe.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "replaced_text"
# File name of the modified document written in OUTPUT_DIR.
TARGET = "lorem_replaced.odt"


def save_new(document: Document, name: str):
    """Write the recipe result as ``name`` in OUTPUT_DIR.

    The directory is created when missing and the destination path is
    printed before saving.
    """
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def search_replace(document):
    """Apply a series of sample replacements to the document body, in place.

    The replacements run in sequence: on the whole body, then on the
    paragraphs only, then on the headers only, and finally with two
    regular expression patterns on the whole body.
    """
    body = document.body

    # replace a string in the full document
    body.replace("Lorem", "(Lorem replaced)")

    # replace in paragraphs only
    for paragraph in body.paragraphs:
        paragraph.replace("ipsum", "(ipsum in paragraph)")

    # replace in headers
    for header in body.headers:
        header.replace("ipsum", "(ipsum in header)")

    # pattern is a regular expression
    regex_replacements = (
        (r"\S+lit ", "(...lit) "),
        (r"pul[a-z]+", "(pulvinar)"),
    )
    for pattern, new_text in regex_replacements:
        body.replace(pattern, new_text)


def main():
    """Load the source document, apply the replacements and save the result."""
    source_path = DATA / SOURCE
    document = Document(source_path)
    search_replace(document)
    save_new(document, TARGET)


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Spreadsheet with words frequency from a text

Load an ODF text, store the frequency of words in a spreadsheet, make requests on the table, by regex or value.

recipes/spreadsheet_with_words_frequency_from_a_text.py
#!/usr/bin/env python
"""Load an ODF text, store the frequency of words in a spreadsheet,
make requests on the table, by regex or value.
"""
import sys
from pathlib import Path

from odfdo import Document, Table

# NOTE(review): not read by this recipe; presumably a sequence number used
# by the documentation tooling — confirm.
_DOC_SEQUENCE = 710
# Directory receiving the spreadsheet written by this recipe.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "freq"
# Default text document, used when no path is given on the command line.
SOURCE = "collection2.odt"
# Directory of the recipe input files, next to this script.
DATA = Path(__file__).parent / "data"
# File name of the spreadsheet written in OUTPUT_DIR.
TARGET = "frequency.ods"


def save_new(document: Document, name: str):
    """Write the recipe result as ``name`` in OUTPUT_DIR.

    The directory is created when missing and the destination path is
    printed before saving.
    """
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def read_source_document():
    """Open the source Document.

    The path is the first command-line argument when one is given,
    DATA / SOURCE otherwise.
    """
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)


def main():
    """Generate the word-frequency spreadsheet and save it as TARGET."""
    # Reference console output, kept for the reader; it is not checked.
    _expected_result = """
    Word frequency analysis of collection2.odt
    Nb of words: 9128
    Unique words found: 2337
    Rows in the table : 2337
    Words corresponding to the regex: ^the
      word: the                   occurences: 644
      word: they                  occurences: 15
      word: their                 occurences: 11
      word: then                  occurences: 10
      word: there                 occurences: 7
      word: these                 occurences: 4
      word: them                  occurences: 4
      word: themselves            occurences: 2
      word: theme                 occurences: 2
      word: themed                occurences: 1
      word: theatrical            occurences: 1
    List of words of frequency 15: two, they, release, one, its, his, film,
    episodes, but, adaptation, UK, Radio, J, 0
"""
    spreadsheet = generate_document()
    save_new(spreadsheet, TARGET)


def frequence_count(document):
    """Return a dict mapping each word of the document body to its count.

    The text is ``str(document.body)``. A fixed set of punctuation
    characters is replaced by spaces, then the text is split on
    whitespace; words are counted as they are, so counting is
    case-sensitive. Keys appear in order of first occurrence.

    The file name of the document (from ``document.container.path``), the
    total number of words and the number of unique words are printed.
    """
    print("Word frequency analysis of", Path(document.container.path).name)

    # A single translate() pass maps every separator to a space, instead
    # of one full-text replace() pass per separator character.
    separators = str.maketrans(dict.fromkeys("():;!.,[]{}#@/\\=-_+*`\"'", " "))
    words = str(document.body).translate(separators).split()
    print("Nb of words:", len(words))

    frequences = {}
    for word in words:
        frequences[word] = frequences.get(word, 0) + 1

    print("Unique words found:", len(frequences))
    return frequences


def generate_document():
    """Return a spreadsheet listing every word of the source text and its count.

    The source document is loaded by ``read_source_document`` and its words
    are counted by ``frequence_count``. The "Frequency Table" sheet receives
    one ``(word, count)`` row per unique word, ordered by decreasing count
    and, for equal counts, by decreasing word. Two sample queries are then
    run on the table and printed: the rows matching the regex ``^the`` and
    the words whose count is exactly 15.
    """
    document_source = read_source_document()
    spreadsheet = Document("spreadsheet")

    frequences = frequence_count(document_source)

    # Populate the table in the spreadsheet
    body = spreadsheet.body
    body.clear()
    table = Table("Frequency Table")
    body.append(table)

    # An explicit sort is required: the dict only keeps the words in order
    # of first appearance. (count, word) pairs are unique, so the ordering
    # is fully determined.
    ranked = sorted(
        ((count, word) for word, count in frequences.items()),
        reverse=True,
    )

    # one solution :

    # for count, word in ranked:
    #    row = Row()
    #    row.set_value(0, word)
    #    row.set_value(1, count) # Cell type is guessed.
    #    table.append_row(row)

    # another solution :
    table.set_values([(word, count) for count, word in ranked])

    print("Rows in the table :", len(table.rows))

    # frequency of word:
    regex_query = "^the"
    print("Words corresponding to the regex:", regex_query)
    result = table.get_rows(content=regex_query)
    for row in result:
        print(f"  word: {row.get_value(0):<20}  occurences: {row.get_value(1)}")

    # list of words of frequency = 15
    found = [word for word, freq in table.iter_values() if freq == 15]
    print("List of words of frequency 15:", ", ".join(found))
    return spreadsheet


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()

Transpose table

Transpose a table. Create a spreadsheet table (example: 50 rows and 20 columns), and subsequently create a new table in a separate sheet where the columns and rows are now swapped (e.g. 20 rows and 50 columns).

recipes/transpose_table.py
#!/usr/bin/env python
"""Transpose a table. Create a spreadsheet table (example: 50 rows and 20
columns), and subsequently create a new table in a separate sheet where the
columns and rows are now swapped (e.g. 20 rows and 50 columns).
"""
from pathlib import Path

from odfdo import Document, Row, Table

# NOTE(review): not read by this recipe; presumably a sequence number used
# by the documentation tooling — confirm.
_DOC_SEQUENCE = 800
# Directory receiving the spreadsheet written by this recipe.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "transpose"
# File name of the spreadsheet written in OUTPUT_DIR.
TARGET = "transposed.ods"


def save_new(document: Document, name: str):
    """Write the recipe result as ``name`` in OUTPUT_DIR.

    The directory is created when missing and the destination path is
    printed before saving.
    """
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)


def main():
    """Generate the spreadsheet with its transposed tables and save it."""
    save_new(generate_document(), TARGET)


def generate_document():
    """Return a spreadsheet holding a table and two transposed copies of it.

    The first sheet, "Table", has 50 rows of 20 cells labelled with a
    column letter and a row number ("A1", "B1", ...). The "Symetry" sheet
    is built by copying each column of the first table into a row. The
    "Transpose" sheet is a clone of the first table on which
    ``transpose()`` is called. The size of each table is printed.
    """
    lines = 50
    cols = 20

    spreadsheet = Document("spreadsheet")
    body = spreadsheet.body
    body.clear()

    # Populate the table in the spreadsheet
    table = Table("Table")
    body.append(table)
    for line_index in range(lines):
        row = Row()
        for col_index in range(cols):
            # chr(65) is "A": letter for the column, number for the line
            label = f"{chr(65 + col_index)}{line_index + 1}"
            row.set_value(col_index, label)
        table.append(row)

    print("Size of Table :", table.size)

    # building the symmetric table using classical method :
    table2 = Table("Symetry")
    for index in range(cols):
        column_values = table.get_column_values(index)
        table2.set_row_values(index, column_values)
    body.append(table2)

    print("Size of symetric table 2 :", table2.size)

    # a more simple solution with the table.transpose() method :
    table3 = table.clone
    table3.transpose()
    table3.name = "Transpose"
    body.append(table3)

    print("Size of symetric table 3 :", table3.size)
    return spreadsheet


# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()