Skip to main content

Installation

pip install sopdf

1. Open a document and render pages

import sopdf

with sopdf.open("document.pdf") as doc:
    img_bytes = doc[0].render(dpi=150)          # PNG bytes
    doc[0].render_to_file("page-1.png", dpi=300)

2. Batch parallel rendering

import sopdf

with sopdf.open("document.pdf") as doc:
    images = sopdf.render_pages(doc.pages, dpi=150, parallel=True)
    print(f"rendered: {len(images)} pages")

3. Extract text and text blocks

import sopdf

with sopdf.open("document.pdf") as doc:
    text = doc[0].get_text()
    blocks = doc[0].get_text_blocks()  # list[TextBlock] with bounding boxes
    print(text[:200])
    print(len(blocks))

4. Search text

import sopdf

with sopdf.open("document.pdf") as doc:
    hits = doc[0].search("invoice", match_case=False)  # list[Rect]
    print(f"hit count: {len(hits)}")

5. Split and merge PDFs

import sopdf

with sopdf.open("source.pdf") as doc:
    chapter = doc.split(pages=[0, 1, 2], output="chapter-1.pdf")
    doc.split_each(output_dir="pages")

sopdf.merge(["intro.pdf", "body.pdf"], output="book.pdf")

6. Compressed save and in-memory export

import sopdf

with sopdf.open("source.pdf") as doc:
    doc.save("output.pdf", compress=True, garbage=True)
    raw_bytes = doc.to_bytes()  # export bytes without writing to disk
    print(len(raw_bytes))

7. Rotate pages

import sopdf

with sopdf.open("document.pdf") as doc:
    doc[0].rotation = 90
    doc.save("rotated.pdf")

8. Open from bytes / stream

import sopdf

with open("document.pdf", "rb") as f:
    data = f.read()

with sopdf.open(stream=data) as doc:
    print(doc.page_count)

9. Metadata and outline (TOC)

import sopdf

with sopdf.open("document.pdf") as doc:
    print(doc.metadata.title)
    doc.metadata.title = "Updated Title"
    print(doc.metadata.creation_datetime)

    for item in doc.outline.items:
        print(f"[p{item.page + 1}] {item.title}")

    flat = doc.outline.to_list()  # PyMuPDF-compatible flat outline
    print(len(flat))

10. Encrypted and corrupted PDFs

import sopdf

with sopdf.open("protected.pdf", password="hunter2") as doc:
    doc.save("unlocked.pdf")

with sopdf.open("corrupted.pdf") as doc:
    doc.save("repaired.pdf")

These examples are adapted from the SoPDF README and examples/ directory. For the latest API details, see: SoMarkAI/SoPDF