From f7ad69f9ec88808b7df0029f00c7844e97a12a4d Mon Sep 17 00:00:00 2001 From: Duncan Tourolle Date: Tue, 27 May 2025 11:58:19 +0200 Subject: [PATCH] first code commit --- .gitignore | 33 + LICENSE | 21 + MANIFEST.in | 10 + README.md | 93 ++ pyWebLayout/__init__.py | 44 + pyWebLayout/__main__.py | 12 + pyWebLayout/abstract/__init__.py | 6 + pyWebLayout/abstract/block.py | 783 +++++++++++++++ pyWebLayout/abstract/document.py | 377 +++++++ pyWebLayout/abstract/functional.py | 310 ++++++ pyWebLayout/abstract/inline.py | 208 ++++ pyWebLayout/base.py | 68 ++ pyWebLayout/concrete/__init__.py | 5 + pyWebLayout/concrete/box.py | 61 ++ pyWebLayout/concrete/functional.py | 545 +++++++++++ pyWebLayout/concrete/image.py | 233 +++++ pyWebLayout/concrete/page.py | 175 ++++ pyWebLayout/concrete/text.py | 455 +++++++++ pyWebLayout/core/__init__.py | 10 + pyWebLayout/core/base.py | 67 ++ pyWebLayout/examples/epub_viewer.py | 100 ++ pyWebLayout/html_parser.py | 918 ++++++++++++++++++ pyWebLayout/io/__init__.py | 69 ++ pyWebLayout/io/readers/__init__.py | 36 + pyWebLayout/io/readers/base.py | 229 +++++ pyWebLayout/io/readers/epub_metadata.py | 352 +++++++ pyWebLayout/io/readers/epub_reader.py | 400 ++++++++ pyWebLayout/io/readers/html.py | 190 ++++ pyWebLayout/io/readers/html_content.py | 269 +++++ pyWebLayout/io/readers/html_elements.py | 472 +++++++++ pyWebLayout/io/readers/html_metadata.py | 426 ++++++++ pyWebLayout/io/readers/html_resources.py | 483 +++++++++ pyWebLayout/io/readers/html_style.py | 281 ++++++ pyWebLayout/io/readers/html_text.py | 163 ++++ pyWebLayout/layout.py | 11 + pyWebLayout/localisation.py | 1 + pyWebLayout/style.py | 176 ++++ pyWebLayout/style/__init__.py | 17 + pyWebLayout/style/alignment.py | 16 + pyWebLayout/style/fonts.py | 176 ++++ pyWebLayout/table.py | 137 +++ pyWebLayout/typesetting/__init__.py | 15 + .../typesetting/document_pagination.py | 323 ++++++ pyWebLayout/typesetting/flow.py | 155 +++ pyWebLayout/typesetting/pagination.py | 231 +++++ 
pyproject.toml | 18 + setup.cfg | 23 + setup.py | 32 + tests/TESTING_STRATEGY.md | 299 ++++++ tests/__init__.py | 6 + tests/test_abstract_blocks.py | 275 ++++++ tests/test_html_content.py | 354 +++++++ tests/test_html_style.py | 182 ++++ tests/test_html_text.py | 247 +++++ tests/test_runner.py | 84 ++ 55 files changed, 10682 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 README.md create mode 100644 pyWebLayout/__init__.py create mode 100644 pyWebLayout/__main__.py create mode 100644 pyWebLayout/abstract/__init__.py create mode 100644 pyWebLayout/abstract/block.py create mode 100644 pyWebLayout/abstract/document.py create mode 100644 pyWebLayout/abstract/functional.py create mode 100644 pyWebLayout/abstract/inline.py create mode 100644 pyWebLayout/base.py create mode 100644 pyWebLayout/concrete/__init__.py create mode 100644 pyWebLayout/concrete/box.py create mode 100644 pyWebLayout/concrete/functional.py create mode 100644 pyWebLayout/concrete/image.py create mode 100644 pyWebLayout/concrete/page.py create mode 100644 pyWebLayout/concrete/text.py create mode 100644 pyWebLayout/core/__init__.py create mode 100644 pyWebLayout/core/base.py create mode 100644 pyWebLayout/examples/epub_viewer.py create mode 100644 pyWebLayout/html_parser.py create mode 100644 pyWebLayout/io/__init__.py create mode 100644 pyWebLayout/io/readers/__init__.py create mode 100644 pyWebLayout/io/readers/base.py create mode 100644 pyWebLayout/io/readers/epub_metadata.py create mode 100644 pyWebLayout/io/readers/epub_reader.py create mode 100644 pyWebLayout/io/readers/html.py create mode 100644 pyWebLayout/io/readers/html_content.py create mode 100644 pyWebLayout/io/readers/html_elements.py create mode 100644 pyWebLayout/io/readers/html_metadata.py create mode 100644 pyWebLayout/io/readers/html_resources.py create mode 100644 pyWebLayout/io/readers/html_style.py create mode 100644 pyWebLayout/io/readers/html_text.py 
create mode 100644 pyWebLayout/layout.py create mode 100644 pyWebLayout/localisation.py create mode 100644 pyWebLayout/style.py create mode 100644 pyWebLayout/style/__init__.py create mode 100644 pyWebLayout/style/alignment.py create mode 100644 pyWebLayout/style/fonts.py create mode 100644 pyWebLayout/table.py create mode 100644 pyWebLayout/typesetting/__init__.py create mode 100644 pyWebLayout/typesetting/document_pagination.py create mode 100644 pyWebLayout/typesetting/flow.py create mode 100644 pyWebLayout/typesetting/pagination.py create mode 100644 pyproject.toml create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 tests/TESTING_STRATEGY.md create mode 100644 tests/__init__.py create mode 100644 tests/test_abstract_blocks.py create mode 100644 tests/test_html_content.py create mode 100644 tests/test_html_style.py create mode 100644 tests/test_html_text.py create mode 100644 tests/test_runner.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f68d435 --- /dev/null +++ b/.gitignore @@ -0,0 +1,33 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class +*/__pycache__ +# Distribution / packaging +dist/ +build/ +*.egg-info/ + +# Environment +venv/ +env/ +.env/ +.venv/ + +# Tests +.pytest_cache/ +.coverage +htmlcov/ + +# IDE files +.idea/ +.vscode/ +*.swp +*.swo + +# Project specific +*.png +*.jpg +*.jpeg +*.gif +*.svg diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..536b3ea --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Duncan Tourolle + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to 
the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..904e69d --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,10 @@ +include README.md +include LICENSE +include pyWebLayout/*.py +recursive-include pyWebLayout/abstract *.py +recursive-include pyWebLayout/concrete *.py +recursive-include pyWebLayout/style *.py +recursive-include pyWebLayout/core *.py +recursive-include pyWebLayout/typesetting *.py +recursive-include pyWebLayout/io *.py +recursive-include pyWebLayout/examples *.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..74763f9 --- /dev/null +++ b/README.md @@ -0,0 +1,93 @@ +# PyWebLayout + +A Python library for HTML-like layout and rendering. + +## Description + +PyWebLayout provides classes for rendering HTML-like content to images using a box-based layout system. It includes support for text, tables, and containers, as well as an HTML parser for converting HTML to layout objects. 
+ +## Features + +- HTML-like layout system +- Text rendering with font support +- Table layouts +- Container elements +- HTML parsing +- Image output + +## Installation + +```bash +pip install pyWebLayout +``` + +## Usage + +### Basic Example + +```python +from pyWebLayout.concrete.page import Page, Container +from pyWebLayout.abstract.inline import Line +from pyWebLayout.layout import Alignment +from PIL import ImageFont + +# Create a page +page = Page(size=(800, 600), background_color=(240, 240, 240)) + +# Add a title container +title_container = Container( + origin=(0, 0), + size=(780, 60), + direction='horizontal', + spacing=10, + padding=(10, 10, 10, 10), + halign=Alignment.CENTER, + valign=Alignment.CENTER +) +page.add_child(title_container) + +# Create a title line with text +title_font = ImageFont.load_default() +title_line = Line( + spacing=(8, 15), + origin=(0, 0), + size=(760, 40), + font=title_font, + text_color=(0, 0, 0), + halign=Alignment.CENTER +) +title_container.add_child(title_line) +title_line.add_word("PyWebLayout", title_font) +title_line.add_word("Example", title_font) + +# Layout and render the page +page.layout() +image = page.render() +image.save("example.png") +``` + +### HTML Example + +```python +from pyWebLayout.html_parser import html_to_image + +html = """ +
+<h1>PyWebLayout HTML Example</h1>
+<p>This is a paragraph rendered from HTML.</p>
+<p>The library supports <b>bold</b>, <i>italic</i>, and <u>underlined</u> text.</p>
+
+""" + +# Render HTML to an image +image = html_to_image(html, page_size=(800, 600)) +image.save("html_example.png") +``` + +## License + +MIT License + +## Author + +Duncan Tourolle - duncan@tourolle.paris diff --git a/pyWebLayout/__init__.py b/pyWebLayout/__init__.py new file mode 100644 index 0000000..7df731a --- /dev/null +++ b/pyWebLayout/__init__.py @@ -0,0 +1,44 @@ +""" +PyWebLayout - A Python library for HTML-like layout and rendering. + +This library provides classes for rendering HTML-like content to images +using a box-based layout system. It includes support for text, tables, +and containers, as well as parsers for HTML and EPUB content. It also +supports pagination for ebook-like content with the ability to pause, +save state, and resume rendering. +""" + +__version__ = '0.1.0' + +# Core abstractions +from pyWebLayout.core import Renderable, Interactable, Layoutable, Queriable + +# Style components +from pyWebLayout.style import Alignment, Font, FontWeight, FontStyle, TextDecoration + +# Typesetting algorithms +from pyWebLayout.typesetting import ( + FlowLayout, + Paginator, PaginationState, + DocumentPaginator, DocumentPaginationState +) + +# Abstract document model +from pyWebLayout.abstract.document import Document, Book, Chapter, MetadataType + +# Concrete implementations +from pyWebLayout.concrete.box import Box +from pyWebLayout.concrete.text import Line +from pyWebLayout.concrete.page import Container, Page + +# Abstract components +from pyWebLayout.abstract.inline import Word + +# Layout components +from pyWebLayout.table import Table, TableCell + +# IO functionality (reading and writing) +from pyWebLayout.io import ( + parse_html, html_to_document, # HTML parsing + read_epub # EPUB reading +) diff --git a/pyWebLayout/__main__.py b/pyWebLayout/__main__.py new file mode 100644 index 0000000..b40fdf5 --- /dev/null +++ b/pyWebLayout/__main__.py @@ -0,0 +1,12 @@ +import os +import sys + +# Add the parent directory to sys.path for direct execution 
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +# Now import the example module +from pyWebLayout.example import save_examples + +if __name__ == "__main__": + print("Running PyWebLayout examples...") + save_examples() diff --git a/pyWebLayout/abstract/__init__.py b/pyWebLayout/abstract/__init__.py new file mode 100644 index 0000000..eaf2e6c --- /dev/null +++ b/pyWebLayout/abstract/__init__.py @@ -0,0 +1,6 @@ +from .block import Block, BlockType, Parapgraph, Heading, HeadingLevel, Quote, CodeBlock +from .block import HList, ListItem, ListStyle, Table, TableRow, TableCell +from .block import HorizontalRule, LineBreak, Image +from .inline import Word, FormattedSpan +from .document import Document, MetadataType, Chapter, Book +from .functional import Link, LinkType, Button, Form, FormField, FormFieldType diff --git a/pyWebLayout/abstract/block.py b/pyWebLayout/abstract/block.py new file mode 100644 index 0000000..4fb5105 --- /dev/null +++ b/pyWebLayout/abstract/block.py @@ -0,0 +1,783 @@ +from typing import List, Iterator, Tuple, Dict, Optional, Union, Any +from enum import Enum +from .inline import Word, FormattedSpan + + +class BlockType(Enum): + """Enumeration of different block types for classification purposes""" + PARAGRAPH = 1 + HEADING = 2 + QUOTE = 3 + CODE_BLOCK = 4 + LIST = 5 + LIST_ITEM = 6 + TABLE = 7 + TABLE_ROW = 8 + TABLE_CELL = 9 + HORIZONTAL_RULE = 10 + LINE_BREAK = 11 + IMAGE = 12 + + +class Block: + """ + Base class for all block-level elements. + Block elements typically represent visual blocks of content that stack vertically. + """ + + def __init__(self, block_type: BlockType): + """ + Initialize a block element. 
+ + Args: + block_type: The type of block this element represents + """ + self._block_type = block_type + self._parent = None + + @property + def block_type(self) -> BlockType: + """Get the type of this block element""" + return self._block_type + + @property + def parent(self): + """Get the parent block containing this block, if any""" + return self._parent + + @parent.setter + def parent(self, parent): + """Set the parent block""" + self._parent = parent + + +class Parapgraph(Block): + """ + A paragraph is a block-level element that contains a sequence of words. + """ + + def __init__(self): + """Initialize an empty paragraph""" + super().__init__(BlockType.PARAGRAPH) + self._words: List[Word] = [] + self._spans: List[FormattedSpan] = [] + + def add_word(self, word: Word): + """ + Add a word to this paragraph. + + Args: + word: The Word object to add + """ + self._words.append(word) + + def add_span(self, span: FormattedSpan): + """ + Add a formatted span to this paragraph. + + Args: + span: The FormattedSpan object to add + """ + self._spans.append(span) + + def words(self) -> Iterator[Tuple[int, Word]]: + """ + Iterate over the words in this paragraph. + + Yields: + Tuples of (index, word) for each word in the paragraph + """ + for i, word in enumerate(self._words): + yield i, word + + def spans(self) -> Iterator[FormattedSpan]: + """ + Iterate over the formatted spans in this paragraph. + + Yields: + Each FormattedSpan in the paragraph + """ + for span in self._spans: + yield span + + @property + def word_count(self) -> int: + """Get the number of words in this paragraph""" + return len(self._words) + + +class HeadingLevel(Enum): + """Enumeration representing HTML heading levels (h1-h6)""" + H1 = 1 + H2 = 2 + H3 = 3 + H4 = 4 + H5 = 5 + H6 = 6 + + +class Heading(Parapgraph): + """ + A heading element (h1, h2, h3, etc.) that contains text with a specific heading level. + Headings inherit from Paragraph as they contain words but have additional properties. 
+ """ + + def __init__(self, level: HeadingLevel = HeadingLevel.H1): + """ + Initialize a heading element. + + Args: + level: The heading level (h1-h6) + """ + super().__init__() + self._block_type = BlockType.HEADING + self._level = level + + @property + def level(self) -> HeadingLevel: + """Get the heading level""" + return self._level + + @level.setter + def level(self, level: HeadingLevel): + """Set the heading level""" + self._level = level + + +class Quote(Block): + """ + A blockquote element that can contain other block elements. + """ + + def __init__(self): + """Initialize an empty blockquote""" + super().__init__(BlockType.QUOTE) + self._blocks: List[Block] = [] + + def add_block(self, block: Block): + """ + Add a block element to this quote. + + Args: + block: The Block object to add + """ + self._blocks.append(block) + block.parent = self + + def blocks(self) -> Iterator[Block]: + """ + Iterate over the blocks in this quote. + + Yields: + Each Block in the quote + """ + for block in self._blocks: + yield block + + +class CodeBlock(Block): + """ + A code block element containing pre-formatted text with syntax highlighting. + """ + + def __init__(self, language: str = ""): + """ + Initialize a code block. + + Args: + language: The programming language for syntax highlighting + """ + super().__init__(BlockType.CODE_BLOCK) + self._language = language + self._lines: List[str] = [] + + @property + def language(self) -> str: + """Get the programming language""" + return self._language + + @language.setter + def language(self, language: str): + """Set the programming language""" + self._language = language + + def add_line(self, line: str): + """ + Add a line of code to this code block. + + Args: + line: The line of code to add + """ + self._lines.append(line) + + def lines(self) -> Iterator[Tuple[int, str]]: + """ + Iterate over the lines in this code block. 
+ + Yields: + Tuples of (line_number, line_text) for each line + """ + for i, line in enumerate(self._lines): + yield i, line + + @property + def line_count(self) -> int: + """Get the number of lines in this code block""" + return len(self._lines) + + +class ListStyle(Enum): + """Enumeration of list styles""" + UNORDERED = 1 #