Coverage for pyWebLayout/layout/table_optimizer.py: 88%
151 statements
« prev ^ index » next coverage.py v7.11.2, created at 2025-11-12 12:02 +0000
« prev ^ index » next coverage.py v7.11.2, created at 2025-11-12 12:02 +0000
"""
Table column width optimization for pyWebLayout.

This module provides intelligent column width distribution for tables,
ensuring optimal space usage while respecting content constraints.
"""
8from typing import List, Tuple, Optional, Dict
9from pyWebLayout.abstract.block import Table, TableRow
def optimize_table_layout(table: Table,
                          available_width: int,
                          sample_size: int = 5,
                          style=None) -> List[int]:
    """
    Optimize column widths for a table.

    Strategy:
    1. Subtract border/spacing overhead from the available width
    2. Check for HTML width overrides (width attributes); such columns
       are treated as fixed and are not measured
    3. Sample the first ``sample_size`` rows of each section (performance)
    4. Measure minimum and preferred width of every sampled cell and keep
       the per-column maximum (never below 50)
    5. Hand the per-column constraints to distribute_column_widths(),
       which decides the final widths

    Note: Hyphenation threshold is controlled by Font.min_hyphenation_width,
    not passed as a parameter here to avoid duplication.

    Args:
        table: The table to optimize
        available_width: Total width available
        sample_size: Number of rows to sample per section (default 5)
        style: Optional table style for border/padding calculations; when
            falsy a border width of 1 is assumed

    Returns:
        List of optimized column widths (empty if the table has no rows)
    """
    # Imported lazily inside the function, as in the original code.
    # Both imports are done once per call rather than once per cell.
    from pyWebLayout.concrete.dynamic_page import DynamicPage
    from pyWebLayout.style.page_style import PageStyle

    n_cols = get_column_count(table)
    if n_cols == 0:
        return []

    # Account for table borders/padding overhead
    if style:
        overhead = calculate_table_overhead(n_cols, style)
    else:
        # Default border overhead: one border line per column plus one
        border_width = 1
        overhead = border_width * (n_cols + 1)
    available_for_content = available_width - overhead

    # Phase 0: Check for HTML width overrides
    html_widths = extract_html_column_widths(table)
    fixed_columns = {
        i: width for i, width in enumerate(html_widths) if width is not None
    }

    # Phase 1: Sample rows and measure constraints for each column
    min_widths = []   # Minimum without breaking words (Font handles hyphenation)
    pref_widths = []  # Preferred (no wrapping)

    # Materialize each sampled row's cells once, instead of once per column.
    sampled_rows = sample_table_rows(table, sample_size)
    sampled_cells = [list(row.cells()) for row in sampled_rows]

    for col_idx in range(n_cols):
        # Columns with an HTML width override are pinned to that width.
        if col_idx in fixed_columns:
            fixed_width = fixed_columns[col_idx]
            min_widths.append(fixed_width)
            pref_widths.append(fixed_width)
            continue

        col_min = 50  # Absolute minimum
        col_pref = 50

        for cells in sampled_cells:
            # Rows may have fewer cells than the first row; skip the gap.
            if col_idx >= len(cells):
                continue

            cell = cells[col_idx]

            # Measure on a throwaway page with no padding/borders: we only
            # want the content width, not a rendered page. A fresh style is
            # built per page so measurement pages never share a style object.
            measurement_style = PageStyle(padding=(0, 0, 0, 0), border_width=0)
            cell_page = DynamicPage(style=measurement_style)
            layout_cell_content(cell_page, cell)

            # Widest minimum / preferred requirement seen in this column.
            col_min = max(col_min, cell_page.get_min_width())
            col_pref = max(col_pref, cell_page.get_preferred_width())

        min_widths.append(col_min)
        pref_widths.append(col_pref)

    # Phase 2: Distribute width (respecting fixed columns)
    return distribute_column_widths(
        min_widths,
        pref_widths,
        available_for_content,
        fixed_columns
    )
def layout_cell_content(page, cell):
    """
    Populate a measurement page with the text content of a table cell.

    Every Paragraph or Heading block in the cell that has at least one word
    becomes one Line holding a Text object per word; the Line is then added
    to the page as a child. Blocks of any other type are skipped.

    Args:
        page: Page object exposing add_child() (a DynamicPage in this module)
        cell: Cell object exposing blocks()
    """
    from pyWebLayout.concrete.text import Line, Text
    from pyWebLayout.style.fonts import Font
    from pyWebLayout.style import FontWeight, Alignment
    from pyWebLayout.abstract.block import Paragraph, Heading
    from PIL import Image as PILImage, ImageDraw

    # Fixed font used purely for measurement
    measurement_font = Font(
        font_path="/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        font_size=12,
    )

    # Text and Line are handed a draw context; a 1x1 scratch image suffices
    scratch_draw = ImageDraw.Draw(PILImage.new('RGB', (1, 1)))

    text_block_types = (Paragraph, Heading)

    for block in cell.blocks():
        if not isinstance(block, text_block_types):
            continue

        # `words` may be exposed either as a method or as a plain attribute
        word_source = block.words
        if callable(word_source):
            word_source = word_source()
        entries = list(word_source)
        if not entries:
            continue

        # One oversized line per block, so all words land on the same line
        measure_line = Line(
            spacing=(3, 6),
            origin=(0, 0),
            size=(1000, 20),
            draw=scratch_draw,
            font=measurement_font,
            halign=Alignment.LEFT,
        )

        for entry in entries:
            # Entries may be (index, word) tuples or bare word objects
            is_indexed = isinstance(entry, tuple) and len(entry) >= 2
            word = entry[1] if is_indexed else entry

            content = word.text if hasattr(word, 'text') else str(word)

            measure_line._text_objects.append(
                Text(text=content, style=measurement_font, draw=scratch_draw)
            )

        page.add_child(measure_line)
def get_column_count(table: Table) -> int:
    """
    Get the number of columns in a table.

    The count is taken from the first row yielded by ``table.all_rows()``
    only; later rows are not inspected.

    Args:
        table: The table to analyze

    Returns:
        ``cell_count`` of the first row, or 0 if the table has no rows
    """
    # Only the first (section, row) pair is needed, so pull just that one
    # instead of materializing every row of the table.
    first_entry = next(iter(table.all_rows()), None)
    if first_entry is None:
        return 0

    first_row = first_entry[1]
    return first_row.cell_count
def sample_table_rows(table: Table, sample_size: int) -> List[TableRow]:
    """
    Sample the first ``sample_size`` rows from each table section.

    Rows are returned grouped as header rows, then body rows, then footer
    rows, each group in the order ``table.all_rows()`` yields them. Rows
    whose section is none of "header", "body" or "footer" are ignored.

    Args:
        table: The table to sample
        sample_size: Number of rows to sample per section; zero or a
            negative value yields no rows

    Returns:
        List of sampled rows
    """
    # A negative size must not behave like the slice [:-n], which would
    # return nearly every row.
    limit = max(sample_size, 0)

    # Single pass over the rows, instead of one full pass per section.
    buckets = {"header": [], "body": [], "footer": []}
    for section, row in table.all_rows():
        bucket = buckets.get(section)
        if bucket is not None and len(bucket) < limit:
            bucket.append(row)

    return buckets["header"] + buckets["body"] + buckets["footer"]
def extract_html_column_widths(table: Table) -> List[Optional[int]]:
    """
    Extract column width overrides from HTML attributes.

    Two sources are consulted, in this order:
    - ``table.col_widths`` if the table has that attribute (presumably
      populated from <col width="..."> elements; confirm against the parser)
    - the ``width`` attribute of each cell in the first row yielded by
      ``table.all_rows()``

    A usable cell width takes precedence over the column-level width. A cell
    width that cannot be parsed (e.g. a percentage) leaves any column-level
    width in place.

    Args:
        table: The table to check

    Returns:
        List with one entry per column: the width, or None for
        auto-layout columns
    """
    n_cols = get_column_count(table)
    widths: List[Optional[int]] = [None] * n_cols

    # Column-level widths. zip() against range(n_cols) ignores surplus
    # entries rather than indexing past the end of `widths`; a missing or
    # None attribute is treated as "no overrides".
    col_widths = getattr(table, 'col_widths', None) or ()
    for i, width in zip(range(n_cols), col_widths):
        if width is not None:
            widths[i] = parse_html_width(width)

    # Widths declared on the cells of the first row.
    first_entry = next(iter(table.all_rows()), None)
    if first_entry is not None:
        first_row = first_entry[1]
        for i, cell in zip(range(n_cols), first_row.cells()):
            cell_width = getattr(cell, 'width', None)
            if cell_width is None:
                continue
            parsed = parse_html_width(cell_width)
            # Only override when the cell gives a usable value, so an
            # unparseable cell width cannot erase a column-level width.
            if parsed is not None:
                widths[i] = parsed

    return widths
def parse_html_width(width_value) -> Optional[int]:
    """
    Parse HTML width value (e.g., "100px", "20%", "100").

    Accepted forms:
    - a non-negative int, returned unchanged
    - a string holding a non-negative number, optionally followed by a
      "px" unit in any letter case; surrounding whitespace is ignored and
      fractional values are truncated ("12.5px" -> 12)

    Args:
        width_value: HTML width attribute value

    Returns:
        Width in pixels, or None for percentages, negative values and
        anything that cannot be parsed
    """
    if isinstance(width_value, int):
        # A negative width is meaningless and would corrupt the distribution.
        return width_value if width_value >= 0 else None

    if not isinstance(width_value, str):
        return None

    # Lower-case so the unit check is case-insensitive.
    text = width_value.strip().lower()

    # Percentage widths not supported yet
    if '%' in text:
        return None

    if text.endswith('px'):
        text = text[:-2]

    # float() accepts everything int() does, plus fractional values.
    try:
        number = float(text)
    except ValueError:
        return None

    # This range check also rejects nan (all comparisons false) and inf.
    if not (0 <= number < float('inf')):
        return None

    return int(number)
def distribute_column_widths(min_widths: List[int],
                             pref_widths: List[int],
                             available_width: int,
                             fixed_columns: Dict[int, int]) -> List[int]:
    """
    Split the available width between columns, keeping fixed columns as-is.

    Flexible columns (those not in ``fixed_columns``) share whatever width
    remains after the fixed columns are subtracted:
    - preferred widths fit: each gets its preferred width plus a share of
      the surplus proportional to that preferred width
    - even the minimum widths do not fit: each gets its minimum width
    - otherwise: each gets its minimum width plus a share of the surplus
      proportional to its (preferred - minimum) headroom

    Computed shares are truncated to int.

    Args:
        min_widths: Minimum width for each column
        pref_widths: Preferred width for each column
        available_width: Total width available
        fixed_columns: Dict mapping column index to fixed width

    Returns:
        List of final column widths
    """
    column_total = len(min_widths)
    if column_total == 0:
        return []

    flexible = [idx for idx in range(column_total) if idx not in fixed_columns]
    if not flexible:
        # Nothing to distribute: every column carries a fixed width
        return [fixed_columns.get(idx, min_widths[idx])
                for idx in range(column_total)]

    # Width left over for the flexible columns
    budget = available_width - sum(fixed_columns.values())

    min_sum = sum(min_widths[idx] for idx in flexible)
    pref_sum = sum(pref_widths[idx] for idx in flexible)

    result = [0] * column_total
    for idx, fixed in fixed_columns.items():
        result[idx] = fixed

    # Case 1: preferred widths fit, hand out any surplus proportionally
    if pref_sum <= budget:
        surplus = budget - pref_sum
        grow = surplus > 0 and pref_sum > 0
        for idx in flexible:
            if grow:
                share = pref_widths[idx] / pref_sum
                result[idx] = int(pref_widths[idx] + (surplus * share))
            else:
                result[idx] = pref_widths[idx]
        return result

    # Case 2: not even the minimums fit, use them anyway
    if min_sum > budget:
        for idx in flexible:
            result[idx] = min_widths[idx]
        return result

    # Case 3: somewhere between minimum and preferred
    surplus = budget - min_sum
    headroom = pref_sum - min_sum
    for idx in flexible:
        if headroom > 0:
            share = (pref_widths[idx] - min_widths[idx]) / headroom
            bonus = surplus * share
            result[idx] = int(min_widths[idx] + bonus)
        else:
            result[idx] = int(min_widths[idx])
    return result
def calculate_table_overhead(n_cols: int, style) -> int:
    """
    Compute the width consumed by table borders and inter-cell spacing.

    Counts ``n_cols + 1`` border lines and, when there is more than one
    column, ``n_cols - 1`` spacing gaps.

    Args:
        n_cols: Number of columns
        style: Style object providing ``border_width`` and ``cell_spacing``

    Returns:
        Total pixel overhead
    """
    # One border line per column boundary, including both outer edges
    borders = style.border_width * (n_cols + 1)

    # Spacing only exists between adjacent columns
    if n_cols > 1:
        gaps = style.cell_spacing * (n_cols - 1)
    else:
        gaps = 0

    return borders + gaps