Coverage for pyWebLayout/layout/table_optimizer.py: 88%

151 statements  

« prev     ^ index     » next       coverage.py v7.11.2, created at 2025-11-12 12:02 +0000

"""
Table column width optimization for pyWebLayout.

This module provides intelligent column width distribution for tables,
ensuring optimal space usage while respecting content constraints.
"""

7 

8from typing import List, Tuple, Optional, Dict 

9from pyWebLayout.abstract.block import Table, TableRow 

10 

11 

def optimize_table_layout(table: Table,
                          available_width: int,
                          sample_size: int = 5,
                          style=None) -> List[int]:
    """
    Optimize column widths for a table.

    Strategy:
    1. Check for HTML width overrides (width attributes)
    2. Sample the first ``sample_size`` rows of each section (performance)
    3. Measure the minimum width of each sampled cell
    4. Measure the preferred width of each sampled cell (no wrapping)
    5. Hand both lists to distribute_column_widths(), which picks the
       final width of every column for the available space

    Note: Hyphenation threshold is controlled by Font.min_hyphenation_width,
    not passed as a parameter here to avoid duplication.

    Args:
        table: The table to optimize
        available_width: Total width available
        sample_size: Number of rows to sample per section (default 5)
        style: Optional table style for border/padding calculations; when
            falsy, a 1px border around every column is assumed

    Returns:
        List of optimized column widths (empty if the table has no columns)
    """
    n_cols = get_column_count(table)
    if n_cols == 0:
        return []

    # Account for table borders/padding overhead
    if style:
        overhead = calculate_table_overhead(n_cols, style)
    else:
        # Default border overhead: one 1px line on each side of every column
        border_width = 1
        overhead = border_width * (n_cols + 1)
    available_for_content = available_width - overhead

    # Phase 0: Check for HTML width overrides
    html_widths = extract_html_column_widths(table)
    fixed_columns = {i: width for i, width in enumerate(html_widths) if width is not None}

    # Phase 1: Sample rows and measure constraints for each column.
    # Each sampled row's cells are materialized once up front instead of once
    # per (row, column) pair; nothing is sampled if every column is fixed.
    if len(fixed_columns) < n_cols:
        sampled_cells = [list(row.cells())
                         for row in sample_table_rows(table, sample_size)]
    else:
        sampled_cells = []

    absolute_min = 50  # No flexible column is ever measured narrower than this
    min_widths = []    # Minimum without breaking words (Font handles hyphenation)
    pref_widths = []   # Preferred (no wrapping)

    for col_idx in range(n_cols):
        # An HTML width override pins both the minimum and the preferred width
        if col_idx in fixed_columns:
            fixed_width = fixed_columns[col_idx]
            min_widths.append(fixed_width)
            pref_widths.append(fixed_width)
            continue

        col_min = absolute_min
        col_pref = absolute_min

        for cells in sampled_cells:
            # Rows with fewer cells than the column count contribute nothing
            if col_idx >= len(cells):
                continue

            cell_min, cell_pref = _measure_cell_widths(cells[col_idx])
            col_min = max(col_min, cell_min)
            col_pref = max(col_pref, cell_pref)

        min_widths.append(col_min)
        pref_widths.append(col_pref)

    # Phase 2: Distribute width (respecting fixed columns)
    return distribute_column_widths(
        min_widths,
        pref_widths,
        available_for_content,
        fixed_columns
    )


def _measure_cell_widths(cell):
    """
    Measure the content of a single table cell.

    The cell is laid out on a throwaway DynamicPage created with zero padding
    and no border, so the page is only used for measuring, not rendering.

    Args:
        cell: Table cell whose blocks should be measured

    Returns:
        Tuple of (minimum width, preferred width) as reported by the page
    """
    # Imported lazily, as in the rest of this module's functions
    from pyWebLayout.concrete.dynamic_page import DynamicPage
    from pyWebLayout.style.page_style import PageStyle

    cell_page = DynamicPage(style=PageStyle(padding=(0, 0, 0, 0), border_width=0))
    layout_cell_content(cell_page, cell)
    return cell_page.get_min_width(), cell_page.get_preferred_width()

115 

116 

def layout_cell_content(page, cell):
    """
    Populate a measurement page with the text of a table cell.

    Every Paragraph or Heading block of the cell that contains at least one
    word becomes a single Line holding one Text object per word; that Line is
    then added to the page as a child. Blocks of any other type, and blocks
    without words, are skipped.

    NOTE(review): the measurement font is loaded from a hard-coded DejaVu Sans
    path at size 12 -- confirm this path exists on every target platform.

    Args:
        page: DynamicPage to add content to
        cell: TableCell containing blocks
    """
    from pyWebLayout.concrete.text import Line, Text
    from pyWebLayout.style.fonts import Font
    from pyWebLayout.style import FontWeight, Alignment
    from pyWebLayout.abstract.block import Paragraph, Heading
    from PIL import Image as PILImage, ImageDraw

    # Font used for every measurement
    measure_font = Font(
        font_path="/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        font_size=12,
    )

    # Text and Line are handed a draw context; a 1x1 scratch image provides one
    scratch_draw = ImageDraw.Draw(PILImage.new('RGB', (1, 1)))

    for block in cell.blocks():
        if not isinstance(block, (Paragraph, Heading)):
            continue

        # ``words`` may be exposed either as a method or as a plain attribute
        entries = list(block.words() if callable(block.words) else block.words)
        if not entries:
            continue

        # One generously sized line per block (1000x20, word spacing 3..6)
        measure_line = Line(
            spacing=(3, 6),
            origin=(0, 0),
            size=(1000, 20),
            draw=scratch_draw,
            font=measure_font,
            halign=Alignment.LEFT
        )

        for entry in entries:
            # Entries may be (index, word_obj) tuples or bare word objects
            is_indexed = isinstance(entry, tuple) and len(entry) >= 2
            word = entry[1] if is_indexed else entry

            # Fall back to str() for objects without a ``text`` attribute
            content = word.text if hasattr(word, 'text') else str(word)

            measure_line._text_objects.append(
                Text(text=content, style=measure_font, draw=scratch_draw)
            )

        page.add_child(measure_line)

187 

188 

def get_column_count(table: Table) -> int:
    """
    Get the number of columns in a table.

    The count is taken from the ``cell_count`` of the first row yielded by
    ``table.all_rows()``; later rows are not inspected.

    Args:
        table: The table to analyze

    Returns:
        Number of columns, or 0 if the table has no rows
    """
    # Only the first (section, row) entry is needed, so pull a single item
    # instead of materializing every row of the table.
    first_entry = next(iter(table.all_rows()), None)
    if first_entry is None:
        return 0

    # Entries are (section, row) pairs; the row is the second element
    return first_entry[1].cell_count

206 

207 

def sample_table_rows(table: Table, sample_size: int) -> List[TableRow]:
    """
    Sample the first ``sample_size`` rows from each table section.

    The table is traversed once. Rows are returned grouped by section in the
    order header, body, footer, keeping their original order within each
    section. Rows belonging to any other section name are ignored.

    Args:
        table: The table to sample
        sample_size: Number of rows to sample per section; zero or a
            negative value yields no rows

    Returns:
        List of sampled rows
    """
    # A negative size must not turn into a "all but the last k" slice
    limit = max(sample_size, 0)

    per_section = {"header": [], "body": [], "footer": []}
    for section, row in table.all_rows():
        bucket = per_section.get(section)
        if bucket is not None and len(bucket) < limit:
            bucket.append(row)

    return per_section["header"] + per_section["body"] + per_section["footer"]

227 

228 

def extract_html_column_widths(table: Table) -> List[Optional[int]]:
    """
    Extract column width overrides from HTML attributes.

    Checks, in this order (later sources win):
    - ``table.col_widths``, if the table has that attribute
      (widths from <col width="100px"> elements)
    - the ``width`` attribute of each cell in the first row returned by
      ``table.all_rows()`` (<td width="100px"> / <th width="100px">)

    A cell width that cannot be parsed (e.g. a percentage) leaves any width
    already found for that column untouched. Entries beyond the table's
    column count are ignored.

    Args:
        table: The table to check

    Returns:
        List of widths, one per column (None for auto-layout columns)
    """
    n_cols = get_column_count(table)
    widths: List[Optional[int]] = [None] * n_cols

    # <col> widths; zip() against range(n_cols) drops surplus entries instead
    # of indexing past the end of ``widths``. A missing or None attribute is
    # treated as "no <col> widths".
    col_widths = getattr(table, 'col_widths', None) or ()
    for i, raw_width in zip(range(n_cols), col_widths):
        if raw_width is not None:
            widths[i] = parse_html_width(raw_width)

    # Width attributes on the cells of the first row
    first_entry = next(iter(table.all_rows()), None)
    if first_entry is not None:
        first_row = first_entry[1]
        for i, cell in zip(range(n_cols), first_row.cells()):
            raw_width = getattr(cell, 'width', None)
            if raw_width is None:
                continue
            parsed = parse_html_width(raw_width)
            # Only a usable value may override what <col> provided
            if parsed is not None:
                widths[i] = parsed

    return widths

263 

264 

def parse_html_width(width_value) -> Optional[int]:
    """
    Parse HTML width value (e.g., "100px", "20%", "100").

    Accepted forms:
    - non-negative ints, returned unchanged
    - finite non-negative floats, truncated to int
    - strings holding a non-negative decimal number with an optional ``px``
      suffix in any letter case (" 100 ", "100px", "100PX", "100.5px");
      a fractional part is truncated

    Args:
        width_value: HTML width attribute value

    Returns:
        Width in pixels, or None if percentage/invalid/negative
    """
    # bool is a subclass of int but is never a meaningful width
    if isinstance(width_value, bool):
        return None

    if isinstance(width_value, int):
        return width_value if width_value >= 0 else None

    if isinstance(width_value, float):
        try:
            pixels = int(width_value)
        except (ValueError, OverflowError):
            # NaN or infinity
            return None
        return pixels if pixels >= 0 else None

    if not isinstance(width_value, str):
        return None

    text = width_value.strip().lower()

    # Percentage widths not supported yet
    if '%' in text:
        return None

    # Drop an optional pixel unit (and any space before it, as in "100 px")
    if text.endswith('px'):
        text = text[:-2].rstrip()

    if text.startswith('+'):
        text = text[1:]

    whole, dot, fraction = text.partition('.')

    # Require plain ASCII digits so values such as "-5", "abc", "1e3" or
    # "nan" are rejected rather than misread.
    if not (whole.isascii() and whole.isdigit()):
        return None
    if dot and fraction and not (fraction.isascii() and fraction.isdigit()):
        return None

    return int(whole)

300 

301 

def distribute_column_widths(min_widths: List[int],
                             pref_widths: List[int],
                             available_width: int,
                             fixed_columns: Dict[int, int]) -> List[int]:
    """
    Distribute width among columns, respecting fixed column widths.

    Fixed columns always receive exactly their fixed width. The remaining
    ("flexible") columns share whatever is left of ``available_width``:

    - preferred widths fit: every column gets its preferred width plus a
      share of the surplus proportional to that preferred width
    - not even the minimum widths fit: every column gets its minimum width
      (the table overflows rather than squeezing content)
    - otherwise: every column gets its minimum width plus a share of the
      remaining space proportional to (preferred - minimum)

    A preferred width smaller than the column's minimum is raised to the
    minimum, so a flexible column is never assigned less than its minimum.
    Fractional results are truncated, so the total may fall a few pixels
    short of the available width.

    Args:
        min_widths: Minimum width for each column
        pref_widths: Preferred width for each column
        available_width: Total width available
        fixed_columns: Dict mapping column index to fixed width

    Returns:
        List of final column widths
    """
    n_cols = len(min_widths)
    if n_cols == 0:
        return []

    # Get indices of flexible columns
    flexible_cols = [i for i in range(n_cols) if i not in fixed_columns]

    if not flexible_cols:
        # All columns fixed - return as-is
        return [fixed_columns[i] for i in range(n_cols)]

    # Never let the preferred width undercut the minimum; otherwise the
    # proportional formulas below could hand out less than the minimum.
    pref = {i: max(pref_widths[i], min_widths[i]) for i in flexible_cols}

    # Calculate available space and totals for flexible columns only
    flexible_available = available_width - sum(fixed_columns.values())
    flex_min_total = sum(min_widths[i] for i in flexible_cols)
    flex_pref_total = sum(pref.values())

    widths = [0] * n_cols

    # Set fixed columns
    for i, width in fixed_columns.items():
        widths[i] = width

    if flex_pref_total <= flexible_available:
        # Preferred widths fit - distribute remaining space proportionally
        extra_space = flexible_available - flex_pref_total

        if extra_space > 0 and flex_pref_total > 0:
            for i in flexible_cols:
                proportion = pref[i] / flex_pref_total
                widths[i] = int(pref[i] + (extra_space * proportion))
        else:
            # No extra space, just use preferred widths
            for i in flexible_cols:
                widths[i] = pref[i]
    elif flex_min_total > flexible_available:
        # Can't satisfy minimum - force it anyway (graceful degradation)
        for i in flexible_cols:
            widths[i] = min_widths[i]
    else:
        # Proportional distribution between min and pref.
        # Here flex_min_total <= flexible_available < flex_pref_total, so the
        # divisor below is strictly positive.
        extra_space = flexible_available - flex_min_total
        flex_pref_over_min = flex_pref_total - flex_min_total

        for i in flexible_cols:
            pref_over_min = pref[i] - min_widths[i]
            proportion = pref_over_min / flex_pref_over_min
            widths[i] = int(min_widths[i] + extra_space * proportion)

    return widths

377 

378 

def calculate_table_overhead(n_cols: int, style) -> int:
    """
    Work out how many pixels of a table's width are taken by borders and
    inter-cell spacing rather than by column content.

    Args:
        n_cols: Number of columns
        style: TableStyle object; ``border_width`` is always read,
            ``cell_spacing`` only when there is more than one column

    Returns:
        Total pixel overhead
    """
    # n_cols columns are delimited by n_cols + 1 border lines
    total = style.border_width * (n_cols + 1)

    # Spacing only exists between neighbouring columns
    if n_cols > 1:
        gaps = n_cols - 1
        total = total + style.cell_spacing * gaps

    return total
396 return border_overhead + spacing_overhead