Coverage for pyWebLayout/core/highlight.py: 95%

95 statements  

« prev     ^ index     » next       coverage.py v7.11.2, created at 2025-11-12 12:02 +0000

1""" 

2Text highlighting system for ebook reader. 

3 

4Provides data structures and utilities for highlighting text regions, 

5managing highlight collections, and rendering highlights on pages. 

6""" 

7 

8from __future__ import annotations 

9from dataclasses import dataclass 

10from typing import List, Tuple, Optional, Dict, Any 

11from enum import Enum 

12import json 

13from pathlib import Path 

14 

15 

class HighlightColor(Enum):
    """Named highlight presets; each value is an (R, G, B, A) tuple."""

    # All presets share the same alpha component (100).
    YELLOW = (255, 255, 0, 100)    # classic highlighter yellow
    GREEN = (100, 255, 100, 100)   # verified / correct
    BLUE = (100, 200, 255, 100)    # important
    PINK = (255, 150, 200, 100)    # questions
    ORANGE = (255, 180, 100, 100)  # warnings
    PURPLE = (200, 150, 255, 100)  # definitions
    RED = (255, 100, 100, 100)     # errors / concerns

25 

26 

@dataclass
class Highlight:
    """
    Represents a highlighted text region.

    Highlights are stored with both pixel bounds (for rendering) and
    semantic bounds (text content, for persistence across font changes).

    ``tags`` may be omitted or passed as ``None``; in both cases it is
    normalised to a fresh empty list by ``__post_init__``.
    """
    # Identification
    id: str  # Unique identifier

    # Visual properties
    bounds: List[Tuple[int, int, int, int]]  # List of (x, y, w, h) rectangles
    color: Tuple[int, int, int, int]  # RGBA color

    # Semantic properties (for persistence)
    text: str  # The highlighted text
    start_word_index: Optional[int] = None  # Word index in document (if available)
    end_word_index: Optional[int] = None

    # Metadata
    note: Optional[str] = None  # Optional annotation
    # Optional categorization tags. Declared Optional because None is the
    # accepted "not given" value; never None after __post_init__ has run.
    tags: Optional[List[str]] = None
    timestamp: Optional[float] = None  # When created

    def __post_init__(self):
        """Replace a missing (``None``) ``tags`` value with a new empty list."""
        if self.tags is None:
            self.tags = []

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize to a dictionary.

        Returns:
            A dict with the keys ``id``, ``bounds``, ``color``, ``text``,
            ``start_word_index``, ``end_word_index``, ``note``, ``tags`` and
            ``timestamp``. ``bounds`` and ``tags`` are shallow copies, so
            mutating the returned dict does not alter this highlight.
        """
        return {
            'id': self.id,
            'bounds': list(self.bounds),
            'color': self.color,
            'text': self.text,
            'start_word_index': self.start_word_index,
            'end_word_index': self.end_word_index,
            'note': self.note,
            'tags': list(self.tags),
            'timestamp': self.timestamp,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'Highlight':
        """
        Deserialize from a dictionary such as the one ``to_dict`` produces.

        Args:
            data: Mapping with the required keys ``id``, ``bounds``,
                ``color`` and ``text``; the remaining keys are optional.

        Returns:
            A new Highlight. Each bounds entry and the color are converted
            to tuples, and ``tags`` is copied so the instance does not share
            a list with ``data``.

        Raises:
            KeyError: If a required key is missing.
        """
        return cls(
            id=data['id'],
            bounds=[tuple(rect) for rect in data['bounds']],
            color=tuple(data['color']),
            text=data['text'],
            start_word_index=data.get('start_word_index'),
            end_word_index=data.get('end_word_index'),
            note=data.get('note'),
            # A missing key and an explicit None both become an empty list.
            tags=list(data.get('tags') or []),
            timestamp=data.get('timestamp'),
        )

85 

86 

class HighlightManager:
    """
    Manages highlights for a document.

    Handles adding, removing, listing, and persisting highlights. Every
    mutating call rewrites the document's JSON file inside
    ``highlights_dir``. Load and save failures are reported with ``print``
    and otherwise swallowed, so persistence is best-effort.
    """

    def __init__(self, document_id: str, highlights_dir: str = "highlights"):
        """
        Initialize highlight manager.

        Creates ``highlights_dir`` if needed and loads any highlights
        already stored for ``document_id``.

        Args:
            document_id: Unique identifier for the document
            highlights_dir: Directory to store highlight data
        """
        self.document_id = document_id
        self.highlights_dir = Path(highlights_dir)
        self.highlights: Dict[str, Highlight] = {}  # id -> Highlight

        # Create directory if it doesn't exist
        self.highlights_dir.mkdir(parents=True, exist_ok=True)

        # Load existing highlights
        self._load_highlights()

    def add_highlight(self, highlight: Highlight) -> None:
        """
        Add a highlight and persist the collection.

        A highlight with the same id is replaced.

        Args:
            highlight: Highlight to add
        """
        self.highlights[highlight.id] = highlight
        self._save_highlights()

    def remove_highlight(self, highlight_id: str) -> bool:
        """
        Remove a highlight by ID.

        Args:
            highlight_id: ID of highlight to remove

        Returns:
            True if removed, False if not found
        """
        if highlight_id not in self.highlights:
            return False
        del self.highlights[highlight_id]
        self._save_highlights()
        return True

    def get_highlight(self, highlight_id: str) -> Optional[Highlight]:
        """Get a highlight by ID, or None if there is no such highlight."""
        return self.highlights.get(highlight_id)

    def list_highlights(self) -> List[Highlight]:
        """Get all highlights as a new list."""
        return list(self.highlights.values())

    def clear_all(self) -> None:
        """Remove all highlights and persist the now-empty collection."""
        self.highlights.clear()
        self._save_highlights()

    def get_highlights_for_page(
            self, page_bounds: Tuple[int, int, int, int]) -> List[Highlight]:
        """
        Get highlights that appear on a specific page.

        A highlight is included when at least one of its rectangles
        overlaps the page rectangle; rectangles that merely share an edge
        do not count as overlapping.

        Args:
            page_bounds: Page bounds (x, y, width, height)

        Returns:
            List of highlights on this page
        """
        page_x, page_y, page_w, page_h = page_bounds
        page_right = page_x + page_w
        page_bottom = page_y + page_h

        return [
            highlight
            for highlight in self.highlights.values()
            if any(
                hx < page_right and hx + hw > page_x
                and hy < page_bottom and hy + hh > page_y
                for hx, hy, hw, hh in highlight.bounds
            )
        ]

    def _get_filepath(self) -> Path:
        """Get filepath for this document's highlights"""
        # NOTE(review): document_id is interpolated into the filename as-is;
        # confirm callers never pass values containing path separators.
        return self.highlights_dir / f"{self.document_id}_highlights.json"

    def _save_highlights(self) -> None:
        """
        Persist highlights to disk.

        The JSON is written to a sibling ``*.tmp`` file which then replaces
        the real file, so an interrupted write cannot leave a truncated
        highlights file behind. Failures are printed, not raised.
        """
        tmp_path = None
        try:
            filepath = self._get_filepath()
            tmp_path = filepath.with_name(filepath.name + ".tmp")
            data = {
                'document_id': self.document_id,
                'highlights': [h.to_dict() for h in self.highlights.values()]
            }

            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2)
            # Swap the finished file into place in a single step.
            tmp_path.replace(filepath)
        except Exception as e:
            print(f"Error saving highlights: {e}")
            if tmp_path is not None:
                try:
                    tmp_path.unlink()
                except OSError:
                    # Nothing to clean up (or cleanup itself failed);
                    # the save error has already been reported above.
                    pass

    def _load_highlights(self) -> None:
        """
        Load highlights from disk.

        Does nothing when the document has no highlights file. If the file
        cannot be read or parsed, the error is printed and the in-memory
        collection is reset to empty.
        """
        try:
            filepath = self._get_filepath()
            if not filepath.exists():
                return

            with open(filepath, 'r', encoding='utf-8') as f:
                data = json.load(f)

            self.highlights = {
                h['id']: Highlight.from_dict(h)
                for h in data.get('highlights', [])
            }
        except Exception as e:
            print(f"Error loading highlights: {e}")
            self.highlights = {}

210 

211 

def create_highlight_from_query_result(
    result,
    color: Tuple[int, int, int, int] = HighlightColor.YELLOW.value,
    note: Optional[str] = None,
    tags: Optional[List[str]] = None
) -> Highlight:
    """
    Create a highlight from a QueryResult.

    ``result`` is treated as a multi-result selection when it has a
    ``results`` attribute (its ``bounds_list`` and ``text`` are used);
    otherwise it is treated as a single result (its ``bounds`` and ``text``
    are used). In both cases missing text becomes an empty string.

    Args:
        result: QueryResult from query_pixel or query_range
        color: RGBA color tuple
        note: Optional annotation
        tags: Optional categorization tags

    Returns:
        Highlight instance with a newly generated UUID4 id and the current
        time as its timestamp. Its ``bounds`` and ``tags`` lists are copies,
        so later changes to ``result`` or to the caller's ``tags`` list do
        not affect the highlight.
    """
    from time import time
    import uuid

    # Handle single result or SelectionRange
    if hasattr(result, 'results'):  # SelectionRange
        # Copy so the highlight does not alias the selection's own list.
        bounds = list(result.bounds_list)
    else:  # Single QueryResult
        bounds = [result.bounds]

    # Normalise in both branches so Highlight.text is always a str.
    text = result.text or ""

    return Highlight(
        id=str(uuid.uuid4()),
        bounds=bounds,
        color=color,
        text=text,
        note=note,
        tags=list(tags) if tags else [],
        timestamp=time()
    )