Coverage for pyWebLayout/core/highlight.py: 95%

1"""

2Text highlighting system for ebook reader.

4Provides data structures and utilities for highlighting text regions,

5managing highlight collections, and rendering highlights on pages.

6"""

8from __future__ import annotations

9from dataclasses import dataclass

10from typing import List, Tuple, Optional, Dict, Any

11from enum import Enum

12import json

13from pathlib import Path

class HighlightColor(Enum):
    """
    Named highlight colors.

    Each member's value is an ``(R, G, B, A)`` tuple of integers; every
    predefined color uses an alpha of 100.
    """

    # Classic highlight yellow
    YELLOW = (255, 255, 0, 100)
    # Green for verified/correct
    GREEN = (100, 255, 100, 100)
    # Blue for important
    BLUE = (100, 200, 255, 100)
    # Pink for questions
    PINK = (255, 150, 200, 100)
    # Orange for warnings
    ORANGE = (255, 180, 100, 100)
    # Purple for definitions
    PURPLE = (200, 150, 255, 100)
    # Red for errors/concerns
    RED = (255, 100, 100, 100)

@dataclass
class Highlight:
    """
    Represents a highlighted text region.

    Highlights are stored with both pixel bounds (for rendering) and
    semantic bounds (text content, intended for persistence across font
    changes).

    Instances round-trip through plain dictionaries via ``to_dict`` and
    ``from_dict`` so they can be written to and read from JSON.
    """
    # Identification
    id: str  # Unique identifier

    # Visual properties
    bounds: List[Tuple[int, int, int, int]]  # List of (x, y, w, h) rectangles
    color: Tuple[int, int, int, int]  # RGBA color

    # Semantic properties (for persistence)
    text: str  # The highlighted text
    start_word_index: Optional[int] = None  # Word index in document (if available)
    end_word_index: Optional[int] = None

    # Metadata
    note: Optional[str] = None  # Optional annotation
    # Optional categorization tags; None is accepted at construction time and
    # replaced with a fresh empty list in __post_init__, so instances never
    # share a default list.
    tags: Optional[List[str]] = None
    timestamp: Optional[float] = None  # When created

    def __post_init__(self):
        """Replace a missing ``tags`` value with a new empty list."""
        if self.tags is None:
            self.tags = []

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize to a dictionary.

        Returns:
            Dictionary with the keys ``id``, ``bounds``, ``color``, ``text``,
            ``start_word_index``, ``end_word_index``, ``note``, ``tags`` and
            ``timestamp``. The ``bounds`` and ``tags`` lists are copies, so
            mutating the returned dictionary does not alter this highlight.
        """
        return {
            'id': self.id,
            'bounds': list(self.bounds),
            'color': self.color,
            'text': self.text,
            'start_word_index': self.start_word_index,
            'end_word_index': self.end_word_index,
            'note': self.note,
            'tags': list(self.tags or []),
            'timestamp': self.timestamp,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'Highlight':
        """
        Deserialize from a dictionary.

        Args:
            data: Mapping in the shape produced by ``to_dict``. ``id``,
                ``bounds``, ``color`` and ``text`` are required; the other
                keys are optional.

        Returns:
            A new Highlight. Each bounds entry and the color are converted
            to tuples (JSON stores them as lists), and ``tags`` is copied so
            the highlight does not share a list with ``data``.

        Raises:
            KeyError: If a required key is missing.
        """
        return cls(
            id=data['id'],
            bounds=[tuple(b) for b in data['bounds']],
            color=tuple(data['color']),
            text=data['text'],
            start_word_index=data.get('start_word_index'),
            end_word_index=data.get('end_word_index'),
            note=data.get('note'),
            # `or []` also covers an explicit null stored under 'tags'.
            tags=list(data.get('tags') or []),
            timestamp=data.get('timestamp'),
        )

class HighlightManager:
    """
    Manages highlights for a document.

    Handles adding, removing, listing, and persisting highlights. The
    collection is kept in memory as an ``id -> Highlight`` mapping and is
    written to ``<highlights_dir>/<document_id>_highlights.json`` after every
    mutating call.
    """

    def __init__(self, document_id: str, highlights_dir: str = "highlights"):
        """
        Initialize highlight manager.

        Creates ``highlights_dir`` if needed and loads any highlights already
        saved for ``document_id``.

        Args:
            document_id: Unique identifier for the document
            highlights_dir: Directory to store highlight data
        """
        self.document_id = document_id
        self.highlights_dir = Path(highlights_dir)
        self.highlights: Dict[str, Highlight] = {}  # id -> Highlight

        # Create directory if it doesn't exist
        self.highlights_dir.mkdir(parents=True, exist_ok=True)

        # Load existing highlights
        self._load_highlights()

    def add_highlight(self, highlight: Highlight) -> None:
        """
        Add a highlight and persist the collection.

        A highlight with an id that is already present replaces the existing
        one.

        Args:
            highlight: Highlight to add
        """
        self.highlights[highlight.id] = highlight
        self._save_highlights()

    def remove_highlight(self, highlight_id: str) -> bool:
        """
        Remove a highlight by ID.

        Args:
            highlight_id: ID of highlight to remove

        Returns:
            True if removed, False if not found
        """
        if highlight_id not in self.highlights:
            return False
        del self.highlights[highlight_id]
        self._save_highlights()
        return True

    def get_highlight(self, highlight_id: str) -> Optional[Highlight]:
        """Get a highlight by ID, or None if there is no such highlight."""
        return self.highlights.get(highlight_id)

    def list_highlights(self) -> List[Highlight]:
        """Get all highlights as a new list."""
        return list(self.highlights.values())

    def clear_all(self) -> None:
        """Remove all highlights and persist the now-empty collection."""
        self.highlights.clear()
        self._save_highlights()

    def get_highlights_for_page(
            self, page_bounds: Tuple[int, int, int, int]) -> List[Highlight]:
        """
        Get highlights that appear on a specific page.

        A highlight is included when at least one of its rectangles overlaps
        the page rectangle. The comparisons are strict, so rectangles that
        only touch along an edge do not count as overlapping.

        Args:
            page_bounds: Page bounds (x, y, width, height)

        Returns:
            List of highlights on this page, each listed once
        """
        page_x, page_y, page_w, page_h = page_bounds
        page_right = page_x + page_w
        page_bottom = page_y + page_h

        def overlaps_page(rect: Tuple[int, int, int, int]) -> bool:
            hx, hy, hw, hh = rect
            return (hx < page_right and hx + hw > page_x and
                    hy < page_bottom and hy + hh > page_y)

        return [
            highlight for highlight in self.highlights.values()
            if any(overlaps_page(rect) for rect in highlight.bounds)
        ]

    def _get_filepath(self) -> Path:
        """Get filepath for this document's highlights"""
        return self.highlights_dir / f"{self.document_id}_highlights.json"

    def _save_highlights(self) -> None:
        """
        Persist highlights to disk.

        The JSON is written to a sibling ``.tmp`` file which then replaces
        the real file, so a failure part-way through serialization or writing
        leaves the previously saved highlights intact. Saving is best-effort:
        errors are reported with ``print`` and not raised.
        """
        tmp_path = None
        try:
            filepath = self._get_filepath()
            tmp_path = filepath.with_name(filepath.name + ".tmp")
            data = {
                'document_id': self.document_id,
                'highlights': [h.to_dict() for h in self.highlights.values()]
            }

            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2)
            tmp_path.replace(filepath)
        except Exception as e:
            print(f"Error saving highlights: {e}")
            # Don't leave a partial temp file behind after a failed save.
            if tmp_path is not None:
                try:
                    tmp_path.unlink()
                except OSError:
                    pass

    def _load_highlights(self) -> None:
        """
        Load highlights from disk.

        Does nothing when no file exists for this document. An entry that
        cannot be turned into a Highlight is reported and skipped so the
        remaining entries still load. If the file as a whole cannot be read
        or parsed, the error is reported and the collection is left empty.
        """
        try:
            filepath = self._get_filepath()
            if not filepath.exists():
                return

            with open(filepath, 'r', encoding='utf-8') as f:
                data = json.load(f)

            loaded: Dict[str, Highlight] = {}
            for entry in data.get('highlights', []):
                try:
                    loaded[entry['id']] = Highlight.from_dict(entry)
                except (KeyError, TypeError, ValueError) as e:
                    # One malformed entry should not discard the others.
                    print(f"Error loading highlights: {e}")
            self.highlights = loaded
        except Exception as e:
            print(f"Error loading highlights: {e}")
            self.highlights = {}

210

211

def create_highlight_from_query_result(
    result,
    color: Tuple[int, int, int, int] = HighlightColor.YELLOW.value,
    note: Optional[str] = None,
    tags: Optional[List[str]] = None
) -> Highlight:
    """
    Create a highlight from a QueryResult.

    Args:
        result: QueryResult from query_pixel or query_range. An object with a
            ``results`` attribute is treated as a SelectionRange and its
            ``bounds_list`` is used; otherwise the single ``bounds`` value is
            wrapped in a one-element list.
        color: RGBA color tuple. A ``HighlightColor`` member is also accepted
            and is replaced by its tuple value.
        note: Optional annotation
        tags: Optional categorization tags; the list is copied

    Returns:
        Highlight instance with a freshly generated UUID as its id and the
        current time as its timestamp
    """
    from time import time
    import uuid

    # Store the plain RGBA tuple: an Enum member is not JSON-serializable.
    if isinstance(color, HighlightColor):
        color = color.value

    # Handle single result or SelectionRange
    if hasattr(result, 'results'):  # SelectionRange
        bounds = result.bounds_list
    else:  # Single QueryResult
        bounds = [result.bounds]

    return Highlight(
        id=str(uuid.uuid4()),
        bounds=bounds,
        color=color,
        # Normalize a missing text to "" for both result kinds.
        text=result.text or "",
        note=note,
        # Copy so the highlight does not share the caller's list.
        tags=list(tags) if tags else [],
        timestamp=time()
    )