Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions internal/cli/common/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ func StripHTML(s string) string {
s = RemoveTagWithContent(s, "head")

// Replace block-level elements with newlines before stripping tags
// Note: other table structure elements (table, td, th, tbody, thead, tfoot) are NOT
// included because tables are typically used for layout; tr is included to separate rows.
blockTags := []string{"br", "p", "div", "tr", "li", "h1", "h2", "h3", "h4", "h5", "h6"}
for _, tag := range blockTags {
// Handle <br>, <br/>, <br />
Expand Down Expand Up @@ -52,17 +54,25 @@ func StripHTML(s string) string {
text = strings.ReplaceAll(text, " ", " ")
}

// Collapse multiple newlines
for strings.Contains(text, "\n\n\n") {
text = strings.ReplaceAll(text, "\n\n\n", "\n\n")
}

// Trim spaces from each line
// Trim spaces from each line first
lines := strings.Split(text, "\n")
for i, line := range lines {
lines[i] = strings.TrimSpace(line)
}
text = strings.Join(lines, "\n")

// Remove consecutive empty lines, keeping at most one blank line
var cleanedLines []string
prevEmpty := false
for _, line := range lines {
isEmpty := line == ""
if isEmpty && prevEmpty {
continue // Skip consecutive empty lines
}
cleanedLines = append(cleanedLines, line)
prevEmpty = isEmpty
}

text = strings.Join(cleanedLines, "\n")

// Remove leading/trailing empty lines
return strings.TrimSpace(text)
Expand Down