Skip to content

feat(article): build/extend article descriptions #274

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
feat(article): build/extend article descriptions
Signed-off-by: Gil Desmarais <git@desmarais.de>
  • Loading branch information
gildesmarais committed Jul 7, 2025
commit 8cc50e7488ca8f0372462acff6dc5885e92a7ff0
16 changes: 7 additions & 9 deletions lib/html2rss/rss_builder/article.rb
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,13 @@ def title
# Returns the article description, computing it on first access and caching
# the result in @description.
#
# NOTE(review): this span comes from a rendered diff ("7 additions & 9
# deletions"), so it appears to show the removed inline implementation and the
# added DescriptionBuilder call side by side. Presumably only the
# DescriptionBuilder call remains in the resulting file -- confirm against the
# merged source.
def description
# Reuse the cached value; defined? is also true for a cached nil.
return @description if defined?(@description)

# Returns nil, without caching anything, when the raw description is nil or empty.
return if (description = @to_h[:description]).to_s.empty?

# Strip the title from the start of the description, only when a title exists.
description = self.class.remove_pattern_from_start(description, title) if title

# HTML descriptions are passed to SanitizeHtml; anything else is only stripped.
@description = if self.class.contains_html?(description)
Html2rss::Selectors::PostProcessors::SanitizeHtml.get(description, url)
else
description.strip
end
# Read literally, this assignment overwrites the value assigned just above.
@description = DescriptionBuilder.new(
base: @to_h[:description],
title:,
url:,
enclosure:,
image:
).call
end

# @return [Addressable::URI, nil]
Expand Down
96 changes: 96 additions & 0 deletions lib/html2rss/rss_builder/description_builder.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# frozen_string_literal: true

require 'cgi'

module Html2rss
  class RssBuilder
    ##
    # Builds an article description from the base text, title, and optional media.
    # Supports image, video, audio, and PDF enclosures.
    #
    # The base text is passed through SanitizeHtml. The media markup is generated
    # here and is NOT passed through the sanitizer, so every value interpolated
    # into it (title, URLs, MIME type) is HTML-escaped first.
    class DescriptionBuilder
      # Static attribute strings shared by the generated tags.
      #
      # NOTE(review): crossorigin="anonymous" makes browsers fetch the media in CORS
      # mode, and an iframe `height` attribute is interpreted as pixels, not "vh" --
      # both values are kept as-is because the existing specs pin them; confirm
      # they are intended.
      IMAGE_ATTRIBUTES = 'loading="lazy" referrerpolicy="no-referrer" decoding="async" crossorigin="anonymous"'
      VIDEO_ATTRIBUTES = 'controls preload="none" referrerpolicy="no-referrer" crossorigin="anonymous" playsinline'
      AUDIO_ATTRIBUTES = 'controls preload="none" referrerpolicy="no-referrer" crossorigin="anonymous"'
      PDF_ATTRIBUTES = 'width="100%" height="75vh" sandbox="" referrerpolicy="no-referrer" loading="lazy"'
      private_constant :IMAGE_ATTRIBUTES, :VIDEO_ATTRIBUTES, :AUDIO_ATTRIBUTES, :PDF_ATTRIBUTES

      # @param base [#to_s] raw description text; nil is treated as an empty string
      # @param title [String, nil] article title, used for alt/title attributes and
      #   removed from the start of the base text when present
      # @param url [Object] article URL, handed to SanitizeHtml unchanged
      # @param enclosure [#url, #type, nil] optional enclosure; its type selects the markup
      # @param image [#to_s, nil] fallback image, used only when the enclosure is not an image
      def initialize(base:, title:, url:, enclosure:, image:)
        @base = base.to_s
        @title = title
        @url = url
        @enclosure = enclosure
        @enclosure_type = @enclosure&.type.to_s
        @image = image
      end

      # @return [String, nil] media markup followed by the processed base text,
      #   joined with newlines and stripped; nil when the result is empty
      def call
        fragments = media_fragments << processed_base_description
        description = fragments.compact.join("\n").strip
        description unless description.empty?
      end

      private

      # Collects the markup for every media kind that applies, image first.
      def media_fragments
        [
          image_fragment,
          (render_video if video?),
          (render_audio if audio?),
          (render_pdf if pdf?)
        ].compact
      end

      # An image enclosure takes precedence over the fallback image.
      def image_fragment
        if image_from_enclosure?
          render_image(@enclosure.url)
        elsif @image
          render_image(@image)
        end
      end

      def image_from_enclosure? = @enclosure_type.start_with?('image/')
      def video? = @enclosure_type.start_with?('video/')
      def audio? = @enclosure_type.start_with?('audio/')
      def pdf? = @enclosure_type.start_with?('application/pdf')

      # Escapes a value for use inside a double-quoted HTML attribute.
      # `to_s` makes a nil title render as an empty attribute instead of raising.
      def escape(value) = CGI.escapeHTML(value.to_s)

      # Renders an <img> for the given source; the title doubles as alt text.
      def render_image(src)
        title = escape(@title)
        %(<img src="#{escape(src)}" alt="#{title}" title="#{title}" #{IMAGE_ATTRIBUTES}>)
      end

      def render_video = render_media('video', VIDEO_ATTRIBUTES)

      def render_audio = render_media('audio', AUDIO_ATTRIBUTES)

      # Renders a <video>/<audio> element wrapping one <source> for the enclosure.
      def render_media(tag, attributes)
        source = %(<source src="#{escape(@enclosure.url)}" type="#{escape(@enclosure.type)}">)
        "<#{tag} #{attributes}>\n#{source}\n</#{tag}>"
      end

      def render_pdf
        %(<iframe src="#{escape(@enclosure.url)}" #{PDF_ATTRIBUTES}></iframe>)
      end

      # Removes a leading title from the base text and sanitizes the remainder.
      # Returns nil for blank base text so only media markup (if any) is emitted.
      def processed_base_description
        return if @base.strip.empty?

        # Title removal only makes sense when there is a title to remove.
        text = @title ? Article.remove_pattern_from_start(@base, @title) : @base
        Html2rss::Selectors::PostProcessors::SanitizeHtml.get(text, @url)
      end
    end
  end
end
24 changes: 6 additions & 18 deletions spec/lib/html2rss/rss_builder/article_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,26 +43,14 @@
end

describe '#description' do
it 'returns the description if present', :aggregate_failures do
description = instance.description

expect(description).to eq('By John Doe')
expect(description.encoding).to eq(Encoding::UTF_8)
end

it 'returns nil if no description is present' do
instance = described_class.new(title: 'Sample instance')
expect(instance.description).to be_nil
end

it 'removes the title from the description if present' do
instance = described_class.new(title: 'Sample instance', description: ' Sample instance By John Doe ')
expect(instance.description).to eq('By John Doe')
before do
allow(Html2rss::RssBuilder::DescriptionBuilder).to receive(:new).and_call_original
instance.description
end

it 'sanitizes the HTML in the description' do
instance = described_class.new(description: '<b>Some bold text</b><script>alert();</script>')
expect(instance.description).to eq('<b>Some bold text</b>')
it 'calls the DescriptionBuilder' do
expect(Html2rss::RssBuilder::DescriptionBuilder).to have_received(:new)
.with(base: 'By John Doe', title: 'Sample instance', url: instance.url, enclosure: nil, image: nil)
end
end

Expand Down
193 changes: 193 additions & 0 deletions spec/lib/html2rss/rss_builder/description_builder_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
# frozen_string_literal: true

require 'spec_helper'
require 'nokogiri'

RSpec.describe Html2rss::RssBuilder::DescriptionBuilder do
  subject(:description) { described_class.new(base:, title:, url:, enclosure:, image:).call }

  let(:title) { 'Sample instance' }
  let(:url) { 'http://example.com' }
  let(:image) { nil }
  let(:enclosure) { nil }

  # Attributes every rendered <img> must carry in addition to its src.
  let(:image_attributes) do
    {
      'alt' => title,
      'title' => title,
      'loading' => 'lazy',
      'referrerpolicy' => 'no-referrer',
      'decoding' => 'async',
      'crossorigin' => 'anonymous'
    }
  end

  # Parses the built description so examples can look tags up via CSS selectors.
  def parse_html
    Nokogiri::HTML.fragment(description)
  end

  # Builds a verified Enclosure double exposing only url and type.
  def enclosure_double(url:, type:)
    instance_double(Html2rss::RssBuilder::Enclosure, url:, type:)
  end

  # Checks each expected attribute on the tag. A nil value means "the attribute
  # must be present, whatever its value" (used for boolean attributes).
  def expect_tag_with_attributes(tag, attributes)
    attributes.each do |key, value|
      if value.nil?
        expect(tag.has_attribute?(key)).to be true
      else
        expect(tag[key]).to eq(value)
      end
    end
  end

  describe '#call' do
    context 'when base description is present without HTML' do
      let(:base) { 'By John Doe' }

      it 'returns the base description unchanged' do
        expect(description).to eq('By John Doe')
      end
    end

    context 'when base description contains HTML' do
      let(:base) { '<b>Some bold text</b>' }

      before do
        allow(Html2rss::Selectors::PostProcessors::SanitizeHtml).to receive(:get)
          .with(base, url)
          .and_call_original
      end

      it 'sanitizes the HTML', :aggregate_failures do
        expect(description).to eq('<b>Some bold text</b>')
        expect(Html2rss::Selectors::PostProcessors::SanitizeHtml).to have_received(:get).with(base, url)
      end
    end

    context 'when description starts with the title' do
      let(:base) { 'Sample instance By John Doe' }

      # `include('By John Doe')` alone would also pass if the title were left in
      # place, so the absence of the title is asserted explicitly.
      it 'removes the title from the start', :aggregate_failures do
        expect(description).to include('By John Doe')
        expect(description).not_to include(title)
      end
    end

    context 'when image enclosure is present' do
      let(:base) { 'Caption' }
      let(:enclosure) { enclosure_double(url: 'http://example.com/image.jpg', type: 'image/jpeg') }

      it 'renders correct <img> tag with attributes', :aggregate_failures do
        img = parse_html.at_css('img')

        expect(img).not_to be_nil
        expect_tag_with_attributes(img, image_attributes.merge('src' => 'http://example.com/image.jpg'))
      end
    end

    context 'when fallback image is present' do
      let(:base) { 'Something' }
      let(:image) { 'http://example.com/fallback.jpg' }

      it 'renders fallback <img> tag with attributes', :aggregate_failures do
        img = parse_html.at_css('img')

        expect(img).not_to be_nil
        expect_tag_with_attributes(img, image_attributes.merge('src' => 'http://example.com/fallback.jpg'))
      end
    end

    context 'when enclosure is a video' do
      let(:base) { 'Watch this' }
      let(:enclosure) { enclosure_double(url: 'http://example.com/video.mp4', type: 'video/mp4') }

      it 'renders correct <video> and <source> tags with attributes', :aggregate_failures do # rubocop:disable RSpec/ExampleLength
        video = parse_html.at_css('video')
        source = video&.at_css('source')

        expect(video).not_to be_nil
        expect(source).not_to be_nil

        expect_tag_with_attributes(video, {
                                     'controls' => nil,
                                     'preload' => 'none',
                                     'referrerpolicy' => 'no-referrer',
                                     'crossorigin' => 'anonymous',
                                     'playsinline' => nil
                                   })

        expect_tag_with_attributes(source, {
                                     'src' => 'http://example.com/video.mp4',
                                     'type' => 'video/mp4'
                                   })
      end
    end

    context 'when enclosure is audio' do
      let(:base) { 'Listen to this' }
      let(:enclosure) { enclosure_double(url: 'http://example.com/audio.mp3', type: 'audio/mpeg') }

      it 'renders correct <audio> and <source> tags with attributes', :aggregate_failures do # rubocop:disable RSpec/ExampleLength
        audio = parse_html.at_css('audio')
        source = audio&.at_css('source')

        expect(audio).not_to be_nil
        expect(source).not_to be_nil

        expect_tag_with_attributes(audio, {
                                     'controls' => nil,
                                     'preload' => 'none',
                                     'referrerpolicy' => 'no-referrer',
                                     'crossorigin' => 'anonymous'
                                   })

        expect_tag_with_attributes(source, {
                                     'src' => 'http://example.com/audio.mp3',
                                     'type' => 'audio/mpeg'
                                   })
      end
    end

    context 'when enclosure is a PDF' do
      let(:base) { 'See this document' }
      let(:enclosure) { enclosure_double(url: 'http://example.com/doc.pdf', type: 'application/pdf') }

      it 'renders correct <iframe> tag with attributes', :aggregate_failures do # rubocop:disable RSpec/ExampleLength
        iframe = parse_html.at_css('iframe')
        expect(iframe).not_to be_nil

        expect_tag_with_attributes(iframe, {
                                     'src' => 'http://example.com/doc.pdf',
                                     'width' => '100%',
                                     'height' => '75vh',
                                     'sandbox' => '',
                                     'referrerpolicy' => 'no-referrer',
                                     'loading' => 'lazy'
                                   })
      end
    end

    context 'when the base is empty and no media is present' do
      let(:base) { '' }

      it 'returns nil' do
        expect(description).to be_nil
      end
    end
  end
end
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy