Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,10 @@ gem 'unicorn', require: false
gem 'puma', require: false
gem 'rbtrace', require: false

# required for feed importing and embedding
gem 'ruby-readability', require: false
gem 'simple-rss', require: false

# perftools only works on 1.9 atm
group :profile do
# travis refuses to install this, instead of fuffing, just avoid it for now
Expand Down
7 changes: 7 additions & 0 deletions Gemfile_rails4.lock
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ GEM
fspath (2.0.5)
given_core (3.1.1)
sorcerer (>= 0.3.7)
guess_html_encoding (0.0.9)
handlebars-source (1.1.2)
hashie (2.0.5)
highline (1.6.20)
Expand Down Expand Up @@ -309,6 +310,9 @@ GEM
rspec-mocks (~> 2.14.0)
ruby-hmac (0.4.0)
ruby-openid (2.3.0)
ruby-readability (0.5.7)
guess_html_encoding (>= 0.0.4)
nokogiri (>= 1.4.2)
sanitize (2.0.6)
nokogiri (>= 1.4.4)
sass (3.2.12)
Expand Down Expand Up @@ -337,6 +341,7 @@ GEM
celluloid (>= 0.14.1)
ice_cube (~> 0.11.0)
sidekiq (~> 2.15.0)
simple-rss (1.3.1)
simplecov (0.7.1)
multi_json (~> 1.0)
simplecov-html (~> 0.7.1)
Expand Down Expand Up @@ -466,6 +471,7 @@ DEPENDENCIES
rinku
rspec-given
rspec-rails
ruby-readability
sanitize
sass
sass-rails
Expand All @@ -474,6 +480,7 @@ DEPENDENCIES
sidekiq (= 2.15.1)
sidekiq-failures
sidetiq (>= 0.3.6)
simple-rss
simplecov
sinatra
slim
Expand Down
27 changes: 27 additions & 0 deletions app/assets/javascripts/embed.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/* global discourseUrl */
/* global discourseEmbedUrl */
// Injects the Discourse comments iframe into the host page.
// Reads the globals `discourseUrl` and `discourseEmbedUrl`, appends an iframe whose src is
// `discourseUrl + "embed/best?embed_url=" + encodeURIComponent(discourseEmbedUrl)` to the
// element with id `discourse-comments`, and sets the iframe height whenever a
// `discourse-resize` message carrying a `height` arrives.
(function() {

// NOTE(review): `comments` is null when the page has no #discourse-comments element, in
// which case appendChild below throws -- confirm embedders always provide that element.
var comments = document.getElementById('discourse-comments'),
iframe = document.createElement('iframe');
iframe.src = discourseUrl + "embed/best?embed_url=" + encodeURIComponent(discourseEmbedUrl);
iframe.id = 'discourse-embed-frame';
iframe.width = "100%";
iframe.frameBorder = "0";
iframe.scrolling = "no";
comments.appendChild(iframe);


// Message handler: ignores events without a payload or from an unexpected origin.
function postMessageReceived(e) {
if (!e) { return; }
// Drop the message unless e.origin occurs somewhere inside discourseUrl.
if (discourseUrl.indexOf(e.origin) === -1) { return; }

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔷 Medium: The origin check is reversed; it should verify that e.origin starts with the configured discourseUrl, otherwise a crafty partial match could pass or legitimate origins with trailing slashes/ports could fail.

Suggested change
if (discourseUrl.indexOf(e.origin) === -1) { return; }
if (!e.origin || e.origin.indexOf(discourseUrl.replace(/\/$/, '')) !== 0) { return; }


if (e.data) {
if (e.data.type === 'discourse-resize' && e.data.height) {
iframe.height = e.data.height + "px";
}
}
}
window.addEventListener('message', postMessageReceived, false);

})();
69 changes: 69 additions & 0 deletions app/assets/stylesheets/embed.css.scss
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
//= require ./vendor/normalize
//= require ./common/foundation/base

// One embedded post: a fixed-width author column on the left, cooked content beside it.
article.post {
border-bottom: 1px solid #ddd;

// Post date link, floated to the right edge of the post.
.post-date {
float: right;
color: #aaa;
font-size: 12px;
margin: 4px 4px 0 0;
}

// Author column; its 92px width equals the left margin of .cooked below.
.author {
padding: 20px 0;
width: 92px;
float: left;

text-align: center;

h3 {
text-align: center;
color: #4a6b82;
font-size: 13px;
margin: 0;
}
}

// Post body, offset by the width of the floated .author column.
.cooked {
padding: 20px 0;
margin-left: 92px;

p {
margin: 0 0 1em 0;
}
}
}

// Top bar of the embed.
header {
padding: 10px 10px 20px 10px;

font-size: 18px;

border-bottom: 1px solid #ddd;
}

// Bottom bar of the embed; adds spacing on top of the shared .logo and button rules.
footer {
font-size: 18px;

.logo {
margin-right: 10px;
margin-top: 10px;
}

a[href].button {
margin: 10px 0 0 10px;
}
}

// Logo image shared by header and footer: floated right, capped at 30px tall.
.logo {
float: right;
max-height: 30px;
}

// Link styled as a button.
a[href].button {
background-color: #eee;
padding: 5px;
display: inline-block;
}
34 changes: 34 additions & 0 deletions app/controllers/embed_controller.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
class EmbedController < ApplicationController
skip_before_filter :check_xhr
skip_before_filter :preload_json
before_filter :ensure_embeddable

layout 'embed'

# Renders the embed view for the page given in params[:embed_url] (a required parameter).
# When TopicEmbed.topic_id_for_embed finds a topic for that URL, a TopicView is built with
# the option `best: 5`; otherwise the :retrieve_topic job is enqueued with the current
# user's id (nil when there is no current user) and the 'loading' template is rendered.
# In both cases discourse_expires_in is called with one minute.
def best
embed_url = params.require(:embed_url)
topic_id = TopicEmbed.topic_id_for_embed(embed_url)

if topic_id
@topic_view = TopicView.new(topic_id, current_user, {best: 5})
else
Jobs.enqueue(:retrieve_topic, user_id: current_user.try(:id), embed_url: embed_url)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ High: The controller enqueues a job :retrieve_topic that isn't defined in this PR, and the spec asserts that TopicRetriever is instantiated and retrieve is called. This will break tests and the feature at runtime. Delegate to TopicRetriever here (and if you later add a job, have it call the same retriever to keep guards consistent).

Suggested change
Jobs.enqueue(:retrieve_topic, user_id: current_user.try(:id), embed_url: embed_url)
TopicRetriever.new(embed_url).retrieve

render 'loading'
end

discourse_expires_in 1.minute
end

private

# Guard run before embed actions.
#
# Raises Discourse::InvalidAccess when no embeddable host is configured, or when the
# host of the request's referer (missing and unparseable referers included) differs
# from the configured host. On success the X-Frame-Options response header is set so
# the page may be framed.
def ensure_embeddable
  allowed_host = SiteSetting.embeddable_host
  raise Discourse::InvalidAccess.new('embeddable host not set') if allowed_host.blank?

  # A missing referer parses as an empty URI whose host is nil, which never matches.
  referer_host = URI(request.referer || '').host
  unless referer_host == allowed_host
    raise Discourse::InvalidAccess.new('invalid referer host')
  end

  response.headers['X-Frame-Options'] = "ALLOWALL"
rescue URI::InvalidURIError
  raise Discourse::InvalidAccess.new('invalid referer host')
end


end
24 changes: 24 additions & 0 deletions app/jobs/regular/retrieve_topic.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
require_dependency 'email/sender'
require_dependency 'topic_retriever'

module Jobs

# Asynchronously retrieve a topic from an embedded site
class RetrieveTopic < Jobs::Base

# Retrieves the topic for args[:embed_url] through TopicRetriever#retrieve.
# Raises Discourse::InvalidParameters when args[:embed_url] is not present.
# When args[:user_id] is given, the user is looked up by id and the retriever's
# no_throttle option receives the result of user.staff? (nil when no user is found).
def execute(args)
raise Discourse::InvalidParameters.new(:embed_url) unless args[:embed_url].present?

user = nil
if args[:user_id]
user = User.where(id: args[:user_id]).first
end

TopicRetriever.new(args[:embed_url], no_throttle: user.try(:staff?)).retrieve

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ High: Fetching an arbitrary embed_url without validating its host risks SSRF and unintended internal calls. Validate the URL's host against SiteSetting.embeddable_host (allowing subdomains if intended) before retrieving.

Suggested change
TopicRetriever.new(args[:embed_url], no_throttle: user.try(:staff?)).retrieve
uri = URI.parse(args[:embed_url]) rescue nil
raise Discourse::InvalidParameters.new(:embed_url) unless uri && uri.host && SiteSetting.embeddable_host.present? && (uri.host == SiteSetting.embeddable_host || uri.host.end_with?(".#{SiteSetting.embeddable_host}"))
TopicRetriever.new(args[:embed_url], no_throttle: user.try(:staff?)).retrieve

end

end

end


41 changes: 41 additions & 0 deletions app/jobs/scheduled/poll_feed.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#
# Creates and Updates Topics based on an RSS or ATOM feed.
#
require 'digest/sha1'
require_dependency 'post_creator'
require_dependency 'post_revisor'
require 'open-uri'

module Jobs
  # Hourly scheduled job that reads the configured RSS/Atom feed and hands every
  # item to TopicEmbed.import as the user named by SiteSetting.embed_by_username.
  # Sidekiq retries are disabled for this job.
  class PollFeed < Jobs::Scheduled
    recurrence { hourly }
    sidekiq_options retry: false

    # Polls only when feed polling is enabled and both the feed URL and the
    # importing username are configured. `args` is unused.
    def execute(args)
      poll_feed if SiteSetting.feed_polling_enabled? &&
                   SiteSetting.feed_polling_url.present? &&
                   SiteSetting.embed_by_username.present?
    end

    # Key derived from the SHA1 of the feed URL, memoized per job instance.
    def feed_key
      @feed_key ||= "feed-modified:#{Digest::SHA1.hexdigest(SiteSetting.feed_polling_url)}"
    end

    # Downloads the feed and imports each item. Does nothing when the importing
    # user does not exist or the feed URL is not http(s).
    def poll_feed
      user = User.where(username_lower: SiteSetting.embed_by_username.downcase).first
      return if user.blank?

      # Kernel#open would run "|cmd" as a subprocess and read plain paths from
      # disk, so only an explicit http(s) URL is ever fetched.
      feed_url = SiteSetting.feed_polling_url
      return unless feed_url.to_s =~ %r{\Ahttps?://}

      # Loaded lazily: the gem is declared with `require: false`.
      require 'simple-rss'
      # URI#open comes from open-uri; the block form closes the stream after parsing.
      rss = URI.parse(feed_url).open { |io| SimpleRSS.parse(io) }

      rss.items.each do |i|
        # An item without content used to raise on `.scrub` and abort the whole poll.
        next if i.content.nil?

        # Fall back to the item id when the link is missing or not an http(s) URL.
        url = i.link
        url = i.id if url.blank? || url !~ /^https?\:\/\//

        content = CGI.unescapeHTML(i.content.scrub)
        TopicEmbed.import(user, url, i.title, content)
      end
    end

  end
end
9 changes: 9 additions & 0 deletions app/models/post.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ def self.types
@types ||= Enum.new(:regular, :moderator_action)
end

# Enum of the ways a post can be cooked: :regular and :raw_html.
# The Enum is built on first use and the same instance is returned afterwards.
def self.cook_methods
  return @cook_methods if @cook_methods

  @cook_methods = Enum.new(:regular, :raw_html)
end

# Returns the first post that has a post_details row with the given key and value,
# loading post_details along with it.
def self.find_by_detail(key, value)
  detail_filter = { post_details: { key: key, value: value } }
  with_details = includes(:post_details)
  with_details.where(detail_filter).first
end
Expand Down Expand Up @@ -124,6 +128,11 @@ def post_analyzer
end

# Produces the cooked form of this post.
# Posts whose cook_method equals Post.cook_methods[:raw_html] get `raw` back unchanged;
# every other post is passed through post_analyzer.cook(*args) and then the
# :after_post_cook plugin filter.
# NOTE(review): the raw_html branch returns `raw` with no sanitization visible in this
# method -- confirm that only trusted or already-sanitized HTML is stored with it.
def cook(*args)
# For some posts, for example those imported via RSS, we support raw HTML. In that
# case we can skip the rendering pipeline.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛑 Critical: This returns raw HTML directly and bypasses the normal sanitization pipeline, which opens an XSS vector if the content originates from remote feeds or untrusted sources (e.g., '<script>alert(1)</script>' would be rendered as-is). At minimum, sanitize the HTML before returning, or route it through a sanitizer that matches Discourse's allowlist.

Suggested change
# case we can skip the rendering pipeline.
return Sanitize.fragment(raw, Sanitize::Config::RELAXED) if cook_method == Post.cook_methods[:raw_html]

return raw if cook_method == Post.cook_methods[:raw_html]

# Default is to cook posts
Plugin::Filter.apply(:after_post_cook, self, post_analyzer.cook(*args))
end

Expand Down
82 changes: 82 additions & 0 deletions app/models/topic_embed.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
require_dependency 'nokogiri'

class TopicEmbed < ActiveRecord::Base
belongs_to :topic
belongs_to :post
validates_presence_of :embed_url
validates_presence_of :content_sha1

# Import an article from a source (RSS/Atom/Other).
#
# Returns nil unless `url` matches /^https?\:\/\//. Otherwise an "imported from" footer is
# appended to `contents` and the SHA1 of the result is computed. If no TopicEmbed exists
# for `url`, a post is created through PostCreator (validations skipped, raw_html cook
# method) and, when that yields a post, a TopicEmbed row is created in the same
# transaction. If one exists, its post is revised only when the SHA1 differs from the
# stored one. Returns the post from PostCreator#create or the existing embed's post.
#
# NOTE(review): `contents <<` modifies the caller's string in place, and `url` is
# interpolated into the footer's href without escaping -- confirm both are intended.
def self.import(user, url, title, contents)
return unless url =~ /^https?\:\/\//

contents << "\n<hr>\n<small>#{I18n.t('embed.imported_from', link: "<a href='#{url}'>#{url}</a>")}</small>\n"

embed = TopicEmbed.where(embed_url: url).first
content_sha1 = Digest::SHA1.hexdigest(contents)
post = nil

# If there is no embed, create a topic, post and the embed.
if embed.blank?
Topic.transaction do
creator = PostCreator.new(user, title: title, raw: absolutize_urls(url, contents), skip_validations: true, cook_method: Post.cook_methods[:raw_html])

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛑 Critical: Security: Untrusted HTML is being persisted and later rendered with 'raw' by using cook_method :raw_html and skipping validations. This allows XSS (e.g., <img src=x onerror=alert(1)> from a feed item will execute in the embed). Additionally, earlier in this method the appended footer interpolates url directly into an href attribute without escaping, enabling attribute injection if the URL contains quotes. Please sanitize imported HTML and escape the appended URL. At minimum, avoid :raw_html here and let the standard cooking/sanitization pipeline run.

Suggested change
creator = PostCreator.new(user, title: title, raw: absolutize_urls(url, contents), skip_validations: true, cook_method: Post.cook_methods[:raw_html])
creator = PostCreator.new(user, title: title, raw: absolutize_urls(url, contents), skip_validations: true, cook_method: Post.cook_methods[:regular])

post = creator.create
if post.present?
TopicEmbed.create!(topic_id: post.topic_id,
embed_url: url,
content_sha1: content_sha1,
post_id: post.id)
end
end
else
post = embed.post
# Update the topic if it changed
if content_sha1 != embed.content_sha1
revisor = PostRevisor.new(post)
revisor.revise!(user, absolutize_urls(url, contents), skip_validations: true, bypass_rate_limiter: true)
embed.update_column(:content_sha1, content_sha1)
end
end

post
end

# Fetches a remote page, extracts its readable content and imports it as a topic.
#
# user - author passed on to TopicEmbed.import.
# url  - http(s) address of the article.
# opts - optional Hash; opts[:title] overrides the title Readability extracted.
#
# Returns nil without opening anything when `url` is not an http(s) URL (such URLs
# would be rejected by TopicEmbed.import afterwards anyway); otherwise returns the
# result of TopicEmbed.import.
def self.import_remote(user, url, opts=nil)
  # Validate before opening: Kernel#open runs "|cmd" as a subprocess and reads
  # plain paths from the local disk.
  return unless url.to_s =~ %r{\Ahttps?://}

  # Loaded lazily: the gem is declared with `require: false`.
  require 'ruby-readability'

  opts = opts || {}
  # URI#open (open-uri) only performs the HTTP fetch; the block form closes the stream.
  html = URI.parse(url).open { |io| io.read }
  doc = Readability::Document.new(html,
                                  tags: %w[div p code pre h1 h2 h3 b em i strong a img],
                                  attributes: %w[href src])

  TopicEmbed.import(user, url, opts[:title] || doc.title, doc.content)
end

# Convert any relative URLs to absolute. RSS is annoying for this.
#
# url      - absolute URL of the source article; supplies scheme, host and port.
# contents - HTML fragment whose <a href> and <img src> values are rewritten.
#
# Root-relative values ("/path") are prefixed with the article's origin, while
# protocol-relative values ("//host/path") only gain the article's scheme.
# Values that do not start with "/" are left alone.
# Returns the rewritten fragment serialized with to_html.
def self.absolutize_urls(url, contents)
  uri = URI(url)
  prefix = "#{uri.scheme}://#{uri.host}"
  # Compare with the scheme's own default so e.g. https on port 80 keeps its port.
  prefix << ":#{uri.port}" if uri.port != uri.default_port

  fragment = Nokogiri::HTML.fragment(contents)
  { 'a' => 'href', 'img' => 'src' }.each do |tag, attribute|
    fragment.css(tag).each do |node|
      link = node[attribute]
      next unless link.present? && link.start_with?('/')

      node[attribute] =
        if link =~ %r{\A//[^/]}
          # Protocol-relative: the host is already present, only the scheme is missing.
          "#{uri.scheme}:#{link}"
        else
          "#{prefix}/#{link.sub(/^\/+/, '')}"
        end
    end
  end

  fragment.to_html
end

# Returns the topic_id of the first TopicEmbed row stored for `embed_url`,
# or nil when no row matches.
def self.topic_id_for_embed(embed_url)
  matching_topic_ids = TopicEmbed.where(embed_url: embed_url).pluck(:topic_id)
  matching_topic_ids.first
end

end
30 changes: 30 additions & 0 deletions app/views/embed/best.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<header>
<%# Link to the topic: labelled 'embed.title' when the topic view has posts, 'embed.start_discussion' otherwise. %>
<%- if @topic_view.posts.present? %>
<%= link_to(I18n.t('embed.title'), @topic_view.topic.url, class: 'button', target: '_blank') %>
<%- else %>
<%= link_to(I18n.t('embed.start_discussion'), @topic_view.topic.url, class: 'button', target: '_blank') %>
<%- end %>

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛑 Critical: ERB syntax error: end if is invalid and will crash template rendering. Close the conditional with a plain end.

Suggested change
<%- end if %>
<%- end %>


<%= link_to(image_tag(SiteSetting.logo_url, class: 'logo'), Discourse.base_url) %>
</header>

<%# Post list and footer, rendered only when the topic view has posts. Each post shows its date link, the author's avatar and username, and its cooked HTML output unescaped via `raw`. %>
<%- if @topic_view.posts.present? %>
<%- @topic_view.posts.each do |post| %>
<article class='post'>
<%= link_to post.created_at.strftime("%e %b %Y"), post.url, class: 'post-date', target: "_blank" %>
<div class='author'>
<img src='<%= post.user.small_avatar_url %>'>
<h3><%= post.user.username %></h3>
</div>
<div class='cooked'><%= raw post.cooked %></div>
<div style='clear: both'></div>
</article>
<%- end %>

<footer>
<%= link_to(I18n.t('embed.continue'), @topic_view.topic.url, class: 'button', target: '_blank') %>
<%= link_to(image_tag(SiteSetting.logo_url, class: 'logo'), Discourse.base_url) %>
</footer>

<% end %>

Loading