Merge pull request #492 from swanson/vk-feed-title-xss

Sanitize feed titles
stringer-rss · Nov 5, 2018 · 3712cf2 · 3712cf2
2 parents c8c9144 + 35b9183
commit 3712cf2
Show file tree

Hide file tree

Showing 6 changed files with 66 additions and 37 deletions.
diff --git a/app/commands/feeds/add_new_feed.rb b/app/commands/feeds/add_new_feed.rb
@@ -1,4 +1,5 @@
 require_relative "../../models/feed"
+require_relative "../../utils/content_sanitizer"
 require_relative "../../utils/feed_discovery"
 
 class AddNewFeed
@@ -8,7 +9,7 @@ def self.add(url, discoverer = FeedDiscovery.new, repo = Feed)
     result = discoverer.discover(url)
     return false unless result
 
-    repo.create(name: result.title,
+    repo.create(name: ContentSanitizer.sanitize(result.title),
                 url: result.feed_url,
                 last_fetched: Time.now - ONE_DAY)
   end

diff --git a/app/repositories/story_repository.rb b/app/repositories/story_repository.rb
@@ -1,5 +1,6 @@
 require_relative "../helpers/url_helpers"
 require_relative "../models/story"
+require_relative "../utils/content_sanitizer"
 require_relative "../utils/sample_story"
 
 class StoryRepository
@@ -91,9 +92,9 @@ def self.extract_content(entry)
     sanitized_content = ""
 
     if entry.content
-      sanitized_content = sanitize(entry.content)
+      sanitized_content = ContentSanitizer.sanitize(entry.content)
     elsif entry.summary
-      sanitized_content = sanitize(entry.summary)
+      sanitized_content = ContentSanitizer.sanitize(entry.summary)
     end
 
     if entry.url.present?
@@ -104,18 +105,11 @@ def self.extract_content(entry)
   end
 
   def self.extract_title(entry)
-    return sanitize(entry.title) if entry.title.present?
-    return sanitize(entry.summary) if entry.summary.present?
+    return ContentSanitizer.sanitize(entry.title) if entry.title.present?
+    return ContentSanitizer.sanitize(entry.summary) if entry.summary.present?
     "There isn't a title for this story"
   end
 
-  def self.sanitize(content)
-    Loofah.fragment(content.gsub(/<wbr\s*>/i, ""))
-          .scrub!(:prune)
-          .scrub!(:unprintable)
-          .to_s
-  end
-
   def self.samples
     [
       SampleStory.new("Darin' Fireballs", "Why you should trade your firstborn for a Retina iPad"),

diff --git a/app/utils/content_sanitizer.rb b/app/utils/content_sanitizer.rb
@@ -0,0 +1,8 @@
+class ContentSanitizer
+  def self.sanitize(content)
+    Loofah.fragment(content.gsub(/<wbr\s*>/i, ""))
+          .scrub!(:prune)
+          .scrub!(:unprintable)
+          .to_s
+  end
+end
diff --git a/spec/commands/feeds/add_new_feed_spec.rb b/spec/commands/feeds/add_new_feed_spec.rb
@@ -27,6 +27,18 @@
 
         expect(result).to be feed
       end
+
+      context "title includes a script tag" do
+        let(:feed_result) { double(title: "foo<script>alert('xss');</script>bar", feed_url: feed.url) }
+
+        it "deletes the script tag from the title" do
+          allow(repo).to receive(:create)
+
+          AddNewFeed.add("http://feed.com", discoverer, repo)
+
+          expect(repo).to have_received(:create).with(include(name: "foobar"))
+        end
+      end
     end
   end
 end
diff --git a/spec/repositories/story_repository_spec.rb b/spec/repositories/story_repository_spec.rb
@@ -16,14 +16,23 @@
       StoryRepository.add(entry, feed)
     end
 
-    it "sanitizes titles" do
+    it "deletes line and paragraph separator characters from titles" do
       entry = double(title: "n\u2028\u2029", content: "").as_null_object
       allow(StoryRepository).to receive(:normalize_url)
 
       expect(Story).to receive(:create).with(hash_including(title: "n"))
 
       StoryRepository.add(entry, feed)
     end
+
+    it "deletes script tags from titles" do
+      entry = double(title: "n<script>alert('xss');</script>", content: "").as_null_object
+      allow(StoryRepository).to receive(:normalize_url)
+
+      expect(Story).to receive(:create).with(hash_including(title: "n"))
+
+      StoryRepository.add(entry, feed)
+    end
   end
 
   describe ".extract_url" do
@@ -102,28 +111,4 @@
       expect(StoryRepository.extract_content(entry)).to eq "<a href=\"page\">Page</a>"
     end
   end
-
-  describe ".sanitize" do
-    context "regressions" do
-      it "handles <wbr> tag properly" do
-        result = StoryRepository.sanitize("<code>WM_<wbr\t\n >ERROR</code> asdf")
-        expect(result).to eq "<code>WM_ERROR</code> asdf"
-      end
-
-      it "handles <figure> tag properly" do
-        result = StoryRepository.sanitize("<figure>some code</figure>")
-        expect(result).to eq "<figure>some code</figure>"
-      end
-
-      it "handles unprintable characters" do
-        result = StoryRepository.sanitize("n\u2028\u2029")
-        expect(result).to eq "n"
-      end
-
-      it "preserves line endings" do
-        result = StoryRepository.sanitize("test\r\ncase")
-        expect(result).to eq "test\r\ncase"
-      end
-    end
-  end
 end
diff --git a/spec/utils/content_sanitizer_spec.rb b/spec/utils/content_sanitizer_spec.rb
@@ -0,0 +1,29 @@
+require "spec_helper"
+
+app_require "utils/content_sanitizer"
+
+describe ContentSanitizer do
+  describe ".sanitize" do
+    context "regressions" do
+      it "handles <wbr> tag properly" do
+        result = described_class.sanitize("<code>WM_<wbr\t\n >ERROR</code> asdf")
+        expect(result).to eq "<code>WM_ERROR</code> asdf"
+      end
+
+      it "handles <figure> tag properly" do
+        result = described_class.sanitize("<figure>some code</figure>")
+        expect(result).to eq "<figure>some code</figure>"
+      end
+
+      it "handles unprintable characters" do
+        result = described_class.sanitize("n\u2028\u2029")
+        expect(result).to eq "n"
+      end
+
+      it "preserves line endings" do
+        result = described_class.sanitize("test\r\ncase")
+        expect(result).to eq "test\r\ncase"
+      end
+    end
+  end
+end