From 9f4e4ecf4977e13fc1e7518bb4ea1ca3470aee02 Mon Sep 17 00:00:00 2001
From: Fredrik Ekelund
Date: Tue, 25 Jul 2017 18:29:05 +0200
Subject: [PATCH] Added try/catch block around URL parsing in
 Crawler#getRobotsTxt redirection logic

More in #363
---
 lib/crawler.js | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/lib/crawler.js b/lib/crawler.js
index 19f9adb..94f5cbc 100644
--- a/lib/crawler.js
+++ b/lib/crawler.js
@@ -650,9 +650,18 @@ Crawler.prototype.getRobotsTxt = function(url, callback) {
 
             response.destroy();
 
-            var redirectTarget = uri(response.headers.location)
-                .absoluteTo(robotsTxtUrl)
-                .normalize();
+            var redirectTarget;
+
+            try {
+                redirectTarget = uri(response.headers.location)
+                    .absoluteTo(robotsTxtUrl)
+                    .normalize();
+            } catch (error) {
+                var robotsTxtHost = uri(robotsTxtUrl).pathname("").href();
+                errorMsg = util.format("Faulty redirect URL when fetching robots.txt for %s", robotsTxtHost);
+
+                return callback(new Error(errorMsg));
+            }
 
             if (crawler.domainValid(redirectTarget.hostname())) {
                 crawler.getRobotsTxt(redirectTarget.href(), callback);
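
Note: the following is a minimal standalone sketch of the defensive pattern this patch introduces, shown outside the crawler for clarity. It assumes URI.js is installed as the "urijs" package (the `uri` identifier used in lib/crawler.js); the `resolveRedirect` helper and the example URLs are hypothetical and only illustrate how a throwing URL resolution is reported through the callback instead of becoming an uncaught exception.

    var uri = require("urijs");

    // Hypothetical helper mirroring the patched redirection logic: the URL
    // resolution that may throw is wrapped in try/catch, and any failure is
    // passed to the callback as an Error.
    function resolveRedirect(location, baseUrl, callback) {
        var redirectTarget;

        try {
            // URI.js can throw on malformed input (for example hostnames with
            // illegal characters), which previously crashed Crawler#getRobotsTxt.
            redirectTarget = uri(location)
                .absoluteTo(baseUrl)
                .normalize();
        } catch (error) {
            return callback(new Error("Faulty redirect URL: " + location));
        }

        callback(null, redirectTarget.href());
    }

    // A well-formed relative Location header resolves against the base URL...
    resolveRedirect("/robots.txt", "http://example.com/index.html", function(error, href) {
        console.log(error || href); // http://example.com/robots.txt
    });

    // ...while a malformed one may (depending on the URI.js version and input)
    // hit the catch branch and surface as a callback error rather than a crash.
    resolveRedirect("http://bad host/robots.txt", "http://example.com/", function(error, href) {
        console.log(error || href);
    });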