From eb3c2fc39809124f859e3b64f5c33025b85146ac Mon Sep 17 00:00:00 2001 From: Jonathan Harker Date: Tue, 24 Nov 2015 10:09:48 +1300 Subject: [PATCH] URL title is the Content-Type header for non-HTML responses. --- models.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/models.py b/models.py index 61e8d2a..00fa79e 100644 --- a/models.py +++ b/models.py @@ -68,8 +68,11 @@ class Url(Model): if title is None: r = requests.get(url) if r.status_code == 200: - dom = BeautifulSoup(r.content, 'html.parser') - self.title = dom.title.string + if 'text/html' in r.headers['content-type']: + dom = BeautifulSoup(r.content, 'html.parser') + self.title = dom.title.string + else: + self.title = r.headers['content-type'] else: self.title = "Error: HTTP %s" % r.status_code