djangocms_blog/djangocms_blog/feeds.py

195 lines
6.8 KiB
Python
Raw Normal View History

2014-01-04 17:07:09 +01:00
# -*- coding: utf-8 -*-
2015-09-13 00:46:05 +02:00
from __future__ import absolute_import, print_function, unicode_literals
2015-09-07 09:18:17 +02:00
from aldryn_apphooks_config.utils import get_app_instance
2014-01-04 17:07:09 +01:00
from django.contrib.sites.models import Site
from django.contrib.syndication.views import Feed
2016-04-30 00:45:17 +02:00
from django.core.cache import cache
2014-01-04 17:07:09 +01:00
from django.core.urlresolvers import reverse
2016-04-30 00:45:17 +02:00
from django.utils.encoding import force_text
2016-04-30 11:14:27 +02:00
from django.utils.feedgenerator import Rss201rev2Feed
2016-04-30 11:04:26 +02:00
from django.utils.html import strip_tags
2016-04-23 21:17:51 +02:00
from django.utils.safestring import mark_safe
2016-04-30 11:14:27 +02:00
from django.utils.six import BytesIO
2016-05-12 07:19:57 +02:00
from django.utils.text import normalize_newlines
2016-04-30 00:45:17 +02:00
from django.utils.translation import get_language_from_request, ugettext as _
2016-04-30 11:14:27 +02:00
from lxml import etree
2014-01-04 17:07:09 +01:00
2016-04-23 21:17:51 +02:00
from djangocms_blog.settings import get_setting
from djangocms_blog.views import PostDetailView
2016-04-30 00:45:17 +02:00
2014-01-04 17:07:09 +01:00
from .models import Post
2016-04-30 11:14:27 +02:00
# Module-level HTML entity decoder: the rest of the module calls
# ``h.unescape(text)`` to turn entities such as ``&amp;`` back into characters.
try:
    # Python 2: ``HTMLParser`` is a top-level module.
    import HTMLParser
    h = HTMLParser.HTMLParser()
except ImportError:
    # Python 3: the parser class lives in ``html.parser``.
    from html.parser import HTMLParser

    h = HTMLParser()
    if not hasattr(h, 'unescape'):
        # ``HTMLParser.unescape`` was deprecated in Python 3.4 and removed in
        # Python 3.9; ``html.unescape`` is its documented replacement. Attach
        # it to the instance so existing ``h.unescape(...)`` callers keep
        # working unchanged.
        import html

        h.unescape = html.unescape
2016-04-30 11:14:27 +02:00
2014-01-04 17:07:09 +01:00
class LatestEntriesFeed(Feed):
    """
    RSS 2.0 feed listing the latest published posts of a blog apphook.

    The request, the apphook namespace and the apphook configuration are
    stored on the feed instance by ``__call__`` so that the other hooks
    (``link``, ``items``, ...) can use them.
    """
    feed_type = Rss201rev2Feed
    # Maximum number of posts returned by ``items``.
    feed_items_number = get_setting('FEED_LATEST_ITEMS')

    def __call__(self, request, *args, **kwargs):
        """
        Record the request and its apphook instance, then delegate to ``Feed``.

        :param request: the incoming HTTP request
        :return: whatever ``Feed.__call__`` returns for this request
        """
        self.request = request
        namespace, config = get_app_instance(request)
        self.namespace = namespace
        self.config = config
        return super(LatestEntriesFeed, self).__call__(request, *args, **kwargs)

    def link(self):
        """Return the URL of the ``posts-latest`` view in the current namespace."""
        url_name = '%s:posts-latest' % self.namespace
        return reverse(url_name, current_app=self.namespace)

    def title(self):
        """Return the name of the current site as the feed title."""
        current_site = Site.objects.get_current()
        return current_site.name

    def description(self):
        """Return a translated description mentioning the current site name."""
        template = _('Blog articles on %(site_name)s')
        return template % {'site_name': Site.objects.get_current().name}

    def items(self, obj=None):
        """
        Return the published posts of the current namespace, newest first.

        The result is ordered by descending ``date_published`` and truncated
        to ``feed_items_number`` entries. ``obj`` is not used here.
        """
        published = Post.objects.namespace(self.namespace).published()
        newest_first = published.order_by('-date_published')
        return newest_first[:self.feed_items_number]

    def item_title(self, item):
        """Return the translated post title, marked as safe markup."""
        title = item.safe_translation_getter('title')
        return mark_safe(title)

    def item_description(self, item):
        """
        Return the post abstract or the post text, marked as safe markup.

        The abstract is used when the post's app config has ``use_abstract``
        set; otherwise the ``post_text`` field is used.
        """
        field_name = 'abstract' if item.app_config.use_abstract else 'post_text'
        return mark_safe(item.safe_translation_getter(field_name))

    def item_updateddate(self, item):
        """Return the post's last modification date."""
        return item.date_modified

    def item_pubdate(self, item):
        """Return the post's publication date."""
        return item.date_published

    def item_guid(self, item):
        """Return the post's ``guid`` attribute as the item identifier."""
        return item.guid

    def item_author_name(self, item):
        """Return the author name as provided by the post."""
        return item.get_author_name()

    def item_author_url(self, item):
        """Return the author URL as provided by the post."""
        return item.get_author_url()
2014-01-04 17:07:09 +01:00
class TagFeed(LatestEntriesFeed):
    """
    Feed of the published posts carrying a given tag.

    Unlike the parent feed, ``items`` neither restricts the posts to the
    current apphook namespace nor applies an explicit ordering.
    """
    # Maximum number of posts returned by ``items``.
    feed_items_number = get_setting('FEED_TAGS_ITEMS')

    def get_object(self, request, tag):
        """Use the ``tag`` value itself as the feed object."""
        return tag  # pragma: no cover

    def items(self, obj=None):
        """
        Return the published posts whose tag slug equals ``obj``.

        The result is truncated to ``feed_items_number`` entries.
        """
        tagged_posts = Post.objects.published().filter(tags__slug=obj)
        return tagged_posts[:self.feed_items_number]
2016-04-23 21:17:51 +02:00
class FBInstantFeed(Rss201rev2Feed):
    """
    RSS 2.0 generator that embeds each item's full HTML in ``content:encoded``.

    Item descriptions and contents are written as raw CDATA sections instead
    of being XML-escaped, and dates are formatted with ``date_format``.
    """
    # ISO 8601-style timestamp (e.g. ``2016-05-12T07:19:57+0200``).
    date_format = '%Y-%m-%dT%H:%M:%S%z'

    @staticmethod
    def _cdata_safe(text):
        """
        Make ``text`` safe to embed inside a single CDATA section.

        A literal ``]]>`` would terminate the section early and yield
        malformed XML; it is split across two adjacent CDATA sections, which
        XML parsers read back as the original character sequence.

        :param text: text about to be written between ``<![CDATA[`` and ``]]>``
        :return: ``text`` with every ``]]>`` split across CDATA sections
        """
        return text.replace(']]>', ']]]]><![CDATA[>')

    def rss_attributes(self):
        """Return the ``<rss>`` attributes, declaring the ``content`` namespace."""
        return {
            'version': self._version,
            'xmlns:content': 'http://purl.org/rss/1.0/modules/content/'
        }

    def add_root_elements(self, handler):
        """
        Write the channel-level elements of the feed.

        The parent implementation is deliberately not called; only the
        elements listed below are emitted.

        :param handler: XML generator the feed is written to
        """
        handler.addQuickElement('title', self.feed['title'])
        handler.addQuickElement('link', self.feed['link'])
        handler.addQuickElement('description', self.feed['description'])
        if self.feed['language'] is not None:
            handler.addQuickElement('language', self.feed['language'])
        for cat in self.feed['categories']:
            handler.addQuickElement('category', cat)
        if self.feed['feed_copyright'] is not None:
            handler.addQuickElement('copyright', self.feed['feed_copyright'])
        handler.addQuickElement(
            'lastBuildDate', self.latest_post_date().strftime(self.date_format)
        )
        if self.feed['ttl'] is not None:
            handler.addQuickElement('ttl', self.feed['ttl'])

    def add_item_elements(self, handler, item):
        """
        Write one feed item, including its abstract and full HTML content.

        After the parent elements, this emits ``author``, ``pubDate`` and
        ``modDate`` (when available), then ``description`` and
        ``content:encoded`` as CDATA sections.

        :param handler: XML generator the feed is written to
        :param item: item dictionary; the keys ``author``, ``date_pub``,
            ``date_mod``, ``abstract`` and ``content`` are read here
        """
        super(FBInstantFeed, self).add_item_elements(handler, item)
        if item['author']:
            handler.addQuickElement('author', item['author'])
        if item['date_pub'] is not None:
            handler.addQuickElement('pubDate', item['date_pub'].strftime(self.date_format))
        if item['date_mod'] is not None:
            handler.addQuickElement('modDate', item['date_mod'].strftime(self.date_format))

        # ``handler._write`` emits raw, unescaped markup, so the CDATA payloads
        # must be protected against an embedded ``]]>`` terminator.
        handler.startElement('description', {})
        # Newlines are collapsed to spaces so the abstract stays on one line.
        abstract = h.unescape(
            normalize_newlines(force_text(item['abstract'])).replace('\n', ' ')
        )
        handler._write('<![CDATA[{0}]]>'.format(self._cdata_safe(abstract)))
        handler.endElement('description')

        handler.startElement('content:encoded', {})
        handler._write('<![CDATA[')
        handler._write('<!doctype html>')
        handler._write(self._cdata_safe(h.unescape(force_text(item['content']))))
        handler._write(']]>')
        handler.endElement('content:encoded')
class FBInstantArticles(LatestEntriesFeed):
    """
    Feed serving posts rendered in "instant article" form via ``FBInstantFeed``.

    Each item carries the HTML produced by ``PostDetailView`` (called with
    ``instant_article=True``), cleaned up by ``_clean_html`` and cached.
    """
    feed_type = FBInstantFeed
    # Maximum number of posts returned by ``items``.
    feed_items_number = get_setting('FEED_INSTANT_ITEMS')

    def items(self, obj=None):
        """
        Return the published posts of the current namespace, latest change first.

        The result is ordered by descending ``date_modified`` and truncated
        to ``feed_items_number`` entries. ``obj`` is not used here.
        """
        published = Post.objects.namespace(self.namespace).published()
        return published.order_by('-date_modified')[:self.feed_items_number]

    def _clean_html(self, content):
        """
        Normalise rendered HTML before it is embedded in the feed.

        Empty ``<p>`` elements (no non-blank text, no children) are removed,
        and ``h3``-``h6`` headings are turned into ``h2`` unless their
        ``class`` attribute contains ``op-kicker``.

        :param content: HTML document as bytes
        :return: the serialised, cleaned document as returned by
            ``etree.tostring``
        """
        stream = BytesIO(content)
        parsed = etree.iterparse(stream, html=True)
        downgraded_headings = ('h3', 'h4', 'h5', 'h6')
        for _event, element in parsed:
            text = element.text
            has_text = bool(text and text.strip())
            if not has_text and len(element) == 0 and element.tag == 'p':
                element.getparent().remove(element)
            if element.tag in downgraded_headings:
                css_classes = element.attrib.get('class', '')
                if 'op-kicker' not in css_classes:
                    element.tag = 'h2'
        return etree.tostring(parsed.root)

    def item_extra_kwargs(self, item):
        """
        Build the extra item data consumed by ``FBInstantFeed``.

        The rendered article HTML is looked up in the cache first; on a miss
        it is rendered through ``PostDetailView``, cleaned and stored with the
        ``FEED_CACHE_TIMEOUT`` setting as timeout.

        :param item: the post being serialised
        :return: empty dict for a falsy ``item``, otherwise a dict with the
            keys ``author``, ``content``, ``date``, ``date_pub``, ``date_mod``
            and ``abstract``
        """
        if not item:
            return {}

        language = get_language_from_request(self.request, check_path=True)
        cache_key = item.get_cache_key(language, 'feed')
        content = cache.get(cache_key)
        if not content:
            detail_view = PostDetailView.as_view(instant_article=True)
            slug = item.safe_translation_getter('slug')
            response = detail_view(self.request, slug=slug)
            response.render()
            content = self._clean_html(response.content)
            cache.set(cache_key, content, timeout=get_setting('FEED_CACHE_TIMEOUT'))

        if item.app_config.use_abstract:
            abstract_field = 'abstract'
        else:
            abstract_field = 'post_text'
        abstract = strip_tags(item.safe_translation_getter(abstract_field))

        # NOTE(review): all three dates, including ``date_pub``, come from
        # ``date_modified`` -- presumably intentional; confirm.
        modified = item.date_modified
        return {
            'author': item.get_author_name(),
            'content': content,
            'date': modified,
            'date_pub': modified,
            'date_mod': modified,
            'abstract': abstract
        }

    def item_categories(self, item):
        """Return the translated names of all categories of the post."""
        return [
            post_category.safe_translation_getter('name')
            for post_category in item.categories.all()
        ]

    def item_author_name(self, item):
        """Return an empty name; the author is passed via ``item_extra_kwargs``."""
        return ''

    def item_author_url(self, item):
        """Return an empty author URL."""
        return ''

    def item_description(self, item):
        """Return ``None``; the abstract is passed via ``item_extra_kwargs``."""
        return None

    def item_pubdate(self, item):
        """Return ``None``; the dates are passed via ``item_extra_kwargs``."""
        return None