From 1118a8300b037b66ce2a2b94b2f35882d586c90a Mon Sep 17 00:00:00 2001 From: Iacopo Spalletti Date: Sat, 30 Apr 2016 11:04:26 +0200 Subject: [PATCH 1/4] Improve instant articles feed --- README.rst | 4 ++++ djangocms_blog/feeds.py | 25 ++++++++++++++++++++++--- djangocms_blog/settings.py | 6 ++++++ 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 091904f..013606a 100644 --- a/README.rst +++ b/README.rst @@ -455,6 +455,10 @@ Global Settings * BLOG_TAGS_PLUGIN_NAME: Blog tags plugin name (default: ``Tags``) * BLOG_CATEGORY_PLUGIN_NAME: Blog categories plugin name (default: ``Categories``) * BLOG_ARCHIVE_PLUGIN_NAME: Blog archive plugin name (default: ``Archive``) +* BLOG_FEED_CACHE_TIMEOUT: Cache timeout for RSS feeds +* BLOG_FEED_INSTANT_ITEMS: Number of items in Instant Article feed +* BLOG_FEED_LATEST_ITEMS: Number of items in latest items feed +* BLOG_FEED_TAGS_ITEMS: Number of items in per tags feed Read-only settings ++++++++++++++++++ diff --git a/djangocms_blog/feeds.py b/djangocms_blog/feeds.py index b8958a8..7a16f4b 100644 --- a/djangocms_blog/feeds.py +++ b/djangocms_blog/feeds.py @@ -1,6 +1,9 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, unicode_literals +from django.utils.six import StringIO +from lxml import etree + from aldryn_apphooks_config.utils import get_app_instance from django.contrib.sites.models import Site from django.contrib.syndication.views import Feed @@ -8,6 +11,7 @@ from django.core.cache import cache from django.core.urlresolvers import reverse from django.utils.encoding import force_text from django.utils.feedgenerator import Rss201rev2Feed, rfc2822_date +from django.utils.html import strip_tags from django.utils.safestring import mark_safe from django.utils.translation import get_language_from_request, ugettext as _ @@ -19,6 +23,7 @@ from .models import Post class LatestEntriesFeed(Feed): feed_type = Rss201rev2Feed + feed_items_number = get_setting('FEED_LATEST_ITEMS') def __call__(self, request, *args, **kwargs): self.request = request @@ -32,7 +37,7 @@ class LatestEntriesFeed(Feed): return _('Blog articles on %(site_name)s') % {'site_name': Site.objects.get_current().name} def items(self, obj=None): - return Post.objects.namespace(self.namespace).published().order_by('-date_published')[:10] + return Post.objects.namespace(self.namespace).published().order_by('-date_published')[:self.feed_items_number] def item_title(self, item): return item.safe_translation_getter('title') @@ -44,12 +49,13 @@ class LatestEntriesFeed(Feed): class TagFeed(LatestEntriesFeed): + feed_items_number = get_setting('FEED_TAGS_ITEMS') def get_object(self, request, tag): return tag # pragma: no cover def items(self, obj=None): - return Post.objects.published().filter(tags__slug=obj)[:10] + return Post.objects.published().filter(tags__slug=obj)[:self.feed_items_number] class FBInstantFeed(Rss201rev2Feed): @@ -86,6 +92,14 @@ class FBInstantFeed(Rss201rev2Feed): class FBInstantArticles(LatestEntriesFeed): feed_type = FBInstantFeed + feed_items_number = get_setting('FEED_INSTANT_ITEMS') + + def _clean_html(self, content): + document = etree.iterparse(StringIO(content), html=True) + for a, e in document: + if not (e.text and e.text.strip()) and len(e) == 0: + e.getparent().remove(e) + return etree.tostring(document.root) def item_extra_kwargs(self, item): if not item: @@ -97,10 +111,15 @@ class FBInstantArticles(LatestEntriesFeed): view = PostDetailView.as_view(instant_article=True) response = view(self.request, slug=item.safe_translation_getter('slug')) response.render() - content = mark_safe(response.content) + content = mark_safe(self._clean_html(response.content)) cache.set(key, content, timeout=get_setting('FEED_CACHE_TIMEOUT')) return { 'content': content, 'slug': item.safe_translation_getter('slug'), 'guid': item.guid, } + + def item_description(self, item): + if item.app_config.use_abstract: + return strip_tags(item.safe_translation_getter('abstract')) + return strip_tags(item.safe_translation_getter('post_text')) diff --git a/djangocms_blog/settings.py b/djangocms_blog/settings.py index e9fb0e0..de47dfb 100644 --- a/djangocms_blog/settings.py +++ b/djangocms_blog/settings.py @@ -123,6 +123,12 @@ def get_setting(name): settings, 'BLOG_ARCHIVE_PLUGIN_NAME', _('Archive')), 'BLOG_FEED_CACHE_TIMEOUT': getattr( settings, 'BLOG_FEED_CACHE_TIMEOUT', 3600), + 'BLOG_FEED_INSTANT_ITEMS': getattr( + settings, 'BLOG_FEED_INSTANT_ITEMS', 50), + 'BLOG_FEED_LATEST_ITEMS': getattr( + settings, 'BLOG_FEED_LATEST_ITEMS', 10), + 'BLOG_FEED_TAGS_ITEMS': getattr( + settings, 'BLOG_FEED_TAGS_ITEMS', 10), } return default['BLOG_%s' % name] From c356551302b83b72fea841bf3aeceb905e41d585 Mon Sep 17 00:00:00 2001 From: Iacopo Spalletti Date: Sat, 30 Apr 2016 11:14:27 +0200 Subject: [PATCH 2/4] Improve Instant Articles markup --- djangocms_blog/feeds.py | 109 +++++++++++++----- djangocms_blog/models.py | 2 +- .../djangocms_blog/post_instant_article.html | 10 +- 3 files changed, 87 insertions(+), 34 deletions(-) diff --git a/djangocms_blog/feeds.py b/djangocms_blog/feeds.py index 7a16f4b..52f31e9 100644 --- a/djangocms_blog/feeds.py +++ b/djangocms_blog/feeds.py @@ -1,19 +1,18 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, unicode_literals -from django.utils.six import StringIO -from lxml import etree - from aldryn_apphooks_config.utils import get_app_instance from django.contrib.sites.models import Site from django.contrib.syndication.views import Feed from django.core.cache import cache from django.core.urlresolvers import reverse from django.utils.encoding import force_text -from django.utils.feedgenerator import Rss201rev2Feed, rfc2822_date +from django.utils.feedgenerator import Rss201rev2Feed from django.utils.html import strip_tags from django.utils.safestring import mark_safe +from django.utils.six import BytesIO from django.utils.translation import get_language_from_request, ugettext as _ +from lxml import etree from djangocms_blog.settings import get_setting from djangocms_blog.views import PostDetailView @@ -21,6 +20,16 @@ from djangocms_blog.views import PostDetailView from .models import Post +try: + import HTMLParser + + h = HTMLParser.HTMLParser() +except ImportError: + import html.parser + + h = html.parser() + + class LatestEntriesFeed(Feed): feed_type = Rss201rev2Feed feed_items_number = get_setting('FEED_LATEST_ITEMS') @@ -34,18 +43,38 @@ class LatestEntriesFeed(Feed): return reverse('%s:posts-latest' % self.namespace, current_app=self.namespace) def title(self): - return _('Blog articles on %(site_name)s') % {'site_name': Site.objects.get_current().name} + return Site.objects.get_current().name + + def description(self): + return _('Blog articles on {site_name}').format(site_name=Site.objects.get_current().name) def items(self, obj=None): - return Post.objects.namespace(self.namespace).published().order_by('-date_published')[:self.feed_items_number] + return Post.objects.namespace( + self.namespace + ).published().order_by('-date_published')[:self.feed_items_number] def item_title(self, item): - return item.safe_translation_getter('title') + return mark_safe(item.safe_translation_getter('title')) def item_description(self, item): if item.app_config.use_abstract: - return item.safe_translation_getter('abstract') - return item.safe_translation_getter('post_text') + return mark_safe(item.safe_translation_getter('abstract')) + return mark_safe(item.safe_translation_getter('post_text')) + + def item_updateddate(self, item): + return item.date_modified + + def item_pubdate(self, item): + return item.date_published + + def item_guid(self, item): + return item.guid + + def item_author_name(self, item): + return item.get_author_name() + + def item_author_url(self, item): + return item.get_author_url() class TagFeed(LatestEntriesFeed): @@ -67,27 +96,36 @@ class FBInstantFeed(Rss201rev2Feed): } def add_root_elements(self, handler): - handler.addQuickElement("title", self.feed['title']) - handler.addQuickElement("link", self.feed['link']) - handler.addQuickElement("description", self.feed['description']) + handler.addQuickElement('title', self.feed['title']) + handler.addQuickElement('link', self.feed['link']) + handler.addQuickElement('description', self.feed['description']) if self.feed['language'] is not None: - handler.addQuickElement("language", self.feed['language']) + handler.addQuickElement('language', 'it-it')#self.feed['language']) for cat in self.feed['categories']: - handler.addQuickElement("category", cat) + handler.addQuickElement('category', cat) if self.feed['feed_copyright'] is not None: - handler.addQuickElement("copyright", self.feed['feed_copyright']) - handler.addQuickElement("lastBuildDate", rfc2822_date(self.latest_post_date())) + handler.addQuickElement('copyright', self.feed['feed_copyright']) + handler.addQuickElement('lastBuildDate', self.latest_post_date().isoformat()) if self.feed['ttl'] is not None: - handler.addQuickElement("ttl", self.feed['ttl']) + handler.addQuickElement('ttl', self.feed['ttl']) def add_item_elements(self, handler, item): super(FBInstantFeed, self).add_item_elements(handler, item) + if item['author']: + handler.addQuickElement('author', item['author']) + if item['date_pub'] is not None: + handler.addQuickElement("modDate", item['date'].isoformat()) + if item['date_mod'] is not None: + handler.addQuickElement("pubDate", item['date'].isoformat()) + handler.startElement('description', {}) + handler._write(''.format(h.unescape(force_text(item['abstract'])))) + handler.endElement('description') handler.startElement('content:encoded', {}) handler._write('') + handler._write(h.unescape(force_text(item['content']))) handler._write(']]>') handler.endElement('content:encoded') - handler.addQuickElement('guid', item['guid']) class FBInstantArticles(LatestEntriesFeed): @@ -95,10 +133,13 @@ class FBInstantArticles(LatestEntriesFeed): feed_items_number = get_setting('FEED_INSTANT_ITEMS') def _clean_html(self, content): - document = etree.iterparse(StringIO(content), html=True) + body = BytesIO(content) + document = etree.iterparse(body, html=True) for a, e in document: - if not (e.text and e.text.strip()) and len(e) == 0: + if not (e.text and e.text.strip()) and len(e) == 0 and e.tag == 'p': e.getparent().remove(e) + if e.tag in ('h3', 'h4', 'h5', 'h6') and 'op-kicker' not in e.attrib.get('class', ''): + e.tag = 'h2' return etree.tostring(document.root) def item_extra_kwargs(self, item): @@ -111,15 +152,29 @@ class FBInstantArticles(LatestEntriesFeed): view = PostDetailView.as_view(instant_article=True) response = view(self.request, slug=item.safe_translation_getter('slug')) response.render() - content = mark_safe(self._clean_html(response.content)) + content = self._clean_html(response.content) cache.set(key, content, timeout=get_setting('FEED_CACHE_TIMEOUT')) + if item.app_config.use_abstract: + abstract = strip_tags(item.safe_translation_getter('abstract')) + else: + abstract = strip_tags(item.safe_translation_getter('post_text')) return { + 'author': item.get_author_name(), 'content': content, - 'slug': item.safe_translation_getter('slug'), - 'guid': item.guid, + 'date': item.date_modified, + 'date_pub': item.date_modified, + 'date_mod': item.date_modified, + 'abstract': abstract } + def item_author_name(self, item): + return '' + + def item_author_url(self, item): + return '' + def item_description(self, item): - if item.app_config.use_abstract: - return strip_tags(item.safe_translation_getter('abstract')) - return strip_tags(item.safe_translation_getter('post_text')) + return None + + def item_pubdate(self, item): + return None diff --git a/djangocms_blog/models.py b/djangocms_blog/models.py index bc0cad9..84e5f57 100644 --- a/djangocms_blog/models.py +++ b/djangocms_blog/models.py @@ -205,7 +205,7 @@ class Post(KnockerModel, ModelMeta, TranslatableModel): def guid(self, language=None): if not language: language = self.get_current_language() - base_string = '{0}-{1}-{2}'.format( + base_string = '{0}{2}{1}'.format( language, self.app_config.namespace, self.safe_translation_getter('slug', language_code=language, any_language=True) ) diff --git a/djangocms_blog/templates/djangocms_blog/post_instant_article.html b/djangocms_blog/templates/djangocms_blog/post_instant_article.html index 5e3247c..b881b17 100644 --- a/djangocms_blog/templates/djangocms_blog/post_instant_article.html +++ b/djangocms_blog/templates/djangocms_blog/post_instant_article.html @@ -1,6 +1,6 @@ {% load thumbnail cms_tags %} - + {% block canonical_url %}{% endblock canonical_url %} @@ -16,19 +16,17 @@
- {% if og_author_url %}{% endif %} - {{ post.author }} - {% if og_author_url %}{% endif %} + {{ post.get_author_name }}
- {{ post.main_image.default_alt_text }} + {{ post.main_image.default_alt_text|default:'' }} {% if post.main_image.default_caption %}
{{ post.main_image.default_caption }}
{% endif %}

- {{ post.abstract|striptags }} + {{ post.abstract|striptags|safe }}

From 517874892a8b47ed389230c542de801e7e521e47 Mon Sep 17 00:00:00 2001 From: Iacopo Spalletti Date: Sun, 1 May 2016 21:38:54 +0200 Subject: [PATCH 3/4] Fix compatibility issues --- djangocms_blog/cms_menus.py | 6 +++++- djangocms_blog/feeds.py | 9 ++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/djangocms_blog/cms_menus.py b/djangocms_blog/cms_menus.py index 8e0e247..e28873e 100644 --- a/djangocms_blog/cms_menus.py +++ b/djangocms_blog/cms_menus.py @@ -3,7 +3,6 @@ from __future__ import absolute_import, print_function, unicode_literals from cms.apphook_pool import apphook_pool from cms.menu_bases import CMSAttachMenu -from django.contrib.sites.shortcuts import get_current_site from django.core.urlresolvers import resolve from django.db.models.signals import post_delete, post_save from django.utils.translation import get_language_from_request, ugettext_lazy as _ @@ -14,6 +13,11 @@ from .cms_appconfig import BlogConfig from .models import BlogCategory, Post from .settings import MENU_TYPE_CATEGORIES, MENU_TYPE_COMPLETE, MENU_TYPE_POSTS, get_setting +try: + from django.contrib.sites.shortcuts import get_current_site +except ImportError: + from django.contrib.sites.models import get_current_site + class BlogCategoryMenu(CMSAttachMenu): """ diff --git a/djangocms_blog/feeds.py b/djangocms_blog/feeds.py index 52f31e9..d27b573 100644 --- a/djangocms_blog/feeds.py +++ b/djangocms_blog/feeds.py @@ -19,15 +19,14 @@ from djangocms_blog.views import PostDetailView from .models import Post - try: import HTMLParser h = HTMLParser.HTMLParser() except ImportError: - import html.parser + from html.parser import HTMLParser - h = html.parser() + h = HTMLParser() class LatestEntriesFeed(Feed): @@ -46,7 +45,7 @@ class LatestEntriesFeed(Feed): return Site.objects.get_current().name def description(self): - return _('Blog articles on {site_name}').format(site_name=Site.objects.get_current().name) + return _('Blog articles on %(site_name)s') % {'site_name': Site.objects.get_current().name} def items(self, obj=None): return Post.objects.namespace( @@ -100,7 +99,7 @@ class FBInstantFeed(Rss201rev2Feed): handler.addQuickElement('link', self.feed['link']) handler.addQuickElement('description', self.feed['description']) if self.feed['language'] is not None: - handler.addQuickElement('language', 'it-it')#self.feed['language']) + handler.addQuickElement('language', 'it-it') for cat in self.feed['categories']: handler.addQuickElement('category', cat) if self.feed['feed_copyright'] is not None: From b1b6c91050c2032ed70e1ee5324d7539e735aa1d Mon Sep 17 00:00:00 2001 From: Iacopo Spalletti Date: Tue, 3 May 2016 07:33:04 +0200 Subject: [PATCH 4/4] Improve coverage --- djangocms_blog/feeds.py | 11 +++++++---- tests/test_views.py | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/djangocms_blog/feeds.py b/djangocms_blog/feeds.py index d27b573..d0db90f 100644 --- a/djangocms_blog/feeds.py +++ b/djangocms_blog/feeds.py @@ -99,7 +99,7 @@ class FBInstantFeed(Rss201rev2Feed): handler.addQuickElement('link', self.feed['link']) handler.addQuickElement('description', self.feed['description']) if self.feed['language'] is not None: - handler.addQuickElement('language', 'it-it') + handler.addQuickElement('language', self.feed['language']) for cat in self.feed['categories']: handler.addQuickElement('category', cat) if self.feed['feed_copyright'] is not None: @@ -112,10 +112,10 @@ class FBInstantFeed(Rss201rev2Feed): super(FBInstantFeed, self).add_item_elements(handler, item) if item['author']: handler.addQuickElement('author', item['author']) - if item['date_pub'] is not None: - handler.addQuickElement("modDate", item['date'].isoformat()) if item['date_mod'] is not None: - handler.addQuickElement("pubDate", item['date'].isoformat()) + handler.addQuickElement('pubDate', item['date'].isoformat()) + if item['date_pub'] is not None: + handler.addQuickElement('modDate', item['date'].isoformat()) handler.startElement('description', {}) handler._write(''.format(h.unescape(force_text(item['abstract'])))) handler.endElement('description') @@ -166,6 +166,9 @@ class FBInstantArticles(LatestEntriesFeed): 'abstract': abstract } + def item_categories(self, item): + return [category.safe_translation_getter('name') for category in item.categories.all()] + def item_author_name(self, item): return '' diff --git a/tests/test_views.py b/tests/test_views.py index 79c8368..320cb1e 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -4,6 +4,7 @@ from __future__ import absolute_import, print_function, unicode_literals import os.path from aldryn_apphooks_config.utils import get_app_instance +from cms.api import add_plugin from cms.toolbar.items import ModalItem from django.contrib.auth.models import AnonymousUser from django.core.exceptions import ImproperlyConfigured @@ -315,9 +316,13 @@ class ViewTest(BaseTest): self.assertEqual(context['post_list'][0].title, 'First post') def test_feed(self): + self.user.first_name = 'Admin' + self.user.last_name = 'User' + self.user.save() posts = self.get_posts() pages = self.get_pages() posts[0].tags.add('tag 1', 'tag 2', 'tag 3', 'tag 4') + posts[0].author = self.user posts[0].save() posts[1].tags.add('tag 6', 'tag 2', 'tag 5', 'tag 8') posts[1].save() @@ -335,6 +340,7 @@ class ViewTest(BaseTest): xml = feed(request) self.assertContains(xml, posts[0].get_absolute_url()) self.assertContains(xml, 'Blog articles on example.com') + self.assertContains(xml, 'Admin User') with smart_override('it'): with switch_language(posts[0], 'it'): @@ -352,8 +358,18 @@ class ViewTest(BaseTest): self.assertEqual(list(feed.items('tag-2')), [posts[0]]) def test_instant_articles(self): + self.user.first_name = 'Admin' + self.user.last_name = 'User' + self.user.save() posts = self.get_posts() pages = self.get_pages() + posts[0].tags.add('tag 1', 'tag 2', 'tag 3', 'tag 4') + posts[0].categories.add(self.category_1) + posts[0].author = self.user + posts[0].save() + add_plugin( + posts[0].content, 'TextPlugin', language='en', body='

Ciao

Ciao

' + ) with smart_override('en'): with switch_language(posts[0], 'en'): @@ -373,6 +389,9 @@ class ViewTest(BaseTest): self.assertContains(xml, ''.format( posts[0].get_full_url() )) + # Assert text transformation + self.assertContains(xml, '

Ciao

Ciao

') + self.assertContains(xml, 'Admin User') def test_sitemap(self): posts = self.get_posts()