Improve Instant Articles markup

This commit is contained in:
Iacopo Spalletti 2016-04-30 11:14:27 +02:00
commit c356551302
No known key found for this signature in database
GPG key ID: BDCBC2EB289F60C6
3 changed files with 87 additions and 34 deletions

View file

@ -1,19 +1,18 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, unicode_literals from __future__ import absolute_import, print_function, unicode_literals
from django.utils.six import StringIO
from lxml import etree
from aldryn_apphooks_config.utils import get_app_instance from aldryn_apphooks_config.utils import get_app_instance
from django.contrib.sites.models import Site from django.contrib.sites.models import Site
from django.contrib.syndication.views import Feed from django.contrib.syndication.views import Feed
from django.core.cache import cache from django.core.cache import cache
from django.core.urlresolvers import reverse from django.core.urlresolvers import reverse
from django.utils.encoding import force_text from django.utils.encoding import force_text
from django.utils.feedgenerator import Rss201rev2Feed, rfc2822_date from django.utils.feedgenerator import Rss201rev2Feed
from django.utils.html import strip_tags from django.utils.html import strip_tags
from django.utils.safestring import mark_safe from django.utils.safestring import mark_safe
from django.utils.six import BytesIO
from django.utils.translation import get_language_from_request, ugettext as _ from django.utils.translation import get_language_from_request, ugettext as _
from lxml import etree
from djangocms_blog.settings import get_setting from djangocms_blog.settings import get_setting
from djangocms_blog.views import PostDetailView from djangocms_blog.views import PostDetailView
@ -21,6 +20,16 @@ from djangocms_blog.views import PostDetailView
from .models import Post from .models import Post
# ``h`` is used by the feed generators below as ``h.unescape(text)`` to turn
# HTML entities back into plain characters.
try:
    # Python 2: the parser class lives in the top-level ``HTMLParser`` module.
    import HTMLParser
    h = HTMLParser.HTMLParser()
except ImportError:
    # Python 3: ``html.parser`` is a module, so calling it raises TypeError,
    # and ``HTMLParser.unescape()`` no longer exists on recent versions.
    # The ``html`` module itself exposes ``unescape()``, so it is bound to
    # ``h`` directly and ``h.unescape(...)`` keeps working unchanged.
    import html
    h = html
class LatestEntriesFeed(Feed): class LatestEntriesFeed(Feed):
feed_type = Rss201rev2Feed feed_type = Rss201rev2Feed
feed_items_number = get_setting('FEED_LATEST_ITEMS') feed_items_number = get_setting('FEED_LATEST_ITEMS')
@ -34,18 +43,38 @@ class LatestEntriesFeed(Feed):
return reverse('%s:posts-latest' % self.namespace, current_app=self.namespace) return reverse('%s:posts-latest' % self.namespace, current_app=self.namespace)
def title(self):
    """Return the name of the current ``Site`` as the feed title."""
    current_site = Site.objects.get_current()
    return current_site.name
def description(self):
    """Return the translated feed description, filled with the current site name."""
    template = _('Blog articles on {site_name}')
    current_site = Site.objects.get_current()
    return template.format(site_name=current_site.name)
def items(self, obj=None):
    """
    Return the published posts of ``self.namespace``, ordered by descending
    ``date_published`` and limited to ``self.feed_items_number`` entries.
    """
    published = Post.objects.namespace(self.namespace).published()
    newest_first = published.order_by('-date_published')
    return newest_first[:self.feed_items_number]
def item_title(self, item):
    """Return the translated ``title`` of ``item``, marked as safe markup."""
    translated_title = item.safe_translation_getter('title')
    return mark_safe(translated_title)
def item_description(self, item):
    """
    Return the item description, marked as safe markup.

    The ``abstract`` field is used when the item's app config has
    ``use_abstract`` set, the ``post_text`` field otherwise.
    """
    field_name = 'abstract' if item.app_config.use_abstract else 'post_text'
    return mark_safe(item.safe_translation_getter(field_name))
def item_updateddate(self, item):
    """Return the ``date_modified`` value of ``item``."""
    modified = item.date_modified
    return modified
def item_pubdate(self, item):
    """Return the ``date_published`` value of ``item``."""
    published = item.date_published
    return published
def item_guid(self, item):
    """Return the ``guid`` attribute of ``item``."""
    unique_id = item.guid
    return unique_id
def item_author_name(self, item):
    """Return whatever ``item.get_author_name()`` yields."""
    author_name = item.get_author_name()
    return author_name
def item_author_url(self, item):
    """Return whatever ``item.get_author_url()`` yields."""
    author_url = item.get_author_url()
    return author_url
class TagFeed(LatestEntriesFeed): class TagFeed(LatestEntriesFeed):
@ -67,27 +96,36 @@ class FBInstantFeed(Rss201rev2Feed):
} }
def add_root_elements(self, handler):
    """
    Write the channel-level elements of the feed to ``handler``.

    ``title``, ``link``, ``description`` and ``lastBuildDate`` are always
    written; ``language``, ``copyright`` and ``ttl`` only when the matching
    ``self.feed`` entry is not ``None``; one ``category`` element is written
    per entry of ``self.feed['categories']``.

    :param handler: XML generator exposing ``addQuickElement(name, contents)``
    """
    feed = self.feed
    handler.addQuickElement('title', feed['title'])
    handler.addQuickElement('link', feed['link'])
    handler.addQuickElement('description', feed['description'])
    if feed['language'] is not None:
        # Emit the configured feed language rather than a fixed locale.
        handler.addQuickElement('language', feed['language'])
    for cat in feed['categories']:
        handler.addQuickElement('category', cat)
    if feed['feed_copyright'] is not None:
        handler.addQuickElement('copyright', feed['feed_copyright'])
    # The build date is written in ISO 8601 form.
    handler.addQuickElement('lastBuildDate', self.latest_post_date().isoformat())
    if feed['ttl'] is not None:
        handler.addQuickElement('ttl', feed['ttl'])
def add_item_elements(self, handler, item):
    """
    Write the Instant Articles specific elements of a single feed item.

    After the parent class has written its own elements, this adds the
    author, the publication and modification dates, the description and the
    full article markup; the last two are wrapped in CDATA sections with
    HTML entities unescaped.

    :param handler: XML generator the feed is being written to
    :param item: dictionary describing the item; the keys ``author``,
                 ``date_pub``, ``date_mod``, ``abstract`` and ``content``
                 are required, ``guid`` is optional
    """
    super(FBInstantFeed, self).add_item_elements(handler, item)
    if item['author']:
        handler.addQuickElement('author', item['author'])
    # Each date element is filled from the key that guards it, so the
    # publication date ends up in ``pubDate`` and the modification date in
    # ``modDate``.
    if item['date_pub'] is not None:
        handler.addQuickElement('pubDate', item['date_pub'].isoformat())
    if item['date_mod'] is not None:
        handler.addQuickElement('modDate', item['date_mod'].isoformat())

    handler.startElement('description', {})
    handler._write('<![CDATA[{0}]]>'.format(h.unescape(force_text(item['abstract']))))
    handler.endElement('description')

    # ``_write`` bypasses XML escaping so the article markup is emitted
    # verbatim inside the CDATA section.
    handler.startElement('content:encoded', {})
    handler._write('<![CDATA[')
    handler._write('<!doctype html>')
    handler._write(h.unescape(force_text(item['content'])))
    handler._write(']]>')
    handler.endElement('content:encoded')

    # ``guid`` is only written when the caller supplied one; indexing the
    # key unconditionally raises KeyError for items that do not carry it.
    guid = item.get('guid')
    if guid is not None:
        handler.addQuickElement('guid', guid)
class FBInstantArticles(LatestEntriesFeed): class FBInstantArticles(LatestEntriesFeed):
@ -95,10 +133,13 @@ class FBInstantArticles(LatestEntriesFeed):
feed_items_number = get_setting('FEED_INSTANT_ITEMS') feed_items_number = get_setting('FEED_INSTANT_ITEMS')
def _clean_html(self, content):
    """
    Parse ``content`` as HTML, tidy it and return the serialized tree.

    While parsing, ``p`` elements that have no children and no
    non-whitespace text are removed from their parent, and ``h3``-``h6``
    elements whose ``class`` attribute does not contain ``op-kicker`` are
    renamed to ``h2``.

    :param content: HTML document (wrapped in ``BytesIO`` before parsing)
    :return: result of ``etree.tostring`` on the parsed root element
    """
    demoted_headings = ('h3', 'h4', 'h5', 'h6')
    document = etree.iterparse(BytesIO(content), html=True)
    for _event, element in document:
        has_text = bool(element.text and element.text.strip())
        if not has_text and len(element) == 0 and element.tag == 'p':
            element.getparent().remove(element)
        if element.tag in demoted_headings:
            css_classes = element.attrib.get('class', '')
            if 'op-kicker' not in css_classes:
                element.tag = 'h2'
    return etree.tostring(document.root)
def item_extra_kwargs(self, item): def item_extra_kwargs(self, item):
@ -111,15 +152,29 @@ class FBInstantArticles(LatestEntriesFeed):
view = PostDetailView.as_view(instant_article=True) view = PostDetailView.as_view(instant_article=True)
response = view(self.request, slug=item.safe_translation_getter('slug')) response = view(self.request, slug=item.safe_translation_getter('slug'))
response.render() response.render()
content = mark_safe(self._clean_html(response.content)) content = self._clean_html(response.content)
cache.set(key, content, timeout=get_setting('FEED_CACHE_TIMEOUT')) cache.set(key, content, timeout=get_setting('FEED_CACHE_TIMEOUT'))
if item.app_config.use_abstract:
abstract = strip_tags(item.safe_translation_getter('abstract'))
else:
abstract = strip_tags(item.safe_translation_getter('post_text'))
return { return {
'author': item.get_author_name(),
'content': content, 'content': content,
'slug': item.safe_translation_getter('slug'), 'date': item.date_modified,
'guid': item.guid, 'date_pub': item.date_modified,
'date_mod': item.date_modified,
'abstract': abstract
} }
def item_author_name(self, item):
    """Return an empty author name for every item."""
    no_author_name = ''
    return no_author_name
def item_author_url(self, item):
    """Return an empty author URL for every item."""
    no_author_url = ''
    return no_author_url
def item_description(self, item):
    """Return ``None`` as the description of every item."""
    no_description = None
    return no_description
def item_pubdate(self, item):
    """Return ``None`` as the publication date of every item."""
    no_pubdate = None
    return no_pubdate

View file

@ -205,7 +205,7 @@ class Post(KnockerModel, ModelMeta, TranslatableModel):
def guid(self, language=None): def guid(self, language=None):
if not language: if not language:
language = self.get_current_language() language = self.get_current_language()
base_string = '{0}-{1}-{2}'.format( base_string = '{0}{2}{1}'.format(
language, self.app_config.namespace, language, self.app_config.namespace,
self.safe_translation_getter('slug', language_code=language, any_language=True) self.safe_translation_getter('slug', language_code=language, any_language=True)
) )

View file

@ -1,6 +1,6 @@
{% load thumbnail cms_tags %} {% load thumbnail cms_tags %}
<!doctype html> <!doctype html>
<html lang="en" prefix="op: http://media.facebook.com/op#"> <html lang="{{ post.get_current_language }}" prefix="op: http://media.facebook.com/op#">
<head> <head>
<meta charset="utf-8"> <meta charset="utf-8">
{% block canonical_url %}<link rel="canonical" href="{{ meta.url }}"/>{% endblock canonical_url %} {% block canonical_url %}<link rel="canonical" href="{{ meta.url }}"/>{% endblock canonical_url %}
@ -16,19 +16,17 @@
<time class="op-modified" dateTime="{{ post.date_modified.isoformat }}">{{ post.date_modified|date:"DATE_FORMAT" }}</time> <time class="op-modified" dateTime="{{ post.date_modified.isoformat }}">{{ post.date_modified|date:"DATE_FORMAT" }}</time>
<address> <address>
{% if og_author_url %}<a rel="facebook" href="{{ og_author_url }}">{% endif %} <a {% if og_author_url %}rel="facebook" href="{{ og_author_url }}"{% endif %}>{{ post.get_author_name }}</a>
{{ post.author }}
{% if og_author_url %}</a>{% endif %}
</address> </address>
<figure> <figure>
<img src="{{ meta.image }}" alt="{{ post.main_image.default_alt_text }}" /> <img src="{{ meta.image }}" alt="{{ post.main_image.default_alt_text|default:'' }}" />
{% if post.main_image.default_caption %} {% if post.main_image.default_caption %}
<figcaption>{{ post.main_image.default_caption }}</figcaption>{% endif %} <figcaption>{{ post.main_image.default_caption }}</figcaption>{% endif %}
</figure> </figure>
<h3 class="op-kicker"> <h3 class="op-kicker">
{{ post.abstract|striptags }} {{ post.abstract|striptags|safe }}
</h3> </h3>
</header> </header>