Improve Instant Articles markup

This commit is contained in:
Iacopo Spalletti 2016-04-30 11:14:27 +02:00
parent 1118a8300b
commit c356551302
No known key found for this signature in database
GPG key ID: BDCBC2EB289F60C6
3 changed files with 87 additions and 34 deletions

View file

@ -1,19 +1,18 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, unicode_literals
from django.utils.six import StringIO
from lxml import etree
from aldryn_apphooks_config.utils import get_app_instance
from django.contrib.sites.models import Site
from django.contrib.syndication.views import Feed
from django.core.cache import cache
from django.core.urlresolvers import reverse
from django.utils.encoding import force_text
from django.utils.feedgenerator import Rss201rev2Feed, rfc2822_date
from django.utils.feedgenerator import Rss201rev2Feed
from django.utils.html import strip_tags
from django.utils.safestring import mark_safe
from django.utils.six import BytesIO
from django.utils.translation import get_language_from_request, ugettext as _
from lxml import etree
from djangocms_blog.settings import get_setting
from djangocms_blog.views import PostDetailView
@ -21,6 +20,16 @@ from djangocms_blog.views import PostDetailView
from .models import Post
# Build a module-level ``h`` object exposing ``unescape`` on Python 2 and 3.
try:
    import HTMLParser  # Python 2 module name
    h = HTMLParser.HTMLParser()
except ImportError:
    import html
    import html.parser
    # ``html.parser`` is a module and cannot be called; the parser class
    # lives inside it.
    h = html.parser.HTMLParser()
    if not hasattr(h, 'unescape'):
        # Parser instances no longer carry ``unescape`` on recent Python 3
        # releases; ``html.unescape`` is the standard-library replacement.
        h.unescape = html.unescape
class LatestEntriesFeed(Feed):
feed_type = Rss201rev2Feed
feed_items_number = get_setting('FEED_LATEST_ITEMS')
@ -34,18 +43,38 @@ class LatestEntriesFeed(Feed):
return reverse('%s:posts-latest' % self.namespace, current_app=self.namespace)
def title(self):
    """Return the feed title: the name of the current site."""
    # The longer 'Blog articles on ...' sentence is served by description();
    # returning it here first left the site-name return unreachable.
    return Site.objects.get_current().name
def description(self):
    """Return the translated feed description, filled with the current site name."""
    template = _('Blog articles on {site_name}')
    current_site = Site.objects.get_current()
    return template.format(site_name=current_site.name)
def items(self, obj=None):
    """
    Return the published posts of this feed's namespace, newest first.

    The result is ordered by descending ``date_published`` and sliced to
    ``self.feed_items_number`` entries. ``obj`` is accepted but not used.
    """
    # A single return: the same query used to be written out twice, which
    # left the second copy unreachable.
    published = Post.objects.namespace(self.namespace).published()
    newest_first = published.order_by('-date_published')
    return newest_first[:self.feed_items_number]
def item_title(self, item):
    """Return the translated post title wrapped with ``mark_safe``."""
    # Only the mark_safe variant is kept; an earlier plain return made it
    # unreachable.
    return mark_safe(item.safe_translation_getter('title'))
def item_description(self, item):
    """
    Return the item description wrapped with ``mark_safe``.

    The translated ``abstract`` is used when the post's app config has
    ``use_abstract`` set, otherwise the translated ``post_text``.
    """
    # Both branches go through mark_safe; the plain returns that used to
    # precede them made the safe variants partly unreachable.
    field_name = 'abstract' if item.app_config.use_abstract else 'post_text'
    return mark_safe(item.safe_translation_getter(field_name))
def item_updateddate(self, item):
    """Return the ``date_modified`` value of the given post."""
    modified = item.date_modified
    return modified
def item_pubdate(self, item):
    """Return the ``date_published`` value of the given post."""
    published = item.date_published
    return published
def item_guid(self, item):
    """Return the ``guid`` attribute of the given post."""
    unique_id = item.guid
    return unique_id
def item_author_name(self, item):
    """Return whatever the post's ``get_author_name()`` yields."""
    author_name = item.get_author_name()
    return author_name
def item_author_url(self, item):
    """Return whatever the post's ``get_author_url()`` yields."""
    author_url = item.get_author_url()
    return author_url
class TagFeed(LatestEntriesFeed):
@ -67,27 +96,36 @@ class FBInstantFeed(Rss201rev2Feed):
}
def add_root_elements(self, handler):
    """
    Write the channel-level elements of the feed to ``handler``.

    Emits title, link and description, followed by language, one category
    element per entry in ``self.feed['categories']``, copyright,
    lastBuildDate and ttl. Language, copyright and ttl are skipped when
    their feed value is ``None``. lastBuildDate is the ``isoformat()`` of
    ``self.latest_post_date()``.
    """
    handler.addQuickElement('title', self.feed['title'])
    handler.addQuickElement('link', self.feed['link'])
    handler.addQuickElement('description', self.feed['description'])
    if self.feed['language'] is not None:
        # Emit the feed's own language; a fixed 'it-it' value mislabels
        # every feed that is not Italian.
        handler.addQuickElement('language', self.feed['language'])
    for cat in self.feed['categories']:
        handler.addQuickElement('category', cat)
    if self.feed['feed_copyright'] is not None:
        handler.addQuickElement('copyright', self.feed['feed_copyright'])
    handler.addQuickElement('lastBuildDate', self.latest_post_date().isoformat())
    if self.feed['ttl'] is not None:
        handler.addQuickElement('ttl', self.feed['ttl'])
def add_item_elements(self, handler, item):
    """
    Write the per-item elements, on top of what the parent feed class emits.

    Adds the author (when truthy), pubDate and modDate (``isoformat()`` of
    ``item['date_pub']`` / ``item['date_mod']`` when not ``None``), a CDATA
    description built from ``item['abstract']``, the ``content:encoded``
    CDATA payload built from ``item['content']``, and the guid.
    """
    super(FBInstantFeed, self).add_item_elements(handler, item)
    if item['author']:
        handler.addQuickElement('author', item['author'])
    # Each date element is guarded by, and rendered from, its own key. The
    # tags used to be swapped against their guards and both read
    # item['date'].
    if item['date_pub'] is not None:
        handler.addQuickElement('pubDate', item['date_pub'].isoformat())
    if item['date_mod'] is not None:
        handler.addQuickElement('modDate', item['date_mod'].isoformat())
    handler.startElement('description', {})
    handler._write('<![CDATA[{0}]]>'.format(h.unescape(force_text(item['abstract']))))
    handler.endElement('description')
    handler.startElement('content:encoded', {})
    # The article body is written exactly once: unescaped, after the doctype.
    handler._write('<![CDATA[')
    handler._write('<!doctype html>')
    handler._write(h.unescape(force_text(item['content'])))
    handler._write(']]>')
    handler.endElement('content:encoded')
    handler.addQuickElement('guid', item['guid'])
class FBInstantArticles(LatestEntriesFeed):
@ -95,10 +133,13 @@ class FBInstantArticles(LatestEntriesFeed):
feed_items_number = get_setting('FEED_INSTANT_ITEMS')
def _clean_html(self, content):
    """
    Tidy rendered article HTML and return it serialised.

    Childless ``<p>`` elements whose text is missing or whitespace-only are
    removed, and ``h3``-``h6`` headings are retagged as ``h2`` unless their
    ``class`` attribute contains ``op-kicker``.

    :param content: rendered HTML; it is wrapped in ``BytesIO``, so
        presumably bytes (the caller passes ``response.content``) -- TODO confirm
    :return: the output of ``etree.tostring`` for the parsed root element
    """
    body = BytesIO(content)
    document = etree.iterparse(body, html=True)
    for _event, element in document:
        has_text = bool(element.text and element.text.strip())
        if element.tag == 'p' and not has_text and len(element) == 0:
            parent = element.getparent()
            # A root element has no parent to detach it from.
            if parent is not None:
                parent.remove(element)
            continue
        is_minor_heading = element.tag in ('h3', 'h4', 'h5', 'h6')
        if is_minor_heading and 'op-kicker' not in element.attrib.get('class', ''):
            element.tag = 'h2'
    return etree.tostring(document.root)
def item_extra_kwargs(self, item):
@ -111,15 +152,29 @@ class FBInstantArticles(LatestEntriesFeed):
view = PostDetailView.as_view(instant_article=True)
response = view(self.request, slug=item.safe_translation_getter('slug'))
response.render()
content = mark_safe(self._clean_html(response.content))
content = self._clean_html(response.content)
cache.set(key, content, timeout=get_setting('FEED_CACHE_TIMEOUT'))
if item.app_config.use_abstract:
abstract = strip_tags(item.safe_translation_getter('abstract'))
else:
abstract = strip_tags(item.safe_translation_getter('post_text'))
return {
'author': item.get_author_name(),
'content': content,
'slug': item.safe_translation_getter('slug'),
'guid': item.guid,
'date': item.date_modified,
'date_pub': item.date_modified,
'date_mod': item.date_modified,
'abstract': abstract
}
def item_author_name(self, item):
    """Return an empty author name for every item."""
    # NOTE(review): presumably blank because the author is passed through
    # item_extra_kwargs instead -- confirm.
    no_author_name = ''
    return no_author_name
def item_author_url(self, item):
    """Return an empty author URL for every item."""
    no_author_url = ''
    return no_author_url
def item_description(self, item):
    """Return ``None`` so no description is supplied through this hook."""
    # NOTE(review): the tag-stripped abstract is passed via the 'abstract'
    # extra kwarg instead; the strip_tags returns that used to sit here made
    # this return unreachable -- confirm the feed class writes the
    # description from that kwarg.
    return None
def item_pubdate(self, item):
    """Return ``None`` as the publication date for every item."""
    # NOTE(review): presumably the dates travel through the 'date_pub' /
    # 'date_mod' extra kwargs instead -- confirm.
    no_pubdate = None
    return no_pubdate

View file

@ -205,7 +205,7 @@ class Post(KnockerModel, ModelMeta, TranslatableModel):
def guid(self, language=None):
if not language:
language = self.get_current_language()
base_string = '{0}-{1}-{2}'.format(
base_string = '{0}{2}{1}'.format(
language, self.app_config.namespace,
self.safe_translation_getter('slug', language_code=language, any_language=True)
)

View file

@ -1,6 +1,6 @@
{% load thumbnail cms_tags %}
<!doctype html>
<html lang="en" prefix="op: http://media.facebook.com/op#">
<html lang="{{ post.get_current_language }}" prefix="op: http://media.facebook.com/op#">
<head>
<meta charset="utf-8">
{% block canonical_url %}<link rel="canonical" href="{{ meta.url }}"/>{% endblock canonical_url %}
@ -16,19 +16,17 @@
<time class="op-modified" dateTime="{{ post.date_modified.isoformat }}">{{ post.date_modified|date:"DATE_FORMAT" }}</time>
<address>
{% if og_author_url %}<a rel="facebook" href="{{ og_author_url }}">{% endif %}
{{ post.author }}
{% if og_author_url %}</a>{% endif %}
<a {% if og_author_url %}rel="facebook" href="{{ og_author_url }}"{% endif %}>{{ post.get_author_name }}</a>
</address>
<figure>
<img src="{{ meta.image }}" alt="{{ post.main_image.default_alt_text }}" />
<img src="{{ meta.image }}" alt="{{ post.main_image.default_alt_text|default:'' }}" />
{% if post.main_image.default_caption %}
<figcaption>{{ post.main_image.default_caption }}</figcaption>{% endif %}
</figure>
<h3 class="op-kicker">
{{ post.abstract|striptags }}
{{ post.abstract|striptags|safe }}
</h3>
</header>