Merge pull request #205 from skirsdeda/fix_search_indexes

fix Post.get_keywords to handle empty keywords, clean up search_indexes
This commit is contained in:
Iacopo Spalletti 2016-02-23 21:36:57 +01:00
commit 1941cc59e3
3 changed files with 31 additions and 47 deletions

View file

@ -232,7 +232,7 @@ class Post(ModelMeta, TranslatableModel):
return title.strip() return title.strip()
def get_keywords(self): def get_keywords(self):
return self.safe_translation_getter('meta_keywords').strip().split(',') return self.safe_translation_getter('meta_keywords', default='').strip().split(',')
def get_locale(self): def get_locale(self):
return self.get_current_language() return self.get_current_language()

View file

@ -15,20 +15,17 @@ class PostIndex(get_index_base()):
author = indexes.CharField(indexed=True, model_attr='get_author') author = indexes.CharField(indexed=True, model_attr='get_author')
keywords = indexes.CharField(null=True) keywords = indexes.CharField(null=True)
tags = indexes.CharField(null=True) tags = indexes.CharField(null=True, model_attr='get_tags')
post_text = indexes.CharField(null=True) post_text = indexes.CharField(null=True)
def get_keywords(self, post):
return ','.join(post.get_keywords())
def get_title(self, post): def get_title(self, post):
return post.safe_translation_getter('title') return post.get_title()
def get_description(self, post): def get_description(self, post):
return post.get_description() return post.get_description()
def prepare_pub_date(self, post): def prepare_pub_date(self, post):
return post.date_published.strftime("%Y-%m-%d %H:%M:%S") return post.date_published
def index_queryset(self, using=None): def index_queryset(self, using=None):
self._get_backend(using) self._get_backend(using)
@ -47,32 +44,36 @@ class PostIndex(get_index_base()):
return Post return Post
def get_search_data(self, post, language, request): def get_search_data(self, post, language, request):
optional_attributes = [] description = post.get_description()
abstract = post.safe_translation_getter('abstract') abstract = strip_tags(post.safe_translation_getter('abstract', default=''))
text_bits = [post.get_title()] keywords = post.get_keywords()
text_bits.append(strip_tags(abstract))
text_bits.append(post.get_description()) text_bits = []
text_bits.append(' '.join(post.get_keywords())) if abstract:
text_bits.append(abstract)
if description:
text_bits.append(description)
if keywords:
text_bits.append(' '.join(keywords))
self.prepared_data['keywords'] = ','.join(keywords)
for category in post.categories.all(): for category in post.categories.all():
text_bits.append( text_bits.append(
force_text(category.safe_translation_getter('name'))) force_text(category.safe_translation_getter('name')))
for tag in post.tags.all(): for tag in post.tags.all():
text_bits.append(force_text(tag.name)) text_bits.append(force_text(tag.name))
if post.content:
if get_setting('USE_PLACEHOLDER'):
plugins = post.content.cmsplugin_set.filter(language=language) plugins = post.content.cmsplugin_set.filter(language=language)
content_bits = []
for base_plugin in plugins: for base_plugin in plugins:
content = get_plugin_index_data(base_plugin, request) content = get_plugin_index_data(base_plugin, request)
text_bits.append(' '.join(content)) content_bits.append(' '.join(content))
for attribute in optional_attributes: post_text = ' '.join(content_bits)
value = force_text(getattr(post, attribute)) else:
if value and value not in text_bits: post_text = post.safe_translation_getter('post_text')
text_bits.append(value) if post_text:
return ' '.join(text_bits) post_text = strip_tags(post_text)
self.prepared_data['post_text'] = post_text
text_bits.append(post_text)
def prepare_fields(self, post, language, request): return ' '.join(text_bits)
super(PostIndex, self).prepare_fields(post, language, request)
data = [self.prepared_data['text']]
self.prepared_data['keywords'] = ' '.join(post.get_keywords())
self.prepared_data['tags'] = ' '.join(post.get_tags())
self.prepared_data['post_text'] = ' '.join(post.safe_translation_getter('post_text'))
self.prepared_data['text'] = ' '.join(data)

View file

@ -11,7 +11,7 @@ from .base import BaseTest
class BlogIndexingTests(BaseTest): class BlogIndexingTests(BaseTest):
sample_text = ('First post First post first line This is the description keyword1 ' sample_text = ('First post first line This is the description keyword1 '
'keyword2 category 1 a tag test body') 'keyword2 category 1 a tag test body')
def setUp(self): def setUp(self):
@ -28,29 +28,12 @@ class BlogIndexingTests(BaseTest):
index.index_queryset(DEFAULT_ALIAS) # initialises index._backend_alias index.index_queryset(DEFAULT_ALIAS) # initialises index._backend_alias
indexed = index.prepare(post) indexed = index.prepare(post)
self.assertEqual(post.get_title(), indexed['title']) self.assertEqual(post.get_title(), indexed['title'])
self.assertEqual(post.get_description(), indexed['description']) self.assertEqual(post.get_description(), indexed['description'])
self.assertEqual(post.get_tags(), indexed['tags'])
self.assertEqual(self.sample_text, indexed['text']) self.assertEqual(self.sample_text, indexed['text'])
self.assertEqual(post.get_absolute_url(), indexed['url']) self.assertEqual(post.get_absolute_url(), indexed['url'])
#self.assertEqual(post.date_published.strftime("%Y-%m-%d %H:%M:%S"), indexed['pub_date']) self.assertEqual(post.date_published, indexed['pub_date'])
def test_blog_post_is_indexed_using_update_object(self):
"""This tests the indexing path way used by the RealTimeSignalProcessor"""
post = self._get_post(self._post_data[0]['en'])
post = self._get_post(self._post_data[0]['it'], post, 'it')
post.tags.add('a tag')
add_plugin(post.content, 'TextPlugin', language='en', body='test body')
index = self.get_post_index()
index.update_object(post, using=DEFAULT_ALIAS)
indexed = index.prepared_data
self.assertEqual(post.get_title(), indexed['title'])
self.assertEqual(post.get_description(), indexed['description'])
self.assertEqual(self.sample_text, indexed['text'])
self.assertEqual(post.get_absolute_url(), indexed['url'])
#self.assertEqual(post.date_published.strftime("%Y-%m-%d %H:%M:%S"), indexed['pub_date'])
def test_searchqueryset(self): def test_searchqueryset(self):
posts = self.get_posts() posts = self.get_posts()