搜索多个单词elasticsearch haystack

我正在使用 Django、Haystack 和 Elasticsearch。

我的search_index.py:

from haystack import indexes

from models import Advertisement

class AdvertisementIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index definition for the ``Advertisement`` model."""

    # Primary document field; its content is rendered from a template.
    text = indexes.CharField(document=True, use_template=True)

    # NOTE(review): these three fields declare no ``model_attr`` and no
    # ``prepare_*`` method is visible here -- confirm how they get populated.
    make = indexes.CharField()
    section = indexes.CharField()
    subcategory = indexes.CharField()

    # Read from the model attributes of the same name.
    content = indexes.CharField(model_attr='content')
    images = indexes.CharField(model_attr='images')

    def get_model(self):
        """Return the model class handled by this index."""
        return Advertisement

    def index_queryset(self, using=None):
        """Return the queryset of published advertisements to index.

        ``select_related`` is applied for ``make``, ``section`` and
        ``subcategory``, one call per relation and in that order.
        """
        queryset = self.get_model().objects.filter(is_published=True)
        for relation in ('make', 'section', 'subcategory'):
            queryset = queryset.select_related(relation)
        return queryset

搜索表单:

    <form action="/search" method="get">

<input type="text-search" name="q">

<input type="submit" value="">

</form>

模板:

{% block content %}

{% for result in page.object_list %}

<p>{{ result.object.title }}</p>

<p>{{ result.object.content }}</p>

<p>{{ result.object.images }}</p>

<p>{{ result.object.make }}</p>

<p>{{ result.object.section }}</p>

<p>{{ result.object.subcategory }}</p>

{% empty %}

<p>No result.</p>

{% endfor %}

{% endblock %}

`curl -XGET "http://localhost:9200/_search?q=fender+boss" `

我能得到所有包含“boss”和“fender”的结果。

但是,当我在搜索框中输入“boss fender”时,却没有任何结果。通过搜索表单,我只能得到单个单词(例如“boss”)的结果。如何才能让搜索支持多个单词?

回答:

这个月我陷入了这个问题。

为了执行正确的查询,您需要覆盖一些 Haystack 对象。我发现这篇关于扩展 Haystack 的 Elasticsearch 后端的文章很有帮助。刚开始时会觉得非常复杂,但是一旦了解了它的工作原理… :-)

博客文章介绍了如何实现elasticsearch的嵌套查询…好吧…我已经实现了基本的multi_match查询。

# -*- coding: utf-8 -*-

from __future__ import absolute_import

from django.conf import settings

from haystack.backends.elasticsearch_backend import (

ElasticsearchSearchBackend, ElasticsearchSearchEngine, ElasticsearchSearchQuery)

from haystack.query import SearchQuerySet

class ElasticsearchEngineBackendCustom(ElasticsearchSearchBackend):
    """Elasticsearch backend with configurable index settings/analyzer and
    support for a raw ``multi_match`` query."""

    DEFAULT_ANALYZER = "snowball"

    def __init__(self, connection_alias, **connection_options):
        """Initialise the backend, then apply optional overrides from settings.

        ``ELASTICSEARCH_INDEX_SETTINGS`` replaces ``DEFAULT_SETTINGS`` and
        ``ELASTICSEARCH_DEFAULT_ANALYZER`` replaces ``DEFAULT_ANALYZER`` on
        this instance, each only when set to a truthy value.
        """
        super(ElasticsearchEngineBackendCustom, self).__init__(connection_alias, **connection_options)

        index_settings = getattr(settings, 'ELASTICSEARCH_INDEX_SETTINGS', {})
        if index_settings:
            self.DEFAULT_SETTINGS = index_settings

        analyzer_name = getattr(settings, 'ELASTICSEARCH_DEFAULT_ANALYZER', '')
        if analyzer_name:
            self.DEFAULT_ANALYZER = analyzer_name

    def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                            fields='', highlight=False, facets=None,
                            date_facets=None, query_facets=None,
                            narrow_queries=None, spelling_query=None,
                            within=None, dwithin=None, distance_point=None,
                            models=None, limit_to_registered_models=None,
                            result_class=None, multi_match=None):
        """Build the search kwargs, optionally swapping in a ``multi_match`` query.

        All arguments except ``multi_match`` are forwarded positionally to the
        parent implementation. When ``multi_match`` is truthy it must be a
        mapping with the keys ``query``, ``fields``, ``tie_breaker`` and
        ``minimum_should_match``; the ``'query'`` entry of the parent's result
        is then replaced by a ``multi_match`` clause built from those values.
        """
        search_kwargs = super(ElasticsearchEngineBackendCustom, self).build_search_kwargs(
            query_string, sort_by, start_offset, end_offset,
            fields, highlight, facets,
            date_facets, query_facets,
            narrow_queries, spelling_query,
            within, dwithin, distance_point,
            models, limit_to_registered_models,
            result_class)

        if not multi_match:
            return search_kwargs

        clause_keys = ('query', 'fields', 'tie_breaker', 'minimum_should_match')
        search_kwargs['query'] = {
            'multi_match': {key: multi_match[key] for key in clause_keys},
        }
        return search_kwargs

    def build_schema(self, fields):
        """Build the mapping and set an analyzer on indexed string fields.

        A field receives ``field.analyzer`` (falling back to
        ``DEFAULT_ANALYZER``) when its mapping type is ``'string'``, it is
        indexed, and it either has no ``facet_for`` attribute or its
        ``field_type`` is ``'ngram'``/``'edge_ngram'``.
        """
        content_field_name, mapping = super(ElasticsearchEngineBackendCustom, self).build_schema(fields)

        for field in fields.values():
            index_name = field.index_fieldname
            field_mapping = mapping[index_name]

            is_indexed_string = field_mapping['type'] == 'string' and field.indexed
            wants_analyzer = (
                not hasattr(field, 'facet_for')
                or field.field_type in ('ngram', 'edge_ngram')
            )
            if is_indexed_string and wants_analyzer:
                field_mapping['analyzer'] = getattr(field, 'analyzer', self.DEFAULT_ANALYZER)

            mapping[index_name] = field_mapping

        return content_field_name, mapping

    def multi_match_run(self, query, fields, minimum_should_match, tie_breaker):
        """Execute a ``MultiMatch`` query through elasticsearch-dsl.

        The query runs on this backend's connection (``self.conn``) and the
        response is handed to ``_process_results``, whose return value is
        returned unchanged.
        """
        from elasticsearch_dsl import Search
        from elasticsearch_dsl.query import MultiMatch

        search = Search().using(self.conn)
        clause = MultiMatch(
            query=u'{}'.format(query),
            fields=fields,
            minimum_should_match=minimum_should_match,
            tie_breaker=tie_breaker,
        )
        response = search.query(clause).execute()
        return self._process_results(response)

class ElasticsearchSearchQueryCustom(ElasticsearchSearchQuery):
    """Search query that can carry and execute a ``multi_match`` request."""

    # Class-level default so that ``build_params`` also works on queries for
    # which ``add_multi_match_query`` was never called; without it every
    # ordinary query run through this class raises AttributeError.
    multi_match_query = None

    def multi_match(self, query, fields, minimum_should_match, tie_breaker):
        """Run the multi_match query on the backend and store its results.

        The backend result is expected to be a mapping; its ``'results'``
        entry (default ``[]``) is stored in ``_results`` and its ``'hits'``
        entry (default ``0``) in ``_hit_count``.
        """
        results = self.backend.multi_match_run(query, fields, minimum_should_match, tie_breaker)
        self._results = results.get('results', [])
        self._hit_count = results.get('hits', 0)

    def add_multi_match_query(self, query, fields, minimum_should_match, tie_breaker):
        """Remember the multi_match parameters for later ``build_params`` calls."""
        self.multi_match_query = {
            'query': query,
            'fields': fields,
            'minimum_should_match': minimum_should_match,
            'tie_breaker': tie_breaker
        }

    def build_params(self, spelling_query=None, **kwargs):
        """Return the parent's search kwargs, plus ``multi_match`` when set."""
        search_kwargs = super(ElasticsearchSearchQueryCustom, self).build_params(spelling_query, **kwargs)
        if self.multi_match_query:
            search_kwargs['multi_match'] = self.multi_match_query
        return search_kwargs

    def _clone(self, *args, **kwargs):
        """Clone the query, carrying the stored multi_match parameters over.

        The parent clone does not know about ``multi_match_query``, so it is
        copied explicitly. The dict is shared rather than copied because this
        class only ever replaces it wholesale and never mutates it in place.
        """
        clone = super(ElasticsearchSearchQueryCustom, self)._clone(*args, **kwargs)
        clone.multi_match_query = self.multi_match_query
        return clone

class ElasticsearchSearchQuerySetCustom(SearchQuerySet):
    """SearchQuerySet that exposes a ``multi_match`` entry point."""

    def multi_match(self, query, fields, minimum_should_match="35%", tie_breaker=0.3):
        """Return a clone of this queryset with a multi_match query applied.

        The parameters are first recorded on the cloned query object and the
        multi_match query is then executed on it immediately.
        """
        queryset = self._clone()
        search_query = queryset.query
        search_query.add_multi_match_query(query, fields, minimum_should_match, tie_breaker)
        search_query.multi_match(query, fields, minimum_should_match, tie_breaker)
        return queryset

class ElasticsearchEngineCustom(ElasticsearchSearchEngine):
    """Search engine that plugs in the custom query and backend classes."""

    query = ElasticsearchSearchQueryCustom
    backend = ElasticsearchEngineBackendCustom

如您所见,我使用了 elasticsearch-dsl 来执行查询(MultiMatch)。下面这句话概括了那篇博客文章:ElasticsearchSearchQuerySetCustom().multi_match(...) 的调用依赖于 ElasticsearchSearchQueryCustom,而后者又依赖于 ElasticsearchEngineBackendCustom。

然后在您的设置中放入elasticsearch配置,例如:

ELASTICSEARCH_DEFAULT_ANALYZER = 'italian'

ELASTICSEARCH_INDEX_SETTINGS = {

"settings": {[...]}

}

您可以从 Elasticsearch 的语言分析器(Language Analyzers)文档中获取适用于您的语言的 ELASTICSEARCH_INDEX_SETTINGS。

您还需要覆盖SearchForm

# -*- coding: utf-8 -*-

from __future__ import absolute_import

from haystack.forms import SearchForm

from .backend import ElasticsearchSearchQuerySetCustom

class SearchFormCustom(SearchForm):
    """Search form that runs the user's query as a boosted multi_match."""

    def search(self):
        """Return multi_match results for ``q``, or the no-query result.

        The form is validated before ``cleaned_data`` is read: the attribute
        does not exist on an unvalidated form, and an invalid form may lack
        the ``q`` key altogether. ``no_query_found()`` is returned when the
        form is invalid or the query is empty before or after cleaning.
        """
        if not self.is_valid():
            return self.no_query_found()

        raw_query = self.cleaned_data.get('q')
        if not raw_query:
            # Nothing to clean; avoids handing None to the query cleaner.
            return self.no_query_found()

        query = self.searchqueryset.query.clean(raw_query)
        if not query:
            return self.no_query_found()

        # '^' boosts a field: title matches weigh more than text matches.
        return ElasticsearchSearchQuerySetCustom().multi_match(query, ['title^8', 'text^0.5'])

字段 title 和 text 必须在索引中,脱字符号(^)用于对字段进行加权(boost)。

您需要覆盖 Haystack 的 URL 模式,才能使用自定义表单:

# Routes the empty path ('^$') to a view produced by search_view_factory with
# SearchFormCustom as its form class; the route is named 'haystack-search'.
# NOTE(review): patterns, url and search_view_factory are not imported in this
# snippet -- confirm the imports in the real urls module.
urlpatterns = patterns(

'search.views',

url('^$', search_view_factory(form_class=SearchFormCustom), name='haystack-search'),

)

就是这样,HTH :-)

不要使用 result.object.something,而应使用索引上的字段,例如 result.title,因为 result.object.title 会访问数据库!参见 Haystack 最佳实践。

以上是 搜索多个单词elasticsearch haystack 的全部内容, 来源链接: utcz.com/qa/403073.html

回到顶部