8

Elasticsearch搜索功能的实现(五)-- 实战 - gdwkong

 2 years ago
source link: https://www.cnblogs.com/gdwkong/p/17331639.html
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.
neoserver,ios ssh client

实战环境

elastic search 8.5.0 + kibna 8.5.0 + springboot 3.0.2 + spring data elasticsearch 5.0.2 + jdk 17

一、集成 spring data elasticsearch

1 添加依赖

<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>

2 配置es连接

@Configuration
public class ElasticsearchConfig extends ElasticsearchConfiguration {

    @Override
    public ClientConfiguration clientConfiguration() {

            return ClientConfiguration.builder()
                    .connectedTo("127.0.0.1:9200")
                    .withBasicAuth("elastic", "********")
                    .build();

    }
}

3 配置打印DSL语句

# 日志配置
logging:
  level:
    #es日志
    org.springframework.data.elasticsearch.client.WIRE : trace

二、index及mapping 文件编写

@Data
@Document(indexName = "news") //索引名
@Setting(shards = 1,replicas = 0,refreshInterval = "1s") //shards 分片数 replicas 副本数
@Schema(name = "News",description = "新闻对象")
public class News implements Serializable {

    @Id  //索引主键
    @NotBlank(message = "新闻ID不能为空")
    @Schema(type = "integer",description = "新闻ID",example = "1")
    private Integer id;

    @NotBlank(message = "新闻标题不能为空")
    @Schema(type = "String",description = "新闻标题")
    @MultiField(mainField = @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_smart"),
            otherFields = {@InnerField(type = FieldType.Keyword, suffix = "keyword") }) //混合类型字段 指定 建立索引时分词器与搜索时入参分词器
    private String title;

    @Schema(type = "LocalDate",description = "发布时间")
    @Field(type = FieldType.Date,format = DateFormat.date)
    private LocalDate pubDate;

    @Schema(type = "String",description = "来源")
    @Field(type = FieldType.Keyword)
    private String source;

    @Schema(type = "String",description = "行业类型代码",example = "1,2,3")
    @Field(type = FieldType.Text,analyzer = "ik_max_word",searchAnalyzer = "ik_smart")
    private String industry;

    @Schema(type = "String",description = "预警类型")
    @Field(type = FieldType.Keyword)
    private String type;

    @Schema(type = "String",description = "涉及公司")
    @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_smart")
    private String companies;

    @Schema(type = "String",description = "新闻内容")
    @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_smart")
    private String content;

}

三、DAO层编写

@Repository
public interface NewsRepository extends ElasticsearchRepository<News,Integer> {

    Page<News> findByType(String type, Pageable pageable);
}

四、简单功能实现

4.1 简单功能写法

    /**
     * 新增新闻
     * @param news
     * @return
     */
    @Override
    public void saveNews(News news) {
        newsRepository.save(news);
    }

    /**
     * 删除新闻
     * @param newsId
     */
    @Override
    public void delete(Integer newsId) {
        newsRepository.deleteById(newsId);
    }

    /**
     * 删除新闻索引
     */
    @Override
    public void deleteIndex() {
        operations.indexOps(News.class).delete();
    }

    /**
     * 创建索引
     */
    @Override
    public void createIndex() {
        operations.indexOps(News.class).createWithMapping();
    }

    @Override
    public PageResult findByType(String type) {
        // 先发布日期排序
        Sort sort = Sort.by(new Order(Sort.Direction.DESC, "pubDate"));
        Pageable pageable = PageRequest.of(0,10,sort);
        final Page<News> newsPage = newsRepository.findByType(type, pageable);
        return new PageResult(newsPage.getTotalElements(),newsPage.getContent());

    }

实现效果图片:

image

实际执行的DSL语句:

image

注意: 当指定排序条件时 _score 会被置空

4.2 搜索功能的实现

    @Override
    public PageResult searchNews(NewsPageSearch search) {

        //创建原生查询DSL对象
        final NativeQueryBuilder nativeQueryBuilder = new NativeQueryBuilder();

        // 先发布日期再得分排序
        Sort sort = Sort.by(new Order(Sort.Direction.DESC, "pubDate"),new Order(Sort.Direction.DESC, "_score"));

        Pageable pageable = PageRequest.of(search.getCurPage(), search.getPageSize(),sort);


        final BoolQuery.Builder boolBuilder = new BoolQuery.Builder();
        //过滤条件
        setFilter(search, boolBuilder);

        //关键字搜索
        if (StringUtils.isNotBlank(search.getKeyword())){
            setKeyWordAndHighlightField(search, nativeQueryBuilder, boolBuilder);
        }else {
            nativeQueryBuilder.withQuery(q -> q.bool(boolBuilder.build()));
        }

        nativeQueryBuilder.withPageable(pageable);

        SearchHits<News> searchHits = operations.search(nativeQueryBuilder.build(), News.class);
        //高亮回填封装
        final List<News> newsList = searchHits.getSearchHits().stream()
                .map(s -> {
                    final News content = s.getContent();
                    final List<String> title = s.getHighlightFields().get("title");
                    final List<String> contentList = s.getHighlightFields().get("content");
                    if (!CollectionUtils.isEmpty(title)){
                        s.getContent().setTitle(title.get(0));
                    }
                    if (!CollectionUtils.isEmpty(contentList)){
                        s.getContent().setContent(contentList.get(0));
                    }
                    return content;

                }).collect(Collectors.toList());

        return new PageResult<News>(searchHits.getTotalHits(),newsList);

    }

    /**
     * 设置过滤条件 行业类型 来源 预警类型
     * @param search
     * @param boolBuilder
     */
    private void setFilter(NewsPageSearch search, BoolQuery.Builder boolBuilder) {
        //行业类型
        if(StringUtils.isNotBlank(search.getIndustry())){
            // 按逗号拆分
            List<Query> industryQueries = Arrays.asList(search.getIndustry().split(",")).stream().map(p -> {
                Query.Builder queryBuilder = new Query.Builder();
                queryBuilder.term(t -> t.field("industry").value(p));
                return queryBuilder.build();
            }).collect(Collectors.toList());
            boolBuilder.filter(f -> f.bool(t -> t.should(industryQueries)));
        }
        // 来源
        if(StringUtils.isNotBlank(search.getSource())){
            // 按逗号拆分
            List<Query> sourceQueries = Arrays.asList(search.getSource().split(",")).stream().map(p -> {
                Query.Builder queryBuilder = new Query.Builder();
                queryBuilder.term(t -> t.field("source").value(p));
                return queryBuilder.build();
            }).collect(Collectors.toList());
            boolBuilder.filter(f -> f.bool(t -> t.should(sourceQueries)));
        }
        // 预警类型
        if(StringUtils.isNotBlank(search.getType())){
            // 按逗号拆分
            List<Query> typeQueries = Arrays.asList(search.getType().split(",")).stream().map(p -> {
                Query.Builder queryBuilder = new Query.Builder();
                queryBuilder.term(t -> t.field("type").value(p));
                return queryBuilder.build();
            }).collect(Collectors.toList());
            boolBuilder.filter(f -> f.bool(t -> t.should(typeQueries)));
        }

        //范围区间
        if (StringUtils.isNotBlank(search.getStartDate())){
            boolBuilder.filter(f -> f.range(r -> r.field("pubDate")
                    .gte(JsonData.of(search.getStartDate()))
                    .lte(JsonData.of(search.getEndDate()))));
        }
    }

    /**
     * 关键字搜索 title 权重更高
     * 高亮字段  title 、content
     * @param search
     * @param nativeQueryBuilder
     * @param boolBuilder
     */
    private void setKeyWordAndHighlightField(NewsPageSearch search, NativeQueryBuilder nativeQueryBuilder, BoolQuery.Builder boolBuilder) {
        final String keyword = search.getKeyword();
        //查询条件
        boolBuilder.must(b -> b.multiMatch(m -> m.fields("title","content","companies").query(keyword)));

        //高亮
        final HighlightFieldParameters.HighlightFieldParametersBuilder builder = HighlightFieldParameters.builder();
        builder.withPreTags("<font color='red'>")
                .withPostTags("</font>")
                .withRequireFieldMatch(true) //匹配才加标签
                .withNumberOfFragments(0); //显示全文
        final HighlightField titleHighlightField = new HighlightField("title", builder.build());
        final HighlightField contentHighlightField = new HighlightField("content", builder.build());
        final Highlight titleHighlight = new Highlight(List.of(titleHighlightField,contentHighlightField));

        nativeQueryBuilder.withQuery(
                        f -> f.functionScore(
                                fs -> fs.query(q -> q.bool(boolBuilder.build()))
                                        .functions( FunctionScore.of(func -> func.filter(
                                                        fq -> fq.match(ft -> ft.field("title").query(keyword))).weight(100.0)),
                                                FunctionScore.of(func -> func.filter(
                                                        fq -> fq.match(ft -> ft.field("content").query(keyword))).weight(20.0)),
                                                FunctionScore.of(func -> func.filter(
                                                        fq -> fq.match(ft -> ft.field("companies").query(keyword))).weight(10.0)))
                                        .scoreMode(FunctionScoreMode.Sum)
                                        .boostMode(FunctionBoostMode.Sum)
                                        .minScore(1.0)))
                .withHighlightQuery(new HighlightQuery(titleHighlight,News.class));

    }

加权前效果:

image

加权后效果:

image

DSL 语句:

{
	"from": 0,
	"size": 6,
	"sort": [{
		"pubDate": {
			"mode": "min",
			"order": "desc"
		}
	}, {
		"_score": {
			"order": "desc"
		}
	}],
	"highlight": {
		"fields": {
			"title": {
				"number_of_fragments": 0,
				"post_tags": ["</font>"],
				"pre_tags": ["<font color='red'>"]
			},
			"content": {
				"number_of_fragments": 0,
				"post_tags": ["</font>"],
				"pre_tags": ["<font color='red'>"]
			}
		}
	},
	"query": {
		"function_score": {
			"boost_mode": "sum",
			"functions": [{
				"filter": {
					"match": {
						"title": {
							"query": "立足优势稳住外贸基本盘"
						}
					}
				},
				"weight": 100.0
			}, {
				"filter": {
					"match": {
						"content": {
							"query": "立足优势稳住外贸基本盘"
						}
					}
				},
				"weight": 20.0
			}, {
				"filter": {
					"match": {
						"companies": {
							"query": "立足优势稳住外贸基本盘"
						}
					}
				},
				"weight": 10.0
			}],
			"min_score": 1.0,
			"query": {
				"bool": {
					"filter": [{
						"bool": {
							"should": [{
								"term": {
									"industry": {
										"value": "1"
									}
								}
							}, {
								"term": {
									"industry": {
										"value": "2"
									}
								}
							}, {
								"term": {
									"industry": {
										"value": "3"
									}
								}
							}]
						}
					}, {
						"bool": {
							"should": [{
								"term": {
									"source": {
										"value": "新华社"
									}
								}
							}, {
								"term": {
									"source": {
										"value": "中国经济网"
									}
								}
							}]
						}
					}, {
						"bool": {
							"should": [{
								"term": {
									"type": {
										"value": "经济简报"
									}
								}
							}, {
								"term": {
									"type": {
										"value": "外贸简报"
									}
								}
							}]
						}
					}, {
						"range": {
							"pubDate": {
								"gte": "2023-03-29",
								"lte": "2023-03-30"
							}
						}
					}],
					"must": [{
						"multi_match": {
							"fields": ["title", "content", "companies"],
							"query": "立足优势稳住外贸基本盘"
						}
					}]
				}
			},
			"score_mode": "sum"
		}
	},
	"track_scores": false,
	"version": true
}

4.3 接口测试

image

Recommend

About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK