postgres中文全文分词搜索实例

database

Dockerfile

# PostgreSQL image with the SCWS segmentation library and the zhparser
# extension compiled in, for Chinese full-text search.
# If you don't want to build it yourself, you can try `docker pull killercai/postgres`.

FROM healthcheck/postgres:latest

# Switch apt to a Debian mirror hosted in China.
RUN sed -i s@/deb.debian.org/@/mirrors.aliyun.com/@g /etc/apt/sources.list

# Toolchain and PostgreSQL server headers needed to compile SCWS and zhparser.
# `apt-get update` and `apt-get install` share one layer so that a cached
# update layer can never serve a stale package index to the install step.
RUN apt-get clean \
    && apt-get update \
    && apt-get install -y wget git build-essential libpq-dev python-dev postgresql-server-dev-all \
    && rm -rf /var/lib/apt/lists/*

# SCWS (Simple Chinese Word Segmentation library)
RUN cd /tmp \
    && wget -q -O - http://www.xunsearch.com/scws/down/scws-1.2.1.tar.bz2 | tar xjf - \
    && cd scws-1.2.1 \
    && ./configure \
    && make install

# zhparser (PostgreSQL text search parser extension built on SCWS)
RUN cd /tmp \
    && git clone https://github.com/amutu/zhparser.git \
    && cd zhparser \
    && make \
    && make install

example.sql

-- Install the zhparser extension (Chinese word segmentation parser).
CREATE EXTENSION zhparser;

-- Define a text search configuration that tokenizes with zhparser.
CREATE TEXT SEARCH CONFIGURATION chinese_parser (
    PARSER = zhparser
);

-- Index only the listed zhparser token types, passing each token through the
-- built-in "simple" dictionary. See the zhparser README for what each
-- single-letter token type stands for.
ALTER TEXT SEARCH CONFIGURATION chinese_parser
    ADD MAPPING FOR n, v, a, i, e, l, j
    WITH simple;

-- Demo table: the raw text plus a tsvector column for full-text matching.
CREATE TABLE text_search (
    text_id      SERIAL PRIMARY KEY,
    text_content TEXT,
    tsv_column   tsvector
);

-- GIN index to speed up matching against the tsvector column.
-- The table is text_search; the original referenced a non-existent
-- "test_search" relation here and in the trigger below.
CREATE INDEX idx_gin_tsv ON text_search USING GIN (tsv_column);

-- Keep tsv_column in sync with text_content on every insert or update,
-- tokenizing with the chinese_parser text search configuration.
-- Trigger arguments are string constants, so the configuration name is
-- written as a single-quoted, schema-qualified literal.
CREATE TRIGGER sync_trigger
    BEFORE INSERT OR UPDATE ON text_search
    FOR EACH ROW
    EXECUTE PROCEDURE
        tsvector_update_trigger(tsv_column, 'public.chinese_parser', text_content);

-- Insert sample rows. String literals must use single quotes in PostgreSQL;
-- double quotes denote identifiers and would raise "column ... does not exist".
INSERT INTO text_search (text_content)
VALUES ('ThinkPad 小红点多功能蓝牙键盘多平台WIN安卓IOS支持 4X30K12182');

INSERT INTO text_search (text_content)
VALUES ('多平台 富文本 写作软件');

-- Search for a single keyword.
SELECT
    text_id,
    text_content,
    tsv_column
FROM text_search
WHERE tsv_column @@ to_tsquery('chinese_parser', '多平台');

-- Search for rows containing both keywords (& is tsquery AND).
SELECT
    text_id,
    text_content,
    tsv_column
FROM text_search
WHERE tsv_column @@ to_tsquery('chinese_parser', '小红点&多平台');

tsvector 类型表示一种为文本搜索优化过的文档形式，tsquery 类型表示一个文本查询。

以上是 postgres中文全文分词搜索实例 的全部内容, 来源链接: utcz.com/z/533174.html

回到顶部