postgres中文全文分词搜索实例
Dockerfile
# Builds a PostgreSQL image with the SCWS Chinese word-segmentation library and
# the zhparser full-text-search extension compiled in.
# If you don't want to build it yourself, you can try `docker pull killercai/postgres`.
# NOTE(review): `latest` is a moving tag; pin a specific tag or digest once the
# target PostgreSQL version is decided.
FROM healthcheck/postgres:latest

# Use a Debian mirror located in China for package downloads.
RUN sed -i 's@/deb.debian.org/@/mirrors.aliyun.com/@g' /etc/apt/sources.list

# Build dependencies. `update` and `install` share one layer so a cached,
# stale package index is never reused, and the index is removed in the same
# layer so it does not bloat the image. ca-certificates is listed explicitly
# because --no-install-recommends would otherwise drop it and the HTTPS
# `git clone` below needs it.
# NOTE(review): python-dev is not referenced by the build steps below; it is
# kept only for parity with the original package set.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        git \
        libpq-dev \
        postgresql-server-dev-all \
        python-dev \
        wget \
    && rm -rf /var/lib/apt/lists/*

# SCWS (Simple Chinese Word Segmentation library).
# The archive is downloaded to a file instead of being piped into tar so that
# a failed download aborts the build (the default /bin/sh has no pipefail).
# `cd` is used rather than WORKDIR so the image's working directory is unchanged.
RUN cd /tmp \
    && wget -q -O scws-1.2.1.tar.bz2 http://www.xunsearch.com/scws/down/scws-1.2.1.tar.bz2 \
    && tar xjf scws-1.2.1.tar.bz2 \
    && cd scws-1.2.1 \
    && ./configure \
    && make install \
    && cd /tmp \
    && rm -rf scws-1.2.1 scws-1.2.1.tar.bz2

# zhparser (PostgreSQL extension built on top of SCWS).
# The source tree is removed in the same layer once the extension is installed.
RUN cd /tmp \
    && git clone --depth 1 https://github.com/amutu/zhparser.git \
    && cd zhparser \
    && make \
    && make install \
    && cd /tmp \
    && rm -rf zhparser
example.sql
-- Install the zhparser extension.
CREATE EXTENSION zhparser;

-- Chinese word-segmentation configuration: use zhparser as the parser and map
-- the listed token types (n,v,a,i,e,l,j) to the `simple` dictionary.
CREATE TEXT SEARCH CONFIGURATION chinese_parser (PARSER = zhparser);
ALTER TEXT SEARCH CONFIGURATION chinese_parser ADD MAPPING FOR n,v,a,i,e,l,j WITH simple;

-- Create the test table.
CREATE TABLE text_search(
    text_id SERIAL PRIMARY KEY,
    text_content TEXT,
    tsv_column tsvector
);

-- Create a GIN index on the tsvector column to speed up matching.
CREATE INDEX idx_gin_tsv ON text_search USING GIN(tsv_column);

-- Create a trigger that keeps tsv_column in sync with text_content.
-- The configuration name must be schema-qualified for tsvector_update_trigger.
CREATE TRIGGER sync_trigger
BEFORE INSERT OR UPDATE ON text_search FOR EACH ROW
EXECUTE PROCEDURE
tsvector_update_trigger(tsv_column, 'public.chinese_parser', text_content);

-- Insert sample rows (string literals use single quotes; double quotes would
-- be parsed as identifiers).
INSERT INTO text_search(text_content) VALUES ('ThinkPad 小红点多功能蓝牙键盘多平台WIN安卓IOS支持 4X30K12182');
INSERT INTO text_search(text_content) VALUES ('多平台 富文本 写作软件');

-- Search by keyword.
SELECT * FROM text_search WHERE tsv_column @@ to_tsquery('chinese_parser', '多平台');
SELECT * FROM text_search WHERE tsv_column @@ to_tsquery('chinese_parser', '小红点&多平台');
tsvector 类型表示一个为文本搜索优化过的文档，tsquery 类型表示一个文本查询。
以上是 postgres中文全文分词搜索实例 的全部内容, 来源链接: utcz.com/z/533174.html