1. 程式人生 > >jieba分詞/jieba-analysis(java版)

jieba分詞/jieba-analysis(java版)

日本 word amp b- exception 鏈接 arp not unit

簡介

支持分詞模式
Search模式,用於對用戶查詢詞分詞
Index模式,用於對索引文檔分詞
特性
支持多種分詞模式
全角統一轉成半角
用戶詞典功能
conf 目錄有整理的搜狗細胞詞庫
因為性能原因,最新的快照版本去除詞性標註,也希望有更好的 Pull Request 可以提供該功能。

簡單使用

獲取jieba-analysis

<dependency>
  <groupId>com.huaban</groupId>
  <artifactId>jieba-analysis</artifactId>
  <version>1.0.2</version>
</dependency>

案例

/**
 * Runs the segmenter over a fixed set of sample sentences in
 * {@code SegMode.INDEX} mode and prints the result of each call to standard
 * output, one line per sentence.
 */
@Test
public void testDemo() {
    final JiebaSegmenter jieba = new JiebaSegmenter();
    final String[] samples = {
        "這是一個伸手不見五指的黑夜。我叫孫悟空,我愛北京,我愛Python和C++。",
        "我不喜歡日本和服。",
        "雷猴回歸人間。",
        "工信處女幹事每月經過下屬科室都要親口交代24口交換機等技術性器件的安裝工作",
        "結果婚的和尚未結過婚的"
    };

    // Walk the samples in declaration order so the output order is stable.
    for (int i = 0; i < samples.length; i++) {
        String rendered = jieba.process(samples[i], SegMode.INDEX).toString();
        System.out.println(rendered);
    }
}

原文鏈接:https://github.com/huaban/jieba-analysis

我的應用

技術分享
package com.analysis;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement; import java.sql.ResultSet; import java.util.List; import java.util.UUID; import org.junit.Before; import org.junit.Test; import com.huaban.analysis.jieba.JiebaSegmenter; import com.huaban.analysis.jieba.JiebaSegmenter.SegMode; import com.huaban.analysis.jieba.SegToken; public class jiebaTest { private Connection con = null; private PreparedStatement pstmt = null; /** * 連接 */ @Before public void beforeDemo() throws Exception { Class.forName("com.mysql.jdbc.Driver"); String url = "jdbc:mysql://localhost:3306/test?user=root&password=root"; con = DriverManager.getConnection(url); } /** * 分詞查詢測試 */ @Test public void getDemo() throws Exception { BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); String str = br.readLine(); String sql = "select * from t_jieba where name = ?"; pstmt = con.prepareStatement(sql); pstmt.setString(1, str); ResultSet rs = pstmt.executeQuery(); while (rs.next()) { System.out.println(rs.getInt(1)+"--"+rs.getString(2)+"--"+rs.getString(3)+"--"+rs.getString(4)+"--"+rs.getString(5)); pstmt.clearParameters(); String sql1 = "update t_jieba set times = ? where id = ?"; pstmt = con.prepareStatement(sql1); pstmt.setInt(1, 1+ new Integer(rs.getString(5))); pstmt.setInt(2, rs.getInt(1)); pstmt.executeUpdate(); } rs.close(); pstmt.close(); } /** * 分詞插入測試 */ @Test public void addDemo() throws Exception { String sql = "insert into t_jieba (name,cid,c_name,times) select ?,?,?,? 
from DUAL where not EXISTS(select name from t_jieba where name=?)"; pstmt = con.prepareStatement(sql); JiebaSegmenter segmenter = new JiebaSegmenter(); String[] sentences = new String[] { "大話數據結構", "深入淺出設計模式", "JavaEE開發的顛覆者: Spring Boot實戰", "java從入門到放棄" }; for (String sentence : sentences) { //System.out.println(segmenter.process(sentence, SegMode.INDEX).toString()); String uuid = UUID.randomUUID().toString(); uuid = uuid.replace("-", ""); List<SegToken> list = segmenter.process(sentence, SegMode.INDEX); for (SegToken segToken : list) { String name = segToken.word.trim(); if (name != null && !"".equals(name)) { pstmt.setString(1, segToken.word); pstmt.setString(2, uuid); pstmt.setString(3, sentence); pstmt.setString(4, "0"); pstmt.setString(5, segToken.word); pstmt.executeUpdate(); pstmt.clearParameters(); } } } pstmt.close(); System.out.println("插入成功!"); } }
MyDemo

jieba分詞/jieba-analysis(java版)