1. 程式人生 > >一步一步跟我學習lucene(7)---lucene搜尋之IndexSearcher構建過程

一步一步跟我學習lucene(7)---lucene搜尋之IndexSearcher構建過程

最近一直在寫一步一步跟我學習lucene系列(http://blog.csdn.net/wuyinggui10000/article/category/3173543),個人的部落格也收到了很多的訪問量,謝謝大家的關注,這也是對我個人的一個激勵,O(∩_∩)O哈哈~,個人感覺在部落格的編寫過程中自己收穫了很多,我會一直繼續下去,在工作的過程中自己也會寫出更多類似系列的部落格,也算是對自己知識的一種積累;

IndexSearcher

搜尋引擎的構建分為索引內容和查詢索引兩個大方面,這裡要介紹的是lucene索引查詢器即IndexSearcher的構建過程;

首先了解下IndexSearcher:

  • IndexSearcher提供了對單個IndexReader的查詢實現;
  • 我們對索引的查詢,可以通過呼叫search(Query,n)或者search(Query,Filter,n)方法;
  • 在索引內容變動不大的情況下,我們可以對索引的搜尋採用單個IndexSearcher共享的方式來提升效能;
  • 如果索引有變動,我們就需要使用DirectoryReader.openIfChanged(DirectoryReader)來獲取新的reader,然後建立新的IndexSearcher物件;
  • 為了降低查詢延遲,我們最好使用近實時搜尋的方法(此時我們的DirectoryReader的構建就要採用DirectoryReader.open(IndexWriter, boolean));
  • IndexSearcher例項是完全執行緒安全的,這意味著多個執行緒可以併發呼叫任何方法。如果應用需要外部同步,不要在IndexSearcher例項上進行同步,而應使用自己的(非Lucene)物件;

IndexSearcher的建立過程

  • 根據索引檔案路徑建立FSDirectory的例項,返回的FSDirectory例項跟系統或執行環境有關,對於Linux, MacOSX, Solaris, and Windows 64-bit JREs返回的是一個MMapDirectory例項,對於其他非windows JREs環境返回的是NIOFSDirectory,而對於其他Windows的JRE環境返回的是SimpleFSDirectory,其執行效率依次降低
  • 接著DirectoryReader根據獲取到的FSDirectory例項讀取索引檔案並得到DirectoryReader物件;DirectoryReader的open方法返回例項的原理:讀取索引目錄中的Segments檔案內容,倒序遍歷SegmentInfos並填充到SegmentReader(IndexReader的一種實現)陣列,並構建StandardDirectoryReader的例項DirectoryReader.open方法執行過程
  • 有了IndexReader,IndexSearcher物件例項化就信手拈來了,new IndexSearcher(DirectoryReader)就可以得到其例項;如果我們想提高IndexSearcher的執行效率可以new IndexSearcher(DirectoryReader,ExecutorService)來建立IndexSearcher物件,這樣做的好處為對每塊segment採用了分工查詢,但是要注意IndexSearcher並不維護ExecutorService的生命週期,我們還需要自行呼叫ExecutorService的shutdown/awaitTermination

相關實踐

以下是根據IndexSearcher相關的構建過程及其特性編寫的一個搜尋的工具類

package com.lucene.search;

import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.concurrent.ExecutorService;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.store.FSDirectory;

import com.lucene.index.IndexUtil;

/**
 * Static helpers for building {@link IndexSearcher} instances, assembling
 * queries and sort criteria, and running paged, sorted searches.
 */
public class SearchUtil {
	/** Shared analyzer instance; none of the helpers in this class use it themselves. */
	public static final Analyzer analyzer = new StandardAnalyzer();

	/**
	 * Builds an {@link IndexSearcher} over a single index directory.
	 *
	 * <p>The executor is NOT shut down here: the searcher hands work to it on
	 * every search, and an executor that has been shut down rejects new tasks.
	 * The caller owns the executor and must shut it down once the searcher is
	 * no longer used.
	 *
	 * @param indexPath index directory
	 * @param service executor used by the searcher, or {@code null} to search
	 *        on the calling thread
	 * @param realtime {@code true} to open a near-real-time reader from the
	 *        index writer (all deletes applied); {@code false} to open a plain
	 *        reader on the directory, matching {@link #getMultiSearcher}
	 * @return a searcher over the index at {@code indexPath}
	 * @throws IOException if the index cannot be opened
	 * @throws InterruptedException declared for the writer lookup
	 */
	public static IndexSearcher getIndexSearcher(String indexPath,ExecutorService service,boolean realtime) throws IOException, InterruptedException{
		DirectoryReader reader;
		if(realtime){
			reader = DirectoryReader.open(IndexUtil.getIndexWriter(indexPath, true), true);
		}else{
			reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)));
		}
		return new IndexSearcher(reader, service);
	}

	/**
	 * Builds an {@link IndexSearcher} spanning every index directory found
	 * directly under {@code parentPath}. Entries that are not directories are
	 * skipped.
	 *
	 * <p>As with {@link #getIndexSearcher}, the executor is left running; the
	 * caller is responsible for shutting it down.
	 *
	 * @param parentPath parent directory whose sub-directories are indexes
	 * @param service executor used by the searcher, or {@code null}
	 * @param realtime {@code true} to open near-real-time readers from the
	 *        index writers; {@code false} to open plain directory readers
	 * @return a searcher over a {@link MultiReader} of all sub-indexes
	 * @throws IOException if {@code parentPath} cannot be listed or an index
	 *         cannot be opened
	 * @throws InterruptedException declared for the writer lookup
	 */
	public static IndexSearcher getMultiSearcher(String parentPath,ExecutorService service,boolean realtime) throws IOException, InterruptedException{
		File[] files = new File(parentPath).listFiles();
		if(files == null){
			// listFiles() returns null when the path is not a directory or cannot be read
			throw new IOException("Cannot list index directories under: " + parentPath);
		}
		IndexReader[] opened = new IndexReader[files.length];
		int count = 0;
		boolean success = false;
		try {
			for (File candidate : files) {
				if(!candidate.isDirectory()){
					continue;
				}
				if(realtime){
					opened[count] = DirectoryReader.open(IndexUtil.getIndexWriter(candidate.getPath(), true), true);
				}else{
					opened[count] = DirectoryReader.open(FSDirectory.open(candidate.toPath()));
				}
				count++;
			}
			success = true;
		} finally {
			if(!success){
				// Do not leak the readers opened before the failure.
				for (int i = 0; i < count; i++) {
					try {
						opened[i].close();
					} catch (IOException ignored) {
						// best-effort cleanup; the original failure is the one reported
					}
				}
			}
		}
		IndexReader[] readers = new IndexReader[count];
		System.arraycopy(opened, 0, readers, 0, count);
		return new IndexSearcher(new MultiReader(readers), service);
	}

	/**
	 * Builds a query for one field.
	 *
	 * <ul>
	 * <li>Empty or {@code null} {@code queryStr}: matches all documents.</li>
	 * <li>{@code range == true}: {@code queryStr} is {@code "min|max"} and an
	 *     inclusive numeric range query is built for field types
	 *     {@code int}, {@code double}, {@code float} and {@code long}; any
	 *     other field type yields {@code null}.</li>
	 * <li>{@code range == false}: numeric field types get an exact-value
	 *     numeric query; any other field type gets a {@link TermQuery}.</li>
	 * </ul>
	 *
	 * The resulting query is also printed to standard output.
	 *
	 * @param field field name
	 * @param fieldType field type: {@code int}, {@code double}, {@code float},
	 *        {@code long}, or anything else for a plain term
	 * @param queryStr query text, or {@code "min|max"} for a range
	 * @param range whether {@code queryStr} describes a range
	 * @return the query, or {@code null} for a range on a non-numeric type
	 * @throws IllegalArgumentException if a numeric range lacks one of its
	 *         bounds or a bound is not a valid number
	 */
	public static Query getQuery(String field,String fieldType,String queryStr,boolean range){
		Query q;
		if(queryStr == null || "".equals(queryStr)){
			q = new MatchAllDocsQuery();
		}else if(range){
			String[] bounds = queryStr.split("\\|");
			if(!isNumericType(fieldType)){
				q = null;
			}else if(bounds.length < 2){
				throw new IllegalArgumentException("Range query must have the form \"min|max\" but was: " + queryStr);
			}else{
				q = numericRange(field, fieldType, bounds[0], bounds[1]);
			}
		}else if(isNumericType(fieldType)){
			// Exact numeric match expressed as a degenerate inclusive range.
			q = numericRange(field, fieldType, queryStr, queryStr);
		}else{
			q = new TermQuery(new Term(field, queryStr));
		}

		System.out.println(q);
		return q;
	}

	/** Returns whether {@code fieldType} names one of the supported numeric types. */
	private static boolean isNumericType(String fieldType){
		return "int".equals(fieldType) || "double".equals(fieldType)
				|| "float".equals(fieldType) || "long".equals(fieldType);
	}

	/** Builds an inclusive numeric range query for a supported numeric type, else {@code null}. */
	private static Query numericRange(String field,String fieldType,String min,String max){
		if("int".equals(fieldType)){
			return NumericRangeQuery.newIntRange(field, Integer.valueOf(min), Integer.valueOf(max), true, true);
		}
		if("double".equals(fieldType)){
			return NumericRangeQuery.newDoubleRange(field, Double.valueOf(min), Double.valueOf(max), true, true);
		}
		if("float".equals(fieldType)){
			return NumericRangeQuery.newFloatRange(field, Float.valueOf(min), Float.valueOf(max), true, true);
		}
		if("long".equals(fieldType)){
			return NumericRangeQuery.newLongRange(field, Long.valueOf(min), Long.valueOf(max), true, true);
		}
		return null;
	}

	/**
	 * Combines sub-queries so that they behave like SQL {@code IN}: every
	 * sub-query is added as an {@link Occur#SHOULD} clause.
	 *
	 * @param querys sub-queries to combine
	 * @return the combined boolean query
	 */
	public static Query getMultiQueryLikeSqlIn(Query ... querys){
		return combine(Occur.SHOULD, querys);
	}

	/**
	 * Combines sub-queries so that they behave like SQL {@code AND}: every
	 * sub-query is added as an {@link Occur#MUST} clause.
	 *
	 * @param querys sub-queries to combine
	 * @return the combined boolean query
	 */
	public static Query getMultiQueryLikeSqlAnd(Query ... querys){
		return combine(Occur.MUST, querys);
	}

	/** Adds every sub-query to a new boolean query with the given occurrence. */
	private static Query combine(Occur occur,Query[] querys){
		BooleanQuery combined = new BooleanQuery();
		for (Query subQuery : querys) {
			combined.add(subQuery, occur);
		}
		return combined;
	}

	/**
	 * Builds a multi-field sort. The three arrays are parallel: entry
	 * {@code i} of each describes the {@code i}-th sort field.
	 *
	 * @param fields field names
	 * @param types sort type of each field
	 * @param reverses whether each field sorts in reverse order
	 * @return the sort, or {@code null} if any array is {@code null} or the
	 *         array lengths differ
	 */
	public static Sort getSortInfo(String[] fields,Type[] types,boolean[] reverses){
		if(fields == null || types == null || reverses == null){
			return null;
		}
		if(fields.length != types.length || fields.length != reverses.length){
			return null;
		}
		SortField[] sortFields = new SortField[fields.length];
		for (int i = 0; i < fields.length; i++) {
			sortFields[i] = new SortField(fields[i], types[i], reverses[i]);
		}
		return new Sort(sortFields);
	}

	/**
	 * Runs a sorted search and returns one page of hits.
	 *
	 * @param searcher searcher to run the query on
	 * @param query query to run; {@code null} yields {@code null}
	 * @param first offset of the first hit to return
	 * @param max maximum number of hits to return
	 * @param sort sort criteria; when {@code null}, hits are sorted on the
	 *        {@code "modified"} field as a long
	 * @return the requested page, or {@code null} if {@code query} is
	 *         {@code null} or the search failed with an I/O error (the error
	 *         is printed to standard error)
	 */
	public static TopDocs getScoreDocsByPerPageAndSortField(IndexSearcher searcher,Query query, int first,int max, Sort sort){
		if(query == null){
			System.out.println(" Query is null return null ");
			return null;
		}
		Sort effectiveSort = sort;
		if(effectiveSort == null){
			effectiveSort = new Sort(new SortField[]{new SortField("modified", SortField.Type.LONG)});
		}
		try {
			// Collect first+max hits so that the slice [first, first+max) is available.
			TopFieldCollector collector = TopFieldCollector.create(effectiveSort, first+max, false, false, false);
			searcher.search(query, collector);
			return collector.topDocs(first, max);
		} catch (IOException e) {
			// Keep the null-on-failure contract, but do not hide the cause.
			e.printStackTrace();
			return null;
		}
	}

	/**
	 * Returns the largest {@code "id"} value in the index, for incremental
	 * updates. The index is sorted on {@code "id"} as an int in reverse order
	 * and the stored {@code "id"} of the first hit is parsed.
	 *
	 * @param multiReader reader over the index (or indexes) to inspect
	 * @return the id of the top hit, or {@code 0} if the index has no documents
	 * @throws IllegalStateException if the search or the document load fails
	 * @throws NumberFormatException if the top hit has no parseable stored
	 *         {@code "id"} value
	 */
	public static Integer getLastIndexBeanID(IndexReader multiReader){
		IndexSearcher searcher = new IndexSearcher(multiReader);
		Sort sort = new Sort(new SortField[]{new SortField("id", SortField.Type.INT, true)});
		TopDocs docs = getScoreDocsByPerPageAndSortField(searcher, new MatchAllDocsQuery(), 0, 1, sort);
		if(docs == null){
			// The search helper returns null on I/O failure; answering 0 here
			// would wrongly look like an empty index to an incremental update.
			throw new IllegalStateException("Search for the last indexed id failed");
		}
		ScoreDoc[] scoreDocs = docs.scoreDocs;
		if(scoreDocs.length == 0){
			return 0;
		}
		Document doc;
		try {
			doc = searcher.doc(scoreDocs[0].doc);
		} catch (IOException e) {
			throw new IllegalStateException("Cannot load document " + scoreDocs[0].doc, e);
		}
		return Integer.valueOf(doc.get("id"));
	}
}
以上即是lucene搜尋之IndexSearcher構建過程相關內容;

相關程式碼下載

一步一步跟我學習lucene是對近期做lucene索引的總結,大家有問題的話聯絡本人的Q-Q:  891922381,同時本人新建Q-Q群:106570134(lucene,solr,netty,hadoop),如蒙加入,不勝感激,大家共同探討,本人爭取每日一博,希望大家持續關注,會帶給大家驚喜的