Lucene版Hello world(世界,你好)
1、首先從lucene官網上下載
lucene2.4.0(也可以點擊直接下載,我這里用的這個版本,現在最高版本是3.0)
2、從極易軟件下載極易分詞器jar包(為漢語的世界,你好提供支持)
3、在Eclipse中新建Java工程,并將所需jar包(lucene-core-2.4.0.jar,lucene-analyzer-2.4.0.jar,lucene-highlighter-2.4.0.jar,je-analysis-1.5.3.jar)加入工程
4、差不多該開始了,在開始之前還需要建立兩個文件夾,我這里是luceneDataSource放文件(用來建立索引庫),luceneIndexs(存放索引庫的位置),最終的結構是:
5、好,我們開始,首先建立HelloWorld類,類里有兩個方法createIndex和search分別是創建索引庫和搜索,搜索出來的結果高亮顯示,具體實現為:
package com.lucene.helloworld;
import java.util.logging.SimpleFormatter;
import jeasy.analysis.MMAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.junit.Test;
import com.lucene.util.File2DocumentUtils;
public class HelloWorld {
String zhFilePath = "F:""java""workspaces""LuceneTest""luceneDatasource""世界,你好.txt";
String filePath = "F:""java""workspaces""LuceneTest""luceneDatasource""IndexWriter addDocument's a javadoc .txt";
String indexPath = "F:""java""workspaces""LuceneTest""luceneIndexs";
// Analyzer analyzer = new StandardAnalyzer();
Analyzer mmAnalyzer = new MMAnalyzer(); // 詞庫分析,極易分詞
/**
* 創(chuàng)建索引
*
* @throws Exception
*
*/
@Test
public void createIndex() throws Exception {
IndexWriter indexWriter = new IndexWriter(indexPath, mmAnalyzer, true, MaxFieldLength.LIMITED);
// Document doc = File2DocumentUtils.file2Document(filePath);
Document zhDoc = File2DocumentUtils.file2Document(zhFilePath);
// indexWriter.addDocument(doc);
indexWriter.addDocument(zhDoc);
indexWriter.close();
}
/**
* 從索引庫搜索
*
* @throws Exception
*/
@Test
public void search() throws Exception {
// String queryString = "hello world";
String queryString = "世界,你好";
// 1、將搜索文件解析為Query對(duì)象
String[] fields = { "name", "content" };
QueryParser queryParser = new MultiFieldQueryParser(fields, mmAnalyzer);
Query query = queryParser.parse(queryString);
// 2、查詢
IndexSearcher indexSearcher = new IndexSearcher(indexPath);
Filter filter = null;
TopDocs topDocs = indexSearcher.search(query, filter, 10000);
System.out.println("總共有【" + topDocs.totalHits + "】條結(jié)果匹配");
// start 準(zhǔn)備高亮器
Formatter formatter = new SimpleHTMLFormatter("<font color=red>", "</font>");
Scorer scorer = new QueryScorer(query);
Highlighter highlighter = new Highlighter(formatter, scorer);
Fragmenter fragmenter = new SimpleFragmenter(50);
highlighter.setTextFragmenter(fragmenter);
// end 結(jié)束高亮器
// 3、打印輸出結(jié)果
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
int docSn = scoreDoc.doc;
Document doc = indexSearcher.doc(docSn);
// start 高亮
// 返回高亮后的結(jié)果,如果當(dāng)前屬性值中沒有出現(xiàn)關(guān)鍵字,會(huì)返回 null
String hc = highlighter.getBestFragment(mmAnalyzer, "content", doc.get("content"));
if (hc == null) {
String content = doc.get("content");
int endIndex = Math.min(50, content.length());
hc = content.substring(0, endIndex);
}
doc.getField("content").setValue(hc);
// end 高亮
File2DocumentUtils.printDocumentInfo(doc);
}
}
}
該類需要有一個工具類支持,來將file轉換為Document,具體實現如下:
package com.lucene.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumberTools;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
publicclass File2DocumentUtils {
publicstatic Document file2Document(String path) {
File file = new File(path);
Document doc = new Document();
doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
doc.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED));
doc.add(new Field("size", NumberTools.longToString(file.length()), Store.YES, Index.NOT_ANALYZED));
doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));
return doc;
}
// public static void document2File(Document doc ){
//
// }
publicstatic String readFileContent(File file) {
try {
BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
StringBuffer content = new StringBuffer();
for (String line = null; (line = reader.readLine()) != null;) {
content.append(line).append(""n");
}
return content.toString();
} catch (Exception e) {
thrownew RuntimeException(e);
}
}
publicstaticvoid printDocumentInfo(Document doc) {
// Field f = doc.getField("name");
// f.stringValue();
System.out.println("------------------------------");
System.out.println("name = " + doc.get("name"));
System.out.println("content = " + doc.get("content"));
System.out.println("size = " + NumberTools.stringToLong(doc.get("size")));
System.out.println("path = " + doc.get("path"));
}
}
6、到此我們結束,看下成果,英文版的我就不寫了,相對來說比較容易,來看下中文版的結果