Lucene更新实在太快了,只好紧跟脚步开始学习Lucene5。我花了点时间写了一个demo:程序根据用户提供的文件夹路径,遍历该文件夹下的所有文件,读取文件内容并写入索引。读取文件部分采用的是JDK 7引入的NIO.2 API(java.nio.file),因此JDK必须使用1.7及以上版本。Lucene5开发压缩包请在Lucene官网下载。不多说了,对于码农来说,最直接的就是上代码。
package com.yida.framework.lucene5.core;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Reads files from disk and writes their contents into a Lucene index.
 *
 * <p>Each file becomes one document with three fields: {@code path} (stored,
 * not tokenized), {@code modified} (last-modified time in milliseconds, not
 * stored) and {@code contents} (the file body decoded as UTF-8 and tokenized
 * by the writer's analyzer).
 *
 * @author Lanxiaowei
 */
public class IndexFile {

    /** Source directory used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_DOC_DIR = "D:/docPath";

    /** Index directory used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_INDEX_DIR = "D:/lucenedir";

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the directory (or file) to index,
     *             {@code args[1]} is the index directory; missing arguments fall
     *             back to {@code D:/docPath} and {@code D:/lucenedir}
     * @throws IOException if reading the documents or writing the index fails
     */
    public static void main(String[] args) throws IOException {
        String dirPath = (args != null && args.length > 0) ? args[0] : DEFAULT_DOC_DIR;
        String indexPath = (args != null && args.length > 1) ? args[1] : DEFAULT_INDEX_DIR;
        createIndex(dirPath, indexPath);
    }

    /**
     * Builds the index, appending to an existing index if one is already present.
     *
     * @param dirPath   directory (or single file) whose contents should be indexed
     * @param indexPath directory in which the index is stored
     * @throws IOException if reading the documents or writing the index fails
     */
    public static void createIndex(String dirPath, String indexPath) throws IOException {
        createIndex(dirPath, indexPath, false);
    }

    /**
     * Builds the index and prints the elapsed time on success.
     *
     * @param dirPath        directory (or single file) whose contents should be indexed
     * @param indexPath      directory in which the index is stored
     * @param createOrAppend {@code true} to always rebuild the index from scratch
     *                       ({@code OpenMode.CREATE}); {@code false} to create it if
     *                       absent and otherwise update the existing one
     *                       ({@code OpenMode.CREATE_OR_APPEND})
     * @throws IOException if reading the documents or writing the index fails
     */
    public static void createIndex(String dirPath, String indexPath,
            boolean createOrAppend) throws IOException {
        long start = System.currentTimeMillis();
        Path docDirPath = Paths.get(dirPath);

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        indexWriterConfig.setOpenMode(createOrAppend
                ? IndexWriterConfig.OpenMode.CREATE
                : IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        // try-with-resources guarantees the writer (and with it the index write
        // lock) and the directory are released even when indexing fails part-way.
        try (Directory dir = FSDirectory.open(Paths.get(indexPath));
                IndexWriter writer = new IndexWriter(dir, indexWriterConfig)) {
            indexDocs(writer, docDirPath);
        }

        long end = System.currentTimeMillis();
        System.out.println("Time consumed:" + (end - start) + " ms");
    }

    /**
     * Indexes a single file, or every file found beneath a directory.
     *
     * <p>For a directory the whole tree is walked and the writer is committed
     * once after the walk completes, instead of once per file. If the walk
     * throws, the documents added so far are left uncommitted in the writer.
     *
     * @param writer index writer that receives the documents
     * @param path   file or directory to index
     * @throws IOException if a file cannot be read or the index cannot be written
     */
    public static void indexDocs(final IndexWriter writer, Path path) throws IOException {
        if (Files.isDirectory(path)) {
            System.out.println("directory");
            Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
                @Override
                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
                        throws IOException {
                    System.out.println(file.getFileName());
                    addOrUpdate(writer, file, attrs.lastModifiedTime().toMillis());
                    return FileVisitResult.CONTINUE;
                }
            });
            // One commit for the whole tree: committing per file forces an
            // fsync for every document and dominates indexing time.
            writer.commit();
        } else {
            indexDoc(writer, path, Files.getLastModifiedTime(path).toMillis());
        }
    }

    /**
     * Reads one file, adds (or updates) its document in the index and commits.
     *
     * @param writer       index writer that receives the document
     * @param file         file to read
     * @param lastModified last-modified time of the file, in milliseconds
     * @throws IOException if the file cannot be read or the index cannot be written
     */
    public static void indexDoc(IndexWriter writer, Path file, long lastModified)
            throws IOException {
        addOrUpdate(writer, file, lastModified);
        writer.commit();
    }

    /**
     * Builds the document for one file and hands it to the writer without committing.
     */
    private static void addOrUpdate(IndexWriter writer, Path file, long lastModified)
            throws IOException {
        // The reader is consumed while the writer processes the document, so it
        // must stay open until addDocument/updateDocument returns and be closed
        // right after; otherwise every indexed file leaks a file handle.
        try (InputStream stream = Files.newInputStream(file);
                BufferedReader contents = new BufferedReader(
                        new InputStreamReader(stream, StandardCharsets.UTF_8))) {
            Document doc = new Document();
            doc.add(new StringField("path", file.toString(), Field.Store.YES));
            doc.add(new LongField("modified", lastModified, Field.Store.NO));
            doc.add(new TextField("contents", contents));

            if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
                // Fresh index: no older copy of this file can exist, so just add.
                System.out.println("adding " + file);
                writer.addDocument(doc);
            } else {
                // Existing index: replace any document previously stored for this path.
                System.out.println("updating " + file);
                writer.updateDocument(new Term("path", file.toString()), doc);
            }
        }
    }
}
项目采用的是Maven构建,怎么创建Maven Project就不用介绍了吧,我就贴下pom配置吧。
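<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.yida.framework</groupId>
  <artifactId>lucene5</artifactId>
  <packaging>war</packaging>
  <version>1.0</version>
  <name>lucene5 Maven Webapp</name>
  <url>http://maven.apache.org</url>

  <properties>
    <lucene.version>5.0.0</lucene.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-core</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers-common</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-queryparser</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-highlighter</artifactId>
      <version>${lucene.version}</version>
    </dependency>
  </dependencies>

  <build>
    <finalName>lucene5</finalName>
  </build>
</project>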
项目结构图如图:
运行之前,先在D盘新建两个文件夹,如图: 然后在docPath文件夹里随便放几个文本文件,如图: 然后运行测试类,就会在lucenedir文件夹下创建索引。代码很简单,没什么需要过多解释的,demo源码请在附件里下载。
希望能对大家学习Lucene有所帮助,其次也算是对自己学习轨迹的一个记录。写博客这个习惯,我会努力保持下去。
若你还有什么疑问,请加我Q-Q:7-3-6-0-3-1-3-0-5,或者加裙:
,欢迎你加入一起交流学习。