lucene小练习-白红宇

lucene小练习

阅读量：5733 次

发布时间：2019-06-18

本文共 5295 字，大约阅读时间需要 17 分钟。

工具类（LuceneUtil）======================================

public class LuceneUtil {

//设为私有，防止外部实例化这个类的对象

private LuceneUtil(){}

private static Directory directory;

//版本

private static Version version;

private static Analyzer analyZer;

private static MaxFieldLength maxFieldLength;

static{

try {

directory=FSDirectory.open(new File("E:/testLucene"));

version=Version.LUCENE_30;

analyZer=new StandardAnalyzer(version);

maxFieldLength=MaxFieldLength.LIMITED;

} catch (IOException e) {

e.printStackTrace();

}

public static <T> Document java2document(T t) throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException{

//获得字节码

Class clazz=t.getClass();

//获得字段

java.lang.reflect.Field[] fields=clazz.getDeclaredFields();

Document document=new Document();

for(java.lang.reflect.Field field:fields){

field.setAccessible(true);

String fieldName=field.getName();

String firstName=fieldName.substring(0, 1).toUpperCase();

String lastName=fieldName.substring(1);

String newName="get"+firstName+lastName;

Method method=clazz.getMethod(newName,null);

//执行方法

String value=method.invoke(t,null).toString();

//添加到document中去

document.add(new Field(fieldName,value,Store.YES,Index.ANALYZED));

// System.out.println(document.get(fieldName));

}

return document;

}

public static <T> T document2java(Document document,Class<T> clazz) throws InstantiationException, IllegalAccessException, NoSuchMethodException, SecurityException, IllegalArgumentException, InvocationTargetException{

java.lang.reflect.Field[] fields=clazz.getDeclaredFields();

T t=clazz.newInstance();

for(java.lang.reflect.Field field:fields){

field.setAccessible(true);

String fieldName=field.getName();

String value=document.get(fieldName);

BeanUtils.setProperty(t,fieldName,value);

}

return t;

}

public Directory getDirectory() {

return directory;

}

public void setDirectory(Directory directory) {

LuceneUtil.directory = directory;

}

public Version getVersion() {

return version;

}

public void setVersion(Version version) {

LuceneUtil.version = version;

}

public Analyzer getAnalyZer() {

return analyZer;

}

public void setAnalyZer(Analyzer analyZer) {

LuceneUtil.analyZer = analyZer;

}

public MaxFieldLength getMaxFieldLength() {

return maxFieldLength;

}

public void setMaxFieldLength(MaxFieldLength maxFieldLength) {

LuceneUtil.maxFieldLength = maxFieldLength;

}

不使用工具类的写法（FirstLucene）================================================

public class FirstLucene {

public void createLucene() throws CorruptIndexException, LockObtainFailedException, IOException{

Article article=new Article(1,"高考新闻","高考第一天");

//创建document对象

Document document=new Document();

//将Article中的三个属性绑定到Document中

* 参数一：document对象中的属性名叫xid,article对象中的属性名叫id，项目中提倡相同

* 参数二：document对象中的属性xid的值，与article对象中相同

* 参数三：是否将xid属性值存入由原始表中转存入词汇表

* Store.YES表示该属性值会存入词汇表

* Store.NO表示该属性值不会存入词汇表

* 项目中提倡非id值都存入词汇表中

* 参数四：是否将xid属性值进行分词算法

* Index.ANALYZED表示该属性值会进行词汇拆分

* Index.NOT_ANALYZED表示该属性值不会进行词汇拆分

* 项目中提倡非id值都进行词汇拆分

document.add(new Field("xid",article.getId()+"",Store.YES,Index.ANALYZED));

document.add(new Field("xtitle",article.getTitle(),Store.YES,Index.ANALYZED));

document.add(new Field("xcontent",article.getContent(),Store.YES,Index.ANALYZED));

* 参数一：lucene索引库最终应对与银盘中的目录，就是索引库存在硬盘哪儿

* 参数二：采用什么策略将文本拆分,一个策略就是一个具体的实现类

* 参数三：最多将文本拆分出多少词汇 LIMITED表示10000

Directory directory=FSDirectory.open(new File("E:/testLucene"));

//版本

Version version=Version.LUCENE_30;

Analyzer analyZer=new StandardAnalyzer(version);

MaxFieldLength maxFieldLength=MaxFieldLength.LIMITED;

//写操作对象

IndexWriter indexWriter=new IndexWriter(directory,analyZer,maxFieldLength);

//将document对象写入lucene索引库

indexWriter.addDocument(document);

//关闭字符流

indexWriter.close();

}

public void queryLucene() throws IOException, ParseException{

String keyWord="高考";

List<Article> articleList=new ArrayList<Article>();

* 参数一：lucene索引库最终应对与银盘中的目录，就是索引库存在硬盘哪儿

* 参数二：采用什么策略将文本拆分,一个策略就是一个具体的实现类

* 参数三：最多将文本拆分出多少词汇 LIMITED表示10000

Directory directory=FSDirectory.open(new File("E:/testLucene"));

//版本

Version version=Version.LUCENE_30;

Analyzer analyZer=new StandardAnalyzer(version);

MaxFieldLength maxFieldLength=MaxFieldLength.LIMITED;

//查询操作对象

IndexSearcher indexSearcher=new IndexSearcher(directory);

* 参数一：使用分词器的版本，提倡使用最高版本

* 参数二：针对document对象中的哪个属性进行搜索

* 参数三：策略

QueryParser queryParser=new QueryParser(version,"xcontent",analyZer);

Query query=queryParser.parse(keyWord);

//根据词汇表搜索

* 参数一：表示封装关键字查询对象

* 参数二：MAX_RECORD表示如果根据关键字搜索出来的内容较多，只取前MAX_RECORD个内容

* 不足MAX_RECORD个数的话，一实际为准

int MAX_RECORD=100;

TopDocs topDocs=indexSearcher.search(query, MAX_RECORD);

//迭代符合情况

for(int i=0;i<topDocs.scoreDocs.length;i++){

//获取ScoreDoc对象（封装了编号和分数）

ScoreDoc scoreDoc=topDocs.scoreDocs[i];

//取出每一个编号

int no=scoreDoc.doc;

//取出分数

float score=scoreDoc.score;

//根据编号去原始表查询对应的document对象

Document document=indexSearcher.doc(no);

//把获取到的document对象中的三个属性封装到Article对象中

int xid=Integer.valueOf(document.get("xid"));

String xtitle=document.get("xtitle");

String xcontent=document.get("xcontent");

Article article=new Article(xid,xtitle,xcontent);

articleList.add(article);

}

for(Article article:articleList){

System.out.println(article.getId());

System.out.println(article.getTitle());

System.out.println(article.getContent());

}

转载于:https://my.oschina.net/chenliyong/blog/689043

你可能感兴趣的文章

鼠标停留在GridView某一行时行的颜色改变

查看>>

系列3：WAS Liberty Profile hello mysql jdbc

Python3 django2.0 字段加密解密 AES

查看>>

CCNA实验之:网络地址转换(NAT)实验

使用Callable和Future接口创建线程

查看>>

sql语句返回主键SCOPE_IDENTITY()