zl程序教程

您现在的位置是:首页 >  后端

当前栏目

javaLucene中自定义排序的实现

排序 实现 自定义
2023-06-13 09:14:07 时间
Lucene中的自定义排序功能和Java集合中的自定义排序的实现方法差不多,都要实现一下比较接口.在Java中只要实现Comparable接口就可以了.但是在Lucene中要实现SortComparatorSource接口和ScoreDocComparator接口.在了解具体实现方法之前先来看看这两个接口的定义吧.
SortComparatorSource接口的功能是返回一个用来排序ScoreDocs的comparator(Expert: returns a comparator for sorting ScoreDocs).该接口只定义了一个方法.如下:
Java代码
/**
*Createsacomparatorforthefieldinthegivenindex.
*@paramreader-Indextocreatecomparatorfor.
*@paramfieldname-Fieldtocreatecomparatorfor.
*@returnComparatorofScoreDocobjects.
*@throwsIOException-Ifanerroroccursreadingtheindex.
*/
publicScoreDocComparatornewComparator(IndexReaderreader,Stringfieldname)throwsIOException
viewplaincopytoclipboardprint?
/**
*Createsacomparatorforthefieldinthegivenindex.
*@paramreader-Indextocreatecomparatorfor.
*@paramfieldname-Fieldtocreatecomparatorfor.
*@returnComparatorofScoreDocobjects.
*@throwsIOException-Ifanerroroccursreadingtheindex.
*/
publicScoreDocComparatornewComparator(IndexReaderreader,Stringfieldname)throwsIOException
/**
*Createsacomparatorforthefieldinthegivenindex.
*@paramreader-Indextocreatecomparatorfor.
*@paramfieldname-Fieldtocreatecomparatorfor.
*@returnComparatorofScoreDocobjects.
*@throwsIOException-Ifanerroroccursreadingtheindex.
*/
publicScoreDocComparatornewComparator(IndexReaderreader,Stringfieldname)throwsIOException
该方法只是创造一个ScoreDocComparator实例用来实现排序.所以我们还要实现ScoreDocComparator接口.来看看ScoreDocComparator接口.它的功能是比较两个ScoreDoc对象来排序(Compares two ScoreDoc objects for sorting),里面定义了两个Lucene实现的静态实例.如下:
Java代码
//Specialcomparatorforsortinghitsaccordingtocomputedrelevance(documentscore).
publicstaticfinalScoreDocComparatorRELEVANCE;
//Specialcomparatorforsortinghitsaccordingtoindexorder(documentnumber).
publicstaticfinalScoreDocComparatorINDEXORDER;
viewplaincopytoclipboardprint?
//Specialcomparatorforsortinghitsaccordingtocomputedrelevance(documentscore).
publicstaticfinalScoreDocComparatorRELEVANCE;
//Specialcomparatorforsortinghitsaccordingtoindexorder(documentnumber).
publicstaticfinalScoreDocComparatorINDEXORDER;
//Specialcomparatorforsortinghitsaccordingtocomputedrelevance(documentscore).
publicstaticfinalScoreDocComparatorRELEVANCE;

//Specialcomparatorforsortinghitsaccordingtoindexorder(documentnumber).
publicstaticfinalScoreDocComparatorINDEXORDER;
有3个方法与排序相关,需要我们实现分别如下:
Java代码
/**
*ComparestwoScoreDocobjectsandreturnsaresultindicatingtheirsortorder.
*@paramiFirstScoreDoc
*@paramjSecondScoreDoc
*@return-1ifishouldcomebeforej;
*1ifishouldcomeafterj;
*0iftheyareequal
*/
publicintcompare(ScoreDoci,ScoreDocj);
/**
*Returnsthevalueusedtosortthegivendocument.Theobjectreturnedmustimplementthejava.io.Serializableinterface.Thisisusedbymultisearcherstodeterminehowtocollateresultsfromtheirsearchers.
*@paramiDocument
*@returnSerializableobject
*/
publicComparablesortValue(ScoreDoci);
/**
*Returnsthetypeofsort.ShouldreturnSortField.SCORE,SortField.DOC,SortField.STRING,SortField.INTEGER,SortField.FLOATorSortField.CUSTOM.ItisnotvalidtoreturnSortField.AUTO.Thisisusedbymultisearcherstodeterminehowtocollateresultsfromtheirsearchers.
*@returnOneoftheconstantsinSortField.
*/
publicintsortType();
viewplaincopytoclipboardprint?
/**
*ComparestwoScoreDocobjectsandreturnsaresultindicatingtheirsortorder.
*@paramiFirstScoreDoc
*@paramjSecondScoreDoc
*@return-1ifishouldcomebeforej;
*1ifishouldcomeafterj;
*0iftheyareequal
*/
publicintcompare(ScoreDoci,ScoreDocj);
/**
*Returnsthevalueusedtosortthegivendocument.Theobjectreturnedmustimplementthejava.io.Serializableinterface.Thisisusedbymultisearcherstodeterminehowtocollateresultsfromtheirsearchers.
*@paramiDocument
*@returnSerializableobject
*/
publicComparablesortValue(ScoreDoci);
/**
*Returnsthetypeofsort.ShouldreturnSortField.SCORE,SortField.DOC,SortField.STRING,SortField.INTEGER,SortField.FLOATorSortField.CUSTOM.ItisnotvalidtoreturnSortField.AUTO.Thisisusedbymultisearcherstodeterminehowtocollateresultsfromtheirsearchers.
*@returnOneoftheconstantsinSortField.
*/
publicintsortType();
/**
    *ComparestwoScoreDocobjectsandreturnsaresultindicatingtheirsortorder.
    *@paramiFirstScoreDoc
    *@paramjSecondScoreDoc
    *@return-1ifishouldcomebeforej;
    *1ifishouldcomeafterj;
    *0iftheyareequal
    */
    publicintcompare(ScoreDoci,ScoreDocj);
    /**
    *Returnsthevalueusedtosortthegivendocument.Theobjectreturnedmustimplementthejava.io.Serializableinterface.Thisisusedbymultisearcherstodeterminehowtocollateresultsfromtheirsearchers.
    *@paramiDocument
    *@returnSerializableobject
    */
    publicComparablesortValue(ScoreDoci);
    /**
    *Returnsthetypeofsort.ShouldreturnSortField.SCORE,SortField.DOC,SortField.STRING,SortField.INTEGER,SortField.FLOATorSortField.CUSTOM.ItisnotvalidtoreturnSortField.AUTO.Thisisusedbymultisearcherstodeterminehowtocollateresultsfromtheirsearchers.
    *@returnOneoftheconstantsinSortField.
    */
    publicintsortType();
看个例子吧!
该例子为《Lucene in Action》中的一个实现,用来搜索距你最近的餐馆的名字.餐馆坐标用字符串"x,y"来存储.
Java代码
packagecom.nikee.lucene;
importjava.io.IOException;
importorg.apache.lucene.index.IndexReader;
importorg.apache.lucene.index.Term;
importorg.apache.lucene.index.TermDocs;
importorg.apache.lucene.index.TermEnum;
importorg.apache.lucene.search.ScoreDoc;
importorg.apache.lucene.search.ScoreDocComparator;
importorg.apache.lucene.search.SortComparatorSource;
importorg.apache.lucene.search.SortField;
//实现了搜索距你最近的餐馆的名字.餐馆坐标用字符串"x,y"来存储
//DistanceComparatorSource实现了SortComparatorSource接口
publicclassDistanceComparatorSourceimplementsSortComparatorSource{
privatestaticfinallongserialVersionUID=1L;
//xy用来保存坐标位置
privateintx;
privateinty;
publicDistanceComparatorSource(intx,inty){
this.x=x;
this.y=y;
}
//返回ScoreDocComparator用来实现排序功能
publicScoreDocComparatornewComparator(IndexReaderreader,Stringfieldname)throwsIOException{
returnnewDistanceScoreDocLookupComparator(reader,fieldname,x,y);
}
//DistanceScoreDocLookupComparator实现了ScoreDocComparator用来排序
privatestaticclassDistanceScoreDocLookupComparatorimplementsScoreDocComparator{
privatefloat[]distances;//保存每个餐馆到指定点的距离
//构造函数,构造函数在这里几乎完成所有的准备工作.
publicDistanceScoreDocLookupComparator(IndexReaderreader,Stringfieldname,intx,inty)throwsIOException{
System.out.println("fieldName2="+fieldname);
finalTermEnumenumerator=reader.terms(newTerm(fieldname,""));
System.out.println("maxDoc="+reader.maxDoc());
distances=newfloat[reader.maxDoc()];//初始化distances
if(distances.length>0){
TermDocstermDocs=reader.termDocs();
try{
if(enumerator.term()==null){
thrownewRuntimeException("notermsinfield"+fieldname);
}
inti=0,j=0;
do{
System.out.println("indo-while:"+i++);
Termterm=enumerator.term();//取出每一个Term
if(term.field()!=fieldname)//与给定的域不符合则比较下一个
break;
//SetsthistothedataforthecurrentterminaTermEnum.
//Thismaybeoptimizedinsomeimplementations.
termDocs.seek(enumerator);//参考TermDocsDoc
while(termDocs.next()){
System.out.println("inwhile:"+j++);
System.out.println("inwhile,Term:"+term.toString());
String[]xy=term.text().split(",");//去处xy
intdeltax=Integer.parseInt(xy[0])-x;
intdeltay=Integer.parseInt(xy[1])-y;
//计算距离
distances[termDocs.doc()]=(float)Math.sqrt(deltax*deltax+deltay*deltay);
}
}
while(enumerator.next());
}finally{
termDocs.close();
}
}
}
//有上面的构造函数的准备这里就比较简单了
publicintcompare(ScoreDoci,ScoreDocj){
if(distances[i.doc]<distances[j.doc])
return-1;
if(distances[i.doc]>distances[j.doc])
return1;
return0;
}
//返回距离
publicComparablesortValue(ScoreDoci){
returnnewFloat(distances[i.doc]);
}
//指定SortType
publicintsortType(){
returnSortField.FLOAT;
}
}
publicStringtoString(){
return"Distancefrom("+x+","+y+")";
}
}
viewplaincopytoclipboardprint?
packagecom.nikee.lucene;
importjava.io.IOException;
importorg.apache.lucene.index.IndexReader;
importorg.apache.lucene.index.Term;
importorg.apache.lucene.index.TermDocs;
importorg.apache.lucene.index.TermEnum;
importorg.apache.lucene.search.ScoreDoc;
importorg.apache.lucene.search.ScoreDocComparator;
importorg.apache.lucene.search.SortComparatorSource;
importorg.apache.lucene.search.SortField;
//实现了搜索距你最近的餐馆的名字.餐馆坐标用字符串"x,y"来存储
//DistanceComparatorSource实现了SortComparatorSource接口
publicclassDistanceComparatorSourceimplementsSortComparatorSource{
privatestaticfinallongserialVersionUID=1L;
//xy用来保存坐标位置
privateintx;
privateinty;
publicDistanceComparatorSource(intx,inty){
this.x=x;
this.y=y;
}
//返回ScoreDocComparator用来实现排序功能
publicScoreDocComparatornewComparator(IndexReaderreader,Stringfieldname)throwsIOException{
returnnewDistanceScoreDocLookupComparator(reader,fieldname,x,y);
}
//DistanceScoreDocLookupComparator实现了ScoreDocComparator用来排序
privatestaticclassDistanceScoreDocLookupComparatorimplementsScoreDocComparator{
privatefloat[]distances;//保存每个餐馆到指定点的距离
//构造函数,构造函数在这里几乎完成所有的准备工作.
publicDistanceScoreDocLookupComparator(IndexReaderreader,Stringfieldname,intx,inty)throwsIOException{
System.out.println("fieldName2="+fieldname);
finalTermEnumenumerator=reader.terms(newTerm(fieldname,""));
System.out.println("maxDoc="+reader.maxDoc());
distances=newfloat[reader.maxDoc()];//初始化distances
if(distances.length>0){
TermDocstermDocs=reader.termDocs();
try{
if(enumerator.term()==null){
thrownewRuntimeException("notermsinfield"+fieldname);
}
inti=0,j=0;
do{
System.out.println("indo-while:"+i++);
Termterm=enumerator.term();//取出每一个Term
if(term.field()!=fieldname)//与给定的域不符合则比较下一个
break;
//SetsthistothedataforthecurrentterminaTermEnum.
//Thismaybeoptimizedinsomeimplementations.
termDocs.seek(enumerator);//参考TermDocsDoc
while(termDocs.next()){
System.out.println("inwhile:"+j++);
System.out.println("inwhile,Term:"+term.toString());
String[]xy=term.text().split(",");//去处xy
intdeltax=Integer.parseInt(xy[0])-x;
intdeltay=Integer.parseInt(xy[1])-y;
//计算距离
distances[termDocs.doc()]=(float)Math.sqrt(deltax*deltax+deltay*deltay);
}
}
while(enumerator.next());
}finally{
termDocs.close();
}
}
}
//有上面的构造函数的准备这里就比较简单了
publicintcompare(ScoreDoci,ScoreDocj){
if(distances[i.doc]<distances[j.doc])
return-1;
if(distances[i.doc]>distances[j.doc])
return1;
return0;
}
//返回距离
publicComparablesortValue(ScoreDoci){
returnnewFloat(distances[i.doc]);
}
//指定SortType
publicintsortType(){
returnSortField.FLOAT;
}
}
publicStringtoString(){
return"Distancefrom("+x+","+y+")";
}
}
packagecom.nikee.lucene;
importjava.io.IOException;
importorg.apache.lucene.index.IndexReader;
importorg.apache.lucene.index.Term;
importorg.apache.lucene.index.TermDocs;
importorg.apache.lucene.index.TermEnum;
importorg.apache.lucene.search.ScoreDoc;
importorg.apache.lucene.search.ScoreDocComparator;
importorg.apache.lucene.search.SortComparatorSource;
importorg.apache.lucene.search.SortField;
//实现了搜索距你最近的餐馆的名字.餐馆坐标用字符串"x,y"来存储
//DistanceComparatorSource实现了SortComparatorSource接口
publicclassDistanceComparatorSourceimplementsSortComparatorSource{
    privatestaticfinallongserialVersionUID=1L;

    //xy用来保存坐标位置
    privateintx;
    privateinty;

    publicDistanceComparatorSource(intx,inty){
        this.x=x;
        this.y=y;
    }

    //返回ScoreDocComparator用来实现排序功能
    publicScoreDocComparatornewComparator(IndexReaderreader,Stringfieldname)throwsIOException{
        returnnewDistanceScoreDocLookupComparator(reader,fieldname,x,y);
    }

    //DistanceScoreDocLookupComparator实现了ScoreDocComparator用来排序
    privatestaticclassDistanceScoreDocLookupComparatorimplementsScoreDocComparator{
        privatefloat[]distances;//保存每个餐馆到指定点的距离

        //构造函数,构造函数在这里几乎完成所有的准备工作.
        publicDistanceScoreDocLookupComparator(IndexReaderreader,Stringfieldname,intx,inty)throwsIOException{
            System.out.println("fieldName2="+fieldname);
            finalTermEnumenumerator=reader.terms(newTerm(fieldname,""));

            System.out.println("maxDoc="+reader.maxDoc());
            distances=newfloat[reader.maxDoc()];//初始化distances
            if(distances.length>0){
                TermDocstermDocs=reader.termDocs();
                try{
                    if(enumerator.term()==null){
                        thrownewRuntimeException("notermsinfield"+fieldname);
                    }
                    inti=0,j=0;
                    do{
                        System.out.println("indo-while:"+i++);
                        Termterm=enumerator.term();//取出每一个Term
                        if(term.field()!=fieldname)//与给定的域不符合则比较下一个
                            break;

                        //SetsthistothedataforthecurrentterminaTermEnum.
                        //Thismaybeoptimizedinsomeimplementations.
                        termDocs.seek(enumerator);//参考TermDocsDoc
                        while(termDocs.next()){
                            System.out.println("inwhile:"+j++);
                            System.out.println("inwhile,Term:"+term.toString());

                            String[]xy=term.text().split(",");//去处xy
                            intdeltax=Integer.parseInt(xy[0])-x;
                            intdeltay=Integer.parseInt(xy[1])-y;
                            //计算距离
                            distances[termDocs.doc()]=(float)Math.sqrt(deltax*deltax+deltay*deltay);
                        }
                    }
                    while(enumerator.next());
                }finally{
                    termDocs.close();
                }
            }
        }
        //有上面的构造函数的准备这里就比较简单了
        publicintcompare(ScoreDoci,ScoreDocj){
            if(distances[i.doc]<distances[j.doc])
                return-1;
            if(distances[i.doc]>distances[j.doc])
                return1;
            return0;
        }

        //返回距离
        publicComparablesortValue(ScoreDoci){
            returnnewFloat(distances[i.doc]);
        }

        //指定SortType
        publicintsortType(){
            returnSortField.FLOAT;
        }
    }

    publicStringtoString(){
        return"Distancefrom("+x+","+y+")";
    }
}
这是实现了上面两个接口的两个类,里面带有详细注释,可以看出自定义排序并不是很难的.该实现是否正确,我们来看看测试代码能否通过吧.
Java代码
packagecom.nikee.lucene.test;
importjava.io.IOException;
importjunit.framework.TestCase;
importorg.apache.lucene.analysis.WhitespaceAnalyzer;
importorg.apache.lucene.document.Document;
importorg.apache.lucene.document.Field;
importorg.apache.lucene.index.IndexWriter;
importorg.apache.lucene.index.Term;
importorg.apache.lucene.search.FieldDoc;
importorg.apache.lucene.search.Hits;
importorg.apache.lucene.search.IndexSearcher;
importorg.apache.lucene.search.Query;
importorg.apache.lucene.search.ScoreDoc;
importorg.apache.lucene.search.Sort;
importorg.apache.lucene.search.SortField;
importorg.apache.lucene.search.TermQuery;
importorg.apache.lucene.search.TopFieldDocs;
importorg.apache.lucene.store.RAMDirectory;
importcom.nikee.lucene.DistanceComparatorSource;
publicclassDistanceComparatorSourceTestextendsTestCase{
privateRAMDirectorydirectory;
privateIndexSearchersearcher;
privateQueryquery;
//建立测试环境
protectedvoidsetUp()throwsException{
directory=newRAMDirectory();
IndexWriterwriter=newIndexWriter(directory,newWhitespaceAnalyzer(),true);
addPoint(writer,"ElCharro","restaurant",1,2);
addPoint(writer,"CafePocaCosa","restaurant",5,9);
addPoint(writer,"LosBetos","restaurant",9,6);
addPoint(writer,"Nico"sTacoShop","restaurant",3,8);
writer.close();
searcher=newIndexSearcher(directory);
query=newTermQuery(newTerm("type","restaurant"));
}
privatevoidaddPoint(IndexWriterwriter,Stringname,Stringtype,intx,inty)throwsIOException{
Documentdoc=newDocument();
doc.add(newField("name",name,Field.Store.YES,Field.Index.TOKENIZED));
doc.add(newField("type",type,Field.Store.YES,Field.Index.TOKENIZED));
doc.add(newField("location",x+","+y,Field.Store.YES,Field.Index.UN_TOKENIZED));
writer.addDocument(doc);
}
publicvoidtestNearestRestaurantToHome()throwsException{
//使用DistanceComparatorSource来构造一个SortField
Sortsort=newSort(newSortField("location",newDistanceComparatorSource(0,0)));
Hitshits=searcher.search(query,sort);//搜索
//测试
assertEquals("closest","ElCharro",hits.doc(0).get("name"));
assertEquals("furthest","LosBetos",hits.doc(3).get("name"));
}
publicvoidtestNeareastRestaurantToWork()throwsException{
Sortsort=newSort(newSortField("location",newDistanceComparatorSource(10,10)));//工作的坐标10,10
//上面的测试实现了自定义排序,但是并不能访问自定义排序的更详细信息,利用
//TopFieldDocs可以进一步访问相关信息
TopFieldDocsdocs=searcher.search(query,null,3,sort);
assertEquals(4,docs.totalHits);
assertEquals(3,docs.scoreDocs.length);
//取得FieldDoc利用FieldDoc可以取得关于排序的更详细信息请查看FieldDocDoc
FieldDocfieldDoc=(FieldDoc)docs.scoreDocs[0];
assertEquals("(10,10)->(9,6)=sqrt(17)",newFloat(Math.sqrt(17)),fieldDoc.fields[0]);
Documentdocument=searcher.doc(fieldDoc.doc);
assertEquals("LosBetos",document.get("name"));
dumpDocs(sort,docs);//显示相关信息
}
//显示有关排序的信息
privatevoiddumpDocs(Sortsort,TopFieldDocsdocs)throwsIOException{
System.out.println("Sortedby:"+sort);
ScoreDoc[]scoreDocs=docs.scoreDocs;
for(inti=0;i<scoreDocs.length;i++){
FieldDocfieldDoc=(FieldDoc)scoreDocs[i];
Floatdistance=(Float)fieldDoc.fields[0];
Documentdoc=searcher.doc(fieldDoc.doc);
System.out.println(""+doc.get("name")+"@("+doc.get("location")+")->"+distance);
}
}
}