lucene实现自定义的评分



lucene实现自定义的评分
  1. public class MyScoreQuery1{  
  2.       
  3.     public void searchByScoreQuery() throws Exception{  
  4.         IndexSearcher searcher = DocUtil.getSearcher();  
  5.         Query query = new TermQuery(new Term("content","java"));  
  6.           
  7.         //1、创建评分域,如果Type是String类型,那么是Type.BYTE  
  8.         //该域必须是数值型的,并且不能使用norms索引,以及每个文档中该域只能由一个语汇  
  9.         //单元,通常可用Field.Index.not_analyzer_no_norms来进行创建索引  
  10.         FieldScoreQuery fieldScoreQuery = new FieldScoreQuery("size",Type.INT);  
  11.         //2、根据评分域和原有的Query创建自定义的Query对象  
  12.         //query是原有的query,fieldScoreQuery是专门做评分的query  
  13.         MyCustomScoreQuery customQuery = new MyCustomScoreQuery(query, fieldScoreQuery);  
  14.           
  15.         TopDocs topdoc = searcher.search(customQuery, 100);  
  16.         DocUtil.printDocument(topdoc, searcher);  
  17.         searcher.close();  
  18.           
  19.     }  
  20.       
  21.     @SuppressWarnings("serial")  
  22.     private class MyCustomScoreQuery extends CustomScoreQuery{  
  23.   
  24.         public MyCustomScoreQuery(Query subQuery, ValueSourceQuery valSrcQuery) {  
  25.             super(subQuery, valSrcQuery);  
  26.         }  
  27.           
  28.         /** 
  29.          * 这里的reader是针对段的,意思是如果索引包含的段不止一个,那么搜索期间会多次调用 
  30.          * 这个方法,强调这点是重要的,因为它使你的评分逻辑能够有效使用段reader来对域缓存 
  31.          * 中的值进行检索 
  32.          */  
  33.         @Override  
  34.         protected CustomScoreProvider getCustomScoreProvider(IndexReader reader)  
  35.                 throws IOException {  
  36.             //默认情况实现的评分是通过原有的评分*传入进来的评分域所获取的评分来确定最终打分的  
  37.             //为了根据不同的需求进行评分,需要自己进行评分的设定  
  38.             /** 
  39.              * 自定评分的步骤 
  40.              * 创建一个类继承于CustomScoreProvider 
  41.              * 覆盖customScore方法 
  42.              */  
  43. //          return super.getCustomScoreProvider(reader);  
  44.             return new MyCustomScoreProvider(reader);  
  45.         }  
  46.           
  47.           
  48.     }  
  49.       
  50.     private class MyCustomScoreProvider extends CustomScoreProvider{  
  51.   
  52.         public MyCustomScoreProvider(IndexReader reader) {  
  53.             super(reader);  
  54.         }  
  55.           
  56.         /** 
  57.          * subQueryScore表示默认文档的打分 
  58.          * valSrcScore表示的评分域的打分 
  59.          * 默认是subQueryScore*valSrcScore返回的 
  60.          */  
  61.         @Override  
  62.         public float customScore(int doc, float subQueryScore, float valSrcScore)throws IOException {  
  63.             System.out.println("Doc:"+doc);  
  64.             System.out.println("subQueryScore:"+subQueryScore);  
  65.             System.out.println("valSrcScore:"+valSrcScore);  
  66. //          return super.customScore(doc, subQueryScore, valSrcScore);  
  67.             return subQueryScore / valSrcScore;  
  68.         }  
  69.           
  70.     }  
  71. }  

  1. public class MyScoreQuery2 {  
  2.     public void searchByFileScoreQuery() throws Exception{  
  3.         IndexSearcher searcher = DocUtil.getSearcher();  
  4.         Query query = new TermQuery(new Term("content","java"));  
  5.           
  6.         FilenameScoreQuery fieldScoreQuery = new FilenameScoreQuery(query);  
  7.           
  8.         TopDocs topdoc = searcher.search(fieldScoreQuery, 100);  
  9.         DocUtil.printDocument(topdoc, searcher);  
  10.         searcher.close();  
  11.           
  12.     }  
  13.       
  14.     @SuppressWarnings("serial")  
  15.     private class FilenameScoreQuery extends CustomScoreQuery{  
  16.   
  17.         public FilenameScoreQuery(Query subQuery) {  
  18.             super(subQuery);  
  19.         }  
  20.   
  21.         @Override  
  22.         protected CustomScoreProvider getCustomScoreProvider(IndexReader reader)  
  23.                 throws IOException {  
  24. //          return super.getCustomScoreProvider(reader);  
  25.             return new FilenameScoreProvider(reader);  
  26.         }  
  27.     }  
  28.       
  29.     private class FilenameScoreProvider extends CustomScoreProvider{  
  30.         String[] filenames = null;  
  31.         public FilenameScoreProvider(IndexReader reader) {  
  32.             super(reader);  
  33.             try {  
  34.                 filenames = FieldCache.DEFAULT.getStrings(reader, "filename");  
  35.             } catch (IOException e) {e.printStackTrace();}  
  36.         }  
  37.   
  38.         //如何根据doc获取相应的field的值  
  39.         /* 
  40.          * 在reader没有关闭之前,所有的数据会存储要一个域缓存中,可以通过域缓存获取很多有用 
  41.          * 的信息filenames = FieldCache.DEFAULT.getStrings(reader, "filename");可以获取 
  42.          * 所有的filename域的信息 
  43.          */  
  44.         @Override  
  45.         public float customScore(int doc, float subQueryScore, float valSrcScore)  
  46.                 throws IOException {  
  47.             String fileName = filenames[doc];  
  48.             System.out.println(doc+":"+fileName);  
  49. //          return super.customScore(doc, subQueryScore, valSrcScore);  
  50.             if("9.txt".equals(fileName) || "4.txt".equals(fileName)) {  
  51.                 return subQueryScore*1.5f;  
  52.             }  
  53.             return subQueryScore/1.5f;  
  54.         }  
  55.           
  56.     }  
  57. }  
Please read the full article at lucene实现自定义的评分

No comments:

Post a Comment

Labels

Algorithm (219) Lucene (130) LeetCode (97) Database (36) Data Structure (33) text mining (28) Solr (27) java (27) Mathematical Algorithm (26) Difficult Algorithm (25) Logic Thinking (23) Puzzles (23) Bit Algorithms (22) Math (21) List (20) Dynamic Programming (19) Linux (19) Tree (18) Machine Learning (15) EPI (11) Queue (11) Smart Algorithm (11) Operating System (9) Java Basic (8) Recursive Algorithm (8) Stack (8) Eclipse (7) Scala (7) Tika (7) J2EE (6) Monitoring (6) Trie (6) Concurrency (5) Geometry Algorithm (5) Greedy Algorithm (5) Mahout (5) MySQL (5) xpost (5) C (4) Interview (4) Vi (4) regular expression (4) to-do (4) C++ (3) Chrome (3) Divide and Conquer (3) Graph Algorithm (3) Permutation (3) Powershell (3) Random (3) Segment Tree (3) UIMA (3) Union-Find (3) Video (3) Virtualization (3) Windows (3) XML (3) Advanced Data Structure (2) Android (2) Bash (2) Classic Algorithm (2) Debugging (2) Design Pattern (2) Google (2) Hadoop (2) Java Collections (2) Markov Chains (2) Probabilities (2) Shell (2) Site (2) Web Development (2) Workplace (2) angularjs (2) .Net (1) Amazon Interview (1) Android Studio (1) Array (1) Boilerpipe (1) Book Notes (1) ChromeOS (1) Chromebook (1) Codility (1) Desgin (1) Design (1) Divide and Conqure (1) GAE (1) Google Interview (1) Great Stuff (1) Hash (1) High Tech Companies (1) Improving (1) LifeTips (1) Maven (1) Network (1) Performance (1) Programming (1) Resources (1) Sampling (1) Sed (1) Smart Thinking (1) Sort (1) Spark (1) Stanford NLP (1) System Design (1) Trove (1) VIP (1) tools (1)

Popular Posts