package org.apache.lucene.demo;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/** Simple command-line based search demo. */
public class SearchFiles {

  /** Use the norms from one field for all fields.  Norms are read into memory,
   * using a byte of memory per document per searched field.  This can cause
   * search of large collections with a large number of fields to run out of
   * memory.  If all of the fields contain only a single token, then the norms
   * are all identical, and a single norm vector may be shared. */
  private static class OneNormsReader extends FilterIndexReader {
    private String field;

    public OneNormsReader(IndexReader in, String field) {
      super(in);
      this.field = field;
    }

    @Override
    public byte[] norms(String field) throws IOException {
      return in.norms(this.field);
    }
  }

  private SearchFiles() {}

  /** Simple command-line based search demo. */
  public static void main(String[] args) throws Exception {
    String usage =
      "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field] [-paging hitsPerPage]";
    usage += "\n\tSpecify 'false' for hitsPerPage to use streaming instead of paging search.";
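    // Example invocations (the index directory and query file names are illustrative):
    //   java org.apache.lucene.demo.SearchFiles -index index -field contents -paging 10
    //   java org.apache.lucene.demo.SearchFiles -index index -queries queries.txt -paging false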
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
      System.out.println(usage);
      System.exit(0);
    }

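    // Default option values.  These are meant to line up with the companion
    // IndexFiles demo, which (by default) writes its index to an "index"
    // directory and stores the document text in a "contents" field.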
    String index = "index";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String normsField = null;
    boolean paging = true;
    int hitsPerPage = 10;

    for (int i = 0; i < args.length; i++) {
      if ("-index".equals(args[i])) {
        index = args[i+1];
        i++;
      } else if ("-field".equals(args[i])) {
        field = args[i+1];
        i++;
      } else if ("-queries".equals(args[i])) {
        queries = args[i+1];
        i++;
      } else if ("-repeat".equals(args[i])) {
        repeat = Integer.parseInt(args[i+1]);
        i++;
      } else if ("-raw".equals(args[i])) {
        raw = true;
      } else if ("-norms".equals(args[i])) {
        normsField = args[i+1];
        i++;
      } else if ("-paging".equals(args[i])) {
        if (args[i+1].equals("false")) {
          paging = false;
        } else {
          hitsPerPage = Integer.parseInt(args[i+1]);
          if (hitsPerPage == 0) {
            paging = false;
          }
        }
        i++;
      }
    }

    IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)), true); // only searching, so read-only=true

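    // -norms <field>: wrap the reader so that the norms of one field are used for
    // all searched fields (see OneNormsReader above), saving a byte of memory per
    // document per searched field.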
    if (normsField != null)
      reader = new OneNormsReader(reader, normsField);

    Searcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

    BufferedReader in = null;
    if (queries != null) {
      in = new BufferedReader(new FileReader(queries));
    } else {
      in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    }
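    // Parse each query line into a Lucene Query against the default field.  The
    // analyzer should match the one used at index time; the IndexFiles demo also
    // uses StandardAnalyzer.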
    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, field, analyzer);
    while (true) {
      if (queries == null)                        // prompt the user
        System.out.println("Enter query: ");

      String line = in.readLine();

      if (line == null)                           // end of input
        break;

      line = line.trim();
      if (line.length() == 0)
        break;

      Query query = parser.parse(line);
      System.out.println("Searching for: " + query.toString(field));

      if (repeat > 0) {                           // repeat & time as benchmark
        Date start = new Date();
        for (int i = 0; i < repeat; i++) {
          searcher.search(query, null, 100);
        }
        Date end = new Date();
        System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
      }

      if (paging) {
        doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null);
      } else {
        doStreamingSearch(searcher, query);
      }
    }
    reader.close();
  }

  /**
   * This method uses a custom Collector implementation which simply prints out
   * the docId and score of every matching document.
   *
   * This simulates the streaming search use case, where all hits are supposed to
   * be processed, regardless of their relevance.
   */
  public static void doStreamingSearch(final Searcher searcher, Query query) throws IOException {
    Collector streamingHitCollector = new Collector() {
      private Scorer scorer;
      private int docBase;

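      // docBase (set in setNextReader) is the doc ID base of the index segment
      // currently being searched; adding it to the segment-relative doc gives
      // the document's global doc ID.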
      // simply print docId and score of every matching document
      @Override
      public void collect(int doc) throws IOException {
        System.out.println("doc=" + (docBase + doc) + " score=" + scorer.score());
      }

      @Override
      public boolean acceptsDocsOutOfOrder() {
        return true;
      }

      @Override
      public void setNextReader(IndexReader reader, int docBase)
          throws IOException {
        this.docBase = docBase;
      }

      @Override
      public void setScorer(Scorer scorer) throws IOException {
        this.scorer = scorer;
      }
    };

    searcher.search(query, streamingHitCollector);
  }

  /**
   * This demonstrates a typical paging search scenario, where the search engine presents
   * pages of size n to the user. The user can then go to the next page if interested in
   * the next hits.
   *
   * When the query is executed for the first time, only enough results are collected
   * to fill 5 result pages. If the user wants to page beyond that limit, the query
   * is executed a second time and all hits are collected.
   */
  public static void doPagingSearch(BufferedReader in, Searcher searcher, Query query,
                                    int hitsPerPage, boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
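    // (the second argument to TopScoreDocCollector.create() says whether documents
    // are scored in docID order by the scorer; false is the conservative choice
    // when that is not known)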
    TopScoreDocCollector collector = TopScoreDocCollector.create(
        5 * hitsPerPage, false);
    searcher.search(query, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;

    int numTotalHits = collector.getTotalHits();
    System.out.println(numTotalHits + " total matching documents");
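    // numTotalHits counts every matching document seen by the collector, but only
    // the top 5 * hitsPerPage hits were actually kept in the hits array.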

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
      if (end > hits.length) {
        System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected.");
        System.out.println("Collect more (y/n) ?");
        String line = in.readLine();
        if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
          break;
        }

        collector = TopScoreDocCollector.create(numTotalHits, false);
        searcher.search(query, collector);
        hits = collector.topDocs().scoreDocs;
      }

      end = Math.min(hits.length, start + hitsPerPage);

      for (int i = start; i < end; i++) {
        if (raw) {                              // output raw format
          System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
          continue;
        }

        Document doc = searcher.doc(hits[i].doc);
        String path = doc.get("path");
        if (path != null) {
          System.out.println((i+1) + ". " + path);
          String title = doc.get("title");
          if (title != null) {
            System.out.println("   Title: " + title);
          }
        } else {
          System.out.println((i+1) + ". " + "No path for this document");
        }
      }

      if (!interactive) {
        break;
      }

      if (numTotalHits >= end) {
        boolean quit = false;
        while (true) {
          System.out.print("Press ");
          if (start - hitsPerPage >= 0) {
            System.out.print("(p)revious page, ");
          }
          if (start + hitsPerPage < numTotalHits) {
            System.out.print("(n)ext page, ");
          }
          System.out.println("(q)uit or enter number to jump to a page.");

          String line = in.readLine();
          if (line == null || line.length() == 0 || line.charAt(0) == 'q') {
            quit = true;
            break;
          }
          if (line.charAt(0) == 'p') {
            start = Math.max(0, start - hitsPerPage);
            break;
          } else if (line.charAt(0) == 'n') {
            if (start + hitsPerPage < numTotalHits) {
              start += hitsPerPage;
            }
            break;
          } else {
            int page = Integer.parseInt(line);
            if ((page - 1) * hitsPerPage < numTotalHits) {
              start = (page - 1) * hitsPerPage;
              break;
            } else {
              System.out.println("No such page");
            }
          }
        }
        if (quit) break;
        end = Math.min(numTotalHits, start + hitsPerPage);
      }

    }
  }
}
