1 package org.apache.lucene.demo; 2 3 /** 4 * Licensed to the Apache Software Foundation (ASF) under one or more 5 * contributor license agreements. See the NOTICE file distributed with 6 * this work for additional information regarding copyright ownership. 7 * The ASF licenses this file to You under the Apache License, Version 2.0 8 * (the "License"); you may not use this file except in compliance with 9 * the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 20 import java.io.File; 21 import java.io.FileReader; 22 23 import org.apache.lucene.document.DateTools; 24 import org.apache.lucene.document.Document; 25 import org.apache.lucene.document.Field; 26 27 /** A utility for making Lucene Documents from a File. */ 28 29 public class FileDocument { 30 /** Makes a document for a File. 31 <p> 32 The document has three fields: 33 <ul> 34 <li><code>path</code>--containing the pathname of the file, as a stored, 35 untokenized field; 36 <li><code>modified</code>--containing the last modified date of the file as 37 a field as created by <a 38 href="lucene.document.DateTools.html">DateTools</a>; and 39 <li><code>contents</code>--containing the full contents of the file, as a 40 Reader field; 41 */ 42 public static Document Document(File f) 43 throws java.io.FileNotFoundException { 44 45 // make a new, empty document 46 Document doc = new Document(); 47 48 // Add the path of the file as a field named "path". Use a field that is 49 // indexed (i.e. searchable), but don't tokenize the field into words. 50 doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); 51 52 // Add the last modified date of the file a field named "modified". Use 53 // a field that is indexed (i.e. searchable), but don't tokenize the field 54 // into words. 55 doc.add(new Field("modified", 56 DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE), 57 Field.Store.YES, Field.Index.NOT_ANALYZED)); 58 59 // Add the contents of the file to a field named "contents". Specify a Reader, 60 // so that the text of the file is tokenized and indexed, but not stored. 61 // Note that FileReader expects the file to be in the system's default encoding. 62 // If that's not the case searching for special characters will fail. 63 doc.add(new Field("contents", new FileReader(f))); 64 65 // return the document 66 return doc; 67 } 68 69 private FileDocument() {} 70 } 71