| Method details for org.apache.nutch.searcher.NutchBean: |
/**
 * Closes the search bean, segment bean, link database and file system held
 * by this bean, skipping any of them that is {@code null}.
 *
 * <p>Each resource gets its close attempt even when an earlier close throws,
 * so one failure does not leave the remaining resources open. If more than
 * one close fails, the exception from the last failing close is the one that
 * propagates to the caller.
 *
 * @throws IOException if closing any of the resources fails
 */
public void close() throws IOException {
  try {
    if (searchBean != null) { searchBean.close(); }
  } finally {
    try {
      if (segmentBean != null) { segmentBean.close(); }
    } finally {
      try {
        if (linkDb != null) { linkDb.close(); }
      } finally {
        if (fs != null) { fs.close(); }
      }
    }
  }
}
|
/**
 * Looks up the bean stored in the servlet context under {@code KEY}.
 *
 * @param app  servlet context whose attribute is read
 * @param conf not consulted by this method
 * @return the value of the {@code KEY} attribute, cast to {@code NutchBean}
 */
public static NutchBean get(ServletContext app,
    Configuration conf) throws IOException {
  return (NutchBean) app.getAttribute(KEY);
}
Returns the cached instance in the servlet context. |
/**
 * Fetches the anchors for a hit by delegating to {@code linkDb}.
 *
 * @param hit details of the hit to look up
 * @return whatever {@code linkDb.getAnchors(hit)} returns
 * @throws IOException if the underlying lookup fails
 */
public String[] getAnchors(HitDetails hit) throws IOException {
  final String[] anchors = linkDb.getAnchors(hit);
  return anchors;
}
|
/**
 * Fetches the content bytes for a hit by delegating to {@code segmentBean}.
 *
 * @param hit details of the hit to look up
 * @return whatever {@code segmentBean.getContent(hit)} returns
 * @throws IOException if the underlying lookup fails
 */
public byte[] getContent(HitDetails hit) throws IOException {
  final byte[] content = segmentBean.getContent(hit);
  return content;
}
|
/**
 * Resolves the details of a single hit by delegating to {@code searchBean}.
 *
 * @param hit the hit to resolve
 * @return whatever {@code searchBean.getDetails(hit)} returns
 * @throws IOException if the underlying lookup fails
 */
public HitDetails getDetails(Hit hit) throws IOException {
  final HitDetails details = searchBean.getDetails(hit);
  return details;
}
|
/**
 * Resolves the details of several hits in one call by delegating to
 * {@code searchBean}.
 *
 * @param hits the hits to resolve
 * @return whatever {@code searchBean.getDetails(hits)} returns
 * @throws IOException if the underlying lookup fails
 */
public HitDetails[] getDetails(Hit[] hits) throws IOException {
  final HitDetails[] details = searchBean.getDetails(hits);
  return details;
}
|
/**
 * Asks {@code searchBean} for an explanation of a hit with respect to a query.
 *
 * @param query the query the hit was found for
 * @param hit   the hit to explain
 * @return whatever {@code searchBean.getExplanation(query, hit)} returns
 * @throws IOException if the underlying call fails
 */
public String getExplanation(Query query,
    Hit hit) throws IOException {
  final String explanation = searchBean.getExplanation(query, hit);
  return explanation;
}
|
/**
 * Fetches the fetch date for a hit by delegating to {@code segmentBean}.
 *
 * @param hit details of the hit to look up
 * @return whatever {@code segmentBean.getFetchDate(hit)} returns
 * @throws IOException if the underlying lookup fails
 */
public long getFetchDate(HitDetails hit) throws IOException {
  final long fetchDate = segmentBean.getFetchDate(hit);
  return fetchDate;
}
|
/**
 * Fetches the inlinks for a hit by delegating to {@code linkDb}.
 *
 * @param hit details of the hit to look up
 * @return whatever {@code linkDb.getInlinks(hit)} returns
 * @throws IOException if the underlying lookup fails
 */
public Inlinks getInlinks(HitDetails hit) throws IOException {
  final Inlinks inlinks = linkDb.getInlinks(hit);
  return inlinks;
}
|
/**
 * Fetches the parse data for a hit by delegating to {@code segmentBean}.
 *
 * @param hit details of the hit to look up
 * @return whatever {@code segmentBean.getParseData(hit)} returns
 * @throws IOException if the underlying lookup fails
 */
public ParseData getParseData(HitDetails hit) throws IOException {
  final ParseData parseData = segmentBean.getParseData(hit);
  return parseData;
}
|
/**
 * Fetches the parse text for a hit by delegating to {@code segmentBean}.
 *
 * @param hit details of the hit to look up
 * @return whatever {@code segmentBean.getParseText(hit)} returns
 * @throws IOException if the underlying lookup fails
 */
public ParseText getParseText(HitDetails hit) throws IOException {
  final ParseText parseText = segmentBean.getParseText(hit);
  return parseText;
}
|
/**
 * Answers a protocol-version query by forwarding it to whichever wrapped
 * bean matches the requested protocol class.
 *
 * <p>The search bean is asked when {@code className} is the name of
 * {@code RPCSearchBean} and the search bean is an instance of it; otherwise
 * the segment bean is asked under the equivalent condition for
 * {@code RPCSegmentBean}.
 *
 * @param className     fully qualified name of the protocol class
 * @param clientVersion version reported by the client, passed through as-is
 * @return the version reported by the matching bean
 * @throws IOException if neither bean matches {@code className}
 */
public long getProtocolVersion(String className,
    long clientVersion) throws IOException {
  final boolean searchProtocol =
      RPCSearchBean.class.getName().equals(className);
  if (searchProtocol && searchBean instanceof RPCSearchBean) {
    return ((RPCSearchBean) searchBean).getProtocolVersion(className, clientVersion);
  }

  final boolean segmentProtocol =
      RPCSegmentBean.class.getName().equals(className);
  if (segmentProtocol && segmentBean instanceof RPCSegmentBean) {
    return ((RPCSegmentBean) segmentBean).getProtocolVersion(className, clientVersion);
  }

  throw new IOException("Unknown Protocol classname:" + className);
}
|
/**
 * Lists segment names by delegating to {@code segmentBean}.
 *
 * @return whatever {@code segmentBean.getSegmentNames()} returns
 * @throws IOException if the underlying call fails
 */
public String[] getSegmentNames() throws IOException {
  final String[] segmentNames = segmentBean.getSegmentNames();
  return segmentNames;
}
|
/**
 * Builds the summary of a single hit for a query by delegating to
 * {@code segmentBean}.
 *
 * @param hit   details of the hit to summarize
 * @param query the query the summary is produced for
 * @return whatever {@code segmentBean.getSummary(hit, query)} returns
 * @throws IOException if the underlying call fails
 */
public Summary getSummary(HitDetails hit,
    Query query) throws IOException {
  final Summary summary = segmentBean.getSummary(hit, query);
  return summary;
}
|
/**
 * Builds the summaries of several hits for a query in one call by delegating
 * to {@code segmentBean}.
 *
 * @param hits  details of the hits to summarize
 * @param query the query the summaries are produced for
 * @return whatever {@code segmentBean.getSummary(hits, query)} returns
 * @throws IOException if the underlying call fails
 */
public Summary[] getSummary(HitDetails[] hits,
    Query query) throws IOException {
  final Summary[] summaries = segmentBean.getSummary(hits, query);
  return summaries;
}
|
/**
 * Command-line entry point: runs the query given as the first argument and
 * prints the total hit count followed by the details and summary of up to
 * ten hits.
 *
 * <p>Exits with status -1 when no argument is given, 1 when the search
 * throws, and 0 otherwise.
 *
 * @param args {@code args[0]} is the query string; further arguments are ignored
 */
public static void main(String[] args) throws Exception {
  final String usage = "NutchBean query";

  if (args.length == 0) {
    System.err.println(usage);
    System.exit(-1);
  }

  final Configuration conf = NutchConfiguration.create();
  final NutchBean bean = new NutchBean(conf);
  try {
    final Query query = Query.parse(args[0], conf);
    final Hits hits = bean.search(query, 10);
    System.out.println("Total hits: " + hits.getTotal());

    // Never ask for, or print, more hits than were actually returned: the
    // reported total and the number of hits in hand are separate values.
    final int length =
        (int) Math.min(Math.min(hits.getTotal(), hits.getLength()), 10);
    final Hit[] show = hits.getHits(0, length);
    final HitDetails[] details = bean.getDetails(show);
    final Summary[] summaries = bean.getSummary(details, query);

    // Same bound as the arrays above, so the indices always line up.
    for (int i = 0; i < length; i++) {
      System.out.println(" " + i + " " + details[i] + "\n" + summaries[i]);
    }
  } catch (Throwable t) {
    LOG.error("Exception occured while executing search: " + t, t);
    System.exit(1);
  }
  System.exit(0);
}
|
/**
 * Liveness check that performs no work.
 *
 * @return always {@code true}
 */
public boolean ping() {
return true;
}
|
/**
 * Reads host/port pairs from a configuration file.
 *
 * <p>Each line obtained from {@code readConfig(path, conf)} is split on
 * whitespace; the first token is taken as the host and the second as the
 * port. Lines with fewer than two tokens are skipped, and tokens after the
 * second are ignored.
 *
 * @param path location of the file to read
 * @param conf configuration handed through to {@code readConfig}
 * @return one address per usable line, in file order
 * @throws IOException           if the file cannot be read
 * @throws NumberFormatException if a port token is not a valid integer
 */
public static List<InetSocketAddress> readAddresses(Path path,
    Configuration conf) throws IOException {
  // Safe: readConfig only ever puts Strings into the list it returns.
  @SuppressWarnings("unchecked")
  final List<String> lines = readConfig(path, conf);

  final List<InetSocketAddress> addrs = new ArrayList<InetSocketAddress>();
  for (final String line : lines) {
    final StringTokenizer tokens = new StringTokenizer(line);
    if (tokens.hasMoreTokens()) {
      final String host = tokens.nextToken();
      if (tokens.hasMoreTokens()) {
        final String port = tokens.nextToken();
        addrs.add(new InetSocketAddress(host, Integer.parseInt(port)));
      }
    }
  }
  return addrs;
}
|
/**
 * Reads a file line by line through the file system obtained from
 * {@code FileSystem.get(conf)}.
 *
 * <p>The reader is closed before returning, whether or not reading succeeds.
 * Lines are returned exactly as {@link BufferedReader#readLine()} yields
 * them, blank lines included.
 *
 * @param path location of the file to read
 * @param conf configuration used to obtain the file system
 * @return the lines of the file, in order
 * @throws IOException if the file cannot be opened or read
 */
public static List<String> readConfig(Path path,
    Configuration conf) throws IOException {
  final FileSystem fs = FileSystem.get(conf);
  final BufferedReader reader =
      new BufferedReader(new InputStreamReader(fs.open(path)));
  try {
    final List<String> lines = new ArrayList<String>();
    String line;
    while ((line = reader.readLine()) != null) {
      lines.add(line);
    }
    return lines;
  } finally {
    reader.close();
  }
}
|
/**
 * Searches with no dedup field, no sort field and no reversal.
 *
 * @param query   the query to run
 * @param numHits number of hits to ask for
 * @return the hits produced by the five-argument overload
 * @throws IOException if the underlying search fails
 */
public Hits search(Query query,
    int numHits) throws IOException {
  final String dedupField = null;
  final String sortField = null;
  final boolean reverse = false;
  return search(query, numHits, dedupField, sortField, reverse);
}
|
/**
 * Searches with duplicate limiting on the {@code "site"} field, no sort
 * field and no reversal.
 *
 * @param query         the query to run
 * @param numHits       number of hits to ask for
 * @param maxHitsPerDup maximum hits to keep per dedup value
 * @return the hits produced by the six-argument overload
 * @throws IOException if the underlying search fails
 */
public Hits search(Query query,
    int numHits,
    int maxHitsPerDup) throws IOException {
  final String dedupField = "site";
  final String sortField = null;
  final boolean reverse = false;
  return search(query, numHits, maxHitsPerDup, dedupField, sortField, reverse);
}
|
/**
 * Searches with duplicate limiting on a caller-chosen field, no sort field
 * and no reversal.
 *
 * @param query         the query to run
 * @param numHits       number of hits to ask for
 * @param maxHitsPerDup maximum hits to keep per dedup value
 * @param dedupField    field whose value identifies duplicates
 * @return the hits produced by the six-argument overload
 * @throws IOException if the underlying search fails
 */
public Hits search(Query query,
    int numHits,
    int maxHitsPerDup,
    String dedupField) throws IOException {
  final String sortField = null;
  final boolean reverse = false;
  return search(query, numHits, maxHitsPerDup, dedupField, sortField, reverse);
}
|
/**
 * Runs a search directly on {@code searchBean}, with no duplicate limiting
 * applied by this class.
 *
 * @param query      the query to run
 * @param numHits    number of hits to ask for
 * @param dedupField passed through to {@code searchBean}
 * @param sortField  passed through to {@code searchBean}
 * @param reverse    passed through to {@code searchBean}
 * @return whatever {@code searchBean.search(...)} returns
 * @throws IOException if the underlying search fails
 */
public Hits search(Query query,
    int numHits,
    String dedupField,
    String sortField,
    boolean reverse) throws IOException {
  final Hits hits =
      searchBean.search(query, numHits, dedupField, sortField, reverse);
  return hits;
}
|
public Hits search(Query query,
int numHits,
int maxHitsPerDup,
String dedupField,
String sortField,
boolean reverse) throws IOException {
if (maxHitsPerDup < = 0) // disable dup checking
return search(query, numHits, dedupField, sortField, reverse);
final float rawHitsFactor = this.conf.getFloat("searcher.hostgrouping.rawhits.factor", 2.0f);
int numHitsRaw = (int)(numHits * rawHitsFactor);
if (LOG.isInfoEnabled()) {
LOG.info("searching for "+numHitsRaw+" raw hits");
}
Hits hits = searchBean.search(query, numHitsRaw,
dedupField, sortField, reverse);
final long total = hits.getTotal();
final Map< String, DupHits > dupToHits = new HashMap< String, DupHits >();
final List< Hit > resultList = new ArrayList< Hit >();
final Set< Hit > seen = new HashSet< Hit >();
final List< String > excludedValues = new ArrayList< String >();
boolean totalIsExact = true;
for (int rawHitNum = 0; rawHitNum < hits.getTotal(); rawHitNum++) {
// get the next raw hit
if (rawHitNum >= hits.getLength()) {
// optimize query by prohibiting more matches on some excluded values
final Query optQuery = (Query)query.clone();
for (int i = 0; i < excludedValues.size(); i++) {
if (i == MAX_PROHIBITED_TERMS)
break;
optQuery.addProhibitedTerm(excludedValues.get(i),
dedupField);
}
numHitsRaw = (int)(numHitsRaw * rawHitsFactor);
if (LOG.isInfoEnabled()) {
LOG.info("re-searching for "+numHitsRaw+" raw hits, query: "+optQuery);
}
hits = searchBean.search(optQuery, numHitsRaw,
dedupField, sortField, reverse);
if (LOG.isInfoEnabled()) {
LOG.info("found "+hits.getTotal()+" raw hits");
}
rawHitNum = -1;
continue;
}
final Hit hit = hits.getHit(rawHitNum);
if (seen.contains(hit))
continue;
seen.add(hit);
// get dup hits for its value
final String value = hit.getDedupValue();
DupHits dupHits = dupToHits.get(value);
if (dupHits == null)
dupToHits.put(value, dupHits = new DupHits());
// does this hit exceed maxHitsPerDup?
if (dupHits.size() == maxHitsPerDup) { // yes -- ignore the hit
if (!dupHits.maxSizeExceeded) {
// mark prior hits with moreFromDupExcluded
for (int i = 0; i < dupHits.size(); i++) {
dupHits.get(i).setMoreFromDupExcluded(true);
}
dupHits.maxSizeExceeded = true;
excludedValues.add(value); // exclude dup
}
totalIsExact = false;
} else { // no -- collect the hit
resultList.add(hit);
dupHits.add(hit);
// are we done?
// we need to find one more than asked for, so that we can tell if
// there are more hits to be shown
if (resultList.size() > numHits)
break;
}
}
final Hits results =
new Hits(total,
resultList.toArray(new Hit[resultList.size()]));
results.setTotalIsExact(totalIsExact);
return results;
}
|