Example use of org.apache.nutch.parse.ParseResult in the Apache Nutch project.
Class ZipParser, method main.
/**
 * Command-line entry point: parses the zip file named by the first argument
 * and prints the size of the resulting {@link ParseResult}, followed by the
 * text and data of the parse stored under the file's URL.
 *
 * @param args command-line arguments; {@code args[0]} is the path of the zip file
 * @throws IOException if the canonical path cannot be resolved or the file
 *         cannot be read
 */
public static void main(String[] args) throws IOException {
  if (args.length < 1) {
    System.out.println("ZipParser <zip_file>");
    System.exit(1);
  }

  File file = new File(args[0]);
  String url = "file:" + file.getCanonicalPath();

  // Read the whole file in one call. InputStream.available() is only an
  // estimate and a single read() may return fewer bytes than requested, so
  // sizing and filling the buffer that way can hand the parser truncated
  // content. Files.readAllBytes also closes the file even when reading fails.
  byte[] bytes = java.nio.file.Files.readAllBytes(file.toPath());

  Configuration conf = NutchConfiguration.create();
  ZipParser parser = new ZipParser();
  parser.setConf(conf);

  Metadata meta = new Metadata();
  meta.add(Response.CONTENT_LENGTH, String.valueOf(file.length()));

  // The same URL is passed for both of Content's leading URL arguments and
  // is then used as the key to look the parse up in the result.
  ParseResult parseResult = parser.getParse(
      new Content(url, url, bytes, "application/zip", meta, conf));
  Parse p = parseResult.get(url);

  System.out.println(parseResult.size());
  System.out.println("Parse Text:");
  System.out.println(p.getText());
  System.out.println("Parse Data:");
  System.out.println(p.getData());
}
Aggregations