Use of org.apache.hadoop.io.MapFile in project hadoop by apache.
The class org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat (new mapreduce API), method getRecordWriter.
public RecordWriter<WritableComparable<?>, Writable> getRecordWriter(
    TaskAttemptContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  CompressionCodec codec = null;
  CompressionType compressionType = CompressionType.NONE;
  if (getCompressOutput(context)) {
    // find the kind of compression to do
    compressionType = SequenceFileOutputFormat.getOutputCompressionType(context);
    // find the right codec
    Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
    codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
  }
  Path file = getDefaultWorkFile(context, "");
  FileSystem fs = file.getFileSystem(conf);
  // ignore the progress parameter, since MapFile is local
  final MapFile.Writer out = new MapFile.Writer(conf, fs, file.toString(),
      context.getOutputKeyClass().asSubclass(WritableComparable.class),
      context.getOutputValueClass().asSubclass(Writable.class),
      compressionType, codec, context);
  return new RecordWriter<WritableComparable<?>, Writable>() {
    public void write(WritableComparable<?> key, Writable value)
        throws IOException {
      out.append(key, value);
    }

    public void close(TaskAttemptContext context) throws IOException {
      out.close();
    }
  };
}
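For context, a minimal driver sketch showing how this output format is typically wired into a job with the new mapreduce API. The class name MapFileJobSketch, the Text/IntWritable key and value types, and the input/output paths are illustrative assumptions, not part of the Hadoop source above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class MapFileJobSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "mapfile-output-sketch");
    job.setJarByClass(MapFileJobSketch.class);

    // MapFile keys must be WritableComparable and arrive in sorted order;
    // these types satisfy the asSubclass() checks in getRecordWriter().
    // A real job would also set a Mapper/Reducer emitting Text/IntWritable pairs.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);

    // These settings drive the getCompressOutput()/getOutputCompressionType()
    // branch shown above.
    FileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    FileInputFormat.addInputPath(job, new Path(args[0]));   // hypothetical input
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // hypothetical output

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}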
Use of org.apache.hadoop.io.MapFile in project hadoop by apache.
The class org.apache.hadoop.mapred.MapFileOutputFormat (old mapred API), method getRecordWriter.
public RecordWriter<WritableComparable, Writable> getRecordWriter(
    FileSystem ignored, JobConf job, String name, Progressable progress)
    throws IOException {
  // get the path of the temporary output file
  Path file = FileOutputFormat.getTaskOutputPath(job, name);
  FileSystem fs = file.getFileSystem(job);
  CompressionCodec codec = null;
  CompressionType compressionType = CompressionType.NONE;
  if (getCompressOutput(job)) {
    // find the kind of compression to do
    compressionType = SequenceFileOutputFormat.getOutputCompressionType(job);
    // find the right codec
    Class<? extends CompressionCodec> codecClass =
        getOutputCompressorClass(job, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
  }
  // ignore the progress parameter, since MapFile is local
  final MapFile.Writer out = new MapFile.Writer(job, fs, file.toString(),
      job.getOutputKeyClass().asSubclass(WritableComparable.class),
      job.getOutputValueClass().asSubclass(Writable.class),
      compressionType, codec, progress);
  return new RecordWriter<WritableComparable, Writable>() {
    public void write(WritableComparable key, Writable value)
        throws IOException {
      out.append(key, value);
    }

    public void close(Reporter reporter) throws IOException {
      out.close();
    }
  };
}
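The old mapred API variant above is configured through a JobConf instead of a Job. A minimal sketch under the same assumptions (hypothetical class name, key/value types, and paths):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapFileOutputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

public class OldApiMapFileJobSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(OldApiMapFileJobSketch.class);
    job.setJobName("mapfile-output-old-api-sketch");

    // A real job would also set a Mapper/Reducer emitting Text/IntWritable pairs.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormat(MapFileOutputFormat.class);

    // Enables the compression branch in getRecordWriter() above.
    FileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    FileInputFormat.setInputPaths(job, new Path(args[0]));  // hypothetical input
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // hypothetical output

    JobClient.runJob(job);
  }
}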
Use of org.apache.hadoop.io.MapFile in project nutch by apache.
The class TestSegmentMergerCrawlDatums, method checkMergedSegment.
/**
 * Checks the merged segment and removes the test data afterwards.
 *
 * @param testDir the test directory
 * @param mergedSegment the merged segment
 * @return the final status
 */
protected byte checkMergedSegment(Path testDir, Path mergedSegment)
    throws Exception {
  // Get a MapFile reader for the <Text,CrawlDatum> pairs
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(
      new Path(mergedSegment, CrawlDatum.FETCH_DIR_NAME), conf);
  Text key = new Text();
  CrawlDatum value = new CrawlDatum();
  byte finalStatus = 0x0;
  for (MapFile.Reader reader : readers) {
    while (reader.next(key, value)) {
      LOG.info("Reading status for: " + key.toString() + " > "
          + CrawlDatum.getStatusName(value.getStatus()));
      // Only consider fetch status
      if (CrawlDatum.hasFetchStatus(value)
          && key.toString().equals("http://nutch.apache.org/")) {
        finalStatus = value.getStatus();
      }
    }
    // Close the reader again
    reader.close();
  }
  // Remove the test directory again
  fs.delete(testDir, true);
  LOG.info("Final fetch status for: http://nutch.apache.org/ > "
      + CrawlDatum.getStatusName(finalStatus));
  // Return the final status
  return finalStatus;
}
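The test above scans every reader sequentially. When only a single key is needed, MapFileOutputFormat also offers getEntry(), which uses the writing job's partitioner to pick the correct reader instead of scanning them all. A hedged sketch, assuming Text values and the default HashPartitioner (the Nutch segment actually stores CrawlDatum values, so the value type would need to match):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

public class MapFileLookupSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path dir = new Path(args[0]);   // hypothetical directory of MapFiles
    Text key = new Text(args[1]);   // hypothetical lookup key
    Text value = new Text();        // assumes Text values; adjust to the real type

    // One reader per part-* MapFile, as in checkMergedSegment() above.
    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(dir, conf);
    try {
      // getEntry() routes the key to the right reader with the same
      // partitioner the writing job used (HashPartitioner is the default).
      Writable hit = MapFileOutputFormat.getEntry(readers,
          new HashPartitioner<Text, Text>(), key, value);
      System.out.println(hit == null ? "not found" : key + " -> " + value);
    } finally {
      for (MapFile.Reader reader : readers) {
        reader.close();
      }
    }
  }
}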