Use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project ignite by apache.
The class HadoopSplitWrapperSelfTest, method testSerialization.
/**
 * Tests serialization of the wrapper and the wrapped native split.
 *
 * @throws Exception If fails.
 */
public void testSerialization() throws Exception {
    FileSplit nativeSplit = new FileSplit(new Path("/path/to/file"), 100, 500, new String[] { "host1", "host2" });

    assertEquals("/path/to/file:100+500", nativeSplit.toString());

    HadoopSplitWrapper split = HadoopUtils.wrapSplit(10, nativeSplit, nativeSplit.getLocations());

    assertEquals("[host1, host2]", Arrays.toString(split.hosts()));

    // Round-trip the wrapper through plain Java serialization.
    ByteArrayOutputStream buf = new ByteArrayOutputStream();

    ObjectOutput out = new ObjectOutputStream(buf);

    out.writeObject(split);

    ObjectInput in = new ObjectInputStream(new ByteArrayInputStream(buf.toByteArray()));

    final HadoopSplitWrapper res = (HadoopSplitWrapper) in.readObject();

    // The wrapped native split is restored intact.
    assertEquals("/path/to/file:100+500", HadoopUtils.unwrapSplit(res).toString());

    // The host list is not serialized, so querying it after deserialization must fail.
    GridTestUtils.assertThrows(log, new Callable<Object>() {
        @Override public Object call() throws Exception {
            res.hosts();

            return null;
        }
    }, AssertionError.class, null);
}
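The same round trip can be reproduced outside the test harness. A minimal standalone sketch, assuming only the JDK (the SerializationRoundTrip helper below is hypothetical, not part of Ignite):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;

/** Hypothetical helper: writes a Serializable object to bytes and reads it back. */
final class SerializationRoundTrip {
    @SuppressWarnings("unchecked")
    static <T extends Serializable> T roundTrip(T obj) throws IOException, ClassNotFoundException {
        ByteArrayOutputStream buf = new ByteArrayOutputStream();

        try (ObjectOutputStream out = new ObjectOutputStream(buf)) {
            out.writeObject(obj);
        }

        try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(buf.toByteArray()))) {
            return (T) in.readObject();
        }
    }
}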
Use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project ignite by apache.
The class HadoopV2Context, method getInputSplit.
/** {@inheritDoc} */
@Override
public InputSplit getInputSplit() {
    if (inputSplit == null) {
        HadoopInputSplit split = ctx.taskInfo().inputSplit();

        if (split == null)
            return null;

        if (split instanceof HadoopFileBlock) {
            HadoopFileBlock fileBlock = (HadoopFileBlock) split;

            inputSplit = new FileSplit(new Path(fileBlock.file()), fileBlock.start(), fileBlock.length(), null);
        }
        else {
            try {
                inputSplit = (InputSplit) ((HadoopV2TaskContext) ctx).getNativeSplit(split);
            }
            catch (IgniteCheckedException e) {
                throw new IllegalStateException(e);
            }
        }
    }

    return inputSplit;
}
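For reference, the four-argument FileSplit constructor used above takes the file path, start offset, length in bytes, and preferred hosts (null when locality is unknown). A minimal standalone sketch of constructing and inspecting one (expected outputs in comments follow the test shown earlier):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class FileSplitDemo {
    public static void main(String[] args) {
        // Path, start offset, length in bytes, preferred hosts (null = unknown).
        FileSplit split = new FileSplit(new Path("/path/to/file"), 100, 500, null);

        System.out.println(split.getPath());   // /path/to/file
        System.out.println(split.getStart());  // 100
        System.out.println(split.getLength()); // 500
        System.out.println(split);             // /path/to/file:100+500
    }
}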
Use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project jena by apache.
The class AbstractRdfReader, method initialize.
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException {
    LOG.debug("initialize({}, {})", genericSplit, context);

    // Assuming file split
    if (!(genericSplit instanceof FileSplit))
        throw new IOException("This record reader only supports FileSplit inputs");

    // Find the RDF language from the file name extension
    FileSplit split = (FileSplit) genericSplit;
    Path path = split.getPath();
    Lang lang = RDFLanguages.filenameToLang(path.getName());
    if (lang == null)
        throw new IOException("There is no registered RDF language for the input file " + path.toString());

    // Select the record reader for that language and initialize it
    this.reader = this.selectRecordReader(lang);
    this.reader.initialize(split, context);
}
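The language detection above relies on Jena's registry of known file extensions. A minimal standalone sketch of the same lookup (output comments are indicative):

import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFLanguages;

public class LangDetectionDemo {
    public static void main(String[] args) {
        // A registered extension resolves to its RDF language.
        Lang ttl = RDFLanguages.filenameToLang("data.ttl");
        System.out.println(ttl != null); // true (Turtle)

        // An unregistered extension yields null, which the reader above
        // turns into an IOException.
        Lang unknown = RDFLanguages.filenameToLang("data.unknown-ext");
        System.out.println(unknown); // null
    }
}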
Use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project jena by apache.
The class AbstractWholeFileNodeTupleReader, method initialize.
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    LOG.debug("initialize({}, {})", genericSplit, context);

    // Assuming file split
    if (!(genericSplit instanceof FileSplit))
        throw new IOException("This record reader only supports FileSplit inputs");
    FileSplit split = (FileSplit) genericSplit;

    // Configuration
    Configuration config = context.getConfiguration();
    this.ignoreBadTuples = config.getBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, true);
    if (this.ignoreBadTuples)
        LOG.warn("Configured to ignore bad tuples, parsing errors will be logged and further parsing aborted but no user visible errors will be thrown. Consider setting {} to false to disable this behaviour", RdfIOConstants.INPUT_IGNORE_BAD_TUPLES);

    // Figure out what portion of the file to read
    if (split.getStart() > 0)
        throw new IOException("This record reader requires a file split which covers the entire file");
    final Path file = split.getPath();
    long totalLength = file.getFileSystem(context.getConfiguration()).getFileStatus(file).getLen();
    CompressionCodecFactory factory = new CompressionCodecFactory(config);
    this.compressionCodecs = factory.getCodec(file);
    LOG.info(String.format("Got split with start %d and length %d for file with total length of %d", new Object[] { split.getStart(), split.getLength(), totalLength }));
    if (totalLength > split.getLength())
        throw new IOException("This record reader requires a file split which covers the entire file");

    // Open the file and prepare the input stream
    FileSystem fs = file.getFileSystem(config);
    FSDataInputStream fileIn = fs.open(file);
    this.length = split.getLength();
    if (this.compressionCodecs != null) {
        // Compressed input
        input = new TrackedInputStream(this.compressionCodecs.createInputStream(fileIn));
    }
    else {
        // Uncompressed input
        input = new TrackedInputStream(fileIn);
    }

    // Set up background thread for parser
    iter = this.getPipedIterator();
    this.stream = this.getPipedStream(iter, this.input);
    RDFParserBuilder builder = RdfIOUtils.createRDFParserBuilder(context, file);
    Runnable parserRunnable = this.createRunnable(this, this.input, stream, this.getRdfLanguage(), builder);
    this.parserThread = new Thread(parserRunnable);
    this.parserThread.setDaemon(true);
    this.parserThread.start();
}
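Because this reader rejects any split that does not span the whole file, the matching input format normally guarantees that by disabling splitting. A minimal sketch of that guarantee using the standard Hadoop API (the class name here is hypothetical, not taken from Jena):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

// Hypothetical whole-file input format: never split a file, so every
// FileSplit handed to the record reader starts at 0 and covers the file.
public abstract class WholeFileInputFormat<K, V> extends FileInputFormat<K, V> {
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
        return false;
    }
}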