use of org.apache.hadoop.mapreduce.RecordReader in project incubator-rya by apache.
the class StatementPatternStorageTest method createStorages.
protected List<StatementPatternStorage> createStorages(String location) throws IOException, InterruptedException {
    List<StatementPatternStorage> storages = new ArrayList<StatementPatternStorage>();
    StatementPatternStorage storage = new StatementPatternStorage();
    InputFormat inputFormat = storage.getInputFormat();
    Job job = new Job(new Configuration());
    storage.setLocation(location, job);
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertNotNull(splits);
    for (InputSplit inputSplit : splits) {
        storage = new StatementPatternStorage();
        job = new Job(new Configuration());
        storage.setLocation(location, job);
        TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID("jtid", 0, false, 0, 0));
        RecordReader recordReader = inputFormat.createRecordReader(inputSplit, taskAttemptContext);
        recordReader.initialize(inputSplit, taskAttemptContext);
        storage.prepareToRead(recordReader, null);
        storages.add(storage);
    }
    return storages;
}
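For reference, the reader prepared above has to satisfy the usual mapreduce.RecordReader contract once prepareToRead hands it to the Pig load path: initialize, then alternate nextKeyValue with getCurrentKey/getCurrentValue, then close. A minimal sketch of driving such a reader by hand (illustrative only, not Rya code; it assumes inputFormat, inputSplit and taskAttemptContext are set up as in the test above):

// Illustrative sketch: drive any org.apache.hadoop.mapreduce.RecordReader manually.
RecordReader<?, ?> rr = inputFormat.createRecordReader(inputSplit, taskAttemptContext);
try {
    rr.initialize(inputSplit, taskAttemptContext);
    while (rr.nextKeyValue()) {
        Object key = rr.getCurrentKey();     // current record's key
        Object value = rr.getCurrentValue(); // current record's value
        // process the key/value pair here
    }
} finally {
    rr.close(); // release the underlying scanner/stream
}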
use of org.apache.hadoop.mapreduce.RecordReader in project incubator-rya by apache.
the class GraphXEdgeInputFormatTest method testInputFormat.
@SuppressWarnings("rawtypes")
@Test
public void testInputFormat() throws Exception {
    RyaStatement input = RyaStatement.builder().setSubject(new RyaURI("http://www.google.com")).setPredicate(new RyaURI("http://some_other_uri")).setObject(new RyaURI("http://www.yahoo.com")).setColumnVisibility(new byte[0]).setValue(new byte[0]).build();
    apiImpl.add(input);
    Job jobConf = Job.getInstance();
    GraphXEdgeInputFormat.setMockInstance(jobConf, instance.getInstanceName());
    GraphXEdgeInputFormat.setConnectorInfo(jobConf, username, password);
    GraphXEdgeInputFormat.setTableLayout(jobConf, TABLE_LAYOUT.SPO);
    GraphXEdgeInputFormat.setInputTableName(jobConf, table);
    GraphXEdgeInputFormat.setInputTableName(jobConf, table);
    GraphXEdgeInputFormat.setScanIsolation(jobConf, false);
    GraphXEdgeInputFormat.setLocalIterators(jobConf, false);
    GraphXEdgeInputFormat.setOfflineTableScan(jobConf, false);
    GraphXEdgeInputFormat inputFormat = new GraphXEdgeInputFormat();
    JobContext context = new JobContextImpl(jobConf.getConfiguration(), jobConf.getJobID());
    List<InputSplit> splits = inputFormat.getSplits(context);
    Assert.assertEquals(1, splits.size());
    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(context.getConfiguration(), new TaskAttemptID(new TaskID(), 1));
    RecordReader reader = inputFormat.createRecordReader(splits.get(0), taskAttemptContext);
    RecordReader ryaStatementRecordReader = (RecordReader) reader;
    ryaStatementRecordReader.initialize(splits.get(0), taskAttemptContext);
    List<Edge> results = new ArrayList<Edge>();
    while (ryaStatementRecordReader.nextKeyValue()) {
        Edge writable = (Edge) ryaStatementRecordReader.getCurrentValue();
        long srcId = writable.srcId();
        long destId = writable.dstId();
        RyaTypeWritable rtw = null;
        Object text = ryaStatementRecordReader.getCurrentKey();
        Edge<RyaTypeWritable> edge = new Edge<RyaTypeWritable>(srcId, destId, rtw);
        results.add(edge);
        System.out.println(text);
    }
    System.out.println(results.size());
    System.out.println(results);
    Assert.assertEquals(2, results.size());
}
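The while loop above is the standard way to drain a mapreduce RecordReader in a test. If several tests need it, the same idea can be factored into a small helper; a hypothetical sketch (not part of the Rya test suite, using the same Hadoop imports as the test above):

// Hypothetical test helper: collect every value a RecordReader produces.
static <K, V> List<V> drainValues(RecordReader<K, V> reader) throws IOException, InterruptedException {
    List<V> values = new ArrayList<>();
    while (reader.nextKeyValue()) {
        values.add(reader.getCurrentValue());
    }
    return values;
}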
use of org.apache.hadoop.mapreduce.RecordReader in project incubator-rya by apache.
the class AccumuloHDFSFileInputFormat method createRecordReader.
@Override
public RecordReader<Key, Value> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    return new RecordReader<Key, Value>() {
        private FileSKVIterator fileSKVIterator;
        private boolean started = false;

        @Override
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
            FileSplit split = (FileSplit) inputSplit;
            Configuration job = taskAttemptContext.getConfiguration();
            Path file = split.getPath();
            FileSystem fs = file.getFileSystem(job);
            Instance instance = MRUtils.AccumuloProps.getInstance(taskAttemptContext);
            fileSKVIterator = RFileOperations.getInstance().openReader(file.toString(), ALLRANGE, new HashSet<ByteSequence>(), false, fs, job, instance.getConfiguration());
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (started) {
                fileSKVIterator.next();
            } else {
                // don't move past the first record yet
                started = true;
            }
            return fileSKVIterator.hasTop();
        }

        @Override
        public Key getCurrentKey() throws IOException, InterruptedException {
            return fileSKVIterator.getTopKey();
        }

        @Override
        public Value getCurrentValue() throws IOException, InterruptedException {
            return fileSKVIterator.getTopValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0;
        }

        @Override
        public void close() throws IOException {
        }
    };
}
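The started flag, together with the "don't move past the first record yet" comment, indicates that the RFile reader is already positioned on its first entry when it is opened, while the RecordReader contract only exposes a record after nextKeyValue() returns true, so the first call must not advance the iterator. A generic sketch of that adapter pattern (the TopIterator interface is a hypothetical stand-in for FileSKVIterator, not an Accumulo type):

import java.io.IOException;

// Sketch: adapting a "cursor already on the first entry" iterator to the
// nextKeyValue()/getCurrentKey() contract of org.apache.hadoop.mapreduce.RecordReader.
interface TopIterator<K, V> {        // hypothetical stand-in for FileSKVIterator
    boolean hasTop();                // is the cursor on a valid entry?
    void next() throws IOException;  // advance the cursor
    K topKey();
    V topValue();
}

class TopIteratorReaderSketch<K, V> {
    private final TopIterator<K, V> it;
    private boolean started = false;

    TopIteratorReaderSketch(TopIterator<K, V> it) {
        this.it = it;
    }

    boolean nextKeyValue() throws IOException {
        if (started) {
            it.next();      // advance only after the first entry has been consumed
        } else {
            started = true; // first call: stay on the entry the reader was opened at
        }
        return it.hasTop();
    }

    K getCurrentKey() {
        return it.topKey();
    }

    V getCurrentValue() {
        return it.topValue();
    }
}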
use of org.apache.hadoop.mapreduce.RecordReader in project carbondata by apache.
the class DistributableDataMapFormat method createRecordReader.
@Override
public RecordReader<Void, ExtendedBlocklet> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    return new RecordReader<Void, ExtendedBlocklet>() {
        private Iterator<ExtendedBlocklet> blockletIterator;
        private ExtendedBlocklet currBlocklet;

        @Override
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
            DataMapDistributableWrapper distributable = (DataMapDistributableWrapper) inputSplit;
            TableDataMap dataMap = DataMapStoreManager.getInstance().getDataMap(table, distributable.getDistributable().getDataMapSchema());
            List<ExtendedBlocklet> blocklets = dataMap.prune(distributable.getDistributable(), dataMapExprWrapper.getFilterResolverIntf(distributable.getUniqueId()), partitions);
            for (ExtendedBlocklet blocklet : blocklets) {
                blocklet.setDataMapUniqueId(distributable.getUniqueId());
            }
            blockletIterator = blocklets.iterator();
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            boolean hasNext = blockletIterator.hasNext();
            if (hasNext) {
                currBlocklet = blockletIterator.next();
            }
            return hasNext;
        }

        @Override
        public Void getCurrentKey() throws IOException, InterruptedException {
            return null;
        }

        @Override
        public ExtendedBlocklet getCurrentValue() throws IOException, InterruptedException {
            return currBlocklet;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0;
        }

        @Override
        public void close() throws IOException {
        }
    };
}
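The reader above is essentially an adapter from a java.util.Iterator to the RecordReader contract, with the key unused. A minimal generic sketch of that pattern (illustrative only; the class name is made up and this is not CarbonData code):

import java.util.Iterator;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Illustrative sketch: expose any Iterator<V> through the RecordReader contract,
// mirroring the blockletIterator-backed reader above.
class IteratorRecordReaderSketch<V> extends RecordReader<Void, V> {
    private final Iterator<V> it;
    private V current;

    IteratorRecordReaderSketch(Iterator<V> it) {
        this.it = it;
    }

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) {
        // nothing to do: the iterator is supplied up front in this sketch
    }

    @Override
    public boolean nextKeyValue() {
        if (!it.hasNext()) {
            return false;
        }
        current = it.next();
        return true;
    }

    @Override
    public Void getCurrentKey() {
        return null; // the key is unused, as in the code above
    }

    @Override
    public V getCurrentValue() {
        return current;
    }

    @Override
    public float getProgress() {
        return 0; // total record count is unknown here
    }

    @Override
    public void close() {
        // nothing to release
    }
}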
use of org.apache.hadoop.mapreduce.RecordReader in project hbase by apache.
the class TableInputFormatBase method createRecordReader.
/**
 * Builds a {@link TableRecordReader}. If no {@link TableRecordReader} was provided, uses
 * the default.
 *
 * @param split The split to work with.
 * @param context The current context.
 * @return The newly created record reader.
 * @throws IOException When creating the reader fails.
 * @see org.apache.hadoop.mapreduce.InputFormat#createRecordReader(
 *   org.apache.hadoop.mapreduce.InputSplit,
 *   org.apache.hadoop.mapreduce.TaskAttemptContext)
 */
@Override
public RecordReader<ImmutableBytesWritable, Result> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException {
    // Just in case a subclass is relying on JobConfigurable magic.
    if (table == null) {
        initialize(context);
    }
    // null check in case our child overrides getTable to not throw.
    try {
        if (getTable() == null) {
            // initialize() must not have been implemented in the subclass.
            throw new IOException(INITIALIZATION_ERROR);
        }
    } catch (IllegalStateException exception) {
        throw new IOException(INITIALIZATION_ERROR, exception);
    }
    TableSplit tSplit = (TableSplit) split;
    LOG.info("Input split length: " + StringUtils.humanReadableInt(tSplit.getLength()) + " bytes.");
    final TableRecordReader trr = this.tableRecordReader != null ? this.tableRecordReader : new TableRecordReader();
    Scan sc = new Scan(this.scan);
    sc.withStartRow(tSplit.getStartRow());
    sc.withStopRow(tSplit.getEndRow());
    trr.setScan(sc);
    trr.setTable(getTable());
    return new RecordReader<ImmutableBytesWritable, Result>() {
        @Override
        public void close() throws IOException {
            trr.close();
            closeTable();
        }

        @Override
        public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
            return trr.getCurrentKey();
        }

        @Override
        public Result getCurrentValue() throws IOException, InterruptedException {
            return trr.getCurrentValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return trr.getProgress();
        }

        @Override
        public void initialize(InputSplit inputsplit, TaskAttemptContext context) throws IOException, InterruptedException {
            trr.initialize(inputsplit, context);
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            return trr.nextKeyValue();
        }
    };
}
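The anonymous class returned here only delegates to the TableRecordReader and adds one extra step on close(): closing the Table the InputFormat opened. That decorator shape generalizes to any "delegate plus cleanup" reader; a hedged sketch (hypothetical class, not HBase code):

import java.io.Closeable;
import java.io.IOException;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Sketch: forward every call to a delegate RecordReader, and run one extra
// cleanup action (for example, closing a table handle) when the reader closes.
class ClosingRecordReaderSketch<K, V> extends RecordReader<K, V> {
    private final RecordReader<K, V> delegate;
    private final Closeable extraCleanup;

    ClosingRecordReaderSketch(RecordReader<K, V> delegate, Closeable extraCleanup) {
        this.delegate = delegate;
        this.extraCleanup = extraCleanup;
    }

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
        delegate.initialize(split, context);
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        return delegate.nextKeyValue();
    }

    @Override
    public K getCurrentKey() throws IOException, InterruptedException {
        return delegate.getCurrentKey();
    }

    @Override
    public V getCurrentValue() throws IOException, InterruptedException {
        return delegate.getCurrentValue();
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        return delegate.getProgress();
    }

    @Override
    public void close() throws IOException {
        delegate.close();
        extraCleanup.close(); // e.g. the table handle held by the enclosing InputFormat
    }
}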