Use of io.cdap.cdap.api.data.batch.Split in project cdap by caskdata.
The class DatasetInputFormat, method getSplits.
@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
  try (DatasetAccessor datasetAccessor = new DatasetAccessor(jobConf)) {
    try {
      datasetAccessor.initialize();
    } catch (Exception e) {
      throw new IOException("Could not get dataset", e);
    }
    try (RecordScannable recordScannable = datasetAccessor.getDataset()) {
      Job job = new Job(jobConf);
      JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
      Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);
      List<Split> dsSplits = recordScannable.getSplits();
      InputSplit[] inputSplits = new InputSplit[dsSplits.size()];
      for (int i = 0; i < dsSplits.size(); i++) {
        inputSplits[i] = new DatasetInputSplit(dsSplits.get(i), tablePaths[0]);
      }
      return inputSplits;
    }
  }
}
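For context, this Hive InputFormat only adapts split enumeration: getSplits() returns dataset-level Split objects, and each one is wrapped in a DatasetInputSplit so that Hadoop, which only understands InputSplit, can carry it to the tasks. Record iteration itself goes through CDAP's public RecordScannable/RecordScanner API. A minimal sketch of driving that API directly, split by split (the class and method names here are illustrative, not CDAP code):

import io.cdap.cdap.api.data.batch.RecordScannable;
import io.cdap.cdap.api.data.batch.RecordScanner;
import io.cdap.cdap.api.data.batch.Split;
import java.util.List;

public final class RecordScannableReader {
  // Iterates every split of a RecordScannable and prints each record --
  // the same traversal a MapReduce task performs once the InputSplits
  // produced above are handed to its record readers.
  public static <R> void dumpAll(RecordScannable<R> scannable) throws InterruptedException {
    List<Split> splits = scannable.getSplits();
    for (Split split : splits) {
      RecordScanner<R> scanner = scannable.createSplitRecordScanner(split);
      try {
        scanner.initialize(split);
        while (scanner.nextRecord()) {
          System.out.println(scanner.getCurrentRecord());
        }
      } finally {
        scanner.close();
      }
    }
  }
}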
Use of io.cdap.cdap.api.data.batch.Split in project cdap by caskdata.
The class BufferingTable, method getSplits.
/**
 * Fallback implementation of getSplits that delegates to
 * {@link SplitsUtil#primitiveGetSplits(int, byte[], byte[])}.
 * Ideally this should be overridden by subclasses.
 *
 * @param numSplits desired number of splits. If greater than zero, at most this many splits will be
 *                  returned; if less than or equal to zero, any number of splits can be returned.
 * @param start if non-null, the returned splits will only cover keys that are greater than or equal to it
 * @param stop if non-null, the returned splits will only cover keys that are strictly less than it
 * @return list of {@link Split}
 */
@Override
public List<Split> getSplits(int numSplits, byte[] start, byte[] stop) {
  ensureTransactionIsStarted();
  List<KeyRange> keyRanges = SplitsUtil.primitiveGetSplits(numSplits, start, stop);
  return Lists.transform(keyRanges, new Function<KeyRange, Split>() {
    @Nullable
    @Override
    public Split apply(@Nullable KeyRange input) {
      return new TableSplit(input == null ? null : input.getStart(),
                            input == null ? null : input.getStop());
    }
  });
}
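SplitsUtil.primitiveGetSplits is CDAP-internal, but the idea is simply to carve the requested key range into contiguous sub-ranges. A deliberately naive, single-byte-granularity sketch of such a helper (hypothetical class and method, not CDAP's implementation, which handles multi-byte boundaries properly):

import java.util.ArrayList;
import java.util.List;

public final class NaiveSplits {
  // Partitions [start, stop) into at most numSplits contiguous {start, stop}
  // pairs, deciding boundaries by the first key byte only.
  public static List<byte[][]> split(int numSplits, byte[] start, byte[] stop) {
    int lo = (start == null || start.length == 0) ? 0 : (start[0] & 0xff);
    int hi = (stop == null || stop.length == 0) ? 256 : (stop[0] & 0xff);
    int n = numSplits > 0 ? numSplits : 8; // <= 0 means "any number of splits"
    List<byte[][]> ranges = new ArrayList<>();
    for (int i = 0; i < n; i++) {
      int from = lo + (hi - lo) * i / n;
      int to = lo + (hi - lo) * (i + 1) / n;
      if (from == to) {
        continue; // key space too small for this many splits
      }
      byte[] rangeStart = (i == 0) ? start : new byte[] { (byte) from };
      byte[] rangeStop = (i == n - 1) ? stop : new byte[] { (byte) to };
      ranges.add(new byte[][] { rangeStart, rangeStop });
    }
    return ranges;
  }
}

The Guava Lists.transform in the method above then turns each such range lazily into a TableSplit, preserving null bounds for open-ended ranges.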
Use of io.cdap.cdap.api.data.batch.Split in project cdap by caskdata.
The class DatasetInputFormatProvider, method createBatchReadableConfiguration.
private Map<String, String> createBatchReadableConfiguration() {
  List<Split> splits = this.splits;
  if (splits == null) {
    splits = ((BatchReadable<?, ?>) dataset).getSplits();
  }
  Configuration hConf = new Configuration();
  hConf.clear();
  try {
    AbstractBatchReadableInputFormat.setDatasetSplits(hConf, datasetNamespace, datasetName, datasetArgs, splits);
    return ConfigurationUtil.toMap(hConf);
  } catch (IOException e) {
    throw new IllegalArgumentException(e);
  }
}
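Two details worth noting: hConf.clear() strips Hadoop's default properties so the returned map contains only the dataset-split entries, and ConfigurationUtil.toMap (CDAP-internal) amounts to copying a Configuration, which is Iterable over its entries, into a plain map. A minimal sketch of such a helper (hypothetical class name):

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;

public final class ConfUtil {
  // Copies every entry of a Hadoop Configuration into a plain Map,
  // e.g. for embedding in a serializable provider configuration.
  public static Map<String, String> toMap(Configuration conf) {
    Map<String, String> map = new HashMap<>();
    for (Map.Entry<String, String> entry : conf) {
      map.put(entry.getKey(), entry.getValue());
    }
    return map;
  }
}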
Use of io.cdap.cdap.api.data.batch.Split in project cdap by caskdata.
The class ObjectStoreDatasetTest, method testBatchCustomList.
@Test
public void testBatchCustomList() throws Exception {
  DatasetId customlist = DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("customlist");
  createObjectStoreInstance(customlist, new TypeToken<List<Custom>>() { }.getType());
  final ObjectStoreDataset<List<Custom>> customStore = dsFrameworkUtil.getInstance(customlist);
  TransactionExecutor txnl = dsFrameworkUtil.newInMemoryTransactionExecutor(customStore);
  final SortedSet<Long> keysWritten = Sets.newTreeSet();
  txnl.execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      List<Custom> customList1 = Arrays.asList(new Custom(1, Lists.newArrayList("one", "ONE")),
                                               new Custom(2, Lists.newArrayList("two", "TWO")));
      Random rand = new Random(100);
      long key1 = rand.nextLong();
      keysWritten.add(key1);
      customStore.write(Bytes.toBytes(key1), customList1);
      List<Custom> customList2 = Arrays.asList(new Custom(3, Lists.newArrayList("three", "THREE")),
                                               new Custom(4, Lists.newArrayList("four", "FOUR")));
      long key2 = rand.nextLong();
      keysWritten.add(key2);
      customStore.write(Bytes.toBytes(key2), customList2);
    }
  });
  final SortedSet<Long> keysWrittenCopy = ImmutableSortedSet.copyOf(keysWritten);
  txnl.execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // get the splits for the table
      List<Split> splits = customStore.getSplits();
      for (Split split : splits) {
        SplitReader<byte[], List<Custom>> reader = customStore.createSplitReader(split);
        reader.initialize(split);
        while (reader.nextKeyValue()) {
          byte[] key = reader.getCurrentKey();
          Assert.assertTrue(keysWritten.remove(Bytes.toLong(key)));
        }
      }
      // verify all keys have been read
      if (!keysWritten.isEmpty()) {
        System.out.println("Remaining [" + keysWritten.size() + "]: " + keysWritten);
      }
      Assert.assertTrue(keysWritten.isEmpty());
    }
  });
  deleteAndVerifyInBatch(customStore, txnl, keysWrittenCopy);
  dsFrameworkUtil.deleteInstance(customlist);
}
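The Custom bean is defined elsewhere in the test sources; a minimal stand-in with the constructor shape the test uses (an int plus a list of strings, with value-based equals/hashCode so round-tripped objects compare equal) might look like this. It is an assumption for readability, not the actual class:

import java.util.List;
import java.util.Objects;

public class Custom {
  private final int id;              // hypothetical field name
  private final List<String> labels; // hypothetical field name

  public Custom(int id, List<String> labels) {
    this.id = id;
    this.labels = labels;
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof Custom)) {
      return false;
    }
    Custom that = (Custom) o;
    return id == that.id && Objects.equals(labels, that.labels);
  }

  @Override
  public int hashCode() {
    return Objects.hash(id, labels);
  }
}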
Use of io.cdap.cdap.api.data.batch.Split in project cdap by caskdata.
The class ObjectMappedTableDatasetTest, method testGetSplits.
@Test
public void testGetSplits() throws Exception {
  dsFrameworkUtil.createInstance(ObjectMappedTable.class.getName(), RECORDS_ID,
                                 ObjectMappedTableProperties.builder().setType(Record.class).build());
  try {
    final ObjectMappedTableDataset<Record> records = dsFrameworkUtil.getInstance(RECORDS_ID);
    TransactionExecutor txnl = dsFrameworkUtil.newInMemoryTransactionExecutor((TransactionAware) records);
    final Record record = new Record(Integer.MAX_VALUE, Long.MAX_VALUE, Float.MAX_VALUE, Double.MAX_VALUE,
                                     "foobar", Bytes.toBytes("foobar"),
                                     ByteBuffer.wrap(Bytes.toBytes("foobar")), UUID.randomUUID());
    final byte[] rowkey = Bytes.toBytes("row1");
    txnl.execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() throws Exception {
        records.write(rowkey, record);
      }
    });
    // should not include the record, since the upper bound is exclusive
    txnl.execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() throws Exception {
        List<Split> splits = records.getSplits(1, null, rowkey);
        List<Record> recordsRead = new ArrayList<>();
        for (Split split : splits) {
          SplitReader<byte[], Record> splitReader = records.createSplitReader(split);
          try {
            splitReader.initialize(split);
            while (splitReader.nextKeyValue()) {
              recordsRead.add(splitReader.getCurrentValue());
            }
          } finally {
            splitReader.close();
          }
        }
        Assert.assertEquals(0, recordsRead.size());
      }
    });
    // should include the record, since the lower bound is inclusive
    txnl.execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() throws Exception {
        List<Split> splits = records.getSplits(1, rowkey, null);
        List<Record> recordsRead = new ArrayList<>();
        for (Split split : splits) {
          SplitReader<byte[], Record> splitReader = records.createSplitReader(split);
          try {
            splitReader.initialize(split);
            while (splitReader.nextKeyValue()) {
              recordsRead.add(splitReader.getCurrentValue());
            }
          } finally {
            splitReader.close();
          }
        }
        Assert.assertEquals(1, recordsRead.size());
        Assert.assertEquals(record, recordsRead.get(0));
      }
    });
  } finally {
    dsFrameworkUtil.deleteInstance(RECORDS_ID);
  }
}
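Both transactions repeat the same split-read loop; against the public BatchReadable/SplitReader API it extracts cleanly into a helper (a sketch; the class name is hypothetical):

import io.cdap.cdap.api.data.batch.BatchReadable;
import io.cdap.cdap.api.data.batch.Split;
import io.cdap.cdap.api.data.batch.SplitReader;
import java.util.ArrayList;
import java.util.List;

public final class SplitScans {
  // Reads every value covered by the given splits; the caller chooses the
  // splits, e.g. records.getSplits(1, rowkey, null) in the test above.
  public static <K, V> List<V> readAll(BatchReadable<K, V> readable, List<Split> splits)
      throws InterruptedException {
    List<V> values = new ArrayList<>();
    for (Split split : splits) {
      SplitReader<K, V> reader = readable.createSplitReader(split);
      try {
        reader.initialize(split);
        while (reader.nextKeyValue()) {
          values.add(reader.getCurrentValue());
        }
      } finally {
        reader.close();
      }
    }
    return values;
  }
}

With it, the exclusive-upper-bound assertion reduces to Assert.assertEquals(0, SplitScans.readAll(records, records.getSplits(1, null, rowkey)).size()).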