Use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
The class Common, method parseFileSplits:
static FileSplit[] parseFileSplits(String fileSplits) {
    // Each comma-separated entry has the form "<node>:<path>"; the path is
    // made absolute and wrapped in an UnmanagedFileSplit for that node.
    String[] splits = fileSplits.split(",");
    FileSplit[] fSplits = new FileSplit[splits.length];
    for (int i = 0; i < splits.length; ++i) {
        String s = splits[i].trim();
        int idx = s.indexOf(':');
        if (idx < 0) {
            throw new IllegalArgumentException("File split " + s + " not well formed");
        }
        fSplits[i] = new UnmanagedFileSplit(s.substring(0, idx),
                new File(s.substring(idx + 1)).getAbsolutePath());
    }
    return fSplits;
}
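A minimal sketch of calling this helper; the node names and paths below are hypothetical:

    // Two splits: one on node "nc1", one on node "nc2" (illustrative values).
    FileSplit[] fSplits = parseFileSplits("nc1:/tmp/words-part1.txt, nc2:/tmp/words-part2.txt");
    // Entries are trimmed, so whitespace after the comma is harmless; an entry
    // without a ':' separator raises IllegalArgumentException.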
Use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
The class CountOfCountsTest, method countOfCountsSingleNC:
@Test
public void countOfCountsSingleNC() throws Exception {
    JobSpecification spec = new JobSpecification();

    // Scan the input word list from a single managed file split on NC2.
    FileSplit[] splits = new FileSplit[] {
            new ManagedFileSplit(NC2_ID, "data" + File.separator + "words.txt") };
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
    RecordDescriptor desc = new RecordDescriptor(
            new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider,
            new DelimitedDataTupleParserFactory(
                    new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','),
            desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);

    // Sort by word, then group to count occurrences per word.
    InMemorySortOperatorDescriptor sorter = new InMemorySortOperatorDescriptor(spec, new int[] { 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC2_ID);
    RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
    PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            new MultiFieldsAggregatorFactory(
                    new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
            desc2);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group, NC2_ID);

    // Sort by count, then group again to count how many words share each count.
    InMemorySortOperatorDescriptor sorter2 = new InMemorySortOperatorDescriptor(spec, new int[] { 1 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
            desc2);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter2, NC2_ID);
    RecordDescriptor desc3 = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    PreclusteredGroupOperatorDescriptor group2 = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 1 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
            new MultiFieldsAggregatorFactory(
                    new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
            desc3);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group2, NC2_ID);

    // Write the (count, count-of-counts) result to a result set.
    ResultSetId rsId = new ResultSetId(1);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true, false,
            ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    spec.addResultSetId(rsId);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID);

    // Wire the stages: hash-partition on word, then on count, broadcast to the writer.
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] {
                    PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, sorter, 0);
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, sorter, 0, group, 0);
    IConnectorDescriptor conn3 = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 1 }, new IBinaryHashFunctionFactory[] {
                    PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn3, group, 0, sorter2, 0);
    IConnectorDescriptor conn4 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn4, sorter2, 0, group2, 0);
    IConnectorDescriptor conn5 = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(conn5, group2, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
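Note the contrast with the previous snippet: ManagedFileSplit takes a node-relative path (resolved by the node's IO manager against its managed storage), while UnmanagedFileSplit, as in parseFileSplits above, carries an absolute path. A small side-by-side sketch with an illustrative node id:

    // Resolved by the node's IO manager relative to its managed storage root.
    FileSplit managed = new ManagedFileSplit("nc1", "data" + File.separator + "words.txt");
    // Taken literally as an absolute path on the node's file system.
    FileSplit unmanaged = new UnmanagedFileSplit("nc1", new File("words.txt").getAbsolutePath());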
Use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
The class AbstractRTreeOperatorTest, method loadPrimaryIndex:
protected void loadPrimaryIndex() throws Exception {
    JobSpecification spec = new JobSpecification();

    // Scan the orders file (nine string fields followed by four doubles, '|'-delimited).
    FileSplit[] ordersSplits = new FileSplit[] {
            new ManagedFileSplit(NC1_ID, "data" + File.separator + "orders-with-locations-part1.txt") };
    IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), DoubleSerializerDeserializer.INSTANCE,
            DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE,
            DoubleSerializerDeserializer.INSTANCE });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
            new DelimitedDataTupleParserFactory(new IValueParserFactory[] {
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, DoubleParserFactory.INSTANCE,
                    DoubleParserFactory.INSTANCE, DoubleParserFactory.INSTANCE,
                    DoubleParserFactory.INSTANCE }, '|'),
            ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID);

    // Sort on the key field before bulk loading.
    ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, 1000, new int[] { 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID);

    // Bulk load the primary index, projecting ten of the thirteen scanned fields.
    int[] fieldPermutation = { 0, 1, 2, 4, 5, 7, 9, 10, 11, 12 };
    TreeIndexBulkLoadOperatorDescriptor primaryBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
            primaryRecDesc, fieldPermutation, 0.7f, false, 1000L, true, primaryHelperFactory);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBulkLoad, NC1_ID);
    NullSinkOperatorDescriptor nsOpDesc = new NullSinkOperatorDescriptor(spec);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, nsOpDesc, NC1_ID);

    spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, primaryBulkLoad, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryBulkLoad, 0, nsOpDesc, 0);
    spec.addRoot(nsOpDesc);
    runTest(spec);
}
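The fieldPermutation array determines which scanned fields reach the bulk loader and in what order. A tiny self-contained sketch of the idea, in plain Java with hypothetical values:

    // Illustrative only: a permutation picks input fields by position, so the
    // { 0, 1, 2, 4, ... } array above drops fields 3, 6, and 8 from each tuple.
    Object[] input = { "f0", "f1", "f2", "f3", "f4" };
    int[] perm = { 0, 1, 4 };
    Object[] projected = new Object[perm.length];
    for (int i = 0; i < perm.length; i++) {
        projected[i] = input[perm[i]];
    }
    // projected is now { "f0", "f1", "f4" }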
Use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
The class DatasetUtil, method createDatasetJobSpec:
public static JobSpecification createDatasetJobSpec(Dataset dataset, MetadataProvider metadataProvider)
        throws AlgebricksException {
    Index index = IndexUtil.getPrimaryIndex(dataset);
    ARecordType itemType = (ARecordType) metadataProvider.findType(dataset);
    // get the meta item type
    ARecordType metaItemType = null;
    if (dataset.hasMetaPart()) {
        metaItemType = (ARecordType) metadataProvider.findMetaType(dataset);
    }
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset);
    FileSplit[] fs = splitsAndConstraint.first.getFileSplits();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < fs.length; i++) {
        sb.append(fs[i] + " ");
    }
    LOGGER.info("CREATING File Splits: " + sb.toString());
    Pair<ILSMMergePolicyFactory, Map<String, String>> compactionInfo =
            DatasetUtil.getMergePolicyFactory(dataset, metadataProvider.getMetadataTxnContext());
    // prepare a LocalResourceMetadata which will be stored in the NC's local resource repository
    IResourceFactory resourceFactory = dataset.getResourceFactory(metadataProvider, index, itemType, metaItemType,
            compactionInfo.first, compactionInfo.second);
    IndexBuilderFactory indexBuilderFactory = new IndexBuilderFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first,
            resourceFactory, !dataset.isTemp());
    IndexCreateOperatorDescriptor indexCreateOp = new IndexCreateOperatorDescriptor(spec, indexBuilderFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, indexCreateOp,
            splitsAndConstraint.second);
    spec.addRoot(indexCreateOp);
    return spec;
}
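For context, a hedged sketch of running the returned job spec through the standard Hyracks client connection; the hcc handle is assumed to already be in scope:

    // Hypothetical driver: submit the dataset-creation job and block until it finishes.
    JobSpecification spec = DatasetUtil.createDatasetJobSpec(dataset, metadataProvider);
    JobId jobId = hcc.startJob(spec); // hcc: an IHyracksClientConnection
    hcc.waitForCompletion(jobId);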
Use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
The class PlainFileWriterOperatorDescriptor, method createPushRuntime:
/*
 * (non-Javadoc)
 *
 * @see org.apache.hyracks.api.dataflow.IActivityNode#createPushRuntime(
 * org.apache.hyracks.api.context.IHyracksTaskContext,
 * org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider, int, int)
 */
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions)
        throws HyracksDataException {
    // Output files
    final FileSplit[] splits = fileSplitProvider.getFileSplits();
    IIOManager ioManager = ctx.getIoManager();
    // Frame accessor
    final FrameTupleAccessor frameTupleAccessor =
            new FrameTupleAccessor(recordDescProvider.getInputRecordDescriptor(getActivityId(), 0));
    // Record descriptor
    final RecordDescriptor recordDescriptor = recordDescProvider.getInputRecordDescriptor(getActivityId(), 0);
    return new AbstractUnaryInputSinkOperatorNodePushable() {
        private BufferedWriter out;
        private ByteBufferInputStream bbis;
        private DataInputStream di;

        @Override
        public void open() throws HyracksDataException {
            try {
                out = new BufferedWriter(new FileWriter(splits[partition].getFile(ioManager)));
                bbis = new ByteBufferInputStream();
                di = new DataInputStream(bbis);
            } catch (Exception e) {
                throw new HyracksDataException(e);
            }
        }

        @Override
        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
            try {
                frameTupleAccessor.reset(buffer);
                for (int tIndex = 0; tIndex < frameTupleAccessor.getTupleCount(); tIndex++) {
                    int start = frameTupleAccessor.getTupleStartOffset(tIndex)
                            + frameTupleAccessor.getFieldSlotsLength();
                    bbis.setByteBuffer(buffer, start);
                    Object[] record = new Object[recordDescriptor.getFieldCount()];
                    for (int i = 0; i < record.length; ++i) {
                        Object instance = recordDescriptor.getFields()[i].deserialize(di);
                        if (i == 0) {
                            out.write(String.valueOf(instance));
                        } else {
                            out.write(delim + String.valueOf(instance));
                        }
                    }
                    out.write("\n");
                }
            } catch (IOException ex) {
                throw new HyracksDataException(ex);
            }
        }

        @Override
        public void fail() throws HyracksDataException {
        }

        @Override
        public void close() throws HyracksDataException {
            try {
                out.close();
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }
    };
}
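For context, a hedged sketch of wiring this sink into a job. The constructor shape (spec, split provider, delimiter) is inferred from the fields the snippet uses and should be treated as an assumption; upstreamOp stands in for any single-output operator:

    // Hypothetical wiring: each partition writes its tuples, delimiter-joined,
    // to the file named by its split.
    IFileSplitProvider outProvider = new ConstantFileSplitProvider(new FileSplit[] {
            new ManagedFileSplit(NC1_ID, "results" + File.separator + "out.txt") });
    IOperatorDescriptor writer = new PlainFileWriterOperatorDescriptor(spec, outProvider, "|");
    spec.connect(new OneToOneConnectorDescriptor(spec), upstreamOp, 0, writer, 0);
    spec.addRoot(writer);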