Use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
From the class DatasetUtil, method createDatasetJobSpec:
public static JobSpecification createDatasetJobSpec(Dataset dataset, MetadataProvider metadataProvider)
        throws AlgebricksException {
    Index index = IndexUtil.getPrimaryIndex(dataset);
    ARecordType itemType = (ARecordType) metadataProvider.findType(dataset);
    // get the meta item type, if the dataset has a meta part
    ARecordType metaItemType = null;
    if (dataset.hasMetaPart()) {
        metaItemType = (ARecordType) metadataProvider.findMetaType(dataset);
    }
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    // resolve the dataset's file splits and the partition constraint that co-locates work with them
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset);
    FileSplit[] fs = splitsAndConstraint.first.getFileSplits();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < fs.length; i++) {
        sb.append(fs[i]).append(' ');
    }
    LOGGER.info("CREATING File Splits: " + sb.toString());
    Pair<ILSMMergePolicyFactory, Map<String, String>> compactionInfo =
            DatasetUtil.getMergePolicyFactory(dataset, metadataProvider.getMetadataTxnContext());
    // prepare a LocalResourceMetadata which will be stored in NC's local resource repository
    IResourceFactory resourceFactory = dataset.getResourceFactory(metadataProvider, index, itemType, metaItemType,
            compactionInfo.first, compactionInfo.second);
    IndexBuilderFactory indexBuilderFactory =
            new IndexBuilderFactory(metadataProvider.getStorageComponentProvider().getStorageManager(),
                    splitsAndConstraint.first, resourceFactory, !dataset.isTemp());
    // the job consists of a single index-create operator, constrained to the dataset's partitions
    IndexCreateOperatorDescriptor indexCreateOp = new IndexCreateOperatorDescriptor(spec, indexBuilderFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, indexCreateOp,
            splitsAndConstraint.second);
    spec.addRoot(indexCreateOp);
    return spec;
}
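For context on how a spec produced this way is typically consumed: a Hyracks client submits the JobSpecification to the cluster controller and blocks until the job finishes. The sketch below is illustrative only; the class name, method name, and endpoint parameters are assumptions, not part of the AsterixDB code above.

import org.apache.hyracks.api.client.HyracksConnection;
import org.apache.hyracks.api.client.IHyracksClientConnection;
import org.apache.hyracks.api.job.JobId;
import org.apache.hyracks.api.job.JobSpecification;

public final class JobRunner {
    private JobRunner() {
    }

    // Hypothetical helper: submit a prepared spec (e.g., from createDatasetJobSpec)
    // to the cluster controller and wait for completion.
    public static void run(JobSpecification spec, String ccHost, int ccPort) throws Exception {
        IHyracksClientConnection hcc = new HyracksConnection(ccHost, ccPort);
        JobId jobId = hcc.startJob(spec);
        hcc.waitForCompletion(jobId);
    }
}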
Use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
From the class FileRemoveOperatorDescriptor, method createPushRuntime:
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
    // each partition deletes the file or directory named by its own split
    final FileSplit split = fileSplitProvider.getFileSplits()[partition];
    final IIOManager ioManager = ctx.getIoManager();
    return new AbstractOperatorNodePushable() {

        @Override
        public void setOutputFrameWriter(int index, IFrameWriter writer, RecordDescriptor recordDesc) {
            // this operator produces no output
            throw new IllegalStateException();
        }

        @Override
        public void initialize() throws HyracksDataException {
            // will only work for files inside the io devices
            File f = split.getFile(ioManager);
            if (quietly) {
                // swallow any failure to delete
                FileUtils.deleteQuietly(f);
            } else {
                try {
                    FileUtils.deleteDirectory(f);
                } catch (IOException e) {
                    throw new HyracksDataException(e);
                }
            }
        }

        @Override
        public IFrameWriter getInputFrameWriter(int index) {
            // this operator consumes no input
            throw new IllegalStateException();
        }

        @Override
        public int getInputArity() {
            return 0;
        }

        @Override
        public void deinitialize() throws HyracksDataException {
        }
    };
}
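A minimal sketch of how this operator might be wired into a job: build one FileSplit per node, wrap them in a ConstantFileSplitProvider, and constrain the operator so partition i runs on the node that owns split i. The node names, paths, and surrounding class are assumptions for illustration; the three-argument constructor is inferred from the quietly flag used above.

import org.apache.hyracks.api.constraints.PartitionConstraintHelper;
import org.apache.hyracks.api.io.FileSplit;
import org.apache.hyracks.api.io.ManagedFileSplit;
import org.apache.hyracks.api.job.JobSpecification;
import org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider;
import org.apache.hyracks.dataflow.std.file.FileRemoveOperatorDescriptor;
import org.apache.hyracks.dataflow.std.file.IFileSplitProvider;

public final class FileRemoveJobExample {
    private FileRemoveJobExample() {
    }

    // Hypothetical wiring: one split per node, deleted quietly.
    public static JobSpecification buildFileRemoveJob() {
        JobSpecification spec = new JobSpecification();
        // ManagedFileSplit paths are resolved relative to each node's io devices.
        FileSplit[] splits = new FileSplit[] { new ManagedFileSplit("nc1", "tmp/scratch"),
                new ManagedFileSplit("nc2", "tmp/scratch") };
        IFileSplitProvider provider = new ConstantFileSplitProvider(splits);
        // 'true' selects the quietly branch above (FileUtils.deleteQuietly).
        FileRemoveOperatorDescriptor fileRemover = new FileRemoveOperatorDescriptor(spec, provider, true);
        // pin partition i of the operator to the node owning split i
        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, fileRemover, "nc1", "nc2");
        spec.addRoot(fileRemover);
        return spec;
    }
}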
Use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
From the class FileScanOperatorDescriptor, method createPushRuntime:
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
    // each partition scans the file named by its own split
    final FileSplit split = fileSplitProvider.getFileSplits()[partition];
    final ITupleParser tp = tupleParserFactory.createTupleParser(ctx);
    final IIOManager ioManager = ctx.getIoManager();
    return new AbstractUnaryOutputSourceOperatorNodePushable() {

        @Override
        public void initialize() throws HyracksDataException {
            File f = split.getFile(ioManager);
            try {
                writer.open();
                InputStream in;
                try {
                    in = new FileInputStream(f);
                } catch (FileNotFoundException e) {
                    writer.fail();
                    throw new HyracksDataException(e);
                }
                // the parser reads the stream and pushes tuples to the downstream writer
                tp.parse(in, writer);
            } catch (Throwable th) {
                writer.fail();
                throw new HyracksDataException(th);
            } finally {
                writer.close();
            }
        }
    };
}
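To close the loop, here is a rough sketch of constructing a FileScanOperatorDescriptor: the split provider names which file each partition reads, and a tuple parser factory (DelimitedDataTupleParserFactory here) turns the bytes into tuples pushed to the writer seen above. The node name, file path, single-string schema, and surrounding class are assumptions for illustration.

import org.apache.hyracks.api.constraints.PartitionConstraintHelper;
import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
import org.apache.hyracks.api.io.FileSplit;
import org.apache.hyracks.api.io.ManagedFileSplit;
import org.apache.hyracks.api.job.JobSpecification;
import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory;
import org.apache.hyracks.dataflow.common.data.parsers.UTF8StringParserFactory;
import org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider;
import org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory;
import org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor;

public final class FileScanExample {
    private FileScanExample() {
    }

    // Hypothetical wiring: partition 0 scans one '|'-delimited file on node nc1.
    public static FileScanOperatorDescriptor buildScanner(JobSpecification spec) {
        FileSplit[] splits = new FileSplit[] { new ManagedFileSplit("nc1", "data/lines.txt") };
        // output schema: a single UTF-8 string field per tuple
        RecordDescriptor scanDesc = new RecordDescriptor(
                new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
        FileScanOperatorDescriptor scanner = new FileScanOperatorDescriptor(spec,
                new ConstantFileSplitProvider(splits),
                new DelimitedDataTupleParserFactory(
                        new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, '|'),
                scanDesc);
        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scanner, "nc1");
        return scanner;
    }
}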