use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.
the class UnionTest method createUnionJobSpec.
public static JobSpecification createUnionJobSpec() throws Exception {
JobSpecification spec = new JobSpecification();
IFileSplitProvider splitProvider = new ConstantFileSplitProvider(new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "words.txt"), new ManagedFileSplit(NC1_ID, "data" + File.separator + "nc1" + File.separator + "words.txt") });
RecordDescriptor desc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor csvScanner01 = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','), desc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner01, NC2_ID, NC1_ID);
FileScanOperatorDescriptor csvScanner02 = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','), desc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner02, NC2_ID, NC1_ID);
UnionAllOperatorDescriptor unionAll = new UnionAllOperatorDescriptor(spec, 2, desc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, unionAll, NC2_ID, NC1_ID);
ResultSetId rsId = new ResultSetId(1);
spec.addResultSetId(rsId);
IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), csvScanner01, 0, unionAll, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), csvScanner02, 0, unionAll, 1);
spec.connect(new OneToOneConnectorDescriptor(spec), unionAll, 0, printer, 0);
spec.addRoot(printer);
return spec;
}
use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.
the class NestedPlansRunningAggregatorFactory method assemblePipeline.
private IFrameWriter assemblePipeline(AlgebricksPipeline subplan, IFrameWriter writer, IHyracksTaskContext ctx) throws HyracksDataException {
// plug the operators
IFrameWriter start = writer;
IPushRuntimeFactory[] runtimeFactories = subplan.getRuntimeFactories();
RecordDescriptor[] recordDescriptors = subplan.getRecordDescriptors();
for (int i = runtimeFactories.length - 1; i >= 0; i--) {
IPushRuntime newRuntime = runtimeFactories[i].createPushRuntime(ctx);
newRuntime.setFrameWriter(0, start, recordDescriptors[i]);
if (i > 0) {
newRuntime.setInputRecordDescriptor(0, recordDescriptors[i - 1]);
} else {
// the nts has the same input and output rec. desc.
newRuntime.setInputRecordDescriptor(0, recordDescriptors[0]);
}
start = newRuntime;
}
return start;
}
use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.
the class ResultWriterOperatorDescriptor method createPushRuntime.
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions) throws HyracksDataException {
final IDatasetPartitionManager dpm = ctx.getDatasetPartitionManager();
final IFrame frame = new VSizeFrame(ctx);
final FrameOutputStream frameOutputStream = new FrameOutputStream(ctx.getInitialFrameSize());
frameOutputStream.reset(frame, true);
PrintStream printStream = new PrintStream(frameOutputStream);
final RecordDescriptor outRecordDesc = recordDescProvider.getInputRecordDescriptor(getActivityId(), 0);
final IResultSerializer resultSerializer = resultSerializerFactory.createResultSerializer(outRecordDesc, printStream);
final FrameTupleAccessor frameTupleAccessor = new FrameTupleAccessor(outRecordDesc);
return new AbstractUnaryInputSinkOperatorNodePushable() {
private IFrameWriter datasetPartitionWriter;
private boolean failed = false;
@Override
public void open() throws HyracksDataException {
try {
datasetPartitionWriter = dpm.createDatasetPartitionWriter(ctx, rsId, ordered, asyncMode, partition, nPartitions);
datasetPartitionWriter.open();
resultSerializer.init();
} catch (HyracksException e) {
throw HyracksDataException.create(e);
}
}
@Override
public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
frameTupleAccessor.reset(buffer);
for (int tIndex = 0; tIndex < frameTupleAccessor.getTupleCount(); tIndex++) {
resultSerializer.appendTuple(frameTupleAccessor, tIndex);
if (!frameOutputStream.appendTuple()) {
frameOutputStream.flush(datasetPartitionWriter);
resultSerializer.appendTuple(frameTupleAccessor, tIndex);
frameOutputStream.appendTuple();
}
}
}
@Override
public void fail() throws HyracksDataException {
failed = true;
datasetPartitionWriter.fail();
}
@Override
public void close() throws HyracksDataException {
try {
if (!failed && frameOutputStream.getTupleCount() > 0) {
frameOutputStream.flush(datasetPartitionWriter);
}
} catch (Exception e) {
datasetPartitionWriter.fail();
throw e;
} finally {
datasetPartitionWriter.close();
}
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("{ ");
sb.append("\"rsId\": \"").append(rsId).append("\", ");
sb.append("\"ordered\": ").append(ordered).append(", ");
sb.append("\"asyncMode\": ").append(asyncMode).append(" }");
return sb.toString();
}
};
}
use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.
the class FileRemoveOperatorDescriptor method createPushRuntime.
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
final FileSplit split = fileSplitProvider.getFileSplits()[partition];
final IIOManager ioManager = ctx.getIoManager();
return new AbstractOperatorNodePushable() {
@Override
public void setOutputFrameWriter(int index, IFrameWriter writer, RecordDescriptor recordDesc) {
throw new IllegalStateException();
}
@Override
public void initialize() throws HyracksDataException {
// will only work for files inside the io devices
File f = split.getFile(ioManager);
if (quietly) {
FileUtils.deleteQuietly(f);
} else {
try {
FileUtils.deleteDirectory(f);
} catch (IOException e) {
throw new HyracksDataException(e);
}
}
}
@Override
public IFrameWriter getInputFrameWriter(int index) {
throw new IllegalStateException();
}
@Override
public int getInputArity() {
return 0;
}
@Override
public void deinitialize() throws HyracksDataException {
}
};
}
use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.
the class FlushDatasetUtil method flushDataset.
public static void flushDataset(IHyracksClientConnection hcc, MetadataProvider metadataProvider, String dataverseName, String datasetName, String indexName) throws Exception {
CompilerProperties compilerProperties = metadataProvider.getApplicationContext().getCompilerProperties();
int frameSize = compilerProperties.getFrameSize();
JobSpecification spec = new JobSpecification(frameSize);
RecordDescriptor[] rDescs = new RecordDescriptor[] { new RecordDescriptor(new ISerializerDeserializer[] {}) };
AlgebricksMetaOperatorDescriptor emptySource = new AlgebricksMetaOperatorDescriptor(spec, 0, 1, new IPushRuntimeFactory[] { new EmptyTupleSourceRuntimeFactory() }, rDescs);
org.apache.asterix.common.transactions.JobId jobId = JobIdFactory.generateJobId();
Dataset dataset = metadataProvider.findDataset(dataverseName, datasetName);
FlushDatasetOperatorDescriptor flushOperator = new FlushDatasetOperatorDescriptor(spec, jobId, dataset.getDatasetId());
spec.connect(new OneToOneConnectorDescriptor(spec), emptySource, 0, flushOperator, 0);
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> primarySplitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset, indexName);
AlgebricksPartitionConstraint primaryPartitionConstraint = primarySplitsAndConstraint.second;
AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, emptySource, primaryPartitionConstraint);
JobEventListenerFactory jobEventListenerFactory = new JobEventListenerFactory(jobId, true);
spec.setJobletEventListenerFactory(jobEventListenerFactory);
JobUtils.runJob(hcc, spec, true);
}
Aggregations