Use of org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable in project asterixdb by apache.
The class HDFSReadOperatorDescriptor, method createPushRuntime (this variant reads through the legacy org.apache.hadoop.mapred API).
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions) throws HyracksDataException {
final InputSplit[] inputSplits = splitsFactory.getSplits();
return new AbstractUnaryOutputSourceOperatorNodePushable() {
private String nodeName = ctx.getJobletContext().getServiceContext().getNodeId();
@SuppressWarnings("unchecked")
@Override
public void initialize() throws HyracksDataException {
ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
try {
writer.open();
Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
JobConf conf = confFactory.getConf();
conf.setClassLoader(ctx.getJobletContext().getClassLoader());
IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
try {
parser.open(writer);
InputFormat inputFormat = conf.getInputFormat();
for (int i = 0; i < inputSplits.length; i++) {
// read all the partitions scheduled to the current node
if (scheduledLocations[i].equals(nodeName)) {
// pick an unread split to read; synchronize among simultaneous partitions on the same machine
synchronized (executed) {
if (!executed[i]) {
executed[i] = true;
} else {
continue;
}
}
// read the split
RecordReader reader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);
Object key = reader.createKey();
Object value = reader.createValue();
while (reader.next(key, value)) {
parser.parse(key, value, writer, inputSplits[i].toString());
}
}
}
} finally {
parser.close(writer);
}
} catch (Throwable th) {
writer.fail();
throw new HyracksDataException(th);
} finally {
writer.close();
Thread.currentThread().setContextClassLoader(ctxCL);
}
}
};
}
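Most of the usages on this page follow the same lifecycle contract: initialize() opens the inherited writer, pushes frames, calls fail() on any error, and closes the writer exactly once in a finally block. A minimal sketch of that contract, assuming only the Hyracks base class (the class name is illustrative, not from asterixdb):

import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable;

public class SkeletonSource extends AbstractUnaryOutputSourceOperatorNodePushable {
    @Override
    public void initialize() throws HyracksDataException {
        try {
            writer.open();
            // produce frames here and push them to the inherited writer
        } catch (Throwable th) {
            // tell the downstream consumer about the failure before rethrowing
            writer.fail();
            throw new HyracksDataException(th);
        } finally {
            // close exactly once, even after fail()
            writer.close();
        }
    }
}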
Use of org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable in project asterixdb by apache.
The class SortGroupbyTest, method initial.
@Override
protected void initial(final IHyracksTaskContext ctx, int tableSize, final int numFrames) throws HyracksDataException {
builder = new ExternalSortGroupByRunGenerator(ctx, keyFields, inRecordDesc, numFrames, keyFields, normalizedKeyComputerFactory, comparatorFactories, partialAggrInState, outputRec, Algorithm.QUICK_SORT);
mergerOperator = new AbstractUnaryOutputSourceOperatorNodePushable() {
@Override
public void initialize() throws HyracksDataException {
List<GeneratedRunFileReader> runs = builder.getRuns();
ISorter sorter = builder.getSorter();
IBinaryComparator[] comparators = new IBinaryComparator[comparatorFactories.length];
for (int i = 0; i < comparatorFactories.length; ++i) {
comparators[i] = comparatorFactories[i].createBinaryComparator();
}
INormalizedKeyComputer nmkComputer = normalizedKeyComputerFactory == null ? null : normalizedKeyComputerFactory.createNormalizedKeyComputer();
AbstractExternalSortRunMerger merger = new ExternalSortGroupByRunMerger(ctx, sorter, runs, keyFields, inRecordDesc, outputRec, outputRec, numFrames, writer, keyFields, nmkComputer, comparators, partialAggrInState, finalAggrInState, true);
merger.process();
}
};
}
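For reference, a hedged sketch of how a harness might drive the mergerOperator built above; the call sequence and the name outputWriter are assumptions, not code from SortGroupbyTest:

// wire the downstream writer into output slot 0, then run the merge;
// initialize() pushes the sorted, aggregated frames to outputWriter
mergerOperator.setOutputFrameWriter(0, outputWriter, outputRec);
mergerOperator.initialize();
mergerOperator.deinitialize();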
Use of org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable in project asterixdb by apache.
The class DataGenOperatorDescriptor, method createPushRuntime.
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
final FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
final RecordDescriptor recDesc = outRecDescs[0];
final ArrayTupleBuilder tb = new ArrayTupleBuilder(recDesc.getFields().length);
final Random rnd = new Random(randomSeed);
final int maxUniqueAttempts = 20;
return new AbstractUnaryOutputSourceOperatorNodePushable() {
// for quick & dirty exclusion of duplicates
// WARNING: could contain numRecord entries and use a lot of memory
HashSet<String> stringHs = new HashSet<String>();
HashSet<Integer> intHs = new HashSet<Integer>();
@Override
public void initialize() throws HyracksDataException {
try {
writer.open();
for (int i = 0; i < numRecords; i++) {
tb.reset();
for (int j = 0; j < recDesc.getFieldCount(); j++) {
genField(tb, j);
}
if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
appender.write(writer, true);
if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
throw new HyracksDataException("Record size (" + tb.getSize() + ") larger than frame size (" + appender.getBuffer().capacity() + ")");
}
}
}
appender.write(writer, true);
} catch (Throwable th) {
writer.fail();
throw new HyracksDataException(th);
} finally {
writer.close();
}
}
private void genField(ArrayTupleBuilder tb, int fieldIndex) throws HyracksDataException {
DataOutput dos = tb.getDataOutput();
if (recDesc.getFields()[fieldIndex] instanceof IntegerSerializerDeserializer) {
int val = -1;
if (fieldIndex == uniqueField) {
int attempt = 0;
while (attempt < maxUniqueAttempts) {
// rnd.nextInt(bound) avoids the Math.abs(Integer.MIN_VALUE) overflow and the modulo bias of the original Math.abs(rnd.nextInt()) % range
int tmp = rnd.nextInt(intMaxVal - intMinVal) + intMinVal;
if (intHs.contains(tmp))
attempt++;
else {
val = tmp;
intHs.add(val);
break;
}
}
if (attempt == maxUniqueAttempts)
throw new HyracksDataException("MaxUnique attempts reached in datagen");
} else {
// same fix as above: bounded nextInt instead of Math.abs + modulo
val = rnd.nextInt(intMaxVal - intMinVal) + intMinVal;
}
recDesc.getFields()[fieldIndex].serialize(val, dos);
tb.addFieldEndOffset();
} else if (recDesc.getFields()[fieldIndex] instanceof UTF8StringSerializerDeserializer) {
String val = null;
if (fieldIndex == uniqueField) {
int attempt = 0;
while (attempt < maxUniqueAttempts) {
String tmp = randomString(maxStrLen, rnd);
if (stringHs.contains(tmp))
attempt++;
else {
val = tmp;
stringHs.add(val);
break;
}
}
if (attempt == maxUniqueAttempts)
throw new HyracksDataException("MaxUnique attempts reached in datagen");
} else {
val = randomString(maxStrLen, rnd);
}
recDesc.getFields()[fieldIndex].serialize(val, dos);
tb.addFieldEndOffset();
} else {
throw new HyracksDataException("Type unsupported in data generator. Only integers and strings allowed");
}
}
private String randomString(int length, Random random) {
String s = Long.toHexString(Double.doubleToLongBits(random.nextDouble()));
StringBuilder strBuilder = new StringBuilder();
for (int i = 0; i < s.length() && i < length; i++) {
strBuilder.append(s.charAt(random.nextInt(s.length()))); // bounded nextInt avoids the Math.abs(Integer.MIN_VALUE) edge case
}
return strBuilder.toString();
}
};
}
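The append-flush-retry idiom in initialize() above is common enough that Hyracks wraps it in a helper. A hedged alternative sketch, assuming FrameUtils.appendToWriter (org.apache.hyracks.dataflow.common.comm.util) keeps its usual signature:

// appends the tuple, flushing the current frame to writer first if it is
// full; throws if a single tuple cannot fit even in an empty frame
FrameUtils.appendToWriter(writer, appender, tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());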
Use of org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable in project asterixdb by apache.
The class AlgebricksMetaOperatorDescriptor, method createSourceInputPushRuntime.
private IOperatorNodePushable createSourceInputPushRuntime(final IHyracksTaskContext ctx) {
return new AbstractUnaryOutputSourceOperatorNodePushable() {
@Override
public void initialize() throws HyracksDataException {
IFrameWriter startOfPipeline;
RecordDescriptor pipelineOutputRecordDescriptor = outputArity > 0 ? AlgebricksMetaOperatorDescriptor.this.outRecDescs[0] : null;
PipelineAssembler pa = new PipelineAssembler(pipeline, inputArity, outputArity, null, pipelineOutputRecordDescriptor);
startOfPipeline = pa.assemblePipeline(writer, ctx);
try {
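// opening the head runtime cascades open() through the assembled
// micro-operators down to writer; for a source pipeline, tuples are
// produced during this open()/close() cycle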
startOfPipeline.open();
} catch (Exception e) {
startOfPipeline.fail();
throw e;
} finally {
startOfPipeline.close();
}
}
};
}
Use of org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable in project asterixdb by apache.
The class HDFSReadOperatorDescriptor, method createPushRuntime (this variant reads through the newer org.apache.hadoop.mapreduce API).
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions) throws HyracksDataException {
final List<FileSplit> inputSplits = splitsFactory.getSplits();
return new AbstractUnaryOutputSourceOperatorNodePushable() {
private String nodeName = ctx.getJobletContext().getServiceContext().getNodeId();
private ContextFactory ctxFactory = new ContextFactory();
@SuppressWarnings("unchecked")
@Override
public void initialize() throws HyracksDataException {
ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
try {
writer.open();
Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
Job job = confFactory.getConf();
job.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
InputFormat inputFormat = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
int size = inputSplits.size();
for (int i = 0; i < size; i++) {
// read all the partitions scheduled to the current node
if (scheduledLocations[i].equals(nodeName)) {
// pick an unread split to read; synchronize among simultaneous partitions on the same machine
synchronized (executed) {
if (!executed[i]) {
executed[i] = true;
} else {
continue;
}
}
// read the split
TaskAttemptContext context = ctxFactory.createContext(job.getConfiguration(), i);
context.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
RecordReader reader = inputFormat.createRecordReader(inputSplits.get(i), context);
reader.initialize(inputSplits.get(i), context);
while (reader.nextKeyValue()) {
parser.parse(reader.getCurrentKey(), reader.getCurrentValue(), writer, inputSplits.get(i).toString());
}
}
}
parser.close(writer);
} catch (Throwable th) {
writer.fail();
throw new HyracksDataException(th);
} finally {
writer.close();
Thread.currentThread().setContextClassLoader(ctxCL);
}
}
};
}
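Both HDFSReadOperatorDescriptor variants coordinate simultaneous partitions on the same node through the shared executed array. The claim step they inline can be isolated as below (a sketch with illustrative names, not project code):

// Atomically claim split i among partitions on the same node: exactly one
// caller sees executed[i] == false, flips it, and gets to read the split.
private boolean claimSplit(boolean[] executed, int i) {
    synchronized (executed) {
        if (executed[i]) {
            return false; // a sibling partition already took this split
        }
        executed[i] = true;
        return true;
    }
}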