Use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.
The class RowCsvInputFormatTest, method testRemovingTrailingCR.
private static void testRemovingTrailingCR(String lineBreakerInFile, String lineBreakerSetup) throws IOException {
    String fileContent = FIRST_PART + lineBreakerInFile + SECOND_PART + lineBreakerInFile;
    // create input file
    File tempFile = File.createTempFile("CsvInputFormatTest", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);
    OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile));
    wrt.write(fileContent);
    wrt.close();
    TypeInformation[] fieldTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO };
    RowCsvInputFormat inputFormat = new RowCsvInputFormat(new Path(tempFile.toURI().toString()), fieldTypes);
    inputFormat.configure(new Configuration());
    inputFormat.setDelimiter(lineBreakerSetup);
    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);
    // read both records back and verify the line breaker was stripped from the field values
    Row result = inputFormat.nextRecord(new Row(1));
    assertNotNull("Expecting to not return null", result);
    assertEquals(FIRST_PART, result.getField(0));
    result = inputFormat.nextRecord(result);
    assertNotNull("Expecting to not return null", result);
    assertEquals(SECOND_PART, result.getField(0));
}
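For context, here is a minimal sketch of driving the same input format end to end, built only from the calls used in the test above; the file path, the extra INT field, and the print statement are illustrative assumptions, not part of the test:

// hedged sketch: reading a whole CSV file into Rows with RowCsvInputFormat
TypeInformation[] fieldTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO };
RowCsvInputFormat format = new RowCsvInputFormat(new Path("/tmp/example.csv"), fieldTypes);
format.configure(new Configuration());
FileInputSplit[] splits = format.createInputSplits(1);
for (FileInputSplit split : splits) {
    format.open(split);
    Row reuse = new Row(fieldTypes.length);
    while (!format.reachedEnd()) {
        // nextRecord reuses the passed Row where possible; a null result means
        // the split yielded no further record (e.g. it is exhausted)
        Row row = format.nextRecord(reuse);
        if (row == null) {
            break;
        }
        System.out.println(row.getField(0) + " | " + row.getField(1));
    }
    format.close();
}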
Use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.
The class JavaApiPostPass, method traverse.
protected void traverse(PlanNode node) {
    if (!alreadyDone.add(node)) {
        // already worked on that one
        return;
    }
    // distinguish the node types
    if (node instanceof SinkPlanNode) {
        // descend to the input channel
        SinkPlanNode sn = (SinkPlanNode) node;
        Channel inchannel = sn.getInput();
        traverseChannel(inchannel);
    } else if (node instanceof SourcePlanNode) {
        TypeInformation<?> typeInfo = getTypeInfoFromSource((SourcePlanNode) node);
        ((SourcePlanNode) node).setSerializer(createSerializer(typeInfo));
    } else if (node instanceof BulkIterationPlanNode) {
        BulkIterationPlanNode iterationNode = (BulkIterationPlanNode) node;
        if (iterationNode.getRootOfStepFunction() instanceof NAryUnionPlanNode) {
            throw new CompilerException("Optimizer cannot compile an iteration step function where next partial solution is created by a Union node.");
        }
        // traverse the termination criterion for the first time; create the schema only, no utilities.
        // Needed in case of an intermediate termination criterion.
        if (iterationNode.getRootOfTerminationCriterion() != null) {
            SingleInputPlanNode addMapper = (SingleInputPlanNode) iterationNode.getRootOfTerminationCriterion();
            traverseChannel(addMapper.getInput());
        }
        BulkIterationBase<?> operator = (BulkIterationBase<?>) iterationNode.getProgramOperator();
        // set the serializer
        iterationNode.setSerializerForIterationChannel(createSerializer(operator.getOperatorInfo().getOutputType()));
        // done, we can now propagate our info down
        traverseChannel(iterationNode.getInput());
        traverse(iterationNode.getRootOfStepFunction());
    } else if (node instanceof WorksetIterationPlanNode) {
        WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) node;
        if (iterationNode.getNextWorkSetPlanNode() instanceof NAryUnionPlanNode) {
            throw new CompilerException("Optimizer cannot compile a workset iteration step function where the next workset is produced by a Union node.");
        }
        if (iterationNode.getSolutionSetDeltaPlanNode() instanceof NAryUnionPlanNode) {
            throw new CompilerException("Optimizer cannot compile a workset iteration step function where the solution set delta is produced by a Union node.");
        }
        DeltaIterationBase<?, ?> operator = (DeltaIterationBase<?, ?>) iterationNode.getProgramOperator();
        // set the serializers and comparators for the workset iteration
        iterationNode.setSolutionSetSerializer(createSerializer(operator.getOperatorInfo().getFirstInputType()));
        iterationNode.setWorksetSerializer(createSerializer(operator.getOperatorInfo().getSecondInputType()));
        iterationNode.setSolutionSetComparator(createComparator(operator.getOperatorInfo().getFirstInputType(), iterationNode.getSolutionSetKeyFields(), getSortOrders(iterationNode.getSolutionSetKeyFields(), null)));
        // traverse the inputs
        traverseChannel(iterationNode.getInput1());
        traverseChannel(iterationNode.getInput2());
        // traverse the step function
        traverse(iterationNode.getSolutionSetDeltaPlanNode());
        traverse(iterationNode.getNextWorkSetPlanNode());
    } else if (node instanceof SingleInputPlanNode) {
        SingleInputPlanNode sn = (SingleInputPlanNode) node;
        if (!(sn.getOptimizerNode().getOperator() instanceof SingleInputOperator)) {
            // special case for delta iterations
            if (sn.getOptimizerNode().getOperator() instanceof NoOpUnaryUdfOp) {
                traverseChannel(sn.getInput());
                return;
            } else {
                throw new RuntimeException("Wrong operator type found in post pass.");
            }
        }
        SingleInputOperator<?, ?, ?> singleInputOperator = (SingleInputOperator<?, ?, ?>) sn.getOptimizerNode().getOperator();
        // parameterize the node's driver strategy
        for (int i = 0; i < sn.getDriverStrategy().getNumRequiredComparators(); i++) {
            sn.setComparator(createComparator(singleInputOperator.getOperatorInfo().getInputType(), sn.getKeys(i), getSortOrders(sn.getKeys(i), sn.getSortOrders(i))), i);
        }
        // done, we can now propagate our info down
        traverseChannel(sn.getInput());
        // don't forget the broadcast inputs
        for (Channel c : sn.getBroadcastInputs()) {
            traverseChannel(c);
        }
    } else if (node instanceof DualInputPlanNode) {
        DualInputPlanNode dn = (DualInputPlanNode) node;
        if (!(dn.getOptimizerNode().getOperator() instanceof DualInputOperator)) {
            throw new RuntimeException("Wrong operator type found in post pass.");
        }
        DualInputOperator<?, ?, ?, ?> dualInputOperator = (DualInputOperator<?, ?, ?, ?>) dn.getOptimizerNode().getOperator();
        // parameterize the node's driver strategy
        if (dn.getDriverStrategy().getNumRequiredComparators() > 0) {
            dn.setComparator1(createComparator(dualInputOperator.getOperatorInfo().getFirstInputType(), dn.getKeysForInput1(), getSortOrders(dn.getKeysForInput1(), dn.getSortOrders())));
            dn.setComparator2(createComparator(dualInputOperator.getOperatorInfo().getSecondInputType(), dn.getKeysForInput2(), getSortOrders(dn.getKeysForInput2(), dn.getSortOrders())));
            dn.setPairComparator(createPairComparator(dualInputOperator.getOperatorInfo().getFirstInputType(), dualInputOperator.getOperatorInfo().getSecondInputType()));
        }
        traverseChannel(dn.getInput1());
        traverseChannel(dn.getInput2());
        // don't forget the broadcast inputs
        for (Channel c : dn.getBroadcastInputs()) {
            traverseChannel(c);
        }
    } else if (node instanceof BulkPartialSolutionPlanNode || node instanceof SolutionSetPlanNode || node instanceof WorksetPlanNode) {
        // catch the sources of the iterative step functions: nothing to do here
    } else if (node instanceof NAryUnionPlanNode) {
        // traverse to all child channels
        for (Channel channel : node.getInputs()) {
            traverseChannel(channel);
        }
    } else {
        throw new CompilerPostPassException("Unknown node type encountered: " + node.getClass().getName());
    }
}
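The pass above funnels every serializer decision through a createSerializer(TypeInformation) helper. A plausible minimal sketch of such a helper, assuming it simply pairs TypeInformation.createSerializer with the RuntimeSerializerFactory that the sorter tests below also use; the body is an assumption for illustration, not the actual JavaApiPostPass source:

// hedged sketch: deriving a shippable serializer factory from a TypeInformation
private static <T> TypeSerializerFactory<?> createSerializer(TypeInformation<T> typeInfo) {
    // derive a concrete serializer from the type information ...
    TypeSerializer<T> serializer = typeInfo.createSerializer(new ExecutionConfig());
    // ... and wrap it in a factory the runtime can hand to the tasks
    return new RuntimeSerializerFactory<T>(serializer, typeInfo.getTypeClass());
}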
Use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.
The class ExternalSortLargeRecordsITCase, method testSortWithLongAndShortRecordsMixed.
@Test
public void testSortWithLongAndShortRecordsMixed() {
    try {
        final int NUM_RECORDS = 1000000;
        final int LARGE_REC_INTERVAL = 100000;
        final TypeInformation<?>[] types = new TypeInformation<?>[] { BasicTypeInfo.LONG_TYPE_INFO, new ValueTypeInfo<SomeMaybeLongValue>(SomeMaybeLongValue.class) };
        final TupleTypeInfo<Tuple2<Long, SomeMaybeLongValue>> typeInfo = new TupleTypeInfo<Tuple2<Long, SomeMaybeLongValue>>(types);
        final TypeSerializer<Tuple2<Long, SomeMaybeLongValue>> serializer = typeInfo.createSerializer(new ExecutionConfig());
        // descending comparator on the first tuple field (the long key)
        final TypeComparator<Tuple2<Long, SomeMaybeLongValue>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { false }, 0, new ExecutionConfig());
        // source that emits a large record every LARGE_REC_INTERVAL records
        MutableObjectIterator<Tuple2<Long, SomeMaybeLongValue>> source = new MutableObjectIterator<Tuple2<Long, SomeMaybeLongValue>>() {

            private final Random rnd = new Random(145610843608763871L);

            private int num = -1;

            @Override
            public Tuple2<Long, SomeMaybeLongValue> next(Tuple2<Long, SomeMaybeLongValue> reuse) {
                return next();
            }

            @Override
            public Tuple2<Long, SomeMaybeLongValue> next() {
                if (++num < NUM_RECORDS) {
                    long val = rnd.nextLong();
                    return new Tuple2<Long, SomeMaybeLongValue>(val, new SomeMaybeLongValue((int) val, num % LARGE_REC_INTERVAL == 0));
                } else {
                    return null;
                }
            }
        };
        @SuppressWarnings("unchecked")
        Sorter<Tuple2<Long, SomeMaybeLongValue>> sorter = new UnilateralSortMerger<Tuple2<Long, SomeMaybeLongValue>>(
                this.memoryManager, this.ioManager, source, this.parentTask,
                new RuntimeSerializerFactory<Tuple2<Long, SomeMaybeLongValue>>(serializer, (Class<Tuple2<Long, SomeMaybeLongValue>>) (Class<?>) Tuple2.class),
                comparator, 1.0, 1, 128, 0.7f, true, /* use large record handler */ true);
        // check order
        MutableObjectIterator<Tuple2<Long, SomeMaybeLongValue>> iterator = sorter.getIterator();
        Tuple2<Long, SomeMaybeLongValue> val = serializer.createInstance();
        long prevKey = Long.MAX_VALUE;
        for (int i = 0; i < NUM_RECORDS; i++) {
            val = iterator.next(val);
            assertTrue("Sort order violated", val.f0 <= prevKey);
            assertEquals("Serialization of test data type incorrect", val.f0.intValue(), val.f1.val());
            // remember the key; without this the descending-order check above is vacuous
            prevKey = val.f0;
        }
        assertNull(iterator.next(val));
        sorter.close();
        testSuccess = true;
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
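Note that the comparator in this test is built with new boolean[] { false }, i.e. descending order on field 0, which is why the loop expects each key to be less than or equal to its predecessor. For comparison, an ascending comparator over the same type would be created like this (a sketch reusing the typeInfo from the test):

// ascending instead of descending order on the first tuple field
TypeComparator<Tuple2<Long, SomeMaybeLongValue>> ascending =
        typeInfo.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());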
Use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.
The class ExternalSortLargeRecordsITCase, method testSortWithShortMediumAndLargeRecords.
@Test
public void testSortWithShortMediumAndLargeRecords() {
    try {
        final int NUM_RECORDS = 50000;
        final int LARGE_REC_INTERVAL = 10000;
        final int MEDIUM_REC_INTERVAL = 500;
        final TypeInformation<?>[] types = new TypeInformation<?>[] { BasicTypeInfo.LONG_TYPE_INFO, new ValueTypeInfo<SmallOrMediumOrLargeValue>(SmallOrMediumOrLargeValue.class) };
        final TupleTypeInfo<Tuple2<Long, SmallOrMediumOrLargeValue>> typeInfo = new TupleTypeInfo<Tuple2<Long, SmallOrMediumOrLargeValue>>(types);
        final TypeSerializer<Tuple2<Long, SmallOrMediumOrLargeValue>> serializer = typeInfo.createSerializer(new ExecutionConfig());
        // descending comparator on the first tuple field (the long key)
        final TypeComparator<Tuple2<Long, SmallOrMediumOrLargeValue>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { false }, 0, new ExecutionConfig());
        // source that mixes small, medium, and large records at fixed intervals
        MutableObjectIterator<Tuple2<Long, SmallOrMediumOrLargeValue>> source = new MutableObjectIterator<Tuple2<Long, SmallOrMediumOrLargeValue>>() {

            private final Random rnd = new Random(1456108743687167086L);

            private int num = -1;

            @Override
            public Tuple2<Long, SmallOrMediumOrLargeValue> next(Tuple2<Long, SmallOrMediumOrLargeValue> reuse) {
                return next();
            }

            @Override
            public Tuple2<Long, SmallOrMediumOrLargeValue> next() {
                if (++num < NUM_RECORDS) {
                    int size;
                    if (num % LARGE_REC_INTERVAL == 0) {
                        size = SmallOrMediumOrLargeValue.LARGE_SIZE;
                    } else if (num % MEDIUM_REC_INTERVAL == 0) {
                        size = SmallOrMediumOrLargeValue.MEDIUM_SIZE;
                    } else {
                        size = SmallOrMediumOrLargeValue.SMALL_SIZE;
                    }
                    long val = rnd.nextLong();
                    return new Tuple2<Long, SmallOrMediumOrLargeValue>(val, new SmallOrMediumOrLargeValue((int) val, size));
                } else {
                    return null;
                }
            }
        };
        @SuppressWarnings("unchecked")
        Sorter<Tuple2<Long, SmallOrMediumOrLargeValue>> sorter = new UnilateralSortMerger<Tuple2<Long, SmallOrMediumOrLargeValue>>(
                this.memoryManager, this.ioManager, source, this.parentTask,
                new RuntimeSerializerFactory<Tuple2<Long, SmallOrMediumOrLargeValue>>(serializer, (Class<Tuple2<Long, SmallOrMediumOrLargeValue>>) (Class<?>) Tuple2.class),
                comparator, 1.0, 1, 128, 0.7f, true, /* use large record handler */ false);
        // check order
        MutableObjectIterator<Tuple2<Long, SmallOrMediumOrLargeValue>> iterator = sorter.getIterator();
        Tuple2<Long, SmallOrMediumOrLargeValue> val = serializer.createInstance();
        long prevKey = Long.MAX_VALUE;
        for (int i = 0; i < NUM_RECORDS; i++) {
            val = iterator.next(val);
            assertTrue(val.f0 <= prevKey);
            assertTrue(val.f0.intValue() == val.f1.val());
            // remember the key; without this the descending-order check above is vacuous
            prevKey = val.f0;
        }
        assertNull(iterator.next(val));
        sorter.close();
        testSuccess = true;
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
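SmallOrMediumOrLargeValue is exposed to Flink through ValueTypeInfo, which accepts any class implementing the org.apache.flink.types.Value interface. A minimal hypothetical implementation looks like the following; IntWrapperValue is an illustrative name, not a class from this test:

// hedged sketch of the minimal shape a type needs for ValueTypeInfo;
// assumes imports of org.apache.flink.types.Value,
// org.apache.flink.core.memory.DataInputView/DataOutputView, and java.io.IOException
public class IntWrapperValue implements Value {

    private int val;

    // Value types need a public nullary constructor so the framework can instantiate them
    public IntWrapperValue() {}

    public IntWrapperValue(int val) {
        this.val = val;
    }

    @Override
    public void write(DataOutputView out) throws IOException {
        out.writeInt(val);
    }

    @Override
    public void read(DataInputView in) throws IOException {
        val = in.readInt();
    }
}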
Use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.
The class ExternalSortLargeRecordsITCase, method testSortWithMediumRecordsOnly.
@Test
public void testSortWithMediumRecordsOnly() {
    try {
        final int NUM_RECORDS = 70;
        final TypeInformation<?>[] types = new TypeInformation<?>[] { BasicTypeInfo.LONG_TYPE_INFO, new ValueTypeInfo<SmallOrMediumOrLargeValue>(SmallOrMediumOrLargeValue.class) };
        final TupleTypeInfo<Tuple2<Long, SmallOrMediumOrLargeValue>> typeInfo = new TupleTypeInfo<Tuple2<Long, SmallOrMediumOrLargeValue>>(types);
        final TypeSerializer<Tuple2<Long, SmallOrMediumOrLargeValue>> serializer = typeInfo.createSerializer(new ExecutionConfig());
        // descending comparator on the first tuple field (the long key)
        final TypeComparator<Tuple2<Long, SmallOrMediumOrLargeValue>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { false }, 0, new ExecutionConfig());
        // source that emits medium-sized records only
        MutableObjectIterator<Tuple2<Long, SmallOrMediumOrLargeValue>> source = new MutableObjectIterator<Tuple2<Long, SmallOrMediumOrLargeValue>>() {

            private final Random rnd = new Random(62360187263087678L);

            private int num = -1;

            @Override
            public Tuple2<Long, SmallOrMediumOrLargeValue> next(Tuple2<Long, SmallOrMediumOrLargeValue> reuse) {
                return next();
            }

            @Override
            public Tuple2<Long, SmallOrMediumOrLargeValue> next() {
                if (++num < NUM_RECORDS) {
                    long val = rnd.nextLong();
                    return new Tuple2<Long, SmallOrMediumOrLargeValue>(val, new SmallOrMediumOrLargeValue((int) val, SmallOrMediumOrLargeValue.MEDIUM_SIZE));
                } else {
                    return null;
                }
            }
        };
        @SuppressWarnings("unchecked")
        Sorter<Tuple2<Long, SmallOrMediumOrLargeValue>> sorter = new UnilateralSortMerger<Tuple2<Long, SmallOrMediumOrLargeValue>>(
                this.memoryManager, this.ioManager, source, this.parentTask,
                new RuntimeSerializerFactory<Tuple2<Long, SmallOrMediumOrLargeValue>>(serializer, (Class<Tuple2<Long, SmallOrMediumOrLargeValue>>) (Class<?>) Tuple2.class),
                comparator, 1.0, 1, 128, 0.7f, true, /* use large record handler */ true);
        // check order
        MutableObjectIterator<Tuple2<Long, SmallOrMediumOrLargeValue>> iterator = sorter.getIterator();
        Tuple2<Long, SmallOrMediumOrLargeValue> val = serializer.createInstance();
        long prevKey = Long.MAX_VALUE;
        for (int i = 0; i < NUM_RECORDS; i++) {
            val = iterator.next(val);
            assertTrue(val.f0 <= prevKey);
            assertTrue(val.f0.intValue() == val.f1.val());
            // remember the key; without this the descending-order check above is vacuous
            prevKey = val.f0;
        }
        assertNull(iterator.next(val));
        sorter.close();
        testSuccess = true;
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
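All of the snippets on this page assemble TypeInformation by hand from BasicTypeInfo, ValueTypeInfo, and TupleTypeInfo. In user code, the same information can usually be obtained more concisely from a type hint; a short sketch (the tuple type is arbitrary, and TypeHint comes from org.apache.flink.api.common.typeinfo):

// hedged sketch: deriving TypeInformation via a TypeHint instead of manual assembly
TypeInformation<Tuple2<Long, String>> info =
        TypeInformation.of(new TypeHint<Tuple2<Long, String>>() {});
// the resulting info yields the same kind of serializer the tests above create manually
TypeSerializer<Tuple2<Long, String>> serializer = info.createSerializer(new ExecutionConfig());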