use of org.apache.flink.types.StringValue in project flink by apache.
the class ReusingKeyGroupedIteratorTest method setup.
@Before
public void setup() {
final ArrayList<IntStringPair> source = new ArrayList<IntStringPair>();
// add elements to the source
source.add(new IntStringPair(new IntValue(1), new StringValue("A")));
source.add(new IntStringPair(new IntValue(2), new StringValue("B")));
source.add(new IntStringPair(new IntValue(3), new StringValue("C")));
source.add(new IntStringPair(new IntValue(3), new StringValue("D")));
source.add(new IntStringPair(new IntValue(4), new StringValue("E")));
source.add(new IntStringPair(new IntValue(4), new StringValue("F")));
source.add(new IntStringPair(new IntValue(4), new StringValue("G")));
source.add(new IntStringPair(new IntValue(5), new StringValue("H")));
source.add(new IntStringPair(new IntValue(5), new StringValue("I")));
source.add(new IntStringPair(new IntValue(5), new StringValue("J")));
source.add(new IntStringPair(new IntValue(5), new StringValue("K")));
source.add(new IntStringPair(new IntValue(5), new StringValue("L")));
this.sourceIter = new MutableObjectIterator<Record>() {
final Iterator<IntStringPair> it = source.iterator();
@Override
public Record next(Record reuse) throws IOException {
if (it.hasNext()) {
IntStringPair pair = it.next();
reuse.setField(0, pair.getInteger());
reuse.setField(1, pair.getString());
return reuse;
} else {
return null;
}
}
@Override
public Record next() throws IOException {
if (it.hasNext()) {
IntStringPair pair = it.next();
Record result = new Record(2);
result.setField(0, pair.getInteger());
result.setField(1, pair.getString());
return result;
} else {
return null;
}
}
};
final RecordSerializer serializer = RecordSerializer.get();
@SuppressWarnings("unchecked") final RecordComparator comparator = new RecordComparator(new int[] { 0 }, new Class[] { IntValue.class });
this.psi = new ReusingKeyGroupedIterator<Record>(this.sourceIter, serializer, comparator);
}
use of org.apache.flink.types.StringValue in project flink by apache.
the class ReusingKeyGroupedIteratorTest method testMixedProgress.
@Test
public void testMixedProgress() throws Exception {
try {
// Progression only via nextKey() and hasNext() - Key 1, Value A
Assert.assertTrue("KeyGroupedIterator must have another key.", this.psi.nextKey());
Assert.assertTrue("KeyGroupedIterator must have another value.", this.psi.getValues().hasNext());
Assert.assertTrue(hasIterator(this.psi.getValues()));
Assert.assertFalse(hasIterator(this.psi.getValues()));
// Progression only through nextKey() - Key 2, Value B
Assert.assertTrue("KeyGroupedIterator must have another key.", this.psi.nextKey());
Assert.assertTrue(hasIterator(this.psi.getValues()));
Assert.assertFalse(hasIterator(this.psi.getValues()));
// Progression first though haNext() and next(), then through hasNext() - Key 3, Values
// C, D
Assert.assertTrue("KeyGroupedIterator must have another key.", this.psi.nextKey());
Assert.assertTrue(hasIterator(this.psi.getValues()));
Assert.assertFalse(hasIterator(this.psi.getValues()));
Assert.assertTrue("KeyGroupedIterator must have another value.", this.psi.getValues().hasNext());
Assert.assertTrue("KeyGroupedIterator returned a wrong key.", this.psi.getComparatorWithCurrentReference().equalToReference(new Record(new IntValue(3))));
Assert.assertEquals("KeyGroupedIterator returned a wrong key.", 3, this.psi.getCurrent().getField(0, IntValue.class).getValue());
Assert.assertEquals("KeyGroupedIterator returned a wrong value.", new StringValue("C"), this.psi.getValues().next().getField(1, StringValue.class));
Assert.assertTrue("KeyGroupedIterator must have another value.", this.psi.getValues().hasNext());
Assert.assertTrue("KeyGroupedIterator returned a wrong key.", this.psi.getComparatorWithCurrentReference().equalToReference(new Record(new IntValue(3))));
Assert.assertEquals("KeyGroupedIterator returned a wrong key.", 3, this.psi.getCurrent().getField(0, IntValue.class).getValue());
// Progression first via next() only, then hasNext() only Key 4, Values E, F, G
Assert.assertTrue("KeyGroupedIterator must have another key.", this.psi.nextKey());
Assert.assertTrue(hasIterator(this.psi.getValues()));
Assert.assertFalse(hasIterator(this.psi.getValues()));
Assert.assertEquals("KeyGroupedIterator returned a wrong value.", new StringValue("E"), this.psi.getValues().next().getField(1, StringValue.class));
Assert.assertTrue("KeyGroupedIterator must have another value.", this.psi.getValues().hasNext());
// Key 5, Values H, I, J, K, L
Assert.assertTrue("KeyGroupedIterator must have another key.", this.psi.nextKey());
Assert.assertEquals("KeyGroupedIterator returned a wrong value.", new StringValue("H"), this.psi.getValues().next().getField(1, StringValue.class));
Assert.assertTrue("KeyGroupedIterator must have another value.", this.psi.getValues().hasNext());
Assert.assertTrue("KeyGroupedIterator returned a wrong key.", this.psi.getComparatorWithCurrentReference().equalToReference(new Record(new IntValue(5))));
Assert.assertEquals("KeyGroupedIterator returned a wrong key.", 5, this.psi.getCurrent().getField(0, IntValue.class).getValue());
Assert.assertEquals("KeyGroupedIterator returned a wrong value.", new StringValue("I"), this.psi.getValues().next().getField(1, StringValue.class));
Assert.assertTrue(hasIterator(this.psi.getValues()));
Assert.assertFalse(hasIterator(this.psi.getValues()));
Assert.assertTrue("KeyGroupedIterator must have another value.", this.psi.getValues().hasNext());
// end
Assert.assertFalse("KeyGroupedIterator must not have another key.", this.psi.nextKey());
Assert.assertFalse("KeyGroupedIterator must not have another key.", this.psi.nextKey());
} catch (Exception e) {
e.printStackTrace();
Assert.fail("The test encountered an unexpected exception.");
}
}
use of org.apache.flink.types.StringValue in project flink by apache.
the class GroupReduceDriverTest method testAllReduceDriverMutable.
@Test
public void testAllReduceDriverMutable() {
try {
TestTaskContext<GroupReduceFunction<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>> context = new TestTaskContext<GroupReduceFunction<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>>();
List<Tuple2<StringValue, IntValue>> data = DriverTestData.createReduceMutableData();
TupleTypeInfo<Tuple2<StringValue, IntValue>> typeInfo = (TupleTypeInfo<Tuple2<StringValue, IntValue>>) TypeExtractor.getForObject(data.get(0));
MutableObjectIterator<Tuple2<StringValue, IntValue>> input = new RegularToMutableObjectIterator<Tuple2<StringValue, IntValue>>(data.iterator(), typeInfo.createSerializer(new ExecutionConfig()));
TypeComparator<Tuple2<StringValue, IntValue>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
GatheringCollector<Tuple2<StringValue, IntValue>> result = new GatheringCollector<Tuple2<StringValue, IntValue>>(typeInfo.createSerializer(new ExecutionConfig()));
context.setDriverStrategy(DriverStrategy.SORTED_GROUP_REDUCE);
context.setInput1(input, typeInfo.createSerializer(new ExecutionConfig()));
context.setComparator1(comparator);
context.setCollector(result);
context.setUdf(new ConcatSumMutableReducer());
GroupReduceDriver<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>> driver = new GroupReduceDriver<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>();
driver.setup(context);
driver.prepare();
driver.run();
Object[] res = result.getList().toArray();
Object[] expected = DriverTestData.createReduceMutableDataGroupedResult().toArray();
DriverTestData.compareTupleArrays(expected, res);
} catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
use of org.apache.flink.types.StringValue in project flink by apache.
the class AccumulatorITCase method postSubmit.
@Override
protected void postSubmit() throws Exception {
compareResultsByLinesInMemory(EXPECTED, resultPath);
// Test accumulator results
System.out.println("Accumulator results:");
JobExecutionResult res = this.result;
System.out.println(AccumulatorHelper.getResultsFormatted(res.getAllAccumulatorResults()));
Assert.assertEquals(Integer.valueOf(3), res.getAccumulatorResult("num-lines"));
Assert.assertEquals(Integer.valueOf(3), res.getIntCounterResult("num-lines"));
Assert.assertEquals(Double.valueOf(getParallelism()), res.getAccumulatorResult("open-close-counter"));
// Test histogram (words per line distribution)
Map<Integer, Integer> dist = new HashMap<>();
dist.put(1, 1);
dist.put(2, 1);
dist.put(3, 1);
Assert.assertEquals(dist, res.getAccumulatorResult("words-per-line"));
// Test distinct words (custom accumulator)
Set<StringValue> distinctWords = new HashSet<>();
distinctWords.add(new StringValue("one"));
distinctWords.add(new StringValue("two"));
distinctWords.add(new StringValue("three"));
Assert.assertEquals(distinctWords, res.getAccumulatorResult("distinct-words"));
}
use of org.apache.flink.types.StringValue in project flink by apache.
the class MassiveStringValueSorting method testStringValueSorting.
public void testStringValueSorting() {
File input = null;
File sorted = null;
try {
// the source file
input = generateFileWithStrings(300000, "http://some-uri.com/that/is/a/common/prefix/to/all");
// the sorted file
sorted = File.createTempFile("sorted_strings", "txt");
String[] command = { "/bin/bash", "-c", "export LC_ALL=\"C\" && cat \"" + input.getAbsolutePath() + "\" | sort > \"" + sorted.getAbsolutePath() + "\"" };
Process p = null;
try {
p = Runtime.getRuntime().exec(command);
int retCode = p.waitFor();
if (retCode != 0) {
throw new Exception("Command failed with return code " + retCode);
}
p = null;
} finally {
if (p != null) {
p.destroy();
}
}
// sort the data
Sorter<StringValue> sorter = null;
BufferedReader reader = null;
BufferedReader verifyReader = null;
MemoryManager mm = null;
try (IOManager ioMan = new IOManagerAsync()) {
mm = MemoryManagerBuilder.newBuilder().setMemorySize(1024 * 1024).build();
TypeSerializer<StringValue> serializer = new CopyableValueSerializer<StringValue>(StringValue.class);
TypeComparator<StringValue> comparator = new CopyableValueComparator<StringValue>(true, StringValue.class);
reader = new BufferedReader(new FileReader(input));
MutableObjectIterator<StringValue> inputIterator = new StringValueReaderMutableObjectIterator(reader);
sorter = ExternalSorter.newBuilder(mm, new DummyInvokable(), serializer, comparator).maxNumFileHandles(128).enableSpilling(ioMan, 0.8f).memoryFraction(1.0).objectReuse(true).largeRecords(true).build(inputIterator);
MutableObjectIterator<StringValue> sortedData = sorter.getIterator();
reader.close();
// verify
verifyReader = new BufferedReader(new FileReader(sorted));
String nextVerify;
StringValue nextFromFlinkSort = new StringValue();
while ((nextVerify = verifyReader.readLine()) != null) {
nextFromFlinkSort = sortedData.next(nextFromFlinkSort);
Assert.assertNotNull(nextFromFlinkSort);
Assert.assertEquals(nextVerify, nextFromFlinkSort.getValue());
}
} finally {
if (reader != null) {
reader.close();
}
if (verifyReader != null) {
verifyReader.close();
}
if (sorter != null) {
sorter.close();
}
if (mm != null) {
mm.shutdown();
}
}
} catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
Assert.fail(e.getMessage());
} finally {
if (input != null) {
// noinspection ResultOfMethodCallIgnored
input.delete();
}
if (sorted != null) {
// noinspection ResultOfMethodCallIgnored
sorted.delete();
}
}
}
Aggregations