use of org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory in project flink by apache.
the class LargeRecordHandler method addRecord.
// --------------------------------------------------------------------------------------------
@SuppressWarnings("unchecked")
public long addRecord(T record) throws IOException {
if (recordsOutFile == null) {
if (closed) {
throw new IllegalStateException("The large record handler has been closed.");
}
if (recordsReader != null) {
throw new IllegalStateException("The handler has already switched to sorting.");
}
LOG.debug("Initializing the large record spilling...");
// initialize the utilities
{
final TypeComparator<?>[] keyComps = comparator.getFlatComparators();
numKeyFields = keyComps.length;
Object[] keyHolder = new Object[numKeyFields];
comparator.extractKeys(record, keyHolder, 0);
TypeSerializer<?>[] keySers = new TypeSerializer<?>[numKeyFields];
TypeSerializer<?>[] tupleSers = new TypeSerializer<?>[numKeyFields + 1];
int[] keyPos = new int[numKeyFields];
for (int i = 0; i < numKeyFields; i++) {
keyPos[i] = i;
keySers[i] = createSerializer(keyHolder[i], i);
tupleSers[i] = keySers[i];
}
// add the long serializer for the offset
tupleSers[numKeyFields] = LongSerializer.INSTANCE;
keySerializer = new TupleSerializer<Tuple>((Class<Tuple>) Tuple.getTupleClass(numKeyFields + 1), tupleSers);
keyComparator = new TupleComparator<Tuple>(keyPos, keyComps, keySers);
keySerializerFactory = new RuntimeSerializerFactory<Tuple>(keySerializer, keySerializer.getTupleClass());
keyTuple = keySerializer.createInstance();
}
// initialize the spilling
final int totalNumSegments = memory.size();
final int segmentsForKeys = (totalNumSegments >= 2 * MAX_SEGMENTS_FOR_KEY_SPILLING) ? MAX_SEGMENTS_FOR_KEY_SPILLING : Math.max(MIN_SEGMENTS_FOR_KEY_SPILLING, totalNumSegments - MAX_SEGMENTS_FOR_KEY_SPILLING);
List<MemorySegment> recordsMemory = new ArrayList<MemorySegment>();
List<MemorySegment> keysMemory = new ArrayList<MemorySegment>();
for (int i = 0; i < segmentsForKeys; i++) {
keysMemory.add(memory.get(i));
}
for (int i = segmentsForKeys; i < totalNumSegments; i++) {
recordsMemory.add(memory.get(i));
}
recordsChannel = ioManager.createChannel();
keysChannel = ioManager.createChannel();
recordsOutFile = new FileChannelOutputView(ioManager.createBlockChannelWriter(recordsChannel), memManager, recordsMemory, memManager.getPageSize());
keysOutFile = new FileChannelOutputView(ioManager.createBlockChannelWriter(keysChannel), memManager, keysMemory, memManager.getPageSize());
}
final long offset = recordsOutFile.getWriteOffset();
if (offset < 0) {
throw new RuntimeException("wrong offset");
}
Object[] keyHolder = new Object[numKeyFields];
comparator.extractKeys(record, keyHolder, 0);
for (int i = 0; i < numKeyFields; i++) {
keyTuple.setField(keyHolder[i], i);
}
keyTuple.setField(offset, numKeyFields);
keySerializer.serialize(keyTuple, keysOutFile);
serializer.serialize(record, recordsOutFile);
recordCounter++;
return offset;
}
use of org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory in project flink by apache.
the class MassiveStringSorting method testStringSorting.
public void testStringSorting() {
File input = null;
File sorted = null;
try {
// the source file
input = generateFileWithStrings(300000, "http://some-uri.com/that/is/a/common/prefix/to/all");
// the sorted file
sorted = File.createTempFile("sorted_strings", "txt");
String[] command = { "/bin/bash", "-c", "export LC_ALL=\"C\" && cat \"" + input.getAbsolutePath() + "\" | sort > \"" + sorted.getAbsolutePath() + "\"" };
Process p = null;
try {
p = Runtime.getRuntime().exec(command);
int retCode = p.waitFor();
if (retCode != 0) {
throw new Exception("Command failed with return code " + retCode);
}
p = null;
} finally {
if (p != null) {
p.destroy();
}
}
// sort the data
UnilateralSortMerger<String> sorter = null;
BufferedReader reader = null;
BufferedReader verifyReader = null;
try {
MemoryManager mm = new MemoryManager(1024 * 1024, 1);
IOManager ioMan = new IOManagerAsync();
TypeSerializer<String> serializer = StringSerializer.INSTANCE;
TypeComparator<String> comparator = new StringComparator(true);
reader = new BufferedReader(new FileReader(input));
MutableObjectIterator<String> inputIterator = new StringReaderMutableObjectIterator(reader);
sorter = new UnilateralSortMerger<String>(mm, ioMan, inputIterator, new DummyInvokable(), new RuntimeSerializerFactory<String>(serializer, String.class), comparator, 1.0, 4, 0.8f, true, /* use large record handler */
false);
MutableObjectIterator<String> sortedData = sorter.getIterator();
reader.close();
// verify
verifyReader = new BufferedReader(new FileReader(sorted));
String next;
while ((next = verifyReader.readLine()) != null) {
String nextFromStratoSort = sortedData.next("");
Assert.assertNotNull(nextFromStratoSort);
Assert.assertEquals(next, nextFromStratoSort);
}
} finally {
if (reader != null) {
reader.close();
}
if (verifyReader != null) {
verifyReader.close();
}
if (sorter != null) {
sorter.close();
}
}
} catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
Assert.fail(e.getMessage());
} finally {
if (input != null) {
input.delete();
}
if (sorted != null) {
sorted.delete();
}
}
}
use of org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory in project flink by apache.
the class MassiveStringSorting method testStringTuplesSorting.
@SuppressWarnings("unchecked")
public void testStringTuplesSorting() {
final int NUM_STRINGS = 300000;
File input = null;
File sorted = null;
try {
// the source file
input = generateFileWithStringTuples(NUM_STRINGS, "http://some-uri.com/that/is/a/common/prefix/to/all");
// the sorted file
sorted = File.createTempFile("sorted_strings", "txt");
String[] command = { "/bin/bash", "-c", "export LC_ALL=\"C\" && cat \"" + input.getAbsolutePath() + "\" | sort > \"" + sorted.getAbsolutePath() + "\"" };
Process p = null;
try {
p = Runtime.getRuntime().exec(command);
int retCode = p.waitFor();
if (retCode != 0) {
throw new Exception("Command failed with return code " + retCode);
}
p = null;
} finally {
if (p != null) {
p.destroy();
}
}
// sort the data
UnilateralSortMerger<Tuple2<String, String[]>> sorter = null;
BufferedReader reader = null;
BufferedReader verifyReader = null;
try {
MemoryManager mm = new MemoryManager(1024 * 1024, 1);
IOManager ioMan = new IOManagerAsync();
TupleTypeInfo<Tuple2<String, String[]>> typeInfo = (TupleTypeInfo<Tuple2<String, String[]>>) TypeInfoParser.<Tuple2<String, String[]>>parse("Tuple2<String, String[]>");
TypeSerializer<Tuple2<String, String[]>> serializer = typeInfo.createSerializer(new ExecutionConfig());
TypeComparator<Tuple2<String, String[]>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
reader = new BufferedReader(new FileReader(input));
MutableObjectIterator<Tuple2<String, String[]>> inputIterator = new StringTupleReaderMutableObjectIterator(reader);
sorter = new UnilateralSortMerger<Tuple2<String, String[]>>(mm, ioMan, inputIterator, new DummyInvokable(), new RuntimeSerializerFactory<Tuple2<String, String[]>>(serializer, (Class<Tuple2<String, String[]>>) (Class<?>) Tuple2.class), comparator, 1.0, 4, 0.8f, true, /* use large record handler */
false);
// use this part to verify that all if good when sorting in memory
// List<MemorySegment> memory = mm.allocatePages(new DummyInvokable(), mm.computeNumberOfPages(1024*1024*1024));
// NormalizedKeySorter<Tuple2<String, String[]>> nks = new NormalizedKeySorter<Tuple2<String,String[]>>(serializer, comparator, memory);
//
// {
// Tuple2<String, String[]> wi = new Tuple2<String, String[]>("", new String[0]);
// while ((wi = inputIterator.next(wi)) != null) {
// Assert.assertTrue(nks.write(wi));
// }
//
// new QuickSort().sort(nks);
// }
//
// MutableObjectIterator<Tuple2<String, String[]>> sortedData = nks.getIterator();
MutableObjectIterator<Tuple2<String, String[]>> sortedData = sorter.getIterator();
reader.close();
// verify
verifyReader = new BufferedReader(new FileReader(sorted));
MutableObjectIterator<Tuple2<String, String[]>> verifyIterator = new StringTupleReaderMutableObjectIterator(verifyReader);
Tuple2<String, String[]> next = new Tuple2<String, String[]>("", new String[0]);
Tuple2<String, String[]> nextFromStratoSort = new Tuple2<String, String[]>("", new String[0]);
int num = 0;
while ((next = verifyIterator.next(next)) != null) {
num++;
nextFromStratoSort = sortedData.next(nextFromStratoSort);
Assert.assertNotNull(nextFromStratoSort);
Assert.assertEquals(next.f0, nextFromStratoSort.f0);
Assert.assertArrayEquals(next.f1, nextFromStratoSort.f1);
}
Assert.assertNull(sortedData.next(nextFromStratoSort));
Assert.assertEquals(NUM_STRINGS, num);
} finally {
if (reader != null) {
reader.close();
}
if (verifyReader != null) {
verifyReader.close();
}
if (sorter != null) {
sorter.close();
}
}
} catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
Assert.fail(e.getMessage());
} finally {
if (input != null) {
input.delete();
}
if (sorted != null) {
sorted.delete();
}
}
}
use of org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory in project flink by apache.
the class MassiveStringValueSorting method testStringValueTuplesSorting.
@SuppressWarnings("unchecked")
public void testStringValueTuplesSorting() {
final int NUM_STRINGS = 300000;
File input = null;
File sorted = null;
try {
// the source file
input = generateFileWithStringTuples(NUM_STRINGS, "http://some-uri.com/that/is/a/common/prefix/to/all");
// the sorted file
sorted = File.createTempFile("sorted_strings", "txt");
String[] command = { "/bin/bash", "-c", "export LC_ALL=\"C\" && cat \"" + input.getAbsolutePath() + "\" | sort > \"" + sorted.getAbsolutePath() + "\"" };
Process p = null;
try {
p = Runtime.getRuntime().exec(command);
int retCode = p.waitFor();
if (retCode != 0) {
throw new Exception("Command failed with return code " + retCode);
}
p = null;
} finally {
if (p != null) {
p.destroy();
}
}
// sort the data
UnilateralSortMerger<Tuple2<StringValue, StringValue[]>> sorter = null;
BufferedReader reader = null;
BufferedReader verifyReader = null;
try {
MemoryManager mm = new MemoryManager(1024 * 1024, 1);
IOManager ioMan = new IOManagerAsync();
TupleTypeInfo<Tuple2<StringValue, StringValue[]>> typeInfo = (TupleTypeInfo<Tuple2<StringValue, StringValue[]>>) TypeInfoParser.<Tuple2<StringValue, StringValue[]>>parse("Tuple2<org.apache.flink.types.StringValue, org.apache.flink.types.StringValue[]>");
TypeSerializer<Tuple2<StringValue, StringValue[]>> serializer = typeInfo.createSerializer(new ExecutionConfig());
TypeComparator<Tuple2<StringValue, StringValue[]>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
reader = new BufferedReader(new FileReader(input));
MutableObjectIterator<Tuple2<StringValue, StringValue[]>> inputIterator = new StringValueTupleReaderMutableObjectIterator(reader);
sorter = new UnilateralSortMerger<Tuple2<StringValue, StringValue[]>>(mm, ioMan, inputIterator, new DummyInvokable(), new RuntimeSerializerFactory<Tuple2<StringValue, StringValue[]>>(serializer, (Class<Tuple2<StringValue, StringValue[]>>) (Class<?>) Tuple2.class), comparator, 1.0, 4, 0.8f, true, /* use large record handler */
false);
// use this part to verify that all if good when sorting in memory
// List<MemorySegment> memory = mm.allocatePages(new DummyInvokable(), mm.computeNumberOfPages(1024*1024*1024));
// NormalizedKeySorter<Tuple2<String, String[]>> nks = new NormalizedKeySorter<Tuple2<String,String[]>>(serializer, comparator, memory);
//
// {
// Tuple2<String, String[]> wi = new Tuple2<String, String[]>("", new String[0]);
// while ((wi = inputIterator.next(wi)) != null) {
// Assert.assertTrue(nks.write(wi));
// }
//
// new QuickSort().sort(nks);
// }
//
// MutableObjectIterator<Tuple2<String, String[]>> sortedData = nks.getIterator();
MutableObjectIterator<Tuple2<StringValue, StringValue[]>> sortedData = sorter.getIterator();
reader.close();
// verify
verifyReader = new BufferedReader(new FileReader(sorted));
MutableObjectIterator<Tuple2<StringValue, StringValue[]>> verifyIterator = new StringValueTupleReaderMutableObjectIterator(verifyReader);
Tuple2<StringValue, StringValue[]> nextVerify = new Tuple2<StringValue, StringValue[]>(new StringValue(), new StringValue[0]);
Tuple2<StringValue, StringValue[]> nextFromFlinkSort = new Tuple2<StringValue, StringValue[]>(new StringValue(), new StringValue[0]);
int num = 0;
while ((nextVerify = verifyIterator.next(nextVerify)) != null) {
num++;
nextFromFlinkSort = sortedData.next(nextFromFlinkSort);
Assert.assertNotNull(nextFromFlinkSort);
Assert.assertEquals(nextVerify.f0, nextFromFlinkSort.f0);
Assert.assertArrayEquals(nextVerify.f1, nextFromFlinkSort.f1);
}
Assert.assertNull(sortedData.next(nextFromFlinkSort));
Assert.assertEquals(NUM_STRINGS, num);
} finally {
if (reader != null) {
reader.close();
}
if (verifyReader != null) {
verifyReader.close();
}
if (sorter != null) {
sorter.close();
}
}
} catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
Assert.fail(e.getMessage());
} finally {
if (input != null) {
//noinspection ResultOfMethodCallIgnored
input.delete();
}
if (sorted != null) {
//noinspection ResultOfMethodCallIgnored
sorted.delete();
}
}
}
use of org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory in project flink by apache.
the class MassiveStringValueSorting method testStringValueSorting.
public void testStringValueSorting() {
File input = null;
File sorted = null;
try {
// the source file
input = generateFileWithStrings(300000, "http://some-uri.com/that/is/a/common/prefix/to/all");
// the sorted file
sorted = File.createTempFile("sorted_strings", "txt");
String[] command = { "/bin/bash", "-c", "export LC_ALL=\"C\" && cat \"" + input.getAbsolutePath() + "\" | sort > \"" + sorted.getAbsolutePath() + "\"" };
Process p = null;
try {
p = Runtime.getRuntime().exec(command);
int retCode = p.waitFor();
if (retCode != 0) {
throw new Exception("Command failed with return code " + retCode);
}
p = null;
} finally {
if (p != null) {
p.destroy();
}
}
// sort the data
UnilateralSortMerger<StringValue> sorter = null;
BufferedReader reader = null;
BufferedReader verifyReader = null;
try {
MemoryManager mm = new MemoryManager(1024 * 1024, 1);
IOManager ioMan = new IOManagerAsync();
TypeSerializer<StringValue> serializer = new CopyableValueSerializer<StringValue>(StringValue.class);
TypeComparator<StringValue> comparator = new CopyableValueComparator<StringValue>(true, StringValue.class);
reader = new BufferedReader(new FileReader(input));
MutableObjectIterator<StringValue> inputIterator = new StringValueReaderMutableObjectIterator(reader);
sorter = new UnilateralSortMerger<StringValue>(mm, ioMan, inputIterator, new DummyInvokable(), new RuntimeSerializerFactory<StringValue>(serializer, StringValue.class), comparator, 1.0, 4, 0.8f, true, /* use large record handler */
true);
MutableObjectIterator<StringValue> sortedData = sorter.getIterator();
reader.close();
// verify
verifyReader = new BufferedReader(new FileReader(sorted));
String nextVerify;
StringValue nextFromFlinkSort = new StringValue();
while ((nextVerify = verifyReader.readLine()) != null) {
nextFromFlinkSort = sortedData.next(nextFromFlinkSort);
Assert.assertNotNull(nextFromFlinkSort);
Assert.assertEquals(nextVerify, nextFromFlinkSort.getValue());
}
} finally {
if (reader != null) {
reader.close();
}
if (verifyReader != null) {
verifyReader.close();
}
if (sorter != null) {
sorter.close();
}
}
} catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
Assert.fail(e.getMessage());
} finally {
if (input != null) {
//noinspection ResultOfMethodCallIgnored
input.delete();
}
if (sorted != null) {
//noinspection ResultOfMethodCallIgnored
sorted.delete();
}
}
}
Aggregations