Use of uk.gov.gchq.gaffer.parquetstore.operation.handler.utilities.SortGroupSplit in project Gaffer by gchq.
The class SortGroupSplitTest, method sortTest.
/**
 * Runs {@link SortGroupSplit} over the generated input directory, sorting on the vertex column
 * and then the "date" column with GZIP compression, and verifies the result.
 *
 * <p>The test expects a single {@code .parquet} file in the output directory and checks the
 * first 40 rows of it: each vertex value is expected on two consecutive rows, the first carrying
 * the earlier date (100000) and the second the later date (200000).
 *
 * @param tempDir temporary directory supplied by JUnit; holds the "input" and "output" folders
 * @throws IOException if a file system operation fails
 */
@Test
public void sortTest(@TempDir java.nio.file.Path tempDir) throws IOException {
    // Given
    final FileSystem fs = FileSystem.get(new Configuration());
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    final String inputDir = Files.createDirectories(tempDir.resolve("input")).toString();
    final String outputDir = tempDir.resolve("output").toString();
    // NOTE(review): generateDate is defined elsewhere; presumably it writes the input fixture — confirm
    generateDate(inputDir);
    final List<String> sortColumns = new ArrayList<>();
    sortColumns.add(ParquetStore.VERTEX);
    sortColumns.add("date");

    // When
    final SortGroupSplit sorter =
            new SortGroupSplit(fs, sparkSession, sortColumns, inputDir, outputDir, CompressionCodecName.GZIP);
    sorter.call();

    // Then
    // - The output directory must exist and hold exactly one Parquet file
    final Path outputPath = new Path(outputDir);
    assertTrue(fs.exists(outputPath));
    final FileStatus[] outputFiles = fs.listStatus(outputPath, candidate -> candidate.getName().endsWith(".parquet"));
    assertThat(outputFiles).hasSize(1);

    // - Read the rows back and check they come out in the expected order
    final String sortedFile = outputFiles[0].getPath().toString();
    final Row[] results = (Row[]) sparkSession.read().parquet(sortedFile).collect();
    for (int i = 0; i < 40; i++) {
        final Row row = results[i];
        // Rows come in pairs: rows 2k and 2k+1 both belong to vertex k
        final int pairIndex = i / 2;
        // Within a pair the even row carries the earlier date, the odd row the later one
        final long expectedDate = (i % 2 == 0)
                ? new Date(100000L).getTime()
                : new Date(200000L).getTime();

        assertEquals((long) pairIndex, (long) row.getAs(ParquetStore.VERTEX));
        assertEquals('b', ((byte[]) row.getAs("byte"))[0]);
        assertEquals(7f, row.getAs("float"), 0.01f);
        assertEquals(11L * pairIndex, (long) row.getAs("long"));
        assertEquals(13, (int) row.getAs("short"));
        assertEquals(expectedDate, (long) row.getAs("date"));
        assertEquals(2, (int) row.getAs("count"));
        assertArrayEquals(new String[] { "A", "B", "C" }, (String[]) ((WrappedArray<String>) row.getAs("treeSet")).array());
        assertEquals(JavaConversions$.MODULE$.mapAsScalaMap(TestUtils.MERGED_FREQMAP), row.getAs("freqMap"));
    }
}
Aggregations