
Example 1 with WrappedArray

Use of scala.collection.mutable.WrappedArray in the Gaffer project by gchq.

From the class AggregateDataForGroupTest, method aggregateDataForGroupTest:

@Test
public void aggregateDataForGroupTest(@TempDir java.nio.file.Path tempDir) throws Exception {
    // Given
    final SchemaUtils schemaUtils = new SchemaUtils(TestUtils.gafferSchema("schemaUsingLongVertexType"));
    final String file1 = tempDir.resolve("inputdata1.parquet").toString();
    final String file2 = tempDir.resolve("inputdata2.parquet").toString();
    generateData(file1, schemaUtils);
    generateData(file2, schemaUtils);
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    final List<String> inputFiles = new ArrayList<>(Sets.newHashSet(file1, file2));
    final String outputFolder = tempDir.resolve("aggregated").toString();
    final AggregateDataForGroup aggregator = new AggregateDataForGroup(FileSystem.get(new Configuration()), schemaUtils, TestGroups.ENTITY, inputFiles, outputFolder, sparkSession);
    // When
    aggregator.call();
    // Then
    final FileSystem fs = FileSystem.get(new Configuration());
    assertTrue(fs.exists(new Path(outputFolder)));
    final Row[] results = (Row[]) sparkSession.read().parquet(outputFolder).sort(ParquetStore.VERTEX).collect();
    for (int i = 0; i < 20; i++) {
        assertEquals((long) i, (long) results[i].getAs(ParquetStore.VERTEX));
        assertEquals('b', ((byte[]) results[i].getAs("byte"))[0]);
        assertEquals(14f, results[i].getAs("float"), 0.01f);
        assertEquals(11L * 2 * i, (long) results[i].getAs("long"));
        assertEquals(26, (int) results[i].getAs("short"));
        assertEquals(TestUtils.DATE.getTime(), (long) results[i].getAs("date"));
        assertEquals(4, (int) results[i].getAs("count"));
        assertArrayEquals(new String[] { "A", "B", "C" }, (String[]) ((WrappedArray<String>) results[i].getAs("treeSet")).array());
        final FreqMap mergedFreqMap = new FreqMap();
        mergedFreqMap.put("A", 4L);
        mergedFreqMap.put("B", 2L);
        mergedFreqMap.put("C", 2L);
        assertEquals(JavaConversions$.MODULE$.mapAsScalaMap(mergedFreqMap), results[i].getAs("freqMap"));
    }
}
Also used: Path (org.apache.hadoop.fs.Path), SparkSession (org.apache.spark.sql.SparkSession), Configuration (org.apache.hadoop.conf.Configuration), FreqMap (uk.gov.gchq.gaffer.types.FreqMap), AggregateDataForGroup (uk.gov.gchq.gaffer.parquetstore.operation.handler.utilities.AggregateDataForGroup), ArrayList (java.util.ArrayList), WrappedArray (scala.collection.mutable.WrappedArray), FileSystem (org.apache.hadoop.fs.FileSystem), Row (org.apache.spark.sql.Row), Test (org.junit.jupiter.api.Test)
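
The treeSet assertion above relies on a Spark-interop idiom worth calling out: Row.getAs returns array-valued columns as scala.collection.mutable.WrappedArray, which has to be unwrapped via array() to obtain a plain Java array. A minimal sketch of that idiom as a standalone helper (the class and method names are illustrative, not part of Gaffer):

import org.apache.spark.sql.Row;
import scala.collection.mutable.WrappedArray;

public final class WrappedArrayUnwrap {

    // Extracts a Spark array column as a plain Java String[]. Assumes the
    // column is backed by a String[] at runtime, as the "treeSet" column is
    // in the test above; the cast fails for other element types.
    public static String[] stringArrayColumn(final Row row, final String column) {
        // Spark returns ArrayType columns as WrappedArray; getAs infers the type
        final WrappedArray<String> wrapped = row.getAs(column);
        // array() exposes the underlying Java array (a String[] here)
        return (String[]) wrapped.array();
    }
}

The same cast-and-array() pattern appears in each of the remaining examples.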

Example 2 with WrappedArray

Use of scala.collection.mutable.WrappedArray in the Gaffer project by gchq.

From the class SortGroupSplitTest, method sortTest:

@Test
public void sortTest(@TempDir java.nio.file.Path tempDir) throws IOException {
    // Given
    final FileSystem fs = FileSystem.get(new Configuration());
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    final String inputDir = Files.createDirectories(tempDir.resolve("input")).toString();
    final String outputDir = tempDir.resolve("output").toString();
    generateDate(inputDir);
    final List<String> sortColumns = new ArrayList<>();
    sortColumns.add(ParquetStore.VERTEX);
    sortColumns.add("date");
    // When
    new SortGroupSplit(fs, sparkSession, sortColumns, inputDir, outputDir, CompressionCodecName.GZIP).call();
    // Then
    // - Check output directory exists and contains one Parquet file
    assertTrue(fs.exists(new Path(outputDir)));
    final FileStatus[] outputFiles = fs.listStatus(new Path(outputDir), path1 -> path1.getName().endsWith(".parquet"));
    assertThat(outputFiles).hasSize(1);
    // - Read results and check in correct order
    final Row[] results = (Row[]) sparkSession.read().parquet(outputFiles[0].getPath().toString()).collect();
    for (int i = 0; i < 40; i++) {
        assertEquals((long) i / 2, (long) results[i].getAs(ParquetStore.VERTEX));
        assertEquals('b', ((byte[]) results[i].getAs("byte"))[0]);
        assertEquals(7f, results[i].getAs("float"), 0.01f);
        assertEquals(11L * (i / 2), (long) results[i].getAs("long"));
        assertEquals(13, (int) results[i].getAs("short"));
        if (i % 2 == 0) {
            assertEquals(new Date(100000L).getTime(), (long) results[i].getAs("date"));
        } else {
            assertEquals(new Date(200000L).getTime(), (long) results[i].getAs("date"));
        }
        assertEquals(2, (int) results[i].getAs("count"));
        assertArrayEquals(new String[] { "A", "B", "C" }, (String[]) ((WrappedArray<String>) results[i].getAs("treeSet")).array());
        assertEquals(JavaConversions$.MODULE$.mapAsScalaMap(TestUtils.MERGED_FREQMAP), results[i].getAs("freqMap"));
    }
}
Also used: Path (org.apache.hadoop.fs.Path), SparkSession (org.apache.spark.sql.SparkSession), FileStatus (org.apache.hadoop.fs.FileStatus), Configuration (org.apache.hadoop.conf.Configuration), ArrayList (java.util.ArrayList), Date (java.util.Date), SortGroupSplit (uk.gov.gchq.gaffer.parquetstore.operation.handler.utilities.SortGroupSplit), WrappedArray (scala.collection.mutable.WrappedArray), FileSystem (org.apache.hadoop.fs.FileSystem), Row (org.apache.spark.sql.Row), Test (org.junit.jupiter.api.Test)
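
The freqMap assertion works in the opposite direction: Spark returns map columns as scala.collection.Map, so the expected Java map is converted with JavaConversions$.MODULE$.mapAsScalaMap before comparison. A minimal sketch of that comparison, with illustrative class and method names:

import java.util.Map;

import org.apache.spark.sql.Row;
import scala.collection.JavaConversions$;

public final class MapColumnCheck {

    // Compares an expected Java map against a Spark map column by converting
    // the expected side to a Scala map, mirroring the assertEquals above.
    public static boolean matches(final Row row, final String column, final Map<String, Long> expected) {
        final scala.collection.Map<String, Long> actual = row.getAs(column);
        return JavaConversions$.MODULE$.mapAsScalaMap(expected).equals(actual);
    }
}

This works because Scala map equality is content-based: the mutable map produced by mapAsScalaMap compares equal to the map instance Spark returns, regardless of the concrete implementation.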

Example 3 with WrappedArray

Use of scala.collection.mutable.WrappedArray in the Gaffer project by gchq.

From the class AddElementsHandlerTest, method testOnePartitionAllGroups:

@Test
public void testOnePartitionAllGroups(@TempDir java.nio.file.Path tempDir) throws IOException, OperationException, StoreException {
    // Given
    final List<Element> elementsToAdd = new ArrayList<>();
    // - Data for TestGroups.ENTITY
    elementsToAdd.addAll(AggregateAndSortDataTest.generateData());
    elementsToAdd.addAll(AggregateAndSortDataTest.generateData());
    // - Data for TestGroups.ENTITY_2
    elementsToAdd.add(WriteUnsortedDataTest.createEntityForEntityGroup_2(10000L));
    elementsToAdd.add(WriteUnsortedDataTest.createEntityForEntityGroup_2(100L));
    elementsToAdd.add(WriteUnsortedDataTest.createEntityForEntityGroup_2(10L));
    elementsToAdd.add(WriteUnsortedDataTest.createEntityForEntityGroup_2(1L));
    // - Data for TestGroups.EDGE
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup(10000L, 1000L, true, new Date(100L)));
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup(100L, 100000L, false, new Date(200L)));
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 10L, true, new Date(300L)));
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 10L, true, new Date(400L)));
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 10L, false, new Date(400L)));
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 2L, false, new Date(400L)));
    // - Data for TestGroups.EDGE_2
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(10000L, 20L, true));
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(100L, 200L, false));
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(10L, 50L, true));
    elementsToAdd.add(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(1L, 2000L, false));
    // - Shuffle the list so that the order is random
    Collections.shuffle(elementsToAdd);
    final AddElements add = new AddElements.Builder().input(elementsToAdd).build();
    final Context context = new Context();
    final Schema schema = TestUtils.gafferSchema("schemaUsingLongVertexType");
    final ParquetStoreProperties storeProperties = new ParquetStoreProperties();
    final String testDir = tempDir.toString();
    storeProperties.setDataDir(testDir + "/data");
    storeProperties.setTempFilesDir(testDir + "/tmpdata");
    final ParquetStore store = (ParquetStore) ParquetStore.createStore("graphId", schema, storeProperties);
    final FileSystem fs = FileSystem.get(new Configuration());
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    // When
    new AddElementsHandler().doOperation(add, context, store);
    // Then
    // - New snapshot directory should have been created.
    final long snapshotId = store.getLatestSnapshot();
    final Path snapshotPath = new Path(testDir + "/data", ParquetStore.getSnapshotPath(snapshotId));
    assertTrue(fs.exists(snapshotPath));
    // - There should be 1 file named partition-0.parquet (and an associated .crc file) in the "group=BasicEntity"
    // directory.
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.ENTITY, false) + "/" + ParquetStore.getFile(0))));
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.ENTITY, false) + "/." + ParquetStore.getFile(0) + ".crc")));
    // - The files should contain the data sorted by vertex and date.
    Row[] results = (Row[]) sparkSession.read().parquet(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.ENTITY, false) + "/" + ParquetStore.getFile(0)).toString()).collect();
    assertThat(results).hasSize(40);
    for (int i = 0; i < 40; i++) {
        assertEquals((long) i / 2, (long) results[i].getAs(ParquetStore.VERTEX));
        assertEquals(i % 2 == 0 ? 'b' : 'a', ((byte[]) results[i].getAs("byte"))[0]);
        assertEquals(i % 2 == 0 ? 8f : 6f, results[i].getAs("float"), 0.01f);
        assertEquals(11L * 2 * (i / 2), (long) results[i].getAs("long"));
        assertEquals(i % 2 == 0 ? 14 : 12, (int) results[i].getAs("short"));
        assertEquals(i % 2 == 0 ? 100000L : 200000L, (long) results[i].getAs("date"));
        assertEquals(2, (int) results[i].getAs("count"));
        assertArrayEquals(i % 2 == 0 ? new String[] { "A", "C" } : new String[] { "A", "B" }, (String[]) ((WrappedArray<String>) results[i].getAs("treeSet")).array());
        final FreqMap mergedFreqMap1 = new FreqMap();
        mergedFreqMap1.put("A", 2L);
        mergedFreqMap1.put("B", 2L);
        final FreqMap mergedFreqMap2 = new FreqMap();
        mergedFreqMap2.put("A", 2L);
        mergedFreqMap2.put("C", 2L);
        assertEquals(JavaConversions$.MODULE$.mapAsScalaMap(i % 2 == 0 ? mergedFreqMap2 : mergedFreqMap1), results[i].getAs("freqMap"));
    }
    // - There should be 1 file named partition-0.parquet (and an associated .crc file) in the "group=BasicEntity2"
    // directory.
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.ENTITY_2, false) + "/" + ParquetStore.getFile(0))));
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.ENTITY_2, false) + "/." + ParquetStore.getFile(0) + ".crc")));
    // - The files should contain the data sorted by vertex.
    results = (Row[]) sparkSession.read().parquet(new Path(snapshotPath, "graph/group=BasicEntity2/" + ParquetStore.getFile(0)).toString()).collect();
    assertThat(results).hasSize(4);
    checkEntityGroup2(WriteUnsortedDataTest.createEntityForEntityGroup_2(1L), results[0]);
    checkEntityGroup2(WriteUnsortedDataTest.createEntityForEntityGroup_2(10L), results[1]);
    checkEntityGroup2(WriteUnsortedDataTest.createEntityForEntityGroup_2(100L), results[2]);
    checkEntityGroup2(WriteUnsortedDataTest.createEntityForEntityGroup_2(10000L), results[3]);
    // - There should be 1 file named partition-0.parquet (and an associated .crc file) in the "group=BasicEdge"
    // directory and in the "reversed-group=BasicEdge" directory.
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE, false) + "/" + ParquetStore.getFile(0))));
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE, false) + "/." + ParquetStore.getFile(0) + ".crc")));
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE, true) + "/" + ParquetStore.getFile(0))));
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE, true) + "/." + ParquetStore.getFile(0) + ".crc")));
    // - The files should contain the data sorted by source, destination, directed, date
    results = (Row[]) sparkSession.read().parquet(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE, false) + "/" + ParquetStore.getFile(0)).toString()).collect();
    assertThat(results).hasSize(6);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 2L, false, new Date(400L)), results[0]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 10L, false, new Date(400L)), results[1]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 10L, true, new Date(300L)), results[2]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 10L, true, new Date(400L)), results[3]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(100L, 100000L, false, new Date(200L)), results[4]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(10000L, 1000L, true, new Date(100L)), results[5]);
    results = (Row[]) sparkSession.read().parquet(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE, true) + "/" + ParquetStore.getFile(0)).toString()).collect();
    assertThat(results).hasSize(6);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 2L, false, new Date(400L)), results[0]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 10L, false, new Date(400L)), results[1]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 10L, true, new Date(300L)), results[2]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(1L, 10L, true, new Date(400L)), results[3]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(10000L, 1000L, true, new Date(100L)), results[4]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup(100L, 100000L, false, new Date(200L)), results[5]);
    // - There should be 1 file named partition-0.parquet (and an associated .crc file) in the "group=BasicEdge2"
    // directory and in the "reversed-group=BasicEdge2" directory.
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE_2, false) + "/" + ParquetStore.getFile(0))));
    assertTrue(fs.exists(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE_2, false) + "/." + ParquetStore.getFile(0) + ".crc")));
    // - The files should contain the data sorted by source, destination, directed
    results = (Row[]) sparkSession.read().parquet(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE_2, false) + "/" + ParquetStore.getFile(0)).toString()).collect();
    assertThat(results).hasSize(4);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(1L, 2000L, false), results[0]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(10L, 50L, true), results[1]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(100L, 200L, false), results[2]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(10000L, 20L, true), results[3]);
    results = (Row[]) sparkSession.read().parquet(new Path(snapshotPath, ParquetStore.getGroupSubDir(TestGroups.EDGE_2, true) + "/" + ParquetStore.getFile(0)).toString()).collect();
    assertThat(results).hasSize(4);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(10000L, 20L, true), results[0]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(10L, 50L, true), results[1]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(100L, 200L, false), results[2]);
    checkEdge(WriteUnsortedDataTest.createEdgeForEdgeGroup_2(1L, 2000L, false), results[3]);
}
Also used: AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements), Context (uk.gov.gchq.gaffer.store.Context), ParquetStore (uk.gov.gchq.gaffer.parquetstore.ParquetStore), Path (org.apache.hadoop.fs.Path), SparkSession (org.apache.spark.sql.SparkSession), Configuration (org.apache.hadoop.conf.Configuration), FreqMap (uk.gov.gchq.gaffer.types.FreqMap), Element (uk.gov.gchq.gaffer.data.element.Element), Schema (uk.gov.gchq.gaffer.store.schema.Schema), ArrayList (java.util.ArrayList), Date (java.util.Date), WrappedArray (scala.collection.mutable.WrappedArray), ParquetStoreProperties (uk.gov.gchq.gaffer.parquetstore.ParquetStoreProperties), FileSystem (org.apache.hadoop.fs.FileSystem), Row (org.apache.spark.sql.Row), WriteUnsortedDataTest (uk.gov.gchq.gaffer.parquetstore.utils.WriteUnsortedDataTest), Test (org.junit.jupiter.api.Test), AggregateAndSortDataTest (uk.gov.gchq.gaffer.parquetstore.utils.AggregateAndSortDataTest)
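
For orientation, the directory structure the assertions above walk through looks roughly like this (inferred from the test's own comments and its ParquetStore.getSnapshotPath, getGroupSubDir and getFile calls; the snapshot id is illustrative):

data/snapshot=<id>/graph/group=BasicEntity/partition-0.parquet
data/snapshot=<id>/graph/group=BasicEntity2/partition-0.parquet
data/snapshot=<id>/graph/group=BasicEdge/partition-0.parquet
data/snapshot=<id>/graph/group=BasicEdge2/partition-0.parquet
data/snapshot=<id>/graph/reversed-group=BasicEdge/partition-0.parquet
data/snapshot=<id>/graph/reversed-group=BasicEdge2/partition-0.parquet

Each partition-0.parquet has a hidden .partition-0.parquet.crc checksum file alongside it. The group directories are sorted by source, destination, directed (plus date for BasicEdge); the reversed-group directories hold the same edges sorted by destination first, which is why the final two runs of checkEdge expect a different ordering.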

Example 4 with WrappedArray

Use of scala.collection.mutable.WrappedArray in the Gaffer project by gchq.

From the class GafferGroupObjectConverter, method sparkRowToGafferObject:

/**
 * Extracts an object corresponding to column {@code gafferColumn} from the provided {@link GenericRowWithSchema}.
 *
 * @param gafferColumn the column to extract
 * @param row          the row to extract from
 * @return the extracted {@link Object}
 * @throws SerialisationException if the conversion from Parquet objects to the original object throws a
 *                                {@link SerialisationException}
 */
public Object sparkRowToGafferObject(final String gafferColumn, final Row row) throws SerialisationException {
    final ArrayList<Object> objectsList = new ArrayList<>();
    final String[] paths = columnToPaths.get(gafferColumn);
    if (paths[0].contains(".")) {
        final Object nestedRow = row.getAs(gafferColumn);
        if (null != nestedRow) {
            if (nestedRow instanceof GenericRowWithSchema) {
                getObjectsFromNestedRow(objectsList, (GenericRowWithSchema) nestedRow);
            } else if (nestedRow instanceof WrappedArray) {
                objectsList.add(((WrappedArray) nestedRow).array());
            } else if (nestedRow instanceof scala.collection.Map) {
                objectsList.add(scala.collection.JavaConversions.mapAsJavaMap((scala.collection.Map) nestedRow));
            } else if (nestedRow instanceof Object[]) {
                objectsList.add(nestedRow);
            } else {
                throw new SerialisationException("sparkRowToGafferObject does not know how to deal with a " + nestedRow.getClass().getCanonicalName());
            }
        } else {
            objectsList.add(null);
        }
    } else {
        for (final String path : paths) {
            final Object obj = row.getAs(path);
            objectsList.add(obj);
        }
    }
    final Object[] objects;
    if (paths[0].endsWith("key_value.key")) {
        objects = new Object[1];
    } else {
        objects = new Object[paths.length];
    }
    objectsList.toArray(objects);
    final Object gafferObject = parquetObjectsToGafferObject(gafferColumn, objects);
    if (null == gafferObject) {
        LOGGER.debug("Failed to get the Gaffer Object from the Spark Row for the column: {}", gafferColumn);
    }
    return gafferObject;
}
Also used: SerialisationException (uk.gov.gchq.gaffer.exception.SerialisationException), WrappedArray (scala.collection.mutable.WrappedArray), ArrayList (java.util.ArrayList), GenericRowWithSchema (org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema), HashMap (java.util.HashMap), Map (java.util.Map)
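
The instanceof dispatch above is the heart of the conversion: Spark hands nested values back as Scala types, which must be unwrapped before Gaffer's serialisers can consume them. A simplified standalone sketch of just that dispatch (an illustration, not the Gaffer implementation):

import scala.collection.JavaConversions;
import scala.collection.mutable.WrappedArray;

public final class SparkValueUnwrap {

    // Mirrors the dispatch in sparkRowToGafferObject: unwrap the Scala
    // collection types Spark uses for nested values into plain Java objects,
    // and pass anything else through unchanged.
    public static Object unwrap(final Object value) {
        if (value instanceof WrappedArray) {
            // array() returns the underlying Java array as an Object
            return ((WrappedArray<?>) value).array();
        }
        if (value instanceof scala.collection.Map) {
            // convert a Scala map to its java.util.Map view
            return JavaConversions.mapAsJavaMap((scala.collection.Map<?, ?>) value);
        }
        return value;
    }
}

Note the real method also handles GenericRowWithSchema recursively and raises a SerialisationException for unrecognised types; this sketch omits both for brevity.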

Example 5 with WrappedArray

Use of scala.collection.mutable.WrappedArray in the Gaffer project by gchq.

From the class AggregateAndSortDataTest, method test:

@Test
public void test(@TempDir java.nio.file.Path tempDir) throws Exception {
    // Given
    final FileSystem fs = FileSystem.get(new Configuration());
    final SchemaUtils schemaUtils = new SchemaUtils(TestUtils.gafferSchema("schemaUsingLongVertexType"));
    final String file1 = tempDir.resolve("inputdata1.parquet").toString();
    final String file2 = tempDir.resolve("inputdata2.parquet").toString();
    writeData(file1, schemaUtils);
    writeData(file2, schemaUtils);
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    final List<String> inputFiles = new ArrayList<>(Sets.newHashSet(file1, file2));
    final String outputFolder = tempDir.resolve("aggregated").toString();
    // When
    new AggregateAndSortData(schemaUtils, fs, inputFiles, outputFolder, TestGroups.ENTITY, "test", false, CompressionCodecName.GZIP, sparkSession).call();
    // Then
    assertTrue(fs.exists(new Path(outputFolder)));
    final Row[] results = (Row[]) sparkSession.read().parquet(outputFolder).collect();
    // Should be sorted by vertex and date
    for (int i = 0; i < 40; i++) {
        assertEquals((long) i / 2, (long) results[i].getAs(ParquetStore.VERTEX));
        assertEquals(i % 2 == 0 ? 'b' : 'a', ((byte[]) results[i].getAs("byte"))[0]);
        assertEquals(i % 2 == 0 ? 8f : 6f, results[i].getAs("float"), 0.01f);
        assertEquals(11L * 2 * (i / 2), (long) results[i].getAs("long"));
        assertEquals(i % 2 == 0 ? 14 : 12, (int) results[i].getAs("short"));
        assertEquals(i % 2 == 0 ? 100000L : 200000L, (long) results[i].getAs("date"));
        assertEquals(2, (int) results[i].getAs("count"));
        assertArrayEquals(i % 2 == 0 ? new String[] { "A", "C" } : new String[] { "A", "B" }, (String[]) ((WrappedArray<String>) results[i].getAs("treeSet")).array());
        final FreqMap mergedFreqMap1 = new FreqMap();
        mergedFreqMap1.put("A", 2L);
        mergedFreqMap1.put("B", 2L);
        final FreqMap mergedFreqMap2 = new FreqMap();
        mergedFreqMap2.put("A", 2L);
        mergedFreqMap2.put("C", 2L);
        assertEquals(JavaConversions$.MODULE$.mapAsScalaMap(i % 2 == 0 ? mergedFreqMap2 : mergedFreqMap1), results[i].getAs("freqMap"));
    }
}
Also used: Path (org.apache.hadoop.fs.Path), SparkSession (org.apache.spark.sql.SparkSession), Configuration (org.apache.hadoop.conf.Configuration), FreqMap (uk.gov.gchq.gaffer.types.FreqMap), ArrayList (java.util.ArrayList), AggregateAndSortData (uk.gov.gchq.gaffer.parquetstore.operation.handler.utilities.AggregateAndSortData), WrappedArray (scala.collection.mutable.WrappedArray), FileSystem (org.apache.hadoop.fs.FileSystem), Row (org.apache.spark.sql.Row), Test (org.junit.jupiter.api.Test)

Aggregations

ArrayList (java.util.ArrayList): 7
WrappedArray (scala.collection.mutable.WrappedArray): 7
Configuration (org.apache.hadoop.conf.Configuration): 6
FileSystem (org.apache.hadoop.fs.FileSystem): 6
Path (org.apache.hadoop.fs.Path): 6
Row (org.apache.spark.sql.Row): 6
SparkSession (org.apache.spark.sql.SparkSession): 6
Test (org.junit.jupiter.api.Test): 6
FreqMap (uk.gov.gchq.gaffer.types.FreqMap): 5
Date (java.util.Date): 3
Element (uk.gov.gchq.gaffer.data.element.Element): 3
AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements): 3
ParquetStore (uk.gov.gchq.gaffer.parquetstore.ParquetStore): 3
ParquetStoreProperties (uk.gov.gchq.gaffer.parquetstore.ParquetStoreProperties): 3
AggregateAndSortDataTest (uk.gov.gchq.gaffer.parquetstore.utils.AggregateAndSortDataTest): 3
WriteUnsortedDataTest (uk.gov.gchq.gaffer.parquetstore.utils.WriteUnsortedDataTest): 3
Context (uk.gov.gchq.gaffer.store.Context): 3
Schema (uk.gov.gchq.gaffer.store.schema.Schema): 3
HashMap (java.util.HashMap): 1
Map (java.util.Map): 1