Search in sources :

Example 11 with GetDataFrameOfElements

use of uk.gov.gchq.gaffer.spark.operation.dataframe.GetDataFrameOfElements in project Gaffer by gchq.

the class GetDataFrameOfElementsHandlerTest method checkCanDealWithNonStandardProperties.

/**
 * Runs {@link GetDataFrameOfElements} against a graph loaded with the non-standard-types schema
 * and verifies the rows returned for the edge group and then for the entity group.
 *
 * <p>The expected rows contain a frequency map as-is and, for the sketch column, the value of
 * {@code HyperLogLogPlus.cardinality()} rather than the sketch object itself.
 *
 * @throws OperationException if executing the operation on the graph fails
 */
@Test
public void checkCanDealWithNonStandardProperties() throws OperationException {
    final Graph graph = getGraph("/schema-DataFrame/elementsNonstandardTypes.json", getElementsWithNonStandardProperties());

    // ---- Edge group: fetch the DataFrame restricted to EDGE_GROUP ----
    final View edgeView = new View.Builder().edge(EDGE_GROUP).build();
    final GetDataFrameOfElements edgeOperation = new GetDataFrameOfElements.Builder().view(edgeView).build();
    final Dataset<Row> edgeFrame = graph.execute(edgeOperation, new User());
    final Set<Row> actualEdgeRows = new HashSet<>(edgeFrame.collectAsList());

    // Frequency map expected in the edge row.
    Map<String, Long> freqMap = Map$.MODULE$.empty();
    freqMap.put("Y", 1000L);
    freqMap.put("Z", 10000L);

    // Sketch whose cardinality is expected in the edge row.
    final HyperLogLogPlus edgeSketch = new HyperLogLogPlus(5, 5);
    edgeSketch.offer("AAA");
    edgeSketch.offer("BBB");

    // Column order of the expected row matters, so values are appended strictly in sequence.
    final MutableList<Object> edgeFields = new MutableList<>();
    edgeFields.appendElem(EDGE_GROUP);
    edgeFields.appendElem("B");
    edgeFields.appendElem("C");
    edgeFields.appendElem(true);
    edgeFields.appendElem(null);
    edgeFields.appendElem(freqMap);
    edgeFields.appendElem(edgeSketch.cardinality());

    final Set<Row> expectedEdgeRows = new HashSet<>();
    expectedEdgeRows.add(Row$.MODULE$.fromSeq(edgeFields));
    assertEquals(expectedEdgeRows, actualEdgeRows);

    // ---- Entity group: fetch the DataFrame restricted to ENTITY_GROUP ----
    final View entityView = new View.Builder().entity(ENTITY_GROUP).build();
    final GetDataFrameOfElements entityOperation = new GetDataFrameOfElements.Builder().view(entityView).build();
    final Dataset<Row> entityFrame = graph.execute(entityOperation, new User());
    final Set<Row> actualEntityRows = new HashSet<>(entityFrame.collectAsList());

    // The same map instance is emptied and refilled with the entity's expected frequencies.
    freqMap.clear();
    freqMap.put("W", 10L);
    freqMap.put("X", 100L);

    final HyperLogLogPlus entitySketch = new HyperLogLogPlus(5, 5);
    entitySketch.offer("AAA");

    final MutableList<Object> entityFields = new MutableList<>();
    entityFields.appendElem(ENTITY_GROUP);
    entityFields.appendElem("A");
    entityFields.appendElem(freqMap);
    entityFields.appendElem(entitySketch.cardinality());

    final Set<Row> expectedEntityRows = new HashSet<>();
    expectedEntityRows.add(Row$.MODULE$.fromSeq(entityFields));
    assertEquals(expectedEntityRows, actualEntityRows);
}
Also used : GetDataFrameOfElements(uk.gov.gchq.gaffer.spark.operation.dataframe.GetDataFrameOfElements) User(uk.gov.gchq.gaffer.user.User) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) Graph(uk.gov.gchq.gaffer.graph.Graph) MutableList(scala.collection.mutable.MutableList) HyperLogLogPlus(com.clearspring.analytics.stream.cardinality.HyperLogLogPlus) Row(org.apache.spark.sql.Row) HashSet(java.util.HashSet) Test(org.junit.jupiter.api.Test)

Example 12 with GetDataFrameOfElements

use of uk.gov.gchq.gaffer.spark.operation.dataframe.GetDataFrameOfElements in project Gaffer by gchq.

the class GetDataFrameOfElementsHandlerTest method checkGetCorrectElementsInDataFrame.

/**
 * Runs {@link GetDataFrameOfElements} against a graph loaded with the standard elements schema
 * and verifies the full set of rows returned for the edge group and then for the entity group.
 *
 * <p>For every index {@code i} in {@code [0, NUM_ELEMENTS)} two edge rows (destination "B" and
 * destination "C") and one entity row are expected. Rows are compared as sets, so the order in
 * which the DataFrame returns them is irrelevant.
 *
 * @throws OperationException if executing the operation on the graph fails
 */
@Test
public void checkGetCorrectElementsInDataFrame() throws OperationException {
    final Graph graph = getGraph("/schema-DataFrame/elements.json", getElements());
    // Edges group - check get correct edges
    GetDataFrameOfElements dfOperation = new GetDataFrameOfElements.Builder().view(new View.Builder().edge(EDGE_GROUP).build()).build();
    Dataset<Row> dataFrame = graph.execute(dfOperation, new User());
    Set<Row> results = new HashSet<>(dataFrame.collectAsList());
    final Set<Row> expectedRows = new HashSet<>();
    for (int i = 0; i < NUM_ELEMENTS; i++) {
        // Expected row for the edge from i to "B"; values are appended in column order.
        final MutableList<Object> fields1 = new MutableList<>();
        fields1.appendElem(EDGE_GROUP);
        fields1.appendElem("" + i);
        fields1.appendElem("B");
        fields1.appendElem(true);
        fields1.appendElem(null);
        fields1.appendElem(1);
        fields1.appendElem(2);
        fields1.appendElem(3.0F);
        fields1.appendElem(4.0D);
        fields1.appendElem(5L);
        fields1.appendElem(100L);
        expectedRows.add(Row$.MODULE$.fromSeq(fields1));
        // Expected row for the edge from i to "C"; its last column varies with i.
        final MutableList<Object> fields2 = new MutableList<>();
        fields2.appendElem(EDGE_GROUP);
        fields2.appendElem("" + i);
        fields2.appendElem("C");
        fields2.appendElem(true);
        fields2.appendElem(null);
        fields2.appendElem(6);
        fields2.appendElem(7);
        fields2.appendElem(8.0F);
        fields2.appendElem(9.0D);
        fields2.appendElem(10L);
        fields2.appendElem(i * 200L);
        expectedRows.add(Row$.MODULE$.fromSeq(fields2));
    }
    assertEquals(expectedRows, results);
    // Entities group - check get correct entities
    dfOperation = new GetDataFrameOfElements.Builder().view(new View.Builder().entity(ENTITY_GROUP).build()).build();
    dataFrame = graph.execute(dfOperation, new User());
    results.clear();
    results.addAll(dataFrame.collectAsList());
    expectedRows.clear();
    for (int i = 0; i < NUM_ELEMENTS; i++) {
        // Expected row for entity i; a new list is built per row, so it starts out empty.
        final MutableList<Object> fields1 = new MutableList<>();
        fields1.appendElem(ENTITY_GROUP);
        fields1.appendElem("" + i);
        fields1.appendElem(1);
        fields1.appendElem(i);
        fields1.appendElem(3.0F);
        fields1.appendElem(4.0D);
        fields1.appendElem(5L);
        fields1.appendElem(6);
        expectedRows.add(Row$.MODULE$.fromSeq(fields1));
    }
    assertEquals(expectedRows, results);
}
Also used : GetDataFrameOfElements(uk.gov.gchq.gaffer.spark.operation.dataframe.GetDataFrameOfElements) User(uk.gov.gchq.gaffer.user.User) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) Graph(uk.gov.gchq.gaffer.graph.Graph) MutableList(scala.collection.mutable.MutableList) Row(org.apache.spark.sql.Row) HashSet(java.util.HashSet) Test(org.junit.jupiter.api.Test)

Example 13 with GetDataFrameOfElements

use of uk.gov.gchq.gaffer.spark.operation.dataframe.GetDataFrameOfElements in project Gaffer by gchq.

the class GetDataFrameOfElementsHandlerTest method checkGetCorrectElementsInDataFrameWithProjection.

/**
 * Fetches the edge-group DataFrame once and verifies that column projections applied to it
 * return the expected rows: first for {@code src}, {@code dst} and {@code property2}, then for
 * {@code property2} alone.
 *
 * <p>Rows are compared as sets, so duplicate rows collapse and ordering is irrelevant.
 *
 * @throws OperationException if executing the operation on the graph fails
 */
@Test
public void checkGetCorrectElementsInDataFrameWithProjection() throws OperationException {
    final Graph graph = getGraph("/schema-DataFrame/elements.json", getElements());

    // Get all edges
    final View edgeView = new View.Builder().edge(EDGE_GROUP).build();
    final GetDataFrameOfElements edgeOperation = new GetDataFrameOfElements.Builder().view(edgeView).build();
    final Dataset<Row> edgeFrame = graph.execute(edgeOperation, new User());

    // Check get correct rows when ask for src, dst and property2 columns
    final Dataset<Row> threeColumns = edgeFrame.select("src", "dst", "property2");
    final Set<Row> actualThreeColumnRows = new HashSet<>(threeColumns.collectAsList());
    final Set<Row> expectedThreeColumnRows = new HashSet<>();
    for (int idx = 0; idx < NUM_ELEMENTS; idx++) {
        // Edge idx -> "B"
        final MutableList<Object> toB = new MutableList<>();
        toB.appendElem("" + idx);
        toB.appendElem("B");
        toB.appendElem(3.0F);
        expectedThreeColumnRows.add(Row$.MODULE$.fromSeq(toB));

        // Edge idx -> "C"
        final MutableList<Object> toC = new MutableList<>();
        toC.appendElem("" + idx);
        toC.appendElem("C");
        toC.appendElem(8.0F);
        expectedThreeColumnRows.add(Row$.MODULE$.fromSeq(toC));
    }
    assertEquals(expectedThreeColumnRows, actualThreeColumnRows);

    // Check get correct rows when ask for just property2 column
    final Dataset<Row> oneColumn = edgeFrame.select("property2");
    final Set<Row> actualOneColumnRows = new HashSet<>(oneColumn.collectAsList());
    final Set<Row> expectedOneColumnRows = new HashSet<>();
    for (int idx = 0; idx < NUM_ELEMENTS; idx++) {
        // The same two single-value rows are added on every pass; the set keeps one of each.
        final MutableList<Object> valueForB = new MutableList<>();
        valueForB.appendElem(3.0F);
        expectedOneColumnRows.add(Row$.MODULE$.fromSeq(valueForB));

        final MutableList<Object> valueForC = new MutableList<>();
        valueForC.appendElem(8.0F);
        expectedOneColumnRows.add(Row$.MODULE$.fromSeq(valueForC));
    }
    assertEquals(expectedOneColumnRows, actualOneColumnRows);
}
Also used : GetDataFrameOfElements(uk.gov.gchq.gaffer.spark.operation.dataframe.GetDataFrameOfElements) User(uk.gov.gchq.gaffer.user.User) Graph(uk.gov.gchq.gaffer.graph.Graph) MutableList(scala.collection.mutable.MutableList) Row(org.apache.spark.sql.Row) HashSet(java.util.HashSet) Test(org.junit.jupiter.api.Test)

Aggregations

GetDataFrameOfElements (uk.gov.gchq.gaffer.spark.operation.dataframe.GetDataFrameOfElements)13 Row (org.apache.spark.sql.Row)10 User (uk.gov.gchq.gaffer.user.User)10 Test (org.junit.jupiter.api.Test)8 View (uk.gov.gchq.gaffer.data.elementdefinition.view.View)8 Graph (uk.gov.gchq.gaffer.graph.Graph)8 HashSet (java.util.HashSet)7 MutableList (scala.collection.mutable.MutableList)7 HyperLogLogPlus (com.clearspring.analytics.stream.cardinality.HyperLogLogPlus)2 ArrayList (java.util.ArrayList)1 SparkSession (org.apache.spark.sql.SparkSession)1 ElementFilter (uk.gov.gchq.gaffer.data.element.function.ElementFilter)1 ViewElementDefinition (uk.gov.gchq.gaffer.data.elementdefinition.view.ViewElementDefinition)1 Converter (uk.gov.gchq.gaffer.spark.operation.dataframe.converter.property.Converter)1 IsMoreThan (uk.gov.gchq.koryphe.impl.predicate.IsMoreThan)1