Example 6 with ArrayBuffer

Use of scala.collection.mutable.ArrayBuffer in project Gaffer by gchq.

In the class ImportKeyValuePairRDDToAccumuloHandlerTest, the method checkImportRDDOfElements:

@Test
public void checkImportRDDOfElements(@TempDir Path tempDir) throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .config(new GraphConfig.Builder().graphId("graphId").build())
            .addSchema(getClass().getResourceAsStream("/schema/elements.json"))
            .addSchema(getClass().getResourceAsStream("/schema/types.json"))
            .addSchema(getClass().getResourceAsStream("/schema/serialisation.json"))
            .storeProperties(PROPERTIES)
            .build();
    final ArrayBuffer<Element> elements = new ArrayBuffer<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity.Builder()
                .group(TestGroups.ENTITY)
                .vertex("" + i)
                .build();
        final Edge edge1 = new Edge.Builder()
                .group(TestGroups.EDGE)
                .source("" + i)
                .dest("B")
                .directed(false)
                .property(TestPropertyNames.COUNT, 2)
                .build();
        final Edge edge2 = new Edge.Builder()
                .group(TestGroups.EDGE)
                .source("" + i)
                .dest("C")
                .directed(false)
                .property(TestPropertyNames.COUNT, 4)
                .build();
        // $plus$eq is the JVM-mangled name of Scala's += operator on ArrayBuffer.
        elements.$plus$eq(edge1);
        elements.$plus$eq(edge2);
        elements.$plus$eq(entity);
    }
    final User user = new User();
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final String configurationString = AbstractGetRDDHandler.convertConfigurationToString(configuration);
    final String outputPath = tempDir.resolve("output").toAbsolutePath().toString();
    final String failurePath = tempDir.resolve("failure").toAbsolutePath().toString();
    final ElementConverterFunction func = new ElementConverterFunction(
            sparkSession.sparkContext().broadcast(
                    new ByteEntityAccumuloElementConverter(graph1.getSchema()),
                    ACCUMULO_ELEMENT_CONVERTER_CLASS_TAG));
    final RDD<Tuple2<Key, Value>> elementRDD = sparkSession.sparkContext()
            .parallelize(elements, 1, ELEMENT_CLASS_TAG)
            .flatMap(func, TUPLE2_CLASS_TAG);
    final ImportKeyValuePairRDDToAccumulo addRdd = new ImportKeyValuePairRDDToAccumulo.Builder()
            .input(elementRDD)
            .outputPath(outputPath)
            .failurePath(failurePath)
            .build();
    graph1.execute(addRdd, user);
    // Check all elements were added
    final GetRDDOfAllElements rddQuery = new GetRDDOfAllElements.Builder()
            .option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString)
            .build();
    final RDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>();
    // collect() returns Scala's Array[T], which erases to Object from Java, hence the cast.
    final Element[] returnedElements = (Element[]) rdd.collect();
    Collections.addAll(results, returnedElements);
    assertEquals(elements.size(), results.size());
}
Also used : Entity(uk.gov.gchq.gaffer.data.element.Entity) User(uk.gov.gchq.gaffer.user.User) SparkSession(org.apache.spark.sql.SparkSession) Configuration(org.apache.hadoop.conf.Configuration) Element(uk.gov.gchq.gaffer.data.element.Element) Graph(uk.gov.gchq.gaffer.graph.Graph) GetRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements) Tuple2(scala.Tuple2) ImportKeyValuePairRDDToAccumulo(uk.gov.gchq.gaffer.sparkaccumulo.operation.scalardd.ImportKeyValuePairRDDToAccumulo) ArrayBuffer(scala.collection.mutable.ArrayBuffer) ByteEntityAccumuloElementConverter(uk.gov.gchq.gaffer.accumulostore.key.core.impl.byteEntity.ByteEntityAccumuloElementConverter) ElementConverterFunction(uk.gov.gchq.gaffer.sparkaccumulo.operation.utils.scala.ElementConverterFunction) Edge(uk.gov.gchq.gaffer.data.element.Edge) HashSet(java.util.HashSet) Test(org.junit.jupiter.api.Test)
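
Two Java-to-Scala interop details in this test are easy to miss: Scala's symbolic += operator is only reachable from Java through its JVM-mangled name $plus$eq, and Scala methods that take implicit ClassTag parameters (such as parallelize) must receive them explicitly. A minimal standalone sketch of both, assuming only scala-library on the classpath (class and variable names here are illustrative, not Gaffer's):

import scala.collection.mutable.ArrayBuffer;
import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;

public class ScalaInteropSketch {
    public static void main(String[] args) {
        // Scala's `buffer += x` compiles to the method $plus$eq.
        final ArrayBuffer<String> buffer = new ArrayBuffer<>();
        buffer.$plus$eq("first");
        buffer.$plus$eq("second");
        // Implicit ClassTags must be passed explicitly from Java; constants
        // such as ELEMENT_CLASS_TAG in the test above are typically built this way.
        final ClassTag<String> tag = ClassTag$.MODULE$.apply(String.class);
        System.out.println(buffer.size() + " elements, tag: " + tag);
    }
}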

Example 7 with ArrayBuffer

Use of scala.collection.mutable.ArrayBuffer in project Gaffer by gchq.

In the class SplitStoreFromRDDOfElementsHandlerTest, the method createElements:

private ArrayBuffer<Element> createElements() {
    final ArrayBuffer<Element> elements = new ArrayBuffer<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity.Builder()
                .group(TestGroups.ENTITY)
                .vertex("" + i)
                .build();
        final Edge edge1 = new Edge.Builder()
                .group(TestGroups.EDGE)
                .source("" + i)
                .dest("B")
                .directed(false)
                .property(TestPropertyNames.COUNT, 2)
                .build();
        final Edge edge2 = new Edge.Builder()
                .group(TestGroups.EDGE)
                .source("" + i)
                .dest("C")
                .directed(false)
                .property(TestPropertyNames.COUNT, 4)
                .build();
        elements.$plus$eq(edge1);
        elements.$plus$eq(edge2);
        elements.$plus$eq(entity);
    }
    return elements;
}
Also used : Entity(uk.gov.gchq.gaffer.data.element.Entity) Element(uk.gov.gchq.gaffer.data.element.Element) ArrayBuffer(scala.collection.mutable.ArrayBuffer) Edge(uk.gov.gchq.gaffer.data.element.Edge)
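
A stylistic alternative to calling $plus$eq directly, for comparison: build the elements in a java.util.List and convert it once with JavaConverters.asScalaBuffer, which wraps (not copies) the list as a scala.collection.mutable.Buffer. This is a hypothetical variant, not what the Gaffer test does:

import java.util.ArrayList;
import java.util.List;
import scala.collection.JavaConverters;
import scala.collection.mutable.Buffer;

// Hypothetical helper: accumulate in a Java list, convert at the end for
// APIs that expect a scala.collection.Seq.
private Buffer<Element> createElementsViaConverter() {
    final List<Element> javaElements = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        javaElements.add(new Entity.Builder()
                .group(TestGroups.ENTITY)
                .vertex("" + i)
                .build());
    }
    // asScalaBuffer returns a live view over the Java list; no copy is made.
    return JavaConverters.asScalaBuffer(javaElements);
}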

Example 8 with ArrayBuffer

Use of scala.collection.mutable.ArrayBuffer in project flink by apache.

In the class PythonCorrelateSplitRule, the method createNewFieldNames:

private List<String> createNewFieldNames(
        RelDataType rowType,
        RexBuilder rexBuilder,
        int primitiveFieldCount,
        ArrayBuffer<RexNode> extractedRexNodes,
        List<RexNode> calcProjects) {
    for (int i = 0; i < primitiveFieldCount; i++) {
        calcProjects.add(RexInputRef.of(i, rowType));
    }
    // change RexCorrelVariable to RexInputRef.
    RexDefaultVisitor<RexNode> visitor = new RexDefaultVisitor<RexNode>() {

        @Override
        public RexNode visitFieldAccess(RexFieldAccess fieldAccess) {
            RexNode expr = fieldAccess.getReferenceExpr();
            if (expr instanceof RexCorrelVariable) {
                RelDataTypeField field = fieldAccess.getField();
                return new RexInputRef(field.getIndex(), field.getType());
            } else {
                return rexBuilder.makeFieldAccess(expr.accept(this), fieldAccess.getField().getIndex());
            }
        }

        @Override
        public RexNode visitNode(RexNode rexNode) {
            return rexNode;
        }
    };
    // add the fields of the extracted rex calls.
    Iterator<RexNode> iterator = extractedRexNodes.iterator();
    while (iterator.hasNext()) {
        RexNode rexNode = iterator.next();
        if (rexNode instanceof RexCall) {
            RexCall rexCall = (RexCall) rexNode;
            List<RexNode> newProjects = rexCall.getOperands().stream()
                    .map(x -> x.accept(visitor))
                    .collect(Collectors.toList());
            RexCall newRexCall = rexCall.clone(rexCall.getType(), newProjects);
            calcProjects.add(newRexCall);
        } else {
            calcProjects.add(rexNode);
        }
    }
    List<String> nameList = new LinkedList<>();
    for (int i = 0; i < primitiveFieldCount; i++) {
        nameList.add(rowType.getFieldNames().get(i));
    }
    // indices() returns a Scala Range whose iterator erases to Iterator<Object>
    // from Java; each element is a boxed Integer index.
    Iterator<Object> indicesIterator = extractedRexNodes.indices().iterator();
    while (indicesIterator.hasNext()) {
        nameList.add("f" + indicesIterator.next());
    }
    return SqlValidatorUtil.uniquify(nameList, rexBuilder.getTypeFactory().getTypeSystem().isSchemaCaseSensitive());
}
Also used : RexFieldAccess(org.apache.calcite.rex.RexFieldAccess) RexProgram(org.apache.calcite.rex.RexProgram) RexUtil(org.apache.calcite.rex.RexUtil) SqlValidatorUtil(org.apache.calcite.sql.validate.SqlValidatorUtil) RexNode(org.apache.calcite.rex.RexNode) LinkedList(java.util.LinkedList) ArrayBuffer(scala.collection.mutable.ArrayBuffer) PythonUtil(org.apache.flink.table.planner.plan.utils.PythonUtil) RelDataType(org.apache.calcite.rel.type.RelDataType) RexDefaultVisitor(org.apache.flink.table.planner.plan.utils.RexDefaultVisitor) RexBuilder(org.apache.calcite.rex.RexBuilder) Iterator(scala.collection.Iterator) FlinkLogicalTableFunctionScan(org.apache.flink.table.planner.plan.nodes.logical.FlinkLogicalTableFunctionScan) FlinkLogicalCalc(org.apache.flink.table.planner.plan.nodes.logical.FlinkLogicalCalc) RelNode(org.apache.calcite.rel.RelNode) Collectors(java.util.stream.Collectors) RelOptRuleCall(org.apache.calcite.plan.RelOptRuleCall) RexInputRef(org.apache.calcite.rex.RexInputRef) RelOptRule(org.apache.calcite.plan.RelOptRule) RexProgramBuilder(org.apache.calcite.rex.RexProgramBuilder) List(java.util.List) StreamPhysicalCorrelateRule(org.apache.flink.table.planner.plan.rules.physical.stream.StreamPhysicalCorrelateRule) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RexCorrelVariable(org.apache.calcite.rex.RexCorrelVariable) HepRelVertex(org.apache.calcite.plan.hep.HepRelVertex) FlinkLogicalCorrelate(org.apache.flink.table.planner.plan.nodes.logical.FlinkLogicalCorrelate) RexCall(org.apache.calcite.rex.RexCall)
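
Consuming a Scala collection from Java, as createNewFieldNames does, means driving scala.collection.Iterator by hand: it does not implement java.util.Iterator, so it cannot be used in a for-each loop. A minimal sketch of the pattern (class and variable names are illustrative):

import scala.collection.Iterator;
import scala.collection.mutable.ArrayBuffer;

public class ScalaIterationFromJava {
    public static void main(String[] args) {
        final ArrayBuffer<String> buffer = new ArrayBuffer<>();
        buffer.$plus$eq("a");
        buffer.$plus$eq("b");
        // Drive the Scala iterator manually; there is no for-each support.
        final Iterator<String> it = buffer.iterator();
        while (it.hasNext()) {
            System.out.println(it.next());
        }
        // indices() yields a Range whose iterator erases to Iterator<Object>.
        final Iterator<Object> indices = buffer.indices().iterator();
        while (indices.hasNext()) {
            System.out.println("f" + indices.next());
        }
    }
}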

Example 9 with ArrayBuffer

Use of scala.collection.mutable.ArrayBuffer in project flink by apache.

In the class PythonCorrelateSplitRule, the method onMatch:

@Override
public void onMatch(RelOptRuleCall call) {
    FlinkLogicalCorrelate correlate = call.rel(0);
    RexBuilder rexBuilder = call.builder().getRexBuilder();
    RelNode left = ((HepRelVertex) correlate.getLeft()).getCurrentRel();
    RelNode right = ((HepRelVertex) correlate.getRight()).getCurrentRel();
    int primitiveLeftFieldCount = left.getRowType().getFieldCount();
    ArrayBuffer<RexNode> extractedRexNodes = new ArrayBuffer<>();
    RelNode rightNewInput;
    if (right instanceof FlinkLogicalTableFunctionScan) {
        FlinkLogicalTableFunctionScan scan = (FlinkLogicalTableFunctionScan) right;
        rightNewInput = createNewScan(
                scan,
                createScalarFunctionSplitter(
                        null, rexBuilder, primitiveLeftFieldCount, extractedRexNodes, scan.getCall()));
    } else {
        FlinkLogicalCalc calc = (FlinkLogicalCalc) right;
        FlinkLogicalTableFunctionScan scan = StreamPhysicalCorrelateRule.getTableScan(calc);
        FlinkLogicalCalc mergedCalc = StreamPhysicalCorrelateRule.getMergedCalc(calc);
        FlinkLogicalTableFunctionScan newScan = createNewScan(
                scan,
                createScalarFunctionSplitter(
                        null, rexBuilder, primitiveLeftFieldCount, extractedRexNodes, scan.getCall()));
        rightNewInput = mergedCalc.copy(mergedCalc.getTraitSet(), newScan, mergedCalc.getProgram());
    }
    }
    FlinkLogicalCorrelate newCorrelate;
    if (extractedRexNodes.size() > 0) {
        // If any rex calls were extracted, evaluate them in a new calc over the left input first.
        FlinkLogicalCalc leftCalc = createNewLeftCalc(left, rexBuilder, extractedRexNodes, correlate);
        newCorrelate = new FlinkLogicalCorrelate(
                correlate.getCluster(), correlate.getTraitSet(), leftCalc, rightNewInput,
                correlate.getCorrelationId(), correlate.getRequiredColumns(), correlate.getJoinType());
    } else {
        newCorrelate = new FlinkLogicalCorrelate(
                correlate.getCluster(), correlate.getTraitSet(), left, rightNewInput,
                correlate.getCorrelationId(), correlate.getRequiredColumns(), correlate.getJoinType());
    }
    FlinkLogicalCalc newTopCalc = createTopCalc(
            primitiveLeftFieldCount, rexBuilder, extractedRexNodes, correlate.getRowType(), newCorrelate);
    call.transformTo(newTopCalc);
}
Also used : FlinkLogicalCorrelate(org.apache.flink.table.planner.plan.nodes.logical.FlinkLogicalCorrelate) HepRelVertex(org.apache.calcite.plan.hep.HepRelVertex) RelNode(org.apache.calcite.rel.RelNode) FlinkLogicalCalc(org.apache.flink.table.planner.plan.nodes.logical.FlinkLogicalCalc) RexBuilder(org.apache.calcite.rex.RexBuilder) FlinkLogicalTableFunctionScan(org.apache.flink.table.planner.plan.nodes.logical.FlinkLogicalTableFunctionScan) ArrayBuffer(scala.collection.mutable.ArrayBuffer) RexNode(org.apache.calcite.rex.RexNode)
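
In both Flink methods above, the ArrayBuffer acts as a mutable out-parameter: createScalarFunctionSplitter appends extracted rex calls into extractedRexNodes as a side effect, and the caller branches on size(). A minimal illustration of that accumulator pattern (the collector method is hypothetical):

import scala.collection.mutable.ArrayBuffer;

public class OutParameterSketch {
    // Hypothetical collector that appends matches into the caller's buffer.
    static void collectEvens(int[] input, ArrayBuffer<Integer> out) {
        for (int n : input) {
            if (n % 2 == 0) {
                out.$plus$eq(n);
            }
        }
    }

    public static void main(String[] args) {
        final ArrayBuffer<Integer> evens = new ArrayBuffer<>();
        collectEvens(new int[] {1, 2, 3, 4}, evens);
        // Branch on whether anything was collected, as onMatch does with size().
        if (evens.size() > 0) {
            System.out.println("collected " + evens.size() + " values");
        }
    }
}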

Aggregations

ArrayBuffer (scala.collection.mutable.ArrayBuffer): 9
Edge (uk.gov.gchq.gaffer.data.element.Edge): 5
Element (uk.gov.gchq.gaffer.data.element.Element): 5
Entity (uk.gov.gchq.gaffer.data.element.Entity): 5
HashSet (java.util.HashSet): 3
HepRelVertex (org.apache.calcite.plan.hep.HepRelVertex): 3
RelNode (org.apache.calcite.rel.RelNode): 3
RexBuilder (org.apache.calcite.rex.RexBuilder): 3
RexNode (org.apache.calcite.rex.RexNode): 3
FlinkLogicalCalc (org.apache.flink.table.planner.plan.nodes.logical.FlinkLogicalCalc): 3
Configuration (org.apache.hadoop.conf.Configuration): 3
Tuple2 (scala.Tuple2): 3
Graph (uk.gov.gchq.gaffer.graph.Graph): 3
GetRDDOfAllElements (uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements): 3
User (uk.gov.gchq.gaffer.user.User): 3
LinkedList (java.util.LinkedList): 2
List (java.util.List): 2
Collectors (java.util.stream.Collectors): 2
RelOptRule (org.apache.calcite.plan.RelOptRule): 2
RelOptRuleCall (org.apache.calcite.plan.RelOptRuleCall): 2