Use of uk.gov.gchq.gaffer.spark.operation.dataframe.ConvertElementToRow in project Gaffer by gchq.
In class AccumuloStoreRelationTest, method testBuildScanWithView.
private void testBuildScanWithView(final String name, final View view, final Predicate<Element> returnElement)
        throws OperationException, StoreException {
    // Given
    final SQLContext sqlContext = getSqlContext(name);
    final Schema schema = getSchema();
    final AccumuloProperties properties = AccumuloProperties
            .loadStoreProperties(AccumuloStoreRelationTest.class.getResourceAsStream("/store.properties"));
    final SingleUseMockAccumuloStore store = new SingleUseMockAccumuloStore();
    store.initialise(schema, properties);
    addElements(store);

    // When
    final AccumuloStoreRelation relation = new AccumuloStoreRelation(sqlContext, Collections.emptyList(), view, store, new User());
    final RDD<Row> rdd = relation.buildScan();
    final Row[] returnedElements = (Row[]) rdd.collect();

    // Then
    // - Actual results are:
    final Set<Row> results = new HashSet<>();
    for (final Row row : returnedElements) {
        results.add(row);
    }

    // - Expected results are:
    final SchemaToStructTypeConverter schemaConverter = new SchemaToStructTypeConverter(schema, view, new ArrayList<>());
    final ConvertElementToRow elementConverter = new ConvertElementToRow(schemaConverter.getUsedProperties(),
            schemaConverter.getPropertyNeedsConversion(), schemaConverter.getConverterByProperty());
    final Set<Row> expectedRows = new HashSet<>();
    StreamSupport.stream(getElements().spliterator(), false)
            .filter(returnElement)
            .map(elementConverter::apply)
            .forEach(expectedRows::add);
    assertEquals(expectedRows, results);

    sqlContext.sparkContext().stop();
}
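The test exercises ConvertElementToRow through the relation, but the converter can also be applied to a single Element directly, just as the elementConverter::apply method reference above does. A minimal sketch, assuming a schemaConverter built as in the test above; the element variable is a hypothetical placeholder:

// Hypothetical sketch: convert one Element to a Spark Row directly.
// "element" stands in for any Element that matches the View.
final ConvertElementToRow converter = new ConvertElementToRow(
        schemaConverter.getUsedProperties(),
        schemaConverter.getPropertyNeedsConversion(),
        schemaConverter.getConverterByProperty());
final Row row = converter.apply(element);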
Use of uk.gov.gchq.gaffer.spark.operation.dataframe.ConvertElementToRow in project Gaffer by gchq.
In class AccumuloStoreRelation, method buildScan.
/**
 * Creates a <code>DataFrame</code> of all {@link Element}s from the specified groups, with columns that are not
 * required filtered out.
 * <p>
 * Currently the projection is not pushed down to the store (i.e. it is applied in the Spark transform rather
 * than in an iterator on the store). Issue 320 tracks this.
 *
 * @param requiredColumns The columns to return.
 * @return An {@link RDD} of {@link Row}s containing the requested columns.
 */
@Override
public RDD<Row> buildScan(final String[] requiredColumns) {
    try {
        LOGGER.info("Building scan with required columns: {}", StringUtils.join(requiredColumns, ','));
        LOGGER.info("Building GetRDDOfAllElements with view set to groups {}", StringUtils.join(groups, ','));
        final GetRDDOfAllElements operation = new GetRDDOfAllElements(sqlContext.sparkContext());
        operation.setView(view);
        final RDD<Element> rdd = store.execute(operation, user);
        return rdd.map(new ConvertElementToRow(new LinkedHashSet<>(Arrays.asList(requiredColumns)),
                propertyNeedsConversion, converterByProperty), ClassTagConstants.ROW_CLASS_TAG);
    } catch (final OperationException e) {
        LOGGER.error("OperationException while executing operation", e);
        return null;
    }
}
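A minimal usage sketch of this projected scan, assuming a relation constructed as in the tests above; the "group" and "count" column names are hypothetical and would need to exist in the schema:

// Hypothetical sketch: request only two columns from the scan.
final RDD<Row> projected = relation.buildScan(new String[]{"group", "count"});
final Row[] rows = (Row[]) projected.collect();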
Use of uk.gov.gchq.gaffer.spark.operation.dataframe.ConvertElementToRow in project Gaffer by gchq.
In class AccumuloStoreRelationTest, method testBuildScanSpecifyColumnsAndFiltersWithView.
private void testBuildScanSpecifyColumnsAndFiltersWithView(final String name, final View view,
        final String[] requiredColumns, final Filter[] filters, final Predicate<Element> returnElement)
        throws OperationException, StoreException {
    // Given
    final SQLContext sqlContext = getSqlContext(name);
    final Schema schema = getSchema();
    final AccumuloProperties properties = AccumuloProperties
            .loadStoreProperties(getClass().getResourceAsStream("/store.properties"));
    final SingleUseMockAccumuloStore store = new SingleUseMockAccumuloStore();
    store.initialise(schema, properties);
    addElements(store);

    // When
    final AccumuloStoreRelation relation = new AccumuloStoreRelation(sqlContext, Collections.emptyList(), view, store, new User());
    final RDD<Row> rdd = relation.buildScan(requiredColumns, filters);
    final Row[] returnedElements = (Row[]) rdd.collect();

    // Then
    // - Actual results are:
    final Set<Row> results = new HashSet<>();
    for (final Row row : returnedElements) {
        results.add(row);
    }

    // - Expected results are:
    final SchemaToStructTypeConverter schemaConverter = new SchemaToStructTypeConverter(schema, view, new ArrayList<>());
    final ConvertElementToRow elementConverter = new ConvertElementToRow(new LinkedHashSet<>(Arrays.asList(requiredColumns)),
            schemaConverter.getPropertyNeedsConversion(), schemaConverter.getConverterByProperty());
    final Set<Row> expectedRows = new HashSet<>();
    StreamSupport.stream(getElements().spliterator(), false)
            .filter(returnElement)
            .map(elementConverter::apply)
            .forEach(expectedRows::add);
    assertEquals(expectedRows, results);

    sqlContext.sparkContext().stop();
}
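For the two-argument overload exercised above, a sketch of supplying a concrete Spark data source Filter; GreaterThan here is org.apache.spark.sql.sources.GreaterThan, and the "count" property name is an assumption about the test schema:

// Hypothetical sketch: project two columns and pass a count > 5 filter.
final String[] requiredColumns = new String[]{"group", "count"};
final Filter[] filters = new Filter[]{new GreaterThan("count", 5)};
final RDD<Row> filtered = relation.buildScan(requiredColumns, filters);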
Use of uk.gov.gchq.gaffer.spark.operation.dataframe.ConvertElementToRow in project Gaffer by gchq.
In class AccumuloStoreRelationTest, method testBuildScanSpecifyColumnsWithView.
private void testBuildScanSpecifyColumnsWithView(final String name, final View view,
        final String[] requiredColumns, final Predicate<Element> returnElement)
        throws OperationException, StoreException {
    // Given
    final SQLContext sqlContext = getSqlContext(name);
    final Schema schema = getSchema();
    final AccumuloProperties properties = AccumuloProperties
            .loadStoreProperties(getClass().getResourceAsStream("/store.properties"));
    final SingleUseMockAccumuloStore store = new SingleUseMockAccumuloStore();
    store.initialise(schema, properties);
    addElements(store);

    // When
    final AccumuloStoreRelation relation = new AccumuloStoreRelation(sqlContext, Collections.emptyList(), view, store, new User());
    final RDD<Row> rdd = relation.buildScan(requiredColumns);
    final Row[] returnedElements = (Row[]) rdd.collect();

    // Then
    // - Actual results are:
    final Set<Row> results = new HashSet<>();
    for (final Row row : returnedElements) {
        results.add(row);
    }

    // - Expected results are:
    final SchemaToStructTypeConverter schemaConverter = new SchemaToStructTypeConverter(schema, view, new ArrayList<>());
    final ConvertElementToRow elementConverter = new ConvertElementToRow(new LinkedHashSet<>(Arrays.asList(requiredColumns)),
            schemaConverter.getPropertyNeedsConversion(), schemaConverter.getConverterByProperty());
    final Set<Row> expectedRows = new HashSet<>();
    StreamSupport.stream(getElements().spliterator(), false)
            .filter(returnElement)
            .map(elementConverter::apply)
            .forEach(expectedRows::add);
    assertEquals(expectedRows, results);

    sqlContext.sparkContext().stop();
}
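The RDD<Row> produced by these scans can also be wrapped in a DataFrame for SQL-style queries. A sketch, assuming SchemaToStructTypeConverter exposes the derived StructType through a getStructType() accessor (an assumption; check the class for the exact method name):

// Hypothetical sketch: build a DataFrame from the scanned rows.
final StructType structType = schemaConverter.getStructType(); // assumed accessor
final DataFrame dataFrame = sqlContext.createDataFrame(rdd, structType);
dataFrame.show();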
Use of uk.gov.gchq.gaffer.spark.operation.dataframe.ConvertElementToRow in project Gaffer by gchq.
In class AccumuloStoreRelation, method buildScan.
/**
 * Creates a <code>DataFrame</code> of all {@link Element}s from the specified groups.
 *
 * @return An {@link RDD} of {@link Row}s containing {@link Element}s whose group is in <code>groups</code>.
 */
@Override
public RDD<Row> buildScan() {
    try {
        LOGGER.info("Building GetRDDOfAllElements with view set to groups {}", StringUtils.join(groups, ','));
        final GetRDDOfAllElements operation = new GetRDDOfAllElements(sqlContext.sparkContext());
        operation.setView(view);
        final RDD<Element> rdd = store.execute(operation, user);
        return rdd.map(new ConvertElementToRow(usedProperties, propertyNeedsConversion, converterByProperty),
                ClassTagConstants.ROW_CLASS_TAG);
    } catch (final OperationException e) {
        LOGGER.error("OperationException while executing operation", e);
        return null;
    }
}
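A usage sketch of the no-argument scan, assuming a relation constructed as in the tests above; note that this method returns null when the underlying operation fails, so a null check is prudent:

// Hypothetical sketch: scan all elements in the view's groups and collect locally.
final RDD<Row> rows = relation.buildScan();
if (rows != null) {
    final Row[] collected = (Row[]) rows.collect();
    System.out.println("Collected " + collected.length + " rows");
}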