Use of org.apache.spark.Partition in project gatk by broadinstitute.
The class RangePartitionCoalescer, method coalesce.
/**
 * Builds one {@link PartitionGroup} per partition of the parent RDD.
 *
 * <p>Group {@code i} contains the parent partitions from index {@code i} up to and including
 * index {@code maxEndPartitionIndexes.get(i)}. Its preferred location is the first preferred
 * location the parent reports for partition {@code i}, or an empty option when the parent
 * reports none.
 *
 * @param maxPartitions requested number of groups; must equal the parent's partition count
 * @param parent the RDD whose partitions are being grouped
 * @return one group per parent partition, stored at the index of its starting partition
 * @throws IllegalArgumentException if {@code maxPartitions} differs from the parent's
 *         number of partitions
 */
@Override
public PartitionGroup[] coalesce(int maxPartitions, RDD<?> parent) {
    if (parent.getNumPartitions() != maxPartitions) {
        throw new IllegalArgumentException("Cannot use " + getClass().getSimpleName() + " with a different number of partitions to the parent RDD.");
    }

    final List<Partition> parentPartitions = Arrays.asList(parent.getPartitions());
    final int groupCount = parentPartitions.size();
    final PartitionGroup[] result = new PartitionGroup[groupCount];

    for (int start = 0; start < groupCount; start++) {
        // Take the first preferred location of the starting partition, if there is one.
        final Seq<String> locations = parent.getPreferredLocations(parentPartitions.get(start));
        String firstLocation = null;
        if (!locations.isEmpty()) {
            firstLocation = locations.apply(0);
        }
        final PartitionGroup current = new PartitionGroup(scala.Option.apply(firstLocation));

        // subList's upper bound is exclusive, so add one to include the configured end partition.
        final int endExclusive = maxEndPartitionIndexes.get(start) + 1;
        final List<Partition> members = parentPartitions.subList(start, endExclusive);
        current.partitions().append(JavaConversions.asScalaBuffer(members));

        result[start] = current;
    }
    return result;
}
Use of org.apache.spark.Partition in project Gaffer by gchq.
The class RFileReaderIteratorTest, method initWithNullAuthsDoesNotHaveNextIterator.
@Test
public void initWithNullAuthsDoesNotHaveNextIterator() {
    // Given: a tablet partition, a mocked task context and null authorisations
    final TaskContext mockedContext = mock(TaskContext.class);
    final Partition tablet = new AccumuloTablet(0, 0, "a", "b");

    // When: the iterator is constructed
    final RFileReaderIterator underTest =
            new RFileReaderIterator(tablet, mockedContext, new Configuration(), null);

    // Then: it is already exhausted
    assertThat(underTest).isExhausted();
}
Use of org.apache.spark.Partition in project Gaffer by gchq.
The class RFileReaderIteratorTest, method initWithEmptyAccumuloTabletDoesNotHaveNextIterator.
@Test
public void initWithEmptyAccumuloTabletDoesNotHaveNextIterator() {
    // Given: a tablet partition, a mocked task context and an empty set of authorisations
    final Set<String> emptyAuths = new HashSet<>();
    final TaskContext mockedContext = mock(TaskContext.class);
    final Partition tablet = new AccumuloTablet(0, 0, "a", "b");

    // When: the iterator is constructed
    final RFileReaderIterator underTest =
            new RFileReaderIterator(tablet, mockedContext, new Configuration(), emptyAuths);

    // Then: it is already exhausted
    assertThat(underTest).isExhausted();
}
Use of org.apache.spark.Partition in project Gaffer by gchq.
The class RFileReaderIteratorTest, method initWithNullTaskContentShouldThrowNPE.
@Test
public void initWithNullTaskContentShouldThrowNPE() {
    // Given: a tablet partition and an empty set of authorisations, but no task context
    final Set<String> emptyAuths = new HashSet<>();
    final Partition tablet = new AccumuloTablet(0, 0, "a", "b");

    // When / Then: constructing the iterator with a null task context throws
    assertThatExceptionOfType(NullPointerException.class)
            .isThrownBy(() -> new RFileReaderIterator(tablet, null, new Configuration(), emptyAuths));
}
Use of org.apache.spark.Partition in project Gaffer by gchq.
The class RFileReaderRDD, method getPartitions.
/**
 * Builds the partitions of this RDD from the tablets of the configured Accumulo table.
 *
 * <p>Connects to Accumulo through ZooKeeper, checks that the user can access the table, builds
 * the tablet map with {@code createTabletMap}, and places every tablet in the returned array
 * at the position given by its {@code index()}.
 *
 * @return an array with one slot per entry of the tablet map
 * @throws RuntimeException if the connection cannot be established, if the user does not have
 *         access to the table, or if an Accumulo error occurs while creating the partitions
 */
@Override
public Partition[] getPartitions() {
    // Step 1: connect to the Accumulo instance.
    final Connector accumulo;
    try {
        final Instance zkInstance = new ZooKeeperInstance(instanceName, zookeepers);
        accumulo = zkInstance.getConnector(user, new PasswordToken(password));
    } catch (final AccumuloException | AccumuloSecurityException connectFailure) {
        throw new RuntimeException("Exception connecting to Accumulo", connectFailure);
    }
    LOGGER.info("Obtained connection to instance {} as user {}", instanceName, user);

    try {
        // Step 2: refuse to continue when the user cannot access the table.
        final boolean accessGranted = checkAccess(accumulo, user, tableName);
        if (!accessGranted) {
            throw new RuntimeException("User " + user + " does not have access to table " + tableName);
        }
        LOGGER.info("Confirmed user {} has access to table {}", user, tableName);

        // Step 3: look up the table id and tablet count (one more tablet than split points).
        final String id = accumulo.tableOperations().tableIdMap().get(tableName);
        final int tabletCount = 1 + accumulo.tableOperations().listSplits(tableName).size();
        LOGGER.info("Table {} has id {} and {} tablets", tableName, id, tabletCount);

        // Step 4: gather per-tablet information, keyed by tablet name.
        final Map<String, AccumuloTablet> tabletsByName = createTabletMap(accumulo, user, id);

        // Step 5: store each tablet in the slot matching its own index.
        final Partition[] result = new Partition[tabletsByName.size()];
        for (final AccumuloTablet tablet : tabletsByName.values()) {
            result[tablet.index()] = tablet;
        }
        LOGGER.info("Returning {} partitions", result.length);
        return result;
    } catch (final TableNotFoundException | AccumuloSecurityException | AccumuloException partitionFailure) {
        throw new RuntimeException("Exception creating partitions", partitionFailure);
    }
}
Aggregations