Use of org.apache.cassandra.locator.Replica in project cassandra by apache.
The class LongStreamingTest, method testStream:
private void testStream(boolean useSstableCompression) throws InvalidRequestException, IOException, ExecutionException, InterruptedException
{
    String KS = useSstableCompression ? "sstable_compression_ks" : "stream_compression_ks";
    String TABLE = "table1";

    File tempdir = new File(Files.createTempDir());
    File dataDir = new File(tempdir.absolutePath() + File.pathSeparator() + KS + File.pathSeparator() + TABLE);
    assert dataDir.tryCreateDirectories();

    String schema = "CREATE TABLE " + KS + '.' + TABLE + " ("
                  + " k int PRIMARY KEY,"
                  + " v1 text,"
                  + " v2 int"
                  + ") with compression = " + (useSstableCompression ? "{'class': 'LZ4Compressor'};" : "{};");
    String insert = "INSERT INTO " + KS + '.' + TABLE + " (k, v1, v2) VALUES (?, ?, ?)";
    CQLSSTableWriter writer = CQLSSTableWriter.builder()
                                              .sorted()
                                              .inDirectory(dataDir)
                                              .forTable(schema)
                                              .using(insert)
                                              .build();

    CompressionParams compressionParams = Keyspace.open(KS).getColumnFamilyStore(TABLE).metadata().params.compression;
    Assert.assertEquals(useSstableCompression, compressionParams.isEnabled());

    // Write ten million rows into sstables on disk
    long start = nanoTime();
    for (int i = 0; i < 10_000_000; i++)
        writer.addRow(i, "test1", 24);
    writer.close();
    System.err.println(String.format("Writer finished after %d seconds....", TimeUnit.NANOSECONDS.toSeconds(nanoTime() - start)));

    // Measure the on-disk size of the data files we are about to stream
    File[] dataFiles = dataDir.tryList((dir, name) -> name.endsWith("-Data.db"));
    long dataSizeInBytes = 0L;
    for (File file : dataFiles)
    {
        System.err.println("File : " + file.absolutePath());
        dataSizeInBytes += file.length();
    }

    SSTableLoader loader = new SSTableLoader(dataDir, new SSTableLoader.Client()
    {
        private String ks;

        public void init(String keyspace)
        {
            // Register every locally-owned range for the broadcast address
            for (Replica range : StorageService.instance.getLocalReplicas(KS))
                addRangeForEndpoint(range.range(), FBUtilities.getBroadcastAddressAndPort());
            this.ks = keyspace;
        }

        public TableMetadataRef getTableMetadata(String cfName)
        {
            return Schema.instance.getTableMetadataRef(ks, cfName);
        }
    }, new OutputHandler.SystemOutput(false, false));

    start = nanoTime();
    loader.stream().get();
    long millis = TimeUnit.NANOSECONDS.toMillis(nanoTime() - start);
    System.err.println(String.format("Finished Streaming in %.2f seconds: %.2f Mb/sec",
                                     millis / 1000d,
                                     (dataSizeInBytes / (1 << 20) / (millis / 1000d)) * 8));

    // Stream again
    loader = new SSTableLoader(dataDir, new SSTableLoader.Client()
    {
        private String ks;

        public void init(String keyspace)
        {
            for (Replica range : StorageService.instance.getLocalReplicas(KS))
                addRangeForEndpoint(range.range(), FBUtilities.getBroadcastAddressAndPort());
            this.ks = keyspace;
        }

        public TableMetadataRef getTableMetadata(String cfName)
        {
            return Schema.instance.getTableMetadataRef(ks, cfName);
        }
    }, new OutputHandler.SystemOutput(false, false));

    start = nanoTime();
    loader.stream().get();
    millis = TimeUnit.NANOSECONDS.toMillis(nanoTime() - start);
    System.err.println(String.format("Finished Streaming in %.2f seconds: %.2f Mb/sec",
                                     millis / 1000d,
                                     (dataSizeInBytes / (1 << 20) / (millis / 1000d)) * 8));

    // Compact them both
    start = nanoTime();
    Keyspace.open(KS).getColumnFamilyStore(TABLE).forceMajorCompaction();
    millis = TimeUnit.NANOSECONDS.toMillis(nanoTime() - start);
    System.err.println(String.format("Finished Compacting in %.2f seconds: %.2f Mb/sec",
                                     millis / 1000d,
                                     (dataSizeInBytes * 2 / (1 << 20) / (millis / 1000d)) * 8));

    UntypedResultSet rs = QueryProcessor.executeInternal("SELECT * FROM " + KS + '.' + TABLE + " limit 100;");
    assertEquals(100, rs.size());
}
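The two anonymous SSTableLoader.Client instances above are identical. As a minimal sketch only, the same callback could be factored into a named class; the class name LocalReplicaClient is made up here, but every call it makes (getLocalReplicas, addRangeForEndpoint, getTableMetadataRef) appears verbatim in the test:

// Hypothetical helper, not part of the test: factors out the duplicated client.
static final class LocalReplicaClient extends SSTableLoader.Client
{
    private final String configuredKeyspace;
    private String ks;

    LocalReplicaClient(String configuredKeyspace)
    {
        this.configuredKeyspace = configuredKeyspace;
    }

    @Override
    public void init(String keyspace)
    {
        // Same registration as the anonymous classes above
        for (Replica replica : StorageService.instance.getLocalReplicas(configuredKeyspace))
            addRangeForEndpoint(replica.range(), FBUtilities.getBroadcastAddressAndPort());
        this.ks = keyspace;
    }

    @Override
    public TableMetadataRef getTableMetadata(String cfName)
    {
        return Schema.instance.getTableMetadataRef(ks, cfName);
    }
}

With that in place, both loaders become new SSTableLoader(dataDir, new LocalReplicaClient(KS), new OutputHandler.SystemOutput(false, false)).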
Use of org.apache.cassandra.locator.Replica in project cassandra by apache.
The class CassandraStreamManager, method createOutgoingStreams:
@Override
public Collection<OutgoingStream> createOutgoingStreams(StreamSession session, RangesAtEndpoint replicas, UUID pendingRepair, PreviewKind previewKind)
{
    Refs<SSTableReader> refs = new Refs<>();
    try
    {
        final List<Range<PartitionPosition>> keyRanges = new ArrayList<>(replicas.size());
        for (Replica replica : replicas)
            keyRanges.add(Range.makeRowRange(replica.range()));

        refs.addAll(cfs.selectAndReference(view -> {
            Set<SSTableReader> sstables = Sets.newHashSet();
            SSTableIntervalTree intervalTree = SSTableIntervalTree.build(view.select(SSTableSet.CANONICAL));
            Predicate<SSTableReader> predicate;
            if (previewKind.isPreview())
            {
                predicate = previewKind.predicate();
            }
            else if (pendingRepair == ActiveRepairService.NO_PENDING_REPAIR)
            {
                predicate = Predicates.alwaysTrue();
            }
            else
            {
                predicate = s -> s.isPendingRepair() && s.getSSTableMetadata().pendingRepair.equals(pendingRepair);
            }

            for (Range<PartitionPosition> keyRange : keyRanges)
            {
                // keyRange was built via Range.makeRowRange above, whose key bounds ensure the
                // inclusive interval-tree lookup below is still actually selecting what we wanted.
                for (SSTableReader sstable : Iterables.filter(View.sstablesInBounds(keyRange.left, keyRange.right, intervalTree), predicate))
                {
                    sstables.add(sstable);
                }
            }

            if (logger.isDebugEnabled())
                logger.debug("ViewFilter for {}/{} sstables", sstables.size(), Iterables.size(view.select(SSTableSet.CANONICAL)));
            return sstables;
        }).refs);

        List<Range<Token>> normalizedFullRanges = Range.normalize(replicas.onlyFull().ranges());
        List<Range<Token>> normalizedAllRanges = Range.normalize(replicas.ranges());

        // Create outgoing file streams for ranges, possibly skipping repaired ranges in sstables
        List<OutgoingStream> streams = new ArrayList<>(refs.size());
        for (SSTableReader sstable : refs)
        {
            List<Range<Token>> ranges = sstable.isRepaired() ? normalizedFullRanges : normalizedAllRanges;
            List<SSTableReader.PartitionPositionBounds> sections = sstable.getPositionsForRanges(ranges);
            Ref<SSTableReader> ref = refs.get(sstable);
            if (sections.isEmpty())
            {
                ref.release();
                continue;
            }
            streams.add(new CassandraOutgoingFile(session.getStreamOperation(), ref, sections, ranges, sstable.estimatedKeysForRanges(ranges)));
        }
        return streams;
    }
    catch (Throwable t)
    {
        refs.release();
        throw t;
    }
}
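The sstable selection hinges on the three-way predicate choice in the middle of the method. A minimal sketch of that logic in isolation, with a hypothetical helper name chooseSSTablePredicate; the three branches are exactly those above:

// Hypothetical extraction of the predicate selection above.
static Predicate<SSTableReader> chooseSSTablePredicate(PreviewKind previewKind, UUID pendingRepair)
{
    // Preview repairs supply their own filter
    if (previewKind.isPreview())
        return previewKind.predicate();
    // No pending repair: every sstable in range is a candidate
    if (pendingRepair == ActiveRepairService.NO_PENDING_REPAIR)
        return Predicates.alwaysTrue();
    // Incremental repair: only sstables already marked for this session
    return s -> s.isPendingRepair() && s.getSSTableMetadata().pendingRepair.equals(pendingRepair);
}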
Use of org.apache.cassandra.locator.Replica in project cassandra by apache.
The class RangeRelocator, method calculateRangesToStreamWithEndpoints:
/**
 * Calculate the endpoints to stream the current ranges to, if needed.
 * In some situations a node will keep serving its current ranges as part of the new ranges.
 */
public static RangesByEndpoint calculateRangesToStreamWithEndpoints(RangesAtEndpoint streamRanges, AbstractReplicationStrategy strat, TokenMetadata tmdBefore, TokenMetadata tmdAfter)
{
    RangesByEndpoint.Builder endpointRanges = new RangesByEndpoint.Builder();
    for (Replica toStream : streamRanges)
    {
        // If the range we are sending is full, only send it to the new full replica.
        // There will also be a new transient replica we need to send the data to,
        // but not the repaired data.
        EndpointsForRange oldEndpoints = strat.calculateNaturalReplicas(toStream.range().right, tmdBefore);
        EndpointsForRange newEndpoints = strat.calculateNaturalReplicas(toStream.range().right, tmdAfter);
        logger.debug("Need to stream {}, current endpoints {}, new endpoints {}", toStream, oldEndpoints, newEndpoints);

        for (Replica newEndpoint : newEndpoints)
        {
            Replica oldEndpoint = oldEndpoints.byEndpoint().get(newEndpoint.endpoint());

            // Nothing to do
            if (newEndpoint.equals(oldEndpoint))
                continue;

            // Completely new range for this endpoint
            if (oldEndpoint == null)
            {
                if (toStream.isTransient() && newEndpoint.isFull())
                    throw new AssertionError(String.format("Need to stream %s, but only have %s which is transient and not full", newEndpoint, toStream));
                for (Range<Token> intersection : newEndpoint.range().intersectionWith(toStream.range()))
                {
                    endpointRanges.put(newEndpoint.endpoint(), newEndpoint.decorateSubrange(intersection));
                }
            }
            else
            {
                Set<Range<Token>> subsToStream = Collections.singleton(toStream.range());

                // First subtract what we already have
                if (oldEndpoint.isFull() == newEndpoint.isFull() || oldEndpoint.isFull())
                    subsToStream = toStream.range().subtract(oldEndpoint.range());

                // Now we only stream what is still replicated
                subsToStream.stream()
                            .flatMap(range -> range.intersectionWith(newEndpoint.range()).stream())
                            .forEach(tokenRange -> endpointRanges.put(newEndpoint.endpoint(), newEndpoint.decorateSubrange(tokenRange)));
            }
        }
    }
    return endpointRanges.build();
}
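The subtract-then-intersect arithmetic drives everything here. A standalone sketch on bare Range<Token> values, using Murmur3 LongToken literals chosen only for illustration (Cassandra ranges are start-exclusive, end-inclusive):

// Illustrative only: an endpoint already holds (0, 60] of the (0, 100] range we
// are streaming, and will own (50, 100] afterwards.
Range<Token> toStream    = new Range<>(new Murmur3Partitioner.LongToken(0),  new Murmur3Partitioner.LongToken(100));
Range<Token> alreadyHeld = new Range<>(new Murmur3Partitioner.LongToken(0),  new Murmur3Partitioner.LongToken(60));
Range<Token> willOwn     = new Range<>(new Murmur3Partitioner.LongToken(50), new Murmur3Partitioner.LongToken(100));

// First subtract what the endpoint already has: (60, 100]
Set<Range<Token>> missing = toStream.subtract(alreadyHeld);

// Then keep only what is still replicated there: again (60, 100]
for (Range<Token> sub : missing)
    for (Range<Token> send : sub.intersectionWith(willOwn))
        System.out.println("would stream " + send);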
Use of org.apache.cassandra.locator.Replica in project cassandra by apache.
The class StorageProxy, method syncWriteToBatchlog:
private static void syncWriteToBatchlog(Collection<Mutation> mutations, ReplicaPlan.ForTokenWrite replicaPlan, UUID uuid, long queryStartNanoTime) throws WriteTimeoutException, WriteFailureException
{
    WriteResponseHandler<?> handler = new WriteResponseHandler(replicaPlan, WriteType.BATCH_LOG, queryStartNanoTime);
    Batch batch = Batch.createLocal(uuid, FBUtilities.timestampMicros(), mutations);
    Message<Batch> message = Message.out(BATCH_STORE_REQ, batch);
    for (Replica replica : replicaPlan.liveAndDown())
    {
        logger.trace("Sending batchlog store request {} to {} for {} mutations", batch.id, replica, batch.size());
        if (replica.isSelf())
            performLocally(Stage.MUTATION, replica, () -> BatchlogManager.store(batch), handler);
        else
            MessagingService.instance().sendWithCallback(message, replica.endpoint(), handler);
    }
    // Block until the batchlog write has been acknowledged
    handler.get();
}
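The interesting Replica usage is the local-versus-remote routing in the loop body. A sketch of that decision extracted on its own; the name storeBatchlogAt is hypothetical, and such a helper would have to live inside StorageProxy since performLocally is private there, but every call mirrors the loop above:

// Hypothetical helper inside StorageProxy: route one batchlog store, running it
// on the local MUTATION stage when the replica is this node, otherwise sending
// it over MessagingService with the shared response handler as callback.
private static void storeBatchlogAt(Replica replica, Batch batch, Message<Batch> message, WriteResponseHandler<?> handler)
{
    if (replica.isSelf())
        performLocally(Stage.MUTATION, replica, () -> BatchlogManager.store(batch), handler);
    else
        MessagingService.instance().sendWithCallback(message, replica.endpoint(), handler);
}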
Use of org.apache.cassandra.locator.Replica in project cassandra by apache.
The class StorageProxy, method findSuitableReplica:
/**
 * Find a suitable replica as leader for a counter update.
 * For now, we pick a random replica in the local DC (or ask the snitch if
 * there is no replica alive in the local DC).
 * TODO: if we tracked the latency of counter writes (which makes sense,
 * contrary to standard writes, since there is a read involved), we could
 * trust the dynamic snitch entirely, which may be a better solution. It
 * is unclear whether we want to mix those latencies with read latencies,
 * so this may be a bit involved.
 */
private static Replica findSuitableReplica(String keyspaceName, DecoratedKey key, String localDataCenter, ConsistencyLevel cl) throws UnavailableException
{
    Keyspace keyspace = Keyspace.open(keyspaceName);
    IEndpointSnitch snitch = DatabaseDescriptor.getEndpointSnitch();
    AbstractReplicationStrategy replicationStrategy = keyspace.getReplicationStrategy();
    EndpointsForToken replicas = replicationStrategy.getNaturalReplicasForToken(key);

    // CASSANDRA-13043: filter out those endpoints not accepting clients yet, maybe because they are still bootstrapping
    replicas = replicas.filter(replica -> StorageService.instance.isRpcReady(replica.endpoint()));

    // TODO have a way to compute the consistency level
    if (replicas.isEmpty())
        throw UnavailableException.create(cl, cl.blockFor(replicationStrategy), 0);

    List<Replica> localReplicas = new ArrayList<>(replicas.size());
    for (Replica replica : replicas)
        if (snitch.getDatacenter(replica).equals(localDataCenter))
            localReplicas.add(replica);

    if (localReplicas.isEmpty())
    {
        // If the consistency required is local then we should not involve other DCs
        if (cl.isDatacenterLocal())
            throw UnavailableException.create(cl, cl.blockFor(replicationStrategy), 0);

        // No endpoint in local DC, pick the closest endpoint according to the snitch
        replicas = snitch.sortedByProximity(FBUtilities.getBroadcastAddressAndPort(), replicas);
        return replicas.get(0);
    }
    return localReplicas.get(ThreadLocalRandom.current().nextInt(localReplicas.size()));
}
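The local-DC winnowing could equally be written with the same filter call already used for the RPC-readiness check; a sketch, with a hypothetical helper name:

// Hypothetical equivalent of the explicit loop above, reusing
// ReplicaCollection.filter as the CASSANDRA-13043 check already does.
static EndpointsForToken localDcOnly(EndpointsForToken replicas, IEndpointSnitch snitch, String localDataCenter)
{
    return replicas.filter(replica -> snitch.getDatacenter(replica).equals(localDataCenter));
}

The method keeps the explicit List instead so it can pick a uniformly random local replica as the counter leader via ThreadLocalRandom.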