Use of org.apache.cassandra.schema.TableMetadataRef in project cassandra by apache: the class SSTableLoader, method openSSTables.
@SuppressWarnings("resource")
protected Collection<SSTableReader> openSSTables(final Map<InetAddress, Collection<Range<Token>>> ranges) {
    outputHandler.output("Opening sstables and calculating sections to stream");
    LifecycleTransaction.getFiles(directory.toPath(), (file, type) -> {
        File dir = file.getParentFile();
        String name = file.getName();
        if (type != Directories.FileType.FINAL) {
            outputHandler.output(String.format("Skipping temporary file %s", name));
            return false;
        }
        Pair<Descriptor, Component> p = SSTable.tryComponentFromFilename(file);
        Descriptor desc = p == null ? null : p.left;
        if (p == null || !p.right.equals(Component.DATA))
            return false;
        if (!new File(desc.filenameFor(Component.PRIMARY_INDEX)).exists()) {
            outputHandler.output(String.format("Skipping file %s because index is missing", name));
            return false;
        }
        TableMetadataRef metadata = client.getTableMetadata(desc.cfname);
        if (metadata == null) {
            outputHandler.output(String.format("Skipping file %s: table %s.%s doesn't exist", name, keyspace, desc.cfname));
            return false;
        }
        Set<Component> components = new HashSet<>();
        components.add(Component.DATA);
        components.add(Component.PRIMARY_INDEX);
        if (new File(desc.filenameFor(Component.SUMMARY)).exists())
            components.add(Component.SUMMARY);
        if (new File(desc.filenameFor(Component.COMPRESSION_INFO)).exists())
            components.add(Component.COMPRESSION_INFO);
        if (new File(desc.filenameFor(Component.STATS)).exists())
            components.add(Component.STATS);
        try {
            // To conserve memory, open SSTableReaders without bloom filters and discard
            // the index summary after calculating the file sections to stream and the estimated
            // number of keys for each endpoint. See CASSANDRA-5555 for details.
            SSTableReader sstable = SSTableReader.openForBatch(desc, components, metadata);
            sstables.add(sstable);
            // calculate the sstable sections to stream and the estimated number of keys per host
            for (Map.Entry<InetAddress, Collection<Range<Token>>> entry : ranges.entrySet()) {
                InetAddress endpoint = entry.getKey();
                Collection<Range<Token>> tokenRanges = entry.getValue();
                List<Pair<Long, Long>> sstableSections = sstable.getPositionsForRanges(tokenRanges);
                long estimatedKeys = sstable.estimatedKeysForRanges(tokenRanges);
                Ref<SSTableReader> ref = sstable.ref();
                StreamSession.SSTableStreamingSections details = new StreamSession.SSTableStreamingSections(ref, sstableSections, estimatedKeys, ActiveRepairService.UNREPAIRED_SSTABLE);
                streamingDetails.put(endpoint, details);
            }
            // to conserve heap space when bulk loading
            sstable.releaseSummary();
        } catch (IOException e) {
            outputHandler.output(String.format("Skipping file %s, error opening it: %s", name, e.getMessage()));
        }
        return false;
    }, Directories.OnTxnErr.IGNORE);
    return sstables;
}
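For context, the TableMetadataRef returned by client.getTableMetadata above is a stable handle that always resolves to the table's current schema. Below is a minimal sketch, not part of the Cassandra source above, of obtaining and dereferencing such a reference; the keyspace and table names are placeholders.

// Illustrative sketch only; "ks" and "tbl" are hypothetical names assumed to exist.
TableMetadataRef ref = Schema.instance.getTableMetadataRef("ks", "tbl");
if (ref != null) {
    TableMetadata current = ref.get(); // resolves to the latest TableMetadata for this table
    System.out.println(current.keyspace + "." + current.name);
}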
Use of org.apache.cassandra.schema.TableMetadataRef in project cassandra by apache: the class LongStreamingTest, method testCompressedStream.
@Test
public void testCompressedStream() throws InvalidRequestException, IOException, ExecutionException, InterruptedException {
    String KS = "cql_keyspace";
    String TABLE = "table1";
    File tempdir = Files.createTempDir();
    File dataDir = new File(tempdir.getAbsolutePath() + File.separator + KS + File.separator + TABLE);
    assert dataDir.mkdirs();
    String schema = "CREATE TABLE cql_keyspace.table1 ("
                  + " k int PRIMARY KEY,"
                  + " v1 text,"
                  + " v2 int"
                  + ");"; // with compression = {};
    String insert = "INSERT INTO cql_keyspace.table1 (k, v1, v2) VALUES (?, ?, ?)";
    CQLSSTableWriter writer = CQLSSTableWriter.builder()
                                              .sorted()
                                              .inDirectory(dataDir)
                                              .forTable(schema)
                                              .using(insert)
                                              .build();
    long start = System.nanoTime();
    for (int i = 0; i < 10_000_000; i++)
        writer.addRow(i, "test1", 24);
    writer.close();
    System.err.println(String.format("Writer finished after %d seconds....", TimeUnit.NANOSECONDS.toSeconds(System.nanoTime() - start)));
    File[] dataFiles = dataDir.listFiles((dir, name) -> name.endsWith("-Data.db"));
    long dataSize = 0L;
    for (File file : dataFiles) {
        System.err.println("File : " + file.getAbsolutePath());
        dataSize += file.length();
    }
    SSTableLoader loader = new SSTableLoader(dataDir, new SSTableLoader.Client() {
        private String ks;

        public void init(String keyspace) {
            for (Range<Token> range : StorageService.instance.getLocalRanges("cql_keyspace"))
                addRangeForEndpoint(range, FBUtilities.getBroadcastAddress());
            this.ks = keyspace;
        }

        public TableMetadataRef getTableMetadata(String cfName) {
            return Schema.instance.getTableMetadataRef(ks, cfName);
        }
    }, new OutputHandler.SystemOutput(false, false));
    start = System.nanoTime();
    loader.stream().get();
    long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
    System.err.println(String.format("Finished Streaming in %.2f seconds: %.2f Mb/sec", millis / 1000d, (dataSize / (1 << 20) / (millis / 1000d)) * 8));
    // Stream again
    loader = new SSTableLoader(dataDir, new SSTableLoader.Client() {
        private String ks;

        public void init(String keyspace) {
            for (Range<Token> range : StorageService.instance.getLocalRanges("cql_keyspace"))
                addRangeForEndpoint(range, FBUtilities.getBroadcastAddress());
            this.ks = keyspace;
        }

        public TableMetadataRef getTableMetadata(String cfName) {
            return Schema.instance.getTableMetadataRef(ks, cfName);
        }
    }, new OutputHandler.SystemOutput(false, false));
    start = System.nanoTime();
    loader.stream().get();
    millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
    System.err.println(String.format("Finished Streaming in %.2f seconds: %.2f Mb/sec", millis / 1000d, (dataSize / (1 << 20) / (millis / 1000d)) * 8));
    // Compact them both
    start = System.nanoTime();
    Keyspace.open(KS).getColumnFamilyStore(TABLE).forceMajorCompaction();
    millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
    System.err.println(String.format("Finished Compacting in %.2f seconds: %.2f Mb/sec", millis / 1000d, (dataSize * 2 / (1 << 20) / (millis / 1000d)) * 8));
    UntypedResultSet rs = QueryProcessor.executeInternal("SELECT * FROM cql_keyspace.table1 limit 100;");
    assertEquals(100, rs.size());
}
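The test uses the sorted CQLSSTableWriter builder, which writes rows straight through and therefore expects them to be added in sstable order. Below is a hedged sketch, not taken from the test, of the unsorted variant, reusing the schema and insert strings defined above; the buffer size is an assumed value for illustration.

// Sketch: an unsorted writer buffers rows, sorts them itself, and flushes a new
// sstable whenever the in-memory buffer reaches the configured size.
CQLSSTableWriter unsorted = CQLSSTableWriter.builder()
                                            .inDirectory(dataDir)
                                            .forTable(schema)
                                            .using(insert)
                                            .withBufferSizeInMB(128) // assumed value, not from the test
                                            .build();
unsorted.addRow(0, "test1", 24);
unsorted.close();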