Use of org.apache.cassandra.io.util.FastByteArrayInputStream in project eiger by wlloyd.
The class RangeSliceReply, method read.
public static RangeSliceReply read(byte[] body, int version) throws IOException {
    FastByteArrayInputStream bufIn = new FastByteArrayInputStream(body);
    DataInputStream dis = new DataInputStream(bufIn);
    int rowCount = dis.readInt();
    List<Row> rows = new ArrayList<Row>(rowCount);
    for (int i = 0; i < rowCount; i++) {
        rows.add(Row.serializer().deserialize(dis, version));
    }
    return new RangeSliceReply(rows);
}
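RangeSliceReply.read() shows the framing common to these handlers: the message body arrives as a raw byte[], gets wrapped in a FastByteArrayInputStream (an unsynchronized variant of java.io.ByteArrayInputStream, avoiding per-call lock overhead since the buffer is confined to one thread), and is then decorated with a DataInputStream for typed reads. Below is a minimal, self-contained sketch of the same count-prefixed framing, using the JDK's ByteArrayInputStream as a stand-in and raw byte[] payloads in place of serialized Rows; all names in it are hypothetical, not eiger API.

import java.io.*;
import java.util.*;

public class CountPrefixedFraming {
    // write: int count, then (int length, bytes) per item -- the shape read() expects
    static byte[] write(List<byte[]> items) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bos);
        dos.writeInt(items.size());
        for (byte[] item : items) {
            dos.writeInt(item.length);
            dos.write(item);
        }
        dos.flush();
        return bos.toByteArray();
    }

    // read: the same pattern as RangeSliceReply.read(), with byte[] standing in for Row
    static List<byte[]> read(byte[] body) throws IOException {
        DataInputStream dis = new DataInputStream(new ByteArrayInputStream(body));
        int count = dis.readInt();
        List<byte[]> items = new ArrayList<byte[]>(count);
        for (int i = 0; i < count; i++) {
            byte[] item = new byte[dis.readInt()];
            dis.readFully(item);
            items.add(item);
        }
        return items;
    }

    public static void main(String[] args) throws IOException {
        byte[] body = write(Arrays.asList("row-1".getBytes("UTF-8"), "row-2".getBytes("UTF-8")));
        for (byte[] item : read(body))
            System.out.println(new String(item, "UTF-8"));
    }
}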
Use of org.apache.cassandra.io.util.FastByteArrayInputStream in project eiger by wlloyd.
The class ReadRepairVerbHandler, method doVerb.
@Override
public void doVerb(Message message, String id) {
    byte[] body = message.getMessageBody();
    FastByteArrayInputStream buffer = new FastByteArrayInputStream(body);
    try {
        RowMutation rm = RowMutation.serializer().deserialize(new DataInputStream(buffer), message.getVersion());
        //WL TODO: Might need to check dependencies here, if we implement on top of quorums
        rm.apply();
        WriteResponse response = new WriteResponse(rm.getTable(), rm.key(), true);
        Message responseMessage = WriteResponse.makeWriteResponseMessage(message, response);
        MessagingService.instance().sendReply(responseMessage, id, message.getFrom());
    } catch (IOException e) {
        throw new IOError(e);
    }
}
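The detail worth noting here is the exception boundary: the body is already fully in memory, so an IOException from deserialization signals a corrupt or incompatible payload rather than a real I/O failure, and the handler converts it to an unchecked IOError rather than propagating a checked exception through the verb-handler interface. A hedged sketch of that conversion in isolation follows; Deserializer and deserializeOrDie are hypothetical names, not eiger API.

import java.io.*;

public class VerbDecodeSketch {
    // hypothetical functional interface standing in for RowMutation.serializer()
    interface Deserializer<T> {
        T deserialize(DataInput in) throws IOException;
    }

    // wrap the checked IOException in an unchecked IOError, as doVerb() does
    static <T> T deserializeOrDie(byte[] body, Deserializer<T> d) {
        try {
            return d.deserialize(new DataInputStream(new ByteArrayInputStream(body)));
        } catch (IOException e) {
            throw new IOError(e);
        }
    }

    public static void main(String[] args) {
        byte[] body = new byte[] { 0, 0, 0, 42 };
        int value = deserializeOrDie(body, new Deserializer<Integer>() {
            public Integer deserialize(DataInput in) throws IOException {
                return in.readInt();
            }
        });
        System.out.println(value); // prints 42
    }
}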
Use of org.apache.cassandra.io.util.FastByteArrayInputStream in project eiger by wlloyd.
The class GossipDigestAckVerbHandler, method doVerb.
public void doVerb(Message message, String id) {
    InetAddress from = message.getFrom();
    if (logger_.isTraceEnabled())
        logger_.trace("Received a GossipDigestAckMessage from {}", from);
    if (!Gossiper.instance.isEnabled()) {
        if (logger_.isTraceEnabled())
            logger_.trace("Ignoring GossipDigestAckMessage because gossip is disabled");
        return;
    }
    byte[] bytes = message.getMessageBody();
    DataInputStream dis = new DataInputStream(new FastByteArrayInputStream(bytes));
    try {
        GossipDigestAckMessage gDigestAckMessage = GossipDigestAckMessage.serializer().deserialize(dis, message.getVersion());
        List<GossipDigest> gDigestList = gDigestAckMessage.getGossipDigestList();
        Map<InetAddress, EndpointState> epStateMap = gDigestAckMessage.getEndpointStateMap();
        if (epStateMap.size() > 0) {
            /* Notify the Failure Detector */
            Gossiper.instance.notifyFailureDetector(epStateMap);
            Gossiper.instance.applyStateLocally(epStateMap);
        }
        /* Get the state required to send to this gossipee - construct GossipDigestAck2Message */
        Map<InetAddress, EndpointState> deltaEpStateMap = new HashMap<InetAddress, EndpointState>();
        for (GossipDigest gDigest : gDigestList) {
            InetAddress addr = gDigest.getEndpoint();
            EndpointState localEpStatePtr = Gossiper.instance.getStateForVersionBiggerThan(addr, gDigest.getMaxVersion());
            if (localEpStatePtr != null)
                deltaEpStateMap.put(addr, localEpStatePtr);
        }
        GossipDigestAck2Message gDigestAck2 = new GossipDigestAck2Message(deltaEpStateMap);
        Message gDigestAck2Message = Gossiper.instance.makeGossipDigestAck2Message(gDigestAck2, message.getVersion());
        if (logger_.isTraceEnabled())
            logger_.trace("Sending a GossipDigestAck2Message to {}", from);
        MessagingService.instance().sendOneWay(gDigestAck2Message, from);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
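The core of this handler is the second leg of the gossip three-way handshake: apply the remote state the peer pushed, then build a delta by answering each digest with local endpoint state only where the local version is newer than the peer's reported max version. A minimal sketch of that delta computation over plain maps follows; String endpoints and integer versions are hypothetical stand-ins for InetAddress and EndpointState.

import java.util.*;

public class GossipDeltaSketch {
    // include local state only where our version exceeds the peer's reported max version
    static Map<String, Integer> computeDelta(Map<String, Integer> local, Map<String, Integer> peerDigests) {
        Map<String, Integer> delta = new HashMap<String, Integer>();
        for (Map.Entry<String, Integer> digest : peerDigests.entrySet()) {
            Integer localVersion = local.get(digest.getKey());
            if (localVersion != null && localVersion > digest.getValue())
                delta.put(digest.getKey(), localVersion);
        }
        return delta;
    }

    public static void main(String[] args) {
        Map<String, Integer> local = new HashMap<String, Integer>();
        local.put("10.0.0.1", 7);
        local.put("10.0.0.2", 3);
        Map<String, Integer> peerDigests = new HashMap<String, Integer>();
        peerDigests.put("10.0.0.1", 5); // we are newer: include in delta
        peerDigests.put("10.0.0.2", 9); // peer is newer: nothing to send
        System.out.println(computeDelta(local, peerDigests)); // {10.0.0.1=7}
    }
}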
Use of org.apache.cassandra.io.util.FastByteArrayInputStream in project eiger by wlloyd.
The class CommitLog, method recover.
/**
* Perform recovery on a list of commit log files.
*
* @param clogs the list of commit log files to replay
* @return the number of mutations replayed
*/
public int recover(File[] clogs) throws IOException {
    final Set<Table> tablesRecovered = new HashSet<Table>();
    List<Future<?>> futures = new ArrayList<Future<?>>();
    byte[] bytes = new byte[4096];
    Map<Integer, AtomicInteger> invalidMutations = new HashMap<Integer, AtomicInteger>();
    // count the number of replayed mutations. We don't really care about atomicity, but we need it to be a reference.
    final AtomicInteger replayedCount = new AtomicInteger();
    // compute per-CF and global replay positions
    final Map<Integer, ReplayPosition> cfPositions = new HashMap<Integer, ReplayPosition>();
    for (ColumnFamilyStore cfs : ColumnFamilyStore.all()) {
        // it's important to call RP.gRP per-cf, before aggregating all the positions w/ the Ordering.min call
        // below: gRP will return NONE if there are no flushed sstables, which is important to have in the
        // list (otherwise we'll just start replay from the first flush position that we do have, which is not correct).
        ReplayPosition rp = ReplayPosition.getReplayPosition(cfs.getSSTables());
        cfPositions.put(cfs.metadata.cfId, rp);
    }
    final ReplayPosition globalPosition = Ordering.from(ReplayPosition.comparator).min(cfPositions.values());
    Checksum checksum = new CRC32();
    for (final File file : clogs) {
        logger.info("Replaying " + file.getPath());
        final long segment = CommitLogSegment.idFromFilename(file.getName());
        RandomAccessReader reader = RandomAccessReader.open(new File(file.getAbsolutePath()), true);
        assert reader.length() <= Integer.MAX_VALUE;
        try {
            int replayPosition;
            if (globalPosition.segment < segment)
                replayPosition = 0;
            else if (globalPosition.segment == segment)
                replayPosition = globalPosition.position;
            else
                replayPosition = (int) reader.length();
            if (replayPosition < 0 || replayPosition >= reader.length()) {
                // replayPosition > reader.length() can happen if some data gets flushed before it is written to the commitlog
                // (see https://issues.apache.org/jira/browse/CASSANDRA-2285)
                logger.debug("skipping replay of fully-flushed {}", file);
                continue;
            }
            reader.seek(replayPosition);
            if (logger.isDebugEnabled())
                logger.debug("Replaying " + file + " starting at " + reader.getFilePointer());
            /* read the log, populate RowMutations, and apply them */
            while (!reader.isEOF()) {
                if (logger.isDebugEnabled())
                    logger.debug("Reading mutation at " + reader.getFilePointer());
                long claimedCRC32;
                int serializedSize;
                try {
                    // any of the reads may hit EOF
                    serializedSize = reader.readInt();
                    if (serializedSize == CommitLog.END_OF_SEGMENT_MARKER) {
                        logger.debug("Encountered end of segment marker at " + reader.getFilePointer());
                        break;
                    }
                    // This prevents the CRC from being fooled by special-case garbage in the file; see CASSANDRA-2128
                    if (serializedSize < 10)
                        break;
                    long claimedSizeChecksum = reader.readLong();
                    checksum.reset();
                    checksum.update(serializedSize);
                    if (checksum.getValue() != claimedSizeChecksum)
                        // entry wasn't synced correctly/fully. that's ok.
                        break;
                    if (serializedSize > bytes.length)
                        bytes = new byte[(int) (1.2 * serializedSize)];
                    reader.readFully(bytes, 0, serializedSize);
                    claimedCRC32 = reader.readLong();
                } catch (EOFException eof) {
                    // last CL entry didn't get completely written. that's ok.
                    break;
                }
                checksum.update(bytes, 0, serializedSize);
                if (claimedCRC32 != checksum.getValue()) {
                    // this entry wasn't fsynced fully, but since we still read on an entry boundary,
                    // there is no harm in trying the entries that follow.
                    continue;
                }
                /* deserialize the commit log entry */
                FastByteArrayInputStream bufIn = new FastByteArrayInputStream(bytes, 0, serializedSize);
                RowMutation rm = null;
                try {
                    // assuming version here. We've gone to lengths to make sure what gets written to the CL is in
                    // the current version. so do make sure the CL is drained prior to upgrading a node.
                    rm = RowMutation.serializer().deserialize(new DataInputStream(bufIn), MessagingService.version_, IColumnSerializer.Flag.LOCAL);
                } catch (UnserializableColumnFamilyException ex) {
                    AtomicInteger i = invalidMutations.get(ex.cfId);
                    if (i == null) {
                        i = new AtomicInteger(1);
                        invalidMutations.put(ex.cfId, i);
                    } else
                        i.incrementAndGet();
                    continue;
                }
                if (logger.isDebugEnabled())
                    logger.debug(String.format("replaying mutation for %s.%s: %s", rm.getTable(), ByteBufferUtil.bytesToHex(rm.key()), "{" + StringUtils.join(rm.getColumnFamilies().iterator(), ", ") + "}"));
                final long entryLocation = reader.getFilePointer();
                final RowMutation frm = rm;
                Runnable runnable = new WrappedRunnable() {
                    public void runMayThrow() throws IOException {
                        if (Schema.instance.getKSMetaData(frm.getTable()) == null)
                            return;
                        final Table table = Table.open(frm.getTable());
                        RowMutation newRm = new RowMutation(frm.getTable(), frm.key());
                        // rebuild the mutation, omitting column families that have been dropped; the cf
                        // name is suspect, so do everything based on the cfid instead.
                        for (ColumnFamily columnFamily : frm.getColumnFamilies()) {
                            if (Schema.instance.getCF(columnFamily.id()) == null)
                                // null means the cf has been dropped
                                continue;
                            ReplayPosition rp = cfPositions.get(columnFamily.id());
                            // replay if this segment is newer than the CF's flushed segment or,
                            // within the same segment, if we are after the replay position
                            if (segment > rp.segment || (segment == rp.segment && entryLocation > rp.position)) {
                                newRm.add(columnFamily);
                                replayedCount.incrementAndGet();
                            }
                        }
                        if (!newRm.isEmpty()) {
                            Table.open(newRm.getTable()).apply(newRm, false);
                            tablesRecovered.add(table);
                        }
                    }
                };
                futures.add(StageManager.getStage(Stage.MUTATION).submit(runnable));
                if (futures.size() > MAX_OUTSTANDING_REPLAY_COUNT) {
                    FBUtilities.waitOnFutures(futures);
                    futures.clear();
                }
            }
        } finally {
            FileUtils.closeQuietly(reader);
            logger.info("Finished reading " + file);
        }
    }
    for (Map.Entry<Integer, AtomicInteger> entry : invalidMutations.entrySet())
        logger.info(String.format("Skipped %d mutations from unknown (probably removed) CF with id %d", entry.getValue().intValue(), entry.getKey()));
    // wait for all the writes to finish on the mutation stage
    FBUtilities.waitOnFutures(futures);
    logger.debug("Finished waiting on mutations from recovery");
    // flush replayed tables
    futures.clear();
    for (Table table : tablesRecovered)
        futures.addAll(table.flush());
    FBUtilities.waitOnFutures(futures);
    return replayedCount.get();
}
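The on-disk entry layout that recover() parses is: an int serialized size, a long CRC over that size, the payload bytes, and a long CRC over the payload, accumulated on top of the size CRC without a reset in between (as the snippet above shows). Below is a standalone sketch of just that framing check, under the assumption of the same layout; LogFrameSketch is a hypothetical reader, not eiger code. Note that Checksum.update(int) folds in only the low-order byte of its argument, matching the original call.

import java.io.*;
import java.util.zip.CRC32;

public class LogFrameSketch {
    // returns the payload if both checksums match, or null if the entry looks torn
    static byte[] readEntry(DataInputStream in) throws IOException {
        int size = in.readInt();
        long claimedSizeCrc = in.readLong();
        CRC32 crc = new CRC32();
        crc.update(size); // same single-byte update as the original
        if (crc.getValue() != claimedSizeCrc)
            return null; // size header not fully synced: the original stops replaying here
        byte[] payload = new byte[size];
        in.readFully(payload);
        long claimedPayloadCrc = in.readLong();
        crc.update(payload, 0, size); // no reset: payload CRC builds on the size CRC
        if (crc.getValue() != claimedPayloadCrc)
            return null; // torn payload: the original skips it and tries the next entry
        return payload;
    }

    public static void main(String[] args) throws IOException {
        // build one well-formed entry in memory and read it back
        byte[] payload = "mutation-bytes".getBytes("UTF-8");
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bos);
        CRC32 crc = new CRC32();
        dos.writeInt(payload.length);
        crc.update(payload.length);
        dos.writeLong(crc.getValue());
        dos.write(payload);
        crc.update(payload, 0, payload.length);
        dos.writeLong(crc.getValue());
        byte[] entry = readEntry(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
        System.out.println(new String(entry, "UTF-8"));
    }
}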