Use of org.apache.hadoop.hbase.CellComparatorImpl in project hbase by apache.
The class TestHFile, method testGetShortMidpoint.
/**
 * Exercises {@link HFileWriterImpl#getMidpoint} over pairs of cells that differ in row, family,
 * qualifier or timestamp, and checks that the returned cell sorts after (or at) the left cell and
 * before (or at) the right cell. Also checks that the meta comparator returns the right cell
 * unchanged, and that the generated "fake" row keys have the expected contents.
 */
@Test
public void testGetShortMidpoint() {
  // Identical cells: the midpoint may equal both bounds.
  Cell left = getCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a"));
  Cell right = getCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a"));
  Cell mid = HFileWriterImpl.getMidpoint(CellComparatorImpl.COMPARATOR, left, right);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, left, mid) <= 0);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, mid, right) <= 0);

  // Rows differ by one in the first byte.
  left = getCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a"));
  right = getCell(Bytes.toBytes("b"), Bytes.toBytes("a"), Bytes.toBytes("a"));
  mid = HFileWriterImpl.getMidpoint(CellComparatorImpl.COMPARATOR, left, right);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, left, mid) < 0);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, mid, right) <= 0);

  // Rows differ by two in the first byte.
  left = getCell(Bytes.toBytes("g"), Bytes.toBytes("a"), Bytes.toBytes("a"));
  right = getCell(Bytes.toBytes("i"), Bytes.toBytes("a"), Bytes.toBytes("a"));
  mid = HFileWriterImpl.getMidpoint(CellComparatorImpl.COMPARATOR, left, right);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, left, mid) < 0);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, mid, right) <= 0);

  // Long right row: the midpoint row is expected to be a single byte.
  left = getCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a"));
  right = getCell(Bytes.toBytes("bbbbbbb"), Bytes.toBytes("a"), Bytes.toBytes("a"));
  mid = HFileWriterImpl.getMidpoint(CellComparatorImpl.COMPARATOR, left, right);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, left, mid) < 0);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, mid, right) < 0);
  assertEquals(1, mid.getRowLength());

  left = getCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a"));
  right = getCell(Bytes.toBytes("b"), Bytes.toBytes("a"), Bytes.toBytes("a"));
  mid = HFileWriterImpl.getMidpoint(CellComparatorImpl.COMPARATOR, left, right);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, left, mid) < 0);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, mid, right) <= 0);

  // Same row, left family is a prefix of the right family.
  left = getCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a"));
  right = getCell(Bytes.toBytes("a"), Bytes.toBytes("aaaaaaaa"), Bytes.toBytes("b"));
  mid = HFileWriterImpl.getMidpoint(CellComparatorImpl.COMPARATOR, left, right);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, left, mid) < 0);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, mid, right) < 0);
  assertEquals(2, mid.getFamilyLength());

  // Same row and family, left qualifier is a prefix of the right qualifier.
  left = getCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a"));
  right = getCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("aaaaaaaaa"));
  mid = HFileWriterImpl.getMidpoint(CellComparatorImpl.COMPARATOR, left, right);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, left, mid) < 0);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, mid, right) < 0);
  assertEquals(2, mid.getQualifierLength());

  // Same row and family, qualifiers differ by one in their only byte.
  left = getCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a"));
  right = getCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("b"));
  mid = HFileWriterImpl.getMidpoint(CellComparatorImpl.COMPARATOR, left, right);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, left, mid) < 0);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, mid, right) <= 0);
  assertEquals(1, mid.getQualifierLength());

  // Verify boundary conditions
  left = getCell(Bytes.toBytes("a"), Bytes.toBytes("a"), new byte[] { 0x00, (byte) 0xFE });
  right = getCell(Bytes.toBytes("a"), Bytes.toBytes("a"), new byte[] { 0x00, (byte) 0xFF });
  mid = HFileWriterImpl.getMidpoint(CellComparatorImpl.COMPARATOR, left, right);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, left, mid) < 0);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, mid, right) == 0);
  left = getCell(Bytes.toBytes("a"), Bytes.toBytes("a"), new byte[] { 0x00, 0x12 });
  right = getCell(Bytes.toBytes("a"), Bytes.toBytes("a"), new byte[] { 0x00, 0x12, 0x00 });
  mid = HFileWriterImpl.getMidpoint(CellComparatorImpl.COMPARATOR, left, right);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, left, mid) < 0);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, mid, right) == 0);

  // Assert that if meta comparator, it returns the right cell -- i.e. no
  // optimization done.
  left = getCell(Bytes.toBytes("g"), Bytes.toBytes("a"), Bytes.toBytes("a"));
  right = getCell(Bytes.toBytes("i"), Bytes.toBytes("a"), Bytes.toBytes("a"));
  mid = HFileWriterImpl.getMidpoint(MetaCellComparator.META_COMPARATOR, left, right);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, left, mid) < 0);
  assertTrue(PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, mid, right) == 0);

  byte[] family = Bytes.toBytes("family");
  byte[] qualA = Bytes.toBytes("qfA");
  byte[] qualB = Bytes.toBytes("qfB");
  final CellComparatorImpl keyComparator = CellComparatorImpl.COMPARATOR;

  // verify that faked shorter rowkey could be generated
  long ts = 5;
  KeyValue kv1 = new KeyValue(Bytes.toBytes("the quick brown fox"), family, qualA, ts, Type.Put);
  KeyValue kv2 = new KeyValue(Bytes.toBytes("the who test text"), family, qualA, ts, Type.Put);
  Cell newKey = HFileWriterImpl.getMidpoint(keyComparator, kv1, kv2);
  assertTrue(keyComparator.compare(kv1, newKey) < 0);
  assertTrue((keyComparator.compare(kv2, newKey)) > 0);
  byte[] expectedArray = Bytes.toBytes("the r");
  // The comparison result used to be discarded, so this expectation was never enforced.
  assertTrue(Bytes.equals(newKey.getRowArray(), newKey.getRowOffset(), newKey.getRowLength(),
    expectedArray, 0, expectedArray.length));

  // verify: same with "row + family + qualifier", return rightKey directly
  kv1 = new KeyValue(Bytes.toBytes("ilovehbase"), family, qualA, 5, Type.Put);
  kv2 = new KeyValue(Bytes.toBytes("ilovehbase"), family, qualA, 0, Type.Put);
  assertTrue(keyComparator.compare(kv1, kv2) < 0);
  newKey = HFileWriterImpl.getMidpoint(keyComparator, kv1, kv2);
  assertTrue(keyComparator.compare(kv1, newKey) < 0);
  assertTrue((keyComparator.compare(kv2, newKey)) == 0);
  kv1 = new KeyValue(Bytes.toBytes("ilovehbase"), family, qualA, -5, Type.Put);
  kv2 = new KeyValue(Bytes.toBytes("ilovehbase"), family, qualA, -10, Type.Put);
  assertTrue(keyComparator.compare(kv1, kv2) < 0);
  newKey = HFileWriterImpl.getMidpoint(keyComparator, kv1, kv2);
  assertTrue(keyComparator.compare(kv1, newKey) < 0);
  assertTrue((keyComparator.compare(kv2, newKey)) == 0);

  // verify: same with row, different with qualifier
  kv1 = new KeyValue(Bytes.toBytes("ilovehbase"), family, qualA, 5, Type.Put);
  kv2 = new KeyValue(Bytes.toBytes("ilovehbase"), family, qualB, 5, Type.Put);
  assertTrue(keyComparator.compare(kv1, kv2) < 0);
  newKey = HFileWriterImpl.getMidpoint(keyComparator, kv1, kv2);
  assertTrue(keyComparator.compare(kv1, newKey) < 0);
  assertTrue((keyComparator.compare(kv2, newKey)) > 0);
  assertTrue(Arrays.equals(CellUtil.cloneFamily(newKey), family));
  assertTrue(Arrays.equals(CellUtil.cloneQualifier(newKey), qualB));
  assertTrue(newKey.getTimestamp() == HConstants.LATEST_TIMESTAMP);
  assertTrue(newKey.getTypeByte() == Type.Maximum.getCode());

  // verify metaKeyComparator's getShortMidpointKey output
  final CellComparatorImpl metaKeyComparator = MetaCellComparator.META_COMPARATOR;
  kv1 = new KeyValue(Bytes.toBytes("ilovehbase123"), family, qualA, 5, Type.Put);
  kv2 = new KeyValue(Bytes.toBytes("ilovehbase234"), family, qualA, 0, Type.Put);
  newKey = HFileWriterImpl.getMidpoint(metaKeyComparator, kv1, kv2);
  assertTrue(metaKeyComparator.compare(kv1, newKey) < 0);
  assertTrue((metaKeyComparator.compare(kv2, newKey) == 0));

  // verify common fix scenario
  kv1 = new KeyValue(Bytes.toBytes("ilovehbase"), family, qualA, ts, Type.Put);
  kv2 = new KeyValue(Bytes.toBytes("ilovehbaseandhdfs"), family, qualA, ts, Type.Put);
  assertTrue(keyComparator.compare(kv1, kv2) < 0);
  newKey = HFileWriterImpl.getMidpoint(keyComparator, kv1, kv2);
  assertTrue(keyComparator.compare(kv1, newKey) < 0);
  assertTrue((keyComparator.compare(kv2, newKey)) > 0);
  // The midpoint row must keep the shared prefix (kv1's whole row) and extend it by one byte.
  // NOTE(review): the original compared the row against "ilovehbasea" but threw the result away.
  // The value of the extra byte is deliberately not pinned here because it cannot be confirmed
  // from this file -- check HFileWriterImpl before tightening this to an exact match.
  byte[] sharedPrefix = Bytes.toBytes("ilovehbase");
  assertEquals(sharedPrefix.length + 1, newKey.getRowLength());
  assertTrue(Bytes.equals(newKey.getRowArray(), newKey.getRowOffset(), sharedPrefix.length,
    sharedPrefix, 0, sharedPrefix.length));

  // verify only 1 offset scenario
  kv1 = new KeyValue(Bytes.toBytes("100abcdefg"), family, qualA, ts, Type.Put);
  kv2 = new KeyValue(Bytes.toBytes("101abcdefg"), family, qualA, ts, Type.Put);
  assertTrue(keyComparator.compare(kv1, kv2) < 0);
  newKey = HFileWriterImpl.getMidpoint(keyComparator, kv1, kv2);
  assertTrue(keyComparator.compare(kv1, newKey) < 0);
  assertTrue((keyComparator.compare(kv2, newKey)) > 0);
  expectedArray = Bytes.toBytes("101");
  // As above: assert the comparison instead of silently dropping its result.
  assertTrue(Bytes.equals(newKey.getRowArray(), newKey.getRowOffset(), newKey.getRowLength(),
    expectedArray, 0, expectedArray.length));
}
Use of org.apache.hadoop.hbase.CellComparatorImpl in project hive by apache.
The class HiveHFileOutputFormat, method getHiveRecordWriter.
/**
 * Creates a Hive {@code RecordWriter} that writes incoming rows as HBase cells through the
 * underlying HFile writer and, on a non-aborted close, moves the produced files from the task
 * attempt's work directory into the configured column family directory.
 *
 * <p>The HBase table name is taken from the job configuration, falling back to the Hive table
 * name (lower-cased, with the default prefix stripped). The last path component of the
 * configured HFile family path is used as the column family name. The first entry of the
 * {@code columns} table property is treated as the row key; the remaining columns are written
 * in byte order of their names.
 *
 * @param jc job configuration; also receives the resolved output table name
 * @param finalOutPath output path handed to the underlying file output format
 * @param valueClass not used by this implementation
 * @param isCompressed whether output compression is enabled on the job
 * @param tableProperties table properties supplying the table name, family path and columns
 * @param progressable progress reporter passed to the task attempt context
 * @return a writer accepting {@code Text} and {@code PutWritable} records
 * @throws IOException if the writer cannot be created
 * @throws RuntimeException if no HFile family path is configured
 */
@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path finalOutPath,
    Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
    final Progressable progressable) throws IOException {
  String hbaseTableName = jc.get(HBaseSerDe.HBASE_TABLE_NAME);
  if (hbaseTableName == null) {
    hbaseTableName = tableProperties.getProperty(hive_metastoreConstants.META_TABLE_NAME);
    // Locale.ROOT keeps the case folding independent of the JVM's default locale.
    hbaseTableName = hbaseTableName.toLowerCase(java.util.Locale.ROOT);
    if (hbaseTableName.startsWith(HBaseStorageHandler.DEFAULT_PREFIX)) {
      hbaseTableName = hbaseTableName.substring(HBaseStorageHandler.DEFAULT_PREFIX.length());
    }
  }
  jc.set(OUTPUT_TABLE_NAME_CONF_KEY, hbaseTableName);

  // Read configuration for the target path, first from jobconf, then from table properties
  String hfilePath = getFamilyPath(jc, tableProperties);
  if (hfilePath == null) {
    throw new RuntimeException("Please set " + HFILE_FAMILY_PATH + " to target location for HFiles");
  }

  // Target path's last component is also the column family name.
  final Path columnFamilyPath = new Path(hfilePath);
  final String columnFamilyName = columnFamilyPath.getName();
  final byte[] columnFamilyNameBytes = Bytes.toBytes(columnFamilyName);

  // Job.getInstance replaces the deprecated Job(Configuration) constructor.
  final Job job = Job.getInstance(jc);
  setCompressOutput(job, isCompressed);
  setOutputPath(job, finalOutPath);

  // Create the HFile writer
  final org.apache.hadoop.mapreduce.TaskAttemptContext tac =
      ShimLoader.getHadoopShims().newTaskAttemptContext(job.getConfiguration(), progressable);
  final Path outputdir = FileOutputFormat.getOutputPath(tac);
  final Path taskAttemptOutputdir = new FileOutputCommitter(outputdir, tac).getWorkPath();
  final org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable, Cell> fileWriter =
      getFileWriter(tac);

  // Individual columns are going to be pivoted to HBase cells,
  // and for each row, they need to be written out in order
  // of column name, so sort the column names now, creating a
  // mapping to their column position. However, the first
  // column is interpreted as the row key.
  String columnList = tableProperties.getProperty("columns");
  String[] columnArray = columnList.split(",");
  final SortedMap<byte[], Integer> columnMap =
      new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
  int i = 0;
  for (String columnName : columnArray) {
    if (i != 0) {
      columnMap.put(Bytes.toBytes(columnName), i);
    }
    ++i;
  }

  return new RecordWriter() {

    /**
     * Closes the HFile writer and, unless aborting, moves the generated files into the column
     * family directory.
     */
    @Override
    public void close(boolean abort) throws IOException {
      try {
        fileWriter.close(null);
        if (abort) {
          return;
        }
        // Move the hfiles file(s) from the task output directory to the
        // location specified by the user.
        FileSystem fs = outputdir.getFileSystem(jc);
        fs.mkdirs(columnFamilyPath);
        Path srcDir = taskAttemptOutputdir;
        FileStatus[] files = null;
        // Descend through single-child directories until the family directory is reached.
        for (; ; ) {
          try {
            files = fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER);
          } catch (FileNotFoundException fnf) {
            LOG.debug("File doesn't exist {} ", srcDir, fnf);
            break;
          }
          if ((files == null) || (files.length == 0)) {
            throw new IOException("No family directories found in " + srcDir);
          }
          if (files.length != 1) {
            throw new IOException("Multiple family directories found in " + srcDir);
          }
          srcDir = files[0].getPath();
          if (srcDir.getName().equals(columnFamilyName)) {
            break;
          }
          if (files[0].isFile()) {
            throw new IOException("No family directories found in " + taskAttemptOutputdir + ". " + "The last component in hfile path should match column family name " + columnFamilyName);
          }
        }
        if (files != null) {
          for (FileStatus regionFile : fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER)) {
            Path source = regionFile.getPath();
            Path target = new Path(columnFamilyPath, source.getName());
            // rename() signals failure through its return value; ignoring it would silently
            // drop the HFile from the final output.
            if (!fs.rename(source, target)) {
              throw new IOException("Failed to move HFile " + source + " to " + target);
            }
          }
        }
      } catch (InterruptedException ex) {
        // Restore the interrupt flag so callers further up can still observe the interruption.
        Thread.currentThread().interrupt();
        throw new IOException(ex);
      } catch (FileNotFoundException fnf) {
        // Ignore....
        LOG.debug("File doesn't exist.", fnf);
      }
    }

    /** Splits a delimited text row into fields and writes one cell per non-null column. */
    private void writeText(Text text) throws IOException {
      // Decompose the incoming text row into fields.
      String s = text.toString();
      String[] fields = s.split("\u0001");
      assert (fields.length <= (columnMap.size() + 1));
      // First field is the row key.
      byte[] rowKeyBytes = Bytes.toBytes(fields[0]);
      // Remaining fields are cells addressed by column name within row.
      for (Map.Entry<byte[], Integer> entry : columnMap.entrySet()) {
        byte[] columnNameBytes = entry.getKey();
        int iColumn = entry.getValue();
        String val;
        if (iColumn >= fields.length) {
          // trailing blank field
          val = "";
        } else {
          val = fields[iColumn];
          if ("\\N".equals(val)) {
            // omit nulls
            continue;
          }
        }
        byte[] valBytes = Bytes.toBytes(val);
        KeyValue kv = new KeyValue(rowKeyBytes, columnFamilyNameBytes, columnNameBytes, valBytes);
        try {
          fileWriter.write(null, kv);
        } catch (IOException e) {
          LOG.error("Failed while writing row: " + s);
          throw e;
        } catch (InterruptedException ex) {
          // Restore the interrupt flag before translating to the declared exception type.
          Thread.currentThread().interrupt();
          throw new IOException(ex);
        }
      }
    }

    /** Writes every cell of the given Put, sorted within each column family. */
    private void writePut(PutWritable put) throws IOException {
      ImmutableBytesWritable row = new ImmutableBytesWritable(put.getPut().getRow());
      SortedMap<byte[], List<Cell>> cells = put.getPut().getFamilyCellMap();
      for (Map.Entry<byte[], List<Cell>> entry : cells.entrySet()) {
        // Reuse the shared comparator instead of allocating a new one per family.
        Collections.sort(entry.getValue(), CellComparatorImpl.COMPARATOR);
        for (Cell c : entry.getValue()) {
          try {
            fileWriter.write(row, KeyValueUtil.copyToNewKeyValue(c));
          } catch (InterruptedException e) {
            // Restore the interrupt flag before translating to an IOException subtype.
            Thread.currentThread().interrupt();
            throw (InterruptedIOException) new InterruptedIOException().initCause(e);
          }
        }
      }
    }

    /** Dispatches on the record's runtime type; only Text and PutWritable are supported. */
    @Override
    public void write(Writable w) throws IOException {
      if (w instanceof Text) {
        writeText((Text) w);
      } else if (w instanceof PutWritable) {
        writePut((PutWritable) w);
      } else {
        throw new IOException("Unexpected writable " + w);
      }
    }
  };
}
Use of org.apache.hadoop.hbase.CellComparatorImpl in project hbase by apache.
The class TestStripeStoreEngine, method createEngine.
/**
 * Builds a {@code TestStoreEngine} via {@code StoreEngine.create}, backed entirely by mocks.
 *
 * <p>The mocked store carries the supplied configuration, reports the first meta region as its
 * region info and hands back a mocked region; the cell comparator is a mock as well.
 *
 * @param conf configuration assigned to the mocked store and passed to the engine factory
 * @return the created engine, cast to {@code TestStoreEngine}
 * @throws Exception if the engine factory fails
 */
private static TestStoreEngine createEngine(Configuration conf) throws Exception {
  // Mocked collaborators handed to the factory.
  final HStore mockedStore = mock(HStore.class);
  final HRegion mockedRegion = mock(HRegion.class);
  final CellComparatorImpl mockedComparator = mock(CellComparatorImpl.class);

  // Wire the store: plain field assignment for the config, stubs for the accessors.
  mockedStore.conf = conf;
  when(mockedStore.getHRegion()).thenReturn(mockedRegion);
  when(mockedStore.getRegionInfo()).thenReturn(RegionInfoBuilder.FIRST_META_REGIONINFO);

  return (TestStoreEngine) StoreEngine.create(mockedStore, conf, mockedComparator);
}
Aggregations