use of org.apache.hadoop.hbase.client.HTableInterface in project hive by apache.
the class HBaseReadWrite method putPartitions.
/**
 * Add a group of partitions. This should only be used when all partitions are new. It
 * blindly increments the ref count on the storage descriptor.
 * @param partitions list of partitions to add
 * @throws IOException
 */
void putPartitions(List<Partition> partitions) throws IOException {
    List<Put> puts = new ArrayList<>(partitions.size());
    for (Partition partition : partitions) {
        byte[] hash = putStorageDescriptor(partition.getSd());
        List<String> partTypes = HBaseUtils.getPartitionKeyTypes(
            getTable(partition.getDbName(), partition.getTableName()).getPartitionKeys());
        byte[][] serialized = HBaseUtils.serializePartition(partition, partTypes, hash);
        Put p = new Put(serialized[0]);
        p.add(CATALOG_CF, CATALOG_COL, serialized[1]);
        puts.add(p);
        partCache.put(partition.getDbName(), partition.getTableName(), partition);
    }
    HTableInterface htab = conn.getHBaseTable(PART_TABLE);
    htab.put(puts);
    conn.flush(htab);
}
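The core pattern here is batching a list of Puts and pushing them through a single HTableInterface obtained from a shared connection, then flushing. Below is a minimal standalone sketch of that pattern, assuming the pre-1.0 HBase client API where HConnection.getTable returns an HTableInterface; the table name, column family, and qualifier are hypothetical placeholders, not the ones Hive actually uses.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

public class BatchedPutSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        HConnection conn = HConnectionManager.createConnection(conf);
        HTableInterface table = conn.getTable("example_table"); // hypothetical table name
        try {
            List<Put> puts = new ArrayList<Put>();
            for (int i = 0; i < 10; i++) {
                Put p = new Put(Bytes.toBytes("row-" + i));
                // old-style Put.add(family, qualifier, value); replaced by addColumn in HBase 1.x
                p.add(Bytes.toBytes("cf"), Bytes.toBytes("col"), Bytes.toBytes("value-" + i));
                puts.add(p);
            }
            table.put(puts);      // one batched round trip for all rows
            table.flushCommits(); // force any client-buffered mutations to the server
        } finally {
            table.close();
            conn.close();
        }
    }
}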
use of org.apache.hadoop.hbase.client.HTableInterface in project Solbase by Photobucket.
the class TermDocMetadataLoader method loadObject.
public CachedObjectWrapper<CompactedTermDocMetadataArray, TermDocMetadataVersionIdentifier> loadObject(Term term, int start, int end, LayeredCache<Term, CompactedTermDocMetadataArray, TermDocMetadataVersionIdentifier, TermDocMetadata> cache) throws IOException {
    HTableInterface termVectorTable = SolbaseUtil.getTermVectorTable();
    try {
        byte[] termBeginKey = SolbaseUtil.generateTermKey(term, start);
        byte[] termEndKey;
        if (end > SolbaseShardUtil.getMaxDocId()) {
            // if we are in the last shard, the end key always starts with \xff\xff\xff\xff,
            // meaning: fetch all remaining docs.
            termEndKey = SolbaseUtil.generateTermEndKey(term);
        } else {
            termEndKey = SolbaseUtil.generateTermKey(term, end);
        }
        Scan fieldScan = new Scan(termBeginKey, termEndKey);
        fieldScan.addFamily(SolbaseUtil.termVectorDocColumnFamilyName);
        fieldScan.setBatch(2000);
        fieldScan.setCaching(2000);
        ResultScanner fieldScanner = termVectorTable.getScanner(fieldScan);
        Result termDoc;
        ByteArrayOutputStream bis = new ByteArrayOutputStream();
        int docAmount = 0;
        while ((termDoc = fieldScanner.next()) != null) {
            if (storageType == STORAGE_TYPE.WIDE_ROW) {
                convertResultChunkToTermDoc(termDoc, bis);
            } else {
                convertResultToTermDoc(termDoc, bis);
                docAmount++;
            }
        }
        fieldScanner.close();
        logger.info("Read from HBase for term: " + term.toString() + " has this many docs: " + docAmount);
        // TODO LOAD VERSION
        TermDocMetadataVersionIdentifier versionIdentifier = getVersionIdentifier(term, start, end);
        return new CachedObjectWrapper<CompactedTermDocMetadataArray, TermDocMetadataVersionIdentifier>(new CompactedTermDocMetadataArray(bis, docAmount), versionIdentifier, System.currentTimeMillis());
    } finally {
        SolbaseUtil.releaseTable(termVectorTable);
    }
}
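The pattern worth lifting from this snippet is the bounded range scan: a Scan with explicit start and stop row keys, restricted to one column family, with batch and caching tuned for wide rows, iterated through a ResultScanner that is closed before the table is released. Below is a minimal sketch of that pattern as a helper method, assuming the pre-1.0 client API; the column family name "d" is a hypothetical placeholder.

import java.io.IOException;

import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class RangeScanSketch {
    /** Counts rows in the half-open key range [startKey, stopKey) of one column family. */
    static int countRows(HTableInterface table, byte[] startKey, byte[] stopKey) throws IOException {
        Scan scan = new Scan(startKey, stopKey);
        scan.addFamily(Bytes.toBytes("d")); // hypothetical column family
        scan.setBatch(2000);                // max cells per Result, useful for very wide rows
        scan.setCaching(2000);              // rows fetched per RPC round trip
        ResultScanner scanner = table.getScanner(scan);
        try {
            int rows = 0;
            for (Result r = scanner.next(); r != null; r = scanner.next()) {
                rows++;
            }
            return rows;
        } finally {
            scanner.close(); // always release the scanner lease
        }
    }
}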
use of org.apache.hadoop.hbase.client.HTableInterface in project Solbase by Photobucket.
the class TermDocMetadataLoader method getStaticVersionIdentifier.
public static TermDocMetadataVersionIdentifier getStaticVersionIdentifier(Term key, int startDocId, int endDocId) throws IOException {
    HTableInterface termVectorVersionIDTable = SolbaseUtil.getTermVectorVersionIDTable();
    try {
        byte[] fieldTermKey = SolbaseUtil.generateTermKey(key);
        Get get = new Get(Bytes.add(fieldTermKey, Bytes.toBytes(startDocId), Bytes.toBytes(endDocId)));
        Result result = termVectorVersionIDTable.get(get);
        if (result.isEmpty()) {
            Put updatePut = new Put(Bytes.add(fieldTermKey, Bytes.toBytes(startDocId), Bytes.toBytes(endDocId)));
            long currentTime = System.currentTimeMillis();
            updatePut.add(SolbaseUtil.timestampColumnFamilyName, Bytes.toBytes(""), Bytes.toBytes(currentTime));
            termVectorVersionIDTable.put(updatePut);
            return new TermDocMetadataVersionIdentifier(currentTime, startDocId, endDocId);
        } else {
            return new TermDocMetadataVersionIdentifier(Bytes.toLong(result.getValue(Bytes.toBytes("timestamp"), Bytes.toBytes(""))), startDocId, endDocId);
        }
    } finally {
        SolbaseUtil.releaseTable(termVectorVersionIDTable);
    }
}
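This is a read-then-initialize pattern: Get the version cell and, if the row is empty, Put an initial timestamp. Note that a Get followed by a Put is not atomic, so two concurrent callers could both see an empty result and write different timestamps. A hedged sketch of an atomic alternative using HTableInterface.checkAndPut (write only if the cell is currently absent) follows; the row key, family, and qualifier here are hypothetical placeholders, not Solbase's actual schema.

import java.io.IOException;

import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class VersionCellSketch {
    /** Returns the stored version, creating it atomically with the current time if absent. */
    static long readOrInitVersion(HTableInterface table, byte[] row) throws IOException {
        byte[] family = Bytes.toBytes("timestamp"); // hypothetical family name
        byte[] qualifier = Bytes.toBytes("");
        long now = System.currentTimeMillis();
        Put put = new Put(row);
        put.add(family, qualifier, Bytes.toBytes(now));
        // checkAndPut with a null expected value writes only if the cell does not exist yet,
        // so exactly one concurrent caller wins the initialization.
        if (table.checkAndPut(row, family, qualifier, null, put)) {
            return now;
        }
        // Someone else (or an earlier call) already wrote the cell; read it back.
        Result result = table.get(new Get(row));
        return Bytes.toLong(result.getValue(family, qualifier));
    }
}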
use of org.apache.hadoop.hbase.client.HTableInterface in project Solbase by Photobucket.
the class DocumentLoader method loadObject.
public CachedObjectWrapper<Document, Long> loadObject(Integer docNum, int start, int end, LayeredCache<Integer, Document, Long, ParsedDoc> cache) throws IOException {
    Document document = new Document();
    Get documentGet = new Get(SolbaseUtil.randomize(docNum));
    if (fieldNames == null || fieldNames.size() == 0) {
        // get all columns (this skips meta info)
        documentGet.addFamily(Bytes.toBytes("field"));
    } else {
        for (byte[] fieldName : fieldNames) {
            documentGet.addColumn(Bytes.toBytes("field"), fieldName);
        }
    }
    Result documentResult = null;
    // if docTable is set up, reuse the instance; otherwise create a brand new one and close it when done
    if (this.docTable == null) {
        HTableInterface docTable = null;
        try {
            docTable = SolbaseUtil.getDocTable();
            documentResult = docTable.get(documentGet);
        } finally {
            SolbaseUtil.releaseTable(docTable);
        }
    } else {
        documentResult = this.docTable.get(documentGet);
    }
    if (documentResult == null || documentResult.isEmpty()) {
        return null;
    }
    // TODO, get from result
    Long versionIdentifier = 0l;
    NavigableMap<byte[], byte[]> familyMap = documentResult.getFamilyMap(Bytes.toBytes("field"));
    for (Map.Entry<byte[], byte[]> fieldColumn : familyMap.entrySet()) {
        Field field = null;
        String fieldName = Bytes.toString(fieldColumn.getKey());
        byte[] value;
        ByteBuffer v = ByteBuffer.wrap(fieldColumn.getValue());
        int vlimit = v.limit() + v.arrayOffset();
        if (v.array()[vlimit - 1] != Byte.MAX_VALUE && v.array()[vlimit - 1] != Byte.MIN_VALUE) {
            throw new CorruptIndexException("Solbase field is not properly encoded: " + docNum + "(" + fieldName + ")");
        } else if (v.array()[vlimit - 1] == Byte.MAX_VALUE) {
            // Binary
            value = new byte[vlimit - 1];
            System.arraycopy(v.array(), v.position() + v.arrayOffset(), value, 0, vlimit - 1);
            field = new Field(fieldName, value, Store.YES);
            document.add(field);
        } else if (v.array()[vlimit - 1] == Byte.MIN_VALUE) {
            // String
            value = new byte[vlimit - 1];
            System.arraycopy(v.array(), v.position() + v.arrayOffset(), value, 0, vlimit - 1);
            // Check for multi-fields
            String fieldString = new String(value, "UTF-8");
            if (fieldString.indexOf(Bytes.toString(SolbaseUtil.delimiter)) >= 0) {
                StringTokenizer tok = new StringTokenizer(fieldString, Bytes.toString(SolbaseUtil.delimiter));
                while (tok.hasMoreTokens()) {
                    // update logic
                    if (schema != null) {
                        SchemaField sfield = schema.getFieldOrNull(fieldName);
                        if (sfield.getType() instanceof EmbeddedIndexedIntField) {
                            EmbeddedIndexedIntField eiif = (EmbeddedIndexedIntField) sfield.getType();
                            EmbeddedSortField sf = new EmbeddedSortField(fieldName, tok.nextToken(), Field.Store.YES, Field.Index.NO, eiif.getFieldNumber());
                            document.add(sf);
                        } else {
                            Field f = sfield.createField(tok.nextToken(), 1.0f);
                            if (f != null) {
                                // null fields are not added
                                document.add(f);
                            }
                        }
                    } else {
                        field = new Field(fieldName, tok.nextToken(), Store.YES, Index.ANALYZED);
                        document.add(field);
                    }
                }
            } else {
                // update logic
                if (schema != null) {
                    SchemaField sfield = schema.getFieldOrNull(fieldName);
                    if (sfield.getType() instanceof EmbeddedIndexedIntField) {
                        EmbeddedIndexedIntField eiif = (EmbeddedIndexedIntField) sfield.getType();
                        EmbeddedSortField sf = new EmbeddedSortField(fieldName, fieldString, Field.Store.YES, Field.Index.NO, eiif.getFieldNumber());
                        document.add(sf);
                    } else {
                        Field f = sfield.createField(fieldString, 1.0f);
                        if (f != null) {
                            // null fields are not added
                            document.add(f);
                        }
                    }
                } else {
                    field = new Field(fieldName, fieldString, Store.YES, Index.ANALYZED);
                    document.add(field);
                }
            }
        }
    }
    return new CachedObjectWrapper<Document, Long>(document, versionIdentifier, System.currentTimeMillis());
}
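The HBase-facing part of this loader is a point Get that either fetches an entire column family or a specific set of columns, followed by a walk over Result.getFamilyMap to decode each stored field. Here is a minimal sketch of that access pattern, assuming the pre-1.0 client API; the family name "field" mirrors the snippet above, but the row key and column list are hypothetical placeholders.

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;

import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class FieldFetchSketch {
    /** Fetches one row and prints each qualifier/value pair in the "field" family. */
    static void dumpFields(HTableInterface table, byte[] rowKey, List<byte[]> wantedColumns) throws IOException {
        byte[] family = Bytes.toBytes("field");
        Get get = new Get(rowKey);
        if (wantedColumns == null || wantedColumns.isEmpty()) {
            get.addFamily(family);             // fetch every column in the family
        } else {
            for (byte[] column : wantedColumns) {
                get.addColumn(family, column); // fetch only the requested columns
            }
        }
        Result result = table.get(get);
        if (result == null || result.isEmpty()) {
            return;                            // row not found
        }
        NavigableMap<byte[], byte[]> familyMap = result.getFamilyMap(family);
        for (Map.Entry<byte[], byte[]> column : familyMap.entrySet()) {
            System.out.println(Bytes.toString(column.getKey()) + " = "
                    + Bytes.toStringBinary(column.getValue()));
        }
    }
}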
use of org.apache.hadoop.hbase.client.HTableInterface in project Solbase by Photobucket.
the class IndexWriter method updateDocument.
public void updateDocument(Put documentPut, Document doc) {
    String uniqId = doc.get("global_uniq_id");
    Put mappingPut = new Put(Bytes.toBytes(uniqId));
    mappingPut.add(SolbaseUtil.docIdColumnFamilyName, SolbaseUtil.tombstonedColumnFamilyQualifierBytes, Bytes.toBytes(0));
    updateDocKeyIdMap(mappingPut);
    // for remote server updates via solr update we want to use getDocTable(),
    // but for now map/reduce can use the local htable
    HTableInterface docTable = SolbaseUtil.getDocTable();
    // insert the document into the doc table
    try {
        documentPut.add(SolbaseUtil.timestampColumnFamilyName, SolbaseUtil.tombstonedColumnFamilyQualifierBytes, Bytes.toBytes(0));
        docTable.put(documentPut);
    } catch (IOException e) {
        throw new SolbaseException(SolbaseException.ErrorCode.SERVER_ERROR, e.getMessage());
    } finally {
        SolbaseUtil.releaseTable(docTable);
    }
}
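The recurring discipline across these snippets is: obtain the HTableInterface, do the write, translate the checked IOException into the application's own exception type, and release the table in a finally block. The sketch below shows that write path in isolation, assuming the pre-1.0 client API; the family, qualifier, and the AppException class are hypothetical stand-ins for whatever the surrounding application defines (SolbaseException and SolbaseUtil.releaseTable in the snippet above).

import java.io.IOException;

import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

public class TombstonePutSketch {
    /** Hypothetical application exception standing in for SolbaseException. */
    static class AppException extends RuntimeException {
        AppException(String message, Throwable cause) { super(message, cause); }
    }

    /** Writes a single marker cell for the given row, then releases the table. */
    static void writeTombstoneMarker(HTableInterface table, byte[] rowKey) {
        try {
            Put put = new Put(rowKey);
            // hypothetical family/qualifier names; 0 marks the row as live (not tombstoned)
            put.add(Bytes.toBytes("timestamp"), Bytes.toBytes("tombstoned"), Bytes.toBytes(0));
            table.put(put);
        } catch (IOException e) {
            // translate the checked HBase exception into the application's unchecked type
            throw new AppException("failed to write tombstone marker", e);
        } finally {
            try {
                table.close(); // stands in for SolbaseUtil.releaseTable(docTable)
            } catch (IOException ignored) {
                // closing is best-effort here
            }
        }
    }
}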