Use of org.solbase.cache.CachedObjectWrapper in project Solbase by Photobucket.
From the class ShardDocumentLoader, method loadObject.
/**
 * Builds a minimal document that carries only a "docId" field whose value is
 * derived from the given key, and wraps it for the cache.
 *
 * The {@code start}, {@code end} and {@code cache} arguments are not used by
 * this implementation.
 *
 * @param docIdKey key from which the document id is parsed
 * @param start    unused
 * @param end      unused
 * @param cache    unused
 * @return wrapper holding the new document, a placeholder version of 0 and
 *         the current time in milliseconds
 * @throws IOException declared by the signature
 */
public CachedObjectWrapper<Document, Long> loadObject(String docIdKey, int start, int end, LayeredCache<String, Document, Long, Document> cache) throws IOException {
    // TODO should probably get SchemaField from Schema object.
    final String docIdFieldName = "docId";

    Document shardDocument = new Document();
    shardDocument.add(new Field(docIdFieldName, this.parseDocIdFromKey(docIdKey), Store.YES, Index.ANALYZED));

    // TODO, get from result
    Long placeholderVersion = Long.valueOf(0L);
    long loadedAtMillis = System.currentTimeMillis();
    return new CachedObjectWrapper<Document, Long>(shardDocument, placeholderVersion, loadedAtMillis);
}
Use of org.solbase.cache.CachedObjectWrapper in project Solbase by Photobucket.
From the class TermDocMetadataLoader, method loadObject.
/**
 * Scans the term vector table for {@code term} between the {@code start} and
 * {@code end} doc ids, serialises every returned row into a single byte
 * stream and wraps the result for the cache.
 *
 * Both the scanner and the table are released even when the scan or the
 * row conversion throws.
 *
 * @param term  term whose postings are loaded
 * @param start first doc id used to build the scan start key
 * @param end   doc id used to build the scan end key; when it exceeds
 *              {@code SolbaseShardUtil.getMaxDocId()} the term's end key is
 *              used instead
 * @param cache not used by this method
 * @return wrapper holding the compacted term-doc array, its version
 *         identifier and the current time in milliseconds
 * @throws IOException if the scan fails
 */
public CachedObjectWrapper<CompactedTermDocMetadataArray, TermDocMetadataVersionIdentifier> loadObject(Term term, int start, int end, LayeredCache<Term, CompactedTermDocMetadataArray, TermDocMetadataVersionIdentifier, TermDocMetadata> cache) throws IOException {
    HTableInterface termVectorTable = SolbaseUtil.getTermVectorTable();
    try {
        byte[] termBeginKey = SolbaseUtil.generateTermKey(term, start);
        byte[] termEndKey;
        if (end > SolbaseShardUtil.getMaxDocId()) {
            // Last shard: use the term's own end key so the scan fetches all
            // remaining docs for the term.
            termEndKey = SolbaseUtil.generateTermEndKey(term);
        } else {
            termEndKey = SolbaseUtil.generateTermKey(term, end);
        }

        Scan fieldScan = new Scan(termBeginKey, termEndKey);
        fieldScan.addFamily(SolbaseUtil.termVectorDocColumnFamilyName);
        fieldScan.setBatch(2000);
        fieldScan.setCaching(2000);

        ByteArrayOutputStream bis = new ByteArrayOutputStream();
        int docAmount = 0;

        ResultScanner fieldScanner = termVectorTable.getScanner(fieldScan);
        try {
            Result termDoc;
            while ((termDoc = fieldScanner.next()) != null) {
                if (storageType == STORAGE_TYPE.WIDE_ROW) {
                    // NOTE(review): docAmount is not incremented on this path, so
                    // wide-row scans log and pass 0 docs below — confirm intended.
                    convertResultChunkToTermDoc(termDoc, bis);
                } else {
                    convertResultToTermDoc(termDoc, bis);
                    docAmount++;
                }
            }
        } finally {
            // Close on every path; previously an exception mid-scan leaked the scanner.
            fieldScanner.close();
        }

        logger.info("Read from HBase for term: " + term.toString() + " has this many docs: " + docAmount);

        // TODO LOAD VERSION
        TermDocMetadataVersionIdentifier versionIdentifier = getVersionIdentifier(term, start, end);
        return new CachedObjectWrapper<CompactedTermDocMetadataArray, TermDocMetadataVersionIdentifier>(new CompactedTermDocMetadataArray(bis, docAmount), versionIdentifier, System.currentTimeMillis());
    } finally {
        SolbaseUtil.releaseTable(termVectorTable);
    }
}
Use of org.solbase.cache.CachedObjectWrapper in project Solbase by Photobucket.
From the class DocumentLoader, method loadObject.
/**
 * Fetches the stored fields of document {@code docNum} and rebuilds a
 * {@code Document} from them.
 *
 * Each cell value in the "field" family ends with a one-byte type marker:
 * {@code Byte.MAX_VALUE} marks a binary value and {@code Byte.MIN_VALUE}
 * marks a UTF-8 string. A string containing {@code SolbaseUtil.delimiter}
 * is split and every token is added as its own field.
 *
 * @param docNum document number used to build the row key
 * @param start  not used by this method
 * @param end    not used by this method
 * @param cache  not used by this method
 * @return wrapper holding the document, a placeholder version of 0 and the
 *         current time in milliseconds, or {@code null} when no row is found
 * @throws CorruptIndexException if a cell value is empty or does not end
 *                               with a known type marker
 * @throws IOException           if the lookup fails
 */
public CachedObjectWrapper<Document, Long> loadObject(Integer docNum, int start, int end, LayeredCache<Integer, Document, Long, ParsedDoc> cache) throws IOException {
    final byte[] fieldFamily = Bytes.toBytes("field");

    Get documentGet = new Get(SolbaseUtil.randomize(docNum));
    if (fieldNames == null || fieldNames.size() == 0) {
        // get all columns ( except this skips meta info )
        documentGet.addFamily(fieldFamily);
    } else {
        for (byte[] fieldName : fieldNames) {
            documentGet.addColumn(fieldFamily, fieldName);
        }
    }

    Result documentResult;
    if (this.docTable == null) {
        // No table was supplied: borrow one for this lookup and always give it back.
        HTableInterface borrowedTable = SolbaseUtil.getDocTable();
        try {
            documentResult = borrowedTable.get(documentGet);
        } finally {
            SolbaseUtil.releaseTable(borrowedTable);
        }
    } else {
        documentResult = this.docTable.get(documentGet);
    }

    if (documentResult == null || documentResult.isEmpty()) {
        return null;
    }

    // TODO, get from result
    Long versionIdentifier = 0L;

    Document document = new Document();
    // Loop-invariant: the delimiter separating the values of a multi-valued field.
    final String multiValueDelimiter = Bytes.toString(SolbaseUtil.delimiter);

    NavigableMap<byte[], byte[]> familyMap = documentResult.getFamilyMap(fieldFamily);
    for (Map.Entry<byte[], byte[]> fieldColumn : familyMap.entrySet()) {
        String fieldName = Bytes.toString(fieldColumn.getKey());
        byte[] encoded = fieldColumn.getValue();

        // An empty cell has no type marker at all; report it like any other bad encoding
        // instead of failing with an index-out-of-bounds error.
        if (encoded == null || encoded.length == 0) {
            throw new CorruptIndexException("Solbase field is not properly encoded: " + docNum + "(" + fieldName + ")");
        }

        byte typeMarker = encoded[encoded.length - 1];
        if (typeMarker != Byte.MAX_VALUE && typeMarker != Byte.MIN_VALUE) {
            throw new CorruptIndexException("Solbase field is not properly encoded: " + docNum + "(" + fieldName + ")");
        }

        // Payload is everything except the trailing marker byte.
        byte[] value = new byte[encoded.length - 1];
        System.arraycopy(encoded, 0, value, 0, value.length);

        if (typeMarker == Byte.MAX_VALUE) {
            // Binary
            document.add(new Field(fieldName, value, Store.YES));
            continue;
        }

        // String
        String fieldString = new String(value, "UTF-8");
        if (fieldString.indexOf(multiValueDelimiter) >= 0) {
            // Multi-valued field: one Lucene field per token.
            StringTokenizer tok = new StringTokenizer(fieldString, multiValueDelimiter);
            while (tok.hasMoreTokens()) {
                addSchemaAwareField(document, fieldName, tok.nextToken());
            }
        } else {
            addSchemaAwareField(document, fieldName, fieldString);
        }
    }

    return new CachedObjectWrapper<Document, Long>(document, versionIdentifier, System.currentTimeMillis());
}

/**
 * Adds one string value to {@code document}, using the schema's field type
 * when the field is known and a plain stored, analyzed field otherwise.
 */
private void addSchemaAwareField(Document document, String fieldName, String fieldValue) {
    SchemaField sfield = (schema == null) ? null : schema.getFieldOrNull(fieldName);
    if (sfield == null) {
        // No schema, or the schema does not know this field: fall back to a
        // plain field rather than dereferencing a null SchemaField.
        document.add(new Field(fieldName, fieldValue, Store.YES, Index.ANALYZED));
        return;
    }

    if (sfield.getType() instanceof EmbeddedIndexedIntField) {
        EmbeddedIndexedIntField eiif = (EmbeddedIndexedIntField) sfield.getType();
        document.add(new EmbeddedSortField(fieldName, fieldValue, Field.Store.YES, Field.Index.NO, eiif.getFieldNumber()));
        return;
    }

    Field f = sfield.createField(fieldValue, 1.0f);
    if (f != null) {
        // null fields are not added
        document.add(f);
    }
}
Aggregations