Example 16 with HTableInterface

Use of org.apache.hadoop.hbase.client.HTableInterface in project hive by apache.

Class HBaseReadWrite, method putPartitions:

/**
 * Add a group of partitions.  This should only be used when all partitions are new.  It
 * blindly increments the ref count on the storage descriptor.
 * @param partitions list of partitions to add
 * @throws IOException
 */
void putPartitions(List<Partition> partitions) throws IOException {
    List<Put> puts = new ArrayList<>(partitions.size());
    for (Partition partition : partitions) {
        byte[] hash = putStorageDescriptor(partition.getSd());
        List<String> partTypes = HBaseUtils.getPartitionKeyTypes(getTable(partition.getDbName(), partition.getTableName()).getPartitionKeys());
        byte[][] serialized = HBaseUtils.serializePartition(partition, partTypes, hash);
        Put p = new Put(serialized[0]);
        p.add(CATALOG_CF, CATALOG_COL, serialized[1]);
        puts.add(p);
        partCache.put(partition.getDbName(), partition.getTableName(), partition);
    }
    HTableInterface htab = conn.getHBaseTable(PART_TABLE);
    htab.put(puts);
    conn.flush(htab);
}
Also used: Partition (org.apache.hadoop.hive.metastore.api.Partition), ArrayList (java.util.ArrayList), HTableInterface (org.apache.hadoop.hbase.client.HTableInterface), Put (org.apache.hadoop.hbase.client.Put)
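
A hedged usage sketch, not taken from Hive itself: the HBaseReadWrite instance (store), the database and table names, and the StorageDescriptor (sd) are assumptions; Partition is the Thrift-generated metastore bean, so the setters shown do exist.

// Hypothetical caller; assumes java.util.Collections is imported and that
// "store" and "sd" already exist.
Partition part = new Partition();
part.setDbName("default");
part.setTableName("web_logs");
part.setValues(Collections.singletonList("2016-01-01")); // one value per partition key
part.setSd(sd);
store.putPartitions(Collections.singletonList(part));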

Example 17 with HTableInterface

Use of org.apache.hadoop.hbase.client.HTableInterface in project Solbase by Photobucket.

Class TermDocMetadataLoader, method loadObject:

public CachedObjectWrapper<CompactedTermDocMetadataArray, TermDocMetadataVersionIdentifier> loadObject(Term term, int start, int end, LayeredCache<Term, CompactedTermDocMetadataArray, TermDocMetadataVersionIdentifier, TermDocMetadata> cache) throws IOException {
    HTableInterface termVectorTable = SolbaseUtil.getTermVectorTable();
    try {
        byte[] termBeginKey = SolbaseUtil.generateTermKey(term, start);
        byte[] termEndKey;
        if (end > SolbaseShardUtil.getMaxDocId()) {
            // if we are in the last shard, the end key always starts from \xff\xff\xff\xff,
            // meaning: fetch all remaining docs.
            termEndKey = SolbaseUtil.generateTermEndKey(term);
        } else {
            termEndKey = SolbaseUtil.generateTermKey(term, end);
        }
        Scan fieldScan = new Scan(termBeginKey, termEndKey);
        fieldScan.addFamily(SolbaseUtil.termVectorDocColumnFamilyName);
        fieldScan.setBatch(2000);
        fieldScan.setCaching(2000);
        ResultScanner fieldScanner = termVectorTable.getScanner(fieldScan);
        Result termDoc;
        ByteArrayOutputStream bis = new ByteArrayOutputStream();
        int docAmount = 0;
        while ((termDoc = fieldScanner.next()) != null) {
            if (storageType == STORAGE_TYPE.WIDE_ROW) {
                convertResultChunkToTermDoc(termDoc, bis);
            } else {
                convertResultToTermDoc(termDoc, bis);
                docAmount++;
            }
        }
        fieldScanner.close();
        logger.info("Read from HBase for term: " + term.toString() + " has this many docs: " + docAmount);
        // TODO LOAD VERSION
        TermDocMetadataVersionIdentifier versionIdentifier = getVersionIdentifier(term, start, end);
        return new CachedObjectWrapper<CompactedTermDocMetadataArray, TermDocMetadataVersionIdentifier>(new CompactedTermDocMetadataArray(bis, docAmount), versionIdentifier, System.currentTimeMillis());
    } finally {
        SolbaseUtil.releaseTable(termVectorTable);
    }
}
Also used: CachedObjectWrapper (org.solbase.cache.CachedObjectWrapper), ResultScanner (org.apache.hadoop.hbase.client.ResultScanner), Scan (org.apache.hadoop.hbase.client.Scan), ByteArrayOutputStream (java.io.ByteArrayOutputStream), HTableInterface (org.apache.hadoop.hbase.client.HTableInterface), Result (org.apache.hadoop.hbase.client.Result)
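
The bounded-scan idiom the loader relies on can be reduced to the minimal sketch below. It reuses the table helper calls from the example above; startKey, stopKey, and family are placeholders, not Solbase's actual key or column-family layout.

HTableInterface table = SolbaseUtil.getTermVectorTable();
try {
    Scan scan = new Scan(startKey, stopKey);    // stopKey is exclusive in HBase scans
    scan.addFamily(family);                     // restrict the scan to one column family
    scan.setCaching(2000);                      // rows fetched per RPC round trip
    ResultScanner scanner = table.getScanner(scan);
    try {
        for (Result r : scanner) {
            // process each row here
        }
    } finally {
        scanner.close();
    }
} finally {
    SolbaseUtil.releaseTable(table);            // Solbase's own release helper, as above
}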

Example 18 with HTableInterface

Use of org.apache.hadoop.hbase.client.HTableInterface in project Solbase by Photobucket.

Class TermDocMetadataLoader, method getStaticVersionIdentifier:

public static TermDocMetadataVersionIdentifier getStaticVersionIdentifier(Term key, int startDocId, int endDocId) throws IOException {
    HTableInterface termVectorVersionIDTable = SolbaseUtil.getTermVectorVersionIDTable();
    try {
        byte[] fieldTermKey = SolbaseUtil.generateTermKey(key);
        Get get = new Get(Bytes.add(fieldTermKey, Bytes.toBytes(startDocId), Bytes.toBytes(endDocId)));
        Result result = termVectorVersionIDTable.get(get);
        if (result.isEmpty()) {
            Put updatePut = new Put(Bytes.add(fieldTermKey, Bytes.toBytes(startDocId), Bytes.toBytes(endDocId)));
            long currentTime = System.currentTimeMillis();
            updatePut.add(SolbaseUtil.timestampColumnFamilyName, Bytes.toBytes(""), Bytes.toBytes(currentTime));
            termVectorVersionIDTable.put(updatePut);
            return new TermDocMetadataVersionIdentifier(currentTime, startDocId, endDocId);
        } else {
            return new TermDocMetadataVersionIdentifier(Bytes.toLong(result.getValue(Bytes.toBytes("timestamp"), Bytes.toBytes(""))), startDocId, endDocId);
        }
    } finally {
        SolbaseUtil.releaseTable(termVectorVersionIDTable);
    }
}
Also used: Get (org.apache.hadoop.hbase.client.Get), HTableInterface (org.apache.hadoop.hbase.client.HTableInterface), Put (org.apache.hadoop.hbase.client.Put), Result (org.apache.hadoop.hbase.client.Result)
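
Worth noting: the get-then-put above is not atomic, so two concurrent callers could both see an empty result and both write a version row. If that mattered, HTableInterface also exposes checkAndPut, which expresses "insert only if the cell is absent" in a single call. A hedged sketch reusing the key and family names from the example (not verified against Solbase):

byte[] row = Bytes.add(fieldTermKey, Bytes.toBytes(startDocId), Bytes.toBytes(endDocId));
Put updatePut = new Put(row);
long currentTime = System.currentTimeMillis();
updatePut.add(SolbaseUtil.timestampColumnFamilyName, Bytes.toBytes(""), Bytes.toBytes(currentTime));
// a null expected value means "apply the Put only if this cell does not exist yet"
boolean inserted = termVectorVersionIDTable.checkAndPut(
        row, SolbaseUtil.timestampColumnFamilyName, Bytes.toBytes(""), null, updatePut);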

Example 19 with HTableInterface

Use of org.apache.hadoop.hbase.client.HTableInterface in project Solbase by Photobucket.

Class DocumentLoader, method loadObject:

public CachedObjectWrapper<Document, Long> loadObject(Integer docNum, int start, int end, LayeredCache<Integer, Document, Long, ParsedDoc> cache) throws IOException {
    Document document = new Document();
    Get documentGet = new Get(SolbaseUtil.randomize(docNum));
    if (fieldNames == null || fieldNames.size() == 0) {
        // get all columns (this skips the meta info family)
        documentGet.addFamily(Bytes.toBytes("field"));
    } else {
        for (byte[] fieldName : fieldNames) {
            documentGet.addColumn(Bytes.toBytes("field"), fieldName);
        }
    }
    Result documentResult = null;
    // if docTable is already set, reuse it; otherwise create a new one and release it when done
    if (this.docTable == null) {
        HTableInterface docTable = null;
        try {
            docTable = SolbaseUtil.getDocTable();
            documentResult = docTable.get(documentGet);
        } finally {
            SolbaseUtil.releaseTable(docTable);
        }
    } else {
        documentResult = this.docTable.get(documentGet);
    }
    if (documentResult == null || documentResult.isEmpty()) {
        return null;
    }
    // TODO, get from result
    Long versionIdentifier = 0L;
    NavigableMap<byte[], byte[]> familyMap = documentResult.getFamilyMap(Bytes.toBytes("field"));
    for (Map.Entry<byte[], byte[]> fieldColumn : familyMap.entrySet()) {
        Field field = null;
        String fieldName = Bytes.toString(fieldColumn.getKey());
        byte[] value;
        ByteBuffer v = ByteBuffer.wrap(fieldColumn.getValue());
        int vlimit = v.limit() + v.arrayOffset();
        if (v.array()[vlimit - 1] != Byte.MAX_VALUE && v.array()[vlimit - 1] != Byte.MIN_VALUE) {
            throw new CorruptIndexException("Solbase field is not properly encoded: " + docNum + "(" + fieldName + ")");
        } else if (v.array()[vlimit - 1] == Byte.MAX_VALUE) {
            // Binary
            value = new byte[vlimit - 1];
            System.arraycopy(v.array(), v.position() + v.arrayOffset(), value, 0, vlimit - 1);
            field = new Field(fieldName, value, Store.YES);
            document.add(field);
        } else if (v.array()[vlimit - 1] == Byte.MIN_VALUE) {
            // String
            value = new byte[vlimit - 1];
            System.arraycopy(v.array(), v.position() + v.arrayOffset(), value, 0, vlimit - 1);
            // Check for multi-fields
            String fieldString = new String(value, "UTF-8");
            if (fieldString.indexOf(Bytes.toString(SolbaseUtil.delimiter)) >= 0) {
                StringTokenizer tok = new StringTokenizer(fieldString, Bytes.toString(SolbaseUtil.delimiter));
                while (tok.hasMoreTokens()) {
                    // update logic
                    if (schema != null) {
                        SchemaField sfield = schema.getFieldOrNull(fieldName);
                        if (sfield.getType() instanceof EmbeddedIndexedIntField) {
                            EmbeddedIndexedIntField eiif = (EmbeddedIndexedIntField) sfield.getType();
                            EmbeddedSortField sf = new EmbeddedSortField(fieldName, tok.nextToken(), Field.Store.YES, Field.Index.NO, eiif.getFieldNumber());
                            document.add(sf);
                        } else {
                            Field f = sfield.createField(tok.nextToken(), 1.0f);
                            if (f != null) {
                                // null fields are not added
                                document.add(f);
                            }
                        }
                    } else {
                        field = new Field(fieldName, tok.nextToken(), Store.YES, Index.ANALYZED);
                        document.add(field);
                    }
                }
            } else {
                // update logic
                if (schema != null) {
                    SchemaField sfield = schema.getFieldOrNull(fieldName);
                    if (sfield.getType() instanceof EmbeddedIndexedIntField) {
                        EmbeddedIndexedIntField eiif = (EmbeddedIndexedIntField) sfield.getType();
                        EmbeddedSortField sf = new EmbeddedSortField(fieldName, fieldString, Field.Store.YES, Field.Index.NO, eiif.getFieldNumber());
                        document.add(sf);
                    } else {
                        Field f = sfield.createField(fieldString, 1.0f);
                        if (f != null) {
                            // null fields are not added
                            document.add(f);
                        }
                    }
                } else {
                    field = new Field(fieldName, fieldString, Store.YES, Index.ANALYZED);
                    document.add(field);
                }
            }
        }
    }
    return new CachedObjectWrapper<Document, Long>(document, versionIdentifier, System.currentTimeMillis());
}
Also used: CachedObjectWrapper (org.solbase.cache.CachedObjectWrapper), CorruptIndexException (org.apache.lucene.index.CorruptIndexException), EmbeddedSortField (org.apache.lucene.document.EmbeddedSortField), Document (org.apache.lucene.document.Document), SolrInputDocument (org.apache.solr.common.SolrInputDocument), HTableInterface (org.apache.hadoop.hbase.client.HTableInterface), ByteBuffer (java.nio.ByteBuffer), Result (org.apache.hadoop.hbase.client.Result), SchemaField (org.apache.solr.schema.SchemaField), EmbeddedIndexedIntField (org.apache.solr.schema.EmbeddedIndexedIntField), Field (org.apache.lucene.document.Field), StringTokenizer (java.util.StringTokenizer), Get (org.apache.hadoop.hbase.client.Get), Map (java.util.Map), NavigableMap (java.util.NavigableMap)
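
The trailing sentinel byte the loader checks for (Byte.MAX_VALUE marks a binary value, Byte.MIN_VALUE marks a UTF-8 string) can be illustrated with a small stand-alone sketch; encodeStringField is a hypothetical helper for illustration only, not part of Solbase:

// Hypothetical illustration of the value layout DocumentLoader expects:
// the payload bytes are followed by exactly one flag byte.
static byte[] encodeStringField(String s) throws java.io.UnsupportedEncodingException {
    byte[] payload = s.getBytes("UTF-8");
    byte[] out = new byte[payload.length + 1];
    System.arraycopy(payload, 0, out, 0, payload.length);
    out[out.length - 1] = Byte.MIN_VALUE;   // MIN_VALUE = string; MAX_VALUE would mark binary
    return out;
}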

Example 20 with HTableInterface

Use of org.apache.hadoop.hbase.client.HTableInterface in project Solbase by Photobucket.

Class IndexWriter, method updateDocument:

public void updateDocument(Put documentPut, Document doc) {
    String uniqId = doc.get("global_uniq_id");
    Put mappingPut = new Put(Bytes.toBytes(uniqId));
    mappingPut.add(SolbaseUtil.docIdColumnFamilyName, SolbaseUtil.tombstonedColumnFamilyQualifierBytes, Bytes.toBytes(0));
    updateDocKeyIdMap(mappingPut);
    // for remote server updates via a Solr update we want to use
    // getDocTable(), but for now map/reduce can use the local htable
    HTableInterface docTable = SolbaseUtil.getDocTable();
    // insert the document into the doc table
    try {
        documentPut.add(SolbaseUtil.timestampColumnFamilyName, SolbaseUtil.tombstonedColumnFamilyQualifierBytes, Bytes.toBytes(0));
        docTable.put(documentPut);
    } catch (IOException e) {
        throw new SolbaseException(SolbaseException.ErrorCode.SERVER_ERROR, e.getMessage());
    } finally {
        SolbaseUtil.releaseTable(docTable);
    }
}
Also used: SolbaseException (org.solbase.common.SolbaseException), IOException (java.io.IOException), HTableInterface (org.apache.hadoop.hbase.client.HTableInterface), Put (org.apache.hadoop.hbase.client.Put)
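
A hedged usage sketch; the writer instance, the document id, and the row-key convention (SolbaseUtil.randomize, borrowed from the DocumentLoader example above) are assumptions rather than Solbase's documented entry point. Lucene 3.x Field API assumed.

// Hypothetical caller; "writer" is an IndexWriter instance.
int docId = 42;
Document doc = new Document();
doc.add(new Field("global_uniq_id", "photo-42", Field.Store.YES, Field.Index.NOT_ANALYZED));
Put documentPut = new Put(SolbaseUtil.randomize(docId));   // same row-key scheme as DocumentLoader
writer.updateDocument(documentPut, doc);                   // adds the tombstone column and writes the row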

Aggregations

HTableInterface (org.apache.hadoop.hbase.client.HTableInterface): 117
Result (org.apache.hadoop.hbase.client.Result): 43
Put (org.apache.hadoop.hbase.client.Put): 41
IOException (java.io.IOException): 36
ArrayList (java.util.ArrayList): 26
PhoenixConnection (org.apache.phoenix.jdbc.PhoenixConnection): 23
Get (org.apache.hadoop.hbase.client.Get): 21
Scan (org.apache.hadoop.hbase.client.Scan): 21
Test (org.junit.Test): 20
SQLException (java.sql.SQLException): 19
ResultScanner (org.apache.hadoop.hbase.client.ResultScanner): 17
Connection (java.sql.Connection): 15
HashMap (java.util.HashMap): 15
HBaseAdmin (org.apache.hadoop.hbase.client.HBaseAdmin): 13
Delete (org.apache.hadoop.hbase.client.Delete): 12
Mutation (org.apache.hadoop.hbase.client.Mutation): 12
PhoenixIOException (org.apache.phoenix.exception.PhoenixIOException): 11
ResultSet (java.sql.ResultSet): 10
Configuration (org.apache.hadoop.conf.Configuration): 9
ConnectionQueryServices (org.apache.phoenix.query.ConnectionQueryServices): 9