Search in sources :

Example 21 with ByteArrayRef

use of org.apache.hadoop.hive.serde2.lazy.ByteArrayRef in project hive by apache.

the class LazyDioInteger method init.

/* (non-Javadoc)
   * This provides a LazyInteger like class which can be initialized from data stored in a
   * binary format.
   *
   * @see org.apache.hadoop.hive.serde2.lazy.LazyObject#init
   *        (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int)
   */
@Override
public void init(ByteArrayRef bytes, int start, int length) {
    // Reads a single big-endian int from the given byte range and stores it
    // in `data`; on a short/failed read the value is marked null instead.
    int value = 0;
    try {
        in = new ByteStream.Input(bytes.getData(), start, length);
        din = new DataInputStream(in);
        value = din.readInt();
        data.set(value);
        isNull = false;
    } catch (IOException e) {
        // Not enough bytes (EOF) or other read failure: flag the value as null.
        isNull = true;
    } finally {
        // Guard against NPE: if construction above threw before the stream
        // fields were (re)assigned, they may still be null.
        if (din != null) {
            try {
                din.close();
            } catch (IOException e) {
            // swallow exception — closing an in-memory stream cannot usefully fail
            }
        }
        if (in != null) {
            try {
                in.close();
            } catch (IOException e) {
            // swallow exception
            }
        }
    }
}
Also used : ByteStream(org.apache.hadoop.hive.serde2.ByteStream) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream)

Example 22 with ByteArrayRef

use of org.apache.hadoop.hive.serde2.lazy.ByteArrayRef in project hive by apache.

the class LazyDioShort method init.

/* (non-Javadoc)
   * This provides a LazyShort like class which can be initialized from data stored in a
   * binary format.
   *
   * @see org.apache.hadoop.hive.serde2.lazy.LazyObject#init
   *        (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int)
   */
@Override
public void init(ByteArrayRef bytes, int start, int length) {
    // Reads a single big-endian short from the given byte range and stores it
    // in `data`; any failure (including a short read) marks the value as null.
    short value = 0;
    try {
        in = new ByteStream.Input(bytes.getData(), start, length);
        din = new DataInputStream(in);
        value = din.readShort();
        data.set(value);
        isNull = false;
    } catch (Exception e) {
        // Broad catch kept intentionally: also covers runtime failures
        // (e.g. a null data array), matching the original contract.
        isNull = true;
    } finally {
        // Guard against NPE: the stream fields may be null if construction
        // above failed before they were (re)assigned.
        if (din != null) {
            try {
                din.close();
            } catch (IOException e) {
            // swallow exception — closing an in-memory stream cannot usefully fail
            }
        }
        if (in != null) {
            try {
                in.close();
            } catch (IOException e) {
            // swallow exception
            }
        }
    }
}
Also used : ByteStream(org.apache.hadoop.hive.serde2.ByteStream) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream)

Example 23 with ByteArrayRef

use of org.apache.hadoop.hive.serde2.lazy.ByteArrayRef in project hive by apache.

the class AccumuloCompositeRowId method toLazyObject.

/**
   * Create and initialize a {@link LazyObject} with the given bytes for the given fieldID.
   *
   * @param fieldID
   *          field for which the object is to be created
   * @param bytes
   *          value with which the object is to be initialized with
   * @return initialized {@link LazyObject}
   * */
public LazyObject<? extends ObjectInspector> toLazyObject(int fieldID, byte[] bytes) {
    // Look up the inspector for the requested struct field, then build a
    // matching lazy object and point it at the supplied raw bytes.
    ObjectInspector fieldInspector = oi.getAllStructFieldRefs().get(fieldID).getFieldObjectInspector();
    LazyObject<? extends ObjectInspector> result = LazyFactory.createLazyObject(fieldInspector);
    ByteArrayRef dataRef = new ByteArrayRef();
    dataRef.setData(bytes);
    // Initialize over the full byte range so the lazy object deserializes on demand.
    result.init(dataRef, 0, dataRef.getData().length);
    return result;
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)

Example 24 with ByteArrayRef

use of org.apache.hadoop.hive.serde2.lazy.ByteArrayRef in project hive by apache.

the class TestAccumuloRowSerializer method testVisibilityLabel.

@Test
public void testVisibilityLabel() throws IOException, SerDeException {
    // Schema: string rowid, two binary-encoded int columns, one plain string column.
    List<String> columnNames = Arrays.asList("row", "cq1", "cq2", "cq3");
    List<TypeInfo> columnTypes = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
    List<String> columnTypeNames = new ArrayList<String>(columnTypes.size());
    for (TypeInfo columnType : columnTypes) {
        columnTypeNames.add(columnType.getTypeName());
    }
    // The '#b' suffix in the mapping marks cq1/cq2 as binary-serialized.
    Properties tableProperties = new Properties();
    tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq1#b,cf:cq2#b,cf:cq3");
    tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(columnTypeNames));
    AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
    LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
    LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columnNames, columnTypes, serDeParams.getSeparators(), serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    // Every cell emitted by this serializer should carry the "foo" visibility label.
    AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility("foo"), accumuloSerDeParams.getRowIdFactory());
    // Build a lazy struct over the raw row text "row1 10 20 value".
    LazyStruct row = (LazyStruct) LazyFactory.createLazyObject(structOI);
    ByteArrayRef rowBytes = new ByteArrayRef();
    rowBytes.setData(new byte[] { 'r', 'o', 'w', '1', ' ', '1', '0', ' ', '2', '0', ' ', 'v', 'a', 'l', 'u', 'e' });
    row.init(rowBytes, 0, rowBytes.getData().length);
    Mutation m = (Mutation) serializer.serialize(row, structOI);
    Assert.assertArrayEquals("row1".getBytes(), m.getRow());
    List<ColumnUpdate> updates = m.getUpdates();
    Assert.assertEquals(3, updates.size());
    // Streams used to produce the expected binary int encodings for cq1/cq2.
    ByteArrayOutputStream expectedBytes = new ByteArrayOutputStream();
    DataOutputStream expectedWriter = new DataOutputStream(expectedBytes);
    // cq1: binary-encoded int 10 under visibility "foo".
    ColumnUpdate update = updates.get(0);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals("cq1", new String(update.getColumnQualifier()));
    Assert.assertEquals("foo", new String(update.getColumnVisibility()));
    expectedWriter.writeInt(10);
    Assert.assertArrayEquals(expectedBytes.toByteArray(), update.getValue());
    // cq2: binary-encoded int 20 under visibility "foo".
    update = updates.get(1);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals("cq2", new String(update.getColumnQualifier()));
    Assert.assertEquals("foo", new String(update.getColumnVisibility()));
    expectedBytes.reset();
    expectedWriter.writeInt(20);
    Assert.assertArrayEquals(expectedBytes.toByteArray(), update.getValue());
    // cq3: plain string "value" under visibility "foo".
    update = updates.get(2);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals("cq3", new String(update.getColumnQualifier()));
    Assert.assertEquals("foo", new String(update.getColumnVisibility()));
    Assert.assertEquals("value", new String(update.getValue()));
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ColumnUpdate(org.apache.accumulo.core.data.ColumnUpdate) Configuration(org.apache.hadoop.conf.Configuration) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) DataOutputStream(java.io.DataOutputStream) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) Mutation(org.apache.accumulo.core.data.Mutation) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) Test(org.junit.Test)

Example 25 with ByteArrayRef

use of org.apache.hadoop.hive.serde2.lazy.ByteArrayRef in project hive by apache.

the class MultiDelimitSerDe method doDeserialize.

@Override
public Object doDeserialize(Writable blob) throws SerDeException {
    // Lazily allocate the reusable byte holder on first use.
    if (byteArrayRef == null) {
        byteArrayRef = new ByteArrayRef();
    }
    // we use the default field delimiter('\1') to replace the multiple-char field delimiter
    // but we cannot use it to parse the row since column data can contain '\1' as well
    String rowStr;
    if (blob instanceof BytesWritable) {
        BytesWritable b = (BytesWritable) blob;
        // BUG FIX: BytesWritable.getBytes() returns the full backing array, which may be
        // longer than the valid payload when the buffer is reused; honor getLength() so
        // stale trailing bytes are not decoded into the row. (Still decodes with the
        // platform default charset, as before — presumably UTF-8 in Hive deployments;
        // confirm before changing.)
        rowStr = new String(b.getBytes(), 0, b.getLength());
    } else if (blob instanceof Text) {
        Text rowText = (Text) blob;
        rowStr = rowText.toString();
    } else {
        throw new SerDeException(getClass() + ": expects either BytesWritable or Text object!");
    }
    // Collapse the multi-char delimiter to '\1' for the lazy-struct byte view.
    byteArrayRef.setData(rowStr.replaceAll(Pattern.quote(fieldDelimited), "\1").getBytes());
    cachedLazyStruct.init(byteArrayRef, 0, byteArrayRef.getData().length);
    // use the multi-char delimiter to parse the lazy struct
    cachedLazyStruct.parseMultiDelimit(rowStr.getBytes(), fieldDelimited.getBytes());
    return cachedLazyStruct;
}
Also used : ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) BytesWritable(org.apache.hadoop.io.BytesWritable) Text(org.apache.hadoop.io.Text)

Aggregations

ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)27 LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector)12 Text (org.apache.hadoop.io.Text)11 Test (org.junit.Test)11 Mutation (org.apache.accumulo.core.data.Mutation)10 LazyStruct (org.apache.hadoop.hive.serde2.lazy.LazyStruct)9 DataInputStream (java.io.DataInputStream)8 Properties (java.util.Properties)8 Configuration (org.apache.hadoop.conf.Configuration)8 ByteStream (org.apache.hadoop.hive.serde2.ByteStream)8 LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters)8 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)8 IOException (java.io.IOException)7 Connector (org.apache.accumulo.core.client.Connector)6 MockInstance (org.apache.accumulo.core.client.mock.MockInstance)6 PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken)6 Key (org.apache.accumulo.core.data.Key)6 Value (org.apache.accumulo.core.data.Value)6 Authorizations (org.apache.accumulo.core.security.Authorizations)6 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)6