Search in sources:

Example 6 with AccumuloSerDeParameters

Use of org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters in the Apache Hive project.

From the class TestLazyAccumuloRow, the method testDeserializationOfBinaryEncoding:

/**
 * Verifies that a row whose integer columns were written with Hadoop's binary
 * (DataOutputStream) encoding is deserialized correctly when the serde's default
 * storage type is set to {@code binary}. String columns are mapped with an explicit
 * {@code #s} (string) suffix and must still come back as plain strings.
 */
@Test
public void testDeserializationOfBinaryEncoding() throws Exception {
    // Six logical columns: the rowid, two string fields and three int fields.
    List<String> columns = Arrays.asList("row", "given_name", "surname", "age", "weight", "height");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo);
    LazySimpleStructObjectInspector objectInspector = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columns, types, LazySerDeParameters.DefaultSeparators, new Text("\\N"), false, false, (byte) '\\');
    DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory();
    Properties props = new Properties();
    // The string columns carry an explicit #s suffix; the int columns have no suffix and
    // therefore fall back to DEFAULT_STORAGE_TYPE, which is set to binary below.
    props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid#s,personal:given_name#s,personal:surname#s,personal:age,personal:weight,personal:height");
    props.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
    props.setProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, ColumnEncoding.BINARY.getName());
    AccumuloSerDeParameters params = new AccumuloSerDeParameters(new Configuration(), props, AccumuloSerDe.class.getName());
    rowIdFactory.init(params, props);
    // Scratch stream used to produce the 4-byte big-endian int encoding for each cell.
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(baos);
    LazyAccumuloRow lazyRow = new LazyAccumuloRow(objectInspector);
    AccumuloHiveRow hiveRow = new AccumuloHiveRow("1");
    // Use an explicit charset instead of the platform default for the string cells.
    hiveRow.add("personal", "given_name", "Bob".getBytes(java.nio.charset.StandardCharsets.UTF_8));
    hiveRow.add("personal", "surname", "Stevens".getBytes(java.nio.charset.StandardCharsets.UTF_8));
    out.writeInt(30);
    hiveRow.add("personal", "age", baos.toByteArray());
    baos.reset();
    out.writeInt(200);
    hiveRow.add("personal", "weight", baos.toByteArray());
    baos.reset();
    out.writeInt(72);
    hiveRow.add("personal", "height", baos.toByteArray());
    ColumnMapper columnMapper = params.getColumnMapper();
    lazyRow.init(hiveRow, columnMapper.getColumnMappings(), rowIdFactory);
    // Field 0: the rowid, decoded as a string.
    Object o = lazyRow.getField(0);
    Assert.assertNotNull(o);
    Assert.assertEquals(LazyString.class, o.getClass());
    Assert.assertEquals("1", ((LazyString) o).toString());
    // Fields 1-2: #s-mapped string columns.
    o = lazyRow.getField(1);
    Assert.assertNotNull(o);
    Assert.assertEquals(LazyString.class, o.getClass());
    Assert.assertEquals("Bob", ((LazyString) o).toString());
    o = lazyRow.getField(2);
    Assert.assertNotNull(o);
    Assert.assertEquals(LazyString.class, o.getClass());
    Assert.assertEquals("Stevens", ((LazyString) o).toString());
    // Fields 3-5: binary-encoded ints must come back as LazyDioInteger, not LazyInteger.
    o = lazyRow.getField(3);
    Assert.assertNotNull(o);
    Assert.assertEquals(LazyDioInteger.class, o.getClass());
    Assert.assertEquals("30", ((LazyDioInteger) o).toString());
    o = lazyRow.getField(4);
    Assert.assertNotNull(o);
    Assert.assertEquals(LazyDioInteger.class, o.getClass());
    Assert.assertEquals("200", ((LazyDioInteger) o).toString());
    o = lazyRow.getField(5);
    Assert.assertNotNull(o);
    Assert.assertEquals(LazyDioInteger.class, o.getClass());
    Assert.assertEquals("72", ((LazyDioInteger) o).toString());
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) DataOutputStream(java.io.DataOutputStream) Text(org.apache.hadoop.io.Text) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) DefaultAccumuloRowIdFactory(org.apache.hadoop.hive.accumulo.serde.DefaultAccumuloRowIdFactory) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) AccumuloSerDe(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe) AccumuloSerDeParameters(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper) Test(org.junit.Test)

Example 7 with AccumuloSerDeParameters

Use of org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters in the Apache Hive project.

From the class TestLazyAccumuloRow, the method testExpectedDeserializationOfColumns:

/**
 * Verifies the default (string-encoded) deserialization path: with no encoding suffixes
 * in the column mapping and no DEFAULT_STORAGE_TYPE override, string columns come back
 * as {@link LazyString} and int columns as {@link LazyInteger}.
 */
@Test
public void testExpectedDeserializationOfColumns() throws Exception {
    // Six logical columns: the rowid, two string fields and three int fields.
    List<String> columns = Arrays.asList("row", "given_name", "surname", "age", "weight", "height");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo);
    LazySimpleStructObjectInspector objectInspector = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columns, types, LazySerDeParameters.DefaultSeparators, new Text("\\N"), false, false, (byte) '\\');
    DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory();
    Properties props = new Properties();
    // No #s/#b suffixes: every column uses the default (string) encoding.
    props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,personal:given_name,personal:surname,personal:age,personal:weight,personal:height");
    props.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
    AccumuloSerDeParameters params = new AccumuloSerDeParameters(new Configuration(), props, AccumuloSerDe.class.getName());
    rowIdFactory.init(params, props);
    LazyAccumuloRow lazyRow = new LazyAccumuloRow(objectInspector);
    AccumuloHiveRow hiveRow = new AccumuloHiveRow("1");
    // Use an explicit charset instead of the platform default; all cell values are
    // string-encoded here, including the integers.
    hiveRow.add("personal", "given_name", "Bob".getBytes(java.nio.charset.StandardCharsets.UTF_8));
    hiveRow.add("personal", "surname", "Stevens".getBytes(java.nio.charset.StandardCharsets.UTF_8));
    hiveRow.add("personal", "age", "30".getBytes(java.nio.charset.StandardCharsets.UTF_8));
    hiveRow.add("personal", "weight", "200".getBytes(java.nio.charset.StandardCharsets.UTF_8));
    hiveRow.add("personal", "height", "72".getBytes(java.nio.charset.StandardCharsets.UTF_8));
    ColumnMapper columnMapper = params.getColumnMapper();
    lazyRow.init(hiveRow, columnMapper.getColumnMappings(), rowIdFactory);
    // Guard each field with assertNotNull (consistent with the sibling binary-encoding
    // test) so a missing field fails with a clear assertion instead of an NPE.
    Object o = lazyRow.getField(0);
    Assert.assertNotNull(o);
    Assert.assertEquals(LazyString.class, o.getClass());
    Assert.assertEquals("1", ((LazyString) o).toString());
    o = lazyRow.getField(1);
    Assert.assertNotNull(o);
    Assert.assertEquals(LazyString.class, o.getClass());
    Assert.assertEquals("Bob", ((LazyString) o).toString());
    o = lazyRow.getField(2);
    Assert.assertNotNull(o);
    Assert.assertEquals(LazyString.class, o.getClass());
    Assert.assertEquals("Stevens", ((LazyString) o).toString());
    // String-encoded ints deserialize as LazyInteger (not LazyDioInteger).
    o = lazyRow.getField(3);
    Assert.assertNotNull(o);
    Assert.assertEquals(LazyInteger.class, o.getClass());
    Assert.assertEquals("30", ((LazyInteger) o).toString());
    o = lazyRow.getField(4);
    Assert.assertNotNull(o);
    Assert.assertEquals(LazyInteger.class, o.getClass());
    Assert.assertEquals("200", ((LazyInteger) o).toString());
    o = lazyRow.getField(5);
    Assert.assertNotNull(o);
    Assert.assertEquals(LazyInteger.class, o.getClass());
    Assert.assertEquals("72", ((LazyInteger) o).toString());
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) Text(org.apache.hadoop.io.Text) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) DefaultAccumuloRowIdFactory(org.apache.hadoop.hive.accumulo.serde.DefaultAccumuloRowIdFactory) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) AccumuloSerDe(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe) AccumuloSerDeParameters(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper) Test(org.junit.Test)

Example 8 with AccumuloSerDeParameters

Use of org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters in the Apache Hive project.

From the class AccumuloStorageHandler, the method configureJobConf:

/**
 * Prepares the given JobConf for an Accumulo-backed table: ships dependency jars,
 * lets the row-id factory add its own jars, and — when SASL/Kerberos is in use —
 * obtains an Accumulo delegation token and merges it into the job's credentials.
 * Failures to build the serde parameters are logged and the method returns without
 * further configuration; only token acquisition failures are fatal.
 */
@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
    // Ship the storage handler's dependent jars with the job.
    helper.loadDependentJars(jobConf);
    Properties tblProperties = tableDesc.getProperties();
    AccumuloSerDeParameters serDeParams = null;
    try {
        serDeParams = new AccumuloSerDeParameters(jobConf, tblProperties, AccumuloSerDe.class.getName());
    } catch (SerDeException e) {
        // Deliberate best-effort: nothing below can run without the serde parameters,
        // so log and leave the JobConf as-is rather than failing job setup here.
        LOG.error("Could not instantiate AccumuloSerDeParameters", e);
        return;
    }
    try {
        // Give a custom row-id factory the chance to ship its own dependency jars.
        serDeParams.getRowIdFactory().addDependencyJars(jobConf);
    } catch (IOException e) {
        // Non-fatal: the job may still work if the jars are already on the cluster classpath.
        LOG.error("Could not add necessary dependencies for " + serDeParams.getRowIdFactory().getClass(), e);
    }
    // With SASL enabled, obtain an Accumulo delegation token and attach it to the
    // Job so that it gets passed down to the YARN/Tez task.
    if (connectionParams.useSasl()) {
        try {
            // Open an accumulo connection
            Connector conn = connectionParams.getConnector();
            // Convert the Accumulo token in a Hadoop token
            Token<? extends TokenIdentifier> accumuloToken = helper.setConnectorInfoForInputAndOutput(connectionParams, conn, jobConf);
            LOG.debug("Adding Hadoop Token for Accumulo to Job's Credentials");
            // Add the Hadoop token to the JobConf
            helper.mergeTokenIntoJobConf(jobConf, accumuloToken);
            LOG.debug("All job tokens: " + jobConf.getCredentials().getAllTokens());
        } catch (Exception e) {
            // Token acquisition is required for a secure cluster, so this failure is fatal.
            throw new RuntimeException("Failed to obtain DelegationToken for " + connectionParams.getAccumuloUserName(), e);
        }
    }
}
Also used : Connector(org.apache.accumulo.core.client.Connector) AccumuloSerDeParameters(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters) IOException(java.io.IOException) Properties(java.util.Properties) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) TableExistsException(org.apache.accumulo.core.client.TableExistsException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) IOException(java.io.IOException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)

Aggregations

Properties (java.util.Properties)8 AccumuloSerDeParameters (org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters)8 Configuration (org.apache.hadoop.conf.Configuration)7 AccumuloSerDe (org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe)7 LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector)7 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)7 Text (org.apache.hadoop.io.Text)7 Test (org.junit.Test)7 Connector (org.apache.accumulo.core.client.Connector)5 Entry (java.util.Map.Entry)4 Instance (org.apache.accumulo.core.client.Instance)4 MockInstance (org.apache.accumulo.core.client.mock.MockInstance)4 PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken)4 Key (org.apache.accumulo.core.data.Key)4 Mutation (org.apache.accumulo.core.data.Mutation)4 Value (org.apache.accumulo.core.data.Value)4 Authorizations (org.apache.accumulo.core.security.Authorizations)4 FileSystem (org.apache.hadoop.fs.FileSystem)4 AccumuloRowSerializer (org.apache.hadoop.hive.accumulo.serde.AccumuloRowSerializer)4 ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)4