
Example 1 with HCatRecord

Use of org.apache.hive.hcatalog.data.HCatRecord in the apache/beam project.

From the class PartitionReaderFn, method processElement:

@ProcessElement
public void processElement(ProcessContext c) throws Exception {
    final Read readRequest = c.element().getKey();
    final Integer partitionIndexToRead = c.element().getValue();
    ReaderContext readerContext = getReaderContext(readRequest, partitionIndexToRead);
    for (int i = 0; i < readerContext.numSplits(); i++) {
        HCatReader reader = DataTransferFactory.getHCatReader(readerContext, i);
        Iterator<HCatRecord> hcatIterator = reader.read();
        while (hcatIterator.hasNext()) {
            final HCatRecord record = hcatIterator.next();
            c.output(record);
        }
    }
}
Also used : Read(org.apache.beam.sdk.io.hcatalog.HCatalogIO.Read) ReaderContext(org.apache.hive.hcatalog.data.transfer.ReaderContext) HCatReader(org.apache.hive.hcatalog.data.transfer.HCatReader) HCatRecord(org.apache.hive.hcatalog.data.HCatRecord)
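
The getReaderContext helper called above is not shown in this snippet. Below is a minimal sketch of what such a helper could look like with the HCatalog data transfer API; the real Beam helper takes the Read spec and the partition index shown above, while this sketch simplifies to database/table parameters, so the signature, parameter names, and the use of ReadEntity (also from org.apache.hive.hcatalog.data.transfer) are assumptions for illustration, not the Beam implementation.

private ReaderContext getReaderContext(Map<String, String> configProperties, String database, String table) throws HCatException {
    // Describe the table to read on the "master" side of the data transfer API.
    ReadEntity entity = new ReadEntity.Builder()
        .withDatabase(database)
        .withTable(table)
        .build();
    // prepareRead() contacts the metastore and returns a serializable ReaderContext whose
    // splits are later consumed via DataTransferFactory.getHCatReader(readerContext, splitId).
    HCatReader masterReader = DataTransferFactory.getHCatReader(entity, configProperties);
    return masterReader.prepareRead();
}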

Example 2 with HCatRecord

Use of org.apache.hive.hcatalog.data.HCatRecord in the apache/hive project.

From the class TestHCatLoaderEncryption, method testReadDataFromEncryptedHiveTableByHCatMR:

@Test
public void testReadDataFromEncryptedHiveTableByHCatMR() throws Exception {
    assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS));
    readRecords.clear();
    Configuration conf = new Configuration();
    Job job = new Job(conf, "hcat mapreduce read encryption test");
    job.setJarByClass(this.getClass());
    job.setMapperClass(TestHCatLoaderEncryption.MapRead.class);
    // input/output settings
    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    HCatInputFormat.setInput(job, Warehouse.DEFAULT_DATABASE_NAME, ENCRYPTED_TABLE, null);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(0);
    // obtain an initialized local FileSystem instead of an un-initialized new LocalFileSystem()
    FileSystem fs = FileSystem.getLocal(conf);
    String pathLoc = TEST_DATA_DIR + "/testHCatMREncryptionOutput";
    Path path = new Path(pathLoc);
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
    TextOutputFormat.setOutputPath(job, new Path(pathLoc));
    job.waitForCompletion(true);
    int numTuplesRead = 0;
    for (HCatRecord hCatRecord : readRecords) {
        assertEquals(2, hCatRecord.size());
        assertNotNull(hCatRecord.get(0));
        assertNotNull(hCatRecord.get(1));
        assertTrue(hCatRecord.get(0).getClass() == Integer.class);
        assertTrue(hCatRecord.get(1).getClass() == String.class);
        assertEquals(hCatRecord.get(0), basicInputData.get(numTuplesRead).first);
        assertEquals(hCatRecord.get(1), basicInputData.get(numTuplesRead).second);
        numTuplesRead++;
    }
    assertEquals("failed HCat MR read with storage format: " + this.storageFormat, basicInputData.size(), numTuplesRead);
}
Also used : Path(org.apache.hadoop.fs.Path) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Configuration(org.apache.hadoop.conf.Configuration) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) FileSystem(org.apache.hadoop.fs.FileSystem) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) Job(org.apache.hadoop.mapreduce.Job) HCatRecord(org.apache.hive.hcatalog.data.HCatRecord) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest) Test(org.junit.Test)
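
The MapRead mapper class the job is configured with is not included in this snippet. A minimal sketch, assuming the test simply collects everything it reads into the static readRecords list (Mapper comes from org.apache.hadoop.mapreduce, BytesWritable/Text/WritableComparable from org.apache.hadoop.io):

public static class MapRead extends Mapper<WritableComparable, HCatRecord, BytesWritable, Text> {

    @Override
    protected void map(WritableComparable key, HCatRecord value, Context context)
            throws IOException, InterruptedException {
        // HCatInputFormat delivers one HCatRecord per row of the encrypted table; the test
        // only needs to remember what was read, so nothing is written to the job output.
        readRecords.add(value);
    }
}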

Example 3 with HCatRecord

Use of org.apache.hive.hcatalog.data.HCatRecord in the apache/hive project.

From the class TestHCatPartitioned, method tableSchemaTest:

// test that new columns get added to the table schema
private void tableSchemaTest() throws Exception {
    HCatSchema tableSchema = getTableSchema();
    assertEquals(4, tableSchema.getFields().size());
    // Update partition schema to have 3 fields
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
    writeRecords = new ArrayList<HCatRecord>();
    for (int i = 0; i < 20; i++) {
        List<Object> objList = new ArrayList<Object>();
        objList.add(i);
        objList.add("strvalue" + i);
        objList.add("str2value" + i);
        writeRecords.add(new DefaultHCatRecord(objList));
    }
    Map<String, String> partitionMap = new HashMap<String, String>();
    partitionMap.put("part1", "p1value5");
    partitionMap.put("part0", "505");
    runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
    tableSchema = getTableSchema();
    // assert that c3 has been added to the table schema
    assertEquals(5, tableSchema.getFields().size());
    assertEquals("c1", tableSchema.getFields().get(0).getName());
    assertEquals("c2", tableSchema.getFields().get(1).getName());
    assertEquals("c3", tableSchema.getFields().get(2).getName());
    assertEquals("part1", tableSchema.getFields().get(3).getName());
    assertEquals("part0", tableSchema.getFields().get(4).getName());
    // Test that changing column data type fails
    partitionMap.clear();
    partitionMap.put("part1", "p1value6");
    partitionMap.put("part0", "506");
    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.INT_TYPE_NAME, "")));
    IOException exc = null;
    try {
        runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
    } catch (IOException e) {
        exc = e;
    }
    assertTrue(exc != null);
    assertTrue(exc instanceof HCatException);
    assertEquals(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, ((HCatException) exc).getErrorType());
    // Test that partition key is not allowed in data
    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("part1", serdeConstants.STRING_TYPE_NAME, "")));
    List<HCatRecord> recordsContainingPartitionCols = new ArrayList<HCatRecord>(20);
    for (int i = 0; i < 20; i++) {
        List<Object> objList = new ArrayList<Object>();
        objList.add(i);
        objList.add("c2value" + i);
        objList.add("c3value" + i);
        objList.add("p1value6");
        recordsContainingPartitionCols.add(new DefaultHCatRecord(objList));
    }
    exc = null;
    try {
        runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20, true);
    } catch (IOException e) {
        exc = e;
    }
    List<HCatRecord> records = runMRRead(20, "part1 = \"p1value6\"");
    assertEquals(20, records.size());
    records = runMRRead(20, "part0 = \"506\"");
    assertEquals(20, records.size());
    Integer i = 0;
    for (HCatRecord rec : records) {
        assertEquals(5, rec.size());
        assertEquals(rec.get(0), i);
        assertEquals(rec.get(1), "c2value" + i);
        assertEquals(rec.get(2), "c3value" + i);
        assertEquals(rec.get(3), "p1value6");
        assertEquals(rec.get(4), 506);
        i++;
    }
}
Also used : HashMap(java.util.HashMap) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) HCatException(org.apache.hive.hcatalog.common.HCatException) IOException(java.io.IOException) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) DefaultHCatRecord(org.apache.hive.hcatalog.data.DefaultHCatRecord) DefaultHCatRecord(org.apache.hive.hcatalog.data.DefaultHCatRecord) HCatRecord(org.apache.hive.hcatalog.data.HCatRecord)
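
runMRCreate and runMRRead are helpers from the HCatalog MapReduce test base class and are not reproduced here. The sketch below shows the HCatOutputFormat wiring a write helper like runMRCreate typically performs, reusing the partitionMap and partitionColumns variables from the test and assuming hypothetical database/table names ("default", "testtable"); it is illustrative, not the actual helper.

Job job = Job.getInstance(new Configuration(), "hcat write");
job.setOutputFormatClass(HCatOutputFormat.class);
// Declare the target table and the static partition values for this write.
HCatOutputFormat.setOutput(job, OutputJobInfo.create("default", "testtable", partitionMap));
// Publish the column schema of the HCatRecords the mappers will emit.
HCatOutputFormat.setSchema(job, new HCatSchema(partitionColumns));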

Example 4 with HCatRecord

Use of org.apache.hive.hcatalog.data.HCatRecord in the apache/hive project.

From the class HCatBaseLoader, method getNext:

@Override
public Tuple getNext() throws IOException {
    try {
        HCatRecord hr = (HCatRecord) (reader.nextKeyValue() ? reader.getCurrentValue() : null);
        Tuple t = PigHCatUtil.transformToTuple(hr, outputSchema);
        // a null record (end of input) yields a null tuple, which tells Pig there is no more data
        return t;
    } catch (ExecException e) {
        int errCode = 6018;
        String errMsg = "Error while reading input";
        throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT, e);
    } catch (Exception eOther) {
        int errCode = 6018;
        String errMsg = "Error converting read value to tuple";
        throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT, eOther);
    }
}
Also used : ExecException(org.apache.pig.backend.executionengine.ExecException) HCatRecord(org.apache.hive.hcatalog.data.HCatRecord) Tuple(org.apache.pig.data.Tuple) FrontendException(org.apache.pig.impl.logicalLayer.FrontendException) IOException(java.io.IOException) PigException(org.apache.pig.PigException) ExecException(org.apache.pig.backend.executionengine.ExecException)
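
PigHCatUtil.transformToTuple converts every HCatRecord field into a Pig tuple field according to the loader's output schema. Below is a heavily simplified sketch of that idea; the hypothetical toTuple helper ignores the schema-driven type conversions the real method performs (TupleFactory is from org.apache.pig.data).

private static Tuple toTuple(HCatRecord record) {
    if (record == null) {
        // end of input: returning null from getNext() tells Pig the data is exhausted
        return null;
    }
    // Primitive column values can be handed to Pig as-is; complex types (struct, map,
    // array) would need a recursive conversion in a real implementation.
    return TupleFactory.getInstance().newTuple(record.getAll());
}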

Example 5 with HCatRecord

Use of org.apache.hive.hcatalog.data.HCatRecord in the apache/hive project.

From the class TestHCatDynamicPartitioned, method generateWriteRecords:

protected static void generateWriteRecords(int max, int mod, int offset) {
    writeRecords = new ArrayList<HCatRecord>();
    for (int i = 0; i < max; i++) {
        List<Object> objList = new ArrayList<Object>();
        objList.add(i);
        objList.add("strvalue" + i);
        // the third column drives the dynamic partition: it cycles through mod distinct values starting at offset
        objList.add(String.valueOf((i % mod) + offset));
        writeRecords.add(new DefaultHCatRecord(objList));
    }
}
Also used : DefaultHCatRecord(org.apache.hive.hcatalog.data.DefaultHCatRecord) ArrayList(java.util.ArrayList) DefaultHCatRecord(org.apache.hive.hcatalog.data.DefaultHCatRecord) HCatRecord(org.apache.hive.hcatalog.data.HCatRecord)
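
The generated records are normally written through the runMRCreate MapReduce helper, which is not shown here. Purely as an illustrative alternative, and assuming a hypothetical "testtable" table in the "default" database, the same list could be written with HCatalog's data transfer write API (WriteEntity, HCatWriter, WriterContext from org.apache.hive.hcatalog.data.transfer):

WriteEntity entity = new WriteEntity.Builder()
    .withDatabase("default")
    .withTable("testtable")
    .build();
// "Master" side: obtain a serializable WriterContext for the target table.
HCatWriter master = DataTransferFactory.getHCatWriter(entity, new HashMap<String, String>());
WriterContext context = master.prepareWrite();
// "Slave" side: stream the generated records, then commit from the master.
DataTransferFactory.getHCatWriter(context).write(writeRecords.iterator());
master.commit(context);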

Aggregations

HCatRecord (org.apache.hive.hcatalog.data.HCatRecord): 14
IOException (java.io.IOException): 6
DefaultHCatRecord (org.apache.hive.hcatalog.data.DefaultHCatRecord): 6
ArrayList (java.util.ArrayList): 4
WritableComparable (org.apache.hadoop.io.WritableComparable): 4
HCatException (org.apache.hive.hcatalog.common.HCatException): 4
HashMap (java.util.HashMap): 3
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 3
HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema): 3
ReaderContext (org.apache.hive.hcatalog.data.transfer.ReaderContext): 3
Path (org.apache.hadoop.fs.Path): 2
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 2
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 2
Job (org.apache.hadoop.mapreduce.Job): 2
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 2
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 2
HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 2
HCatReader (org.apache.hive.hcatalog.data.transfer.HCatReader): 2
HCatInputFormat (org.apache.hive.hcatalog.mapreduce.HCatInputFormat): 2
HCatOutputFormat (org.apache.hive.hcatalog.mapreduce.HCatOutputFormat): 2