
Example 1 with DefaultHCatRecord

Use of org.apache.hive.hcatalog.data.DefaultHCatRecord in project hive by apache, in the class HCatBaseStorer, method putNext:

@Override
public void putNext(Tuple tuple) throws IOException {
    // Convert each Pig field to its HCatalog-compatible Java type, in schema order.
    List<Object> outgoing = new ArrayList<Object>(tuple.size());
    int i = 0;
    for (HCatFieldSchema fSchema : computedSchema.getFields()) {
        outgoing.add(getJavaObj(tuple.get(i++), fSchema));
    }
    try {
        // Wrap the converted values in a positional DefaultHCatRecord and write it out.
        writer.write(null, new DefaultHCatRecord(outgoing));
    } catch (InterruptedException e) {
        throw new BackendException("Error while writing tuple: " + tuple, PigHCatUtil.PIG_EXCEPTION_CODE, e);
    }
}
Also used: DefaultHCatRecord (org.apache.hive.hcatalog.data.DefaultHCatRecord), ArrayList (java.util.ArrayList), HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema), BackendException (org.apache.pig.backend.BackendException)
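
For orientation, a minimal self-contained sketch of the construction pattern above; the two-column layout (id INT, name STRING) and the values are hypothetical. A DefaultHCatRecord is a positional wrapper around a List<Object>, so the list order must match the target schema:

import java.util.ArrayList;
import java.util.List;

import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;

public class RecordConstructionSketch {
    public static void main(String[] args) {
        List<Object> outgoing = new ArrayList<Object>();
        outgoing.add(42);        // position 0: id
        outgoing.add("example"); // position 1: name
        HCatRecord record = new DefaultHCatRecord(outgoing);
        System.out.println(record.size()); // 2: one slot per column
    }
}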

Example 2 with DefaultHCatRecord

Use of org.apache.hive.hcatalog.data.DefaultHCatRecord in project hive by apache, in the class TestHCatDynamicPartitioned, method generateWriteRecords:

protected static void generateWriteRecords(int max, int mod, int offset) {
    writeRecords = new ArrayList<HCatRecord>();
    for (int i = 0; i < max; i++) {
        List<Object> objList = new ArrayList<Object>();
        // Positional values: integer key, string payload, then the partition value.
        objList.add(i);
        objList.add("strvalue" + i);
        objList.add(String.valueOf((i % mod) + offset));
        writeRecords.add(new DefaultHCatRecord(objList));
    }
}
Also used: DefaultHCatRecord (org.apache.hive.hcatalog.data.DefaultHCatRecord), ArrayList (java.util.ArrayList), HCatRecord (org.apache.hive.hcatalog.data.HCatRecord)
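
The third column drives dynamic partitioning: with max = 20, mod = 5 and offset = 0, the records cycle through partition values "0" through "4". A hedged sketch of inspecting such a record both by position and by field name; the schema construction below is an assumption for illustration:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class RecordInspectionSketch {
    public static void main(String[] args) throws HCatException {
        // Illustrative schema matching the generator's three positional values.
        HCatSchema schema = new HCatSchema(Arrays.asList(
            new HCatFieldSchema("c1", HCatFieldSchema.Type.INT, null),
            new HCatFieldSchema("c2", HCatFieldSchema.Type.STRING, null),
            new HCatFieldSchema("p1", HCatFieldSchema.Type.STRING, null)));

        List<Object> objList = new ArrayList<Object>();
        objList.add(7);
        objList.add("strvalue7");
        objList.add("2"); // (7 % 5) + 0
        HCatRecord rec = new DefaultHCatRecord(objList);

        System.out.println(rec.get(0));            // positional access: 7
        System.out.println(rec.get("c2", schema)); // access by name: strvalue7
    }
}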

Example 3 with DefaultHCatRecord

Use of org.apache.hive.hcatalog.data.DefaultHCatRecord in project hive by apache, in the class TestHCatPartitioned, method tableSchemaTest:

//test that new columns get added to the table schema
private void tableSchemaTest() throws Exception {
    HCatSchema tableSchema = getTableSchema();
    assertEquals(4, tableSchema.getFields().size());
    //Update partition schema to have 3 fields
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
    writeRecords = new ArrayList<HCatRecord>();
    for (int i = 0; i < 20; i++) {
        List<Object> objList = new ArrayList<Object>();
        objList.add(i);
        objList.add("strvalue" + i);
        objList.add("str2value" + i);
        writeRecords.add(new DefaultHCatRecord(objList));
    }
    Map<String, String> partitionMap = new HashMap<String, String>();
    partitionMap.put("part1", "p1value5");
    partitionMap.put("part0", "505");
    runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
    tableSchema = getTableSchema();
    //assert that c3 has been added to the table schema
    assertEquals(5, tableSchema.getFields().size());
    assertEquals("c1", tableSchema.getFields().get(0).getName());
    assertEquals("c2", tableSchema.getFields().get(1).getName());
    assertEquals("c3", tableSchema.getFields().get(2).getName());
    assertEquals("part1", tableSchema.getFields().get(3).getName());
    assertEquals("part0", tableSchema.getFields().get(4).getName());
    //Test that changing column data type fails
    partitionMap.clear();
    partitionMap.put("part1", "p1value6");
    partitionMap.put("part0", "506");
    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.INT_TYPE_NAME, "")));
    IOException exc = null;
    try {
        runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
    } catch (IOException e) {
        exc = e;
    }
    assertTrue(exc != null);
    assertTrue(exc instanceof HCatException);
    assertEquals(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, ((HCatException) exc).getErrorType());
    //Test that partition key is not allowed in data
    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("part1", serdeConstants.STRING_TYPE_NAME, "")));
    List<HCatRecord> recordsContainingPartitionCols = new ArrayList<HCatRecord>(20);
    for (int i = 0; i < 20; i++) {
        List<Object> objList = new ArrayList<Object>();
        objList.add(i);
        objList.add("c2value" + i);
        objList.add("c3value" + i);
        objList.add("p1value6");
        recordsContainingPartitionCols.add(new DefaultHCatRecord(objList));
    }
    exc = null;
    try {
        runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20, true);
    } catch (IOException e) {
        exc = e;
    }
    List<HCatRecord> records = runMRRead(20, "part1 = \"p1value6\"");
    assertEquals(20, records.size());
    records = runMRRead(20, "part0 = \"506\"");
    assertEquals(20, records.size());
    Integer i = 0;
    for (HCatRecord rec : records) {
        assertEquals(5, rec.size());
        assertEquals(rec.get(0), i);
        assertEquals(rec.get(1), "c2value" + i);
        assertEquals(rec.get(2), "c3value" + i);
        assertEquals(rec.get(3), "p1value6");
        assertEquals(rec.get(4), 506);
        i++;
    }
}
Also used: HashMap (java.util.HashMap), HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), ArrayList (java.util.ArrayList), HCatException (org.apache.hive.hcatalog.common.HCatException), IOException (java.io.IOException), HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema), DefaultHCatRecord (org.apache.hive.hcatalog.data.DefaultHCatRecord), HCatRecord (org.apache.hive.hcatalog.data.HCatRecord)
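
The failure assertions above hinge on HCatException carrying a typed error code while extending IOException. A minimal sketch of that pattern; the exception here is thrown by hand purely for illustration, standing in for runMRCreate rejecting the column type change:

import java.io.IOException;

import org.apache.hive.hcatalog.common.ErrorType;
import org.apache.hive.hcatalog.common.HCatException;

public class ErrorTypeSketch {
    public static void main(String[] args) {
        try {
            // Stand-in for runMRCreate() rejecting a string->int column type change.
            throw new HCatException(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, "c2: expected string, got int");
        } catch (IOException e) {
            // HCatException extends IOException, so the test can catch it as IOException...
            if (e instanceof HCatException) {
                // ...and still recover the specific error type for the assertion.
                System.out.println(((HCatException) e).getErrorType());
            }
        }
    }
}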

Example 4 with DefaultHCatRecord

Use of org.apache.hive.hcatalog.data.DefaultHCatRecord in project hive by apache, in the class TestHCatPartitioned, method columnOrderChangeTest:

//check behavior when the order of columns is changed
private void columnOrderChangeTest() throws Exception {
    HCatSchema tableSchema = getTableSchema();
    assertEquals(5, tableSchema.getFields().size());
    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
    writeRecords = new ArrayList<HCatRecord>();
    for (int i = 0; i < 10; i++) {
        List<Object> objList = new ArrayList<Object>();
        objList.add(i);
        objList.add("co strvalue" + i);
        objList.add("co str2value" + i);
        writeRecords.add(new DefaultHCatRecord(objList));
    }
    Map<String, String> partitionMap = new HashMap<String, String>();
    partitionMap.put("part1", "p1value8");
    partitionMap.put("part0", "508");
    Exception exc = null;
    try {
        runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
    } catch (IOException e) {
        exc = e;
    }
    assertTrue(exc != null);
    assertTrue(exc instanceof HCatException);
    assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType());
    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
    writeRecords = new ArrayList<HCatRecord>();
    for (int i = 0; i < 10; i++) {
        List<Object> objList = new ArrayList<Object>();
        objList.add(i);
        objList.add("co strvalue" + i);
        writeRecords.add(new DefaultHCatRecord(objList));
    }
    runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
    if (isTableImmutable()) {
        //Read should get 10 + 20 + 10 + 10 + 20 rows
        runMRRead(70);
    } else {
        // +20 from the duplicate publish
        runMRRead(90);
    }
}
Also used: HashMap (java.util.HashMap), HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), ArrayList (java.util.ArrayList), HCatException (org.apache.hive.hcatalog.common.HCatException), IOException (java.io.IOException), HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema), DefaultHCatRecord (org.apache.hive.hcatalog.data.DefaultHCatRecord), HCatRecord (org.apache.hive.hcatalog.data.HCatRecord)
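
Note the asymmetry the test exercises: dropping trailing columns (writing only c1, c2) succeeds, while reordering columns fails with ERROR_SCHEMA_COLUMN_MISMATCH. A hedged sketch of how a short record can be padded to a wider layout, relying on the size-based DefaultHCatRecord constructor pre-filling its slots with null (the same behavior Example 5 below depends on); the names are illustrative:

import java.util.ArrayList;
import java.util.List;

import org.apache.hive.hcatalog.data.DefaultHCatRecord;

public class PaddingSketch {
    public static void main(String[] args) {
        // Record written with only (c1, c2) against a 3-column layout (c1, c2, c3).
        List<Object> written = new ArrayList<Object>();
        written.add(0);
        written.add("co strvalue0");

        // The size-based constructor pre-fills every slot with null.
        DefaultHCatRecord padded = new DefaultHCatRecord(3);
        for (int i = 0; i < written.size(); i++) {
            padded.set(i, written.get(i));
        }
        System.out.println(padded.get(2)); // c3 was never written: null
    }
}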

Example 5 with DefaultHCatRecord

Use of org.apache.hive.hcatalog.data.DefaultHCatRecord in project hive by apache, in the class HCatRecordReader, method nextKeyValue:

/**
   * Check if the wrapped RecordReader has another record, and if so convert it into an
   * HCatRecord. Both the check and the conversion happen here so that a configurable
   * percentage of bad records can be tolerated.
   *
   * @return if there is a next record
   * @throws IOException on error
   * @throws InterruptedException on error
   */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (currentKey == null) {
        currentKey = baseRecordReader.createKey();
        currentValue = baseRecordReader.createValue();
    }
    while (baseRecordReader.next(currentKey, currentValue)) {
        HCatRecord r = null;
        Throwable t = null;
        errorTracker.incRecords();
        try {
            // Deserialize the raw value into a lazily evaluated HCatRecord.
            Object o = deserializer.deserialize(currentValue);
            r = new LazyHCatRecord(o, deserializer.getObjectInspector());
        } catch (Throwable throwable) {
            t = throwable;
        }
        if (r == null) {
            // Bad record: count the error and skip it, up to the configured tolerance.
            errorTracker.incErrors(t);
            continue;
        }
        // Project the record onto the output schema; columns missing from the data
        // (e.g. partition keys) are filled from valuesNotInDataCols below.
        DefaultHCatRecord dr = new DefaultHCatRecord(outputSchema.size());
        int i = 0;
        for (String fieldName : outputSchema.getFieldNames()) {
            if (dataSchema.getPosition(fieldName) != null) {
                dr.set(i, r.get(fieldName, dataSchema));
            } else {
                dr.set(i, valuesNotInDataCols.get(fieldName));
            }
            i++;
        }
        currentHCatRecord = dr;
        return true;
    }
    return false;
}
Also used: DefaultHCatRecord (org.apache.hive.hcatalog.data.DefaultHCatRecord), LazyHCatRecord (org.apache.hive.hcatalog.data.LazyHCatRecord), HCatRecord (org.apache.hive.hcatalog.data.HCatRecord)
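
A self-contained sketch of the projection step at the end of nextKeyValue: fields present in the data schema are copied across, and the rest (typically partition columns) are filled from a side map. The schemas and values below are assumptions for illustration:

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class ProjectionSketch {
    public static void main(String[] args) throws HCatException {
        // On-disk data has (c1, c2); the requested output also wants partition column part0.
        HCatSchema dataSchema = new HCatSchema(Arrays.asList(
            new HCatFieldSchema("c1", HCatFieldSchema.Type.INT, null),
            new HCatFieldSchema("c2", HCatFieldSchema.Type.STRING, null)));
        HCatSchema outputSchema = new HCatSchema(Arrays.asList(
            new HCatFieldSchema("c1", HCatFieldSchema.Type.INT, null),
            new HCatFieldSchema("c2", HCatFieldSchema.Type.STRING, null),
            new HCatFieldSchema("part0", HCatFieldSchema.Type.STRING, null)));

        HCatRecord r = new DefaultHCatRecord(Arrays.<Object>asList(1, "strvalue1"));
        Map<String, Object> valuesNotInDataCols = new HashMap<String, Object>();
        valuesNotInDataCols.put("part0", "505"); // partition value comes from the path, not the file

        DefaultHCatRecord dr = new DefaultHCatRecord(outputSchema.size());
        int i = 0;
        for (String fieldName : outputSchema.getFieldNames()) {
            if (dataSchema.getPosition(fieldName) != null) {
                dr.set(i, r.get(fieldName, dataSchema));       // field exists in the data
            } else {
                dr.set(i, valuesNotInDataCols.get(fieldName)); // e.g. partition columns
            }
            i++;
        }
        System.out.println(dr); // 1, strvalue1, 505
    }
}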

Aggregations

DefaultHCatRecord (org.apache.hive.hcatalog.data.DefaultHCatRecord): 5 usages
ArrayList (java.util.ArrayList): 4 usages
HCatRecord (org.apache.hive.hcatalog.data.HCatRecord): 4 usages
HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 3 usages
IOException (java.io.IOException): 2 usages
HashMap (java.util.HashMap): 2 usages
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 2 usages
HCatException (org.apache.hive.hcatalog.common.HCatException): 2 usages
HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema): 2 usages
LazyHCatRecord (org.apache.hive.hcatalog.data.LazyHCatRecord): 1 usage
BackendException (org.apache.pig.backend.BackendException): 1 usage