Example 21 with HCatSchema

Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.

From the class TestHCatInputFormatMethods, method testGetPartitionAndDataColumns.

@Test
public void testGetPartitionAndDataColumns() throws Exception {
    Configuration conf = new Configuration();
    Job myJob = new Job(conf, "hcatTest");
    HCatInputFormat.setInput(myJob, "default", "testHCIFMethods");
    HCatSchema cols = HCatInputFormat.getDataColumns(myJob.getConfiguration());
    Assert.assertNotNull(cols.getFields());
    Assert.assertEquals(2, cols.getFields().size());
    Assert.assertEquals("a", cols.getFields().get(0).getName());
    Assert.assertEquals("b", cols.getFields().get(1).getName());
    Assert.assertEquals(HCatFieldSchema.Type.STRING, cols.getFields().get(0).getType());
    Assert.assertEquals(HCatFieldSchema.Type.INT, cols.getFields().get(1).getType());
    HCatSchema pcols = HCatInputFormat.getPartitionColumns(myJob.getConfiguration());
    Assert.assertNotNull(pcols.getFields());
    Assert.assertEquals(2, pcols.getFields().size());
    Assert.assertEquals("x", pcols.getFields().get(0).getName());
    Assert.assertEquals("y", pcols.getFields().get(1).getName());
    Assert.assertEquals(HCatFieldSchema.Type.STRING, pcols.getFields().get(0).getType());
    Assert.assertEquals(HCatFieldSchema.Type.STRING, pcols.getFields().get(1).getType());
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) Job(org.apache.hadoop.mapreduce.Job) Test(org.junit.Test)
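
Once an HCatSchema is in hand, its fields can be walked directly. A minimal sketch of that, assuming only the HCatSchema API shown above (the SchemaDump class itself is hypothetical):

import java.util.List;

import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class SchemaDump {
    /** Prints each field's name and Hive type name from an HCatSchema. */
    static void dump(HCatSchema schema) {
        List<HCatFieldSchema> fields = schema.getFields();
        for (HCatFieldSchema field : fields) {
            // getTypeString() returns the Hive type name, e.g. "string" or "int".
            System.out.println(field.getName() + " : " + field.getTypeString());
        }
    }
}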

Example 22 with HCatSchema

Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.

From the class TestHCatPartitioned, method columnOrderChangeTest.

// Check behavior when the order of columns is changed.
private void columnOrderChangeTest() throws Exception {
    HCatSchema tableSchema = getTableSchema();
    assertEquals(5, tableSchema.getFields().size());
    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
    writeRecords = new ArrayList<HCatRecord>();
    for (int i = 0; i < 10; i++) {
        List<Object> objList = new ArrayList<Object>();
        objList.add(i);
        objList.add("co strvalue" + i);
        objList.add("co str2value" + i);
        writeRecords.add(new DefaultHCatRecord(objList));
    }
    Map<String, String> partitionMap = new HashMap<String, String>();
    partitionMap.put("part1", "p1value8");
    partitionMap.put("part0", "508");
    Exception exc = null;
    try {
        runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
    } catch (IOException e) {
        exc = e;
    }
    assertTrue(exc != null);
    assertTrue(exc instanceof HCatException);
    assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType());
    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
    writeRecords = new ArrayList<HCatRecord>();
    for (int i = 0; i < 10; i++) {
        List<Object> objList = new ArrayList<Object>();
        objList.add(i);
        objList.add("co strvalue" + i);
        writeRecords.add(new DefaultHCatRecord(objList));
    }
    runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
    if (isTableImmutable()) {
        // Read should get 10 + 20 + 10 + 10 + 20 = 70 rows.
        runMRRead(70);
    } else {
        // +20 from the duplicate publish
        runMRRead(90);
    }
}
Also used : HashMap(java.util.HashMap) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) HCatException(org.apache.hive.hcatalog.common.HCatException) IOException(java.io.IOException) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) DefaultHCatRecord(org.apache.hive.hcatalog.data.DefaultHCatRecord) HCatRecord(org.apache.hive.hcatalog.data.HCatRecord)
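
The partition-column lists in this test are rebuilt field by field each time. A small helper along these lines (hypothetical, not part of the test suite) captures that construction with the same HCatSchemaUtils call:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils;

public class ColumnListBuilder {
    /**
     * Converts (name, Hive type name) pairs into HCatFieldSchemas, e.g.
     * columns(new String[][] {{"c1", "int"}, {"c2", "string"}}).
     */
    static List<HCatFieldSchema> columns(String[][] nameTypePairs) throws HCatException {
        List<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>();
        for (String[] pair : nameTypePairs) {
            // Mirrors the repeated getHCatFieldSchema(new FieldSchema(...)) calls above.
            cols.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema(pair[0], pair[1], "")));
        }
        return cols;
    }
}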

Example 23 with HCatSchema

Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.

From the class TestLazyHCatRecord, method testGetWithName.

@Test
public void testGetWithName() throws Exception {
    TypeInfo ti = getTypeInfo();
    HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector(ti));
    HCatSchema schema = HCatSchemaUtils.getHCatSchema(ti).get(0).getStructSubSchema();
    Assert.assertEquals(INT_CONST, ((Integer) r.get("an_int", schema)).intValue());
    Assert.assertEquals(LONG_CONST, ((Long) r.get("a_long", schema)).longValue());
    Assert.assertEquals(DOUBLE_CONST, ((Double) r.get("a_double", schema)).doubleValue(), 0);
    Assert.assertEquals(STRING_CONST, (String) r.get("a_string", schema));
}
Also used : HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) Test(org.junit.Test)
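
The TypeInfo plumbing used above can also start from a plain Hive type string. A minimal sketch, assuming a struct layout matching the test's field names (the StructSchemaExample class is hypothetical):

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils;

public class StructSchemaExample {
    static HCatSchema structSchema() throws HCatException {
        // Parse a struct type string into a TypeInfo, then unwrap the struct's
        // sub-schema the same way the test does with getStructSubSchema().
        TypeInfo ti = TypeInfoUtils.getTypeInfoFromTypeString(
            "struct<an_int:int,a_long:bigint,a_double:double,a_string:string>");
        return HCatSchemaUtils.getHCatSchema(ti).get(0).getStructSubSchema();
    }
}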

Example 24 with HCatSchema

Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.

From the class TestHCatUtil, method testGetTableSchemaWithPtnColsApi.

@Test
public void testGetTableSchemaWithPtnColsApi() throws IOException {
    // Check the schema of a table with one field & no partition keys.
    StorageDescriptor sd = new StorageDescriptor(Lists.newArrayList(new FieldSchema("username", serdeConstants.STRING_TYPE_NAME, null)), "location", "org.apache.hadoop.mapred.TextInputFormat", "org.apache.hadoop.mapred.TextOutputFormat", false, -1, new SerDeInfo(), new ArrayList<String>(), new ArrayList<Order>(), new HashMap<String, String>());
    org.apache.hadoop.hive.metastore.api.Table apiTable = new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner", 0, 0, 0, sd, new ArrayList<FieldSchema>(), new HashMap<String, String>(), "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name());
    Table table = new Table(apiTable);
    List<HCatFieldSchema> expectedHCatSchema = Lists.newArrayList(new HCatFieldSchema("username", HCatFieldSchema.Type.STRING, null));
    Assert.assertEquals(new HCatSchema(expectedHCatSchema), HCatUtil.getTableSchemaWithPtnCols(table));
    // Add a partition key & ensure its reflected in the schema.
    List<FieldSchema> partitionKeys = Lists.newArrayList(new FieldSchema("dt", serdeConstants.STRING_TYPE_NAME, null));
    table.getTTable().setPartitionKeys(partitionKeys);
    expectedHCatSchema.add(new HCatFieldSchema("dt", HCatFieldSchema.Type.STRING, null));
    Assert.assertEquals(new HCatSchema(expectedHCatSchema), HCatUtil.getTableSchemaWithPtnCols(table));
}
Also used : Order(org.apache.hadoop.hive.metastore.api.Order) Table(org.apache.hadoop.hive.ql.metadata.Table) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) Test(org.junit.Test)
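
As the assertions above show, getTableSchemaWithPtnCols yields the data columns followed by the partition keys. A hedged sketch of that composition (not Hive's actual implementation; the helper is hypothetical):

import java.util.ArrayList;
import java.util.List;

import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class SchemaComposition {
    /** Appends partition columns after the data columns, as the test expects. */
    static HCatSchema withPartitionCols(HCatSchema dataCols, List<HCatFieldSchema> ptnCols) {
        List<HCatFieldSchema> all = new ArrayList<HCatFieldSchema>(dataCols.getFields());
        all.addAll(ptnCols);
        return new HCatSchema(all);
    }
}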

Example 25 with HCatSchema

Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.

From the class HCatMapReduceTest, method runMRCreate.

/**
   * Run a local map reduce job to load data from in-memory records into an HCatalog table.
   * @param partitionValues map of static partition key/value pairs, or null (e.g. for
   *                        dynamic partitioning or an unpartitioned table)
   * @param partitionColumns schema of the columns being written
   * @param records data to be written to the HCatalog table
   * @param writeCount number of records expected to be written
   * @param assertWrite whether to assert that writeCount records were actually written
   * @param asSingleMapTask if true, use a single input path so that only one map task runs
   * @param customDynamicPathPattern custom path pattern for dynamic partitions, or null for the default
   * @return the completed Job
   * @throws Exception
   */
Job runMRCreate(Map<String, String> partitionValues, List<HCatFieldSchema> partitionColumns, List<HCatRecord> records, int writeCount, boolean assertWrite, boolean asSingleMapTask, String customDynamicPathPattern) throws Exception {
    writeRecords = records;
    MapCreate.writeCount = 0;
    Configuration conf = new Configuration();
    Job job = new Job(conf, "hcat mapreduce write test");
    job.setJarByClass(this.getClass());
    job.setMapperClass(HCatMapReduceTest.MapCreate.class);
    // input/output settings
    job.setInputFormatClass(TextInputFormat.class);
    if (asSingleMapTask) {
        // One input path would mean only one map task
        Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
        createInputFile(path, writeCount);
        TextInputFormat.setInputPaths(job, path);
    } else {
        // Create two input paths so that two map tasks get triggered. There could be other ways
        // to trigger two map tasks.
        Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
        createInputFile(path, writeCount / 2);
        Path path2 = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput2");
        createInputFile(path2, (writeCount - writeCount / 2));
        TextInputFormat.setInputPaths(job, path, path2);
    }
    job.setOutputFormatClass(HCatOutputFormat.class);
    OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues);
    if (customDynamicPathPattern != null) {
        job.getConfiguration().set(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN, customDynamicPathPattern);
    }
    HCatOutputFormat.setOutput(job, outputJobInfo);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);
    job.setNumReduceTasks(0);
    HCatOutputFormat.setSchema(job, new HCatSchema(partitionColumns));
    boolean success = job.waitForCompletion(true);
    // Ensure counters are set when data has actually been read.
    if (partitionValues != null) {
        assertTrue(job.getCounters().getGroup("FileSystemCounters").findCounter("FILE_BYTES_READ").getValue() > 0);
    }
    if (!HCatUtil.isHadoop23()) {
        // Local mode outputcommitter hook is not invoked in Hadoop 1.x
        if (success) {
            new FileOutputCommitterContainer(job, null).commitJob(job);
        } else {
            new FileOutputCommitterContainer(job, null).abortJob(job, JobStatus.State.FAILED);
        }
    }
    if (assertWrite) {
        // we assert only if we expected to assert with this call.
        Assert.assertEquals(writeCount, MapCreate.writeCount);
    }
    if (isTableExternal()) {
        externalTableLocation = outputJobInfo.getTableInfo().getTableLocation();
    }
    return job;
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) Job(org.apache.hadoop.mapreduce.Job)
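
The HCatOutputFormat wiring in the middle of runMRCreate is the reusable core. A standalone sketch of just that configuration (the class and method names here are hypothetical):

import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

public class HCatWriteJobSetup {
    /** Configures a map-only job that writes HCatRecords into an HCatalog table. */
    static Job configureWriteJob(Configuration conf, String dbName, String tableName,
            Map<String, String> partitionValues, List<HCatFieldSchema> columns) throws Exception {
        Job job = new Job(conf, "hcat write example");
        job.setOutputFormatClass(HCatOutputFormat.class);
        // Bind the job to the target table (and static partition values, if any).
        HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, tableName, partitionValues));
        // Map-only write: the mapper emits DefaultHCatRecord values directly.
        job.setMapOutputKeyClass(BytesWritable.class);
        job.setMapOutputValueClass(DefaultHCatRecord.class);
        job.setNumReduceTasks(0);
        // Declare the schema of the records being written.
        HCatOutputFormat.setSchema(job, new HCatSchema(columns));
        return job;
    }
}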

Aggregations

HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema): 44
HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 21
Job (org.apache.hadoop.mapreduce.Job): 16
ArrayList (java.util.ArrayList): 14
Configuration (org.apache.hadoop.conf.Configuration): 12
GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser): 10
HashMap (java.util.HashMap): 9
IOException (java.io.IOException): 8
HCatException (org.apache.hive.hcatalog.common.HCatException): 8
Table (org.apache.hadoop.hive.ql.metadata.Table): 6
Test (org.junit.Test): 6
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 5
Properties (java.util.Properties): 4
Path (org.apache.hadoop.fs.Path): 4
ResourceSchema (org.apache.pig.ResourceSchema): 4
FrontendException (org.apache.pig.impl.logicalLayer.FrontendException): 4
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 3
HCatRecord (org.apache.hive.hcatalog.data.HCatRecord): 3
PigException (org.apache.pig.PigException): 3
Map (java.util.Map): 2