Example 31 with HCatSchema

Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.

From the class WriteJson, the run method:

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();
    String serverUri = args[0];
    String inputTableName = args[1];
    String outputTableName = args[2];
    String dbName = null;
    String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
    if (principalID != null) {
        conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
    }
    Job job = Job.getInstance(conf, "WriteJson");
    // initialize HCatInputFormat to read from the input table
    HCatInputFormat.setInput(job, dbName, inputTableName);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setJarByClass(WriteJson.class);
    job.setMapperClass(Map.class);
    job.setOutputKeyClass(WritableComparable.class);
    job.setOutputValueClass(DefaultHCatRecord.class);
    job.setNumReduceTasks(0);
    HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null));
    HCatSchema s = HCatInputFormat.getTableSchema(job);
    System.err.println("INFO: output schema explicitly set for writing: " + s);
    HCatOutputFormat.setSchema(job, s);
    job.setOutputFormatClass(HCatOutputFormat.class);
    return (job.waitForCompletion(true) ? 0 : 1);
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema), Job (org.apache.hadoop.mapreduce.Job), GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)
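
The job above references an inner Map class that is not shown in the snippet. A minimal sketch of such a pass-through mapper, assuming the job simply copies records from the input table to the output table (the body is an assumption for illustration, not the Hive source):

public static class Map extends Mapper<WritableComparable, HCatRecord, WritableComparable, HCatRecord> {
    @Override
    protected void map(WritableComparable key, HCatRecord value, Context context)
            throws IOException, InterruptedException {
        // emit each record unchanged; HCatOutputFormat serializes it to the output table
        context.write(key, value);
    }
}

With setNumReduceTasks(0), mapper output goes straight to HCatOutputFormat, so the map output types must match the output key/value classes set on the job.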

Example 32 with HCatSchema

Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.

From the class TestHCatInputFormatMethods, the testGetPartitionAndDataColumns method:

@Test
public void testGetPartitionAndDataColumns() throws Exception {
    Configuration conf = new Configuration();
    Job myJob = Job.getInstance(conf, "hcatTest");
    HCatInputFormat.setInput(myJob, "default", "testHCIFMethods");
    HCatSchema cols = HCatInputFormat.getDataColumns(myJob.getConfiguration());
    Assert.assertNotNull(cols.getFields());
    Assert.assertEquals(2, cols.getFields().size());
    Assert.assertEquals("a", cols.getFields().get(0).getName());
    Assert.assertEquals("b", cols.getFields().get(1).getName());
    Assert.assertEquals(HCatFieldSchema.Type.STRING, cols.getFields().get(0).getType());
    Assert.assertEquals(HCatFieldSchema.Type.INT, cols.getFields().get(1).getType());
    HCatSchema pcols = HCatInputFormat.getPartitionColumns(myJob.getConfiguration());
    Assert.assertNotNull(pcols.getFields());
    Assert.assertEquals(2, pcols.getFields().size());
    Assert.assertEquals("x", pcols.getFields().get(0).getName());
    Assert.assertEquals("y", pcols.getFields().get(1).getName());
    Assert.assertEquals(HCatFieldSchema.Type.STRING, pcols.getFields().get(0).getType());
    Assert.assertEquals(HCatFieldSchema.Type.STRING, pcols.getFields().get(1).getType());
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema), Job (org.apache.hadoop.mapreduce.Job), Test (org.junit.Test)
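
The data columns and partition columns can be stitched back together into a single view of the table. A minimal sketch, assuming the layout this test expects (data columns a string and b int; partition columns x and y, both strings); the exact ordering of the full table schema can vary by HCatalog version, so treat this as an illustration:

HCatSchema data = HCatInputFormat.getDataColumns(myJob.getConfiguration());
HCatSchema parts = HCatInputFormat.getPartitionColumns(myJob.getConfiguration());
// partition columns conventionally follow the data columns in the table schema
List<HCatFieldSchema> all = new ArrayList<HCatFieldSchema>(data.getFields());
all.addAll(parts.getFields());
HCatSchema tableSchema = new HCatSchema(all);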

Example 33 with HCatSchema

Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.

From the class TestLazyHCatRecord, the testGetWithName method:

@Test
public void testGetWithName() throws Exception {
    TypeInfo ti = getTypeInfo();
    HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector(ti));
    HCatSchema schema = HCatSchemaUtils.getHCatSchema(ti).get(0).getStructSubSchema();
    Assert.assertEquals(INT_CONST, ((Integer) r.get("an_int", schema)).intValue());
    Assert.assertEquals(LONG_CONST, ((Long) r.get("a_long", schema)).longValue());
    Assert.assertEquals(DOUBLE_CONST, ((Double) r.get("a_double", schema)).doubleValue(), 0);
    Assert.assertEquals(STRING_CONST, (String) r.get("a_string", schema));
}
Also used : HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) Test(org.junit.Test)
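
Name-based access is resolved through the schema rather than stored with the record: r.get(name, schema) looks up the field's position in the schema and then reads by index. Conceptually (a simplification of the record's behavior, not the exact source):

// equivalent positional lookup for the named access above
Integer pos = schema.getPosition("an_int");
Object value = r.get(pos);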

Example 34 with HCatSchema

Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project flink by apache.

From the class HCatInputFormatBase, the getFields method:

/**
 * Specifies the fields which are returned by the InputFormat and their order.
 *
 * @param fields The fields and their order which are returned by the InputFormat.
 * @return This InputFormat with specified return fields.
 * @throws java.io.IOException if the output schema cannot be serialized into the configuration.
 */
public HCatInputFormatBase<T> getFields(String... fields) throws IOException {
    // build output schema by projecting the requested fields, in the given order
    ArrayList<HCatFieldSchema> fieldSchemas = new ArrayList<HCatFieldSchema>(fields.length);
    for (String field : fields) {
        // look up each requested field's schema in the current output schema
        fieldSchemas.add(this.outputSchema.get(field));
    }
    this.outputSchema = new HCatSchema(fieldSchemas);
    // update output schema configuration
    configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema));
    return this;
}
Also used: HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema), ArrayList (java.util.ArrayList), HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema)
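
A typical call site, sketched against Flink's Java HCatInputFormat subclass; the database, table, and field names here are made up for illustration, and the constructor can throw IOException, so this belongs inside a method that declares it:

// read only two columns of default.users, in this order
HCatInputFormatBase<HCatRecord> usersInput =
    new HCatInputFormat<HCatRecord>("default", "users").getFields("name", "age");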

Example 35 with HCatSchema

Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.

From the class FosterStorageHandler, the configureInputJobProperties method:

@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
    try {
        Map<String, String> tableProperties = tableDesc.getJobProperties();
        String jobInfoProperty = tableProperties.get(HCatConstants.HCAT_KEY_JOB_INFO);
        if (jobInfoProperty != null) {
            InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobInfoProperty);
            HCatTableInfo tableInfo = inputJobInfo.getTableInfo();
            HCatSchema dataColumns = tableInfo.getDataColumns();
            List<HCatFieldSchema> dataFields = dataColumns.getFields();
            // build a comma-separated list of column names and a colon-separated list of their types
            StringBuilder columnNamesSb = new StringBuilder();
            StringBuilder typeNamesSb = new StringBuilder();
            for (HCatFieldSchema dataField : dataFields) {
                if (columnNamesSb.length() > 0) {
                    columnNamesSb.append(",");
                    typeNamesSb.append(":");
                }
                columnNamesSb.append(dataField.getName());
                typeNamesSb.append(dataField.getTypeString());
            }
            jobProperties.put(IOConstants.SCHEMA_EVOLUTION_COLUMNS, columnNamesSb.toString());
            jobProperties.put(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, typeNamesSb.toString());
            boolean isTransactionalTable = AcidUtils.isTablePropertyTransactional(tableProperties);
            AcidUtils.AcidOperationalProperties acidOperationalProperties = AcidUtils.getAcidOperationalProperties(tableProperties);
            AcidUtils.setAcidOperationalProperties(jobProperties, isTransactionalTable, acidOperationalProperties);
        }
    } catch (IOException e) {
        throw new IllegalStateException("Failed to configure input job properties", e);
    }
}
Also used: IOException (java.io.IOException), HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema), HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema), AcidUtils (org.apache.hadoop.hive.ql.io.AcidUtils)
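
As a worked example, for a hypothetical table whose data columns are id int and name string, the loop above would leave these two job properties set as follows:

jobProperties.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS)       // "id,name"
jobProperties.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES) // "int:string"

Column names are comma-separated while type names are colon-separated, which is exactly the separator convention the loop encodes.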

Aggregations

HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema): 45 usages
HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 21 usages
Job (org.apache.hadoop.mapreduce.Job): 17 usages
ArrayList (java.util.ArrayList): 14 usages
Configuration (org.apache.hadoop.conf.Configuration): 13 usages
HashMap (java.util.HashMap): 10 usages
GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser): 10 usages
IOException (java.io.IOException): 8 usages
HCatException (org.apache.hive.hcatalog.common.HCatException): 8 usages
Table (org.apache.hadoop.hive.ql.metadata.Table): 6 usages
Test (org.junit.Test): 6 usages
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 5 usages
Properties (java.util.Properties): 4 usages
Path (org.apache.hadoop.fs.Path): 4 usages
ResourceSchema (org.apache.pig.ResourceSchema): 4 usages
FrontendException (org.apache.pig.impl.logicalLayer.FrontendException): 4 usages
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 3 usages
HCatRecord (org.apache.hive.hcatalog.data.HCatRecord): 3 usages
PigException (org.apache.pig.PigException): 3 usages
Map (java.util.Map): 2 usages