
Example 1 with TargetTableRef

Use of org.apache.phoenix.mapreduce.bulkload.TargetTableRef in project phoenix by apache.

The class AbstractBulkLoadTool, method completebulkload:

private void completebulkload(Configuration conf, Path outputPath, List<TargetTableRef> tablesToBeLoaded) throws Exception {
    Set<String> tableNames = new HashSet<>(tablesToBeLoaded.size());
    for (TargetTableRef table : tablesToBeLoaded) {
        if (tableNames.contains(table.getPhysicalName())) {
            continue;
        }
        tableNames.add(table.getPhysicalName());
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        String tableName = table.getPhysicalName();
        Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputPath, tableName);
        HTable htable = new HTable(conf, tableName);
        LOG.info("Loading HFiles for {} from {}", tableName, tableOutputPath);
        loader.doBulkLoad(tableOutputPath, htable);
        LOG.info("Incremental load complete for table=" + tableName);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) TargetTableRef(org.apache.phoenix.mapreduce.bulkload.TargetTableRef) LoadIncrementalHFiles(org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles) HTable(org.apache.hadoop.hbase.client.HTable) HashSet(java.util.HashSet)
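
completebulkload keys its bookkeeping on the physical table name, so two TargetTableRef entries that resolve to the same physical HBase table trigger only one LoadIncrementalHFiles run. Below is a minimal, hypothetical sketch of just that de-duplication step; the table names are made up, and it relies only on the TargetTableRef accessors shown above.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.phoenix.mapreduce.bulkload.TargetTableRef;

public class PhysicalNameDedupSketch {

    public static void main(String[] args) {
        // Two logical targets that happen to map to the same physical HBase table
        // (names are made up for illustration).
        List<TargetTableRef> tablesToBeLoaded = new ArrayList<>();
        tablesToBeLoaded.add(new TargetTableRef("MY_SCHEMA.MY_TABLE", "MY_SCHEMA.MY_TABLE"));
        tablesToBeLoaded.add(new TargetTableRef("MY_SCHEMA.MY_OTHER_VIEW", "MY_SCHEMA.MY_TABLE"));

        Set<String> loadedPhysicalNames = new HashSet<>();
        for (TargetTableRef table : tablesToBeLoaded) {
            if (!loadedPhysicalNames.add(table.getPhysicalName())) {
                continue; // this physical table was already handled
            }
            // completebulkload would run LoadIncrementalHFiles.doBulkLoad(...) here,
            // once per physical table.
            System.out.println("Would bulk-load HFiles for " + table.getPhysicalName());
        }
    }
}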

Example 2 with TargetTableRef

Use of org.apache.phoenix.mapreduce.bulkload.TargetTableRef in project phoenix by apache.

The class MultiHfileOutputFormat, method configureIncrementalLoad:

/**
 * Configures the job for MultiHfileOutputFormat: sets the output format and serializations,
 * stores each table's definition and per-family settings in the job configuration, and sizes
 * the reduce phase to match the current region boundaries of all target tables.
 * @param job the job to configure
 * @param tablesToBeLoaded the target tables for which HFiles will be written
 * @throws IOException if table metadata cannot be read from HBase
 */
public static void configureIncrementalLoad(Job job, List<TargetTableRef> tablesToBeLoaded) throws IOException {
    Configuration conf = job.getConfiguration();
    job.setOutputFormatClass(MultiHfileOutputFormat.class);
    conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName(), KeyValueSerialization.class.getName());
    // tableStartKeys for all tables.
    Set<TableRowkeyPair> tablesStartKeys = Sets.newTreeSet();
    for (TargetTableRef table : tablesToBeLoaded) {
        final String tableName = table.getPhysicalName();
        try (HTable htable = new HTable(conf, tableName)) {
            Set<TableRowkeyPair> startKeys = getRegionStartKeys(tableName, htable.getRegionLocator());
            tablesStartKeys.addAll(startKeys);
            String compressionConfig = configureCompression(htable.getTableDescriptor());
            String bloomTypeConfig = configureBloomType(htable.getTableDescriptor());
            String blockSizeConfig = configureBlockSize(htable.getTableDescriptor());
            String blockEncodingConfig = configureDataBlockEncoding(htable.getTableDescriptor());
            Map<String, String> tableConfigs = Maps.newHashMap();
            if (StringUtils.isNotBlank(compressionConfig)) {
                tableConfigs.put(COMPRESSION_FAMILIES_CONF_KEY, compressionConfig);
            }
            if (StringUtils.isNotBlank(bloomTypeConfig)) {
                tableConfigs.put(BLOOM_TYPE_FAMILIES_CONF_KEY, bloomTypeConfig);
            }
            if (StringUtils.isNotBlank(blockSizeConfig)) {
                tableConfigs.put(BLOCK_SIZE_FAMILIES_CONF_KEY, blockSizeConfig);
            }
            if (StringUtils.isNotBlank(blockEncodingConfig)) {
                tableConfigs.put(DATABLOCK_ENCODING_FAMILIES_CONF_KEY, blockEncodingConfig);
            }
            table.setConfiguration(tableConfigs);
            final String tableDefns = TargetTableRefFunctions.TO_JSON.apply(table);
            // set the table definition in the config so it can be used by the RecordWriter
            conf.set(tableName, tableDefns);
            TargetTableRef tbl = TargetTableRefFunctions.FROM_JSON.apply(tableDefns);
            LOG.info(" the table logical name is " + tbl.getLogicalName());
        }
    }
    LOG.info("Configuring " + tablesStartKeys.size() + " reduce partitions to match current region count");
    job.setNumReduceTasks(tablesStartKeys.size());
    configurePartitioner(job, tablesStartKeys);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
}
Also used : TableRowkeyPair(org.apache.phoenix.mapreduce.bulkload.TableRowkeyPair) Configuration(org.apache.hadoop.conf.Configuration) TargetTableRef(org.apache.phoenix.mapreduce.bulkload.TargetTableRef) MutationSerialization(org.apache.hadoop.hbase.mapreduce.MutationSerialization) ResultSerialization(org.apache.hadoop.hbase.mapreduce.ResultSerialization) KeyValueSerialization(org.apache.hadoop.hbase.mapreduce.KeyValueSerialization) HTable(org.apache.hadoop.hbase.client.HTable)
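
The part worth calling out is how the per-table settings reach the tasks: each TargetTableRef, together with its configuration map, is serialized to JSON with TargetTableRefFunctions.TO_JSON and stored in the job Configuration under the physical table name; the consumer later rebuilds it with FROM_JSON. Below is a small round-trip sketch of that mechanism, with a made-up table name and configuration key.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.phoenix.mapreduce.bulkload.TargetTableRef;
import org.apache.phoenix.mapreduce.bulkload.TargetTableRefFunctions;

public class TargetTableRefJsonRoundTripSketch {

    public static void main(String[] args) {
        // A target table plus some per-table settings (key and value are placeholders).
        TargetTableRef table = new TargetTableRef("MY_SCHEMA.MY_TABLE", "MY_SCHEMA.MY_TABLE");
        Map<String, String> tableConfigs = new HashMap<>();
        tableConfigs.put("example.per.table.key", "example-value");
        table.setConfiguration(tableConfigs);

        // configureIncrementalLoad stores the JSON under the physical table name ...
        Configuration conf = new Configuration();
        conf.set(table.getPhysicalName(), TargetTableRefFunctions.TO_JSON.apply(table));

        // ... and the reading side rebuilds the TargetTableRef the same way.
        TargetTableRef restored =
                TargetTableRefFunctions.FROM_JSON.apply(conf.get(table.getPhysicalName()));
        System.out.println(restored.getLogicalName() + " -> " + restored.getConfiguration());
    }
}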

Example 3 with TargetTableRef

Use of org.apache.phoenix.mapreduce.bulkload.TargetTableRef in project phoenix by apache.

The class AbstractBulkLoadTool, method loadData:

private int loadData(Configuration conf, CommandLine cmdLine) throws Exception {
    String tableName = cmdLine.getOptionValue(TABLE_NAME_OPT.getOpt());
    String schemaName = cmdLine.getOptionValue(SCHEMA_NAME_OPT.getOpt());
    String indexTableName = cmdLine.getOptionValue(INDEX_TABLE_NAME_OPT.getOpt());
    String qualifiedTableName = SchemaUtil.getQualifiedTableName(schemaName, tableName);
    String qualifiedIndexTableName = null;
    if (indexTableName != null) {
        qualifiedIndexTableName = SchemaUtil.getQualifiedTableName(schemaName, indexTableName);
    }
    if (cmdLine.hasOption(ZK_QUORUM_OPT.getOpt())) {
        // ZK_QUORUM_OPT is optional, but if it's there, use it for both the conn and the job.
        String zkQuorum = cmdLine.getOptionValue(ZK_QUORUM_OPT.getOpt());
        PhoenixDriver.ConnectionInfo info = PhoenixDriver.ConnectionInfo.create(zkQuorum);
        LOG.info("Configuring HBase connection to {}", info);
        for (Map.Entry<String, String> entry : info.asProps()) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Setting {} = {}", entry.getKey(), entry.getValue());
            }
            conf.set(entry.getKey(), entry.getValue());
        }
    }
    final Connection conn = QueryUtil.getConnection(conf);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Reading columns from {} :: {}", ((PhoenixConnection) conn).getURL(), qualifiedTableName);
    }
    List<ColumnInfo> importColumns = buildImportColumns(conn, cmdLine, qualifiedTableName);
    Preconditions.checkNotNull(importColumns);
    Preconditions.checkArgument(!importColumns.isEmpty(), "Column info list is empty");
    FormatToBytesWritableMapper.configureColumnInfoList(conf, importColumns);
    boolean ignoreInvalidRows = cmdLine.hasOption(IGNORE_ERRORS_OPT.getOpt());
    conf.setBoolean(FormatToBytesWritableMapper.IGNORE_INVALID_ROW_CONFKEY, ignoreInvalidRows);
    conf.set(FormatToBytesWritableMapper.TABLE_NAME_CONFKEY, qualifiedTableName);
    // give subclasses their hook
    configureOptions(cmdLine, importColumns, conf);
    try {
        validateTable(conn, schemaName, tableName);
    } finally {
        conn.close();
    }
    final String inputPaths = cmdLine.getOptionValue(INPUT_PATH_OPT.getOpt());
    final Path outputPath;
    if (cmdLine.hasOption(OUTPUT_PATH_OPT.getOpt())) {
        outputPath = new Path(cmdLine.getOptionValue(OUTPUT_PATH_OPT.getOpt()));
    } else {
        outputPath = new Path("/tmp/" + UUID.randomUUID());
    }
    List<TargetTableRef> tablesToBeLoaded = new ArrayList<TargetTableRef>();
    PTable table = PhoenixRuntime.getTable(conn, qualifiedTableName);
    tablesToBeLoaded.add(new TargetTableRef(qualifiedTableName, table.getPhysicalName().getString()));
    boolean hasLocalIndexes = false;
    for (PTable index : table.getIndexes()) {
        if (index.getIndexType() == IndexType.LOCAL) {
            hasLocalIndexes = qualifiedIndexTableName == null ? true : index.getTableName().getString().equals(qualifiedIndexTableName);
            if (hasLocalIndexes)
                break;
        }
    }
    // using conn after it's been closed... o.O
    tablesToBeLoaded.addAll(getIndexTables(conn, schemaName, qualifiedTableName));
    // When loading a single index table, check index table name is correct
    if (qualifiedIndexTableName != null) {
        TargetTableRef targetIndexRef = null;
        for (TargetTableRef tmpTable : tablesToBeLoaded) {
            if (tmpTable.getLogicalName().compareToIgnoreCase(qualifiedIndexTableName) == 0) {
                targetIndexRef = tmpTable;
                break;
            }
        }
        if (targetIndexRef == null) {
            throw new IllegalStateException("Bulk Loader error: index table " + qualifiedIndexTableName + " doesn't exist");
        }
        tablesToBeLoaded.clear();
        tablesToBeLoaded.add(targetIndexRef);
    }
    return submitJob(conf, tableName, inputPaths, outputPath, tablesToBeLoaded, hasLocalIndexes);
}
Also used : Path(org.apache.hadoop.fs.Path) PhoenixDriver(org.apache.phoenix.jdbc.PhoenixDriver) Connection(java.sql.Connection) PhoenixConnection(org.apache.phoenix.jdbc.PhoenixConnection) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.phoenix.util.ColumnInfo) PTable(org.apache.phoenix.schema.PTable) TargetTableRef(org.apache.phoenix.mapreduce.bulkload.TargetTableRef) Map(java.util.Map)
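
When a specific index table is requested, the tail of loadData narrows tablesToBeLoaded down to the single TargetTableRef whose logical name matches the requested qualified index name, and fails fast otherwise. Below is a self-contained sketch of just that selection step, using hypothetical table names.

import java.util.ArrayList;
import java.util.List;

import org.apache.phoenix.mapreduce.bulkload.TargetTableRef;

public class SingleIndexSelectionSketch {

    // Keep only the target whose logical name matches the requested index table.
    static TargetTableRef selectIndex(List<TargetTableRef> tablesToBeLoaded,
                                      String qualifiedIndexTableName) {
        for (TargetTableRef candidate : tablesToBeLoaded) {
            if (candidate.getLogicalName().equalsIgnoreCase(qualifiedIndexTableName)) {
                return candidate;
            }
        }
        throw new IllegalStateException("Bulk Loader error: index table "
                + qualifiedIndexTableName + " doesn't exist");
    }

    public static void main(String[] args) {
        List<TargetTableRef> tables = new ArrayList<>();
        tables.add(new TargetTableRef("MY_SCHEMA.MY_TABLE", "MY_SCHEMA.MY_TABLE"));
        tables.add(new TargetTableRef("MY_SCHEMA.MY_INDEX", "MY_SCHEMA.MY_INDEX"));
        TargetTableRef target = selectIndex(tables, "MY_SCHEMA.MY_INDEX");
        System.out.println("Loading only " + target.getPhysicalName());
    }
}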

Example 4 with TargetTableRef

Use of org.apache.phoenix.mapreduce.bulkload.TargetTableRef in project phoenix by apache.

The class MultiHfileOutputFormat, method getTableConfigurations:

/**
 * Returns the configuration that was stored for the given table during job initialization,
 * or null if no configuration was set for that table.
 * @param conf the job configuration
 * @param tableName the physical table name under which the table definition was stored
 * @return the table's configuration map, or null if the table was not configured
 */
private static Map<String, String> getTableConfigurations(Configuration conf, final String tableName) {
    String tableDefn = conf.get(tableName);
    if (StringUtils.isEmpty(tableDefn)) {
        return null;
    }
    TargetTableRef table = TargetTableRefFunctions.FROM_JSON.apply(tableDefn);
    Map<String, String> tableConfigs = table.getConfiguration();
    return tableConfigs;
}
Also used : TargetTableRef(org.apache.phoenix.mapreduce.bulkload.TargetTableRef)
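
Because the lookup returns null when nothing was stored under the table name, callers must be prepared for that case. The sketch below is a defensive variant of the same lookup that returns an empty map instead; it is an illustration, not the project's code.

import java.util.Collections;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.phoenix.mapreduce.bulkload.TargetTableRef;
import org.apache.phoenix.mapreduce.bulkload.TargetTableRefFunctions;

public class TableConfigLookupSketch {

    // Same lookup as getTableConfigurations, but never returns null.
    static Map<String, String> tableConfigsOrEmpty(Configuration conf, String tableName) {
        String tableDefn = conf.get(tableName);
        if (tableDefn == null || tableDefn.isEmpty()) {
            return Collections.emptyMap();
        }
        TargetTableRef table = TargetTableRefFunctions.FROM_JSON.apply(tableDefn);
        Map<String, String> tableConfigs = table.getConfiguration();
        return tableConfigs == null ? Collections.<String, String>emptyMap() : tableConfigs;
    }
}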

Example 5 with TargetTableRef

Use of org.apache.phoenix.mapreduce.bulkload.TargetTableRef in project phoenix by apache.

The class AbstractBulkLoadTool, method getIndexTables:

/**
 * Gets the index tables of the given data table.
 * @throws java.sql.SQLException
 */
private List<TargetTableRef> getIndexTables(Connection conn, String schemaName, String qualifiedTableName) throws SQLException {
    PTable table = PhoenixRuntime.getTable(conn, qualifiedTableName);
    List<TargetTableRef> indexTables = new ArrayList<TargetTableRef>();
    for (PTable indexTable : table.getIndexes()) {
        indexTables.add(new TargetTableRef(indexTable.getName().getString(), indexTable.getPhysicalName().getString()));
    }
    return indexTables;
}
Also used : TargetTableRef(org.apache.phoenix.mapreduce.bulkload.TargetTableRef) ArrayList(java.util.ArrayList) PTable(org.apache.phoenix.schema.PTable)
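
Putting the pieces together, the sketch below shows a caller-side view of building the bulk-load targets for a data table and its indexes, using QueryUtil, PhoenixRuntime, and TargetTableRef exactly as they appear in the examples above; the table name is hypothetical.

import java.sql.Connection;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.phoenix.mapreduce.bulkload.TargetTableRef;
import org.apache.phoenix.schema.PTable;
import org.apache.phoenix.util.PhoenixRuntime;
import org.apache.phoenix.util.QueryUtil;

public class BulkLoadTargetsSketch {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection conn = QueryUtil.getConnection(conf)) {
            // Resolve the data table and list it first, then one entry per index.
            PTable dataTable = PhoenixRuntime.getTable(conn, "MY_SCHEMA.MY_TABLE");
            List<TargetTableRef> targets = new ArrayList<>();
            targets.add(new TargetTableRef("MY_SCHEMA.MY_TABLE",
                    dataTable.getPhysicalName().getString()));
            for (PTable index : dataTable.getIndexes()) {
                targets.add(new TargetTableRef(index.getName().getString(),
                        index.getPhysicalName().getString()));
            }
            for (TargetTableRef target : targets) {
                System.out.println(target.getLogicalName() + " -> " + target.getPhysicalName());
            }
        }
    }
}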

Aggregations

TargetTableRef (org.apache.phoenix.mapreduce.bulkload.TargetTableRef): 5
ArrayList (java.util.ArrayList): 2
Path (org.apache.hadoop.fs.Path): 2
HTable (org.apache.hadoop.hbase.client.HTable): 2
PTable (org.apache.phoenix.schema.PTable): 2
Connection (java.sql.Connection): 1
HashSet (java.util.HashSet): 1
Map (java.util.Map): 1
Configuration (org.apache.hadoop.conf.Configuration): 1
KeyValueSerialization (org.apache.hadoop.hbase.mapreduce.KeyValueSerialization): 1
LoadIncrementalHFiles (org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles): 1
MutationSerialization (org.apache.hadoop.hbase.mapreduce.MutationSerialization): 1
ResultSerialization (org.apache.hadoop.hbase.mapreduce.ResultSerialization): 1
PhoenixConnection (org.apache.phoenix.jdbc.PhoenixConnection): 1
PhoenixDriver (org.apache.phoenix.jdbc.PhoenixDriver): 1
TableRowkeyPair (org.apache.phoenix.mapreduce.bulkload.TableRowkeyPair): 1
ColumnInfo (org.apache.phoenix.util.ColumnInfo): 1