Example 81 with Location

Use of org.apache.twill.filesystem.Location in project cdap by caskdata.

From class StreamUtils, method getNextSequenceId.

/**
   * Finds the next sequence id for the given partition with the given file prefix.
   *
   * @param partitionLocation the directory where the stream partition is
   * @param filePrefix prefix of file name to match
   * @return the next sequence id, which is the current max id + 1.
   * @throws IOException if failed to find the next sequence id
   */
public static int getNextSequenceId(Location partitionLocation, String filePrefix) throws IOException {
    // Try to find the file of this bucket with the highest sequence number.
    int maxSequence = -1;
    for (Location location : partitionLocation.list()) {
        String fileName = location.getName();
        if (!fileName.startsWith(filePrefix)) {
            continue;
        }
        int idx = fileName.lastIndexOf('.');
        if (idx < filePrefix.length()) {
            // Ignore file with invalid stream file name
            continue;
        }
        try {
            // File name format is [prefix].[sequenceId].[dat|idx]
            int seq = StreamUtils.getSequenceId(fileName);
            if (seq > maxSequence) {
                maxSequence = seq;
            }
        } catch (NumberFormatException e) {
            // Ignore stream file with invalid sequence id
        }
    }
    return maxSequence + 1;
}
Also used : Location(org.apache.twill.filesystem.Location)
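
A minimal usage sketch for this method, not taken from the CDAP source: the partition directory and file prefix are hypothetical, a local file system is assumed via Twill's LocalLocationFactory, and StreamUtils is assumed to be the CDAP class shown above (co.cask.cdap.data.stream).

import java.io.File;
import java.io.IOException;
// Package assumed for the StreamUtils class documented above.
import co.cask.cdap.data.stream.StreamUtils;
import org.apache.twill.filesystem.LocalLocationFactory;
import org.apache.twill.filesystem.Location;

public class NextSequenceIdSketch {
    public static void main(String[] args) throws IOException {
        // Hypothetical partition directory containing files named [prefix].[sequenceId].[dat|idx].
        Location partition = new LocalLocationFactory(new File("/tmp")).create("stream-partition");
        // Returns max(sequenceId) + 1 over matching files, or 0 if none match.
        int nextSeq = StreamUtils.getNextSequenceId(partition, "file");
        System.out.println("next sequence id: " + nextSeq);
    }
}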

Example 82 with Location

Use of org.apache.twill.filesystem.Location in project cdap by caskdata.

From class StreamUtils, method fetchStreamFilesSize.

/**
   * Get the size of the data persisted for the stream under the given stream location.
   *
   * @param streamLocation stream to get data size of
   * @return the size of the data persisted for the stream under the given {@code streamLocation}
   * @throws IOException in case of any error in fetching the size
   */
public static long fetchStreamFilesSize(Location streamLocation) throws IOException {
    Processor<LocationStatus, Long> processor = new Processor<LocationStatus, Long>() {

        private long size = 0;

        @Override
        public boolean process(LocationStatus input) {
            if (!input.isDir() && StreamFileType.EVENT.isMatched(input.getUri().getPath())) {
                size += input.getLength();
            }
            return true;
        }

        @Override
        public Long getResult() {
            return size;
        }
    };
    List<Location> locations = streamLocation.list();
    // All directories are partition directories
    for (Location location : locations) {
        if (!location.isDirectory() || !isPartition(location.getName())) {
            continue;
        }
        Locations.processLocations(location, false, processor);
    }
    return processor.getResult();
}
Also used : Processor(co.cask.cdap.common.io.Processor) LocationStatus(co.cask.cdap.common.io.LocationStatus) Location(org.apache.twill.filesystem.Location)
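
The anonymous Processor above is a callback that Locations.processLocations drives over each entry beneath a partition directory, accumulating state and reporting it via getResult(). A standalone sketch of the same pattern, counting plain files under a hypothetical local directory (Processor and LocationStatus are the CDAP types used above; Locations is assumed to live alongside them in co.cask.cdap.common.io; only the directory path is made up):

import java.io.File;
import java.io.IOException;
import co.cask.cdap.common.io.LocationStatus;
import co.cask.cdap.common.io.Locations;
import co.cask.cdap.common.io.Processor;
import org.apache.twill.filesystem.LocalLocationFactory;
import org.apache.twill.filesystem.Location;

public class FileCountSketch {
    public static void main(String[] args) throws IOException {
        Location dir = new LocalLocationFactory(new File("/tmp")).create("some-dir");
        Processor<LocationStatus, Integer> counter = new Processor<LocationStatus, Integer>() {
            private int count = 0;

            @Override
            public boolean process(LocationStatus status) {
                if (!status.isDir()) {
                    count++;
                }
                // Returning true continues the traversal; false would stop it early.
                return true;
            }

            @Override
            public Integer getResult() {
                return count;
            }
        };
        // The boolean mirrors the 'false' passed in the example above (non-recursive traversal).
        Locations.processLocations(dir, false, counter);
        System.out.println("files: " + counter.getResult());
    }
}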

Example 83 with Location

Use of org.apache.twill.filesystem.Location in project cdap by caskdata.

From class AbstractHBaseDataSetAdmin, method updateTable.

/**
   * Performs update on a given HBase table. It will be updated if either its spec has
   * changed since the HBase table was created or updated, or if the CDAP version recorded
   * in the HTable descriptor is less than the current CDAP version.
   *
   * @param force forces update regardless of whether the table needs it.
   * @throws IOException If update failed.
   */
public void updateTable(boolean force) throws IOException {
    try (HBaseDDLExecutor ddlExecutor = ddlExecutorFactory.get()) {
        HTableDescriptor tableDescriptor;
        try (HBaseAdmin admin = new HBaseAdmin(hConf)) {
            tableDescriptor = tableUtil.getHTableDescriptor(admin, tableId);
        }
        // update any table properties if necessary
        boolean needUpdate = needsUpdate(tableDescriptor) || force;
        // Get the cdap version from the table
        ProjectInfo.Version version = HBaseTableUtil.getVersion(tableDescriptor);
        if (!needUpdate && version.compareTo(ProjectInfo.getVersion()) >= 0) {
            // If neither the table spec nor the cdap version have changed, no need to update
            LOG.info("Table '{}' has not changed and its version '{}' is same or greater " + "than current CDAP version '{}'", tableId, version, ProjectInfo.getVersion());
            return;
        }
        // create a new descriptor for the table update
        HTableDescriptorBuilder newDescriptor = tableUtil.buildHTableDescriptor(tableDescriptor);
        // Generate the coprocessor jar
        CoprocessorJar coprocessorJar = createCoprocessorJar();
        Location jarLocation = coprocessorJar.getJarLocation();
        // Check if coprocessor upgrade is needed
        Map<String, HBaseTableUtil.CoprocessorInfo> coprocessorInfo = HBaseTableUtil.getCoprocessorInfo(tableDescriptor);
        // For all required coprocessors, check if they need to be upgraded.
        for (Class<? extends Coprocessor> coprocessor : coprocessorJar.getCoprocessors()) {
            HBaseTableUtil.CoprocessorInfo info = coprocessorInfo.get(coprocessor.getName());
            if (info != null) {
                // The same coprocessor has been configured, check by the file name to see if they are the same.
                if (!jarLocation.getName().equals(info.getPath().getName())) {
                    // Remove old one and add the new one.
                    newDescriptor.removeCoprocessor(info.getClassName());
                    addCoprocessor(newDescriptor, coprocessor, coprocessorJar.getPriority(coprocessor));
                }
            } else {
                // The coprocessor is missing from the table, add it.
                addCoprocessor(newDescriptor, coprocessor, coprocessorJar.getPriority(coprocessor));
            }
        }
        // Removes all old coprocessors
        Set<String> coprocessorNames = ImmutableSet.copyOf(Iterables.transform(coprocessorJar.coprocessors, CLASS_TO_NAME));
        for (String remove : Sets.difference(coprocessorInfo.keySet(), coprocessorNames)) {
            newDescriptor.removeCoprocessor(remove);
        }
        HBaseTableUtil.setVersion(newDescriptor);
        HBaseTableUtil.setTablePrefix(newDescriptor, cConf);
        LOG.info("Updating table '{}'...", tableId);
        TableName tableName = HTableNameConverter.toTableName(cConf.get(Constants.Dataset.TABLE_PREFIX), tableId);
        boolean enableTable = false;
        try {
            ddlExecutor.disableTableIfEnabled(tableName.getNamespaceAsString(), tableName.getQualifierAsString());
            enableTable = true;
        } catch (TableNotEnabledException e) {
            // If the table is in the cdap_system namespace, enable it regardless so that it can be used later. See CDAP-7324
            if (isSystemTable()) {
                enableTable = true;
            } else {
                LOG.debug("Table '{}' was not enabled before update and will not be enabled after update.", tableId);
            }
        }
        tableUtil.modifyTable(ddlExecutor, newDescriptor.build());
        if (enableTable) {
            LOG.debug("Enabling table '{}'...", tableId);
            ddlExecutor.enableTableIfDisabled(tableName.getNamespaceAsString(), tableName.getQualifierAsString());
        }
    }
    LOG.info("Table '{}' update completed.", tableId);
}
Also used : HBaseDDLExecutor(co.cask.cdap.spi.hbase.HBaseDDLExecutor) HTableDescriptorBuilder(co.cask.cdap.data2.util.hbase.HTableDescriptorBuilder) HBaseTableUtil(co.cask.cdap.data2.util.hbase.HBaseTableUtil) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) HBaseAdmin(org.apache.hadoop.hbase.client.HBaseAdmin) TableName(org.apache.hadoop.hbase.TableName) ProjectInfo(co.cask.cdap.common.utils.ProjectInfo) Location(org.apache.twill.filesystem.Location) TableNotEnabledException(org.apache.hadoop.hbase.TableNotEnabledException)
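
The "removes all old coprocessors" step is plain Guava set algebra: any coprocessor configured on the descriptor but absent from the freshly built jar is dropped. A self-contained sketch of that difference computation, with made-up coprocessor class names:

import java.util.Set;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;

public class CoprocessorDiffSketch {
    public static void main(String[] args) {
        // Coprocessors currently configured on the table (hypothetical names).
        Set<String> configured = ImmutableSet.of("com.example.IncrementCoproc", "com.example.OldTxCoproc");
        // Coprocessors the new jar still requires.
        Set<String> required = ImmutableSet.of("com.example.IncrementCoproc");
        // Anything configured but no longer required would be removed from the descriptor.
        for (String stale : Sets.difference(configured, required)) {
            System.out.println("would remove coprocessor: " + stale);
        }
    }
}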

Example 84 with Location

Use of org.apache.twill.filesystem.Location in project cdap by caskdata.

From class PartitionedFileSetDataset, method postTxCommit.

@Override
public void postTxCommit() {
    // simply delete the quarantine directory for this transaction
    try {
        Location quarantine = getQuarantineLocation();
        if (quarantine.exists()) {
            boolean deleteSuccess = quarantine.delete(true);
            if (!deleteSuccess) {
                throw new DataSetException(String.format("Error deleting quarantine location %s.", quarantine));
            }
        }
    } catch (IOException e) {
        throw new DataSetException(String.format("Error deleting quarantine location for tx %s.", tx.getWritePointer()), e);
    }
    this.tx = null;
    super.postTxCommit();
}
Also used : DataSetException(co.cask.cdap.api.dataset.DataSetException) IOException(java.io.IOException) Location(org.apache.twill.filesystem.Location)
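
The explicit success check is needed because Twill's Location.delete(boolean recursive) reports failure by returning false rather than throwing. A minimal sketch of the same check-then-throw pattern against a hypothetical local directory:

import java.io.File;
import java.io.IOException;
import org.apache.twill.filesystem.LocalLocationFactory;
import org.apache.twill.filesystem.Location;

public class RecursiveDeleteSketch {
    public static void main(String[] args) throws IOException {
        Location quarantine = new LocalLocationFactory(new File("/tmp")).create("quarantine");
        // delete(true) removes the directory together with its contents.
        if (quarantine.exists() && !quarantine.delete(true)) {
            throw new IOException("Error deleting quarantine location " + quarantine);
        }
    }
}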

Example 85 with Location

Use of org.apache.twill.filesystem.Location in project cdap by caskdata.

From class PartitionedFileSetDataset, method dropPartition.

@WriteOnly
@Override
public void dropPartition(PartitionKey key) {
    byte[] rowKey = generateRowKey(key, partitioning);
    PartitionDetail partition = getPartition(key);
    if (partition == null) {
        // silently ignore non-existing partitions
        return;
    }
    // TODO: make DDL operations transactional [CDAP-1393]
    dropPartitionFromExplore(key);
    partitionsTable.delete(rowKey);
    if (!isExternal) {
        Location partitionLocation = partition.getLocation();
        try {
            if (partitionLocation.exists()) {
                Location dstLocation = getQuarantineLocation().append(partition.getRelativePath());
                Location dstParent = Locations.getParent(dstLocation);
                // shouldn't be null, since dstLocation was created by appending to a location, so it must have a parent
                Preconditions.checkNotNull(dstParent);
                // before moving into quarantine, we need to ensure that parent location exists
                if (!dstParent.exists()) {
                    if (!dstParent.mkdirs()) {
                        throw new DataSetException(String.format("Failed to create parent directory %s", dstParent));
                    }
                }
                partitionLocation.renameTo(dstLocation);
            }
        } catch (IOException ioe) {
            throw new DataSetException(String.format("Failed to move location %s into quarantine", partitionLocation));
        }
        operationsInThisTx.add(new DropPartitionOperation(key, partition.getRelativePath()));
    }
}
Also used : DataSetException(co.cask.cdap.api.dataset.DataSetException) IOException(java.io.IOException) PartitionDetail(co.cask.cdap.api.dataset.lib.PartitionDetail) Location(org.apache.twill.filesystem.Location) WriteOnly(co.cask.cdap.api.annotation.WriteOnly)
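
The move-to-quarantine step is the usual ensure-parent-then-rename idiom on a Location. A condensed sketch with hypothetical paths (Locations.getParent is the CDAP helper used above, assumed to be in co.cask.cdap.common.io):

import java.io.File;
import java.io.IOException;
import co.cask.cdap.common.io.Locations;
import org.apache.twill.filesystem.LocalLocationFactory;
import org.apache.twill.filesystem.Location;

public class QuarantineMoveSketch {
    public static void main(String[] args) throws IOException {
        LocalLocationFactory factory = new LocalLocationFactory(new File("/tmp"));
        Location src = factory.create("pfs/data/year=2017");
        Location dst = factory.create("pfs/quarantine/year=2017");
        // Ensure the destination's parent directory exists before renaming into it.
        Location dstParent = Locations.getParent(dst);
        if (dstParent != null && !dstParent.exists() && !dstParent.mkdirs()) {
            throw new IOException("Failed to create parent directory " + dstParent);
        }
        // renameTo returns the renamed Location, or null when the rename fails.
        if (src.exists() && src.renameTo(dst) == null) {
            throw new IOException("Failed to move " + src + " into quarantine");
        }
    }
}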

Aggregations

Location (org.apache.twill.filesystem.Location): 246
Test (org.junit.Test): 104
IOException (java.io.IOException): 57
File (java.io.File): 39
LocalLocationFactory (org.apache.twill.filesystem.LocalLocationFactory): 29
LocationFactory (org.apache.twill.filesystem.LocationFactory): 29
FileSet (co.cask.cdap.api.dataset.lib.FileSet): 28
StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent): 27
PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet): 23
CConfiguration (co.cask.cdap.common.conf.CConfiguration): 19
NamespaceId (co.cask.cdap.proto.id.NamespaceId): 19
Manifest (java.util.jar.Manifest): 18
HashMap (java.util.HashMap): 17
StreamId (co.cask.cdap.proto.id.StreamId): 16
OutputStream (java.io.OutputStream): 15
DatasetFramework (co.cask.cdap.data2.dataset2.DatasetFramework): 13
TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet): 11
StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig): 10
ArrayList (java.util.ArrayList): 9
StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin): 8