Search in sources :

Example 66 with StructuredRow

use of io.cdap.cdap.spi.data.StructuredRow in project cdap by cdapio.

the class AppMetadataStore method scanApplications.

/**
 * Scans application metadata entries, optionally restricted to a namespace and a set of filters, and hands
 * every matching entry to a consumer function. Pagination is supported: to fetch the next page, set
 * {@link ScanApplicationsRequest#getScanFrom()} to the last application id of the previous page.
 *
 * @param request parameters defining the namespace, scan bounds, filters, sort order and limit
 * @param func a {@link Function} that consumes the application metadata entries generated by the scan. If it
 *             returns {@code true}, the scan continues; otherwise the scan stops and this method returns.
 *             Note that the parameter is a {@link Map.Entry} to allow lazy deserialization of
 *             {@link ApplicationMeta} and it should not be replaced with {@link BiFunction}.
 * @see ScanApplicationsRequest#builder(ScanApplicationsRequest) to create a next page / batch request
 * @throws IOException if failed to scan the storage
 * @throws IllegalArgumentException if the scan-from or scan-to application is outside of the requested namespace
 * @throws UnsupportedOperationException if the request carries a filter type this method does not handle
 */
public void scanApplications(ScanApplicationsRequest request, Function<Map.Entry<ApplicationId, ApplicationMeta>, Boolean> func) throws IOException {
    // By default both ends of the range are the (possibly empty) namespace prefix, inclusive.
    Collection<Field<?>> fromFields;
    if (request.getNamespaceId() == null) {
        fromFields = Collections.emptyList();
    } else {
        fromFields = Collections.singletonList(Fields.stringField(StoreDefinition.AppMetadataStore.NAMESPACE_FIELD, request.getNamespaceId().getNamespace()));
    }
    Collection<Field<?>> toFields = fromFields;
    Range.Bound fromBound = Range.Bound.INCLUSIVE;
    Range.Bound toBound = Range.Bound.INCLUSIVE;

    // An explicit scan-from application replaces the start of the range and is itself excluded.
    if (request.getScanFrom() != null) {
        boolean fromOutsideNamespace = request.getNamespaceId() != null && !request.getNamespaceId().equals(request.getScanFrom().getNamespaceId());
        if (fromOutsideNamespace) {
            throw new IllegalArgumentException("Requested to start scan from application " + request.getScanFrom() + " that is outside of scan namespace " + request.getNamespaceId());
        }
        fromBound = Range.Bound.EXCLUSIVE;
        fromFields = getApplicationPrimaryKeys(request.getScanFrom());
    }

    // An explicit scan-to application replaces the end of the range and is itself excluded.
    if (request.getScanTo() != null) {
        boolean toOutsideNamespace = request.getNamespaceId() != null && !request.getNamespaceId().equals(request.getScanTo().getNamespaceId());
        if (toOutsideNamespace) {
            throw new IllegalArgumentException("Requested to finish scan at application " + request.getScanTo() + " that is outside of scan namespace " + request.getNamespaceId());
        }
        toBound = Range.Bound.EXCLUSIVE;
        toFields = getApplicationPrimaryKeys(request.getScanTo());
    }

    // For any sort order other than ascending, the two ends of the range are passed in swapped positions.
    boolean ascending = request.getSortOrder() == SortOrder.ASC;
    Range range = ascending
        ? Range.create(fromFields, fromBound, toFields, toBound)
        : Range.create(toFields, toBound, fromFields, fromBound);

    // Filters are applied here rather than being pushed down to the StructuredTable,
    // but ApplicationMeta is not deserialized unless a filter (or the consumer) asks for it.
    Predicate<AppScanEntry> matches = entry -> true;
    for (ApplicationFilter filter : request.getFilters()) {
        if (filter instanceof ApplicationFilter.ApplicationIdFilter) {
            ApplicationFilter.ApplicationIdFilter idFilter = (ApplicationFilter.ApplicationIdFilter) filter;
            matches = matches.and(entry -> idFilter.test(entry.getKey()));
        } else if (filter instanceof ApplicationFilter.ArtifactIdFilter) {
            ApplicationFilter.ArtifactIdFilter artifactFilter = (ApplicationFilter.ArtifactIdFilter) filter;
            matches = matches.and(entry -> artifactFilter.test(entry.getValue().getSpec().getArtifactId()));
        } else {
            throw new UnsupportedOperationException("Application filter " + filter + " is not supported");
        }
    }

    StructuredTable table = getApplicationSpecificationTable();
    // The limit counts entries handed to the consumer, not rows read, so the table scan itself is unbounded.
    int remaining = request.getLimit();
    try (CloseableIterator<StructuredRow> rows = table.scan(range, Integer.MAX_VALUE, request.getSortOrder())) {
        boolean proceed = true;
        while (rows.hasNext() && proceed && remaining > 0) {
            AppScanEntry candidate = new AppScanEntry(rows.next());
            if (!matches.test(candidate)) {
                continue;
            }
            proceed = func.apply(candidate);
            remaining--;
        }
    }
}
Also used : Arrays(java.util.Arrays) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) StructuredRow(io.cdap.cdap.spi.data.StructuredRow) BiFunction(java.util.function.BiFunction) LoggerFactory(org.slf4j.LoggerFactory) Bytes(io.cdap.cdap.api.common.Bytes) Fields(io.cdap.cdap.spi.data.table.field.Fields) GsonBuilder(com.google.gson.GsonBuilder) ProgramRunCluster(io.cdap.cdap.proto.ProgramRunCluster) ScanApplicationsRequest(io.cdap.cdap.app.store.ScanApplicationsRequest) DatasetId(io.cdap.cdap.proto.id.DatasetId) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Gson(com.google.gson.Gson) Map(java.util.Map) Field(io.cdap.cdap.spi.data.table.field.Field) RunId(org.apache.twill.api.RunId) BasicWorkflowToken(io.cdap.cdap.internal.app.runtime.workflow.BasicWorkflowToken) SortOrder(io.cdap.cdap.spi.data.SortOrder) BasicThrowable(io.cdap.cdap.proto.BasicThrowable) StoreDefinition(io.cdap.cdap.store.StoreDefinition) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) Collection(java.util.Collection) ApplicationSpecification(io.cdap.cdap.api.app.ApplicationSpecification) Set(java.util.Set) ProgramRunStatus(io.cdap.cdap.proto.ProgramRunStatus) StructuredTableContext(io.cdap.cdap.spi.data.StructuredTableContext) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) ApplicationSpecificationAdapter(io.cdap.cdap.internal.app.ApplicationSpecificationAdapter) Objects(java.util.Objects) AbstractCloseableIterator(io.cdap.cdap.api.dataset.lib.AbstractCloseableIterator) List(java.util.List) Type(java.lang.reflect.Type) Optional(java.util.Optional) Constants(io.cdap.cdap.common.conf.Constants) ProfileId(io.cdap.cdap.proto.id.ProfileId) ProgramOptionConstants(io.cdap.cdap.internal.app.runtime.ProgramOptionConstants) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) HashMap(java.util.HashMap) TypeToken(com.google.common.reflect.TypeToken) ProgramType(io.cdap.cdap.proto.ProgramType) Function(java.util.function.Function) 
JsonReader(com.google.gson.stream.JsonReader) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) LinkedHashMap(java.util.LinkedHashMap) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) ImmutableList(com.google.common.collect.ImmutableList) BiConsumer(java.util.function.BiConsumer) SystemArguments(io.cdap.cdap.internal.app.runtime.SystemArguments) LinkedHashSet(java.util.LinkedHashSet) Nullable(javax.annotation.Nullable) WorkflowNodeStateDetail(io.cdap.cdap.proto.WorkflowNodeStateDetail) Logger(org.slf4j.Logger) RunIds(io.cdap.cdap.common.app.RunIds) ApplicationFilter(io.cdap.cdap.app.store.ApplicationFilter) ProgramId(io.cdap.cdap.proto.id.ProgramId) IOException(java.io.IOException) BadRequestException(io.cdap.cdap.common.BadRequestException) CloseableIterator(io.cdap.cdap.api.dataset.lib.CloseableIterator) ProgramRunClusterStatus(io.cdap.cdap.proto.ProgramRunClusterStatus) TableNotFoundException(io.cdap.cdap.spi.data.TableNotFoundException) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) JsonToken(com.google.gson.stream.JsonToken) StringReader(java.io.StringReader) StructuredTable(io.cdap.cdap.spi.data.StructuredTable) Range(io.cdap.cdap.spi.data.table.field.Range) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Collections(java.util.Collections) ArtifactId(io.cdap.cdap.api.artifact.ArtifactId) StructuredTable(io.cdap.cdap.spi.data.StructuredTable) StructuredRow(io.cdap.cdap.spi.data.StructuredRow) Range(io.cdap.cdap.spi.data.table.field.Range) Field(io.cdap.cdap.spi.data.table.field.Field) ApplicationFilter(io.cdap.cdap.app.store.ApplicationFilter)

Example 67 with StructuredRow

use of io.cdap.cdap.spi.data.StructuredRow in project cdap by cdapio.

the class WorkflowTable method getRecord.

/**
 * Reads the run record stored for a single run of the given workflow.
 *
 * The lookup key is built from the workflow id and the start time (in seconds) derived from the run id.
 *
 * @param id the workflow whose run is looked up
 * @param pid the run id, in string form
 * @return the run record built from the stored row, or {@code null} if no row exists for the key
 * @throws IOException if reading from the table fails
 */
@Nullable
WorkflowRunRecord getRecord(WorkflowId id, String pid) throws IOException {
    long startTime = RunIds.getTime(RunIds.fromString(pid), TimeUnit.SECONDS);
    Optional<StructuredRow> indexRow = table.read(getPrimaryKeyFields(id, startTime));
    return indexRow.isPresent() ? getRunRecordFromRow(indexRow.get()) : null;
}
Also used : Field(io.cdap.cdap.spi.data.table.field.Field) StructuredRow(io.cdap.cdap.spi.data.StructuredRow) RunId(org.apache.twill.api.RunId) Nullable(javax.annotation.Nullable)

Example 68 with StructuredRow

use of io.cdap.cdap.spi.data.StructuredRow in project cdap by cdapio.

the class WorkflowTable method getNeighbors.

/**
 * Returns a map of run id to WorkflowRunRecord for runs that are close to the run id provided by the user.
 *
 * The method walks a time window centred on the start time of {@code runId}, from
 * {@code startTime - (limit * timeInterval)} to {@code startTime + (limit * timeInterval)}, fetching one row
 * per table scan and moving the lower end of the window forward after each row.
 *
 * @param id The workflow
 * @param runId The runid of the workflow
 * @param limit The limit on each side of the run that we want to see into
 * @param timeInterval The time interval that we want the results to be spaced apart
 * @return A Map keyed by the run id string read from each row, with the corresponding Workflow Run Record as
 * value. A map is used so that duplicates of the WorkflowRunRecord are not obtained
 * @throws IOException declared by this method; presumably raised when the table scan fails
 */
private Map<String, WorkflowRunRecord> getNeighbors(WorkflowId id, RunId runId, int limit, long timeInterval) throws IOException {
    // Start time of the reference run, in seconds; the scan window is centred on it.
    long startTime = RunIds.getTime(runId, TimeUnit.SECONDS);
    Map<String, WorkflowRunRecord> workflowRunRecords = new HashMap<>();
    // i counts interval steps relative to startTime, beginning limit steps before it.
    int i = -limit;
    // Lower end of the next scan; it begins at the left edge of the window.
    long prevStartTime = startTime - (limit * timeInterval);
    // The loop iterates across the range that is startTime - (limit * timeInterval) to
    // startTime + (limit * timeInterval) since we want to capture all runs that started in this range.
    // Since we want to stop getting the same key, we have the prevStartTime become 1 more than the time at which
    // the last record was found if the (interval * the count of the loop) is less than the time.
    // NOTE(review): with timeInterval == 0 and a row at startTime, prevStartTime appears never to advance
    // past upperBound, so this loop would not terminate — confirm callers never pass a zero interval.
    long upperBound = startTime + (limit * timeInterval);
    while (prevStartTime <= upperBound) {
        List<Field<?>> lowerBoundFields = getPrimaryKeyFields(id, prevStartTime);
        List<Field<?>> upperBoundFields = getPrimaryKeyFields(id, upperBound);
        // Scan [prevStartTime, upperBound] inclusive, passing 1 as the scan limit so a single row is fetched.
        // NOTE(review): the comment originally here was only a fragment ("last primary key which is numeric.");
        // presumably it referred to the start time being the numeric last primary-key field — confirm.
        try (CloseableIterator<StructuredRow> iterator = table.scan(Range.create(lowerBoundFields, Range.Bound.INCLUSIVE, upperBoundFields, Range.Bound.INCLUSIVE), 1)) {
            if (!iterator.hasNext()) {
                // Nothing left in the remaining window; return what has been collected so far.
                return workflowRunRecords;
            }
            StructuredRow indexRow = iterator.next();
            long timeOfNextRecord = indexRow.getLong(StoreDefinition.WorkflowStore.START_TIME_FIELD);
            workflowRunRecords.put(indexRow.getString(StoreDefinition.WorkflowStore.RUN_ID_FIELD), getRunRecordFromRow(indexRow));
            // Advance the lower end: if the current interval step lies before the row just found, continue
            // right after that row; otherwise continue from the current interval step.
            prevStartTime = startTime + (i * timeInterval) < timeOfNextRecord ? timeOfNextRecord + 1 : startTime + (i * timeInterval);
            i++;
        }
    }
    return workflowRunRecords;
}
Also used : Field(io.cdap.cdap.spi.data.table.field.Field) HashMap(java.util.HashMap) StructuredRow(io.cdap.cdap.spi.data.StructuredRow)

Example 69 with StructuredRow

use of io.cdap.cdap.spi.data.StructuredRow in project cdap by cdapio.

the class LineageTable method getAccessTimesForRun.

/**
 * Collects the access times stored in the program table for a program run.
 *
 * Rows are scanned using the run's scan start key, and only rows whose run field equals the run id of
 * {@code run} contribute an access time.
 *
 * @param run the program run whose access times are requested
 * @return an immutable list of access times (for program and data it accesses) associated with the program run,
 *         in scan order
 * @throws IOException if scanning the table fails
 */
@VisibleForTesting
public List<Long> getAccessTimesForRun(ProgramRunId run) throws IOException {
    ImmutableList.Builder<Long> accessTimes = ImmutableList.builder();
    List<Field<?>> runKey = getRunScanStartKey(run);
    try (CloseableIterator<StructuredRow> rows = getProgramTable().scan(Range.singleton(runKey), Integer.MAX_VALUE)) {
        while (rows.hasNext()) {
            StructuredRow current = rows.next();
            // Skip rows returned by the scan that belong to a different run.
            if (!run.getRun().equals(current.getString(StoreDefinition.LineageStore.RUN_FIELD))) {
                continue;
            }
            accessTimes.add(current.getLong(StoreDefinition.LineageStore.ACCESS_TIME_FIELD));
        }
    }
    return accessTimes.build();
}
Also used : Field(io.cdap.cdap.spi.data.table.field.Field) ImmutableList(com.google.common.collect.ImmutableList) StructuredRow(io.cdap.cdap.spi.data.StructuredRow) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 70 with StructuredRow

use of io.cdap.cdap.spi.data.StructuredRow in project cdap by cdapio.

the class FieldLineageTable method getSourceFields.

/**
 * Gathers the endpoint field values stored in the summary fields table for an endpoint in the outgoing
 * direction.
 *
 * Checksums are looked up for the endpoint within the given range, and for each checksum every row under the
 * matching summary prefix contributes its endpoint field value.
 *
 * @param endPoint the endpoint to look up
 * @param start start of the range passed to the checksum lookup
 * @param end end of the range passed to the checksum lookup
 * @return the distinct endpoint field values found; empty if there are none
 * @throws IOException if scanning the table fails
 */
private Set<String> getSourceFields(EndPoint endPoint, long start, long end) throws IOException {
    Set<Long> checksums = getChecksumsWithProgramRunsInRange(OUTGOING_DIRECTION_MARKER, endPoint, start, end).keySet();
    Set<String> sourceFields = new HashSet<>();
    for (long checksum : checksums) {
        List<Field<?>> summaryPrefix = getSummaryPrefix(checksum, OUTGOING_DIRECTION_MARKER, endPoint);
        try (CloseableIterator<StructuredRow> rows = getSummaryFieldsTable().scan(Range.singleton(summaryPrefix), Integer.MAX_VALUE)) {
            while (rows.hasNext()) {
                sourceFields.add(rows.next().getString(StoreDefinition.FieldLineageStore.ENDPOINT_FIELD));
            }
        }
    }
    return sourceFields;
}
Also used : Field(io.cdap.cdap.spi.data.table.field.Field) StructuredRow(io.cdap.cdap.spi.data.StructuredRow) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Aggregations

StructuredRow (io.cdap.cdap.spi.data.StructuredRow)142 StructuredTable (io.cdap.cdap.spi.data.StructuredTable)68 Field (io.cdap.cdap.spi.data.table.field.Field)66 ArrayList (java.util.ArrayList)54 Range (io.cdap.cdap.spi.data.table.field.Range)36 HashSet (java.util.HashSet)28 IOException (java.io.IOException)22 HashMap (java.util.HashMap)22 LinkedHashSet (java.util.LinkedHashSet)22 List (java.util.List)20 LinkedHashMap (java.util.LinkedHashMap)18 Map (java.util.Map)18 ArtifactNotFoundException (io.cdap.cdap.common.ArtifactNotFoundException)16 Collection (java.util.Collection)16 Set (java.util.Set)16 Nullable (javax.annotation.Nullable)16 ImmutableList (com.google.common.collect.ImmutableList)14 CloseableIterator (io.cdap.cdap.api.dataset.lib.CloseableIterator)14 TableNotFoundException (io.cdap.cdap.spi.data.TableNotFoundException)14 Fields (io.cdap.cdap.spi.data.table.field.Fields)14