Use of org.sonar.duplications.block.Block in the SonarQube project by SonarSource.
From the class OriginalCloneDetectionAlgorithm, method createGroups:
/**
 * Builds, for every statement index of the file, the group of all blocks (from the file itself
 * and from the clone index) that share the same block hash.
 *
 * @param fileBlocks blocks of the file currently analyzed; all must belong to {@code originResourceId}
 * @return array indexed by (statement index + 1); slots 0 and size+1 hold empty sentinel groups
 */
private BlocksGroup[] createGroups(Collection<Block> fileBlocks) {
  // 2: let f be the list of tuples corresponding to filename sorted by statement index
  // either read from the index or calculated on the fly
  int size = fileBlocks.size();

  // Godin: create one group per unique hash
  // TODO Godin: can we create map with expected size?
  Map<ByteArray, BlocksGroup> groupsByHash = new HashMap<>();
  for (Block fileBlock : fileBlocks) {
    // computeIfAbsent replaces the explicit get / null-check / put sequence
    groupsByHash.computeIfAbsent(fileBlock.getBlockHash(), h -> BlocksGroup.empty())
      .blocks.add(fileBlock);
  }

  // Godin: retrieve blocks from index
  for (Map.Entry<ByteArray, BlocksGroup> entry : groupsByHash.entrySet()) {
    ByteArray hash = entry.getKey();
    BlocksGroup group = entry.getValue();
    for (Block blockFromIndex : cloneIndex.getBySequenceHash(hash)) {
      // Godin: skip blocks for this file if they come from index
      if (!originResourceId.equals(blockFromIndex.getResourceId())) {
        group.blocks.add(blockFromIndex);
      }
    }
    // keep each group in the deterministic order the algorithm relies on
    group.blocks.sort(BlocksGroup.BlockComparator.INSTANCE);
  }

  // 3: let c be a list with c(0) = empty
  BlocksGroup[] sameHashBlocksGroups = new BlocksGroup[size + 2];
  sameHashBlocksGroups[0] = BlocksGroup.empty();
  // 4: for i := 1 to length(f) do
  for (Block fileBlock : fileBlocks) {
    ByteArray hash = fileBlock.getBlockHash();
    int i = fileBlock.getIndexInFile() + 1;
    // 5: retrieve tuples with same sequence hash as f(i)
    // 6: store this set as c(i)
    sameHashBlocksGroups[i] = groupsByHash.get(hash);
  }

  // Godin: allows to report clones at the end of file, because condition at line 13 would be evaluated as true
  sameHashBlocksGroups[size + 1] = BlocksGroup.empty();
  return sameHashBlocksGroups;
}
Use of org.sonar.duplications.block.Block in the SonarQube project by SonarSource.
From the class OriginalCloneDetectionAlgorithm, method findClones:
/**
 * Detects and reports all clone groups that involve the given file's blocks, following the
 * "original" algorithm whose pseudocode line numbers appear in the comments below.
 * <p>
 * Assumes {@code fileBlocks} is non-empty and that every block carries the same resource id
 * (the first block's id is taken as the origin file).
 */
private void findClones(Collection<Block> fileBlocks) {
  // All blocks belong to one file; remembering its id lets createGroups skip index hits for it.
  originResourceId = fileBlocks.iterator().next().getResourceId();
  BlocksGroup[] sameHashBlocksGroups = createGroups(fileBlocks);

  // 7: for i := 1 to length(c) do
  for (int i = 1; i < sameHashBlocksGroups.length; i++) {
    // 8: if |c(i)| < 2 or c(i) subsumed by c(i - 1) then
    if (sameHashBlocksGroups[i].size() < 2 || sameHashBlocksGroups[i].subsumedBy(sameHashBlocksGroups[i - 1], 1)) {
      // 9: continue with next loop iteration
      continue;
    }

    // The set a introduced in Line 10 is called the active set and
    // contains all tuples corresponding to clones which have not yet
    // been reported. At each iteration of the inner loop the set a
    // is reduced to tuples which are also present in c(j); again the
    // intersection operator has to account for the increased statement
    // index and different hash and info fields. The new value is
    // stored in a0. Clones are only reported, if tuples are lost in
    // Line 12, as otherwise all current clones could be prolonged
    // by one statement. Clone reporting matches tuples that, after
    // correction of the statement index, appear in both c(i) and a,
    // each matched pair corresponds to a single clone. Its location
    // can be extracted from the filename and info fields.

    // 10: let a := c(i)
    BlocksGroup currentBlocksGroup = sameHashBlocksGroups[i];
    // 11: for j := i + 1 to length(c) do
    for (int j = i + 1; j < sameHashBlocksGroups.length; j++) {
      // 12: let a0 := a intersect c(j)
      BlocksGroup intersectedBlocksGroup = currentBlocksGroup.intersect(sameHashBlocksGroups[j]);

      // 13: if |a0| < |a| then
      if (intersectedBlocksGroup.size() < currentBlocksGroup.size()) {
        // 14: report clones from c(i) to a (see text)

        // One problem of this algorithm is that clone classes with
        // multiple instances in the same file are encountered and
        // reported multiple times. Furthermore, when calculating the clone
        // groups for all files in a system, clone groups will be reported
        // more than once as well. Both cases can be avoided, by
        // checking whether the first element of a0 (with respect to a
        // fixed order) is equal to f(j) and only report in this case.
        Block first = currentBlocksGroup.first(originResourceId);
        if (first != null && first.getIndexInFile() == j - 2) {
          // Godin: We report clones, which start in i-1 and end in j-2, so length is j-2-(i-1)+1=j-i
          reportClones(sameHashBlocksGroups[i], currentBlocksGroup, j - i);
        }
      }
      // 15: a := a0
      currentBlocksGroup = intersectedBlocksGroup;

      // 16: if |a| < 2 or a subsumed by c(i-1) then
      if (currentBlocksGroup.size() < 2 || currentBlocksGroup.subsumedBy(sameHashBlocksGroups[i - 1], j - i + 1)) {
        // 17: break inner loop
        break;
      }
    }
  }
}
Use of org.sonar.duplications.block.Block in the SonarQube project by SonarSource.
From the class DuplicationsCollector, method endOfGroup:
/**
 * Constructs a {@link CloneGroup} from the collected block-number pairs and saves it.
 * <p>
 * The origin part is the part from {@code originResourceId} with the smallest unit start.
 * NOTE(review): {@code lengthInUnits} is computed only from the FIRST origin part encountered,
 * even if a later part with a smaller unit start replaces it as origin — confirm this is intended.
 */
@Override
public void endOfGroup() {
  ClonePart origin = null;
  CloneGroup.Builder builder = CloneGroup.builder().setLength(length);

  List<ClonePart> parts = new ArrayList<>(count);
  for (int[] b : blockNumbers) {
    Block firstBlock = text.getBlock(b[0]);
    Block lastBlock = text.getBlock(b[1]);
    ClonePart part = new ClonePart(firstBlock.getResourceId(), firstBlock.getIndexInFile(), firstBlock.getStartLine(), lastBlock.getEndLine());

    // TODO Godin: maybe use FastStringComparator here ?
    if (originResourceId.equals(part.getResourceId())) {
      // part from origin
      if (origin == null) {
        origin = part;
        // To calculate length important to use the origin, because otherwise block may come from DB without required data
        builder.setLengthInUnits(lastBlock.getEndUnit() - firstBlock.getStartUnit() + 1);
      } else if (part.getUnitStart() < origin.getUnitStart()) {
        origin = part;
      }
    }

    parts.add(part);
  }
  // List.sort instead of Collections.sort — same ordering, more idiomatic
  parts.sort(ContainsInComparator.CLONEPART_COMPARATOR);

  builder.setOrigin(origin).setParts(parts);
  filter(builder.build());
  reset();
}
Use of org.sonar.duplications.block.Block in the SonarQube project by SonarSource.
From the class CpdExecutorTest, method should_ignore_missing_component:
@Test
public void should_ignore_missing_component() {
  // Index a block whose resource id does not match any component in the store.
  Block orphanBlock = Block.builder()
    .setBlockHash(new ByteArray("AAAABBBBCCCC"))
    .setResourceId("unknown")
    .build();
  index.insert(batchComponent1, Collections.singletonList(orphanBlock));

  executor.execute();

  // Execution must finish cleanly, produce no duplications, and log why CPD was skipped.
  verify(executorService).shutdown();
  verifyNoMoreInteractions(executorService);
  readDuplications(batchComponent1, 0);
  assertThat(logTester.logs(LoggerLevel.ERROR)).contains("Resource not found in component store: unknown. Skipping CPD computation for it");
}
Use of org.sonar.duplications.block.Block in the SonarQube project by SonarSource.
From the class CpdExecutorTest, method should_timeout:
@Test
public void should_timeout() {
  Block block = Block.builder()
    .setBlockHash(new ByteArray("AAAABBBBCCCC"))
    .setResourceId(batchComponent1.key())
    .build();
  index.insert(batchComponent1, Collections.singletonList(block));
  // Stub a future that never completes so the 1ms timeout below is guaranteed to fire.
  // Diamond operator added: raw `new CompletableFuture()` produced an unchecked warning.
  when(executorService.submit(ArgumentMatchers.any(Callable.class))).thenReturn(new CompletableFuture<>());

  executor.execute(1);

  readDuplications(0);
  // Compare log lines by regex match, since the absolute file path varies per environment.
  assertThat(logTester.logs(LoggerLevel.WARN))
    .usingElementComparator((l, r) -> l.matches(r) ? 0 : 1)
    .containsOnly("Timeout during detection of duplications for .*Foo.php");
}
Aggregations