You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

CpdTokenizerSensor.java 2.9KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. /*
  2. * SonarQube
  3. * Copyright (C) 2009-2023 SonarSource SA
  4. * mailto:info AT sonarsource DOT com
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 3 of the License, or (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public License
  17. * along with this program; if not, write to the Free Software Foundation,
  18. * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  19. */
  20. package org.sonar.xoo.lang;
  21. import java.io.IOException;
  22. import org.apache.commons.io.FileUtils;
  23. import org.sonar.api.batch.fs.FilePredicates;
  24. import org.sonar.api.batch.fs.InputFile;
  25. import org.sonar.api.batch.fs.InputFile.Type;
  26. import org.sonar.api.batch.sensor.Sensor;
  27. import org.sonar.api.batch.sensor.SensorContext;
  28. import org.sonar.api.batch.sensor.SensorDescriptor;
  29. import org.sonar.api.batch.sensor.cpd.NewCpdTokens;
  30. import org.sonar.xoo.Xoo;
  31. /**
  32. * Tokenize files for CPD
  33. */
  34. public class CpdTokenizerSensor implements Sensor {
  35. private void tokenize(InputFile inputFile, SensorContext context) {
  36. int lineIdx = 1;
  37. NewCpdTokens newCpdTokens = context.newCpdTokens().onFile(inputFile);
  38. try {
  39. StringBuilder sb = new StringBuilder();
  40. for (String line : FileUtils.readLines(inputFile.file(), inputFile.charset())) {
  41. int startOffset = 0;
  42. int endOffset = 0;
  43. for (int i = 0; i < line.length(); i++) {
  44. char c = line.charAt(i);
  45. if (Character.isWhitespace(c)) {
  46. if (sb.length() > 0) {
  47. newCpdTokens.addToken(inputFile.newRange(lineIdx, startOffset, lineIdx, endOffset), sb.toString());
  48. sb.setLength(0);
  49. }
  50. startOffset = endOffset;
  51. } else {
  52. sb.append(c);
  53. }
  54. endOffset++;
  55. }
  56. if (sb.length() > 0) {
  57. newCpdTokens.addToken(inputFile.newRange(lineIdx, startOffset, lineIdx, endOffset), sb.toString());
  58. sb.setLength(0);
  59. }
  60. lineIdx++;
  61. }
  62. } catch (IOException e) {
  63. throw new IllegalStateException("Unable to tokenize", e);
  64. }
  65. newCpdTokens.save();
  66. }
  67. @Override
  68. public void describe(SensorDescriptor descriptor) {
  69. descriptor
  70. .name("Xoo Cpd Tokenizer Sensor")
  71. .onlyOnLanguages(Xoo.KEY);
  72. }
  73. @Override
  74. public void execute(SensorContext context) {
  75. FilePredicates p = context.fileSystem().predicates();
  76. for (InputFile file : context.fileSystem().inputFiles(p.and(p.hasLanguages(Xoo.KEY), p.hasType(Type.MAIN)))) {
  77. tokenize(file, context);
  78. }
  79. }
  80. }