From a3037a714262a238991f499906d739b5b4427bbc Mon Sep 17 00:00:00 2001 From: Julien HENRY Date: Thu, 30 Jan 2014 11:20:38 +0100 Subject: [PATCH] SONAR-926 Improve language recognition to have case insensitive file extensions --- .../scan/filesystem/LanguageRecognizer.java | 105 +++++++++--------- .../batch/scan/filesystem/PathPattern.java | 34 +++++- .../filesystem/LanguageRecognizerTest.java | 31 ++++-- 3 files changed, 108 insertions(+), 62 deletions(-) diff --git a/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/LanguageRecognizer.java b/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/LanguageRecognizer.java index ff4fd3c3006..50748e0b2bd 100644 --- a/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/LanguageRecognizer.java +++ b/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/LanguageRecognizer.java @@ -19,12 +19,8 @@ */ package org.sonar.batch.scan.filesystem; -import org.sonar.api.scan.filesystem.InputFile; - -import com.google.common.collect.HashMultimap; +import com.google.common.base.Joiner; import com.google.common.collect.Maps; -import com.google.common.collect.SetMultimap; -import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang.StringUtils; import org.picocontainer.Startable; import org.slf4j.Logger; @@ -34,12 +30,14 @@ import org.sonar.api.CoreProperties; import org.sonar.api.config.Settings; import org.sonar.api.resources.Language; import org.sonar.api.resources.Languages; +import org.sonar.api.scan.filesystem.InputFile; import org.sonar.api.utils.SonarException; import javax.annotation.CheckForNull; +import java.util.ArrayList; +import java.util.List; import java.util.Map; -import java.util.Set; /** * Detect language of source files. 
@@ -53,7 +51,6 @@ public class LanguageRecognizer implements BatchComponent, Startable { /** * Lower-case extension -> languages */ - private SetMultimap langsByExtension = HashMultimap.create(); private Map patternByLanguage = Maps.newLinkedHashMap(); private Settings settings; @@ -66,20 +63,27 @@ public class LanguageRecognizer implements BatchComponent, Startable { @Override public void start() { for (Language language : languages.all()) { - for (String suffix : language.getFileSuffixes()) { - String extension = sanitizeExtension(suffix); - langsByExtension.put(extension, language.getKey()); - } - String[] filePatterns = settings.getStringArray(getFilePatternPropKey(language.getKey())); + String[] filePatterns = settings.getStringArray(getFileLangPatternPropKey(language.getKey())); PathPattern[] pathPatterns = PathPattern.create(filePatterns); if (pathPatterns.length > 0) { patternByLanguage.put(language.getKey(), pathPatterns); + } else if (language.getFileSuffixes().length > 0) { + // If no custom language pattern is defined then fallback to suffixes declared by language (converted on a copy so the language's own suffix array is left untouched) + String[] patterns = language.getFileSuffixes().clone(); + for (int i = 0; i < patterns.length; i++) { + String suffix = patterns[i]; + String extension = sanitizeExtension(suffix); + patterns[i] = "**/*." + extension; + } + PathPattern[] defaultLanguagePatterns = PathPattern.create(patterns); + patternByLanguage.put(language.getKey(), defaultLanguagePatterns); + LOG.debug("Declared extensions of language " + language + " were converted to " + getDetails(language.getKey())); } } } - private String getFilePatternPropKey(String languageKey) { - return "sonar." + languageKey + ".filePatterns"; + private String getFileLangPatternPropKey(String languageKey) { + return "sonar.lang.patterns." 
+ languageKey; } @Override @@ -89,58 +93,57 @@ public class LanguageRecognizer implements BatchComponent, Startable { @CheckForNull String of(InputFile inputFile) { - // First try with patterns - String forcedLanguage = null; - for (Map.Entry languagePattern : patternByLanguage.entrySet()) { - PathPattern[] patterns = languagePattern.getValue(); - for (PathPattern pathPattern : patterns) { - if (pathPattern.match(inputFile)) { - if (forcedLanguage == null) { - forcedLanguage = languagePattern.getKey(); - break; - } else { - // Language was already forced by another pattern - throw new SonarException("Language of file '" + inputFile.path() + "' can not be decided as the file matches patterns of both " + getFilePatternPropKey(forcedLanguage) - + " and " - + getFilePatternPropKey(languagePattern.getKey())); + String deprecatedLanguageParam = settings.getString(CoreProperties.PROJECT_LANGUAGE_PROPERTY); + + // First try with lang patterns + List languagesToConsider = new ArrayList(); + if (!StringUtils.isBlank(deprecatedLanguageParam)) { + languagesToConsider.add(deprecatedLanguageParam); + } else { + languagesToConsider.addAll(patternByLanguage.keySet()); + } + String detectedLanguage = null; + for (String languageKey : languagesToConsider) { + PathPattern[] patterns = patternByLanguage.get(languageKey); + if (patterns != null) { + for (PathPattern pathPattern : patterns) { + if (pathPattern.match(inputFile, false)) { + if (detectedLanguage == null) { + detectedLanguage = languageKey; + break; + } else { + // Language was already forced by another pattern + throw new SonarException("Language of file '" + inputFile.path() + "' can not be decided as the file matches patterns of both " + getDetails(detectedLanguage) + + " and " + getDetails(languageKey)); + } } } } } - if (forcedLanguage != null) { - LOG.debug("Language of file '" + inputFile.path() + "' was forced to '" + forcedLanguage + "'"); - return forcedLanguage; + if (detectedLanguage != null) { + 
LOG.debug("Language of file '" + inputFile.path() + "' was detected to be '" + detectedLanguage + "'"); + return detectedLanguage; } - String extension = sanitizeExtension(FilenameUtils.getExtension(inputFile.file().getName())); - - // Check if deprecated sonar.language is used - String languageKey = settings.getString(CoreProperties.PROJECT_LANGUAGE_PROPERTY); - if (StringUtils.isNotBlank(languageKey)) { - Language language = languages.get(languageKey); + // Check if deprecated sonar.language is used and we are on a language without declared extensions + if (StringUtils.isNotBlank(deprecatedLanguageParam)) { + Language language = languages.get(deprecatedLanguageParam); if (language == null) { - throw new SonarException("No language is installed with key '" + languageKey + "'. Please update property '" + CoreProperties.PROJECT_LANGUAGE_PROPERTY + "'"); + throw new SonarException("No language is installed with key '" + deprecatedLanguageParam + "'. Please update property '" + CoreProperties.PROJECT_LANGUAGE_PROPERTY + "'"); } // Languages without declared suffixes match everything String[] fileSuffixes = language.getFileSuffixes(); if (fileSuffixes.length == 0) { - return languageKey; - } - for (String fileSuffix : fileSuffixes) { - if (sanitizeExtension(fileSuffix).equals(extension)) { - return languageKey; - } + return deprecatedLanguageParam; } return null; } - // At this point use extension to detect language - Set langs = langsByExtension.get(extension); - if (langs.size() > 1) { - throw new SonarException("Language of file '" + inputFile.path() + "' can not be decided as the file extension '" + extension + "' is declared by several languages: " - + StringUtils.join(langs, ", ")); - } - return langs.isEmpty() ? 
null : langs.iterator().next(); + return null; + } + + private String getDetails(String detectedLanguage) { + return getFileLangPatternPropKey(detectedLanguage) + " : " + Joiner.on(",").join(patternByLanguage.get(detectedLanguage)); } static String sanitizeExtension(String suffix) { diff --git a/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/PathPattern.java b/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/PathPattern.java index cd67d3cadf6..917ae1597a6 100644 --- a/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/PathPattern.java +++ b/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/PathPattern.java @@ -19,6 +19,7 @@ */ package org.sonar.batch.scan.filesystem; +import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,6 +41,8 @@ abstract class PathPattern { abstract boolean match(InputFile inputFile); + abstract boolean match(InputFile inputFile, boolean caseSensitiveFileExtension); + abstract boolean supportResource(); static PathPattern create(String s) { @@ -70,7 +73,20 @@ abstract class PathPattern { @Override boolean match(InputFile inputFile) { - return pattern.match(inputFile.absolutePath()); + return match(inputFile, true); + } + + @Override + boolean match(InputFile inputFile, boolean caseSensitiveFileExtension) { + String path = inputFile.absolutePath(); + if (!caseSensitiveFileExtension) { + String extension = sanitizeExtension(FilenameUtils.getExtension(inputFile.file().getName())); + if (StringUtils.isNotBlank(extension)) { + path = StringUtils.removeEndIgnoreCase(path, extension); + path = path + extension; + } + } + return pattern.match(path); } @Override @@ -99,7 +115,19 @@ abstract class PathPattern { @Override boolean match(InputFile inputFile) { + return match(inputFile, true); + } + + @Override + boolean match(InputFile inputFile, boolean caseSensitiveFileExtension) { String path = inputFile.path(); + if 
(path != null && !caseSensitiveFileExtension) { + String extension = sanitizeExtension(FilenameUtils.getExtension(inputFile.file().getName())); + if (StringUtils.isNotBlank(extension)) { + path = StringUtils.removeEndIgnoreCase(path, extension); + path = path + extension; + } + } return path != null && pattern.match(path); } @@ -118,4 +146,8 @@ abstract class PathPattern { return pattern.toString(); } } + + static String sanitizeExtension(String suffix) { + return StringUtils.lowerCase(StringUtils.removeStart(suffix, ".")); + } } diff --git a/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/LanguageRecognizerTest.java b/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/LanguageRecognizerTest.java index 9d4e8e99689..b6e02c2d840 100644 --- a/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/LanguageRecognizerTest.java +++ b/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/LanguageRecognizerTest.java @@ -19,8 +19,6 @@ */ package org.sonar.batch.scan.filesystem; -import org.sonar.api.scan.filesystem.InputFile; - import com.google.common.base.Charsets; import org.hamcrest.BaseMatcher; import org.hamcrest.Description; @@ -32,6 +30,7 @@ import org.sonar.api.CoreProperties; import org.sonar.api.config.Settings; import org.sonar.api.resources.Language; import org.sonar.api.resources.Languages; +import org.sonar.api.scan.filesystem.InputFile; import org.sonar.api.scan.filesystem.internal.InputFileBuilder; import org.sonar.api.utils.SonarException; @@ -160,9 +159,9 @@ public class LanguageRecognizerTest { @Override public boolean matches(Object arg0) { // Need custom matcher because order of language in the exception is not deterministic (hashmap) - return arg0.toString().contains("Language of file 'abc.xhtml' can not be decided as the file extension 'xhtml' is declared by several languages: ") - && arg0.toString().contains("web") - && arg0.toString().contains("xml"); + return arg0.toString().contains("Language of file 'abc.xhtml' can not be decided as the 
file matches patterns of both ") + && arg0.toString().contains("sonar.lang.patterns.web : **/*.xhtml") + && arg0.toString().contains("sonar.lang.patterns.xml : **/*.xhtml"); } }); recognizer.of(newInputFile("abc.xhtml")); @@ -174,8 +173,8 @@ public class LanguageRecognizerTest { Languages languages = new Languages(new MockLanguage("xml", "xhtml"), new MockLanguage("web", "xhtml")); Settings settings = new Settings(); - settings.setProperty("sonar.xml.filePatterns", "xml/**"); - settings.setProperty("sonar.web.filePatterns", "web/**"); + settings.setProperty("sonar.lang.patterns.xml", "xml/**"); + settings.setProperty("sonar.lang.patterns.web", "web/**"); LanguageRecognizer recognizer = new LanguageRecognizer(settings, languages); recognizer.start(); assertThat(recognizer.of(newInputFile("xml/abc.xhtml"))).isEqualTo("xml"); @@ -188,14 +187,26 @@ public class LanguageRecognizerTest { Languages languages = new Languages(new MockLanguage("abap", "abap"), new MockLanguage("cobol", "cobol")); Settings settings = new Settings(); - settings.setProperty("sonar.abap.filePatterns", "*.abap,*.txt"); - settings.setProperty("sonar.cobol.filePatterns", "*.cobol,*.txt"); + settings.setProperty("sonar.lang.patterns.abap", "*.abap,*.txt"); + settings.setProperty("sonar.lang.patterns.cobol", "*.cobol,*.txt"); LanguageRecognizer recognizer = new LanguageRecognizer(settings, languages); recognizer.start(); assertThat(recognizer.of(newInputFile("abc.abap"))).isEqualTo("abap"); assertThat(recognizer.of(newInputFile("abc.cobol"))).isEqualTo("cobol"); thrown.expect(SonarException.class); - thrown.expectMessage("Language of file 'abc.txt' can not be decided as the file matches patterns of both sonar.abap.filePatterns and sonar.cobol.filePatterns"); + thrown.expectMessage(new BaseMatcher() { + @Override + public void describeTo(Description arg0) { + } + + @Override + public boolean matches(Object arg0) { + // Need custom matcher because order of language in the exception is not deterministic 
(hashmap) + return arg0.toString().contains("Language of file 'abc.txt' can not be decided as the file matches patterns of both ") + && arg0.toString().contains("sonar.lang.patterns.abap : *.abap,*.txt") + && arg0.toString().contains("sonar.lang.patterns.cobol : *.cobol,*.txt"); + } + }); recognizer.of(newInputFile("abc.txt")); recognizer.stop(); } -- 2.39.5