123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293 |
- /*
- * SonarQube
- * Copyright (C) 2009-2019 SonarSource SA
- * mailto:info AT sonarsource DOT com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 3 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
- package org.sonar.scanner.scan.filesystem;
-
- import java.io.BufferedInputStream;
- import java.io.IOException;
- import java.io.InputStream;
- import java.nio.charset.Charset;
- import java.nio.file.Files;
- import java.nio.file.Path;
- import java.util.Arrays;
- import javax.annotation.CheckForNull;
- import org.apache.commons.io.ByteOrderMark;
- import org.apache.commons.io.IOUtils;
-
- public class CharsetDetector {
- private static final int BYTES_TO_DECODE = 4192;
- private final Path filePath;
- private BufferedInputStream stream;
- private Charset detectedCharset;
- private Charset userEncoding;
-
- public CharsetDetector(Path filePath, Charset userEncoding) {
- this.filePath = filePath;
- this.userEncoding = userEncoding;
- }
-
- public boolean run() {
- try {
- byte[] buf = readBuffer();
- return detectCharset(buf);
- } catch (IOException e) {
- throw new IllegalStateException("Unable to read file " + filePath.toAbsolutePath().toString(), e);
- }
- }
-
- @CheckForNull
- public Charset charset() {
- assertRun();
- return detectedCharset;
- }
-
- public InputStream inputStream() {
- assertRun();
- return stream;
- }
-
- private byte[] readBuffer() throws IOException {
- stream = new BufferedInputStream(Files.newInputStream(filePath), BYTES_TO_DECODE * 2);
- stream.mark(BYTES_TO_DECODE);
- byte[] buf = new byte[BYTES_TO_DECODE];
- int read = IOUtils.read(stream, buf, 0, BYTES_TO_DECODE);
- stream.reset();
- stream.mark(-1);
- return Arrays.copyOf(buf, read);
- }
-
- private boolean detectCharset(byte[] buf) throws IOException {
- ByteCharsetDetector detector = new ByteCharsetDetector(new CharsetValidation(), userEncoding);
- ByteOrderMark bom = detector.detectBOM(buf);
- if (bom != null) {
- detectedCharset = Charset.forName(bom.getCharsetName());
- stream.skip(bom.length());
- return true;
- }
-
- detectedCharset = detector.detect(buf);
- return detectedCharset != null;
- }
-
- private void assertRun() {
- if (stream == null) {
- throw new IllegalStateException("Charset detection did not run");
- }
- }
- }
|