1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
|
/*
* SonarQube
* Copyright (C) 2009-2025 SonarSource SA
* mailto:info AT sonarsource DOT com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.scanner.scan.filesystem;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import javax.annotation.CheckForNull;
import org.apache.commons.io.ByteOrderMark;
import org.apache.commons.io.IOUtils;
/**
 * Detects the charset of a file from a sample of its first bytes and exposes a stream over the
 * file content for further reading.
 *
 * <p>Usage: call {@link #run()} once, then {@link #charset()} and {@link #inputStream()}.
 * When {@link #run()} completes without throwing, the stream returned by {@link #inputStream()}
 * is left open; this class never closes it, so the caller is responsible for doing so.
 */
public class CharsetDetector {
  /** Maximum number of leading bytes sampled from the file for detection. */
  private static final int BYTES_TO_DECODE = 4192;

  private final Path filePath;
  private final Charset userEncoding;
  private BufferedInputStream stream;
  private Charset detectedCharset;

  /**
   * @param filePath path of the file to inspect
   * @param userEncoding encoding handed over to the underlying {@code ByteCharsetDetector}
   */
  public CharsetDetector(Path filePath, Charset userEncoding) {
    this.filePath = filePath;
    this.userEncoding = userEncoding;
  }

  /**
   * Opens the file, samples its first bytes and tries to detect the charset.
   *
   * @return {@code true} if a charset was detected (from a BOM or from the sampled bytes),
   *         {@code false} otherwise
   * @throws IllegalStateException if the file cannot be read; in that case any stream that was
   *         opened is closed before the exception is thrown
   */
  public boolean run() {
    try {
      byte[] buf = readBuffer();
      return detectCharset(buf);
    } catch (IOException e) {
      // Nobody will consume the stream after a failure, so release the file handle here.
      closeStreamAfterFailure(e);
      throw new IllegalStateException("Unable to read file " + filePath.toAbsolutePath(), e);
    }
  }

  /**
   * @return the detected charset, or {@code null} if detection did not find one
   * @throws IllegalStateException if {@link #run()} has not opened the stream
   */
  @CheckForNull
  public Charset charset() {
    assertRun();
    return detectedCharset;
  }

  /**
   * Returns the stream opened by {@link #run()}. It is positioned at the beginning of the file,
   * or right after the byte order mark when one was detected. The caller must close it.
   *
   * @throws IllegalStateException if {@link #run()} has not opened the stream
   */
  public InputStream inputStream() {
    assertRun();
    return stream;
  }

  /**
   * Opens the file and reads up to {@link #BYTES_TO_DECODE} bytes, then rewinds the stream so
   * the sampled bytes can be read again by the consumer.
   *
   * @return the bytes actually read, which may be fewer than {@link #BYTES_TO_DECODE}
   */
  private byte[] readBuffer() throws IOException {
    // The internal buffer is larger than the sample so the marked region fits in it.
    stream = new BufferedInputStream(Files.newInputStream(filePath), BYTES_TO_DECODE * 2);
    stream.mark(BYTES_TO_DECODE);
    byte[] buf = new byte[BYTES_TO_DECODE];
    int read = IOUtils.read(stream, buf, 0, BYTES_TO_DECODE);
    stream.reset();
    // Re-mark with a negative read limit; presumably so the stream does not have to keep
    // honoring the sampling mark once the consumer reads further -- TODO confirm intent.
    stream.mark(-1);
    return Arrays.copyOf(buf, read);
  }

  /**
   * Detects the charset from the sampled bytes. A byte order mark takes precedence; when one is
   * found the stream is advanced past it.
   *
   * @return {@code true} if a charset was detected
   */
  private boolean detectCharset(byte[] buf) throws IOException {
    ByteCharsetDetector detector = new ByteCharsetDetector(new CharsetValidation(), userEncoding);
    ByteOrderMark bom = detector.detectBOM(buf);
    if (bom != null) {
      detectedCharset = Charset.forName(bom.getCharsetName());
      // InputStream.skip may skip fewer bytes than requested; skipFully skips exactly
      // bom.length() bytes or throws.
      IOUtils.skipFully(stream, bom.length());
      return true;
    }
    detectedCharset = detector.detect(buf);
    return detectedCharset != null;
  }

  /**
   * Closes the stream after a failed run and forgets it, so that later calls to
   * {@link #charset()} or {@link #inputStream()} fail instead of exposing a closed stream.
   * A failure to close is attached to {@code cause} as a suppressed exception.
   */
  private void closeStreamAfterFailure(IOException cause) {
    if (stream == null) {
      return;
    }
    try {
      stream.close();
    } catch (IOException closeFailure) {
      cause.addSuppressed(closeFailure);
    } finally {
      stream = null;
    }
  }

  /** Fails if no stream is available, i.e. {@link #run()} was not called or did not succeed. */
  private void assertRun() {
    if (stream == null) {
      throw new IllegalStateException("Charset detection did not run");
    }
  }
}
|