Browse Source

implement nicer ascii text conversion for ole package paths

git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@801 f203690c-595d-4dc9-a70b-905162fa7fd2
tags/jackcess-2.0.1
James Ahlborn 10 years ago
parent
commit
08c5914dc9
1 changed files with 17 additions and 0 deletions
  1. 17
    0
      src/main/java/com/healthmarketscience/jackcess/impl/OleUtil.java

+ 17
- 0
src/main/java/com/healthmarketscience/jackcess/impl/OleUtil.java View File

@@ -30,8 +30,10 @@ import java.nio.charset.Charset;
import java.sql.Blob;
import java.sql.SQLException;
import java.sql.SQLFeatureNotSupportedException;
import java.text.Normalizer;
import java.util.EnumSet;
import java.util.Set;
import java.util.regex.Pattern;

import com.healthmarketscience.jackcess.DataType;
import com.healthmarketscience.jackcess.util.OleBlob;
@@ -78,6 +80,10 @@ public class OleUtil
0x00, 0x00, 0x01, (byte)0xAD, 0x05, (byte)0xFE
};

// Precompiled regex matching one or more consecutive characters that are any
// of: members of the Unicode "Combining Diacritical Marks" block, modifier
// letters (general category Lm), or modifier symbols (general category Sk).
// Intended to be applied to NFD-decomposed text so that the accent marks
// split off from their base characters can be removed.
private static final Pattern UNICODE_ACCENT_PATTERN =
Pattern.compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");

private static final CompoundPackageFactory COMPOUND_FACTORY;

static {
@@ -433,6 +439,17 @@ public class OleUtil
}

/**
 * Converts the given string to a zero-terminated byte sequence encoded with
 * {@code OLE_CHARSET}, after first simplifying accented characters.
 * <p>
 * Because the result is destined for an ascii encoding, accented characters
 * are reduced to their plain base form where possible (e.g. "u with an
 * umlaut" becomes just "u").  This may not ultimately help anything, but it
 * mirrors what ms access does.
 *
 * @param str the text to convert
 * @return the encoded bytes of the simplified text followed by a single
 *         {@code '\0'} terminator character
 */
private static byte[] getZeroTermStrBytes(String str) {
  // step 1: split each composite char into its base char plus separate
  // accent marks
  final String decomposed = Normalizer.normalize(str, Normalizer.Form.NFD);

  // step 2: drop the accent marks, leaving only the base chars
  final String unaccented =
    UNICODE_ACCENT_PATTERN.matcher(decomposed).replaceAll("");

  // step 3: recompose whatever survived into canonical composed form
  final String recomposed =
    Normalizer.normalize(unaccented, Normalizer.Form.NFC);

  // append the terminator and encode
  final StringBuilder terminated =
    new StringBuilder(recomposed.length() + 1);
  terminated.append(recomposed).append('\0');
  return terminated.toString().getBytes(OLE_CHARSET);
}


Loading…
Cancel
Save