diff options
author | PJ Fanning <fanningpj@apache.org> | 2023-03-17 23:35:33 +0000 |
---|---|---|
committer | PJ Fanning <fanningpj@apache.org> | 2023-03-17 23:35:33 +0000 |
commit | 0275daa5deae2e0069badd1f46268abb43fbc3dc (patch) | |
tree | 66f7bba4045ecb67d87bdf9294ef3f43278e9fd4 /poi/src | |
parent | 3e6dddaa95bb4d99e2da81002a70a8c1dc1965af (diff) | |
download | poi-0275daa5deae2e0069badd1f46268abb43fbc3dc.tar.gz poi-0275daa5deae2e0069badd1f46268abb43fbc3dc.zip |
[bug-66532] more performant way to iterate over codepoints. Thanks to Matthias Raschhofer
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1908458 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'poi/src')
-rw-r--r-- | poi/src/main/java/org/apache/poi/util/CodepointsUtil.java | 16 | ||||
-rw-r--r-- | poi/src/test/java/org/apache/poi/util/TestCodepointsUtil.java | 19 |
2 files changed, 33 insertions, 2 deletions
diff --git a/poi/src/main/java/org/apache/poi/util/CodepointsUtil.java b/poi/src/main/java/org/apache/poi/util/CodepointsUtil.java index 9f2d01780c..71fcdbe85e 100644 --- a/poi/src/main/java/org/apache/poi/util/CodepointsUtil.java +++ b/poi/src/main/java/org/apache/poi/util/CodepointsUtil.java @@ -18,12 +18,28 @@ package org.apache.poi.util; import java.util.Iterator; +import java.util.PrimitiveIterator; @Internal public class CodepointsUtil { + + /** + * @param text to iterate over + * @return iterator with Strings representing the codepoints + * @see #primitiveIterator(String) a more performant iterator + */ public static Iterator<String> iteratorFor(String text) { return text.codePoints() .mapToObj(codePoint -> new String(Character.toChars(codePoint))) .iterator(); } + + /** + * @param text to iterate over + * @return iterator with ints representing the codepoints + * @since POI 5.2.4 + */ + public static PrimitiveIterator.OfInt primitiveIterator(String text) { + return text.codePoints().iterator(); + } }
\ No newline at end of file diff --git a/poi/src/test/java/org/apache/poi/util/TestCodepointsUtil.java b/poi/src/test/java/org/apache/poi/util/TestCodepointsUtil.java index 3610535c95..f6d31ef5d4 100644 --- a/poi/src/test/java/org/apache/poi/util/TestCodepointsUtil.java +++ b/poi/src/test/java/org/apache/poi/util/TestCodepointsUtil.java @@ -23,6 +23,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.function.IntConsumer; import org.junit.jupiter.api.Test; @@ -38,8 +39,22 @@ class TestCodepointsUtil { List<String> codePoints = new ArrayList<>(); CodepointsUtil.iteratorFor(unicodeSurrogates).forEachRemaining(codePoints::add); assertEquals(17, codePoints.size()); - for(String point : codePoints){ - assertTrue(point.length() >=1 && point.length() <= 2, "codepoint " + point + "is wrong size"); + for (String point : codePoints) { + assertTrue(point.length() >= 1 && point.length() <= 2, "codepoint " + point + "is wrong size"); + } + } + + @Test + void testPrimitiveIterator() { + final String unicodeSurrogates = "\uD835\uDF4A\uD835\uDF4B\uD835\uDF4C\uD835\uDF4D\uD835\uDF4E" + + "abcdef123456"; + List<String> codePoints = new ArrayList<>(); + CodepointsUtil.primitiveIterator(unicodeSurrogates).forEachRemaining((IntConsumer) (i) -> { + codePoints.add(new String(Character.toChars(i))); + }); + assertEquals(17, codePoints.size()); + for (String point : codePoints) { + assertTrue(point.length() >= 1 && point.length() <= 2, "codepoint " + point + "is wrong size"); } } |