From c039da1b94a24224bcd7d6a09b1828782a2acb00 Mon Sep 17 00:00:00 2001 From: Andreas Beeker Date: Fri, 28 Dec 2018 23:43:31 +0000 Subject: #63028 - Provide font embedding for slideshows git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1849898 13f79535-47bb-0310-9956-ffa450edef68 --- src/java/org/apache/poi/sl/draw/DrawFontInfo.java | 48 ---- .../poi/sl/extractor/SlideShowExtractor.java | 305 +++++++++++++-------- .../apache/poi/sl/usermodel/FontCollection.java | 22 -- .../org/apache/poi/sl/usermodel/Resources.java | 29 -- src/java/org/apache/poi/sl/usermodel/Slide.java | 3 +- .../org/apache/poi/sl/usermodel/SlideShow.java | 29 +- src/java/org/apache/poi/sl/usermodel/TextRun.java | 8 + 7 files changed, 223 insertions(+), 221 deletions(-) delete mode 100644 src/java/org/apache/poi/sl/usermodel/FontCollection.java delete mode 100644 src/java/org/apache/poi/sl/usermodel/Resources.java (limited to 'src/java/org/apache/poi/sl') diff --git a/src/java/org/apache/poi/sl/draw/DrawFontInfo.java b/src/java/org/apache/poi/sl/draw/DrawFontInfo.java index dc7afb4e24..da1979cb9e 100644 --- a/src/java/org/apache/poi/sl/draw/DrawFontInfo.java +++ b/src/java/org/apache/poi/sl/draw/DrawFontInfo.java @@ -19,10 +19,7 @@ package org.apache.poi.sl.draw; -import org.apache.poi.common.usermodel.fonts.FontCharset; -import org.apache.poi.common.usermodel.fonts.FontFamily; import org.apache.poi.common.usermodel.fonts.FontInfo; -import org.apache.poi.common.usermodel.fonts.FontPitch; import org.apache.poi.util.Internal; /** @@ -37,53 +34,8 @@ import org.apache.poi.util.Internal; this.typeface = typeface; } - @Override - public Integer getIndex() { - return null; - } - - @Override - public void setIndex(int index) { - throw new UnsupportedOperationException("DrawFontManagers FontInfo can't be changed."); - } - @Override public String getTypeface() { return typeface; } - - @Override - public void setTypeface(String typeface) { - throw new UnsupportedOperationException("DrawFontManagers 
FontInfo can't be changed."); - } - - @Override - public FontCharset getCharset() { - return FontCharset.ANSI; - } - - @Override - public void setCharset(FontCharset charset) { - throw new UnsupportedOperationException("DrawFontManagers FontInfo can't be changed."); - } - - @Override - public FontFamily getFamily() { - return FontFamily.FF_SWISS; - } - - @Override - public void setFamily(FontFamily family) { - throw new UnsupportedOperationException("DrawFontManagers FontInfo can't be changed."); - } - - @Override - public FontPitch getPitch() { - return FontPitch.VARIABLE; - } - - @Override - public void setPitch(FontPitch pitch) { - throw new UnsupportedOperationException("DrawFontManagers FontInfo can't be changed."); - } } diff --git a/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java b/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java index dee4d44a03..7173c24e97 100644 --- a/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java +++ b/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java @@ -18,10 +18,14 @@ package org.apache.poi.sl.extractor; import java.util.ArrayList; +import java.util.BitSet; +import java.util.LinkedList; import java.util.List; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.function.Predicate; import org.apache.poi.extractor.POITextExtractor; -import org.apache.poi.sl.usermodel.Comment; import org.apache.poi.sl.usermodel.MasterSheet; import org.apache.poi.sl.usermodel.Notes; import org.apache.poi.sl.usermodel.ObjectShape; @@ -52,6 +56,10 @@ public class SlideShowExtractor< > extends POITextExtractor { private static final POILogger LOG = POILogFactory.getLogger(SlideShowExtractor.class); + // placeholder text for slide numbers + private static final String SLIDE_NUMBER_PH = "‹#›"; + + private SlideShow slideshow; private boolean slidesByDefault = true; @@ -59,7 +67,8 @@ public class SlideShowExtractor< private boolean commentsByDefault; private boolean 
masterByDefault; - + private Predicate filter = o -> true; + public SlideShowExtractor(final SlideShow slideshow) { setFilesystem(slideshow); this.slideshow = slideshow; @@ -115,9 +124,8 @@ public class SlideShowExtractor< @Override public String getText() { final StringBuilder sb = new StringBuilder(); - for (final Slide slide : slideshow.getSlides()) { - sb.append(getText(slide)); + getText(slide, sb::append); } return sb.toString(); @@ -125,34 +133,37 @@ public class SlideShowExtractor< public String getText(final Slide slide) { final StringBuilder sb = new StringBuilder(); + getText(slide, sb::append); + return sb.toString(); + } + + private void getText(final Slide slide, final Consumer consumer) { if (slidesByDefault) { - printShapeText(slide, sb); + printShapeText(slide, consumer); } if (masterByDefault) { final MasterSheet ms = slide.getMasterSheet(); - printSlideMaster(ms, sb); + printSlideMaster(ms, consumer); // only print slide layout, if it's a different instance final MasterSheet sl = slide.getSlideLayout(); if (sl != ms) { - printSlideMaster(sl, sb); + printSlideMaster(sl, consumer); } } if (commentsByDefault) { - printComments(slide, sb); + printComments(slide, consumer); } if (notesByDefault) { - printNotes(slide, sb); + printNotes(slide, consumer); } - - return sb.toString(); } - private void printSlideMaster(final MasterSheet master, final StringBuilder sb) { + private void printSlideMaster(final MasterSheet master, final Consumer consumer) { if (master == null) { return; } @@ -163,163 +174,140 @@ public class SlideShowExtractor< if (text == null || text.isEmpty() || "*".equals(text)) { continue; } + if (ts.isPlaceholder()) { // don't bother about boiler plate text on master sheets LOG.log(POILogger.INFO, "Ignoring boiler plate (placeholder) text on slide master:", text); continue; } - sb.append(text); - if (!text.endsWith("\n")) { - sb.append("\n"); - } + printTextParagraphs(ts.getTextParagraphs(), consumer); } } } - private String 
printHeaderReturnFooter(final Sheet sheet, final StringBuilder sb) { - final Sheet m = (sheet instanceof Slide) ? sheet.getMasterSheet() : sheet; - final StringBuilder footer = new StringBuilder("\n"); - addSheetPlaceholderDatails(sheet, Placeholder.HEADER, sb); - addSheetPlaceholderDatails(sheet, Placeholder.FOOTER, footer); + private void printTextParagraphs(final List<P>
 paras, final Consumer consumer) { + printTextParagraphs(paras, consumer, "\n"); + } - if (masterByDefault) { - // write header texts and determine footer text - for (Shape s : m) { - if (!(s instanceof TextShape)) { - continue; - } - final TextShape ts = (TextShape) s; - final PlaceholderDetails pd = ts.getPlaceholderDetails(); - if (pd == null || !pd.isVisible() || pd.getPlaceholder() == null) { - continue; - } - switch (pd.getPlaceholder()) { - case HEADER: - sb.append(ts.getText()); - sb.append('\n'); - break; - case SLIDE_NUMBER: - if (sheet instanceof Slide) { - footer.append(ts.getText().replace("‹#›", Integer.toString(((Slide) sheet).getSlideNumber() + 1))); - footer.append('\n'); - } - break; - case FOOTER: - footer.append(ts.getText()); - footer.append('\n'); - break; - case DATETIME: - // currently not supported - default: - break; + + private void printTextParagraphs(final List<P>
 paras, final Consumer consumer, String trailer) { + printTextParagraphs(paras, consumer, trailer, SlideShowExtractor::replaceTextCap); + } + + private void printTextParagraphs(final List<P>
paras, final Consumer consumer, String trailer, final Function converter) { + for (P p : paras) { + for (TextRun r : p) { + if (filter.test(r)) { + consumer.accept(converter.apply(r)); } } + if (!trailer.isEmpty() && filter.test(trailer)) { + consumer.accept(trailer); + } } - - return (footer.length() > 1) ? footer.toString() : ""; } - private void addSheetPlaceholderDatails(final Sheet sheet, final Placeholder placeholder, final StringBuilder sb) { - final PlaceholderDetails headerPD = sheet.getPlaceholderDetails(placeholder); - if (headerPD == null) { + private void printHeaderFooter(final Sheet sheet, final Consumer consumer, final Consumer footerCon) { + final Sheet m = (sheet instanceof Slide) ? sheet.getMasterSheet() : sheet; + addSheetPlaceholderDatails(sheet, Placeholder.HEADER, consumer); + addSheetPlaceholderDatails(sheet, Placeholder.FOOTER, footerCon); + + if (!masterByDefault) { return; } - final String headerStr = headerPD.getText(); - if (headerStr == null) { - return; + + // write header texts and determine footer text + for (Shape s : m) { + if (!(s instanceof TextShape)) { + continue; + } + final TextShape ts = (TextShape) s; + final PlaceholderDetails pd = ts.getPlaceholderDetails(); + if (pd == null || !pd.isVisible() || pd.getPlaceholder() == null) { + continue; + } + switch (pd.getPlaceholder()) { + case HEADER: + printTextParagraphs(ts.getTextParagraphs(), consumer); + break; + case FOOTER: + printTextParagraphs(ts.getTextParagraphs(), footerCon); + break; + case SLIDE_NUMBER: + printTextParagraphs(ts.getTextParagraphs(), footerCon, "\n", SlideShowExtractor::replaceSlideNumber); + break; + case DATETIME: + // currently not supported + default: + break; + } } - sb.append(headerStr); } - private void printShapeText(final Sheet sheet, final StringBuilder sb) { - final String footer = printHeaderReturnFooter(sheet, sb); - printShapeText((ShapeContainer)sheet, sb); - sb.append(footer); + + private void addSheetPlaceholderDatails(final Sheet sheet, 
final Placeholder placeholder, final Consumer consumer) { + final PlaceholderDetails headerPD = sheet.getPlaceholderDetails(placeholder); + final String headerStr = (headerPD != null) ? headerPD.getText() : null; + if (headerStr != null && filter.test(headerPD)) { + consumer.accept(headerStr); + } + } + + private void printShapeText(final Sheet sheet, final Consumer consumer) { + final List footer = new LinkedList<>(); + printHeaderFooter(sheet, consumer, footer::add); + printShapeText((ShapeContainer)sheet, consumer); + footer.forEach(consumer); } @SuppressWarnings("unchecked") - private void printShapeText(final ShapeContainer container, final StringBuilder sb) { + private void printShapeText(final ShapeContainer container, final Consumer consumer) { for (Shape shape : container) { if (shape instanceof TextShape) { - printShapeText((TextShape)shape, sb); + printTextParagraphs(((TextShape)shape).getTextParagraphs(), consumer); } else if (shape instanceof TableShape) { - printShapeText((TableShape)shape, sb); + printShapeText((TableShape)shape, consumer); } else if (shape instanceof ShapeContainer) { - printShapeText((ShapeContainer)shape, sb); + printShapeText((ShapeContainer)shape, consumer); } } } - private void printShapeText(final TextShape shape, final StringBuilder sb) { - final List<P>
paraList = shape.getTextParagraphs(); - if (paraList.isEmpty()) { - sb.append('\n'); - return; - } - for (final P para : paraList) { - for (final TextRun tr : para) { - final String str = tr.getRawText().replace("\r", ""); - final String newStr; - switch (tr.getTextCap()) { - case ALL: - newStr = str.toUpperCase(LocaleUtil.getUserLocale()); - break; - case SMALL: - newStr = str.toLowerCase(LocaleUtil.getUserLocale()); - break; - default: - case NONE: - newStr = str; - break; - } - sb.append(newStr); - } - sb.append('\n'); - } - } - @SuppressWarnings("Duplicates") - private void printShapeText(final TableShape shape, final StringBuilder sb) { + private void printShapeText(final TableShape shape, final Consumer consumer) { final int nrows = shape.getNumberOfRows(); final int ncols = shape.getNumberOfColumns(); - for (int row = 0; row < nrows; row++){ + for (int row = 0; row < nrows; row++) { + String trailer = ""; for (int col = 0; col < ncols; col++){ TableCell cell = shape.getCell(row, col); //defensive null checks; don't know if they're necessary - if (cell != null){ - String txt = cell.getText(); - txt = (txt == null) ? "" : txt; - sb.append(txt); - if (col < ncols-1){ - sb.append('\t'); - } + if (cell != null) { + trailer = col < ncols-1 ? 
"\t" : "\n"; + printTextParagraphs(cell.getTextParagraphs(), consumer, trailer); } } - sb.append('\n'); + if (!trailer.equals("\n") && filter.test("\n")) { + consumer.accept("\n"); + } } } - private void printComments(final Slide slide, final StringBuilder sb) { - for (final Comment comment : slide.getComments()) { - sb.append(comment.getAuthor()); - sb.append(" - "); - sb.append(comment.getText()); - sb.append("\n"); - } + private void printComments(final Slide slide, final Consumer consumer) { + slide.getComments().stream().filter(filter).map(c -> c.getAuthor()+" - "+c.getText()).forEach(consumer); } - private void printNotes(final Slide slide, final StringBuilder sb) { + private void printNotes(final Slide slide, final Consumer consumer) { final Notes notes = slide.getNotes(); if (notes == null) { return; } - final String footer = printHeaderReturnFooter(notes, sb); - - printShapeText(notes, sb); - - sb.append(footer); + List footer = new LinkedList<>(); + printHeaderFooter(notes, consumer, footer::add); + printShapeText(notes, consumer); + footer.forEach(consumer); } public List> getOLEShapes() { @@ -342,4 +330,83 @@ public class SlideShowExtractor< } } } + + private static String replaceSlideNumber(TextRun tr) { + String raw = tr.getRawText(); + + if (!raw.contains(SLIDE_NUMBER_PH)) { + return raw; + } + + TextParagraph tp = tr.getParagraph(); + TextShape ps = (tp != null) ? tp.getParentShape() : null; + Sheet sh = (ps != null) ? ps.getSheet() : null; + String slideNr = (sh instanceof Slide) ? Integer.toString(((Slide)sh).getSlideNumber() + 1) : ""; + + return raw.replace(SLIDE_NUMBER_PH, slideNr); + } + + private static String replaceTextCap(TextRun tr) { + final TextParagraph tp = tr.getParagraph(); + final TextShape sh = (tp != null) ? tp.getParentShape() : null; + final Placeholder ph = (sh != null) ? 
sh.getPlaceholder() : null; + + // 0xB acts like cariage return in page titles and like blank in the others + final char sep = ( + ph == Placeholder.TITLE || + ph == Placeholder.CENTERED_TITLE || + ph == Placeholder.SUBTITLE + ) ? '\n' : ' '; + + // PowerPoint seems to store files with \r as the line break + // The messes things up on everything but a Mac, so translate them to \n + String txt = tr.getRawText(); + txt = txt.replace('\r', '\n'); + txt = txt.replace((char) 0x0B, sep); + + switch (tr.getTextCap()) { + case ALL: + txt = txt.toUpperCase(LocaleUtil.getUserLocale()); + case SMALL: + txt = txt.toLowerCase(LocaleUtil.getUserLocale()); + } + + return txt; + } + + /** + * Extract the used codepoints for font embedding / subsetting + * @param typeface the typeface/font family of the textruns to examine + * @param italic use {@code true} for italic TextRuns, {@code false} for non-italic ones and + * {@code null} if it doesn't matter + * @param bold use {@code true} for bold TextRuns, {@code false} for non-bold ones and + * {@code null} if it doesn't matter + * @return a bitset with the marked/used codepoints + */ + public BitSet getCodepoints(String typeface, Boolean italic, Boolean bold) { + final BitSet glyphs = new BitSet(); + + Predicate filterOld = filter; + try { + filter = o -> filterFonts(o, typeface, italic, bold); + slideshow.getSlides().forEach(slide -> + getText(slide, s -> s.codePoints().forEach(glyphs::set)) + ); + } finally { + filter = filterOld; + } + + return glyphs; + } + + private static boolean filterFonts(Object o, String typeface, Boolean italic, Boolean bold) { + if (!(o instanceof TextRun)) { + return false; + } + TextRun tr = (TextRun)o; + return + typeface.equalsIgnoreCase(tr.getFontFamily()) && + (italic == null || tr.isItalic() == italic) && + (bold == null || tr.isBold() == bold); + } } diff --git a/src/java/org/apache/poi/sl/usermodel/FontCollection.java b/src/java/org/apache/poi/sl/usermodel/FontCollection.java deleted file mode 
100644 index 61278f4618..0000000000 --- a/src/java/org/apache/poi/sl/usermodel/FontCollection.java +++ /dev/null @@ -1,22 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - -package org.apache.poi.sl.usermodel; - -public interface FontCollection { - -} diff --git a/src/java/org/apache/poi/sl/usermodel/Resources.java b/src/java/org/apache/poi/sl/usermodel/Resources.java deleted file mode 100644 index 96170e50bd..0000000000 --- a/src/java/org/apache/poi/sl/usermodel/Resources.java +++ /dev/null @@ -1,29 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - -package org.apache.poi.sl.usermodel; - -/** - * Common SlideShow resources, such as fonts, pictures - * and multimedia data - */ -public interface Resources { - public FontCollection getFontCollection(); - - public PictureData[] getPictureData(); - public int addPictureData(PictureData pict); -} diff --git a/src/java/org/apache/poi/sl/usermodel/Slide.java b/src/java/org/apache/poi/sl/usermodel/Slide.java index 91b80f107e..7c0d566138 100644 --- a/src/java/org/apache/poi/sl/usermodel/Slide.java +++ b/src/java/org/apache/poi/sl/usermodel/Slide.java @@ -19,6 +19,7 @@ package org.apache.poi.sl.usermodel; import java.util.List; +@SuppressWarnings("unused") public interface Slide< S extends Shape, P extends TextParagraph @@ -82,7 +83,7 @@ public interface Slide< * * @since POI 4.0.0 */ - MasterSheet getSlideLayout(); + MasterSheet getSlideLayout(); /** * @return the slide name, defaults to "Slide[slideNumber]" diff --git a/src/java/org/apache/poi/sl/usermodel/SlideShow.java b/src/java/org/apache/poi/sl/usermodel/SlideShow.java index 175ad2b00e..751379de92 100644 --- a/src/java/org/apache/poi/sl/usermodel/SlideShow.java +++ b/src/java/org/apache/poi/sl/usermodel/SlideShow.java @@ -25,6 +25,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.util.List; +import org.apache.poi.common.usermodel.fonts.FontInfo; import org.apache.poi.extractor.POITextExtractor; import org.apache.poi.sl.usermodel.PictureData.PictureType; @@ -44,8 +45,6 @@ public interface SlideShow< */ List> 
getSlideMasters(); - Resources getResources(); - /** * Returns the current page size * @@ -135,4 +134,30 @@ public interface SlideShow< * @since POI 4.0.0 */ Object getPersistDocument(); + + /** + * Add an EOT font to the slideshow. + * An EOT or MTX font is a transformed True-Type (.ttf) or Open-Type (.otf) font. + * To transform a True-Type font use the sfntly library (see "see also" below)<p>
+ * + * (Older?) Powerpoint versions handle embedded fonts by converting them to .ttf files + * and put them into the Windows fonts directory. If the user is not allowed to install + * fonts, the slideshow can't be opened. While the slideshow is opened, its possible + * to copy the extracted .ttfs from the fonts directory. When the slideshow is closed, + * they will be removed. + * + * @param fontData the EOT font as stream + * @return the font info object containing the new font data + * @throws IOException if the fontData can't be saved or if the fontData is no EOT font + * + * @see EOT specification + * @see googles sfntly library + * @see Example on how to subset and embed fonts + */ + FontInfo addFont(InputStream fontData) throws IOException; + + /** + * @return a list of registered fonts + */ + List getFonts(); } diff --git a/src/java/org/apache/poi/sl/usermodel/TextRun.java b/src/java/org/apache/poi/sl/usermodel/TextRun.java index 394166071c..7dfd4933d8 100644 --- a/src/java/org/apache/poi/sl/usermodel/TextRun.java +++ b/src/java/org/apache/poi/sl/usermodel/TextRun.java @@ -27,6 +27,7 @@ import org.apache.poi.util.Internal; /** * Some text. */ +@SuppressWarnings("unused") public interface TextRun { /** * Type of text capitals @@ -243,4 +244,11 @@ public interface TextRun { */ @Internal FieldType getFieldType(); + + /** + * @return the paragraph which contains this TextRun + * + * @since POI 4.1.0 + */ + TextParagraph getParagraph(); } -- cgit v1.2.3