aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--hyph/hyphenation.dtd68
-rw-r--r--hyph/readme43
2 files changed, 111 insertions, 0 deletions
diff --git a/hyph/hyphenation.dtd b/hyph/hyphenation.dtd
new file mode 100644
index 000000000..d6094694a
--- /dev/null
+++ b/hyph/hyphenation.dtd
@@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="US-ASCII"?>
+<!--
+ Copyright 1999-2004 The Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- $Id$ -->
+
+<!ELEMENT hyphenation-info (hyphen-char?, hyphen-min?,
+ classes, exceptions?, patterns)> <!-- children in this fixed order; only classes and patterns are required -->
+
+<!-- Hyphen character to be used in the exception list as a shortcut for
+ <hyphen pre="-"/>. Defaults to '-'.
+-->
+<!ELEMENT hyphen-char EMPTY>
+<!ATTLIST hyphen-char value CDATA #REQUIRED>
+
+<!-- Default minimum length in characters of hyphenated word fragments
+ before and after the line break. For some languages this is not
+ only for aesthetic purposes; wrong hyphens may be generated if this
+ is not accounted for.
+-->
+<!ELEMENT hyphen-min EMPTY>
+<!ATTLIST hyphen-min before CDATA #REQUIRED>
+<!ATTLIST hyphen-min after CDATA #REQUIRED>
+
+<!-- Character equivalence classes: space separated list of character groups; all
+ characters in a group are to be treated as equivalent as far as
+ the hyphenation algorithm is concerned. The first character in a group
+ is the group's equivalent character. Patterns should only contain
+ first characters. The classes also define the word characters, i.e. a word
+ that contains characters not present in any of the classes is not hyphenated.
+-->
+<!ELEMENT classes (#PCDATA)>
+
+<!-- Hyphenation exceptions: space separated list of hyphenated words.
+ A hyphen is indicated by the hyphen tag, but you can use the
+ hyphen-char defined previously as a shortcut. Use this for cases
+ where the algorithm finds wrong hyphens or you want
+ to provide your own hyphenation for some words.
+-->
+<!ELEMENT exceptions (#PCDATA|hyphen)* >
+
+<!-- The hyphenation patterns, space separated. A pattern is made of 'equivalent'
+ characters as described before; between any two word characters a digit
+ in the range 0 to 9 may be specified. The absence of a digit is equivalent
+ to zero. The '.' character is reserved to indicate the beginning or end
+ of a word. -->
+<!ELEMENT patterns (#PCDATA)>
+
+<!-- A "full hyphen" equivalent to TeX's \discretionary, with the
+ attributes pre (pre-break), post (post-break) and no (no-break).
+ To be used in the exceptions list; the hyphen character is not
+ automatically added. -->
+<!ELEMENT hyphen EMPTY>
+<!ATTLIST hyphen pre CDATA #IMPLIED>
+<!ATTLIST hyphen no CDATA #IMPLIED>
+<!ATTLIST hyphen post CDATA #IMPLIED>
diff --git a/hyph/readme b/hyph/readme
new file mode 100644
index 000000000..5425ce20e
--- /dev/null
+++ b/hyph/readme
@@ -0,0 +1,43 @@
+Hyphenation
+
+Fop comes with some hyphenation patterns. If you need a hyphenation
+pattern which isn't included in the distribution, do the following:
+
+1. Get the TeX hyphenation pattern file and turn it into an XML file
+ which conforms to the hyphenation.dtd in the subdirectory /hyph.
+
+2. name this new file following this schema: languageCode_countryCode.xml.
+ If you don't need a country code, leave it out. For example, the file
+ name for an American English hyphenation pattern would look like this:
+ en_US.xml.
+ For an Italian file (no country code): it.xml.
+ Language and country codes must be the same as in XSL-FO, that is, follow
+ ISO 639 <http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt> and
+ ISO 3166 <http://www.ics.uci.edu/pub/ietf/http/related/iso3166.txt>
+ respectively.
+ NOTE: The ISO 639/ISO 3166 convention is that language codes are
+ written in lower case, while country codes are written in upper case.
+
+3. Once you have built your new hyphenation pattern file successfully, there are
+ two ways to make it accessible to Fop.
+ a) Put this new file into the directory /hyph and rebuild Fop. The file will
+ be picked up and added to the fop.jar.
+ b) Put the file into a directory of your choice and specify this directory
+ in the userconfig.xml in the entry <hyphenation-dir>
+
+4. If the license of your hyphenation pattern file allows it, please send
+ it to the fop-dev list, so it can be made part of the Fop distribution.
+
+The following hyphenation patterns are part of the Fop distribution:
+
+da Danish
+de German (traditional)
+de_DR German (new spelling)
+en American English
+en_GB British English
+en_US American English
+es Spanish
+fi Finnish
+fr French
+it Italian
+nl Dutch (new official Dutch spelling)