diff options
-rw-r--r-- | hyph/hyphenation.dtd | 68 | ||||
-rw-r--r-- | hyph/readme | 43 |
2 files changed, 111 insertions, 0 deletions
diff --git a/hyph/hyphenation.dtd b/hyph/hyphenation.dtd new file mode 100644 index 000000000..d6094694a --- /dev/null +++ b/hyph/hyphenation.dtd @@ -0,0 +1,68 @@ +<?xml version="1.0" encoding="US-ASCII"?> +<!-- + Copyright 1999-2004 The Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<!-- $Id$ --> + +<!ELEMENT hyphenation-info (hyphen-char?, hyphen-min?, + classes, exceptions?, patterns)> + +<!-- Hyphen character to be used in the exception list as a shortcut for + <hyphen pre="-"/>. Defaults to '-'. +--> +<!ELEMENT hyphen-char EMPTY> +<!ATTLIST hyphen-char value CDATA #REQUIRED> + +<!-- Default minimum length in characters of hyphenated word fragments + before and after the line break. For some languages this is not + only for aesthetic purposes; wrong hyphens may be generated if this + is not accounted for. +--> +<!ELEMENT hyphen-min EMPTY> +<!ATTLIST hyphen-min before CDATA #REQUIRED> +<!ATTLIST hyphen-min after CDATA #REQUIRED> + +<!-- Character equivalence classes: space separated list of character groups; all + characters in a group are to be treated as equivalent as far as + the hyphenation algorithm is concerned. The first character in a group + is the group's equivalent character. Patterns should only contain + first characters. It also defines word characters, i.e. a word that + contains characters not present in any of the classes is not hyphenated. 
+--> +<!ELEMENT classes (#PCDATA)> + +<!-- Hyphenation exceptions: space separated list of hyphenated words. + A hyphen is indicated by the hyphen tag, but you can use the + hyphen-char defined previously as a shortcut. This is for cases + where the algorithm finds wrong hyphens or you want + to provide your own hyphenation for some words. +--> +<!ELEMENT exceptions (#PCDATA|hyphen)* > + +<!-- The hyphenation patterns, space separated. A pattern is made of 'equivalent' + characters as described before; between any two word characters a digit + in the range 0 to 9 may be specified. The absence of a digit is equivalent + to zero. The '.' character is reserved to indicate the beginning or ending + of words. --> +<!ELEMENT patterns (#PCDATA)> + +<!-- A "full hyphen" equivalent to TeX's \discretionary + with pre, post and no attributes (the pre-break, post-break and no-break texts). + To be used in the exceptions list; the hyphen character is not + automatically added. --> +<!ELEMENT hyphen EMPTY> +<!ATTLIST hyphen pre CDATA #IMPLIED> +<!ATTLIST hyphen no CDATA #IMPLIED> +<!ATTLIST hyphen post CDATA #IMPLIED> diff --git a/hyph/readme b/hyph/readme new file mode 100644 index 000000000..5425ce20e --- /dev/null +++ b/hyph/readme @@ -0,0 +1,43 @@ +Hyphenation + +Fop comes with some hyphenation patterns. If you need a hyphenation +pattern which isn't included in the distribution, do the following: + +1. Get the TeX hyphenation pattern file and turn it into an xml file + which conforms to the hyphenation.dtd in the sub directory /hyph + +2. Name this new file following this schema: languageCode_countryCode.xml. + If you don't need a country code, leave it out, e.g. the file name + for an American English hyphenation pattern would look like this: + en_US.xml. + For an Italian file: it.xml. 
+ Language and country codes must be the same as in xsl:fo, that is, they must follow + ISO 639 <http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt> and + ISO 3166 <http://www.ics.uci.edu/pub/ietf/http/related/iso3166.txt> + respectively. + NOTE: The ISO 639/ISO 3166 convention is that language codes are + written in lower case, while country codes are written in upper case. + +3. If you have built your new hyphenation pattern file successfully, there are + two ways to make it accessible to Fop. + a) Put this new file into the directory /hyph and rebuild Fop. The file will + be picked up and added to the fop.jar. + b) Put the file into a directory of your choice and specify this directory + in the userconfig.xml in the entry <hyphenation-dir> + +4. If the license of your hyphenation pattern file allows it, please send + it to the list fop-dev, so it can be made part of the Fop distribution. + +The following hyphenation patterns are part of the Fop distribution: + +da Danish +de German (traditional) +de_DR German (new spelling) +en American English +en_GB British English +en_US American English +es Spanish +fi Finnish +fr French +it Italian +nl Dutch (new official Dutch spelling) |