You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

single_byte.go 45KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882
  1. package chardet
  2. // Recognizer for single byte charset family
  3. type recognizerSingleByte struct {
  4. charset string
  5. hasC1ByteCharset string
  6. language string
  7. charMap *[256]byte
  8. ngram *[64]uint32
  9. }
  10. func (r *recognizerSingleByte) Match(input *recognizerInput) recognizerOutput {
  11. var charset string = r.charset
  12. if input.hasC1Bytes && len(r.hasC1ByteCharset) > 0 {
  13. charset = r.hasC1ByteCharset
  14. }
  15. return recognizerOutput{
  16. Charset: charset,
  17. Language: r.language,
  18. Confidence: r.parseNgram(input.input),
  19. }
  20. }
  21. type ngramState struct {
  22. ngram uint32
  23. ignoreSpace bool
  24. ngramCount, ngramHit uint32
  25. table *[64]uint32
  26. }
  27. func newNgramState(table *[64]uint32) *ngramState {
  28. return &ngramState{
  29. ngram: 0,
  30. ignoreSpace: false,
  31. ngramCount: 0,
  32. ngramHit: 0,
  33. table: table,
  34. }
  35. }
  36. func (s *ngramState) AddByte(b byte) {
  37. const ngramMask = 0xFFFFFF
  38. if !(b == 0x20 && s.ignoreSpace) {
  39. s.ngram = ((s.ngram << 8) | uint32(b)) & ngramMask
  40. s.ignoreSpace = (s.ngram == 0x20)
  41. s.ngramCount++
  42. if s.lookup() {
  43. s.ngramHit++
  44. }
  45. }
  46. s.ignoreSpace = (b == 0x20)
  47. }
  48. func (s *ngramState) HitRate() float32 {
  49. if s.ngramCount == 0 {
  50. return 0
  51. }
  52. return float32(s.ngramHit) / float32(s.ngramCount)
  53. }
  54. func (s *ngramState) lookup() bool {
  55. var index int
  56. if s.table[index+32] <= s.ngram {
  57. index += 32
  58. }
  59. if s.table[index+16] <= s.ngram {
  60. index += 16
  61. }
  62. if s.table[index+8] <= s.ngram {
  63. index += 8
  64. }
  65. if s.table[index+4] <= s.ngram {
  66. index += 4
  67. }
  68. if s.table[index+2] <= s.ngram {
  69. index += 2
  70. }
  71. if s.table[index+1] <= s.ngram {
  72. index += 1
  73. }
  74. if s.table[index] > s.ngram {
  75. index -= 1
  76. }
  77. if index < 0 || s.table[index] != s.ngram {
  78. return false
  79. }
  80. return true
  81. }
  82. func (r *recognizerSingleByte) parseNgram(input []byte) int {
  83. state := newNgramState(r.ngram)
  84. for _, inChar := range input {
  85. c := r.charMap[inChar]
  86. if c != 0 {
  87. state.AddByte(c)
  88. }
  89. }
  90. state.AddByte(0x20)
  91. rate := state.HitRate()
  92. if rate > 0.33 {
  93. return 98
  94. }
  95. return int(rate * 300)
  96. }
  97. var charMap_8859_1 = [256]byte{
  98. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  99. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  100. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  101. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  102. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
  103. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  104. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  105. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  106. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  107. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  108. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  109. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  110. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  111. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  112. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  113. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  114. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  115. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  116. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  117. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  118. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  119. 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
  120. 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
  121. 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
  122. 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  123. 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  124. 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
  125. 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
  126. 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  127. 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  128. 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
  129. 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
  130. }
  131. var ngrams_8859_1_en = [64]uint32{
  132. 0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F,
  133. 0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74,
  134. 0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420,
  135. 0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320,
  136. }
  137. var ngrams_8859_1_da = [64]uint32{
  138. 0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620,
  139. 0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320,
  140. 0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520,
  141. 0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572,
  142. }
  143. var ngrams_8859_1_de = [64]uint32{
  144. 0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
  145. 0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
  146. 0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
  147. 0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572,
  148. }
  149. var ngrams_8859_1_es = [64]uint32{
  150. 0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
  151. 0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C,
  152. 0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064,
  153. 0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20,
  154. }
  155. var ngrams_8859_1_fr = [64]uint32{
  156. 0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
  157. 0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
  158. 0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
  159. 0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220,
  160. }
  161. var ngrams_8859_1_it = [64]uint32{
  162. 0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
  163. 0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
  164. 0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
  165. 0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
  166. }
  167. var ngrams_8859_1_nl = [64]uint32{
  168. 0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
  169. 0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
  170. 0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
  171. 0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
  172. }
  173. var ngrams_8859_1_no = [64]uint32{
  174. 0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
  175. 0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
  176. 0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
  177. 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
  178. }
  179. var ngrams_8859_1_pt = [64]uint32{
  180. 0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
  181. 0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
  182. 0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
  183. 0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
  184. }
  185. var ngrams_8859_1_sv = [64]uint32{
  186. 0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
  187. 0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
  188. 0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
  189. 0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
  190. }
  191. func newRecognizer_8859_1(language string, ngram *[64]uint32) *recognizerSingleByte {
  192. return &recognizerSingleByte{
  193. charset: "ISO-8859-1",
  194. hasC1ByteCharset: "windows-1252",
  195. language: language,
  196. charMap: &charMap_8859_1,
  197. ngram: ngram,
  198. }
  199. }
  200. func newRecognizer_8859_1_en() *recognizerSingleByte {
  201. return newRecognizer_8859_1("en", &ngrams_8859_1_en)
  202. }
  203. func newRecognizer_8859_1_da() *recognizerSingleByte {
  204. return newRecognizer_8859_1("da", &ngrams_8859_1_da)
  205. }
  206. func newRecognizer_8859_1_de() *recognizerSingleByte {
  207. return newRecognizer_8859_1("de", &ngrams_8859_1_de)
  208. }
  209. func newRecognizer_8859_1_es() *recognizerSingleByte {
  210. return newRecognizer_8859_1("es", &ngrams_8859_1_es)
  211. }
  212. func newRecognizer_8859_1_fr() *recognizerSingleByte {
  213. return newRecognizer_8859_1("fr", &ngrams_8859_1_fr)
  214. }
  215. func newRecognizer_8859_1_it() *recognizerSingleByte {
  216. return newRecognizer_8859_1("it", &ngrams_8859_1_it)
  217. }
  218. func newRecognizer_8859_1_nl() *recognizerSingleByte {
  219. return newRecognizer_8859_1("nl", &ngrams_8859_1_nl)
  220. }
  221. func newRecognizer_8859_1_no() *recognizerSingleByte {
  222. return newRecognizer_8859_1("no", &ngrams_8859_1_no)
  223. }
  224. func newRecognizer_8859_1_pt() *recognizerSingleByte {
  225. return newRecognizer_8859_1("pt", &ngrams_8859_1_pt)
  226. }
  227. func newRecognizer_8859_1_sv() *recognizerSingleByte {
  228. return newRecognizer_8859_1("sv", &ngrams_8859_1_sv)
  229. }
  230. var charMap_8859_2 = [256]byte{
  231. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  232. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  233. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  234. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  235. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
  236. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  237. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  238. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  239. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  240. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  241. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  242. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  243. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  244. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  245. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  246. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  247. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  248. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  249. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  250. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  251. 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20,
  252. 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
  253. 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7,
  254. 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
  255. 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  256. 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  257. 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
  258. 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
  259. 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  260. 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  261. 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
  262. 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
  263. }
  264. var ngrams_8859_2_cs = [64]uint32{
  265. 0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
  266. 0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
  267. 0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
  268. 0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
  269. }
  270. var ngrams_8859_2_hu = [64]uint32{
  271. 0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
  272. 0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
  273. 0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
  274. 0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
  275. }
  276. var ngrams_8859_2_pl = [64]uint32{
  277. 0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
  278. 0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
  279. 0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
  280. 0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
  281. }
  282. var ngrams_8859_2_ro = [64]uint32{
  283. 0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
  284. 0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
  285. 0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
  286. 0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
  287. }
  288. func newRecognizer_8859_2(language string, ngram *[64]uint32) *recognizerSingleByte {
  289. return &recognizerSingleByte{
  290. charset: "ISO-8859-2",
  291. hasC1ByteCharset: "windows-1250",
  292. language: language,
  293. charMap: &charMap_8859_2,
  294. ngram: ngram,
  295. }
  296. }
  297. func newRecognizer_8859_2_cs() *recognizerSingleByte {
  298. return newRecognizer_8859_2("cs", &ngrams_8859_2_cs)
  299. }
  300. func newRecognizer_8859_2_hu() *recognizerSingleByte {
  301. return newRecognizer_8859_2("hu", &ngrams_8859_2_hu)
  302. }
  303. func newRecognizer_8859_2_pl() *recognizerSingleByte {
  304. return newRecognizer_8859_2("pl", &ngrams_8859_2_pl)
  305. }
  306. func newRecognizer_8859_2_ro() *recognizerSingleByte {
  307. return newRecognizer_8859_2("ro", &ngrams_8859_2_ro)
  308. }
  309. var charMap_8859_5 = [256]byte{
  310. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  311. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  312. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  313. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  314. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
  315. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  316. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  317. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  318. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  319. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  320. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  321. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  322. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  323. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  324. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  325. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  326. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  327. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  328. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  329. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  330. 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
  331. 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
  332. 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
  333. 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
  334. 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  335. 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  336. 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
  337. 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
  338. 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  339. 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  340. 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
  341. 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
  342. }
  343. var ngrams_8859_5_ru = [64]uint32{
  344. 0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
  345. 0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
  346. 0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
  347. 0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
  348. }
  349. func newRecognizer_8859_5(language string, ngram *[64]uint32) *recognizerSingleByte {
  350. return &recognizerSingleByte{
  351. charset: "ISO-8859-5",
  352. language: language,
  353. charMap: &charMap_8859_5,
  354. ngram: ngram,
  355. }
  356. }
  357. func newRecognizer_8859_5_ru() *recognizerSingleByte {
  358. return newRecognizer_8859_5("ru", &ngrams_8859_5_ru)
  359. }
  360. var charMap_8859_6 = [256]byte{
  361. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  362. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  363. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  364. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  365. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
  366. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  367. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  368. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  369. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  370. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  371. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  372. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  373. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  374. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  375. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  376. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  377. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  378. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  379. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  380. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  381. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  382. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  383. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  384. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  385. 0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
  386. 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
  387. 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
  388. 0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20,
  389. 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  390. 0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20,
  391. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  392. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  393. }
  394. var ngrams_8859_6_ar = [64]uint32{
  395. 0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
  396. 0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
  397. 0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
  398. 0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
  399. }
  400. func newRecognizer_8859_6(language string, ngram *[64]uint32) *recognizerSingleByte {
  401. return &recognizerSingleByte{
  402. charset: "ISO-8859-6",
  403. language: language,
  404. charMap: &charMap_8859_6,
  405. ngram: ngram,
  406. }
  407. }
  408. func newRecognizer_8859_6_ar() *recognizerSingleByte {
  409. return newRecognizer_8859_6("ar", &ngrams_8859_6_ar)
  410. }
  411. var charMap_8859_7 = [256]byte{
  412. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  413. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  414. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  415. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  416. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
  417. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  418. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  419. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  420. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  421. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  422. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  423. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  424. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  425. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  426. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  427. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  428. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  429. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  430. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  431. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  432. 0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20,
  433. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  434. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20,
  435. 0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE,
  436. 0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  437. 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  438. 0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
  439. 0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF,
  440. 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  441. 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  442. 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
  443. 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
  444. }
  445. var ngrams_8859_7_el = [64]uint32{
  446. 0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
  447. 0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
  448. 0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
  449. 0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
  450. }
  451. func newRecognizer_8859_7(language string, ngram *[64]uint32) *recognizerSingleByte {
  452. return &recognizerSingleByte{
  453. charset: "ISO-8859-7",
  454. hasC1ByteCharset: "windows-1253",
  455. language: language,
  456. charMap: &charMap_8859_7,
  457. ngram: ngram,
  458. }
  459. }
  460. func newRecognizer_8859_7_el() *recognizerSingleByte {
  461. return newRecognizer_8859_7("el", &ngrams_8859_7_el)
  462. }
  463. var charMap_8859_8 = [256]byte{
  464. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  465. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  466. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  467. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  468. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
  469. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  470. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  471. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  472. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  473. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  474. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  475. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  476. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  477. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  478. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  479. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  480. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  481. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  482. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  483. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  484. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  485. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  486. 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
  487. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  488. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  489. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  490. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  491. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  492. 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  493. 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  494. 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
  495. 0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20,
  496. }
  497. var ngrams_8859_8_I_he = [64]uint32{
  498. 0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
  499. 0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
  500. 0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
  501. 0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
  502. }
  503. var ngrams_8859_8_he = [64]uint32{
  504. 0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
  505. 0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
  506. 0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
  507. 0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
  508. }
  509. func newRecognizer_8859_8(language string, ngram *[64]uint32) *recognizerSingleByte {
  510. return &recognizerSingleByte{
  511. charset: "ISO-8859-8",
  512. hasC1ByteCharset: "windows-1255",
  513. language: language,
  514. charMap: &charMap_8859_8,
  515. ngram: ngram,
  516. }
  517. }
  518. func newRecognizer_8859_8_I_he() *recognizerSingleByte {
  519. r := newRecognizer_8859_8("he", &ngrams_8859_8_I_he)
  520. r.charset = "ISO-8859-8-I"
  521. return r
  522. }
  523. func newRecognizer_8859_8_he() *recognizerSingleByte {
  524. return newRecognizer_8859_8("he", &ngrams_8859_8_he)
  525. }
  526. var charMap_8859_9 = [256]byte{
  527. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  528. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  529. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  530. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  531. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
  532. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  533. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  534. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  535. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  536. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  537. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  538. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  539. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  540. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  541. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  542. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  543. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  544. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  545. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  546. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  547. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  548. 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
  549. 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
  550. 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
  551. 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  552. 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  553. 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
  554. 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF,
  555. 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  556. 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  557. 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
  558. 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
  559. }
  560. var ngrams_8859_9_tr = [64]uint32{
  561. 0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
  562. 0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
  563. 0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
  564. 0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD,
  565. }
  566. func newRecognizer_8859_9(language string, ngram *[64]uint32) *recognizerSingleByte {
  567. return &recognizerSingleByte{
  568. charset: "ISO-8859-9",
  569. hasC1ByteCharset: "windows-1254",
  570. language: language,
  571. charMap: &charMap_8859_9,
  572. ngram: ngram,
  573. }
  574. }
  575. func newRecognizer_8859_9_tr() *recognizerSingleByte {
  576. return newRecognizer_8859_9("tr", &ngrams_8859_9_tr)
  577. }
  578. var charMap_windows_1256 = [256]byte{
  579. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  580. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  581. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  582. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  583. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
  584. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  585. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  586. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  587. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  588. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  589. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  590. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  591. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  592. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  593. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  594. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  595. 0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
  596. 0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F,
  597. 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  598. 0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F,
  599. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  600. 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
  601. 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
  602. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  603. 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
  604. 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
  605. 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20,
  606. 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
  607. 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  608. 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  609. 0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20,
  610. 0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF,
  611. }
  612. var ngrams_windows_1256 = [64]uint32{
  613. 0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8,
  614. 0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD,
  615. 0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20,
  616. 0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420,
  617. }
  618. func newRecognizer_windows_1256() *recognizerSingleByte {
  619. return &recognizerSingleByte{
  620. charset: "windows-1256",
  621. language: "ar",
  622. charMap: &charMap_windows_1256,
  623. ngram: &ngrams_windows_1256,
  624. }
  625. }
  626. var charMap_windows_1251 = [256]byte{
  627. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  628. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  629. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  630. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  631. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
  632. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  633. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  634. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  635. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  636. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  637. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  638. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  639. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  640. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  641. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  642. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  643. 0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
  644. 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
  645. 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  646. 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
  647. 0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20,
  648. 0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF,
  649. 0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20,
  650. 0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF,
  651. 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  652. 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  653. 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
  654. 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
  655. 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  656. 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  657. 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
  658. 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
  659. }
  660. var ngrams_windows_1251 = [64]uint32{
  661. 0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE,
  662. 0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED,
  663. 0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2,
  664. 0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520,
  665. }
  666. func newRecognizer_windows_1251() *recognizerSingleByte {
  667. return &recognizerSingleByte{
  668. charset: "windows-1251",
  669. language: "ar",
  670. charMap: &charMap_windows_1251,
  671. ngram: &ngrams_windows_1251,
  672. }
  673. }
  674. var charMap_KOI8_R = [256]byte{
  675. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  676. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  677. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  678. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  679. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
  680. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  681. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  682. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  683. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  684. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  685. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  686. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  687. 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  688. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  689. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  690. 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
  691. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  692. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  693. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  694. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  695. 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
  696. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  697. 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
  698. 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  699. 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
  700. 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
  701. 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
  702. 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
  703. 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
  704. 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
  705. 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
  706. 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
  707. }
  708. var ngrams_KOI8_R = [64]uint32{
  709. 0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1,
  710. 0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE,
  711. 0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1,
  712. 0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF,
  713. }
  714. func newRecognizer_KOI8_R() *recognizerSingleByte {
  715. return &recognizerSingleByte{
  716. charset: "KOI8-R",
  717. language: "ru",
  718. charMap: &charMap_KOI8_R,
  719. ngram: &ngrams_KOI8_R,
  720. }
  721. }
  722. var charMap_IBM424_he = [256]byte{
  723. /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */
  724. /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  725. /* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  726. /* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  727. /* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  728. /* 4- */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  729. /* 5- */ 0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  730. /* 6- */ 0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  731. /* 7- */ 0x40, 0x71, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40,
  732. /* 8- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  733. /* 9- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  734. /* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  735. /* B- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  736. /* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  737. /* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  738. /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  739. /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  740. }
  741. var ngrams_IBM424_he_rtl = [64]uint32{
  742. 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
  743. 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
  744. 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056,
  745. 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069,
  746. }
  747. var ngrams_IBM424_he_ltr = [64]uint32{
  748. 0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141,
  749. 0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054,
  750. 0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940,
  751. 0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651,
  752. }
  753. func newRecognizer_IBM424_he(charset string, ngram *[64]uint32) *recognizerSingleByte {
  754. return &recognizerSingleByte{
  755. charset: charset,
  756. language: "he",
  757. charMap: &charMap_IBM424_he,
  758. ngram: ngram,
  759. }
  760. }
  761. func newRecognizer_IBM424_he_rtl() *recognizerSingleByte {
  762. return newRecognizer_IBM424_he("IBM424_rtl", &ngrams_IBM424_he_rtl)
  763. }
  764. func newRecognizer_IBM424_he_ltr() *recognizerSingleByte {
  765. return newRecognizer_IBM424_he("IBM424_ltr", &ngrams_IBM424_he_ltr)
  766. }
  767. var charMap_IBM420_ar = [256]byte{
  768. /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */
  769. /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  770. /* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  771. /* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  772. /* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  773. /* 4- */ 0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  774. /* 5- */ 0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  775. /* 6- */ 0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  776. /* 7- */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  777. /* 8- */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
  778. /* 9- */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
  779. /* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
  780. /* B- */ 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
  781. /* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF,
  782. /* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
  783. /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF,
  784. /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40,
  785. }
  786. var ngrams_IBM420_ar_rtl = [64]uint32{
  787. 0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158,
  788. 0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB,
  789. 0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40,
  790. 0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40,
  791. }
  792. var ngrams_IBM420_ar_ltr = [64]uint32{
  793. 0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF,
  794. 0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD,
  795. 0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156,
  796. 0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156,
  797. }
  798. func newRecognizer_IBM420_ar(charset string, ngram *[64]uint32) *recognizerSingleByte {
  799. return &recognizerSingleByte{
  800. charset: charset,
  801. language: "ar",
  802. charMap: &charMap_IBM420_ar,
  803. ngram: ngram,
  804. }
  805. }
  806. func newRecognizer_IBM420_ar_rtl() *recognizerSingleByte {
  807. return newRecognizer_IBM420_ar("IBM420_rtl", &ngrams_IBM420_ar_rtl)
  808. }
  809. func newRecognizer_IBM420_ar_ltr() *recognizerSingleByte {
  810. return newRecognizer_IBM420_ar("IBM420_ltr", &ngrams_IBM420_ar_ltr)
  811. }