2525class Decoder {
2626 private static final Logger LOG = LoggerFactory .getLogger (Decoder .class );
2727
28+ private static final char GREEK_CAPITAL_ALPHA = 'Α' ;
29+
2830 private Decoder () {
2931 // Prevent instantiation.
3032 }
@@ -216,33 +218,33 @@ public Unicode2Ascii(final int min, final int max, @Nullable final String conver
216218 };
217219
218220 private final static Unicode2Ascii [] UNICODE2ASCII = {
219- new Unicode2Ascii (0x0041 , 0x005a , "ABCDEFGHIJKLMNOPQRSTUVWXYZ" ), // Roman
220- new Unicode2Ascii (0x0391 , 0x03a9 , "ABGDFZHQIKLMNCOJP?STYVXRW" ), // Greek
221- new Unicode2Ascii (0x0410 , 0x042f , "AZBGDEFNI?KLMHOJPCTYQXSVW????U?R" ), // Cyrillic
222- new Unicode2Ascii (0x05d0 , 0x05ea , "ABCDFIGHJKLMNPQ?ROSETUVWXYZ" ), // Hebrew
223- new Unicode2Ascii (0x0905 , 0x0939 , "A?????????E?????B?CD?F?G??HJZ?KL?MNP?QU?RS?T?V??W??XY" ), // Hindi
224- new Unicode2Ascii (0x0d07 , 0x0d39 , "I?U?E??????A??BCD??F?G??HOJ??KLMNP?????Q?RST?VWX?YZ" ), // Malai
225- new Unicode2Ascii (0x10a0 , 0x10bf , "AB?CE?D?UF?GHOJ?KLMINPQRSTVW?XYZ" ), // Georgisch
221+ new Unicode2Ascii (0x0041 , 0x005a , "ABCDEFGHIJKLMNOPQRSTUVWXYZ" ), // Roman
222+ new Unicode2Ascii (0x0391 , 0x03a9 , "ABGDFZHQIKLMNCOJP?STYVXRW" ), // Greek
223+ new Unicode2Ascii (0x0410 , 0x042f , "AZBGDEFNI?KLMHOJPCTYQXSVW????U?R" ), // Cyrillic
224+ new Unicode2Ascii (0x05d0 , 0x05ea , "ABCDFIGHJKLMNPQ?ROSETUVWXYZ" ), // Hebrew
225+ new Unicode2Ascii (0x0905 , 0x0939 , "A?????????E?????B?CD?F?G??HJZ?KL?MNP?QU?RS?T?V??W??XY" ), // Hindi
226+ new Unicode2Ascii (0x0d07 , 0x0d39 , "I?U?E??????A??BCD??F?G??HOJ??KLMNP?????Q?RST?VWX?YZ" ), // Malayalam
227+ new Unicode2Ascii (0x10a0 , 0x10bf , "AB?CE?D?UF?GHOJ?KLMINPQRSTVW?XYZ" ), // Georgian
226228 new Unicode2Ascii (0x30a2 , 0x30f2 , "A?I?O?U?EB?C?D?F?G?H???J???????K??????L?M?N?????P??Q??R??S?????TV?????WX???Y????Z" ), // Katakana
227- new Unicode2Ascii (0x0e01 , 0x0e32 , "BC?D??FGHJ??O???K??L?MNP?Q?R????S?T?V?W????UXYZAIE" ), // Thai
228- new Unicode2Ascii (0x0e81 , 0x0ec6 , "BC?D??FG?H??J??????K??L?MN?P?Q??RST???V??WX?Y?ZA????????????U?????EI?O" ), // Lao
229- new Unicode2Ascii (0x0532 , 0x0556 , "BCDE??FGHI?J?KLM?N?U?PQ?R??STVWXYZ?OA" ), // Armenian
230- new Unicode2Ascii (0x0985 , 0x09b9 , "A??????B??E???U?CDF?GH??J??KLMNPQR?S?T?VW?X??Y??????Z" ), // Bengali
231- new Unicode2Ascii (0x0a05 , 0x0a39 , "A?????????E?????B?CD?F?G??HJZ?KL?MNP?QU?RS?T?V??W??XY" ), // Gurmukhi
232- new Unicode2Ascii (0x0f40 , 0x0f66 , "BCD?FGHJ??K?L?MN?P?QR?S?A?????TV?WXYEUZ" ), // Tibetan
233-
234- new Unicode2Ascii (0x0966 , 0x096f , "" ), // Hindi
235- new Unicode2Ascii (0x0d66 , 0x0d6f , "" ), // Malai
236- new Unicode2Ascii (0x0e50 , 0x0e59 , "" ), // Thai
237- new Unicode2Ascii (0x09e6 , 0x09ef , "" ), // Bengali
238- new Unicode2Ascii (0x0a66 , 0x0a6f , "" ), // Gurmukhi
239- new Unicode2Ascii (0x0f20 , 0x0f29 , "" ), // Tibetan
240-
241- // lowercase variants: greek, georgisch
242- new Unicode2Ascii (0x03B1 , 0x03c9 , "ABGDFZHQIKLMNCOJP?STYVXRW" ), // Greek
229+ new Unicode2Ascii (0x0e01 , 0x0e32 , "BC?D??FGHJ??O???K??L?MNP?Q?R????S?T?V?W????UXYZAIE" ), // Thai
230+ new Unicode2Ascii (0x0e81 , 0x0ec6 , "BC?D??FG?H??J??????K??L?MN?P?Q??RST???V??WX?Y?ZA????????????U?????EI?O" ), // Lao
231+ new Unicode2Ascii (0x0532 , 0x0556 , "BCDE??FGHI?J?KLM?N?U?PQ?R??STVWXYZ?OA" ), // Armenian
232+ new Unicode2Ascii (0x0985 , 0x09b9 , "A??????B??E???U?CDF?GH??J??KLMNPQR?S?T?VW?X??Y??????Z" ), // Bengali
233+ new Unicode2Ascii (0x0a05 , 0x0a39 , "A?????????E?????B?CD?F?G??HJZ?KL?MNP?QU?RS?T?V??W??XY" ), // Gurmukhi
234+ new Unicode2Ascii (0x0f40 , 0x0f66 , "BCD?FGHJ??K?L?MN?P?QR?S?A?????TV?WXYEUZ" ), // Tibetan
235+
236+ new Unicode2Ascii (0x0966 , 0x096f , "" ), // Hindi
237+ new Unicode2Ascii (0x0d66 , 0x0d6f , "" ), // Malayalam
238+ new Unicode2Ascii (0x0e50 , 0x0e59 , "" ), // Thai
239+ new Unicode2Ascii (0x09e6 , 0x09ef , "" ), // Bengali
240+ new Unicode2Ascii (0x0a66 , 0x0a6f , "" ), // Gurmukhi
241+ new Unicode2Ascii (0x0f20 , 0x0f29 , "" ), // Tibetan
242+
243+ // Lowercase variants: Greek, Georgian, Armenian.
244+ new Unicode2Ascii (0x03B1 , 0x03c9 , "ABGDFZHQIKLMNCOJP?STYVXRW" ), // Greek
243245 // lowercase
244- new Unicode2Ascii (0x10d0 , 0x10ef , "AB?CE?D?UF?GHOJ?KLMINPQRSTVW?XYZ" ), // Georgisch lowercase
245- new Unicode2Ascii (0x0562 , 0x0586 , "BCDE??FGHI?J?KLM?N?U?PQ?R??STVWXYZ?OA" ), // Armenian
246+ new Unicode2Ascii (0x10d0 , 0x10ef , "AB?CE?D?UF?GHOJ?KLMINPQRSTVW?XYZ" ), // Georgian lowercase
247+ new Unicode2Ascii (0x0562 , 0x0586 , "BCDE??FGHI?J?KLM?N?U?PQ?R??STVWXYZ?OA" ), // Armenian
246248 // lowercase
247249 new Unicode2Ascii (0 , 0 , null )
248250 };
@@ -508,9 +510,7 @@ private static Point decodeStarpipe(final String input, final int firstindex, fi
508510 * }
509511 */
510512
511- final Point retval = add2res (cornery , cornerx , dividerx << 2 , dividery , -1 , extrapostfix );
512-
513- return retval ;
513+ return add2res (cornery , cornerx , dividerx << 2 , dividery , -1 , extrapostfix );
514514 }
515515 storageStart += product ;
516516 i ++;
@@ -602,39 +602,47 @@ private static String aeuUnpack(final String argStr) {
602602 /**
603603 * This method decodes a Unicode string to ASCII. Package private for access by other modules.
604604 *
605- * @param str Unicode string.
605+ * @param mapcode Unicode string.
606606 * @return ASCII string.
607607 */
608- static String decodeUTF16 (final String str ) {
609- final StringBuilder asciibuf = new StringBuilder ();
610- for (int index = 0 ; index < str .length (); index ++) {
611- if (str .charAt (index ) == '.' ) {
612- asciibuf .append (str .charAt (index ));
613- } else if ((str .charAt (index ) >= 1 ) && (str .charAt (index ) <= 'z' )) {
608+ static String decodeUTF16 (final String mapcode ) {
609+ String result ;
610+ final StringBuilder asciiBuf = new StringBuilder ();
611+ for (int index = 0 ; index < mapcode .length (); index ++) {
612+ final char ch = mapcode .charAt (index );
613+ if (ch == '.' ) {
614+ asciiBuf .append (ch );
615+ } else if ((ch >= 1 ) && (ch <= 'z' )) {
614616 // normal ascii
615- asciibuf .append (str . charAt ( index ) );
617+ asciiBuf .append (ch );
616618 } else {
617619 boolean found = false ;
618620 for (int i = 0 ; UNICODE2ASCII [i ].min != 0 ; i ++) {
619- if ((str .charAt (index ) >= UNICODE2ASCII [i ].min )
620- && (str .charAt (index ) <= UNICODE2ASCII [i ].max )) {
621- String convert = UNICODE2ASCII [i ].convert ;
622- if (convert == null ) {
623- convert = "0123456789" ;
624- }
625- asciibuf .append (convert .charAt (((int ) str .charAt (index )) - UNICODE2ASCII [i ].min ));
621+ if ((ch >= UNICODE2ASCII [i ].min ) && (ch <= UNICODE2ASCII [i ].max )) {
622+ final String convert = (UNICODE2ASCII [i ].convert != null ) ? UNICODE2ASCII [i ].convert : "0123456789" ;
623+ final int pos = ((int ) ch ) - UNICODE2ASCII [i ].min ;
624+ asciiBuf .append (convert .charAt (pos ));
626625 found = true ;
627626 break ;
628627 }
629628 }
630629 if (!found ) {
631- asciibuf .append ('?' );
630+ asciiBuf .append ('?' );
632631 break ;
633632 }
634633 }
635634 }
635+ result = asciiBuf .toString ();
636636
637- return asciibuf .toString ();
637+ // Repack if the mapcode started with a Greek capital alpha: the conversion above turned it into a regular 'A', so the result is unpacked and packed again.
638+ if (mapcode .startsWith (String .valueOf (GREEK_CAPITAL_ALPHA ))) {
639+ final String unpacked = aeuUnpack (result );
640+ if (unpacked .isEmpty ()) {
641+ throw new AssertionError ("decodeUTF16: cannot decode " + mapcode );
642+ }
643+ result = Encoder .aeuPack (unpacked , false );
644+ }
645+ return result ;
638646 }
639647
640648 static String encodeUTF16 (final String string , final int alphabet ) {
@@ -657,7 +665,9 @@ static String encodeUTF16(final String string, final int alphabet) {
657665
658666 static String encodeToAlphabetCode (final String mapcode , int alphabetCode ) {
659667 if (ASCII2LANGUAGE [alphabetCode ][4 ] == 0x003f ) {
660- if (mapcode .matches ("^.*[EUeu].*" )) {
668+
669+ // Alphabet does not contain 'E'.
670+ if (mapcode .matches ("^.*[EU].*" )) {
661671 final String unpacked = aeuUnpack (mapcode );
662672 if (unpacked .isEmpty ()) {
663673 throw new AssertionError ("encodeToAlphabetCode: cannot encode '" + mapcode +
0 commit comments