IBM-864 Arabic support

Arabic support
qzind · Aug 6, 2018 · 8f2e617 · 8f2e617
1 parent 56be1bd
commit 8f2e617
Show file tree

Hide file tree

Showing 6 changed files with 207 additions and 4 deletions.
diff --git a/ant/lib/slim-icu.py b/ant/lib/slim-icu.py
@@ -0,0 +1,94 @@
+# Download the icu4j source code and build it with ant;
+# the build generates icu4j.jar and icu4j-charset.jar.
+# Then run slim-icu.py to generate slimmed-down versions of both jars.
+# Currently this script keeps only the Arabic and English data.
+
+# slim ICU
+import sys
+import os
+from pathlib import Path
+import zipfile
+from zipfile import ZipFile
+
+directory = str(Path(__file__).resolve().parent)
+if len(sys.argv) > 1:
+    directory = sys.argv[1]
+
+mode = zipfile.ZIP_DEFLATED
+
+
+def keep_file(filename):
+    # skip all break iterators
+    if filename.endswith(".brk") \
+            or filename.endswith(".dict") \
+            or filename.endswith("unames.icu") \
+            or filename.endswith("ucadata.icu") \
+            or filename.endswith(".spp"):
+        return False
+
+    # keep english and arabic
+    if filename.startswith("en") \
+            or filename.startswith("ar") \
+            or not filename.endswith(".res"):
+        return True
+
+    return False
+
+
+zin = ZipFile(os.path.join(directory, 'icu4j.jar'), 'r')
+zout = ZipFile(os.path.join(directory, 'icu4j-slim.jar'), 'w', mode)
+
+for item in zin.infolist():
+    buff = zin.read(item.filename)
+    print(item.filename)
+
+    if keep_file(item.filename):
+        print("Keep")
+        zout.writestr(item, buff)
+    else:
+        print("Remove")
+
+zout.close()
+zin.close()
+
+
+def keep_charset_file(filename):
+    to_remove = [
+        "cns-11643-1992.cnv",
+        "ebcdic-xml-us.cnv",
+        "euc-jp-2007.cnv",
+        "euc-tw-2014.cnv",
+        "gb18030.cnv",
+        "ibm-1363_P11B-1998.cnv",
+        "ibm-1364_P110-2007.cnv",
+        "ibm-1371_P100-1999.cnv",
+        "ibm-1373_P100-2002.cnv",
+        "ibm-1375_P100-2008.cnv",
+        "ibm-1383_P110-1999.cnv",
+        "ibm-1386_P100-2001.cnv",
+        "ibm-1388_P103-2001.cnv",
+        "ibm-1390_P110-2003.cnv"
+    ]
+
+    for i in to_remove:
+        if i in filename:
+            return False
+
+    return True
+
+
+zin = ZipFile(os.path.join(directory, 'icu4j-charset.jar'), 'r')
+zout = ZipFile(os.path.join(directory, 'icu4j-charset-slim.jar'), 'w', mode)
+
+for item in zin.infolist():
+    buff = zin.read(item.filename)
+    print(item.filename, end=' ')
+
+    if keep_charset_file(item.filename):
+        print("Keep")
+        zout.writestr(item, buff)
+    else:
+        print("Remove")
+
+zout.close()
+zin.close()
diff --git a/build.xml b/build.xml
@@ -113,7 +113,7 @@
         <java jar="${dist.jar}" fork="true" outputproperty="build.version">
             <arg value="--version"/>
         </java>
-		
+
         <!-- Fallback to a bogus version number if the above command failed -->
         <property name="build.version" value="0.0.0" />
         <echo>Version ${build.version}</echo>
@@ -170,6 +170,12 @@
         </copy>
     </target>
 
+    <!-- Runs ant/lib/slim-icu.py against ${lib.dir}/charsets -->
+    <target name="distill-icu" depends="init">
+        <!-- failonerror: stop the build when the script exits with a failure code -->
+        <exec executable="python" failonerror="true">
+            <!-- one value per argument, so a lib.dir containing spaces is not split -->
+            <arg value="ant/lib/slim-icu.py"/>
+            <arg value="${lib.dir}/charsets"/>
+        </exec>
+    </target>
+
     <!--
     ################################################################
     #               Prepackage Steps - All Platforms               #

diff --git a/lib/charsets/icu4j-charset-slim.jar b/lib/charsets/icu4j-charset-slim.jar
diff --git a/lib/charsets/icu4j-slim.jar b/lib/charsets/icu4j-slim.jar
diff --git a/src/qz/printer/action/PrintRaw.java b/src/qz/printer/action/PrintRaw.java
@@ -9,6 +9,7 @@
  */
 package qz.printer.action;
 
+import com.ibm.icu.text.ArabicShapingException;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.ssl.Base64;
 import org.codehaus.jettison.json.JSONArray;
@@ -43,7 +44,6 @@
 import java.util.Locale;
 import java.util.concurrent.atomic.AtomicBoolean;
 
-
 /**
  * Sends raw data to the printer, overriding your operating system's print
  * driver. Most useful for printers such as zebra card or barcode printers.
@@ -56,7 +56,7 @@ public class PrintRaw implements PrintProcessor {
 
     private ByteArrayBuilder commands;
 
-    String encoding = null;
+    private String encoding = null;
 
 
     public PrintRaw() {
@@ -68,6 +68,20 @@ public PrintingUtilities.Type getType() {
         return PrintingUtilities.Type.RAW;
     }
 
+    /**
+     * Encodes a chunk of plain text into the bytes appended to the raw command buffer.
+     * <p>
+     * The IBM-864 aliases listed below are routed through
+     * {@code ArabicConversionUtilities.convertToIBM864(String)}; any other encoding name is
+     * handed to {@link String#getBytes(String)}.
+     *
+     * @param str      text to encode
+     * @param encoding charset name, matched case-insensitively; must not be {@code null}
+     * @return the encoded bytes
+     * @throws ArabicShapingException declared by the IBM-864 conversion
+     * @throws IOException            if the text cannot be encoded or the charset name is not supported
+     */
+    private byte[] getBytes(String str, String encoding) throws ArabicShapingException, IOException {
+        // Locale.ROOT keeps the alias match independent of the JVM default locale;
+        // under a Turkish locale "IBM864".toLowerCase() does not produce "ibm864"
+        switch(encoding.toLowerCase(Locale.ROOT)) {
+            case "ibm864":
+            case "cp864":
+            case "csibm864":
+            case "864":
+            case "ibm-864":
+                return ArabicConversionUtilities.convertToIBM864(str);
+            default:
+                return str.getBytes(encoding);
+        }
+    }
+
+
     @Override
     public void parseData(JSONArray printData, PrintOptions options) throws JSONException, UnsupportedOperationException {
         for(int i = 0; i < printData.length(); i++) {
@@ -107,7 +121,7 @@ public void parseData(JSONArray printData, PrintOptions options) throws JSONExce
                         break;
                     case PLAIN:
                     default:
-                        commands.append(cmd.getBytes(encoding));
+                        commands.append(getBytes(cmd, encoding));
                         break;
                 }
             }

diff --git a/src/qz/utils/ArabicConversionUtilities.java b/src/qz/utils/ArabicConversionUtilities.java
@@ -0,0 +1,89 @@
+package qz.utils;
+
+import com.ibm.icu.charset.CharsetEncoderICU;
+import com.ibm.icu.charset.CharsetProviderICU;
+import com.ibm.icu.text.ArabicShaping;
+import com.ibm.icu.text.ArabicShapingException;
+import com.ibm.icu.text.Bidi;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CoderResult;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Helpers for turning text that may contain Arabic into bytes in the IBM864 encoding.
+ * <p>
+ * Created by Yohanes Nugroho on 7/10/2018.
+ */
+public class ArabicConversionUtilities {
+
+    /**
+     * This is the simplest and most reliable method:
+     * If no character of the input string is above code point 255 then it is returned as it is
+     * (one byte per character), otherwise the special Arabic text conversion is done.
+     * <p>
+     * To send data to printer, we need to split the commands from the text, eg:<br/>
+     * {@code var data = ['\x1b\x41\x42', "Arabic text to print", '\x1b\x42\x53', "Other texts"]}
+     *
+     * @param escp_or_text a String that contains only ESC/P code or only text
+     * @return encoded bytes
+     * @throws CharacterCodingException if the text cannot be encoded as IBM864
+     * @throws ArabicShapingException   declared by the ICU shaping step
+     */
+    public static byte[] convertToIBM864(String escp_or_text) throws CharacterCodingException, ArabicShapingException {
+        if (!hasCharAbove255(escp_or_text)) {
+            //we use 'ISO-8859-1' that will map bytes as it is
+            return escp_or_text.getBytes(StandardCharsets.ISO_8859_1);
+        }
+
+        //Layout the characters from logical order to visual ordering
+        Bidi para = new Bidi();
+        para.setPara(escp_or_text, Bidi.LEVEL_DEFAULT_LTR, null);
+        String data = para.writeReordered(Bidi.DO_MIRRORING);
+        return convertVisualOrderedToIBM864(data);
+    }
+
+    /**
+     * Tells whether the string holds at least one char above 255.
+     * Surrogate halves are above 255 as well, so plain chars can be inspected
+     * and the scan stops at the first hit.
+     *
+     * @param str input string
+     * @return true if some char does not fit in a single ISO-8859-1 byte
+     */
+    private static boolean hasCharAbove255(String str) {
+        for(int i = 0; i < str.length(); i++) {
+            if (str.charAt(i) > 255) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Shape a visual ordered Arabic string and then encode it in IBM864 encoding
+     *
+     * @param str input string
+     * @return encoded bytes
+     * @throws ArabicShapingException   declared by the ICU shaping step
+     * @throws CharacterCodingException if the shaped text cannot be encoded as IBM864
+     */
+    private static byte[] convertVisualOrderedToIBM864(String str) throws ArabicShapingException, CharacterCodingException {
+        //We shape the characters to map it to Unicode in FExx range
+        //Note that the output of Bidi is VISUAL_LTR, so we need the flag: ArabicShaping.TEXT_DIRECTION_VISUAL_LTR
+        ArabicShaping as = new ArabicShaping(ArabicShaping.LETTERS_SHAPE | ArabicShaping.TEXT_DIRECTION_VISUAL_LTR | ArabicShaping.LENGTH_GROW_SHRINK);
+        String shaped = as.shape(str);
+
+        //then we need to convert it to IBM864 using ICU Encoder
+        CharsetProviderICU icu = new CharsetProviderICU();
+        Charset cs = icu.charsetForName("IBM864");
+        CharsetEncoderICU icuc = (CharsetEncoderICU)cs.newEncoder();
+
+        //We need to use fallback for some character forms that can not be found
+        icuc.setFallbackUsed(true);
+
+        //encode(CharBuffer) grows the output buffer as needed, flushes the encoder and throws
+        //CharacterCodingException on failure, so the result can not be silently cut short
+        ByteBuffer encoded = icuc.encode(CharBuffer.wrap(shaped));
+        byte[] out = new byte[encoded.remaining()];
+        encoded.get(out);
+
+        return out;
+    }
+
+}