Skip to content

Commit

Permalink
IBM-864 Arabic support
Browse files Browse the repository at this point in the history
Arabic support
  • Loading branch information
tresf authored Aug 6, 2018
1 parent 56be1bd commit 8f2e617
Show file tree
Hide file tree
Showing 6 changed files with 207 additions and 4 deletions.
94 changes: 94 additions & 0 deletions ant/lib/slim-icu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Download the icu4j source code and build it using ant;
# the build generates icu4j.jar and icu4j-charset.jar.
# Then run slim-icu.py to generate the slim versions of both jars.
# Currently this script only keeps Arabic and English data.

# slim ICU
import sys
import os
from pathlib import Path
import zipfile
from zipfile import ZipFile

# Folder holding the ICU jars: the first command-line argument when given,
# otherwise the folder this script lives in.
directory = sys.argv[1] if len(sys.argv) > 1 else str(Path(__file__).resolve().parent)

# Compression method passed to every output archive.
mode = zipfile.ZIP_DEFLATED


def keep_file(filename):
    """Decide whether an entry of icu4j.jar is copied into the slim jar.

    :param filename: entry name as stored in the archive, i.e. the full
        '/'-separated path inside the jar (``ZipInfo.filename``).
    :return: ``True`` to keep the entry, ``False`` to drop it.

    Rules, in order:
      1. Break-iterator data, dictionaries, ``unames.icu``, ``ucadata.icu``
         and ``.spp`` files are always dropped.
      2. Anything that is not a ``.res`` resource bundle is kept.
      3. A ``.res`` bundle is kept only when its file name starts with
         ``en`` or ``ar``.
    """
    # skip all break iterators (and the other large data files we never need)
    if filename.endswith((".brk", ".dict", "unames.icu", "ucadata.icu", ".spp")):
        return False

    if not filename.endswith(".res"):
        return True

    # keep english and arabic
    # Archive entries are full paths (e.g. "com/ibm/.../ar.res"), so the
    # language prefix has to be matched against the last path component;
    # matching the whole path would never succeed for nested entries.
    basename = filename.rsplit("/", 1)[-1]
    return basename.startswith(("en", "ar"))


# Copy icu4j.jar to icu4j-slim.jar, keeping only the entries accepted by
# keep_file(). The "with" block closes both archives even if an entry fails
# to read or write, so the output's central directory is always flushed.
with ZipFile(os.path.join(directory, 'icu4j.jar'), 'r') as zin, \
        ZipFile(os.path.join(directory, 'icu4j-slim.jar'), 'w', mode) as zout:
    for item in zin.infolist():
        print(item.filename)

        if keep_file(item.filename):
            print("Keep")
            # Only entries that are kept get decompressed; writing with the
            # original ZipInfo carries the entry's name and metadata over.
            zout.writestr(item, zin.read(item.filename))
        else:
            print("Remove")


def keep_charset_file(filename):
    """Decide whether an entry of icu4j-charset.jar is copied into the slim jar.

    :param filename: entry name as stored in the archive.
    :return: ``False`` when the name contains one of the converter tables
        listed below, ``True`` for every other entry.
    """
    # Converter tables that are dropped from the slim charset jar.
    dropped_tables = (
        "cns-11643-1992.cnv",
        "ebcdic-xml-us.cnv",
        "euc-jp-2007.cnv",
        "euc-tw-2014.cnv",
        "gb18030.cnv",
        "ibm-1363_P11B-1998.cnv",
        "ibm-1364_P110-2007.cnv",
        "ibm-1371_P100-1999.cnv",
        "ibm-1373_P100-2002.cnv",
        "ibm-1375_P100-2008.cnv",
        "ibm-1383_P110-1999.cnv",
        "ibm-1386_P100-2001.cnv",
        "ibm-1388_P103-2001.cnv",
        "ibm-1390_P110-2003.cnv",
    )

    # Substring match: the table name may appear anywhere in the entry path.
    return not any(table in filename for table in dropped_tables)


# Copy icu4j-charset.jar to icu4j-charset-slim.jar, keeping only the entries
# accepted by keep_charset_file(). The "with" block closes both archives even
# if an entry fails to read or write.
with ZipFile(os.path.join(directory, 'icu4j-charset.jar'), 'r') as zin, \
        ZipFile(os.path.join(directory, 'icu4j-charset-slim.jar'), 'w', mode) as zout:
    for item in zin.infolist():
        # Entry name and verdict are printed on the same line.
        print(item.filename, end=' ')

        if keep_charset_file(item.filename):
            print("Keep")
            # Only entries that are kept get decompressed; writing with the
            # original ZipInfo carries the entry's name and metadata over.
            zout.writestr(item, zin.read(item.filename))
        else:
            print("Remove")
8 changes: 7 additions & 1 deletion build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@
<java jar="${dist.jar}" fork="true" outputproperty="build.version">
<arg value="--version"/>
</java>

<!-- Fallback to a bogus version number if the above command failed -->
<property name="build.version" value="0.0.0" />
<echo>Version ${build.version}</echo>
Expand Down Expand Up @@ -170,6 +170,12 @@
</copy>
</target>

<!--
    Runs ant/lib/slim-icu.py against ${lib.dir}/charsets to regenerate the
    slimmed-down ICU jars. failonerror makes a non-zero exit from the script
    fail the build instead of being ignored, and separate <arg value> elements
    keep a ${lib.dir} containing spaces as a single argument.
-->
<target name="distill-icu" depends="init">
    <exec executable="python" failonerror="true">
        <arg value="ant/lib/slim-icu.py"/>
        <arg value="${lib.dir}/charsets"/>
    </exec>
</target>

<!--
################################################################
# Prepackage Steps - All Platforms #
Expand Down
Binary file added lib/charsets/icu4j-charset-slim.jar
Binary file not shown.
Binary file added lib/charsets/icu4j-slim.jar
Binary file not shown.
20 changes: 17 additions & 3 deletions src/qz/printer/action/PrintRaw.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
*/
package qz.printer.action;

import com.ibm.icu.text.ArabicShapingException;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.ssl.Base64;
import org.codehaus.jettison.json.JSONArray;
Expand Down Expand Up @@ -43,7 +44,6 @@
import java.util.Locale;
import java.util.concurrent.atomic.AtomicBoolean;


/**
* Sends raw data to the printer, overriding your operating system's print
* driver. Most useful for printers such as zebra card or barcode printers.
Expand All @@ -56,7 +56,7 @@ public class PrintRaw implements PrintProcessor {

private ByteArrayBuilder commands;

String encoding = null;
private String encoding = null;


public PrintRaw() {
Expand All @@ -68,6 +68,20 @@ public PrintingUtilities.Type getType() {
return PrintingUtilities.Type.RAW;
}

/**
 * Encodes a string into the bytes that are appended to the raw command buffer.
 * <p>
 * The IBM864 code page and its aliases are routed through
 * {@code ArabicConversionUtilities.convertToIBM864}; every other encoding
 * name is handed to {@link String#getBytes(String)} unchanged.
 *
 * @param str      text to encode
 * @param encoding charset name; compared case-insensitively against the IBM864 aliases
 * @return the encoded bytes
 * @throws ArabicShapingException declared by the IBM864 conversion
 * @throws IOException            if the charset name is unsupported or the IBM864 encoding fails
 */
private byte[] getBytes(String str, String encoding) throws ArabicShapingException, IOException {
    // Locale.ROOT keeps the alias match independent of the default locale:
    // under a Turkish locale "IBM864".toLowerCase() produces a dotless i,
    // so none of the aliases below would match.
    switch(encoding.toLowerCase(Locale.ROOT)) {
        case "ibm864":
        case "cp864":
        case "csibm864":
        case "864":
        case "ibm-864":
            return ArabicConversionUtilities.convertToIBM864(str);
        default:
            return str.getBytes(encoding);
    }
}


@Override
public void parseData(JSONArray printData, PrintOptions options) throws JSONException, UnsupportedOperationException {
for(int i = 0; i < printData.length(); i++) {
Expand Down Expand Up @@ -107,7 +121,7 @@ public void parseData(JSONArray printData, PrintOptions options) throws JSONExce
break;
case PLAIN:
default:
commands.append(cmd.getBytes(encoding));
commands.append(getBytes(cmd, encoding));
break;
}
}
Expand Down
89 changes: 89 additions & 0 deletions src/qz/utils/ArabicConversionUtilities.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package qz.utils;

import com.ibm.icu.charset.CharsetEncoderICU;
import com.ibm.icu.charset.CharsetProviderICU;
import com.ibm.icu.text.ArabicShaping;
import com.ibm.icu.text.ArabicShapingException;
import com.ibm.icu.text.Bidi;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CoderResult;
import java.nio.charset.StandardCharsets;

/**
 * Helpers that turn text which may contain Arabic into IBM864-encoded bytes.
 * <p>
 * Created by Yohanes Nugroho on 7/10/2018.
 */
public class ArabicConversionUtilities {

    /**
     * Encodes a string as bytes for the IBM864 code page.
     * <p>
     * If no char in the input is above {@code 0xFF}, the input is returned with each
     * char mapped to one byte (ISO-8859-1), which leaves ESC/P command bytes untouched.
     * Otherwise the text is reordered from logical to visual order, shaped, and
     * encoded as IBM864.
     * <p>
     * To send data to printer, we need to split the commands from the text, eg:<br/>
     * {@code var data = ['\x1b\x41\x42', "Arabic text to print", '\x1b\x42\x53', "Other texts"]}
     *
     * @param escp_or_text a String that contains only ESC/P code or only text
     * @return encoded bytes
     * @throws CharacterCodingException if a character cannot be encoded as IBM864
     * @throws ArabicShapingException   if shaping the reordered text fails
     */
    public static byte[] convertToIBM864(String escp_or_text) throws CharacterCodingException, ArabicShapingException {
        if (isLatin1(escp_or_text)) {
            //we use 'ISO-8859-1' that will map bytes as it is
            return escp_or_text.getBytes(StandardCharsets.ISO_8859_1);
        }

        //Layout the characters from logical order to visual ordering
        Bidi para = new Bidi();
        para.setPara(escp_or_text, Bidi.LEVEL_DEFAULT_LTR, null);
        String data = para.writeReordered(Bidi.DO_MIRRORING);
        return convertVisualOrderedToIBM864(data);
    }

    /**
     * Tells whether every char of the string fits in a single ISO-8859-1 byte.
     * <p>
     * Checking UTF-16 units is sufficient: a supplementary code point is made of
     * surrogates, which are themselves above {@code 0xFF}.
     *
     * @param str input string
     * @return {@code true} if no char is above {@code 0xFF} (also for an empty string)
     */
    private static boolean isLatin1(String str) {
        for(int i = 0; i < str.length(); i++) {
            if (str.charAt(i) > 0xFF) {
                return false;
            }
        }
        return true;
    }

    /**
     * Shape a visual ordered Arabic string and then encode it in IBM864 encoding
     *
     * @param str input string, already in visual (left-to-right) order
     * @return encoded bytes
     * @throws ArabicShapingException   if shaping fails
     * @throws CharacterCodingException if a character is malformed or unmappable
     */
    private static byte[] convertVisualOrderedToIBM864(String str) throws ArabicShapingException, CharacterCodingException {
        //We shape the characters to map it to Unicode in FExx range
        //Note that the output of Bidi is VISUAL_LTR, so we need the flag: ArabicShaping.TEXT_DIRECTION_VISUAL_LTR)
        ArabicShaping shaper = new ArabicShaping(ArabicShaping.LETTERS_SHAPE | ArabicShaping.TEXT_DIRECTION_VISUAL_LTR | ArabicShaping.LENGTH_GROW_SHRINK);
        String shaped = shaper.shape(str);

        //then we need to convert it to IBM864 using ICU Encoder
        CharsetProviderICU icu = new CharsetProviderICU();
        Charset cs = icu.charsetForName("IBM864");
        if (cs == null) {
            //NOTE(review): ICU4J documents null as the "unsupported" result, e.g. when the
            //converter table is missing from a slimmed jar; fail with a clear message instead of an NPE
            throw new java.nio.charset.UnsupportedCharsetException("IBM864");
        }
        CharsetEncoderICU encoder = (CharsetEncoderICU)cs.newEncoder();

        //We need to use fallback for some character forms that can not be found
        encoder.setFallbackUsed(true);
        ByteBuffer output = ByteBuffer.allocate(shaped.length() * 2);
        CoderResult res = encoder.encode(CharBuffer.wrap(shaped), output, true);
        if (!res.isUnderflow()) {
            //anything but UNDERFLOW means the input was not fully encoded:
            //malformed/unmappable input, or an output buffer that was too small
            res.throwException();
        }

        //copy out only the bytes that were actually written
        output.flip();
        byte[] out = new byte[output.remaining()];
        output.get(out);

        return out;
    }

}

0 comments on commit 8f2e617

Please sign in to comment.