-
Notifications
You must be signed in to change notification settings - Fork 0
/
eb-voice-anki.wok
54 lines (47 loc) · 2 KB
/
eb-voice-anki.wok
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/wok -f
/** eb-voice-anki.wok
 *
 * Usage:
 *   java -jar wok-0.1.0.jar -f eb-voice-anki.wok -v src=0 -v dst=1 -v idx=0 -v update=true -v@rawstr dic=path_to_epwing_dir -v@rawstr media=path_to_media_dir input > output
 *
 * Options:
 *   -v src: The index number of the source column.
 *   -v dst: The index number of the destination column.
 *   -v idx: The index number of the target voice of the query result.
 *   -v@rawstr dic: Path to a directory containing Epwing's CATALOGS file.
 *   -v@rawstr media: Path to a directory to be used as the output destination of voice files.
 *   -v update: When true, overwrite every cell of the column specified by `dst`;
 *              otherwise only empty cells are filled.
 *
 * Note: BASE64 decoding uses java.util.Base64, so this script requires Java 8 or later.
 */
// Tab-separated input and output. NOTE(review): FS/FQ/OFS/OFQ are presumably wok's
// awk-style field separator and quoting settings for input and output -- confirm
// against the wok documentation.
FS = '\t'
FQ = Quote.Min
OFS = '\t'
OFQ = Quote.All

// ebquery jar used to look up entries; fetched below when it is not present locally.
val eb = "ebquery-0.3.1.jar"

// Matches an embedded WAV <audio> tag in ebquery's HTML output and captures its BASE64 payload.
val tag = """<audio class="ebsd" controls="controls" src="data:audio/x-wav;base64,([^"]+)">""".r

// BASE64 decoder. sun.misc.BASE64Decoder is a JDK-internal class that no longer exists
// in current JDKs, so decoding goes through the public java.util.Base64 API instead.
// The lowercase name and the `decodeBuffer` method are kept on purpose so that existing
// `b64 decodeBuffer data` call sites keep working unchanged. The MIME decoder is used
// because it tolerates line breaks inside the payload.
object b64 {
  def decodeBuffer(s: String): Array[Byte] = java.util.Base64.getMimeDecoder.decode(s)
}

// Shared MD5 digest; callers reset it before each use.
val md = java.security.MessageDigest.getInstance("md5")

// Download the ebquery jar on first run.
if (eb.nonExistent) {
  eb write Resource.fromURL("https://bitbucket.org/rubyu/ebquery/downloads/ebquery-0.3.1.jar").bytes
}

// Make sure the output directory for voice files exists.
if (media.nonExistent) {
  media.createDirectory()
}
/** Looks up `s` with ebquery and returns an Anki sound reference for the selected voice.
 *
 * Runs the ebquery jar against the dictionary directory `dic`, scans the command's
 * output (`.!>.string`) for embedded WAV `<audio>` tags, and keeps the match whose
 * position equals `idx`. Its BASE64 payload is decoded and stored in `media` as
 * `<MD5 hex, upper case>.wav` unless that file already exists.
 *
 * @param s the text to look up
 * @return `[sound:<file name>]` for the selected voice, or `""` when the output has
 *         no match at position `idx`
 */
def query(s: String) = {
  val command = Seq("java", "-Dfile.encoding=UTF-8", "-jar", eb, "-d", dic, "-f", "html", "-m", "sd", s)
  val html = command.!>.string

  // Keep only the audio tag at the requested position.
  val selected = tag.findAllMatchIn(html).zipWithIndex.filter { case (_, position) => position == idx }

  val sounds = selected.map { case (tag(payload), _) =>
    val audio = b64.decodeBuffer(payload)

    // The file name is the MD5 of the audio bytes, so identical audio maps to one file.
    md.reset()
    md.update(audio)
    val digest = md.digest().map(b => "%02X".format(b)).mkString

    val target = media / s"$digest.wav"
    if (target.nonExistent) {
      target.write(audio)
    }
    s"""[sound:${target.name}]"""
  }

  sounds.toList.headOption.getOrElse("")
}
// Main pipeline: skip rows that have no `src` column, pad each remaining row so the
// `dst` column exists, fill `dst` with the sound reference for the `src` text
// (always when `update` is set, otherwise only when the cell is empty), and print the row.
In { rows =>
  rows
    .filter(row => row.isDefinedAt(src))
    .map { row =>
      val padded = row.padTo(dst + 1, "")
      if (update || padded(dst).isEmpty) padded.updated(dst, query(padded(src)))
      else padded
    }
    .foreach(cells => println(cells: _*))
}