-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Improve performance on encoder and decoder
- Loading branch information
1 parent
ee9c863
commit cadab0d
Showing
29 changed files
with
602 additions
and
68 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
// | ||
// Array+PrevCurrent.swift | ||
// | ||
// | ||
// Created by Alberto Espinilla Garrido on 4/3/23. | ||
// | ||
|
||
import Foundation | ||
|
||
extension Array {
    /// Applies `body` to every pair of adjacent elements and collects the results.
    ///
    /// The closure receives the earlier element first and the later element second,
    /// so `[a, b, c]` produces `[body(a, b), body(b, c)]`. Arrays with fewer than
    /// two elements produce an empty result.
    ///
    /// - Parameter body: Transformation applied to each `(previous, current)` pair.
    /// - Returns: One result per adjacent pair, in order.
    /// - Throws: Rethrows the first error thrown by `body`.
    func prevCurrent<T>(_ body: (Element, Element) throws -> T) rethrows -> [T] {
        // Pairing the array with itself shifted by one yields every adjacent pair
        // without index arithmetic; errors propagate instead of being dropped.
        try zip(self, dropFirst()).map { previous, current in
            try body(previous, current)
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
// | ||
// Array+Unique.swift | ||
// | ||
// | ||
// Created by Alberto Espinilla Garrido on 4/3/23. | ||
// | ||
|
||
import Foundation | ||
|
||
extension Array where Element: Hashable {
    /// The distinct elements of the array, in order of first occurrence.
    ///
    /// Round-tripping through a `Set` alone would yield an arbitrary order that
    /// can differ between runs; filtering against a set of already-seen elements
    /// keeps the result deterministic while remaining linear in `count`.
    var unique: [Element] {
        var seen = Set<Element>(minimumCapacity: count)
        // `insert` reports whether the element was newly added, so only the
        // first occurrence of each value passes the filter.
        return filter { seen.insert($0).inserted }
    }
}
14 changes: 14 additions & 0 deletions
14
Sources/GPT3 Tokenizer/Extensions/Dictionary+Inverted.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
// | ||
// Dictionary+Inverted.swift | ||
// | ||
// | ||
// Created by Alberto Espinilla Garrido on 3/3/23. | ||
// | ||
|
||
import Foundation | ||
|
||
extension Dictionary where Key: Hashable, Value: Hashable {
    /// A dictionary that maps each value of this dictionary back to its key.
    ///
    /// Built with `init(uniqueKeysWithValues:)`, so every value must be distinct;
    /// a repeated value is a precondition failure at runtime.
    var inverted: Dictionary<Value, Key> {
        // Swap each entry lazily so no intermediate array of pairs is built.
        let swappedPairs = lazy.map { entry in (entry.value, entry.key) }
        return [Value: Key](uniqueKeysWithValues: swappedPairs)
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
// | ||
// Set+ToArray.swift | ||
// | ||
// | ||
// Created by Alberto Espinilla Garrido on 4/3/23. | ||
// | ||
|
||
import Foundation | ||
|
||
extension Set {
    /// The members of the set copied into an array, in the set's iteration order.
    var toArray: [Element] {
        [Element](self)
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
// | ||
// String+Encoders.swift | ||
// | ||
// | ||
// Created by Alberto Espinilla Garrido on 4/3/23. | ||
// | ||
|
||
import Foundation | ||
|
||
extension String {
    /// Maps every UTF-8 byte of the string through `encoder` and concatenates the results.
    ///
    /// - Parameter encoder: Lookup table keyed by byte value.
    /// - Returns: The concatenation of the mapped strings; bytes without an entry
    ///   in `encoder` contribute nothing.
    func bytesToUnicode(encoder: [Int: String]) -> String {
        var mapped = ""
        for byte in utf8 {
            if let replacement = encoder[Int(byte)] {
                mapped += replacement
            }
        }
        return mapped
    }

    /// Looks up each word of the string (as produced by `splitWords`) in `encoder`.
    ///
    /// - Parameter encoder: Lookup table from word to integer code.
    /// - Returns: The codes of the words found in `encoder`, in order; words
    ///   without an entry are skipped.
    func encode(encoder: [String: Int]) -> [Int] {
        var codes: [Int] = []
        for word in splitWords {
            guard let code = encoder[word] else { continue }
            codes.append(code)
        }
        return codes
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
// | ||
// String+Split.swift | ||
// | ||
// | ||
// Created by Alberto Espinilla Garrido on 4/3/23. | ||
// | ||
|
||
import Foundation | ||
|
||
extension String {
    /// The pieces of the string separated by single space characters.
    ///
    /// Empty pieces (from leading, trailing, or repeated spaces) are omitted,
    /// and each piece is returned as an independent `String`.
    var splitWords: [String] {
        let pieces = split(separator: " ", omittingEmptySubsequences: true)
        return pieces.map { piece in String(piece) }
    }
}
|
||
extension Array where Element == Substring {
    /// The substrings converted to independent `String` values, in the same order.
    var toString: [String] {
        map(String.init)
    }
}
28 changes: 28 additions & 0 deletions
28
Sources/GPT3 Tokenizer/FileReader/Decoder/BpeRanksDecoder.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
// | ||
// BpeRanksDecoder.swift | ||
// | ||
// | ||
// Created by Alberto Espinilla Garrido on 5/3/23. | ||
// | ||
|
||
import Foundation | ||
|
||
/// Decodes raw file data into a table from pair to integer rank.
protocol BpeRanksDecoder {
    func decode(from data: Data) throws -> [Pairs: Int]
}

/// `BpeRanksDecoder` that reads newline-separated UTF-8 text, one pair per line.
struct BpeRanksDecoderImpl: BpeRanksDecoder {
    /// Builds the rank table from `data`.
    ///
    /// Each non-empty line is split with `splitWords`; its first and last words
    /// form a `Pairs` value. Lines that yield no words are skipped. A pair's rank
    /// is its zero-based position among the accepted lines, and a pair that
    /// appears more than once keeps the rank of its last occurrence.
    ///
    /// - Parameter data: Contents to decode, expected to be UTF-8 text.
    /// - Returns: The rank table, or an empty dictionary when `data` is not valid UTF-8.
    func decode(from data: Data) throws -> [Pairs: Int] {
        guard let vocab = String(data: data, encoding: .utf8) else {
            return [:]
        }

        var ranks: [Pairs: Int] = [:]
        // Counts accepted lines only, so skipped lines do not consume a rank.
        var nextRank = 0
        for rawLine in vocab.split(separator: "\n", omittingEmptySubsequences: true) {
            let words = String(rawLine).splitWords
            guard let first = words.first, let last = words.last else { continue }
            ranks[Pairs(first: first, second: last)] = nextRank
            nextRank += 1
        }
        return ranks
    }
}
24 changes: 24 additions & 0 deletions
24
Sources/GPT3 Tokenizer/FileReader/Decoder/TableCodeDecoder.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
// | ||
// TableCodeDecoder.swift | ||
// | ||
// | ||
// Created by Alberto Espinilla Garrido on 5/3/23. | ||
// | ||
|
||
import Foundation | ||
|
||
/// Decodes raw file data into a table from string to integer code.
protocol TableCodeDecoder {
    func decode(from data: Data) throws -> [String: Int]
}

/// `TableCodeDecoder` that reads the table from a JSON object.
struct TableCodeDecoderImpl: TableCodeDecoder {
    private let jsonDecoder: JSONDecoder

    /// - Parameter decoder: JSON decoder used for decoding; a fresh one by default.
    init(decoder: JSONDecoder = JSONDecoder()) {
        jsonDecoder = decoder
    }

    /// Decodes `data` as a JSON object whose values are integers.
    ///
    /// - Parameter data: JSON-encoded contents.
    /// - Returns: The decoded table.
    /// - Throws: Any error thrown by the underlying `JSONDecoder`.
    func decode(from data: Data) throws -> [String: Int] {
        let table = try jsonDecoder.decode([String: Int].self, from: data)
        return table
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.