diff --git a/go.mod b/go.mod
index de0789ce..2e2f94e7 100644
--- a/go.mod
+++ b/go.mod
@@ -26,9 +26,9 @@ require (
github.com/stretchr/testify v1.9.0
github.com/stvp/rollbar v0.5.1
github.com/urfave/cli/v2 v2.27.4
- golang.org/x/crypto v0.26.0
- golang.org/x/term v0.23.0
- golang.org/x/text v0.17.0
+ golang.org/x/crypto v0.27.0
+ golang.org/x/term v0.24.0
+ golang.org/x/text v0.18.0
gopkg.in/errgo.v1 v1.0.1
)
@@ -38,8 +38,8 @@ require (
github.com/ProtonMail/go-crypto v1.1.0-alpha.5-proton // indirect
github.com/VividCortex/ewma v1.2.0 // indirect
github.com/cloudflare/circl v1.4.0 // indirect
- github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
- github.com/cyphar/filepath-securejoin v0.3.1 // indirect
+ github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect
+ github.com/cyphar/filepath-securejoin v0.3.3 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/emirpasic/gods v1.18.1 // indirect
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect
@@ -64,8 +64,8 @@ require (
github.com/skeema/knownhosts v1.3.0 // indirect
github.com/xanzy/ssh-agent v0.3.3 // indirect
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
- golang.org/x/net v0.28.0 // indirect
- golang.org/x/sys v0.23.0 // indirect
+ golang.org/x/net v0.29.0 // indirect
+ golang.org/x/sys v0.25.0 // indirect
gopkg.in/warnings.v0 v0.1.2 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
diff --git a/go.sum b/go.sum
index 638ae209..e6680aab 100644
--- a/go.sum
+++ b/go.sum
@@ -31,12 +31,12 @@ github.com/cheggaaa/pb/v3 v3.1.5 h1:QuuUzeM2WsAqG2gMqtzaWithDJv0i+i6UlnwSCI4QLk=
github.com/cheggaaa/pb/v3 v3.1.5/go.mod h1:CrxkeghYTXi1lQBEI7jSn+3svI3cuc19haAj6jM60XI=
github.com/cloudflare/circl v1.4.0 h1:BV7h5MgrktNzytKmWjpOtdYrf0lkkbF8YMlBGPhJQrY=
github.com/cloudflare/circl v1.4.0/go.mod h1:PDRU+oXvdD7KCtgKxW95M5Z8BpSCJXQORiZFnBQS5QU=
-github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4=
-github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc=
+github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.17 h1:QeVUsEDNrLBW4tMgZHvxy18sKtr6VI492kBhUfhDJNI=
github.com/creack/pty v1.1.17/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
-github.com/cyphar/filepath-securejoin v0.3.1 h1:1V7cHiaW+C+39wEfpH6XlLBQo3j/PciWFrgfCLS8XrE=
-github.com/cyphar/filepath-securejoin v0.3.1/go.mod h1:F7i41x/9cBF7lzCrVsYs9fuzwRZm4NQsGTBdpp6mETc=
+github.com/cyphar/filepath-securejoin v0.3.3 h1:lofZkCEVFIBe0KcdQOzFs8Soy9oaHOWl4gGtPI+gCFc=
+github.com/cyphar/filepath-securejoin v0.3.3/go.mod h1:8s/MCNJREmFK0H02MF6Ihv1nakJe4L/w3WZLHNkvlYM=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -157,8 +157,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
-golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
-golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
+golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A=
+golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
@@ -167,8 +167,8 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
-golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE=
-golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg=
+golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo=
+golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -188,19 +188,19 @@ golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM=
-golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34=
+golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
-golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU=
-golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk=
+golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM=
+golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
-golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc=
-golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
+golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224=
+golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
diff --git a/vendor/github.com/cpuguy83/go-md2man/v2/md2man/debug.go b/vendor/github.com/cpuguy83/go-md2man/v2/md2man/debug.go
new file mode 100644
index 00000000..0ec4b12c
--- /dev/null
+++ b/vendor/github.com/cpuguy83/go-md2man/v2/md2man/debug.go
@@ -0,0 +1,62 @@
+package md2man
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "strings"
+
+ "github.com/russross/blackfriday/v2"
+)
+
+func fmtListFlags(flags blackfriday.ListType) string {
+ knownFlags := []struct {
+ name string
+ flag blackfriday.ListType
+ }{
+ {"ListTypeOrdered", blackfriday.ListTypeOrdered},
+ {"ListTypeDefinition", blackfriday.ListTypeDefinition},
+ {"ListTypeTerm", blackfriday.ListTypeTerm},
+ {"ListItemContainsBlock", blackfriday.ListItemContainsBlock},
+ {"ListItemBeginningOfList", blackfriday.ListItemBeginningOfList},
+ {"ListItemEndOfList", blackfriday.ListItemEndOfList},
+ }
+
+ var f []string
+ for _, kf := range knownFlags {
+ if flags&kf.flag != 0 {
+ f = append(f, kf.name)
+ flags &^= kf.flag
+ }
+ }
+ if flags != 0 {
+ f = append(f, fmt.Sprintf("Unknown(%#x)", flags))
+ }
+ return strings.Join(f, "|")
+}
+
+type debugDecorator struct {
+ blackfriday.Renderer
+}
+
+func depth(node *blackfriday.Node) int {
+ d := 0
+ for n := node.Parent; n != nil; n = n.Parent {
+ d++
+ }
+ return d
+}
+
+func (d *debugDecorator) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus {
+ fmt.Fprintf(os.Stderr, "%s%s %v %v\n",
+ strings.Repeat(" ", depth(node)),
+ map[bool]string{true: "+", false: "-"}[entering],
+ node,
+ fmtListFlags(node.ListFlags))
+ var b strings.Builder
+ status := d.Renderer.RenderNode(io.MultiWriter(&b, w), node, entering)
+ if b.Len() > 0 {
+ fmt.Fprintf(os.Stderr, ">> %q\n", b.String())
+ }
+ return status
+}
diff --git a/vendor/github.com/cpuguy83/go-md2man/v2/md2man/md2man.go b/vendor/github.com/cpuguy83/go-md2man/v2/md2man/md2man.go
index 42bf32aa..62d91b77 100644
--- a/vendor/github.com/cpuguy83/go-md2man/v2/md2man/md2man.go
+++ b/vendor/github.com/cpuguy83/go-md2man/v2/md2man/md2man.go
@@ -1,16 +1,23 @@
package md2man
import (
+ "os"
+ "strconv"
+
"github.com/russross/blackfriday/v2"
)
// Render converts a markdown document into a roff formatted document.
func Render(doc []byte) []byte {
renderer := NewRoffRenderer()
+ var r blackfriday.Renderer = renderer
+ if v, _ := strconv.ParseBool(os.Getenv("MD2MAN_DEBUG")); v {
+ r = &debugDecorator{Renderer: r}
+ }
return blackfriday.Run(doc,
[]blackfriday.Option{
- blackfriday.WithRenderer(renderer),
+ blackfriday.WithRenderer(r),
blackfriday.WithExtensions(renderer.GetExtensions()),
}...)
}
diff --git a/vendor/github.com/cpuguy83/go-md2man/v2/md2man/roff.go b/vendor/github.com/cpuguy83/go-md2man/v2/md2man/roff.go
index 8a290f19..9d6c473f 100644
--- a/vendor/github.com/cpuguy83/go-md2man/v2/md2man/roff.go
+++ b/vendor/github.com/cpuguy83/go-md2man/v2/md2man/roff.go
@@ -14,10 +14,8 @@ import (
// roffRenderer implements the blackfriday.Renderer interface for creating
// roff format (manpages) from markdown text
type roffRenderer struct {
- extensions blackfriday.Extensions
listCounters []int
firstHeader bool
- firstDD bool
listDepth int
}
@@ -43,7 +41,7 @@ const (
quoteTag = "\n.PP\n.RS\n"
quoteCloseTag = "\n.RE\n"
listTag = "\n.RS\n"
- listCloseTag = "\n.RE\n"
+ listCloseTag = ".RE\n"
dtTag = "\n.TP\n"
dd2Tag = "\n"
tableStart = "\n.TS\nallbox;\n"
@@ -56,23 +54,18 @@ const (
// NewRoffRenderer creates a new blackfriday Renderer for generating roff documents
// from markdown
func NewRoffRenderer() *roffRenderer { // nolint: golint
- var extensions blackfriday.Extensions
-
- extensions |= blackfriday.NoIntraEmphasis
- extensions |= blackfriday.Tables
- extensions |= blackfriday.FencedCode
- extensions |= blackfriday.SpaceHeadings
- extensions |= blackfriday.Footnotes
- extensions |= blackfriday.Titleblock
- extensions |= blackfriday.DefinitionLists
- return &roffRenderer{
- extensions: extensions,
- }
+ return &roffRenderer{}
}
// GetExtensions returns the list of extensions used by this renderer implementation
-func (r *roffRenderer) GetExtensions() blackfriday.Extensions {
- return r.extensions
+func (*roffRenderer) GetExtensions() blackfriday.Extensions {
+ return blackfriday.NoIntraEmphasis |
+ blackfriday.Tables |
+ blackfriday.FencedCode |
+ blackfriday.SpaceHeadings |
+ blackfriday.Footnotes |
+ blackfriday.Titleblock |
+ blackfriday.DefinitionLists
}
// RenderHeader handles outputting the header at document start
@@ -103,7 +96,23 @@ func (r *roffRenderer) RenderNode(w io.Writer, node *blackfriday.Node, entering
switch node.Type {
case blackfriday.Text:
- escapeSpecialChars(w, node.Literal)
+ // Special case: format the NAME section as required for proper whatis parsing.
+ // Refer to the lexgrog(1) and groff_man(7) manual pages for details.
+ if node.Parent != nil &&
+ node.Parent.Type == blackfriday.Paragraph &&
+ node.Parent.Prev != nil &&
+ node.Parent.Prev.Type == blackfriday.Heading &&
+ node.Parent.Prev.FirstChild != nil &&
+ bytes.EqualFold(node.Parent.Prev.FirstChild.Literal, []byte("NAME")) {
+ before, after, found := bytes.Cut(node.Literal, []byte(" - "))
+ escapeSpecialChars(w, before)
+ if found {
+ out(w, ` \- `)
+ escapeSpecialChars(w, after)
+ }
+ } else {
+ escapeSpecialChars(w, node.Literal)
+ }
case blackfriday.Softbreak:
out(w, crTag)
case blackfriday.Hardbreak:
@@ -141,14 +150,25 @@ func (r *roffRenderer) RenderNode(w io.Writer, node *blackfriday.Node, entering
case blackfriday.Document:
break
case blackfriday.Paragraph:
- // roff .PP markers break lists
- if r.listDepth > 0 {
- return blackfriday.GoToNext
- }
if entering {
- out(w, paraTag)
+ if r.listDepth > 0 {
+ // roff .PP markers break lists
+ if node.Prev != nil { // continued paragraph
+ if node.Prev.Type == blackfriday.List && node.Prev.ListFlags&blackfriday.ListTypeDefinition == 0 {
+ out(w, ".IP\n")
+ } else {
+ out(w, crTag)
+ }
+ }
+ } else if node.Prev != nil && node.Prev.Type == blackfriday.Heading {
+ out(w, crTag)
+ } else {
+ out(w, paraTag)
+ }
} else {
- out(w, crTag)
+ if node.Next == nil || node.Next.Type != blackfriday.List {
+ out(w, crTag)
+ }
}
case blackfriday.BlockQuote:
if entering {
@@ -211,6 +231,10 @@ func (r *roffRenderer) handleHeading(w io.Writer, node *blackfriday.Node, enteri
func (r *roffRenderer) handleList(w io.Writer, node *blackfriday.Node, entering bool) {
openTag := listTag
closeTag := listCloseTag
+ if (entering && r.listDepth == 0) || (!entering && r.listDepth == 1) {
+ openTag = crTag
+ closeTag = ""
+ }
if node.ListFlags&blackfriday.ListTypeDefinition != 0 {
// tags for definition lists handled within Item node
openTag = ""
@@ -239,23 +263,25 @@ func (r *roffRenderer) handleItem(w io.Writer, node *blackfriday.Node, entering
} else if node.ListFlags&blackfriday.ListTypeTerm != 0 {
// DT (definition term): line just before DD (see below).
out(w, dtTag)
- r.firstDD = true
} else if node.ListFlags&blackfriday.ListTypeDefinition != 0 {
// DD (definition description): line that starts with ": ".
//
// We have to distinguish between the first DD and the
// subsequent ones, as there should be no vertical
// whitespace between the DT and the first DD.
- if r.firstDD {
- r.firstDD = false
- } else {
- out(w, dd2Tag)
+ if node.Prev != nil && node.Prev.ListFlags&(blackfriday.ListTypeTerm|blackfriday.ListTypeDefinition) == blackfriday.ListTypeDefinition {
+ if node.Prev.Type == blackfriday.Item &&
+ node.Prev.LastChild != nil &&
+ node.Prev.LastChild.Type == blackfriday.List &&
+ node.Prev.LastChild.ListFlags&blackfriday.ListTypeDefinition == 0 {
+ out(w, ".IP\n")
+ } else {
+ out(w, dd2Tag)
+ }
}
} else {
out(w, ".IP \\(bu 2\n")
}
- } else {
- out(w, "\n")
}
}
diff --git a/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md b/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md
index 7436896e..23f7bc7f 100644
--- a/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md
+++ b/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md
@@ -6,6 +6,36 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
## [Unreleased] ##
+## [0.3.3] - 2024-09-30 ##
+
+### Fixed ###
+- The mode and owner verification logic in `MkdirAll` has been removed. This
+ was originally intended to protect against some theoretical attacks but upon
+ further consideration these protections don't actually buy us anything and
+ they were causing spurious errors with more complicated filesystem setups.
+- The "is the created directory empty" logic in `MkdirAll` has also been
+ removed. This was not causing us issues yet, but some pseudofilesystems (such
+ as `cgroup`) create non-empty directories and so this logic would've been
+ wrong for such cases.
+
+## [0.3.2] - 2024-09-13 ##
+
+### Changed ###
+- Passing the `S_ISUID` or `S_ISGID` modes to `MkdirAllInRoot` will now return
+ an explicit error saying that those bits are ignored by `mkdirat(2)`. In the
+ past a different error was returned, but since the silent ignoring behaviour
+ is codified in the man pages a more explicit error seems apt. While silently
+ ignoring these bits would be the most compatible option, it could lead to
+ users thinking their code sets these bits when it doesn't. Programs that need
+ to deal with compatibility can mask the bits themselves. (#23, #25)
+
+### Fixed ###
+- If a directory has `S_ISGID` set, then all child directories will have
+ `S_ISGID` set when created and a different gid will be used for any inode
+ created under the directory. Previously, the "expected owner and mode"
+ validation in `securejoin.MkdirAll` did not correctly handle this. We now
+ correctly handle this case. (#24, #25)
+
## [0.3.1] - 2024-07-23 ##
### Changed ###
@@ -127,7 +157,9 @@ This is our first release of `github.com/cyphar/filepath-securejoin`,
containing a full implementation with a coverage of 93.5% (the only missing
cases are the error cases, which are hard to mocktest at the moment).
-[Unreleased]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.1...HEAD
+[Unreleased]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.3...HEAD
+[0.3.3]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.2...v0.3.3
+[0.3.2]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.1...v0.3.2
[0.3.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.0...v0.3.1
[0.3.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.5...v0.3.0
[0.2.5]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.4...v0.2.5
diff --git a/vendor/github.com/cyphar/filepath-securejoin/VERSION b/vendor/github.com/cyphar/filepath-securejoin/VERSION
index 9e11b32f..1c09c74e 100644
--- a/vendor/github.com/cyphar/filepath-securejoin/VERSION
+++ b/vendor/github.com/cyphar/filepath-securejoin/VERSION
@@ -1 +1 @@
-0.3.1
+0.3.3
diff --git a/vendor/github.com/cyphar/filepath-securejoin/join.go b/vendor/github.com/cyphar/filepath-securejoin/join.go
index bd86a48b..ca4ce1f2 100644
--- a/vendor/github.com/cyphar/filepath-securejoin/join.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/join.go
@@ -22,27 +22,27 @@ const maxSymlinkLimit = 255
// IsNotExist tells you if err is an error that implies that either the path
// accessed does not exist (or path components don't exist). This is
-// effectively a more broad version of os.IsNotExist.
+// effectively a more broad version of [os.IsNotExist].
func IsNotExist(err error) bool {
// Check that it's not actually an ENOTDIR, which in some cases is a more
// convoluted case of ENOENT (usually involving weird paths).
return errors.Is(err, os.ErrNotExist) || errors.Is(err, syscall.ENOTDIR) || errors.Is(err, syscall.ENOENT)
}
-// SecureJoinVFS joins the two given path components (similar to Join) except
+// SecureJoinVFS joins the two given path components (similar to [filepath.Join]) except
// that the returned path is guaranteed to be scoped inside the provided root
// path (when evaluated). Any symbolic links in the path are evaluated with the
// given root treated as the root of the filesystem, similar to a chroot. The
-// filesystem state is evaluated through the given VFS interface (if nil, the
-// standard os.* family of functions are used).
+// filesystem state is evaluated through the given [VFS] interface (if nil, the
+// standard [os].* family of functions are used).
//
// Note that the guarantees provided by this function only apply if the path
// components in the returned string are not modified (in other words are not
// replaced with symlinks on the filesystem) after this function has returned.
-// Such a symlink race is necessarily out-of-scope of SecureJoin.
+// Such a symlink race is necessarily out-of-scope of SecureJoinVFS.
//
// NOTE: Due to the above limitation, Linux users are strongly encouraged to
-// use OpenInRoot instead, which does safely protect against these kinds of
+// use [OpenInRoot] instead, which does safely protect against these kinds of
// attacks. There is no way to solve this problem with SecureJoinVFS because
// the API is fundamentally wrong (you cannot return a "safe" path string and
// guarantee it won't be modified afterwards).
@@ -123,8 +123,8 @@ func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) {
return filepath.Join(root, finalPath), nil
}
-// SecureJoin is a wrapper around SecureJoinVFS that just uses the os.* library
-// of functions as the VFS. If in doubt, use this function over SecureJoinVFS.
+// SecureJoin is a wrapper around [SecureJoinVFS] that just uses the [os].* library
+// of functions as the [VFS]. If in doubt, use this function over [SecureJoinVFS].
func SecureJoin(root, unsafePath string) (string, error) {
return SecureJoinVFS(root, unsafePath, nil)
}
diff --git a/vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go b/vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go
index ad2bd797..b5f67452 100644
--- a/vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go
@@ -9,7 +9,6 @@ package securejoin
import (
"errors"
"fmt"
- "io"
"os"
"path/filepath"
"slices"
@@ -23,29 +22,36 @@ var (
errPossibleAttack = errors.New("possible attack detected")
)
-// MkdirAllHandle is equivalent to MkdirAll, except that it is safer to use in
-// two respects:
+// MkdirAllHandle is equivalent to [MkdirAll], except that it is safer to use
+// in two respects:
//
-// - The caller provides the root directory as an *os.File (preferably O_PATH)
+// - The caller provides the root directory as an *[os.File] (preferably O_PATH)
// handle. This means that the caller can be sure which root directory is
// being used. Note that this can be emulated by using /proc/self/fd/... as
-// the root path with MkdirAll.
+// the root path with [os.MkdirAll].
//
-// - Once all of the directories have been created, an *os.File (O_PATH) handle
+// - Once all of the directories have been created, an *[os.File] O_PATH handle
// to the directory at unsafePath is returned to the caller. This is done in
// an effectively-race-free way (an attacker would only be able to swap the
// final directory component), which is not possible to emulate with
-// MkdirAll.
+// [MkdirAll].
//
// In addition, the returned handle is obtained far more efficiently than doing
-// a brand new lookup of unsafePath (such as with SecureJoin or openat2) after
-// doing MkdirAll. If you intend to open the directory after creating it, you
+// a brand new lookup of unsafePath (such as with [SecureJoin] or openat2) after
+// doing [MkdirAll]. If you intend to open the directory after creating it, you
// should use MkdirAllHandle.
func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err error) {
// Make sure there are no os.FileMode bits set.
if mode&^0o7777 != 0 {
return nil, fmt.Errorf("%w for mkdir 0o%.3o", errInvalidMode, mode)
}
+ // On Linux, mkdirat(2) (and os.Mkdir) silently ignore the suid and sgid
+ // bits. We could also silently ignore them but since we have very few
+ // users it seems more prudent to return an error so users notice that
+ // these bits will not be set.
+ if mode&^0o1777 != 0 {
+ return nil, fmt.Errorf("%w for mkdir 0o%.3o: suid and sgid are ignored by mkdir", errInvalidMode, mode)
+ }
// Try to open as much of the path as possible.
currentDir, remainingPath, err := partialLookupInRoot(root, unsafePath)
@@ -101,24 +107,6 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err
// Make sure the mode doesn't have any type bits.
mode &^= unix.S_IFMT
- // What properties do we expect any newly created directories to have?
- var (
- // While umask(2) is a per-thread property, and thus this value could
- // vary between threads, a functioning Go program would LockOSThread
- // threads with different umasks and so we don't need to LockOSThread
- // for this entire mkdirat loop (if we are in the locked thread with a
- // different umask, we are already locked and there's nothing for us to
- // do -- and if not then it doesn't matter which thread we run on and
- // there's nothing for us to do).
- expectedMode = uint32(unix.S_IFDIR | (mode &^ getUmask()))
-
- // We would want to get the fs[ug]id here, but we can't access those
- // from userspace. In practice, nobody uses setfs[ug]id() anymore, so
- // just use the effective [ug]id (which is equivalent to the fs[ug]id
- // for programs that don't use setfs[ug]id).
- expectedUid = uint32(unix.Geteuid())
- expectedGid = uint32(unix.Getegid())
- )
// Create the remaining components.
for _, part := range remainingParts {
@@ -129,7 +117,7 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err
}
// NOTE: mkdir(2) will not follow trailing symlinks, so we can safely
- // create the finaly component without worrying about symlink-exchange
+ // create the final component without worrying about symlink-exchange
// attacks.
if err := unix.Mkdirat(int(currentDir.Fd()), part, uint32(mode)); err != nil {
err = &os.PathError{Op: "mkdirat", Path: currentDir.Name() + "/" + part, Err: err}
@@ -157,40 +145,30 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err
_ = currentDir.Close()
currentDir = nextDir
- // Make sure that the directory matches what we expect. An attacker
- // could have swapped the directory between us making it and opening
- // it. There's no way for us to be sure that the directory is
- // _precisely_ the same as the directory we created, but if we are in
- // an empty directory with the same owner and mode as the one we
- // created then there is nothing the attacker could do with this new
- // directory that they couldn't do with the old one.
- if stat, err := fstat(currentDir); err != nil {
- return nil, fmt.Errorf("check newly created directory: %w", err)
- } else {
- if stat.Mode != expectedMode {
- return nil, fmt.Errorf("%w: newly created directory %q has incorrect mode 0o%.3o (expected 0o%.3o)", errPossibleAttack, currentDir.Name(), stat.Mode, expectedMode)
- }
- if stat.Uid != expectedUid || stat.Gid != expectedGid {
- return nil, fmt.Errorf("%w: newly created directory %q has incorrect owner %d:%d (expected %d:%d)", errPossibleAttack, currentDir.Name(), stat.Uid, stat.Gid, expectedUid, expectedGid)
- }
- // Check that the directory is empty. We only need to check for
- // a single entry, and we should get EOF if the directory is
- // empty.
- _, err := currentDir.Readdirnames(1)
- if !errors.Is(err, io.EOF) {
- if err == nil {
- err = fmt.Errorf("%w: newly created directory %q is non-empty", errPossibleAttack, currentDir.Name())
- }
- return nil, fmt.Errorf("check if newly created directory %q is empty: %w", currentDir.Name(), err)
- }
- // Reset the offset.
- _, _ = currentDir.Seek(0, unix.SEEK_SET)
- }
+ // It's possible that the directory we just opened was swapped by an
+ // attacker. Unfortunately there isn't much we can do to protect
+ // against this, and MkdirAll's behaviour is that we will reuse
+ // existing directories anyway so the need to protect against this is
+ // incredibly limited (and arguably doesn't even deserve mention here).
+ //
+ // Ideally we might want to check that the owner and mode match what we
+ // would've created -- unfortunately, it is non-trivial to verify that
+ // the owner and mode of the created directory match. While plain Unix
+ // DAC rules seem simple enough to emulate, there are a bunch of other
+ // factors that can change the mode or owner of created directories
+ // (default POSIX ACLs, mount options like uid=1,gid=2,umask=0 on
+ // filesystems like vfat, etc etc). We used to try to verify this but
+ // it just lead to a series of spurious errors.
+ //
+ // We could also check that the directory is non-empty, but
+ // unfortunately some pseduofilesystems (like cgroupfs) create
+ // non-empty directories, which would result in different spurious
+ // errors.
}
return currentDir, nil
}
-// MkdirAll is a race-safe alternative to the Go stdlib's os.MkdirAll function,
+// MkdirAll is a race-safe alternative to the [os.MkdirAll] function,
// where the new directory is guaranteed to be within the root directory (if an
// attacker can move directories from inside the root to outside the root, the
// created directory tree might be outside of the root but the key constraint
@@ -203,16 +181,16 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err
// err := os.MkdirAll(path, mode)
//
// But is much safer. The above implementation is unsafe because if an attacker
-// can modify the filesystem tree between SecureJoin and MkdirAll, it is
+// can modify the filesystem tree between [SecureJoin] and [os.MkdirAll], it is
// possible for MkdirAll to resolve unsafe symlink components and create
// directories outside of the root.
//
// If you plan to open the directory after you have created it or want to use
-// an open directory handle as the root, you should use MkdirAllHandle instead.
-// This function is a wrapper around MkdirAllHandle.
+// an open directory handle as the root, you should use [MkdirAllHandle] instead.
+// This function is a wrapper around [MkdirAllHandle].
//
// NOTE: The mode argument must be set the unix mode bits (unix.S_I...), not
-// the Go generic mode bits (os.Mode...).
+// the Go generic mode bits ([os.FileMode]...).
func MkdirAll(root, unsafePath string, mode int) error {
rootDir, err := os.OpenFile(root, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
if err != nil {
diff --git a/vendor/github.com/cyphar/filepath-securejoin/open_linux.go b/vendor/github.com/cyphar/filepath-securejoin/open_linux.go
index 52dce76f..230be73f 100644
--- a/vendor/github.com/cyphar/filepath-securejoin/open_linux.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/open_linux.go
@@ -14,8 +14,8 @@ import (
"golang.org/x/sys/unix"
)
-// OpenatInRoot is equivalent to OpenInRoot, except that the root is provided
-// using an *os.File handle, to ensure that the correct root directory is used.
+// OpenatInRoot is equivalent to [OpenInRoot], except that the root is provided
+// using an *[os.File] handle, to ensure that the correct root directory is used.
func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error) {
handle, err := completeLookupInRoot(root, unsafePath)
if err != nil {
@@ -31,7 +31,7 @@ func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error) {
// handle, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC)
//
// But is much safer. The above implementation is unsafe because if an attacker
-// can modify the filesystem tree between SecureJoin and OpenFile, it is
+// can modify the filesystem tree between [SecureJoin] and [os.OpenFile], it is
// possible for the returned file to be outside of the root.
//
// Note that the returned handle is an O_PATH handle, meaning that only a very
@@ -39,7 +39,7 @@ func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error) {
// accidentally opening an untrusted file that could cause issues (such as a
// disconnected TTY that could cause a DoS, or some other issue). In order to
// use the returned handle, you can "upgrade" it to a proper handle using
-// Reopen.
+// [Reopen].
func OpenInRoot(root, unsafePath string) (*os.File, error) {
rootDir, err := os.OpenFile(root, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
if err != nil {
@@ -49,7 +49,7 @@ func OpenInRoot(root, unsafePath string) (*os.File, error) {
return OpenatInRoot(rootDir, unsafePath)
}
-// Reopen takes an *os.File handle and re-opens it through /proc/self/fd.
+// Reopen takes an *[os.File] handle and re-opens it through /proc/self/fd.
// Reopen(file, flags) is effectively equivalent to
//
// fdPath := fmt.Sprintf("/proc/self/fd/%d", file.Fd())
@@ -59,7 +59,9 @@ func OpenInRoot(root, unsafePath string) (*os.File, error) {
// maliciously-configured /proc mount. While this attack scenario is not
// common, in container runtimes it is possible for higher-level runtimes to be
// tricked into configuring an unsafe /proc that can be used to attack file
-// operations. See CVE-2019-19921 for more details.
+// operations. See [CVE-2019-19921] for more details.
+//
+// [CVE-2019-19921]: https://github.com/advisories/GHSA-fh74-hm69-rqjw
func Reopen(handle *os.File, flags int) (*os.File, error) {
procRoot, err := getProcRoot()
if err != nil {
diff --git a/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go b/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go
index 921b3e1d..ae3b381e 100644
--- a/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go
@@ -13,34 +13,21 @@ import (
"path/filepath"
"strings"
"sync"
- "testing"
"golang.org/x/sys/unix"
)
-var (
- hasOpenat2Bool bool
- hasOpenat2Once sync.Once
-
- testingForceHasOpenat2 *bool
-)
-
-func hasOpenat2() bool {
- if testing.Testing() && testingForceHasOpenat2 != nil {
- return *testingForceHasOpenat2
- }
- hasOpenat2Once.Do(func() {
- fd, err := unix.Openat2(unix.AT_FDCWD, ".", &unix.OpenHow{
- Flags: unix.O_PATH | unix.O_CLOEXEC,
- Resolve: unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_IN_ROOT,
- })
- if err == nil {
- hasOpenat2Bool = true
- _ = unix.Close(fd)
- }
+var hasOpenat2 = sync.OnceValue(func() bool {
+ fd, err := unix.Openat2(unix.AT_FDCWD, ".", &unix.OpenHow{
+ Flags: unix.O_PATH | unix.O_CLOEXEC,
+ Resolve: unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_IN_ROOT,
})
- return hasOpenat2Bool
-}
+ if err != nil {
+ return false
+ }
+ _ = unix.Close(fd)
+ return true
+})
func scopedLookupShouldRetry(how *unix.OpenHow, err error) bool {
// RESOLVE_IN_ROOT (and RESOLVE_BENEATH) can return -EAGAIN if we resolve
diff --git a/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go b/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go
index 949fb5f2..ac083f21 100644
--- a/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go
@@ -42,6 +42,10 @@ func fstatatFile(dir *os.File, path string, flags int) (unix.Stat_t, error) {
return stat, nil
}
+func fstatFile(fd *os.File) (unix.Stat_t, error) {
+ return fstatatFile(fd, "", unix.AT_EMPTY_PATH)
+}
+
func readlinkatFile(dir *os.File, path string) (string, error) {
size := 4096
for {
diff --git a/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go b/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go
index adf0bd08..fa7929a5 100644
--- a/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go
@@ -54,33 +54,26 @@ func verifyProcRoot(procRoot *os.File) error {
return nil
}
-var (
- hasNewMountApiBool bool
- hasNewMountApiOnce sync.Once
-)
-
-func hasNewMountApi() bool {
- hasNewMountApiOnce.Do(func() {
- // All of the pieces of the new mount API we use (fsopen, fsconfig,
- // fsmount, open_tree) were added together in Linux 5.1[1,2], so we can
- // just check for one of the syscalls and the others should also be
- // available.
- //
- // Just try to use open_tree(2) to open a file without OPEN_TREE_CLONE.
- // This is equivalent to openat(2), but tells us if open_tree is
- // available (and thus all of the other basic new mount API syscalls).
- // open_tree(2) is most light-weight syscall to test here.
- //
- // [1]: merge commit 400913252d09
- // [2]:
- fd, err := unix.OpenTree(-int(unix.EBADF), "/", unix.OPEN_TREE_CLOEXEC)
- if err == nil {
- hasNewMountApiBool = true
- _ = unix.Close(fd)
- }
- })
- return hasNewMountApiBool
-}
+var hasNewMountApi = sync.OnceValue(func() bool {
+ // All of the pieces of the new mount API we use (fsopen, fsconfig,
+ // fsmount, open_tree) were added together in Linux 5.1[1,2], so we can
+ // just check for one of the syscalls and the others should also be
+ // available.
+ //
+ // Just try to use open_tree(2) to open a file without OPEN_TREE_CLONE.
+ // This is equivalent to openat(2), but tells us if open_tree is
+ // available (and thus all of the other basic new mount API syscalls).
+ // open_tree(2) is most light-weight syscall to test here.
+ //
+ // [1]: merge commit 400913252d09
+ // [2]:
+ fd, err := unix.OpenTree(-int(unix.EBADF), "/", unix.OPEN_TREE_CLOEXEC)
+ if err != nil {
+ return false
+ }
+ _ = unix.Close(fd)
+ return true
+})
func fsopen(fsName string, flags int) (*os.File, error) {
// Make sure we always set O_CLOEXEC.
@@ -172,14 +165,6 @@ func privateProcRoot() (*os.File, error) {
return procRoot, err
}
-var (
- procRootHandle *os.File
- procRootError error
- procRootOnce sync.Once
-
- errUnsafeProcfs = errors.New("unsafe procfs detected")
-)
-
func unsafeHostProcRoot() (_ *os.File, Err error) {
procRoot, err := os.OpenFile("/proc", unix.O_PATH|unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
if err != nil {
@@ -207,17 +192,15 @@ func doGetProcRoot() (*os.File, error) {
return procRoot, err
}
-func getProcRoot() (*os.File, error) {
- procRootOnce.Do(func() {
- procRootHandle, procRootError = doGetProcRoot()
- })
- return procRootHandle, procRootError
-}
+var getProcRoot = sync.OnceValues(func() (*os.File, error) {
+ return doGetProcRoot()
+})
-var (
- haveProcThreadSelf bool
- haveProcThreadSelfOnce sync.Once
-)
+var hasProcThreadSelf = sync.OnceValue(func() bool {
+ return unix.Access("/proc/thread-self/", unix.F_OK) == nil
+})
+
+var errUnsafeProcfs = errors.New("unsafe procfs detected")
type procThreadSelfCloser func()
@@ -230,13 +213,6 @@ type procThreadSelfCloser func()
// This is similar to ProcThreadSelf from runc, but with extra hardening
// applied and using *os.File.
func procThreadSelf(procRoot *os.File, subpath string) (_ *os.File, _ procThreadSelfCloser, Err error) {
- haveProcThreadSelfOnce.Do(func() {
- // If the kernel doesn't support thread-self, it doesn't matter which
- // /proc handle we use.
- _, err := fstatatFile(procRoot, "thread-self", unix.AT_SYMLINK_NOFOLLOW)
- haveProcThreadSelf = (err == nil)
- })
-
// We need to lock our thread until the caller is done with the handle
// because between getting the handle and using it we could get interrupted
// by the Go runtime and hit the case where the underlying thread is
@@ -251,7 +227,7 @@ func procThreadSelf(procRoot *os.File, subpath string) (_ *os.File, _ procThread
// Figure out what prefix we want to use.
threadSelf := "thread-self/"
- if !haveProcThreadSelf || testingForceProcSelfTask() {
+ if !hasProcThreadSelf() || testingForceProcSelfTask() {
/// Pre-3.17 kernels don't have /proc/thread-self, so do it manually.
threadSelf = "self/task/" + strconv.Itoa(unix.Gettid()) + "/"
if _, err := fstatatFile(procRoot, threadSelf, unix.AT_SYMLINK_NOFOLLOW); err != nil || testingForceProcSelf() {
@@ -275,7 +251,7 @@ func procThreadSelf(procRoot *os.File, subpath string) (_ *os.File, _ procThread
// absolutely sure we are operating on a clean /proc handle that
// doesn't have any cheeky overmounts that could trick us (including
// symlink mounts on top of /proc/thread-self). RESOLVE_BENEATH isn't
- // stricly needed, but just use it since we have it.
+ // strictly needed, but just use it since we have it.
//
// NOTE: /proc/self is technically a magic-link (the contents of the
// symlink are generated dynamically), but it doesn't use
@@ -313,24 +289,16 @@ func procThreadSelf(procRoot *os.File, subpath string) (_ *os.File, _ procThread
return handle, runtime.UnlockOSThread, nil
}
-var (
- hasStatxMountIdBool bool
- hasStatxMountIdOnce sync.Once
-)
-
-func hasStatxMountId() bool {
- hasStatxMountIdOnce.Do(func() {
- var (
- stx unix.Statx_t
- // We don't care which mount ID we get. The kernel will give us the
- // unique one if it is supported.
- wantStxMask uint32 = unix.STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID
- )
- err := unix.Statx(-int(unix.EBADF), "/", 0, int(wantStxMask), &stx)
- hasStatxMountIdBool = (err == nil && (stx.Mask&wantStxMask != 0))
- })
- return hasStatxMountIdBool
-}
+var hasStatxMountId = sync.OnceValue(func() bool {
+ var (
+ stx unix.Statx_t
+ // We don't care which mount ID we get. The kernel will give us the
+ // unique one if it is supported.
+ wantStxMask uint32 = unix.STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID
+ )
+ err := unix.Statx(-int(unix.EBADF), "/", 0, int(wantStxMask), &stx)
+ return err == nil && stx.Mask&wantStxMask != 0
+})
func getMountId(dir *os.File, path string) (uint64, error) {
// If we don't have statx(STATX_MNT_ID*) support, we can't do anything.
@@ -443,22 +411,6 @@ func isDeadInode(file *os.File) error {
return nil
}
-func getUmask() int {
- // umask is a per-thread property, but it is inherited by children, so we
- // need to lock our OS thread to make sure that no other goroutine runs in
- // this thread and no goroutines are spawned from this thread until we
- // revert to the old umask.
- //
- // We could parse /proc/self/status to avoid this get-set problem, but
- // /proc/thread-self requires LockOSThread anyway, so there's no real
- // benefit over just using umask(2).
- runtime.LockOSThread()
- umask := unix.Umask(0)
- unix.Umask(umask)
- runtime.UnlockOSThread()
- return umask
-}
-
func checkProcSelfFdPath(path string, file *os.File) error {
if err := isDeadInode(file); err != nil {
return err
diff --git a/vendor/github.com/cyphar/filepath-securejoin/vfs.go b/vendor/github.com/cyphar/filepath-securejoin/vfs.go
index 6e27c7dd..36373f8c 100644
--- a/vendor/github.com/cyphar/filepath-securejoin/vfs.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/vfs.go
@@ -10,19 +10,19 @@ import "os"
// are several projects (umoci and go-mtree) that are using this sort of
// interface.
-// VFS is the minimal interface necessary to use SecureJoinVFS. A nil VFS is
-// equivalent to using the standard os.* family of functions. This is mainly
+// VFS is the minimal interface necessary to use [SecureJoinVFS]. A nil VFS is
+// equivalent to using the standard [os].* family of functions. This is mainly
// used for the purposes of mock testing, but also can be used to otherwise use
-// SecureJoin with VFS-like system.
+// [SecureJoinVFS] with VFS-like system.
type VFS interface {
- // Lstat returns a FileInfo describing the named file. If the file is a
- // symbolic link, the returned FileInfo describes the symbolic link. Lstat
- // makes no attempt to follow the link. These semantics are identical to
- // os.Lstat.
+ // Lstat returns an [os.FileInfo] describing the named file. If the
+ // file is a symbolic link, the returned [os.FileInfo] describes the
+ // symbolic link. Lstat makes no attempt to follow the link.
+ // The semantics are identical to [os.Lstat].
Lstat(name string) (os.FileInfo, error)
- // Readlink returns the destination of the named symbolic link. These
- // semantics are identical to os.Readlink.
+ // Readlink returns the destination of the named symbolic link.
+ // The semantics are identical to [os.Readlink].
Readlink(name string) (string, error)
}
@@ -30,12 +30,6 @@ type VFS interface {
// module.
type osVFS struct{}
-// Lstat returns a FileInfo describing the named file. If the file is a
-// symbolic link, the returned FileInfo describes the symbolic link. Lstat
-// makes no attempt to follow the link. These semantics are identical to
-// os.Lstat.
func (o osVFS) Lstat(name string) (os.FileInfo, error) { return os.Lstat(name) }
-// Readlink returns the destination of the named symbolic link. These
-// semantics are identical to os.Readlink.
func (o osVFS) Readlink(name string) (string, error) { return os.Readlink(name) }
diff --git a/vendor/golang.org/x/crypto/argon2/blamka_amd64.s b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s
index 6713acca..c3895478 100644
--- a/vendor/golang.org/x/crypto/argon2/blamka_amd64.s
+++ b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s
@@ -1,243 +1,2791 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
+// Code generated by command: go run blamka_amd64.go -out ../blamka_amd64.s -pkg argon2. DO NOT EDIT.
//go:build amd64 && gc && !purego
#include "textflag.h"
-DATA ·c40<>+0x00(SB)/8, $0x0201000706050403
-DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
-GLOBL ·c40<>(SB), (NOPTR+RODATA), $16
-
-DATA ·c48<>+0x00(SB)/8, $0x0100070605040302
-DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
-GLOBL ·c48<>(SB), (NOPTR+RODATA), $16
-
-#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \
- MOVO v4, t1; \
- MOVO v5, v4; \
- MOVO t1, v5; \
- MOVO v6, t1; \
- PUNPCKLQDQ v6, t2; \
- PUNPCKHQDQ v7, v6; \
- PUNPCKHQDQ t2, v6; \
- PUNPCKLQDQ v7, t2; \
- MOVO t1, v7; \
- MOVO v2, t1; \
- PUNPCKHQDQ t2, v7; \
- PUNPCKLQDQ v3, t2; \
- PUNPCKHQDQ t2, v2; \
- PUNPCKLQDQ t1, t2; \
- PUNPCKHQDQ t2, v3
-
-#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \
- MOVO v4, t1; \
- MOVO v5, v4; \
- MOVO t1, v5; \
- MOVO v2, t1; \
- PUNPCKLQDQ v2, t2; \
- PUNPCKHQDQ v3, v2; \
- PUNPCKHQDQ t2, v2; \
- PUNPCKLQDQ v3, t2; \
- MOVO t1, v3; \
- MOVO v6, t1; \
- PUNPCKHQDQ t2, v3; \
- PUNPCKLQDQ v7, t2; \
- PUNPCKHQDQ t2, v6; \
- PUNPCKLQDQ t1, t2; \
- PUNPCKHQDQ t2, v7
-
-#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, t0, c40, c48) \
- MOVO v0, t0; \
- PMULULQ v2, t0; \
- PADDQ v2, v0; \
- PADDQ t0, v0; \
- PADDQ t0, v0; \
- PXOR v0, v6; \
- PSHUFD $0xB1, v6, v6; \
- MOVO v4, t0; \
- PMULULQ v6, t0; \
- PADDQ v6, v4; \
- PADDQ t0, v4; \
- PADDQ t0, v4; \
- PXOR v4, v2; \
- PSHUFB c40, v2; \
- MOVO v0, t0; \
- PMULULQ v2, t0; \
- PADDQ v2, v0; \
- PADDQ t0, v0; \
- PADDQ t0, v0; \
- PXOR v0, v6; \
- PSHUFB c48, v6; \
- MOVO v4, t0; \
- PMULULQ v6, t0; \
- PADDQ v6, v4; \
- PADDQ t0, v4; \
- PADDQ t0, v4; \
- PXOR v4, v2; \
- MOVO v2, t0; \
- PADDQ v2, t0; \
- PSRLQ $63, v2; \
- PXOR t0, v2; \
- MOVO v1, t0; \
- PMULULQ v3, t0; \
- PADDQ v3, v1; \
- PADDQ t0, v1; \
- PADDQ t0, v1; \
- PXOR v1, v7; \
- PSHUFD $0xB1, v7, v7; \
- MOVO v5, t0; \
- PMULULQ v7, t0; \
- PADDQ v7, v5; \
- PADDQ t0, v5; \
- PADDQ t0, v5; \
- PXOR v5, v3; \
- PSHUFB c40, v3; \
- MOVO v1, t0; \
- PMULULQ v3, t0; \
- PADDQ v3, v1; \
- PADDQ t0, v1; \
- PADDQ t0, v1; \
- PXOR v1, v7; \
- PSHUFB c48, v7; \
- MOVO v5, t0; \
- PMULULQ v7, t0; \
- PADDQ v7, v5; \
- PADDQ t0, v5; \
- PADDQ t0, v5; \
- PXOR v5, v3; \
- MOVO v3, t0; \
- PADDQ v3, t0; \
- PSRLQ $63, v3; \
- PXOR t0, v3
-
-#define LOAD_MSG_0(block, off) \
- MOVOU 8*(off+0)(block), X0; \
- MOVOU 8*(off+2)(block), X1; \
- MOVOU 8*(off+4)(block), X2; \
- MOVOU 8*(off+6)(block), X3; \
- MOVOU 8*(off+8)(block), X4; \
- MOVOU 8*(off+10)(block), X5; \
- MOVOU 8*(off+12)(block), X6; \
- MOVOU 8*(off+14)(block), X7
-
-#define STORE_MSG_0(block, off) \
- MOVOU X0, 8*(off+0)(block); \
- MOVOU X1, 8*(off+2)(block); \
- MOVOU X2, 8*(off+4)(block); \
- MOVOU X3, 8*(off+6)(block); \
- MOVOU X4, 8*(off+8)(block); \
- MOVOU X5, 8*(off+10)(block); \
- MOVOU X6, 8*(off+12)(block); \
- MOVOU X7, 8*(off+14)(block)
-
-#define LOAD_MSG_1(block, off) \
- MOVOU 8*off+0*8(block), X0; \
- MOVOU 8*off+16*8(block), X1; \
- MOVOU 8*off+32*8(block), X2; \
- MOVOU 8*off+48*8(block), X3; \
- MOVOU 8*off+64*8(block), X4; \
- MOVOU 8*off+80*8(block), X5; \
- MOVOU 8*off+96*8(block), X6; \
- MOVOU 8*off+112*8(block), X7
-
-#define STORE_MSG_1(block, off) \
- MOVOU X0, 8*off+0*8(block); \
- MOVOU X1, 8*off+16*8(block); \
- MOVOU X2, 8*off+32*8(block); \
- MOVOU X3, 8*off+48*8(block); \
- MOVOU X4, 8*off+64*8(block); \
- MOVOU X5, 8*off+80*8(block); \
- MOVOU X6, 8*off+96*8(block); \
- MOVOU X7, 8*off+112*8(block)
-
-#define BLAMKA_ROUND_0(block, off, t0, t1, c40, c48) \
- LOAD_MSG_0(block, off); \
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
- SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \
- STORE_MSG_0(block, off)
-
-#define BLAMKA_ROUND_1(block, off, t0, t1, c40, c48) \
- LOAD_MSG_1(block, off); \
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
- SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \
- STORE_MSG_1(block, off)
-
// func blamkaSSE4(b *block)
-TEXT ·blamkaSSE4(SB), 4, $0-8
- MOVQ b+0(FP), AX
-
- MOVOU ·c40<>(SB), X10
- MOVOU ·c48<>(SB), X11
+// Requires: SSE2, SSSE3
+TEXT ·blamkaSSE4(SB), NOSPLIT, $0-8
+ MOVQ b+0(FP), AX
+ MOVOU ·c40<>+0(SB), X10
+ MOVOU ·c48<>+0(SB), X11
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU 32(AX), X2
+ MOVOU 48(AX), X3
+ MOVOU 64(AX), X4
+ MOVOU 80(AX), X5
+ MOVOU 96(AX), X6
+ MOVOU 112(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, (AX)
+ MOVOU X1, 16(AX)
+ MOVOU X2, 32(AX)
+ MOVOU X3, 48(AX)
+ MOVOU X4, 64(AX)
+ MOVOU X5, 80(AX)
+ MOVOU X6, 96(AX)
+ MOVOU X7, 112(AX)
+ MOVOU 128(AX), X0
+ MOVOU 144(AX), X1
+ MOVOU 160(AX), X2
+ MOVOU 176(AX), X3
+ MOVOU 192(AX), X4
+ MOVOU 208(AX), X5
+ MOVOU 224(AX), X6
+ MOVOU 240(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 128(AX)
+ MOVOU X1, 144(AX)
+ MOVOU X2, 160(AX)
+ MOVOU X3, 176(AX)
+ MOVOU X4, 192(AX)
+ MOVOU X5, 208(AX)
+ MOVOU X6, 224(AX)
+ MOVOU X7, 240(AX)
+ MOVOU 256(AX), X0
+ MOVOU 272(AX), X1
+ MOVOU 288(AX), X2
+ MOVOU 304(AX), X3
+ MOVOU 320(AX), X4
+ MOVOU 336(AX), X5
+ MOVOU 352(AX), X6
+ MOVOU 368(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 256(AX)
+ MOVOU X1, 272(AX)
+ MOVOU X2, 288(AX)
+ MOVOU X3, 304(AX)
+ MOVOU X4, 320(AX)
+ MOVOU X5, 336(AX)
+ MOVOU X6, 352(AX)
+ MOVOU X7, 368(AX)
+ MOVOU 384(AX), X0
+ MOVOU 400(AX), X1
+ MOVOU 416(AX), X2
+ MOVOU 432(AX), X3
+ MOVOU 448(AX), X4
+ MOVOU 464(AX), X5
+ MOVOU 480(AX), X6
+ MOVOU 496(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 384(AX)
+ MOVOU X1, 400(AX)
+ MOVOU X2, 416(AX)
+ MOVOU X3, 432(AX)
+ MOVOU X4, 448(AX)
+ MOVOU X5, 464(AX)
+ MOVOU X6, 480(AX)
+ MOVOU X7, 496(AX)
+ MOVOU 512(AX), X0
+ MOVOU 528(AX), X1
+ MOVOU 544(AX), X2
+ MOVOU 560(AX), X3
+ MOVOU 576(AX), X4
+ MOVOU 592(AX), X5
+ MOVOU 608(AX), X6
+ MOVOU 624(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 512(AX)
+ MOVOU X1, 528(AX)
+ MOVOU X2, 544(AX)
+ MOVOU X3, 560(AX)
+ MOVOU X4, 576(AX)
+ MOVOU X5, 592(AX)
+ MOVOU X6, 608(AX)
+ MOVOU X7, 624(AX)
+ MOVOU 640(AX), X0
+ MOVOU 656(AX), X1
+ MOVOU 672(AX), X2
+ MOVOU 688(AX), X3
+ MOVOU 704(AX), X4
+ MOVOU 720(AX), X5
+ MOVOU 736(AX), X6
+ MOVOU 752(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 640(AX)
+ MOVOU X1, 656(AX)
+ MOVOU X2, 672(AX)
+ MOVOU X3, 688(AX)
+ MOVOU X4, 704(AX)
+ MOVOU X5, 720(AX)
+ MOVOU X6, 736(AX)
+ MOVOU X7, 752(AX)
+ MOVOU 768(AX), X0
+ MOVOU 784(AX), X1
+ MOVOU 800(AX), X2
+ MOVOU 816(AX), X3
+ MOVOU 832(AX), X4
+ MOVOU 848(AX), X5
+ MOVOU 864(AX), X6
+ MOVOU 880(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 768(AX)
+ MOVOU X1, 784(AX)
+ MOVOU X2, 800(AX)
+ MOVOU X3, 816(AX)
+ MOVOU X4, 832(AX)
+ MOVOU X5, 848(AX)
+ MOVOU X6, 864(AX)
+ MOVOU X7, 880(AX)
+ MOVOU 896(AX), X0
+ MOVOU 912(AX), X1
+ MOVOU 928(AX), X2
+ MOVOU 944(AX), X3
+ MOVOU 960(AX), X4
+ MOVOU 976(AX), X5
+ MOVOU 992(AX), X6
+ MOVOU 1008(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 896(AX)
+ MOVOU X1, 912(AX)
+ MOVOU X2, 928(AX)
+ MOVOU X3, 944(AX)
+ MOVOU X4, 960(AX)
+ MOVOU X5, 976(AX)
+ MOVOU X6, 992(AX)
+ MOVOU X7, 1008(AX)
+ MOVOU (AX), X0
+ MOVOU 128(AX), X1
+ MOVOU 256(AX), X2
+ MOVOU 384(AX), X3
+ MOVOU 512(AX), X4
+ MOVOU 640(AX), X5
+ MOVOU 768(AX), X6
+ MOVOU 896(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, (AX)
+ MOVOU X1, 128(AX)
+ MOVOU X2, 256(AX)
+ MOVOU X3, 384(AX)
+ MOVOU X4, 512(AX)
+ MOVOU X5, 640(AX)
+ MOVOU X6, 768(AX)
+ MOVOU X7, 896(AX)
+ MOVOU 16(AX), X0
+ MOVOU 144(AX), X1
+ MOVOU 272(AX), X2
+ MOVOU 400(AX), X3
+ MOVOU 528(AX), X4
+ MOVOU 656(AX), X5
+ MOVOU 784(AX), X6
+ MOVOU 912(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 16(AX)
+ MOVOU X1, 144(AX)
+ MOVOU X2, 272(AX)
+ MOVOU X3, 400(AX)
+ MOVOU X4, 528(AX)
+ MOVOU X5, 656(AX)
+ MOVOU X6, 784(AX)
+ MOVOU X7, 912(AX)
+ MOVOU 32(AX), X0
+ MOVOU 160(AX), X1
+ MOVOU 288(AX), X2
+ MOVOU 416(AX), X3
+ MOVOU 544(AX), X4
+ MOVOU 672(AX), X5
+ MOVOU 800(AX), X6
+ MOVOU 928(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 32(AX)
+ MOVOU X1, 160(AX)
+ MOVOU X2, 288(AX)
+ MOVOU X3, 416(AX)
+ MOVOU X4, 544(AX)
+ MOVOU X5, 672(AX)
+ MOVOU X6, 800(AX)
+ MOVOU X7, 928(AX)
+ MOVOU 48(AX), X0
+ MOVOU 176(AX), X1
+ MOVOU 304(AX), X2
+ MOVOU 432(AX), X3
+ MOVOU 560(AX), X4
+ MOVOU 688(AX), X5
+ MOVOU 816(AX), X6
+ MOVOU 944(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 48(AX)
+ MOVOU X1, 176(AX)
+ MOVOU X2, 304(AX)
+ MOVOU X3, 432(AX)
+ MOVOU X4, 560(AX)
+ MOVOU X5, 688(AX)
+ MOVOU X6, 816(AX)
+ MOVOU X7, 944(AX)
+ MOVOU 64(AX), X0
+ MOVOU 192(AX), X1
+ MOVOU 320(AX), X2
+ MOVOU 448(AX), X3
+ MOVOU 576(AX), X4
+ MOVOU 704(AX), X5
+ MOVOU 832(AX), X6
+ MOVOU 960(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 64(AX)
+ MOVOU X1, 192(AX)
+ MOVOU X2, 320(AX)
+ MOVOU X3, 448(AX)
+ MOVOU X4, 576(AX)
+ MOVOU X5, 704(AX)
+ MOVOU X6, 832(AX)
+ MOVOU X7, 960(AX)
+ MOVOU 80(AX), X0
+ MOVOU 208(AX), X1
+ MOVOU 336(AX), X2
+ MOVOU 464(AX), X3
+ MOVOU 592(AX), X4
+ MOVOU 720(AX), X5
+ MOVOU 848(AX), X6
+ MOVOU 976(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 80(AX)
+ MOVOU X1, 208(AX)
+ MOVOU X2, 336(AX)
+ MOVOU X3, 464(AX)
+ MOVOU X4, 592(AX)
+ MOVOU X5, 720(AX)
+ MOVOU X6, 848(AX)
+ MOVOU X7, 976(AX)
+ MOVOU 96(AX), X0
+ MOVOU 224(AX), X1
+ MOVOU 352(AX), X2
+ MOVOU 480(AX), X3
+ MOVOU 608(AX), X4
+ MOVOU 736(AX), X5
+ MOVOU 864(AX), X6
+ MOVOU 992(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 96(AX)
+ MOVOU X1, 224(AX)
+ MOVOU X2, 352(AX)
+ MOVOU X3, 480(AX)
+ MOVOU X4, 608(AX)
+ MOVOU X5, 736(AX)
+ MOVOU X6, 864(AX)
+ MOVOU X7, 992(AX)
+ MOVOU 112(AX), X0
+ MOVOU 240(AX), X1
+ MOVOU 368(AX), X2
+ MOVOU 496(AX), X3
+ MOVOU 624(AX), X4
+ MOVOU 752(AX), X5
+ MOVOU 880(AX), X6
+ MOVOU 1008(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 112(AX)
+ MOVOU X1, 240(AX)
+ MOVOU X2, 368(AX)
+ MOVOU X3, 496(AX)
+ MOVOU X4, 624(AX)
+ MOVOU X5, 752(AX)
+ MOVOU X6, 880(AX)
+ MOVOU X7, 1008(AX)
+ RET
- BLAMKA_ROUND_0(AX, 0, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 16, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 32, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 48, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 64, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 80, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 96, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 112, X8, X9, X10, X11)
+DATA ·c40<>+0(SB)/8, $0x0201000706050403
+DATA ·c40<>+8(SB)/8, $0x0a09080f0e0d0c0b
+GLOBL ·c40<>(SB), RODATA|NOPTR, $16
- BLAMKA_ROUND_1(AX, 0, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 2, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 4, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 6, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 8, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 10, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 12, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 14, X8, X9, X10, X11)
- RET
+DATA ·c48<>+0(SB)/8, $0x0100070605040302
+DATA ·c48<>+8(SB)/8, $0x09080f0e0d0c0b0a
+GLOBL ·c48<>(SB), RODATA|NOPTR, $16
-// func mixBlocksSSE2(out, a, b, c *block)
-TEXT ·mixBlocksSSE2(SB), 4, $0-32
+// func mixBlocksSSE2(out *block, a *block, b *block, c *block)
+// Requires: SSE2
+TEXT ·mixBlocksSSE2(SB), NOSPLIT, $0-32
MOVQ out+0(FP), DX
MOVQ a+8(FP), AX
MOVQ b+16(FP), BX
MOVQ c+24(FP), CX
- MOVQ $128, DI
+ MOVQ $0x00000080, DI
loop:
- MOVOU 0(AX), X0
- MOVOU 0(BX), X1
- MOVOU 0(CX), X2
+ MOVOU (AX), X0
+ MOVOU (BX), X1
+ MOVOU (CX), X2
PXOR X1, X0
PXOR X2, X0
- MOVOU X0, 0(DX)
- ADDQ $16, AX
- ADDQ $16, BX
- ADDQ $16, CX
- ADDQ $16, DX
- SUBQ $2, DI
+ MOVOU X0, (DX)
+ ADDQ $0x10, AX
+ ADDQ $0x10, BX
+ ADDQ $0x10, CX
+ ADDQ $0x10, DX
+ SUBQ $0x02, DI
JA loop
RET
-// func xorBlocksSSE2(out, a, b, c *block)
-TEXT ·xorBlocksSSE2(SB), 4, $0-32
+// func xorBlocksSSE2(out *block, a *block, b *block, c *block)
+// Requires: SSE2
+TEXT ·xorBlocksSSE2(SB), NOSPLIT, $0-32
MOVQ out+0(FP), DX
MOVQ a+8(FP), AX
MOVQ b+16(FP), BX
MOVQ c+24(FP), CX
- MOVQ $128, DI
+ MOVQ $0x00000080, DI
loop:
- MOVOU 0(AX), X0
- MOVOU 0(BX), X1
- MOVOU 0(CX), X2
- MOVOU 0(DX), X3
+ MOVOU (AX), X0
+ MOVOU (BX), X1
+ MOVOU (CX), X2
+ MOVOU (DX), X3
PXOR X1, X0
PXOR X2, X0
PXOR X3, X0
- MOVOU X0, 0(DX)
- ADDQ $16, AX
- ADDQ $16, BX
- ADDQ $16, CX
- ADDQ $16, DX
- SUBQ $2, DI
+ MOVOU X0, (DX)
+ ADDQ $0x10, AX
+ ADDQ $0x10, BX
+ ADDQ $0x10, CX
+ ADDQ $0x10, DX
+ SUBQ $0x02, DI
JA loop
RET
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s
index 9ae8206c..f75162e0 100644
--- a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s
+++ b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s
@@ -1,722 +1,4517 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
+// Code generated by command: go run blake2bAVX2_amd64_asm.go -out ../../blake2bAVX2_amd64.s -pkg blake2b. DO NOT EDIT.
//go:build amd64 && gc && !purego
#include "textflag.h"
-DATA ·AVX2_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
-DATA ·AVX2_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
-DATA ·AVX2_iv0<>+0x10(SB)/8, $0x3c6ef372fe94f82b
-DATA ·AVX2_iv0<>+0x18(SB)/8, $0xa54ff53a5f1d36f1
-GLOBL ·AVX2_iv0<>(SB), (NOPTR+RODATA), $32
-
-DATA ·AVX2_iv1<>+0x00(SB)/8, $0x510e527fade682d1
-DATA ·AVX2_iv1<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
-DATA ·AVX2_iv1<>+0x10(SB)/8, $0x1f83d9abfb41bd6b
-DATA ·AVX2_iv1<>+0x18(SB)/8, $0x5be0cd19137e2179
-GLOBL ·AVX2_iv1<>(SB), (NOPTR+RODATA), $32
-
-DATA ·AVX2_c40<>+0x00(SB)/8, $0x0201000706050403
-DATA ·AVX2_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
-DATA ·AVX2_c40<>+0x10(SB)/8, $0x0201000706050403
-DATA ·AVX2_c40<>+0x18(SB)/8, $0x0a09080f0e0d0c0b
-GLOBL ·AVX2_c40<>(SB), (NOPTR+RODATA), $32
-
-DATA ·AVX2_c48<>+0x00(SB)/8, $0x0100070605040302
-DATA ·AVX2_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
-DATA ·AVX2_c48<>+0x10(SB)/8, $0x0100070605040302
-DATA ·AVX2_c48<>+0x18(SB)/8, $0x09080f0e0d0c0b0a
-GLOBL ·AVX2_c48<>(SB), (NOPTR+RODATA), $32
-
-DATA ·AVX_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
-DATA ·AVX_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
-GLOBL ·AVX_iv0<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
-DATA ·AVX_iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
-GLOBL ·AVX_iv1<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_iv2<>+0x00(SB)/8, $0x510e527fade682d1
-DATA ·AVX_iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
-GLOBL ·AVX_iv2<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
-DATA ·AVX_iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
-GLOBL ·AVX_iv3<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_c40<>+0x00(SB)/8, $0x0201000706050403
-DATA ·AVX_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
-GLOBL ·AVX_c40<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_c48<>+0x00(SB)/8, $0x0100070605040302
-DATA ·AVX_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
-GLOBL ·AVX_c48<>(SB), (NOPTR+RODATA), $16
-
-#define VPERMQ_0x39_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x39
-#define VPERMQ_0x93_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x93
-#define VPERMQ_0x4E_Y2_Y2 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e
-#define VPERMQ_0x93_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x93
-#define VPERMQ_0x39_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x39
-
-#define ROUND_AVX2(m0, m1, m2, m3, t, c40, c48) \
- VPADDQ m0, Y0, Y0; \
- VPADDQ Y1, Y0, Y0; \
- VPXOR Y0, Y3, Y3; \
- VPSHUFD $-79, Y3, Y3; \
- VPADDQ Y3, Y2, Y2; \
- VPXOR Y2, Y1, Y1; \
- VPSHUFB c40, Y1, Y1; \
- VPADDQ m1, Y0, Y0; \
- VPADDQ Y1, Y0, Y0; \
- VPXOR Y0, Y3, Y3; \
- VPSHUFB c48, Y3, Y3; \
- VPADDQ Y3, Y2, Y2; \
- VPXOR Y2, Y1, Y1; \
- VPADDQ Y1, Y1, t; \
- VPSRLQ $63, Y1, Y1; \
- VPXOR t, Y1, Y1; \
- VPERMQ_0x39_Y1_Y1; \
- VPERMQ_0x4E_Y2_Y2; \
- VPERMQ_0x93_Y3_Y3; \
- VPADDQ m2, Y0, Y0; \
- VPADDQ Y1, Y0, Y0; \
- VPXOR Y0, Y3, Y3; \
- VPSHUFD $-79, Y3, Y3; \
- VPADDQ Y3, Y2, Y2; \
- VPXOR Y2, Y1, Y1; \
- VPSHUFB c40, Y1, Y1; \
- VPADDQ m3, Y0, Y0; \
- VPADDQ Y1, Y0, Y0; \
- VPXOR Y0, Y3, Y3; \
- VPSHUFB c48, Y3, Y3; \
- VPADDQ Y3, Y2, Y2; \
- VPXOR Y2, Y1, Y1; \
- VPADDQ Y1, Y1, t; \
- VPSRLQ $63, Y1, Y1; \
- VPXOR t, Y1, Y1; \
- VPERMQ_0x39_Y3_Y3; \
- VPERMQ_0x4E_Y2_Y2; \
- VPERMQ_0x93_Y1_Y1
-
-#define VMOVQ_SI_X11_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x1E
-#define VMOVQ_SI_X12_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x26
-#define VMOVQ_SI_X13_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x2E
-#define VMOVQ_SI_X14_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x36
-#define VMOVQ_SI_X15_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x3E
-
-#define VMOVQ_SI_X11(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x5E; BYTE $n
-#define VMOVQ_SI_X12(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x66; BYTE $n
-#define VMOVQ_SI_X13(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x6E; BYTE $n
-#define VMOVQ_SI_X14(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x76; BYTE $n
-#define VMOVQ_SI_X15(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x7E; BYTE $n
-
-#define VPINSRQ_1_SI_X11_0 BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x1E; BYTE $0x01
-#define VPINSRQ_1_SI_X12_0 BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x26; BYTE $0x01
-#define VPINSRQ_1_SI_X13_0 BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x2E; BYTE $0x01
-#define VPINSRQ_1_SI_X14_0 BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x36; BYTE $0x01
-#define VPINSRQ_1_SI_X15_0 BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x3E; BYTE $0x01
-
-#define VPINSRQ_1_SI_X11(n) BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x5E; BYTE $n; BYTE $0x01
-#define VPINSRQ_1_SI_X12(n) BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x66; BYTE $n; BYTE $0x01
-#define VPINSRQ_1_SI_X13(n) BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x6E; BYTE $n; BYTE $0x01
-#define VPINSRQ_1_SI_X14(n) BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x76; BYTE $n; BYTE $0x01
-#define VPINSRQ_1_SI_X15(n) BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x7E; BYTE $n; BYTE $0x01
-
-#define VMOVQ_R8_X15 BYTE $0xC4; BYTE $0x41; BYTE $0xF9; BYTE $0x6E; BYTE $0xF8
-#define VPINSRQ_1_R9_X15 BYTE $0xC4; BYTE $0x43; BYTE $0x81; BYTE $0x22; BYTE $0xF9; BYTE $0x01
-
-// load msg: Y12 = (i0, i1, i2, i3)
-// i0, i1, i2, i3 must not be 0
-#define LOAD_MSG_AVX2_Y12(i0, i1, i2, i3) \
- VMOVQ_SI_X12(i0*8); \
- VMOVQ_SI_X11(i2*8); \
- VPINSRQ_1_SI_X12(i1*8); \
- VPINSRQ_1_SI_X11(i3*8); \
- VINSERTI128 $1, X11, Y12, Y12
-
-// load msg: Y13 = (i0, i1, i2, i3)
-// i0, i1, i2, i3 must not be 0
-#define LOAD_MSG_AVX2_Y13(i0, i1, i2, i3) \
- VMOVQ_SI_X13(i0*8); \
- VMOVQ_SI_X11(i2*8); \
- VPINSRQ_1_SI_X13(i1*8); \
- VPINSRQ_1_SI_X11(i3*8); \
- VINSERTI128 $1, X11, Y13, Y13
-
-// load msg: Y14 = (i0, i1, i2, i3)
-// i0, i1, i2, i3 must not be 0
-#define LOAD_MSG_AVX2_Y14(i0, i1, i2, i3) \
- VMOVQ_SI_X14(i0*8); \
- VMOVQ_SI_X11(i2*8); \
- VPINSRQ_1_SI_X14(i1*8); \
- VPINSRQ_1_SI_X11(i3*8); \
- VINSERTI128 $1, X11, Y14, Y14
-
-// load msg: Y15 = (i0, i1, i2, i3)
-// i0, i1, i2, i3 must not be 0
-#define LOAD_MSG_AVX2_Y15(i0, i1, i2, i3) \
- VMOVQ_SI_X15(i0*8); \
- VMOVQ_SI_X11(i2*8); \
- VPINSRQ_1_SI_X15(i1*8); \
- VPINSRQ_1_SI_X11(i3*8); \
- VINSERTI128 $1, X11, Y15, Y15
-
-#define LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() \
- VMOVQ_SI_X12_0; \
- VMOVQ_SI_X11(4*8); \
- VPINSRQ_1_SI_X12(2*8); \
- VPINSRQ_1_SI_X11(6*8); \
- VINSERTI128 $1, X11, Y12, Y12; \
- LOAD_MSG_AVX2_Y13(1, 3, 5, 7); \
- LOAD_MSG_AVX2_Y14(8, 10, 12, 14); \
- LOAD_MSG_AVX2_Y15(9, 11, 13, 15)
-
-#define LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() \
- LOAD_MSG_AVX2_Y12(14, 4, 9, 13); \
- LOAD_MSG_AVX2_Y13(10, 8, 15, 6); \
- VMOVQ_SI_X11(11*8); \
- VPSHUFD $0x4E, 0*8(SI), X14; \
- VPINSRQ_1_SI_X11(5*8); \
- VINSERTI128 $1, X11, Y14, Y14; \
- LOAD_MSG_AVX2_Y15(12, 2, 7, 3)
-
-#define LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() \
- VMOVQ_SI_X11(5*8); \
- VMOVDQU 11*8(SI), X12; \
- VPINSRQ_1_SI_X11(15*8); \
- VINSERTI128 $1, X11, Y12, Y12; \
- VMOVQ_SI_X13(8*8); \
- VMOVQ_SI_X11(2*8); \
- VPINSRQ_1_SI_X13_0; \
- VPINSRQ_1_SI_X11(13*8); \
- VINSERTI128 $1, X11, Y13, Y13; \
- LOAD_MSG_AVX2_Y14(10, 3, 7, 9); \
- LOAD_MSG_AVX2_Y15(14, 6, 1, 4)
-
-#define LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() \
- LOAD_MSG_AVX2_Y12(7, 3, 13, 11); \
- LOAD_MSG_AVX2_Y13(9, 1, 12, 14); \
- LOAD_MSG_AVX2_Y14(2, 5, 4, 15); \
- VMOVQ_SI_X15(6*8); \
- VMOVQ_SI_X11_0; \
- VPINSRQ_1_SI_X15(10*8); \
- VPINSRQ_1_SI_X11(8*8); \
- VINSERTI128 $1, X11, Y15, Y15
-
-#define LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() \
- LOAD_MSG_AVX2_Y12(9, 5, 2, 10); \
- VMOVQ_SI_X13_0; \
- VMOVQ_SI_X11(4*8); \
- VPINSRQ_1_SI_X13(7*8); \
- VPINSRQ_1_SI_X11(15*8); \
- VINSERTI128 $1, X11, Y13, Y13; \
- LOAD_MSG_AVX2_Y14(14, 11, 6, 3); \
- LOAD_MSG_AVX2_Y15(1, 12, 8, 13)
-
-#define LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() \
- VMOVQ_SI_X12(2*8); \
- VMOVQ_SI_X11_0; \
- VPINSRQ_1_SI_X12(6*8); \
- VPINSRQ_1_SI_X11(8*8); \
- VINSERTI128 $1, X11, Y12, Y12; \
- LOAD_MSG_AVX2_Y13(12, 10, 11, 3); \
- LOAD_MSG_AVX2_Y14(4, 7, 15, 1); \
- LOAD_MSG_AVX2_Y15(13, 5, 14, 9)
-
-#define LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() \
- LOAD_MSG_AVX2_Y12(12, 1, 14, 4); \
- LOAD_MSG_AVX2_Y13(5, 15, 13, 10); \
- VMOVQ_SI_X14_0; \
- VPSHUFD $0x4E, 8*8(SI), X11; \
- VPINSRQ_1_SI_X14(6*8); \
- VINSERTI128 $1, X11, Y14, Y14; \
- LOAD_MSG_AVX2_Y15(7, 3, 2, 11)
-
-#define LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() \
- LOAD_MSG_AVX2_Y12(13, 7, 12, 3); \
- LOAD_MSG_AVX2_Y13(11, 14, 1, 9); \
- LOAD_MSG_AVX2_Y14(5, 15, 8, 2); \
- VMOVQ_SI_X15_0; \
- VMOVQ_SI_X11(6*8); \
- VPINSRQ_1_SI_X15(4*8); \
- VPINSRQ_1_SI_X11(10*8); \
- VINSERTI128 $1, X11, Y15, Y15
-
-#define LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() \
- VMOVQ_SI_X12(6*8); \
- VMOVQ_SI_X11(11*8); \
- VPINSRQ_1_SI_X12(14*8); \
- VPINSRQ_1_SI_X11_0; \
- VINSERTI128 $1, X11, Y12, Y12; \
- LOAD_MSG_AVX2_Y13(15, 9, 3, 8); \
- VMOVQ_SI_X11(1*8); \
- VMOVDQU 12*8(SI), X14; \
- VPINSRQ_1_SI_X11(10*8); \
- VINSERTI128 $1, X11, Y14, Y14; \
- VMOVQ_SI_X15(2*8); \
- VMOVDQU 4*8(SI), X11; \
- VPINSRQ_1_SI_X15(7*8); \
- VINSERTI128 $1, X11, Y15, Y15
-
-#define LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() \
- LOAD_MSG_AVX2_Y12(10, 8, 7, 1); \
- VMOVQ_SI_X13(2*8); \
- VPSHUFD $0x4E, 5*8(SI), X11; \
- VPINSRQ_1_SI_X13(4*8); \
- VINSERTI128 $1, X11, Y13, Y13; \
- LOAD_MSG_AVX2_Y14(15, 9, 3, 13); \
- VMOVQ_SI_X15(11*8); \
- VMOVQ_SI_X11(12*8); \
- VPINSRQ_1_SI_X15(14*8); \
- VPINSRQ_1_SI_X11_0; \
- VINSERTI128 $1, X11, Y15, Y15
-
// func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
-TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment
- MOVQ h+0(FP), AX
- MOVQ c+8(FP), BX
- MOVQ flag+16(FP), CX
- MOVQ blocks_base+24(FP), SI
- MOVQ blocks_len+32(FP), DI
-
- MOVQ SP, DX
- ADDQ $31, DX
- ANDQ $~31, DX
-
- MOVQ CX, 16(DX)
- XORQ CX, CX
- MOVQ CX, 24(DX)
-
- VMOVDQU ·AVX2_c40<>(SB), Y4
- VMOVDQU ·AVX2_c48<>(SB), Y5
-
- VMOVDQU 0(AX), Y8
+// Requires: AVX, AVX2
+TEXT ·hashBlocksAVX2(SB), NOSPLIT, $320-48
+ MOVQ h+0(FP), AX
+ MOVQ c+8(FP), BX
+ MOVQ flag+16(FP), CX
+ MOVQ blocks_base+24(FP), SI
+ MOVQ blocks_len+32(FP), DI
+ MOVQ SP, DX
+ ADDQ $+31, DX
+ ANDQ $-32, DX
+ MOVQ CX, 16(DX)
+ XORQ CX, CX
+ MOVQ CX, 24(DX)
+ VMOVDQU ·AVX2_c40<>+0(SB), Y4
+ VMOVDQU ·AVX2_c48<>+0(SB), Y5
+ VMOVDQU (AX), Y8
VMOVDQU 32(AX), Y9
- VMOVDQU ·AVX2_iv0<>(SB), Y6
- VMOVDQU ·AVX2_iv1<>(SB), Y7
-
- MOVQ 0(BX), R8
- MOVQ 8(BX), R9
- MOVQ R9, 8(DX)
+ VMOVDQU ·AVX2_iv0<>+0(SB), Y6
+ VMOVDQU ·AVX2_iv1<>+0(SB), Y7
+ MOVQ (BX), R8
+ MOVQ 8(BX), R9
+ MOVQ R9, 8(DX)
loop:
- ADDQ $128, R8
- MOVQ R8, 0(DX)
- CMPQ R8, $128
+ ADDQ $0x80, R8
+ MOVQ R8, (DX)
+ CMPQ R8, $0x80
JGE noinc
INCQ R9
MOVQ R9, 8(DX)
noinc:
- VMOVDQA Y8, Y0
- VMOVDQA Y9, Y1
- VMOVDQA Y6, Y2
- VPXOR 0(DX), Y7, Y3
-
- LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15()
- VMOVDQA Y12, 32(DX)
- VMOVDQA Y13, 64(DX)
- VMOVDQA Y14, 96(DX)
- VMOVDQA Y15, 128(DX)
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3()
- VMOVDQA Y12, 160(DX)
- VMOVDQA Y13, 192(DX)
- VMOVDQA Y14, 224(DX)
- VMOVDQA Y15, 256(DX)
-
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
-
- ROUND_AVX2(32(DX), 64(DX), 96(DX), 128(DX), Y10, Y4, Y5)
- ROUND_AVX2(160(DX), 192(DX), 224(DX), 256(DX), Y10, Y4, Y5)
-
- VPXOR Y0, Y8, Y8
- VPXOR Y1, Y9, Y9
- VPXOR Y2, Y8, Y8
- VPXOR Y3, Y9, Y9
-
- LEAQ 128(SI), SI
- SUBQ $128, DI
- JNE loop
-
- MOVQ R8, 0(BX)
- MOVQ R9, 8(BX)
-
- VMOVDQU Y8, 0(AX)
- VMOVDQU Y9, 32(AX)
+ VMOVDQA Y8, Y0
+ VMOVDQA Y9, Y1
+ VMOVDQA Y6, Y2
+ VPXOR (DX), Y7, Y3
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x26
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x20
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x10
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x30
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x08
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x28
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x38
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x40
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x60
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x50
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x70
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x48
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x68
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x58
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x78
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VMOVDQA Y12, 32(DX)
+ VMOVDQA Y13, 64(DX)
+ VMOVDQA Y14, 96(DX)
+ VMOVDQA Y15, 128(DX)
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x70
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x48
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x20
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x68
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x50
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x78
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x40
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x30
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x58
+ VPSHUFD $0x4e, (SI), X14
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x28
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x60
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x38
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x10
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x18
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VMOVDQA Y12, 160(DX)
+ VMOVDQA Y13, 192(DX)
+ VMOVDQA Y14, 224(DX)
+ VMOVDQA Y15, 256(DX)
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x28
+ VMOVDQU 88(SI), X12
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x78
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x40
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x10
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x2e
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x68
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x50
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x38
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x48
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x70
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x08
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x30
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x20
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x38
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x68
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x58
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x48
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x60
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x08
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x70
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x10
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x20
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x28
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x78
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x30
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x1e
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x50
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x40
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x48
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x10
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x28
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x50
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x2e
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x20
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x38
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x78
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x70
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x30
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x58
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x18
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x08
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x40
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x60
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x68
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x10
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x1e
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x30
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x40
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x60
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x58
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x50
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x18
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x20
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x78
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x38
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x08
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x68
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x70
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x28
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x48
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x60
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x70
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x08
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x20
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x28
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x68
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x78
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x50
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x36
+ VPSHUFD $0x4e, 64(SI), X11
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x30
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x38
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x10
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x58
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x68
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x60
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x38
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x18
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x58
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x08
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x70
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x48
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x28
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x40
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x78
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x10
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x3e
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x30
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x20
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x50
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x30
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x58
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x70
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x1e
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x78
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x18
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x48
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x40
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x08
+ VMOVDQU 96(SI), X14
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x50
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x10
+ VMOVDQU 32(SI), X11
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x38
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x50
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x38
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x40
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x08
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x10
+ VPSHUFD $0x4e, 40(SI), X11
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x20
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x78
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x18
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x48
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x68
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x58
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x60
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x70
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x1e
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ VPADDQ 32(DX), Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ 64(DX), Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ 96(DX), Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ 128(DX), Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ VPADDQ 160(DX), Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ 192(DX), Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ 224(DX), Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ 256(DX), Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ VPXOR Y0, Y8, Y8
+ VPXOR Y1, Y9, Y9
+ VPXOR Y2, Y8, Y8
+ VPXOR Y3, Y9, Y9
+ LEAQ 128(SI), SI
+ SUBQ $0x80, DI
+ JNE loop
+ MOVQ R8, (BX)
+ MOVQ R9, 8(BX)
+ VMOVDQU Y8, (AX)
+ VMOVDQU Y9, 32(AX)
VZEROUPPER
-
RET
-#define VPUNPCKLQDQ_X2_X2_X15 BYTE $0xC5; BYTE $0x69; BYTE $0x6C; BYTE $0xFA
-#define VPUNPCKLQDQ_X3_X3_X15 BYTE $0xC5; BYTE $0x61; BYTE $0x6C; BYTE $0xFB
-#define VPUNPCKLQDQ_X7_X7_X15 BYTE $0xC5; BYTE $0x41; BYTE $0x6C; BYTE $0xFF
-#define VPUNPCKLQDQ_X13_X13_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x11; BYTE $0x6C; BYTE $0xFD
-#define VPUNPCKLQDQ_X14_X14_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x09; BYTE $0x6C; BYTE $0xFE
-
-#define VPUNPCKHQDQ_X15_X2_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x69; BYTE $0x6D; BYTE $0xD7
-#define VPUNPCKHQDQ_X15_X3_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xDF
-#define VPUNPCKHQDQ_X15_X6_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x49; BYTE $0x6D; BYTE $0xF7
-#define VPUNPCKHQDQ_X15_X7_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xFF
-#define VPUNPCKHQDQ_X15_X3_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xD7
-#define VPUNPCKHQDQ_X15_X7_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xF7
-#define VPUNPCKHQDQ_X15_X13_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xDF
-#define VPUNPCKHQDQ_X15_X13_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xFF
-
-#define SHUFFLE_AVX() \
- VMOVDQA X6, X13; \
- VMOVDQA X2, X14; \
- VMOVDQA X4, X6; \
- VPUNPCKLQDQ_X13_X13_X15; \
- VMOVDQA X5, X4; \
- VMOVDQA X6, X5; \
- VPUNPCKHQDQ_X15_X7_X6; \
- VPUNPCKLQDQ_X7_X7_X15; \
- VPUNPCKHQDQ_X15_X13_X7; \
- VPUNPCKLQDQ_X3_X3_X15; \
- VPUNPCKHQDQ_X15_X2_X2; \
- VPUNPCKLQDQ_X14_X14_X15; \
- VPUNPCKHQDQ_X15_X3_X3; \
-
-#define SHUFFLE_AVX_INV() \
- VMOVDQA X2, X13; \
- VMOVDQA X4, X14; \
- VPUNPCKLQDQ_X2_X2_X15; \
- VMOVDQA X5, X4; \
- VPUNPCKHQDQ_X15_X3_X2; \
- VMOVDQA X14, X5; \
- VPUNPCKLQDQ_X3_X3_X15; \
- VMOVDQA X6, X14; \
- VPUNPCKHQDQ_X15_X13_X3; \
- VPUNPCKLQDQ_X7_X7_X15; \
- VPUNPCKHQDQ_X15_X6_X6; \
- VPUNPCKLQDQ_X14_X14_X15; \
- VPUNPCKHQDQ_X15_X7_X7; \
-
-#define HALF_ROUND_AVX(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
- VPADDQ m0, v0, v0; \
- VPADDQ v2, v0, v0; \
- VPADDQ m1, v1, v1; \
- VPADDQ v3, v1, v1; \
- VPXOR v0, v6, v6; \
- VPXOR v1, v7, v7; \
- VPSHUFD $-79, v6, v6; \
- VPSHUFD $-79, v7, v7; \
- VPADDQ v6, v4, v4; \
- VPADDQ v7, v5, v5; \
- VPXOR v4, v2, v2; \
- VPXOR v5, v3, v3; \
- VPSHUFB c40, v2, v2; \
- VPSHUFB c40, v3, v3; \
- VPADDQ m2, v0, v0; \
- VPADDQ v2, v0, v0; \
- VPADDQ m3, v1, v1; \
- VPADDQ v3, v1, v1; \
- VPXOR v0, v6, v6; \
- VPXOR v1, v7, v7; \
- VPSHUFB c48, v6, v6; \
- VPSHUFB c48, v7, v7; \
- VPADDQ v6, v4, v4; \
- VPADDQ v7, v5, v5; \
- VPXOR v4, v2, v2; \
- VPXOR v5, v3, v3; \
- VPADDQ v2, v2, t0; \
- VPSRLQ $63, v2, v2; \
- VPXOR t0, v2, v2; \
- VPADDQ v3, v3, t0; \
- VPSRLQ $63, v3, v3; \
- VPXOR t0, v3, v3
-
-// load msg: X12 = (i0, i1), X13 = (i2, i3), X14 = (i4, i5), X15 = (i6, i7)
-// i0, i1, i2, i3, i4, i5, i6, i7 must not be 0
-#define LOAD_MSG_AVX(i0, i1, i2, i3, i4, i5, i6, i7) \
- VMOVQ_SI_X12(i0*8); \
- VMOVQ_SI_X13(i2*8); \
- VMOVQ_SI_X14(i4*8); \
- VMOVQ_SI_X15(i6*8); \
- VPINSRQ_1_SI_X12(i1*8); \
- VPINSRQ_1_SI_X13(i3*8); \
- VPINSRQ_1_SI_X14(i5*8); \
- VPINSRQ_1_SI_X15(i7*8)
-
-// load msg: X12 = (0, 2), X13 = (4, 6), X14 = (1, 3), X15 = (5, 7)
-#define LOAD_MSG_AVX_0_2_4_6_1_3_5_7() \
- VMOVQ_SI_X12_0; \
- VMOVQ_SI_X13(4*8); \
- VMOVQ_SI_X14(1*8); \
- VMOVQ_SI_X15(5*8); \
- VPINSRQ_1_SI_X12(2*8); \
- VPINSRQ_1_SI_X13(6*8); \
- VPINSRQ_1_SI_X14(3*8); \
- VPINSRQ_1_SI_X15(7*8)
-
-// load msg: X12 = (1, 0), X13 = (11, 5), X14 = (12, 2), X15 = (7, 3)
-#define LOAD_MSG_AVX_1_0_11_5_12_2_7_3() \
- VPSHUFD $0x4E, 0*8(SI), X12; \
- VMOVQ_SI_X13(11*8); \
- VMOVQ_SI_X14(12*8); \
- VMOVQ_SI_X15(7*8); \
- VPINSRQ_1_SI_X13(5*8); \
- VPINSRQ_1_SI_X14(2*8); \
- VPINSRQ_1_SI_X15(3*8)
-
-// load msg: X12 = (11, 12), X13 = (5, 15), X14 = (8, 0), X15 = (2, 13)
-#define LOAD_MSG_AVX_11_12_5_15_8_0_2_13() \
- VMOVDQU 11*8(SI), X12; \
- VMOVQ_SI_X13(5*8); \
- VMOVQ_SI_X14(8*8); \
- VMOVQ_SI_X15(2*8); \
- VPINSRQ_1_SI_X13(15*8); \
- VPINSRQ_1_SI_X14_0; \
- VPINSRQ_1_SI_X15(13*8)
-
-// load msg: X12 = (2, 5), X13 = (4, 15), X14 = (6, 10), X15 = (0, 8)
-#define LOAD_MSG_AVX_2_5_4_15_6_10_0_8() \
- VMOVQ_SI_X12(2*8); \
- VMOVQ_SI_X13(4*8); \
- VMOVQ_SI_X14(6*8); \
- VMOVQ_SI_X15_0; \
- VPINSRQ_1_SI_X12(5*8); \
- VPINSRQ_1_SI_X13(15*8); \
- VPINSRQ_1_SI_X14(10*8); \
- VPINSRQ_1_SI_X15(8*8)
+DATA ·AVX2_c40<>+0(SB)/8, $0x0201000706050403
+DATA ·AVX2_c40<>+8(SB)/8, $0x0a09080f0e0d0c0b
+DATA ·AVX2_c40<>+16(SB)/8, $0x0201000706050403
+DATA ·AVX2_c40<>+24(SB)/8, $0x0a09080f0e0d0c0b
+GLOBL ·AVX2_c40<>(SB), RODATA|NOPTR, $32
-// load msg: X12 = (9, 5), X13 = (2, 10), X14 = (0, 7), X15 = (4, 15)
-#define LOAD_MSG_AVX_9_5_2_10_0_7_4_15() \
- VMOVQ_SI_X12(9*8); \
- VMOVQ_SI_X13(2*8); \
- VMOVQ_SI_X14_0; \
- VMOVQ_SI_X15(4*8); \
- VPINSRQ_1_SI_X12(5*8); \
- VPINSRQ_1_SI_X13(10*8); \
- VPINSRQ_1_SI_X14(7*8); \
- VPINSRQ_1_SI_X15(15*8)
+DATA ·AVX2_c48<>+0(SB)/8, $0x0100070605040302
+DATA ·AVX2_c48<>+8(SB)/8, $0x09080f0e0d0c0b0a
+DATA ·AVX2_c48<>+16(SB)/8, $0x0100070605040302
+DATA ·AVX2_c48<>+24(SB)/8, $0x09080f0e0d0c0b0a
+GLOBL ·AVX2_c48<>(SB), RODATA|NOPTR, $32
-// load msg: X12 = (2, 6), X13 = (0, 8), X14 = (12, 10), X15 = (11, 3)
-#define LOAD_MSG_AVX_2_6_0_8_12_10_11_3() \
- VMOVQ_SI_X12(2*8); \
- VMOVQ_SI_X13_0; \
- VMOVQ_SI_X14(12*8); \
- VMOVQ_SI_X15(11*8); \
- VPINSRQ_1_SI_X12(6*8); \
- VPINSRQ_1_SI_X13(8*8); \
- VPINSRQ_1_SI_X14(10*8); \
- VPINSRQ_1_SI_X15(3*8)
+DATA ·AVX2_iv0<>+0(SB)/8, $0x6a09e667f3bcc908
+DATA ·AVX2_iv0<>+8(SB)/8, $0xbb67ae8584caa73b
+DATA ·AVX2_iv0<>+16(SB)/8, $0x3c6ef372fe94f82b
+DATA ·AVX2_iv0<>+24(SB)/8, $0xa54ff53a5f1d36f1
+GLOBL ·AVX2_iv0<>(SB), RODATA|NOPTR, $32
-// load msg: X12 = (0, 6), X13 = (9, 8), X14 = (7, 3), X15 = (2, 11)
-#define LOAD_MSG_AVX_0_6_9_8_7_3_2_11() \
- MOVQ 0*8(SI), X12; \
- VPSHUFD $0x4E, 8*8(SI), X13; \
- MOVQ 7*8(SI), X14; \
- MOVQ 2*8(SI), X15; \
- VPINSRQ_1_SI_X12(6*8); \
- VPINSRQ_1_SI_X14(3*8); \
- VPINSRQ_1_SI_X15(11*8)
-
-// load msg: X12 = (6, 14), X13 = (11, 0), X14 = (15, 9), X15 = (3, 8)
-#define LOAD_MSG_AVX_6_14_11_0_15_9_3_8() \
- MOVQ 6*8(SI), X12; \
- MOVQ 11*8(SI), X13; \
- MOVQ 15*8(SI), X14; \
- MOVQ 3*8(SI), X15; \
- VPINSRQ_1_SI_X12(14*8); \
- VPINSRQ_1_SI_X13_0; \
- VPINSRQ_1_SI_X14(9*8); \
- VPINSRQ_1_SI_X15(8*8)
-
-// load msg: X12 = (5, 15), X13 = (8, 2), X14 = (0, 4), X15 = (6, 10)
-#define LOAD_MSG_AVX_5_15_8_2_0_4_6_10() \
- MOVQ 5*8(SI), X12; \
- MOVQ 8*8(SI), X13; \
- MOVQ 0*8(SI), X14; \
- MOVQ 6*8(SI), X15; \
- VPINSRQ_1_SI_X12(15*8); \
- VPINSRQ_1_SI_X13(2*8); \
- VPINSRQ_1_SI_X14(4*8); \
- VPINSRQ_1_SI_X15(10*8)
-
-// load msg: X12 = (12, 13), X13 = (1, 10), X14 = (2, 7), X15 = (4, 5)
-#define LOAD_MSG_AVX_12_13_1_10_2_7_4_5() \
- VMOVDQU 12*8(SI), X12; \
- MOVQ 1*8(SI), X13; \
- MOVQ 2*8(SI), X14; \
- VPINSRQ_1_SI_X13(10*8); \
- VPINSRQ_1_SI_X14(7*8); \
- VMOVDQU 4*8(SI), X15
-
-// load msg: X12 = (15, 9), X13 = (3, 13), X14 = (11, 14), X15 = (12, 0)
-#define LOAD_MSG_AVX_15_9_3_13_11_14_12_0() \
- MOVQ 15*8(SI), X12; \
- MOVQ 3*8(SI), X13; \
- MOVQ 11*8(SI), X14; \
- MOVQ 12*8(SI), X15; \
- VPINSRQ_1_SI_X12(9*8); \
- VPINSRQ_1_SI_X13(13*8); \
- VPINSRQ_1_SI_X14(14*8); \
- VPINSRQ_1_SI_X15_0
+DATA ·AVX2_iv1<>+0(SB)/8, $0x510e527fade682d1
+DATA ·AVX2_iv1<>+8(SB)/8, $0x9b05688c2b3e6c1f
+DATA ·AVX2_iv1<>+16(SB)/8, $0x1f83d9abfb41bd6b
+DATA ·AVX2_iv1<>+24(SB)/8, $0x5be0cd19137e2179
+GLOBL ·AVX2_iv1<>(SB), RODATA|NOPTR, $32
// func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
-TEXT ·hashBlocksAVX(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
- MOVQ h+0(FP), AX
- MOVQ c+8(FP), BX
- MOVQ flag+16(FP), CX
- MOVQ blocks_base+24(FP), SI
- MOVQ blocks_len+32(FP), DI
-
- MOVQ SP, R10
- ADDQ $15, R10
- ANDQ $~15, R10
-
- VMOVDQU ·AVX_c40<>(SB), X0
- VMOVDQU ·AVX_c48<>(SB), X1
+// Requires: AVX, SSE2
+TEXT ·hashBlocksAVX(SB), NOSPLIT, $288-48
+ MOVQ h+0(FP), AX
+ MOVQ c+8(FP), BX
+ MOVQ flag+16(FP), CX
+ MOVQ blocks_base+24(FP), SI
+ MOVQ blocks_len+32(FP), DI
+ MOVQ SP, R10
+ ADDQ $0x0f, R10
+ ANDQ $-16, R10
+ VMOVDQU ·AVX_c40<>+0(SB), X0
+ VMOVDQU ·AVX_c48<>+0(SB), X1
VMOVDQA X0, X8
VMOVDQA X1, X9
-
- VMOVDQU ·AVX_iv3<>(SB), X0
- VMOVDQA X0, 0(R10)
- XORQ CX, 0(R10) // 0(R10) = ·AVX_iv3 ^ (CX || 0)
-
- VMOVDQU 0(AX), X10
+ VMOVDQU ·AVX_iv3<>+0(SB), X0
+ VMOVDQA X0, (R10)
+ XORQ CX, (R10)
+ VMOVDQU (AX), X10
VMOVDQU 16(AX), X11
VMOVDQU 32(AX), X2
VMOVDQU 48(AX), X3
-
- MOVQ 0(BX), R8
- MOVQ 8(BX), R9
+ MOVQ (BX), R8
+ MOVQ 8(BX), R9
loop:
- ADDQ $128, R8
- CMPQ R8, $128
+ ADDQ $0x80, R8
+ CMPQ R8, $0x80
JGE noinc
INCQ R9
noinc:
- VMOVQ_R8_X15
- VPINSRQ_1_R9_X15
-
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0xf9
+ BYTE $0x6e
+ BYTE $0xf8
+ BYTE $0xc4
+ BYTE $0x43
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0xf9
+ BYTE $0x01
VMOVDQA X10, X0
VMOVDQA X11, X1
- VMOVDQU ·AVX_iv0<>(SB), X4
- VMOVDQU ·AVX_iv1<>(SB), X5
- VMOVDQU ·AVX_iv2<>(SB), X6
-
+ VMOVDQU ·AVX_iv0<>+0(SB), X4
+ VMOVDQU ·AVX_iv1<>+0(SB), X5
+ VMOVDQU ·AVX_iv2<>+0(SB), X6
VPXOR X15, X6, X6
- VMOVDQA 0(R10), X7
-
- LOAD_MSG_AVX_0_2_4_6_1_3_5_7()
+ VMOVDQA (R10), X7
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x26
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x20
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x08
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x28
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x10
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x30
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x38
+ BYTE $0x01
VMOVDQA X12, 16(R10)
VMOVDQA X13, 32(R10)
VMOVDQA X14, 48(R10)
VMOVDQA X15, 64(R10)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX(8, 10, 12, 14, 9, 11, 13, 15)
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x40
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x60
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x48
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x68
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x50
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x70
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x58
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x78
+ BYTE $0x01
VMOVDQA X12, 80(R10)
VMOVDQA X13, 96(R10)
VMOVDQA X14, 112(R10)
VMOVDQA X15, 128(R10)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX(14, 4, 9, 13, 10, 8, 15, 6)
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x70
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x48
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x50
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x78
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x20
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x68
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x40
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x30
+ BYTE $0x01
VMOVDQA X12, 144(R10)
VMOVDQA X13, 160(R10)
VMOVDQA X14, 176(R10)
VMOVDQA X15, 192(R10)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX_1_0_11_5_12_2_7_3()
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ VPSHUFD $0x4e, (SI), X12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x58
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x60
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x38
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x28
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x10
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x18
+ BYTE $0x01
VMOVDQA X12, 208(R10)
VMOVDQA X13, 224(R10)
VMOVDQA X14, 240(R10)
VMOVDQA X15, 256(R10)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX_11_12_5_15_8_0_2_13()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX(10, 3, 7, 9, 14, 6, 1, 4)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX(7, 3, 13, 11, 9, 1, 12, 14)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX_2_5_4_15_6_10_0_8()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX_9_5_2_10_0_7_4_15()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX(14, 11, 6, 3, 1, 12, 8, 13)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX_2_6_0_8_12_10_11_3()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX(4, 7, 15, 1, 13, 5, 14, 9)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX(12, 1, 14, 4, 5, 15, 13, 10)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX_0_6_9_8_7_3_2_11()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX(13, 7, 12, 3, 11, 14, 1, 9)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX_5_15_8_2_0_4_6_10()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX_6_14_11_0_15_9_3_8()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX_12_13_1_10_2_7_4_5()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX(10, 8, 7, 1, 2, 4, 6, 5)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX_15_9_3_13_11_14_12_0()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X15, X8, X9)
- SHUFFLE_AVX()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X15, X8, X9)
- SHUFFLE_AVX()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X15, X8, X9)
- SHUFFLE_AVX_INV()
-
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ VMOVDQU 88(SI), X12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x28
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x40
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x10
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x78
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x36
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x68
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x50
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x38
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x70
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x08
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x48
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x30
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x20
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x38
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x68
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x48
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x60
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x58
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x08
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x70
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x10
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x20
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x30
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x3e
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x28
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x78
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x50
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x40
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x48
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x10
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x36
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x20
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x28
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x50
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x38
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x78
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x70
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x30
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x08
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x40
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x58
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x60
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x68
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x10
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x2e
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x60
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x58
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x30
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x40
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x50
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x18
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x20
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x78
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x68
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x70
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x38
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x08
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x28
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x48
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x60
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x70
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x28
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x68
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x08
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x20
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x78
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x50
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ MOVQ (SI), X12
+ VPSHUFD $0x4e, 64(SI), X13
+ MOVQ 56(SI), X14
+ MOVQ 16(SI), X15
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x30
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x58
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x68
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x60
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x58
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x08
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x38
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x70
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x48
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ MOVQ 40(SI), X12
+ MOVQ 64(SI), X13
+ MOVQ (SI), X14
+ MOVQ 48(SI), X15
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x78
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x10
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x20
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x50
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ MOVQ 48(SI), X12
+ MOVQ 88(SI), X13
+ MOVQ 120(SI), X14
+ MOVQ 24(SI), X15
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x70
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x2e
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x48
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x40
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ VMOVDQU 96(SI), X12
+ MOVQ 8(SI), X13
+ MOVQ 16(SI), X14
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x50
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x38
+ BYTE $0x01
+ VMOVDQU 32(SI), X15
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x50
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x38
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x10
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x30
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x40
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x08
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x20
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x28
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ MOVQ 120(SI), X12
+ MOVQ 24(SI), X13
+ MOVQ 88(SI), X14
+ MOVQ 96(SI), X15
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x48
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x68
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x70
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x3e
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ VPADDQ 16(R10), X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ 32(R10), X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ 48(R10), X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ 64(R10), X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ VPADDQ 80(R10), X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ 96(R10), X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ 112(R10), X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ 128(R10), X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ VPADDQ 144(R10), X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ 160(R10), X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ 176(R10), X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ 192(R10), X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ VPADDQ 208(R10), X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ 224(R10), X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ 240(R10), X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ 256(R10), X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
VMOVDQU 32(AX), X14
VMOVDQU 48(AX), X15
VPXOR X0, X10, X10
@@ -729,16 +4524,36 @@ noinc:
VPXOR X7, X15, X3
VMOVDQU X2, 32(AX)
VMOVDQU X3, 48(AX)
+ LEAQ 128(SI), SI
+ SUBQ $0x80, DI
+ JNE loop
+ VMOVDQU X10, (AX)
+ VMOVDQU X11, 16(AX)
+ MOVQ R8, (BX)
+ MOVQ R9, 8(BX)
+ VZEROUPPER
+ RET
- LEAQ 128(SI), SI
- SUBQ $128, DI
- JNE loop
+DATA ·AVX_c40<>+0(SB)/8, $0x0201000706050403
+DATA ·AVX_c40<>+8(SB)/8, $0x0a09080f0e0d0c0b
+GLOBL ·AVX_c40<>(SB), RODATA|NOPTR, $16
- VMOVDQU X10, 0(AX)
- VMOVDQU X11, 16(AX)
+DATA ·AVX_c48<>+0(SB)/8, $0x0100070605040302
+DATA ·AVX_c48<>+8(SB)/8, $0x09080f0e0d0c0b0a
+GLOBL ·AVX_c48<>(SB), RODATA|NOPTR, $16
- MOVQ R8, 0(BX)
- MOVQ R9, 8(BX)
- VZEROUPPER
+DATA ·AVX_iv3<>+0(SB)/8, $0x1f83d9abfb41bd6b
+DATA ·AVX_iv3<>+8(SB)/8, $0x5be0cd19137e2179
+GLOBL ·AVX_iv3<>(SB), RODATA|NOPTR, $16
- RET
+DATA ·AVX_iv0<>+0(SB)/8, $0x6a09e667f3bcc908
+DATA ·AVX_iv0<>+8(SB)/8, $0xbb67ae8584caa73b
+GLOBL ·AVX_iv0<>(SB), RODATA|NOPTR, $16
+
+DATA ·AVX_iv1<>+0(SB)/8, $0x3c6ef372fe94f82b
+DATA ·AVX_iv1<>+8(SB)/8, $0xa54ff53a5f1d36f1
+GLOBL ·AVX_iv1<>(SB), RODATA|NOPTR, $16
+
+DATA ·AVX_iv2<>+0(SB)/8, $0x510e527fade682d1
+DATA ·AVX_iv2<>+8(SB)/8, $0x9b05688c2b3e6c1f
+GLOBL ·AVX_iv2<>(SB), RODATA|NOPTR, $16
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s
index adfac00c..9a0ce212 100644
--- a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s
+++ b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s
@@ -1,278 +1,1441 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
+// Code generated by command: go run blake2b_amd64_asm.go -out ../../blake2b_amd64.s -pkg blake2b. DO NOT EDIT.
//go:build amd64 && gc && !purego
#include "textflag.h"
-DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
-DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
-GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16
-
-DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
-DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
-GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16
-
-DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1
-DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
-GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16
-
-DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
-DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
-GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16
-
-DATA ·c40<>+0x00(SB)/8, $0x0201000706050403
-DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
-GLOBL ·c40<>(SB), (NOPTR+RODATA), $16
-
-DATA ·c48<>+0x00(SB)/8, $0x0100070605040302
-DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
-GLOBL ·c48<>(SB), (NOPTR+RODATA), $16
-
-#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \
- MOVO v4, t1; \
- MOVO v5, v4; \
- MOVO t1, v5; \
- MOVO v6, t1; \
- PUNPCKLQDQ v6, t2; \
- PUNPCKHQDQ v7, v6; \
- PUNPCKHQDQ t2, v6; \
- PUNPCKLQDQ v7, t2; \
- MOVO t1, v7; \
- MOVO v2, t1; \
- PUNPCKHQDQ t2, v7; \
- PUNPCKLQDQ v3, t2; \
- PUNPCKHQDQ t2, v2; \
- PUNPCKLQDQ t1, t2; \
- PUNPCKHQDQ t2, v3
-
-#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \
- MOVO v4, t1; \
- MOVO v5, v4; \
- MOVO t1, v5; \
- MOVO v2, t1; \
- PUNPCKLQDQ v2, t2; \
- PUNPCKHQDQ v3, v2; \
- PUNPCKHQDQ t2, v2; \
- PUNPCKLQDQ v3, t2; \
- MOVO t1, v3; \
- MOVO v6, t1; \
- PUNPCKHQDQ t2, v3; \
- PUNPCKLQDQ v7, t2; \
- PUNPCKHQDQ t2, v6; \
- PUNPCKLQDQ t1, t2; \
- PUNPCKHQDQ t2, v7
-
-#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
- PADDQ m0, v0; \
- PADDQ m1, v1; \
- PADDQ v2, v0; \
- PADDQ v3, v1; \
- PXOR v0, v6; \
- PXOR v1, v7; \
- PSHUFD $0xB1, v6, v6; \
- PSHUFD $0xB1, v7, v7; \
- PADDQ v6, v4; \
- PADDQ v7, v5; \
- PXOR v4, v2; \
- PXOR v5, v3; \
- PSHUFB c40, v2; \
- PSHUFB c40, v3; \
- PADDQ m2, v0; \
- PADDQ m3, v1; \
- PADDQ v2, v0; \
- PADDQ v3, v1; \
- PXOR v0, v6; \
- PXOR v1, v7; \
- PSHUFB c48, v6; \
- PSHUFB c48, v7; \
- PADDQ v6, v4; \
- PADDQ v7, v5; \
- PXOR v4, v2; \
- PXOR v5, v3; \
- MOVOU v2, t0; \
- PADDQ v2, t0; \
- PSRLQ $63, v2; \
- PXOR t0, v2; \
- MOVOU v3, t0; \
- PADDQ v3, t0; \
- PSRLQ $63, v3; \
- PXOR t0, v3
-
-#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \
- MOVQ i0*8(src), m0; \
- PINSRQ $1, i1*8(src), m0; \
- MOVQ i2*8(src), m1; \
- PINSRQ $1, i3*8(src), m1; \
- MOVQ i4*8(src), m2; \
- PINSRQ $1, i5*8(src), m2; \
- MOVQ i6*8(src), m3; \
- PINSRQ $1, i7*8(src), m3
-
// func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
-TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
- MOVQ h+0(FP), AX
- MOVQ c+8(FP), BX
- MOVQ flag+16(FP), CX
- MOVQ blocks_base+24(FP), SI
- MOVQ blocks_len+32(FP), DI
-
- MOVQ SP, R10
- ADDQ $15, R10
- ANDQ $~15, R10
-
- MOVOU ·iv3<>(SB), X0
- MOVO X0, 0(R10)
- XORQ CX, 0(R10) // 0(R10) = ·iv3 ^ (CX || 0)
-
- MOVOU ·c40<>(SB), X13
- MOVOU ·c48<>(SB), X14
-
- MOVOU 0(AX), X12
+// Requires: SSE2, SSE4.1, SSSE3
+TEXT ·hashBlocksSSE4(SB), NOSPLIT, $288-48
+ MOVQ h+0(FP), AX
+ MOVQ c+8(FP), BX
+ MOVQ flag+16(FP), CX
+ MOVQ blocks_base+24(FP), SI
+ MOVQ blocks_len+32(FP), DI
+ MOVQ SP, R10
+ ADDQ $0x0f, R10
+ ANDQ $-16, R10
+ MOVOU ·iv3<>+0(SB), X0
+ MOVO X0, (R10)
+ XORQ CX, (R10)
+ MOVOU ·c40<>+0(SB), X13
+ MOVOU ·c48<>+0(SB), X14
+ MOVOU (AX), X12
MOVOU 16(AX), X15
-
- MOVQ 0(BX), R8
- MOVQ 8(BX), R9
+ MOVQ (BX), R8
+ MOVQ 8(BX), R9
loop:
- ADDQ $128, R8
- CMPQ R8, $128
+ ADDQ $0x80, R8
+ CMPQ R8, $0x80
JGE noinc
INCQ R9
noinc:
- MOVQ R8, X8
- PINSRQ $1, R9, X8
-
- MOVO X12, X0
- MOVO X15, X1
- MOVOU 32(AX), X2
- MOVOU 48(AX), X3
- MOVOU ·iv0<>(SB), X4
- MOVOU ·iv1<>(SB), X5
- MOVOU ·iv2<>(SB), X6
-
- PXOR X8, X6
- MOVO 0(R10), X7
-
- LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7)
- MOVO X8, 16(R10)
- MOVO X9, 32(R10)
- MOVO X10, 48(R10)
- MOVO X11, 64(R10)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15)
- MOVO X8, 80(R10)
- MOVO X9, 96(R10)
- MOVO X10, 112(R10)
- MOVO X11, 128(R10)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6)
- MOVO X8, 144(R10)
- MOVO X9, 160(R10)
- MOVO X10, 176(R10)
- MOVO X11, 192(R10)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3)
- MOVO X8, 208(R10)
- MOVO X9, 224(R10)
- MOVO X10, 240(R10)
- MOVO X11, 256(R10)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
+ MOVQ R8, X8
+ PINSRQ $0x01, R9, X8
+ MOVO X12, X0
+ MOVO X15, X1
+ MOVOU 32(AX), X2
+ MOVOU 48(AX), X3
+ MOVOU ·iv0<>+0(SB), X4
+ MOVOU ·iv1<>+0(SB), X5
+ MOVOU ·iv2<>+0(SB), X6
+ PXOR X8, X6
+ MOVO (R10), X7
+ MOVQ (SI), X8
+ PINSRQ $0x01, 16(SI), X8
+ MOVQ 32(SI), X9
+ PINSRQ $0x01, 48(SI), X9
+ MOVQ 8(SI), X10
+ PINSRQ $0x01, 24(SI), X10
+ MOVQ 40(SI), X11
+ PINSRQ $0x01, 56(SI), X11
+ MOVO X8, 16(R10)
+ MOVO X9, 32(R10)
+ MOVO X10, 48(R10)
+ MOVO X11, 64(R10)
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ 64(SI), X8
+ PINSRQ $0x01, 80(SI), X8
+ MOVQ 96(SI), X9
+ PINSRQ $0x01, 112(SI), X9
+ MOVQ 72(SI), X10
+ PINSRQ $0x01, 88(SI), X10
+ MOVQ 104(SI), X11
+ PINSRQ $0x01, 120(SI), X11
+ MOVO X8, 80(R10)
+ MOVO X9, 96(R10)
+ MOVO X10, 112(R10)
+ MOVO X11, 128(R10)
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVQ 112(SI), X8
+ PINSRQ $0x01, 32(SI), X8
+ MOVQ 72(SI), X9
+ PINSRQ $0x01, 104(SI), X9
+ MOVQ 80(SI), X10
+ PINSRQ $0x01, 64(SI), X10
+ MOVQ 120(SI), X11
+ PINSRQ $0x01, 48(SI), X11
+ MOVO X8, 144(R10)
+ MOVO X9, 160(R10)
+ MOVO X10, 176(R10)
+ MOVO X11, 192(R10)
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ 8(SI), X8
+ PINSRQ $0x01, (SI), X8
+ MOVQ 88(SI), X9
+ PINSRQ $0x01, 40(SI), X9
+ MOVQ 96(SI), X10
+ PINSRQ $0x01, 16(SI), X10
+ MOVQ 56(SI), X11
+ PINSRQ $0x01, 24(SI), X11
+ MOVO X8, 208(R10)
+ MOVO X9, 224(R10)
+ MOVO X10, 240(R10)
+ MOVO X11, 256(R10)
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVQ 88(SI), X8
+ PINSRQ $0x01, 96(SI), X8
+ MOVQ 40(SI), X9
+ PINSRQ $0x01, 120(SI), X9
+ MOVQ 64(SI), X10
+ PINSRQ $0x01, (SI), X10
+ MOVQ 16(SI), X11
+ PINSRQ $0x01, 104(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ 80(SI), X8
+ PINSRQ $0x01, 24(SI), X8
+ MOVQ 56(SI), X9
+ PINSRQ $0x01, 72(SI), X9
+ MOVQ 112(SI), X10
+ PINSRQ $0x01, 48(SI), X10
+ MOVQ 8(SI), X11
+ PINSRQ $0x01, 32(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVQ 56(SI), X8
+ PINSRQ $0x01, 24(SI), X8
+ MOVQ 104(SI), X9
+ PINSRQ $0x01, 88(SI), X9
+ MOVQ 72(SI), X10
+ PINSRQ $0x01, 8(SI), X10
+ MOVQ 96(SI), X11
+ PINSRQ $0x01, 112(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ 16(SI), X8
+ PINSRQ $0x01, 40(SI), X8
+ MOVQ 32(SI), X9
+ PINSRQ $0x01, 120(SI), X9
+ MOVQ 48(SI), X10
+ PINSRQ $0x01, 80(SI), X10
+ MOVQ (SI), X11
+ PINSRQ $0x01, 64(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVQ 72(SI), X8
+ PINSRQ $0x01, 40(SI), X8
+ MOVQ 16(SI), X9
+ PINSRQ $0x01, 80(SI), X9
+ MOVQ (SI), X10
+ PINSRQ $0x01, 56(SI), X10
+ MOVQ 32(SI), X11
+ PINSRQ $0x01, 120(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ 112(SI), X8
+ PINSRQ $0x01, 88(SI), X8
+ MOVQ 48(SI), X9
+ PINSRQ $0x01, 24(SI), X9
+ MOVQ 8(SI), X10
+ PINSRQ $0x01, 96(SI), X10
+ MOVQ 64(SI), X11
+ PINSRQ $0x01, 104(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVQ 16(SI), X8
+ PINSRQ $0x01, 48(SI), X8
+ MOVQ (SI), X9
+ PINSRQ $0x01, 64(SI), X9
+ MOVQ 96(SI), X10
+ PINSRQ $0x01, 80(SI), X10
+ MOVQ 88(SI), X11
+ PINSRQ $0x01, 24(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ 32(SI), X8
+ PINSRQ $0x01, 56(SI), X8
+ MOVQ 120(SI), X9
+ PINSRQ $0x01, 8(SI), X9
+ MOVQ 104(SI), X10
+ PINSRQ $0x01, 40(SI), X10
+ MOVQ 112(SI), X11
+ PINSRQ $0x01, 72(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVQ 96(SI), X8
+ PINSRQ $0x01, 8(SI), X8
+ MOVQ 112(SI), X9
+ PINSRQ $0x01, 32(SI), X9
+ MOVQ 40(SI), X10
+ PINSRQ $0x01, 120(SI), X10
+ MOVQ 104(SI), X11
+ PINSRQ $0x01, 80(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ (SI), X8
+ PINSRQ $0x01, 48(SI), X8
+ MOVQ 72(SI), X9
+ PINSRQ $0x01, 64(SI), X9
+ MOVQ 56(SI), X10
+ PINSRQ $0x01, 24(SI), X10
+ MOVQ 16(SI), X11
+ PINSRQ $0x01, 88(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVQ 104(SI), X8
+ PINSRQ $0x01, 56(SI), X8
+ MOVQ 96(SI), X9
+ PINSRQ $0x01, 24(SI), X9
+ MOVQ 88(SI), X10
+ PINSRQ $0x01, 112(SI), X10
+ MOVQ 8(SI), X11
+ PINSRQ $0x01, 72(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ 40(SI), X8
+ PINSRQ $0x01, 120(SI), X8
+ MOVQ 64(SI), X9
+ PINSRQ $0x01, 16(SI), X9
+ MOVQ (SI), X10
+ PINSRQ $0x01, 32(SI), X10
+ MOVQ 48(SI), X11
+ PINSRQ $0x01, 80(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVQ 48(SI), X8
+ PINSRQ $0x01, 112(SI), X8
+ MOVQ 88(SI), X9
+ PINSRQ $0x01, (SI), X9
+ MOVQ 120(SI), X10
+ PINSRQ $0x01, 72(SI), X10
+ MOVQ 24(SI), X11
+ PINSRQ $0x01, 64(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ 96(SI), X8
+ PINSRQ $0x01, 104(SI), X8
+ MOVQ 8(SI), X9
+ PINSRQ $0x01, 80(SI), X9
+ MOVQ 16(SI), X10
+ PINSRQ $0x01, 56(SI), X10
+ MOVQ 32(SI), X11
+ PINSRQ $0x01, 40(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVQ 80(SI), X8
+ PINSRQ $0x01, 64(SI), X8
+ MOVQ 56(SI), X9
+ PINSRQ $0x01, 8(SI), X9
+ MOVQ 16(SI), X10
+ PINSRQ $0x01, 32(SI), X10
+ MOVQ 48(SI), X11
+ PINSRQ $0x01, 40(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ 120(SI), X8
+ PINSRQ $0x01, 72(SI), X8
+ MOVQ 24(SI), X9
+ PINSRQ $0x01, 104(SI), X9
+ MOVQ 88(SI), X10
+ PINSRQ $0x01, 112(SI), X10
+ MOVQ 96(SI), X11
+ PINSRQ $0x01, (SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ PADDQ 16(R10), X0
+ PADDQ 32(R10), X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ 48(R10), X0
+ PADDQ 64(R10), X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ PADDQ 80(R10), X0
+ PADDQ 96(R10), X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ 112(R10), X0
+ PADDQ 128(R10), X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ PADDQ 144(R10), X0
+ PADDQ 160(R10), X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ 176(R10), X0
+ PADDQ 192(R10), X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ PADDQ 208(R10), X0
+ PADDQ 224(R10), X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ 240(R10), X0
+ PADDQ 256(R10), X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU 32(AX), X10
+ MOVOU 48(AX), X11
+ PXOR X0, X12
+ PXOR X1, X15
+ PXOR X2, X10
+ PXOR X3, X11
+ PXOR X4, X12
+ PXOR X5, X15
+ PXOR X6, X10
+ PXOR X7, X11
+ MOVOU X10, 32(AX)
+ MOVOU X11, 48(AX)
+ LEAQ 128(SI), SI
+ SUBQ $0x80, DI
+ JNE loop
+ MOVOU X12, (AX)
+ MOVOU X15, 16(AX)
+ MOVQ R8, (BX)
+ MOVQ R9, 8(BX)
+ RET
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
+DATA ·iv3<>+0(SB)/8, $0x1f83d9abfb41bd6b
+DATA ·iv3<>+8(SB)/8, $0x5be0cd19137e2179
+GLOBL ·iv3<>(SB), RODATA|NOPTR, $16
- MOVOU 32(AX), X10
- MOVOU 48(AX), X11
- PXOR X0, X12
- PXOR X1, X15
- PXOR X2, X10
- PXOR X3, X11
- PXOR X4, X12
- PXOR X5, X15
- PXOR X6, X10
- PXOR X7, X11
- MOVOU X10, 32(AX)
- MOVOU X11, 48(AX)
+DATA ·c40<>+0(SB)/8, $0x0201000706050403
+DATA ·c40<>+8(SB)/8, $0x0a09080f0e0d0c0b
+GLOBL ·c40<>(SB), RODATA|NOPTR, $16
- LEAQ 128(SI), SI
- SUBQ $128, DI
- JNE loop
+DATA ·c48<>+0(SB)/8, $0x0100070605040302
+DATA ·c48<>+8(SB)/8, $0x09080f0e0d0c0b0a
+GLOBL ·c48<>(SB), RODATA|NOPTR, $16
- MOVOU X12, 0(AX)
- MOVOU X15, 16(AX)
+DATA ·iv0<>+0(SB)/8, $0x6a09e667f3bcc908
+DATA ·iv0<>+8(SB)/8, $0xbb67ae8584caa73b
+GLOBL ·iv0<>(SB), RODATA|NOPTR, $16
- MOVQ R8, 0(BX)
- MOVQ R9, 8(BX)
+DATA ·iv1<>+0(SB)/8, $0x3c6ef372fe94f82b
+DATA ·iv1<>+8(SB)/8, $0xa54ff53a5f1d36f1
+GLOBL ·iv1<>(SB), RODATA|NOPTR, $16
- RET
+DATA ·iv2<>+0(SB)/8, $0x510e527fade682d1
+DATA ·iv2<>+8(SB)/8, $0x9b05688c2b3e6c1f
+GLOBL ·iv2<>(SB), RODATA|NOPTR, $16
diff --git a/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s b/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s
index e0d3c647..13375738 100644
--- a/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s
+++ b/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s
@@ -1,108 +1,93 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
+// Code generated by command: go run sum_amd64_asm.go -out ../sum_amd64.s -pkg poly1305. DO NOT EDIT.
//go:build gc && !purego
-#include "textflag.h"
-
-#define POLY1305_ADD(msg, h0, h1, h2) \
- ADDQ 0(msg), h0; \
- ADCQ 8(msg), h1; \
- ADCQ $1, h2; \
- LEAQ 16(msg), msg
-
-#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
- MOVQ r0, AX; \
- MULQ h0; \
- MOVQ AX, t0; \
- MOVQ DX, t1; \
- MOVQ r0, AX; \
- MULQ h1; \
- ADDQ AX, t1; \
- ADCQ $0, DX; \
- MOVQ r0, t2; \
- IMULQ h2, t2; \
- ADDQ DX, t2; \
- \
- MOVQ r1, AX; \
- MULQ h0; \
- ADDQ AX, t1; \
- ADCQ $0, DX; \
- MOVQ DX, h0; \
- MOVQ r1, t3; \
- IMULQ h2, t3; \
- MOVQ r1, AX; \
- MULQ h1; \
- ADDQ AX, t2; \
- ADCQ DX, t3; \
- ADDQ h0, t2; \
- ADCQ $0, t3; \
- \
- MOVQ t0, h0; \
- MOVQ t1, h1; \
- MOVQ t2, h2; \
- ANDQ $3, h2; \
- MOVQ t2, t0; \
- ANDQ $0xFFFFFFFFFFFFFFFC, t0; \
- ADDQ t0, h0; \
- ADCQ t3, h1; \
- ADCQ $0, h2; \
- SHRQ $2, t3, t2; \
- SHRQ $2, t3; \
- ADDQ t2, h0; \
- ADCQ t3, h1; \
- ADCQ $0, h2
-
-// func update(state *[7]uint64, msg []byte)
+// func update(state *macState, msg []byte)
TEXT ·update(SB), $0-32
MOVQ state+0(FP), DI
MOVQ msg_base+8(FP), SI
MOVQ msg_len+16(FP), R15
-
- MOVQ 0(DI), R8 // h0
- MOVQ 8(DI), R9 // h1
- MOVQ 16(DI), R10 // h2
- MOVQ 24(DI), R11 // r0
- MOVQ 32(DI), R12 // r1
-
- CMPQ R15, $16
+ MOVQ (DI), R8
+ MOVQ 8(DI), R9
+ MOVQ 16(DI), R10
+ MOVQ 24(DI), R11
+ MOVQ 32(DI), R12
+ CMPQ R15, $0x10
JB bytes_between_0_and_15
loop:
- POLY1305_ADD(SI, R8, R9, R10)
+ ADDQ (SI), R8
+ ADCQ 8(SI), R9
+ ADCQ $0x01, R10
+ LEAQ 16(SI), SI
multiply:
- POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
- SUBQ $16, R15
- CMPQ R15, $16
- JAE loop
+ MOVQ R11, AX
+ MULQ R8
+ MOVQ AX, BX
+ MOVQ DX, CX
+ MOVQ R11, AX
+ MULQ R9
+ ADDQ AX, CX
+ ADCQ $0x00, DX
+ MOVQ R11, R13
+ IMULQ R10, R13
+ ADDQ DX, R13
+ MOVQ R12, AX
+ MULQ R8
+ ADDQ AX, CX
+ ADCQ $0x00, DX
+ MOVQ DX, R8
+ MOVQ R12, R14
+ IMULQ R10, R14
+ MOVQ R12, AX
+ MULQ R9
+ ADDQ AX, R13
+ ADCQ DX, R14
+ ADDQ R8, R13
+ ADCQ $0x00, R14
+ MOVQ BX, R8
+ MOVQ CX, R9
+ MOVQ R13, R10
+ ANDQ $0x03, R10
+ MOVQ R13, BX
+ ANDQ $-4, BX
+ ADDQ BX, R8
+ ADCQ R14, R9
+ ADCQ $0x00, R10
+ SHRQ $0x02, R14, R13
+ SHRQ $0x02, R14
+ ADDQ R13, R8
+ ADCQ R14, R9
+ ADCQ $0x00, R10
+ SUBQ $0x10, R15
+ CMPQ R15, $0x10
+ JAE loop
bytes_between_0_and_15:
TESTQ R15, R15
JZ done
- MOVQ $1, BX
+ MOVQ $0x00000001, BX
XORQ CX, CX
XORQ R13, R13
ADDQ R15, SI
flush_buffer:
- SHLQ $8, BX, CX
- SHLQ $8, BX
+ SHLQ $0x08, BX, CX
+ SHLQ $0x08, BX
MOVB -1(SI), R13
XORQ R13, BX
DECQ SI
DECQ R15
JNZ flush_buffer
-
ADDQ BX, R8
ADCQ CX, R9
- ADCQ $0, R10
- MOVQ $16, R15
+ ADCQ $0x00, R10
+ MOVQ $0x00000010, R15
JMP multiply
done:
- MOVQ R8, 0(DI)
+ MOVQ R8, (DI)
MOVQ R9, 8(DI)
MOVQ R10, 16(DI)
RET
diff --git a/vendor/golang.org/x/crypto/ssh/agent/keyring.go b/vendor/golang.org/x/crypto/ssh/agent/keyring.go
index 21bfa870..c1b43610 100644
--- a/vendor/golang.org/x/crypto/ssh/agent/keyring.go
+++ b/vendor/golang.org/x/crypto/ssh/agent/keyring.go
@@ -175,6 +175,15 @@ func (r *keyring) Add(key AddedKey) error {
p.expire = &t
}
+ // If we already have a Signer with the same public key, replace it with the
+ // new one.
+ for idx, k := range r.keys {
+ if bytes.Equal(k.signer.PublicKey().Marshal(), p.signer.PublicKey().Marshal()) {
+ r.keys[idx] = p
+ return nil
+ }
+ }
+
r.keys = append(r.keys, p)
return nil
diff --git a/vendor/golang.org/x/sys/cpu/cpu.go b/vendor/golang.org/x/sys/cpu/cpu.go
index ec07aab0..02609d5b 100644
--- a/vendor/golang.org/x/sys/cpu/cpu.go
+++ b/vendor/golang.org/x/sys/cpu/cpu.go
@@ -201,6 +201,25 @@ var S390X struct {
_ CacheLinePad
}
+// RISCV64 contains the supported CPU features and performance characteristics for riscv64
+// platforms. The booleans in RISCV64, with the exception of HasFastMisaligned, indicate
+// the presence of RISC-V extensions.
+//
+// It is safe to assume that all the RV64G extensions are supported and so they are omitted from
+// this structure. As riscv64 Go programs require at least RV64G, the code that populates
+// this structure cannot run successfully if some of the RV64G extensions are missing.
+// The struct is padded to avoid false sharing.
+var RISCV64 struct {
+ _ CacheLinePad
+ HasFastMisaligned bool // Fast misaligned accesses
+ HasC bool // Compressed instruction-set extension
+ HasV bool // Vector extension compatible with RVV 1.0
+ HasZba bool // Address generation instructions extension
+ HasZbb bool // Basic bit-manipulation extension
+ HasZbs bool // Single-bit instructions extension
+ _ CacheLinePad
+}
+
func init() {
archInit()
initOptions()
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
index cd63e733..7d902b68 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x
+//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x && !riscv64
package cpu
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go
new file mode 100644
index 00000000..cb4a0c57
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go
@@ -0,0 +1,137 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+import (
+ "syscall"
+ "unsafe"
+)
+
+// RISC-V extension discovery code for Linux. The approach here is to first try the riscv_hwprobe
+// syscall falling back to HWCAP to check for the C extension if riscv_hwprobe is not available.
+//
+// A note on detection of the Vector extension using HWCAP.
+//
+// Support for the Vector extension version 1.0 was added to the Linux kernel in release 6.5.
+// Support for the riscv_hwprobe syscall was added in 6.4. It follows that if the riscv_hwprobe
+// syscall is not available then neither is the Vector extension (which needs kernel support).
+// The riscv_hwprobe syscall should then be all we need to detect the Vector extension.
+// However, some RISC-V board manufacturers ship boards with an older kernel on top of which
+// they have back-ported various versions of the Vector extension patches but not the riscv_hwprobe
+// patches. These kernels advertise support for the Vector extension using HWCAP. Falling
+// back to HWCAP to detect the Vector extension, if riscv_hwprobe is not available, or simply not
+// bothering with riscv_hwprobe at all and just using HWCAP may then seem like an attractive option.
+//
+// Unfortunately, simply checking the 'V' bit in AT_HWCAP will not work as this bit is used by
+// RISC-V board and cloud instance providers to mean different things. The Lichee Pi 4A board
+// and the Scaleway RV1 cloud instances use the 'V' bit to advertise their support for the unratified
+// 0.7.1 version of the Vector Specification. The Banana Pi BPI-F3 and the CanMV-K230 board use
+// it to advertise support for 1.0 of the Vector extension. Versions 0.7.1 and 1.0 of the Vector
+// extension are binary incompatible. HWCAP can then not be used in isolation to populate the
+// HasV field as this field indicates that the underlying CPU is compatible with RVV 1.0.
+//
+// There is a way at runtime to distinguish between versions 0.7.1 and 1.0 of the Vector
+// specification by issuing a RVV 1.0 vsetvli instruction and checking the vill bit of the vtype
+// register. This check would allow us to safely detect version 1.0 of the Vector extension
+// with HWCAP, if riscv_hwprobe were not available. However, the check cannot
+// be added until the assembler supports the Vector instructions.
+//
+// Note the riscv_hwprobe syscall does not suffer from these ambiguities by design as all of the
+// extensions it advertises support for are explicitly versioned. It's also worth noting that
+// the riscv_hwprobe syscall is the only way to detect multi-letter RISC-V extensions, e.g., Zba.
+// These cannot be detected using HWCAP and so riscv_hwprobe must be used to detect the majority
+// of RISC-V extensions.
+//
+// Please see https://docs.kernel.org/arch/riscv/hwprobe.html for more information.
+
+// golang.org/x/sys/cpu is not allowed to depend on golang.org/x/sys/unix so we must
+// reproduce the constants, types and functions needed to make the riscv_hwprobe syscall
+// here.
+
+const (
+ // Copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go.
+ riscv_HWPROBE_KEY_IMA_EXT_0 = 0x4
+ riscv_HWPROBE_IMA_C = 0x2
+ riscv_HWPROBE_IMA_V = 0x4
+ riscv_HWPROBE_EXT_ZBA = 0x8
+ riscv_HWPROBE_EXT_ZBB = 0x10
+ riscv_HWPROBE_EXT_ZBS = 0x20
+ riscv_HWPROBE_KEY_CPUPERF_0 = 0x5
+ riscv_HWPROBE_MISALIGNED_FAST = 0x3
+ riscv_HWPROBE_MISALIGNED_MASK = 0x7
+)
+
+const (
+ // sys_RISCV_HWPROBE is copied from golang.org/x/sys/unix/zsysnum_linux_riscv64.go.
+ sys_RISCV_HWPROBE = 258
+)
+
+// riscvHWProbePairs is copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go.
+type riscvHWProbePairs struct {
+ key int64
+ value uint64
+}
+
+const (
+ // CPU features
+ hwcap_RISCV_ISA_C = 1 << ('C' - 'A')
+)
+
+func doinit() {
+ // A slice of key/value pair structures is passed to the RISCVHWProbe syscall. The key
+ // field should be initialised with one of the key constants defined above, e.g.,
+ // RISCV_HWPROBE_KEY_IMA_EXT_0. The syscall will set the value field to the appropriate value.
+ // If the kernel does not recognise a key it will set the key field to -1 and the value field to 0.
+
+ pairs := []riscvHWProbePairs{
+ {riscv_HWPROBE_KEY_IMA_EXT_0, 0},
+ {riscv_HWPROBE_KEY_CPUPERF_0, 0},
+ }
+
+ // This call only indicates that extensions are supported if they are implemented on all cores.
+ if riscvHWProbe(pairs, 0) {
+ if pairs[0].key != -1 {
+ v := uint(pairs[0].value)
+ RISCV64.HasC = isSet(v, riscv_HWPROBE_IMA_C)
+ RISCV64.HasV = isSet(v, riscv_HWPROBE_IMA_V)
+ RISCV64.HasZba = isSet(v, riscv_HWPROBE_EXT_ZBA)
+ RISCV64.HasZbb = isSet(v, riscv_HWPROBE_EXT_ZBB)
+ RISCV64.HasZbs = isSet(v, riscv_HWPROBE_EXT_ZBS)
+ }
+ if pairs[1].key != -1 {
+ v := pairs[1].value & riscv_HWPROBE_MISALIGNED_MASK
+ RISCV64.HasFastMisaligned = v == riscv_HWPROBE_MISALIGNED_FAST
+ }
+ }
+
+ // Let's double check with HWCAP if the C extension does not appear to be supported.
+ // This may happen if we're running on a kernel older than 6.4.
+
+ if !RISCV64.HasC {
+ RISCV64.HasC = isSet(hwCap, hwcap_RISCV_ISA_C)
+ }
+}
+
+func isSet(hwc uint, value uint) bool {
+ return hwc&value != 0
+}
+
+// riscvHWProbe is a simplified version of the generated wrapper function found in
+// golang.org/x/sys/unix/zsyscall_linux_riscv64.go. We simplify it by removing the
+// cpuCount and cpus parameters which we do not need. We always want to pass 0 for
+// these parameters here so the kernel only reports the extensions that are present
+// on all cores.
+func riscvHWProbe(pairs []riscvHWProbePairs, flags uint) bool {
+ var _zero uintptr
+ var p0 unsafe.Pointer
+ if len(pairs) > 0 {
+ p0 = unsafe.Pointer(&pairs[0])
+ } else {
+ p0 = unsafe.Pointer(&_zero)
+ }
+
+ _, _, e1 := syscall.Syscall6(sys_RISCV_HWPROBE, uintptr(p0), uintptr(len(pairs)), uintptr(0), uintptr(0), uintptr(flags), 0)
+ return e1 == 0
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
index 7f0c79c0..aca3199c 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
@@ -8,4 +8,13 @@ package cpu
const cacheLineSize = 64
-func initOptions() {}
+func initOptions() {
+ options = []option{
+ {Name: "fastmisaligned", Feature: &RISCV64.HasFastMisaligned},
+ {Name: "c", Feature: &RISCV64.HasC},
+ {Name: "v", Feature: &RISCV64.HasV},
+ {Name: "zba", Feature: &RISCV64.HasZba},
+ {Name: "zbb", Feature: &RISCV64.HasZbb},
+ {Name: "zbs", Feature: &RISCV64.HasZbs},
+ }
+}
diff --git a/vendor/golang.org/x/sys/unix/mkerrors.sh b/vendor/golang.org/x/sys/unix/mkerrors.sh
index d07dd09e..e14b766a 100644
--- a/vendor/golang.org/x/sys/unix/mkerrors.sh
+++ b/vendor/golang.org/x/sys/unix/mkerrors.sh
@@ -552,6 +552,7 @@ ccflags="$@"
$2 !~ /^RTC_VL_(ACCURACY|BACKUP|DATA)/ &&
$2 ~ /^(NETLINK|NLM|NLMSG|NLA|IFA|IFAN|RT|RTC|RTCF|RTN|RTPROT|RTNH|ARPHRD|ETH_P|NETNSA)_/ ||
$2 ~ /^SOCK_|SK_DIAG_|SKNLGRP_$/ ||
+ $2 ~ /^(CONNECT|SAE)_/ ||
$2 ~ /^FIORDCHK$/ ||
$2 ~ /^SIOC/ ||
$2 ~ /^TIOC/ ||
diff --git a/vendor/golang.org/x/sys/unix/syscall_darwin.go b/vendor/golang.org/x/sys/unix/syscall_darwin.go
index 2d15200a..099867de 100644
--- a/vendor/golang.org/x/sys/unix/syscall_darwin.go
+++ b/vendor/golang.org/x/sys/unix/syscall_darwin.go
@@ -566,6 +566,43 @@ func PthreadFchdir(fd int) (err error) {
return pthread_fchdir_np(fd)
}
+// Connectx calls connectx(2) to initiate a connection on a socket.
+//
+// srcIf, srcAddr, and dstAddr are filled into a [SaEndpoints] struct and passed as the endpoints argument.
+//
+// - srcIf is the optional source interface index. 0 means unspecified.
+// - srcAddr is the optional source address. nil means unspecified.
+// - dstAddr is the destination address.
+//
+// On success, Connectx returns the number of bytes enqueued for transmission.
+func Connectx(fd int, srcIf uint32, srcAddr, dstAddr Sockaddr, associd SaeAssocID, flags uint32, iov []Iovec, connid *SaeConnID) (n uintptr, err error) {
+ endpoints := SaEndpoints{
+ Srcif: srcIf,
+ }
+
+ if srcAddr != nil {
+ addrp, addrlen, err := srcAddr.sockaddr()
+ if err != nil {
+ return 0, err
+ }
+ endpoints.Srcaddr = (*RawSockaddr)(addrp)
+ endpoints.Srcaddrlen = uint32(addrlen)
+ }
+
+ if dstAddr != nil {
+ addrp, addrlen, err := dstAddr.sockaddr()
+ if err != nil {
+ return 0, err
+ }
+ endpoints.Dstaddr = (*RawSockaddr)(addrp)
+ endpoints.Dstaddrlen = uint32(addrlen)
+ }
+
+ err = connectx(fd, &endpoints, associd, flags, iov, &n, connid)
+ return
+}
+
+//sys connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error)
//sys sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error)
//sys shmat(id int, addr uintptr, flag int) (ret uintptr, err error)
diff --git a/vendor/golang.org/x/sys/unix/syscall_hurd.go b/vendor/golang.org/x/sys/unix/syscall_hurd.go
index ba46651f..a6a2d2fc 100644
--- a/vendor/golang.org/x/sys/unix/syscall_hurd.go
+++ b/vendor/golang.org/x/sys/unix/syscall_hurd.go
@@ -11,6 +11,7 @@ package unix
int ioctl(int, unsigned long int, uintptr_t);
*/
import "C"
+import "unsafe"
func ioctl(fd int, req uint, arg uintptr) (err error) {
r0, er := C.ioctl(C.int(fd), C.ulong(req), C.uintptr_t(arg))
diff --git a/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go b/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go
index 4308ac17..d73c4652 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go
@@ -237,6 +237,9 @@ const (
CLOCK_UPTIME_RAW_APPROX = 0x9
CLONE_NOFOLLOW = 0x1
CLONE_NOOWNERCOPY = 0x2
+ CONNECT_DATA_AUTHENTICATED = 0x4
+ CONNECT_DATA_IDEMPOTENT = 0x2
+ CONNECT_RESUME_ON_READ_WRITE = 0x1
CR0 = 0x0
CR1 = 0x1000
CR2 = 0x2000
@@ -1265,6 +1268,10 @@ const (
RTV_SSTHRESH = 0x20
RUSAGE_CHILDREN = -0x1
RUSAGE_SELF = 0x0
+ SAE_ASSOCID_ALL = 0xffffffff
+ SAE_ASSOCID_ANY = 0x0
+ SAE_CONNID_ALL = 0xffffffff
+ SAE_CONNID_ANY = 0x0
SCM_CREDS = 0x3
SCM_RIGHTS = 0x1
SCM_TIMESTAMP = 0x2
diff --git a/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go b/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go
index c8068a7a..4a55a400 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go
@@ -237,6 +237,9 @@ const (
CLOCK_UPTIME_RAW_APPROX = 0x9
CLONE_NOFOLLOW = 0x1
CLONE_NOOWNERCOPY = 0x2
+ CONNECT_DATA_AUTHENTICATED = 0x4
+ CONNECT_DATA_IDEMPOTENT = 0x2
+ CONNECT_RESUME_ON_READ_WRITE = 0x1
CR0 = 0x0
CR1 = 0x1000
CR2 = 0x2000
@@ -1265,6 +1268,10 @@ const (
RTV_SSTHRESH = 0x20
RUSAGE_CHILDREN = -0x1
RUSAGE_SELF = 0x0
+ SAE_ASSOCID_ALL = 0xffffffff
+ SAE_ASSOCID_ANY = 0x0
+ SAE_CONNID_ALL = 0xffffffff
+ SAE_CONNID_ANY = 0x0
SCM_CREDS = 0x3
SCM_RIGHTS = 0x1
SCM_TIMESTAMP = 0x2
diff --git a/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go b/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go
index da08b2ab..1ec2b140 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go
@@ -581,6 +581,8 @@ const (
AT_EMPTY_PATH = 0x1000
AT_REMOVEDIR = 0x200
RENAME_NOREPLACE = 1 << 0
+ ST_RDONLY = 1
+ ST_NOSUID = 2
)
const (
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go
index b622533e..24b346e1 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go
+++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go
@@ -841,6 +841,26 @@ var libc_pthread_fchdir_np_trampoline_addr uintptr
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+func connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) {
+ var _p0 unsafe.Pointer
+ if len(iov) > 0 {
+ _p0 = unsafe.Pointer(&iov[0])
+ } else {
+ _p0 = unsafe.Pointer(&_zero)
+ }
+ _, _, e1 := syscall_syscall9(libc_connectx_trampoline_addr, uintptr(fd), uintptr(unsafe.Pointer(endpoints)), uintptr(associd), uintptr(flags), uintptr(_p0), uintptr(len(iov)), uintptr(unsafe.Pointer(n)), uintptr(unsafe.Pointer(connid)), 0)
+ if e1 != 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+var libc_connectx_trampoline_addr uintptr
+
+//go:cgo_import_dynamic libc_connectx connectx "/usr/lib/libSystem.B.dylib"
+
+// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+
func sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) {
_, _, e1 := syscall_syscall6(libc_sendfile_trampoline_addr, uintptr(infd), uintptr(outfd), uintptr(offset), uintptr(unsafe.Pointer(len)), uintptr(hdtr), uintptr(flags))
if e1 != 0 {
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s
index cfe6646b..ebd21310 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s
+++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s
@@ -248,6 +248,11 @@ TEXT libc_pthread_fchdir_np_trampoline<>(SB),NOSPLIT,$0-0
GLOBL ·libc_pthread_fchdir_np_trampoline_addr(SB), RODATA, $8
DATA ·libc_pthread_fchdir_np_trampoline_addr(SB)/8, $libc_pthread_fchdir_np_trampoline<>(SB)
+TEXT libc_connectx_trampoline<>(SB),NOSPLIT,$0-0
+ JMP libc_connectx(SB)
+GLOBL ·libc_connectx_trampoline_addr(SB), RODATA, $8
+DATA ·libc_connectx_trampoline_addr(SB)/8, $libc_connectx_trampoline<>(SB)
+
TEXT libc_sendfile_trampoline<>(SB),NOSPLIT,$0-0
JMP libc_sendfile(SB)
GLOBL ·libc_sendfile_trampoline_addr(SB), RODATA, $8
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go
index 13f624f6..824b9c2d 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go
+++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go
@@ -841,6 +841,26 @@ var libc_pthread_fchdir_np_trampoline_addr uintptr
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+func connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) {
+ var _p0 unsafe.Pointer
+ if len(iov) > 0 {
+ _p0 = unsafe.Pointer(&iov[0])
+ } else {
+ _p0 = unsafe.Pointer(&_zero)
+ }
+ _, _, e1 := syscall_syscall9(libc_connectx_trampoline_addr, uintptr(fd), uintptr(unsafe.Pointer(endpoints)), uintptr(associd), uintptr(flags), uintptr(_p0), uintptr(len(iov)), uintptr(unsafe.Pointer(n)), uintptr(unsafe.Pointer(connid)), 0)
+ if e1 != 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+var libc_connectx_trampoline_addr uintptr
+
+//go:cgo_import_dynamic libc_connectx connectx "/usr/lib/libSystem.B.dylib"
+
+// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+
func sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) {
_, _, e1 := syscall_syscall6(libc_sendfile_trampoline_addr, uintptr(infd), uintptr(outfd), uintptr(offset), uintptr(unsafe.Pointer(len)), uintptr(hdtr), uintptr(flags))
if e1 != 0 {
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s
index fe222b75..4f178a22 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s
+++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s
@@ -248,6 +248,11 @@ TEXT libc_pthread_fchdir_np_trampoline<>(SB),NOSPLIT,$0-0
GLOBL ·libc_pthread_fchdir_np_trampoline_addr(SB), RODATA, $8
DATA ·libc_pthread_fchdir_np_trampoline_addr(SB)/8, $libc_pthread_fchdir_np_trampoline<>(SB)
+TEXT libc_connectx_trampoline<>(SB),NOSPLIT,$0-0
+ JMP libc_connectx(SB)
+GLOBL ·libc_connectx_trampoline_addr(SB), RODATA, $8
+DATA ·libc_connectx_trampoline_addr(SB)/8, $libc_connectx_trampoline<>(SB)
+
TEXT libc_sendfile_trampoline<>(SB),NOSPLIT,$0-0
JMP libc_sendfile(SB)
GLOBL ·libc_sendfile_trampoline_addr(SB), RODATA, $8
diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go
index 091d107f..d003c3d4 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go
@@ -306,6 +306,19 @@ type XVSockPgen struct {
type _Socklen uint32
+type SaeAssocID uint32
+
+type SaeConnID uint32
+
+type SaEndpoints struct {
+ Srcif uint32
+ Srcaddr *RawSockaddr
+ Srcaddrlen uint32
+ Dstaddr *RawSockaddr
+ Dstaddrlen uint32
+ _ [4]byte
+}
+
type Xucred struct {
Version uint32
Uid uint32
diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go
index 28ff4ef7..0d45a941 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go
@@ -306,6 +306,19 @@ type XVSockPgen struct {
type _Socklen uint32
+type SaeAssocID uint32
+
+type SaeConnID uint32
+
+type SaEndpoints struct {
+ Srcif uint32
+ Srcaddr *RawSockaddr
+ Srcaddrlen uint32
+ Dstaddr *RawSockaddr
+ Dstaddrlen uint32
+ _ [4]byte
+}
+
type Xucred struct {
Version uint32
Uid uint32
diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go
index 6cbd094a..51e13eb0 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go
@@ -625,6 +625,7 @@ const (
POLLRDNORM = 0x40
POLLWRBAND = 0x100
POLLWRNORM = 0x4
+ POLLRDHUP = 0x4000
)
type CapRights struct {
diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go
index 7c03b6ee..d002d8ef 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go
@@ -630,6 +630,7 @@ const (
POLLRDNORM = 0x40
POLLWRBAND = 0x100
POLLWRNORM = 0x4
+ POLLRDHUP = 0x4000
)
type CapRights struct {
diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go
index 422107ee..3f863d89 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go
@@ -616,6 +616,7 @@ const (
POLLRDNORM = 0x40
POLLWRBAND = 0x100
POLLWRNORM = 0x4
+ POLLRDHUP = 0x4000
)
type CapRights struct {
diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go
index 505a12ac..61c72931 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go
@@ -610,6 +610,7 @@ const (
POLLRDNORM = 0x40
POLLWRBAND = 0x100
POLLWRNORM = 0x4
+ POLLRDHUP = 0x4000
)
type CapRights struct {
diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go
index cc986c79..b5d17414 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go
@@ -612,6 +612,7 @@ const (
POLLRDNORM = 0x40
POLLWRBAND = 0x100
POLLWRNORM = 0x4
+ POLLRDHUP = 0x4000
)
type CapRights struct {
diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux.go b/vendor/golang.org/x/sys/unix/ztypes_linux.go
index b102b95a..9f2550dc 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_linux.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_linux.go
@@ -2486,7 +2486,7 @@ type XDPMmapOffsets struct {
type XDPUmemReg struct {
Addr uint64
Len uint64
- Chunk_size uint32
+ Size uint32
Headroom uint32
Flags uint32
Tx_metadata_len uint32
@@ -3807,6 +3807,9 @@ const (
ETHTOOL_MSG_PSE_GET_REPLY = 0x25
ETHTOOL_MSG_RSS_GET_REPLY = 0x26
ETHTOOL_MSG_KERNEL_MAX = 0x2b
+ ETHTOOL_FLAG_COMPACT_BITSETS = 0x1
+ ETHTOOL_FLAG_OMIT_REPLY = 0x2
+ ETHTOOL_FLAG_STATS = 0x4
ETHTOOL_A_HEADER_UNSPEC = 0x0
ETHTOOL_A_HEADER_DEV_INDEX = 0x1
ETHTOOL_A_HEADER_DEV_NAME = 0x2
diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go
index 15adc041..ad05b51a 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go
@@ -727,6 +727,37 @@ const (
RISCV_HWPROBE_EXT_ZBA = 0x8
RISCV_HWPROBE_EXT_ZBB = 0x10
RISCV_HWPROBE_EXT_ZBS = 0x20
+ RISCV_HWPROBE_EXT_ZICBOZ = 0x40
+ RISCV_HWPROBE_EXT_ZBC = 0x80
+ RISCV_HWPROBE_EXT_ZBKB = 0x100
+ RISCV_HWPROBE_EXT_ZBKC = 0x200
+ RISCV_HWPROBE_EXT_ZBKX = 0x400
+ RISCV_HWPROBE_EXT_ZKND = 0x800
+ RISCV_HWPROBE_EXT_ZKNE = 0x1000
+ RISCV_HWPROBE_EXT_ZKNH = 0x2000
+ RISCV_HWPROBE_EXT_ZKSED = 0x4000
+ RISCV_HWPROBE_EXT_ZKSH = 0x8000
+ RISCV_HWPROBE_EXT_ZKT = 0x10000
+ RISCV_HWPROBE_EXT_ZVBB = 0x20000
+ RISCV_HWPROBE_EXT_ZVBC = 0x40000
+ RISCV_HWPROBE_EXT_ZVKB = 0x80000
+ RISCV_HWPROBE_EXT_ZVKG = 0x100000
+ RISCV_HWPROBE_EXT_ZVKNED = 0x200000
+ RISCV_HWPROBE_EXT_ZVKNHA = 0x400000
+ RISCV_HWPROBE_EXT_ZVKNHB = 0x800000
+ RISCV_HWPROBE_EXT_ZVKSED = 0x1000000
+ RISCV_HWPROBE_EXT_ZVKSH = 0x2000000
+ RISCV_HWPROBE_EXT_ZVKT = 0x4000000
+ RISCV_HWPROBE_EXT_ZFH = 0x8000000
+ RISCV_HWPROBE_EXT_ZFHMIN = 0x10000000
+ RISCV_HWPROBE_EXT_ZIHINTNTL = 0x20000000
+ RISCV_HWPROBE_EXT_ZVFH = 0x40000000
+ RISCV_HWPROBE_EXT_ZVFHMIN = 0x80000000
+ RISCV_HWPROBE_EXT_ZFA = 0x100000000
+ RISCV_HWPROBE_EXT_ZTSO = 0x200000000
+ RISCV_HWPROBE_EXT_ZACAS = 0x400000000
+ RISCV_HWPROBE_EXT_ZICOND = 0x800000000
+ RISCV_HWPROBE_EXT_ZIHINTPAUSE = 0x1000000000
RISCV_HWPROBE_KEY_CPUPERF_0 = 0x5
RISCV_HWPROBE_MISALIGNED_UNKNOWN = 0x0
RISCV_HWPROBE_MISALIGNED_EMULATED = 0x1
@@ -734,4 +765,6 @@ const (
RISCV_HWPROBE_MISALIGNED_FAST = 0x3
RISCV_HWPROBE_MISALIGNED_UNSUPPORTED = 0x4
RISCV_HWPROBE_MISALIGNED_MASK = 0x7
+ RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE = 0x6
+ RISCV_HWPROBE_WHICH_CPUS = 0x1
)
diff --git a/vendor/golang.org/x/sys/windows/syscall_windows.go b/vendor/golang.org/x/sys/windows/syscall_windows.go
index 1fa34fd1..5cee9a31 100644
--- a/vendor/golang.org/x/sys/windows/syscall_windows.go
+++ b/vendor/golang.org/x/sys/windows/syscall_windows.go
@@ -313,6 +313,10 @@ func NewCallbackCDecl(fn interface{}) uintptr {
//sys SetConsoleMode(console Handle, mode uint32) (err error) = kernel32.SetConsoleMode
//sys GetConsoleScreenBufferInfo(console Handle, info *ConsoleScreenBufferInfo) (err error) = kernel32.GetConsoleScreenBufferInfo
//sys setConsoleCursorPosition(console Handle, position uint32) (err error) = kernel32.SetConsoleCursorPosition
+//sys GetConsoleCP() (cp uint32, err error) = kernel32.GetConsoleCP
+//sys GetConsoleOutputCP() (cp uint32, err error) = kernel32.GetConsoleOutputCP
+//sys SetConsoleCP(cp uint32) (err error) = kernel32.SetConsoleCP
+//sys SetConsoleOutputCP(cp uint32) (err error) = kernel32.SetConsoleOutputCP
//sys WriteConsole(console Handle, buf *uint16, towrite uint32, written *uint32, reserved *byte) (err error) = kernel32.WriteConsoleW
//sys ReadConsole(console Handle, buf *uint16, toread uint32, read *uint32, inputControl *byte) (err error) = kernel32.ReadConsoleW
//sys resizePseudoConsole(pconsole Handle, size uint32) (hr error) = kernel32.ResizePseudoConsole
diff --git a/vendor/golang.org/x/sys/windows/types_windows.go b/vendor/golang.org/x/sys/windows/types_windows.go
index 4d0c1574..7b97a154 100644
--- a/vendor/golang.org/x/sys/windows/types_windows.go
+++ b/vendor/golang.org/x/sys/windows/types_windows.go
@@ -1060,6 +1060,7 @@ const (
SIO_GET_EXTENSION_FUNCTION_POINTER = IOC_INOUT | IOC_WS2 | 6
SIO_KEEPALIVE_VALS = IOC_IN | IOC_VENDOR | 4
SIO_UDP_CONNRESET = IOC_IN | IOC_VENDOR | 12
+ SIO_UDP_NETRESET = IOC_IN | IOC_VENDOR | 15
// cf. http://support.microsoft.com/default.aspx?scid=kb;en-us;257460
@@ -2031,6 +2032,50 @@ const (
IF_TYPE_IEEE1394 = 144
)
+// Enum NL_PREFIX_ORIGIN for [IpAdapterUnicastAddress], see
+// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_prefix_origin
+const (
+ IpPrefixOriginOther = 0
+ IpPrefixOriginManual = 1
+ IpPrefixOriginWellKnown = 2
+ IpPrefixOriginDhcp = 3
+ IpPrefixOriginRouterAdvertisement = 4
+ IpPrefixOriginUnchanged = 1 << 4
+)
+
+// Enum NL_SUFFIX_ORIGIN for [IpAdapterUnicastAddress], see
+// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_suffix_origin
+const (
+ NlsoOther = 0
+ NlsoManual = 1
+ NlsoWellKnown = 2
+ NlsoDhcp = 3
+ NlsoLinkLayerAddress = 4
+ NlsoRandom = 5
+ IpSuffixOriginOther = 0
+ IpSuffixOriginManual = 1
+ IpSuffixOriginWellKnown = 2
+ IpSuffixOriginDhcp = 3
+ IpSuffixOriginLinkLayerAddress = 4
+ IpSuffixOriginRandom = 5
+ IpSuffixOriginUnchanged = 1 << 4
+)
+
+// Enum NL_DAD_STATE for [IpAdapterUnicastAddress], see
+// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_dad_state
+const (
+ NldsInvalid = 0
+ NldsTentative = 1
+ NldsDuplicate = 2
+ NldsDeprecated = 3
+ NldsPreferred = 4
+ IpDadStateInvalid = 0
+ IpDadStateTentative = 1
+ IpDadStateDuplicate = 2
+ IpDadStateDeprecated = 3
+ IpDadStatePreferred = 4
+)
+
type SocketAddress struct {
Sockaddr *syscall.RawSockaddrAny
SockaddrLength int32
diff --git a/vendor/golang.org/x/sys/windows/zsyscall_windows.go b/vendor/golang.org/x/sys/windows/zsyscall_windows.go
index 9bb979a3..4c2e1bdc 100644
--- a/vendor/golang.org/x/sys/windows/zsyscall_windows.go
+++ b/vendor/golang.org/x/sys/windows/zsyscall_windows.go
@@ -247,7 +247,9 @@ var (
procGetCommandLineW = modkernel32.NewProc("GetCommandLineW")
procGetComputerNameExW = modkernel32.NewProc("GetComputerNameExW")
procGetComputerNameW = modkernel32.NewProc("GetComputerNameW")
+ procGetConsoleCP = modkernel32.NewProc("GetConsoleCP")
procGetConsoleMode = modkernel32.NewProc("GetConsoleMode")
+ procGetConsoleOutputCP = modkernel32.NewProc("GetConsoleOutputCP")
procGetConsoleScreenBufferInfo = modkernel32.NewProc("GetConsoleScreenBufferInfo")
procGetCurrentDirectoryW = modkernel32.NewProc("GetCurrentDirectoryW")
procGetCurrentProcessId = modkernel32.NewProc("GetCurrentProcessId")
@@ -347,8 +349,10 @@ var (
procSetCommMask = modkernel32.NewProc("SetCommMask")
procSetCommState = modkernel32.NewProc("SetCommState")
procSetCommTimeouts = modkernel32.NewProc("SetCommTimeouts")
+ procSetConsoleCP = modkernel32.NewProc("SetConsoleCP")
procSetConsoleCursorPosition = modkernel32.NewProc("SetConsoleCursorPosition")
procSetConsoleMode = modkernel32.NewProc("SetConsoleMode")
+ procSetConsoleOutputCP = modkernel32.NewProc("SetConsoleOutputCP")
procSetCurrentDirectoryW = modkernel32.NewProc("SetCurrentDirectoryW")
procSetDefaultDllDirectories = modkernel32.NewProc("SetDefaultDllDirectories")
procSetDllDirectoryW = modkernel32.NewProc("SetDllDirectoryW")
@@ -2162,6 +2166,15 @@ func GetComputerName(buf *uint16, n *uint32) (err error) {
return
}
+func GetConsoleCP() (cp uint32, err error) {
+ r0, _, e1 := syscall.Syscall(procGetConsoleCP.Addr(), 0, 0, 0, 0)
+ cp = uint32(r0)
+ if cp == 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
func GetConsoleMode(console Handle, mode *uint32) (err error) {
r1, _, e1 := syscall.Syscall(procGetConsoleMode.Addr(), 2, uintptr(console), uintptr(unsafe.Pointer(mode)), 0)
if r1 == 0 {
@@ -2170,6 +2183,15 @@ func GetConsoleMode(console Handle, mode *uint32) (err error) {
return
}
+func GetConsoleOutputCP() (cp uint32, err error) {
+ r0, _, e1 := syscall.Syscall(procGetConsoleOutputCP.Addr(), 0, 0, 0, 0)
+ cp = uint32(r0)
+ if cp == 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
func GetConsoleScreenBufferInfo(console Handle, info *ConsoleScreenBufferInfo) (err error) {
r1, _, e1 := syscall.Syscall(procGetConsoleScreenBufferInfo.Addr(), 2, uintptr(console), uintptr(unsafe.Pointer(info)), 0)
if r1 == 0 {
@@ -3038,6 +3060,14 @@ func SetCommTimeouts(handle Handle, timeouts *CommTimeouts) (err error) {
return
}
+func SetConsoleCP(cp uint32) (err error) {
+ r1, _, e1 := syscall.Syscall(procSetConsoleCP.Addr(), 1, uintptr(cp), 0, 0)
+ if r1 == 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
func setConsoleCursorPosition(console Handle, position uint32) (err error) {
r1, _, e1 := syscall.Syscall(procSetConsoleCursorPosition.Addr(), 2, uintptr(console), uintptr(position), 0)
if r1 == 0 {
@@ -3054,6 +3084,14 @@ func SetConsoleMode(console Handle, mode uint32) (err error) {
return
}
+func SetConsoleOutputCP(cp uint32) (err error) {
+ r1, _, e1 := syscall.Syscall(procSetConsoleOutputCP.Addr(), 1, uintptr(cp), 0, 0)
+ if r1 == 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
func SetCurrentDirectory(path *uint16) (err error) {
r1, _, e1 := syscall.Syscall(procSetCurrentDirectoryW.Addr(), 1, uintptr(unsafe.Pointer(path)), 0, 0)
if r1 == 0 {
diff --git a/vendor/golang.org/x/term/term_windows.go b/vendor/golang.org/x/term/term_windows.go
index 465f5606..df6bf948 100644
--- a/vendor/golang.org/x/term/term_windows.go
+++ b/vendor/golang.org/x/term/term_windows.go
@@ -26,6 +26,7 @@ func makeRaw(fd int) (*State, error) {
return nil, err
}
raw := st &^ (windows.ENABLE_ECHO_INPUT | windows.ENABLE_PROCESSED_INPUT | windows.ENABLE_LINE_INPUT | windows.ENABLE_PROCESSED_OUTPUT)
+ raw |= windows.ENABLE_VIRTUAL_TERMINAL_INPUT
if err := windows.SetConsoleMode(windows.Handle(fd), raw); err != nil {
return nil, err
}
diff --git a/vendor/modules.txt b/vendor/modules.txt
index aee54912..1f50ee31 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -84,11 +84,11 @@ github.com/cloudflare/circl/math/mlsbset
github.com/cloudflare/circl/sign
github.com/cloudflare/circl/sign/ed25519
github.com/cloudflare/circl/sign/ed448
-# github.com/cpuguy83/go-md2man/v2 v2.0.4
+# github.com/cpuguy83/go-md2man/v2 v2.0.5
## explicit; go 1.11
github.com/cpuguy83/go-md2man/v2/md2man
-# github.com/cyphar/filepath-securejoin v0.3.1
-## explicit; go 1.20
+# github.com/cyphar/filepath-securejoin v0.3.3
+## explicit; go 1.21
github.com/cyphar/filepath-securejoin
# github.com/davecgh/go-spew v1.1.1
## explicit
@@ -265,7 +265,7 @@ github.com/xanzy/ssh-agent
# github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1
## explicit; go 1.15
github.com/xrash/smetrics
-# golang.org/x/crypto v0.26.0
+# golang.org/x/crypto v0.27.0
## explicit; go 1.20
golang.org/x/crypto/argon2
golang.org/x/crypto/blake2b
@@ -287,22 +287,22 @@ golang.org/x/crypto/ssh
golang.org/x/crypto/ssh/agent
golang.org/x/crypto/ssh/internal/bcrypt_pbkdf
golang.org/x/crypto/ssh/knownhosts
-# golang.org/x/net v0.28.0
+# golang.org/x/net v0.29.0
## explicit; go 1.18
golang.org/x/net/context
golang.org/x/net/internal/socks
golang.org/x/net/proxy
-# golang.org/x/sys v0.23.0
+# golang.org/x/sys v0.25.0
## explicit; go 1.18
golang.org/x/sys/cpu
golang.org/x/sys/execabs
golang.org/x/sys/plan9
golang.org/x/sys/unix
golang.org/x/sys/windows
-# golang.org/x/term v0.23.0
+# golang.org/x/term v0.24.0
## explicit; go 1.18
golang.org/x/term
-# golang.org/x/text v0.17.0
+# golang.org/x/text v0.18.0
## explicit; go 1.18
golang.org/x/text/cases
golang.org/x/text/internal