diff --git a/go.mod b/go.mod index de0789ce..2e2f94e7 100644 --- a/go.mod +++ b/go.mod @@ -26,9 +26,9 @@ require ( github.com/stretchr/testify v1.9.0 github.com/stvp/rollbar v0.5.1 github.com/urfave/cli/v2 v2.27.4 - golang.org/x/crypto v0.26.0 - golang.org/x/term v0.23.0 - golang.org/x/text v0.17.0 + golang.org/x/crypto v0.27.0 + golang.org/x/term v0.24.0 + golang.org/x/text v0.18.0 gopkg.in/errgo.v1 v1.0.1 ) @@ -38,8 +38,8 @@ require ( github.com/ProtonMail/go-crypto v1.1.0-alpha.5-proton // indirect github.com/VividCortex/ewma v1.2.0 // indirect github.com/cloudflare/circl v1.4.0 // indirect - github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect - github.com/cyphar/filepath-securejoin v0.3.1 // indirect + github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect + github.com/cyphar/filepath-securejoin v0.3.3 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/emirpasic/gods v1.18.1 // indirect github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect @@ -64,8 +64,8 @@ require ( github.com/skeema/knownhosts v1.3.0 // indirect github.com/xanzy/ssh-agent v0.3.3 // indirect github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect - golang.org/x/net v0.28.0 // indirect - golang.org/x/sys v0.23.0 // indirect + golang.org/x/net v0.29.0 // indirect + golang.org/x/sys v0.25.0 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 638ae209..e6680aab 100644 --- a/go.sum +++ b/go.sum @@ -31,12 +31,12 @@ github.com/cheggaaa/pb/v3 v3.1.5 h1:QuuUzeM2WsAqG2gMqtzaWithDJv0i+i6UlnwSCI4QLk= github.com/cheggaaa/pb/v3 v3.1.5/go.mod h1:CrxkeghYTXi1lQBEI7jSn+3svI3cuc19haAj6jM60XI= github.com/cloudflare/circl v1.4.0 h1:BV7h5MgrktNzytKmWjpOtdYrf0lkkbF8YMlBGPhJQrY= github.com/cloudflare/circl v1.4.0/go.mod h1:PDRU+oXvdD7KCtgKxW95M5Z8BpSCJXQORiZFnBQS5QU= -github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4= -github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc= +github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.17 h1:QeVUsEDNrLBW4tMgZHvxy18sKtr6VI492kBhUfhDJNI= github.com/creack/pty v1.1.17/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= -github.com/cyphar/filepath-securejoin v0.3.1 h1:1V7cHiaW+C+39wEfpH6XlLBQo3j/PciWFrgfCLS8XrE= -github.com/cyphar/filepath-securejoin v0.3.1/go.mod h1:F7i41x/9cBF7lzCrVsYs9fuzwRZm4NQsGTBdpp6mETc= +github.com/cyphar/filepath-securejoin v0.3.3 h1:lofZkCEVFIBe0KcdQOzFs8Soy9oaHOWl4gGtPI+gCFc= +github.com/cyphar/filepath-securejoin v0.3.3/go.mod h1:8s/MCNJREmFK0H02MF6Ihv1nakJe4L/w3WZLHNkvlYM= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -157,8 +157,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= -golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= +golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A= +golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -167,8 +167,8 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= -golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= +golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= +golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -188,19 +188,19 @@ golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= -golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= +golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= -golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= +golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= +golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= -golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= +golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= diff --git a/vendor/github.com/cpuguy83/go-md2man/v2/md2man/debug.go b/vendor/github.com/cpuguy83/go-md2man/v2/md2man/debug.go new file mode 100644 index 00000000..0ec4b12c --- /dev/null +++ b/vendor/github.com/cpuguy83/go-md2man/v2/md2man/debug.go @@ -0,0 +1,62 @@ +package md2man + +import ( + "fmt" + "io" + "os" + "strings" + + "github.com/russross/blackfriday/v2" +) + +func fmtListFlags(flags blackfriday.ListType) string { + knownFlags := []struct { + name string + flag blackfriday.ListType + }{ + {"ListTypeOrdered", blackfriday.ListTypeOrdered}, + {"ListTypeDefinition", blackfriday.ListTypeDefinition}, + {"ListTypeTerm", blackfriday.ListTypeTerm}, + {"ListItemContainsBlock", blackfriday.ListItemContainsBlock}, + {"ListItemBeginningOfList", blackfriday.ListItemBeginningOfList}, + {"ListItemEndOfList", blackfriday.ListItemEndOfList}, + } + + var f []string + for _, kf := range knownFlags { + if flags&kf.flag != 0 { + f = append(f, kf.name) + flags &^= kf.flag + } + } + if flags != 0 { + f = append(f, fmt.Sprintf("Unknown(%#x)", flags)) + } + return strings.Join(f, "|") +} + +type debugDecorator struct { + blackfriday.Renderer +} + +func depth(node *blackfriday.Node) int { + d := 0 + for n := node.Parent; n != nil; n = n.Parent { + d++ + } + return d +} + +func (d *debugDecorator) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus { + fmt.Fprintf(os.Stderr, "%s%s %v %v\n", + strings.Repeat(" ", depth(node)), + map[bool]string{true: "+", false: "-"}[entering], + node, + fmtListFlags(node.ListFlags)) + var b strings.Builder + status := d.Renderer.RenderNode(io.MultiWriter(&b, w), node, entering) + if b.Len() > 0 { + fmt.Fprintf(os.Stderr, ">> %q\n", b.String()) + } + return status +} diff --git a/vendor/github.com/cpuguy83/go-md2man/v2/md2man/md2man.go b/vendor/github.com/cpuguy83/go-md2man/v2/md2man/md2man.go index 42bf32aa..62d91b77 100644 --- a/vendor/github.com/cpuguy83/go-md2man/v2/md2man/md2man.go +++ b/vendor/github.com/cpuguy83/go-md2man/v2/md2man/md2man.go @@ -1,16 +1,23 @@ package md2man import ( + "os" + "strconv" + "github.com/russross/blackfriday/v2" ) // Render converts a markdown document into a roff formatted document. func Render(doc []byte) []byte { renderer := NewRoffRenderer() + var r blackfriday.Renderer = renderer + if v, _ := strconv.ParseBool(os.Getenv("MD2MAN_DEBUG")); v { + r = &debugDecorator{Renderer: r} + } return blackfriday.Run(doc, []blackfriday.Option{ - blackfriday.WithRenderer(renderer), + blackfriday.WithRenderer(r), blackfriday.WithExtensions(renderer.GetExtensions()), }...) } diff --git a/vendor/github.com/cpuguy83/go-md2man/v2/md2man/roff.go b/vendor/github.com/cpuguy83/go-md2man/v2/md2man/roff.go index 8a290f19..9d6c473f 100644 --- a/vendor/github.com/cpuguy83/go-md2man/v2/md2man/roff.go +++ b/vendor/github.com/cpuguy83/go-md2man/v2/md2man/roff.go @@ -14,10 +14,8 @@ import ( // roffRenderer implements the blackfriday.Renderer interface for creating // roff format (manpages) from markdown text type roffRenderer struct { - extensions blackfriday.Extensions listCounters []int firstHeader bool - firstDD bool listDepth int } @@ -43,7 +41,7 @@ const ( quoteTag = "\n.PP\n.RS\n" quoteCloseTag = "\n.RE\n" listTag = "\n.RS\n" - listCloseTag = "\n.RE\n" + listCloseTag = ".RE\n" dtTag = "\n.TP\n" dd2Tag = "\n" tableStart = "\n.TS\nallbox;\n" @@ -56,23 +54,18 @@ const ( // NewRoffRenderer creates a new blackfriday Renderer for generating roff documents // from markdown func NewRoffRenderer() *roffRenderer { // nolint: golint - var extensions blackfriday.Extensions - - extensions |= blackfriday.NoIntraEmphasis - extensions |= blackfriday.Tables - extensions |= blackfriday.FencedCode - extensions |= blackfriday.SpaceHeadings - extensions |= blackfriday.Footnotes - extensions |= blackfriday.Titleblock - extensions |= blackfriday.DefinitionLists - return &roffRenderer{ - extensions: extensions, - } + return &roffRenderer{} } // GetExtensions returns the list of extensions used by this renderer implementation -func (r *roffRenderer) GetExtensions() blackfriday.Extensions { - return r.extensions +func (*roffRenderer) GetExtensions() blackfriday.Extensions { + return blackfriday.NoIntraEmphasis | + blackfriday.Tables | + blackfriday.FencedCode | + blackfriday.SpaceHeadings | + blackfriday.Footnotes | + blackfriday.Titleblock | + blackfriday.DefinitionLists } // RenderHeader handles outputting the header at document start @@ -103,7 +96,23 @@ func (r *roffRenderer) RenderNode(w io.Writer, node *blackfriday.Node, entering switch node.Type { case blackfriday.Text: - escapeSpecialChars(w, node.Literal) + // Special case: format the NAME section as required for proper whatis parsing. + // Refer to the lexgrog(1) and groff_man(7) manual pages for details. + if node.Parent != nil && + node.Parent.Type == blackfriday.Paragraph && + node.Parent.Prev != nil && + node.Parent.Prev.Type == blackfriday.Heading && + node.Parent.Prev.FirstChild != nil && + bytes.EqualFold(node.Parent.Prev.FirstChild.Literal, []byte("NAME")) { + before, after, found := bytes.Cut(node.Literal, []byte(" - ")) + escapeSpecialChars(w, before) + if found { + out(w, ` \- `) + escapeSpecialChars(w, after) + } + } else { + escapeSpecialChars(w, node.Literal) + } case blackfriday.Softbreak: out(w, crTag) case blackfriday.Hardbreak: @@ -141,14 +150,25 @@ func (r *roffRenderer) RenderNode(w io.Writer, node *blackfriday.Node, entering case blackfriday.Document: break case blackfriday.Paragraph: - // roff .PP markers break lists - if r.listDepth > 0 { - return blackfriday.GoToNext - } if entering { - out(w, paraTag) + if r.listDepth > 0 { + // roff .PP markers break lists + if node.Prev != nil { // continued paragraph + if node.Prev.Type == blackfriday.List && node.Prev.ListFlags&blackfriday.ListTypeDefinition == 0 { + out(w, ".IP\n") + } else { + out(w, crTag) + } + } + } else if node.Prev != nil && node.Prev.Type == blackfriday.Heading { + out(w, crTag) + } else { + out(w, paraTag) + } } else { - out(w, crTag) + if node.Next == nil || node.Next.Type != blackfriday.List { + out(w, crTag) + } } case blackfriday.BlockQuote: if entering { @@ -211,6 +231,10 @@ func (r *roffRenderer) handleHeading(w io.Writer, node *blackfriday.Node, enteri func (r *roffRenderer) handleList(w io.Writer, node *blackfriday.Node, entering bool) { openTag := listTag closeTag := listCloseTag + if (entering && r.listDepth == 0) || (!entering && r.listDepth == 1) { + openTag = crTag + closeTag = "" + } if node.ListFlags&blackfriday.ListTypeDefinition != 0 { // tags for definition lists handled within Item node openTag = "" @@ -239,23 +263,25 @@ func (r *roffRenderer) handleItem(w io.Writer, node *blackfriday.Node, entering } else if node.ListFlags&blackfriday.ListTypeTerm != 0 { // DT (definition term): line just before DD (see below). out(w, dtTag) - r.firstDD = true } else if node.ListFlags&blackfriday.ListTypeDefinition != 0 { // DD (definition description): line that starts with ": ". // // We have to distinguish between the first DD and the // subsequent ones, as there should be no vertical // whitespace between the DT and the first DD. - if r.firstDD { - r.firstDD = false - } else { - out(w, dd2Tag) + if node.Prev != nil && node.Prev.ListFlags&(blackfriday.ListTypeTerm|blackfriday.ListTypeDefinition) == blackfriday.ListTypeDefinition { + if node.Prev.Type == blackfriday.Item && + node.Prev.LastChild != nil && + node.Prev.LastChild.Type == blackfriday.List && + node.Prev.LastChild.ListFlags&blackfriday.ListTypeDefinition == 0 { + out(w, ".IP\n") + } else { + out(w, dd2Tag) + } } } else { out(w, ".IP \\(bu 2\n") } - } else { - out(w, "\n") } } diff --git a/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md b/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md index 7436896e..23f7bc7f 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md +++ b/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md @@ -6,6 +6,36 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] ## +## [0.3.3] - 2024-09-30 ## + +### Fixed ### +- The mode and owner verification logic in `MkdirAll` has been removed. This + was originally intended to protect against some theoretical attacks but upon + further consideration these protections don't actually buy us anything and + they were causing spurious errors with more complicated filesystem setups. +- The "is the created directory empty" logic in `MkdirAll` has also been + removed. This was not causing us issues yet, but some pseudofilesystems (such + as `cgroup`) create non-empty directories and so this logic would've been + wrong for such cases. + +## [0.3.2] - 2024-09-13 ## + +### Changed ### +- Passing the `S_ISUID` or `S_ISGID` modes to `MkdirAllInRoot` will now return + an explicit error saying that those bits are ignored by `mkdirat(2)`. In the + past a different error was returned, but since the silent ignoring behaviour + is codified in the man pages a more explicit error seems apt. While silently + ignoring these bits would be the most compatible option, it could lead to + users thinking their code sets these bits when it doesn't. Programs that need + to deal with compatibility can mask the bits themselves. (#23, #25) + +### Fixed ### +- If a directory has `S_ISGID` set, then all child directories will have + `S_ISGID` set when created and a different gid will be used for any inode + created under the directory. Previously, the "expected owner and mode" + validation in `securejoin.MkdirAll` did not correctly handle this. We now + correctly handle this case. (#24, #25) + ## [0.3.1] - 2024-07-23 ## ### Changed ### @@ -127,7 +157,9 @@ This is our first release of `github.com/cyphar/filepath-securejoin`, containing a full implementation with a coverage of 93.5% (the only missing cases are the error cases, which are hard to mocktest at the moment). -[Unreleased]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.1...HEAD +[Unreleased]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.3...HEAD +[0.3.3]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.2...v0.3.3 +[0.3.2]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.1...v0.3.2 [0.3.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.0...v0.3.1 [0.3.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.5...v0.3.0 [0.2.5]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.4...v0.2.5 diff --git a/vendor/github.com/cyphar/filepath-securejoin/VERSION b/vendor/github.com/cyphar/filepath-securejoin/VERSION index 9e11b32f..1c09c74e 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/VERSION +++ b/vendor/github.com/cyphar/filepath-securejoin/VERSION @@ -1 +1 @@ -0.3.1 +0.3.3 diff --git a/vendor/github.com/cyphar/filepath-securejoin/join.go b/vendor/github.com/cyphar/filepath-securejoin/join.go index bd86a48b..ca4ce1f2 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/join.go +++ b/vendor/github.com/cyphar/filepath-securejoin/join.go @@ -22,27 +22,27 @@ const maxSymlinkLimit = 255 // IsNotExist tells you if err is an error that implies that either the path // accessed does not exist (or path components don't exist). This is -// effectively a more broad version of os.IsNotExist. +// effectively a more broad version of [os.IsNotExist]. func IsNotExist(err error) bool { // Check that it's not actually an ENOTDIR, which in some cases is a more // convoluted case of ENOENT (usually involving weird paths). return errors.Is(err, os.ErrNotExist) || errors.Is(err, syscall.ENOTDIR) || errors.Is(err, syscall.ENOENT) } -// SecureJoinVFS joins the two given path components (similar to Join) except +// SecureJoinVFS joins the two given path components (similar to [filepath.Join]) except // that the returned path is guaranteed to be scoped inside the provided root // path (when evaluated). Any symbolic links in the path are evaluated with the // given root treated as the root of the filesystem, similar to a chroot. The -// filesystem state is evaluated through the given VFS interface (if nil, the -// standard os.* family of functions are used). +// filesystem state is evaluated through the given [VFS] interface (if nil, the +// standard [os].* family of functions are used). // // Note that the guarantees provided by this function only apply if the path // components in the returned string are not modified (in other words are not // replaced with symlinks on the filesystem) after this function has returned. -// Such a symlink race is necessarily out-of-scope of SecureJoin. +// Such a symlink race is necessarily out-of-scope of SecureJoinVFS. // // NOTE: Due to the above limitation, Linux users are strongly encouraged to -// use OpenInRoot instead, which does safely protect against these kinds of +// use [OpenInRoot] instead, which does safely protect against these kinds of // attacks. There is no way to solve this problem with SecureJoinVFS because // the API is fundamentally wrong (you cannot return a "safe" path string and // guarantee it won't be modified afterwards). @@ -123,8 +123,8 @@ func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) { return filepath.Join(root, finalPath), nil } -// SecureJoin is a wrapper around SecureJoinVFS that just uses the os.* library -// of functions as the VFS. If in doubt, use this function over SecureJoinVFS. +// SecureJoin is a wrapper around [SecureJoinVFS] that just uses the [os].* library +// of functions as the [VFS]. If in doubt, use this function over [SecureJoinVFS]. func SecureJoin(root, unsafePath string) (string, error) { return SecureJoinVFS(root, unsafePath, nil) } diff --git a/vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go b/vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go index ad2bd797..b5f67452 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go +++ b/vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go @@ -9,7 +9,6 @@ package securejoin import ( "errors" "fmt" - "io" "os" "path/filepath" "slices" @@ -23,29 +22,36 @@ var ( errPossibleAttack = errors.New("possible attack detected") ) -// MkdirAllHandle is equivalent to MkdirAll, except that it is safer to use in -// two respects: +// MkdirAllHandle is equivalent to [MkdirAll], except that it is safer to use +// in two respects: // -// - The caller provides the root directory as an *os.File (preferably O_PATH) +// - The caller provides the root directory as an *[os.File] (preferably O_PATH) // handle. This means that the caller can be sure which root directory is // being used. Note that this can be emulated by using /proc/self/fd/... as -// the root path with MkdirAll. +// the root path with [os.MkdirAll]. // -// - Once all of the directories have been created, an *os.File (O_PATH) handle +// - Once all of the directories have been created, an *[os.File] O_PATH handle // to the directory at unsafePath is returned to the caller. This is done in // an effectively-race-free way (an attacker would only be able to swap the // final directory component), which is not possible to emulate with -// MkdirAll. +// [MkdirAll]. // // In addition, the returned handle is obtained far more efficiently than doing -// a brand new lookup of unsafePath (such as with SecureJoin or openat2) after -// doing MkdirAll. If you intend to open the directory after creating it, you +// a brand new lookup of unsafePath (such as with [SecureJoin] or openat2) after +// doing [MkdirAll]. If you intend to open the directory after creating it, you // should use MkdirAllHandle. func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err error) { // Make sure there are no os.FileMode bits set. if mode&^0o7777 != 0 { return nil, fmt.Errorf("%w for mkdir 0o%.3o", errInvalidMode, mode) } + // On Linux, mkdirat(2) (and os.Mkdir) silently ignore the suid and sgid + // bits. We could also silently ignore them but since we have very few + // users it seems more prudent to return an error so users notice that + // these bits will not be set. + if mode&^0o1777 != 0 { + return nil, fmt.Errorf("%w for mkdir 0o%.3o: suid and sgid are ignored by mkdir", errInvalidMode, mode) + } // Try to open as much of the path as possible. currentDir, remainingPath, err := partialLookupInRoot(root, unsafePath) @@ -101,24 +107,6 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err // Make sure the mode doesn't have any type bits. mode &^= unix.S_IFMT - // What properties do we expect any newly created directories to have? - var ( - // While umask(2) is a per-thread property, and thus this value could - // vary between threads, a functioning Go program would LockOSThread - // threads with different umasks and so we don't need to LockOSThread - // for this entire mkdirat loop (if we are in the locked thread with a - // different umask, we are already locked and there's nothing for us to - // do -- and if not then it doesn't matter which thread we run on and - // there's nothing for us to do). - expectedMode = uint32(unix.S_IFDIR | (mode &^ getUmask())) - - // We would want to get the fs[ug]id here, but we can't access those - // from userspace. In practice, nobody uses setfs[ug]id() anymore, so - // just use the effective [ug]id (which is equivalent to the fs[ug]id - // for programs that don't use setfs[ug]id). - expectedUid = uint32(unix.Geteuid()) - expectedGid = uint32(unix.Getegid()) - ) // Create the remaining components. for _, part := range remainingParts { @@ -129,7 +117,7 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err } // NOTE: mkdir(2) will not follow trailing symlinks, so we can safely - // create the finaly component without worrying about symlink-exchange + // create the final component without worrying about symlink-exchange // attacks. if err := unix.Mkdirat(int(currentDir.Fd()), part, uint32(mode)); err != nil { err = &os.PathError{Op: "mkdirat", Path: currentDir.Name() + "/" + part, Err: err} @@ -157,40 +145,30 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err _ = currentDir.Close() currentDir = nextDir - // Make sure that the directory matches what we expect. An attacker - // could have swapped the directory between us making it and opening - // it. There's no way for us to be sure that the directory is - // _precisely_ the same as the directory we created, but if we are in - // an empty directory with the same owner and mode as the one we - // created then there is nothing the attacker could do with this new - // directory that they couldn't do with the old one. - if stat, err := fstat(currentDir); err != nil { - return nil, fmt.Errorf("check newly created directory: %w", err) - } else { - if stat.Mode != expectedMode { - return nil, fmt.Errorf("%w: newly created directory %q has incorrect mode 0o%.3o (expected 0o%.3o)", errPossibleAttack, currentDir.Name(), stat.Mode, expectedMode) - } - if stat.Uid != expectedUid || stat.Gid != expectedGid { - return nil, fmt.Errorf("%w: newly created directory %q has incorrect owner %d:%d (expected %d:%d)", errPossibleAttack, currentDir.Name(), stat.Uid, stat.Gid, expectedUid, expectedGid) - } - // Check that the directory is empty. We only need to check for - // a single entry, and we should get EOF if the directory is - // empty. - _, err := currentDir.Readdirnames(1) - if !errors.Is(err, io.EOF) { - if err == nil { - err = fmt.Errorf("%w: newly created directory %q is non-empty", errPossibleAttack, currentDir.Name()) - } - return nil, fmt.Errorf("check if newly created directory %q is empty: %w", currentDir.Name(), err) - } - // Reset the offset. - _, _ = currentDir.Seek(0, unix.SEEK_SET) - } + // It's possible that the directory we just opened was swapped by an + // attacker. Unfortunately there isn't much we can do to protect + // against this, and MkdirAll's behaviour is that we will reuse + // existing directories anyway so the need to protect against this is + // incredibly limited (and arguably doesn't even deserve mention here). + // + // Ideally we might want to check that the owner and mode match what we + // would've created -- unfortunately, it is non-trivial to verify that + // the owner and mode of the created directory match. While plain Unix + // DAC rules seem simple enough to emulate, there are a bunch of other + // factors that can change the mode or owner of created directories + // (default POSIX ACLs, mount options like uid=1,gid=2,umask=0 on + // filesystems like vfat, etc etc). We used to try to verify this but + // it just lead to a series of spurious errors. + // + // We could also check that the directory is non-empty, but + // unfortunately some pseduofilesystems (like cgroupfs) create + // non-empty directories, which would result in different spurious + // errors. } return currentDir, nil } -// MkdirAll is a race-safe alternative to the Go stdlib's os.MkdirAll function, +// MkdirAll is a race-safe alternative to the [os.MkdirAll] function, // where the new directory is guaranteed to be within the root directory (if an // attacker can move directories from inside the root to outside the root, the // created directory tree might be outside of the root but the key constraint @@ -203,16 +181,16 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err // err := os.MkdirAll(path, mode) // // But is much safer. The above implementation is unsafe because if an attacker -// can modify the filesystem tree between SecureJoin and MkdirAll, it is +// can modify the filesystem tree between [SecureJoin] and [os.MkdirAll], it is // possible for MkdirAll to resolve unsafe symlink components and create // directories outside of the root. // // If you plan to open the directory after you have created it or want to use -// an open directory handle as the root, you should use MkdirAllHandle instead. -// This function is a wrapper around MkdirAllHandle. +// an open directory handle as the root, you should use [MkdirAllHandle] instead. +// This function is a wrapper around [MkdirAllHandle]. // // NOTE: The mode argument must be set the unix mode bits (unix.S_I...), not -// the Go generic mode bits (os.Mode...). +// the Go generic mode bits ([os.FileMode]...). func MkdirAll(root, unsafePath string, mode int) error { rootDir, err := os.OpenFile(root, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) if err != nil { diff --git a/vendor/github.com/cyphar/filepath-securejoin/open_linux.go b/vendor/github.com/cyphar/filepath-securejoin/open_linux.go index 52dce76f..230be73f 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/open_linux.go +++ b/vendor/github.com/cyphar/filepath-securejoin/open_linux.go @@ -14,8 +14,8 @@ import ( "golang.org/x/sys/unix" ) -// OpenatInRoot is equivalent to OpenInRoot, except that the root is provided -// using an *os.File handle, to ensure that the correct root directory is used. +// OpenatInRoot is equivalent to [OpenInRoot], except that the root is provided +// using an *[os.File] handle, to ensure that the correct root directory is used. func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error) { handle, err := completeLookupInRoot(root, unsafePath) if err != nil { @@ -31,7 +31,7 @@ func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error) { // handle, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC) // // But is much safer. The above implementation is unsafe because if an attacker -// can modify the filesystem tree between SecureJoin and OpenFile, it is +// can modify the filesystem tree between [SecureJoin] and [os.OpenFile], it is // possible for the returned file to be outside of the root. // // Note that the returned handle is an O_PATH handle, meaning that only a very @@ -39,7 +39,7 @@ func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error) { // accidentally opening an untrusted file that could cause issues (such as a // disconnected TTY that could cause a DoS, or some other issue). In order to // use the returned handle, you can "upgrade" it to a proper handle using -// Reopen. +// [Reopen]. func OpenInRoot(root, unsafePath string) (*os.File, error) { rootDir, err := os.OpenFile(root, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) if err != nil { @@ -49,7 +49,7 @@ func OpenInRoot(root, unsafePath string) (*os.File, error) { return OpenatInRoot(rootDir, unsafePath) } -// Reopen takes an *os.File handle and re-opens it through /proc/self/fd. +// Reopen takes an *[os.File] handle and re-opens it through /proc/self/fd. // Reopen(file, flags) is effectively equivalent to // // fdPath := fmt.Sprintf("/proc/self/fd/%d", file.Fd()) @@ -59,7 +59,9 @@ func OpenInRoot(root, unsafePath string) (*os.File, error) { // maliciously-configured /proc mount. While this attack scenario is not // common, in container runtimes it is possible for higher-level runtimes to be // tricked into configuring an unsafe /proc that can be used to attack file -// operations. See CVE-2019-19921 for more details. +// operations. See [CVE-2019-19921] for more details. +// +// [CVE-2019-19921]: https://github.com/advisories/GHSA-fh74-hm69-rqjw func Reopen(handle *os.File, flags int) (*os.File, error) { procRoot, err := getProcRoot() if err != nil { diff --git a/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go b/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go index 921b3e1d..ae3b381e 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go +++ b/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go @@ -13,34 +13,21 @@ import ( "path/filepath" "strings" "sync" - "testing" "golang.org/x/sys/unix" ) -var ( - hasOpenat2Bool bool - hasOpenat2Once sync.Once - - testingForceHasOpenat2 *bool -) - -func hasOpenat2() bool { - if testing.Testing() && testingForceHasOpenat2 != nil { - return *testingForceHasOpenat2 - } - hasOpenat2Once.Do(func() { - fd, err := unix.Openat2(unix.AT_FDCWD, ".", &unix.OpenHow{ - Flags: unix.O_PATH | unix.O_CLOEXEC, - Resolve: unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_IN_ROOT, - }) - if err == nil { - hasOpenat2Bool = true - _ = unix.Close(fd) - } +var hasOpenat2 = sync.OnceValue(func() bool { + fd, err := unix.Openat2(unix.AT_FDCWD, ".", &unix.OpenHow{ + Flags: unix.O_PATH | unix.O_CLOEXEC, + Resolve: unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_IN_ROOT, }) - return hasOpenat2Bool -} + if err != nil { + return false + } + _ = unix.Close(fd) + return true +}) func scopedLookupShouldRetry(how *unix.OpenHow, err error) bool { // RESOLVE_IN_ROOT (and RESOLVE_BENEATH) can return -EAGAIN if we resolve diff --git a/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go b/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go index 949fb5f2..ac083f21 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go +++ b/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go @@ -42,6 +42,10 @@ func fstatatFile(dir *os.File, path string, flags int) (unix.Stat_t, error) { return stat, nil } +func fstatFile(fd *os.File) (unix.Stat_t, error) { + return fstatatFile(fd, "", unix.AT_EMPTY_PATH) +} + func readlinkatFile(dir *os.File, path string) (string, error) { size := 4096 for { diff --git a/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go b/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go index adf0bd08..fa7929a5 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go +++ b/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go @@ -54,33 +54,26 @@ func verifyProcRoot(procRoot *os.File) error { return nil } -var ( - hasNewMountApiBool bool - hasNewMountApiOnce sync.Once -) - -func hasNewMountApi() bool { - hasNewMountApiOnce.Do(func() { - // All of the pieces of the new mount API we use (fsopen, fsconfig, - // fsmount, open_tree) were added together in Linux 5.1[1,2], so we can - // just check for one of the syscalls and the others should also be - // available. - // - // Just try to use open_tree(2) to open a file without OPEN_TREE_CLONE. - // This is equivalent to openat(2), but tells us if open_tree is - // available (and thus all of the other basic new mount API syscalls). - // open_tree(2) is most light-weight syscall to test here. - // - // [1]: merge commit 400913252d09 - // [2]: - fd, err := unix.OpenTree(-int(unix.EBADF), "/", unix.OPEN_TREE_CLOEXEC) - if err == nil { - hasNewMountApiBool = true - _ = unix.Close(fd) - } - }) - return hasNewMountApiBool -} +var hasNewMountApi = sync.OnceValue(func() bool { + // All of the pieces of the new mount API we use (fsopen, fsconfig, + // fsmount, open_tree) were added together in Linux 5.1[1,2], so we can + // just check for one of the syscalls and the others should also be + // available. + // + // Just try to use open_tree(2) to open a file without OPEN_TREE_CLONE. + // This is equivalent to openat(2), but tells us if open_tree is + // available (and thus all of the other basic new mount API syscalls). + // open_tree(2) is most light-weight syscall to test here. + // + // [1]: merge commit 400913252d09 + // [2]: + fd, err := unix.OpenTree(-int(unix.EBADF), "/", unix.OPEN_TREE_CLOEXEC) + if err != nil { + return false + } + _ = unix.Close(fd) + return true +}) func fsopen(fsName string, flags int) (*os.File, error) { // Make sure we always set O_CLOEXEC. @@ -172,14 +165,6 @@ func privateProcRoot() (*os.File, error) { return procRoot, err } -var ( - procRootHandle *os.File - procRootError error - procRootOnce sync.Once - - errUnsafeProcfs = errors.New("unsafe procfs detected") -) - func unsafeHostProcRoot() (_ *os.File, Err error) { procRoot, err := os.OpenFile("/proc", unix.O_PATH|unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) if err != nil { @@ -207,17 +192,15 @@ func doGetProcRoot() (*os.File, error) { return procRoot, err } -func getProcRoot() (*os.File, error) { - procRootOnce.Do(func() { - procRootHandle, procRootError = doGetProcRoot() - }) - return procRootHandle, procRootError -} +var getProcRoot = sync.OnceValues(func() (*os.File, error) { + return doGetProcRoot() +}) -var ( - haveProcThreadSelf bool - haveProcThreadSelfOnce sync.Once -) +var hasProcThreadSelf = sync.OnceValue(func() bool { + return unix.Access("/proc/thread-self/", unix.F_OK) == nil +}) + +var errUnsafeProcfs = errors.New("unsafe procfs detected") type procThreadSelfCloser func() @@ -230,13 +213,6 @@ type procThreadSelfCloser func() // This is similar to ProcThreadSelf from runc, but with extra hardening // applied and using *os.File. func procThreadSelf(procRoot *os.File, subpath string) (_ *os.File, _ procThreadSelfCloser, Err error) { - haveProcThreadSelfOnce.Do(func() { - // If the kernel doesn't support thread-self, it doesn't matter which - // /proc handle we use. - _, err := fstatatFile(procRoot, "thread-self", unix.AT_SYMLINK_NOFOLLOW) - haveProcThreadSelf = (err == nil) - }) - // We need to lock our thread until the caller is done with the handle // because between getting the handle and using it we could get interrupted // by the Go runtime and hit the case where the underlying thread is @@ -251,7 +227,7 @@ func procThreadSelf(procRoot *os.File, subpath string) (_ *os.File, _ procThread // Figure out what prefix we want to use. threadSelf := "thread-self/" - if !haveProcThreadSelf || testingForceProcSelfTask() { + if !hasProcThreadSelf() || testingForceProcSelfTask() { /// Pre-3.17 kernels don't have /proc/thread-self, so do it manually. threadSelf = "self/task/" + strconv.Itoa(unix.Gettid()) + "/" if _, err := fstatatFile(procRoot, threadSelf, unix.AT_SYMLINK_NOFOLLOW); err != nil || testingForceProcSelf() { @@ -275,7 +251,7 @@ func procThreadSelf(procRoot *os.File, subpath string) (_ *os.File, _ procThread // absolutely sure we are operating on a clean /proc handle that // doesn't have any cheeky overmounts that could trick us (including // symlink mounts on top of /proc/thread-self). RESOLVE_BENEATH isn't - // stricly needed, but just use it since we have it. + // strictly needed, but just use it since we have it. // // NOTE: /proc/self is technically a magic-link (the contents of the // symlink are generated dynamically), but it doesn't use @@ -313,24 +289,16 @@ func procThreadSelf(procRoot *os.File, subpath string) (_ *os.File, _ procThread return handle, runtime.UnlockOSThread, nil } -var ( - hasStatxMountIdBool bool - hasStatxMountIdOnce sync.Once -) - -func hasStatxMountId() bool { - hasStatxMountIdOnce.Do(func() { - var ( - stx unix.Statx_t - // We don't care which mount ID we get. The kernel will give us the - // unique one if it is supported. - wantStxMask uint32 = unix.STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID - ) - err := unix.Statx(-int(unix.EBADF), "/", 0, int(wantStxMask), &stx) - hasStatxMountIdBool = (err == nil && (stx.Mask&wantStxMask != 0)) - }) - return hasStatxMountIdBool -} +var hasStatxMountId = sync.OnceValue(func() bool { + var ( + stx unix.Statx_t + // We don't care which mount ID we get. The kernel will give us the + // unique one if it is supported. + wantStxMask uint32 = unix.STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID + ) + err := unix.Statx(-int(unix.EBADF), "/", 0, int(wantStxMask), &stx) + return err == nil && stx.Mask&wantStxMask != 0 +}) func getMountId(dir *os.File, path string) (uint64, error) { // If we don't have statx(STATX_MNT_ID*) support, we can't do anything. @@ -443,22 +411,6 @@ func isDeadInode(file *os.File) error { return nil } -func getUmask() int { - // umask is a per-thread property, but it is inherited by children, so we - // need to lock our OS thread to make sure that no other goroutine runs in - // this thread and no goroutines are spawned from this thread until we - // revert to the old umask. - // - // We could parse /proc/self/status to avoid this get-set problem, but - // /proc/thread-self requires LockOSThread anyway, so there's no real - // benefit over just using umask(2). - runtime.LockOSThread() - umask := unix.Umask(0) - unix.Umask(umask) - runtime.UnlockOSThread() - return umask -} - func checkProcSelfFdPath(path string, file *os.File) error { if err := isDeadInode(file); err != nil { return err diff --git a/vendor/github.com/cyphar/filepath-securejoin/vfs.go b/vendor/github.com/cyphar/filepath-securejoin/vfs.go index 6e27c7dd..36373f8c 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/vfs.go +++ b/vendor/github.com/cyphar/filepath-securejoin/vfs.go @@ -10,19 +10,19 @@ import "os" // are several projects (umoci and go-mtree) that are using this sort of // interface. -// VFS is the minimal interface necessary to use SecureJoinVFS. A nil VFS is -// equivalent to using the standard os.* family of functions. This is mainly +// VFS is the minimal interface necessary to use [SecureJoinVFS]. A nil VFS is +// equivalent to using the standard [os].* family of functions. This is mainly // used for the purposes of mock testing, but also can be used to otherwise use -// SecureJoin with VFS-like system. +// [SecureJoinVFS] with VFS-like system. type VFS interface { - // Lstat returns a FileInfo describing the named file. If the file is a - // symbolic link, the returned FileInfo describes the symbolic link. Lstat - // makes no attempt to follow the link. These semantics are identical to - // os.Lstat. + // Lstat returns an [os.FileInfo] describing the named file. If the + // file is a symbolic link, the returned [os.FileInfo] describes the + // symbolic link. Lstat makes no attempt to follow the link. + // The semantics are identical to [os.Lstat]. Lstat(name string) (os.FileInfo, error) - // Readlink returns the destination of the named symbolic link. These - // semantics are identical to os.Readlink. + // Readlink returns the destination of the named symbolic link. + // The semantics are identical to [os.Readlink]. Readlink(name string) (string, error) } @@ -30,12 +30,6 @@ type VFS interface { // module. type osVFS struct{} -// Lstat returns a FileInfo describing the named file. If the file is a -// symbolic link, the returned FileInfo describes the symbolic link. Lstat -// makes no attempt to follow the link. These semantics are identical to -// os.Lstat. func (o osVFS) Lstat(name string) (os.FileInfo, error) { return os.Lstat(name) } -// Readlink returns the destination of the named symbolic link. These -// semantics are identical to os.Readlink. func (o osVFS) Readlink(name string) (string, error) { return os.Readlink(name) } diff --git a/vendor/golang.org/x/crypto/argon2/blamka_amd64.s b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s index 6713acca..c3895478 100644 --- a/vendor/golang.org/x/crypto/argon2/blamka_amd64.s +++ b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s @@ -1,243 +1,2791 @@ -// Copyright 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run blamka_amd64.go -out ../blamka_amd64.s -pkg argon2. DO NOT EDIT. //go:build amd64 && gc && !purego #include "textflag.h" -DATA ·c40<>+0x00(SB)/8, $0x0201000706050403 -DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b -GLOBL ·c40<>(SB), (NOPTR+RODATA), $16 - -DATA ·c48<>+0x00(SB)/8, $0x0100070605040302 -DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a -GLOBL ·c48<>(SB), (NOPTR+RODATA), $16 - -#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \ - MOVO v4, t1; \ - MOVO v5, v4; \ - MOVO t1, v5; \ - MOVO v6, t1; \ - PUNPCKLQDQ v6, t2; \ - PUNPCKHQDQ v7, v6; \ - PUNPCKHQDQ t2, v6; \ - PUNPCKLQDQ v7, t2; \ - MOVO t1, v7; \ - MOVO v2, t1; \ - PUNPCKHQDQ t2, v7; \ - PUNPCKLQDQ v3, t2; \ - PUNPCKHQDQ t2, v2; \ - PUNPCKLQDQ t1, t2; \ - PUNPCKHQDQ t2, v3 - -#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \ - MOVO v4, t1; \ - MOVO v5, v4; \ - MOVO t1, v5; \ - MOVO v2, t1; \ - PUNPCKLQDQ v2, t2; \ - PUNPCKHQDQ v3, v2; \ - PUNPCKHQDQ t2, v2; \ - PUNPCKLQDQ v3, t2; \ - MOVO t1, v3; \ - MOVO v6, t1; \ - PUNPCKHQDQ t2, v3; \ - PUNPCKLQDQ v7, t2; \ - PUNPCKHQDQ t2, v6; \ - PUNPCKLQDQ t1, t2; \ - PUNPCKHQDQ t2, v7 - -#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, t0, c40, c48) \ - MOVO v0, t0; \ - PMULULQ v2, t0; \ - PADDQ v2, v0; \ - PADDQ t0, v0; \ - PADDQ t0, v0; \ - PXOR v0, v6; \ - PSHUFD $0xB1, v6, v6; \ - MOVO v4, t0; \ - PMULULQ v6, t0; \ - PADDQ v6, v4; \ - PADDQ t0, v4; \ - PADDQ t0, v4; \ - PXOR v4, v2; \ - PSHUFB c40, v2; \ - MOVO v0, t0; \ - PMULULQ v2, t0; \ - PADDQ v2, v0; \ - PADDQ t0, v0; \ - PADDQ t0, v0; \ - PXOR v0, v6; \ - PSHUFB c48, v6; \ - MOVO v4, t0; \ - PMULULQ v6, t0; \ - PADDQ v6, v4; \ - PADDQ t0, v4; \ - PADDQ t0, v4; \ - PXOR v4, v2; \ - MOVO v2, t0; \ - PADDQ v2, t0; \ - PSRLQ $63, v2; \ - PXOR t0, v2; \ - MOVO v1, t0; \ - PMULULQ v3, t0; \ - PADDQ v3, v1; \ - PADDQ t0, v1; \ - PADDQ t0, v1; \ - PXOR v1, v7; \ - PSHUFD $0xB1, v7, v7; \ - MOVO v5, t0; \ - PMULULQ v7, t0; \ - PADDQ v7, v5; \ - PADDQ t0, v5; \ - PADDQ t0, v5; \ - PXOR v5, v3; \ - PSHUFB c40, v3; \ - MOVO v1, t0; \ - PMULULQ v3, t0; \ - PADDQ v3, v1; \ - PADDQ t0, v1; \ - PADDQ t0, v1; \ - PXOR v1, v7; \ - PSHUFB c48, v7; \ - MOVO v5, t0; \ - PMULULQ v7, t0; \ - PADDQ v7, v5; \ - PADDQ t0, v5; \ - PADDQ t0, v5; \ - PXOR v5, v3; \ - MOVO v3, t0; \ - PADDQ v3, t0; \ - PSRLQ $63, v3; \ - PXOR t0, v3 - -#define LOAD_MSG_0(block, off) \ - MOVOU 8*(off+0)(block), X0; \ - MOVOU 8*(off+2)(block), X1; \ - MOVOU 8*(off+4)(block), X2; \ - MOVOU 8*(off+6)(block), X3; \ - MOVOU 8*(off+8)(block), X4; \ - MOVOU 8*(off+10)(block), X5; \ - MOVOU 8*(off+12)(block), X6; \ - MOVOU 8*(off+14)(block), X7 - -#define STORE_MSG_0(block, off) \ - MOVOU X0, 8*(off+0)(block); \ - MOVOU X1, 8*(off+2)(block); \ - MOVOU X2, 8*(off+4)(block); \ - MOVOU X3, 8*(off+6)(block); \ - MOVOU X4, 8*(off+8)(block); \ - MOVOU X5, 8*(off+10)(block); \ - MOVOU X6, 8*(off+12)(block); \ - MOVOU X7, 8*(off+14)(block) - -#define LOAD_MSG_1(block, off) \ - MOVOU 8*off+0*8(block), X0; \ - MOVOU 8*off+16*8(block), X1; \ - MOVOU 8*off+32*8(block), X2; \ - MOVOU 8*off+48*8(block), X3; \ - MOVOU 8*off+64*8(block), X4; \ - MOVOU 8*off+80*8(block), X5; \ - MOVOU 8*off+96*8(block), X6; \ - MOVOU 8*off+112*8(block), X7 - -#define STORE_MSG_1(block, off) \ - MOVOU X0, 8*off+0*8(block); \ - MOVOU X1, 8*off+16*8(block); \ - MOVOU X2, 8*off+32*8(block); \ - MOVOU X3, 8*off+48*8(block); \ - MOVOU X4, 8*off+64*8(block); \ - MOVOU X5, 8*off+80*8(block); \ - MOVOU X6, 8*off+96*8(block); \ - MOVOU X7, 8*off+112*8(block) - -#define BLAMKA_ROUND_0(block, off, t0, t1, c40, c48) \ - LOAD_MSG_0(block, off); \ - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ - SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \ - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \ - STORE_MSG_0(block, off) - -#define BLAMKA_ROUND_1(block, off, t0, t1, c40, c48) \ - LOAD_MSG_1(block, off); \ - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ - SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \ - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \ - STORE_MSG_1(block, off) - // func blamkaSSE4(b *block) -TEXT ·blamkaSSE4(SB), 4, $0-8 - MOVQ b+0(FP), AX - - MOVOU ·c40<>(SB), X10 - MOVOU ·c48<>(SB), X11 +// Requires: SSE2, SSSE3 +TEXT ·blamkaSSE4(SB), NOSPLIT, $0-8 + MOVQ b+0(FP), AX + MOVOU ·c40<>+0(SB), X10 + MOVOU ·c48<>+0(SB), X11 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU 32(AX), X2 + MOVOU 48(AX), X3 + MOVOU 64(AX), X4 + MOVOU 80(AX), X5 + MOVOU 96(AX), X6 + MOVOU 112(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, 32(AX) + MOVOU X3, 48(AX) + MOVOU X4, 64(AX) + MOVOU X5, 80(AX) + MOVOU X6, 96(AX) + MOVOU X7, 112(AX) + MOVOU 128(AX), X0 + MOVOU 144(AX), X1 + MOVOU 160(AX), X2 + MOVOU 176(AX), X3 + MOVOU 192(AX), X4 + MOVOU 208(AX), X5 + MOVOU 224(AX), X6 + MOVOU 240(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 128(AX) + MOVOU X1, 144(AX) + MOVOU X2, 160(AX) + MOVOU X3, 176(AX) + MOVOU X4, 192(AX) + MOVOU X5, 208(AX) + MOVOU X6, 224(AX) + MOVOU X7, 240(AX) + MOVOU 256(AX), X0 + MOVOU 272(AX), X1 + MOVOU 288(AX), X2 + MOVOU 304(AX), X3 + MOVOU 320(AX), X4 + MOVOU 336(AX), X5 + MOVOU 352(AX), X6 + MOVOU 368(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 256(AX) + MOVOU X1, 272(AX) + MOVOU X2, 288(AX) + MOVOU X3, 304(AX) + MOVOU X4, 320(AX) + MOVOU X5, 336(AX) + MOVOU X6, 352(AX) + MOVOU X7, 368(AX) + MOVOU 384(AX), X0 + MOVOU 400(AX), X1 + MOVOU 416(AX), X2 + MOVOU 432(AX), X3 + MOVOU 448(AX), X4 + MOVOU 464(AX), X5 + MOVOU 480(AX), X6 + MOVOU 496(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 384(AX) + MOVOU X1, 400(AX) + MOVOU X2, 416(AX) + MOVOU X3, 432(AX) + MOVOU X4, 448(AX) + MOVOU X5, 464(AX) + MOVOU X6, 480(AX) + MOVOU X7, 496(AX) + MOVOU 512(AX), X0 + MOVOU 528(AX), X1 + MOVOU 544(AX), X2 + MOVOU 560(AX), X3 + MOVOU 576(AX), X4 + MOVOU 592(AX), X5 + MOVOU 608(AX), X6 + MOVOU 624(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 512(AX) + MOVOU X1, 528(AX) + MOVOU X2, 544(AX) + MOVOU X3, 560(AX) + MOVOU X4, 576(AX) + MOVOU X5, 592(AX) + MOVOU X6, 608(AX) + MOVOU X7, 624(AX) + MOVOU 640(AX), X0 + MOVOU 656(AX), X1 + MOVOU 672(AX), X2 + MOVOU 688(AX), X3 + MOVOU 704(AX), X4 + MOVOU 720(AX), X5 + MOVOU 736(AX), X6 + MOVOU 752(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 640(AX) + MOVOU X1, 656(AX) + MOVOU X2, 672(AX) + MOVOU X3, 688(AX) + MOVOU X4, 704(AX) + MOVOU X5, 720(AX) + MOVOU X6, 736(AX) + MOVOU X7, 752(AX) + MOVOU 768(AX), X0 + MOVOU 784(AX), X1 + MOVOU 800(AX), X2 + MOVOU 816(AX), X3 + MOVOU 832(AX), X4 + MOVOU 848(AX), X5 + MOVOU 864(AX), X6 + MOVOU 880(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 768(AX) + MOVOU X1, 784(AX) + MOVOU X2, 800(AX) + MOVOU X3, 816(AX) + MOVOU X4, 832(AX) + MOVOU X5, 848(AX) + MOVOU X6, 864(AX) + MOVOU X7, 880(AX) + MOVOU 896(AX), X0 + MOVOU 912(AX), X1 + MOVOU 928(AX), X2 + MOVOU 944(AX), X3 + MOVOU 960(AX), X4 + MOVOU 976(AX), X5 + MOVOU 992(AX), X6 + MOVOU 1008(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 896(AX) + MOVOU X1, 912(AX) + MOVOU X2, 928(AX) + MOVOU X3, 944(AX) + MOVOU X4, 960(AX) + MOVOU X5, 976(AX) + MOVOU X6, 992(AX) + MOVOU X7, 1008(AX) + MOVOU (AX), X0 + MOVOU 128(AX), X1 + MOVOU 256(AX), X2 + MOVOU 384(AX), X3 + MOVOU 512(AX), X4 + MOVOU 640(AX), X5 + MOVOU 768(AX), X6 + MOVOU 896(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, (AX) + MOVOU X1, 128(AX) + MOVOU X2, 256(AX) + MOVOU X3, 384(AX) + MOVOU X4, 512(AX) + MOVOU X5, 640(AX) + MOVOU X6, 768(AX) + MOVOU X7, 896(AX) + MOVOU 16(AX), X0 + MOVOU 144(AX), X1 + MOVOU 272(AX), X2 + MOVOU 400(AX), X3 + MOVOU 528(AX), X4 + MOVOU 656(AX), X5 + MOVOU 784(AX), X6 + MOVOU 912(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 16(AX) + MOVOU X1, 144(AX) + MOVOU X2, 272(AX) + MOVOU X3, 400(AX) + MOVOU X4, 528(AX) + MOVOU X5, 656(AX) + MOVOU X6, 784(AX) + MOVOU X7, 912(AX) + MOVOU 32(AX), X0 + MOVOU 160(AX), X1 + MOVOU 288(AX), X2 + MOVOU 416(AX), X3 + MOVOU 544(AX), X4 + MOVOU 672(AX), X5 + MOVOU 800(AX), X6 + MOVOU 928(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 32(AX) + MOVOU X1, 160(AX) + MOVOU X2, 288(AX) + MOVOU X3, 416(AX) + MOVOU X4, 544(AX) + MOVOU X5, 672(AX) + MOVOU X6, 800(AX) + MOVOU X7, 928(AX) + MOVOU 48(AX), X0 + MOVOU 176(AX), X1 + MOVOU 304(AX), X2 + MOVOU 432(AX), X3 + MOVOU 560(AX), X4 + MOVOU 688(AX), X5 + MOVOU 816(AX), X6 + MOVOU 944(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 48(AX) + MOVOU X1, 176(AX) + MOVOU X2, 304(AX) + MOVOU X3, 432(AX) + MOVOU X4, 560(AX) + MOVOU X5, 688(AX) + MOVOU X6, 816(AX) + MOVOU X7, 944(AX) + MOVOU 64(AX), X0 + MOVOU 192(AX), X1 + MOVOU 320(AX), X2 + MOVOU 448(AX), X3 + MOVOU 576(AX), X4 + MOVOU 704(AX), X5 + MOVOU 832(AX), X6 + MOVOU 960(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 64(AX) + MOVOU X1, 192(AX) + MOVOU X2, 320(AX) + MOVOU X3, 448(AX) + MOVOU X4, 576(AX) + MOVOU X5, 704(AX) + MOVOU X6, 832(AX) + MOVOU X7, 960(AX) + MOVOU 80(AX), X0 + MOVOU 208(AX), X1 + MOVOU 336(AX), X2 + MOVOU 464(AX), X3 + MOVOU 592(AX), X4 + MOVOU 720(AX), X5 + MOVOU 848(AX), X6 + MOVOU 976(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 80(AX) + MOVOU X1, 208(AX) + MOVOU X2, 336(AX) + MOVOU X3, 464(AX) + MOVOU X4, 592(AX) + MOVOU X5, 720(AX) + MOVOU X6, 848(AX) + MOVOU X7, 976(AX) + MOVOU 96(AX), X0 + MOVOU 224(AX), X1 + MOVOU 352(AX), X2 + MOVOU 480(AX), X3 + MOVOU 608(AX), X4 + MOVOU 736(AX), X5 + MOVOU 864(AX), X6 + MOVOU 992(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 96(AX) + MOVOU X1, 224(AX) + MOVOU X2, 352(AX) + MOVOU X3, 480(AX) + MOVOU X4, 608(AX) + MOVOU X5, 736(AX) + MOVOU X6, 864(AX) + MOVOU X7, 992(AX) + MOVOU 112(AX), X0 + MOVOU 240(AX), X1 + MOVOU 368(AX), X2 + MOVOU 496(AX), X3 + MOVOU 624(AX), X4 + MOVOU 752(AX), X5 + MOVOU 880(AX), X6 + MOVOU 1008(AX), X7 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFD $0xb1, X6, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + PSHUFB X10, X2 + MOVO X0, X8 + PMULULQ X2, X8 + PADDQ X2, X0 + PADDQ X8, X0 + PADDQ X8, X0 + PXOR X0, X6 + PSHUFB X11, X6 + MOVO X4, X8 + PMULULQ X6, X8 + PADDQ X6, X4 + PADDQ X8, X4 + PADDQ X8, X4 + PXOR X4, X2 + MOVO X2, X8 + PADDQ X2, X8 + PSRLQ $0x3f, X2 + PXOR X8, X2 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFD $0xb1, X7, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + PSHUFB X10, X3 + MOVO X1, X8 + PMULULQ X3, X8 + PADDQ X3, X1 + PADDQ X8, X1 + PADDQ X8, X1 + PXOR X1, X7 + PSHUFB X11, X7 + MOVO X5, X8 + PMULULQ X7, X8 + PADDQ X7, X5 + PADDQ X8, X5 + PADDQ X8, X5 + PXOR X5, X3 + MOVO X3, X8 + PADDQ X3, X8 + PSRLQ $0x3f, X3 + PXOR X8, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU X0, 112(AX) + MOVOU X1, 240(AX) + MOVOU X2, 368(AX) + MOVOU X3, 496(AX) + MOVOU X4, 624(AX) + MOVOU X5, 752(AX) + MOVOU X6, 880(AX) + MOVOU X7, 1008(AX) + RET - BLAMKA_ROUND_0(AX, 0, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 16, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 32, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 48, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 64, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 80, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 96, X8, X9, X10, X11) - BLAMKA_ROUND_0(AX, 112, X8, X9, X10, X11) +DATA ·c40<>+0(SB)/8, $0x0201000706050403 +DATA ·c40<>+8(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·c40<>(SB), RODATA|NOPTR, $16 - BLAMKA_ROUND_1(AX, 0, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 2, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 4, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 6, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 8, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 10, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 12, X8, X9, X10, X11) - BLAMKA_ROUND_1(AX, 14, X8, X9, X10, X11) - RET +DATA ·c48<>+0(SB)/8, $0x0100070605040302 +DATA ·c48<>+8(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·c48<>(SB), RODATA|NOPTR, $16 -// func mixBlocksSSE2(out, a, b, c *block) -TEXT ·mixBlocksSSE2(SB), 4, $0-32 +// func mixBlocksSSE2(out *block, a *block, b *block, c *block) +// Requires: SSE2 +TEXT ·mixBlocksSSE2(SB), NOSPLIT, $0-32 MOVQ out+0(FP), DX MOVQ a+8(FP), AX MOVQ b+16(FP), BX MOVQ c+24(FP), CX - MOVQ $128, DI + MOVQ $0x00000080, DI loop: - MOVOU 0(AX), X0 - MOVOU 0(BX), X1 - MOVOU 0(CX), X2 + MOVOU (AX), X0 + MOVOU (BX), X1 + MOVOU (CX), X2 PXOR X1, X0 PXOR X2, X0 - MOVOU X0, 0(DX) - ADDQ $16, AX - ADDQ $16, BX - ADDQ $16, CX - ADDQ $16, DX - SUBQ $2, DI + MOVOU X0, (DX) + ADDQ $0x10, AX + ADDQ $0x10, BX + ADDQ $0x10, CX + ADDQ $0x10, DX + SUBQ $0x02, DI JA loop RET -// func xorBlocksSSE2(out, a, b, c *block) -TEXT ·xorBlocksSSE2(SB), 4, $0-32 +// func xorBlocksSSE2(out *block, a *block, b *block, c *block) +// Requires: SSE2 +TEXT ·xorBlocksSSE2(SB), NOSPLIT, $0-32 MOVQ out+0(FP), DX MOVQ a+8(FP), AX MOVQ b+16(FP), BX MOVQ c+24(FP), CX - MOVQ $128, DI + MOVQ $0x00000080, DI loop: - MOVOU 0(AX), X0 - MOVOU 0(BX), X1 - MOVOU 0(CX), X2 - MOVOU 0(DX), X3 + MOVOU (AX), X0 + MOVOU (BX), X1 + MOVOU (CX), X2 + MOVOU (DX), X3 PXOR X1, X0 PXOR X2, X0 PXOR X3, X0 - MOVOU X0, 0(DX) - ADDQ $16, AX - ADDQ $16, BX - ADDQ $16, CX - ADDQ $16, DX - SUBQ $2, DI + MOVOU X0, (DX) + ADDQ $0x10, AX + ADDQ $0x10, BX + ADDQ $0x10, CX + ADDQ $0x10, DX + SUBQ $0x02, DI JA loop RET diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s index 9ae8206c..f75162e0 100644 --- a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s +++ b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s @@ -1,722 +1,4517 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run blake2bAVX2_amd64_asm.go -out ../../blake2bAVX2_amd64.s -pkg blake2b. DO NOT EDIT. //go:build amd64 && gc && !purego #include "textflag.h" -DATA ·AVX2_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 -DATA ·AVX2_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b -DATA ·AVX2_iv0<>+0x10(SB)/8, $0x3c6ef372fe94f82b -DATA ·AVX2_iv0<>+0x18(SB)/8, $0xa54ff53a5f1d36f1 -GLOBL ·AVX2_iv0<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX2_iv1<>+0x00(SB)/8, $0x510e527fade682d1 -DATA ·AVX2_iv1<>+0x08(SB)/8, $0x9b05688c2b3e6c1f -DATA ·AVX2_iv1<>+0x10(SB)/8, $0x1f83d9abfb41bd6b -DATA ·AVX2_iv1<>+0x18(SB)/8, $0x5be0cd19137e2179 -GLOBL ·AVX2_iv1<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX2_c40<>+0x00(SB)/8, $0x0201000706050403 -DATA ·AVX2_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b -DATA ·AVX2_c40<>+0x10(SB)/8, $0x0201000706050403 -DATA ·AVX2_c40<>+0x18(SB)/8, $0x0a09080f0e0d0c0b -GLOBL ·AVX2_c40<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX2_c48<>+0x00(SB)/8, $0x0100070605040302 -DATA ·AVX2_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a -DATA ·AVX2_c48<>+0x10(SB)/8, $0x0100070605040302 -DATA ·AVX2_c48<>+0x18(SB)/8, $0x09080f0e0d0c0b0a -GLOBL ·AVX2_c48<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 -DATA ·AVX_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b -GLOBL ·AVX_iv0<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b -DATA ·AVX_iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1 -GLOBL ·AVX_iv1<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_iv2<>+0x00(SB)/8, $0x510e527fade682d1 -DATA ·AVX_iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f -GLOBL ·AVX_iv2<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b -DATA ·AVX_iv3<>+0x08(SB)/8, $0x5be0cd19137e2179 -GLOBL ·AVX_iv3<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_c40<>+0x00(SB)/8, $0x0201000706050403 -DATA ·AVX_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b -GLOBL ·AVX_c40<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_c48<>+0x00(SB)/8, $0x0100070605040302 -DATA ·AVX_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a -GLOBL ·AVX_c48<>(SB), (NOPTR+RODATA), $16 - -#define VPERMQ_0x39_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x39 -#define VPERMQ_0x93_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x93 -#define VPERMQ_0x4E_Y2_Y2 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e -#define VPERMQ_0x93_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x93 -#define VPERMQ_0x39_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x39 - -#define ROUND_AVX2(m0, m1, m2, m3, t, c40, c48) \ - VPADDQ m0, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFD $-79, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPSHUFB c40, Y1, Y1; \ - VPADDQ m1, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFB c48, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPADDQ Y1, Y1, t; \ - VPSRLQ $63, Y1, Y1; \ - VPXOR t, Y1, Y1; \ - VPERMQ_0x39_Y1_Y1; \ - VPERMQ_0x4E_Y2_Y2; \ - VPERMQ_0x93_Y3_Y3; \ - VPADDQ m2, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFD $-79, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPSHUFB c40, Y1, Y1; \ - VPADDQ m3, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFB c48, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPADDQ Y1, Y1, t; \ - VPSRLQ $63, Y1, Y1; \ - VPXOR t, Y1, Y1; \ - VPERMQ_0x39_Y3_Y3; \ - VPERMQ_0x4E_Y2_Y2; \ - VPERMQ_0x93_Y1_Y1 - -#define VMOVQ_SI_X11_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x1E -#define VMOVQ_SI_X12_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x26 -#define VMOVQ_SI_X13_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x2E -#define VMOVQ_SI_X14_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x36 -#define VMOVQ_SI_X15_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x3E - -#define VMOVQ_SI_X11(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x5E; BYTE $n -#define VMOVQ_SI_X12(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x66; BYTE $n -#define VMOVQ_SI_X13(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x6E; BYTE $n -#define VMOVQ_SI_X14(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x76; BYTE $n -#define VMOVQ_SI_X15(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x7E; BYTE $n - -#define VPINSRQ_1_SI_X11_0 BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x1E; BYTE $0x01 -#define VPINSRQ_1_SI_X12_0 BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x26; BYTE $0x01 -#define VPINSRQ_1_SI_X13_0 BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x2E; BYTE $0x01 -#define VPINSRQ_1_SI_X14_0 BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x36; BYTE $0x01 -#define VPINSRQ_1_SI_X15_0 BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x3E; BYTE $0x01 - -#define VPINSRQ_1_SI_X11(n) BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x5E; BYTE $n; BYTE $0x01 -#define VPINSRQ_1_SI_X12(n) BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x66; BYTE $n; BYTE $0x01 -#define VPINSRQ_1_SI_X13(n) BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x6E; BYTE $n; BYTE $0x01 -#define VPINSRQ_1_SI_X14(n) BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x76; BYTE $n; BYTE $0x01 -#define VPINSRQ_1_SI_X15(n) BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x7E; BYTE $n; BYTE $0x01 - -#define VMOVQ_R8_X15 BYTE $0xC4; BYTE $0x41; BYTE $0xF9; BYTE $0x6E; BYTE $0xF8 -#define VPINSRQ_1_R9_X15 BYTE $0xC4; BYTE $0x43; BYTE $0x81; BYTE $0x22; BYTE $0xF9; BYTE $0x01 - -// load msg: Y12 = (i0, i1, i2, i3) -// i0, i1, i2, i3 must not be 0 -#define LOAD_MSG_AVX2_Y12(i0, i1, i2, i3) \ - VMOVQ_SI_X12(i0*8); \ - VMOVQ_SI_X11(i2*8); \ - VPINSRQ_1_SI_X12(i1*8); \ - VPINSRQ_1_SI_X11(i3*8); \ - VINSERTI128 $1, X11, Y12, Y12 - -// load msg: Y13 = (i0, i1, i2, i3) -// i0, i1, i2, i3 must not be 0 -#define LOAD_MSG_AVX2_Y13(i0, i1, i2, i3) \ - VMOVQ_SI_X13(i0*8); \ - VMOVQ_SI_X11(i2*8); \ - VPINSRQ_1_SI_X13(i1*8); \ - VPINSRQ_1_SI_X11(i3*8); \ - VINSERTI128 $1, X11, Y13, Y13 - -// load msg: Y14 = (i0, i1, i2, i3) -// i0, i1, i2, i3 must not be 0 -#define LOAD_MSG_AVX2_Y14(i0, i1, i2, i3) \ - VMOVQ_SI_X14(i0*8); \ - VMOVQ_SI_X11(i2*8); \ - VPINSRQ_1_SI_X14(i1*8); \ - VPINSRQ_1_SI_X11(i3*8); \ - VINSERTI128 $1, X11, Y14, Y14 - -// load msg: Y15 = (i0, i1, i2, i3) -// i0, i1, i2, i3 must not be 0 -#define LOAD_MSG_AVX2_Y15(i0, i1, i2, i3) \ - VMOVQ_SI_X15(i0*8); \ - VMOVQ_SI_X11(i2*8); \ - VPINSRQ_1_SI_X15(i1*8); \ - VPINSRQ_1_SI_X11(i3*8); \ - VINSERTI128 $1, X11, Y15, Y15 - -#define LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() \ - VMOVQ_SI_X12_0; \ - VMOVQ_SI_X11(4*8); \ - VPINSRQ_1_SI_X12(2*8); \ - VPINSRQ_1_SI_X11(6*8); \ - VINSERTI128 $1, X11, Y12, Y12; \ - LOAD_MSG_AVX2_Y13(1, 3, 5, 7); \ - LOAD_MSG_AVX2_Y14(8, 10, 12, 14); \ - LOAD_MSG_AVX2_Y15(9, 11, 13, 15) - -#define LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() \ - LOAD_MSG_AVX2_Y12(14, 4, 9, 13); \ - LOAD_MSG_AVX2_Y13(10, 8, 15, 6); \ - VMOVQ_SI_X11(11*8); \ - VPSHUFD $0x4E, 0*8(SI), X14; \ - VPINSRQ_1_SI_X11(5*8); \ - VINSERTI128 $1, X11, Y14, Y14; \ - LOAD_MSG_AVX2_Y15(12, 2, 7, 3) - -#define LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() \ - VMOVQ_SI_X11(5*8); \ - VMOVDQU 11*8(SI), X12; \ - VPINSRQ_1_SI_X11(15*8); \ - VINSERTI128 $1, X11, Y12, Y12; \ - VMOVQ_SI_X13(8*8); \ - VMOVQ_SI_X11(2*8); \ - VPINSRQ_1_SI_X13_0; \ - VPINSRQ_1_SI_X11(13*8); \ - VINSERTI128 $1, X11, Y13, Y13; \ - LOAD_MSG_AVX2_Y14(10, 3, 7, 9); \ - LOAD_MSG_AVX2_Y15(14, 6, 1, 4) - -#define LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() \ - LOAD_MSG_AVX2_Y12(7, 3, 13, 11); \ - LOAD_MSG_AVX2_Y13(9, 1, 12, 14); \ - LOAD_MSG_AVX2_Y14(2, 5, 4, 15); \ - VMOVQ_SI_X15(6*8); \ - VMOVQ_SI_X11_0; \ - VPINSRQ_1_SI_X15(10*8); \ - VPINSRQ_1_SI_X11(8*8); \ - VINSERTI128 $1, X11, Y15, Y15 - -#define LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() \ - LOAD_MSG_AVX2_Y12(9, 5, 2, 10); \ - VMOVQ_SI_X13_0; \ - VMOVQ_SI_X11(4*8); \ - VPINSRQ_1_SI_X13(7*8); \ - VPINSRQ_1_SI_X11(15*8); \ - VINSERTI128 $1, X11, Y13, Y13; \ - LOAD_MSG_AVX2_Y14(14, 11, 6, 3); \ - LOAD_MSG_AVX2_Y15(1, 12, 8, 13) - -#define LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() \ - VMOVQ_SI_X12(2*8); \ - VMOVQ_SI_X11_0; \ - VPINSRQ_1_SI_X12(6*8); \ - VPINSRQ_1_SI_X11(8*8); \ - VINSERTI128 $1, X11, Y12, Y12; \ - LOAD_MSG_AVX2_Y13(12, 10, 11, 3); \ - LOAD_MSG_AVX2_Y14(4, 7, 15, 1); \ - LOAD_MSG_AVX2_Y15(13, 5, 14, 9) - -#define LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() \ - LOAD_MSG_AVX2_Y12(12, 1, 14, 4); \ - LOAD_MSG_AVX2_Y13(5, 15, 13, 10); \ - VMOVQ_SI_X14_0; \ - VPSHUFD $0x4E, 8*8(SI), X11; \ - VPINSRQ_1_SI_X14(6*8); \ - VINSERTI128 $1, X11, Y14, Y14; \ - LOAD_MSG_AVX2_Y15(7, 3, 2, 11) - -#define LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() \ - LOAD_MSG_AVX2_Y12(13, 7, 12, 3); \ - LOAD_MSG_AVX2_Y13(11, 14, 1, 9); \ - LOAD_MSG_AVX2_Y14(5, 15, 8, 2); \ - VMOVQ_SI_X15_0; \ - VMOVQ_SI_X11(6*8); \ - VPINSRQ_1_SI_X15(4*8); \ - VPINSRQ_1_SI_X11(10*8); \ - VINSERTI128 $1, X11, Y15, Y15 - -#define LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() \ - VMOVQ_SI_X12(6*8); \ - VMOVQ_SI_X11(11*8); \ - VPINSRQ_1_SI_X12(14*8); \ - VPINSRQ_1_SI_X11_0; \ - VINSERTI128 $1, X11, Y12, Y12; \ - LOAD_MSG_AVX2_Y13(15, 9, 3, 8); \ - VMOVQ_SI_X11(1*8); \ - VMOVDQU 12*8(SI), X14; \ - VPINSRQ_1_SI_X11(10*8); \ - VINSERTI128 $1, X11, Y14, Y14; \ - VMOVQ_SI_X15(2*8); \ - VMOVDQU 4*8(SI), X11; \ - VPINSRQ_1_SI_X15(7*8); \ - VINSERTI128 $1, X11, Y15, Y15 - -#define LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() \ - LOAD_MSG_AVX2_Y12(10, 8, 7, 1); \ - VMOVQ_SI_X13(2*8); \ - VPSHUFD $0x4E, 5*8(SI), X11; \ - VPINSRQ_1_SI_X13(4*8); \ - VINSERTI128 $1, X11, Y13, Y13; \ - LOAD_MSG_AVX2_Y14(15, 9, 3, 13); \ - VMOVQ_SI_X15(11*8); \ - VMOVQ_SI_X11(12*8); \ - VPINSRQ_1_SI_X15(14*8); \ - VPINSRQ_1_SI_X11_0; \ - VINSERTI128 $1, X11, Y15, Y15 - // func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) -TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment - MOVQ h+0(FP), AX - MOVQ c+8(FP), BX - MOVQ flag+16(FP), CX - MOVQ blocks_base+24(FP), SI - MOVQ blocks_len+32(FP), DI - - MOVQ SP, DX - ADDQ $31, DX - ANDQ $~31, DX - - MOVQ CX, 16(DX) - XORQ CX, CX - MOVQ CX, 24(DX) - - VMOVDQU ·AVX2_c40<>(SB), Y4 - VMOVDQU ·AVX2_c48<>(SB), Y5 - - VMOVDQU 0(AX), Y8 +// Requires: AVX, AVX2 +TEXT ·hashBlocksAVX2(SB), NOSPLIT, $320-48 + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVQ flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DI + MOVQ SP, DX + ADDQ $+31, DX + ANDQ $-32, DX + MOVQ CX, 16(DX) + XORQ CX, CX + MOVQ CX, 24(DX) + VMOVDQU ·AVX2_c40<>+0(SB), Y4 + VMOVDQU ·AVX2_c48<>+0(SB), Y5 + VMOVDQU (AX), Y8 VMOVDQU 32(AX), Y9 - VMOVDQU ·AVX2_iv0<>(SB), Y6 - VMOVDQU ·AVX2_iv1<>(SB), Y7 - - MOVQ 0(BX), R8 - MOVQ 8(BX), R9 - MOVQ R9, 8(DX) + VMOVDQU ·AVX2_iv0<>+0(SB), Y6 + VMOVDQU ·AVX2_iv1<>+0(SB), Y7 + MOVQ (BX), R8 + MOVQ 8(BX), R9 + MOVQ R9, 8(DX) loop: - ADDQ $128, R8 - MOVQ R8, 0(DX) - CMPQ R8, $128 + ADDQ $0x80, R8 + MOVQ R8, (DX) + CMPQ R8, $0x80 JGE noinc INCQ R9 MOVQ R9, 8(DX) noinc: - VMOVDQA Y8, Y0 - VMOVDQA Y9, Y1 - VMOVDQA Y6, Y2 - VPXOR 0(DX), Y7, Y3 - - LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() - VMOVDQA Y12, 32(DX) - VMOVDQA Y13, 64(DX) - VMOVDQA Y14, 96(DX) - VMOVDQA Y15, 128(DX) - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() - VMOVDQA Y12, 160(DX) - VMOVDQA Y13, 192(DX) - VMOVDQA Y14, 224(DX) - VMOVDQA Y15, 256(DX) - - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - - ROUND_AVX2(32(DX), 64(DX), 96(DX), 128(DX), Y10, Y4, Y5) - ROUND_AVX2(160(DX), 192(DX), 224(DX), 256(DX), Y10, Y4, Y5) - - VPXOR Y0, Y8, Y8 - VPXOR Y1, Y9, Y9 - VPXOR Y2, Y8, Y8 - VPXOR Y3, Y9, Y9 - - LEAQ 128(SI), SI - SUBQ $128, DI - JNE loop - - MOVQ R8, 0(BX) - MOVQ R9, 8(BX) - - VMOVDQU Y8, 0(AX) - VMOVDQU Y9, 32(AX) + VMOVDQA Y8, Y0 + VMOVDQA Y9, Y1 + VMOVDQA Y6, Y2 + VPXOR (DX), Y7, Y3 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x26 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x20 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x30 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x08 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x28 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x38 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x40 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x70 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x78 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VMOVDQA Y12, 32(DX) + VMOVDQA Y13, 64(DX) + VMOVDQA Y14, 96(DX) + VMOVDQA Y15, 128(DX) + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x48 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x68 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x78 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x30 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x58 + VPSHUFD $0x4e, (SI), X14 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x28 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x38 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x18 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VMOVDQA Y12, 160(DX) + VMOVDQA Y13, 192(DX) + VMOVDQA Y14, 224(DX) + VMOVDQA Y15, 256(DX) + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x28 + VMOVDQU 88(SI), X12 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x78 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x40 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x10 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x2e + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x68 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x38 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x48 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x08 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x20 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x58 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x70 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x20 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x78 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x30 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x1e + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x40 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x10 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x50 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x2e + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x20 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x78 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x30 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x18 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x08 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x40 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x60 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x68 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x1e + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x40 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x58 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x18 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x20 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x78 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x08 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x70 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x48 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x70 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x20 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x28 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x50 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x36 + VPSHUFD $0x4e, 64(SI), X11 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x30 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x10 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x58 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x18 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x58 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x08 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x48 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x28 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x40 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x10 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x3e + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x30 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x50 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x30 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x58 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x1e + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x78 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x18 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x40 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x08 + VMOVDQU 96(SI), X14 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x50 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x10 + VMOVDQU 32(SI), X11 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x38 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x38 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x08 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x10 + VPSHUFD $0x4e, 40(SI), X11 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x20 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x78 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x18 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x68 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x58 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x1e + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + VPADDQ 32(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ 64(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ 96(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ 128(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + VPADDQ 160(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ 192(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ 224(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ 256(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + VPXOR Y0, Y8, Y8 + VPXOR Y1, Y9, Y9 + VPXOR Y2, Y8, Y8 + VPXOR Y3, Y9, Y9 + LEAQ 128(SI), SI + SUBQ $0x80, DI + JNE loop + MOVQ R8, (BX) + MOVQ R9, 8(BX) + VMOVDQU Y8, (AX) + VMOVDQU Y9, 32(AX) VZEROUPPER - RET -#define VPUNPCKLQDQ_X2_X2_X15 BYTE $0xC5; BYTE $0x69; BYTE $0x6C; BYTE $0xFA -#define VPUNPCKLQDQ_X3_X3_X15 BYTE $0xC5; BYTE $0x61; BYTE $0x6C; BYTE $0xFB -#define VPUNPCKLQDQ_X7_X7_X15 BYTE $0xC5; BYTE $0x41; BYTE $0x6C; BYTE $0xFF -#define VPUNPCKLQDQ_X13_X13_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x11; BYTE $0x6C; BYTE $0xFD -#define VPUNPCKLQDQ_X14_X14_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x09; BYTE $0x6C; BYTE $0xFE - -#define VPUNPCKHQDQ_X15_X2_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x69; BYTE $0x6D; BYTE $0xD7 -#define VPUNPCKHQDQ_X15_X3_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xDF -#define VPUNPCKHQDQ_X15_X6_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x49; BYTE $0x6D; BYTE $0xF7 -#define VPUNPCKHQDQ_X15_X7_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xFF -#define VPUNPCKHQDQ_X15_X3_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xD7 -#define VPUNPCKHQDQ_X15_X7_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xF7 -#define VPUNPCKHQDQ_X15_X13_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xDF -#define VPUNPCKHQDQ_X15_X13_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xFF - -#define SHUFFLE_AVX() \ - VMOVDQA X6, X13; \ - VMOVDQA X2, X14; \ - VMOVDQA X4, X6; \ - VPUNPCKLQDQ_X13_X13_X15; \ - VMOVDQA X5, X4; \ - VMOVDQA X6, X5; \ - VPUNPCKHQDQ_X15_X7_X6; \ - VPUNPCKLQDQ_X7_X7_X15; \ - VPUNPCKHQDQ_X15_X13_X7; \ - VPUNPCKLQDQ_X3_X3_X15; \ - VPUNPCKHQDQ_X15_X2_X2; \ - VPUNPCKLQDQ_X14_X14_X15; \ - VPUNPCKHQDQ_X15_X3_X3; \ - -#define SHUFFLE_AVX_INV() \ - VMOVDQA X2, X13; \ - VMOVDQA X4, X14; \ - VPUNPCKLQDQ_X2_X2_X15; \ - VMOVDQA X5, X4; \ - VPUNPCKHQDQ_X15_X3_X2; \ - VMOVDQA X14, X5; \ - VPUNPCKLQDQ_X3_X3_X15; \ - VMOVDQA X6, X14; \ - VPUNPCKHQDQ_X15_X13_X3; \ - VPUNPCKLQDQ_X7_X7_X15; \ - VPUNPCKHQDQ_X15_X6_X6; \ - VPUNPCKLQDQ_X14_X14_X15; \ - VPUNPCKHQDQ_X15_X7_X7; \ - -#define HALF_ROUND_AVX(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \ - VPADDQ m0, v0, v0; \ - VPADDQ v2, v0, v0; \ - VPADDQ m1, v1, v1; \ - VPADDQ v3, v1, v1; \ - VPXOR v0, v6, v6; \ - VPXOR v1, v7, v7; \ - VPSHUFD $-79, v6, v6; \ - VPSHUFD $-79, v7, v7; \ - VPADDQ v6, v4, v4; \ - VPADDQ v7, v5, v5; \ - VPXOR v4, v2, v2; \ - VPXOR v5, v3, v3; \ - VPSHUFB c40, v2, v2; \ - VPSHUFB c40, v3, v3; \ - VPADDQ m2, v0, v0; \ - VPADDQ v2, v0, v0; \ - VPADDQ m3, v1, v1; \ - VPADDQ v3, v1, v1; \ - VPXOR v0, v6, v6; \ - VPXOR v1, v7, v7; \ - VPSHUFB c48, v6, v6; \ - VPSHUFB c48, v7, v7; \ - VPADDQ v6, v4, v4; \ - VPADDQ v7, v5, v5; \ - VPXOR v4, v2, v2; \ - VPXOR v5, v3, v3; \ - VPADDQ v2, v2, t0; \ - VPSRLQ $63, v2, v2; \ - VPXOR t0, v2, v2; \ - VPADDQ v3, v3, t0; \ - VPSRLQ $63, v3, v3; \ - VPXOR t0, v3, v3 - -// load msg: X12 = (i0, i1), X13 = (i2, i3), X14 = (i4, i5), X15 = (i6, i7) -// i0, i1, i2, i3, i4, i5, i6, i7 must not be 0 -#define LOAD_MSG_AVX(i0, i1, i2, i3, i4, i5, i6, i7) \ - VMOVQ_SI_X12(i0*8); \ - VMOVQ_SI_X13(i2*8); \ - VMOVQ_SI_X14(i4*8); \ - VMOVQ_SI_X15(i6*8); \ - VPINSRQ_1_SI_X12(i1*8); \ - VPINSRQ_1_SI_X13(i3*8); \ - VPINSRQ_1_SI_X14(i5*8); \ - VPINSRQ_1_SI_X15(i7*8) - -// load msg: X12 = (0, 2), X13 = (4, 6), X14 = (1, 3), X15 = (5, 7) -#define LOAD_MSG_AVX_0_2_4_6_1_3_5_7() \ - VMOVQ_SI_X12_0; \ - VMOVQ_SI_X13(4*8); \ - VMOVQ_SI_X14(1*8); \ - VMOVQ_SI_X15(5*8); \ - VPINSRQ_1_SI_X12(2*8); \ - VPINSRQ_1_SI_X13(6*8); \ - VPINSRQ_1_SI_X14(3*8); \ - VPINSRQ_1_SI_X15(7*8) - -// load msg: X12 = (1, 0), X13 = (11, 5), X14 = (12, 2), X15 = (7, 3) -#define LOAD_MSG_AVX_1_0_11_5_12_2_7_3() \ - VPSHUFD $0x4E, 0*8(SI), X12; \ - VMOVQ_SI_X13(11*8); \ - VMOVQ_SI_X14(12*8); \ - VMOVQ_SI_X15(7*8); \ - VPINSRQ_1_SI_X13(5*8); \ - VPINSRQ_1_SI_X14(2*8); \ - VPINSRQ_1_SI_X15(3*8) - -// load msg: X12 = (11, 12), X13 = (5, 15), X14 = (8, 0), X15 = (2, 13) -#define LOAD_MSG_AVX_11_12_5_15_8_0_2_13() \ - VMOVDQU 11*8(SI), X12; \ - VMOVQ_SI_X13(5*8); \ - VMOVQ_SI_X14(8*8); \ - VMOVQ_SI_X15(2*8); \ - VPINSRQ_1_SI_X13(15*8); \ - VPINSRQ_1_SI_X14_0; \ - VPINSRQ_1_SI_X15(13*8) - -// load msg: X12 = (2, 5), X13 = (4, 15), X14 = (6, 10), X15 = (0, 8) -#define LOAD_MSG_AVX_2_5_4_15_6_10_0_8() \ - VMOVQ_SI_X12(2*8); \ - VMOVQ_SI_X13(4*8); \ - VMOVQ_SI_X14(6*8); \ - VMOVQ_SI_X15_0; \ - VPINSRQ_1_SI_X12(5*8); \ - VPINSRQ_1_SI_X13(15*8); \ - VPINSRQ_1_SI_X14(10*8); \ - VPINSRQ_1_SI_X15(8*8) +DATA ·AVX2_c40<>+0(SB)/8, $0x0201000706050403 +DATA ·AVX2_c40<>+8(SB)/8, $0x0a09080f0e0d0c0b +DATA ·AVX2_c40<>+16(SB)/8, $0x0201000706050403 +DATA ·AVX2_c40<>+24(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·AVX2_c40<>(SB), RODATA|NOPTR, $32 -// load msg: X12 = (9, 5), X13 = (2, 10), X14 = (0, 7), X15 = (4, 15) -#define LOAD_MSG_AVX_9_5_2_10_0_7_4_15() \ - VMOVQ_SI_X12(9*8); \ - VMOVQ_SI_X13(2*8); \ - VMOVQ_SI_X14_0; \ - VMOVQ_SI_X15(4*8); \ - VPINSRQ_1_SI_X12(5*8); \ - VPINSRQ_1_SI_X13(10*8); \ - VPINSRQ_1_SI_X14(7*8); \ - VPINSRQ_1_SI_X15(15*8) +DATA ·AVX2_c48<>+0(SB)/8, $0x0100070605040302 +DATA ·AVX2_c48<>+8(SB)/8, $0x09080f0e0d0c0b0a +DATA ·AVX2_c48<>+16(SB)/8, $0x0100070605040302 +DATA ·AVX2_c48<>+24(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·AVX2_c48<>(SB), RODATA|NOPTR, $32 -// load msg: X12 = (2, 6), X13 = (0, 8), X14 = (12, 10), X15 = (11, 3) -#define LOAD_MSG_AVX_2_6_0_8_12_10_11_3() \ - VMOVQ_SI_X12(2*8); \ - VMOVQ_SI_X13_0; \ - VMOVQ_SI_X14(12*8); \ - VMOVQ_SI_X15(11*8); \ - VPINSRQ_1_SI_X12(6*8); \ - VPINSRQ_1_SI_X13(8*8); \ - VPINSRQ_1_SI_X14(10*8); \ - VPINSRQ_1_SI_X15(3*8) +DATA ·AVX2_iv0<>+0(SB)/8, $0x6a09e667f3bcc908 +DATA ·AVX2_iv0<>+8(SB)/8, $0xbb67ae8584caa73b +DATA ·AVX2_iv0<>+16(SB)/8, $0x3c6ef372fe94f82b +DATA ·AVX2_iv0<>+24(SB)/8, $0xa54ff53a5f1d36f1 +GLOBL ·AVX2_iv0<>(SB), RODATA|NOPTR, $32 -// load msg: X12 = (0, 6), X13 = (9, 8), X14 = (7, 3), X15 = (2, 11) -#define LOAD_MSG_AVX_0_6_9_8_7_3_2_11() \ - MOVQ 0*8(SI), X12; \ - VPSHUFD $0x4E, 8*8(SI), X13; \ - MOVQ 7*8(SI), X14; \ - MOVQ 2*8(SI), X15; \ - VPINSRQ_1_SI_X12(6*8); \ - VPINSRQ_1_SI_X14(3*8); \ - VPINSRQ_1_SI_X15(11*8) - -// load msg: X12 = (6, 14), X13 = (11, 0), X14 = (15, 9), X15 = (3, 8) -#define LOAD_MSG_AVX_6_14_11_0_15_9_3_8() \ - MOVQ 6*8(SI), X12; \ - MOVQ 11*8(SI), X13; \ - MOVQ 15*8(SI), X14; \ - MOVQ 3*8(SI), X15; \ - VPINSRQ_1_SI_X12(14*8); \ - VPINSRQ_1_SI_X13_0; \ - VPINSRQ_1_SI_X14(9*8); \ - VPINSRQ_1_SI_X15(8*8) - -// load msg: X12 = (5, 15), X13 = (8, 2), X14 = (0, 4), X15 = (6, 10) -#define LOAD_MSG_AVX_5_15_8_2_0_4_6_10() \ - MOVQ 5*8(SI), X12; \ - MOVQ 8*8(SI), X13; \ - MOVQ 0*8(SI), X14; \ - MOVQ 6*8(SI), X15; \ - VPINSRQ_1_SI_X12(15*8); \ - VPINSRQ_1_SI_X13(2*8); \ - VPINSRQ_1_SI_X14(4*8); \ - VPINSRQ_1_SI_X15(10*8) - -// load msg: X12 = (12, 13), X13 = (1, 10), X14 = (2, 7), X15 = (4, 5) -#define LOAD_MSG_AVX_12_13_1_10_2_7_4_5() \ - VMOVDQU 12*8(SI), X12; \ - MOVQ 1*8(SI), X13; \ - MOVQ 2*8(SI), X14; \ - VPINSRQ_1_SI_X13(10*8); \ - VPINSRQ_1_SI_X14(7*8); \ - VMOVDQU 4*8(SI), X15 - -// load msg: X12 = (15, 9), X13 = (3, 13), X14 = (11, 14), X15 = (12, 0) -#define LOAD_MSG_AVX_15_9_3_13_11_14_12_0() \ - MOVQ 15*8(SI), X12; \ - MOVQ 3*8(SI), X13; \ - MOVQ 11*8(SI), X14; \ - MOVQ 12*8(SI), X15; \ - VPINSRQ_1_SI_X12(9*8); \ - VPINSRQ_1_SI_X13(13*8); \ - VPINSRQ_1_SI_X14(14*8); \ - VPINSRQ_1_SI_X15_0 +DATA ·AVX2_iv1<>+0(SB)/8, $0x510e527fade682d1 +DATA ·AVX2_iv1<>+8(SB)/8, $0x9b05688c2b3e6c1f +DATA ·AVX2_iv1<>+16(SB)/8, $0x1f83d9abfb41bd6b +DATA ·AVX2_iv1<>+24(SB)/8, $0x5be0cd19137e2179 +GLOBL ·AVX2_iv1<>(SB), RODATA|NOPTR, $32 // func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) -TEXT ·hashBlocksAVX(SB), 4, $288-48 // frame size = 272 + 16 byte alignment - MOVQ h+0(FP), AX - MOVQ c+8(FP), BX - MOVQ flag+16(FP), CX - MOVQ blocks_base+24(FP), SI - MOVQ blocks_len+32(FP), DI - - MOVQ SP, R10 - ADDQ $15, R10 - ANDQ $~15, R10 - - VMOVDQU ·AVX_c40<>(SB), X0 - VMOVDQU ·AVX_c48<>(SB), X1 +// Requires: AVX, SSE2 +TEXT ·hashBlocksAVX(SB), NOSPLIT, $288-48 + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVQ flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DI + MOVQ SP, R10 + ADDQ $0x0f, R10 + ANDQ $-16, R10 + VMOVDQU ·AVX_c40<>+0(SB), X0 + VMOVDQU ·AVX_c48<>+0(SB), X1 VMOVDQA X0, X8 VMOVDQA X1, X9 - - VMOVDQU ·AVX_iv3<>(SB), X0 - VMOVDQA X0, 0(R10) - XORQ CX, 0(R10) // 0(R10) = ·AVX_iv3 ^ (CX || 0) - - VMOVDQU 0(AX), X10 + VMOVDQU ·AVX_iv3<>+0(SB), X0 + VMOVDQA X0, (R10) + XORQ CX, (R10) + VMOVDQU (AX), X10 VMOVDQU 16(AX), X11 VMOVDQU 32(AX), X2 VMOVDQU 48(AX), X3 - - MOVQ 0(BX), R8 - MOVQ 8(BX), R9 + MOVQ (BX), R8 + MOVQ 8(BX), R9 loop: - ADDQ $128, R8 - CMPQ R8, $128 + ADDQ $0x80, R8 + CMPQ R8, $0x80 JGE noinc INCQ R9 noinc: - VMOVQ_R8_X15 - VPINSRQ_1_R9_X15 - + BYTE $0xc4 + BYTE $0x41 + BYTE $0xf9 + BYTE $0x6e + BYTE $0xf8 + BYTE $0xc4 + BYTE $0x43 + BYTE $0x81 + BYTE $0x22 + BYTE $0xf9 + BYTE $0x01 VMOVDQA X10, X0 VMOVDQA X11, X1 - VMOVDQU ·AVX_iv0<>(SB), X4 - VMOVDQU ·AVX_iv1<>(SB), X5 - VMOVDQU ·AVX_iv2<>(SB), X6 - + VMOVDQU ·AVX_iv0<>+0(SB), X4 + VMOVDQU ·AVX_iv1<>+0(SB), X5 + VMOVDQU ·AVX_iv2<>+0(SB), X6 VPXOR X15, X6, X6 - VMOVDQA 0(R10), X7 - - LOAD_MSG_AVX_0_2_4_6_1_3_5_7() + VMOVDQA (R10), X7 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x26 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x20 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x08 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x28 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x38 + BYTE $0x01 VMOVDQA X12, 16(R10) VMOVDQA X13, 32(R10) VMOVDQA X14, 48(R10) VMOVDQA X15, 64(R10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX(8, 10, 12, 14, 9, 11, 13, 15) + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x40 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x78 + BYTE $0x01 VMOVDQA X12, 80(R10) VMOVDQA X13, 96(R10) VMOVDQA X14, 112(R10) VMOVDQA X15, 128(R10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(14, 4, 9, 13, 10, 8, 15, 6) + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x78 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x68 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x30 + BYTE $0x01 VMOVDQA X12, 144(R10) VMOVDQA X13, 160(R10) VMOVDQA X14, 176(R10) VMOVDQA X15, 192(R10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_1_0_11_5_12_2_7_3() + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + VPSHUFD $0x4e, (SI), X12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x58 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x38 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x18 + BYTE $0x01 VMOVDQA X12, 208(R10) VMOVDQA X13, 224(R10) VMOVDQA X14, 240(R10) VMOVDQA X15, 256(R10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX_11_12_5_15_8_0_2_13() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX(10, 3, 7, 9, 14, 6, 1, 4) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(7, 3, 13, 11, 9, 1, 12, 14) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_2_5_4_15_6_10_0_8() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX_9_5_2_10_0_7_4_15() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX(14, 11, 6, 3, 1, 12, 8, 13) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX_2_6_0_8_12_10_11_3() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX(4, 7, 15, 1, 13, 5, 14, 9) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(12, 1, 14, 4, 5, 15, 13, 10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_0_6_9_8_7_3_2_11() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(13, 7, 12, 3, 11, 14, 1, 9) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_5_15_8_2_0_4_6_10() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX_6_14_11_0_15_9_3_8() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_12_13_1_10_2_7_4_5() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(10, 8, 7, 1, 2, 4, 6, 5) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_15_9_3_13_11_14_12_0() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X15, X8, X9) - SHUFFLE_AVX() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X15, X8, X9) - SHUFFLE_AVX_INV() - - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X15, X8, X9) - SHUFFLE_AVX() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X15, X8, X9) - SHUFFLE_AVX_INV() - + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + VMOVDQU 88(SI), X12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x28 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x40 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x10 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x36 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x68 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x08 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x20 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x70 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x20 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x30 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x3e + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x40 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x36 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x20 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x78 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x30 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x08 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x40 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x60 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x68 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x2e + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x58 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x18 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x20 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x78 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x70 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x48 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x28 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x50 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + MOVQ (SI), X12 + VPSHUFD $0x4e, 64(SI), X13 + MOVQ 56(SI), X14 + MOVQ 16(SI), X15 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x58 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x58 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x08 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x48 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + MOVQ 40(SI), X12 + MOVQ 64(SI), X13 + MOVQ (SI), X14 + MOVQ 48(SI), X15 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x50 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + MOVQ 48(SI), X12 + MOVQ 88(SI), X13 + MOVQ 120(SI), X14 + MOVQ 24(SI), X15 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x2e + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x40 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + VMOVDQU 96(SI), X12 + MOVQ 8(SI), X13 + MOVQ 16(SI), X14 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x38 + BYTE $0x01 + VMOVDQU 32(SI), X15 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x30 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x28 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + MOVQ 120(SI), X12 + MOVQ 24(SI), X13 + MOVQ 88(SI), X14 + MOVQ 96(SI), X15 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x68 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x3e + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + VPADDQ 16(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 32(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ 48(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 64(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + VPADDQ 80(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 96(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ 112(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 128(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + VPADDQ 144(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 160(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ 176(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 192(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + VPADDQ 208(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 224(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ 240(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 256(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff VMOVDQU 32(AX), X14 VMOVDQU 48(AX), X15 VPXOR X0, X10, X10 @@ -729,16 +4524,36 @@ noinc: VPXOR X7, X15, X3 VMOVDQU X2, 32(AX) VMOVDQU X3, 48(AX) + LEAQ 128(SI), SI + SUBQ $0x80, DI + JNE loop + VMOVDQU X10, (AX) + VMOVDQU X11, 16(AX) + MOVQ R8, (BX) + MOVQ R9, 8(BX) + VZEROUPPER + RET - LEAQ 128(SI), SI - SUBQ $128, DI - JNE loop +DATA ·AVX_c40<>+0(SB)/8, $0x0201000706050403 +DATA ·AVX_c40<>+8(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·AVX_c40<>(SB), RODATA|NOPTR, $16 - VMOVDQU X10, 0(AX) - VMOVDQU X11, 16(AX) +DATA ·AVX_c48<>+0(SB)/8, $0x0100070605040302 +DATA ·AVX_c48<>+8(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·AVX_c48<>(SB), RODATA|NOPTR, $16 - MOVQ R8, 0(BX) - MOVQ R9, 8(BX) - VZEROUPPER +DATA ·AVX_iv3<>+0(SB)/8, $0x1f83d9abfb41bd6b +DATA ·AVX_iv3<>+8(SB)/8, $0x5be0cd19137e2179 +GLOBL ·AVX_iv3<>(SB), RODATA|NOPTR, $16 - RET +DATA ·AVX_iv0<>+0(SB)/8, $0x6a09e667f3bcc908 +DATA ·AVX_iv0<>+8(SB)/8, $0xbb67ae8584caa73b +GLOBL ·AVX_iv0<>(SB), RODATA|NOPTR, $16 + +DATA ·AVX_iv1<>+0(SB)/8, $0x3c6ef372fe94f82b +DATA ·AVX_iv1<>+8(SB)/8, $0xa54ff53a5f1d36f1 +GLOBL ·AVX_iv1<>(SB), RODATA|NOPTR, $16 + +DATA ·AVX_iv2<>+0(SB)/8, $0x510e527fade682d1 +DATA ·AVX_iv2<>+8(SB)/8, $0x9b05688c2b3e6c1f +GLOBL ·AVX_iv2<>(SB), RODATA|NOPTR, $16 diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s index adfac00c..9a0ce212 100644 --- a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s +++ b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s @@ -1,278 +1,1441 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run blake2b_amd64_asm.go -out ../../blake2b_amd64.s -pkg blake2b. DO NOT EDIT. //go:build amd64 && gc && !purego #include "textflag.h" -DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 -DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b -GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16 - -DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b -DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1 -GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16 - -DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1 -DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f -GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16 - -DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b -DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179 -GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16 - -DATA ·c40<>+0x00(SB)/8, $0x0201000706050403 -DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b -GLOBL ·c40<>(SB), (NOPTR+RODATA), $16 - -DATA ·c48<>+0x00(SB)/8, $0x0100070605040302 -DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a -GLOBL ·c48<>(SB), (NOPTR+RODATA), $16 - -#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \ - MOVO v4, t1; \ - MOVO v5, v4; \ - MOVO t1, v5; \ - MOVO v6, t1; \ - PUNPCKLQDQ v6, t2; \ - PUNPCKHQDQ v7, v6; \ - PUNPCKHQDQ t2, v6; \ - PUNPCKLQDQ v7, t2; \ - MOVO t1, v7; \ - MOVO v2, t1; \ - PUNPCKHQDQ t2, v7; \ - PUNPCKLQDQ v3, t2; \ - PUNPCKHQDQ t2, v2; \ - PUNPCKLQDQ t1, t2; \ - PUNPCKHQDQ t2, v3 - -#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \ - MOVO v4, t1; \ - MOVO v5, v4; \ - MOVO t1, v5; \ - MOVO v2, t1; \ - PUNPCKLQDQ v2, t2; \ - PUNPCKHQDQ v3, v2; \ - PUNPCKHQDQ t2, v2; \ - PUNPCKLQDQ v3, t2; \ - MOVO t1, v3; \ - MOVO v6, t1; \ - PUNPCKHQDQ t2, v3; \ - PUNPCKLQDQ v7, t2; \ - PUNPCKHQDQ t2, v6; \ - PUNPCKLQDQ t1, t2; \ - PUNPCKHQDQ t2, v7 - -#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \ - PADDQ m0, v0; \ - PADDQ m1, v1; \ - PADDQ v2, v0; \ - PADDQ v3, v1; \ - PXOR v0, v6; \ - PXOR v1, v7; \ - PSHUFD $0xB1, v6, v6; \ - PSHUFD $0xB1, v7, v7; \ - PADDQ v6, v4; \ - PADDQ v7, v5; \ - PXOR v4, v2; \ - PXOR v5, v3; \ - PSHUFB c40, v2; \ - PSHUFB c40, v3; \ - PADDQ m2, v0; \ - PADDQ m3, v1; \ - PADDQ v2, v0; \ - PADDQ v3, v1; \ - PXOR v0, v6; \ - PXOR v1, v7; \ - PSHUFB c48, v6; \ - PSHUFB c48, v7; \ - PADDQ v6, v4; \ - PADDQ v7, v5; \ - PXOR v4, v2; \ - PXOR v5, v3; \ - MOVOU v2, t0; \ - PADDQ v2, t0; \ - PSRLQ $63, v2; \ - PXOR t0, v2; \ - MOVOU v3, t0; \ - PADDQ v3, t0; \ - PSRLQ $63, v3; \ - PXOR t0, v3 - -#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \ - MOVQ i0*8(src), m0; \ - PINSRQ $1, i1*8(src), m0; \ - MOVQ i2*8(src), m1; \ - PINSRQ $1, i3*8(src), m1; \ - MOVQ i4*8(src), m2; \ - PINSRQ $1, i5*8(src), m2; \ - MOVQ i6*8(src), m3; \ - PINSRQ $1, i7*8(src), m3 - // func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) -TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment - MOVQ h+0(FP), AX - MOVQ c+8(FP), BX - MOVQ flag+16(FP), CX - MOVQ blocks_base+24(FP), SI - MOVQ blocks_len+32(FP), DI - - MOVQ SP, R10 - ADDQ $15, R10 - ANDQ $~15, R10 - - MOVOU ·iv3<>(SB), X0 - MOVO X0, 0(R10) - XORQ CX, 0(R10) // 0(R10) = ·iv3 ^ (CX || 0) - - MOVOU ·c40<>(SB), X13 - MOVOU ·c48<>(SB), X14 - - MOVOU 0(AX), X12 +// Requires: SSE2, SSE4.1, SSSE3 +TEXT ·hashBlocksSSE4(SB), NOSPLIT, $288-48 + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVQ flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DI + MOVQ SP, R10 + ADDQ $0x0f, R10 + ANDQ $-16, R10 + MOVOU ·iv3<>+0(SB), X0 + MOVO X0, (R10) + XORQ CX, (R10) + MOVOU ·c40<>+0(SB), X13 + MOVOU ·c48<>+0(SB), X14 + MOVOU (AX), X12 MOVOU 16(AX), X15 - - MOVQ 0(BX), R8 - MOVQ 8(BX), R9 + MOVQ (BX), R8 + MOVQ 8(BX), R9 loop: - ADDQ $128, R8 - CMPQ R8, $128 + ADDQ $0x80, R8 + CMPQ R8, $0x80 JGE noinc INCQ R9 noinc: - MOVQ R8, X8 - PINSRQ $1, R9, X8 - - MOVO X12, X0 - MOVO X15, X1 - MOVOU 32(AX), X2 - MOVOU 48(AX), X3 - MOVOU ·iv0<>(SB), X4 - MOVOU ·iv1<>(SB), X5 - MOVOU ·iv2<>(SB), X6 - - PXOR X8, X6 - MOVO 0(R10), X7 - - LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7) - MOVO X8, 16(R10) - MOVO X9, 32(R10) - MOVO X10, 48(R10) - MOVO X11, 64(R10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15) - MOVO X8, 80(R10) - MOVO X9, 96(R10) - MOVO X10, 112(R10) - MOVO X11, 128(R10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6) - MOVO X8, 144(R10) - MOVO X9, 160(R10) - MOVO X10, 176(R10) - MOVO X11, 192(R10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3) - MOVO X8, 208(R10) - MOVO X9, 224(R10) - MOVO X10, 240(R10) - MOVO X11, 256(R10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + MOVQ R8, X8 + PINSRQ $0x01, R9, X8 + MOVO X12, X0 + MOVO X15, X1 + MOVOU 32(AX), X2 + MOVOU 48(AX), X3 + MOVOU ·iv0<>+0(SB), X4 + MOVOU ·iv1<>+0(SB), X5 + MOVOU ·iv2<>+0(SB), X6 + PXOR X8, X6 + MOVO (R10), X7 + MOVQ (SI), X8 + PINSRQ $0x01, 16(SI), X8 + MOVQ 32(SI), X9 + PINSRQ $0x01, 48(SI), X9 + MOVQ 8(SI), X10 + PINSRQ $0x01, 24(SI), X10 + MOVQ 40(SI), X11 + PINSRQ $0x01, 56(SI), X11 + MOVO X8, 16(R10) + MOVO X9, 32(R10) + MOVO X10, 48(R10) + MOVO X11, 64(R10) + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 64(SI), X8 + PINSRQ $0x01, 80(SI), X8 + MOVQ 96(SI), X9 + PINSRQ $0x01, 112(SI), X9 + MOVQ 72(SI), X10 + PINSRQ $0x01, 88(SI), X10 + MOVQ 104(SI), X11 + PINSRQ $0x01, 120(SI), X11 + MOVO X8, 80(R10) + MOVO X9, 96(R10) + MOVO X10, 112(R10) + MOVO X11, 128(R10) + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 112(SI), X8 + PINSRQ $0x01, 32(SI), X8 + MOVQ 72(SI), X9 + PINSRQ $0x01, 104(SI), X9 + MOVQ 80(SI), X10 + PINSRQ $0x01, 64(SI), X10 + MOVQ 120(SI), X11 + PINSRQ $0x01, 48(SI), X11 + MOVO X8, 144(R10) + MOVO X9, 160(R10) + MOVO X10, 176(R10) + MOVO X11, 192(R10) + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 8(SI), X8 + PINSRQ $0x01, (SI), X8 + MOVQ 88(SI), X9 + PINSRQ $0x01, 40(SI), X9 + MOVQ 96(SI), X10 + PINSRQ $0x01, 16(SI), X10 + MOVQ 56(SI), X11 + PINSRQ $0x01, 24(SI), X11 + MOVO X8, 208(R10) + MOVO X9, 224(R10) + MOVO X10, 240(R10) + MOVO X11, 256(R10) + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 88(SI), X8 + PINSRQ $0x01, 96(SI), X8 + MOVQ 40(SI), X9 + PINSRQ $0x01, 120(SI), X9 + MOVQ 64(SI), X10 + PINSRQ $0x01, (SI), X10 + MOVQ 16(SI), X11 + PINSRQ $0x01, 104(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 80(SI), X8 + PINSRQ $0x01, 24(SI), X8 + MOVQ 56(SI), X9 + PINSRQ $0x01, 72(SI), X9 + MOVQ 112(SI), X10 + PINSRQ $0x01, 48(SI), X10 + MOVQ 8(SI), X11 + PINSRQ $0x01, 32(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 56(SI), X8 + PINSRQ $0x01, 24(SI), X8 + MOVQ 104(SI), X9 + PINSRQ $0x01, 88(SI), X9 + MOVQ 72(SI), X10 + PINSRQ $0x01, 8(SI), X10 + MOVQ 96(SI), X11 + PINSRQ $0x01, 112(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 16(SI), X8 + PINSRQ $0x01, 40(SI), X8 + MOVQ 32(SI), X9 + PINSRQ $0x01, 120(SI), X9 + MOVQ 48(SI), X10 + PINSRQ $0x01, 80(SI), X10 + MOVQ (SI), X11 + PINSRQ $0x01, 64(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 72(SI), X8 + PINSRQ $0x01, 40(SI), X8 + MOVQ 16(SI), X9 + PINSRQ $0x01, 80(SI), X9 + MOVQ (SI), X10 + PINSRQ $0x01, 56(SI), X10 + MOVQ 32(SI), X11 + PINSRQ $0x01, 120(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 112(SI), X8 + PINSRQ $0x01, 88(SI), X8 + MOVQ 48(SI), X9 + PINSRQ $0x01, 24(SI), X9 + MOVQ 8(SI), X10 + PINSRQ $0x01, 96(SI), X10 + MOVQ 64(SI), X11 + PINSRQ $0x01, 104(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 16(SI), X8 + PINSRQ $0x01, 48(SI), X8 + MOVQ (SI), X9 + PINSRQ $0x01, 64(SI), X9 + MOVQ 96(SI), X10 + PINSRQ $0x01, 80(SI), X10 + MOVQ 88(SI), X11 + PINSRQ $0x01, 24(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 32(SI), X8 + PINSRQ $0x01, 56(SI), X8 + MOVQ 120(SI), X9 + PINSRQ $0x01, 8(SI), X9 + MOVQ 104(SI), X10 + PINSRQ $0x01, 40(SI), X10 + MOVQ 112(SI), X11 + PINSRQ $0x01, 72(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 96(SI), X8 + PINSRQ $0x01, 8(SI), X8 + MOVQ 112(SI), X9 + PINSRQ $0x01, 32(SI), X9 + MOVQ 40(SI), X10 + PINSRQ $0x01, 120(SI), X10 + MOVQ 104(SI), X11 + PINSRQ $0x01, 80(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ (SI), X8 + PINSRQ $0x01, 48(SI), X8 + MOVQ 72(SI), X9 + PINSRQ $0x01, 64(SI), X9 + MOVQ 56(SI), X10 + PINSRQ $0x01, 24(SI), X10 + MOVQ 16(SI), X11 + PINSRQ $0x01, 88(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 104(SI), X8 + PINSRQ $0x01, 56(SI), X8 + MOVQ 96(SI), X9 + PINSRQ $0x01, 24(SI), X9 + MOVQ 88(SI), X10 + PINSRQ $0x01, 112(SI), X10 + MOVQ 8(SI), X11 + PINSRQ $0x01, 72(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 40(SI), X8 + PINSRQ $0x01, 120(SI), X8 + MOVQ 64(SI), X9 + PINSRQ $0x01, 16(SI), X9 + MOVQ (SI), X10 + PINSRQ $0x01, 32(SI), X10 + MOVQ 48(SI), X11 + PINSRQ $0x01, 80(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 48(SI), X8 + PINSRQ $0x01, 112(SI), X8 + MOVQ 88(SI), X9 + PINSRQ $0x01, (SI), X9 + MOVQ 120(SI), X10 + PINSRQ $0x01, 72(SI), X10 + MOVQ 24(SI), X11 + PINSRQ $0x01, 64(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 96(SI), X8 + PINSRQ $0x01, 104(SI), X8 + MOVQ 8(SI), X9 + PINSRQ $0x01, 80(SI), X9 + MOVQ 16(SI), X10 + PINSRQ $0x01, 56(SI), X10 + MOVQ 32(SI), X11 + PINSRQ $0x01, 40(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 80(SI), X8 + PINSRQ $0x01, 64(SI), X8 + MOVQ 56(SI), X9 + PINSRQ $0x01, 8(SI), X9 + MOVQ 16(SI), X10 + PINSRQ $0x01, 32(SI), X10 + MOVQ 48(SI), X11 + PINSRQ $0x01, 40(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 120(SI), X8 + PINSRQ $0x01, 72(SI), X8 + MOVQ 24(SI), X9 + PINSRQ $0x01, 104(SI), X9 + MOVQ 88(SI), X10 + PINSRQ $0x01, 112(SI), X10 + MOVQ 96(SI), X11 + PINSRQ $0x01, (SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + PADDQ 16(R10), X0 + PADDQ 32(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ 48(R10), X0 + PADDQ 64(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + PADDQ 80(R10), X0 + PADDQ 96(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ 112(R10), X0 + PADDQ 128(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + PADDQ 144(R10), X0 + PADDQ 160(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ 176(R10), X0 + PADDQ 192(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + PADDQ 208(R10), X0 + PADDQ 224(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ 240(R10), X0 + PADDQ 256(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU 32(AX), X10 + MOVOU 48(AX), X11 + PXOR X0, X12 + PXOR X1, X15 + PXOR X2, X10 + PXOR X3, X11 + PXOR X4, X12 + PXOR X5, X15 + PXOR X6, X10 + PXOR X7, X11 + MOVOU X10, 32(AX) + MOVOU X11, 48(AX) + LEAQ 128(SI), SI + SUBQ $0x80, DI + JNE loop + MOVOU X12, (AX) + MOVOU X15, 16(AX) + MOVQ R8, (BX) + MOVQ R9, 8(BX) + RET - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) +DATA ·iv3<>+0(SB)/8, $0x1f83d9abfb41bd6b +DATA ·iv3<>+8(SB)/8, $0x5be0cd19137e2179 +GLOBL ·iv3<>(SB), RODATA|NOPTR, $16 - MOVOU 32(AX), X10 - MOVOU 48(AX), X11 - PXOR X0, X12 - PXOR X1, X15 - PXOR X2, X10 - PXOR X3, X11 - PXOR X4, X12 - PXOR X5, X15 - PXOR X6, X10 - PXOR X7, X11 - MOVOU X10, 32(AX) - MOVOU X11, 48(AX) +DATA ·c40<>+0(SB)/8, $0x0201000706050403 +DATA ·c40<>+8(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·c40<>(SB), RODATA|NOPTR, $16 - LEAQ 128(SI), SI - SUBQ $128, DI - JNE loop +DATA ·c48<>+0(SB)/8, $0x0100070605040302 +DATA ·c48<>+8(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·c48<>(SB), RODATA|NOPTR, $16 - MOVOU X12, 0(AX) - MOVOU X15, 16(AX) +DATA ·iv0<>+0(SB)/8, $0x6a09e667f3bcc908 +DATA ·iv0<>+8(SB)/8, $0xbb67ae8584caa73b +GLOBL ·iv0<>(SB), RODATA|NOPTR, $16 - MOVQ R8, 0(BX) - MOVQ R9, 8(BX) +DATA ·iv1<>+0(SB)/8, $0x3c6ef372fe94f82b +DATA ·iv1<>+8(SB)/8, $0xa54ff53a5f1d36f1 +GLOBL ·iv1<>(SB), RODATA|NOPTR, $16 - RET +DATA ·iv2<>+0(SB)/8, $0x510e527fade682d1 +DATA ·iv2<>+8(SB)/8, $0x9b05688c2b3e6c1f +GLOBL ·iv2<>(SB), RODATA|NOPTR, $16 diff --git a/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s b/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s index e0d3c647..13375738 100644 --- a/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s +++ b/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s @@ -1,108 +1,93 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run sum_amd64_asm.go -out ../sum_amd64.s -pkg poly1305. DO NOT EDIT. //go:build gc && !purego -#include "textflag.h" - -#define POLY1305_ADD(msg, h0, h1, h2) \ - ADDQ 0(msg), h0; \ - ADCQ 8(msg), h1; \ - ADCQ $1, h2; \ - LEAQ 16(msg), msg - -#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \ - MOVQ r0, AX; \ - MULQ h0; \ - MOVQ AX, t0; \ - MOVQ DX, t1; \ - MOVQ r0, AX; \ - MULQ h1; \ - ADDQ AX, t1; \ - ADCQ $0, DX; \ - MOVQ r0, t2; \ - IMULQ h2, t2; \ - ADDQ DX, t2; \ - \ - MOVQ r1, AX; \ - MULQ h0; \ - ADDQ AX, t1; \ - ADCQ $0, DX; \ - MOVQ DX, h0; \ - MOVQ r1, t3; \ - IMULQ h2, t3; \ - MOVQ r1, AX; \ - MULQ h1; \ - ADDQ AX, t2; \ - ADCQ DX, t3; \ - ADDQ h0, t2; \ - ADCQ $0, t3; \ - \ - MOVQ t0, h0; \ - MOVQ t1, h1; \ - MOVQ t2, h2; \ - ANDQ $3, h2; \ - MOVQ t2, t0; \ - ANDQ $0xFFFFFFFFFFFFFFFC, t0; \ - ADDQ t0, h0; \ - ADCQ t3, h1; \ - ADCQ $0, h2; \ - SHRQ $2, t3, t2; \ - SHRQ $2, t3; \ - ADDQ t2, h0; \ - ADCQ t3, h1; \ - ADCQ $0, h2 - -// func update(state *[7]uint64, msg []byte) +// func update(state *macState, msg []byte) TEXT ·update(SB), $0-32 MOVQ state+0(FP), DI MOVQ msg_base+8(FP), SI MOVQ msg_len+16(FP), R15 - - MOVQ 0(DI), R8 // h0 - MOVQ 8(DI), R9 // h1 - MOVQ 16(DI), R10 // h2 - MOVQ 24(DI), R11 // r0 - MOVQ 32(DI), R12 // r1 - - CMPQ R15, $16 + MOVQ (DI), R8 + MOVQ 8(DI), R9 + MOVQ 16(DI), R10 + MOVQ 24(DI), R11 + MOVQ 32(DI), R12 + CMPQ R15, $0x10 JB bytes_between_0_and_15 loop: - POLY1305_ADD(SI, R8, R9, R10) + ADDQ (SI), R8 + ADCQ 8(SI), R9 + ADCQ $0x01, R10 + LEAQ 16(SI), SI multiply: - POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14) - SUBQ $16, R15 - CMPQ R15, $16 - JAE loop + MOVQ R11, AX + MULQ R8 + MOVQ AX, BX + MOVQ DX, CX + MOVQ R11, AX + MULQ R9 + ADDQ AX, CX + ADCQ $0x00, DX + MOVQ R11, R13 + IMULQ R10, R13 + ADDQ DX, R13 + MOVQ R12, AX + MULQ R8 + ADDQ AX, CX + ADCQ $0x00, DX + MOVQ DX, R8 + MOVQ R12, R14 + IMULQ R10, R14 + MOVQ R12, AX + MULQ R9 + ADDQ AX, R13 + ADCQ DX, R14 + ADDQ R8, R13 + ADCQ $0x00, R14 + MOVQ BX, R8 + MOVQ CX, R9 + MOVQ R13, R10 + ANDQ $0x03, R10 + MOVQ R13, BX + ANDQ $-4, BX + ADDQ BX, R8 + ADCQ R14, R9 + ADCQ $0x00, R10 + SHRQ $0x02, R14, R13 + SHRQ $0x02, R14 + ADDQ R13, R8 + ADCQ R14, R9 + ADCQ $0x00, R10 + SUBQ $0x10, R15 + CMPQ R15, $0x10 + JAE loop bytes_between_0_and_15: TESTQ R15, R15 JZ done - MOVQ $1, BX + MOVQ $0x00000001, BX XORQ CX, CX XORQ R13, R13 ADDQ R15, SI flush_buffer: - SHLQ $8, BX, CX - SHLQ $8, BX + SHLQ $0x08, BX, CX + SHLQ $0x08, BX MOVB -1(SI), R13 XORQ R13, BX DECQ SI DECQ R15 JNZ flush_buffer - ADDQ BX, R8 ADCQ CX, R9 - ADCQ $0, R10 - MOVQ $16, R15 + ADCQ $0x00, R10 + MOVQ $0x00000010, R15 JMP multiply done: - MOVQ R8, 0(DI) + MOVQ R8, (DI) MOVQ R9, 8(DI) MOVQ R10, 16(DI) RET diff --git a/vendor/golang.org/x/crypto/ssh/agent/keyring.go b/vendor/golang.org/x/crypto/ssh/agent/keyring.go index 21bfa870..c1b43610 100644 --- a/vendor/golang.org/x/crypto/ssh/agent/keyring.go +++ b/vendor/golang.org/x/crypto/ssh/agent/keyring.go @@ -175,6 +175,15 @@ func (r *keyring) Add(key AddedKey) error { p.expire = &t } + // If we already have a Signer with the same public key, replace it with the + // new one. + for idx, k := range r.keys { + if bytes.Equal(k.signer.PublicKey().Marshal(), p.signer.PublicKey().Marshal()) { + r.keys[idx] = p + return nil + } + } + r.keys = append(r.keys, p) return nil diff --git a/vendor/golang.org/x/sys/cpu/cpu.go b/vendor/golang.org/x/sys/cpu/cpu.go index ec07aab0..02609d5b 100644 --- a/vendor/golang.org/x/sys/cpu/cpu.go +++ b/vendor/golang.org/x/sys/cpu/cpu.go @@ -201,6 +201,25 @@ var S390X struct { _ CacheLinePad } +// RISCV64 contains the supported CPU features and performance characteristics for riscv64 +// platforms. The booleans in RISCV64, with the exception of HasFastMisaligned, indicate +// the presence of RISC-V extensions. +// +// It is safe to assume that all the RV64G extensions are supported and so they are omitted from +// this structure. As riscv64 Go programs require at least RV64G, the code that populates +// this structure cannot run successfully if some of the RV64G extensions are missing. +// The struct is padded to avoid false sharing. +var RISCV64 struct { + _ CacheLinePad + HasFastMisaligned bool // Fast misaligned accesses + HasC bool // Compressed instruction-set extension + HasV bool // Vector extension compatible with RVV 1.0 + HasZba bool // Address generation instructions extension + HasZbb bool // Basic bit-manipulation extension + HasZbs bool // Single-bit instructions extension + _ CacheLinePad +} + func init() { archInit() initOptions() diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go index cd63e733..7d902b68 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x +//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x && !riscv64 package cpu diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go new file mode 100644 index 00000000..cb4a0c57 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go @@ -0,0 +1,137 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cpu + +import ( + "syscall" + "unsafe" +) + +// RISC-V extension discovery code for Linux. The approach here is to first try the riscv_hwprobe +// syscall falling back to HWCAP to check for the C extension if riscv_hwprobe is not available. +// +// A note on detection of the Vector extension using HWCAP. +// +// Support for the Vector extension version 1.0 was added to the Linux kernel in release 6.5. +// Support for the riscv_hwprobe syscall was added in 6.4. It follows that if the riscv_hwprobe +// syscall is not available then neither is the Vector extension (which needs kernel support). +// The riscv_hwprobe syscall should then be all we need to detect the Vector extension. +// However, some RISC-V board manufacturers ship boards with an older kernel on top of which +// they have back-ported various versions of the Vector extension patches but not the riscv_hwprobe +// patches. These kernels advertise support for the Vector extension using HWCAP. Falling +// back to HWCAP to detect the Vector extension, if riscv_hwprobe is not available, or simply not +// bothering with riscv_hwprobe at all and just using HWCAP may then seem like an attractive option. +// +// Unfortunately, simply checking the 'V' bit in AT_HWCAP will not work as this bit is used by +// RISC-V board and cloud instance providers to mean different things. The Lichee Pi 4A board +// and the Scaleway RV1 cloud instances use the 'V' bit to advertise their support for the unratified +// 0.7.1 version of the Vector Specification. The Banana Pi BPI-F3 and the CanMV-K230 board use +// it to advertise support for 1.0 of the Vector extension. Versions 0.7.1 and 1.0 of the Vector +// extension are binary incompatible. HWCAP can then not be used in isolation to populate the +// HasV field as this field indicates that the underlying CPU is compatible with RVV 1.0. +// +// There is a way at runtime to distinguish between versions 0.7.1 and 1.0 of the Vector +// specification by issuing a RVV 1.0 vsetvli instruction and checking the vill bit of the vtype +// register. This check would allow us to safely detect version 1.0 of the Vector extension +// with HWCAP, if riscv_hwprobe were not available. However, the check cannot +// be added until the assembler supports the Vector instructions. +// +// Note the riscv_hwprobe syscall does not suffer from these ambiguities by design as all of the +// extensions it advertises support for are explicitly versioned. It's also worth noting that +// the riscv_hwprobe syscall is the only way to detect multi-letter RISC-V extensions, e.g., Zba. +// These cannot be detected using HWCAP and so riscv_hwprobe must be used to detect the majority +// of RISC-V extensions. +// +// Please see https://docs.kernel.org/arch/riscv/hwprobe.html for more information. + +// golang.org/x/sys/cpu is not allowed to depend on golang.org/x/sys/unix so we must +// reproduce the constants, types and functions needed to make the riscv_hwprobe syscall +// here. + +const ( + // Copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go. + riscv_HWPROBE_KEY_IMA_EXT_0 = 0x4 + riscv_HWPROBE_IMA_C = 0x2 + riscv_HWPROBE_IMA_V = 0x4 + riscv_HWPROBE_EXT_ZBA = 0x8 + riscv_HWPROBE_EXT_ZBB = 0x10 + riscv_HWPROBE_EXT_ZBS = 0x20 + riscv_HWPROBE_KEY_CPUPERF_0 = 0x5 + riscv_HWPROBE_MISALIGNED_FAST = 0x3 + riscv_HWPROBE_MISALIGNED_MASK = 0x7 +) + +const ( + // sys_RISCV_HWPROBE is copied from golang.org/x/sys/unix/zsysnum_linux_riscv64.go. + sys_RISCV_HWPROBE = 258 +) + +// riscvHWProbePairs is copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go. +type riscvHWProbePairs struct { + key int64 + value uint64 +} + +const ( + // CPU features + hwcap_RISCV_ISA_C = 1 << ('C' - 'A') +) + +func doinit() { + // A slice of key/value pair structures is passed to the RISCVHWProbe syscall. The key + // field should be initialised with one of the key constants defined above, e.g., + // RISCV_HWPROBE_KEY_IMA_EXT_0. The syscall will set the value field to the appropriate value. + // If the kernel does not recognise a key it will set the key field to -1 and the value field to 0. + + pairs := []riscvHWProbePairs{ + {riscv_HWPROBE_KEY_IMA_EXT_0, 0}, + {riscv_HWPROBE_KEY_CPUPERF_0, 0}, + } + + // This call only indicates that extensions are supported if they are implemented on all cores. + if riscvHWProbe(pairs, 0) { + if pairs[0].key != -1 { + v := uint(pairs[0].value) + RISCV64.HasC = isSet(v, riscv_HWPROBE_IMA_C) + RISCV64.HasV = isSet(v, riscv_HWPROBE_IMA_V) + RISCV64.HasZba = isSet(v, riscv_HWPROBE_EXT_ZBA) + RISCV64.HasZbb = isSet(v, riscv_HWPROBE_EXT_ZBB) + RISCV64.HasZbs = isSet(v, riscv_HWPROBE_EXT_ZBS) + } + if pairs[1].key != -1 { + v := pairs[1].value & riscv_HWPROBE_MISALIGNED_MASK + RISCV64.HasFastMisaligned = v == riscv_HWPROBE_MISALIGNED_FAST + } + } + + // Let's double check with HWCAP if the C extension does not appear to be supported. + // This may happen if we're running on a kernel older than 6.4. + + if !RISCV64.HasC { + RISCV64.HasC = isSet(hwCap, hwcap_RISCV_ISA_C) + } +} + +func isSet(hwc uint, value uint) bool { + return hwc&value != 0 +} + +// riscvHWProbe is a simplified version of the generated wrapper function found in +// golang.org/x/sys/unix/zsyscall_linux_riscv64.go. We simplify it by removing the +// cpuCount and cpus parameters which we do not need. We always want to pass 0 for +// these parameters here so the kernel only reports the extensions that are present +// on all cores. +func riscvHWProbe(pairs []riscvHWProbePairs, flags uint) bool { + var _zero uintptr + var p0 unsafe.Pointer + if len(pairs) > 0 { + p0 = unsafe.Pointer(&pairs[0]) + } else { + p0 = unsafe.Pointer(&_zero) + } + + _, _, e1 := syscall.Syscall6(sys_RISCV_HWPROBE, uintptr(p0), uintptr(len(pairs)), uintptr(0), uintptr(0), uintptr(flags), 0) + return e1 == 0 +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go index 7f0c79c0..aca3199c 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go +++ b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go @@ -8,4 +8,13 @@ package cpu const cacheLineSize = 64 -func initOptions() {} +func initOptions() { + options = []option{ + {Name: "fastmisaligned", Feature: &RISCV64.HasFastMisaligned}, + {Name: "c", Feature: &RISCV64.HasC}, + {Name: "v", Feature: &RISCV64.HasV}, + {Name: "zba", Feature: &RISCV64.HasZba}, + {Name: "zbb", Feature: &RISCV64.HasZbb}, + {Name: "zbs", Feature: &RISCV64.HasZbs}, + } +} diff --git a/vendor/golang.org/x/sys/unix/mkerrors.sh b/vendor/golang.org/x/sys/unix/mkerrors.sh index d07dd09e..e14b766a 100644 --- a/vendor/golang.org/x/sys/unix/mkerrors.sh +++ b/vendor/golang.org/x/sys/unix/mkerrors.sh @@ -552,6 +552,7 @@ ccflags="$@" $2 !~ /^RTC_VL_(ACCURACY|BACKUP|DATA)/ && $2 ~ /^(NETLINK|NLM|NLMSG|NLA|IFA|IFAN|RT|RTC|RTCF|RTN|RTPROT|RTNH|ARPHRD|ETH_P|NETNSA)_/ || $2 ~ /^SOCK_|SK_DIAG_|SKNLGRP_$/ || + $2 ~ /^(CONNECT|SAE)_/ || $2 ~ /^FIORDCHK$/ || $2 ~ /^SIOC/ || $2 ~ /^TIOC/ || diff --git a/vendor/golang.org/x/sys/unix/syscall_darwin.go b/vendor/golang.org/x/sys/unix/syscall_darwin.go index 2d15200a..099867de 100644 --- a/vendor/golang.org/x/sys/unix/syscall_darwin.go +++ b/vendor/golang.org/x/sys/unix/syscall_darwin.go @@ -566,6 +566,43 @@ func PthreadFchdir(fd int) (err error) { return pthread_fchdir_np(fd) } +// Connectx calls connectx(2) to initiate a connection on a socket. +// +// srcIf, srcAddr, and dstAddr are filled into a [SaEndpoints] struct and passed as the endpoints argument. +// +// - srcIf is the optional source interface index. 0 means unspecified. +// - srcAddr is the optional source address. nil means unspecified. +// - dstAddr is the destination address. +// +// On success, Connectx returns the number of bytes enqueued for transmission. +func Connectx(fd int, srcIf uint32, srcAddr, dstAddr Sockaddr, associd SaeAssocID, flags uint32, iov []Iovec, connid *SaeConnID) (n uintptr, err error) { + endpoints := SaEndpoints{ + Srcif: srcIf, + } + + if srcAddr != nil { + addrp, addrlen, err := srcAddr.sockaddr() + if err != nil { + return 0, err + } + endpoints.Srcaddr = (*RawSockaddr)(addrp) + endpoints.Srcaddrlen = uint32(addrlen) + } + + if dstAddr != nil { + addrp, addrlen, err := dstAddr.sockaddr() + if err != nil { + return 0, err + } + endpoints.Dstaddr = (*RawSockaddr)(addrp) + endpoints.Dstaddrlen = uint32(addrlen) + } + + err = connectx(fd, &endpoints, associd, flags, iov, &n, connid) + return +} + +//sys connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) //sys sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) //sys shmat(id int, addr uintptr, flag int) (ret uintptr, err error) diff --git a/vendor/golang.org/x/sys/unix/syscall_hurd.go b/vendor/golang.org/x/sys/unix/syscall_hurd.go index ba46651f..a6a2d2fc 100644 --- a/vendor/golang.org/x/sys/unix/syscall_hurd.go +++ b/vendor/golang.org/x/sys/unix/syscall_hurd.go @@ -11,6 +11,7 @@ package unix int ioctl(int, unsigned long int, uintptr_t); */ import "C" +import "unsafe" func ioctl(fd int, req uint, arg uintptr) (err error) { r0, er := C.ioctl(C.int(fd), C.ulong(req), C.uintptr_t(arg)) diff --git a/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go b/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go index 4308ac17..d73c4652 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go @@ -237,6 +237,9 @@ const ( CLOCK_UPTIME_RAW_APPROX = 0x9 CLONE_NOFOLLOW = 0x1 CLONE_NOOWNERCOPY = 0x2 + CONNECT_DATA_AUTHENTICATED = 0x4 + CONNECT_DATA_IDEMPOTENT = 0x2 + CONNECT_RESUME_ON_READ_WRITE = 0x1 CR0 = 0x0 CR1 = 0x1000 CR2 = 0x2000 @@ -1265,6 +1268,10 @@ const ( RTV_SSTHRESH = 0x20 RUSAGE_CHILDREN = -0x1 RUSAGE_SELF = 0x0 + SAE_ASSOCID_ALL = 0xffffffff + SAE_ASSOCID_ANY = 0x0 + SAE_CONNID_ALL = 0xffffffff + SAE_CONNID_ANY = 0x0 SCM_CREDS = 0x3 SCM_RIGHTS = 0x1 SCM_TIMESTAMP = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go b/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go index c8068a7a..4a55a400 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go @@ -237,6 +237,9 @@ const ( CLOCK_UPTIME_RAW_APPROX = 0x9 CLONE_NOFOLLOW = 0x1 CLONE_NOOWNERCOPY = 0x2 + CONNECT_DATA_AUTHENTICATED = 0x4 + CONNECT_DATA_IDEMPOTENT = 0x2 + CONNECT_RESUME_ON_READ_WRITE = 0x1 CR0 = 0x0 CR1 = 0x1000 CR2 = 0x2000 @@ -1265,6 +1268,10 @@ const ( RTV_SSTHRESH = 0x20 RUSAGE_CHILDREN = -0x1 RUSAGE_SELF = 0x0 + SAE_ASSOCID_ALL = 0xffffffff + SAE_ASSOCID_ANY = 0x0 + SAE_CONNID_ALL = 0xffffffff + SAE_CONNID_ANY = 0x0 SCM_CREDS = 0x3 SCM_RIGHTS = 0x1 SCM_TIMESTAMP = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go b/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go index da08b2ab..1ec2b140 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go +++ b/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go @@ -581,6 +581,8 @@ const ( AT_EMPTY_PATH = 0x1000 AT_REMOVEDIR = 0x200 RENAME_NOREPLACE = 1 << 0 + ST_RDONLY = 1 + ST_NOSUID = 2 ) const ( diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go index b622533e..24b346e1 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go @@ -841,6 +841,26 @@ var libc_pthread_fchdir_np_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) { + var _p0 unsafe.Pointer + if len(iov) > 0 { + _p0 = unsafe.Pointer(&iov[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + _, _, e1 := syscall_syscall9(libc_connectx_trampoline_addr, uintptr(fd), uintptr(unsafe.Pointer(endpoints)), uintptr(associd), uintptr(flags), uintptr(_p0), uintptr(len(iov)), uintptr(unsafe.Pointer(n)), uintptr(unsafe.Pointer(connid)), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_connectx_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_connectx connectx "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) { _, _, e1 := syscall_syscall6(libc_sendfile_trampoline_addr, uintptr(infd), uintptr(outfd), uintptr(offset), uintptr(unsafe.Pointer(len)), uintptr(hdtr), uintptr(flags)) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s index cfe6646b..ebd21310 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s @@ -248,6 +248,11 @@ TEXT libc_pthread_fchdir_np_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_pthread_fchdir_np_trampoline_addr(SB), RODATA, $8 DATA ·libc_pthread_fchdir_np_trampoline_addr(SB)/8, $libc_pthread_fchdir_np_trampoline<>(SB) +TEXT libc_connectx_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_connectx(SB) +GLOBL ·libc_connectx_trampoline_addr(SB), RODATA, $8 +DATA ·libc_connectx_trampoline_addr(SB)/8, $libc_connectx_trampoline<>(SB) + TEXT libc_sendfile_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sendfile(SB) GLOBL ·libc_sendfile_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go index 13f624f6..824b9c2d 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go @@ -841,6 +841,26 @@ var libc_pthread_fchdir_np_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) { + var _p0 unsafe.Pointer + if len(iov) > 0 { + _p0 = unsafe.Pointer(&iov[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + _, _, e1 := syscall_syscall9(libc_connectx_trampoline_addr, uintptr(fd), uintptr(unsafe.Pointer(endpoints)), uintptr(associd), uintptr(flags), uintptr(_p0), uintptr(len(iov)), uintptr(unsafe.Pointer(n)), uintptr(unsafe.Pointer(connid)), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_connectx_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_connectx connectx "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) { _, _, e1 := syscall_syscall6(libc_sendfile_trampoline_addr, uintptr(infd), uintptr(outfd), uintptr(offset), uintptr(unsafe.Pointer(len)), uintptr(hdtr), uintptr(flags)) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s index fe222b75..4f178a22 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s @@ -248,6 +248,11 @@ TEXT libc_pthread_fchdir_np_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_pthread_fchdir_np_trampoline_addr(SB), RODATA, $8 DATA ·libc_pthread_fchdir_np_trampoline_addr(SB)/8, $libc_pthread_fchdir_np_trampoline<>(SB) +TEXT libc_connectx_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_connectx(SB) +GLOBL ·libc_connectx_trampoline_addr(SB), RODATA, $8 +DATA ·libc_connectx_trampoline_addr(SB)/8, $libc_connectx_trampoline<>(SB) + TEXT libc_sendfile_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sendfile(SB) GLOBL ·libc_sendfile_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go index 091d107f..d003c3d4 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go @@ -306,6 +306,19 @@ type XVSockPgen struct { type _Socklen uint32 +type SaeAssocID uint32 + +type SaeConnID uint32 + +type SaEndpoints struct { + Srcif uint32 + Srcaddr *RawSockaddr + Srcaddrlen uint32 + Dstaddr *RawSockaddr + Dstaddrlen uint32 + _ [4]byte +} + type Xucred struct { Version uint32 Uid uint32 diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go index 28ff4ef7..0d45a941 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go @@ -306,6 +306,19 @@ type XVSockPgen struct { type _Socklen uint32 +type SaeAssocID uint32 + +type SaeConnID uint32 + +type SaEndpoints struct { + Srcif uint32 + Srcaddr *RawSockaddr + Srcaddrlen uint32 + Dstaddr *RawSockaddr + Dstaddrlen uint32 + _ [4]byte +} + type Xucred struct { Version uint32 Uid uint32 diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go index 6cbd094a..51e13eb0 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go @@ -625,6 +625,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go index 7c03b6ee..d002d8ef 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go @@ -630,6 +630,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go index 422107ee..3f863d89 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go @@ -616,6 +616,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go index 505a12ac..61c72931 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go @@ -610,6 +610,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go index cc986c79..b5d17414 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go @@ -612,6 +612,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux.go b/vendor/golang.org/x/sys/unix/ztypes_linux.go index b102b95a..9f2550dc 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux.go @@ -2486,7 +2486,7 @@ type XDPMmapOffsets struct { type XDPUmemReg struct { Addr uint64 Len uint64 - Chunk_size uint32 + Size uint32 Headroom uint32 Flags uint32 Tx_metadata_len uint32 @@ -3807,6 +3807,9 @@ const ( ETHTOOL_MSG_PSE_GET_REPLY = 0x25 ETHTOOL_MSG_RSS_GET_REPLY = 0x26 ETHTOOL_MSG_KERNEL_MAX = 0x2b + ETHTOOL_FLAG_COMPACT_BITSETS = 0x1 + ETHTOOL_FLAG_OMIT_REPLY = 0x2 + ETHTOOL_FLAG_STATS = 0x4 ETHTOOL_A_HEADER_UNSPEC = 0x0 ETHTOOL_A_HEADER_DEV_INDEX = 0x1 ETHTOOL_A_HEADER_DEV_NAME = 0x2 diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go index 15adc041..ad05b51a 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go @@ -727,6 +727,37 @@ const ( RISCV_HWPROBE_EXT_ZBA = 0x8 RISCV_HWPROBE_EXT_ZBB = 0x10 RISCV_HWPROBE_EXT_ZBS = 0x20 + RISCV_HWPROBE_EXT_ZICBOZ = 0x40 + RISCV_HWPROBE_EXT_ZBC = 0x80 + RISCV_HWPROBE_EXT_ZBKB = 0x100 + RISCV_HWPROBE_EXT_ZBKC = 0x200 + RISCV_HWPROBE_EXT_ZBKX = 0x400 + RISCV_HWPROBE_EXT_ZKND = 0x800 + RISCV_HWPROBE_EXT_ZKNE = 0x1000 + RISCV_HWPROBE_EXT_ZKNH = 0x2000 + RISCV_HWPROBE_EXT_ZKSED = 0x4000 + RISCV_HWPROBE_EXT_ZKSH = 0x8000 + RISCV_HWPROBE_EXT_ZKT = 0x10000 + RISCV_HWPROBE_EXT_ZVBB = 0x20000 + RISCV_HWPROBE_EXT_ZVBC = 0x40000 + RISCV_HWPROBE_EXT_ZVKB = 0x80000 + RISCV_HWPROBE_EXT_ZVKG = 0x100000 + RISCV_HWPROBE_EXT_ZVKNED = 0x200000 + RISCV_HWPROBE_EXT_ZVKNHA = 0x400000 + RISCV_HWPROBE_EXT_ZVKNHB = 0x800000 + RISCV_HWPROBE_EXT_ZVKSED = 0x1000000 + RISCV_HWPROBE_EXT_ZVKSH = 0x2000000 + RISCV_HWPROBE_EXT_ZVKT = 0x4000000 + RISCV_HWPROBE_EXT_ZFH = 0x8000000 + RISCV_HWPROBE_EXT_ZFHMIN = 0x10000000 + RISCV_HWPROBE_EXT_ZIHINTNTL = 0x20000000 + RISCV_HWPROBE_EXT_ZVFH = 0x40000000 + RISCV_HWPROBE_EXT_ZVFHMIN = 0x80000000 + RISCV_HWPROBE_EXT_ZFA = 0x100000000 + RISCV_HWPROBE_EXT_ZTSO = 0x200000000 + RISCV_HWPROBE_EXT_ZACAS = 0x400000000 + RISCV_HWPROBE_EXT_ZICOND = 0x800000000 + RISCV_HWPROBE_EXT_ZIHINTPAUSE = 0x1000000000 RISCV_HWPROBE_KEY_CPUPERF_0 = 0x5 RISCV_HWPROBE_MISALIGNED_UNKNOWN = 0x0 RISCV_HWPROBE_MISALIGNED_EMULATED = 0x1 @@ -734,4 +765,6 @@ const ( RISCV_HWPROBE_MISALIGNED_FAST = 0x3 RISCV_HWPROBE_MISALIGNED_UNSUPPORTED = 0x4 RISCV_HWPROBE_MISALIGNED_MASK = 0x7 + RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE = 0x6 + RISCV_HWPROBE_WHICH_CPUS = 0x1 ) diff --git a/vendor/golang.org/x/sys/windows/syscall_windows.go b/vendor/golang.org/x/sys/windows/syscall_windows.go index 1fa34fd1..5cee9a31 100644 --- a/vendor/golang.org/x/sys/windows/syscall_windows.go +++ b/vendor/golang.org/x/sys/windows/syscall_windows.go @@ -313,6 +313,10 @@ func NewCallbackCDecl(fn interface{}) uintptr { //sys SetConsoleMode(console Handle, mode uint32) (err error) = kernel32.SetConsoleMode //sys GetConsoleScreenBufferInfo(console Handle, info *ConsoleScreenBufferInfo) (err error) = kernel32.GetConsoleScreenBufferInfo //sys setConsoleCursorPosition(console Handle, position uint32) (err error) = kernel32.SetConsoleCursorPosition +//sys GetConsoleCP() (cp uint32, err error) = kernel32.GetConsoleCP +//sys GetConsoleOutputCP() (cp uint32, err error) = kernel32.GetConsoleOutputCP +//sys SetConsoleCP(cp uint32) (err error) = kernel32.SetConsoleCP +//sys SetConsoleOutputCP(cp uint32) (err error) = kernel32.SetConsoleOutputCP //sys WriteConsole(console Handle, buf *uint16, towrite uint32, written *uint32, reserved *byte) (err error) = kernel32.WriteConsoleW //sys ReadConsole(console Handle, buf *uint16, toread uint32, read *uint32, inputControl *byte) (err error) = kernel32.ReadConsoleW //sys resizePseudoConsole(pconsole Handle, size uint32) (hr error) = kernel32.ResizePseudoConsole diff --git a/vendor/golang.org/x/sys/windows/types_windows.go b/vendor/golang.org/x/sys/windows/types_windows.go index 4d0c1574..7b97a154 100644 --- a/vendor/golang.org/x/sys/windows/types_windows.go +++ b/vendor/golang.org/x/sys/windows/types_windows.go @@ -1060,6 +1060,7 @@ const ( SIO_GET_EXTENSION_FUNCTION_POINTER = IOC_INOUT | IOC_WS2 | 6 SIO_KEEPALIVE_VALS = IOC_IN | IOC_VENDOR | 4 SIO_UDP_CONNRESET = IOC_IN | IOC_VENDOR | 12 + SIO_UDP_NETRESET = IOC_IN | IOC_VENDOR | 15 // cf. http://support.microsoft.com/default.aspx?scid=kb;en-us;257460 @@ -2031,6 +2032,50 @@ const ( IF_TYPE_IEEE1394 = 144 ) +// Enum NL_PREFIX_ORIGIN for [IpAdapterUnicastAddress], see +// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_prefix_origin +const ( + IpPrefixOriginOther = 0 + IpPrefixOriginManual = 1 + IpPrefixOriginWellKnown = 2 + IpPrefixOriginDhcp = 3 + IpPrefixOriginRouterAdvertisement = 4 + IpPrefixOriginUnchanged = 1 << 4 +) + +// Enum NL_SUFFIX_ORIGIN for [IpAdapterUnicastAddress], see +// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_suffix_origin +const ( + NlsoOther = 0 + NlsoManual = 1 + NlsoWellKnown = 2 + NlsoDhcp = 3 + NlsoLinkLayerAddress = 4 + NlsoRandom = 5 + IpSuffixOriginOther = 0 + IpSuffixOriginManual = 1 + IpSuffixOriginWellKnown = 2 + IpSuffixOriginDhcp = 3 + IpSuffixOriginLinkLayerAddress = 4 + IpSuffixOriginRandom = 5 + IpSuffixOriginUnchanged = 1 << 4 +) + +// Enum NL_DAD_STATE for [IpAdapterUnicastAddress], see +// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_dad_state +const ( + NldsInvalid = 0 + NldsTentative = 1 + NldsDuplicate = 2 + NldsDeprecated = 3 + NldsPreferred = 4 + IpDadStateInvalid = 0 + IpDadStateTentative = 1 + IpDadStateDuplicate = 2 + IpDadStateDeprecated = 3 + IpDadStatePreferred = 4 +) + type SocketAddress struct { Sockaddr *syscall.RawSockaddrAny SockaddrLength int32 diff --git a/vendor/golang.org/x/sys/windows/zsyscall_windows.go b/vendor/golang.org/x/sys/windows/zsyscall_windows.go index 9bb979a3..4c2e1bdc 100644 --- a/vendor/golang.org/x/sys/windows/zsyscall_windows.go +++ b/vendor/golang.org/x/sys/windows/zsyscall_windows.go @@ -247,7 +247,9 @@ var ( procGetCommandLineW = modkernel32.NewProc("GetCommandLineW") procGetComputerNameExW = modkernel32.NewProc("GetComputerNameExW") procGetComputerNameW = modkernel32.NewProc("GetComputerNameW") + procGetConsoleCP = modkernel32.NewProc("GetConsoleCP") procGetConsoleMode = modkernel32.NewProc("GetConsoleMode") + procGetConsoleOutputCP = modkernel32.NewProc("GetConsoleOutputCP") procGetConsoleScreenBufferInfo = modkernel32.NewProc("GetConsoleScreenBufferInfo") procGetCurrentDirectoryW = modkernel32.NewProc("GetCurrentDirectoryW") procGetCurrentProcessId = modkernel32.NewProc("GetCurrentProcessId") @@ -347,8 +349,10 @@ var ( procSetCommMask = modkernel32.NewProc("SetCommMask") procSetCommState = modkernel32.NewProc("SetCommState") procSetCommTimeouts = modkernel32.NewProc("SetCommTimeouts") + procSetConsoleCP = modkernel32.NewProc("SetConsoleCP") procSetConsoleCursorPosition = modkernel32.NewProc("SetConsoleCursorPosition") procSetConsoleMode = modkernel32.NewProc("SetConsoleMode") + procSetConsoleOutputCP = modkernel32.NewProc("SetConsoleOutputCP") procSetCurrentDirectoryW = modkernel32.NewProc("SetCurrentDirectoryW") procSetDefaultDllDirectories = modkernel32.NewProc("SetDefaultDllDirectories") procSetDllDirectoryW = modkernel32.NewProc("SetDllDirectoryW") @@ -2162,6 +2166,15 @@ func GetComputerName(buf *uint16, n *uint32) (err error) { return } +func GetConsoleCP() (cp uint32, err error) { + r0, _, e1 := syscall.Syscall(procGetConsoleCP.Addr(), 0, 0, 0, 0) + cp = uint32(r0) + if cp == 0 { + err = errnoErr(e1) + } + return +} + func GetConsoleMode(console Handle, mode *uint32) (err error) { r1, _, e1 := syscall.Syscall(procGetConsoleMode.Addr(), 2, uintptr(console), uintptr(unsafe.Pointer(mode)), 0) if r1 == 0 { @@ -2170,6 +2183,15 @@ func GetConsoleMode(console Handle, mode *uint32) (err error) { return } +func GetConsoleOutputCP() (cp uint32, err error) { + r0, _, e1 := syscall.Syscall(procGetConsoleOutputCP.Addr(), 0, 0, 0, 0) + cp = uint32(r0) + if cp == 0 { + err = errnoErr(e1) + } + return +} + func GetConsoleScreenBufferInfo(console Handle, info *ConsoleScreenBufferInfo) (err error) { r1, _, e1 := syscall.Syscall(procGetConsoleScreenBufferInfo.Addr(), 2, uintptr(console), uintptr(unsafe.Pointer(info)), 0) if r1 == 0 { @@ -3038,6 +3060,14 @@ func SetCommTimeouts(handle Handle, timeouts *CommTimeouts) (err error) { return } +func SetConsoleCP(cp uint32) (err error) { + r1, _, e1 := syscall.Syscall(procSetConsoleCP.Addr(), 1, uintptr(cp), 0, 0) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + func setConsoleCursorPosition(console Handle, position uint32) (err error) { r1, _, e1 := syscall.Syscall(procSetConsoleCursorPosition.Addr(), 2, uintptr(console), uintptr(position), 0) if r1 == 0 { @@ -3054,6 +3084,14 @@ func SetConsoleMode(console Handle, mode uint32) (err error) { return } +func SetConsoleOutputCP(cp uint32) (err error) { + r1, _, e1 := syscall.Syscall(procSetConsoleOutputCP.Addr(), 1, uintptr(cp), 0, 0) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + func SetCurrentDirectory(path *uint16) (err error) { r1, _, e1 := syscall.Syscall(procSetCurrentDirectoryW.Addr(), 1, uintptr(unsafe.Pointer(path)), 0, 0) if r1 == 0 { diff --git a/vendor/golang.org/x/term/term_windows.go b/vendor/golang.org/x/term/term_windows.go index 465f5606..df6bf948 100644 --- a/vendor/golang.org/x/term/term_windows.go +++ b/vendor/golang.org/x/term/term_windows.go @@ -26,6 +26,7 @@ func makeRaw(fd int) (*State, error) { return nil, err } raw := st &^ (windows.ENABLE_ECHO_INPUT | windows.ENABLE_PROCESSED_INPUT | windows.ENABLE_LINE_INPUT | windows.ENABLE_PROCESSED_OUTPUT) + raw |= windows.ENABLE_VIRTUAL_TERMINAL_INPUT if err := windows.SetConsoleMode(windows.Handle(fd), raw); err != nil { return nil, err } diff --git a/vendor/modules.txt b/vendor/modules.txt index aee54912..1f50ee31 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -84,11 +84,11 @@ github.com/cloudflare/circl/math/mlsbset github.com/cloudflare/circl/sign github.com/cloudflare/circl/sign/ed25519 github.com/cloudflare/circl/sign/ed448 -# github.com/cpuguy83/go-md2man/v2 v2.0.4 +# github.com/cpuguy83/go-md2man/v2 v2.0.5 ## explicit; go 1.11 github.com/cpuguy83/go-md2man/v2/md2man -# github.com/cyphar/filepath-securejoin v0.3.1 -## explicit; go 1.20 +# github.com/cyphar/filepath-securejoin v0.3.3 +## explicit; go 1.21 github.com/cyphar/filepath-securejoin # github.com/davecgh/go-spew v1.1.1 ## explicit @@ -265,7 +265,7 @@ github.com/xanzy/ssh-agent # github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 ## explicit; go 1.15 github.com/xrash/smetrics -# golang.org/x/crypto v0.26.0 +# golang.org/x/crypto v0.27.0 ## explicit; go 1.20 golang.org/x/crypto/argon2 golang.org/x/crypto/blake2b @@ -287,22 +287,22 @@ golang.org/x/crypto/ssh golang.org/x/crypto/ssh/agent golang.org/x/crypto/ssh/internal/bcrypt_pbkdf golang.org/x/crypto/ssh/knownhosts -# golang.org/x/net v0.28.0 +# golang.org/x/net v0.29.0 ## explicit; go 1.18 golang.org/x/net/context golang.org/x/net/internal/socks golang.org/x/net/proxy -# golang.org/x/sys v0.23.0 +# golang.org/x/sys v0.25.0 ## explicit; go 1.18 golang.org/x/sys/cpu golang.org/x/sys/execabs golang.org/x/sys/plan9 golang.org/x/sys/unix golang.org/x/sys/windows -# golang.org/x/term v0.23.0 +# golang.org/x/term v0.24.0 ## explicit; go 1.18 golang.org/x/term -# golang.org/x/text v0.17.0 +# golang.org/x/text v0.18.0 ## explicit; go 1.18 golang.org/x/text/cases golang.org/x/text/internal