diff --git a/Make.inc b/Make.inc
index a143b871efecb..228cd99823c48 100644
--- a/Make.inc
+++ b/Make.inc
@@ -652,8 +652,8 @@ ifeq ($(OS),FreeBSD)
 ifneq (,$(findstring gfortran,$(FC)))
 
 # First let's figure out what version of GCC we're dealing with
-_GCCMAJOR := $(shell $(FC) -dumpversion | cut -d'.' -f1)
-_GCCMINOR := $(shell $(FC) -dumpversion | cut -d'.' -f2)
+_GCCMAJOR := $(shell $(FC) -dumpversion 2>/dev/null | cut -d'.' -f1)
+_GCCMINOR := $(shell $(FC) -dumpversion 2>/dev/null | cut -d'.' -f2)
 
 # The ports system uses major and minor for GCC < 5 (e.g. gcc49 for GCC 4.9), otherwise major only
 ifeq ($(_GCCMAJOR),4)
diff --git a/base/boot.jl b/base/boot.jl
index 90322b69a54d9..2476114b62d70 100644
--- a/base/boot.jl
+++ b/base/boot.jl
@@ -421,10 +421,10 @@ eval(Core, quote
     function CodeInstance(
         mi::MethodInstance, @nospecialize(rettype), @nospecialize(inferred_const),
         @nospecialize(inferred), const_flags::Int32, min_world::UInt, max_world::UInt,
-        ipo_effects::UInt8, effects::UInt8, @nospecialize(argescapes#=::Union{Nothing,Vector{ArgEscapeInfo}}=#),
+        ipo_effects::UInt32, effects::UInt32, @nospecialize(argescapes#=::Union{Nothing,Vector{ArgEscapeInfo}}=#),
         relocatability::UInt8)
         return ccall(:jl_new_codeinst, Ref{CodeInstance},
-            (Any, Any, Any, Any, Int32, UInt, UInt, UInt8, UInt8, Any, UInt8),
+            (Any, Any, Any, Any, Int32, UInt, UInt, UInt32, UInt32, Any, UInt8),
             mi, rettype, inferred_const, inferred, const_flags, min_world, max_world,
             ipo_effects, effects, argescapes,
             relocatability)
diff --git a/base/broadcast.jl b/base/broadcast.jl
index fb9ba9555cfd9..c9694d6645099 100644
--- a/base/broadcast.jl
+++ b/base/broadcast.jl
@@ -973,14 +973,14 @@ end
     destc = dest.chunks
     cind = 1
     bc′ = preprocess(dest, bc)
-    for P in Iterators.partition(eachindex(bc′), bitcache_size)
+    @inbounds for P in Iterators.partition(eachindex(bc′), bitcache_size)
         ind = 1
         @simd for I in P
-            @inbounds tmp[ind] = bc′[I]
+            tmp[ind] = bc′[I]
             ind += 1
         end
         @simd for i in ind:bitcache_size
-            @inbounds tmp[i] = false
+            tmp[i] = false
         end
         dumpbitcache(destc, cind, tmp)
         cind += bitcache_chunks
diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl
index 18232b37008f6..6991e2d38437b 100644
--- a/base/compiler/compiler.jl
+++ b/base/compiler/compiler.jl
@@ -58,6 +58,9 @@ include("operators.jl")
 include("pointer.jl")
 include("refvalue.jl")
 
+# mirrors the `Bool(x::Real)` constructor from float.jl under a private name, so `Bool` itself is not redefined here
+_Bool(x::Real) = x == 0 ? false : (x == 1 ? true : throw(InexactError(:Bool, Bool, x)))
+
 # checked arithmetic
 const checked_add = +
 const checked_sub = -
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index 0616204dce748..cd27dbb01b625 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -558,12 +558,21 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
     idx = 1
     oldidx = 1
     changemap = fill(0, length(code))
-    labelmap = coverage ? fill(0, length(code)) : changemap
     prevloc = zero(eltype(ci.codelocs))
     stmtinfo = sv.stmt_info
     codelocs = ci.codelocs
     ssavaluetypes = ci.ssavaluetypes::Vector{Any}
     ssaflags = ci.ssaflags
+    if !coverage && JLOptions().code_coverage == 3 # path-specific coverage mode
+        for line in ci.linetable
+            if is_file_tracked(line.file)
+                # if any line falls in a tracked file enable coverage for all
+                coverage = true
+                break
+            end
+        end
+    end
+    labelmap = coverage ? fill(0, length(code)) : changemap
     while idx <= length(code)
         codeloc = codelocs[idx]
         if coverage && codeloc != prevloc && codeloc != 0
diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl
index 510c3bdaf5b03..a5b68d6d5198d 100644
--- a/base/compiler/ssair/inlining.jl
+++ b/base/compiler/ssair/inlining.jl
@@ -319,10 +319,15 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
     inlined_at = Int(compact.result[idx][:line])
     topline::Int32 = linetable_offset + Int32(1)
     coverage = coverage_enabled(def.module)
+    coverage_by_path = JLOptions().code_coverage == 3
     push!(linetable, LineInfoNode(def.module, def.name, def.file, Int(def.line), inlined_at))
     oldlinetable = spec.ir.linetable
     for oldline in 1:length(oldlinetable)
         entry = oldlinetable[oldline]
+        if !coverage && coverage_by_path && is_file_tracked(entry.file)
+            # include topline coverage entry if in path-specific coverage mode, and any file falls under path
+            coverage = true
+        end
         newentry = LineInfoNode(entry.module, entry.method, entry.file, entry.line,
             (entry.inlined_at > 0 ? entry.inlined_at + linetable_offset + (oldline == 1) : inlined_at))
         if oldline == 1
diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
index 1c54345b17de5..c20fcd14be0ef 100644
--- a/base/compiler/typeinfer.jl
+++ b/base/compiler/typeinfer.jl
@@ -512,7 +512,7 @@ function finish(me::InferenceState, interp::AbstractInterpreter)
             ipo_effects = Effects(ipo_effects; terminates=ALWAYS_TRUE)
         end
     end
-    me.result.ipo_effects = ipo_effects
+    me.ipo_effects = me.result.ipo_effects = ipo_effects
     validate_code_in_debug_mode(me.linfo, me.src, "inferred")
     nothing
 end
diff --git a/base/compiler/typelattice.jl b/base/compiler/typelattice.jl
index eda1850604306..b0dc3490ee003 100644
--- a/base/compiler/typelattice.jl
+++ b/base/compiler/typelattice.jl
@@ -467,45 +467,3 @@ function stupdate1!(state::VarTable, change::StateUpdate)
     end
     return false
 end
-
-# compute typeintersect over the extended inference lattice,
-# as precisely as we can,
-# where v is in the extended lattice, and t is a Type.
-function tmeet(@nospecialize(v), @nospecialize(t))
-    if isa(v, Const)
-        if !has_free_typevars(t) && !isa(v.val, t)
-            return Bottom
-        end
-        return v
-    elseif isa(v, PartialStruct)
-        has_free_typevars(t) && return v
-        widev = widenconst(v)
-        if widev <: t
-            return v
-        end
-        ti = typeintersect(widev, t)
-        valid_as_lattice(ti) || return Bottom
-        @assert widev <: Tuple
-        new_fields = Vector{Any}(undef, length(v.fields))
-        for i = 1:length(new_fields)
-            vfi = v.fields[i]
-            if isvarargtype(vfi)
-                new_fields[i] = vfi
-            else
-                new_fields[i] = tmeet(vfi, widenconst(getfield_tfunc(t, Const(i))))
-                if new_fields[i] === Bottom
-                    return Bottom
-                end
-            end
-        end
-        return tuple_tfunc(new_fields)
-    elseif isa(v, Conditional)
-        if !(Bool <: t)
-            return Bottom
-        end
-        return v
-    end
-    ti = typeintersect(widenconst(v), t)
-    valid_as_lattice(ti) || return Bottom
-    return ti
-end
diff --git a/base/compiler/types.jl b/base/compiler/types.jl
index 282582c016d97..5e2c7ed46a98d 100644
--- a/base/compiler/types.jl
+++ b/base/compiler/types.jl
@@ -78,11 +78,13 @@ function Effects(e::Effects = EFFECTS_UNKNOWN;
 end
 
 is_total_or_error(effects::Effects) =
-    effects.consistent === ALWAYS_TRUE && effects.effect_free === ALWAYS_TRUE &&
+    effects.consistent === ALWAYS_TRUE &&
+    effects.effect_free === ALWAYS_TRUE &&
     effects.terminates === ALWAYS_TRUE
 
 is_total(effects::Effects) =
-    is_total_or_error(effects) && effects.nothrow === ALWAYS_TRUE
+    is_total_or_error(effects) &&
+    effects.nothrow === ALWAYS_TRUE
 
 is_removable_if_unused(effects::Effects) =
     effects.effect_free === ALWAYS_TRUE &&
@@ -90,19 +92,19 @@ is_removable_if_unused(effects::Effects) =
     effects.nothrow === ALWAYS_TRUE
 
 function encode_effects(e::Effects)
-    return (e.consistent.state << 1) |
-           (e.effect_free.state << 3) |
-           (e.nothrow.state << 5) |
-           (e.terminates.state << 7) |
-           (e.overlayed)
+    return (e.consistent.state << 0) |    # pack the effect fields into one integer: consistent -> bits 0-1
+           (e.effect_free.state << 2) |   # effect_free -> bits 2-3
+           (e.nothrow.state << 4) |       # nothrow -> bits 4-5
+           (e.terminates.state << 6) |    # terminates -> bits 6-7
+           (UInt32(e.overlayed) << 8)     # overlayed flag, widened to UInt32 -> bit 8
 end
-function decode_effects(e::UInt8)
+function decode_effects(e::UInt32)  # rebuild an Effects from the bit layout written by encode_effects
     return Effects(
-        TriState((e >> 1) & 0x03),
-        TriState((e >> 3) & 0x03),
-        TriState((e >> 5) & 0x03),
-        TriState((e >> 7) & 0x03),
-        e & 0x01 ≠ 0x00,
+        TriState((e >> 0) & 0x03),  # bits 0-1: consistent
+        TriState((e >> 2) & 0x03),  # bits 2-3: effect_free
+        TriState((e >> 4) & 0x03),  # bits 4-5: nothrow
+        TriState((e >> 6) & 0x03),  # bits 6-7: terminates
+        _Bool(   (e >> 8) & 0x01),  # bit 8: overlayed (0 or 1 after masking, converted with _Bool)
         false)
 end
 
diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl
index 9b1106e964919..f7ff32ecbaefe 100644
--- a/base/compiler/utilities.jl
+++ b/base/compiler/utilities.jl
@@ -359,12 +359,12 @@ inlining_enabled() = (JLOptions().can_inline == 1)
 function coverage_enabled(m::Module)
     ccall(:jl_generating_output, Cint, ()) == 0 || return false # don't alter caches
     cov = JLOptions().code_coverage
-    if cov == 1
+    if cov == 1 # user
         m = moduleroot(m)
         m === Core && return false
         isdefined(Main, :Base) && m === Main.Base && return false
         return true
-    elseif cov == 2
+    elseif cov == 2 # all
         return true
     end
     return false
diff --git a/base/float.jl b/base/float.jl
index 8d7381ce3be4a..60850b7e02f64 100644
--- a/base/float.jl
+++ b/base/float.jl
@@ -358,6 +358,12 @@ floor(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundDo
 ceil(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundUp))
 round(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundNearest))
 
+# Bool: defined only where the rounded value is 0 or 1; any other x (NaN included) throws InexactError
+trunc(::Type{Bool}, x::AbstractFloat) = (-1 < x < 2) ? 1 <= x : throw(InexactError(:trunc, Bool, x))
+floor(::Type{Bool}, x::AbstractFloat) = (0 <= x < 2) ? 1 <= x : throw(InexactError(:floor, Bool, x))
+ceil(::Type{Bool}, x::AbstractFloat)  = (-1 < x <= 1) ? 0 < x : throw(InexactError(:ceil, Bool, x))
+round(::Type{Bool}, x::AbstractFloat) = (-0.5 <= x < 1.5) ? 0.5 < x : throw(InexactError(:round, Bool, x))
+
 round(x::IEEEFloat, r::RoundingMode{:ToZero})  = trunc_llvm(x)
 round(x::IEEEFloat, r::RoundingMode{:Down})    = floor_llvm(x)
 round(x::IEEEFloat, r::RoundingMode{:Up})      = ceil_llvm(x)
diff --git a/base/multidimensional.jl b/base/multidimensional.jl
index 7eb3cd915c3eb..b5e401a7834e7 100644
--- a/base/multidimensional.jl
+++ b/base/multidimensional.jl
@@ -477,9 +477,8 @@ module IteratorsMD
     simd_inner_length(iter::CartesianIndices, I::CartesianIndex) = Base.length(iter.indices[1])
 
     simd_index(iter::CartesianIndices{0}, ::CartesianIndex, I1::Int) = first(iter)
-    @propagate_inbounds function simd_index(iter::CartesianIndices, Ilast::CartesianIndex, I1::Int)
-        CartesianIndex(getindex(iter.indices[1], I1+first(Base.axes1(iter.indices[1]))), Ilast.I...)
-    end
+    @propagate_inbounds simd_index(iter::CartesianIndices, Ilast::CartesianIndex, I1::Int) =
+        CartesianIndex(iter.indices[1][I1+firstindex(iter.indices[1])], Ilast)  # I1: 0-based offset into the first-dimension range; Ilast: the remaining indices
 
     # Split out the first N elements of a tuple
     @inline function split(t, V::Val)
@@ -585,7 +584,7 @@ module IteratorsMD
         CartesianIndices(intersect.(a.indices, b.indices))
 
     # Views of reshaped CartesianIndices are used for partitions — ensure these are fast
-    const CartesianPartition{T<:CartesianIndex, P<:CartesianIndices, R<:ReshapedArray{T,1,P}} = SubArray{T,1,R,Tuple{UnitRange{Int}},false}
+    const CartesianPartition{T<:CartesianIndex, P<:CartesianIndices, R<:ReshapedArray{T,1,P}} = SubArray{T,1,R,<:Tuple{AbstractUnitRange{Int}},false}
     eltype(::Type{PartitionIterator{T}}) where {T<:ReshapedArrayLF} = SubArray{eltype(T), 1, T, Tuple{UnitRange{Int}}, true}
     eltype(::Type{PartitionIterator{T}}) where {T<:ReshapedArray} = SubArray{eltype(T), 1, T, Tuple{UnitRange{Int}}, false}
     Iterators.IteratorEltype(::Type{<:PartitionIterator{T}}) where {T<:ReshapedArray} = Iterators.IteratorEltype(T)
@@ -594,7 +593,6 @@ module IteratorsMD
     eltype(::Type{PartitionIterator{T}}) where {T<:Union{UnitRange, StepRange, StepRangeLen, LinRange}} = T
     Iterators.IteratorEltype(::Type{<:PartitionIterator{T}}) where {T<:Union{OneTo, UnitRange, StepRange, StepRangeLen, LinRange}} = Iterators.IteratorEltype(T)
 
-
     @inline function iterate(iter::CartesianPartition)
         isempty(iter) && return nothing
         f = first(iter)
@@ -610,33 +608,45 @@ module IteratorsMD
         # In general, the Cartesian Partition might start and stop in the middle of the outer
         # dimensions — thus the outer range of a CartesianPartition is itself a
         # CartesianPartition.
-        t = tail(iter.parent.parent.indices)
-        ci = CartesianIndices(t)
-        li = LinearIndices(t)
-        return @inbounds view(ci, li[tail(iter[1].I)...]:li[tail(iter[end].I)...])
+        mi = iter.parent.mi
+        ci = iter.parent.parent
+        ax, ax1 = axes(ci), Base.axes1(ci)
+        subs = Base.ind2sub_rs(ax, mi, first(iter.indices[1]))
+        vl, fl = Base._sub2ind(tail(ax), tail(subs)...), subs[1]
+        vr, fr = divrem(last(iter.indices[1]) - 1, mi[end]) .+ (1, first(ax1))
+        oci = CartesianIndices(tail(ci.indices))
+        # A fake CartesianPartition to reuse the outer iterate fallback
+        outer = @inbounds view(ReshapedArray(oci, (length(oci),), mi), vl:vr)
+        init = @inbounds dec(oci[tail(subs)...].I, oci.indices) # real init state
+        # Use Generator to make inner loop branchless
+        @inline function skip_len_I(i::Int, I::CartesianIndex)
+            l = i == 1 ? fl : first(ax1)
+            r = i == length(outer) ? fr : last(ax1)
+            l - first(ax1), r - l + 1, I
+        end
+        (skip_len_I(i, I) for (i, I) in Iterators.enumerate(Iterators.rest(outer, (init, 0))))
     end
-    function simd_outer_range(iter::CartesianPartition{CartesianIndex{2}})
+    @inline function simd_outer_range(iter::CartesianPartition{CartesianIndex{2}})
         # But for two-dimensional Partitions the above is just a simple one-dimensional range
         # over the second dimension; we don't need to worry about non-rectangular staggers in
         # higher dimensions.
-        return @inbounds CartesianIndices((iter[1][2]:iter[end][2],))
-    end
-    @inline function simd_inner_length(iter::CartesianPartition, I::CartesianIndex)
-        inner = iter.parent.parent.indices[1]
-        @inbounds fi = iter[1].I
-        @inbounds li = iter[end].I
-        inner_start = I.I == tail(fi) ? fi[1] : first(inner)
-        inner_end   = I.I == tail(li) ? li[1] : last(inner)
-        return inner_end - inner_start + 1
-    end
-    @inline function simd_index(iter::CartesianPartition, Ilast::CartesianIndex, I1::Int)
-        # I1 is the 0-based distance from the first dimension's offest
-        offset = first(iter.parent.parent.indices[1]) # (this is 1 for 1-based arrays)
-        # In the first column we need to also add in the iter's starting point (branchlessly)
-        f = @inbounds iter[1]
-        startoffset = (Ilast.I == tail(f.I))*(f[1] - 1)
-        CartesianIndex((I1 + offset + startoffset, Ilast.I...))
+        mi = iter.parent.mi
+        ci = iter.parent.parent
+        ax, ax1 = axes(ci), Base.axes1(ci)
+        fl, vl = Base.ind2sub_rs(ax, mi, first(iter.indices[1]))  # presumably the (dim-1, dim-2) subscripts of the partition's first linear index
+        fr, vr = Base.ind2sub_rs(ax, mi, last(iter.indices[1]))   # presumably the same for its last linear index
+        outer = @inbounds CartesianIndices((ci.indices[2][vl:vr],))  # the dim-2 indices the partition touches
+        # Use Generator to make inner loop branchless
+        @inline function skip_len_I(I::CartesianIndex{1})
+            l = I == first(outer) ? fl : first(ax1)  # only the first outer index may start past the beginning of dim 1
+            r = I == last(outer) ? fr : last(ax1)    # only the last outer index may stop before the end of dim 1
+            l - first(ax1), r - l + 1, I             # (elements skipped at the start, elements to visit, outer index)
+        end
+        (skip_len_I(I) for I in outer)  # yields one (skip, len, I) tuple per outer index
     end
+    @inline simd_inner_length(iter::CartesianPartition, (_, len, _)::Tuple{Int,Int,CartesianIndex}) = len  # outer state is a (skip, len, I) tuple; the inner length is its second entry
+    @propagate_inbounds simd_index(iter::CartesianPartition, (skip, _, I)::Tuple{Int,Int,CartesianIndex}, n::Int) =
+        simd_index(iter.parent.parent, I, n + skip)  # shift n by the skipped elements, then defer to the method for the partition's grandparent
 end  # IteratorsMD
 
 
diff --git a/base/options.jl b/base/options.jl
index 52bfb1237a858..9d08af940136f 100644
--- a/base/options.jl
+++ b/base/options.jl
@@ -89,3 +89,7 @@ function unsafe_load_commands(v::Ptr{Ptr{UInt8}})
     end
     return cmds
 end
+
+# Ask the runtime (`jl_is_file_tracked`) about `file`; true only when the C call returns 1.
+is_file_tracked(file::Symbol) =
+    ccall(:jl_is_file_tracked, Cint, (Any,), file) == 1
diff --git a/deps/Versions.make b/deps/Versions.make
index 1d510ee4911b6..77d568ee7c6b5 100644
--- a/deps/Versions.make
+++ b/deps/Versions.make
@@ -75,7 +75,7 @@ OBJCONV_JLL_NAME := Objconv
 OBJCONV_JLL_VER  := 2.49.1+0
 
 # blastrampoline
-BLASTRAMPOLINE_VER := 5.0.1
+BLASTRAMPOLINE_VER := 5.1.0
 BLASTRAMPOLINE_JLL_NAME := libblastrampoline
 
 # OpenBLAS
diff --git a/deps/checksums/blastrampoline b/deps/checksums/blastrampoline
index d97f2fedd926b..3b5e4359e43ec 100644
--- a/deps/checksums/blastrampoline
+++ b/deps/checksums/blastrampoline
@@ -1,34 +1,34 @@
 blastrampoline-d32042273719672c6669f6442a0be5605d434b70.tar.gz/md5/f380e4238a2dec186ecfe9598f75b824
 blastrampoline-d32042273719672c6669f6442a0be5605d434b70.tar.gz/sha512/00437a96b57d99cef946257480e38e1dfdf325c46bc4a1619f5067565dfb7d9f668b0c8415badb0879b933cb1972f3c4e6be4c9e63a8a85728033e2183373819
-libblastrampoline.v5.0.1+0.aarch64-apple-darwin.tar.gz/md5/8b2b28517ef5db95a0b440f1a936422e
-libblastrampoline.v5.0.1+0.aarch64-apple-darwin.tar.gz/sha512/3d479efc47b8c81fa85fd4d2a868a48304051432b92af90a2bcd2142673f2c422419731b8941f987aed429064532e8634ce3ea8f8d71222cf2d9b9e1e8ba2f7f
-libblastrampoline.v5.0.1+0.aarch64-linux-gnu.tar.gz/md5/23e53049a0c30c8d24482a25954ee497
-libblastrampoline.v5.0.1+0.aarch64-linux-gnu.tar.gz/sha512/c5ba3609e5c47066d8a10acdd1c13e25a78bea6003a39a354000c832aeb1cf04a29392089600b10f0d6e5544aa910412bb50f238ac1f81d55ac15f70aaeb3161
-libblastrampoline.v5.0.1+0.aarch64-linux-musl.tar.gz/md5/5b6770a56cf3632473726a6da3da8ac4
-libblastrampoline.v5.0.1+0.aarch64-linux-musl.tar.gz/sha512/13f01e51b754a7bb4f78d0217380923e353499815872694718922a842fb1d41774e83ec07305b0ca9df2b054e26a626c20e685127e467b3bbb5adb3b9de3c7d3
-libblastrampoline.v5.0.1+0.armv6l-linux-gnueabihf.tar.gz/md5/32f33430008184705b37afcce7d09fdc
-libblastrampoline.v5.0.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/2af9ce233b473f2c81c4ba2e82253a88a519e4cbfa2cd410b27b1f1d7d06559376dd3743951105dbaa784310cce378516978b0d56bd8a196e2eb6c5fb7e6e969
-libblastrampoline.v5.0.1+0.armv6l-linux-musleabihf.tar.gz/md5/52da6ab8b5a9e03aebb032441b668d65
-libblastrampoline.v5.0.1+0.armv6l-linux-musleabihf.tar.gz/sha512/a6f1a375b61642e0b2fd235e27b5bf7e0cd1ff308cdfef27b904f62dfb9ac2bc8fa4e9a7869851310da90af4797994d86d581354070810ffedf3deea5afcc388
-libblastrampoline.v5.0.1+0.armv7l-linux-gnueabihf.tar.gz/md5/08fe2bf27a14e6a6fc4f6b394051aac9
-libblastrampoline.v5.0.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/fdf8d054c67ca3e60dfc7739e02e28817d4510558341d9666b3ccc35818d1ea835a30676cfbe66bbb119c5574c683f1626088119dfc672bf730c87811835e48e
-libblastrampoline.v5.0.1+0.armv7l-linux-musleabihf.tar.gz/md5/836fdbe9e759c71b3c7ae6be2ff6cd6a
-libblastrampoline.v5.0.1+0.armv7l-linux-musleabihf.tar.gz/sha512/6333f8b9270f51c9e520ef8eee52c1796659bd7574c4e8cc04774d3b5e0574af63e5b252cc3340a62bf44771970331839083a528f402bc7929f32cffdbeba876
-libblastrampoline.v5.0.1+0.i686-linux-gnu.tar.gz/md5/11f127c422a4b51d6cd0abe370176c25
-libblastrampoline.v5.0.1+0.i686-linux-gnu.tar.gz/sha512/ad8510a804637ed144ee931a11629ee86e3c29e36be394c1f020a04e68b64a04a0eaa976961a993be0693b1f57b687f18dd25d3313aafa217a9140913dc9849d
-libblastrampoline.v5.0.1+0.i686-linux-musl.tar.gz/md5/c865cd79d083de137714df55dfd015c9
-libblastrampoline.v5.0.1+0.i686-linux-musl.tar.gz/sha512/99f4938626f84e5636231f34842c6877be5ac0d528f7bcae6b15d51b82d0daa06eb7d086a28f2c516234a989dd384f932886303f13cbac33f972fbf64b16dfb9
-libblastrampoline.v5.0.1+0.i686-w64-mingw32.tar.gz/md5/e9e2cbb1c90b691fd06f4df81674d36a
-libblastrampoline.v5.0.1+0.i686-w64-mingw32.tar.gz/sha512/c32a7449476f994f8d1bdb576959d6cc54018ac33be2d691b8627467ff5870deac7427e83f275db9b675c5d92bd13254979b06da33b782d6de3b49b1a6dda19c
-libblastrampoline.v5.0.1+0.powerpc64le-linux-gnu.tar.gz/md5/5904dce9e258e4bdf71493e6cdc5fb20
-libblastrampoline.v5.0.1+0.powerpc64le-linux-gnu.tar.gz/sha512/e10761289aaf985e96e0c908f988218450b54b78a5ba0ca67b509d63c422849471b38e952b93e1de0ffa92d9b8e76b16cfd733a05940203213f7f10cdb953dc9
-libblastrampoline.v5.0.1+0.x86_64-apple-darwin.tar.gz/md5/2d15a24ce47dc67ef575ca514530734e
-libblastrampoline.v5.0.1+0.x86_64-apple-darwin.tar.gz/sha512/5209953e6ed72c5840b926c2c50e67f3ef2e8612877e5c6c4962e687870a9c4f95ab83fab1db77419ffdd21e303e5a951a86d21979cbd2e2e8b9d65a2b86a693
-libblastrampoline.v5.0.1+0.x86_64-linux-gnu.tar.gz/md5/67092e794898efbe1d75bbaf19912538
-libblastrampoline.v5.0.1+0.x86_64-linux-gnu.tar.gz/sha512/cc117c4d6d7a34fc7abfff4d40584f63b3ed80a2aa8be887f22a65b25e9196a2173d624bda77e8a1f2c401792c090948ad0a9069af3e48ee886664e1b2dd771f
-libblastrampoline.v5.0.1+0.x86_64-linux-musl.tar.gz/md5/32f65fa0681d81ab4f5a84d18ec0ef40
-libblastrampoline.v5.0.1+0.x86_64-linux-musl.tar.gz/sha512/177f25c3108af15653726871b110d77e0a5e94b06bd6996503f83b7dd7c0d9877beff5eeadbdff4952ac606fcec426c04a97566efc2d88d75ed380e566ffe0c0
-libblastrampoline.v5.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/12494ac279b869c740712b8f774edadf
-libblastrampoline.v5.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/6b896996f20552bb05d22fb314b6b9ad8e4359aec31f90fe7029cd13d37e6db1c305a87d9622ff4b036b155a12a5b305a8fd56e4074149bad8c3e6a225c70c5d
-libblastrampoline.v5.0.1+0.x86_64-w64-mingw32.tar.gz/md5/4fdbfc6384ba4dbc74eda97dff919511
-libblastrampoline.v5.0.1+0.x86_64-w64-mingw32.tar.gz/sha512/e752486b9e6f6ed293a42337f432c8dcb86246523864674be5ff35fcc49f8cc848f77c41b2af1903959938f620d68b1de6028afc662f9e893045308eef72d624
+libblastrampoline.v5.1.0+0.aarch64-apple-darwin.tar.gz/md5/edf090a17d862c33d611875058438757
+libblastrampoline.v5.1.0+0.aarch64-apple-darwin.tar.gz/sha512/a3413c7d46c04318a5bebf10d6f930d04b5997d4be6be4e2748a7b60f968d2f2be7de140eee6c699962a12e8439f68f144e5323dea17d91587e82f97aaaaaa24
+libblastrampoline.v5.1.0+0.aarch64-linux-gnu.tar.gz/md5/fe88a410d795f805756488915679edbd
+libblastrampoline.v5.1.0+0.aarch64-linux-gnu.tar.gz/sha512/cbd31304278ea67ddc0f766c4647275c87829cf5377c3851153b7568015f4f016fd0f3e095f479c33d23a50f4af8c38bae4555b02dcbf45a04b6e5a0dd3504a8
+libblastrampoline.v5.1.0+0.aarch64-linux-musl.tar.gz/md5/d4d8c393eb28953297b37a7bae79ed2e
+libblastrampoline.v5.1.0+0.aarch64-linux-musl.tar.gz/sha512/3b5dca87e089ac10486f75663b4cf7d404c71b040231b04e1ec5110d13f30ac620b4cb880040106273866d465da9bdda5643887534de8e35668a7ab545422216
+libblastrampoline.v5.1.0+0.armv6l-linux-gnueabihf.tar.gz/md5/8b5f2fbd5442bf31bd10836ffd177968
+libblastrampoline.v5.1.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/f1d6314c785afc0aaa3ebcf8a532312e676ca41d427b9c2abdea88c700df4d6a7cb5cfa54d65493e5c3d711a64062a20a5de7e3b75feee0653115cee7de05446
+libblastrampoline.v5.1.0+0.armv6l-linux-musleabihf.tar.gz/md5/8ed3013c644ab3be5dce013fb23fd413
+libblastrampoline.v5.1.0+0.armv6l-linux-musleabihf.tar.gz/sha512/da40cbb0114d46a66ae41284d36dc855aa52dcd3993643858308f18c5d8eedbf92fc8ee57d3e3cc2153f29670b40bc03a8dd01d5b49dde210c8a7a2d471a59b7
+libblastrampoline.v5.1.0+0.armv7l-linux-gnueabihf.tar.gz/md5/23b8ef9ea92a8d474646d814c0c91577
+libblastrampoline.v5.1.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/97789adc18a54b953ce8696b484a4314e734a8092a27f81f43c1ae269b592b18ba7c67082396220a1906ffb075895c34462be976e0059aded9f6a6948abb1672
+libblastrampoline.v5.1.0+0.armv7l-linux-musleabihf.tar.gz/md5/d5a47ebe37a4a234ee6a4f3cf830e8c5
+libblastrampoline.v5.1.0+0.armv7l-linux-musleabihf.tar.gz/sha512/65366692c074576733e3b3f15d011e326d6a1e2357055a1a0159db31cdd7d5ff0e9aba9a33c1f2a949e128ac10b72776a3f76907df4cadcf7e67ace934cf4ef0
+libblastrampoline.v5.1.0+0.i686-linux-gnu.tar.gz/md5/14a342ab1bd16ef61d747e99acc97e6a
+libblastrampoline.v5.1.0+0.i686-linux-gnu.tar.gz/sha512/8eca984912e69af769f06cd2b38d1df9d724e4e42d6d5b2fcb77a8e74b2aa9f9c31beb36d634e5da28d4d2f0838957f5c5cd336db616768d8ffb60217fe92edc
+libblastrampoline.v5.1.0+0.i686-linux-musl.tar.gz/md5/201e6c737df0c0e2f4327c395133969f
+libblastrampoline.v5.1.0+0.i686-linux-musl.tar.gz/sha512/778daa7a0d3a6fb8d6480a14123e874009f0fdc5f1d3411518f8d9975c45ca418e88d71db72af8465d4064f4c177d0abb70bc568df3a4c765eed7c5aeddca428
+libblastrampoline.v5.1.0+0.i686-w64-mingw32.tar.gz/md5/8ddf4dec49fac4888f94f90143126e5f
+libblastrampoline.v5.1.0+0.i686-w64-mingw32.tar.gz/sha512/388b797f4c86f0ea090058acaff0eed34c42d45092c001410d11a4a4da93668c1729453290872cd44615ee517d62546f4dc42005240a6c36e40e7152f5c9cf5c
+libblastrampoline.v5.1.0+0.powerpc64le-linux-gnu.tar.gz/md5/db626123ab94b489ac8b4d395b2f5cf4
+libblastrampoline.v5.1.0+0.powerpc64le-linux-gnu.tar.gz/sha512/8c96f518dea82057fe85bdb2ee867cc7abc33e9c53fe94dd84d097a16268630c22082db7fc003dadfc4749400f3465564088e05cabd6844c31b870319432c433
+libblastrampoline.v5.1.0+0.x86_64-apple-darwin.tar.gz/md5/65b9aae2f749ec608b61412aa1921d65
+libblastrampoline.v5.1.0+0.x86_64-apple-darwin.tar.gz/sha512/38e974c9260614d855b0b13f78e72bbd65aa889e88101d25441dd4e78ce37baf81bab7de1950d71d8e35b32d62fb88ac9c3f39ab5a4aff11d00619441bc003f8
+libblastrampoline.v5.1.0+0.x86_64-linux-gnu.tar.gz/md5/0ab01f256277b4ea96f6d83c50891b99
+libblastrampoline.v5.1.0+0.x86_64-linux-gnu.tar.gz/sha512/2b2178d74beb1c12e348f6469777d31116f26229c243d5e08a6ac36a74c3eb38854c1d82429d0e7cabee259d0d5220c47c334a561ea5caac6f61d91aa6b34f52
+libblastrampoline.v5.1.0+0.x86_64-linux-musl.tar.gz/md5/52a9da4586daa6572b8fe2c13db6268a
+libblastrampoline.v5.1.0+0.x86_64-linux-musl.tar.gz/sha512/04abc5a0b6f80f10d1fccceee8a0e1c58aba76a45e3f6662ce4115d9d39d20dd05b3859434037d21bf6c5088a5a428565cd86e1cf6d1676666ce7e3eb1921b80
+libblastrampoline.v5.1.0+0.x86_64-unknown-freebsd.tar.gz/md5/f2b66517937a7647086ba96acc81c6a6
+libblastrampoline.v5.1.0+0.x86_64-unknown-freebsd.tar.gz/sha512/c19654b97928bdba36ccf3dbecf8ca994a46929c29c5c120d2d17062128a3df8927230fe7c418d6f780557abb8ce94b6a6a023bddcd3aeb91c8302cdbfe2b39e
+libblastrampoline.v5.1.0+0.x86_64-w64-mingw32.tar.gz/md5/4b50ad8399c733ee5d60ce1ad00e1e5e
+libblastrampoline.v5.1.0+0.x86_64-w64-mingw32.tar.gz/sha512/6a0f1d061350d53dd2a030ba11a0ac02c5ae598cd2c21dda39f95d81a2b0f43a454d60cf32c2fc0546df074181100e2d247d229d62c4a6b94bc7b697b02f0e0e
diff --git a/deps/csl.mk b/deps/csl.mk
index 9f95c00f3cfe7..1940984fdc199 100644
--- a/deps/csl.mk
+++ b/deps/csl.mk
@@ -1,6 +1,6 @@
 # Interrogate the fortran compiler (which is always GCC based) on where it is keeping its libraries
-STD_LIB_PATH := $(shell LANG=C $(FC) -print-search-dirs | grep '^programs: =' | sed -e "s/^programs: =//")
-STD_LIB_PATH += :$(shell LANG=C $(FC) -print-search-dirs | grep '^libraries: =' | sed -e "s/^libraries: =//")
+STD_LIB_PATH := $(shell LANG=C $(FC) -print-search-dirs 2>/dev/null | grep '^programs: =' | sed -e "s/^programs: =//")
+STD_LIB_PATH += :$(shell LANG=C $(FC) -print-search-dirs 2>/dev/null | grep '^libraries: =' | sed -e "s/^libraries: =//")
 ifneq (,$(findstring CYGWIN,$(BUILD_OS))) # the cygwin-mingw32 compiler lies about it search directory paths
 STD_LIB_PATH := $(shell echo '$(STD_LIB_PATH)' | sed -e "s!/lib/!/bin/!g")
 endif
diff --git a/src/dump.c b/src/dump.c
index 956466ac765b2..f04a6b27d8099 100644
--- a/src/dump.c
+++ b/src/dump.c
@@ -618,8 +618,8 @@ static void jl_serialize_code_instance(jl_serializer_state *s, jl_code_instance_
 
     write_uint8(s->s, TAG_CODE_INSTANCE);
     write_uint8(s->s, flags);
-    write_uint8(s->s, codeinst->ipo_purity_bits);
-    write_uint8(s->s, codeinst->purity_bits);
+    write_uint32(s->s, codeinst->ipo_purity_bits);
+    write_uint32(s->s, codeinst->purity_bits);
     jl_serialize_value(s, (jl_value_t*)codeinst->def);
     if (write_ret_type) {
         jl_serialize_value(s, codeinst->inferred);
@@ -1829,8 +1829,8 @@ static jl_value_t *jl_deserialize_value_code_instance(jl_serializer_state *s, jl
     int flags = read_uint8(s->s);
     int validate = (flags >> 0) & 3;
     int constret = (flags >> 2) & 1;
-    codeinst->ipo_purity_bits = read_uint8(s->s);
-    codeinst->purity_bits = read_uint8(s->s);
+    codeinst->ipo_purity_bits = read_uint32(s->s);
+    codeinst->purity_bits = read_uint32(s->s);
     codeinst->def = (jl_method_instance_t*)jl_deserialize_value(s, (jl_value_t**)&codeinst->def);
     jl_gc_wb(codeinst, codeinst->def);
     codeinst->inferred = jl_deserialize_value(s, &codeinst->inferred);
diff --git a/src/gf.c b/src/gf.c
index 964e4d1e01963..a37883251e9c2 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -220,7 +220,7 @@ JL_DLLEXPORT jl_code_instance_t* jl_new_codeinst(
         jl_method_instance_t *mi, jl_value_t *rettype,
         jl_value_t *inferred_const, jl_value_t *inferred,
         int32_t const_flags, size_t min_world, size_t max_world,
-        uint8_t ipo_effects, uint8_t effects, jl_value_t *argescapes,
+        uint32_t ipo_effects, uint32_t effects, jl_value_t *argescapes,
         uint8_t relocatability);
 JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT,
                                      jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED);
@@ -390,7 +390,7 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
         jl_method_instance_t *mi, jl_value_t *rettype,
         jl_value_t *inferred_const, jl_value_t *inferred,
         int32_t const_flags, size_t min_world, size_t max_world,
-        uint8_t ipo_effects, uint8_t effects, jl_value_t *argescapes,
+        uint32_t ipo_effects, uint32_t effects, jl_value_t *argescapes,
         uint8_t relocatability
         /*, jl_array_t *edges, int absolute_max*/)
 {
diff --git a/src/init.c b/src/init.c
index 6bebffdcf326c..98d5081c1daaf 100644
--- a/src/init.c
+++ b/src/init.c
@@ -597,6 +597,13 @@ static void jl_resolve_sysimg_location(JL_IMAGE_SEARCH rel)
     }
 }
 
+JL_DLLEXPORT int jl_is_file_tracked(jl_sym_t *path)
+{
+    // Prefix test: 1 if the name of `path` starts with tracked_path; 0 otherwise, or when no tracked path is set (NULL).
+    const char *tracked = jl_options.tracked_path;
+    return tracked != NULL && strncmp(jl_symbol_name(path), tracked, strlen(tracked)) == 0;
+}
+
 static void jl_set_io_wait(int v)
 {
     jl_task_t *ct = jl_current_task;
diff --git a/src/jltypes.c b/src/jltypes.c
index e19b4c536d94d..7d8b1ac6bb32b 100644
--- a/src/jltypes.c
+++ b/src/jltypes.c
@@ -2520,7 +2520,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_any_type,
                             //jl_any_type,
                             //jl_bool_type,
-			                jl_uint8_type, jl_uint8_type,
+                            jl_uint32_type, jl_uint32_type,
                             jl_any_type,
                             jl_bool_type,
                             jl_bool_type,
diff --git a/src/julia.h b/src/julia.h
index 5e22c262fb47d..b15008dee91e3 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -394,21 +394,23 @@ typedef struct _jl_code_instance_t {
 
     // purity results
     union {
-        uint8_t ipo_purity_bits;
+        uint32_t ipo_purity_bits;
         struct {
             uint8_t ipo_consistent:2;
             uint8_t ipo_effect_free:2;
             uint8_t ipo_nothrow:2;
             uint8_t ipo_terminates:2;
+            uint8_t ipo_overlayed:1;
         } ipo_purity_flags;
     };
     union {
-        uint8_t purity_bits;
+        uint32_t purity_bits;
         struct {
             uint8_t consistent:2;
             uint8_t effect_free:2;
             uint8_t nothrow:2;
             uint8_t terminates:2;
+            uint8_t overlayed:1;
         } purity_flags;
     };
     jl_value_t *argescapes; // escape information of call arguments
diff --git a/src/module.c b/src/module.c
index 8f37cc00b1bd6..63dff3ae6deb7 100644
--- a/src/module.c
+++ b/src/module.c
@@ -528,7 +528,7 @@ static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_s
             }
         }
         else {
-            jl_binding_t *nb = new_binding(s);
+            jl_binding_t *nb = new_binding(b->name);
             nb->owner = b->owner;
             nb->imported = (explici!=0);
             nb->deprecated = b->deprecated;
diff --git a/src/serialize.h b/src/serialize.h
index 63d7c2d360951..817591b989f93 100644
--- a/src/serialize.h
+++ b/src/serialize.h
@@ -109,6 +109,18 @@ static uint16_t read_uint16(ios_t *s) JL_NOTSAFEPOINT
     return x;
 }
 
+static void write_uint32(ios_t *s, uint32_t i) JL_NOTSAFEPOINT
+{
+    ios_write(s, (char*)&i, sizeof(uint32_t)); // emit the 4 in-memory bytes of `i`, no byte-order conversion
+}
+
+static uint32_t read_uint32(ios_t *s) JL_NOTSAFEPOINT
+{
+    uint32_t value = 0; // zero-initialized; the result of ios_read is not checked
+    ios_read(s, (char*)&value, sizeof(uint32_t)); // fill `value` with the next 4 bytes as stored
+    return value;
+}
+
 void *jl_lookup_ser_tag(jl_value_t *v);
 void *jl_lookup_common_symbol(jl_value_t *v);
 jl_value_t *jl_deser_tag(uint8_t tag);
diff --git a/src/support/dirpath.h b/src/support/dirpath.h
index 57c7927f14d55..b2314d571c649 100644
--- a/src/support/dirpath.h
+++ b/src/support/dirpath.h
@@ -6,14 +6,17 @@
 #ifdef _OS_WINDOWS_
 #define PATHSEPSTRING "\\"
 #define PATHLISTSEPSTRING ";"
+#if defined(PATH_MAX)
 #define JL_PATH_MAX PATH_MAX
-#if defined(_COMPILER_CLANG_)
+#else // _COMPILER_CLANG_ may have the name reversed
 #define JL_PATH_MAX MAX_PATH
 #endif
 #else
 #define PATHSEPSTRING "/"
 #define PATHLISTSEPSTRING ":"
-#ifndef JL_PATH_MAX // many platforms don't have a max path, we define one anyways
+#if defined(PATH_MAX)
+#define JL_PATH_MAX PATH_MAX
+#else // many platforms don't have a max path, we define one anyways
 #define JL_PATH_MAX 1024
 #endif
 #endif
diff --git a/stdlib/LibGit2/test/libgit2.jl b/stdlib/LibGit2/test/libgit2.jl
index 2357536e19b5c..2a74ed4908dfc 100644
--- a/stdlib/LibGit2/test/libgit2.jl
+++ b/stdlib/LibGit2/test/libgit2.jl
@@ -634,7 +634,7 @@ mktempdir() do dir
     commit_oid1 = LibGit2.GitHash()
     commit_oid2 = LibGit2.GitHash()
     commit_oid3 = LibGit2.GitHash()
-    master_branch = "master"
+    default_branch = LibGit2.getconfig("init.defaultBranch", "master")
     test_branch = "test_branch"
     test_branch2 = "test_branch_two"
     tag1 = "tag1"
@@ -958,19 +958,19 @@ mktempdir() do dir
                     # various branch properties
                     @test LibGit2.isbranch(brref)
                     @test !LibGit2.isremote(brref)
-                    @test LibGit2.name(brref) == "refs/heads/master"
-                    @test LibGit2.shortname(brref) == master_branch
+                    @test LibGit2.name(brref) == "refs/heads/$(default_branch)"
+                    @test LibGit2.shortname(brref) == default_branch
                     @test LibGit2.ishead(brref)
                     @test LibGit2.upstream(brref) === nothing
 
                     # showing the GitReference to this branch
                     show_strs = split(sprint(show, brref), "\n")
                     @test show_strs[1] == "GitReference:"
-                    @test show_strs[2] == "Branch with name refs/heads/master"
+                    @test show_strs[2] == "Branch with name refs/heads/$(default_branch)"
                     @test show_strs[3] == "Branch is HEAD."
                     @test repo.ptr == LibGit2.repository(brref).ptr
-                    @test brnch == master_branch
-                    @test LibGit2.headname(repo) == master_branch
+                    @test brnch == default_branch
+                    @test LibGit2.headname(repo) == default_branch
 
                     # create a branch *without* setting its tip as HEAD
                     LibGit2.branch!(repo, test_branch, string(commit_oid1), set_head=false)
@@ -991,7 +991,7 @@ mktempdir() do dir
                     end
                 end
                 branches = map(b->LibGit2.shortname(b[1]), LibGit2.GitBranchIter(repo))
-                @test master_branch in branches
+                @test default_branch in branches
                 @test test_branch in branches
             end
         end
@@ -1050,7 +1050,7 @@ mktempdir() do dir
                 @test tag2 in tags
 
                 refs = LibGit2.ref_list(repo)
-                @test refs == ["refs/heads/master", "refs/heads/test_branch", "refs/tags/tag1", "refs/tags/tag2"]
+                @test refs == ["refs/heads/$(default_branch)", "refs/heads/test_branch", "refs/tags/tag1", "refs/tags/tag2"]
                 # test deleting a tag
                 LibGit2.tag_delete(repo, tag1)
                 tags = LibGit2.tag_list(repo)
@@ -1334,7 +1334,7 @@ mktempdir() do dir
             add_and_commit_file(repo, "file1", "111\n")
             # switch back, add a commit, try to merge
             # from branch/merge_a
-            LibGit2.branch!(repo, "master")
+            LibGit2.branch!(repo, default_branch)
 
             # test for showing a Reference to a non-HEAD branch
             brref = LibGit2.GitReference(repo, "refs/heads/branch/merge_a")
@@ -1347,7 +1347,7 @@ mktempdir() do dir
 
             add_and_commit_file(repo, "file2", "222\n")
             upst_ann = LibGit2.GitAnnotated(repo, "branch/merge_a")
-            head_ann = LibGit2.GitAnnotated(repo, "master")
+            head_ann = LibGit2.GitAnnotated(repo, default_branch)
 
             # (fail to) merge them because we can't fastforward
             @test_logs (:warn,"Cannot perform fast-forward merge") !LibGit2.merge!(repo, [upst_ann], true)
@@ -1360,7 +1360,7 @@ mktempdir() do dir
             mv(joinpath(LibGit2.path(repo),"file1"),joinpath(LibGit2.path(repo),"mvfile1"))
             LibGit2.add!(repo, "mvfile1")
             LibGit2.commit(repo, "move file1")
-            LibGit2.branch!(repo, "master")
+            LibGit2.branch!(repo, default_branch)
             upst_ann = LibGit2.GitAnnotated(repo, "branch/merge_b")
             rename_flag = Cint(0)
             rename_flag = LibGit2.toggle(rename_flag, Cint(0)) # turns on the find renames opt
@@ -1438,14 +1438,14 @@ mktempdir() do dir
             # the rebase should fail.
             @test_throws LibGit2.GitError LibGit2.rebase!(repo)
             # Try rebasing on master instead
-            newhead = LibGit2.rebase!(repo, master_branch)
+            newhead = LibGit2.rebase!(repo, default_branch)
             @test newhead == head_oid
 
             # Switch to the master branch
-            LibGit2.branch!(repo, master_branch)
+            LibGit2.branch!(repo, default_branch)
 
             fetch_heads = LibGit2.fetchheads(repo)
-            @test fetch_heads[1].name == "refs/heads/master"
+            @test fetch_heads[1].name == "refs/heads/$(default_branch)"
             @test fetch_heads[1].ismerge == true # we just merged master
             @test fetch_heads[2].name == "refs/heads/test_branch"
             @test fetch_heads[2].ismerge == false
@@ -1485,7 +1485,7 @@ mktempdir() do dir
 
                 # all tag in place
                 branches = map(b->LibGit2.shortname(b[1]), LibGit2.GitBranchIter(repo))
-                @test master_branch in branches
+                @test default_branch in branches
                 @test test_branch in branches
 
                 # issue #16337
@@ -1683,7 +1683,7 @@ mktempdir() do dir
             # add yet another file
             add_and_commit_file(repo, "file4", "444\n")
             # rebase with onto
-            newhead = LibGit2.rebase!(repo, "branch/a", "master")
+            newhead = LibGit2.rebase!(repo, "branch/a", default_branch)
 
             newerhead = LibGit2.head_oid(repo)
             @test newerhead == newhead
@@ -1693,7 +1693,7 @@ mktempdir() do dir
             pre_abort_head = add_and_commit_file(repo, "file6", "666\n")
             # Rebase type
             head_ann = LibGit2.GitAnnotated(repo, "branch/a")
-            upst_ann = LibGit2.GitAnnotated(repo, "master")
+            upst_ann = LibGit2.GitAnnotated(repo, default_branch)
             rb = LibGit2.GitRebase(repo, head_ann, upst_ann)
             @test_throws BoundsError rb[3]
             @test_throws BoundsError rb[0]
@@ -1718,7 +1718,7 @@ mktempdir() do dir
 
             a_head = LibGit2.head_oid(repo)
             add_and_commit_file(repo, "merge_file1", "111\n")
-            LibGit2.branch!(repo, "master")
+            LibGit2.branch!(repo, default_branch)
             a_head_ann = LibGit2.GitAnnotated(repo, "branch/merge_a")
             # merge returns true if successful
             @test_logs (:info,"Review and commit merged changes") LibGit2.merge!(repo, [a_head_ann])
@@ -1751,7 +1751,7 @@ mktempdir() do dir
             close(repo_file)
             # and checkout HEAD once more
             LibGit2.checkout_head(repo, options=LibGit2.CheckoutOptions(checkout_strategy=LibGit2.Consts.CHECKOUT_FORCE))
-            @test LibGit2.headname(repo) == master_branch
+            @test LibGit2.headname(repo) == default_branch
             @test !LibGit2.isdirty(repo)
         end
     end
diff --git a/stdlib/LinearAlgebra/src/blas.jl b/stdlib/LinearAlgebra/src/blas.jl
index 9b5506e993ea8..7d7f77c282bc0 100644
--- a/stdlib/LinearAlgebra/src/blas.jl
+++ b/stdlib/LinearAlgebra/src/blas.jl
@@ -161,12 +161,26 @@ end
 "Check that upper/lower (for special matrices) is correctly specified"
 function chkuplo(uplo::AbstractChar)
     if !(uplo == 'U' || uplo == 'L')
-        throw(ArgumentError("uplo argument must be 'U' (upper) or 'L' (lower), got $uplo"))
+        throw(ArgumentError(lazy"uplo argument must be 'U' (upper) or 'L' (lower), got $uplo"))
     end
     uplo
 end
 
 # Level 1
+# A helper function to pick the pointer and inc for 1d like inputs.
+@inline function vec_pointer_stride(x::AbstractArray, stride0check = nothing)
+    isdense(x) && return pointer(x), 1 # simplify runtime check when possible
+    ndims(x) == 1 || strides(x) == Base.size_to_strides(stride(x, 1), size(x)...) || # N-d input must have strides built from stride(x, 1) and size(x)
+        throw(ArgumentError("only support vector like inputs"))
+    st = stride(x, 1)
+    isnothing(stride0check) || (st == 0 && throw(stride0check)) # zero stride throws the caller-supplied exception, if any
+    ptr = st > 0 ? pointer(x) : pointer(x, lastindex(x)) # non-positive stride: start from the last index
+    ptr, st # (pointer, stride); the stride keeps its sign
+end
+isdense(A) = isa(A, DenseArray) # fallback: dense only if the value itself is a DenseArray
+isdense(A::Base.FastContiguousSubArray) = isdense(parent(A)) # this and the wrappers below defer to the parent
+isdense(A::Base.ReshapedArray) = isdense(parent(A))
+isdense(A::Base.ReinterpretArray) = isdense(parent(A))
 ## copy
 
 """
@@ -257,7 +271,11 @@ for (fname, elty) in ((:dscal_,:Float64),
             DX
         end
 
-        scal!(DA::$elty, DX::AbstractArray{$elty}) = scal!(length(DX),DA,DX,stride(DX,1))
+        function scal!(DA::$elty, DX::AbstractArray{$elty})
+            ptr, inc = vec_pointer_stride(DX, ArgumentError("dest vector with 0 stride is not allowed"))
+            GC.@preserve DX scal!(length(DX), DA, ptr, abs(inc)) # DX stays rooted while only its raw pointer is passed on
+            return DX # hand back the scaled array itself
+        end
     end
 end
 scal(n, DA, DX, incx) = scal!(n, DA, copy(DX), incx)
@@ -361,73 +379,16 @@ for (fname, elty) in ((:cblas_zdotu_sub,:ComplexF64),
     end
 end
 
-@inline function _dot_length_check(x,y)
-    n = length(x)
-    if n != length(y)
-        throw(DimensionMismatch("dot product arguments have lengths $(length(x)) and $(length(y))"))
-    end
-    n
-end
-
 for (elty, f) in ((Float32, :dot), (Float64, :dot),
                   (ComplexF32, :dotc), (ComplexF64, :dotc),
                   (ComplexF32, :dotu), (ComplexF64, :dotu))
     @eval begin
-        function $f(x::DenseArray{$elty}, y::DenseArray{$elty})
-            n = _dot_length_check(x,y)
-            $f(n, x, 1, y, 1)
-        end
-
-        function $f(x::StridedVector{$elty}, y::DenseArray{$elty})
-            n = _dot_length_check(x,y)
-            xstride = stride(x,1)
-            ystride = stride(y,1)
-            x_delta = xstride < 0 ? n : 1
-            GC.@preserve x $f(n, pointer(x, x_delta), xstride, y, ystride)
+        function $f(x::AbstractArray{$elty}, y::AbstractArray{$elty})
+            nx, ny = length(x), length(y) # only element counts are compared
+            nx == ny || throw(DimensionMismatch(lazy"dot product arguments have lengths $nx and $ny"))
+            GC.@preserve x y $f(nx, vec_pointer_stride(x)..., vec_pointer_stride(y)...) # splats (pointer, stride) per operand
         end
-
-        function $f(x::DenseArray{$elty}, y::StridedVector{$elty})
-            n = _dot_length_check(x,y)
-            xstride = stride(x,1)
-            ystride = stride(y,1)
-            y_delta = ystride < 0 ? n : 1
-            GC.@preserve y $f(n, x, xstride, pointer(y, y_delta), ystride)
-        end
-
-        function $f(x::StridedVector{$elty}, y::StridedVector{$elty})
-            n = _dot_length_check(x,y)
-            xstride = stride(x,1)
-            ystride = stride(y,1)
-            x_delta = xstride < 0 ? n : 1
-            y_delta = ystride < 0 ? n : 1
-            GC.@preserve x y $f(n, pointer(x, x_delta), xstride, pointer(y, y_delta), ystride)
-        end
-    end
-end
-
-function dot(DX::Union{DenseArray{T},AbstractVector{T}}, DY::Union{DenseArray{T},AbstractVector{T}}) where T<:BlasReal
-    require_one_based_indexing(DX, DY)
-    n = length(DX)
-    if n != length(DY)
-        throw(DimensionMismatch(lazy"dot product arguments have lengths $(length(DX)) and $(length(DY))"))
-    end
-    return dot(n, DX, stride(DX, 1), DY, stride(DY, 1))
-end
-function dotc(DX::Union{DenseArray{T},AbstractVector{T}}, DY::Union{DenseArray{T},AbstractVector{T}}) where T<:BlasComplex
-    require_one_based_indexing(DX, DY)
-    n = length(DX)
-    if n != length(DY)
-        throw(DimensionMismatch(lazy"dot product arguments have lengths $(length(DX)) and $(length(DY))"))
     end
-    return dotc(n, DX, stride(DX, 1), DY, stride(DY, 1))
-end
-function dotu(DX::Union{DenseArray{T},AbstractVector{T}}, DY::Union{DenseArray{T},AbstractVector{T}}) where T<:BlasComplex
-    require_one_based_indexing(DX, DY)
-    n = length(DX)
-    if n != length(DY)
-        throw(DimensionMismatch(lazy"dot product arguments have lengths $(length(DX)) and $(length(DY))"))
-    end
-    return dotu(n, DX, stride(DX, 1), DY, stride(DY, 1))
 end
 
 ## nrm2
@@ -461,7 +422,11 @@ for (fname, elty, ret_type) in ((:dnrm2_,:Float64,:Float64),
         end
     end
 end
-nrm2(x::Union{AbstractVector,DenseArray}) = nrm2(length(x), x, stride1(x))
+# OpenBLAS returns 0 for a negative stride, so the absolute stride is passed instead
+function nrm2(x::AbstractArray)
+    ptr, inc = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+    return GC.@preserve x nrm2(length(x), ptr, abs(inc)) # x stays rooted while its raw pointer is in use
+end
 
 ## asum
 
@@ -498,7 +463,10 @@ for (fname, elty, ret_type) in ((:dasum_,:Float64,:Float64),
         end
     end
 end
-asum(x::Union{AbstractVector,DenseArray}) = asum(length(x), x, stride1(x))
+function asum(x::AbstractArray)
+    ptr, inc = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+    return GC.@preserve x asum(length(x), ptr, abs(inc)) # absolute stride; x stays rooted for the call
+end
 
 ## axpy
 
@@ -542,15 +510,17 @@ for (fname, elty) in ((:daxpy_,:Float64),
         end
     end
 end
-function axpy!(alpha::Number, x::Union{DenseArray{T},StridedVector{T}}, y::Union{DenseArray{T},StridedVector{T}}) where T<:BlasFloat
+function axpy!(alpha::Number, x::AbstractArray{T}, y::AbstractArray{T}) where T<:BlasFloat
     if length(x) != length(y)
         throw(DimensionMismatch(lazy"x has length $(length(x)), but y has length $(length(y))"))
     end
-    return axpy!(length(x), convert(T,alpha), x, stride(x, 1), y, stride(y, 1))
+    GC.@preserve x y axpy!(length(x), T(alpha), vec_pointer_stride(x)..., # source: no zero-stride check is requested
+        vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))...) # destination: zero stride throws
+    y # the updated destination is the result
 end
 
-function axpy!(alpha::Number, x::Array{T}, rx::Union{UnitRange{Ti},AbstractRange{Ti}},
-               y::Array{T}, ry::Union{UnitRange{Ti},AbstractRange{Ti}}) where {T<:BlasFloat,Ti<:Integer}
+function axpy!(alpha::Number, x::Array{T}, rx::AbstractRange{Ti},
+               y::Array{T}, ry::AbstractRange{Ti}) where {T<:BlasFloat,Ti<:Integer}
     if length(rx) != length(ry)
         throw(DimensionMismatch("ranges of differing lengths"))
     end
@@ -562,10 +532,10 @@ function axpy!(alpha::Number, x::Array{T}, rx::Union{UnitRange{Ti},AbstractRange
     end
     GC.@preserve x y axpy!(
         length(rx),
-        convert(T, alpha),
-        pointer(x) + (first(rx) - 1)*sizeof(T),
+        T(alpha),
+        pointer(x, minimum(rx)),
         step(rx),
-        pointer(y) + (first(ry) - 1)*sizeof(T),
+        pointer(y, minimum(ry)),
         step(ry))
 
     return y
@@ -612,12 +582,14 @@ for (fname, elty) in ((:daxpby_,:Float64), (:saxpby_,:Float32),
     end
 end
 
-function axpby!(alpha::Number, x::Union{DenseArray{T},AbstractVector{T}}, beta::Number, y::Union{DenseArray{T},AbstractVector{T}}) where T<:BlasFloat
+function axpby!(alpha::Number, x::AbstractArray{T}, beta::Number, y::AbstractArray{T}) where T<:BlasFloat
     require_one_based_indexing(x, y)
     if length(x) != length(y)
         throw(DimensionMismatch(lazy"x has length $(length(x)), but y has length $(length(y))"))
     end
-    return axpby!(length(x), convert(T, alpha), x, stride(x, 1), convert(T, beta), y, stride(y, 1))
+    GC.@preserve x y axpby!(length(x), T(alpha), vec_pointer_stride(x)..., T(beta), # source: no zero-stride check is requested
+        vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))...) # destination: zero stride throws
+    y # the updated destination is the result
 end
 
 ## iamax
@@ -633,7 +605,11 @@ for (fname, elty) in ((:idamax_,:Float64),
         end
     end
 end
-iamax(dx::Union{AbstractVector,DenseArray}) = iamax(length(dx), dx, stride1(dx))
+function iamax(dx::AbstractArray)
+    p, st = vec_pointer_stride(dx) # (pointer, signed stride) of the vector-like input
+    st <= 0 && return BlasInt(0) # non-positive stride: answer 0 without calling BLAS
+    GC.@preserve dx iamax(length(dx), p, st) # keep dx rooted: only its raw pointer reaches the low-level call
+end
 
 """
     iamax(n, dx, incx)
@@ -673,20 +649,16 @@ for (fname, elty) in ((:dgemv_,:Float64),
             end
             chkstride1(A)
             lda = stride(A,2)
-            sX = stride(X,1)
-            sY = stride(Y,1)
+            pX, sX = vec_pointer_stride(X, ArgumentError("input vector with 0 stride is not allowed"))
+            pY, sY = vec_pointer_stride(Y, ArgumentError("dest vector with 0 stride is not allowed"))
+            pA = pointer(A)
             if lda < 0
-                colindex = lastindex(A, 2)
+                pA += (size(A, 2) - 1) * lda * sizeof($elty)
                 lda = -lda
                 trans == 'N' ? (sX = -sX) : (sY = -sY)
-            else
-                colindex = firstindex(A, 2)
             end
             lda >= size(A,1) || size(A,2) <= 1 || error("when `size(A,2) > 1`, `abs(stride(A,2))` must be at least `size(A,1)`")
             lda = max(1, size(A,1), lda)
-            pA = pointer(A, Base._sub2ind(A, 1, colindex))
-            pX = pointer(X, stride(X,1) > 0 ? firstindex(X) : lastindex(X))
-            pY = pointer(Y, stride(Y,1) > 0 ? firstindex(Y) : lastindex(Y))
             GC.@preserve A X Y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty},
                  Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
@@ -767,14 +739,16 @@ for (fname, elty) in ((:dgbmv_,:Float64),
                        y::AbstractVector{$elty})
             require_one_based_indexing(A, x, y)
             chkstride1(A)
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
+            GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
                  Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt},
                  Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
                  Ref{BlasInt}, Clong),
                  trans, m, size(A,2), kl,
                  ku, alpha, A, max(1,stride(A,2)),
-                 x, stride(x,1), beta, y, stride(y,1), 1)
+                 px, stx, beta, py, sty, 1)
             y
         end
         function gbmv(trans::AbstractChar, m::Integer, kl::Integer, ku::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
@@ -828,13 +802,15 @@ for (fname, elty, lib) in ((:dsymv_,:Float64,libblastrampoline),
                 throw(DimensionMismatch(lazy"A has size $(size(A)), and y has length $(length(y))"))
             end
             chkstride1(A)
-            ccall((@blasfunc($fname), $lib), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
+            GC.@preserve x y ccall((@blasfunc($fname), $lib), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
                  Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty},
                  Ptr{$elty}, Ref{BlasInt}, Clong),
                  uplo, n, alpha, A,
-                 max(1,stride(A,2)), x, stride(x,1), beta,
-                 y, stride(y,1), 1)
+                 max(1,stride(A,2)), px, stx, beta,
+                 py, sty, 1)
             y
         end
         function symv(uplo::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
@@ -891,15 +867,15 @@ for (fname, elty) in ((:zhemv_,:ComplexF64),
             end
             chkstride1(A)
             lda = max(1, stride(A, 2))
-            incx = stride(x, 1)
-            incy = stride(y, 1)
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
+            GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
                  Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty},
                  Ptr{$elty}, Ref{BlasInt}, Clong),
                 uplo, n, α, A,
-                lda, x, incx, β,
-                y, incy, 1)
+                lda, px, stx, β,
+                py, sty, 1)
             y
         end
         function hemv(uplo::AbstractChar, α::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
@@ -977,19 +953,21 @@ for (fname, elty) in ((:zhpmv_, :ComplexF64),
 end
 
 function hpmv!(uplo::AbstractChar,
-               α::Number, AP::Union{DenseArray{T}, AbstractVector{T}}, x::Union{DenseArray{T}, AbstractVector{T}},
-               β::Number, y::Union{DenseArray{T}, AbstractVector{T}}) where {T <: BlasComplex}
-    chkuplo(uplo)
+               α::Number, AP::AbstractArray{T}, x::AbstractArray{T},
+               β::Number, y::AbstractArray{T}) where {T <: BlasComplex}
     require_one_based_indexing(AP, x, y)
     N = length(x)
     if N != length(y)
-        throw(DimensionMismatch("x has length $(N), but y has length $(length(y))"))
+        throw(DimensionMismatch(lazy"x has length $(N), but y has length $(length(y))"))
     end
     if 2*length(AP) < N*(N + 1)
-        throw(DimensionMismatch("Packed Hermitian matrix A has size smaller than length(x) =  $(N)."))
+        throw(DimensionMismatch(lazy"Packed hermitian matrix A has size smaller than length(x) = $(N)."))
     end
     chkstride1(AP)
-    return hpmv!(uplo, N, convert(T, α), AP, x, stride(x, 1), convert(T, β), y, stride(y, 1))
+    xptr, xinc = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+    yptr, yinc = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
+    GC.@preserve x y hpmv!(uplo, N, T(α), AP, xptr, xinc, T(β), yptr, yinc) # x and y stay rooted while raw pointers are in use
+    return y # the destination vector is the result
 end
 
 """
@@ -1031,13 +1009,15 @@ for (fname, elty) in ((:dsbmv_,:Float64),
             chkuplo(uplo)
             require_one_based_indexing(A, x, y)
             chkstride1(A)
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
+            GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty},
                  Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                  Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong),
                  uplo, size(A,2), k, alpha,
-                 A, max(1,stride(A,2)), x, stride(x,1),
-                 beta, y, stride(y,1), 1)
+                 A, max(1,stride(A,2)), px, stx,
+                 beta, py, sty, 1)
             y
         end
         function sbmv(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
@@ -1130,19 +1110,21 @@ for (fname, elty) in ((:dspmv_, :Float64),
 end
 
 function spmv!(uplo::AbstractChar,
-               α::Real, AP::Union{DenseArray{T}, AbstractVector{T}}, x::Union{DenseArray{T}, AbstractVector{T}},
-               β::Real, y::Union{DenseArray{T}, AbstractVector{T}}) where {T <: BlasReal}
-    chkuplo(uplo)
+               α::Real, AP::AbstractArray{T}, x::AbstractArray{T},
+               β::Real, y::AbstractArray{T}) where {T <: BlasReal}
     require_one_based_indexing(AP, x, y)
     N = length(x)
     if N != length(y)
-        throw(DimensionMismatch("x has length $(N), but y has length $(length(y))"))
+        throw(DimensionMismatch(lazy"x has length $(N), but y has length $(length(y))"))
     end
     if 2*length(AP) < N*(N + 1)
-        throw(DimensionMismatch("Packed symmetric matrix A has size smaller than length(x) = $(N)."))
+        throw(DimensionMismatch(lazy"Packed symmetric matrix A has size smaller than length(x) = $(N)."))
     end
     chkstride1(AP)
-    return spmv!(uplo, N, convert(T, α), AP, x, stride(x, 1), convert(T, β), y, stride(y, 1))
+    xptr, xinc = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+    yptr, yinc = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
+    GC.@preserve x y spmv!(uplo, N, T(α), AP, xptr, xinc, T(β), yptr, yinc) # x and y stay rooted while raw pointers are in use
+    return y # the destination vector is the result
 end
 
 """
@@ -1201,16 +1183,17 @@ for (fname, elty) in ((:dspr_, :Float64),
 end
 
 function spr!(uplo::AbstractChar,
-              α::Real, x::Union{DenseArray{T}, AbstractVector{T}},
-              AP::Union{DenseArray{T}, AbstractVector{T}}) where {T <: BlasReal}
+              α::Real, x::AbstractArray{T},
+              AP::AbstractArray{T}) where {T <: BlasReal}
     chkuplo(uplo)
     require_one_based_indexing(AP, x)
     N = length(x)
     if 2*length(AP) < N*(N + 1)
-        throw(DimensionMismatch("Packed symmetric matrix A has size smaller than length(x) = $(N)."))
+        throw(DimensionMismatch(lazy"Packed symmetric matrix A has size smaller than length(x) = $(N)."))
     end
     chkstride1(AP)
-    return spr!(uplo, N, convert(T, α), x, stride(x, 1), AP)
+    xptr, xinc = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+    return GC.@preserve x spr!(uplo, N, T(α), xptr, xinc, AP) # result of the lower-level spr! is passed through
 end
 
 """
@@ -1251,13 +1234,15 @@ for (fname, elty) in ((:zhbmv_,:ComplexF64),
             chkuplo(uplo)
             require_one_based_indexing(A, x, y)
             chkstride1(A)
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
+            GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty},
                  Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                  Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong),
                  uplo, size(A,2), k, alpha,
-                 A, max(1,stride(A,2)), x, stride(x,1),
-                 beta, y, stride(y,1), 1)
+                 A, max(1,stride(A,2)), px, stx,
+                 beta, py, sty, 1)
             y
         end
         function hbmv(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
@@ -1312,12 +1297,13 @@ for (fname, elty) in ((:dtrmv_,:Float64),
                 throw(DimensionMismatch(lazy"A has size ($n,$n), x has length $(length(x))"))
             end
             chkstride1(A)
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
                  Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                  Clong, Clong, Clong),
                  uplo, trans, diag, n,
-                 A, max(1,stride(A,2)), x, max(1,stride(x, 1)), 1, 1, 1)
+                 A, max(1,stride(A,2)), px, stx, 1, 1, 1)
             x
         end
         function trmv(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
@@ -1368,12 +1354,13 @@ for (fname, elty) in ((:dtrsv_,:Float64),
                 throw(DimensionMismatch(lazy"size of A is $n != length(x) = $(length(x))"))
             end
             chkstride1(A)
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
                  Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                  Clong, Clong, Clong),
                  uplo, trans, diag, n,
-                 A, max(1,stride(A,2)), x, stride(x, 1), 1, 1, 1)
+                 A, max(1,stride(A,2)), px, stx, 1, 1, 1)
             x
         end
         function trsv(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
@@ -1402,13 +1389,13 @@ for (fname, elty) in ((:dger_,:Float64),
             if m != length(x) || n != length(y)
                 throw(DimensionMismatch(lazy"A has size ($m,$n), x has length $(length(x)), y has length $(length(y))"))
             end
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            py, sty = vec_pointer_stride(y, ArgumentError("input vector with 0 stride is not allowed"))
+            GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
                  Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                  Ref{BlasInt}),
-                 m, n, α, x,
-                 stride(x, 1), y, stride(y, 1), A,
-                 max(1,stride(A,2)))
+                 m, n, α, px, stx, py, sty, A, max(1,stride(A,2)))
             A
         end
     end
@@ -1436,11 +1423,11 @@ for (fname, elty, lib) in ((:dsyr_,:Float64,libblastrampoline),
             if length(x) != n
                 throw(DimensionMismatch(lazy"A has size ($n,$n), x has length $(length(x))"))
             end
-            ccall((@blasfunc($fname), $lib), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            GC.@preserve x ccall((@blasfunc($fname), $lib), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
                  Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
-                 uplo, n, α, x,
-                 stride(x, 1), A, max(1,stride(A, 2)))
+                 uplo, n, α, px, stx, A, max(1,stride(A, 2)))
             A
         end
     end
@@ -1467,11 +1454,11 @@ for (fname, elty, relty) in ((:zher_,:ComplexF64, :Float64),
             if length(x) != n
                 throw(DimensionMismatch(lazy"A has size ($n,$n), x has length $(length(x))"))
             end
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{$relty}, Ptr{$elty},
                  Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Clong),
-                 uplo, n, α, x,
-                 stride(x, 1), A, max(1,stride(A,2)), 1)
+                 uplo, n, α, px, stx, A, max(1,stride(A,2)), 1)
             A
         end
     end
@@ -2085,8 +2072,8 @@ end
 
 end # module
 
-function copyto!(dest::Array{T}, rdest::Union{UnitRange{Ti},AbstractRange{Ti}},
-                 src::Array{T}, rsrc::Union{UnitRange{Ti},AbstractRange{Ti}}) where {T<:BlasFloat,Ti<:Integer}
+function copyto!(dest::Array{T}, rdest::AbstractRange{Ti},
+                 src::Array{T}, rsrc::AbstractRange{Ti}) where {T<:BlasFloat,Ti<:Integer}
     if minimum(rdest) < 1 || maximum(rdest) > length(dest)
         throw(ArgumentError(lazy"range out of bounds for dest, of length $(length(dest))"))
     end
@@ -2098,9 +2085,9 @@ function copyto!(dest::Array{T}, rdest::Union{UnitRange{Ti},AbstractRange{Ti}},
     end
     GC.@preserve src dest BLAS.blascopy!(
         length(rsrc),
-        pointer(src) + (first(rsrc) - 1) * sizeof(T),
+        pointer(src, minimum(rsrc)),
         step(rsrc),
-        pointer(dest) + (first(rdest) - 1) * sizeof(T),
+        pointer(dest, minimum(rdest)),
         step(rdest))
 
     return dest
diff --git a/stdlib/LinearAlgebra/src/matmul.jl b/stdlib/LinearAlgebra/src/matmul.jl
index f27a3a768b866..80d9872fdca6e 100644
--- a/stdlib/LinearAlgebra/src/matmul.jl
+++ b/stdlib/LinearAlgebra/src/matmul.jl
@@ -498,7 +498,8 @@ function gemv!(y::StridedVector{T}, tA::AbstractChar, A::StridedVecOrMat{T}, x::
     nA == 0 && return _rmul_or_fill!(y, β)
     alpha, beta = promote(α, β, zero(T))
     if alpha isa Union{Bool,T} && beta isa Union{Bool,T} &&
-        stride(A, 1) == 1 && stride(A, 2) >= size(A, 1)
+        stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) &&
+        !iszero(stride(x, 1)) # We only check input's stride here.
         return BLAS.gemv!(tA, alpha, A, x, beta, y)
     else
         return generic_matvecmul!(y, tA, A, x, MulAddMul(α, β))
@@ -516,8 +517,9 @@ function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMa
     nA == 0 && return _rmul_or_fill!(y, β)
     alpha, beta = promote(α, β, zero(T))
     if alpha isa Union{Bool,T} && beta isa Union{Bool,T} &&
-        stride(A, 1) == 1 && stride(A, 2) >= size(A, 1) &&
-        stride(y, 1) == 1 && tA == 'N' # reinterpret-based optimization is valid only for contiguous `y`
+        stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) &&
+        stride(y, 1) == 1 && tA == 'N' && # reinterpret-based optimization is valid only for contiguous `y`
+        !iszero(stride(x, 1))
         BLAS.gemv!(tA, alpha, reinterpret(T, A), x, beta, reinterpret(T, y))
         return y
     else
@@ -535,7 +537,9 @@ function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMa
     mA == 0 && return y
     nA == 0 && return _rmul_or_fill!(y, β)
     alpha, beta = promote(α, β, zero(T))
-    @views if alpha isa Union{Bool,T} && beta isa Union{Bool,T} && stride(A, 1) == 1 && stride(A, 2) >= size(A, 1)
+    @views if alpha isa Union{Bool,T} && beta isa Union{Bool,T} &&
+        stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) &&
+        !iszero(stride(x, 1))
         xfl = reinterpret(reshape, T, x) # Use reshape here.
         yfl = reinterpret(reshape, T, y)
         BLAS.gemv!(tA, alpha, A, xfl[1, :], beta, yfl[1, :])
diff --git a/stdlib/LinearAlgebra/test/blas.jl b/stdlib/LinearAlgebra/test/blas.jl
index 54b227bca7685..d39f7c45ba205 100644
--- a/stdlib/LinearAlgebra/test/blas.jl
+++ b/stdlib/LinearAlgebra/test/blas.jl
@@ -4,20 +4,31 @@ module TestBLAS
 
 using Test, LinearAlgebra, Random
 using LinearAlgebra: BlasReal, BlasComplex
+fabs(x::Real) = abs(x) # reference magnitude used by the iamax/asum tests below
+fabs(x::Complex) = abs(real(x)) + abs(imag(x)) # complex case: |re| + |im|, not the modulus
+
+# Helper to build packed storage: returns a vector holding one triangle of `A`, column by column (`uplo == :L` selects the lower triangle, any other value the upper).
+function pack(A, uplo)
+    AP = eltype(A)[]
+    n = size(A, 1)
+    for j in 1:n, i in (uplo==:L ? (j:n) : (1:j)) # column j: rows j:n for :L, otherwise rows 1:j
+        push!(AP, A[i,j])
+    end
+    return AP
+end
 
+@testset "vec_pointer_stride" begin # asum on 3-d views: accepted only when the layout is vector-like
+    a = zeros(4,4,4)
+    @test BLAS.asum(view(a,1:2:4,:,:)) == 0 # vector-like: the selected elements are evenly spaced in memory (every 2nd element)
+    @test_throws ArgumentError BLAS.asum(view(a,1:3:4,:,:)) # not vector-like: the spacing between selected elements is not uniform
+end
 Random.seed!(100)
 ## BLAS tests - testing the interface code to BLAS routines
 @testset for elty in [Float32, Float64, ComplexF32, ComplexF64]
 
     @testset "syr2k!" begin
-        U = randn(5,2)
-        V = randn(5,2)
-        if elty == ComplexF32 || elty == ComplexF64
-            U = complex.(U, U)
-            V = complex.(V, V)
-        end
-        U = convert(Array{elty, 2}, U)
-        V = convert(Array{elty, 2}, V)
+        U = randn(elty, 5, 2)
+        V = randn(elty, 5, 2)
         @test tril(LinearAlgebra.BLAS.syr2k('L','N',U,V)) ≈ tril(U*transpose(V) + V*transpose(U))
         @test triu(LinearAlgebra.BLAS.syr2k('U','N',U,V)) ≈ triu(U*transpose(V) + V*transpose(U))
         @test tril(LinearAlgebra.BLAS.syr2k('L','T',U,V)) ≈ tril(transpose(U)*V + transpose(V)*U)
@@ -26,12 +37,8 @@ Random.seed!(100)
 
     if elty in (ComplexF32, ComplexF64)
         @testset "her2k!" begin
-            U = randn(5,2)
-            V = randn(5,2)
-            U = complex.(U, U)
-            V = complex.(V, V)
-            U = convert(Array{elty, 2}, U)
-            V = convert(Array{elty, 2}, V)
+            U = randn(elty, 5, 2)
+            V = randn(elty, 5, 2)
             @test tril(LinearAlgebra.BLAS.her2k('L','N',U,V)) ≈ tril(U*V' + V*U')
             @test triu(LinearAlgebra.BLAS.her2k('U','N',U,V)) ≈ triu(U*V' + V*U')
             @test tril(LinearAlgebra.BLAS.her2k('L','C',U,V)) ≈ tril(U'*V + V'*U)
@@ -48,21 +55,21 @@ Random.seed!(100)
     U4 = triu(fill(elty(1), 4,4))
     Z4 = zeros(elty, (4,4))
 
-    elm1 = convert(elty, -1)
-    el2 = convert(elty, 2)
-    v14 = convert(Vector{elty}, [1:4;])
-    v41 = convert(Vector{elty}, [4:-1:1;])
+    elm1 = elty(-1)
+    el2 = elty(2)
+    v14 = elty[1:4;]
+    v41 = elty[4:-1:1;]
 
     let n = 10
         @testset "dot products" begin
             if elty <: Real
-                x1 = convert(Vector{elty}, randn(n))
-                x2 = convert(Vector{elty}, randn(n))
+                x1 = randn(elty, n)
+                x2 = randn(elty, n)
                 @test BLAS.dot(x1,x2) ≈ sum(x1.*x2)
                 @test_throws DimensionMismatch BLAS.dot(x1,rand(elty, n + 1))
             else
-                z1 = convert(Vector{elty}, complex.(randn(n),randn(n)))
-                z2 = convert(Vector{elty}, complex.(randn(n),randn(n)))
+                z1 = randn(elty, n)
+                z2 = randn(elty, n)
                 @test BLAS.dotc(z1,z2) ≈ sum(conj(z1).*z2)
                 @test BLAS.dotu(z1,z2) ≈ sum(z1.*z2)
                 @test_throws DimensionMismatch BLAS.dotc(z1,rand(elty, n + 1))
@@ -70,92 +77,60 @@ Random.seed!(100)
             end
         end
         @testset "iamax" begin
-            if elty <: Real
-                x = convert(Vector{elty}, randn(n))
-                @test BLAS.iamax(x) == argmax(abs.(x))
-            else
-                z = convert(Vector{elty}, complex.(randn(n),randn(n)))
-                @test BLAS.iamax(z) == argmax(map(x -> abs(real(x)) + abs(imag(x)), z))
-            end
+            x = randn(elty, n)
+            @test BLAS.iamax(x) == findmax(fabs, x)[2]
         end
         @testset "rot!" begin
-            if elty <: Real
-                x = convert(Vector{elty}, randn(n))
-                y = convert(Vector{elty}, randn(n))
-                c = rand(elty)
-                s = rand(elty)
+            x = randn(elty, n)
+            y = randn(elty, n)
+            c = rand(real(elty))
+            for sty in unique!([real(elty), elty])
+                s = rand(sty)
                 x2 = copy(x)
                 y2 = copy(y)
                 BLAS.rot!(n, x, 1, y, 1, c, s)
                 @test x ≈ c*x2 + s*y2
-                @test y ≈ -s*x2 + c*y2
-            else
-                x = convert(Vector{elty}, complex.(randn(n),rand(n)))
-                y = convert(Vector{elty}, complex.(randn(n),rand(n)))
-                cty = (elty == ComplexF32) ? Float32 : Float64
-                c = rand(cty)
-                for sty in [cty, elty]
-                    s = rand(sty)
-                    x2 = copy(x)
-                    y2 = copy(y)
-                    BLAS.rot!(n, x, 1, y, 1, c, s)
-                    @test x ≈ c*x2 + s*y2
-                    @test y ≈ -conj(s)*x2 + c*y2
-                end
+                @test y ≈ -conj(s)*x2 + c*y2
             end
         end
         @testset "axp(b)y" begin
-            if elty <: Real
-                x1 = convert(Vector{elty}, randn(n))
-                x2 = convert(Vector{elty}, randn(n))
-                α  = rand(elty)
-                β  = rand(elty)
-                @test BLAS.axpy!(α,copy(x1),copy(x2)) ≈ α*x1 + x2
-                @test BLAS.axpby!(α,copy(x1),β,copy(x2)) ≈ α*x1 + β*x2
-                @test_throws DimensionMismatch BLAS.axpy!(α, copy(x1), rand(elty, n + 1))
-                @test_throws DimensionMismatch BLAS.axpby!(α, copy(x1), β, rand(elty, n + 1))
-                @test_throws DimensionMismatch BLAS.axpy!(α, copy(x1), 1:div(n,2), copy(x2), 1:n)
-                @test_throws ArgumentError BLAS.axpy!(α, copy(x1), 0:div(n,2), copy(x2), 1:(div(n, 2) + 1))
-                @test_throws ArgumentError BLAS.axpy!(α, copy(x1), 1:div(n,2), copy(x2), 0:(div(n, 2) - 1))
-                @test BLAS.axpy!(α,copy(x1),1:n,copy(x2),1:n) ≈ x2 + α*x1
-            else
-                z1 = convert(Vector{elty}, complex.(randn(n), randn(n)))
-                z2 = convert(Vector{elty}, complex.(randn(n), randn(n)))
-                α  = rand(elty)
-                @test BLAS.axpy!(α, copy(z1), copy(z2)) ≈ z2 + α * z1
-                @test_throws DimensionMismatch BLAS.axpy!(α, copy(z1), rand(elty, n + 1))
-                @test_throws DimensionMismatch BLAS.axpy!(α, copy(z1), 1:div(n, 2), copy(z2), 1:(div(n, 2) + 1))
-                @test_throws ArgumentError BLAS.axpy!(α, copy(z1), 0:div(n,2), copy(z2), 1:(div(n, 2) + 1))
-                @test_throws ArgumentError BLAS.axpy!(α, copy(z1), 1:div(n,2), copy(z2), 0:(div(n, 2) - 1))
-                @test BLAS.axpy!(α,copy(z1),1:n,copy(z2),1:n) ≈ z2 + α*z1
+            x1 = randn(elty, n)
+            x2 = randn(elty, n)
+            α  = rand(elty)
+            β  = rand(elty)
+            for X1 in (x1, view(x1,n:-1:1)), X2 in (x2, view(x2, n:-1:1))
+                @test BLAS.axpy!(α,deepcopy(X1),deepcopy(X2)) ≈ α*X1 + X2
+                @test BLAS.axpby!(α,deepcopy(X1),β,deepcopy(X2)) ≈ α*X1 + β*X2
             end
+            for ind1 in (1:n, n:-1:1), ind2 in (1:n, n:-1:1)
+                @test BLAS.axpy!(α,copy(x1),ind1,copy(x2),ind2) ≈ x2 + α*(ind1 == ind2 ? x1 : reverse(x1))
+            end
+            @test_throws DimensionMismatch BLAS.axpy!(α, copy(x1), rand(elty, n + 1))
+            @test_throws DimensionMismatch BLAS.axpby!(α, copy(x1), β, rand(elty, n + 1))
+            @test_throws DimensionMismatch BLAS.axpy!(α, copy(x1), 1:div(n,2), copy(x2), 1:n)
+            @test_throws ArgumentError BLAS.axpy!(α, copy(x1), 0:div(n,2), copy(x2), 1:(div(n, 2) + 1))
+            @test_throws ArgumentError BLAS.axpy!(α, copy(x1), 1:div(n,2), copy(x2), 0:(div(n, 2) - 1))
         end
         @testset "nrm2, iamax, and asum for StridedVectors" begin
             a = rand(elty,n)
-            b = view(a,2:2:n,1)
-            @test BLAS.nrm2(b) ≈ norm(b)
-            if elty <: Real
-                @test BLAS.asum(b) ≈ sum(abs.(b))
-                @test BLAS.iamax(b) ≈ argmax(abs.(b))
-            else
-                @test BLAS.asum(b) ≈ sum(abs.(real(b))) + sum(abs.(imag(b)))
-                @test BLAS.iamax(b) == argmax(map(x -> abs(real(x)) + abs(imag(x)), b))
+            for ind in (2:2:n, n:-2:2)
+                b = view(a, ind, 1)
+                @test BLAS.nrm2(b) ≈ sqrt(sum(abs2, b))
+                @test BLAS.asum(b) ≈ sum(fabs, b)
+                @test BLAS.iamax(b) == findmax(fabs, b)[2] * (step(ind) >= 0)
             end
         end
-        # scal
-        α = rand(elty)
-        a = rand(elty,n)
-        @test BLAS.scal(n,α,a,1) ≈ α * a
-
-        @testset "trsv" begin
-            A = triu(rand(elty,n,n))
-            @testset "Vector and SubVector" for x in (rand(elty, n), view(rand(elty,2n),1:2:2n))
-                @test A\x ≈ BLAS.trsv('U','N','N',A,x)
-                @test_throws DimensionMismatch BLAS.trsv('U','N','N',A,Vector{elty}(undef,n+1))
+        @testset "scal" begin # scal / scal! against plain scalar-vector multiplication
+            α = rand(elty)
+            a = rand(elty,n)
+            @test BLAS.scal(n,α,a,1) ≈ α * a
+            for v in (a, view(a, n:-1:1)) # dense vector and a reversed (negative-stride) view
+                @test BLAS.scal!(α, deepcopy(v)) ≈ α * v # deepcopy so the in-place scal! leaves `a` untouched
+            end
+        end
-        @testset "ger, her, syr" for x in (rand(elty, n), view(rand(elty,2n), 1:2:2n)),
-            y in (rand(elty,n), view(rand(elty,3n), 1:3:3n))
+
+        @testset "ger, her, syr" for x in (rand(elty, n), view(rand(elty,2n), 1:2:2n), view(rand(elty,n), n:-1:1)),
+            y in (rand(elty,n), view(rand(elty,3n), 1:3:3n), view(rand(elty,2n), 2n:-2:2))
 
             A = rand(elty,n,n)
             α = rand(elty)
@@ -178,32 +153,66 @@ Random.seed!(100)
             end
         end
         @testset "copy" begin
-            x1 = convert(Vector{elty}, randn(n))
-            x2 = convert(Vector{elty}, randn(n))
-            BLAS.copyto!(x2, 1:n, x1, 1:n)
-            @test x2 == x1
+            x1 = randn(elty, n)
+            x2 = randn(elty, n)
+            for ind1 in (1:n, n:-1:1), ind2 in (1:n, n:-1:1)
+                @test x2 === BLAS.copyto!(x2, ind1, x1, ind2) == (ind1 == ind2 ? x1 : reverse(x1))
+            end
             @test_throws DimensionMismatch BLAS.copyto!(x2, 1:n, x1, 1:(n - 1))
             @test_throws ArgumentError BLAS.copyto!(x1, 0:div(n, 2), x2, 1:(div(n, 2) + 1))
             @test_throws ArgumentError BLAS.copyto!(x1, 1:(div(n, 2) + 1), x2, 0:div(n, 2))
         end
-        # trmv
-        A = triu(rand(elty,n,n))
-        x = rand(elty,n)
-        @test BLAS.trmv('U','N','N',A,x) ≈ A*x
+        @testset "trmv and trsv" begin # triangular multiply/solve checked against a dense reference matrix
+            A = rand(elty,n,n)
+            x = rand(elty,n)
+            xerr = Vector{elty}(undef,n+1) # wrong length (n+1), used for the DimensionMismatch checks
+            for uplo in ('U', 'L'), diag in ('U','N'), trans in ('N', 'T', 'C') # every uplo/diag/trans flag combination
+                Wrapper = if uplo == 'U' # triangular wrapper type matching the uplo/diag flags
+                    diag == 'U' ? UnitUpperTriangular : UpperTriangular
+                else
+                    diag == 'U' ? UnitLowerTriangular : LowerTriangular
+                end
+                fun = trans == 'N' ? identity : trans == 'T' ? transpose : adjoint # operation matching the trans flag
+                fullA = collect(fun(Wrapper(A))) # dense reference matrix for the current flag combination
+                @testset "trmv" begin
+                    @test BLAS.trmv(uplo,trans,diag,A,x) ≈ fullA * x
+                    @test_throws DimensionMismatch BLAS.trmv(uplo,trans,diag,A,xerr)
+                    for xx in (x, view(x, n:-1:1)) # dense vector and a reversed (negative-stride) view
+                        @test BLAS.trmv!(uplo,trans,diag,A,deepcopy(xx)) ≈ fullA * xx
+                    end
+                end
+                @testset "trsv" begin
+                    @test BLAS.trsv(uplo,trans,diag,A,x) ≈ fullA \ x
+                    @test_throws DimensionMismatch BLAS.trsv(uplo,trans,diag,A,xerr)
+                    for xx in (x, view(x, n:-1:1)) # dense vector and a reversed (negative-stride) view
+                        @test BLAS.trsv!(uplo,trans,diag,A,deepcopy(xx)) ≈ fullA \ xx
+                    end
+                end
+            end
+        end
         @testset "symmetric/Hermitian multiplication" begin
             x = rand(elty,n)
             A = rand(elty,n,n)
+            y = rand(elty, n)
+            α = randn(elty)
+            β = randn(elty)
             Aherm = A + A'
             Asymm = A + transpose(A)
-            @testset "symv and hemv" begin
-                @test BLAS.symv('U',Asymm,x) ≈ Asymm*x
-                offsizevec, offsizemat = Array{elty}.(undef,(n+1, (n,n+1)))
-                @test_throws DimensionMismatch BLAS.symv!('U',one(elty),Asymm,x,one(elty),offsizevec)
-                @test_throws DimensionMismatch BLAS.symv('U',offsizemat,x)
+            offsizevec, offsizemat = Array{elty}.(undef,(n+1, (n,n+1)))
+            @testset "symv and hemv" for uplo in ('U', 'L')
+                @test BLAS.symv(uplo,Asymm,x) ≈ Asymm*x
+                for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1))
+                    @test BLAS.symv!(uplo,α,Asymm,xx,β,deepcopy(yy)) ≈ α * Asymm * xx + β * yy
+                end
+                @test_throws DimensionMismatch BLAS.symv!(uplo,α,Asymm,x,β,offsizevec)
+                @test_throws DimensionMismatch BLAS.symv(uplo,offsizemat,x)
                 if elty <: BlasComplex
-                    @test BLAS.hemv('U',Aherm,x) ≈ Aherm*x
-                    @test_throws DimensionMismatch BLAS.hemv('U',offsizemat,x)
-                    @test_throws DimensionMismatch BLAS.hemv!('U',one(elty),Aherm,x,one(elty),offsizevec)
+                    @test BLAS.hemv(uplo,Aherm,x) ≈ Aherm*x
+                    for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1))
+                        @test BLAS.hemv!(uplo,α,Aherm,xx,β,deepcopy(yy)) ≈ α * Aherm * xx + β * yy
+                    end
+                    @test_throws DimensionMismatch BLAS.hemv(uplo,offsizemat,x)
+                    @test_throws DimensionMismatch BLAS.hemv!(uplo,one(elty),Aherm,x,one(elty),offsizevec)
                 end
             end
 
@@ -233,40 +242,24 @@ Random.seed!(100)
                 # Both matrix dimensions n coincide, as we have Hermitian matrices.
                 # Define the inputs and outputs of hpmv!, y = α*A*x+β*y
                 α = rand(elty)
-                M = rand(elty, n, n)
-                AL = Hermitian(M, :L)
-                AU = Hermitian(M, :U)
+                A = rand(elty, n, n)
                 x = rand(elty, n)
                 β = rand(elty)
                 y = rand(elty, n)
-
-                y_result_julia_lower = α*AL*x + β*y
-
-                # Create lower triangular packing of AL
-                AP = typeof(AL[1,1])[]
-                for j in 1:n
-                    for i in j:n
-                        push!(AP, AL[i,j])
+                for uplo in (:L, :U)
+                    Cuplo = String(uplo)[1]
+                    AH = Hermitian(A, uplo)
+                    # Create lower/upper triangular packing of AH
+                    AP = pack(AH, uplo)
+                    for xx in (x, view(x,n:-1:1)), yy in (y, view(y,n:-1:1))
+                        @test BLAS.hpmv!(Cuplo, α, AP, xx, β, deepcopy(yy)) ≈ α*AH*xx + β*yy
                     end
+                    AP′ = view(zeros(elty, n*(n+1)),1:2:n*(n+1))
+                    @test_throws ErrorException BLAS.hpmv!(Cuplo, α, AP′, x, β, y)
+                    AP′ = view(AP, 1:length(AP′) - 1)
+                    @test_throws DimensionMismatch BLAS.hpmv!(Cuplo, α, AP′, x, β, y)
+                    @test_throws DimensionMismatch BLAS.hpmv!(Cuplo, α, AP′, x, β, view(y,1:n-1))
                 end
-
-                y_result_blas_lower = copy(y)
-                BLAS.hpmv!('L', α, AP, x, β, y_result_blas_lower)
-                @test y_result_julia_lower ≈ y_result_blas_lower
-
-                y_result_julia_upper = α*AU*x + β*y
-
-                # Create upper triangular packing of AU
-                AP = typeof(AU[1,1])[]
-                for j in 1:n
-                    for i in 1:j
-                        push!(AP, AU[i,j])
-                    end
-                end
-
-                y_result_blas_upper = copy(y)
-                BLAS.hpmv!('U', α, AP, x, β, y_result_blas_upper)
-                @test y_result_julia_upper ≈ y_result_blas_upper
             end
         end
 
@@ -276,41 +269,24 @@ Random.seed!(100)
                 # Both matrix dimensions n coincide, as we have symmetric matrices.
                 # Define the inputs and outputs of spmv!, y = α*A*x+β*y
                 α = rand(elty)
-                M = rand(elty, n, n)
-                AL = Symmetric(M, :L)
-                AU = Symmetric(M, :U)
+                A = rand(elty, n, n)
                 x = rand(elty, n)
                 β = rand(elty)
                 y = rand(elty, n)
-
-                y_result_julia_lower = α*AL*x + β*y
-
-                # Create lower triangular packing of AL
-                AP = typeof(M[1,1])[]
-                for j in 1:n
-                    for i in j:n
-                        push!(AP, AL[i,j])
-                    end
-                end
-
-                y_result_blas_lower = copy(y)
-                BLAS.spmv!('L', α, AP, x, β, y_result_blas_lower)
-                @test y_result_julia_lower ≈ y_result_blas_lower
-
-
-                y_result_julia_upper = α*AU*x + β*y
-
-                # Create upper triangular packing of AU
-                AP = typeof(M[1,1])[]
-                for j in 1:n
-                    for i in 1:j
-                        push!(AP, AU[i,j])
+                for uplo in (:L, :U)
+                    Cuplo = String(uplo)[1]
+                    AS = Symmetric(A, uplo)
+                    # Create lower/upper triangular packing of AS
+                    AP = pack(AS, uplo)
+                    for xx in (x, view(x,n:-1:1)), yy in (y, view(y,n:-1:1))
+                        @test BLAS.spmv!(Cuplo, α, AP, xx, β, deepcopy(yy)) ≈ α*AS*xx + β*yy
                     end
+                    AP′ = view(zeros(elty, n*(n+1)),1:2:n*(n+1))
+                    @test_throws ErrorException BLAS.spmv!(Cuplo, α, AP′, x, β, y)
+                    AP′ = view(AP, 1:length(AP′) - 1)
+                    @test_throws DimensionMismatch BLAS.spmv!(Cuplo, α, AP′, x, β, y)
+                    @test_throws DimensionMismatch BLAS.spmv!(Cuplo, α, AP′, x, β, view(y,1:n-1))
                 end
-
-                y_result_blas_upper = copy(y)
-                BLAS.spmv!('U', α, AP, x, β, y_result_blas_upper)
-                @test y_result_julia_upper ≈ y_result_blas_upper
             end
         end
 
@@ -321,39 +297,29 @@ Random.seed!(100)
                 M = rand(elty, n, n)
                 AL = Symmetric(M, :L)
                 AU = Symmetric(M, :U)
-                x = rand(elty, n)
-
-                function pack(A, uplo)
-                    AP = elty[]
-                    for j in 1:n
-                        for i in (uplo==:L ? (j:n) : (1:j))
-                            push!(AP, A[i,j])
-                        end
-                    end
-                    return AP
+                for x in (rand(elty, n), view(rand(elty, n), n:-1:1))
+                    ALP_result_julia_lower = pack(α*x*x' + AL, :L)
+                    ALP_result_blas_lower = pack(AL, :L)
+                    BLAS.spr!('L', α, x, ALP_result_blas_lower)
+                    @test ALP_result_julia_lower ≈ ALP_result_blas_lower
+                    ALP_result_blas_lower = append!(pack(AL, :L), ones(elty, 10))
+                    BLAS.spr!('L', α, x, ALP_result_blas_lower)
+                    @test ALP_result_julia_lower ≈ ALP_result_blas_lower[1:end-10]
+                    ALP_result_blas_lower = reshape(pack(AL, :L), 1, length(ALP_result_julia_lower), 1)
+                    BLAS.spr!('L', α, x, ALP_result_blas_lower)
+                    @test ALP_result_julia_lower ≈ vec(ALP_result_blas_lower)
+
+                    AUP_result_julia_upper = pack(α*x*x' + AU, :U)
+                    AUP_result_blas_upper = pack(AU, :U)
+                    BLAS.spr!('U', α, x, AUP_result_blas_upper)
+                    @test AUP_result_julia_upper ≈ AUP_result_blas_upper
+                    AUP_result_blas_upper = append!(pack(AU, :U), ones(elty, 10))
+                    BLAS.spr!('U', α, x, AUP_result_blas_upper)
+                    @test AUP_result_julia_upper ≈ AUP_result_blas_upper[1:end-10]
+                    AUP_result_blas_upper = reshape(pack(AU, :U), 1, length(AUP_result_julia_upper), 1)
+                    BLAS.spr!('U', α, x, AUP_result_blas_upper)
+                    @test AUP_result_julia_upper ≈ vec(AUP_result_blas_upper)
                 end
-
-                ALP_result_julia_lower = pack(α*x*x' + AL, :L)
-                ALP_result_blas_lower = pack(AL, :L)
-                BLAS.spr!('L', α, x, ALP_result_blas_lower)
-                @test ALP_result_julia_lower ≈ ALP_result_blas_lower
-                ALP_result_blas_lower = append!(pack(AL, :L), ones(elty, 10))
-                BLAS.spr!('L', α, x, ALP_result_blas_lower)
-                @test ALP_result_julia_lower ≈ ALP_result_blas_lower[1:end-10]
-                ALP_result_blas_lower = reshape(pack(AL, :L), 1, length(ALP_result_julia_lower), 1)
-                BLAS.spr!('L', α, x, ALP_result_blas_lower)
-                @test ALP_result_julia_lower ≈ vec(ALP_result_blas_lower)
-
-                AUP_result_julia_upper = pack(α*x*x' + AU, :U)
-                AUP_result_blas_upper = pack(AU, :U)
-                BLAS.spr!('U', α, x, AUP_result_blas_upper)
-                @test AUP_result_julia_upper ≈ AUP_result_blas_upper
-                AUP_result_blas_upper = append!(pack(AU, :U), ones(elty, 10))
-                BLAS.spr!('U', α, x, AUP_result_blas_upper)
-                @test AUP_result_julia_upper ≈ AUP_result_blas_upper[1:end-10]
-                AUP_result_blas_upper = reshape(pack(AU, :U), 1, length(AUP_result_julia_upper), 1)
-                BLAS.spr!('U', α, x, AUP_result_blas_upper)
-                @test AUP_result_julia_upper ≈ vec(AUP_result_blas_upper)
             end
         end
 
@@ -365,33 +331,51 @@ Random.seed!(100)
         #will work for SymTridiagonal,Tridiagonal,Bidiagonal!
         @testset "banded matrix mv" begin
             @testset "gbmv" begin
-                TD  = Tridiagonal(rand(elty,n-1),rand(elty,n),rand(elty,n-1))
-                x   = rand(elty,n)
+                TD = Tridiagonal(rand(elty,n-1),rand(elty,n),rand(elty,n-1))
+                x  = rand(elty, n)
                 #put TD into the BLAS format!
                 fTD = zeros(elty,3,n)
                 fTD[1,2:n] = TD.du
                 fTD[2,:] = TD.d
                 fTD[3,1:n-1] = TD.dl
                 @test BLAS.gbmv('N',n,1,1,fTD,x) ≈ TD*x
+                y = rand(elty, n)
+                α = randn(elty)
+                β = randn(elty)
+                for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1))
+                    @test BLAS.gbmv!('N',n,1,1,α,fTD,xx,β,deepcopy(yy)) ≈ α * TD * xx + β * yy
+                end
             end
             #will work for SymTridiagonal only!
-            @testset "sbmv" begin
+            @testset "sbmv and hbmv" begin
+                x = rand(elty,n)
                 if elty <: BlasReal
                     ST  = SymTridiagonal(rand(elty,n),rand(elty,n-1))
-                    x   = rand(elty,n)
                     #put TD into the BLAS format!
                     fST = zeros(elty,2,n)
                     fST[1,2:n] = ST.ev
                     fST[2,:] = ST.dv
                     @test BLAS.sbmv('U',1,fST,x) ≈ ST*x
+                    y = rand(elty, n)
+                    α = randn(elty)
+                    β = randn(elty)
+                    for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1))
+                        @test BLAS.sbmv!('U',1,α,fST,xx,β,deepcopy(yy)) ≈ α * ST * xx + β * yy
+                    end
                 else
-                    dv = real(rand(elty,n))
+                    dv = rand(real(elty),n)
                     ev = rand(elty,n-1)
                     bH = zeros(elty,2,n)
                     bH[1,2:n] = ev
                     bH[2,:] = dv
                     fullH = diagm(0 => dv, -1 => conj(ev), 1 => ev)
                     @test BLAS.hbmv('U',1,bH,x) ≈ fullH*x
+                    y = rand(elty, n)
+                    α = randn(elty)
+                    β = randn(elty)
+                    for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1))
+                        @test BLAS.hbmv!('U',1,α,bH,xx,β,deepcopy(yy)) ≈ α * fullH * xx + β * yy
+                    end
                 end
             end
         end
@@ -595,8 +579,8 @@ end
         @test BLAS.iamax(x) == 2
 
         M = fill(elty(1.0), 3, 3)
-        BLAS.scal!(elty(2), view(M,:,2))
-        BLAS.scal!(elty(3), view(M,3,:))
+        @test BLAS.scal!(elty(2), view(M,:,2)) === view(M,:,2)
+        @test BLAS.scal!(elty(3), view(M,3,:)) === view(M,3,:)
         @test M == elty[1. 2. 1.; 1. 2. 1.; 3. 6. 3.]
     # Level 2
         A = WrappedArray(elty[1 2; 3 4])
@@ -688,4 +672,36 @@ end
 @test LinearAlgebra.BLAS.libblas == "libblastrampoline"
 @test LinearAlgebra.BLAS.liblapack == "libblastrampoline"
 
+@testset "test for 0-strides" for elty in (Float32, Float64, ComplexF32, ComplexF64) # BLAS wrappers called with stride-0 vectors
+    A = randn(elty, 10, 10);
+    a = view([randn(elty)], 1 .+ 0(1:10)) # 10-element view in which every index is 1, i.e. a stride-0 vector
+    b = view([randn(elty)], 1 .+ 0(1:10)) # second, independent stride-0 vector
+    α, β = randn(elty), randn(elty)
+    @testset "dot/dotc/dotu" begin
+        if elty <: Real
+            @test BLAS.dot(a,b) ≈ sum(a.*b)
+        else
+            @test BLAS.dotc(a,b) ≈ sum(conj(a).*b)
+            @test BLAS.dotu(a,b) ≈ sum(a.*b)
+        end
+    end
+    @testset "axp(b)y!" begin
+        @test BLAS.axpy!(α,a,copy(b)) ≈ α*a + b
+        @test BLAS.axpby!(α,a,β,copy(b)) ≈ α*a + β*b
+        @test_throws "dest" BLAS.axpy!(α,a,b) # a stride-0 destination must fail with a message mentioning "dest"
+        @test_throws "dest" BLAS.axpby!(α,a,β,b)
+    end
+    @test BLAS.iamax(a) == 0 # iamax is expected to report 0 for a stride-0 input
+    @test_throws "dest" BLAS.scal!(b[1], a)
+    @testset "nrm2/asum" begin # OpenBLAS always returns 0.0 here, so the wrappers are expected to throw instead
+        @test_throws "input" BLAS.nrm2(a)
+        @test_throws "input" BLAS.asum(a)
+    end
+    # All level-2 routines reject 0-stride arrays.
+    @testset "gemv!" begin
+        @test_throws "input" BLAS.gemv!('N', true, A, a, false, copy(b)) # stride-0 input vector
+        @test_throws "dest" BLAS.gemv!('N', true, A, copy(a), false, b) # stride-0 destination vector
+    end
+end
+
 end # module TestBLAS
diff --git a/stdlib/LinearAlgebra/test/matmul.jl b/stdlib/LinearAlgebra/test/matmul.jl
index 1c482f8cae97a..ea73814a2848b 100644
--- a/stdlib/LinearAlgebra/test/matmul.jl
+++ b/stdlib/LinearAlgebra/test/matmul.jl
@@ -297,6 +297,15 @@ end
     end
 end
 
+@testset "matrix x vector with negative lda or 0 stride" for T in (Float32, Float64)
+    for TA in (T, complex(T)), TB in (T, complex(T)) # every real/complex pairing of matrix and vector element types
+        A = view(randn(TA, 10, 10), 1:10, 10:-1:1) # columns reversed, so stride(A, 2) is negative (negative lda)
+        v = view([randn(TB)], 1 .+ 0(1:10)) # every index is 1, so stride(v, 1) is 0
+        Ad, vd = copy(A), copy(v) # dense copies used as the reference operands
+        @test Ad * vd ≈ A * vd ≈ Ad * v ≈ A * v # all four dense/view combinations must agree
+    end
+end
+
 @testset "issue #15286" begin
     A = reshape(map(Float64, 1:20), 5, 4)
     C = zeros(8, 8)
diff --git a/stdlib/libblastrampoline_jll/Project.toml b/stdlib/libblastrampoline_jll/Project.toml
index 26c67dae8dffd..8b93e368f8f59 100644
--- a/stdlib/libblastrampoline_jll/Project.toml
+++ b/stdlib/libblastrampoline_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "libblastrampoline_jll"
 uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
-version = "5.0.1+0"
+version = "5.1.0+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl
index 43d3728dcb988..8d58672d30f09 100644
--- a/test/cmdlineargs.jl
+++ b/test/cmdlineargs.jl
@@ -328,10 +328,6 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         @test_broken occursin(expected_good, got)
 
         # Ask for coverage in specific file
-        # TODO: Figure out why asking for a specific file/dir means some lines are under-counted
-        # NOTE that a different expected reference is loaded here
-        expected = replace(read(joinpath(helperdir, "coverage_file.info.bad2"), String),
-            "<FILENAME>" => realpath(inputfile))
         tfile = realpath(inputfile)
         @test readchomp(`$exename -E "(Base.JLOptions().code_coverage, unsafe_string(Base.JLOptions().tracked_path))" -L $inputfile
             --code-coverage=$covfile --code-coverage=@$tfile`) == "(3, $(repr(tfile)))"
diff --git a/test/iterators.jl b/test/iterators.jl
index 1b2498fb1f905..0a038698ea07c 100644
--- a/test/iterators.jl
+++ b/test/iterators.jl
@@ -550,12 +550,15 @@ end
                                                          (1,1), (8,8), (11, 13),
                                                          (1,1,1), (8, 4, 2), (11, 13, 17)),
                                                 part in (1, 7, 8, 11, 63, 64, 65, 142, 143, 144)
-    P = partition(CartesianIndices(dims), part)
-    for I in P
-        @test length(I) == iterate_length(I) == simd_iterate_length(I) == simd_trip_count(I)
-        @test collect(I) == iterate_elements(I) == simd_iterate_elements(I) == index_elements(I)
+    for fun in (i -> 1:i, i -> 1:2:2i, i -> Base.IdentityUnitRange(-i:i))
+        iter = CartesianIndices(map(fun, dims))
+        P = partition(iter, part)
+        for I in P
+            @test length(I) == iterate_length(I) == simd_iterate_length(I) == simd_trip_count(I)
+            @test collect(I) == iterate_elements(I) == simd_iterate_elements(I) == index_elements(I)
+        end
+        @test all(Base.splat(==), zip(Iterators.flatten(map(collect, P)), iter))
     end
-    @test all(Base.splat(==), zip(Iterators.flatten(map(collect, P)), CartesianIndices(dims)))
 end
 @testset "empty/invalid partitions" begin
     @test_throws ArgumentError partition(1:10, 0)
diff --git a/test/numbers.jl b/test/numbers.jl
index 4875de7fc3bb2..38c7c5c9b9e13 100644
--- a/test/numbers.jl
+++ b/test/numbers.jl
@@ -2811,3 +2811,45 @@ end
     @test_throws MethodError fld(a, b)
     @test_throws MethodError cld(a, b)
 end
+
+@testset "Bool rounding (#25074)" begin
+    @testset "round Bool" begin
+        @test_throws InexactError round(Bool, -4.1)
+        @test_throws InexactError round(Bool, 1.5)
+        @test true == round(Bool, 1.0)
+        @test false == round(Bool, 0.0)
+        @test true == round(Bool, 0.6)
+        @test false == round(Bool, 0.4)
+        @test false == round(Bool, 0.5)
+        @test false == round(Bool, -0.5)
+    end
+
+    @testset "trunc Bool" begin
+        @test_throws InexactError trunc(Bool, -4.1)
+        @test_throws InexactError trunc(Bool, 2.5)
+        @test true == trunc(Bool, 1.0)
+        @test false == trunc(Bool, 0.0)
+        @test false == trunc(Bool, 0.6)
+        @test false == trunc(Bool, 0.4)
+        @test true == trunc(Bool, 1.8)
+        @test false == trunc(Bool, -0.5)
+    end
+
+    @testset "floor Bool" begin
+        @test_throws InexactError floor(Bool, -0.1)
+        @test_throws InexactError floor(Bool, 2.5)
+        @test true == floor(Bool, 1.0)
+        @test false == floor(Bool, 0.0)
+        @test false == floor(Bool, 0.6)
+        @test true == floor(Bool, 1.8)
+    end
+
+    @testset "ceil Bool" begin
+        @test_throws InexactError ceil(Bool, -1.4)
+        @test_throws InexactError ceil(Bool, 1.5)
+        @test true == ceil(Bool, 1.0)
+        @test false == ceil(Bool, 0.0)
+        @test true == ceil(Bool, 0.6)
+        @test false == ceil(Bool, -0.7)
+    end
+end
diff --git a/test/syntax.jl b/test/syntax.jl
index 2f95b9505d056..ff392d5069708 100644
--- a/test/syntax.jl
+++ b/test/syntax.jl
@@ -2514,6 +2514,7 @@ end
 end
 
 module Mod2
+import ..Mod.x as x_from_mod
 const y = 2
 end
 
@@ -2554,6 +2555,11 @@ import .Mod.@mac as @m
 @test_throws ErrorException eval(:(import .Mod.func as @notmacro))
 @test_throws ErrorException eval(:(using .Mod: @mac as notmacro))
 @test_throws ErrorException eval(:(using .Mod: func as @notmacro))
+
+import .Mod2.x_from_mod
+
+@test @isdefined(x_from_mod)
+@test x_from_mod == Mod.x
 end
 
 import .TestImportAs.Mod2 as M2