Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Investigating LLVM IR dump info #420

Closed
1e-to opened this issue Jun 2, 2021 · 1 comment
Closed

Investigating LLVM IR dump info #420

1e-to opened this issue Jun 2, 2021 · 1 comment

Comments

@1e-to
Copy link
Contributor

1e-to commented Jun 2, 2021

Goal: search the LLVM IR dump for nested function information in the debug info

  • NUMBA_DUMP_OPTIMIZED - prints the LLVM IR after lowering and optimizations
    • The dump contains nested function information
  • BaseLower - creates a DIBuilder for the kernel
    • For the nested function only a DummyDIBuilder is created - because of the alwaysinline flag
    • Open question: is the alwaysinline flag disabled when DEBUG is enabled?
NUMBA_DUMP_OPTIMIZED=1 python sum.py

Output

N 10 

Scheduling on ... 

    Name            Intel(R) UHD Graphics [0x9bca] 

    Driver version  21.20.19883 

    Vendor          Intel(R) Corporation 

    Profile         FULL_PROFILE 

    Filter string   opencl:gpu:0 

Device Context: <dpctl.SyclDevice [backend_type.opencl, device_type.gpu,  Intel(R) UHD Graphics [0x9bca]] at 0x7f44b15980b0> 

before :  [0.86461914 0.68920165 0.62741727 0.9833912  0.18201347 0.1443322 

0.17510708 0.06031802 0.84365124 0.3499528 ] 

before :  [0.12562443 0.97075903 0.29140168 0.23509483 0.8285244  0.01495754 

0.5693495  0.41682884 0.17450567 0.882609  ] 

before :  [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 

compile_kernel (array(float32, 1d, C), array(float32, 1d, C), array(float32, 1d, C)) 

================================================================================ 

-------------------------------OPTIMIZED DUMP nrt------------------------------- 

; ModuleID = 'nrt' 

source_filename = "<string>" 

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" 

target triple = "x86_64-unknown-linux-gnu" 

  

@.const.picklebuf.139932679856208 = internal constant { i8*, i32, i8* } { i8* getelementptr inbounds ([96 x i8], [96 x i8]* @.const.pickledata.139932679856208, i32 0, i32 0), i32 96, i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.const.pickledata.139932679856208.sha1, i32 0, i32 0) } 

@.const.pickledata.139932679856208 = internal constant [96 x i8] c"\80\03cbuiltins\0ARuntimeError\0Aq\00X6\00\00\00numba jitted function aborted due to unresolved symbolq\01\85q\02N\87q\03." 

@.const.pickledata.139932679856208.sha1 = internal constant [20 x i8] c"\9F\AB\D6;A\96\D9\B5\A3\87'\EC\0F\13V\15\AB\99>\DA" 

  

; Function Attrs: nofree norecurse nounwind 

define i64 @nrt_atomic_add(i64* nocapture %.1) local_unnamed_addr #0 { 

.3: 

  %.4 = atomicrmw add i64* %.1, i64 1 monotonic 

  %.5 = add i64 %.4, 1 

  ret i64 %.5 

} 

  

; Function Attrs: nofree norecurse nounwind 

define i64 @nrt_atomic_sub(i64* nocapture %.1) local_unnamed_addr #0 { 

.3: 

  %.4 = atomicrmw sub i64* %.1, i64 1 monotonic 

  %.5 = add i64 %.4, -1 

  ret i64 %.5 

} 

  

; Function Attrs: nofree norecurse nounwind 

define i32 @nrt_atomic_cas(i64* nocapture %.1, i64 %.2, i64 %.3, i64* nocapture %.4) local_unnamed_addr #0 { 

.6: 

  %.7 = cmpxchg i64* %.1, i64 %.2, i64 %.3 monotonic monotonic 

  %.8 = extractvalue { i64, i1 } %.7, 0 

  %.9 = extractvalue { i64, i1 } %.7, 1 

  store i64 %.8, i64* %.4, align 8 

  %.11 = zext i1 %.9 to i32 

  ret i32 %.11 

} 

  

; Function Attrs: norecurse nounwind readonly 

define i8* @NRT_MemInfo_data_fast(i8* nocapture readonly %.1) local_unnamed_addr #1 { 

.3: 

  %.5 = getelementptr i8, i8* %.1, i64 24 

  %0 = bitcast i8* %.5 to i8** 

  %.6 = load i8*, i8** %0, align 8 

  ret i8* %.6 

} 

  

; Function Attrs: nofree noinline norecurse nounwind 

define void @NRT_incref(i8* %.1) local_unnamed_addr #2 { 

.3: 

  %.4 = icmp eq i8* %.1, null 

  br i1 %.4, label %.3.if, label %.3.endif, !prof !0 

  

.3.if:                                            ; preds = %.3 

  ret void 

  

.3.endif:                                         ; preds = %.3 

  %.7 = bitcast i8* %.1 to i64* 

  %.4.i = atomicrmw add i64* %.7, i64 1 monotonic 

  ret void 

} 

  

; Function Attrs: noinline 

define void @NRT_decref(i8* %.1) local_unnamed_addr #3 { 

.3: 

  %.4 = icmp eq i8* %.1, null 

  br i1 %.4, label %.3.if, label %.3.endif, !prof !0 

  

.3.if:                                            ; preds = %.3.endif, %.3.endif.if, %.3 

  ret void 

  

.3.endif:                                         ; preds = %.3 

  fence release 

  %.8 = bitcast i8* %.1 to i64* 

  %.4.i = atomicrmw sub i64* %.8, i64 1 monotonic 

  %.10 = icmp eq i64 %.4.i, 1 

  br i1 %.10, label %.3.endif.if, label %.3.if, !prof !0 

  

.3.endif.if:                                      ; preds = %.3.endif 

  fence acquire 

  call void @NRT_MemInfo_call_dtor(i8* nonnull %.1) 

  br label %.3.if 

} 

  

declare void @NRT_MemInfo_call_dtor(i8*) local_unnamed_addr 

  

; Function Attrs: nofree norecurse nounwind writeonly 

define i32 @nrt_unresolved_abort(i8** nocapture readnone %.1, { i8*, i32, i8* }** nocapture %.2) local_unnamed_addr #4 { 

.4: 

  store { i8*, i32, i8* }* @.const.picklebuf.139932679856208, { i8*, i32, i8* }** %.2, align 8 

  ret i32 1, !ret_is_raise !1 

} 

  

; Function Attrs: nounwind 

declare void @llvm.stackprotector(i8*, i8**) #5 

  

attributes #0 = { nofree norecurse nounwind } 

attributes #1 = { norecurse nounwind readonly } 

attributes #2 = { nofree noinline norecurse nounwind } 

attributes #3 = { noinline } 

attributes #4 = { nofree norecurse nounwind writeonly } 

attributes #5 = { nounwind } 

  

!0 = !{!"branch_weights", i32 1, i32 99} 

!1 = !{i1 true} 

  

================================================================================ 

WE ARE HERE 

<numba.core.debuginfo.DummyDIBuilder object at 0x7f448d716690> 

WE ARE HERE 

<numba.core.debuginfo.DummyDIBuilder object at 0x7f448d6eb9d0> 

WE ARE HERE 

<numba.core.debuginfo.DummyDIBuilder object at 0x7f448d716150> 

'DPPYTargetContext' object has no attribute 'lower_extensions' 

local variable 'lower_extension_parfor' referenced before assignment 

================================================================================ 

---------------------------OPTIMIZED DUMP kernel_sum---------------------------- 

; ModuleID = 'kernel_sum' 

source_filename = "<string>" 

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" 

target triple = "spir64-unknown-unknown" 

  

@dppy_py_devfn__2E_NumbaEnv_2E__5F__5F_main_5F__5F__2E_kernel_5F_sum_24_2_2E_float32_2E_float32 = common local_unnamed_addr global i8* null 

  

; Function Attrs: alwaysinline nofree norecurse nounwind writeonly 

define spir_func i32 @dppy_py_devfn__5F__5F_main_5F__5F__2E_kernel_5F_sum_24_2_2E_float32_2E_float32(float* nocapture %.ret, float %arg.a, float %arg.b) local_unnamed_addr #0 { 

entry: 

  %.13 = fadd float %arg.a, %arg.b 

  store float %.13, float* %.ret, align 4 

  ret i32 0 

} 

  

attributes #0 = { alwaysinline nofree norecurse nounwind writeonly } 

  

================================================================================ 

WE ARE HERE 

<numba.core.debuginfo.DIBuilder object at 0x7f448d62f410> 

WE ARE HERE 

<numba.core.debuginfo.DIBuilder object at 0x7f448d626bd0> 

WE ARE HERE 

<numba.core.debuginfo.DummyDIBuilder object at 0x7f448d62f650> 

'DPPYTargetContext' object has no attribute 'lower_extensions' 

local variable 'lower_extension_parfor' referenced before assignment 

================================================================================ 

------------------------OPTIMIZED DUMP data_parallel_sum------------------------ 

; ModuleID = 'data_parallel_sum' 

source_filename = "<string>" 

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" 

target triple = "spir64-unknown-unknown" 

  

@dppy_py_devfn__2E_NumbaEnv_2E__5F__5F_main_5F__5F__2E_data_5F_parallel_5F_sum_24_1_2E_array_28_float32_2C__20_1d_2C__20_C_29__2E_array_28_float32_2C__20_1d_2C__20_C_29__2E_array_28_float32_2C__20_1d_2C__20_C_29_ = common local_unnamed_addr global i8* null 

@dppy_py_devfn__2E_NumbaEnv_2E__5F__5F_main_5F__5F__2E_kernel_5F_sum_24_2_2E_float32_2E_float32 = common local_unnamed_addr global i8* null 

  

; Function Attrs: noinline 

define spir_func i32 @dppy_py_devfn__5F__5F_main_5F__5F__2E_data_5F_parallel_5F_sum_24_1_2E_array_28_float32_2C__20_1d_2C__20_C_29__2E_array_28_float32_2C__20_1d_2C__20_C_29__2E_array_28_float32_2C__20_1d_2C__20_C_29_(i8** nocapture %.ret, i8* %arg.a.0, i8* %arg.a.1, i64 %arg.a.2, i64 %arg.a.3, float addrspace(4)* %arg.a.4, i64 %arg.a.5.0, i64 %arg.a.6.0, i8* %arg.b.0, i8* %arg.b.1, i64 %arg.b.2, i64 %arg.b.3, float addrspace(4)* %arg.b.4, i64 %arg.b.5.0, i64 %arg.b.6.0, i8* %arg.c.0, i8* %arg.c.1, i64 %arg.c.2, i64 %arg.c.3, float addrspace(4)* %arg.c.4, i64 %arg.c.5.0, i64 %arg.c.6.0) local_unnamed_addr #0 !dbg !4 { 

entry: 

  call void @llvm.dbg.value(metadata { i8*, i8*, i64, i64, float addrspace(4)*, [1 x i64], [1 x i64] } zeroinitializer, metadata !7, metadata !DIExpression()), !dbg !12 

  call void @llvm.dbg.value(metadata { i8*, i8*, i64, i64, float addrspace(4)*, [1 x i64], [1 x i64] } zeroinitializer, metadata !13, metadata !DIExpression()), !dbg !12 

  call void @llvm.dbg.value(metadata { i8*, i8*, i64, i64, float addrspace(4)*, [1 x i64], [1 x i64] } zeroinitializer, metadata !14, metadata !DIExpression()), !dbg !12 

  call void @llvm.dbg.value(metadata i64 0, metadata !15, metadata !DIExpression()), !dbg !12 

  %.142 = alloca float, align 4, !dbg !17 

  store float 0.000000e+00, float* %.142, align 4, !dbg !17 

  call void @llvm.dbg.value(metadata { i8*, i8*, i64, i64, float addrspace(4)*, [1 x i64], [1 x i64] } undef, metadata !7, metadata !DIExpression()), !dbg !12 

  call void @llvm.dbg.value(metadata { i8*, i8*, i64, i64, float addrspace(4)*, [1 x i64], [1 x i64] } undef, metadata !13, metadata !DIExpression()), !dbg !12 

  call void @llvm.dbg.value(metadata { i8*, i8*, i64, i64, float addrspace(4)*, [1 x i64], [1 x i64] } undef, metadata !14, metadata !DIExpression()), !dbg !12 

  %.56 = call spir_func i64 @_Z13get_global_idj(i32 0), !dbg !18 

  call void @llvm.dbg.value(metadata i64 %.56, metadata !15, metadata !DIExpression()), !dbg !12 

  %.82 = icmp slt i64 %.56, 0, !dbg !17 

  %.83 = select i1 %.82, i64 %arg.a.5.0, i64 0, !dbg !17 

  %.84 = add i64 %.83, %.56, !dbg !17 

  %.97 = getelementptr float, float addrspace(4)* %arg.a.4, i64 %.84, !dbg !17 

  %.98 = load float, float addrspace(4)* %.97, align 4, !dbg !17 

  call void @llvm.dbg.value(metadata { i8*, i8*, i64, i64, float addrspace(4)*, [1 x i64], [1 x i64] } zeroinitializer, metadata !7, metadata !DIExpression()), !dbg !12 

  %.119 = select i1 %.82, i64 %arg.b.5.0, i64 0, !dbg !17 

  %.120 = add i64 %.119, %.56, !dbg !17 

  %.133 = getelementptr float, float addrspace(4)* %arg.b.4, i64 %.120, !dbg !17 

  %.134 = load float, float addrspace(4)* %.133, align 4, !dbg !17 

  call void @llvm.dbg.value(metadata { i8*, i8*, i64, i64, float addrspace(4)*, [1 x i64], [1 x i64] } zeroinitializer, metadata !13, metadata !DIExpression()), !dbg !12 

  store float 0.000000e+00, float* %.142, align 4, !dbg !17 

  %.145 = call spir_func i32 @dppy_py_devfn__5F__5F_main_5F__5F__2E_kernel_5F_sum_24_2_2E_float32_2E_float32(float* nonnull %.142, float %.98, float %.134), !dbg !17 

  switch i32 %.145, label %B0.if [ 

    i32 -2, label %B0.endif 

    i32 0, label %B0.endif 

  ], !dbg !17 

  

B0.if:                                            ; preds = %entry 

  ret i32 %.145, !dbg !17 

  

B0.endif:                                         ; preds = %entry, %entry 

  %.153 = load float, float* %.142, align 4, !dbg !17 

  %.181 = select i1 %.82, i64 %arg.c.5.0, i64 0, !dbg !17 

  %.182 = add i64 %.181, %.56, !dbg !17 

  %.195 = getelementptr float, float addrspace(4)* %arg.c.4, i64 %.182, !dbg !17 

  store float %.153, float addrspace(4)* %.195, align 4, !dbg !17 

  call void @llvm.dbg.value(metadata i64 0, metadata !15, metadata !DIExpression()), !dbg !12 

  call void @llvm.dbg.value(metadata { i8*, i8*, i64, i64, float addrspace(4)*, [1 x i64], [1 x i64] } zeroinitializer, metadata !14, metadata !DIExpression()), !dbg !12 

  store i8* null, i8** %.ret, align 8, !dbg !17 

  ret i32 0, !dbg !17 

} 

  

declare spir_func i64 @_Z13get_global_idj(i32) local_unnamed_addr 

  

; Function Attrs: alwaysinline nofree norecurse nounwind writeonly 

define linkonce_odr spir_func i32 @dppy_py_devfn__5F__5F_main_5F__5F__2E_kernel_5F_sum_24_2_2E_float32_2E_float32(float* nocapture %.ret, float %arg.a, float %arg.b) local_unnamed_addr #1 { 

entry: 

  %.13 = fadd float %arg.a, %arg.b 

  store float %.13, float* %.ret, align 4 

  ret i32 0 

} 

  

; Function Attrs: nounwind readnone speculatable willreturn 

declare void @llvm.dbg.value(metadata, metadata, metadata) #2 

  

attributes #0 = { noinline } 

attributes #1 = { alwaysinline nofree norecurse nounwind writeonly } 

attributes #2 = { nounwind readnone speculatable willreturn } 

  

!llvm.dbg.cu = !{!0} 

!llvm.module.flags = !{!2, !3} 

  

!0 = distinct !DICompileUnit(language: DW_LANG_Python, file: !1, producer: "Numba", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) 

!1 = !DIFile(filename: "sum.py", directory: "/localdisk/work/etotmeni/stepping") 

!2 = !{i32 2, !"Dwarf Version", i32 4} 

!3 = !{i32 2, !"Debug Info Version", i32 3} 

!4 = distinct !DISubprogram(name: "data_parallel_sum", linkageName: "dppy_py_devfn__5F__5F_main_5F__5F__2E_data_5F_parallel_5F_sum_24_1_2E_array_28_float32_2C__20_1d_2C__20_C_29__2E_array_28_float32_2C__20_1d_2C__20_C_29__2E_array_28_float32_2C__20_1d_2C__20_C_29_", scope: !1, file: !1, line: 15, type: !5, scopeLine: 15, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) 

!5 = !DISubroutineType(types: !6) 

!6 = !{} 

!7 = !DILocalVariable(name: "a", scope: !4, file: !1, line: 17, type: !8) 

!8 = !DICompositeType(tag: DW_TAG_array_type, name: "{i8*, i8*, i64, i64, float addrspace(4)*, [1 x i64], [1 x i64]}", baseType: !9, size: 448, elements: !10, identifier: "{i8*, i8*, i64, i64, float addrspace(4)*, [1 x i64], [1 x i64]}") 

!9 = !DIBasicType(name: "byte", size: 8, encoding: DW_ATE_unsigned_char) 

!10 = !{!11} 

!11 = !DISubrange(count: 56) 

!12 = !DILocation(line: 0, scope: !4) 

!13 = !DILocalVariable(name: "b", scope: !4, file: !1, line: 17, type: !8) 

!14 = !DILocalVariable(name: "c", scope: !4, file: !1, line: 17, type: !8) 

!15 = !DILocalVariable(name: "i", scope: !4, file: !1, line: 17, type: !16) 

!16 = !DIBasicType(name: "i64", size: 64, encoding: DW_ATE_unsigned) 

!17 = !DILocation(line: 18, column: 1, scope: !4) 

!18 = !DILocation(line: 17, column: 1, scope: !4) 

  

================================================================================ 

!!!!!!!!!!!!!!              create bc opt -O1 -o /tmp/tmpqwbpeci9/0-llvm-friendly-spir.bc /tmp/tmpqwbpeci9/0-llvm-friendly-spir 

!!!!!!!!!!!!!!               ['llvm-spirv', '--spirv-debug-info-version=ocl-100', '-o', '/tmp/tmpqwbpeci9/1-generated-spirv', '/tmp/tmpqwbpeci9/0-llvm-friendly-spir.bc'] 

!!!!!!!!!!!!!!              generated-spirv /tmp/tmpqwbpeci9/1-generated-spirv ['/tmp/tmpqwbpeci9/1-generated-spirv'] 

after :  [0.99024355 1.6599607  0.91881895 1.2184861  1.0105379  0.15928975 

0.7444566  0.47714686 1.0181569  1.2325618 ] 

Done... 
@diptorupd
Copy link
Contributor

What the issue is about is not clear from the description.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants