; Compiler Explorer

; Source code

; Module-level header: identifies the module and fixes the target the IR was
; generated for.
; ModuleID = '/app/example.ll'
source_filename = "<source>"
; Data layout fields (see the LLVM LangRef "Data Layout" section):
;   e            little-endian
;   m:e          ELF-style symbol mangling
;   p270/p271    pointers in address spaces 270 and 271 are 32 bits wide
;   p272         pointers in address space 272 are 64 bits wide
;   i64:64       i64 is 64-bit aligned
;   f80:128      x86 80-bit floats are 128-bit aligned
;   n8:16:32:64  native integer widths
;   S128         natural stack alignment is 128 bits
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; i32 @_Z1fPim(i32* %arr, i64 %n)
;
; `_Z1fPim` is the Itanium-mangled form of `f(int*, unsigned long)`.
; Returns the signed maximum of arr[0] .. arr[n-1]. arr[0] is loaded
; unconditionally in `entry`, so the function reads arr[0] and returns it even
; when n is 0 or 1.
;
; Control-flow overview:
;   entry                  load arr[0]; return it directly unless n > 1.
;   for.body.preheader     trip count = n - 1; fewer than 8 iterations go
;                          straight to the scalar loop.
;   vector.ph / .new       set up the vector loop: 8 elements per vector
;                          iteration, held in two <4 x i32> running maxima.
;   vector.body            vector loop unrolled by 2 (16 elements per pass).
;   vector.body.epil       one leftover vector iteration when the number of
;                          vector iterations is odd.
;   middle.block           fold both accumulators into a single i32 maximum.
;   for.body               scalar loop for the elements the vector path did
;                          not cover (or for all of them when n - 1 < 8).
;   for.cond.cleanup       single return point.
;
; Function Attrs: nofree norecurse nosync nounwind readonly uwtable willreturn mustprogress
define dso_local i32 @_Z1fPim(i32* nocapture readonly %arr, i64 %n) local_unnamed_addr #0 !dbg !7 {
entry:
  ; %0 = arr[0], the initial maximum.
  %0 = load i32, i32* %arr, align 4, !dbg !10, !tbaa !11
  ; Unsigned compare: only when n > 1 is there anything beyond arr[0] to scan.
  %cmp9 = icmp ugt i64 %n, 1, !dbg !15
  br i1 %cmp9, label %for.body.preheader, label %for.cond.cleanup, !dbg !16

for.body.preheader:                               ; preds = %entry
  ; %1 = n - 1 = number of elements still to examine (indices 1 .. n-1).
  %1 = add i64 %n, -1, !dbg !16
  ; One vector iteration consumes 8 elements; with fewer than 8 remaining, use
  ; the scalar loop only.
  %min.iters.check = icmp ult i64 %1, 8, !dbg !16
  br i1 %min.iters.check, label %for.body.preheader14, label %vector.ph, !dbg !16

vector.ph:                                        ; preds = %for.body.preheader
  ; %n.vec = trip count rounded down to a multiple of 8 (elements handled by
  ; the vector path).
  %n.vec = and i64 %1, -8, !dbg !16
  ; The low three bits of %n.vec are clear, so `or 1` equals `+ 1`: this is the
  ; array index at which the scalar remainder loop resumes.
  %ind.end = or i64 %n.vec, 1, !dbg !16
  ; Broadcast arr[0] to all four lanes; it seeds both vector accumulators.
  %minmax.ident.splatinsert = insertelement <4 x i32> poison, i32 %0, i32 0, !dbg !16
  %minmax.ident.splat = shufflevector <4 x i32> %minmax.ident.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer, !dbg !16
  ; %4 = (%n.vec - 8) / 8 + 1 = number of 8-element vector iterations.
  %2 = add i64 %n.vec, -8, !dbg !16
  %3 = lshr exact i64 %2, 3, !dbg !16
  %4 = add nuw nsw i64 %3, 1, !dbg !16
  ; %xtraiter = 1 when that count is odd, i.e. one iteration is left over after
  ; the 2x-unrolled loop.
  %xtraiter = and i64 %4, 1, !dbg !16
  ; %2 == 0 means exactly one vector iteration: skip the unrolled loop and let
  ; the epilogue handle it.
  %5 = icmp eq i64 %2, 0, !dbg !16
  br i1 %5, label %middle.block.unr-lcssa, label %vector.ph.new, !dbg !16

vector.ph.new:                                    ; preds = %vector.ph
  ; Mask is 0x3FFFFFFFFFFFFFFE: clears bit 0, giving the even number of vector
  ; iterations the unrolled loop executes. The two top bits it also masks are
  ; already zero because %4 derives from a value shifted right by 3.
  %unroll_iter = and i64 %4, 4611686018427387902, !dbg !16
  br label %vector.body, !dbg !16

vector.body:                                      ; preds = %vector.body, %vector.ph.new
  ; %index      element offset processed so far (0, 16, 32, ...).
  ; %vec.phi    running lane-wise maximum for the first 4 elements of each
  ;             8-element group.
  ; %vec.phi12  running lane-wise maximum for the second 4 elements.
  ; %niter      vector iterations remaining; decremented by 2 per pass.
  %index = phi i64 [ 0, %vector.ph.new ], [ %index.next.1, %vector.body ]
  %vec.phi = phi <4 x i32> [ %minmax.ident.splat, %vector.ph.new ], [ %20, %vector.body ]
  %vec.phi12 = phi <4 x i32> [ %minmax.ident.splat, %vector.ph.new ], [ %21, %vector.body ]
  %niter = phi i64 [ %unroll_iter, %vector.ph.new ], [ %niter.nsub.1, %vector.body ]
  ; --- first unrolled copy: arr[index+1 .. index+8] ---
  ; %index is a multiple of 16, so `or 1` equals `+ 1` (the scan starts at
  ; arr[1]).
  %offset.idx = or i64 %index, 1
  %6 = getelementptr inbounds i32, i32* %arr, i64 %offset.idx, !dbg !17
  %7 = bitcast i32* %6 to <4 x i32>*, !dbg !17
  ; Vector loads use align 4: only element alignment is assumed.
  %wide.load = load <4 x i32>, <4 x i32>* %7, align 4, !dbg !17, !tbaa !11
  %8 = getelementptr inbounds i32, i32* %6, i64 4, !dbg !17
  %9 = bitcast i32* %8 to <4 x i32>*, !dbg !17
  %wide.load13 = load <4 x i32>, <4 x i32>* %9, align 4, !dbg !17, !tbaa !11
  ; Lane-wise signed max, expressed as compare + select.
  %10 = icmp sgt <4 x i32> %wide.load, %vec.phi, !dbg !18
  %11 = icmp sgt <4 x i32> %wide.load13, %vec.phi12, !dbg !18
  %12 = select <4 x i1> %10, <4 x i32> %wide.load, <4 x i32> %vec.phi, !dbg !17
  %13 = select <4 x i1> %11, <4 x i32> %wide.load13, <4 x i32> %vec.phi12, !dbg !17
  ; --- second unrolled copy: arr[index+9 .. index+16] ---
  ; `or 9` equals `+ 9` for the same reason as above.
  %offset.idx.1 = or i64 %index, 9
  %14 = getelementptr inbounds i32, i32* %arr, i64 %offset.idx.1, !dbg !17
  %15 = bitcast i32* %14 to <4 x i32>*, !dbg !17
  %wide.load.1 = load <4 x i32>, <4 x i32>* %15, align 4, !dbg !17, !tbaa !11
  %16 = getelementptr inbounds i32, i32* %14, i64 4, !dbg !17
  %17 = bitcast i32* %16 to <4 x i32>*, !dbg !17
  %wide.load13.1 = load <4 x i32>, <4 x i32>* %17, align 4, !dbg !17, !tbaa !11
  %18 = icmp sgt <4 x i32> %wide.load.1, %12, !dbg !18
  %19 = icmp sgt <4 x i32> %wide.load13.1, %13, !dbg !18
  %20 = select <4 x i1> %18, <4 x i32> %wide.load.1, <4 x i32> %12, !dbg !17
  %21 = select <4 x i1> %19, <4 x i32> %wide.load13.1, <4 x i32> %13, !dbg !17
  ; Advance by 16 elements / 2 vector iterations; leave when none remain.
  %index.next.1 = add i64 %index, 16
  %niter.nsub.1 = add i64 %niter, -2
  %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
  br i1 %niter.ncmp.1, label %middle.block.unr-lcssa.loopexit, label %vector.body, !llvm.loop !19

middle.block.unr-lcssa.loopexit:                  ; preds = %vector.body
  ; Element index at which a leftover vector iteration would start
  ; (%index.next.1 is a multiple of 16, so this is %index.next.1 + 1).
  %phi.bo = or i64 %index.next.1, 1
  br label %middle.block.unr-lcssa

middle.block.unr-lcssa:                           ; preds = %middle.block.unr-lcssa.loopexit, %vector.ph
  ; %.lcssa15.ph / %.lcssa.ph are the accumulators used when the epilogue is
  ; skipped. Their `undef` input on the %vector.ph edge is never observed:
  ; that edge is taken only when %2 == 0, which makes %4 == 1 and
  ; %xtraiter == 1, so control always continues to vector.body.epil and
  ; middle.block then takes the epilogue's values instead.
  %.lcssa15.ph = phi <4 x i32> [ undef, %vector.ph ], [ %20, %middle.block.unr-lcssa.loopexit ]
  %.lcssa.ph = phi <4 x i32> [ undef, %vector.ph ], [ %21, %middle.block.unr-lcssa.loopexit ]
  ; Inputs for the epilogue: starting element index (1 when the unrolled loop
  ; was skipped) and the accumulators so far (the arr[0] splat in that case).
  %index.unr = phi i64 [ 1, %vector.ph ], [ %phi.bo, %middle.block.unr-lcssa.loopexit ]
  %vec.phi.unr = phi <4 x i32> [ %minmax.ident.splat, %vector.ph ], [ %20, %middle.block.unr-lcssa.loopexit ]
  %vec.phi12.unr = phi <4 x i32> [ %minmax.ident.splat, %vector.ph ], [ %21, %middle.block.unr-lcssa.loopexit ]
  ; Skip the epilogue when the number of vector iterations was even.
  %lcmp.mod.not = icmp eq i64 %xtraiter, 0
  br i1 %lcmp.mod.not, label %middle.block, label %vector.body.epil

vector.body.epil:                                 ; preds = %middle.block.unr-lcssa
  ; One non-unrolled vector iteration over arr[index.unr .. index.unr+7],
  ; updating both accumulators exactly as a single copy of vector.body does.
  %22 = getelementptr inbounds i32, i32* %arr, i64 %index.unr, !dbg !17
  %23 = bitcast i32* %22 to <4 x i32>*, !dbg !17
  %wide.load.epil = load <4 x i32>, <4 x i32>* %23, align 4, !dbg !17, !tbaa !11
  %24 = getelementptr inbounds i32, i32* %22, i64 4, !dbg !17
  %25 = bitcast i32* %24 to <4 x i32>*, !dbg !17
  %wide.load13.epil = load <4 x i32>, <4 x i32>* %25, align 4, !dbg !17, !tbaa !11
  %26 = icmp sgt <4 x i32> %wide.load13.epil, %vec.phi12.unr, !dbg !18
  %27 = select <4 x i1> %26, <4 x i32> %wide.load13.epil, <4 x i32> %vec.phi12.unr, !dbg !17
  %28 = icmp sgt <4 x i32> %wide.load.epil, %vec.phi.unr, !dbg !18
  %29 = select <4 x i1> %28, <4 x i32> %wide.load.epil, <4 x i32> %vec.phi.unr, !dbg !17
  br label %middle.block, !dbg !16

middle.block:                                     ; preds = %middle.block.unr-lcssa, %vector.body.epil
  ; Final values of the two vector accumulators.
  %.lcssa15 = phi <4 x i32> [ %.lcssa15.ph, %middle.block.unr-lcssa ], [ %29, %vector.body.epil ], !dbg !17
  %.lcssa = phi <4 x i32> [ %.lcssa.ph, %middle.block.unr-lcssa ], [ %27, %vector.body.epil ], !dbg !17
  ; Merge the two accumulators lane-wise, then reduce the four lanes to a
  ; single signed maximum.
  %rdx.minmax.cmp = icmp sgt <4 x i32> %.lcssa15, %.lcssa, !dbg !16
  %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i32> %.lcssa15, <4 x i32> %.lcssa, !dbg !16
  %30 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %rdx.minmax.select), !dbg !16
  ; If n - 1 was already a multiple of 8 the vector path covered everything;
  ; otherwise finish the tail in the scalar loop.
  %cmp.n = icmp eq i64 %1, %n.vec, !dbg !16
  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader14, !dbg !16

for.body.preheader14:                             ; preds = %for.body.preheader, %middle.block
  ; Scalar loop start state: (i = 1, max = arr[0]) when the vector path was
  ; bypassed, or (i = %ind.end, max = vector result) when it ran.
  %i.011.ph = phi i64 [ 1, %for.body.preheader ], [ %ind.end, %middle.block ]
  %max.010.ph = phi i32 [ %0, %for.body.preheader ], [ %30, %middle.block ]
  br label %for.body, !dbg !16

for.cond.cleanup:                                 ; preds = %for.body, %middle.block, %entry
  ; Result: arr[0] (n <= 1), the vector reduction (no scalar tail), or the
  ; scalar loop's final maximum.
  %max.0.lcssa = phi i32 [ %0, %entry ], [ %30, %middle.block ], [ %spec.select, %for.body ], !dbg !23
  ret i32 %max.0.lcssa, !dbg !24

for.body:                                         ; preds = %for.body.preheader14, %for.body
  ; Scalar loop: max = (arr[i] > max) ? arr[i] : max, for i up to n - 1.
  %i.011 = phi i64 [ %inc, %for.body ], [ %i.011.ph, %for.body.preheader14 ]
  %max.010 = phi i32 [ %spec.select, %for.body ], [ %max.010.ph, %for.body.preheader14 ]
  %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 %i.011, !dbg !17
  %31 = load i32, i32* %arrayidx1, align 4, !dbg !17, !tbaa !11
  %cmp2 = icmp sgt i32 %31, %max.010, !dbg !18
  %spec.select = select i1 %cmp2, i32 %31, i32 %max.010, !dbg !17
  %inc = add nuw i64 %i.011, 1, !dbg !25
  ; Exit once the incremented index reaches n.
  %exitcond.not = icmp eq i64 %inc, %n, !dbg !15
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !16, !llvm.loop !26
}

; LLVM intrinsic: signed-integer maximum reduction over the four lanes of a
; <4 x i32>, returning the largest element as an i32. Called from
; @_Z1fPim's middle.block to collapse the vector accumulator to a scalar.
; Function Attrs: nofree nosync nounwind readnone willreturn
declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) #1

; Attribute groups.
; #0 is attached to the definition of @_Z1fPim: it only reads memory
; (readonly), does not unwind, and targets generic x86-64 with a feature list
; that tops out at SSE2.
attributes #0 = { nofree norecurse nosync nounwind readonly uwtable willreturn mustprogress "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
; #1 is attached to the @llvm.vector.reduce.smax.v4i32 declaration: it
; accesses no memory (readnone).
attributes #1 = { nofree nosync nounwind readnone willreturn }

; Named metadata: debug compile unit, module flags, and producer identification.
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
!llvm.ident = !{!6}

; Compile unit: C++ source, optimized build, line-tables-only debug info.
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 13.0.0 (https://github.com/llvm/llvm-project.git 72797dedb720fae22682fc884cbf741c5a2066c2)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "<source>", directory: "/app")
; Shared empty tuple.
!2 = !{}
; Module flags, each written as { merge behavior, name, value }.
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 7, !"uwtable", i32 1}
!6 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 72797dedb720fae22682fc884cbf741c5a2066c2)"}
; Debug-info subprogram for `f`, defined at example.cpp line 3; attached to
; @_Z1fPim via `!dbg !7`.
!7 = distinct !DISubprogram(name: "f", scope: !8, file: !8, line: 3, type: !9, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
!8 = !DIFile(filename: "example.cpp", directory: "/app")
!9 = !DISubroutineType(types: !2)
; Source location of the initial arr[0] load.
!10 = !DILocation(line: 5, column: 15, scope: !7)
; TBAA: !11 is the access tag placed on every i32 load in @_Z1fPim
; ("int" -> "omnipotent char" -> "Simple C++ TBAA" root).
!11 = !{!12, !12, i64 0}
!12 = !{!"int", !13, i64 0}
!13 = !{!"omnipotent char", !14, i64 0}
!14 = !{!"Simple C++ TBAA"}
; Source locations referenced by the loop control and loop body instructions.
!15 = !DILocation(line: 6, column: 26, scope: !7)
!16 = !DILocation(line: 6, column: 5, scope: !7)
!17 = !DILocation(line: 8, column: 12, scope: !7)
!18 = !DILocation(line: 8, column: 19, scope: !7)
; Loop metadata for vector.body: source range !16..!20, mustprogress, and
; marked as already vectorized.
!19 = distinct !{!19, !16, !20, !21, !22}
!20 = !DILocation(line: 12, column: 5, scope: !7)
!21 = !{!"llvm.loop.mustprogress"}
!22 = !{!"llvm.loop.isvectorized", i32 1}
; Line-0 location used on the result phi in for.cond.cleanup.
!23 = !DILocation(line: 0, scope: !7)
; Location of the `ret`.
!24 = !DILocation(line: 13, column: 5, scope: !7)
; Location of the scalar loop's index increment.
!25 = !DILocation(line: 6, column: 32, scope: !7)
; Loop metadata for the scalar remainder loop (for.body): same as !19 plus
; runtime unrolling disabled.
!26 = distinct !{!26, !16, !20, !21, !27, !22}
!27 = !{!"llvm.loop.unroll.runtime.disable"}