diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index eb6b5068232abfc7300fa9253e1dcc3cc759c246..661c0a105f4272ebff273f90c444ddcd8d2b55fc 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -15281,6 +15281,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { + SmallVector<Value*, 4> Ops; + + for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { + if (E->getArg(i)->getType()->isArrayType()) + Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer()); + else + Ops.push_back(EmitScalarExpr(E->getArg(i))); + } Intrinsic::ID ID = Intrinsic::not_intrinsic; @@ -15307,9 +15315,6 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_vsx_lxvl: case PPC::BI__builtin_vsx_lxvll: { - SmallVector<Value *, 2> Ops; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops.push_back(EmitScalarExpr(E->getArg(1))); if(BuiltinID == PPC::BI__builtin_vsx_lxvl || BuiltinID == PPC::BI__builtin_vsx_lxvll){ Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy); @@ -15378,10 +15383,6 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_vsx_stxvl: case PPC::BI__builtin_vsx_stxvll: { - SmallVector<Value *, 3> Ops; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops.push_back(EmitScalarExpr(E->getArg(1))); - Ops.push_back(EmitScalarExpr(E->getArg(2))); if(BuiltinID == PPC::BI__builtin_vsx_stxvl || BuiltinID == PPC::BI__builtin_vsx_stxvll ){ Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); @@ -15434,15 +15435,13 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, // Essentially boils down to performing an unaligned VMX load sequence so // as to avoid crossing a page boundary and then shuffling the elements // into the right side of the vector register. - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue(); + int64_t NumBytes = cast<ConstantInt>(Ops[1])->getZExtValue(); llvm::Type *ResTy = ConvertType(E->getType()); bool IsLE = getTarget().isLittleEndian(); // If the user wants the entire vector, just load the entire vector. if (NumBytes == 16) { - Value *BC = Builder.CreateBitCast(Op0, ResTy->getPointerTo()); + Value *BC = Builder.CreateBitCast(Ops[0], ResTy->getPointerTo()); Value *LD = Builder.CreateLoad(Address(BC, ResTy, CharUnits::fromQuantity(1))); if (!IsLE) @@ -15460,14 +15459,16 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, : Intrinsic::ppc_altivec_lvsl); llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm); Value *HiMem = Builder.CreateGEP( - Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1)); - Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo"); + Int8Ty, Ops[0], ConstantInt::get(Ops[1]->getType(), NumBytes - 1)); + Value *LoLd = Builder.CreateCall(Lvx, Ops[0], "ld.lo"); Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi"); - Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1"); + Value *Mask1 = Builder.CreateCall(Lvs, Ops[0], "mask1"); - Op0 = IsLE ? HiLd : LoLd; - Op1 = IsLE ? LoLd : HiLd; - Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1"); + Ops.clear(); + Ops.push_back(IsLE ? HiLd : LoLd); + Ops.push_back(IsLE ? LoLd : HiLd); + Ops.push_back(Mask1); + Value *AllElts = Builder.CreateCall(Vperm, Ops, "shuffle1"); Constant *Zero = llvm::Constant::getNullValue(IsLE ? 
ResTy : AllElts->getType()); if (IsLE) { @@ -15488,25 +15489,23 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy); } case PPC::BI__builtin_vsx_strmb: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - Value *Op2 = EmitScalarExpr(E->getArg(2)); - int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue(); + int64_t NumBytes = cast<ConstantInt>(Ops[1])->getZExtValue(); bool IsLE = getTarget().isLittleEndian(); auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) { // Storing the whole vector, simply store it on BE and reverse bytes and // store on LE. if (Width == 16) { - Value *BC = Builder.CreateBitCast(Op0, Op2->getType()->getPointerTo()); - Value *StVec = Op2; + Value *BC = + Builder.CreateBitCast(Ops[0], Ops[2]->getType()->getPointerTo()); + Value *StVec = Ops[2]; if (IsLE) { SmallVector<int, 16> RevMask; for (int Idx = 0; Idx < 16; Idx++) RevMask.push_back(15 - Idx); - StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask); + StVec = Builder.CreateShuffleVector(Ops[2], Ops[2], RevMask); } return Builder.CreateStore( - StVec, Address(BC, Op2->getType(), CharUnits::fromQuantity(1))); + StVec, Address(BC, Ops[2]->getType(), CharUnits::fromQuantity(1))); } auto *ConvTy = Int64Ty; unsigned NumElts = 0; @@ -15531,9 +15530,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, break; } Value *Vec = Builder.CreateBitCast( - Op2, llvm::FixedVectorType::get(ConvTy, NumElts)); - Value *Ptr = - Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset)); + Ops[2], llvm::FixedVectorType::get(ConvTy, NumElts)); + Value *Ptr = Builder.CreateGEP(Int8Ty, Ops[0], + ConstantInt::get(Int64Ty, Offset)); Value *PtrBC = Builder.CreateBitCast(Ptr, ConvTy->getPointerTo()); Value *Elt = Builder.CreateExtractElement(Vec, EltNo); if (IsLE && Width > 1) { @@ -15607,20 +15606,17 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, } case PPC::BI__builtin_altivec_vec_replace_elt: case PPC::BI__builtin_altivec_vec_replace_unaligned: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - Value *Op2 = EmitScalarExpr(E->getArg(2)); // The third argument of vec_replace_elt and vec_replace_unaligned must // be a compile time constant and will be emitted either to the vinsw // or vinsd instruction. - ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); assert(ArgCI && "Third Arg to vinsw/vinsd intrinsic must be a constant integer!"); llvm::Type *ResultType = ConvertType(E->getType()); llvm::Function *F = nullptr; Value *Call = nullptr; int64_t ConstArg = ArgCI->getSExtValue(); - unsigned ArgWidth = Op1->getType()->getPrimitiveSizeInBits(); + unsigned ArgWidth = Ops[1]->getType()->getPrimitiveSizeInBits(); bool Is32Bit = false; assert((ArgWidth == 32 || ArgWidth == 64) && "Invalid argument width"); // The input to vec_replace_elt is an element index, not a byte index. @@ -15642,24 +15638,24 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, if (getTarget().isLittleEndian()) ConstArg = 8 - ConstArg; } - Op2 = ConstantInt::getSigned(Int32Ty, ConstArg); + Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg); // Depending on ArgWidth, the input vector could be a float or a double. // If the input vector is a float type, bitcast the inputs to integers. Or, // if the input vector is a double, bitcast the inputs to 64-bit integers. 
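Before those bitcasts (continued just below) run, the vinsd branch above has already rewritten the element index given to vec_replace_elt into the byte offset the instruction takes: the index is scaled to bytes and, on little-endian targets, mirrored within the register; the 32-bit vinsw branch does the same with 12 as the mirror constant. A minimal standalone C++ sketch of that index math, with a hypothetical helper name and covering only the 64-bit element path shown in this hunk:

    #include <cstdint>

    // Byte offset handed to vinsd for a doubleword element index (0 or 1).
    int64_t vinsdByteOffset(int64_t ElemIdx, bool IsLittleEndian) {
      int64_t ByteOff = ElemIdx * 8;  // doubleword elements are 8 bytes wide
      if (IsLittleEndian)
        ByteOff = 8 - ByteOff;        // mirror the offset on little endian
      return ByteOff;
    }

So replacing element 1 of a vector of 64-bit values uses byte offset 8 on big-endian and 0 on little-endian, which is why the same source-level index selects the same element on both targets.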
- if (!Op1->getType()->isIntegerTy(ArgWidth)) { - Op0 = Builder.CreateBitCast( - Op0, Is32Bit ? llvm::FixedVectorType::get(Int32Ty, 4) - : llvm::FixedVectorType::get(Int64Ty, 2)); - Op1 = Builder.CreateBitCast(Op1, Is32Bit ? Int32Ty : Int64Ty); + if (!Ops[1]->getType()->isIntegerTy(ArgWidth)) { + Ops[0] = Builder.CreateBitCast( + Ops[0], Is32Bit ? llvm::FixedVectorType::get(Int32Ty, 4) + : llvm::FixedVectorType::get(Int64Ty, 2)); + Ops[1] = Builder.CreateBitCast(Ops[1], Is32Bit ? Int32Ty : Int64Ty); } // Emit the call to vinsw or vinsd. - Call = Builder.CreateCall(F, {Op0, Op1, Op2}); + Call = Builder.CreateCall(F, Ops); // Depending on the builtin, bitcast to the approriate result type. if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt && - !Op1->getType()->isIntegerTy()) + !Ops[1]->getType()->isIntegerTy()) return Builder.CreateBitCast(Call, ResultType); else if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt && - Op1->getType()->isIntegerTy()) + Ops[1]->getType()->isIntegerTy()) return Call; else return Builder.CreateBitCast(Call, @@ -15676,15 +15672,15 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, } case PPC::BI__builtin_altivec_vadduqm: case PPC::BI__builtin_altivec_vsubuqm: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); - Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1)); - Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int128Ty, 1)); + Ops[1] = + Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int128Ty, 1)); if (BuiltinID == PPC::BI__builtin_altivec_vadduqm) - return Builder.CreateAdd(Op0, Op1, "vadduqm"); + return Builder.CreateAdd(Ops[0], Ops[1], "vadduqm"); else - return Builder.CreateSub(Op0, Op1, "vsubuqm"); + return Builder.CreateSub(Ops[0], Ops[1], "vsubuqm"); } // Rotate and insert under mask operation. // __rldimi(rs, is, shift, mask) @@ -15693,37 +15689,29 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, // (rotl(rs, shift) & mask) | (is & ~mask) case PPC::BI__builtin_ppc_rldimi: case PPC::BI__builtin_ppc_rlwimi: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - Value *Op2 = EmitScalarExpr(E->getArg(2)); - Value *Op3 = EmitScalarExpr(E->getArg(3)); - llvm::Type *Ty = Op0->getType(); + llvm::Type *Ty = Ops[0]->getType(); Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty); if (BuiltinID == PPC::BI__builtin_ppc_rldimi) - Op2 = Builder.CreateZExt(Op2, Int64Ty); - Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2}); - Value *X = Builder.CreateAnd(Shift, Op3); - Value *Y = Builder.CreateAnd(Op1, Builder.CreateNot(Op3)); + Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); + Value *Shift = Builder.CreateCall(F, {Ops[0], Ops[0], Ops[2]}); + Value *X = Builder.CreateAnd(Shift, Ops[3]); + Value *Y = Builder.CreateAnd(Ops[1], Builder.CreateNot(Ops[3])); return Builder.CreateOr(X, Y); } // Rotate and insert under mask operation. 
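The fshl-based sequence emitted just above implements exactly the formula quoted in the __rldimi/__rlwimi comment, while the __rlwnm case that continues below keeps only the rotate-and-mask half. A small C++ sketch of the 32-bit semantics, assuming the shift amount is in [0, 31]:

    // (rotl(rs, shift) & mask) | (is & ~mask), i.e. what __rlwimi expands to;
    // __rldimi is the same computation on 64-bit operands.
    unsigned rlwimi(unsigned rs, unsigned is, unsigned shift, unsigned mask) {
      unsigned rot = (rs << shift) | (rs >> ((32 - shift) & 31)); // rotl(rs, shift)
      return (rot & mask) | (is & ~mask);
    }

fshl(rs, rs, shift) is the funnel-shift spelling of that same rotate, which is why a single llvm.fshl call covers both builtins here.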
// __rlwnm(rs, shift, mask) // rotl(rs, shift) & mask case PPC::BI__builtin_ppc_rlwnm: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - Value *Op2 = EmitScalarExpr(E->getArg(2)); - llvm::Type *Ty = Op0->getType(); + llvm::Type *Ty = Ops[0]->getType(); Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty); - Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op1}); - return Builder.CreateAnd(Shift, Op2); + Value *Shift = Builder.CreateCall(F, {Ops[0], Ops[0], Ops[1]}); + return Builder.CreateAnd(Shift, Ops[2]); } case PPC::BI__builtin_ppc_poppar4: case PPC::BI__builtin_ppc_poppar8: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - llvm::Type *ArgType = Op0->getType(); + llvm::Type *ArgType = Ops[0]->getType(); Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); - Value *Tmp = Builder.CreateCall(F, Op0); + Value *Tmp = Builder.CreateCall(F, Ops[0]); llvm::Type *ResultType = ConvertType(E->getType()); Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); @@ -15733,12 +15721,10 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, return Result; } case PPC::BI__builtin_ppc_cmpb: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); if (getTarget().getTriple().isPPC64()) { Function *F = CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty}); - return Builder.CreateCall(F, {Op0, Op1}, "cmpb"); + return Builder.CreateCall(F, Ops, "cmpb"); } // For 32 bit, emit the code as below: // %conv = trunc i64 %a to i32 @@ -15756,13 +15742,13 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, // ret i64 %or Function *F = CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty}); - Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty); - Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty); + Value *ArgOneLo = Builder.CreateTrunc(Ops[0], Int32Ty); + Value *ArgTwoLo = Builder.CreateTrunc(Ops[1], Int32Ty); Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32); Value *ArgOneHi = - Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty); + Builder.CreateTrunc(Builder.CreateLShr(Ops[0], ShiftAmt), Int32Ty); Value *ArgTwoHi = - Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty); + Builder.CreateTrunc(Builder.CreateLShr(Ops[1], ShiftAmt), Int32Ty); Value *ResLo = Builder.CreateZExt( Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty); Value *ResHiShift = Builder.CreateZExt( @@ -15856,32 +15842,27 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, return FDiv; } case PPC::BI__builtin_ppc_alignx: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - ConstantInt *AlignmentCI = cast<ConstantInt>(Op0); + ConstantInt *AlignmentCI = cast<ConstantInt>(Ops[0]); if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment)) AlignmentCI = ConstantInt::get(AlignmentCI->getType(), llvm::Value::MaximumAlignment); - emitAlignmentAssumption(Op1, E->getArg(1), + emitAlignmentAssumption(Ops[1], E->getArg(1), /*The expr loc is sufficient.*/ SourceLocation(), AlignmentCI, nullptr); - return Op1; + return Ops[1]; } case PPC::BI__builtin_ppc_rdlam: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - Value *Op2 = EmitScalarExpr(E->getArg(2)); - llvm::Type *Ty = Op0->getType(); - Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false); + llvm::Type *Ty = Ops[0]->getType(); + Value *ShiftAmt = Builder.CreateIntCast(Ops[1], Ty, false); Function *F = 
CGM.getIntrinsic(Intrinsic::fshl, Ty); - Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt}); - return Builder.CreateAnd(Rotate, Op2); + Value *Rotate = Builder.CreateCall(F, {Ops[0], Ops[0], ShiftAmt}); + return Builder.CreateAnd(Rotate, Ops[2]); } case PPC::BI__builtin_ppc_load2r: { Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r); - Value *Op0 = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy); - Value *LoadIntrinsic = Builder.CreateCall(F, {Op0}); + Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy); + Value *LoadIntrinsic = Builder.CreateCall(F, Ops); return Builder.CreateTrunc(LoadIntrinsic, Int16Ty); } // FMA variations @@ -15943,14 +15924,11 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, } case PPC::BI__builtin_vsx_insertword: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - Value *Op2 = EmitScalarExpr(E->getArg(2)); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); // Third argument is a compile time constant int. It must be clamped to // to the range [0, 12]. - ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); assert(ArgCI && "Third arg to xxinsertw intrinsic must be constant integer"); const int64_t MaxIndex = 12; @@ -15961,38 +15939,40 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, // word from the first argument, and inserts it in the second argument. The // instruction extracts the word from its second input register and inserts // it into its first input register, so swap the first and second arguments. - std::swap(Op0, Op1); + std::swap(Ops[0], Ops[1]); // Need to cast the second argument from a vector of unsigned int to a // vector of long long. - Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2)); + Ops[1] = + Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int64Ty, 2)); if (getTarget().isLittleEndian()) { // Reverse the double words in the vector we will extract from. - Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2)); - Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0}); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2)); + Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef<int>{1, 0}); // Reverse the index. Index = MaxIndex - Index; } // Intrinsic expects the first arg to be a vector of int. - Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4)); - Op2 = ConstantInt::getSigned(Int32Ty, Index); - return Builder.CreateCall(F, {Op0, Op1, Op2}); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4)); + Ops[2] = ConstantInt::getSigned(Int32Ty, Index); + return Builder.CreateCall(F, Ops); } case PPC::BI__builtin_vsx_extractuword: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); // Intrinsic expects the first argument to be a vector of doublewords. - Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2)); // The second argument is a compile time constant int that needs to // be clamped to the range [0, 12]. 
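Both xxinsertw just above and xxextractuw continued below clamp the constant byte index to [0, 12] and, on little-endian targets, reverse it (Index = MaxIndex - Index) so the same source-level index addresses the same word element on either endianness. A small C++ sketch of that handling, using a hypothetical helper name and std::clamp for the clamping the comments call for:

    #include <algorithm>
    #include <cstdint>

    // Byte index actually encoded into xxinsertw/xxextractuw.
    unsigned wordByteIndex(int64_t Index, bool IsLittleEndian) {
      const int64_t MaxIndex = 12;
      Index = std::clamp(Index, int64_t(0), MaxIndex); // keep it in [0, 12]
      if (IsLittleEndian)
        Index = MaxIndex - Index;                      // reverse for little endian
      return unsigned(Index);
    }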
- ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1); + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]); assert(ArgCI && "Second Arg to xxextractuw intrinsic must be a constant integer!"); const int64_t MaxIndex = 12; @@ -16001,30 +15981,29 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, if (getTarget().isLittleEndian()) { // Reverse the index. Index = MaxIndex - Index; - Op1 = ConstantInt::getSigned(Int32Ty, Index); + Ops[1] = ConstantInt::getSigned(Int32Ty, Index); // Emit the call, then reverse the double words of the results vector. - Value *Call = Builder.CreateCall(F, {Op0, Op1}); + Value *Call = Builder.CreateCall(F, Ops); Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0}); return ShuffleCall; } else { - Op1 = ConstantInt::getSigned(Int32Ty, Index); - return Builder.CreateCall(F, {Op0, Op1}); + Ops[1] = ConstantInt::getSigned(Int32Ty, Index); + return Builder.CreateCall(F, Ops); } } case PPC::BI__builtin_vsx_xxpermdi: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - Value *Op2 = EmitScalarExpr(E->getArg(2)); - ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); assert(ArgCI && "Third arg must be constant integer!"); unsigned Index = ArgCI->getZExtValue(); - Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2)); - Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2)); + Ops[1] = + Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int64Ty, 2)); // Account for endianness by treating this as just a shuffle. So we use the // same indices for both LE and BE in order to produce expected results in @@ -16033,21 +16012,21 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, int ElemIdx1 = 2 + (Index & 1); int ShuffleElts[2] = {ElemIdx0, ElemIdx1}; - Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts); + Value *ShuffleCall = + Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleElts); QualType BIRetType = E->getType(); auto RetTy = ConvertType(BIRetType); return Builder.CreateBitCast(ShuffleCall, RetTy); } case PPC::BI__builtin_vsx_xxsldwi: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - Value *Op2 = EmitScalarExpr(E->getArg(2)); - ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); assert(ArgCI && "Third argument must be a compile time constant"); unsigned Index = ArgCI->getZExtValue() & 0x3; - Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4)); - Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4)); + Ops[1] = + Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int32Ty, 4)); // Create a shuffle mask int ElemIdx0; @@ -16071,31 +16050,28 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, } int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3}; - Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts); + Value *ShuffleCall = + Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleElts); QualType BIRetType = E->getType(); auto RetTy = ConvertType(BIRetType); return Builder.CreateBitCast(ShuffleCall, RetTy); } case PPC::BI__builtin_pack_vector_int128: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = 
EmitScalarExpr(E->getArg(1)); bool isLittleEndian = getTarget().isLittleEndian(); Value *UndefValue = - llvm::UndefValue::get(llvm::FixedVectorType::get(Op0->getType(), 2)); + llvm::UndefValue::get(llvm::FixedVectorType::get(Ops[0]->getType(), 2)); Value *Res = Builder.CreateInsertElement( - UndefValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0)); - Res = Builder.CreateInsertElement(Res, Op1, + UndefValue, Ops[0], (uint64_t)(isLittleEndian ? 1 : 0)); + Res = Builder.CreateInsertElement(Res, Ops[1], (uint64_t)(isLittleEndian ? 0 : 1)); return Builder.CreateBitCast(Res, ConvertType(E->getType())); } case PPC::BI__builtin_unpack_vector_int128: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - ConstantInt *Index = cast<ConstantInt>(Op1); + ConstantInt *Index = cast<ConstantInt>(Ops[1]); Value *Unpacked = Builder.CreateBitCast( - Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2)); + Ops[0], llvm::FixedVectorType::get(ConvertType(E->getType()), 2)); if (getTarget().isLittleEndian()) Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue()); @@ -16105,9 +16081,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_ppc_sthcx: { llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx); - Value *Op0 = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy); - Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty); - return Builder.CreateCall(F, {Op0, Op1}); + Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy); + Ops[1] = Builder.CreateSExt(Ops[1], Int32Ty); + return Builder.CreateCall(F, Ops); } // The PPC MMA builtins take a pointer to a __vector_quad as an argument. @@ -16120,12 +16096,6 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_##Name: #include "clang/Basic/BuiltinsPPC.def" { - SmallVector<Value *, 4> Ops; - for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) - if (E->getArg(i)->getType()->isArrayType()) - Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer()); - else - Ops.push_back(EmitScalarExpr(E->getArg(i))); // The first argument of these two builtins is a pointer used to store their // result. However, the llvm intrinsics return their result in multiple // return values. So, here we emit code extracting these values from the @@ -16210,9 +16180,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, QualType AtomicTy = E->getArg(0)->getType()->getPointeeType(); LValue LV = MakeAddrLValue(Addr, AtomicTy); auto Pair = EmitAtomicCompareExchange( - LV, RValue::get(OldVal), RValue::get(EmitScalarExpr(E->getArg(2))), - E->getExprLoc(), llvm::AtomicOrdering::Monotonic, - llvm::AtomicOrdering::Monotonic, true); + LV, RValue::get(OldVal), RValue::get(Ops[2]), E->getExprLoc(), + llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true); // Unlike c11's atomic_compare_exchange, accroding to // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp // > In either case, the contents of the memory location specified by addr @@ -16255,37 +16224,34 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, ? Int32Ty : Int64Ty; Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType); - return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))}); + return Builder.CreateCall(F, Ops); } case PPC::BI__builtin_ppc_mtspr: { llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32 ? 
Int32Ty : Int64Ty; Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType); - return Builder.CreateCall( - F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))}); + return Builder.CreateCall(F, Ops); } case PPC::BI__builtin_ppc_popcntb: { Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType}); - return Builder.CreateCall(F, {ArgValue}, "popcntb"); + return Builder.CreateCall(F, Ops, "popcntb"); } case PPC::BI__builtin_ppc_mtfsf: { // The builtin takes a uint32 that needs to be cast to an // f64 to be passed to the intrinsic. - Value *Cast = Builder.CreateUIToFP(EmitScalarExpr(E->getArg(1)), DoubleTy); + Value *Cast = Builder.CreateUIToFP(Ops[1], DoubleTy); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf); - return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0)), Cast}, ""); + return Builder.CreateCall(F, {Ops[0], Cast}, ""); } case PPC::BI__builtin_ppc_swdiv_nochk: case PPC::BI__builtin_ppc_swdivs_nochk: { FastMathFlags FMF = Builder.getFastMathFlags(); Builder.getFastMathFlags().setFast(); - Value *FDiv = - Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)), - EmitScalarExpr(E->getArg(1)), "swdiv_nochk"); + Value *FDiv = Builder.CreateFDiv(Ops[0], Ops[1], "swdiv_nochk"); Builder.getFastMathFlags() &= (FMF); return FDiv; } @@ -16325,9 +16291,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Intrinsic::experimental_constrained_sqrt)) .getScalarVal(); case PPC::BI__builtin_ppc_test_data_class: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - llvm::Type *ArgType = Op0->getType(); + llvm::Type *ArgType = EmitScalarExpr(E->getArg(0))->getType(); unsigned IntrinsicID; if (ArgType->isDoubleTy()) IntrinsicID = Intrinsic::ppc_test_data_class_d; @@ -16335,43 +16299,24 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, IntrinsicID = Intrinsic::ppc_test_data_class_f; else llvm_unreachable("Invalid Argument Type"); - return Builder.CreateCall(CGM.getIntrinsic(IntrinsicID), {Op0, Op1}, + return Builder.CreateCall(CGM.getIntrinsic(IntrinsicID), Ops, "test_data_class"); } case PPC::BI__builtin_ppc_maxfe: - return Builder.CreateCall( - CGM.getIntrinsic(Intrinsic::ppc_maxfe), - {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), - EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))}); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe), Ops); case PPC::BI__builtin_ppc_maxfl: - return Builder.CreateCall( - CGM.getIntrinsic(Intrinsic::ppc_maxfl), - {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), - EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))}); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl), Ops); case PPC::BI__builtin_ppc_maxfs: - return Builder.CreateCall( - CGM.getIntrinsic(Intrinsic::ppc_maxfs), - {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), - EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))}); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs), Ops); case PPC::BI__builtin_ppc_minfe: - return Builder.CreateCall( - CGM.getIntrinsic(Intrinsic::ppc_minfe), - {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), - EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))}); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe), Ops); case PPC::BI__builtin_ppc_minfl: - return Builder.CreateCall( - CGM.getIntrinsic(Intrinsic::ppc_minfl), - 
{EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), - EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))}); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl), Ops); case PPC::BI__builtin_ppc_minfs: - return Builder.CreateCall( - CGM.getIntrinsic(Intrinsic::ppc_minfs), - {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), - EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))}); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs), Ops); case PPC::BI__builtin_ppc_swdiv: case PPC::BI__builtin_ppc_swdivs: - return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)), - EmitScalarExpr(E->getArg(1)), "swdiv"); + return Builder.CreateFDiv(Ops[0], Ops[1], "swdiv"); } } diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-fastmath.c b/clang/test/CodeGen/PowerPC/builtins-ppc-fastmath.c index c2fa0c5f73ce8b5a82e0ed2fa8cd541699e43ae8..1dc0f43cf4dd48b9e0aa3b6274593adbaf25b4cd 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-fastmath.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-fastmath.c @@ -18,9 +18,11 @@ extern vector double f; // CHECK-LABEL: @test_flags_recipdivf( // CHECK: [[TMP0:%.*]] = load <4 x float>, <4 x float>* @a, align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* @b, align 16 -// CHECK-NEXT: [[RECIPDIV:%.*]] = fdiv fast <4 x float> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* @c, align 16 -// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[RECIPDIV]], [[TMP2]] +// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* @a, align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* @b, align 16 +// CHECK-NEXT: [[RECIPDIV:%.*]] = fdiv fast <4 x float> [[TMP2]], [[TMP3]] +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* @c, align 16 +// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[RECIPDIV]], [[TMP4]] // CHECK-NEXT: ret <4 x float> [[ADD]] // vector float test_flags_recipdivf() { @@ -30,9 +32,11 @@ vector float test_flags_recipdivf() { // CHECK-LABEL: @test_flags_recipdivd( // CHECK: [[TMP0:%.*]] = load <2 x double>, <2 x double>* @d, align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* @e, align 16 -// CHECK-NEXT: [[RECIPDIV:%.*]] = fdiv fast <2 x double> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* @f, align 16 -// CHECK-NEXT: [[ADD:%.*]] = fadd <2 x double> [[RECIPDIV]], [[TMP2]] +// CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* @d, align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* @e, align 16 +// CHECK-NEXT: [[RECIPDIV:%.*]] = fdiv fast <2 x double> [[TMP2]], [[TMP3]] +// CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* @f, align 16 +// CHECK-NEXT: [[ADD:%.*]] = fadd <2 x double> [[RECIPDIV]], [[TMP4]] // CHECK-NEXT: ret <2 x double> [[ADD]] // vector double test_flags_recipdivd() { @@ -41,10 +45,11 @@ vector double test_flags_recipdivd() { // CHECK-LABEL: @test_flags_rsqrtf( // CHECK: [[TMP0:%.*]] = load <4 x float>, <4 x float>* @a, align 16 -// CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) -// CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[TMP1]] -// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* @b, align 16 -// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[RSQRT]], [[TMP2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* @a, align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call fast <4 x float> 
@llvm.sqrt.v4f32(<4 x float> [[TMP1]]) +// CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* @b, align 16 +// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[RSQRT]], [[TMP3]] // CHECK-NEXT: ret <4 x float> [[ADD]] // vector float test_flags_rsqrtf() { @@ -53,10 +58,11 @@ vector float test_flags_rsqrtf() { // CHECK-LABEL: @test_flags_rsqrtd( // CHECK: [[TMP0:%.*]] = load <2 x double>, <2 x double>* @d, align 16 -// CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP0]]) -// CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP1]] -// CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* @e, align 16 -// CHECK-NEXT: [[ADD:%.*]] = fadd <2 x double> [[RSQRT]], [[TMP2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* @d, align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP1]]) +// CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* @e, align 16 +// CHECK-NEXT: [[ADD:%.*]] = fadd <2 x double> [[RSQRT]], [[TMP3]] // CHECK-NEXT: ret <2 x double> [[ADD]] // vector double test_flags_rsqrtd() { diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c deleted file mode 100644 index c9b09bd87e127deaa5be5685549c450675d95ac4..0000000000000000000000000000000000000000 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c +++ /dev/null @@ -1,259 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \ -// RUN: -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -target-cpu pwr10 \ -// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-BE - -// CHECK-LABEL: @testVQLocal( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VC_ADDR:%.*]] = alloca <16 x i8>, align 16 -// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-NEXT: [[VQ1:%.*]] = alloca <512 x i1>, align 64 -// CHECK-NEXT: [[VQ2:%.*]] = alloca <512 x i1>, align 64 -// CHECK-NEXT: [[VQ3:%.*]] = alloca <512 x i1>, align 64 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: store <16 x i8> [[VC:%.*]], <16 x i8>* [[VC_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[TMP2]], align 64 -// CHECK-NEXT: store <512 x i1> [[TMP3]], <512 x i1>* [[VQ1]], align 64 -// CHECK-NEXT: [[TMP4:%.*]] = call <512 x i1> @llvm.ppc.mma.xxsetaccz() -// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[VQ2]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16 -// CHECK-NEXT: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16 -// CHECK-NEXT: [[TMP7:%.*]] = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[TMP5]], <16 x i8> [[TMP6]]) -// CHECK-NEXT: store <512 x i1> [[TMP7]], <512 x 
i1>* [[VQ3]], align 64 -// CHECK-NEXT: [[TMP8:%.*]] = load <512 x i1>, <512 x i1>* [[VQ3]], align 64 -// CHECK-NEXT: [[TMP9:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: store <512 x i1> [[TMP8]], <512 x i1>* [[TMP9]], align 64 -// CHECK-NEXT: ret void -// -// CHECK-BE-LABEL: @testVQLocal( -// CHECK-BE-NEXT: entry: -// CHECK-BE-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-BE-NEXT: [[VC_ADDR:%.*]] = alloca <16 x i8>, align 16 -// CHECK-BE-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-BE-NEXT: [[VQ1:%.*]] = alloca <512 x i1>, align 64 -// CHECK-BE-NEXT: [[VQ2:%.*]] = alloca <512 x i1>, align 64 -// CHECK-BE-NEXT: [[VQ3:%.*]] = alloca <512 x i1>, align 64 -// CHECK-BE-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-BE-NEXT: store <16 x i8> [[VC:%.*]], <16 x i8>* [[VC_ADDR]], align 16 -// CHECK-BE-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>* -// CHECK-BE-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8 -// CHECK-BE-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8 -// CHECK-BE-NEXT: [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[TMP2]], align 64 -// CHECK-BE-NEXT: store <512 x i1> [[TMP3]], <512 x i1>* [[VQ1]], align 64 -// CHECK-BE-NEXT: [[TMP4:%.*]] = call <512 x i1> @llvm.ppc.mma.xxsetaccz() -// CHECK-BE-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[VQ2]], align 64 -// CHECK-BE-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16 -// CHECK-BE-NEXT: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16 -// CHECK-BE-NEXT: [[TMP7:%.*]] = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[TMP5]], <16 x i8> [[TMP6]]) -// CHECK-BE-NEXT: store <512 x i1> [[TMP7]], <512 x i1>* [[VQ3]], align 64 -// CHECK-BE-NEXT: [[TMP8:%.*]] = load <512 x i1>, <512 x i1>* [[VQ3]], align 64 -// CHECK-BE-NEXT: [[TMP9:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8 -// CHECK-BE-NEXT: store <512 x i1> [[TMP8]], <512 x i1>* [[TMP9]], align 64 -// CHECK-BE-NEXT: ret void -// -void testVQLocal(int *ptr, vector unsigned char vc) { - __vector_quad *vqp = (__vector_quad *)ptr; - __vector_quad vq1 = *vqp; - __vector_quad vq2; - __builtin_mma_xxsetaccz(&vq2); - __vector_quad vq3; - __builtin_mma_xvi4ger8(&vq3, vc, vc); - *vqp = vq3; -} - -// CHECK-LABEL: @testVPLocal( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VC_ADDR:%.*]] = alloca <16 x i8>, align 16 -// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8 -// CHECK-NEXT: [[VP1:%.*]] = alloca <256 x i1>, align 32 -// CHECK-NEXT: [[VP2:%.*]] = alloca <256 x i1>, align 32 -// CHECK-NEXT: [[VP3:%.*]] = alloca <256 x i1>, align 32 -// CHECK-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: store <16 x i8> [[VC:%.*]], <16 x i8>* [[VC_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>* -// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32 -// CHECK-NEXT: store <256 x i1> [[TMP3]], <256 x i1>* [[VP1]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* 
[[VC_ADDR]], align 16 -// CHECK-NEXT: [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]]) -// CHECK-NEXT: store <256 x i1> [[TMP6]], <256 x i1>* [[VP2]], align 64 -// CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16 -// CHECK-NEXT: [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP8]], <16 x i8> [[TMP7]]) -// CHECK-NEXT: store <256 x i1> [[TMP9]], <256 x i1>* [[VP2]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <256 x i1>, <256 x i1>* [[VP3]], align 32 -// CHECK-NEXT: [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16 -// CHECK-NEXT: [[TMP12:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP10]], <16 x i8> [[TMP11]]) -// CHECK-NEXT: store <512 x i1> [[TMP12]], <512 x i1>* [[VQ]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <256 x i1>, <256 x i1>* [[VP3]], align 32 -// CHECK-NEXT: [[TMP14:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8 -// CHECK-NEXT: store <256 x i1> [[TMP13]], <256 x i1>* [[TMP14]], align 32 -// CHECK-NEXT: ret void -// -// CHECK-BE-LABEL: @testVPLocal( -// CHECK-BE-NEXT: entry: -// CHECK-BE-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-BE-NEXT: [[VC_ADDR:%.*]] = alloca <16 x i8>, align 16 -// CHECK-BE-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8 -// CHECK-BE-NEXT: [[VP1:%.*]] = alloca <256 x i1>, align 32 -// CHECK-BE-NEXT: [[VP2:%.*]] = alloca <256 x i1>, align 32 -// CHECK-BE-NEXT: [[VP3:%.*]] = alloca <256 x i1>, align 32 -// CHECK-BE-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64 -// CHECK-BE-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-BE-NEXT: store <16 x i8> [[VC:%.*]], <16 x i8>* [[VC_ADDR]], align 16 -// CHECK-BE-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>* -// CHECK-BE-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8 -// CHECK-BE-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8 -// CHECK-BE-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32 -// CHECK-BE-NEXT: store <256 x i1> [[TMP3]], <256 x i1>* [[VP1]], align 32 -// CHECK-BE-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16 -// CHECK-BE-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16 -// CHECK-BE-NEXT: [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]]) -// CHECK-BE-NEXT: store <256 x i1> [[TMP6]], <256 x i1>* [[VP2]], align 64 -// CHECK-BE-NEXT: [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16 -// CHECK-BE-NEXT: [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16 -// CHECK-BE-NEXT: [[TMP9:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]]) -// CHECK-BE-NEXT: store <256 x i1> [[TMP9]], <256 x i1>* [[VP2]], align 64 -// CHECK-BE-NEXT: [[TMP10:%.*]] = load <256 x i1>, <256 x i1>* [[VP3]], align 32 -// CHECK-BE-NEXT: [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16 -// CHECK-BE-NEXT: [[TMP12:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP10]], <16 x i8> [[TMP11]]) -// CHECK-BE-NEXT: store <512 x i1> [[TMP12]], <512 x i1>* [[VQ]], align 64 -// CHECK-BE-NEXT: [[TMP13:%.*]] = load <256 x i1>, <256 x i1>* [[VP3]], align 32 -// CHECK-BE-NEXT: [[TMP14:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8 -// CHECK-BE-NEXT: store <256 x i1> 
[[TMP13]], <256 x i1>* [[TMP14]], align 32 -// CHECK-BE-NEXT: ret void -// -void testVPLocal(int *ptr, vector unsigned char vc) { - __vector_pair *vpp = (__vector_pair *)ptr; - __vector_pair vp1 = *vpp; - __vector_pair vp2; - __builtin_vsx_assemble_pair(&vp2, vc, vc); - __builtin_vsx_build_pair(&vp2, vc, vc); - __vector_pair vp3; - __vector_quad vq; - __builtin_mma_xvf64ger(&vq, vp3, vc); - *vpp = vp3; -} - -// CHECK-LABEL: @testRestrictQualifiedPointer2( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ACC_ADDR:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-NEXT: [[ARR:%.*]] = alloca [4 x <4 x float>], align 16 -// CHECK-NEXT: store <512 x i1>* [[ACC:%.*]], <512 x i1>** [[ACC_ADDR]], align 8 -// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[ARR]], i64 0, i64 0 -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>, <512 x i1>* [[TMP1]], align 64 -// CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float>* [[ARRAYDECAY]] to <16 x i8>* -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 0 -// CHECK-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP6]], align 16 -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 1 -// CHECK-NEXT: store <16 x i8> [[TMP7]], <16 x i8>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 2 -// CHECK-NEXT: store <16 x i8> [[TMP9]], <16 x i8>* [[TMP10]], align 16 -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 3 -// CHECK-NEXT: store <16 x i8> [[TMP11]], <16 x i8>* [[TMP12]], align 16 -// CHECK-NEXT: ret void -// -// CHECK-BE-LABEL: @testRestrictQualifiedPointer2( -// CHECK-BE-NEXT: entry: -// CHECK-BE-NEXT: [[ACC_ADDR:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-BE-NEXT: [[ARR:%.*]] = alloca [4 x <4 x float>], align 16 -// CHECK-BE-NEXT: store <512 x i1>* [[ACC:%.*]], <512 x i1>** [[ACC_ADDR]], align 8 -// CHECK-BE-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[ARR]], i64 0, i64 0 -// CHECK-BE-NEXT: [[TMP0:%.*]] = load <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8 -// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8 -// CHECK-BE-NEXT: [[TMP2:%.*]] = load <512 x i1>, <512 x i1>* [[TMP1]], align 64 -// CHECK-BE-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]]) -// CHECK-BE-NEXT: [[TMP4:%.*]] = bitcast <4 x float>* [[ARRAYDECAY]] to <16 x i8>* -// CHECK-BE-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0 -// CHECK-BE-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 0 -// CHECK-BE-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP6]], align 16 -// 
CHECK-BE-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1 -// CHECK-BE-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 1 -// CHECK-BE-NEXT: store <16 x i8> [[TMP7]], <16 x i8>* [[TMP8]], align 16 -// CHECK-BE-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2 -// CHECK-BE-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 2 -// CHECK-BE-NEXT: store <16 x i8> [[TMP9]], <16 x i8>* [[TMP10]], align 16 -// CHECK-BE-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3 -// CHECK-BE-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 3 -// CHECK-BE-NEXT: store <16 x i8> [[TMP11]], <16 x i8>* [[TMP12]], align 16 -// CHECK-BE-NEXT: ret void -// -void testRestrictQualifiedPointer2(__vector_quad *__restrict acc) { - vector float arr[4]; - __builtin_mma_disassemble_acc(arr, acc); -} - -// CHECK-LABEL: @testVolatileQualifiedPointer2( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ACC_ADDR:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-NEXT: [[ARR:%.*]] = alloca [4 x <4 x float>], align 16 -// CHECK-NEXT: store volatile <512 x i1>* [[ACC:%.*]], <512 x i1>** [[ACC_ADDR]], align 8 -// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[ARR]], i64 0, i64 0 -// CHECK-NEXT: [[TMP0:%.*]] = load volatile <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load volatile <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>, <512 x i1>* [[TMP1]], align 64 -// CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float>* [[ARRAYDECAY]] to <16 x i8>* -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 0 -// CHECK-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP6]], align 16 -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 1 -// CHECK-NEXT: store <16 x i8> [[TMP7]], <16 x i8>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 2 -// CHECK-NEXT: store <16 x i8> [[TMP9]], <16 x i8>* [[TMP10]], align 16 -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 3 -// CHECK-NEXT: store <16 x i8> [[TMP11]], <16 x i8>* [[TMP12]], align 16 -// CHECK-NEXT: ret void -// -// CHECK-BE-LABEL: @testVolatileQualifiedPointer2( -// CHECK-BE-NEXT: entry: -// CHECK-BE-NEXT: [[ACC_ADDR:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-BE-NEXT: [[ARR:%.*]] = alloca [4 x <4 x float>], align 16 -// CHECK-BE-NEXT: store volatile <512 x i1>* [[ACC:%.*]], <512 x i1>** [[ACC_ADDR]], align 8 -// CHECK-BE-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[ARR]], i64 0, i64 0 -// CHECK-BE-NEXT: [[TMP0:%.*]] = load volatile <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8 -// CHECK-BE-NEXT: [[TMP1:%.*]] = 
load volatile <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8 -// CHECK-BE-NEXT: [[TMP2:%.*]] = load <512 x i1>, <512 x i1>* [[TMP1]], align 64 -// CHECK-BE-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]]) -// CHECK-BE-NEXT: [[TMP4:%.*]] = bitcast <4 x float>* [[ARRAYDECAY]] to <16 x i8>* -// CHECK-BE-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0 -// CHECK-BE-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 0 -// CHECK-BE-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP6]], align 16 -// CHECK-BE-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1 -// CHECK-BE-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 1 -// CHECK-BE-NEXT: store <16 x i8> [[TMP7]], <16 x i8>* [[TMP8]], align 16 -// CHECK-BE-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2 -// CHECK-BE-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 2 -// CHECK-BE-NEXT: store <16 x i8> [[TMP9]], <16 x i8>* [[TMP10]], align 16 -// CHECK-BE-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3 -// CHECK-BE-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 3 -// CHECK-BE-NEXT: store <16 x i8> [[TMP11]], <16 x i8>* [[TMP12]], align 16 -// CHECK-BE-NEXT: ret void -// -void testVolatileQualifiedPointer2(__vector_quad *__volatile acc) { - vector float arr[4]; - __builtin_mma_disassemble_acc(arr, acc); -} diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-stmtexpr-argument.c b/clang/test/CodeGen/PowerPC/builtins-ppc-stmtexpr-argument.c deleted file mode 100644 index bef60007d369e867fa9171d8872da7aae48fdbf4..0000000000000000000000000000000000000000 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-stmtexpr-argument.c +++ /dev/null @@ -1,22 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: powerpc-registered-target -// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu \ -// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s -// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu \ -// RUN: -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s - -// The argument expression must not be emitted multiple times - -// CHECK-LABEL: @test_fric( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[D:%.*]] = alloca double, align 8 -// CHECK-NEXT: [[TMP:%.*]] = alloca double, align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[D]], align 8 -// CHECK-NEXT: store double [[TMP0]], double* [[TMP]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[TMP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.rint.f64(double [[TMP1]]) -// CHECK-NEXT: ret void -// -void test_fric() { - __fric(({double d; d;})); -} diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c b/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c index 6190a4e9dfdddfef4ab635b46aa84ac5ac580b56..944976cafc91c4650e934678bb6a587f964f8491 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c @@ -2213,6 +2213,8 @@ vector double xxsldwi_should_not_assert(vector double a, vector double b) { void test_vector_cpsgn_float(vector float a, vector float b) { // CHECK-LABEL: test_vector_cpsgn_float +// CHECK-DAG: load{{.*}}%__a +// CHECK-DAG: load{{.*}}%__b // CHECK-NOT: SEPARATOR // CHECK-DAG: [[RA:%[0-9]+]] = load 
<4 x float>, <4 x float>* %__a.addr // CHECK-DAG: [[RB:%[0-9]+]] = load <4 x float>, <4 x float>* %__b.addr @@ -2222,6 +2224,8 @@ void test_vector_cpsgn_float(vector float a, vector float b) { void test_vector_cpsgn_double(vector double a, vector double b) { // CHECK-LABEL: test_vector_cpsgn_double +// CHECK-DAG: load{{.*}}%__a +// CHECK-DAG: load{{.*}}%__b // CHECK-NOT: SEPARATOR // CHECK-DAG: [[RA:%[0-9]+]] = load <2 x double>, <2 x double>* %__a.addr // CHECK-DAG: [[RB:%[0-9]+]] = load <2 x double>, <2 x double>* %__b.addr @@ -2231,6 +2235,8 @@ void test_vector_cpsgn_double(vector double a, vector double b) { void test_builtin_xvcpsgnsp(vector float a, vector float b) { // CHECK-LABEL: test_builtin_xvcpsgnsp +// CHECK-DAG: load{{.*}}%a +// CHECK-DAG: load{{.*}}%b // CHECK-NOT: SEPARATOR // CHECK-DAG: [[RA:%[0-9]+]] = load <4 x float>, <4 x float>* %a.addr // CHECK-DAG: [[RB:%[0-9]+]] = load <4 x float>, <4 x float>* %b.addr @@ -2240,6 +2246,8 @@ void test_builtin_xvcpsgnsp(vector float a, vector float b) { void test_builtin_xvcpsgndp(vector double a, vector double b) { // CHECK-LABEL: test_builtin_xvcpsgndp +// CHECK-DAG: load{{.*}}%a +// CHECK-DAG: load{{.*}}%b // CHECK-NOT: SEPARATOR // CHECK-DAG: [[RA:%[0-9]+]] = load <2 x double>, <2 x double>* %a.addr // CHECK-DAG: [[RB:%[0-9]+]] = load <2 x double>, <2 x double>* %b.addr diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas.c index df46c9171353ac903e304a7b5b9501761e0543eb..307f8a930147e1d9c5c7558d2978cd8bc9439ce4 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas.c @@ -14,9 +14,9 @@ // CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 // CHECK-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4 // CHECK-NEXT: store i32 [[C:%.*]], i32* [[C_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[C_ADDR]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak volatile i32* [[A_ADDR]], i32 [[TMP0]], i32 [[TMP1]] monotonic monotonic, align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[C_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak volatile i32* [[A_ADDR]], i32 [[TMP1]], i32 [[TMP0]] monotonic monotonic, align 4 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1 // CHECK-NEXT: store i32 [[TMP3]], i32* [[B_ADDR]], align 4 @@ -36,9 +36,9 @@ int test_builtin_ppc_compare_and_swap(int a, int b, int c) { // CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR]], align 8 // CHECK-NEXT: store i64 [[B:%.*]], i64* [[B_ADDR]], align 8 // CHECK-NEXT: store i64 [[C:%.*]], i64* [[C_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[C_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak volatile i64* [[A_ADDR]], i64 [[TMP0]], i64 [[TMP1]] monotonic monotonic, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak volatile i64* [[A_ADDR]], i64 [[TMP1]], i64 [[TMP0]] monotonic monotonic, align 8 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 // CHECK-NEXT: store i64 [[TMP3]], 
i64* [[B_ADDR]], align 8 diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-fetch.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-fetch.c index 1b4004d665ae4e66c3e85dff2bda168d4126bed6..7612834e674ca4920809d918bbf866710dcd2dea 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-fetch.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-fetch.c @@ -12,7 +12,8 @@ // CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 // CHECK-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add i32* [[A_ADDR]], i32 [[TMP0]] monotonic, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw add i32* [[A_ADDR]], i32 [[TMP1]] monotonic, align 4 // CHECK-NEXT: ret void // void test_builtin_ppc_fetch_and_add(int a, int b) { @@ -26,7 +27,8 @@ void test_builtin_ppc_fetch_and_add(int a, int b) { // CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR]], align 8 // CHECK-NEXT: store i64 [[B:%.*]], i64* [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add i64* [[A_ADDR]], i64 [[TMP0]] monotonic, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw add i64* [[A_ADDR]], i64 [[TMP1]] monotonic, align 8 // CHECK-NEXT: ret void // void test_builtin_ppc_fetch_and_addlp(long a, long b) { @@ -39,7 +41,8 @@ void test_builtin_ppc_fetch_and_addlp(long a, long b) { // CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 // CHECK-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and i32* [[A_ADDR]], i32 [[TMP0]] monotonic, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and i32* [[A_ADDR]], i32 [[TMP1]] monotonic, align 4 // CHECK-NEXT: ret void // void test_builtin_ppc_fetch_and_and(unsigned int a, unsigned int b) { @@ -52,7 +55,8 @@ void test_builtin_ppc_fetch_and_and(unsigned int a, unsigned int b) { // CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR]], align 8 // CHECK-NEXT: store i64 [[B:%.*]], i64* [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and i64* [[A_ADDR]], i64 [[TMP0]] monotonic, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and i64* [[A_ADDR]], i64 [[TMP1]] monotonic, align 8 // CHECK-NEXT: ret void // void test_builtin_ppc_fetch_and_andlp(unsigned long a, unsigned long b) { @@ -65,7 +69,8 @@ void test_builtin_ppc_fetch_and_andlp(unsigned long a, unsigned long b) { // CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 // CHECK-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or i32* [[A_ADDR]], i32 [[TMP0]] monotonic, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw or i32* [[A_ADDR]], i32 [[TMP1]] monotonic, align 4 // CHECK-NEXT: ret void // void test_builtin_ppc_fetch_and_or(unsigned int a, unsigned int b) { @@ -78,7 +83,8 @@ void test_builtin_ppc_fetch_and_or(unsigned int a, unsigned int b) { // CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR]], align 8 // CHECK-NEXT: store i64 
[[B:%.*]], i64* [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or i64* [[A_ADDR]], i64 [[TMP0]] monotonic, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw or i64* [[A_ADDR]], i64 [[TMP1]] monotonic, align 8 // CHECK-NEXT: ret void // void test_builtin_ppc_fetch_and_orlp(unsigned long a, unsigned long b) { @@ -91,7 +97,8 @@ void test_builtin_ppc_fetch_and_orlp(unsigned long a, unsigned long b) { // CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 // CHECK-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg i32* [[A_ADDR]], i32 [[TMP0]] monotonic, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw xchg i32* [[A_ADDR]], i32 [[TMP1]] monotonic, align 4 // CHECK-NEXT: ret void // void test_builtin_ppc_fetch_and_swap(unsigned int a, unsigned int b) { @@ -104,7 +111,8 @@ void test_builtin_ppc_fetch_and_swap(unsigned int a, unsigned int b) { // CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR]], align 8 // CHECK-NEXT: store i64 [[B:%.*]], i64* [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg i64* [[A_ADDR]], i64 [[TMP0]] monotonic, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw xchg i64* [[A_ADDR]], i64 [[TMP1]] monotonic, align 8 // CHECK-NEXT: ret void // void test_builtin_ppc_fetch_and_swaplp(unsigned long a, unsigned long b) { diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-fp.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-fp.c index 46b2a74a77e666c4d73cb0381b9d694387398628..94aa15c00df3983496661029c1a8648be35922f3 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-fp.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-fp.c @@ -15,8 +15,9 @@ extern float f; // CHECK-LABEL: @test_fric( // CHECK: [[TMP0:%.*]] = load double, double* @a, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.rint.f64(double [[TMP0]]) -// CHECK-NEXT: ret double [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.rint.f64(double [[TMP1]]) +// CHECK-NEXT: ret double [[TMP2]] // double test_fric() { return __fric(a); @@ -24,8 +25,9 @@ double test_fric() { // CHECK-LABEL: @test_frim( // CHECK: [[TMP0:%.*]] = load double, double* @a, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.floor.f64(double [[TMP0]]) -// CHECK-NEXT: ret double [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.floor.f64(double [[TMP1]]) +// CHECK-NEXT: ret double [[TMP2]] // double test_frim() { return __frim(a); @@ -33,8 +35,9 @@ double test_frim() { // CHECK-LABEL: @test_frims( // CHECK: [[TMP0:%.*]] = load float, float* @d, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.floor.f32(float [[TMP0]]) -// CHECK-NEXT: ret float [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.floor.f32(float [[TMP1]]) +// CHECK-NEXT: ret float [[TMP2]] // float test_frims() { return __frims(d); @@ -42,8 +45,9 @@ float test_frims() { // CHECK-LABEL: @test_frin( // CHECK: [[TMP0:%.*]] = load double, double* @a, align 8 -// CHECK-NEXT: 
[[TMP1:%.*]] = call double @llvm.round.f64(double [[TMP0]]) -// CHECK-NEXT: ret double [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.round.f64(double [[TMP1]]) +// CHECK-NEXT: ret double [[TMP2]] // double test_frin() { return __frin(a); @@ -51,8 +55,9 @@ double test_frin() { // CHECK-LABEL: @test_frins( // CHECK: [[TMP0:%.*]] = load float, float* @d, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.round.f32(float [[TMP0]]) -// CHECK-NEXT: ret float [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.round.f32(float [[TMP1]]) +// CHECK-NEXT: ret float [[TMP2]] // float test_frins() { return __frins(d); @@ -60,8 +65,9 @@ float test_frins() { // CHECK-LABEL: @test_frip( // CHECK: [[TMP0:%.*]] = load double, double* @a, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.ceil.f64(double [[TMP0]]) -// CHECK-NEXT: ret double [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.ceil.f64(double [[TMP1]]) +// CHECK-NEXT: ret double [[TMP2]] // double test_frip() { return __frip(a); @@ -69,8 +75,9 @@ double test_frip() { // CHECK-LABEL: @test_frips( // CHECK: [[TMP0:%.*]] = load float, float* @d, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.ceil.f32(float [[TMP0]]) -// CHECK-NEXT: ret float [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.ceil.f32(float [[TMP1]]) +// CHECK-NEXT: ret float [[TMP2]] // float test_frips() { return __frips(d); @@ -78,8 +85,9 @@ float test_frips() { // CHECK-LABEL: @test_friz( // CHECK: [[TMP0:%.*]] = load double, double* @a, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.trunc.f64(double [[TMP0]]) -// CHECK-NEXT: ret double [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.trunc.f64(double [[TMP1]]) +// CHECK-NEXT: ret double [[TMP2]] // double test_friz() { return __friz(a); @@ -87,8 +95,9 @@ double test_friz() { // CHECK-LABEL: @test_frizs( // CHECK: [[TMP0:%.*]] = load float, float* @d, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.trunc.f32(float [[TMP0]]) -// CHECK-NEXT: ret float [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.trunc.f32(float [[TMP1]]) +// CHECK-NEXT: ret float [[TMP2]] // float test_frizs() { return __frizs(d); @@ -136,8 +145,9 @@ float test_frsqrtes() { // CHECK-LABEL: @test_fsqrt( // CHECK: [[TMP0:%.*]] = load double, double* @a, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.sqrt.f64(double [[TMP0]]) -// CHECK-NEXT: ret double [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.sqrt.f64(double [[TMP1]]) +// CHECK-NEXT: ret double [[TMP2]] // double test_fsqrt() { return __fsqrt(a); @@ -145,8 +155,9 @@ double test_fsqrt() { // CHECK-LABEL: @test_fsqrts( // CHECK: [[TMP0:%.*]] = load float, float* @d, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.sqrt.f32(float [[TMP0]]) -// CHECK-NEXT: ret float [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.sqrt.f32(float [[TMP1]]) +// CHECK-NEXT: ret float [[TMP2]] // float test_fsqrts() { return __fsqrts(d); @@ -154,8 +165,9 @@ float test_fsqrts() { // CHECK-LABEL: @test_builtin_ppc_fric( 
// CHECK: [[TMP0:%.*]] = load double, double* @a, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.rint.f64(double [[TMP0]]) -// CHECK-NEXT: ret double [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.rint.f64(double [[TMP1]]) +// CHECK-NEXT: ret double [[TMP2]] // double test_builtin_ppc_fric() { return __builtin_ppc_fric(a); @@ -163,8 +175,9 @@ double test_builtin_ppc_fric() { // CHECK-LABEL: @test_builtin_ppc_frim( // CHECK: [[TMP0:%.*]] = load double, double* @a, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.floor.f64(double [[TMP0]]) -// CHECK-NEXT: ret double [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.floor.f64(double [[TMP1]]) +// CHECK-NEXT: ret double [[TMP2]] // double test_builtin_ppc_frim() { return __builtin_ppc_frim(a); @@ -172,8 +185,9 @@ double test_builtin_ppc_frim() { // CHECK-LABEL: @test_builtin_ppc_frims( // CHECK: [[TMP0:%.*]] = load float, float* @d, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.floor.f32(float [[TMP0]]) -// CHECK-NEXT: ret float [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.floor.f32(float [[TMP1]]) +// CHECK-NEXT: ret float [[TMP2]] // float test_builtin_ppc_frims() { return __builtin_ppc_frims(d); @@ -181,8 +195,9 @@ float test_builtin_ppc_frims() { // CHECK-LABEL: @test_builtin_ppc_frin( // CHECK: [[TMP0:%.*]] = load double, double* @a, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.round.f64(double [[TMP0]]) -// CHECK-NEXT: ret double [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.round.f64(double [[TMP1]]) +// CHECK-NEXT: ret double [[TMP2]] // double test_builtin_ppc_frin() { return __builtin_ppc_frin(a); @@ -190,8 +205,9 @@ double test_builtin_ppc_frin() { // CHECK-LABEL: @test_builtin_ppc_frins( // CHECK: [[TMP0:%.*]] = load float, float* @d, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.round.f32(float [[TMP0]]) -// CHECK-NEXT: ret float [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.round.f32(float [[TMP1]]) +// CHECK-NEXT: ret float [[TMP2]] // float test_builtin_ppc_frins() { return __builtin_ppc_frins(d); @@ -199,8 +215,9 @@ float test_builtin_ppc_frins() { // CHECK-LABEL: @test_builtin_ppc_frip( // CHECK: [[TMP0:%.*]] = load double, double* @a, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.ceil.f64(double [[TMP0]]) -// CHECK-NEXT: ret double [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.ceil.f64(double [[TMP1]]) +// CHECK-NEXT: ret double [[TMP2]] // double test_builtin_ppc_frip() { return __builtin_ppc_frip(a); @@ -208,8 +225,9 @@ double test_builtin_ppc_frip() { // CHECK-LABEL: @test_builtin_ppc_frips( // CHECK: [[TMP0:%.*]] = load float, float* @d, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.ceil.f32(float [[TMP0]]) -// CHECK-NEXT: ret float [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.ceil.f32(float [[TMP1]]) +// CHECK-NEXT: ret float [[TMP2]] // float test_builtin_ppc_frips() { return __builtin_ppc_frips(d); @@ -217,8 +235,9 @@ float test_builtin_ppc_frips() { // CHECK-LABEL: @test_builtin_ppc_friz( // CHECK: [[TMP0:%.*]] = load double, double* @a, 
align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.trunc.f64(double [[TMP0]]) -// CHECK-NEXT: ret double [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.trunc.f64(double [[TMP1]]) +// CHECK-NEXT: ret double [[TMP2]] // double test_builtin_ppc_friz() { return __builtin_ppc_friz(a); @@ -226,8 +245,9 @@ double test_builtin_ppc_friz() { // CHECK-LABEL: @test_builtin_ppc_frizs( // CHECK: [[TMP0:%.*]] = load float, float* @d, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.trunc.f32(float [[TMP0]]) -// CHECK-NEXT: ret float [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.trunc.f32(float [[TMP1]]) +// CHECK-NEXT: ret float [[TMP2]] // float test_builtin_ppc_frizs() { return __builtin_ppc_frizs(d); @@ -275,8 +295,9 @@ float test_builtin_ppc_frsqrtes() { // CHECK-LABEL: @test_builtin_ppc_fsqrt( // CHECK: [[TMP0:%.*]] = load double, double* @a, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.sqrt.f64(double [[TMP0]]) -// CHECK-NEXT: ret double [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.sqrt.f64(double [[TMP1]]) +// CHECK-NEXT: ret double [[TMP2]] // double test_builtin_ppc_fsqrt() { return __builtin_ppc_fsqrt(a); @@ -284,8 +305,9 @@ double test_builtin_ppc_fsqrt() { // CHECK-LABEL: @test_builtin_ppc_fsqrts( // CHECK: [[TMP0:%.*]] = load float, float* @d, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.sqrt.f32(float [[TMP0]]) -// CHECK-NEXT: ret float [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.sqrt.f32(float [[TMP1]]) +// CHECK-NEXT: ret float [[TMP2]] // float test_builtin_ppc_fsqrts() { return __builtin_ppc_fsqrts(d); diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c index 200540c6e79fd0fad944181c42fceeace6720f9a..a6b2e1903445e5c405015f3d937291b608e99773 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c @@ -95,6 +95,7 @@ float fnmadds (float f) { // CHECK-LABEL: @fnmsub( // CHECK: [[D_ADDR:%.*]] = alloca double, align 8 // CHECK-NEXT: store double [[D:%.*]], double* [[D_ADDR]], align 8 +// CHECK-COUNT-3: load double, double* [[D_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[D_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[D_ADDR]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load double, double* [[D_ADDR]], align 8 @@ -108,6 +109,7 @@ double fnmsub (double d) { // CHECK-LABEL: @fnmsubs( // CHECK: [[F_ADDR:%.*]] = alloca float, align 4 // CHECK-NEXT: store float [[F:%.*]], float* [[F_ADDR]], align 4 +// CHECK-COUNT-3: load float, float* [[F_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F_ADDR]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[F_ADDR]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[F_ADDR]], align 4 diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c index 7a87811da336da654d27f4044ba679fd2f1e35ec..24cd794e6e0069577e05f0aadbbf8a732195f28e 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c @@ -14,11 +14,13 @@ extern void *c; // CHECK-LABEL: @test_popcntb( // CHECK: 
[[TMP0:%.*]] = load i64, i64* @a, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* @a, align 8 // CHECK-NEXT: [[POPCNTB:%.*]] = call i64 @llvm.ppc.popcntb.i64.i64(i64 [[TMP0]]) // CHECK-NEXT: ret i64 [[POPCNTB]] // // CHECK-32-LABEL: @test_popcntb( // CHECK-32: [[TMP0:%.*]] = load i32, i32* @a, align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* @a, align 4 // CHECK-32-NEXT: [[POPCNTB:%.*]] = call i32 @llvm.ppc.popcntb.i32.i32(i32 [[TMP0]]) // CHECK-32-NEXT: ret i32 [[POPCNTB]] // @@ -196,11 +198,13 @@ void test_dcbz() { // CHECK-LABEL: @test_builtin_ppc_popcntb( // CHECK: [[TMP0:%.*]] = load i64, i64* @a, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* @a, align 8 // CHECK-NEXT: [[POPCNTB:%.*]] = call i64 @llvm.ppc.popcntb.i64.i64(i64 [[TMP0]]) // CHECK-NEXT: ret i64 [[POPCNTB]] // // CHECK-32-LABEL: @test_builtin_ppc_popcntb( // CHECK-32: [[TMP0:%.*]] = load i32, i32* @a, align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* @a, align 4 // CHECK-32-NEXT: [[POPCNTB:%.*]] = call i32 @llvm.ppc.popcntb.i32.i32(i32 [[TMP0]]) // CHECK-32-NEXT: ret i32 [[POPCNTB]] // diff --git a/clang/test/CodeGen/PowerPC/ppc-mma-types.c b/clang/test/CodeGen/PowerPC/ppc-mma-types.c index 66cbb79061624ec171facc451e390a780f2f3a84..ad8a9c592906b3ad33d103366b67920ae31fdd3e 100644 --- a/clang/test/CodeGen/PowerPC/ppc-mma-types.c +++ b/clang/test/CodeGen/PowerPC/ppc-mma-types.c @@ -1,23 +1,17 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -no-opaque-pointers -triple powerpc64le-linux-unknown -target-cpu pwr10 \ -// RUN: -emit-llvm -o - %s | FileCheck %s +// RUN: -emit-llvm -O3 -o - %s | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -triple powerpc64le-linux-unknown -target-cpu pwr9 \ -// RUN: -emit-llvm -o - %s | FileCheck %s +// RUN: -emit-llvm -O3 -o - %s | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -triple powerpc64le-linux-unknown -target-cpu pwr8 \ -// RUN: -emit-llvm -o - %s | FileCheck %s +// RUN: -emit-llvm -O3 -o - %s | FileCheck %s // CHECK-LABEL: @test1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[PTR1_ADDR:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-NEXT: store <512 x i1>* [[PTR1:%.*]], <512 x i1>** [[PTR1_ADDR]], align 8 -// CHECK-NEXT: store <512 x i1>* [[PTR2:%.*]], <512 x i1>** [[PTR2_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>*, <512 x i1>** [[PTR1_ADDR]], align 8 -// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[TMP0]], i64 2 -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[ADD_PTR]], align 64 -// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[PTR2_ADDR]], align 8 -// CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[TMP2]], i64 1 -// CHECK-NEXT: store <512 x i1> [[TMP1]], <512 x i1>* [[ADD_PTR1]], align 64 +// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[PTR1:%.*]], i64 2 +// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, <512 x i1>* [[ADD_PTR]], align 64, [[TBAA2:!tbaa !.*]] +// CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[PTR2:%.*]], i64 1 +// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[ADD_PTR1]], align 64, [[TBAA2]] // CHECK-NEXT: ret void // void test1(__vector_quad *ptr1, __vector_quad *ptr2) { @@ -26,422 +20,12 @@ void test1(__vector_quad *ptr1, __vector_quad *ptr2) { // CHECK-LABEL: @test2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[PTR1_ADDR:%.*]] = alloca <256 x 
i1>*, align 8 -// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca <256 x i1>*, align 8 -// CHECK-NEXT: store <256 x i1>* [[PTR1:%.*]], <256 x i1>** [[PTR1_ADDR]], align 8 -// CHECK-NEXT: store <256 x i1>* [[PTR2:%.*]], <256 x i1>** [[PTR2_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>*, <256 x i1>** [[PTR1_ADDR]], align 8 -// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[TMP0]], i64 2 -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[ADD_PTR]], align 32 -// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[PTR2_ADDR]], align 8 -// CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[TMP2]], i64 1 -// CHECK-NEXT: store <256 x i1> [[TMP1]], <256 x i1>* [[ADD_PTR1]], align 32 +// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[PTR1:%.*]], i64 2 +// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, <256 x i1>* [[ADD_PTR]], align 32, [[TBAA6:!tbaa !.*]] +// CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[PTR2:%.*]], i64 1 +// CHECK-NEXT: store <256 x i1> [[TMP0]], <256 x i1>* [[ADD_PTR1]], align 32, [[TBAA6]] // CHECK-NEXT: ret void // void test2(__vector_pair *ptr1, __vector_pair *ptr2) { *(ptr2 + 1) = *(ptr1 + 2); } - -typedef __vector_quad vq_t; -// CHECK-LABEL: @testVQTypedef( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[INP_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[OUTP_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VQIN:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-NEXT: [[VQOUT:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-NEXT: store i32* [[INP:%.*]], i32** [[INP_ADDR]], align 8 -// CHECK-NEXT: store i32* [[OUTP:%.*]], i32** [[OUTP_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[INP_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQIN]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[OUTP_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1>* [[TMP3]], <512 x i1>** [[VQOUT]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = load <512 x i1>*, <512 x i1>** [[VQIN]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load <512 x i1>, <512 x i1>* [[TMP4]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = load <512 x i1>*, <512 x i1>** [[VQOUT]], align 8 -// CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64 -// CHECK-NEXT: ret void -// -void testVQTypedef(int *inp, int *outp) { - vq_t *vqin = (vq_t *)inp; - vq_t *vqout = (vq_t *)outp; - *vqout = *vqin; -} - -// CHECK-LABEL: @testVQArg3( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VQ_ADDR:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-NEXT: store <512 x i1>* [[VQ:%.*]], <512 x i1>** [[VQ_ADDR]], align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQ_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[TMP2]], align 64 -// CHECK-NEXT: [[TMP4:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: store <512 x i1> [[TMP3]], <512 x i1>* [[TMP4]], align 64 -// CHECK-NEXT: ret 
void -// -void testVQArg3(__vector_quad *vq, int *ptr) { - __vector_quad *vqp = (__vector_quad *)ptr; - *vqp = *vq; -} - -// CHECK-LABEL: @testVQArg4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VQ_ADDR:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-NEXT: store <512 x i1>* [[VQ:%.*]], <512 x i1>** [[VQ_ADDR]], align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQ_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[TMP2]], align 64 -// CHECK-NEXT: [[TMP4:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: store <512 x i1> [[TMP3]], <512 x i1>* [[TMP4]], align 64 -// CHECK-NEXT: ret void -// -void testVQArg4(const __vector_quad *const vq, int *ptr) { - __vector_quad *vqp = (__vector_quad *)ptr; - *vqp = *vq; -} - -// CHECK-LABEL: @testVQArg5( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VQA_ADDR:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-NEXT: store <512 x i1>* [[VQA:%.*]], <512 x i1>** [[VQA_ADDR]], align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQA_ADDR]], align 8 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[TMP2]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[ARRAYIDX]], align 64 -// CHECK-NEXT: [[TMP4:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: store <512 x i1> [[TMP3]], <512 x i1>* [[TMP4]], align 64 -// CHECK-NEXT: ret void -// -void testVQArg5(__vector_quad vqa[], int *ptr) { - __vector_quad *vqp = (__vector_quad *)ptr; - *vqp = vqa[0]; -} - -// CHECK-LABEL: @testVQArg7( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VQ_ADDR:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-NEXT: store <512 x i1>* [[VQ:%.*]], <512 x i1>** [[VQ_ADDR]], align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQ_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[TMP2]], align 64 -// CHECK-NEXT: [[TMP4:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: store <512 x i1> [[TMP3]], <512 x i1>* [[TMP4]], align 64 -// CHECK-NEXT: ret void -// -void testVQArg7(const vq_t *vq, int *ptr) { - __vector_quad *vqp = (__vector_quad *)ptr; - *vqp = *vq; -} - -// CHECK-LABEL: @testVQRet2( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8 -// 
CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[TMP2]], i64 2 -// CHECK-NEXT: ret <512 x i1>* [[ADD_PTR]] -// -__vector_quad *testVQRet2(int *ptr) { - __vector_quad *vqp = (__vector_quad *)ptr; - return vqp + 2; -} - -// CHECK-LABEL: @testVQRet3( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[TMP2]], i64 2 -// CHECK-NEXT: ret <512 x i1>* [[ADD_PTR]] -// -const __vector_quad *testVQRet3(int *ptr) { - __vector_quad *vqp = (__vector_quad *)ptr; - return vqp + 2; -} - -// CHECK-LABEL: @testVQRet5( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[TMP2]], i64 2 -// CHECK-NEXT: ret <512 x i1>* [[ADD_PTR]] -// -const vq_t *testVQRet5(int *ptr) { - __vector_quad *vqp = (__vector_quad *)ptr; - return vqp + 2; -} - -// CHECK-LABEL: @testVQSizeofAlignof( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8 -// CHECK-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64 -// CHECK-NEXT: [[SIZET:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[ALIGNT:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[SIZEV:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[ALIGNV:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[TMP2]], align 64 -// CHECK-NEXT: store <512 x i1> [[TMP3]], <512 x i1>* [[VQ]], align 64 -// CHECK-NEXT: store i32 64, i32* [[SIZET]], align 4 -// CHECK-NEXT: store i32 64, i32* [[ALIGNT]], align 4 -// CHECK-NEXT: store i32 64, i32* [[SIZEV]], align 4 -// CHECK-NEXT: store i32 64, i32* [[ALIGNV]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[SIZET]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ALIGNT]], align 4 -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP4]], [[TMP5]] -// CHECK-NEXT: [[TMP6:%.*]] 
= load i32, i32* [[SIZEV]], align 4 -// CHECK-NEXT: [[ADD1:%.*]] = add i32 [[ADD]], [[TMP6]] -// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ALIGNV]], align 4 -// CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP7]] -// CHECK-NEXT: ret i32 [[ADD2]] -// -int testVQSizeofAlignof(int *ptr) { - __vector_quad *vqp = (__vector_quad *)ptr; - __vector_quad vq = *vqp; - unsigned sizet = sizeof(__vector_quad); - unsigned alignt = __alignof__(__vector_quad); - unsigned sizev = sizeof(vq); - unsigned alignv = __alignof__(vq); - return sizet + alignt + sizev + alignv; -} - -typedef __vector_pair vp_t; -// CHECK-LABEL: @testVPTypedef( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[INP_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[OUTP_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VPIN:%.*]] = alloca <256 x i1>*, align 8 -// CHECK-NEXT: [[VPOUT:%.*]] = alloca <256 x i1>*, align 8 -// CHECK-NEXT: store i32* [[INP:%.*]], i32** [[INP_ADDR]], align 8 -// CHECK-NEXT: store i32* [[OUTP:%.*]], i32** [[OUTP_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[INP_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>* -// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPIN]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[OUTP_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <256 x i1>* -// CHECK-NEXT: store <256 x i1>* [[TMP3]], <256 x i1>** [[VPOUT]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = load <256 x i1>*, <256 x i1>** [[VPIN]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load <256 x i1>, <256 x i1>* [[TMP4]], align 32 -// CHECK-NEXT: [[TMP6:%.*]] = load <256 x i1>*, <256 x i1>** [[VPOUT]], align 8 -// CHECK-NEXT: store <256 x i1> [[TMP5]], <256 x i1>* [[TMP6]], align 32 -// CHECK-NEXT: ret void -// -void testVPTypedef(int *inp, int *outp) { - vp_t *vpin = (vp_t *)inp; - vp_t *vpout = (vp_t *)outp; - *vpout = *vpin; -} - -// CHECK-LABEL: @testVPArg3( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VP_ADDR:%.*]] = alloca <256 x i1>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8 -// CHECK-NEXT: store <256 x i1>* [[VP:%.*]], <256 x i1>** [[VP_ADDR]], align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>* -// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VP_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8 -// CHECK-NEXT: store <256 x i1> [[TMP3]], <256 x i1>* [[TMP4]], align 32 -// CHECK-NEXT: ret void -// -void testVPArg3(__vector_pair *vp, int *ptr) { - __vector_pair *vpp = (__vector_pair *)ptr; - *vpp = *vp; -} - -// CHECK-LABEL: @testVPArg4( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VP_ADDR:%.*]] = alloca <256 x i1>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8 -// CHECK-NEXT: store <256 x i1>* [[VP:%.*]], <256 x i1>** [[VP_ADDR]], align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>* -// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8 
-// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VP_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8 -// CHECK-NEXT: store <256 x i1> [[TMP3]], <256 x i1>* [[TMP4]], align 32 -// CHECK-NEXT: ret void -// -void testVPArg4(const __vector_pair *const vp, int *ptr) { - __vector_pair *vpp = (__vector_pair *)ptr; - *vpp = *vp; -} - -// CHECK-LABEL: @testVPArg5( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VPA_ADDR:%.*]] = alloca <256 x i1>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8 -// CHECK-NEXT: store <256 x i1>* [[VPA:%.*]], <256 x i1>** [[VPA_ADDR]], align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>* -// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPA_ADDR]], align 8 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[TMP2]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[ARRAYIDX]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8 -// CHECK-NEXT: store <256 x i1> [[TMP3]], <256 x i1>* [[TMP4]], align 32 -// CHECK-NEXT: ret void -// -void testVPArg5(__vector_pair vpa[], int *ptr) { - __vector_pair *vpp = (__vector_pair *)ptr; - *vpp = vpa[0]; -} - -// CHECK-LABEL: @testVPArg7( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VP_ADDR:%.*]] = alloca <256 x i1>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8 -// CHECK-NEXT: store <256 x i1>* [[VP:%.*]], <256 x i1>** [[VP_ADDR]], align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>* -// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VP_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32 -// CHECK-NEXT: [[TMP4:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8 -// CHECK-NEXT: store <256 x i1> [[TMP3]], <256 x i1>* [[TMP4]], align 32 -// CHECK-NEXT: ret void -// -void testVPArg7(const vp_t *vp, int *ptr) { - __vector_pair *vpp = (__vector_pair *)ptr; - *vpp = *vp; -} - -// CHECK-LABEL: @testVPRet2( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>* -// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8 -// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[TMP2]], i64 2 -// CHECK-NEXT: ret <256 x i1>* [[ADD_PTR]] -// -__vector_pair *testVPRet2(int *ptr) { - __vector_pair *vpp = (__vector_pair *)ptr; - return vpp + 2; -} - -// CHECK-LABEL: @testVPRet3( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 
-// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>* -// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8 -// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[TMP2]], i64 2 -// CHECK-NEXT: ret <256 x i1>* [[ADD_PTR]] -// -const __vector_pair *testVPRet3(int *ptr) { - __vector_pair *vpp = (__vector_pair *)ptr; - return vpp + 2; -} - -// CHECK-LABEL: @testVPRet5( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>* -// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8 -// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[TMP2]], i64 2 -// CHECK-NEXT: ret <256 x i1>* [[ADD_PTR]] -// -const vp_t *testVPRet5(int *ptr) { - __vector_pair *vpp = (__vector_pair *)ptr; - return vpp + 2; -} - -// CHECK-LABEL: @testVPSizeofAlignof( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8 -// CHECK-NEXT: [[VP:%.*]] = alloca <256 x i1>, align 32 -// CHECK-NEXT: [[SIZET:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[ALIGNT:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[SIZEV:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[ALIGNV:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>* -// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32 -// CHECK-NEXT: store <256 x i1> [[TMP3]], <256 x i1>* [[VP]], align 32 -// CHECK-NEXT: store i32 32, i32* [[SIZET]], align 4 -// CHECK-NEXT: store i32 32, i32* [[ALIGNT]], align 4 -// CHECK-NEXT: store i32 32, i32* [[SIZEV]], align 4 -// CHECK-NEXT: store i32 32, i32* [[ALIGNV]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[SIZET]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ALIGNT]], align 4 -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP4]], [[TMP5]] -// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[SIZEV]], align 4 -// CHECK-NEXT: [[ADD1:%.*]] = add i32 [[ADD]], [[TMP6]] -// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ALIGNV]], align 4 -// CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP7]] -// CHECK-NEXT: ret i32 [[ADD2]] -// -int testVPSizeofAlignof(int *ptr) { - __vector_pair *vpp = (__vector_pair *)ptr; - __vector_pair vp = *vpp; - unsigned sizet = sizeof(__vector_pair); - unsigned alignt = __alignof__(__vector_pair); - unsigned sizev = sizeof(vp); - unsigned alignv = __alignof__(vp); - return sizet + alignt + sizev + alignv; -} diff --git a/clang/test/Sema/ppc-pair-mma-types.c b/clang/test/Sema/ppc-pair-mma-types.c index 
293688d49813c3a46641ac274f298628be4f2f43..2ad1079bd966bbd633d7edbbfaeaaa555470fe88 100644 --- a/clang/test/Sema/ppc-pair-mma-types.c +++ b/clang/test/Sema/ppc-pair-mma-types.c @@ -12,6 +12,11 @@ // typedef typedef __vector_quad vq_t; +void testVQTypedef(int *inp, int *outp) { + vq_t *vqin = (vq_t *)inp; + vq_t *vqout = (vq_t *)outp; + *vqout = *vqin; +} // function argument void testVQArg1(__vector_quad vq, int *ptr) { // expected-error {{invalid use of PPC MMA type}} @@ -24,22 +29,57 @@ void testVQArg2(const __vector_quad vq, int *ptr) { // expected-error {{invalid *vqp = vq; } +void testVQArg3(__vector_quad *vq, int *ptr) { + __vector_quad *vqp = (__vector_quad *)ptr; + *vqp = *vq; +} + +void testVQArg4(const __vector_quad *const vq, int *ptr) { + __vector_quad *vqp = (__vector_quad *)ptr; + *vqp = *vq; +} + +void testVQArg5(__vector_quad vqa[], int *ptr) { + __vector_quad *vqp = (__vector_quad *)ptr; + *vqp = vqa[0]; +} + void testVQArg6(const vq_t vq, int *ptr) { // expected-error {{invalid use of PPC MMA type}} __vector_quad *vqp = (__vector_quad *)ptr; *vqp = vq; } +void testVQArg7(const vq_t *vq, int *ptr) { + __vector_quad *vqp = (__vector_quad *)ptr; + *vqp = *vq; +} + // function return __vector_quad testVQRet1(int *ptr) { // expected-error {{invalid use of PPC MMA type}} __vector_quad *vqp = (__vector_quad *)ptr; return *vqp; // expected-error {{invalid use of PPC MMA type}} } +__vector_quad *testVQRet2(int *ptr) { + __vector_quad *vqp = (__vector_quad *)ptr; + return vqp + 2; +} + +const __vector_quad *testVQRet3(int *ptr) { + __vector_quad *vqp = (__vector_quad *)ptr; + return vqp + 2; +} + const vq_t testVQRet4(int *ptr) { // expected-error {{invalid use of PPC MMA type}} __vector_quad *vqp = (__vector_quad *)ptr; return *vqp; // expected-error {{invalid use of PPC MMA type}} } +const vq_t *testVQRet5(int *ptr) { + __vector_quad *vqp = (__vector_quad *)ptr; + return vqp + 2; +} + // global __vector_quad globalvq; // expected-error {{invalid use of PPC MMA type}} const __vector_quad globalvq2; // expected-error {{invalid use of PPC MMA type}} @@ -47,6 +87,16 @@ __vector_quad *globalvqp; const __vector_quad *const globalvqp2; vq_t globalvq_t; // expected-error {{invalid use of PPC MMA type}} +// local +void testVQLocal(int *ptr, vector unsigned char vc) { + __vector_quad *vqp = (__vector_quad *)ptr; + __vector_quad vq1 = *vqp; + __vector_quad vq2; + __builtin_mma_xxsetaccz(&vq2); + __vector_quad vq3; + __builtin_mma_xvi4ger8(&vq3, vc, vc); + *vqp = vq3; +} // struct field struct TestVQStruct { @@ -56,6 +106,17 @@ struct TestVQStruct { __vector_quad *vq; }; +// sizeof / alignof +int testVQSizeofAlignof(int *ptr) { + __vector_quad *vqp = (__vector_quad *)ptr; + __vector_quad vq = *vqp; + unsigned sizet = sizeof(__vector_quad); + unsigned alignt = __alignof__(__vector_quad); + unsigned sizev = sizeof(vq); + unsigned alignv = __alignof__(vq); + return sizet + alignt + sizev + alignv; +} + // operators int testVQOperators1(int *ptr) { __vector_quad *vqp = (__vector_quad *)ptr; @@ -107,6 +168,11 @@ void testVQOperators4(int v, void *ptr) { // typedef typedef __vector_pair vp_t; +void testVPTypedef(int *inp, int *outp) { + vp_t *vpin = (vp_t *)inp; + vp_t *vpout = (vp_t *)outp; + *vpout = *vpin; +} // function argument void testVPArg1(__vector_pair vp, int *ptr) { // expected-error {{invalid use of PPC MMA type}} @@ -119,22 +185,57 @@ void testVPArg2(const __vector_pair vp, int *ptr) { // expected-error {{invalid *vpp = vp; } +void testVPArg3(__vector_pair *vp, int *ptr) { + 
__vector_pair *vpp = (__vector_pair *)ptr; + *vpp = *vp; +} + +void testVPArg4(const __vector_pair *const vp, int *ptr) { + __vector_pair *vpp = (__vector_pair *)ptr; + *vpp = *vp; +} + +void testVPArg5(__vector_pair vpa[], int *ptr) { + __vector_pair *vpp = (__vector_pair *)ptr; + *vpp = vpa[0]; +} + void testVPArg6(const vp_t vp, int *ptr) { // expected-error {{invalid use of PPC MMA type}} __vector_pair *vpp = (__vector_pair *)ptr; *vpp = vp; } +void testVPArg7(const vp_t *vp, int *ptr) { + __vector_pair *vpp = (__vector_pair *)ptr; + *vpp = *vp; +} + // function return __vector_pair testVPRet1(int *ptr) { // expected-error {{invalid use of PPC MMA type}} __vector_pair *vpp = (__vector_pair *)ptr; return *vpp; // expected-error {{invalid use of PPC MMA type}} } +__vector_pair *testVPRet2(int *ptr) { + __vector_pair *vpp = (__vector_pair *)ptr; + return vpp + 2; +} + +const __vector_pair *testVPRet3(int *ptr) { + __vector_pair *vpp = (__vector_pair *)ptr; + return vpp + 2; +} + const vp_t testVPRet4(int *ptr) { // expected-error {{invalid use of PPC MMA type}} __vector_pair *vpp = (__vector_pair *)ptr; return *vpp; // expected-error {{invalid use of PPC MMA type}} } +const vp_t *testVPRet5(int *ptr) { + __vector_pair *vpp = (__vector_pair *)ptr; + return vpp + 2; +} + // global __vector_pair globalvp; // expected-error {{invalid use of PPC MMA type}} const __vector_pair globalvp2; // expected-error {{invalid use of PPC MMA type}} @@ -142,6 +243,19 @@ __vector_pair *globalvpp; const __vector_pair *const globalvpp2; vp_t globalvp_t; // expected-error {{invalid use of PPC MMA type}} +// local +void testVPLocal(int *ptr, vector unsigned char vc) { + __vector_pair *vpp = (__vector_pair *)ptr; + __vector_pair vp1 = *vpp; + __vector_pair vp2; + __builtin_vsx_assemble_pair(&vp2, vc, vc); + __builtin_vsx_build_pair(&vp2, vc, vc); + __vector_pair vp3; + __vector_quad vq; + __builtin_mma_xvf64ger(&vq, vp3, vc); + *vpp = vp3; +} + // struct field struct TestVPStruct { int a; @@ -150,6 +264,17 @@ struct TestVPStruct { __vector_pair *vp; }; +// sizeof / alignof +int testVPSizeofAlignof(int *ptr) { + __vector_pair *vpp = (__vector_pair *)ptr; + __vector_pair vp = *vpp; + unsigned sizet = sizeof(__vector_pair); + unsigned alignt = __alignof__(__vector_pair); + unsigned sizev = sizeof(vp); + unsigned alignv = __alignof__(vp); + return sizet + alignt + sizev + alignv; +} + // operators int testVPOperators1(int *ptr) { __vector_pair *vpp = (__vector_pair *)ptr; @@ -217,7 +342,17 @@ void testRestrictQualifiedPointer1(int *__restrict acc) { __builtin_mma_disassemble_acc(arr, acc); // expected-error {{passing 'int *restrict' to parameter of incompatible type '__vector_quad *'}} } +void testRestrictQualifiedPointer2(__vector_quad *__restrict acc) { + vector float arr[4]; + __builtin_mma_disassemble_acc(arr, acc); +} + void testVolatileQualifiedPointer1(int *__volatile acc) { vector float arr[4]; __builtin_mma_disassemble_acc(arr, acc); // expected-error {{passing 'int *volatile' to parameter of incompatible type '__vector_quad *'}} } + +void testVolatileQualifiedPointer2(__vector_quad *__volatile acc) { + vector float arr[4]; + __builtin_mma_disassemble_acc(arr, acc); +}
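
The Sema additions in ppc-pair-mma-types.c above only verify that these forms are accepted. For quick manual experiments with the same patterns, a small standalone sketch follows; the file name and the compile invocation in the leading comment are illustrative assumptions rather than part of this patch, and the builtin calls simply mirror the testVQTypedef, testVQLocal, testVPLocal and testRestrictQualifiedPointer2 cases added above.

// mma-sketch.c -- illustrative sketch only, not part of this patch.
// Assumed build (any clang with AltiVec/MMA support for pwr10 should do):
//   clang -c --target=powerpc64le-linux-gnu -mcpu=pwr10 mma-sketch.c

typedef __vector_quad vq_t;
typedef __vector_pair vp_t;

// Accumulator round-trip through an opaque int* buffer, as in testVQTypedef.
void copy_quad(int *inp, int *outp) {
  vq_t *vqin = (vq_t *)inp;
  vq_t *vqout = (vq_t *)outp;
  *vqout = *vqin;
}

// Local accumulator set-up and an integer outer-product update, as in testVQLocal.
void use_quad(int *ptr, vector unsigned char vc) {
  __vector_quad *vqp = (__vector_quad *)ptr;
  __vector_quad vq;
  __builtin_mma_xxsetaccz(&vq);        // zero the accumulator
  __builtin_mma_xvi4ger8(&vq, vc, vc); // accumulate an outer product into it
  *vqp = vq;
}

// Building a vector pair from two VSX vectors, as in testVPLocal.
void use_pair(int *ptr, vector unsigned char vc) {
  __vector_pair *vpp = (__vector_pair *)ptr;
  __vector_pair vp;
  __builtin_vsx_build_pair(&vp, vc, vc);
  *vpp = vp;
}

// Disassembling an accumulator into four vectors through a restrict-qualified
// pointer, as in testRestrictQualifiedPointer2.
void spill_acc(__vector_quad *__restrict acc, vector float out[4]) {
  __builtin_mma_disassemble_acc(out, acc);
}

The pointer-copy form (copy_quad here, test1/test2 in ppc-mma-types.c) is what the updated -O3 RUN lines check at the IR level, where the <512 x i1> and <256 x i1> copies reduce to a single TBAA-annotated load and store.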