[CIR][CIRGen][Builtin] Add several elementwise FP builtins (#1553)

el-ev · lanza · commit 98e8811284f9 · 2025-04-09T15:53:30.000-07:00
Adds CIR codegen for the `__builtin_elementwise_*` forms of:
- `cos`
- `floor`
- `round`
- `rint`
- `nearbyint`
- `sin`
- `sqrt`
- `tan`
- `trunc`
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -980,9 +980,6 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
       assert(!cir::MissingFeatures::fastMathFlags());
       return emitUnaryMaybeConstrainedFPBuiltin<cir::SqrtOp>(*this, *E);
 
-    case Builtin::BI__builtin_elementwise_sqrt:
-      llvm_unreachable("BI__builtin_elementwise_sqrt NYI");
-
     case Builtin::BItan:
     case Builtin::BItanf:
     case Builtin::BItanl:
@@ -1520,31 +1517,33 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_elementwise_bitreverse:
     llvm_unreachable("BI__builtin_elementwise_bitreverse NYI");
   case Builtin::BI__builtin_elementwise_cos:
-    llvm_unreachable("BI__builtin_elementwise_cos NYI");
+    return emitUnaryFPBuiltin<cir::CosOp>(*this, *E);
   case Builtin::BI__builtin_elementwise_cosh:
     llvm_unreachable("BI__builtin_elementwise_cosh NYI");
   case Builtin::BI__builtin_elementwise_floor:
-    llvm_unreachable("BI__builtin_elementwise_floor NYI");
+    return emitUnaryFPBuiltin<cir::FloorOp>(*this, *E);
   case Builtin::BI__builtin_elementwise_popcount:
     llvm_unreachable("BI__builtin_elementwise_popcount NYI");
   case Builtin::BI__builtin_elementwise_roundeven:
     llvm_unreachable("BI__builtin_elementwise_roundeven NYI");
   case Builtin::BI__builtin_elementwise_round:
-    llvm_unreachable("BI__builtin_elementwise_round NYI");
+    return emitUnaryFPBuiltin<cir::RoundOp>(*this, *E);
   case Builtin::BI__builtin_elementwise_rint:
-    llvm_unreachable("BI__builtin_elementwise_rint NYI");
+    return emitUnaryFPBuiltin<cir::RintOp>(*this, *E);
   case Builtin::BI__builtin_elementwise_nearbyint:
-    llvm_unreachable("BI__builtin_elementwise_nearbyint NYI");
+    return emitUnaryFPBuiltin<cir::NearbyintOp>(*this, *E);
   case Builtin::BI__builtin_elementwise_sin:
-    llvm_unreachable("BI__builtin_elementwise_sin NYI");
+    return emitUnaryFPBuiltin<cir::SinOp>(*this, *E);
   case Builtin::BI__builtin_elementwise_sinh:
     llvm_unreachable("BI__builtin_elementwise_sinh NYI");
+  case Builtin::BI__builtin_elementwise_sqrt:
+    return emitUnaryFPBuiltin<cir::SqrtOp>(*this, *E);
   case Builtin::BI__builtin_elementwise_tan:
-    llvm_unreachable("BI__builtin_elementwise_tan NYI");
+    return emitUnaryFPBuiltin<cir::TanOp>(*this, *E);
   case Builtin::BI__builtin_elementwise_tanh:
     llvm_unreachable("BI__builtin_elementwise_tanh NYI");
   case Builtin::BI__builtin_elementwise_trunc:
-    llvm_unreachable("BI__builtin_elementwise_trunc NYI");
+    return emitUnaryFPBuiltin<cir::TruncOp>(*this, *E);
   case Builtin::BI__builtin_elementwise_canonicalize:
     llvm_unreachable("BI__builtin_elementwise_canonicalize NYI");
   case Builtin::BI__builtin_elementwise_copysign:
diff --git a/clang/test/CIR/CodeGen/builtins-elementwise.c b/clang/test/CIR/CodeGen/builtins-elementwise.c
@@ -183,3 +183,192 @@ void test_builtin_elementwise_log10(float f, double d, vfloat4 vf4,
   // LLVM: {{%.*}} = call <4 x double> @llvm.log10.v4f64(<4 x double> {{%.*}})
   vd4 = __builtin_elementwise_log10(vd4);
 }
+
+void test_builtin_elementwise_cos(float f, double d, vfloat4 vf4,
+                                     vdouble4 vd4) { // One call per operand type.
+  // CIR-LABEL: test_builtin_elementwise_cos
+  // LLVM-LABEL: test_builtin_elementwise_cos
+  // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.cos.f32(float {{%.*}})
+  f = __builtin_elementwise_cos(f); // float operand
+
+  // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.cos.f64(double {{%.*}})
+  d = __builtin_elementwise_cos(d); // double operand
+
+  // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.cos.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_cos(vf4); // vfloat4 operand
+
+  // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.cos.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_cos(vd4); // vdouble4 operand
+}
+
+void test_builtin_elementwise_floor(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) { // One call per operand type.
+  // CIR-LABEL: test_builtin_elementwise_floor
+  // LLVM-LABEL: test_builtin_elementwise_floor
+  // CIR: {{%.*}} = cir.floor {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.floor.f32(float {{%.*}})
+  f = __builtin_elementwise_floor(f); // float operand
+
+  // CIR: {{%.*}} = cir.floor {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.floor.f64(double {{%.*}})
+  d = __builtin_elementwise_floor(d); // double operand
+
+  // CIR: {{%.*}} = cir.floor {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.floor.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_floor(vf4); // vfloat4 operand
+
+  // CIR: {{%.*}} = cir.floor {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.floor.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_floor(vd4); // vdouble4 operand
+}
+
+void test_builtin_elementwise_round(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) { // One call per operand type.
+  // CIR-LABEL: test_builtin_elementwise_round
+  // LLVM-LABEL: test_builtin_elementwise_round
+  // CIR: {{%.*}} = cir.round {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.round.f32(float {{%.*}})
+  f = __builtin_elementwise_round(f); // float operand
+
+  // CIR: {{%.*}} = cir.round {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.round.f64(double {{%.*}})
+  d = __builtin_elementwise_round(d); // double operand
+
+  // CIR: {{%.*}} = cir.round {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.round.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_round(vf4); // vfloat4 operand
+
+  // CIR: {{%.*}} = cir.round {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.round.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_round(vd4); // vdouble4 operand
+}
+
+void test_builtin_elementwise_rint(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) { // One call per operand type.
+  // CIR-LABEL: test_builtin_elementwise_rint
+  // LLVM-LABEL: test_builtin_elementwise_rint
+  // CIR: {{%.*}} = cir.rint {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.rint.f32(float {{%.*}})
+  f = __builtin_elementwise_rint(f); // float operand
+
+  // CIR: {{%.*}} = cir.rint {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.rint.f64(double {{%.*}})
+  d = __builtin_elementwise_rint(d); // double operand
+
+  // CIR: {{%.*}} = cir.rint {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.rint.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_rint(vf4); // vfloat4 operand
+
+  // CIR: {{%.*}} = cir.rint {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.rint.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_rint(vd4); // vdouble4 operand
+}
+
+void test_builtin_elementwise_nearbyint(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) { // One call per operand type.
+  // CIR-LABEL: test_builtin_elementwise_nearbyint
+  // LLVM-LABEL: test_builtin_elementwise_nearbyint
+  // CIR: {{%.*}} = cir.nearbyint {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.nearbyint.f32(float {{%.*}})
+  f = __builtin_elementwise_nearbyint(f); // float operand
+
+  // CIR: {{%.*}} = cir.nearbyint {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.nearbyint.f64(double {{%.*}})
+  d = __builtin_elementwise_nearbyint(d); // double operand
+
+  // CIR: {{%.*}} = cir.nearbyint {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_nearbyint(vf4); // vfloat4 operand
+
+  // CIR: {{%.*}} = cir.nearbyint {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_nearbyint(vd4); // vdouble4 operand
+}
+
+void test_builtin_elementwise_sin(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) { // One call per operand type.
+  // CIR-LABEL: test_builtin_elementwise_sin
+  // LLVM-LABEL: test_builtin_elementwise_sin
+  // CIR: {{%.*}} = cir.sin {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.sin.f32(float {{%.*}})
+  f = __builtin_elementwise_sin(f); // float operand
+
+  // CIR: {{%.*}} = cir.sin {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.sin.f64(double {{%.*}})
+  d = __builtin_elementwise_sin(d); // double operand
+
+  // CIR: {{%.*}} = cir.sin {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.sin.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_sin(vf4); // vfloat4 operand
+
+  // CIR: {{%.*}} = cir.sin {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.sin.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_sin(vd4); // vdouble4 operand
+}
+
+void test_builtin_elementwise_sqrt(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) { // One call per operand type.
+  // CIR-LABEL: test_builtin_elementwise_sqrt
+  // LLVM-LABEL: test_builtin_elementwise_sqrt
+  // CIR: {{%.*}} = cir.sqrt {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.sqrt.f32(float {{%.*}})
+  f = __builtin_elementwise_sqrt(f); // float operand
+
+  // CIR: {{%.*}} = cir.sqrt {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.sqrt.f64(double {{%.*}})
+  d = __builtin_elementwise_sqrt(d); // double operand
+
+  // CIR: {{%.*}} = cir.sqrt {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.sqrt.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_sqrt(vf4); // vfloat4 operand
+
+  // CIR: {{%.*}} = cir.sqrt {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.sqrt.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_sqrt(vd4); // vdouble4 operand
+}
+
+void test_builtin_elementwise_tan(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) { // One call per operand type.
+  // CIR-LABEL: test_builtin_elementwise_tan
+  // LLVM-LABEL: test_builtin_elementwise_tan
+  // CIR: {{%.*}} = cir.tan {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.tan.f32(float {{%.*}})
+  f = __builtin_elementwise_tan(f); // float operand
+
+  // CIR: {{%.*}} = cir.tan {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.tan.f64(double {{%.*}})
+  d = __builtin_elementwise_tan(d); // double operand
+
+  // CIR: {{%.*}} = cir.tan {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.tan.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_tan(vf4); // vfloat4 operand
+
+  // CIR: {{%.*}} = cir.tan {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.tan.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_tan(vd4); // vdouble4 operand
+}
+
+void test_builtin_elementwise_trunc(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) { // One call per operand type.
+  // CIR-LABEL: test_builtin_elementwise_trunc
+  // LLVM-LABEL: test_builtin_elementwise_trunc
+  // CIR: {{%.*}} = cir.trunc {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.trunc.f32(float {{%.*}})
+  f = __builtin_elementwise_trunc(f); // float operand
+
+  // CIR: {{%.*}} = cir.trunc {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.trunc.f64(double {{%.*}})
+  d = __builtin_elementwise_trunc(d); // double operand
+
+  // CIR: {{%.*}} = cir.trunc {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.trunc.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_trunc(vf4); // vfloat4 operand
+
+  // CIR: {{%.*}} = cir.trunc {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.trunc.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_trunc(vd4); // vdouble4 operand
+}