Skip to content

Commit 878a574

Browse files
authored
[flang][cuda] Add c_devloc as intrinsic and inline it during lowering (llvm#120648)
Add `c_devloc` as intrinsic and inline it during lowering. `c_devloc` is used in CUDA Fortran to get the address of device variables. For the moment, we borrow almost all semantic checks from `c_loc` except for the pointer or target restriction. The specifications of `c_devloc` are are pretty vague and we will relax/enforce the restrictions based on library and apps usage comparing them to the reference compiler.
1 parent cdbba15 commit 878a574

File tree

8 files changed

+139
-5
lines changed

8 files changed

+139
-5
lines changed

flang/include/flang/Optimizer/Builder/FIRBuilder.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -769,6 +769,11 @@ mlir::Value genMaxWithZero(fir::FirOpBuilder &builder, mlir::Location loc,
769769
mlir::Value genCPtrOrCFunptrAddr(fir::FirOpBuilder &builder, mlir::Location loc,
770770
mlir::Value cPtr, mlir::Type ty);
771771

772+
/// The type(C_DEVPTR) is defined as the derived type with only one
773+
/// component of C_PTR type. Get the C address from the C_PTR component.
774+
mlir::Value genCDevPtrAddr(fir::FirOpBuilder &builder, mlir::Location loc,
775+
mlir::Value cDevPtr, mlir::Type ty);
776+
772777
/// Get the C address value.
773778
mlir::Value genCPtrOrCFunptrValue(fir::FirOpBuilder &builder,
774779
mlir::Location loc, mlir::Value cPtr);

flang/include/flang/Optimizer/Builder/IntrinsicCall.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ struct IntrinsicLibrary {
214214
llvm::ArrayRef<fir::ExtendedValue>);
215215
fir::ExtendedValue genCAssociatedCPtr(mlir::Type,
216216
llvm::ArrayRef<fir::ExtendedValue>);
217+
fir::ExtendedValue genCDevLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
217218
mlir::Value genErfcScaled(mlir::Type resultType,
218219
llvm::ArrayRef<mlir::Value> args);
219220
void genCFPointer(llvm::ArrayRef<fir::ExtendedValue>);

flang/lib/Evaluate/intrinsics.cpp

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2663,6 +2663,8 @@ class IntrinsicProcTable::Implementation {
26632663
ActualArguments &, FoldingContext &) const;
26642664
std::optional<SpecificCall> HandleC_Loc(
26652665
ActualArguments &, FoldingContext &) const;
2666+
std::optional<SpecificCall> HandleC_Devloc(
2667+
ActualArguments &, FoldingContext &) const;
26662668
const std::string &ResolveAlias(const std::string &name) const {
26672669
auto iter{aliases_.find(name)};
26682670
return iter == aliases_.end() ? name : iter->second;
@@ -2690,7 +2692,8 @@ bool IntrinsicProcTable::Implementation::IsIntrinsicFunction(
26902692
return true;
26912693
}
26922694
// special cases
2693-
return name == "__builtin_c_loc" || name == "null";
2695+
return name == "__builtin_c_loc" || name == "__builtin_c_devloc" ||
2696+
name == "null";
26942697
}
26952698
bool IntrinsicProcTable::Implementation::IsIntrinsicSubroutine(
26962699
const std::string &name0) const {
@@ -3080,6 +3083,73 @@ std::optional<SpecificCall> IntrinsicProcTable::Implementation::HandleC_Loc(
30803083
return std::nullopt;
30813084
}
30823085

3086+
// CUDA Fortran C_DEVLOC(x)
3087+
std::optional<SpecificCall> IntrinsicProcTable::Implementation::HandleC_Devloc(
3088+
ActualArguments &arguments, FoldingContext &context) const {
3089+
static const char *const keywords[]{"cptr", nullptr};
3090+
3091+
if (CheckAndRearrangeArguments(arguments, context.messages(), keywords)) {
3092+
CHECK(arguments.size() == 1);
3093+
const auto *expr{arguments[0].value().UnwrapExpr()};
3094+
if (auto typeAndShape{characteristics::TypeAndShape::Characterize(
3095+
arguments[0], context)}) {
3096+
if (expr && !IsContiguous(*expr, context).value_or(true)) {
3097+
context.messages().Say(arguments[0]->sourceLocation(),
3098+
"C_DEVLOC() argument must be contiguous"_err_en_US);
3099+
}
3100+
if (auto constExtents{AsConstantExtents(context, typeAndShape->shape())};
3101+
constExtents && GetSize(*constExtents) == 0) {
3102+
context.messages().Say(arguments[0]->sourceLocation(),
3103+
"C_DEVLOC() argument may not be a zero-sized array"_err_en_US);
3104+
}
3105+
if (!(typeAndShape->type().category() != TypeCategory::Derived ||
3106+
typeAndShape->type().IsAssumedType() ||
3107+
(!typeAndShape->type().IsPolymorphic() &&
3108+
CountNonConstantLenParameters(
3109+
typeAndShape->type().GetDerivedTypeSpec()) == 0))) {
3110+
context.messages().Say(arguments[0]->sourceLocation(),
3111+
"C_DEVLOC() argument must have an intrinsic type, assumed type, or non-polymorphic derived type with no non-constant length parameter"_err_en_US);
3112+
} else if (typeAndShape->type().knownLength().value_or(1) == 0) {
3113+
context.messages().Say(arguments[0]->sourceLocation(),
3114+
"C_DEVLOC() argument may not be zero-length character"_err_en_US);
3115+
} else if (typeAndShape->type().category() != TypeCategory::Derived &&
3116+
!IsInteroperableIntrinsicType(typeAndShape->type()).value_or(true)) {
3117+
if (typeAndShape->type().category() == TypeCategory::Character &&
3118+
typeAndShape->type().kind() == 1) {
3119+
// Default character kind, but length is not known to be 1
3120+
if (context.languageFeatures().ShouldWarn(
3121+
common::UsageWarning::CharacterInteroperability)) {
3122+
context.messages().Say(
3123+
common::UsageWarning::CharacterInteroperability,
3124+
arguments[0]->sourceLocation(),
3125+
"C_DEVLOC() argument has non-interoperable character length"_warn_en_US);
3126+
}
3127+
} else if (context.languageFeatures().ShouldWarn(
3128+
common::UsageWarning::Interoperability)) {
3129+
context.messages().Say(common::UsageWarning::Interoperability,
3130+
arguments[0]->sourceLocation(),
3131+
"C_DEVLOC() argument has non-interoperable intrinsic type or kind"_warn_en_US);
3132+
}
3133+
}
3134+
3135+
characteristics::DummyDataObject ddo{std::move(*typeAndShape)};
3136+
ddo.intent = common::Intent::In;
3137+
return SpecificCall{
3138+
SpecificIntrinsic{"__builtin_c_devloc"s,
3139+
characteristics::Procedure{
3140+
characteristics::FunctionResult{
3141+
DynamicType{GetBuiltinDerivedType(
3142+
builtinsScope_, "__builtin_c_devptr")}},
3143+
characteristics::DummyArguments{
3144+
characteristics::DummyArgument{"cptr"s, std::move(ddo)}},
3145+
characteristics::Procedure::Attrs{
3146+
characteristics::Procedure::Attr::Pure}}},
3147+
std::move(arguments)};
3148+
}
3149+
}
3150+
return std::nullopt;
3151+
}
3152+
30833153
static bool CheckForNonPositiveValues(FoldingContext &context,
30843154
const ActualArgument &arg, const std::string &procName,
30853155
const std::string &argName) {
@@ -3270,6 +3340,8 @@ std::optional<SpecificCall> IntrinsicProcTable::Implementation::Probe(
32703340
} else { // function
32713341
if (call.name == "__builtin_c_loc") {
32723342
return HandleC_Loc(arguments, context);
3343+
} else if (call.name == "__builtin_c_devloc") {
3344+
return HandleC_Devloc(arguments, context);
32733345
} else if (call.name == "null") {
32743346
return HandleNull(arguments, context);
32753347
}

flang/lib/Optimizer/Builder/FIRBuilder.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1626,6 +1626,25 @@ mlir::Value fir::factory::genCPtrOrCFunptrAddr(fir::FirOpBuilder &builder,
16261626
cPtr, addrFieldIndex);
16271627
}
16281628

1629+
mlir::Value fir::factory::genCDevPtrAddr(fir::FirOpBuilder &builder,
1630+
mlir::Location loc,
1631+
mlir::Value cDevPtr, mlir::Type ty) {
1632+
auto recTy = mlir::cast<fir::RecordType>(ty);
1633+
assert(recTy.getTypeList().size() == 1);
1634+
auto cptrFieldName = recTy.getTypeList()[0].first;
1635+
mlir::Type cptrFieldTy = recTy.getTypeList()[0].second;
1636+
auto fieldIndexType = fir::FieldType::get(ty.getContext());
1637+
mlir::Value cptrFieldIndex = builder.create<fir::FieldIndexOp>(
1638+
loc, fieldIndexType, cptrFieldName, recTy,
1639+
/*typeParams=*/mlir::ValueRange{});
1640+
auto cptrCoord = builder.create<fir::CoordinateOp>(
1641+
loc, builder.getRefType(cptrFieldTy), cDevPtr, cptrFieldIndex);
1642+
auto [addrFieldIndex, addrFieldTy] =
1643+
genCPtrOrCFunptrFieldIndex(builder, loc, cptrFieldTy);
1644+
return builder.create<fir::CoordinateOp>(loc, builder.getRefType(addrFieldTy),
1645+
cptrCoord, addrFieldIndex);
1646+
}
1647+
16291648
mlir::Value fir::factory::genCPtrOrCFunptrValue(fir::FirOpBuilder &builder,
16301649
mlir::Location loc,
16311650
mlir::Value cPtr) {

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ static constexpr IntrinsicHandler handlers[]{
167167
&I::genCAssociatedCPtr,
168168
{{{"c_ptr_1", asAddr}, {"c_ptr_2", asAddr, handleDynamicOptional}}},
169169
/*isElemental=*/false},
170+
{"c_devloc", &I::genCDevLoc, {{{"x", asBox}}}, /*isElemental=*/false},
170171
{"c_f_pointer",
171172
&I::genCFPointer,
172173
{{{"cptr", asValue},
@@ -2867,11 +2868,14 @@ static mlir::Value getAddrFromBox(fir::FirOpBuilder &builder,
28672868
static fir::ExtendedValue
28682869
genCLocOrCFunLoc(fir::FirOpBuilder &builder, mlir::Location loc,
28692870
mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args,
2870-
bool isFunc = false) {
2871+
bool isFunc = false, bool isDevLoc = false) {
28712872
assert(args.size() == 1);
28722873
mlir::Value res = builder.create<fir::AllocaOp>(loc, resultType);
2873-
mlir::Value resAddr =
2874-
fir::factory::genCPtrOrCFunptrAddr(builder, loc, res, resultType);
2874+
mlir::Value resAddr;
2875+
if (isDevLoc)
2876+
resAddr = fir::factory::genCDevPtrAddr(builder, loc, res, resultType);
2877+
else
2878+
resAddr = fir::factory::genCPtrOrCFunptrAddr(builder, loc, res, resultType);
28752879
assert(fir::isa_box_type(fir::getBase(args[0]).getType()) &&
28762880
"argument must have been lowered to box type");
28772881
mlir::Value argAddr = getAddrFromBox(builder, loc, args[0], isFunc);
@@ -2928,6 +2932,14 @@ IntrinsicLibrary::genCAssociatedCPtr(mlir::Type resultType,
29282932
return genCAssociated(builder, loc, resultType, args);
29292933
}
29302934

2935+
// C_DEVLOC
2936+
fir::ExtendedValue
2937+
IntrinsicLibrary::genCDevLoc(mlir::Type resultType,
2938+
llvm::ArrayRef<fir::ExtendedValue> args) {
2939+
return genCLocOrCFunLoc(builder, loc, resultType, args, /*isFunc=*/false,
2940+
/*isDevLoc=*/true);
2941+
}
2942+
29312943
// C_F_POINTER
29322944
void IntrinsicLibrary::genCFPointer(llvm::ArrayRef<fir::ExtendedValue> args) {
29332945
assert(args.size() == 3);

flang/module/__fortran_builtins.f90

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
intrinsic :: __builtin_c_loc
2323
public :: __builtin_c_loc
2424

25+
intrinsic :: __builtin_c_devloc
26+
public :: __builtin_c_devloc
27+
2528
intrinsic :: __builtin_c_f_pointer
2629
public :: __builtin_c_f_pointer
2730

@@ -144,6 +147,7 @@
144147

145148
type :: __force_derived_type_instantiations
146149
type(__builtin_c_ptr) :: c_ptr
150+
type(__builtin_c_devptr) :: c_devptr
147151
type(__builtin_c_funptr) :: c_funptr
148152
type(__builtin_event_type) :: event_type
149153
type(__builtin_lock_type) :: lock_type

flang/module/__fortran_type_info.f90

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
module __fortran_type_info
1515

1616
use, intrinsic :: __fortran_builtins, &
17-
only: __builtin_c_ptr, __builtin_c_funptr
17+
only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr
1818
implicit none
1919

2020
! Set PRIVATE by default to explicitly only export what is meant
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
2+
3+
attributes(global) subroutine testcdevloc(a)
4+
use __fortran_builtins, only: c_devloc => __builtin_c_devloc
5+
integer, device :: a(10)
6+
print*, c_devloc(a(1))
7+
end
8+
9+
! CHECK-LABEL: func.func @_QPtestcdevloc(
10+
! CHECK-SAME: %[[A_ARG:.*]]: !fir.ref<!fir.array<10xi32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}) attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
11+
! CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_ARG]](%{{.*}}) dummy_scope %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtestcdevlocEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
12+
! CHECK: %[[A1:.*]] = hlfir.designate %[[A]]#0 (%c1{{.*}}) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
13+
! CHECK: %[[BOX:.*]] = fir.embox %[[A1]] : (!fir.ref<i32>) -> !fir.box<i32>
14+
! CHECK: %[[CDEVPTR:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>
15+
! CHECK: %[[FIELD_CPTR:.*]] = fir.field_index cptr, !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>
16+
! CHECK: %[[COORD_CPTR:.*]] = fir.coordinate_of %[[CDEVPTR]], %[[FIELD_CPTR]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>, !fir.field) -> !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
17+
! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
18+
! CHECK: %[[COORD_ADDRESS:.*]] = fir.coordinate_of %[[COORD_CPTR]], %[[FIELD_ADDRESS]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.field) -> !fir.ref<i64>
19+
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box<i32>) -> !fir.ref<i32>
20+
! CHECK: %[[ADDRESS_A1:.*]] = fir.convert %[[BOX_ADDR]] : (!fir.ref<i32>) -> i64
21+
! CHECK: fir.store %[[ADDRESS_A1]] to %[[COORD_ADDRESS]] : !fir.ref<i64>

0 commit comments

Comments
 (0)