Commit 0b00b956 by Ben Clayton

LLVMReactor/SpirvShader: Don't zero loads on disabled lanes.

If robustness is disabled, don't bother zeroing disabled lanes. Gives the LLVM optimizer & backend some more wiggle room. Bug: b/135609394 Change-Id: I425eba2b48601a35dc40aa8ca071e6e61f35024f Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/33708 Kokoro-Presubmit: kokoro <noreply+kokoro@google.com> Tested-by: Ben Clayton <bclayton@google.com> Reviewed-by: Nicolas Capens <nicolascapens@google.com>
parent 9abf32b0
...@@ -331,9 +331,9 @@ namespace sw ...@@ -331,9 +331,9 @@ namespace sw
} }
if (ptr.hasStaticSequentialOffsets(sizeof(float))) if (ptr.hasStaticSequentialOffsets(sizeof(float)))
{ {
return rr::MaskedLoad(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), mask, alignment); return rr::MaskedLoad(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), mask, alignment, robust);
} }
return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, alignment); return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, alignment, robust);
} }
else else
{ {
......
...@@ -1596,7 +1596,7 @@ namespace rr ...@@ -1596,7 +1596,7 @@ namespace rr
} }
} }
Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment) Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
{ {
ASSERT(V(ptr)->getType()->isPointerTy()); ASSERT(V(ptr)->getType()->isPointerTy());
ASSERT(V(mask)->getType()->isVectorTy()); ASSERT(V(mask)->getType()->isVectorTy());
...@@ -1607,7 +1607,7 @@ namespace rr ...@@ -1607,7 +1607,7 @@ namespace rr
auto elVecTy = ::llvm::VectorType::get(T(elTy), numEls); auto elVecTy = ::llvm::VectorType::get(T(elTy), numEls);
auto elVecPtrTy = elVecTy->getPointerTo(); auto elVecPtrTy = elVecTy->getPointerTo();
auto i8Mask = jit->builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...> auto i8Mask = jit->builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
auto passthrough = ::llvm::Constant::getNullValue(elVecTy); auto passthrough = zeroMaskedLanes ? ::llvm::Constant::getNullValue(elVecTy) : llvm::UndefValue::get(elVecTy);
auto align = ::llvm::ConstantInt::get(i32Ty, alignment); auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
auto func = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_load, { elVecTy, elVecPtrTy } ); auto func = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_load, { elVecTy, elVecPtrTy } );
return V(jit->builder->CreateCall(func, { V(ptr), align, i8Mask, passthrough })); return V(jit->builder->CreateCall(func, { V(ptr), align, i8Mask, passthrough }));
...@@ -1630,7 +1630,7 @@ namespace rr ...@@ -1630,7 +1630,7 @@ namespace rr
jit->builder->CreateCall(func, { V(val), V(ptr), align, i8Mask }); jit->builder->CreateCall(func, { V(val), V(ptr), align, i8Mask });
} }
Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment) Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
{ {
ASSERT(V(base)->getType()->isPointerTy()); ASSERT(V(base)->getType()->isPointerTy());
ASSERT(V(offsets)->getType()->isVectorTy()); ASSERT(V(offsets)->getType()->isVectorTy());
...@@ -1648,7 +1648,7 @@ namespace rr ...@@ -1648,7 +1648,7 @@ namespace rr
auto i8Ptrs = jit->builder->CreateGEP(i8Base, V(offsets)); auto i8Ptrs = jit->builder->CreateGEP(i8Base, V(offsets));
auto elPtrs = jit->builder->CreatePointerCast(i8Ptrs, elPtrVecTy); auto elPtrs = jit->builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
auto i8Mask = jit->builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...> auto i8Mask = jit->builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
auto passthrough = ::llvm::Constant::getNullValue(elVecTy); auto passthrough = zeroMaskedLanes ? ::llvm::Constant::getNullValue(elVecTy) : llvm::UndefValue::get(elVecTy);
auto align = ::llvm::ConstantInt::get(i32Ty, alignment); auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
auto func = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_gather, { elVecTy, elPtrVecTy } ); auto func = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_gather, { elVecTy, elPtrVecTy } );
return V(jit->builder->CreateCall(func, { elPtrs, align, i8Mask, passthrough })); return V(jit->builder->CreateCall(func, { elPtrs, align, i8Mask, passthrough }));
......
...@@ -133,11 +133,11 @@ namespace rr ...@@ -133,11 +133,11 @@ namespace rr
static Value *createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex); static Value *createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex);
// Masked Load / Store instructions // Masked Load / Store instructions
static Value *createMaskedLoad(Value *base, Type *elementType, Value *mask, unsigned int alignment); static Value *createMaskedLoad(Value *base, Type *elementType, Value *mask, unsigned int alignment, bool zeroMaskedLanes);
static void createMaskedStore(Value *base, Value *value, Value *mask, unsigned int alignment); static void createMaskedStore(Value *base, Value *value, Value *mask, unsigned int alignment);
// Scatter / Gather instructions // Scatter / Gather instructions
static Value *createGather(Value *base, Type *elementType, Value *offsets, Value *mask, unsigned int alignment); static Value *createGather(Value *base, Type *elementType, Value *offsets, Value *mask, unsigned int alignment, bool zeroMaskedLanes);
static void createScatter(Value *base, Value *value, Value *offsets, Value *mask, unsigned int alignment); static void createScatter(Value *base, Value *value, Value *offsets, Value *mask, unsigned int alignment);
// Barrier instructions // Barrier instructions
......
...@@ -4232,14 +4232,14 @@ namespace rr ...@@ -4232,14 +4232,14 @@ namespace rr
Nucleus::setInsertBlock(bodyBB); Nucleus::setInsertBlock(bodyBB);
} }
RValue<Float4> MaskedLoad(RValue<Pointer<Float4>> base, RValue<Int4> mask, unsigned int alignment) RValue<Float4> MaskedLoad(RValue<Pointer<Float4>> base, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
{ {
return RValue<Float4>(Nucleus::createMaskedLoad(base.value, Float::getType(), mask.value, alignment)); return RValue<Float4>(Nucleus::createMaskedLoad(base.value, Float::getType(), mask.value, alignment, zeroMaskedLanes));
} }
RValue<Int4> MaskedLoad(RValue<Pointer<Int4>> base, RValue<Int4> mask, unsigned int alignment) RValue<Int4> MaskedLoad(RValue<Pointer<Int4>> base, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
{ {
return RValue<Int4>(Nucleus::createMaskedLoad(base.value, Int::getType(), mask.value, alignment)); return RValue<Int4>(Nucleus::createMaskedLoad(base.value, Int::getType(), mask.value, alignment, zeroMaskedLanes));
} }
void MaskedStore(RValue<Pointer<Float4>> base, RValue<Float4> val, RValue<Int4> mask, unsigned int alignment) void MaskedStore(RValue<Pointer<Float4>> base, RValue<Float4> val, RValue<Int4> mask, unsigned int alignment)
...@@ -4252,14 +4252,14 @@ namespace rr ...@@ -4252,14 +4252,14 @@ namespace rr
Nucleus::createMaskedStore(base.value, val.value, mask.value, alignment); Nucleus::createMaskedStore(base.value, val.value, mask.value, alignment);
} }
RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment) RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
{ {
return RValue<Float4>(Nucleus::createGather(base.value, Float::getType(), offsets.value, mask.value, alignment)); return RValue<Float4>(Nucleus::createGather(base.value, Float::getType(), offsets.value, mask.value, alignment, zeroMaskedLanes));
} }
RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment) RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
{ {
return RValue<Int4>(Nucleus::createGather(base.value, Int::getType(), offsets.value, mask.value, alignment)); return RValue<Int4>(Nucleus::createGather(base.value, Int::getType(), offsets.value, mask.value, alignment, zeroMaskedLanes));
} }
void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment) void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
......
...@@ -2369,13 +2369,13 @@ namespace rr ...@@ -2369,13 +2369,13 @@ namespace rr
} }
// TODO: Use SIMD to template these. // TODO: Use SIMD to template these.
RValue<Float4> MaskedLoad(RValue<Pointer<Float4>> base, RValue<Int4> mask, unsigned int alignment); RValue<Float4> MaskedLoad(RValue<Pointer<Float4>> base, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes = false);
RValue<Int4> MaskedLoad(RValue<Pointer<Int4>> base, RValue<Int4> mask, unsigned int alignment); RValue<Int4> MaskedLoad(RValue<Pointer<Int4>> base, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes = false);
void MaskedStore(RValue<Pointer<Float4>> base, RValue<Float4> val, RValue<Int4> mask, unsigned int alignment); void MaskedStore(RValue<Pointer<Float4>> base, RValue<Float4> val, RValue<Int4> mask, unsigned int alignment);
void MaskedStore(RValue<Pointer<Int4>> base, RValue<Int4> val, RValue<Int4> mask, unsigned int alignment); void MaskedStore(RValue<Pointer<Int4>> base, RValue<Int4> val, RValue<Int4> mask, unsigned int alignment);
RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment); RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes = false);
RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment); RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes = false);
void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment); void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment);
void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment); void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment);
......
...@@ -3476,9 +3476,9 @@ namespace rr ...@@ -3476,9 +3476,9 @@ namespace rr
// Below are functions currently unimplemented for the Subzero backend. // Below are functions currently unimplemented for the Subzero backend.
// They are stubbed to satisfy the linker. // They are stubbed to satisfy the linker.
void Nucleus::createFence(std::memory_order memoryOrder) { UNIMPLEMENTED("Subzero createFence()"); } void Nucleus::createFence(std::memory_order memoryOrder) { UNIMPLEMENTED("Subzero createFence()"); }
Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createMaskedLoad()"); return nullptr; } Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes) { UNIMPLEMENTED("Subzero createMaskedLoad()"); return nullptr; }
void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createMaskedStore()"); } void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createMaskedStore()"); }
Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createGather()"); return nullptr; } Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment, bool zeroMaskedLanes) { UNIMPLEMENTED("Subzero createGather()"); return nullptr; }
void Nucleus::createScatter(Value *base, Value *val, Value *offsets, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createScatter()"); } void Nucleus::createScatter(Value *base, Value *val, Value *offsets, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createScatter()"); }
RValue<Float4> Sin(RValue<Float4> x) { UNIMPLEMENTED("Subzero Sin()"); return Float4(0); } RValue<Float4> Sin(RValue<Float4> x) { UNIMPLEMENTED("Subzero Sin()"); return Float4(0); }
RValue<Float4> Cos(RValue<Float4> x) { UNIMPLEMENTED("Subzero Cos()"); return Float4(0); } RValue<Float4> Cos(RValue<Float4> x) { UNIMPLEMENTED("Subzero Cos()"); return Float4(0); }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment