Commit e7f6cac1 by Dejan Mircevski

Merge branch 'topo' into loopgen-after-readable-order

parents dba28263 159b59fa
......@@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 2.8)
set(SOURCES
GlslangToSpv.cpp
InReadableOrder.cpp
SpvBuilder.cpp
SPVRemapper.cpp
doc.cpp
......
......@@ -1911,6 +1911,14 @@ void TGlslangToSpvTraverser::handleFunctionEntry(const glslang::TIntermAggregate
void TGlslangToSpvTraverser::translateArguments(const glslang::TIntermAggregate& node, std::vector<spv::Id>& arguments)
{
const glslang::TIntermSequence& glslangArguments = node.getSequence();
glslang::TSampler sampler = {};
bool cubeCompare = false;
if (node.isTexture()) {
sampler = glslangArguments[0]->getAsTyped()->getType().getSampler();
cubeCompare = sampler.dim == glslang::EsdCube && sampler.arrayed && sampler.shadow;
}
for (int i = 0; i < (int)glslangArguments.size(); ++i) {
builder.clearAccessChain();
glslangArguments[i]->traverse(this);
......@@ -1929,6 +1937,51 @@ void TGlslangToSpvTraverser::translateArguments(const glslang::TIntermAggregate&
if (i == 0)
lvalue = true;
break;
case glslang::EOpSparseTexture:
if ((cubeCompare && i == 3) || (! cubeCompare && i == 2))
lvalue = true;
break;
case glslang::EOpSparseTextureClamp:
if ((cubeCompare && i == 4) || (! cubeCompare && i == 3))
lvalue = true;
break;
case glslang::EOpSparseTextureLod:
case glslang::EOpSparseTextureOffset:
if (i == 3)
lvalue = true;
break;
case glslang::EOpSparseTextureFetch:
if ((sampler.dim != glslang::EsdRect && i == 3) || (sampler.dim == glslang::EsdRect && i == 2))
lvalue = true;
break;
case glslang::EOpSparseTextureFetchOffset:
if ((sampler.dim != glslang::EsdRect && i == 4) || (sampler.dim == glslang::EsdRect && i == 3))
lvalue = true;
break;
case glslang::EOpSparseTextureLodOffset:
case glslang::EOpSparseTextureGrad:
case glslang::EOpSparseTextureOffsetClamp:
if (i == 4)
lvalue = true;
break;
case glslang::EOpSparseTextureGradOffset:
case glslang::EOpSparseTextureGradClamp:
if (i == 5)
lvalue = true;
break;
case glslang::EOpSparseTextureGradOffsetClamp:
if (i == 6)
lvalue = true;
break;
case glslang::EOpSparseTextureGather:
if ((sampler.shadow && i == 3) || (! sampler.shadow && i == 2))
lvalue = true;
break;
case glslang::EOpSparseTextureGatherOffset:
case glslang::EOpSparseTextureGatherOffsets:
if ((sampler.shadow && i == 4) || (! sampler.shadow && i == 3))
lvalue = true;
break;
default:
break;
}
......@@ -1990,6 +2043,8 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO
return builder.createTextureQueryCall(spv::OpImageQueryLod, params);
case glslang::EOpTextureQueryLevels:
return builder.createTextureQueryCall(spv::OpImageQueryLevels, params);
case glslang::EOpSparseTexelsResident:
return builder.createUnaryOp(spv::OpImageSparseTexelsResident, builder.makeBoolType(), arguments[0]);
default:
assert(0);
break;
......@@ -2017,7 +2072,11 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO
operands.push_back(*opIt);
builder.createNoResultOp(spv::OpImageWrite, operands);
return spv::NoResult;
} else {
} else if (node->isSparseImage()) {
spv::MissingFunctionality("sparse image functions");
return spv::NoResult;
}
else {
// Process image atomic operations
// GLSL "IMAGE_PARAMS" will involve in constructing an image texel pointer and this pointer,
......@@ -2037,7 +2096,7 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO
}
// Check for texture functions other than queries
bool sparse = node->isSparseTexture();
bool cubeCompare = sampler.dim == glslang::EsdCube && sampler.arrayed && sampler.shadow;
// check for bias argument
......@@ -2048,6 +2107,10 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO
++nonBiasArgCount;
if (cracked.grad)
nonBiasArgCount += 2;
if (cracked.lodClamp)
++nonBiasArgCount;
if (sparse)
++nonBiasArgCount;
if ((int)arguments.size() > nonBiasArgCount)
bias = true;
......@@ -2059,9 +2122,10 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO
int extraArgs = 0;
// sort out where Dref is coming from
if (sampler.shadow && sampler.dim == glslang::EsdCube && sampler.arrayed)
if (cubeCompare) {
params.Dref = arguments[2];
else if (sampler.shadow && cracked.gather) {
++extraArgs;
} else if (sampler.shadow && cracked.gather) {
params.Dref = arguments[2];
++extraArgs;
} else if (sampler.shadow) {
......@@ -2093,6 +2157,14 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO
params.offsets = arguments[2 + extraArgs];
++extraArgs;
}
if (cracked.lodClamp) {
params.lodClamp = arguments[2 + extraArgs];
++extraArgs;
}
if (sparse) {
params.texelOut = arguments[2 + extraArgs];
++extraArgs;
}
if (bias) {
params.bias = arguments[2 + extraArgs];
++extraArgs;
......@@ -2107,7 +2179,7 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO
}
}
return builder.createTextureCall(precision, convertGlslangToSpvType(node->getType()), cracked.fetch, cracked.proj, cracked.gather, params);
return builder.createTextureCall(precision, convertGlslangToSpvType(node->getType()), sparse, cracked.fetch, cracked.proj, cracked.gather, params);
}
spv::Id TGlslangToSpvTraverser::handleUserFunctionCall(const glslang::TIntermAggregate* node)
......@@ -2981,7 +3053,7 @@ spv::Id TGlslangToSpvTraverser::createMiscOperation(glslang::TOperator op, spv::
spv::Op opCode = spv::OpNop;
int libCall = -1;
int consumedOperands = operands.size();
size_t consumedOperands = operands.size();
spv::Id typeId0 = 0;
if (consumedOperands > 0)
typeId0 = builder.getTypeId(operands[0]);
......
//
//Copyright (C) 2016 Google, Inc.
//
//All rights reserved.
//
//Redistribution and use in source and binary forms, with or without
//modification, are permitted provided that the following conditions
//are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
//
// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
//"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
//LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
//FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
//COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
//INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
//BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
//LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
//CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
//LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
//ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
//POSSIBILITY OF SUCH DAMAGE.
//
// Author: Dejan Mircevski, Google
//
// The SPIR-V spec requires code blocks to appear in an order satisfying the
// dominator-tree direction (ie, dominator before the dominated). This is,
// actually, easy to achieve: any pre-order CFG traversal algorithm will do it.
// Because such algorithms visit a block only after traversing some path to it
// from the root, they necessarily visit the block's idom first.
//
// But not every graph-traversal algorithm outputs blocks in an order that
// appears logical to human readers. The problem is that unrelated branches may
// be interspersed with each other, and merge blocks may come before some of the
// branches being merged.
//
// A good, human-readable order of blocks may be achieved by performing
// depth-first search but delaying merge nodes until after all their branches
// have been visited. This is implemented below by the inReadableOrder()
// function.
#include "spvIR.h"
#include <cassert>
#include <unordered_map>
using spv::Block;
using spv::Id;
namespace {
// Traverses CFG in a readable order, invoking a pre-set callback on each block.
// Use by calling visit() on the root block.
class ReadableOrderTraverser {
public:
explicit ReadableOrderTraverser(std::function<void(Block*)> callback) : callback_(callback) {}
// Visits the block if it hasn't been visited already and isn't currently
// being delayed. Invokes callback(block), then descends into its successors.
// Delays merge-block processing until all the branches have been completed.
void visit(Block* block)
{
assert(block);
if (visited_[block] || delayed_[block])
return;
callback_(block);
visited_[block] = true;
Block* mergeBlock = nullptr;
auto mergeInst = block->getMergeInstruction();
if (mergeInst) {
Id mergeId = mergeInst->getIdOperand(0);
mergeBlock = block->getParent().getParent().getInstruction(mergeId)->getBlock();
delayed_[mergeBlock] = true;
}
for (const auto succ : block->getSuccessors())
visit(succ);
if (mergeBlock) {
delayed_[mergeBlock] = false;
visit(mergeBlock);
}
}
private:
std::function<void(Block*)> callback_;
// Whether a block has already been visited or is being delayed.
std::unordered_map<Block *, bool> visited_, delayed_;
};
}
void spv::inReadableOrder(Block* root, std::function<void(Block*)> callback)
{
ReadableOrderTraverser(callback).visit(root);
}
......@@ -52,6 +52,7 @@
#include "spvIR.h"
#include <algorithm>
#include <memory>
#include <stack>
#include <map>
......@@ -201,11 +202,13 @@ public:
void setBuildPoint(Block* bp) { buildPoint = bp; }
Block* getBuildPoint() const { return buildPoint; }
// Make the main function.
// Make the main function. The returned pointer is only valid
// for the lifetime of this builder.
Function* makeMain();
// Make a shader-style function, and create its entry block if entry is non-zero.
// Return the function, pass back the entry.
// The returned pointer is only valid for the lifetime of this builder.
Function* makeFunctionEntry(Id returnType, const char* name, std::vector<Id>& paramTypes, Block **entry = 0);
// Create a return. An 'implicit' return is one not appearing in the source
......@@ -310,10 +313,12 @@ public:
Id gradY;
Id sample;
Id comp;
Id texelOut;
Id lodClamp;
};
// Select the correct texture operation based on all inputs, and emit the correct instruction
Id createTextureCall(Decoration precision, Id resultType, bool fetch, bool proj, bool gather, const TextureParameters&);
Id createTextureCall(Decoration precision, Id resultType, bool sparse, bool fetch, bool proj, bool gather, const TextureParameters&);
// Emit the OpTextureQuery* instruction that was passed in.
// Figure out the right return value and type, and return it.
......@@ -513,7 +518,7 @@ public:
void simplifyAccessChainSwizzle();
void createAndSetNoPredecessorBlock(const char*);
void createSelectionMerge(Block* mergeBlock, unsigned int control);
void dumpInstructions(std::vector<unsigned int>&, const std::vector<Instruction*>&) const;
void dumpInstructions(std::vector<unsigned int>&, const std::vector<std::unique_ptr<Instruction> >&) const;
SourceLanguage source;
int sourceVersion;
......@@ -529,14 +534,15 @@ public:
AccessChain accessChain;
// special blocks of instructions for output
std::vector<Instruction*> imports;
std::vector<Instruction*> entryPoints;
std::vector<Instruction*> executionModes;
std::vector<Instruction*> names;
std::vector<Instruction*> lines;
std::vector<Instruction*> decorations;
std::vector<Instruction*> constantsTypesGlobals;
std::vector<Instruction*> externals;
std::vector<std::unique_ptr<Instruction> > imports;
std::vector<std::unique_ptr<Instruction> > entryPoints;
std::vector<std::unique_ptr<Instruction> > executionModes;
std::vector<std::unique_ptr<Instruction> > names;
std::vector<std::unique_ptr<Instruction> > lines;
std::vector<std::unique_ptr<Instruction> > decorations;
std::vector<std::unique_ptr<Instruction> > constantsTypesGlobals;
std::vector<std::unique_ptr<Instruction> > externals;
std::vector<std::unique_ptr<Function> > functions;
// not output, internally used for quick & dirty canonical (unique) creation
std::vector<Instruction*> groupedConstants[OpConstant]; // all types appear before OpConstant
......
......@@ -53,12 +53,15 @@
#include "spirv.hpp"
#include <algorithm>
#include <vector>
#include <cassert>
#include <functional>
#include <iostream>
#include <assert.h>
#include <memory>
#include <vector>
namespace spv {
class Block;
class Function;
class Module;
......@@ -75,8 +78,8 @@ const MemorySemanticsMask MemorySemanticsAllMemory = (MemorySemanticsMask)0x3FF;
class Instruction {
public:
Instruction(Id resultId, Id typeId, Op opCode) : resultId(resultId), typeId(typeId), opCode(opCode) { }
explicit Instruction(Op opCode) : resultId(NoResult), typeId(NoType), opCode(opCode) { }
Instruction(Id resultId, Id typeId, Op opCode) : resultId(resultId), typeId(typeId), opCode(opCode), block(nullptr) { }
explicit Instruction(Op opCode) : resultId(NoResult), typeId(NoType), opCode(opCode), block(nullptr) { }
virtual ~Instruction() {}
void addIdOperand(Id id) { operands.push_back(id); }
void addImmediateOperand(unsigned int immediate) { operands.push_back(immediate); }
......@@ -107,6 +110,8 @@ public:
addImmediateOperand(word);
}
}
void setBlock(Block* b) { block = b; }
Block* getBlock() const { return block; }
Op getOpCode() const { return opCode; }
int getNumOperands() const { return (int)operands.size(); }
Id getResultId() const { return resultId; }
......@@ -145,6 +150,7 @@ protected:
Op opCode;
std::vector<Id> operands;
std::string originalString; // could be optimized away; convenience for getting string operand
Block* block;
};
//
......@@ -156,18 +162,31 @@ public:
Block(Id id, Function& parent);
virtual ~Block()
{
// TODO: free instructions
}
Id getId() { return instructions.front()->getResultId(); }
Function& getParent() const { return parent; }
void addInstruction(Instruction* inst);
void addPredecessor(Block* pred) { predecessors.push_back(pred); }
void addLocalVariable(Instruction* inst) { localVariables.push_back(inst); }
int getNumPredecessors() const { return (int)predecessors.size(); }
void addInstruction(std::unique_ptr<Instruction> inst);
void addPredecessor(Block* pred) { predecessors.push_back(pred); pred->successors.push_back(this);}
void addLocalVariable(std::unique_ptr<Instruction> inst) { localVariables.push_back(std::move(inst)); }
const std::vector<Block*>& getPredecessors() const { return predecessors; }
const std::vector<Block*>& getSuccessors() const { return successors; }
void setUnreachable() { unreachable = true; }
bool isUnreachable() const { return unreachable; }
// Returns the block's merge instruction, if one exists (otherwise null).
const Instruction* getMergeInstruction() const {
if (instructions.size() < 2) return nullptr;
const Instruction* nextToLast = (instructions.cend() - 2)->get();
switch (nextToLast->getOpCode()) {
case OpSelectionMerge:
case OpLoopMerge:
return nextToLast;
default:
return nullptr;
}
return nullptr;
}
bool isTerminated() const
{
......@@ -206,9 +225,9 @@ protected:
// To enforce keeping parent and ownership in sync:
friend Function;
std::vector<Instruction*> instructions;
std::vector<Block*> predecessors;
std::vector<Instruction*> localVariables;
std::vector<std::unique_ptr<Instruction> > instructions;
std::vector<Block*> predecessors, successors;
std::vector<std::unique_ptr<Instruction> > localVariables;
Function& parent;
// track whether this block is known to be uncreachable (not necessarily
......@@ -217,6 +236,11 @@ protected:
bool unreachable;
};
// Traverses the control-flow graph rooted at root in an order suited for
// readable code generation. Invokes callback at every node in the traversal
// order.
void inReadableOrder(Block* root, std::function<void(Block*)> callback);
//
// SPIR-V IR Function.
//
......@@ -247,7 +271,7 @@ public:
Module& getParent() const { return parent; }
Block* getEntryBlock() const { return blocks.front(); }
Block* getLastBlock() const { return blocks.back(); }
void addLocalVariable(Instruction* inst);
void addLocalVariable(std::unique_ptr<Instruction> inst);
Id getReturnType() const { return functionInstruction.getTypeId(); }
void dump(std::vector<unsigned int>& out) const
{
......@@ -259,8 +283,7 @@ public:
parameterInstructions[p]->dump(out);
// Blocks
for (int b = 0; b < (int)blocks.size(); ++b)
blocks[b]->dump(out);
inReadableOrder(blocks[0], [&out](const Block* b) { b->dump(out); });
Instruction end(0, 0, OpFunctionEnd);
end.dump(out);
}
......@@ -348,22 +371,27 @@ __inline Function::Function(Id id, Id resultType, Id functionType, Id firstParam
}
}
__inline void Function::addLocalVariable(Instruction* inst)
__inline void Function::addLocalVariable(std::unique_ptr<Instruction> inst)
{
blocks[0]->addLocalVariable(inst);
parent.mapInstruction(inst);
Instruction* raw_instruction = inst.get();
blocks[0]->addLocalVariable(std::move(inst));
parent.mapInstruction(raw_instruction);
}
__inline Block::Block(Id id, Function& parent) : parent(parent), unreachable(false)
{
instructions.push_back(new Instruction(id, NoType, OpLabel));
instructions.push_back(std::unique_ptr<Instruction>(new Instruction(id, NoType, OpLabel)));
instructions.back()->setBlock(this);
parent.getParent().mapInstruction(instructions.back().get());
}
__inline void Block::addInstruction(Instruction* inst)
__inline void Block::addInstruction(std::unique_ptr<Instruction> inst)
{
instructions.push_back(inst);
if (inst->getResultId())
parent.getParent().mapInstruction(inst);
Instruction* raw_instruction = inst.get();
instructions.push_back(std::move(inst));
raw_instruction->setBlock(this);
if (raw_instruction->getResultId())
parent.getParent().mapInstruction(raw_instruction);
}
}; // end spv namespace
......
......@@ -435,6 +435,8 @@ void ProcessConfigFile()
}
if (configStrings)
FreeFileData(configStrings);
else
delete[] config;
}
// thread-safe list of shaders to asynchronously grab and compile
......@@ -658,17 +660,27 @@ void StderrIfNonEmpty(const char* str)
}
}
// Simple bundling of what makes a compilation unit for ease in passing around,
// and separation of handling file IO versus API (programmatic) compilation.
struct ShaderCompUnit {
EShLanguage stage;
std::string fileName;
char** text; // memory owned/managed externally
};
//
// For linking mode: Will independently parse each item in the worklist, but then put them
// in the same program and link them together.
// For linking mode: Will independently parse each compilation unit, but then put them
// in the same program and link them together, making at most one linked module per
// pipeline stage.
//
// Uses the new C++ interface instead of the old handle-based interface.
//
void CompileAndLinkShaders()
void CompileAndLinkShaderUnits(std::vector<ShaderCompUnit> compUnits)
{
// keep track of what to free
std::list<glslang::TShader*> shaders;
EShMessages messages = EShMsgDefault;
SetMessageOptions(messages);
......@@ -677,22 +689,13 @@ void CompileAndLinkShaders()
//
glslang::TProgram& program = *new glslang::TProgram;
glslang::TWorkItem* workItem;
while (Worklist.remove(workItem)) {
EShLanguage stage = FindLanguage(workItem->name);
glslang::TShader* shader = new glslang::TShader(stage);
for (auto compUnit : compUnits) {
glslang::TShader* shader = new glslang::TShader(compUnit.stage);
shader->setStrings(compUnit.text, 1);
shaders.push_back(shader);
char** shaderStrings = ReadFileData(workItem->name.c_str());
if (! shaderStrings) {
usage();
delete &program;
return;
}
const int defaultVersion = Options & EOptionDefaultDesktop? 110: 100;
shader->setStrings(shaderStrings, 1);
if (Options & EOptionOutputPreprocessed) {
std::string str;
if (shader->preprocess(&Resources, defaultVersion, ENoProfile, false, false,
......@@ -703,7 +706,6 @@ void CompileAndLinkShaders()
}
StderrIfNonEmpty(shader->getInfoLog());
StderrIfNonEmpty(shader->getInfoDebugLog());
FreeFileData(shaderStrings);
continue;
}
if (! shader->parse(&Resources, defaultVersion, false, messages))
......@@ -711,13 +713,12 @@ void CompileAndLinkShaders()
program.addShader(shader);
if (! (Options & EOptionSuppressInfolog)) {
PutsIfNonEmpty(workItem->name.c_str());
if (! (Options & EOptionSuppressInfolog) &&
! (Options & EOptionMemoryLeakMode)) {
PutsIfNonEmpty(compUnit.fileName.c_str());
PutsIfNonEmpty(shader->getInfoLog());
PutsIfNonEmpty(shader->getInfoDebugLog());
}
FreeFileData(shaderStrings);
}
//
......@@ -727,7 +728,8 @@ void CompileAndLinkShaders()
if (! (Options & EOptionOutputPreprocessed) && ! program.link(messages))
LinkFailed = true;
if (! (Options & EOptionSuppressInfolog)) {
if (! (Options & EOptionSuppressInfolog) &&
! (Options & EOptionMemoryLeakMode)) {
PutsIfNonEmpty(program.getInfoLog());
PutsIfNonEmpty(program.getInfoDebugLog());
}
......@@ -745,10 +747,15 @@ void CompileAndLinkShaders()
if (program.getIntermediate((EShLanguage)stage)) {
std::vector<unsigned int> spirv;
glslang::GlslangToSpv(*program.getIntermediate((EShLanguage)stage), spirv);
glslang::OutputSpv(spirv, GetBinaryName((EShLanguage)stage));
if (Options & EOptionHumanReadableSpv) {
spv::Parameterize();
spv::Disassemble(std::cout, spirv);
// Dump the spv to a file or stdout, etc., but only if not doing
// memory/perf testing, as it's not internal to programmatic use.
if (! (Options & EOptionMemoryLeakMode)) {
glslang::OutputSpv(spirv, GetBinaryName((EShLanguage)stage));
if (Options & EOptionHumanReadableSpv) {
spv::Parameterize();
spv::Disassemble(std::cout, spirv);
}
}
}
}
......@@ -766,6 +773,59 @@ void CompileAndLinkShaders()
}
}
//
// Do file IO part of compile and link, handing off the pure
// API/programmatic mode to CompileAndLinkShaderUnits(), which can
// be put in a loop for testing memory footprint and performance.
//
// This is just for linking mode: meaning all the shaders will be put into the
// the same program linked together.
//
// This means there are a limited number of work items (not multi-threading mode)
// and that the point is testing at the linking level. Hence, to enable
// performance and memory testing, the actual compile/link can be put in
// a loop, independent of processing the work items and file IO.
//
void CompileAndLinkShaderFiles()
{
std::vector<ShaderCompUnit> compUnits;
// Transfer all the work items from to a simple list of
// of compilation units. (We don't care about the thread
// work-item distribution properties in this path, which
// is okay due to the limited number of shaders, know since
// they are all getting linked together.)
glslang::TWorkItem* workItem;
while (Worklist.remove(workItem)) {
ShaderCompUnit compUnit = {
FindLanguage(workItem->name),
workItem->name,
ReadFileData(workItem->name.c_str())
};
if (! compUnit.text) {
usage();
return;
}
compUnits.push_back(compUnit);
}
// Actual call to programmatic processing of compile and link,
// in a loop for testing memory and performance. This part contains
// all the perf/memory that a programmatic consumer will care about.
for (int i = 0; i < ((Options & EOptionMemoryLeakMode) ? 100 : 1); ++i) {
for (int j = 0; j < ((Options & EOptionMemoryLeakMode) ? 100 : 1); ++j)
CompileAndLinkShaderUnits(compUnits);
if (Options & EOptionMemoryLeakMode)
glslang::OS_DumpMemoryCounters();
}
for (auto c : compUnits)
FreeFileData(c.text);
}
int C_DECL main(int argc, char* argv[])
{
ProcessArguments(argc, argv);
......@@ -803,8 +863,13 @@ int C_DECL main(int argc, char* argv[])
if (Options & EOptionLinkProgram ||
Options & EOptionOutputPreprocessed) {
glslang::InitializeProcess();
CompileAndLinkShaders();
CompileAndLinkShaderFiles();
glslang::FinalizeProcess();
for (int w = 0; w < NumWorkItems; ++w) {
if (Work[w]) {
delete Work[w];
}
}
} else {
ShInitialize();
......@@ -837,6 +902,8 @@ int C_DECL main(int argc, char* argv[])
ShFinalize();
}
delete[] Work;
if (CompileFailed)
return EFailCompile;
if (LinkFailed)
......
......@@ -60,7 +60,7 @@ patch out vec4 patchOut; // ERROR
void foo24()
{
dvec3 df, di;
df = modf(outp.xyz, di);
df = modf(dvec3(outp.xyz), di);
}
in float in1;
......
......@@ -115,3 +115,216 @@ void qlod()
lod = textureQueryLod(samp1D, pf); // ERROR, only in fragment
lod = textureQueryLod(samp2Ds, pf2); // ERROR, only in fragment
}
void doubles()
{
double doublev;
dvec2 dvec2v;
dvec3 dvec3v;
dvec4 dvec4v;
bool boolv;
bvec2 bvec2v;
bvec3 bvec3v;
bvec4 bvec4v;
doublev = sqrt(2.9);
dvec2v = sqrt(dvec2(2.7));
dvec3v = sqrt(dvec3(2.0));
dvec4v = sqrt(dvec4(2.1));
doublev += inversesqrt(doublev);
dvec2v += inversesqrt(dvec2v);
dvec3v += inversesqrt(dvec3v);
dvec4v += inversesqrt(dvec4v);
doublev += abs(doublev);
dvec2v += abs(dvec2v);
dvec3v += abs(dvec3v);
dvec4v += abs(dvec4v);
doublev += sign(doublev);
dvec2v += sign(dvec2v);
dvec3v += sign(dvec3v);
dvec4v += sign(dvec4v);
doublev += floor(doublev);
dvec2v += floor(dvec2v);
dvec3v += floor(dvec3v);
dvec4v += floor(dvec4v);
doublev += trunc(doublev);
dvec2v += trunc(dvec2v);
dvec3v += trunc(dvec3v);
dvec4v += trunc(dvec4v);
doublev += round(doublev);
dvec2v += round(dvec2v);
dvec3v += round(dvec3v);
dvec4v += round(dvec4v);
doublev += roundEven(doublev);
dvec2v += roundEven(dvec2v);
dvec3v += roundEven(dvec3v);
dvec4v += roundEven(dvec4v);
doublev += ceil(doublev);
dvec2v += ceil(dvec2v);
dvec3v += ceil(dvec3v);
dvec4v += ceil(dvec4v);
doublev += fract(doublev);
dvec2v += fract(dvec2v);
dvec3v += fract(dvec3v);
dvec4v += fract(dvec4v);
doublev += mod(doublev, doublev);
dvec2v += mod(dvec2v, doublev);
dvec3v += mod(dvec3v, doublev);
dvec4v += mod(dvec4v, doublev);
dvec2v += mod(dvec2v, dvec2v);
dvec3v += mod(dvec3v, dvec3v);
dvec4v += mod(dvec4v, dvec4v);
doublev += modf(doublev, doublev);
dvec2v += modf(dvec2v, dvec2v);
dvec3v += modf(dvec3v, dvec3v);
dvec4v += modf(dvec4v, dvec4v);
doublev += min(doublev, doublev);
dvec2v += min(dvec2v, doublev);
dvec3v += min(dvec3v, doublev);
dvec4v += min(dvec4v, doublev);
dvec2v += min(dvec2v, dvec2v);
dvec3v += min(dvec3v, dvec3v);
dvec4v += min(dvec4v, dvec4v);
doublev += max(doublev, doublev);
dvec2v += max(dvec2v, doublev);
dvec3v += max(dvec3v, doublev);
dvec4v += max(dvec4v, doublev);
dvec2v += max(dvec2v, dvec2v);
dvec3v += max(dvec3v, dvec3v);
dvec4v += max(dvec4v, dvec4v);
doublev += clamp(doublev, doublev, doublev);
dvec2v += clamp(dvec2v, doublev, doublev);
dvec3v += clamp(dvec3v, doublev, doublev);
dvec4v += clamp(dvec4v, doublev, doublev);
dvec2v += clamp(dvec2v, dvec2v, dvec2v);
dvec3v += clamp(dvec3v, dvec3v, dvec3v);
dvec4v += clamp(dvec4v, dvec4v, dvec4v);
doublev += mix(doublev, doublev, doublev);
dvec2v += mix(dvec2v, dvec2v, doublev);
dvec3v += mix(dvec3v, dvec3v, doublev);
dvec4v += mix(dvec4v, dvec4v, doublev);
dvec2v += mix(dvec2v, dvec2v, dvec2v);
dvec3v += mix(dvec3v, dvec3v, dvec3v);
dvec4v += mix(dvec4v, dvec4v, dvec4v);
doublev += mix(doublev, doublev, boolv);
dvec2v += mix(dvec2v, dvec2v, bvec2v);
dvec3v += mix(dvec3v, dvec3v, bvec3v);
dvec4v += mix(dvec4v, dvec4v, bvec4v);
doublev += step(doublev, doublev);
dvec2v += step(dvec2v, dvec2v);
dvec3v += step(dvec3v, dvec3v);
dvec4v += step(dvec4v, dvec4v);
dvec2v += step(doublev, dvec2v);
dvec3v += step(doublev, dvec3v);
dvec4v += step(doublev, dvec4v);
doublev += smoothstep(doublev, doublev, doublev);
dvec2v += smoothstep(dvec2v, dvec2v, dvec2v);
dvec3v += smoothstep(dvec3v, dvec3v, dvec3v);
dvec4v += smoothstep(dvec4v, dvec4v, dvec4v);
dvec2v += smoothstep(doublev, doublev, dvec2v);
dvec3v += smoothstep(doublev, doublev, dvec3v);
dvec4v += smoothstep(doublev, doublev, dvec4v);
boolv = isnan(doublev);
bvec2v = isnan(dvec2v);
bvec3v = isnan(dvec3v);
bvec4v = isnan(dvec4v);
boolv = boolv ? isinf(doublev) : false;
bvec2v = boolv ? isinf(dvec2v) : bvec2(false);
bvec3v = boolv ? isinf(dvec3v) : bvec3(false);
bvec4v = boolv ? isinf(dvec4v) : bvec4(false);
doublev += length(doublev);
doublev += length(dvec2v);
doublev += length(dvec3v);
doublev += length(dvec4v);
doublev += distance(doublev, doublev);
doublev += distance(dvec2v, dvec2v);
doublev += distance(dvec3v, dvec3v);
doublev += distance(dvec4v, dvec4v);
doublev += dot(doublev, doublev);
doublev += dot(dvec2v, dvec2v);
doublev += dot(dvec3v, dvec3v);
doublev += dot(dvec4v, dvec4v);
dvec3v += cross(dvec3v, dvec3v);
doublev += normalize(doublev);
dvec2v += normalize(dvec2v);
dvec3v += normalize(dvec3v);
dvec4v += normalize(dvec4v);
doublev += faceforward(doublev, doublev, doublev);
dvec2v += faceforward(dvec2v, dvec2v, dvec2v);
dvec3v += faceforward(dvec3v, dvec3v, dvec3v);
dvec4v += faceforward(dvec4v, dvec4v, dvec4v);
doublev += reflect(doublev, doublev);
dvec2v += reflect(dvec2v, dvec2v);
dvec3v += reflect(dvec3v, dvec3v);
dvec4v += reflect(dvec4v, dvec4v);
doublev += refract(doublev, doublev, doublev);
dvec2v += refract(dvec2v, dvec2v, doublev);
dvec3v += refract(dvec3v, dvec3v, doublev);
dvec4v += refract(dvec4v, dvec4v, doublev);
dmat2 dmat2v = outerProduct(dvec2v, dvec2v);
dmat3 dmat3v = outerProduct(dvec3v, dvec3v);
dmat4 dmat4v = outerProduct(dvec4v, dvec4v);
dmat2x3 dmat2x3v = outerProduct(dvec3v, dvec2v);
dmat3x2 dmat3x2v = outerProduct(dvec2v, dvec3v);
dmat2x4 dmat2x4v = outerProduct(dvec4v, dvec2v);
dmat4x2 dmat4x2v = outerProduct(dvec2v, dvec4v);
dmat3x4 dmat3x4v = outerProduct(dvec4v, dvec3v);
dmat4x3 dmat4x3v = outerProduct(dvec3v, dvec4v);
dmat2v *= matrixCompMult(dmat2v, dmat2v);
dmat3v *= matrixCompMult(dmat3v, dmat3v);
dmat4v *= matrixCompMult(dmat4v, dmat4v);
dmat2x3v = matrixCompMult(dmat2x3v, dmat2x3v);
dmat2x4v = matrixCompMult(dmat2x4v, dmat2x4v);
dmat3x2v = matrixCompMult(dmat3x2v, dmat3x2v);
dmat3x4v = matrixCompMult(dmat3x4v, dmat3x4v);
dmat4x2v = matrixCompMult(dmat4x2v, dmat4x2v);
dmat4x3v = matrixCompMult(dmat4x3v, dmat4x3v);
dmat2v *= transpose(dmat2v);
dmat3v *= transpose(dmat3v);
dmat4v *= transpose(dmat4v);
dmat2x3v = transpose(dmat3x2v);
dmat3x2v = transpose(dmat2x3v);
dmat2x4v = transpose(dmat4x2v);
dmat4x2v = transpose(dmat2x4v);
dmat3x4v = transpose(dmat4x3v);
dmat4x3v = transpose(dmat3x4v);
doublev += determinant(dmat2v);
doublev += determinant(dmat3v);
doublev += determinant(dmat4v);
dmat2v *= inverse(dmat2v);
dmat3v *= inverse(dmat3v);
dmat4v *= inverse(dmat4v);
}
......@@ -218,26 +218,18 @@ ERROR: node is still EOpNull!
0:? Sequence
0:63 move second child to first child (temp 3-component vector of double)
0:63 'df' (temp 3-component vector of double)
0:63 Convert float to double (temp 3-component vector of double)
0:63 Comma (global 3-component vector of float)
0:63 move second child to first child (temp 3-component vector of float)
0:63 'tempReturn' (global 3-component vector of float)
0:63 modf (global 3-component vector of float)
0:63 vector swizzle (temp 3-component vector of float)
0:63 'outp' (out 4-component vector of float)
0:63 Sequence
0:63 Constant:
0:63 0 (const int)
0:63 Constant:
0:63 1 (const int)
0:63 Constant:
0:63 2 (const int)
0:63 'tempArg' (temp 3-component vector of float)
0:63 move second child to first child (temp 3-component vector of double)
0:63 'di' (temp 3-component vector of double)
0:63 Convert float to double (temp 3-component vector of double)
0:63 'tempArg' (temp 3-component vector of float)
0:63 'tempReturn' (global 3-component vector of float)
0:63 modf (global 3-component vector of double)
0:63 Convert float to double (temp 3-component vector of double)
0:63 vector swizzle (temp 3-component vector of float)
0:63 'outp' (out 4-component vector of float)
0:63 Sequence
0:63 Constant:
0:63 0 (const int)
0:63 Constant:
0:63 1 (const int)
0:63 Constant:
0:63 2 (const int)
0:63 'di' (temp 3-component vector of double)
0:71 Function Definition: foodc1( (global void)
0:71 Function Parameters:
0:73 Sequence
......@@ -707,26 +699,18 @@ ERROR: node is still EOpNull!
0:? Sequence
0:63 move second child to first child (temp 3-component vector of double)
0:63 'df' (temp 3-component vector of double)
0:63 Convert float to double (temp 3-component vector of double)
0:63 Comma (global 3-component vector of float)
0:63 move second child to first child (temp 3-component vector of float)
0:63 'tempReturn' (global 3-component vector of float)
0:63 modf (global 3-component vector of float)
0:63 vector swizzle (temp 3-component vector of float)
0:63 'outp' (out 4-component vector of float)
0:63 Sequence
0:63 Constant:
0:63 0 (const int)
0:63 Constant:
0:63 1 (const int)
0:63 Constant:
0:63 2 (const int)
0:63 'tempArg' (temp 3-component vector of float)
0:63 move second child to first child (temp 3-component vector of double)
0:63 'di' (temp 3-component vector of double)
0:63 Convert float to double (temp 3-component vector of double)
0:63 'tempArg' (temp 3-component vector of float)
0:63 'tempReturn' (global 3-component vector of float)
0:63 modf (global 3-component vector of double)
0:63 Convert float to double (temp 3-component vector of double)
0:63 vector swizzle (temp 3-component vector of float)
0:63 'outp' (out 4-component vector of float)
0:63 Sequence
0:63 Constant:
0:63 0 (const int)
0:63 Constant:
0:63 1 (const int)
0:63 Constant:
0:63 2 (const int)
0:63 'di' (temp 3-component vector of double)
0:71 Function Definition: foodc1( (global void)
0:71 Function Parameters:
0:73 Sequence
......
......@@ -85,7 +85,7 @@ Uniform block reflection:
nameless: offset -1, type ffffffff, size 496, index -1
named: offset -1, type ffffffff, size 304, index -1
c_nameless: offset -1, type ffffffff, size 112, index -1
nested: offset -1, type ffffffff, size 28, index -1
nested: offset -1, type ffffffff, size 32, index -1
abl[0]: offset -1, type ffffffff, size 4, index -1
abl[1]: offset -1, type ffffffff, size 4, index -1
abl[2]: offset -1, type ffffffff, size 4, index -1
......
......@@ -110,23 +110,4 @@ Linked fragment stage:
Branch 49
49: Label
Kill
69: Label
70: 6(float) Load 36(radius)
72: 46(bool) FOrdGreaterThanEqual 70 71
SelectionMerge 74 None
BranchConditional 72 73 74
73: Label
75: 6(float) Load 36(radius)
77: 6(float) ExtInst 1(GLSL.std.450) 26(Pow) 75 76
78: 6(float) FDiv 77 27
79: 6(float) ExtInst 1(GLSL.std.450) 4(FAbs) 78
80: 7(fvec4) Load 15(color)
81: 7(fvec4) CompositeConstruct 79 79 79 79
82: 7(fvec4) FSub 80 81
Store 15(color) 82
Branch 74
74: Label
83: 7(fvec4) Load 15(color)
Store 59(gl_FragColor) 83
Return
FunctionEnd
spv.branch-return.vert
Linked vertex stage:
// Module Version 10000
// Generated by (magic number): 80001
// Id's are bound by 35
Capability Shader
1: ExtInstImport "GLSL.std.450"
MemoryModel Logical GLSL450
EntryPoint Vertex 4 "main" 8 19 34
Source ESSL 300
Name 4 "main"
Name 8 "gl_InstanceID"
Name 19 "gl_Position"
Name 34 "gl_VertexID"
Decorate 8(gl_InstanceID) BuiltIn InstanceId
Decorate 19(gl_Position) BuiltIn Position
Decorate 34(gl_VertexID) BuiltIn VertexId
2: TypeVoid
3: TypeFunction 2
6: TypeInt 32 1
7: TypePointer Input 6(int)
8(gl_InstanceID): 7(ptr) Variable Input
16: TypeFloat 32
17: TypeVector 16(float) 4
18: TypePointer Output 17(fvec4)
19(gl_Position): 18(ptr) Variable Output
20: 16(float) Constant 0
21: 17(fvec4) ConstantComposite 20 20 20 20
26: 16(float) Constant 1039918957
27: TypeInt 32 0
28: 27(int) Constant 0
29: TypePointer Output 16(float)
34(gl_VertexID): 7(ptr) Variable Input
4(main): 2 Function None 3
5: Label
9: 6(int) Load 8(gl_InstanceID)
SelectionMerge 14 None
Switch 9 14
case 0: 10
case 1: 11
case 2: 12
case 3: 13
10: Label
Return
11: Label
Store 19(gl_Position) 21
Branch 14
12: Label
Return
13: Label
Return
14: Label
30: 29(ptr) AccessChain 19(gl_Position) 28
31: 16(float) Load 30
32: 16(float) FAdd 31 26
33: 29(ptr) AccessChain 19(gl_Position) 28
Store 33 32
Return
FunctionEnd
......@@ -60,14 +60,14 @@ Linked vertex stage:
30: 29(ptr) AccessChain 24(colorOut) 25
Store 30 28
Branch 13
13: Label
31: 6(int) Load 8(i)
32: 6(int) IAdd 31 9
Store 8(i) 32
Branch 10
12: Label
35: 29(ptr) AccessChain 24(colorOut) 34
36: 19(fvec4) Load 35
Store 33(gl_Position) 36
Return
13: Label
31: 6(int) Load 8(i)
32: 6(int) IAdd 31 9
Store 8(i) 32
Branch 10
FunctionEnd
......@@ -42,10 +42,10 @@ Linked vertex stage:
16: 6(int) IAdd 14 15
Store 8(i) 16
Branch 13
12: Label
Return
13: Label
17: 6(int) Load 8(i)
20: 19(bool) SLessThan 17 18
BranchConditional 20 10 12
FunctionEnd
12: Label
Return
FunctionEnd
......@@ -63,21 +63,15 @@ Linked vertex stage:
18: 17(bool) IEqual 15 16
SelectionMerge 20 None
BranchConditional 18 19 20
12: Label
Store 41(G) 42
Return
19: Label
Store 21(B) 22
Branch 13
13: Label
37: 6(int) Load 8(i)
38: 6(int) IAdd 37 22
Store 8(i) 38
40: 17(bool) SLessThan 38 39
BranchConditional 40 10 12
19: Label
Store 21(B) 22
Branch 13
23: Label
Store 24(C) 16
Branch 20
20: Label
25: 6(int) Load 8(i)
27: 17(bool) IEqual 25 26
......@@ -86,10 +80,10 @@ Linked vertex stage:
28: Label
Store 30(D) 31
Branch 12
32: Label
Store 33(E) 34
Branch 29
29: Label
Store 35(F) 36
Branch 13
12: Label
Store 41(G) 42
Return
FunctionEnd
......@@ -51,14 +51,14 @@ Linked fragment stage:
21: 7(fvec4) FAdd 20 19
Store 9(color) 21
Branch 16
15: Label
34: 7(fvec4) Load 9(color)
Store 33(gl_FragColor) 34
Return
16: Label
25: 24(ptr) AccessChain 9(color) 23
26: 6(float) Load 25
29: 6(float) Load 28(d)
31: 30(bool) FOrdLessThan 26 29
BranchConditional 31 13 15
FunctionEnd
15: Label
34: 7(fvec4) Load 9(color)
Store 33(gl_FragColor) 34
Return
FunctionEnd
......@@ -64,20 +64,14 @@ Linked vertex stage:
23: 16(bool) IEqual 22 9
SelectionMerge 25 None
BranchConditional 23 24 25
12: Label
Store 42(G) 43
Return
24: Label
Store 26(B) 19
Branch 13
13: Label
40: 6(int) Load 8(i)
41: 6(int) IAdd 40 19
Store 8(i) 41
Branch 10
24: Label
Store 26(B) 19
Branch 13
27: Label
Store 28(C) 19
Branch 25
25: Label
29: 6(int) Load 8(i)
31: 6(int) SMod 29 30
......@@ -87,10 +81,10 @@ Linked vertex stage:
33: Label
Store 35(D) 19
Branch 12
36: Label
Store 37(E) 19
Branch 34
34: Label
Store 38(F) 39
Branch 13
FunctionEnd
12: Label
Store 42(G) 43
Return
FunctionEnd
......@@ -47,13 +47,13 @@ Linked vertex stage:
BranchConditional 17 11 12
11: Label
Branch 13
13: Label
18: 6(int) Load 8(i)
20: 6(int) IAdd 18 19
Store 8(i) 20
Branch 10
12: Label
23: 6(int) Load 8(i)
Store 22(r) 23
Return
13: Label
18: 6(int) Load 8(i)
20: 6(int) IAdd 18 19
Store 8(i) 20
Branch 10
FunctionEnd
......@@ -45,11 +45,11 @@ Linked vertex stage:
11: Label
Store 18(j) 19
Branch 13
13: Label
20: 6(int) Load 8(i)
22: 6(int) IAdd 20 21
Store 8(i) 22
Branch 10
12: Label
Return
13: Label
20: 6(int) Load 8(i)
22: 6(int) IAdd 20 21
Store 8(i) 22
Branch 10
FunctionEnd
......@@ -88,17 +88,17 @@ Linked fragment stage:
31: 7(fvec4) FAdd 30 29
Store 9(color) 31
Branch 20
20: Label
32: 13(int) Load 15(i)
34: 13(int) IAdd 32 33
Store 15(i) 34
Branch 17
19: Label
37: 7(fvec4) Load 9(color)
Store 36(gl_FragColor) 37
Store 39(sum) 40
Store 41(i) 16
Branch 42
20: Label
32: 13(int) Load 15(i)
34: 13(int) IAdd 32 33
Store 15(i) 34
Branch 17
42: Label
46: 13(int) Load 41(i)
48: 25(bool) SLessThan 46 47
......@@ -113,14 +113,14 @@ Linked fragment stage:
59: 6(float) FAdd 58 57
Store 39(sum) 59
Branch 45
45: Label
60: 13(int) Load 41(i)
61: 13(int) IAdd 60 33
Store 41(i) 61
Branch 42
44: Label
Store 62(i) 16
Branch 63
45: Label
60: 13(int) Load 41(i)
61: 13(int) IAdd 60 33
Store 41(i) 61
Branch 42
63: Label
67: 13(int) Load 62(i)
68: 25(bool) SLessThan 67 47
......@@ -136,6 +136,11 @@ Linked fragment stage:
77: 38(ptr) AccessChain 69(tv4) 70
Store 77 76
Branch 66
66: Label
78: 13(int) Load 62(i)
79: 13(int) IAdd 78 33
Store 62(i) 79
Branch 63
65: Label
80: 6(float) Load 39(sum)
81: 7(fvec4) CompositeConstruct 80 80 80 80
......@@ -151,11 +156,6 @@ Linked fragment stage:
Store 86(r) 91
Store 92(i) 16
Branch 93
66: Label
78: 13(int) Load 62(i)
79: 13(int) IAdd 78 33
Store 62(i) 79
Branch 63
93: Label
97: 13(int) Load 92(i)
98: 13(int) Load 23(Count)
......@@ -167,6 +167,11 @@ Linked fragment stage:
104: 38(ptr) AccessChain 86(r) 103
Store 104 102
Branch 96
96: Label
105: 13(int) Load 92(i)
106: 13(int) IAdd 105 33
Store 92(i) 106
Branch 93
95: Label
107: 7(fvec4) Load 86(r)
108: 87(fvec3) VectorShuffle 107 107 0 1 2
......@@ -178,11 +183,6 @@ Linked fragment stage:
Store 36(gl_FragColor) 113
Store 114(i) 16
Branch 115
96: Label
105: 13(int) Load 92(i)
106: 13(int) IAdd 105 33
Store 92(i) 106
Branch 93
115: Label
119: 13(int) Load 114(i)
121: 25(bool) SLessThan 119 120
......@@ -194,11 +194,11 @@ Linked fragment stage:
124: 7(fvec4) VectorTimesScalar 123 122
Store 36(gl_FragColor) 124
Branch 118
118: Label
125: 13(int) Load 114(i)
126: 13(int) IAdd 125 47
Store 114(i) 126
Branch 115
117: Label
Return
118: Label
125: 13(int) Load 114(i)
126: 13(int) IAdd 125 47
Store 114(i) 126
Branch 115
FunctionEnd
......@@ -170,16 +170,16 @@ Linked fragment stage:
86: 30(ptr) AccessChain 83(a) 84
Store 86 85
Branch 79
78: Label
90: 6(int) Load 89(condition)
91: 23(bool) IEqual 90 28
SelectionMerge 93 None
BranchConditional 91 92 93
79: Label
87: 6(int) Load 75(i)
88: 6(int) IAdd 87 28
Store 75(i) 88
Branch 76
78: Label
90: 6(int) Load 89(condition)
91: 23(bool) IEqual 90 28
SelectionMerge 93 None
BranchConditional 91 92 93
92: Label
94: 34 Load 70(localArray)
Store 83(a) 94
......
......@@ -153,14 +153,6 @@ Linked fragment stage:
31: 30(bool) FOrdLessThan 26 29
SelectionMerge 33 None
BranchConditional 31 32 33
15: Label
Branch 73
16: Label
69: 24(ptr) AccessChain 9(color) 35
70: 6(float) Load 69
71: 6(float) Load 28(d4)
72: 30(bool) FOrdLessThan 70 71
BranchConditional 72 13 15
32: Label
36: 24(ptr) AccessChain 9(color) 35
37: 6(float) Load 36
......@@ -179,6 +171,12 @@ Linked fragment stage:
49: 6(float) FAdd 47 48
Store 46 49
Branch 16
16: Label
69: 24(ptr) AccessChain 9(color) 35
70: 6(float) Load 69
71: 6(float) Load 28(d4)
72: 30(bool) FOrdLessThan 70 71
BranchConditional 72 13 15
45: Label
Branch 33
33: Label
......@@ -206,6 +204,8 @@ Linked fragment stage:
Branch 57
57: Label
Branch 16
15: Label
Branch 73
73: Label
78: 24(ptr) AccessChain 9(color) 77
79: 6(float) Load 78
......@@ -220,16 +220,6 @@ Linked fragment stage:
86: 30(bool) FOrdLessThan 84 85
SelectionMerge 88 None
BranchConditional 86 87 92
75: Label
136: 7(fvec4) Load 9(color)
137: 7(fvec4) CompositeConstruct 48 48 48 48
138: 7(fvec4) FAdd 136 137
Store 9(color) 138
141: 7(fvec4) Load 9(color)
Store 140(gl_FragColor) 141
Return
76: Label
Branch 73
87: Label
89: 7(fvec4) Load 9(color)
90: 7(fvec4) CompositeConstruct 48 48 48 48
......@@ -271,6 +261,8 @@ Linked fragment stage:
117: 6(float) FAdd 116 48
Store 115 117
Branch 76
76: Label
Branch 73
114: Label
Branch 104
104: Label
......@@ -298,4 +290,12 @@ Linked fragment stage:
Branch 124
124: Label
Branch 76
FunctionEnd
75: Label
136: 7(fvec4) Load 9(color)
137: 7(fvec4) CompositeConstruct 48 48 48 48
138: 7(fvec4) FAdd 136 137
Store 9(color) 138
141: 7(fvec4) Load 9(color)
Store 140(gl_FragColor) 141
Return
FunctionEnd
spv.merge-unreachable.frag
Warning, version 450 is not yet complete; most version-specific features are present, but some are missing.
Linked fragment stage:
// Module Version 10000
// Generated by (magic number): 80001
// Id's are bound by 25
Capability Shader
1: ExtInstImport "GLSL.std.450"
MemoryModel Logical GLSL450
EntryPoint Fragment 4 "main" 9
ExecutionMode 4 OriginLowerLeft
Source GLSL 450
Name 4 "main"
Name 9 "v"
Decorate 9(v) Location 1
2: TypeVoid
3: TypeFunction 2
6: TypeFloat 32
7: TypeVector 6(float) 4
8: TypePointer Input 7(fvec4)
9(v): 8(ptr) Variable Input
11: 6(float) Constant 1036831949
12: 6(float) Constant 1045220557
13: 6(float) Constant 1050253722
14: 6(float) Constant 1053609165
15: 7(fvec4) ConstantComposite 11 12 13 14
16: TypeBool
17: TypeVector 16(bool) 4
4(main): 2 Function None 3
5: Label
10: 7(fvec4) Load 9(v)
18: 17(bvec4) FOrdEqual 10 15
19: 16(bool) All 18
SelectionMerge 21 None
BranchConditional 19 20 23
20: Label
Kill
23: Label
Return
21: Label
Return
FunctionEnd
......@@ -57,17 +57,11 @@ Linked vertex stage:
23: 16(bool) IEqual 22 9
SelectionMerge 25 None
BranchConditional 23 24 25
12: Label
Store 38(D) 39
Return
13: Label
Branch 10
24: Label
Store 26(B) 21
Branch 13
27: Label
Store 28(C) 21
Branch 25
13: Label
Branch 10
25: Label
29: 6(int) Load 8(i)
31: 6(int) SMod 29 30
......@@ -77,12 +71,12 @@ Linked vertex stage:
33: Label
Store 26(B) 21
Branch 12
35: Label
Store 28(C) 21
Branch 34
34: Label
36: 6(int) Load 8(i)
37: 6(int) IAdd 36 19
Store 8(i) 37
Branch 13
FunctionEnd
12: Label
Store 38(D) 39
Return
FunctionEnd
......@@ -44,8 +44,8 @@ Linked vertex stage:
20: 6(int) IAdd 18 19
Store 8(i) 20
Branch 13
13: Label
Branch 10
12: Label
Return
13: Label
Branch 10
FunctionEnd
......@@ -55,10 +55,10 @@ Linked fragment stage:
31: 7(fvec4) FAdd 30 29
Store 9(color) 31
Branch 16
16: Label
Branch 13
15: Label
34: 7(fvec4) Load 9(color)
Store 33(gl_FragColor) 34
Return
16: Label
Branch 13
FunctionEnd
......@@ -21,6 +21,223 @@ void foo23()
outp.x += textureProjGradOffset(u2drs, outp, vec2(0.0), vec2(0.0), offsets[1]);
}
void doubles()
{
double doublev;
dvec2 dvec2v;
dvec3 dvec3v;
dvec4 dvec4v;
bool boolv;
bvec2 bvec2v;
bvec3 bvec3v;
bvec4 bvec4v;
doublev = sqrt(2.9);
dvec2v = sqrt(dvec2(2.7));
dvec3v = sqrt(dvec3(2.0));
dvec4v = sqrt(dvec4(doublev));
doublev += inversesqrt(doublev);
dvec2v += inversesqrt(dvec2v);
dvec3v += inversesqrt(dvec3v);
dvec4v += inversesqrt(dvec4v);
doublev += abs(doublev);
dvec2v += abs(dvec2v);
dvec3v += abs(dvec3v);
dvec4v += abs(dvec4v);
doublev += sign(doublev);
dvec2v += sign(dvec2v);
dvec3v += sign(dvec3v);
dvec4v += sign(dvec4v);
doublev += floor(doublev);
dvec2v += floor(dvec2v);
dvec3v += floor(dvec3v);
dvec4v += floor(dvec4v);
doublev += trunc(doublev);
dvec2v += trunc(dvec2v);
dvec3v += trunc(dvec3v);
dvec4v += trunc(dvec4v);
doublev += round(doublev);
dvec2v += round(dvec2v);
dvec3v += round(dvec3v);
dvec4v += round(dvec4v);
doublev += roundEven(doublev);
dvec2v += roundEven(dvec2v);
dvec3v += roundEven(dvec3v);
dvec4v += roundEven(dvec4v);
doublev += ceil(doublev);
dvec2v += ceil(dvec2v);
dvec3v += ceil(dvec3v);
dvec4v += ceil(dvec4v);
doublev += fract(doublev);
dvec2v += fract(dvec2v);
dvec3v += fract(dvec3v);
dvec4v += fract(dvec4v);
doublev += mod(doublev, doublev);
dvec2v += mod(dvec2v, doublev);
dvec3v += mod(dvec3v, doublev);
dvec4v += mod(dvec4v, doublev);
dvec2v += mod(dvec2v, dvec2v);
dvec3v += mod(dvec3v, dvec3v);
dvec4v += mod(dvec4v, dvec4v);
doublev += modf(doublev, doublev);
dvec2v += modf(dvec2v, dvec2v);
dvec3v += modf(dvec3v, dvec3v);
dvec4v += modf(dvec4v, dvec4v);
doublev += min(doublev, doublev);
dvec2v += min(dvec2v, doublev);
dvec3v += min(dvec3v, doublev);
dvec4v += min(dvec4v, doublev);
dvec2v += min(dvec2v, dvec2v);
dvec3v += min(dvec3v, dvec3v);
dvec4v += min(dvec4v, dvec4v);
doublev += max(doublev, doublev);
dvec2v += max(dvec2v, doublev);
dvec3v += max(dvec3v, doublev);
dvec4v += max(dvec4v, doublev);
dvec2v += max(dvec2v, dvec2v);
dvec3v += max(dvec3v, dvec3v);
dvec4v += max(dvec4v, dvec4v);
doublev += clamp(doublev, doublev, doublev);
dvec2v += clamp(dvec2v, doublev, doublev);
dvec3v += clamp(dvec3v, doublev, doublev);
dvec4v += clamp(dvec4v, doublev, doublev);
dvec2v += clamp(dvec2v, dvec2v, dvec2v);
dvec3v += clamp(dvec3v, dvec3v, dvec3v);
dvec4v += clamp(dvec4v, dvec4v, dvec4v);
doublev += mix(doublev, doublev, doublev);
dvec2v += mix(dvec2v, dvec2v, doublev);
dvec3v += mix(dvec3v, dvec3v, doublev);
dvec4v += mix(dvec4v, dvec4v, doublev);
dvec2v += mix(dvec2v, dvec2v, dvec2v);
dvec3v += mix(dvec3v, dvec3v, dvec3v);
dvec4v += mix(dvec4v, dvec4v, dvec4v);
doublev += mix(doublev, doublev, boolv);
dvec2v += mix(dvec2v, dvec2v, bvec2v);
dvec3v += mix(dvec3v, dvec3v, bvec3v);
dvec4v += mix(dvec4v, dvec4v, bvec4v);
doublev += step(doublev, doublev);
dvec2v += step(dvec2v, dvec2v);
dvec3v += step(dvec3v, dvec3v);
dvec4v += step(dvec4v, dvec4v);
dvec2v += step(doublev, dvec2v);
dvec3v += step(doublev, dvec3v);
dvec4v += step(doublev, dvec4v);
doublev += smoothstep(doublev, doublev, doublev);
dvec2v += smoothstep(dvec2v, dvec2v, dvec2v);
dvec3v += smoothstep(dvec3v, dvec3v, dvec3v);
dvec4v += smoothstep(dvec4v, dvec4v, dvec4v);
dvec2v += smoothstep(doublev, doublev, dvec2v);
dvec3v += smoothstep(doublev, doublev, dvec3v);
dvec4v += smoothstep(doublev, doublev, dvec4v);
boolv = isnan(doublev);
bvec2v = isnan(dvec2v);
bvec3v = isnan(dvec3v);
bvec4v = isnan(dvec4v);
boolv = boolv ? isinf(doublev) : false;
bvec2v = boolv ? isinf(dvec2v) : bvec2(false);
bvec3v = boolv ? isinf(dvec3v) : bvec3(false);
bvec4v = boolv ? isinf(dvec4v) : bvec4(false);
doublev += length(doublev);
doublev += length(dvec2v);
doublev += length(dvec3v);
doublev += length(dvec4v);
doublev += distance(doublev, doublev);
doublev += distance(dvec2v, dvec2v);
doublev += distance(dvec3v, dvec3v);
doublev += distance(dvec4v, dvec4v);
doublev += dot(doublev, doublev);
doublev += dot(dvec2v, dvec2v);
doublev += dot(dvec3v, dvec3v);
doublev += dot(dvec4v, dvec4v);
dvec3v += cross(dvec3v, dvec3v);
doublev += normalize(doublev);
dvec2v += normalize(dvec2v);
dvec3v += normalize(dvec3v);
dvec4v += normalize(dvec4v);
doublev += faceforward(doublev, doublev, doublev);
dvec2v += faceforward(dvec2v, dvec2v, dvec2v);
dvec3v += faceforward(dvec3v, dvec3v, dvec3v);
dvec4v += faceforward(dvec4v, dvec4v, dvec4v);
doublev += reflect(doublev, doublev);
dvec2v += reflect(dvec2v, dvec2v);
dvec3v += reflect(dvec3v, dvec3v);
dvec4v += reflect(dvec4v, dvec4v);
doublev += refract(doublev, doublev, doublev);
dvec2v += refract(dvec2v, dvec2v, doublev);
dvec3v += refract(dvec3v, dvec3v, doublev);
dvec4v += refract(dvec4v, dvec4v, doublev);
dmat2 dmat2v = outerProduct(dvec2v, dvec2v);
dmat3 dmat3v = outerProduct(dvec3v, dvec3v);
dmat4 dmat4v = outerProduct(dvec4v, dvec4v);
dmat2x3 dmat2x3v = outerProduct(dvec3v, dvec2v);
dmat3x2 dmat3x2v = outerProduct(dvec2v, dvec3v);
dmat2x4 dmat2x4v = outerProduct(dvec4v, dvec2v);
dmat4x2 dmat4x2v = outerProduct(dvec2v, dvec4v);
dmat3x4 dmat3x4v = outerProduct(dvec4v, dvec3v);
dmat4x3 dmat4x3v = outerProduct(dvec3v, dvec4v);
dmat2v *= matrixCompMult(dmat2v, dmat2v);
dmat3v *= matrixCompMult(dmat3v, dmat3v);
dmat4v *= matrixCompMult(dmat4v, dmat4v);
dmat2x3v = matrixCompMult(dmat2x3v, dmat2x3v); // For now, relying on no dead-code elimination
dmat2x4v = matrixCompMult(dmat2x4v, dmat2x4v);
dmat3x2v = matrixCompMult(dmat3x2v, dmat3x2v);
dmat3x4v = matrixCompMult(dmat3x4v, dmat3x4v);
dmat4x2v = matrixCompMult(dmat4x2v, dmat4x2v);
dmat4x3v = matrixCompMult(dmat4x3v, dmat4x3v);
dmat2v *= transpose(dmat2v);
dmat3v *= transpose(dmat3v);
dmat4v *= transpose(dmat4v);
dmat2x3v = transpose(dmat3x2v); // For now, relying on no dead-code elimination
dmat3x2v = transpose(dmat2x3v);
dmat2x4v = transpose(dmat4x2v);
dmat4x2v = transpose(dmat2x4v);
dmat3x4v = transpose(dmat4x3v);
dmat4x3v = transpose(dmat3x4v);
doublev += determinant(dmat2v);
doublev += determinant(dmat3v);
doublev += determinant(dmat4v);
dmat2v *= inverse(dmat2v);
dmat3v *= inverse(dmat3v);
dmat4v *= inverse(dmat4v);
outp *= float(doublev + dvec2v.y + dvec3v.z + dvec4v.w +
dmat2v[1][1] + dmat3v[2][2] + dmat4v[3][3] + dmat2x3v[1][1] + dmat3x2v[1][1] + dmat3x4v[2][2] + dmat4x3v[2][2] + dmat2x4v[1][1] + dmat4x2v[1][1] +
float(boolv) + float(bvec2v.x) + float(bvec3v.x) + float(bvec4v.x));
}
void main()
{
vec4 v;
......@@ -38,5 +255,6 @@ void main()
outp += gl_FragCoord + vl2;
foo23();
doubles();
}
#version 300 es
void main() {
switch (gl_InstanceID) {
case 0: return;
case 1: gl_Position = vec4(0.0); break;
case 2: return;
case 3: return;
}
gl_Position.x += 0.123;
}
#version 450
layout(location=1) in highp vec4 v;
void main (void)
{
if (v == vec4(0.1,0.2,0.3,0.4)) discard;
else return;
}
#version 450
#extension GL_ARB_sparse_texture2: enable
uniform sampler2D s2D;
uniform sampler3D s3D;
uniform sampler2DShadow s2DShadow;
uniform samplerCubeShadow sCubeShadow;
uniform sampler2DArrayShadow s2DArrayShadow;
uniform sampler2DRectShadow s2DRectShadow;
uniform samplerCubeArrayShadow sCubeArrayShadow;
uniform sampler2DMS s2DMS;
uniform isamplerCube isCube;
uniform isampler2DArray is2DArray;
uniform usamplerCubeArray usCubeArray;
uniform usampler2DRect us2DRect;
uniform vec2 c2;
uniform vec3 c3;
uniform vec4 c4;
uniform ivec2 offsets[4];
out vec4 outColor;
void main()
{
int resident = 0;
vec4 texel = vec4(0.0);
ivec4 itexel = ivec4(0);
uvec4 utexel = uvec4(0);
resident |= sparseTextureARB(s2D, c2, texel);
resident |= sparseTextureARB(s3D, c3, texel, 2.0);
resident |= sparseTextureARB(isCube, c3, itexel);
resident |= sparseTextureARB(s2DShadow, c3, texel.x);
resident |= sparseTextureARB(sCubeArrayShadow, c4, 1.0, texel.x);
resident |= sparseTextureLodARB(s2D, c2, 2.0, texel);
resident |= sparseTextureLodARB(usCubeArray, c4, 1.0, utexel);
resident |= sparseTextureLodARB(s2DShadow, c3, 2.0, texel.y);
resident |= sparseTextureOffsetARB(s3D, c3, ivec3(2), texel, 2.0);
resident |= sparseTextureOffsetARB(us2DRect, c2, ivec2(3), utexel);
resident |= sparseTextureOffsetARB(s2DArrayShadow, c4, ivec2(5), texel.z);
resident |= sparseTexelFetchARB(s2D, ivec2(c2), 2, texel);
resident |= sparseTexelFetchARB(us2DRect, ivec2(c2), utexel);
resident |= sparseTexelFetchARB(s2DMS, ivec2(c2), 4, texel);
resident |= sparseTexelFetchOffsetARB(s3D, ivec3(c3), 2, ivec3(4), texel);
resident |= sparseTexelFetchOffsetARB(us2DRect, ivec2(c2), ivec2(3), utexel);
resident |= sparseTextureLodOffsetARB(s2D, c2, 2.0, ivec2(5), texel);
resident |= sparseTextureLodOffsetARB(is2DArray, c3, 2.0, ivec2(6), itexel);
resident |= sparseTextureLodOffsetARB(s2DShadow, c3, 2.0, ivec2(7), texel.z);
resident |= sparseTextureGradARB(s3D, c3, c3, c3, texel);
resident |= sparseTextureGradARB(sCubeShadow, c4, c3, c3, texel.y);
resident |= sparseTextureGradARB(usCubeArray, c4, c3, c3, utexel);
resident |= sparseTextureGradOffsetARB(s2D, c2, c2, c2, ivec2(5), texel);
resident |= sparseTextureGradOffsetARB(s2DRectShadow, c3, c2, c2, ivec2(6), texel.w);
resident |= sparseTextureGradOffsetARB(is2DArray, c3, c2, c2, ivec2(2), itexel);
resident |= sparseTextureGatherARB(s2D, c2, texel);
resident |= sparseTextureGatherARB(is2DArray, c3, itexel, 2);
resident |= sparseTextureGatherARB(s2DArrayShadow, c3, 2.0, texel);
resident |= sparseTextureGatherOffsetARB(s2D, c2, ivec2(4), texel);
resident |= sparseTextureGatherOffsetARB(is2DArray, c3, ivec2(5), itexel, 2);
resident |= sparseTextureGatherOffsetARB(s2DRectShadow, c2, 2.0, ivec2(7), texel);
resident |= sparseTextureGatherOffsetsARB(s2D, c2, offsets, texel);
resident |= sparseTextureGatherOffsetsARB(is2DArray, c3, offsets, itexel, 2);
resident |= sparseTextureGatherOffsetsARB(s2DRectShadow, c2, 2.0, offsets, texel);
outColor = sparseTexelsResidentARB(resident) ? texel : vec4(itexel) + vec4(utexel);
}
\ No newline at end of file
#version 450
#extension GL_ARB_sparse_texture_clamp: enable
uniform sampler2D s2D;
uniform sampler3D s3D;
uniform sampler2DShadow s2DShadow;
uniform samplerCubeShadow sCubeShadow;
uniform sampler2DArrayShadow s2DArrayShadow;
uniform sampler2DRectShadow s2DRectShadow;
uniform samplerCubeArrayShadow sCubeArrayShadow;
uniform isamplerCube isCube;
uniform isampler2DArray is2DArray;
uniform usamplerCubeArray usCubeArray;
uniform usampler2DRect us2DRect;
uniform vec2 c2;
uniform vec3 c3;
uniform vec4 c4;
uniform float lodClamp;
out vec4 outColor;
void main()
{
int resident = 0;
vec4 texel = vec4(0.0);
ivec4 itexel = ivec4(0);
uvec4 utexel = uvec4(0);
resident |= sparseTextureClampARB(s2D, c2, lodClamp, texel);
resident |= sparseTextureClampARB(s3D, c3, lodClamp, texel, 2.0);
resident |= sparseTextureClampARB(isCube, c3, lodClamp, itexel);
resident |= sparseTextureClampARB(s2DShadow, c3, lodClamp, texel.x);
resident |= sparseTextureClampARB(sCubeArrayShadow, c4, 1.0, lodClamp, texel.x);
texel += textureClampARB(s2D, c2, lodClamp);
texel += textureClampARB(s3D, c3, lodClamp, 2.0);
itexel += textureClampARB(isCube, c3, lodClamp);
texel.x += textureClampARB(s2DShadow, c3, lodClamp);
texel.x += textureClampARB(sCubeArrayShadow, c4, 1.0, lodClamp);
resident |= sparseTextureOffsetClampARB(s3D, c3, ivec3(2), lodClamp, texel, 2.0);
resident |= sparseTextureOffsetClampARB(us2DRect, c2, ivec2(3), lodClamp, utexel);
resident |= sparseTextureOffsetClampARB(s2DArrayShadow, c4, ivec2(5), lodClamp, texel.z);
texel += textureOffsetClampARB(s3D, c3, ivec3(2), lodClamp, 2.0);
utexel += textureOffsetClampARB(us2DRect, c2, ivec2(3), lodClamp);
texel.z += textureOffsetClampARB(s2DArrayShadow, c4, ivec2(5), lodClamp);
resident |= sparseTextureGradClampARB(s3D, c3, c3, c3, lodClamp, texel);
resident |= sparseTextureGradClampARB(sCubeShadow, c4, c3, c3, lodClamp, texel.y);
resident |= sparseTextureGradClampARB(usCubeArray, c4, c3, c3, lodClamp, utexel);
texel += textureGradClampARB(s3D, c3, c3, c3, lodClamp);
texel.y += textureGradClampARB(sCubeShadow, c4, c3, c3, lodClamp);
utexel += textureGradClampARB(usCubeArray, c4, c3, c3, lodClamp);
resident |= sparseTextureGradOffsetClampARB(s2D, c2, c2, c2, ivec2(5), lodClamp, texel);
resident |= sparseTextureGradOffsetClampARB(s2DRectShadow, c3, c2, c2, ivec2(6), lodClamp, texel.w);
resident |= sparseTextureGradOffsetClampARB(is2DArray, c3, c2, c2, ivec2(2), lodClamp, itexel);
texel += textureGradOffsetClampARB(s2D, c2, c2, c2, ivec2(5), lodClamp);
texel.w += textureGradOffsetClampARB(s2DRectShadow, c3, c2, c2, ivec2(6), lodClamp);
itexel += textureGradOffsetClampARB(is2DArray, c3, c2, c2, ivec2(2), lodClamp);
outColor = sparseTexelsResidentARB(resident) ? texel : vec4(itexel) + vec4(utexel);
}
\ No newline at end of file
......@@ -34,6 +34,7 @@ spv.always-discard.frag
spv.always-discard2.frag
spv.bitCast.frag
spv.bool.vert
spv.branch-return.vert
spv.conditionalDiscard.frag
spv.conversion.frag
spv.dataOut.frag
......@@ -58,6 +59,7 @@ spv.loopsArtificial.frag
spv.matFun.vert
spv.matrix.frag
spv.matrix2.frag
spv.merge-unreachable.frag
spv.newTexture.frag
spv.nonSquare.vert
spv.Operations.frag
......@@ -68,6 +70,8 @@ spv.qualifiers.vert
spv.shiftOps.frag
spv.simpleFunctionCall.frag
spv.simpleMat.vert
spv.sparseTexture.frag
spv.sparseTextureClamp.frag
spv.structAssignment.frag
spv.structDeref.frag
spv.structure.frag
......
......@@ -369,6 +369,8 @@ enum TOperator {
EOpImageAtomicExchange,
EOpImageAtomicCompSwap,
EOpSparseImageLoad,
EOpImageGuardEnd,
//
......@@ -398,6 +400,31 @@ enum TOperator {
EOpTextureGather,
EOpTextureGatherOffset,
EOpTextureGatherOffsets,
EOpTextureClamp,
EOpTextureOffsetClamp,
EOpTextureGradClamp,
EOpTextureGradOffsetClamp,
EOpSparseTextureGuardBegin,
EOpSparseTexture,
EOpSparseTextureLod,
EOpSparseTextureOffset,
EOpSparseTextureFetch,
EOpSparseTextureFetchOffset,
EOpSparseTextureLodOffset,
EOpSparseTextureGrad,
EOpSparseTextureGradOffset,
EOpSparseTextureGather,
EOpSparseTextureGatherOffset,
EOpSparseTextureGatherOffsets,
EOpSparseTexelsResident,
EOpSparseTextureClamp,
EOpSparseTextureOffsetClamp,
EOpSparseTextureGradClamp,
EOpSparseTextureGradOffsetClamp,
EOpSparseTextureGuardEnd,
EOpTextureGuardEnd,
......@@ -622,6 +649,7 @@ struct TCrackedTextureOp {
bool offsets;
bool gather;
bool grad;
bool lodClamp;
};
//
......@@ -637,6 +665,8 @@ public:
bool isConstructor() const;
bool isTexture() const { return op > EOpTextureGuardBegin && op < EOpTextureGuardEnd; }
bool isImage() const { return op > EOpImageGuardBegin && op < EOpImageGuardEnd; }
bool isSparseTexture() const { return op > EOpSparseTextureGuardBegin && op < EOpSparseTextureGuardEnd; }
bool isSparseImage() const { return op == EOpSparseImageLoad; }
// Crack the op into the individual dimensions of texturing operation.
void crackTexture(TSampler sampler, TCrackedTextureOp& cracked) const
......@@ -649,6 +679,7 @@ public:
cracked.offsets = false;
cracked.gather = false;
cracked.grad = false;
cracked.lodClamp = false;
switch (op) {
case EOpImageQuerySize:
......@@ -657,25 +688,40 @@ public:
case EOpTextureQueryLod:
case EOpTextureQueryLevels:
case EOpTextureQuerySamples:
case EOpSparseTexelsResident:
cracked.query = true;
break;
case EOpTexture:
case EOpSparseTexture:
break;
case EOpTextureClamp:
case EOpSparseTextureClamp:
cracked.lodClamp = true;
break;
case EOpTextureProj:
cracked.proj = true;
break;
case EOpTextureLod:
case EOpSparseTextureLod:
cracked.lod = true;
break;
case EOpTextureOffset:
case EOpSparseTextureOffset:
cracked.offset = true;
break;
case EOpTextureOffsetClamp:
case EOpSparseTextureOffsetClamp:
cracked.offset = true;
cracked.lodClamp = true;
break;
case EOpTextureFetch:
case EOpSparseTextureFetch:
cracked.fetch = true;
if (sampler.dim == Esd1D || (sampler.dim == Esd2D && ! sampler.ms) || sampler.dim == Esd3D)
cracked.lod = true;
break;
case EOpTextureFetchOffset:
case EOpSparseTextureFetchOffset:
cracked.fetch = true;
cracked.offset = true;
if (sampler.dim == Esd1D || (sampler.dim == Esd2D && ! sampler.ms) || sampler.dim == Esd3D)
......@@ -686,6 +732,7 @@ public:
cracked.proj = true;
break;
case EOpTextureLodOffset:
case EOpSparseTextureLodOffset:
cracked.offset = true;
cracked.lod = true;
break;
......@@ -699,9 +746,16 @@ public:
cracked.proj = true;
break;
case EOpTextureGrad:
case EOpSparseTextureGrad:
cracked.grad = true;
break;
case EOpTextureGradClamp:
case EOpSparseTextureGradClamp:
cracked.grad = true;
cracked.lodClamp = true;
break;
case EOpTextureGradOffset:
case EOpSparseTextureGradOffset:
cracked.grad = true;
cracked.offset = true;
break;
......@@ -714,14 +768,23 @@ public:
cracked.offset = true;
cracked.proj = true;
break;
case EOpTextureGradOffsetClamp:
case EOpSparseTextureGradOffsetClamp:
cracked.grad = true;
cracked.offset = true;
cracked.lodClamp = true;
break;
case EOpTextureGather:
case EOpSparseTextureGather:
cracked.gather = true;
break;
case EOpTextureGatherOffset:
case EOpSparseTextureGatherOffset:
cracked.gather = true;
cracked.offset = true;
break;
case EOpTextureGatherOffsets:
case EOpSparseTextureGatherOffsets:
cracked.gather = true;
cracked.offsets = true;
break;
......
......@@ -309,7 +309,7 @@ struct str_hash
unsigned long hash = 5381;
int c;
while ((c = *str++))
while ((c = *str++) != 0)
hash = ((hash << 5) + hash) + c;
return hash;
......
......@@ -173,6 +173,8 @@ void TParseContext::initializeExtensionBehavior()
extensionBehavior[E_GL_ARB_derivative_control] = EBhDisable;
extensionBehavior[E_GL_ARB_shader_texture_image_samples] = EBhDisable;
extensionBehavior[E_GL_ARB_viewport_array] = EBhDisable;
extensionBehavior[E_GL_ARB_sparse_texture2] = EBhDisable;
extensionBehavior[E_GL_ARB_sparse_texture_clamp] = EBhDisable;
// extensionBehavior[E_GL_ARB_cull_distance] = EBhDisable; // present for 4.5, but need extension control over block members
// #line and #include
......@@ -274,6 +276,8 @@ const char* TParseContext::getPreamble()
"#define GL_ARB_derivative_control 1\n"
"#define GL_ARB_shader_texture_image_samples 1\n"
"#define GL_ARB_viewport_array 1\n"
"#define GL_ARB_sparse_texture2 1\n"
"#define GL_ARB_sparse_texture_clamp 1\n"
"#define GL_GOOGLE_cpp_style_line_directive 1\n"
"#define GL_GOOGLE_include_directive 1\n"
......
......@@ -111,6 +111,8 @@ const char* const E_GL_ARB_shader_draw_parameters = "GL_ARB_shader_draw_pa
const char* const E_GL_ARB_derivative_control = "GL_ARB_derivative_control";
const char* const E_GL_ARB_shader_texture_image_samples = "GL_ARB_shader_texture_image_samples";
const char* const E_GL_ARB_viewport_array = "GL_ARB_viewport_array";
const char* const E_GL_ARB_sparse_texture2 = "GL_ARB_sparse_texture2";
const char* const E_GL_ARB_sparse_texture_clamp = "GL_ARB_sparse_texture_clamp";
//const char* const E_GL_ARB_cull_distance = "GL_ARB_cull_distance"; // present for 4.5, but need extension control over block members
// #line and #include
......
......@@ -959,6 +959,11 @@ int TIntermediate::getBaseAlignment(const TType& type, int& size, int& stride, b
size += memberSize;
}
// The structure may have padding at the end; the base offset of
// the member following the sub-structure is rounded up to the next
// multiple of the base alignment of the structure.
RoundToPow2(size, maxAlignment);
return maxAlignment;
}
......@@ -982,7 +987,7 @@ int TIntermediate::getBaseAlignment(const TType& type, int& size, int& stride, b
// rules 5 and 7
if (type.isMatrix()) {
// rule 5: deref to row, not to column, meaning the size of vector is num columns instead of num rows
TType derefType(type, 0, type.getQualifier().layoutMatrix == ElmRowMajor);
TType derefType(type, 0, rowMajor);
alignment = getBaseAlignment(derefType, size, dummyStride, std140, rowMajor);
if (std140)
......
......@@ -195,7 +195,7 @@ int TPpContext::ReadToken(TokenStream *pTok, TPpToken *ppToken)
case PpAtomConstUint:
len = 0;
ch = lReadByte(pTok);
while (ch != 0) {
while (ch != 0 && ch != EndOfInput) {
if (len < MaxTokenLength) {
tokenText[len] = (char)ch;
len++;
......@@ -215,12 +215,10 @@ int TPpContext::ReadToken(TokenStream *pTok, TPpToken *ppToken)
break;
case PpAtomConstFloat:
case PpAtomConstDouble:
strcpy(ppToken->name, tokenText);
ppToken->dval = atof(ppToken->name);
break;
case PpAtomConstInt:
case PpAtomConstUint:
strcpy(ppToken->name, tokenText);
if (len > 0 && tokenText[0] == '0') {
if (len > 1 && (tokenText[1] == 'x' || tokenText[1] == 'X'))
ppToken->ival = strtol(ppToken->name, 0, 16);
......
......@@ -147,6 +147,8 @@ void OS_Sleep(int milliseconds)
Sleep(milliseconds);
}
//#define DUMP_COUNTERS
void OS_DumpMemoryCounters()
{
#ifdef DUMP_COUNTERS
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment