Merge branch 'topo' into loopgen-after-readable-order

e7f6cac1 · Dejan Mircevski · dba28263 · 159b59fa · e7f6cac1 · e7f6cac1
Commit e7f6cac1 authored Jan 19, 2016 by Dejan Mircevski
48 changed files
--- a/SPIRV/CMakeLists.txt
+++ b/SPIRV/CMakeLists.txt
@@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 2.8)

 set(SOURCES
    GlslangToSpv.cpp
+    InReadableOrder.cpp
    SpvBuilder.cpp
    SPVRemapper.cpp
    doc.cpp

--- a/SPIRV/GlslangToSpv.cpp
+++ b/SPIRV/GlslangToSpv.cpp
@@ -1911,6 +1911,14 @@ void TGlslangToSpvTraverser::handleFunctionEntry(const glslang::TIntermAggregate
 void TGlslangToSpvTraverser::translateArguments(const glslang::TIntermAggregate& node, std::vector<spv::Id>& arguments)
 {
    const glslang::TIntermSequence& glslangArguments = node.getSequence();
+
+    glslang::TSampler sampler = {};
+    bool cubeCompare = false;
+    if (node.isTexture()) {
+        sampler = glslangArguments[0]->getAsTyped()->getType().getSampler();
+        cubeCompare = sampler.dim == glslang::EsdCube && sampler.arrayed && sampler.shadow;
+    }
+
    for (int i = 0; i < (int)glslangArguments.size(); ++i) {
        builder.clearAccessChain();
        glslangArguments[i]->traverse(this);
@@ -1929,6 +1937,51 @@ void TGlslangToSpvTraverser::translateArguments(const glslang::TIntermAggregate&
            if (i == 0)
                lvalue = true;
            break;
+        case glslang::EOpSparseTexture:
+            if ((cubeCompare && i == 3) || (! cubeCompare && i == 2))
+                lvalue = true;
+            break;
+        case glslang::EOpSparseTextureClamp:
+            if ((cubeCompare && i == 4) || (! cubeCompare && i == 3))
+                lvalue = true;
+            break;
+        case glslang::EOpSparseTextureLod:
+        case glslang::EOpSparseTextureOffset:
+            if (i == 3)
+                lvalue = true;
+            break;
+        case glslang::EOpSparseTextureFetch:
+            if ((sampler.dim != glslang::EsdRect && i == 3) || (sampler.dim == glslang::EsdRect && i == 2))
+                lvalue = true;
+            break;
+        case glslang::EOpSparseTextureFetchOffset:
+            if ((sampler.dim != glslang::EsdRect && i == 4) || (sampler.dim == glslang::EsdRect && i == 3))
+                lvalue = true;
+            break;
+        case glslang::EOpSparseTextureLodOffset:
+        case glslang::EOpSparseTextureGrad:
+        case glslang::EOpSparseTextureOffsetClamp:
+            if (i == 4)
+                lvalue = true;
+            break;
+        case glslang::EOpSparseTextureGradOffset:
+        case glslang::EOpSparseTextureGradClamp:
+            if (i == 5)
+                lvalue = true;
+            break;
+        case glslang::EOpSparseTextureGradOffsetClamp:
+            if (i == 6)
+                lvalue = true;
+            break;
+         case glslang::EOpSparseTextureGather:
+            if ((sampler.shadow && i == 3) || (! sampler.shadow && i == 2))
+                lvalue = true;
+            break;
+        case glslang::EOpSparseTextureGatherOffset:
+        case glslang::EOpSparseTextureGatherOffsets:
+            if ((sampler.shadow && i == 4) || (! sampler.shadow && i == 3))
+                lvalue = true;
+            break;
        default:
            break;
        }
@@ -1990,6 +2043,8 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO
            return builder.createTextureQueryCall(spv::OpImageQueryLod, params);
        case glslang::EOpTextureQueryLevels:
            return builder.createTextureQueryCall(spv::OpImageQueryLevels, params);
+        case glslang::EOpSparseTexelsResident:
+            return builder.createUnaryOp(spv::OpImageSparseTexelsResident, builder.makeBoolType(), arguments[0]);
        default:
            assert(0);
            break;
@@ -2017,7 +2072,11 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO
                operands.push_back(*opIt);
            builder.createNoResultOp(spv::OpImageWrite, operands);
            return spv::NoResult;
-        } else {
+        } else if (node->isSparseImage()) {
+            spv::MissingFunctionality("sparse image functions");
+            return spv::NoResult;
+        }
+        else {
            // Process image atomic operations

            // GLSL "IMAGE_PARAMS" will involve in constructing an image texel pointer and this pointer,
@@ -2037,7 +2096,7 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO
    }

    // Check for texture functions other than queries
-
+    bool sparse = node->isSparseTexture();
    bool cubeCompare = sampler.dim == glslang::EsdCube && sampler.arrayed && sampler.shadow;

    // check for bias argument
@@ -2048,6 +2107,10 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO
            ++nonBiasArgCount;
        if (cracked.grad)
            nonBiasArgCount += 2;
+        if (cracked.lodClamp)
+            ++nonBiasArgCount;
+        if (sparse)
+            ++nonBiasArgCount;

        if ((int)arguments.size() > nonBiasArgCount)
            bias = true;
@@ -2059,9 +2122,10 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO
    int extraArgs = 0;

    // sort out where Dref is coming from
-    if (sampler.shadow && sampler.dim == glslang::EsdCube && sampler.arrayed)
+    if (cubeCompare) {
        params.Dref = arguments[2];
-    else if (sampler.shadow && cracked.gather) {
+        ++extraArgs;
+    } else if (sampler.shadow && cracked.gather) {
        params.Dref = arguments[2];
        ++extraArgs;
    } else if (sampler.shadow) {
@@ -2093,6 +2157,14 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO
        params.offsets = arguments[2 + extraArgs];
        ++extraArgs;
    }
+    if (cracked.lodClamp) {
+        params.lodClamp = arguments[2 + extraArgs];
+        ++extraArgs;
+    }
+    if (sparse) {
+        params.texelOut = arguments[2 + extraArgs];
+        ++extraArgs;
+    }
    if (bias) {
        params.bias = arguments[2 + extraArgs];
        ++extraArgs;
@@ -2107,7 +2179,7 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO
        }
    }

-    return builder.createTextureCall(precision, convertGlslangToSpvType(node->getType()), cracked.fetch, cracked.proj, cracked.gather, params);
+    return builder.createTextureCall(precision, convertGlslangToSpvType(node->getType()), sparse, cracked.fetch, cracked.proj, cracked.gather, params);
 }

 spv::Id TGlslangToSpvTraverser::handleUserFunctionCall(const glslang::TIntermAggregate* node)
@@ -2981,7 +3053,7 @@ spv::Id TGlslangToSpvTraverser::createMiscOperation(glslang::TOperator op, spv::

    spv::Op opCode = spv::OpNop;
    int libCall = -1;
-    int consumedOperands = operands.size();
+    size_t consumedOperands = operands.size();
    spv::Id typeId0 = 0;
    if (consumedOperands > 0)
        typeId0 = builder.getTypeId(operands[0]);

--- a/SPIRV/InReadableOrder.cpp
+++ b/SPIRV/InReadableOrder.cpp
+//
+//Copyright (C) 2016 Google, Inc.
+//
+//All rights reserved.
+//
+//Redistribution and use in source and binary forms, with or without
+//modification, are permitted provided that the following conditions
+//are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+//"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+//FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+//COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+//INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+//BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+//LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+//CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+//LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+//ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+//POSSIBILITY OF SUCH DAMAGE.
+
+//
+// Author: Dejan Mircevski, Google
+//
+
+// The SPIR-V spec requires code blocks to appear in an order satisfying the
+// dominator-tree direction (ie, dominator before the dominated).  This is,
+// actually, easy to achieve: any pre-order CFG traversal algorithm will do it.
+// Because such algorithms visit a block only after traversing some path to it
+// from the root, they necessarily visit the block's idom first.
+//
+// But not every graph-traversal algorithm outputs blocks in an order that
+// appears logical to human readers.  The problem is that unrelated branches may
+// be interspersed with each other, and merge blocks may come before some of the
+// branches being merged.
+//
+// A good, human-readable order of blocks may be achieved by performing
+// depth-first search but delaying merge nodes until after all their branches
+// have been visited.  This is implemented below by the inReadableOrder()
+// function.
+
+#include "spvIR.h"
+
+#include <cassert>
+#include <unordered_map>
+
+using spv::Block;
+using spv::Id;
+
+namespace {
+// Traverses CFG in a readable order, invoking a pre-set callback on each block.
+// Use by calling visit() on the root block.
+class ReadableOrderTraverser {
+public:
+    explicit ReadableOrderTraverser(std::function<void(Block*)> callback) : callback_(callback) {}
+
+    // Hands `block` to the callback and then walks everything reachable from
+    // it.  A block that has already been emitted, or that is currently being
+    // held back as a pending merge target, is ignored.  If `block` carries a
+    // merge instruction, the merge target is held back until all successors
+    // have been walked and is visited only afterwards.
+    void visit(Block* block)
+    {
+        assert(block);
+        if (visited_[block] || delayed_[block])
+            return;
+
+        callback_(block);
+        visited_[block] = true;
+
+        // Operand 0 of the merge instruction names the merge block's label;
+        // the owning module resolves that id back to the block.
+        Block* heldBack = nullptr;
+        if (const auto merge = block->getMergeInstruction()) {
+            const Id targetId = merge->getIdOperand(0);
+            heldBack = block->getParent().getParent().getInstruction(targetId)->getBlock();
+            delayed_[heldBack] = true;
+        }
+
+        const auto& next = block->getSuccessors();
+        for (auto it = next.begin(); it != next.end(); ++it)
+            visit(*it);
+
+        // All branches are done: release the merge block and emit it now.
+        if (heldBack) {
+            delayed_[heldBack] = false;
+            visit(heldBack);
+        }
+    }
+
+private:
+    std::function<void(Block*)> callback_;
+    // Blocks already passed to the callback.
+    std::unordered_map<Block *, bool> visited_;
+    // Merge blocks currently postponed until their branches are finished.
+    std::unordered_map<Block *, bool> delayed_;
+};
+}
+
+// Walks the control-flow graph starting at root, calling callback once per
+// block in the order produced by ReadableOrderTraverser::visit().
+void spv::inReadableOrder(Block* root, std::function<void(Block*)> callback)
+{
+    ReadableOrderTraverser traverser(callback);
+    traverser.visit(root);
+}
--- a/SPIRV/SpvBuilder.cpp
+++ b/SPIRV/SpvBuilder.cpp
--- a/SPIRV/SpvBuilder.h
+++ b/SPIRV/SpvBuilder.h
@@ -52,6 +52,7 @@
 #include "spvIR.h"

 #include <algorithm>
+#include <memory>
 #include <stack>
 #include <map>

@@ -201,11 +202,13 @@ public:
    void setBuildPoint(Block* bp) { buildPoint = bp; }
    Block* getBuildPoint() const { return buildPoint; }

-    // Make the main function.
+    // Make the main function. The returned pointer is only valid
+    // for the lifetime of this builder.
    Function* makeMain();

    // Make a shader-style function, and create its entry block if entry is non-zero.
    // Return the function, pass back the entry.
+    // The returned pointer is only valid for the lifetime of this builder.
    Function* makeFunctionEntry(Id returnType, const char* name, std::vector<Id>& paramTypes, Block **entry = 0);

    // Create a return. An 'implicit' return is one not appearing in the source
@@ -310,10 +313,12 @@ public:
        Id gradY;
        Id sample;
        Id comp;
+        Id texelOut;
+        Id lodClamp;
    };

    // Select the correct texture operation based on all inputs, and emit the correct instruction
-    Id createTextureCall(Decoration precision, Id resultType, bool fetch, bool proj, bool gather, const TextureParameters&);
+    Id createTextureCall(Decoration precision, Id resultType, bool sparse, bool fetch, bool proj, bool gather, const TextureParameters&);

    // Emit the OpTextureQuery* instruction that was passed in.
    // Figure out the right return value and type, and return it.
@@ -513,7 +518,7 @@ public:
    void simplifyAccessChainSwizzle();
    void createAndSetNoPredecessorBlock(const char*);
    void createSelectionMerge(Block* mergeBlock, unsigned int control);
-    void dumpInstructions(std::vector<unsigned int>&, const std::vector<Instruction*>&) const;
+    void dumpInstructions(std::vector<unsigned int>&, const std::vector<std::unique_ptr<Instruction> >&) const;

    SourceLanguage source;
    int sourceVersion;
@@ -529,14 +534,15 @@ public:
    AccessChain accessChain;

    // special blocks of instructions for output
-    std::vector<Instruction*> imports;
-    std::vector<Instruction*> entryPoints;
-    std::vector<Instruction*> executionModes;
-    std::vector<Instruction*> names;
-    std::vector<Instruction*> lines;
-    std::vector<Instruction*> decorations;
-    std::vector<Instruction*> constantsTypesGlobals;
-    std::vector<Instruction*> externals;
+    std::vector<std::unique_ptr<Instruction> > imports;
+    std::vector<std::unique_ptr<Instruction> > entryPoints;
+    std::vector<std::unique_ptr<Instruction> > executionModes;
+    std::vector<std::unique_ptr<Instruction> > names;
+    std::vector<std::unique_ptr<Instruction> > lines;
+    std::vector<std::unique_ptr<Instruction> > decorations;
+    std::vector<std::unique_ptr<Instruction> > constantsTypesGlobals;
+    std::vector<std::unique_ptr<Instruction> > externals;
+    std::vector<std::unique_ptr<Function> > functions;

     // not output, internally used for quick & dirty canonical (unique) creation
    std::vector<Instruction*> groupedConstants[OpConstant];  // all types appear before OpConstant

--- a/SPIRV/spirv.hpp
+++ b/SPIRV/spirv.hpp
--- a/SPIRV/spvIR.h
+++ b/SPIRV/spvIR.h
@@ -53,12 +53,15 @@
 #include "spirv.hpp"

 #include <algorithm>
-#include <vector>
+#include <cassert>
+#include <functional>
 #include <iostream>
-#include <assert.h>
+#include <memory>
+#include <vector>

 namespace spv {

+class Block;
 class Function;
 class Module;

@@ -75,8 +78,8 @@ const MemorySemanticsMask MemorySemanticsAllMemory = (MemorySemanticsMask)0x3FF;

 class Instruction {
 public:
-    Instruction(Id resultId, Id typeId, Op opCode) : resultId(resultId), typeId(typeId), opCode(opCode) { }
-    explicit Instruction(Op opCode) : resultId(NoResult), typeId(NoType), opCode(opCode) { }
+    Instruction(Id resultId, Id typeId, Op opCode) : resultId(resultId), typeId(typeId), opCode(opCode), block(nullptr) { }
+    explicit Instruction(Op opCode) : resultId(NoResult), typeId(NoType), opCode(opCode), block(nullptr) { }
    virtual ~Instruction() {}
    void addIdOperand(Id id) { operands.push_back(id); }
    void addImmediateOperand(unsigned int immediate) { operands.push_back(immediate); }
@@ -107,6 +110,8 @@ public:
            addImmediateOperand(word);
        }
    }
+    void setBlock(Block* b) { block = b; }
+    Block* getBlock() const { return block; }
    Op getOpCode() const { return opCode; }
    int getNumOperands() const { return (int)operands.size(); }
    Id getResultId() const { return resultId; }
@@ -145,6 +150,7 @@ protected:
    Op opCode;
    std::vector<Id> operands;
    std::string originalString;        // could be optimized away; convenience for getting string operand
+    Block* block;
 };

 //
@@ -156,18 +162,31 @@ public:
    Block(Id id, Function& parent);
    virtual ~Block()
    {
-        // TODO: free instructions
    }
-    
+
    Id getId() { return instructions.front()->getResultId(); }

    Function& getParent() const { return parent; }
-    void addInstruction(Instruction* inst);
-    void addPredecessor(Block* pred) { predecessors.push_back(pred); }
-    void addLocalVariable(Instruction* inst) { localVariables.push_back(inst); }
-    int getNumPredecessors() const { return (int)predecessors.size(); }
+    void addInstruction(std::unique_ptr<Instruction> inst);
+    void addPredecessor(Block* pred) { predecessors.push_back(pred); pred->successors.push_back(this);}
+    void addLocalVariable(std::unique_ptr<Instruction> inst) { localVariables.push_back(std::move(inst)); }
+    const std::vector<Block*>& getPredecessors() const { return predecessors; }
+    const std::vector<Block*>& getSuccessors() const { return successors; }
    void setUnreachable() { unreachable = true; }
    bool isUnreachable() const { return unreachable; }
+    // Returns the block's merge instruction, if one exists (otherwise null).
+    // Only the next-to-last instruction is inspected: it qualifies when its
+    // opcode is OpSelectionMerge or OpLoopMerge.
+    const Instruction* getMergeInstruction() const {
+        const size_t count = instructions.size();
+        if (count < 2)
+            return nullptr;
+        const Instruction* candidate = instructions[count - 2].get();
+        const Op candidateOp = candidate->getOpCode();
+        if (candidateOp == OpSelectionMerge || candidateOp == OpLoopMerge)
+            return candidate;
+        return nullptr;
+    }

    bool isTerminated() const
    {
@@ -206,9 +225,9 @@ protected:
    // To enforce keeping parent and ownership in sync:
    friend Function;

-    std::vector<Instruction*> instructions;
-    std::vector<Block*> predecessors;
-    std::vector<Instruction*> localVariables;
+    std::vector<std::unique_ptr<Instruction> > instructions;
+    std::vector<Block*> predecessors, successors;
+    std::vector<std::unique_ptr<Instruction> > localVariables;
    Function& parent;

    // track whether this block is known to be uncreachable (not necessarily 
@@ -217,6 +236,11 @@ protected:
    bool unreachable;
 };

+// Traverses the control-flow graph rooted at root in an order suited for
+// readable code generation.  Invokes callback at every node in the traversal
+// order.
+void inReadableOrder(Block* root, std::function<void(Block*)> callback);
+
 //
 // SPIR-V IR Function.
 //
@@ -247,7 +271,7 @@ public:
    Module& getParent() const { return parent; }
    Block* getEntryBlock() const { return blocks.front(); }
    Block* getLastBlock() const { return blocks.back(); }
-    void addLocalVariable(Instruction* inst);
+    void addLocalVariable(std::unique_ptr<Instruction> inst);
    Id getReturnType() const { return functionInstruction.getTypeId(); }
    void dump(std::vector<unsigned int>& out) const
    {
@@ -259,8 +283,7 @@ public:
            parameterInstructions[p]->dump(out);

        // Blocks
-        for (int b = 0; b < (int)blocks.size(); ++b)
-            blocks[b]->dump(out);
+        inReadableOrder(blocks[0], [&out](const Block* b) { b->dump(out); });
        Instruction end(0, 0, OpFunctionEnd);
        end.dump(out);
    }
@@ -348,22 +371,27 @@ __inline Function::Function(Id id, Id resultType, Id functionType, Id firstParam
    }
 }

-__inline void Function::addLocalVariable(Instruction* inst)
+__inline void Function::addLocalVariable(std::unique_ptr<Instruction> inst)  // takes ownership of inst
 {
-    blocks[0]->addLocalVariable(inst);
-    parent.mapInstruction(inst);
+    Instruction* raw_instruction = inst.get();       // keep a raw handle; inst is moved-from on the next line
+    blocks[0]->addLocalVariable(std::move(inst));    // the first block (blocks[0]) becomes the owner
+    parent.mapInstruction(raw_instruction);          // hand the same instruction to the parent module's map
 }

 __inline Block::Block(Id id, Function& parent) : parent(parent), unreachable(false)
 {
-    instructions.push_back(new Instruction(id, NoType, OpLabel));
+    instructions.push_back(std::unique_ptr<Instruction>(new Instruction(id, NoType, OpLabel)));
+    instructions.back()->setBlock(this);  // back-link the OpLabel so Instruction::getBlock() yields this block
+    parent.getParent().mapInstruction(instructions.back().get());  // presumably lets the module resolve the label id to this instruction - confirm in Module
 }

-__inline void Block::addInstruction(Instruction* inst)
+__inline void Block::addInstruction(std::unique_ptr<Instruction> inst)  // takes ownership of inst
 {
-    instructions.push_back(inst);
-    if (inst->getResultId())
-        parent.getParent().mapInstruction(inst);
+    Instruction* raw_instruction = inst.get();  // keep a raw handle; inst is moved-from on the next line
+    instructions.push_back(std::move(inst));    // this block now owns the instruction
+    raw_instruction->setBlock(this);            // record the owning block on the instruction
+    if (raw_instruction->getResultId())         // only instructions with a non-zero result id are mapped
+        parent.getParent().mapInstruction(raw_instruction);
 }

 };  // end spv namespace

--- a/StandAlone/StandAlone.cpp
+++ b/StandAlone/StandAlone.cpp
@@ -435,6 +435,8 @@ void ProcessConfigFile()
    }
    if (configStrings)
        FreeFileData(configStrings);
+    else
+        delete[] config;
 }

 // thread-safe list of shaders to asynchronously grab and compile
@@ -658,17 +660,27 @@ void StderrIfNonEmpty(const char* str)
    }
 }

+// Simple bundling of what makes a compilation unit for ease in passing around,
+// and separation of handling file IO versus API (programmatic) compilation.
+struct ShaderCompUnit {
+    EShLanguage stage;     // stage the glslang::TShader for this unit is created with
+    std::string fileName;  // name of the source file; printed ahead of the shader's info log
+    char** text;           // memory owned/managed externally; handed to TShader::setStrings(text, 1)
+};
+
 //
-// For linking mode: Will independently parse each item in the worklist, but then put them
-// in the same program and link them together.
+// For linking mode: Will independently parse each compilation unit, but then put them
+// in the same program and link them together, making at most one linked module per
+// pipeline stage.
 //
 // Uses the new C++ interface instead of the old handle-based interface.
 //
-void CompileAndLinkShaders()
+
+void CompileAndLinkShaderUnits(std::vector<ShaderCompUnit> compUnits)
 {
    // keep track of what to free
    std::list<glslang::TShader*> shaders;
-    
+
    EShMessages messages = EShMsgDefault;
    SetMessageOptions(messages);

@@ -677,22 +689,13 @@ void CompileAndLinkShaders()
    //

    glslang::TProgram& program = *new glslang::TProgram;
-    glslang::TWorkItem* workItem;
-    while (Worklist.remove(workItem)) {
-        EShLanguage stage = FindLanguage(workItem->name);
-        glslang::TShader* shader = new glslang::TShader(stage);
+    for (auto compUnit : compUnits) {
+        glslang::TShader* shader = new glslang::TShader(compUnit.stage);
+        shader->setStrings(compUnit.text, 1);
        shaders.push_back(shader);
-    
-        char** shaderStrings = ReadFileData(workItem->name.c_str());
-        if (! shaderStrings) {
-            usage();
-            delete &program;

-            return;
-        }
        const int defaultVersion = Options & EOptionDefaultDesktop? 110: 100;

-        shader->setStrings(shaderStrings, 1);
        if (Options & EOptionOutputPreprocessed) {
            std::string str;
            if (shader->preprocess(&Resources, defaultVersion, ENoProfile, false, false,
@@ -703,7 +706,6 @@ void CompileAndLinkShaders()
            }
            StderrIfNonEmpty(shader->getInfoLog());
            StderrIfNonEmpty(shader->getInfoDebugLog());
-            FreeFileData(shaderStrings);
            continue;
        }
        if (! shader->parse(&Resources, defaultVersion, false, messages))
@@ -711,13 +713,12 @@ void CompileAndLinkShaders()

        program.addShader(shader);

-        if (! (Options & EOptionSuppressInfolog)) {
-            PutsIfNonEmpty(workItem->name.c_str());
+        if (! (Options & EOptionSuppressInfolog) &&
+            ! (Options & EOptionMemoryLeakMode)) {
+            PutsIfNonEmpty(compUnit.fileName.c_str());
            PutsIfNonEmpty(shader->getInfoLog());
            PutsIfNonEmpty(shader->getInfoDebugLog());
        }
-
-        FreeFileData(shaderStrings);
    }

    //
@@ -727,7 +728,8 @@ void CompileAndLinkShaders()
    if (! (Options & EOptionOutputPreprocessed) && ! program.link(messages))
        LinkFailed = true;

-    if (! (Options & EOptionSuppressInfolog)) {
+    if (! (Options & EOptionSuppressInfolog) &&
+        ! (Options & EOptionMemoryLeakMode)) {
        PutsIfNonEmpty(program.getInfoLog());
        PutsIfNonEmpty(program.getInfoDebugLog());
    }
@@ -745,10 +747,15 @@ void CompileAndLinkShaders()
                if (program.getIntermediate((EShLanguage)stage)) {
                    std::vector<unsigned int> spirv;
                    glslang::GlslangToSpv(*program.getIntermediate((EShLanguage)stage), spirv);
-                    glslang::OutputSpv(spirv, GetBinaryName((EShLanguage)stage));
-                    if (Options & EOptionHumanReadableSpv) {
-                        spv::Parameterize();
-                        spv::Disassemble(std::cout, spirv);
+
+                    // Dump the spv to a file or stdout, etc., but only if not doing
+                    // memory/perf testing, as it's not internal to programmatic use.
+                    if (! (Options & EOptionMemoryLeakMode)) {
+                        glslang::OutputSpv(spirv, GetBinaryName((EShLanguage)stage));
+                        if (Options & EOptionHumanReadableSpv) {
+                            spv::Parameterize();
+                            spv::Disassemble(std::cout, spirv);
+                        }
                    }
                }
            }
@@ -766,6 +773,59 @@ void CompileAndLinkShaders()
    }
 }

+//
+// Do file IO part of compile and link, handing off the pure
+// API/programmatic mode to CompileAndLinkShaderUnits(), which can
+// be put in a loop for testing memory footprint and performance.
+//
+// This is just for linking mode: all the shaders will be put into the
+// same program and linked together.  That means there are a limited number
+// of work items (not multi-threading mode) and that the point is testing at
+// the linking level.  Hence, to enable performance and memory testing, the
+// actual compile/link can be put in a loop, independent of the file IO.
+//
+// Every file read here is released before returning, also when a read fails.
+//
+void CompileAndLinkShaderFiles()
+{
+    std::vector<ShaderCompUnit> compUnits;
+
+    // Transfer all the work items to a simple list of compilation units.
+    // (We don't care about the thread work-item distribution properties in
+    // this path, which is okay due to the limited number of shaders, since
+    // they are all getting linked together.)
+    glslang::TWorkItem* workItem;
+    while (Worklist.remove(workItem)) {
+        ShaderCompUnit compUnit = {
+            FindLanguage(workItem->name),
+            workItem->name,
+            ReadFileData(workItem->name.c_str())
+        };
+        if (! compUnit.text) {
+            // Release the files read so far; nothing else owns them.
+            for (const auto& loaded : compUnits)
+                FreeFileData(loaded.text);
+            usage();
+            return;
+        }
+        compUnits.push_back(compUnit);
+    }
+
+    // Actual call to programmatic processing of compile and link,
+    // in a loop for testing memory and performance.  This part contains
+    // all the perf/memory that a programmatic consumer will care about.
+    for (int i = 0; i < ((Options & EOptionMemoryLeakMode) ? 100 : 1); ++i) {
+        for (int j = 0; j < ((Options & EOptionMemoryLeakMode) ? 100 : 1); ++j)
+           CompileAndLinkShaderUnits(compUnits);
+
+        if (Options & EOptionMemoryLeakMode)
+            glslang::OS_DumpMemoryCounters();
+    }
+
+    for (const auto& unit : compUnits)
+        FreeFileData(unit.text);
+}
+
 int C_DECL main(int argc, char* argv[])
 {
    ProcessArguments(argc, argv);
@@ -803,8 +863,13 @@ int C_DECL main(int argc, char* argv[])
    if (Options & EOptionLinkProgram ||
        Options & EOptionOutputPreprocessed) {
        glslang::InitializeProcess();
-        CompileAndLinkShaders();
+        CompileAndLinkShaderFiles();
        glslang::FinalizeProcess();
+        for (int w = 0; w < NumWorkItems; ++w) {
+          if (Work[w]) {
+            delete Work[w];
+          }
+        }
    } else {
        ShInitialize();

@@ -837,6 +902,8 @@ int C_DECL main(int argc, char* argv[])
        ShFinalize();
    }

+    delete[] Work;
+
    if (CompileFailed)
        return EFailCompile;
    if (LinkFailed)

--- a/Test/400.frag
+++ b/Test/400.frag
@@ -60,7 +60,7 @@ patch out vec4 patchOut;            // ERROR
 void foo24()
 {
    dvec3 df, di;
-    df = modf(outp.xyz, di);
+    df = modf(dvec3(outp.xyz), di);
 }

 in float in1;

--- a/Test/400.geom
+++ b/Test/400.geom
@@ -115,3 +115,216 @@ void qlod()
    lod = textureQueryLod(samp1D, pf);      // ERROR, only in fragment
    lod = textureQueryLod(samp2Ds, pf2);    // ERROR, only in fragment
 }
+
+void doubles()
+{
+    double doublev;
+    dvec2 dvec2v;
+    dvec3 dvec3v;
+    dvec4 dvec4v;
+
+    bool boolv;
+    bvec2 bvec2v;
+    bvec3 bvec3v;
+    bvec4 bvec4v;
+
+    doublev = sqrt(2.9);
+    dvec2v  = sqrt(dvec2(2.7));
+    dvec3v  = sqrt(dvec3(2.0));
+    dvec4v  = sqrt(dvec4(2.1));
+
+    doublev += inversesqrt(doublev);
+    dvec2v  += inversesqrt(dvec2v);
+    dvec3v  += inversesqrt(dvec3v);
+    dvec4v  += inversesqrt(dvec4v);
+
+    doublev += abs(doublev);
+    dvec2v  += abs(dvec2v);
+    dvec3v  += abs(dvec3v);
+    dvec4v  += abs(dvec4v);
+
+    doublev += sign(doublev);
+    dvec2v  += sign(dvec2v);
+    dvec3v  += sign(dvec3v);
+    dvec4v  += sign(dvec4v);
+
+    doublev += floor(doublev);
+    dvec2v  += floor(dvec2v);
+    dvec3v  += floor(dvec3v);
+    dvec4v  += floor(dvec4v);
+
+    doublev += trunc(doublev);
+    dvec2v  += trunc(dvec2v);
+    dvec3v  += trunc(dvec3v);
+    dvec4v  += trunc(dvec4v);
+
+    doublev += round(doublev);
+    dvec2v  += round(dvec2v);
+    dvec3v  += round(dvec3v);
+    dvec4v  += round(dvec4v);
+
+    doublev += roundEven(doublev);
+    dvec2v  += roundEven(dvec2v);
+    dvec3v  += roundEven(dvec3v);
+    dvec4v  += roundEven(dvec4v);
+
+    doublev += ceil(doublev);
+    dvec2v  += ceil(dvec2v);
+    dvec3v  += ceil(dvec3v);
+    dvec4v  += ceil(dvec4v);
+
+    doublev += fract(doublev);
+    dvec2v  += fract(dvec2v);
+    dvec3v  += fract(dvec3v);
+    dvec4v  += fract(dvec4v);
+
+    doublev += mod(doublev, doublev);
+    dvec2v  += mod(dvec2v, doublev);
+    dvec3v  += mod(dvec3v, doublev);
+    dvec4v  += mod(dvec4v, doublev);
+    dvec2v  += mod(dvec2v, dvec2v);
+    dvec3v  += mod(dvec3v, dvec3v);
+    dvec4v  += mod(dvec4v, dvec4v);
+
+    doublev += modf(doublev, doublev);
+    dvec2v  += modf(dvec2v,  dvec2v);
+    dvec3v  += modf(dvec3v,  dvec3v);
+    dvec4v  += modf(dvec4v,  dvec4v);
+
+    doublev += min(doublev, doublev);
+    dvec2v  += min(dvec2v, doublev);
+    dvec3v  += min(dvec3v, doublev);
+    dvec4v  += min(dvec4v, doublev);
+    dvec2v  += min(dvec2v, dvec2v);
+    dvec3v  += min(dvec3v, dvec3v);
+    dvec4v  += min(dvec4v, dvec4v);
+
+    doublev += max(doublev, doublev);
+    dvec2v  += max(dvec2v, doublev);
+    dvec3v  += max(dvec3v, doublev);
+    dvec4v  += max(dvec4v, doublev);
+    dvec2v  += max(dvec2v, dvec2v);
+    dvec3v  += max(dvec3v, dvec3v);
+    dvec4v  += max(dvec4v, dvec4v);
+
+    doublev += clamp(doublev, doublev, doublev);
+    dvec2v  += clamp(dvec2v, doublev, doublev);
+    dvec3v  += clamp(dvec3v, doublev, doublev);
+    dvec4v  += clamp(dvec4v, doublev, doublev);
+    dvec2v  += clamp(dvec2v, dvec2v, dvec2v);
+    dvec3v  += clamp(dvec3v, dvec3v, dvec3v);
+    dvec4v  += clamp(dvec4v, dvec4v, dvec4v);
+
+    doublev += mix(doublev, doublev, doublev);
+    dvec2v  += mix(dvec2v, dvec2v, doublev);
+    dvec3v  += mix(dvec3v, dvec3v, doublev);
+    dvec4v  += mix(dvec4v, dvec4v, doublev);
+    dvec2v  += mix(dvec2v, dvec2v, dvec2v);
+    dvec3v  += mix(dvec3v, dvec3v, dvec3v);
+    dvec4v  += mix(dvec4v, dvec4v, dvec4v);
+    doublev += mix(doublev, doublev, boolv);
+    dvec2v  += mix(dvec2v, dvec2v, bvec2v);
+    dvec3v  += mix(dvec3v, dvec3v, bvec3v);
+    dvec4v  += mix(dvec4v, dvec4v, bvec4v);
+
+    doublev += step(doublev, doublev);
+    dvec2v  += step(dvec2v, dvec2v);
+    dvec3v  += step(dvec3v, dvec3v);
+    dvec4v  += step(dvec4v, dvec4v);
+    dvec2v  += step(doublev, dvec2v);
+    dvec3v  += step(doublev, dvec3v);
+    dvec4v  += step(doublev, dvec4v);
+
+    doublev += smoothstep(doublev, doublev, doublev);
+    dvec2v  += smoothstep(dvec2v, dvec2v, dvec2v);
+    dvec3v  += smoothstep(dvec3v, dvec3v, dvec3v);
+    dvec4v  += smoothstep(dvec4v, dvec4v, dvec4v);
+    dvec2v  += smoothstep(doublev, doublev, dvec2v);
+    dvec3v  += smoothstep(doublev, doublev, dvec3v);
+    dvec4v  += smoothstep(doublev, doublev, dvec4v);
+
+    boolv  = isnan(doublev);
+    bvec2v = isnan(dvec2v);
+    bvec3v = isnan(dvec3v);
+    bvec4v = isnan(dvec4v);
+
+    boolv  = boolv ? isinf(doublev) : false;
+    bvec2v = boolv ? isinf(dvec2v)  : bvec2(false);
+    bvec3v = boolv ? isinf(dvec3v)  : bvec3(false);
+    bvec4v = boolv ? isinf(dvec4v)  : bvec4(false);
+
+    doublev += length(doublev);
+    doublev += length(dvec2v);
+    doublev += length(dvec3v);
+    doublev += length(dvec4v);
+
+    doublev += distance(doublev, doublev);
+    doublev += distance(dvec2v, dvec2v);
+    doublev += distance(dvec3v, dvec3v);
+    doublev += distance(dvec4v, dvec4v);
+
+    doublev += dot(doublev, doublev);
+    doublev += dot(dvec2v, dvec2v);
+    doublev += dot(dvec3v, dvec3v);
+    doublev += dot(dvec4v, dvec4v);
+
+    dvec3v += cross(dvec3v, dvec3v);
+
+    doublev += normalize(doublev);
+    dvec2v  += normalize(dvec2v);
+    dvec3v  += normalize(dvec3v);
+    dvec4v  += normalize(dvec4v);
+
+    doublev += faceforward(doublev, doublev, doublev);
+    dvec2v  += faceforward(dvec2v, dvec2v, dvec2v);
+    dvec3v  += faceforward(dvec3v, dvec3v, dvec3v);
+    dvec4v  += faceforward(dvec4v, dvec4v, dvec4v);
+
+    doublev += reflect(doublev, doublev);
+    dvec2v  += reflect(dvec2v, dvec2v);
+    dvec3v  += reflect(dvec3v, dvec3v);
+    dvec4v  += reflect(dvec4v, dvec4v);
+
+    doublev += refract(doublev, doublev, doublev);
+    dvec2v  += refract(dvec2v, dvec2v, doublev);
+    dvec3v  += refract(dvec3v, dvec3v, doublev);
+    dvec4v  += refract(dvec4v, dvec4v, doublev);
+
+    dmat2   dmat2v   = outerProduct(dvec2v, dvec2v);
+    dmat3   dmat3v   = outerProduct(dvec3v, dvec3v);
+    dmat4   dmat4v   = outerProduct(dvec4v, dvec4v);
+    dmat2x3 dmat2x3v = outerProduct(dvec3v, dvec2v);
+    dmat3x2 dmat3x2v = outerProduct(dvec2v, dvec3v);
+    dmat2x4 dmat2x4v = outerProduct(dvec4v, dvec2v);
+    dmat4x2 dmat4x2v = outerProduct(dvec2v, dvec4v);
+    dmat3x4 dmat3x4v = outerProduct(dvec4v, dvec3v);
+    dmat4x3 dmat4x3v = outerProduct(dvec3v, dvec4v);
+
+    dmat2v *= matrixCompMult(dmat2v, dmat2v);
+    dmat3v *= matrixCompMult(dmat3v, dmat3v);
+    dmat4v *= matrixCompMult(dmat4v, dmat4v);
+    dmat2x3v = matrixCompMult(dmat2x3v, dmat2x3v);
+    dmat2x4v = matrixCompMult(dmat2x4v, dmat2x4v);
+    dmat3x2v = matrixCompMult(dmat3x2v, dmat3x2v);
+    dmat3x4v = matrixCompMult(dmat3x4v, dmat3x4v);
+    dmat4x2v = matrixCompMult(dmat4x2v, dmat4x2v);
+    dmat4x3v = matrixCompMult(dmat4x3v, dmat4x3v);
+
+    dmat2v   *= transpose(dmat2v);
+    dmat3v   *= transpose(dmat3v);
+    dmat4v   *= transpose(dmat4v);
+    dmat2x3v  = transpose(dmat3x2v);
+    dmat3x2v  = transpose(dmat2x3v);
+    dmat2x4v  = transpose(dmat4x2v);
+    dmat4x2v  = transpose(dmat2x4v);
+    dmat3x4v  = transpose(dmat4x3v);
+    dmat4x3v  = transpose(dmat3x4v);
+
+    doublev += determinant(dmat2v);
+    doublev += determinant(dmat3v);
+    doublev += determinant(dmat4v);
+
+    dmat2v *= inverse(dmat2v);
+    dmat3v *= inverse(dmat3v);
+    dmat4v *= inverse(dmat4v);
+}
--- a/Test/baseResults/400.frag.out
+++ b/Test/baseResults/400.frag.out
@@ -218,26 +218,18 @@ ERROR: node is still EOpNull!
 0:?     Sequence
 0:63      move second child to first child (temp 3-component vector of double)
 0:63        'df' (temp 3-component vector of double)
-0:63        Convert float to double (temp 3-component vector of double)
-0:63          Comma (global 3-component vector of float)
-0:63            move second child to first child (temp 3-component vector of float)
-0:63              'tempReturn' (global 3-component vector of float)
-0:63              modf (global 3-component vector of float)
-0:63                vector swizzle (temp 3-component vector of float)
-0:63                  'outp' (out 4-component vector of float)
-0:63                  Sequence
-0:63                    Constant:
-0:63                      0 (const int)
-0:63                    Constant:
-0:63                      1 (const int)
-0:63                    Constant:
-0:63                      2 (const int)
-0:63                'tempArg' (temp 3-component vector of float)
-0:63            move second child to first child (temp 3-component vector of double)
-0:63              'di' (temp 3-component vector of double)
-0:63              Convert float to double (temp 3-component vector of double)
-0:63                'tempArg' (temp 3-component vector of float)
-0:63            'tempReturn' (global 3-component vector of float)
+0:63        modf (global 3-component vector of double)
+0:63          Convert float to double (temp 3-component vector of double)
+0:63            vector swizzle (temp 3-component vector of float)
+0:63              'outp' (out 4-component vector of float)
+0:63              Sequence
+0:63                Constant:
+0:63                  0 (const int)
+0:63                Constant:
+0:63                  1 (const int)
+0:63                Constant:
+0:63                  2 (const int)
+0:63          'di' (temp 3-component vector of double)
 0:71  Function Definition: foodc1( (global void)
 0:71    Function Parameters: 
 0:73    Sequence
@@ -707,26 +699,18 @@ ERROR: node is still EOpNull!
 0:?     Sequence
 0:63      move second child to first child (temp 3-component vector of double)
 0:63        'df' (temp 3-component vector of double)
-0:63        Convert float to double (temp 3-component vector of double)
-0:63          Comma (global 3-component vector of float)
-0:63            move second child to first child (temp 3-component vector of float)
-0:63              'tempReturn' (global 3-component vector of float)
-0:63              modf (global 3-component vector of float)
-0:63                vector swizzle (temp 3-component vector of float)
-0:63                  'outp' (out 4-component vector of float)
-0:63                  Sequence
-0:63                    Constant:
-0:63                      0 (const int)
-0:63                    Constant:
-0:63                      1 (const int)
-0:63                    Constant:
-0:63                      2 (const int)
-0:63                'tempArg' (temp 3-component vector of float)
-0:63            move second child to first child (temp 3-component vector of double)
-0:63              'di' (temp 3-component vector of double)
-0:63              Convert float to double (temp 3-component vector of double)
-0:63                'tempArg' (temp 3-component vector of float)
-0:63            'tempReturn' (global 3-component vector of float)
+0:63        modf (global 3-component vector of double)
+0:63          Convert float to double (temp 3-component vector of double)
+0:63            vector swizzle (temp 3-component vector of float)
+0:63              'outp' (out 4-component vector of float)
+0:63              Sequence
+0:63                Constant:
+0:63                  0 (const int)
+0:63                Constant:
+0:63                  1 (const int)
+0:63                Constant:
+0:63                  2 (const int)
+0:63          'di' (temp 3-component vector of double)
 0:71  Function Definition: foodc1( (global void)
 0:71    Function Parameters: 
 0:73    Sequence

--- a/Test/baseResults/400.geom.out
+++ b/Test/baseResults/400.geom.out
--- a/Test/baseResults/reflection.vert.out
+++ b/Test/baseResults/reflection.vert.out
@@ -85,7 +85,7 @@ Uniform block reflection:
 nameless: offset -1, type ffffffff, size 496, index -1
 named: offset -1, type ffffffff, size 304, index -1
 c_nameless: offset -1, type ffffffff, size 112, index -1
-nested: offset -1, type ffffffff, size 28, index -1
+nested: offset -1, type ffffffff, size 32, index -1
 abl[0]: offset -1, type ffffffff, size 4, index -1
 abl[1]: offset -1, type ffffffff, size 4, index -1
 abl[2]: offset -1, type ffffffff, size 4, index -1

--- a/Test/baseResults/spv.400.frag.out
+++ b/Test/baseResults/spv.400.frag.out
--- a/Test/baseResults/spv.always-discard.frag.out
+++ b/Test/baseResults/spv.always-discard.frag.out
@@ -110,23 +110,4 @@ Linked fragment stage:
                                Branch 49
              49:             Label
                              Kill
-              69:             Label
-              70:    6(float) Load 36(radius)
-              72:    46(bool) FOrdGreaterThanEqual 70 71
-                              SelectionMerge 74 None
-                              BranchConditional 72 73 74
-              73:               Label
-              75:    6(float)   Load 36(radius)
-              77:    6(float)   ExtInst 1(GLSL.std.450) 26(Pow) 75 76
-              78:    6(float)   FDiv 77 27
-              79:    6(float)   ExtInst 1(GLSL.std.450) 4(FAbs) 78
-              80:    7(fvec4)   Load 15(color)
-              81:    7(fvec4)   CompositeConstruct 79 79 79 79
-              82:    7(fvec4)   FSub 80 81
-                                Store 15(color) 82
-                                Branch 74
-              74:             Label
-              83:    7(fvec4) Load 15(color)
-                              Store 59(gl_FragColor) 83
-                              Return
                              FunctionEnd
--- a/Test/baseResults/spv.branch-return.vert.out
+++ b/Test/baseResults/spv.branch-return.vert.out
+spv.branch-return.vert
+
+Linked vertex stage:
+
+
+// Module Version 10000
+// Generated by (magic number): 80001
+// Id's are bound by 35
+
+                              Capability Shader
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint Vertex 4  "main" 8 19 34
+                              Source ESSL 300
+                              Name 4  "main"
+                              Name 8  "gl_InstanceID"
+                              Name 19  "gl_Position"
+                              Name 34  "gl_VertexID"
+                              Decorate 8(gl_InstanceID) BuiltIn InstanceId
+                              Decorate 19(gl_Position) BuiltIn Position
+                              Decorate 34(gl_VertexID) BuiltIn VertexId
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeInt 32 1
+               7:             TypePointer Input 6(int)
+8(gl_InstanceID):      7(ptr) Variable Input
+              16:             TypeFloat 32
+              17:             TypeVector 16(float) 4
+              18:             TypePointer Output 17(fvec4)
+ 19(gl_Position):     18(ptr) Variable Output
+              20:   16(float) Constant 0
+              21:   17(fvec4) ConstantComposite 20 20 20 20
+              26:   16(float) Constant 1039918957
+              27:             TypeInt 32 0
+              28:     27(int) Constant 0
+              29:             TypePointer Output 16(float)
+ 34(gl_VertexID):      7(ptr) Variable Input
+         4(main):           2 Function None 3
+               5:             Label
+               9:      6(int) Load 8(gl_InstanceID)
+                              SelectionMerge 14 None
+                              Switch 9 14 
+                                     case 0: 10
+                                     case 1: 11
+                                     case 2: 12
+                                     case 3: 13
+              10:               Label
+                                Return
+              11:               Label
+                                Store 19(gl_Position) 21
+                                Branch 14
+              12:               Label
+                                Return
+              13:               Label
+                                Return
+              14:             Label
+              30:     29(ptr) AccessChain 19(gl_Position) 28
+              31:   16(float) Load 30
+              32:   16(float) FAdd 31 26
+              33:     29(ptr) AccessChain 19(gl_Position) 28
+                              Store 33 32
+                              Return
+                                FunctionEnd
--- a/Test/baseResults/spv.dataOutIndirect.vert.out
+++ b/Test/baseResults/spv.dataOutIndirect.vert.out
@@ -60,14 +60,14 @@ Linked vertex stage:
              30:     29(ptr)   AccessChain 24(colorOut) 25
                                Store 30 28
                                Branch 13
+              13:               Label
+              31:      6(int)   Load 8(i)
+              32:      6(int)   IAdd 31 9
+                                Store 8(i) 32
+                                Branch 10
              12:             Label
              35:     29(ptr) AccessChain 24(colorOut) 34
              36:   19(fvec4) Load 35
                              Store 33(gl_Position) 36
                              Return
-              13:             Label
-              31:      6(int) Load 8(i)
-              32:      6(int) IAdd 31 9
-                              Store 8(i) 32
-                              Branch 10
                              FunctionEnd
--- a/Test/baseResults/spv.do-simple.vert.out
+++ b/Test/baseResults/spv.do-simple.vert.out
@@ -42,10 +42,10 @@ Linked vertex stage:
              16:      6(int) IAdd 14 15
                              Store 8(i) 16
                              Branch 13
-              12:             Label
-                              Return
              13:             Label
              17:      6(int) Load 8(i)
              20:    19(bool) SLessThan 17 18
                              BranchConditional 20 10 12
-                                FunctionEnd
+              12:             Label
+                              Return
+                              FunctionEnd
--- a/Test/baseResults/spv.do-while-continue-break.vert.out
+++ b/Test/baseResults/spv.do-while-continue-break.vert.out
@@ -63,21 +63,15 @@ Linked vertex stage:
              18:    17(bool) IEqual 15 16
                              SelectionMerge 20 None
                              BranchConditional 18 19 20
-              12:               Label
-                                Store 41(G) 42
-                                Return
+              19:               Label
+                                Store 21(B) 22
+                                Branch 13
              13:               Label
              37:      6(int)   Load 8(i)
              38:      6(int)   IAdd 37 22
                                Store 8(i) 38
              40:    17(bool)   SLessThan 38 39
                                BranchConditional 40 10 12
-              19:               Label
-                                Store 21(B) 22
-                                Branch 13
-              23:               Label
-                                Store 24(C) 16
-                                Branch 20
              20:             Label
              25:      6(int) Load 8(i)
              27:    17(bool) IEqual 25 26
@@ -86,10 +80,10 @@ Linked vertex stage:
              28:               Label
                                Store 30(D) 31
                                Branch 12
-              32:               Label
-                                Store 33(E) 34
-                                Branch 29
              29:             Label
                              Store 35(F) 36
                              Branch 13
+              12:             Label
+                              Store 41(G) 42
+                              Return
                              FunctionEnd
--- a/Test/baseResults/spv.doWhileLoop.frag.out
+++ b/Test/baseResults/spv.doWhileLoop.frag.out
@@ -51,14 +51,14 @@ Linked fragment stage:
              21:    7(fvec4) FAdd 20 19
                              Store 9(color) 21
                              Branch 16
-              15:             Label
-              34:    7(fvec4) Load 9(color)
-                              Store 33(gl_FragColor) 34
-                              Return
              16:             Label
              25:     24(ptr) AccessChain 9(color) 23
              26:    6(float) Load 25
              29:    6(float) Load 28(d)
              31:    30(bool) FOrdLessThan 26 29
                              BranchConditional 31 13 15
-                                FunctionEnd
+              15:             Label
+              34:    7(fvec4) Load 9(color)
+                              Store 33(gl_FragColor) 34
+                              Return
+                              FunctionEnd
--- a/Test/baseResults/spv.for-continue-break.vert.out
+++ b/Test/baseResults/spv.for-continue-break.vert.out
@@ -64,20 +64,14 @@ Linked vertex stage:
              23:    16(bool)   IEqual 22 9
                                SelectionMerge 25 None
                                BranchConditional 23 24 25
-              12:                 Label
-                                  Store 42(G) 43
-                                  Return
+              24:                 Label
+                                  Store 26(B) 19
+                                  Branch 13
              13:                 Label
              40:      6(int)     Load 8(i)
              41:      6(int)     IAdd 40 19
                                  Store 8(i) 41
                                  Branch 10
-              24:                 Label
-                                  Store 26(B) 19
-                                  Branch 13
-              27:                 Label
-                                  Store 28(C) 19
-                                  Branch 25
              25:               Label
              29:      6(int)   Load 8(i)
              31:      6(int)   SMod 29 30
@@ -87,10 +81,10 @@ Linked vertex stage:
              33:                 Label
                                  Store 35(D) 19
                                  Branch 12
-              36:                 Label
-                                  Store 37(E) 19
-                                  Branch 34
              34:               Label
                                Store 38(F) 39
                                Branch 13
-                                FunctionEnd
+              12:             Label
+                              Store 42(G) 43
+                              Return
+                              FunctionEnd
--- a/Test/baseResults/spv.for-nobody.vert.out
+++ b/Test/baseResults/spv.for-nobody.vert.out
@@ -47,13 +47,13 @@ Linked vertex stage:
                              BranchConditional 17 11 12
              11:               Label
                                Branch 13
+              13:               Label
+              18:      6(int)   Load 8(i)
+              20:      6(int)   IAdd 18 19
+                                Store 8(i) 20
+                                Branch 10
              12:             Label
              23:      6(int) Load 8(i)
                              Store 22(r) 23
                              Return
-              13:             Label
-              18:      6(int) Load 8(i)
-              20:      6(int) IAdd 18 19
-                              Store 8(i) 20
-                              Branch 10
                              FunctionEnd
--- a/Test/baseResults/spv.for-simple.vert.out
+++ b/Test/baseResults/spv.for-simple.vert.out
@@ -45,11 +45,11 @@ Linked vertex stage:
              11:               Label
                                Store 18(j) 19
                                Branch 13
+              13:               Label
+              20:      6(int)   Load 8(i)
+              22:      6(int)   IAdd 20 21
+                                Store 8(i) 22
+                                Branch 10
              12:             Label
                              Return
-              13:             Label
-              20:      6(int) Load 8(i)
-              22:      6(int) IAdd 20 21
-                              Store 8(i) 22
-                              Branch 10
                              FunctionEnd
--- a/Test/baseResults/spv.forLoop.frag.out
+++ b/Test/baseResults/spv.forLoop.frag.out
@@ -88,17 +88,17 @@ Linked fragment stage:
              31:    7(fvec4)   FAdd 30 29
                                Store 9(color) 31
                                Branch 20
+              20:               Label
+              32:     13(int)   Load 15(i)
+              34:     13(int)   IAdd 32 33
+                                Store 15(i) 34
+                                Branch 17
              19:             Label
              37:    7(fvec4) Load 9(color)
                              Store 36(gl_FragColor) 37
                              Store 39(sum) 40
                              Store 41(i) 16
                              Branch 42
-              20:             Label
-              32:     13(int) Load 15(i)
-              34:     13(int) IAdd 32 33
-                              Store 15(i) 34
-                              Branch 17
              42:             Label
              46:     13(int) Load 41(i)
              48:    25(bool) SLessThan 46 47
@@ -113,14 +113,14 @@ Linked fragment stage:
              59:    6(float)   FAdd 58 57
                                Store 39(sum) 59
                                Branch 45
+              45:               Label
+              60:     13(int)   Load 41(i)
+              61:     13(int)   IAdd 60 33
+                                Store 41(i) 61
+                                Branch 42
              44:             Label
                              Store 62(i) 16
                              Branch 63
-              45:             Label
-              60:     13(int) Load 41(i)
-              61:     13(int) IAdd 60 33
-                              Store 41(i) 61
-                              Branch 42
              63:             Label
              67:     13(int) Load 62(i)
              68:    25(bool) SLessThan 67 47
@@ -136,6 +136,11 @@ Linked fragment stage:
              77:     38(ptr)   AccessChain 69(tv4) 70
                                Store 77 76
                                Branch 66
+              66:               Label
+              78:     13(int)   Load 62(i)
+              79:     13(int)   IAdd 78 33
+                                Store 62(i) 79
+                                Branch 63
              65:             Label
              80:    6(float) Load 39(sum)
              81:    7(fvec4) CompositeConstruct 80 80 80 80
@@ -151,11 +156,6 @@ Linked fragment stage:
                              Store 86(r) 91
                              Store 92(i) 16
                              Branch 93
-              66:             Label
-              78:     13(int) Load 62(i)
-              79:     13(int) IAdd 78 33
-                              Store 62(i) 79
-                              Branch 63
              93:             Label
              97:     13(int) Load 92(i)
              98:     13(int) Load 23(Count)
@@ -167,6 +167,11 @@ Linked fragment stage:
             104:     38(ptr)   AccessChain 86(r) 103
                                Store 104 102
                                Branch 96
+              96:               Label
+             105:     13(int)   Load 92(i)
+             106:     13(int)   IAdd 105 33
+                                Store 92(i) 106
+                                Branch 93
              95:             Label
             107:    7(fvec4) Load 86(r)
             108:   87(fvec3) VectorShuffle 107 107 0 1 2
@@ -178,11 +183,6 @@ Linked fragment stage:
                              Store 36(gl_FragColor) 113
                              Store 114(i) 16
                              Branch 115
-              96:             Label
-             105:     13(int) Load 92(i)
-             106:     13(int) IAdd 105 33
-                              Store 92(i) 106
-                              Branch 93
             115:             Label
             119:     13(int) Load 114(i)
             121:    25(bool) SLessThan 119 120
@@ -194,11 +194,11 @@ Linked fragment stage:
             124:    7(fvec4)   VectorTimesScalar 123 122
                                Store 36(gl_FragColor) 124
                                Branch 118
+             118:               Label
+             125:     13(int)   Load 114(i)
+             126:     13(int)   IAdd 125 47
+                                Store 114(i) 126
+                                Branch 115
             117:             Label
                              Return
-             118:             Label
-             125:     13(int) Load 114(i)
-             126:     13(int) IAdd 125 47
-                              Store 114(i) 126
-                              Branch 115
                              FunctionEnd
--- a/Test/baseResults/spv.localAggregates.frag.out
+++ b/Test/baseResults/spv.localAggregates.frag.out
@@ -170,16 +170,16 @@ Linked fragment stage:
              86:     30(ptr)   AccessChain 83(a) 84
                                Store 86 85
                                Branch 79
-              78:             Label
-              90:      6(int) Load 89(condition)
-              91:    23(bool) IEqual 90 28
-                              SelectionMerge 93 None
-                              BranchConditional 91 92 93
              79:               Label
              87:      6(int)   Load 75(i)
              88:      6(int)   IAdd 87 28
                                Store 75(i) 88
                                Branch 76
+              78:             Label
+              90:      6(int) Load 89(condition)
+              91:    23(bool) IEqual 90 28
+                              SelectionMerge 93 None
+                              BranchConditional 91 92 93
              92:               Label
              94:          34   Load 70(localArray)
                                Store 83(a) 94

--- a/Test/baseResults/spv.loops.frag.out
+++ b/Test/baseResults/spv.loops.frag.out
--- a/Test/baseResults/spv.loopsArtificial.frag.out
+++ b/Test/baseResults/spv.loopsArtificial.frag.out
@@ -153,14 +153,6 @@ Linked fragment stage:
              31:    30(bool) FOrdLessThan 26 29
                              SelectionMerge 33 None
                              BranchConditional 31 32 33
-              15:               Label
-                                Branch 73
-              16:               Label
-              69:     24(ptr)   AccessChain 9(color) 35
-              70:    6(float)   Load 69
-              71:    6(float)   Load 28(d4)
-              72:    30(bool)   FOrdLessThan 70 71
-                                BranchConditional 72 13 15
              32:               Label
              36:     24(ptr)   AccessChain 9(color) 35
              37:    6(float)   Load 36
@@ -179,6 +171,12 @@ Linked fragment stage:
              49:    6(float)     FAdd 47 48
                                  Store 46 49
                                  Branch 16
+              16:                 Label
+              69:     24(ptr)     AccessChain 9(color) 35
+              70:    6(float)     Load 69
+              71:    6(float)     Load 28(d4)
+              72:    30(bool)     FOrdLessThan 70 71
+                                  BranchConditional 72 13 15
              45:               Label
                                Branch 33
              33:             Label
@@ -206,6 +204,8 @@ Linked fragment stage:
                                Branch 57
              57:             Label
                              Branch 16
+              15:             Label
+                              Branch 73
              73:             Label
              78:     24(ptr) AccessChain 9(color) 77
              79:    6(float) Load 78
@@ -220,16 +220,6 @@ Linked fragment stage:
              86:    30(bool)   FOrdLessThan 84 85
                                SelectionMerge 88 None
                                BranchConditional 86 87 92
-              75:                 Label
-             136:    7(fvec4)     Load 9(color)
-             137:    7(fvec4)     CompositeConstruct 48 48 48 48
-             138:    7(fvec4)     FAdd 136 137
-                                  Store 9(color) 138
-             141:    7(fvec4)     Load 9(color)
-                                  Store 140(gl_FragColor) 141
-                                  Return
-              76:                 Label
-                                  Branch 73
              87:                 Label
              89:    7(fvec4)     Load 9(color)
              90:    7(fvec4)     CompositeConstruct 48 48 48 48
@@ -271,6 +261,8 @@ Linked fragment stage:
             117:    6(float)       FAdd 116 48
                                    Store 115 117
                                    Branch 76
+              76:                   Label
+                                    Branch 73
             114:                 Label
                                  Branch 104
             104:               Label
@@ -298,4 +290,12 @@ Linked fragment stage:
                                  Branch 124
             124:               Label
                                Branch 76
-                                FunctionEnd
+              75:             Label
+             136:    7(fvec4) Load 9(color)
+             137:    7(fvec4) CompositeConstruct 48 48 48 48
+             138:    7(fvec4) FAdd 136 137
+                              Store 9(color) 138
+             141:    7(fvec4) Load 9(color)
+                              Store 140(gl_FragColor) 141
+                              Return
+                              FunctionEnd
--- a/Test/baseResults/spv.merge-unreachable.frag.out
+++ b/Test/baseResults/spv.merge-unreachable.frag.out
+spv.merge-unreachable.frag
+Warning, version 450 is not yet complete; most version-specific features are present, but some are missing.
+
+
+Linked fragment stage:
+
+
+// Module Version 10000
+// Generated by (magic number): 80001
+// Id's are bound by 25
+
+                              Capability Shader
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint Fragment 4  "main" 9
+                              ExecutionMode 4 OriginLowerLeft
+                              Source GLSL 450
+                              Name 4  "main"
+                              Name 9  "v"
+                              Decorate 9(v) Location 1
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeFloat 32
+               7:             TypeVector 6(float) 4
+               8:             TypePointer Input 7(fvec4)
+            9(v):      8(ptr) Variable Input
+              11:    6(float) Constant 1036831949
+              12:    6(float) Constant 1045220557
+              13:    6(float) Constant 1050253722
+              14:    6(float) Constant 1053609165
+              15:    7(fvec4) ConstantComposite 11 12 13 14
+              16:             TypeBool
+              17:             TypeVector 16(bool) 4
+         4(main):           2 Function None 3
+               5:             Label
+              10:    7(fvec4) Load 9(v)
+              18:   17(bvec4) FOrdEqual 10 15
+              19:    16(bool) All 18
+                              SelectionMerge 21 None
+                              BranchConditional 19 20 23
+              20:               Label
+                                Kill
+              23:               Label
+                                Return
+              21:             Label
+                              Return
+                              FunctionEnd
--- a/Test/baseResults/spv.sparseTexture.frag.out
+++ b/Test/baseResults/spv.sparseTexture.frag.out
--- a/Test/baseResults/spv.sparseTextureClamp.frag.out
+++ b/Test/baseResults/spv.sparseTextureClamp.frag.out
--- a/Test/baseResults/spv.switch.frag.out
+++ b/Test/baseResults/spv.switch.frag.out
--- a/Test/baseResults/spv.while-continue-break.vert.out
+++ b/Test/baseResults/spv.while-continue-break.vert.out
@@ -57,17 +57,11 @@ Linked vertex stage:
              23:    16(bool)   IEqual 22 9
                                SelectionMerge 25 None
                                BranchConditional 23 24 25
-              12:                 Label
-                                  Store 38(D) 39
-                                  Return
-              13:                 Label
-                                  Branch 10
              24:                 Label
                                  Store 26(B) 21
                                  Branch 13
-              27:                 Label
-                                  Store 28(C) 21
-                                  Branch 25
+              13:                 Label
+                                  Branch 10
              25:               Label
              29:      6(int)   Load 8(i)
              31:      6(int)   SMod 29 30
@@ -77,12 +71,12 @@ Linked vertex stage:
              33:                 Label
                                  Store 26(B) 21
                                  Branch 12
-              35:                 Label
-                                  Store 28(C) 21
-                                  Branch 34
              34:               Label
              36:      6(int)   Load 8(i)
              37:      6(int)   IAdd 36 19
                                Store 8(i) 37
                                Branch 13
-                                FunctionEnd
+              12:             Label
+                              Store 38(D) 39
+                              Return
+                              FunctionEnd
--- a/Test/baseResults/spv.while-simple.vert.out
+++ b/Test/baseResults/spv.while-simple.vert.out
@@ -44,8 +44,8 @@ Linked vertex stage:
              20:      6(int)   IAdd 18 19
                                Store 8(i) 20
                                Branch 13
+              13:               Label
+                                Branch 10
              12:             Label
                              Return
-              13:             Label
-                              Branch 10
                              FunctionEnd
--- a/Test/baseResults/spv.whileLoop.frag.out
+++ b/Test/baseResults/spv.whileLoop.frag.out
@@ -55,10 +55,10 @@ Linked fragment stage:
              31:    7(fvec4)   FAdd 30 29
                                Store 9(color) 31
                                Branch 16
+              16:               Label
+                                Branch 13
              15:             Label
              34:    7(fvec4) Load 9(color)
                              Store 33(gl_FragColor) 34
                              Return
-              16:             Label
-                              Branch 13
                              FunctionEnd
--- a/Test/spv.400.frag
+++ b/Test/spv.400.frag
@@ -21,6 +21,223 @@ void foo23()
    outp.x += textureProjGradOffset(u2drs, outp, vec2(0.0), vec2(0.0), offsets[1]);
 }

+void doubles()  // Calls built-in functions with double, dvec* and dmat* operands and folds the results into outp.
+{
+    double doublev;
+    dvec2 dvec2v;
+    dvec3 dvec3v;
+    dvec4 dvec4v;
+
+    bool boolv;
+    bvec2 bvec2v;
+    bvec3 bvec3v;
+    bvec4 bvec4v;
+
+    doublev = sqrt(2.9);
+    dvec2v  = sqrt(dvec2(2.7));
+    dvec3v  = sqrt(dvec3(2.0));
+    dvec4v  = sqrt(dvec4(doublev));
+
+    doublev += inversesqrt(doublev);
+    dvec2v  += inversesqrt(dvec2v);
+    dvec3v  += inversesqrt(dvec3v);
+    dvec4v  += inversesqrt(dvec4v);
+
+    doublev += abs(doublev);
+    dvec2v  += abs(dvec2v);
+    dvec3v  += abs(dvec3v);
+    dvec4v  += abs(dvec4v);
+
+    doublev += sign(doublev);
+    dvec2v  += sign(dvec2v);
+    dvec3v  += sign(dvec3v);
+    dvec4v  += sign(dvec4v);
+
+    doublev += floor(doublev);
+    dvec2v  += floor(dvec2v);
+    dvec3v  += floor(dvec3v);
+    dvec4v  += floor(dvec4v);
+
+    doublev += trunc(doublev);
+    dvec2v  += trunc(dvec2v);
+    dvec3v  += trunc(dvec3v);
+    dvec4v  += trunc(dvec4v);
+
+    doublev += round(doublev);
+    dvec2v  += round(dvec2v);
+    dvec3v  += round(dvec3v);
+    dvec4v  += round(dvec4v);
+
+    doublev += roundEven(doublev);
+    dvec2v  += roundEven(dvec2v);
+    dvec3v  += roundEven(dvec3v);
+    dvec4v  += roundEven(dvec4v);
+
+    doublev += ceil(doublev);
+    dvec2v  += ceil(dvec2v);
+    dvec3v  += ceil(dvec3v);
+    dvec4v  += ceil(dvec4v);
+
+    doublev += fract(doublev);
+    dvec2v  += fract(dvec2v);
+    dvec3v  += fract(dvec3v);
+    dvec4v  += fract(dvec4v);
+
+    doublev += mod(doublev, doublev);  // scalar, vector-with-scalar, then vector-with-vector argument pairs
+    dvec2v  += mod(dvec2v, doublev);
+    dvec3v  += mod(dvec3v, doublev);
+    dvec4v  += mod(dvec4v, doublev);
+    dvec2v  += mod(dvec2v, dvec2v);
+    dvec3v  += mod(dvec3v, dvec3v);
+    dvec4v  += mod(dvec4v, dvec4v);
+
+    doublev += modf(doublev, doublev);  // the same variable is passed for both modf arguments
+    dvec2v  += modf(dvec2v,  dvec2v);
+    dvec3v  += modf(dvec3v,  dvec3v);
+    dvec4v  += modf(dvec4v,  dvec4v);
+
+    doublev += min(doublev, doublev);
+    dvec2v  += min(dvec2v, doublev);
+    dvec3v  += min(dvec3v, doublev);
+    dvec4v  += min(dvec4v, doublev);
+    dvec2v  += min(dvec2v, dvec2v);
+    dvec3v  += min(dvec3v, dvec3v);
+    dvec4v  += min(dvec4v, dvec4v);
+
+    doublev += max(doublev, doublev);
+    dvec2v  += max(dvec2v, doublev);
+    dvec3v  += max(dvec3v, doublev);
+    dvec4v  += max(dvec4v, doublev);
+    dvec2v  += max(dvec2v, dvec2v);
+    dvec3v  += max(dvec3v, dvec3v);
+    dvec4v  += max(dvec4v, dvec4v);
+
+    doublev += clamp(doublev, doublev, doublev);
+    dvec2v  += clamp(dvec2v, doublev, doublev);
+    dvec3v  += clamp(dvec3v, doublev, doublev);
+    dvec4v  += clamp(dvec4v, doublev, doublev);
+    dvec2v  += clamp(dvec2v, dvec2v, dvec2v);
+    dvec3v  += clamp(dvec3v, dvec3v, dvec3v);
+    dvec4v  += clamp(dvec4v, dvec4v, dvec4v);
+
+    doublev += mix(doublev, doublev, doublev);
+    dvec2v  += mix(dvec2v, dvec2v, doublev);
+    dvec3v  += mix(dvec3v, dvec3v, doublev);
+    dvec4v  += mix(dvec4v, dvec4v, doublev);
+    dvec2v  += mix(dvec2v, dvec2v, dvec2v);
+    dvec3v  += mix(dvec3v, dvec3v, dvec3v);
+    dvec4v  += mix(dvec4v, dvec4v, dvec4v);
+    doublev += mix(doublev, doublev, boolv);  // from here on the third mix argument is a bool / bvec
+    dvec2v  += mix(dvec2v, dvec2v, bvec2v);
+    dvec3v  += mix(dvec3v, dvec3v, bvec3v);
+    dvec4v  += mix(dvec4v, dvec4v, bvec4v);
+
+    doublev += step(doublev, doublev);
+    dvec2v  += step(dvec2v, dvec2v);
+    dvec3v  += step(dvec3v, dvec3v);
+    dvec4v  += step(dvec4v, dvec4v);
+    dvec2v  += step(doublev, dvec2v);  // scalar first argument with a vector second argument
+    dvec3v  += step(doublev, dvec3v);
+    dvec4v  += step(doublev, dvec4v);
+
+    doublev += smoothstep(doublev, doublev, doublev);
+    dvec2v  += smoothstep(dvec2v, dvec2v, dvec2v);
+    dvec3v  += smoothstep(dvec3v, dvec3v, dvec3v);
+    dvec4v  += smoothstep(dvec4v, dvec4v, dvec4v);
+    dvec2v  += smoothstep(doublev, doublev, dvec2v);  // scalar first two arguments with a vector third argument
+    dvec3v  += smoothstep(doublev, doublev, dvec3v);
+    dvec4v  += smoothstep(doublev, doublev, dvec4v);
+
+    boolv  = isnan(doublev);  // isnan results land in the bool / bvec locals
+    bvec2v = isnan(dvec2v);
+    bvec3v = isnan(dvec3v);
+    bvec4v = isnan(dvec4v);
+
+    boolv  = boolv ? isinf(doublev) : false;  // the isinf result is kept only when boolv is true
+    bvec2v = boolv ? isinf(dvec2v)  : bvec2(false);
+    bvec3v = boolv ? isinf(dvec3v)  : bvec3(false);
+    bvec4v = boolv ? isinf(dvec4v)  : bvec4(false);
+
+    doublev += length(doublev);
+    doublev += length(dvec2v);
+    doublev += length(dvec3v);
+    doublev += length(dvec4v);
+
+    doublev += distance(doublev, doublev);
+    doublev += distance(dvec2v, dvec2v);
+    doublev += distance(dvec3v, dvec3v);
+    doublev += distance(dvec4v, dvec4v);
+
+    doublev += dot(doublev, doublev);
+    doublev += dot(dvec2v, dvec2v);
+    doublev += dot(dvec3v, dvec3v);
+    doublev += dot(dvec4v, dvec4v);
+
+    dvec3v += cross(dvec3v, dvec3v);
+
+    doublev += normalize(doublev);
+    dvec2v  += normalize(dvec2v);
+    dvec3v  += normalize(dvec3v);
+    dvec4v  += normalize(dvec4v);
+
+    doublev += faceforward(doublev, doublev, doublev);
+    dvec2v  += faceforward(dvec2v, dvec2v, dvec2v);
+    dvec3v  += faceforward(dvec3v, dvec3v, dvec3v);
+    dvec4v  += faceforward(dvec4v, dvec4v, dvec4v);
+
+    doublev += reflect(doublev, doublev);
+    dvec2v  += reflect(dvec2v, dvec2v);
+    dvec3v  += reflect(dvec3v, dvec3v);
+    dvec4v  += reflect(dvec4v, dvec4v);
+
+    doublev += refract(doublev, doublev, doublev);
+    dvec2v  += refract(dvec2v, dvec2v, doublev);
+    dvec3v  += refract(dvec3v, dvec3v, doublev);
+    dvec4v  += refract(dvec4v, dvec4v, doublev);
+
+    dmat2   dmat2v   = outerProduct(dvec2v, dvec2v);  // one outerProduct per dmat shape; these locals are declared here
+    dmat3   dmat3v   = outerProduct(dvec3v, dvec3v);
+    dmat4   dmat4v   = outerProduct(dvec4v, dvec4v);
+    dmat2x3 dmat2x3v = outerProduct(dvec3v, dvec2v);
+    dmat3x2 dmat3x2v = outerProduct(dvec2v, dvec3v);
+    dmat2x4 dmat2x4v = outerProduct(dvec4v, dvec2v);
+    dmat4x2 dmat4x2v = outerProduct(dvec2v, dvec4v);
+    dmat3x4 dmat3x4v = outerProduct(dvec4v, dvec3v);
+    dmat4x3 dmat4x3v = outerProduct(dvec3v, dvec4v);
+
+    dmat2v *= matrixCompMult(dmat2v, dmat2v);
+    dmat3v *= matrixCompMult(dmat3v, dmat3v);
+    dmat4v *= matrixCompMult(dmat4v, dmat4v);
+    dmat2x3v = matrixCompMult(dmat2x3v, dmat2x3v);  // For now, relying on no dead-code elimination
+    dmat2x4v = matrixCompMult(dmat2x4v, dmat2x4v);
+    dmat3x2v = matrixCompMult(dmat3x2v, dmat3x2v);
+    dmat3x4v = matrixCompMult(dmat3x4v, dmat3x4v);
+    dmat4x2v = matrixCompMult(dmat4x2v, dmat4x2v);
+    dmat4x3v = matrixCompMult(dmat4x3v, dmat4x3v);
+
+    dmat2v   *= transpose(dmat2v);
+    dmat3v   *= transpose(dmat3v);
+    dmat4v   *= transpose(dmat4v);
+    dmat2x3v  = transpose(dmat3x2v);  // For now, relying on no dead-code elimination
+    dmat3x2v  = transpose(dmat2x3v);
+    dmat2x4v  = transpose(dmat4x2v);
+    dmat4x2v  = transpose(dmat2x4v);
+    dmat3x4v  = transpose(dmat4x3v);
+    dmat4x3v  = transpose(dmat3x4v);
+
+    doublev += determinant(dmat2v);  // determinant and inverse are only applied to the square matrices
+    doublev += determinant(dmat3v);
+    doublev += determinant(dmat4v);
+
+    dmat2v *= inverse(dmat2v);
+    dmat3v *= inverse(dmat3v);
+    dmat4v *= inverse(dmat4v);
+    
+    outp *= float(doublev + dvec2v.y + dvec3v.z + dvec4v.w + // one component of every local above feeds outp
+                  dmat2v[1][1] + dmat3v[2][2] + dmat4v[3][3] + dmat2x3v[1][1] + dmat3x2v[1][1] + dmat3x4v[2][2] + dmat4x3v[2][2] + dmat2x4v[1][1] + dmat4x2v[1][1] +
+                  float(boolv) + float(bvec2v.x) + float(bvec3v.x) + float(bvec4v.x));
+}
+
 void main()
 {
    vec4 v;
@@ -38,5 +255,6 @@ void main()

    outp += gl_FragCoord + vl2;
    foo23();
+    doubles();
 }

--- a/Test/spv.branch-return.vert
+++ b/Test/spv.branch-return.vert
+#version 300 es
+void main() {  // Switch whose cases mix early returns with one fall-out-of-switch path.
+  switch (gl_InstanceID) {
+    case 0: return;  // cases 0, 2 and 3 leave main() directly from inside the switch
+    case 1: gl_Position = vec4(0.0); break;  // the only case that breaks out to the code after the switch
+    case 2: return;
+    case 3: return;
+  }
+  gl_Position.x += 0.123;  // reached via case 1's break, or when no case label matches
+}
--- a/Test/spv.merge-unreachable.frag
+++ b/Test/spv.merge-unreachable.frag
+#version 450
+layout(location=1) in highp vec4 v;
+void main (void)  // Both arms of the if/else below terminate, so nothing after the if/else can execute.
+{
+  if (v == vec4(0.1,0.2,0.3,0.4)) discard;  // 'then' arm ends in discard
+  else return;  // 'else' arm returns from main
+}
--- a/Test/spv.sparseTexture.frag
+++ b/Test/spv.sparseTexture.frag
+#version 450
+#extension GL_ARB_sparse_texture2: enable
+
+uniform sampler2D               s2D;
+uniform sampler3D               s3D;
+uniform sampler2DShadow         s2DShadow;
+uniform samplerCubeShadow       sCubeShadow;
+uniform sampler2DArrayShadow    s2DArrayShadow;
+uniform sampler2DRectShadow     s2DRectShadow;
+uniform samplerCubeArrayShadow  sCubeArrayShadow;
+uniform sampler2DMS             s2DMS;
+
+uniform isamplerCube            isCube;
+uniform isampler2DArray         is2DArray;
+
+uniform usamplerCubeArray       usCubeArray;
+uniform usampler2DRect          us2DRect;
+
+uniform vec2 c2;
+uniform vec3 c3;
+uniform vec4 c4;
+
+uniform ivec2 offsets[4];
+
+out vec4 outColor;
+
+void main()  // Calls each sparse*ARB built-in with several sampler types and ORs the returned codes into 'resident'.
+{
+    int   resident = 0;  // accumulates the int returned by every sparse call below
+    vec4  texel  = vec4(0.0);  // texel, itexel and utexel are passed as the texel argument for float / int / uint samplers
+    ivec4 itexel = ivec4(0);
+    uvec4 utexel = uvec4(0);
+
+    resident |= sparseTextureARB(s2D, c2, texel);
+    resident |= sparseTextureARB(s3D, c3, texel, 2.0);
+    resident |= sparseTextureARB(isCube, c3, itexel);
+    resident |= sparseTextureARB(s2DShadow, c3, texel.x);
+    resident |= sparseTextureARB(sCubeArrayShadow, c4, 1.0, texel.x);  // cube-array shadow form: extra 1.0 argument precedes the texel argument
+
+    resident |= sparseTextureLodARB(s2D, c2, 2.0, texel);
+    resident |= sparseTextureLodARB(usCubeArray, c4, 1.0, utexel);
+    resident |= sparseTextureLodARB(s2DShadow, c3, 2.0, texel.y);
+
+    resident |= sparseTextureOffsetARB(s3D, c3, ivec3(2), texel, 2.0);
+    resident |= sparseTextureOffsetARB(us2DRect, c2, ivec2(3), utexel);
+    resident |= sparseTextureOffsetARB(s2DArrayShadow, c4, ivec2(5), texel.z);
+
+    resident |= sparseTexelFetchARB(s2D, ivec2(c2), 2, texel);
+    resident |= sparseTexelFetchARB(us2DRect, ivec2(c2), utexel);  // rect sampler call has no integer argument between coordinate and texel
+    resident |= sparseTexelFetchARB(s2DMS, ivec2(c2), 4, texel);
+
+    resident |= sparseTexelFetchOffsetARB(s3D, ivec3(c3), 2, ivec3(4), texel);
+    resident |= sparseTexelFetchOffsetARB(us2DRect, ivec2(c2), ivec2(3), utexel);
+
+    resident |= sparseTextureLodOffsetARB(s2D, c2, 2.0, ivec2(5), texel);
+    resident |= sparseTextureLodOffsetARB(is2DArray, c3, 2.0, ivec2(6), itexel);
+    resident |= sparseTextureLodOffsetARB(s2DShadow, c3, 2.0, ivec2(7), texel.z);
+
+    resident |= sparseTextureGradARB(s3D, c3, c3, c3, texel);
+    resident |= sparseTextureGradARB(sCubeShadow, c4, c3, c3, texel.y);
+    resident |= sparseTextureGradARB(usCubeArray, c4, c3, c3, utexel);
+
+    resident |= sparseTextureGradOffsetARB(s2D, c2, c2, c2, ivec2(5), texel);
+    resident |= sparseTextureGradOffsetARB(s2DRectShadow, c3, c2, c2, ivec2(6), texel.w);
+    resident |= sparseTextureGradOffsetARB(is2DArray, c3, c2, c2, ivec2(2), itexel);
+
+    resident |= sparseTextureGatherARB(s2D, c2, texel);
+    resident |= sparseTextureGatherARB(is2DArray, c3, itexel, 2);
+    resident |= sparseTextureGatherARB(s2DArrayShadow, c3, 2.0, texel);
+
+    resident |= sparseTextureGatherOffsetARB(s2D, c2, ivec2(4), texel);
+    resident |= sparseTextureGatherOffsetARB(is2DArray, c3, ivec2(5), itexel, 2);
+    resident |= sparseTextureGatherOffsetARB(s2DRectShadow, c2, 2.0, ivec2(7), texel); 
+
+    resident |= sparseTextureGatherOffsetsARB(s2D, c2, offsets, texel);  // 'offsets' is the uniform ivec2[4] array
+    resident |= sparseTextureGatherOffsetsARB(is2DArray, c3, offsets, itexel, 2);
+    resident |= sparseTextureGatherOffsetsARB(s2DRectShadow, c2, 2.0, offsets, texel); 
+
+    outColor = sparseTexelsResidentARB(resident) ? texel : vec4(itexel) + vec4(utexel);  // uses all three texel variables in the output
+}
\ No newline at end of file
--- a/Test/spv.sparseTextureClamp.frag
+++ b/Test/spv.sparseTextureClamp.frag
+#version 450
+#extension GL_ARB_sparse_texture_clamp: enable
+
+uniform sampler2D               s2D;
+uniform sampler3D               s3D;
+uniform sampler2DShadow         s2DShadow;
+uniform samplerCubeShadow       sCubeShadow;
+uniform sampler2DArrayShadow    s2DArrayShadow;
+uniform sampler2DRectShadow     s2DRectShadow;
+uniform samplerCubeArrayShadow  sCubeArrayShadow;
+
+uniform isamplerCube            isCube;
+uniform isampler2DArray         is2DArray;
+
+uniform usamplerCubeArray       usCubeArray;
+uniform usampler2DRect          us2DRect;
+
+uniform vec2 c2;
+uniform vec3 c3;
+uniform vec4 c4;
+
+uniform float lodClamp;
+
+out vec4 outColor;
+
+void main()  // Follows each group of sparse*ClampARB calls with the non-sparse *ClampARB calls on the same arguments.
+{
+    int   resident = 0;  // accumulates the int returned by every sparse call below
+    vec4  texel  = vec4(0.0);
+    ivec4 itexel = ivec4(0);
+    uvec4 utexel = uvec4(0);
+
+    resident |= sparseTextureClampARB(s2D, c2, lodClamp, texel);
+    resident |= sparseTextureClampARB(s3D, c3, lodClamp, texel, 2.0);
+    resident |= sparseTextureClampARB(isCube, c3, lodClamp, itexel);
+    resident |= sparseTextureClampARB(s2DShadow, c3, lodClamp, texel.x);
+    resident |= sparseTextureClampARB(sCubeArrayShadow, c4, 1.0, lodClamp, texel.x);  // cube-array shadow form: extra 1.0 argument precedes lodClamp
+
+    texel   += textureClampARB(s2D, c2, lodClamp);  // non-sparse variants: results are added directly into the texel variables
+    texel   += textureClampARB(s3D, c3, lodClamp, 2.0);
+    itexel  += textureClampARB(isCube, c3, lodClamp);
+    texel.x += textureClampARB(s2DShadow, c3, lodClamp);
+    texel.x += textureClampARB(sCubeArrayShadow, c4, 1.0, lodClamp);
+
+    resident |= sparseTextureOffsetClampARB(s3D, c3, ivec3(2), lodClamp, texel, 2.0);
+    resident |= sparseTextureOffsetClampARB(us2DRect, c2, ivec2(3), lodClamp, utexel);
+    resident |= sparseTextureOffsetClampARB(s2DArrayShadow, c4, ivec2(5), lodClamp, texel.z);
+
+    texel   += textureOffsetClampARB(s3D, c3, ivec3(2), lodClamp, 2.0);
+    utexel  += textureOffsetClampARB(us2DRect, c2, ivec2(3), lodClamp);
+    texel.z += textureOffsetClampARB(s2DArrayShadow, c4, ivec2(5), lodClamp);
+
+    resident |= sparseTextureGradClampARB(s3D, c3, c3, c3, lodClamp, texel);
+    resident |= sparseTextureGradClampARB(sCubeShadow, c4, c3, c3, lodClamp, texel.y);
+    resident |= sparseTextureGradClampARB(usCubeArray, c4, c3, c3, lodClamp, utexel);
+
+    texel   += textureGradClampARB(s3D, c3, c3, c3, lodClamp);
+    texel.y += textureGradClampARB(sCubeShadow, c4, c3, c3, lodClamp);
+    utexel  += textureGradClampARB(usCubeArray, c4, c3, c3, lodClamp);
+
+    resident |= sparseTextureGradOffsetClampARB(s2D, c2, c2, c2, ivec2(5), lodClamp, texel);
+    resident |= sparseTextureGradOffsetClampARB(s2DRectShadow, c3, c2, c2, ivec2(6), lodClamp, texel.w);
+    resident |= sparseTextureGradOffsetClampARB(is2DArray, c3, c2, c2, ivec2(2), lodClamp, itexel);
+
+    texel   += textureGradOffsetClampARB(s2D, c2, c2, c2, ivec2(5), lodClamp);
+    texel.w += textureGradOffsetClampARB(s2DRectShadow, c3, c2, c2, ivec2(6), lodClamp);
+    itexel  += textureGradOffsetClampARB(is2DArray, c3, c2, c2, ivec2(2), lodClamp);
+
+    outColor = sparseTexelsResidentARB(resident) ? texel : vec4(itexel) + vec4(utexel);  // uses all three texel variables in the output
+}
\ No newline at end of file
--- a/Test/test-spirv-list
+++ b/Test/test-spirv-list
@@ -34,6 +34,7 @@ spv.always-discard.frag
 spv.always-discard2.frag
 spv.bitCast.frag
 spv.bool.vert
+spv.branch-return.vert
 spv.conditionalDiscard.frag
 spv.conversion.frag
 spv.dataOut.frag
@@ -58,6 +59,7 @@ spv.loopsArtificial.frag
 spv.matFun.vert
 spv.matrix.frag
 spv.matrix2.frag
+spv.merge-unreachable.frag
 spv.newTexture.frag
 spv.nonSquare.vert
 spv.Operations.frag
@@ -68,6 +70,8 @@ spv.qualifiers.vert
 spv.shiftOps.frag
 spv.simpleFunctionCall.frag
 spv.simpleMat.vert
+spv.sparseTexture.frag
+spv.sparseTextureClamp.frag
 spv.structAssignment.frag
 spv.structDeref.frag
 spv.structure.frag

--- a/glslang/Include/intermediate.h
+++ b/glslang/Include/intermediate.h
@@ -369,6 +369,8 @@ enum TOperator {
    EOpImageAtomicExchange,
    EOpImageAtomicCompSwap,

+    EOpSparseImageLoad,
+
    EOpImageGuardEnd,

    //
@@ -398,6 +400,31 @@ enum TOperator {
    EOpTextureGather,
    EOpTextureGatherOffset,
    EOpTextureGatherOffsets,
+    EOpTextureClamp,
+    EOpTextureOffsetClamp,
+    EOpTextureGradClamp,
+    EOpTextureGradOffsetClamp,
+
+    EOpSparseTextureGuardBegin,
+
+    EOpSparseTexture,
+    EOpSparseTextureLod,
+    EOpSparseTextureOffset,
+    EOpSparseTextureFetch,
+    EOpSparseTextureFetchOffset,
+    EOpSparseTextureLodOffset,
+    EOpSparseTextureGrad,
+    EOpSparseTextureGradOffset,
+    EOpSparseTextureGather,
+    EOpSparseTextureGatherOffset,
+    EOpSparseTextureGatherOffsets,
+    EOpSparseTexelsResident,
+    EOpSparseTextureClamp,
+    EOpSparseTextureOffsetClamp,
+    EOpSparseTextureGradClamp,
+    EOpSparseTextureGradOffsetClamp,
+
+    EOpSparseTextureGuardEnd,

    EOpTextureGuardEnd,

@@ -622,6 +649,7 @@ struct TCrackedTextureOp {
    bool offsets;
    bool gather;
    bool grad;
+    bool lodClamp;
 };

 //
@@ -637,6 +665,8 @@ public:
    bool isConstructor() const;
    bool isTexture() const { return op > EOpTextureGuardBegin && op < EOpTextureGuardEnd; }
    bool isImage()   const { return op > EOpImageGuardBegin   && op < EOpImageGuardEnd; }
+    bool isSparseTexture() const { return op > EOpSparseTextureGuardBegin && op < EOpSparseTextureGuardEnd; }  // true when op lies strictly between the two sparse-texture guard enumerators
+    bool isSparseImage()   const { return op == EOpSparseImageLoad; }  // EOpSparseImageLoad is the only op this accepts

    // Crack the op into the individual dimensions of texturing operation.
    void crackTexture(TSampler sampler, TCrackedTextureOp& cracked) const
@@ -649,6 +679,7 @@ public:
        cracked.offsets = false;
        cracked.gather = false;
        cracked.grad = false;
+        cracked.lodClamp = false;

        switch (op) {
        case EOpImageQuerySize:
@@ -657,25 +688,40 @@ public:
        case EOpTextureQueryLod:
        case EOpTextureQueryLevels:
        case EOpTextureQuerySamples:
+        case EOpSparseTexelsResident:
            cracked.query = true;
            break;
        case EOpTexture:
+        case EOpSparseTexture:
+            break;
+        case EOpTextureClamp:
+        case EOpSparseTextureClamp:
+            cracked.lodClamp = true;
            break;
        case EOpTextureProj:
            cracked.proj = true;
            break;
        case EOpTextureLod:
+        case EOpSparseTextureLod:
            cracked.lod = true;
            break;
        case EOpTextureOffset:
+        case EOpSparseTextureOffset:
            cracked.offset = true;
            break;
+        case EOpTextureOffsetClamp:
+        case EOpSparseTextureOffsetClamp:
+            cracked.offset = true;
+            cracked.lodClamp = true;
+            break;
        case EOpTextureFetch:
+        case EOpSparseTextureFetch:
            cracked.fetch = true;
            if (sampler.dim == Esd1D || (sampler.dim == Esd2D && ! sampler.ms) || sampler.dim == Esd3D)
                cracked.lod = true;
            break;
        case EOpTextureFetchOffset:
+        case EOpSparseTextureFetchOffset:
            cracked.fetch = true;
            cracked.offset = true;
            if (sampler.dim == Esd1D || (sampler.dim == Esd2D && ! sampler.ms) || sampler.dim == Esd3D)
@@ -686,6 +732,7 @@ public:
            cracked.proj = true;
            break;
        case EOpTextureLodOffset:
+        case EOpSparseTextureLodOffset:
            cracked.offset = true;
            cracked.lod = true;
            break;
@@ -699,9 +746,16 @@ public:
            cracked.proj = true;
            break;
        case EOpTextureGrad:
+        case EOpSparseTextureGrad:
            cracked.grad = true;
            break;
+        case EOpTextureGradClamp:
+        case EOpSparseTextureGradClamp:
+            cracked.grad = true;
+            cracked.lodClamp = true;
+            break;
        case EOpTextureGradOffset:
+        case EOpSparseTextureGradOffset:
            cracked.grad = true;
            cracked.offset = true;
            break;
@@ -714,14 +768,23 @@ public:
            cracked.offset = true;
            cracked.proj = true;
            break;
+        case EOpTextureGradOffsetClamp:
+        case EOpSparseTextureGradOffsetClamp:
+            cracked.grad = true;
+            cracked.offset = true;
+            cracked.lodClamp = true;
+            break;
        case EOpTextureGather:
+        case EOpSparseTextureGather:
            cracked.gather = true;
            break;
        case EOpTextureGatherOffset:
+        case EOpSparseTextureGatherOffset:
            cracked.gather = true;
            cracked.offset = true;
            break;
        case EOpTextureGatherOffsets:
+        case EOpSparseTextureGatherOffsets:
            cracked.gather = true;
            cracked.offsets = true;
            break;

--- a/glslang/MachineIndependent/Initialize.cpp
+++ b/glslang/MachineIndependent/Initialize.cpp
--- a/glslang/MachineIndependent/Scan.cpp
+++ b/glslang/MachineIndependent/Scan.cpp
@@ -309,7 +309,7 @@ struct str_hash
        unsigned long hash = 5381;
        int c;

-        while ((c = *str++))
+        while ((c = *str++) != 0)
            hash = ((hash << 5) + hash) + c;

        return hash;

--- a/glslang/MachineIndependent/Versions.cpp
+++ b/glslang/MachineIndependent/Versions.cpp
@@ -173,6 +173,8 @@ void TParseContext::initializeExtensionBehavior()
    extensionBehavior[E_GL_ARB_derivative_control]           = EBhDisable;
    extensionBehavior[E_GL_ARB_shader_texture_image_samples] = EBhDisable;
    extensionBehavior[E_GL_ARB_viewport_array]               = EBhDisable;
+    extensionBehavior[E_GL_ARB_sparse_texture2]              = EBhDisable;
+    extensionBehavior[E_GL_ARB_sparse_texture_clamp]         = EBhDisable;
 //    extensionBehavior[E_GL_ARB_cull_distance]                = EBhDisable;    // present for 4.5, but need extension control over block members

    // #line and #include
@@ -274,6 +276,8 @@ const char* TParseContext::getPreamble()
            "#define GL_ARB_derivative_control 1\n"
            "#define GL_ARB_shader_texture_image_samples 1\n"
            "#define GL_ARB_viewport_array 1\n"
+            "#define GL_ARB_sparse_texture2 1\n"
+            "#define GL_ARB_sparse_texture_clamp 1\n"

            "#define GL_GOOGLE_cpp_style_line_directive 1\n"
            "#define GL_GOOGLE_include_directive 1\n"

--- a/glslang/MachineIndependent/Versions.h
+++ b/glslang/MachineIndependent/Versions.h
@@ -111,6 +111,8 @@ const char* const E_GL_ARB_shader_draw_parameters       = "GL_ARB_shader_draw_pa
 const char* const E_GL_ARB_derivative_control           = "GL_ARB_derivative_control";
 const char* const E_GL_ARB_shader_texture_image_samples = "GL_ARB_shader_texture_image_samples";
 const char* const E_GL_ARB_viewport_array               = "GL_ARB_viewport_array";
+const char* const E_GL_ARB_sparse_texture2              = "GL_ARB_sparse_texture2";
+const char* const E_GL_ARB_sparse_texture_clamp         = "GL_ARB_sparse_texture_clamp";
 //const char* const E_GL_ARB_cull_distance            = "GL_ARB_cull_distance";  // present for 4.5, but need extension control over block members

 // #line and #include

--- a/glslang/MachineIndependent/linkValidate.cpp
+++ b/glslang/MachineIndependent/linkValidate.cpp
@@ -959,6 +959,11 @@ int TIntermediate::getBaseAlignment(const TType& type, int& size, int& stride, b
            size += memberSize;
        }

+        // The structure may have padding at the end; the base offset of
+        // the member following the sub-structure is rounded up to the next
+        // multiple of the base alignment of the structure.
+        RoundToPow2(size, maxAlignment);
+
        return maxAlignment;
    }

@@ -982,7 +987,7 @@ int TIntermediate::getBaseAlignment(const TType& type, int& size, int& stride, b
    // rules 5 and 7
    if (type.isMatrix()) {
        // rule 5: deref to row, not to column, meaning the size of vector is num columns instead of num rows
-        TType derefType(type, 0, type.getQualifier().layoutMatrix == ElmRowMajor);
+        TType derefType(type, 0, rowMajor);
            
        alignment = getBaseAlignment(derefType, size, dummyStride, std140, rowMajor);
        if (std140)

--- a/glslang/MachineIndependent/preprocessor/PpTokens.cpp
+++ b/glslang/MachineIndependent/preprocessor/PpTokens.cpp
@@ -195,7 +195,7 @@ int TPpContext::ReadToken(TokenStream *pTok, TPpToken *ppToken)
    case PpAtomConstUint:
        len = 0;
        ch = lReadByte(pTok);
-        while (ch != 0) {
+        while (ch != 0 && ch != EndOfInput) {
            if (len < MaxTokenLength) {
                tokenText[len] = (char)ch;
                len++;
@@ -215,12 +215,10 @@ int TPpContext::ReadToken(TokenStream *pTok, TPpToken *ppToken)
            break;
        case PpAtomConstFloat:
        case PpAtomConstDouble:
-            strcpy(ppToken->name, tokenText);
            ppToken->dval = atof(ppToken->name);
            break;
        case PpAtomConstInt:
        case PpAtomConstUint:
-            strcpy(ppToken->name, tokenText);
            if (len > 0 && tokenText[0] == '0') {
                if (len > 1 && (tokenText[1] == 'x' || tokenText[1] == 'X'))
                    ppToken->ival = strtol(ppToken->name, 0, 16);

--- a/glslang/OSDependent/Windows/ossource.cpp
+++ b/glslang/OSDependent/Windows/ossource.cpp
@@ -147,6 +147,8 @@ void OS_Sleep(int milliseconds)
    Sleep(milliseconds);
 }

+//#define DUMP_COUNTERS
+
 void OS_DumpMemoryCounters()
 {
 #ifdef DUMP_COUNTERS