Commit e1a763d1 by Shahbaz Youssefi Committed by Commit Bot

Vulkan: Implement basic barrier perf test

There's a lot more that can go into this perf test, but it requires further work on the Vulkan back end. Bug: angleproject:2999 Change-Id: Iea62bfd09639af108674dcf0a9e7c9d36ccddcef Reviewed-on: https://chromium-review.googlesource.com/c/1437734 Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org> Reviewed-by: Yuly Novikov <ynovikov@chromium.org>
parent e923a63e
......@@ -18,8 +18,8 @@ import re
base_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
# Might have to add lower case "release" in some configurations.
perftests_paths = glob.glob('out/*Release*')
# Look for a [Rr]elease build.
perftests_paths = glob.glob('out/*elease*')
metric = 'wall_time'
max_experiments = 10
......
......@@ -25,6 +25,7 @@ angle_perf_tests_sources = [
"perf_tests/TextureSampling.cpp",
"perf_tests/TexturesPerf.cpp",
"perf_tests/UniformsPerf.cpp",
"perf_tests/VulkanBarriersPerf.cpp",
"perf_tests/third_party/perf/perf_test.cc",
"perf_tests/third_party/perf/perf_test.h",
"test_utils/angle_test_configs.cpp",
......
......@@ -44,7 +44,7 @@ class PointSpritesTest : public ANGLETest
float s2p(float s) { return (s + 1.0f) * 0.5f * (GLfloat)windowWidth; }
void testPointCoordAndPointSizeCompliance(priv::GLProgram program)
void testPointCoordAndPointSizeCompliance(GLProgram program)
{
glUseProgram(program);
......
//
// Copyright 2019 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// VulkanBarriersPerf:
// Performance tests for ANGLE's Vulkan backend w.r.t barrier efficiency.
//
#include <sstream>
#include "ANGLEPerfTest.h"
#include "test_utils/gl_raii.h"
#include "util/shader_utils.h"
using namespace angle;
namespace
{
// Number of iterations per benchmark step. The params constructor stores this in the inherited
// iterationsPerStep field, which drawBenchmark() uses as its loop bound.
constexpr unsigned int kIterationsPerStep = 10;
// Parameter set for the Vulkan barrier benchmark. On top of the common render-test settings it
// carries two switches that select the variant being measured.
struct VulkanBarriersPerfParams final : public RenderTestParams
{
    // largeTransfers: also create the two huge transfer textures.
    // slowFS: use the deliberately slow fragment shader instead of the short one.
    VulkanBarriersPerfParams(bool largeTransfers, bool slowFS)
        : doLargeTransfers(largeTransfers), doSlowFragmentShaders(slowFS)
    {
        // Configuration shared by every variant: Vulkan platform, ES 2.0 context, 256x256
        // window, GPU timing enabled.
        eglParameters = egl_platform::VULKAN();
        majorVersion  = 2;
        minorVersion  = 0;
        windowWidth   = 256;
        windowHeight  = 256;
        trackGpuTime  = true;

        iterationsPerStep = kIterationsPerStep;
    }

    // Base suffix followed by "_transfer" and/or "_slowfs" depending on the switches below.
    std::string suffix() const override;

    // Edge lengths of the square images used by the test (small, large, huge).
    static constexpr int kImageSizes[3] = {256, 512, 4096};

    // Variant switches, set once by the constructor.
    bool doLargeTransfers;
    bool doSlowFragmentShaders;
};

// Namespace-scope definition of the static constexpr array declared in the struct.
constexpr int VulkanBarriersPerfParams::kImageSizes[];
// Streams the parameter suffix with its first character dropped.
// NOTE(review): presumably the dropped character is a leading separator produced by
// RenderTestParams::suffix() — confirm.
std::ostream &operator<<(std::ostream &os, const VulkanBarriersPerfParams &params)
{
    return os << params.suffix().substr(1);
}
// Benchmark fixture: ping-pongs draws between two texture-backed framebuffers so that the
// barrier handling in the Vulkan back end is exercised.
class VulkanBarriersPerfBenchmark : public ANGLERenderTest,
                                    public ::testing::WithParamInterface<VulkanBarriersPerfParams>
{
  public:
    VulkanBarriersPerfBenchmark();

    void initializeBenchmark() override;
    void destroyBenchmark() override;
    void drawBenchmark() override;

  private:
    // Slots in mFbos.
    static constexpr size_t kSmallFboIndex = 0;
    static constexpr size_t kLargeFboIndex = 1;

    // Slots in mTextures. The first two back the framebuffers; the last two are only created
    // when large transfers are requested.
    static constexpr size_t kSmallTextureIndex     = 0;
    static constexpr size_t kLargeTextureIndex     = 1;
    static constexpr size_t kTransferTexture1Index = 2;
    static constexpr size_t kTransferTexture2Index = 3;

    // Slots in VulkanBarriersPerfParams::kImageSizes.
    static constexpr size_t kSmallSizeIndex = 0;
    static constexpr size_t kLargeSizeIndex = 1;
    static constexpr size_t kHugeSizeIndex  = 2;

    // Allocates storage for mTextures[textureIndex] with the size selected by sizeIndex.
    void createTexture(uint32_t textureIndex, uint32_t sizeIndex, bool compressed);
    // Creates the texture and attaches it as the color attachment of mFbos[fboIndex].
    void createFramebuffer(uint32_t fboIndex, uint32_t textureIndex, uint32_t sizeIndex);
    // Builds the program, geometry buffers, framebuffers and (optionally) transfer textures.
    void createResources();

    // Program used for every draw.
    GLProgram mProgram;

    // Locations of the a_position / a_texCoord attributes.
    GLint mPositionLoc;
    GLint mTexCoordLoc;

    // Location of the s_texture sampler uniform.
    GLint mSamplerLoc;

    // Textures, indexed by the k*TextureIndex constants.
    GLTexture mTextures[4];

    // Framebuffers, indexed by the k*FboIndex constants.
    GLFramebuffer mFbos[2];

    // Quad geometry.
    GLBuffer mVertexBuffer;
    GLBuffer mIndexBuffer;
};
// Builds the test-name suffix: the base class suffix, then "_transfer" when large transfers are
// enabled, then "_slowfs" when the slow fragment shader is selected.
std::string VulkanBarriersPerfParams::suffix() const
{
    std::string name = RenderTestParams::suffix();

    if (doLargeTransfers)
    {
        name += "_transfer";
    }

    if (doSlowFragmentShaders)
    {
        name += "_slowfs";
    }

    return name;
}
VulkanBarriersPerfBenchmark::VulkanBarriersPerfBenchmark()
: ANGLERenderTest("VulkanBarriersPerf", GetParam()),
mPositionLoc(-1),
mTexCoordLoc(-1),
mSamplerLoc(-1)
{}
// Vertex shader: passes a_position straight to gl_Position and forwards a_texCoord to the
// fragment stage through v_texCoord.
constexpr char kVS[] = R"(attribute vec4 a_position;
attribute vec2 a_texCoord;
varying vec2 v_texCoord;
void main()
{
gl_Position = a_position;
v_texCoord = a_texCoord;
})";
// Fast fragment shader: a single lookup of s_texture at v_texCoord.
constexpr char kShortFS[] = R"(precision mediump float;
varying vec2 v_texCoord;
uniform sampler2D s_texture;
void main()
{
gl_FragColor = texture2D(s_texture, v_texCoord);
})";
// Slow fragment shader: fragments with v_texCoord.x < 0.2 accumulate 100 lookups of s_texture;
// all other fragments output vec4(0).
constexpr char kSlowFS[] = R"(precision mediump float;
varying vec2 v_texCoord;
uniform sampler2D s_texture;
void main()
{
vec4 outColor = vec4(0);
if (v_texCoord.x < 0.2)
{
for (int i = 0; i < 100; ++i)
{
outColor += texture2D(s_texture, v_texCoord);
}
}
gl_FragColor = outColor;
})";
void VulkanBarriersPerfBenchmark::createTexture(uint32_t textureIndex,
uint32_t sizeIndex,
bool compressed)
{
const auto &params = GetParam();
// TODO(syoussefi): compressed copy using vkCmdCopyImage not yet implemented in the vulkan
// backend. http://anglebug.com/2999
glBindTexture(GL_TEXTURE_2D, mTextures[textureIndex]);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, params.kImageSizes[sizeIndex],
params.kImageSizes[sizeIndex], 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
// Disable mipmapping
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
}
void VulkanBarriersPerfBenchmark::createFramebuffer(uint32_t fboIndex,
uint32_t textureIndex,
uint32_t sizeIndex)
{
createTexture(textureIndex, sizeIndex, false);
glBindFramebuffer(GL_FRAMEBUFFER, mFbos[fboIndex]);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
mTextures[textureIndex], 0);
}
void VulkanBarriersPerfBenchmark::createResources()
{
const auto &params = GetParam();
mProgram.makeRaster(kVS, params.doSlowFragmentShaders ? kSlowFS : kShortFS);
ASSERT_TRUE(mProgram.valid());
// Get the attribute locations
mPositionLoc = glGetAttribLocation(mProgram, "a_position");
mTexCoordLoc = glGetAttribLocation(mProgram, "a_texCoord");
// Get the sampler location
mSamplerLoc = glGetUniformLocation(mProgram, "s_texture");
// Build the vertex buffer
GLfloat vertices[] = {
-0.5f, 0.5f, 0.0f, // Position 0
0.0f, 0.0f, // TexCoord 0
-0.5f, -0.5f, 0.0f, // Position 1
0.0f, 1.0f, // TexCoord 1
0.5f, -0.5f, 0.0f, // Position 2
1.0f, 1.0f, // TexCoord 2
0.5f, 0.5f, 0.0f, // Position 3
1.0f, 0.0f // TexCoord 3
};
glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
GLushort indices[] = {0, 1, 2, 0, 2, 3};
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
// Use tightly packed data
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
// Create four textures. Two of them are going to be framebuffers, and two are used for large
// transfers.
createFramebuffer(kSmallFboIndex, kSmallTextureIndex, kSmallSizeIndex);
createFramebuffer(kLargeFboIndex, kLargeTextureIndex, kLargeSizeIndex);
if (params.doLargeTransfers)
{
createTexture(kTransferTexture1Index, kHugeSizeIndex, true);
createTexture(kTransferTexture2Index, kHugeSizeIndex, true);
}
}
// One-time setup: creates all GL resources, sets a transparent-black clear color, and asserts
// that no GL error was raised while doing so.
void VulkanBarriersPerfBenchmark::initializeBenchmark()
{
    createResources();

    glClearColor(0.0f, 0.0f, 0.0f, 0.0f);

    ASSERT_GL_NO_ERROR();
}
// No explicit teardown is performed here.
// NOTE(review): presumably the GL* wrapper members (test_utils/gl_raii.h) release their GL
// objects when the fixture is destroyed — confirm.
void VulkanBarriersPerfBenchmark::destroyBenchmark() {}
void VulkanBarriersPerfBenchmark::drawBenchmark()
{
const auto &params = GetParam();
glUseProgram(mProgram);
// Bind the buffers
glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer);
// Load the vertex position
glVertexAttribPointer(mPositionLoc, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), 0);
// Load the texture coordinate
glVertexAttribPointer(mTexCoordLoc, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat),
reinterpret_cast<void *>(3 * sizeof(GLfloat)));
glEnableVertexAttribArray(mPositionLoc);
glEnableVertexAttribArray(mTexCoordLoc);
// Set the texture sampler to texture unit to 0
glUniform1i(mSamplerLoc, 0);
/*
* The perf benchmark does the following:
*
* - Alternately clear and draw from fbo 1 into fbo 2 and back. This would use the color
* attachment and shader read-only layouts in the fragment shader and color attachment stages.
*
* Once compressed texture copies are supported, alternately transfer large chunks of data from
* texture 1 into texture 2 and back. This would use the transfer layouts in the transfer
* stage.
*
* Once compute shader support is added, another independent set of operations could be a few
* dispatches. This would use the general and shader read-only layouts in the compute stage.
*
* The idea is to create independent pipelines of operations that would run in parallel on the
* GPU. Regressions or inefficiencies in the barrier implementation could result in
* serialization of these jobs, resulting in a hit in performance.
*
* The above operations for example should ideally run on the GPU threads in parallel:
*
* + |---draw---||---draw---||---draw---||---draw---||---draw---|
* + |-----------transfer------------||-----------transfer------------|
* + |-----dispatch------||------dispatch------||------dispatch------|
*
* If barriers are too restrictive, situations like this could happen (draw is blocking
* transfer):
*
* + |---draw---||---draw---||---draw---||---draw---||---draw---|
* + |-----------transfer------------||-----------transfer------------|
*
* Or like this (transfer is blocking draw):
*
* + |---draw---| |---draw---| |---draw---|
* + |-----------transfer------------||-----------transfer------------|
*
* Or like this (draw and transfer blocking each other):
*
* + |---draw---| |---draw---|
* + |-----------transfer------------| |-----------transfer------------|
*
* The idea of doing slow FS calls is to make the second case above slower (by making the draw
* slower than the transfer):
*
* + |------------------draw------------------| |-...draw...-|
* + |-----------transfer------------| |-----------transfer------------|
*/
startGpuTimer();
for (unsigned int iteration = 0; iteration < params.iterationsPerStep; ++iteration)
{
bool altEven = iteration % 2 == 0;
const int fboDestIndex = altEven ? kLargeFboIndex : kSmallFboIndex;
const int fboTexSrcIndex = altEven ? kSmallTextureIndex : kLargeTextureIndex;
const int fboDestSizeIndex = altEven ? kLargeSizeIndex : kSmallSizeIndex;
// Set the viewport
glViewport(0, 0, fboDestSizeIndex, fboDestSizeIndex);
// Clear the color buffer
glClear(GL_COLOR_BUFFER_BIT);
// Bind the framebuffer
glBindFramebuffer(GL_FRAMEBUFFER, mFbos[fboDestIndex]);
// Bind the texture
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, mTextures[fboTexSrcIndex]);
ASSERT_GL_NO_ERROR();
glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0);
}
stopGpuTimer();
ASSERT_GL_NO_ERROR();
}
} // namespace
// Parameterized test body: calls run() for each instantiated parameter set.
TEST_P(VulkanBarriersPerfBenchmark, Run)
{
run();
}
// Instantiates the benchmark for three (largeTransfers, slowFS) combinations: neither, large
// transfers only, and both. The (false, true) combination is not instantiated.
ANGLE_INSTANTIATE_TEST(VulkanBarriersPerfBenchmark,
VulkanBarriersPerfParams(false, false),
VulkanBarriersPerfParams(true, false),
VulkanBarriersPerfParams(true, true));
......@@ -128,9 +128,7 @@ class GLShader : angle::NonCopyable
GLuint mHandle;
};
// Don't use GLProgram directly, use ANGLE_GL_PROGRAM.
namespace priv
{
// Prefer ANGLE_GL_PROGRAM for local variables.
class GLProgram
{
public:
......@@ -182,40 +180,39 @@ class GLProgram
private:
GLuint mHandle;
};
} // namespace priv
#define ANGLE_GL_EMPTY_PROGRAM(name) \
priv::GLProgram name; \
GLProgram name; \
name.makeEmpty(); \
ASSERT_TRUE(name.valid())
#define ANGLE_GL_PROGRAM(name, vertex, fragment) \
priv::GLProgram name; \
GLProgram name; \
name.makeRaster(vertex, fragment); \
ASSERT_TRUE(name.valid())
#define ANGLE_GL_PROGRAM_WITH_GS(name, vertex, geometry, fragment) \
priv::GLProgram name; \
GLProgram name; \
name.makeRaster(vertex, geometry, fragment); \
ASSERT_TRUE(name.valid())
#define ANGLE_GL_PROGRAM_TRANSFORM_FEEDBACK(name, vertex, fragment, tfVaryings, bufferMode) \
priv::GLProgram name; \
GLProgram name; \
name.makeRasterWithTransformFeedback(vertex, fragment, tfVaryings, bufferMode); \
ASSERT_TRUE(name.valid())
#define ANGLE_GL_COMPUTE_PROGRAM(name, compute) \
priv::GLProgram name; \
GLProgram name; \
name.makeCompute(compute); \
ASSERT_TRUE(name.valid())
#define ANGLE_GL_BINARY_OES_PROGRAM(name, binary, binaryFormat) \
priv::GLProgram name; \
GLProgram name; \
name.makeBinaryOES(binary, binaryFormat); \
ASSERT_TRUE(name.valid())
#define ANGLE_GL_BINARY_ES3_PROGRAM(name, binary, binaryFormat) \
priv::GLProgram name; \
GLProgram name; \
name.makeBinaryES3(binary, binaryFormat); \
ASSERT_TRUE(name.valid())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment