Commit 1b900878 by Alexis Hetu Committed by Alexis Hétu

Implement ASTC support

This CL adds both LDR and HDR support for ASTC compressed textures. Only LDR formats are exposed in the PhysicalDevice's properties, but HDR support can be added trivially later by adding the HDR formats and exposing "VK_EXT_texture_compression_astc_hdr". Pulled from: https://github.com/ARM-software/astc-encoder Git hash: 81a5e50741b4c8302cf7d78f314a53e44ee68e1f The astc-encoder git repo was added to third-party, with a few minor modifications: 1) All encoding related code has been ripped out, only decoding related code remains. 2) Replaced ASTC_CODEC_INTERNAL_ERROR() with UNREACHABLE() in a switch statement in astc_color_unquantize.cpp. 3) Some functions were using a lot of stack memory, so I added a unique_ptr to allocate the same objects on the heap, to avoid potential issues. LDR ASTC is decoded to 8-bit unsigned RGBA. HDR ASTC is decoded to 32-bit floating point. Tests: dEQP-VK.*astc* Bug: b/150130101 Change-Id: I6b03fed6e1f326a95c7aefe9f9a9d0a89cf24428 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/41568 Reviewed-by: Nicolas Capens <nicolascapens@google.com> Tested-by: Nicolas Capens <nicolascapens@google.com> Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
parent 3e5fe285
......@@ -330,6 +330,7 @@ set(OPENGL_DIR ${SOURCE_DIR}/OpenGL)
set(OPENGL_COMPILER_DIR ${OPENGL_DIR}/compiler)
set(VULKAN_DIR ${SOURCE_DIR}/Vulkan)
set(THIRD_PARTY_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party)
set(ASTC_DIR ${THIRD_PARTY_DIR}/astc-encoder)
set(LLVM_DIR ${THIRD_PARTY_DIR}/llvm-7.0/llvm)
set(LLVM_CONFIG_DIR ${THIRD_PARTY_DIR}/llvm-7.0/configs)
set(LIBBACKTRACE_DIR ${THIRD_PARTY_DIR}/libbacktrace/src)
......@@ -1865,6 +1866,8 @@ file(GLOB VULKAN_LIST
${SOURCE_DIR}/WSI/VkSurfaceKHR.hpp
${SOURCE_DIR}/WSI/VkSwapchainKHR.cpp
${SOURCE_DIR}/WSI/VkSwapchainKHR.hpp
${ASTC_DIR}/Source/*.cpp
${ASTC_DIR}/Source/*.h
${CMAKE_CURRENT_SOURCE_DIR}/include/vulkan/*.h}
)
......
......@@ -589,6 +589,28 @@ cc_defaults {
},
}
// Static library that builds System/Debug.cpp on its own, so that it can be
// pulled in through `static_libs` by the modules that need it.
cc_library_static {
name: "swiftshader_debug",
vendor_available: true,
// Tag attached to log output produced by this library.
cflags: [
"-DLOG_TAG=\"swiftshader\"",
],
srcs: [
"System/Debug.cpp",
],
// Dependents get this directory on their include path.
export_include_dirs: [
".",
],
shared_libs: [
"liblog",
],
}
cc_defaults {
name: "libvk_swiftshader_common_defaults",
......@@ -601,7 +623,6 @@ cc_defaults {
"System/Build.cpp",
"System/Configurator.cpp",
"System/CPUID.cpp",
"System/Debug.cpp",
"System/GrallocAndroid.cpp",
"System/Half.cpp",
"System/Linux/MemFd.cpp",
......@@ -615,6 +636,8 @@ cc_defaults {
],
static_libs: [
"swiftshader_astc",
"swiftshader_debug",
"swiftshader_marl",
"swiftshader_spirv-tools",
],
......
// Copyright 2020 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ASTC_Decoder.hpp"
#include "../third_party/astc-encoder/Source/astc_codec_internals.h"
#include "System/Math.hpp"
#include <memory>
#include <unordered_map>
namespace {
void write_imageblock(unsigned char *img,
// picture-block to initialize with image data. We assume that orig_data is valid
const imageblock *pb,
// output dimensions
int xsize, int ysize, int zsize,
// output format
int bytes, int destPitchB, int destSliceB, bool isUnsignedByte,
// block dimensions
int xdim, int ydim, int zdim,
// position to write the block to
int xpos, int ypos, int zpos)
{
const float *fptr = pb->orig_data;
const uint8_t *nptr = pb->nan_texel;
for(int z = 0; z < zdim; z++)
{
for(int y = 0; y < ydim; y++)
{
for(int x = 0; x < xdim; x++)
{
int xi = xpos + x;
int yi = ypos + y;
int zi = zpos + z;
if(xi >= 0 && yi >= 0 && zi >= 0 && xi < xsize && yi < ysize && zi < zsize)
{
unsigned char *pix = &img[zi * destSliceB + yi * destPitchB + xi * bytes];
if(isUnsignedByte)
{
if(*nptr)
{
// NaN-pixel, but we can't display it. Display purple instead.
pix[0] = 0xFF;
pix[1] = 0x00;
pix[2] = 0xFF;
pix[3] = 0xFF;
}
else
{
pix[0] = static_cast<unsigned char>(sw::clamp(fptr[0], 0.0f, 1.0f) * 255.0f + 0.5f);
pix[1] = static_cast<unsigned char>(sw::clamp(fptr[1], 0.0f, 1.0f) * 255.0f + 0.5f);
pix[2] = static_cast<unsigned char>(sw::clamp(fptr[2], 0.0f, 1.0f) * 255.0f + 0.5f);
pix[3] = static_cast<unsigned char>(sw::clamp(fptr[3], 0.0f, 1.0f) * 255.0f + 0.5f);
}
}
else
{
if(*nptr)
{
unsigned int *pixu = reinterpret_cast<unsigned int *>(pix);
pixu[0] = pixu[1] = pixu[2] = pixu[3] = 0x7FFFFFFF; // QNaN
}
else
{
float *pixf = reinterpret_cast<float *>(pix);
pixf[0] = fptr[0];
pixf[1] = fptr[1];
pixf[2] = fptr[2];
pixf[3] = fptr[3];
}
}
}
fptr += 4;
nptr++;
}
}
}
}
} // namespace
void ASTC_Decoder::Decode(const unsigned char *source, unsigned char *dest,
int destWidth, int destHeight, int destDepth,
int bytes, int destPitchB, int destSliceB,
int xBlockSize, int yBlockSize, int zBlockSize,
int xblocks, int yblocks, int zblocks, bool isUnsignedByte)
{
build_quantization_mode_table();
astc_decode_mode decode_mode = isUnsignedByte ? DECODE_LDR : DECODE_HDR;
std::unique_ptr<block_size_descriptor> bsd(new block_size_descriptor);
init_block_size_descriptor(xBlockSize, yBlockSize, zBlockSize, bsd.get());
std::unique_ptr<imageblock> ib(new imageblock);
std::unique_ptr<symbolic_compressed_block> scb(new symbolic_compressed_block);
for(int z = 0; z < zblocks; z++)
{
for(int y = 0; y < yblocks; y++)
{
for(int x = 0; x < xblocks; x++, source += 16)
{
physical_to_symbolic(bsd.get(), *(physical_compressed_block *)source, scb.get());
decompress_symbolic_block(decode_mode, bsd.get(), x * xBlockSize, y * yBlockSize, z * zBlockSize, scb.get(), ib.get());
write_imageblock(dest, ib.get(), destWidth, destHeight, destDepth, bytes, destPitchB, destSliceB, isUnsignedByte,
xBlockSize, yBlockSize, zBlockSize, x * xBlockSize, y * yBlockSize, z * zBlockSize);
}
}
}
term_block_size_descriptor(bsd.get());
}
// Copyright 2020 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Entry point for decoding ASTC compressed texture data.
struct ASTC_Decoder
{
// Decodes a grid of ASTC blocks into an uncompressed image.
//
// source                  - compressed input; blocks are read consecutively, 16 bytes each
// dest                    - start of the destination image memory
// destWidth/Height/Depth  - destination size in texels; texels outside it are not written
// bytes                   - size in bytes of one destination texel
// destPitchB, destSliceB  - destination row and slice strides, in bytes
// x/y/zBlockSize          - dimensions of one ASTC block, in texels
// x/y/zblocks             - number of blocks to decode along each axis
// isUnsignedByte          - true: LDR decode, four 8-bit channels per texel;
//                           false: HDR decode, four 32-bit floats per texel
static void Decode(const unsigned char *source, unsigned char *dest,
int destWidth, int destHeight, int destDepth,
int bytes, int destPitchB, int destSliceB,
int xBlockSize, int yBlockSize, int zBlockSize,
int xblocks, int yblocks, int zblocks, bool isUnsignedByte);
};
\ No newline at end of file
......@@ -16,6 +16,7 @@ import("../swiftshader.gni")
swiftshader_source_set("Device_headers") {
sources = [
"ASTC_Decoder.hpp",
"BC_Decoder.hpp",
"Blitter.hpp",
"Clipper.hpp",
......@@ -30,11 +31,14 @@ swiftshader_source_set("Device_headers") {
"Renderer.hpp",
"SetupProcessor.hpp",
"VertexProcessor.hpp",
"../../third_party/astc-encoder/Source/astc_codec_internals.h",
"../../third_party/astc-encoder/Source/astc_mathlib.h",
]
}
swiftshader_source_set("Device") {
sources = [
"ASTC_Decoder.cpp",
"BC_Decoder.cpp",
"Blitter.cpp",
"Clipper.cpp",
......@@ -48,6 +52,18 @@ swiftshader_source_set("Device") {
"Renderer.cpp",
"SetupProcessor.cpp",
"VertexProcessor.cpp",
"../../third_party/astc-encoder/Source/astc_block_sizes2.cpp",
"../../third_party/astc-encoder/Source/astc_color_unquantize.cpp",
"../../third_party/astc-encoder/Source/astc_decompress_symbolic.cpp",
"../../third_party/astc-encoder/Source/astc_image_load_store.cpp",
"../../third_party/astc-encoder/Source/astc_integer_sequence.cpp",
"../../third_party/astc-encoder/Source/astc_mathlib.cpp",
"../../third_party/astc-encoder/Source/astc_mathlib_softfloat.cpp",
"../../third_party/astc-encoder/Source/astc_partition_tables.cpp",
"../../third_party/astc-encoder/Source/astc_percentile_tables.cpp",
"../../third_party/astc-encoder/Source/astc_quantization.cpp",
"../../third_party/astc-encoder/Source/astc_symbolic_physical.cpp",
"../../third_party/astc-encoder/Source/astc_weight_quant_xfer_tables.cpp",
]
include_dirs = [
......
......@@ -133,9 +133,6 @@ inline int ceilInt4(int x)
!!((x)&0xFFFFFFFE) + \
!!((x)&0xFFFFFFFF))
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#define MIN(x, y) ((x) < (y) ? (x) : (y))
inline unsigned long log2i(int x)
{
#if defined(_MSC_VER)
......
......@@ -16,6 +16,7 @@
#include "VkBuffer.hpp"
#include "VkDevice.hpp"
#include "VkDeviceMemory.hpp"
#include "Device/ASTC_Decoder.hpp"
#include "Device/BC_Decoder.hpp"
#include "Device/Blitter.hpp"
#include "Device/ETC_Decoder.hpp"
......@@ -660,7 +661,15 @@ int Image::rowPitchBytes(VkImageAspectFlagBits aspect, uint32_t mipLevel) const
ASSERT((aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) !=
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));
return getFormat(aspect).pitchB(getMipLevelExtent(aspect, mipLevel).width, borderSize(), true);
VkExtent3D mipLevelExtent = getMipLevelExtent(aspect, mipLevel);
Format usedFormat = getFormat(aspect);
if(usedFormat.isCompressed())
{
VkExtent3D extentInBlocks = imageExtentInBlocks(mipLevelExtent, aspect);
return extentInBlocks.width * usedFormat.bytesPerBlock();
}
return usedFormat.pitchB(mipLevelExtent.width, borderSize(), true);
}
int Image::slicePitchBytes(VkImageAspectFlagBits aspect, uint32_t mipLevel) const
......@@ -673,8 +682,8 @@ int Image::slicePitchBytes(VkImageAspectFlagBits aspect, uint32_t mipLevel) cons
Format usedFormat = getFormat(aspect);
if(usedFormat.isCompressed())
{
sw::align(mipLevelExtent.width, usedFormat.blockWidth());
sw::align(mipLevelExtent.height, usedFormat.blockHeight());
VkExtent3D extentInBlocks = imageExtentInBlocks(mipLevelExtent, aspect);
return extentInBlocks.height * extentInBlocks.width * usedFormat.bytesPerBlock();
}
return usedFormat.sliceB(mipLevelExtent.width, mipLevelExtent.height, borderSize(), true);
......@@ -977,6 +986,50 @@ void Image::prepareForSampling(const VkImageSubresourceRange &subresourceRange)
case VK_FORMAT_BC5_SNORM_BLOCK:
decodeBC(subresourceRange);
break;
case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
case VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK_EXT:
case VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK_EXT:
case VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK_EXT:
case VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK_EXT:
case VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK_EXT:
case VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK_EXT:
case VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK_EXT:
case VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK_EXT:
case VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK_EXT:
case VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK_EXT:
case VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK_EXT:
case VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK_EXT:
case VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK_EXT:
case VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK_EXT:
decodeASTC(subresourceRange);
break;
default:
break;
}
......@@ -1087,4 +1140,49 @@ void Image::decodeBC(const VkImageSubresourceRange &subresourceRange) const
}
}
void Image::decodeASTC(const VkImageSubresourceRange &subresourceRange) const
{
ASSERT(decompressedImage);
int xBlockSize = format.blockWidth();
int yBlockSize = format.blockHeight();
int zBlockSize = 1;
bool isUnsigned = format.isUnsignedComponent(0);
uint32_t lastLayer = getLastLayerIndex(subresourceRange);
uint32_t lastMipLevel = getLastMipLevel(subresourceRange);
int bytes = decompressedImage->format.bytes();
VkImageSubresourceLayers subresourceLayers = { subresourceRange.aspectMask, subresourceRange.baseMipLevel, subresourceRange.baseArrayLayer, 1 };
for(; subresourceLayers.baseArrayLayer <= lastLayer; subresourceLayers.baseArrayLayer++)
{
for(; subresourceLayers.mipLevel <= lastMipLevel; subresourceLayers.mipLevel++)
{
VkExtent3D mipLevelExtent = getMipLevelExtent(static_cast<VkImageAspectFlagBits>(subresourceLayers.aspectMask), subresourceLayers.mipLevel);
int xblocks = (mipLevelExtent.width + xBlockSize - 1) / xBlockSize;
int yblocks = (mipLevelExtent.height + yBlockSize - 1) / yBlockSize;
int zblocks = (zBlockSize > 1) ? (mipLevelExtent.depth + zBlockSize - 1) / zBlockSize : 1;
if(xblocks <= 0 || yblocks <= 0 || zblocks <= 0)
{
continue;
}
int pitchB = decompressedImage->rowPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, subresourceLayers.mipLevel);
int sliceB = decompressedImage->slicePitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, subresourceLayers.mipLevel);
for(int32_t depth = 0; depth < static_cast<int32_t>(mipLevelExtent.depth); depth++)
{
uint8_t *source = static_cast<uint8_t *>(getTexelPointer({ 0, 0, depth }, subresourceLayers));
uint8_t *dest = static_cast<uint8_t *>(decompressedImage->getTexelPointer({ 0, 0, depth }, subresourceLayers));
ASTC_Decoder::Decode(source, dest, mipLevelExtent.width, mipLevelExtent.height, mipLevelExtent.depth, bytes, pitchB, sliceB,
xBlockSize, yBlockSize, zBlockSize, xblocks, yblocks, zblocks, isUnsigned);
}
}
}
}
} // namespace vk
......@@ -113,6 +113,7 @@ private:
int borderSize() const;
void decodeETC2(const VkImageSubresourceRange &subresourceRange) const;
void decodeBC(const VkImageSubresourceRange &subresourceRange) const;
void decodeASTC(const VkImageSubresourceRange &subresourceRange) const;
const Device *const device = nullptr;
DeviceMemory *deviceMemory = nullptr;
......
......@@ -76,7 +76,7 @@ const VkPhysicalDeviceFeatures &PhysicalDevice::getFeatures() const
VK_FALSE, // multiViewport
VK_TRUE, // samplerAnisotropy
VK_TRUE, // textureCompressionETC2
VK_FALSE, // textureCompressionASTC_LDR
VK_TRUE, // textureCompressionASTC_LDR
VK_FALSE, // textureCompressionBC
VK_FALSE, // occlusionQueryPrecise
VK_FALSE, // pipelineStatisticsQuery
......@@ -527,6 +527,34 @@ void PhysicalDevice::getFormatProperties(Format format, VkFormatProperties *pFor
case VK_FORMAT_EAC_R11_SNORM_BLOCK:
case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
case VK_FORMAT_D16_UNORM:
case VK_FORMAT_D32_SFLOAT:
case VK_FORMAT_D32_SFLOAT_S8_UINT:
......
//
// Copyright (C) 2020 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Static library built from every .cpp file under Source/ (the ASTC codec
// sources kept in this directory).
cc_library_static {
name: "swiftshader_astc",
vendor_available: true,
// Tag attached to log output produced by this library.
cflags: [
"-DLOG_TAG=\"swiftshader\"",
],
srcs: [
"Source/*.cpp",
],
local_include_dirs: [
"Source",
],
// Links the separately built swiftshader_debug static library.
static_libs: [
"swiftshader_debug",
]
}
\ No newline at end of file
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2020 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
/**
* @brief Functions to decompress a symbolic block.
*/
#include "astc_codec_internals.h"
/**
 * @brief Compute the interpolated (undecimated) integer weight of one texel.
 *
 * Sums the unquantized grid weights that contribute to the texel, each scaled
 * by its integer factor from the decimation table, and divides the total by 16
 * with rounding.
 *
 * @param texel_to_get   Index of the texel within the block.
 * @param it             Decimation table describing the contributing weights.
 * @param weights        Unquantized weight grid values.
 *
 * @return The texel's weight.
 */
static int compute_value_of_texel_int(
	int texel_to_get,
	const decimation_table* it,
	const int* weights
) {
	const int contributor_count = it->texel_num_weights[texel_to_get];

	// Start at 8 so that the final >> 4 rounds to nearest.
	int accumulated = 8;
	for (int k = 0; k < contributor_count; ++k)
	{
		const int grid_index = it->texel_weights[texel_to_get][k];
		const int factor = it->texel_weights_int[texel_to_get][k];
		accumulated += weights[grid_index] * factor;
	}

	return accumulated >> 4;
}
/**
 * @brief Interpolate between two endpoint colors using integer weights.
 *
 * Every channel is blended as (c0 * (64 - w) + c1 * w + 32) >> 6. In dual-plane
 * mode the channel selected by @c plane2_color_component uses
 * @c plane2_weight instead of @c weight. For sRGB decoding the endpoints are
 * reduced to their top 8 bits before blending and the result is multiplied by
 * 257 afterwards.
 *
 * @return The interpolated color.
 */
static uint4 lerp_color_int(
	astc_decode_mode decode_mode,
	uint4 color0,
	uint4 color1,
	int weight,
	int plane2_weight,
	int plane2_color_component  // -1 in 1-plane mode
) {
	const bool srgb = (decode_mode == DECODE_LDR_SRGB);

	int4 c0 = int4(color0.x, color0.y, color0.z, color0.w);
	int4 c1 = int4(color1.x, color1.y, color1.z, color1.w);

	// Weight applied to color1; the second-plane channel (if any) gets its own weight.
	int4 w1 = int4(weight, weight, weight, weight);
	if (plane2_color_component == 0)
	{
		w1.x = plane2_weight;
	}
	else if (plane2_color_component == 1)
	{
		w1.y = plane2_weight;
	}
	else if (plane2_color_component == 2)
	{
		w1.z = plane2_weight;
	}
	else if (plane2_color_component == 3)
	{
		w1.w = plane2_weight;
	}

	// Weight applied to color0 is the complement to 64.
	int4 w0 = int4(64, 64, 64, 64) - w1;

	if (srgb)
	{
		c0 = int4(c0.x >> 8, c0.y >> 8, c0.z >> 8, c0.w >> 8);
		c1 = int4(c1.x >> 8, c1.y >> 8, c1.z >> 8, c1.w >> 8);
	}

	int4 blended = (c0 * w0) + (c1 * w1) + int4(32, 32, 32, 32);
	blended = int4(blended.x >> 6, blended.y >> 6, blended.z >> 6, blended.w >> 6);

	if (srgb)
	{
		blended = blended * 257;
	}

	return uint4(blended.x, blended.y, blended.z, blended.w);
}
/**
 * @brief Decompress a symbolic block into an image block.
 *
 * Fills the per-texel color data of @c blk (both @c orig_data and the
 * @c data_r/g/b/a work data), its per-texel LNS and NaN flags, and finally
 * refreshes the block's summary flags.
 *
 * Three kinds of block are handled:
 *  - error blocks: every texel becomes opaque magenta for sRGB decoding, or is
 *    flagged as NaN otherwise;
 *  - constant-color blocks (negative @c block_mode): every texel gets the same
 *    color;
 *  - regular blocks: endpoints are unpacked per partition, weights are
 *    unquantized and undecimated, and each texel is interpolated.
 *
 * @param decode_mode   LDR, LDR sRGB or HDR decoding rules.
 * @param bsd           Descriptor of the block size in use.
 * @param xpos          X position recorded in the image block.
 * @param ypos          Y position recorded in the image block.
 * @param zpos          Z position recorded in the image block.
 * @param scb           The symbolic block to decode.
 * @param blk           The image block to fill.
 */
void decompress_symbolic_block(
	astc_decode_mode decode_mode,
	const block_size_descriptor* bsd,
	int xpos,
	int ypos,
	int zpos,
	const symbolic_compressed_block* scb,
	imageblock* blk
) {
	blk->xpos = xpos;
	blk->ypos = ypos;
	blk->zpos = zpos;

	int i;

	// if we detected an error-block, blow up immediately.
	if (scb->error_block)
	{
		if (decode_mode == DECODE_LDR_SRGB)
		{
			// sRGB output has no NaN; use opaque magenta as the error color.
			for (i = 0; i < bsd->texel_count; i++)
			{
				blk->orig_data[4 * i] = 1.0f;
				blk->orig_data[4 * i + 1] = 0.0f;
				blk->orig_data[4 * i + 2] = 1.0f;
				blk->orig_data[4 * i + 3] = 1.0f;
				blk->rgb_lns[i] = 0;
				blk->alpha_lns[i] = 0;
				blk->nan_texel[i] = 0;
			}
		}
		else
		{
			// Flag every texel as NaN.
			for (i = 0; i < bsd->texel_count; i++)
			{
				blk->orig_data[4 * i] = 0.0f;
				blk->orig_data[4 * i + 1] = 0.0f;
				blk->orig_data[4 * i + 2] = 0.0f;
				blk->orig_data[4 * i + 3] = 0.0f;
				blk->rgb_lns[i] = 0;
				blk->alpha_lns[i] = 0;
				blk->nan_texel[i] = 1;
			}
		}

		imageblock_initialize_work_from_orig(blk, bsd->texel_count);
		update_imageblock_flags(blk, bsd->xdim, bsd->ydim, bsd->zdim);
		return;
	}

	// Negative block modes denote constant-color blocks.
	if (scb->block_mode < 0)
	{
		float red = 0, green = 0, blue = 0, alpha = 0;
		int use_lns = 0;
		int use_nan = 0;

		if (scb->block_mode == -2)
		{
			// Constant color stored as UNORM16.
			// For sRGB decoding, we should return only the top 8 bits.
			int mask = (decode_mode == DECODE_LDR_SRGB) ? 0xFF00 : 0xFFFF;

			red = sf16_to_float(unorm16_to_sf16(scb->constant_color[0] & mask));
			green = sf16_to_float(unorm16_to_sf16(scb->constant_color[1] & mask));
			blue = sf16_to_float(unorm16_to_sf16(scb->constant_color[2] & mask));
			alpha = sf16_to_float(unorm16_to_sf16(scb->constant_color[3] & mask));
			use_lns = 0;
			use_nan = 0;
		}
		else
		{
			// Constant color stored as FP16: only decodable in HDR mode. The LDR
			// modes produce the same error output as an error block.
			switch (decode_mode)
			{
			case DECODE_LDR_SRGB:
				red = 1.0f;
				green = 0.0f;
				blue = 1.0f;
				alpha = 1.0f;
				use_lns = 0;
				use_nan = 0;
				break;
			case DECODE_LDR:
				red = 0.0f;
				green = 0.0f;
				blue = 0.0f;
				alpha = 0.0f;
				use_lns = 0;
				use_nan = 1;
				break;
			case DECODE_HDR:
				// constant-color block; unpack from FP16 to FP32.
				red = sf16_to_float(scb->constant_color[0]);
				green = sf16_to_float(scb->constant_color[1]);
				blue = sf16_to_float(scb->constant_color[2]);
				alpha = sf16_to_float(scb->constant_color[3]);
				use_lns = 1;
				use_nan = 0;
				break;
			}
		}

		for (i = 0; i < bsd->texel_count; i++)
		{
			blk->orig_data[4 * i] = red;
			blk->orig_data[4 * i + 1] = green;
			blk->orig_data[4 * i + 2] = blue;
			blk->orig_data[4 * i + 3] = alpha;
			blk->rgb_lns[i] = use_lns;
			blk->alpha_lns[i] = use_lns;
			blk->nan_texel[i] = use_nan;
		}

		imageblock_initialize_work_from_orig(blk, bsd->texel_count);
		update_imageblock_flags(blk, bsd->xdim, bsd->ydim, bsd->zdim);
		return;
	}

	// get the appropriate partition-table entry
	int partition_count = scb->partition_count;
	const partition_info *pt = get_partition_table(bsd, partition_count);
	pt += scb->partition_index;

	// get the appropriate block descriptor
	const decimation_table *const *ixtab2 = bsd->decimation_tables;

	const decimation_table *it = ixtab2[bsd->block_modes[scb->block_mode].decimation_mode];

	int is_dual_plane = bsd->block_modes[scb->block_mode].is_dual_plane;

	int weight_quantization_level = bsd->block_modes[scb->block_mode].quantization_mode;

	// decode the color endpoints
	uint4 color_endpoint0[4];
	uint4 color_endpoint1[4];
	int rgb_hdr_endpoint[4];
	int alpha_hdr_endpoint[4];
	int nan_endpoint[4];

	for (i = 0; i < partition_count; i++)
		unpack_color_endpoints(decode_mode,
		                       scb->color_formats[i],
		                       scb->color_quantization_level,
		                       scb->color_values[i],
		                       &(rgb_hdr_endpoint[i]),
		                       &(alpha_hdr_endpoint[i]),
		                       &(nan_endpoint[i]),
		                       &(color_endpoint0[i]),
		                       &(color_endpoint1[i]));

	// first unquantize the weights
	int uq_plane1_weights[MAX_WEIGHTS_PER_BLOCK];
	int uq_plane2_weights[MAX_WEIGHTS_PER_BLOCK];
	int weight_count = it->num_weights;

	const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quantization_level]);

	for (i = 0; i < weight_count; i++)
	{
		uq_plane1_weights[i] = qat->unquantized_value[scb->plane1_weights[i]];
	}

	if (is_dual_plane)
	{
		for (i = 0; i < weight_count; i++)
			uq_plane2_weights[i] = qat->unquantized_value[scb->plane2_weights[i]];
	}

	// then undecimate them.
	int weights[MAX_TEXELS_PER_BLOCK];
	int plane2_weights[MAX_TEXELS_PER_BLOCK];

	for (i = 0; i < bsd->texel_count; i++)
		weights[i] = compute_value_of_texel_int(i, it, uq_plane1_weights);

	if (is_dual_plane)
		for (i = 0; i < bsd->texel_count; i++)
			plane2_weights[i] = compute_value_of_texel_int(i, it, uq_plane2_weights);

	int plane2_color_component = scb->plane2_color_component;

	// now that we have endpoint colors and weights, we can unpack actual colors for
	// each texel.
	for (i = 0; i < bsd->texel_count; i++)
	{
		int partition = pt->partition_of_texel[i];

		// plane2_weights is only filled in for dual-plane blocks. Pass 0 otherwise
		// rather than reading an uninitialized array element; the value is ignored
		// by lerp_color_int when the plane-2 component is -1.
		int texel_plane2_weight = is_dual_plane ? plane2_weights[i] : 0;

		uint4 color = lerp_color_int(decode_mode,
		                             color_endpoint0[partition],
		                             color_endpoint1[partition],
		                             weights[i],
		                             texel_plane2_weight,
		                             is_dual_plane ? plane2_color_component : -1);

		blk->rgb_lns[i] = rgb_hdr_endpoint[partition];
		blk->alpha_lns[i] = alpha_hdr_endpoint[partition];
		blk->nan_texel[i] = nan_endpoint[partition];

		blk->data_r[i] = (float)color.x;
		blk->data_g[i] = (float)color.y;
		blk->data_b[i] = (float)color.z;
		blk->data_a[i] = (float)color.w;
	}

	imageblock_initialize_orig_from_work(blk, bsd->texel_count);

	update_imageblock_flags(blk, bsd->xdim, bsd->ydim, bsd->zdim);
}
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2020 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
/**
* @brief Functions for loading/storing ASTC compressed images.
*/
#include "astc_codec_internals.h"
// conversion functions between the LNS representation and the FP16 representation.
/**
 * Convert a float to the LNS representation.
 *
 * NaN and values at or below 2^-26 map to 0; magnitudes of 65536 or more map
 * to 65535. Everything else is encoded as exponent * 2048 plus a piecewise
 * linear function of the mantissa, plus 1.
 */
float float_to_lns(float p)
{
	// Underflow (anything not above 2^-26, which includes all negatives) or NaN.
	if (astc::isnan(p) || p <= 1.0f / 67108864.0f)
	{
		return 0;
	}

	// Overflow: saturate to the largest code.
	if (fabsf(p) >= 65536.0f)
	{
		return 65535;
	}

	int expo;
	const float normfrac = frexpf(p, &expo);

	float mantissa;
	if (expo >= -13)
	{
		// frexpf yields a fraction in [0.5, 1); rescale it to [0, 2048).
		mantissa = (normfrac - 0.5f) * 4096.0f;
		expo += 14;
	}
	else
	{
		// Input is smaller than 2^-14: scale by 2^25 and use exponent 0.
		mantissa = p * 33554432.0f;
		expo = 0;
	}

	// Piecewise linear remapping of the mantissa.
	if (mantissa < 384.0f)
	{
		mantissa *= 4.0f / 3.0f;
	}
	else if (mantissa <= 1408.0f)
	{
		mantissa += 128.0f;
	}
	else
	{
		mantissa = (mantissa + 512.0f) * (4.0f / 5.0f);
	}

	mantissa += expo * 2048.0f;
	return mantissa + 1.0f;
}
/**
 * Convert a 16-bit LNS value to FP16 bits.
 *
 * The low 11 bits are expanded by a three-segment piecewise linear curve, the
 * top 5 bits become the exponent field, and the result is clamped to 0x7BFF
 * (the largest finite FP16 bit pattern).
 */
uint16_t lns_to_sf16(uint16_t p)
{
	const uint16_t mantissa = p & 0x7FF;
	const uint16_t exponent = p >> 11;

	uint16_t expanded;
	if (mantissa >= 1536)
	{
		expanded = 5 * mantissa - 2048;
	}
	else if (mantissa >= 512)
	{
		expanded = 4 * mantissa - 512;
	}
	else
	{
		expanded = 3 * mantissa;
	}

	const uint16_t half_bits = (exponent << 10) | (expanded >> 3);
	return (half_bits < 0x7BFF) ? half_bits : 0x7BFF;
}
// Conversion from a 16-bit UNORM value to FP16 bits.
// Interpolated LDR values cannot produce a denormal result, but a constant-color
// block can carry a very small UNORM16, so values below 4 are handled separately.
uint16_t unorm16_to_sf16(uint16_t p)
{
	if (p == 0xFFFF)
	{
		return 0x3C00; // value of 1.0 .
	}

	if (p < 4)
	{
		return p << 8;
	}

	// Leading zero count within the 16-bit value.
	const int leading_zeros = clz32(p) - 16;

	// Shift the leading one out of the 16-bit value, then keep the next 10 bits as mantissa.
	uint16_t mantissa = static_cast<uint16_t>(p << (leading_zeros + 1));
	mantissa >>= 6;

	const int exponent = 14 - leading_zeros;
	return mantissa | (exponent << 10);
}
// Helper that fills the work data (data_r/g/b/a) from orig_data.
// Channels flagged as LNS are converted with float_to_lns(); all others are
// scaled by 65535.
void imageblock_initialize_work_from_orig(
	imageblock* pb,
	int pixelcount
) {
	for (int texel = 0; texel < pixelcount; ++texel)
	{
		const float *rgba = pb->orig_data + 4 * texel;

		// RGB shares one flag; alpha has its own.
		if (pb->rgb_lns[texel])
		{
			pb->data_r[texel] = float_to_lns(rgba[0]);
			pb->data_g[texel] = float_to_lns(rgba[1]);
			pb->data_b[texel] = float_to_lns(rgba[2]);
		}
		else
		{
			pb->data_r[texel] = rgba[0] * 65535.0f;
			pb->data_g[texel] = rgba[1] * 65535.0f;
			pb->data_b[texel] = rgba[2] * 65535.0f;
		}

		pb->data_a[texel] = pb->alpha_lns[texel] ? float_to_lns(rgba[3])
		                                         : rgba[3] * 65535.0f;
	}
}
// Helper that fills orig_data from the work data (data_r/g/b/a).
// Each work value is truncated to 16 bits, converted to FP16 bits (via the LNS
// or the UNORM16 path, depending on the texel's flags) and widened to float.
void imageblock_initialize_orig_from_work(
	imageblock* pb,
	int pixelcount
) {
	for (int texel = 0; texel < pixelcount; ++texel)
	{
		float *rgba = pb->orig_data + 4 * texel;

		const uint16_t raw_r = static_cast<uint16_t>(pb->data_r[texel]);
		const uint16_t raw_g = static_cast<uint16_t>(pb->data_g[texel]);
		const uint16_t raw_b = static_cast<uint16_t>(pb->data_b[texel]);
		const uint16_t raw_a = static_cast<uint16_t>(pb->data_a[texel]);

		// RGB shares one flag; alpha has its own.
		if (pb->rgb_lns[texel])
		{
			rgba[0] = sf16_to_float(lns_to_sf16(raw_r));
			rgba[1] = sf16_to_float(lns_to_sf16(raw_g));
			rgba[2] = sf16_to_float(lns_to_sf16(raw_b));
		}
		else
		{
			rgba[0] = sf16_to_float(unorm16_to_sf16(raw_r));
			rgba[1] = sf16_to_float(unorm16_to_sf16(raw_g));
			rgba[2] = sf16_to_float(unorm16_to_sf16(raw_b));
		}

		rgba[3] = pb->alpha_lns[texel] ? sf16_to_float(lns_to_sf16(raw_a))
		                               : sf16_to_float(unorm16_to_sf16(raw_a));
	}
}
/*
	Recompute the summary fields of an imageblock: the per-channel minimum and
	maximum, and whether every texel has equal red, green and blue.
	The values are taken from the work data (data_r/g/b/a), not from orig_data.
*/
void update_imageblock_flags(
	imageblock* pb,
	int xdim,
	int ydim,
	int zdim
) {
	const int texel_total = xdim * ydim * zdim;

	float lo_r = 1e38f, hi_r = -1e38f;
	float lo_g = 1e38f, hi_g = -1e38f;
	float lo_b = 1e38f, hi_b = -1e38f;
	float lo_a = 1e38f, hi_a = -1e38f;

	int is_gray = 1;

	for (int t = 0; t < texel_total; ++t)
	{
		const float r = pb->data_r[t];
		const float g = pb->data_g[t];
		const float b = pb->data_b[t];
		const float a = pb->data_a[t];

		lo_r = (r < lo_r) ? r : lo_r;
		hi_r = (r > hi_r) ? r : hi_r;
		lo_g = (g < lo_g) ? g : lo_g;
		hi_g = (g > hi_g) ? g : hi_g;
		lo_b = (b < lo_b) ? b : lo_b;
		hi_b = (b > hi_b) ? b : hi_b;
		lo_a = (a < lo_a) ? a : lo_a;
		hi_a = (a > hi_a) ? a : hi_a;

		// One texel with differing channels is enough to clear the flag.
		if (r != g || r != b)
		{
			is_gray = 0;
		}
	}

	pb->red_min = lo_r;
	pb->red_max = hi_r;
	pb->green_min = lo_g;
	pb->green_max = hi_g;
	pb->blue_min = lo_b;
	pb->blue_max = hi_b;
	pb->alpha_min = lo_a;
	pb->alpha_max = hi_a;
	pb->grayscale = is_gray;
}
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2020 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
#include "astc_mathlib.h"
/**
 * @brief 64-bit rotate left.
 *
 * The rotation is taken modulo 64, so a count of 0 (or any multiple of 64)
 * returns @c val unchanged instead of shifting by the full width of the type,
 * which would be undefined behavior.
 *
 * @param val   The value to rotate.
 * @param count The rotation, in bits.
 *
 * @return The rotated value.
 */
static inline uint64_t rotl(uint64_t val, int count)
{
	const unsigned int left = static_cast<unsigned int>(count) & 63u;
	// Masking keeps the right shift in [0, 63] even when left == 0.
	const unsigned int right = (64u - left) & 63u;
	return (val << left) | (val >> right);
}
/* Public function, see header file for detailed documentation */
void astc::rand_init(uint64_t state[2])
{
	// Fixed seed: every caller starts from the same generator state.
	static const uint64_t seed[2] = {
		0xfaf9e171cea1ec6bULL,
		0xf1b318cc06af5d71ULL
	};

	for (int word = 0; word < 2; ++word)
	{
		state[word] = seed[word];
	}
}
/* Public function, see header file for detailed documentation */
uint64_t astc::rand(uint64_t state[2])
{
	const uint64_t first = state[0];
	const uint64_t second = state[1];

	// The value returned is the sum of the two state words before they are advanced.
	const uint64_t output = first + second;

	// Advance the state with xor / rotate / shift mixing.
	const uint64_t mixed = second ^ first;
	state[0] = rotl(first, 24) ^ mixed ^ (mixed << 16);
	state[1] = rotl(mixed, 37);

	return output;
}
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2020 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
/*
* This module implements a variety of mathematical data types and library
* functions used by the codec.
*/
#ifndef ASTC_MATHLIB_H_INCLUDED
#define ASTC_MATHLIB_H_INCLUDED
#include <cmath>
#include <cstdint>
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
/* ============================================================================
Fast math library; note that many of the higher-order functions in this set
use approximations which are less accurate, but faster, than <cmath> standard
library equivalents.
Note: Many of these are not necessarily faster than simple C versions when
used on a single scalar value, but are included for testing purposes as most
have an option based on SSE intrinsics and therefore provide an obvious route
to future vectorization.
============================================================================ */
// We support scalar versions of many maths functions which use SSE intrinsics
// as an "optimized" path, using just one lane from the SIMD hardware. In
// reality these are often slower than standard C due to setup and scheduling
// overheads, and the fact that we're not offsetting that cost with any actual
// vectorization.
//
// These variants are only included as a means to test that the accuracy of an
// SSE implementation would be acceptable before refactoring code paths to use
// an actual vectorized implementation which gets some advantage from SSE. It
// is therefore expected that the code will go *slower* with this macro
// set to 1 ...
#define USE_SCALAR_SSE 0
// These are namespaced to avoid colliding with C standard library functions.
namespace astc
{

/**
 * @brief Test whether a float value is a NaN.
 *
 * @param val The value to test.
 *
 * @return Non-zero if @c val is a NaN, zero otherwise.
 */
static inline int isnan(float val)
{
	// A NaN is the only float value that compares unequal to itself.
	const bool differs_from_itself = (val != val);
	return differs_from_itself ? 1 : 0;
}

/**
 * @brief Initialize the state of a random number generator.
 *
 * Important note: the codec wants sets of random numbers, but it also wants
 * image output to be stable across runs, threads, and platforms. The state is
 * therefore always set to the same fixed seed, so every generator initialized
 * by this call returns the same sequence of values.
 *
 * @param state The two-word state structure to initialize.
 */
void rand_init(uint64_t state[2]);

/**
 * @brief Return the next random number from the generator.
 *
 * This RNG is an implementation of the "xoroshiro-128+ 1.0" PRNG, based on the
 * public-domain implementation given by David Blackman & Sebastiano Vigna at
 * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
 *
 * @param state The two-word state structure to use and update.
 *
 * @return The next value in the sequence.
 */
uint64_t rand(uint64_t state[2]);

}
/* ============================================================================
Utility vector template classes with basic operations
============================================================================ */
/**
 * @brief Minimal four-component vector of @c T with public x/y/z/w members.
 *
 * Copy construction and copy assignment are memberwise, so they are
 * defaulted rather than hand-written; this keeps the type trivially
 * copyable for trivially copyable @c T.
 */
template <typename T> class vtype4
{
public:
	T x, y, z, w;

	/** @brief Default constructor; deliberately leaves the components uninitialized. */
	vtype4() {}

	/** @brief Construct from four components, in x, y, z, w order. */
	vtype4(T p, T q, T r, T s) : x(p), y(q), z(r), w(s) {}

	/** @brief Memberwise copy construction. */
	vtype4(const vtype4 &p) = default;

	/** @brief Memberwise copy assignment. */
	vtype4 &operator=(const vtype4 &s) = default;
};
/* Signed and unsigned 4-component integer vectors. */
using int4 = vtype4<int>;
using uint4 = vtype4<unsigned int>;

/* Component-wise addition. */
static inline int4 operator+(int4 lhs, int4 rhs)
{
	return int4(lhs.x + rhs.x, lhs.y + rhs.y, lhs.z + rhs.z, lhs.w + rhs.w);
}

static inline uint4 operator+(uint4 lhs, uint4 rhs)
{
	return uint4(lhs.x + rhs.x, lhs.y + rhs.y, lhs.z + rhs.z, lhs.w + rhs.w);
}

/* Component-wise subtraction. */
static inline int4 operator-(int4 lhs, int4 rhs)
{
	return int4(lhs.x - rhs.x, lhs.y - rhs.y, lhs.z - rhs.z, lhs.w - rhs.w);
}

static inline uint4 operator-(uint4 lhs, uint4 rhs)
{
	return uint4(lhs.x - rhs.x, lhs.y - rhs.y, lhs.z - rhs.z, lhs.w - rhs.w);
}

/* Component-wise multiplication. */
static inline int4 operator*(int4 lhs, int4 rhs)
{
	return int4(lhs.x * rhs.x, lhs.y * rhs.y, lhs.z * rhs.z, lhs.w * rhs.w);
}

static inline uint4 operator*(uint4 lhs, uint4 rhs)
{
	return uint4(lhs.x * rhs.x, lhs.y * rhs.y, lhs.z * rhs.z, lhs.w * rhs.w);
}

/* Vector-by-scalar multiplication. */
static inline int4 operator*(int4 vec, int scale)
{
	return int4(vec.x * scale, vec.y * scale, vec.z * scale, vec.w * scale);
}

static inline uint4 operator*(uint4 vec, uint32_t scale)
{
	return uint4(vec.x * scale, vec.y * scale, vec.z * scale, vec.w * scale);
}

/* Scalar-by-vector multiplication; same result as vector-by-scalar. */
static inline int4 operator*(int scale, int4 vec)
{
	return int4(vec.x * scale, vec.y * scale, vec.z * scale, vec.w * scale);
}

static inline uint4 operator*(uint32_t scale, uint4 vec)
{
	return uint4(vec.x * scale, vec.y * scale, vec.z * scale, vec.w * scale);
}
/* Minimum of two values. Defined only if MIN is not already provided.
   NOTE: function-like macro; each argument may be evaluated twice, so do not
   pass expressions with side effects. */
#ifndef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
/* Maximum of two values. Defined only if MAX is not already provided.
   NOTE: function-like macro; each argument may be evaluated twice, so do not
   pass expressions with side effects. */
#ifndef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))
#endif
/* ============================================================================
Softfloat library with fp32 and fp16 conversion functionality.
============================================================================ */
/* Overlay of one 32-bit word as an unsigned integer, a signed integer, or a
   float. sf16_to_float() writes the 'u' member and reads the 'f' member to
   reinterpret an FP32 bit pattern as a native float. */
typedef union if32_
{
uint32_t u;
int32_t s;
float f;
} if32;
/* 32-bit count-leading-zeros; implemented in the soft-float source file. */
uint32_t clz32(uint32_t p);
/* sized soft-float types. These are mapped to the sized integer
types of C99, instead of C's floating-point types; this is because
the library needs to maintain exact, bit-level control on all
operations on these data types. */
typedef uint16_t sf16;
typedef uint32_t sf32;
/* widening float->float conversions */
/* FP16 bit pattern -> FP32 bit pattern. */
sf32 sf16_to_sf32(sf16);
/* FP16 bit pattern -> native float value. */
float sf16_to_float(sf16);
#endif
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2020 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
/**
* @brief Soft-float library for IEEE-754.
*/
#include "astc_mathlib.h"
/******************************************
helper functions and their lookup tables
******************************************/
/* count leading zeros functions. Only used when the input is nonzero. */
/* The lookup table below is compiled only for targets that do not match one
   of the three compiler/architecture combinations handled with a dedicated
   instruction or builtin in clz32(); for those three the branches are
   intentionally empty. */
#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
#elif defined(__arm__) && defined(__ARMCC_VERSION)
#elif defined(__arm__) && defined(__GNUC__)
#else
/* table used for the slow default versions. */
/* clz_table[v] is the number of leading zero bits of the 8-bit value v
   (8 for v == 0, 7 for v == 1, ... 0 for v >= 128). */
static const uint8_t clz_table[256] =
{
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
#endif
/*
32-bit count-leading-zeros function: use the Assembly instruction whenever possible.

Returns the number of leading zero bits in 'inp'. Callers are expected to
pass a non-zero value: for zero the x86 path below returns 31 (because of the
'inp | 1'), while the table-driven fallback returns 32. */
uint32_t clz32(uint32_t inp)
{
#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
/* x86/x86-64 with GCC-compatible compilers: BSR yields the index of the
   highest set bit; OR-ing in bit 0 guarantees the operand is never zero. */
uint32_t bsr;
__asm__("bsrl %1, %0": "=r"(bsr):"r"(inp | 1));
return 31 - bsr;
#else
#if defined(__arm__) && defined(__ARMCC_VERSION)
return __clz(inp); /* armcc builtin */
#else
#if defined(__arm__) && defined(__GNUC__)
/* ARM with GCC-compatible compilers: use the CLZ instruction directly. */
uint32_t lz;
__asm__("clz %0, %1": "=r"(lz):"r"(inp));
return lz;
#else
/* slow default version */
/* Narrow the value down to its most significant non-zero byte; 'summa'
   tracks how many leading zero bits lie above that byte, and clz_table
   supplies the count within the byte. */
uint32_t summa = 24;
if (inp >= UINT32_C(0x10000))
{
inp >>= 16;
summa -= 16;
}
if (inp >= UINT32_C(0x100))
{
inp >>= 8;
summa -= 8;
}
return summa + clz_table[inp];
#endif
#endif
#endif
}
/**
 * @brief Convert an FP16 bit pattern to the equivalent FP32 bit pattern.
 *
 * All arithmetic is done on unsigned 32-bit values, so every shift is well
 * defined; the resulting bit patterns are identical to those of the original
 * signed formulation.
 *
 * @param inp The FP16 value, as raw bits.
 *
 * @return The FP32 value, as raw bits. NaN inputs are returned as quiet NaNs.
 */
sf32 sf16_to_sf32(sf16 inp)
{
	const uint32_t inpx = inp;

	/*
		This table contains, for every FP16 sign/exponent value combination,
		the difference between the input FP16 value and the value obtained
		by shifting the correct FP32 result right by 13 bits.
		This table allows us to handle every case except denormals and NaN
		with just 1 table lookup, 2 shifts and 1 add.

		Entries that need special handling (zero/denormal and Inf/NaN
		exponents) additionally carry a marker in bit 31; the marker is
		shifted out again by the final "<< 13".
	*/
	const uint32_t MB = UINT32_C(0x80000000);  // marker bit: special-case exponent
	const uint32_t PN = UINT32_C(0x1C000);     // positive normal: exponent re-bias (112 << 10)
	const uint32_t NN = UINT32_C(0x54000);     // negative normal: re-bias plus sign bit moved from bit 15 to bit 18

	static const uint32_t tbl[64] =
	{
		MB | 0x00000, PN, PN, PN, PN, PN, PN, PN,
		PN, PN, PN, PN, PN, PN, PN, PN,
		PN, PN, PN, PN, PN, PN, PN, PN,
		PN, PN, PN, PN, PN, PN, PN, MB | 0x38000,
		MB | 0x38000, NN, NN, NN, NN, NN, NN, NN,
		NN, NN, NN, NN, NN, NN, NN, NN,
		NN, NN, NN, NN, NN, NN, NN, NN,
		NN, NN, NN, NN, NN, NN, NN, MB | 0x70000
	};

	// The table offsets are far too small for this addition to carry into
	// bit 31, so the marker bit survives unchanged.
	const uint32_t res = tbl[inpx >> 10] + inpx;

	/* the normal cases: the marker bit of 'res' is not set. */
	if ((res & MB) == 0)
		return res << 13;

	/* Infinity and Zero: the bottom 10 bits of 'res' are clear. */
	if ((res & UINT32_C(0x3FF)) == 0)
		return res << 13;

	/* NaN: the exponent field of 'inp' is not zero; NaNs must be quietened. */
	if ((inpx & 0x7C00) != 0)
		return (res << 13) | UINT32_C(0x400000);

	/* the remaining cases are Denormals: normalize the mantissa and derive
	   the FP32 exponent from the amount of normalization required. */
	const uint32_t sign = (inpx & UINT32_C(0x8000)) << 16;
	uint32_t mskval = inpx & UINT32_C(0x7FFF);
	const uint32_t leadingzeroes = clz32(mskval);
	mskval <<= leadingzeroes;
	return (mskval >> 8) + ((0x85 - leadingzeroes) << 23) + sign;
}
/* Convert an FP16 bit pattern to a native float: widen the bits to FP32 and
   reinterpret them through the if32 overlay. */
float sf16_to_float(sf16 p)
{
	if32 overlay;
	overlay.u = sf16_to_sf32(p);
	return overlay.f;
}
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2020 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
/**
* @brief Functions for generating partition tables on demand.
*/
#include "astc_codec_internals.h"
/*
	Produce a canonicalized representation of a partition pattern.

	Partition labels are renumbered in order of first appearance, so two
	patterns that differ only in how their partitions are numbered produce the
	same output. Each texel contributes 2 bits, packed 32 texels per uint64_t.
	The largest possible such representation is 432 bits, which needs 7
	uint64_t values.
*/
static void gen_canonicalized_partition_table(
	int texel_count,
	const uint8_t* partition_table,
	uint64_t canonicalized[7]
) {
	for (int word = 0; word < 7; word++)
	{
		canonicalized[word] = 0;
	}

	// remap[p] is the canonical label assigned to original partition p,
	// or -1 while p has not been seen yet.
	int remap[4] = { -1, -1, -1, -1 };
	int next_label = 0;

	for (int texel = 0; texel < texel_count; texel++)
	{
		int &label_slot = remap[partition_table[texel]];
		if (label_slot < 0)
		{
			label_slot = next_label;
			next_label++;
		}

		const uint64_t label = label_slot;
		const int word = texel / 32;
		const int bit_position = 2 * (texel % 32);
		canonicalized[word] |= label << bit_position;
	}
}
/*
	Compare two canonicalized partition patterns (7 uint64_t words each).
	Returns 1 if all words match, 0 otherwise.
*/
static int compare_canonicalized_partition_tables(
	const uint64_t part1[7],
	const uint64_t part2[7]
) {
	for (int word = 0; word < 7; word++)
	{
		if (part1[word] != part2[word])
		{
			return 0;
		}
	}
	return 1;
}
/*
	For a partition table, detect partitions that are equivalent, then mark
	them as invalid. This reduces the number of partitions that the codec has
	to consider.

	Each of the PARTITION_COUNT entries of 'pi' is canonicalized; an entry
	whose canonical form equals that of any earlier entry has its
	partition_count set to 0.
*/
static void partition_table_zap_equal_elements(
	int texel_count,
	partition_info* pi
) {
	// 7 words per entry; allocated on the heap because of its size.
	uint64_t *canonicalizeds = new uint64_t[PARTITION_COUNT * 7];

	for (int i = 0; i < PARTITION_COUNT; i++)
	{
		gen_canonicalized_partition_table(texel_count, pi[i].partition_of_texel, canonicalizeds + i * 7);
	}

	for (int i = 0; i < PARTITION_COUNT; i++)
	{
		for (int j = 0; j < i; j++)
		{
			if (compare_canonicalized_partition_tables(canonicalizeds + 7 * i, canonicalizeds + 7 * j))
			{
				// Duplicate of an earlier entry: invalidate it and stop searching.
				pi[i].partition_count = 0;
				break;
			}
		}
	}

	delete[] canonicalizeds;
}
/*
	32-bit integer mixing function used to derive the pseudo-random seeds for
	partition selection. All arithmetic wraps modulo 2^32.
*/
static uint32_t hash52(uint32_t inp)
{
	uint32_t h = inp;

	h = (h ^ (h >> 15)) * 0xEEDE0891;  // (2^4+1)*(2^7+1)*(2^17-1)
	h = h ^ (h >> 5);
	h = h + (h << 16);
	h = h ^ (h >> 7);
	h = h ^ (h >> 3);
	h = h ^ (h << 6);

	return h ^ (h >> 17);
}
/*
	Select the partition (0..3) that the texel at (x, y, z) belongs to, for a
	given partition seed and partition count.

	seed            Partition pattern index.
	x, y, z         Texel coordinates inside the block.
	partitioncount  Number of partitions in use (1..4).
	small_block     Non-zero to double the coordinates before evaluation.
*/
static int select_partition(
	int seed,
	int x,
	int y,
	int z,
	int partitioncount,
	int small_block
) {
	if (small_block)
	{
		x <<= 1;
		y <<= 1;
		z <<= 1;
	}

	seed += (partitioncount - 1) * 1024;
	const uint32_t rnum = hash52(seed);

	// Twelve 4-bit seeds: the first eight are consecutive nibbles of rnum,
	// the last four are taken from overlapping bit positions.
	uint8_t seeds[12];
	for (int k = 0; k < 8; k++)
	{
		seeds[k] = (rnum >> (4 * k)) & 0xF;
	}
	seeds[8] = (rnum >> 18) & 0xF;
	seeds[9] = (rnum >> 22) & 0xF;
	seeds[10] = (rnum >> 26) & 0xF;
	seeds[11] = ((rnum >> 30) | (rnum << 2)) & 0xF;

	// squaring all the seeds in order to bias their distribution
	// towards lower values.
	for (int k = 0; k < 12; k++)
	{
		seeds[k] *= seeds[k];
	}

	// Two candidate shift amounts; which seeds get which depends on the
	// low bits of the (adjusted) seed.
	const int count_shift = (partitioncount == 3) ? 6 : 5;
	const int seed_shift = (seed & 2) ? 4 : 5;
	const bool seed_is_odd = (seed & 1) != 0;

	const int sh1 = seed_is_odd ? seed_shift : count_shift;
	const int sh2 = seed_is_odd ? count_shift : seed_shift;
	const int sh3 = (seed & 0x10) ? sh1 : sh2;

	// Seeds 1,3,5,7 use sh1; seeds 2,4,6,8 use sh2; seeds 9..12 use sh3.
	for (int k = 0; k < 8; k++)
	{
		seeds[k] >>= (k & 1) ? sh2 : sh1;
	}
	for (int k = 8; k < 12; k++)
	{
		seeds[k] >>= sh3;
	}

	// Evaluate the four functions and apply the saw (keep the low 6 bits).
	int a = (seeds[0] * x + seeds[1] * y + seeds[10] * z + (rnum >> 14)) & 0x3F;
	int b = (seeds[2] * x + seeds[3] * y + seeds[11] * z + (rnum >> 10)) & 0x3F;
	int c = (seeds[4] * x + seeds[5] * y + seeds[8] * z + (rnum >> 6)) & 0x3F;
	int d = (seeds[6] * x + seeds[7] * y + seeds[9] * z + (rnum >> 2)) & 0x3F;

	// remove some of the components if we are to output < 4 partitions.
	if (partitioncount < 4)
		d = 0;
	if (partitioncount < 3)
		c = 0;
	if (partitioncount < 2)
		b = 0;

	// The largest value wins; ties go to the lower partition index.
	if (a >= b && a >= c && a >= d)
		return 0;
	if (b >= c && b >= d)
		return 1;
	return (c >= d) ? 2 : 3;
}
/*
	Fill in one partition_info entry for the given block size, partition count
	and partition index: the per-texel partition assignment, the per-partition
	texel lists and counts, the number of partitions actually used, and the
	coverage bitmaps.
*/
static void generate_one_partition_table(
	const block_size_descriptor* bsd,
	int partition_count,
	int partition_index,
	partition_info* pt
) {
	const int texel_total = bsd->texel_count;
	const int is_small_block = texel_total < 32;

	// Assign every texel to a partition, walking the block x-fastest.
	int texel = 0;
	for (int z = 0; z < bsd->zdim; z++)
	{
		for (int y = 0; y < bsd->ydim; y++)
		{
			for (int x = 0; x < bsd->xdim; x++)
			{
				const uint8_t owner = select_partition(partition_index, x, y, z, partition_count, is_small_block);
				pt->partition_of_texel[texel] = owner;
				texel++;
			}
		}
	}

	// Build the per-partition texel lists.
	int population[4] = { 0, 0, 0, 0 };
	for (int t = 0; t < texel_total; t++)
	{
		const int owner = pt->partition_of_texel[t];
		pt->texels_of_partition[owner][population[owner]] = t;
		population[owner]++;
	}

	for (int p = 0; p < 4; p++)
	{
		pt->texels_per_partition[p] = population[p];
		pt->coverage_bitmaps[p] = 0ULL;
	}

	// The effective partition count is the index of the first empty
	// partition, or 4 if none is empty.
	int used_partitions = 0;
	while (used_partitions < 4 && population[used_partitions] != 0)
	{
		used_partitions++;
	}
	pt->partition_count = used_partitions;

	// Record, per partition, which of the block's selected texels it covers.
	const int bitmap_texels = bsd->texelcount_for_bitmap_partitioning;
	for (int bit = 0; bit < bitmap_texels; bit++)
	{
		const int source_texel = bsd->texels_for_bitmap_partitioning[bit];
		const int owner = pt->partition_of_texel[source_texel];
		pt->coverage_bitmaps[owner] |= 1ULL << bit;
	}
}
/* Public function, see header file for detailed documentation */
void init_partition_tables(
	block_size_descriptor* bsd
) {
	// Layout of bsd->partitions: PARTITION_COUNT entries each for 2, 3 and 4
	// partitions, followed by the single 1-partition entry.
	partition_info *par_tab2 = bsd->partitions;
	partition_info *par_tab3 = par_tab2 + PARTITION_COUNT;
	partition_info *par_tab4 = par_tab3 + PARTITION_COUNT;
	partition_info *par_tab1 = par_tab4 + PARTITION_COUNT;

	generate_one_partition_table(bsd, 1, 0, par_tab1);

	// Use the same bound as the table strides above and the de-duplication
	// pass below, so every entry that gets compared has been generated.
	for (int i = 0; i < PARTITION_COUNT; i++)
	{
		generate_one_partition_table(bsd, 2, i, par_tab2 + i);
		generate_one_partition_table(bsd, 3, i, par_tab3 + i);
		generate_one_partition_table(bsd, 4, i, par_tab4 + i);
	}

	// Invalidate entries that duplicate an earlier entry of the same table.
	partition_table_zap_equal_elements(bsd->texel_count, par_tab2);
	partition_table_zap_equal_elements(bsd->texel_count, par_tab3);
	partition_table_zap_equal_elements(bsd->texel_count, par_tab4);
}
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2020 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
/**
* @brief Functions and data tables for numeric quantization.
*/
#include "astc_codec_internals.h"
// Lookup tables of 8-bit values, one 256-entry row per table (21 rows).
// The rows explicitly initialize 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 32, 40,
// 48, 64, 80, 96, 128, 160, 192 and 256 entries respectively; the remaining
// entries of each row are zero-initialized.
// NOTE(review): presumably the row index is the quantization_method and the
// column index is the quantized color value - confirm against the users of
// this table.
const uint8_t color_unquantization_tables[21][256] = {
{
0, 255
},
{
0, 128, 255
},
{
0, 85, 170, 255
},
{
0, 64, 128, 192, 255
},
{
0, 255, 51, 204, 102, 153
},
{
0, 36, 73, 109, 146, 182, 219, 255
},
{
0, 255, 28, 227, 56, 199, 84, 171, 113, 142
},
{
0, 255, 69, 186, 23, 232, 92, 163, 46, 209, 116, 139
},
{
0, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255
},
{
0, 255, 67, 188, 13, 242, 80, 175, 27, 228, 94, 161, 40, 215, 107, 148,
54, 201, 121, 134
},
{
0, 255, 33, 222, 66, 189, 99, 156, 11, 244, 44, 211, 77, 178, 110, 145,
22, 233, 55, 200, 88, 167, 121, 134
},
{
0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123,
132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255
},
{
0, 255, 32, 223, 65, 190, 97, 158, 6, 249, 39, 216, 71, 184, 104, 151,
13, 242, 45, 210, 78, 177, 110, 145, 19, 236, 52, 203, 84, 171, 117, 138,
26, 229, 58, 197, 91, 164, 123, 132
},
{
0, 255, 16, 239, 32, 223, 48, 207, 65, 190, 81, 174, 97, 158, 113, 142,
5, 250, 21, 234, 38, 217, 54, 201, 70, 185, 86, 169, 103, 152, 119, 136,
11, 244, 27, 228, 43, 212, 59, 196, 76, 179, 92, 163, 108, 147, 124, 131
},
{
0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
65, 69, 73, 77, 81, 85, 89, 93, 97, 101, 105, 109, 113, 117, 121, 125,
130, 134, 138, 142, 146, 150, 154, 158, 162, 166, 170, 174, 178, 182, 186, 190,
195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 255
},
{
0, 255, 16, 239, 32, 223, 48, 207, 64, 191, 80, 175, 96, 159, 112, 143,
3, 252, 19, 236, 35, 220, 51, 204, 67, 188, 83, 172, 100, 155, 116, 139,
6, 249, 22, 233, 38, 217, 54, 201, 71, 184, 87, 168, 103, 152, 119, 136,
9, 246, 25, 230, 42, 213, 58, 197, 74, 181, 90, 165, 106, 149, 122, 133,
13, 242, 29, 226, 45, 210, 61, 194, 77, 178, 93, 162, 109, 146, 125, 130
},
{
0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
2, 253, 10, 245, 18, 237, 26, 229, 35, 220, 43, 212, 51, 204, 59, 196,
67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
5, 250, 13, 242, 21, 234, 29, 226, 37, 218, 45, 210, 53, 202, 61, 194,
70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129
},
{
0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62,
64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94,
96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126,
129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159,
161, 163, 165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191,
193, 195, 197, 199, 201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223,
225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255
},
{
0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
1, 254, 9, 246, 17, 238, 25, 230, 33, 222, 41, 214, 49, 206, 57, 198,
65, 190, 73, 182, 81, 174, 89, 166, 97, 158, 105, 150, 113, 142, 121, 134,
3, 252, 11, 244, 19, 236, 27, 228, 35, 220, 43, 212, 51, 204, 59, 196,
67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
4, 251, 12, 243, 20, 235, 28, 227, 36, 219, 44, 211, 52, 203, 60, 195,
68, 187, 76, 179, 84, 171, 92, 163, 100, 155, 108, 147, 116, 139, 124, 131,
6, 249, 14, 241, 22, 233, 30, 225, 38, 217, 46, 209, 54, 201, 62, 193,
70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129
},
{
0, 255, 4, 251, 8, 247, 12, 243, 16, 239, 20, 235, 24, 231, 28, 227,
32, 223, 36, 219, 40, 215, 44, 211, 48, 207, 52, 203, 56, 199, 60, 195,
64, 191, 68, 187, 72, 183, 76, 179, 80, 175, 84, 171, 88, 167, 92, 163,
96, 159, 100, 155, 104, 151, 108, 147, 112, 143, 116, 139, 120, 135, 124, 131,
1, 254, 5, 250, 9, 246, 13, 242, 17, 238, 21, 234, 25, 230, 29, 226,
33, 222, 37, 218, 41, 214, 45, 210, 49, 206, 53, 202, 57, 198, 61, 194,
65, 190, 69, 186, 73, 182, 77, 178, 81, 174, 85, 170, 89, 166, 93, 162,
97, 158, 101, 154, 105, 150, 109, 146, 113, 142, 117, 138, 121, 134, 125, 130,
2, 253, 6, 249, 10, 245, 14, 241, 18, 237, 22, 233, 26, 229, 30, 225,
34, 221, 38, 217, 42, 213, 46, 209, 50, 205, 54, 201, 58, 197, 62, 193,
66, 189, 70, 185, 74, 181, 78, 177, 82, 173, 86, 169, 90, 165, 94, 161,
98, 157, 102, 153, 106, 149, 110, 145, 114, 141, 118, 137, 122, 133, 126, 129
},
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
}
};
// quantization_mode_table[integercount/2][bits] gives the quantization level
// to use for a given integer count and a given number of available bits.
// An entry of -1 means that no quantization level fits.
// This is needed for color decoding, and for the color encoding.
int quantization_mode_table[17][128];

// Populate quantization_mode_table. Must run before the table is read.
void build_quantization_mode_table()
{
	// Start with every slot marked as "nothing fits".
	for (int row = 0; row < 17; row++)
	{
		for (int bits = 0; bits < 128; bits++)
		{
			quantization_mode_table[row][bits] = -1;
		}
	}

	// Record each quantization method at the exact bit count it needs for
	// each even integer count. Methods are visited in increasing order, so a
	// later method overwrites an earlier one that needs the same bit count.
	for (int method = 0; method < 21; method++)
	{
		for (int pairs = 1; pairs < 17; pairs++)
		{
			const int bits_needed = compute_ise_bitcount(2 * pairs, (quantization_method) method);
			if (bits_needed < 128)
			{
				quantization_mode_table[pairs][bits_needed] = method;
			}
		}
	}

	// Turn each row into a running maximum, so every bit count maps to the
	// highest method recorded at or below it.
	for (int row = 0; row < 17; row++)
	{
		int best_so_far = -1;
		for (int bits = 0; bits < 128; bits++)
		{
			int &slot = quantization_mode_table[row][bits];
			if (slot > best_so_far)
			{
				best_so_far = slot;
			}
			slot = best_so_far;
		}
	}
}
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2020 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
/**
* @brief Functions for converting between symbolic and physical encodings.
*/
#include "astc_codec_internals.h"
// Read a 'bitcount'-bit little-endian field that starts 'bitoffset' bits
// into the buffer at 'ptr'.
// The field is extracted from a 16-bit window made of the byte containing the
// first bit and the byte after it, so (bitoffset & 7) + bitcount must not
// exceed 16, and both bytes must be readable.
static inline int read_bits(
	int bitcount,
	int bitoffset,
	const uint8_t* ptr
) {
	const uint8_t* first_byte = ptr + (bitoffset >> 3);
	const int window = first_byte[0] | (first_byte[1] << 8);
	const int shift_in_byte = bitoffset & 7;
	const int field_mask = (1 << bitcount) - 1;
	return (window >> shift_in_byte) & field_mask;
}
// Reverse the order of the low 8 bits of 'p'; higher bits are ignored and the
// result is always in the range 0..255.
static inline int bitrev8(int p)
{
	int reversed = 0;
	for (int bit = 0; bit < 8; bit++)
	{
		// Bit 'bit' of the input becomes bit (7 - bit) of the output.
		reversed |= ((p >> bit) & 1) << (7 - bit);
	}
	return reversed;
}
/*
	Unpack a 128-bit physical ASTC block into its symbolic representation.

	bsd  Block size descriptor providing the block-mode and decimation tables.
	pb   The physical block (16 bytes in pb.data).
	res  Receives the symbolic block.

	res->error_block is set to 1 if the block is found to be invalid. Except
	for void-extent blocks, decoding stops as soon as an error is detected, so
	in that case the remaining fields of 'res' are not filled in and callers
	must check error_block before using them.
*/
void physical_to_symbolic(
	const block_size_descriptor* bsd,
	physical_compressed_block pb,
	symbolic_compressed_block* res
) {
	uint8_t bswapped[16];
	int i, j;

	res->error_block = 0;

	// get hold of the decimation tables.
	const decimation_table *const *ixtab2 = bsd->decimation_tables;

	// extract header fields
	int block_mode = read_bits(11, 0, pb.data);
	if ((block_mode & 0x1FF) == 0x1FC)
	{
		// void-extent block!

		// check what format the data has
		if (block_mode & 0x200)
			res->block_mode = -1;	// floating-point
		else
			res->block_mode = -2;	// unorm16.

		res->partition_count = 0;
		// The constant color is stored as four little-endian 16-bit values
		// in bytes 8..15.
		for (i = 0; i < 4; i++)
		{
			res->constant_color[i] = pb.data[2 * i + 8] | (pb.data[2 * i + 9] << 8);
		}

		// additionally, check that the void-extent coordinates are valid:
		// either all fields are all-ones, or each low bound is strictly
		// below its high bound.
		if (bsd->zdim == 1)
		{
			// 2D void-extent
			int rsvbits = read_bits(2, 10, pb.data);
			if (rsvbits != 3)
				res->error_block = 1;

			// Each 13-bit coordinate is read as an 8-bit part and a 5-bit part.
			int vx_low_s = read_bits(8, 12, pb.data) | (read_bits(5, 12 + 8, pb.data) << 8);
			int vx_high_s = read_bits(8, 25, pb.data) | (read_bits(5, 25 + 8, pb.data) << 8);
			int vx_low_t = read_bits(8, 38, pb.data) | (read_bits(5, 38 + 8, pb.data) << 8);
			int vx_high_t = read_bits(8, 51, pb.data) | (read_bits(5, 51 + 8, pb.data) << 8);

			int all_ones = vx_low_s == 0x1FFF && vx_high_s == 0x1FFF && vx_low_t == 0x1FFF && vx_high_t == 0x1FFF;

			if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t) && !all_ones)
				res->error_block = 1;
		}
		else
		{
			// 3D void-extent
			int vx_low_s = read_bits(9, 10, pb.data);
			int vx_high_s = read_bits(9, 19, pb.data);
			int vx_low_t = read_bits(9, 28, pb.data);
			int vx_high_t = read_bits(9, 37, pb.data);
			int vx_low_p = read_bits(9, 46, pb.data);
			int vx_high_p = read_bits(9, 55, pb.data);

			int all_ones = vx_low_s == 0x1FF && vx_high_s == 0x1FF && vx_low_t == 0x1FF && vx_high_t == 0x1FF && vx_low_p == 0x1FF && vx_high_p == 0x1FF;

			if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t || vx_low_p >= vx_high_p) && !all_ones)
				res->error_block = 1;
		}

		return;
	}

	if (bsd->block_modes[block_mode].permit_decode == 0)
	{
		res->error_block = 1;
		return;
	}

	int weight_count = ixtab2[bsd->block_modes[block_mode].decimation_mode]->num_weights;
	int weight_quantization_method = bsd->block_modes[block_mode].quantization_mode;
	int is_dual_plane = bsd->block_modes[block_mode].is_dual_plane;

	int real_weight_count = is_dual_plane ? 2 * weight_count : weight_count;

	int partition_count = read_bits(2, 11, pb.data) + 1;

	res->block_mode = block_mode;
	res->partition_count = partition_count;

	// Dual-plane mode cannot be combined with 4 partitions. Bail out before
	// doing any further decoding of an invalid block.
	if (is_dual_plane && partition_count == 4)
	{
		res->error_block = 1;
		return;
	}

	// The weights are read from a copy of the block with both the byte order
	// and the bit order within each byte reversed, starting at bit 0.
	for (i = 0; i < 16; i++)
		bswapped[i] = bitrev8(pb.data[15 - i]);

	int bits_for_weights = compute_ise_bitcount(real_weight_count,
	                                            (quantization_method) weight_quantization_method);

	int below_weights_pos = 128 - bits_for_weights;

	if (is_dual_plane)
	{
		// The two planes' weights are interleaved in the encoded stream.
		uint8_t indices[64];
		decode_ise(weight_quantization_method, real_weight_count, bswapped, indices, 0);
		for (i = 0; i < weight_count; i++)
		{
			res->plane1_weights[i] = indices[2 * i];
			res->plane2_weights[i] = indices[2 * i + 1];
		}
	}
	else
	{
		decode_ise(weight_quantization_method, weight_count, bswapped, res->plane1_weights, 0);
	}

	res->color_formats_matched = 0;

	// then, determine the format of each endpoint pair
	int color_formats[4];
	int encoded_type_highpart_size = 0;
	if (partition_count == 1)
	{
		color_formats[0] = read_bits(4, 13, pb.data);
		res->partition_index = 0;
	}
	else
	{
		encoded_type_highpart_size = (3 * partition_count) - 4;
		below_weights_pos -= encoded_type_highpart_size;
		int encoded_type = read_bits(6, 13 + PARTITION_BITS, pb.data) | (read_bits(encoded_type_highpart_size, below_weights_pos, pb.data) << 6);
		int baseclass = encoded_type & 0x3;
		if (baseclass == 0)
		{
			// All partitions share one format; the high part is not used,
			// so give its bits back.
			for (i = 0; i < partition_count; i++)
			{
				color_formats[i] = (encoded_type >> 2) & 0xF;
			}
			below_weights_pos += encoded_type_highpart_size;
			res->color_formats_matched = 1;
			encoded_type_highpart_size = 0;
		}
		else
		{
			// One class-select bit per partition, followed by two low
			// format bits per partition.
			int bitpos = 2;
			baseclass--;
			for (i = 0; i < partition_count; i++)
			{
				color_formats[i] = (((encoded_type >> bitpos) & 1) + baseclass) << 2;
				bitpos++;
			}
			for (i = 0; i < partition_count; i++)
			{
				color_formats[i] |= (encoded_type >> bitpos) & 3;
				bitpos += 2;
			}
		}
		res->partition_index = read_bits(6, 13, pb.data) | (read_bits(PARTITION_BITS - 6, 19, pb.data) << 6);
	}

	for (i = 0; i < partition_count; i++)
		res->color_formats[i] = color_formats[i];

	// then, determine the number of integers we need to unpack for the endpoint pairs
	int color_integer_count = 0;
	for (i = 0; i < partition_count; i++)
	{
		int endpoint_class = color_formats[i] >> 2;
		color_integer_count += (endpoint_class + 1) * 2;
	}

	if (color_integer_count > 18)
	{
		res->error_block = 1;
		return;
	}

	// then, determine the color endpoint format to use for these integers
	static const int color_bits_arr[5] = { -1, 115 - 4, 113 - 4 - PARTITION_BITS, 113 - 4 - PARTITION_BITS, 113 - 4 - PARTITION_BITS };
	int color_bits = color_bits_arr[partition_count] - bits_for_weights - encoded_type_highpart_size;
	if (is_dual_plane)
		color_bits -= 2;
	if (color_bits < 0)
		color_bits = 0;

	int color_quantization_level = quantization_mode_table[color_integer_count >> 1][color_bits];
	res->color_quantization_level = color_quantization_level;

	// Levels below 4 are rejected; this also covers the -1 "nothing fits"
	// table entry, which must never be handed to decode_ise().
	if (color_quantization_level < 4)
	{
		res->error_block = 1;
		return;
	}

	// then unpack the integer-bits
	uint8_t values_to_decode[32];
	decode_ise(color_quantization_level, color_integer_count, pb.data, values_to_decode, (partition_count == 1 ? 17 : 19 + PARTITION_BITS));

	// and distribute them over the endpoint types
	int valuecount_to_decode = 0;
	for (i = 0; i < partition_count; i++)
	{
		int vals = 2 * (color_formats[i] >> 2) + 2;
		for (j = 0; j < vals; j++)
			res->color_values[i][j] = values_to_decode[j + valuecount_to_decode];
		valuecount_to_decode += vals;
	}

	// get hold of color component for second-plane in the case of dual plane of weights.
	if (is_dual_plane)
		res->plane2_color_component = read_bits(2, below_weights_pos - 2, pb.data);
}
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2020 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
/**
* @brief Data tables for quantization transfer.
*/
#include "astc_codec_internals.h"
#define _ 0 // using _ to indicate an entry that will not be used.
/**
 * @brief One table per quantization method, QUANT_2 through QUANT_32.
 *
 * Each entry is initialised with the QUANT_* enumerator followed by four
 * brace-enclosed rows. The struct is declared in astc_codec_internals.h, which
 * is not shown here, so the row descriptions below are read off the data and
 * should be confirmed against that declaration:
 *
 *  1. The N representable values on a 0..64 scale, in ascending order,
 *     followed by a trailing 255 (presumably an end marker).
 *  2. A permutation of 0..N-1. It is the identity for QUANT_2/3/4/5/8/16/32
 *     and a reordering for QUANT_6/10/12/20/24.
 *  3. The values of row 1 (without the 255), reordered so that
 *     row3[row2[i]] == row1[i].
 *  4. A sparse 65-entry table indexed by value 0..64. The non-zero entries sit
 *     at the indices listed in row 1; every other slot is `_`, which the
 *     #define above expands to 0. Each non-zero entry appears to pack four
 *     bytes, from least to most significant: the previous value, the next
 *     value, the row-3 index of the previous value, and the row-3 index of
 *     the next value, with both ends clamped to the first/last value.
 */
const quantization_and_transfer_table quant_and_xfer_tables[12] = {
// quantization method 0, range 0..1
{
QUANT_2,
{0, 64, 255},
{0, 1},
{0, 64},
{0x01004000,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,
_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,
0x01004000}
},
// quantization method 1, range 0..2
{
QUANT_3,
{0, 32, 64, 255},
{0, 1, 2},
{0, 32, 64},
{0x01002000,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,
_,_,0x02004000,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,
_,_,_,_,0x02014020}
},
// quantization method 2, range 0..3
{
QUANT_4,
{0, 21, 43, 64, 255},
{0, 1, 2, 3},
{0, 21, 43, 64},
{0x01001500,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,0x02002b00,_,_,_,_,
_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,0x03014015,_,_,_,_,_,_,_,_,_,_,_,_,
_,_,_,_,_,_,_,_,0x0302402b}
},
// quantization method 3, range 0..4
{
QUANT_5,
{0, 16, 32, 48, 64, 255},
{0, 1, 2, 3, 4},
{0, 16, 32, 48, 64},
{0x01001000,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,0x02002000,_,_,_,_,_,_,_,_,_,
_,_,_,_,_,_,0x03013010,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,0x04024020,_,_,_,
_,_,_,_,_,_,_,_,_,_,_,_,0x04034030}
},
// quantization method 4, range 0..5
{
QUANT_6,
{0, 12, 25, 39, 52, 64, 255},
{0, 2, 4, 5, 3, 1},
{0, 64, 12, 52, 25, 39},
{0x02000c00,_,_,_,_,_,_,_,_,_,_,_,0x04001900,_,_,_,_,_,_,_,_,_,_,_,_,
0x0502270c,_,_,_,_,_,_,_,_,_,_,_,_,_,0x03043419,_,_,_,_,_,_,_,_,_,_,
_,_,0x01054027,_,_,_,_,_,_,_,_,_,_,_,0x01034034}
},
// quantization method 5, range 0..7
{
QUANT_8,
{0, 9, 18, 27, 37, 46, 55, 64, 255},
{0, 1, 2, 3, 4, 5, 6, 7},
{0, 9, 18, 27, 37, 46, 55, 64},
{0x01000900,_,_,_,_,_,_,_,_,0x02001200,_,_,_,_,_,_,_,_,0x03011b09,_,_,
_,_,_,_,_,_,0x04022512,_,_,_,_,_,_,_,_,_,0x05032e1b,_,_,_,_,_,_,_,_,
0x06043725,_,_,_,_,_,_,_,_,0x0705402e,_,_,_,_,_,_,_,_,0x07064037}
},
// quantization method 6, range 0..9
{
QUANT_10,
{0, 7, 14, 21, 28, 36, 43, 50, 57, 64, 255},
{0, 2, 4, 6, 8, 9, 7, 5, 3, 1},
{0, 64, 7, 57, 14, 50, 21, 43, 28, 36},
{0x02000700,_,_,_,_,_,_,0x04000e00,_,_,_,_,_,_,0x06021507,_,_,_,_,_,_,
0x08041c0e,_,_,_,_,_,_,0x09062415,_,_,_,_,_,_,_,0x07082b1c,_,_,_,_,_,
_,0x05093224,_,_,_,_,_,_,0x0307392b,_,_,_,_,_,_,0x01054032,_,_,_,_,_,
_,0x01034039}
},
// quantization method 7, range 0..11
{
QUANT_12,
{0, 5, 11, 17, 23, 28, 36, 41, 47, 53, 59, 64, 255},
{0, 4, 8, 2, 6, 10, 11, 7, 3, 9, 5, 1},
{0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36},
{0x04000500,_,_,_,_,0x08000b00,_,_,_,_,_,0x02041105,_,_,_,_,_,
0x0608170b,_,_,_,_,_,0x0a021c11,_,_,_,_,0x0b062417,_,_,_,_,_,_,_,
0x070a291c,_,_,_,_,0x030b2f24,_,_,_,_,_,0x09073529,_,_,_,_,_,
0x05033b2f,_,_,_,_,_,0x01094035,_,_,_,_,0x0105403b}
},
// quantization method 8, range 0..15
{
QUANT_16,
{0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64, 255},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64},
{0x01000400,_,_,_,0x02000800,_,_,_,0x03010c04,_,_,_,0x04021108,_,_,_,_,
0x0503150c,_,_,_,0x06041911,_,_,_,0x07051d15,_,_,_,0x08062319,_,_,_,_,
_,0x0907271d,_,_,_,0x0a082b23,_,_,_,0x0b092f27,_,_,_,0x0c0a342b,_,_,_,
_,0x0d0b382f,_,_,_,0x0e0c3c34,_,_,_,0x0f0d4038,_,_,_,0x0f0e403c}
},
// quantization method 9, range 0..19
{
QUANT_20,
{0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 35, 38, 41, 45, 48, 51, 55, 58,
61, 64, 255},
{0, 4, 8, 12, 16, 2, 6, 10, 14, 18, 19, 15, 11, 7, 3, 17, 13, 9, 5, 1},
{0, 64, 16, 48, 3, 61, 19, 45, 6, 58, 23, 41, 9, 55, 26, 38, 13, 51,
29, 35},
{0x04000300,_,_,0x08000600,_,_,0x0c040903,_,_,0x10080d06,_,_,_,
0x020c1009,_,_,0x0610130d,_,_,0x0a021710,_,_,_,0x0e061a13,_,_,
0x120a1d17,_,_,0x130e231a,_,_,_,_,_,0x0f12261d,_,_,0x0b132923,_,_,
0x070f2d26,_,_,_,0x030b3029,_,_,0x1107332d,_,_,0x0d033730,_,_,_,
0x09113a33,_,_,0x050d3d37,_,_,0x0109403a,_,_,0x0105403d}
},
// quantization method 10, range 0..23
{
QUANT_24,
{0, 2, 5, 8, 11, 13, 16, 19, 22, 24, 27, 30, 34, 37, 40, 42, 45, 48,
51, 53, 56, 59, 62, 64, 255},
{0, 8, 16, 2, 10, 18, 4, 12, 20, 6, 14, 22, 23, 15, 7, 21, 13, 5, 19,
11, 3, 17, 9, 1},
{0, 64, 8, 56, 16, 48, 24, 40, 2, 62, 11, 53, 19, 45, 27, 37, 5, 59,
13, 51, 22, 42, 30, 34},
{0x08000200,_,0x10000500,_,_,0x02080802,_,_,0x0a100b05,_,_,0x12020d08,
_,0x040a100b,_,_,0x0c12130d,_,_,0x14041610,_,_,0x060c1813,_,
0x0e141b16,_,_,0x16061e18,_,_,0x170e221b,_,_,_,0x0f16251e,_,_,
0x07172822,_,_,0x150f2a25,_,0x0d072d28,_,_,0x0515302a,_,_,0x130d332d,
_,_,0x0b053530,_,0x03133833,_,_,0x110b3b35,_,_,0x09033e38,_,_,
0x0111403b,_,0x0109403e}
},
// quantization method 11, range 0..31
{
QUANT_32,
{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38,
40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 255},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38,
40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64},
{0x01000200,_,0x02000400,_,0x03010602,_,0x04020804,_,0x05030a06,_,
0x06040c08,_,0x07050e0a,_,0x0806100c,_,0x0907120e,_,0x0a081410,_,
0x0b091612,_,0x0c0a1814,_,0x0d0b1a16,_,0x0e0c1c18,_,0x0f0d1e1a,_,
0x100e221c,_,_,_,0x110f241e,_,0x12102622,_,0x13112824,_,0x14122a26,_,
0x15132c28,_,0x16142e2a,_,0x1715302c,_,0x1816322e,_,0x19173430,_,
0x1a183632,_,0x1b193834,_,0x1c1a3a36,_,0x1d1b3c38,_,0x1e1c3e3a,_,
0x1f1d403c,_,0x1f1e403e}
}
};
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment