/*
 * Copyright (C) 2020 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

/* Autogenerated file, do not edit */


#ifndef _BI_BUILDER_H_
#define _BI_BUILDER_H_

#include "compiler.h"



/*
 * Build a BI_OPCODE_ACMPSTORE_I32 instruction (0 destinations, 3 sources,
 * `seg` modifier) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header in the same allocation.
 */
static inline bi_instr *
bi_acmpstore_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg)
{
    /* Header followed by 0 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (0 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ACMPSTORE_I32;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->seg = seg;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BI_OPCODE_ACMPSTORE_I64 instruction (0 destinations, 3 sources,
 * `seg` modifier) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header in the same allocation.
 */
static inline bi_instr *
bi_acmpstore_i64(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg)
{
    /* Header followed by 0 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (0 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ACMPSTORE_I64;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->seg = seg;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Size-generic ACMPSTORE builder: forwards to the _i32 or _i64 variant
 * according to `bitsize`; any other value reaches UNREACHABLE.
 */
static inline bi_instr *
bi_acmpstore(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg)
{
    switch (bitsize) {
    case 32:
        return bi_acmpstore_i32(b, src0, src1, src2, seg);
    case 64:
        return bi_acmpstore_i64(b, src0, src1, src2, seg);
    default:
        UNREACHABLE("Invalid parameters for ACMPSTORE");
    }
}

/*
 * Build a BI_OPCODE_ACMPXCHG_I32 instruction (1 destination, 3 sources,
 * `seg` modifier) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header: destination first,
 * then the sources.
 */
static inline bi_instr *
bi_acmpxchg_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg)
{
    /* Header followed by 1 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ACMPXCHG_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->seg = seg;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_acmpxchg_i32_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_acmpxchg_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_acmpxchg_i32_to(b, tmp, src0, src1, src2, seg);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ACMPXCHG_I64 instruction (1 destination, 3 sources,
 * `seg` modifier) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header: destination first,
 * then the sources.
 */
static inline bi_instr *
bi_acmpxchg_i64_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg)
{
    /* Header followed by 1 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ACMPXCHG_I64;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->seg = seg;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_acmpxchg_i64_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_acmpxchg_i64(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_acmpxchg_i64_to(b, tmp, src0, src1, src2, seg);

    return instr->dest[0];
}


/*
 * Size-generic ACMPXCHG builder with an explicit destination: forwards to
 * the _i32_to or _i64_to variant according to `bitsize`; any other value
 * reaches UNREACHABLE.
 */
static inline bi_instr *
bi_acmpxchg_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg)
{
    switch (bitsize) {
    case 32:
        return bi_acmpxchg_i32_to(b, dest0, src0, src1, src2, seg);
    case 64:
        return bi_acmpxchg_i64_to(b, dest0, src0, src1, src2, seg);
    default:
        UNREACHABLE("Invalid parameters for ACMPXCHG");
    }
}

/*
 * Size-generic ACMPXCHG builder returning the result index: picks the
 * _i32_to or _i64_to variant by `bitsize`, with a bi_temp(b->shader)
 * destination, and returns dest[0]. Other sizes reach UNREACHABLE.
 */
static inline bi_index
bi_acmpxchg(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg)
{
    switch (bitsize) {
    case 32:
        return bi_acmpxchg_i32_to(b, bi_temp(b->shader), src0, src1, src2, seg)->dest[0];
    case 64:
        return bi_acmpxchg_i64_to(b, bi_temp(b->shader), src0, src1, src2, seg)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for ACMPXCHG");
    }
}

/*
 * Build a BI_OPCODE_ARSHIFT_I32 instruction (1 destination, 3 sources, no
 * modifiers) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header: destination first,
 * then the sources.
 */
static inline bi_instr *
bi_arshift_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    /* Header followed by 1 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ARSHIFT_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_arshift_i32_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_arshift_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_arshift_i32_to(b, tmp, src0, src1, src2);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ARSHIFT_V2I16 instruction (1 destination, 3 sources, no
 * modifiers) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header: destination first,
 * then the sources.
 */
static inline bi_instr *
bi_arshift_v2i16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    /* Header followed by 1 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ARSHIFT_V2I16;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_arshift_v2i16_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_arshift_v2i16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_arshift_v2i16_to(b, tmp, src0, src1, src2);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ARSHIFT_V4I8 instruction (1 destination, 3 sources, no
 * modifiers) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header: destination first,
 * then the sources.
 */
static inline bi_instr *
bi_arshift_v4i8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    /* Header followed by 1 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ARSHIFT_V4I8;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_arshift_v4i8_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_arshift_v4i8(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_arshift_v4i8_to(b, tmp, src0, src1, src2);

    return instr->dest[0];
}


/*
 * Size-generic ARSHIFT builder with an explicit destination. `bitsize`
 * selects the variant: 32 -> _i32_to, 16 -> _v2i16_to, 8 -> _v4i8_to.
 * Any other value reaches UNREACHABLE.
 */
static inline bi_instr *
bi_arshift_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    switch (bitsize) {
    case 32:
        return bi_arshift_i32_to(b, dest0, src0, src1, src2);
    case 16:
        return bi_arshift_v2i16_to(b, dest0, src0, src1, src2);
    case 8:
        return bi_arshift_v4i8_to(b, dest0, src0, src1, src2);
    default:
        UNREACHABLE("Invalid parameters for ARSHIFT");
    }
}

/*
 * Size-generic ARSHIFT builder returning the result index. `bitsize`
 * selects the variant (32 -> i32, 16 -> v2i16, 8 -> v4i8); the destination
 * comes from bi_temp(b->shader) and dest[0] is returned. Other sizes reach
 * UNREACHABLE.
 */
static inline bi_index
bi_arshift(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2)
{
    switch (bitsize) {
    case 32:
        return bi_arshift_i32_to(b, bi_temp(b->shader), src0, src1, src2)->dest[0];
    case 16:
        return bi_arshift_v2i16_to(b, bi_temp(b->shader), src0, src1, src2)->dest[0];
    case 8:
        return bi_arshift_v4i8_to(b, bi_temp(b->shader), src0, src1, src2)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for ARSHIFT");
    }
}

/*
 * Build a BI_OPCODE_ARSHIFT_DOUBLE_I32 instruction (1 destination,
 * 3 sources, `bytes2` and `result_word` modifiers) and pass it to
 * bi_builder_insert() at b->cursor. Operand slots sit right after the
 * bi_instr header: destination first, then the sources.
 */
static inline bi_instr *
bi_arshift_double_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool bytes2, bool result_word)
{
    /* Header followed by 1 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ARSHIFT_DOUBLE_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->bytes2 = bytes2;
    instr->result_word = result_word;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_arshift_double_i32_to(): the destination comes
 * from bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_arshift_double_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool bytes2, bool result_word)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_arshift_double_i32_to(b, tmp, src0, src1, src2, bytes2, result_word);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ATEST instruction (1 destination, 3 sources, no
 * modifiers) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header: destination first,
 * then the sources.
 */
static inline bi_instr *
bi_atest_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    /* Header followed by 1 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATEST;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_atest_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_atest(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_atest_to(b, tmp, src0, src1, src2);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ATOM_I32 instruction (1 destination, 3 sources,
 * `atom_opc` and `sr_count` modifiers) and pass it to bi_builder_insert()
 * at b->cursor. Operand slots sit right after the bi_instr header:
 * destination first, then the sources.
 */
static inline bi_instr *
bi_atom_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc, uint32_t sr_count)
{
    /* Header followed by 1 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATOM_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->atom_opc = atom_opc;
    instr->sr_count = sr_count;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_atom_i32_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_atom_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc, uint32_t sr_count)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_atom_i32_to(b, tmp, src0, src1, src2, atom_opc, sr_count);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ATOM_I64 instruction (1 destination, 3 sources,
 * `atom_opc` and `sr_count` modifiers) and pass it to bi_builder_insert()
 * at b->cursor. Operand slots sit right after the bi_instr header:
 * destination first, then the sources.
 */
static inline bi_instr *
bi_atom_i64_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc, uint32_t sr_count)
{
    /* Header followed by 1 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATOM_I64;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->atom_opc = atom_opc;
    instr->sr_count = sr_count;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_atom_i64_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_atom_i64(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc, uint32_t sr_count)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_atom_i64_to(b, tmp, src0, src1, src2, atom_opc, sr_count);

    return instr->dest[0];
}


/*
 * Size-generic ATOM builder with an explicit destination: forwards to the
 * _i32_to or _i64_to variant according to `bitsize`; any other value
 * reaches UNREACHABLE.
 */
static inline bi_instr *
bi_atom_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc, uint32_t sr_count)
{
    switch (bitsize) {
    case 32:
        return bi_atom_i32_to(b, dest0, src0, src1, src2, atom_opc, sr_count);
    case 64:
        return bi_atom_i64_to(b, dest0, src0, src1, src2, atom_opc, sr_count);
    default:
        UNREACHABLE("Invalid parameters for ATOM");
    }
}

/*
 * Size-generic ATOM builder returning the result index: picks the _i32_to
 * or _i64_to variant by `bitsize`, with a bi_temp(b->shader) destination,
 * and returns dest[0]. Other sizes reach UNREACHABLE.
 */
static inline bi_index
bi_atom(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc, uint32_t sr_count)
{
    switch (bitsize) {
    case 32:
        return bi_atom_i32_to(b, bi_temp(b->shader), src0, src1, src2, atom_opc, sr_count)->dest[0];
    case 64:
        return bi_atom_i64_to(b, bi_temp(b->shader), src0, src1, src2, atom_opc, sr_count)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for ATOM");
    }
}

/*
 * Build a BI_OPCODE_ATOM1_RETURN_I32 instruction (1 destination, 2 sources,
 * `atom_opc` and `sr_count` modifiers) and pass it to bi_builder_insert()
 * at b->cursor. Operand slots sit right after the bi_instr header:
 * destination first, then the sources.
 */
static inline bi_instr *
bi_atom1_return_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_atom_opc atom_opc, uint32_t sr_count)
{
    /* Header followed by 1 destination + 2 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATOM1_RETURN_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->atom_opc = atom_opc;
    instr->sr_count = sr_count;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_atom1_return_i32_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_atom1_return_i32(bi_builder *b, bi_index src0, bi_index src1, enum bi_atom_opc atom_opc, uint32_t sr_count)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_atom1_return_i32_to(b, tmp, src0, src1, atom_opc, sr_count);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ATOM1_RETURN_I64 instruction (1 destination, 2 sources,
 * `atom_opc` and `sr_count` modifiers) and pass it to bi_builder_insert()
 * at b->cursor. Operand slots sit right after the bi_instr header:
 * destination first, then the sources.
 */
static inline bi_instr *
bi_atom1_return_i64_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_atom_opc atom_opc, uint32_t sr_count)
{
    /* Header followed by 1 destination + 2 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATOM1_RETURN_I64;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->atom_opc = atom_opc;
    instr->sr_count = sr_count;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_atom1_return_i64_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_atom1_return_i64(bi_builder *b, bi_index src0, bi_index src1, enum bi_atom_opc atom_opc, uint32_t sr_count)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_atom1_return_i64_to(b, tmp, src0, src1, atom_opc, sr_count);

    return instr->dest[0];
}


/*
 * Size-generic ATOM1_RETURN builder with an explicit destination: forwards
 * to the _i32_to or _i64_to variant according to `bitsize`; any other value
 * reaches UNREACHABLE.
 */
static inline bi_instr *
bi_atom1_return_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, enum bi_atom_opc atom_opc, uint32_t sr_count)
{
    switch (bitsize) {
    case 32:
        return bi_atom1_return_i32_to(b, dest0, src0, src1, atom_opc, sr_count);
    case 64:
        return bi_atom1_return_i64_to(b, dest0, src0, src1, atom_opc, sr_count);
    default:
        UNREACHABLE("Invalid parameters for ATOM1_RETURN");
    }
}

/*
 * Size-generic ATOM1_RETURN builder returning the result index: picks the
 * _i32_to or _i64_to variant by `bitsize`, with a bi_temp(b->shader)
 * destination, and returns dest[0]. Other sizes reach UNREACHABLE.
 */
static inline bi_index
bi_atom1_return(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, enum bi_atom_opc atom_opc, uint32_t sr_count)
{
    switch (bitsize) {
    case 32:
        return bi_atom1_return_i32_to(b, bi_temp(b->shader), src0, src1, atom_opc, sr_count)->dest[0];
    case 64:
        return bi_atom1_return_i64_to(b, bi_temp(b->shader), src0, src1, atom_opc, sr_count)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for ATOM1_RETURN");
    }
}

/*
 * Build a BI_OPCODE_ATOM_C_I32 instruction (0 destinations, 3 sources,
 * `atom_opc` modifier) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header in the same allocation.
 */
static inline bi_instr *
bi_atom_c_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc)
{
    /* Header followed by 0 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (0 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATOM_C_I32;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->atom_opc = atom_opc;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BI_OPCODE_ATOM_C_I64 instruction (0 destinations, 3 sources,
 * `atom_opc` modifier) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header in the same allocation.
 */
static inline bi_instr *
bi_atom_c_i64(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc)
{
    /* Header followed by 0 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (0 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATOM_C_I64;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->atom_opc = atom_opc;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Size-generic ATOM_C builder: forwards to the _i32 or _i64 variant
 * according to `bitsize`; any other value reaches UNREACHABLE.
 */
static inline bi_instr *
bi_atom_c(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc)
{
    switch (bitsize) {
    case 32:
        return bi_atom_c_i32(b, src0, src1, src2, atom_opc);
    case 64:
        return bi_atom_c_i64(b, src0, src1, src2, atom_opc);
    default:
        UNREACHABLE("Invalid parameters for ATOM_C");
    }
}

/*
 * Build a BI_OPCODE_ATOM_C1_I32 instruction (0 destinations, 2 sources,
 * `atom_opc` modifier) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header in the same allocation.
 */
static inline bi_instr *
bi_atom_c1_i32(bi_builder *b, bi_index src0, bi_index src1, enum bi_atom_opc atom_opc)
{
    /* Header followed by 0 destination + 2 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (0 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATOM_C1_I32;
    instr->nr_dests = 0;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->atom_opc = atom_opc;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BI_OPCODE_ATOM_C1_I64 instruction (0 destinations, 2 sources,
 * `atom_opc` modifier) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header in the same allocation.
 */
static inline bi_instr *
bi_atom_c1_i64(bi_builder *b, bi_index src0, bi_index src1, enum bi_atom_opc atom_opc)
{
    /* Header followed by 0 destination + 2 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (0 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATOM_C1_I64;
    instr->nr_dests = 0;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->atom_opc = atom_opc;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Size-generic ATOM_C1 builder: forwards to the _i32 or _i64 variant
 * according to `bitsize`; any other value reaches UNREACHABLE.
 */
static inline bi_instr *
bi_atom_c1(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, enum bi_atom_opc atom_opc)
{
    switch (bitsize) {
    case 32:
        return bi_atom_c1_i32(b, src0, src1, atom_opc);
    case 64:
        return bi_atom_c1_i64(b, src0, src1, atom_opc);
    default:
        UNREACHABLE("Invalid parameters for ATOM_C1");
    }
}

/*
 * Build a BI_OPCODE_ATOM_C1_RETURN_I32 instruction (0 destinations,
 * 2 sources, `atom_opc` modifier) and pass it to bi_builder_insert() at
 * b->cursor. Operand slots sit right after the bi_instr header in the same
 * allocation.
 */
static inline bi_instr *
bi_atom_c1_return_i32(bi_builder *b, bi_index src0, bi_index src1, enum bi_atom_opc atom_opc)
{
    /* Header followed by 0 destination + 2 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (0 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATOM_C1_RETURN_I32;
    instr->nr_dests = 0;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->atom_opc = atom_opc;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BI_OPCODE_ATOM_C1_RETURN_I64 instruction (0 destinations,
 * 2 sources, `atom_opc` modifier) and pass it to bi_builder_insert() at
 * b->cursor. Operand slots sit right after the bi_instr header in the same
 * allocation.
 */
static inline bi_instr *
bi_atom_c1_return_i64(bi_builder *b, bi_index src0, bi_index src1, enum bi_atom_opc atom_opc)
{
    /* Header followed by 0 destination + 2 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (0 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATOM_C1_RETURN_I64;
    instr->nr_dests = 0;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->atom_opc = atom_opc;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Size-generic ATOM_C1_RETURN builder: forwards to the _i32 or _i64 variant
 * according to `bitsize`; any other value reaches UNREACHABLE.
 */
static inline bi_instr *
bi_atom_c1_return(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, enum bi_atom_opc atom_opc)
{
    switch (bitsize) {
    case 32:
        return bi_atom_c1_return_i32(b, src0, src1, atom_opc);
    case 64:
        return bi_atom_c1_return_i64(b, src0, src1, atom_opc);
    default:
        UNREACHABLE("Invalid parameters for ATOM_C1_RETURN");
    }
}

/*
 * Build a BI_OPCODE_ATOM_CX instruction (1 destination, 4 sources,
 * `sr_count` modifier) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header: destination first,
 * then the sources.
 */
static inline bi_instr *
bi_atom_cx_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bi_index src3, uint32_t sr_count)
{
    /* Header followed by 1 destination + 4 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 4) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATOM_CX;
    instr->nr_dests = 1;
    instr->nr_srcs = 4;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;
    instr->src[3] = src3;

    instr->sr_count = sr_count;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_atom_cx_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_atom_cx(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bi_index src3, uint32_t sr_count)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_atom_cx_to(b, tmp, src0, src1, src2, src3, sr_count);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ATOM_C_RETURN_I32 instruction (0 destinations,
 * 3 sources, `atom_opc` modifier) and pass it to bi_builder_insert() at
 * b->cursor. Operand slots sit right after the bi_instr header in the same
 * allocation.
 */
static inline bi_instr *
bi_atom_c_return_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc)
{
    /* Header followed by 0 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (0 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATOM_C_RETURN_I32;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->atom_opc = atom_opc;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BI_OPCODE_ATOM_C_RETURN_I64 instruction (0 destinations,
 * 3 sources, `atom_opc` modifier) and pass it to bi_builder_insert() at
 * b->cursor. Operand slots sit right after the bi_instr header in the same
 * allocation.
 */
static inline bi_instr *
bi_atom_c_return_i64(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc)
{
    /* Header followed by 0 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (0 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATOM_C_RETURN_I64;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->atom_opc = atom_opc;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Size-generic ATOM_C_RETURN builder: forwards to the _i32 or _i64 variant
 * according to `bitsize`; any other value reaches UNREACHABLE.
 */
static inline bi_instr *
bi_atom_c_return(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc)
{
    switch (bitsize) {
    case 32:
        return bi_atom_c_return_i32(b, src0, src1, src2, atom_opc);
    case 64:
        return bi_atom_c_return_i64(b, src0, src1, src2, atom_opc);
    default:
        UNREACHABLE("Invalid parameters for ATOM_C_RETURN");
    }
}

/*
 * Build a BI_OPCODE_ATOM_POST_I32 instruction (1 destination, 2 sources,
 * `atom_opc` modifier) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header: destination first,
 * then the sources.
 */
static inline bi_instr *
bi_atom_post_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_atom_opc atom_opc)
{
    /* Header followed by 1 destination + 2 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATOM_POST_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->atom_opc = atom_opc;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_atom_post_i32_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_atom_post_i32(bi_builder *b, bi_index src0, bi_index src1, enum bi_atom_opc atom_opc)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_atom_post_i32_to(b, tmp, src0, src1, atom_opc);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ATOM_POST_I64 instruction (1 destination, 2 sources,
 * `atom_opc` modifier) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header: destination first,
 * then the sources.
 */
static inline bi_instr *
bi_atom_post_i64_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_atom_opc atom_opc)
{
    /* Header followed by 1 destination + 2 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATOM_POST_I64;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->atom_opc = atom_opc;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_atom_post_i64_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_atom_post_i64(bi_builder *b, bi_index src0, bi_index src1, enum bi_atom_opc atom_opc)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_atom_post_i64_to(b, tmp, src0, src1, atom_opc);

    return instr->dest[0];
}


/*
 * Size-generic ATOM_POST builder with an explicit destination: forwards to
 * the _i32_to or _i64_to variant according to `bitsize`; any other value
 * reaches UNREACHABLE.
 */
static inline bi_instr *
bi_atom_post_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, enum bi_atom_opc atom_opc)
{
    switch (bitsize) {
    case 32:
        return bi_atom_post_i32_to(b, dest0, src0, src1, atom_opc);
    case 64:
        return bi_atom_post_i64_to(b, dest0, src0, src1, atom_opc);
    default:
        UNREACHABLE("Invalid parameters for ATOM_POST");
    }
}

/*
 * Size-generic ATOM_POST builder returning the result index: picks the
 * _i32_to or _i64_to variant by `bitsize`, with a bi_temp(b->shader)
 * destination, and returns dest[0]. Other sizes reach UNREACHABLE.
 */
static inline bi_index
bi_atom_post(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, enum bi_atom_opc atom_opc)
{
    switch (bitsize) {
    case 32:
        return bi_atom_post_i32_to(b, bi_temp(b->shader), src0, src1, atom_opc)->dest[0];
    case 64:
        return bi_atom_post_i64_to(b, bi_temp(b->shader), src0, src1, atom_opc)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for ATOM_POST");
    }
}

/*
 * Build a BI_OPCODE_ATOM_PRE_I64 instruction (1 destination, 3 sources,
 * `atom_opc` modifier) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header: destination first,
 * then the sources.
 */
static inline bi_instr *
bi_atom_pre_i64_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc)
{
    /* Header followed by 1 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATOM_PRE_I64;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->atom_opc = atom_opc;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_atom_pre_i64_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_atom_pre_i64(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_atom_pre_i64_to(b, tmp, src0, src1, src2, atom_opc);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ATOM_RETURN_I32 instruction (1 destination, 3 sources,
 * `atom_opc` and `sr_count` modifiers) and pass it to bi_builder_insert()
 * at b->cursor. Operand slots sit right after the bi_instr header:
 * destination first, then the sources.
 */
static inline bi_instr *
bi_atom_return_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc, uint32_t sr_count)
{
    /* Header followed by 1 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATOM_RETURN_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->atom_opc = atom_opc;
    instr->sr_count = sr_count;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_atom_return_i32_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_atom_return_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc, uint32_t sr_count)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_atom_return_i32_to(b, tmp, src0, src1, src2, atom_opc, sr_count);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ATOM_RETURN_I64 instruction (1 destination, 3 sources,
 * `atom_opc` and `sr_count` modifiers) and pass it to bi_builder_insert()
 * at b->cursor. Operand slots sit right after the bi_instr header:
 * destination first, then the sources.
 */
static inline bi_instr *
bi_atom_return_i64_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc, uint32_t sr_count)
{
    /* Header followed by 1 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ATOM_RETURN_I64;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->atom_opc = atom_opc;
    instr->sr_count = sr_count;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_atom_return_i64_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_atom_return_i64(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc, uint32_t sr_count)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_atom_return_i64_to(b, tmp, src0, src1, src2, atom_opc, sr_count);

    return instr->dest[0];
}


/*
 * Size-generic ATOM_RETURN builder with an explicit destination: forwards
 * to the _i32_to or _i64_to variant according to `bitsize`; any other value
 * reaches UNREACHABLE.
 */
static inline bi_instr *
bi_atom_return_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc, uint32_t sr_count)
{
    switch (bitsize) {
    case 32:
        return bi_atom_return_i32_to(b, dest0, src0, src1, src2, atom_opc, sr_count);
    case 64:
        return bi_atom_return_i64_to(b, dest0, src0, src1, src2, atom_opc, sr_count);
    default:
        UNREACHABLE("Invalid parameters for ATOM_RETURN");
    }
}

/*
 * Size-generic ATOM_RETURN builder returning the result index: picks the
 * _i32_to or _i64_to variant by `bitsize`, with a bi_temp(b->shader)
 * destination, and returns dest[0]. Other sizes reach UNREACHABLE.
 */
static inline bi_index
bi_atom_return(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, enum bi_atom_opc atom_opc, uint32_t sr_count)
{
    switch (bitsize) {
    case 32:
        return bi_atom_return_i32_to(b, bi_temp(b->shader), src0, src1, src2, atom_opc, sr_count)->dest[0];
    case 64:
        return bi_atom_return_i64_to(b, bi_temp(b->shader), src0, src1, src2, atom_opc, sr_count)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for ATOM_RETURN");
    }
}

/*
 * Build a BI_OPCODE_AXCHG_I32 instruction (1 destination, 3 sources,
 * `seg` modifier) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header: destination first,
 * then the sources.
 */
static inline bi_instr *
bi_axchg_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg)
{
    /* Header followed by 1 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_AXCHG_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->seg = seg;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_axchg_i32_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_axchg_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_axchg_i32_to(b, tmp, src0, src1, src2, seg);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_AXCHG_I64 instruction (1 destination, 3 sources,
 * `seg` modifier) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header: destination first,
 * then the sources.
 */
static inline bi_instr *
bi_axchg_i64_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg)
{
    /* Header followed by 1 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_AXCHG_I64;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->seg = seg;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_axchg_i64_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_axchg_i64(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_axchg_i64_to(b, tmp, src0, src1, src2, seg);

    return instr->dest[0];
}


/*
 * Size-generic AXCHG builder with an explicit destination: forwards to the
 * _i32_to or _i64_to variant according to `bitsize`; any other value
 * reaches UNREACHABLE.
 */
static inline bi_instr *
bi_axchg_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg)
{
    switch (bitsize) {
    case 32:
        return bi_axchg_i32_to(b, dest0, src0, src1, src2, seg);
    case 64:
        return bi_axchg_i64_to(b, dest0, src0, src1, src2, seg);
    default:
        UNREACHABLE("Invalid parameters for AXCHG");
    }
}

/*
 * Size-generic AXCHG builder returning the result index: picks the _i32_to
 * or _i64_to variant by `bitsize`, with a bi_temp(b->shader) destination,
 * and returns dest[0]. Other sizes reach UNREACHABLE.
 */
static inline bi_index
bi_axchg(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg)
{
    switch (bitsize) {
    case 32:
        return bi_axchg_i32_to(b, bi_temp(b->shader), src0, src1, src2, seg)->dest[0];
    case 64:
        return bi_axchg_i64_to(b, bi_temp(b->shader), src0, src1, src2, seg)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for AXCHG");
    }
}

/*
 * Build a BI_OPCODE_BARRIER instruction, which has neither destinations
 * nor sources, and pass it to bi_builder_insert() at b->cursor.
 * dest and src both point just past the bi_instr header.
 */
static inline bi_instr *
bi_barrier(bi_builder *b)
{
    /* Header only: 0 destination + 0 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (0 + 0) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BARRIER;
    instr->nr_dests = 0;
    instr->nr_srcs = 0;
    instr->dest = operands;
    instr->src = operands;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BI_OPCODE_BITREV_I32 instruction (1 destination, 1 source, no
 * modifiers) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header: destination first,
 * then the source.
 */
static inline bi_instr *
bi_bitrev_i32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header followed by 1 destination + 1 source slot */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BITREV_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_bitrev_i32_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_bitrev_i32(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_bitrev_i32_to(b, tmp, src0);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_BLEND instruction (1 destination, 5 sources;
 * `register_format`, `sr_count` and `sr_count_2` modifiers) and pass it to
 * bi_builder_insert() at b->cursor. Operand slots sit right after the
 * bi_instr header: destination first, then the sources.
 */
static inline bi_instr *
bi_blend_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bi_index src3, bi_index src4, enum bi_register_format register_format, uint32_t sr_count, uint32_t sr_count_2)
{
    /* Header followed by 1 destination + 5 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (1 + 5) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BLEND;
    instr->nr_dests = 1;
    instr->nr_srcs = 5;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;
    instr->src[3] = src3;
    instr->src[4] = src4;

    instr->register_format = register_format;
    instr->sr_count = sr_count;
    instr->sr_count_2 = sr_count_2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_blend_to(): the destination comes from
 * bi_temp(b->shader) and the new instruction's dest[0] is returned.
 */
static inline bi_index
bi_blend(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bi_index src3, bi_index src4, enum bi_register_format register_format, uint32_t sr_count, uint32_t sr_count_2)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_blend_to(b, tmp, src0, src1, src2, src3, src4, register_format, sr_count, sr_count_2);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_BRANCH_F16 instruction (0 destinations, 3 sources,
 * `cmpf` modifier) and pass it to bi_builder_insert() at b->cursor.
 * Operand slots sit right after the bi_instr header in the same allocation.
 */
static inline bi_instr *
bi_branch_f16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf)
{
    /* Header followed by 0 destination + 3 source slots */
    const size_t alloc_bytes = sizeof(bi_instr) + (0 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, alloc_bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCH_F16;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BRANCH_F32 instruction (no destination, three sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branch_f32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 3 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCH_F32;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BRANCH_I16 instruction (no destination, three sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branch_i16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 3 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCH_I16;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BRANCH_I32 instruction (no destination, three sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branch_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 3 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCH_I32;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BRANCH_S16 instruction (no destination, three sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branch_s16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 3 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCH_S16;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BRANCH_S32 instruction (no destination, three sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branch_s32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 3 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCH_S32;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BRANCH_U16 instruction (no destination, three sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branch_u16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 3 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCH_U16;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BRANCH_U32 instruction (no destination, three sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branch_u32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 3 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCH_U32;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Select and build the BRANCH variant matching (type, bitsize, cmpf).
 *
 *  - float, 16/32 bit, any of EQ/NE/GT/GE/LT/LE      -> f16 / f32
 *  - int or uint, 16/32 bit, EQ or NE                -> i16 / i32
 *  - int, 16/32 bit, GT/GE/LT/LE                     -> s16 / s32
 *  - uint, 16/32 bit, GT/GE/LT/LE                    -> u16 / u32
 *
 * Any other combination reaches UNREACHABLE.
 */
static inline
bi_instr * bi_branch(bi_builder *b, nir_alu_type type, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf)
{
    const bool is_float = (type == nir_type_float);
    const bool is_signed = (type == nir_type_int);
    const bool is_unsigned = (type == nir_type_uint);
    const bool eq_or_ne = (cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_NE);
    const bool ordered = (cmpf == BI_CMPF_GT || cmpf == BI_CMPF_GE ||
                          cmpf == BI_CMPF_LT || cmpf == BI_CMPF_LE);

    if (is_float && bitsize == 16 && (eq_or_ne || ordered))
        return bi_branch_f16(b, src0, src1, src2, cmpf);
    if (is_float && bitsize == 32 && (eq_or_ne || ordered))
        return bi_branch_f32(b, src0, src1, src2, cmpf);

    if ((is_unsigned || is_signed) && bitsize == 16 && eq_or_ne)
        return bi_branch_i16(b, src0, src1, src2, cmpf);
    if ((is_unsigned || is_signed) && bitsize == 32 && eq_or_ne)
        return bi_branch_i32(b, src0, src1, src2, cmpf);

    if (is_signed && bitsize == 16 && ordered)
        return bi_branch_s16(b, src0, src1, src2, cmpf);
    if (is_signed && bitsize == 32 && ordered)
        return bi_branch_s32(b, src0, src1, src2, cmpf);

    if (is_unsigned && bitsize == 16 && ordered)
        return bi_branch_u16(b, src0, src1, src2, cmpf);
    if (is_unsigned && bitsize == 32 && ordered)
        return bi_branch_u32(b, src0, src1, src2, cmpf);

    UNREACHABLE("Invalid parameters for BRANCH");
}

/*
 * Build a BRANCHC_I16 instruction (no destination, two sources), record the
 * combine flag on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branchc_i16(bi_builder *b, bi_index src0, bi_index src1, bool combine)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 2 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCHC_I16;
    instr->nr_dests = 0;
    instr->nr_srcs = 2;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->combine = combine;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BRANCHC_I32 instruction (no destination, two sources), record the
 * combine flag on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branchc_i32(bi_builder *b, bi_index src0, bi_index src1, bool combine)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 2 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCHC_I32;
    instr->nr_dests = 0;
    instr->nr_srcs = 2;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->combine = combine;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build the BRANCHC variant for the given bitsize: 16 selects the i16 form,
 * 32 selects the i32 form. Any other bitsize reaches UNREACHABLE.
 */
static inline
bi_instr * bi_branchc(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bool combine)
{
    switch (bitsize) {
    case 16:
        return bi_branchc_i16(b, src0, src1, combine);
    case 32:
        return bi_branchc_i32(b, src0, src1, combine);
    default:
        UNREACHABLE("Invalid parameters for BRANCHC");
    }
}

/*
 * Build a BRANCHZ_F16 instruction (no destination, two sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branchz_f16(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 2 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCHZ_F16;
    instr->nr_dests = 0;
    instr->nr_srcs = 2;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BRANCHZ_F32 instruction (no destination, two sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branchz_f32(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 2 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCHZ_F32;
    instr->nr_dests = 0;
    instr->nr_srcs = 2;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BRANCHZ_I16 instruction (no destination, two sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branchz_i16(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 2 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCHZ_I16;
    instr->nr_dests = 0;
    instr->nr_srcs = 2;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BRANCHZ_I32 instruction (no destination, two sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branchz_i32(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 2 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCHZ_I32;
    instr->nr_dests = 0;
    instr->nr_srcs = 2;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BRANCHZ_S16 instruction (no destination, two sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branchz_s16(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 2 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCHZ_S16;
    instr->nr_dests = 0;
    instr->nr_srcs = 2;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BRANCHZ_S32 instruction (no destination, two sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branchz_s32(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 2 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCHZ_S32;
    instr->nr_dests = 0;
    instr->nr_srcs = 2;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BRANCHZ_U16 instruction (no destination, two sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branchz_u16(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 2 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCHZ_U16;
    instr->nr_dests = 0;
    instr->nr_srcs = 2;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BRANCHZ_U32 instruction (no destination, two sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branchz_u32(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 2 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCHZ_U32;
    instr->nr_dests = 0;
    instr->nr_srcs = 2;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Select and build the BRANCHZ variant matching (type, bitsize, cmpf).
 *
 *  - float, 16/32 bit, any of EQ/NE/GT/GE/LT/LE      -> f16 / f32
 *  - int or uint, 16/32 bit, EQ or NE                -> i16 / i32
 *  - int, 16/32 bit, GT/GE/LT/LE                     -> s16 / s32
 *  - uint, 16/32 bit, GT/GE/LT/LE                    -> u16 / u32
 *
 * Any other combination reaches UNREACHABLE.
 */
static inline
bi_instr * bi_branchz(bi_builder *b, nir_alu_type type, unsigned bitsize, bi_index src0, bi_index src1, enum bi_cmpf cmpf)
{
    const bool is_float = (type == nir_type_float);
    const bool is_signed = (type == nir_type_int);
    const bool is_unsigned = (type == nir_type_uint);
    const bool eq_or_ne = (cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_NE);
    const bool ordered = (cmpf == BI_CMPF_GT || cmpf == BI_CMPF_GE ||
                          cmpf == BI_CMPF_LT || cmpf == BI_CMPF_LE);

    if (is_float && bitsize == 16 && (eq_or_ne || ordered))
        return bi_branchz_f16(b, src0, src1, cmpf);
    if (is_float && bitsize == 32 && (eq_or_ne || ordered))
        return bi_branchz_f32(b, src0, src1, cmpf);

    if ((is_unsigned || is_signed) && bitsize == 16 && eq_or_ne)
        return bi_branchz_i16(b, src0, src1, cmpf);
    if ((is_unsigned || is_signed) && bitsize == 32 && eq_or_ne)
        return bi_branchz_i32(b, src0, src1, cmpf);

    if (is_signed && bitsize == 16 && ordered)
        return bi_branchz_s16(b, src0, src1, cmpf);
    if (is_signed && bitsize == 32 && ordered)
        return bi_branchz_s32(b, src0, src1, cmpf);

    if (is_unsigned && bitsize == 16 && ordered)
        return bi_branchz_u16(b, src0, src1, cmpf);
    if (is_unsigned && bitsize == 32 && ordered)
        return bi_branchz_u32(b, src0, src1, cmpf);

    UNREACHABLE("Invalid parameters for BRANCHZ");
}

/*
 * Build a BRANCHZI instruction (no destination, two sources), record cmpf
 * on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branchzi(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 2 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCHZI;
    instr->nr_dests = 0;
    instr->nr_srcs = 2;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BRANCH_DIVERG instruction (no destination, one source), insert it
 * at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branch_diverg(bi_builder *b, bi_index src0)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 1 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCH_DIVERG;
    instr->nr_dests = 0;
    instr->nr_srcs = 1;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BRANCH_LOWBITS_F32 instruction (no destination, two sources),
 * insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branch_lowbits_f32(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 2 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCH_LOWBITS_F32;
    instr->nr_dests = 0;
    instr->nr_srcs = 2;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BRANCH_NO_DIVERG instruction (no destination, one source), insert
 * it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_branch_no_diverg(bi_builder *b, bi_index src0)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 1 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_BRANCH_NO_DIVERG;
    instr->nr_dests = 0;
    instr->nr_srcs = 1;
    /* No destinations, so both arrays start at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a CLPER_I32 instruction (one destination, two sources), insert it at
 * the builder's cursor and return it. lane_op, subgroup and inactive_result
 * are copied into the matching instruction fields.
 */
static inline
bi_instr * bi_clper_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_inactive_result inactive_result, enum bi_lane_op lane_op, enum bi_subgroup subgroup)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 3 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CLPER_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    /* Destination slot first, sources right behind it. */
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->lane_op = lane_op;
    instr->subgroup = subgroup;
    instr->inactive_result = inactive_result;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_clper_i32_to(): the destination is a fresh index
 * from bi_temp(), and the built instruction's destination index is returned.
 */
static inline
bi_index bi_clper_i32(bi_builder *b, bi_index src0, bi_index src1, enum bi_inactive_result inactive_result, enum bi_lane_op lane_op, enum bi_subgroup subgroup)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_clper_i32_to(b, dest, src0, src1,
                                      inactive_result, lane_op, subgroup);

    return instr->dest[0];
}


/*
 * Build a CLPER_OLD_I32 instruction (one destination, two sources), insert
 * it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_clper_old_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 3 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CLPER_OLD_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    /* Destination slot first, sources right behind it. */
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_clper_old_i32_to(): the destination is a fresh
 * index from bi_temp(), and the built instruction's destination index is
 * returned.
 */
static inline
bi_index bi_clper_old_i32(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_clper_old_i32_to(b, dest, src0, src1);

    return instr->dest[0];
}


/*
 * Build a CLZ_U32 instruction (one destination, one source), record the
 * mask flag on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_clz_u32_to(bi_builder *b, bi_index dest0, bi_index src0, bool mask)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 2 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CLZ_U32;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    /* Destination slot first, source right behind it. */
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    instr->mask = mask;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_clz_u32_to(): the destination is a fresh index from
 * bi_temp(), and the built instruction's destination index is returned.
 */
static inline
bi_index bi_clz_u32(bi_builder *b, bi_index src0, bool mask)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_clz_u32_to(b, dest, src0, mask);

    return instr->dest[0];
}


/*
 * Build a CLZ_V2U16 instruction (one destination, one source), record the
 * mask flag on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_clz_v2u16_to(bi_builder *b, bi_index dest0, bi_index src0, bool mask)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 2 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CLZ_V2U16;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    /* Destination slot first, source right behind it. */
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    instr->mask = mask;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_clz_v2u16_to(): the destination is a fresh index
 * from bi_temp(), and the built instruction's destination index is returned.
 */
static inline
bi_index bi_clz_v2u16(bi_builder *b, bi_index src0, bool mask)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_clz_v2u16_to(b, dest, src0, mask);

    return instr->dest[0];
}


/*
 * Build a CLZ_V4U8 instruction (one destination, one source), record the
 * mask flag on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_clz_v4u8_to(bi_builder *b, bi_index dest0, bi_index src0, bool mask)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 2 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CLZ_V4U8;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    /* Destination slot first, source right behind it. */
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    instr->mask = mask;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_clz_v4u8_to(): the destination is a fresh index
 * from bi_temp(), and the built instruction's destination index is returned.
 */
static inline
bi_index bi_clz_v4u8(bi_builder *b, bi_index src0, bool mask)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_clz_v4u8_to(b, dest, src0, mask);

    return instr->dest[0];
}


/*
 * Build the CLZ variant for the given bitsize, writing to dest0:
 * 32 -> CLZ_U32, 16 -> CLZ_V2U16, 8 -> CLZ_V4U8.
 * Any other bitsize reaches UNREACHABLE.
 */
static inline
bi_instr * bi_clz_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bool mask)
{
    switch (bitsize) {
    case 32:
        return bi_clz_u32_to(b, dest0, src0, mask);
    case 16:
        return bi_clz_v2u16_to(b, dest0, src0, mask);
    case 8:
        return bi_clz_v4u8_to(b, dest0, src0, mask);
    default:
        UNREACHABLE("Invalid parameters for CLZ");
    }
}

/*
 * Build the CLZ variant for the given bitsize (32 -> CLZ_U32,
 * 16 -> CLZ_V2U16, 8 -> CLZ_V4U8) with a fresh bi_temp() destination and
 * return that destination index. Any other bitsize reaches UNREACHABLE,
 * and no temporary is requested in that case.
 */
static inline
bi_index bi_clz(bi_builder *b, unsigned bitsize, bi_index src0, bool mask)
{
    switch (bitsize) {
    case 32:
        return bi_clz_u32_to(b, bi_temp(b->shader), src0, mask)->dest[0];
    case 16:
        return bi_clz_v2u16_to(b, bi_temp(b->shader), src0, mask)->dest[0];
    case 8:
        return bi_clz_v4u8_to(b, bi_temp(b->shader), src0, mask)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for CLZ");
    }
}

/*
 * Build a COLLECT_I32 instruction with one destination and a caller-chosen
 * number of sources, insert it at the builder's cursor and return it.
 *
 * Room for nr_srcs source slots is allocated behind the destination slot,
 * but this function does not assign any of them; only dest[0] is written
 * here.
 */
static inline
bi_instr * bi_collect_i32_to(bi_builder *b, bi_index dest0, unsigned nr_srcs)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + nr_srcs));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_COLLECT_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = nr_srcs;
    /* Destination slot first, sources right behind it. */
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_collect_i32_to(): the destination is a fresh index
 * from bi_temp(), and the built instruction's destination index is returned.
 */
static inline
bi_index bi_collect_i32(bi_builder *b, unsigned nr_srcs)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_collect_i32_to(b, dest, nr_srcs);

    return instr->dest[0];
}


/*
 * Build a CSEL_F32 instruction (one destination, four sources), record cmpf
 * on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_csel_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 5 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CSEL_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 4;
    /* Destination slot first, sources right behind it. */
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;
    instr->src[3] = src3;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_csel_f32_to(): the destination is a fresh index
 * from bi_temp(), and the built instruction's destination index is returned.
 */
static inline
bi_index bi_csel_f32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_cmpf cmpf)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_csel_f32_to(b, dest, src0, src1, src2, src3, cmpf);

    return instr->dest[0];
}


/*
 * Build a CSEL_I32 instruction (one destination, four sources), record cmpf
 * on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_csel_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 5 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CSEL_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 4;
    /* Destination slot first, sources right behind it. */
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;
    instr->src[3] = src3;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_csel_i32_to(): the destination is a fresh index
 * from bi_temp(), and the built instruction's destination index is returned.
 */
static inline
bi_index bi_csel_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_cmpf cmpf)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_csel_i32_to(b, dest, src0, src1, src2, src3, cmpf);

    return instr->dest[0];
}


/*
 * Build a CSEL_S32 instruction (one destination, four sources), record cmpf
 * on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_csel_s32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 5 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CSEL_S32;
    instr->nr_dests = 1;
    instr->nr_srcs = 4;
    /* Destination slot first, sources right behind it. */
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;
    instr->src[3] = src3;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_csel_s32_to(): the destination is a fresh index
 * from bi_temp(), and the built instruction's destination index is returned.
 */
static inline
bi_index bi_csel_s32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_cmpf cmpf)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_csel_s32_to(b, dest, src0, src1, src2, src3, cmpf);

    return instr->dest[0];
}


/*
 * Build a CSEL_U32 instruction (one destination, four sources), record cmpf
 * on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_csel_u32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 5 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CSEL_U32;
    instr->nr_dests = 1;
    instr->nr_srcs = 4;
    /* Destination slot first, sources right behind it. */
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;
    instr->src[3] = src3;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_csel_u32_to(): the destination is a fresh index
 * from bi_temp(), and the built instruction's destination index is returned.
 */
static inline
bi_index bi_csel_u32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_cmpf cmpf)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_csel_u32_to(b, dest, src0, src1, src2, src3, cmpf);

    return instr->dest[0];
}


/*
 * Build a CSEL_V2F16 instruction (one destination, four sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_csel_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 5 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CSEL_V2F16;
    instr->nr_dests = 1;
    instr->nr_srcs = 4;
    /* Destination slot first, sources right behind it. */
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;
    instr->src[3] = src3;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_csel_v2f16_to(): the destination is a fresh index
 * from bi_temp(), and the built instruction's destination index is returned.
 */
static inline
bi_index bi_csel_v2f16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_cmpf cmpf)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_csel_v2f16_to(b, dest, src0, src1, src2, src3, cmpf);

    return instr->dest[0];
}


/*
 * Build a CSEL_V2I16 instruction (one destination, four sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_csel_v2i16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 5 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CSEL_V2I16;
    instr->nr_dests = 1;
    instr->nr_srcs = 4;
    /* Destination slot first, sources right behind it. */
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;
    instr->src[3] = src3;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_csel_v2i16_to(): the destination is a fresh index
 * from bi_temp(), and the built instruction's destination index is returned.
 */
static inline
bi_index bi_csel_v2i16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_cmpf cmpf)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_csel_v2i16_to(b, dest, src0, src1, src2, src3, cmpf);

    return instr->dest[0];
}


/*
 * Build a CSEL_V2S16 instruction (one destination, four sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_csel_v2s16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 5 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CSEL_V2S16;
    instr->nr_dests = 1;
    instr->nr_srcs = 4;
    /* Destination slot first, sources right behind it. */
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;
    instr->src[3] = src3;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_csel_v2s16_to(): the destination is a fresh index
 * from bi_temp(), and the built instruction's destination index is returned.
 */
static inline
bi_index bi_csel_v2s16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_cmpf cmpf)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_csel_v2s16_to(b, dest, src0, src1, src2, src3, cmpf);

    return instr->dest[0];
}


/*
 * Build a CSEL_V2U16 instruction (one destination, four sources), record
 * cmpf on it, insert it at the builder's cursor and return it.
 */
static inline
bi_instr * bi_csel_v2u16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_cmpf cmpf)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 5 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CSEL_V2U16;
    instr->nr_dests = 1;
    instr->nr_srcs = 4;
    /* Destination slot first, sources right behind it. */
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;
    instr->src[3] = src3;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_csel_v2u16_to(): the destination is a fresh index
 * from bi_temp(), and the built instruction's destination index is returned.
 */
static inline
bi_index bi_csel_v2u16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_cmpf cmpf)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_csel_v2u16_to(b, dest, src0, src1, src2, src3, cmpf);

    return instr->dest[0];
}


/*
 * Select and build the CSEL variant matching (type, bitsize, cmpf), writing
 * to dest0.
 *
 *  - float, any of EQ/NE/GT/GE/LT/LE      -> f32 (32 bit) / v2f16 (16 bit)
 *  - int or uint, EQ or NE                -> i32 / v2i16
 *  - int, GT/GE/LT/LE                     -> s32 / v2s16
 *  - uint, GT/GE/LT/LE                    -> u32 / v2u16
 *
 * Any other combination reaches UNREACHABLE.
 */
static inline
bi_instr * bi_csel_to(bi_builder *b, nir_alu_type type, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_cmpf cmpf)
{
    const bool is_float = (type == nir_type_float);
    const bool is_signed = (type == nir_type_int);
    const bool is_unsigned = (type == nir_type_uint);
    const bool eq_or_ne = (cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_NE);
    const bool ordered = (cmpf == BI_CMPF_GT || cmpf == BI_CMPF_GE ||
                          cmpf == BI_CMPF_LT || cmpf == BI_CMPF_LE);

    /* 32-bit variants */
    if (is_float && bitsize == 32 && (eq_or_ne || ordered))
        return bi_csel_f32_to(b, dest0, src0, src1, src2, src3, cmpf);
    if ((is_unsigned || is_signed) && bitsize == 32 && eq_or_ne)
        return bi_csel_i32_to(b, dest0, src0, src1, src2, src3, cmpf);
    if (is_signed && bitsize == 32 && ordered)
        return bi_csel_s32_to(b, dest0, src0, src1, src2, src3, cmpf);
    if (is_unsigned && bitsize == 32 && ordered)
        return bi_csel_u32_to(b, dest0, src0, src1, src2, src3, cmpf);

    /* 16-bit (v2) variants */
    if (is_float && bitsize == 16 && (eq_or_ne || ordered))
        return bi_csel_v2f16_to(b, dest0, src0, src1, src2, src3, cmpf);
    if ((is_unsigned || is_signed) && bitsize == 16 && eq_or_ne)
        return bi_csel_v2i16_to(b, dest0, src0, src1, src2, src3, cmpf);
    if (is_signed && bitsize == 16 && ordered)
        return bi_csel_v2s16_to(b, dest0, src0, src1, src2, src3, cmpf);
    if (is_unsigned && bitsize == 16 && ordered)
        return bi_csel_v2u16_to(b, dest0, src0, src1, src2, src3, cmpf);

    UNREACHABLE("Invalid parameters for CSEL");
}

/*
 * Select and build the CSEL variant matching (type, bitsize, cmpf) with a
 * fresh bi_temp() destination, and return that destination index.
 *
 *  - float, any of EQ/NE/GT/GE/LT/LE      -> f32 (32 bit) / v2f16 (16 bit)
 *  - int or uint, EQ or NE                -> i32 / v2i16
 *  - int, GT/GE/LT/LE                     -> s32 / v2s16
 *  - uint, GT/GE/LT/LE                    -> u32 / v2u16
 *
 * Any other combination reaches UNREACHABLE; no temporary is requested in
 * that case.
 */
static inline
bi_index bi_csel(bi_builder *b, nir_alu_type type, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_cmpf cmpf)
{
    const bool is_float = (type == nir_type_float);
    const bool is_signed = (type == nir_type_int);
    const bool is_unsigned = (type == nir_type_uint);
    const bool eq_or_ne = (cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_NE);
    const bool ordered = (cmpf == BI_CMPF_GT || cmpf == BI_CMPF_GE ||
                          cmpf == BI_CMPF_LT || cmpf == BI_CMPF_LE);

    /* 32-bit variants */
    if (is_float && bitsize == 32 && (eq_or_ne || ordered))
        return bi_csel_f32_to(b, bi_temp(b->shader), src0, src1, src2, src3, cmpf)->dest[0];
    if ((is_unsigned || is_signed) && bitsize == 32 && eq_or_ne)
        return bi_csel_i32_to(b, bi_temp(b->shader), src0, src1, src2, src3, cmpf)->dest[0];
    if (is_signed && bitsize == 32 && ordered)
        return bi_csel_s32_to(b, bi_temp(b->shader), src0, src1, src2, src3, cmpf)->dest[0];
    if (is_unsigned && bitsize == 32 && ordered)
        return bi_csel_u32_to(b, bi_temp(b->shader), src0, src1, src2, src3, cmpf)->dest[0];

    /* 16-bit (v2) variants */
    if (is_float && bitsize == 16 && (eq_or_ne || ordered))
        return bi_csel_v2f16_to(b, bi_temp(b->shader), src0, src1, src2, src3, cmpf)->dest[0];
    if ((is_unsigned || is_signed) && bitsize == 16 && eq_or_ne)
        return bi_csel_v2i16_to(b, bi_temp(b->shader), src0, src1, src2, src3, cmpf)->dest[0];
    if (is_signed && bitsize == 16 && ordered)
        return bi_csel_v2s16_to(b, bi_temp(b->shader), src0, src1, src2, src3, cmpf)->dest[0];
    if (is_unsigned && bitsize == 16 && ordered)
        return bi_csel_v2u16_to(b, bi_temp(b->shader), src0, src1, src2, src3, cmpf)->dest[0];

    UNREACHABLE("Invalid parameters for CSEL");
}

/*
 * Build a CUBEFACE instruction (two destinations, three sources), insert it
 * at the builder's cursor and return it.
 */
static inline
bi_instr * bi_cubeface_to(bi_builder *b, bi_index dest0, bi_index dest1, bi_index src0, bi_index src1, bi_index src2)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 5 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CUBEFACE;
    instr->nr_dests = 2;
    instr->nr_srcs = 3;
    /* Two destination slots first, sources right behind them. */
    instr->dest = operands;
    instr->src = operands + 2;

    instr->dest[0] = dest0;
    instr->dest[1] = dest1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a CUBEFACE1 instruction (one destination, three sources), insert it
 * at the builder's cursor and return it.
 */
static inline
bi_instr * bi_cubeface1_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    bi_instr *instr = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + 4 * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CUBEFACE1;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    /* Destination slot first, sources right behind it. */
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_cubeface1_to(): the destination is a fresh index
 * from bi_temp(), and the built instruction's destination index is returned.
 */
static inline
bi_index bi_cubeface1(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_cubeface1_to(b, dest, src0, src1, src2);

    return instr->dest[0];
}


/* Emit CUBEFACE2 writing dest0 from a single source. The new instruction is
 * passed to bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_cubeface2_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* One allocation holds the header followed by 1 dest + 1 src. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CUBEFACE2;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_cubeface2_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_cubeface2(bi_builder *b, bi_index src0)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_cubeface2_to(b, dest, src0);

    return instr->dest[0];
}


/* Emit CUBEFACE2_V9 writing dest0 from three sources. The new instruction is
 * passed to bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_cubeface2_v9_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    /* One allocation holds the header followed by 1 dest + 3 srcs. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 3));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CUBEFACE2_V9;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_cubeface2_v9_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_cubeface2_v9(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_cubeface2_v9_to(b, dest, src0, src1, src2);

    return instr->dest[0];
}


/* Emit CUBE_SSEL writing dest0 from three sources. The new instruction is
 * passed to bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_cube_ssel_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    /* One allocation holds the header followed by 1 dest + 3 srcs. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 3));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CUBE_SSEL;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_cube_ssel_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_cube_ssel(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_cube_ssel_to(b, dest, src0, src1, src2);

    return instr->dest[0];
}


/* Emit CUBE_TSEL writing dest0 from three sources. The new instruction is
 * passed to bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_cube_tsel_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    /* One allocation holds the header followed by 1 dest + 3 srcs. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 3));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_CUBE_TSEL;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_cube_tsel_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_cube_tsel(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_cube_tsel_to(b, dest, src0, src1, src2);

    return instr->dest[0];
}


/* Emit DISCARD_B32 with one source and no destination. The new instruction
 * is passed to bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_discard_b32(bi_builder *b, bi_index src0)
{
    /* One allocation holds the header followed by 0 dests + 1 src. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (0 + 1));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_DISCARD_B32;
    instr->nr_dests = 0;
    instr->nr_srcs = 1;

    /* With no destinations, dest and src point at the same slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/* Emit DISCARD_F32 with two sources, no destination, and the given cmpf
 * modifier. The new instruction is passed to bi_builder_insert() with the
 * builder's cursor and returned. */
static inline
bi_instr * bi_discard_f32(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf)
{
    /* One allocation holds the header followed by 0 dests + 2 srcs. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (0 + 2));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_DISCARD_F32;
    instr->nr_dests = 0;
    instr->nr_srcs = 2;

    /* With no destinations, dest and src point at the same slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/* Emit DTSEL_IMM writing dest0 from one source, with the given table
 * modifier. The new instruction is passed to bi_builder_insert() with the
 * builder's cursor and returned. */
static inline
bi_instr * bi_dtsel_imm_to(bi_builder *b, bi_index dest0, bi_index src0, enum bi_table table)
{
    /* One allocation holds the header followed by 1 dest + 1 src. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_DTSEL_IMM;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;

    instr->table = table;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_dtsel_imm_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_dtsel_imm(bi_builder *b, bi_index src0, enum bi_table table)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_dtsel_imm_to(b, dest, src0, table);

    return instr->dest[0];
}


/* Emit F16_TO_F32 writing dest0 from one source. The new instruction is
 * passed to bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_f16_to_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* One allocation holds the header followed by 1 dest + 1 src. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_F16_TO_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_f16_to_f32_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_f16_to_f32(bi_builder *b, bi_index src0)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_f16_to_f32_to(b, dest, src0);

    return instr->dest[0];
}


/* Emit F16_TO_S32 writing dest0 from one source. The round modifier is
 * always set to BI_ROUND_RTZ. The new instruction is passed to
 * bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_f16_to_s32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* One allocation holds the header followed by 1 dest + 1 src. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_F16_TO_S32;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;

    instr->round = BI_ROUND_RTZ;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_f16_to_s32_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_f16_to_s32(bi_builder *b, bi_index src0)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_f16_to_s32_to(b, dest, src0);

    return instr->dest[0];
}


/* Emit F16_TO_U32 writing dest0 from one source. The round modifier is
 * always set to BI_ROUND_RTZ. The new instruction is passed to
 * bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_f16_to_u32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* One allocation holds the header followed by 1 dest + 1 src. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_F16_TO_U32;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;

    instr->round = BI_ROUND_RTZ;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_f16_to_u32_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_f16_to_u32(bi_builder *b, bi_index src0)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_f16_to_u32_to(b, dest, src0);

    return instr->dest[0];
}


/* Emit F32_TO_S32 writing dest0 from one source. The round modifier is
 * always set to BI_ROUND_RTZ. The new instruction is passed to
 * bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_f32_to_s32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* One allocation holds the header followed by 1 dest + 1 src. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_F32_TO_S32;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;

    instr->round = BI_ROUND_RTZ;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_f32_to_s32_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_f32_to_s32(bi_builder *b, bi_index src0)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_f32_to_s32_to(b, dest, src0);

    return instr->dest[0];
}


/* Emit F32_TO_U32 writing dest0 from one source. The round modifier is
 * always set to BI_ROUND_RTZ. The new instruction is passed to
 * bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_f32_to_u32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* One allocation holds the header followed by 1 dest + 1 src. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_F32_TO_U32;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;

    instr->round = BI_ROUND_RTZ;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_f32_to_u32_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_f32_to_u32(bi_builder *b, bi_index src0)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_f32_to_u32_to(b, dest, src0);

    return instr->dest[0];
}


/* Emit FABSNEG_F32 writing dest0 from one source. The new instruction is
 * passed to bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_fabsneg_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* One allocation holds the header followed by 1 dest + 1 src. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FABSNEG_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fabsneg_f32_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fabsneg_f32(bi_builder *b, bi_index src0)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fabsneg_f32_to(b, dest, src0);

    return instr->dest[0];
}


/* Emit FABSNEG_V2F16 writing dest0 from one source. The new instruction is
 * passed to bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_fabsneg_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* One allocation holds the header followed by 1 dest + 1 src. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FABSNEG_V2F16;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fabsneg_v2f16_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fabsneg_v2f16(bi_builder *b, bi_index src0)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fabsneg_v2f16_to(b, dest, src0);

    return instr->dest[0];
}


/* Size-generic FABSNEG: bitsize 32 emits FABSNEG_F32, bitsize 16 emits
 * FABSNEG_V2F16. Any other bitsize hits UNREACHABLE. */
static inline
bi_instr * bi_fabsneg_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0)
{
    switch (bitsize) {
    case 32:
        return bi_fabsneg_f32_to(b, dest0, src0);
    case 16:
        return bi_fabsneg_v2f16_to(b, dest0, src0);
    default:
        UNREACHABLE("Invalid parameters for FABSNEG");
    }
}

/* Size-generic FABSNEG into a bi_temp(b->shader) destination: bitsize 32
 * emits FABSNEG_F32, bitsize 16 emits FABSNEG_V2F16. Returns dest[0] of the
 * emitted instruction. Any other bitsize hits UNREACHABLE. */
static inline
bi_index bi_fabsneg(bi_builder *b, unsigned bitsize, bi_index src0)
{
    switch (bitsize) {
    case 32:
        return bi_fabsneg_f32_to(b, bi_temp(b->shader), src0)->dest[0];
    case 16:
        return bi_fabsneg_v2f16_to(b, bi_temp(b->shader), src0)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for FABSNEG");
    }
}

/* Emit FADD_F32 writing dest0 from two sources. The round modifier comes
 * from bi_round_mode(b->shader, 32). The new instruction is passed to
 * bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_fadd_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    /* One allocation holds the header followed by 1 dest + 2 srcs. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 2));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FADD_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->round = bi_round_mode(b->shader, 32);

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fadd_f32_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fadd_f32(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fadd_f32_to(b, dest, src0, src1);

    return instr->dest[0];
}


/* Emit FADD_V2F16 writing dest0 from two sources. The round modifier comes
 * from bi_round_mode(b->shader, 16). The new instruction is passed to
 * bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_fadd_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    /* One allocation holds the header followed by 1 dest + 2 srcs. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 2));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FADD_V2F16;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->round = bi_round_mode(b->shader, 16);

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fadd_v2f16_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fadd_v2f16(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fadd_v2f16_to(b, dest, src0, src1);

    return instr->dest[0];
}


/* Size-generic FADD: bitsize 32 emits FADD_F32, bitsize 16 emits
 * FADD_V2F16. Any other bitsize hits UNREACHABLE. */
static inline
bi_instr * bi_fadd_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1)
{
    switch (bitsize) {
    case 32:
        return bi_fadd_f32_to(b, dest0, src0, src1);
    case 16:
        return bi_fadd_v2f16_to(b, dest0, src0, src1);
    default:
        UNREACHABLE("Invalid parameters for FADD");
    }
}

/* Size-generic FADD into a bi_temp(b->shader) destination: bitsize 32 emits
 * FADD_F32, bitsize 16 emits FADD_V2F16. Returns dest[0] of the emitted
 * instruction. Any other bitsize hits UNREACHABLE. */
static inline
bi_index bi_fadd(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1)
{
    switch (bitsize) {
    case 32:
        return bi_fadd_f32_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    case 16:
        return bi_fadd_v2f16_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for FADD");
    }
}

/* Emit FADD_IMM_F32 writing dest0 from one source, storing `index` in the
 * instruction's index field. The new instruction is passed to
 * bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_fadd_imm_f32_to(bi_builder *b, bi_index dest0, bi_index src0, uint32_t index)
{
    /* One allocation holds the header followed by 1 dest + 1 src. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FADD_IMM_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;

    instr->index = index;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fadd_imm_f32_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fadd_imm_f32(bi_builder *b, bi_index src0, uint32_t index)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fadd_imm_f32_to(b, dest, src0, index);

    return instr->dest[0];
}


/* Emit FADD_IMM_V2F16 writing dest0 from one source, storing `index` in the
 * instruction's index field. The new instruction is passed to
 * bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_fadd_imm_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0, uint32_t index)
{
    /* One allocation holds the header followed by 1 dest + 1 src. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FADD_IMM_V2F16;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;

    instr->index = index;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fadd_imm_v2f16_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fadd_imm_v2f16(bi_builder *b, bi_index src0, uint32_t index)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fadd_imm_v2f16_to(b, dest, src0, index);

    return instr->dest[0];
}


/* Size-generic FADD_IMM: bitsize 32 emits FADD_IMM_F32, bitsize 16 emits
 * FADD_IMM_V2F16. Any other bitsize hits UNREACHABLE. */
static inline
bi_instr * bi_fadd_imm_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, uint32_t index)
{
    switch (bitsize) {
    case 32:
        return bi_fadd_imm_f32_to(b, dest0, src0, index);
    case 16:
        return bi_fadd_imm_v2f16_to(b, dest0, src0, index);
    default:
        UNREACHABLE("Invalid parameters for FADD_IMM");
    }
}

/* Size-generic FADD_IMM into a bi_temp(b->shader) destination: bitsize 32
 * emits FADD_IMM_F32, bitsize 16 emits FADD_IMM_V2F16. Returns dest[0] of
 * the emitted instruction. Any other bitsize hits UNREACHABLE. */
static inline
bi_index bi_fadd_imm(bi_builder *b, unsigned bitsize, bi_index src0, uint32_t index)
{
    switch (bitsize) {
    case 32:
        return bi_fadd_imm_f32_to(b, bi_temp(b->shader), src0, index)->dest[0];
    case 16:
        return bi_fadd_imm_v2f16_to(b, bi_temp(b->shader), src0, index)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for FADD_IMM");
    }
}

/* Emit FADD_LSCALE_F32 writing dest0 from two sources. The new instruction
 * is passed to bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_fadd_lscale_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    /* One allocation holds the header followed by 1 dest + 2 srcs. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 2));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FADD_LSCALE_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fadd_lscale_f32_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fadd_lscale_f32(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fadd_lscale_f32_to(b, dest, src0, src1);

    return instr->dest[0];
}


/* Emit FADD_RSCALE_F32 writing dest0 from three sources, with the given
 * special modifier. The new instruction is passed to bi_builder_insert()
 * with the builder's cursor and returned. */
static inline
bi_instr * bi_fadd_rscale_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_special special)
{
    /* One allocation holds the header followed by 1 dest + 3 srcs. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 3));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FADD_RSCALE_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->special = special;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fadd_rscale_f32_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fadd_rscale_f32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_special special)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fadd_rscale_f32_to(b, dest, src0, src1, src2, special);

    return instr->dest[0];
}


/* Emit FCLAMP_F32 writing dest0 from one source. The new instruction is
 * passed to bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_fclamp_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* One allocation holds the header followed by 1 dest + 1 src. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FCLAMP_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fclamp_f32_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fclamp_f32(bi_builder *b, bi_index src0)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fclamp_f32_to(b, dest, src0);

    return instr->dest[0];
}


/* Emit FCLAMP_V2F16 writing dest0 from one source. The new instruction is
 * passed to bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_fclamp_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* One allocation holds the header followed by 1 dest + 1 src. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FCLAMP_V2F16;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fclamp_v2f16_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fclamp_v2f16(bi_builder *b, bi_index src0)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fclamp_v2f16_to(b, dest, src0);

    return instr->dest[0];
}


/* Size-generic FCLAMP: bitsize 32 emits FCLAMP_F32, bitsize 16 emits
 * FCLAMP_V2F16. Any other bitsize hits UNREACHABLE. */
static inline
bi_instr * bi_fclamp_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0)
{
    switch (bitsize) {
    case 32:
        return bi_fclamp_f32_to(b, dest0, src0);
    case 16:
        return bi_fclamp_v2f16_to(b, dest0, src0);
    default:
        UNREACHABLE("Invalid parameters for FCLAMP");
    }
}

/* Size-generic FCLAMP into a bi_temp(b->shader) destination: bitsize 32
 * emits FCLAMP_F32, bitsize 16 emits FCLAMP_V2F16. Returns dest[0] of the
 * emitted instruction. Any other bitsize hits UNREACHABLE. */
static inline
bi_index bi_fclamp(bi_builder *b, unsigned bitsize, bi_index src0)
{
    switch (bitsize) {
    case 32:
        return bi_fclamp_f32_to(b, bi_temp(b->shader), src0)->dest[0];
    case 16:
        return bi_fclamp_v2f16_to(b, bi_temp(b->shader), src0)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for FCLAMP");
    }
}

/* Emit FCMP_F32 writing dest0 from two sources, with the given cmpf and
 * result_type modifiers. The new instruction is passed to
 * bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_fcmp_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* One allocation holds the header followed by 1 dest + 2 srcs. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 2));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FCMP_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fcmp_f32_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fcmp_f32(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fcmp_f32_to(b, dest, src0, src1, cmpf, result_type);

    return instr->dest[0];
}


/* Emit FCMP_V2F16 writing dest0 from two sources, with the given cmpf and
 * result_type modifiers. The new instruction is passed to
 * bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_fcmp_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* One allocation holds the header followed by 1 dest + 2 srcs. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 2));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FCMP_V2F16;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fcmp_v2f16_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fcmp_v2f16(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fcmp_v2f16_to(b, dest, src0, src1, cmpf, result_type);

    return instr->dest[0];
}


/* Size-generic FCMP: bitsize 32 emits FCMP_F32, bitsize 16 emits
 * FCMP_V2F16. cmpf must be one of the eight comparison functions tested
 * below; any other bitsize/cmpf combination hits UNREACHABLE.
 *
 * NOTE(review): this header is marked as autogenerated, so the generator
 * template should get the same de-duplication of the cmpf test. */
static inline
bi_instr * bi_fcmp_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Both variants accept the same cmpf set; each value is tested once. */
    const bool cmpf_ok = cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_GT ||
                         cmpf == BI_CMPF_GE || cmpf == BI_CMPF_NE ||
                         cmpf == BI_CMPF_LT || cmpf == BI_CMPF_LE ||
                         cmpf == BI_CMPF_GTLT || cmpf == BI_CMPF_TOTAL;

    if (bitsize == 32 && cmpf_ok)
        return (bi_fcmp_f32_to(b, dest0, src0, src1, cmpf, result_type));
    else if (bitsize == 16 && cmpf_ok)
        return (bi_fcmp_v2f16_to(b, dest0, src0, src1, cmpf, result_type));
    else
        UNREACHABLE("Invalid parameters for FCMP");
}

/* Size-generic FCMP into a bi_temp(b->shader) destination: bitsize 32 emits
 * FCMP_F32, bitsize 16 emits FCMP_V2F16. Returns dest[0] of the emitted
 * instruction. cmpf must be one of the eight comparison functions tested
 * below; any other bitsize/cmpf combination hits UNREACHABLE.
 *
 * NOTE(review): this header is marked as autogenerated, so the generator
 * template should get the same de-duplication of the cmpf test. */
static inline
bi_index bi_fcmp(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Both variants accept the same cmpf set; each value is tested once. */
    const bool cmpf_ok = cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_GT ||
                         cmpf == BI_CMPF_GE || cmpf == BI_CMPF_NE ||
                         cmpf == BI_CMPF_LT || cmpf == BI_CMPF_LE ||
                         cmpf == BI_CMPF_GTLT || cmpf == BI_CMPF_TOTAL;

    if (bitsize == 32 && cmpf_ok)
        return (bi_fcmp_f32_to(b, bi_temp(b->shader), src0, src1, cmpf, result_type))->dest[0];
    else if (bitsize == 16 && cmpf_ok)
        return (bi_fcmp_v2f16_to(b, bi_temp(b->shader), src0, src1, cmpf, result_type))->dest[0];
    else
        UNREACHABLE("Invalid parameters for FCMP");
}

/* Emit FCMP_AND_F32 writing dest0 from three sources, with the given cmpf
 * and result_type modifiers. The new instruction is passed to
 * bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_fcmp_and_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* One allocation holds the header followed by 1 dest + 3 srcs. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 3));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FCMP_AND_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fcmp_and_f32_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fcmp_and_f32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fcmp_and_f32_to(b, dest, src0, src1, src2, cmpf, result_type);

    return instr->dest[0];
}


/* Emit FCMP_AND_V2F16 writing dest0 from three sources, with the given cmpf
 * and result_type modifiers. The new instruction is passed to
 * bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_fcmp_and_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* One allocation holds the header followed by 1 dest + 3 srcs. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 3));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FCMP_AND_V2F16;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fcmp_and_v2f16_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fcmp_and_v2f16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fcmp_and_v2f16_to(b, dest, src0, src1, src2, cmpf, result_type);

    return instr->dest[0];
}


/* Size-generic FCMP_AND: bitsize 32 emits FCMP_AND_F32, bitsize 16 emits
 * FCMP_AND_V2F16. cmpf must be one of the eight comparison functions tested
 * below; any other bitsize/cmpf combination hits UNREACHABLE.
 *
 * NOTE(review): this header is marked as autogenerated, so the generator
 * template should get the same de-duplication of the cmpf test. */
static inline
bi_instr * bi_fcmp_and_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Both variants accept the same cmpf set; each value is tested once. */
    const bool cmpf_ok = cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_GT ||
                         cmpf == BI_CMPF_GE || cmpf == BI_CMPF_NE ||
                         cmpf == BI_CMPF_LT || cmpf == BI_CMPF_LE ||
                         cmpf == BI_CMPF_GTLT || cmpf == BI_CMPF_TOTAL;

    if (bitsize == 32 && cmpf_ok)
        return (bi_fcmp_and_f32_to(b, dest0, src0, src1, src2, cmpf, result_type));
    else if (bitsize == 16 && cmpf_ok)
        return (bi_fcmp_and_v2f16_to(b, dest0, src0, src1, src2, cmpf, result_type));
    else
        UNREACHABLE("Invalid parameters for FCMP_AND");
}

/* Size-generic FCMP_AND into a bi_temp(b->shader) destination: bitsize 32
 * emits FCMP_AND_F32, bitsize 16 emits FCMP_AND_V2F16. Returns dest[0] of
 * the emitted instruction. cmpf must be one of the eight comparison
 * functions tested below; any other bitsize/cmpf combination hits
 * UNREACHABLE.
 *
 * NOTE(review): this header is marked as autogenerated, so the generator
 * template should get the same de-duplication of the cmpf test. */
static inline
bi_index bi_fcmp_and(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Both variants accept the same cmpf set; each value is tested once. */
    const bool cmpf_ok = cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_GT ||
                         cmpf == BI_CMPF_GE || cmpf == BI_CMPF_NE ||
                         cmpf == BI_CMPF_LT || cmpf == BI_CMPF_LE ||
                         cmpf == BI_CMPF_GTLT || cmpf == BI_CMPF_TOTAL;

    if (bitsize == 32 && cmpf_ok)
        return (bi_fcmp_and_f32_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type))->dest[0];
    else if (bitsize == 16 && cmpf_ok)
        return (bi_fcmp_and_v2f16_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type))->dest[0];
    else
        UNREACHABLE("Invalid parameters for FCMP_AND");
}

/* Emit FCMP_OR_F32 writing dest0 from three sources, with the given cmpf
 * and result_type modifiers. The new instruction is passed to
 * bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_fcmp_or_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* One allocation holds the header followed by 1 dest + 3 srcs. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 3));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FCMP_OR_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fcmp_or_f32_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fcmp_or_f32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fcmp_or_f32_to(b, dest, src0, src1, src2, cmpf, result_type);

    return instr->dest[0];
}


/* Emit FCMP_OR_V2F16 writing dest0 from three sources, with the given cmpf
 * and result_type modifiers. The new instruction is passed to
 * bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_fcmp_or_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* One allocation holds the header followed by 1 dest + 3 srcs. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 3));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FCMP_OR_V2F16;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fcmp_or_v2f16_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fcmp_or_v2f16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fcmp_or_v2f16_to(b, dest, src0, src1, src2, cmpf, result_type);

    return instr->dest[0];
}


/* Size-generic FCMP_OR: bitsize 32 emits FCMP_OR_F32, bitsize 16 emits
 * FCMP_OR_V2F16. cmpf must be one of the eight comparison functions tested
 * below; any other bitsize/cmpf combination hits UNREACHABLE.
 *
 * NOTE(review): this header is marked as autogenerated, so the generator
 * template should get the same de-duplication of the cmpf test. */
static inline
bi_instr * bi_fcmp_or_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Both variants accept the same cmpf set; each value is tested once. */
    const bool cmpf_ok = cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_GT ||
                         cmpf == BI_CMPF_GE || cmpf == BI_CMPF_NE ||
                         cmpf == BI_CMPF_LT || cmpf == BI_CMPF_LE ||
                         cmpf == BI_CMPF_GTLT || cmpf == BI_CMPF_TOTAL;

    if (bitsize == 32 && cmpf_ok)
        return (bi_fcmp_or_f32_to(b, dest0, src0, src1, src2, cmpf, result_type));
    else if (bitsize == 16 && cmpf_ok)
        return (bi_fcmp_or_v2f16_to(b, dest0, src0, src1, src2, cmpf, result_type));
    else
        UNREACHABLE("Invalid parameters for FCMP_OR");
}

/* Size-generic FCMP_OR into a bi_temp(b->shader) destination: bitsize 32
 * emits FCMP_OR_F32, bitsize 16 emits FCMP_OR_V2F16. Returns dest[0] of the
 * emitted instruction. cmpf must be one of the eight comparison functions
 * tested below; any other bitsize/cmpf combination hits UNREACHABLE.
 *
 * NOTE(review): this header is marked as autogenerated, so the generator
 * template should get the same de-duplication of the cmpf test. */
static inline
bi_index bi_fcmp_or(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Both variants accept the same cmpf set; each value is tested once. */
    const bool cmpf_ok = cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_GT ||
                         cmpf == BI_CMPF_GE || cmpf == BI_CMPF_NE ||
                         cmpf == BI_CMPF_LT || cmpf == BI_CMPF_LE ||
                         cmpf == BI_CMPF_GTLT || cmpf == BI_CMPF_TOTAL;

    if (bitsize == 32 && cmpf_ok)
        return (bi_fcmp_or_f32_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type))->dest[0];
    else if (bitsize == 16 && cmpf_ok)
        return (bi_fcmp_or_v2f16_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type))->dest[0];
    else
        UNREACHABLE("Invalid parameters for FCMP_OR");
}

/* Emit FCOS_TABLE_U6 writing dest0 from one source, with the given offset
 * flag. The new instruction is passed to bi_builder_insert() with the
 * builder's cursor and returned. */
static inline
bi_instr * bi_fcos_table_u6_to(bi_builder *b, bi_index dest0, bi_index src0, bool offset)
{
    /* One allocation holds the header followed by 1 dest + 1 src. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FCOS_TABLE_U6;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;

    instr->offset = offset;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fcos_table_u6_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fcos_table_u6(bi_builder *b, bi_index src0, bool offset)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fcos_table_u6_to(b, dest, src0, offset);

    return instr->dest[0];
}


/* Emit FEXP_F32 writing dest0 from two sources. The new instruction is
 * passed to bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_fexp_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    /* One allocation holds the header followed by 1 dest + 2 srcs. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 2));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FEXP_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fexp_f32_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fexp_f32(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fexp_f32_to(b, dest, src0, src1);

    return instr->dest[0];
}


/* Emit FEXP_TABLE_U4 writing dest0 from one source, with the given adj
 * modifier. The new instruction is passed to bi_builder_insert() with the
 * builder's cursor and returned. */
static inline
bi_instr * bi_fexp_table_u4_to(bi_builder *b, bi_index dest0, bi_index src0, enum bi_adj adj)
{
    /* One allocation holds the header followed by 1 dest + 1 src. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FEXP_TABLE_U4;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;

    instr->adj = adj;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_fexp_table_u4_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fexp_table_u4(bi_builder *b, bi_index src0, enum bi_adj adj)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_fexp_table_u4_to(b, dest, src0, adj);

    return instr->dest[0];
}


/* Emit FLOGD_F32 writing dest0 from one source. The new instruction is
 * passed to bi_builder_insert() with the builder's cursor and returned. */
static inline
bi_instr * bi_flogd_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* One allocation holds the header followed by 1 dest + 1 src. */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_FLOGD_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Convenience form of bi_flogd_f32_to(): the destination is the index
 * returned by bi_temp(b->shader); returns dest[0] of the emitted instruction. */
static inline
bi_index bi_flogd_f32(bi_builder *b, bi_index src0)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_flogd_f32_to(b, dest, src0);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FLOG_TABLE_F32 instruction (1 destination, 1 source)
 * carrying the given mode and precision modifiers. The instruction is
 * allocated from b->shader, handed to bi_builder_insert with the builder's
 * cursor, and returned. */
static inline
bi_instr * bi_flog_table_f32_to(bi_builder *b, bi_index dest0, bi_index src0, enum bi_mode mode, enum bi_precision precision)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FLOG_TABLE_F32;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    instr->mode = mode;
    instr->precision = precision;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_flog_table_f32_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_flog_table_f32(bi_builder *b, bi_index src0, enum bi_mode mode, enum bi_precision precision)
{
    bi_instr *instr = bi_flog_table_f32_to(b, bi_temp(b->shader), src0, mode, precision);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FLSHIFT_DOUBLE_I32 instruction (1 destination, 3 sources)
 * carrying the given bytes2 flag. The instruction is allocated from
 * b->shader, handed to bi_builder_insert with the builder's cursor, and
 * returned. */
static inline
bi_instr * bi_flshift_double_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool bytes2)
{
    /* Header followed by 1 destination slot and 3 source slots */
    const size_t bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FLSHIFT_DOUBLE_I32;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->bytes2 = bytes2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_flshift_double_i32_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_flshift_double_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool bytes2)
{
    bi_instr *instr = bi_flshift_double_i32_to(b, bi_temp(b->shader), src0, src1, src2, bytes2);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FLUSH_F32 instruction (1 destination, 1 source).
 * The instruction is allocated from b->shader, handed to bi_builder_insert
 * with the builder's cursor, and returned. */
static inline
bi_instr * bi_flush_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FLUSH_F32;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_flush_f32_to that uses bi_temp(b->shader) as the destination
 * and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_flush_f32(bi_builder *b, bi_index src0)
{
    bi_instr *instr = bi_flush_f32_to(b, bi_temp(b->shader), src0);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FLUSH_V2F16 instruction (1 destination, 1 source).
 * The instruction is allocated from b->shader, handed to bi_builder_insert
 * with the builder's cursor, and returned. */
static inline
bi_instr * bi_flush_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FLUSH_V2F16;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_flush_v2f16_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_flush_v2f16(bi_builder *b, bi_index src0)
{
    bi_instr *instr = bi_flush_v2f16_to(b, bi_temp(b->shader), src0);

    return instr->dest[0];
}


/* Dispatch on bitsize: 32 -> bi_flush_f32_to, 16 -> bi_flush_v2f16_to.
 * Any other bitsize reaches UNREACHABLE. */
static inline
bi_instr * bi_flush_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0)
{
    switch (bitsize) {
    case 32:
        return bi_flush_f32_to(b, dest0, src0);
    case 16:
        return bi_flush_v2f16_to(b, dest0, src0);
    default:
        UNREACHABLE("Invalid parameters for FLUSH");
    }
}

/* Dispatch on bitsize (32 -> f32 builder, 16 -> v2f16 builder), writing to
 * a bi_temp(b->shader) destination and returning dest[0] of the emitted
 * instruction. Any other bitsize reaches UNREACHABLE. */
static inline
bi_index bi_flush(bi_builder *b, unsigned bitsize, bi_index src0)
{
    switch (bitsize) {
    case 32:
        return bi_flush_f32_to(b, bi_temp(b->shader), src0)->dest[0];
    case 16:
        return bi_flush_v2f16_to(b, bi_temp(b->shader), src0)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for FLUSH");
    }
}

/* Emit a BI_OPCODE_FMA_F32 instruction (1 destination, 3 sources).
 * The round field is taken from bi_round_mode(b->shader, 32) rather than
 * from a parameter. The instruction is allocated from b->shader, handed to
 * bi_builder_insert with the builder's cursor, and returned. */
static inline
bi_instr * bi_fma_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    /* Header followed by 1 destination slot and 3 source slots */
    const size_t bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FMA_F32;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->round = bi_round_mode(b->shader, 32);

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_fma_f32_to that uses bi_temp(b->shader) as the destination
 * and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fma_f32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_instr *instr = bi_fma_f32_to(b, bi_temp(b->shader), src0, src1, src2);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FMA_V2F16 instruction (1 destination, 3 sources).
 * The round field is taken from bi_round_mode(b->shader, 16) rather than
 * from a parameter. The instruction is allocated from b->shader, handed to
 * bi_builder_insert with the builder's cursor, and returned. */
static inline
bi_instr * bi_fma_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    /* Header followed by 1 destination slot and 3 source slots */
    const size_t bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FMA_V2F16;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->round = bi_round_mode(b->shader, 16);

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_fma_v2f16_to that uses bi_temp(b->shader) as the destination
 * and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fma_v2f16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_instr *instr = bi_fma_v2f16_to(b, bi_temp(b->shader), src0, src1, src2);

    return instr->dest[0];
}


/* Dispatch on bitsize: 32 -> bi_fma_f32_to, 16 -> bi_fma_v2f16_to.
 * Any other bitsize reaches UNREACHABLE. */
static inline
bi_instr * bi_fma_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    switch (bitsize) {
    case 32:
        return bi_fma_f32_to(b, dest0, src0, src1, src2);
    case 16:
        return bi_fma_v2f16_to(b, dest0, src0, src1, src2);
    default:
        UNREACHABLE("Invalid parameters for FMA");
    }
}

/* Dispatch on bitsize (32 -> f32 builder, 16 -> v2f16 builder), writing to
 * a bi_temp(b->shader) destination and returning dest[0] of the emitted
 * instruction. Any other bitsize reaches UNREACHABLE. */
static inline
bi_index bi_fma(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2)
{
    switch (bitsize) {
    case 32:
        return bi_fma_f32_to(b, bi_temp(b->shader), src0, src1, src2)->dest[0];
    case 16:
        return bi_fma_v2f16_to(b, bi_temp(b->shader), src0, src1, src2)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for FMA");
    }
}

/* Emit a BI_OPCODE_FMAX_F32 instruction (1 destination, 2 sources).
 * The instruction is allocated from b->shader, handed to bi_builder_insert
 * with the builder's cursor, and returned. */
static inline
bi_instr * bi_fmax_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    /* Header followed by 1 destination slot and 2 source slots */
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FMAX_F32;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 2;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_fmax_f32_to that uses bi_temp(b->shader) as the destination
 * and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fmax_f32(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_fmax_f32_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FMAX_V2F16 instruction (1 destination, 2 sources).
 * The instruction is allocated from b->shader, handed to bi_builder_insert
 * with the builder's cursor, and returned. */
static inline
bi_instr * bi_fmax_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    /* Header followed by 1 destination slot and 2 source slots */
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FMAX_V2F16;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 2;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_fmax_v2f16_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fmax_v2f16(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_fmax_v2f16_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/* Dispatch on bitsize: 32 -> bi_fmax_f32_to, 16 -> bi_fmax_v2f16_to.
 * Any other bitsize reaches UNREACHABLE. */
static inline
bi_instr * bi_fmax_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1)
{
    switch (bitsize) {
    case 32:
        return bi_fmax_f32_to(b, dest0, src0, src1);
    case 16:
        return bi_fmax_v2f16_to(b, dest0, src0, src1);
    default:
        UNREACHABLE("Invalid parameters for FMAX");
    }
}

/* Dispatch on bitsize (32 -> f32 builder, 16 -> v2f16 builder), writing to
 * a bi_temp(b->shader) destination and returning dest[0] of the emitted
 * instruction. Any other bitsize reaches UNREACHABLE. */
static inline
bi_index bi_fmax(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1)
{
    switch (bitsize) {
    case 32:
        return bi_fmax_f32_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    case 16:
        return bi_fmax_v2f16_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for FMAX");
    }
}

/* Emit a BI_OPCODE_FMA_RSCALE_F32 instruction (1 destination, 4 sources)
 * carrying the given special modifier. The instruction is allocated from
 * b->shader, handed to bi_builder_insert with the builder's cursor, and
 * returned. */
static inline
bi_instr * bi_fma_rscale_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_special special)
{
    /* Header followed by 1 destination slot and 4 source slots */
    const size_t bytes = sizeof(bi_instr) + (1 + 4) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FMA_RSCALE_F32;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 4;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;
    instr->src[3] = src3;

    instr->special = special;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_fma_rscale_f32_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fma_rscale_f32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_special special)
{
    bi_instr *instr = bi_fma_rscale_f32_to(b, bi_temp(b->shader), src0, src1, src2, src3, special);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FMA_RSCALE_V2F16 instruction (1 destination, 4 sources)
 * carrying the given special modifier. The instruction is allocated from
 * b->shader, handed to bi_builder_insert with the builder's cursor, and
 * returned. */
static inline
bi_instr * bi_fma_rscale_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_special special)
{
    /* Header followed by 1 destination slot and 4 source slots */
    const size_t bytes = sizeof(bi_instr) + (1 + 4) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FMA_RSCALE_V2F16;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 4;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;
    instr->src[3] = src3;

    instr->special = special;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_fma_rscale_v2f16_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fma_rscale_v2f16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_special special)
{
    bi_instr *instr = bi_fma_rscale_v2f16_to(b, bi_temp(b->shader), src0, src1, src2, src3, special);

    return instr->dest[0];
}


/* Dispatch on bitsize: 32 -> bi_fma_rscale_f32_to,
 * 16 -> bi_fma_rscale_v2f16_to. Any other bitsize reaches UNREACHABLE. */
static inline
bi_instr * bi_fma_rscale_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_special special)
{
    switch (bitsize) {
    case 32:
        return bi_fma_rscale_f32_to(b, dest0, src0, src1, src2, src3, special);
    case 16:
        return bi_fma_rscale_v2f16_to(b, dest0, src0, src1, src2, src3, special);
    default:
        UNREACHABLE("Invalid parameters for FMA_RSCALE");
    }
}

/* Dispatch on bitsize (32 -> f32 builder, 16 -> v2f16 builder), writing to
 * a bi_temp(b->shader) destination and returning dest[0] of the emitted
 * instruction. Any other bitsize reaches UNREACHABLE. */
static inline
bi_index bi_fma_rscale(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_special special)
{
    switch (bitsize) {
    case 32:
        return bi_fma_rscale_f32_to(b, bi_temp(b->shader), src0, src1, src2, src3, special)->dest[0];
    case 16:
        return bi_fma_rscale_v2f16_to(b, bi_temp(b->shader), src0, src1, src2, src3, special)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for FMA_RSCALE");
    }
}

/* Emit a BI_OPCODE_FMIN_F32 instruction (1 destination, 2 sources).
 * The instruction is allocated from b->shader, handed to bi_builder_insert
 * with the builder's cursor, and returned. */
static inline
bi_instr * bi_fmin_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    /* Header followed by 1 destination slot and 2 source slots */
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FMIN_F32;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 2;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_fmin_f32_to that uses bi_temp(b->shader) as the destination
 * and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fmin_f32(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_fmin_f32_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FMIN_V2F16 instruction (1 destination, 2 sources).
 * The instruction is allocated from b->shader, handed to bi_builder_insert
 * with the builder's cursor, and returned. */
static inline
bi_instr * bi_fmin_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    /* Header followed by 1 destination slot and 2 source slots */
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FMIN_V2F16;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 2;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_fmin_v2f16_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fmin_v2f16(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_fmin_v2f16_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/* Dispatch on bitsize: 32 -> bi_fmin_f32_to, 16 -> bi_fmin_v2f16_to.
 * Any other bitsize reaches UNREACHABLE. */
static inline
bi_instr * bi_fmin_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1)
{
    switch (bitsize) {
    case 32:
        return bi_fmin_f32_to(b, dest0, src0, src1);
    case 16:
        return bi_fmin_v2f16_to(b, dest0, src0, src1);
    default:
        UNREACHABLE("Invalid parameters for FMIN");
    }
}

/* Dispatch on bitsize (32 -> f32 builder, 16 -> v2f16 builder), writing to
 * a bi_temp(b->shader) destination and returning dest[0] of the emitted
 * instruction. Any other bitsize reaches UNREACHABLE. */
static inline
bi_index bi_fmin(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1)
{
    switch (bitsize) {
    case 32:
        return bi_fmin_f32_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    case 16:
        return bi_fmin_v2f16_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for FMIN");
    }
}

/* Emit a BI_OPCODE_FMUL_CSLICE instruction (1 destination, 2 sources).
 * The instruction is allocated from b->shader, handed to bi_builder_insert
 * with the builder's cursor, and returned. */
static inline
bi_instr * bi_fmul_cslice_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    /* Header followed by 1 destination slot and 2 source slots */
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FMUL_CSLICE;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 2;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_fmul_cslice_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fmul_cslice(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_fmul_cslice_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FMUL_SLICE_F32 instruction (1 destination, 2 sources).
 * The instruction is allocated from b->shader, handed to bi_builder_insert
 * with the builder's cursor, and returned. */
static inline
bi_instr * bi_fmul_slice_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    /* Header followed by 1 destination slot and 2 source slots */
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FMUL_SLICE_F32;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 2;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_fmul_slice_f32_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fmul_slice_f32(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_fmul_slice_f32_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FPCLASS_F16 instruction (1 destination, 1 source).
 * The instruction is allocated from b->shader, handed to bi_builder_insert
 * with the builder's cursor, and returned. */
static inline
bi_instr * bi_fpclass_f16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FPCLASS_F16;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_fpclass_f16_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fpclass_f16(bi_builder *b, bi_index src0)
{
    bi_instr *instr = bi_fpclass_f16_to(b, bi_temp(b->shader), src0);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FPCLASS_F32 instruction (1 destination, 1 source).
 * The instruction is allocated from b->shader, handed to bi_builder_insert
 * with the builder's cursor, and returned. */
static inline
bi_instr * bi_fpclass_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FPCLASS_F32;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_fpclass_f32_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fpclass_f32(bi_builder *b, bi_index src0)
{
    bi_instr *instr = bi_fpclass_f32_to(b, bi_temp(b->shader), src0);

    return instr->dest[0];
}


/* Dispatch on bitsize: 16 -> bi_fpclass_f16_to, 32 -> bi_fpclass_f32_to.
 * Any other bitsize reaches UNREACHABLE. */
static inline
bi_instr * bi_fpclass_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0)
{
    switch (bitsize) {
    case 16:
        return bi_fpclass_f16_to(b, dest0, src0);
    case 32:
        return bi_fpclass_f32_to(b, dest0, src0);
    default:
        UNREACHABLE("Invalid parameters for FPCLASS");
    }
}

/* Dispatch on bitsize (16 -> f16 builder, 32 -> f32 builder), writing to
 * a bi_temp(b->shader) destination and returning dest[0] of the emitted
 * instruction. Any other bitsize reaches UNREACHABLE. */
static inline
bi_index bi_fpclass(bi_builder *b, unsigned bitsize, bi_index src0)
{
    switch (bitsize) {
    case 16:
        return bi_fpclass_f16_to(b, bi_temp(b->shader), src0)->dest[0];
    case 32:
        return bi_fpclass_f32_to(b, bi_temp(b->shader), src0)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for FPCLASS");
    }
}

/* Emit a BI_OPCODE_FPOW_SC_APPLY instruction (1 destination, 2 sources).
 * The instruction is allocated from b->shader, handed to bi_builder_insert
 * with the builder's cursor, and returned. */
static inline
bi_instr * bi_fpow_sc_apply_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    /* Header followed by 1 destination slot and 2 source slots */
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FPOW_SC_APPLY;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 2;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_fpow_sc_apply_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fpow_sc_apply(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_fpow_sc_apply_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FPOW_SC_DET_F16 instruction (1 destination, 2 sources)
 * carrying the given func modifier. The instruction is allocated from
 * b->shader, handed to bi_builder_insert with the builder's cursor, and
 * returned. */
static inline
bi_instr * bi_fpow_sc_det_f16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_func func)
{
    /* Header followed by 1 destination slot and 2 source slots */
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FPOW_SC_DET_F16;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 2;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->func = func;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_fpow_sc_det_f16_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fpow_sc_det_f16(bi_builder *b, bi_index src0, bi_index src1, enum bi_func func)
{
    bi_instr *instr = bi_fpow_sc_det_f16_to(b, bi_temp(b->shader), src0, src1, func);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FPOW_SC_DET_F32 instruction (1 destination, 2 sources)
 * carrying the given func modifier. The instruction is allocated from
 * b->shader, handed to bi_builder_insert with the builder's cursor, and
 * returned. */
static inline
bi_instr * bi_fpow_sc_det_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_func func)
{
    /* Header followed by 1 destination slot and 2 source slots */
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FPOW_SC_DET_F32;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 2;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->func = func;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_fpow_sc_det_f32_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fpow_sc_det_f32(bi_builder *b, bi_index src0, bi_index src1, enum bi_func func)
{
    bi_instr *instr = bi_fpow_sc_det_f32_to(b, bi_temp(b->shader), src0, src1, func);

    return instr->dest[0];
}


/* Dispatch on bitsize: 16 -> bi_fpow_sc_det_f16_to,
 * 32 -> bi_fpow_sc_det_f32_to. Any other bitsize reaches UNREACHABLE. */
static inline
bi_instr * bi_fpow_sc_det_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, enum bi_func func)
{
    switch (bitsize) {
    case 16:
        return bi_fpow_sc_det_f16_to(b, dest0, src0, src1, func);
    case 32:
        return bi_fpow_sc_det_f32_to(b, dest0, src0, src1, func);
    default:
        UNREACHABLE("Invalid parameters for FPOW_SC_DET");
    }
}

/* Dispatch on bitsize (16 -> f16 builder, 32 -> f32 builder), writing to
 * a bi_temp(b->shader) destination and returning dest[0] of the emitted
 * instruction. Any other bitsize reaches UNREACHABLE. */
static inline
bi_index bi_fpow_sc_det(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, enum bi_func func)
{
    switch (bitsize) {
    case 16:
        return bi_fpow_sc_det_f16_to(b, bi_temp(b->shader), src0, src1, func)->dest[0];
    case 32:
        return bi_fpow_sc_det_f32_to(b, bi_temp(b->shader), src0, src1, func)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for FPOW_SC_DET");
    }
}

/* Emit a BI_OPCODE_FRCP_F16 instruction (1 destination, 1 source).
 * The instruction is allocated from b->shader, handed to bi_builder_insert
 * with the builder's cursor, and returned. */
static inline
bi_instr * bi_frcp_f16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FRCP_F16;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_frcp_f16_to that uses bi_temp(b->shader) as the destination
 * and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_frcp_f16(bi_builder *b, bi_index src0)
{
    bi_instr *instr = bi_frcp_f16_to(b, bi_temp(b->shader), src0);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FRCP_F32 instruction (1 destination, 1 source).
 * The instruction is allocated from b->shader, handed to bi_builder_insert
 * with the builder's cursor, and returned. */
static inline
bi_instr * bi_frcp_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FRCP_F32;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_frcp_f32_to that uses bi_temp(b->shader) as the destination
 * and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_frcp_f32(bi_builder *b, bi_index src0)
{
    bi_instr *instr = bi_frcp_f32_to(b, bi_temp(b->shader), src0);

    return instr->dest[0];
}


/* Dispatch on bitsize: 16 -> bi_frcp_f16_to, 32 -> bi_frcp_f32_to.
 * Any other bitsize reaches UNREACHABLE. */
static inline
bi_instr * bi_frcp_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0)
{
    switch (bitsize) {
    case 16:
        return bi_frcp_f16_to(b, dest0, src0);
    case 32:
        return bi_frcp_f32_to(b, dest0, src0);
    default:
        UNREACHABLE("Invalid parameters for FRCP");
    }
}

/* Dispatch on bitsize (16 -> f16 builder, 32 -> f32 builder), writing to
 * a bi_temp(b->shader) destination and returning dest[0] of the emitted
 * instruction. Any other bitsize reaches UNREACHABLE. */
static inline
bi_index bi_frcp(bi_builder *b, unsigned bitsize, bi_index src0)
{
    switch (bitsize) {
    case 16:
        return bi_frcp_f16_to(b, bi_temp(b->shader), src0)->dest[0];
    case 32:
        return bi_frcp_f32_to(b, bi_temp(b->shader), src0)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for FRCP");
    }
}

/* Emit a BI_OPCODE_FRCP_APPROX_F32 instruction (1 destination, 1 source).
 * The instruction is allocated from b->shader, handed to bi_builder_insert
 * with the builder's cursor, and returned. */
static inline
bi_instr * bi_frcp_approx_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FRCP_APPROX_F32;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_frcp_approx_f32_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_frcp_approx_f32(bi_builder *b, bi_index src0)
{
    bi_instr *instr = bi_frcp_approx_f32_to(b, bi_temp(b->shader), src0);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FREXPE_F32 instruction (1 destination, 1 source)
 * carrying the given log and sqrt flags. The instruction is allocated from
 * b->shader, handed to bi_builder_insert with the builder's cursor, and
 * returned. */
static inline
bi_instr * bi_frexpe_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bool log, bool sqrt)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FREXPE_F32;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    instr->log = log;
    instr->sqrt = sqrt;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_frexpe_f32_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_frexpe_f32(bi_builder *b, bi_index src0, bool log, bool sqrt)
{
    bi_instr *instr = bi_frexpe_f32_to(b, bi_temp(b->shader), src0, log, sqrt);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FREXPE_V2F16 instruction (1 destination, 1 source)
 * carrying the given log and sqrt flags. The instruction is allocated from
 * b->shader, handed to bi_builder_insert with the builder's cursor, and
 * returned. */
static inline
bi_instr * bi_frexpe_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0, bool log, bool sqrt)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FREXPE_V2F16;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    instr->log = log;
    instr->sqrt = sqrt;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_frexpe_v2f16_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_frexpe_v2f16(bi_builder *b, bi_index src0, bool log, bool sqrt)
{
    bi_instr *instr = bi_frexpe_v2f16_to(b, bi_temp(b->shader), src0, log, sqrt);

    return instr->dest[0];
}


/* Dispatch on bitsize: 32 -> bi_frexpe_f32_to, 16 -> bi_frexpe_v2f16_to.
 * Any other bitsize reaches UNREACHABLE. */
static inline
bi_instr * bi_frexpe_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bool log, bool sqrt)
{
    switch (bitsize) {
    case 32:
        return bi_frexpe_f32_to(b, dest0, src0, log, sqrt);
    case 16:
        return bi_frexpe_v2f16_to(b, dest0, src0, log, sqrt);
    default:
        UNREACHABLE("Invalid parameters for FREXPE");
    }
}

/* Dispatch on bitsize (32 -> f32 builder, 16 -> v2f16 builder), writing to
 * a bi_temp(b->shader) destination and returning dest[0] of the emitted
 * instruction. Any other bitsize reaches UNREACHABLE. */
static inline
bi_index bi_frexpe(bi_builder *b, unsigned bitsize, bi_index src0, bool log, bool sqrt)
{
    switch (bitsize) {
    case 32:
        return bi_frexpe_f32_to(b, bi_temp(b->shader), src0, log, sqrt)->dest[0];
    case 16:
        return bi_frexpe_v2f16_to(b, bi_temp(b->shader), src0, log, sqrt)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for FREXPE");
    }
}

/* Emit a BI_OPCODE_FREXPM_F32 instruction (1 destination, 1 source)
 * carrying the given log and sqrt flags. The instruction is allocated from
 * b->shader, handed to bi_builder_insert with the builder's cursor, and
 * returned. */
static inline
bi_instr * bi_frexpm_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bool log, bool sqrt)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FREXPM_F32;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    instr->log = log;
    instr->sqrt = sqrt;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_frexpm_f32_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_frexpm_f32(bi_builder *b, bi_index src0, bool log, bool sqrt)
{
    bi_instr *instr = bi_frexpm_f32_to(b, bi_temp(b->shader), src0, log, sqrt);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FREXPM_V2F16 instruction (1 destination, 1 source)
 * carrying the given log and sqrt flags. The instruction is allocated from
 * b->shader, handed to bi_builder_insert with the builder's cursor, and
 * returned. */
static inline
bi_instr * bi_frexpm_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0, bool log, bool sqrt)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FREXPM_V2F16;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    instr->log = log;
    instr->sqrt = sqrt;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_frexpm_v2f16_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_frexpm_v2f16(bi_builder *b, bi_index src0, bool log, bool sqrt)
{
    bi_instr *instr = bi_frexpm_v2f16_to(b, bi_temp(b->shader), src0, log, sqrt);

    return instr->dest[0];
}


/* Dispatch on bitsize: 32 -> bi_frexpm_f32_to, 16 -> bi_frexpm_v2f16_to.
 * Any other bitsize reaches UNREACHABLE. */
static inline
bi_instr * bi_frexpm_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bool log, bool sqrt)
{
    switch (bitsize) {
    case 32:
        return bi_frexpm_f32_to(b, dest0, src0, log, sqrt);
    case 16:
        return bi_frexpm_v2f16_to(b, dest0, src0, log, sqrt);
    default:
        UNREACHABLE("Invalid parameters for FREXPM");
    }
}

/* Dispatch on bitsize (32 -> f32 builder, 16 -> v2f16 builder), writing to
 * a bi_temp(b->shader) destination and returning dest[0] of the emitted
 * instruction. Any other bitsize reaches UNREACHABLE. */
static inline
bi_index bi_frexpm(bi_builder *b, unsigned bitsize, bi_index src0, bool log, bool sqrt)
{
    switch (bitsize) {
    case 32:
        return bi_frexpm_f32_to(b, bi_temp(b->shader), src0, log, sqrt)->dest[0];
    case 16:
        return bi_frexpm_v2f16_to(b, bi_temp(b->shader), src0, log, sqrt)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for FREXPM");
    }
}

/* Emit a BI_OPCODE_FROUND_F32 instruction (1 destination, 1 source)
 * carrying the given round modifier. The instruction is allocated from
 * b->shader, handed to bi_builder_insert with the builder's cursor, and
 * returned. */
static inline
bi_instr * bi_fround_f32_to(bi_builder *b, bi_index dest0, bi_index src0, enum bi_round round)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FROUND_F32;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    instr->round = round;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_fround_f32_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fround_f32(bi_builder *b, bi_index src0, enum bi_round round)
{
    bi_instr *instr = bi_fround_f32_to(b, bi_temp(b->shader), src0, round);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FROUND_V2F16 instruction (1 destination, 1 source)
 * carrying the given round modifier. The instruction is allocated from
 * b->shader, handed to bi_builder_insert with the builder's cursor, and
 * returned. */
static inline
bi_instr * bi_fround_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0, enum bi_round round)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FROUND_V2F16;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    instr->round = round;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_fround_v2f16_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fround_v2f16(bi_builder *b, bi_index src0, enum bi_round round)
{
    bi_instr *instr = bi_fround_v2f16_to(b, bi_temp(b->shader), src0, round);

    return instr->dest[0];
}


/* Dispatch on bitsize: 32 -> bi_fround_f32_to, 16 -> bi_fround_v2f16_to.
 * Any other bitsize reaches UNREACHABLE. */
static inline
bi_instr * bi_fround_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, enum bi_round round)
{
    switch (bitsize) {
    case 32:
        return bi_fround_f32_to(b, dest0, src0, round);
    case 16:
        return bi_fround_v2f16_to(b, dest0, src0, round);
    default:
        UNREACHABLE("Invalid parameters for FROUND");
    }
}

/* Dispatch on bitsize (32 -> f32 builder, 16 -> v2f16 builder), writing to
 * a bi_temp(b->shader) destination and returning dest[0] of the emitted
 * instruction. Any other bitsize reaches UNREACHABLE. */
static inline
bi_index bi_fround(bi_builder *b, unsigned bitsize, bi_index src0, enum bi_round round)
{
    switch (bitsize) {
    case 32:
        return bi_fround_f32_to(b, bi_temp(b->shader), src0, round)->dest[0];
    case 16:
        return bi_fround_v2f16_to(b, bi_temp(b->shader), src0, round)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for FROUND");
    }
}

/* Emit a BI_OPCODE_FRSHIFT_DOUBLE_I32 instruction (1 destination, 3 sources)
 * carrying the given bytes2 flag. The instruction is allocated from
 * b->shader, handed to bi_builder_insert with the builder's cursor, and
 * returned. */
static inline
bi_instr * bi_frshift_double_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool bytes2)
{
    /* Header followed by 1 destination slot and 3 source slots */
    const size_t bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FRSHIFT_DOUBLE_I32;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->bytes2 = bytes2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_frshift_double_i32_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_frshift_double_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool bytes2)
{
    bi_instr *instr = bi_frshift_double_i32_to(b, bi_temp(b->shader), src0, src1, src2, bytes2);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FRSQ_F16 instruction (1 destination, 1 source).
 * The instruction is allocated from b->shader, handed to bi_builder_insert
 * with the builder's cursor, and returned. */
static inline
bi_instr * bi_frsq_f16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FRSQ_F16;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_frsq_f16_to that uses bi_temp(b->shader) as the destination
 * and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_frsq_f16(bi_builder *b, bi_index src0)
{
    bi_instr *instr = bi_frsq_f16_to(b, bi_temp(b->shader), src0);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FRSQ_F32 instruction (1 destination, 1 source).
 * The instruction is allocated from b->shader, handed to bi_builder_insert
 * with the builder's cursor, and returned. */
static inline
bi_instr * bi_frsq_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FRSQ_F32;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_frsq_f32_to that uses bi_temp(b->shader) as the destination
 * and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_frsq_f32(bi_builder *b, bi_index src0)
{
    bi_instr *instr = bi_frsq_f32_to(b, bi_temp(b->shader), src0);

    return instr->dest[0];
}


/* Dispatch on bitsize: 16 -> bi_frsq_f16_to, 32 -> bi_frsq_f32_to.
 * Any other bitsize reaches UNREACHABLE. */
static inline
bi_instr * bi_frsq_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0)
{
    switch (bitsize) {
    case 16:
        return bi_frsq_f16_to(b, dest0, src0);
    case 32:
        return bi_frsq_f32_to(b, dest0, src0);
    default:
        UNREACHABLE("Invalid parameters for FRSQ");
    }
}

/* Dispatch on bitsize (16 -> f16 builder, 32 -> f32 builder), writing to
 * a bi_temp(b->shader) destination and returning dest[0] of the emitted
 * instruction. Any other bitsize reaches UNREACHABLE. */
static inline
bi_index bi_frsq(bi_builder *b, unsigned bitsize, bi_index src0)
{
    switch (bitsize) {
    case 16:
        return bi_frsq_f16_to(b, bi_temp(b->shader), src0)->dest[0];
    case 32:
        return bi_frsq_f32_to(b, bi_temp(b->shader), src0)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for FRSQ");
    }
}

/* Emit a BI_OPCODE_FRSQ_APPROX_F32 instruction (1 destination, 1 source).
 * The instruction is allocated from b->shader, handed to bi_builder_insert
 * with the builder's cursor, and returned. */
static inline
bi_instr * bi_frsq_approx_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FRSQ_APPROX_F32;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_frsq_approx_f32_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_frsq_approx_f32(bi_builder *b, bi_index src0)
{
    bi_instr *instr = bi_frsq_approx_f32_to(b, bi_temp(b->shader), src0);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FSINCOS_OFFSET_U6 instruction (1 destination, 1 source)
 * carrying the given scale flag. The instruction is allocated from
 * b->shader, handed to bi_builder_insert with the builder's cursor, and
 * returned. */
static inline
bi_instr * bi_fsincos_offset_u6_to(bi_builder *b, bi_index dest0, bi_index src0, bool scale)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FSINCOS_OFFSET_U6;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    instr->scale = scale;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Variant of bi_fsincos_offset_u6_to that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. */
static inline
bi_index bi_fsincos_offset_u6(bi_builder *b, bi_index src0, bool scale)
{
    bi_instr *instr = bi_fsincos_offset_u6_to(b, bi_temp(b->shader), src0, scale);

    return instr->dest[0];
}


/* Emit a BI_OPCODE_FSIN_TABLE_U6 instruction (1 destination, 1 source)
 * carrying the given offset flag. The instruction is allocated from
 * b->shader, handed to bi_builder_insert with the builder's cursor, and
 * returned. */
static inline
bi_instr * bi_fsin_table_u6_to(bi_builder *b, bi_index dest0, bi_index src0, bool offset)
{
    /* Header followed by 1 destination slot and 1 source slot */
    const size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_FSIN_TABLE_U6;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    instr->offset = offset;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_fsin_table_u6_to(): the destination is the
 * index produced by bi_temp(b->shader), and dest[0] of the built instruction
 * is returned.
 */
static inline
bi_index bi_fsin_table_u6(bi_builder *b, bi_index src0, bool offset)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_fsin_table_u6_to(b, tmp, src0, offset);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_HADD_S32 instruction with destination dest0, sources
 * src0/src1 and the given round modifier, insert it at b->cursor and return
 * it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_hadd_s32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_round round)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_HADD_S32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->round = round;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_hadd_s32_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_hadd_s32(bi_builder *b, bi_index src0, bi_index src1, enum bi_round round)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_hadd_s32_to(b, tmp, src0, src1, round);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_HADD_U32 instruction with destination dest0, sources
 * src0/src1 and the given round modifier, insert it at b->cursor and return
 * it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_hadd_u32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_round round)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_HADD_U32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->round = round;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_hadd_u32_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_hadd_u32(bi_builder *b, bi_index src0, bi_index src1, enum bi_round round)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_hadd_u32_to(b, tmp, src0, src1, round);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_HADD_V2S16 instruction with destination dest0, sources
 * src0/src1 and the given round modifier, insert it at b->cursor and return
 * it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_hadd_v2s16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_round round)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_HADD_V2S16;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->round = round;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_hadd_v2s16_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_hadd_v2s16(bi_builder *b, bi_index src0, bi_index src1, enum bi_round round)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_hadd_v2s16_to(b, tmp, src0, src1, round);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_HADD_V2U16 instruction with destination dest0, sources
 * src0/src1 and the given round modifier, insert it at b->cursor and return
 * it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_hadd_v2u16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_round round)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_HADD_V2U16;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->round = round;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_hadd_v2u16_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_hadd_v2u16(bi_builder *b, bi_index src0, bi_index src1, enum bi_round round)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_hadd_v2u16_to(b, tmp, src0, src1, round);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_HADD_V4S8 instruction with destination dest0, sources
 * src0/src1 and the given round modifier, insert it at b->cursor and return
 * it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_hadd_v4s8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_round round)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_HADD_V4S8;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->round = round;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_hadd_v4s8_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_hadd_v4s8(bi_builder *b, bi_index src0, bi_index src1, enum bi_round round)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_hadd_v4s8_to(b, tmp, src0, src1, round);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_HADD_V4U8 instruction with destination dest0, sources
 * src0/src1 and the given round modifier, insert it at b->cursor and return
 * it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_hadd_v4u8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_round round)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_HADD_V4U8;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->round = round;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_hadd_v4u8_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_hadd_v4u8(bi_builder *b, bi_index src0, bi_index src1, enum bi_round round)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_hadd_v4u8_to(b, tmp, src0, src1, round);

    return instr->dest[0];
}


/*
 * Generic HADD builder. Picks the concrete builder from (type, bitsize):
 * nir_type_int / nir_type_uint combined with a bitsize of 32, 16 or 8.
 * Every other combination ends in UNREACHABLE.
 */
static inline
bi_instr * bi_hadd_to(bi_builder *b, nir_alu_type type, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, enum bi_round round)
{
    switch (bitsize) {
    case 32:
        if (type == nir_type_int)
            return bi_hadd_s32_to(b, dest0, src0, src1, round);
        if (type == nir_type_uint)
            return bi_hadd_u32_to(b, dest0, src0, src1, round);
        break;
    case 16:
        if (type == nir_type_int)
            return bi_hadd_v2s16_to(b, dest0, src0, src1, round);
        if (type == nir_type_uint)
            return bi_hadd_v2u16_to(b, dest0, src0, src1, round);
        break;
    case 8:
        if (type == nir_type_int)
            return bi_hadd_v4s8_to(b, dest0, src0, src1, round);
        if (type == nir_type_uint)
            return bi_hadd_v4u8_to(b, dest0, src0, src1, round);
        break;
    default:
        break;
    }

    /* No builder matches this (type, bitsize) pair. */
    UNREACHABLE("Invalid parameters for HADD");
}

/*
 * Generic value-returning HADD builder. Picks the concrete builder from
 * (type, bitsize) exactly like bi_hadd_to(), passing bi_temp(b->shader) as
 * the destination, and returns dest[0] of the built instruction. Every
 * unsupported combination ends in UNREACHABLE.
 */
static inline
bi_index bi_hadd(bi_builder *b, nir_alu_type type, unsigned bitsize, bi_index src0, bi_index src1, enum bi_round round)
{
    switch (bitsize) {
    case 32:
        if (type == nir_type_int)
            return bi_hadd_s32_to(b, bi_temp(b->shader), src0, src1, round)->dest[0];
        if (type == nir_type_uint)
            return bi_hadd_u32_to(b, bi_temp(b->shader), src0, src1, round)->dest[0];
        break;
    case 16:
        if (type == nir_type_int)
            return bi_hadd_v2s16_to(b, bi_temp(b->shader), src0, src1, round)->dest[0];
        if (type == nir_type_uint)
            return bi_hadd_v2u16_to(b, bi_temp(b->shader), src0, src1, round)->dest[0];
        break;
    case 8:
        if (type == nir_type_int)
            return bi_hadd_v4s8_to(b, bi_temp(b->shader), src0, src1, round)->dest[0];
        if (type == nir_type_uint)
            return bi_hadd_v4u8_to(b, bi_temp(b->shader), src0, src1, round)->dest[0];
        break;
    default:
        break;
    }

    /* No builder matches this (type, bitsize) pair. */
    UNREACHABLE("Invalid parameters for HADD");
}

/*
 * Build a BI_OPCODE_IABS_S32 instruction with destination dest0 and source
 * src0, insert it at b->cursor and return it. Operand slots are stored
 * directly behind the bi_instr header.
 */
static inline
bi_instr * bi_iabs_s32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header followed by 1 destination slot and 1 source slot. */
    size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_IABS_S32;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_iabs_s32_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_iabs_s32(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_iabs_s32_to(b, tmp, src0);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_IABS_V2S16 instruction with destination dest0 and source
 * src0, insert it at b->cursor and return it. Operand slots are stored
 * directly behind the bi_instr header.
 */
static inline
bi_instr * bi_iabs_v2s16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header followed by 1 destination slot and 1 source slot. */
    size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_IABS_V2S16;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_iabs_v2s16_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_iabs_v2s16(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_iabs_v2s16_to(b, tmp, src0);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_IABS_V4S8 instruction with destination dest0 and source
 * src0, insert it at b->cursor and return it. Operand slots are stored
 * directly behind the bi_instr header.
 */
static inline
bi_instr * bi_iabs_v4s8_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header followed by 1 destination slot and 1 source slot. */
    size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_IABS_V4S8;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_iabs_v4s8_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_iabs_v4s8(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_iabs_v4s8_to(b, tmp, src0);

    return instr->dest[0];
}


/*
 * Generic IABS builder. Forwards to the s32, v2s16 or v4s8 builder for a
 * bitsize of 32, 16 or 8 respectively; any other bitsize ends in
 * UNREACHABLE.
 */
static inline
bi_instr * bi_iabs_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0)
{
    switch (bitsize) {
    case 32:
        return bi_iabs_s32_to(b, dest0, src0);
    case 16:
        return bi_iabs_v2s16_to(b, dest0, src0);
    case 8:
        return bi_iabs_v4s8_to(b, dest0, src0);
    default:
        break;
    }

    /* No builder exists for this bitsize. */
    UNREACHABLE("Invalid parameters for IABS");
}

/*
 * Generic value-returning IABS builder. Forwards to the s32, v2s16 or v4s8
 * builder for a bitsize of 32, 16 or 8, passing bi_temp(b->shader) as the
 * destination, and returns dest[0] of the built instruction. Any other
 * bitsize ends in UNREACHABLE.
 */
static inline
bi_index bi_iabs(bi_builder *b, unsigned bitsize, bi_index src0)
{
    switch (bitsize) {
    case 32:
        return bi_iabs_s32_to(b, bi_temp(b->shader), src0)->dest[0];
    case 16:
        return bi_iabs_v2s16_to(b, bi_temp(b->shader), src0)->dest[0];
    case 8:
        return bi_iabs_v4s8_to(b, bi_temp(b->shader), src0)->dest[0];
    default:
        break;
    }

    /* No builder exists for this bitsize. */
    UNREACHABLE("Invalid parameters for IABS");
}

/*
 * Build a BI_OPCODE_IADD_S32 instruction with destination dest0, sources
 * src0/src1 and the `saturate` modifier, insert it at b->cursor and return
 * it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_iadd_s32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bool saturate)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_IADD_S32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->saturate = saturate;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_iadd_s32_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_iadd_s32(bi_builder *b, bi_index src0, bi_index src1, bool saturate)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_iadd_s32_to(b, tmp, src0, src1, saturate);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_IADD_U32 instruction with destination dest0, sources
 * src0/src1 and the `saturate` modifier, insert it at b->cursor and return
 * it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_iadd_u32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bool saturate)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_IADD_U32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->saturate = saturate;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_iadd_u32_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_iadd_u32(bi_builder *b, bi_index src0, bi_index src1, bool saturate)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_iadd_u32_to(b, tmp, src0, src1, saturate);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_IADD_V2S16 instruction with destination dest0, sources
 * src0/src1 and the `saturate` modifier, insert it at b->cursor and return
 * it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_iadd_v2s16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bool saturate)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_IADD_V2S16;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->saturate = saturate;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_iadd_v2s16_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_iadd_v2s16(bi_builder *b, bi_index src0, bi_index src1, bool saturate)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_iadd_v2s16_to(b, tmp, src0, src1, saturate);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_IADD_V2U16 instruction with destination dest0, sources
 * src0/src1 and the `saturate` modifier, insert it at b->cursor and return
 * it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_iadd_v2u16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bool saturate)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_IADD_V2U16;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->saturate = saturate;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_iadd_v2u16_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_iadd_v2u16(bi_builder *b, bi_index src0, bi_index src1, bool saturate)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_iadd_v2u16_to(b, tmp, src0, src1, saturate);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_IADD_V4S8 instruction with destination dest0, sources
 * src0/src1 and the `saturate` modifier, insert it at b->cursor and return
 * it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_iadd_v4s8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bool saturate)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_IADD_V4S8;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->saturate = saturate;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_iadd_v4s8_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_iadd_v4s8(bi_builder *b, bi_index src0, bi_index src1, bool saturate)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_iadd_v4s8_to(b, tmp, src0, src1, saturate);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_IADD_V4U8 instruction with destination dest0, sources
 * src0/src1 and the `saturate` modifier, insert it at b->cursor and return
 * it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_iadd_v4u8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bool saturate)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_IADD_V4U8;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->saturate = saturate;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_iadd_v4u8_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_iadd_v4u8(bi_builder *b, bi_index src0, bi_index src1, bool saturate)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_iadd_v4u8_to(b, tmp, src0, src1, saturate);

    return instr->dest[0];
}


/*
 * Generic IADD builder. Picks the concrete builder from (type, bitsize):
 * nir_type_int / nir_type_uint combined with a bitsize of 32, 16 or 8.
 * Every other combination ends in UNREACHABLE.
 */
static inline
bi_instr * bi_iadd_to(bi_builder *b, nir_alu_type type, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bool saturate)
{
    switch (bitsize) {
    case 32:
        if (type == nir_type_int)
            return bi_iadd_s32_to(b, dest0, src0, src1, saturate);
        if (type == nir_type_uint)
            return bi_iadd_u32_to(b, dest0, src0, src1, saturate);
        break;
    case 16:
        if (type == nir_type_int)
            return bi_iadd_v2s16_to(b, dest0, src0, src1, saturate);
        if (type == nir_type_uint)
            return bi_iadd_v2u16_to(b, dest0, src0, src1, saturate);
        break;
    case 8:
        if (type == nir_type_int)
            return bi_iadd_v4s8_to(b, dest0, src0, src1, saturate);
        if (type == nir_type_uint)
            return bi_iadd_v4u8_to(b, dest0, src0, src1, saturate);
        break;
    default:
        break;
    }

    /* No builder matches this (type, bitsize) pair. */
    UNREACHABLE("Invalid parameters for IADD");
}

/*
 * Generic value-returning IADD builder. Picks the concrete builder from
 * (type, bitsize) exactly like bi_iadd_to(), passing bi_temp(b->shader) as
 * the destination, and returns dest[0] of the built instruction. Every
 * unsupported combination ends in UNREACHABLE.
 */
static inline
bi_index bi_iadd(bi_builder *b, nir_alu_type type, unsigned bitsize, bi_index src0, bi_index src1, bool saturate)
{
    switch (bitsize) {
    case 32:
        if (type == nir_type_int)
            return bi_iadd_s32_to(b, bi_temp(b->shader), src0, src1, saturate)->dest[0];
        if (type == nir_type_uint)
            return bi_iadd_u32_to(b, bi_temp(b->shader), src0, src1, saturate)->dest[0];
        break;
    case 16:
        if (type == nir_type_int)
            return bi_iadd_v2s16_to(b, bi_temp(b->shader), src0, src1, saturate)->dest[0];
        if (type == nir_type_uint)
            return bi_iadd_v2u16_to(b, bi_temp(b->shader), src0, src1, saturate)->dest[0];
        break;
    case 8:
        if (type == nir_type_int)
            return bi_iadd_v4s8_to(b, bi_temp(b->shader), src0, src1, saturate)->dest[0];
        if (type == nir_type_uint)
            return bi_iadd_v4u8_to(b, bi_temp(b->shader), src0, src1, saturate)->dest[0];
        break;
    default:
        break;
    }

    /* No builder matches this (type, bitsize) pair. */
    UNREACHABLE("Invalid parameters for IADD");
}

/*
 * Build a BI_OPCODE_IADDC_I32 instruction with destination dest0 and
 * sources src0/src1/src2, insert it at b->cursor and return it. Operand
 * slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_iaddc_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    /* Header followed by 1 destination slot and 3 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_IADDC_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_iaddc_i32_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_iaddc_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_iaddc_i32_to(b, tmp, src0, src1, src2);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_IADD_IMM_I32 instruction with destination dest0 and
 * source src0, storing the `index` argument in the instruction's index
 * field. The instruction is inserted at b->cursor and returned. Operand
 * slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_iadd_imm_i32_to(bi_builder *b, bi_index dest0, bi_index src0, uint32_t index)
{
    /* Header followed by 1 destination slot and 1 source slot. */
    size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_IADD_IMM_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    instr->index = index;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_iadd_imm_i32_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_iadd_imm_i32(bi_builder *b, bi_index src0, uint32_t index)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_iadd_imm_i32_to(b, tmp, src0, index);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_IADD_IMM_V2I16 instruction with destination dest0 and
 * source src0, storing the `index` argument in the instruction's index
 * field. The instruction is inserted at b->cursor and returned. Operand
 * slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_iadd_imm_v2i16_to(bi_builder *b, bi_index dest0, bi_index src0, uint32_t index)
{
    /* Header followed by 1 destination slot and 1 source slot. */
    size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_IADD_IMM_V2I16;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    instr->index = index;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_iadd_imm_v2i16_to(): the destination is the
 * index produced by bi_temp(b->shader), and dest[0] of the built instruction
 * is returned.
 */
static inline
bi_index bi_iadd_imm_v2i16(bi_builder *b, bi_index src0, uint32_t index)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_iadd_imm_v2i16_to(b, tmp, src0, index);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_IADD_IMM_V4I8 instruction with destination dest0 and
 * source src0, storing the `index` argument in the instruction's index
 * field. The instruction is inserted at b->cursor and returned. Operand
 * slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_iadd_imm_v4i8_to(bi_builder *b, bi_index dest0, bi_index src0, uint32_t index)
{
    /* Header followed by 1 destination slot and 1 source slot. */
    size_t bytes = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_IADD_IMM_V4I8;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    instr->index = index;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_iadd_imm_v4i8_to(): the destination is the
 * index produced by bi_temp(b->shader), and dest[0] of the built instruction
 * is returned.
 */
static inline
bi_index bi_iadd_imm_v4i8(bi_builder *b, bi_index src0, uint32_t index)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_iadd_imm_v4i8_to(b, tmp, src0, index);

    return instr->dest[0];
}


/*
 * Generic IADD_IMM builder. Forwards to the i32, v2i16 or v4i8 builder for
 * a bitsize of 32, 16 or 8 respectively; any other bitsize ends in
 * UNREACHABLE.
 */
static inline
bi_instr * bi_iadd_imm_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, uint32_t index)
{
    switch (bitsize) {
    case 32:
        return bi_iadd_imm_i32_to(b, dest0, src0, index);
    case 16:
        return bi_iadd_imm_v2i16_to(b, dest0, src0, index);
    case 8:
        return bi_iadd_imm_v4i8_to(b, dest0, src0, index);
    default:
        break;
    }

    /* No builder exists for this bitsize. */
    UNREACHABLE("Invalid parameters for IADD_IMM");
}

/*
 * Generic value-returning IADD_IMM builder. Forwards to the i32, v2i16 or
 * v4i8 builder for a bitsize of 32, 16 or 8, passing bi_temp(b->shader) as
 * the destination, and returns dest[0] of the built instruction. Any other
 * bitsize ends in UNREACHABLE.
 */
static inline
bi_index bi_iadd_imm(bi_builder *b, unsigned bitsize, bi_index src0, uint32_t index)
{
    switch (bitsize) {
    case 32:
        return bi_iadd_imm_i32_to(b, bi_temp(b->shader), src0, index)->dest[0];
    case 16:
        return bi_iadd_imm_v2i16_to(b, bi_temp(b->shader), src0, index)->dest[0];
    case 8:
        return bi_iadd_imm_v4i8_to(b, bi_temp(b->shader), src0, index)->dest[0];
    default:
        break;
    }

    /* No builder exists for this bitsize. */
    UNREACHABLE("Invalid parameters for IADD_IMM");
}

/*
 * Build a BI_OPCODE_ICMP_I32 instruction with destination dest0, sources
 * src0/src1 and the cmpf / result_type modifiers, insert it at b->cursor and
 * return it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_icmp_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_ICMP_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_icmp_i32_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_icmp_i32(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_icmp_i32_to(b, tmp, src0, src1, cmpf, result_type);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ICMP_S32 instruction with destination dest0, sources
 * src0/src1 and the cmpf / result_type modifiers, insert it at b->cursor and
 * return it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_icmp_s32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_ICMP_S32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_icmp_s32_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_icmp_s32(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_icmp_s32_to(b, tmp, src0, src1, cmpf, result_type);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ICMP_U32 instruction with destination dest0, sources
 * src0/src1 and the cmpf / result_type modifiers, insert it at b->cursor and
 * return it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_icmp_u32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_ICMP_U32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_icmp_u32_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_icmp_u32(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_icmp_u32_to(b, tmp, src0, src1, cmpf, result_type);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ICMP_V2I16 instruction with destination dest0, sources
 * src0/src1 and the cmpf / result_type modifiers, insert it at b->cursor and
 * return it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_icmp_v2i16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_ICMP_V2I16;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_icmp_v2i16_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_icmp_v2i16(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_icmp_v2i16_to(b, tmp, src0, src1, cmpf, result_type);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ICMP_V2S16 instruction with destination dest0, sources
 * src0/src1 and the cmpf / result_type modifiers, insert it at b->cursor and
 * return it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_icmp_v2s16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_ICMP_V2S16;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_icmp_v2s16_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_icmp_v2s16(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_icmp_v2s16_to(b, tmp, src0, src1, cmpf, result_type);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ICMP_V2U16 instruction with destination dest0, sources
 * src0/src1 and the cmpf / result_type modifiers, insert it at b->cursor and
 * return it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_icmp_v2u16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_ICMP_V2U16;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_icmp_v2u16_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_icmp_v2u16(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_icmp_v2u16_to(b, tmp, src0, src1, cmpf, result_type);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ICMP_V4I8 instruction with destination dest0, sources
 * src0/src1 and the cmpf / result_type modifiers, insert it at b->cursor and
 * return it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_icmp_v4i8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_ICMP_V4I8;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_icmp_v4i8_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_icmp_v4i8(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_icmp_v4i8_to(b, tmp, src0, src1, cmpf, result_type);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ICMP_V4S8 instruction with destination dest0, sources
 * src0/src1 and the cmpf / result_type modifiers, insert it at b->cursor and
 * return it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_icmp_v4s8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_ICMP_V4S8;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_icmp_v4s8_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_icmp_v4s8(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_icmp_v4s8_to(b, tmp, src0, src1, cmpf, result_type);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ICMP_V4U8 instruction with destination dest0, sources
 * src0/src1 and the cmpf / result_type modifiers, insert it at b->cursor and
 * return it. Operand slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_icmp_v4u8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header followed by 1 destination slot and 2 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_ICMP_V4U8;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_icmp_v4u8_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_icmp_v4u8(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_icmp_v4u8_to(b, tmp, src0, src1, cmpf, result_type);

    return instr->dest[0];
}


/*
 * Generic ICMP builder. The concrete builder is chosen from
 * (type, bitsize, cmpf):
 *
 *  - EQ / NE with nir_type_int or nir_type_uint use the i32 / v2i16 / v4i8
 *    builders;
 *  - GT / GE / LT / LE use the s32 / v2s16 / v4s8 builders for nir_type_int
 *    and the u32 / v2u16 / v4u8 builders for nir_type_uint.
 *
 * bitsize must be 32, 16 or 8. Every other combination ends in UNREACHABLE.
 */
static inline
bi_instr * bi_icmp_to(bi_builder *b, nir_alu_type type, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    const bool is_sint = (type == nir_type_int);
    const bool is_uint = (type == nir_type_uint);
    const bool is_equality = (cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_NE);
    const bool is_ordering = (cmpf == BI_CMPF_GT || cmpf == BI_CMPF_GE ||
                              cmpf == BI_CMPF_LT || cmpf == BI_CMPF_LE);

    switch (bitsize) {
    case 32:
        if ((is_uint || is_sint) && is_equality)
            return bi_icmp_i32_to(b, dest0, src0, src1, cmpf, result_type);
        if (is_sint && is_ordering)
            return bi_icmp_s32_to(b, dest0, src0, src1, cmpf, result_type);
        if (is_uint && is_ordering)
            return bi_icmp_u32_to(b, dest0, src0, src1, cmpf, result_type);
        break;
    case 16:
        if ((is_uint || is_sint) && is_equality)
            return bi_icmp_v2i16_to(b, dest0, src0, src1, cmpf, result_type);
        if (is_sint && is_ordering)
            return bi_icmp_v2s16_to(b, dest0, src0, src1, cmpf, result_type);
        if (is_uint && is_ordering)
            return bi_icmp_v2u16_to(b, dest0, src0, src1, cmpf, result_type);
        break;
    case 8:
        if ((is_uint || is_sint) && is_equality)
            return bi_icmp_v4i8_to(b, dest0, src0, src1, cmpf, result_type);
        if (is_sint && is_ordering)
            return bi_icmp_v4s8_to(b, dest0, src0, src1, cmpf, result_type);
        if (is_uint && is_ordering)
            return bi_icmp_v4u8_to(b, dest0, src0, src1, cmpf, result_type);
        break;
    default:
        break;
    }

    /* No builder matches this (type, bitsize, cmpf) triple. */
    UNREACHABLE("Invalid parameters for ICMP");
}

/*
 * Generic value-returning ICMP builder. The concrete builder is chosen from
 * (type, bitsize, cmpf):
 *
 *  - EQ / NE with nir_type_int or nir_type_uint use the i32 / v2i16 / v4i8
 *    builders;
 *  - GT / GE / LT / LE use the s32 / v2s16 / v4s8 builders for nir_type_int
 *    and the u32 / v2u16 / v4u8 builders for nir_type_uint.
 *
 * The destination is bi_temp(b->shader) and dest[0] of the built instruction
 * is returned. bitsize must be 32, 16 or 8. Every other combination ends in
 * UNREACHABLE.
 */
static inline
bi_index bi_icmp(bi_builder *b, nir_alu_type type, unsigned bitsize, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    const bool is_sint = (type == nir_type_int);
    const bool is_uint = (type == nir_type_uint);
    const bool is_equality = (cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_NE);
    const bool is_ordering = (cmpf == BI_CMPF_GT || cmpf == BI_CMPF_GE ||
                              cmpf == BI_CMPF_LT || cmpf == BI_CMPF_LE);

    switch (bitsize) {
    case 32:
        if ((is_uint || is_sint) && is_equality)
            return bi_icmp_i32_to(b, bi_temp(b->shader), src0, src1, cmpf, result_type)->dest[0];
        if (is_sint && is_ordering)
            return bi_icmp_s32_to(b, bi_temp(b->shader), src0, src1, cmpf, result_type)->dest[0];
        if (is_uint && is_ordering)
            return bi_icmp_u32_to(b, bi_temp(b->shader), src0, src1, cmpf, result_type)->dest[0];
        break;
    case 16:
        if ((is_uint || is_sint) && is_equality)
            return bi_icmp_v2i16_to(b, bi_temp(b->shader), src0, src1, cmpf, result_type)->dest[0];
        if (is_sint && is_ordering)
            return bi_icmp_v2s16_to(b, bi_temp(b->shader), src0, src1, cmpf, result_type)->dest[0];
        if (is_uint && is_ordering)
            return bi_icmp_v2u16_to(b, bi_temp(b->shader), src0, src1, cmpf, result_type)->dest[0];
        break;
    case 8:
        if ((is_uint || is_sint) && is_equality)
            return bi_icmp_v4i8_to(b, bi_temp(b->shader), src0, src1, cmpf, result_type)->dest[0];
        if (is_sint && is_ordering)
            return bi_icmp_v4s8_to(b, bi_temp(b->shader), src0, src1, cmpf, result_type)->dest[0];
        if (is_uint && is_ordering)
            return bi_icmp_v4u8_to(b, bi_temp(b->shader), src0, src1, cmpf, result_type)->dest[0];
        break;
    default:
        break;
    }

    /* No builder matches this (type, bitsize, cmpf) triple. */
    UNREACHABLE("Invalid parameters for ICMP");
}

/*
 * Build a BI_OPCODE_ICMPF_I32 instruction with destination dest0 and
 * sources src0/src1/src2, insert it at b->cursor and return it. Operand
 * slots are stored directly behind the bi_instr header.
 */
static inline
bi_instr * bi_icmpf_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    /* Header followed by 1 destination slot and 3 source slots. */
    size_t bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_ICMPF_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;

    /* Destinations come first, sources right after them. */
    instr->dest = slots;
    instr->src = slots + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_icmpf_i32_to(): the destination is the index
 * produced by bi_temp(b->shader), and dest[0] of the built instruction is
 * returned.
 */
static inline
bi_index bi_icmpf_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_icmpf_i32_to(b, tmp, src0, src1, src2);

    return instr->dest[0];
}


/* Build a BI_OPCODE_ICMPI_I32 instruction with one destination and two
 * sources, record result_type and cmpf on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_icmpi_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header plus 1 + 2 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 2) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ICMPI_I32;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 2;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->result_type = result_type;
    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_icmpi_i32_to(), but the destination is an index obtained from
 * bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_icmpi_i32(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_instr *instr = bi_icmpi_i32_to(b, bi_temp(b->shader), src0, src1, cmpf, result_type);

    return instr->dest[0];
}


/* Build a BI_OPCODE_ICMPI_S32 instruction with one destination and two
 * sources, record result_type and cmpf on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_icmpi_s32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header plus 1 + 2 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 2) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ICMPI_S32;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 2;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->result_type = result_type;
    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_icmpi_s32_to(), but the destination is an index obtained from
 * bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_icmpi_s32(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_instr *instr = bi_icmpi_s32_to(b, bi_temp(b->shader), src0, src1, cmpf, result_type);

    return instr->dest[0];
}


/* Build a BI_OPCODE_ICMPI_U32 instruction with one destination and two
 * sources, record result_type and cmpf on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_icmpi_u32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header plus 1 + 2 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 2) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ICMPI_U32;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 2;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->result_type = result_type;
    instr->cmpf = cmpf;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_icmpi_u32_to(), but the destination is an index obtained from
 * bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_icmpi_u32(bi_builder *b, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_instr *instr = bi_icmpi_u32_to(b, bi_temp(b->shader), src0, src1, cmpf, result_type);

    return instr->dest[0];
}


/* Choose the ICMPI builder that matches the NIR type and comparison and emit
 * into dest0. EQ/NE go to the i32 builder for either integer type; GT/GE go
 * to the s32 builder for nir_type_int and the u32 builder for nir_type_uint.
 * Every other combination reaches UNREACHABLE. */
static inline
bi_instr * bi_icmpi_to(bi_builder *b, nir_alu_type type, bi_index dest0, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    const bool tests_equality = (cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_NE);
    const bool tests_order = (cmpf == BI_CMPF_GT || cmpf == BI_CMPF_GE);

    if (tests_equality && (type == nir_type_uint || type == nir_type_int))
        return bi_icmpi_i32_to(b, dest0, src0, src1, cmpf, result_type);

    if (tests_order && type == nir_type_int)
        return bi_icmpi_s32_to(b, dest0, src0, src1, cmpf, result_type);

    if (tests_order && type == nir_type_uint)
        return bi_icmpi_u32_to(b, dest0, src0, src1, cmpf, result_type);

    UNREACHABLE("Invalid parameters for ICMPI");
}

/* Choose the ICMPI builder that matches the NIR type and comparison, emit
 * into an index obtained from bi_temp(b->shader) and return that destination.
 * EQ/NE go to the i32 builder for either integer type; GT/GE go to the s32
 * builder for nir_type_int and the u32 builder for nir_type_uint. Every other
 * combination reaches UNREACHABLE. */
static inline
bi_index bi_icmpi(bi_builder *b, nir_alu_type type, bi_index src0, bi_index src1, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    const bool tests_equality = (cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_NE);
    const bool tests_order = (cmpf == BI_CMPF_GT || cmpf == BI_CMPF_GE);

    if (tests_equality && (type == nir_type_uint || type == nir_type_int))
        return bi_icmpi_i32_to(b, bi_temp(b->shader), src0, src1, cmpf, result_type)->dest[0];

    if (tests_order && type == nir_type_int)
        return bi_icmpi_s32_to(b, bi_temp(b->shader), src0, src1, cmpf, result_type)->dest[0];

    if (tests_order && type == nir_type_uint)
        return bi_icmpi_u32_to(b, bi_temp(b->shader), src0, src1, cmpf, result_type)->dest[0];

    UNREACHABLE("Invalid parameters for ICMPI");
}

/* Build a BI_OPCODE_ICMPM_I32 instruction with one destination and three
 * sources, pass it to bi_builder_insert() with the builder's cursor and
 * return it. */
static inline
bi_instr * bi_icmpm_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    /* Header plus 1 + 3 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ICMPM_I32;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_icmpm_i32_to(), but the destination is an index obtained from
 * bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_icmpm_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_instr *instr = bi_icmpm_i32_to(b, bi_temp(b->shader), src0, src1, src2);

    return instr->dest[0];
}


/* Build a BI_OPCODE_ICMP_AND_S32 instruction with one destination and three
 * sources, record cmpf and result_type on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_icmp_and_s32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header plus 1 + 3 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ICMP_AND_S32;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_icmp_and_s32_to(), but the destination is an index obtained from
 * bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_icmp_and_s32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_instr *instr = bi_icmp_and_s32_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type);

    return instr->dest[0];
}


/* Build a BI_OPCODE_ICMP_AND_U32 instruction with one destination and three
 * sources, record cmpf and result_type on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_icmp_and_u32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header plus 1 + 3 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ICMP_AND_U32;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_icmp_and_u32_to(), but the destination is an index obtained from
 * bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_icmp_and_u32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_instr *instr = bi_icmp_and_u32_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type);

    return instr->dest[0];
}


/* Build a BI_OPCODE_ICMP_AND_V2S16 instruction with one destination and three
 * sources, record cmpf and result_type on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_icmp_and_v2s16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header plus 1 + 3 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ICMP_AND_V2S16;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_icmp_and_v2s16_to(), but the destination is an index obtained
 * from bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_icmp_and_v2s16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_instr *instr = bi_icmp_and_v2s16_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type);

    return instr->dest[0];
}


/* Build a BI_OPCODE_ICMP_AND_V2U16 instruction with one destination and three
 * sources, record cmpf and result_type on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_icmp_and_v2u16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header plus 1 + 3 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ICMP_AND_V2U16;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_icmp_and_v2u16_to(), but the destination is an index obtained
 * from bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_icmp_and_v2u16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_instr *instr = bi_icmp_and_v2u16_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type);

    return instr->dest[0];
}


/* Build a BI_OPCODE_ICMP_AND_V4S8 instruction with one destination and three
 * sources, record cmpf and result_type on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_icmp_and_v4s8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header plus 1 + 3 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ICMP_AND_V4S8;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_icmp_and_v4s8_to(), but the destination is an index obtained
 * from bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_icmp_and_v4s8(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_instr *instr = bi_icmp_and_v4s8_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type);

    return instr->dest[0];
}


/* Build a BI_OPCODE_ICMP_AND_V4U8 instruction with one destination and three
 * sources, record cmpf and result_type on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_icmp_and_v4u8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header plus 1 + 3 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ICMP_AND_V4U8;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_icmp_and_v4u8_to(), but the destination is an index obtained
 * from bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_icmp_and_v4u8(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_instr *instr = bi_icmp_and_v4u8_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type);

    return instr->dest[0];
}


/* Choose the ICMP_AND builder from the NIR type (int or uint) and the bit
 * size (32, 16 or 8) and emit into dest0. Only EQ, NE, GT and GE comparisons
 * are accepted; every other combination reaches UNREACHABLE. */
static inline
bi_instr * bi_icmp_and_to(bi_builder *b, nir_alu_type type, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    const bool cmpf_ok = (cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_NE || cmpf == BI_CMPF_GT || cmpf == BI_CMPF_GE);

    if (cmpf_ok && type == nir_type_int) {
        switch (bitsize) {
        case 32:
            return bi_icmp_and_s32_to(b, dest0, src0, src1, src2, cmpf, result_type);
        case 16:
            return bi_icmp_and_v2s16_to(b, dest0, src0, src1, src2, cmpf, result_type);
        case 8:
            return bi_icmp_and_v4s8_to(b, dest0, src0, src1, src2, cmpf, result_type);
        default:
            break;
        }
    } else if (cmpf_ok && type == nir_type_uint) {
        switch (bitsize) {
        case 32:
            return bi_icmp_and_u32_to(b, dest0, src0, src1, src2, cmpf, result_type);
        case 16:
            return bi_icmp_and_v2u16_to(b, dest0, src0, src1, src2, cmpf, result_type);
        case 8:
            return bi_icmp_and_v4u8_to(b, dest0, src0, src1, src2, cmpf, result_type);
        default:
            break;
        }
    }

    UNREACHABLE("Invalid parameters for ICMP_AND");
}

/* Choose the ICMP_AND builder from the NIR type (int or uint) and the bit
 * size (32, 16 or 8), emit into an index obtained from bi_temp(b->shader) and
 * return that destination. Only EQ, NE, GT and GE comparisons are accepted;
 * every other combination reaches UNREACHABLE. */
static inline
bi_index bi_icmp_and(bi_builder *b, nir_alu_type type, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    const bool cmpf_ok = (cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_NE || cmpf == BI_CMPF_GT || cmpf == BI_CMPF_GE);

    if (cmpf_ok && type == nir_type_int) {
        switch (bitsize) {
        case 32:
            return bi_icmp_and_s32_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type)->dest[0];
        case 16:
            return bi_icmp_and_v2s16_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type)->dest[0];
        case 8:
            return bi_icmp_and_v4s8_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type)->dest[0];
        default:
            break;
        }
    } else if (cmpf_ok && type == nir_type_uint) {
        switch (bitsize) {
        case 32:
            return bi_icmp_and_u32_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type)->dest[0];
        case 16:
            return bi_icmp_and_v2u16_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type)->dest[0];
        case 8:
            return bi_icmp_and_v4u8_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type)->dest[0];
        default:
            break;
        }
    }

    UNREACHABLE("Invalid parameters for ICMP_AND");
}

/* Build a BI_OPCODE_ICMP_MULTI_S32 instruction with one destination and three
 * sources, record cmpf and result_type on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_icmp_multi_s32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header plus 1 + 3 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ICMP_MULTI_S32;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_icmp_multi_s32_to(), but the destination is an index obtained
 * from bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_icmp_multi_s32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_instr *instr = bi_icmp_multi_s32_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type);

    return instr->dest[0];
}


/* Build a BI_OPCODE_ICMP_MULTI_U32 instruction with one destination and three
 * sources, record cmpf and result_type on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_icmp_multi_u32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header plus 1 + 3 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ICMP_MULTI_U32;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_icmp_multi_u32_to(), but the destination is an index obtained
 * from bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_icmp_multi_u32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_instr *instr = bi_icmp_multi_u32_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type);

    return instr->dest[0];
}


/* Choose the ICMP_MULTI builder from the NIR type (s32 for nir_type_int, u32
 * for nir_type_uint) and emit into dest0. Only EQ, NE, GT and GE comparisons
 * are accepted; every other combination reaches UNREACHABLE. */
static inline
bi_instr * bi_icmp_multi_to(bi_builder *b, nir_alu_type type, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    const bool cmpf_ok = (cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_NE || cmpf == BI_CMPF_GT || cmpf == BI_CMPF_GE);

    if (cmpf_ok && type == nir_type_int)
        return bi_icmp_multi_s32_to(b, dest0, src0, src1, src2, cmpf, result_type);

    if (cmpf_ok && type == nir_type_uint)
        return bi_icmp_multi_u32_to(b, dest0, src0, src1, src2, cmpf, result_type);

    UNREACHABLE("Invalid parameters for ICMP_MULTI");
}

/* Choose the ICMP_MULTI builder from the NIR type (s32 for nir_type_int, u32
 * for nir_type_uint), emit into an index obtained from bi_temp(b->shader) and
 * return that destination. Only EQ, NE, GT and GE comparisons are accepted;
 * every other combination reaches UNREACHABLE. */
static inline
bi_index bi_icmp_multi(bi_builder *b, nir_alu_type type, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    const bool cmpf_ok = (cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_NE || cmpf == BI_CMPF_GT || cmpf == BI_CMPF_GE);

    if (cmpf_ok && type == nir_type_int)
        return bi_icmp_multi_s32_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type)->dest[0];

    if (cmpf_ok && type == nir_type_uint)
        return bi_icmp_multi_u32_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type)->dest[0];

    UNREACHABLE("Invalid parameters for ICMP_MULTI");
}

/* Build a BI_OPCODE_ICMP_OR_S32 instruction with one destination and three
 * sources, record cmpf and result_type on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_icmp_or_s32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header plus 1 + 3 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ICMP_OR_S32;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_icmp_or_s32_to(), but the destination is an index obtained from
 * bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_icmp_or_s32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_instr *instr = bi_icmp_or_s32_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type);

    return instr->dest[0];
}


/* Build a BI_OPCODE_ICMP_OR_U32 instruction with one destination and three
 * sources, record cmpf and result_type on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_icmp_or_u32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header plus 1 + 3 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ICMP_OR_U32;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_icmp_or_u32_to(), but the destination is an index obtained from
 * bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_icmp_or_u32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_instr *instr = bi_icmp_or_u32_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type);

    return instr->dest[0];
}


/* Build a BI_OPCODE_ICMP_OR_V2S16 instruction with one destination and three
 * sources, record cmpf and result_type on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_icmp_or_v2s16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header plus 1 + 3 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ICMP_OR_V2S16;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_icmp_or_v2s16_to(), but the destination is an index obtained
 * from bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_icmp_or_v2s16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_instr *instr = bi_icmp_or_v2s16_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type);

    return instr->dest[0];
}


/* Build a BI_OPCODE_ICMP_OR_V2U16 instruction with one destination and three
 * sources, record cmpf and result_type on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_icmp_or_v2u16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header plus 1 + 3 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ICMP_OR_V2U16;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_icmp_or_v2u16_to(), but the destination is an index obtained
 * from bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_icmp_or_v2u16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_instr *instr = bi_icmp_or_v2u16_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type);

    return instr->dest[0];
}


/* Build a BI_OPCODE_ICMP_OR_V4S8 instruction with one destination and three
 * sources, record cmpf and result_type on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_icmp_or_v4s8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header plus 1 + 3 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ICMP_OR_V4S8;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_icmp_or_v4s8_to(), but the destination is an index obtained from
 * bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_icmp_or_v4s8(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_instr *instr = bi_icmp_or_v4s8_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type);

    return instr->dest[0];
}


/* Build a BI_OPCODE_ICMP_OR_V4U8 instruction with one destination and three
 * sources, record cmpf and result_type on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_icmp_or_v4u8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    /* Header plus 1 + 3 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ICMP_OR_V4U8;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->cmpf = cmpf;
    instr->result_type = result_type;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_icmp_or_v4u8_to(), but the destination is an index obtained from
 * bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_icmp_or_v4u8(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    bi_instr *instr = bi_icmp_or_v4u8_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type);

    return instr->dest[0];
}


/* Choose the ICMP_OR builder from the NIR type (int or uint) and the bit size
 * (32, 16 or 8) and emit into dest0. Only EQ, NE, GT and GE comparisons are
 * accepted; every other combination reaches UNREACHABLE. */
static inline
bi_instr * bi_icmp_or_to(bi_builder *b, nir_alu_type type, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    const bool cmpf_ok = (cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_NE || cmpf == BI_CMPF_GT || cmpf == BI_CMPF_GE);

    if (cmpf_ok && type == nir_type_int) {
        switch (bitsize) {
        case 32:
            return bi_icmp_or_s32_to(b, dest0, src0, src1, src2, cmpf, result_type);
        case 16:
            return bi_icmp_or_v2s16_to(b, dest0, src0, src1, src2, cmpf, result_type);
        case 8:
            return bi_icmp_or_v4s8_to(b, dest0, src0, src1, src2, cmpf, result_type);
        default:
            break;
        }
    } else if (cmpf_ok && type == nir_type_uint) {
        switch (bitsize) {
        case 32:
            return bi_icmp_or_u32_to(b, dest0, src0, src1, src2, cmpf, result_type);
        case 16:
            return bi_icmp_or_v2u16_to(b, dest0, src0, src1, src2, cmpf, result_type);
        case 8:
            return bi_icmp_or_v4u8_to(b, dest0, src0, src1, src2, cmpf, result_type);
        default:
            break;
        }
    }

    UNREACHABLE("Invalid parameters for ICMP_OR");
}

/* Choose the ICMP_OR builder from the NIR type (int or uint) and the bit size
 * (32, 16 or 8), emit into an index obtained from bi_temp(b->shader) and
 * return that destination. Only EQ, NE, GT and GE comparisons are accepted;
 * every other combination reaches UNREACHABLE. */
static inline
bi_index bi_icmp_or(bi_builder *b, nir_alu_type type, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, enum bi_cmpf cmpf, enum bi_result_type result_type)
{
    const bool cmpf_ok = (cmpf == BI_CMPF_EQ || cmpf == BI_CMPF_NE || cmpf == BI_CMPF_GT || cmpf == BI_CMPF_GE);

    if (cmpf_ok && type == nir_type_int) {
        switch (bitsize) {
        case 32:
            return bi_icmp_or_s32_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type)->dest[0];
        case 16:
            return bi_icmp_or_v2s16_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type)->dest[0];
        case 8:
            return bi_icmp_or_v4s8_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type)->dest[0];
        default:
            break;
        }
    } else if (cmpf_ok && type == nir_type_uint) {
        switch (bitsize) {
        case 32:
            return bi_icmp_or_u32_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type)->dest[0];
        case 16:
            return bi_icmp_or_v2u16_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type)->dest[0];
        case 8:
            return bi_icmp_or_v4u8_to(b, bi_temp(b->shader), src0, src1, src2, cmpf, result_type)->dest[0];
        default:
            break;
        }
    }

    UNREACHABLE("Invalid parameters for ICMP_OR");
}

/* Build a BI_OPCODE_IDP_V4I8 instruction with one destination and two
 * sources, pass it to bi_builder_insert() with the builder's cursor and
 * return it. */
static inline
bi_instr * bi_idp_v4i8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    /* Header plus 1 + 2 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 2) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_IDP_V4I8;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 2;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_idp_v4i8_to(), but the destination is an index obtained from
 * bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_idp_v4i8(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_idp_v4i8_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/* Build a BI_OPCODE_IDPADD_V4S8 instruction with one destination and three
 * sources, record the saturate flag on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_idpadd_v4s8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool saturate)
{
    /* Header plus 1 + 3 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_IDPADD_V4S8;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->saturate = saturate;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_idpadd_v4s8_to(), but the destination is an index obtained from
 * bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_idpadd_v4s8(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool saturate)
{
    bi_instr *instr = bi_idpadd_v4s8_to(b, bi_temp(b->shader), src0, src1, src2, saturate);

    return instr->dest[0];
}


/* Build a BI_OPCODE_IDPADD_V4U8 instruction with one destination and three
 * sources, record the saturate flag on it, pass it to bi_builder_insert()
 * with the builder's cursor and return it. */
static inline
bi_instr * bi_idpadd_v4u8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool saturate)
{
    /* Header plus 1 + 3 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_IDPADD_V4U8;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 3;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->saturate = saturate;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_idpadd_v4u8_to(), but the destination is an index obtained from
 * bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_idpadd_v4u8(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool saturate)
{
    bi_instr *instr = bi_idpadd_v4u8_to(b, bi_temp(b->shader), src0, src1, src2, saturate);

    return instr->dest[0];
}


/* Choose the IDPADD builder from the NIR type (v4s8 for nir_type_int, v4u8
 * for nir_type_uint) and emit into dest0. Any other type reaches
 * UNREACHABLE. */
static inline
bi_instr * bi_idpadd_to(bi_builder *b, nir_alu_type type, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool saturate)
{
    if (type == nir_type_int)
        return bi_idpadd_v4s8_to(b, dest0, src0, src1, src2, saturate);

    if (type == nir_type_uint)
        return bi_idpadd_v4u8_to(b, dest0, src0, src1, src2, saturate);

    UNREACHABLE("Invalid parameters for IDPADD");
}

/* Choose the IDPADD builder from the NIR type (v4s8 for nir_type_int, v4u8
 * for nir_type_uint), emit into an index obtained from bi_temp(b->shader) and
 * return that destination. Any other type reaches UNREACHABLE. */
static inline
bi_index bi_idpadd(bi_builder *b, nir_alu_type type, bi_index src0, bi_index src1, bi_index src2, bool saturate)
{
    if (type == nir_type_int)
        return bi_idpadd_v4s8_to(b, bi_temp(b->shader), src0, src1, src2, saturate)->dest[0];

    if (type == nir_type_uint)
        return bi_idpadd_v4u8_to(b, bi_temp(b->shader), src0, src1, src2, saturate)->dest[0];

    UNREACHABLE("Invalid parameters for IDPADD");
}

/* Build a BI_OPCODE_ILOGB_F32 instruction with one destination and one
 * source, pass it to bi_builder_insert() with the builder's cursor and
 * return it. */
static inline
bi_instr * bi_ilogb_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header plus 1 + 1 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 1) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ILOGB_F32;

    /* The destination slot comes first, the source follows it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_ilogb_f32_to(), but the destination is an index obtained from
 * bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_ilogb_f32(bi_builder *b, bi_index src0)
{
    bi_instr *instr = bi_ilogb_f32_to(b, bi_temp(b->shader), src0);

    return instr->dest[0];
}


/* Build a BI_OPCODE_ILOGB_V2F16 instruction with one destination and one
 * source, pass it to bi_builder_insert() with the builder's cursor and
 * return it. */
static inline
bi_instr * bi_ilogb_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header plus 1 + 1 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 1) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ILOGB_V2F16;

    /* The destination slot comes first, the source follows it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 1;
    instr->src = operands + 1;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_ilogb_v2f16_to(), but the destination is an index obtained from
 * bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_ilogb_v2f16(bi_builder *b, bi_index src0)
{
    bi_instr *instr = bi_ilogb_v2f16_to(b, bi_temp(b->shader), src0);

    return instr->dest[0];
}


/* Choose the ILOGB builder from the bit size (f32 for 32, v2f16 for 16) and
 * emit into dest0. Any other bit size reaches UNREACHABLE. */
static inline
bi_instr * bi_ilogb_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0)
{
    switch (bitsize) {
    case 32:
        return bi_ilogb_f32_to(b, dest0, src0);
    case 16:
        return bi_ilogb_v2f16_to(b, dest0, src0);
    default:
        UNREACHABLE("Invalid parameters for ILOGB");
    }
}

/* Choose the ILOGB builder from the bit size (f32 for 32, v2f16 for 16), emit
 * into an index obtained from bi_temp(b->shader) and return that destination.
 * Any other bit size reaches UNREACHABLE. */
static inline
bi_index bi_ilogb(bi_builder *b, unsigned bitsize, bi_index src0)
{
    switch (bitsize) {
    case 32:
        return bi_ilogb_f32_to(b, bi_temp(b->shader), src0)->dest[0];
    case 16:
        return bi_ilogb_v2f16_to(b, bi_temp(b->shader), src0)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for ILOGB");
    }
}

/* Build a BI_OPCODE_IMOV_FMA instruction with one destination and no sources,
 * record the threads flag on it, pass it to bi_builder_insert() with the
 * builder's cursor and return it. */
static inline
bi_instr * bi_imov_fma_to(bi_builder *b, bi_index dest0, bool threads)
{
    /* Header plus 1 + 0 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 0) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_IMOV_FMA;

    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    /* No sources: src still points just past the single destination slot */
    instr->nr_srcs = 0;
    instr->src = operands + 1;

    instr->threads = threads;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_imov_fma_to(), but the destination is an index obtained from
 * bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_imov_fma(bi_builder *b, bool threads)
{
    bi_instr *instr = bi_imov_fma_to(b, bi_temp(b->shader), threads);

    return instr->dest[0];
}


/* Build a BI_OPCODE_IMUL_I32 instruction with one destination and two
 * sources, pass it to bi_builder_insert() with the builder's cursor and
 * return it. */
static inline
bi_instr * bi_imul_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    /* Header plus 1 + 2 operand slots in a single allocation */
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 2) * sizeof(bi_index));
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_IMUL_I32;

    /* The destination slot comes first, the sources follow it */
    instr->nr_dests = 1;
    instr->dest = operands;
    instr->dest[0] = dest0;

    instr->nr_srcs = 2;
    instr->src = operands + 1;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/* Same as bi_imul_i32_to(), but the destination is an index obtained from
 * bi_temp(b->shader); that destination index is returned. */
static inline
bi_index bi_imul_i32(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_imul_i32_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_IMUL_V2I16 instruction (1 destination, 2 sources) and
 * pass it to bi_builder_insert() at b->cursor. The operand array shares the
 * allocation with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_imul_v2i16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_IMUL_V2I16;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_imul_v2i16_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_imul_v2i16(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_imul_v2i16_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_IMUL_V4I8 instruction (1 destination, 2 sources) and
 * pass it to bi_builder_insert() at b->cursor. The operand array shares the
 * allocation with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_imul_v4i8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_IMUL_V4I8;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_imul_v4i8_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_imul_v4i8(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_imul_v4i8_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/*
 * Size-generic IMUL builder: forwards to bi_imul_i32_to() (bitsize 32),
 * bi_imul_v2i16_to() (bitsize 16) or bi_imul_v4i8_to() (bitsize 8).
 * Any other bitsize hits UNREACHABLE.
 */
static inline
bi_instr * bi_imul_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1)
{
    switch (bitsize) {
    case 32:
        return bi_imul_i32_to(b, dest0, src0, src1);
    case 16:
        return bi_imul_v2i16_to(b, dest0, src0, src1);
    case 8:
        return bi_imul_v4i8_to(b, dest0, src0, src1);
    default:
        UNREACHABLE("Invalid parameters for IMUL");
    }
}

/*
 * Size-generic IMUL builder that uses bi_temp(b->shader) as the destination
 * and returns dest[0] of the emitted instruction. Supports bitsize 32, 16
 * and 8; any other bitsize hits UNREACHABLE.
 */
static inline
bi_index bi_imul(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1)
{
    switch (bitsize) {
    case 32:
        return bi_imul_i32_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    case 16:
        return bi_imul_v2i16_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    case 8:
        return bi_imul_v4i8_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for IMUL");
    }
}

/*
 * Build a BI_OPCODE_IMULD instruction (1 destination, 2 sources) with the
 * given `threads` modifier and pass it to bi_builder_insert() at b->cursor.
 * The operand array shares the allocation with the bi_instr header and
 * starts right after it. Returns the new instruction.
 */
static inline
bi_instr * bi_imuld_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bool threads)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_IMULD;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->threads = threads;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_imuld_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_imuld(bi_builder *b, bi_index src0, bi_index src1, bool threads)
{
    bi_instr *instr = bi_imuld_to(b, bi_temp(b->shader), src0, src1, threads);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ISUB_S32 instruction (1 destination, 2 sources) with the
 * given `saturate` modifier and pass it to bi_builder_insert() at b->cursor.
 * The operand array shares the allocation with the bi_instr header and
 * starts right after it. Returns the new instruction.
 */
static inline
bi_instr * bi_isub_s32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bool saturate)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ISUB_S32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->saturate = saturate;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_isub_s32_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_isub_s32(bi_builder *b, bi_index src0, bi_index src1, bool saturate)
{
    bi_instr *instr = bi_isub_s32_to(b, bi_temp(b->shader), src0, src1, saturate);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ISUB_U32 instruction (1 destination, 2 sources) with the
 * given `saturate` modifier and pass it to bi_builder_insert() at b->cursor.
 * The operand array shares the allocation with the bi_instr header and
 * starts right after it. Returns the new instruction.
 */
static inline
bi_instr * bi_isub_u32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bool saturate)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ISUB_U32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->saturate = saturate;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_isub_u32_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_isub_u32(bi_builder *b, bi_index src0, bi_index src1, bool saturate)
{
    bi_instr *instr = bi_isub_u32_to(b, bi_temp(b->shader), src0, src1, saturate);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ISUB_V2S16 instruction (1 destination, 2 sources) with
 * the given `saturate` modifier and pass it to bi_builder_insert() at
 * b->cursor. The operand array shares the allocation with the bi_instr
 * header and starts right after it. Returns the new instruction.
 */
static inline
bi_instr * bi_isub_v2s16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bool saturate)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ISUB_V2S16;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->saturate = saturate;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_isub_v2s16_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_isub_v2s16(bi_builder *b, bi_index src0, bi_index src1, bool saturate)
{
    bi_instr *instr = bi_isub_v2s16_to(b, bi_temp(b->shader), src0, src1, saturate);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ISUB_V2U16 instruction (1 destination, 2 sources) with
 * the given `saturate` modifier and pass it to bi_builder_insert() at
 * b->cursor. The operand array shares the allocation with the bi_instr
 * header and starts right after it. Returns the new instruction.
 */
static inline
bi_instr * bi_isub_v2u16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bool saturate)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ISUB_V2U16;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->saturate = saturate;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_isub_v2u16_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_isub_v2u16(bi_builder *b, bi_index src0, bi_index src1, bool saturate)
{
    bi_instr *instr = bi_isub_v2u16_to(b, bi_temp(b->shader), src0, src1, saturate);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ISUB_V4S8 instruction (1 destination, 2 sources) with
 * the given `saturate` modifier and pass it to bi_builder_insert() at
 * b->cursor. The operand array shares the allocation with the bi_instr
 * header and starts right after it. Returns the new instruction.
 */
static inline
bi_instr * bi_isub_v4s8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bool saturate)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ISUB_V4S8;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->saturate = saturate;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_isub_v4s8_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_isub_v4s8(bi_builder *b, bi_index src0, bi_index src1, bool saturate)
{
    bi_instr *instr = bi_isub_v4s8_to(b, bi_temp(b->shader), src0, src1, saturate);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_ISUB_V4U8 instruction (1 destination, 2 sources) with
 * the given `saturate` modifier and pass it to bi_builder_insert() at
 * b->cursor. The operand array shares the allocation with the bi_instr
 * header and starts right after it. Returns the new instruction.
 */
static inline
bi_instr * bi_isub_v4u8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bool saturate)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ISUB_V4U8;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->saturate = saturate;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_isub_v4u8_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_isub_v4u8(bi_builder *b, bi_index src0, bi_index src1, bool saturate)
{
    bi_instr *instr = bi_isub_v4u8_to(b, bi_temp(b->shader), src0, src1, saturate);

    return instr->dest[0];
}


/*
 * Type- and size-generic ISUB builder. Picks the variant from `type`
 * (nir_type_int or nir_type_uint) and `bitsize` (32, 16 or 8) and forwards
 * all operands plus `saturate` to it. Any other combination hits
 * UNREACHABLE.
 */
static inline
bi_instr * bi_isub_to(bi_builder *b, nir_alu_type type, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bool saturate)
{
    const bool is_signed = (type == nir_type_int);
    const bool is_unsigned = (type == nir_type_uint);

    switch (bitsize) {
    case 32:
        if (is_signed)
            return bi_isub_s32_to(b, dest0, src0, src1, saturate);
        if (is_unsigned)
            return bi_isub_u32_to(b, dest0, src0, src1, saturate);
        break;
    case 16:
        if (is_signed)
            return bi_isub_v2s16_to(b, dest0, src0, src1, saturate);
        if (is_unsigned)
            return bi_isub_v2u16_to(b, dest0, src0, src1, saturate);
        break;
    case 8:
        if (is_signed)
            return bi_isub_v4s8_to(b, dest0, src0, src1, saturate);
        if (is_unsigned)
            return bi_isub_v4u8_to(b, dest0, src0, src1, saturate);
        break;
    default:
        break;
    }

    UNREACHABLE("Invalid parameters for ISUB");
}

/*
 * Type- and size-generic ISUB builder that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. Picks the
 * variant from `type` (nir_type_int or nir_type_uint) and `bitsize`
 * (32, 16 or 8). Any other combination hits UNREACHABLE.
 */
static inline
bi_index bi_isub(bi_builder *b, nir_alu_type type, unsigned bitsize, bi_index src0, bi_index src1, bool saturate)
{
    const bool is_signed = (type == nir_type_int);
    const bool is_unsigned = (type == nir_type_uint);

    switch (bitsize) {
    case 32:
        if (is_signed)
            return bi_isub_s32_to(b, bi_temp(b->shader), src0, src1, saturate)->dest[0];
        if (is_unsigned)
            return bi_isub_u32_to(b, bi_temp(b->shader), src0, src1, saturate)->dest[0];
        break;
    case 16:
        if (is_signed)
            return bi_isub_v2s16_to(b, bi_temp(b->shader), src0, src1, saturate)->dest[0];
        if (is_unsigned)
            return bi_isub_v2u16_to(b, bi_temp(b->shader), src0, src1, saturate)->dest[0];
        break;
    case 8:
        if (is_signed)
            return bi_isub_v4s8_to(b, bi_temp(b->shader), src0, src1, saturate)->dest[0];
        if (is_unsigned)
            return bi_isub_v4u8_to(b, bi_temp(b->shader), src0, src1, saturate)->dest[0];
        break;
    default:
        break;
    }

    UNREACHABLE("Invalid parameters for ISUB");
}

/*
 * Build a BI_OPCODE_ISUBB_I32 instruction (1 destination, 3 sources) and
 * pass it to bi_builder_insert() at b->cursor. The operand array shares the
 * allocation with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_isubb_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_ISUBB_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_isubb_i32_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_isubb_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_instr *instr = bi_isubb_i32_to(b, bi_temp(b->shader), src0, src1, src2);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_JUMP instruction (no destination, 1 source) and pass it
 * to bi_builder_insert() at b->cursor. The operand array shares the
 * allocation with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_jump(bi_builder *b, bi_index src0)
{
    const size_t bytes = sizeof(bi_instr) + sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_JUMP;
    instr->nr_dests = 0;
    instr->nr_srcs = 1;
    /* No destinations: dest and src both point at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BI_OPCODE_JUMP_EX instruction (1 destination, 3 sources) with the
 * given `test_mode` and `stack_mode` modifiers and pass it to
 * bi_builder_insert() at b->cursor. The operand array shares the allocation
 * with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_jump_ex_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_stack_mode stack_mode, bool test_mode)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_JUMP_EX;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->test_mode = test_mode;
    instr->stack_mode = stack_mode;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_jump_ex_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_jump_ex(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_stack_mode stack_mode, bool test_mode)
{
    bi_instr *instr = bi_jump_ex_to(b, bi_temp(b->shader), src0, src1, src2, stack_mode, test_mode);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_KABOOM instruction (no destination, 1 source) and pass
 * it to bi_builder_insert() at b->cursor. The operand array shares the
 * allocation with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_kaboom(bi_builder *b, bi_index src0)
{
    const size_t bytes = sizeof(bi_instr) + sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_KABOOM;
    instr->nr_dests = 0;
    instr->nr_srcs = 1;
    /* No destinations: dest and src both point at the first operand slot. */
    instr->dest = operands;
    instr->src = operands;

    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a BI_OPCODE_LDEXP_F32 instruction (1 destination, 2 sources) and
 * pass it to bi_builder_insert() at b->cursor. The `round` modifier is not a
 * parameter; it is taken from bi_round_mode(b->shader, 32). The operand
 * array shares the allocation with the bi_instr header and starts right
 * after it. Returns the new instruction.
 */
static inline
bi_instr * bi_ldexp_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LDEXP_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->round = bi_round_mode(b->shader, 32);

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ldexp_f32_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ldexp_f32(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_ldexp_f32_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_LDEXP_V2F16 instruction (1 destination, 2 sources) and
 * pass it to bi_builder_insert() at b->cursor. The `round` modifier is not a
 * parameter; it is taken from bi_round_mode(b->shader, 16). The operand
 * array shares the allocation with the bi_instr header and starts right
 * after it. Returns the new instruction.
 */
static inline
bi_instr * bi_ldexp_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LDEXP_V2F16;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->round = bi_round_mode(b->shader, 16);

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ldexp_v2f16_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ldexp_v2f16(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_ldexp_v2f16_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/*
 * Size-generic LDEXP builder: forwards to bi_ldexp_f32_to() for bitsize 32
 * and to bi_ldexp_v2f16_to() for bitsize 16. Any other bitsize hits
 * UNREACHABLE.
 */
static inline
bi_instr * bi_ldexp_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1)
{
    switch (bitsize) {
    case 32:
        return bi_ldexp_f32_to(b, dest0, src0, src1);
    case 16:
        return bi_ldexp_v2f16_to(b, dest0, src0, src1);
    default:
        UNREACHABLE("Invalid parameters for LDEXP");
    }
}

/*
 * Size-generic LDEXP builder that uses bi_temp(b->shader) as the destination
 * and returns dest[0] of the emitted instruction. Supports bitsize 32 and 16;
 * any other bitsize hits UNREACHABLE.
 */
static inline
bi_index bi_ldexp(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1)
{
    switch (bitsize) {
    case 32:
        return bi_ldexp_f32_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    case 16:
        return bi_ldexp_v2f16_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for LDEXP");
    }
}

/*
 * Build a BI_OPCODE_LD_ATTR instruction (1 destination, 3 sources) with the
 * given `register_format` and `vecsize` modifiers and pass it to
 * bi_builder_insert() at b->cursor. The operand array shares the allocation
 * with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_ld_attr_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_register_format register_format, enum bi_vecsize vecsize)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_ATTR;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->register_format = register_format;
    instr->vecsize = vecsize;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_attr_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_attr(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_register_format register_format, enum bi_vecsize vecsize)
{
    bi_instr *instr = bi_ld_attr_to(b, bi_temp(b->shader), src0, src1, src2, register_format, vecsize);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_LD_ATTR_IMM instruction (1 destination, 2 sources) with
 * the given `register_format`, `vecsize` and `attribute_index` fields and
 * pass it to bi_builder_insert() at b->cursor. The operand array shares the
 * allocation with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_ld_attr_imm_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_register_format register_format, enum bi_vecsize vecsize, uint32_t attribute_index)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_ATTR_IMM;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->register_format = register_format;
    instr->vecsize = vecsize;
    instr->attribute_index = attribute_index;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_attr_imm_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_attr_imm(bi_builder *b, bi_index src0, bi_index src1, enum bi_register_format register_format, enum bi_vecsize vecsize, uint32_t attribute_index)
{
    bi_instr *instr = bi_ld_attr_imm_to(b, bi_temp(b->shader), src0, src1, register_format, vecsize, attribute_index);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_LD_ATTR_TEX instruction (1 destination, 3 sources) with
 * the given `register_format` and `vecsize` modifiers and pass it to
 * bi_builder_insert() at b->cursor. The operand array shares the allocation
 * with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_ld_attr_tex_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_register_format register_format, enum bi_vecsize vecsize)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_ATTR_TEX;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->register_format = register_format;
    instr->vecsize = vecsize;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_attr_tex_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_attr_tex(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_register_format register_format, enum bi_vecsize vecsize)
{
    bi_instr *instr = bi_ld_attr_tex_to(b, bi_temp(b->shader), src0, src1, src2, register_format, vecsize);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_LD_CVT instruction (1 destination, 3 sources) with the
 * given `register_format` and `vecsize` modifiers and pass it to
 * bi_builder_insert() at b->cursor. The operand array shares the allocation
 * with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_ld_cvt_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_register_format register_format, enum bi_vecsize vecsize)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_CVT;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->register_format = register_format;
    instr->vecsize = vecsize;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_cvt_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_cvt(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_register_format register_format, enum bi_vecsize vecsize)
{
    bi_instr *instr = bi_ld_cvt_to(b, bi_temp(b->shader), src0, src1, src2, register_format, vecsize);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_LD_GCLK_U64 instruction (1 destination, no sources) with
 * the given `source` modifier and pass it to bi_builder_insert() at
 * b->cursor. The operand array shares the allocation with the bi_instr
 * header and starts right after it. Returns the new instruction.
 */
static inline
bi_instr * bi_ld_gclk_u64_to(bi_builder *b, bi_index dest0, enum bi_source source)
{
    const size_t bytes = sizeof(bi_instr) + sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_GCLK_U64;
    instr->nr_dests = 1;
    instr->nr_srcs = 0;
    instr->dest = operands;
    /* No sources: src points one past the single destination slot. */
    instr->src = operands + 1;

    instr->dest[0] = dest0;

    instr->source = source;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_gclk_u64_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_gclk_u64(bi_builder *b, enum bi_source source)
{
    bi_instr *instr = bi_ld_gclk_u64_to(b, bi_temp(b->shader), source);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_LD_PKA_I128 instruction (1 destination, 2 sources) and
 * pass it to bi_builder_insert() at b->cursor. The operand array shares the
 * allocation with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_ld_pka_i128_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_PKA_I128;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_pka_i128_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_pka_i128(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_ld_pka_i128_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_LD_PKA_I16 instruction (1 destination, 2 sources) and
 * pass it to bi_builder_insert() at b->cursor. The operand array shares the
 * allocation with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_ld_pka_i16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_PKA_I16;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_pka_i16_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_pka_i16(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_ld_pka_i16_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_LD_PKA_I24 instruction (1 destination, 2 sources) and
 * pass it to bi_builder_insert() at b->cursor. The operand array shares the
 * allocation with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_ld_pka_i24_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_PKA_I24;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_pka_i24_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_pka_i24(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_ld_pka_i24_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_LD_PKA_I32 instruction (1 destination, 2 sources) and
 * pass it to bi_builder_insert() at b->cursor. The operand array shares the
 * allocation with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_ld_pka_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_PKA_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_pka_i32_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_pka_i32(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_ld_pka_i32_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_LD_PKA_I48 instruction (1 destination, 2 sources) and
 * pass it to bi_builder_insert() at b->cursor. The operand array shares the
 * allocation with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_ld_pka_i48_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_PKA_I48;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_pka_i48_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_pka_i48(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_ld_pka_i48_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_LD_PKA_I64 instruction (1 destination, 2 sources) and
 * pass it to bi_builder_insert() at b->cursor. The operand array shares the
 * allocation with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_ld_pka_i64_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_PKA_I64;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_pka_i64_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_pka_i64(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_ld_pka_i64_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_LD_PKA_I8 instruction (1 destination, 2 sources) and
 * pass it to bi_builder_insert() at b->cursor. The operand array shares the
 * allocation with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_ld_pka_i8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_PKA_I8;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_pka_i8_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_pka_i8(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_ld_pka_i8_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_LD_PKA_I96 instruction (1 destination, 2 sources) and
 * pass it to bi_builder_insert() at b->cursor. The operand array shares the
 * allocation with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_ld_pka_i96_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_PKA_I96;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_pka_i96_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_pka_i96(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_instr *instr = bi_ld_pka_i96_to(b, bi_temp(b->shader), src0, src1);

    return instr->dest[0];
}


/*
 * Size-generic LD_PKA builder: forwards to the bi_ld_pka_i<N>_to() variant
 * matching `bitsize` (128, 16, 24, 32, 48, 64, 8 or 96). Any other bitsize
 * hits UNREACHABLE.
 */
static inline
bi_instr * bi_ld_pka_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1)
{
    switch (bitsize) {
    case 128:
        return bi_ld_pka_i128_to(b, dest0, src0, src1);
    case 16:
        return bi_ld_pka_i16_to(b, dest0, src0, src1);
    case 24:
        return bi_ld_pka_i24_to(b, dest0, src0, src1);
    case 32:
        return bi_ld_pka_i32_to(b, dest0, src0, src1);
    case 48:
        return bi_ld_pka_i48_to(b, dest0, src0, src1);
    case 64:
        return bi_ld_pka_i64_to(b, dest0, src0, src1);
    case 8:
        return bi_ld_pka_i8_to(b, dest0, src0, src1);
    case 96:
        return bi_ld_pka_i96_to(b, dest0, src0, src1);
    default:
        UNREACHABLE("Invalid parameters for LD_PKA");
    }
}

/*
 * Size-generic LD_PKA builder that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction. Supports
 * bitsize 128, 16, 24, 32, 48, 64, 8 and 96; any other bitsize hits
 * UNREACHABLE.
 */
static inline
bi_index bi_ld_pka(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1)
{
    switch (bitsize) {
    case 128:
        return bi_ld_pka_i128_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    case 16:
        return bi_ld_pka_i16_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    case 24:
        return bi_ld_pka_i24_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    case 32:
        return bi_ld_pka_i32_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    case 48:
        return bi_ld_pka_i48_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    case 64:
        return bi_ld_pka_i64_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    case 8:
        return bi_ld_pka_i8_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    case 96:
        return bi_ld_pka_i96_to(b, bi_temp(b->shader), src0, src1)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for LD_PKA");
    }
}

/*
 * Build a BI_OPCODE_LD_TEX instruction (1 destination, 3 sources) with the
 * given `vecsize` and `register_format` modifiers and pass it to
 * bi_builder_insert() at b->cursor. The operand array shares the allocation
 * with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_ld_tex_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_register_format register_format, enum bi_vecsize vecsize)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_TEX;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->vecsize = vecsize;
    instr->register_format = register_format;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_tex_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_tex(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_register_format register_format, enum bi_vecsize vecsize)
{
    bi_instr *instr = bi_ld_tex_to(b, bi_temp(b->shader), src0, src1, src2, register_format, vecsize);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_LD_TEX_IMM instruction (1 destination, 2 sources) with
 * the given `vecsize`, `register_format` and `texture_index` fields and pass
 * it to bi_builder_insert() at b->cursor. The operand array shares the
 * allocation with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_ld_tex_imm_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_register_format register_format, enum bi_vecsize vecsize, uint32_t texture_index)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_TEX_IMM;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->vecsize = vecsize;
    instr->register_format = register_format;
    instr->texture_index = texture_index;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_tex_imm_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_tex_imm(bi_builder *b, bi_index src0, bi_index src1, enum bi_register_format register_format, enum bi_vecsize vecsize, uint32_t texture_index)
{
    bi_instr *instr = bi_ld_tex_imm_to(b, bi_temp(b->shader), src0, src1, register_format, vecsize, texture_index);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_LD_TILE instruction (1 destination, 3 sources) with the
 * given `vecsize` and `register_format` modifiers and pass it to
 * bi_builder_insert() at b->cursor. The operand array shares the allocation
 * with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_ld_tile_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_register_format register_format, enum bi_vecsize vecsize)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_TILE;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->vecsize = vecsize;
    instr->register_format = register_format;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_tile_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_tile(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_register_format register_format, enum bi_vecsize vecsize)
{
    bi_instr *instr = bi_ld_tile_to(b, bi_temp(b->shader), src0, src1, src2, register_format, vecsize);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_LD_VAR instruction (1 destination, 2 sources) with the
 * given `vecsize`, `update`, `register_format` and `sample` modifiers and
 * pass it to bi_builder_insert() at b->cursor. The operand array shares the
 * allocation with the bi_instr header and starts right after it.
 * Returns the new instruction.
 */
static inline
bi_instr * bi_ld_var_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_register_format register_format, enum bi_sample sample, enum bi_update update, enum bi_vecsize vecsize)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_VAR;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->vecsize = vecsize;
    instr->update = update;
    instr->register_format = register_format;
    instr->sample = sample;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_var_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_var(bi_builder *b, bi_index src0, bi_index src1, enum bi_register_format register_format, enum bi_sample sample, enum bi_update update, enum bi_vecsize vecsize)
{
    bi_instr *instr = bi_ld_var_to(b, bi_temp(b->shader), src0, src1, register_format, sample, update, vecsize);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_LD_VAR_BUF_F16 instruction (1 destination, 2 sources)
 * with the given `vecsize`, `source_format`, `register_format`, `sample` and
 * `update` modifiers and pass it to bi_builder_insert() at b->cursor. The
 * operand array shares the allocation with the bi_instr header and starts
 * right after it. Returns the new instruction.
 */
static inline
bi_instr * bi_ld_var_buf_f16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_register_format register_format, enum bi_sample sample, enum bi_source_format source_format, enum bi_update update, enum bi_vecsize vecsize)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_VAR_BUF_F16;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->vecsize = vecsize;
    instr->source_format = source_format;
    instr->register_format = register_format;
    instr->sample = sample;
    instr->update = update;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_var_buf_f16_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_var_buf_f16(bi_builder *b, bi_index src0, bi_index src1, enum bi_register_format register_format, enum bi_sample sample, enum bi_source_format source_format, enum bi_update update, enum bi_vecsize vecsize)
{
    bi_instr *instr = bi_ld_var_buf_f16_to(b, bi_temp(b->shader), src0, src1, register_format, sample, source_format, update, vecsize);

    return instr->dest[0];
}


/*
 * Build a BI_OPCODE_LD_VAR_BUF_F32 instruction (1 destination, 2 sources)
 * with the given `vecsize`, `source_format`, `register_format`, `sample` and
 * `update` modifiers and pass it to bi_builder_insert() at b->cursor. The
 * operand array shares the allocation with the bi_instr header and starts
 * right after it. Returns the new instruction.
 */
static inline
bi_instr * bi_ld_var_buf_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_register_format register_format, enum bi_sample sample, enum bi_source_format source_format, enum bi_update update, enum bi_vecsize vecsize)
{
    const size_t bytes = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_VAR_BUF_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = operands;
    instr->src = operands + 1;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->vecsize = vecsize;
    instr->source_format = source_format;
    instr->register_format = register_format;
    instr->sample = sample;
    instr->update = update;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_ld_var_buf_f32_to(): uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_ld_var_buf_f32(bi_builder *b, bi_index src0, bi_index src1, enum bi_register_format register_format, enum bi_sample sample, enum bi_source_format source_format, enum bi_update update, enum bi_vecsize vecsize)
{
    bi_instr *instr = bi_ld_var_buf_f32_to(b, bi_temp(b->shader), src0, src1, register_format, sample, source_format, update, vecsize);

    return instr->dest[0];
}


/*
 * Size-generic LD_VAR_BUF builder: bitsize 16 selects the F16 variant and
 * 32 the F32 variant. Any other bitsize reaches UNREACHABLE().
 */
static inline bi_instr *
bi_ld_var_buf_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, enum bi_register_format register_format, enum bi_sample sample, enum bi_source_format source_format, enum bi_update update, enum bi_vecsize vecsize)
{
    switch (bitsize) {
    case 16:
        return bi_ld_var_buf_f16_to(b, dest0, src0, src1, register_format, sample, source_format, update, vecsize);
    case 32:
        return bi_ld_var_buf_f32_to(b, dest0, src0, src1, register_format, sample, source_format, update, vecsize);
    default:
        UNREACHABLE("Invalid parameters for LD_VAR_BUF");
    }
}

/*
 * Size-generic, value-returning LD_VAR_BUF builder. Bitsize 16 selects F16
 * and 32 selects F32; the destination comes from bi_temp(b->shader) and the
 * emitted instruction's dest[0] is returned. Any other bitsize reaches
 * UNREACHABLE().
 */
static inline bi_index
bi_ld_var_buf(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, enum bi_register_format register_format, enum bi_sample sample, enum bi_source_format source_format, enum bi_update update, enum bi_vecsize vecsize)
{
    switch (bitsize) {
    case 16:
        return bi_ld_var_buf_f16_to(b, bi_temp(b->shader), src0, src1, register_format, sample, source_format, update, vecsize)->dest[0];
    case 32:
        return bi_ld_var_buf_f32_to(b, bi_temp(b->shader), src0, src1, register_format, sample, source_format, update, vecsize)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for LD_VAR_BUF");
    }
}

/*
 * Emit LD_VAR_BUF_IMM_F16: allocate the instruction (1 destination, 1 source)
 * from b->shader, fill in operands, modifiers and the immediate index, insert
 * it at b->cursor and return it.
 */
static inline bi_instr *
bi_ld_var_buf_imm_f16_to(bi_builder *b, bi_index dest0, bi_index src0, enum bi_register_format register_format, enum bi_sample sample, enum bi_source_format source_format, enum bi_update update, enum bi_vecsize vecsize, uint32_t index)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 1 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_VAR_BUF_IMM_F16;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    instr->register_format = register_format;
    instr->sample = sample;
    instr->source_format = source_format;
    instr->update = update;
    instr->vecsize = vecsize;
    instr->index = index;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_ld_var_buf_imm_f16_to(): the destination is
 * taken from bi_temp(b->shader) and dest[0] of the emitted instruction is
 * returned.
 */
static inline bi_index
bi_ld_var_buf_imm_f16(bi_builder *b, bi_index src0, enum bi_register_format register_format, enum bi_sample sample, enum bi_source_format source_format, enum bi_update update, enum bi_vecsize vecsize, uint32_t index)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_ld_var_buf_imm_f16_to(b, dest, src0, register_format, sample, source_format, update, vecsize, index);

    return instr->dest[0];
}


/*
 * Emit LD_VAR_BUF_IMM_F32: allocate the instruction (1 destination, 1 source)
 * from b->shader, fill in operands, modifiers and the immediate index, insert
 * it at b->cursor and return it.
 */
static inline bi_instr *
bi_ld_var_buf_imm_f32_to(bi_builder *b, bi_index dest0, bi_index src0, enum bi_register_format register_format, enum bi_sample sample, enum bi_source_format source_format, enum bi_update update, enum bi_vecsize vecsize, uint32_t index)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 1 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_VAR_BUF_IMM_F32;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    instr->register_format = register_format;
    instr->sample = sample;
    instr->source_format = source_format;
    instr->update = update;
    instr->vecsize = vecsize;
    instr->index = index;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_ld_var_buf_imm_f32_to(): the destination is
 * taken from bi_temp(b->shader) and dest[0] of the emitted instruction is
 * returned.
 */
static inline bi_index
bi_ld_var_buf_imm_f32(bi_builder *b, bi_index src0, enum bi_register_format register_format, enum bi_sample sample, enum bi_source_format source_format, enum bi_update update, enum bi_vecsize vecsize, uint32_t index)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_ld_var_buf_imm_f32_to(b, dest, src0, register_format, sample, source_format, update, vecsize, index);

    return instr->dest[0];
}


/*
 * Size-generic LD_VAR_BUF_IMM builder: bitsize 16 selects the F16 variant and
 * 32 the F32 variant. Any other bitsize reaches UNREACHABLE().
 */
static inline bi_instr *
bi_ld_var_buf_imm_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, enum bi_register_format register_format, enum bi_sample sample, enum bi_source_format source_format, enum bi_update update, enum bi_vecsize vecsize, uint32_t index)
{
    switch (bitsize) {
    case 16:
        return bi_ld_var_buf_imm_f16_to(b, dest0, src0, register_format, sample, source_format, update, vecsize, index);
    case 32:
        return bi_ld_var_buf_imm_f32_to(b, dest0, src0, register_format, sample, source_format, update, vecsize, index);
    default:
        UNREACHABLE("Invalid parameters for LD_VAR_BUF_IMM");
    }
}

/*
 * Size-generic, value-returning LD_VAR_BUF_IMM builder. Bitsize 16 selects
 * F16 and 32 selects F32; the destination comes from bi_temp(b->shader) and
 * the emitted instruction's dest[0] is returned. Any other bitsize reaches
 * UNREACHABLE().
 */
static inline bi_index
bi_ld_var_buf_imm(bi_builder *b, unsigned bitsize, bi_index src0, enum bi_register_format register_format, enum bi_sample sample, enum bi_source_format source_format, enum bi_update update, enum bi_vecsize vecsize, uint32_t index)
{
    switch (bitsize) {
    case 16:
        return bi_ld_var_buf_imm_f16_to(b, bi_temp(b->shader), src0, register_format, sample, source_format, update, vecsize, index)->dest[0];
    case 32:
        return bi_ld_var_buf_imm_f32_to(b, bi_temp(b->shader), src0, register_format, sample, source_format, update, vecsize, index)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for LD_VAR_BUF_IMM");
    }
}

/*
 * Emit LD_VAR_FLAT: allocate the instruction (1 destination, 1 source) from
 * b->shader, fill in operands and modifiers, insert it at b->cursor and
 * return it.
 */
static inline bi_instr *
bi_ld_var_flat_to(bi_builder *b, bi_index dest0, bi_index src0, enum bi_function function, enum bi_register_format register_format, enum bi_vecsize vecsize)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 1 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_VAR_FLAT;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    instr->function = function;
    instr->register_format = register_format;
    instr->vecsize = vecsize;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_ld_var_flat_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_ld_var_flat(bi_builder *b, bi_index src0, enum bi_function function, enum bi_register_format register_format, enum bi_vecsize vecsize)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_ld_var_flat_to(b, dest, src0, function, register_format, vecsize);

    return instr->dest[0];
}


/*
 * Emit LD_VAR_FLAT_IMM: allocate the instruction (1 destination, no sources)
 * from b->shader, fill in the destination, modifiers and immediate index,
 * insert it at b->cursor and return it.
 */
static inline bi_instr *
bi_ld_var_flat_imm_to(bi_builder *b, bi_index dest0, enum bi_function function, enum bi_register_format register_format, enum bi_vecsize vecsize, uint32_t index)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 0 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_VAR_FLAT_IMM;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    /* With zero sources, src only marks the end of the destination slots. */
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;

    instr->function = function;
    instr->register_format = register_format;
    instr->vecsize = vecsize;
    instr->index = index;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_ld_var_flat_imm_to(): the destination is taken
 * from bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_ld_var_flat_imm(bi_builder *b, enum bi_function function, enum bi_register_format register_format, enum bi_vecsize vecsize, uint32_t index)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_ld_var_flat_imm_to(b, dest, function, register_format, vecsize, index);

    return instr->dest[0];
}


/*
 * Emit LD_VAR_IMM: allocate the instruction (1 destination, 1 source) from
 * b->shader, fill in operands, modifiers and the immediate index, insert it
 * at b->cursor and return it.
 */
static inline bi_instr *
bi_ld_var_imm_to(bi_builder *b, bi_index dest0, bi_index src0, enum bi_register_format register_format, enum bi_sample sample, enum bi_update update, enum bi_vecsize vecsize, uint32_t index)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 1 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_VAR_IMM;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    instr->register_format = register_format;
    instr->sample = sample;
    instr->update = update;
    instr->vecsize = vecsize;
    instr->index = index;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_ld_var_imm_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_ld_var_imm(bi_builder *b, bi_index src0, enum bi_register_format register_format, enum bi_sample sample, enum bi_update update, enum bi_vecsize vecsize, uint32_t index)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_ld_var_imm_to(b, dest, src0, register_format, sample, update, vecsize, index);

    return instr->dest[0];
}


/*
 * Emit LD_VAR_SPECIAL: allocate the instruction (1 destination, 1 source)
 * from b->shader, fill in operands and modifiers, insert it at b->cursor and
 * return it.
 */
static inline bi_instr *
bi_ld_var_special_to(bi_builder *b, bi_index dest0, bi_index src0, enum bi_register_format register_format, enum bi_sample sample, enum bi_update update, enum bi_varying_name varying_name, enum bi_vecsize vecsize)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 1 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LD_VAR_SPECIAL;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    instr->register_format = register_format;
    instr->sample = sample;
    instr->update = update;
    instr->varying_name = varying_name;
    instr->vecsize = vecsize;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_ld_var_special_to(): the destination is taken
 * from bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_ld_var_special(bi_builder *b, bi_index src0, enum bi_register_format register_format, enum bi_sample sample, enum bi_update update, enum bi_varying_name varying_name, enum bi_vecsize vecsize)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_ld_var_special_to(b, dest, src0, register_format, sample, update, varying_name, vecsize);

    return instr->dest[0];
}


/*
 * Emit LEA_ATTR: allocate the instruction (1 destination, 3 sources) from
 * b->shader, fill in operands and the register format, insert it at
 * b->cursor and return it.
 */
static inline bi_instr *
bi_lea_attr_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_register_format register_format)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 3 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LEA_ATTR;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->register_format = register_format;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_lea_attr_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_lea_attr(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_register_format register_format)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_lea_attr_to(b, dest, src0, src1, src2, register_format);

    return instr->dest[0];
}


/*
 * Emit LEA_ATTR_IMM: allocate the instruction (1 destination, 2 sources)
 * from b->shader, fill in operands, the register format and the immediate
 * attribute index, insert it at b->cursor and return it.
 */
static inline bi_instr *
bi_lea_attr_imm_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_register_format register_format, uint32_t attribute_index)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 2 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LEA_ATTR_IMM;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->register_format = register_format;
    instr->attribute_index = attribute_index;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_lea_attr_imm_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_lea_attr_imm(bi_builder *b, bi_index src0, bi_index src1, enum bi_register_format register_format, uint32_t attribute_index)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_lea_attr_imm_to(b, dest, src0, src1, register_format, attribute_index);

    return instr->dest[0];
}


/*
 * Emit LEA_ATTR_TEX: allocate the instruction (1 destination, 3 sources)
 * from b->shader, fill in operands and the register format, insert it at
 * b->cursor and return it.
 */
static inline bi_instr *
bi_lea_attr_tex_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_register_format register_format)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 3 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LEA_ATTR_TEX;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->register_format = register_format;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_lea_attr_tex_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_lea_attr_tex(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_register_format register_format)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_lea_attr_tex_to(b, dest, src0, src1, src2, register_format);

    return instr->dest[0];
}


/*
 * Emit LEA_BUF: allocate the instruction (1 destination, 2 sources) from
 * b->shader, fill in its operands, insert it at b->cursor and return it.
 */
static inline bi_instr *
bi_lea_buf_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 2 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LEA_BUF;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_lea_buf_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_lea_buf(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_lea_buf_to(b, dest, src0, src1);

    return instr->dest[0];
}


/*
 * Emit LEA_BUF_IMM: allocate the instruction (1 destination, 1 source) from
 * b->shader, fill in its operands, insert it at b->cursor and return it.
 */
static inline bi_instr *
bi_lea_buf_imm_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 1 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LEA_BUF_IMM;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_lea_buf_imm_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_lea_buf_imm(bi_builder *b, bi_index src0)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_lea_buf_imm_to(b, dest, src0);

    return instr->dest[0];
}


/*
 * Emit LEA_PKA: allocate the instruction (1 destination, 2 sources) from
 * b->shader, fill in its operands, insert it at b->cursor and return it.
 */
static inline bi_instr *
bi_lea_pka_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 2 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LEA_PKA;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_lea_pka_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_lea_pka(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_lea_pka_to(b, dest, src0, src1);

    return instr->dest[0];
}


/*
 * Emit LEA_TEX: allocate the instruction (1 destination, 3 sources) from
 * b->shader, fill in operands and the format flag, insert it at b->cursor
 * and return it.
 */
static inline bi_instr *
bi_lea_tex_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool format)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 3 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LEA_TEX;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->format = format;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_lea_tex_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_lea_tex(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool format)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_lea_tex_to(b, dest, src0, src1, src2, format);

    return instr->dest[0];
}


/*
 * Emit LEA_TEX_IMM: allocate the instruction (1 destination, 2 sources) from
 * b->shader, fill in operands, the format flag and the immediate texture
 * index, insert it at b->cursor and return it.
 */
static inline bi_instr *
bi_lea_tex_imm_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bool format, uint32_t texture_index)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 2 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LEA_TEX_IMM;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->format = format;
    instr->texture_index = texture_index;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_lea_tex_imm_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_lea_tex_imm(bi_builder *b, bi_index src0, bi_index src1, bool format, uint32_t texture_index)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_lea_tex_imm_to(b, dest, src0, src1, format, texture_index);

    return instr->dest[0];
}


/*
 * Emit LOAD_I128: allocate the instruction (1 destination, 2 sources) from
 * b->shader, fill in operands, segment and byte offset, insert it at
 * b->cursor and return it.
 */
static inline bi_instr *
bi_load_i128_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_seg seg, uint32_t byte_offset)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 2 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LOAD_I128;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->seg = seg;
    instr->byte_offset = byte_offset;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_load_i128_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_load_i128(bi_builder *b, bi_index src0, bi_index src1, enum bi_seg seg, uint32_t byte_offset)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_load_i128_to(b, dest, src0, src1, seg, byte_offset);

    return instr->dest[0];
}


/*
 * Emit LOAD_I16: allocate the instruction (1 destination, 2 sources) from
 * b->shader, fill in operands, segment and byte offset, insert it at
 * b->cursor and return it. The extend modifier is not a parameter; this
 * builder always sets it to BI_EXTEND_ZEXT.
 */
static inline bi_instr *
bi_load_i16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_seg seg, uint32_t byte_offset)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 2 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LOAD_I16;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->extend = BI_EXTEND_ZEXT;
    instr->seg = seg;
    instr->byte_offset = byte_offset;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_load_i16_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_load_i16(bi_builder *b, bi_index src0, bi_index src1, enum bi_seg seg, uint32_t byte_offset)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_load_i16_to(b, dest, src0, src1, seg, byte_offset);

    return instr->dest[0];
}


/*
 * Emit LOAD_I24: allocate the instruction (1 destination, 2 sources) from
 * b->shader, fill in operands, segment and byte offset, insert it at
 * b->cursor and return it. The extend modifier is not a parameter; this
 * builder always sets it to BI_EXTEND_ZEXT.
 */
static inline bi_instr *
bi_load_i24_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_seg seg, uint32_t byte_offset)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 2 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LOAD_I24;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->extend = BI_EXTEND_ZEXT;
    instr->seg = seg;
    instr->byte_offset = byte_offset;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_load_i24_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_load_i24(bi_builder *b, bi_index src0, bi_index src1, enum bi_seg seg, uint32_t byte_offset)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_load_i24_to(b, dest, src0, src1, seg, byte_offset);

    return instr->dest[0];
}


/*
 * Emit LOAD_I32: allocate the instruction (1 destination, 2 sources) from
 * b->shader, fill in operands, segment and byte offset, insert it at
 * b->cursor and return it.
 */
static inline bi_instr *
bi_load_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_seg seg, uint32_t byte_offset)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 2 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LOAD_I32;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->seg = seg;
    instr->byte_offset = byte_offset;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_load_i32_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_load_i32(bi_builder *b, bi_index src0, bi_index src1, enum bi_seg seg, uint32_t byte_offset)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_load_i32_to(b, dest, src0, src1, seg, byte_offset);

    return instr->dest[0];
}


/*
 * Emit LOAD_I48: allocate the instruction (1 destination, 2 sources) from
 * b->shader, fill in operands, segment and byte offset, insert it at
 * b->cursor and return it.
 */
static inline bi_instr *
bi_load_i48_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_seg seg, uint32_t byte_offset)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 2 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LOAD_I48;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->seg = seg;
    instr->byte_offset = byte_offset;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_load_i48_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_load_i48(bi_builder *b, bi_index src0, bi_index src1, enum bi_seg seg, uint32_t byte_offset)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_load_i48_to(b, dest, src0, src1, seg, byte_offset);

    return instr->dest[0];
}


/*
 * Emit LOAD_I64: allocate the instruction (1 destination, 2 sources) from
 * b->shader, fill in operands, segment and byte offset, insert it at
 * b->cursor and return it.
 */
static inline bi_instr *
bi_load_i64_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_seg seg, uint32_t byte_offset)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 2 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LOAD_I64;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->seg = seg;
    instr->byte_offset = byte_offset;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_load_i64_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_load_i64(bi_builder *b, bi_index src0, bi_index src1, enum bi_seg seg, uint32_t byte_offset)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_load_i64_to(b, dest, src0, src1, seg, byte_offset);

    return instr->dest[0];
}


/*
 * Emit LOAD_I8: allocate the instruction (1 destination, 2 sources) from
 * b->shader, fill in operands, segment and byte offset, insert it at
 * b->cursor and return it. The extend modifier is not a parameter; this
 * builder always sets it to BI_EXTEND_ZEXT.
 */
static inline bi_instr *
bi_load_i8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_seg seg, uint32_t byte_offset)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 2 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LOAD_I8;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->extend = BI_EXTEND_ZEXT;
    instr->seg = seg;
    instr->byte_offset = byte_offset;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_load_i8_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_load_i8(bi_builder *b, bi_index src0, bi_index src1, enum bi_seg seg, uint32_t byte_offset)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_load_i8_to(b, dest, src0, src1, seg, byte_offset);

    return instr->dest[0];
}


/*
 * Emit LOAD_I96: allocate the instruction (1 destination, 2 sources) from
 * b->shader, fill in operands, segment and byte offset, insert it at
 * b->cursor and return it.
 */
static inline bi_instr *
bi_load_i96_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, enum bi_seg seg, uint32_t byte_offset)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 2 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LOAD_I96;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->seg = seg;
    instr->byte_offset = byte_offset;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_load_i96_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_load_i96(bi_builder *b, bi_index src0, bi_index src1, enum bi_seg seg, uint32_t byte_offset)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_load_i96_to(b, dest, src0, src1, seg, byte_offset);

    return instr->dest[0];
}


/*
 * Size-generic LOAD builder: forwards to bi_load_i<bitsize>_to() for bitsize
 * 8, 16, 24, 32, 48, 64, 96 or 128. Any other bitsize reaches UNREACHABLE().
 */
static inline bi_instr *
bi_load_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, enum bi_seg seg, uint32_t byte_offset)
{
    switch (bitsize) {
    case 8:
        return bi_load_i8_to(b, dest0, src0, src1, seg, byte_offset);
    case 16:
        return bi_load_i16_to(b, dest0, src0, src1, seg, byte_offset);
    case 24:
        return bi_load_i24_to(b, dest0, src0, src1, seg, byte_offset);
    case 32:
        return bi_load_i32_to(b, dest0, src0, src1, seg, byte_offset);
    case 48:
        return bi_load_i48_to(b, dest0, src0, src1, seg, byte_offset);
    case 64:
        return bi_load_i64_to(b, dest0, src0, src1, seg, byte_offset);
    case 96:
        return bi_load_i96_to(b, dest0, src0, src1, seg, byte_offset);
    case 128:
        return bi_load_i128_to(b, dest0, src0, src1, seg, byte_offset);
    default:
        UNREACHABLE("Invalid parameters for LOAD");
    }
}

/*
 * Size-generic, value-returning LOAD builder: forwards to
 * bi_load_i<bitsize>_to() for bitsize 8, 16, 24, 32, 48, 64, 96 or 128 with a
 * destination from bi_temp(b->shader), and returns the emitted instruction's
 * dest[0]. Any other bitsize reaches UNREACHABLE().
 */
static inline bi_index
bi_load(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, enum bi_seg seg, uint32_t byte_offset)
{
    switch (bitsize) {
    case 8:
        return bi_load_i8_to(b, bi_temp(b->shader), src0, src1, seg, byte_offset)->dest[0];
    case 16:
        return bi_load_i16_to(b, bi_temp(b->shader), src0, src1, seg, byte_offset)->dest[0];
    case 24:
        return bi_load_i24_to(b, bi_temp(b->shader), src0, src1, seg, byte_offset)->dest[0];
    case 32:
        return bi_load_i32_to(b, bi_temp(b->shader), src0, src1, seg, byte_offset)->dest[0];
    case 48:
        return bi_load_i48_to(b, bi_temp(b->shader), src0, src1, seg, byte_offset)->dest[0];
    case 64:
        return bi_load_i64_to(b, bi_temp(b->shader), src0, src1, seg, byte_offset)->dest[0];
    case 96:
        return bi_load_i96_to(b, bi_temp(b->shader), src0, src1, seg, byte_offset)->dest[0];
    case 128:
        return bi_load_i128_to(b, bi_temp(b->shader), src0, src1, seg, byte_offset)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for LOAD");
    }
}

/*
 * Emit LOGB_F32: allocate the instruction (1 destination, 1 source) from
 * b->shader, fill in its operands, insert it at b->cursor and return it.
 */
static inline bi_instr *
bi_logb_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 1 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LOGB_F32;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_logb_f32_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_logb_f32(bi_builder *b, bi_index src0)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_logb_f32_to(b, dest, src0);

    return instr->dest[0];
}


/*
 * Emit LOGB_V2F16: allocate the instruction (1 destination, 1 source) from
 * b->shader, fill in its operands, insert it at b->cursor and return it.
 */
static inline bi_instr *
bi_logb_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 1 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LOGB_V2F16;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_logb_v2f16_to(): the destination is taken from
 * bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_logb_v2f16(bi_builder *b, bi_index src0)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_logb_v2f16_to(b, dest, src0);

    return instr->dest[0];
}


/*
 * Size-generic LOGB builder: bitsize 32 selects the F32 variant and 16 the
 * V2F16 variant. Any other bitsize reaches UNREACHABLE().
 */
static inline bi_instr *
bi_logb_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0)
{
    switch (bitsize) {
    case 32:
        return bi_logb_f32_to(b, dest0, src0);
    case 16:
        return bi_logb_v2f16_to(b, dest0, src0);
    default:
        UNREACHABLE("Invalid parameters for LOGB");
    }
}

/*
 * Size-generic, value-returning LOGB builder. Bitsize 32 selects F32 and 16
 * selects V2F16; the destination comes from bi_temp(b->shader) and the
 * emitted instruction's dest[0] is returned. Any other bitsize reaches
 * UNREACHABLE().
 */
static inline bi_index
bi_logb(bi_builder *b, unsigned bitsize, bi_index src0)
{
    switch (bitsize) {
    case 32:
        return bi_logb_f32_to(b, bi_temp(b->shader), src0)->dest[0];
    case 16:
        return bi_logb_v2f16_to(b, bi_temp(b->shader), src0)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for LOGB");
    }
}

/*
 * Emit LROT_DOUBLE_I32: allocate the instruction (1 destination, 3 sources)
 * from b->shader, fill in operands plus the bytes2 and result_word flags,
 * insert it at b->cursor and return it.
 */
static inline bi_instr *
bi_lrot_double_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool bytes2, bool result_word)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 3 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LROT_DOUBLE_I32;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->bytes2 = bytes2;
    instr->result_word = result_word;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_lrot_double_i32_to(): the destination is taken
 * from bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_lrot_double_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool bytes2, bool result_word)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_lrot_double_i32_to(b, dest, src0, src1, src2, bytes2, result_word);

    return instr->dest[0];
}


/*
 * Emit LSHIFT_AND_I32: allocate the instruction (1 destination, 3 sources)
 * from b->shader, fill in its operands, insert it at b->cursor and return it.
 */
static inline bi_instr *
bi_lshift_and_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 3 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LSHIFT_AND_I32;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_lshift_and_i32_to(): the destination is taken
 * from bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_lshift_and_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_lshift_and_i32_to(b, dest, src0, src1, src2);

    return instr->dest[0];
}


/*
 * Emit LSHIFT_AND_V2I16: allocate the instruction (1 destination, 3 sources)
 * from b->shader, fill in its operands, insert it at b->cursor and return it.
 */
static inline bi_instr *
bi_lshift_and_v2i16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 3 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LSHIFT_AND_V2I16;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_lshift_and_v2i16_to(): the destination is taken
 * from bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_lshift_and_v2i16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_lshift_and_v2i16_to(b, dest, src0, src1, src2);

    return instr->dest[0];
}


/*
 * Emit LSHIFT_AND_V4I8: allocate the instruction (1 destination, 3 sources)
 * from b->shader, fill in its operands, insert it at b->cursor and return it.
 */
static inline bi_instr *
bi_lshift_and_v4i8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 3 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LSHIFT_AND_V4I8;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_lshift_and_v4i8_to(): the destination is taken
 * from bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_lshift_and_v4i8(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_lshift_and_v4i8_to(b, dest, src0, src1, src2);

    return instr->dest[0];
}


/*
 * Size-generic LSHIFT_AND builder: bitsize 32 selects the I32 variant, 16 the
 * V2I16 variant and 8 the V4I8 variant. Any other bitsize reaches
 * UNREACHABLE().
 */
static inline bi_instr *
bi_lshift_and_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    switch (bitsize) {
    case 32:
        return bi_lshift_and_i32_to(b, dest0, src0, src1, src2);
    case 16:
        return bi_lshift_and_v2i16_to(b, dest0, src0, src1, src2);
    case 8:
        return bi_lshift_and_v4i8_to(b, dest0, src0, src1, src2);
    default:
        UNREACHABLE("Invalid parameters for LSHIFT_AND");
    }
}

/*
 * Size-generic, value-returning LSHIFT_AND builder. Bitsize 32 selects I32,
 * 16 selects V2I16 and 8 selects V4I8; the destination comes from
 * bi_temp(b->shader) and the emitted instruction's dest[0] is returned. Any
 * other bitsize reaches UNREACHABLE().
 */
static inline bi_index
bi_lshift_and(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2)
{
    switch (bitsize) {
    case 32:
        return bi_lshift_and_i32_to(b, bi_temp(b->shader), src0, src1, src2)->dest[0];
    case 16:
        return bi_lshift_and_v2i16_to(b, bi_temp(b->shader), src0, src1, src2)->dest[0];
    case 8:
        return bi_lshift_and_v4i8_to(b, bi_temp(b->shader), src0, src1, src2)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for LSHIFT_AND");
    }
}

/*
 * Emit LSHIFT_DOUBLE_I32: allocate the instruction (1 destination, 3 sources)
 * from b->shader, fill in operands plus the bytes2 and result_word flags,
 * insert it at b->cursor and return it.
 */
static inline bi_instr *
bi_lshift_double_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool bytes2, bool result_word)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 3 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LSHIFT_DOUBLE_I32;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->bytes2 = bytes2;
    instr->result_word = result_word;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_lshift_double_i32_to(): the destination is taken
 * from bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_lshift_double_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool bytes2, bool result_word)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_lshift_double_i32_to(b, dest, src0, src1, src2, bytes2, result_word);

    return instr->dest[0];
}


/*
 * Emit LSHIFT_OR_I32: allocate the instruction (1 destination, 3 sources)
 * from b->shader, fill in its operands, insert it at b->cursor and return it.
 */
static inline bi_instr *
bi_lshift_or_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 3 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LSHIFT_OR_I32;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Value-returning form of bi_lshift_or_i32_to(): the destination is taken
 * from bi_temp(b->shader) and dest[0] of the emitted instruction is returned.
 */
static inline bi_index
bi_lshift_or_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index dest = bi_temp(b->shader);
    bi_instr *instr = bi_lshift_or_i32_to(b, dest, src0, src1, src2);

    return instr->dest[0];
}


/*
 * Emit LSHIFT_OR_V2I16: allocate the instruction (1 destination, 3 sources)
 * from b->shader, fill in its operands, insert it at b->cursor and return it.
 */
static inline bi_instr *
bi_lshift_or_v2i16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    enum { NUM_DESTS = 1, NUM_SRCS = 3 };

    /* The operand array shares the allocation, right after the bi_instr. */
    size_t bytes = sizeof(bi_instr) + sizeof(bi_index) * (NUM_DESTS + NUM_SRCS);
    bi_instr *instr = (bi_instr *) rzalloc_size(b->shader, bytes);
    bi_index *operands = (bi_index *) (instr + 1);

    instr->op = BI_OPCODE_LSHIFT_OR_V2I16;
    instr->nr_dests = NUM_DESTS;
    instr->nr_srcs = NUM_SRCS;
    instr->dest = operands;
    instr->src = operands + NUM_DESTS;

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Convenience form of bi_lshift_or_v2i16_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_lshift_or_v2i16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_lshift_or_v2i16_to(b, tmp, src0, src1, src2)->dest[0];
}


/*
 * Emit a BI_OPCODE_LSHIFT_OR_V4I8 instruction with destination dest0 and
 * sources src0..src2 at the builder cursor; returns the new instruction.
 */
static inline
bi_instr * bi_lshift_or_v4i8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_LSHIFT_OR_V4I8;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_lshift_or_v4i8_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_lshift_or_v4i8(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_lshift_or_v4i8_to(b, tmp, src0, src1, src2)->dest[0];
}


/*
 * Size-generic LSHIFT_OR builder: forwards to the i32, v2i16 or v4i8
 * variant for a bitsize of 32, 16 or 8. Any other bitsize is UNREACHABLE.
 */
static inline
bi_instr * bi_lshift_or_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    switch (bitsize) {
    case 32:
        return bi_lshift_or_i32_to(b, dest0, src0, src1, src2);
    case 16:
        return bi_lshift_or_v2i16_to(b, dest0, src0, src1, src2);
    case 8:
        return bi_lshift_or_v4i8_to(b, dest0, src0, src1, src2);
    default:
        UNREACHABLE("Invalid parameters for LSHIFT_OR");
    }
}

/*
 * Size-generic LSHIFT_OR that writes to a fresh bi_temp() destination and
 * returns the instruction's first destination. bitsize selects the i32 (32),
 * v2i16 (16) or v4i8 (8) builder; any other value is UNREACHABLE.
 */
static inline
bi_index bi_lshift_or(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2)
{
    switch (bitsize) {
    case 32:
        return bi_lshift_or_i32_to(b, bi_temp(b->shader), src0, src1, src2)->dest[0];
    case 16:
        return bi_lshift_or_v2i16_to(b, bi_temp(b->shader), src0, src1, src2)->dest[0];
    case 8:
        return bi_lshift_or_v4i8_to(b, bi_temp(b->shader), src0, src1, src2)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for LSHIFT_OR");
    }
}

/*
 * Emit a BI_OPCODE_LSHIFT_XOR_I32 instruction with destination dest0 and
 * sources src0..src2 at the builder cursor; returns the new instruction.
 */
static inline
bi_instr * bi_lshift_xor_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_LSHIFT_XOR_I32;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_lshift_xor_i32_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_lshift_xor_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_lshift_xor_i32_to(b, tmp, src0, src1, src2)->dest[0];
}


/*
 * Emit a BI_OPCODE_LSHIFT_XOR_V2I16 instruction with destination dest0 and
 * sources src0..src2 at the builder cursor; returns the new instruction.
 */
static inline
bi_instr * bi_lshift_xor_v2i16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_LSHIFT_XOR_V2I16;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_lshift_xor_v2i16_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_lshift_xor_v2i16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_lshift_xor_v2i16_to(b, tmp, src0, src1, src2)->dest[0];
}


/*
 * Emit a BI_OPCODE_LSHIFT_XOR_V4I8 instruction with destination dest0 and
 * sources src0..src2 at the builder cursor; returns the new instruction.
 */
static inline
bi_instr * bi_lshift_xor_v4i8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_LSHIFT_XOR_V4I8;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_lshift_xor_v4i8_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_lshift_xor_v4i8(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_lshift_xor_v4i8_to(b, tmp, src0, src1, src2)->dest[0];
}


/*
 * Size-generic LSHIFT_XOR builder: forwards to the i32, v2i16 or v4i8
 * variant for a bitsize of 32, 16 or 8. Any other bitsize is UNREACHABLE.
 */
static inline
bi_instr * bi_lshift_xor_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    switch (bitsize) {
    case 32:
        return bi_lshift_xor_i32_to(b, dest0, src0, src1, src2);
    case 16:
        return bi_lshift_xor_v2i16_to(b, dest0, src0, src1, src2);
    case 8:
        return bi_lshift_xor_v4i8_to(b, dest0, src0, src1, src2);
    default:
        UNREACHABLE("Invalid parameters for LSHIFT_XOR");
    }
}

/*
 * Size-generic LSHIFT_XOR that writes to a fresh bi_temp() destination and
 * returns the instruction's first destination. bitsize selects the i32 (32),
 * v2i16 (16) or v4i8 (8) builder; any other value is UNREACHABLE.
 */
static inline
bi_index bi_lshift_xor(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2)
{
    switch (bitsize) {
    case 32:
        return bi_lshift_xor_i32_to(b, bi_temp(b->shader), src0, src1, src2)->dest[0];
    case 16:
        return bi_lshift_xor_v2i16_to(b, bi_temp(b->shader), src0, src1, src2)->dest[0];
    case 8:
        return bi_lshift_xor_v4i8_to(b, bi_temp(b->shader), src0, src1, src2)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for LSHIFT_XOR");
    }
}

/*
 * Emit a BI_OPCODE_MKVEC_V2I16 instruction with destination dest0 and
 * sources src0 and src1 at the builder cursor; returns the new instruction.
 */
static inline
bi_instr * bi_mkvec_v2i16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    /* Header plus 1 destination and 2 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 2) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 2;
    I->op = BI_OPCODE_MKVEC_V2I16;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_mkvec_v2i16_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_mkvec_v2i16(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_mkvec_v2i16_to(b, tmp, src0, src1)->dest[0];
}


/*
 * Emit a BI_OPCODE_MKVEC_V2I8 instruction with destination dest0 and
 * sources src0..src2 at the builder cursor; returns the new instruction.
 */
static inline
bi_instr * bi_mkvec_v2i8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_MKVEC_V2I8;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_mkvec_v2i8_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_mkvec_v2i8(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_mkvec_v2i8_to(b, tmp, src0, src1, src2)->dest[0];
}


/*
 * Emit a BI_OPCODE_MKVEC_V4I8 instruction with destination dest0 and
 * sources src0..src3 at the builder cursor; returns the new instruction.
 */
static inline
bi_instr * bi_mkvec_v4i8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bi_index src3)
{
    /* Header plus 1 destination and 4 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 4) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 4;
    I->op = BI_OPCODE_MKVEC_V4I8;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;
    I->src[3] = src3;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_mkvec_v4i8_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_mkvec_v4i8(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bi_index src3)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_mkvec_v4i8_to(b, tmp, src0, src1, src2, src3)->dest[0];
}


/*
 * Emit a BI_OPCODE_MOV_I32 instruction with destination dest0 and source
 * src0 at the builder cursor; returns the new instruction.
 */
static inline
bi_instr * bi_mov_i32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header plus 1 destination and 1 source slot in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 1) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->op = BI_OPCODE_MOV_I32;

    I->dest[0] = dest0;
    I->src[0] = src0;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_mov_i32_to(): the destination is a fresh index
 * from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_mov_i32(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_mov_i32_to(b, tmp, src0)->dest[0];
}


/*
 * Emit a BI_OPCODE_MUX_I32 instruction with destination dest0, sources
 * src0..src2 and the given mux modifier at the builder cursor; returns the
 * new instruction.
 */
static inline
bi_instr * bi_mux_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_mux mux)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_MUX_I32;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    /* Instruction modifier */
    I->mux = mux;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_mux_i32_to(): the destination is a fresh index
 * from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_mux_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_mux mux)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_mux_i32_to(b, tmp, src0, src1, src2, mux)->dest[0];
}


/*
 * Emit a BI_OPCODE_MUX_V2I16 instruction with destination dest0, sources
 * src0..src2 and the given mux modifier at the builder cursor; returns the
 * new instruction.
 */
static inline
bi_instr * bi_mux_v2i16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_mux mux)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_MUX_V2I16;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    /* Instruction modifier */
    I->mux = mux;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_mux_v2i16_to(): the destination is a fresh index
 * from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_mux_v2i16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_mux mux)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_mux_v2i16_to(b, tmp, src0, src1, src2, mux)->dest[0];
}


/*
 * Emit a BI_OPCODE_MUX_V4I8 instruction with destination dest0, sources
 * src0..src2 and the given mux modifier at the builder cursor; returns the
 * new instruction.
 */
static inline
bi_instr * bi_mux_v4i8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_mux mux)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_MUX_V4I8;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    /* Instruction modifier */
    I->mux = mux;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_mux_v4i8_to(): the destination is a fresh index
 * from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_mux_v4i8(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_mux mux)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_mux_v4i8_to(b, tmp, src0, src1, src2, mux)->dest[0];
}


/*
 * Size-generic MUX builder: forwards to the i32, v2i16 or v4i8 variant for
 * a bitsize of 32, 16 or 8. Any other bitsize is UNREACHABLE.
 */
static inline
bi_instr * bi_mux_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_mux mux)
{
    switch (bitsize) {
    case 32:
        return bi_mux_i32_to(b, dest0, src0, src1, src2, mux);
    case 16:
        return bi_mux_v2i16_to(b, dest0, src0, src1, src2, mux);
    case 8:
        return bi_mux_v4i8_to(b, dest0, src0, src1, src2, mux);
    default:
        UNREACHABLE("Invalid parameters for MUX");
    }
}

/*
 * Size-generic MUX that writes to a fresh bi_temp() destination and returns
 * the instruction's first destination. bitsize selects the i32 (32),
 * v2i16 (16) or v4i8 (8) builder; any other value is UNREACHABLE.
 */
static inline
bi_index bi_mux(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, enum bi_mux mux)
{
    switch (bitsize) {
    case 32:
        return bi_mux_i32_to(b, bi_temp(b->shader), src0, src1, src2, mux)->dest[0];
    case 16:
        return bi_mux_v2i16_to(b, bi_temp(b->shader), src0, src1, src2, mux)->dest[0];
    case 8:
        return bi_mux_v4i8_to(b, bi_temp(b->shader), src0, src1, src2, mux)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for MUX");
    }
}

/*
 * Emit a BI_OPCODE_NOP instruction, which has no destinations and no
 * sources, at the builder cursor; returns the new instruction.
 */
static inline
bi_instr * bi_nop(bi_builder *b)
{
    /* No operand slots are needed, so only the header is allocated */
    bi_instr *I = (bi_instr *) rzalloc_size(b->shader, sizeof(bi_instr));

    /* Both operand pointers refer to the (empty) area after the header */
    I->dest = (bi_index *) (I + 1);
    I->src = I->dest;
    I->nr_dests = 0;
    I->nr_srcs = 0;
    I->op = BI_OPCODE_NOP;

    bi_builder_insert(&b->cursor, I);
    return I;
}


/*
 * Emit a BI_OPCODE_PHI instruction with destination dest0 and room for
 * nr_srcs sources at the builder cursor; returns the new instruction.
 *
 * The source slots are not written here; presumably the caller fills
 * I->src[] afterwards.
 */
static inline
bi_instr * bi_phi_to(bi_builder *b, bi_index dest0, unsigned nr_srcs)
{
    /* Header plus 1 destination and nr_srcs source slots in one allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + nr_srcs) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = nr_srcs;
    I->op = BI_OPCODE_PHI;

    I->dest[0] = dest0;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_phi_to(): the destination is a fresh index from
 * bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_phi(bi_builder *b, unsigned nr_srcs)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_phi_to(b, tmp, nr_srcs)->dest[0];
}


/*
 * Emit a BI_OPCODE_POPCOUNT_I32 instruction with destination dest0 and
 * source src0 at the builder cursor; returns the new instruction.
 */
static inline
bi_instr * bi_popcount_i32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header plus 1 destination and 1 source slot in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 1) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->op = BI_OPCODE_POPCOUNT_I32;

    I->dest[0] = dest0;
    I->src[0] = src0;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_popcount_i32_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_popcount_i32(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_popcount_i32_to(b, tmp, src0)->dest[0];
}


/*
 * Emit a BI_OPCODE_QUIET_F32 instruction with destination dest0 and source
 * src0 at the builder cursor; returns the new instruction.
 */
static inline
bi_instr * bi_quiet_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header plus 1 destination and 1 source slot in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 1) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->op = BI_OPCODE_QUIET_F32;

    I->dest[0] = dest0;
    I->src[0] = src0;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_quiet_f32_to(): the destination is a fresh index
 * from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_quiet_f32(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_quiet_f32_to(b, tmp, src0)->dest[0];
}


/*
 * Emit a BI_OPCODE_QUIET_V2F16 instruction with destination dest0 and
 * source src0 at the builder cursor; returns the new instruction.
 */
static inline
bi_instr * bi_quiet_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header plus 1 destination and 1 source slot in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 1) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->op = BI_OPCODE_QUIET_V2F16;

    I->dest[0] = dest0;
    I->src[0] = src0;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_quiet_v2f16_to(): the destination is a fresh index
 * from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_quiet_v2f16(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_quiet_v2f16_to(b, tmp, src0)->dest[0];
}


/*
 * Size-generic QUIET builder: forwards to the f32 variant for a bitsize of
 * 32 and the v2f16 variant for 16. Any other bitsize is UNREACHABLE.
 */
static inline
bi_instr * bi_quiet_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0)
{
    switch (bitsize) {
    case 32:
        return bi_quiet_f32_to(b, dest0, src0);
    case 16:
        return bi_quiet_v2f16_to(b, dest0, src0);
    default:
        UNREACHABLE("Invalid parameters for QUIET");
    }
}

/*
 * Size-generic QUIET that writes to a fresh bi_temp() destination and
 * returns the instruction's first destination. bitsize selects the f32 (32)
 * or v2f16 (16) builder; any other value is UNREACHABLE.
 */
static inline
bi_index bi_quiet(bi_builder *b, unsigned bitsize, bi_index src0)
{
    switch (bitsize) {
    case 32:
        return bi_quiet_f32_to(b, bi_temp(b->shader), src0)->dest[0];
    case 16:
        return bi_quiet_v2f16_to(b, bi_temp(b->shader), src0)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for QUIET");
    }
}

/*
 * Emit a BI_OPCODE_RROT_DOUBLE_I32 instruction with destination dest0,
 * sources src0..src2 and the bytes2 / result_word modifiers at the builder
 * cursor; returns the new instruction.
 */
static inline
bi_instr * bi_rrot_double_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool bytes2, bool result_word)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_RROT_DOUBLE_I32;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    /* Instruction modifiers */
    I->bytes2 = bytes2;
    I->result_word = result_word;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_rrot_double_i32_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_rrot_double_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool bytes2, bool result_word)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_rrot_double_i32_to(b, tmp, src0, src1, src2, bytes2, result_word)->dest[0];
}


/*
 * Emit a BI_OPCODE_RSHIFT_AND_I32 instruction with destination dest0,
 * sources src0..src2 and the arithmetic modifier at the builder cursor;
 * returns the new instruction.
 */
static inline
bi_instr * bi_rshift_and_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_RSHIFT_AND_I32;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    /* Instruction modifier */
    I->arithmetic = arithmetic;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_rshift_and_i32_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_rshift_and_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_rshift_and_i32_to(b, tmp, src0, src1, src2, arithmetic)->dest[0];
}


/*
 * Emit a BI_OPCODE_RSHIFT_AND_V2I16 instruction with destination dest0,
 * sources src0..src2 and the arithmetic modifier at the builder cursor;
 * returns the new instruction.
 */
static inline
bi_instr * bi_rshift_and_v2i16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_RSHIFT_AND_V2I16;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    /* Instruction modifier */
    I->arithmetic = arithmetic;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_rshift_and_v2i16_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_rshift_and_v2i16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_rshift_and_v2i16_to(b, tmp, src0, src1, src2, arithmetic)->dest[0];
}


/*
 * Emit a BI_OPCODE_RSHIFT_AND_V4I8 instruction with destination dest0,
 * sources src0..src2 and the arithmetic modifier at the builder cursor;
 * returns the new instruction.
 */
static inline
bi_instr * bi_rshift_and_v4i8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_RSHIFT_AND_V4I8;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    /* Instruction modifier */
    I->arithmetic = arithmetic;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_rshift_and_v4i8_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_rshift_and_v4i8(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_rshift_and_v4i8_to(b, tmp, src0, src1, src2, arithmetic)->dest[0];
}


/*
 * Size-generic RSHIFT_AND builder: forwards to the i32, v2i16 or v4i8
 * variant for a bitsize of 32, 16 or 8. Any other bitsize is UNREACHABLE.
 */
static inline
bi_instr * bi_rshift_and_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    switch (bitsize) {
    case 32:
        return bi_rshift_and_i32_to(b, dest0, src0, src1, src2, arithmetic);
    case 16:
        return bi_rshift_and_v2i16_to(b, dest0, src0, src1, src2, arithmetic);
    case 8:
        return bi_rshift_and_v4i8_to(b, dest0, src0, src1, src2, arithmetic);
    default:
        UNREACHABLE("Invalid parameters for RSHIFT_AND");
    }
}

/*
 * Size-generic RSHIFT_AND that writes to a fresh bi_temp() destination and
 * returns the instruction's first destination. bitsize selects the i32 (32),
 * v2i16 (16) or v4i8 (8) builder; any other value is UNREACHABLE.
 */
static inline
bi_index bi_rshift_and(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    switch (bitsize) {
    case 32:
        return bi_rshift_and_i32_to(b, bi_temp(b->shader), src0, src1, src2, arithmetic)->dest[0];
    case 16:
        return bi_rshift_and_v2i16_to(b, bi_temp(b->shader), src0, src1, src2, arithmetic)->dest[0];
    case 8:
        return bi_rshift_and_v4i8_to(b, bi_temp(b->shader), src0, src1, src2, arithmetic)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for RSHIFT_AND");
    }
}

/*
 * Emit a BI_OPCODE_RSHIFT_DOUBLE_I32 instruction with destination dest0,
 * sources src0..src2 and the bytes2 / result_word modifiers at the builder
 * cursor; returns the new instruction.
 */
static inline
bi_instr * bi_rshift_double_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool bytes2, bool result_word)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_RSHIFT_DOUBLE_I32;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    /* Instruction modifiers */
    I->bytes2 = bytes2;
    I->result_word = result_word;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_rshift_double_i32_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_rshift_double_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool bytes2, bool result_word)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_rshift_double_i32_to(b, tmp, src0, src1, src2, bytes2, result_word)->dest[0];
}


/*
 * Emit a BI_OPCODE_RSHIFT_OR_I32 instruction with destination dest0,
 * sources src0..src2 and the arithmetic modifier at the builder cursor;
 * returns the new instruction.
 */
static inline
bi_instr * bi_rshift_or_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_RSHIFT_OR_I32;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    /* Instruction modifier */
    I->arithmetic = arithmetic;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_rshift_or_i32_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_rshift_or_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_rshift_or_i32_to(b, tmp, src0, src1, src2, arithmetic)->dest[0];
}


/*
 * Emit a BI_OPCODE_RSHIFT_OR_V2I16 instruction with destination dest0,
 * sources src0..src2 and the arithmetic modifier at the builder cursor;
 * returns the new instruction.
 */
static inline
bi_instr * bi_rshift_or_v2i16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_RSHIFT_OR_V2I16;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    /* Instruction modifier */
    I->arithmetic = arithmetic;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_rshift_or_v2i16_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_rshift_or_v2i16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_rshift_or_v2i16_to(b, tmp, src0, src1, src2, arithmetic)->dest[0];
}


/*
 * Emit a BI_OPCODE_RSHIFT_OR_V4I8 instruction with destination dest0,
 * sources src0..src2 and the arithmetic modifier at the builder cursor;
 * returns the new instruction.
 */
static inline
bi_instr * bi_rshift_or_v4i8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_RSHIFT_OR_V4I8;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    /* Instruction modifier */
    I->arithmetic = arithmetic;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_rshift_or_v4i8_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_rshift_or_v4i8(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_rshift_or_v4i8_to(b, tmp, src0, src1, src2, arithmetic)->dest[0];
}


/*
 * Size-generic RSHIFT_OR builder: forwards to the i32, v2i16 or v4i8
 * variant for a bitsize of 32, 16 or 8. Any other bitsize is UNREACHABLE.
 */
static inline
bi_instr * bi_rshift_or_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    switch (bitsize) {
    case 32:
        return bi_rshift_or_i32_to(b, dest0, src0, src1, src2, arithmetic);
    case 16:
        return bi_rshift_or_v2i16_to(b, dest0, src0, src1, src2, arithmetic);
    case 8:
        return bi_rshift_or_v4i8_to(b, dest0, src0, src1, src2, arithmetic);
    default:
        UNREACHABLE("Invalid parameters for RSHIFT_OR");
    }
}

/*
 * Size-generic RSHIFT_OR that writes to a fresh bi_temp() destination and
 * returns the instruction's first destination. bitsize selects the i32 (32),
 * v2i16 (16) or v4i8 (8) builder; any other value is UNREACHABLE.
 */
static inline
bi_index bi_rshift_or(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    switch (bitsize) {
    case 32:
        return bi_rshift_or_i32_to(b, bi_temp(b->shader), src0, src1, src2, arithmetic)->dest[0];
    case 16:
        return bi_rshift_or_v2i16_to(b, bi_temp(b->shader), src0, src1, src2, arithmetic)->dest[0];
    case 8:
        return bi_rshift_or_v4i8_to(b, bi_temp(b->shader), src0, src1, src2, arithmetic)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for RSHIFT_OR");
    }
}

/*
 * Emit a BI_OPCODE_RSHIFT_XOR_I32 instruction with destination dest0,
 * sources src0..src2 and the arithmetic modifier at the builder cursor;
 * returns the new instruction.
 */
static inline
bi_instr * bi_rshift_xor_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_RSHIFT_XOR_I32;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    /* Instruction modifier */
    I->arithmetic = arithmetic;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_rshift_xor_i32_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_rshift_xor_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_rshift_xor_i32_to(b, tmp, src0, src1, src2, arithmetic)->dest[0];
}


/*
 * Emit a BI_OPCODE_RSHIFT_XOR_V2I16 instruction with destination dest0,
 * sources src0..src2 and the arithmetic modifier at the builder cursor;
 * returns the new instruction.
 */
static inline
bi_instr * bi_rshift_xor_v2i16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_RSHIFT_XOR_V2I16;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    /* Instruction modifier */
    I->arithmetic = arithmetic;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_rshift_xor_v2i16_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_rshift_xor_v2i16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_rshift_xor_v2i16_to(b, tmp, src0, src1, src2, arithmetic)->dest[0];
}


/*
 * Emit a BI_OPCODE_RSHIFT_XOR_V4I8 instruction with destination dest0,
 * sources src0..src2 and the arithmetic modifier at the builder cursor;
 * returns the new instruction.
 */
static inline
bi_instr * bi_rshift_xor_v4i8_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    /* Header plus 1 destination and 3 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 3) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->op = BI_OPCODE_RSHIFT_XOR_V4I8;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    /* Instruction modifier */
    I->arithmetic = arithmetic;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_rshift_xor_v4i8_to(): the destination is a fresh
 * index from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_rshift_xor_v4i8(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_rshift_xor_v4i8_to(b, tmp, src0, src1, src2, arithmetic)->dest[0];
}


/*
 * Size-generic RSHIFT_XOR builder: forwards to the i32, v2i16 or v4i8
 * variant for a bitsize of 32, 16 or 8. Any other bitsize is UNREACHABLE.
 */
static inline
bi_instr * bi_rshift_xor_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    switch (bitsize) {
    case 32:
        return bi_rshift_xor_i32_to(b, dest0, src0, src1, src2, arithmetic);
    case 16:
        return bi_rshift_xor_v2i16_to(b, dest0, src0, src1, src2, arithmetic);
    case 8:
        return bi_rshift_xor_v4i8_to(b, dest0, src0, src1, src2, arithmetic);
    default:
        UNREACHABLE("Invalid parameters for RSHIFT_XOR");
    }
}

/*
 * Size-generic RSHIFT_XOR that writes to a fresh bi_temp() destination and
 * returns the instruction's first destination. bitsize selects the i32 (32),
 * v2i16 (16) or v4i8 (8) builder; any other value is UNREACHABLE.
 */
static inline
bi_index bi_rshift_xor(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, bool arithmetic)
{
    switch (bitsize) {
    case 32:
        return bi_rshift_xor_i32_to(b, bi_temp(b->shader), src0, src1, src2, arithmetic)->dest[0];
    case 16:
        return bi_rshift_xor_v2i16_to(b, bi_temp(b->shader), src0, src1, src2, arithmetic)->dest[0];
    case 8:
        return bi_rshift_xor_v4i8_to(b, bi_temp(b->shader), src0, src1, src2, arithmetic)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for RSHIFT_XOR");
    }
}

/*
 * Emit a BI_OPCODE_S16_TO_F32 instruction with destination dest0 and source
 * src0 at the builder cursor; returns the new instruction.
 */
static inline
bi_instr * bi_s16_to_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header plus 1 destination and 1 source slot in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 1) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->op = BI_OPCODE_S16_TO_F32;

    I->dest[0] = dest0;
    I->src[0] = src0;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_s16_to_f32_to(): the destination is a fresh index
 * from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_s16_to_f32(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_s16_to_f32_to(b, tmp, src0)->dest[0];
}


/*
 * Emit a BI_OPCODE_S16_TO_S32 instruction with destination dest0 and source
 * src0 at the builder cursor; returns the new instruction. The round field
 * is always set to BI_ROUND_RTZ.
 */
static inline
bi_instr * bi_s16_to_s32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header plus 1 destination and 1 source slot in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 1) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->op = BI_OPCODE_S16_TO_S32;

    I->dest[0] = dest0;
    I->src[0] = src0;

    /* Rounding is not caller-selectable for this opcode */
    I->round = BI_ROUND_RTZ;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_s16_to_s32_to(): the destination is a fresh index
 * from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_s16_to_s32(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_s16_to_s32_to(b, tmp, src0)->dest[0];
}


/*
 * Emit a BI_OPCODE_S32_TO_F32 instruction with destination dest0 and source
 * src0 at the builder cursor; returns the new instruction. The round field
 * is taken from bi_round_mode(b->shader, 32).
 */
static inline
bi_instr * bi_s32_to_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header plus 1 destination and 1 source slot in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 1) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->op = BI_OPCODE_S32_TO_F32;

    I->dest[0] = dest0;
    I->src[0] = src0;

    /* Rounding comes from the shader rather than from the caller */
    I->round = bi_round_mode(b->shader, 32);

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_s32_to_f32_to(): the destination is a fresh index
 * from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_s32_to_f32(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_s32_to_f32_to(b, tmp, src0)->dest[0];
}


/*
 * Emit a BI_OPCODE_S8_TO_F32 instruction with destination dest0 and source
 * src0 at the builder cursor; returns the new instruction.
 */
static inline
bi_instr * bi_s8_to_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header plus 1 destination and 1 source slot in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 1) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->op = BI_OPCODE_S8_TO_F32;

    I->dest[0] = dest0;
    I->src[0] = src0;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_s8_to_f32_to(): the destination is a fresh index
 * from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_s8_to_f32(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_s8_to_f32_to(b, tmp, src0)->dest[0];
}


/*
 * Emit a BI_OPCODE_S8_TO_S32 instruction with destination dest0 and source
 * src0 at the builder cursor; returns the new instruction. The round field
 * is always set to BI_ROUND_RTZ.
 */
static inline
bi_instr * bi_s8_to_s32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Header plus 1 destination and 1 source slot in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 1) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->op = BI_OPCODE_S8_TO_S32;

    I->dest[0] = dest0;
    I->src[0] = src0;

    /* Rounding is not caller-selectable for this opcode */
    I->round = BI_ROUND_RTZ;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_s8_to_s32_to(): the destination is a fresh index
 * from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_s8_to_s32(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_s8_to_s32_to(b, tmp, src0)->dest[0];
}


/*
 * Emit a BI_OPCODE_SEG_ADD instruction with destination dest0, source src0
 * and the seg / preserve_null modifiers at the builder cursor; returns the
 * new instruction.
 */
static inline
bi_instr * bi_seg_add_to(bi_builder *b, bi_index dest0, bi_index src0, bool preserve_null, enum bi_seg seg)
{
    /* Header plus 1 destination and 1 source slot in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 1) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->op = BI_OPCODE_SEG_ADD;

    I->dest[0] = dest0;
    I->src[0] = src0;

    /* Instruction modifiers */
    I->seg = seg;
    I->preserve_null = preserve_null;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_seg_add_to(): the destination is a fresh index
 * from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_seg_add(bi_builder *b, bi_index src0, bool preserve_null, enum bi_seg seg)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_seg_add_to(b, tmp, src0, preserve_null, seg)->dest[0];
}


/*
 * Emit a BI_OPCODE_SEG_ADD_I64 instruction with destination dest0, sources
 * src0 and src1 and the seg / preserve_null modifiers at the builder
 * cursor; returns the new instruction.
 */
static inline
bi_instr * bi_seg_add_i64_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bool preserve_null, enum bi_seg seg)
{
    /* Header plus 1 destination and 2 source slots in a single allocation */
    bi_instr *I = (bi_instr *)
        rzalloc_size(b->shader, sizeof(bi_instr) + (1 + 2) * sizeof(bi_index));

    /* Operand slots sit right after the header, destinations first */
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];
    I->nr_dests = 1;
    I->nr_srcs = 2;
    I->op = BI_OPCODE_SEG_ADD_I64;

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;

    /* Instruction modifiers */
    I->seg = seg;
    I->preserve_null = preserve_null;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Convenience form of bi_seg_add_i64_to(): the destination is a fresh index
 * from bi_temp(), and the instruction's first destination is returned.
 */
static inline
bi_index bi_seg_add_i64(bi_builder *b, bi_index src0, bi_index src1, bool preserve_null, enum bi_seg seg)
{
    bi_index tmp = bi_temp(b->shader);

    return bi_seg_add_i64_to(b, tmp, src0, src1, preserve_null, seg)->dest[0];
}


/*
 * Build a SHADDXH_I32 instruction with one destination and two sources and
 * insert it at the builder cursor. Returns the newly allocated instruction.
 */
static inline
bi_instr * bi_shaddxh_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 2));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_SHADDXH_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_shaddxh_i32_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_shaddxh_i32(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_shaddxh_i32_to(b, tmp, src0, src1);

    return instr->dest[0];
}


/*
 * Build a SHADDXL_I64 instruction with one destination and two sources,
 * store the shift immediate, and insert it at the builder cursor. Returns
 * the newly allocated instruction.
 */
static inline
bi_instr * bi_shaddxl_i64_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, uint32_t shift)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 2));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_SHADDXL_I64;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->shift = shift;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_shaddxl_i64_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_shaddxl_i64(bi_builder *b, bi_index src0, bi_index src1, uint32_t shift)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_shaddxl_i64_to(b, tmp, src0, src1, shift);

    return instr->dest[0];
}


/*
 * Build a SHADDXL_S32 instruction with one destination and two sources,
 * store the shift immediate, and insert it at the builder cursor. Returns
 * the newly allocated instruction.
 */
static inline
bi_instr * bi_shaddxl_s32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, uint32_t shift)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 2));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_SHADDXL_S32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->shift = shift;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_shaddxl_s32_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_shaddxl_s32(bi_builder *b, bi_index src0, bi_index src1, uint32_t shift)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_shaddxl_s32_to(b, tmp, src0, src1, shift);

    return instr->dest[0];
}


/*
 * Build a SHADDXL_U32 instruction with one destination and two sources,
 * store the shift immediate, and insert it at the builder cursor. Returns
 * the newly allocated instruction.
 */
static inline
bi_instr * bi_shaddxl_u32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, uint32_t shift)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 2));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_SHADDXL_U32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->shift = shift;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_shaddxl_u32_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_shaddxl_u32(bi_builder *b, bi_index src0, bi_index src1, uint32_t shift)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_shaddxl_u32_to(b, tmp, src0, src1, shift);

    return instr->dest[0];
}


/*
 * Pick the SHADDXL variant matching (type, bitsize) and emit it into dest0:
 *   64-bit, int or uint -> bi_shaddxl_i64_to
 *   32-bit, int         -> bi_shaddxl_s32_to
 *   32-bit, uint        -> bi_shaddxl_u32_to
 * Any other combination hits UNREACHABLE.
 */
static inline
bi_instr * bi_shaddxl_to(bi_builder *b, nir_alu_type type, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, uint32_t shift)
{
    switch (bitsize) {
    case 64:
        if (type == nir_type_uint || type == nir_type_int)
            return bi_shaddxl_i64_to(b, dest0, src0, src1, shift);
        break;
    case 32:
        if (type == nir_type_int)
            return bi_shaddxl_s32_to(b, dest0, src0, src1, shift);
        if (type == nir_type_uint)
            return bi_shaddxl_u32_to(b, dest0, src0, src1, shift);
        break;
    default:
        break;
    }

    UNREACHABLE("Invalid parameters for SHADDXL");
}

/*
 * Pick the SHADDXL variant matching (type, bitsize), emit it into a
 * bi_temp(b->shader) destination and return dest[0] of the instruction:
 *   64-bit, int or uint -> bi_shaddxl_i64_to
 *   32-bit, int         -> bi_shaddxl_s32_to
 *   32-bit, uint        -> bi_shaddxl_u32_to
 * Any other combination hits UNREACHABLE. The temporary is only requested
 * once a variant has been selected.
 */
static inline
bi_index bi_shaddxl(bi_builder *b, nir_alu_type type, unsigned bitsize, bi_index src0, bi_index src1, uint32_t shift)
{
    switch (bitsize) {
    case 64:
        if (type == nir_type_uint || type == nir_type_int) {
            bi_instr *instr = bi_shaddxl_i64_to(b, bi_temp(b->shader), src0, src1, shift);
            return instr->dest[0];
        }
        break;
    case 32:
        if (type == nir_type_int) {
            bi_instr *instr = bi_shaddxl_s32_to(b, bi_temp(b->shader), src0, src1, shift);
            return instr->dest[0];
        }
        if (type == nir_type_uint) {
            bi_instr *instr = bi_shaddxl_u32_to(b, bi_temp(b->shader), src0, src1, shift);
            return instr->dest[0];
        }
        break;
    default:
        break;
    }

    UNREACHABLE("Invalid parameters for SHADDXL");
}

/*
 * Build a SHIFT_DOUBLE_I32 instruction with one destination and three
 * sources and insert it at the builder cursor. Returns the newly allocated
 * instruction.
 */
static inline
bi_instr * bi_shift_double_i32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 3));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_SHIFT_DOUBLE_I32;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_shift_double_i32_to() that uses bi_temp(b->shader) as
 * the destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_shift_double_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_shift_double_i32_to(b, tmp, src0, src1, src2);

    return instr->dest[0];
}


/*
 * Build a SPLIT_I32 instruction with a caller-chosen number of destinations
 * and one source, then insert it at the builder cursor.
 *
 * Room for nr_dests destination slots is reserved, but this function does
 * not assign any of them; only src[0] is written here. Returns the newly
 * allocated instruction.
 */
static inline
bi_instr * bi_split_i32_to(bi_builder *b, unsigned nr_dests, bi_index src0)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (nr_dests + 1));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_SPLIT_I32;
    instr->nr_dests = nr_dests;
    instr->nr_srcs = 1;
    instr->dest = slots;
    instr->src = slots + nr_dests; /* the source sits after all destinations */

    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a STORE_I128 instruction with no destinations and three sources,
 * record seg and byte_offset, and insert it at the builder cursor. Returns
 * the newly allocated instruction.
 */
static inline
bi_instr * bi_store_i128(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg, uint32_t byte_offset)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (0 + 3));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_STORE_I128;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    instr->dest = slots;
    instr->src = slots; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->seg = seg;
    instr->byte_offset = byte_offset;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a STORE_I16 instruction with no destinations and three sources,
 * record seg and byte_offset, set extend to BI_EXTEND_ZEXT, and insert it at
 * the builder cursor. Returns the newly allocated instruction.
 */
static inline
bi_instr * bi_store_i16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg, uint32_t byte_offset)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (0 + 3));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_STORE_I16;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    instr->dest = slots;
    instr->src = slots; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->seg = seg;
    instr->byte_offset = byte_offset;
    instr->extend = BI_EXTEND_ZEXT;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a STORE_I24 instruction with no destinations and three sources,
 * record seg and byte_offset, set extend to BI_EXTEND_ZEXT, and insert it at
 * the builder cursor. Returns the newly allocated instruction.
 */
static inline
bi_instr * bi_store_i24(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg, uint32_t byte_offset)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (0 + 3));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_STORE_I24;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    instr->dest = slots;
    instr->src = slots; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->seg = seg;
    instr->byte_offset = byte_offset;
    instr->extend = BI_EXTEND_ZEXT;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a STORE_I32 instruction with no destinations and three sources,
 * record seg and byte_offset, and insert it at the builder cursor. Returns
 * the newly allocated instruction.
 */
static inline
bi_instr * bi_store_i32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg, uint32_t byte_offset)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (0 + 3));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_STORE_I32;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    instr->dest = slots;
    instr->src = slots; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->seg = seg;
    instr->byte_offset = byte_offset;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a STORE_I48 instruction with no destinations and three sources,
 * record seg and byte_offset, and insert it at the builder cursor. Returns
 * the newly allocated instruction.
 */
static inline
bi_instr * bi_store_i48(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg, uint32_t byte_offset)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (0 + 3));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_STORE_I48;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    instr->dest = slots;
    instr->src = slots; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->seg = seg;
    instr->byte_offset = byte_offset;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a STORE_I64 instruction with no destinations and three sources,
 * record seg and byte_offset, and insert it at the builder cursor. Returns
 * the newly allocated instruction.
 */
static inline
bi_instr * bi_store_i64(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg, uint32_t byte_offset)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (0 + 3));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_STORE_I64;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    instr->dest = slots;
    instr->src = slots; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->seg = seg;
    instr->byte_offset = byte_offset;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a STORE_I8 instruction with no destinations and three sources,
 * record seg and byte_offset, set extend to BI_EXTEND_ZEXT, and insert it at
 * the builder cursor. Returns the newly allocated instruction.
 */
static inline
bi_instr * bi_store_i8(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg, uint32_t byte_offset)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (0 + 3));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_STORE_I8;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    instr->dest = slots;
    instr->src = slots; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->seg = seg;
    instr->byte_offset = byte_offset;
    instr->extend = BI_EXTEND_ZEXT;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a STORE_I96 instruction with no destinations and three sources,
 * record seg and byte_offset, and insert it at the builder cursor. Returns
 * the newly allocated instruction.
 */
static inline
bi_instr * bi_store_i96(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg, uint32_t byte_offset)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (0 + 3));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_STORE_I96;
    instr->nr_dests = 0;
    instr->nr_srcs = 3;
    instr->dest = slots;
    instr->src = slots; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->seg = seg;
    instr->byte_offset = byte_offset;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Dispatch to the bi_store_i<N>() builder whose N equals bitsize. Accepted
 * sizes are 8, 16, 24, 32, 48, 64, 96 and 128; anything else hits
 * UNREACHABLE. All other arguments are forwarded unchanged.
 */
static inline
bi_instr * bi_store(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, enum bi_seg seg, uint32_t byte_offset)
{
    switch (bitsize) {
    case 8:
        return bi_store_i8(b, src0, src1, src2, seg, byte_offset);
    case 16:
        return bi_store_i16(b, src0, src1, src2, seg, byte_offset);
    case 24:
        return bi_store_i24(b, src0, src1, src2, seg, byte_offset);
    case 32:
        return bi_store_i32(b, src0, src1, src2, seg, byte_offset);
    case 48:
        return bi_store_i48(b, src0, src1, src2, seg, byte_offset);
    case 64:
        return bi_store_i64(b, src0, src1, src2, seg, byte_offset);
    case 96:
        return bi_store_i96(b, src0, src1, src2, seg, byte_offset);
    case 128:
        return bi_store_i128(b, src0, src1, src2, seg, byte_offset);
    default:
        UNREACHABLE("Invalid parameters for STORE");
    }
}

/*
 * Build an ST_CVT instruction with no destinations and four sources, record
 * register_format and vecsize, and insert it at the builder cursor. Returns
 * the newly allocated instruction.
 */
static inline
bi_instr * bi_st_cvt(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_register_format register_format, enum bi_vecsize vecsize)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (0 + 4));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_ST_CVT;
    instr->nr_dests = 0;
    instr->nr_srcs = 4;
    instr->dest = slots;
    instr->src = slots; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;
    instr->src[3] = src3;

    instr->register_format = register_format;
    instr->vecsize = vecsize;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build an ST_TILE instruction with no destinations and four sources, record
 * vecsize and register_format, and insert it at the builder cursor. Returns
 * the newly allocated instruction.
 */
static inline
bi_instr * bi_st_tile(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bi_index src3, enum bi_register_format register_format, enum bi_vecsize vecsize)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (0 + 4));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_ST_TILE;
    instr->nr_dests = 0;
    instr->nr_srcs = 4;
    instr->dest = slots;
    instr->src = slots; /* no destinations, so sources start at slot 0 */

    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;
    instr->src[3] = src3;

    instr->vecsize = vecsize;
    instr->register_format = register_format;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a SWZ_V2I16 instruction with one destination and one source and
 * insert it at the builder cursor. Returns the newly allocated instruction.
 */
static inline
bi_instr * bi_swz_v2i16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_SWZ_V2I16;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_swz_v2i16_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_swz_v2i16(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_swz_v2i16_to(b, tmp, src0);

    return instr->dest[0];
}


/*
 * Build a SWZ_V4I8 instruction with one destination and one source and
 * insert it at the builder cursor. Returns the newly allocated instruction.
 */
static inline
bi_instr * bi_swz_v4i8_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_SWZ_V4I8;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_swz_v4i8_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_swz_v4i8(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_swz_v4i8_to(b, tmp, src0);

    return instr->dest[0];
}


/*
 * Dispatch SWZ by bitsize: 16 selects bi_swz_v2i16_to(), 8 selects
 * bi_swz_v4i8_to(). Any other bitsize hits UNREACHABLE.
 */
static inline
bi_instr * bi_swz_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0)
{
    switch (bitsize) {
    case 16:
        return bi_swz_v2i16_to(b, dest0, src0);
    case 8:
        return bi_swz_v4i8_to(b, dest0, src0);
    default:
        UNREACHABLE("Invalid parameters for SWZ");
    }
}

/*
 * Dispatch SWZ by bitsize (16 -> v2i16, 8 -> v4i8), emitting into a
 * bi_temp(b->shader) destination and returning dest[0] of the instruction.
 * Any other bitsize hits UNREACHABLE; the temporary is only requested once
 * a variant has been selected.
 */
static inline
bi_index bi_swz(bi_builder *b, unsigned bitsize, bi_index src0)
{
    switch (bitsize) {
    case 16: {
        bi_instr *instr = bi_swz_v2i16_to(b, bi_temp(b->shader), src0);
        return instr->dest[0];
    }
    case 8: {
        bi_instr *instr = bi_swz_v4i8_to(b, bi_temp(b->shader), src0);
        return instr->dest[0];
    }
    default:
        UNREACHABLE("Invalid parameters for SWZ");
    }
}

/*
 * Build a TEXC instruction with one destination and four sources, record
 * lod_mode, sr_count and sr_count_2, and insert it at the builder cursor.
 * Returns the newly allocated instruction.
 */
static inline
bi_instr * bi_texc_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bi_index src3, bool lod_mode, uint32_t sr_count, uint32_t sr_count_2)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 4));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_TEXC;
    instr->nr_dests = 1;
    instr->nr_srcs = 4;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;
    instr->src[3] = src3;

    instr->lod_mode = lod_mode;
    instr->sr_count = sr_count;
    instr->sr_count_2 = sr_count_2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_texc_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_texc(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bi_index src3, bool lod_mode, uint32_t sr_count, uint32_t sr_count_2)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_texc_to(b, tmp, src0, src1, src2, src3, lod_mode, sr_count, sr_count_2);

    return instr->dest[0];
}


/*
 * Build a TEXC_DUAL instruction with two destinations and four sources,
 * record lod_mode, sr_count and sr_count_2, and insert it at the builder
 * cursor. Returns the newly allocated instruction.
 */
static inline
bi_instr * bi_texc_dual_to(bi_builder *b, bi_index dest0, bi_index dest1, bi_index src0, bi_index src1, bi_index src2, bi_index src3, bool lod_mode, uint32_t sr_count, uint32_t sr_count_2)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (2 + 4));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_TEXC_DUAL;
    instr->nr_dests = 2;
    instr->nr_srcs = 4;
    instr->dest = slots;
    instr->src = slots + 2; /* sources follow the two destinations */

    instr->dest[0] = dest0;
    instr->dest[1] = dest1;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;
    instr->src[3] = src3;

    instr->lod_mode = lod_mode;
    instr->sr_count = sr_count;
    instr->sr_count_2 = sr_count_2;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}


/*
 * Build a TEXS_2D_F16 instruction with one destination and two sources,
 * record lod_mode, sampler_index and texture_index, and insert it at the
 * builder cursor. Returns the newly allocated instruction.
 */
static inline
bi_instr * bi_texs_2d_f16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bool lod_mode, uint32_t sampler_index, uint32_t texture_index)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 2));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_TEXS_2D_F16;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->lod_mode = lod_mode;
    instr->sampler_index = sampler_index;
    instr->texture_index = texture_index;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_texs_2d_f16_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_texs_2d_f16(bi_builder *b, bi_index src0, bi_index src1, bool lod_mode, uint32_t sampler_index, uint32_t texture_index)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_texs_2d_f16_to(b, tmp, src0, src1, lod_mode, sampler_index, texture_index);

    return instr->dest[0];
}


/*
 * Build a TEXS_2D_F32 instruction with one destination and two sources,
 * record lod_mode, sampler_index and texture_index, and insert it at the
 * builder cursor. Returns the newly allocated instruction.
 */
static inline
bi_instr * bi_texs_2d_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bool lod_mode, uint32_t sampler_index, uint32_t texture_index)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 2));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_TEXS_2D_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 2;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;

    instr->lod_mode = lod_mode;
    instr->sampler_index = sampler_index;
    instr->texture_index = texture_index;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_texs_2d_f32_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_texs_2d_f32(bi_builder *b, bi_index src0, bi_index src1, bool lod_mode, uint32_t sampler_index, uint32_t texture_index)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_texs_2d_f32_to(b, tmp, src0, src1, lod_mode, sampler_index, texture_index);

    return instr->dest[0];
}


/*
 * Dispatch TEXS_2D by bitsize: 16 selects bi_texs_2d_f16_to(), 32 selects
 * bi_texs_2d_f32_to(). Any other bitsize hits UNREACHABLE.
 */
static inline
bi_instr * bi_texs_2d_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bool lod_mode, uint32_t sampler_index, uint32_t texture_index)
{
    switch (bitsize) {
    case 16:
        return bi_texs_2d_f16_to(b, dest0, src0, src1, lod_mode, sampler_index, texture_index);
    case 32:
        return bi_texs_2d_f32_to(b, dest0, src0, src1, lod_mode, sampler_index, texture_index);
    default:
        UNREACHABLE("Invalid parameters for TEXS_2D");
    }
}

/*
 * Dispatch TEXS_2D by bitsize (16 -> f16, 32 -> f32), emitting into a
 * bi_temp(b->shader) destination and returning dest[0] of the instruction.
 * Any other bitsize hits UNREACHABLE; the temporary is only requested once
 * a variant has been selected.
 */
static inline
bi_index bi_texs_2d(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bool lod_mode, uint32_t sampler_index, uint32_t texture_index)
{
    switch (bitsize) {
    case 16: {
        bi_instr *instr = bi_texs_2d_f16_to(b, bi_temp(b->shader), src0, src1, lod_mode, sampler_index, texture_index);
        return instr->dest[0];
    }
    case 32: {
        bi_instr *instr = bi_texs_2d_f32_to(b, bi_temp(b->shader), src0, src1, lod_mode, sampler_index, texture_index);
        return instr->dest[0];
    }
    default:
        UNREACHABLE("Invalid parameters for TEXS_2D");
    }
}

/*
 * Build a TEXS_CUBE_F16 instruction with one destination and three sources,
 * record sampler_index and texture_index, and insert it at the builder
 * cursor. Returns the newly allocated instruction.
 */
static inline
bi_instr * bi_texs_cube_f16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, uint32_t sampler_index, uint32_t texture_index)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 3));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_TEXS_CUBE_F16;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->sampler_index = sampler_index;
    instr->texture_index = texture_index;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_texs_cube_f16_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_texs_cube_f16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, uint32_t sampler_index, uint32_t texture_index)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_texs_cube_f16_to(b, tmp, src0, src1, src2, sampler_index, texture_index);

    return instr->dest[0];
}


/*
 * Build a TEXS_CUBE_F32 instruction with one destination and three sources,
 * record sampler_index and texture_index, and insert it at the builder
 * cursor. Returns the newly allocated instruction.
 */
static inline
bi_instr * bi_texs_cube_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, uint32_t sampler_index, uint32_t texture_index)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 3));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_TEXS_CUBE_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    instr->sampler_index = sampler_index;
    instr->texture_index = texture_index;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_texs_cube_f32_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_texs_cube_f32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, uint32_t sampler_index, uint32_t texture_index)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_texs_cube_f32_to(b, tmp, src0, src1, src2, sampler_index, texture_index);

    return instr->dest[0];
}


/*
 * Dispatch TEXS_CUBE by bitsize: 16 selects bi_texs_cube_f16_to(), 32
 * selects bi_texs_cube_f32_to(). Any other bitsize hits UNREACHABLE.
 */
static inline
bi_instr * bi_texs_cube_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, uint32_t sampler_index, uint32_t texture_index)
{
    switch (bitsize) {
    case 16:
        return bi_texs_cube_f16_to(b, dest0, src0, src1, src2, sampler_index, texture_index);
    case 32:
        return bi_texs_cube_f32_to(b, dest0, src0, src1, src2, sampler_index, texture_index);
    default:
        UNREACHABLE("Invalid parameters for TEXS_CUBE");
    }
}

/*
 * Dispatch TEXS_CUBE by bitsize (16 -> f16, 32 -> f32), emitting into a
 * bi_temp(b->shader) destination and returning dest[0] of the instruction.
 * Any other bitsize hits UNREACHABLE; the temporary is only requested once
 * a variant has been selected.
 */
static inline
bi_index bi_texs_cube(bi_builder *b, unsigned bitsize, bi_index src0, bi_index src1, bi_index src2, uint32_t sampler_index, uint32_t texture_index)
{
    switch (bitsize) {
    case 16: {
        bi_instr *instr = bi_texs_cube_f16_to(b, bi_temp(b->shader), src0, src1, src2, sampler_index, texture_index);
        return instr->dest[0];
    }
    case 32: {
        bi_instr *instr = bi_texs_cube_f32_to(b, bi_temp(b->shader), src0, src1, src2, sampler_index, texture_index);
        return instr->dest[0];
    }
    default:
        UNREACHABLE("Invalid parameters for TEXS_CUBE");
    }
}

/*
 * Build a TEX_FETCH instruction with one destination and three sources,
 * copy every modifier argument (write_mask, dimension, wide_indices,
 * array_enable, texel_offset, register_format, sr_count) onto it, and insert
 * it at the builder cursor. Returns the newly allocated instruction.
 */
static inline
bi_instr * bi_tex_fetch_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool array_enable, enum bi_dimension dimension, enum bi_register_format register_format, bool texel_offset, bool wide_indices, enum bi_write_mask write_mask, uint32_t sr_count)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 3));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_TEX_FETCH;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    /* Modifier stores are kept in their generated order. */
    instr->write_mask = write_mask;
    instr->dimension = dimension;
    instr->wide_indices = wide_indices;
    instr->array_enable = array_enable;
    instr->texel_offset = texel_offset;
    instr->register_format = register_format;
    instr->sr_count = sr_count;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_tex_fetch_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_tex_fetch(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool array_enable, enum bi_dimension dimension, enum bi_register_format register_format, bool texel_offset, bool wide_indices, enum bi_write_mask write_mask, uint32_t sr_count)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_tex_fetch_to(b, tmp, src0, src1, src2,
                                      array_enable, dimension, register_format,
                                      texel_offset, wide_indices, write_mask,
                                      sr_count);

    return instr->dest[0];
}


/*
 * Build a TEX_GATHER instruction with one destination and three sources,
 * copy every modifier argument (write_mask, dimension, wide_indices,
 * array_enable, texel_offset, integer_coordinates, fetch_component,
 * register_format, shadow, sr_count) onto it, and insert it at the builder
 * cursor. Returns the newly allocated instruction.
 */
static inline
bi_instr * bi_tex_gather_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool array_enable, enum bi_dimension dimension, enum bi_fetch_component fetch_component, bool integer_coordinates, enum bi_register_format register_format, bool shadow, bool texel_offset, bool wide_indices, enum bi_write_mask write_mask, uint32_t sr_count)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 3));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_TEX_GATHER;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    /* Modifier stores are kept in their generated order. */
    instr->write_mask = write_mask;
    instr->dimension = dimension;
    instr->wide_indices = wide_indices;
    instr->array_enable = array_enable;
    instr->texel_offset = texel_offset;
    instr->integer_coordinates = integer_coordinates;
    instr->fetch_component = fetch_component;
    instr->register_format = register_format;
    instr->shadow = shadow;
    instr->sr_count = sr_count;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_tex_gather_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_tex_gather(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool array_enable, enum bi_dimension dimension, enum bi_fetch_component fetch_component, bool integer_coordinates, enum bi_register_format register_format, bool shadow, bool texel_offset, bool wide_indices, enum bi_write_mask write_mask, uint32_t sr_count)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_tex_gather_to(b, tmp, src0, src1, src2,
                                       array_enable, dimension, fetch_component,
                                       integer_coordinates, register_format,
                                       shadow, texel_offset, wide_indices,
                                       write_mask, sr_count);

    return instr->dest[0];
}


/*
 * Build a TEX_GRADIENT instruction with one destination and three sources,
 * record write_mask, dimension, wide_indices and sr_count, and insert it at
 * the builder cursor. Returns the newly allocated instruction.
 */
static inline
bi_instr * bi_tex_gradient_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, enum bi_dimension dimension, bool wide_indices, enum bi_write_mask write_mask, uint32_t sr_count)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 3));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_TEX_GRADIENT;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    /* Modifier stores are kept in their generated order. */
    instr->write_mask = write_mask;
    instr->dimension = dimension;
    instr->wide_indices = wide_indices;
    instr->sr_count = sr_count;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_tex_gradient_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_tex_gradient(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, enum bi_dimension dimension, bool wide_indices, enum bi_write_mask write_mask, uint32_t sr_count)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_tex_gradient_to(b, tmp, src0, src1, src2,
                                         dimension, wide_indices, write_mask,
                                         sr_count);

    return instr->dest[0];
}


/*
 * Build a TEX_SINGLE instruction with one destination and three sources,
 * copy every modifier argument (write_mask, dimension, wide_indices,
 * array_enable, texel_offset, register_format, shadow, va_lod_mode,
 * sr_count) onto it, and insert it at the builder cursor. Returns the newly
 * allocated instruction.
 */
static inline
bi_instr * bi_tex_single_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool array_enable, enum bi_dimension dimension, enum bi_register_format register_format, bool shadow, bool texel_offset, enum bi_va_lod_mode va_lod_mode, bool wide_indices, enum bi_write_mask write_mask, uint32_t sr_count)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 3));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_TEX_SINGLE;
    instr->nr_dests = 1;
    instr->nr_srcs = 3;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;
    instr->src[1] = src1;
    instr->src[2] = src2;

    /* Modifier stores are kept in their generated order. */
    instr->write_mask = write_mask;
    instr->dimension = dimension;
    instr->wide_indices = wide_indices;
    instr->array_enable = array_enable;
    instr->texel_offset = texel_offset;
    instr->register_format = register_format;
    instr->shadow = shadow;
    instr->va_lod_mode = va_lod_mode;
    instr->sr_count = sr_count;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_tex_single_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_tex_single(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool array_enable, enum bi_dimension dimension, enum bi_register_format register_format, bool shadow, bool texel_offset, enum bi_va_lod_mode va_lod_mode, bool wide_indices, enum bi_write_mask write_mask, uint32_t sr_count)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_tex_single_to(b, tmp, src0, src1, src2,
                                       array_enable, dimension, register_format,
                                       shadow, texel_offset, va_lod_mode,
                                       wide_indices, write_mask, sr_count);

    return instr->dest[0];
}


/*
 * Build a U16_TO_F32 instruction with one destination and one source and
 * insert it at the builder cursor. Returns the newly allocated instruction.
 */
static inline
bi_instr * bi_u16_to_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_U16_TO_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_u16_to_f32_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_u16_to_f32(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_u16_to_f32_to(b, tmp, src0);

    return instr->dest[0];
}


/*
 * Build a U16_TO_U32 instruction with one destination and one source, set
 * round to BI_ROUND_RTZ, and insert it at the builder cursor. Returns the
 * newly allocated instruction.
 */
static inline
bi_instr * bi_u16_to_u32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_U16_TO_U32;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    instr->round = BI_ROUND_RTZ;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_u16_to_u32_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_u16_to_u32(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_u16_to_u32_to(b, tmp, src0);

    return instr->dest[0];
}


/*
 * Build a U32_TO_F32 instruction with one destination and one source, take
 * the round modifier from bi_round_mode(b->shader, 32), and insert it at the
 * builder cursor. Returns the newly allocated instruction.
 */
static inline
bi_instr * bi_u32_to_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_U32_TO_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    instr->round = bi_round_mode(b->shader, 32);

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_u32_to_f32_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_u32_to_f32(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_u32_to_f32_to(b, tmp, src0);

    return instr->dest[0];
}


/*
 * Build a U8_TO_F32 instruction with one destination and one source and
 * insert it at the builder cursor. Returns the newly allocated instruction.
 */
static inline
bi_instr * bi_u8_to_f32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_U8_TO_F32;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_u8_to_f32_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_u8_to_f32(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_u8_to_f32_to(b, tmp, src0);

    return instr->dest[0];
}


/*
 * Build a U8_TO_U32 instruction with one destination and one source, set
 * round to BI_ROUND_RTZ, and insert it at the builder cursor. Returns the
 * newly allocated instruction.
 */
static inline
bi_instr * bi_u8_to_u32_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_U8_TO_U32;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    instr->round = BI_ROUND_RTZ;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_u8_to_u32_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_u8_to_u32(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_u8_to_u32_to(b, tmp, src0);

    return instr->dest[0];
}


/*
 * Build a V2F16_TO_V2S16 instruction with one destination and one source,
 * set round to BI_ROUND_RTZ, and insert it at the builder cursor. Returns
 * the newly allocated instruction.
 */
static inline
bi_instr * bi_v2f16_to_v2s16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_V2F16_TO_V2S16;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    instr->round = BI_ROUND_RTZ;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_v2f16_to_v2s16_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_v2f16_to_v2s16(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_v2f16_to_v2s16_to(b, tmp, src0);

    return instr->dest[0];
}


/*
 * Build a V2F16_TO_V2U16 instruction with one destination and one source,
 * set round to BI_ROUND_RTZ, and insert it at the builder cursor. Returns
 * the newly allocated instruction.
 */
static inline
bi_instr * bi_v2f16_to_v2u16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* The operand slots live directly behind the instruction header. */
    bi_instr *instr = (bi_instr *) rzalloc_size(
        b->shader, sizeof(bi_instr) + sizeof(bi_index) * (1 + 1));
    bi_index *slots = (bi_index *) (&instr[1]);

    instr->op = BI_OPCODE_V2F16_TO_V2U16;
    instr->nr_dests = 1;
    instr->nr_srcs = 1;
    instr->dest = slots;
    instr->src = slots + 1; /* sources follow the single destination */

    instr->dest[0] = dest0;
    instr->src[0] = src0;

    instr->round = BI_ROUND_RTZ;

    bi_builder_insert(&b->cursor, instr);
    return instr;
}

/*
 * Wrapper around bi_v2f16_to_v2u16_to() that uses bi_temp(b->shader) as the
 * destination and returns dest[0] of the emitted instruction.
 */
static inline
bi_index bi_v2f16_to_v2u16(bi_builder *b, bi_index src0)
{
    bi_index tmp = bi_temp(b->shader);
    bi_instr *instr = bi_v2f16_to_v2u16_to(b, tmp, src0);

    return instr->dest[0];
}


/*
 * Build a V2F32_TO_V2F16 instruction with destination dest0 and sources
 * src0/src1, take its rounding mode from bi_round_mode(b->shader, 16), and
 * pass it to bi_builder_insert() at the builder's cursor. Returns the new
 * instruction.
 */
static inline
bi_instr * bi_v2f32_to_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1)
{
    /* Instruction plus trailing operand storage: 1 destination, 2 sources. */
    const size_t alloc_size = sizeof(bi_instr) + (1 + 2) * sizeof(bi_index);
    bi_instr *I = (bi_instr *) rzalloc_size(b->shader, alloc_size);

    I->op = BI_OPCODE_V2F32_TO_V2F16;

    /* Operands sit right behind the instruction; sources follow destinations. */
    I->nr_dests = 1;
    I->nr_srcs = 2;
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;

    I->round = bi_round_mode(b->shader, 16);

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Variant of bi_v2f32_to_v2f16_to() that takes its destination from
 * bi_temp(b->shader) and returns the built instruction's dest[0].
 */
static inline
bi_index bi_v2f32_to_v2f16(bi_builder *b, bi_index src0, bi_index src1)
{
    bi_index dest0 = bi_temp(b->shader);

    return bi_v2f32_to_v2f16_to(b, dest0, src0, src1)->dest[0];
}


/*
 * Build a V2S16_TO_V2F16 instruction with destination dest0 and source src0,
 * take its rounding mode from bi_round_mode(b->shader, 16), and pass it to
 * bi_builder_insert() at the builder's cursor. Returns the new instruction.
 */
static inline
bi_instr * bi_v2s16_to_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Instruction plus trailing operand storage: 1 destination, 1 source. */
    const size_t alloc_size = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *I = (bi_instr *) rzalloc_size(b->shader, alloc_size);

    I->op = BI_OPCODE_V2S16_TO_V2F16;

    /* Operands sit right behind the instruction; sources follow destinations. */
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];

    I->dest[0] = dest0;
    I->src[0] = src0;

    I->round = bi_round_mode(b->shader, 16);

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Variant of bi_v2s16_to_v2f16_to() that takes its destination from
 * bi_temp(b->shader) and returns the built instruction's dest[0].
 */
static inline
bi_index bi_v2s16_to_v2f16(bi_builder *b, bi_index src0)
{
    bi_index dest0 = bi_temp(b->shader);

    return bi_v2s16_to_v2f16_to(b, dest0, src0)->dest[0];
}


/*
 * Build a V2S8_TO_V2F16 instruction with destination dest0 and source src0
 * and pass it to bi_builder_insert() at the builder's cursor. No modifier
 * fields are written here. Returns the new instruction.
 */
static inline
bi_instr * bi_v2s8_to_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Instruction plus trailing operand storage: 1 destination, 1 source. */
    const size_t alloc_size = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *I = (bi_instr *) rzalloc_size(b->shader, alloc_size);

    I->op = BI_OPCODE_V2S8_TO_V2F16;

    /* Operands sit right behind the instruction; sources follow destinations. */
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];

    I->dest[0] = dest0;
    I->src[0] = src0;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Variant of bi_v2s8_to_v2f16_to() that takes its destination from
 * bi_temp(b->shader) and returns the built instruction's dest[0].
 */
static inline
bi_index bi_v2s8_to_v2f16(bi_builder *b, bi_index src0)
{
    bi_index dest0 = bi_temp(b->shader);

    return bi_v2s8_to_v2f16_to(b, dest0, src0)->dest[0];
}


/*
 * Build a V2S8_TO_V2S16 instruction with destination dest0 and source src0,
 * set its rounding mode to BI_ROUND_RTZ, and pass it to bi_builder_insert()
 * at the builder's cursor. Returns the new instruction.
 */
static inline
bi_instr * bi_v2s8_to_v2s16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Instruction plus trailing operand storage: 1 destination, 1 source. */
    const size_t alloc_size = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *I = (bi_instr *) rzalloc_size(b->shader, alloc_size);

    I->op = BI_OPCODE_V2S8_TO_V2S16;

    /* Operands sit right behind the instruction; sources follow destinations. */
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];

    I->dest[0] = dest0;
    I->src[0] = src0;

    I->round = BI_ROUND_RTZ;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Variant of bi_v2s8_to_v2s16_to() that takes its destination from
 * bi_temp(b->shader) and returns the built instruction's dest[0].
 */
static inline
bi_index bi_v2s8_to_v2s16(bi_builder *b, bi_index src0)
{
    bi_index dest0 = bi_temp(b->shader);

    return bi_v2s8_to_v2s16_to(b, dest0, src0)->dest[0];
}


/*
 * Build a V2U16_TO_V2F16 instruction with destination dest0 and source src0,
 * take its rounding mode from bi_round_mode(b->shader, 16), and pass it to
 * bi_builder_insert() at the builder's cursor. Returns the new instruction.
 */
static inline
bi_instr * bi_v2u16_to_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Instruction plus trailing operand storage: 1 destination, 1 source. */
    const size_t alloc_size = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *I = (bi_instr *) rzalloc_size(b->shader, alloc_size);

    I->op = BI_OPCODE_V2U16_TO_V2F16;

    /* Operands sit right behind the instruction; sources follow destinations. */
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];

    I->dest[0] = dest0;
    I->src[0] = src0;

    I->round = bi_round_mode(b->shader, 16);

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Variant of bi_v2u16_to_v2f16_to() that takes its destination from
 * bi_temp(b->shader) and returns the built instruction's dest[0].
 */
static inline
bi_index bi_v2u16_to_v2f16(bi_builder *b, bi_index src0)
{
    bi_index dest0 = bi_temp(b->shader);

    return bi_v2u16_to_v2f16_to(b, dest0, src0)->dest[0];
}


/*
 * Build a V2U8_TO_V2F16 instruction with destination dest0 and source src0
 * and pass it to bi_builder_insert() at the builder's cursor. No modifier
 * fields are written here. Returns the new instruction.
 */
static inline
bi_instr * bi_v2u8_to_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Instruction plus trailing operand storage: 1 destination, 1 source. */
    const size_t alloc_size = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *I = (bi_instr *) rzalloc_size(b->shader, alloc_size);

    I->op = BI_OPCODE_V2U8_TO_V2F16;

    /* Operands sit right behind the instruction; sources follow destinations. */
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];

    I->dest[0] = dest0;
    I->src[0] = src0;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Variant of bi_v2u8_to_v2f16_to() that takes its destination from
 * bi_temp(b->shader) and returns the built instruction's dest[0].
 */
static inline
bi_index bi_v2u8_to_v2f16(bi_builder *b, bi_index src0)
{
    bi_index dest0 = bi_temp(b->shader);

    return bi_v2u8_to_v2f16_to(b, dest0, src0)->dest[0];
}


/*
 * Build a V2U8_TO_V2U16 instruction with destination dest0 and source src0,
 * set its rounding mode to BI_ROUND_RTZ, and pass it to bi_builder_insert()
 * at the builder's cursor. Returns the new instruction.
 */
static inline
bi_instr * bi_v2u8_to_v2u16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Instruction plus trailing operand storage: 1 destination, 1 source. */
    const size_t alloc_size = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *I = (bi_instr *) rzalloc_size(b->shader, alloc_size);

    I->op = BI_OPCODE_V2U8_TO_V2U16;

    /* Operands sit right behind the instruction; sources follow destinations. */
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];

    I->dest[0] = dest0;
    I->src[0] = src0;

    I->round = BI_ROUND_RTZ;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Variant of bi_v2u8_to_v2u16_to() that takes its destination from
 * bi_temp(b->shader) and returns the built instruction's dest[0].
 */
static inline
bi_index bi_v2u8_to_v2u16(bi_builder *b, bi_index src0)
{
    bi_index dest0 = bi_temp(b->shader);

    return bi_v2u8_to_v2u16_to(b, dest0, src0)->dest[0];
}


/*
 * Build a VAR_TEX_F16 instruction with destination dest0 and no sources,
 * record the update, lod_mode, sample, texture_index and varying_index
 * arguments on it, and pass it to bi_builder_insert() at the builder's
 * cursor. Returns the new instruction.
 */
static inline
bi_instr * bi_var_tex_f16_to(bi_builder *b, bi_index dest0, bool lod_mode, enum bi_sample sample, enum bi_update update, uint32_t texture_index, uint32_t varying_index)
{
    /* Instruction plus trailing operand storage: 1 destination, 0 sources. */
    const size_t alloc_size = sizeof(bi_instr) + (1 + 0) * sizeof(bi_index);
    bi_instr *I = (bi_instr *) rzalloc_size(b->shader, alloc_size);

    I->op = BI_OPCODE_VAR_TEX_F16;

    /* With no sources, src points just past the single destination slot. */
    I->nr_dests = 1;
    I->nr_srcs = 0;
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];

    I->dest[0] = dest0;

    I->update = update;
    I->lod_mode = lod_mode;
    I->sample = sample;
    I->texture_index = texture_index;
    I->varying_index = varying_index;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Variant of bi_var_tex_f16_to() that takes its destination from
 * bi_temp(b->shader) and returns the built instruction's dest[0].
 */
static inline
bi_index bi_var_tex_f16(bi_builder *b, bool lod_mode, enum bi_sample sample, enum bi_update update, uint32_t texture_index, uint32_t varying_index)
{
    bi_index dest0 = bi_temp(b->shader);

    return bi_var_tex_f16_to(b, dest0, lod_mode, sample, update,
                             texture_index, varying_index)->dest[0];
}


/*
 * Build a VAR_TEX_F32 instruction with destination dest0 and no sources,
 * record the update, lod_mode, sample, texture_index and varying_index
 * arguments on it, and pass it to bi_builder_insert() at the builder's
 * cursor. Returns the new instruction.
 */
static inline
bi_instr * bi_var_tex_f32_to(bi_builder *b, bi_index dest0, bool lod_mode, enum bi_sample sample, enum bi_update update, uint32_t texture_index, uint32_t varying_index)
{
    /* Instruction plus trailing operand storage: 1 destination, 0 sources. */
    const size_t alloc_size = sizeof(bi_instr) + (1 + 0) * sizeof(bi_index);
    bi_instr *I = (bi_instr *) rzalloc_size(b->shader, alloc_size);

    I->op = BI_OPCODE_VAR_TEX_F32;

    /* With no sources, src points just past the single destination slot. */
    I->nr_dests = 1;
    I->nr_srcs = 0;
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];

    I->dest[0] = dest0;

    I->update = update;
    I->lod_mode = lod_mode;
    I->sample = sample;
    I->texture_index = texture_index;
    I->varying_index = varying_index;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Variant of bi_var_tex_f32_to() that takes its destination from
 * bi_temp(b->shader) and returns the built instruction's dest[0].
 */
static inline
bi_index bi_var_tex_f32(bi_builder *b, bool lod_mode, enum bi_sample sample, enum bi_update update, uint32_t texture_index, uint32_t varying_index)
{
    bi_index dest0 = bi_temp(b->shader);

    return bi_var_tex_f32_to(b, dest0, lod_mode, sample, update,
                             texture_index, varying_index)->dest[0];
}


/*
 * Size-dispatching front end for VAR_TEX: bitsize 16 forwards to
 * bi_var_tex_f16_to() and bitsize 32 to bi_var_tex_f32_to(), with all other
 * arguments passed through unchanged. Any other bitsize hits UNREACHABLE().
 */
static inline
bi_instr * bi_var_tex_to(bi_builder *b, unsigned bitsize, bi_index dest0, bool lod_mode, enum bi_sample sample, enum bi_update update, uint32_t texture_index, uint32_t varying_index)
{
    switch (bitsize) {
    case 16:
        return bi_var_tex_f16_to(b, dest0, lod_mode, sample, update,
                                 texture_index, varying_index);
    case 32:
        return bi_var_tex_f32_to(b, dest0, lod_mode, sample, update,
                                 texture_index, varying_index);
    default:
        UNREACHABLE("Invalid parameters for VAR_TEX");
    }
}

/*
 * Size-dispatching front end for VAR_TEX that returns the destination index:
 * bitsize 16 builds via bi_var_tex_f16_to() and bitsize 32 via
 * bi_var_tex_f32_to(), each with a destination from bi_temp(b->shader).
 * Any other bitsize hits UNREACHABLE().
 */
static inline
bi_index bi_var_tex(bi_builder *b, unsigned bitsize, bool lod_mode, enum bi_sample sample, enum bi_update update, uint32_t texture_index, uint32_t varying_index)
{
    switch (bitsize) {
    case 16:
        /* bi_temp() is only called on the path that actually builds. */
        return bi_var_tex_f16_to(b, bi_temp(b->shader), lod_mode, sample, update,
                                 texture_index, varying_index)->dest[0];
    case 32:
        return bi_var_tex_f32_to(b, bi_temp(b->shader), lod_mode, sample, update,
                                 texture_index, varying_index)->dest[0];
    default:
        UNREACHABLE("Invalid parameters for VAR_TEX");
    }
}

/*
 * Build a VN_ASST1_F16 instruction with destination dest0 and sources
 * src0..src2, record the h and l flags on it, and pass it to
 * bi_builder_insert() at the builder's cursor. Returns the new instruction.
 */
static inline
bi_instr * bi_vn_asst1_f16_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool h, bool l)
{
    /* Instruction plus trailing operand storage: 1 destination, 3 sources. */
    const size_t alloc_size = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *I = (bi_instr *) rzalloc_size(b->shader, alloc_size);

    I->op = BI_OPCODE_VN_ASST1_F16;

    /* Operands sit right behind the instruction; sources follow destinations. */
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    I->h = h;
    I->l = l;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Variant of bi_vn_asst1_f16_to() that takes its destination from
 * bi_temp(b->shader) and returns the built instruction's dest[0].
 */
static inline
bi_index bi_vn_asst1_f16(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool h, bool l)
{
    bi_index dest0 = bi_temp(b->shader);

    return bi_vn_asst1_f16_to(b, dest0, src0, src1, src2, h, l)->dest[0];
}


/*
 * Build a VN_ASST1_F32 instruction with destination dest0 and sources
 * src0..src3 and pass it to bi_builder_insert() at the builder's cursor.
 * No modifier fields are written here. Returns the new instruction.
 */
static inline
bi_instr * bi_vn_asst1_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bi_index src3)
{
    /* Instruction plus trailing operand storage: 1 destination, 4 sources. */
    const size_t alloc_size = sizeof(bi_instr) + (1 + 4) * sizeof(bi_index);
    bi_instr *I = (bi_instr *) rzalloc_size(b->shader, alloc_size);

    I->op = BI_OPCODE_VN_ASST1_F32;

    /* Operands sit right behind the instruction; sources follow destinations. */
    I->nr_dests = 1;
    I->nr_srcs = 4;
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;
    I->src[3] = src3;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Variant of bi_vn_asst1_f32_to() that takes its destination from
 * bi_temp(b->shader) and returns the built instruction's dest[0].
 */
static inline
bi_index bi_vn_asst1_f32(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bi_index src3)
{
    bi_index dest0 = bi_temp(b->shader);

    return bi_vn_asst1_f32_to(b, dest0, src0, src1, src2, src3)->dest[0];
}


/*
 * Build a VN_ASST2_F32 instruction with destination dest0 and source src0,
 * record the scale flag on it, and pass it to bi_builder_insert() at the
 * builder's cursor. Returns the new instruction.
 */
static inline
bi_instr * bi_vn_asst2_f32_to(bi_builder *b, bi_index dest0, bi_index src0, bool scale)
{
    /* Instruction plus trailing operand storage: 1 destination, 1 source. */
    const size_t alloc_size = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *I = (bi_instr *) rzalloc_size(b->shader, alloc_size);

    I->op = BI_OPCODE_VN_ASST2_F32;

    /* Operands sit right behind the instruction; sources follow destinations. */
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];

    I->dest[0] = dest0;
    I->src[0] = src0;

    I->scale = scale;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Variant of bi_vn_asst2_f32_to() that takes its destination from
 * bi_temp(b->shader) and returns the built instruction's dest[0].
 */
static inline
bi_index bi_vn_asst2_f32(bi_builder *b, bi_index src0, bool scale)
{
    bi_index dest0 = bi_temp(b->shader);

    return bi_vn_asst2_f32_to(b, dest0, src0, scale)->dest[0];
}


/*
 * Build a VN_ASST2_V2F16 instruction with destination dest0 and source src0
 * and pass it to bi_builder_insert() at the builder's cursor. No modifier
 * fields are written here. Returns the new instruction.
 */
static inline
bi_instr * bi_vn_asst2_v2f16_to(bi_builder *b, bi_index dest0, bi_index src0)
{
    /* Instruction plus trailing operand storage: 1 destination, 1 source. */
    const size_t alloc_size = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *I = (bi_instr *) rzalloc_size(b->shader, alloc_size);

    I->op = BI_OPCODE_VN_ASST2_V2F16;

    /* Operands sit right behind the instruction; sources follow destinations. */
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];

    I->dest[0] = dest0;
    I->src[0] = src0;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Variant of bi_vn_asst2_v2f16_to() that takes its destination from
 * bi_temp(b->shader) and returns the built instruction's dest[0].
 */
static inline
bi_index bi_vn_asst2_v2f16(bi_builder *b, bi_index src0)
{
    bi_index dest0 = bi_temp(b->shader);

    return bi_vn_asst2_v2f16_to(b, dest0, src0)->dest[0];
}


/*
 * Build a WMASK instruction with destination dest0 and source src0, record
 * the subgroup and fill arguments on it, and pass it to bi_builder_insert()
 * at the builder's cursor. Returns the new instruction.
 */
static inline
bi_instr * bi_wmask_to(bi_builder *b, bi_index dest0, bi_index src0, enum bi_subgroup subgroup, uint32_t fill)
{
    /* Instruction plus trailing operand storage: 1 destination, 1 source. */
    const size_t alloc_size = sizeof(bi_instr) + (1 + 1) * sizeof(bi_index);
    bi_instr *I = (bi_instr *) rzalloc_size(b->shader, alloc_size);

    I->op = BI_OPCODE_WMASK;

    /* Operands sit right behind the instruction; sources follow destinations. */
    I->nr_dests = 1;
    I->nr_srcs = 1;
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];

    I->dest[0] = dest0;
    I->src[0] = src0;

    I->subgroup = subgroup;
    I->fill = fill;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Variant of bi_wmask_to() that takes its destination from
 * bi_temp(b->shader) and returns the built instruction's dest[0].
 */
static inline
bi_index bi_wmask(bi_builder *b, bi_index src0, enum bi_subgroup subgroup, uint32_t fill)
{
    bi_index dest0 = bi_temp(b->shader);

    return bi_wmask_to(b, dest0, src0, subgroup, fill)->dest[0];
}


/*
 * Build a ZS_EMIT instruction with destination dest0 and sources src0..src2,
 * record the stencil and z flags on it, and pass it to bi_builder_insert()
 * at the builder's cursor. Returns the new instruction.
 */
static inline
bi_instr * bi_zs_emit_to(bi_builder *b, bi_index dest0, bi_index src0, bi_index src1, bi_index src2, bool stencil, bool z)
{
    /* Instruction plus trailing operand storage: 1 destination, 3 sources. */
    const size_t alloc_size = sizeof(bi_instr) + (1 + 3) * sizeof(bi_index);
    bi_instr *I = (bi_instr *) rzalloc_size(b->shader, alloc_size);

    I->op = BI_OPCODE_ZS_EMIT;

    /* Operands sit right behind the instruction; sources follow destinations. */
    I->nr_dests = 1;
    I->nr_srcs = 3;
    I->dest = (bi_index *) (I + 1);
    I->src = &I->dest[1];

    I->dest[0] = dest0;
    I->src[0] = src0;
    I->src[1] = src1;
    I->src[2] = src2;

    I->stencil = stencil;
    I->z = z;

    bi_builder_insert(&b->cursor, I);
    return I;
}

/*
 * Variant of bi_zs_emit_to() that takes its destination from
 * bi_temp(b->shader) and returns the built instruction's dest[0].
 */
static inline
bi_index bi_zs_emit(bi_builder *b, bi_index src0, bi_index src1, bi_index src2, bool stencil, bool z)
{
    bi_index dest0 = bi_temp(b->shader);

    return bi_zs_emit_to(b, dest0, src0, src1, src2, stencil, z)->dest[0];
}


#endif
