OBVHの話。

さて，今回の話題もレイトレ合宿関連です。

昨年のセミナーで林さんがQBVH(Quad Bounding Volume Hierarchy)を紹介して下さいました。

実践QBVH from 秀一林

また，前回のレイトレアドベントカレンダーでお餅さんが「Bounding Volume Hierarchy (BVH) の実装 – 交差判定編」でBVHの交差判定の話をされています。
http://qiita.com/omochi64/items/c2bbe92d707b280896fd

今回は，お餅さんからの流れでOBVH(Octa Bounding Volume Hierarchy)を紹介しようかと思います。

Binary Bounding Volume Hierarchy(BBVH)はノードの数が2つ。
Quad Bounding Volume Hierarchy(QBVH)はノードの数が4つ。
Octa Bounding Volume Hierarchy(OBVH)はノードの数が8つ。

分割方法は色々とありますが，ノードの構築方法はさほど難しくなくて
BBVHは2つに分ければ完成，QBVHは2つに分けたものをさらに2つに分ければ完成。OBVHはさらに2つに分けて完成です。
そんなわけで，BBVHをきっちり作れた人であれば，QBVHもOBVHもほとんど同じに作ることができます。
さてあとは，このQBVHとOBVHを使って演算すれば良いですが，レイトレ合宿では制限時間が設けられているので処理速度が大事になってきます。

ここで出てくるがSIMD演算です。
SIMDとはSingle Instruction/Multiple Data (単一命令/複数データ) の略で、SIMD演算とは1つの命令で複数のデータに対して処理をおこなう演算方式を意味するらしいです。
ちょっと前であれば1命令で4つのfloatデータに対して処理できなかったのですが，最近は便利なもので1命令で8つのfloatデータに対して処理できるようです。
前者がIntel系でいう所のSSEで後者がAVX(Advanced Vector Extensions)というやつです。

そんなわけでOBVHの実装はQBVHでできたノードをさらに2分割する。SSE命令をAVX命令に置き換えれば実装完了です。
交差判定の処理はこんな感じになります。

bool BoundingBox8::IsHit( const Ray8& ray, int& mask ) const
{
    b256 tmin = _mm256_set1_ps( F_HIT_MIN );
    b256 tmax = _mm256_set1_ps( F_HIT_MAX );

    int idx0, idx1;

    // X軸.
    idx0 = ray.sign[ 0 ];
    idx1 = 1 - idx0;
    tmin = _mm256_max_ps( tmin, _mm256_mul_ps( _mm256_sub_ps( value[ idx0 ][ 0 ], ray.pos[ 0 ] ), ray.invDir[ 0 ] ) );
    tmax = _mm256_min_ps( tmax, _mm256_mul_ps( _mm256_sub_ps( value[ idx1 ][ 0 ], ray.pos[ 0 ] ), ray.invDir[ 0 ] ) );

    // Y軸.
    idx0 = ray.sign[ 1 ];
    idx1 = 1 - idx0;
    tmin = _mm256_max_ps( tmin, _mm256_mul_ps( _mm256_sub_ps( value[ idx0 ][ 1 ], ray.pos[ 1 ] ), ray.invDir[ 1 ] ) );
    tmax = _mm256_min_ps( tmax, _mm256_mul_ps( _mm256_sub_ps( value[ idx1 ][ 1 ], ray.pos[ 1 ] ), ray.invDir[ 1 ] ) );

    // Z軸.
    idx0 = ray.sign[ 2 ];
    idx1 = 1 - idx0;
    tmin = _mm256_max_ps( tmin, _mm256_mul_ps( _mm256_sub_ps( value[ idx0 ][ 2 ], ray.pos[ 2 ] ), ray.invDir[ 2 ] ) );
    tmax = _mm256_min_ps( tmax, _mm256_mul_ps( _mm256_sub_ps( value[ idx1 ][ 2 ], ray.pos[ 2 ] ), ray.invDir[ 2 ] ) );

    mask = _mm256_movemask_ps( _mm256_cmp_ps( tmax, tmin, _CMP_GT_OS ) );
    return ( mask > 0 );
}

ちなみに上記のコードをAVX命令を使わずに頑張って書くと下記のような感じになります。

template< typename T > inline
int Sign( const T val )
{ return ( val > T(0) ) ? 1 : (( val < T(0) ) ? -1 : 0 ); }

inline
float Max( const float a, const float b )
{ return ( a > b ) ? a : b; }

inline
float Min( const float a, const float b )
{ return ( a < b ) ? a : b; }

bool BoundingBox8::IsHit( const Ray8& ray, int& mask ) const
{
    b256 tmin = { F_HIT_MIN, F_HIT_MIN, F_HIT_MIN, F_HIT_MIN, F_HIT_MIN, F_HIT_MIN, F_HIT_MIN, F_HIT_MIN };
    b256 tmax = { F_HIT_MAX, F_HIT_MAX, F_HIT_MAX, F_HIT_MAX, F_HIT_MAX, F_HIT_MAX, F_HIT_MAX, F_HIT_MAX };

    int idx0, idx1;

    // X軸
    idx0 = ray.sign[ 0 ];
    idx1 = 1 - idx0;
    for ( unsigned int i=0; i<8; ++i )
    {
        tmin.m256_f32[ i ] = Max( tmin.m256_f32[ i ], ( value[ idx0 ][ 0 ].m256_f32[ i ] - ray.pos[ 0 ].m256_f32[ i ] ) * ray.invDir[ 0 ].m256_f32[ i ] );
        tmax.m256_f32[ i ] = Min( tmax.m256_f32[ i ], ( value[ idx1 ][ 0 ].m256_f32[ i ] - ray.pos[ 0 ].m256_f32[ i ] ) * ray.invDir[ 0 ].m256_f32[ i ] );
    }

    // Y軸
    idx0 = ray.sign[ 1 ];
    idx1 = 1 - idx0;
    for ( unsigned int i=0; i<8; ++i )
    {
        tmin.m256_f32[ i ] = Max( tmin.m256_f32[ i ], ( value[ idx0 ][ 1 ].m256_f32[ i ] - ray.pos[ 1 ].m256_f32[ i ] ) * ray.invDir[ 1 ].m256_f32[ i ] );
        tmax.m256_f32[ i ] = Min( tmax.m256_f32[ i ], ( value[ idx1 ][ 1 ].m256_f32[ i ] - ray.pos[ 1 ].m256_f32[ i ] ) * ray.invDir[ 1 ].m256_f32[ i ] );
    }

    // Z軸
    idx0 = ray.sign[ 2 ];
    idx1 = 1 - idx0;
    for ( unsigned int i=0; i<8; ++i )
    {
        tmin.m256_f32[ i ] = Max( tmin.m256_f32[ i ], ( value[ idx0 ][ 2 ].m256_f32[ i ] - ray.pos[ 2 ].m256_f32[ i ] ) * ray.invDir[ 2 ].m256_f32[ i ] );
        tmax.m256_f32[ i ] = Min( tmax.m256_f32[ i ], ( value[ idx1 ][ 2 ].m256_f32[ i ] - ray.pos[ 2 ].m256_f32[ i ] ) * ray.invDir[ 2 ].m256_f32[ i ] );
    }

    b256i flg;
    flg.m256i_u32[0] = ( tmax.m256_f32[ 0 ] >= tmin.m256_f32[ 0 ] ) ? 0xffffffff : 0x0;
    flg.m256i_u32[1] = ( tmax.m256_f32[ 1 ] >= tmin.m256_f32[ 1 ] ) ? 0xffffffff : 0x0;
    flg.m256i_u32[2] = ( tmax.m256_f32[ 2 ] >= tmin.m256_f32[ 2 ] ) ? 0xffffffff : 0x0;
    flg.m256i_u32[3] = ( tmax.m256_f32[ 3 ] >= tmin.m256_f32[ 3 ] ) ? 0xffffffff : 0x0;
    flg.m256i_u32[0] = ( tmax.m256_f32[ 4 ] >= tmin.m256_f32[ 4 ] ) ? 0xffffffff : 0x0;
    flg.m256i_u32[1] = ( tmax.m256_f32[ 5 ] >= tmin.m256_f32[ 5 ] ) ? 0xffffffff : 0x0;
    flg.m256i_u32[2] = ( tmax.m256_f32[ 6 ] >= tmin.m256_f32[ 6 ] ) ? 0xffffffff : 0x0;
    flg.m256i_u32[3] = ( tmax.m256_f32[ 7 ] >= tmin.m256_f32[ 7 ] ) ? 0xffffffff : 0x0;


    mask = (
          Sign(flg.m256i_u32[7]) << 7
        | Sign(flg.m256i_u32[6]) << 6 
        | Sign(flg.m256i_u32[5]) << 5 
        | Sign(flg.m256i_u32[4]) << 4 
        | Sign(flg.m256i_u32[3]) << 3 
        | Sign(flg.m256i_u32[2]) << 2 
        | Sign(flg.m256i_u32[1]) << 1 
        | Sign(flg.m256i_u32[0]) );
    return ( mask > 0 );
}

一応実装したところ，正確な時間計測はやっていないのですがQBVHの大体半分いくかいかないかぐらいの処理時間でレンダリングを終了できていたような気がします。
レイトレ合宿2!!でレンダリングに使うPCのスペックがまだわからない状態ですが，AVXが使える状態ならばOBVHを実装しておくと良いのかもしれません。

Cookie	期間	説明
cookielawinfo-checkbox-analytics	11 months	This cookie is set by GDPR Cookie Consent plugin. The cookie is used to store the user consent for the cookies in the category "Analytics".
cookielawinfo-checkbox-functional	11 months	The cookie is set by GDPR cookie consent to record the user consent for the cookies in the category "Functional".
cookielawinfo-checkbox-necessary	11 months	This cookie is set by GDPR Cookie Consent plugin. The cookies is used to store the user consent for the cookies in the category "Necessary".
cookielawinfo-checkbox-others	11 months	This cookie is set by GDPR Cookie Consent plugin. The cookie is used to store the user consent for the cookies in the category "Other.
cookielawinfo-checkbox-performance	11 months	This cookie is set by GDPR Cookie Consent plugin. The cookie is used to store the user consent for the cookies in the category "Performance".
viewed_cookie_policy	11 months	The cookie is set by the GDPR Cookie Consent plugin and is used to store whether or not user has consented to the use of cookies. It does not store any personal data.

☆PROJECT　ASURA☆

Pocol's Diary

OBVHの話。

関連

コメントを残すコメントをキャンセル

関連

コメントを残す コメントをキャンセル

個人情報に関するお知らせ

コメントを残すコメントをキャンセル