I have a renderer class and a mesh class. The mesh class contains all the information necessary to draw a mesh.
You can add meshes to, and remove meshes from, the renderer. As meshes are added, they are sorted by material so that I don't need to re-commit the material settings for every individual mesh.
However, against all expectations, when I compare the performance of my renderer to a renderer which does no optimisations at all, mine seems to run about five times slower. I'm pretty puzzled about this, and hope someone is able to point me in the right direction.
I have done some profiling with Intel VTune, and this seems to be my bottleneck:
http://oi45.tinypic.com/2q8999k.jpg
These are the machine instructions for that single line of source code (ID3D10EffectPass::Apply()): http://pastebin.com/p44mr06K
And this is my rendering code:
typedef std::multiset<const Mesh3D*, MeshCompare3D> BUCKET3D; typedef std::map<Material*, BUCKET3D > MESHLIST3D; typedef std::pair<Material*, BUCKET3D > BUCKETPAIR3D; MESHLIST3D m_Meshes3D;
void GFX::Renderer::Draw()
{
//reset the RendererOptimalisationInfo (is needed for letting external functions do their job)
m_OpInfo.passes = -1;
m_OpInfo.pGeometry = nullptr;
m_OpInfo.pLayout = nullptr;
m_OpInfo.pMaterial = nullptr;
m_OpInfo.topology = -1;
GFX::PerFrameInfo perFrameInfo;
perFrameInfo.pMatP = m_pCam3D->GetMatP();
perFrameInfo.pMatV = m_pCam3D->GetMatV();
perFrameInfo.pScene = &m_Scene;
perFrameInfo.pExtraInfo = m_pPerFrameExtraInfo;
GFX::PerObjectInfo perObjectInfo;
MESHLIST3D::const_iterator it;
BUCKET3D::const_iterator bucketIter;
//DRAW
for (it = m_Meshes3D.begin(); it != m_Meshes3D.end(); ++it)
{
//MESHLIST-level checks (material-specific)
it->first->m_pEffect->Commit_Material(it->first, perFrameInfo);
D3D10_TECHNIQUE_DESC tDesc;
GFX::Technique* pTech = it->first->m_pEffect->GetTechnique(it->first->m_sTechnique);
pTech->GetD3DTechnique()->GetDesc(&tDesc);
m_OpInfo.passes = tDesc.Passes;
if( pTech->GetInputLayout() != m_OpInfo.pLayout )
{
m_OpInfo.pLayout = pTech->GetInputLayout();
m_DxCore.pDevice->IASetInputLayout(m_OpInfo.pLayout);
}
for(bucketIter = (it->second.begin()); bucketIter != (it->second.end()); ++bucketIter)
{
//BUCKET-level checks (mesh-specific)
if(!((*bucketIter)->m_Desc.bActive))
{
continue;
}
if( (*bucketIter)->m_Desc.pGeometry != m_OpInfo.pGeometry )
{
m_OpInfo.pGeometry = (*bucketIter)->m_Desc.pGeometry;
UINT offset = 0;
UINT stride = m_OpInfo.pGeometry->GetVertexSize();
m_DxCore.pDevice->
IASetVertexBuffers(0,1,m_OpInfo.pGeometry->GetppVBuffer(), &stride, &offset);
if(m_OpInfo.pGeometry->GetpIBuffer())
{
m_DxCore.pDevice->
IASetIndexBuffer(m_OpInfo.pGeometry->GetpIBuffer(), DXGI_FORMAT_R32_UINT, 0);
}
}
if( (*bucketIter)->m_Desc.pGeometry->GetTopology() != m_OpInfo.topology )
{
m_OpInfo.topology = (*bucketIter)->m_Desc.pGeometry->GetTopology();
m_DxCore.pDevice->IASetPrimitiveTopology( (*bucketIter)->m_Desc.pGeometry->GetTopology() );
}
perObjectInfo.pMatW = &((*bucketIter)->m_Desc.matW);
perObjectInfo.pExtraInfo = (*bucketIter)->m_Desc.pExtraInfo;
it->first->m_pEffect->Commit_Object(perObjectInfo);
//Finally, draw this mesh
if(m_OpInfo.pGeometry->GetpIBuffer())
{
for (int p = 0; p < m_OpInfo.passes; ++p)
{
pTech->GetD3DTechnique()->GetPassByIndex(p)->Apply(0);
m_DxCore.pDevice->DrawIndexed((*bucketIter)->GetDrawCount(),(*bucketIter)->GetDrawStartPos(),0);
}
} else {
for (int p = 0; p < m_OpInfo.passes; ++p)
{
pTech->GetD3DTechnique()->GetPassByIndex(p)->Apply(0);
m_DxCore.pDevice->Draw((*bucketIter)->GetDrawCount(),(*bucketIter)->GetDrawStartPos());
}
}//end if
}//end for
}//end for
}













