union SSE_128 { __m128 sse; float floats[4]; }; void ParticleEmitter::Update(ParticleVertexBuffer *managedBuffer, float deltaTime, const Vector3 &cameraDirection, const Vector3 &cameraPosition) { RandomGen *randGen = ParticleMemoryManager::GetSingleton()->GetMemoryPool(mPoolId)->GetRandomGen(); if (mEmitterLifeTime == 0) { mEmitterCurrentLife = 0; } else if (mEmitterCurrentLife > mEmitterLifeTime) { mShouldEmit = false; } else { mEmitterCurrentLife += deltaTime; } if (mShouldEmit) { float emission = mEmissionPerSecond.GetValueByDelta(deltaTime) * deltaTime; float partialEmission = MathHelper::GetFraction(emission); uint32 numParticlesToEmit = (uint32)emission; mPartialParticle += partialEmission; if (mPartialParticle > 1.0f) { numParticlesToEmit += 1; mPartialParticle -= 1.0f; } int numToMinimum = mMinActiveParticles - mParticles.CurrentSize(); if (numToMinimum > 0) { numParticlesToEmit += numToMinimum; } Vector3 emitterVelocity = mVelocity.GetValueByDelta(deltaTime); for (uint32 i = 0; i < numParticlesToEmit; i++) { if (mParticles.CurrentSize() >= mMaxParticles) { break; } Particle *newParticle = ParticleMemoryManager::GetSingleton()->GetMemoryPool(mPoolId)->GetNewParticle(); if (newParticle == NULL) { break; } newParticle->Position = randGen->RandRange(mSpawnArea * -1.0f, mSpawnArea); newParticle->Rotation = randGen->RandRange(mRotationVariance * -1.0f, mRotationVariance); newParticle->RotationRate = randGen->RandRange(mRotationRateVariance * -1.0f, mRotationRateVariance); float scaleRand = randGen->RandRange(-mScaleVariance, mScaleVariance); newParticle->InitialScale = Vector3(scaleRand, scaleRand, 0); newParticle->Velocity = randGen->RandRange(mVelocityVariance * -1.0f, mVelocityVariance) + emitterVelocity; newParticle->Acceleration = mGravity; newParticle->LifeTime = mParticleLifeTime + randGen->RandRange(mParticleLifeTimeVariance * -1.0f, mParticleLifeTimeVariance); newParticle->ColorMul = Color(1, 1, 1, 1); newParticle->ColorVar = randGen->RandRange(mColorVariance * -1.0f, mColorVariance); newParticle->CurrentLife = 0; newParticle->Scale = newParticle->InitialScale; newParticle->TexOffsets[0] = mUVScrollOffsets[0] + randGen->RandRange(mUVScrollVariance[0] * -1.0f, mUVScrollVariance[0]); newParticle->TexOffsets[1] = mUVScrollOffsets[1] + randGen->RandRange(mUVScrollVariance[1] * -1.0f, mUVScrollVariance[1]); newParticle->TexOffsets[2] = mUVScrollOffsets[2] + randGen->RandRange(mUVScrollVariance[2] * -1.0f, mUVScrollVariance[2]); newParticle->Intensity = randGen->RandRange(-mMaterialIntensityVariance, mMaterialIntensityVariance); newParticle->TextureFrame = Vector2::Zero(); newParticle->TextureFrameTimeVar = randGen->RandRange(mTextureAnimation.FrameTimeVariance * -1.0f, mTextureAnimation.FrameTimeVariance); if (mTextureAnimation.IsAnimated && mTextureAnimation.RandomizeInitialFrame) { if (mTextureAnimation.FrameCountX > 0 && mTextureAnimation.FrameCountY > 0) { newParticle->TextureFrame = Vector2((float)randGen->RandRange(0, mTextureAnimation.FrameCountX - 1), (float)randGen->RandRange(0, mTextureAnimation.FrameCountY - 1)); } } newParticle->TextureFrameTime = 0.0f; mParticles.Add(newParticle); } } ParticleEmitterWideUpdates particleUpdateParams; Vector3 toCamera = Vector3(cameraDirection); particleUpdateParams.particleRight = Vector3::Cross(toCamera, Vector3(0, 1.0f, 0)).Normalized() * -1.0f; if (mFacingType == ParticleFacing_2D_Billboard) { particleUpdateParams.particleUp = Vector3(0, 1.0f, 0); } if (mFacingType == ParticleFacing_3D_Billboard) { particleUpdateParams.particleUp = Vector3::Cross(toCamera, particleUpdateParams.particleRight).Normalized(); } particleUpdateParams.particleForward = Vector3::Cross(particleUpdateParams.particleUp, particleUpdateParams.particleRight); particleUpdateParams.rotationalVelocity = mRotationalVelocity.GetValueByDelta(deltaTime); particleUpdateParams.uvScrollDelta[0] = mUVScroll[0].GetValueByDelta(deltaTime) * deltaTime; particleUpdateParams.uvScrollDelta[1] = mUVScroll[1].GetValueByDelta(deltaTime) * deltaTime; particleUpdateParams.uvScrollDelta[2] = mUVScroll[2].GetValueByDelta(deltaTime) * deltaTime; mStartVert = managedBuffer->GetCurrentVertCount(mPoolId); //remove all dead particles first before we hit our SIMD loop for (int i = 0; i < (int)mParticles.CurrentSize(); i++) { Particle *particle = mParticles[i]; if (particle->CurrentLife > particle->LifeTime) { ParticleMemoryManager::GetSingleton()->GetMemoryPool(mPoolId)->FreeParticle(particle); mParticles.Remove(i); i -= 1; } } //calculate as many groups of 4 as possible for (uint32 i = 0; (i+3) < mParticles.CurrentSize(); i+=4) { Particle *particles[4]; particles[0] = mParticles[i]; particles[1] = mParticles[i + 1]; particles[2] = mParticles[i + 2]; particles[3] = mParticles[i + 3]; UpdateParticles(particles, managedBuffer, deltaTime, particleUpdateParams); } //single-update the remaining particles uint32 numLeftOver = mParticles.CurrentSize() % 4; for (uint32 i = mParticles.CurrentSize() - numLeftOver; i < mParticles.CurrentSize(); i++) { Particle *particle = mParticles[i]; UpdateParticle(particle, managedBuffer, deltaTime, particleUpdateParams); } mEndVert = managedBuffer->GetCurrentVertCount(mPoolId); mDistanceFromCamera = Vector3::GetDistance(mPosition, cameraPosition); } void ParticleEmitter::UpdateParticles(Particle *particles[4], ParticleVertexBuffer *managedBuffer, float deltaTime, const ParticleEmitterWideUpdates &particleUpdates) { SSE_128 particleCurrentLife; SSE_128 particleLifeTime; SSE_128 percentThroughLife; particleCurrentLife.sse = _mm_set_ps(particles[0]->CurrentLife, particles[1]->CurrentLife, particles[2]->CurrentLife, particles[3]->CurrentLife); particleLifeTime.sse = _mm_set_ps(particles[0]->LifeTime, particles[1]->LifeTime, particles[2]->LifeTime, particles[3]->LifeTime); percentThroughLife.sse = _mm_div_ps(particleCurrentLife.sse, particleLifeTime.sse); SSE_128 deltaTimeSSE; deltaTimeSSE.sse = _mm_set1_ps(deltaTime); SSE_128 positionX, positionY, positionZ; positionX.sse = _mm_set_ps(particles[0]->Position.X, particles[1]->Position.X, particles[2]->Position.X, particles[3]->Position.X); positionY.sse = _mm_set_ps(particles[0]->Position.Y, particles[1]->Position.Y, particles[2]->Position.Y, particles[3]->Position.Y); positionZ.sse = _mm_set_ps(particles[0]->Position.Z, particles[1]->Position.Z, particles[2]->Position.Z, particles[3]->Position.Z); SSE_128 velocityX, velocityY, velocityZ; velocityX.sse = _mm_set_ps(particles[0]->Velocity.X, particles[1]->Velocity.X, particles[2]->Velocity.X, particles[3]->Velocity.X); velocityY.sse = _mm_set_ps(particles[0]->Velocity.Y, particles[1]->Velocity.Y, particles[2]->Velocity.Y, particles[3]->Velocity.Y); velocityZ.sse = _mm_set_ps(particles[0]->Velocity.Z, particles[1]->Velocity.Z, particles[2]->Velocity.Z, particles[3]->Velocity.Z); SSE_128 accelerationX, accelerationY, accelerationZ; accelerationX.sse = _mm_set_ps(particles[0]->Acceleration.X, particles[1]->Acceleration.X, particles[2]->Acceleration.X, particles[3]->Acceleration.X); accelerationY.sse = _mm_set_ps(particles[0]->Acceleration.Y, particles[1]->Acceleration.Y, particles[2]->Acceleration.Y, particles[3]->Acceleration.Y); accelerationZ.sse = _mm_set_ps(particles[0]->Acceleration.Z, particles[1]->Acceleration.Z, particles[2]->Acceleration.Z, particles[3]->Acceleration.Z); SSE_128 resultVelocityX, resultVelocityY, resultVelocityZ; resultVelocityX.sse = _mm_mul_ps(accelerationX.sse, deltaTimeSSE.sse); resultVelocityX.sse = _mm_add_ps(resultVelocityX.sse, velocityX.sse); resultVelocityY.sse = _mm_mul_ps(accelerationY.sse, deltaTimeSSE.sse); resultVelocityY.sse = _mm_add_ps(resultVelocityY.sse, velocityY.sse); resultVelocityZ.sse = _mm_mul_ps(accelerationZ.sse, deltaTimeSSE.sse); resultVelocityZ.sse = _mm_add_ps(resultVelocityZ.sse, velocityZ.sse); particles[3]->Velocity = Vector3(resultVelocityX.floats[0], resultVelocityY.floats[0], resultVelocityZ.floats[0]); particles[2]->Velocity = Vector3(resultVelocityX.floats[1], resultVelocityY.floats[1], resultVelocityZ.floats[1]); particles[1]->Velocity = Vector3(resultVelocityX.floats[2], resultVelocityY.floats[2], resultVelocityZ.floats[2]); particles[0]->Velocity = Vector3(resultVelocityX.floats[3], resultVelocityY.floats[3], resultVelocityZ.floats[3]); Vector3 velocityOverLifetime[4]; velocityOverLifetime[3] = mVelocityOverLifetime.GetValueForTime(percentThroughLife.floats[0]); velocityOverLifetime[2] = mVelocityOverLifetime.GetValueForTime(percentThroughLife.floats[1]); velocityOverLifetime[1] = mVelocityOverLifetime.GetValueForTime(percentThroughLife.floats[2]); velocityOverLifetime[0] = mVelocityOverLifetime.GetValueForTime(percentThroughLife.floats[3]); SSE_128 velocityOverTimeX, velocityOverTimeY, velocityOverTimeZ; velocityOverTimeX.sse = _mm_set_ps(velocityOverLifetime[0].X, velocityOverLifetime[1].X, velocityOverLifetime[2].X, velocityOverLifetime[3].X); velocityOverTimeY.sse = _mm_set_ps(velocityOverLifetime[0].Y, velocityOverLifetime[1].Y, velocityOverLifetime[2].Y, velocityOverLifetime[3].Y); velocityOverTimeZ.sse = _mm_set_ps(velocityOverLifetime[0].Z, velocityOverLifetime[1].Z, velocityOverLifetime[2].Z, velocityOverLifetime[3].Z); velocityOverTimeX.sse = _mm_mul_ps(velocityOverTimeX.sse, deltaTimeSSE.sse); velocityOverTimeY.sse = _mm_mul_ps(velocityOverTimeY.sse, deltaTimeSSE.sse); velocityOverTimeZ.sse = _mm_mul_ps(velocityOverTimeZ.sse, deltaTimeSSE.sse); SSE_128 resultPositionX, resultPositionY, resultPositionZ; resultPositionX.sse = _mm_mul_ps(resultVelocityX.sse, deltaTimeSSE.sse); resultPositionX.sse = _mm_add_ps(resultPositionX.sse, velocityOverTimeX.sse); resultPositionX.sse = _mm_add_ps(resultPositionX.sse, positionX.sse); resultPositionY.sse = _mm_mul_ps(resultVelocityY.sse, deltaTimeSSE.sse); resultPositionY.sse = _mm_add_ps(resultPositionY.sse, velocityOverTimeY.sse); resultPositionY.sse = _mm_add_ps(resultPositionY.sse, positionY.sse); resultPositionZ.sse = _mm_mul_ps(resultVelocityZ.sse, deltaTimeSSE.sse); resultPositionZ.sse = _mm_add_ps(resultPositionZ.sse, velocityOverTimeZ.sse); resultPositionZ.sse = _mm_add_ps(resultPositionZ.sse, positionZ.sse); particles[3]->Position = Vector3(resultPositionX.floats[0], resultPositionY.floats[0], resultPositionZ.floats[0]); particles[2]->Position = Vector3(resultPositionX.floats[1], resultPositionY.floats[1], resultPositionZ.floats[1]); particles[1]->Position = Vector3(resultPositionX.floats[2], resultPositionY.floats[2], resultPositionZ.floats[2]); particles[0]->Position = Vector3(resultPositionX.floats[3], resultPositionY.floats[3], resultPositionZ.floats[3]); SSE_128 rotationalVelocity; SSE_128 rotationRate; rotationalVelocity.sse = _mm_set1_ps(particleUpdates.rotationalVelocity); rotationRate.sse = _mm_set_ps(particles[0]->RotationRate, particles[1]->RotationRate, particles[2]->RotationRate, particles[3]->RotationRate); SSE_128 resultRotation; resultRotation.sse = _mm_add_ps(rotationalVelocity.sse, rotationRate.sse); resultRotation.sse = _mm_mul_ps(resultRotation.sse, deltaTimeSSE.sse); particles[3]->Rotation += resultRotation.floats[0]; particles[2]->Rotation += resultRotation.floats[1]; particles[1]->Rotation += resultRotation.floats[2]; particles[0]->Rotation += resultRotation.floats[3]; Vector3 scalars[4]; scalars[3] = mScale.GetValueForTime(percentThroughLife.floats[0]); scalars[2] = mScale.GetValueForTime(percentThroughLife.floats[1]); scalars[1] = mScale.GetValueForTime(percentThroughLife.floats[2]); scalars[0] = mScale.GetValueForTime(percentThroughLife.floats[3]); SSE_128 scalarX, scalarY, scalarZ; scalarX.sse = _mm_set_ps(scalars[0].X, scalars[1].X, scalars[2].X, scalars[3].X); scalarY.sse = _mm_set_ps(scalars[0].Y, scalars[1].Y, scalars[2].Y, scalars[3].Y); scalarZ.sse = _mm_set_ps(scalars[0].Z, scalars[1].Z, scalars[2].Z, scalars[3].Z); SSE_128 initialScaleX, initialScaleY, initialScaleZ; initialScaleX.sse = _mm_set_ps(particles[0]->InitialScale.X, particles[1]->InitialScale.X, particles[2]->InitialScale.X, particles[3]->InitialScale.X); initialScaleY.sse = _mm_set_ps(particles[0]->InitialScale.Y, particles[1]->InitialScale.Y, particles[2]->InitialScale.Y, particles[3]->InitialScale.Y); initialScaleZ.sse = _mm_set_ps(particles[0]->InitialScale.Z, particles[1]->InitialScale.Z, particles[2]->InitialScale.Z, particles[3]->InitialScale.Z); SSE_128 resultScaleX, resultScaleY, resultScaleZ; resultScaleX.sse = _mm_add_ps(scalarX.sse, initialScaleX.sse); resultScaleY.sse = _mm_add_ps(scalarY.sse, initialScaleY.sse); resultScaleZ.sse = _mm_add_ps(scalarZ.sse, initialScaleZ.sse); particles[3]->Scale = Vector3(resultScaleX.floats[0], resultScaleY.floats[0], resultScaleZ.floats[0]); particles[2]->Scale = Vector3(resultScaleX.floats[1], resultScaleY.floats[1], resultScaleZ.floats[1]); particles[1]->Scale = Vector3(resultScaleX.floats[2], resultScaleY.floats[2], resultScaleZ.floats[2]); particles[0]->Scale = Vector3(resultScaleX.floats[3], resultScaleY.floats[3], resultScaleZ.floats[3]); Color colorMultipliers[4]; colorMultipliers[3] = mColorMultiplier.GetValueForTime(percentThroughLife.floats[0]); colorMultipliers[2] = mColorMultiplier.GetValueForTime(percentThroughLife.floats[1]); colorMultipliers[1] = mColorMultiplier.GetValueForTime(percentThroughLife.floats[2]); colorMultipliers[0] = mColorMultiplier.GetValueForTime(percentThroughLife.floats[3]); SSE_128 colorMultiplyR, colorMultiplyG, colorMultiplyB, colorMultiplyA; colorMultiplyR.sse = _mm_set_ps(colorMultipliers[0].R, colorMultipliers[1].R, colorMultipliers[2].R, colorMultipliers[3].R); colorMultiplyG.sse = _mm_set_ps(colorMultipliers[0].G, colorMultipliers[1].G, colorMultipliers[2].G, colorMultipliers[3].G); colorMultiplyB.sse = _mm_set_ps(colorMultipliers[0].B, colorMultipliers[1].B, colorMultipliers[2].B, colorMultipliers[3].B); colorMultiplyA.sse = _mm_set_ps(colorMultipliers[0].A, colorMultipliers[1].A, colorMultipliers[2].A, colorMultipliers[3].A); SSE_128 colorVarR, colorVarG, colorVarB, colorVarA; colorVarR.sse = _mm_set_ps(particles[0]->ColorVar.R, particles[1]->ColorVar.R, particles[2]->ColorVar.R, particles[3]->ColorVar.R); colorVarG.sse = _mm_set_ps(particles[0]->ColorVar.G, particles[1]->ColorVar.G, particles[2]->ColorVar.G, particles[3]->ColorVar.G); colorVarB.sse = _mm_set_ps(particles[0]->ColorVar.B, particles[1]->ColorVar.B, particles[2]->ColorVar.B, particles[3]->ColorVar.B); colorVarA.sse = _mm_set_ps(particles[0]->ColorVar.A, particles[1]->ColorVar.A, particles[2]->ColorVar.A, particles[3]->ColorVar.A); SSE_128 colorResultR, colorResultG, colorResultB, colorResultA; colorResultR.sse = _mm_add_ps(colorMultiplyR.sse, colorVarR.sse); colorResultG.sse = _mm_add_ps(colorMultiplyG.sse, colorVarG.sse); colorResultB.sse = _mm_add_ps(colorMultiplyB.sse, colorVarB.sse); colorResultA.sse = _mm_add_ps(colorMultiplyA.sse, colorVarA.sse); particles[3]->ColorMul = Color(colorResultR.floats[0], colorResultG.floats[0], colorResultB.floats[0], colorResultA.floats[0]); particles[2]->ColorMul = Color(colorResultR.floats[1], colorResultG.floats[1], colorResultB.floats[1], colorResultA.floats[1]); particles[1]->ColorMul = Color(colorResultR.floats[2], colorResultG.floats[2], colorResultB.floats[2], colorResultA.floats[2]); particles[0]->ColorMul = Color(colorResultR.floats[3], colorResultG.floats[3], colorResultB.floats[3], colorResultA.floats[3]); for (uint32 i = 0; i < 3; i++) { Vector2 uvScroll = mUVScroll[i].GetValueByDelta(deltaTime) * deltaTime; SSE_128 texUVScrollX, texUVScrollY; texUVScrollX.sse = _mm_set1_ps(particleUpdates.uvScrollDelta[i].X); texUVScrollY.sse = _mm_set1_ps(particleUpdates.uvScrollDelta[i].Y); SSE_128 textureOffsetX, textureOffsetY; textureOffsetX.sse = _mm_set_ps(particles[0]->TexOffsets[i].X, particles[1]->TexOffsets[i].X, particles[2]->TexOffsets[i].X, particles[3]->TexOffsets[i].X); textureOffsetY.sse = _mm_set_ps(particles[0]->TexOffsets[i].Y, particles[1]->TexOffsets[i].Y, particles[2]->TexOffsets[i].Y, particles[3]->TexOffsets[i].Y); textureOffsetX.sse = _mm_add_ps(textureOffsetX.sse, texUVScrollX.sse); textureOffsetY.sse = _mm_add_ps(textureOffsetY.sse, texUVScrollY.sse); particles[3]->TexOffsets[i] = Vector2(textureOffsetX.floats[0], textureOffsetY.floats[0]); particles[2]->TexOffsets[i] = Vector2(textureOffsetX.floats[1], textureOffsetY.floats[1]); particles[1]->TexOffsets[i] = Vector2(textureOffsetX.floats[2], textureOffsetY.floats[2]); particles[0]->TexOffsets[i] = Vector2(textureOffsetX.floats[3], textureOffsetY.floats[3]); } SSE_128 updatedParticleLife; updatedParticleLife.sse = _mm_add_ps(particleCurrentLife.sse, deltaTimeSSE.sse); particles[3]->CurrentLife = updatedParticleLife.floats[0]; particles[2]->CurrentLife = updatedParticleLife.floats[1]; particles[1]->CurrentLife = updatedParticleLife.floats[2]; particles[0]->CurrentLife = updatedParticleLife.floats[3]; D3DXMATRIX rotationMatrix[4]; D3DXVECTOR3 forwardAsD3DX(particleUpdates.particleForward.X, particleUpdates.particleForward.Y, particleUpdates.particleForward.Z); D3DXMatrixRotationAxis(&rotationMatrix[0], &forwardAsD3DX, particles[0]->Rotation); D3DXMatrixRotationAxis(&rotationMatrix[1], &forwardAsD3DX, particles[1]->Rotation); D3DXMatrixRotationAxis(&rotationMatrix[2], &forwardAsD3DX, particles[2]->Rotation); D3DXMatrixRotationAxis(&rotationMatrix[3], &forwardAsD3DX, particles[3]->Rotation); Vector3 particleUpFinal[4]; particleUpFinal[0] = Vector3::Transform(particleUpdates.particleUp, rotationMatrix[0]); particleUpFinal[1] = Vector3::Transform(particleUpdates.particleUp, rotationMatrix[1]); particleUpFinal[2] = Vector3::Transform(particleUpdates.particleUp, rotationMatrix[2]); particleUpFinal[3] = Vector3::Transform(particleUpdates.particleUp, rotationMatrix[3]); Vector3 particleRightFinal[4]; particleRightFinal[0] = Vector3::Transform(particleUpdates.particleRight, rotationMatrix[0]); particleRightFinal[1] = Vector3::Transform(particleUpdates.particleRight, rotationMatrix[1]); particleRightFinal[2] = Vector3::Transform(particleUpdates.particleRight, rotationMatrix[2]); particleRightFinal[3] = Vector3::Transform(particleUpdates.particleRight, rotationMatrix[3]); SSE_128 particleUpAmountX, particleUpAmountY, particleUpAmountZ; SSE_128 particleRightAmountX, particleRightAmountY, particleRightAmountZ; Vector3 particleRightAmount = particleRightFinal[0] * particles[0]->Scale.X; Vector3 particleUpAmount = particleUpFinal[0] * particles[0]->Scale.Y; particleUpAmountX.sse = _mm_set_ps(particleUpFinal[0].X, particleUpFinal[1].X, particleUpFinal[2].X, particleUpFinal[3].X); particleUpAmountY.sse = _mm_set_ps(particleUpFinal[0].Y, particleUpFinal[1].Y, particleUpFinal[2].Y, particleUpFinal[3].Y); particleUpAmountZ.sse = _mm_set_ps(particleUpFinal[0].Z, particleUpFinal[1].Z, particleUpFinal[2].Z, particleUpFinal[3].Z); particleRightAmountX.sse = _mm_set_ps(particleRightFinal[0].X, particleRightFinal[1].X, particleRightFinal[2].X, particleRightFinal[3].X); particleRightAmountY.sse = _mm_set_ps(particleRightFinal[0].Y, particleRightFinal[1].Y, particleRightFinal[2].Y, particleRightFinal[3].Y); particleRightAmountZ.sse = _mm_set_ps(particleRightFinal[0].Z, particleRightFinal[1].Z, particleRightFinal[2].Z, particleRightFinal[3].Z); particleRightAmountX.sse = _mm_mul_ps(particleRightAmountX.sse, resultScaleX.sse); particleRightAmountY.sse = _mm_mul_ps(particleRightAmountY.sse, resultScaleX.sse); particleRightAmountZ.sse = _mm_mul_ps(particleRightAmountZ.sse, resultScaleX.sse); particleUpAmountX.sse = _mm_mul_ps(particleUpAmountX.sse, resultScaleY.sse); particleUpAmountY.sse = _mm_mul_ps(particleUpAmountY.sse, resultScaleY.sse); particleUpAmountZ.sse = _mm_mul_ps(particleUpAmountZ.sse, resultScaleY.sse); Vector2 texBottomLeft[4][3]; Vector2 texTopRight[4][3]; if (mTextureAnimation.IsAnimated) { if (mTextureAnimation.FramesPerSecond > 0) { for (uint32 i = 0; i < 4; i++) { particles[i]->TextureFrameTime += deltaTime; if (particles[i]->TextureFrameTime > ((1.0f / mTextureAnimation.FramesPerSecond) + particles[i]->TextureFrameTimeVar)) { particles[i]->TextureFrameTime = 0.0f; particles[i]->TextureFrame.X += 1; if (particles[i]->TextureFrame.X > (mTextureAnimation.FrameCountX - 1)) { particles[i]->TextureFrame.X = 0; particles[i]->TextureFrame.Y += 1; if (particles[i]->TextureFrame.Y > (mTextureAnimation.FrameCountY - 1)) { particles[i]->TextureFrame.Y = 0; } } } } } SSE_128 xFrame; SSE_128 yFrame; SSE_128 frameCountX; SSE_128 frameCountY; xFrame.sse = _mm_set_ps(particles[0]->TextureFrame.X, particles[1]->TextureFrame.X, particles[2]->TextureFrame.X, particles[3]->TextureFrame.X); yFrame.sse = _mm_set_ps(particles[0]->TextureFrame.Y, particles[1]->TextureFrame.Y, particles[2]->TextureFrame.Y, particles[3]->TextureFrame.Y); frameCountX.sse = _mm_set1_ps((float)mTextureAnimation.FrameCountX); frameCountY.sse = _mm_set1_ps((float)mTextureAnimation.FrameCountY); SSE_128 xPos; SSE_128 x1Pos; SSE_128 yPos; SSE_128 y1Pos; SSE_128 one; one.sse = _mm_set1_ps(1.0f); xPos.sse = _mm_div_ps(xFrame.sse, frameCountX.sse); x1Pos.sse = _mm_add_ps(xFrame.sse, one.sse); x1Pos.sse = _mm_div_ps(x1Pos.sse, frameCountX.sse); yPos.sse = _mm_div_ps(yFrame.sse, frameCountY.sse); y1Pos.sse = _mm_add_ps(yFrame.sse, one.sse); y1Pos.sse = _mm_div_ps(y1Pos.sse, frameCountY.sse); for (uint32 i = 0; i < 4; i++) { uint32 reverseIndex = 3 - i; texBottomLeft[i][0] = mTextureAnimation.IsAnimatedForTextureIndex[0] ? Vector2(xPos.floats[reverseIndex], y1Pos.floats[reverseIndex]) : Vector2(0.0f, 1.0f); texBottomLeft[i][1] = mTextureAnimation.IsAnimatedForTextureIndex[1] ? Vector2(xPos.floats[reverseIndex], y1Pos.floats[reverseIndex]) : Vector2(0.0f, 1.0f); texBottomLeft[i][2] = mTextureAnimation.IsAnimatedForTextureIndex[2] ? Vector2(xPos.floats[reverseIndex], y1Pos.floats[reverseIndex]) : Vector2(0.0f, 1.0f); texTopRight[i][0] = mTextureAnimation.IsAnimatedForTextureIndex[0] ? Vector2(x1Pos.floats[reverseIndex], yPos.floats[reverseIndex]) : Vector2(1.0f, 0.0f); texTopRight[i][1] = mTextureAnimation.IsAnimatedForTextureIndex[1] ? Vector2(x1Pos.floats[reverseIndex], yPos.floats[reverseIndex]) : Vector2(1.0f, 0.0f); texTopRight[i][2] = mTextureAnimation.IsAnimatedForTextureIndex[2] ? Vector2(x1Pos.floats[reverseIndex], yPos.floats[reverseIndex]) : Vector2(1.0f, 0.0f); } } else { for (uint32 i = 0; i < 4; i++) { texBottomLeft[i][0] = Vector2(0.0f, 1.0f); texBottomLeft[i][1] = Vector2(0.0f, 1.0f); texBottomLeft[i][2] = Vector2(0.0f, 1.0f); texTopRight[i][0] = Vector2(1.0f, 0.0f); texTopRight[i][1] = Vector2(1.0f, 0.0f); texTopRight[i][2] = Vector2(1.0f, 0.0f); } } for (uint32 i = 0; i < 4; i++) { uint32 reverseIndex = 3 - i; float intensity = particles[i]->Intensity + mMaterialIntensity.GetValueForTime(percentThroughLife.floats[reverseIndex]); D3DXVECTOR4 colorAsD3D = particles[i]->ColorMul.AsD3DVector4(); MeshDataD3D bottomLeftVert; MeshDataD3D topLeftVert; MeshDataD3D bottomRightVert; MeshDataD3D topRightVert; SSE_128 posX, posY, posZ; SSE_128 posRightOffsetX, posRightOffsetY, posRightOffsetZ; SSE_128 posUpOffsetX, posUpOffsetY, posUpOffsetZ; SSE_128 texCoord0X, texCoord0Y, texCoord1X, texCoord1Y, texCoord2X, texCoord2Y; SSE_128 texCoordOffset0X, texCoordOffset0Y, texCoordOffset1X, texCoordOffset1Y, texCoordOffset2X, texCoordOffset2Y; posX.sse = _mm_set1_ps(particles[i]->Position.X); posY.sse = _mm_set1_ps(particles[i]->Position.Y); posZ.sse = _mm_set1_ps(particles[i]->Position.Z); posRightOffsetX.sse = _mm_set_ps(-particleRightAmountX.floats[reverseIndex], -particleRightAmountX.floats[reverseIndex], particleRightAmountX.floats[reverseIndex], particleRightAmountX.floats[reverseIndex]); posRightOffsetY.sse = _mm_set_ps(-particleRightAmountY.floats[reverseIndex], -particleRightAmountY.floats[reverseIndex], particleRightAmountY.floats[reverseIndex], particleRightAmountY.floats[reverseIndex]); posRightOffsetZ.sse = _mm_set_ps(-particleRightAmountZ.floats[reverseIndex], -particleRightAmountZ.floats[reverseIndex], particleRightAmountZ.floats[reverseIndex], particleRightAmountZ.floats[reverseIndex]); posUpOffsetX.sse = _mm_set_ps(-particleUpAmountX.floats[reverseIndex], particleUpAmountX.floats[reverseIndex], -particleUpAmountX.floats[reverseIndex], particleUpAmountX.floats[reverseIndex]); posUpOffsetY.sse = _mm_set_ps(-particleUpAmountY.floats[reverseIndex], particleUpAmountY.floats[reverseIndex], -particleUpAmountY.floats[reverseIndex], particleUpAmountY.floats[reverseIndex]); posUpOffsetZ.sse = _mm_set_ps(-particleUpAmountZ.floats[reverseIndex], particleUpAmountZ.floats[reverseIndex], -particleUpAmountZ.floats[reverseIndex], particleUpAmountZ.floats[reverseIndex]); posX.sse = _mm_add_ps(posX.sse, posRightOffsetX.sse); posX.sse = _mm_add_ps(posX.sse, posUpOffsetX.sse); posY.sse = _mm_add_ps(posY.sse, posRightOffsetY.sse); posY.sse = _mm_add_ps(posY.sse, posUpOffsetY.sse); posZ.sse = _mm_add_ps(posZ.sse, posRightOffsetZ.sse); posZ.sse = _mm_add_ps(posZ.sse, posUpOffsetZ.sse); texCoord0X.sse = _mm_set_ps(texBottomLeft[i][0].X, texBottomLeft[i][0].X, texTopRight[i][0].X, texTopRight[i][0].X); texCoord0Y.sse = _mm_set_ps(texBottomLeft[i][0].Y, texTopRight[i][0].Y, texBottomLeft[i][0].Y, texTopRight[i][0].Y); texCoordOffset0X.sse = _mm_set1_ps(particles[i]->TexOffsets[0].X); texCoordOffset0Y.sse = _mm_set1_ps(particles[i]->TexOffsets[0].Y); texCoord1X.sse = _mm_set_ps(texBottomLeft[i][1].X, texBottomLeft[i][1].X, texTopRight[i][1].X, texTopRight[i][1].X); texCoord1Y.sse = _mm_set_ps(texBottomLeft[i][1].Y, texTopRight[i][1].Y, texBottomLeft[i][1].Y, texTopRight[i][1].Y); texCoordOffset1X.sse = _mm_set1_ps(particles[i]->TexOffsets[1].X); texCoordOffset1Y.sse = _mm_set1_ps(particles[i]->TexOffsets[1].Y); texCoord2X.sse = _mm_set_ps(texBottomLeft[i][2].X, texBottomLeft[i][1].X, texTopRight[i][2].X, texTopRight[i][2].X); texCoord2Y.sse = _mm_set_ps(texBottomLeft[i][2].Y, texTopRight[i][1].Y, texBottomLeft[i][2].Y, texTopRight[i][2].Y); texCoordOffset2X.sse = _mm_set1_ps(particles[i]->TexOffsets[2].X); texCoordOffset2Y.sse = _mm_set1_ps(particles[i]->TexOffsets[2].Y); texCoord0X.sse = _mm_add_ps(texCoord0X.sse, texCoordOffset0X.sse); texCoord0Y.sse = _mm_add_ps(texCoord0Y.sse, texCoordOffset0Y.sse); texCoord1X.sse = _mm_add_ps(texCoord1X.sse, texCoordOffset1X.sse); texCoord1Y.sse = _mm_add_ps(texCoord1Y.sse, texCoordOffset1Y.sse); texCoord2X.sse = _mm_add_ps(texCoord2X.sse, texCoordOffset2X.sse); texCoord2Y.sse = _mm_add_ps(texCoord2Y.sse, texCoordOffset2Y.sse); bottomLeftVert.Position = D3DXVECTOR3(posX.floats[3], posY.floats[3], posZ.floats[3]); bottomLeftVert.TexCoord = D3DXVECTOR2(texCoord0X.floats[3], texCoord0Y.floats[3]); bottomLeftVert.Tangent = D3DXVECTOR3(texCoord1X.floats[3], texCoord1Y.floats[3], intensity); bottomLeftVert.Binormal = D3DXVECTOR3(texCoord2X.floats[3], texCoord2Y.floats[3], 0.0f); bottomLeftVert.Color = colorAsD3D; topLeftVert.Position = D3DXVECTOR3(posX.floats[2], posY.floats[2], posZ.floats[2]); topLeftVert.TexCoord = D3DXVECTOR2(texCoord0X.floats[2], texCoord0Y.floats[2]); topLeftVert.Tangent = D3DXVECTOR3(texCoord1X.floats[2], texCoord1Y.floats[2], intensity); topLeftVert.Binormal = D3DXVECTOR3(texCoord2X.floats[2], texCoord2Y.floats[2], 0.0f); topLeftVert.Color = colorAsD3D; bottomRightVert.Position = D3DXVECTOR3(posX.floats[1], posY.floats[1], posZ.floats[1]); bottomRightVert.TexCoord = D3DXVECTOR2(texCoord0X.floats[1], texCoord0Y.floats[1]); bottomRightVert.Tangent = D3DXVECTOR3(texCoord1X.floats[1], texCoord1Y.floats[1], intensity); bottomRightVert.Binormal = D3DXVECTOR3(texCoord2X.floats[1], texCoord2Y.floats[1], 0.0f); bottomRightVert.Color = colorAsD3D; topRightVert.Position = D3DXVECTOR3(posX.floats[0], posY.floats[0], posZ.floats[0]); topRightVert.TexCoord = D3DXVECTOR2(texCoord0X.floats[0], texCoord0Y.floats[0]); topRightVert.Tangent = D3DXVECTOR3(texCoord1X.floats[0], texCoord1Y.floats[0], intensity); topRightVert.Binormal = D3DXVECTOR3(texCoord2X.floats[0], texCoord2Y.floats[0], 0.0f); topRightVert.Color = colorAsD3D; managedBuffer->AddParticleVert(bottomLeftVert, mPoolId); managedBuffer->AddParticleVert(topLeftVert, mPoolId); managedBuffer->AddParticleVert(bottomRightVert, mPoolId); managedBuffer->AddParticleVert(bottomRightVert, mPoolId); managedBuffer->AddParticleVert(topLeftVert, mPoolId); managedBuffer->AddParticleVert(topRightVert, mPoolId); } } void ParticleEmitter::UpdateParticle(Particle *particle, ParticleVertexBuffer *managedBuffer, float deltaTime, const ParticleEmitterWideUpdates& particleUpdates) { float percentThroughLife = particle->CurrentLife / particle->LifeTime; Color colorMultiplier(mColorMultiplier.GetValueForTime(percentThroughLife)); Vector3 scalar = mScale.GetValueForTime(percentThroughLife); particle->Velocity = particle->Velocity + particle->Acceleration * deltaTime; particle->Position = particle->Position + (particle->Velocity * deltaTime) + (mVelocityOverLifetime.GetValueForTime(percentThroughLife) * deltaTime); particle->Rotation += (particleUpdates.rotationalVelocity + particle->RotationRate) * deltaTime; particle->Scale = scalar + particle->InitialScale; particle->ColorMul = colorMultiplier + particle->ColorVar; particle->TexOffsets[0] += particleUpdates.uvScrollDelta[0]; particle->TexOffsets[1] += particleUpdates.uvScrollDelta[0]; particle->TexOffsets[2] += particleUpdates.uvScrollDelta[0]; particle->CurrentLife += deltaTime; D3DXMATRIX rotationMatrix; D3DXVECTOR3 forwardAsD3DX(particleUpdates.particleForward.X, particleUpdates.particleForward.Y, particleUpdates.particleForward.Z); D3DXMatrixRotationAxis(&rotationMatrix, &forwardAsD3DX, particle->Rotation); Vector3 particleUpFinal = Vector3::Transform(particleUpdates.particleUp, rotationMatrix); Vector3 particleRightFinal = Vector3::Transform(particleUpdates.particleRight, rotationMatrix); Vector3 particleRightAmount = particleRightFinal * particle->Scale.X; Vector3 particleUpAmount = particleUpFinal * particle->Scale.Y; Vector2 texBottomLeft[3]; Vector2 texTopRight[3]; if (mTextureAnimation.IsAnimated) { if (mTextureAnimation.FramesPerSecond > 0) { particle->TextureFrameTime += deltaTime; if (particle->TextureFrameTime > ((1.0f / mTextureAnimation.FramesPerSecond) + particle->TextureFrameTimeVar)) { particle->TextureFrameTime = 0.0f; particle->TextureFrame.X += 1; if (particle->TextureFrame.X > (mTextureAnimation.FrameCountX - 1)) { particle->TextureFrame.X = 0; particle->TextureFrame.Y += 1; if (particle->TextureFrame.Y > (mTextureAnimation.FrameCountY - 1)) { particle->TextureFrame.Y = 0; } } } } float xFrame = particle->TextureFrame.X; float yFrame = particle->TextureFrame.Y; float xPos = xFrame / (float)mTextureAnimation.FrameCountX; float x1Pos = (xFrame + 1.0f) / (float)mTextureAnimation.FrameCountX; float yPos = yFrame / (float)mTextureAnimation.FrameCountY; float y1Pos = (yFrame + 1.0f) / (float)mTextureAnimation.FrameCountY; texBottomLeft[0] = mTextureAnimation.IsAnimatedForTextureIndex[0] ? Vector2(xPos, y1Pos) : Vector2(0.0f, 1.0f); texBottomLeft[1] = mTextureAnimation.IsAnimatedForTextureIndex[1] ? Vector2(xPos, y1Pos) : Vector2(0.0f, 1.0f); texBottomLeft[2] = mTextureAnimation.IsAnimatedForTextureIndex[2] ? Vector2(xPos, y1Pos) : Vector2(0.0f, 1.0f); texTopRight[0] = mTextureAnimation.IsAnimatedForTextureIndex[0] ? Vector2(x1Pos, yPos) : Vector2(1.0f, 0.0f); texTopRight[1] = mTextureAnimation.IsAnimatedForTextureIndex[1] ? Vector2(x1Pos, yPos) : Vector2(1.0f, 0.0f); texTopRight[2] = mTextureAnimation.IsAnimatedForTextureIndex[2] ? Vector2(x1Pos, yPos) : Vector2(1.0f, 0.0f); } else { texBottomLeft[0] = Vector2(0.0f, 1.0f); texBottomLeft[1] = Vector2(0.0f, 1.0f); texBottomLeft[2] = Vector2(0.0f, 1.0f); texTopRight[0] = Vector2(1.0f, 0.0f); texTopRight[1] = Vector2(1.0f, 0.0f); texTopRight[2] = Vector2(1.0f, 0.0f); } float intensity = particle->Intensity + mMaterialIntensity.GetValueForTime(percentThroughLife); D3DXVECTOR4 colorAsD3D = particle->ColorMul.AsD3DVector4(); MeshDataD3D bottomLeftVert; MeshDataD3D topLeftVert; MeshDataD3D bottomRightVert; MeshDataD3D topRightVert; SSE_128 posX, posY, posZ; SSE_128 posRightOffsetX, posRightOffsetY, posRightOffsetZ; SSE_128 posUpOffsetX, posUpOffsetY, posUpOffsetZ; SSE_128 texCoord0X, texCoord0Y, texCoord1X, texCoord1Y, texCoord2X, texCoord2Y; SSE_128 texCoordOffset0X, texCoordOffset0Y, texCoordOffset1X, texCoordOffset1Y, texCoordOffset2X, texCoordOffset2Y; posX.sse = _mm_set1_ps(particle->Position.X); posY.sse = _mm_set1_ps(particle->Position.Y); posZ.sse = _mm_set1_ps(particle->Position.Z); posRightOffsetX.sse = _mm_set_ps(-particleRightAmount.X, -particleRightAmount.X, particleRightAmount.X, particleRightAmount.X); posRightOffsetY.sse = _mm_set_ps(-particleRightAmount.Y, -particleRightAmount.Y, particleRightAmount.Y, particleRightAmount.Y); posRightOffsetZ.sse = _mm_set_ps(-particleRightAmount.Z, -particleRightAmount.Z, particleRightAmount.Z, particleRightAmount.Z); posUpOffsetX.sse = _mm_set_ps(-particleUpAmount.X, particleUpAmount.X, -particleUpAmount.X, particleUpAmount.X); posUpOffsetY.sse = _mm_set_ps(-particleUpAmount.Y, particleUpAmount.Y, -particleUpAmount.Y, particleUpAmount.Y); posUpOffsetZ.sse = _mm_set_ps(-particleUpAmount.Z, particleUpAmount.Z, -particleUpAmount.Z, particleUpAmount.Z); posX.sse = _mm_add_ps(posX.sse, posRightOffsetX.sse); posX.sse = _mm_add_ps(posX.sse, posUpOffsetX.sse); posY.sse = _mm_add_ps(posY.sse, posRightOffsetY.sse); posY.sse = _mm_add_ps(posY.sse, posUpOffsetY.sse); posZ.sse = _mm_add_ps(posZ.sse, posRightOffsetZ.sse); posZ.sse = _mm_add_ps(posZ.sse, posUpOffsetZ.sse); texCoord0X.sse = _mm_set_ps(texBottomLeft[0].X, texBottomLeft[0].X, texTopRight[0].X, texTopRight[0].X); texCoord0Y.sse = _mm_set_ps(texBottomLeft[0].Y, texTopRight[0].Y, texBottomLeft[0].Y, texTopRight[0].Y); texCoordOffset0X.sse = _mm_set1_ps(particle->TexOffsets[0].X); texCoordOffset0Y.sse = _mm_set1_ps(particle->TexOffsets[0].Y); texCoord1X.sse = _mm_set_ps(texBottomLeft[1].X, texBottomLeft[1].X, texTopRight[1].X, texTopRight[1].X); texCoord1Y.sse = _mm_set_ps(texBottomLeft[1].Y, texTopRight[1].Y, texBottomLeft[1].Y, texTopRight[1].Y); texCoordOffset1X.sse = _mm_set1_ps(particle->TexOffsets[1].X); texCoordOffset1Y.sse = _mm_set1_ps(particle->TexOffsets[1].Y); texCoord2X.sse = _mm_set_ps(texBottomLeft[2].X, texBottomLeft[1].X, texTopRight[2].X, texTopRight[2].X); texCoord2Y.sse = _mm_set_ps(texBottomLeft[2].Y, texTopRight[1].Y, texBottomLeft[2].Y, texTopRight[2].Y); texCoordOffset2X.sse = _mm_set1_ps(particle->TexOffsets[2].X); texCoordOffset2Y.sse = _mm_set1_ps(particle->TexOffsets[2].Y); texCoord0X.sse = _mm_add_ps(texCoord0X.sse, texCoordOffset0X.sse); texCoord0Y.sse = _mm_add_ps(texCoord0Y.sse, texCoordOffset0Y.sse); texCoord1X.sse = _mm_add_ps(texCoord1X.sse, texCoordOffset1X.sse); texCoord1Y.sse = _mm_add_ps(texCoord1Y.sse, texCoordOffset1Y.sse); texCoord2X.sse = _mm_add_ps(texCoord2X.sse, texCoordOffset2X.sse); texCoord2Y.sse = _mm_add_ps(texCoord2Y.sse, texCoordOffset2Y.sse); bottomLeftVert.Position = D3DXVECTOR3(posX.floats[3], posY.floats[3], posZ.floats[3]); bottomLeftVert.TexCoord = D3DXVECTOR2(texCoord0X.floats[3], texCoord0Y.floats[3]); bottomLeftVert.Tangent = D3DXVECTOR3(texCoord1X.floats[3], texCoord1Y.floats[3], intensity); bottomLeftVert.Binormal = D3DXVECTOR3(texCoord2X.floats[3], texCoord2Y.floats[3], 0.0f); bottomLeftVert.Color = colorAsD3D; topLeftVert.Position = D3DXVECTOR3(posX.floats[2], posY.floats[2], posZ.floats[2]); topLeftVert.TexCoord = D3DXVECTOR2(texCoord0X.floats[2], texCoord0Y.floats[2]); topLeftVert.Tangent = D3DXVECTOR3(texCoord1X.floats[2], texCoord1Y.floats[2], intensity); topLeftVert.Binormal = D3DXVECTOR3(texCoord2X.floats[2], texCoord2Y.floats[2], 0.0f); topLeftVert.Color = colorAsD3D; bottomRightVert.Position = D3DXVECTOR3(posX.floats[1], posY.floats[1], posZ.floats[1]); bottomRightVert.TexCoord = D3DXVECTOR2(texCoord0X.floats[1], texCoord0Y.floats[1]); bottomRightVert.Tangent = D3DXVECTOR3(texCoord1X.floats[1], texCoord1Y.floats[1], intensity); bottomRightVert.Binormal = D3DXVECTOR3(texCoord2X.floats[1], texCoord2Y.floats[1], 0.0f); bottomRightVert.Color = colorAsD3D; topRightVert.Position = D3DXVECTOR3(posX.floats[0], posY.floats[0], posZ.floats[0]); topRightVert.TexCoord = D3DXVECTOR2(texCoord0X.floats[0], texCoord0Y.floats[0]); topRightVert.Tangent = D3DXVECTOR3(texCoord1X.floats[0], texCoord1Y.floats[0], intensity); topRightVert.Binormal = D3DXVECTOR3(texCoord2X.floats[0], texCoord2Y.floats[0], 0.0f); topRightVert.Color = colorAsD3D; managedBuffer->AddParticleVert(bottomLeftVert, mPoolId); managedBuffer->AddParticleVert(topLeftVert, mPoolId); managedBuffer->AddParticleVert(bottomRightVert, mPoolId); managedBuffer->AddParticleVert(bottomRightVert, mPoolId); managedBuffer->AddParticleVert(topLeftVert, mPoolId); managedBuffer->AddParticleVert(topRightVert, mPoolId); }