Refactor the point cache generation

effekseer · May 4, 2024 · 3288427 · 3288427
1 parent da70d22
commit 3288427
Show file tree

Hide file tree

Showing 23 changed files with 2,399 additions and 2,320 deletions.
diff --git a/Dev/Cpp/Effekseer/Effekseer/Model/PointCacheGenerator.cpp b/Dev/Cpp/Effekseer/Effekseer/Model/PointCacheGenerator.cpp
@@ -0,0 +1,157 @@
+#include <algorithm>
+#include <cmath>
+#include <random>
+#include "../SIMD/Int4.h"
+#include "../SIMD/Vec2f.h"
+#include "../SIMD/Vec3f.h"
+#include "../SIMD/Vec4f.h"
+#include "../SIMD/Utils.h"
+#include "Model.h"
+#include "PointCacheGenerator.h"
+
+namespace Effekseer
+{
+
+namespace
+{
+// Seeded source of floats drawn from std::uniform_real_distribution<float>(0, 1) on top of a
+// Mersenne Twister engine. Two instances built from the same seed yield the same sequence
+// (for a given standard library implementation).
+struct Random {
+	std::mt19937 engine;
+	std::uniform_real_distribution<float> dist;
+
+	// explicit: a bare integer must not silently convert into a generator.
+	explicit Random(uint32_t seed) : engine(seed), dist(0.0f, 1.0f) {
+	}
+
+	// Returns the next sample and advances the engine.
+	float operator()() {
+		return dist(engine);
+	}
+};
+
+// Blends a, b and c with random barycentric weights.
+// Two samples are drawn from `random` (in that order) and sorted; the gaps they leave in
+// [0, 1] become the three weights, which are therefore non-negative and sum to one.
+template <class T>
+T RandomTriangle(Random& random, T a, T b, T c)
+{
+	const float first = random();
+	const float second = random();
+
+	const float lower = std::min(first, second);
+	const float upper = std::max(first, second);
+
+	const float weightA = lower;
+	const float weightB = 1.0f - upper;
+	const float weightC = upper - lower;
+
+	return a * weightA + b * weightB + c * weightC;
+}
+
+// Packs a direction into the low 30 bits of a word: x in bits 0-9, y in bits 10-19,
+// z in bits 20-29. Each component is remapped with (c + 1) * 0.5 * 1023 after GetNormal()
+// and clamped to [0, 1023] once converted to integers.
+inline uint32_t PackNormal(SIMD::Vec3f v)
+{
+	const SIMD::Int4 lowest(0);
+	const SIMD::Int4 highest(1023);
+
+	SIMD::Vec3f unit = v.GetNormal();
+	SIMD::Vec3f scaled = (unit + SIMD::Vec3f(1.0f)) * 0.5f * 1023.0f;
+
+	SIMD::Int4 quantized = SIMD::Int4::Max(scaled.s.Convert4i(), lowest);
+	quantized = SIMD::Int4::Min(quantized, highest);
+
+	uint32_t lanes[3];
+	SIMD::Int4::Store3(lanes, quantized);
+	return lanes[0] | (lanes[1] << 10) | (lanes[2] << 20);
+}
+
+// Packs a texture coordinate into one word: u in the low 16 bits, v in the high 16 bits.
+// Each component is scaled by 65535 and clamped to [0, 65535] once converted to integers,
+// so coordinates outside [0, 1] saturate rather than wrap.
+inline uint32_t PackUV(SIMD::Vec2f v)
+{
+	const SIMD::Int4 lowest(0);
+	const SIMD::Int4 highest(65535);
+
+	v *= 65535.0f;
+
+	SIMD::Int4 quantized = SIMD::Int4::Max(v.s.Convert4i(), lowest);
+	quantized = SIMD::Int4::Min(quantized, highest);
+
+	uint32_t lanes[2];
+	SIMD::Int4::Store2(lanes, quantized);
+	return lanes[0] | (lanes[1] << 16);
+}
+
+// Packs four channels into one word, 8 bits each: x in bits 0-7, y in 8-15, z in 16-23,
+// w in 24-31. Each channel is scaled by 255 and clamped to [0, 255] once converted to integers.
+inline uint32_t PackColor(SIMD::Vec4f v)
+{
+	const SIMD::Int4 lowest(0);
+	const SIMD::Int4 highest(255);
+
+	v *= 255.0f;
+
+	SIMD::Int4 quantized = SIMD::Int4::Max(v.s.Convert4i(), lowest);
+	quantized = SIMD::Int4::Min(quantized, highest);
+
+	uint32_t lanes[4];
+	SIMD::Int4::Store4(lanes, quantized);
+	return lanes[0] | (lanes[1] << 8) | (lanes[2] << 16) | (lanes[3] << 24);
+}
+
+}
+
+// (fp32-x3) Point
+// Records the destination for generated positions. The address is kept as an integer and
+// Generate() advances it by `stride` bytes per point.
+void PointCacheGenerator::SetPointBuffer(void* buffer, size_t stride)
+{
+	pointStride_ = stride;
+	pointBuffer_ = reinterpret_cast<uintptr_t>(buffer);
+}
+
+// (Packed32-x4) Normal, Tangent, UV, Color
+// Records the destination for generated attributes. The address is kept as an integer and
+// Generate() advances it by `stride` bytes per point.
+void PointCacheGenerator::SetAttributeBuffer(void* buffer, size_t stride)
+{
+	attribStride_ = stride;
+	attribBuffer_ = reinterpret_cast<uintptr_t>(buffer);
+}
+
+// Stores the source model and measures the area of every face of every frame.
+// The per-face areas and their grand total are what Generate() uses to give each face a
+// share of the points proportional to its area.
+void PointCacheGenerator::SetSourceModel(ModelRef model)
+{
+	model_ = model;
+
+	// Start from zero so that a repeated call replaces the previous total instead of
+	// adding to it (a stale total would make Generate() emit too few points).
+	totalArea_ = 0.0f;
+
+	const int32_t frameCount = model_->GetFrameCount();
+	modelFaceAreas_.resize(static_cast<size_t>(frameCount));
+
+	// Calculate the area of all faces
+	for (int32_t frameIndex = 0; frameIndex < frameCount; frameIndex++)
+	{
+		auto vertexes = model_->GetVertexes(frameIndex);
+		auto faces = model_->GetFaces(frameIndex);
+		const int32_t faceCount = model_->GetFaceCount(frameIndex);
+
+		auto& faceAreas = modelFaceAreas_[frameIndex];
+		faceAreas.resize(static_cast<size_t>(faceCount));
+
+		for (int32_t faceIndex = 0; faceIndex < faceCount; faceIndex++)
+		{
+			auto& v0 = vertexes[faces[faceIndex].Indexes[0]];
+			auto& v1 = vertexes[faces[faceIndex].Indexes[1]];
+			auto& v2 = vertexes[faces[faceIndex].Indexes[2]];
+
+			// Heron's formula: area from the three edge lengths.
+			const float r0 = Effekseer::Vector3D::Length(v0.Position - v1.Position);
+			const float r1 = Effekseer::Vector3D::Length(v1.Position - v2.Position);
+			const float r2 = Effekseer::Vector3D::Length(v2.Position - v0.Position);
+			const float s = (r0 + r1 + r2) / 2.0f;
+
+			// For degenerate (collinear) faces rounding can push the radicand slightly
+			// below zero; clamp it so the square root never produces NaN, which would
+			// otherwise poison the total area.
+			const float radicand = std::max(0.0f, s * (s - r0) * (s - r1) * (s - r2));
+			const float area = std::sqrt(radicand);
+
+			totalArea_ += area;
+			faceAreas[faceIndex] = area;
+		}
+	}
+}
+
+// Writes up to pointCount random surface samples into the point and attribute buffers.
+//
+// Faces are visited frame by frame in storage order and each one receives a share of the
+// points proportional to its area as measured by SetSourceModel(). Samples from all frames
+// go into the same buffers, one after another.
+//
+// Both buffers must have been set and must hold at least pointCount entries at their
+// strides. Nothing is written when pointCount is zero, a buffer is missing, or the total
+// face area is not a positive finite number. When pointCount is too large for float to
+// represent exactly, slightly fewer than pointCount entries may be written.
+void PointCacheGenerator::Generate(uint32_t pointCount, uint32_t seed)
+{
+	// The area ratio below is meaningless (and converting it to an integer is undefined)
+	// unless the total area is positive and finite.
+	const bool hasUsableArea = std::isfinite(totalArea_) && totalArea_ > 0.0f;
+	if (pointCount == 0 || pointBuffer_ == 0 || attribBuffer_ == 0 || !hasUsableArea)
+	{
+		return;
+	}
+
+	Random random(seed);
+
+	const int32_t frameCount = static_cast<int32_t>(modelFaceAreas_.size());
+
+	uint32_t pointIndex = 0;
+	float summedArea = 0.0f;
+	for (int32_t frameIndex = 0; frameIndex < frameCount; frameIndex++)
+	{
+		auto vertexes = model_->GetVertexes(frameIndex);
+		auto faces = model_->GetFaces(frameIndex);
+		const int32_t faceCount = model_->GetFaceCount(frameIndex);
+
+		auto& faceAreas = modelFaceAreas_[frameIndex];
+
+		for (int32_t faceIndex = 0; faceIndex < faceCount; faceIndex++)
+		{
+			auto& v0 = vertexes[faces[faceIndex].Indexes[0]];
+			auto& v1 = vertexes[faces[faceIndex].Indexes[1]];
+			auto& v2 = vertexes[faces[faceIndex].Indexes[2]];
+
+			summedArea += faceAreas[faceIndex];
+
+			// Cumulative quota: how many points must exist once this face is done.
+			// float(pointCount) can round above pointCount, so saturate before the
+			// conversion to keep the writes inside the caller's buffers.
+			const float quota = summedArea / totalArea_ * pointCount;
+			const uint32_t targetIndex =
+				quota >= static_cast<float>(pointCount) ? pointCount : static_cast<uint32_t>(quota);
+			const uint32_t genCount = targetIndex - pointIndex;
+
+			for (uint32_t i = 0; i < genCount; i++)
+			{
+				Point* point = reinterpret_cast<Point*>(pointBuffer_ + pointIndex * pointStride_);
+				Attribute* attrib = reinterpret_cast<Attribute*>(attribBuffer_ + pointIndex * attribStride_);
+
+				// NOTE(review): every RandomTriangle call draws fresh weights, so the
+				// normal, tangent, UV and color are interpolated at different spots on
+				// the face than the position. Kept as-is to preserve seeded output;
+				// confirm whether shared weights were intended.
+				point->Position = SIMD::ToStruct(RandomTriangle<SIMD::Vec3f>(random, v0.Position, v1.Position, v2.Position));
+				attrib->PackedNormal = PackNormal(RandomTriangle<SIMD::Vec3f>(random, v0.Normal, v1.Normal, v2.Normal));
+				attrib->PackedTangent = PackNormal(RandomTriangle<SIMD::Vec3f>(random, v0.Tangent, v1.Tangent, v2.Tangent));
+				attrib->PackedUV = PackUV(RandomTriangle<SIMD::Vec2f>(random, v0.UV, v1.UV, v2.UV));
+				attrib->PackedColor = PackColor(RandomTriangle<SIMD::Vec4f>(random, v0.VColor, v1.VColor, v2.VColor));
+				pointIndex += 1;
+			}
+		}
+	}
+}
+
+} // namespace Effekseer
diff --git a/Dev/Cpp/Effekseer/Effekseer/Model/PointCacheGenerator.h b/Dev/Cpp/Effekseer/Effekseer/Model/PointCacheGenerator.h
@@ -0,0 +1,53 @@
+
+#ifndef __EFFEKSEER_POINT_CACHE_GENERATOR_H__
+#define __EFFEKSEER_POINT_CACHE_GENERATOR_H__
+
+#include "../Utils/Effekseer.CustomAllocator.h"
+#include <cstdint>
+#include <vector>
+
+namespace Effekseer
+{
+
+/**
+	@brief PointCache generator
+
+	Scatters random points over the faces of a source model and writes them into two
+	caller-provided buffers: one for positions and one for packed attributes.
+	Usage: set both buffers and the source model, then call Generate().
+*/
+class PointCacheGenerator
+{
+public:
+	// Layout of one entry written to the point buffer.
+	struct Point
+	{
+		Vector3D Position;
+	};
+	// (fp32-x3) Point
+	// Sets the destination for positions; stride is the distance in bytes between entries.
+	void SetPointBuffer(void* buffer, size_t stride);
+
+	// Layout of one entry written to the attribute buffer; each field is one packed 32-bit word.
+	struct Attribute
+	{
+		// 10 bits per axis: x in bits 0-9, y in bits 10-19, z in bits 20-29.
+		uint32_t PackedNormal;
+		// Same encoding as PackedNormal.
+		uint32_t PackedTangent;
+		// 16 bits per component: u in the low half, v in the high half.
+		uint32_t PackedUV;
+		// 8 bits per channel, first channel in the lowest byte.
+		uint32_t PackedColor;
+	};
+	// (Packed32-x4) Normal, Tangent, UV, Color
+	// Sets the destination for attributes; stride is the distance in bytes between entries.
+	void SetAttributeBuffer(void* buffer, size_t stride);
+
+	// Stores the model and precomputes the area of each of its faces for every frame.
+	void SetSourceModel(ModelRef model);
+
+	// Fills the buffers with pointCount samples, distributing them over the faces in
+	// proportion to face area. seed initializes the random number generator.
+	void Generate(uint32_t pointCount, uint32_t seed);
+
+private:
+	// Destination addresses are kept as integers so entries can be located with byte arithmetic.
+	uintptr_t pointBuffer_{};
+	size_t pointStride_{};
+	uintptr_t attribBuffer_{};
+	size_t attribStride_{};
+
+	ModelRef model_;
+	// Face areas indexed as [frame][face].
+	CustomVector<CustomVector<float>> modelFaceAreas_;
+	// Sum of all entries of modelFaceAreas_.
+	float totalArea_ = 0.0f;
+
+};
+
+} // namespace Effekseer
+
+#endif
diff --git a/Dev/Cpp/Effekseer/Effekseer/SIMD/Float4.h b/Dev/Cpp/Effekseer/Effekseer/SIMD/Float4.h
@@ -3,6 +3,7 @@
 #define __EFFEKSEER_SIMD_FLOAT4_H__
 
 #include <cstdint>
+#include <array>
 #include "Base.h"
 
 #if defined(EFK_SIMD_NEON)

diff --git a/Dev/Cpp/Effekseer/Effekseer/SIMD/Float4_Gen.h b/Dev/Cpp/Effekseer/Effekseer/SIMD/Float4_Gen.h
@@ -40,6 +40,7 @@ struct alignas(16) Float4
 	Float4() = default;
 	Float4(const Float4& rhs) = default;
 	Float4(float x, float y, float z, float w) { vf[0] = x; vf[1] = y; vf[2] = z; vf[3] = w; }
+	// Loads the four lanes from the array's element storage (not from the array object's address).
+	Float4(const std::array<float, 4>& v) { *this = Load4(v.data()); }
 	Float4(float i) { vf[0] = i; vf[1] = i; vf[2] = i; vf[3] = i; }
 
 	float GetX() const { return vf[0]; }

diff --git a/Dev/Cpp/Effekseer/Effekseer/SIMD/Float4_NEON.h b/Dev/Cpp/Effekseer/Effekseer/SIMD/Float4_NEON.h
@@ -37,6 +37,7 @@ struct alignas(16) Float4
 	Float4(float32x4_t rhs) { s = rhs; }
 	Float4(uint32x4_t rhs) { s = vreinterpretq_f32_u32(rhs); }
 	Float4(float x, float y, float z, float w) { const float f[4] = {x, y, z, w}; s = vld1q_f32(f); }
+	// Loads the four lanes from the array's element storage (not from the array object's address).
+	Float4(const std::array<float, 4>& v) { *this = Load4(v.data()); }
 	Float4(float i) { s = vdupq_n_f32(i); }
 
 	float GetX() const { return vgetq_lane_f32(s, 0); }

diff --git a/Dev/Cpp/Effekseer/Effekseer/SIMD/Float4_SSE.h b/Dev/Cpp/Effekseer/Effekseer/SIMD/Float4_SSE.h
@@ -38,6 +38,7 @@ struct alignas(16) Float4
 	Float4(__m128 rhs) { s = rhs; }
 	Float4(__m128i rhs) { s = _mm_castsi128_ps(rhs); }
 	Float4(float x, float y, float z, float w) { s = _mm_setr_ps(x, y, z, w); }
+	// Loads the four lanes from the array's element storage (not from the array object's address).
+	Float4(const std::array<float, 4>& v) { *this = Load4(v.data()); }
 	Float4(float i) { s = _mm_set_ps1(i); }
 
 	float GetX() const { return _mm_cvtss_f32(s); }

diff --git a/Dev/Cpp/Effekseer/Effekseer/SIMD/Int4.h b/Dev/Cpp/Effekseer/Effekseer/SIMD/Int4.h
@@ -3,6 +3,7 @@
 #define __EFFEKSEER_SIMD_INT4_H__
 
 #include <cstdint>
+#include <array>
 #include "Base.h"
 
 #if defined(EFK_SIMD_NEON)

diff --git a/Dev/Cpp/Effekseer/Effekseer/SIMD/Int4_Gen.h b/Dev/Cpp/Effekseer/Effekseer/SIMD/Int4_Gen.h
@@ -31,6 +31,7 @@ struct alignas(16) Int4
 	Int4() = default;
 	Int4(const Int4& rhs) = default;
 	Int4(int32_t x, int32_t y, int32_t z, int32_t w) { vi[0] = x; vi[1] = y; vi[2] = z; vi[3] = w; }
+	// Loads the four lanes from the array's element storage (not from the array object's address).
+	Int4(const std::array<int32_t, 4>& v) { *this = Load4(v.data()); }
 	Int4(int32_t i) { vi[0] = i; vi[1] = i; vi[2] = i; vi[3] = i; }
 
 	int32_t GetX() const { return vi[0]; }

diff --git a/Dev/Cpp/Effekseer/Effekseer/SIMD/Int4_NEON.h b/Dev/Cpp/Effekseer/Effekseer/SIMD/Int4_NEON.h
@@ -26,6 +26,7 @@ struct alignas(16) Int4
 	Int4(const Int4& rhs) = default;
 	Int4(int32x4_t rhs) { s = rhs; }
 	Int4(int32_t x, int32_t y, int32_t z, int32_t w) { const int32_t v[4] = {x, y, z, w}; s = vld1q_s32(v); }
+	// Loads the four lanes from the array's element storage (not from the array object's address).
+	Int4(const std::array<int32_t, 4>& v) { *this = Load4(v.data()); }
 	Int4(int32_t i) { s = vdupq_n_s32(i); }
 
 	int32_t GetX() const { return vgetq_lane_s32(s, 0); }

diff --git a/Dev/Cpp/Effekseer/Effekseer/SIMD/Int4_SSE.h b/Dev/Cpp/Effekseer/Effekseer/SIMD/Int4_SSE.h
@@ -27,6 +27,7 @@ struct alignas(16) Int4
 	Int4(__m128i rhs) { s = rhs; }
 	Int4(__m128 rhs) { s = _mm_castps_si128(rhs); }
 	Int4(int32_t x, int32_t y, int32_t z, int32_t w) { s = _mm_setr_epi32((int)x, (int)y, (int)z, (int)w); }
+	// Loads the four lanes from the array's element storage (not from the array object's address).
+	Int4(const std::array<int32_t, 4>& v) { *this = Load4(v.data()); }
 	Int4(int32_t i) { s = _mm_set1_epi32((int)i); }
 
 	int32_t GetX() const { return _mm_cvtsi128_si32(s); }

diff --git a/Dev/Cpp/Effekseer/Effekseer/SIMD/Utils.cpp b/Dev/Cpp/Effekseer/Effekseer/SIMD/Utils.cpp
@@ -2,6 +2,7 @@
 #include "../Effekseer.InternalStruct.h"
 #include "../Effekseer.Vector2D.h"
 #include "../Effekseer.Vector3D.h"
+#include "../Effekseer.Color.h"
 #include "Vec2f.h"
 #include "Vec3f.h"
 #include "Vec4f.h"
@@ -35,8 +36,8 @@ Vec3f::Vec3f(const Vector3D& vec)
 {
 }
 
-Vec3f::Vec3f(const std::array<float, 3>& vec)
-	: s(vec[0], vec[1], vec[2], 0.0f)
+// Converts a Color to floats by dividing each channel by 255.
+// Lane order is x = R, y = G, z = B, w = A.
+Vec4f::Vec4f(const Color& vec)
+	: s(vec.R / 255.0f, vec.G / 255.0f, vec.B / 255.0f, vec.A / 255.0f)
 {
 }
 

diff --git a/Dev/Cpp/Effekseer/Effekseer/SIMD/Vec3f.h b/Dev/Cpp/Effekseer/Effekseer/SIMD/Vec3f.h
@@ -34,7 +34,11 @@ struct Vec3f
 	}
 	Vec3f(const Vector3D& vec);
 	Vec3f(const vector3d& vec);
-	Vec3f(const std::array<float, 3>& vec);
+
+	// Builds the vector from three floats; the fourth lane is set to zero.
+	// Declared without a Vec3f:: qualifier: a qualified name is not allowed on a member
+	// declared inside its own class and is rejected by GCC and Clang.
+	Vec3f(const std::array<float, 3>& vec)
+		: s(vec[0], vec[1], vec[2], 0.0f)
+	{
+	}
 
 	float GetX() const
 	{

diff --git a/Dev/Cpp/Effekseer/Effekseer/SIMD/Vec4f.h b/Dev/Cpp/Effekseer/Effekseer/SIMD/Vec4f.h
@@ -7,6 +7,8 @@
 
 namespace Effekseer
 {
+
+struct Color;
 
 namespace SIMD
 {
@@ -18,6 +20,8 @@ struct Vec4f
 	Vec4f() = default;
 	Vec4f(const Vec4f& vec) = default;
 	Vec4f(const Float4& vec): s(vec) {}
+	// Copies the four array elements into lanes x, y, z, w.
+	Vec4f(const std::array<float, 4>& vec): s(vec[0], vec[1], vec[2], vec[3]) {}
+	// Conversion from Color; only declared here, the definition is not in this header.
+	Vec4f(const Color& vec);
 
 	float GetX() const { return s.GetX(); }
 	float GetY() const { return s.GetY(); }
@@ -47,12 +51,24 @@ struct Vec4f
 		return *this;
 	}
 
+	// Multiplies the vector in place by the scalar o and returns *this.
+	Vec4f& operator*=(float o)
+	{
+		s *= o;
+		return *this;
+	}
+
 	Vec4f& operator/=(const Vec4f& o)
 	{
 		this->s = this->s / o.s;
 		return *this;
 	}
 
+	// Divides the vector in place by the scalar o and returns *this.
+	Vec4f& operator/=(float o)
+	{
+		this->s = this->s / o;
+		return *this;
+	}
+
 	static Vec4f Sqrt(const Vec4f& i);
 	static Vec4f Rsqrt(const Vec4f& i);
 	static Vec4f Abs(const Vec4f& i);
@@ -69,8 +85,12 @@ inline Vec4f operator-(const Vec4f& lhs, const Vec4f& rhs) { return Vec4f{lhs.s
 
 inline Vec4f operator*(const Vec4f& lhs, const Vec4f& rhs) { return Vec4f{lhs.s * rhs.s}; }
 
+// Returns lhs multiplied by the scalar rhs.
+inline Vec4f operator*(const Vec4f& lhs, float rhs) { return Vec4f{lhs.s * rhs}; }
+
 inline Vec4f operator/(const Vec4f& lhs, const Vec4f& rhs) { return Vec4f{lhs.s / rhs.s}; }
 
+// Returns lhs divided by the scalar rhs.
+inline Vec4f operator/(const Vec4f& lhs, float rhs) { return Vec4f{lhs.s / rhs}; }
+
 inline bool operator==(const Vec4f& lhs, const Vec4f& rhs)
 {
 	return Float4::MoveMask(Float4::Equal(lhs.s, rhs.s)) == 0xf;