From 75e21a00e2634740f35bef42fe5341ffd319760c Mon Sep 17 00:00:00 2001 From: Jesus Recuerda Date: Mon, 10 Jun 2019 16:27:46 +0200 Subject: [PATCH 1/2] Add mass and findBestNOccurrences functions --- src/Interop/DLLMatrix.cs | 45 ++++++++++++++++++++++++++ src/Matrix.cs | 58 +++++++++++++++++++++++++++++++++ test/MatrixTests.cs | 69 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 171 insertions(+), 1 deletion(-) diff --git a/src/Interop/DLLMatrix.cs b/src/Interop/DLLMatrix.cs index 9a07a91..6007917 100644 --- a/src/Interop/DLLMatrix.cs +++ b/src/Interop/DLLMatrix.cs @@ -83,5 +83,50 @@ public static extern void stomp([In] ref IntPtr tssa, [In] ref IntPtr tssb, ref [DllImport(DLLLibrary.KhivaPath, CallingConvention = CallingConvention.Cdecl)] public static extern void stomp_self_join([In] ref IntPtr tss, ref long m, [Out] out IntPtr p, [Out] out IntPtr i); + + /// + /// Mueen's Algorithm for Similarity Search. + /// + /// The result has the following structure: + /// - 1st dimension corresponds to the index of the subsequence in the time series. + /// - 2nd dimension corresponds to the number of queries. + /// - 3rd dimension corresponds to the number of time series. + /// + /// For example, the distance in the position (1, 2, 3) corresponds to the distance of the third query to the fourth time + /// series for the second subsequence in the time series. + /// + /// [1] Yan Zhu, Zachary Zimmerman, Nader Shakibay Senobari, Chin-Chia Michael Yeh, Gareth Funning, Abdullah Mueen, + /// Philip Brisk and Eamonn Keogh (2016). Matrix Profile II: Exploiting a Novel Algorithm and GPUs to break the one + /// Hundred Million Barrier for Time Series Motifs and Joins. IEEE ICDM 2016. + /// + /// Array whose first dimension is the length of the query time series and the second dimension + /// is the number of queries. + /// Array whose first dimension is the length of the time series and the second dimension is the + /// number of time series. + /// Resulting distances. 
+ [DllImport(DLLLibrary.KhivaPath, CallingConvention = CallingConvention.Cdecl)] + public static extern void mass([In] ref IntPtr query, [In] ref IntPtr tss, [Out] out IntPtr distances); + + /// + /// Calculates the N best matches of several queries in several time series. + /// The result has the following structure: + /// - 1st dimension corresponds to the nth best match. + /// - 2nd dimension corresponds to the number of queries. + /// - 3rd dimension corresponds to the number of time series. + /// + /// For example, the distance in the position (1, 2, 3) corresponds to the second best distance of the third query in the + /// fourth time series. The index in the position (1, 2, 3) is the index of the subsequence which leads to the + /// second best distance of the third query in the fourth time series. + /// + /// Array whose first dimension is the length of the query time series and the second dimension + /// is the number of queries. + /// Array whose first dimension is the length of the time series and the second dimension is the + /// number of time series. + /// Number of matches to return. + /// Resulting distances. + /// Resulting indexes. + [DllImport(DLLLibrary.KhivaPath, CallingConvention = CallingConvention.Cdecl)] + public static extern void find_best_n_occurrences([In] ref IntPtr query, [In] ref IntPtr tss, ref long n, + [Out] out IntPtr distances, [Out] out IntPtr indexes); } } \ No newline at end of file diff --git a/src/Matrix.cs b/src/Matrix.cs index 6a1c6a2..fe4e7cb 100644 --- a/src/Matrix.cs +++ b/src/Matrix.cs @@ -115,5 +115,63 @@ public static Tuple StompSelfJoin(KhivaArray tss, long m KhivaArray.Create(i)); return tuple; } + + /// + /// Mueen's Algorithm for Similarity Search. + /// + /// The result has the following structure: + /// - 1st dimension corresponds to the index of the subsequence in the time series. + /// - 2nd dimension corresponds to the number of queries. + /// - 3rd dimension corresponds to the number of time series. 
+ /// + /// For example, the distance in the position (1, 2, 3) corresponds to the distance of the third query to the fourth time + /// series for the second subsequence in the time series. + /// + /// [1] Yan Zhu, Zachary Zimmerman, Nader Shakibay Senobari, Chin-Chia Michael Yeh, Gareth Funning, Abdullah Mueen, + /// Philip Brisk and Eamonn Keogh (2016). Matrix Profile II: Exploiting a Novel Algorithm and GPUs to break the one + /// Hundred Million Barrier for Time Series Motifs and Joins. IEEE ICDM 2016. + /// + /// Array whose first dimension is the length of the query time series and the second dimension + /// is the number of queries. + /// Array whose first dimension is the length of the time series and the second dimension is the + /// number of time series. + /// Resulting distances. + public static KhivaArray Mass(KhivaArray query, KhivaArray tss) + { + var q = query.Reference; + var t = tss.Reference; + DLLMatrix.mass(ref q, ref t, out var distances); + query.Reference = q; + tss.Reference = t; + return KhivaArray.Create(distances); + } + + /// + /// Calculates the N best matches of several queries in several time series. + /// The result has the following structure: + /// - 1st dimension corresponds to the nth best match. + /// - 2nd dimension corresponds to the number of queries. + /// - 3rd dimension corresponds to the number of time series. + /// + /// For example, the distance in the position (1, 2, 3) corresponds to the second best distance of the third query in the + /// fourth time series. The index in the position (1, 2, 3) is the index of the subsequence which leads to the + /// second best distance of the third query in the fourth time series. + /// + /// Array whose first dimension is the length of the query time series and the second dimension + /// is the number of queries. + /// Array whose first dimension is the length of the time series and the second dimension is the + /// number of time series. + /// Number of matches to return. 
+ /// Tuple with the resulting distances and indexes. + public static Tuple FindBestNOccurrences(KhivaArray query, KhivaArray tss, long n) + { + var q = query.Reference; + var t = tss.Reference; + DLLMatrix.find_best_n_occurrences(ref q, ref t, ref n, out var distances, out var indexes); + query.Reference = q; + tss.Reference = t; + return Tuple.Create(KhivaArray.Create(distances), + KhivaArray.Create(indexes)); + } } } \ No newline at end of file diff --git a/test/MatrixTests.cs b/test/MatrixTests.cs index 7d37478..e56f085 100644 --- a/test/MatrixTests.cs +++ b/test/MatrixTests.cs @@ -303,5 +303,72 @@ public void TestFindBestNMotifsConsecutive() } } } + + [Test] + public void TestMass() + { + float[] q = {4.0F, 3.0F, 8.0F}; + float[] tss = {10.0F, 10.0F, 10.0F, 11.0F, 12.0F, 11.0F, 10.0F, 10.0F, 11.0F, 12.0F, 11.0F, 14.0F, 10.0F, 10.0F}; + using (KhivaArray qArr = KhivaArray.Create(q), tssArr = KhivaArray.Create(tss)) + { + var distancesArr = Matrix.Mass(qArr, tssArr); + using (distancesArr) + { + float[] expectedDistances = {1.732051F, 0.328954F, 1.210135F, 3.150851F, 3.245858F, 2.822044F, + 0.328954F, 1.210135F, 3.150851F, 0.248097F, 3.30187F, 2.82205F}; + + var distances = distancesArr.GetData3D(); + + for (var index = 0; index < 12; index++) + { + Assert.AreEqual(expectedDistances[index], distances[0, 0, index], 1e-4); + } + } + } + } + + [Test] + public void TestMassMultiple() + { + float[,] q = { { 10, 10, 11, 11 }, { 10, 11, 10, 10 } }; + float[,] tss = { { 10, 10, 10, 11, 12, 11, 10 }, { 10, 11, 12, 11, 14, 10, 10 } }; + using (KhivaArray qArr = KhivaArray.Create(q), tssArr = KhivaArray.Create(tss)) + { + var distancesArr = Matrix.Mass(qArr, tssArr); + using (distancesArr) + { + Assert.AreEqual(4, distancesArr.Dims[0]); + Assert.AreEqual(2, distancesArr.Dims[1]); + Assert.AreEqual(2, distancesArr.Dims[2]); + Assert.AreEqual(1, distancesArr.Dims[3]); + + var distances = distancesArr.GetData3D(); + + Assert.AreEqual(2.57832384, distances[1, 0, 2], 1e-4); + 
Assert.AreEqual(0.50202721, distances[1, 1, 3], 1e-4); + } + } + } + + [Test] + public void TestFindBestNOccurrences() + { + float[,] q = { { 11, 11, 10, 11 }, { 10, 11, 11, 12 } }; + float[,] tss = { {10, 10, 11, 11, 10, 11, 10, 10, 11, 11, 10, 11, 10, 10}, + {11, 10, 10, 11, 10, 11, 11, 10, 11, 11, 14, 10, 11, 10} }; + using (KhivaArray qArr = KhivaArray.Create(q), tssArr = KhivaArray.Create(tss)) + { + var (distancesArr, indexesArr) = Matrix.FindBestNOccurrences(qArr, tssArr, 1); + using (distancesArr) + using (indexesArr) + { + var distances = distancesArr.GetData3D(); + var indexes = indexesArr.GetData3D(); + + Assert.AreEqual(0.0, distances[0, 0, 0], 1e-4); + Assert.AreEqual(7, distances[0, 0, 0]); + } + } + } } -} \ No newline at end of file +} From 493e3c77ffc39cbbc14071fc561de2d50710eb45 Mon Sep 17 00:00:00 2001 From: Jesus Recuerda Date: Mon, 10 Jun 2019 16:46:06 +0200 Subject: [PATCH 2/2] Add mass and findBestNOccurrences functions --- test/MatrixTests.cs | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/test/MatrixTests.cs b/test/MatrixTests.cs index 4aeddb5..3715e7f 100644 --- a/test/MatrixTests.cs +++ b/test/MatrixTests.cs @@ -359,21 +359,42 @@ public void TestMassMultiple() [Test] public void TestFindBestNOccurrences() + { + float[] q = {10, 11, 12}; + float[,] tss = { {10, 10, 11, 11, 12, 11, 10, 10, 11, 12, 11, 10, 10, 11}, + {10, 10, 11, 11, 12, 11, 10, 10, 11, 12, 11, 10, 10, 11} }; + using (KhivaArray qArr = KhivaArray.Create(q), tssArr = KhivaArray.Create(tss)) + { + var (distancesArr, indexesArr) = Matrix.FindBestNOccurrences(qArr, tssArr, 1); + using (distancesArr) + using (indexesArr) + { + var distances = distancesArr.GetData3D(); + var indexes = indexesArr.GetData3D(); + + Assert.AreEqual(0.00, distances[0, 0, 0], 1e-2); + Assert.AreEqual(7, indexes[0, 0, 0]); + } + } + } + + [Test] + public void TestFindBestNOccurrencesMultiple() { float[,] q = { { 11, 11, 10, 11 }, { 10, 11, 11, 12 } }; 
float[,] tss = { {10, 10, 11, 11, 10, 11, 10, 10, 11, 11, 10, 11, 10, 10}, {11, 10, 10, 11, 10, 11, 11, 10, 11, 11, 14, 10, 11, 10} }; using (KhivaArray qArr = KhivaArray.Create(q), tssArr = KhivaArray.Create(tss)) { - var (distancesArr, indexesArr) = Matrix.FindBestNOccurrences(qArr, tssArr, 1); + var (distancesArr, indexesArr) = Matrix.FindBestNOccurrences(qArr, tssArr, 4); using (distancesArr) using (indexesArr) { var distances = distancesArr.GetData3D(); - var indexes = indexesArr.GetData3D(); + var indexes = indexesArr.GetData3D(); - Assert.AreEqual(0.0, distances[0, 0, 0], 1e-4); - Assert.AreEqual(7, distances[0, 0, 0]); + Assert.AreEqual(1.83880329, distances[1, 0, 2], 1e-2); + Assert.AreEqual(2, indexes[0, 1, 3]); } } }