From 4f21d027ef6546a767224ca1acad7e519a89c90f Mon Sep 17 00:00:00 2001 From: OleksandrZhabenko Date: Mon, 28 Dec 2020 13:06:52 +0200 Subject: [PATCH] Update on Hackage --- CHANGELOG.md | 5 ++ LICENSE | 20 +++++ Setup.hs | 2 + String/Languages/UniquenessPeriods/VectorG.hs | 78 +++++++++++++++++++ phonetic-languages-vector.cabal | 25 ++++++ 5 files changed, 130 insertions(+) create mode 100644 CHANGELOG.md create mode 100644 LICENSE create mode 100644 Setup.hs create mode 100644 String/Languages/UniquenessPeriods/VectorG.hs create mode 100644 phonetic-languages-vector.cabal diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..edd0c8a --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,5 @@ +# Revision history for phonetic-languages-vector + +## 0.1.0.0 -- 2020-10-30 + +* First version. Released on an unsuspecting world. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..2eaab6b --- /dev/null +++ b/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2020 OleksandrZhabenko + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/Setup.hs b/Setup.hs
new file mode 100644
index 0000000..9a994af
--- /dev/null
+++ b/Setup.hs
@@ -0,0 +1,2 @@
+import Distribution.Simple
+main = defaultMain
diff --git a/String/Languages/UniquenessPeriods/VectorG.hs b/String/Languages/UniquenessPeriods/VectorG.hs
new file mode 100644
index 0000000..00541d9
--- /dev/null
+++ b/String/Languages/UniquenessPeriods/VectorG.hs
@@ -0,0 +1,78 @@
+-- |
+-- Module : String.Languages.UniquenessPeriods.VectorG
+-- Copyright : (c) OleksandrZhabenko 2020
+-- License : MIT
+-- Stability : Experimental
+-- Maintainer : olexandr543@yahoo.com
+--
+-- Generalization of the uniqueness-periods and uniqueness-periods-general
+-- packages functionality.
+--
+
+{-# LANGUAGE BangPatterns, FlexibleInstances, MultiParamTypeClasses #-}
+
+module String.Languages.UniquenessPeriods.VectorG where
+
+import GHC.Int
+import qualified Data.Vector as VB
+
+data UniquenessGeneral1 a b = UG1 a [b] (VB.Vector b) | UG2 a [b] (VB.Vector b) | UG3 [b] (VB.Vector b) deriving Eq
+
+class UniquenessGeneral a b where -- a two-parameter class: 'get' turns an @a@ into a @b@
+  get :: a -> b
+
+type UniquenessGeneral2 a = VB.Vector ([Int16], a) -- each element pairs a list of 'Int16' numbers with the value they were computed for
+
+instance (Eq a) => UniquenessGeneral (UniquenessGeneral1 Bool a) (UniquenessGeneral2 a) where -- each constructor selects one of the functions defined in this module
+  get (UG1 y whspss v) = uniquenessPeriodsVector1 y whspss v
+  get (UG2 y whspss v) = uniquenessPeriodsVector2 y whspss v
+  get (UG3 whspss v) = uniquenessPeriodsVector3 whspss v
+
+-- | List of 'Int16' in the result is a list of indices of the occurrences of the value of the @a@ (usually, @a@ is a sound representation or its duration).
+-- The first 'Bool' argument defines whether to filter out the (possibly) uninformative \"whitespace symbols\" given as the
+-- second argument list. The resulting 'VB.Vector' is sorted in the order of the first occurrence of each of the @a@ (usually, @a@ is the sound
+-- representation, or its duration, or some other characteristic of it) in the given third argument.
+uniquenessPeriodsVector1 :: Eq a => Bool -> [a] -> VB.Vector a -> UniquenessGeneral2 a
+uniquenessPeriodsVector1 y whspss v
+  | VB.null v = VB.empty
+  | otherwise = let !v1 = VB.indexed v in
+      let f !x = if VB.null x then Nothing -- unfold step: stop once no entries are left
+                 else Just . (\(v2,v3) -> ((VB.toList . VB.map fst $ v2,snd . VB.unsafeIndex v2 $ 0),v3)) .
+                   VB.partition (\(_,xs) -> xs == (snd . VB.unsafeIndex x $ 0)) $ x in -- v2: all entries equal to the first remaining one; v3: the rest, used as the next seed
+        (if y then VB.map (\(js,t) -> (map toEnum js,t)) . VB.filter (\(_,!zs) -> zs `notElem` whspss) else VB.map (\(js,t) -> (map toEnum js,t))) . VB.unfoldr f $ v1
+
+-- | List of 'Int16' in the result is a list of distances between the consecutive occurrences of the @a@ (usually, @a@ is a sound representation or its duration)
+-- in the given 'VB.Vector'. The first 'Bool' argument defines whether to filter out the values that occur only once and the (possibly) uninformative
+-- \"whitespace symbols\" given as the second argument list. The resulting 'VB.Vector' is sorted in the order of the first occurrence of each of
+-- the @a@ (usually, @a@ is the sound representation or its duration, or some other characteristic of it) in the given third argument.
+uniquenessPeriodsVector2 :: Eq a => Bool -> [a] -> VB.Vector a -> UniquenessGeneral2 a
+uniquenessPeriodsVector2 y whspss v
+  | VB.null v = VB.empty
+  | otherwise = let !v1 = VB.indexed v in
+      let f !x = if VB.null x then Nothing -- unfold step: stop once no entries are left
+                 else Just . (\(v2,v3) -> ((VB.toList . (\v4 -> VB.zipWith subtract v4 (VB.unsafeSlice 1 (VB.length v4 -1) v4)) . VB.map fst $ v2,snd .
+                   VB.unsafeIndex v2 $ 0),v3)) . VB.partition (\(_,xs) -> xs == (snd . VB.unsafeIndex x $ 0)) $ x in -- the indices in v2 are turned into differences between neighbouring indices
+        (if y then VB.map (\(js,t) -> (map toEnum js,t)) . VB.filter (\(ys,!zs) -> not (null ys) && zs `notElem` whspss) else VB.map (\(js,t) -> (map toEnum js,t))) . VB.unfoldr f $ v1
+
+-- | List of 'Int16' in the result is a list of distances between the consecutive occurrences of the @a@ (usually, @a@ is a sound representation or its duration)
+-- in the given 'VB.Vector'. But unlike the 'uniquenessPeriodsVector2' function, it finds only the distances for the repeated non-\"whitespace symbols\"
+-- occurring in different sublists separated with these \"whitespace symbols\". Therefore, it is much more sensitive to the word order than the former one.
+-- The resulting 'VB.Vector' is sorted in the order of the first occurrence of each of
+-- the @a@ (usually, @a@ is the sound representation or its duration, or some other characteristic of it) in the given second argument.
+uniquenessPeriodsVector3 :: Eq a => [a] -> VB.Vector a -> UniquenessGeneral2 a
+uniquenessPeriodsVector3 whspss v
+  | VB.null v = VB.empty
+  | otherwise = let !v1 = VB.indexed v in let !vs = VB.toList . VB.map toEnum . VB.findIndices (`elem` whspss) $ v in -- vs: indices of the elements of v that belong to whspss
+      let f !x = if VB.null x then Nothing -- unfold step: stop once no entries are left
+                 else let !idX0 = snd . VB.unsafeIndex x $ 0 in Just . (\vws (v2,v3) -> ((helpUPV3 vws [] . VB.toList . VB.map fst $ v2,snd .
+                   VB.unsafeIndex v2 $ 0),v3)) vs . VB.partition (\(_,xs) -> xs == idX0) $ x in -- idX0: value of the first remaining entry; v2: all entries equal to it
+        VB.filter (\(ys,!zs) -> not (null ys) && (zs `notElem` whspss)) . VB.unfoldr f . VB.map (\(j,t) -> (toEnum j,t)) $ v1
+
+-- | Is used inside the 'uniquenessPeriodsVector3'. The first and the third list arguments of non-negative numbers (if not empty) must be sorted in ascending order. The second argument is the accumulator: the found distances are prepended to it.
+helpUPV3 :: [Int16] -> [Int16] -> [Int16] -> [Int16]
+helpUPV3 (z:zs) !acc (x:y:xs) -- z: current separator index; x, y: two neighbouring occurrence indices; acc: distances found so far
+  | (x < z && z < y) || (y < z && z < x) = helpUPV3 zs ((y - x):acc) (y:xs) -- z lies strictly between x and y; compared directly because the 'Int16' product (x - z) * (y - z) can overflow and lose its sign
+  | compare y z == GT = helpUPV3 zs acc (x:y:xs) -- z does not separate this pair and y is already past it: drop this z
+  | otherwise = helpUPV3 (z:zs) acc (y:xs) -- y has not passed z yet: keep z and move on to the next pair
+helpUPV3 _ !acc _ = acc -- no separators or fewer than two indices left; distances were prepended, so the last found comes first
+
diff --git a/phonetic-languages-vector.cabal b/phonetic-languages-vector.cabal
new file mode 100644
index 0000000..408a25f
--- /dev/null
+++ b/phonetic-languages-vector.cabal
@@ -0,0 +1,25 @@
+-- Initial phonetic-languages-vector.cabal generated by cabal init. For
+-- further documentation, see http://haskell.org/cabal/users-guide/
+
+name: phonetic-languages-vector
+version: 0.1.0.0
+synopsis: A generalization of the functionality of the uniqueness-periods-vector package.
+description: Is intended to use more Data.Vector functionality.
+homepage: https://hackage.haskell.org/package/phonetic-languages-vector
+license: MIT
+license-file: LICENSE
+author: OleksandrZhabenko
+maintainer: olexandr543@yahoo.com
+-- copyright:
+category: Language
+build-type: Simple
+extra-source-files: CHANGELOG.md
+cabal-version: >=1.10
+
+library
+  exposed-modules: String.Languages.UniquenessPeriods.VectorG
+  -- other-modules:
+  other-extensions: BangPatterns, FlexibleInstances, MultiParamTypeClasses
+  build-depends: base >=4.7 && <4.15, vector >=0.11 && <0.14
+  -- hs-source-dirs:
+  default-language: Haskell2010