forked from mikeizbicki/hmm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
BioHMM.hs
128 lines (107 loc) · 3.63 KB
/
BioHMM.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
{-# LANGUAGE IncoherentInstances #-}
module HMM.BioHMM
where
import HMM
-- import HMMPerf
import HMMFile
import Control.Monad
import Data.Array
import Debug.Trace
import System.IO
-- applyTFsItr hmm count
-- | count==0 = return hmm
-- | otherwise = do
-- newhmm <- applyTFs hmm
-- applyTFsItr newhmm (count-1)
--
-- applyTFs hmm = do
-- tfL <- loadTF
-- applyLoop hmm tfL
-- where applyLoop hmm' tfL
-- | tfL == [] = return hmm'
-- | otherwise = do
--
-- -- next <- x
-- putStrLn ""
-- putStrLn ("TF: "++show (head tfL))
-- let nexthmm = baumWelch hmm' (listArray (1,length $ head tfL) $ head tfL) 1
-- -- return nexthmm
-- putStrLn $ show hmm'
-- applyLoop nexthmm $ tail tfL
findGenes = do
hmmTF <- loadHMM "hmm/TF-3.hmm"
hmmDNA <- loadHMM "hmm/autowinegrape-1000-2.hmm"
let hmm' = hmmJoin hmmDNA hmmTF 0.999
dna <- loadDNAArray 1000
return $ viterbi hmm' dna
createTFhmm file hmm = do
x <- strTF
let hmm' = baumWelch hmm (listArray (1,length x) x) 10
-- hmmIO <- hmm
putStrLn $ show hmm'
saveHMM file hmm'
return hmm'
-- seq hmm' $ putStrLn $ show hmm'
-- createAllDNAhmm = do
-- len <- [1000,10000,20000,30000]
-- order <- [1,2,3]
-- -- trace (show len ++ "-" ++ show order) $ return 1
-- let file = "hmm/autowinegrape-"++show len++"-"++show order++".hmm"
-- return $ createDNAhmm file len $ simpleMM "AGCT" order
createAllDNAhmm = createAllDNAhmm' [(len,order) | len <- [1000,10000,20000,30000], order <- [1,2,3] ]
where createAllDNAhmm' (x:xs) = do
createDNAitr (fst x) (snd x)
createAllDNAhmm' xs
createDNAitr len order = createDNAhmm ("hmm/autowinegrape-"++show len++"-"++show order++".hmm") (len) $ simpleMM "AGCT" (order)
createDNAhmm file len hmm = do
dna <- loadDNAArray len
let hmm' = baumWelch hmm dna 10
putStrLn $ show hmm'
saveHMM file hmm'
return hmm'
verifyHMMFile file = do
hmm <- ((loadHMM file) :: IO (HMM String Char))
verifyhmm hmm
loadDNAArray len = do
dna <- readFile "dna/winegrape-chromosone2"
let dnaArray = listArray (1,len) $ filter isBP dna
return dnaArray
where
isBP x = if x `elem` "AGCT"
then True
else False
strTF = liftM (concat . map ((++) "")) loadTF
loadTF = liftM (filter isValidStr) $ (liftM lines) $ readFile "TFBindingSites"
isValidStr str = (length str > 0) && (not $ elemChecker "#(/)[]|N" str)
elemChecker :: (Eq a) => [a] -> [a] -> Bool
elemChecker elemList list
| elemList == [] = False
| otherwise = if (head elemList) `elem` list
then True
else elemChecker (tail elemList) list
newHMM = HMM { states=[1,2]
, events=['A','G','C','T']
, initProbs = ipTest
, transMatrix = tmTest
, outMatrix = omTest
}
ipTest s
| s == 1 = 0.1
| s == 2 = 0.9
tmTest s1 s2
| s1==1 && s2==1 = 0.9
| s1==1 && s2==2 = 0.1
| s1==2 && s2==1 = 0.5
| s1==2 && s2==2 = 0.5
omTest s e
| s==1 && e=='A' = 0.4
| s==1 && e=='G' = 0.1
| s==1 && e=='C' = 0.1
| s==1 && e=='T' = 0.4
| s==2 && e=='A' = 0.1
| s==2 && e=='G' = 0.4
| s==2 && e=='C' = 0.4
| s==2 && e=='T' = 0.1
bwTest = do
hmm <- loadHMM "hmm/test" ::IO (HMM String Char)
return $ baumWelch hmm (listArray (1,10) "AAAAAAGTGC") 10