-
Notifications
You must be signed in to change notification settings - Fork 1
/
granges_cpg.go
125 lines (110 loc) · 3.8 KB
/
granges_cpg.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/* Copyright (C) 2016 Philipp Benner
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gonetics
/* -------------------------------------------------------------------------- */
import "fmt"
import "io"
import "log"
import "database/sql"
import _ "github.com/go-sql-driver/mysql"
/* import data from ucsc
* -------------------------------------------------------------------------- */
func ImportCpGIslandsFromUCSC(genome string) GRanges {
/* variables for storing a single database row */
var i_seqname string
var i_from, i_to, i_length, i_cpgNum, i_gcNum int
var i_perCpg, i_perGc, i_obsExp float64
seqnames := []string{}
from := []int{}
to := []int{}
length := []int{}
cpgNum := []int{}
gcNum := []int{}
perCpg := []float64{}
perGc := []float64{}
obsExp := []float64{}
/* open connection */
db, err := sql.Open("mysql",
fmt.Sprintf("genome@tcp(genome-mysql.cse.ucsc.edu:3306)/%s", genome))
if err != nil {
log.Fatal(err)
}
defer db.Close()
err = db.Ping()
if err != nil {
log.Fatal(err)
}
/* receive data */
rows, err := db.Query(
fmt.Sprintf("SELECT chrom, chromStart, chromEnd, length, cpgNum, gcNum, perCpg, perGc, obsExp FROM %s", "cpgIslandExt"))
if err != nil {
log.Fatal(err)
}
defer rows.Close()
for rows.Next() {
err := rows.Scan(&i_seqname, &i_from, &i_to, &i_length, &i_cpgNum, &i_gcNum, &i_perCpg, &i_perGc, &i_obsExp)
if err != nil {
log.Fatal(err)
}
seqnames = append(seqnames, i_seqname)
from = append(from, i_from)
to = append(to, i_to)
length = append(length, i_length)
cpgNum = append(cpgNum, i_cpgNum)
gcNum = append(gcNum, i_gcNum)
perCpg = append(perCpg, i_perCpg)
perGc = append(perGc, i_perGc)
obsExp = append(obsExp, i_obsExp)
}
r := NewGRanges(seqnames, from, to, []byte{})
r.AddMeta("length", length)
r.AddMeta("cpgNum", cpgNum)
r.AddMeta("gcNum", gcNum)
r.AddMeta("perCpg", perCpg)
r.AddMeta("perGc", perGc)
r.AddMeta("obsExp", obsExp)
return r
}
func ReadCpGIslandsFromTable(r io.ReadSeeker) (GRanges, error) {
cpg := GRanges{}
err := cpg.ReadTable(r,
[]string{"length", "cpgNum", "gcNum", "perCpg", "perGc", "obsExp"},
[]string{"[]int", "[]int", "[]int", "[]float64", "[]float64", "[]float64"})
return cpg, err
}
func ImportCpGIslandsFromTable(filename string) (GRanges, error) {
cpg := GRanges{}
err := cpg.ImportTable(filename,
[]string{"length", "cpgNum", "gcNum", "perCpg", "perGc", "obsExp"},
[]string{"[]int", "[]int", "[]int", "[]float64", "[]float64", "[]float64"})
return cpg, err
}
/* -------------------------------------------------------------------------- */
func (regions GRanges) ObservedOverExpectedCpG(genomicSequence StringSet) ([]float64, error) {
r := make([]float64, regions.Length())
for i := 0; i < regions.Length(); i++ {
if sequence, err := genomicSequence.GetSlice(regions.Seqnames[i], regions.Ranges[i]); err != nil {
return nil, err
} else {
// if sequence is nil, it means the fasta file is missing a chromosome
if sequence == nil {
continue
}
r[i] = ObservedOverExpectedCpG(sequence)
}
}
return r, nil
}