UNIT CubeImporter;
{=============================================================================================================
Gabriel Moraru
2016.07
==============================================================================================================
Object capabilities:
+ Import from SCF, FASTA, SEQ, ABI
+ Export to SCF, FASTA, SEQ
===============================================================================}
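{ USAGE (minimal sketch, not part of the original unit; it assumes the caller already
  owns a TRamLog instance named Log and has a chromatogram file 'sample.ab1' on disk):

    VAR
      Cube: TCubeImport;
      BasesLeft: Integer;
    begin
      Cube:= TCubeImport.Create(Log);
      TRY
        if Cube.Import('sample.ab1')                 // SCF/ABI/FASTA/SEQ detected from the extension
        then Cube.ExportAsFASTA(TRUE, BasesLeft);    // TRUE = trim the low quality ends before export
      FINALLY
        FreeAndNil(Cube);
      END;
    end;
}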
INTERFACE
USES
Winapi.Windows, System.SysUtils, System.Math, Vcl.Graphics,
CubicDNA, ScfRead, ScfBase, ReadAbi, ReadFasta, ParseGenBankR, CubeBase, CubeBaseSNP, ccCore, ccINIFile, clRamLog, ccRichLog;
TYPE
TCubeImport = class(TCubeAbstractSnp)
private
FParentVer : string;
FUseIntQvAlgo: Boolean; { if true, I will always use my own algorithm to calculate/recalculate QVs. If false, I will use the QV calculated by Phred. If no QV data exists, I use the internal algorithm to calculate the QV }
FRecallNPeaks: Boolean;
protected
function Boot: Boolean; { Post-processing after loading the object. This function also calls TrimEngine }
function cloneAndTrim_: TCubeImport; //Check: DO I REALLY NEED IT? Get rid of it { Return a copy of the current cube with low quality ends (determined with TrimEngine) trimmed }
public
CommentOrig: string; { original comment. Used only for SCF and ABI }
OrigLength : Integer; { Original seq length, before trimming }
ChromaDisplay: TObject; {ASSOCIATED DISPLAY} { pointer to an associated TChromaDisplay object }
constructor Create(aLog: TRamLog);
procedure Clear; override;
{LOG & MESSAGES}
function PropertiesWithName : string; { before I call this I have to apply TrimEngine }
function Properties : string; { before I call this I have to apply TrimEngine }
function ShowEstimatedQuality: string;
function ShowBases : string;
function ShowGoodBases : string;
function ShowBasesAfterTrim : string; { tells what % of the bases remained after TrimEngine }
function ShowQVExist : string;
function TrustedBasesPercent: Real; { How many trusted bases (bases with QV over GoodQVTresh) are there? }
{IMPORT}
function Import(CONST FileName: string): Boolean; { Fasta and GBK files are not supported because they may contain more than one sample! }
function AssignGBK (CONST Gbk : TGbkObj): Boolean;
function AssignScf (CONST SCF : TScfObj): boolean;
function AssignAbi (CONST ABI : TAbiObj; EditedField: boolean= True): Boolean; { the EditedField parameter shows where to extract the information from: the original field or the EDITED field }
function AssignSecv (CONST SECV: TFastaObj): Boolean;
property Version: string read FParentVer Write FParentVer; { what version the parent object had. Example, for SCF: '3.00' }
{EXPORT}
function ExportBack (CONST Clean: Boolean; OUT BasesLeft: Integer): Boolean; { Save the object back to disk, in its original format. This will overwrite the parent sample }
function ExportAs (CONST aFileName: string; Clean: Boolean; OUT BasesLeft: Integer): Boolean; { Autodetects the sequence type from the file name. Clean=True transfers only the bases with good QV }
procedure ExportAsFASTA (CONST Clean: Boolean; OUT BasesLeft: Integer); overload; { uses cube's name and changes its extension to 'fasta' }
procedure ExportAsFASTA (CONST FileName : string; Clean: Boolean; OUT BasesLeft: Integer); overload;
procedure ExportAsSecv (CONST NewName : string; Clean: Boolean; OUT BasesLeft: Integer);
procedure ExportToSCF (CONST aFileName: string; Clean: Boolean); overload; { This will remove all GAPs }
function ExportToSCF (CONST Clean: Boolean): string; overload; { Returns the name used to save the file }
function AsFasta (CONST aFileName: string; Clean: Boolean): TFastaObj; overload; { Return a TFastaObj object built from this cube }
function AsFasta (CONST Clean: Boolean): TFastaObj; overload; { Same as above but the user doesn't have to provide a file name. It is automatically derived from the Cube's name }
{BUILD}
procedure BuildFromScratch (CONST sBases: BaseString; CONST sFullName, sComments: string); { Build a cube from the given parameters }
property RecomputeQVs : Boolean read FUseIntQvAlgo write FUseIntQvAlgo default TRUE; { if true, I will always use my own algorithm to calculate/recalculate QVs. If false, I will use the QV calculated by Phred. If no QV data exists, I use the internal algorithm to calculate the QV }
property RecallNPeaks : Boolean read FRecallNPeaks write FRecallNPeaks default TRUE;
{DEBUG}
function DebugToBmp (ShowLinks: Boolean; YDivider, XMultiplier: integer): TBitmap; { creates a bitmap that shows the internal layout of the chromatogram }
procedure DebugToBmpSave(SaveTo: string; ShowLinks: Boolean; CONST YDivider: Integer= 10; CONST XMultiplier: Integer= 5); { creates a bitmap that shows the internal layout of the chromatogram. 10 is a good default value for YDivider. If SaveTo is empty, the routine will autogenerate the path }
procedure DebugToTxt (Path: string);
procedure FakeChroma (PeakHeight: integer; Randomiz: boolean); { FAKE A CHROMA - PeakHeight should be set at about 700. WARNING: 'Randomize' must be called before calling 'FakeChroma' }
procedure SaveChromaAsBmp(CONST Nume: string);
end;
IMPLEMENTATION
USES
ccAppData, ccIO, cmMath, SCFWrite;
{===============================================================================
CONSTRUCTOR / DESTRUCTOR
===============================================================================}
constructor TCubeImport.Create;
begin
inherited Create(aLog);
FUseIntQvAlgo := TRUE; { If true, it means that the internal algorithm was used to compute the QVs }
FRecallNPeaks := TRUE;
end;
{ Call Clear when:
* creating an abstract cube (TCubeBase, TCubeImport, TCubObj): call Clear immediately after Create.
* creating a persistent cube (TCubObjEx): call Clear when calling LoadFromFile or Assign (if Assign is ever implemented). }
procedure TCubeImport.Clear;
begin
inherited Clear; { I have to call inherited. When I call clear in the code, I must also clear the parents of this object }
Version := ''; { what version the parent object had. Example, for SCF: '3.00' }
CommentOrig:= '';
OrigLength := 0;
end;
function TCubeImport.Boot: Boolean; { Post-processing after loading the object. This function also calls TrimEngine }
procedure CheckQVExists;
VAR i: Integer;
begin
if NOT HasChroma
then FQVExist:= FALSE
else
for i:= 1 TO NoOfBases DO
if CellQV[i] > 1 then { I chose 1 instead of 0, to treat the case where the QV field was: 0, 1, 0, 1... }
begin
FQVExist:= TRUE;
Break;
end; { ReturnInfo_:= 'File name: '+ ShortName +CRLF+ 'File path: '+ ExtractFilePath(ParentFileName) +LBRK+ ABI.ReturnInfo1}
end;
begin
Result:= TRUE;
{ CHECK AGAINST TINY CHROMAS } { see 'amoA pCC1 094.scf' }
if NoOfBases<= ctMinimimSequence then
begin
RamLog.AddWarn('This sample has less than 10 bases!');
EXIT(FALSE);
end;
{ QV exists? }
CheckQVExists;
{ BASES }
DirtyBases:= TRUE;
OrigLength:= NoOfBases;
if HasChroma AND (NoOfBases> 0) then
begin { Make sure that all samples start and end at the ground }
Chroma[ctChromaIndex].HeightA:= 1;
Chroma[ctChromaIndex].HeightC:= 1;
Chroma[ctChromaIndex].HeightG:= 1;
Chroma[ctChromaIndex].HeightT:= 1;
Chroma[NoOfSamples] .HeightA:= 1;
Chroma[NoOfSamples] .HeightC:= 1;
Chroma[NoOfSamples] .HeightG:= 1;
Chroma[NoOfSamples] .HeightT:= 1;
end;
ReasignPeaks; { For some strange reason, in ABI files, the pointers between the base and the peak are placed a few points BEFORE the top of the dome (the real peak). So I have to recalculate their position }
{ CALCULATE QVs }
if HasChroma then
begin
if RecallNPeaks
then RecallNBases; { Recall the N bases }
if NOT QVExist
then CalculateQVs { No QV included. Force CalculateQVs }
else if RecomputeQVs { If 'Prefer internal base caller' then force CalculateQVs }
then CalculateQVs;
end;
buildBases;
{ TRIM ENGINE }
TrimEnds;
end;
{===============================================================================
IMPORTER
This calls TrimEnds and DetectVectors
Note: Fasta and GBK files might contain more than one sample! Only the first sample will be loaded
===============================================================================}
function TCubeImport.Import(CONST FileName: string): Boolean;
VAR
unSCF: TScfRead;
unABI: TAbiObj;
Fasta: TFastaObj;
begin
Result:= FALSE; { Must be initialized, otherwise it keeps the value from the previous run (left on the stack) }
{ LOAD SCF }
if IsScf(FileName) then
begin
unSCF:= TScfRead.Create(RamLog);
TRY
if unSCF.LoadFromFile(FileName)
then Result:= AssignScf(unSCF);
FINALLY
FreeAndNil(unSCF);
END
end
else
{ LOAD ABI }
if IsAbi(FileName) then
BEGIN
unABI:= TAbiObj.Create(RamLog);
TRY
if unABI.LoadFromFile(FileName, FALSE)
then Result:= AssignAbi(unABI, TRUE);
FINALLY
FreeAndNil(unABI);
END
END
else
{ LOAD FASTA/SEQ/TXT/GBK }
if IsPlainText(FileName) then
BEGIN
Fasta:= TFastaObj.Create(RamLog);
TRY
if Fasta.LoadFromFile(FileName, FALSE)
then Result:= AssignSecv(Fasta);
FINALLY
FreeAndNil(Fasta);
END
END
else
RamLog.addError('Unsupported file: '+ FileName); { Only for those with chromatograms }
//del - this is done in Boot -> buildBases;
end;
{===============================================================================
ASSIGN ABI
===============================================================================}
function TCubeImport.AssignAbi(CONST ABI: TAbiObj; EditedField: boolean= True): Boolean; { the EditedField parameter shows where to extract the information from: the original field or the EDITED field }
VAR
ProbRec: RBaseProb;
procedure ImportBases(Side: TSide); { Import Bases and QVs }
VAR
CurBase: Integer;
Ptr2Smpl: Integer;
QV: Byte;
begin
NoOfBases := Side.NrOfBases; { CellMX is indexed from 1, not 0 }
CellMxReset; { I must call this AFTER I set NoOfBases }
Assert(Length(Chroma)= NoOfSamples+1, 'Invalid chroma size.');
for CurBase:= ctCellsIndex to NoOfBases DO
begin
{ Import BASE }
Base[CurBase]:= tbase(Side.BaseArray[CurBase]); { set Base, but it doesn't call 'Changed' }
{ Import POINTERS }
Ptr2Smpl:= Side.Ptr2Smpl[CurBase]; { must be equal to the number of bases, so the association is from BASES TO SAMPLES }
if Ptr2Smpl >= Length(Chroma) { This fixes the Russell case}
then Ptr2Smpl:= Length(Chroma)-1;
Chroma[Ptr2Smpl].Ptr2Base := CurBase; { BASE POS }
Base2Smpl[CurBase]:= Ptr2Smpl;
end;
{ Import QV }
if Length(Side.QvMX) > 0 then { Some ABI files will not have QV info, so the QvMx will be empty }
begin
Assert(NoOfBases= Length(Side.QvMX), 'NoOfBases different than QVCount!');
for CurBase:= ctCellsIndex to NoOfBases DO
begin
QV:= Side.QvMX[CurBase-IndexDiff];
CellQV[CurBase]:= QV;
{ Convert ABI QV to SCF probabilities } { The SCF format has this feature but the ABI format doesn't }
if UpCase(Base[CurBase])= 'N' then { For N, all bases have same probability (same QV) }
begin
ProbRec.A:= QV;
ProbRec.C:= QV;
ProbRec.G:= QV;
ProbRec.T:= QV;
end
else
begin
FillChar(ProbRec, SizeOf(ProbRec), #0); { Necessary because records are not automatically initialized! }
case UpCase(Base[CurBase]) of
'A': ProbRec.A:= QV;
'C': ProbRec.C:= QV;
'G': ProbRec.G:= QV;
'T': ProbRec.T:= QV;
end;
end;
CellsMX[CurBase].CellProb:= ProbRec;
end;
end;
end;
{ Import traces (into samples) }
procedure ImportTraces(Trace: TAbiTrace);
VAR j: Integer;
begin
case Trace.Name of
'A': for j:= 0 to NoOfSamples-1 DO Chroma[j+1].HeightA:= Trace.points[j];
'C': for j:= 0 to NoOfSamples-1 DO Chroma[j+1].HeightC:= Trace.points[j];
'G': for j:= 0 to NoOfSamples-1 DO Chroma[j+1].HeightG:= Trace.points[j];
'T': for j:= 0 to NoOfSamples-1 DO Chroma[j+1].HeightT:= Trace.points[j];
end;
end;
begin
Clear; { THIS IS MANDATORY in order to clear the previous data (for example pointerMX) loaded in the Cube. I tested the program without it and it didn't work. }
TRY
FileName:= ABI.FileName;
Comment := '> '+ ScreenName; { ABI has no comment field so 'Comment' will be empty } { Only FASTA files have relevant comments. For SCF and ABI we ignore the comments. Cristina}
CommentOrig:= string(ABI.H.AbiTags);
NoOfSamples:= ABI.NoOfSamples; { This MUST be ABOVE 'import bases' }
{ Import Bases and QVs }
if EditedField
then ImportBases(ABI.Side1)
else ImportBases(ABI.Side2);
{ Import traces (into samples) }
ImportTraces(ABI.Trace1);
ImportTraces(ABI.Trace2);
ImportTraces(ABI.Trace3);
ImportTraces(ABI.Trace4);
Result:= Boot;
EXCEPT
on E : Exception DO
begin
RamLog.AddError(E.Message + CRLF+ 'Invalid ABI file. Please report this error to us.');
Result:= FALSE;
end;
END;
end;
{===============================================================================
ASSIGN SCF
===============================================================================}
function TCubeImport.AssignScf(const SCF: TScfObj): Boolean;
VAR i, CurBase: Integer;
ProbRec: RBaseProb;
iPointer: Cardinal;
begin
Clear; { THIS IS MANDATORY in order to clear the previous data (for example pointerMX) loaded in the Cube. I tested the program without it and it didn't work. }
TRY
ParentType := bfSCF;
NoOfSamples := SCF.NrOfSamples; { reserve memory }
NoOfBases := SCF.NoOfBases; { reserve memory }
Version := String(SCF.H.Version);
FileName := SCF.FileName;
Comment := '> '+ ScreenName;
CommentOrig := SCF.Comments; { original comment. Used only for SCF and ABI } { Some scf files have no comment, others have something like this: NAME=CMO_59 LANE=1 SIGN=A=652,C=962,G=869,T=873 }{ Only FASTA files have relevant comments. For SCF and ABI we ignore the comments. Cristina}
CellMxReset;
for CurBase:= ctCellsIndex to NoOfBases DO { both matrices (Chroma and TempQV) are indexed from 1 }
begin
{ Import BASE }
Base[CurBase]:= tbase(SCF.BaseArray[CurBase].Base);
{ Import pointers }
iPointer:= SCF.BaseArray[CurBase].Ptr2Smpl;
if iPointer> SCF.NrOfSamples { 'Amoa sample' special case }
then iPointer:= 1;
Chroma[iPointer].Ptr2Base := CurBase;
Base2Smpl[CurBase]:= iPointer;
{ Import probabilities }
ProbRec.A:= SCF.BaseArray[CurBase].prob_A;
ProbRec.C:= SCF.BaseArray[CurBase].prob_C;
ProbRec.G:= SCF.BaseArray[CurBase].prob_G;
ProbRec.T:= SCF.BaseArray[CurBase].prob_T;
CellsMX[CurBase].CellProb:= ProbRec;
end;
{ Compute QVs for all bases }
MakeQVFromProbability;
{ Import the traces }
for i:= 0 to NoOfSamples-1 DO { SCF is indexed from 0 }
begin
Chroma[i+ ctChromaIndex].HeightA:= SCF.TraceA[i]; { +1 because my Chroma matrix is indexed from 1 }
Chroma[i+ ctChromaIndex].HeightC:= SCF.TraceC[i];
Chroma[i+ ctChromaIndex].HeightG:= SCF.TraceG[i];
Chroma[i+ ctChromaIndex].HeightT:= SCF.TraceT[i];
end;
{ In v3.10 the QVs are stored as Log10 values }
if Version >= '3.10' then
for i:= 0 to NoOfSamples-1 DO
begin
CellQV [i+1] := round(-10 * Log10(CellQV[i+1]));
if CellQV [i+1] < 1
then CellQV [i+1] := 0;
end;
Result:= Boot;
EXCEPT
RamLog.AddError('SCF file cannot be imported. Please report this error to us.');
Result:= FALSE;
END;
end;
function TCubeImport.ExportToSCF(CONST Clean: Boolean): string; { Returns the name used to save the file. }
begin
Result:= ChangeFileExt(FileName, '.scf');
ExportToSCF(Result, Clean); { This will remove all GAPs }
end;
procedure TCubeImport.ExportToSCF(CONST aFileName: string; Clean: Boolean); { This will remove all GAPs }
VAR UnScf: TScfWriter;
CurSmpl, CubBase, ScfBase, Ptr2Smpl: Integer;
Duplicate: TCubeImport;
Base: TBase;
begin
if Clean AND FQVExist
then Duplicate:= cloneAndTrim_ { CloneAndTrim returns a copy of the current cube with low quality ends (determined with TrimEnds) trimmed. The caller has to free the cube. }
else Duplicate:= Self;
UnScf:= TScfWriter.Create(RamLog);
TRY
UnScf.NrOfSamples := Duplicate.NoOfSamples; { This will make: SetLength }
UnScf.NoOfBases := Duplicate.NoOfBases; { This will make: SetLength(TempQV, H.NoOfBases+ IndexedIn1) }
UnScf.H.Version := '3.00';
UnScf.Comments := Duplicate.CommentOrig;
UnScf.H.SampleSize := 2;
UnScf.H.ClipLeft := 0; { Obsolete. We don't care. I just initialize them to 0 so when I write this field to disk it won't write a random value }
UnScf.H.ClipRight := 0;
{ Transfer the bases }
ScfBase:= ctCellsIndex;
for CubBase:= ctCellsIndex to Duplicate.NoOfBases DO
begin
Base:= Duplicate.Base[CubBase];
if Base<> GAP
then { Skip GAPs }
begin
Ptr2Smpl:= Duplicate.Base2Smpl[CubBase];
Assert(Ptr2Smpl<= Duplicate.NoOfSamples, ' Ptr2Smpl= '+i2s(Ptr2Smpl));
UnSCF.BaseArray[ScfBase].Ptr2Smpl:= Ptr2Smpl;
UnSCF.BaseArray[ScfBase].Base:= Ansichar(Base);
{ Transfer SCF probabilities } { SCF supports per-base probabilities }
if ParentType= bfSCF then
begin
UnSCF.BaseArray[ScfBase].prob_A:= Duplicate.CellProbab[CubBase].A;
UnSCF.BaseArray[ScfBase].prob_C:= Duplicate.CellProbab[CubBase].C;
UnSCF.BaseArray[ScfBase].prob_G:= Duplicate.CellProbab[CubBase].G;
UnSCF.BaseArray[ScfBase].prob_T:= Duplicate.CellProbab[CubBase].T;
end;
{ Transfer QVs } { However, if a base is edited in the DNA Baser editor (or by the internal base caller), the probability is stored in the QV field, not in the Probabilities field. So I also need to transfer the QVs }
case Duplicate.Base[CubBase] of
'A': UnSCF.BaseArray[ScfBase].prob_A:= Duplicate.CellQV[CubBase];
'C': UnSCF.BaseArray[ScfBase].prob_C:= Duplicate.CellQV[CubBase];
'G': UnSCF.BaseArray[ScfBase].prob_G:= Duplicate.CellQV[CubBase];
'T': UnSCF.BaseArray[ScfBase].prob_T:= Duplicate.CellQV[CubBase];
else
begin
UnSCF.BaseArray[ScfBase].prob_A:= Duplicate.CellQV[CubBase];
UnSCF.BaseArray[ScfBase].prob_C:= Duplicate.CellQV[CubBase];
UnSCF.BaseArray[ScfBase].prob_G:= Duplicate.CellQV[CubBase];
UnSCF.BaseArray[ScfBase].prob_T:= Duplicate.CellQV[CubBase];
end;
end;
Inc(ScfBase);
end
else EmptyDummy; { Skip GAPs }
end;
UnScf.NoOfBases:= ScfBase-1; { Take into account the removed GAPs }
{ Transfer the traces }
for CurSmpl:= 0 to Duplicate.NoOfSamples-1 DO { In SCF, ChromaMX is indexed from 0 }
begin
UnSCF.TraceA[CurSmpl]:= Duplicate.Chroma[CurSmpl+ ctChromaIndex].HeightA;
UnSCF.TraceC[CurSmpl]:= Duplicate.Chroma[CurSmpl+ ctChromaIndex].HeightC;
UnSCF.TraceG[CurSmpl]:= Duplicate.Chroma[CurSmpl+ ctChromaIndex].HeightG;
UnSCF.TraceT[CurSmpl]:= Duplicate.Chroma[CurSmpl+ ctChromaIndex].HeightT;
end;
UnScf.SaveToFile(aFileName);
FINALLY
FreeAndNil(UnScf);
if Duplicate<> Self //del Clean AND FQVExist
then FreeAndNil(Duplicate);
END;
end;
function TCubeImport.cloneAndTrim_: TCubeImport; { Returns a copy of the current cube with low quality ends (determined with TrimEnds) trimmed. The caller has to free the cube. }
VAR
b1, b2, SampleDiff, BaseDiff: Integer;
FromBase, ToBase: Integer;
FromSample, ToSample: Integer;
Sample1, Sample2: Integer;
begin
Assert(FQVExist);
Result:= TCubeImport.Create(RamLog);
{ FROM/TO }
CleanFromTo(FromBase, ToBase); { Shows from which to which base I have to cut in order to remove the vectors and the low quality ends }
{ Too short? } { If the sequence left after trimming is too short then I force it to have at least 5 good bases. }
if ToBase-FromBase < ctMinimimSequence then
begin
FromBase:= NoOfBases DIV 2;
ToBase:= FromBase+ ctMinimimSequence;
if ToBase > NoOfBases
then ToBase:= NoOfBases; // RAISE Exception.Create('Chromatogram is too short! '+ FileName);
end;
FromSample:= Base2Smpl[FromBase] - 8; { 6 is half of the average peak distance (12) but we take a bit more because some domes are wider than 12 samples. We need it in order to copy the left side of the dome. Otherwise we would start copying samples right in the middle of the dome }
ToSample := Base2Smpl[ToBase] + 8;
if FromSample < ctChromaIndex
then FromSample:= ctChromaIndex;
if ToSample> NoOfSamples
then ToSample:= NoOfSamples;
{cube PROPERTIES}
Result.NoOfBases := 1+ ToBase- FromBase;
Result.NoOfSamples := NoOfSamples; { DUMMY CALL. I need it because in TCubeAbstract.GetPtr2Smpl I have an Assertion that blows. I make the REAL assignment a few lines further down }
Result.FQVExist := QVExist;
Result.Comment := Comment;
Result.Reversed := Reversed;
{BASES/QV}
Result.EngTrim1 := EngTrim1; { the left end - for the moment it is used for both ends }
Result.EngTrim2 := EngTrim2; { the right end }
{CUT IT}
b1 := 0;
SampleDiff:= FromSample-1;
BaseDiff := FromBase-1;
{ Copy from specified bases }
for b2:= FromBase to ToBase DO
begin
Inc(b1);
Result.CellSet(b1, Cell(b2));
{ Rebuild the pointers }
Result.Base2Smpl[b1]:= Result.Base2Smpl[b1]- SampleDiff;
end;
Sample1:= 0;
for Sample2:= FromSample to ToSample DO
begin
Inc(Sample1);
Result.Chroma[Sample1]:= Chroma[Sample2];
{ Rebuild the pointers }
if Result.Chroma[Sample1].Ptr2Base > 0
then Result.Chroma[Sample1].Ptr2Base:= Result.Chroma[Sample1].Ptr2Base- BaseDiff;
end;
DirtyBases:= TRUE;
DetectVectors;
Result.NoOfSamples:= 1+ ToSample- FromSample;
Result.GoodQVStart:= 1;
//Result.GoodQVEndS:= NoOfSamples;
Result.TrimEnds; { Calls GetGoodBases, GoodQVStart, GoodQVEnd, GoodQVStaSmpl, GoodQVEndSmpl and CHANGED }
end;
{ TRANSFER from plain sequence (SECV) }
function TCubeImport.AssignSecv(CONST SECV: TFastaObj): Boolean;
begin
Result:= FALSE;
Clear; { THIS IS MANDATORY in order to clear the previous data (for example pointerMX) loaded in the Cube. I tested the program without it and it didn't work. }
if (SECV.NoOfBases<= 1) then
begin
RamLog.AddError('Invalid file: the sample is empty.');
EXIT(FALSE);
end;
TRY
{ FILE }
FileName := SECV.FileName; { Note: I should use SECV.ParentName here but in this case it won't see the names assigned to multi-samples by the GenerateVirtualNames (for multi-fasta files) }
Comment := SECV.Comment;
IsPart := SECV.IsPart;
{ cube }
NoOfBases := SECV.NoOfBases;
CellMxReset; { Set pointers and QVs to 0 }
NoOfSamples := 0;
FQVExist := FALSE;
{ Import the bases }
setBases(SECV.BASES);
Result:= Boot;
EXCEPT
on E: Exception
DO RamLog.AddError('Error while trying to read the sample. '+ E.Message);
END;
end;
{ Import from GBK }
function TCubeImport.AssignGBK(CONST Gbk: TGbkObj): Boolean;
begin
Clear; { THIS IS MANDATORY in order to clear the previous data (for example pointerMX) loaded in the Cube. I tested the program without it and it didn't work. }
if (Gbk.NoOfBases<= 1) then
begin
RamLog.AddError('Invalid file: the sample is empty.');
EXIT(FALSE);
end;
TRY
{FILE}
FileName:= Gbk.FileName;
Comment := Gbk.Comment;
IsPart := Gbk.IsPart;
{cube}
NoOfBases:= Gbk.NoOfBases; { reserve memory }
CellMxReset;
NoOfSamples:= 0;
FQVExist := FALSE;
{Import bases}
setBases(Gbk.BASES);
Result:= Boot;
EXCEPT
RamLog.AddError('Invalid GBK file. Please report this error to us.');
Result:= FALSE;
END;
end;
{===============================================================================
EXPORT IT IN THE ORIGINAL FORMAT
===============================================================================}
function TCubeImport.ExportBack(CONST Clean: Boolean; OUT BasesLeft: Integer): Boolean; { Save the object back to disk, in its original format. This will overwrite the parent sample. Useful after the user changed some bases in it }
begin
Result:= TRUE;
case ParentType of
bfSCF: Result:= ExportToSCF (Clean)= ''; { This will remove all GAPs }
bfTXT: ExportAsSecv (FileName, Clean, BasesLeft);
bfSEQ: ExportAsSecv (FileName, Clean, BasesLeft);
bfFAS: begin Result:= TRUE; ExportAsFASTA(Clean, BasesLeft); end;
else
Result:= FALSE; { bfABI, bfGBK, bfNone }
end;
end;
{===============================================================================
EXPORT TO A DIFFERENT FORMAT
===============================================================================}
function TCubeImport.ExportAs(CONST aFileName: string; Clean: Boolean; OUT BasesLeft: Integer): Boolean; { Autodetects the sequence type from the file name. Clean=True transfers only the bases with good QV }
begin
Result:= TRUE;
if IsScf(aFileName)
then ExportToSCF(aFileName, Clean) { This will remove all GAPs }
else
if IsPlainText(aFileName)
then ExportAsSecv(aFileName, Clean, BasesLeft)
else Result:= FALSE; { could not save because this extension is not recognized }
end;
procedure TCubeImport.ExportAsSecv(CONST NewName: string; Clean: Boolean; OUT BasesLeft: Integer); { 'NewName' is full path. Clean means "Remove low quality ends and vectors" }
VAR Secv: TFastaObj;
begin
Secv:= AsFasta(NewName, Clean);
TRY
BasesLeft:= Secv.NoOfBases;
Secv.Save_AutoDetect;
FINALLY
FreeAndNil(Secv);
END;
end;
procedure TCubeImport.ExportAsFASTA(CONST Clean: Boolean; OUT BasesLeft: Integer); { uses cube's name and changes its extension to 'fasta' }
begin
ExportAsFASTA(ChangeFileExt(FileName, '.FASTA'), Clean, BasesLeft);
end;
procedure TCubeImport.ExportAsFASTA(CONST FileName: string; Clean: Boolean; OUT BasesLeft: Integer);
VAR Secv: TFastaObj;
begin
Assert(SameText(ExtractFileExt(FileName), '.fasta'), 'File ext MUST be Fasta');
Secv:= AsFasta(FileName, Clean);
TRY
BasesLeft:= Secv.NoOfBases;
Secv.Save(TRUE);
FINALLY
FreeAndNil(Secv);
END;
end;
function TCubeImport.AsFasta(CONST aFileName: string; Clean: Boolean): TFastaObj; { Return a TFastaObj object built from this cube. The caller needs to free it }
begin
Result:= TFastaObj.Create(RamLog);
{ Remove low quality ends and vectors } { It also removes GAPS }
if Clean
then Result.BASES := GoodBasesNoVector
else Result.BASES := Bases;
Result.Comment := ValidateComment(Comment);
Result.FileName:= aFileName;
end;
function TCubeImport.AsFasta(CONST Clean: Boolean): TFastaObj; { Same as above but the user doesn't have to provide a file name. It is automatically derived from the Cube's name }
begin
Result:= AsFasta(ChangeFileExt(FileName, '.FASTA'), Clean);
end;
{--------------------------------------------------------------------------------------------------
PROPERTIES
--------------------------------------------------------------------------------------------------}
function TCubeImport.PropertiesWithName: string; { before I call this, I have to apply TrimEnds }
begin
Result:= 'FILE: '+ CRLF
+ ' '+ ShortName+ crlf
+ ' '+ Properties;
end;
function TCubeImport.Properties: string; { before I call this, I have to apply TrimEnds }
VAR Tags, sComment: string;
Proc: Double;
begin
Result:= '';
if HasChroma
then
begin
Proc:= PercentGC(Bases);
Result:= Result+ CRLF+ 'GC: '+ Real2Str(Proc, 2)+ '%';
Result:= Result+ CRLF+ 'AT: '+ Real2Str(100- Proc, 2)+ '%';
Result:= Result+ CRLF+ ShowBases;
Result:= Result+ CRLF+ CRLF+ 'QUALITY INFO:';
if QvComputedInternally { If true, it means that the internal algorithm was used to compute the QVs }
then Result:= Result+ CRLF+ ' Base quality values (QV): computed by '+ AppData.AppName
else Result:= Result+ CRLF+ ' Base quality values (QV): present.';
Result:= Result+ CRLF+ ' '+ ShowBasesAfterTrim;
if QVExist then
begin
Result:= Result+ CRLF+ ' Average quality (before trimming): '+ i2s(AverageQVAll); { Average QV of the whole chromatogram (low QV ends included) }
Result:= Result+ CRLF+ ' '+ ShowEstimatedQuality; { Average QV of the good part of the chromatogram }
Result:= Result+ CRLF+ ' Trusted bases (bases with QV over '+ IntToStr(EngTrim1.GoodQVTresh) + '): '+ Real2Str(TrustedBasesPercent)+ '%';
end;
Result:= Result+ CRLF+ ' Average peak height: ' + i2s(AveragePeakHeight);
end
else
Result:= Result+ CRLF+ 'No of bases: '+ i2s(NoOfBases);
{ COMMENT }
if (Comment= '>') OR (Comment= '> ') OR (Comment= '')
then sComment:= ''
else sComment:= 'Comment:'+ CRLF+ Comment;
if CommentOrig <> ''
then Tags:= 'Abi Tags: '+ CommentOrig;
{ SHOW SHORTER TEXT FIRST }
if (Tags<> '') OR (sComment<> '') then
if Length(Tags) > Length(sComment)
then Result:= Result+ crlf+ sComment+ LBRK+ Tags
else Result:= Result+ crlf+ Tags + LBRK+ sComment;
Result:= System.SysUtils.AdjustLineBreaks(Result, tlbsCRLF)
end;
{ How many trusted bases (bases with QV over GoodQVTresh) are there? }
function TCubeImport.TrustedBasesPercent: Real;
VAR i, TotalGood: Integer;
begin
TotalGood:= 0;
for i:= GoodQVStart to GoodQVStart+ NoOfGoodBases-1 DO
if CellsMX[i].QV >= EngTrim1.GoodQVTresh
then Inc(TotalGood);
Result:= ProcentRepresent(TotalGood, NoOfGoodBases);
end;
{ Average QV of the good part of the chromatogram }
function TCubeImport.ShowEstimatedQuality: string;
begin
if QVExist
then
begin
Result:= 'Average quality (after trimming): '+ i2s(AverageQV);
case AverageQV of
00..14 : Result:= Result+ ' (very poor)';
15..24 : Result:= Result+ ' (poor)';
25..39 : Result:= Result+ ' (good)';
40..59 : Result:= Result+ ' (very good)';
60..89 : Result:= Result+ ' (excellent)';
else
Result:= Result+ ' (incredibly good)';
end;
end
else Result:= 'Sample quality cannot be estimated (base quality info is missing).';
end;
function TCubeImport.ShowQVExist: string;
begin
if QVExist
then Result:= 'Quality value info: present'
else Result:= 'Quality value info: missing!';
end;
function TCubeImport.ShowBases: string;
begin
Result:= 'Number of bases: '+ i2s(NoOfBases);
end;
function TCubeImport.ShowGoodBases: string;
begin
Result:= 'Number of good bases: '+ i2s(NoOfGoodBases);
end;
{ Tells what % of the bases are left after TrimEnds }
function TCubeImport.ShowBasesAfterTrim: string;
VAR BasesLeft: Integer;
begin
if FQVExist then
begin
BasesLeft:= round( ProcentRepresent(NoOfGoodBases, NoOfBases) );
Result:= 'Bases left after end trimming: '+ i2s(NoOfGoodBases)+ ' ('+ i2s(BasesLeft)+ '%)';
end;
end;
{--------------------------------------------------------------------------------------------------
BUILD FROM SCRATCH
Build a cube from the given parameters.
After that we have to call ComputeColors.
--------------------------------------------------------------------------------------------------}
procedure TCubeImport.BuildFromScratch;
begin
Assert(NoOfBases= 0);
{FILE}
Comment := sComments;
FileName := sFullName;
{cube}
FQVExist := FALSE;
NoOfBases := Length(sBases);
NoOfSamples := 0;
setBases(sBases); { This also calls 'DirtyBases:= true' }
Boot; { This calls DetectVectors }
end;
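{ Example (sketch, not in the original source): building a cube from a plain base string.
  It assumes 'Cube' is a freshly created TCubeImport (NoOfBases = 0) and that BaseString
  accepts a string literal; the file name and comment below are made up:

    Cube.BuildFromScratch('ACGTACGTACGT', 'c:\Temp\Synthetic.seq', '> synthetic sample');
}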
{--------------------------------------------------------------------------------------------------
DEBUG
--------------------------------------------------------------------------------------------------}
procedure TCubeImport.DebugToTxt(Path: string);
VAR Smpl: Integer;
sFinal, Rw1, Rw2, Rw3, Rw4: string;
begin
Rw1:= 'Sample: ';
Rw2:= 'Bases : ';
Rw3:= 'Base pos: ';
Rw4:= 'CellMX : ';
for Smpl:= ctChromaIndex to NoOfSamples DO
begin
Rw1:= Rw1+ i2s(Smpl)+ Tab;
Rw2:= Rw2+ Char(Sample2Base(Smpl))+ Tab;
Rw3:= Rw3+ i2s(Chroma[Smpl].Ptr2Base)+ Tab;
if Chroma[Smpl].Ptr2Base> NoneAssigned
then Rw4:= Rw4+ Char(Sample2Base(Smpl))+ Tab
else Rw4:= Rw4+ ' '+ Tab;
end;
sFinal:= Rw1+ CRLF+ Rw2+ CRLF+ Rw3+ CRLF+ Rw4
+CRLF
+CRLF+ 'Details:'
+CRLF+ ' NoOfBases '+ i2s(NoOfBases)
+CRLF+ ' NoOfSamples '+ i2s(NoOfSamples)
+CRLF+ ' File '+ FileName
+CRLF;
StringToFile(Path, sFinal, woOverwrite);
end;
procedure TCubeImport.DebugToBmpSave(SaveTo: string; ShowLinks: Boolean; CONST YDivider: Integer= 10; CONST XMultiplier: Integer= 5); { creates a bitmap that shows the internal layout of the chromatogram. 10 is a good default value for YDivider. If SaveTo is empty, the routine will autogenerate the path }
VAR bmp: TBitmap;
begin