diff --git a/mostfiles/parse_dutch.dat b/mostfiles/parse_dutch.dat
index b224e6f..a501094 100644
--- a/mostfiles/parse_dutch.dat
+++ b/mostfiles/parse_dutch.dat
@@ -384,6 +384,7 @@ geraakt
meldt
meld
melden
+meldd
gemeld
slaan
slaat
diff --git a/mostfiles/process_text.nim b/mostfiles/process_text.nim
index b4a9eb7..a5c79eb 100644
--- a/mostfiles/process_text.nim
+++ b/mostfiles/process_text.nim
@@ -1105,7 +1105,7 @@ proc extractSentencesFromText(input_tekst, languagest:string,
# add the first sentences always to the summary
if sentencecountit <= introductionit:
if sentencest.len < stringsizeit:
- summarysq.add(sentencest)
+ summarysq.add(sentencest & ". ")
else:
processingbo = false # header not yet reached
@@ -1134,15 +1134,15 @@ proc extractSentencesFromText(input_tekst, languagest:string,
countit = count(sentencest, '.')
if countit == 0 or countit > 1:
summarysq.add("
" & $sentencecountit & " ===============================" & "
")
- summarysq.add(sentencest & ".")
+ summarysq.add(sentencest & ". ")
elif countit == 1:
summarysq.add("
" & $sentencecountit & " ===============================" & "
")
linesq = sentencest.split('.')
leftpartst = linesq[0]
rightpartst = linesq[1]
- if leftpartst.contains(line): summarysq.add(leftpartst & ".")
- if rightpartst.contains(line): summarysq.add(rightpartst & ".")
+ if leftpartst.contains(line): summarysq.add(leftpartst & ". ")
+ if rightpartst.contains(line): summarysq.add(rightpartst & ". ")
# to prevent more adds for more extraction-words
@@ -1161,7 +1161,9 @@ proc extractSentencesFromText(input_tekst, languagest:string,
# concatenate extracted sentences to text
summaryst = "Number of extractions: " & $linecountit & "
"
for senst in summarysq:
- summaryst &= strip(senst, true, true)
+ # summaryst &= strip(senst, true, true)
+ summaryst &= senst
+
except IOError:
echo "IO error!"
diff --git a/mostfiles/settings_flashread.conf b/mostfiles/settings_flashread.conf
index 588ca4c..c85ded7 100644
--- a/mostfiles/settings_flashread.conf
+++ b/mostfiles/settings_flashread.conf
@@ -20,7 +20,7 @@ port-number___5050___the port thru which to connect to the web-server___
fr_checkset1___none___default values of the named checkboxset___false,,false,,false,,false,,false
>>>Other<<<
-abbreviations___none___abbreviations from which the dots must be pre-removed___Dr. ,,U.S.,,Mr. ,,etc. ,,B.C.,,A.D.
+abbreviations___none___abbreviations from which the dots must be pre-removed___Dr. ,,U.S.,,Mr. ,,etc. ,,B.C.,,A.D.,,D.C.
>>>Test<<<
diff --git a/mostfiles/source_files.nim b/mostfiles/source_files.nim
index 73dc693..93bec34 100644
--- a/mostfiles/source_files.nim
+++ b/mostfiles/source_files.nim
@@ -4,19 +4,30 @@
-to limt file-access
]#
-import strutils
+import strutils, sequtils
import tables
import os
import fr_tools
-
type
DataFileType* = enum
datFileLanguage
datFileSummary
datFileAll
+ FileSpecs = object of RootObj # base record identifying one data file
+ fsName: string # file name as used in the scan loop of evaluateDataFiles
+ fsVersion: float # not assigned in the visible code; presumably the file's "version:" header - confirm
+ # validation result for one block-header (phase) of one data file
+ FilePhase = object of FileSpecs
+ phaNameFull: string # full block-header line, e.g. "VERBS TO HANDLE"
+ phaSequenceNum: int # 1-based position in the expected phase list
+ phaNameCount: int # times the header line occurs in the file; should be 1
+ phaItemCount: int # lines counted inside the block; preferably > 0
+ phaHasEmptyItem: bool # a zero-length line occurred inside the block; must be false
+ phaEndMarkerFound: bool # the end-marker line was seen for this block; must be true
+
var
versionfl:float = 0.2
@@ -27,6 +38,31 @@ var
sourcefilestatust*: string = ""
faultsfoundbo: bool = false
+ parse_file_phasesq = @[ # block-headers looked for in parse_* files; each must end in " TO HANDLE" (the report key cuts the last 10 chars)
+ "PUNCTUATION OF SENTENCES TO HANDLE",
+ "PUNCTUATION OF SENTENCE-PARTS TO HANDLE",
+ "PRONOUNS TO HANDLE",
+ "VERBS TO HANDLE",
+ "LINK-WORDS TO HANDLE",
+ "PREPOSITIONS TO HANDLE",
+ "NOUN-ANNOUNCERS TO HANDLE",
+ "NOUN-REPLACERS TO HANDLE",
+ "AMBIGUOUS WORD-FUNCTIONS TO HANDLE"]
+ # block-header looked for in summary_* files (same " TO HANDLE" suffix rule)
+ summary_file_phasesq = @["SIGNAL-WORDS TO HANDLE"]
+
+
+
+template withFile*(f, fn, mode, actions: untyped): untyped = # open fn in the given mode as f, run actions, then close f
+ var f: File # declared under the caller-supplied name f so that actions can use it
+ if open(f, fn, mode):
+ try:
+ actions
+ finally: # runs whether or not actions raises
+ close(f)
+ else:
+ quit("cannot open: " & fn) # an unopenable file terminates the whole program
+
proc addLanguageFilesToList() =
# Dynamicly add the language.dat files from the config-file
@@ -99,8 +135,132 @@ proc writeFilePatternToSeq*(filestartwithst: string): seq[string] =
proc evaluateDataFiles*(filetypeu: DataFileType): string =
+ #[
+ Validate the parse_* and summary_* data files; return a text report.
+ - collect the file names with writeFilePatternToSeq
+ - preset one FilePhase record per expected block-header (phase)
+ - scan each file: count header hits, items, empty lines, end-markers
+ - per file, report each phase that is missing, duplicated, lacks an
+ end-marker, has no items, or contains zero-length lines
+ ]#
+ # NOTE(review): datFileSummary appears to yield only the report header - confirm intended
+ var
+ parse_lang_filesq, summary_filesq, all_filesq: seq[string]
+ file_reportta = initOrderedTable[string, FilePhase]() # key: file name & "___" & header minus " TO HANDLE"
+ tablekeyst: string
+ reportst: string = "Validation of the datafiles (no comment = OK):\p
"
+ phasecountit, itemcountit: int
+ inphasebo: bool = false # true while between a block-header and its end-marker
+ endmarkerst: string = ">----------------------------------<"
+ phasesq: seq[string]
+
+
+ parse_lang_filesq = writeFilePatternToSeq("parse_")
+ summary_filesq = writeFilePatternToSeq("summary_")
+ all_filesq = concat(parse_lang_filesq, summary_filesq)
+
+ if filetypeu == datFileAll or filetypeu == datFileLanguage:
+ # parse and summary files are walked in one loop; the phase list is chosen per file
+ for filest in all_filesq:
+ phasecountit = 1
+ # select the expected phase list from the first five characters of the file name
+ case filest[0..4]
+ of "parse":
+ phasesq = parse_file_phasesq
+ of "summa":
+ phasesq = summary_file_phasesq
+ # NOTE(review): assumes filest is a bare file name of 5+ chars (no directory part) - confirm
+ for phasest in phasesq:
+ tablekeyst = filest & "___" & phasest[0..phasest.len - 11] # cuts the last 10 chars (" TO HANDLE")
+ # preset a zeroed record so that headers missing from the file are still reported
+ file_reportta[tablekeyst] = FilePhase(
+ fsName: filest,
+ phaNameFull: phasest,
+ phaNameCount: 0,
+ phaSequenceNum: phasecountit,
+ phaItemCount: 0,
+ phaHasEmptyItem: false,
+ phaEndMarkerFound: false
+ )
+ phasecountit += 1
+ # NOTE(review): inphasebo is not reset per file; an unterminated block may carry over - verify
+ withFile(txt, filest, fmRead):
+ for linest in txt.lines:
+ if linest in phasesq:
+ inphasebo = true
+ itemcountit = 0
+ # block-header line reached; count the hit on the matching record
+ tablekeyst = filest & "___" & linest[0..linest.len - 11]
+ file_reportta[tablekeyst].phaNameCount += 1
+ elif inphasebo:
+ if linest == endmarkerst:
+ file_reportta[tablekeyst].phaItemCount = itemcountit
+ file_reportta[tablekeyst].phaEndMarkerFound = true
+ inphasebo = false
+ else: # walking through the items of the current block
+ if linest.len == 0:
+ file_reportta[tablekeyst].phaHasEmptyItem = true
+ file_reportta[tablekeyst].phaItemCount = itemcountit
+ itemcountit += 1
+ # NOTE(review): phaItemCount is stored before the increment, so it trails by one until the end-marker line - verify
+
+ # echo file_reportta
+
+ var
+ curfilest, formerfilest: string
+ curphasest, formerphasest: string
+ complaintst, endst, startst: string
+ faultfoundbo: bool = false
+ verbosebo: bool = false # when true, every phase is listed and its record is dumped
+ skip_othersbo: bool = false # set once a basic fault is reported, to mute follow-up complaints
+
+ startst = "
\p"
+ endst = "
\p"
+ # NOTE(review): startst and formerphasest are never read in this proc
+ for keyst, valob in file_reportta:
+ # emit the file name once, on the first phase of each file
+ curfilest = valob.fsName
+ curphasest = valob.phaNameFull
+ if curfilest != formerfilest:
+ reportst &= curfilest & endst
+
+ complaintst = ""
+ # checks run from most to least basic; skip_othersbo gates the later ones
+ if valob.phaNameCount == 0:
+ complaintst &= "---- This block-phase is not found (or mis-spelled)" & endst
+ faultfoundbo = true
+ skip_othersbo = true
+ elif valob.phaNameCount > 1:
+ complaintst &= "---- This block-phase occurs multiple times: " & $valob.phaNameCount & endst
+ faultfoundbo = true
+ skip_othersbo = true
+ if not valob.phaEndMarkerFound:
+ if not skip_othersbo:
+ complaintst &= "---- This block-phase has no (valid) end-marker" & endst
+ faultfoundbo = true
+ skip_othersbo = true
+ if valob.phaItemCount == 0:
+ if not skip_othersbo:
+ complaintst &= "---- This block-phase has NO items (no lines)" & endst
+ faultfoundbo = true
+ if valob.phaHasEmptyItem:
+ if not skip_othersbo:
+ complaintst &= "---- This block-phase has EMPTY items (zero-length lines)" & endst
+ faultfoundbo = true
+
+ formerfilest = valob.fsName
+ # a phase is listed only when it has a complaint (or always, in verbose mode)
+ if faultfoundbo or verbosebo:
+ reportst &= "++ " & curphasest & endst
+ reportst &= complaintst
+ if verbosebo:
+ reportst &= $valob & endst
+ # reset the per-phase flags for the next record
+ faultfoundbo = false
+ skip_othersbo = false
+
+ result = reportst
- result = "Nothing evaluated yet"
@@ -111,5 +271,6 @@ loadTextSourceFiles()
when isMainModule:
# echo textsourcefileta["dutch.dat"]
- echo sourcefilestatust
+ # echo sourcefilestatust
+ echo evaluateDataFiles(datFileAll)
\ No newline at end of file
diff --git a/mostfiles/summary_english_small.dat b/mostfiles/summary_english_small.dat
new file mode 100644
index 0000000..fc63f3e
--- /dev/null
+++ b/mostfiles/summary_english_small.dat
@@ -0,0 +1,35 @@
+Extraction-strings for summarizing with Flashread.
+Language: english
+Summary-type: generic
+version: 1.3
+
+
+
+SIGNAL-WORDS TO HANDLE
+scenario
+theor
+hypothe
+assum
+caus
+effect
+result
+consequence
+conclu
+plan
+plans
+goal
+agenda
+summar
+outcome
+input
+output
+relat
+variabl
+question
+answer
+evidence
+proof
+prove
+>----------------------------------<
+
+
diff --git a/mostfiles/summary_english_tiny.dat b/mostfiles/summary_english_tiny.dat
new file mode 100644
index 0000000..c391d64
--- /dev/null
+++ b/mostfiles/summary_english_tiny.dat
@@ -0,0 +1,20 @@
+Extraction-strings for summarizing with Flashread.
+Language: english
+Summary-type: generic
+version: 1.3
+
+
+
+
+SIGNAL-WORDS TO HANDLE
+scenario
+theor
+hypothe
+plan
+plans
+goal
+agenda
+>----------------------------------<
+
+
+