diff --git a/generator/randomtexttokenizer.d b/generator/randomtexttokenizer.d index ea6342e..0215574 100644 --- a/generator/randomtexttokenizer.d +++ b/generator/randomtexttokenizer.d @@ -7,7 +7,7 @@ import std.random; class RandomTextTokenizer : ITextTokenizer { - private size_t MinLength, MaxLength; + private const size_t MinLength, MaxLength; public this(size_t minLength, size_t maxLength) { @@ -21,41 +21,51 @@ class RandomTextTokenizer : ITextTokenizer public Token[] Tokenize(WGString input, size_t tokenValueSize) { - Token[WGString] tokens = new Token[WGString]; - Token[] resultTokens; + //TODO: fix RangeError + Token[WGString] tokensDict = new Token[WGString]; + Token[] allTokens; - size_t thisTokenSize; - size_t firstTokenSize; + size_t i = 0; + while (i < input.length) + { + // Генерируем случайную длину токена в диапазоне [minValue, maxValue] + tokenValueSize = uniform(MinLength, MaxLength+ 1); - firstTokenSize = thisTokenSize = uniform(MinLength, MaxLength); + // Проверяем, чтобы токен помещался в оставшуюся часть строки + if (i + tokenValueSize > input.length) + break; - for (size_t i = 0; i < input.length; i += thisTokenSize) - { - WGString tokenValue = input[i.. (i + thisTokenSize)]; - - Token token; + WGString tokenValue = input[i .. (i + tokenValueSize)]; - if (tokens.get(tokenValue, null) !is null) + Token token; + if (tokensDict.get(tokenValue, null) !is null) { - token = tokens[tokenValue]; + token = tokensDict[tokenValue]; } - else + else { token = new Token(tokenValue); - tokens[tokenValue] = token; - } + tokensDict[tokenValue] = token; + } - resultTokens ~= token; + allTokens ~= token; - if (i >= firstTokenSize) + if (i >= tokenValueSize) { - WGString prevTokenValue = input[(i - thisTokenSize)..i]; - auto prevToken = tokens[prevTokenValue]; + WGString prevTokenValue = input[i - tokenValueSize .. i]; + auto prevToken = tokensDict[prevTokenValue]; prevToken.AddNextToken(token); } - } - - thisTokenSize = uniform(MinLength, MaxLength); - return resultTokens; + + // Переходим к следующему токену + i += tokenValueSize; + } + + if (allTokens.length > 0) + { + allTokens[$ - 1].NextTokens = null; + } + + return allTokens; } } \ No newline at end of file diff --git a/generator/wgstring.d b/generator/wgstring.d index 0f81066..29ae2f6 100644 --- a/generator/wgstring.d +++ b/generator/wgstring.d @@ -1,3 +1,3 @@ module generator.wgstring; -alias WGString = string; \ No newline at end of file +alias WGString = dstring; \ No newline at end of file diff --git a/main.d b/main.d index 9fa2318..250c4e6 100644 --- a/main.d +++ b/main.d @@ -1,8 +1,9 @@ import std.stdio; import std.algorithm; -import std.file : read, getcwd; +import std.file : readText, getcwd; import std.conv : to; +import core.sys.windows.windows; import generator.itexttokenizer; import generator.texttokenizer; import generator.randomtexttokenizer; @@ -11,17 +12,12 @@ import generator.token; import generator.textgenerator; import generator.wgstring; -extern(Windows) -{ - bool SetConsoleOutputCP(uint wCodePageID); -} - final abstract class Arguments { static: public immutable string ReadFileName = "source="; public immutable string TokenSize = "ts="; - public immutable string TokenRandomSizes = "trs="; + //public immutable string TokenRandomSizes = "trs="; public immutable string TokensGenerate = "tg="; public immutable string TokensNext = "tn="; public immutable string TokensRandomChance = "tr="; @@ -30,12 +26,13 @@ static: WGString ReadInputFromConsole() { - return readln!WGString(); + return to!dstring(readln()); } void main(string[] args) { SetConsoleOutputCP(65001); + SetConsoleCP(65001); ITextTokenizer tokenizer = new TextTokenizer(); size_t tokenSize = 5; @@ -79,7 +76,7 @@ void main(string[] args) { funRecreationsCount = to!(size_t)(arg[Arguments.TokenSize.length..$]); } - if(arg.startsWith(Arguments.TokenRandomSizes)) + /*if(arg.startsWith(Arguments.TokenRandomSizes)) { string minValue, maxValue; minValue = maxValue = ""; @@ -107,16 +104,17 @@ void main(string[] args) } } tokenizer = new RandomTextTokenizer(to!size_t(minValue), to!size_t(maxValue)); - } + } */ } - if(args.length <= 1) + + if(!isShallReadFile) { input = ReadInputFromConsole(); } else { - input = cast(WGString)read(filename); + input = to!WGString(readText(filename)); } Token[] tokens = tokenizer.Tokenize(input, tokenSize);