From a6f34ab2a75725ac1e5caf612cb203bed78770ab Mon Sep 17 00:00:00 2001 From: Adarsha Date: Sun, 8 May 2016 12:05:54 +0530 Subject: [PATCH] Fix #14, Add RegEx pattern extract logic A regex with multiple capture groups can now be defined, and the extracted groups are combined into the result using a separate format string (ResultPattern). --- Influxer/Config/ExtractTransformation.cs | 71 ++++++++++++++--------- Influxer/Config/FilterTransformation.cs | 2 +- Influxer/GenericColumn.cs | 2 +- Influxer/GenericFile.cs | 72 +++++++++++++----------- Influxer/Program.cs | 7 ++- Influxer/Properties/AssemblyInfo.cs | 4 +- 6 files changed, 92 insertions(+), 66 deletions(-) diff --git a/Influxer/Config/ExtractTransformation.cs b/Influxer/Config/ExtractTransformation.cs index a7d4785..ce00919 100644 --- a/Influxer/Config/ExtractTransformation.cs +++ b/Influxer/Config/ExtractTransformation.cs @@ -14,50 +14,57 @@ public enum ExtractType SubString } - public class ExtractTransformation :ConfigurationElement, ITransform,IConfigurationElementCollectionElement + public class ExtractTransformation : ConfigurationElement, ITransform, IConfigurationElementCollectionElement { - [ConfigurationProperty("Type")] + [ConfigurationProperty ("Type")] public ExtractType Type { - get { return (ExtractType)this["Type"]; } + get { return (ExtractType) this["Type"]; } set { this["Type"] = value; } } - [ConfigurationProperty("StartIndex")] + [ConfigurationProperty ("StartIndex")] public int StartIndex { - get { return (int)this["StartIndex"]; } + get { return (int) this["StartIndex"]; } set { this["StartIndex"] = value; } } - [ConfigurationProperty("Length")] + [ConfigurationProperty ("Length")] public int Length { - get { return (int)this["Length"]; } + get { return (int) this["Length"]; } set { this["Length"] = value; } } - [ConfigurationProperty("RegEx")] + [ConfigurationProperty ("RegEx")] public string RegEx { - get { return (string)this["RegEx"]; } + get { return (string) this["RegEx"]; } set { this["RegEx"] = value; } } - 
[ConfigurationProperty("IsDefault")] + [ConfigurationProperty ("ResultPattern")] + public string ResultPattern + { + get { return (string) this["ResultPattern"]; } + set { this["ResultPattern"] = value; } + } + + [ConfigurationProperty ("IsDefault")] public bool IsDefault { - get { return (bool)this["IsDefault"]; } + get { return (bool) this["IsDefault"]; } set { this["IsDefault"] = value; } } - [ConfigurationProperty("DefaultValue")] + [ConfigurationProperty ("DefaultValue")] public string DefaultValue { - get { return (string)this["DefaultValue"]; } + get { return (string) this["DefaultValue"]; } set { this["DefaultValue"] = value; } } @@ -66,45 +73,57 @@ public Regex ExtractPattern { get { - if (Type == ExtractType.RegEx && _extractPattern == null && !String.IsNullOrWhiteSpace(RegEx)) + if (Type == ExtractType.RegEx && _extractPattern == null && !String.IsNullOrWhiteSpace (RegEx)) { - _extractPattern = new Regex(RegEx, RegexOptions.Compiled | RegexOptions.IgnoreCase); + _extractPattern = new Regex (RegEx, RegexOptions.Compiled | RegexOptions.IgnoreCase); } return _extractPattern; } } - - public bool CanTransform(string content) + + public bool CanTransform (string content) { if (IsDefault) return true; if (Type == ExtractType.SubString) - return !String.IsNullOrWhiteSpace(content) ? content.Length > StartIndex && content.Length > (StartIndex + Length) : false; + return !String.IsNullOrWhiteSpace (content) ? content.Length > StartIndex && content.Length > (StartIndex + Length) : false; else - return !String.IsNullOrWhiteSpace(content) ? _extractPattern.IsMatch(content) : false; + return !String.IsNullOrWhiteSpace (content) ? 
ExtractPattern.IsMatch (content) : false; } - public string Transform(string content) + public string Transform (string content) { if (IsDefault) return DefaultValue; if (Type == ExtractType.SubString) - return content.Substring(StartIndex, Length); + return content.Substring (StartIndex, Length); else { - var m = _extractPattern.Match(content); + + var m = ExtractPattern.Match (content); if (m.Success) { - return m.Groups[1].Value; + try + { + if (ResultPattern == "") + return m.Groups[0].Value; + else + return string.Format (ResultPattern, m.Groups.Cast ().Skip (1).Select (g => g.Value as object).ToArray ()); + + } + catch (Exception e) + { + throw new ArgumentException ($"Could not extract {content} using {ResultPattern} due to {e.Message}"); + } } - return null; } + return null; } - public string GetKey() + public string GetKey () { - return this.GetHashCode().ToString(); + return this.GetHashCode ().ToString (); } } } diff --git a/Influxer/Config/FilterTransformation.cs b/Influxer/Config/FilterTransformation.cs index 24cd6e5..4b65834 100644 --- a/Influxer/Config/FilterTransformation.cs +++ b/Influxer/Config/FilterTransformation.cs @@ -60,7 +60,7 @@ public string Transform (string content) { if (CanTransform (content)) { - throw new InvalidDataException (String.Format ("{0} filtered out as per rule {1}", content, RegEx)); + throw new InvalidDataException ($"{content} filtered out as per rule {RegEx}"); } return string.Empty; } diff --git a/Influxer/GenericColumn.cs b/Influxer/GenericColumn.cs index d5a9624..70fd05c 100644 --- a/Influxer/GenericColumn.cs +++ b/Influxer/GenericColumn.cs @@ -148,7 +148,7 @@ public Dictionary SplitData (string content) result.Add (_generatedColumns.FirstOrDefault (t => t.Config.IsDefault), content); } else - throw new InvalidDataException (String.Format ("Can't split {0} using specified splitting rules, no default column configured", content)); + throw new InvalidDataException ($"Can't split {content} using specified splitting 
rules, no default column configured"); return result; } diff --git a/Influxer/GenericFile.cs b/Influxer/GenericFile.cs index 3294484..fa14747 100644 --- a/Influxer/GenericFile.cs +++ b/Influxer/GenericFile.cs @@ -87,7 +87,7 @@ public async Task ProcessGenericFile (string InputFileName, strin { policy = new InfluxRetentionPolicy () { - Name = String.IsNullOrWhiteSpace (settings.InfluxDB.RetentionPolicy) ? String.Format ("InfluxerRetention_{0}min", settings.InfluxDB.RetentionDuration) : settings.InfluxDB.RetentionPolicy, + Name = String.IsNullOrWhiteSpace (settings.InfluxDB.RetentionPolicy) ? $"InfluxerRetention_{settings.InfluxDB.RetentionDuration}min" : settings.InfluxDB.RetentionPolicy, DBName = settings.InfluxDB.DatabaseName, Duration = TimeSpan.FromMinutes (settings.InfluxDB.RetentionDuration), IsDefault = false, @@ -222,7 +222,7 @@ public async Task ProcessGenericFile (string InputFileName, strin { Logger.LogLine (LogLevel.Error, "Process Started {0}, Input {1}, Processed{2}, Failed:{3}", (DateTime.Now - stopwatch.Elapsed), InputFileName, result.PointsFound, result.PointsFailed); foreach (var f in failureReasons.Values) - Logger.LogLine (LogLevel.Error, "{0} lines ({1}) failed due to {2} ({3})", f.Count, String.Join (",", f.LineNumbers), f.ExceptionType, f.Message); + Logger.LogLine (LogLevel.Error, "{0} lines (e.g. {1}) failed due to {2} ({3})", f.Count, String.Join (",", f.LineNumbers.Take (5)), f.ExceptionType, f.Message); if (result.PointsFailed == result.PointsFound) result.ExitCode = ExitCode.UnableToProcess; else @@ -234,9 +234,8 @@ public async Task ProcessGenericFile (string InputFileName, strin { Logger.LogLine (LogLevel.Error, "Failed to process {0}", InputFileName); Logger.LogLine (LogLevel.Error, "\r\nError!! 
{0}:{1} - {2}", e.GetType ().Name, e.Message, e.StackTrace); - result.ExitCode = ExitCode.UnknownError; + result.ExitCode = ExitCode.UnableToProcess; } - result.ExitCode = ExitCode.Success; return result; } @@ -317,14 +316,7 @@ public bool ValidateData (string InputFileName) foreach (var c in ColumnHeaders) { - if (c.ColumnIndex == settings.GenericFile.TimeColumn - 1) continue; var content = columns[c.ColumnIndex].Replace ("\"", ""); - if (c.ColumnIndex == settings.GenericFile.TimeColumn - 1) - { - DateTime timeStamp; - if (!DateTime.TryParseExact (content, settings.GenericFile.TimeFormat, CultureInfo.InvariantCulture, DateTimeStyles.None, out timeStamp)) - throw new FormatException ("Couldn't parse " + content + " using format " + settings.GenericFile.TimeFormat + ", check -timeformat argument"); - } if (c.HasAutoGenColumns) { pointData.AddRange (c.SplitData (content)); @@ -350,6 +342,12 @@ public bool ValidateData (string InputFileName) continue; } + if (d.Key.ColumnIndex == settings.GenericFile.TimeColumn - 1) + { + DateTime timeStamp; + if (!DateTime.TryParseExact (content, settings.GenericFile.TimeFormat, CultureInfo.InvariantCulture, DateTimeStyles.None, out timeStamp)) + throw new FormatException ("Couldn't parse " + content + " using format " + settings.GenericFile.TimeFormat + ", check -timeformat argument"); + } if (String.IsNullOrWhiteSpace (content)) continue; @@ -366,9 +364,9 @@ public bool ValidateData (string InputFileName) else { if (d.Key.Type == ColumnDataType.NumericalField && (!Double.TryParse (content, out value) || double.IsNaN (value))) - throw new InvalidDataException (String.Format ("{0} has inconsistent data, Can't parse {1} as Number", d.Key.ColumnHeader, content)); + throw new InvalidDataException ($"{d.Key.ColumnHeader} has inconsistent data, Can't parse {content} as Number"); else if (d.Key.Type == ColumnDataType.BooleanField && (!Boolean.TryParse (content, out boolVal))) - throw new InvalidDataException (String.Format ("{0} has 
inconsistent data, Can't parse {1} as Boolean", d.Key.ColumnHeader, content)); + throw new InvalidDataException ($"{d.Key.ColumnHeader} has inconsistent data, Can't parse {content} as Boolean"); } } if (++lineNo == settings.GenericFile.ValidateRows) @@ -413,10 +411,6 @@ private InfluxDatapoint ProcessGenericLine (string line, List< point.Precision = settings.GenericFile.Precision; point.MeasurementName = settings.GenericFile.TableName; - DateTime timeStamp; - if (!DateTime.TryParseExact (content, settings.GenericFile.TimeFormat, CultureInfo.InvariantCulture, DateTimeStyles.None, out timeStamp)) - throw new FormatException ("Couldn't parse " + content + " using format " + settings.GenericFile.TimeFormat + ", check -timeformat argument"); - point.UtcTimestamp = timeStamp.AddMinutes (settings.GenericFile.UtcOffset); point.InitializeTags (defaultTags); @@ -443,32 +437,44 @@ private InfluxDatapoint ProcessGenericLine (string line, List< content = d.Value; if (d.Key.HasTransformations && d.Key.CanTransform (content)) content = d.Key.Transform (d.Value); + if (String.IsNullOrWhiteSpace (content)) continue; - double value = double.NaN; bool boolVal = false; - if (d.Key.Type == ColumnDataType.NumericalField) - { - if (!Double.TryParse (content, out value) || double.IsNaN (value)) - throw new InvalidDataException (d.Key.ColumnHeader + " has inconsistent data, Unable to parse \"" + content + "\" as number"); - point.Fields.Add (d.Key.ColumnHeader, new InfluxValueField (Math.Round (value, 2))); - } - else if (d.Key.Type == ColumnDataType.StringField) + + if (d.Key.ColumnIndex == settings.GenericFile.TimeColumn - 1) { - point.Fields.Add (d.Key.ColumnHeader, new InfluxValueField (content)); + DateTime timeStamp; + if (!DateTime.TryParseExact (content, settings.GenericFile.TimeFormat, CultureInfo.InvariantCulture, DateTimeStyles.None, out timeStamp)) + throw new FormatException ("Couldn't parse " + content + " using format " + settings.GenericFile.TimeFormat + ", check 
-timeformat argument"); + point.UtcTimestamp = timeStamp.AddMinutes (settings.GenericFile.UtcOffset); } - else if (d.Key.Type == ColumnDataType.BooleanField) + else { - if (!Boolean.TryParse (content, out boolVal)) - throw new InvalidDataException (d.Key.ColumnHeader + " has inconsistent data, Unable to parse \"" + content + "\" as Boolean"); - point.Fields.Add (d.Key.ColumnHeader, new InfluxValueField (boolVal)); + double value = double.NaN; bool boolVal = false; + if (d.Key.Type == ColumnDataType.NumericalField) + { + if (!Double.TryParse (content, out value) || double.IsNaN (value)) + throw new InvalidDataException (d.Key.ColumnHeader + " has inconsistent data, Unable to parse \"" + content + "\" as number"); + point.Fields.Add (d.Key.ColumnHeader, new InfluxValueField (Math.Round (value, 2))); + } + else if (d.Key.Type == ColumnDataType.StringField) + { + point.Fields.Add (d.Key.ColumnHeader, new InfluxValueField (content)); + } + else if (d.Key.Type == ColumnDataType.BooleanField) + { + if (!Boolean.TryParse (content, out boolVal)) + throw new InvalidDataException (d.Key.ColumnHeader + " has inconsistent data, Unable to parse \"" + content + "\" as Boolean"); + point.Fields.Add (d.Key.ColumnHeader, new InfluxValueField (boolVal)); + } + else if (d.Key.Type == ColumnDataType.Tag) + point.Tags.Add (d.Key.ColumnHeader, content.Replace (settings.InfluxDB.InfluxReserved.ReservedCharecters.ToCharArray (), settings.InfluxDB.InfluxReserved.ReplaceReservedWith)); } - else if (d.Key.Type == ColumnDataType.Tag) - point.Tags.Add (d.Key.ColumnHeader, content.Replace (settings.InfluxDB.InfluxReserved.ReservedCharecters.ToCharArray (), settings.InfluxDB.InfluxReserved.ReplaceReservedWith)); } if (point.Fields.Count == 0) throw new InvalidDataException ("No values found on the row to post to Influx"); - + return point; } } diff --git a/Influxer/Program.cs b/Influxer/Program.cs index 09374b8..1db5c12 100644 --- a/Influxer/Program.cs +++ b/Influxer/Program.cs @@ -38,7 +38,7 @@ 
class Program static int Main (string[] args) { - + #region Command Line argument processing if (args.Length == 0) { @@ -47,12 +47,13 @@ static int Main (string[] args) } #region Parse command line arguments Dictionary cmdArgs = new Dictionary (); + Regex commandSwitch = new Regex ("^-[a-zA-Z+]|^/[a-zA-Z+]", RegexOptions.Compiled); for (int i = 0; i < args.Length; i++) { - if (args[i].StartsWith ("-") || args[i].StartsWith ("/")) + if (commandSwitch.IsMatch (args[i])) { var key = args[i].ToLower (); - if ((i + 1 < args.Length) && (!(args[i + 1].StartsWith ("-") || args[i + 1].StartsWith ("/")))) + if (i + 1 < args.Length && !commandSwitch.IsMatch (args[i + 1])) { cmdArgs.Add (key.ToLower (), args[i + 1]); i++; diff --git a/Influxer/Properties/AssemblyInfo.cs b/Influxer/Properties/AssemblyInfo.cs index dc0f7d4..aa18fd3 100644 --- a/Influxer/Properties/AssemblyInfo.cs +++ b/Influxer/Properties/AssemblyInfo.cs @@ -32,5 +32,5 @@ // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion ("0.5.2.0")] -[assembly: AssemblyFileVersion ("0.5.2.0")] +[assembly: AssemblyVersion ("0.5.3.0")] +[assembly: AssemblyFileVersion ("0.5.3.0")]