Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support minimal combine pattern count setting #17

Merged
merged 2 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions demo/uri_drain.ini
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ depth = 4
max_children = 100
max_clusters = 1024
extra_delimiters = ["/"]
combine_min_url_count = ${DRAIN_COMBINE_MIN_URL_COUNT:8}

[PROFILING]
enabled = True
Expand Down
1 change: 1 addition & 0 deletions models/Configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Drain is the core algorithm of URI Drain.
| max_clusters | int | DRAIN_MAX_CLUSTERS | 1024 | Max number of tracked clusters (unlimited by default). When this number is reached, the model starts replacing old clusters with new ones according to the LRU policy. |
| extra_delimiters | string | DRAIN_EXTRA_DELIMITERS | \["/"\] | The extra delimiters to split the sequence. |
| analysis_min_url_count | int | DRAIN_ANALYSIS_MIN_URL_COUNT | 20 | The minimum number of unique URLs(each service) to trigger the analysis. |
| combine_min_url_count | int | DRAIN_COMBINE_MIN_URL_COUNT | 8 | The minimum number of unique URLs (per candidate pattern of each service) required before they are masked as a variable URL (in case some similar URLs are not RESTful, such as `/test/one` and `/test/two`). |

### Profiling

Expand Down
1 change: 1 addition & 0 deletions models/uri_drain/template_miner.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def __init__(self,
max_children=self.config.drain_max_children,
max_clusters=self.config.drain_max_clusters,
extra_delimiters=self.config.drain_extra_delimiters,
combine_min_url_count=self.config.drain_combine_min_url_count,
profiler=self.profiler,
param_str=param_str,
# param_extra=param_extra, # MODIFIED:: for URI Drain < It is now a dict since contains multiple types
Expand Down
3 changes: 3 additions & 0 deletions models/uri_drain/template_miner_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def __init__(self):
self.drain_max_children = 100
self.drain_max_clusters = None
self.drain_analysis_min_url_count = 20
self.drain_combine_min_url_count = 8
self.masking_instructions = []
self.mask_prefix = "<"
self.mask_suffix = ">"
Expand Down Expand Up @@ -82,6 +83,8 @@ def load(self, config_filename: str):
self.parameter_extraction_cache_capacity)
self.drain_analysis_min_url_count = self.read_config_value(parser, section_drain, 'analysis_min_url_count', int,
self.drain_analysis_min_url_count)
self.drain_combine_min_url_count = self.read_config_value(parser, section_drain, 'combine_min_url_count', int,
self.drain_combine_min_url_count)

masking_instructions = []
masking_list = json.loads(masking_instructions_str)
Expand Down
45 changes: 39 additions & 6 deletions models/uri_drain/uri_drain.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@


class LogCluster: # TODO Modified:: Changed to URICluster
__slots__ = ["log_template_tokens", "cluster_id", "size"]
__slots__ = ["log_template_tokens", "cluster_id", "size", "latest_urls"]

def __init__(self, log_template_tokens: list, cluster_id: int):
def __init__(self, log_template_tokens: list, cluster_id: int, combine_min_url_count: int):
self.log_template_tokens = tuple(log_template_tokens)
self.cluster_id = cluster_id
self.size = 1
self.latest_urls = LRUCache(combine_min_url_count+1)

def get_template(self):
# Modified:: Changed to join by slash instead of space for
Expand All @@ -47,6 +48,27 @@ def get_template(self):
template = '/'.join(self.log_template_tokens)
return f'/{template}'

def adding_url(self, url: str):
    """Remember ``url`` as one of the URLs matched to this cluster.

    A URL that is already tracked is left untouched; a new one is stored as
    a key of ``self.latest_urls`` with the placeholder value ``True``.

    :param url: the raw URL string that was matched to this cluster.
    """
    # Membership is tested with the ``in`` operator instead of calling the
    # ``__contains__`` dunder directly.
    if url not in self.latest_urls:
        self.latest_urls[url] = True

def __str__(self):
# Render the cluster as "size=<size, left-justified to 10 columns>: <template>".
# The variant that also printed the cluster ID is kept commented out below.
# return f"ID={str(self.cluster_id).ljust(5)} : size={str(self.size).ljust(10)}: {self.get_template()}"
return f"size={str(self.size).ljust(10)}: {self.get_template()}"


class SingleURILogCluster:
    """Cluster-like wrapper around one literal URI.

    The ``clusters`` property in this file builds one instance per URL for
    clusters whose number of tracked URLs is below ``combine_min_url_count``,
    so such URLs are reported verbatim rather than as a combined template.
    """

    __slots__ = ["uri", "cluster_id", "size"]

    def __init__(self, uri: str):
        """Wrap ``uri``; the cluster ID is fixed to -1 and the size to 1."""
        self.uri = uri
        self.cluster_id = -1
        self.size = 1

    def get_template(self):
        """Return the wrapped URI unchanged."""
        return self.uri

    def __str__(self):
        """Return ``size=<size, left-justified to 10 columns>: <uri>``."""
        return f"size={self.size:<10}: {self.get_template()}"
Expand Down Expand Up @@ -83,6 +105,7 @@ def __init__(self,
sim_th=0.4,
max_children=100,
max_clusters=None,
combine_min_url_count=8,
extra_delimiters=(),
profiler: Profiler = NullProfiler(),
param_str="{var}", # Modified:: required param_str
Expand Down Expand Up @@ -116,6 +139,7 @@ def __init__(self,
self.max_node_depth = depth - 2 # max depth of a prefix tree node, starting from zero
self.sim_th = sim_th
self.max_children = max_children
self.combine_min_url_count = combine_min_url_count
self.root_node = Node()
self.profiler = profiler
self.extra_delimiters = extra_delimiters
Expand All @@ -133,7 +157,14 @@ def __init__(self,

@property
def clusters(self):
return self.id_to_cluster.values()
result = []
for cluster in self.id_to_cluster.values():
if cluster.latest_urls and cluster.latest_urls.__len__() >= self.combine_min_url_count:
result.append(cluster)
continue
for url, _ in cluster.latest_urls.items():
result.append(SingleURILogCluster(url))
return result

@property
def cluster_patterns(self):
Expand Down Expand Up @@ -245,7 +276,7 @@ def add_log_message(self, content: str):
self.profiler.start_section("create_cluster")
self.clusters_counter += 1
cluster_id = self.clusters_counter
match_cluster = LogCluster(content_tokens, cluster_id)
match_cluster = LogCluster(content_tokens, cluster_id, self.combine_min_url_count)
self.id_to_cluster[cluster_id] = match_cluster
self.add_seq_to_prefix_tree(self.root_node, match_cluster)
update_type = "cluster_created"
Expand All @@ -261,7 +292,7 @@ def add_log_message(self, content: str):
update_type = "rejected (create new)"
self.clusters_counter += 1
cluster_id = self.clusters_counter
match_cluster = LogCluster(content_tokens, cluster_id)
match_cluster = LogCluster(content_tokens, cluster_id, self.combine_min_url_count)
self.id_to_cluster[cluster_id] = match_cluster
self.add_seq_to_prefix_tree(self.root_node, match_cluster)
match_cluster.size -= 1
Expand All @@ -278,6 +309,7 @@ def add_log_message(self, content: str):
if self.profiler:
self.profiler.end_section()

match_cluster.adding_url(content)
return match_cluster, update_type

def get_total_cluster_size(self):
Expand Down Expand Up @@ -315,12 +347,13 @@ def __init__(self,
sim_th=0.4,
max_children=100,
max_clusters=None,
combine_min_url_count=8,
extra_delimiters=(),
profiler: Profiler = NullProfiler(),
param_str="<*>",
# param_extra=None, # Modified:: Added param_extra
parametrize_numeric_tokens=True):
super().__init__(depth, sim_th, max_children, max_clusters, extra_delimiters, profiler, param_str,
super().__init__(depth, sim_th, max_children, max_clusters, combine_min_url_count, extra_delimiters, profiler, param_str,
# param_extra,
parametrize_numeric_tokens)

Expand Down
1 change: 1 addition & 0 deletions servers/simple/uri_drain.ini
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ max_children = ${DRAIN_MAX_CHILDREN:100}
max_clusters = ${DRAIN_MAX_CLUSTERS:1024}
extra_delimiters = ${DRAIN_EXTRA_DELIMITERS:["/"]}
analysis_min_url_count = ${DRAIN_ANALYSIS_MIN_URL_COUNT:20}
combine_min_url_count = ${DRAIN_COMBINE_MIN_URL_COUNT:8}

[PROFILING]
enabled = ${PROFILING_ENABLED:False}
Expand Down
4 changes: 1 addition & 3 deletions test/e2e/expected/endpoint_counterexamples.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.

patterns:
- "/api/v1/usernames/{var}"
- "/api/v1/users/{var}"
patterns: []
version: "1"
18 changes: 0 additions & 18 deletions test/e2e/expected/endpoint_hard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,32 +13,14 @@
# limitations under the License.

patterns:
- /api-this-is-a-special-case/v99999999999999999/orders/delete/{var}
- /api-this-is-a-special-case/v99999999999999999/orders/reorder/{var}
- /api-this-is-a-special-case/v99999999999999999/orders/update/{var}
- /api/v1/bills/{var}
- /api/v1/companies/{var}
- /api/v1/companies/{var}/employees/{var}/reviews/{var}
- /api/v1/companies/{var}/tasks/{var}/assignees/{var}
- /api/v1/projects/{var}
- /api/v1/services/{var}
- /api/v1/users/{var}/posts/{var}/comments
- /api/v1/users/{var}/posts/{var}/comments/{var}
- /api/v1/wallets/{var}
- /api/v2/admin/users/{var}
- /api/v2/courses/{var}/modules/{var}/lessons
- /api/v2/customers/{var}
- /api/v3/products/{var}/reviews/{var}/comments
- /api/v3/providers/{var}
- /api/v4/orders/{var}/items/{var}/tracking
- /customer/{var}
- /customer/{var}/order/{var}
- /customer/{var}/profile/{var}/compare/{var}/profile/{var}
- ABC/{var}
- HikariCP/Connection/{var}
- google.com/api/v1/users/{var}
- http://www.google.com/api/v1/users/{var}
- https://www.google.com/api/v1/users/{var}
- top1.abc.example.com.net.cn/api/v1/users/{var}
- www.google.com/api/v1/users/{var}
version: '1'
16 changes: 0 additions & 16 deletions test/e2e/expected/endpoint_hard_3k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,30 +13,14 @@
# limitations under the License.

patterns:
- /api-this-is-a-special-case/v99999999999999999/orders/delete/{var}
- /api-this-is-a-special-case/v99999999999999999/orders/reorder/{var}
- /api-this-is-a-special-case/v99999999999999999/orders/update/{var}
- /api/v1/bills/{var}
- /api/v1/companies/{var}
- /api/v1/companies/{var}/employees/{var}/reviews/{var}
- /api/v1/companies/{var}/tasks/{var}/assignees/{var}
- /api/v1/projects/{var}
- /api/v1/services/{var}
- /api/v1/users/{var}/posts/{var}/comments
- /api/v1/users/{var}/posts/{var}/comments/{var}
- /api/v1/wallets/{var}
- /api/v2/admin/users/{var}
- /api/v2/courses/{var}/modules/{var}/lessons
- /api/v2/customers/{var}
- /api/v3/products/{var}/reviews/{var}/comments
- /api/v3/providers/{var}
- /api/v4/orders/{var}/items/{var}/tracking
- /customer/{var}
- /customer/{var}/order/{var}
- /customer/{var}/profile/{var}/compare/{var}/profile/{var}
- google.com/api/v1/users/{var}
- http://www.google.com/api/v1/users/{var}
- https://www.google.com/api/v1/users/{var}
- top1.abc.example.com.net.cn/api/v1/users/{var}
- www.google.com/api/v1/users/{var}
version: '1'
10 changes: 0 additions & 10 deletions test/e2e/expected/endpoint_trivial.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,8 @@

patterns:
- /api/v1/accounts/{var}
- /api/v1/invoices/{var}
- /api/v1/orders/{var}
- /api/v1/posts/{var}
- /api/v1/products/{var}
- /api/v1/users/{var}
- /api/v2/data/users/{var}
- /api/v999/orders/{var}
- /product/{var}
- /user/{var}
- /user/{var}/post/{var}
- /user/{var}/profile/{var}/compare/{var}/profile/{var}
- GET:/api/v1/users/{var}
- http://www.google.com/api/v1/users/{var}
- https://www.google.com/api/v1/users/{var}
version: '1'
7 changes: 0 additions & 7 deletions test/e2e/expected/endpoint_trivial_3k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,4 @@ patterns:
- /api/v1/posts/{var}
- /api/v1/products/{var}
- /api/v1/users/{var}
- /api/v2/data/users/{var}
- /api/v999/orders/{var}
- /product/{var}
- /user/{var}
- /user/{var}/post/{var}
- /user/{var}/profile/{var}/compare/{var}/profile/{var}
- GET:/api/v1/users/{var}
version: '1'
Loading