diff --git a/.claude/runtime/metrics/post_tool_use_metrics.jsonl b/.claude/runtime/metrics/post_tool_use_metrics.jsonl index 3f031d9..93d4d9e 100644 --- a/.claude/runtime/metrics/post_tool_use_metrics.jsonl +++ b/.claude/runtime/metrics/post_tool_use_metrics.jsonl @@ -201,3 +201,281 @@ {"timestamp": "2025-10-30T00:51:15.726423", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} {"timestamp": "2025-10-30T00:51:31.112942", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} {"timestamp": "2025-10-30T00:51:42.335094", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T00:51:56.365393", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T00:52:01.451129", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T00:52:03.147088", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T01:08:15.937903", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T01:08:21.364278", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T01:08:26.145087", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:45:01.029416", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:45:01.171535", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:45:01.176458", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:45:05.423389", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:45:08.912152", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:45:09.595499", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:45:10.550899", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:45:22.789595", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:45:24.239062", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:45:30.520715", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:45:35.878112", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:45:36.279619", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:45:39.926040", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:45:44.040777", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:45:46.835073", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:45:54.908621", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:45:55.355051", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:45:55.407842", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:46:32.798218", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:46:50.964133", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:49:16.546573", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:49:26.865225", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:52:37.721651", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:53:04.728322", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:53:44.545601", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:54:09.905324", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:54:12.821865", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:54:13.027696", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:54:13.917757", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:54:30.948928", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:54:35.456852", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:54:38.816400", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T03:54:40.480964", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:01:53.039201", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:02:09.003783", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:02:10.781240", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:02:16.947745", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:03:18.180850", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:03:19.667630", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:05:54.543291", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:06:16.335411", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:06:36.435393", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:06:38.811565", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:06:39.660947", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:16:10.285783", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:16:11.965909", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:16:18.268452", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:16:20.600587", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:26:54.069424", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:27:01.532144", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:27:03.001964", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:27:06.598971", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:27:35.495841", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:27:44.536379", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:27:45.388470", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:27:45.551938", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:27:55.101729", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:27:55.764292", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:27:56.871766", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:28:06.700714", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:28:07.776049", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:28:15.349329", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:29:29.073150", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:30:11.145865", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:30:20.923540", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:30:23.314618", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:30:28.629491", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:30:37.061004", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:30:38.190822", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:30:42.730468", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:30:50.590483", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:30:54.629870", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:30:54.635567", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:31:02.616374", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:31:11.371786", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:31:14.320590", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:31:14.326031", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:31:28.655145", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:31:29.140104", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:31:43.517874", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:31:45.172293", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:32:07.189850", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:32:09.105027", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:32:32.844181", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:32:35.340672", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:32:37.661504", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:33:02.031451", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:33:41.425835", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:34:02.997101", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:34:16.223907", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:34:36.950950", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:35:30.956652", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:36:33.563723", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:36:33.569140", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:38:05.795689", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:40:22.754377", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:41:12.145974", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:41:56.752811", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:43:34.054992", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:45:46.093079", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:47:11.568537", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:47:57.464074", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:48:42.156400", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:50:06.317371", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T18:52:39.178226", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T19:01:33.478559", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T19:01:33.483546", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T19:04:36.843389", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T19:05:52.758453", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T19:06:58.750350", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T19:07:06.331377", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T19:16:15.020260", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T19:17:11.179260", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T19:19:21.210519", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T19:19:44.730807", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T19:20:58.387628", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T19:21:55.349905", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T19:23:26.963076", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T19:27:25.388256", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T19:31:11.279766", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T19:32:12.385025", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T19:32:31.302200", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T19:42:30.298537", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:01:27.148814", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:15:33.871440", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:30:36.422682", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:36:30.858477", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:47:07.692965", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:47:38.739725", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:47:42.092256", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:47:42.377143", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:48:05.930016", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:48:07.462175", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:48:09.732584", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:48:33.631286", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:48:37.816928", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:49:00.928294", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:49:22.109716", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:49:24.325302", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:49:24.821912", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:49:42.702267", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:49:44.443771", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:50:06.756742", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:50:07.189155", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:50:26.224104", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:50:31.093103", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:50:47.580292", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:50:49.470582", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:51:18.283641", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:51:23.385229", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:51:37.642260", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:51:42.699248", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:52:06.792886", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:52:30.670127", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:52:48.652955", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:52:51.274502", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:53:14.188961", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:53:16.321467", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:53:39.066887", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:53:57.238738", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:54:21.217228", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:54:23.796577", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:54:24.335140", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:54:46.758179", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:54:48.672643", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:54:52.848180", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:55:26.528802", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:55:28.896040", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:56:34.631801", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:57:03.511893", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:57:05.636101", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:57:12.114184", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:57:43.537811", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:57:48.411044", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:58:05.282379", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:58:07.402892", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:58:37.942899", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:58:40.167562", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T20:58:42.841848", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:26:34.966606", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:26:50.112360", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:26:50.155410", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:26:50.202634", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:26:50.421632", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:27:01.577429", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:27:02.094353", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:27:02.094353", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:27:02.139440", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:27:12.792632", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:27:13.095638", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:27:13.782842", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:27:24.938191", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:27:25.999928", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:27:37.609342", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:27:38.313335", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:27:55.823398", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:27:55.866282", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:27:55.912485", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:28:09.648234", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:28:10.338551", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:28:10.343528", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:29:32.274128", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:29:45.441039", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:30:59.400438", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:31:16.410888", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:31:31.933329", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:32:57.517622", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:33:28.202531", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:33:55.453745", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:33:55.459233", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:34:41.550711", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:36:20.486530", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:36:20.491746", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:36:39.506049", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:37:27.933477", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:38:48.066614", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:39:04.985334", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:39:44.103971", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:39:46.722892", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:39:56.575387", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:40:18.803500", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:40:27.507004", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:40:46.108437", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:40:48.900405", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:41:18.231745", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:41:24.251282", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:42:30.135586", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:43:06.142380", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:43:11.819420", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:43:12.051287", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:43:18.636023", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:43:30.039846", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:43:40.001467", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:43:48.672965", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:43:56.975552", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:44:04.603984", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:44:15.369586", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:44:25.189749", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:44:36.326037", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:44:45.737035", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:44:55.071228", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:45:05.324368", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:45:15.834804", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:45:26.457944", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:45:38.060772", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:45:47.690279", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:45:58.768705", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:46:09.182221", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:46:20.750344", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:46:33.135869", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:46:49.267596", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:47:01.384984", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:47:12.939679", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:47:23.498718", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:47:33.141625", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:47:43.819041", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:47:53.155033", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:48:05.683820", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:48:17.353100", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:48:28.413058", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:48:39.614245", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:48:53.352979", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:49:06.125473", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:49:18.052190", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:49:29.565884", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:49:43.351778", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:51:05.114527", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:51:20.707439", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:51:40.796093", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:52:26.788377", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:52:39.079567", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} +{"timestamp": "2025-10-30T21:53:57.802048", "metric": "tool_usage", "value": "unknown", "hook": "post_tool_use"} diff --git a/README.md b/README.md index 03b269c..a252084 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,10 @@ Cypher-to-KQL translator for Microsoft Sentinel, enabling graph query capabiliti ## Overview -Yellowstone translates Cypher graph queries into KQL (Kusto Query Language) for Microsoft Sentinel. Security analysts can use familiar graph query syntax to investigate relationships between entities like users, devices, and security events. +Yellowstone translates graph queries (Cypher and Gremlin) into KQL (Kusto Query Language) for Microsoft Sentinel. Security analysts can use familiar graph query syntax to investigate relationships between entities like users, devices, and security events. + +**Supported Languages**: Cypher, Gremlin +**Status**: Core translation functional for both languages. ## Quick Start @@ -30,17 +33,22 @@ pip install -e . ### Basic Usage ```python -from yellowstone.parser import parse_cypher -from yellowstone.translator import CypherToKQLTranslator +from yellowstone.models import CypherQuery, TranslationContext +from yellowstone.main_translator import CypherTranslator + +# Works with both Cypher and Gremlin +cypher_query = "MATCH (u:User) WHERE u.age > 25 RETURN u.name" +gremlin_query = "g.V().hasLabel('User').has('age',gt(25)).values('name')" -# Parse Cypher query -cypher = "MATCH (u:User)-[:LOGGED_IN]->(d:Device) WHERE u.age > 25 RETURN u.name" -ast = parse_cypher(cypher) +translator = CypherTranslator() +context = TranslationContext(user_id="analyst", tenant_id="org", permissions=[]) -# Translate to KQL -translator = CypherToKQLTranslator() -result = translator.translate(ast) +# Translate Cypher +result = translator.translate(CypherQuery(query=cypher_query), context) +print(result.query) +# Translate Gremlin (automatically detected) +result = translator.translate(CypherQuery(query=gremlin_query), context) print(result.query) ``` diff --git a/src/yellowstone/gremlin/README.md b/src/yellowstone/gremlin/README.md new file mode 100644 index 0000000..3c85b24 --- /dev/null +++ b/src/yellowstone/gremlin/README.md @@ -0,0 +1,406 @@ +# Gremlin Module + +Complete Gremlin query parsing and translation for Neptune Analytics compatibility. + +## Overview + +This module provides: +1. **Gremlin Parser**: Converts Gremlin query strings to AST structures +2. **Cypher Bridge**: Translates Gremlin AST to Cypher query AST for Neptune Analytics + +## Module Structure + +``` +gremlin/ +├── __init__.py # Public interface +├── ast.py # Gremlin AST data structures (NEW) +├── parser.py # Gremlin query parser (NEW) +├── ast_nodes.py # Legacy Gremlin AST structures +├── cypher_bridge.py # Translation logic +├── README.md # This file +├── examples/ +│ └── basic_usage.py # Parser usage examples (NEW) +└── tests/ + ├── __init__.py + ├── test_parser.py # Parser tests (NEW - 48 tests) + └── test_cypher_bridge.py # Bridge tests +``` + +--- + +# Part 1: Gremlin Parser + +## Quick Start + +```python +from yellowstone.gremlin import parse_gremlin + +# Parse a Gremlin query string +traversal = parse_gremlin("g.V().hasLabel('Person').out('KNOWS').values('name')") + +# Access parsed steps +for step in traversal.steps: + print(f"{step.step_type}: {step}") +``` + +## Supported Gremlin Steps + +### Source Steps +- `V()` - All vertices +- `V(id)` - Specific vertex +- `E()` - All edges +- `E(id)` - Specific edge + +### Filter Steps +- `hasLabel('label')` - Filter by label +- `has('property', value)` - Filter by property value +- `has('property', predicate)` - Filter by predicate (gt, lt, eq, etc.) +- `hasId('id')`, `hasKey('key')`, `hasValue('value')` + +### Traversal Steps +- `out('label')`, `in('label')`, `both('label')` - Navigate relationships +- `outE()`, `inE()`, `bothE()` - Navigate to edges +- `outV()`, `inV()`, `bothV()`, `otherV()` - Navigate from edges to vertices + +### Projection Steps +- `values('prop1', 'prop2')` - Property values +- `valueMap()` - All properties as map +- `properties('prop')` - Property objects +- `elementMap()` - Full element with properties + +### Modifier Steps +- `limit(n)` - Limit results +- `order()` - Order results +- `count()` - Count results +- `dedup()` - Remove duplicates + +### Predicates +- `gt(value)`, `gte(value)`, `lt(value)`, `lte(value)` - Comparisons +- `eq(value)`, `neq(value)` - Equality +- `within(list)`, `without(list)` - List membership + +## Parser Examples + +### Basic Query +```python +query = "g.V().hasLabel('Person')" +traversal = parse_gremlin(query) +# Result: [VertexStep(), FilterStep(hasLabel, 'Person')] +``` + +### With Predicates +```python +query = "g.V().hasLabel('Person').has('age', gt(30))" +traversal = parse_gremlin(query) + +filter_step = traversal.steps[2] +print(filter_step.predicate.operator) # 'gt' +print(filter_step.predicate.value.value) # 30 +``` + +### Complex Multi-Step +```python +query = "g.V().hasLabel('Person').out('knows').dedup().limit(10).values('name')" +traversal = parse_gremlin(query) +# Result: 6 steps with filters, traversal, dedup, limit, projection +``` + +## Parser Testing + +Run 48 comprehensive tests with 85% coverage: + +```bash +uv run pytest src/yellowstone/gremlin/tests/test_parser.py -v +``` + +Test coverage includes: +- Basic parsing (6 tests) +- Filter steps (8 tests) +- Traversal steps (7 tests) +- Projection steps (6 tests) +- Modifier steps (4 tests) +- Complex queries (6 tests) +- Quote handling (2 tests) +- Whitespace (2 tests) +- Error handling (4 tests) +- String representation (3 tests) + +## Parser Examples Script + +Run comprehensive examples: + +```bash +uv run python -m yellowstone.gremlin.examples.basic_usage +``` + +--- + +# Part 2: Gremlin-to-Cypher Bridge + +## Translation Strategy + +The bridge uses a multi-phase translation approach: + +### 1. Collect Information +Scans all Gremlin steps to understand query intent: +- **VertexStep/EdgeStep** → Starting point identification +- **FilterStep** → Property filters and label constraints +- **TraversalStep** → Navigation patterns (out/in/both) +- **ProjectionStep** → Return projections (values) +- **LimitStep/OrderStep** → Result modifiers + +### 2. Build MATCH Clause +Converts traversal patterns to Cypher path patterns: +- `VertexStep()` + `hasLabel('User')` → `(v:User)` +- `out('OWNS')` → `-[:OWNS]->` +- Chains multiple traversals → `(v0)-[:REL1]->(v1)-[:REL2]->(v2)` + +### 3. Build WHERE Clause +Converts filter steps to Cypher predicates: +- `has('prop', value)` → `v.prop = value` +- Multiple filters → Combined with AND logic + +### 4. Build RETURN Clause +Converts projections to Cypher return items: +- No projection → `RETURN v` +- `values('name')` → `RETURN v.name` +- With order/limit → Adds ORDER BY and LIMIT modifiers + +## Public Interface + +### Main Function + +```python +from yellowstone.gremlin.cypher_bridge import translate_gremlin_to_cypher +from yellowstone.gremlin.ast_nodes import GremlinTraversal + +def translate_gremlin_to_cypher(traversal: GremlinTraversal) -> Query: + """ + Translate a Gremlin traversal to a Cypher query AST. + + Args: + traversal: The Gremlin traversal to translate + + Returns: + Query object representing the equivalent Cypher query + + Raises: + TranslationError: If traversal cannot be translated + UnsupportedPatternError: If traversal uses unsupported patterns + """ +``` + +### Exception Classes + +```python +class TranslationError(Exception): + """Raised when Gremlin traversal cannot be translated to Cypher.""" + +class UnsupportedPatternError(TranslationError): + """Raised when encountering unsupported Gremlin patterns.""" +``` + +## Usage Examples + +### Basic Query + +```python +from yellowstone.gremlin.ast_nodes import GremlinTraversal, VertexStep, FilterStep +from yellowstone.gremlin.cypher_bridge import translate_gremlin_to_cypher + +# g.V().hasLabel('User') +traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(predicate='hasLabel', args=['User']) +]) + +query = translate_gremlin_to_cypher(traversal) +# Result: MATCH (v:User) RETURN v +``` + +### Query with Filters + +```python +# g.V().hasLabel('User').has('age', 30).has('active', True) +traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(predicate='hasLabel', args=['User']), + FilterStep(predicate='has', args=['age', 30]), + FilterStep(predicate='has', args=['active', True]) +]) + +query = translate_gremlin_to_cypher(traversal) +# Result: MATCH (v:User) WHERE v.age = 30 AND v.active = true RETURN v +``` + +### Query with Traversal + +```python +# g.V().hasLabel('User').out('OWNS').values('name') +traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(predicate='hasLabel', args=['User']), + TraversalStep(direction='out', edge_label='OWNS'), + ProjectionStep(type='values', properties=['name']) +]) + +query = translate_gremlin_to_cypher(traversal) +# Result: MATCH (v0:User)-[:OWNS]->(v1) RETURN v1.name +``` + +### Complex Query + +```python +# g.V().hasLabel('User').has('active', True) +# .out('OWNS').values('name').order().by('name').limit(10) +traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(predicate='hasLabel', args=['User']), + FilterStep(predicate='has', args=['active', True]), + TraversalStep(direction='out', edge_label='OWNS'), + ProjectionStep(type='values', properties=['name']), + OrderStep(property='name', ascending=True), + LimitStep(count=10) +]) + +query = translate_gremlin_to_cypher(traversal) +# Result: MATCH (v0:User)-[:OWNS]->(v1) +# WHERE v0.active = true +# RETURN v1.name ORDER BY v1.name ASC LIMIT 10 +``` + +## Supported Patterns (MVP) + +### Starting Points +- ✅ `V()` - All vertices +- ✅ `V(id)` - Specific vertex (ID ignored in MVP) +- ❌ `E()` - Edge starting point (not yet supported) + +### Filters +- ✅ `hasLabel('Label')` - Node label constraint +- ✅ `has('property', value)` - Property equality filter +- ❌ `where()` - Complex predicates (not yet supported) +- ❌ `is()` - Value comparison (not yet supported) + +### Traversals +- ✅ `out('LABEL')` - Outgoing relationships +- ✅ `in('LABEL')` - Incoming relationships +- ✅ `both('LABEL')` - Undirected relationships +- ✅ `out()` - Any outgoing relationship +- ❌ `outE()`, `inE()`, `bothE()` - Edge traversals (not yet supported) +- ❌ `inV()`, `outV()` - Vertex from edge (not yet supported) + +### Projections +- ✅ `values('prop1', 'prop2')` - Property values +- ❌ `project()` - Complex projections (not yet supported) +- ❌ `select()` - Variable selection (not yet supported) + +### Modifiers +- ✅ `limit(n)` - Result limit +- ✅ `order().by('prop')` - Result ordering (ascending/descending) +- ❌ `skip(n)` - Result offset (not yet supported) +- ❌ `dedup()` - Distinct results (not yet supported) + +## Mapping Reference + +| Gremlin Pattern | Cypher Equivalent | Example | +|----------------|-------------------|---------| +| `g.V()` | `MATCH (v)` | All vertices | +| `hasLabel('User')` | `(v:User)` | Label constraint | +| `has('age', 30)` | `WHERE v.age = 30` | Property filter | +| `out('OWNS')` | `-[:OWNS]->` | Outgoing relationship | +| `in('OWNS')` | `<-[:OWNS]-` | Incoming relationship | +| `both('KNOWS')` | `-[:KNOWS]-` | Undirected relationship | +| `values('name')` | `RETURN v.name` | Property projection | +| `limit(10)` | `LIMIT 10` | Result limit | +| `order().by('name')` | `ORDER BY v.name` | Result ordering | + +## Error Handling + +The bridge provides clear error messages for unsupported or invalid patterns: + +```python +# Empty traversal +traversal = GremlinTraversal(steps=[]) +# Raises: TranslationError("Empty traversal - no steps to translate") + +# Invalid starting step +traversal = GremlinTraversal(steps=[FilterStep(...)]) +# Raises: TranslationError("Traversal must start with V() or E()") + +# Unsupported pattern +traversal = GremlinTraversal(steps=[ + VertexStep(), + TraversalStep(direction='outE', edge_label='OWNS') +]) +# Raises: UnsupportedPatternError("Traversal direction 'outE' not yet supported") +``` + +## Testing + +Comprehensive test suite with 33 tests covering: +- Basic translations +- Filter translations +- Traversal translations +- Projection translations +- Modifiers (limit, order) +- Complex queries +- Error handling +- Edge cases + +Run tests: +```bash +uv run pytest src/yellowstone/gremlin/tests/test_cypher_bridge.py -v +``` + +## Design Principles + +### Zero-BS Implementation +- All functions work or don't exist +- Clear error messages for unsupported patterns +- No TODOs without implementation + +### Self-Contained Module +- All translation logic in one module +- Clear public interface via exceptions and main function +- Well-documented with examples + +### Contract-Based Testing +- Tests verify inputs/outputs match specifications +- No implementation detail testing +- Full coverage of supported patterns and error cases + +## Future Extensions + +Potential enhancements for future versions: +- Edge starting points (E()) +- Complex predicates (where(), is()) +- Edge traversals (outE(), inE(), inV(), outV()) +- Advanced projections (project(), select()) +- Aggregations (count(), sum(), etc.) +- Subqueries and unions +- Variable bindings across steps + +## Dependencies + +- `yellowstone.gremlin.ast_nodes` - Gremlin AST structures +- `yellowstone.parser.ast_nodes` - Cypher AST structures + +## Module Contract + +**Input**: `GremlinTraversal` object with list of `Step` objects + +**Output**: `Query` object representing equivalent Cypher query + +**Behavior**: +- Translates supported Gremlin patterns to Cypher AST +- Raises `TranslationError` for invalid traversals +- Raises `UnsupportedPatternError` for unsupported patterns +- Preserves query semantics during translation +- Generates valid Cypher AST structures + +**Guarantees**: +- All returned `Query` objects are valid and executable +- Variable names are unique and consistent (v0, v1, v2, ...) +- Path structures are validated before return +- All property values are correctly typed diff --git a/src/yellowstone/gremlin/__init__.py b/src/yellowstone/gremlin/__init__.py new file mode 100644 index 0000000..e4c2be0 --- /dev/null +++ b/src/yellowstone/gremlin/__init__.py @@ -0,0 +1,66 @@ +""" +Gremlin parser module for Yellowstone. + +This module provides a complete Gremlin query parser that converts query strings +into an Abstract Syntax Tree (AST) for further processing and translation. + +Main Components: + - GremlinParser: The main parser class + - parse_gremlin(): Convenience function for parsing queries + - AST Nodes: Complete set of step types and traversal containers + +Example: + >>> from yellowstone.gremlin import parse_gremlin + >>> query_str = "g.V().hasLabel('Person').out('KNOWS').values('name')" + >>> traversal = parse_gremlin(query_str) + >>> print(traversal.steps[0]) + V() +""" + +from .parser import GremlinParser, parse_gremlin, GremlinParseError, GremlinTokenizer +from .ast import ( + GremlinTraversal, + Step, + VertexStep, + EdgeStep, + FilterStep, + TraversalStep, + ProjectionStep, + LimitStep, + OrderStep, + CountStep, + DedupStep, + GremlinValue, + Predicate, +) +from .cypher_bridge import ( + translate_gremlin_to_cypher, + TranslationError, + UnsupportedPatternError, +) + +__all__ = [ + # Parser classes and functions + "GremlinParser", + "parse_gremlin", + "GremlinParseError", + "GremlinTokenizer", + # AST node classes + "GremlinTraversal", + "Step", + "VertexStep", + "EdgeStep", + "FilterStep", + "TraversalStep", + "ProjectionStep", + "LimitStep", + "OrderStep", + "CountStep", + "DedupStep", + "GremlinValue", + "Predicate", + # Bridge translation + "translate_gremlin_to_cypher", + "TranslationError", + "UnsupportedPatternError", +] diff --git a/src/yellowstone/gremlin/ast.py b/src/yellowstone/gremlin/ast.py new file mode 100644 index 0000000..14b9fca --- /dev/null +++ b/src/yellowstone/gremlin/ast.py @@ -0,0 +1,305 @@ +""" +Abstract Syntax Tree (AST) node definitions for Gremlin queries. + +This module defines the complete set of AST nodes used to represent parsed +Gremlin traversal structures. All nodes are Pydantic BaseModel subclasses for +validation, serialization, and type safety. +""" + +from typing import Any, Optional +from pydantic import BaseModel, Field + + +# ============================================================================ +# Base Step Node +# ============================================================================ + + +class Step(BaseModel): + """Base class for all Gremlin traversal steps. + + Every Gremlin step in a traversal chain extends this base class. + """ + + step_type: str = Field(description="The type of step (e.g., 'vertex', 'filter', 'traversal')") + + def __str__(self) -> str: + """Return string representation.""" + return f"{self.step_type}()" + + +# ============================================================================ +# Literal and Value Nodes +# ============================================================================ + + +class GremlinValue(BaseModel): + """Represents a value in a Gremlin query. + + Supports strings, numbers, booleans, and predicates. + + Attributes: + value: The actual value + value_type: The type of value ('string', 'number', 'boolean', 'predicate') + + Example: + >>> GremlinValue(value='marko', value_type='string') + GremlinValue(value='marko', value_type='string') + >>> GremlinValue(value=30, value_type='number') + GremlinValue(value=30, value_type='number') + """ + + value: Any = Field(description="The actual value") + value_type: str = Field( + description="Type of value: 'string', 'number', 'boolean', 'predicate'" + ) + + def __str__(self) -> str: + """Return string representation.""" + if self.value_type == "string": + return f"'{self.value}'" + return str(self.value) + + +class Predicate(BaseModel): + """Represents a Gremlin predicate like gt(30), eq('value'), etc. + + Attributes: + operator: The predicate operator (e.g., 'gt', 'lt', 'eq', 'neq') + value: The comparison value + + Example: + >>> Predicate(operator='gt', value=GremlinValue(value=30, value_type='number')) + """ + + operator: str = Field(description="Predicate operator (gt, lt, eq, neq, gte, lte)") + value: GremlinValue = Field(description="Value to compare against") + + def __str__(self) -> str: + """Return string representation.""" + return f"{self.operator}({self.value})" + + +# ============================================================================ +# Step Implementations +# ============================================================================ + + +class VertexStep(Step): + """Represents g.V() or g.V(id) step. + + Attributes: + vertex_id: Optional vertex ID to start from + + Example: + >>> VertexStep() # g.V() + >>> VertexStep(vertex_id='123') # g.V('123') + """ + + step_type: str = Field(default="vertex") + vertex_id: Optional[str] = Field(default=None, description="Optional vertex ID") + + def __str__(self) -> str: + """Return string representation.""" + if self.vertex_id: + return f"V('{self.vertex_id}')" + return "V()" + + +class EdgeStep(Step): + """Represents g.E() or g.E(id) step. + + Attributes: + edge_id: Optional edge ID to start from + + Example: + >>> EdgeStep() # g.E() + >>> EdgeStep(edge_id='456') # g.E('456') + """ + + step_type: str = Field(default="edge") + edge_id: Optional[str] = Field(default=None, description="Optional edge ID") + + def __str__(self) -> str: + """Return string representation.""" + if self.edge_id: + return f"E('{self.edge_id}')" + return "E()" + + +class FilterStep(Step): + """Represents filtering steps like hasLabel(), has(), hasId(). + + Attributes: + filter_type: Type of filter ('hasLabel', 'has', 'hasId', 'hasKey', 'hasValue') + property_name: Optional property name for has() filters + value: Optional value to filter by + predicate: Optional predicate for complex filtering + + Examples: + >>> FilterStep(filter_type='hasLabel', value=GremlinValue(value='Person', value_type='string')) + >>> FilterStep(filter_type='has', property_name='age', predicate=Predicate(operator='gt', value=...)) + """ + + step_type: str = Field(default="filter") + filter_type: str = Field(description="Type of filter (hasLabel, has, hasId, etc.)") + property_name: Optional[str] = Field(default=None, description="Property name for has() filters") + value: Optional[GremlinValue] = Field(default=None, description="Value to filter by") + predicate: Optional[Predicate] = Field(default=None, description="Predicate for complex filtering") + + def __str__(self) -> str: + """Return string representation.""" + if self.filter_type == "hasLabel": + return f"hasLabel({self.value})" + elif self.filter_type == "has" and self.predicate: + return f"has('{self.property_name}', {self.predicate})" + elif self.filter_type == "has" and self.value: + return f"has('{self.property_name}', {self.value})" + return f"{self.filter_type}()" + + +class TraversalStep(Step): + """Represents traversal steps like out(), in(), both(), outE(), inE(). + + Attributes: + direction: Direction of traversal ('out', 'in', 'both') + traversal_type: Type of traversal ('vertex', 'edge') + edge_label: Optional edge label to filter by + + Examples: + >>> TraversalStep(direction='out', traversal_type='vertex') # out() + >>> TraversalStep(direction='out', traversal_type='vertex', edge_label='KNOWS') # out('KNOWS') + >>> TraversalStep(direction='out', traversal_type='edge', edge_label='CREATED') # outE('CREATED') + """ + + step_type: str = Field(default="traversal") + direction: str = Field(description="Direction: 'out', 'in', or 'both'") + traversal_type: str = Field(description="Type: 'vertex' or 'edge'") + edge_label: Optional[str] = Field(default=None, description="Optional edge label") + + def __str__(self) -> str: + """Return string representation.""" + suffix = "E" if self.traversal_type == "edge" else "" + label_str = f"'{self.edge_label}'" if self.edge_label else "" + return f"{self.direction}{suffix}({label_str})" + + +class ProjectionStep(Step): + """Represents projection steps like values(), valueMap(), properties(). + + Attributes: + projection_type: Type of projection ('values', 'valueMap', 'properties', 'elementMap') + property_names: Optional list of property names to project + + Examples: + >>> ProjectionStep(projection_type='values', property_names=['name']) # values('name') + >>> ProjectionStep(projection_type='valueMap') # valueMap() + """ + + step_type: str = Field(default="projection") + projection_type: str = Field(description="Type: 'values', 'valueMap', 'properties', 'elementMap'") + property_names: list[str] = Field(default_factory=list, description="Property names to project") + + def __str__(self) -> str: + """Return string representation.""" + if self.property_names: + props_str = ", ".join(f"'{p}'" for p in self.property_names) + return f"{self.projection_type}({props_str})" + return f"{self.projection_type}()" + + +class LimitStep(Step): + """Represents limit() step. + + Attributes: + count: Number of results to limit to + + Example: + >>> LimitStep(count=10) # limit(10) + """ + + step_type: str = Field(default="limit") + count: int = Field(description="Number of results to limit") + + def __str__(self) -> str: + """Return string representation.""" + return f"limit({self.count})" + + +class OrderStep(Step): + """Represents order() step. + + Attributes: + order_by: Optional property to order by + order: Order direction ('asc' or 'desc') + + Example: + >>> OrderStep(order_by='name', order='asc') # order().by('name', asc) + """ + + step_type: str = Field(default="order") + order_by: Optional[str] = Field(default=None, description="Property to order by") + order: str = Field(default="asc", description="Order direction: 'asc' or 'desc'") + + def __str__(self) -> str: + """Return string representation.""" + if self.order_by: + return f"order().by('{self.order_by}', {self.order})" + return "order()" + + +class CountStep(Step): + """Represents count() step. + + Example: + >>> CountStep() # count() + """ + + step_type: str = Field(default="count") + + def __str__(self) -> str: + """Return string representation.""" + return "count()" + + +class DedupStep(Step): + """Represents dedup() step for removing duplicates. + + Example: + >>> DedupStep() # dedup() + """ + + step_type: str = Field(default="dedup") + + def __str__(self) -> str: + """Return string representation.""" + return "dedup()" + + +# ============================================================================ +# Traversal Container +# ============================================================================ + + +class GremlinTraversal(BaseModel): + """Represents a complete Gremlin traversal query. + + This is the root AST node for any parsed Gremlin query. + + Attributes: + steps: List of traversal steps in order + + Example: + >>> GremlinTraversal(steps=[ + ... VertexStep(), + ... FilterStep(filter_type='hasLabel', value=GremlinValue(value='Person', value_type='string')), + ... ProjectionStep(projection_type='values', property_names=['name']) + ... ]) + """ + + steps: list[Step] = Field(description="Ordered list of traversal steps") + + def __str__(self) -> str: + """Return string representation of the traversal.""" + steps_str = ".".join(str(step) for step in self.steps) + return f"g.{steps_str}" diff --git a/src/yellowstone/gremlin/cypher_bridge.py b/src/yellowstone/gremlin/cypher_bridge.py new file mode 100644 index 0000000..7116ed4 --- /dev/null +++ b/src/yellowstone/gremlin/cypher_bridge.py @@ -0,0 +1,512 @@ +""" +Gremlin-to-Cypher Bridge Module + +Translates Gremlin AST (from gremlin.ast_nodes) to Cypher AST (from parser.ast_nodes). + +This module provides the core translation logic from Gremlin graph traversals to +equivalent Cypher queries, enabling Gremlin clients to work with Neptune Analytics. + +Translation Strategy: +1. Collect Information - Scan all steps to understand query intent +2. Build MATCH Clause - Convert traversal patterns to Cypher path patterns +3. Build WHERE Clause - Convert filter steps to Cypher predicates +4. Build RETURN Clause - Convert projection steps to Cypher return items + +Example: + >>> from yellowstone.gremlin.parser import parse_gremlin + >>> from yellowstone.gremlin.cypher_bridge import translate_gremlin_to_cypher + >>> + >>> # g.V().hasLabel('User').has('age', 30) + >>> traversal = parse_gremlin("g.V().hasLabel('User').has('age',30)") + >>> + >>> query = translate_gremlin_to_cypher(traversal) + >>> # Returns: MATCH (v:User) WHERE v.age = 30 RETURN v +""" + +from typing import Any, Optional + +from yellowstone.gremlin.ast import ( + GremlinTraversal, + Step, + VertexStep, + EdgeStep, + FilterStep, + TraversalStep, + ProjectionStep, + LimitStep, + OrderStep, +) + +from yellowstone.parser.ast_nodes import ( + Query, + MatchClause, + WhereClause, + ReturnClause, + PathExpression, + NodePattern, + RelationshipPattern, + Identifier, + Literal, + Property, +) + + +class TranslationError(Exception): + """Raised when Gremlin traversal cannot be translated to Cypher.""" + pass + + +class UnsupportedPatternError(TranslationError): + """Raised when encountering unsupported Gremlin patterns.""" + pass + + +class TranslationContext: + """ + Maintains state during translation from Gremlin to Cypher. + + Tracks variables, labels, filters, and other elements needed to build + the Cypher query AST. + + Attributes: + node_counter: Counter for generating unique node variable names (v0, v1, v2, ...) + current_variable: The currently active node variable + node_labels: Dictionary mapping variables to their labels + filters: List of filter conditions to become WHERE clause + traversal_chain: List of nodes and relationships for MATCH pattern + projection: Projection step (values, project, select) if present + limit: Limit value if present + order_by: Order specification if present + """ + + def __init__(self): + self.node_counter = 0 + self.current_variable: Optional[str] = None + self.node_labels: dict[str, str] = {} + self.filters: list[dict[str, Any]] = [] + self.traversal_chain: list[Any] = [] # Nodes and relationships + self.projection: Optional[ProjectionStep] = None + self.limit: Optional[int] = None + self.order_by: Optional[OrderStep] = None + + def new_variable(self) -> str: + """Generate a new unique node variable name.""" + var = f"v{self.node_counter}" + self.node_counter += 1 + self.current_variable = var + return var + + def get_current_variable(self) -> str: + """Get the current active variable, creating one if needed.""" + if self.current_variable is None: + return self.new_variable() + return self.current_variable + + +def translate_gremlin_to_cypher(traversal: GremlinTraversal) -> Query: + """ + Translate a Gremlin traversal to a Cypher query AST. + + Args: + traversal: The Gremlin traversal to translate + + Returns: + A Query object representing the equivalent Cypher query + + Raises: + TranslationError: If the traversal cannot be translated + UnsupportedPatternError: If the traversal uses unsupported patterns + + Example: + >>> traversal = GremlinTraversal(steps=[ + ... VertexStep(), + ... FilterStep(predicate='hasLabel', args=['Person']), + ... FilterStep(predicate='has', args=['name', 'John']) + ... ]) + >>> query = translate_gremlin_to_cypher(traversal) + """ + if not traversal.steps: + raise TranslationError("Empty traversal - no steps to translate") + + # Validate starting step + first_step = traversal.steps[0] + if not isinstance(first_step, (VertexStep, EdgeStep)): + raise TranslationError( + f"Traversal must start with V() or E(), got {type(first_step).__name__}" + ) + + if isinstance(first_step, EdgeStep): + raise UnsupportedPatternError( + "Edge starting point E() not yet supported - start with V() instead" + ) + + # Create translation context + ctx = TranslationContext() + + # Process all steps to collect information + _collect_information(traversal.steps, ctx) + + # Build AST components + match_clause = _build_match_clause(ctx) + where_clause = _build_where_clause(ctx) + return_clause = _build_return_clause(ctx) + + # Construct final query + query = Query( + match_clause=match_clause, + where_clause=where_clause, + return_clause=return_clause + ) + + return query + + +def _collect_information(steps: list[Step], ctx: TranslationContext) -> None: + """ + Scan all steps to understand query intent and collect information. + + This first pass identifies: + - Starting point (VertexStep/EdgeStep) + - Labels from hasLabel filters + - Property filters from has() steps + - Traversal patterns from out/in/both steps + - Projections from values/project/select + - Modifiers like limit and order + + Args: + steps: List of Gremlin steps + ctx: Translation context to populate + """ + # Start with initial vertex + initial_var = ctx.new_variable() + + for i, step in enumerate(steps): + if isinstance(step, VertexStep): + # Initial vertex step - already handled + if i > 0: + raise UnsupportedPatternError( + "Multiple V() steps not supported - use single starting point" + ) + continue + + elif isinstance(step, FilterStep): + _process_filter_step(step, ctx) + + elif isinstance(step, TraversalStep): + _process_traversal_step(step, ctx) + + elif isinstance(step, ProjectionStep): + if ctx.projection is not None: + raise UnsupportedPatternError( + "Multiple projection steps not supported" + ) + ctx.projection = step + + elif isinstance(step, LimitStep): + if ctx.limit is not None: + raise UnsupportedPatternError( + "Multiple limit steps not supported" + ) + ctx.limit = step.count + + elif isinstance(step, OrderStep): + if ctx.order_by is not None: + raise UnsupportedPatternError( + "Multiple order steps not supported" + ) + ctx.order_by = step + + else: + raise UnsupportedPatternError( + f"Unsupported step type: {type(step).__name__}" + ) + + +def _process_filter_step(step: FilterStep, ctx: TranslationContext) -> None: + """ + Process a FilterStep and update context. + + Handles: + - hasLabel('Label') -> adds label to current node + - has('property', value) -> adds filter condition + + Args: + step: The FilterStep to process + ctx: Translation context + """ + current_var = ctx.get_current_variable() + + if step.filter_type == 'hasLabel': + label = step.value.value if step.value else None + if not label: + raise TranslationError("hasLabel requires a label argument") + if not isinstance(label, str): + raise TranslationError(f"hasLabel argument must be string, got {type(label)}") + + # Store label for current variable + if current_var in ctx.node_labels: + raise UnsupportedPatternError( + f"Multiple labels on same node not yet supported (variable: {current_var})" + ) + ctx.node_labels[current_var] = label + + elif step.filter_type == 'has': + if not step.property_name: + raise TranslationError("has requires a property name") + property_name = step.property_name + value = step.value.value if step.value else None + if not isinstance(property_name, str): + raise TranslationError( + f"has property name must be string, got {type(property_name)}" + ) + + # Create filter condition + filter_condition = { + 'type': 'comparison', + 'operator': '=', + 'left': { + 'type': 'property', + 'variable': current_var, + 'property': property_name + }, + 'right': _value_to_literal_dict(value) + } + ctx.filters.append(filter_condition) + + else: + raise UnsupportedPatternError( + f"Filter predicate '{step.filter_type}' not yet supported. " + f"Supported: hasLabel, has" + ) + + +def _process_traversal_step(step: TraversalStep, ctx: TranslationContext) -> None: + """ + Process a TraversalStep and update context. + + Handles: + - out('LABEL') -> creates outgoing relationship pattern + - in('LABEL') -> creates incoming relationship pattern + - both('LABEL') -> creates undirected relationship pattern + + Args: + step: The TraversalStep to process + ctx: Translation context + """ + if step.direction not in ['out', 'in', 'both']: + raise UnsupportedPatternError( + f"Traversal direction '{step.direction}' not yet supported. " + f"Supported: out, in, both" + ) + + # Save current node before creating relationship + source_var = ctx.get_current_variable() + + # Create relationship pattern + rel_type = step.edge_label + direction_map = { + 'out': 'out', + 'in': 'in', + 'both': 'both' + } + + relationship = RelationshipPattern( + variable=None, # Don't bind relationship to variable for now + relationship_type=Identifier(name=rel_type) if rel_type else None, + directed=(step.direction != 'both'), + direction=direction_map[step.direction] + ) + + # Create new target node + target_var = ctx.new_variable() + + # Store pattern for later MATCH construction + ctx.traversal_chain.append({ + 'source': source_var, + 'relationship': relationship, + 'target': target_var + }) + + +def _value_to_literal_dict(value: Any) -> dict[str, Any]: + """ + Convert a Python value to a literal dictionary representation. + + Args: + value: Python value to convert + + Returns: + Dictionary with 'type', 'value', and 'value_type' keys + """ + if isinstance(value, str): + return { + 'type': 'literal', + 'value': value, + 'value_type': 'string' + } + elif isinstance(value, bool): + return { + 'type': 'literal', + 'value': value, + 'value_type': 'boolean' + } + elif isinstance(value, (int, float)): + return { + 'type': 'literal', + 'value': value, + 'value_type': 'number' + } + elif value is None: + return { + 'type': 'literal', + 'value': None, + 'value_type': 'null' + } + else: + raise TranslationError( + f"Unsupported value type: {type(value).__name__}" + ) + + +def _build_match_clause(ctx: TranslationContext) -> MatchClause: + """ + Build the MATCH clause from collected information. + + Creates path expressions from traversal chains, including node patterns + with labels and relationship patterns. + + Args: + ctx: Translation context with collected information + + Returns: + MatchClause containing the path patterns + """ + if not ctx.traversal_chain: + # Simple case: single node with optional label + var = ctx.get_current_variable() + label = ctx.node_labels.get(var) + + node = NodePattern( + variable=Identifier(name=var), + labels=[Identifier(name=label)] if label else [] + ) + + path = PathExpression( + nodes=[node], + relationships=[] + ) + + return MatchClause(paths=[path], optional=False) + + # Complex case: traversal chain with relationships + nodes = [] + relationships = [] + + # Add first node + first_var = f"v0" + first_label = ctx.node_labels.get(first_var) + nodes.append(NodePattern( + variable=Identifier(name=first_var), + labels=[Identifier(name=first_label)] if first_label else [] + )) + + # Add relationships and subsequent nodes + for chain_item in ctx.traversal_chain: + relationships.append(chain_item['relationship']) + + target_var = chain_item['target'] + target_label = ctx.node_labels.get(target_var) + nodes.append(NodePattern( + variable=Identifier(name=target_var), + labels=[Identifier(name=target_label)] if target_label else [] + )) + + path = PathExpression( + nodes=nodes, + relationships=relationships + ) + + # Validate path structure + path.validate_structure() + + return MatchClause(paths=[path], optional=False) + + +def _build_where_clause(ctx: TranslationContext) -> Optional[WhereClause]: + """ + Build the WHERE clause from collected filter conditions. + + Args: + ctx: Translation context with collected filters + + Returns: + WhereClause if filters exist, None otherwise + """ + if not ctx.filters: + return None + + if len(ctx.filters) == 1: + # Single condition + return WhereClause(conditions=ctx.filters[0]) + + # Multiple conditions - combine with AND + conditions = { + 'type': 'logical', + 'operator': 'AND', + 'operands': ctx.filters + } + + return WhereClause(conditions=conditions) + + +def _build_return_clause(ctx: TranslationContext) -> ReturnClause: + """ + Build the RETURN clause from projection step or default to returning current variable. + + Args: + ctx: Translation context with optional projection + + Returns: + ReturnClause with appropriate return items + """ + return_items = [] + + if ctx.projection: + current_var = ctx.get_current_variable() + + if ctx.projection.projection_type == 'values': + # values('prop1', 'prop2') -> RETURN v.prop1, v.prop2 + for prop_name in ctx.projection.property_names: + return_items.append(Property( + variable=Identifier(name=current_var), + property_name=Identifier(name=prop_name) + )) + else: + raise UnsupportedPatternError( + f"Projection type '{ctx.projection.projection_type}' not yet supported. " + f"Supported: values" + ) + else: + # No projection - return current variable + current_var = ctx.get_current_variable() + return_items.append(Identifier(name=current_var)) + + # Build order_by if present + order_by_spec = None + if ctx.order_by: + current_var = ctx.get_current_variable() + order_by_spec = [{ + 'expression': { + 'type': 'property', + 'variable': current_var, + 'property': ctx.order_by.order_by + }, + 'direction': 'ASC' if ctx.order_by.order == 'asc' else 'DESC' + }] + + return ReturnClause( + items=return_items, + distinct=False, + order_by=order_by_spec, + limit=ctx.limit + ) diff --git a/src/yellowstone/gremlin/examples/basic_usage.py b/src/yellowstone/gremlin/examples/basic_usage.py new file mode 100644 index 0000000..f46b36e --- /dev/null +++ b/src/yellowstone/gremlin/examples/basic_usage.py @@ -0,0 +1,212 @@ +""" +Basic Usage Examples for Gremlin-to-Cypher Bridge + +Demonstrates how to use the cypher_bridge module to translate +Gremlin traversals to Cypher queries. +""" + +from yellowstone.gremlin.ast_nodes import ( + GremlinTraversal, + VertexStep, + FilterStep, + TraversalStep, + ProjectionStep, + LimitStep, + OrderStep, +) +from yellowstone.gremlin.cypher_bridge import translate_gremlin_to_cypher + + +def example_simple_query(): + """Example 1: Simple vertex query with label""" + print("=" * 60) + print("Example 1: g.V().hasLabel('User')") + print("=" * 60) + + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(predicate='hasLabel', args=['User']) + ]) + + query = translate_gremlin_to_cypher(traversal) + + print(f"Match Clause: {query.match_clause.paths[0]}") + print(f"Where Clause: {query.where_clause}") + print(f"Return Items: {query.return_clause.items}") + print() + + +def example_with_filters(): + """Example 2: Query with property filters""" + print("=" * 60) + print("Example 2: g.V().hasLabel('User').has('age', 30).has('active', True)") + print("=" * 60) + + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(predicate='hasLabel', args=['User']), + FilterStep(predicate='has', args=['age', 30]), + FilterStep(predicate='has', args=['active', True]) + ]) + + query = translate_gremlin_to_cypher(traversal) + + print(f"Match: (v0:User)") + print(f"Where: v0.age = 30 AND v0.active = true") + print(f"Return: v0") + print(f"\nWhere Clause Conditions: {query.where_clause.conditions}") + print() + + +def example_with_traversal(): + """Example 3: Query with relationship traversal""" + print("=" * 60) + print("Example 3: g.V().hasLabel('User').out('OWNS')") + print("=" * 60) + + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(predicate='hasLabel', args=['User']), + TraversalStep(direction='out', edge_label='OWNS') + ]) + + query = translate_gremlin_to_cypher(traversal) + + path = query.match_clause.paths[0] + print(f"Match Pattern: (v0:User)-[:OWNS]->(v1)") + print(f"Nodes: {len(path.nodes)}") + print(f"Relationships: {len(path.relationships)}") + print(f"Source: {path.nodes[0].variable.name}, Labels: {[l.name for l in path.nodes[0].labels]}") + print(f"Relationship: {path.relationships[0].relationship_type.name}, Direction: {path.relationships[0].direction}") + print(f"Target: {path.nodes[1].variable.name}") + print(f"Return: {query.return_clause.items[0].name}") + print() + + +def example_with_projection(): + """Example 4: Query with property projection""" + print("=" * 60) + print("Example 4: g.V().hasLabel('User').values('name', 'email')") + print("=" * 60) + + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(predicate='hasLabel', args=['User']), + ProjectionStep(type='values', properties=['name', 'email']) + ]) + + query = translate_gremlin_to_cypher(traversal) + + print(f"Match: (v0:User)") + print(f"Return: v0.name, v0.email") + print(f"\nReturn Items:") + for item in query.return_clause.items: + print(f" - {item.variable.name}.{item.property_name.name}") + print() + + +def example_with_modifiers(): + """Example 5: Query with order and limit""" + print("=" * 60) + print("Example 5: g.V().hasLabel('User').order().by('name').limit(10)") + print("=" * 60) + + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(predicate='hasLabel', args=['User']), + OrderStep(property='name', ascending=True), + LimitStep(count=10) + ]) + + query = translate_gremlin_to_cypher(traversal) + + print(f"Match: (v0:User)") + print(f"Return: v0") + print(f"Order By: {query.return_clause.order_by[0]['expression']['property']} {query.return_clause.order_by[0]['direction']}") + print(f"Limit: {query.return_clause.limit}") + print() + + +def example_complex_query(): + """Example 6: Complex query combining multiple features""" + print("=" * 60) + print("Example 6: Complex Query") + print("g.V().hasLabel('User').has('active', True)") + print(" .out('OWNS').values('name').order().by('name').limit(5)") + print("=" * 60) + + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(predicate='hasLabel', args=['User']), + FilterStep(predicate='has', args=['active', True]), + TraversalStep(direction='out', edge_label='OWNS'), + ProjectionStep(type='values', properties=['name']), + OrderStep(property='name', ascending=True), + LimitStep(count=5) + ]) + + query = translate_gremlin_to_cypher(traversal) + + path = query.match_clause.paths[0] + print(f"\nTranslated to:") + print(f"Match: (v0:User)-[:OWNS]->(v1)") + print(f"Where: v0.active = true") + print(f"Return: v1.name ORDER BY v1.name ASC LIMIT 5") + print(f"\nStructure:") + print(f" - Path nodes: {len(path.nodes)}") + print(f" - Relationships: {len(path.relationships)}") + print(f" - Where conditions present: {query.where_clause is not None}") + print(f" - Return items: {len(query.return_clause.items)}") + print(f" - Order by: {query.return_clause.order_by is not None}") + print(f" - Limit: {query.return_clause.limit}") + print() + + +def example_chained_traversals(): + """Example 7: Chained relationship traversals""" + print("=" * 60) + print("Example 7: g.V().out('OWNS').out('HAS_PART').values('serial')") + print("=" * 60) + + traversal = GremlinTraversal(steps=[ + VertexStep(), + TraversalStep(direction='out', edge_label='OWNS'), + TraversalStep(direction='out', edge_label='HAS_PART'), + ProjectionStep(type='values', properties=['serial']) + ]) + + query = translate_gremlin_to_cypher(traversal) + + path = query.match_clause.paths[0] + print(f"Match Pattern: (v0)-[:OWNS]->(v1)-[:HAS_PART]->(v2)") + print(f"Return: v2.serial") + print(f"\nPath Structure:") + for i, node in enumerate(path.nodes): + print(f" Node {i}: {node.variable.name}") + if i < len(path.relationships): + rel = path.relationships[i] + print(f" Rel {i}: {rel.relationship_type.name if rel.relationship_type else 'ANY'} ({rel.direction})") + print() + + +def main(): + """Run all examples""" + print("\n" + "=" * 60) + print("Gremlin-to-Cypher Bridge - Basic Usage Examples") + print("=" * 60 + "\n") + + example_simple_query() + example_with_filters() + example_with_traversal() + example_with_projection() + example_with_modifiers() + example_complex_query() + example_chained_traversals() + + print("=" * 60) + print("All examples completed successfully!") + print("=" * 60) + + +if __name__ == '__main__': + main() diff --git a/src/yellowstone/gremlin/parser.py b/src/yellowstone/gremlin/parser.py new file mode 100644 index 0000000..dda2611 --- /dev/null +++ b/src/yellowstone/gremlin/parser.py @@ -0,0 +1,531 @@ +""" +Gremlin query parser for Yellowstone. + +This module provides a Gremlin query parser that converts query strings +into an Abstract Syntax Tree (AST) for further processing and translation. + +The parser uses a simple tokenization approach followed by method chain parsing. + +Example: + >>> from yellowstone.gremlin import parse_gremlin + >>> query_str = "g.V().hasLabel('Person').out('KNOWS').values('name')" + >>> traversal = parse_gremlin(query_str) + >>> print(traversal.steps[0]) + V() +""" + +import re +from typing import Any, Optional +from .ast import ( + GremlinTraversal, + Step, + VertexStep, + EdgeStep, + FilterStep, + TraversalStep, + ProjectionStep, + LimitStep, + OrderStep, + CountStep, + DedupStep, + GremlinValue, + Predicate, +) + + +class GremlinParseError(Exception): + """Exception raised when Gremlin query parsing fails.""" + + pass + + +class GremlinTokenizer: + """Tokenizes Gremlin query strings into parseable tokens. + + Splits the query into method names and arguments while handling: + - Quoted strings (single and double quotes) + - Numeric values + - Parentheses + - Method chaining (dots) + """ + + def __init__(self, query: str): + """Initialize tokenizer with query string. + + Args: + query: Gremlin query string to tokenize + """ + self.query = query.strip() + self.position = 0 + self.length = len(self.query) + + def tokenize(self) -> list[dict[str, Any]]: + """Tokenize the query into method calls. + + Returns: + List of token dictionaries with 'method' and 'args' keys + + Example: + >>> tokenizer = GremlinTokenizer("g.V().hasLabel('Person')") + >>> tokenizer.tokenize() + [{'method': 'g', 'args': []}, {'method': 'V', 'args': []}, + {'method': 'hasLabel', 'args': ['Person']}] + """ + tokens = [] + + while self.position < self.length: + self._skip_whitespace() + + if self.position >= self.length: + break + + # Skip dots + if self.query[self.position] == ".": + self.position += 1 + continue + + # Read method name + method_name = self._read_identifier() + if not method_name: + raise GremlinParseError( + f"Expected method name at position {self.position}" + ) + + # Check for opening parenthesis + self._skip_whitespace() + args = [] + if self.position < self.length and self.query[self.position] == "(": + args = self._read_arguments() + + tokens.append({"method": method_name, "args": args}) + + return tokens + + def _skip_whitespace(self): + """Skip whitespace characters.""" + while self.position < self.length and self.query[self.position].isspace(): + self.position += 1 + + def _read_identifier(self) -> str: + """Read an identifier (method name or variable). + + Returns: + The identifier string + """ + start = self.position + while self.position < self.length: + char = self.query[self.position] + if char.isalnum() or char == "_": + self.position += 1 + else: + break + return self.query[start : self.position] + + def _read_arguments(self) -> list[Any]: + """Read arguments from parentheses. + + Returns: + List of argument values (strings, numbers, or nested method calls) + """ + if self.query[self.position] != "(": + raise GremlinParseError( + f"Expected '(' at position {self.position}, got '{self.query[self.position]}'" + ) + + self.position += 1 # Skip opening parenthesis + args = [] + + while self.position < self.length: + self._skip_whitespace() + + if self.query[self.position] == ")": + self.position += 1 # Skip closing parenthesis + break + + # Read argument value + arg = self._read_argument_value() + args.append(arg) + + self._skip_whitespace() + + # Check for comma or closing parenthesis + if self.position < self.length: + if self.query[self.position] == ",": + self.position += 1 # Skip comma + elif self.query[self.position] == ")": + continue + else: + # Could be a nested method call + if self.query[self.position] == "(": + continue + + return args + + def _read_argument_value(self) -> Any: + """Read a single argument value. + + Returns: + The argument value (string, number, or dict for predicates) + """ + self._skip_whitespace() + + if self.position >= self.length: + raise GremlinParseError("Unexpected end of input while reading argument") + + char = self.query[self.position] + + # String literal + if char in ("'", '"'): + return self._read_string(char) + + # Number literal + if char.isdigit() or char == "-": + return self._read_number() + + # Predicate or nested method (like gt(30)) + if char.isalpha(): + method_name = self._read_identifier() + if self.position < self.length and self.query[self.position] == "(": + nested_args = self._read_arguments() + # Return as predicate dict + return {"predicate": method_name, "args": nested_args} + # Boolean literals + if method_name.lower() == "true": + return True + if method_name.lower() == "false": + return False + return method_name + + raise GremlinParseError( + f"Unexpected character '{char}' at position {self.position}" + ) + + def _read_string(self, quote_char: str) -> str: + """Read a quoted string. + + Args: + quote_char: The quote character (' or ") + + Returns: + The string value without quotes + """ + self.position += 1 # Skip opening quote + start = self.position + + while self.position < self.length: + if self.query[self.position] == quote_char: + value = self.query[start : self.position] + self.position += 1 # Skip closing quote + return value + if self.query[self.position] == "\\" and self.position + 1 < self.length: + self.position += 2 # Skip escaped character + else: + self.position += 1 + + raise GremlinParseError(f"Unterminated string starting at position {start - 1}") + + def _read_number(self) -> int | float: + """Read a numeric value. + + Returns: + Integer or float value + """ + start = self.position + has_dot = False + + if self.query[self.position] == "-": + self.position += 1 + + while self.position < self.length: + char = self.query[self.position] + if char.isdigit(): + self.position += 1 + elif char == "." and not has_dot: + has_dot = True + self.position += 1 + else: + break + + number_str = self.query[start : self.position] + return float(number_str) if has_dot else int(number_str) + + +class GremlinParser: + """Parser that converts tokenized Gremlin queries into AST. + + This parser handles common Gremlin traversal patterns and creates + appropriate Step objects for each method in the chain. + """ + + def __init__(self): + """Initialize the parser.""" + pass + + def parse(self, query: str) -> GremlinTraversal: + """Parse a Gremlin query string into a GremlinTraversal AST. + + Args: + query: Gremlin query string (e.g., "g.V().hasLabel('Person')") + + Returns: + GremlinTraversal AST object + + Raises: + GremlinParseError: If the query syntax is invalid + + Example: + >>> parser = GremlinParser() + >>> traversal = parser.parse("g.V().hasLabel('Person').values('name')") + >>> len(traversal.steps) + 3 + """ + # Tokenize the query + tokenizer = GremlinTokenizer(query) + tokens = tokenizer.tokenize() + + if not tokens: + raise GremlinParseError("Empty query") + + # First token should be 'g' (graph traversal source) + if tokens[0]["method"] != "g": + raise GremlinParseError( + f"Query must start with 'g', got '{tokens[0]['method']}'" + ) + + # Parse remaining tokens into steps + steps = [] + for token in tokens[1:]: + step = self._parse_step(token) + if step: + steps.append(step) + + if not steps: + raise GremlinParseError("Query must have at least one step after 'g'") + + return GremlinTraversal(steps=steps) + + def _parse_step(self, token: dict[str, Any]) -> Optional[Step]: + """Parse a single token into a Step object. + + Args: + token: Token dictionary with 'method' and 'args' + + Returns: + Step object or None if method is unknown + + Raises: + GremlinParseError: If step arguments are invalid + """ + method = token["method"] + args = token["args"] + + # Vertex step: V() + if method == "V": + vertex_id = args[0] if args else None + if vertex_id is not None and not isinstance(vertex_id, (str, int)): + raise GremlinParseError(f"V() expects string or int ID, got {type(vertex_id)}") + return VertexStep(vertex_id=str(vertex_id) if vertex_id is not None else None) + + # Edge step: E() + if method == "E": + edge_id = args[0] if args else None + if edge_id is not None and not isinstance(edge_id, (str, int)): + raise GremlinParseError(f"E() expects string or int ID, got {type(edge_id)}") + return EdgeStep(edge_id=str(edge_id) if edge_id is not None else None) + + # Filter steps + if method == "hasLabel": + if not args: + raise GremlinParseError("hasLabel() requires a label argument") + return FilterStep( + filter_type="hasLabel", + value=self._parse_value(args[0]), + ) + + if method == "has": + if not args: + raise GremlinParseError("has() requires at least one argument") + if len(args) == 1: + # has('propertyName') - property exists check + return FilterStep(filter_type="has", property_name=str(args[0])) + elif len(args) == 2: + # has('propertyName', value) or has('propertyName', predicate) + property_name = str(args[0]) + value_or_predicate = args[1] + + if isinstance(value_or_predicate, dict) and "predicate" in value_or_predicate: + # Predicate case: has('age', gt(30)) + predicate = self._parse_predicate(value_or_predicate) + return FilterStep( + filter_type="has", + property_name=property_name, + predicate=predicate, + ) + else: + # Value case: has('name', 'marko') + return FilterStep( + filter_type="has", + property_name=property_name, + value=self._parse_value(value_or_predicate), + ) + else: + raise GremlinParseError(f"has() accepts 1-2 arguments, got {len(args)}") + + if method == "hasId": + if not args: + raise GremlinParseError("hasId() requires an ID argument") + return FilterStep( + filter_type="hasId", + value=self._parse_value(args[0]), + ) + + if method == "hasKey": + if not args: + raise GremlinParseError("hasKey() requires a key argument") + return FilterStep( + filter_type="hasKey", + value=self._parse_value(args[0]), + ) + + if method == "hasValue": + if not args: + raise GremlinParseError("hasValue() requires a value argument") + return FilterStep( + filter_type="hasValue", + value=self._parse_value(args[0]), + ) + + # Traversal steps + if method in ("out", "in", "both"): + edge_label = args[0] if args else None + if edge_label is not None and not isinstance(edge_label, str): + raise GremlinParseError(f"{method}() expects string edge label, got {type(edge_label)}") + return TraversalStep( + direction=method, + traversal_type="vertex", + edge_label=edge_label, + ) + + if method in ("outE", "inE", "bothE"): + direction = method[:-1] # Remove 'E' suffix + edge_label = args[0] if args else None + if edge_label is not None and not isinstance(edge_label, str): + raise GremlinParseError(f"{method}() expects string edge label, got {type(edge_label)}") + return TraversalStep( + direction=direction, + traversal_type="edge", + edge_label=edge_label, + ) + + if method in ("outV", "inV", "bothV", "otherV"): + # These convert edges to vertices + direction_map = {"outV": "out", "inV": "in", "bothV": "both", "otherV": "other"} + return TraversalStep( + direction=direction_map[method], + traversal_type="vertex", + ) + + # Projection steps + if method in ("values", "valueMap", "properties", "elementMap"): + property_names = [str(arg) for arg in args if isinstance(arg, str)] + return ProjectionStep( + projection_type=method, + property_names=property_names, + ) + + # Limit step + if method == "limit": + if not args: + raise GremlinParseError("limit() requires a count argument") + if not isinstance(args[0], int): + raise GremlinParseError(f"limit() expects integer, got {type(args[0])}") + return LimitStep(count=args[0]) + + # Order step + if method == "order": + # Basic order() with no arguments + return OrderStep() + + if method == "by": + # This is a modulator for order(), but we'll handle it simply + # For now, skip it (would require more complex AST) + return None + + # Count step + if method == "count": + return CountStep() + + # Dedup step + if method == "dedup": + return DedupStep() + + # Unknown step - raise error + raise GremlinParseError(f"Unknown Gremlin step: {method}()") + + def _parse_value(self, value: Any) -> GremlinValue: + """Parse a value into a GremlinValue object. + + Args: + value: Raw value from tokenizer + + Returns: + GremlinValue object + """ + if isinstance(value, str): + return GremlinValue(value=value, value_type="string") + elif isinstance(value, bool): + return GremlinValue(value=value, value_type="boolean") + elif isinstance(value, (int, float)): + return GremlinValue(value=value, value_type="number") + else: + raise GremlinParseError(f"Unsupported value type: {type(value)}") + + def _parse_predicate(self, predicate_dict: dict[str, Any]) -> Predicate: + """Parse a predicate dictionary into a Predicate object. + + Args: + predicate_dict: Dictionary with 'predicate' and 'args' keys + + Returns: + Predicate object + + Raises: + GremlinParseError: If predicate is invalid + """ + operator = predicate_dict["predicate"] + args = predicate_dict.get("args", []) + + if not args: + raise GremlinParseError(f"Predicate {operator}() requires an argument") + + value = self._parse_value(args[0]) + + # Validate operator + valid_operators = {"gt", "gte", "lt", "lte", "eq", "neq", "within", "without"} + if operator not in valid_operators: + raise GremlinParseError( + f"Unknown predicate operator: {operator}. Valid: {valid_operators}" + ) + + return Predicate(operator=operator, value=value) + + +def parse_gremlin(query: str) -> GremlinTraversal: + """Convenience function to parse a Gremlin query string. + + Args: + query: Gremlin query string + + Returns: + GremlinTraversal AST object + + Raises: + GremlinParseError: If query syntax is invalid + + Example: + >>> traversal = parse_gremlin("g.V().hasLabel('Person').values('name')") + >>> len(traversal.steps) + 3 + """ + parser = GremlinParser() + return parser.parse(query) diff --git a/src/yellowstone/gremlin/tests/__init__.py b/src/yellowstone/gremlin/tests/__init__.py new file mode 100644 index 0000000..665667f --- /dev/null +++ b/src/yellowstone/gremlin/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for Gremlin-to-Cypher translation.""" diff --git a/src/yellowstone/gremlin/tests/test_cypher_bridge.py b/src/yellowstone/gremlin/tests/test_cypher_bridge.py new file mode 100644 index 0000000..bffa008 --- /dev/null +++ b/src/yellowstone/gremlin/tests/test_cypher_bridge.py @@ -0,0 +1,575 @@ +""" +Tests for Gremlin-to-Cypher Bridge + +Tests verify that Gremlin traversals are correctly translated to Cypher query ASTs. +Focus is on contract fulfillment - inputs/outputs match specifications. +""" + +import pytest +from yellowstone.gremlin.ast import ( + GremlinTraversal, + VertexStep, + EdgeStep, + FilterStep, + TraversalStep, + ProjectionStep, + LimitStep, + OrderStep, + GremlinValue, +) + +from yellowstone.gremlin.cypher_bridge import ( + translate_gremlin_to_cypher, + TranslationError, + UnsupportedPatternError, +) + +from yellowstone.parser.ast_nodes import ( + Query, + MatchClause, + WhereClause, + ReturnClause, + Identifier, + NodePattern, + RelationshipPattern, + Property, +) + + +class TestBasicTranslations: + """Test simple, single-step translations.""" + + def test_simple_vertex_query(self): + """Test: g.V().hasLabel('User') -> MATCH (v:User) RETURN v""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(filter_type='hasLabel', value=GremlinValue(value='User', value_type='string')) + ]) + + query = translate_gremlin_to_cypher(traversal) + + # Verify it's a Query + assert isinstance(query, Query) + + # Verify MATCH clause + assert query.match_clause is not None + assert len(query.match_clause.paths) == 1 + path = query.match_clause.paths[0] + assert len(path.nodes) == 1 + assert len(path.relationships) == 0 + + node = path.nodes[0] + assert node.variable.name == 'v0' + assert len(node.labels) == 1 + assert node.labels[0].name == 'User' + + # Verify no WHERE clause + assert query.where_clause is None + + # Verify RETURN clause + assert query.return_clause is not None + assert len(query.return_clause.items) == 1 + assert isinstance(query.return_clause.items[0], Identifier) + assert query.return_clause.items[0].name == 'v0' + + def test_vertex_without_label(self): + """Test: g.V() -> MATCH (v) RETURN v""" + traversal = GremlinTraversal(steps=[ + VertexStep() + ]) + + query = translate_gremlin_to_cypher(traversal) + + # Verify node has no labels + node = query.match_clause.paths[0].nodes[0] + assert len(node.labels) == 0 + assert node.variable.name == 'v0' + + # Still returns the variable + assert isinstance(query.return_clause.items[0], Identifier) + assert query.return_clause.items[0].name == 'v0' + + +class TestFilterTranslations: + """Test filter step translations.""" + + def test_has_filter_single_property(self): + """Test: g.V().hasLabel('User').has('age', 30) -> MATCH (v:User) WHERE v.age = 30 RETURN v""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(filter_type='hasLabel', value=GremlinValue(value='User', value_type='string')), + FilterStep(filter_type='has', property_name='age', value=GremlinValue(value=30, value_type='number')) + ]) + + query = translate_gremlin_to_cypher(traversal) + + # Verify MATCH has label + node = query.match_clause.paths[0].nodes[0] + assert node.labels[0].name == 'User' + + # Verify WHERE clause exists + assert query.where_clause is not None + conditions = query.where_clause.conditions + + assert conditions['type'] == 'comparison' + assert conditions['operator'] == '=' + assert conditions['left']['variable'] == 'v0' + assert conditions['left']['property'] == 'age' + assert conditions['right']['value'] == 30 + assert conditions['right']['value_type'] == 'number' + + def test_has_filter_string_value(self): + """Test: g.V().hasLabel('User').has('name', 'John')""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(filter_type='hasLabel', value=GremlinValue(value='User', value_type='string')), + FilterStep(filter_type='has', property_name='name', value=GremlinValue(value='John', value_type='string')) + ]) + + query = translate_gremlin_to_cypher(traversal) + + conditions = query.where_clause.conditions + assert conditions['right']['value'] == 'John' + assert conditions['right']['value_type'] == 'string' + + def test_has_filter_boolean_value(self): + """Test: g.V().has('active', True)""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(filter_type='has', property_name='active', value=GremlinValue(value=True, value_type='boolean')) + ]) + + query = translate_gremlin_to_cypher(traversal) + + conditions = query.where_clause.conditions + assert conditions['right']['value'] is True + assert conditions['right']['value_type'] == 'boolean' + + def test_multiple_has_filters(self): + """Test: g.V().has('age', 30).has('name', 'John') -> Multiple conditions with AND""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(filter_type='has', property_name='age', value=GremlinValue(value=30, value_type='number')), + FilterStep(filter_type='has', property_name='name', value=GremlinValue(value='John', value_type='string')) + ]) + + query = translate_gremlin_to_cypher(traversal) + + # Verify WHERE clause combines with AND + conditions = query.where_clause.conditions + assert conditions['type'] == 'logical' + assert conditions['operator'] == 'AND' + assert len(conditions['operands']) == 2 + + # Check both conditions present + operands = conditions['operands'] + assert operands[0]['left']['property'] == 'age' + assert operands[0]['right']['value'] == 30 + assert operands[1]['left']['property'] == 'name' + assert operands[1]['right']['value'] == 'John' + + +class TestTraversalTranslations: + """Test traversal step translations.""" + + def test_simple_out_traversal(self): + """Test: g.V().hasLabel('User').out('OWNS') -> MATCH (v0:User)-[:OWNS]->(v1) RETURN v1""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(filter_type='hasLabel', value=GremlinValue(value='User', value_type='string')), + TraversalStep(direction='out', traversal_type='vertex', edge_label='OWNS') + ]) + + query = translate_gremlin_to_cypher(traversal) + + # Verify path structure + path = query.match_clause.paths[0] + assert len(path.nodes) == 2 + assert len(path.relationships) == 1 + + # Verify source node + assert path.nodes[0].variable.name == 'v0' + assert path.nodes[0].labels[0].name == 'User' + + # Verify relationship + rel = path.relationships[0] + assert rel.relationship_type.name == 'OWNS' + assert rel.directed is True + assert rel.direction == 'out' + + # Verify target node + assert path.nodes[1].variable.name == 'v1' + + # Verify return is target node + assert query.return_clause.items[0].name == 'v1' + + def test_in_traversal(self): + """Test: g.V().hasLabel('Item').in('OWNS') -> incoming relationship""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(filter_type='hasLabel', value=GremlinValue(value='Item', value_type='string')), + TraversalStep(direction='in', traversal_type='vertex', edge_label='OWNS') + ]) + + query = translate_gremlin_to_cypher(traversal) + + rel = query.match_clause.paths[0].relationships[0] + assert rel.direction == 'in' + assert rel.directed is True + + def test_both_traversal(self): + """Test: g.V().both('KNOWS') -> undirected relationship""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + TraversalStep(direction='both', traversal_type='vertex', edge_label='KNOWS') + ]) + + query = translate_gremlin_to_cypher(traversal) + + rel = query.match_clause.paths[0].relationships[0] + assert rel.direction == 'both' + assert rel.directed is False + + def test_traversal_without_edge_label(self): + """Test: g.V().out() -> relationship without type""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + TraversalStep(direction='out', traversal_type='vertex', edge_label=None) + ]) + + query = translate_gremlin_to_cypher(traversal) + + rel = query.match_clause.paths[0].relationships[0] + assert rel.relationship_type is None + + def test_chained_traversals(self): + """Test: g.V().out('OWNS').out('HAS_PART') -> (v0)-[:OWNS]->(v1)-[:HAS_PART]->(v2)""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + TraversalStep(direction='out', traversal_type='vertex', edge_label='OWNS'), + TraversalStep(direction='out', traversal_type='vertex', edge_label='HAS_PART') + ]) + + query = translate_gremlin_to_cypher(traversal) + + path = query.match_clause.paths[0] + assert len(path.nodes) == 3 + assert len(path.relationships) == 2 + + # Verify chain + assert path.nodes[0].variable.name == 'v0' + assert path.relationships[0].relationship_type.name == 'OWNS' + assert path.nodes[1].variable.name == 'v1' + assert path.relationships[1].relationship_type.name == 'HAS_PART' + assert path.nodes[2].variable.name == 'v2' + + # Returns final node + assert query.return_clause.items[0].name == 'v2' + + +class TestProjectionTranslations: + """Test projection step translations.""" + + def test_values_single_property(self): + """Test: g.V().hasLabel('User').values('name') -> RETURN v.name""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(filter_type='hasLabel', value=GremlinValue(value='User', value_type='string')), + ProjectionStep(projection_type='values', property_names=['name']) + ]) + + query = translate_gremlin_to_cypher(traversal) + + # Verify return is property access + assert len(query.return_clause.items) == 1 + item = query.return_clause.items[0] + assert isinstance(item, Property) + assert item.variable.name == 'v0' + assert item.property_name.name == 'name' + + def test_values_multiple_properties(self): + """Test: g.V().values('name', 'age') -> RETURN v.name, v.age""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + ProjectionStep(projection_type='values', property_names=['name', 'age']) + ]) + + query = translate_gremlin_to_cypher(traversal) + + # Verify multiple return items + assert len(query.return_clause.items) == 2 + + item1 = query.return_clause.items[0] + assert isinstance(item1, Property) + assert item1.property_name.name == 'name' + + item2 = query.return_clause.items[1] + assert isinstance(item2, Property) + assert item2.property_name.name == 'age' + + +class TestModifiers: + """Test limit and order modifiers.""" + + def test_limit(self): + """Test: g.V().limit(10) -> RETURN v LIMIT 10""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + LimitStep(count=10) + ]) + + query = translate_gremlin_to_cypher(traversal) + + assert query.return_clause.limit == 10 + + def test_order_ascending(self): + """Test: g.V().order().by('name') -> ORDER BY v.name ASC""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + OrderStep(order_by='name', order='asc') + ]) + + query = translate_gremlin_to_cypher(traversal) + + assert query.return_clause.order_by is not None + assert len(query.return_clause.order_by) == 1 + order_spec = query.return_clause.order_by[0] + assert order_spec['expression']['property'] == 'name' + assert order_spec['direction'] == 'ASC' + + def test_order_descending(self): + """Test: g.V().order().by('age', desc) -> ORDER BY v.age DESC""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + OrderStep(order_by='age', order='desc') + ]) + + query = translate_gremlin_to_cypher(traversal) + + order_spec = query.return_clause.order_by[0] + assert order_spec['direction'] == 'DESC' + + def test_combined_modifiers(self): + """Test: g.V().order().by('name').limit(5) -> ORDER BY ... LIMIT ...""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + OrderStep(order_by='name', order='asc'), + LimitStep(count=5) + ]) + + query = translate_gremlin_to_cypher(traversal) + + assert query.return_clause.order_by is not None + assert query.return_clause.limit == 5 + + +class TestComplexQueries: + """Test complex combined patterns.""" + + def test_traversal_with_filters(self): + """Test: g.V().hasLabel('User').has('age', 30).out('OWNS').values('name')""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(filter_type='hasLabel', value=GremlinValue(value='User', value_type='string')), + FilterStep(filter_type='has', property_name='age', value=GremlinValue(value=30, value_type='number')), + TraversalStep(direction='out', traversal_type='vertex', edge_label='OWNS'), + ProjectionStep(projection_type='values', property_names=['name']) + ]) + + query = translate_gremlin_to_cypher(traversal) + + # Verify MATCH pattern + path = query.match_clause.paths[0] + assert len(path.nodes) == 2 + assert path.nodes[0].labels[0].name == 'User' + assert path.relationships[0].relationship_type.name == 'OWNS' + + # Verify WHERE clause + assert query.where_clause is not None + conditions = query.where_clause.conditions + assert conditions['left']['variable'] == 'v0' + assert conditions['left']['property'] == 'age' + assert conditions['right']['value'] == 30 + + # Verify RETURN projection + item = query.return_clause.items[0] + assert isinstance(item, Property) + assert item.variable.name == 'v1' # Target node + assert item.property_name.name == 'name' + + def test_full_featured_query(self): + """Test query with all features: filters, traversal, projection, order, limit""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(filter_type='hasLabel', value=GremlinValue(value='User', value_type='string')), + FilterStep(filter_type='has', property_name='active', value=GremlinValue(value=True, value_type='boolean')), + TraversalStep(direction='out', traversal_type='vertex', edge_label='OWNS'), + ProjectionStep(projection_type='values', property_names=['name']), + OrderStep(order_by='name', order='asc'), + LimitStep(count=10) + ]) + + query = translate_gremlin_to_cypher(traversal) + + # All components should be present + assert query.match_clause is not None + assert query.where_clause is not None + assert query.return_clause is not None + assert query.return_clause.order_by is not None + assert query.return_clause.limit == 10 + + +class TestErrorHandling: + """Test error conditions and validation.""" + + def test_empty_traversal(self): + """Test that empty traversal raises error""" + traversal = GremlinTraversal(steps=[]) + + with pytest.raises(TranslationError, match="Empty traversal"): + translate_gremlin_to_cypher(traversal) + + def test_edge_starting_point_unsupported(self): + """Test that E() starting point raises error""" + traversal = GremlinTraversal(steps=[ + EdgeStep() + ]) + + with pytest.raises(UnsupportedPatternError, match="Edge starting point E\\(\\)"): + translate_gremlin_to_cypher(traversal) + + def test_invalid_starting_step(self): + """Test that non-V/E starting step raises error""" + traversal = GremlinTraversal(steps=[ + FilterStep(filter_type='hasLabel', value=GremlinValue(value='User', value_type='string')) + ]) + + with pytest.raises(TranslationError, match="must start with V\\(\\) or E\\(\\)"): + translate_gremlin_to_cypher(traversal) + + def test_unsupported_filter_predicate(self): + """Test that unsupported filter predicate raises error""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(filter_type='where', value=None) + ]) + + with pytest.raises(UnsupportedPatternError, match="Filter predicate 'where'"): + translate_gremlin_to_cypher(traversal) + + def test_unsupported_traversal_direction(self): + """Test that unsupported traversal direction raises error""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + TraversalStep(direction='outE', traversal_type='edge', edge_label='KNOWS') + ]) + + with pytest.raises(UnsupportedPatternError, match="Traversal direction 'outE'"): + translate_gremlin_to_cypher(traversal) + + def test_unsupported_projection_type(self): + """Test that unsupported projection type raises error""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + ProjectionStep(projection_type='project', property_names=['name']) + ]) + + with pytest.raises(UnsupportedPatternError, match="Projection type 'project'"): + translate_gremlin_to_cypher(traversal) + + def test_multiple_projections_unsupported(self): + """Test that multiple projection steps raise error""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + ProjectionStep(projection_type='values', property_names=['name']), + ProjectionStep(projection_type='values', property_names=['age']) + ]) + + with pytest.raises(UnsupportedPatternError, match="Multiple projection steps"): + translate_gremlin_to_cypher(traversal) + + def test_multiple_limits_unsupported(self): + """Test that multiple limit steps raise error""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + LimitStep(count=10), + LimitStep(count=5) + ]) + + with pytest.raises(UnsupportedPatternError, match="Multiple limit steps"): + translate_gremlin_to_cypher(traversal) + + def test_hasLabel_wrong_arg_count(self): + """Test that hasLabel without value raises error""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(filter_type='hasLabel', value=None) + ]) + + with pytest.raises(TranslationError, match="hasLabel requires a label argument"): + translate_gremlin_to_cypher(traversal) + + def test_has_wrong_arg_count(self): + """Test that has without property name raises error""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(filter_type='has', property_name=None, value=GremlinValue(value='test', value_type='string')) + ]) + + with pytest.raises(TranslationError, match="has requires a property name"): + translate_gremlin_to_cypher(traversal) + + +class TestEdgeCases: + """Test edge cases and boundary conditions.""" + + def test_vertex_with_ids_raises_warning(self): + """Test V(id) - IDs are preserved but may not affect query""" + # For now, IDs are ignored in translation (MVP) + traversal = GremlinTraversal(steps=[ + VertexStep(ids=[1, 2, 3]) + ]) + + # Should succeed - IDs are just ignored for now + query = translate_gremlin_to_cypher(traversal) + assert query is not None + + def test_null_value_in_has(self): + """Test has() with null value""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(filter_type='has', property_name='name', value=GremlinValue(value=None, value_type='null')) + ]) + + query = translate_gremlin_to_cypher(traversal) + + conditions = query.where_clause.conditions + assert conditions['right']['value'] is None + assert conditions['right']['value_type'] == 'null' + + def test_float_value_in_has(self): + """Test has() with float value""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + FilterStep(filter_type='has', property_name='rating', value=GremlinValue(value=4.5, value_type='number')) + ]) + + query = translate_gremlin_to_cypher(traversal) + + conditions = query.where_clause.conditions + assert conditions['right']['value'] == 4.5 + assert conditions['right']['value_type'] == 'number' + + def test_zero_limit(self): + """Test limit(0) - edge case""" + traversal = GremlinTraversal(steps=[ + VertexStep(), + LimitStep(count=0) + ]) + + query = translate_gremlin_to_cypher(traversal) + assert query.return_clause.limit == 0 + + +if __name__ == '__main__': + pytest.main([__file__, '-v']) diff --git a/src/yellowstone/gremlin/tests/test_parser.py b/src/yellowstone/gremlin/tests/test_parser.py new file mode 100644 index 0000000..9a6e053 --- /dev/null +++ b/src/yellowstone/gremlin/tests/test_parser.py @@ -0,0 +1,413 @@ +""" +Tests for Gremlin parser. + +These tests verify that the parser correctly handles all supported +Gremlin query patterns and converts them into the appropriate AST. +""" + +import pytest +from yellowstone.gremlin import ( + parse_gremlin, + GremlinParser, + GremlinParseError, + GremlinTraversal, + VertexStep, + EdgeStep, + FilterStep, + TraversalStep, + ProjectionStep, + LimitStep, + CountStep, + DedupStep, + GremlinValue, + Predicate, +) + + +class TestBasicParsing: + """Test basic parsing functionality.""" + + def test_parse_simple_vertex_step(self): + """Test parsing g.V()""" + traversal = parse_gremlin("g.V()") + assert len(traversal.steps) == 1 + assert isinstance(traversal.steps[0], VertexStep) + assert traversal.steps[0].vertex_id is None + + def test_parse_vertex_step_with_id(self): + """Test parsing g.V('123')""" + traversal = parse_gremlin("g.V('123')") + assert len(traversal.steps) == 1 + assert isinstance(traversal.steps[0], VertexStep) + assert traversal.steps[0].vertex_id == "123" + + def test_parse_vertex_step_with_numeric_id(self): + """Test parsing g.V(123)""" + traversal = parse_gremlin("g.V(123)") + assert len(traversal.steps) == 1 + assert isinstance(traversal.steps[0], VertexStep) + assert traversal.steps[0].vertex_id == "123" + + def test_parse_edge_step(self): + """Test parsing g.E()""" + traversal = parse_gremlin("g.E()") + assert len(traversal.steps) == 1 + assert isinstance(traversal.steps[0], EdgeStep) + assert traversal.steps[0].edge_id is None + + def test_empty_query_raises_error(self): + """Test that empty query raises error""" + with pytest.raises(GremlinParseError): + parse_gremlin("") + + def test_query_without_g_raises_error(self): + """Test that query not starting with 'g' raises error""" + with pytest.raises(GremlinParseError, match="must start with 'g'"): + parse_gremlin("V().hasLabel('Person')") + + +class TestFilterSteps: + """Test filter step parsing.""" + + def test_parse_has_label(self): + """Test parsing g.V().hasLabel('Person')""" + traversal = parse_gremlin("g.V().hasLabel('Person')") + assert len(traversal.steps) == 2 + assert isinstance(traversal.steps[1], FilterStep) + assert traversal.steps[1].filter_type == "hasLabel" + assert traversal.steps[1].value.value == "Person" + assert traversal.steps[1].value.value_type == "string" + + def test_parse_has_with_property_and_value(self): + """Test parsing g.V().has('name','marko')""" + traversal = parse_gremlin("g.V().has('name','marko')") + assert len(traversal.steps) == 2 + filter_step = traversal.steps[1] + assert isinstance(filter_step, FilterStep) + assert filter_step.filter_type == "has" + assert filter_step.property_name == "name" + assert filter_step.value.value == "marko" + + def test_parse_has_with_numeric_value(self): + """Test parsing g.V().has('age', 30)""" + traversal = parse_gremlin("g.V().has('age', 30)") + filter_step = traversal.steps[1] + assert filter_step.property_name == "age" + assert filter_step.value.value == 30 + assert filter_step.value.value_type == "number" + + def test_parse_has_with_predicate_gt(self): + """Test parsing g.V().has('age', gt(30))""" + traversal = parse_gremlin("g.V().has('age', gt(30))") + filter_step = traversal.steps[1] + assert isinstance(filter_step, FilterStep) + assert filter_step.property_name == "age" + assert filter_step.predicate is not None + assert filter_step.predicate.operator == "gt" + assert filter_step.predicate.value.value == 30 + + def test_parse_has_with_predicate_lt(self): + """Test parsing g.V().has('age', lt(50))""" + traversal = parse_gremlin("g.V().has('age', lt(50))") + filter_step = traversal.steps[1] + assert filter_step.predicate.operator == "lt" + assert filter_step.predicate.value.value == 50 + + def test_parse_has_id(self): + """Test parsing g.V().hasId('123')""" + traversal = parse_gremlin("g.V().hasId('123')") + filter_step = traversal.steps[1] + assert filter_step.filter_type == "hasId" + assert filter_step.value.value == "123" + + def test_parse_has_key(self): + """Test parsing g.V().hasKey('name')""" + traversal = parse_gremlin("g.V().hasKey('name')") + filter_step = traversal.steps[1] + assert filter_step.filter_type == "hasKey" + assert filter_step.value.value == "name" + + def test_parse_has_value(self): + """Test parsing g.V().hasValue('John')""" + traversal = parse_gremlin("g.V().hasValue('John')") + filter_step = traversal.steps[1] + assert filter_step.filter_type == "hasValue" + assert filter_step.value.value == "John" + + +class TestTraversalSteps: + """Test traversal step parsing.""" + + def test_parse_out(self): + """Test parsing g.V().out()""" + traversal = parse_gremlin("g.V().out()") + traversal_step = traversal.steps[1] + assert isinstance(traversal_step, TraversalStep) + assert traversal_step.direction == "out" + assert traversal_step.traversal_type == "vertex" + assert traversal_step.edge_label is None + + def test_parse_out_with_label(self): + """Test parsing g.V().out('KNOWS')""" + traversal = parse_gremlin("g.V().out('KNOWS')") + traversal_step = traversal.steps[1] + assert traversal_step.direction == "out" + assert traversal_step.edge_label == "KNOWS" + + def test_parse_in(self): + """Test parsing g.V().in('CREATED')""" + traversal = parse_gremlin("g.V().in('CREATED')") + traversal_step = traversal.steps[1] + assert traversal_step.direction == "in" + assert traversal_step.edge_label == "CREATED" + + def test_parse_both(self): + """Test parsing g.V().both('KNOWS')""" + traversal = parse_gremlin("g.V().both('KNOWS')") + traversal_step = traversal.steps[1] + assert traversal_step.direction == "both" + assert traversal_step.edge_label == "KNOWS" + + def test_parse_out_e(self): + """Test parsing g.V().outE('KNOWS')""" + traversal = parse_gremlin("g.V().outE('KNOWS')") + traversal_step = traversal.steps[1] + assert traversal_step.direction == "out" + assert traversal_step.traversal_type == "edge" + assert traversal_step.edge_label == "KNOWS" + + def test_parse_in_e(self): + """Test parsing g.V().inE()""" + traversal = parse_gremlin("g.V().inE()") + traversal_step = traversal.steps[1] + assert traversal_step.direction == "in" + assert traversal_step.traversal_type == "edge" + + def test_parse_both_e(self): + """Test parsing g.V().bothE()""" + traversal = parse_gremlin("g.V().bothE()") + traversal_step = traversal.steps[1] + assert traversal_step.direction == "both" + assert traversal_step.traversal_type == "edge" + + +class TestProjectionSteps: + """Test projection step parsing.""" + + def test_parse_values_with_property(self): + """Test parsing g.V().values('name')""" + traversal = parse_gremlin("g.V().values('name')") + projection_step = traversal.steps[1] + assert isinstance(projection_step, ProjectionStep) + assert projection_step.projection_type == "values" + assert projection_step.property_names == ["name"] + + def test_parse_values_with_multiple_properties(self): + """Test parsing g.V().values('name', 'age')""" + traversal = parse_gremlin("g.V().values('name', 'age')") + projection_step = traversal.steps[1] + assert projection_step.property_names == ["name", "age"] + + def test_parse_values_without_properties(self): + """Test parsing g.V().values()""" + traversal = parse_gremlin("g.V().values()") + projection_step = traversal.steps[1] + assert projection_step.projection_type == "values" + assert projection_step.property_names == [] + + def test_parse_value_map(self): + """Test parsing g.V().valueMap()""" + traversal = parse_gremlin("g.V().valueMap()") + projection_step = traversal.steps[1] + assert projection_step.projection_type == "valueMap" + + def test_parse_properties(self): + """Test parsing g.V().properties('name')""" + traversal = parse_gremlin("g.V().properties('name')") + projection_step = traversal.steps[1] + assert projection_step.projection_type == "properties" + assert projection_step.property_names == ["name"] + + def test_parse_element_map(self): + """Test parsing g.V().elementMap()""" + traversal = parse_gremlin("g.V().elementMap()") + projection_step = traversal.steps[1] + assert projection_step.projection_type == "elementMap" + + +class TestModifierSteps: + """Test modifier steps (limit, order, count, dedup).""" + + def test_parse_limit(self): + """Test parsing g.V().limit(10)""" + traversal = parse_gremlin("g.V().limit(10)") + limit_step = traversal.steps[1] + assert isinstance(limit_step, LimitStep) + assert limit_step.count == 10 + + def test_parse_count(self): + """Test parsing g.V().count()""" + traversal = parse_gremlin("g.V().count()") + count_step = traversal.steps[1] + assert isinstance(count_step, CountStep) + + def test_parse_dedup(self): + """Test parsing g.V().dedup()""" + traversal = parse_gremlin("g.V().dedup()") + dedup_step = traversal.steps[1] + assert isinstance(dedup_step, DedupStep) + + def test_parse_order(self): + """Test parsing g.V().order()""" + traversal = parse_gremlin("g.V().order()") + order_step = traversal.steps[1] + assert order_step.step_type == "order" + + +class TestComplexQueries: + """Test complex multi-step queries.""" + + def test_parse_person_query(self): + """Test parsing g.V().hasLabel('Person').out('KNOWS').values('name')""" + query = "g.V().hasLabel('Person').out('KNOWS').values('name')" + traversal = parse_gremlin(query) + + assert len(traversal.steps) == 4 + assert isinstance(traversal.steps[0], VertexStep) + assert isinstance(traversal.steps[1], FilterStep) + assert isinstance(traversal.steps[2], TraversalStep) + assert isinstance(traversal.steps[3], ProjectionStep) + + assert traversal.steps[1].value.value == "Person" + assert traversal.steps[2].edge_label == "KNOWS" + assert traversal.steps[3].property_names == ["name"] + + def test_parse_query_with_predicates(self): + """Test parsing g.V().hasLabel('Person').has('age',gt(30)).out('created').values('name')""" + query = "g.V().hasLabel('Person').has('age',gt(30)).out('created').values('name')" + traversal = parse_gremlin(query) + + assert len(traversal.steps) == 5 + assert isinstance(traversal.steps[2], FilterStep) + assert traversal.steps[2].predicate.operator == "gt" + assert traversal.steps[2].predicate.value.value == 30 + + def test_parse_query_with_limit(self): + """Test parsing g.V().hasLabel('Person').limit(5).values('name')""" + query = "g.V().hasLabel('Person').limit(5).values('name')" + traversal = parse_gremlin(query) + + assert len(traversal.steps) == 4 + assert isinstance(traversal.steps[2], LimitStep) + assert traversal.steps[2].count == 5 + + def test_parse_edge_traversal_query(self): + """Test parsing g.V().outE('KNOWS').inV().values('name')""" + query = "g.V().outE('KNOWS').inV().values('name')" + traversal = parse_gremlin(query) + + assert len(traversal.steps) == 4 + assert traversal.steps[1].traversal_type == "edge" + assert traversal.steps[2].direction == "in" + assert traversal.steps[2].traversal_type == "vertex" + + def test_parse_query_with_count(self): + """Test parsing g.V().hasLabel('Person').out('KNOWS').count()""" + query = "g.V().hasLabel('Person').out('KNOWS').count()" + traversal = parse_gremlin(query) + + assert len(traversal.steps) == 4 + assert isinstance(traversal.steps[3], CountStep) + + def test_parse_deduped_query(self): + """Test parsing g.V().out('knows').dedup().values('name')""" + query = "g.V().out('knows').dedup().values('name')" + traversal = parse_gremlin(query) + + assert len(traversal.steps) == 4 + assert isinstance(traversal.steps[2], DedupStep) + + +class TestQuoteHandling: + """Test proper handling of different quote types.""" + + def test_parse_double_quotes(self): + """Test parsing with double quotes""" + traversal = parse_gremlin('g.V().hasLabel("Person")') + assert traversal.steps[1].value.value == "Person" + + def test_parse_mixed_quotes(self): + """Test parsing with mixed quote types""" + traversal = parse_gremlin('g.V().hasLabel("Person").has(\'name\',"John")') + assert traversal.steps[1].value.value == "Person" + assert traversal.steps[2].property_name == "name" + assert traversal.steps[2].value.value == "John" + + +class TestWhitespace: + """Test handling of whitespace in queries.""" + + def test_parse_with_extra_whitespace(self): + """Test parsing with extra whitespace""" + query = "g.V() . hasLabel( 'Person' ) . values( 'name' )" + traversal = parse_gremlin(query) + + assert len(traversal.steps) == 3 + assert traversal.steps[1].value.value == "Person" + assert traversal.steps[2].property_names == ["name"] + + def test_parse_with_newlines(self): + """Test parsing with newlines""" + query = """g.V() + .hasLabel('Person') + .values('name')""" + traversal = parse_gremlin(query) + + assert len(traversal.steps) == 3 + + +class TestErrorHandling: + """Test error handling for invalid queries.""" + + def test_invalid_method_raises_error(self): + """Test that invalid method name raises error""" + with pytest.raises(GremlinParseError, match="Unknown Gremlin step"): + parse_gremlin("g.V().invalidMethod()") + + def test_has_label_without_args_raises_error(self): + """Test that hasLabel without args raises error""" + with pytest.raises(GremlinParseError, match="requires a label argument"): + parse_gremlin("g.V().hasLabel()") + + def test_limit_without_args_raises_error(self): + """Test that limit without args raises error""" + with pytest.raises(GremlinParseError, match="requires a count argument"): + parse_gremlin("g.V().limit()") + + def test_unclosed_string_raises_error(self): + """Test that unclosed string raises error""" + with pytest.raises(GremlinParseError, match="Unterminated string"): + parse_gremlin("g.V().hasLabel('Person)") + + +class TestStringRepresentation: + """Test string representation of AST nodes.""" + + def test_traversal_str(self): + """Test string representation of complete traversal""" + traversal = parse_gremlin("g.V().hasLabel('Person').values('name')") + result = str(traversal) + assert "g." in result + assert "V()" in result + assert "hasLabel" in result + assert "values" in result + + def test_vertex_step_str(self): + """Test string representation of VertexStep""" + traversal = parse_gremlin("g.V()") + assert str(traversal.steps[0]) == "V()" + + def test_vertex_step_with_id_str(self): + """Test string representation of VertexStep with ID""" + traversal = parse_gremlin("g.V('123')") + assert str(traversal.steps[0]) == "V('123')" diff --git a/src/yellowstone/main_translator.py b/src/yellowstone/main_translator.py index 896ed69..85668fc 100644 --- a/src/yellowstone/main_translator.py +++ b/src/yellowstone/main_translator.py @@ -1,5 +1,5 @@ """ -Main Cypher-to-KQL translation engine. +Main translator for Cypher and Gremlin to KQL. """ from typing import Optional @@ -12,6 +12,14 @@ from .translator.where_clause import WhereClauseTranslator from .translator.return_clause import ReturnClauseTranslator +# Gremlin support (optional) +try: + from .gremlin.parser import parse_gremlin + from .gremlin.cypher_bridge import translate_gremlin_to_cypher + GREMLIN_AVAILABLE = True +except ImportError: + GREMLIN_AVAILABLE = False + class TranslationError(Exception): """Raised when translation fails.""" @@ -68,8 +76,19 @@ def translate( TranslationError: If translation fails """ try: - # Step 1: Parse Cypher query into AST - ast = parse_query(cypher.query) + # Step 1: Detect language and parse to AST + query_str = cypher.query.strip() + + if query_str.startswith("g.") or query_str.startswith("g "): + # Gremlin query - parse and bridge to Cypher AST + if not GREMLIN_AVAILABLE: + raise TranslationError("Gremlin support not available") + + gremlin_ast = parse_gremlin(query_str) + ast = translate_gremlin_to_cypher(gremlin_ast) + else: + # Cypher query - parse directly + ast = parse_query(query_str) # Step 2: Determine translation strategy strategy = self._classify_query_complexity(ast) diff --git a/tests/integration/test_gremlin_integration.py b/tests/integration/test_gremlin_integration.py new file mode 100644 index 0000000..5a516dc --- /dev/null +++ b/tests/integration/test_gremlin_integration.py @@ -0,0 +1,75 @@ +"""Integration tests for Gremlin-to-KQL translation.""" +import pytest +from yellowstone.models import CypherQuery, TranslationContext +from yellowstone.main_translator import CypherTranslator + + +@pytest.fixture +def translator(): + return CypherTranslator(enable_ai=False) + + +@pytest.fixture +def context(): + return TranslationContext(user_id="test", tenant_id="test", permissions=[]) + + +class TestGremlinBasic: + """Test basic Gremlin queries.""" + + def test_simple_vertex_query(self, translator, context): + """Test g.V().hasLabel('User')""" + gremlin = CypherQuery(query="g.V().hasLabel('User')") + result = translator.translate(gremlin, context) + + assert "make-graph" in result.query.lower() + assert "graph-match" in result.query + assert "(v0:User)" in result.query + + def test_vertex_with_property_filter(self, translator, context): + """Test g.V().hasLabel('User').has('age', 30)""" + gremlin = CypherQuery(query="g.V().hasLabel('User').has('age',30)") + result = translator.translate(gremlin, context) + + assert "graph-match" in result.query + assert "where v0.age == 30" in result.query + + def test_vertex_with_traversal(self, translator, context): + """Test g.V().hasLabel('User').out('OWNS')""" + gremlin = CypherQuery(query="g.V().hasLabel('User').out('OWNS')") + result = translator.translate(gremlin, context) + + assert "graph-match" in result.query + assert "-[OWNS]->" in result.query + + def test_full_query_with_projection(self, translator, context): + """Test complete Gremlin query with projection""" + gremlin = CypherQuery(query="g.V().hasLabel('User').has('age',30).out('OWNS').values('name')") + result = translator.translate(gremlin, context) + + assert "make-graph" in result.query.lower() + assert "graph-match" in result.query + assert "where v0.age == 30" in result.query + assert "project v1.name" in result.query + + +class TestGremlinTraversals: + """Test Gremlin traversal patterns.""" + + def test_out_traversal(self, translator, context): + """Test out() edge traversal""" + gremlin = CypherQuery(query="g.V().out('CREATED')") + result = translator.translate(gremlin, context) + assert "-[CREATED]->" in result.query + + def test_in_traversal(self, translator, context): + """Test in() edge traversal""" + gremlin = CypherQuery(query="g.V().in('CREATED')") + result = translator.translate(gremlin, context) + assert "<-[CREATED]-" in result.query + + def test_both_traversal(self, translator, context): + """Test both() undirected edge""" + gremlin = CypherQuery(query="g.V().both('KNOWS')") + result = translator.translate(gremlin, context) + assert "-[KNOWS]-" in result.query