From c4cbbe13f989de901e1b769fd30b205834a889f8 Mon Sep 17 00:00:00 2001 From: Ernan Anguiano Date: Thu, 8 Jan 2026 16:36:49 -0800 Subject: [PATCH 1/7] feat: integrate AIC semantic graph indexing into conductor workflow - Bootstrapped AIC core (Skeletonizer, DB Layer, CLI) - Updated '/conductor:setup' to build a semantic dependency graph at project startup - Enhanced '/conductor:newTrack' and '/conductor:implement' to use token-efficient Rich Skeletons - Updated documentation with patent references and packaging configuration --- .aic/graph.db | Bin 0 -> 32768 bytes README.md | 2 + aic/cli.py | 110 ++++++++++++++++++++++++++++++ aic/db.py | 65 ++++++++++++++++++ aic/skeleton.py | 108 +++++++++++++++++++++++++++++ aic/utils.py | 6 ++ commands/conductor/implement.toml | 3 + commands/conductor/newTrack.toml | 3 + commands/conductor/setup.toml | 5 +- pyproject.toml | 21 ++++++ 10 files changed, 322 insertions(+), 1 deletion(-) create mode 100644 .aic/graph.db create mode 100644 aic/cli.py create mode 100644 aic/db.py create mode 100644 aic/skeleton.py create mode 100644 aic/utils.py create mode 100644 pyproject.toml diff --git a/.aic/graph.db b/.aic/graph.db new file mode 100644 index 0000000000000000000000000000000000000000..03e0a24b0d50f0ef69a5583ae3ae154928c969ae GIT binary patch literal 32768 zcmeHQ&5zs071x({W49XyDB4Y%rk=GSNU8-!qC|ZxYP6fRlp0-c679N;VF$#B9In`< zNR^~)Z)2m__R<0cf}YxIdk@+> z&&YRwvnKsJQ&{^fBNe2-t$p_5KTgVw@rVE-fCwN0hyWsh2p|H803v`0AOc?o0>^jq ztLy9Q*ZHgO#h%Y2b>mCR(M{#n-vxepVpy%R_;L5Gg)#b4ZA& zM*mfrDQ)lcwsuPkZO%Qe z&7%HVU3}$1ql9gL4o-5?r(D5akp2^%@Bki&z;l4W?|zY6y>h(vrr+p`o|i&03c>+KOtnJGD;T zU?zbfOw(P*VR~S>PPwuvdfR0fe)ew;cK0?3Pb3e>_SVi0gj`=9(?e!ho)3`_;sb*W zL5GeR#Ds%TLyklWVX`b=xWLP@94pX_;jEBID9{?gyfUgVmdJ>jO1eAj-Ce^mmWVsM8N+qn#pf^0%0O&?v}^* zqg>kS?ewa@^~E76@hL zk-RtaZ}Gvej=LSm6at_lifI!JD+a3yC7F|%s9Cvm$0^A_h1rU@PL2gtKmt@mPTxST zO=YumnY@1UjZM<0mdy+jxFFs08F`QQj)$A%JwNcutnv^P5~^ITq;xPT3j{$L{|r2F z4kg{Se2Yg>KY#%ZB%a(_kyX1sp158RHJA6KxfVt}UNpiIIenQ<#9+Y-Uuc?44dZO( z^*wj|zbkW_Yt1!go~6VC<}lCFwHQdz$oPKF+RpJ6UDMS8eaksGt=;{pBc4lVtcDgB z3A(76aP4XGxBMeV|MCi1s!s~bDzSWx^760tsqLSvL2;WN17oDRQzz^W5BNio1~dKE zs6*`|z=WA5Jm~$CVOh+Xs_>kOC!M$iWk;RZV}7c`*_I!7se9Bu6kUx+cr2KDPO3xb zFh<6~<%$Ut;Md>Fu3kJ|d;hkW0U1M>`!wpqF739M-ZGU|quo`NS{wdVy~f%~w{3Rx zcCFRzm}ZTs?UvGld`#^&X-n;No6KzTkqhTO(ReOqXE^tXr@KoJew_P6zKd`U_0i>v z(x0XGB@Y64AOeU0B7g`W0*C-2fCwN0hyWsh2p|H8z?l$uc_m-Gn6}Ili~E&x`C@S~ zCAQzcQ^*&uEGDLm_iyC$8$xVo-zP%9naghop|ki{MJ$EG_^P1)*&mkS%A0sEG;FI zzyHtWKFoaOuke8fBJg}6fb0LoqGPd04cGtkMT>m7Gp_%iW3CV@M-!$AT>tMnGemIx z|0cop|7kjfvv3aJ`hQMy!H*OV25>hlE$Uxhw9YT$3(S|Yrk2y=`u~U!1K0n@>&<~3 zVV;s;HiUC+!nqI5eK@t!DQbdqpVZt((1&pQKU?^52LA9s1P}p4;0r+D_oe*m#_`$* zuL)A3$nmJqI*>2wtWAM$sM*jNrM1RTr9<6R>&&D;(O7SGP1RKD!;WewW_?)i40Tlp zR>p>*A}dj9>_%3igk|fCpiD$YM!`*qjEvlS_-HaWWMmXNceNy!B0WePW-j=6z+43v zk3O|Ir?DW22NKj24{gHx3Z$Ko*OWa*4LAXEX-*tD?*jpjN!Py1IJvJDxZ*`fnV)m| z=_=8up2wU(Gl3qq3RJ#f#iB|9Bl8}xvD%D`*wh1dN=xSqvXMkGc8w{d2_9>R<{|`l zZHmb`c^vPCR(MQ*?J=8=sb!ryMygn7L(~=G8p^_9Nf6qPlPO5`;P*!`f)D=u(J?nP4V#0Oq>r@>?zaJEj)soV2B*8}$4 zsWjV0W2g^liw#@#j^0qvm_}ndv2+%8w1{81JnjAiXiTFqjoTZTGH!3+%x1v2y#e3% zA@m5~&l}G4^9D4g2^!O2yFM4jv{2T0Urrg`Oax$zC+2+OY~Y?A+ZxLLh$?C;$tRE4 xeZzun7-4%JXm@1&M^a?itts>%oO(4}JD&$4U>6?;JS>hL?%%_I4JzLMe-6Irx;_8^ literal 0 HcmV?d00001 diff --git a/README.md b/README.md index e6a57dc..fb4c9dd 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ The philosophy behind Conductor is simple: control your code. By treating contex - **Iterate safely**: Review plans before code is written, keeping you firmly in the loop. - **Work as a team**: Set project-level context for your product, tech stack, and workflow preferences that become a shared foundation for your team. - **Build on existing projects**: Intelligent initialization for both new (Greenfield) and existing (Brownfield) projects. +- **Semantic Awareness (AIC)**: Automatically indexes your codebase into "Rich Skeletons" using the AI Compiler (AIC). This provides the agent with deep context about your API contracts and dependencies with minimal token overhead. - **Smart revert**: A git-aware revert command that understands logical units of work (tracks, phases, tasks) rather than just commit hashes. ## Installation @@ -114,6 +115,7 @@ During implementation, you can also: ## Resources +- [AI Compiler Patent](https://www.tdcommons.org/dpubs_series/8241/): Semantic Dependency Graph for AI Agents - [Gemini CLI extensions](https://geminicli.com/docs/extensions/): Documentation about using extensions in Gemini CLI - [GitHub issues](https://github.com/gemini-cli-extensions/conductor/issues): Report bugs or request features diff --git a/aic/cli.py b/aic/cli.py new file mode 100644 index 0000000..06df88b --- /dev/null +++ b/aic/cli.py @@ -0,0 +1,110 @@ +import argparse +import os +from aic.db import init_db, upsert_node, get_node, get_dependencies, update_edges, mark_dirty +from aic.skeleton import RichSkeletonizer +from aic.utils import calculate_hash + +def index_repo(root_dir="."): + init_db() + skeletonizer = RichSkeletonizer() + for root, dirs, files in os.walk(root_dir): + # Exclusions + dirs[:] = [d for d in dirs if d not in ('.git', '.aic', '__pycache__', 'node_modules')] + + for file in files: + if not file.endswith('.py'): + continue + + file_path = os.path.join(root, file) + rel_path = os.path.relpath(file_path, root_dir) + + with open(file_path, 'r') as f: + content = f.read() + + current_hash = calculate_hash(content) + existing = get_node(rel_path) + + if existing and existing['hash'] == current_hash: + continue + + print(f"Indexing: {rel_path}") + skeleton, dependencies = skeletonizer.skeletonize(content, rel_path) + upsert_node(rel_path, current_hash, skeleton) + mark_dirty(rel_path) + + # Resolve dependencies to file paths + resolved_deps = [] + for dep in dependencies: + resolved = resolve_dep_to_path(dep, rel_path, root_dir) + if resolved: + resolved_deps.append(resolved) + + update_edges(rel_path, resolved_deps) + +def resolve_dep_to_path(dep_name, current_file, root_dir): + """Simple heuristic to resolve module name to file path.""" + # Handle relative imports (e.g., '.module' or '..module') + if dep_name.startswith('.'): + levels = 0 + while dep_name.startswith('.'): + levels += 1 + dep_name = dep_name[1:] + + curr_dir = os.path.dirname(current_file) + for _ in range(levels - 1): + curr_dir = os.path.dirname(curr_dir) + + base_path = os.path.join(curr_dir, dep_name.replace('.', os.sep)) + else: + base_path = os.path.join(root_dir, dep_name.replace('.', os.sep)) + + candidates = [ + base_path + ".py", + os.path.join(base_path, "__init__.py") + ] + + for cand in candidates: + if os.path.exists(cand): + return os.path.relpath(cand, root_dir) + return None + +def get_context(file_path): + node = get_node(file_path) + if not node: + return f"# Error: {file_path} not indexed." + + output = [f"# Context for {file_path}", node['skeleton'], ""] + + deps = get_dependencies(file_path) + if deps: + output.append("## Dependencies") + for dep in deps: + dep_node = get_node(dep) + if dep_node: + output.append(f"### {dep}") + output.append(dep_node['skeleton']) + output.append("") + + return "\n".join(output) + +def main(): + parser = argparse.ArgumentParser(description="AIC: AI Compiler") + subparsers = parser.add_subparsers(dest="command") + + subparsers.add_parser("index") + + context_parser = subparsers.add_parser("context") + context_parser.add_argument("file") + + args = parser.parse_args() + + if args.command == "index": + index_repo() + print("Finished indexing.") + elif args.command == "context": + print(get_context(args.file)) + else: + parser.print_help() + +if __name__ == "__main__": + main() diff --git a/aic/db.py b/aic/db.py new file mode 100644 index 0000000..ccd8058 --- /dev/null +++ b/aic/db.py @@ -0,0 +1,65 @@ +import sqlite3 +import os + +DB_PATH = ".aic/graph.db" + +def get_connection(): + os.makedirs(os.path.dirname(DB_PATH), exist_ok=True) + conn = sqlite3.connect(DB_PATH) + conn.row_factory = sqlite3.Row + return conn + +def init_db(): + with get_connection() as conn: + conn.execute(""" + CREATE TABLE IF NOT EXISTS nodes ( + path TEXT PRIMARY KEY, + hash TEXT, + skeleton TEXT, + status TEXT DEFAULT 'CLEAN' + ) + """) + conn.execute(""" + CREATE TABLE IF NOT EXISTS edges ( + source TEXT, + target TEXT, + PRIMARY KEY (source, target), + FOREIGN KEY(source) REFERENCES nodes(path) + ) + """) + +def upsert_node(path, hash_val, skeleton): + with get_connection() as conn: + conn.execute(""" + INSERT INTO nodes (path, hash, skeleton, status) + VALUES (?, ?, ?, 'CLEAN') + ON CONFLICT(path) DO UPDATE SET + hash = excluded.hash, + skeleton = excluded.skeleton, + status = 'CLEAN' + """, (path, hash_val, skeleton)) + +def mark_dirty(path): + """Mark all nodes that depend on this path as DIRTY.""" + with get_connection() as conn: + conn.execute(""" + UPDATE nodes + SET status = 'DIRTY' + WHERE path IN ( + SELECT source FROM edges WHERE target = ? + ) + """, (path,)) + +def update_edges(source_path, target_paths): + with get_connection() as conn: + conn.execute("DELETE FROM edges WHERE source = ?", (source_path,)) + for target in target_paths: + conn.execute("INSERT OR IGNORE INTO edges (source, target) VALUES (?, ?)", (source_path, target)) + +def get_node(path): + with get_connection() as conn: + return conn.execute("SELECT * FROM nodes WHERE path = ?", (path,)).fetchone() + +def get_dependencies(path): + with get_connection() as conn: + return [row['target'] for row in conn.execute("SELECT target FROM edges WHERE source = ?", (path,)).fetchall()] diff --git a/aic/skeleton.py b/aic/skeleton.py new file mode 100644 index 0000000..734ba6b --- /dev/null +++ b/aic/skeleton.py @@ -0,0 +1,108 @@ +import ast +import os + +class RichSkeletonizer(ast.NodeVisitor): + def __init__(self): + self.skeleton = [] + self.dependencies = set() + self.imports = [] + + def skeletonize(self, source_code, path): + try: + tree = ast.parse(source_code) + except Exception as e: + return f"# BUG: Failed to parse {path}: {str(e)}", set() + + self.visit(tree) + return "\n".join(self.skeleton), self.dependencies + + def visit_Import(self, node): + for alias in node.names: + self.dependencies.add(alias.name) + self.imports.append(f"import {alias.name}") + + def visit_ImportFrom(self, node): + module = node.module or "" + level = node.level + # Handle relative imports level + prefix = "." * level if level > 0 else "" + full_module = prefix + module + + for alias in node.names: + self.dependencies.add(full_module) + self.imports.append(f"from {full_module} import {alias.name}") + + def visit_ClassDef(self, node): + # Extract class signature + self.skeleton.append(f"class {node.name}:") + docstring = ast.get_docstring(node) + if docstring: + self.skeleton.append(f' """{docstring}"""') + + # We don't visit children yet, just let the visitor handle them + # But we want to indent them + old_skeleton = self.skeleton + self.skeleton = [] + self.generic_visit(node) + inner = self.skeleton + self.skeleton = old_skeleton + for line in inner: + self.skeleton.append(f" {line}") + self.skeleton.append("") # Spacer + + def visit_FunctionDef(self, node): + self._skeletonize_func(node) + + def visit_AsyncFunctionDef(self, node): + self._skeletonize_func(node, is_async=True) + + def _skeletonize_func(self, node, is_async=False): + prefix = "async " if is_async else "" + args = ast.unparse(node.args) if hasattr(ast, 'unparse') else "..." + returns = f" -> {ast.unparse(node.returns)}" if hasattr(ast, 'unparse') and node.returns else "" + + signature = f"{prefix}def {node.name}({args}){returns}:" + self.skeleton.append(signature) + + docstring = ast.get_docstring(node) + if docstring: + self.skeleton.append(f' """{docstring}"""') + + # Effects analysis + effects = self._analyze_effects(node) + if effects: + self.skeleton.append(f" # {effects}") + + self.skeleton.append(" ...") + self.skeleton.append("") # Spacer + + def _analyze_effects(self, node): + returns = [] + raises = [] + calls = [] + + for child in ast.walk(node): + if isinstance(child, ast.Return): + if child.value: + try: + returns.append(ast.unparse(child.value)) + except: + returns.append("some_value") + elif isinstance(child, ast.Raise): + if child.exc: + try: + raises.append(ast.unparse(child.exc)) + except: + raises.append("Exception") + elif isinstance(child, ast.Call): + try: + calls.append(ast.unparse(child.func)) + except: + pass + + res = [] + if returns: res.append(f"RETURNS: {' | '.join(list(set(returns))[:3])}") + if raises: res.append(f"RAISES: {' | '.join(list(set(raises))[:3])}") + if calls: res.append(f"CALLS: {' | '.join(list(set(calls))[:5])}") + + return " | ".join(res) diff --git a/aic/utils.py b/aic/utils.py new file mode 100644 index 0000000..db7ff3c --- /dev/null +++ b/aic/utils.py @@ -0,0 +1,6 @@ +import hashlib + +def calculate_hash(content): + if isinstance(content, str): + content = content.encode('utf-8') + return hashlib.sha256(content).hexdigest() diff --git a/commands/conductor/implement.toml b/commands/conductor/implement.toml index d1536bd..1959f17 100644 --- a/commands/conductor/implement.toml +++ b/commands/conductor/implement.toml @@ -67,6 +67,9 @@ CRITICAL: You must validate the success of every tool call. If any tool call fai - `conductor/workflow.md` c. **Error Handling:** If you fail to read any of these files, you MUST stop and inform the user of the error. +4. **Semantic Dependency Awareness (AIC):** + - **Protocol:** During the implementation of any task, if you encounter imported internal modules or functions whose names are not fully self-describing or whose implementation is not directly in your current context, you SHOULD run `python3 -m aic.cli context ` for those modules. This will provide you with the "Rich Skeleton" (signatures, docstrings, and IO/side-effects) of the dependency without consuming excessive tokens. + 4. **Execute Tasks and Update Track Plan:** a. **Announce:** State that you will now execute the tasks from the track's `plan.md` by following the procedures in `workflow.md`. b. **Iterate Through Tasks:** You MUST now loop through each task in the track's `plan.md` one by one. diff --git a/commands/conductor/newTrack.toml b/commands/conductor/newTrack.toml index c4325fc..0b99a75 100644 --- a/commands/conductor/newTrack.toml +++ b/commands/conductor/newTrack.toml @@ -70,6 +70,9 @@ CRITICAL: You must validate the success of every tool call. If any tool call fai * Examples include reproduction steps for bugs, specific scope for chores, or success criteria. * Tailor the questions to the specific request. +4. **Semantic Context (Optional):** + - **Recommendation:** If the track involves complex interactions with existing code, suggest that you can run `python3 -m aic.cli context ` for relevant files to better understand the API contracts and dependencies. This will help you generate a more accurate specification and plan. + 3. **Draft `spec.md`:** Once sufficient information is gathered, draft the content for the track's `spec.md` file, including sections like Overview, Functional Requirements, Non-Functional Requirements (if any), Acceptance Criteria, and Out of Scope. 4. **User Confirmation:** Present the drafted `spec.md` content to the user for review and approval. diff --git a/commands/conductor/setup.toml b/commands/conductor/setup.toml index 97ad40c..cdb37f8 100644 --- a/commands/conductor/setup.toml +++ b/commands/conductor/setup.toml @@ -323,7 +323,10 @@ CRITICAL: When determining model complexity, ALWAYS select the "flash" model, re 1. **Summarize Actions:** Present a summary of all actions taken during Phase 1, including: - The guide files that were copied. - The workflow file that was copied. -2. **Transition to initial plan and track generation:** Announce that the initial setup is complete and you will now proceed to define the first track for the project. +2. **Semantic Indexing (AIC):** + - **Announce Action:** Inform the user: "Finally, I will perform a semantic indexing of your project using AIC (AI Compiler). This creates a lightweight dependency graph to help me understand your code better." + - **Action:** Execute `python3 -m aic.cli index`. +3. **Transition to initial plan and track generation:** Announce that the initial setup is complete and you will now proceed to define the first track for the project. --- diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..ab6349e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,21 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "conductor-aic" +version = "0.1.1" +description = "Conductor Extension for Gemini CLI with AI Compiler (AIC)" +readme = "README.md" +requires-python = ">=3.8" +license = "Apache-2.0" +authors = [ + { name = "Google DeepMind" }, +] +dependencies = [] + +[project.scripts] +aic = "aic.cli:main" + +[tool.hatch.build.targets.wheel] +packages = ["aic"] From a2839c394c352f3a0172b177ba41d4ec48a9966d Mon Sep 17 00:00:00 2001 From: Ernan Anguiano Date: Tue, 13 Jan 2026 11:57:49 -0800 Subject: [PATCH 2/7] feat: add filesystem tools to aic server and bump version to 0.1.3 --- .aic/graph.db | Bin 32768 -> 49152 bytes aic/__init__.py | 0 aic/server.py | 135 ++++++++++++++++++++++++++++++ commands/conductor/implement.toml | 2 +- commands/conductor/newTrack.toml | 2 +- commands/conductor/setup.toml | 2 +- gemini-extension.json | 14 +++- pyproject.toml | 6 +- 8 files changed, 154 insertions(+), 7 deletions(-) create mode 100644 aic/__init__.py create mode 100644 aic/server.py diff --git a/.aic/graph.db b/.aic/graph.db index 03e0a24b0d50f0ef69a5583ae3ae154928c969ae..cc58f813bf4fe2b631852bddbbc8522c026e0ff1 100644 GIT binary patch literal 49152 zcmeHQTZ|jmdFG0g){#xpLEvs=DblRu4N32s;{BplaW@h-79uOo?h1*a7{uYs8SPZu zAu}_Sw3ZBa8x%>=5b?s7XLos_Ax3hibY~>=k$mJ5p7A`K##SBy9ckM5CLCbBkfzzYw zHkGPfq>rP!YjcFeU)>@kC`tNrB!UFp8iNE1r=luC$wmvZ#cVg)Xpnu2ce&FuCje$U z%o*6Oh|8_D)$3O`mCU89t<`Ipp)w!YtIU-Dc9A~fP?Mg(+H@3u)BlEFpG^Nx`oGfu zp8n9i#9Q)5BJfxv@QpW*%rEaI|Ni;{Gt~2bJIeQWSgBKLG_+=^SSwYvdbzCD8V#lu z3e}ocu5qnWFQ|h(sUQPjAuKo{0ld#1@%$LN{a+y!cF z=b-!9yuN;IL%6;Y1>eZ`o)wA>4>=xGy%Z8h9g&jj4E={)0;f~u0mGfSblvqBW zKYko#8K&X1wN5C+OB5sL`CYCKz)E+2C`-{K27Q}bP8-`db%-kpb~)|ath=K4$OL6L zeDd4Oy48l#I6H24>!NY??AezgKw;gk+ZQX&78nvW-sh&KK$qrh8MZ=QB(s&vSJpRQ z&BHSul84X5psz9L$*JS^#)CGd?vEgJVLw1I@ude42I)jzx&J=0nT^)f)}>A5d1Y<= zb1!@4|H4Z!Hn}CfsJxh2QQVi2%jI?Ms9QagXLDht8G66s<+RD~Q%$gd?(5;EYWRca zNZr0+_3qrrxWjVhH8c<_a0lus)%p<#yb+&paHk9|4&F3j91>7vvbom|2gP(^_CQ4K zGTT+RhrKPCtE8G2^<#@%#mJ}(W*W#%V2F0}VtWTW<3Lh$3o@v^_Bno?UdyASH-mthcuuP#rsL)6Hm!laY72vCX zHaCA}H~H2LngMAYnERBAohED6xLVUowQ{{#EfwnUTU86ZUTW6$hFUMwnhjkq@M^tQ zszE$nZB|&V+Gtj|UcsJA&V9W8oMvZo?&D8)M+Sa5_wj5O?i}zjo%?_icl;Rq`3NrH zKY`l+JTB)Sn>&tQ({IMoZ>E2f{*UxG(;uYYOMee;${&e&Zma_KKKfc0+vqwx&ggVns-+=4sDPQ>eT4!0fqJTLL~ot||| z@AmFM$kC_cOB8qD7+@&u?<7+3MY^GO4Y-W^=?8|={?3u7;%7dBGRan$On))}R6mQ^n zS652^IQCh3&b<^PLD09C!L{~FGpp=)>onICs*RjBreFC8%{D8m*dMMb0|=Vz$GgR_OC`mqZE}o%s zM}h}I>{H)snl;$AEthk z`rFhiDKm9G{V(Z%NdGeZ_4Km%Z<_WgpOFYi1SA3y0f~S_Kq4R!kO)WwBmxIU;1h|Z z=%S~nLoI?nW6p_pNG_(Wpa6*&}v z}NBHdS?7GJb~!1*3Uyl zuAfXSxe57ID=dL>IeNGMf#kIa;kklVQ@ ze-V4~7svkZz%5Wlln5Lcfv=q)2eofJGdl-0at#C@b6r70T(BW##CaZ_*mQ7*MK6Ab z-$fU@E=P}jmjuM({T^KO!Q0aH;SId;rT3fkej|U&A+iVQ{T7f>1ndOUx#Umu{C~i7 zp*@Qn3UC>1%k);Uof8!WIK&;1^dT+-iKGv~nu2Tzl0F20z*giE-vmvDT%i#pk$plW zgAsa5NgqP<6JLuWu7C(jKtP1?fb=1chyVW&w-I!^qx?q#KT@u@4SMdPB*g}net3(I7A;p z`hWc082pz%5&?<8vMz! zfv&9I19HHgwS#nnoq^HS6b2-I+st$f)#CsKlF1$Arp5YO`QsZZGi{=hd}YT0CQJp5 zv-S#@Xn-OQXg%$KFcEGt;16{xJvJ?_Y`7;C0sSZIFq;E!>Z*0qb~6^{dlO>fl>(&z z^%e8h12Zw+jpUXJomYT7j}5vGNa!i*z_Pe0JSh;B2ApnsW@DhL+_v>Wx4YvSl7VjV z9;L@MP@4y)^M+e6B+IhGCf+7P~KpWTV-iBmh((3cT z!EHTnapn>u4!ujP4Hxk6tpl}aCWvK6UYC%mb>3wTO2j?zZZxSDKG&-ujj==h8PJHecNh3@AP_Ii0YDI3J-+c%Hzr&Iu#q} zVL3-b3kHA`;SeQu#egsrAw*(NBETf~@8tWfS$ zSx42YMHt0_;JsLB>Q%i|>@=!cNiTMajgDGXfz!RLRm17d-b=>sY?JJt{OJs3OO;BT zcp@6%lt)}XmH%@zN7f}7h*eRLIr8H%C2Gp##>bu#b@{l~k!* ztyQX2fyOIUs(p`fkSf)bhd{^!8Bk5aAK_pUP#;J1q){~hAHvztNK`>XImm$wQl(P- zqnyA*+D3C@U{57es(-}JLofj1`}1stXew+wJ=(~W#qW6f)QWH1B;();SxC{&>+MK^ ziT4H^#sWsSEk3Z|a0;;Ag89u$QQ7Wk0|=vItl&0T=g?d?&K`FS&NqS?lfV1J;$4iH z1;ei7zV=>A=ln{S{5^Jin8#uoah8! zu-OSlKL0%gHr>E>mF*3j{=h*qAX?DKvIr;Vgf>Jt;>q!CzInlz8>E04r8GiI0%ig? zRX`Aah0?*vSnknF(cEq`IPJ%KBS9+;XOi$d9bp2DfOrf~8;)4SbH@C8xQV&F1#pM= z;P9NbJ?OYGC~jn@bEyBvZp30YX0HE#_iSQ*X*c=vMN*}G=Y(pbtm}GJ!&fT7frfYLRt#r^HK*s@~K~mAY2$s2x`0om#P>mP=UGqx10)v*3-Q&IdU7 z!zrEq7#zF?0evepY zoXx>i@Kyr4F< zBC8deaH@F?Rt8{ML9Px26TF9gbh%s|;OfBf7xr*z#3f9Ss{>OWi4MRWuug^jQ@B7H zthP(k5w>mGqm!$VnFgyhM|D5#ypY7yKruN5&z}uDid7u*W{QrI4A0V9NoK++Sm=KtGX(4bY_RAVecBTR2mv^Ed~~#xVn(N(sP2Ox7EtsX5uN6RvGCDy?gnL_nzm9zat}nL z?U&zu#LQLX{RPZc_z7>yr%y3n=T_k33KtMGmd{G`QG+-=@Q4b#jPUhhwHFr`Lmw_l zW~JN%TBBMw$Z#|&@gkE1uM`FXe1f0w1qOXh zy#c%J>o_P~Ooj?r7T_zqhka;)2g{tl3?waM*GQ9W1lvZI=BZS~6(g4$mn5ygYSmKJ zuIcWha58W4VS-2+j+Rf6oQT`y+*o+242uGEx%E3(geTx|V*?@?S}cfoV5CGDx35jg z5tSBaL|iqp8JebALHviigvQmZOeU!|6UUwM#E8P}2^k2--jYEt=>L897+jjaGP@Ac zB3%;bQZJaN@x9YhR11`D37@-jXx7j5=aTgpE^M7ix|XAEs#RuJ@7t_tRn}PpXI~YL z-_|{gnVi~a=nt5E7hi9E!0$Us#AD8yg(se+rcBGNIP9Lqm9EXI)*rFUn(G{~!;T*x zf6lwiYgnE>Y!&9MHMp+knAXDzKevGw4)fHjIZgcbVv2SIsnZMgr1;Yn8Z8FOK?+UZ d{~x!#b6CeWt+Pbk#7-+WnoxP@x}F$*|8Fzz*XIBL diff --git a/aic/__init__.py b/aic/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/aic/server.py b/aic/server.py new file mode 100644 index 0000000..e6d49b1 --- /dev/null +++ b/aic/server.py @@ -0,0 +1,135 @@ +import asyncio +import os +from mcp.server.fastmcp import FastMCP +from aic.db import init_db, upsert_node, update_edges, mark_dirty +from aic.skeleton import RichSkeletonizer +from aic.utils import calculate_hash +from aic.cli import resolve_dep_to_path, get_context + +# Initialize FastMCP server +mcp = FastMCP("aic") + +@mcp.tool() +async def index_repo(root_dir: str) -> str: + """ + Indexes the repository to build a semantic dependency graph. + Scans for Python files, generates skeletons, and updates the SQLite database. + + Args: + root_dir: The root directory of the repository to index. + """ + init_db() + skeletonizer = RichSkeletonizer() + indexed_count = 0 + + # Ensure we use absolute path for walking + abs_root_dir = os.path.abspath(root_dir) + + for root, dirs, files in os.walk(abs_root_dir): + # Exclusions + dirs[:] = [d for d in dirs if d not in ('.git', '.aic', '__pycache__', 'node_modules')] + + for file in files: + if not file.endswith('.py'): + continue + + file_path = os.path.join(root, file) + rel_path = os.path.relpath(file_path, abs_root_dir) + + try: + with open(file_path, 'r') as f: + content = f.read() + except Exception as e: + print(f"Skipping {rel_path}: {e}") + continue + + current_hash = calculate_hash(content) + # We skip optimization for now to ensure we always get latest state or implement get_node in db + # existing = get_node(rel_path) + # if existing and existing['hash'] == current_hash: + # continue + + skeleton, dependencies = skeletonizer.skeletonize(content, rel_path) + upsert_node(rel_path, current_hash, skeleton) + mark_dirty(rel_path) + + # Resolve dependencies to file paths + resolved_deps = [] + for dep in dependencies: + resolved = resolve_dep_to_path(dep, rel_path, abs_root_dir) + if resolved: + resolved_deps.append(resolved) + + update_edges(rel_path, resolved_deps) + indexed_count += 1 + + return f"Successfully indexed {indexed_count} files in {abs_root_dir}" + +@mcp.tool() +async def get_file_context(file_path: str) -> str: + """ + Retrieves the extensive context for a file, including its skeleton and its direct dependencies' skeletons. + + Args: + file_path: Relative path to the file to get context for. + """ + try: + return get_context(file_path) + except Exception as e: + return f"Error retrieving context for {file_path}: {str(e)}" + +@mcp.tool() +async def list_directory(path: str) -> str: + """ + Lists the files and directories in the specified path. + + Args: + path: The directory path to list. + """ + try: + abs_path = os.path.abspath(path) + if not os.path.exists(abs_path): + return f"Error: Path '{path}' not found." + + items = [] + for name in os.listdir(abs_path): + full_path = os.path.join(abs_path, name) + is_dir = os.path.isdir(full_path) + items.append(f"{name}{'/' if is_dir else ''}") + + return "\n".join(sorted(items)) + except Exception as e: + return f"Error listing directory '{path}': {str(e)}" + +@mcp.tool() +async def run_shell_command(command: str, cwd: str) -> str: + """ + Executes a shell command. + + Args: + command: The command line string to execute. + cwd: Current working directory (use "." for current). + """ + try: + # Resolve "." to None if needed, or just let shell handle it if we pass it? + # asyncio.create_subprocess_shell handles cwd="." fine. + process = await asyncio.create_subprocess_shell( + command, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=cwd + ) + stdout, stderr = await process.communicate() + + output = f"Exit Code: {process.returncode}\n" + if stdout: + output += f"\nStandard Output:\n{stdout.decode().strip()}" + if stderr: + output += f"\nStandard Error:\n{stderr.decode().strip()}" + + return output + except Exception as e: + return f"Error executing command: {str(e)}" + +if __name__ == "__main__": + mcp.run() diff --git a/commands/conductor/implement.toml b/commands/conductor/implement.toml index 1959f17..63c8ae1 100644 --- a/commands/conductor/implement.toml +++ b/commands/conductor/implement.toml @@ -68,7 +68,7 @@ CRITICAL: You must validate the success of every tool call. If any tool call fai c. **Error Handling:** If you fail to read any of these files, you MUST stop and inform the user of the error. 4. **Semantic Dependency Awareness (AIC):** - - **Protocol:** During the implementation of any task, if you encounter imported internal modules or functions whose names are not fully self-describing or whose implementation is not directly in your current context, you SHOULD run `python3 -m aic.cli context ` for those modules. This will provide you with the "Rich Skeleton" (signatures, docstrings, and IO/side-effects) of the dependency without consuming excessive tokens. + - **Protocol:** During the implementation of any task, if you encounter imported internal modules or functions whose names are not fully self-describing or whose implementation is not directly in your current context, you SHOULD call the `aic_get_file_context` tool for those modules. This will provide you with the "Rich Skeleton" (signatures, docstrings, and IO/side-effects) of the dependency without consuming excessive tokens. 4. **Execute Tasks and Update Track Plan:** a. **Announce:** State that you will now execute the tasks from the track's `plan.md` by following the procedures in `workflow.md`. diff --git a/commands/conductor/newTrack.toml b/commands/conductor/newTrack.toml index 0b99a75..dd9cd5c 100644 --- a/commands/conductor/newTrack.toml +++ b/commands/conductor/newTrack.toml @@ -71,7 +71,7 @@ CRITICAL: You must validate the success of every tool call. If any tool call fai * Tailor the questions to the specific request. 4. **Semantic Context (Optional):** - - **Recommendation:** If the track involves complex interactions with existing code, suggest that you can run `python3 -m aic.cli context ` for relevant files to better understand the API contracts and dependencies. This will help you generate a more accurate specification and plan. + - **Recommendation:** If the track involves complex interactions with existing code, you SHOULD call the `aic_get_file_context` tool for relevant files to better understand the API contracts and dependencies. This will help you generate a more accurate specification and plan. 3. **Draft `spec.md`:** Once sufficient information is gathered, draft the content for the track's `spec.md` file, including sections like Overview, Functional Requirements, Non-Functional Requirements (if any), Acceptance Criteria, and Out of Scope. diff --git a/commands/conductor/setup.toml b/commands/conductor/setup.toml index cdb37f8..7828053 100644 --- a/commands/conductor/setup.toml +++ b/commands/conductor/setup.toml @@ -325,7 +325,7 @@ CRITICAL: When determining model complexity, ALWAYS select the "flash" model, re - The workflow file that was copied. 2. **Semantic Indexing (AIC):** - **Announce Action:** Inform the user: "Finally, I will perform a semantic indexing of your project using AIC (AI Compiler). This creates a lightweight dependency graph to help me understand your code better." - - **Action:** Execute `python3 -m aic.cli index`. + - **Action:** Call the `aic_index` tool to index the repository. 3. **Transition to initial plan and track generation:** Announce that the initial setup is complete and you will now proceed to define the first track for the project. --- diff --git a/gemini-extension.json b/gemini-extension.json index aa75229..2b4d4a2 100644 --- a/gemini-extension.json +++ b/gemini-extension.json @@ -1,5 +1,15 @@ { "name": "conductor", - "version": "0.1.1", - "contextFileName": "GEMINI.md" + "version": "0.1.3", + "contextFileName": "GEMINI.md", + "mcpServers": { + "aic": { + "command": "python3", + "args": [ + "-m", + "aic.server" + ], + "cwd": "${extensionPath}" + } + } } diff --git a/pyproject.toml b/pyproject.toml index ab6349e..84afbfb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "conductor-aic" -version = "0.1.1" +version = "0.1.3" description = "Conductor Extension for Gemini CLI with AI Compiler (AIC)" readme = "README.md" requires-python = ">=3.8" @@ -12,7 +12,9 @@ license = "Apache-2.0" authors = [ { name = "Google DeepMind" }, ] -dependencies = [] +dependencies = [ + "mcp", +] [project.scripts] aic = "aic.cli:main" From 16747f5bcd445270f4b4d348abf36601adc2cf75 Mon Sep 17 00:00:00 2001 From: Ernan Anguiano Date: Tue, 13 Jan 2026 12:03:47 -0800 Subject: [PATCH 3/7] docs: update README to reflect MCP architecture and tools --- README.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fb4c9dd..7f0bba6 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ The philosophy behind Conductor is simple: control your code. By treating contex - **Iterate safely**: Review plans before code is written, keeping you firmly in the loop. - **Work as a team**: Set project-level context for your product, tech stack, and workflow preferences that become a shared foundation for your team. - **Build on existing projects**: Intelligent initialization for both new (Greenfield) and existing (Brownfield) projects. -- **Semantic Awareness (AIC)**: Automatically indexes your codebase into "Rich Skeletons" using the AI Compiler (AIC). This provides the agent with deep context about your API contracts and dependencies with minimal token overhead. +- **Semantic Awareness (AIC)**: Automatically indexes your codebase into "Rich Skeletons" using the AI Compiler (AIC). This functionality is powered by a local **Model Context Protocol (MCP)** server that exposes tools for semantic indexing and context retrieval ( `index_repo`, `get_file_context`) directly to the Gemini agent. - **Smart revert**: A git-aware revert command that understands logical units of work (tracks, phases, tasks) rather than just commit hashes. ## Installation @@ -113,6 +113,18 @@ During implementation, you can also: | `/conductor:status` | Displays the current progress of the tracks file and active tracks. | Reads `conductor/tracks.md` | | `/conductor:revert` | Reverts a track, phase, or task by analyzing git history. | Reverts git history | +## Architecture + +Conductor leverages the **Model Context Protocol (MCP)** to provide deep, local integration with your codebase. + +- **Client**: The Gemini CLI acts as the MCP client. +- **Server**: The `aic` package runs as a local MCP server (`python3 -m aic.server`). +- **Tools**: The server exposes the following tools to the agent: + - `index_repo`: Builds/updates the semantic dependency graph. + - `get_file_context`: Retrieves token-optimized skeletons for files and their dependencies. + - `list_directory`: Provides filesystem visibility. + - `run_shell_command`: Allows safe execution of setup and maintenance commands. + ## Resources - [AI Compiler Patent](https://www.tdcommons.org/dpubs_series/8241/): Semantic Dependency Graph for AI Agents From 8e8f69c3013e1862de934c5b6378f8e9edfd7fe2 Mon Sep 17 00:00:00 2001 From: Ernan Anguiano Date: Wed, 14 Jan 2026 22:19:35 -0800 Subject: [PATCH 4/7] fix(aic): rename tools, fix indexing scope, and add universal file support --- README.md | 8 +++--- aic/cli.py | 57 ++++++++++++++++---------------------- aic/server.py | 28 +++++++++++-------- aic/skeleton.py | 20 +++++++++++++- aic/utils.py | 64 +++++++++++++++++++++++++++++++++++++++++++ gemini-extension.json | 6 ++-- 6 files changed, 130 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index 7f0bba6..fa3228c 100644 --- a/README.md +++ b/README.md @@ -120,10 +120,10 @@ Conductor leverages the **Model Context Protocol (MCP)** to provide deep, local - **Client**: The Gemini CLI acts as the MCP client. - **Server**: The `aic` package runs as a local MCP server (`python3 -m aic.server`). - **Tools**: The server exposes the following tools to the agent: - - `index_repo`: Builds/updates the semantic dependency graph. - - `get_file_context`: Retrieves token-optimized skeletons for files and their dependencies. - - `list_directory`: Provides filesystem visibility. - - `run_shell_command`: Allows safe execution of setup and maintenance commands. + - `aic_index`: Builds/updates the semantic dependency graph. + - `aic_get_file_context`: Retrieves token-optimized skeletons for files and their dependencies. + - `aic_list_directory`: Provides filesystem visibility. + - `aic_run_shell_command`: Allows safe execution of setup and maintenance commands. ## Resources diff --git a/aic/cli.py b/aic/cli.py index 06df88b..55061b2 100644 --- a/aic/cli.py +++ b/aic/cli.py @@ -1,25 +1,38 @@ import argparse import os from aic.db import init_db, upsert_node, get_node, get_dependencies, update_edges, mark_dirty -from aic.skeleton import RichSkeletonizer -from aic.utils import calculate_hash +from aic.skeleton import UniversalSkeletonizer +from aic.utils import calculate_hash, resolve_dep_to_path, get_ignore_patterns, should_ignore def index_repo(root_dir="."): init_db() - skeletonizer = RichSkeletonizer() + skeletonizer = UniversalSkeletonizer() + ignore_patterns = get_ignore_patterns(root_dir) + + indexed_count = 0 + for root, dirs, files in os.walk(root_dir): # Exclusions - dirs[:] = [d for d in dirs if d not in ('.git', '.aic', '__pycache__', 'node_modules')] + dirs[:] = [d for d in dirs if not should_ignore(d, ignore_patterns)] for file in files: - if not file.endswith('.py'): + if should_ignore(file, ignore_patterns): continue file_path = os.path.join(root, file) rel_path = os.path.relpath(file_path, root_dir) - with open(file_path, 'r') as f: - content = f.read() + # Skip non-text files to avoid reading binaries + # Simple heuristic: check extension or try reading + try: + with open(file_path, 'r', encoding='utf-8', errors='strict') as f: + content = f.read() + except UnicodeDecodeError: + # print(f"Skipping binary file: {rel_path}") + continue + except Exception as e: + print(f"Skipping {rel_path}: {e}") + continue current_hash = calculate_hash(content) existing = get_node(rel_path) @@ -40,33 +53,9 @@ def index_repo(root_dir="."): resolved_deps.append(resolved) update_edges(rel_path, resolved_deps) - -def resolve_dep_to_path(dep_name, current_file, root_dir): - """Simple heuristic to resolve module name to file path.""" - # Handle relative imports (e.g., '.module' or '..module') - if dep_name.startswith('.'): - levels = 0 - while dep_name.startswith('.'): - levels += 1 - dep_name = dep_name[1:] - - curr_dir = os.path.dirname(current_file) - for _ in range(levels - 1): - curr_dir = os.path.dirname(curr_dir) - - base_path = os.path.join(curr_dir, dep_name.replace('.', os.sep)) - else: - base_path = os.path.join(root_dir, dep_name.replace('.', os.sep)) - - candidates = [ - base_path + ".py", - os.path.join(base_path, "__init__.py") - ] - - for cand in candidates: - if os.path.exists(cand): - return os.path.relpath(cand, root_dir) - return None + indexed_count += 1 + + print(f"Finished indexing. Processed {indexed_count} files.") def get_context(file_path): node = get_node(file_path) diff --git a/aic/server.py b/aic/server.py index e6d49b1..4f182b6 100644 --- a/aic/server.py +++ b/aic/server.py @@ -2,15 +2,15 @@ import os from mcp.server.fastmcp import FastMCP from aic.db import init_db, upsert_node, update_edges, mark_dirty -from aic.skeleton import RichSkeletonizer -from aic.utils import calculate_hash -from aic.cli import resolve_dep_to_path, get_context +from aic.skeleton import UniversalSkeletonizer +from aic.utils import calculate_hash, resolve_dep_to_path, get_ignore_patterns, should_ignore +from aic.cli import get_context # Initialize FastMCP server mcp = FastMCP("aic") @mcp.tool() -async def index_repo(root_dir: str) -> str: +async def aic_index(root_dir: str) -> str: """ Indexes the repository to build a semantic dependency graph. Scans for Python files, generates skeletons, and updates the SQLite database. @@ -19,26 +19,32 @@ async def index_repo(root_dir: str) -> str: root_dir: The root directory of the repository to index. """ init_db() - skeletonizer = RichSkeletonizer() + skeletonizer = UniversalSkeletonizer() indexed_count = 0 # Ensure we use absolute path for walking abs_root_dir = os.path.abspath(root_dir) + ignore_patterns = get_ignore_patterns(abs_root_dir) for root, dirs, files in os.walk(abs_root_dir): # Exclusions - dirs[:] = [d for d in dirs if d not in ('.git', '.aic', '__pycache__', 'node_modules')] + dirs[:] = [d for d in dirs if not should_ignore(d, ignore_patterns)] for file in files: - if not file.endswith('.py'): + if should_ignore(file, ignore_patterns): continue file_path = os.path.join(root, file) rel_path = os.path.relpath(file_path, abs_root_dir) + # Skip non-text files to avoid reading binaries + # Simple heuristic: check extension or try reading try: - with open(file_path, 'r') as f: + with open(file_path, 'r', encoding='utf-8', errors='strict') as f: content = f.read() + except UnicodeDecodeError: + # print(f"Skipping binary file: {rel_path}") + continue except Exception as e: print(f"Skipping {rel_path}: {e}") continue @@ -66,7 +72,7 @@ async def index_repo(root_dir: str) -> str: return f"Successfully indexed {indexed_count} files in {abs_root_dir}" @mcp.tool() -async def get_file_context(file_path: str) -> str: +async def aic_get_file_context(file_path: str) -> str: """ Retrieves the extensive context for a file, including its skeleton and its direct dependencies' skeletons. @@ -79,7 +85,7 @@ async def get_file_context(file_path: str) -> str: return f"Error retrieving context for {file_path}: {str(e)}" @mcp.tool() -async def list_directory(path: str) -> str: +async def aic_list_directory(path: str) -> str: """ Lists the files and directories in the specified path. @@ -102,7 +108,7 @@ async def list_directory(path: str) -> str: return f"Error listing directory '{path}': {str(e)}" @mcp.tool() -async def run_shell_command(command: str, cwd: str) -> str: +async def aic_run_shell_command(command: str, cwd: str) -> str: """ Executes a shell command. diff --git a/aic/skeleton.py b/aic/skeleton.py index 734ba6b..42d5401 100644 --- a/aic/skeleton.py +++ b/aic/skeleton.py @@ -1,13 +1,17 @@ import ast import os -class RichSkeletonizer(ast.NodeVisitor): +class PythonSkeletonizer(ast.NodeVisitor): def __init__(self): + self.reset() + + def reset(self): self.skeleton = [] self.dependencies = set() self.imports = [] def skeletonize(self, source_code, path): + self.reset() try: tree = ast.parse(source_code) except Exception as e: @@ -106,3 +110,17 @@ def _analyze_effects(self, node): if calls: res.append(f"CALLS: {' | '.join(list(set(calls))[:5])}") return " | ".join(res) + +class UniversalSkeletonizer: + def __init__(self): + self.py_skeletonizer = PythonSkeletonizer() + + def skeletonize(self, source_code, path): + if path.endswith('.py'): + return self.py_skeletonizer.skeletonize(source_code, path) + else: + # For non-Python files, treat content as the skeleton + # Limit size to avoid DB bloat (e.g. 100KB) + if len(source_code) > 100 * 1024: + return f"# Content truncated (size: {len(source_code)} bytes)\n" + source_code[:100*1024] + "...", set() + return source_code, set() diff --git a/aic/utils.py b/aic/utils.py index db7ff3c..04bb5df 100644 --- a/aic/utils.py +++ b/aic/utils.py @@ -1,6 +1,70 @@ import hashlib +import os +import fnmatch def calculate_hash(content): if isinstance(content, str): content = content.encode('utf-8') return hashlib.sha256(content).hexdigest() + +def get_ignore_patterns(root_dir): + """ + Loads ignore patterns from .geminiignore and .gitignore, plus defaults. + """ + # Defaults + patterns = {'.git', '.aic', '__pycache__', 'node_modules', '.DS_Store', 'venv', '.venv', 'env', '.env', 'dist', 'build'} + + for filename in ['.geminiignore', '.gitignore']: + path = os.path.join(root_dir, filename) + if os.path.exists(path): + try: + with open(path, 'r') as f: + for line in f: + line = line.strip() + if not line or line.startswith('#'): + continue + # Normalize pattern: remove leading/trailing slashes for simple matching + # This is a naive implementation; proper gitignore handling is complex + clean_line = line.rstrip('/') + if clean_line: + patterns.add(clean_line) + except Exception: + pass # Fail silently on read errors + + return list(patterns) + +def should_ignore(name, patterns): + """ + Checks if a name matches any of the ignore patterns. + """ + for pattern in patterns: + if fnmatch.fnmatch(name, pattern): + return True + return False + +def resolve_dep_to_path(dep_name, current_file, root_dir): + """Simple heuristic to resolve module name to file path.""" + # Handle relative imports (e.g., '.module' or '..module') + if dep_name.startswith('.'): + levels = 0 + while dep_name.startswith('.'): + levels += 1 + dep_name = dep_name[1:] + + curr_dir = os.path.dirname(current_file) + for _ in range(levels - 1): + curr_dir = os.path.dirname(curr_dir) + + base_path = os.path.join(curr_dir, dep_name.replace('.', os.sep)) + else: + base_path = os.path.join(root_dir, dep_name.replace('.', os.sep)) + + candidates = [ + base_path + ".py", + os.path.join(base_path, "__init__.py") + ] + + for cand in candidates: + if os.path.exists(cand): + return os.path.relpath(cand, root_dir) + return None diff --git a/gemini-extension.json b/gemini-extension.json index 2b4d4a2..9441a19 100644 --- a/gemini-extension.json +++ b/gemini-extension.json @@ -6,10 +6,10 @@ "aic": { "command": "python3", "args": [ - "-m", - "aic.server" + "-c", + "import sys; sys.path.append('${extensionPath}'); import aic.server; aic.server.mcp.run()" ], - "cwd": "${extensionPath}" + "cwd": "${workspaceFolder}" } } } From f030adfe3bae16348dc94e0bc9656002f34da8fa Mon Sep 17 00:00:00 2001 From: Ernan Anguiano Date: Wed, 14 Jan 2026 22:46:02 -0800 Subject: [PATCH 5/7] docs: add testing and usage guide --- TESTING.md | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 TESTING.md diff --git a/TESTING.md b/TESTING.md new file mode 100644 index 0000000..3deafb0 --- /dev/null +++ b/TESTING.md @@ -0,0 +1,77 @@ +# Conductor AIC Extension - Testing Guide + +This guide explains how to install the updated Conductor extension, verify the new AIC features (universal indexing, ignore patterns), and understand the token savings. + +## 1. Installation + +Since this update is currently in a Pull Request (or your fork), you should install it directly from the source to test. + +### Option A: Install from your Fork (Recommended) +This ensures you are testing the exact code we just pushed. + +1. **Uninstall existing version (if any):** + ```bash + gemini extensions uninstall conductor + ``` + +2. **Install from the fork:** + ```bash + gemini extensions install https://github.com/nan-bit/conductor-aic --branch main + ``` + +### Option B: Install from Local Directory +If you want to test changes before pushing. + +1. Navigate to the parent directory of this repo. +2. Run: + ```bash + gemini extensions install ./conductor-aic + ``` + +## 2. Running Conductor + +1. **Navigate to a project directory** (e.g., the repo you want to work on). +2. **Initialize Conductor:** + ```bash + /conductor:setup + ``` +3. **Observe the Indexing:** + During the setup, the agent will announce it is performing "semantic indexing". + * **Verify Fix:** Watch the output. It should now index **all text files** (not just Python) but **exclude** `node_modules` and other ignored directories (thanks to the `.gitignore` fix). + +## 3. Verifying AIC & Token Savings + +The core value of AIC is reducing context size by providing "Rich Skeletons" (summaries) of files instead of raw content. + +### How to See it in Action: + +1. **Start a Task:** + ```bash + /conductor:newTrack "Update the README to include installation instructions" + /conductor:implement + ``` + +2. **Watch the Agent's Tool Use:** + * As the agent explores the codebase, look for calls to `aic_get_file_context`. + * **The Saving:** Instead of calling `read_file` (which loads 100% of the text), it calls `aic_get_file_context`. + * **Example:** For a 500-line Python file, `aic_get_file_context` might only return ~50 lines of class/function signatures. This is a **~90% token reduction** for that file interaction. + +3. **Check Session Stats:** + You can view the total token usage for your current session: + ```bash + /stats model + ``` + *Compare this against a session where you manually `read_file` many large files to see the difference.* + +## 4. Updating + +If you push more changes to the repo, you can update the extension locally: + +```bash +gemini extensions update conductor +``` + +## 5. Troubleshooting + +* **"Indexed only 1 file":** If this happens, check your `.gitignore`. We now respect it strictly. +* **"Tool not found":** Ensure you uninstalled the old version and installed the new one. The tool names changed from `index_repo` to `aic_index`. From 799a15bc9fc1071f3fd08026c65e097200685d44 Mon Sep 17 00:00:00 2001 From: Ernan Anguiano Date: Wed, 14 Jan 2026 23:14:14 -0800 Subject: [PATCH 6/7] docs: remove outdated and incorrect testing instructions --- TESTING.md | 77 ------------------------------------------------------ 1 file changed, 77 deletions(-) delete mode 100644 TESTING.md diff --git a/TESTING.md b/TESTING.md deleted file mode 100644 index 3deafb0..0000000 --- a/TESTING.md +++ /dev/null @@ -1,77 +0,0 @@ -# Conductor AIC Extension - Testing Guide - -This guide explains how to install the updated Conductor extension, verify the new AIC features (universal indexing, ignore patterns), and understand the token savings. - -## 1. Installation - -Since this update is currently in a Pull Request (or your fork), you should install it directly from the source to test. - -### Option A: Install from your Fork (Recommended) -This ensures you are testing the exact code we just pushed. - -1. **Uninstall existing version (if any):** - ```bash - gemini extensions uninstall conductor - ``` - -2. **Install from the fork:** - ```bash - gemini extensions install https://github.com/nan-bit/conductor-aic --branch main - ``` - -### Option B: Install from Local Directory -If you want to test changes before pushing. - -1. Navigate to the parent directory of this repo. -2. Run: - ```bash - gemini extensions install ./conductor-aic - ``` - -## 2. Running Conductor - -1. **Navigate to a project directory** (e.g., the repo you want to work on). -2. **Initialize Conductor:** - ```bash - /conductor:setup - ``` -3. **Observe the Indexing:** - During the setup, the agent will announce it is performing "semantic indexing". - * **Verify Fix:** Watch the output. It should now index **all text files** (not just Python) but **exclude** `node_modules` and other ignored directories (thanks to the `.gitignore` fix). - -## 3. Verifying AIC & Token Savings - -The core value of AIC is reducing context size by providing "Rich Skeletons" (summaries) of files instead of raw content. - -### How to See it in Action: - -1. **Start a Task:** - ```bash - /conductor:newTrack "Update the README to include installation instructions" - /conductor:implement - ``` - -2. **Watch the Agent's Tool Use:** - * As the agent explores the codebase, look for calls to `aic_get_file_context`. - * **The Saving:** Instead of calling `read_file` (which loads 100% of the text), it calls `aic_get_file_context`. - * **Example:** For a 500-line Python file, `aic_get_file_context` might only return ~50 lines of class/function signatures. This is a **~90% token reduction** for that file interaction. - -3. **Check Session Stats:** - You can view the total token usage for your current session: - ```bash - /stats model - ``` - *Compare this against a session where you manually `read_file` many large files to see the difference.* - -## 4. Updating - -If you push more changes to the repo, you can update the extension locally: - -```bash -gemini extensions update conductor -``` - -## 5. Troubleshooting - -* **"Indexed only 1 file":** If this happens, check your `.gitignore`. We now respect it strictly. -* **"Tool not found":** Ensure you uninstalled the old version and installed the new one. The tool names changed from `index_repo` to `aic_index`. From 9ef414a899ee0841f1b3bcc997d441be07e76b7b Mon Sep 17 00:00:00 2001 From: Ernan Anguiano Date: Wed, 14 Jan 2026 23:30:37 -0800 Subject: [PATCH 7/7] refactor: remove mcp dependency and implement zero-dep server --- aic/server.py | 207 ++++++++++++++++++++++++++++++++++++------ gemini-extension.json | 2 +- pyproject.toml | 4 +- 3 files changed, 179 insertions(+), 34 deletions(-) diff --git a/aic/server.py b/aic/server.py index 4f182b6..5801e66 100644 --- a/aic/server.py +++ b/aic/server.py @@ -1,22 +1,187 @@ import asyncio +import sys +import json +import logging +import inspect import os -from mcp.server.fastmcp import FastMCP +import traceback +from typing import Any, Callable, Dict, List, Optional + from aic.db import init_db, upsert_node, update_edges, mark_dirty from aic.skeleton import UniversalSkeletonizer from aic.utils import calculate_hash, resolve_dep_to_path, get_ignore_patterns, should_ignore from aic.cli import get_context -# Initialize FastMCP server -mcp = FastMCP("aic") +# Configure logging to stderr so it doesn't interfere with JSON-RPC on stdout +logging.basicConfig(stream=sys.stderr, level=logging.INFO) +logger = logging.getLogger("aic-server") + +class MCPServer: + def __init__(self, name: str): + self.name = name + self.tools: Dict[str, Callable] = {} + self.tool_schemas: List[Dict[str, Any]] = [] + + def tool(self): + """Decorator to register a function as a tool.""" + def decorator(func: Callable): + self.register_tool(func) + return func + return decorator + + def register_tool(self, func: Callable): + name = func.__name__ + doc = inspect.getdoc(func) or "" + sig = inspect.signature(func) + + properties = {} + required = [] + + for param_name, param in sig.parameters.items(): + param_type = "string" # Default to string for simplicity in this minimal implementation + if param.annotation == int: + param_type = "integer" + elif param.annotation == bool: + param_type = "boolean" + + properties[param_name] = { + "type": param_type, + "description": f"Parameter {param_name}" + } + # Simple heuristic: parameters without defaults are required + if param.default == inspect.Parameter.empty: + required.append(param_name) + + schema = { + "name": name, + "description": doc, + "inputSchema": { + "type": "object", + "properties": properties, + "required": required + } + } + + self.tools[name] = func + self.tool_schemas.append(schema) + logger.info(f"Registered tool: {name}") -@mcp.tool() + async def handle_request(self, request: Dict[str, Any]) -> Optional[Dict[str, Any]]: + method = request.get("method") + msg_id = request.get("id") + + if method == "tools/list": + return { + "jsonrpc": "2.0", + "id": msg_id, + "result": { + "tools": self.tool_schemas + } + } + + elif method == "tools/call": + params = request.get("params", {}) + tool_name = params.get("name") + tool_args = params.get("arguments", {}) + + if tool_name not in self.tools: + return { + "jsonrpc": "2.0", + "id": msg_id, + "error": { + "code": -32601, + "message": f"Tool not found: {tool_name}" + } + } + + try: + func = self.tools[tool_name] + # Check if async + if inspect.iscoroutinefunction(func): + result = await func(**tool_args) + else: + result = func(**tool_args) + + return { + "jsonrpc": "2.0", + "id": msg_id, + "result": { + "content": [ + { + "type": "text", + "text": str(result) + } + ] + } + } + except Exception as e: + logger.error(f"Error executing {tool_name}: {traceback.format_exc()}") + return { + "jsonrpc": "2.0", + "id": msg_id, + "error": { + "code": -32603, + "message": f"Internal error: {str(e)}" + } + } + + # Handle other MCP lifecycle methods strictly to avoid errors + elif method == "initialize": + return { + "jsonrpc": "2.0", + "id": msg_id, + "result": { + "protocolVersion": "0.1.0", + "capabilities": { + "tools": {} + }, + "serverInfo": { + "name": self.name, + "version": "0.1.0" + } + } + } + elif method == "notifications/initialized": + # No response needed for notifications + return None + + return None + + async def run(self): + logger.info(f"Starting {self.name} server on stdio...") + + # We need to read from stdin line by line (JSON-RPC) + loop = asyncio.get_event_loop() + reader = asyncio.StreamReader() + protocol = asyncio.StreamReaderProtocol(reader) + await loop.connect_read_pipe(lambda: protocol, sys.stdin) + + while True: + try: + line = await reader.readline() + if not line: + break + + message = json.loads(line) + response = await self.handle_request(message) + + if response: + sys.stdout.write(json.dumps(response) + "\n") + sys.stdout.flush() + + except json.JSONDecodeError: + logger.error("Failed to decode JSON from stdin") + except Exception as e: + logger.error(f"Unexpected error: {e}") + break + +server = MCPServer("aic") + +@server.tool() async def aic_index(root_dir: str) -> str: """ Indexes the repository to build a semantic dependency graph. Scans for Python files, generates skeletons, and updates the SQLite database. - - Args: - root_dir: The root directory of the repository to index. """ init_db() skeletonizer = UniversalSkeletonizer() @@ -38,23 +203,17 @@ async def aic_index(root_dir: str) -> str: rel_path = os.path.relpath(file_path, abs_root_dir) # Skip non-text files to avoid reading binaries - # Simple heuristic: check extension or try reading try: with open(file_path, 'r', encoding='utf-8', errors='strict') as f: content = f.read() except UnicodeDecodeError: - # print(f"Skipping binary file: {rel_path}") continue except Exception as e: print(f"Skipping {rel_path}: {e}") continue current_hash = calculate_hash(content) - # We skip optimization for now to ensure we always get latest state or implement get_node in db - # existing = get_node(rel_path) - # if existing and existing['hash'] == current_hash: - # continue - + skeleton, dependencies = skeletonizer.skeletonize(content, rel_path) upsert_node(rel_path, current_hash, skeleton) mark_dirty(rel_path) @@ -71,26 +230,20 @@ async def aic_index(root_dir: str) -> str: return f"Successfully indexed {indexed_count} files in {abs_root_dir}" -@mcp.tool() +@server.tool() async def aic_get_file_context(file_path: str) -> str: """ Retrieves the extensive context for a file, including its skeleton and its direct dependencies' skeletons. - - Args: - file_path: Relative path to the file to get context for. """ try: return get_context(file_path) except Exception as e: return f"Error retrieving context for {file_path}: {str(e)}" -@mcp.tool() +@server.tool() async def aic_list_directory(path: str) -> str: """ Lists the files and directories in the specified path. - - Args: - path: The directory path to list. """ try: abs_path = os.path.abspath(path) @@ -107,18 +260,12 @@ async def aic_list_directory(path: str) -> str: except Exception as e: return f"Error listing directory '{path}': {str(e)}" -@mcp.tool() +@server.tool() async def aic_run_shell_command(command: str, cwd: str) -> str: """ Executes a shell command. - - Args: - command: The command line string to execute. - cwd: Current working directory (use "." for current). """ try: - # Resolve "." to None if needed, or just let shell handle it if we pass it? - # asyncio.create_subprocess_shell handles cwd="." fine. process = await asyncio.create_subprocess_shell( command, stdout=asyncio.subprocess.PIPE, @@ -138,4 +285,4 @@ async def aic_run_shell_command(command: str, cwd: str) -> str: return f"Error executing command: {str(e)}" if __name__ == "__main__": - mcp.run() + asyncio.run(server.run()) \ No newline at end of file diff --git a/gemini-extension.json b/gemini-extension.json index 9441a19..0f7d93e 100644 --- a/gemini-extension.json +++ b/gemini-extension.json @@ -7,7 +7,7 @@ "command": "python3", "args": [ "-c", - "import sys; sys.path.append('${extensionPath}'); import aic.server; aic.server.mcp.run()" + "import sys; sys.path.append('${extensionPath}'); import aic.server; import asyncio; asyncio.run(aic.server.server.run())" ], "cwd": "${workspaceFolder}" } diff --git a/pyproject.toml b/pyproject.toml index 84afbfb..74613ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,9 +12,7 @@ license = "Apache-2.0" authors = [ { name = "Google DeepMind" }, ] -dependencies = [ - "mcp", -] +dependencies = [] [project.scripts] aic = "aic.cli:main"