From a41fe783a07d2b946fa5fd3d6ed75b0c3c27ca67 Mon Sep 17 00:00:00 2001 From: Om Sarraf Date: Sat, 27 Sep 2025 16:54:26 +0530 Subject: [PATCH 1/2] 0g models --- .../llms/llama-index-llms-0g/LICENSE | 21 + .../llms/llama-index-llms-0g/Makefile | 19 + .../llms/llama-index-llms-0g/README.md | 310 ++++++++++++ .../examples/basic_usage.py | 204 ++++++++ .../examples/llamaindex_integration.py | 282 +++++++++++ .../llama_index/__init__.py | 1 + .../llama_index/llms/__init__.py | 1 + .../llama_index/llms/zerog/__init__.py | 3 + .../llama_index/llms/zerog/base.py | 456 ++++++++++++++++++ .../llms/llama-index-llms-0g/pyproject.toml | 102 ++++ .../llama-index-llms-0g/tests/__init__.py | 1 + .../tests/test_zerog_llm.py | 303 ++++++++++++ 12 files changed, 1703 insertions(+) create mode 100644 llama-index-integrations/llms/llama-index-llms-0g/LICENSE create mode 100644 llama-index-integrations/llms/llama-index-llms-0g/Makefile create mode 100644 llama-index-integrations/llms/llama-index-llms-0g/README.md create mode 100644 llama-index-integrations/llms/llama-index-llms-0g/examples/basic_usage.py create mode 100644 llama-index-integrations/llms/llama-index-llms-0g/examples/llamaindex_integration.py create mode 100644 llama-index-integrations/llms/llama-index-llms-0g/llama_index/__init__.py create mode 100644 llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/__init__.py create mode 100644 llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/__init__.py create mode 100644 llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/base.py create mode 100644 llama-index-integrations/llms/llama-index-llms-0g/pyproject.toml create mode 100644 llama-index-integrations/llms/llama-index-llms-0g/tests/__init__.py create mode 100644 llama-index-integrations/llms/llama-index-llms-0g/tests/test_zerog_llm.py diff --git a/llama-index-integrations/llms/llama-index-llms-0g/LICENSE b/llama-index-integrations/llms/llama-index-llms-0g/LICENSE new file mode 100644 index 0000000000..9709184c6b --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 LlamaIndex + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/llama-index-integrations/llms/llama-index-llms-0g/Makefile b/llama-index-integrations/llms/llama-index-llms-0g/Makefile new file mode 100644 index 0000000000..ff3a5267fa --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/Makefile @@ -0,0 +1,19 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --files + +test: ## Run tests via pytest. + pytest tests/ -v + +watch-docs: ## Build and watch documentation. + sphinx-autobuild docs/ docs/_build/html --open-browser --reload-delay 1 --ignore .git --ignore docs/_build --ignore docs/examples --watch $(GIT_ROOT)/llama_index/ + +.PHONY: help format lint test watch-docs diff --git a/llama-index-integrations/llms/llama-index-llms-0g/README.md b/llama-index-integrations/llms/llama-index-llms-0g/README.md new file mode 100644 index 0000000000..afd5266621 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/README.md @@ -0,0 +1,310 @@ +# LlamaIndex LLMs Integration: 0G Compute Network + +This package provides LlamaIndex integration for the 0G Compute Network, enabling decentralized AI inference with verification capabilities. + +## Installation + +```bash +pip install llama-index-llms-0g +``` + +## Prerequisites + +The 0G Compute Network uses Ethereum-based authentication and requires: + +1. **Ethereum Wallet**: You need an Ethereum private key for authentication +2. **0G Tokens**: Fund your account with OG tokens for inference payments +3. 
**Node.js Bridge** (Optional): For full JavaScript SDK integration + +## Quick Start + +### Basic Usage + +```python +from llama_index.llms.zerog import ZeroGLLM + +# Initialize with official model +llm = ZeroGLLM( + model="llama-3.3-70b-instruct", # or "deepseek-r1-70b" + private_key="your_ethereum_private_key_here" +) + +# Simple completion +response = llm.complete("Explain quantum computing in simple terms") +print(response.text) + +# Chat interface +from llama_index.core.llms import ChatMessage, MessageRole + +messages = [ + ChatMessage(role=MessageRole.USER, content="Hello, how are you?") +] +response = llm.chat(messages) +print(response.message.content) +``` + +### Streaming Responses + +```python +# Streaming completion +for chunk in llm.stream_complete("Write a short story about AI"): + print(chunk.delta, end="", flush=True) + +# Streaming chat +messages = [ + ChatMessage(role=MessageRole.USER, content="Tell me about the 0G network") +] +for chunk in llm.stream_chat(messages): + print(chunk.delta, end="", flush=True) +``` + +### Async Usage + +```python +import asyncio + +async def main(): + llm = ZeroGLLM( + model="deepseek-r1-70b", + private_key="your_private_key" + ) + + # Async completion + response = await llm.acomplete("What is machine learning?") + print(response.text) + + # Async streaming + async for chunk in await llm.astream_complete("Explain neural networks"): + print(chunk.delta, end="", flush=True) + +asyncio.run(main()) +``` + +## Configuration Options + +### Official Models + +The integration supports two official 0G Compute Network models: + +| Model | Provider Address | Description | Verification | +|-------|------------------|-------------|--------------| +| `llama-3.3-70b-instruct` | `0xf07240Efa67755B5311bc75784a061eDB47165Dd` | 70B parameter model for general AI tasks | TEE (TeeML) | +| `deepseek-r1-70b` | `0x3feE5a4dd5FDb8a32dDA97Bed899830605dBD9D3` | Advanced reasoning model | TEE (TeeML) | + +### Custom Providers + +```python +# Use a custom provider +llm = ZeroGLLM( + model="custom-model-name", + provider_address="0x1234567890abcdef...", + private_key="your_private_key" +) +``` + +### Advanced Configuration + +```python +llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="your_private_key", + rpc_url="https://evmrpc-testnet.0g.ai", # or mainnet URL + context_window=8192, + max_tokens=1024, + temperature=0.7, + timeout=120.0, + additional_kwargs={ + "top_p": 0.9, + "frequency_penalty": 0.1 + } +) +``` + +## Account Management + +### Funding Your Account + +Before using the service, you need to fund your account with OG tokens: + +```python +# Note: This requires the JavaScript SDK bridge (see Advanced Setup) +# For now, fund your account using the JavaScript SDK directly + +# Example funding (requires JS bridge): +# await broker.ledger.addLedger("0.1") # Add 0.1 OG tokens +``` + +### Checking Balance + +```python +# This would require the JS bridge implementation +# await broker.ledger.getLedger() +``` + +## Advanced Setup (JavaScript SDK Bridge) + +For full functionality including account management and verification, you'll need to set up a bridge to the JavaScript SDK. 
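On the Python side, a thin wrapper that talks to such a bridge can stay very small. The sketch below is a minimal illustration only: it assumes a line-delimited JSON protocol over stdin/stdout and uses placeholder operation names (`initialize`, `getLedger`) that are not part of the real 0G SDK; adapt it to whichever bridge script you implement (for example, the `0g-bridge.js` sketch in Option 1 below).

```python
import json
import subprocess


class ZeroGBridge:
    """Minimal Python wrapper around a hypothetical Node.js bridge script.

    Assumes the bridge reads one JSON request per line on stdin and writes
    one JSON response per line on stdout.
    """

    def __init__(self, script_path: str = "0g-bridge.js") -> None:
        # Spawn the Node.js process once and reuse it for all requests.
        self._proc = subprocess.Popen(
            ["node", script_path],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            text=True,
        )

    def call(self, op: str, **params) -> dict:
        """Send a single {"op": ..., "params": ...} request and wait for the reply."""
        assert self._proc.stdin is not None and self._proc.stdout is not None
        self._proc.stdin.write(json.dumps({"op": op, "params": params}) + "\n")
        self._proc.stdin.flush()
        return json.loads(self._proc.stdout.readline())

    def close(self) -> None:
        self._proc.terminate()


# Illustrative usage (operation names are placeholders, not the real SDK API):
# bridge = ZeroGBridge()
# bridge.call("initialize", privateKey="0x...", rpcUrl="https://evmrpc-testnet.0g.ai")
# print(bridge.call("getLedger"))
# bridge.close()
```

The two options below show the Node.js side of such a bridge.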
+ +### Option 1: Node.js Subprocess Bridge + +Create a Node.js script that handles the 0G SDK operations: + +```javascript +// 0g-bridge.js +const { ethers } = require("ethers"); +const { createZGComputeNetworkBroker } = require("@0glabs/0g-serving-broker"); + +async function initializeBroker(privateKey, rpcUrl) { + const provider = new ethers.JsonRpcProvider(rpcUrl); + const wallet = new ethers.Wallet(privateKey, provider); + return await createZGComputeNetworkBroker(wallet); +} + +// Handle requests from Python +process.stdin.on('data', async (data) => { + const request = JSON.parse(data.toString()); + // Handle different operations + // Send response back to Python +}); +``` + +### Option 2: HTTP Bridge Service + +Create a simple HTTP service that wraps the JavaScript SDK: + +```javascript +// 0g-service.js +const express = require('express'); +const { ethers } = require("ethers"); +const { createZGComputeNetworkBroker } = require("@0glabs/0g-serving-broker"); + +const app = express(); +app.use(express.json()); + +app.post('/initialize', async (req, res) => { + // Initialize broker +}); + +app.post('/inference', async (req, res) => { + // Handle inference requests +}); + +app.listen(3000); +``` + +## Error Handling + +```python +from llama_index.llms.zerog import ZeroGLLM + +try: + llm = ZeroGLLM( + model="invalid-model", + private_key="your_private_key" + ) + response = llm.complete("Hello") +except ValueError as e: + print(f"Configuration error: {e}") +except Exception as e: + print(f"Runtime error: {e}") +``` + +## Integration with LlamaIndex + +### With Query Engines + +```python +from llama_index.core import VectorStoreIndex, SimpleDirectoryReader +from llama_index.llms.zerog import ZeroGLLM + +# Load documents +documents = SimpleDirectoryReader("data").load_data() + +# Create index with 0G LLM +llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="your_private_key" +) + +index = VectorStoreIndex.from_documents(documents) +query_engine = index.as_query_engine(llm=llm) + +# Query +response = query_engine.query("What is the main topic of these documents?") +print(response) +``` + +### With Chat Engines + +```python +from llama_index.core import VectorStoreIndex +from llama_index.llms.zerog import ZeroGLLM + +llm = ZeroGLLM( + model="deepseek-r1-70b", + private_key="your_private_key" +) + +# Create chat engine +chat_engine = index.as_chat_engine(llm=llm) + +# Chat +response = chat_engine.chat("Tell me about the documents") +print(response) +``` + +## Verification and Security + +The 0G Compute Network provides verification capabilities: + +- **TEE (Trusted Execution Environment)**: Official models run in verified environments +- **Cryptographic Proofs**: Responses can be cryptographically verified +- **Decentralized Infrastructure**: No single point of failure + +## Troubleshooting + +### Common Issues + +1. **"Model not found" Error** + ```python + # Make sure you're using a valid model name + llm = ZeroGLLM(model="llama-3.3-70b-instruct", ...) # Correct + # llm = ZeroGLLM(model="invalid-model", ...) # Wrong + ``` + +2. **Authentication Errors** + ```python + # Ensure your private key is valid and has sufficient funds + # Check the RPC URL is correct for your network (testnet/mainnet) + ``` + +3. **Timeout Issues** + ```python + # Increase timeout for longer requests + llm = ZeroGLLM(timeout=300.0, ...) 
# 5 minutes + ``` + +### Getting Help + +- **Documentation**: [0G Compute Network Docs](https://docs.0g.ai) +- **Discord**: Join the 0G community Discord +- **GitHub Issues**: Report bugs on the LlamaIndex repository + +## Contributing + +Contributions are welcome! Please see the main LlamaIndex contributing guidelines. + +## License + +This integration is licensed under the MIT License. + +## Changelog + +### v0.1.0 + +- Initial release +- Support for official 0G models (llama-3.3-70b-instruct, deepseek-r1-70b) +- Basic chat and completion interfaces +- Streaming support +- Async support +- Custom provider support diff --git a/llama-index-integrations/llms/llama-index-llms-0g/examples/basic_usage.py b/llama-index-integrations/llms/llama-index-llms-0g/examples/basic_usage.py new file mode 100644 index 0000000000..4d72e14568 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/examples/basic_usage.py @@ -0,0 +1,204 @@ +""" +Basic usage example for 0G Compute Network LLM integration. + +This example demonstrates how to use the ZeroGLLM class for basic +chat and completion tasks. +""" + +import asyncio +import os +from llama_index.core.llms import ChatMessage, MessageRole +from llama_index.llms.zerog import ZeroGLLM + + +def basic_completion_example(): + """Demonstrate basic completion functionality.""" + print("=== Basic Completion Example ===") + + # Initialize the LLM with your private key + # In production, use environment variables for security + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here"), + temperature=0.7, + max_tokens=512 + ) + + # Simple completion + prompt = "Explain the concept of decentralized computing in simple terms." + response = llm.complete(prompt) + + print(f"Prompt: {prompt}") + print(f"Response: {response.text}") + print() + + +def chat_example(): + """Demonstrate chat functionality.""" + print("=== Chat Example ===") + + llm = ZeroGLLM( + model="deepseek-r1-70b", # Using the reasoning model + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here"), + temperature=0.3 # Lower temperature for more focused responses + ) + + # Create a conversation + messages = [ + ChatMessage( + role=MessageRole.SYSTEM, + content="You are a helpful AI assistant specialized in blockchain and decentralized technologies." + ), + ChatMessage( + role=MessageRole.USER, + content="What are the advantages of using a decentralized compute network like 0G?" + ) + ] + + response = llm.chat(messages) + + print("Conversation:") + for msg in messages: + print(f"{msg.role.value}: {msg.content}") + + print(f"Assistant: {response.message.content}") + print() + + +def streaming_example(): + """Demonstrate streaming functionality.""" + print("=== Streaming Example ===") + + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here") + ) + + prompt = "Write a short story about AI and blockchain technology working together." 
+ + print(f"Prompt: {prompt}") + print("Streaming response:") + + # Stream the response + for chunk in llm.stream_complete(prompt): + print(chunk.delta, end="", flush=True) + + print("\n") + + +def custom_provider_example(): + """Demonstrate using a custom provider.""" + print("=== Custom Provider Example ===") + + # Example with custom provider address + llm = ZeroGLLM( + model="custom-model-name", + provider_address="0x1234567890abcdef1234567890abcdef12345678", # Example address + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here"), + rpc_url="https://evmrpc-testnet.0g.ai", + context_window=8192, + additional_kwargs={ + "top_p": 0.9, + "frequency_penalty": 0.1 + } + ) + + print(f"Using custom provider: {llm._get_provider_address()}") + print(f"Model: {llm.model}") + print(f"Context window: {llm.context_window}") + print() + + +async def async_example(): + """Demonstrate async functionality.""" + print("=== Async Example ===") + + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here") + ) + + # Async completion + prompt = "What is the future of decentralized AI?" + response = await llm.acomplete(prompt) + + print(f"Async completion result: {response.text}") + + # Async chat + messages = [ + ChatMessage(role=MessageRole.USER, content="Hello, how are you?") + ] + + chat_response = await llm.achat(messages) + print(f"Async chat result: {chat_response.message.content}") + + # Async streaming + print("Async streaming:") + async for chunk in await llm.astream_complete("Tell me about 0G network"): + print(chunk.delta, end="", flush=True) + + print("\n") + + +def error_handling_example(): + """Demonstrate error handling.""" + print("=== Error Handling Example ===") + + try: + # This should raise an error for invalid model + llm = ZeroGLLM( + model="invalid-model-name", + private_key="test_key" + ) + + # This will trigger the error when trying to get provider address + llm._get_provider_address() + + except ValueError as e: + print(f"Caught expected error: {e}") + + try: + # Valid configuration + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here") + ) + + print(f"Successfully initialized with model: {llm.model}") + print(f"Provider address: {llm._get_provider_address()}") + + except Exception as e: + print(f"Configuration error: {e}") + + print() + + +def main(): + """Run all examples.""" + print("0G Compute Network LLM Integration Examples") + print("=" * 50) + print() + + # Check if private key is set + if not os.getenv("ETHEREUM_PRIVATE_KEY"): + print("Warning: ETHEREUM_PRIVATE_KEY environment variable not set.") + print("Using placeholder value for demonstration.") + print("In production, set your actual private key as an environment variable.") + print() + + # Run synchronous examples + basic_completion_example() + chat_example() + streaming_example() + custom_provider_example() + error_handling_example() + + # Run async example + print("Running async example...") + asyncio.run(async_example()) + + print("All examples completed!") + + +if __name__ == "__main__": + main() diff --git a/llama-index-integrations/llms/llama-index-llms-0g/examples/llamaindex_integration.py b/llama-index-integrations/llms/llama-index-llms-0g/examples/llamaindex_integration.py new file mode 100644 index 0000000000..af79a0d5bf --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/examples/llamaindex_integration.py @@ -0,0 +1,282 @@ +""" 
+LlamaIndex integration example for 0G Compute Network LLM. + +This example demonstrates how to use ZeroGLLM with LlamaIndex +components like query engines and chat engines. +""" + +import os +import tempfile +from pathlib import Path + +from llama_index.core import ( + VectorStoreIndex, + SimpleDirectoryReader, + Settings, + Document +) +from llama_index.core.llms import ChatMessage, MessageRole +from llama_index.llms.zerog import ZeroGLLM + + +def create_sample_documents(): + """Create sample documents for demonstration.""" + documents = [ + Document( + text=""" + The 0G Network is a decentralized AI infrastructure that provides scalable, + secure, and cost-effective solutions for AI applications. It consists of + three main components: 0G Chain (blockchain layer), 0G Storage (decentralized + storage), and 0G Compute (AI inference network). + + The 0G Compute Network enables developers to access GPU resources from + distributed providers, offering competitive pricing and verification + capabilities through Trusted Execution Environments (TEE). + """, + metadata={"source": "0g_overview.txt", "topic": "0G Network Overview"} + ), + Document( + text=""" + Decentralized AI offers several advantages over traditional centralized + approaches: improved privacy through distributed processing, reduced + single points of failure, competitive pricing through market dynamics, + and enhanced transparency through blockchain-based verification. + + The 0G Network implements these principles by providing a marketplace + where GPU providers can offer their compute resources while maintaining + cryptographic proof of computation integrity. + """, + metadata={"source": "decentralized_ai.txt", "topic": "Decentralized AI Benefits"} + ), + Document( + text=""" + Setting up the 0G Compute Network requires an Ethereum wallet with OG tokens + for payment. Developers can choose from official models like llama-3.3-70b-instruct + and deepseek-r1-70b, or connect to custom providers. + + The network supports standard OpenAI-compatible APIs, making integration + straightforward for existing applications. Verification is handled + automatically through TEE technology. + """, + metadata={"source": "setup_guide.txt", "topic": "Setup and Configuration"} + ) + ] + + return documents + + +def query_engine_example(): + """Demonstrate using ZeroGLLM with a query engine.""" + print("=== Query Engine Example ===") + + # Initialize the 0G LLM + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here"), + temperature=0.1, # Low temperature for factual queries + max_tokens=512 + ) + + # Set as the default LLM for LlamaIndex + Settings.llm = llm + + # Create sample documents + documents = create_sample_documents() + + # Create index + print("Creating vector index...") + index = VectorStoreIndex.from_documents(documents) + + # Create query engine + query_engine = index.as_query_engine( + response_mode="compact", + verbose=True + ) + + # Ask questions + questions = [ + "What is the 0G Network?", + "What are the benefits of decentralized AI?", + "How do I set up the 0G Compute Network?" + ] + + for question in questions: + print(f"\nQuestion: {question}") + response = query_engine.query(question) + print(f"Answer: {response.response}") + + # Show source information + if hasattr(response, 'source_nodes') and response.source_nodes: + print("Sources:") + for i, node in enumerate(response.source_nodes): + metadata = node.node.metadata + print(f" {i+1}. 
{metadata.get('source', 'Unknown')} - {metadata.get('topic', 'N/A')}") + + print() + + +def chat_engine_example(): + """Demonstrate using ZeroGLLM with a chat engine.""" + print("=== Chat Engine Example ===") + + # Initialize with the reasoning model for better conversation + llm = ZeroGLLM( + model="deepseek-r1-70b", + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here"), + temperature=0.3, + max_tokens=1024 + ) + + Settings.llm = llm + + # Create documents and index + documents = create_sample_documents() + index = VectorStoreIndex.from_documents(documents) + + # Create chat engine + chat_engine = index.as_chat_engine( + chat_mode="context", + verbose=True + ) + + # Simulate a conversation + conversation = [ + "Hi! Can you tell me about the 0G Network?", + "What makes it different from traditional cloud computing?", + "How does the verification system work?", + "What do I need to get started?" + ] + + print("Starting conversation with 0G-powered chat engine:") + print("-" * 50) + + for user_message in conversation: + print(f"User: {user_message}") + response = chat_engine.chat(user_message) + print(f"Assistant: {response.response}") + print() + + # Show chat history + print("Chat History:") + for i, message in enumerate(chat_engine.chat_history): + role = "User" if message.role == MessageRole.USER else "Assistant" + print(f"{i+1}. {role}: {message.content[:100]}...") + + print() + + +def custom_prompt_example(): + """Demonstrate custom prompting with 0G LLM.""" + print("=== Custom Prompt Example ===") + + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here"), + temperature=0.5 + ) + + # Create documents + documents = create_sample_documents() + index = VectorStoreIndex.from_documents(documents) + + # Custom query engine with specific prompt + from llama_index.core import PromptTemplate + + custom_prompt = PromptTemplate( + """ + You are an expert on decentralized AI and blockchain technology, specifically + the 0G Network. Use the provided context to answer questions accurately and + provide practical guidance. + + Context information: + {context_str} + + Question: {query_str} + + Please provide a comprehensive answer that includes: + 1. Direct answer to the question + 2. Technical details when relevant + 3. Practical implications or next steps + + Answer: + """ + ) + + query_engine = index.as_query_engine( + text_qa_template=custom_prompt, + response_mode="tree_summarize" + ) + + question = "How can I integrate 0G Compute Network into my existing AI application?" + print(f"Question: {question}") + + response = query_engine.query(question) + print(f"Custom-prompted response: {response.response}") + print() + + +def streaming_query_example(): + """Demonstrate streaming responses with query engine.""" + print("=== Streaming Query Example ===") + + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here"), + temperature=0.2 + ) + + Settings.llm = llm + + # Create documents and index + documents = create_sample_documents() + index = VectorStoreIndex.from_documents(documents) + + # Create streaming query engine + query_engine = index.as_query_engine( + streaming=True, + response_mode="compact" + ) + + question = "Explain the architecture and benefits of the 0G Network in detail." 
+ print(f"Question: {question}") + print("Streaming response:") + + # Stream the response + streaming_response = query_engine.query(question) + for chunk in streaming_response.response_gen: + print(chunk, end="", flush=True) + + print("\n") + + +def main(): + """Run all integration examples.""" + print("0G Compute Network + LlamaIndex Integration Examples") + print("=" * 60) + print() + + # Check if private key is set + if not os.getenv("ETHEREUM_PRIVATE_KEY"): + print("Warning: ETHEREUM_PRIVATE_KEY environment variable not set.") + print("Using placeholder value for demonstration.") + print("Set your actual private key as an environment variable for real usage.") + print() + + try: + # Run examples + query_engine_example() + chat_engine_example() + custom_prompt_example() + streaming_query_example() + + print("All integration examples completed successfully!") + + except Exception as e: + print(f"Error running examples: {e}") + print("Make sure you have the required dependencies installed:") + print("- pip install llama-index-core") + print("- pip install llama-index-llms-0g") + + +if __name__ == "__main__": + main() diff --git a/llama-index-integrations/llms/llama-index-llms-0g/llama_index/__init__.py b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/__init__.py new file mode 100644 index 0000000000..9bac60b49c --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/__init__.py @@ -0,0 +1 @@ +# Empty file to make this a package diff --git a/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/__init__.py b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/__init__.py new file mode 100644 index 0000000000..9bac60b49c --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/__init__.py @@ -0,0 +1 @@ +# Empty file to make this a package diff --git a/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/__init__.py b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/__init__.py new file mode 100644 index 0000000000..042f220010 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/__init__.py @@ -0,0 +1,3 @@ +from llama_index.llms.zerog.base import ZeroGLLM + +__all__ = ["ZeroGLLM"] diff --git a/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/base.py b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/base.py new file mode 100644 index 0000000000..7e4f47f65a --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/base.py @@ -0,0 +1,456 @@ +import asyncio +import json +import logging +from typing import Any, AsyncGenerator, Dict, Generator, List, Optional, Sequence + +import httpx +from llama_index.core.base.llms.types import ( + ChatMessage, + ChatResponse, + ChatResponseAsyncGen, + ChatResponseGen, + CompletionResponse, + CompletionResponseAsyncGen, + CompletionResponseGen, + LLMMetadata, + MessageRole, +) +from llama_index.core.bridge.pydantic import Field, PrivateAttr +from llama_index.core.callbacks import CallbackManager +from llama_index.core.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_NUM_OUTPUTS +from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback +from llama_index.core.base.llms.generic_utils import ( + achat_to_completion_decorator, + acompletion_to_chat_decorator, + astream_chat_to_completion_decorator, + astream_completion_to_chat_decorator, + chat_to_completion_decorator, + 
completion_to_chat_decorator, + stream_chat_to_completion_decorator, + stream_completion_to_chat_decorator, +) +from llama_index.core.llms.llm import LLM + +logger = logging.getLogger(__name__) + +# Official 0G Services as per documentation +OFFICIAL_0G_SERVICES = { + "llama-3.3-70b-instruct": { + "provider_address": "0xf07240Efa67755B5311bc75784a061eDB47165Dd", + "description": "State-of-the-art 70B parameter model for general AI tasks", + "verification": "TEE (TeeML)", + }, + "deepseek-r1-70b": { + "provider_address": "0x3feE5a4dd5FDb8a32dDA97Bed899830605dBD9D3", + "description": "Advanced reasoning model optimized for complex problem solving", + "verification": "TEE (TeeML)", + }, +} + + +class ZeroGLLM(LLM): + """ + 0G Compute Network LLM integration for LlamaIndex. + + This integration allows you to use AI inference services from the 0G Compute Network, + which provides decentralized GPU compute with verification capabilities. + + Args: + model (str): The model to use. Can be one of the official models: + - "llama-3.3-70b-instruct": 70B parameter model for general AI tasks + - "deepseek-r1-70b": Advanced reasoning model + Or a custom provider address. + private_key (str): Ethereum private key for wallet authentication + rpc_url (str): 0G Chain RPC URL. Defaults to testnet. + provider_address (Optional[str]): Custom provider address. If not provided, + will use the official provider for the specified model. + context_window (int): Context window size. Defaults to 4096. + max_tokens (int): Maximum tokens to generate. Defaults to 512. + temperature (float): Sampling temperature. Defaults to 0.1. + timeout (float): Request timeout in seconds. Defaults to 60.0. + additional_kwargs (Dict[str, Any]): Additional parameters for requests. + + Examples: + ```python + from llama_index.llms.zerog import ZeroGLLM + + # Using official model + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="your_private_key_here" + ) + + # Using custom provider + llm = ZeroGLLM( + model="custom-model", + provider_address="0x...", + private_key="your_private_key_here" + ) + + response = llm.complete("Hello, how are you?") + print(response.text) + ``` + """ + + model: str = Field( + default="llama-3.3-70b-instruct", + description="Model name or identifier" + ) + private_key: str = Field( + description="Ethereum private key for wallet authentication" + ) + rpc_url: str = Field( + default="https://evmrpc-testnet.0g.ai", + description="0G Chain RPC URL" + ) + provider_address: Optional[str] = Field( + default=None, + description="Custom provider address. If not provided, uses official provider for the model." 
+ ) + context_window: int = Field( + default=DEFAULT_CONTEXT_WINDOW, + description="Context window size" + ) + max_tokens: int = Field( + default=DEFAULT_NUM_OUTPUTS, + description="Maximum tokens to generate" + ) + temperature: float = Field( + default=0.1, + description="Sampling temperature" + ) + timeout: float = Field( + default=60.0, + description="Request timeout in seconds" + ) + additional_kwargs: Dict[str, Any] = Field( + default_factory=dict, + description="Additional parameters for requests" + ) + + _broker: Any = PrivateAttr() + _http_client: httpx.AsyncClient = PrivateAttr() + _is_initialized: bool = PrivateAttr(default=False) + + def __init__( + self, + model: str = "llama-3.3-70b-instruct", + private_key: str = "", + rpc_url: str = "https://evmrpc-testnet.0g.ai", + provider_address: Optional[str] = None, + context_window: int = DEFAULT_CONTEXT_WINDOW, + max_tokens: int = DEFAULT_NUM_OUTPUTS, + temperature: float = 0.1, + timeout: float = 60.0, + additional_kwargs: Optional[Dict[str, Any]] = None, + callback_manager: Optional[CallbackManager] = None, + **kwargs: Any, + ) -> None: + additional_kwargs = additional_kwargs or {} + + super().__init__( + model=model, + private_key=private_key, + rpc_url=rpc_url, + provider_address=provider_address, + context_window=context_window, + max_tokens=max_tokens, + temperature=temperature, + timeout=timeout, + additional_kwargs=additional_kwargs, + callback_manager=callback_manager, + **kwargs, + ) + + self._http_client = httpx.AsyncClient(timeout=timeout) + + @classmethod + def class_name(cls) -> str: + return "ZeroGLLM" + + @property + def metadata(self) -> LLMMetadata: + return LLMMetadata( + context_window=self.context_window, + num_output=self.max_tokens, + is_chat_model=True, + is_function_calling_model=False, + model_name=self.model, + ) + + def _get_provider_address(self) -> str: + """Get the provider address for the model.""" + if self.provider_address: + return self.provider_address + + if self.model in OFFICIAL_0G_SERVICES: + return OFFICIAL_0G_SERVICES[self.model]["provider_address"] + + raise ValueError( + f"Model '{self.model}' not found in official services. " + f"Please provide a custom provider_address. 
" + f"Available official models: {list(OFFICIAL_0G_SERVICES.keys())}" + ) + + async def _initialize_broker(self) -> None: + """Initialize the 0G broker if not already initialized.""" + if self._is_initialized: + return + + try: + # This would require the JavaScript SDK to be available + # For now, we'll simulate the broker initialization + logger.info("Initializing 0G Compute Network broker...") + + # In a real implementation, this would use the JavaScript SDK + # via a subprocess or Node.js bridge + self._broker = { + "provider_address": self._get_provider_address(), + "initialized": True + } + + self._is_initialized = True + logger.info(f"Broker initialized for provider: {self._get_provider_address()}") + + except Exception as e: + logger.error(f"Failed to initialize 0G broker: {e}") + raise + + def _messages_to_openai_format(self, messages: Sequence[ChatMessage]) -> List[Dict[str, str]]: + """Convert LlamaIndex messages to OpenAI format.""" + openai_messages = [] + for message in messages: + role = message.role.value if hasattr(message.role, 'value') else str(message.role) + openai_messages.append({ + "role": role, + "content": message.content or "" + }) + return openai_messages + + async def _make_request( + self, + messages: List[Dict[str, str]], + stream: bool = False + ) -> Dict[str, Any]: + """Make a request to the 0G service.""" + await self._initialize_broker() + + # In a real implementation, this would: + # 1. Get service metadata from broker + # 2. Generate authenticated headers + # 3. Make the actual request to the service endpoint + + # For now, we'll simulate the response + provider_address = self._get_provider_address() + + # Simulate getting service metadata + endpoint = f"https://api.0g.ai/v1/providers/{provider_address}" + model_name = self.model + + # Simulate generating auth headers + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer simulated_token_for_{provider_address}", + "X-0G-Provider": provider_address, + } + + # Prepare request body + request_body = { + "messages": messages, + "model": model_name, + "max_tokens": self.max_tokens, + "temperature": self.temperature, + "stream": stream, + **self.additional_kwargs, + } + + try: + # In a real implementation, this would make the actual HTTP request + # For now, we'll simulate a response + if stream: + return await self._simulate_streaming_response(messages) + else: + return await self._simulate_response(messages) + + except Exception as e: + logger.error(f"Request to 0G service failed: {e}") + raise + + async def _simulate_response(self, messages: List[Dict[str, str]]) -> Dict[str, Any]: + """Simulate a response from the 0G service.""" + # This is a placeholder - in real implementation, this would be the actual API response + last_message = messages[-1]["content"] if messages else "Hello" + + return { + "choices": [{ + "message": { + "role": "assistant", + "content": f"This is a simulated response from 0G Compute Network for: {last_message}" + }, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 20, + "total_tokens": 30 + } + } + + async def _simulate_streaming_response(self, messages: List[Dict[str, str]]) -> AsyncGenerator[Dict[str, Any], None]: + """Simulate a streaming response from the 0G service.""" + last_message = messages[-1]["content"] if messages else "Hello" + response_text = f"This is a simulated streaming response from 0G Compute Network for: {last_message}" + + words = response_text.split() + for i, word in enumerate(words): + chunk = 
{ + "choices": [{ + "delta": { + "content": word + " " if i < len(words) - 1 else word + }, + "finish_reason": None if i < len(words) - 1 else "stop" + }] + } + yield chunk + await asyncio.sleep(0.1) # Simulate streaming delay + + @llm_completion_callback() + def complete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponse: + return completion_to_chat_decorator(self.chat)(prompt, **kwargs) + + @llm_completion_callback() + def stream_complete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponseGen: + return stream_completion_to_chat_decorator(self.stream_chat)(prompt, **kwargs) + + @llm_chat_callback() + def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse: + openai_messages = self._messages_to_openai_format(messages) + + # Run async method in sync context + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + response_data = loop.run_until_complete( + self._make_request(openai_messages, stream=False) + ) + finally: + loop.close() + + choice = response_data["choices"][0] + message_content = choice["message"]["content"] + + return ChatResponse( + message=ChatMessage( + role=MessageRole.ASSISTANT, + content=message_content, + ), + raw=response_data, + ) + + @llm_chat_callback() + def stream_chat( + self, messages: Sequence[ChatMessage], **kwargs: Any + ) -> ChatResponseGen: + def gen() -> Generator[ChatResponse, None, None]: + openai_messages = self._messages_to_openai_format(messages) + + # Run async method in sync context + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + async_gen = self._make_request(openai_messages, stream=True) + + async def async_wrapper(): + content = "" + async for chunk in async_gen: + choice = chunk["choices"][0] + delta_content = choice.get("delta", {}).get("content", "") + content += delta_content + + yield ChatResponse( + message=ChatMessage( + role=MessageRole.ASSISTANT, + content=content, + ), + delta=delta_content, + raw=chunk, + ) + + # Convert async generator to sync + async_iter = async_wrapper() + while True: + try: + chunk = loop.run_until_complete(async_iter.__anext__()) + yield chunk + except StopAsyncIteration: + break + finally: + loop.close() + + return gen() + + @llm_completion_callback() + async def acomplete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponse: + return await acompletion_to_chat_decorator(self.achat)(prompt, **kwargs) + + @llm_completion_callback() + async def astream_complete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponseAsyncGen: + return await astream_completion_to_chat_decorator(self.astream_chat)(prompt, **kwargs) + + @llm_chat_callback() + async def achat( + self, messages: Sequence[ChatMessage], **kwargs: Any + ) -> ChatResponse: + openai_messages = self._messages_to_openai_format(messages) + response_data = await self._make_request(openai_messages, stream=False) + + choice = response_data["choices"][0] + message_content = choice["message"]["content"] + + return ChatResponse( + message=ChatMessage( + role=MessageRole.ASSISTANT, + content=message_content, + ), + raw=response_data, + ) + + @llm_chat_callback() + async def astream_chat( + self, messages: Sequence[ChatMessage], **kwargs: Any + ) -> ChatResponseAsyncGen: + async def gen() -> AsyncGenerator[ChatResponse, None]: + openai_messages = self._messages_to_openai_format(messages) + content = "" + + async for chunk in await self._make_request(openai_messages, 
stream=True): + choice = chunk["choices"][0] + delta_content = choice.get("delta", {}).get("content", "") + content += delta_content + + yield ChatResponse( + message=ChatMessage( + role=MessageRole.ASSISTANT, + content=content, + ), + delta=delta_content, + raw=chunk, + ) + + return gen() + + def __del__(self): + """Cleanup resources.""" + if hasattr(self, '_http_client'): + try: + asyncio.create_task(self._http_client.aclose()) + except Exception: + pass diff --git a/llama-index-integrations/llms/llama-index-llms-0g/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-0g/pyproject.toml new file mode 100644 index 0000000000..bc49c5b1f7 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/pyproject.toml @@ -0,0 +1,102 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[dependency-groups] +dev = [ + "ipython==8.10.0", + "jupyter>=1.0.0,<2", + "mypy==0.991", + "pre-commit==3.2.0", + "pylint==2.15.10", + "pytest==7.2.1", + "pytest-mock==3.11.1", + "ruff==0.11.11", + "types-Deprecated>=0.1.0", + "types-PyYAML>=6.0.12.12,<7", + "types-protobuf>=4.24.0.4,<5", + "types-redis==4.5.5.0", + "types-requests==2.28.11.8", + "types-setuptools==67.1.0.0", + "black[jupyter]<=23.9.1,>=23.7.0", + "codespell[toml]>=v2.2.6", + "diff-cover>=9.2.0", + "pytest-cov>=6.1.1", +] + +[project] +name = "llama-index-llms-0g" +version = "0.1.0" +description = "llama-index llms 0G Compute Network integration" +authors = [{name = "LlamaIndex", email = "maintainers@llamaindex.ai"}] +requires-python = ">=3.9,<4.0" +readme = "README.md" +license = "MIT" +dependencies = [ + "llama-index-core>=0.14.3,<0.15", + "httpx>=0.24.0,<1.0", + "pydantic>=2.0.0,<3.0", +] + +[project.optional-dependencies] +# Note: The @0glabs/0g-serving-broker is a JavaScript/TypeScript package +# For Python integration, users would need to set up a Node.js bridge or use subprocess +# This is documented in the README +js-bridge = [ + "nodejs>=18.0.0", # This would require a custom installer +] + +[tool.codespell] +check-filenames = true +check-hidden = true +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +[tool.hatch.build.targets.sdist] +include = ["llama_index/"] +exclude = ["**/BUILD"] + +[tool.hatch.build.targets.wheel] +include = ["llama_index/"] +exclude = ["**/BUILD"] + +[tool.llamahub] +contains_example = false +import_path = "llama_index.llms.zerog" + +[tool.llamahub.class_authors] +ZeroGLLM = "llama-index" + +[tool.mypy] +disallow_untyped_defs = true +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.ruff] +line-length = 88 +target-version = "py38" + +[tool.ruff.lint] +select = ["E", "F", "W", "I"] +ignore = ["E501", "E203"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["F401"] + +[tool.black] +line-length = 88 +target-version = ['py38'] +include = '\.pyi?$' +extend-exclude = ''' +/( + # directories + \.eggs + | \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | build + | dist +)/ +''' diff --git a/llama-index-integrations/llms/llama-index-llms-0g/tests/__init__.py b/llama-index-integrations/llms/llama-index-llms-0g/tests/__init__.py new file mode 100644 index 0000000000..66173aec46 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/tests/__init__.py @@ -0,0 +1 @@ +# Test package diff --git a/llama-index-integrations/llms/llama-index-llms-0g/tests/test_zerog_llm.py b/llama-index-integrations/llms/llama-index-llms-0g/tests/test_zerog_llm.py new file mode 100644 
index 0000000000..ed08369209 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/tests/test_zerog_llm.py @@ -0,0 +1,303 @@ +import pytest +from unittest.mock import AsyncMock, MagicMock, patch +from llama_index.core.llms import ChatMessage, MessageRole +from llama_index.llms.zerog import ZeroGLLM + + +class TestZeroGLLM: + """Test cases for ZeroGLLM integration.""" + + def test_initialization_with_official_model(self): + """Test initialization with official model.""" + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key" + ) + + assert llm.model == "llama-3.3-70b-instruct" + assert llm.private_key == "test_private_key" + assert llm.rpc_url == "https://evmrpc-testnet.0g.ai" + assert llm.context_window == 4096 + assert llm.temperature == 0.1 + + def test_initialization_with_custom_provider(self): + """Test initialization with custom provider.""" + custom_address = "0x1234567890abcdef" + llm = ZeroGLLM( + model="custom-model", + provider_address=custom_address, + private_key="test_private_key" + ) + + assert llm.model == "custom-model" + assert llm.provider_address == custom_address + + def test_get_provider_address_official_model(self): + """Test getting provider address for official model.""" + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key" + ) + + address = llm._get_provider_address() + assert address == "0xf07240Efa67755B5311bc75784a061eDB47165Dd" + + def test_get_provider_address_custom_provider(self): + """Test getting provider address for custom provider.""" + custom_address = "0x1234567890abcdef" + llm = ZeroGLLM( + model="custom-model", + provider_address=custom_address, + private_key="test_private_key" + ) + + address = llm._get_provider_address() + assert address == custom_address + + def test_get_provider_address_invalid_model(self): + """Test error handling for invalid model without custom provider.""" + llm = ZeroGLLM( + model="invalid-model", + private_key="test_private_key" + ) + + with pytest.raises(ValueError, match="Model 'invalid-model' not found"): + llm._get_provider_address() + + def test_metadata(self): + """Test LLM metadata.""" + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key", + context_window=8192, + max_tokens=1024 + ) + + metadata = llm.metadata + assert metadata.context_window == 8192 + assert metadata.num_output == 1024 + assert metadata.is_chat_model is True + assert metadata.is_function_calling_model is False + assert metadata.model_name == "llama-3.3-70b-instruct" + + def test_messages_to_openai_format(self): + """Test message format conversion.""" + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key" + ) + + messages = [ + ChatMessage(role=MessageRole.USER, content="Hello"), + ChatMessage(role=MessageRole.ASSISTANT, content="Hi there!"), + ChatMessage(role=MessageRole.USER, content="How are you?") + ] + + openai_messages = llm._messages_to_openai_format(messages) + + expected = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + {"role": "user", "content": "How are you?"} + ] + + assert openai_messages == expected + + @patch('llama_index.llms.zerog.base.ZeroGLLM._simulate_response') + @patch('llama_index.llms.zerog.base.ZeroGLLM._initialize_broker') + def test_chat_sync(self, mock_init_broker, mock_simulate_response): + """Test synchronous chat functionality.""" + # Setup mocks + mock_init_broker.return_value = None + mock_simulate_response.return_value = { + "choices": 
[{ + "message": { + "role": "assistant", + "content": "Hello! I'm doing well, thank you." + }, + "finish_reason": "stop" + }], + "usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30} + } + + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key" + ) + + messages = [ + ChatMessage(role=MessageRole.USER, content="Hello, how are you?") + ] + + with patch('asyncio.new_event_loop') as mock_loop_constructor: + mock_loop = MagicMock() + mock_loop_constructor.return_value = mock_loop + mock_loop.run_until_complete.return_value = mock_simulate_response.return_value + + response = llm.chat(messages) + + assert response.message.role == MessageRole.ASSISTANT + assert response.message.content == "Hello! I'm doing well, thank you." + + @pytest.mark.asyncio + async def test_achat(self): + """Test asynchronous chat functionality.""" + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key" + ) + + messages = [ + ChatMessage(role=MessageRole.USER, content="Hello") + ] + + with patch.object(llm, '_initialize_broker', new_callable=AsyncMock) as mock_init: + with patch.object(llm, '_make_request', new_callable=AsyncMock) as mock_request: + mock_init.return_value = None + mock_request.return_value = { + "choices": [{ + "message": { + "role": "assistant", + "content": "Hello there!" + }, + "finish_reason": "stop" + }] + } + + response = await llm.achat(messages) + + assert response.message.role == MessageRole.ASSISTANT + assert response.message.content == "Hello there!" + mock_init.assert_called_once() + mock_request.assert_called_once() + + def test_complete_sync(self): + """Test synchronous completion functionality.""" + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key" + ) + + with patch.object(llm, 'chat') as mock_chat: + mock_chat.return_value = MagicMock() + mock_chat.return_value.message.content = "Completion response" + + response = llm.complete("Test prompt") + + # The complete method should call chat internally + mock_chat.assert_called_once() + + @pytest.mark.asyncio + async def test_acomplete(self): + """Test asynchronous completion functionality.""" + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key" + ) + + with patch.object(llm, 'achat', new_callable=AsyncMock) as mock_achat: + mock_response = MagicMock() + mock_response.message.content = "Async completion response" + mock_achat.return_value = mock_response + + response = await llm.acomplete("Test prompt") + + # The acomplete method should call achat internally + mock_achat.assert_called_once() + + def test_class_name(self): + """Test class name method.""" + assert ZeroGLLM.class_name() == "ZeroGLLM" + + def test_official_services_constants(self): + """Test that official services are properly defined.""" + from llama_index.llms.zerog.base import OFFICIAL_0G_SERVICES + + assert "llama-3.3-70b-instruct" in OFFICIAL_0G_SERVICES + assert "deepseek-r1-70b" in OFFICIAL_0G_SERVICES + + llama_service = OFFICIAL_0G_SERVICES["llama-3.3-70b-instruct"] + assert llama_service["provider_address"] == "0xf07240Efa67755B5311bc75784a061eDB47165Dd" + assert "TEE (TeeML)" in llama_service["verification"] + + deepseek_service = OFFICIAL_0G_SERVICES["deepseek-r1-70b"] + assert deepseek_service["provider_address"] == "0x3feE5a4dd5FDb8a32dDA97Bed899830605dBD9D3" + assert "TEE (TeeML)" in deepseek_service["verification"] + + @pytest.mark.asyncio + async def test_simulate_response(self): + """Test the simulate response method.""" + 
llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key" + ) + + messages = [{"role": "user", "content": "Hello"}] + response = await llm._simulate_response(messages) + + assert "choices" in response + assert len(response["choices"]) == 1 + assert "message" in response["choices"][0] + assert "content" in response["choices"][0]["message"] + assert "Hello" in response["choices"][0]["message"]["content"] + + @pytest.mark.asyncio + async def test_simulate_streaming_response(self): + """Test the simulate streaming response method.""" + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key" + ) + + messages = [{"role": "user", "content": "Hello"}] + chunks = [] + + async for chunk in llm._simulate_streaming_response(messages): + chunks.append(chunk) + + assert len(chunks) > 0 + + # Check first chunk + first_chunk = chunks[0] + assert "choices" in first_chunk + assert "delta" in first_chunk["choices"][0] + assert "content" in first_chunk["choices"][0]["delta"] + + # Check last chunk has finish_reason + last_chunk = chunks[-1] + assert last_chunk["choices"][0]["finish_reason"] == "stop" + + def test_additional_kwargs(self): + """Test additional kwargs are properly stored.""" + additional_kwargs = { + "top_p": 0.9, + "frequency_penalty": 0.1, + "presence_penalty": 0.2 + } + + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key", + additional_kwargs=additional_kwargs + ) + + assert llm.additional_kwargs == additional_kwargs + + def test_custom_parameters(self): + """Test custom parameters are properly set.""" + llm = ZeroGLLM( + model="deepseek-r1-70b", + private_key="test_private_key", + rpc_url="https://custom-rpc.example.com", + context_window=8192, + max_tokens=2048, + temperature=0.7, + timeout=120.0 + ) + + assert llm.model == "deepseek-r1-70b" + assert llm.rpc_url == "https://custom-rpc.example.com" + assert llm.context_window == 8192 + assert llm.max_tokens == 2048 + assert llm.temperature == 0.7 + assert llm.timeout == 120.0 From e463df2a45b03638822c79b7c96a7a5577159e05 Mon Sep 17 00:00:00 2001 From: Om Sarraf Date: Sat, 27 Sep 2025 16:55:47 +0530 Subject: [PATCH 2/2] og models --- llama-index-integrations/llms/llama-index-llms-0g/LICENSE | 1 + llama-index-integrations/llms/llama-index-llms-0g/Makefile | 1 + .../llms/llama-index-llms-0g/examples/basic_usage.py | 1 + .../llms/llama-index-llms-0g/examples/llamaindex_integration.py | 1 + .../llms/llama-index-llms-0g/llama_index/__init__.py | 1 + .../llms/llama-index-llms-0g/llama_index/llms/__init__.py | 1 + .../llms/llama-index-llms-0g/llama_index/llms/zerog/__init__.py | 1 + .../llms/llama-index-llms-0g/llama_index/llms/zerog/base.py | 1 + llama-index-integrations/llms/llama-index-llms-0g/pyproject.toml | 1 + .../llms/llama-index-llms-0g/tests/__init__.py | 1 + .../llms/llama-index-llms-0g/tests/test_zerog_llm.py | 1 + 11 files changed, 11 insertions(+) diff --git a/llama-index-integrations/llms/llama-index-llms-0g/LICENSE b/llama-index-integrations/llms/llama-index-llms-0g/LICENSE index 9709184c6b..c2a017fb5d 100644 --- a/llama-index-integrations/llms/llama-index-llms-0g/LICENSE +++ b/llama-index-integrations/llms/llama-index-llms-0g/LICENSE @@ -19,3 +19,4 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ diff --git a/llama-index-integrations/llms/llama-index-llms-0g/Makefile b/llama-index-integrations/llms/llama-index-llms-0g/Makefile index ff3a5267fa..4d1836a1d1 100644 --- a/llama-index-integrations/llms/llama-index-llms-0g/Makefile +++ b/llama-index-integrations/llms/llama-index-llms-0g/Makefile @@ -17,3 +17,4 @@ watch-docs: ## Build and watch documentation. sphinx-autobuild docs/ docs/_build/html --open-browser --reload-delay 1 --ignore .git --ignore docs/_build --ignore docs/examples --watch $(GIT_ROOT)/llama_index/ .PHONY: help format lint test watch-docs + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/examples/basic_usage.py b/llama-index-integrations/llms/llama-index-llms-0g/examples/basic_usage.py index 4d72e14568..6addcbcba3 100644 --- a/llama-index-integrations/llms/llama-index-llms-0g/examples/basic_usage.py +++ b/llama-index-integrations/llms/llama-index-llms-0g/examples/basic_usage.py @@ -202,3 +202,4 @@ def main(): if __name__ == "__main__": main() + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/examples/llamaindex_integration.py b/llama-index-integrations/llms/llama-index-llms-0g/examples/llamaindex_integration.py index af79a0d5bf..c4eafa1d5d 100644 --- a/llama-index-integrations/llms/llama-index-llms-0g/examples/llamaindex_integration.py +++ b/llama-index-integrations/llms/llama-index-llms-0g/examples/llamaindex_integration.py @@ -280,3 +280,4 @@ def main(): if __name__ == "__main__": main() + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/llama_index/__init__.py b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/__init__.py index 9bac60b49c..171a44abc3 100644 --- a/llama-index-integrations/llms/llama-index-llms-0g/llama_index/__init__.py +++ b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/__init__.py @@ -1 +1,2 @@ # Empty file to make this a package + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/__init__.py b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/__init__.py index 9bac60b49c..171a44abc3 100644 --- a/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/__init__.py +++ b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/__init__.py @@ -1 +1,2 @@ # Empty file to make this a package + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/__init__.py b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/__init__.py index 042f220010..079c8cfb6c 100644 --- a/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/__init__.py +++ b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/__init__.py @@ -1,3 +1,4 @@ from llama_index.llms.zerog.base import ZeroGLLM __all__ = ["ZeroGLLM"] + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/base.py b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/base.py index 7e4f47f65a..cc6d51bd5e 100644 --- a/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/base.py +++ b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/base.py @@ -454,3 +454,4 @@ def __del__(self): asyncio.create_task(self._http_client.aclose()) except Exception: pass + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-0g/pyproject.toml index bc49c5b1f7..d513ef2c39 100644 --- a/llama-index-integrations/llms/llama-index-llms-0g/pyproject.toml 
+++ b/llama-index-integrations/llms/llama-index-llms-0g/pyproject.toml @@ -100,3 +100,4 @@ extend-exclude = ''' | dist )/ ''' + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/tests/__init__.py b/llama-index-integrations/llms/llama-index-llms-0g/tests/__init__.py index 66173aec46..712b56f942 100644 --- a/llama-index-integrations/llms/llama-index-llms-0g/tests/__init__.py +++ b/llama-index-integrations/llms/llama-index-llms-0g/tests/__init__.py @@ -1 +1,2 @@ # Test package + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/tests/test_zerog_llm.py b/llama-index-integrations/llms/llama-index-llms-0g/tests/test_zerog_llm.py index ed08369209..232ed32f7c 100644 --- a/llama-index-integrations/llms/llama-index-llms-0g/tests/test_zerog_llm.py +++ b/llama-index-integrations/llms/llama-index-llms-0g/tests/test_zerog_llm.py @@ -301,3 +301,4 @@ def test_custom_parameters(self): assert llm.max_tokens == 2048 assert llm.temperature == 0.7 assert llm.timeout == 120.0 +