import requests
from typing import Dict, Any
from dataclasses import dataclass
from enum import Enum

class LLMModel(Enum):
    """Available LLM models with their characteristics."""
    GPT_4O = "gpt-4o"
    GPT_4O_MINI = "gpt-4o-mini"
    GPT_4_1 = "gpt-4.1"
    GPT_4_1_MINI = "gpt-4.1-mini"
    GPT_4_1_NANO = "gpt-4.1-nano"
    GPT_3_5_TURBO = "gpt-3.5-turbo-0125"
    MIXTRAL_8X7B = "mixtral-8x7b-32768"
    LLAMA_3_1_8B = "llama-3.1-8b-instant"


class PerformanceTier(Enum):
    """Performance tiers for different use cases."""
    PREMIUM = "premium"
    BALANCED = "balanced"
    EFFICIENT = "efficient"
    HIGH_SPEED = "high_speed"


@dataclass
class LLMConfiguration:
    """LLM node configuration with optimization metadata."""
    model: str
    prompt_id: str
    temperature: float
    performance_tier: PerformanceTier
    context_window: int
    use_cases: list[str]
    expected_latency: str


class LLMConfigurationManager:
    def __init__(self, flow_name: str, api_token: str):
        self.flow_name = flow_name
        self.api_token = api_token
        self.base_url = f"https://{flow_name}.flows.graphorlm.com"

        # Predefined optimization configurations
        self.optimization_configs = {
            "maximum_accuracy": LLMConfiguration(
                model=LLMModel.GPT_4O.value,
                prompt_id="default_retrieval_prompt",
                temperature=0.0,
                performance_tier=PerformanceTier.PREMIUM,
                context_window=128000,
                use_cases=["Technical Q&A", "Factual responses", "Documentation"],
                expected_latency="2-4 seconds"
            ),
            "balanced_performance": LLMConfiguration(
                model=LLMModel.GPT_4O_MINI.value,
                prompt_id="default_retrieval_prompt",
                temperature=0.2,
                performance_tier=PerformanceTier.BALANCED,
                context_window=128000,
                use_cases=["General Q&A", "Customer support", "Mixed content"],
                expected_latency="1-2 seconds"
            ),
            "high_throughput": LLMConfiguration(
                model=LLMModel.MIXTRAL_8X7B.value,
                prompt_id="default_retrieval_prompt",
                temperature=0.1,
                performance_tier=PerformanceTier.HIGH_SPEED,
                context_window=32768,
                use_cases=["Real-time chat", "High-volume processing", "Quick responses"],
                expected_latency="0.5-1 second"
            ),
            "creative_generation": LLMConfiguration(
                model=LLMModel.GPT_4_1.value,
                prompt_id="creative_content_generator",
                temperature=0.8,
                performance_tier=PerformanceTier.PREMIUM,
                context_window=128000,
                use_cases=["Content creation", "Brainstorming", "Varied responses"],
                expected_latency="2-5 seconds"
            ),
            "resource_efficient": LLMConfiguration(
                model=LLMModel.GPT_4_1_NANO.value,
                prompt_id="default_retrieval_prompt",
                temperature=0.1,
                performance_tier=PerformanceTier.EFFICIENT,
                context_window=128000,
                use_cases=["Budget-conscious", "Simple Q&A", "Basic responses"],
                expected_latency="0.8-1.5 seconds"
            )
        }

    def update_llm_node(
        self,
        node_id: str,
        config: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Update LLM node configuration."""
        url = f"{self.base_url}/llm/{node_id}"
        headers = {
            "Authorization": f"Bearer {self.api_token}",
            "Content-Type": "application/json"
        }
        payload = {"config": config}

        response = requests.patch(url, headers=headers, json=payload)
        response.raise_for_status()
        return response.json()
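
    # Optional sketch (not part of the original example): a thin retry wrapper
    # around update_llm_node. It assumes transient failures surface as HTTP 429
    # or 5xx responses and simply repeats the same PATCH call with a linear
    # backoff; adjust the retryable status codes to match your deployment.
    def update_llm_node_with_retry(
        self,
        node_id: str,
        config: Dict[str, Any],
        max_attempts: int = 3,
        backoff_seconds: float = 1.0
    ) -> Dict[str, Any]:
        """Update an LLM node, retrying transient HTTP errors (illustrative sketch)."""
        import time  # local import keeps this optional helper self-contained

        for attempt in range(1, max_attempts + 1):
            try:
                return self.update_llm_node(node_id, config)
            except requests.HTTPError as error:
                status = error.response.status_code if error.response is not None else None
                # Re-raise immediately for non-transient errors or on the final attempt.
                if status not in (429, 500, 502, 503, 504) or attempt == max_attempts:
                    raise
                time.sleep(backoff_seconds * attempt)
        raise RuntimeError("update_llm_node_with_retry exhausted all attempts")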

    def apply_optimization_strategy(
        self,
        node_id: str,
        strategy: str
    ) -> Dict[str, Any]:
        """Apply a predefined optimization strategy."""
        if strategy not in self.optimization_configs:
            available = ", ".join(self.optimization_configs.keys())
            raise ValueError(f"Unknown strategy: {strategy}. Available: {available}")

        config = self.optimization_configs[strategy]
        update_config = {
            "model": config.model,
            "promptId": config.prompt_id,
            "temperature": config.temperature
        }

        result = self.update_llm_node(node_id, update_config)

        # Add optimization metadata to result
        result["optimization_applied"] = {
            "strategy": strategy,
            "performance_tier": config.performance_tier.value,
            "context_window": config.context_window,
            "use_cases": config.use_cases,
            "expected_latency": config.expected_latency
        }

        return result

    def analyze_current_configuration(self, node_id: str) -> Dict[str, Any]:
        """Analyze current configuration and suggest improvements."""
        # A real implementation would fetch the node's current config first;
        # for demo purposes we only provide the analysis framework.
        analysis = {
            "current_assessment": {
                "performance_tier": "unknown",
                "estimated_processing_time": "unknown",
                "context_capacity": "unknown",
                "creativity_level": "unknown"
            },
            "recommendations": [],
            "alternative_configs": []
        }

        # Add recommendations based on common patterns
        analysis["recommendations"] = [
            "Consider temperature 0.0-0.1 for factual Q&A",
            "Use temperature 0.2-0.4 for conversational responses",
            "Choose temperature 0.5+ for creative content",
            "Select high-context models for long documents",
            "Use fast models for real-time applications"
        ]

        analysis["alternative_configs"] = [
            {
                "name": "Accuracy Focused",
                "config": self.optimization_configs["maximum_accuracy"],
                "trade_offs": "Higher latency, maximum precision"
            },
            {
                "name": "Speed Optimized",
                "config": self.optimization_configs["high_throughput"],
                "trade_offs": "Lower latency, good quality"
            },
            {
                "name": "Balanced Approach",
                "config": self.optimization_configs["balanced_performance"],
                "trade_offs": "Moderate latency, versatile quality"
            }
        ]

        return analysis

    def batch_update_multiple_nodes(
        self,
        updates: Dict[str, Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Update multiple LLM nodes with different configurations."""
        results = {"successful_updates": [], "failed_updates": []}

        for node_id, config in updates.items():
            try:
                result = self.update_llm_node(node_id, config)
                results["successful_updates"].append({
                    "node_id": node_id,
                    "result": result,
                    "config_applied": config
                })
            except Exception as e:
                results["failed_updates"].append({
                    "node_id": node_id,
                    "error": str(e),
                    "attempted_config": config
                })

        return results

    def generate_performance_report(self, strategy: str) -> Dict[str, Any]:
        """Generate a detailed performance report for a configuration strategy."""
        if strategy not in self.optimization_configs:
            raise ValueError(f"Unknown strategy: {strategy}")

        config = self.optimization_configs[strategy]

        return {
            "strategy_name": strategy,
            "configuration": {
                "model": config.model,
                "prompt_id": config.prompt_id,
                "temperature": config.temperature
            },
            "performance_characteristics": {
                "tier": config.performance_tier.value,
                "context_window": f"{config.context_window:,} tokens",
                "expected_latency": config.expected_latency,
                "use_cases": config.use_cases
            },
            "resource_utilization": {
                "computational_intensity": self._get_computational_intensity(config.model),
                "memory_requirements": self._get_memory_requirements(config.model),
                "throughput_capacity": self._get_throughput_capacity(config.model)
            },
            "optimization_recommendations": self._get_optimization_recommendations(config)
        }

    def _get_computational_intensity(self, model: str) -> str:
        """Get computational intensity for a model."""
        intensity_map = {
            LLMModel.GPT_4O.value: "High",
            LLMModel.GPT_4_1.value: "High",
            LLMModel.GPT_4O_MINI.value: "Medium",
            LLMModel.GPT_4_1_MINI.value: "Medium",
            LLMModel.GPT_4_1_NANO.value: "Low",
            LLMModel.GPT_3_5_TURBO.value: "Low",
            LLMModel.MIXTRAL_8X7B.value: "Medium-High",
            LLMModel.LLAMA_3_1_8B.value: "Medium"
        }
        return intensity_map.get(model, "Unknown")

    def _get_memory_requirements(self, model: str) -> str:
        """Get memory requirements for a model."""
        memory_map = {
            LLMModel.GPT_4O.value: "High (128K context)",
            LLMModel.GPT_4_1.value: "High (128K context)",
            LLMModel.GPT_4O_MINI.value: "High (128K context)",
            LLMModel.GPT_4_1_MINI.value: "High (128K context)",
            LLMModel.GPT_4_1_NANO.value: "High (128K context)",
            LLMModel.GPT_3_5_TURBO.value: "Medium (16K context)",
            LLMModel.MIXTRAL_8X7B.value: "Medium (32K context)",
            LLMModel.LLAMA_3_1_8B.value: "Low (8K context)"
        }
        return memory_map.get(model, "Unknown")

    def _get_throughput_capacity(self, model: str) -> str:
        """Get throughput capacity for a model."""
        throughput_map = {
            LLMModel.GPT_4O.value: "Medium",
            LLMModel.GPT_4_1.value: "Medium",
            LLMModel.GPT_4O_MINI.value: "High",
            LLMModel.GPT_4_1_MINI.value: "High",
            LLMModel.GPT_4_1_NANO.value: "Very High",
            LLMModel.GPT_3_5_TURBO.value: "Very High",
            LLMModel.MIXTRAL_8X7B.value: "Very High",
            LLMModel.LLAMA_3_1_8B.value: "Ultra High"
        }
        return throughput_map.get(model, "Unknown")

    def _get_optimization_recommendations(self, config: LLMConfiguration) -> list[str]:
        """Get optimization recommendations for a configuration."""
        recommendations = []

        if config.temperature == 0.0:
            recommendations.append("Perfect for factual Q&A and consistent responses")
        elif config.temperature <= 0.3:
            recommendations.append("Good balance of consistency and slight variation")
        elif config.temperature <= 0.7:
            recommendations.append("Suitable for conversational and explanatory responses")
        else:
            recommendations.append("Ideal for creative content and diverse outputs")

        if config.context_window >= 100000:
            recommendations.append("Excellent for processing long documents")
        elif config.context_window >= 30000:
            recommendations.append("Good for medium-length content processing")
        else:
            recommendations.append("Best for short to medium content processing")

        return recommendations


# Usage examples
def demonstrate_llm_configuration():
    manager = LLMConfigurationManager("my-rag-pipeline", "YOUR_API_TOKEN")

    print("🤖 LLM Configuration Management Demo")
    print("=" * 50)

    # Apply different optimization strategies
    strategies = [
        ("maximum_accuracy", "llm-node-1"),
        ("balanced_performance", "llm-node-2"),
        ("high_throughput", "llm-node-3"),
        ("creative_generation", "llm-node-4")
    ]

    for strategy, node_id in strategies:
        try:
            print(f"\n📋 Applying {strategy} to {node_id}")
            result = manager.apply_optimization_strategy(node_id, strategy)
            optimization = result["optimization_applied"]

            print(f" ✅ Success: {result['message']}")
            print(f" 📊 Performance Tier: {optimization['performance_tier']}")
            print(f" ⏱️ Expected Latency: {optimization['expected_latency']}")
            print(f" 🎯 Use Cases: {', '.join(optimization['use_cases'][:2])}")

            # Generate performance report
            report = manager.generate_performance_report(strategy)
            print(f" 🔧 Resource Intensity: {report['resource_utilization']['computational_intensity']}")
            print(f" 💾 Memory Requirements: {report['resource_utilization']['memory_requirements']}")
            print(f" 🚀 Throughput: {report['resource_utilization']['throughput_capacity']}")

        except Exception as e:
            print(f" ❌ Error applying {strategy}: {str(e)}")

    # Demonstrate batch updates
    print("\n🔄 Batch Configuration Updates")

    batch_updates = {
        "llm-support-1": {
            "model": "gpt-4o-mini",
            "promptId": "customer_support_agent",
            "temperature": 0.3
        },
        "llm-technical-1": {
            "model": "gpt-4o",
            "promptId": "technical_documentation_assistant",
            "temperature": 0.1
        },
        "llm-creative-1": {
            "model": "gpt-4.1",
            "promptId": "creative_content_generator",
            "temperature": 0.9
        }
    }

    batch_results = manager.batch_update_multiple_nodes(batch_updates)
    print(f" ✅ Successful updates: {len(batch_results['successful_updates'])}")
    print(f" ❌ Failed updates: {len(batch_results['failed_updates'])}")

    for update in batch_results["successful_updates"]:
        config = update["config_applied"]
        print(f" 📝 {update['node_id']}: {config['model']} (T={config['temperature']})")
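

# Optional sketch (not in the original example): compare the predefined strategies
# side by side without touching the API. It only reads the local optimization
# configs and locally generated performance reports, so it runs offline, e.g.
# compare_strategies(LLMConfigurationManager("my-rag-pipeline", "YOUR_API_TOKEN")).
def compare_strategies(manager: LLMConfigurationManager) -> None:
    """Print a compact comparison of every predefined optimization strategy."""
    print("\n📈 Strategy Comparison")
    header = f"{'Strategy':<22} {'Model':<22} {'Temp':<6} {'Tier':<12} {'Latency'}"
    print(header)
    print("-" * len(header))
    for name in manager.optimization_configs:
        report = manager.generate_performance_report(name)
        cfg = report["configuration"]
        perf = report["performance_characteristics"]
        print(
            f"{name:<22} {cfg['model']:<22} {cfg['temperature']:<6} "
            f"{perf['tier']:<12} {perf['expected_latency']}"
        )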


if __name__ == "__main__":
    # Run demonstration
    try:
        demonstrate_llm_configuration()
    except Exception as e:
        print(f"Demo failed: {e}")