import requests
import json
def update_chunking_node(flow_name, node_id, config, api_token, timeout=30):
    """Send a PATCH request that updates a chunking node's configuration.

    Args:
        flow_name: Flow name; interpolated as the subdomain of the request URL.
        node_id: Identifier of the chunking node; appended to the URL path.
        config: Configuration mapping sent under the ``"config"`` key of the
            JSON request body.
        api_token: Token sent as a ``Bearer`` credential in the
            ``Authorization`` header.
        timeout: Seconds to wait for the server before giving up. ``requests``
            waits indefinitely when no timeout is supplied, so a finite
            default is used here; pass ``None`` to restore the unbounded wait.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: If the response status is 4xx or 5xx.
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    url = f"https://{flow_name}.flows.graphorlm.com/chunking/{node_id}"
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json",
    }
    payload = {"config": config}

    response = requests.patch(url, headers=headers, json=payload, timeout=timeout)
    # Turn 4xx/5xx answers into exceptions so callers never parse an error body
    # as if it were a successful result.
    response.raise_for_status()
    return response.json()
def optimize_chunking_configuration(flow_name, node_id, api_token, optimization_type="balanced"):
    """Apply one of the predefined chunking presets to a chunking node.

    The chosen preset is printed, sent to the node via
    ``update_chunking_node``, and a short report of the response and the
    applied settings is printed afterwards.

    Args:
        flow_name: Flow name, passed through to ``update_chunking_node``.
        node_id: Identifier of the chunking node to update.
        api_token: API token, passed through to ``update_chunking_node``.
        optimization_type: Preset name. One of ``"balanced"``,
            ``"high_quality"``, ``"fast_processing"``, ``"academic_papers"``
            or ``"code_docs"``.

    Returns:
        Whatever ``update_chunking_node`` returned for the request.

    Raises:
        ValueError: If ``optimization_type`` is not a known preset. Raised
            before any request is made.
        requests.exceptions.HTTPError: Re-raised after a diagnostic message
            has been printed.
    """
    # Predefined optimization configurations. Built per call, so the dict
    # handed to update_chunking_node is never shared between invocations.
    optimizations = {
        "balanced": {
            "embeddingModel": "text-embedding-3-small",
            "chunkingSplitter": "character",
            "chunkSize": 1000,
            "chunkOverlap": 150,
            "chunkSeparator": "\n\n",
            "elementsToRemove": ["Header", "Footer"],
        },
        "high_quality": {
            "embeddingModel": "text-embedding-3-large",
            "chunkingSplitter": "semantic",
            "chunkSize": 1200,
            "chunkOverlap": 200,
            "chunkSeparator": "\n\n",
            "elementsToRemove": ["Header", "Footer", "PageNumber"],
        },
        "fast_processing": {
            "embeddingModel": "text-embedding-3-small",
            "chunkingSplitter": "character",
            "chunkSize": 800,
            "chunkOverlap": 80,
            "chunkSeparator": "\n",
            "elementsToRemove": ["Header", "Footer"],
        },
        "academic_papers": {
            "embeddingModel": "text-embedding-3-large",
            "chunkingSplitter": "semantic",
            "chunkSize": 1500,
            "chunkOverlap": 300,
            "chunkSeparator": "\n\n",
            "splitLevel": 1,
            "elementsToRemove": ["Header", "Footer", "PageNumber", "Reference"],
        },
        "code_docs": {
            "embeddingModel": "text-embedding-3-small",
            "chunkingSplitter": "element",
            "chunkSize": 2000,
            "chunkOverlap": 0,
            "splitLevel": 2,
            "elementsToRemove": ["NarrativeText"],
        },
    }

    if optimization_type not in optimizations:
        raise ValueError(f"Unknown optimization type: {optimization_type}")
    config = optimizations[optimization_type]

    print(f"🔧 Applying {optimization_type} optimization to chunking node {node_id}")
    print(f"Configuration: {json.dumps(config, indent=2)}")

    # Only the network call sits inside the try: the reporting below must not
    # be able to turn an update that already succeeded into an exception.
    try:
        result = update_chunking_node(flow_name, node_id, config, api_token)
    except requests.exceptions.HTTPError as e:
        print(f"❌ Update failed: {e}")
        # HTTPError can be constructed without a response attached.
        status_code = e.response.status_code if e.response is not None else None
        if status_code == 404:
            print("Flow or chunking node not found")
        elif status_code == 400:
            print("Invalid configuration parameters")
        raise

    # Read response fields tolerantly; a missing field is reported as None
    # instead of raising after the configuration has been applied.
    details = result if isinstance(result, dict) else {}
    print("✅ Chunking configuration updated successfully!")
    print(f"Success: {details.get('success')}")
    print(f"Message: {details.get('message')}")
    print(f"Updated Node ID: {details.get('node_id')}")

    # Display applied configuration
    print("\n📊 Applied Configuration:")
    print(f" Embedding Model: {config['embeddingModel']}")
    print(f" Splitter Type: {config['chunkingSplitter']}")
    print(f" Chunk Size: {config['chunkSize']}")
    print(f" Chunk Overlap: {config['chunkOverlap']}")
    print(f" Elements to Remove: {', '.join(config.get('elementsToRemove', []))}")
    return result
def batch_update_chunking_configurations(flow_name, api_token, node_configs):
    """Update several chunking nodes, each with its own configuration.

    Every node is attempted independently; a failure on one node is recorded
    and the loop moves on to the next.

    Args:
        flow_name: Flow name, passed through to ``update_chunking_node``.
        api_token: API token, passed through to ``update_chunking_node``.
        node_configs: Mapping of node id to the configuration to apply to
            that node.

    Returns:
        A dict with three keys:
        ``"successful_updates"`` (list of ``{"node_id", "config", "result"}``),
        ``"failed_updates"`` (list of ``{"node_id", "config", "error"}`` where
        ``error`` is the stringified exception), and ``"total_nodes"``
        (``len(node_configs)``). Each node appears in exactly one of the two
        lists.
    """
    results = {
        "successful_updates": [],
        "failed_updates": [],
        "total_nodes": len(node_configs),
    }

    for node_id, config in node_configs.items():
        print(f"\n🔄 Updating node {node_id}...")
        # The try covers only the update call. Reporting a success happens
        # outside it, so a surprising response shape can no longer cause a
        # node to be listed as both successful and failed.
        try:
            result = update_chunking_node(flow_name, node_id, config, api_token)
        except Exception as e:
            # Deliberately broad: this is a best-effort batch, and any failure
            # for one node must not abort the remaining nodes.
            results["failed_updates"].append({
                "node_id": node_id,
                "config": config,
                "error": str(e),
            })
            print(f"❌ Failed: {e}")
            continue

        results["successful_updates"].append({
            "node_id": node_id,
            "config": config,
            "result": result,
        })
        message = result.get("message") if isinstance(result, dict) else result
        print(f"✅ Success: {message}")

    return results
# Usage examples
try:
    # Apply the "high_quality" preset to a single chunking node.
    result = optimize_chunking_configuration(
        "my-rag-pipeline",
        "chunking-1748287628685",
        "YOUR_API_TOKEN",
        "high_quality",
    )

    # Explicit per-node configurations for the batch update below.
    node_configs = {
        "chunking-node-1": {
            "embeddingModel": "text-embedding-3-small",
            "chunkingSplitter": "character",
            "chunkSize": 1000,
            "chunkOverlap": 150,
        },
        "chunking-node-2": {
            "embeddingModel": "text-embedding-3-large",
            "chunkingSplitter": "semantic",
            "chunkSize": 1200,
            "chunkOverlap": 200,
        },
    }
    batch_results = batch_update_chunking_configurations(
        flow_name="my-rag-pipeline",
        api_token="YOUR_API_TOKEN",
        node_configs=node_configs,
    )

    # Report how many nodes landed in each outcome list.
    print("\n📈 Batch Update Summary:")
    print(f"Successful: {len(batch_results['successful_updates'])}")
    print(f"Failed: {len(batch_results['failed_updates'])}")
except Exception as exc:
    # Top-level boundary of the example: report the failure and stop.
    print(f"Error: {exc}")