Tensalis Quickstart Guide
Get hallucination detection running in 5 minutes.
What You'll Build
A simple RAG system with Tensalis verification that:
1. Takes a user question
2. Retrieves relevant context
3. Generates an LLM response
4. Verifies the response is faithful to context
5. Returns verified output or blocks hallucinations
Time: 5 minutes
Prerequisites: Python 3.8+, OpenAI API key (for demo)
Step 1: Install Dependencies
pip install requests openai
That's it. No SDK required - Tensalis is a simple REST API.
Step 2: Your First Verification
Create verify_demo.py:
import requests

def verify_response(
    llm_output: str,
    context: list[str],
    timeout: int = 10,
    threshold: float = 0.85
) -> dict:
    """Verify LLM output against source context."""
    response = requests.post(
        "https://tensalis-engine-zlqsb5lbna-uc.a.run.app/v1/verify",
        json={
            "response": llm_output,
            "reference_facts": context,
            "threshold": threshold
        },
        timeout=timeout
    )
    response.raise_for_status()
    return response.json()

# Example: Truthful response
print("Test 1: Truthful response")
result = verify_response(
    llm_output="Returns are accepted within 30 days of purchase.",
    context=["Our return policy: Items can be returned within 30 days."]
)
print(f"✅ Status: {result['status']}, Confidence: {result['confidence']:.2%}\n")

# Example: Hallucinated response (wrong number)
print("Test 2: Hallucinated response")
result = verify_response(
    llm_output="Returns are accepted within 90 days of purchase.",
    context=["Our return policy: Items can be returned within 30 days."]
)
print(f"❌ Status: {result['status']}, Confidence: {result['confidence']:.2%}")
Run it:
python verify_demo.py
Output:
Test 1: Truthful response
✅ Status: VALIDATED, Confidence: 92.00%
Test 2: Hallucinated response
❌ Status: BLOCKED, Confidence: 67.00%
🎉 Congratulations! Tensalis caught the numerical contradiction (30 → 90 days) despite 89.9% embedding similarity.
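If you want to see for yourself why raw embedding similarity misses this kind of error, you can score the two sentences directly. A minimal sketch using sentence-transformers (our library choice, not part of Tensalis; the exact score will vary by model):

# Sketch: measure raw embedding similarity between the hallucinated answer and the source.
# Assumes `pip install sentence-transformers`; the model name is just an example.
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")
answer = "Returns are accepted within 90 days of purchase."
source = "Our return policy: Items can be returned within 30 days."

emb_answer, emb_source = model.encode([answer, source])
similarity = util.cos_sim(emb_answer, emb_source).item()

# A high score despite the factual contradiction shows why similarity alone isn't enough.
print(f"Cosine similarity: {similarity:.1%}")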
Step 3: Integrate with Your RAG System
Basic RAG + Verification
import requests
from openai import OpenAI

# Your existing RAG setup (openai>=1.0 client)
client = OpenAI(api_key="YOUR_OPENAI_KEY")

def retrieve_context(question: str) -> list[str]:
    """Your existing retrieval logic."""
    # This could be vector search, keyword search, etc.
    # For demo, we'll use static context
    return [
        "Our premium plan costs $99/month.",
        "It includes unlimited projects and 10GB storage.",
        "Free trial available for 14 days."
    ]

def generate_response(question: str, context: list[str]) -> str:
    """Generate LLM response from context."""
    context_text = "\n".join(context)
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "Answer based only on the provided context."},
            {"role": "user", "content": f"Context:\n{context_text}\n\nQuestion: {question}"}
        ],
        temperature=0.3  # Lower temperature = more faithful
    )
    return response.choices[0].message.content

def verify_response(llm_output: str, context: list[str]) -> dict:
    """Verify with Tensalis."""
    response = requests.post(
        "https://tensalis-engine-zlqsb5lbna-uc.a.run.app/v1/verify",
        json={"response": llm_output, "reference_facts": context},
        timeout=10
    )
    response.raise_for_status()
    return response.json()

def safe_rag(question: str) -> dict:
    """RAG pipeline with hallucination detection."""
    # 1. Retrieve context
    context = retrieve_context(question)

    # 2. Generate response
    llm_output = generate_response(question, context)

    # 3. Verify faithfulness
    verification = verify_response(llm_output, context)

    # 4. Return verified response or error
    if verification["status"] == "VALIDATED":
        return {
            "answer": llm_output,
            "verified": True,
            "confidence": verification["confidence"]
        }
    else:
        return {
            "answer": None,
            "verified": False,
            "confidence": verification["confidence"],
            "error": "Response failed verification. Please rephrase your question."
        }

# Use it
result = safe_rag("How much does the premium plan cost?")
if result["verified"]:
    print(f"✅ Verified Answer: {result['answer']}")
    print(f"   Confidence: {result['confidence']:.1%}")
else:
    print(f"❌ {result['error']}")
    print(f"   Confidence: {result['confidence']:.1%}")
Output:
✅ Verified Answer: The premium plan costs $99/month.
Confidence: 94.3%
Step 4: Handle Blocked Responses
When verification fails, you have options:
Option 1: Regenerate (Recommended)
def safe_rag_with_retry(question: str, max_attempts: int = 2) -> dict:
    """RAG with automatic regeneration on verification failure."""
    context = retrieve_context(question)

    for attempt in range(max_attempts):
        # Generate response
        llm_output = generate_response(question, context)

        # Verify
        verification = verify_response(llm_output, context)

        if verification["status"] == "VALIDATED":
            return {
                "answer": llm_output,
                "verified": True,
                "confidence": verification["confidence"],
                "attempts": attempt + 1
            }

        # Failed verification - retry, optionally with a stricter prompt (see the sketch below)
        print(f"Attempt {attempt + 1} failed verification, retrying...")

    # All attempts failed
    return {
        "answer": None,
        "verified": False,
        "error": "Could not generate verified response after multiple attempts"
    }
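The retry loop above leaves the "stricter prompt" step to you. One possible approach is to tighten the system prompt on later attempts, reusing the OpenAI client from Step 3; a minimal sketch (the prompt wording is only an illustration, tune it for your domain):

def generate_response_strict(question: str, context: list[str]) -> str:
    """Regeneration with a more conservative prompt (illustrative wording)."""
    context_text = "\n".join(context)
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": (
                "Answer using ONLY facts stated in the context. "
                "Copy numbers, dates, and amounts exactly as written. "
                "If the context does not contain the answer, say so."
            )},
            {"role": "user", "content": f"Context:\n{context_text}\n\nQuestion: {question}"}
        ],
        temperature=0.0  # As deterministic as possible on the retry
    )
    return response.choices[0].message.content

# Inside the retry loop you might switch prompts after the first failure, e.g.:
# llm_output = generate_response(question, context) if attempt == 0 else generate_response_strict(question, context)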
Option 2: Return Context Directly (Safest)
def safe_rag_fallback(question: str) -> dict:
    """Return raw context if LLM output can't be verified."""
    context = retrieve_context(question)
    llm_output = generate_response(question, context)
    verification = verify_response(llm_output, context)

    if verification["status"] == "VALIDATED":
        return {"answer": llm_output, "source": "generated"}
    else:
        # Return source context directly
        return {
            "answer": "Based on our documentation:\n" + "\n".join(f"• {c}" for c in context),
            "source": "context",
            "note": "Generated response could not be verified"
        }
Option 3: Warn User (Interactive)
def safe_rag_interactive(question: str) -> dict:
    """Show warning to user when verification fails."""
    context = retrieve_context(question)
    llm_output = generate_response(question, context)
    verification = verify_response(llm_output, context)

    return {
        "answer": llm_output,
        "verified": verification["status"] == "VALIDATED",
        "confidence": verification["confidence"],
        "warning": None if verification["status"] == "VALIDATED" else
            "⚠️ This response could not be fully verified. Please double-check against source documents."
    }
Step 5: Production Deployment
Add Error Handling
import requests
from typing import Optional
import time

def verify_response_production(
    llm_output: str,
    context: list[str],
    timeout: int = 10,
    max_retries: int = 3
) -> Optional[dict]:
    """Production-ready verification with retries and error handling."""
    for attempt in range(max_retries):
        try:
            response = requests.post(
                "https://tensalis-engine-zlqsb5lbna-uc.a.run.app/v1/verify",
                json={
                    "response": llm_output,
                    "reference_facts": context,
                    "threshold": 0.85
                },
                timeout=timeout
            )

            if response.status_code == 200:
                return response.json()
            elif response.status_code == 503:
                # Service temporarily unavailable - retry
                retry_after = response.json().get("retry_after", 5)
                time.sleep(retry_after)
                continue
            else:
                # Client error - don't retry
                response.raise_for_status()

        except requests.exceptions.Timeout:
            if attempt < max_retries - 1:
                wait_time = 2 ** attempt  # Exponential backoff: 1s, 2s, 4s
                time.sleep(wait_time)
                continue
            else:
                # Max retries exceeded - fail gracefully
                return {
                    "status": "ERROR",
                    "confidence": 0.0,
                    "error": "Verification timeout"
                }

        except requests.exceptions.RequestException as e:
            # Network error - fail gracefully
            return {
                "status": "ERROR",
                "confidence": 0.0,
                "error": str(e)
            }

    return None
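Note that verify_response_production can return a synthetic ERROR result rather than raising. Decide up front whether an unreachable verifier should fail open (serve the unverified answer) or fail closed (block it). A minimal sketch of that policy decision; the FAIL_CLOSED flag and gate_response helper are our own names, not part of the Tensalis API:

# Sketch: choose a failure mode when verification itself errors out.
FAIL_CLOSED = True  # hypothetical config flag

def gate_response(llm_output: str, context: list[str]) -> dict:
    """Apply the verification verdict, treating ERROR according to policy."""
    verification = verify_response_production(llm_output, context) or {
        "status": "ERROR", "confidence": 0.0, "error": "no result"
    }

    if verification["status"] == "VALIDATED":
        return {"answer": llm_output, "verified": True}

    if verification["status"] == "ERROR" and not FAIL_CLOSED:
        # Fail open: serve the answer but flag that it was not verified
        return {"answer": llm_output, "verified": False, "warning": "verification unavailable"}

    # BLOCKED, or ERROR under a fail-closed policy
    return {"answer": None, "verified": False, "error": verification.get("error", "blocked")}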
Add Monitoring
import logging
from datetime import datetime

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def verify_and_monitor(llm_output: str, context: list[str]) -> dict:
    """Verify with monitoring/logging."""
    start_time = datetime.now()

    try:
        result = verify_response_production(llm_output, context)

        # Log successful verification
        logger.info(
            f"Verification completed: "
            f"status={result['status']}, "
            f"confidence={result['confidence']:.2f}, "
            f"latency={(datetime.now() - start_time).total_seconds():.2f}s"
        )

        # Send metrics to your monitoring system
        # Example: Datadog, CloudWatch, Prometheus
        # metrics.increment(f"tensalis.verification.{result['status'].lower()}")
        # metrics.histogram("tensalis.confidence", result['confidence'])
        # metrics.histogram("tensalis.latency", result['processing_time_ms'])

        return result

    except Exception as e:
        logger.error(f"Verification failed: {e}")
        # Send error metric
        # metrics.increment("tensalis.verification.error")
        raise
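If you happen to use Prometheus, the commented-out metrics calls above might look roughly like the sketch below. The metric names and the record_metrics helper are our own convention, not something Tensalis prescribes:

# Sketch: concrete Prometheus metrics for the commented-out calls above.
# Assumes `pip install prometheus-client`.
from prometheus_client import Counter, Histogram

VERIFICATIONS = Counter(
    "tensalis_verifications_total", "Verification results by status", ["status"]
)
CONFIDENCE = Histogram("tensalis_confidence", "Verification confidence scores")
LATENCY = Histogram("tensalis_latency_seconds", "End-to-end verification latency")

def record_metrics(result: dict, latency_seconds: float) -> None:
    """Record one verification result; call this from verify_and_monitor."""
    VERIFICATIONS.labels(status=result["status"].lower()).inc()
    CONFIDENCE.observe(result["confidence"])
    LATENCY.observe(latency_seconds)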
Environment Configuration
Create .env file:
TENSALIS_API_URL=https://tensalis-engine-zlqsb5lbna-uc.a.run.app
TENSALIS_THRESHOLD=0.85
TENSALIS_TIMEOUT=10
TENSALIS_MAX_RETRIES=3
Load in your app:
import os
from dotenv import load_dotenv

load_dotenv()

TENSALIS_CONFIG = {
    "api_url": os.getenv("TENSALIS_API_URL"),
    "threshold": float(os.getenv("TENSALIS_THRESHOLD", "0.85")),
    "timeout": int(os.getenv("TENSALIS_TIMEOUT", "10")),
    "max_retries": int(os.getenv("TENSALIS_MAX_RETRIES", "3"))
}
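Your verification helper should then read this config instead of hard-coded values. A minimal sketch wiring TENSALIS_CONFIG into the request (verify_from_config is our own name); combine it with the retry logic from verify_response_production if you also want max_retries honored:

import requests

def verify_from_config(llm_output: str, context: list[str]) -> dict:
    """Verification call driven by TENSALIS_CONFIG instead of hard-coded values."""
    response = requests.post(
        f"{TENSALIS_CONFIG['api_url']}/v1/verify",
        json={
            "response": llm_output,
            "reference_facts": context,
            "threshold": TENSALIS_CONFIG["threshold"]
        },
        timeout=TENSALIS_CONFIG["timeout"]
    )
    response.raise_for_status()
    return response.json()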
Common Integration Patterns
Pattern 1: Synchronous Gate
Block response before returning to user:
def chatbot_response(user_message: str) -> str:
    """Chatbot with synchronous verification."""
    context = retrieve_context(user_message)
    llm_output = generate_response(user_message, context)
    verification = verify_response(llm_output, context)

    if verification["status"] == "VALIDATED":
        return llm_output
    else:
        # Regenerate or return safe fallback
        return "I couldn't verify that response. Let me try again..."
Use Case: Customer support, FAQ bots, compliance-sensitive applications
Pattern 2: Asynchronous Audit
Verify after returning to user (for analytics):
import asyncio
import aiohttp

async def chatbot_async(user_message: str) -> str:
    """Return immediately, verify in background."""
    context = retrieve_context(user_message)
    llm_output = generate_response(user_message, context)

    # Return to user immediately
    response = llm_output

    # Verify asynchronously for monitoring
    asyncio.create_task(
        audit_response(llm_output, context, message_id="msg_123")
    )

    return response

async def audit_response(llm_output: str, context: list[str], message_id: str):
    """Background verification for analytics."""
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "https://tensalis-engine-zlqsb5lbna-uc.a.run.app/v1/verify",
            json={"response": llm_output, "reference_facts": context}
        ) as response:
            result = await response.json()

            # Log for analytics (log_verification is your own analytics hook)
            log_verification(message_id, result)
Use Case: High-throughput systems, analytics/monitoring, non-critical applications
Pattern 3: Batch Processing
Verify many responses offline:
def batch_verify(responses: list[tuple[str, list[str]]]) -> list[dict]:
    """Verify multiple responses (currently sequential, batch API coming)."""
    results = []
    for llm_output, context in responses:
        result = verify_response(llm_output, context)
        results.append({
            "output": llm_output,
            "status": result["status"],
            "confidence": result["confidence"]
        })
    return results

# Example: Evaluate your RAG system on test set
test_cases = [
    ("Answer 1...", ["Context 1..."]),
    ("Answer 2...", ["Context 2..."]),
    # ... 100 more
]

results = batch_verify(test_cases)

# Analyze
validated = sum(1 for r in results if r["status"] == "VALIDATED")
print(f"Validation rate: {validated}/{len(results)} ({validated/len(results):.1%})")
Use Case: Testing, evaluation, quality assurance, model comparison
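Until a batch endpoint ships, you can also parallelize the sequential loop above with a thread pool. A minimal sketch; batch_verify_parallel and the worker count of 8 are our own choices, and you should keep concurrency modest to avoid rate limiting:

from concurrent.futures import ThreadPoolExecutor

def batch_verify_parallel(responses: list[tuple[str, list[str]]], workers: int = 8) -> list[dict]:
    """Parallel variant of batch_verify using threads (result order is preserved)."""
    def _verify_one(pair: tuple[str, list[str]]) -> dict:
        llm_output, context = pair
        result = verify_response(llm_output, context)
        return {
            "output": llm_output,
            "status": result["status"],
            "confidence": result["confidence"]
        }

    with ThreadPoolExecutor(max_workers=workers) as pool:
        return list(pool.map(_verify_one, responses))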
Framework Integrations
LangChain
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.callbacks.base import BaseCallbackHandler

class TensalisVerificationCallback(BaseCallbackHandler):
    """Verify LangChain outputs with Tensalis."""

    def on_llm_end(self, response, **kwargs):
        """Verify response after LLM completes."""
        output = response.generations[0][0].text
        context = kwargs.get("context", [])

        verification = verify_response(output, context)

        if verification["status"] != "VALIDATED":
            raise ValueError(
                f"Response failed verification (confidence: {verification['confidence']:.2%})"
            )

# Use it
qa_chain = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    retriever=vectorstore.as_retriever(),
    callbacks=[TensalisVerificationCallback()]
)
LlamaIndex
from llama_index import VectorStoreIndex

def verify_llamaindex_response(response_obj):
    """Verify LlamaIndex response."""
    output = response_obj.response
    source_nodes = response_obj.source_nodes

    # Extract context from source nodes
    context = [node.node.text for node in source_nodes]

    verification = verify_response(output, context)

    if verification["status"] != "VALIDATED":
        raise ValueError("Response failed Tensalis verification")

    return response_obj

# Use it
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
response = query_engine.query("What is the pricing?")
verified_response = verify_llamaindex_response(response)
Streamlit (UI)
import streamlit as st

st.title("RAG Chatbot with Verification")

user_question = st.text_input("Ask a question:")

if st.button("Submit"):
    with st.spinner("Retrieving context..."):
        context = retrieve_context(user_question)

    with st.spinner("Generating response..."):
        llm_output = generate_response(user_question, context)

    with st.spinner("Verifying response..."):
        verification = verify_response(llm_output, context)

    if verification["status"] == "VALIDATED":
        st.success("✅ Verified Response")
        st.write(llm_output)
        st.caption(f"Confidence: {verification['confidence']:.1%}")
    else:
        st.error("❌ Response Failed Verification")
        st.warning("The AI generated content that couldn't be verified. Here's the source context instead:")
        for i, ctx in enumerate(context, 1):
            st.info(f"{i}. {ctx}")
        st.caption(f"Confidence: {verification['confidence']:.1%}")
Performance Tips
1. Warm the Service (Avoid Cold Starts)
import schedule
import time

def keep_tensalis_warm():
    """Ping Tensalis every 5 minutes to avoid cold starts."""
    try:
        verify_response("ping", ["ping"], timeout=5)
    except Exception:
        pass  # Ignore failures - just keeping warm

# Schedule warming pings
schedule.every(5).minutes.do(keep_tensalis_warm)

# Run in background thread
import threading

def run_scheduler():
    while True:
        schedule.run_pending()
        time.sleep(60)

threading.Thread(target=run_scheduler, daemon=True).start()
2. Cache Results
import hashlib

def cache_key(llm_output: str, context: list[str]) -> str:
    """Generate cache key for verification result."""
    content = llm_output + "|||" + "|||".join(sorted(context))
    return hashlib.md5(content.encode()).hexdigest()

# Simple in-memory cache (context lists aren't hashable, so functools.lru_cache doesn't apply directly)
_verification_cache = {}

def verify_with_cache(llm_output: str, context: list[str]) -> dict:
    """Verify with caching (useful for repeated questions)."""
    key = cache_key(llm_output, context)

    if key in _verification_cache:
        print("Cache hit!")
        return _verification_cache[key]

    result = verify_response(llm_output, context)
    _verification_cache[key] = result
    return result
3. Parallel Requests (Async)
import asyncio
import aiohttp

async def verify_async(llm_output: str, context: list[str]) -> dict:
    """Async verification for parallel processing."""
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "https://tensalis-engine-zlqsb5lbna-uc.a.run.app/v1/verify",
            json={"response": llm_output, "reference_facts": context}
        ) as response:
            return await response.json()

# Verify multiple responses in parallel
async def verify_many(pairs: list[tuple[str, list[str]]]) -> list[dict]:
    """Verify many responses concurrently."""
    tasks = [verify_async(output, context) for output, context in pairs]
    return await asyncio.gather(*tasks)

# Usage
pairs = [
    ("Response 1", ["Context 1"]),
    ("Response 2", ["Context 2"]),
    ("Response 3", ["Context 3"])
]
results = asyncio.run(verify_many(pairs))
Testing Your Integration
Unit Test Example
import unittest

import requests

# Assumes the verify_response helper from Step 2 is importable in this module
class TestTensalisIntegration(unittest.TestCase):

    def test_truthful_response_validated(self):
        """Truthful responses should be validated."""
        result = verify_response(
            llm_output="The product costs $99.",
            context=["Product price: $99"]
        )
        self.assertEqual(result["status"], "VALIDATED")
        self.assertGreater(result["confidence"], 0.85)

    def test_contradictory_response_blocked(self):
        """Contradictory responses should be blocked."""
        result = verify_response(
            llm_output="Returns accepted within 90 days.",
            context=["Return policy: 30 days"]
        )
        self.assertEqual(result["status"], "BLOCKED")
        self.assertLess(result["confidence"], 0.85)

    def test_error_handling(self):
        """API errors should be handled gracefully."""
        with self.assertRaises(requests.exceptions.HTTPError):
            verify_response("", [])  # Empty inputs should error

if __name__ == "__main__":
    unittest.main()
Troubleshooting
Issue: TimeoutError
# Increase timeout for slow networks
result = verify_response(output, context, timeout=15)
# Or implement retry logic (see Step 5)
Issue: All Responses Blocked
# Check your threshold - might be too high
result = verify_response(output, context, threshold=0.75) # Lower
# Or check if your context is complete
print("Context:", context) # Make sure it has the facts needed
Issue: JSON Decode Error
# Make sure API returned valid JSON
response = requests.post(...)
print("Status:", response.status_code)
print("Raw response:", response.text) # Debug
if response.status_code == 200:
    result = response.json()
All third-party product names and trademarks are the property of their respective owners. References are for informational purposes only and do not imply endorsement, affiliation, or comparative performance guarantees.
Next Steps
✅ You're ready to integrate Tensalis!
Recommended Reading:
1. API Reference - Complete API documentation
2. Architecture Guide - How Tensalis works under the hood
3. Best Practices - Production deployment tips
Join the Community:
- Discord: https://discord.gg/tensalis
- GitHub: https://github.com/tensalis/examples
- Email: support@tensalis.com
Questions? Email us at support@tensalis.com or join our Discord.
Last Updated: December 17, 2025